1 #include <U2Core/disable-warnings.h>
2 U2_DISABLE_WARNINGS
3
4 //#include "phylip.h"
5 //#include "seq.h"
6 #include "dnadist.h"
7 /* version 3.696.
8 Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe.
9
10 Copyright (c) 1993-2014, Joseph Felsenstein
11 All rights reserved.
12
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 1. Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 2. Redistributions in binary form must reproduce the above copyright notice,
20 this list of conditions and the following disclaimer in the documentation
21 and/or other materials provided with the distribution.
22
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 #include <U2Algorithm/CreatePhyTreeSettings.h>
37 #include <U2Core/Task.h>
38
39 QString DNADistModelTypes::F84("F84");
40 QString DNADistModelTypes::Kimura("Kimura");
41 QString DNADistModelTypes::JukesCantor("Jukes-Cantor");
42 QString DNADistModelTypes::LogDet("LogDet");
43 static U2::CreatePhyTreeSettings dnaDistSettings;
44
setDNADistSettings(const CreatePhyTreeSettings & settings)45 void U2::setDNADistSettings( const CreatePhyTreeSettings& settings )
46 {
47 dnaDistSettings = settings;
48
49 }
50
getDNADistSettings()51 const U2::CreatePhyTreeSettings& getDNADistSettings()
52 {
53 //TODO: use mutex here later
54 return dnaDistSettings;
55 }
56
57
58
59 Phylip_Char infilename[FNMLNGTH], outfilename[FNMLNGTH], catfilename[FNMLNGTH], weightfilename[FNMLNGTH];
60 long sites, categs, weightsum, datasets, ith, rcategs;
61 boolean freqsfrom, jukes, kimura, logdet, gama, invar, similarity, lower, f84,
62 weights, progress, ctgry, mulsets, justwts, firstset, baddists;
63 boolean matrix_flags; /* Matrix output format */
64 node **nodep = NULL;
65 double xi, xv, ttratio, ttratio0, freqa, freqc, freqg, freqt, freqr, freqy,
66 freqar, freqcy, freqgr, freqty, cvi, invarfrac, sumrates, fracchange;
67 steptr oldweight = NULL;
68 double rate[maxcategs];
69 double **d = NULL;
70 double sumweightrat = 0; /* these values were propagated */
71 double *weightrat = NULL; /* to global values from */
72 valrec tbl[maxcategs]; /* function makedists. */
73
74
75
getDNADistModelTypes()76 QList<QString> DNADistModelTypes::getDNADistModelTypes()
77 {
78 static QList<QString> list;
79 if (list.isEmpty()) {
80 list.append(DNADistModelTypes::F84);
81 list.append(DNADistModelTypes::Kimura);
82 list.append(DNADistModelTypes::JukesCantor);
83 list.append(DNADistModelTypes::LogDet);
84 }
85
86 return list;
87 }
88
89
getoptions()90 void getoptions()
91 {
92 /* interactively set options */
93 long loopcount, loopcount2;
94 Phylip_Char ch, ch2;
95 boolean ttr;
96 char *str = NULL;
97
98 ctgry = false;
99 categs = 1;
100 cvi = 1.0;
101 rcategs = 1;
102 rate[0] = 1.0;
103 freqsfrom = true;
104 gama = false;
105 invar = false;
106 invarfrac = 0.0;
107 jukes = false;
108 justwts = false;
109 kimura = false;
110 logdet = false;
111 f84 = true;
112 lower = false;
113 matrix_flags = MAT_MACHINE;
114 similarity = false;
115 ttratio = 2.0;
116 ttr = false;
117 weights = false;
118 printdata = false;
119 dotdiff = true;
120 progress = false;
121 interleaved = true;
122 loopcount = 0;
123
124 QString matrixModel = getDNADistSettings().matrixId;
125 if (matrixModel == DNADistModelTypes::F84) {
126 f84 = true;
127 kimura = false;
128 jukes = false;
129 freqsfrom = true;
130 logdet = false;
131 ttratio = getDNADistSettings().ttRatio;
132 ttr = true;
133 } else if (matrixModel == DNADistModelTypes::Kimura) {
134 f84 = false;
135 kimura = true;
136 jukes = false;
137 freqsfrom = false;
138 logdet = false;
139 ttratio = getDNADistSettings().ttRatio;
140 ttr = true;
141 } else if (matrixModel == DNADistModelTypes::JukesCantor) {
142 f84 = false;
143 kimura = false;
144 jukes = true;
145 freqsfrom = false;
146 logdet = false;
147 } else if (matrixModel == DNADistModelTypes::LogDet) {
148 f84 = false;
149 kimura = false;
150 jukes = false;
151 freqsfrom = false;
152 logdet = true;
153 }
154
155 gama = getDNADistSettings().useGammaDistributionRates;
156 if (gama) {
157 cvi = getDNADistSettings().alphaFactor;
158 cvi = 1.0 / (cvi * cvi);
159 }
160
161 /*for (;;) {
162 cleerhome();
163 printf("\nNucleic acid sequence Distance Matrix program,");
164 printf(" version %s\n\n",PHY_VERSION);
165 printf("Settings for this run:\n");
166 printf(" D Distance (F84, Kimura, Jukes-Cantor, LogDet)? %s\n",
167 kimura ? "Kimura 2-parameter" :
168 jukes ? "Jukes-Cantor" :
169 logdet ? "LogDet" :
170 similarity ? "Similarity table" : "F84");
171 if (kimura || f84 || jukes) {
172 printf(" G Gamma distributed rates across sites? ");
173 if (gama)
174 printf("Yes\n");
175 else {
176 if (invar)
177 printf("Gamma+Invariant\n");
178 else
179 printf("No\n");
180 }
181 }
182 if (kimura || f84) {
183 printf(" T Transition/transversion ratio?");
184 if (!ttr)
185 printf(" 2.0\n");
186 else
187 printf("%8.4f\n", ttratio);
188 }
189 if (!logdet && !similarity && !gama && !invar) {
190 printf(" C One category of substitution rates?");
191 if (!ctgry || categs == 1)
192 printf(" Yes\n");
193 else
194 printf(" %ld categories\n", categs);
195 }
196 printf(" W Use weights for sites?");
197 if (weights)
198 printf(" Yes\n");
199 else
200 printf(" No\n");
201 if (f84)
202 printf(" F Use empirical base frequencies? %s\n",
203 (freqsfrom ? "Yes" : "No"));
204 printf(" L Form of distance matrix? ");
205 switch (matrix_flags) {
206 case MAT_MACHINE:
207 str = "Square";
208 break;
209 case MAT_LOWERTRI:
210 str = "Lower-triangular";
211 break;
212 case MAT_HUMAN:
213 str = "Human-readable";
214 break;
215 default:
216 assert( 0 );
217 str = "(unknown)";
218 }
219 puts(str);
220 printf(" M Analyze multiple data sets?");
221 if (mulsets)
222 printf(" Yes, %2ld %s\n", datasets,
223 (justwts ? "sets of weights" : "data sets"));
224 else
225 printf(" No\n");
226 printf(" I Input sequences interleaved? %s\n",
227 (interleaved ? "Yes" : "No, sequential"));
228 printf(" 0 Terminal type (IBM PC, ANSI, none)? %s\n",
229 ibmpc ? "IBM PC" : ansi ? "ANSI" : "(none)");
230 printf(" 1 Print out the data at start of run %s\n",
231 (printdata ? "Yes" : "No"));
232 printf(" 2 Print indications of progress of run %s\n",
233 (progress ? "Yes" : "No"));
234 printf("\n Y to accept these or type the letter for one to change\n");
235 #ifdef WIN32
236 phyFillScreenColor();
237 #endif
238 fflush(stdout);
239 scanf("%c%*[^\n]", &ch);
240 getchar();
241 uppercase(&ch);
242 if (ch == 'Y')
243 break;
244 if ((f84 && (strchr("CFGWLDTMI012",ch) != NULL)) ||
245 (kimura && (strchr("CGWLDTMI012",ch) != NULL)) ||
246 (jukes && (strchr("CGWLDMI012",ch) != NULL)) ||
247 ((logdet || similarity) && (strchr("WLDMI012",ch)) != NULL) ||
248 (ctgry && (strchr("CFWLDTMI012",ch) != NULL))) {
249 switch (ch) {
250
251 case 'D':
252 if (kimura) {
253 kimura = false;
254 jukes = true;
255 freqsfrom = false;
256 } else if (f84) {
257 f84 = false;
258 kimura = true;
259 freqsfrom = false;
260 } else if (logdet) {
261 logdet = false;
262 similarity = true;
263 } else if (similarity) {
264 similarity = false;
265 f84 = true;
266 freqsfrom = true;
267 } else {
268 jukes = false;
269 logdet = true;
270 freqsfrom = false;
271 }
272 break;
273
274 case 'G':
275 if (!(gama || invar))
276 gama = true;
277 else {
278 if (gama) {
279 gama = false;
280 invar = true;
281 } else {
282 if (invar)
283 invar = false;
284 }
285 }
286 break;
287
288
289 case 'C':
290 ctgry = !ctgry;
291 if (ctgry) {
292 initcatn(&categs);
293 initcategs(categs, rate);
294 }
295 break;
296
297 case 'F':
298 freqsfrom = !freqsfrom;
299 if (!freqsfrom)
300 initfreqs(&freqa, &freqc, &freqg, &freqt);
301 break;
302
303 case 'W':
304 weights = !weights;
305 break;
306
307 case 'L':
308 switch ( matrix_flags ) {
309 case MAT_HUMAN:
310 matrix_flags = MAT_MACHINE;
311 break;
312 case MAT_LOWERTRI:
313 matrix_flags = MAT_HUMAN;
314 break;
315 case MAT_MACHINE:
316 matrix_flags = MAT_LOWERTRI;
317 break;
318 default:
319 assert(0);
320 matrix_flags = MAT_HUMAN;
321 }
322 break;
323
324 case 'T':
325 ttr = !ttr;
326 if (ttr)
327 initratio(&ttratio);
328 break;
329
330 case 'M':
331 mulsets = !mulsets;
332 if (mulsets) {
333 printf("Multiple data sets or multiple weights?");
334 loopcount2 = 0;
335 do {
336 printf(" (type D or W)\n");
337 #ifdef WIN32
338 phyFillScreenColor();
339 #endif
340 fflush(stdout);
341 scanf("%c%*[^\n]", &ch2);
342 uppercase(&ch2);
343 getchar();
344 countup(&loopcount2, 10);
345 } while ((ch2 != 'W') && (ch2 != 'D'));
346 justwts = (ch2 == 'W');
347 if (justwts)
348 justweights(&datasets);
349 else
350 initdatasets(&datasets);
351 }
352 break;
353
354 case 'I':
355 interleaved = !interleaved;
356 break;
357
358 case '0':
359 initterminal(&ibmpc, &ansi);
360 break;
361
362 case '1':
363 printdata = !printdata;
364 break;
365
366 case '2':
367 progress = !progress;
368 break;
369 }
370 } else {
371 if (strchr("CFGWLDTMI012",ch) == NULL)
372 printf("Not a possible option!\n");
373 else
374 printf("That option not allowed with these settings\n");
375 printf("\nPress Enter or Return key to continue\n");
376 getchar();
377 }
378 //countup(&loopcount, 100);
379 }
380
381 if (similarity) {
382 if (matrix_flags == MAT_MACHINE)
383 matrix_flags = MAT_HUMAN;
384 else if (matrix_flags == MAT_LOWERTRI)
385 matrix_flags = MAT_LOWER | MAT_HUMAN;
386 }
387
388 if (gama || invar) {
389 loopcount = 0;
390 do {
391 printf(
392 "\nCoefficient of variation of substitution rate among sites (must be positive)\n");
393 printf(
394 " In gamma distribution parameters, this is 1/(square root of alpha)\n");
395 #ifdef WIN32
396 phyFillScreenColor();
397 #endif
398 fflush(stdout);
399 scanf("%lf%*[^\n]", &cvi);
400 getchar();
401 countup(&loopcount, 10);
402 } while (cvi <= 0.0);
403 cvi = 1.0 / (cvi * cvi);
404 }
405 if (invar) {
406 loopcount = 0;
407 do {
408 printf("Fraction of invariant sites?\n");
409 fflush(stdout);
410 scanf("%lf%*[^\n]", &invarfrac);
411 getchar();
412 countup (&loopcount, 10);
413 } while ((invarfrac <= 0.0) || (invarfrac >= 1.0));
414 }
415 if (!printdata)
416 return;
417 fprintf(outfile, "\nNucleic acid sequence Distance Matrix program,");
418 fprintf(outfile, " version %s\n\n",PHY_VERSION); */
419 } /* getoptions */
420
421
allocrest(U2::MemoryLocker & memLocker)422 void allocrest(U2::MemoryLocker& memLocker)
423 {
424 long i;
425
426 qint64 memSize = spp * (sizeof(Phylip_Char *) + sizeof(node *) + sites*sizeof(Phylip_Char) + sizeof(node) + sizeof(double *) + spp*sizeof(double) + sizeof(naym));
427 memSize += sites * sizeof(long) * 7;
428 if(!memLocker.tryAcquire(memSize)) {
429 return;
430 }
431
432 y = (Phylip_Char **)Malloc(spp*sizeof(Phylip_Char *));
433 nodep = (node **)Malloc(spp*sizeof(node *));
434 for (i = 0; i < spp; i++) {
435 y[i] = (Phylip_Char *)Malloc(sites*sizeof(Phylip_Char));
436 nodep[i] = (node *)Malloc(sizeof(node));
437 }
438 d = (double **)Malloc(spp*sizeof(double *));
439 for (i = 0; i < spp; i++)
440 d[i] = (double*)Malloc(spp*sizeof(double));
441 nayme = (naym *)Malloc(spp*sizeof(naym));
442 category = (steptr)Malloc(sites*sizeof(long));
443 oldweight = (steptr)Malloc(sites*sizeof(long));
444 weight = (steptr)Malloc(sites*sizeof(long));
445 alias = (steptr)Malloc(sites*sizeof(long));
446 ally = (steptr)Malloc(sites*sizeof(long));
447 location = (steptr)Malloc(sites*sizeof(long));
448 weightrat = (double *)Malloc(sites*sizeof(double));
449 } /* allocrest */
450
451
reallocsites()452 void reallocsites()
453 {/* The amount of sites can change between runs
454 this function reallocates all the variables
455 whose size depends on the amount of sites */
456 long i;
457
458 for (i = 0; i < spp; i++) {
459 free(y[i]);
460 y[i] = (Phylip_Char *)Malloc(sites*sizeof(Phylip_Char));
461 }
462 free(category);
463 free(oldweight);
464 free(weight);
465 free(alias);
466 free(ally);
467 free(location);
468 free(weightrat);
469
470 category = (steptr)Malloc(sites*sizeof(long));
471 oldweight = (steptr)Malloc(sites*sizeof(long));
472 weight = (steptr)Malloc(sites*sizeof(long));
473 alias = (steptr)Malloc(sites*sizeof(long));
474 ally = (steptr)Malloc(sites*sizeof(long));
475 location = (steptr)Malloc(sites*sizeof(long));
476 weightrat = (double *)Malloc(sites*sizeof(double));
477 } /* reallocsites */
478
479
doinit(U2::MemoryLocker & memLocker)480 void doinit(U2::MemoryLocker& memLocker)
481 {
482 /* initializes variables */
483
484 //inputnumbers(&spp, &sites, &nonodes, 1);
485 getoptions();
486 if (printdata) {
487 fprintf(outfile, "%2ld species, %3ld sites\n", spp, sites);
488 }
489 allocrest(memLocker);
490 } /* doinit */
491
492
inputcategories()493 void inputcategories()
494 {
495 /* reads the categories for each site */
496 long i;
497 Phylip_Char ch;
498
499 for (i = 1; i < nmlngth; i++)
500 gettc(infile);
501 for (i = 0; i < sites; i++) {
502 do {
503 if (eoln(infile))
504 scan_eoln(infile);
505 ch = gettc(infile);
506 } while (ch == ' ');
507 category[i] = ch - '0';
508 }
509 scan_eoln(infile);
510 } /* inputcategories */
511
512
printcategories()513 void printcategories()
514 { /* print out list of categories of sites */
515 long i, j;
516
517 fprintf(outfile, "Rate categories\n\n");
518 for (i = 1; i <= nmlngth + 3; i++)
519 putc(' ', outfile);
520 for (i = 1; i <= sites; i++) {
521 fprintf(outfile, "%ld", category[i - 1]);
522 if (i % 60 == 0) {
523 putc('\n', outfile);
524 for (j = 1; j <= nmlngth + 3; j++)
525 putc(' ', outfile);
526 } else if (i % 10 == 0)
527 putc(' ', outfile);
528 }
529 fprintf(outfile, "\n\n");
530 } /* printcategories */
531
532
inputoptions()533 void inputoptions()
534 {
535 /* read options information */
536 long i;
537
538 if (!firstset && !justwts) {
539 samenumsp(&sites, ith);
540 reallocsites();
541 }
542 for (i = 0; i < sites; i++) {
543 category[i] = 1;
544 oldweight[i] = 1;
545 }
546 if (justwts || weights)
547 inputweights(sites, oldweight, &weights);
548 if (printdata)
549 putc('\n', outfile);
550 if (jukes && printdata)
551 fprintf(outfile, " Jukes-Cantor Distance\n");
552 if (kimura && printdata)
553 fprintf(outfile, " Kimura 2-parameter Distance\n");
554 if (f84 && printdata)
555 fprintf(outfile, " F84 Distance\n");
556 if (similarity)
557 fprintf(outfile, " \n Table of similarity between sequences\n");
558 if (firstset && printdata && (kimura || f84))
559 fprintf(outfile, "\nTransition/transversion ratio = %10.6f\n", ttratio);
560 if (ctgry && categs > 1) {
561 inputcategs(0, sites, category, categs, "DnaDist");
562 if (printdata)
563 printcategs(outfile, sites, category, "Site categories");
564 } else if (printdata && (categs > 1)) {
565 fprintf(outfile, "\nSite category Rate of change\n\n");
566 for (i = 1; i <= categs; i++)
567 fprintf(outfile, "%12ld%13.3f\n", i, rate[i - 1]);
568 putc('\n', outfile);
569 printcategories();
570 }
571 if ((jukes || kimura || logdet) && freqsfrom) {
572 printf(" WARNING: CANNOT USE EMPIRICAL BASE FREQUENCIES");
573 printf(" WITH JUKES-CANTOR, KIMURA, JIN/NEI OR LOGDET DISTANCES\n");
574 exxit(-1);
575 }
576 if (jukes)
577 ttratio = 0.5000001;
578 if (weights && printdata)
579 printweights(outfile, 0, sites, oldweight, "Sites");
580 } /* inputoptions */
581
582
dnadist_sitesort()583 void dnadist_sitesort()
584 {
585 /* Shell sort of sites lexicographically */
586 long gap, i, j, jj, jg, k, itemp;
587 boolean flip, tied;
588
589 gap = sites / 2;
590 while (gap > 0) {
591 for (i = gap + 1; i <= sites; i++) {
592 j = i - gap;
593 flip = true;
594 while (j > 0 && flip) {
595 jj = alias[j - 1];
596 jg = alias[j + gap - 1];
597 tied = (oldweight[jj - 1] == oldweight[jg - 1]);
598 flip = (oldweight[jj - 1] < oldweight[jg - 1] ||
599 (tied && category[jj - 1] > category[jg - 1]));
600 tied = (tied && category[jj - 1] == category[jg - 1]);
601 k = 1;
602 while (k <= spp && tied) {
603 flip = (y[k - 1][jj - 1] > y[k - 1][jg - 1]);
604 tied = (tied && y[k - 1][jj - 1] == y[k - 1][jg - 1]);
605 k++;
606 }
607 if (!flip)
608 break;
609 itemp = alias[j - 1];
610 alias[j - 1] = alias[j + gap - 1];
611 alias[j + gap - 1] = itemp;
612 j -= gap;
613 }
614 }
615 gap /= 2;
616 }
617 } /* dnadist_sitesort */
618
619
dnadist_sitecombine()620 void dnadist_sitecombine()
621 {
622 /* combine sites that have identical patterns */
623 long i, j, k;
624 boolean tied;
625
626 i = 1;
627 while (i < sites) {
628 j = i + 1;
629 tied = true;
630 while (j <= sites && tied) {
631 tied = (category[alias[i - 1] - 1] == category[alias[j - 1] - 1]);
632 k = 1;
633 while (k <= spp && tied) {
634 tied = (tied &&
635 y[k - 1][alias[i - 1] - 1] == y[k - 1][alias[j - 1] - 1]);
636 k++;
637 }
638 if (!tied)
639 break;
640 ally[alias[j - 1] - 1] = alias[i - 1];
641 j++;
642 }
643 i = j;
644 }
645 } /* dnadist_sitecombine */
646
647
dnadist_sitescrunch()648 void dnadist_sitescrunch()
649 {
650 /* move so one representative of each pattern of
651 sites comes first */
652 long i, j, itemp;
653 boolean done, found, completed;
654
655 done = false;
656 i = 1;
657 j = 2;
658 while (!done) {
659 if (ally[alias[i - 1] - 1] != alias[i - 1]) {
660 if (j <= i)
661 j = i + 1;
662 if (j <= sites) {
663 do {
664 found = (ally[alias[j - 1] - 1] == alias[j - 1]);
665 j++;
666 completed = (j > sites);
667 } while (!(found || completed));
668 if (found) {
669 j--;
670 itemp = alias[i - 1];
671 alias[i - 1] = alias[j - 1];
672 alias[j - 1] = itemp;
673 } else
674 done = true;
675 } else
676 done = true;
677 }
678 i++;
679 done = (done || i >= sites);
680 }
681 } /* dnadist_sitescrunch */
682
683
makeweights()684 void makeweights()
685 {
686 /* make up weights vector to avoid duplicate computations */
687 long i;
688
689 for (i = 1; i <= sites; i++) {
690 alias[i - 1] = i;
691 ally[i - 1] = i;
692 location[i - 1] = 0;
693 weight[i - 1] = 0;
694 }
695 dnadist_sitesort();
696 dnadist_sitecombine();
697 dnadist_sitescrunch();
698 endsite = 0;
699 for (i = 1; i <= sites; i++) {
700 if (ally[i - 1] == i)
701 endsite++;
702 }
703 for (i = 1; i <= endsite; i++)
704 location[alias[i - 1] - 1] = i;
705 weightsum = 0;
706 for (i = 0; i < sites; i++)
707 weightsum += oldweight[i];
708 sumrates = 0.0;
709 for (i = 0; i < sites; i++)
710 sumrates += oldweight[i] * rate[category[i] - 1];
711 for (i = 0; i < categs; i++)
712 rate[i] *= weightsum / sumrates;
713 for (i = 0; i < sites; i++)
714 if (location[ally[i] - 1] > 0)
715 weight[location[ally[i] - 1] - 1] += oldweight[i];
716 } /* makeweights */
717
718
dnadist_makevalues()719 void dnadist_makevalues()
720 {
721 /* set up fractional likelihoods at tips */
722 long i, j, k;
723 bases b;
724
725 for (i = 0; i < spp; i++) {
726 nodep[i]->x = (phenotype)Malloc(endsite*sizeof(ratelike));
727 for (j = 0; j < endsite; j++)
728 nodep[i]->x[j] = (ratelike)Malloc(rcategs*sizeof(sitelike));
729 }
730 for (k = 0; k < endsite; k++) {
731 j = alias[k];
732 for (i = 0; i < spp; i++) {
733 for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1))
734 nodep[i]->x[k][0][(long)b - (long)A] = 0.0;
735 switch (y[i][j - 1]) {
736
737 case 'A':
738 nodep[i]->x[k][0][0] = 1.0;
739 break;
740
741 case 'C':
742 nodep[i]->x[k][0][(long)C - (long)A] = 1.0;
743 break;
744
745 case 'G':
746 nodep[i]->x[k][0][(long)G - (long)A] = 1.0;
747 break;
748
749 case 'T':
750 nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
751 break;
752
753 case 'U':
754 nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
755 break;
756
757 case 'M':
758 nodep[i]->x[k][0][0] = 1.0;
759 nodep[i]->x[k][0][(long)C - (long)A] = 1.0;
760 break;
761
762 case 'R':
763 nodep[i]->x[k][0][0] = 1.0;
764 nodep[i]->x[k][0][(long)G - (long)A] = 1.0;
765 break;
766
767 case 'W':
768 nodep[i]->x[k][0][0] = 1.0;
769 nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
770 break;
771
772 case 'S':
773 nodep[i]->x[k][0][(long)C - (long)A] = 1.0;
774 nodep[i]->x[k][0][(long)G - (long)A] = 1.0;
775 break;
776
777 case 'Y':
778 nodep[i]->x[k][0][(long)C - (long)A] = 1.0;
779 nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
780 break;
781
782 case 'K':
783 nodep[i]->x[k][0][(long)G - (long)A] = 1.0;
784 nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
785 break;
786
787 case 'B':
788 nodep[i]->x[k][0][(long)C - (long)A] = 1.0;
789 nodep[i]->x[k][0][(long)G - (long)A] = 1.0;
790 nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
791 break;
792
793 case 'D':
794 nodep[i]->x[k][0][0] = 1.0;
795 nodep[i]->x[k][0][(long)G - (long)A] = 1.0;
796 nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
797 break;
798
799 case 'H':
800 nodep[i]->x[k][0][0] = 1.0;
801 nodep[i]->x[k][0][(long)C - (long)A] = 1.0;
802 nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
803 break;
804
805 case 'V':
806 nodep[i]->x[k][0][0] = 1.0;
807 nodep[i]->x[k][0][(long)C - (long)A] = 1.0;
808 nodep[i]->x[k][0][(long)G - (long)A] = 1.0;
809 break;
810
811 case 'N':
812 for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1))
813 nodep[i]->x[k][0][(long)b - (long)A] = 1.0;
814 break;
815
816 case 'X':
817 for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1))
818 nodep[i]->x[k][0][(long)b - (long)A] = 1.0;
819 break;
820
821 case '?':
822 for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1))
823 nodep[i]->x[k][0][(long)b - (long)A] = 1.0;
824 break;
825
826 case 'O':
827 for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1))
828 nodep[i]->x[k][0][(long)b - (long)A] = 1.0;
829 break;
830
831 case '-':
832 for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1))
833 nodep[i]->x[k][0][(long)b - (long)A] = 1.0;
834 break;
835 }
836 }
837 }
838 } /* dnadist_makevalues */
839
840
dnadist_empiricalfreqs()841 void dnadist_empiricalfreqs()
842 {
843 /* Get empirical base frequencies from the data */
844 long i, j, k;
845 double sum, suma, sumc, sumg, sumt, w;
846
847 freqa = 0.25;
848 freqc = 0.25;
849 freqg = 0.25;
850 freqt = 0.25;
851 for (k = 1; k <= 8; k++) {
852 suma = 0.0;
853 sumc = 0.0;
854 sumg = 0.0;
855 sumt = 0.0;
856 for (i = 0; i < spp; i++) {
857 for (j = 0; j < endsite; j++) {
858 w = weight[j];
859 sum = freqa * nodep[i]->x[j][0][0];
860 sum += freqc * nodep[i]->x[j][0][(long)C - (long)A];
861 sum += freqg * nodep[i]->x[j][0][(long)G - (long)A];
862 sum += freqt * nodep[i]->x[j][0][(long)T - (long)A];
863 suma += w * freqa * nodep[i]->x[j][0][0] / sum;
864 sumc += w * freqc * nodep[i]->x[j][0][(long)C - (long)A] / sum;
865 sumg += w * freqg * nodep[i]->x[j][0][(long)G - (long)A] / sum;
866 sumt += w * freqt * nodep[i]->x[j][0][(long)T - (long)A] / sum;
867 }
868 }
869 sum = suma + sumc + sumg + sumt;
870 freqa = suma / sum;
871 freqc = sumc / sum;
872 freqg = sumg / sum;
873 freqt = sumt / sum;
874 }
875 } /* dnadist_empiricalfreqs */
876
877
getinput()878 void getinput()
879 {
880 /* reads the input data */
881 inputoptions();
882 if ((!freqsfrom) && !logdet && !similarity) {
883 if (kimura || jukes) {
884 freqa = 0.25;
885 freqc = 0.25;
886 freqg = 0.25;
887 freqt = 0.25;
888 }
889 getbasefreqs(freqa, freqc, freqg, freqt, &freqr, &freqy, &freqar, &freqcy,
890 &freqgr, &freqty, &ttratio, &xi, &xv, &fracchange,
891 freqsfrom, printdata);
892 if (freqa < 0.00000001) {
893 freqa = 0.000001;
894 freqc = 0.999999*freqc;
895 freqg = 0.999999*freqg;
896 freqt = 0.999999*freqt;
897 }
898 if (freqc < 0.00000001) {
899 freqa = 0.999999*freqa;
900 freqc = 0.000001;
901 freqg = 0.999999*freqg;
902 freqt = 0.999999*freqt;
903 }
904 if (freqg < 0.00000001) {
905 freqa = 0.999999*freqa;
906 freqc = 0.999999*freqc;
907 freqg = 0.000001;
908 freqt = 0.999999*freqt;
909 }
910 if (freqt < 0.00000001) {
911 freqa = 0.999999*freqa;
912 freqc = 0.999999*freqc;
913 freqg = 0.999999*freqg;
914 freqt = 0.000001;
915 }
916 }
917 if (!justwts || firstset)
918 inputdata(sites);
919 makeweights();
920 dnadist_makevalues();
921 if (freqsfrom) {
922 dnadist_empiricalfreqs();
923 getbasefreqs(freqa, freqc, freqg, freqt, &freqr, &freqy, &freqar, &freqcy,
924 &freqgr, &freqty, &ttratio, &xi, &xv, &fracchange,
925 freqsfrom, printdata);
926 }
927 } /* getinput */
928
929
inittable()930 void inittable()
931 {
932 /* Define a lookup table. Precompute values and store in a table */
933 long i;
934
935 for (i = 0; i < categs; i++) {
936 tbl[i].rat = rate[i];
937 tbl[i].ratxv = rate[i] * xv;
938 }
939 } /* inittable */
940
941
lndet(double (* a)[4])942 double lndet(double (*a)[4])
943 {
944 long i, j, k;
945 double temp, ld;
946
947 /*Gauss-Jordan reduction -- invert matrix a in place,
948 overwriting previous contents of a. On exit, matrix a
949 contains the inverse, lndet contains the log of the determinant */
950 ld = 1.0;
951 for (i = 0; i < 4; i++) {
952 ld *= a[i][i];
953 temp = 1.0 / a[i][i];
954 a[i][i] = 1.0;
955 for (j = 0; j < 4; j++)
956 a[i][j] *= temp;
957 for (j = 0; j < 4; j++) {
958 if (j != i) {
959 temp = a[j][i];
960 a[j][i] = 0.0;
961 for (k = 0; k < 4; k++)
962 a[j][k] -= temp * a[i][k];
963 }
964 }
965 }
966 if (ld <= 0.0)
967 return(99.0);
968 else
969 return(log(ld));
970 } /* lndet */
971
972
makev(long m,long n,double * v)973 void makev(long m, long n, double *v)
974 {
975 /* compute one distance */
976 long i, j, k, l, it, num1, num2, idx;
977 long numerator = 0, denominator = 0;
978 double sum, sum1, sum2, sumyr, lz, aa, bb, cc, vv=0,
979 p1, p2, p3, q1, q2, q3, tt, delta = 0, slope,
980 xx1freqa, xx1freqc, xx1freqg, xx1freqt;
981 double *prod, *prod2, *prod3;
982 boolean quick, jukesquick, kimquick, logdetquick, overlap;
983 bases b;
984 node *p, *q;
985 sitelike xx1, xx2;
986 double basetable[4][4]; /* for quick logdet */
987 double basefreq1[4], basefreq2[4];
988
989 p = nodep[m - 1];
990 q = nodep[n - 1];
991
992 /* check for overlap between sequences */
993 overlap = false;
994 for(i=0 ; i < sites ; i++){
995 if((strchr("NX?O-",y[m-1][i])==NULL) &&
996 (strchr("NX?O-",y[n-1][i])==NULL)){
997 overlap = true;
998 break;
999 }
1000 }
1001 if(!overlap){
1002 printf("\nWARNING: NO OVERLAP BETWEEN SEQUENCES %ld AND %ld; -1.0 WAS WRITTEN\n", m, n);
1003 baddists = true;
1004 return;
1005 }
1006
1007 quick = (!ctgry || categs == 1);
1008 if (jukes || kimura || logdet || similarity) {
1009 numerator = 0;
1010 denominator = 0;
1011 for (i = 0; i < endsite; i++) {
1012 memcpy(xx1, p->x[i][0], sizeof(sitelike));
1013 memcpy(xx2, q->x[i][0], sizeof(sitelike));
1014 sum = 0.0;
1015 sum1 = 0.0;
1016 sum2 = 0.0;
1017 for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) {
1018 sum1 += xx1[(long)b - (long)A];
1019 sum2 += xx2[(long)b - (long)A];
1020 sum += xx1[(long)b - (long)A] * xx2[(long)b - (long)A];
1021 }
1022 quick = (quick && (sum1 == 1.0 || sum1 == 4.0) &&
1023 (sum2 == 1.0 || sum2 == 4.0));
1024 if (sum1 == 1.0 && sum2 == 1.0) {
1025 numerator += (long)(weight[i] * sum);
1026 denominator += weight[i];
1027 }
1028 }
1029 }
1030 jukesquick = ((jukes || similarity) && quick);
1031 kimquick = (kimura && quick);
1032 logdetquick = (logdet && quick);
1033 if (logdet && !quick) {
1034 printf(" WARNING: CANNOT CALCULATE LOGDET DISTANCE\n");
1035 printf(" WITH PRESENT PROGRAM IF PARTIALLY AMBIGUOUS NUCLEOTIDES\n");
1036 printf(" -1.0 WAS WRITTEN\n");
1037 baddists = true;
1038 }
1039 if (jukesquick && jukes && (numerator * 4 <= denominator)) {
1040 printf("\nWARNING: INFINITE DISTANCE BETWEEN ");
1041 printf(" SPECIES %3ld AND %3ld\n", m, n);
1042 printf(" -1.0 WAS WRITTEN\n");
1043 baddists = true;
1044 }
1045 if (jukesquick && invar
1046 && (4 * (((double)numerator / denominator) - invarfrac)
1047 <= (1.0 - invarfrac))) {
1048 printf("\nWARNING: DIFFERENCE BETWEEN SPECIES %3ld AND %3ld", m, n);
1049 printf(" TOO LARGE FOR INVARIABLE SITES\n");
1050 printf(" -1.0 WAS WRITTEN\n");
1051 baddists = true;
1052 }
1053 if (jukesquick) {
1054 if (!gama && !invar)
1055 vv = -0.75 * log((4.0*((double)numerator / denominator) - 1.0) / 3.0);
1056 else if (!invar)
1057 vv = 0.75 * cvi * (exp(-(1/cvi)*
1058 log((4.0 * ((double)numerator / denominator) - 1.0) / 3.0)) - 1.0);
1059 else
1060 vv = 0.75 * cvi * (exp(-(1/cvi)*
1061 log((4.0 * ((double)numerator / denominator - invarfrac)/
1062 (1.0-invarfrac) - 1.0) / 3.0)) - 1.0);
1063 }
1064 if (kimquick) {
1065 num1 = 0;
1066 num2 = 0;
1067 denominator = 0;
1068 for (i = 0; i < endsite; i++) {
1069 memcpy(xx1, p->x[i][0], sizeof(sitelike));
1070 memcpy(xx2, q->x[i][0], sizeof(sitelike));
1071 sum = 0.0;
1072 sum1 = 0.0;
1073 sum2 = 0.0;
1074 for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) {
1075 sum1 += xx1[(long)b - (long)A];
1076 sum2 += xx2[(long)b - (long)A];
1077 sum += xx1[(long)b - (long)A] * xx2[(long)b - (long)A];
1078 }
1079 sumyr = (xx1[0] + xx1[(long)G - (long)A])
1080 * (xx2[0] + xx2[(long)G - (long)A]) +
1081 (xx1[(long)C - (long)A] + xx1[(long)T - (long)A]) *
1082 (xx2[(long)C - (long)A] + xx2[(long)T - (long)A]);
1083 if (sum1 == 1.0 && sum2 == 1.0) {
1084 num1 += (long)(weight[i] * sum);
1085 num2 += (long)(weight[i] * (sumyr - sum));
1086 denominator += weight[i];
1087 }
1088 }
1089 tt = ((1.0 - (double)num1 / denominator)-invarfrac)/(1.0-invarfrac);
1090 if (tt > 0.0) {
1091 delta = 0.1;
1092 tt = delta;
1093 it = 0;
1094 while (fabs(delta) > 0.0000002 && it < iterationsd) {
1095 it++;
1096 if (!gama) {
1097 p1 = exp(-tt);
1098 p2 = exp(-xv * tt) - exp(-tt);
1099 p3 = 1.0 - exp(-xv * tt);
1100 } else {
1101 p1 = exp(-cvi * log(1 + tt / cvi));
1102 p2 = exp(-cvi * log(1 + xv * tt / cvi))
1103 - exp(-cvi * log(1 + tt / cvi));
1104 p3 = 1.0 - exp(-cvi * log(1 + xv * tt / cvi));
1105 }
1106 q1 = p1 + p2 / 2.0 + p3 / 4.0;
1107 q2 = p2 / 2.0 + p3 / 4.0;
1108 q3 = p3 / 2.0;
1109 q1 = q1 * (1.0-invarfrac) + invarfrac;
1110 q2 *= (1.0 - invarfrac);
1111 q3 *= (1.0 - invarfrac);
1112 if (!gama && !invar)
1113 slope = 0.5 * exp(-tt) * (num2 / q2 - num1 / q1) +
1114 0.25 * xv * exp(-xv * tt) *
1115 ((denominator - num1 - num2) * 2 / q3 - num2 / q2 - num1 / q1);
1116 else
1117 slope = 0.5 * (1 / (1 + tt / cvi)) * exp(-cvi * log(1 + tt / cvi)) *
1118 (num2 / q2 - num1 / q1) + 0.25 * (xv / (1 + xv * tt / cvi)) *
1119 exp(-cvi * log(1 + xv * tt / cvi)) *
1120 ((denominator - num1 - num2) * 2 / q3 - num2 / q2 - num1 / q1);
1121 slope *= (1.0-invarfrac);
1122 if (slope < 0.0)
1123 delta = fabs(delta) / -2.0;
1124 else
1125 delta = fabs(delta);
1126 tt += delta;
1127 }
1128 }
1129 if ((delta >= 0.1) && (!similarity)) {
1130 printf("\nWARNING: DIFFERENCE BETWEEN SPECIES %3ld AND %3ld", m, n);
1131 if (invar)
1132 printf(" TOO LARGE FOR INVARIABLE SITES\n");
1133 else
1134 printf(" TOO LARGE TO ESTIMATE DISTANCE\n");
1135 printf(" -1.0 WAS WRITTEN\n");
1136 baddists = true;
1137 }
1138 vv = fracchange * tt;
1139 }
1140 if (!(jukesquick || kimquick || logdet)) {
1141 prod = (double *)Malloc(sites*sizeof(double));
1142 prod2 = (double *)Malloc(sites*sizeof(double));
1143 prod3 = (double *)Malloc(sites*sizeof(double));
1144 for (i = 0; i < endsite; i++) {
1145 memcpy(xx1, p->x[i][0], sizeof(sitelike));
1146 memcpy(xx2, q->x[i][0], sizeof(sitelike));
1147 xx1freqa = xx1[0] * freqa;
1148 xx1freqc = xx1[(long)C - (long)A] * freqc;
1149 xx1freqg = xx1[(long)G - (long)A] * freqg;
1150 xx1freqt = xx1[(long)T - (long)A] * freqt;
1151 sum1 = xx1freqa + xx1freqc + xx1freqg + xx1freqt;
1152 sum2 = freqa * xx2[0] + freqc * xx2[(long)C - (long)A] +
1153 freqg * xx2[(long)G - (long)A] + freqt * xx2[(long)T - (long)A];
1154 prod[i] = sum1 * sum2;
1155 prod2[i] = (xx1freqa + xx1freqg) *
1156 (xx2[0] * freqar + xx2[(long)G - (long)A] * freqgr) +
1157 (xx1freqc + xx1freqt) *
1158 (xx2[(long)C - (long)A] * freqcy + xx2[(long)T - (long)A] * freqty);
1159 prod3[i] = xx1freqa * xx2[0] + xx1freqc * xx2[(long)C - (long)A] +
1160 xx1freqg * xx2[(long)G - (long)A] + xx1freqt * xx2[(long)T - (long)A];
1161 }
1162 tt = 0.1;
1163 delta = 0.1;
1164 it = 1;
1165 while (it < iterationsd && fabs(delta) > 0.0000002) {
1166 slope = 0.0;
1167 if (tt > 0.0) {
1168 lz = -tt;
1169 for (i = 0; i < categs; i++) {
1170 if (!gama) {
1171 tbl[i].z1 = exp(tbl[i].ratxv * lz);
1172 tbl[i].z1zz = exp(tbl[i].rat * lz);
1173 }
1174 else {
1175 tbl[i].z1 = exp(-cvi*log(1.0-tbl[i].ratxv * lz/cvi));
1176 tbl[i].z1zz = exp(-cvi*log(1.0-tbl[i].rat * lz/cvi));
1177 }
1178 tbl[i].y1 = 1.0 - tbl[i].z1;
1179 tbl[i].z1yy = tbl[i].z1 - tbl[i].z1zz;
1180 tbl[i].z1xv = tbl[i].z1 * xv;
1181 }
1182 for (i = 0; i < endsite; i++) {
1183 idx = category[alias[i] - 1];
1184 cc = prod[i];
1185 bb = prod2[i];
1186 aa = prod3[i];
1187 if (!gama && !invar)
1188 slope += weightrat[i] * (tbl[idx - 1].z1zz * (bb - aa) +
1189 tbl[idx - 1].z1xv * (cc - bb)) /
1190 (aa * tbl[idx - 1].z1zz + bb * tbl[idx - 1].z1yy +
1191 cc * tbl[idx - 1].y1);
1192 else
1193 slope += (1.0-invarfrac) * weightrat[i] * (
1194 ((tbl[idx-1].rat)/(1.0-tbl[idx-1].rat * lz/cvi))
1195 * tbl[idx - 1].z1zz * (bb - aa) +
1196 ((tbl[idx-1].ratxv)/(1.0-tbl[idx-1].ratxv * lz/cvi))
1197 * tbl[idx - 1].z1 * (cc - bb)) /
1198 (aa * ((1.0-invarfrac)*tbl[idx - 1].z1zz + invarfrac)
1199 + bb * (1.0-invarfrac)*tbl[idx - 1].z1yy
1200 + cc * (1.0-invarfrac)*tbl[idx - 1].y1);
1201 }
1202 }
1203 if (slope < 0.0)
1204 delta = fabs(delta) / -2.0;
1205 else
1206 delta = fabs(delta);
1207 tt += delta;
1208 it++;
1209 }
1210 if ((delta >= 0.1) && (!similarity)) {
1211 printf("\nWARNING: DIFFERENCE BETWEEN SPECIES %3ld AND %3ld", m, n);
1212 if (invar)
1213 printf(" TOO LARGE FOR INVARIABLE SITES\n");
1214 else
1215 printf(" TOO LARGE TO ESTIMATE DISTANCE\n");
1216 printf(" -1.0 WAS WRITTEN\n");
1217 baddists = true;
1218 }
1219 vv = tt * fracchange;
1220 free(prod);
1221 free(prod2);
1222 free(prod3);
1223 }
1224 if (logdetquick) { /* compute logdet when no ambiguous nucleotides */
1225 for (i = 0; i < 4; i++) {
1226 basefreq1[i] = 0.0;
1227 basefreq2[i] = 0.0;
1228 for (j = 0; j < 4; j++)
1229 basetable[i][j] = 0.0;
1230 }
1231 for (i = 0; i < endsite; i++) {
1232 k = 0;
1233 while (p->x[i][0][k] == 0.0)
1234 k++;
1235 basefreq1[k] += weight[i];
1236 l = 0;
1237 while (q->x[i][0][l] == 0.0)
1238 l++;
1239 basefreq2[l] += weight[i];
1240 basetable[k][l] += weight[i];
1241 }
1242 vv = lndet(basetable);
1243 if (vv == 99.0) {
1244 printf("\nNegative or zero determinant for distance between species");
1245 printf(" %ld and %ld\n", m, n);
1246 printf(" -1.0 WAS WRITTEN\n");
1247 baddists = true;
1248 }
1249 vv = -0.25*(vv - 0.5*(log(basefreq1[0])+log(basefreq1[1])
1250 +log(basefreq1[2])+log(basefreq1[3])
1251 +log(basefreq2[0])+log(basefreq2[1])
1252 +log(basefreq2[2])+log(basefreq2[3])));
1253 }
1254 if (similarity) {
1255 if (denominator < 1.0) {
1256 printf("\nWARNING: SPECIES %3ld AND %3ld HAVE NO BASES THAT", m, n);
1257 printf(" CAN BE COMPARED\n");
1258 printf(" -1.0 WAS WRITTEN\n");
1259 baddists = true;
1260 }
1261 vv = (double)numerator / denominator;
1262 }
1263 *v = vv;
1264 } /* makev */
1265
1266
makedists()1267 void makedists()
1268 {
1269 /* compute distance matrix */
1270 long i, j;
1271 double v;
1272
1273 inittable();
1274 for (i = 0; i < endsite; i++)
1275 weightrat[i] = weight[i] * rate[category[alias[i] - 1] - 1];
1276 if (progress) {
1277 printf("Distances calculated for species\n");
1278 #ifdef WIN32
1279 phyFillScreenColor();
1280 #endif
1281 }
1282 for (i = 0; i < spp; i++)
1283 if (similarity)
1284 d[i][i] = 1.0;
1285 else
1286 d[i][i] = 0.0;
1287 baddists = false;
1288
1289 float cur_prog = 0;
1290 int total = (spp*spp / 2) + 1;
1291 float step = (1.0f / total) * 100.0f;
1292
1293 for (i = 1; i < spp; i++) {
1294 if (progress) {
1295 printf(" ");
1296 for (j = 0; j < nmlngth; j++)
1297 putchar(nayme[i - 1][j]);
1298 printf(" ");
1299 }
1300 for (j = i + 1; j <= spp; j++) {
1301 makev(i, j, &v);
1302 v = fabs(v);
1303 if ( baddists == true ) {
1304 v = -1;
1305 baddists = false;
1306 }
1307 d[i - 1][j - 1] = v;
1308 d[j - 1][i - 1] = v;
1309
1310 U2::TaskStateInfo* ts = U2::getTaskInfo();
1311 if (ts->cancelFlag != 0) {
1312 ugene_exit("Task canceled!");
1313 } else if(!U2::isBootstr()){
1314 cur_prog += step;
1315 ts->progress = int (cur_prog);
1316 }
1317
1318 if (progress) {
1319 putchar('.');
1320 fflush(stdout);
1321 }
1322 }
1323 if (progress) {
1324 putchar('\n');
1325 #ifdef WIN32
1326 phyFillScreenColor();
1327 #endif
1328 }
1329 }
1330 if (progress) {
1331 printf(" ");
1332 for (j = 0; j < nmlngth; j++)
1333 putchar(nayme[spp - 1][j]);
1334 putchar('\n');
1335 }
1336 } /* makedists */
1337
1338
writedists()1339 void writedists()
1340 {
1341 /* write out distances */
1342 char **names;
1343
1344 names = stringnames_new();
1345 output_matrix_d(outfile, d, spp, spp, names, names, matrix_flags);
1346 stringnames_delete(names);
1347
1348 if (progress)
1349 printf("\nDistances written to file \"%s\"\n\n", outfilename);
1350 } /* writedists */
1351
1352
1353 // int main(int argc, Char *argv[])
1354 // { /* DNA Distances by Maximum Likelihood */
1355 // #ifdef MAC
1356 // argc = 1; /* macsetup("Dnadist",""); */
1357 // argv[0] = "Dnadist";
1358 // #endif
1359 // init(argc, argv);
1360 // openfile(&infile,INFILE,"input file","r",argv[0],infilename);
1361 // openfile(&outfile,OUTFILE,"output file","w",argv[0],outfilename);
1362 //
1363 // ibmpc = IBMCRT;
1364 // ansi = ANSICRT;
1365 // mulsets = false;
1366 // datasets = 1;
1367 // firstset = true;
1368 // doinit();
1369 // ttratio0 = ttratio;
1370 // if (ctgry)
1371 // openfile(&catfile,CATFILE,"categories file","r",argv[0],catfilename);
1372 // if (weights || justwts)
1373 // openfile(&weightfile,WEIGHTFILE,"weights file","r",argv[0],weightfilename);
1374 // for (ith = 1; ith <= datasets; ith++) {
1375 // ttratio = ttratio0;
1376 // getinput();
1377 // if (ith == 1)
1378 // firstset = false;
1379 // if (datasets > 1 && progress)
1380 // printf("Data set # %ld:\n\n",ith);
1381 // makedists();
1382 // writedists();
1383 // }
1384 // FClose(infile);
1385 // FClose(outfile);
1386 // #ifdef MAC
1387 // fixmacfile(outfilename);
1388 // #endif
1389 // printf("Done.\n\n");
1390 // #ifdef WIN32
1391 // phyRestoreConsoleAttributes();
1392 // #endif
1393 // return 0;
1394 // } /* DNA Distances by Maximum Likelihood */
1395
1396
1397
1398
1399