1 #include <U2Core/disable-warnings.h>
2 U2_DISABLE_WARNINGS
3 
4 //#include "phylip.h"
5 //#include "seq.h"
6 #include "dnadist.h"
7 /* version 3.696.
8 Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe.
9 
10 Copyright (c) 1993-2014, Joseph Felsenstein
11 All rights reserved.
12 
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16 1. Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18 
19 2. Redistributions in binary form must reproduce the above copyright notice,
20 this list of conditions and the following disclaimer in the documentation
21 and/or other materials provided with the distribution.
22 
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 POSSIBILITY OF SUCH DAMAGE.
34 */
35 
36 #include <U2Algorithm/CreatePhyTreeSettings.h>
37 #include <U2Core/Task.h>
38 
39 QString DNADistModelTypes::F84("F84");
40 QString DNADistModelTypes::Kimura("Kimura");
41 QString DNADistModelTypes::JukesCantor("Jukes-Cantor");
42 QString DNADistModelTypes::LogDet("LogDet");
43 static U2::CreatePhyTreeSettings dnaDistSettings;
44 
setDNADistSettings(const CreatePhyTreeSettings & settings)45 void U2::setDNADistSettings( const CreatePhyTreeSettings& settings )
46 {
47     dnaDistSettings = settings;
48 
49 }
50 
getDNADistSettings()51 const U2::CreatePhyTreeSettings& getDNADistSettings()
52 {
53     //TODO: use mutex here later
54     return dnaDistSettings;
55 }
56 
57 
58 
59 Phylip_Char infilename[FNMLNGTH], outfilename[FNMLNGTH], catfilename[FNMLNGTH], weightfilename[FNMLNGTH];
60 long sites, categs, weightsum, datasets, ith, rcategs;
61 boolean freqsfrom, jukes, kimura, logdet, gama, invar, similarity, lower, f84,
62 weights, progress, ctgry, mulsets, justwts, firstset, baddists;
63 boolean matrix_flags;       /* Matrix output format */
64 node **nodep = NULL;
65 double xi, xv, ttratio, ttratio0, freqa, freqc, freqg, freqt, freqr, freqy,
66 freqar, freqcy, freqgr, freqty, cvi, invarfrac, sumrates, fracchange;
67 steptr oldweight = NULL;
68 double rate[maxcategs];
69 double **d = NULL;
70 double sumweightrat = 0;                  /* these values were propagated  */
71 double *weightrat = NULL;                    /* to global values from         */
72 valrec tbl[maxcategs];                /* function makedists.           */
73 
74 
75 
getDNADistModelTypes()76 QList<QString> DNADistModelTypes::getDNADistModelTypes()
77 {
78     static QList<QString> list;
79     if (list.isEmpty()) {
80         list.append(DNADistModelTypes::F84);
81         list.append(DNADistModelTypes::Kimura);
82         list.append(DNADistModelTypes::JukesCantor);
83         list.append(DNADistModelTypes::LogDet);
84     }
85 
86     return list;
87 }
88 
89 
getoptions()90 void getoptions()
91 {
92   /* interactively set options */
93   long loopcount, loopcount2;
94   Phylip_Char ch, ch2;
95   boolean ttr;
96   char *str = NULL;
97 
98   ctgry = false;
99   categs = 1;
100   cvi = 1.0;
101   rcategs = 1;
102   rate[0] = 1.0;
103   freqsfrom = true;
104   gama = false;
105   invar = false;
106   invarfrac = 0.0;
107   jukes = false;
108   justwts = false;
109   kimura = false;
110   logdet = false;
111   f84 = true;
112   lower = false;
113   matrix_flags = MAT_MACHINE;
114   similarity = false;
115   ttratio = 2.0;
116   ttr = false;
117   weights = false;
118   printdata = false;
119   dotdiff = true;
120   progress = false;
121   interleaved = true;
122   loopcount = 0;
123 
124   QString matrixModel = getDNADistSettings().matrixId;
125   if (matrixModel == DNADistModelTypes::F84) {
126       f84 = true;
127       kimura = false;
128       jukes = false;
129       freqsfrom = true;
130       logdet = false;
131       ttratio = getDNADistSettings().ttRatio;
132       ttr = true;
133   } else if (matrixModel == DNADistModelTypes::Kimura) {
134       f84 = false;
135       kimura = true;
136       jukes = false;
137       freqsfrom = false;
138       logdet = false;
139       ttratio = getDNADistSettings().ttRatio;
140       ttr = true;
141   } else if (matrixModel == DNADistModelTypes::JukesCantor) {
142       f84 = false;
143       kimura = false;
144       jukes = true;
145       freqsfrom = false;
146       logdet = false;
147   } else if (matrixModel == DNADistModelTypes::LogDet) {
148       f84 = false;
149       kimura = false;
150       jukes = false;
151       freqsfrom = false;
152       logdet = true;
153   }
154 
155   gama = getDNADistSettings().useGammaDistributionRates;
156   if (gama) {
157       cvi = getDNADistSettings().alphaFactor;
158       cvi = 1.0 / (cvi * cvi);
159   }
160 
161   /*for (;;) {
162     cleerhome();
163     printf("\nNucleic acid sequence Distance Matrix program,");
164     printf(" version %s\n\n",PHY_VERSION);
165     printf("Settings for this run:\n");
166     printf("  D  Distance (F84, Kimura, Jukes-Cantor, LogDet)?  %s\n",
167            kimura ? "Kimura 2-parameter" :
168            jukes  ? "Jukes-Cantor"       :
169            logdet ? "LogDet"             :
170            similarity ? "Similarity table" : "F84");
171     if (kimura || f84 || jukes) {
172       printf("  G          Gamma distributed rates across sites?  ");
173       if (gama)
174         printf("Yes\n");
175       else {
176         if (invar)
177           printf("Gamma+Invariant\n");
178         else
179           printf("No\n");
180       }
181     }
182     if (kimura || f84) {
183       printf("  T                 Transition/transversion ratio?");
184       if (!ttr)
185         printf("  2.0\n");
186       else
187         printf("%8.4f\n", ttratio);
188     }
189     if (!logdet && !similarity && !gama && !invar) {
190       printf("  C            One category of substitution rates?");
191       if (!ctgry || categs == 1)
192         printf("  Yes\n");
193       else
194         printf("  %ld categories\n", categs);
195     }
196     printf("  W                         Use weights for sites?");
197     if (weights)
198       printf("  Yes\n");
199     else
200       printf("  No\n");
201     if (f84)
202       printf("  F                Use empirical base frequencies?  %s\n",
203              (freqsfrom ? "Yes" : "No"));
204     printf("  L                       Form of distance matrix?  ");
205     switch (matrix_flags) {
206       case MAT_MACHINE:
207         str = "Square";
208         break;
209       case MAT_LOWERTRI:
210         str = "Lower-triangular";
211         break;
212       case MAT_HUMAN:
213         str = "Human-readable";
214         break;
215       default:
216         assert( 0 );
217         str = "(unknown)";
218     }
219     puts(str);
220     printf("  M                    Analyze multiple data sets?");
221     if (mulsets)
222       printf("  Yes, %2ld %s\n", datasets,
223                (justwts ? "sets of weights" : "data sets"));
224     else
225       printf("  No\n");
226     printf("  I                   Input sequences interleaved?  %s\n",
227            (interleaved ? "Yes" : "No, sequential"));
228     printf("  0            Terminal type (IBM PC, ANSI, none)?  %s\n",
229            ibmpc ? "IBM PC" : ansi  ? "ANSI"   : "(none)");
230     printf("  1             Print out the data at start of run  %s\n",
231            (printdata ? "Yes" : "No"));
232     printf("  2           Print indications of progress of run  %s\n",
233            (progress ? "Yes" : "No"));
234     printf("\n  Y to accept these or type the letter for one to change\n");
235 #ifdef WIN32
236     phyFillScreenColor();
237 #endif
238     fflush(stdout);
239     scanf("%c%*[^\n]", &ch);
240     getchar();
241     uppercase(&ch);
242     if  (ch == 'Y')
243            break;
244     if ((f84 && (strchr("CFGWLDTMI012",ch) != NULL)) ||
245         (kimura && (strchr("CGWLDTMI012",ch) != NULL)) ||
246         (jukes && (strchr("CGWLDMI012",ch) != NULL)) ||
247         ((logdet || similarity) && (strchr("WLDMI012",ch)) != NULL) ||
248         (ctgry && (strchr("CFWLDTMI012",ch) != NULL))) {
249      switch (ch) {
250 
251      case 'D':
252        if (kimura) {
253          kimura = false;
254          jukes = true;
255          freqsfrom = false;
256        } else if (f84) {
257          f84 = false;
258          kimura = true;
259          freqsfrom = false;
260        } else if (logdet) {
261          logdet = false;
262          similarity = true;
263        } else if (similarity) {
264          similarity = false;
265          f84 = true;
266          freqsfrom = true;
267        } else {
268          jukes = false;
269          logdet = true;
270          freqsfrom = false;
271        }
272        break;
273 
274      case 'G':
275        if (!(gama || invar))
276          gama = true;
277        else {
278          if (gama) {
279            gama = false;
280            invar = true;
281          } else {
282            if (invar)
283              invar = false;
284          }
285        }
286        break;
287 
288 
289      case 'C':
290        ctgry = !ctgry;
291        if (ctgry) {
292          initcatn(&categs);
293          initcategs(categs, rate);
294        }
295        break;
296 
297      case 'F':
298        freqsfrom = !freqsfrom;
299        if (!freqsfrom)
300          initfreqs(&freqa, &freqc, &freqg, &freqt);
301        break;
302 
303      case 'W':
304        weights = !weights;
305        break;
306 
307      case 'L':
308        switch ( matrix_flags ) {
309          case MAT_HUMAN:
310            matrix_flags = MAT_MACHINE;
311            break;
312          case MAT_LOWERTRI:
313            matrix_flags = MAT_HUMAN;
314            break;
315          case MAT_MACHINE:
316            matrix_flags = MAT_LOWERTRI;
317            break;
318          default:
319            assert(0);
320            matrix_flags = MAT_HUMAN;
321        }
322        break;
323 
324      case 'T':
325        ttr = !ttr;
326        if (ttr)
327         initratio(&ttratio);
328        break;
329 
330      case 'M':
331        mulsets = !mulsets;
332        if (mulsets) {
333           printf("Multiple data sets or multiple weights?");
334           loopcount2 = 0;
335           do {
336             printf(" (type D or W)\n");
337 #ifdef WIN32
338             phyFillScreenColor();
339 #endif
340             fflush(stdout);
341             scanf("%c%*[^\n]", &ch2);
342             uppercase(&ch2);
343             getchar();
344             countup(&loopcount2, 10);
345           } while ((ch2 != 'W') && (ch2 != 'D'));
346           justwts = (ch2 == 'W');
347           if (justwts)
348             justweights(&datasets);
349           else
350             initdatasets(&datasets);
351         }
352        break;
353 
354      case 'I':
355        interleaved = !interleaved;
356        break;
357 
358      case '0':
359         initterminal(&ibmpc, &ansi);
360        break;
361 
362      case '1':
363        printdata = !printdata;
364        break;
365 
366      case '2':
367        progress = !progress;
368        break;
369      }
370    } else {
371       if (strchr("CFGWLDTMI012",ch) == NULL)
372         printf("Not a possible option!\n");
373       else
374         printf("That option not allowed with these settings\n");
375       printf("\nPress Enter or Return key to continue\n");
376       getchar();
377     }
378     //countup(&loopcount, 100);
379   }
380 
381   if (similarity) {
382     if (matrix_flags == MAT_MACHINE)
383       matrix_flags = MAT_HUMAN;
384     else if (matrix_flags == MAT_LOWERTRI)
385       matrix_flags = MAT_LOWER | MAT_HUMAN;
386   }
387 
388   if (gama || invar) {
389     loopcount = 0;
390     do {
391       printf(
392 "\nCoefficient of variation of substitution rate among sites (must be positive)\n");
393       printf(
394   " In gamma distribution parameters, this is 1/(square root of alpha)\n");
395 #ifdef WIN32
396       phyFillScreenColor();
397 #endif
398       fflush(stdout);
399       scanf("%lf%*[^\n]", &cvi);
400       getchar();
401       countup(&loopcount, 10);
402     } while (cvi <= 0.0);
403     cvi = 1.0 / (cvi * cvi);
404   }
405   if (invar) {
406     loopcount = 0;
407     do {
408       printf("Fraction of invariant sites?\n");
409       fflush(stdout);
410       scanf("%lf%*[^\n]", &invarfrac);
411       getchar();
412       countup (&loopcount, 10);
413     } while ((invarfrac <= 0.0) || (invarfrac >= 1.0));
414   }
415     if (!printdata)
416     return;
417     fprintf(outfile, "\nNucleic acid sequence Distance Matrix program,");
418     fprintf(outfile, " version %s\n\n",PHY_VERSION); */
419 }  /* getoptions */
420 
421 
allocrest(U2::MemoryLocker & memLocker)422 void allocrest(U2::MemoryLocker& memLocker)
423 {
424     long i;
425 
426     qint64 memSize = spp * (sizeof(Phylip_Char *) + sizeof(node *) + sites*sizeof(Phylip_Char) + sizeof(node) + sizeof(double *) + spp*sizeof(double) + sizeof(naym));
427     memSize += sites * sizeof(long) * 7;
428     if(!memLocker.tryAcquire(memSize)) {
429           return;
430     }
431 
432     y = (Phylip_Char **)Malloc(spp*sizeof(Phylip_Char *));
433     nodep = (node **)Malloc(spp*sizeof(node *));
434     for (i = 0; i < spp; i++) {
435         y[i] = (Phylip_Char *)Malloc(sites*sizeof(Phylip_Char));
436         nodep[i] = (node *)Malloc(sizeof(node));
437     }
438     d = (double **)Malloc(spp*sizeof(double *));
439     for (i = 0; i < spp; i++)
440         d[i] = (double*)Malloc(spp*sizeof(double));
441     nayme = (naym *)Malloc(spp*sizeof(naym));
442     category = (steptr)Malloc(sites*sizeof(long));
443     oldweight = (steptr)Malloc(sites*sizeof(long));
444     weight = (steptr)Malloc(sites*sizeof(long));
445     alias = (steptr)Malloc(sites*sizeof(long));
446     ally = (steptr)Malloc(sites*sizeof(long));
447     location = (steptr)Malloc(sites*sizeof(long));
448     weightrat = (double *)Malloc(sites*sizeof(double));
449 } /* allocrest */
450 
451 
reallocsites()452 void reallocsites()
453 {/* The amount of sites can change between runs
454      this function reallocates all the variables
455      whose size depends on the amount of sites */
456   long i;
457 
458   for (i = 0; i < spp; i++) {
459     free(y[i]);
460     y[i] = (Phylip_Char *)Malloc(sites*sizeof(Phylip_Char));
461   }
462   free(category);
463   free(oldweight);
464   free(weight);
465   free(alias);
466   free(ally);
467   free(location);
468   free(weightrat);
469 
470   category = (steptr)Malloc(sites*sizeof(long));
471   oldweight = (steptr)Malloc(sites*sizeof(long));
472   weight = (steptr)Malloc(sites*sizeof(long));
473   alias = (steptr)Malloc(sites*sizeof(long));
474   ally = (steptr)Malloc(sites*sizeof(long));
475   location = (steptr)Malloc(sites*sizeof(long));
476   weightrat = (double *)Malloc(sites*sizeof(double));
477 } /* reallocsites */
478 
479 
doinit(U2::MemoryLocker & memLocker)480 void doinit(U2::MemoryLocker& memLocker)
481 {
482     /* initializes variables */
483 
484     //inputnumbers(&spp, &sites, &nonodes, 1);
485     getoptions();
486     if (printdata) {
487         fprintf(outfile, "%2ld species, %3ld  sites\n", spp, sites);
488     }
489     allocrest(memLocker);
490 }  /* doinit */
491 
492 
inputcategories()493 void inputcategories()
494 {
495   /* reads the categories for each site */
496   long i;
497   Phylip_Char ch;
498 
499   for (i = 1; i < nmlngth; i++)
500     gettc(infile);
501   for (i = 0; i < sites; i++) {
502     do {
503       if (eoln(infile))
504         scan_eoln(infile);
505       ch = gettc(infile);
506     } while (ch == ' ');
507     category[i] = ch - '0';
508   }
509   scan_eoln(infile);
510 }  /* inputcategories */
511 
512 
printcategories()513 void printcategories()
514 { /* print out list of categories of sites */
515   long i, j;
516 
517   fprintf(outfile, "Rate categories\n\n");
518   for (i = 1; i <= nmlngth + 3; i++)
519     putc(' ', outfile);
520   for (i = 1; i <= sites; i++) {
521     fprintf(outfile, "%ld", category[i - 1]);
522     if (i % 60 == 0) {
523       putc('\n', outfile);
524       for (j = 1; j <= nmlngth + 3; j++)
525         putc(' ', outfile);
526     } else if (i % 10 == 0)
527       putc(' ', outfile);
528   }
529   fprintf(outfile, "\n\n");
530 }  /* printcategories */
531 
532 
inputoptions()533 void inputoptions()
534 {
535   /* read options information */
536   long i;
537 
538   if (!firstset && !justwts) {
539     samenumsp(&sites, ith);
540     reallocsites();
541   }
542   for (i = 0; i < sites; i++) {
543     category[i] = 1;
544     oldweight[i] = 1;
545   }
546   if (justwts || weights)
547     inputweights(sites, oldweight, &weights);
548   if (printdata)
549     putc('\n', outfile);
550   if (jukes && printdata)
551     fprintf(outfile, "  Jukes-Cantor Distance\n");
552   if (kimura && printdata)
553     fprintf(outfile, "  Kimura 2-parameter Distance\n");
554   if (f84 && printdata)
555     fprintf(outfile, "  F84 Distance\n");
556   if (similarity)
557     fprintf(outfile, "  \n  Table of similarity between sequences\n");
558   if (firstset && printdata && (kimura || f84))
559     fprintf(outfile, "\nTransition/transversion ratio = %10.6f\n", ttratio);
560   if (ctgry && categs > 1) {
561     inputcategs(0, sites, category, categs, "DnaDist");
562     if (printdata)
563       printcategs(outfile, sites, category, "Site categories");
564   } else if (printdata && (categs > 1)) {
565     fprintf(outfile, "\nSite category   Rate of change\n\n");
566     for (i = 1; i <= categs; i++)
567       fprintf(outfile, "%12ld%13.3f\n", i, rate[i - 1]);
568     putc('\n', outfile);
569     printcategories();
570   }
571   if ((jukes || kimura || logdet) && freqsfrom) {
572     printf(" WARNING: CANNOT USE EMPIRICAL BASE FREQUENCIES");
573     printf(" WITH JUKES-CANTOR, KIMURA, JIN/NEI OR LOGDET DISTANCES\n");
574     exxit(-1);
575   }
576   if (jukes)
577     ttratio = 0.5000001;
578   if (weights && printdata)
579     printweights(outfile, 0, sites, oldweight, "Sites");
580 }  /* inputoptions */
581 
582 
dnadist_sitesort()583 void dnadist_sitesort()
584 {
585   /* Shell sort of sites lexicographically */
586   long gap, i, j, jj, jg, k, itemp;
587   boolean flip, tied;
588 
589   gap = sites / 2;
590   while (gap > 0) {
591     for (i = gap + 1; i <= sites; i++) {
592       j = i - gap;
593       flip = true;
594       while (j > 0 && flip) {
595         jj = alias[j - 1];
596         jg = alias[j + gap - 1];
597         tied = (oldweight[jj - 1] == oldweight[jg - 1]);
598         flip = (oldweight[jj - 1] < oldweight[jg - 1] ||
599                 (tied && category[jj - 1] > category[jg - 1]));
600         tied = (tied && category[jj - 1] == category[jg - 1]);
601         k = 1;
602         while (k <= spp && tied) {
603           flip = (y[k - 1][jj - 1] > y[k - 1][jg - 1]);
604           tied = (tied && y[k - 1][jj - 1] == y[k - 1][jg - 1]);
605           k++;
606         }
607         if (!flip)
608           break;
609         itemp = alias[j - 1];
610         alias[j - 1] = alias[j + gap - 1];
611         alias[j + gap - 1] = itemp;
612         j -= gap;
613       }
614     }
615     gap /= 2;
616   }
617 }  /* dnadist_sitesort */
618 
619 
dnadist_sitecombine()620 void dnadist_sitecombine()
621 {
622   /* combine sites that have identical patterns */
623   long i, j, k;
624   boolean tied;
625 
626   i = 1;
627   while (i < sites) {
628     j = i + 1;
629     tied = true;
630     while (j <= sites && tied) {
631       tied = (category[alias[i - 1] - 1] == category[alias[j - 1] - 1]);
632       k = 1;
633       while (k <= spp && tied) {
634         tied = (tied &&
635             y[k - 1][alias[i - 1] - 1] == y[k - 1][alias[j - 1] - 1]);
636         k++;
637       }
638       if (!tied)
639         break;
640       ally[alias[j - 1] - 1] = alias[i - 1];
641       j++;
642     }
643     i = j;
644   }
645 }  /* dnadist_sitecombine */
646 
647 
dnadist_sitescrunch()648 void dnadist_sitescrunch()
649 {
650   /* move so one representative of each pattern of
651      sites comes first */
652   long i, j, itemp;
653   boolean done, found, completed;
654 
655   done = false;
656   i = 1;
657   j = 2;
658   while (!done) {
659     if (ally[alias[i - 1] - 1] != alias[i - 1]) {
660       if (j <= i)
661         j = i + 1;
662       if (j <= sites) {
663         do {
664           found = (ally[alias[j - 1] - 1] == alias[j - 1]);
665           j++;
666           completed = (j > sites);
667         } while (!(found || completed));
668         if (found) {
669           j--;
670           itemp = alias[i - 1];
671           alias[i - 1] = alias[j - 1];
672           alias[j - 1] = itemp;
673         } else
674           done = true;
675       } else
676         done = true;
677     }
678     i++;
679     done = (done || i >= sites);
680   }
681 }  /* dnadist_sitescrunch */
682 
683 
makeweights()684 void makeweights()
685 {
686   /* make up weights vector to avoid duplicate computations */
687   long i;
688 
689   for (i = 1; i <= sites; i++) {
690     alias[i - 1] = i;
691     ally[i - 1] = i;
692     location[i - 1] = 0;
693     weight[i - 1] = 0;
694   }
695   dnadist_sitesort();
696   dnadist_sitecombine();
697   dnadist_sitescrunch();
698   endsite = 0;
699   for (i = 1; i <= sites; i++) {
700     if (ally[i - 1] == i)
701       endsite++;
702   }
703   for (i = 1; i <= endsite; i++)
704     location[alias[i - 1] - 1] = i;
705   weightsum = 0;
706   for (i = 0; i < sites; i++)
707     weightsum += oldweight[i];
708   sumrates = 0.0;
709   for (i = 0; i < sites; i++)
710     sumrates += oldweight[i] * rate[category[i] - 1];
711   for (i = 0; i < categs; i++)
712     rate[i] *= weightsum / sumrates;
713   for (i = 0; i < sites; i++)
714     if (location[ally[i] - 1] > 0)
715       weight[location[ally[i] - 1] - 1] += oldweight[i];
716 }  /* makeweights */
717 
718 
dnadist_makevalues()719 void dnadist_makevalues()
720 {
721   /* set up fractional likelihoods at tips */
722   long i, j, k;
723   bases b;
724 
725   for (i = 0; i < spp; i++) {
726     nodep[i]->x = (phenotype)Malloc(endsite*sizeof(ratelike));
727     for (j = 0; j < endsite; j++)
728       nodep[i]->x[j]  = (ratelike)Malloc(rcategs*sizeof(sitelike));
729   }
730   for (k = 0; k < endsite; k++) {
731     j = alias[k];
732     for (i = 0; i < spp; i++) {
733       for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1))
734         nodep[i]->x[k][0][(long)b - (long)A] = 0.0;
735       switch (y[i][j - 1]) {
736 
737       case 'A':
738         nodep[i]->x[k][0][0] = 1.0;
739         break;
740 
741       case 'C':
742         nodep[i]->x[k][0][(long)C - (long)A] = 1.0;
743         break;
744 
745       case 'G':
746         nodep[i]->x[k][0][(long)G - (long)A] = 1.0;
747         break;
748 
749       case 'T':
750         nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
751         break;
752 
753       case 'U':
754         nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
755         break;
756 
757       case 'M':
758         nodep[i]->x[k][0][0] = 1.0;
759         nodep[i]->x[k][0][(long)C - (long)A] = 1.0;
760         break;
761 
762       case 'R':
763         nodep[i]->x[k][0][0] = 1.0;
764         nodep[i]->x[k][0][(long)G - (long)A] = 1.0;
765         break;
766 
767       case 'W':
768         nodep[i]->x[k][0][0] = 1.0;
769         nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
770         break;
771 
772       case 'S':
773         nodep[i]->x[k][0][(long)C - (long)A] = 1.0;
774         nodep[i]->x[k][0][(long)G - (long)A] = 1.0;
775         break;
776 
777       case 'Y':
778         nodep[i]->x[k][0][(long)C - (long)A] = 1.0;
779         nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
780         break;
781 
782       case 'K':
783         nodep[i]->x[k][0][(long)G - (long)A] = 1.0;
784         nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
785         break;
786 
787       case 'B':
788         nodep[i]->x[k][0][(long)C - (long)A] = 1.0;
789         nodep[i]->x[k][0][(long)G - (long)A] = 1.0;
790         nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
791         break;
792 
793       case 'D':
794         nodep[i]->x[k][0][0] = 1.0;
795         nodep[i]->x[k][0][(long)G - (long)A] = 1.0;
796         nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
797         break;
798 
799       case 'H':
800         nodep[i]->x[k][0][0] = 1.0;
801         nodep[i]->x[k][0][(long)C - (long)A] = 1.0;
802         nodep[i]->x[k][0][(long)T - (long)A] = 1.0;
803         break;
804 
805       case 'V':
806         nodep[i]->x[k][0][0] = 1.0;
807         nodep[i]->x[k][0][(long)C - (long)A] = 1.0;
808         nodep[i]->x[k][0][(long)G - (long)A] = 1.0;
809         break;
810 
811       case 'N':
812         for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1))
813           nodep[i]->x[k][0][(long)b - (long)A] = 1.0;
814         break;
815 
816       case 'X':
817         for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1))
818           nodep[i]->x[k][0][(long)b - (long)A] = 1.0;
819         break;
820 
821       case '?':
822         for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1))
823           nodep[i]->x[k][0][(long)b - (long)A] = 1.0;
824         break;
825 
826       case 'O':
827         for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1))
828           nodep[i]->x[k][0][(long)b - (long)A] = 1.0;
829         break;
830 
831       case '-':
832         for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1))
833           nodep[i]->x[k][0][(long)b - (long)A] = 1.0;
834         break;
835       }
836     }
837   }
838 }  /* dnadist_makevalues */
839 
840 
dnadist_empiricalfreqs()841 void dnadist_empiricalfreqs()
842 {
843   /* Get empirical base frequencies from the data */
844   long i, j, k;
845   double sum, suma, sumc, sumg, sumt, w;
846 
847   freqa = 0.25;
848   freqc = 0.25;
849   freqg = 0.25;
850   freqt = 0.25;
851   for (k = 1; k <= 8; k++) {
852     suma = 0.0;
853     sumc = 0.0;
854     sumg = 0.0;
855     sumt = 0.0;
856     for (i = 0; i < spp; i++) {
857       for (j = 0; j < endsite; j++) {
858         w = weight[j];
859         sum = freqa * nodep[i]->x[j][0][0];
860         sum += freqc * nodep[i]->x[j][0][(long)C - (long)A];
861         sum += freqg * nodep[i]->x[j][0][(long)G - (long)A];
862         sum += freqt * nodep[i]->x[j][0][(long)T - (long)A];
863         suma += w * freqa * nodep[i]->x[j][0][0] / sum;
864         sumc += w * freqc * nodep[i]->x[j][0][(long)C - (long)A] / sum;
865         sumg += w * freqg * nodep[i]->x[j][0][(long)G - (long)A] / sum;
866         sumt += w * freqt * nodep[i]->x[j][0][(long)T - (long)A] / sum;
867       }
868     }
869     sum = suma + sumc + sumg + sumt;
870     freqa = suma / sum;
871     freqc = sumc / sum;
872     freqg = sumg / sum;
873     freqt = sumt / sum;
874   }
875 }  /* dnadist_empiricalfreqs */
876 
877 
getinput()878 void getinput()
879 {
880   /* reads the input data */
881   inputoptions();
882   if ((!freqsfrom) && !logdet && !similarity) {
883     if (kimura || jukes) {
884       freqa = 0.25;
885       freqc = 0.25;
886       freqg = 0.25;
887       freqt = 0.25;
888     }
889     getbasefreqs(freqa, freqc, freqg, freqt, &freqr, &freqy, &freqar, &freqcy,
890                    &freqgr, &freqty, &ttratio, &xi, &xv, &fracchange,
891                    freqsfrom, printdata);
892     if (freqa < 0.00000001) {
893       freqa = 0.000001;
894       freqc = 0.999999*freqc;
895       freqg = 0.999999*freqg;
896       freqt = 0.999999*freqt;
897     }
898     if (freqc < 0.00000001) {
899       freqa = 0.999999*freqa;
900       freqc = 0.000001;
901       freqg = 0.999999*freqg;
902       freqt = 0.999999*freqt;
903     }
904     if (freqg < 0.00000001) {
905       freqa = 0.999999*freqa;
906       freqc = 0.999999*freqc;
907       freqg = 0.000001;
908       freqt = 0.999999*freqt;
909     }
910     if (freqt < 0.00000001) {
911       freqa = 0.999999*freqa;
912       freqc = 0.999999*freqc;
913       freqg = 0.999999*freqg;
914       freqt = 0.000001;
915     }
916   }
917   if (!justwts || firstset)
918     inputdata(sites);
919   makeweights();
920   dnadist_makevalues();
921   if (freqsfrom) {
922     dnadist_empiricalfreqs();
923     getbasefreqs(freqa, freqc, freqg, freqt, &freqr, &freqy, &freqar, &freqcy,
924                    &freqgr, &freqty, &ttratio, &xi, &xv, &fracchange,
925                    freqsfrom, printdata);
926   }
927 }  /* getinput */
928 
929 
inittable()930 void inittable()
931 {
932   /* Define a lookup table. Precompute values and store in a table */
933   long i;
934 
935   for (i = 0; i < categs; i++) {
936     tbl[i].rat = rate[i];
937     tbl[i].ratxv = rate[i] * xv;
938   }
939 }  /* inittable */
940 
941 
lndet(double (* a)[4])942 double lndet(double (*a)[4])
943 {
944   long i, j, k;
945   double temp, ld;
946 
947   /*Gauss-Jordan reduction -- invert matrix a in place,
948          overwriting previous contents of a.  On exit, matrix a
949          contains the inverse, lndet contains the log of the determinant */
950   ld = 1.0;
951   for (i = 0; i < 4; i++) {
952     ld *= a[i][i];
953     temp = 1.0 / a[i][i];
954     a[i][i] = 1.0;
955     for (j = 0; j < 4; j++)
956       a[i][j] *= temp;
957     for (j = 0; j < 4; j++) {
958       if (j != i) {
959         temp = a[j][i];
960         a[j][i] = 0.0;
961         for (k = 0; k < 4; k++)
962           a[j][k] -= temp * a[i][k];
963       }
964     }
965   }
966   if (ld <= 0.0)
967     return(99.0);
968   else
969     return(log(ld));
970 }  /* lndet */
971 
972 
makev(long m,long n,double * v)973 void makev(long m, long n, double *v)
974 {
975   /* compute one distance */
976   long i, j, k, l, it, num1, num2, idx;
977   long numerator = 0, denominator = 0;
978   double sum, sum1, sum2, sumyr, lz, aa, bb, cc, vv=0,
979          p1, p2, p3, q1, q2, q3, tt, delta = 0, slope,
980          xx1freqa, xx1freqc, xx1freqg, xx1freqt;
981   double *prod, *prod2, *prod3;
982   boolean quick, jukesquick, kimquick, logdetquick, overlap;
983   bases b;
984   node *p, *q;
985   sitelike xx1, xx2;
986   double basetable[4][4];  /* for quick logdet */
987   double basefreq1[4], basefreq2[4];
988 
989   p = nodep[m - 1];
990   q = nodep[n - 1];
991 
992   /* check for overlap between sequences */
993   overlap = false;
994   for(i=0 ; i < sites ; i++){
995     if((strchr("NX?O-",y[m-1][i])==NULL) &&
996        (strchr("NX?O-",y[n-1][i])==NULL)){
997       overlap = true;
998       break;
999     }
1000   }
1001   if(!overlap){
1002     printf("\nWARNING: NO OVERLAP BETWEEN SEQUENCES %ld AND %ld; -1.0 WAS WRITTEN\n", m, n);
1003     baddists = true;
1004     return;
1005   }
1006 
1007   quick = (!ctgry || categs == 1);
1008   if (jukes || kimura || logdet || similarity) {
1009     numerator = 0;
1010     denominator = 0;
1011     for (i = 0; i < endsite; i++) {
1012       memcpy(xx1, p->x[i][0], sizeof(sitelike));
1013       memcpy(xx2, q->x[i][0], sizeof(sitelike));
1014       sum = 0.0;
1015       sum1 = 0.0;
1016       sum2 = 0.0;
1017       for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) {
1018         sum1 += xx1[(long)b - (long)A];
1019         sum2 += xx2[(long)b - (long)A];
1020         sum += xx1[(long)b - (long)A] * xx2[(long)b - (long)A];
1021       }
1022       quick = (quick && (sum1 == 1.0 || sum1 == 4.0) &&
1023                (sum2 == 1.0 || sum2 == 4.0));
1024       if (sum1 == 1.0 && sum2 == 1.0) {
1025         numerator += (long)(weight[i] * sum);
1026         denominator += weight[i];
1027       }
1028     }
1029   }
1030   jukesquick = ((jukes || similarity) && quick);
1031   kimquick = (kimura && quick);
1032   logdetquick = (logdet && quick);
1033   if (logdet && !quick) {
1034     printf(" WARNING: CANNOT CALCULATE LOGDET DISTANCE\n");
1035     printf("  WITH PRESENT PROGRAM IF PARTIALLY AMBIGUOUS NUCLEOTIDES\n");
1036     printf("  -1.0 WAS WRITTEN\n");
1037     baddists = true;
1038   }
1039   if (jukesquick && jukes && (numerator * 4 <= denominator)) {
1040     printf("\nWARNING: INFINITE DISTANCE BETWEEN ");
1041     printf(" SPECIES %3ld AND %3ld\n", m, n);
1042     printf("  -1.0 WAS WRITTEN\n");
1043     baddists = true;
1044   }
1045   if (jukesquick && invar
1046       && (4 * (((double)numerator / denominator) - invarfrac)
1047           <= (1.0 - invarfrac))) {
1048     printf("\nWARNING: DIFFERENCE BETWEEN SPECIES %3ld AND %3ld", m, n);
1049     printf(" TOO LARGE FOR INVARIABLE SITES\n");
1050     printf("  -1.0 WAS WRITTEN\n");
1051     baddists = true;
1052   }
1053   if (jukesquick) {
1054     if (!gama && !invar)
1055       vv = -0.75 * log((4.0*((double)numerator / denominator) - 1.0) / 3.0);
1056     else if (!invar)
1057         vv = 0.75 * cvi * (exp(-(1/cvi)*
1058            log((4.0 * ((double)numerator / denominator) - 1.0) / 3.0)) - 1.0);
1059       else
1060         vv = 0.75 * cvi * (exp(-(1/cvi)*
1061            log((4.0 * ((double)numerator / denominator - invarfrac)/
1062                         (1.0-invarfrac) - 1.0) / 3.0)) - 1.0);
1063   }
1064   if (kimquick) {
1065     num1 = 0;
1066     num2 = 0;
1067     denominator = 0;
1068     for (i = 0; i < endsite; i++) {
1069       memcpy(xx1, p->x[i][0], sizeof(sitelike));
1070       memcpy(xx2, q->x[i][0], sizeof(sitelike));
1071       sum = 0.0;
1072       sum1 = 0.0;
1073       sum2 = 0.0;
1074       for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) {
1075         sum1 += xx1[(long)b - (long)A];
1076         sum2 += xx2[(long)b - (long)A];
1077         sum += xx1[(long)b - (long)A] * xx2[(long)b - (long)A];
1078       }
1079       sumyr = (xx1[0] + xx1[(long)G - (long)A])
1080             * (xx2[0] + xx2[(long)G - (long)A]) +
1081               (xx1[(long)C - (long)A] + xx1[(long)T - (long)A]) *
1082               (xx2[(long)C - (long)A] + xx2[(long)T - (long)A]);
1083       if (sum1 == 1.0 && sum2 == 1.0) {
1084         num1 += (long)(weight[i] * sum);
1085         num2 += (long)(weight[i] * (sumyr - sum));
1086         denominator += weight[i];
1087       }
1088     }
1089     tt = ((1.0 - (double)num1 / denominator)-invarfrac)/(1.0-invarfrac);
1090     if (tt > 0.0) {
1091       delta = 0.1;
1092       tt = delta;
1093       it = 0;
1094       while (fabs(delta) > 0.0000002 && it < iterationsd) {
1095         it++;
1096         if (!gama) {
1097           p1 = exp(-tt);
1098           p2 = exp(-xv * tt) - exp(-tt);
1099           p3 = 1.0 - exp(-xv * tt);
1100         } else {
1101           p1 = exp(-cvi * log(1 + tt / cvi));
1102           p2 = exp(-cvi * log(1 + xv * tt / cvi))
1103               - exp(-cvi * log(1 + tt / cvi));
1104           p3 = 1.0 - exp(-cvi * log(1 + xv * tt / cvi));
1105         }
1106         q1 = p1 + p2 / 2.0 + p3 / 4.0;
1107         q2 = p2 / 2.0 + p3 / 4.0;
1108         q3 = p3 / 2.0;
1109         q1 = q1 * (1.0-invarfrac) + invarfrac;
1110         q2 *= (1.0 - invarfrac);
1111         q3 *= (1.0 - invarfrac);
1112         if (!gama && !invar)
1113           slope = 0.5 * exp(-tt) * (num2 / q2 - num1 / q1) +
1114                   0.25 * xv * exp(-xv * tt) *
1115                  ((denominator - num1 - num2) * 2 / q3 - num2 / q2 - num1 / q1);
1116         else
1117           slope = 0.5 * (1 / (1 + tt / cvi)) * exp(-cvi * log(1 + tt / cvi)) *
1118                   (num2 / q2 - num1 / q1) + 0.25 * (xv / (1 + xv * tt / cvi)) *
1119                     exp(-cvi * log(1 + xv * tt / cvi)) *
1120                  ((denominator - num1 - num2) * 2 / q3 - num2 / q2 - num1 / q1);
1121         slope *= (1.0-invarfrac);
1122         if (slope < 0.0)
1123           delta = fabs(delta) / -2.0;
1124         else
1125           delta = fabs(delta);
1126         tt += delta;
1127       }
1128     }
1129     if ((delta >= 0.1) && (!similarity)) {
1130       printf("\nWARNING: DIFFERENCE BETWEEN SPECIES %3ld AND %3ld", m, n);
1131       if (invar)
1132         printf(" TOO LARGE FOR INVARIABLE SITES\n");
1133       else
1134         printf(" TOO LARGE TO ESTIMATE DISTANCE\n");
1135       printf("  -1.0 WAS WRITTEN\n");
1136       baddists = true;
1137     }
1138     vv = fracchange * tt;
1139   }
1140   if (!(jukesquick || kimquick || logdet)) {
1141     prod = (double *)Malloc(sites*sizeof(double));
1142     prod2 = (double *)Malloc(sites*sizeof(double));
1143     prod3 = (double *)Malloc(sites*sizeof(double));
1144     for (i = 0; i < endsite; i++) {
1145       memcpy(xx1, p->x[i][0], sizeof(sitelike));
1146       memcpy(xx2, q->x[i][0], sizeof(sitelike));
1147       xx1freqa = xx1[0] * freqa;
1148       xx1freqc = xx1[(long)C - (long)A] * freqc;
1149       xx1freqg = xx1[(long)G - (long)A] * freqg;
1150       xx1freqt = xx1[(long)T - (long)A] * freqt;
1151       sum1 = xx1freqa + xx1freqc + xx1freqg + xx1freqt;
1152       sum2 = freqa * xx2[0] + freqc * xx2[(long)C - (long)A] +
1153              freqg * xx2[(long)G - (long)A] + freqt * xx2[(long)T - (long)A];
1154       prod[i] = sum1 * sum2;
1155       prod2[i] = (xx1freqa + xx1freqg) *
1156                  (xx2[0] * freqar + xx2[(long)G - (long)A] * freqgr) +
1157           (xx1freqc + xx1freqt) *
1158           (xx2[(long)C - (long)A] * freqcy + xx2[(long)T - (long)A] * freqty);
1159       prod3[i] = xx1freqa * xx2[0] + xx1freqc * xx2[(long)C - (long)A] +
1160          xx1freqg * xx2[(long)G - (long)A] + xx1freqt * xx2[(long)T - (long)A];
1161     }
1162     tt = 0.1;
1163     delta = 0.1;
1164     it = 1;
1165     while (it < iterationsd && fabs(delta) > 0.0000002) {
1166       slope = 0.0;
1167       if (tt > 0.0) {
1168         lz = -tt;
1169         for (i = 0; i < categs; i++) {
1170           if (!gama) {
1171             tbl[i].z1 = exp(tbl[i].ratxv * lz);
1172             tbl[i].z1zz = exp(tbl[i].rat * lz);
1173           }
1174           else {
1175             tbl[i].z1 = exp(-cvi*log(1.0-tbl[i].ratxv * lz/cvi));
1176             tbl[i].z1zz = exp(-cvi*log(1.0-tbl[i].rat * lz/cvi));
1177           }
1178           tbl[i].y1 = 1.0 - tbl[i].z1;
1179           tbl[i].z1yy = tbl[i].z1 - tbl[i].z1zz;
1180           tbl[i].z1xv = tbl[i].z1 * xv;
1181         }
1182         for (i = 0; i < endsite; i++) {
1183           idx = category[alias[i] - 1];
1184           cc = prod[i];
1185           bb = prod2[i];
1186           aa = prod3[i];
1187           if (!gama && !invar)
1188             slope += weightrat[i] * (tbl[idx - 1].z1zz * (bb - aa) +
1189                                      tbl[idx - 1].z1xv * (cc - bb)) /
1190                          (aa * tbl[idx - 1].z1zz + bb * tbl[idx - 1].z1yy +
1191                           cc * tbl[idx - 1].y1);
1192           else
1193             slope += (1.0-invarfrac) * weightrat[i] * (
1194                     ((tbl[idx-1].rat)/(1.0-tbl[idx-1].rat * lz/cvi))
1195                        * tbl[idx - 1].z1zz * (bb - aa) +
1196                     ((tbl[idx-1].ratxv)/(1.0-tbl[idx-1].ratxv * lz/cvi))
1197                        * tbl[idx - 1].z1 * (cc - bb)) /
1198                 (aa * ((1.0-invarfrac)*tbl[idx - 1].z1zz + invarfrac)
1199                   + bb * (1.0-invarfrac)*tbl[idx - 1].z1yy
1200                   + cc * (1.0-invarfrac)*tbl[idx - 1].y1);
1201         }
1202       }
1203       if (slope < 0.0)
1204         delta = fabs(delta) / -2.0;
1205       else
1206         delta = fabs(delta);
1207       tt += delta;
1208       it++;
1209     }
1210     if ((delta >= 0.1) && (!similarity)) {
1211       printf("\nWARNING: DIFFERENCE BETWEEN SPECIES %3ld AND %3ld", m, n);
1212       if (invar)
1213         printf(" TOO LARGE FOR INVARIABLE SITES\n");
1214       else
1215         printf(" TOO LARGE TO ESTIMATE DISTANCE\n");
1216       printf("  -1.0 WAS WRITTEN\n");
1217       baddists = true;
1218     }
1219     vv = tt * fracchange;
1220     free(prod);
1221     free(prod2);
1222     free(prod3);
1223   }
1224   if (logdetquick) {  /* compute logdet when no ambiguous nucleotides */
1225     for (i = 0; i < 4; i++) {
1226       basefreq1[i] = 0.0;
1227       basefreq2[i] = 0.0;
1228       for (j = 0; j < 4; j++)
1229         basetable[i][j] = 0.0;
1230     }
1231     for (i = 0; i < endsite; i++) {
1232       k = 0;
1233       while (p->x[i][0][k] == 0.0)
1234         k++;
1235       basefreq1[k] += weight[i];
1236       l = 0;
1237       while (q->x[i][0][l] == 0.0)
1238         l++;
1239       basefreq2[l] += weight[i];
1240       basetable[k][l] += weight[i];
1241     }
1242     vv = lndet(basetable);
1243     if (vv == 99.0) {
1244       printf("\nNegative or zero determinant for distance between species");
1245       printf(" %ld and %ld\n", m, n);
1246       printf("  -1.0 WAS WRITTEN\n");
1247       baddists = true;
1248     }
1249     vv = -0.25*(vv - 0.5*(log(basefreq1[0])+log(basefreq1[1])
1250                                         +log(basefreq1[2])+log(basefreq1[3])
1251                                         +log(basefreq2[0])+log(basefreq2[1])
1252                                         +log(basefreq2[2])+log(basefreq2[3])));
1253   }
1254   if (similarity) {
1255     if (denominator < 1.0) {
1256       printf("\nWARNING: SPECIES %3ld AND %3ld HAVE NO BASES THAT", m, n);
1257       printf(" CAN BE COMPARED\n");
1258       printf("  -1.0 WAS WRITTEN\n");
1259       baddists = true;
1260   }
1261     vv = (double)numerator / denominator;
1262   }
1263   *v = vv;
1264 }  /* makev */
1265 
1266 
makedists()1267 void makedists()
1268 {
1269   /* compute distance matrix */
1270   long i, j;
1271   double v;
1272 
1273   inittable();
1274   for (i = 0; i < endsite; i++)
1275     weightrat[i] = weight[i] * rate[category[alias[i] - 1] - 1];
1276   if (progress) {
1277     printf("Distances calculated for species\n");
1278 #ifdef WIN32
1279     phyFillScreenColor();
1280 #endif
1281   }
1282   for (i = 0; i < spp; i++)
1283     if (similarity)
1284       d[i][i] = 1.0;
1285     else
1286       d[i][i] = 0.0;
1287   baddists = false;
1288 
1289   float cur_prog = 0;
1290   int total = (spp*spp / 2) + 1;
1291   float step = (1.0f / total) * 100.0f;
1292 
1293   for (i = 1; i < spp; i++) {
1294     if (progress) {
1295       printf("    ");
1296       for (j = 0; j < nmlngth; j++)
1297         putchar(nayme[i - 1][j]);
1298       printf("   ");
1299     }
1300     for (j = i + 1; j <= spp; j++) {
1301       makev(i, j, &v);
1302       v = fabs(v);
1303       if ( baddists == true ) {
1304         v = -1;
1305         baddists = false;
1306       }
1307       d[i - 1][j - 1] = v;
1308       d[j - 1][i - 1] = v;
1309 
1310       U2::TaskStateInfo* ts = U2::getTaskInfo();
1311       if (ts->cancelFlag != 0) {
1312           ugene_exit("Task canceled!");
1313       } else if(!U2::isBootstr()){
1314           cur_prog += step;
1315           ts->progress = int (cur_prog);
1316       }
1317 
1318       if (progress) {
1319         putchar('.');
1320         fflush(stdout);
1321       }
1322     }
1323     if (progress) {
1324       putchar('\n');
1325 #ifdef WIN32
1326       phyFillScreenColor();
1327 #endif
1328     }
1329   }
1330   if (progress) {
1331     printf("    ");
1332     for (j = 0; j < nmlngth; j++)
1333       putchar(nayme[spp - 1][j]);
1334     putchar('\n');
1335   }
1336 }  /* makedists */
1337 
1338 
writedists()1339 void writedists()
1340 {
1341   /* write out distances */
1342   char **names;
1343 
1344   names = stringnames_new();
1345   output_matrix_d(outfile, d, spp, spp, names, names, matrix_flags);
1346   stringnames_delete(names);
1347 
1348   if (progress)
1349     printf("\nDistances written to file \"%s\"\n\n", outfilename);
1350 }  /* writedists */
1351 
1352 
1353 // int main(int argc, Char *argv[])
1354 // {  /* DNA Distances by Maximum Likelihood */
1355 // #ifdef MAC
1356 //   argc = 1;                /* macsetup("Dnadist","");        */
1357 //   argv[0] = "Dnadist";
1358 // #endif
1359 //   init(argc, argv);
1360 //   openfile(&infile,INFILE,"input file","r",argv[0],infilename);
1361 //   openfile(&outfile,OUTFILE,"output file","w",argv[0],outfilename);
1362 //
1363 //   ibmpc = IBMCRT;
1364 //   ansi = ANSICRT;
1365 //   mulsets = false;
1366 //   datasets = 1;
1367 //   firstset = true;
1368 //   doinit();
1369 //   ttratio0 = ttratio;
1370 //   if (ctgry)
1371 //     openfile(&catfile,CATFILE,"categories file","r",argv[0],catfilename);
1372 //   if (weights || justwts)
1373 //     openfile(&weightfile,WEIGHTFILE,"weights file","r",argv[0],weightfilename);
1374 //   for (ith = 1; ith <= datasets; ith++) {
1375 //     ttratio = ttratio0;
1376 //     getinput();
1377 //     if (ith == 1)
1378 //       firstset = false;
1379 //     if (datasets > 1 && progress)
1380 //       printf("Data set # %ld:\n\n",ith);
1381 //     makedists();
1382 //     writedists();
1383 //   }
1384 //   FClose(infile);
1385 //   FClose(outfile);
1386 // #ifdef MAC
1387 //   fixmacfile(outfilename);
1388 // #endif
1389 //   printf("Done.\n\n");
1390 // #ifdef WIN32
1391 //   phyRestoreConsoleAttributes();
1392 // #endif
1393 //   return 0;
1394 // }  /* DNA Distances by Maximum Likelihood */
1395 
1396 
1397 
1398 
1399