1 /* baseml.c
2      Maximum likelihood parameter estimation e aligned DNA (RNA) sequences,
3                  combined with phylogenetic tree estimation.
4                     Copyright, Ziheng YANG, July 1992 onwards
5 
6                   cc -o baseml -fast baseml.c tools.c -lm
7                   cl -O2 baseml.c tools.c
8                           baseml <ControlFileName>
9 */
10 
11 #include "paml.h"
12 
13 #define NS            7000
14 #define NBRANCH       (NS*2-2)
15 #define NNODE         (NS*2-1)
16 #define MAXNSONS      200
17 #define NGENE         500
18 #define LSPNAME       96
19 #define NCODE         5
20 #define NCATG         100
21 
22 #define NP            (NBRANCH+NGENE+11)
23 /*
24 #define NP            (NBRANCH+3*NNODE+NGENE+11+NCATG*NCATG-1)
25 */
26 extern int noisy, NFunCall, NEigenQ, NPMatUVRoot, *ancestor, GeneticCode[][64];
27 extern double Small_Diff, *SeqDistance;
28 
29 int Forestry(FILE *fout);
30 void DetailOutput(FILE *fout, double x[], double var[]);
31 int GetOptions(char *ctlf);
32 int GetInitials(double x[], int *fromfile);
33 int GetInitialsTimes(double x[]);
34 int SetxInitials(int np, double x[], double xb[][2]);
35 int SetParameters(double x[]);
36 int SetPGene(int igene, int _pi, int _UVRoot, int _alpha, double x[]);
37 int SetPSiteClass(int iclass, double x[]);
38 int testx(double x[], int np);
39 double GetBranchRate(int igene, int ibrate, double x[], int *ix);
40 int GetPMatBranch(double Pt[], double x[], double t, int inode);
41 int ConditionalPNode(int inode, int igene, double x[]);
42 
43 int TransformxBack(double x[]);
44 int AdHocRateSmoothing(FILE*fout, double x[NS * 3], double xb[NS * 3][2], double space[]);
45 void DatingHeteroData(FILE* fout);
46 
47 int TestModel(FILE *fout, double x[], int nsep, double space[]);
48 int OldDistributions(int inode, double AncientFreqs[]);
49 int SubData(int argc, char *argv[]);
50 int GroupDistances();
51 
52 
53 struct CommonInfo {
54    unsigned char *z[NS];
55    char *spname[NS], seqf[2048], outf[2048], treef[2048], cleandata;
56    char oldconP[NNODE];  /* update conP for nodes to save computation (0 yes; 1 no) */
57    int seqtype, ns, ls, ngene, posG[NGENE + 1], lgene[NGENE], *pose, npatt, readpattern;
58    int np, ntime, nrgene, nrate, nalpha, npi, nhomo, ncatG, ncode, Mgene;
59    size_t sspace, sconP;
60    int fix_kappa, fix_rgene, fix_alpha, fix_rho, nparK, fix_blength;
61    int clock, model, getSE, runmode, print, verbose, ndata, bootstrap;
62    int icode, coding, method;
63    int nbtype;
64    double *fpatt, kappa, alpha, rho, rgene[NGENE], pi[4], piG[NGENE][4], TipDate, TipDate_TimeUnit;
65    double freqK[NCATG], rK[NCATG], MK[NCATG*NCATG];
66    double *blengths0;
67    double(*plfun)(double x[], int np), *conP, *fhK, *space;
68    int conPSiteClass;        /* is conP memory allocated for each class? */
69    int NnodeScale;
70    char *nodeScale;          /* nScale[ns-1] for interior nodes */
71    double *nodeScaleF;       /* nScaleF[npatt] for scale factors */
72    int fix_omega;
73    double omega;
74 }  com;
75 
76 struct TREEB {
77    int  nbranch, nnode, root, branches[NBRANCH][2];
78    double lnL;
79 }  tree;
80 
81 struct TREEN {
82    int father, nson, sons[MAXNSONS], ibranch, ipop;
83    double branch, age, *pkappa, pi[4], *conP, label, label2;
84    char fossil, *name, *annotation;
85 }  *nodes, **gnodes, nodes_t[2 * NS - 1];
86 
87 
88 /* for stree.nodes[].fossil: lower, upper, bounds, gamma, inverse-gamma */
89 enum { LOWER_F = 1, UPPER_F, BOUND_F } FOSSIL_FLAGS;
90 char *fossils[] = { " ", "L", "U", "B" };
91 
92 struct SPECIESTREE {
93    int nbranch, nnode, root, nspecies, nfossil;
94    struct TREESPN {
95       char name[LSPNAME + 1], fossil, usefossil;  /* fossil: 0, 1, 2, 3 */
96       int father, nson, sons[2];
97       double label, age, pfossil[7];   /* lower and upper bounds or alpha & beta */
98       double *lnrates;          /* log rates for loci */
99    } nodes[2 * NS - 1];
100 }  stree;
101 /* all trees are binary & rooted, with ancestors unknown. */
102 
103 struct DATA { /* locus-specific data and tree information */
104    int ns[NGENE], ls[NGENE], npatt[NGENE], ngene, lgene[NGENE];
105    int root[NGENE + 1], BlengthMethod, fix_nu, nbrate[NGENE];
106    char *z[NGENE][NS], cleandata[NGENE];
107    double *fpatt[NGENE], lnpT, lnpR, lnpDi[NGENE];
108    double Qfactor[NGENE], pi[NGENE][NCODE], nu[NGENE];
109    double rgene[NGENE], kappa[NGENE], alpha[NGENE], omega[1];
110    int NnodeScale[NGENE];
111    char *nodeScale[NGENE];    /* nScale[data.ns[locus]-1] for interior nodes */
112 }  data;
113 
114 int nR = NCODE, LASTROUND, BayesEB;
115 double PMat[NCODE*NCODE], Cijk[NCODE*NCODE*NCODE], Root[NCODE];
116 int StepMatrix[NCODE*NCODE];
117 
118 
119 FILE *frub, *flnf, *frst, *frst1, *frst2 = NULL, *finitials = NULL;
120 char *ratef = "rates";
121 char *models[] = { "JC69","K80","F81","F84","HKY85","T92","TN93","REV","UNREST", "REVu","UNRESTu" };
122 enum { JC69, K80, F81, F84, HKY85, T92, TN93, REV, UNREST, REVu, UNRESTu } MODELS;
123 char *clockstr[] = { "", "Global clock", "Local clock", "ClockCombined" };
124 enum { GlobalClock = 1, LocalClock, ClockCombined } ClockModels;
125 
126 double _rateSite = 1; /* rate for a site */
127 int N_PMatUVRoot = 0;
128 
129 
130 #define BASEML 1
131 #include "treesub.c"
132 #include "treespace.c"
133 
main(int argc,char * argv[])134 int main (int argc, char *argv[])
135 {
136    FILE *fout, *fseq = NULL, *fpair[6];
137    char pairfs[1][32] = { "2base.t" };
138    char rstf[2048] = "rst", ctlf[2048] = "baseml.ctl", timestr[64];
139    char *Mgenestr[] = { "diff. rate", "separate data", "diff. rate & pi",
140                      "diff. rate & kappa", "diff. rate & pi & kappa" };
141    int getdistance = 1, i, idata;
142    size_t s2 = 0;
143 
144    if (argc > 2 && !strcmp(argv[argc - 1], "--stdout-no-buf"))
145       setvbuf(stdout, NULL, _IONBF, 0);
146 
147    starttimer();
148    SetSeed(-1, 0);
149 
150    com.ndata = 1;
151    com.cleandata = 0;  noisy = 0;  com.runmode = 0;
152 
153    com.clock = 0;
154    com.fix_rgene = 0;  /* 0: estimate rate factors for genes */
155    com.nhomo = 0;
156    com.getSE = 0;      /* 1: want S.E.s of estimates;  0: don't want them */
157 
158    com.seqtype = 0;   com.model = 0;
159    com.fix_kappa = 0; com.kappa = 5;
160    com.fix_alpha = 1; com.alpha = 0.;  com.ncatG = 4;    /* alpha=0 := inf */
161    com.fix_rho = 1;   com.rho = 0;     com.nparK = 0;
162    com.ncode = 4;     com.icode = 0;
163    com.print = 0;     com.verbose = 1;  com.fix_blength = 0;
164    com.method = 0;    com.space = NULL;
165 
166    printf("BASEML in %s\n", pamlVerStr);
167    frub = gfopen("rub", "w");  frst = gfopen(rstf, "w"); frst1 = gfopen("rst1", "w");
168 
169    if (argc > 1)  strncpy(ctlf, argv[1], 95);
170    GetOptions(ctlf);
171    if (com.runmode != -2) finitials = fopen("in.baseml", "r");
172    if (com.getSE == 2)    frst2 = fopen("rst2", "w");
173 
174    fprintf(frst, "Supplemental results for BASEML\n\nseqf:  %s\ntreef: %s\n",
175       com.seqf, com.treef);
176    fout = gfopen(com.outf, "w");
177    fpair[0] = (FILE*)gfopen(pairfs[0], "w");
178 
179    /* for stepwise addition, com.sspace should be calculated using com.ns. */
180    com.sspace = 1000000 * sizeof(double);
181    if ((com.space = (double*)malloc(com.sspace)) == NULL)
182       error2("oom space");
183 
184    if (com.clock == 5 || com.clock == 6)
185       DatingHeteroData(fout);
186 
187    if ((fseq = fopen(com.seqf, "r")) == NULL) {
188       printf("\n\nSequence file %s not found!\n", com.seqf);
189       exit(-1);
190    }
191 
192    for (idata = 0; idata < com.ndata; idata++) {
193       if (com.ndata > 1) {
194          printf("\nData set %4d\n", idata + 1);
195          fprintf(fout, "\n\nData set %4d\n", idata + 1);
196 
197          fprintf(frst1, "%4d", idata + 1);
198       }
199       if (idata)  GetOptions(ctlf); /* com.cleandata is read from here again. */
200       ReadSeq((com.verbose ? fout : NULL), fseq, com.cleandata, 0);
201       if (com.ngene > 1 && (com.fix_blength == 2 || com.fix_blength == 3))
202          error2("fix_blength = 2 or 3 does not work for partitioned data or Mgene models");
203       if (com.fix_blength == 3) {
204          if ((com.blengths0 = (double*)malloc((com.ns * 2 - 2) * sizeof(double))) == NULL)
205             error2("oom blengths0");
206       }
207 
208       SetMapAmbiguity(com.seqtype, 0);
209 
210       /* AllPatterns(fout); */
211 
212       if (com.rho && com.readpattern) error2("rho doesn't work with readpattern.");
213       if (com.ndata == 1) fclose(fseq);
214       i = (com.ns * 2 - 1) * sizeof(struct TREEN);
215       if ((nodes = (struct TREEN*)malloc(i)) == NULL) error2("oom");
216 
217       if (com.coding) {
218          if (com.ls % 3 != 0 || (com.ngene != 1 && com.ngene != 3))
219             error2("this is not a coding sequence.  Remove icode?");
220       }
221 
222       if (com.Mgene && com.ngene == 1)  error2("option Mgene for 1 gene?");
223       if (com.ngene > 1 && com.nhomo) error2("nhomo for mutliple genes?");
224       if (com.nalpha && (!com.alpha || com.ngene == 1 || com.fix_alpha))
225          error2("Malpha");
226       if (com.nalpha > 1 && com.rho) error2("Malpha or rho");
227       if (com.alpha == 0)  com.nalpha = 0;
228       else                 com.nalpha = (com.nalpha ? com.ngene : !com.fix_alpha);
229       if (com.Mgene == 1)  com.nalpha = !com.fix_alpha;
230 
231       if (com.ngene == 1) com.Mgene = 0;
232       if ((com.nhomo == 1 && com.ngene > 1) || (com.Mgene > 1 && com.nhomo >= 1))
233          error2("nhomo does not work with Mgene options");
234 
235       if ((com.Mgene >= 2 && com.model == JC69) || (com.Mgene >= 3 && com.model == F81)
236          || ((com.Mgene == 2 || com.Mgene == 4) && com.model == K80)
237          || (com.Mgene > 1 && com.nhomo > 1)
238          || (com.Mgene >= 2 && (com.model == UNREST || com.model == UNRESTu)))
239          error2("model || Mgene");
240       fprintf(fout, "\nBASEML (in %s)  %s  %s ", pamlVerStr, com.seqf, models[com.model]);
241       if (com.clock) fprintf(fout, " %s ", clockstr[com.clock]);
242       if (!com.nparK && com.alpha && com.rho) fprintf(fout, "  Auto-");
243       if (com.alpha) fprintf(fout, "dGamma (ncatG=%d)", com.ncatG);
244       if (com.nalpha > 1) fprintf(fout, "(%d gamma)", com.nalpha);
245       if (com.ngene > 1)
246          fprintf(fout, " (%d genes: %s)  ", com.ngene, Mgenestr[com.Mgene]);
247       if (com.nhomo > 1)
248          fprintf(fout, "\nNonhomo:%2d  fix_kappa%2d\n", com.nhomo, com.fix_kappa);
249       if (com.nparK && com.ncatG > 6)
250          printf("\a\n%d rate categories for nonparametric model?\n", com.ncatG);
251       if (com.nparK) fprintf(fout, "\nnparK:%4d  K:%4d\n", com.nparK, com.ncatG);
252 
253       if (getdistance) {
254          i = com.ns*(com.ns - 1) / 2;
255          SeqDistance = (double*)realloc(SeqDistance, i * sizeof(double));
256          ancestor = (int*)realloc(ancestor, i * sizeof(int));
257          if (SeqDistance == NULL || ancestor == NULL) error2("oom distance&ancestor");
258       }
259       InitializeBaseAA(fout);
260 
261       if (com.Mgene == 3)
262          for (i = 0; i < com.ngene; i++)
263             xtoy(com.pi, com.piG[i], com.ncode);
264 
265       if (com.model == JC69 && com.ngene <= 1 && !com.readpattern && !com.print) {
266          PatternWeightJC69like();
267          if (fout) {
268             fprintf(fout, "\n\nPrinting out site pattern counts\n");
269             printPatterns(fout);
270          }
271          if (com.verbose >= 2) {
272             fprintf(fout, "\nSite-to-pattern map: ");
273             for (i = 0; i < com.ls; i++)
274                fprintf(fout, " %2d", com.pose[i] + 1);
275             fprintf(fout, "\n");
276 
277             fprintf(fout, "\n**** Alignment mutated for JC69-like models ****\n");
278             fprintf(fout, "\n%6d %6d\n", com.ns, com.ls);
279             for (i = 0; i < com.ns; i++) {
280                fprintf(fout, "\n%-30s  ", com.spname[i]);
281                print1seq(fout, com.z[i], com.ls, com.pose);
282             }
283             fprintf(fout, "\n");
284          }
285       }
286 
287       com.sconP = (com.ns - 1)*com.ncode*(size_t)com.npatt * sizeof(double);
288       if ((com.conP = (double*)realloc(com.conP, com.sconP)) == NULL)
289          error2("oom conP");
290       if (com.alpha || com.nparK) {
291          s2 = com.npatt*com.ncatG * sizeof(double);
292          if ((com.fhK = (double*)realloc(com.fhK, s2)) == NULL) error2("oom");
293       }
294 
295       printf("\n%9zu bytes for distance ", com.ns*(com.ns - 1) / 2 * (sizeof(double) + sizeof(int)));
296       printf("\n%9zu bytes for conP\n", com.sconP);
297       printf("%9zu bytes for fhK\n%9zu bytes for space\n", s2, com.sspace);
298 
299       /* FOR(i,com.ns*2-1) xtoy(com.pi,nodes[i].pi, 4);  check this? */
300 
301       if (getdistance)
302          DistanceMatNuc(fout, fpair[0], com.model, com.alpha);
303 
304 
305 #if(0)
306       /* Selecting the most divergent sequences using the distance matrix
307       */
308       {
309          char keep[NS];
310          FILE *ftmp = gfopen("newdata.txt", "w");
311          int is, js, i, j, nskeep, isbest = 0, isk, chosen[NS];
312          double dmax, dminmax, d, dbig = 9;
313 
314          if (com.model == 0 && com.print == 0)
315             error2("choose RateAncestor = 1 to print the seqs correctly.");
316          nskeep = 5;
317          printf("\nPicking up the most different sequences.\nHow many do you want? ");
318          scanf("%d", &nskeep);
319 
320          if (nskeep >= com.ns) error2("nothing to do");
321 
322          for (is = 0; is < com.ns; is++) {
323             keep[is] = 0;
324             chosen[is] = -1;
325          }
326 
327          for (is = 0, dmax = 0; is < com.ns; is++) {
328             for (js = 0; js < is; js++) {
329                d = SeqDistance[is*(is - 1) / 2 + js];
330                if (dmax < d) { dmax = d; chosen[1] = is; chosen[0] = js; }
331             }
332          }
333          keep[chosen[0]] = keep[chosen[1]] = 1;
334          printf("selected seq %3d %s\n", chosen[0] + 1, com.spname[chosen[0]]);
335          printf("selected seq %3d %s\n", chosen[1] + 1, com.spname[chosen[1]]);
336          for (isk = 2; isk < nskeep; isk++) {
337             for (is = 0, dminmax = 0; is < com.ns; is++) {
338                if (keep[is]) continue;
339                /* d is the smallest distance to those chosen */
340                for (js = 0, d = dbig; chosen[js] != -1; js++) {
341                   i = max2(is, chosen[js]);
342                   j = min2(is, chosen[js]);
343                   if (d > SeqDistance[i*(i - 1) / 2 + j]) d = SeqDistance[i*(i - 1) / 2 + j];
344                }
345                if (dminmax < d) {
346                   dminmax = d;  isbest = is;
347                }
348             }
349             keep[isbest] = 1;
350             chosen[isk] = isbest;
351             printf("selected seq %5d (dmin = %8.4f): %s\n", isbest + 1, dminmax, com.spname[isbest]);
352          }
353 
354          fprintf(ftmp, "%6d %6d\n", nskeep, com.ls);
355          for (j = 0; j < com.ns; j++) {
356             if (keep[j] == 0) continue;
357             fprintf(ftmp, "%-40s  ", com.spname[j]);
358             print1seq(ftmp, com.z[j], com.ls, com.pose);
359             FPN(ftmp);
360          }
361          fclose(ftmp);
362          return(0);
363 
364       }
365 
366 #endif
367 
368       if (com.Mgene == 1)        MultipleGenes(fout, fpair, com.space);
369       else if (com.runmode == 0) Forestry(fout);
370       else if (com.runmode == 2) fprintf(frst, "%2d", StarDecomposition(fout, com.space));
371       else if (com.runmode == 3) StepwiseAddition(fout, com.space);
372       else if (com.runmode >= 4) Perturbation(fout, (com.runmode == 4), com.space);
373 
374       FPN(frst);  if ((idata + 1) % 10 == 0) fflush(frst);
375       if (com.ndata > 1 && com.runmode) {
376          fprintf(frst1, "\t");
377          OutTreeN(frst1, 1, 0);
378       }
379       FPN(frst1); fflush(frst1);
380       free(nodes);
381       if (com.fix_blength == 3)  free(com.blengths0);
382 
383       printf("\nTime used: %s\n", printtime(timestr));
384    }   /* for(idata) */
385    if (com.ndata > 1 && fseq) fclose(fseq);
386    free(com.space);
387    fclose(fout);  fclose(frst);   fclose(fpair[0]);
388    if (finitials) { fclose(finitials);  finitials = NULL; }
389 
390    return (0);
391 }
392 
393 
394 /* x[]: t[ntime], rgene[ngene-1], kappa[nbranch], pi[nnode*3],
395         { alpha, rho || rK[], fK[] || rK[], MK[] }
396 */
397 
398 FILE *ftree;
399 void PrintBestTree(FILE *fout, FILE *ftree, int btree);
400 
Forestry(FILE * fout)401 int Forestry(FILE *fout)
402 {
403    static int ages = 0;
404    int status = 0, inbasemlg = 0, i, j = 0, itree = 0, ntree, np, np0, iteration = 1;
405    int pauptree = 0, btree = 0, haslength;
406    double x[NP], xcom[NP - NBRANCH], lnL, lnL0 = 0, lnLbest = 0, e = 1e-7, nchange = -1;
407    double xb[NP][2], tl = 0, *g = NULL, *H = NULL;
408    FILE *finbasemlg = NULL, *frate = NULL;
409 
410    ftree = gfopen(com.treef, "r");
411    GetTreeFileType(ftree, &ntree, &pauptree, 0);
412    if (com.alpha)
413       frate = gfopen(ratef, "w");
414    if (com.alpha && com.rho == 0 && com.nhomo == 0 && com.nparK == 0 && com.ns < 15) {
415       inbasemlg = 1;  finbasemlg = gfopen("in.basemlg", "w");
416    }
417    flnf = gfopen("lnf", "w+");
418    fprintf(flnf, "%6d %6d %6d\n", ntree, com.ls, com.npatt);
419 
420    for (itree = 0; ntree == -1 || itree < ntree; itree++, iteration = 1) {
421       if (ReadTreeN(ftree, &haslength, 0, 1)) {
422          puts("\nend of tree file.");   break;
423       }
424       ProcessNodeAnnotation(&i);
425 
426       if (noisy) printf("\nTREE # %2d: ", itree + 1);
427       fprintf(fout, "\nTREE # %2d:  ", itree + 1);
428       fprintf(frub, "\n\nTREE # %2d\n", itree + 1);
429       if (com.print) fprintf(frst, "\n\nTREE # %2d\n", itree + 1);
430       fprintf(flnf, "\n\n%2d\n", itree + 1);
431 
432       LASTROUND = 0;
433       if ((com.fix_blength == 2 || com.fix_blength == 3) && !haslength)
434          error2("We need branch lengths in tree");
435       if (com.fix_blength > 0 && !haslength) com.fix_blength = 0;
436       if (ages++ == 0 && com.fix_blength > 0 && haslength) {
437          if (com.clock) puts("\nBranch lengths in tree are ignored");
438          else {
439             if (com.fix_blength == 2) puts("\nBranch lengths in tree are fixed.");
440             else if (com.fix_blength == 1)
441                puts("\nBranch lengths in tree used as initials.");
442             if (com.fix_blength == 1) {
443                FOR(i, tree.nnode)
444                   if ((x[nodes[i].ibranch] = nodes[i].branch) < 0)
445                      x[nodes[i].ibranch] = 1e-5;
446             }
447          }
448       }
449 
450       if (com.cleandata)
451          nchange = MPScore(com.space);
452       if (noisy&&com.ns < 99)  {
453          OutTreeN(F0, 0, 0); printf(" MP score: %.2f\n", nchange);
454       }
455       OutTreeN(fout, 0, 0);  fprintf(fout, "  MP score: %.2f", nchange);
456       if (!com.clock && com.model <= REV && com.nhomo <= 2
457          && nodes[tree.root].nson <= 2 && com.ns > 2) {
458          puts("\nThis is a rooted tree, without clock.  Check.");
459          if (com.verbose) fputs("\nThis is a rooted tree.  Please check!", fout);
460       }
461 
462       fflush(fout);  fflush(flnf);
463 
464       GetInitials(x, &i);
465 
466       if (i == -1) iteration = 0;
467 
468       if ((np = com.np) > NP) error2("raise NP and recompile");
469 
470       if (com.sspace < spaceming2(np)) {
471          com.sspace = spaceming2(np);
472          if ((com.space = (double*)realloc(com.space, com.sspace)) == NULL)
473             error2("oom space");
474       }
475 
476       if (itree) { np0 = np; }
477       if (itree && !finitials && (com.nhomo == 0 || com.nhomo == 2))
478          for (i = 0; i < np - com.ntime; i++) x[com.ntime + i] = max2(xcom[i], 0.001);
479 
480       PointconPnodes();
481 
482       lnL = com.plfun(x, np);
483       if (noisy) {
484          printf("\nntime & nrate & np:%6d%6d%6d\n", com.ntime, com.nrate, com.np);
485          if (noisy > 2 && com.ns < 50) { OutTreeB(F0); FPN(F0); matout(F0, x, 1, np); }
486          printf("\nlnL0 = %12.6f\n", -lnL);
487       }
488 
489       if (iteration && np) {
490          SetxBound(np, xb);
491          SetxInitials(np, x, xb); /* start within the feasible region */
492          if (com.method == 1)
493             j = minB(noisy > 2 ? frub : NULL, &lnL, x, xb, e, com.space);
494          else if (com.method == 3)
495             j = minB2(noisy > 2 ? frub : NULL, &lnL, x, xb, e, com.space);
496          else
497             j = ming2(noisy > 2 ? frub : NULL, &lnL, com.plfun, NULL, x, xb, com.space, e, np);
498 
499          if (j == -1 || lnL <= 0 || lnL > 1e7) status = -1;
500          else                                  status = 0;
501       }
502 
503       if (itree == 0) {
504          lnL0 = lnLbest = lnL; btree = 0;
505       }
506       else if (lnL < lnLbest) {
507          lnLbest = lnL;  btree = itree;
508       }
509       if (noisy) printf("Out...\nlnL  = %12.6f\n", -lnL);
510       fprintf(fout, "\nlnL(ntime:%3d  np:%3d): %13.6f %+12.6f\n",
511          com.ntime, np, -lnL, -lnL + lnL0);
512       if (com.fix_blength < 2) { OutTreeB(fout);  FPN(fout); }
513       if (LASTROUND == 0) {
514          LASTROUND = 1;
515          if ((com.npi && com.model != T92) || com.nparK >= 2) TransformxBack(x);
516       }
517       if (com.clock) { /* move ages into x[] */
518          for (i = 0, j = !nodes[tree.root].fossil; i < tree.nnode; i++)
519             if (i != tree.root && nodes[i].nson && !nodes[i].fossil)
520                x[j++] = nodes[i].age;
521       }
522       for (i = 0; i < np; i++)
523          fprintf(fout, " %8.6f", x[i]);
524       FPN(fout); fflush(fout);
525       if (inbasemlg) matout(finbasemlg, x, 1, np);
526 
527       /*
528       for(i=0;i<np;i++) fprintf(frst1,"\t%.6f", x[i]);
529       fprintf(frst1,"\t%.4f", -lnL);
530       */
531       if (com.getSE) {
532          puts("Calculating SE's");
533          if (com.sspace < np*(np + 1) * sizeof(double)) {
534             com.sspace = np*(np + 1) * sizeof(double);
535             if ((com.space = (double*)realloc(com.space, com.sspace)) == NULL)
536                error2("oom space for SE");
537          }
538 
539          g = com.space;
540          H = g + com.np;
541          HessianSKT2004(x, lnL, g, H);
542          if (com.getSE >= 2 && com.clock == 0 && nodes[tree.root].nson == 3) {  /* g & H */
543             fprintf(frst2, "\n %d\n\n", com.ns);
544             OutTreeN(frst2, 1, 1);  fprintf(frst2, "\n\n");
545             for (i = 0; i < com.ntime; i++)
546                if (x[i] > 0.0004 && fabs(g[i]) < 0.005) g[i] = 0;
547             for (i = 0; i < com.ntime; i++) fprintf(frst2, " %9.6f", x[i]);  fprintf(frst2, "\n\n");
548             for (i = 0; i < com.ntime; i++) fprintf(frst2, " %9.6f", g[i]);  fprintf(frst2, "\n\n");
549             fprintf(frst2, "\nHessian\n\n");
550             for (i = 0; i < com.ntime; i++, FPN(frst2))
551                for (j = 0; j < com.ntime; j++)
552                   fprintf(frst2, " %10.4g", H[i*np + j]);
553             fflush(frst2);
554          }
555          for (i = 0; i < np*np; i++)  H[i] *= -1;
556          matinv(H, np, np, H + np*np);
557          fprintf(fout, "SEs for parameters:\n");
558          for (i = 0; i < np; i++)
559             fprintf(fout, " %8.6f", (H[i*np + i] > 0. ? sqrt(H[i*np + i]) : -1));
560          FPN(fout);
561       }
562 
563       /* if(com.clock) SetBranch(x); */
564       /* GroupDistances(); */
565 
566       if (com.nbtype > 1 && com.clock>1)
567          fprintf(fout, "\nWarning: branch rates are not yet applied in tree length and branch lengths");
568       if (AbsoluteRate) {
569          fprintf(fout, "\nNote: mutation rate is not applied to tree length.  Tree has ages, for TreeView & FigTree\n");
570          for (i = 0; i < tree.nnode; i++)
571             nodes[i].branch *= com.TipDate_TimeUnit;
572       }
573 
574       if (!AbsoluteRate) {
575          for (i = 0, tl = 0; i < tree.nnode; i++)
576             if (i != tree.root) tl += nodes[i].branch;
577          fprintf(fout, "\ntree length = %9.5f%s\n", tl, (com.ngene > 1 ? " (1st gene)" : ""));
578       }
579       FPN(fout); OutTreeN(fout, 1, 0); FPN(fout);
580       FPN(fout); OutTreeN(fout, 1, 1); FPN(fout);
581       if (com.clock) {
582          FPN(fout); OutTreeN(fout, 1, PrNodeNum); FPN(fout);
583       }
584 
585       if (com.TipDate) {  /* scale back the ages in nodes[].branch */
586          for (i = 0; i < tree.nnode; i++)  nodes[i].branch /= com.TipDate_TimeUnit;
587       }
588       if (com.np - com.ntime || com.clock)
589          DetailOutput(fout, x, H);
590       if (status) {
591          printf("convergence?\n");  fprintf(fout, "check convergence..\n");
592       }
593       if (itree == 0)
594          for (i = 0; i < np - com.ntime; i++) xcom[i] = x[com.ntime + i];
595       else if (!j)
596          for (i = 0; i < np - com.ntime; i++) xcom[i] = xcom[i] * .8 + x[com.ntime + i] * 0.2;
597       /*
598             TestModel(fout,x,1,com.space);
599             fprintf(fout,"\n\n# of lfun calls:%10d\n", NFunCall);
600       */
601 
602       if (com.coding && com.Mgene != 1 && !AbsoluteRate) {
603          fputs("\nTree with branch lengths for codon models:\n", fout);
604          FOR(i, tree.nnode) nodes[i].branch *= (1 + com.rgene[1] + com.rgene[2]);
605          FPN(fout); OutTreeN(fout, 1, 1); FPN(fout);
606          FOR(i, tree.nnode) nodes[i].branch /= (1 + com.rgene[1] + com.rgene[2]);
607       }
608 
609       com.print -= 9;  com.plfun(x, np);  com.print += 9;
610       if (com.print) {
611          if (com.plfun != lfun)
612             lfunRates(frate, x, np);
613 
614          /** think about more-general models.  Check that this may not be working for the clock models clock>=2 **/
615          if ((com.nhomo <= 2 && com.nparK == 0 && com.model <= REV && com.rho == 0)
616             || (com.nhomo > 2 && com.alpha == 0 && com.Mgene == 0))
617             AncestralSeqs(frst, x);
618       }
619    }         /* for (itree) */
620 
621    if (ntree > 1) {
622       fprintf(frst1, "\t%d\t", btree + 1);
623       PrintBestTree(frst1, ftree, btree);
624    }
625    if (finbasemlg) fclose(finbasemlg);   if (frate) fclose(frate);
626    fclose(ftree);
627 
628    if (ntree == -1)  ntree = itree;
629    if (ntree > 1) { rewind(flnf);  rell(flnf, fout, ntree); }
630    fclose(flnf);
631 
632    return(0);
633 }
634 
635 
PrintBestTree(FILE * fout,FILE * ftree,int btree)636 void PrintBestTree(FILE *fout, FILE *ftree, int btree)
637 {
638    int itree, ntree, i;
639 
640    rewind(ftree);
641    GetTreeFileType(ftree, &ntree, &i, 0);
642    for (itree = 0; ntree == -1 || itree < ntree; itree++) {
643       if (ReadTreeN(ftree, &i, 0, 1)) {
644          puts("\nend of tree file."); break;
645       }
646       if (itree == btree)
647          OutTreeN(fout, 1, 0);
648    }
649 }
650 
651 
TransformxBack(double x[])652 int TransformxBack(double x[])
653 {
654    /* transform variables x[] back to their original definition after iteration,
655       for output and for calculating SEs.
656    */
657    int i, k, K = com.ncatG;
658 
659    k = com.ntime + com.nrgene + com.nrate;
660    for (i = 0; i < com.npi; i++)
661       f_and_x(x + k + 3 * i, x + k + 3 * i, 4, 0, 0);
662 
663    k += com.npi * 3 + K - 1;        /* K-1 for rK */
664    if (com.nparK == 2)          /* rK & fK */
665       f_and_x(x + k, x + k, K, 0, 0);
666    else if (com.nparK == 3)     /* rK & MK (double stochastic matrix) */
667       for (i = 0; i < K - 1; k += K - 1, i++)  f_and_x(x + k, x + k, K, 0, 0);
668    else if (com.nparK == 4)     /* rK & MK */
669       for (i = 0; i < K; k += K - 1, i++)  f_and_x(x + k, x + k, K, 0, 0);
670    return(0);
671 }
672 
673 
DetailOutput(FILE * fout,double x[],double var[])674 void DetailOutput(FILE *fout, double x[], double var[])
675 {
676    int n = com.ncode, i, j, k = com.ntime, nkappa[] = { 0, 1, 0, 1, 1, 1, 2, 5, 11 };
677    int n31pi = (com.model == T92 ? 1 : 3);
678    double Qfactor, *p = com.pi, t = 0, k1, k2, S, V, Y = p[0] + p[1], R = p[2] + p[3];
679    int inode, a;
680    double *Qrate = x + com.ntime + com.nrgene, *AncientFreqs = NULL;
681    double EXij[NCODE*NCODE] = { 0 }, *Q = PMat, p0[NCODE], c[NCODE], ya;
682 
683    fprintf(fout, "\nDetailed output identifying parameters\n");
684    if (com.clock) OutputTimesRates(fout, x, var);
685    k = com.ntime;
686 
687    if (com.nrgene) { /* this used to be:  if (com.nrgene && !com.clock) */
688       fprintf(fout, "\nrates for %d genes:%6.0f", com.ngene, 1.);
689       for (i = 0; i < com.nrgene; i++)
690          fprintf(fout, " %8.5f", x[k++]);
691       FPN(fout);
692    }
693 
694    if (com.nhomo == 1) {
695       if (com.nrate) fprintf(fout, "rate (kappa or abcde) under %s:", models[com.model]);
696       for (i = 0; i < com.nrate; i++) fprintf(fout, " %8.5f", x[k++]);  FPN(fout);
697       fprintf(fout, "Base frequencies:\n");
698       for (j = 0; j < n; j++) fprintf(fout, " %8.5f", com.pi[j]);
699       k += n31pi;
700    }
701    else if (com.nhomo >= 3) {
702       if (com.model >= F84 && com.model <= REV )
703          fprintf(fout, "kappa or abcde or Qrates under %s:", models[com.model]);
704       for (i = 0; i < com.nrate; i++) {
705          if (i == 0 || i % nkappa[com.model] == 0) fprintf(fout, "\n");
706          fprintf(fout, " %8.5f", x[k++]);
707       }
708       SetParameters(x);
709       if (com.alpha == 0) {
710          if ((AncientFreqs = (double*)malloc(tree.nnode * 4 * sizeof(double))) == NULL)
711             error2("out of memory for OldDistributions()");
712          OldDistributions(tree.root, AncientFreqs);
713       }
714       fputs("\n\n(frequency parameters for branches)  [frequencies at nodes] (see Yang & Roberts 1995 fig 1)\n", fout);
715       for (i = 0; i < tree.nnode; i++, FPN(fout)) {
716          fprintf(fout, "Node #%2d  (", i + 1);
717          for (j = 0; j < 4; j++) fprintf(fout, " %8.6f ", nodes[i].pi[j]);
718          fprintf(fout, ")");
719          if (com.alpha == 0) {
720             fprintf(fout, "  [");
721             for (j = 0; j < 4; j++) fprintf(fout, " %8.6f", AncientFreqs[i * 4 + j]);
722             fprintf(fout, " GC = %5.3f ]", AncientFreqs[i * 4 + 1] + AncientFreqs[i * 4 + 3]);
723          }
724       }
725       fprintf(fout, "\nNote: node %d is root.\n", tree.root + 1);
726       k += com.npi*n31pi;
727 
728       /* print expected numbers of changes along branches */
729       if (com.alpha == 0) {
730          fprintf(fout, "\n\nExpected numbers of nucleotide changes on branches\n\n");
731          for (inode = 0; inode < tree.nnode; inode++, FPN(fout)) {
732             if (inode == tree.root) continue;
733             t = nodes[inode].branch;
734             xtoy(AncientFreqs + nodes[inode].father*n, p0, n);
735 
736             if (com.nhomo > 2 && com.model <= TN93) {
737                eigenTN93(com.model, *nodes[inode].pkappa, *(nodes[inode].pkappa + 1), nodes[inode].pi, &nR, Root, Cijk);
738                QTN93(com.model, Q, *nodes[inode].pkappa, *(nodes[inode].pkappa + 1), nodes[inode].pi);
739             }
740             else if (com.nhomo > 2 && com.model == REV)
741                eigenQREVbase(NULL, Q, nodes[inode].pkappa, nodes[inode].pi, &nR, Root, Cijk);
742 
743             for (i = 0; i < n; i++)  { /* calculate correction vector c[4] */
744                c[i] = 0;   Q[i*n + i] = 0;
745             }
746 
747             for (i = 0; i < n; i++) {  /* calculate correction vector c[4] */
748                for (a = 1; a < n; a++) {
749                   ya = -(1 - exp(Root[a] * t)) / Root[a];
750                   for (k = 0; k < n; k++)
751                      c[i] += p0[k] * Cijk[k*n*n + i*n + a] * ya;
752                }
753                for (j = 0; j < n; j++)
754                   EXij[i*n + j] = Q[i*n + j] * (nodes[inode].pi[i] * t + c[i]);
755             }
756             fprintf(fout, "Node #%2d  ", inode + 1);
757             if (inode < com.ns) fprintf(fout, "%-10s  ", com.spname[inode]);
758             fprintf(fout, "(blength = %9.6f, %9.6f)", t, sum(EXij, n*n));
759             fprintf(fout, "\n%s:     ", (com.model <= TN93 ? "kappa" : "abcde"));
760             for (j = 0; j < nkappa[com.model]; j++)  fprintf(fout, " %9.6f ", *(nodes[inode].pkappa + j));
761             fprintf(fout, "\npi source: "); for (j = 0; j < n; j++) fprintf(fout, " %9.6f ", p0[j]);
762             fprintf(fout, "\npi model : "); for (j = 0; j < n; j++) fprintf(fout, " %9.6f ", nodes[inode].pi[j]);
763             fprintf(fout, "\nExij\n");
764             for (i = 0; i < n; i++, FPN(fout)) {
765                for (j = 0; j < n; j++)  fprintf(fout, " %9.6f", EXij[i*n + j]);
766                fprintf(fout, "  ");
767                for (j = 0; j < n; j++)  fprintf(fout, " %8.1f", EXij[i*n + j] * com.ls);
768             }
769             /* matout(fout, c, 1, n);  */
770          }
771       }
772 
773       if (com.alpha == 0) free(AncientFreqs);
774    }
775    else if (!com.fix_kappa) {
776       fprintf(fout, "\nParameters %s in the rate matrix", (com.model <= TN93 ? "(kappa)" : ""));
777       fprintf(fout, " (%s) (Yang 1994 J Mol Evol 39:105-111):\n", models[com.model]);
778 
779       if (com.nhomo == 2) {
780          fprintf(fout, "\nbranch         t    kappa      TS     TV\n");
781          for (i = 0; i < tree.nbranch; i++) {
782             if (com.model == F84) { k1 = 1 + x[k + i] / R;  k2 = 1 + x[k + i] / R; }
783             else                   k1 = k2 = x[k + i];
784             S = 2 * p[0] * p[1] * k1 + 2 * p[2] * p[3] * k2;
785             V = 2 * Y*R;
786             Qfactor = 1 / (S + V);
787             /* t=(com.clock ? nodes[tree.branches[i][1]].branch : x[i]); */
788             t = nodes[tree.branches[i][1]].branch;
789             fprintf(fout, "%2d..%-2d %9.5f %8.5f %9.5f %8.5f\n",
790                tree.branches[i][0] + 1, tree.branches[i][1] + 1, t, x[k + i],
791                t*S / (S + V), t*V / (S + V));
792          }
793       }
794       /* Mgene = 0:rates, 1:separate; 2:diff pi, 3:diff kapa, 4:all diff*/
795       else if (com.Mgene >= 2) {
796          for (i = 0; i < com.ngene; i++) {
797             fprintf(fout, "\nGene #%d: ", i + 1);
798             p = (com.Mgene == 3 ? com.pi : com.piG[i]);
799             Qrate = (com.Mgene == 2 ? x + k : x + k + i*nkappa[com.model]);
800             if (com.model <= TN93)
801                FOR(j, nkappa[com.model]) fprintf(fout, " %8.5f", Qrate[j]);
802             else if (com.model == REV || com.model == REVu)
803                /* output Q matrix, no eigen calculation */
804                eigenQREVbase(fout, Q, Qrate, p, &nR, Root, Cijk);
805          }
806          if (com.Mgene >= 3) k += com.ngene*nkappa[com.model];
807          else              k += nkappa[com.model];
808       }
809       else {
810          if (com.model < REV) FOR(i, com.nrate) fprintf(fout, " %8.5f", x[k++]);
811          else k += com.nrate;
812       }
813       FPN(fout);
814    }
815 
816    if (com.Mgene < 2) {
817       if ((com.model == REV || com.model == REVu) && com.nhomo <= 2) /* output Q, no eigen calculation */
818          eigenQREVbase(fout, Q, Qrate, com.pi, &nR, Root, Cijk);
819       else if (com.model == UNREST || com.model == UNRESTu)
820          QUNREST(fout, PMat, x + com.ntime + com.nrgene, com.pi);
821    }
822 
823    for (j = 0; j < com.nalpha; j++) {
824       if (!com.fix_alpha)
825          fprintf(fout, "\nalpha (gamma, K=%d) = %8.5f", com.ncatG, (com.alpha = x[k++]));
826       if (com.nalpha > 1)
827          DiscreteGamma(com.freqK, com.rK, com.alpha, com.alpha, com.ncatG, DGammaUseMedian);
828       fprintf(fout, "\nrate: "); FOR(i, com.ncatG) fprintf(fout, " %8.5f", com.rK[i]);
829       fprintf(fout, "\nfreq: "); FOR(i, com.ncatG) fprintf(fout, " %8.5f", com.freqK[i]);
830       FPN(fout);
831    }
832    if (!com.fix_rho) {
833       fprintf(fout, "rho for the auto-discrete-gamma model: %9.5f", x[k]);
834       FPN(fout);
835    }
836    if (com.nparK >= 1 && com.nalpha <= 1) {
837       fprintf(fout, "\nrate:");  for (i = 0; i<com.ncatG; i++) fprintf(fout, " %8.5f", com.rK[i]);
838       fprintf(fout, "\nfreq:");  for (i = 0; i<com.ncatG; i++) fprintf(fout, " %8.5f", com.freqK[i]);
839       FPN(fout);
840    }
841    if (com.rho || (com.nparK >= 3 && com.nalpha <= 1)) {
842       fprintf(fout, "transition probabilities between rate categories:\n");
843       for (i = 0; i < com.ncatG; i++, FPN(fout))
844          for(j=0; j<com.ncatG; j++)
845             fprintf(fout, " %8.5f", com.MK[i*com.ncatG + j]);
846    }
847    FPN(fout);
848 }
849 
GetStepMatrix(char * line)850 int GetStepMatrix(char*line)
851 {
852    /* read user definitions of the REV and UNREST models
853       StepMatrix[4*4]:
854          -1 at diagonals, 0 for default rate, and positive values for rates
855    */
856    char *p, *errstr = "StepMatrix specification in the control file";
857    int n = com.ncode, i, j, k, b1 = -1, b2;
858 
859    p = strchr(line, '[');
860    if (p == NULL) error2("model specification.  Expecting '['.");
861    sscanf(++p, "%d", &com.nrate);
862    if (com.nrate < 0 || (com.model == REVu && com.nrate > 5) || (com.model == UNRESTu && com.nrate > 11))
863       error2(errstr);
864    for (i = 0; i < n; i++) for (j = 0; j < n; j++)
865       StepMatrix[i*n + j] = (i == j ? -1 : 0);
866    for (i = 0; i < com.nrate; i++) {
867       while (*p && *p != '(') p++;
868       if (*p++ != '(') error2("expecting (");
869       for (k = 0; k < 12; k++) {
870          while (isspace(*p)) p++;
871          if (*p == ')') break;
872          b1 = CodeChara(*p++, 0);
873          b2 = CodeChara(*p++, 0);
874          if (b1 < 0 || b1>3 || b2 < 0 || b2>3) error2("bases out of range.");
875          if (b1 == b2 || StepMatrix[b1*n + b2] > 0) {
876             printf("pair %c%c already specified.\n", BASEs[b1], BASEs[b2]);
877          }
878          if (com.model == REVu) StepMatrix[b1*n + b2] = StepMatrix[b2*n + b1] = i + 1;
879          else                StepMatrix[b1*n + b2] = i + 1;
880       }
881       printf("rate %d: %d pairs\n", i + 1, k);
882    }
883    for (i = 0; i < n; i++, FPN(F0))  for (j = 0; j < n; j++)
884       printf("%3d", StepMatrix[i*n + j]);
885 
886    return(0);
887 }
888 
GetOptions(char * ctlf)889 int GetOptions(char *ctlf)
890 {
891    int iopt, i, j, nopt = 31, lline = 4096;
892    char line[4096], *pline, opt[32], *comment = "*#";
893    char *optstr[] = { "seqfile","outfile","treefile","noisy", "cleandata",
894         "verbose","runmode", "method", "clock", "TipDate", "fix_rgene","Mgene", "nhomo",
895         "getSE","RateAncestor", "model","fix_kappa","kappa",
896         "fix_alpha","alpha","Malpha","ncatG", "fix_rho","rho",
897         "nparK", "ndata", "bootstrap", "Small_Diff","icode", "fix_blength", "seqtype" };
898    double t;
899    FILE *fctl;
900    int ng = -1, markgenes[NGENE];
901 
902    com.nalpha = 0;
903    fctl = gfopen(ctlf, "r");
904    if (noisy) printf("Reading options from %s..\n", ctlf);
905    for (; ; ) {
906       if (fgets(line, lline, fctl) == NULL) break;
907       for (i = 0, t = 0, pline = line; i < lline&&line[i]; i++)
908          if (isalnum(line[i])) { t = 1; break; }
909          else if (strchr(comment, line[i])) break;
910          if (t == 0) continue;
911          sscanf(line, "%s%*s%lf", opt, &t);
912          if ((pline = strstr(line, "=")) == NULL)
913             error2("option file.");
914 
915          for (iopt = 0; iopt < nopt; iopt++) {
916             if (strncmp(opt, optstr[iopt], 8) == 0) {
917                if (noisy >= 9)
918                   printf("\n%3d %15s | %-20s %6.2f", iopt + 1, optstr[iopt], opt, t);
919                switch (iopt) {
920                case (0): sscanf(pline + 1, "%s", com.seqf);    break;
921                case (1): sscanf(pline + 1, "%s", com.outf);    break;
922                case (2): sscanf(pline + 1, "%s", com.treef);    break;
923                case (3): noisy = (int)t;           break;
924                case (4): com.cleandata = (char)t;  break;
925                case (5): com.verbose = (int)t;     break;
926                case (6): com.runmode = (int)t;     break;
927                case (7): com.method = (int)t;      break;
928                case (8): com.clock = (int)t;       break;
929                case (9):
930                   sscanf(pline + 1, "%lf%lf", &com.TipDate, &com.TipDate_TimeUnit);
931                   break;
932                case (10): com.fix_rgene = (int)t;   break;
933                case (11): com.Mgene = (int)t;       break;
934                case (12): com.nhomo = (int)t;       break;
935                case (13): com.getSE = (int)t;       break;
936                case (14): com.print = (int)t;       break;
937                case (15): com.model = (int)t;
938                   if (com.model > UNREST) GetStepMatrix(line);  break;
939                case (16): com.fix_kappa = (int)t;   break;
940                case (17):
941                   com.kappa = t;
942                   if (com.fix_kappa && (com.clock == 5 || com.clock == 6)
943                      && com.model != 0 && com.model != 2) {
944                      ng = splitline(++pline, min2(ng, com.ndata), markgenes);
945                      for (j = 0; j < min2(ng, com.ndata); j++)
946                         if (!sscanf(pline + markgenes[j], "%lf", &data.kappa[j])) break;
947                      /*
948                      matout(F0, data.kappa, 1, min2(ng,com.ndata));
949                      */
950                   }
951                   break;
952                case (18): com.fix_alpha = (int)t;   break;
953                case (19):
954                   com.alpha = t;
955                   if (com.fix_alpha && t && (com.clock == 5 || com.clock == 6)) {
956                      ng = splitline(++pline, min2(ng, com.ndata), markgenes);
957                      for (j = 0; j < min2(ng, com.ndata); j++)
958                         if (!sscanf(pline + markgenes[j], "%lf", &data.alpha[j])) break;
959                      /*
960                      matout(F0, data.alpha, 1, min2(ng,com.ndata));
961                      */
962                   }
963                   break;
964                case (20): com.nalpha = (int)t;      break;
965                case (21): com.ncatG = (int)t;       break;
966                case (22): com.fix_rho = (int)t;     break;
967                case (23): com.rho = t;              break;
968                case (24): com.nparK = (int)t;       break;
969                case (25): com.ndata = (int)t;       break;
970                case (26): com.bootstrap = (int)t;   break;
971                case (27): Small_Diff = t;           break;
972                case (28): com.icode = (int)t; com.coding = 1; break;
973                case (29): com.fix_blength = (int)t; break;
974                case (30):
975                   com.seqtype = (int)t;
976                   if (com.seqtype == 2)      com.ncode = 2;
977                   else if (com.seqtype == 5) com.ncode = 5;
978                   break;
979                }
980                break;
981             }
982          }
983          if (iopt == nopt)
984          {
985             printf("\noption %s in %s not recognised\n", opt, ctlf); exit(-1);
986          }
987    }
988    fclose(fctl);
989 
990    if ((com.fix_kappa || (com.fix_alpha&&com.alpha)) && (com.clock == 5 || com.clock == 6))
991       printf("Using parameters from the control file.");
992 
993 
994    if (com.seqtype != 0 && com.seqtype != 4 && com.seqtype != 5) error2("seqtype?");
995    if (com.fix_alpha == 1 && com.alpha == 0) {
996       if (!com.fix_rho || com.rho) error2("fix rho to 0 if alpha=0.");
997    }
998    if (com.nparK >= 1) {
999       com.fix_alpha = com.fix_rho = 1;
1000       if (com.alpha == 0) com.alpha = 0.5;  /* used to generate rK as initial values */
1001       if (com.nparK <= 2) com.rho = 0;
1002       else             com.rho = 0.4;
1003       if (com.nhomo >= 1)
1004          error2("nhomo & nparK");
1005    }
1006    if (com.model != F84 && com.kappa <= 0)  error2("init kappa..");
1007    if (!com.fix_alpha && com.alpha <= 0)  error2("init alpha..");
1008    if (!com.fix_rho && com.rho == 0) { com.rho = 0.001;  puts("init rho reset"); }
1009 
1010    if (com.alpha)
1011    {
1012       if (com.ncatG<2 || com.ncatG>NCATG) error2("ncatG");
1013    }
1014    else if (com.ncatG > 1) com.ncatG = 1;
1015 
1016    if (com.method && (com.clock || com.rho))
1017    {
1018       com.method = 0; puts("Iteration method reset: method = 0");
1019    }
1020    if (com.method && (com.model == UNREST || com.model == UNRESTu))
1021       error2("I did not implemented method = 1 for UNREST.  Use method = 0");
1022 
1023    if (com.model > UNREST && com.Mgene > 2)
1024       error2("u models don't work with Mgene");
1025 
1026    if (com.nhomo > 5)
1027       error2("nhomo");
1028    else if (com.nhomo == 2) {
1029       if (com.model != K80 && com.model != F84 && com.model != HKY85)
1030          error2("nhomo = 2 works with F84 or HKY85");
1031    }
1032    else if (com.nhomo > 2 && !(com.model >= F84 && com.model <= REV)) error2("nhomo & model");
1033    else if (com.nhomo > 2 && com.method) error2("nhomo & method.");
1034    else {
1035       if (com.nhomo == 1 && !(com.model >= F81 && com.model <= REV) && com.model != REVu)
1036          error2("nhomo=1 and model");
1037    }
1038 
1039    if (com.nhomo >= 2 && com.clock == 2) error2("clock=2 & nhomo imcompatible");
1040 
1041    if (com.clock >= 5 && com.model >= TN93) error2("model & clock imcompatible");
1042 
1043    if (com.nhomo > 1 && com.runmode > 0)  error2("nhomo incompatible with runmode");
1044    if (com.runmode == -2) error2("runmode = -2 not implemented in baseml.");
1045    if (com.clock && com.runmode > 2)  error2("runmode & clock?");
1046    if (com.runmode)  com.fix_blength = 0;
1047    if (com.runmode == 3 && (com.npi || com.nparK))
1048       error2("runmode incompatible with nparK or nhomo.");
1049 
1050    if (com.model == JC69 || com.model == K80 || com.model == UNREST)
1051       if (com.nhomo != 2)  com.nhomo = 0;
1052    if (com.model == JC69 || com.model == F81) { com.fix_kappa = 1; com.kappa = 1; }
1053    if ((com.model == TN93 || com.model == REV || com.model == REVu) && com.nhomo <= 2)
1054       com.fix_kappa = 0;
1055    if (com.seqtype == 5 && com.model != REVu)
1056       error2("RNA-editing model requires REVu");
1057 
1058    if (com.nparK == 3) {
1059       puts("\n\nnparK==3, double stochastic, may not work.  Use nparK=4?\n");
1060       getchar();
1061    }
1062    com.fix_omega = 1; com.omega = 0;
1063    if (com.ndata <= 0) com.ndata = 1;
1064    if (com.bootstrap && com.ndata != 1) error2("ndata=1 for bootstrap.");
1065 
1066    return (0);
1067 }
1068 
1069 
GetInitials(double x[],int * fromfile)1070 int GetInitials(double x[], int *fromfile)
1071 {
1072    /* This calculates com.np, com.ntime, com.npi, com.nrate etc., and gets
1073       initial values for ML iteration.  It also calculates some of the
1074       statistics (such as eigen values and vectors) that won't change during
1075       the likelihood iteration.  However, think about whether this is worthwhile.
1076       The readfile option defeats this effort right now as the x[] is read in
1077       after such calculations.  Some of the effort is duplicated in
1078       SetParameters().  Needs more careful thinking.
1079    */
1080    int i, j, k, K = com.ncatG, n31pi = (com.model == T92 ? 1 : 3);
1081    int nkappa[] = { 0, 1, 0, 1, 1, 1, 2, 5, 11, -1, -1 }, nkappasets;
1082    size_t sconP_new = (size_t)(tree.nnode - com.ns)*com.ncode*com.npatt*com.ncatG * sizeof(double);
1083    double t = -1;
1084 
1085    NFunCall = NPMatUVRoot = NEigenQ = 0;
1086    if (com.clock == ClockCombined && com.ngene <= 1)
1087       error2("Combined clock model requires mutliple genes.");
1088    GetInitialsTimes(x);
1089 
1090    com.plfun = lfunAdG;
1091    if (com.alpha == 0 && com.nparK == 0)
1092       com.plfun = lfun;
1093    else if ((com.alpha && com.rho == 0) || com.nparK == 1 || com.nparK == 2)
1094       com.plfun = lfundG;
1095 
1096    if (com.clock && com.fix_blength == -1)
1097       com.fix_blength = 0;
1098 
1099    if (com.method && com.fix_blength != 2 && com.plfun == lfundG) {
1100       com.conPSiteClass = 1;
1101       if (com.sconP < sconP_new) {
1102          com.sconP = sconP_new;
1103          printf("\n%9zu bytes for conP, adjusted\n", com.sconP);
1104          if ((com.conP = (double*)realloc(com.conP, com.sconP)) == NULL)
1105             error2("oom conP");
1106       }
1107    }
1108    InitializeNodeScale();
1109 
1110    com.nrgene = (!com.fix_rgene)*(com.ngene - 1);
1111    for (j = 0; j < com.nrgene; j++) x[com.ntime + j] = 1;
1112    if (com.fix_kappa && (com.Mgene == 3 || com.Mgene == 4)) error2("Mgene options");
1113 
1114    if (com.model <= UNREST) {
1115       com.nrate = 0;
1116       if (!com.fix_kappa) {
1117          com.nrate = nkappa[com.model];
1118          if (com.Mgene >= 3)
1119             com.nrate *= com.ngene;
1120       }
1121    }
1122    switch (com.nhomo) {
1123    case (0): com.npi = 0;           break;   /* given 1 pi */
1124    case (1): com.npi = 1;           break;   /* estimate 1 pi */
1125    case (2): com.npi = 0;  com.nrate = tree.nbranch;  break;  /* b kappa's */
1126    case (4):
1127       com.npi = tree.nnode;     /* nnode pi   */
1128       com.nrate = nkappa[com.model] * (com.fix_kappa ? 1 : tree.nbranch);
1129       break;
1130    case (3):   /* ns+2 pi: tips, internal & root */
1131    case (5):   /* pi: user-specified sets of pi for branches */
1132       if (com.nhomo == 3) {
1133          for (i = 0; i < tree.nnode; i++)
1134             nodes[i].label = (i < com.ns ? i : com.ns);
1135          com.npi = com.nbtype = com.ns + (tree.root >= com.ns);
1136          if (tree.root >= com.ns)
1137             nodes[tree.root].label = com.npi++;
1138       }
1139       else {  /* nbtype is for Qrates & npi for pi. */
1140          com.npi = com.nbtype;
1141          if (tree.root >= com.ns) {   /* root may have a new set of pi */
1142             j = (int)nodes[tree.root].label;
1143             if (j == com.nbtype)
1144                nodes[tree.root].label = com.npi++;
1145             else if (j <0 || j > com.nbtype)
1146                error2("nhomo = 5: label for root strange?");
1147          }
1148       }
1149 
1150       printf("%d sets of frequency parameters\n", com.npi);
1151       /* number of sets of Qrates */
1152       j = (com.fix_kappa == 0 ? tree.nbranch : (com.fix_kappa == 1 ? 1 : com.nbtype));
1153       com.nrate = nkappa[com.model] * j;
1154       break;
1155    }
1156 
1157    if (com.model <= TN93 && com.Mgene <= 1 && com.nhomo == 0)
1158       eigenTN93(com.model, com.kappa, com.kappa, com.pi, &nR, Root, Cijk);
1159    if (com.model == REV || com.model == UNREST)
1160       for (j = 0; j < (com.Mgene >= 3 ? com.ngene : 1); j++) {
1161          k = com.ntime + com.nrgene + j*(com.model == REV ? 5 : 11);
1162          for (i = 0; i < com.nrate; i++)
1163             x[k + i] = 0.4 + 0.2*rndu();
1164          if (com.model == UNREST)
1165             x[k] = x[k + 3] = x[k + 8] = 0.8 + 0.2*rndu();
1166          if (com.model == REV) {
1167             nkappasets = com.nrate / 5;
1168             for (j = 0; j < nkappasets; j++)
1169                x[k + j * 5] = 1 + 0.1*(j + 1); /*  0.8+0.2*rndu();  */
1170          }
1171       }
1172    else
1173       for (i = 0; i < com.nrate; i++)
1174          x[com.ntime + com.nrgene + i] = 0.02 + com.kappa*(.8 + 0.4*rndu());
1175 
1176    for (i = 0; i < com.npi*n31pi; i++)  /* initials for transformed pi's */
1177       x[com.ntime + com.nrgene + com.nrate + i] = rndu()*.2;
1178    com.np = k = com.ntime + com.nrgene + com.nrate + com.npi*n31pi;
1179 
1180    if (com.alpha || com.nparK) {
1181       for (i = 0; i < com.nalpha; i++)
1182          x[k++] = com.alpha = 0.01 + com.alpha*(.9 + 0.2*rndu());
1183 
1184       if (!com.fix_rho)   x[k++] = com.rho = 0.01 + com.rho*(.8 + 0.4*rndu());
1185 
1186       if (com.rho)
1187          AutodGamma(com.MK, com.freqK, com.rK, &t, com.alpha, com.rho, K);
1188       else
1189          DiscreteGamma(com.freqK, com.rK, com.alpha, com.alpha, K, DGammaUseMedian);
1190 
1191       if (com.nparK) {
1192          xtoy(com.rK, x + k, K - 1);
1193          k += K - 1;
1194       }
1195       switch (com.nparK) {
1196       case (2):                            /* rK & fK */
1197          /* zero(x+k, K-1); */
1198          for (i = 0; i < K - 1; i++) x[k + i] = 0.1*(0.5 - rndu());
1199          k += K - 1;
1200          break;
1201       case (3):                            /* rK & MK (double stochastic) */
1202          /* zero(x+k, (K-1)*(K-1));  */
1203          for (i = 0; i < (K - 1)*(K - 1); i++) x[k + i] = 0.1*(0.5 - rndu());
1204          k += (K - 1)*(K - 1);
1205          break;
1206       case (4):                            /* rK & MK */
1207          /* zero(x+k, K*(K-1)); */
1208          for (i = 0; i < K*(K - 1); i++) x[k + i] = 0.1*(0.5 - rndu());
1209          k += K*(K - 1);
1210          break;
1211       }
1212       com.np = k;
1213    }
1214 
1215    if (com.fix_blength == -1)
1216       for (i = 0; i < com.np; i++)  x[i] = (i < com.ntime ? .1 + rndu() : .5 + rndu());
1217 
1218    if (finitials) readx(x, fromfile);
1219    else    *fromfile = 0;
1220 
1221    return (0);
1222 }
1223 
1224 
1225 
1226 
SetParameters(double x[])1227 int SetParameters(double x[])
1228 {
1229    /* This sets parameters in com., nodes[], etc and is called before lfun() etc.
1230       Iinitialize U, V, Root etc, if necessary.
1231       For nhomo models (nhomo=1,3,4)
1232          x[] has frequencies if (LASTROUND==1) or exp(pi)/(1+SUM(exp[pi])) if otherwise
1233    */
1234    int i, j, k, K = com.ncatG, status = 0, n31pi = (com.model == T92 ? 1 : 3);
1235    int nkappa[] = { 0, 1, 0, 1, 1, 1, 2, 5, 11, -1, -1 };
1236    double k1 = com.kappa, k2 = com.kappa, t, space[NCATG*(NCATG + 1)];
1237 
1238    if (com.clock >= 5) return(0);
1239    if (com.fix_blength != 2) SetBranch(x);
1240 
1241    for (i = 0; i < com.nrgene; i++) com.rgene[i + 1] = x[com.ntime + i];
1242    if (com.clock && com.clock < 5 && AbsoluteRate)
1243       com.rgene[0] = x[0]; /* so that rgene are absolute rates */
1244 
1245    if (!com.fix_kappa && com.model <= TN93 && com.clock < 5) {
1246       com.kappa = k1 = k2 = x[com.ntime + com.nrgene];
1247       if (com.model == TN93) k2 = x[com.ntime + com.nrgene + 1];
1248    }
1249    if (com.nhomo == 1) {
1250       k = com.ntime + com.nrgene + com.nrate;
1251       if (com.model == T92) {  /* T=A, C=G */
1252          com.pi[0] = com.pi[2] = (1 - x[k]) / 2;  com.pi[1] = com.pi[3] = x[k] / 2;
1253       }
1254       else {
1255          if (!LASTROUND) f_and_x(x + k, com.pi, 4, 0, 0);
1256          else            xtoy(x + k, com.pi, 3);
1257          com.pi[3] = 1 - sum(com.pi, 3);
1258       }
1259       if (com.model <= TN93)
1260          eigenTN93(com.model, k1, k2, com.pi, &nR, Root, Cijk);
1261    }
1262    else if (com.nhomo == 2)
1263       for (i = 0, k = com.ntime + com.nrgene; i < tree.nbranch; i++)
1264          nodes[tree.branches[i][1]].pkappa = x + k + i;
1265 
1266    if (com.model <= TN93 && com.nhomo == 0 && com.Mgene <= 1)
1267       RootTN93(com.model, k1, k2, com.pi, &t, Root);
1268    else if (com.nhomo >= 3) {
1269       for (i = 0, k = com.ntime + com.nrgene; i < tree.nbranch; i++) {  /* Qrate for branches */
1270          if (com.fix_kappa == 0)       /* each branch has its Qrate */
1271             nodes[tree.branches[i][1]].pkappa = x + k + i*nkappa[com.model];
1272          else if (com.fix_kappa == 1)  /* same Qrate for all branches */
1273             nodes[tree.branches[i][1]].pkappa = x + k;
1274          else if (com.fix_kappa == 2)  /* sets of Qrate */
1275             nodes[tree.branches[i][1]].pkappa = x + k + (int)nodes[tree.branches[i][1]].label*nkappa[com.model];
1276       }
1277       k += com.nrate;
1278 
1279       for (i = 0; i < tree.nnode; i++) {          /* pi for nodes */
1280          j = (com.nhomo == 4 ? i : (int)nodes[i].label);
1281          if (com.model == T92) {
1282             nodes[i].pi[0] = nodes[i].pi[2] = (1 - x[k + j]) / 2;  /* TA */
1283             nodes[i].pi[1] = nodes[i].pi[3] = x[k + j] / 2;        /* CG */
1284          }
1285          else {
1286             if (!LASTROUND) f_and_x(x + k + j * 3, nodes[i].pi, 4, 0, 0);
1287             else            xtoy(x + k + j * 3, nodes[i].pi, 3);
1288             nodes[i].pi[3] = 1 - sum(nodes[i].pi, 3);
1289          }
1290       }
1291       xtoy(nodes[tree.root].pi, com.pi, 4);
1292    }
1293    else if ((com.model == REV || com.model == REVu) && com.Mgene <= 1)
1294       eigenQREVbase(NULL, PMat, x + com.ntime + com.nrgene, com.pi, &nR, Root, Cijk);
1295    /*
1296    else if ((com.model==UNREST || com.model==UNRESTu) && com.Mgene<=1)
1297       eigenQunrest (NULL, x+com.ntime+com.nrgene,com.pi,&nR,cRoot,cU,cV);
1298    */
1299 
1300    if (com.nparK == 0 && (com.alpha == 0 || com.fix_alpha*com.fix_rho == 1))
1301       return(status);
1302    if (com.nalpha > 1) return (status);
1303    k = com.ntime + com.nrate + com.nrgene + com.npi*n31pi;
1304    if (!com.fix_alpha) {
1305       com.alpha = x[k++];
1306       if (com.fix_rho)
1307          DiscreteGamma(com.freqK, com.rK, com.alpha, com.alpha, K, DGammaUseMedian);
1308    }
1309    if (!com.fix_rho) {
1310       com.rho = x[k++];
1311       AutodGamma(com.MK, com.freqK, com.rK, &t, com.alpha, com.rho, K);
1312    }
1313    if (com.nparK == 0) return(status);
1314 
1315    /* nparK models */
1316    xtoy(x + k, com.rK, K - 1);
1317 
1318    if (com.nparK == 2) {
1319       if (!LASTROUND)  f_and_x(x + k + K - 1, com.freqK, K, 0, 0);
1320       else             xtoy(x + k + K - 1, com.freqK, K - 1);
1321       com.freqK[K - 1] = 1 - sum(com.freqK, K - 1);
1322    }
1323    else if (com.nparK == 3) {   /* rK & MK (double stochastic matrix) */
1324       for (i = 0, k += K - 1; i < K - 1; k += K - 1, i++) {
1325          if (!LASTROUND) f_and_x(x + k, com.MK + i*K, K, 0, 0);
1326          else            xtoy(x + k, com.MK + i*K, K - 1);
1327          com.MK[i*K + K - 1] = 1 - sum(com.MK + i*K, K - 1);
1328       }
1329       FOR(j, K) {
1330          for (i = 0, com.MK[(K - 1)*K + j] = 1; i < K - 1; i++)
1331             com.MK[(K - 1)*K + j] -= com.MK[i*K + j];
1332          if (com.MK[(K - 1)*K + j] < 0)
1333             printf("SetPar: MK[K-1][j]=%.5f<0\n", com.MK[(K - 1)*K + j]);
1334       }
1335    }
1336    else if (com.nparK == 4) { /* rK & MK */
1337       for (i = 0, k += K - 1; i < K; k += K - 1, i++) {
1338          if (!LASTROUND) f_and_x(x + k, com.MK + i*K, K, 0, 0);
1339          else            xtoy(x + k, com.MK + i*K, K - 1);
1340          com.MK[i*K + K - 1] = 1 - sum(com.MK + i*K, K - 1);
1341       }
1342       PtoPi(com.MK, com.freqK, K, space);
1343    }
1344    com.rK[K - 1] = (1 - innerp(com.freqK, com.rK, K - 1)) / com.freqK[K - 1];
1345    return (status);
1346 }
1347 
1348 
SetPGene(int igene,int _pi,int _UVRoot,int _alpha,double x[])1349 int SetPGene(int igene, int _pi, int _UVRoot, int _alpha, double x[])
1350 {
1351    /* xcom[] does not contain time parameters
1352       Note that com.piG[][] have been homogeneized if (com.Mgene==3)
1353    */
1354    int nr[] = { 0, 1, 0, 1, 1, 1, 2, 5, 11 };
1355    int k = com.nrgene + (com.Mgene >= 3)*igene*nr[com.model];
1356    double *xcom = x + com.ntime;
1357    double ka1 = xcom[k], ka2 = (com.model == TN93 ? xcom[k + 1] : -1);
1358 
1359    if (com.Mgene == 2 && com.fix_kappa) ka1 = ka2 = com.kappa;
1360 
1361    if (_pi) {
1362       xtoy(com.piG[igene], com.pi, 4);
1363    }
1364    if (_UVRoot) {
1365       if (com.model == K80) com.kappa = ka1;
1366       else if (com.model <= TN93)
1367          eigenTN93(com.model, ka1, ka2, com.pi, &nR, Root, Cijk);
1368       else if (com.model == REV || com.model == REVu)
1369          eigenQREVbase(NULL, PMat, xcom + k, com.pi, &nR, Root, Cijk);
1370    }
1371    if (_alpha) {
1372       com.alpha = xcom[com.nrgene + com.nrate + com.npi + igene]; /* check?? */
1373       DiscreteGamma(com.freqK, com.rK, com.alpha, com.alpha, com.ncatG, DGammaUseMedian);
1374    }
1375    return(0);
1376 }
1377 
1378 
SetxBound(int np,double xb[][2])1379 int SetxBound(int np, double xb[][2])
1380 {
1381    /* sets lower and upper bounds for variables during iteration
1382    */
1383    int i, j, k = 0, nf = 0, n31pi = (com.model == T92 ? 1 : 3);
1384    double tb[] = { .4e-6, 9999 }, tb0 = .4e-6, rgeneb[] = { 1e-4, 999 }, rateb[] = { 1e-5, 999 };
1385    double alphab[] = { .005, 999 }, rhob[] = { -0.2, 0.99 }, pb[] = { .00001, .99999 };
1386    double fb[] = { -19,9 }; /* transformed freqs.*/
1387 
1388    SetxBoundTimes(xb);
1389    for (i = com.ntime; i < np; i++)
1390       for (j = 0; j < 2; j++)  xb[i][j] = rateb[j];
1391 
1392    for (i = 0; i < com.nrgene; i++)
1393       for (j = 0; j < 2; j++)  xb[com.ntime + i][j] = rgeneb[j];
1394    for (i = 0; i < com.nrate; i++)
1395       for (j = 0; j < 2; j++)  xb[com.ntime + com.nrgene + i][j] = rateb[j];
1396    k = com.ntime + com.nrgene + com.nrate;
1397    for (i = 0; i < com.npi*n31pi; i++) {
1398       xb[k][0] = (com.model == T92 ? pb[0] : fb[0]);
1399       xb[k++][1] = (com.model == T92 ? pb[1] : fb[1]);
1400    }
1401    for (i = 0; i < com.nalpha; i++, k++)  FOR(j, 2) xb[k][j] = alphab[j];
1402    if (!com.fix_rho)   FOR(j, 2) xb[np - 1][j] = rhob[j];
1403    if (com.nparK) {
1404       for (i = 0; i < com.ncatG - 1; i++) { xb[k][0] = rateb[0]; xb[k++][1] = rateb[1]; }
1405       if (com.nparK == 2) nf = com.ncatG - 1;
1406       else if (com.nparK == 3) nf = (com.ncatG - 1)*(com.ncatG - 1);
1407       else if (com.nparK == 4) nf = (com.ncatG - 1)*com.ncatG;
1408       for (i = 0; i < nf; i++) { xb[k][0] = fb[0]; xb[k++][1] = fb[1]; }
1409    }
1410    if (noisy > 2 && np < 50) {
1411       printf("\nBounds (np=%d):\n", np);
1412       for (i = 0; i < np; i++) printf(" %10.6f", xb[i][0]);  FPN(F0);
1413       for (i = 0; i < np; i++) printf(" %10.6f", xb[i][1]);  FPN(F0);
1414    }
1415    return(0);
1416 }
1417 
testx(double x[],int np)1418 int testx(double x[], int np)
1419 {
1420    /* This is used for LS branch length estimation by nls2, called only if(clock==0)
1421    */
1422    int i;
1423    double tb[] = { .4e-6, 99 };
1424 
1425    FOR(i, com.ntime)
1426       if (x[i]<tb[0] || x[i]>tb[1])
1427          return (-1);
1428    return (0);
1429 }
1430 
1431 
1432 
ConditionalPNode(int inode,int igene,double x[])1433 int ConditionalPNode(int inode, int igene, double x[])
1434 {
1435    int n = com.ncode, i, j, k, h, ison, pos0 = com.posG[igene], pos1 = com.posG[igene + 1];
1436    double t;
1437 
1438    for (i = 0; i < nodes[inode].nson; i++)
1439       if (nodes[nodes[inode].sons[i]].nson > 0 && !com.oldconP[nodes[inode].sons[i]])
1440          ConditionalPNode(nodes[inode].sons[i], igene, x);
1441    if (inode < com.ns) {  /* young ancestor */
1442       for (h = pos0*n; h < pos1*n; h++)
1443          nodes[inode].conP[h] = 0;
1444    }
1445    else
1446       for (h = pos0*n; h < pos1*n; h++)
1447          nodes[inode].conP[h] = 1;
1448    if (com.cleandata && inode < com.ns) /* young ancestor */
1449       for (h = pos0; h < pos1; h++)
1450          nodes[inode].conP[h*n + com.z[inode][h]] = 1;
1451 
1452    for (i = 0; i < nodes[inode].nson; i++) {
1453       ison = nodes[inode].sons[i];
1454       t = nodes[ison].branch*_rateSite;
1455 
1456       if (com.clock < 5) {
1457          if (com.clock)  t *= GetBranchRate(igene, (int)nodes[ison].label, x, NULL);
1458          else            t *= com.rgene[igene];
1459       }
1460       GetPMatBranch(PMat, x, t, ison);
1461 
1462       if (nodes[ison].nson < 1 && com.cleandata) {        /* tip && clean */
1463          for (h = pos0; h < pos1; h++)
1464             for (j = 0; j < n; j++)
1465                nodes[inode].conP[h*n + j] *= PMat[j*n + com.z[ison][h]];
1466       }
1467       else if (nodes[ison].nson < 1 && !com.cleandata) {  /* tip & unclean */
1468          for (h = pos0; h < pos1; h++)
1469             for (j = 0; j < n; j++) {
1470                for (k = 0, t = 0; k < nChara[com.z[ison][h]]; k++)
1471                   t += PMat[j*n + CharaMap[com.z[ison][h]][k]];
1472                nodes[inode].conP[h*n + j] *= t;
1473             }
1474       }
1475       else {
1476          for (h = pos0; h < pos1; h++)
1477             for (j = 0; j < n; j++) {
1478                for (k = 0, t = 0; k < n; k++)
1479                   t += PMat[j*n + k] * nodes[ison].conP[h*n + k];
1480                nodes[inode].conP[h*n + j] *= t;
1481             }
1482       }
1483    }        /*  for (ison)  */
1484    if (com.NnodeScale && com.nodeScale[inode])  NodeScale(inode, pos0, pos1);
1485    return (0);
1486 }
1487 
PMatCijk(double P[],double t)1488 int PMatCijk(double P[], double t)
1489 {
1490 /* P(t)ij = SUM Cijk * exp{Root*t}
1491 */
1492    int i, j, k, n = com.ncode, nr = nR;
1493    double exptm1[5] = {0};
1494 
1495    memset(P, 0, n*n * sizeof(double));
1496    for (k = 1; k < nr; k++) exptm1[k] = expm1(t*Root[k]);
1497    for (i = 0; i < n; i++) {
1498       for (j = 0; j < n; j++) {
1499          for (k = 0; k < nr; k++)
1500             P[i*n + j] += Cijk[i*n*nr + j*nr + k] * exptm1[k];
1501       }
1502       P[i*n + i] ++;
1503    }
1504    return (0);
1505 }
1506 
1507 
1508 
1509 #ifdef UNDEFINED
1510 
CollapsSite(FILE * fout,int nsep,int ns,int * ncat,int SiteCat[])1511 int CollapsSite(FILE *fout, int nsep, int ns, int *ncat, int SiteCat[])
1512 {
1513    int j, k, it, h, b[NS], ndiff, n1, bit1;
1514    /* n1: # of 1's   ...  bit1: the 1st nonzero bit */
1515 
1516    *ncat = 5 + (1 << (ns - 1)) - 1 + nsep * 11;
1517    if (fout) fprintf(fout, "\n# cat:%5d  # sep:%5d\n\n", *ncat, nsep);
1518 
1519    FOR(h, 1 << 2 * ns) {
1520       for (j = 0, it = h; j < ns; b[ns - 1 - j] = it % 4, it /= 4, j++);
1521       for (j = 1, ndiff = 0; j < ns; j++) {
1522          FOR(k, j) if (b[j] == b[k]) break;
1523          if (k == j) ndiff++;
1524       }
1525       switch (ndiff) {
1526       default: SiteCat[h] = 0;      break;
1527       case (0): SiteCat[h] = b[0] + 1; break;
1528       case (1):
1529          for (j = 1, it = 0, n1 = 0, bit1 = 0; j < ns; j++) {
1530             k = (b[j] != b[0]);
1531             it = it * 2 + k;
1532             n1 += k;
1533             if (bit1 == 0 && k) bit1 = ns - 1 - j;
1534          }
1535          it = 5 + it - 1;
1536          if (nsep == 0) { SiteCat[h] = it; break; }
1537 
1538          SiteCat[h] = it + min2(bit1 + 1, nsep) * 11;
1539          if (n1 == 1 && bit1 < nsep) {
1540             SiteCat[h] -= 11;
1541             SiteCat[h] += (b[0] * 4 + b[ns - 1 - bit1] - b[0] - (b[0] <= b[ns - 1 - bit1]));
1542          }
1543          break;
1544       }
1545       if (fout) {
1546          FOR(j, ns) fprintf(fout, "%1c", BASEs[b[j]]);
1547          fprintf(fout, "%5d    ", SiteCat[h]);
1548          if (h % 4 == 3) FPN(fout);
1549       }
1550    }
1551    return (0);
1552 }
1553 
GetPexpML(double x[],int ncat,int SiteCat[],double pexp[])1554 int GetPexpML(double x[], int ncat, int SiteCat[], double pexp[])
1555 {
1556    int  j, it, h, nodeb[NNODE];
1557    int  isum, nsum = 1 << (2 * (tree.nbranch - com.ns + 1));
1558    double fh, y, Pt[NBRANCH][16];
1559 
1560    if (com.ngene > 1 || com.nhomo || com.alpha || com.nparK || com.model > REV)
1561       error2("Pexp()");
1562    SetParameters(x);
1563    FOR(j, tree.nbranch)
1564       PMatCijk(Pt[j], nodes[tree.branches[j][1]].branch);
1565 
1566    for (h = 0; h < (1 << 2 * com.ns); h++) {
1567       if (SiteCat[h] == 0) continue;
1568       for (j = 0, it = h; j < com.ns; nodeb[com.ns - 1 - j] = it & 3, it >>= 2, j++);
1569       for (isum = 0, fh = 0; isum < nsum; isum++) {
1570          for (j = 0, it = isum; j < tree.nbranch - com.ns + 1; j++)
1571          {
1572             nodeb[com.ns + j] = it % 4; it /= 4;
1573          }
1574          for (j = 0, y = com.pi[nodeb[tree.root]]; j < tree.nbranch; j++)
1575             y *= Pt[j][nodeb[tree.branches[j][0]] * 4 + nodeb[tree.branches[j][1]]];
1576          fh += y;
1577       }
1578       pexp[SiteCat[h]] += fh;
1579    }
1580    pexp[0] = 1 - sum(pexp + 1, ncat - 1);
1581    return (0);
1582 }
1583 
1584 
TestModel(FILE * fout,double x[],int nsep,double space[])1585 int TestModel(FILE *fout, double x[], int nsep, double space[])
1586 {
1587    /* test of models, using com.
1588    */
1589    int j, h, it, ls = com.ls, ncat, *SiteCat;
1590    double *pexp = space, *nobs, lmax0, lnL0, X2, ef, de;
1591 
1592    SiteCat = (int*)malloc((1 << 2 * com.ns) * sizeof(int));
1593    if (SiteCat == NULL)  error2("oom");
1594    CollapsSite(F0, nsep, com.ns, &ncat, SiteCat);
1595    fprintf(fout, "\n\nAppr. test of model.. ncat%6d  nsep%6d\n", ncat, nsep);
1596 
1597    nobs = pexp + ncat;
1598    zero(pexp, 2 * ncat);
1599    /* nobs */
1600    FOR(h, com.npatt) {
1601       for (j = 0, it = 0; j < com.ns; j++) it = it * 4 + (com.z[j][h] - 1);
1602       nobs[SiteCat[it]] += com.fpatt[h];
1603    }
1604    GetPexpML(x, ncat, SiteCat, pexp);
1605 
1606    for (h = 0, lnL0 = 0, X2 = 0, lmax0 = -(double)ls*log((double)ls); h < ncat; h++) {
1607       if (nobs[h] > 1) {
1608          lmax0 += nobs[h] * log(nobs[h]);
1609          lnL0 += nobs[h] * log(pexp[h]);
1610       }
1611       ef = com.ls*pexp[h];
1612       de = square(nobs[h] - ef) / ef;
1613       X2 += de;
1614       fprintf(fout, "\nCat #%3d%9.0f%9.2f%9.2f", h, nobs[h], ef, de);
1615    }
1616    fprintf(fout, "\n\nlmax0:%12.4f  D2:%12.4f   X2:%12.4f\n",
1617       lmax0, 2 * (lmax0 - lnL0), X2);
1618    free(SiteCat);
1619 
1620    return (0);
1621 }
1622 
1623 
1624 #endif
1625 
1626 
OldDistributions(int inode,double AncientFreqs[])1627 int OldDistributions(int inode, double AncientFreqs[])
1628 {
1629    /* reconstruct nucleotide frequencies at and down inode for nonhomogeneous models com.nhomo==3 or 4.
1630       AncientFreqs[tree.nnode*4]
1631    */
1632    int i, n = 4;
1633    double kappa = com.kappa;
1634 
1635    if (com.alpha || com.model > REV) {
1636       puts("OldDistributions() does not run when alpha > 0 or model >= TN93");
1637       return(-1);
1638    }
1639    if (inode == tree.root)
1640       xtoy(nodes[inode].pi, AncientFreqs + inode*n, n);
1641    else {
1642       if (com.nhomo > 2 && com.model <= TN93)
1643          eigenTN93(com.model, *nodes[inode].pkappa, *(nodes[inode].pkappa + 1), nodes[inode].pi, &nR, Root, Cijk);
1644       else if (com.nhomo > 2 && com.model == REV)
1645          eigenQREVbase(NULL, PMat, nodes[inode].pkappa, nodes[inode].pi, &nR, Root, Cijk);
1646 
1647       PMatCijk(PMat, nodes[inode].branch);
1648       matby(AncientFreqs + nodes[inode].father*n, PMat, AncientFreqs + inode*n, 1, n, n);
1649    }
1650    for (i = 0; i < nodes[inode].nson; i++)
1651       OldDistributions(nodes[inode].sons[i], AncientFreqs);
1652    return (0);
1653 }
1654 
1655 
1656 
1657 
1658 
1659 /* problems and notes
1660 
1661    (1) AdG model: generation of com.MK[K*K] is not independent
1662        of com.alpha or DiscreteGamma().
1663 
1664 non-homogeneous process models:
1665   nhomo            fix_kappa         models
1666 
1667   0 (1 pi given)   0 (solve 1 kappa)   JC69, K80, F81, F84, HKY85,
1668                                        REV(5), UNREST(11)
1669                    1 (1 kappa given)   K80, F84, HKY85
1670 
1671   1 (solve 1 pi)   0 (as above)        F84, HKY85, REV(5)
1672                    1                   F81(0), F84, HKY85
1673 
1674   2 (b kappa's)    ?                   K80, F84, HKY85
1675 
1676   3 (ns+2 pi)      0 (solve 1 kappa)   F84 & HKY85
1677                    1 (nbranch kappa)   F84 & HKY85
1678 
1679   4 (nnode pi)     0,1  (as above)
1680 
1681 
1682 space-time process models:
1683   nparK     fix_alpha       fix_rho        parameters
1684   0         (0,1)          (0,1)           alpha & rho for AdG
1685 
1686   1         set to 1       set to 1        rK[]        (freqK=1/K) (K-1)
1687   2         set to 1       set to 1        rK[] & freqK[]          2(K-1)
1688   3         set to 1       set to 1        rK[] & MK[] (freqK=1/K) K(K-1)
1689   4         set to 1       set to 1        rK[] & MK[]             K*K-1
1690 
1691 
1692 Local clock models
1693    parameters under local clock (com.clock=2)
1694       com.ntime = (#ancestral nodes) - 1 + (#rates) - 1
1695    Parameters include (ns-1) node times t[] and rates for branches.
1696       x[0]: t0*r0
1697       x[1..(nid-1)]: ti/t0 ancestral node times expressed as ratios
1698       x[ns-1 .. ntime-1]: rates for branches
1699 
1700 */
1701