1 /* baseml.c
2 Maximum likelihood parameter estimation e aligned DNA (RNA) sequences,
3 combined with phylogenetic tree estimation.
4 Copyright, Ziheng YANG, July 1992 onwards
5
6 cc -o baseml -fast baseml.c tools.c -lm
7 cl -O2 baseml.c tools.c
8 baseml <ControlFileName>
9 */
10
11 #include "paml.h"
12
13 #define NS 7000
14 #define NBRANCH (NS*2-2)
15 #define NNODE (NS*2-1)
16 #define MAXNSONS 200
17 #define NGENE 500
18 #define LSPNAME 96
19 #define NCODE 5
20 #define NCATG 100
21
22 #define NP (NBRANCH+NGENE+11)
23 /*
24 #define NP (NBRANCH+3*NNODE+NGENE+11+NCATG*NCATG-1)
25 */
26 extern int noisy, NFunCall, NEigenQ, NPMatUVRoot, *ancestor, GeneticCode[][64];
27 extern double Small_Diff, *SeqDistance;
28
29 int Forestry(FILE *fout);
30 void DetailOutput(FILE *fout, double x[], double var[]);
31 int GetOptions(char *ctlf);
32 int GetInitials(double x[], int *fromfile);
33 int GetInitialsTimes(double x[]);
34 int SetxInitials(int np, double x[], double xb[][2]);
35 int SetParameters(double x[]);
36 int SetPGene(int igene, int _pi, int _UVRoot, int _alpha, double x[]);
37 int SetPSiteClass(int iclass, double x[]);
38 int testx(double x[], int np);
39 double GetBranchRate(int igene, int ibrate, double x[], int *ix);
40 int GetPMatBranch(double Pt[], double x[], double t, int inode);
41 int ConditionalPNode(int inode, int igene, double x[]);
42
43 int TransformxBack(double x[]);
44 int AdHocRateSmoothing(FILE*fout, double x[NS * 3], double xb[NS * 3][2], double space[]);
45 void DatingHeteroData(FILE* fout);
46
47 int TestModel(FILE *fout, double x[], int nsep, double space[]);
48 int OldDistributions(int inode, double AncientFreqs[]);
49 int SubData(int argc, char *argv[]);
50 int GroupDistances();
51
52
53 struct CommonInfo {
54 unsigned char *z[NS];
55 char *spname[NS], seqf[2048], outf[2048], treef[2048], cleandata;
56 char oldconP[NNODE]; /* update conP for nodes to save computation (0 yes; 1 no) */
57 int seqtype, ns, ls, ngene, posG[NGENE + 1], lgene[NGENE], *pose, npatt, readpattern;
58 int np, ntime, nrgene, nrate, nalpha, npi, nhomo, ncatG, ncode, Mgene;
59 size_t sspace, sconP;
60 int fix_kappa, fix_rgene, fix_alpha, fix_rho, nparK, fix_blength;
61 int clock, model, getSE, runmode, print, verbose, ndata, bootstrap;
62 int icode, coding, method;
63 int nbtype;
64 double *fpatt, kappa, alpha, rho, rgene[NGENE], pi[4], piG[NGENE][4], TipDate, TipDate_TimeUnit;
65 double freqK[NCATG], rK[NCATG], MK[NCATG*NCATG];
66 double *blengths0;
67 double(*plfun)(double x[], int np), *conP, *fhK, *space;
68 int conPSiteClass; /* is conP memory allocated for each class? */
69 int NnodeScale;
70 char *nodeScale; /* nScale[ns-1] for interior nodes */
71 double *nodeScaleF; /* nScaleF[npatt] for scale factors */
72 int fix_omega;
73 double omega;
74 } com;
75
76 struct TREEB {
77 int nbranch, nnode, root, branches[NBRANCH][2];
78 double lnL;
79 } tree;
80
81 struct TREEN {
82 int father, nson, sons[MAXNSONS], ibranch, ipop;
83 double branch, age, *pkappa, pi[4], *conP, label, label2;
84 char fossil, *name, *annotation;
85 } *nodes, **gnodes, nodes_t[2 * NS - 1];
86
87
88 /* for stree.nodes[].fossil: lower, upper, bounds, gamma, inverse-gamma */
89 enum { LOWER_F = 1, UPPER_F, BOUND_F } FOSSIL_FLAGS;
90 char *fossils[] = { " ", "L", "U", "B" };
91
92 struct SPECIESTREE {
93 int nbranch, nnode, root, nspecies, nfossil;
94 struct TREESPN {
95 char name[LSPNAME + 1], fossil, usefossil; /* fossil: 0, 1, 2, 3 */
96 int father, nson, sons[2];
97 double label, age, pfossil[7]; /* lower and upper bounds or alpha & beta */
98 double *lnrates; /* log rates for loci */
99 } nodes[2 * NS - 1];
100 } stree;
101 /* all trees are binary & rooted, with ancestors unknown. */
102
103 struct DATA { /* locus-specific data and tree information */
104 int ns[NGENE], ls[NGENE], npatt[NGENE], ngene, lgene[NGENE];
105 int root[NGENE + 1], BlengthMethod, fix_nu, nbrate[NGENE];
106 char *z[NGENE][NS], cleandata[NGENE];
107 double *fpatt[NGENE], lnpT, lnpR, lnpDi[NGENE];
108 double Qfactor[NGENE], pi[NGENE][NCODE], nu[NGENE];
109 double rgene[NGENE], kappa[NGENE], alpha[NGENE], omega[1];
110 int NnodeScale[NGENE];
111 char *nodeScale[NGENE]; /* nScale[data.ns[locus]-1] for interior nodes */
112 } data;
113
114 int nR = NCODE, LASTROUND, BayesEB;
115 double PMat[NCODE*NCODE], Cijk[NCODE*NCODE*NCODE], Root[NCODE];
116 int StepMatrix[NCODE*NCODE];
117
118
119 FILE *frub, *flnf, *frst, *frst1, *frst2 = NULL, *finitials = NULL;
120 char *ratef = "rates";
121 char *models[] = { "JC69","K80","F81","F84","HKY85","T92","TN93","REV","UNREST", "REVu","UNRESTu" };
122 enum { JC69, K80, F81, F84, HKY85, T92, TN93, REV, UNREST, REVu, UNRESTu } MODELS;
123 char *clockstr[] = { "", "Global clock", "Local clock", "ClockCombined" };
124 enum { GlobalClock = 1, LocalClock, ClockCombined } ClockModels;
125
126 double _rateSite = 1; /* rate for a site */
127 int N_PMatUVRoot = 0;
128
129
130 #define BASEML 1
131 #include "treesub.c"
132 #include "treespace.c"
133
main(int argc,char * argv[])134 int main (int argc, char *argv[])
135 {
136 FILE *fout, *fseq = NULL, *fpair[6];
137 char pairfs[1][32] = { "2base.t" };
138 char rstf[2048] = "rst", ctlf[2048] = "baseml.ctl", timestr[64];
139 char *Mgenestr[] = { "diff. rate", "separate data", "diff. rate & pi",
140 "diff. rate & kappa", "diff. rate & pi & kappa" };
141 int getdistance = 1, i, idata;
142 size_t s2 = 0;
143
144 if (argc > 2 && !strcmp(argv[argc - 1], "--stdout-no-buf"))
145 setvbuf(stdout, NULL, _IONBF, 0);
146
147 starttimer();
148 SetSeed(-1, 0);
149
150 com.ndata = 1;
151 com.cleandata = 0; noisy = 0; com.runmode = 0;
152
153 com.clock = 0;
154 com.fix_rgene = 0; /* 0: estimate rate factors for genes */
155 com.nhomo = 0;
156 com.getSE = 0; /* 1: want S.E.s of estimates; 0: don't want them */
157
158 com.seqtype = 0; com.model = 0;
159 com.fix_kappa = 0; com.kappa = 5;
160 com.fix_alpha = 1; com.alpha = 0.; com.ncatG = 4; /* alpha=0 := inf */
161 com.fix_rho = 1; com.rho = 0; com.nparK = 0;
162 com.ncode = 4; com.icode = 0;
163 com.print = 0; com.verbose = 1; com.fix_blength = 0;
164 com.method = 0; com.space = NULL;
165
166 printf("BASEML in %s\n", pamlVerStr);
167 frub = gfopen("rub", "w"); frst = gfopen(rstf, "w"); frst1 = gfopen("rst1", "w");
168
169 if (argc > 1) strncpy(ctlf, argv[1], 95);
170 GetOptions(ctlf);
171 if (com.runmode != -2) finitials = fopen("in.baseml", "r");
172 if (com.getSE == 2) frst2 = fopen("rst2", "w");
173
174 fprintf(frst, "Supplemental results for BASEML\n\nseqf: %s\ntreef: %s\n",
175 com.seqf, com.treef);
176 fout = gfopen(com.outf, "w");
177 fpair[0] = (FILE*)gfopen(pairfs[0], "w");
178
179 /* for stepwise addition, com.sspace should be calculated using com.ns. */
180 com.sspace = 1000000 * sizeof(double);
181 if ((com.space = (double*)malloc(com.sspace)) == NULL)
182 error2("oom space");
183
184 if (com.clock == 5 || com.clock == 6)
185 DatingHeteroData(fout);
186
187 if ((fseq = fopen(com.seqf, "r")) == NULL) {
188 printf("\n\nSequence file %s not found!\n", com.seqf);
189 exit(-1);
190 }
191
192 for (idata = 0; idata < com.ndata; idata++) {
193 if (com.ndata > 1) {
194 printf("\nData set %4d\n", idata + 1);
195 fprintf(fout, "\n\nData set %4d\n", idata + 1);
196
197 fprintf(frst1, "%4d", idata + 1);
198 }
199 if (idata) GetOptions(ctlf); /* com.cleandata is read from here again. */
200 ReadSeq((com.verbose ? fout : NULL), fseq, com.cleandata, 0);
201 if (com.ngene > 1 && (com.fix_blength == 2 || com.fix_blength == 3))
202 error2("fix_blength = 2 or 3 does not work for partitioned data or Mgene models");
203 if (com.fix_blength == 3) {
204 if ((com.blengths0 = (double*)malloc((com.ns * 2 - 2) * sizeof(double))) == NULL)
205 error2("oom blengths0");
206 }
207
208 SetMapAmbiguity(com.seqtype, 0);
209
210 /* AllPatterns(fout); */
211
212 if (com.rho && com.readpattern) error2("rho doesn't work with readpattern.");
213 if (com.ndata == 1) fclose(fseq);
214 i = (com.ns * 2 - 1) * sizeof(struct TREEN);
215 if ((nodes = (struct TREEN*)malloc(i)) == NULL) error2("oom");
216
217 if (com.coding) {
218 if (com.ls % 3 != 0 || (com.ngene != 1 && com.ngene != 3))
219 error2("this is not a coding sequence. Remove icode?");
220 }
221
222 if (com.Mgene && com.ngene == 1) error2("option Mgene for 1 gene?");
223 if (com.ngene > 1 && com.nhomo) error2("nhomo for mutliple genes?");
224 if (com.nalpha && (!com.alpha || com.ngene == 1 || com.fix_alpha))
225 error2("Malpha");
226 if (com.nalpha > 1 && com.rho) error2("Malpha or rho");
227 if (com.alpha == 0) com.nalpha = 0;
228 else com.nalpha = (com.nalpha ? com.ngene : !com.fix_alpha);
229 if (com.Mgene == 1) com.nalpha = !com.fix_alpha;
230
231 if (com.ngene == 1) com.Mgene = 0;
232 if ((com.nhomo == 1 && com.ngene > 1) || (com.Mgene > 1 && com.nhomo >= 1))
233 error2("nhomo does not work with Mgene options");
234
235 if ((com.Mgene >= 2 && com.model == JC69) || (com.Mgene >= 3 && com.model == F81)
236 || ((com.Mgene == 2 || com.Mgene == 4) && com.model == K80)
237 || (com.Mgene > 1 && com.nhomo > 1)
238 || (com.Mgene >= 2 && (com.model == UNREST || com.model == UNRESTu)))
239 error2("model || Mgene");
240 fprintf(fout, "\nBASEML (in %s) %s %s ", pamlVerStr, com.seqf, models[com.model]);
241 if (com.clock) fprintf(fout, " %s ", clockstr[com.clock]);
242 if (!com.nparK && com.alpha && com.rho) fprintf(fout, " Auto-");
243 if (com.alpha) fprintf(fout, "dGamma (ncatG=%d)", com.ncatG);
244 if (com.nalpha > 1) fprintf(fout, "(%d gamma)", com.nalpha);
245 if (com.ngene > 1)
246 fprintf(fout, " (%d genes: %s) ", com.ngene, Mgenestr[com.Mgene]);
247 if (com.nhomo > 1)
248 fprintf(fout, "\nNonhomo:%2d fix_kappa%2d\n", com.nhomo, com.fix_kappa);
249 if (com.nparK && com.ncatG > 6)
250 printf("\a\n%d rate categories for nonparametric model?\n", com.ncatG);
251 if (com.nparK) fprintf(fout, "\nnparK:%4d K:%4d\n", com.nparK, com.ncatG);
252
253 if (getdistance) {
254 i = com.ns*(com.ns - 1) / 2;
255 SeqDistance = (double*)realloc(SeqDistance, i * sizeof(double));
256 ancestor = (int*)realloc(ancestor, i * sizeof(int));
257 if (SeqDistance == NULL || ancestor == NULL) error2("oom distance&ancestor");
258 }
259 InitializeBaseAA(fout);
260
261 if (com.Mgene == 3)
262 for (i = 0; i < com.ngene; i++)
263 xtoy(com.pi, com.piG[i], com.ncode);
264
265 if (com.model == JC69 && com.ngene <= 1 && !com.readpattern && !com.print) {
266 PatternWeightJC69like();
267 if (fout) {
268 fprintf(fout, "\n\nPrinting out site pattern counts\n");
269 printPatterns(fout);
270 }
271 if (com.verbose >= 2) {
272 fprintf(fout, "\nSite-to-pattern map: ");
273 for (i = 0; i < com.ls; i++)
274 fprintf(fout, " %2d", com.pose[i] + 1);
275 fprintf(fout, "\n");
276
277 fprintf(fout, "\n**** Alignment mutated for JC69-like models ****\n");
278 fprintf(fout, "\n%6d %6d\n", com.ns, com.ls);
279 for (i = 0; i < com.ns; i++) {
280 fprintf(fout, "\n%-30s ", com.spname[i]);
281 print1seq(fout, com.z[i], com.ls, com.pose);
282 }
283 fprintf(fout, "\n");
284 }
285 }
286
287 com.sconP = (com.ns - 1)*com.ncode*(size_t)com.npatt * sizeof(double);
288 if ((com.conP = (double*)realloc(com.conP, com.sconP)) == NULL)
289 error2("oom conP");
290 if (com.alpha || com.nparK) {
291 s2 = com.npatt*com.ncatG * sizeof(double);
292 if ((com.fhK = (double*)realloc(com.fhK, s2)) == NULL) error2("oom");
293 }
294
295 printf("\n%9zu bytes for distance ", com.ns*(com.ns - 1) / 2 * (sizeof(double) + sizeof(int)));
296 printf("\n%9zu bytes for conP\n", com.sconP);
297 printf("%9zu bytes for fhK\n%9zu bytes for space\n", s2, com.sspace);
298
299 /* FOR(i,com.ns*2-1) xtoy(com.pi,nodes[i].pi, 4); check this? */
300
301 if (getdistance)
302 DistanceMatNuc(fout, fpair[0], com.model, com.alpha);
303
304
305 #if(0)
306 /* Selecting the most divergent sequences using the distance matrix
307 */
308 {
309 char keep[NS];
310 FILE *ftmp = gfopen("newdata.txt", "w");
311 int is, js, i, j, nskeep, isbest = 0, isk, chosen[NS];
312 double dmax, dminmax, d, dbig = 9;
313
314 if (com.model == 0 && com.print == 0)
315 error2("choose RateAncestor = 1 to print the seqs correctly.");
316 nskeep = 5;
317 printf("\nPicking up the most different sequences.\nHow many do you want? ");
318 scanf("%d", &nskeep);
319
320 if (nskeep >= com.ns) error2("nothing to do");
321
322 for (is = 0; is < com.ns; is++) {
323 keep[is] = 0;
324 chosen[is] = -1;
325 }
326
327 for (is = 0, dmax = 0; is < com.ns; is++) {
328 for (js = 0; js < is; js++) {
329 d = SeqDistance[is*(is - 1) / 2 + js];
330 if (dmax < d) { dmax = d; chosen[1] = is; chosen[0] = js; }
331 }
332 }
333 keep[chosen[0]] = keep[chosen[1]] = 1;
334 printf("selected seq %3d %s\n", chosen[0] + 1, com.spname[chosen[0]]);
335 printf("selected seq %3d %s\n", chosen[1] + 1, com.spname[chosen[1]]);
336 for (isk = 2; isk < nskeep; isk++) {
337 for (is = 0, dminmax = 0; is < com.ns; is++) {
338 if (keep[is]) continue;
339 /* d is the smallest distance to those chosen */
340 for (js = 0, d = dbig; chosen[js] != -1; js++) {
341 i = max2(is, chosen[js]);
342 j = min2(is, chosen[js]);
343 if (d > SeqDistance[i*(i - 1) / 2 + j]) d = SeqDistance[i*(i - 1) / 2 + j];
344 }
345 if (dminmax < d) {
346 dminmax = d; isbest = is;
347 }
348 }
349 keep[isbest] = 1;
350 chosen[isk] = isbest;
351 printf("selected seq %5d (dmin = %8.4f): %s\n", isbest + 1, dminmax, com.spname[isbest]);
352 }
353
354 fprintf(ftmp, "%6d %6d\n", nskeep, com.ls);
355 for (j = 0; j < com.ns; j++) {
356 if (keep[j] == 0) continue;
357 fprintf(ftmp, "%-40s ", com.spname[j]);
358 print1seq(ftmp, com.z[j], com.ls, com.pose);
359 FPN(ftmp);
360 }
361 fclose(ftmp);
362 return(0);
363
364 }
365
366 #endif
367
368 if (com.Mgene == 1) MultipleGenes(fout, fpair, com.space);
369 else if (com.runmode == 0) Forestry(fout);
370 else if (com.runmode == 2) fprintf(frst, "%2d", StarDecomposition(fout, com.space));
371 else if (com.runmode == 3) StepwiseAddition(fout, com.space);
372 else if (com.runmode >= 4) Perturbation(fout, (com.runmode == 4), com.space);
373
374 FPN(frst); if ((idata + 1) % 10 == 0) fflush(frst);
375 if (com.ndata > 1 && com.runmode) {
376 fprintf(frst1, "\t");
377 OutTreeN(frst1, 1, 0);
378 }
379 FPN(frst1); fflush(frst1);
380 free(nodes);
381 if (com.fix_blength == 3) free(com.blengths0);
382
383 printf("\nTime used: %s\n", printtime(timestr));
384 } /* for(idata) */
385 if (com.ndata > 1 && fseq) fclose(fseq);
386 free(com.space);
387 fclose(fout); fclose(frst); fclose(fpair[0]);
388 if (finitials) { fclose(finitials); finitials = NULL; }
389
390 return (0);
391 }
392
393
394 /* x[]: t[ntime], rgene[ngene-1], kappa[nbranch], pi[nnode*3],
395 { alpha, rho || rK[], fK[] || rK[], MK[] }
396 */
397
398 FILE *ftree;
399 void PrintBestTree(FILE *fout, FILE *ftree, int btree);
400
Forestry(FILE * fout)401 int Forestry(FILE *fout)
402 {
403 static int ages = 0;
404 int status = 0, inbasemlg = 0, i, j = 0, itree = 0, ntree, np, np0, iteration = 1;
405 int pauptree = 0, btree = 0, haslength;
406 double x[NP], xcom[NP - NBRANCH], lnL, lnL0 = 0, lnLbest = 0, e = 1e-7, nchange = -1;
407 double xb[NP][2], tl = 0, *g = NULL, *H = NULL;
408 FILE *finbasemlg = NULL, *frate = NULL;
409
410 ftree = gfopen(com.treef, "r");
411 GetTreeFileType(ftree, &ntree, &pauptree, 0);
412 if (com.alpha)
413 frate = gfopen(ratef, "w");
414 if (com.alpha && com.rho == 0 && com.nhomo == 0 && com.nparK == 0 && com.ns < 15) {
415 inbasemlg = 1; finbasemlg = gfopen("in.basemlg", "w");
416 }
417 flnf = gfopen("lnf", "w+");
418 fprintf(flnf, "%6d %6d %6d\n", ntree, com.ls, com.npatt);
419
420 for (itree = 0; ntree == -1 || itree < ntree; itree++, iteration = 1) {
421 if (ReadTreeN(ftree, &haslength, 0, 1)) {
422 puts("\nend of tree file."); break;
423 }
424 ProcessNodeAnnotation(&i);
425
426 if (noisy) printf("\nTREE # %2d: ", itree + 1);
427 fprintf(fout, "\nTREE # %2d: ", itree + 1);
428 fprintf(frub, "\n\nTREE # %2d\n", itree + 1);
429 if (com.print) fprintf(frst, "\n\nTREE # %2d\n", itree + 1);
430 fprintf(flnf, "\n\n%2d\n", itree + 1);
431
432 LASTROUND = 0;
433 if ((com.fix_blength == 2 || com.fix_blength == 3) && !haslength)
434 error2("We need branch lengths in tree");
435 if (com.fix_blength > 0 && !haslength) com.fix_blength = 0;
436 if (ages++ == 0 && com.fix_blength > 0 && haslength) {
437 if (com.clock) puts("\nBranch lengths in tree are ignored");
438 else {
439 if (com.fix_blength == 2) puts("\nBranch lengths in tree are fixed.");
440 else if (com.fix_blength == 1)
441 puts("\nBranch lengths in tree used as initials.");
442 if (com.fix_blength == 1) {
443 FOR(i, tree.nnode)
444 if ((x[nodes[i].ibranch] = nodes[i].branch) < 0)
445 x[nodes[i].ibranch] = 1e-5;
446 }
447 }
448 }
449
450 if (com.cleandata)
451 nchange = MPScore(com.space);
452 if (noisy&&com.ns < 99) {
453 OutTreeN(F0, 0, 0); printf(" MP score: %.2f\n", nchange);
454 }
455 OutTreeN(fout, 0, 0); fprintf(fout, " MP score: %.2f", nchange);
456 if (!com.clock && com.model <= REV && com.nhomo <= 2
457 && nodes[tree.root].nson <= 2 && com.ns > 2) {
458 puts("\nThis is a rooted tree, without clock. Check.");
459 if (com.verbose) fputs("\nThis is a rooted tree. Please check!", fout);
460 }
461
462 fflush(fout); fflush(flnf);
463
464 GetInitials(x, &i);
465
466 if (i == -1) iteration = 0;
467
468 if ((np = com.np) > NP) error2("raise NP and recompile");
469
470 if (com.sspace < spaceming2(np)) {
471 com.sspace = spaceming2(np);
472 if ((com.space = (double*)realloc(com.space, com.sspace)) == NULL)
473 error2("oom space");
474 }
475
476 if (itree) { np0 = np; }
477 if (itree && !finitials && (com.nhomo == 0 || com.nhomo == 2))
478 for (i = 0; i < np - com.ntime; i++) x[com.ntime + i] = max2(xcom[i], 0.001);
479
480 PointconPnodes();
481
482 lnL = com.plfun(x, np);
483 if (noisy) {
484 printf("\nntime & nrate & np:%6d%6d%6d\n", com.ntime, com.nrate, com.np);
485 if (noisy > 2 && com.ns < 50) { OutTreeB(F0); FPN(F0); matout(F0, x, 1, np); }
486 printf("\nlnL0 = %12.6f\n", -lnL);
487 }
488
489 if (iteration && np) {
490 SetxBound(np, xb);
491 SetxInitials(np, x, xb); /* start within the feasible region */
492 if (com.method == 1)
493 j = minB(noisy > 2 ? frub : NULL, &lnL, x, xb, e, com.space);
494 else if (com.method == 3)
495 j = minB2(noisy > 2 ? frub : NULL, &lnL, x, xb, e, com.space);
496 else
497 j = ming2(noisy > 2 ? frub : NULL, &lnL, com.plfun, NULL, x, xb, com.space, e, np);
498
499 if (j == -1 || lnL <= 0 || lnL > 1e7) status = -1;
500 else status = 0;
501 }
502
503 if (itree == 0) {
504 lnL0 = lnLbest = lnL; btree = 0;
505 }
506 else if (lnL < lnLbest) {
507 lnLbest = lnL; btree = itree;
508 }
509 if (noisy) printf("Out...\nlnL = %12.6f\n", -lnL);
510 fprintf(fout, "\nlnL(ntime:%3d np:%3d): %13.6f %+12.6f\n",
511 com.ntime, np, -lnL, -lnL + lnL0);
512 if (com.fix_blength < 2) { OutTreeB(fout); FPN(fout); }
513 if (LASTROUND == 0) {
514 LASTROUND = 1;
515 if ((com.npi && com.model != T92) || com.nparK >= 2) TransformxBack(x);
516 }
517 if (com.clock) { /* move ages into x[] */
518 for (i = 0, j = !nodes[tree.root].fossil; i < tree.nnode; i++)
519 if (i != tree.root && nodes[i].nson && !nodes[i].fossil)
520 x[j++] = nodes[i].age;
521 }
522 for (i = 0; i < np; i++)
523 fprintf(fout, " %8.6f", x[i]);
524 FPN(fout); fflush(fout);
525 if (inbasemlg) matout(finbasemlg, x, 1, np);
526
527 /*
528 for(i=0;i<np;i++) fprintf(frst1,"\t%.6f", x[i]);
529 fprintf(frst1,"\t%.4f", -lnL);
530 */
531 if (com.getSE) {
532 puts("Calculating SE's");
533 if (com.sspace < np*(np + 1) * sizeof(double)) {
534 com.sspace = np*(np + 1) * sizeof(double);
535 if ((com.space = (double*)realloc(com.space, com.sspace)) == NULL)
536 error2("oom space for SE");
537 }
538
539 g = com.space;
540 H = g + com.np;
541 HessianSKT2004(x, lnL, g, H);
542 if (com.getSE >= 2 && com.clock == 0 && nodes[tree.root].nson == 3) { /* g & H */
543 fprintf(frst2, "\n %d\n\n", com.ns);
544 OutTreeN(frst2, 1, 1); fprintf(frst2, "\n\n");
545 for (i = 0; i < com.ntime; i++)
546 if (x[i] > 0.0004 && fabs(g[i]) < 0.005) g[i] = 0;
547 for (i = 0; i < com.ntime; i++) fprintf(frst2, " %9.6f", x[i]); fprintf(frst2, "\n\n");
548 for (i = 0; i < com.ntime; i++) fprintf(frst2, " %9.6f", g[i]); fprintf(frst2, "\n\n");
549 fprintf(frst2, "\nHessian\n\n");
550 for (i = 0; i < com.ntime; i++, FPN(frst2))
551 for (j = 0; j < com.ntime; j++)
552 fprintf(frst2, " %10.4g", H[i*np + j]);
553 fflush(frst2);
554 }
555 for (i = 0; i < np*np; i++) H[i] *= -1;
556 matinv(H, np, np, H + np*np);
557 fprintf(fout, "SEs for parameters:\n");
558 for (i = 0; i < np; i++)
559 fprintf(fout, " %8.6f", (H[i*np + i] > 0. ? sqrt(H[i*np + i]) : -1));
560 FPN(fout);
561 }
562
563 /* if(com.clock) SetBranch(x); */
564 /* GroupDistances(); */
565
566 if (com.nbtype > 1 && com.clock>1)
567 fprintf(fout, "\nWarning: branch rates are not yet applied in tree length and branch lengths");
568 if (AbsoluteRate) {
569 fprintf(fout, "\nNote: mutation rate is not applied to tree length. Tree has ages, for TreeView & FigTree\n");
570 for (i = 0; i < tree.nnode; i++)
571 nodes[i].branch *= com.TipDate_TimeUnit;
572 }
573
574 if (!AbsoluteRate) {
575 for (i = 0, tl = 0; i < tree.nnode; i++)
576 if (i != tree.root) tl += nodes[i].branch;
577 fprintf(fout, "\ntree length = %9.5f%s\n", tl, (com.ngene > 1 ? " (1st gene)" : ""));
578 }
579 FPN(fout); OutTreeN(fout, 1, 0); FPN(fout);
580 FPN(fout); OutTreeN(fout, 1, 1); FPN(fout);
581 if (com.clock) {
582 FPN(fout); OutTreeN(fout, 1, PrNodeNum); FPN(fout);
583 }
584
585 if (com.TipDate) { /* scale back the ages in nodes[].branch */
586 for (i = 0; i < tree.nnode; i++) nodes[i].branch /= com.TipDate_TimeUnit;
587 }
588 if (com.np - com.ntime || com.clock)
589 DetailOutput(fout, x, H);
590 if (status) {
591 printf("convergence?\n"); fprintf(fout, "check convergence..\n");
592 }
593 if (itree == 0)
594 for (i = 0; i < np - com.ntime; i++) xcom[i] = x[com.ntime + i];
595 else if (!j)
596 for (i = 0; i < np - com.ntime; i++) xcom[i] = xcom[i] * .8 + x[com.ntime + i] * 0.2;
597 /*
598 TestModel(fout,x,1,com.space);
599 fprintf(fout,"\n\n# of lfun calls:%10d\n", NFunCall);
600 */
601
602 if (com.coding && com.Mgene != 1 && !AbsoluteRate) {
603 fputs("\nTree with branch lengths for codon models:\n", fout);
604 FOR(i, tree.nnode) nodes[i].branch *= (1 + com.rgene[1] + com.rgene[2]);
605 FPN(fout); OutTreeN(fout, 1, 1); FPN(fout);
606 FOR(i, tree.nnode) nodes[i].branch /= (1 + com.rgene[1] + com.rgene[2]);
607 }
608
609 com.print -= 9; com.plfun(x, np); com.print += 9;
610 if (com.print) {
611 if (com.plfun != lfun)
612 lfunRates(frate, x, np);
613
614 /** think about more-general models. Check that this may not be working for the clock models clock>=2 **/
615 if ((com.nhomo <= 2 && com.nparK == 0 && com.model <= REV && com.rho == 0)
616 || (com.nhomo > 2 && com.alpha == 0 && com.Mgene == 0))
617 AncestralSeqs(frst, x);
618 }
619 } /* for (itree) */
620
621 if (ntree > 1) {
622 fprintf(frst1, "\t%d\t", btree + 1);
623 PrintBestTree(frst1, ftree, btree);
624 }
625 if (finbasemlg) fclose(finbasemlg); if (frate) fclose(frate);
626 fclose(ftree);
627
628 if (ntree == -1) ntree = itree;
629 if (ntree > 1) { rewind(flnf); rell(flnf, fout, ntree); }
630 fclose(flnf);
631
632 return(0);
633 }
634
635
PrintBestTree(FILE * fout,FILE * ftree,int btree)636 void PrintBestTree(FILE *fout, FILE *ftree, int btree)
637 {
638 int itree, ntree, i;
639
640 rewind(ftree);
641 GetTreeFileType(ftree, &ntree, &i, 0);
642 for (itree = 0; ntree == -1 || itree < ntree; itree++) {
643 if (ReadTreeN(ftree, &i, 0, 1)) {
644 puts("\nend of tree file."); break;
645 }
646 if (itree == btree)
647 OutTreeN(fout, 1, 0);
648 }
649 }
650
651
TransformxBack(double x[])652 int TransformxBack(double x[])
653 {
654 /* transform variables x[] back to their original definition after iteration,
655 for output and for calculating SEs.
656 */
657 int i, k, K = com.ncatG;
658
659 k = com.ntime + com.nrgene + com.nrate;
660 for (i = 0; i < com.npi; i++)
661 f_and_x(x + k + 3 * i, x + k + 3 * i, 4, 0, 0);
662
663 k += com.npi * 3 + K - 1; /* K-1 for rK */
664 if (com.nparK == 2) /* rK & fK */
665 f_and_x(x + k, x + k, K, 0, 0);
666 else if (com.nparK == 3) /* rK & MK (double stochastic matrix) */
667 for (i = 0; i < K - 1; k += K - 1, i++) f_and_x(x + k, x + k, K, 0, 0);
668 else if (com.nparK == 4) /* rK & MK */
669 for (i = 0; i < K; k += K - 1, i++) f_and_x(x + k, x + k, K, 0, 0);
670 return(0);
671 }
672
673
DetailOutput(FILE * fout,double x[],double var[])674 void DetailOutput(FILE *fout, double x[], double var[])
675 {
676 int n = com.ncode, i, j, k = com.ntime, nkappa[] = { 0, 1, 0, 1, 1, 1, 2, 5, 11 };
677 int n31pi = (com.model == T92 ? 1 : 3);
678 double Qfactor, *p = com.pi, t = 0, k1, k2, S, V, Y = p[0] + p[1], R = p[2] + p[3];
679 int inode, a;
680 double *Qrate = x + com.ntime + com.nrgene, *AncientFreqs = NULL;
681 double EXij[NCODE*NCODE] = { 0 }, *Q = PMat, p0[NCODE], c[NCODE], ya;
682
683 fprintf(fout, "\nDetailed output identifying parameters\n");
684 if (com.clock) OutputTimesRates(fout, x, var);
685 k = com.ntime;
686
687 if (com.nrgene) { /* this used to be: if (com.nrgene && !com.clock) */
688 fprintf(fout, "\nrates for %d genes:%6.0f", com.ngene, 1.);
689 for (i = 0; i < com.nrgene; i++)
690 fprintf(fout, " %8.5f", x[k++]);
691 FPN(fout);
692 }
693
694 if (com.nhomo == 1) {
695 if (com.nrate) fprintf(fout, "rate (kappa or abcde) under %s:", models[com.model]);
696 for (i = 0; i < com.nrate; i++) fprintf(fout, " %8.5f", x[k++]); FPN(fout);
697 fprintf(fout, "Base frequencies:\n");
698 for (j = 0; j < n; j++) fprintf(fout, " %8.5f", com.pi[j]);
699 k += n31pi;
700 }
701 else if (com.nhomo >= 3) {
702 if (com.model >= F84 && com.model <= REV )
703 fprintf(fout, "kappa or abcde or Qrates under %s:", models[com.model]);
704 for (i = 0; i < com.nrate; i++) {
705 if (i == 0 || i % nkappa[com.model] == 0) fprintf(fout, "\n");
706 fprintf(fout, " %8.5f", x[k++]);
707 }
708 SetParameters(x);
709 if (com.alpha == 0) {
710 if ((AncientFreqs = (double*)malloc(tree.nnode * 4 * sizeof(double))) == NULL)
711 error2("out of memory for OldDistributions()");
712 OldDistributions(tree.root, AncientFreqs);
713 }
714 fputs("\n\n(frequency parameters for branches) [frequencies at nodes] (see Yang & Roberts 1995 fig 1)\n", fout);
715 for (i = 0; i < tree.nnode; i++, FPN(fout)) {
716 fprintf(fout, "Node #%2d (", i + 1);
717 for (j = 0; j < 4; j++) fprintf(fout, " %8.6f ", nodes[i].pi[j]);
718 fprintf(fout, ")");
719 if (com.alpha == 0) {
720 fprintf(fout, " [");
721 for (j = 0; j < 4; j++) fprintf(fout, " %8.6f", AncientFreqs[i * 4 + j]);
722 fprintf(fout, " GC = %5.3f ]", AncientFreqs[i * 4 + 1] + AncientFreqs[i * 4 + 3]);
723 }
724 }
725 fprintf(fout, "\nNote: node %d is root.\n", tree.root + 1);
726 k += com.npi*n31pi;
727
728 /* print expected numbers of changes along branches */
729 if (com.alpha == 0) {
730 fprintf(fout, "\n\nExpected numbers of nucleotide changes on branches\n\n");
731 for (inode = 0; inode < tree.nnode; inode++, FPN(fout)) {
732 if (inode == tree.root) continue;
733 t = nodes[inode].branch;
734 xtoy(AncientFreqs + nodes[inode].father*n, p0, n);
735
736 if (com.nhomo > 2 && com.model <= TN93) {
737 eigenTN93(com.model, *nodes[inode].pkappa, *(nodes[inode].pkappa + 1), nodes[inode].pi, &nR, Root, Cijk);
738 QTN93(com.model, Q, *nodes[inode].pkappa, *(nodes[inode].pkappa + 1), nodes[inode].pi);
739 }
740 else if (com.nhomo > 2 && com.model == REV)
741 eigenQREVbase(NULL, Q, nodes[inode].pkappa, nodes[inode].pi, &nR, Root, Cijk);
742
743 for (i = 0; i < n; i++) { /* calculate correction vector c[4] */
744 c[i] = 0; Q[i*n + i] = 0;
745 }
746
747 for (i = 0; i < n; i++) { /* calculate correction vector c[4] */
748 for (a = 1; a < n; a++) {
749 ya = -(1 - exp(Root[a] * t)) / Root[a];
750 for (k = 0; k < n; k++)
751 c[i] += p0[k] * Cijk[k*n*n + i*n + a] * ya;
752 }
753 for (j = 0; j < n; j++)
754 EXij[i*n + j] = Q[i*n + j] * (nodes[inode].pi[i] * t + c[i]);
755 }
756 fprintf(fout, "Node #%2d ", inode + 1);
757 if (inode < com.ns) fprintf(fout, "%-10s ", com.spname[inode]);
758 fprintf(fout, "(blength = %9.6f, %9.6f)", t, sum(EXij, n*n));
759 fprintf(fout, "\n%s: ", (com.model <= TN93 ? "kappa" : "abcde"));
760 for (j = 0; j < nkappa[com.model]; j++) fprintf(fout, " %9.6f ", *(nodes[inode].pkappa + j));
761 fprintf(fout, "\npi source: "); for (j = 0; j < n; j++) fprintf(fout, " %9.6f ", p0[j]);
762 fprintf(fout, "\npi model : "); for (j = 0; j < n; j++) fprintf(fout, " %9.6f ", nodes[inode].pi[j]);
763 fprintf(fout, "\nExij\n");
764 for (i = 0; i < n; i++, FPN(fout)) {
765 for (j = 0; j < n; j++) fprintf(fout, " %9.6f", EXij[i*n + j]);
766 fprintf(fout, " ");
767 for (j = 0; j < n; j++) fprintf(fout, " %8.1f", EXij[i*n + j] * com.ls);
768 }
769 /* matout(fout, c, 1, n); */
770 }
771 }
772
773 if (com.alpha == 0) free(AncientFreqs);
774 }
775 else if (!com.fix_kappa) {
776 fprintf(fout, "\nParameters %s in the rate matrix", (com.model <= TN93 ? "(kappa)" : ""));
777 fprintf(fout, " (%s) (Yang 1994 J Mol Evol 39:105-111):\n", models[com.model]);
778
779 if (com.nhomo == 2) {
780 fprintf(fout, "\nbranch t kappa TS TV\n");
781 for (i = 0; i < tree.nbranch; i++) {
782 if (com.model == F84) { k1 = 1 + x[k + i] / R; k2 = 1 + x[k + i] / R; }
783 else k1 = k2 = x[k + i];
784 S = 2 * p[0] * p[1] * k1 + 2 * p[2] * p[3] * k2;
785 V = 2 * Y*R;
786 Qfactor = 1 / (S + V);
787 /* t=(com.clock ? nodes[tree.branches[i][1]].branch : x[i]); */
788 t = nodes[tree.branches[i][1]].branch;
789 fprintf(fout, "%2d..%-2d %9.5f %8.5f %9.5f %8.5f\n",
790 tree.branches[i][0] + 1, tree.branches[i][1] + 1, t, x[k + i],
791 t*S / (S + V), t*V / (S + V));
792 }
793 }
794 /* Mgene = 0:rates, 1:separate; 2:diff pi, 3:diff kapa, 4:all diff*/
795 else if (com.Mgene >= 2) {
796 for (i = 0; i < com.ngene; i++) {
797 fprintf(fout, "\nGene #%d: ", i + 1);
798 p = (com.Mgene == 3 ? com.pi : com.piG[i]);
799 Qrate = (com.Mgene == 2 ? x + k : x + k + i*nkappa[com.model]);
800 if (com.model <= TN93)
801 FOR(j, nkappa[com.model]) fprintf(fout, " %8.5f", Qrate[j]);
802 else if (com.model == REV || com.model == REVu)
803 /* output Q matrix, no eigen calculation */
804 eigenQREVbase(fout, Q, Qrate, p, &nR, Root, Cijk);
805 }
806 if (com.Mgene >= 3) k += com.ngene*nkappa[com.model];
807 else k += nkappa[com.model];
808 }
809 else {
810 if (com.model < REV) FOR(i, com.nrate) fprintf(fout, " %8.5f", x[k++]);
811 else k += com.nrate;
812 }
813 FPN(fout);
814 }
815
816 if (com.Mgene < 2) {
817 if ((com.model == REV || com.model == REVu) && com.nhomo <= 2) /* output Q, no eigen calculation */
818 eigenQREVbase(fout, Q, Qrate, com.pi, &nR, Root, Cijk);
819 else if (com.model == UNREST || com.model == UNRESTu)
820 QUNREST(fout, PMat, x + com.ntime + com.nrgene, com.pi);
821 }
822
823 for (j = 0; j < com.nalpha; j++) {
824 if (!com.fix_alpha)
825 fprintf(fout, "\nalpha (gamma, K=%d) = %8.5f", com.ncatG, (com.alpha = x[k++]));
826 if (com.nalpha > 1)
827 DiscreteGamma(com.freqK, com.rK, com.alpha, com.alpha, com.ncatG, DGammaUseMedian);
828 fprintf(fout, "\nrate: "); FOR(i, com.ncatG) fprintf(fout, " %8.5f", com.rK[i]);
829 fprintf(fout, "\nfreq: "); FOR(i, com.ncatG) fprintf(fout, " %8.5f", com.freqK[i]);
830 FPN(fout);
831 }
832 if (!com.fix_rho) {
833 fprintf(fout, "rho for the auto-discrete-gamma model: %9.5f", x[k]);
834 FPN(fout);
835 }
836 if (com.nparK >= 1 && com.nalpha <= 1) {
837 fprintf(fout, "\nrate:"); for (i = 0; i<com.ncatG; i++) fprintf(fout, " %8.5f", com.rK[i]);
838 fprintf(fout, "\nfreq:"); for (i = 0; i<com.ncatG; i++) fprintf(fout, " %8.5f", com.freqK[i]);
839 FPN(fout);
840 }
841 if (com.rho || (com.nparK >= 3 && com.nalpha <= 1)) {
842 fprintf(fout, "transition probabilities between rate categories:\n");
843 for (i = 0; i < com.ncatG; i++, FPN(fout))
844 for(j=0; j<com.ncatG; j++)
845 fprintf(fout, " %8.5f", com.MK[i*com.ncatG + j]);
846 }
847 FPN(fout);
848 }
849
GetStepMatrix(char * line)850 int GetStepMatrix(char*line)
851 {
852 /* read user definitions of the REV and UNREST models
853 StepMatrix[4*4]:
854 -1 at diagonals, 0 for default rate, and positive values for rates
855 */
856 char *p, *errstr = "StepMatrix specification in the control file";
857 int n = com.ncode, i, j, k, b1 = -1, b2;
858
859 p = strchr(line, '[');
860 if (p == NULL) error2("model specification. Expecting '['.");
861 sscanf(++p, "%d", &com.nrate);
862 if (com.nrate < 0 || (com.model == REVu && com.nrate > 5) || (com.model == UNRESTu && com.nrate > 11))
863 error2(errstr);
864 for (i = 0; i < n; i++) for (j = 0; j < n; j++)
865 StepMatrix[i*n + j] = (i == j ? -1 : 0);
866 for (i = 0; i < com.nrate; i++) {
867 while (*p && *p != '(') p++;
868 if (*p++ != '(') error2("expecting (");
869 for (k = 0; k < 12; k++) {
870 while (isspace(*p)) p++;
871 if (*p == ')') break;
872 b1 = CodeChara(*p++, 0);
873 b2 = CodeChara(*p++, 0);
874 if (b1 < 0 || b1>3 || b2 < 0 || b2>3) error2("bases out of range.");
875 if (b1 == b2 || StepMatrix[b1*n + b2] > 0) {
876 printf("pair %c%c already specified.\n", BASEs[b1], BASEs[b2]);
877 }
878 if (com.model == REVu) StepMatrix[b1*n + b2] = StepMatrix[b2*n + b1] = i + 1;
879 else StepMatrix[b1*n + b2] = i + 1;
880 }
881 printf("rate %d: %d pairs\n", i + 1, k);
882 }
883 for (i = 0; i < n; i++, FPN(F0)) for (j = 0; j < n; j++)
884 printf("%3d", StepMatrix[i*n + j]);
885
886 return(0);
887 }
888
GetOptions(char * ctlf)889 int GetOptions(char *ctlf)
890 {
891 int iopt, i, j, nopt = 31, lline = 4096;
892 char line[4096], *pline, opt[32], *comment = "*#";
893 char *optstr[] = { "seqfile","outfile","treefile","noisy", "cleandata",
894 "verbose","runmode", "method", "clock", "TipDate", "fix_rgene","Mgene", "nhomo",
895 "getSE","RateAncestor", "model","fix_kappa","kappa",
896 "fix_alpha","alpha","Malpha","ncatG", "fix_rho","rho",
897 "nparK", "ndata", "bootstrap", "Small_Diff","icode", "fix_blength", "seqtype" };
898 double t;
899 FILE *fctl;
900 int ng = -1, markgenes[NGENE];
901
902 com.nalpha = 0;
903 fctl = gfopen(ctlf, "r");
904 if (noisy) printf("Reading options from %s..\n", ctlf);
905 for (; ; ) {
906 if (fgets(line, lline, fctl) == NULL) break;
907 for (i = 0, t = 0, pline = line; i < lline&&line[i]; i++)
908 if (isalnum(line[i])) { t = 1; break; }
909 else if (strchr(comment, line[i])) break;
910 if (t == 0) continue;
911 sscanf(line, "%s%*s%lf", opt, &t);
912 if ((pline = strstr(line, "=")) == NULL)
913 error2("option file.");
914
915 for (iopt = 0; iopt < nopt; iopt++) {
916 if (strncmp(opt, optstr[iopt], 8) == 0) {
917 if (noisy >= 9)
918 printf("\n%3d %15s | %-20s %6.2f", iopt + 1, optstr[iopt], opt, t);
919 switch (iopt) {
920 case (0): sscanf(pline + 1, "%s", com.seqf); break;
921 case (1): sscanf(pline + 1, "%s", com.outf); break;
922 case (2): sscanf(pline + 1, "%s", com.treef); break;
923 case (3): noisy = (int)t; break;
924 case (4): com.cleandata = (char)t; break;
925 case (5): com.verbose = (int)t; break;
926 case (6): com.runmode = (int)t; break;
927 case (7): com.method = (int)t; break;
928 case (8): com.clock = (int)t; break;
929 case (9):
930 sscanf(pline + 1, "%lf%lf", &com.TipDate, &com.TipDate_TimeUnit);
931 break;
932 case (10): com.fix_rgene = (int)t; break;
933 case (11): com.Mgene = (int)t; break;
934 case (12): com.nhomo = (int)t; break;
935 case (13): com.getSE = (int)t; break;
936 case (14): com.print = (int)t; break;
937 case (15): com.model = (int)t;
938 if (com.model > UNREST) GetStepMatrix(line); break;
939 case (16): com.fix_kappa = (int)t; break;
940 case (17):
941 com.kappa = t;
942 if (com.fix_kappa && (com.clock == 5 || com.clock == 6)
943 && com.model != 0 && com.model != 2) {
944 ng = splitline(++pline, min2(ng, com.ndata), markgenes);
945 for (j = 0; j < min2(ng, com.ndata); j++)
946 if (!sscanf(pline + markgenes[j], "%lf", &data.kappa[j])) break;
947 /*
948 matout(F0, data.kappa, 1, min2(ng,com.ndata));
949 */
950 }
951 break;
952 case (18): com.fix_alpha = (int)t; break;
953 case (19):
954 com.alpha = t;
955 if (com.fix_alpha && t && (com.clock == 5 || com.clock == 6)) {
956 ng = splitline(++pline, min2(ng, com.ndata), markgenes);
957 for (j = 0; j < min2(ng, com.ndata); j++)
958 if (!sscanf(pline + markgenes[j], "%lf", &data.alpha[j])) break;
959 /*
960 matout(F0, data.alpha, 1, min2(ng,com.ndata));
961 */
962 }
963 break;
964 case (20): com.nalpha = (int)t; break;
965 case (21): com.ncatG = (int)t; break;
966 case (22): com.fix_rho = (int)t; break;
967 case (23): com.rho = t; break;
968 case (24): com.nparK = (int)t; break;
969 case (25): com.ndata = (int)t; break;
970 case (26): com.bootstrap = (int)t; break;
971 case (27): Small_Diff = t; break;
972 case (28): com.icode = (int)t; com.coding = 1; break;
973 case (29): com.fix_blength = (int)t; break;
974 case (30):
975 com.seqtype = (int)t;
976 if (com.seqtype == 2) com.ncode = 2;
977 else if (com.seqtype == 5) com.ncode = 5;
978 break;
979 }
980 break;
981 }
982 }
983 if (iopt == nopt)
984 {
985 printf("\noption %s in %s not recognised\n", opt, ctlf); exit(-1);
986 }
987 }
988 fclose(fctl);
989
990 if ((com.fix_kappa || (com.fix_alpha&&com.alpha)) && (com.clock == 5 || com.clock == 6))
991 printf("Using parameters from the control file.");
992
993
994 if (com.seqtype != 0 && com.seqtype != 4 && com.seqtype != 5) error2("seqtype?");
995 if (com.fix_alpha == 1 && com.alpha == 0) {
996 if (!com.fix_rho || com.rho) error2("fix rho to 0 if alpha=0.");
997 }
998 if (com.nparK >= 1) {
999 com.fix_alpha = com.fix_rho = 1;
1000 if (com.alpha == 0) com.alpha = 0.5; /* used to generate rK as initial values */
1001 if (com.nparK <= 2) com.rho = 0;
1002 else com.rho = 0.4;
1003 if (com.nhomo >= 1)
1004 error2("nhomo & nparK");
1005 }
1006 if (com.model != F84 && com.kappa <= 0) error2("init kappa..");
1007 if (!com.fix_alpha && com.alpha <= 0) error2("init alpha..");
1008 if (!com.fix_rho && com.rho == 0) { com.rho = 0.001; puts("init rho reset"); }
1009
1010 if (com.alpha)
1011 {
1012 if (com.ncatG<2 || com.ncatG>NCATG) error2("ncatG");
1013 }
1014 else if (com.ncatG > 1) com.ncatG = 1;
1015
1016 if (com.method && (com.clock || com.rho))
1017 {
1018 com.method = 0; puts("Iteration method reset: method = 0");
1019 }
1020 if (com.method && (com.model == UNREST || com.model == UNRESTu))
1021 error2("I did not implemented method = 1 for UNREST. Use method = 0");
1022
1023 if (com.model > UNREST && com.Mgene > 2)
1024 error2("u models don't work with Mgene");
1025
1026 if (com.nhomo > 5)
1027 error2("nhomo");
1028 else if (com.nhomo == 2) {
1029 if (com.model != K80 && com.model != F84 && com.model != HKY85)
1030 error2("nhomo = 2 works with F84 or HKY85");
1031 }
1032 else if (com.nhomo > 2 && !(com.model >= F84 && com.model <= REV)) error2("nhomo & model");
1033 else if (com.nhomo > 2 && com.method) error2("nhomo & method.");
1034 else {
1035 if (com.nhomo == 1 && !(com.model >= F81 && com.model <= REV) && com.model != REVu)
1036 error2("nhomo=1 and model");
1037 }
1038
1039 if (com.nhomo >= 2 && com.clock == 2) error2("clock=2 & nhomo imcompatible");
1040
1041 if (com.clock >= 5 && com.model >= TN93) error2("model & clock imcompatible");
1042
1043 if (com.nhomo > 1 && com.runmode > 0) error2("nhomo incompatible with runmode");
1044 if (com.runmode == -2) error2("runmode = -2 not implemented in baseml.");
1045 if (com.clock && com.runmode > 2) error2("runmode & clock?");
1046 if (com.runmode) com.fix_blength = 0;
1047 if (com.runmode == 3 && (com.npi || com.nparK))
1048 error2("runmode incompatible with nparK or nhomo.");
1049
1050 if (com.model == JC69 || com.model == K80 || com.model == UNREST)
1051 if (com.nhomo != 2) com.nhomo = 0;
1052 if (com.model == JC69 || com.model == F81) { com.fix_kappa = 1; com.kappa = 1; }
1053 if ((com.model == TN93 || com.model == REV || com.model == REVu) && com.nhomo <= 2)
1054 com.fix_kappa = 0;
1055 if (com.seqtype == 5 && com.model != REVu)
1056 error2("RNA-editing model requires REVu");
1057
1058 if (com.nparK == 3) {
1059 puts("\n\nnparK==3, double stochastic, may not work. Use nparK=4?\n");
1060 getchar();
1061 }
1062 com.fix_omega = 1; com.omega = 0;
1063 if (com.ndata <= 0) com.ndata = 1;
1064 if (com.bootstrap && com.ndata != 1) error2("ndata=1 for bootstrap.");
1065
1066 return (0);
1067 }
1068
1069
GetInitials(double x[],int * fromfile)1070 int GetInitials(double x[], int *fromfile)
1071 {
1072 /* This calculates com.np, com.ntime, com.npi, com.nrate etc., and gets
1073 initial values for ML iteration. It also calculates some of the
1074 statistics (such as eigen values and vectors) that won't change during
1075 the likelihood iteration. However, think about whether this is worthwhile.
1076 The readfile option defeats this effort right now as the x[] is read in
1077 after such calculations. Some of the effort is duplicated in
1078 SetParameters(). Needs more careful thinking.
1079 */
1080 int i, j, k, K = com.ncatG, n31pi = (com.model == T92 ? 1 : 3);
1081 int nkappa[] = { 0, 1, 0, 1, 1, 1, 2, 5, 11, -1, -1 }, nkappasets;
1082 size_t sconP_new = (size_t)(tree.nnode - com.ns)*com.ncode*com.npatt*com.ncatG * sizeof(double);
1083 double t = -1;
1084
1085 NFunCall = NPMatUVRoot = NEigenQ = 0;
1086 if (com.clock == ClockCombined && com.ngene <= 1)
1087 error2("Combined clock model requires mutliple genes.");
1088 GetInitialsTimes(x);
1089
1090 com.plfun = lfunAdG;
1091 if (com.alpha == 0 && com.nparK == 0)
1092 com.plfun = lfun;
1093 else if ((com.alpha && com.rho == 0) || com.nparK == 1 || com.nparK == 2)
1094 com.plfun = lfundG;
1095
1096 if (com.clock && com.fix_blength == -1)
1097 com.fix_blength = 0;
1098
1099 if (com.method && com.fix_blength != 2 && com.plfun == lfundG) {
1100 com.conPSiteClass = 1;
1101 if (com.sconP < sconP_new) {
1102 com.sconP = sconP_new;
1103 printf("\n%9zu bytes for conP, adjusted\n", com.sconP);
1104 if ((com.conP = (double*)realloc(com.conP, com.sconP)) == NULL)
1105 error2("oom conP");
1106 }
1107 }
1108 InitializeNodeScale();
1109
1110 com.nrgene = (!com.fix_rgene)*(com.ngene - 1);
1111 for (j = 0; j < com.nrgene; j++) x[com.ntime + j] = 1;
1112 if (com.fix_kappa && (com.Mgene == 3 || com.Mgene == 4)) error2("Mgene options");
1113
1114 if (com.model <= UNREST) {
1115 com.nrate = 0;
1116 if (!com.fix_kappa) {
1117 com.nrate = nkappa[com.model];
1118 if (com.Mgene >= 3)
1119 com.nrate *= com.ngene;
1120 }
1121 }
1122 switch (com.nhomo) {
1123 case (0): com.npi = 0; break; /* given 1 pi */
1124 case (1): com.npi = 1; break; /* estimate 1 pi */
1125 case (2): com.npi = 0; com.nrate = tree.nbranch; break; /* b kappa's */
1126 case (4):
1127 com.npi = tree.nnode; /* nnode pi */
1128 com.nrate = nkappa[com.model] * (com.fix_kappa ? 1 : tree.nbranch);
1129 break;
1130 case (3): /* ns+2 pi: tips, internal & root */
1131 case (5): /* pi: user-specified sets of pi for branches */
1132 if (com.nhomo == 3) {
1133 for (i = 0; i < tree.nnode; i++)
1134 nodes[i].label = (i < com.ns ? i : com.ns);
1135 com.npi = com.nbtype = com.ns + (tree.root >= com.ns);
1136 if (tree.root >= com.ns)
1137 nodes[tree.root].label = com.npi++;
1138 }
1139 else { /* nbtype is for Qrates & npi for pi. */
1140 com.npi = com.nbtype;
1141 if (tree.root >= com.ns) { /* root may have a new set of pi */
1142 j = (int)nodes[tree.root].label;
1143 if (j == com.nbtype)
1144 nodes[tree.root].label = com.npi++;
1145 else if (j <0 || j > com.nbtype)
1146 error2("nhomo = 5: label for root strange?");
1147 }
1148 }
1149
1150 printf("%d sets of frequency parameters\n", com.npi);
1151 /* number of sets of Qrates */
1152 j = (com.fix_kappa == 0 ? tree.nbranch : (com.fix_kappa == 1 ? 1 : com.nbtype));
1153 com.nrate = nkappa[com.model] * j;
1154 break;
1155 }
1156
1157 if (com.model <= TN93 && com.Mgene <= 1 && com.nhomo == 0)
1158 eigenTN93(com.model, com.kappa, com.kappa, com.pi, &nR, Root, Cijk);
1159 if (com.model == REV || com.model == UNREST)
1160 for (j = 0; j < (com.Mgene >= 3 ? com.ngene : 1); j++) {
1161 k = com.ntime + com.nrgene + j*(com.model == REV ? 5 : 11);
1162 for (i = 0; i < com.nrate; i++)
1163 x[k + i] = 0.4 + 0.2*rndu();
1164 if (com.model == UNREST)
1165 x[k] = x[k + 3] = x[k + 8] = 0.8 + 0.2*rndu();
1166 if (com.model == REV) {
1167 nkappasets = com.nrate / 5;
1168 for (j = 0; j < nkappasets; j++)
1169 x[k + j * 5] = 1 + 0.1*(j + 1); /* 0.8+0.2*rndu(); */
1170 }
1171 }
1172 else
1173 for (i = 0; i < com.nrate; i++)
1174 x[com.ntime + com.nrgene + i] = 0.02 + com.kappa*(.8 + 0.4*rndu());
1175
1176 for (i = 0; i < com.npi*n31pi; i++) /* initials for transformed pi's */
1177 x[com.ntime + com.nrgene + com.nrate + i] = rndu()*.2;
1178 com.np = k = com.ntime + com.nrgene + com.nrate + com.npi*n31pi;
1179
1180 if (com.alpha || com.nparK) {
1181 for (i = 0; i < com.nalpha; i++)
1182 x[k++] = com.alpha = 0.01 + com.alpha*(.9 + 0.2*rndu());
1183
1184 if (!com.fix_rho) x[k++] = com.rho = 0.01 + com.rho*(.8 + 0.4*rndu());
1185
1186 if (com.rho)
1187 AutodGamma(com.MK, com.freqK, com.rK, &t, com.alpha, com.rho, K);
1188 else
1189 DiscreteGamma(com.freqK, com.rK, com.alpha, com.alpha, K, DGammaUseMedian);
1190
1191 if (com.nparK) {
1192 xtoy(com.rK, x + k, K - 1);
1193 k += K - 1;
1194 }
1195 switch (com.nparK) {
1196 case (2): /* rK & fK */
1197 /* zero(x+k, K-1); */
1198 for (i = 0; i < K - 1; i++) x[k + i] = 0.1*(0.5 - rndu());
1199 k += K - 1;
1200 break;
1201 case (3): /* rK & MK (double stochastic) */
1202 /* zero(x+k, (K-1)*(K-1)); */
1203 for (i = 0; i < (K - 1)*(K - 1); i++) x[k + i] = 0.1*(0.5 - rndu());
1204 k += (K - 1)*(K - 1);
1205 break;
1206 case (4): /* rK & MK */
1207 /* zero(x+k, K*(K-1)); */
1208 for (i = 0; i < K*(K - 1); i++) x[k + i] = 0.1*(0.5 - rndu());
1209 k += K*(K - 1);
1210 break;
1211 }
1212 com.np = k;
1213 }
1214
1215 if (com.fix_blength == -1)
1216 for (i = 0; i < com.np; i++) x[i] = (i < com.ntime ? .1 + rndu() : .5 + rndu());
1217
1218 if (finitials) readx(x, fromfile);
1219 else *fromfile = 0;
1220
1221 return (0);
1222 }
1223
1224
1225
1226
SetParameters(double x[])1227 int SetParameters(double x[])
1228 {
1229 /* This sets parameters in com., nodes[], etc and is called before lfun() etc.
1230 Iinitialize U, V, Root etc, if necessary.
1231 For nhomo models (nhomo=1,3,4)
1232 x[] has frequencies if (LASTROUND==1) or exp(pi)/(1+SUM(exp[pi])) if otherwise
1233 */
1234 int i, j, k, K = com.ncatG, status = 0, n31pi = (com.model == T92 ? 1 : 3);
1235 int nkappa[] = { 0, 1, 0, 1, 1, 1, 2, 5, 11, -1, -1 };
1236 double k1 = com.kappa, k2 = com.kappa, t, space[NCATG*(NCATG + 1)];
1237
1238 if (com.clock >= 5) return(0);
1239 if (com.fix_blength != 2) SetBranch(x);
1240
1241 for (i = 0; i < com.nrgene; i++) com.rgene[i + 1] = x[com.ntime + i];
1242 if (com.clock && com.clock < 5 && AbsoluteRate)
1243 com.rgene[0] = x[0]; /* so that rgene are absolute rates */
1244
1245 if (!com.fix_kappa && com.model <= TN93 && com.clock < 5) {
1246 com.kappa = k1 = k2 = x[com.ntime + com.nrgene];
1247 if (com.model == TN93) k2 = x[com.ntime + com.nrgene + 1];
1248 }
1249 if (com.nhomo == 1) {
1250 k = com.ntime + com.nrgene + com.nrate;
1251 if (com.model == T92) { /* T=A, C=G */
1252 com.pi[0] = com.pi[2] = (1 - x[k]) / 2; com.pi[1] = com.pi[3] = x[k] / 2;
1253 }
1254 else {
1255 if (!LASTROUND) f_and_x(x + k, com.pi, 4, 0, 0);
1256 else xtoy(x + k, com.pi, 3);
1257 com.pi[3] = 1 - sum(com.pi, 3);
1258 }
1259 if (com.model <= TN93)
1260 eigenTN93(com.model, k1, k2, com.pi, &nR, Root, Cijk);
1261 }
1262 else if (com.nhomo == 2)
1263 for (i = 0, k = com.ntime + com.nrgene; i < tree.nbranch; i++)
1264 nodes[tree.branches[i][1]].pkappa = x + k + i;
1265
1266 if (com.model <= TN93 && com.nhomo == 0 && com.Mgene <= 1)
1267 RootTN93(com.model, k1, k2, com.pi, &t, Root);
1268 else if (com.nhomo >= 3) {
1269 for (i = 0, k = com.ntime + com.nrgene; i < tree.nbranch; i++) { /* Qrate for branches */
1270 if (com.fix_kappa == 0) /* each branch has its Qrate */
1271 nodes[tree.branches[i][1]].pkappa = x + k + i*nkappa[com.model];
1272 else if (com.fix_kappa == 1) /* same Qrate for all branches */
1273 nodes[tree.branches[i][1]].pkappa = x + k;
1274 else if (com.fix_kappa == 2) /* sets of Qrate */
1275 nodes[tree.branches[i][1]].pkappa = x + k + (int)nodes[tree.branches[i][1]].label*nkappa[com.model];
1276 }
1277 k += com.nrate;
1278
1279 for (i = 0; i < tree.nnode; i++) { /* pi for nodes */
1280 j = (com.nhomo == 4 ? i : (int)nodes[i].label);
1281 if (com.model == T92) {
1282 nodes[i].pi[0] = nodes[i].pi[2] = (1 - x[k + j]) / 2; /* TA */
1283 nodes[i].pi[1] = nodes[i].pi[3] = x[k + j] / 2; /* CG */
1284 }
1285 else {
1286 if (!LASTROUND) f_and_x(x + k + j * 3, nodes[i].pi, 4, 0, 0);
1287 else xtoy(x + k + j * 3, nodes[i].pi, 3);
1288 nodes[i].pi[3] = 1 - sum(nodes[i].pi, 3);
1289 }
1290 }
1291 xtoy(nodes[tree.root].pi, com.pi, 4);
1292 }
1293 else if ((com.model == REV || com.model == REVu) && com.Mgene <= 1)
1294 eigenQREVbase(NULL, PMat, x + com.ntime + com.nrgene, com.pi, &nR, Root, Cijk);
1295 /*
1296 else if ((com.model==UNREST || com.model==UNRESTu) && com.Mgene<=1)
1297 eigenQunrest (NULL, x+com.ntime+com.nrgene,com.pi,&nR,cRoot,cU,cV);
1298 */
1299
1300 if (com.nparK == 0 && (com.alpha == 0 || com.fix_alpha*com.fix_rho == 1))
1301 return(status);
1302 if (com.nalpha > 1) return (status);
1303 k = com.ntime + com.nrate + com.nrgene + com.npi*n31pi;
1304 if (!com.fix_alpha) {
1305 com.alpha = x[k++];
1306 if (com.fix_rho)
1307 DiscreteGamma(com.freqK, com.rK, com.alpha, com.alpha, K, DGammaUseMedian);
1308 }
1309 if (!com.fix_rho) {
1310 com.rho = x[k++];
1311 AutodGamma(com.MK, com.freqK, com.rK, &t, com.alpha, com.rho, K);
1312 }
1313 if (com.nparK == 0) return(status);
1314
1315 /* nparK models */
1316 xtoy(x + k, com.rK, K - 1);
1317
1318 if (com.nparK == 2) {
1319 if (!LASTROUND) f_and_x(x + k + K - 1, com.freqK, K, 0, 0);
1320 else xtoy(x + k + K - 1, com.freqK, K - 1);
1321 com.freqK[K - 1] = 1 - sum(com.freqK, K - 1);
1322 }
1323 else if (com.nparK == 3) { /* rK & MK (double stochastic matrix) */
1324 for (i = 0, k += K - 1; i < K - 1; k += K - 1, i++) {
1325 if (!LASTROUND) f_and_x(x + k, com.MK + i*K, K, 0, 0);
1326 else xtoy(x + k, com.MK + i*K, K - 1);
1327 com.MK[i*K + K - 1] = 1 - sum(com.MK + i*K, K - 1);
1328 }
1329 FOR(j, K) {
1330 for (i = 0, com.MK[(K - 1)*K + j] = 1; i < K - 1; i++)
1331 com.MK[(K - 1)*K + j] -= com.MK[i*K + j];
1332 if (com.MK[(K - 1)*K + j] < 0)
1333 printf("SetPar: MK[K-1][j]=%.5f<0\n", com.MK[(K - 1)*K + j]);
1334 }
1335 }
1336 else if (com.nparK == 4) { /* rK & MK */
1337 for (i = 0, k += K - 1; i < K; k += K - 1, i++) {
1338 if (!LASTROUND) f_and_x(x + k, com.MK + i*K, K, 0, 0);
1339 else xtoy(x + k, com.MK + i*K, K - 1);
1340 com.MK[i*K + K - 1] = 1 - sum(com.MK + i*K, K - 1);
1341 }
1342 PtoPi(com.MK, com.freqK, K, space);
1343 }
1344 com.rK[K - 1] = (1 - innerp(com.freqK, com.rK, K - 1)) / com.freqK[K - 1];
1345 return (status);
1346 }
1347
1348
SetPGene(int igene,int _pi,int _UVRoot,int _alpha,double x[])1349 int SetPGene(int igene, int _pi, int _UVRoot, int _alpha, double x[])
1350 {
1351 /* xcom[] does not contain time parameters
1352 Note that com.piG[][] have been homogeneized if (com.Mgene==3)
1353 */
1354 int nr[] = { 0, 1, 0, 1, 1, 1, 2, 5, 11 };
1355 int k = com.nrgene + (com.Mgene >= 3)*igene*nr[com.model];
1356 double *xcom = x + com.ntime;
1357 double ka1 = xcom[k], ka2 = (com.model == TN93 ? xcom[k + 1] : -1);
1358
1359 if (com.Mgene == 2 && com.fix_kappa) ka1 = ka2 = com.kappa;
1360
1361 if (_pi) {
1362 xtoy(com.piG[igene], com.pi, 4);
1363 }
1364 if (_UVRoot) {
1365 if (com.model == K80) com.kappa = ka1;
1366 else if (com.model <= TN93)
1367 eigenTN93(com.model, ka1, ka2, com.pi, &nR, Root, Cijk);
1368 else if (com.model == REV || com.model == REVu)
1369 eigenQREVbase(NULL, PMat, xcom + k, com.pi, &nR, Root, Cijk);
1370 }
1371 if (_alpha) {
1372 com.alpha = xcom[com.nrgene + com.nrate + com.npi + igene]; /* check?? */
1373 DiscreteGamma(com.freqK, com.rK, com.alpha, com.alpha, com.ncatG, DGammaUseMedian);
1374 }
1375 return(0);
1376 }
1377
1378
SetxBound(int np,double xb[][2])1379 int SetxBound(int np, double xb[][2])
1380 {
1381 /* sets lower and upper bounds for variables during iteration
1382 */
1383 int i, j, k = 0, nf = 0, n31pi = (com.model == T92 ? 1 : 3);
1384 double tb[] = { .4e-6, 9999 }, tb0 = .4e-6, rgeneb[] = { 1e-4, 999 }, rateb[] = { 1e-5, 999 };
1385 double alphab[] = { .005, 999 }, rhob[] = { -0.2, 0.99 }, pb[] = { .00001, .99999 };
1386 double fb[] = { -19,9 }; /* transformed freqs.*/
1387
1388 SetxBoundTimes(xb);
1389 for (i = com.ntime; i < np; i++)
1390 for (j = 0; j < 2; j++) xb[i][j] = rateb[j];
1391
1392 for (i = 0; i < com.nrgene; i++)
1393 for (j = 0; j < 2; j++) xb[com.ntime + i][j] = rgeneb[j];
1394 for (i = 0; i < com.nrate; i++)
1395 for (j = 0; j < 2; j++) xb[com.ntime + com.nrgene + i][j] = rateb[j];
1396 k = com.ntime + com.nrgene + com.nrate;
1397 for (i = 0; i < com.npi*n31pi; i++) {
1398 xb[k][0] = (com.model == T92 ? pb[0] : fb[0]);
1399 xb[k++][1] = (com.model == T92 ? pb[1] : fb[1]);
1400 }
1401 for (i = 0; i < com.nalpha; i++, k++) FOR(j, 2) xb[k][j] = alphab[j];
1402 if (!com.fix_rho) FOR(j, 2) xb[np - 1][j] = rhob[j];
1403 if (com.nparK) {
1404 for (i = 0; i < com.ncatG - 1; i++) { xb[k][0] = rateb[0]; xb[k++][1] = rateb[1]; }
1405 if (com.nparK == 2) nf = com.ncatG - 1;
1406 else if (com.nparK == 3) nf = (com.ncatG - 1)*(com.ncatG - 1);
1407 else if (com.nparK == 4) nf = (com.ncatG - 1)*com.ncatG;
1408 for (i = 0; i < nf; i++) { xb[k][0] = fb[0]; xb[k++][1] = fb[1]; }
1409 }
1410 if (noisy > 2 && np < 50) {
1411 printf("\nBounds (np=%d):\n", np);
1412 for (i = 0; i < np; i++) printf(" %10.6f", xb[i][0]); FPN(F0);
1413 for (i = 0; i < np; i++) printf(" %10.6f", xb[i][1]); FPN(F0);
1414 }
1415 return(0);
1416 }
1417
testx(double x[],int np)1418 int testx(double x[], int np)
1419 {
1420 /* This is used for LS branch length estimation by nls2, called only if(clock==0)
1421 */
1422 int i;
1423 double tb[] = { .4e-6, 99 };
1424
1425 FOR(i, com.ntime)
1426 if (x[i]<tb[0] || x[i]>tb[1])
1427 return (-1);
1428 return (0);
1429 }
1430
1431
1432
ConditionalPNode(int inode,int igene,double x[])1433 int ConditionalPNode(int inode, int igene, double x[])
1434 {
1435 int n = com.ncode, i, j, k, h, ison, pos0 = com.posG[igene], pos1 = com.posG[igene + 1];
1436 double t;
1437
1438 for (i = 0; i < nodes[inode].nson; i++)
1439 if (nodes[nodes[inode].sons[i]].nson > 0 && !com.oldconP[nodes[inode].sons[i]])
1440 ConditionalPNode(nodes[inode].sons[i], igene, x);
1441 if (inode < com.ns) { /* young ancestor */
1442 for (h = pos0*n; h < pos1*n; h++)
1443 nodes[inode].conP[h] = 0;
1444 }
1445 else
1446 for (h = pos0*n; h < pos1*n; h++)
1447 nodes[inode].conP[h] = 1;
1448 if (com.cleandata && inode < com.ns) /* young ancestor */
1449 for (h = pos0; h < pos1; h++)
1450 nodes[inode].conP[h*n + com.z[inode][h]] = 1;
1451
1452 for (i = 0; i < nodes[inode].nson; i++) {
1453 ison = nodes[inode].sons[i];
1454 t = nodes[ison].branch*_rateSite;
1455
1456 if (com.clock < 5) {
1457 if (com.clock) t *= GetBranchRate(igene, (int)nodes[ison].label, x, NULL);
1458 else t *= com.rgene[igene];
1459 }
1460 GetPMatBranch(PMat, x, t, ison);
1461
1462 if (nodes[ison].nson < 1 && com.cleandata) { /* tip && clean */
1463 for (h = pos0; h < pos1; h++)
1464 for (j = 0; j < n; j++)
1465 nodes[inode].conP[h*n + j] *= PMat[j*n + com.z[ison][h]];
1466 }
1467 else if (nodes[ison].nson < 1 && !com.cleandata) { /* tip & unclean */
1468 for (h = pos0; h < pos1; h++)
1469 for (j = 0; j < n; j++) {
1470 for (k = 0, t = 0; k < nChara[com.z[ison][h]]; k++)
1471 t += PMat[j*n + CharaMap[com.z[ison][h]][k]];
1472 nodes[inode].conP[h*n + j] *= t;
1473 }
1474 }
1475 else {
1476 for (h = pos0; h < pos1; h++)
1477 for (j = 0; j < n; j++) {
1478 for (k = 0, t = 0; k < n; k++)
1479 t += PMat[j*n + k] * nodes[ison].conP[h*n + k];
1480 nodes[inode].conP[h*n + j] *= t;
1481 }
1482 }
1483 } /* for (ison) */
1484 if (com.NnodeScale && com.nodeScale[inode]) NodeScale(inode, pos0, pos1);
1485 return (0);
1486 }
1487
PMatCijk(double P[],double t)1488 int PMatCijk(double P[], double t)
1489 {
1490 /* P(t)ij = SUM Cijk * exp{Root*t}
1491 */
1492 int i, j, k, n = com.ncode, nr = nR;
1493 double exptm1[5] = {0};
1494
1495 memset(P, 0, n*n * sizeof(double));
1496 for (k = 1; k < nr; k++) exptm1[k] = expm1(t*Root[k]);
1497 for (i = 0; i < n; i++) {
1498 for (j = 0; j < n; j++) {
1499 for (k = 0; k < nr; k++)
1500 P[i*n + j] += Cijk[i*n*nr + j*nr + k] * exptm1[k];
1501 }
1502 P[i*n + i] ++;
1503 }
1504 return (0);
1505 }
1506
1507
1508
1509 #ifdef UNDEFINED
1510
CollapsSite(FILE * fout,int nsep,int ns,int * ncat,int SiteCat[])1511 int CollapsSite(FILE *fout, int nsep, int ns, int *ncat, int SiteCat[])
1512 {
1513 int j, k, it, h, b[NS], ndiff, n1, bit1;
1514 /* n1: # of 1's ... bit1: the 1st nonzero bit */
1515
1516 *ncat = 5 + (1 << (ns - 1)) - 1 + nsep * 11;
1517 if (fout) fprintf(fout, "\n# cat:%5d # sep:%5d\n\n", *ncat, nsep);
1518
1519 FOR(h, 1 << 2 * ns) {
1520 for (j = 0, it = h; j < ns; b[ns - 1 - j] = it % 4, it /= 4, j++);
1521 for (j = 1, ndiff = 0; j < ns; j++) {
1522 FOR(k, j) if (b[j] == b[k]) break;
1523 if (k == j) ndiff++;
1524 }
1525 switch (ndiff) {
1526 default: SiteCat[h] = 0; break;
1527 case (0): SiteCat[h] = b[0] + 1; break;
1528 case (1):
1529 for (j = 1, it = 0, n1 = 0, bit1 = 0; j < ns; j++) {
1530 k = (b[j] != b[0]);
1531 it = it * 2 + k;
1532 n1 += k;
1533 if (bit1 == 0 && k) bit1 = ns - 1 - j;
1534 }
1535 it = 5 + it - 1;
1536 if (nsep == 0) { SiteCat[h] = it; break; }
1537
1538 SiteCat[h] = it + min2(bit1 + 1, nsep) * 11;
1539 if (n1 == 1 && bit1 < nsep) {
1540 SiteCat[h] -= 11;
1541 SiteCat[h] += (b[0] * 4 + b[ns - 1 - bit1] - b[0] - (b[0] <= b[ns - 1 - bit1]));
1542 }
1543 break;
1544 }
1545 if (fout) {
1546 FOR(j, ns) fprintf(fout, "%1c", BASEs[b[j]]);
1547 fprintf(fout, "%5d ", SiteCat[h]);
1548 if (h % 4 == 3) FPN(fout);
1549 }
1550 }
1551 return (0);
1552 }
1553
GetPexpML(double x[],int ncat,int SiteCat[],double pexp[])1554 int GetPexpML(double x[], int ncat, int SiteCat[], double pexp[])
1555 {
1556 int j, it, h, nodeb[NNODE];
1557 int isum, nsum = 1 << (2 * (tree.nbranch - com.ns + 1));
1558 double fh, y, Pt[NBRANCH][16];
1559
1560 if (com.ngene > 1 || com.nhomo || com.alpha || com.nparK || com.model > REV)
1561 error2("Pexp()");
1562 SetParameters(x);
1563 FOR(j, tree.nbranch)
1564 PMatCijk(Pt[j], nodes[tree.branches[j][1]].branch);
1565
1566 for (h = 0; h < (1 << 2 * com.ns); h++) {
1567 if (SiteCat[h] == 0) continue;
1568 for (j = 0, it = h; j < com.ns; nodeb[com.ns - 1 - j] = it & 3, it >>= 2, j++);
1569 for (isum = 0, fh = 0; isum < nsum; isum++) {
1570 for (j = 0, it = isum; j < tree.nbranch - com.ns + 1; j++)
1571 {
1572 nodeb[com.ns + j] = it % 4; it /= 4;
1573 }
1574 for (j = 0, y = com.pi[nodeb[tree.root]]; j < tree.nbranch; j++)
1575 y *= Pt[j][nodeb[tree.branches[j][0]] * 4 + nodeb[tree.branches[j][1]]];
1576 fh += y;
1577 }
1578 pexp[SiteCat[h]] += fh;
1579 }
1580 pexp[0] = 1 - sum(pexp + 1, ncat - 1);
1581 return (0);
1582 }
1583
1584
TestModel(FILE * fout,double x[],int nsep,double space[])1585 int TestModel(FILE *fout, double x[], int nsep, double space[])
1586 {
1587 /* test of models, using com.
1588 */
1589 int j, h, it, ls = com.ls, ncat, *SiteCat;
1590 double *pexp = space, *nobs, lmax0, lnL0, X2, ef, de;
1591
1592 SiteCat = (int*)malloc((1 << 2 * com.ns) * sizeof(int));
1593 if (SiteCat == NULL) error2("oom");
1594 CollapsSite(F0, nsep, com.ns, &ncat, SiteCat);
1595 fprintf(fout, "\n\nAppr. test of model.. ncat%6d nsep%6d\n", ncat, nsep);
1596
1597 nobs = pexp + ncat;
1598 zero(pexp, 2 * ncat);
1599 /* nobs */
1600 FOR(h, com.npatt) {
1601 for (j = 0, it = 0; j < com.ns; j++) it = it * 4 + (com.z[j][h] - 1);
1602 nobs[SiteCat[it]] += com.fpatt[h];
1603 }
1604 GetPexpML(x, ncat, SiteCat, pexp);
1605
1606 for (h = 0, lnL0 = 0, X2 = 0, lmax0 = -(double)ls*log((double)ls); h < ncat; h++) {
1607 if (nobs[h] > 1) {
1608 lmax0 += nobs[h] * log(nobs[h]);
1609 lnL0 += nobs[h] * log(pexp[h]);
1610 }
1611 ef = com.ls*pexp[h];
1612 de = square(nobs[h] - ef) / ef;
1613 X2 += de;
1614 fprintf(fout, "\nCat #%3d%9.0f%9.2f%9.2f", h, nobs[h], ef, de);
1615 }
1616 fprintf(fout, "\n\nlmax0:%12.4f D2:%12.4f X2:%12.4f\n",
1617 lmax0, 2 * (lmax0 - lnL0), X2);
1618 free(SiteCat);
1619
1620 return (0);
1621 }
1622
1623
1624 #endif
1625
1626
OldDistributions(int inode,double AncientFreqs[])1627 int OldDistributions(int inode, double AncientFreqs[])
1628 {
1629 /* reconstruct nucleotide frequencies at and down inode for nonhomogeneous models com.nhomo==3 or 4.
1630 AncientFreqs[tree.nnode*4]
1631 */
1632 int i, n = 4;
1633 double kappa = com.kappa;
1634
1635 if (com.alpha || com.model > REV) {
1636 puts("OldDistributions() does not run when alpha > 0 or model >= TN93");
1637 return(-1);
1638 }
1639 if (inode == tree.root)
1640 xtoy(nodes[inode].pi, AncientFreqs + inode*n, n);
1641 else {
1642 if (com.nhomo > 2 && com.model <= TN93)
1643 eigenTN93(com.model, *nodes[inode].pkappa, *(nodes[inode].pkappa + 1), nodes[inode].pi, &nR, Root, Cijk);
1644 else if (com.nhomo > 2 && com.model == REV)
1645 eigenQREVbase(NULL, PMat, nodes[inode].pkappa, nodes[inode].pi, &nR, Root, Cijk);
1646
1647 PMatCijk(PMat, nodes[inode].branch);
1648 matby(AncientFreqs + nodes[inode].father*n, PMat, AncientFreqs + inode*n, 1, n, n);
1649 }
1650 for (i = 0; i < nodes[inode].nson; i++)
1651 OldDistributions(nodes[inode].sons[i], AncientFreqs);
1652 return (0);
1653 }
1654
1655
1656
1657
1658
1659 /* problems and notes
1660
1661 (1) AdG model: generation of com.MK[K*K] is not independent
1662 of com.alpha or DiscreteGamma().
1663
1664 non-homogeneous process models:
1665 nhomo fix_kappa models
1666
1667 0 (1 pi given) 0 (solve 1 kappa) JC69, K80, F81, F84, HKY85,
1668 REV(5), UNREST(11)
1669 1 (1 kappa given) K80, F84, HKY85
1670
1671 1 (solve 1 pi) 0 (as above) F84, HKY85, REV(5)
1672 1 F81(0), F84, HKY85
1673
1674 2 (b kappa's) ? K80, F84, HKY85
1675
1676 3 (ns+2 pi) 0 (solve 1 kappa) F84 & HKY85
1677 1 (nbranch kappa) F84 & HKY85
1678
1679 4 (nnode pi) 0,1 (as above)
1680
1681
1682 space-time process models:
1683 nparK fix_alpha fix_rho parameters
1684 0 (0,1) (0,1) alpha & rho for AdG
1685
1686 1 set to 1 set to 1 rK[] (freqK=1/K) (K-1)
1687 2 set to 1 set to 1 rK[] & freqK[] 2(K-1)
1688 3 set to 1 set to 1 rK[] & MK[] (freqK=1/K) K(K-1)
1689 4 set to 1 set to 1 rK[] & MK[] K*K-1
1690
1691
1692 Local clock models
1693 parameters under local clock (com.clock=2)
1694 com.ntime = (#ancestral nodes) - 1 + (#rates) - 1
1695 Parameters include (ns-1) node times t[] and rates for branches.
1696 x[0]: t0*r0
1697 x[1..(nid-1)]: ti/t0 ancestral node times expressed as ratios
1698 x[ns-1 .. ntime-1]: rates for branches
1699
1700 */
1701