1 /* PAMP.c, Copyright, Ziheng Yang, April 1995.
2 Specify the sequence type in the file pamp.ctl. Results go into mp.
3
4 gcc -o pamp pamp.c tools.o
5 pamp <ControlFileName>
6 */
7
8 #include "paml.h"
9
10 #define NS 2000
11 #define NBRANCH (NS*2-2)
12 #define NNODE (NS*2-1)
13 #define MAXNSONS 10
14 #define NGENE 2000
15 #define LSPNAME 30
16 #define NCODE 20
17 #define NCATG 16
18
19 double DistanceREV(double Ft[], int n, double alpha, double Root[], double U[],
20 double V[], double pi[], double space[], int *cond);
21 int PMatBranch(double Ptb[], int n, double branch[],
22 double Root[], double U[], double V[], double space[]);
23 int PatternLS(FILE *fout, double Ft[], double alpha, double space[], int *cond);
24 int testx(double x[], int np);
25 int GetOptions(char *ctlf);
26 int AlphaMP(FILE* fout);
27 int PatternMP(FILE *fout, double Ft[]);
28 int PathwayMP1(FILE *fout, int *maxchange, int NSiteChange[],
29 double Ft[], double space[], int job);
30 double lfunAlpha_Sullivan(double x);
31 double lfunAlpha_YK96(double x);
32
33 struct CommonInfo {
34 unsigned char *z[NS];
35 char *spname[NS], seqf[2048], outf[2048], treef[2048];
36 int seqtype, ns, ls, ngene, posG[NGENE + 1], lgene[NGENE], *pose, npatt, readpattern;
37 int np, ntime, ncode, fix_kappa, fix_rgene, fix_alpha, clock, model, ncatG, cleandata;
38 int print, nhomo, verbose;
39 double *fpatt, *conP;
40 /* not used */
41 double lmax, pi[NCODE], kappa, alpha, rou, rgene[NGENE], piG[NGENE][NCODE];
42 } com;
43 struct TREEB {
44 int nbranch, nnode, root, branches[NBRANCH][2];
45 double lnL;
46 } tree;
47 struct TREEN {
48 int father, nson, sons[MAXNSONS], ibranch;
49 double branch, age, label, label2, *conP;
50 char *annotation, fossil;
51 } *nodes;
52
53
54 #define NCATCHANGE 100
55 extern int noisy, *ancestor;
56 extern double *SeqDistance;
57 int maxchange, NSiteChange[NCATCHANGE];
58 double MuChange;
59 int LASTROUND = 0; /* no use for this */
60
61 #define LSDISTANCE
62 #define REALSEQUENCE
63 #define NODESTRUCTURE
64 #define RECONSTRUCTION
65 #define PAMP
66 #include "treesub.c"
67
main(int argc,char * argv[])68 int main(int argc, char *argv[])
69 {
70 FILE *ftree, *fout, *fseq;
71 char ctlf[2048] = "pamp.ctl";
72 char *Seqstr[] = { "nucleotide", "", "amino-acid", "Binary" };
73 int itree, ntree, i, j, s3;
74 double *space, *Ft;
75
76 com.nhomo = 1; com.print = 1;
77 noisy = 2; com.ncatG = 8; com.clock = 0; com.cleandata = 1;
78 starttimer();
79 GetOptions(ctlf);
80 if (argc > 1) { strcpy(ctlf, argv[1]); printf("\nctlfile set to %s.\n", ctlf); }
81
82 printf("PAMP in %s\n", pamlVerStr);
83 if ((fseq = fopen(com.seqf, "r")) == NULL) error2("seqfile err.");
84 if ((fout = fopen(com.outf, "w")) == NULL) error2("outfile creation err.");
85 if ((fseq = fopen(com.seqf, "r")) == NULL) error2("No sequence file!");
86 ReadSeq(NULL, fseq, com.cleandata, 0);
87 SetMapAmbiguity(com.seqtype, 0);
88 i = (com.ns * 2 - 1) * sizeof(struct TREEN);
89 if ((nodes = (struct TREEN*)malloc(i)) == NULL) error2("oom");
90
91 fprintf(fout, "PAMP %15s, %s sequences\n", com.seqf, Seqstr[com.seqtype]);
92 if (com.nhomo) fprintf(fout, "nonhomogeneous model\n");
93
94 space = (double*)malloc(1000000 * sizeof(double)); /* not safe */
95 SeqDistance = (double*)malloc(com.ns*(com.ns - 1) / 2 * sizeof(double));
96 ancestor = (int*)malloc(com.ns*(com.ns - 1) / 2 * sizeof(int));
97 if (SeqDistance == NULL || ancestor == NULL) error2("oom");
98
99 i = com.ns*(com.ns - 1) / 2;
100 s3 = sizeof(double)*((com.ns * 2 - 2)*(com.ns * 2 - 2 + 4 + i) + i);
101 s3 = max2(s3, com.ncode*com.ncode*(2 * com.ns - 2 + 1)*(int)sizeof(double));
102
103 Ft = (double*)malloc(s3);
104 if (space == NULL || Ft == NULL) error2("oom space");
105
106 InitializeBaseAA(fout);
107 if (com.ngene > 1) error2("option G not allowed yet");
108
109 /*
110 PatternLS (fout, Ft, 0., space, &i);
111 printf ("\nPairwise estimation of rate matrix done..\n");
112 fflush(fout);
113 */
114 ftree = gfopen(com.treef, "r");
115 fscanf(ftree, "%d%d", &i, &ntree);
116 if (i != com.ns) error2("ns in the tree file");
117
118 for (itree = 0; itree < ntree; itree++) {
119
120 printf("\nTREE # %2d\n", itree + 1);
121 fprintf(fout, "\nTREE # %2d\n", itree + 1);
122
123 if (ReadTreeN(ftree, &i, 0, 1)) error2("err tree..");
124 OutTreeN(F0, 0, 0); FPN(F0);
125 OutTreeN(fout, 0, 0); FPN(fout);
126
127 for (i = 0, maxchange = 0; i < NCATCHANGE; i++) NSiteChange[i] = 0;
128
129 PathwayMP1(fout, &maxchange, NSiteChange, Ft, space, 0);
130 printf("\nHartigan reconstruction done..\n");
131
132 fprintf(fout, "\n\n(1) Branch lengths and substitution pattern\n");
133 PatternMP(fout, Ft);
134 printf("pattern done..\n"); fflush(fout);
135
136 fprintf(fout, "\n\n(2) Gamma parameter\n");
137 AlphaMP(fout);
138 printf("gamma done..\n"); fflush(fout);
139
140 fprintf(fout, "\n\n(3) Parsimony reconstructions\n");
141 PathwayMP1(fout, &maxchange, NSiteChange, Ft, space, 1);
142 printf("Yang reconstruction done..\n"); fflush(fout);
143 }
144 free(nodes);
145 return (0);
146 }
147
GetOptions(char * ctlf)148 int GetOptions(char *ctlf)
149 {
150 int iopt, nopt = 6, i, lline = 4096, t;
151 char line[4096], *pline, opt[32], *comment = "*#";
152 char *optstr[] = { "seqfile","outfile","treefile", "seqtype", "ncatG", "nhomo" };
153 FILE *fctl = gfopen(ctlf, "r");
154
155 if (fctl) {
156 for (;;) {
157 if (fgets(line, lline, fctl) == NULL) break;
158 for (i = 0, t = 0; i < lline&&line[i]; i++)
159 if (isalnum(line[i])) { t = 1; break; }
160 else if (strchr(comment, line[i])) break;
161 if (t == 0) continue;
162 sscanf(line, "%s%*s%d", opt, &t);
163 if ((pline = strstr(line, "=")) == NULL) error2("option file.");
164
165 for (iopt = 0; iopt < nopt; iopt++) {
166 if (strncmp(opt, optstr[iopt], 8) == 0) {
167 if (noisy > 2)
168 printf("\n%3d %15s | %-20s %6d", iopt + 1, optstr[iopt], opt, t);
169 switch (iopt) {
170 case (0): sscanf(pline + 2, "%s", com.seqf); break;
171 case (1): sscanf(pline + 2, "%s", com.outf); break;
172 case (2): sscanf(pline + 2, "%s", com.treef); break;
173 case (3): com.seqtype = t; break;
174 case (4): com.ncatG = t; break;
175 case (5): com.nhomo = t; break;
176 }
177 break;
178 }
179 }
180 if (iopt == nopt)
181 {
182 printf("\nopt %s in %s\n", opt, ctlf); exit(-1);
183 }
184 }
185 fclose(fctl);
186 }
187 else
188 if (noisy) printf("\nno ctl file..");
189
190 if (com.seqtype == 0) com.ncode = 4;
191 else if (com.seqtype == 2) com.ncode = 20;
192 else if (com.seqtype == 3) com.ncode = 2;
193 else error2("seqtype");
194 if (com.ncatG > NCATG) error2("raise NCATG?");
195 return (0);
196 }
197
198
AlphaMP(FILE * fout)199 int AlphaMP(FILE* fout)
200 {
201 int k, ntotal;
202 double x, xb[2], lnL, var;
203
204 xb[0] = 1e-3; xb[1] = 99; /* alpha */
205
206 fprintf(fout, "\n# changes .. # sites");
207 for (k = 0, ntotal = 0, MuChange = var = 0; k < maxchange + 1; k++) {
208 fprintf(fout, "\n%6d%10d", k, NSiteChange[k]);
209 ntotal += NSiteChange[k]; MuChange += k*NSiteChange[k];
210 var += k*k*NSiteChange[k];
211 }
212 MuChange /= ntotal;
213 var = (var - MuChange*MuChange*ntotal) / (ntotal - 1.);
214 x = MuChange*MuChange / (var - MuChange);
215 fprintf(fout, "\n\n# sites%6d, total changes%6d\nmean-var%9.4f%9.4f",
216 ntotal, (int)(ntotal*MuChange + .5), MuChange, var);
217 fprintf(fout, "\nalpha (method of moments)%9.4f", x);
218 if (x <= 0) x = 9;
219
220 LineSearch(lfunAlpha_Sullivan, &lnL, &x, xb, 0.02, 1e-8);
221 fprintf(fout, "\nalpha (Sullivan et al. 1995)%9.4f\n", x);
222
223 MuChange /= tree.nbranch;
224 LineSearch(lfunAlpha_YK96, &lnL, &x, xb, 0.02, 1e-8);
225 fprintf(fout, "alpha (Yang & Kumar 1995, ncatG= %d)%9.4f\n", com.ncatG, x);
226 return (0);
227 }
228
lfunAlpha_Sullivan(double x)229 double lfunAlpha_Sullivan(double x)
230 {
231 int k;
232 double lnL = 0, a = x, t;
233
234 FOR(k, maxchange + 1) {
235 if (NSiteChange[k] == 0) continue;
236 t = -a*log(1 + MuChange / a);
237 if (k)
238 t += LnGamma(k + a) - LnGamma(k + 1.) - LnGamma(a)
239 + k*log(MuChange / a / (1 + MuChange / a));
240 lnL += NSiteChange[k] * t;
241 }
242 return(-lnL);
243 }
244
lfunAlpha_YK96(double x)245 double lfunAlpha_YK96(double x)
246 {
247 int k, ir, b = tree.nbranch, n = com.ncode;
248 double lnL = 0, prob, a = x, t = MuChange, p;
249 double freqK[NCATG], rK[NCATG];
250
251 DiscreteGamma(freqK, rK, a, a, com.ncatG, 0);
252 FOR(k, maxchange + 1) {
253 if (NSiteChange[k] == 0) continue;
254 for (ir = 0, prob = 0; ir < com.ncatG; ir++) {
255 p = 1. / n + (n - 1.) / n*exp(-n / (n - 1.)*rK[ir] * t);
256 prob += freqK[ir] * pow(p, (double)(b - k))*pow((1 - p) / (n - 1.), (double)k);
257 }
258 lnL += NSiteChange[k] * log(prob);
259 }
260 return (-lnL);
261 }
262
263
OutQ(FILE * fout,int n,double Q[],double pi[],double Root[],double U[],double V[],double space[])264 int OutQ(FILE *fout, int n, double Q[], double pi[], double Root[],
265 double U[], double V[], double space[])
266 {
267 char aa3[4] = "";
268 int i, j;
269 double *T1 = space, t;
270
271 fprintf(fout, "\nrate matrix Q: Qij*dt = prob(i->j; dt)\n");
272 if (n <= 4) {
273 /* matout (fout, pi, 1, n); */
274 matout(fout, Q, n, n);
275 if (n == 4) {
276 fprintf(fout, "Order: T, C, A, G");
277 t = pi[0] * Q[0 * 4 + 1] + pi[1] * Q[1 * 4 + 0] + pi[2] * Q[2 * 4 + 3] + pi[3] * Q[3 * 4 + 2];
278 fprintf(fout, "\nAverage Ts/Tv =%9.4f\n", t / (1 - t));
279 }
280 }
281 else if (n == 20) {
282 for (i = 0; i < n; i++, FPN(fout))
283 FOR(j, n) fprintf(fout, "%6.0f", Q[i*n + j] * 100);
284 /*
285 FOR (i,n) {
286 fprintf (fout,"\n%-4s", getAAstr(aa3,i));
287 FOR (j,i) fprintf (fout, "%4.0f", Q[i*n+j]/pi[j]*100);
288 fprintf (fout, "%4.0f", -Q[i*n+i]*100);
289 }
290 fputs("\n ",fout); FOR(i,naa) fprintf(fout,"%5s",getAAstr(aa3,i));
291 */
292 fprintf(fout, "\n\nPAM matrix, P(0.01)\n");
293 FOR(i, n) FOR(j, n) T1[i*n + j] = U[i*n + j] * exp(0.01*Root[j]);
294 matby(T1, V, Q, n, n, n);
295 FOR(i, n*n) if (Q[i] < 0) Q[i] = 0;
296 FOR(i, n) {
297 fprintf(fout, "\n%-4s", getAAstr(aa3, i));
298 FOR(j, n) fprintf(fout, "%6.0f", Q[i*n + j] * 10000);
299 }
300 fputs("\n ", fout); FOR(i, n) fprintf(fout, "%5s", getAAstr(aa3, i));
301 }
302 return (0);
303 }
304
PMatBranch(double Ptb[],int n,double branch[],double Root[],double U[],double V[],double space[])305 int PMatBranch(double Ptb[], int n, double branch[],
306 double Root[], double U[], double V[], double space[])
307 {
308 /* homogeneised transition prob matrix, with one Q assumed for the whole tree
309 */
310 int i, j, k;
311 double *T1 = space, *P;
312
313 FOR(k, tree.nbranch) {
314 P = Ptb + k*n*n;
315 FOR(i, n) FOR(j, n) T1[i*n + j] = U[i*n + j] * exp(Root[j] * branch[k]);
316 matby(T1, V, P, n, n, n);
317 FOR(i, n*n) if (P[i] < 0) P[i] = 0;
318 /*
319 printf ("\nbranch %d, P(%.5f)", k+1, branch[k]);
320 matout (F0, P, n, n);
321 testTransP (P, n);
322 */
323 }
324 return (0);
325 }
326
327
PatternMP(FILE * fout,double Ft[])328 int PatternMP(FILE *fout, double Ft[])
329 {
330 /* Ft[]: input counts for the F(t) matrix for each branch, output P(t)
331 */
332 int n = com.ncode, i, j, k;
333 double *Q, *pi, *Root, *U, *V, *branch, *space, *T1, t;
334
335 if ((Q = (double*)malloc((n*n * 6 + tree.nbranch) * sizeof(double))) == NULL)
336 error2("PathwayMP: oom");
337 pi = Q + n*n; Root = pi + n; U = Root + n; V = U + n*n; branch = V + n*n;
338 space = T1 = branch + tree.nbranch;
339
340 for (k = 0; k < tree.nbranch; k++) { /* branch lengths */
341 xtoy(Ft + k*n*n, Q, n*n);
342 branch[k] = nodes[tree.branches[k][1]].branch =
343 DistanceREV(Q, n, 0, Root, U, V, pi, space, &j);
344 }
345 OutTreeB(fout); FPN(fout);
346 FOR(i, tree.nbranch) fprintf(fout, "%9.5f", branch[i]);
347 fprintf(fout, "\ntree length: %9.5f\n", sum(branch, tree.nbranch));
348
349 /* pattern Q from average F(t) */
350 fprintf(fout, "\nF(t)");
351 xtoy(Ft + tree.nbranch*n*n, Q, n*n);
352 matout2(fout, Q, n, n, 12, 2);
353 DistanceREV(Q, n, 0, Root, U, V, pi, space, &j);
354 if (noisy >= 3 && j == -1) { puts("F(t) modified in DistanceREV"); }
355
356 OutQ(fout, n, Q, pi, Root, U, V, T1);
357 if (com.nhomo == 0)
358 PMatBranch(Ft, n, branch, Root, U, V, space);
359 else {
360 for (k = 0; k < tree.nbranch; k++) {
361 for (i = 0; i < n; i++) {
362 t = sum(Ft + k*n*n + i*n, n);
363 if (t > 1e-5) abyx(1 / t, Ft + k*n*n + i*n, n);
364 else Ft[k*n*n + i*n + i] = 1;
365 }
366 }
367 }
368 free(Q);
369 return (0);
370 }
371
372
PathwayMP1(FILE * fout,int * maxchange,int NSiteChange[],double Ft[],double space[],int job)373 int PathwayMP1(FILE *fout, int *maxchange, int NSiteChange[],
374 double Ft[], double space[], int job)
375 {
376 /* Hartigan, JA. 1973. Minimum mutation fits to a given tree.
377 Biometrics, 29:53-65.
378 Yang, Z. 1996.
379 job=0: 1st pass: calculates maxchange, NSiteChange[], and Ft[]
380 job=1: 2nd pass: reconstructs ancestral character states (->fout)
381 */
382 char *pch = (com.seqtype == 0 ? BASEs : (com.seqtype == 2 ? AAs : BINs));
383 char *zz[NNODE], nodeb[NNODE], bestPath[NNODE - NS], Equivoc[NS - 1];
384 int n = com.ncode, nid = tree.nbranch - com.ns + 1, it, i1, i2, i, j, k, h, hp, npath;
385 int *Ftt = NULL, nchange, nchange0, visit[NS - 1] = { 0 };
386 double sumpr, bestpr, pr, *pnode = NULL, *pnsite;
387
388
389 fputs("\nList of most parsimonious reconstructions (MPRs) at each site: #MPRs (#changes)\n", fout);
390 fputs("and then the most likely reconstruction out of the MPRs and its probability\n", fout);
391 if ((pnsite = (double*)malloc((com.ns - 1)*n * sizeof(double))) == NULL)
392 error2("PathwayMP1: oom");
393
394 PATHWay = (char*)malloc(nid*(n + 3) * sizeof(char));
395 NCharaCur = PATHWay + nid; ICharaCur = NCharaCur + nid; CharaCur = ICharaCur + nid;
396 if (job == 0) {
397 zero(Ft, n*n*(tree.nbranch + 1));
398 if ((Ftt = (int*)malloc(n*n*tree.nbranch * sizeof(int))) == NULL) error2("oom");
399 }
400 else {
401 pnode = (double*)malloc((nid*com.npatt + 1)*(sizeof(double) + sizeof(char)));
402 FOR(j, nid) zz[com.ns + j] = (char*)(pnode + nid*com.npatt) + j*com.npatt;
403 FOR(j, com.ns) zz[j] = com.z[j];
404 if (pnode == NULL) error2("oom");
405 }
406 for (j = 0, visit[i = 0] = tree.root - com.ns; j < tree.nbranch; j++)
407 if (tree.branches[j][1] >= com.ns)
408 visit[++i] = tree.branches[j][1] - com.ns;
409
410 for (h = 0; h < com.ls; h++) {
411 hp = com.pose[h];
412 if (job == 1) {
413 fprintf(fout, "\n%4d ", h + 1);
414 FOR(j, com.ns) fprintf(fout, "%c", pch[com.z[j][hp]]);
415 fprintf(fout, ": ");
416 FOR(j, nid*n) pnsite[j] = 0;
417 }
418 FOR(j, com.ns) nodeb[j] = com.z[j][hp];
419 if (job == 0) FOR(j, n*n*tree.nbranch) Ftt[j] = 0;
420
421 InteriorStatesMP(1, hp, &nchange, NCharaCur, CharaCur, space);
422 ICharaCur[j = tree.root - com.ns] = 0; PATHWay[j] = CharaCur[j*n + 0];
423 FOR(j, nid) Equivoc[j] = (NCharaCur[j] > 1);
424
425 if (nchange > *maxchange) *maxchange = nchange;
426 if (nchange > NCATCHANGE - 1) error2("raise NCATCHANGE");
427
428 NSiteChange[nchange]++;
429 /* NSiteChange[nchange]+=(int)com.fpatt[hp]; */
430
431 DownStates(tree.root);
432 for (npath = 0, sumpr = bestpr = 0; ;) {
433 for (j = 0, k = visit[nid - 1]; j < NCharaCur[k]; j++) {
434 PATHWay[k] = CharaCur[k*n + j]; npath++;
435 FOR(i, nid) nodeb[i + com.ns] = PATHWay[i];
436 if (job == 1) {
437 FOR(i, nid) fprintf(fout, "%c", pch[PATHWay[i]]); fputc(' ', fout);
438 pr = com.pi[(int)nodeb[tree.root]];
439 for (i = 0; i < tree.nbranch; i++) {
440 i1 = nodeb[tree.branches[i][0]];
441 i2 = nodeb[tree.branches[i][1]];
442 pr *= Ft[i*n*n + i1*n + i2];
443 }
444 sumpr += pr;
445 FOR(i, nid) pnsite[i*n + nodeb[i + com.ns]] += pr;
446 if (pr > bestpr)
447 {
448 bestpr = pr; FOR(i, nid) bestPath[i] = PATHWay[i];
449 }
450 }
451 else {
452 for (i = 0, nchange0 = 0; i < tree.nbranch; i++) {
453 i1 = nodeb[tree.branches[i][0]];
454 i2 = nodeb[tree.branches[i][1]];
455 if (i1 != i2) nchange0++;
456 Ftt[i*n*n + i1*n + i2]++;
457 }
458 if (nchange0 != nchange) {
459 printf("\a\nerr:PathwayMP %d != %d", nchange, nchange0);
460 fprintf(fout, ".%d. ", nchange0); /* ??? */
461 }
462 }
463 }
464 for (j = nid - 2; j >= 0; j--) {
465 if (Equivoc[k = visit[j]] == 0) continue;
466 if (ICharaCur[k] + 1 < NCharaCur[k]) {
467 PATHWay[k] = CharaCur[k*n + (++ICharaCur[k])];
468 DownStates(k + com.ns);
469 break;
470 }
471 else { /* if (next equivocal node is not ancestor) update node k */
472 for (i = j - 1; i >= 0; i--) if (Equivoc[(int)visit[i]]) break;
473 if (i >= 0) {
474 for (it = k + com.ns, i = visit[i] + com.ns; ; it = nodes[it].father)
475 if (it == tree.root || nodes[it].father == i) break;
476 if (it == tree.root)
477 DownStatesOneNode(k + com.ns, nodes[k + com.ns].father);
478 }
479 }
480 }
481 if (j < 0) break;
482 } /* for (npath) */
483 /*
484 printf ("\rsite pattern %4d/%4d: %6d%6d", hp+1,com.npatt,npath,nchange);
485 */
486 if (job == 0)
487 FOR(j, n*n*tree.nbranch) Ft[j] += (double)Ftt[j] / npath*com.fpatt[hp];
488 else {
489 FOR(i, nid) zz[com.ns + i][hp] = bestPath[i];
490 FOR(i, nid) pnode[i*com.npatt + hp] = pnsite[i*n + bestPath[i]] / sumpr;
491 fprintf(fout, " |%4d (%d) | ", npath, nchange);
492 if (npath > 1) {
493 FOR(i, nid) fprintf(fout, "%c", pch[bestPath[i]]);
494 fprintf(fout, " (%.3f)", bestpr / sumpr);
495
496 }
497 }
498 } /* for (h) */
499 free(PATHWay);
500 if (job == 0) {
501 free(Ftt);
502 FOR(i, tree.nbranch) FOR(j, n*n) Ft[tree.nbranch*n*n + j] += Ft[i*n*n + j];
503 }
504 else {
505 fprintf(fout, "\n\nApprox. relative accuracy at each node, by site\n");
506 FOR(h, com.ls) {
507 hp = com.pose[h];
508 fprintf(fout, "\n%4d ", h + 1);
509 FOR(j, com.ns) fprintf(fout, "%c", pch[com.z[j][hp]]);
510 fprintf(fout, ": ");
511 FOR(i, nid) if (pnode[i*com.npatt + hp] < .99999) break;
512 if (i < nid) FOR(j, nid)
513 fprintf(fout, "%c (%5.3f) ", pch[zz[j][hp]], pnode[j*com.npatt + hp]);
514 }
515 /* Site2Pattern (fout); */
516 fprintf(fout, "\n\nlist of extant and reconstructed sequences\n\n");
517 for (j = 0; j < tree.nnode; j++, FPN(fout)) {
518 if (j < com.ns) fprintf(fout, "%-20s", com.spname[j]);
519 else fprintf(fout, "node #%-14d", j + 1);
520 print1seq(fout, zz[j], (com.readpattern ? com.npatt : com.ls), com.pose);
521 }
522 free(pnode);
523 }
524 free(pnsite);
525 return (0);
526 }
527
DistanceREV(double Ft[],int n,double alpha,double Root[],double U[],double V[],double pi[],double space[],int * cond)528 double DistanceREV(double Ft[], int n, double alpha, double Root[], double U[],
529 double V[], double pi[], double space[], int *cond)
530 {
531 /* input: Ft, n, alpha
532 output: Q(in Ft), t, Root, U, V, and cond
533 space[n*n*2]
534 */
535 int i, j, InApplicable;
536 double *Q = Ft, *T1 = space, *T2 = space + n*n, t, pi_sqrt[20], small = 0.1 / com.ls;
537
538 for (i = 0, t = 0; i < n; i++) FOR(j, n) if (i - j) t += Q[i*n + j];
539 if (t < small) { *cond = 1; zero(Q, n*n); return (0); }
540
541 for (i = 0; i < n; i++) for (j = 0; j < i; j++)
542 Q[i*n + j] = Q[j*n + i] = (Q[i*n + j] + Q[j*n + i]) / 2;
543
544 abyx(1. / sum(Q, n*n), Q, n*n);
545 for (i = 0; i < n; i++) {
546 pi[i] = sum(Q + i*n, n);
547 if (pi[i] > small)
548 abyx(1 / pi[i], Q + i*n, n);
549 }
550
551 eigenQREV(Q, pi, n, Root, U, V, pi_sqrt);
552 for (i = 0, InApplicable = 0; i < n; i++) {
553 if (Root[i] <= 0) {
554 InApplicable = 1;
555 Root[i] = -300; /* adhockery */
556 }
557 else
558 Root[i] = (alpha <= 0 ? log(Root[i]) : gammap(Root[i], alpha));
559 }
560 FOR(i, n) FOR(j, n) T1[i*n + j] = U[i*n + j] * Root[j];
561 matby(T1, V, Q, n, n, n);
562 for (i = 0, t = 0; i < n; i++) t -= pi[i] * Q[i*n + i];
563
564 if (noisy >= 9 && InApplicable) printf("Root(P)<0. adhockery invoked\n");
565 if (t <= 0) error2("err: DistanceREV");
566
567 FOR(i, n) Root[i] /= t;
568 FOR(i, n) FOR(j, n) { Q[i*n + j] /= t; if (i - j) Q[i*n + j] = max2(0, Q[i*n + j]); }
569
570 return (t);
571 }
572
573
PatternLS(FILE * fout,double Ft[],double alpha,double space[],int * cond)574 int PatternLS(FILE *fout, double Ft[], double alpha, double space[], int *cond)
575 {
576 /* space[n*n*2]
577 */
578 int n = com.ncode, i, j, k, h, it;
579 double *Q = Ft, *Qt = Q + n*n, *Qm = Qt + n*n;
580 double *pi, *Root, *U, *V, *T1 = space, *branch, t;
581 FILE *fdist = gfopen("Distance", "w");
582
583 if ((pi = (double*)malloc((n*n * 3 + tree.nbranch) * sizeof(double))) == NULL)
584 error2("PatternLS: oom");
585 Root = pi + n; U = Root + n; V = U + n*n; branch = V + n*n;
586
587 *cond = 0;
588 for (i = 0, zero(Qt, n*n), zero(Qm, n*n); i < com.ns; i++) {
589 for (j = 0; j < i; j++) {
590 for (h = 0, zero(Q, n*n); h < com.npatt; h++) {
591 Q[(com.z[i][h])*n + com.z[j][h]] += com.fpatt[h] / 2;
592 Q[(com.z[j][h])*n + com.z[i][h]] += com.fpatt[h] / 2;
593 }
594 FOR(k, n*n) Qt[k] += Q[k] / (com.ns*(com.ns - 1) / 2);
595 it = i*(i - 1) / 2 + j;
596 SeqDistance[it] = DistanceREV(Q, n, alpha, Root, U, V, pi, space, &k);
597
598 if (k == -1) {
599 *cond = -1; printf("\n%d&%d: F(t) modified in DistanceREV", i + 1, j + 1);
600 }
601
602 fprintf(fdist, "%9.5f", SeqDistance[it]);
603 /*
604 FOR (k,n)
605 if (Q[k*n+k]>0) { printf ("%d %d %.5f\n", i+1, j+1, Q[k*n+k]); }
606 */
607 FOR(k, n*n) Qm[k] += Q[k] / (com.ns*(com.ns - 1) / 2);
608 }
609 FPN(fdist);
610 }
611 fclose(fdist);
612 DistanceREV(Qt, n, alpha, Root, U, V, pi, space, &k);
613 if (k == -1) { puts("F(t) modified in DistanceREV"); }
614
615 fprintf(fout, "\n\nQ: from average F over pairwise comparisons");
616 OutQ(fout, n, Qt, pi, Root, U, V, T1);
617 fprintf(fout, "\n\nQ: average of Qs over pairwise comparisons\n");
618 fprintf(fout, "(disregard this if very different from the previous Q)");
619 OutQ(fout, n, Qm, pi, Root, U, V, T1);
620
621 if (tree.nbranch) {
622 fillxc(branch, 0.1, tree.nbranch);
623 LSDistance(&t, branch, testx);
624 OutTreeB(fout); FPN(fout);
625 FOR(i, tree.nbranch) fprintf(fout, "%9.5f", branch[i]);
626 PMatBranch(Ft, com.ncode, branch, Root, U, V, space);
627 }
628 free(pi);
629 return (0);
630 }
631
testx(double x[],int np)632 int testx(double x[], int np)
633 {
634 int i;
635 double tb[] = { 1e-5, 99 };
636 FOR(i, np) if (x[i]<tb[0] || x[i]>tb[1]) return(-1);
637 return(0);
638 }
639
SetBranch(double x[])640 int SetBranch(double x[])
641 {
642 int i, status = 0;
643 double small = 1e-5;
644
645 FOR(i, tree.nnode)
646 if (i != tree.root && (nodes[i].branch = x[nodes[i].ibranch]) < -small)
647 status = -1;
648 return (status);
649 }
650