1 /* PAMP.c, Copyright, Ziheng Yang, April 1995.
2    Specify the sequence type in the file pamp.ctl.  Results go into mp.
3 
4                     gcc -o pamp pamp.c tools.o
5                     pamp <ControlFileName>
6 */
7 
8 #include "paml.h"
9 
10 #define NS            2000
11 #define NBRANCH       (NS*2-2)
12 #define NNODE         (NS*2-1)
13 #define MAXNSONS      10
14 #define NGENE         2000
15 #define LSPNAME       30
16 #define NCODE         20
17 #define NCATG         16
18 
19 double DistanceREV(double Ft[], int n, double alpha, double Root[], double U[],
20    double V[], double pi[], double space[], int *cond);
21 int PMatBranch(double Ptb[], int n, double branch[],
22    double Root[], double U[], double V[], double space[]);
23 int PatternLS(FILE *fout, double Ft[], double alpha, double space[], int *cond);
24 int testx(double x[], int np);
25 int GetOptions(char *ctlf);
26 int AlphaMP(FILE* fout);
27 int PatternMP(FILE *fout, double Ft[]);
28 int PathwayMP1(FILE *fout, int *maxchange, int NSiteChange[],
29    double Ft[], double space[], int job);
30 double lfunAlpha_Sullivan(double x);
31 double lfunAlpha_YK96(double x);
32 
33 struct CommonInfo {
34    unsigned char *z[NS];
35    char *spname[NS], seqf[2048], outf[2048], treef[2048];
36    int seqtype, ns, ls, ngene, posG[NGENE + 1], lgene[NGENE], *pose, npatt, readpattern;
37    int np, ntime, ncode, fix_kappa, fix_rgene, fix_alpha, clock, model, ncatG, cleandata;
38    int print, nhomo, verbose;
39    double *fpatt, *conP;
40    /* not used */
41    double lmax, pi[NCODE], kappa, alpha, rou, rgene[NGENE], piG[NGENE][NCODE];
42 }  com;
43 struct TREEB {
44    int nbranch, nnode, root, branches[NBRANCH][2];
45    double lnL;
46 }  tree;
47 struct TREEN {
48    int father, nson, sons[MAXNSONS], ibranch;
49    double branch, age, label, label2, *conP;
50    char *annotation, fossil;
51 }  *nodes;
52 
53 
54 #define NCATCHANGE 100
55 extern int noisy, *ancestor;
56 extern double *SeqDistance;
57 int maxchange, NSiteChange[NCATCHANGE];
58 double MuChange;
59 int LASTROUND = 0; /* no use for this */
60 
61 #define LSDISTANCE
62 #define REALSEQUENCE
63 #define NODESTRUCTURE
64 #define RECONSTRUCTION
65 #define PAMP
66 #include "treesub.c"
67 
main(int argc,char * argv[])68 int main(int argc, char *argv[])
69 {
70    FILE *ftree, *fout, *fseq;
71    char ctlf[2048] = "pamp.ctl";
72    char *Seqstr[] = { "nucleotide", "", "amino-acid", "Binary" };
73    int itree, ntree, i, j, s3;
74    double *space, *Ft;
75 
76    com.nhomo = 1;  com.print = 1;
77    noisy = 2;  com.ncatG = 8;   com.clock = 0; com.cleandata = 1;
78    starttimer();
79    GetOptions(ctlf);
80    if (argc > 1) { strcpy(ctlf, argv[1]); printf("\nctlfile set to %s.\n", ctlf); }
81 
82    printf("PAMP in %s\n", pamlVerStr);
83    if ((fseq = fopen(com.seqf, "r")) == NULL) error2("seqfile err.");
84    if ((fout = fopen(com.outf, "w")) == NULL) error2("outfile creation err.");
85    if ((fseq = fopen(com.seqf, "r")) == NULL)  error2("No sequence file!");
86    ReadSeq(NULL, fseq, com.cleandata, 0);
87    SetMapAmbiguity(com.seqtype, 0);
88    i = (com.ns * 2 - 1) * sizeof(struct TREEN);
89    if ((nodes = (struct TREEN*)malloc(i)) == NULL) error2("oom");
90 
91    fprintf(fout, "PAMP %15s, %s sequences\n", com.seqf, Seqstr[com.seqtype]);
92    if (com.nhomo) fprintf(fout, "nonhomogeneous model\n");
93 
94    space = (double*)malloc(1000000 * sizeof(double));  /* not safe */
95    SeqDistance = (double*)malloc(com.ns*(com.ns - 1) / 2 * sizeof(double));
96    ancestor = (int*)malloc(com.ns*(com.ns - 1) / 2 * sizeof(int));
97    if (SeqDistance == NULL || ancestor == NULL) error2("oom");
98 
99    i = com.ns*(com.ns - 1) / 2;
100    s3 = sizeof(double)*((com.ns * 2 - 2)*(com.ns * 2 - 2 + 4 + i) + i);
101    s3 = max2(s3, com.ncode*com.ncode*(2 * com.ns - 2 + 1)*(int)sizeof(double));
102 
103    Ft = (double*)malloc(s3);
104    if (space == NULL || Ft == NULL)  error2("oom space");
105 
106    InitializeBaseAA(fout);
107    if (com.ngene > 1) error2("option G not allowed yet");
108 
109    /*
110       PatternLS (fout, Ft, 0., space, &i);
111       printf ("\nPairwise estimation of rate matrix done..\n");
112       fflush(fout);
113    */
114    ftree = gfopen(com.treef, "r");
115    fscanf(ftree, "%d%d", &i, &ntree);
116    if (i != com.ns) error2("ns in the tree file");
117 
118    for (itree = 0; itree < ntree; itree++) {
119 
120       printf("\nTREE # %2d\n", itree + 1);
121       fprintf(fout, "\nTREE # %2d\n", itree + 1);
122 
123       if (ReadTreeN(ftree, &i, 0, 1)) error2("err tree..");
124       OutTreeN(F0, 0, 0);    FPN(F0);
125       OutTreeN(fout, 0, 0);  FPN(fout);
126 
127       for (i = 0, maxchange = 0; i < NCATCHANGE; i++) NSiteChange[i] = 0;
128 
129       PathwayMP1(fout, &maxchange, NSiteChange, Ft, space, 0);
130       printf("\nHartigan reconstruction done..\n");
131 
132       fprintf(fout, "\n\n(1) Branch lengths and substitution pattern\n");
133       PatternMP(fout, Ft);
134       printf("pattern done..\n");    fflush(fout);
135 
136       fprintf(fout, "\n\n(2) Gamma parameter\n");
137       AlphaMP(fout);
138       printf("gamma done..\n");      fflush(fout);
139 
140       fprintf(fout, "\n\n(3) Parsimony reconstructions\n");
141       PathwayMP1(fout, &maxchange, NSiteChange, Ft, space, 1);
142       printf("Yang reconstruction done..\n");    fflush(fout);
143    }
144    free(nodes);
145    return (0);
146 }
147 
GetOptions(char * ctlf)148 int GetOptions(char *ctlf)
149 {
150    int iopt, nopt = 6, i, lline = 4096, t;
151    char line[4096], *pline, opt[32], *comment = "*#";
152    char *optstr[] = { "seqfile","outfile","treefile", "seqtype", "ncatG", "nhomo" };
153    FILE  *fctl = gfopen(ctlf, "r");
154 
155    if (fctl) {
156       for (;;) {
157          if (fgets(line, lline, fctl) == NULL) break;
158          for (i = 0, t = 0; i < lline&&line[i]; i++)
159             if (isalnum(line[i])) { t = 1; break; }
160             else if (strchr(comment, line[i])) break;
161             if (t == 0) continue;
162             sscanf(line, "%s%*s%d", opt, &t);
163             if ((pline = strstr(line, "=")) == NULL) error2("option file.");
164 
165             for (iopt = 0; iopt < nopt; iopt++) {
166                if (strncmp(opt, optstr[iopt], 8) == 0) {
167                   if (noisy > 2)
168                      printf("\n%3d %15s | %-20s %6d", iopt + 1, optstr[iopt], opt, t);
169                   switch (iopt) {
170                   case (0): sscanf(pline + 2, "%s", com.seqf);    break;
171                   case (1): sscanf(pline + 2, "%s", com.outf);    break;
172                   case (2): sscanf(pline + 2, "%s", com.treef);    break;
173                   case  (3): com.seqtype = t;   break;
174                   case  (4): com.ncatG = t;     break;
175                   case  (5): com.nhomo = t;     break;
176                   }
177                   break;
178                }
179             }
180             if (iopt == nopt)
181             {
182                printf("\nopt %s in %s\n", opt, ctlf);  exit(-1);
183             }
184       }
185       fclose(fctl);
186    }
187    else
188       if (noisy) printf("\nno ctl file..");
189 
190    if (com.seqtype == 0)       com.ncode = 4;
191    else if (com.seqtype == 2)  com.ncode = 20;
192    else if (com.seqtype == 3)  com.ncode = 2;
193    else                      error2("seqtype");
194    if (com.ncatG > NCATG) error2("raise NCATG?");
195    return (0);
196 }
197 
198 
AlphaMP(FILE * fout)199 int AlphaMP(FILE* fout)
200 {
201    int k, ntotal;
202    double x, xb[2], lnL, var;
203 
204    xb[0] = 1e-3; xb[1] = 99; /* alpha */
205 
206    fprintf(fout, "\n# changes .. # sites");
207    for (k = 0, ntotal = 0, MuChange = var = 0; k < maxchange + 1; k++) {
208       fprintf(fout, "\n%6d%10d", k, NSiteChange[k]);
209       ntotal += NSiteChange[k];  MuChange += k*NSiteChange[k];
210       var += k*k*NSiteChange[k];
211    }
212    MuChange /= ntotal;
213    var = (var - MuChange*MuChange*ntotal) / (ntotal - 1.);
214    x = MuChange*MuChange / (var - MuChange);
215    fprintf(fout, "\n\n# sites%6d,  total changes%6d\nmean-var%9.4f%9.4f",
216       ntotal, (int)(ntotal*MuChange + .5), MuChange, var);
217    fprintf(fout, "\nalpha (method of moments)%9.4f", x);
218    if (x <= 0) x = 9;
219 
220    LineSearch(lfunAlpha_Sullivan, &lnL, &x, xb, 0.02, 1e-8);
221    fprintf(fout, "\nalpha (Sullivan et al. 1995)%9.4f\n", x);
222 
223    MuChange /= tree.nbranch;
224    LineSearch(lfunAlpha_YK96, &lnL, &x, xb, 0.02, 1e-8);
225    fprintf(fout, "alpha (Yang & Kumar 1995, ncatG= %d)%9.4f\n", com.ncatG, x);
226    return (0);
227 }
228 
lfunAlpha_Sullivan(double x)229 double lfunAlpha_Sullivan(double x)
230 {
231    int k;
232    double lnL = 0, a = x, t;
233 
234    FOR(k, maxchange + 1) {
235       if (NSiteChange[k] == 0) continue;
236       t = -a*log(1 + MuChange / a);
237       if (k)
238          t += LnGamma(k + a) - LnGamma(k + 1.) - LnGamma(a)
239          + k*log(MuChange / a / (1 + MuChange / a));
240       lnL += NSiteChange[k] * t;
241    }
242    return(-lnL);
243 }
244 
lfunAlpha_YK96(double x)245 double lfunAlpha_YK96(double x)
246 {
247    int k, ir, b = tree.nbranch, n = com.ncode;
248    double lnL = 0, prob, a = x, t = MuChange, p;
249    double freqK[NCATG], rK[NCATG];
250 
251    DiscreteGamma(freqK, rK, a, a, com.ncatG, 0);
252    FOR(k, maxchange + 1) {
253       if (NSiteChange[k] == 0) continue;
254       for (ir = 0, prob = 0; ir < com.ncatG; ir++) {
255          p = 1. / n + (n - 1.) / n*exp(-n / (n - 1.)*rK[ir] * t);
256          prob += freqK[ir] * pow(p, (double)(b - k))*pow((1 - p) / (n - 1.), (double)k);
257       }
258       lnL += NSiteChange[k] * log(prob);
259    }
260    return (-lnL);
261 }
262 
263 
OutQ(FILE * fout,int n,double Q[],double pi[],double Root[],double U[],double V[],double space[])264 int OutQ(FILE *fout, int n, double Q[], double pi[], double Root[],
265    double U[], double V[], double space[])
266 {
267    char aa3[4] = "";
268    int i, j;
269    double *T1 = space, t;
270 
271    fprintf(fout, "\nrate matrix Q: Qij*dt = prob(i->j; dt)\n");
272    if (n <= 4) {
273       /*      matout (fout, pi, 1, n); */
274       matout(fout, Q, n, n);
275       if (n == 4) {
276          fprintf(fout, "Order: T, C, A, G");
277          t = pi[0] * Q[0 * 4 + 1] + pi[1] * Q[1 * 4 + 0] + pi[2] * Q[2 * 4 + 3] + pi[3] * Q[3 * 4 + 2];
278          fprintf(fout, "\nAverage Ts/Tv =%9.4f\n", t / (1 - t));
279       }
280    }
281    else if (n == 20) {
282       for (i = 0; i < n; i++, FPN(fout))
283          FOR(j, n) fprintf(fout, "%6.0f", Q[i*n + j] * 100);
284       /*
285             FOR (i,n) {
286                fprintf (fout,"\n%-4s", getAAstr(aa3,i));
287                FOR (j,i) fprintf (fout, "%4.0f", Q[i*n+j]/pi[j]*100);
288                fprintf (fout, "%4.0f", -Q[i*n+i]*100);
289             }
290             fputs("\n     ",fout);  FOR(i,naa) fprintf(fout,"%5s",getAAstr(aa3,i));
291       */
292       fprintf(fout, "\n\nPAM matrix, P(0.01)\n");
293       FOR(i, n) FOR(j, n) T1[i*n + j] = U[i*n + j] * exp(0.01*Root[j]);
294       matby(T1, V, Q, n, n, n);
295       FOR(i, n*n) if (Q[i] < 0) Q[i] = 0;
296       FOR(i, n) {
297          fprintf(fout, "\n%-4s", getAAstr(aa3, i));
298          FOR(j, n) fprintf(fout, "%6.0f", Q[i*n + j] * 10000);
299       }
300       fputs("\n     ", fout);  FOR(i, n) fprintf(fout, "%5s", getAAstr(aa3, i));
301    }
302    return (0);
303 }
304 
PMatBranch(double Ptb[],int n,double branch[],double Root[],double U[],double V[],double space[])305 int PMatBranch(double Ptb[], int n, double branch[],
306    double Root[], double U[], double V[], double space[])
307 {
308    /* homogeneised transition prob matrix, with one Q assumed for the whole tree
309    */
310    int i, j, k;
311    double *T1 = space, *P;
312 
313    FOR(k, tree.nbranch) {
314       P = Ptb + k*n*n;
315       FOR(i, n) FOR(j, n) T1[i*n + j] = U[i*n + j] * exp(Root[j] * branch[k]);
316       matby(T1, V, P, n, n, n);
317       FOR(i, n*n) if (P[i] < 0) P[i] = 0;
318       /*
319             printf ("\nbranch %d, P(%.5f)", k+1, branch[k]);
320             matout (F0, P, n, n);
321             testTransP (P, n);
322       */
323    }
324    return (0);
325 }
326 
327 
PatternMP(FILE * fout,double Ft[])328 int PatternMP(FILE *fout, double Ft[])
329 {
330    /* Ft[]: input counts for the F(t) matrix for each branch, output P(t)
331    */
332    int n = com.ncode, i, j, k;
333    double *Q, *pi, *Root, *U, *V, *branch, *space, *T1, t;
334 
335    if ((Q = (double*)malloc((n*n * 6 + tree.nbranch) * sizeof(double))) == NULL)
336       error2("PathwayMP: oom");
337    pi = Q + n*n; Root = pi + n; U = Root + n; V = U + n*n; branch = V + n*n;
338    space = T1 = branch + tree.nbranch;
339 
340    for (k = 0; k < tree.nbranch; k++) {  /* branch lengths */
341       xtoy(Ft + k*n*n, Q, n*n);
342       branch[k] = nodes[tree.branches[k][1]].branch =
343          DistanceREV(Q, n, 0, Root, U, V, pi, space, &j);
344    }
345    OutTreeB(fout);  FPN(fout);
346    FOR(i, tree.nbranch) fprintf(fout, "%9.5f", branch[i]);
347    fprintf(fout, "\ntree length: %9.5f\n", sum(branch, tree.nbranch));
348 
349    /* pattern Q from average F(t) */
350    fprintf(fout, "\nF(t)");
351    xtoy(Ft + tree.nbranch*n*n, Q, n*n);
352    matout2(fout, Q, n, n, 12, 2);
353    DistanceREV(Q, n, 0, Root, U, V, pi, space, &j);
354    if (noisy >= 3 && j == -1) { puts("F(t) modified in DistanceREV"); }
355 
356    OutQ(fout, n, Q, pi, Root, U, V, T1);
357    if (com.nhomo == 0)
358       PMatBranch(Ft, n, branch, Root, U, V, space);
359    else {
360       for (k = 0; k < tree.nbranch; k++) {
361          for (i = 0; i < n; i++) {
362             t = sum(Ft + k*n*n + i*n, n);
363             if (t > 1e-5) abyx(1 / t, Ft + k*n*n + i*n, n);
364             else        Ft[k*n*n + i*n + i] = 1;
365          }
366       }
367    }
368    free(Q);
369    return (0);
370 }
371 
372 
PathwayMP1(FILE * fout,int * maxchange,int NSiteChange[],double Ft[],double space[],int job)373 int PathwayMP1(FILE *fout, int *maxchange, int NSiteChange[],
374    double Ft[], double space[], int job)
375 {
376    /* Hartigan, JA.  1973.  Minimum mutation fits to a given tree.
377       Biometrics, 29:53-65.
378       Yang, Z.  1996.
379       job=0: 1st pass: calculates maxchange, NSiteChange[], and Ft[]
380       job=1: 2nd pass: reconstructs ancestral character states (->fout)
381    */
382    char *pch = (com.seqtype == 0 ? BASEs : (com.seqtype == 2 ? AAs : BINs));
383    char *zz[NNODE], nodeb[NNODE], bestPath[NNODE - NS], Equivoc[NS - 1];
384    int n = com.ncode, nid = tree.nbranch - com.ns + 1, it, i1, i2, i, j, k, h, hp, npath;
385    int *Ftt = NULL, nchange, nchange0, visit[NS - 1] = { 0 };
386    double sumpr, bestpr, pr, *pnode = NULL, *pnsite;
387 
388 
389    fputs("\nList of most parsimonious reconstructions (MPRs) at each site: #MPRs (#changes)\n", fout);
390    fputs("and then the most likely reconstruction out of the MPRs and its probability\n", fout);
391    if ((pnsite = (double*)malloc((com.ns - 1)*n * sizeof(double))) == NULL)
392       error2("PathwayMP1: oom");
393 
394    PATHWay = (char*)malloc(nid*(n + 3) * sizeof(char));
395    NCharaCur = PATHWay + nid;  ICharaCur = NCharaCur + nid;  CharaCur = ICharaCur + nid;
396    if (job == 0) {
397       zero(Ft, n*n*(tree.nbranch + 1));
398       if ((Ftt = (int*)malloc(n*n*tree.nbranch * sizeof(int))) == NULL) error2("oom");
399    }
400    else {
401       pnode = (double*)malloc((nid*com.npatt + 1)*(sizeof(double) + sizeof(char)));
402       FOR(j, nid) zz[com.ns + j] = (char*)(pnode + nid*com.npatt) + j*com.npatt;
403       FOR(j, com.ns) zz[j] = com.z[j];
404       if (pnode == NULL) error2("oom");
405    }
406    for (j = 0, visit[i = 0] = tree.root - com.ns; j < tree.nbranch; j++)
407       if (tree.branches[j][1] >= com.ns)
408          visit[++i] = tree.branches[j][1] - com.ns;
409 
410    for (h = 0; h < com.ls; h++) {
411       hp = com.pose[h];
412       if (job == 1) {
413          fprintf(fout, "\n%4d  ", h + 1);
414          FOR(j, com.ns) fprintf(fout, "%c", pch[com.z[j][hp]]);
415          fprintf(fout, ":  ");
416          FOR(j, nid*n) pnsite[j] = 0;
417       }
418       FOR(j, com.ns) nodeb[j] = com.z[j][hp];
419       if (job == 0) FOR(j, n*n*tree.nbranch) Ftt[j] = 0;
420 
421       InteriorStatesMP(1, hp, &nchange, NCharaCur, CharaCur, space);
422       ICharaCur[j = tree.root - com.ns] = 0;  PATHWay[j] = CharaCur[j*n + 0];
423       FOR(j, nid) Equivoc[j] = (NCharaCur[j] > 1);
424 
425       if (nchange > *maxchange) *maxchange = nchange;
426       if (nchange > NCATCHANGE - 1) error2("raise NCATCHANGE");
427 
428       NSiteChange[nchange]++;
429       /* NSiteChange[nchange]+=(int)com.fpatt[hp]; */
430 
431       DownStates(tree.root);
432       for (npath = 0, sumpr = bestpr = 0; ;) {
433          for (j = 0, k = visit[nid - 1]; j < NCharaCur[k]; j++) {
434             PATHWay[k] = CharaCur[k*n + j]; npath++;
435             FOR(i, nid) nodeb[i + com.ns] = PATHWay[i];
436             if (job == 1) {
437                FOR(i, nid) fprintf(fout, "%c", pch[PATHWay[i]]); fputc(' ', fout);
438                pr = com.pi[(int)nodeb[tree.root]];
439                for (i = 0; i < tree.nbranch; i++) {
440                   i1 = nodeb[tree.branches[i][0]];
441                   i2 = nodeb[tree.branches[i][1]];
442                   pr *= Ft[i*n*n + i1*n + i2];
443                }
444                sumpr += pr;
445                FOR(i, nid) pnsite[i*n + nodeb[i + com.ns]] += pr;
446                if (pr > bestpr)
447                {
448                   bestpr = pr; FOR(i, nid) bestPath[i] = PATHWay[i];
449                }
450             }
451             else {
452                for (i = 0, nchange0 = 0; i < tree.nbranch; i++) {
453                   i1 = nodeb[tree.branches[i][0]];
454                   i2 = nodeb[tree.branches[i][1]];
455                   if (i1 != i2) nchange0++;
456                   Ftt[i*n*n + i1*n + i2]++;
457                }
458                if (nchange0 != nchange) {
459                   printf("\a\nerr:PathwayMP %d != %d", nchange, nchange0);
460                   fprintf(fout, ".%d. ", nchange0); /* ??? */
461                }
462             }
463          }
464          for (j = nid - 2; j >= 0; j--) {
465             if (Equivoc[k = visit[j]] == 0) continue;
466             if (ICharaCur[k] + 1 < NCharaCur[k]) {
467                PATHWay[k] = CharaCur[k*n + (++ICharaCur[k])];
468                DownStates(k + com.ns);
469                break;
470             }
471             else { /* if (next equivocal node is not ancestor) update node k */
472                for (i = j - 1; i >= 0; i--) if (Equivoc[(int)visit[i]]) break;
473                if (i >= 0) {
474                   for (it = k + com.ns, i = visit[i] + com.ns; ; it = nodes[it].father)
475                      if (it == tree.root || nodes[it].father == i) break;
476                   if (it == tree.root)
477                      DownStatesOneNode(k + com.ns, nodes[k + com.ns].father);
478                }
479             }
480          }
481          if (j < 0) break;
482       }      /* for (npath) */
483 /*
484       printf ("\rsite pattern %4d/%4d: %6d%6d", hp+1,com.npatt,npath,nchange);
485 */
486       if (job == 0)
487          FOR(j, n*n*tree.nbranch) Ft[j] += (double)Ftt[j] / npath*com.fpatt[hp];
488       else {
489          FOR(i, nid) zz[com.ns + i][hp] = bestPath[i];
490          FOR(i, nid) pnode[i*com.npatt + hp] = pnsite[i*n + bestPath[i]] / sumpr;
491          fprintf(fout, " |%4d (%d) | ", npath, nchange);
492          if (npath > 1) {
493             FOR(i, nid) fprintf(fout, "%c", pch[bestPath[i]]);
494             fprintf(fout, " (%.3f)", bestpr / sumpr);
495 
496          }
497       }
498    }   /* for (h) */
499    free(PATHWay);
500    if (job == 0) {
501       free(Ftt);
502       FOR(i, tree.nbranch) FOR(j, n*n) Ft[tree.nbranch*n*n + j] += Ft[i*n*n + j];
503    }
504    else {
505       fprintf(fout, "\n\nApprox. relative accuracy at each node, by site\n");
506       FOR(h, com.ls) {
507          hp = com.pose[h];
508          fprintf(fout, "\n%4d  ", h + 1);
509          FOR(j, com.ns) fprintf(fout, "%c", pch[com.z[j][hp]]);
510          fprintf(fout, ":  ");
511          FOR(i, nid) if (pnode[i*com.npatt + hp] < .99999) break;
512          if (i < nid)  FOR(j, nid)
513             fprintf(fout, "%c (%5.3f) ", pch[zz[j][hp]], pnode[j*com.npatt + hp]);
514       }
515       /* Site2Pattern (fout); */
516       fprintf(fout, "\n\nlist of extant and reconstructed sequences\n\n");
517       for (j = 0; j < tree.nnode; j++, FPN(fout)) {
518          if (j < com.ns) fprintf(fout, "%-20s", com.spname[j]);
519          else         fprintf(fout, "node #%-14d", j + 1);
520          print1seq(fout, zz[j], (com.readpattern ? com.npatt : com.ls), com.pose);
521       }
522       free(pnode);
523    }
524    free(pnsite);
525    return (0);
526 }
527 
DistanceREV(double Ft[],int n,double alpha,double Root[],double U[],double V[],double pi[],double space[],int * cond)528 double DistanceREV(double Ft[], int n, double alpha, double Root[], double U[],
529    double V[], double pi[], double space[], int *cond)
530 {
531    /* input:  Ft, n, alpha
532       output: Q(in Ft), t, Root, U, V, and cond
533       space[n*n*2]
534    */
535    int i, j, InApplicable;
536    double *Q = Ft, *T1 = space, *T2 = space + n*n, t, pi_sqrt[20], small = 0.1 / com.ls;
537 
538    for (i = 0, t = 0; i < n; i++) FOR(j, n) if (i - j) t += Q[i*n + j];
539    if (t < small) { *cond = 1; zero(Q, n*n); return (0); }
540 
541    for (i = 0; i < n; i++) for (j = 0; j < i; j++)
542       Q[i*n + j] = Q[j*n + i] = (Q[i*n + j] + Q[j*n + i]) / 2;
543 
544    abyx(1. / sum(Q, n*n), Q, n*n);
545    for (i = 0; i < n; i++) {
546       pi[i] = sum(Q + i*n, n);
547       if (pi[i] > small)
548          abyx(1 / pi[i], Q + i*n, n);
549    }
550 
551    eigenQREV(Q, pi, n, Root, U, V, pi_sqrt);
552    for (i = 0, InApplicable = 0; i < n; i++) {
553       if (Root[i] <= 0) {
554          InApplicable = 1;
555          Root[i] = -300;  /* adhockery */
556       }
557       else
558          Root[i] = (alpha <= 0 ? log(Root[i]) : gammap(Root[i], alpha));
559    }
560    FOR(i, n) FOR(j, n) T1[i*n + j] = U[i*n + j] * Root[j];
561    matby(T1, V, Q, n, n, n);
562    for (i = 0, t = 0; i < n; i++) t -= pi[i] * Q[i*n + i];
563 
564    if (noisy >= 9 && InApplicable) printf("Root(P)<0.  adhockery invoked\n");
565    if (t <= 0) error2("err: DistanceREV");
566 
567    FOR(i, n) Root[i] /= t;
568    FOR(i, n) FOR(j, n) { Q[i*n + j] /= t; if (i - j) Q[i*n + j] = max2(0, Q[i*n + j]); }
569 
570    return (t);
571 }
572 
573 
PatternLS(FILE * fout,double Ft[],double alpha,double space[],int * cond)574 int PatternLS(FILE *fout, double Ft[], double alpha, double space[], int *cond)
575 {
576    /* space[n*n*2]
577    */
578    int n = com.ncode, i, j, k, h, it;
579    double *Q = Ft, *Qt = Q + n*n, *Qm = Qt + n*n;
580    double *pi, *Root, *U, *V, *T1 = space, *branch, t;
581    FILE *fdist = gfopen("Distance", "w");
582 
583    if ((pi = (double*)malloc((n*n * 3 + tree.nbranch) * sizeof(double))) == NULL)
584       error2("PatternLS: oom");
585    Root = pi + n;  U = Root + n; V = U + n*n; branch = V + n*n;
586 
587    *cond = 0;
588    for (i = 0, zero(Qt, n*n), zero(Qm, n*n); i < com.ns; i++) {
589       for (j = 0; j < i; j++) {
590          for (h = 0, zero(Q, n*n); h < com.npatt; h++) {
591             Q[(com.z[i][h])*n + com.z[j][h]] += com.fpatt[h] / 2;
592             Q[(com.z[j][h])*n + com.z[i][h]] += com.fpatt[h] / 2;
593          }
594          FOR(k, n*n) Qt[k] += Q[k] / (com.ns*(com.ns - 1) / 2);
595          it = i*(i - 1) / 2 + j;
596          SeqDistance[it] = DistanceREV(Q, n, alpha, Root, U, V, pi, space, &k);
597 
598          if (k == -1) {
599             *cond = -1; printf("\n%d&%d: F(t) modified in DistanceREV", i + 1, j + 1);
600          }
601 
602          fprintf(fdist, "%9.5f", SeqDistance[it]);
603          /*
604          FOR (k,n)
605          if (Q[k*n+k]>0) { printf ("%d %d %.5f\n", i+1, j+1, Q[k*n+k]); }
606          */
607          FOR(k, n*n) Qm[k] += Q[k] / (com.ns*(com.ns - 1) / 2);
608       }
609       FPN(fdist);
610    }
611    fclose(fdist);
612    DistanceREV(Qt, n, alpha, Root, U, V, pi, space, &k);
613    if (k == -1) { puts("F(t) modified in DistanceREV"); }
614 
615    fprintf(fout, "\n\nQ: from average F over pairwise comparisons");
616    OutQ(fout, n, Qt, pi, Root, U, V, T1);
617    fprintf(fout, "\n\nQ: average of Qs over pairwise comparisons\n");
618    fprintf(fout, "(disregard this if very different from the previous Q)");
619    OutQ(fout, n, Qm, pi, Root, U, V, T1);
620 
621    if (tree.nbranch) {
622       fillxc(branch, 0.1, tree.nbranch);
623       LSDistance(&t, branch, testx);
624       OutTreeB(fout);  FPN(fout);
625       FOR(i, tree.nbranch) fprintf(fout, "%9.5f", branch[i]);
626       PMatBranch(Ft, com.ncode, branch, Root, U, V, space);
627    }
628    free(pi);
629    return (0);
630 }
631 
testx(double x[],int np)632 int testx(double x[], int np)
633 {
634    int i;
635    double tb[] = { 1e-5, 99 };
636    FOR(i, np) if (x[i]<tb[0] || x[i]>tb[1]) return(-1);
637    return(0);
638 }
639 
SetBranch(double x[])640 int SetBranch(double x[])
641 {
642    int i, status = 0;
643    double small = 1e-5;
644 
645    FOR(i, tree.nnode)
646       if (i != tree.root && (nodes[i].branch = x[nodes[i].ibranch]) < -small)
647          status = -1;
648    return (status);
649 }
650