1 /*
2  * FastTree -- inferring approximately-maximum-likelihood trees for large
3  * multiple sequence alignments.
4  *
5  * Morgan N. Price
6  * http://www.microbesonline.org/fasttree/
7  *
8  * Thanks to Jim Hester of the Cleveland Clinic Foundation for
9  * providing the first parallel (OpenMP) code, Siavash Mirarab of
10  * UT Austin for implementing the WAG option, Samuel Shepard
11  * at the CDC for suggesting and helping with the -quote option, and
12  * Aaron Darling (University of Technology, Sydney) for numerical changes
13  * for wide alignments of closely-related sequences.
14  *
15  *  Copyright (C) 2008-2015 The Regents of the University of California
16  *  All rights reserved.
17  *
18  *  This program is free software; you can redistribute it and/or modify
19  *  it under the terms of the GNU General Public License as published by
20  *  the Free Software Foundation; either version 2 of the License, or
21  *  (at your option) any later version.
22  *
23  *  This program is distributed in the hope that it will be useful,
24  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
25  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26  *  GNU General Public License for more details.
27  *
28  *  You should have received a copy of the GNU General Public License along
29  *  with this program; if not, write to the Free Software Foundation, Inc.,
30  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
31  *  or visit http://www.gnu.org/copyleft/gpl.html
32  *
33  *  Disclaimer
34  *
35  *  NEITHER THE UNITED STATES NOR THE UNITED STATES DEPARTMENT OF ENERGY,
36  *  NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED,
37  *  OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
38  *  COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
39  *  OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE
40  *  PRIVATELY OWNED RIGHTS.
41  */
42 
43 /*
44  * To compile FastTree, do:
45  * gcc -Wall -O3 -finline-functions -funroll-loops -o FastTree -lm FastTree.c
46  * Use -DNO_SSE to turn off use of SSE3 instructions
47  *  (should not be necessary because compiler should not set __SSE__ if
48  *  not available, and modern mallocs should return 16-byte-aligned values)
49  * Use -DOPENMP -fopenmp to use multiple threads (note, old versions of gcc
50  *   may not support -fopenmp)
51  * Use -DTRACK_MEMORY if you want detailed reports of memory usage,
52  * but results are not correct above 4GB because mallinfo stores int values.
53  * It also makes FastTree run significantly slower.
54  *
55  * To get usage guidance, do:
56  * FastTree -help
57  *
58  * FastTree uses profiles instead of a distance matrix, and computes
59  * support values for each split from the profiles of the 4 nodes
60  * around the split. It stores a profile for each node and a average
61  * profile over all active nodes (the "out-profile" for computing the
62  * total sum of distance to other nodes).  The neighbor joining phase
63  * requires O(N*L*a) space, where N is the number of sequences, L is
64  * the alignment width, and a is the alphabet size. The top-hits
65  * heuristic requires an additional O(N sqrt(N)) memory. After
66  * neighbor-joining, FastTree improves the topology with
67  * nearest-neighbor interchanges (NNIs) and subtree-prune-regraft
68  * moves (SPRs), which does not have a significant additional memory
69  * requirement. (We need only store "up-profiles" on the path from our
70  * current traversal point to the root.) These take O(NLa) time per
71  * round, and with default settings, O(N log(N) L a) time total.
72  * FastTree further improves the topology with maximum-likelihood
73  * NNIs, using similar data structures and complexity, but with a
74  * higher constant factor, and now the "profiles" are actually
75  * posterior distributions for that subtree.  Finally, FastTree
76  * resamples the site likelihoods around each NNI and uses
77  * the Shimodaira Hasegawa test to estimate the reliability of each split.
78  *
79  * Overview of the neighbor-joining phase:
80  *
81  * Although FastTree uses a log correction on profile distances to
82  * account for multiple substitutions when doing NNIs and SPRs, the
83  * operations on the profiles themselves involve "additive" distances
84  * -- either %different (for nucleotide) or by using an amino acid
85  * similarity matrix (for proteins).  If we are using %different as
86  * our distance matrix then
87  *
88  * Profile_distance(A,B) = 1 - sum over characters of freq(A)*freq(B)
89  *
90  * and we can average this value over positions. Positions with gaps
91  * are weighted by %ungapped(A) * %ungapped(B).
92  *
93  * If we are using an amino acid dissimilarity matrix D(i,j) then at
94  * each position
95  *
96  * Profile_distance(A,B) = sum(i,j) freq(A==i) * freq(B==j) * D(i,j)
97  * = sum(k) Ak * Bk * Lambda(k)
98  *
99  * where k iterates over 20 eigenvectors, Lambda(k) is the eigenvalue,
100  * and if A==i, then Ak is the kth column of the inverse of the
101  * eigenvector matrix.
102  *
103  * The exhaustive approach (-slow) takes O(N**3*L*a) time, but
104  * this can be reduced to as little as O(N**(3/2)*log(N)*L*a) time
105  * by using heuristics.
106  *
107  * It uses a combination of three heuristics: a visible set similar to
108  * that of FastTree (Elias & Lagergren 2005), a local hill-climbing
109  * search for a better join (as in relaxed neighbor-joining, Evans et
110  * al. 2006), and a top-hit list to reduce the search space (see
111  * below).
112  *
113  * The "visible" set stores, for each node, the best join for that
114  * node, as identified at some point in the past
115  *
116  * If top-hits are not being used, then the neighbor-joining phase can
117  * be summarized as:
118  *
119  * Compute the out-profile by averaging the leaves
120  * Compute the out-distance of each leaf quickly, using the out-profile
121  * Compute the visible set (or approximate it using top-hits, see below)
122  * Until we're down to 3 active nodes:
123  *   Find the best join in the visible set
124  *	(This involves recomputing the neighbor-joining criterion,
125  *      as out-distances and #active nodes may have changed)
126  *   Follow a chain of best hits (again recomputing the criterion)
127  *  	until we find a locally best join, as in relaxed neighbor joining
128  *   Create a profile of the parent node, either using simple averages (default)
129  *	or using weighted joining as in BIONJ (if -bionj was specified)
130  *   Update the out-profile and the out-distances
131  *   Update the visible set:
132  *      find the best join for the new joined node
133  *      replace hits to the joined children with hits to the parent
134  *      if we stumble across a join for the new node that is better
135  *          than the corresponding entry in the visible set, "reset"
136  *          that entry.
137  *
138  * For each iteration, this method does
139  * O(N) work to find the best hit in the visible set
140  * O(L*N*a*log(N)) work to do the local search, where log(N)
141  *	is a pessimistic estimate of the number of iterations. In
142  *      practice, we average <1 iteration for 2,000 sequences.
143  *      With -fastest, this step is omitted.
144  * O(N*a) work to compute the joined profile and update the out-profile
145  * O(L*N*a) work to update the out-distances
146  * O(L*N*a) work to compare the joined profile to the other nodes
147  *      (to find the new entry in the visible set)
148  *
149  * and there are N-3 iterations, so it takes O(N**2 * L * log(N) * a) time.
150  *
151  * The profile distances give exactly the same result as matrix
152  * distances in neighbor-joining or BIONJ would if there are no gaps
153  * in the alignment. If there are gaps, then it is an
154  * approximation. To get the same result we also store a "diameter"
155  * for each node (diameter is 0 for leaves).
156  *
157  * In the simpler case (NJ rather than BIONJ), when we join A and B to
158  * give a new node AB,
159  *
160  * Profile(AB) = (A+B)/2
161  * Profile_distance(AB,C) = (Profile_distance(A,C)+Profile_distance(B,C))/2
162  * because the formulas above are linear
163  *
164  * And according to the neighor-joining rule,
165  * d(AB,C) = (d(A,C)+d(B,C)-d(A,B))/2
166  *
167  * and we can achieve the same value by writing
168  * diameter(AB) = pd(A,B)/2
169  * diameter(leaf) = 0
170  * d(A,B) = pd(A,B) - diameter(A) - diameter(B)
171  *
172  * because
173  * d(AB,C) = (d(A,C)+d(B,C)-d(A,B))/2
174  * = (pd(A,C)-diam(A)-diam(C)+pd(B,C)-diam(B)-diam(C)-d(A,B)+diam(A)+diam(B))/2
175  * = (pd(A,C)+pd(B,C))/2 - diam(C) - pd(A,B)
176  * = pd(AB,C) - diam(AB) - diam(C)
177  *
178  * If we are using BIONJ, with weight lambda for the join:
179  * Profile(AB) = lambda*A + (1-lambda)*B
180  * then a similar argument gives
181  * diam(AB) = lambda*diam(A) + (1-lambda)*diam(B) + lambda*d(A,AB) + (1-lambda)*d(B,AB),
182  *
183  * where, as in neighbor joining,
184  * d(A,AB) = d(A,B) + (total out_distance(A) - total out_distance(B))/(n-2)
185  *
186  * A similar recursion formula works for the "variance" matrix of BIONJ,
187  * var(AB,C) = lambda*var(A,C) + (1-lambda)*var(B,C) - lambda*(1-lambda)*var(A,B)
188  * is equivalent to
189  * var(A,B) = pv(A,B) - vd(A) - vd(B), where
190  * pv(A,B) = pd(A,B)
191  * vd(A) = 0 for leaves
192  * vd(AB) = lambda*vd(A) + (1-lambda)*vd(B) + lambda*(1-lambda)*var(A,B)
193  *
194  * The top-hist heuristic to reduce the work below O(N**2*L) stores a top-hit
195  * list of size m=sqrt(N) for each active node.
196  *
197  * The list can be initialized for all the leaves in sub (N**2 * L) time as follows:
198  * Pick a "seed" sequence and compare it to all others
199  * Store the top m hits of the seed as its top-hit list
200  * Take "close" hits of the seed(within the top m, and see the "close" parameter),
201  *    and assume that their top m hits lie within the top 2*m hits of the seed.
202  *    So, compare them to the seed's neighors (if they do not already
203  *    have a top hit list) and set their top hits.
204  *
205  * This method does O(N*L) work for each seed, or O(N**(3/2)*L) work total.
206  *
207  * To avoid doing O(N*L) work at each iteration, we need to avoid
208  * updating the visible set and the out-distances. So, we use "stale"
209  * out-distances, and when searching the visible set for the best hit,
210  * we only inspect the top m=sqrt(N) entries. We then update those
211  * out-distances (up to 2*m*L*a work) and then find the best hit.
212  *
213  * To avoid searching the entire visible set, FastTree keeps
214  * and updates a list of the top sqrt(N) entries in the visible set.
215  * This costs O(sqrt(N)) time per join to find the best entry and to
216  * update, or (N sqrt(N)) time overall.
217  *
218  * Similarly, when doing the local hill-climbing, we avoid O(N*L) work
219  * by only considering the top-hits for the current node. So this adds
220  * O(m*a*log(N)) work per iteration.
221  *
222  * When we join two nodes, we compute profiles and update the
223  * out-profile as before. We need to compute the best hits of the node
224  * -- we merge the lists for the children and select the best up-to-m
225  * hits. If the top hit list contains a stale node we replace it with
226  * its parent. If we still have <m/2 entries, we do a "refresh".
227  *
228  * In a "refresh", similar to the fast top-hit computation above, we
229  * compare the "seed", in this case the new joined node, to all other
230  * nodes. We compare its close neighbors (the top m hits) to all
231  * neighbors (the top 2*m hits) and update the top-hit lists of all
232  * neighbors (by merging to give a list of 3*m entries and then
233  * selecting the best m entries).
234  *
235  * Finally, during these processes we update the visible sets for
236  * other nodes with better hits if we find them, and we set the
237  * visible entry for the new joined node to the best entry in its
238  * top-hit list. (And whenever we update a visible entry, we
239  * do O(sqrt(N)) work to update the top-visible list.)
240  * These udpates are not common so they do not alter the
241  * O(N sqrt(N) log(N) L a) total running time for the joining phase.
242  *
243  * Second-level top hits
244  *
245  * With -fastest or with -2nd, FastTree uses an additional "2nd-level" top hits
246  * heuristic to reduce the running time for the top-hits phase to
247  * O(N**1.25 L) and for the neighbor-joining phase to O(N**1.25 L a).
248  * This also reduces the memory usage for the top-hits lists to
249  * O(N**1.25), which is important for alignments with a million
250  * sequences. The key idea is to store just q = sqrt(m) top hits for
251  * most sequences.
252  *
253  * Given the neighbors of A -- either for a seed or for a neighbor
254  * from the top-hits heuristic, if B is within the top q hits of A, we
255  * set top-hits(B) from the top 3*q top-hits of A. And, we record that
256  * A is the "source" of the hits for B, so if we run low on hits for
257  * B, instead of doing a full refresh, we can do top-hits(B) :=
258  * top-hits(B) union top-hits(active_ancestor(A)).
259  * During a refresh, these "2nd-level" top hits are updated just as
260  * normal, but the source is maintained and only q entries are stored,
261  * until we near the end of the neighbor joining phase (until the
262  * root as 2*m children or less).
263  *
264  * Parallel execution with OpenMP
265  *
266  * If you compile FastTree with OpenMP support, it will take
267  * advantage of multiple CPUs on one machine. It will parallelize:
268  *
269  * The top hits phase
270  * Comparing one node to many others during the NJ phase (the simplest kind of join)
271  * The refresh phase
272  * Optimizing likelihoods for 3 alternate topologies during ML NNIs and ML supports
273  * (only 3 threads can be used)
274  *
275  * This accounts for most of the O(N L a) or slower steps except for
276  * minimum-evolution NNIs (which are fast anyway), minimum-evolution SPRs,
277  * selecting per-site rates, and optimizing branch lengths outside of ML NNIs.
278  *
279  * Parallelizing the top hits phase may lead to a slight change in the tree,
280  * as some top hits are computed from different (and potentially less optimal source).
281  * This means that results on repeated runs may not be 100% identical.
282  * However, this should not have any significant effect on tree quality
283  * after the NNIs and SPRs.
284  *
285  * The OpenMP code also turns off the star-topology test during ML
286  * NNIs, which may lead to slight improvements in likelihood.
287  */
288 
289 #include <stdio.h>
290 #include <stdbool.h>
291 #include <string.h>
292 #include <assert.h>
293 #include <math.h>
294 #include <stdlib.h>
295 #include <sys/time.h>
296 #include <ctype.h>
297 #include <unistd.h>
298 #ifdef TRACK_MEMORY
299 /* malloc.h apparently doesn't exist on MacOS */
300 #include <malloc.h>
301 #endif
302 
303 /* Compile with -DOPENMP to turn on multithreading */
304 #ifdef OPENMP
305 #include <omp.h>
306 #endif
307 
308 /* By default, tries to compile with SSE instructions for greater speed.
309    But if compiled with -DUSE_DOUBLE, uses double precision instead of single-precision
310    floating point (2x memory required), does not use SSE, and allows much shorter
311    branch lengths.
312 */
313 #ifdef __SSE__
314 #if !defined(NO_SSE) && !defined(USE_DOUBLE)
315 #define USE_SSE3
316 #endif
317 #endif
318 
319 
320 #ifdef USE_DOUBLE
321 #define SSE_STRING "Double precision (No SSE3)"
322 typedef double numeric_t;
323 #define ScanNumericSpec "%lf"
324 #else
325 typedef float numeric_t;
326 #define ScanNumericSpec "%f"
327 #endif
328 
329 #ifdef USE_SSE3
330 #define SSE_STRING "SSE3"
331 #define ALIGNED __attribute__((aligned(16)))
332 #define IS_ALIGNED(X) ((((unsigned long) new) & 15L) == 0L)
333 #include <xmmintrin.h>
334 
335 #else
336 
337 #define ALIGNED
338 #define IS_ALIGNED(X) 1
339 
340 #ifndef USE_DOUBLE
341 #define SSE_STRING "No SSE3"
342 #endif
343 
344 #endif /* USE_SSE3 */
345 
346 #define FT_VERSION "2.1.10"
347 
348 char *usage =
349   "  FastTree protein_alignment > tree\n"
350   "  FastTree < protein_alignment > tree\n"
351   "  FastTree -out tree protein_alignment\n"
352   "  FastTree -nt nucleotide_alignment > tree\n"
353   "  FastTree -nt -gtr < nucleotide_alignment > tree\n"
354   "  FastTree < nucleotide_alignment > tree\n"
355   "FastTree accepts alignments in fasta or phylip interleaved formats\n"
356   "\n"
357   "Common options (must be before the alignment file):\n"
358   "  -quiet to suppress reporting information\n"
359   "  -nopr to suppress progress indicator\n"
360   "  -log logfile -- save intermediate trees, settings, and model details\n"
361   "  -fastest -- speed up the neighbor joining phase & reduce memory usage\n"
362   "        (recommended for >50,000 sequences)\n"
363   "  -n <number> to analyze multiple alignments (phylip format only)\n"
364   "        (use for global bootstrap, with seqboot and CompareToBootstrap.pl)\n"
365   "  -nosupport to not compute support values\n"
366   "  -intree newick_file to set the starting tree(s)\n"
367   "  -intree1 newick_file to use this starting tree for all the alignments\n"
368   "        (for faster global bootstrap on huge alignments)\n"
369   "  -pseudo to use pseudocounts (recommended for highly gapped sequences)\n"
370   "  -gtr -- generalized time-reversible model (nucleotide alignments only)\n"
371   "  -lg -- Le-Gascuel 2008 model (amino acid alignments only)\n"
372   "  -wag -- Whelan-And-Goldman 2001 model (amino acid alignments only)\n"
373   "  -quote -- allow spaces and other restricted characters (but not ' ) in\n"
374   "           sequence names and quote names in the output tree (fasta input only;\n"
375   "           FastTree will not be able to read these trees back in)\n"
376   "  -noml to turn off maximum-likelihood\n"
377   "  -nome to turn off minimum-evolution NNIs and SPRs\n"
378   "        (recommended if running additional ML NNIs with -intree)\n"
379   "  -nome -mllen with -intree to optimize branch lengths for a fixed topology\n"
380   "  -cat # to specify the number of rate categories of sites (default 20)\n"
381   "      or -nocat to use constant rates\n"
382   "  -gamma -- after optimizing the tree under the CAT approximation,\n"
383   "      rescale the lengths to optimize the Gamma20 likelihood\n"
384   "  -constraints constraintAlignment to constrain the topology search\n"
385   "       constraintAlignment should have 1s or 0s to indicates splits\n"
386   "  -expert -- see more options\n"
387   "For more information, see http://www.microbesonline.org/fasttree/\n";
388 
389 char *expertUsage =
390   "FastTree [-nt] [-n 100] [-quote] [-pseudo | -pseudo 1.0]\n"
391   "           [-boot 1000 | -nosupport]\n"
392   "           [-intree starting_trees_file | -intree1 starting_tree_file]\n"
393   "           [-quiet | -nopr]\n"
394   "           [-nni 10] [-spr 2] [-noml | -mllen | -mlnni 10]\n"
395   "           [-mlacc 2] [-cat 20 | -nocat] [-gamma]\n"
396   "           [-slow | -fastest] [-2nd | -no2nd] [-slownni] [-seed 1253] \n"
397   "           [-top | -notop] [-topm 1.0 [-close 0.75] [-refresh 0.8]]\n"
398   "           [-matrix Matrix | -nomatrix] [-nj | -bionj]\n"
399   "           [-lg] [-wag] [-nt] [-gtr] [-gtrrates ac ag at cg ct gt] [-gtrfreq A C G T]\n"
400   "           [ -constraints constraintAlignment [ -constraintWeight 100.0 ] ]\n"
401   "           [-log logfile]\n"
402   "         [ alignment_file ]\n"
403   "        [ -out output_newick_file | > newick_tree]\n"
404   "\n"
405   "or\n"
406   "\n"
407   "FastTree [-nt] [-matrix Matrix | -nomatrix] [-rawdist] -makematrix [alignment]\n"
408   "    [-n 100] > phylip_distance_matrix\n"
409   "\n"
410   "  FastTree supports fasta or phylip interleaved alignments\n"
411   "  By default FastTree expects protein alignments,  use -nt for nucleotides\n"
412   "  FastTree reads standard input if no alignment file is given\n"
413   "\n"
414   "Input/output options:\n"
415   "  -n -- read in multiple alignments in. This only\n"
416   "    works with phylip interleaved format. For example, you can\n"
417   "    use it with the output from phylip's seqboot. If you use -n, FastTree\n"
418   "    will write 1 tree per line to standard output.\n"
419   "  -intree newickfile -- read the starting tree in from newickfile.\n"
420   "     Any branch lengths in the starting trees are ignored.\n"
421   "    -intree with -n will read a separate starting tree for each alignment.\n"
422   "  -intree1 newickfile -- read the same starting tree for each alignment\n"
423   "  -quiet -- do not write to standard error during normal operation (no progress\n"
424   "     indicator, no options summary, no likelihood values, etc.)\n"
425   "  -nopr -- do not write the progress indicator to stderr\n"
426   "  -log logfile -- save intermediate trees so you can extract\n"
427   "    the trees and restart long-running jobs if they crash\n"
428   "    -log also reports the per-site rates (1 means slowest category)\n"
429   "  -quote -- quote sequence names in the output and allow spaces, commas,\n"
430   "    parentheses, and colons in them but not ' characters (fasta files only)\n"
431   "\n"
432   "Distances:\n"
433   "  Default: For protein sequences, log-corrected distances and an\n"
434   "     amino acid dissimilarity matrix derived from BLOSUM45\n"
435   "  or for nucleotide sequences, Jukes-Cantor distances\n"
436   "  To specify a different matrix, use -matrix FilePrefix or -nomatrix\n"
437   "  Use -rawdist to turn the log-correction off\n"
438   "  or to use %different instead of Jukes-Cantor\n"
439   "\n"
440   "  -pseudo [weight] -- Use pseudocounts to estimate distances between\n"
441   "      sequences with little or no overlap. (Off by default.) Recommended\n"
442   "      if analyzing the alignment has sequences with little or no overlap.\n"
443   "      If the weight is not specified, it is 1.0\n"
444   "\n"
445   "Topology refinement:\n"
446   "  By default, FastTree tries to improve the tree with up to 4*log2(N)\n"
447   "  rounds of minimum-evolution nearest-neighbor interchanges (NNI),\n"
448   "  where N is the number of unique sequences, 2 rounds of\n"
449   "  subtree-prune-regraft (SPR) moves (also min. evo.), and\n"
450   "  up to 2*log(N) rounds of maximum-likelihood NNIs.\n"
451   "  Use -nni to set the number of rounds of min. evo. NNIs,\n"
452   "  and -spr to set the rounds of SPRs.\n"
453   "  Use -noml to turn off both min-evo NNIs and SPRs (useful if refining\n"
454   "       an approximately maximum-likelihood tree with further NNIs)\n"
455   "  Use -sprlength set the maximum length of a SPR move (default 10)\n"
456   "  Use -mlnni to set the number of rounds of maximum-likelihood NNIs\n"
457   "  Use -mlacc 2 or -mlacc 3 to always optimize all 5 branches at each NNI,\n"
458   "      and to optimize all 5 branches in 2 or 3 rounds\n"
459   "  Use -mllen to optimize branch lengths without ML NNIs\n"
460   "  Use -mllen -nome with -intree to optimize branch lengths on a fixed topology\n"
461   "  Use -slownni to turn off heuristics to avoid constant subtrees (affects both\n"
462   "       ML and ME NNIs)\n"
463   "\n"
464   "Maximum likelihood model options:\n"
465   "  -lg -- Le-Gascuel 2008 model instead of (default) Jones-Taylor-Thorton 1992 model (a.a. only)\n"
466   "  -wag -- Whelan-And-Goldman 2001 model instead of (default) Jones-Taylor-Thorton 1992 model (a.a. only)\n"
467   "  -gtr -- generalized time-reversible instead of (default) Jukes-Cantor (nt only)\n"
468   "  -cat # -- specify the number of rate categories of sites (default 20)\n"
469   "  -nocat -- no CAT model (just 1 category)\n"
470   "  -gamma -- after the final round of optimizing branch lengths with the CAT model,\n"
471   "            report the likelihood under the discrete gamma model with the same\n"
472   "            number of categories. FastTree uses the same branch lengths but\n"
473   "            optimizes the gamma shape parameter and the scale of the lengths.\n"
474   "            The final tree will have rescaled lengths. Used with -log, this\n"
475   "            also generates per-site likelihoods for use with CONSEL, see\n"
476   "            GammaLogToPaup.pl and documentation on the FastTree web site.\n"
477   "\n"
478   "Support value options:\n"
479   "  By default, FastTree computes local support values by resampling the site\n"
480   "  likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,\n"
481   "  it will compute minimum-evolution bootstrap supports instead\n"
482   "  In either case, the support values are proportions ranging from 0 to 1\n"
483   "\n"
484   "  Use -nosupport to turn off support values or -boot 100 to use just 100 resamples\n"
485   "  Use -seed to initialize the random number generator\n"
486   "\n"
487   "Searching for the best join:\n"
488   "  By default, FastTree combines the 'visible set' of fast neighbor-joining with\n"
489   "      local hill-climbing as in relaxed neighbor-joining\n"
490   "  -slow -- exhaustive search (like NJ or BIONJ, but different gap handling)\n"
491   "      -slow takes half an hour instead of 8 seconds for 1,250 proteins\n"
492   "  -fastest -- search the visible set (the top hit for each node) only\n"
493   "      Unlike the original fast neighbor-joining, -fastest updates visible(C)\n"
494   "      after joining A and B if join(AB,C) is better than join(C,visible(C))\n"
495   "      -fastest also updates out-distances in a very lazy way,\n"
496   "      -fastest sets -2nd on as well, use -fastest -no2nd to avoid this\n"
497   "\n"
498   "Top-hit heuristics:\n"
499   "  By default, FastTree uses a top-hit list to speed up search\n"
500   "  Use -notop (or -slow) to turn this feature off\n"
501   "         and compare all leaves to each other,\n"
502   "         and all new joined nodes to each other\n"
503   "  -topm 1.0 -- set the top-hit list size to parameter*sqrt(N)\n"
504   "         FastTree estimates the top m hits of a leaf from the\n"
505   "         top 2*m hits of a 'close' neighbor, where close is\n"
506   "         defined as d(seed,close) < 0.75 * d(seed, hit of rank 2*m),\n"
507   "         and updates the top-hits as joins proceed\n"
508   "  -close 0.75 -- modify the close heuristic, lower is more conservative\n"
509   "  -refresh 0.8 -- compare a joined node to all other nodes if its\n"
510   "         top-hit list is less than 80% of the desired length,\n"
511   "         or if the age of the top-hit list is log2(m) or greater\n"
512   "   -2nd or -no2nd to turn 2nd-level top hits heuristic on or off\n"
513   "      This reduces memory usage and running time but may lead to\n"
514   "      marginal reductions in tree quality.\n"
515   "      (By default, -fastest turns on -2nd.)\n"
516   "\n"
517   "Join options:\n"
518   "  -nj: regular (unweighted) neighbor-joining (default)\n"
519   "  -bionj: weighted joins as in BIONJ\n"
520   "          FastTree will also weight joins during NNIs\n"
521   "\n"
522   "Constrained topology search options:\n"
523   "  -constraints alignmentfile -- an alignment with values of 0, 1, and -\n"
524   "       Not all sequences need be present. A column of 0s and 1s defines a\n"
525   "       constrained split. Some constraints may be violated\n"
526   "       (see 'violating constraints:' in standard error).\n"
527   "  -constraintWeight -- how strongly to weight the constraints. A value of 1\n"
528   "       means a penalty of 1 in tree length for violating a constraint\n"
529   "       Default: 100.0\n"
530   "\n"
531   "For more information, see http://www.microbesonline.org/fasttree/\n"
532   "   or the comments in the source code\n";
533 ;
534 
535 
536 #define MAXCODES 20
537 #define NOCODE 127
538 /* Note -- sequence lines longer than BUFFER_SIZE are
539    allowed, but FASTA header lines must be within this limit */
540 #define BUFFER_SIZE 5000
541 #define MIN(X,Y) ((X) <  (Y) ? (X) : (Y))
542 #define MAX(X,Y) ((X) >  (Y) ? (X) : (Y))
543 
544 typedef struct {
545   int nPos;
546   int nSeq;
547   char **names;
548   char **seqs;
549   int nSaved; /* actual allocated size of names and seqs */
550 } alignment_t;
551 
552 /* For each position in a profile, we have a weight (% non-gapped) and a
553    frequency vector. (If using a matrix, the frequency vector is in eigenspace).
554    We also store codes for simple profile positions (all gaps or only 1 value)
555    If weight[pos] > 0 && codes[pos] == NOCODE then we store the vector
556    vectors itself is sets of nCodes long, so the vector for the ith nonconstant position
557    starts at &vectors[nCodes*i]
558 
559    To speed up comparison of outprofile to a sequence or other simple profile, we also
560    (for outprofiles) store codeDist[iPos*nCodes+k] = dist(k,profile[iPos])
561 
562    For constraints, we store a vector of nOn and nOff
563    If not using constraints, those will be NULL
564 */
565 typedef struct {
566   /* alignment profile */
567   numeric_t *weights;
568   unsigned char *codes;
569   numeric_t *vectors;		/* NULL if no non-constant positions, e.g. for leaves */
570   int nVectors;
571   numeric_t *codeDist;		/* Optional -- distance to each code at each position */
572 
573   /* constraint profile */
574   int *nOn;
575   int *nOff;
576 } profile_t;
577 
578 /* A visible node is a pair of nodes i, j such that j is the best hit of i,
579    using the neighbor-joining criterion, at the time the comparison was made,
580    or approximately so since then.
581 
582    Note that variance = dist because in BIONJ, constant factors of variance do not matter,
583    and because we weight ungapped sequences higher naturally when averaging profiles,
584    so we do not take this into account in the computation of "lambda" for BIONJ.
585 
586    For the top-hit list heuristic, if the top hit list becomes "too short",
587    we store invalid entries with i=j=-1 and dist/criterion very high.
588 */
589 typedef struct {
590   int i, j;
591   numeric_t weight;			/* Total product of weights (maximum value is nPos)
592 				   This is needed for weighted joins and for pseudocounts,
593 				   but not in most other places.
594 				   For example, it is not maintained by the top hits code */
595   numeric_t dist;			/* The uncorrected distance (includes diameter correction) */
596   numeric_t criterion;		/* changes when we update the out-profile or change nActive */
597 } besthit_t;
598 
599 typedef struct {
600   int nChild;
601   int child[3];
602 } children_t;
603 
604 typedef struct {
605   /* Distances between amino acids */
606   numeric_t distances[MAXCODES][MAXCODES];
607 
608   /* Inverse of the eigenvalue matrix, for rotating a frequency vector
609      into eigenspace so that profile similarity computations are
610      O(alphabet) not O(alphabet*alphabet) time.
611   */
612   numeric_t eigeninv[MAXCODES][MAXCODES];
613   numeric_t eigenval[MAXCODES];	/* eigenvalues */
614 
615 
616   /* eigentot=eigeninv times the all-1s frequency vector
617      useful for normalizing rotated frequency vectors
618   */
619   numeric_t eigentot[MAXCODES];
620 
621   /* codeFreq is the transpose of the eigeninv matrix is
622      the rotated frequency vector for each code */
623   numeric_t codeFreq[MAXCODES][MAXCODES];
624   numeric_t gapFreq[MAXCODES];
625 } distance_matrix_t;
626 
627 
628 /* A transition matrix gives the instantaneous rate of change of frequencies
629    df/dt = M . f
630    which is solved by
631    f(t) = exp(M) . f(0)
632    and which is not a symmetric matrix because of
633    non-uniform stationary frequencies stat, so that
634    M stat = 0
635    M(i,j) is instantaneous rate of j -> i, not of i -> j
636 
637    S = diag(sqrt(stat)) is a correction so that
638    M' = S**-1 M S is symmetric
639    Let W L W**-1 = M' be an eigendecomposition of M'
640    Because M' is symmetric, W can be a rotation, and W**-1 = t(W)
641    Set V = S*W
642    M = V L V**-1 is an eigendecomposition of M
643    Note V**-1 = W**-1 S**-1 = t(W) S**-1
644 
645    Evolution by time t is given by
646 
647    exp(M*t) = V exp(L*t) V**-1
648    P(A & B | t) = B . exp(M*t) . (A * stat)
649    note this is *not* the same as P(A->B | t)
650 
651    and we can reduce some of the computations from O(a**2) to O(a) time,
652    where a is the alphabet size, by storing frequency vectors as
653    t(V) . f = t(W) . t(S) . f
654 
655    Then
656    P(f0 & f1 | t) = f1 . exp(M*t) . f0 * (f0 . stat) = sum(r0j * r1j * exp(l_j*t))
657    where r0 and r1 are the transformed vectors
658 
659    Posterior distribution of P given children f0 and f1 is given by
660    P(i | f0, f1, t0, t1) = stat * P(i->f0 | t0) * P(i->f1 | t1)
661    = P(i & f0 | t0) * P(i & f1 | t1) / stat
662    ~ (V . exp(t0*L) . r0) * (V . exp(t1*L) . r1) / stat
663 
664    When normalize this posterior distribution (to sum to 1), divide by stat,
665    and transform by t(V) -- this is the "profile" of internal nodes
666 
667    To eliminate the O(N**2) step of transforming by t(V), if the posterior
668    distribution of an amino acid is near 1 then we can approximate it by
669    P(i) ~= (i==A) * w + nearP(i) * (1-w), where
670    w is fit so that P(i==A) is correct
671    nearP = Posterior(i | i, i, 0.1, 0.1) [0.1 is an arbitrary choice]
672    and we confirm that the approximation works well before we use it.
673 
674    Given this parameter w we can set
675    rotated_posterior = rotation(w * (i==A)/stat + (1-w) * nearP/stat)
676    = codeFreq(A) * w/stat(A) + nearFreq(A) * (1-w)
677  */
678 typedef struct {
679   numeric_t stat[MAXCODES]; /* The stationary distribution */
680   numeric_t statinv[MAXCODES];	/* 1/stat */
681   /* the eigenmatrix, with the eigenvectors as columns and rotations of individual
682      characters as rows. Also includes a NOCODE entry for gaps */
683   numeric_t codeFreq[NOCODE+1][MAXCODES];
684   numeric_t eigeninv[MAXCODES][MAXCODES]; /* Inverse of eigenmatrix */
685   numeric_t eigeninvT[MAXCODES][MAXCODES]; /* transpose of eigeninv */
686   numeric_t eigenval[MAXCODES];	/* Eigenvalues  */
687   /* These are for approximate posteriors (off by default) */
688   numeric_t nearP[MAXCODES][MAXCODES]; /* nearP[i][j] = P(parent=j | both children are i, both lengths are 0.1 */
689   numeric_t nearFreq[MAXCODES][MAXCODES]; /* rotation of nearP/stat */
690 } transition_matrix_t;
691 
692 typedef struct {
693   int nRateCategories;
694   numeric_t *rates;			/* 1 per rate category */
695   unsigned int *ratecat;	/* 1 category per position */
696 } rates_t;
697 
698 typedef struct {
699   /* The input */
700   int nSeq;
701   int nPos;
702   char **seqs;			/* the aligment sequences array (not reallocated) */
703   distance_matrix_t *distance_matrix; /* a pointer (not reallocated), or NULL if using %identity distance */
704   transition_matrix_t *transmat; /* a pointer (is allocated), or NULL for Jukes-Cantor */
705   /* Topological constraints are represented for each sequence as binary characters
706      with values of '0', '1', or '-' (for missing data)
707      Sequences that have no constraint may have a NULL string
708   */
709   int nConstraints;
710   char **constraintSeqs;
711 
712   /* The profile data structures */
713   int maxnode;			/* The next index to allocate */
714   int maxnodes;			/* Space allocated in data structures below */
715   profile_t **profiles;         /* Profiles of leaves and intermediate nodes */
716   numeric_t *diameter;		/* To correct for distance "up" from children (if any) */
717   numeric_t *varDiameter;		/* To correct variances for distance "up" */
718   numeric_t *selfdist;		/* Saved for use in some formulas */
719   numeric_t *selfweight;		/* Saved for use in some formulas */
720 
721   /* Average profile of all active nodes, the "outprofile"
722    * If all inputs are ungapped, this has weight 1 (not nSequences) at each position
723    * The frequencies all sum to one (or that is implied by the eigen-representation)
724    */
725   profile_t *outprofile;
726   double totdiam;
727 
728   /* We sometimes use stale out-distances, so we remember what nActive was  */
729   numeric_t *outDistances;		/* Sum of distances to other active (parent==-1) nodes */
730   int *nOutDistActive;		/* What nActive was when this outDistance was computed */
731 
732   /* the inferred tree */
733   int root;			/* index of the root. Unlike other internal nodes, it has 3 children */
734   int *parent;			/* -1 or index of parent */
735   children_t *child;
736   numeric_t *branchlength;		/* Distance to parent */
737   numeric_t *support;		/* 1 for high-confidence nodes */
738 
739   /* auxilliary data for maximum likelihood (defaults to 1 category of rate=1.0) */
740   rates_t rates;
741 } NJ_t;
742 
743 /* Uniquify sequences in an alignment -- map from indices
744    in the alignment to unique indicies in a NJ_t
745 */
746 typedef struct {
747   int nSeq;
748   int nUnique;
749   int *uniqueFirst;		/* iUnique -> iAln */
750   int *alnNext;			/* iAln -> next, or -1  */
751   int *alnToUniq;		/* iAln -> iUnique, or -1 if another was the exemplar */
752   char **uniqueSeq;		/* indexed by iUniq -- points to strings allocated elsewhere */
753 } uniquify_t;
754 
755 /* Describes which switch to do */
756 typedef enum {ABvsCD,ACvsBD,ADvsBC} nni_t;
757 
758 /* A list of these describes a chain of NNI moves in a rooted tree,
759    making up, in total, an SPR move
760 */
761 typedef struct {
762   int nodes[2];
763   double deltaLength;		/* change in tree length for this step (lower is better) */
764 } spr_step_t;
765 
766 /* Keep track of hits for the top-hits heuristic without wasting memory
767    j = -1 means empty
768    If j is an inactive node, this may be replaced by that node's parent (and dist recomputed)
769  */
770 typedef struct {
771   int j;
772   numeric_t dist;
773 } hit_t;
774 
775 typedef struct {
776   int nHits;			/* the allocated and desired size; some of them may be empty */
777   hit_t *hits;
778   int hitSource;		/* where to refresh hits from if a 2nd-level top-hit list, or -1 */
779   int age;			/* number of joins since a refresh */
780 } top_hits_list_t;
781 
782 typedef struct {
783   int m;			 /* size of a full top hits list, usually sqrt(N) */
784   int q;			 /* size of a 2nd-level top hits, usually sqrt(m) */
785   int maxnodes;
786   top_hits_list_t *top_hits_lists; /* one per node */
787   hit_t *visible;		/* the "visible" (very best) hit for each node */
788 
789   /* The top-visible set is a subset, usually of size m, of the visible set --
790      it is the set of joins to select from
791      Each entry is either a node whose visible set entry has a good (low) criterion,
792      or -1 for empty, or is an obsolete node (which is effectively the same).
793      Whenever we update the visible set, should also call UpdateTopVisible()
794      which ensures that none of the topvisible set are stale (that is, they
795      all point to an active node).
796   */
797   int nTopVisible;		/* nTopVisible = m * topvisibleMult */
798   int *topvisible;
799 
800   int topvisibleAge;		/* joins since the top-visible list was recomputed */
801 
802 #ifdef OPENMP
803   /* 1 lock to read or write any top hits list, no thread grabs more than one */
804   omp_lock_t *locks;
805 #endif
806 } top_hits_t;
807 
808 /* Global variables */
809 /* Options */
810 int verbose = 1;
811 int showProgress = 1;
812 int slow = 0;
813 int fastest = 0;
814 bool useTopHits2nd = false;	/* use the second-level top hits heuristic? */
815 int bionj = 0;
816 double tophitsMult = 1.0;	/* 0 means compare nodes to all other nodes */
817 double tophitsClose = -1.0;	/* Parameter for how close is close; also used as a coverage req. */
818 double topvisibleMult = 1.5;	/* nTopVisible = m * topvisibleMult; 1 or 2 did not make much difference
819 				   in either running time or accuracy so I chose a compromise. */
820 
821 double tophitsRefresh = 0.8;	/* Refresh if fraction of top-hit-length drops to this */
822 double tophits2Mult = 1.0;	/* Second-level top heuristic -- only with -fastest */
823 int tophits2Safety = 3;		/* Safety factor for second level of top-hits heuristic */
824 double tophits2Refresh = 0.6;	/* Refresh 2nd-level top hits if drops down to this fraction of length */
825 
826 double staleOutLimit = 0.01;	/* nActive changes by at most this amount before we recompute
827 				   an out-distance. (Only applies if using the top-hits heuristic) */
828 double fResetOutProfile = 0.02;	/* Recompute out profile from scratch if nActive has changed
829 				   by more than this proportion, and */
830 int nResetOutProfile = 200;	/* nActive has also changed more than this amount */
831 int nCodes=20;			/* 20 if protein, 4 if nucleotide */
832 bool useMatrix=true;		/* If false, use %different as the uncorrected distance */
833 bool logdist = true;		/* If true, do a log-correction (scoredist-like or Jukes-Cantor)
834 				   but only during NNIs and support values, not during neighbor-joining */
835 double pseudoWeight = 0.0;      /* The weight of pseudocounts to avoid artificial long branches when
836 				   nearby sequences in the tree have little or no overlap
837 				   (off by default). The prior distance is based on
838 				   all overlapping positions among the quartet or triplet under
839 				   consideration. The log correction takes place after the
840 				   pseudocount is used. */
841 double constraintWeight = 100.0;/* Cost of violation of a topological constraint in evolutionary distance
842 				   or likelihood */
843 double MEMinDelta = 1.0e-4;	/* Changes of less than this in tree-length are discounted for
844 				   purposes of identifying fixed subtrees */
845 bool fastNNI = true;
846 bool gammaLogLk = false;	/* compute gamma likelihood without reoptimizing branch lengths? */
847 
848 /* Maximum likelihood options and constants */
849 /* These are used to rescale likelihood values and avoid taking a logarithm at each position */
850 const double LkUnderflow = 1.0e-4;
851 const double LkUnderflowInv = 1.0e4;
852 const double LogLkUnderflow = 9.21034037197618; /* -log(LkUnderflowInv) */
853 const double Log2 = 0.693147180559945;
854 /* These are used to limit the optimization of branch lengths.
855    Also very short branch lengths can create numerical problems.
856    In version 2.1.7, the minimum branch lengths (MLMinBranchLength and MLMinRelBranchLength)
857    were increased to prevent numerical problems in rare cases.
858    In version 2.1.8, to provide useful branch lengths for genome-wide alignments,
859    the minimum branch lengths were dramatically decreased if USE_DOUBLE is defined.
860 */
861 #ifndef USE_DOUBLE
862 const double MLMinBranchLengthTolerance = 1.0e-4; /* absolute tolerance for optimizing branch lengths */
863 const double MLFTolBranchLength = 0.001; /* fractional tolerance for optimizing branch lengths */
864 const double MLMinBranchLength = 5.0e-4; /* minimum value for branch length */
865 const double MLMinRelBranchLength = 2.5e-4; /* minimum of rate * length */
866 const double fPostTotalTolerance = 1.0e-10; /* posterior vector must sum to at least this before rescaling */
867 #else
868 const double MLMinBranchLengthTolerance = 1.0e-9;
869 const double MLFTolBranchLength = 0.001;
870 const double MLMinBranchLength = 5.0e-9;
871 const double MLMinRelBranchLength = 2.5e-9;
872 const double fPostTotalTolerance = 1.0e-20;
873 #endif
874 
875 int mlAccuracy = 1;		/* Rounds of optimization of branch lengths; 1 means do 2nd round only if close */
876 double closeLogLkLimit = 5.0;	/* If partial optimization of an NNI looks like it would decrease the log likelihood
877 				   by this much or more then do not optimize it further */
878 double treeLogLkDelta = 0.1;	/* Give up if tree log-lk changes by less than this; NNIs that change
879 				   likelihood by less than this also are considered unimportant
880 				   by some heuristics */
881 bool exactML = true;		/* Exact or approximate posterior distributions for a.a.s */
882 double approxMLminf = 0.95;	/* Only try to approximate posterior distributions if max. value is at least this high */
883 double approxMLminratio = 2/3.0;/* Ratio of approximated/true posterior values must be at least this high */
884 double approxMLnearT = 0.2;	/* 2nd component of near-constant posterior distribution uses this time scale */
885 const int nDefaultRateCats = 20;
886 
887 /* Performance and memory usage */
888 long profileOps = 0;		/* Full profile-based distance operations */
889 long outprofileOps = 0;		/* How many of profileOps are comparisons to outprofile */
890 long seqOps = 0;		/* Faster leaf-based distance operations */
891 long profileAvgOps = 0;		/* Number of profile-average steps */
892 long nHillBetter = 0;		/* Number of hill-climbing steps */
893 long nCloseUsed = 0;		/* Number of "close" neighbors we avoid full search for */
894 long nClose2Used = 0;		/* Number of "close" neighbors we use 2nd-level top hits for */
895 long nRefreshTopHits = 0;	/* Number of full-blown searches (interior nodes) */
896 long nVisibleUpdate = 0;		/* Number of updates of the visible set */
897 long nNNI = 0;			/* Number of NNI changes performed */
898 long nSPR = 0;			/* Number of SPR changes performed */
899 long nML_NNI = 0;		/* Number of max-lik. NNI changes performed */
900 long nSuboptimalSplits = 0;	/* # of splits that are rejected given final tree (during bootstrap) */
901 long nSuboptimalConstrained = 0; /* Bad splits that are due to constraints */
902 long nConstraintViolations = 0;	/* Number of constraint violations */
903 long nProfileFreqAlloc = 0;
904 long nProfileFreqAvoid = 0;
905 long szAllAlloc = 0;
906 long mymallocUsed = 0;		/* useful allocations by mymalloc */
907 long maxmallocHeap = 0;		/* Maximum of mi.arena+mi.hblkhd from mallinfo (actual mem usage) */
908 long nLkCompute = 0;		/* # of likelihood computations for pairs of probability vectors */
909 long nPosteriorCompute = 0;	/* # of computations of posterior probabilities */
910 long nAAPosteriorExact = 0;	/* # of times compute exact AA posterior */
911 long nAAPosteriorRough = 0;	/* # of times use rough approximation */
912 long nStarTests = 0;		/* # of times we use star test to avoid testing an NNI */
913 
914 /* Protein character set */
915 unsigned char *codesStringAA = (unsigned char*) "ARNDCQEGHILKMFPSTWYV";
916 unsigned char *codesStringNT = (unsigned char*) "ACGT";
917 unsigned char *codesString = NULL;
918 
919 distance_matrix_t *ReadDistanceMatrix(char *prefix);
920 void SetupDistanceMatrix(/*IN/OUT*/distance_matrix_t *); /* set eigentot, codeFreq, gapFreq */
921 void ReadMatrix(char *filename, /*OUT*/numeric_t codes[MAXCODES][MAXCODES], bool check_codes);
922 void ReadVector(char *filename, /*OUT*/numeric_t codes[MAXCODES]);
923 alignment_t *ReadAlignment(/*READ*/FILE *fp, bool bQuote); /* Returns a list of strings (exits on failure) */
924 alignment_t *FreeAlignment(alignment_t *); /* returns NULL */
925 void FreeAlignmentSeqs(/*IN/OUT*/alignment_t *);
926 
927 /* Takes as input the transpose of the matrix V, with i -> j
928    This routine takes care of setting the diagonals
929 */
930 transition_matrix_t *CreateTransitionMatrix(/*IN*/double matrix[MAXCODES][MAXCODES],
931 					    /*IN*/double stat[MAXCODES]);
932 transition_matrix_t *CreateGTR(double *gtrrates/*ac,ag,at,cg,ct,gt*/, double *gtrfreq/*ACGT*/);
933 
934 /* For converting profiles from 1 rotation to another, or converts NULL to NULL */
935 distance_matrix_t *TransMatToDistanceMat(transition_matrix_t *transmat);
936 
937 /* Allocates memory, initializes leaf profiles */
938 NJ_t *InitNJ(char **sequences, int nSeqs, int nPos,
939 	     /*IN OPTIONAL*/char **constraintSeqs, int nConstraints,
940 	     /*IN OPTIONAL*/distance_matrix_t *,
941 	     /*IN OPTIONAL*/transition_matrix_t *);
942 
943 NJ_t *FreeNJ(NJ_t *NJ); /* returns NULL */
944 void FastNJ(/*IN/OUT*/NJ_t *NJ); /* Does the joins */
945 void ReliabilityNJ(/*IN/OUT*/NJ_t *NJ, int nBootstrap);	  /* Estimates the reliability of the joins */
946 
947 /* nni_stats_t is meaningless for leaves and root, so all of those entries
948    will just be high (for age) or 0 (for delta)
949 */
950 typedef struct {
951   int age;	    /* number of rounds since this node was modified by an NNI */
952   int subtreeAge;   /* number of rounds since self or descendent had a significant improvement */
953   double delta;	    /* improvement in score for this node (or 0 if no change) */
954   double support;   /* improvement of score for self over better of alternatives */
955 } nni_stats_t;
956 
957 /* One round of nearest-neighbor interchanges according to the
958    minimum-evolution or approximate maximum-likelihood criterion.
959    If doing maximum likelihood then this modifies the branch lengths.
960    age is the # of rounds since a node was NNId
961    Returns the # of topological changes performed
962 */
963 int NNI(/*IN/OUT*/NJ_t *NJ, int iRound, int nRounds, bool useML,
964 	/*IN/OUT*/nni_stats_t *stats,
965 	/*OUT*/double *maxDeltaCriterion);
966 nni_stats_t *InitNNIStats(NJ_t *NJ);
967 nni_stats_t *FreeNNIStats(nni_stats_t *, NJ_t *NJ);	/* returns NULL */
968 
969 /* One round of subtree-prune-regraft moves (minimum evolution) */
970 void SPR(/*IN/OUT*/NJ_t *NJ, int maxSPRLength, int iRound, int nRounds);
971 
972 /* Recomputes all branch lengths by minimum evolution criterion*/
973 void UpdateBranchLengths(/*IN/OUT*/NJ_t *NJ);
974 
975 /* Recomputes all branch lengths and, optionally, internal profiles */
976 double TreeLength(/*IN/OUT*/NJ_t *NJ, bool recomputeProfiles);
977 
978 typedef struct {
979   int nBadSplits;
980   int nConstraintViolations;
981   int nBadBoth;
982   int nSplits;
983   /* How much length would be reduce or likelihood would be increased by the
984      best NNI we find (the worst "miss") */
985   double dWorstDeltaUnconstrained;
986   double dWorstDeltaConstrained;
987 } SplitCount_t;
988 
989 void TestSplitsMinEvo(NJ_t *NJ, /*OUT*/SplitCount_t *splitcount);
990 
991 /* Sets SH-like support values if nBootstrap>0 */
992 void TestSplitsML(/*IN/OUT*/NJ_t *NJ, /*OUT*/SplitCount_t *splitcount, int nBootstrap);
993 
994 /* Pick columns for resampling, stored as returned_vector[iBoot*nPos + j] */
995 int *ResampleColumns(int nPos, int nBootstrap);
996 
997 /* Use out-profile and NJ->totdiam to recompute out-distance for node iNode
998    Only does this computation if the out-distance is "stale" (nOutDistActive[iNode] != nActive)
999    Note "IN/UPDATE" for NJ always means that we may update out-distances but otherwise
1000    make no changes.
1001  */
1002 void SetOutDistance(/*IN/UPDATE*/NJ_t *NJ, int iNode, int nActive);
1003 
1004 /* Always sets join->criterion; may update NJ->outDistance and NJ->nOutDistActive,
1005    assumes join's weight and distance are already set,
1006    and that the constraint penalty (if any) is included in the distance
1007 */
1008 void SetCriterion(/*IN/UPDATE*/NJ_t *NJ, int nActive, /*IN/OUT*/besthit_t *join);
1009 
1010 /* Computes weight and distance (which includes the constraint penalty)
1011    and then sets the criterion (maybe update out-distances)
1012 */
1013 void SetDistCriterion(/*IN/UPDATE*/NJ_t *NJ, int nActive, /*IN/OUT*/besthit_t *join);
1014 
1015 /* If join->i or join->j are inactive nodes, replaces them with their active ancestors.
1016    After doing this, if i == j, or either is -1, sets weight to 0 and dist and criterion to 1e20
1017       and returns false (not a valid join)
1018    Otherwise, if i or j changed, recomputes the distance and criterion.
1019    Note that if i and j are unchanged then the criterion could be stale
1020    If bUpdateDist is false, and i or j change, then it just sets dist to a negative number
1021 */
1022 bool UpdateBestHit(/*IN/UPDATE*/NJ_t *NJ, int nActive, /*IN/OUT*/besthit_t *join,
1023 		   bool bUpdateDist);
1024 
1025 /* This recomputes the criterion, or returns false if the visible node
1026    is no longer active.
1027 */
1028 bool GetVisible(/*IN/UPDATE*/NJ_t *NJ, int nActive, /*IN/OUT*/top_hits_t *tophits,
1029 		int iNode, /*OUT*/besthit_t *visible);
1030 
1031 int ActiveAncestor(/*IN*/NJ_t *NJ, int node);
1032 
1033 /* Compute the constraint penalty for a join. This is added to the "distance"
1034    by SetCriterion */
1035 int JoinConstraintPenalty(/*IN*/NJ_t *NJ, int node1, int node2);
1036 int JoinConstraintPenaltyPiece(NJ_t *NJ, int node1, int node2, int iConstraint);
1037 
1038 /* Helper function for computing the number of constraints violated by
1039    a split, represented as counts of on and off on each side */
1040 int SplitConstraintPenalty(int nOn1, int nOff1, int nOn2, int nOff2);
1041 
1042 /* Reports the (min. evo.) support for the (1,2) vs. (3,4) split
1043    col[iBoot*nPos+j] is column j for bootstrap iBoot
1044 */
1045 double SplitSupport(profile_t *p1, profile_t *p2, profile_t *p3, profile_t *p4,
1046 		    /*OPTIONAL*/distance_matrix_t *dmat,
1047 		    int nPos,
1048 		    int nBootstrap,
1049 		    int *col);
1050 
1051 /* Returns SH-like support given resampling spec. (in col) and site likelihods
1052    for the three quartets
1053 */
1054 double SHSupport(int nPos, int nBoostrap, int *col, double loglk[3], double *site_likelihoods[3]);
1055 
1056 profile_t *SeqToProfile(/*IN/OUT*/NJ_t *NJ,
1057 			char *seq, int nPos,
1058 			/*OPTIONAL*/char *constraintSeqs, int nConstraints,
1059 			int iNode,
1060 			unsigned long counts[256]);
1061 
1062 /* ProfileDist and SeqDist only set the dist and weight fields
1063    If using an outprofile, use the second argument of ProfileDist
1064    for better performance.
1065 
1066    These produce uncorrected distances.
1067 */
1068 void ProfileDist(profile_t *profile1, profile_t *profile2, int nPos,
1069 		 /*OPTIONAL*/distance_matrix_t *distance_matrix,
1070 		 /*OUT*/besthit_t *hit);
1071 void SeqDist(unsigned char *codes1, unsigned char *codes2, int nPos,
1072 	     /*OPTIONAL*/distance_matrix_t *distance_matrix,
1073 	     /*OUT*/besthit_t *hit);
1074 
1075 /* Computes all pairs of profile distances, applies pseudocounts
1076    if pseudoWeight > 0, and applies log-correction if logdist is true.
1077    The lower index is compared to the higher index, e.g. for profiles
1078    A,B,C,D the comparison will be as in quartet_pair_t
1079 */
1080 typedef enum {qAB,qAC,qAD,qBC,qBD,qCD} quartet_pair_t;
1081 void CorrectedPairDistances(profile_t **profiles, int nProfiles,
1082 			    /*OPTIONAL*/distance_matrix_t *distance_matrix,
1083 			    int nPos,
1084 			    /*OUT*/double *distances);
1085 
1086 /* output is indexed by nni_t
1087    To ensure good behavior while evaluating a subtree-prune-regraft move as a series
1088    of nearest-neighbor interchanges, this uses a distance-ish model of constraints,
1089    as given by PairConstraintDistance(), rather than
1090    counting the number of violated splits (which is what FastTree does
1091    during neighbor-joining).
1092    Thus, penalty values may well be >0 even if no constraints are violated, but the
1093    relative scores for the three NNIs will be correct.
1094  */
1095 void QuartetConstraintPenalties(profile_t *profiles[4], int nConstraints, /*OUT*/double d[3]);
1096 
1097 double PairConstraintDistance(int nOn1, int nOff1, int nOn2, int nOff2);
1098 
1099 /* the split is consistent with the constraint if any of the profiles have no data
1100    or if three of the profiles have the same uniform value (all on or all off)
1101    or if AB|CD = 00|11 or 11|00 (all uniform)
1102  */
1103 bool SplitViolatesConstraint(profile_t *profiles[4], int iConstraint);
1104 
1105 /* If false, no values were set because this constraint was not relevant.
1106    output is for the 3 splits
1107 */
1108 bool QuartetConstraintPenaltiesPiece(profile_t *profiles[4], int iConstraint, /*OUT*/double penalty[3]);
1109 
1110 /* Apply Jukes-Cantor or scoredist-like log(1-d) transform
1111    to correct the distance for multiple substitutions.
1112 */
1113 double LogCorrect(double distance);
1114 
1115 /* AverageProfile is used to do a weighted combination of nodes
1116    when doing a join. If weight is negative, then the value is ignored and the profiles
1117    are averaged. The weight is *not* adjusted for the gap content of the nodes.
1118    Also, the weight does not affect the representation of the constraints
1119 */
1120 profile_t *AverageProfile(profile_t *profile1, profile_t *profile2,
1121 			  int nPos, int nConstraints,
1122 			  distance_matrix_t *distance_matrix,
1123 			  double weight1);
1124 
1125 /* PosteriorProfile() is like AverageProfile() but it computes posterior probabilities
1126    rather than an average
1127 */
1128 profile_t *PosteriorProfile(profile_t *profile1, profile_t *profile2,
1129 			    double len1, double len2,
1130 			    /*OPTIONAL*/transition_matrix_t *transmat,
1131 			    rates_t *rates,
1132 			    int nPos, int nConstraints);
1133 
1134 /* Set a node's profile from its children.
1135    Deletes the previous profile if it exists
1136    Use -1.0 for a balanced join
1137    Fails unless the node has two children (e.g., no leaves or root)
1138 */
1139 void SetProfile(/*IN/OUT*/NJ_t *NJ, int node, double weight1);
1140 
1141 /* OutProfile does an unweighted combination of nodes to create the
1142    out-profile. It always sets code to NOCODE so that UpdateOutProfile
1143    can work.
1144 */
1145 profile_t *OutProfile(profile_t **profiles, int nProfiles,
1146 		      int nPos, int nConstraints,
1147 		      distance_matrix_t *distance_matrix);
1148 
1149 void UpdateOutProfile(/*UPDATE*/profile_t *out, profile_t *old1, profile_t *old2,
1150 		      profile_t *new, int nActiveOld,
1151 		      int nPos, int nConstraints,
1152 		      distance_matrix_t *distance_matrix);
1153 
1154 profile_t *NewProfile(int nPos, int nConstraints); /* returned has no vectors */
1155 profile_t *FreeProfile(profile_t *profile, int nPos, int nConstraints); /* returns NULL */
1156 
1157 void AllocRateCategories(/*IN/OUT*/rates_t *rates, int nRateCategories, int nPos);
1158 
1159 /* f1 can be NULL if code1 != NOCODE, and similarly for f2
1160    Or, if (say) weight1 was 0, then can have code1==NOCODE *and* f1==NULL
1161    In that case, returns an arbitrary large number.
1162 */
1163 double ProfileDistPiece(unsigned int code1, unsigned int code2,
1164 			numeric_t *f1, numeric_t *f2,
1165 			/*OPTIONAL*/distance_matrix_t *dmat,
1166 			/*OPTIONAL*/numeric_t *codeDist2);
1167 
1168 /* Adds (or subtracts, if weight is negative) fIn/codeIn from fOut
1169    fOut is assumed to exist (as from an outprofile)
1170    do not call unless weight of input profile > 0
1171  */
1172 void AddToFreq(/*IN/OUT*/numeric_t *fOut, double weight,
1173 	       unsigned int codeIn, /*OPTIONAL*/numeric_t *fIn,
1174 	       /*OPTIONAL*/distance_matrix_t *dmat);
1175 
1176 /* Divide the vector (of length nCodes) by a constant
1177    so that the total (unrotated) frequency is 1.0 */
1178 void NormalizeFreq(/*IN/OUT*/numeric_t *freq, distance_matrix_t *distance_matrix);
1179 
1180 /* Allocate, if necessary, and recompute the codeDist*/
1181 void SetCodeDist(/*IN/OUT*/profile_t *profile, int nPos, distance_matrix_t *dmat);
1182 
1183 /* The allhits list contains the distances of the node to all other active nodes
1184    This is useful for the "reset" improvement to the visible set
1185    Note that the following routines do not handle the tophits heuristic
1186    and assume that out-distances are up to date.
1187 */
1188 void SetBestHit(int node, NJ_t *NJ, int nActive,
1189 		/*OUT*/besthit_t *bestjoin,
1190 		/*OUT OPTIONAL*/besthit_t *allhits);
1191 void ExhaustiveNJSearch(NJ_t *NJ, int nActive, /*OUT*/besthit_t *bestjoin);
1192 
1193 /* Searches the visible set */
1194 void FastNJSearch(NJ_t *NJ, int nActive, /*UPDATE*/besthit_t *visible, /*OUT*/besthit_t *bestjoin);
1195 
1196 /* Subroutines for handling the tophits heuristic */
1197 
1198 top_hits_t *InitTopHits(NJ_t *NJ, int m);
1199 top_hits_t *FreeTopHits(top_hits_t *tophits); /* returns NULL */
1200 
1201 /* Before we do any joins -- sets tophits and visible
1202    NJ may be modified by setting out-distances
1203  */
1204 void SetAllLeafTopHits(/*IN/UPDATE*/NJ_t *NJ, /*IN/OUT*/top_hits_t *tophits);
1205 
1206 /* Find the best join to do. */
1207 void TopHitNJSearch(/*IN/UPDATE*/NJ_t *NJ,
1208 		    int nActive,
1209 		    /*IN/OUT*/top_hits_t *tophits,
1210 		    /*OUT*/besthit_t *bestjoin);
1211 
1212 /* Returns the best hit within top hits
1213    NJ may be modified because it updates out-distances if they are too stale
1214    Does *not* update visible set
1215 */
1216 void GetBestFromTopHits(int iNode, /*IN/UPDATE*/NJ_t *NJ, int nActive,
1217 			/*IN*/top_hits_t *tophits,
1218 			/*OUT*/besthit_t *bestjoin);
1219 
1220 /* visible set is modifiable so that we can reset it more globally when we do
1221    a "refresh", but we also set the visible set for newnode and do any
1222    "reset" updates too. And, we update many outdistances.
1223  */
1224 void TopHitJoin(int newnode,
1225 		/*IN/UPDATE*/NJ_t *NJ, int nActive,
1226 		/*IN/OUT*/top_hits_t *tophits);
1227 
1228 /* Sort the input besthits by criterion
1229    and save the best nOut hits as a new array in top_hits_lists
1230    Does not update criterion or out-distances
1231    Ignores (silently removes) hit to self
1232    Saved list may be shorter than requested if there are insufficient entries
1233 */
1234 void SortSaveBestHits(int iNode, /*IN/SORT*/besthit_t *besthits,
1235 		      int nIn, int nOut,
1236 		      /*IN/OUT*/top_hits_t *tophits);
1237 
1238 /* Given candidate hits from one node, "transfer" them to another node:
1239    Stores them in a new place in the same order
1240    searches up to active nodes if hits involve non-active nodes
1241    If update flag is set, it also recomputes distance and criterion
1242    (and ensures that out-distances are updated); otherwise
1243    it sets dist to -1e20 and criterion to 1e20
1244 
1245  */
1246 void TransferBestHits(/*IN/UPDATE*/NJ_t *NJ, int nActive,
1247 		      int iNode,
1248 		      /*IN*/besthit_t *oldhits,
1249 		      int nOldHits,
1250 		      /*OUT*/besthit_t *newhits,
1251 		      bool updateDistance);
1252 
1253 /* Create best hit objects from 1 or more hits. Do not update out-distances or set criteria */
1254 void HitsToBestHits(/*IN*/hit_t *hits, int nHits, int iNode, /*OUT*/besthit_t *newhits);
1255 besthit_t HitToBestHit(int i, hit_t hit);
1256 
1257 /* Given a set of besthit entries,
1258    look for improvements to the visible set of the j entries.
1259    Updates out-distances as it goes.
1260    Also replaces stale nodes with this node, because a join is usually
1261    how this happens (i.e. it does not need to walk up to ancestors).
1262    Note this calls UpdateTopVisible() on any change
1263 */
1264 void UpdateVisible(/*IN/UPDATE*/NJ_t *NJ, int nActive,
1265 		   /*IN*/besthit_t *tophitsNode,
1266 		   int nTopHits,
1267 		   /*IN/OUT*/top_hits_t *tophits);
1268 
1269 /* Update the top-visible list to perhaps include this hit (O(sqrt(N)) time) */
1270 void UpdateTopVisible(/*IN*/NJ_t * NJ, int nActive,
1271 		      int iNode, /*IN*/hit_t *hit,
1272 		      /*IN/OUT*/top_hits_t *tophits);
1273 
1274 /* Recompute the top-visible subset of the visible set */
1275 void ResetTopVisible(/*IN/UPDATE*/NJ_t *NJ,
1276 		     int nActive,
1277 		     /*IN/OUT*/top_hits_t *tophits);
1278 
1279 /* Make a shorter list with only unique entries.
1280    Replaces any "dead" hits to nodes that have parents with their active ancestors
1281    and ignores any that become dead.
1282    Updates all criteria.
1283    Combined gets sorted by i & j
1284    The returned list is allocated to nCombined even though only *nUniqueOut entries are filled
1285 */
1286 besthit_t *UniqueBestHits(/*IN/UPDATE*/NJ_t *NJ, int nActive,
1287 			  /*IN/SORT*/besthit_t *combined, int nCombined,
1288 			  /*OUT*/int *nUniqueOut);
1289 
1290 nni_t ChooseNNI(profile_t *profiles[4],
1291 		/*OPTIONAL*/distance_matrix_t *dmat,
1292 		int nPos, int nConstraints,
1293 		/*OUT*/double criteria[3]); /* The three internal branch lengths or log likelihoods*/
1294 
1295 /* length[] is ordered as described by quartet_length_t, but after we do the swap
1296    of B with C (to give AC|BD) or B with D (to get AD|BC), if that is the returned choice
1297    bFast means do not consider NNIs if AB|CD is noticeably better than the star topology
1298    (as implemented by MLQuartetOptimize).
1299    If there are constraints, then the constraint penalty is included in criteria[]
1300 */
1301 nni_t MLQuartetNNI(profile_t *profiles[4],
1302 		   /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
1303 		   int nPos, int nConstraints,
1304 		   /*OUT*/double criteria[3], /* The three potential quartet log-likelihoods */
1305 		   /*IN/OUT*/numeric_t length[5],
1306 		   bool bFast);
1307 
1308 void OptimizeAllBranchLengths(/*IN/OUT*/NJ_t *NJ);
1309 double TreeLogLk(/*IN*/NJ_t *NJ, /*OPTIONAL OUT*/double *site_loglk);
1310 double MLQuartetLogLk(profile_t *pA, profile_t *pB, profile_t *pC, profile_t *pD,
1311 		      int nPos, /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
1312 		      /*IN*/double branch_lengths[5],
1313 		      /*OPTIONAL OUT*/double *site_likelihoods);
1314 
1315 /* Given a topology and branch lengths, estimate rates & recompute profiles */
1316 void SetMLRates(/*IN/OUT*/NJ_t *NJ, int nRateCategories);
1317 
1318 /* Returns a set of nRateCategories potential rates; the caller must free it */
1319 numeric_t *MLSiteRates(int nRateCategories);
1320 
1321 /* returns site_loglk so that
1322    site_loglk[nPos*iRate + j] is the log likelihood of site j with rate iRate
1323    The caller must free it.
1324 */
1325 double *MLSiteLikelihoodsByRate(/*IN*/NJ_t *NJ, /*IN*/numeric_t *rates, int nRateCategories);
1326 
1327 typedef struct {
1328   double mult;			/* multiplier for the rates / divisor for the tree-length */
1329   double alpha;
1330   int nPos;
1331   int nRateCats;
1332   numeric_t *rates;
1333   double *site_loglk;
1334 } siteratelk_t;
1335 
1336 double GammaLogLk(/*IN*/siteratelk_t *s, /*OPTIONAL OUT*/double *gamma_loglk_sites);
1337 
1338 /* Input site_loglk must be for each rate. Note that FastTree does not reoptimize
1339    the branch lengths under the Gamma model -- it optimizes the overall scale.
1340    Reports the gamma log likelihhod (and logs site likelihoods if fpLog is set),
1341    and reports the rescaling value.
1342 */
1343 double RescaleGammaLogLk(int nPos, int nRateCats,
1344 			/*IN*/numeric_t *rates, /*IN*/double *site_loglk,
1345 			/*OPTIONAL*/FILE *fpLog);
1346 
1347 /* P(value<=x) for the gamma distribution with shape parameter alpha and scale 1/alpha */
1348 double PGamma(double x, double alpha);
1349 
1350 /* Given a topology and branch lengths, optimize GTR rates and quickly reoptimize branch lengths
1351    If gtrfreq is NULL, then empirical frequencies are used
1352 */
1353 void SetMLGtr(/*IN/OUT*/NJ_t *NJ, /*OPTIONAL IN*/double *gtrfreq, /*OPTIONAL WRITE*/FILE *fpLog);
1354 
1355 /* P(A & B | len) = P(B | A, len) * P(A)
1356    If site_likelihoods is present, multiplies those values by the site likelihood at each point
1357    (Note it does not handle underflow)
1358  */
1359 double PairLogLk(/*IN*/profile_t *p1, /*IN*/profile_t *p2, double length,
1360 		 int nPos, /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
1361 		 /*OPTIONAL IN/OUT*/double *site_likelihoods);
1362 
1363 /* Branch lengths for 4-taxon tree ((A,B),C,D); I means internal */
1364 typedef enum {LEN_A,LEN_B,LEN_C,LEN_D,LEN_I} quartet_length_t;
1365 
1366 typedef struct {
1367   int nPos;
1368   transition_matrix_t *transmat;
1369   rates_t *rates;
1370   int nEval;			/* number of likelihood evaluations */
1371   /* The pair to optimize */
1372   profile_t *pair1;
1373   profile_t *pair2;
1374 } quartet_opt_t;
1375 
1376 double PairNegLogLk(double x, void *data); /* data must be a quartet_opt_t */
1377 
1378 typedef struct {
1379   NJ_t *NJ;
1380   double freq[4];
1381   double rates[6];
1382   int iRate;			/* which rate to set x from */
1383   FILE *fpLog; /* OPTIONAL WRITE */
1384 } gtr_opt_t;
1385 
1386 /* Returns -log_likelihood for the tree with the given rates
1387    data must be a gtr_opt_t and x is used to set rate iRate
1388    Does not recompute profiles -- assumes that the caller will
1389 */
1390 double GTRNegLogLk(double x, void *data);
1391 
1392 /* Returns the resulting log likelihood. Optionally returns whether other
1393    topologies should be abandoned, based on the difference between AB|CD and
1394    the "star topology" (AB|CD with a branch length of MLMinBranchLength) exceeding
1395    closeLogLkLimit.
1396    If bStarTest is passed in, it only optimized the internal branch if
1397    the star test is true. Otherwise, it optimized all 5 branch lengths
1398    in turn.
1399  */
1400 double MLQuartetOptimize(profile_t *pA, profile_t *pB, profile_t *pC, profile_t *pD,
1401 			 int nPos, /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
1402 			 /*IN/OUT*/double branch_lengths[5],
1403 			 /*OPTIONAL OUT*/bool *pStarTest,
1404 			 /*OPTIONAL OUT*/double *site_likelihoods);
1405 
1406 /* Returns the resulting log likelihood */
1407 double MLPairOptimize(profile_t *pA, profile_t *pB,
1408 		      int nPos, /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
1409 		      /*IN/OUT*/double *branch_length);
1410 
1411 /* Returns the number of steps considered, with the actual steps in steps[]
1412    Modifies the tree by this chain of NNIs
1413 */
1414 int FindSPRSteps(/*IN/OUT*/NJ_t *NJ,
1415 		 int node,
1416 		 int parent,	/* sibling or parent of node to NNI to start the chain */
1417 		 /*IN/OUT*/profile_t **upProfiles,
1418 		 /*OUT*/spr_step_t *steps,
1419 		 int maxSteps,
1420 		 bool bFirstAC);
1421 
1422 /* Undo a single NNI */
1423 void UnwindSPRStep(/*IN/OUT*/NJ_t *NJ,
1424 	       /*IN*/spr_step_t *step,
1425 	       /*IN/OUT*/profile_t **upProfiles);
1426 
1427 
1428 /* Update the profile of node and its ancestor, and delete nearby out-profiles */
1429 void UpdateForNNI(/*IN/OUT*/NJ_t *NJ, int node, /*IN/OUT*/profile_t **upProfiles, bool useML);
1430 
1431 /* Sets NJ->parent[newchild] and replaces oldchild with newchild
1432    in the list of children of parent
1433 */
1434 void ReplaceChild(/*IN/OUT*/NJ_t *NJ, int parent, int oldchild, int newchild);
1435 
1436 int CompareHitsByCriterion(const void *c1, const void *c2);
1437 int CompareHitsByIJ(const void *c1, const void *c2);
1438 
1439 int NGaps(NJ_t *NJ, int node);	/* only handles leaf sequences */
1440 
1441 /* node is the parent of AB, sibling of C
1442    node cannot be root or a leaf
1443    If node is the child of root, then D is the other sibling of node,
1444    and the 4th profile is D's profile.
1445    Otherwise, D is the parent of node, and we use its upprofile
1446    Call this with profiles=NULL to get the nodes, without fetching or
1447    computing profiles
1448 */
1449 void SetupABCD(NJ_t *NJ, int node,
1450 	       /* the 4 profiles for ABCD; the last one is an upprofile */
1451 	       /*OPTIONAL OUT*/profile_t *profiles[4],
1452 	       /*OPTIONAL IN/OUT*/profile_t **upProfiles,
1453 	       /*OUT*/int nodeABCD[4],
1454 	       bool useML);
1455 
1456 int Sibling(NJ_t *NJ, int node); /* At root, no unique sibling so returns -1 */
1457 void RootSiblings(NJ_t *NJ, int node, /*OUT*/int sibs[2]);
1458 
1459 /* JC probability of nucleotide not changing, for each rate category */
1460 double *PSameVector(double length, rates_t *rates);
1461 
1462 /* JC probability of nucleotide not changing, for each rate category */
1463 double *PDiffVector(double *pSame, rates_t *rates);
1464 
1465 /* expeigen[iRate*nCodes + j] = exp(length * rate iRate * eigenvalue j) */
1466 numeric_t *ExpEigenRates(double length, transition_matrix_t *transmat, rates_t *rates);
1467 
1468 /* Print a progress report if more than 0.1 second has gone by since the progress report */
1469 /* Format should include 0-4 %d references and no newlines */
1470 void ProgressReport(char *format, int iArg1, int iArg2, int iArg3, int iArg4);
1471 void LogTree(char *format, int round, /*OPTIONAL WRITE*/FILE *fp, NJ_t *NJ, char **names, uniquify_t *unique, bool bQuote);
1472 void LogMLRates(/*OPTIONAL WRITE*/FILE *fpLog, NJ_t *NJ);
1473 
1474 void *mymalloc(size_t sz);       /* Prints "Out of memory" and exits on failure */
1475 void *myfree(void *, size_t sz); /* Always returns NULL */
1476 
1477 /* One-dimensional minimization using brent's function, with
1478    a fractional and an absolute tolerance */
1479 double onedimenmin(double xmin, double xguess, double xmax, double (*f)(double,void*), void *data,
1480 		   double ftol, double atol,
1481 		   /*OUT*/double *fx, /*OUT*/double *f2x);
1482 
1483 double brent(double ax, double bx, double cx, double (*f)(double, void *), void *data,
1484 	     double ftol, double atol,
1485 	     double *foptx, double *f2optx, double fax, double fbx, double fcx);
1486 
1487 /* Vector operations, either using SSE3 or not
1488    Code assumes that vectors are a multiple of 4 in size
1489 */
1490 void vector_multiply(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, int n, /*OUT*/numeric_t *fOut);
1491 numeric_t vector_multiply_sum(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, int n);
1492 void vector_add_mult(/*IN/OUT*/numeric_t *f, /*IN*/numeric_t *add, numeric_t weight, int n);
1493 
1494 /* multiply the transpose of a matrix by a vector */
1495 void matrixt_by_vector4(/*IN*/numeric_t mat[4][MAXCODES], /*IN*/numeric_t vec[4], /*OUT*/numeric_t out[4]);
1496 
1497 /* sum(f1*fBy)*sum(f2*fBy) */
1498 numeric_t vector_dot_product_rot(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, /*IN*/numeric_t* fBy, int n);
1499 
1500 /* sum(f1*f2*f3) */
1501 numeric_t vector_multiply3_sum(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, /*IN*/numeric_t* f3, int n);
1502 
1503 numeric_t vector_sum(/*IN*/numeric_t *f1, int n);
1504 void vector_multiply_by(/*IN/OUT*/numeric_t *f, /*IN*/numeric_t fBy, int n);
1505 
1506 double clockDiff(/*IN*/struct timeval *clock_start);
1507 int timeval_subtract (/*OUT*/struct timeval *result, /*IN*/struct timeval *x, /*IN*/struct timeval *y);
1508 
1509 char *OpenMPString(void);
1510 
1511 void ran_start(long seed);
1512 double knuth_rand();		/* Random number between 0 and 1 */
1513 void tred2 (double *a, const int n, const int np, double *d, double *e);
1514 double pythag(double a, double b);
1515 void tqli(double *d, double *e, int n, int np, double *z);
1516 
1517 /* Like mymalloc; duplicates the input (returns NULL if given NULL) */
1518 void *mymemdup(void *data, size_t sz);
1519 void *myrealloc(void *data, size_t szOld, size_t szNew, bool bCopy);
1520 
1521 double pnorm(double z);		/* Probability(value <=z)  */
1522 
1523 /* Hashtable functions */
1524 typedef struct
1525 {
1526   char *string;
1527   int nCount;			/* number of times this entry was seen */
1528   int first;			/* index of first entry with this value */
1529 } hashbucket_t;
1530 
1531 typedef struct {
1532   int nBuckets;
1533   /* hashvalue -> bucket. Or look in bucket + 1, +2, etc., till you hit a NULL string */
1534   hashbucket_t *buckets;
1535 } hashstrings_t;
1536 typedef int hashiterator_t;
1537 
1538 hashstrings_t *MakeHashtable(char **strings, int nStrings);
1539 hashstrings_t *FreeHashtable(hashstrings_t* hash); /*returns NULL*/
1540 hashiterator_t FindMatch(hashstrings_t *hash, char *string);
1541 
1542 /* Return NULL if we have run out of values */
1543 char *GetHashString(hashstrings_t *hash, hashiterator_t hi);
1544 int HashCount(hashstrings_t *hash, hashiterator_t hi);
1545 int HashFirst(hashstrings_t *hash, hashiterator_t hi);
1546 
1547 void PrintNJ(/*WRITE*/FILE *, NJ_t *NJ, char **names, uniquify_t *unique, bool bShowSupport, bool bQuoteNames);
1548 
1549 /* Print topology using node indices as node names */
1550 void PrintNJInternal(/*WRITE*/FILE *, NJ_t *NJ, bool useLen);
1551 
1552 uniquify_t *UniquifyAln(/*IN*/alignment_t *aln);
1553 uniquify_t *FreeUniquify(uniquify_t *);	/* returns NULL */
1554 
1555 /* Convert a constraint alignment to a list of sequences. The returned array is indexed
1556    by iUnique and points to values in the input alignment
1557 */
1558 char **AlnToConstraints(alignment_t *constraints, uniquify_t *unique, hashstrings_t *hashnames);
1559 
1560 /* ReadTree ignores non-unique leaves after the first instance.
1561    At the end, it prunes the tree to ignore empty children and it
1562    unroots the tree if necessary.
1563 */
1564 void ReadTree(/*IN/OUT*/NJ_t *NJ,
1565 	      /*IN*/uniquify_t *unique,
1566 	      /*IN*/hashstrings_t *hashnames,
1567 	      /*READ*/FILE *fpInTree);
1568 char *ReadTreeToken(/*READ*/FILE *fp); /* returns a static array, or NULL on EOF */
1569 void ReadTreeAddChild(int parent, int child, /*IN/OUT*/int *parents, /*IN/OUT*/children_t *children);
1570 /* Do not add the leaf if we already set this unique-set to another parent */
1571 void ReadTreeMaybeAddLeaf(int parent, char *name,
1572 			  hashstrings_t *hashnames, uniquify_t *unique,
1573 			  /*IN/OUT*/int *parents, /*IN/OUT*/children_t *children);
1574 void ReadTreeRemove(/*IN/OUT*/int *parents, /*IN/OUT*/children_t *children, int node);
1575 
1576 /* Routines to support tree traversal and prevent visiting a node >1 time
1577    (esp. if topology changes).
1578 */
1579 typedef bool *traversal_t;
1580 traversal_t InitTraversal(NJ_t*);
1581 void SkipTraversalInto(int node, /*IN/OUT*/traversal_t traversal);
1582 traversal_t FreeTraversal(traversal_t, NJ_t*); /*returns NULL*/
1583 
1584 /* returns new node, or -1 if nothing left to do. Use root for the first call.
1585    Will return every node and then root.
1586    Uses postorder tree traversal (depth-first search going down to leaves first)
1587    Keeps track of which nodes are visited, so even after an NNI that swaps a
1588    visited child with an unvisited uncle, the next call will visit the
1589    was-uncle-now-child. (However, after SPR moves, there is no such guarantee.)
1590 
1591    If pUp is not NULL, then, if going "back up" through a previously visited node
1592    (presumably due to an NNI), then it will return the node another time,
1593    with *pUp = true.
1594 */
1595 int TraversePostorder(int lastnode, NJ_t *NJ, /*IN/OUT*/traversal_t,
1596 		      /*OUT OPTIONAL*/bool *pUp);
1597 
1598 /* Routines to support storing up-profiles during tree traversal
1599    Eventually these should be smart enough to do weighted joins and
1600    to minimize memory usage
1601 */
1602 profile_t **UpProfiles(NJ_t *NJ);
1603 profile_t *GetUpProfile(/*IN/OUT*/profile_t **upProfiles, NJ_t *NJ, int node, bool useML);
1604 profile_t *DeleteUpProfile(/*IN/OUT*/profile_t **upProfiles, NJ_t *NJ, int node); /* returns NULL */
1605 profile_t **FreeUpProfiles(profile_t **upProfiles, NJ_t *NJ); /* returns NULL */
1606 
1607 /* Recomputes the profile for a node, presumably to reflect topology changes
1608    If bionj is set, does a weighted join -- which requires using upProfiles
1609    If useML is set, computes the posterior probability instead of averaging
1610  */
1611 void RecomputeProfile(/*IN/OUT*/NJ_t *NJ, /*IN/OUT*/profile_t **upProfiles, int node, bool useML);
1612 
1613 /* Recompute profiles going up from the leaves, using the provided distance matrix
1614    and unweighted joins
1615 */
1616 void RecomputeProfiles(/*IN/OUT*/NJ_t *NJ, /*OPTIONAL*/distance_matrix_t *dmat);
1617 
1618 void RecomputeMLProfiles(/*IN/OUT*/NJ_t *NJ);
1619 
1620 /* If bionj is set, computes the weight to be given to A when computing the
1621    profile for the ancestor of A and B. C and D are the other profiles in the quartet
1622    If bionj is not set, returns -1 (which means unweighted in AverageProfile).
1623    (A and B are the first two profiles in the array)
1624 */
1625 double QuartetWeight(profile_t *profiles[4], distance_matrix_t *dmat, int nPos);
1626 
1627 /* Returns a list of nodes, starting with node and ending with root */
1628 int *PathToRoot(NJ_t *NJ, int node, /*OUT*/int *depth);
1629 int *FreePath(int *path, NJ_t *NJ); /* returns NULL */
1630 
1631 /* The default amino acid distance matrix, derived from the BLOSUM45 similarity matrix */
1632 distance_matrix_t matrixBLOSUM45;
1633 
1634 /* The default amino acid transition matrix (Jones Taylor Thorton 1992) */
1635 double matrixJTT92[MAXCODES][MAXCODES];
1636 double statJTT92[MAXCODES];
1637 
1638 /* The Le-Gascuel 2008 amino acid transition matrix */
1639 double matrixLG08[MAXCODES][MAXCODES];
1640 double statLG08[MAXCODES];
1641 
1642 /* The WAG amino acid transition matrix (Whelan-And-Goldman 2001) */
1643 double matrixWAG01[MAXCODES][MAXCODES];
1644 double statWAG01[MAXCODES];
1645 
1646 
main(int argc,char ** argv)1647 int main(int argc, char **argv) {
1648   int nAlign = 1; /* number of alignments to read */
1649   int iArg;
1650   char *matrixPrefix = NULL;
1651   distance_matrix_t *distance_matrix = NULL;
1652   bool make_matrix = false;
1653   char *constraintsFile = NULL;
1654   char *intreeFile = NULL;
1655   bool intree1 = false;		/* the same starting tree each round */
1656   int nni = -1;			/* number of rounds of NNI, defaults to 4*log2(n) */
1657   int spr = 2;			/* number of rounds of SPR */
1658   int maxSPRLength = 10;	/* maximum distance to move a node */
1659   int MLnni = -1;		/* number of rounds of ML NNI, defaults to 2*log2(n) */
1660   bool MLlen = false;		/* optimize branch lengths; no topology changes */
1661   int nBootstrap = 1000;		/* If set, number of replicates of local bootstrap to do */
1662   int nRateCats = nDefaultRateCats;
1663   char *logfile = NULL;
1664   bool bUseGtr = false;
1665   bool bUseLg = false;
1666   bool bUseWag = false;
1667   bool bUseGtrRates = false;
1668   double gtrrates[6] = {1,1,1,1,1,1};
1669   bool bUseGtrFreq = false;
1670   double gtrfreq[4] = {0.25,0.25,0.25,0.25};
1671   bool bQuote = false;
1672   FILE *fpOut = stdout;
1673 
1674   if (isatty(STDIN_FILENO) && argc == 1) {
1675     fprintf(stderr,"Usage for FastTree version %s %s%s:\n%s",
1676 	    FT_VERSION, SSE_STRING, OpenMPString(), usage);
1677 #if (defined _WIN32 || defined WIN32 || defined WIN64 || defined _WIN64)
1678     fprintf(stderr, "Windows users: Please remember to run this inside a command shell\n");
1679     fprintf(stderr,"Hit return to continue\n");
1680     fgetc(stdin);
1681 #endif
1682     exit(0);
1683   }
1684   for (iArg = 1; iArg < argc; iArg++) {
1685     if (strcmp(argv[iArg],"-makematrix") == 0) {
1686       make_matrix = true;
1687     } else if (strcmp(argv[iArg],"-logdist") == 0) {
1688       fprintf(stderr, "Warning: logdist is now on by default and obsolete\n");
1689     } else if (strcmp(argv[iArg],"-rawdist") == 0) {
1690       logdist = false;
1691     } else if (strcmp(argv[iArg],"-verbose") == 0 && iArg < argc-1) {
1692       verbose = atoi(argv[++iArg]);
1693     } else if (strcmp(argv[iArg],"-quiet") == 0) {
1694       verbose = 0;
1695       showProgress = 0;
1696     } else if (strcmp(argv[iArg],"-nopr") == 0) {
1697       showProgress = 0;
1698     } else if (strcmp(argv[iArg],"-slow") == 0) {
1699       slow = 1;
1700     } else if (strcmp(argv[iArg],"-fastest") == 0) {
1701       fastest = 1;
1702       tophitsRefresh = 0.5;
1703       useTopHits2nd = true;
1704     } else if (strcmp(argv[iArg],"-2nd") == 0) {
1705       useTopHits2nd = true;
1706     } else if (strcmp(argv[iArg],"-no2nd") == 0) {
1707       useTopHits2nd = false;
1708     } else if (strcmp(argv[iArg],"-slownni") == 0) {
1709       fastNNI = false;
1710     } else if (strcmp(argv[iArg], "-matrix") == 0 && iArg < argc-1) {
1711       iArg++;
1712       matrixPrefix = argv[iArg];
1713     } else if (strcmp(argv[iArg], "-nomatrix") == 0) {
1714       useMatrix = false;
1715     } else if (strcmp(argv[iArg], "-n") == 0 && iArg < argc-1) {
1716       iArg++;
1717       nAlign = atoi(argv[iArg]);
1718       if (nAlign < 1) {
1719 	fprintf(stderr, "-n argument for #input alignments must be > 0 not %s\n", argv[iArg]);
1720 	exit(1);
1721       }
1722     } else if (strcmp(argv[iArg], "-quote") == 0) {
1723       bQuote = true;
1724     } else if (strcmp(argv[iArg], "-nt") == 0) {
1725       nCodes = 4;
1726     } else if (strcmp(argv[iArg], "-intree") == 0 && iArg < argc-1) {
1727       iArg++;
1728       intreeFile = argv[iArg];
1729     } else if (strcmp(argv[iArg], "-intree1") == 0 && iArg < argc-1) {
1730       iArg++;
1731       intreeFile = argv[iArg];
1732       intree1 = true;
1733     } else if (strcmp(argv[iArg], "-nj") == 0) {
1734       bionj = 0;
1735     } else if (strcmp(argv[iArg], "-bionj") == 0) {
1736       bionj = 1;
1737     } else if (strcmp(argv[iArg], "-boot") == 0 && iArg < argc-1) {
1738       iArg++;
1739       nBootstrap = atoi(argv[iArg]);
1740     } else if (strcmp(argv[iArg], "-noboot") == 0 || strcmp(argv[iArg], "-nosupport") == 0) {
1741       nBootstrap = 0;
1742     } else if (strcmp(argv[iArg], "-seed") == 0 && iArg < argc-1) {
1743       iArg++;
1744       long seed = atol(argv[iArg]);
1745       ran_start(seed);
1746     } else if (strcmp(argv[iArg],"-top") == 0) {
1747       if(tophitsMult < 0.01)
1748 	tophitsMult = 1.0;
1749     } else if (strcmp(argv[iArg],"-notop") == 0) {
1750       tophitsMult = 0.0;
1751     } else if (strcmp(argv[iArg], "-topm") == 0 && iArg < argc-1) {
1752       iArg++;
1753       tophitsMult = atof(argv[iArg]);
1754     } else if (strcmp(argv[iArg], "-close") == 0 && iArg < argc-1) {
1755       iArg++;
1756       tophitsClose = atof(argv[iArg]);
1757       if (tophitsMult <= 0) {
1758 	fprintf(stderr, "Cannot use -close unless -top is set above 0\n");
1759 	exit(1);
1760       }
1761       if (tophitsClose <= 0 || tophitsClose >= 1) {
1762 	fprintf(stderr, "-close argument must be between 0 and 1\n");
1763 	exit(1);
1764       }
1765     } else if (strcmp(argv[iArg], "-refresh") == 0 && iArg < argc-1) {
1766       iArg++;
1767       tophitsRefresh = atof(argv[iArg]);
1768       if (tophitsMult <= 0) {
1769 	fprintf(stderr, "Cannot use -refresh unless -top is set above 0\n");
1770 	exit(1);
1771       }
1772       if (tophitsRefresh <= 0 || tophitsRefresh >= 1) {
1773 	fprintf(stderr, "-refresh argument must be between 0 and 1\n");
1774 	exit(1);
1775       }
1776     } else if (strcmp(argv[iArg],"-nni") == 0 && iArg < argc-1) {
1777       iArg++;
1778       nni = atoi(argv[iArg]);
1779       if (nni == 0)
1780 	spr = 0;
1781     } else if (strcmp(argv[iArg],"-spr") == 0 && iArg < argc-1) {
1782       iArg++;
1783       spr = atoi(argv[iArg]);
1784     } else if (strcmp(argv[iArg],"-sprlength") == 0 && iArg < argc-1) {
1785       iArg++;
1786       maxSPRLength = atoi(argv[iArg]);
1787     } else if (strcmp(argv[iArg],"-mlnni") == 0 && iArg < argc-1) {
1788       iArg++;
1789       MLnni = atoi(argv[iArg]);
1790     } else if (strcmp(argv[iArg],"-noml") == 0) {
1791       MLnni = 0;
1792     } else if (strcmp(argv[iArg],"-mllen") == 0) {
1793       MLnni = 0;
1794       MLlen = true;
1795     } else if (strcmp(argv[iArg],"-nome") == 0) {
1796       spr = 0;
1797       nni = 0;
1798     } else if (strcmp(argv[iArg],"-help") == 0) {
1799       fprintf(stderr,"FastTree %s %s%s:\n%s", FT_VERSION, SSE_STRING, OpenMPString(), usage);
1800       exit(0);
1801     } else if (strcmp(argv[iArg],"-expert") == 0) {
1802       fprintf(stderr, "Detailed usage for FastTree %s %s%s:\n%s",
1803 	      FT_VERSION, SSE_STRING, OpenMPString(), expertUsage);
1804       exit(0);
1805     } else if (strcmp(argv[iArg],"-pseudo") == 0) {
1806       if (iArg < argc-1 && isdigit(argv[iArg+1][0])) {
1807 	iArg++;
1808 	pseudoWeight = atof(argv[iArg]);
1809 	if (pseudoWeight < 0.0) {
1810 	  fprintf(stderr,"Illegal argument to -pseudo: %s\n", argv[iArg]);
1811 	  exit(1);
1812 	}
1813       } else {
1814 	pseudoWeight = 1.0;
1815       }
1816     } else if (strcmp(argv[iArg],"-constraints") == 0 && iArg < argc-1) {
1817       iArg++;
1818       constraintsFile = argv[iArg];
1819     } else if (strcmp(argv[iArg],"-constraintWeight") == 0 && iArg < argc-1) {
1820       iArg++;
1821       constraintWeight = atof(argv[iArg]);
1822       if (constraintWeight <= 0.0) {
1823 	fprintf(stderr, "Illegal argument to -constraintWeight (must be greater than zero): %s\n", argv[iArg]);
1824 	exit(1);
1825       }
1826     } else if (strcmp(argv[iArg],"-mlacc") == 0 && iArg < argc-1) {
1827       iArg++;
1828       mlAccuracy = atoi(argv[iArg]);
1829       if (mlAccuracy < 1) {
1830 	fprintf(stderr, "Illlegal -mlacc argument: %s\n", argv[iArg]);
1831 	exit(1);
1832       }
1833     } else if (strcmp(argv[iArg],"-exactml") == 0 || strcmp(argv[iArg],"-mlexact") == 0) {
1834       fprintf(stderr,"-exactml is not required -- exact posteriors is the default now\n");
1835     } else if (strcmp(argv[iArg],"-approxml") == 0 || strcmp(argv[iArg],"-mlapprox") == 0) {
1836       exactML = false;
1837     } else if (strcmp(argv[iArg],"-cat") == 0 && iArg < argc-1) {
1838       iArg++;
1839       nRateCats = atoi(argv[iArg]);
1840       if (nRateCats < 1) {
1841 	fprintf(stderr, "Illlegal argument to -ncat (must be greater than zero): %s\n", argv[iArg]);
1842 	exit(1);
1843       }
1844     } else if (strcmp(argv[iArg],"-nocat") == 0) {
1845       nRateCats = 1;
1846     } else if (strcmp(argv[iArg], "-lg") == 0) {
1847         bUseLg = true;
1848     } else if (strcmp(argv[iArg], "-wag") == 0) {
1849         bUseWag = true;
1850     } else if (strcmp(argv[iArg], "-gtr") == 0) {
1851       bUseGtr = true;
1852     } else if (strcmp(argv[iArg], "-gtrrates") == 0 && iArg < argc-6) {
1853       bUseGtr = true;
1854       bUseGtrRates = true;
1855       int i;
1856       for (i = 0; i < 6; i++) {
1857 	gtrrates[i] = atof(argv[++iArg]);
1858 	if (gtrrates[i] < 1e-5) {
1859 	  fprintf(stderr, "Illegal or too small value of GTR rate: %s\n", argv[iArg]);
1860 	  exit(1);
1861 	}
1862       }
1863     } else if (strcmp(argv[iArg],"-gtrfreq") == 0 && iArg < argc-4) {
1864       bUseGtr = true;
1865       bUseGtrFreq = true;
1866       int i;
1867       double sum = 0;
1868       for (i = 0; i < 4; i++) {
1869 	gtrfreq[i] = atof(argv[++iArg]);
1870 	sum += gtrfreq[i];
1871 	if (gtrfreq[i] < 1e-5) {
1872 	  fprintf(stderr, "Illegal or too small value of GTR frequency: %s\n", argv[iArg]);
1873 	  exit(1);
1874 	}
1875       }
1876       if (fabs(1.0-sum) > 0.01) {
1877 	fprintf(stderr, "-gtrfreq values do not sum to 1\n");
1878 	exit(1);
1879       }
1880       for (i = 0; i < 4; i++)
1881 	gtrfreq[i] /= sum;
1882     } else if (strcmp(argv[iArg],"-log") == 0 && iArg < argc-1) {
1883       iArg++;
1884       logfile = argv[iArg];
1885     } else if (strcmp(argv[iArg],"-gamma") == 0) {
1886       gammaLogLk = true;
1887     } else if (strcmp(argv[iArg],"-out") == 0 && iArg < argc-1) {
1888       iArg++;
1889       fpOut = fopen(argv[iArg],"w");
1890       if(fpOut==NULL) {
1891 	fprintf(stderr,"Cannot write to %s\n",argv[iArg]);
1892 	exit(1);
1893       }
1894     } else if (argv[iArg][0] == '-') {
1895       fprintf(stderr, "Unknown or incorrect use of option %s\n%s", argv[iArg], usage);
1896       exit(1);
1897     } else
1898       break;
1899   }
1900   if(iArg < argc-1) {
1901     fprintf(stderr, "%s", usage);
1902     exit(1);
1903   }
1904 
1905   codesString = nCodes == 20 ? codesStringAA : codesStringNT;
1906   if (nCodes == 4 && matrixPrefix == NULL)
1907     useMatrix = false; 		/* no default nucleotide matrix */
1908 
1909   char *fileName = iArg == (argc-1) ?  argv[argc-1] : NULL;
1910 
1911   if (slow && fastest) {
1912     fprintf(stderr,"Cannot be both slow and fastest\n");
1913     exit(1);
1914   }
1915   if (slow && tophitsMult > 0) {
1916     tophitsMult = 0.0;
1917   }
1918 
1919   FILE *fpLog = NULL;
1920   if (logfile != NULL) {
1921     fpLog = fopen(logfile, "w");
1922     if (fpLog == NULL) {
1923       fprintf(stderr, "Cannot write to: %s\n", logfile);
1924       exit(1);
1925     }
1926     fprintf(fpLog, "Command:");
1927     int i;
1928     for (i=0; i < argc; i++)
1929       fprintf(fpLog, " %s", argv[i]);
1930     fprintf(fpLog,"\n");
1931     fflush(fpLog);
1932   }
1933 
1934     int i;
1935   FILE *fps[2] = {NULL,NULL};
1936   int nFPs = 0;
1937   if (verbose)
1938     fps[nFPs++] = stderr;
1939   if (fpLog != NULL)
1940     fps[nFPs++] = fpLog;
1941 
1942   if (!make_matrix) {		/* Report settings */
1943     char tophitString[100] = "no";
1944     char tophitsCloseStr[100] = "default";
1945     if(tophitsClose > 0) sprintf(tophitsCloseStr,"%.2f",tophitsClose);
1946     if(tophitsMult>0) sprintf(tophitString,"%.2f*sqrtN close=%s refresh=%.2f",
1947 			      tophitsMult, tophitsCloseStr, tophitsRefresh);
1948     char supportString[100] = "none";
1949     if (nBootstrap>0) {
1950       if (MLnni != 0 || MLlen)
1951 	sprintf(supportString, "SH-like %d", nBootstrap);
1952       else
1953 	sprintf(supportString,"Local boot %d",nBootstrap);
1954     }
1955     char nniString[100] = "(no NNI)";
1956     if (nni > 0)
1957       sprintf(nniString, "+NNI (%d rounds)", nni);
1958     if (nni == -1)
1959       strcpy(nniString, "+NNI");
1960     char sprString[100] = "(no SPR)";
1961     if (spr > 0)
1962       sprintf(sprString, "+SPR (%d rounds range %d)", spr, maxSPRLength);
1963     char mlnniString[100] = "(no ML-NNI)";
1964     if(MLnni > 0)
1965       sprintf(mlnniString, "+ML-NNI (%d rounds)", MLnni);
1966     else if (MLnni == -1)
1967       sprintf(mlnniString, "+ML-NNI");
1968     else if (MLlen)
1969       sprintf(mlnniString, "+ML branch lengths");
1970     if ((MLlen || MLnni != 0) && !exactML)
1971       strcat(mlnniString, " approx");
1972     if (MLnni != 0)
1973       sprintf(mlnniString+strlen(mlnniString), " opt-each=%d",mlAccuracy);
1974 
1975     for (i = 0; i < nFPs; i++) {
1976       FILE *fp = fps[i];
1977       fprintf(fp,"FastTree Version %s %s%s\nAlignment: %s",
1978 	      FT_VERSION, SSE_STRING, OpenMPString(), fileName != NULL ? fileName : "standard input");
1979       if (nAlign>1)
1980 	fprintf(fp, " (%d alignments)", nAlign);
1981       fprintf(fp,"\n%s distances: %s Joins: %s Support: %s\n",
1982 	      nCodes == 20 ? "Amino acid" : "Nucleotide",
1983 	      matrixPrefix ? matrixPrefix : (useMatrix? "BLOSUM45"
1984 					     : (nCodes==4 && logdist ? "Jukes-Cantor" : "%different")),
1985 	      bionj ? "weighted" : "balanced" ,
1986 	      supportString);
1987       if (intreeFile == NULL)
1988 	fprintf(fp, "Search: %s%s %s %s %s\nTopHits: %s\n",
1989 		slow?"Exhaustive (slow)" : (fastest ? "Fastest" : "Normal"),
1990 		useTopHits2nd ? "+2nd" : "",
1991 		nniString, sprString, mlnniString,
1992 		tophitString);
1993       else
1994 	fprintf(fp, "Start at tree from %s %s %s\n", intreeFile, nniString, sprString);
1995 
1996       if (MLnni != 0 || MLlen) {
1997 	fprintf(fp, "ML Model: %s,",
1998 		(nCodes == 4) ?
1999 			(bUseGtr ? "Generalized Time-Reversible" : "Jukes-Cantor") :
2000 			(bUseLg ? "Le-Gascuel 2008" : (bUseWag ? "Whelan-And-Goldman" : "Jones-Taylor-Thorton"))
2001 
2002 	);
2003 	if (nRateCats == 1)
2004 	  fprintf(fp, " No rate variation across sites");
2005 	else
2006 	  fprintf(fp, " CAT approximation with %d rate categories", nRateCats);
2007 	fprintf(fp, "\n");
2008 	if (nCodes == 4 && bUseGtrRates)
2009 	  fprintf(fp, "GTR rates(ac ag at cg ct gt) %.4f %.4f %.4f %.4f %.4f %.4f\n",
2010 		  gtrrates[0],gtrrates[1],gtrrates[2],gtrrates[3],gtrrates[4],gtrrates[5]);
2011 	if (nCodes == 4 && bUseGtrFreq)
2012 	  fprintf(fp, "GTR frequencies(A C G T) %.4f %.4f %.4f %.4f\n",
2013 		  gtrfreq[0],gtrfreq[1],gtrfreq[2],gtrfreq[3]);
2014       }
2015       if (constraintsFile != NULL)
2016 	fprintf(fp, "Constraints: %s Weight: %.3f\n", constraintsFile, constraintWeight);
2017       if (pseudoWeight > 0)
2018 	fprintf(fp, "Pseudocount weight for comparing sequences with little overlap: %.3lf\n",pseudoWeight);
2019       fflush(fp);
2020     }
2021   }
2022   if (matrixPrefix != NULL) {
2023     if (!useMatrix) {
2024       fprintf(stderr,"Cannot use both -matrix and -nomatrix arguments!");
2025       exit(1);
2026     }
2027     distance_matrix = ReadDistanceMatrix(matrixPrefix);
2028   } else if (useMatrix) { 	/* use default matrix */
2029     assert(nCodes==20);
2030     distance_matrix = &matrixBLOSUM45;
2031     SetupDistanceMatrix(distance_matrix);
2032   } else {
2033     distance_matrix = NULL;
2034   }
2035 
2036   int iAln;
2037   FILE *fpIn = fileName != NULL ? fopen(fileName, "r") : stdin;
2038   if (fpIn == NULL) {
2039     fprintf(stderr, "Cannot read %s\n", fileName);
2040     exit(1);
2041   }
2042   FILE *fpConstraints = NULL;
2043   if (constraintsFile != NULL) {
2044     fpConstraints = fopen(constraintsFile, "r");
2045     if (fpConstraints == NULL) {
2046       fprintf(stderr, "Cannot read %s\n", constraintsFile);
2047       exit(1);
2048     }
2049   }
2050 
2051   FILE *fpInTree = NULL;
2052   if (intreeFile != NULL) {
2053     fpInTree = fopen(intreeFile,"r");
2054     if (fpInTree == NULL) {
2055       fprintf(stderr, "Cannot read %s\n", intreeFile);
2056       exit(1);
2057     }
2058   }
2059 
2060   for(iAln = 0; iAln < nAlign; iAln++) {
2061     alignment_t *aln = ReadAlignment(fpIn, bQuote);
2062     if (aln->nSeq < 1) {
2063       fprintf(stderr, "No alignment sequences\n");
2064       exit(1);
2065     }
2066     if (fpLog) {
2067       fprintf(fpLog, "Read %d sequences, %d positions\n", aln->nSeq, aln->nPos);
2068       fflush(fpLog);
2069     }
2070 
2071     struct timeval clock_start;
2072     gettimeofday(&clock_start,NULL);
2073     ProgressReport("Read alignment",0,0,0,0);
2074 
2075     /* Check that all names in alignment are unique */
2076     hashstrings_t *hashnames = MakeHashtable(aln->names, aln->nSeq);
2077     int i;
2078     for (i=0; i<aln->nSeq; i++) {
2079       hashiterator_t hi = FindMatch(hashnames,aln->names[i]);
2080       if (HashCount(hashnames,hi) != 1) {
2081 	fprintf(stderr,"Non-unique name '%s' in the alignment\n",aln->names[i]);
2082 	exit(1);
2083       }
2084     }
2085 
2086     /* Make a list of unique sequences -- note some lists are bigger than required */
2087     ProgressReport("Hashed the names",0,0,0,0);
2088     if (make_matrix) {
2089       NJ_t *NJ = InitNJ(aln->seqs, aln->nSeq, aln->nPos,
2090 			/*constraintSeqs*/NULL, /*nConstraints*/0,
2091 			distance_matrix, /*transmat*/NULL);
2092       printf("   %d\n",aln->nSeq);
2093       int i,j;
2094       for(i = 0; i < NJ->nSeq; i++) {
2095 	printf("%s",aln->names[i]);
2096 	for (j = 0; j < NJ->nSeq; j++) {
2097 	  besthit_t hit;
2098 	  SeqDist(NJ->profiles[i]->codes,NJ->profiles[j]->codes,NJ->nPos,NJ->distance_matrix,/*OUT*/&hit);
2099 	  if (logdist)
2100 	    hit.dist = LogCorrect(hit.dist);
2101 	  /* Make sure -0 prints as 0 */
2102 	  printf(" %f", hit.dist <= 0.0 ? 0.0 : hit.dist);
2103 	}
2104 	printf("\n");
2105       }
2106     } else {
2107       /* reset counters*/
2108       profileOps = 0;
2109       outprofileOps = 0;
2110       seqOps = 0;
2111       profileAvgOps = 0;
2112       nHillBetter = 0;
2113       nCloseUsed = 0;
2114       nClose2Used = 0;
2115       nRefreshTopHits = 0;
2116       nVisibleUpdate = 0;
2117       nNNI = 0;
2118       nML_NNI = 0;
2119       nProfileFreqAlloc = 0;
2120       nProfileFreqAvoid = 0;
2121       szAllAlloc = 0;
2122       mymallocUsed = 0;
2123       maxmallocHeap = 0;
2124       nLkCompute = 0;
2125       nPosteriorCompute = 0;
2126       nAAPosteriorExact = 0;
2127       nAAPosteriorRough = 0;
2128       nStarTests = 0;
2129 
2130       uniquify_t *unique = UniquifyAln(aln);
2131       ProgressReport("Identified unique sequences",0,0,0,0);
2132 
2133       /* read constraints */
2134       alignment_t *constraints = NULL;
2135       char **uniqConstraints = NULL;
2136       if (constraintsFile != NULL) {
2137 	constraints = ReadAlignment(fpConstraints, bQuote);
2138 	if (constraints->nSeq < 4) {
2139 	  fprintf(stderr, "Warning: constraints file with less than 4 sequences ignored:\nalignment #%d in %s\n",
2140 		  iAln+1, constraintsFile);
2141 	  constraints = FreeAlignment(constraints);
2142 	} else {
2143 	  uniqConstraints = AlnToConstraints(constraints, unique, hashnames);
2144 	  ProgressReport("Read the constraints",0,0,0,0);
2145 	}
2146       }	/* end load constraints */
2147 
2148       transition_matrix_t *transmat = NULL;
2149       if (nCodes == 20) {
2150 			transmat = bUseLg? CreateTransitionMatrix(matrixLG08,statLG08) :
2151                           (bUseWag? CreateTransitionMatrix(matrixWAG01,statWAG01) :
2152                            CreateTransitionMatrix(matrixJTT92,statJTT92));
2153       } else if (nCodes == 4 && bUseGtr && (bUseGtrRates || bUseGtrFreq)) {
2154 	transmat = CreateGTR(gtrrates,gtrfreq);
2155       }
2156       NJ_t *NJ = InitNJ(unique->uniqueSeq, unique->nUnique, aln->nPos,
2157 			uniqConstraints,
2158 			uniqConstraints != NULL ? constraints->nPos : 0, /* nConstraints */
2159 			distance_matrix,
2160 			transmat);
2161       if (verbose>2) fprintf(stderr, "read %s seqs %d (%d unique) positions %d nameLast %s seqLast %s\n",
2162 			     fileName ? fileName : "standard input",
2163 			     aln->nSeq, unique->nUnique, aln->nPos, aln->names[aln->nSeq-1], aln->seqs[aln->nSeq-1]);
2164       FreeAlignmentSeqs(/*IN/OUT*/aln); /*no longer needed*/
2165       if (fpInTree != NULL) {
2166 	if (intree1)
2167 	  fseek(fpInTree, 0L, SEEK_SET);
2168 	ReadTree(/*IN/OUT*/NJ, /*IN*/unique, /*IN*/hashnames, /*READ*/fpInTree);
2169 	if (verbose > 2)
2170 	  fprintf(stderr, "Read tree from %s\n", intreeFile);
2171 	if (verbose > 2)
2172 	  PrintNJ(stderr, NJ, aln->names, unique, /*support*/false, bQuote);
2173       } else {
2174 	FastNJ(NJ);
2175       }
2176       LogTree("NJ", 0, fpLog, NJ, aln->names, unique, bQuote);
2177 
2178       /* profile-frequencies for the "up-profiles" in ReliabilityNJ take only diameter(Tree)*L*a
2179 	 space not N*L*a space, because we can free them as we go.
2180 	 And up-profile by their nature tend to be complicated.
2181 	 So save the profile-frequency memory allocation counters now to exclude later results.
2182       */
2183 #ifdef TRACK_MEMORY
2184       long svProfileFreqAlloc = nProfileFreqAlloc;
2185       long svProfileFreqAvoid = nProfileFreqAvoid;
2186 #endif
2187       int nniToDo = nni == -1 ? (int)(0.5 + 4.0 * log(NJ->nSeq)/log(2)) : nni;
2188       int sprRemaining = spr;
2189       int MLnniToDo = (MLnni != -1) ? MLnni : (int)(0.5 + 2.0*log(NJ->nSeq)/log(2));
2190       if(verbose>0) {
2191 	if (fpInTree == NULL)
2192 	  fprintf(stderr, "Initial topology in %.2f seconds\n", clockDiff(&clock_start));
2193 	if (spr > 0 || nniToDo > 0 || MLnniToDo > 0)
2194 	  fprintf(stderr,"Refining topology: %d rounds ME-NNIs, %d rounds ME-SPRs, %d rounds ML-NNIs\n", nniToDo, spr, MLnniToDo);
2195       }
2196 
2197       if (nniToDo>0) {
2198 	int i;
2199 	bool bConverged = false;
2200 	nni_stats_t *nni_stats = InitNNIStats(NJ);
2201 	for (i=0; i < nniToDo; i++) {
2202 	  double maxDelta;
2203 	  if (!bConverged) {
2204 	    int nChange = NNI(/*IN/OUT*/NJ, i, nniToDo, /*use ml*/false, /*IN/OUT*/nni_stats, /*OUT*/&maxDelta);
2205 	    LogTree("ME_NNI%d",i+1, fpLog, NJ, aln->names, unique, bQuote);
2206 	    if (nChange == 0) {
2207 	      bConverged = true;
2208 	      if (verbose>1)
2209 		fprintf(stderr, "Min_evolution NNIs converged at round %d -- skipping some rounds\n", i+1);
2210 	      if (fpLog)
2211 		fprintf(fpLog, "Min_evolution NNIs converged at round %d -- skipping some rounds\n", i+1);
2212 	    }
2213 	  }
2214 
2215 	  /* Interleave SPRs with NNIs (typically 1/3rd NNI, SPR, 1/3rd NNI, SPR, 1/3rd NNI */
2216 	  if (sprRemaining > 0 && (nniToDo/(spr+1) > 0 && ((i+1) % (nniToDo/(spr+1))) == 0)) {
2217 	    SPR(/*IN/OUT*/NJ, maxSPRLength, spr-sprRemaining, spr);
2218 	    LogTree("ME_SPR%d",spr-sprRemaining+1, fpLog, NJ, aln->names, unique, bQuote);
2219 	    sprRemaining--;
2220 	    /* Restart the NNIs -- set all ages to 0, etc. */
2221 	    bConverged = false;
2222 	    nni_stats = FreeNNIStats(nni_stats, NJ);
2223 	    nni_stats = InitNNIStats(NJ);
2224 	  }
2225 	}
2226 	nni_stats = FreeNNIStats(nni_stats, NJ);
2227       }
2228       while(sprRemaining > 0) {	/* do any remaining SPR rounds */
2229 	SPR(/*IN/OUT*/NJ, maxSPRLength, spr-sprRemaining, spr);
2230 	LogTree("ME_SPR%d",spr-sprRemaining+1, fpLog, NJ, aln->names, unique, bQuote);
2231 	sprRemaining--;
2232       }
2233 
2234       /* In minimum-evolution mode, update branch lengths, even if no NNIs or SPRs,
2235 	 so that they are log-corrected, do not include penalties from constraints,
2236 	 and avoid errors due to approximation of out-distances.
2237 	 If doing maximum-likelihood NNIs, then we'll also use these
2238 	 to get estimates of starting distances for quartets, etc.
2239 	*/
2240       UpdateBranchLengths(/*IN/OUT*/NJ);
2241       LogTree("ME_Lengths",0, fpLog, NJ, aln->names, unique, bQuote);
2242 
2243       double total_len = 0;
2244       int iNode;
2245       for (iNode = 0; iNode < NJ->maxnode; iNode++)
2246 	total_len += fabs(NJ->branchlength[iNode]);
2247 
2248       if (verbose>0) {
2249 	fprintf(stderr, "Total branch-length %.3f after %.2f sec\n",
2250 		total_len, clockDiff(&clock_start));
2251 	fflush(stderr);
2252       }
2253       if (fpLog) {
2254 	fprintf(fpLog, "Total branch-length %.3f after %.2f sec\n",
2255 		total_len, clockDiff(&clock_start));
2256 	fflush(stderr);
2257       }
2258 
2259 #ifdef TRACK_MEMORY
2260   if (verbose>1) {
2261     struct mallinfo mi = mallinfo();
2262     fprintf(stderr, "Memory @ end of ME phase: %.2f MB (%.1f byte/pos) useful %.2f expected %.2f\n",
2263 	    (mi.arena+mi.hblkhd)/1.0e6, (mi.arena+mi.hblkhd)/(double)(NJ->nSeq*(double)NJ->nPos),
2264 	    mi.uordblks/1.0e6, mymallocUsed/1e6);
2265   }
2266 #endif
2267 
2268       SplitCount_t splitcount = {0,0,0,0,0.0,0.0};
2269 
2270       if (MLnniToDo > 0 || MLlen) {
2271 	bool warn_len = total_len/NJ->maxnode < 0.001 && MLMinBranchLengthTolerance > 1.0/aln->nPos;
2272 	bool warn = warn_len || (total_len/NJ->maxnode < 0.001 && aln->nPos >= 10000);
2273 	if (warn)
2274 	  fprintf(stderr, "\nWARNING! This alignment consists of closely-related and very-long sequences.\n");
2275 	if (warn_len)
2276 	  fprintf(stderr,
2277 		  "This version of FastTree may not report reasonable branch lengths!\n"
2278 #ifdef USE_DOUBLE
2279 		  "Consider changing MLMinBranchLengthTolerance.\n"
2280 #else
2281 		  "Consider recompiling FastTree with -DUSE_DOUBLE.\n"
2282 #endif
2283 		  "For more information, visit\n"
2284 		  "http://www.microbesonline.org/fasttree/#BranchLen\n\n");
2285 	if (warn)
2286 	  fprintf(stderr, "WARNING! FastTree (or other standard maximum-likelihood tools)\n"
2287 		  "may not be appropriate for aligments of very closely-related sequences\n"
2288 		  "like this one, as FastTree does not account for recombination or gene conversion\n\n");
2289 
2290 	/* Do maximum-likelihood computations */
2291 	/* Convert profiles to use the transition matrix */
2292 	distance_matrix_t *tmatAsDist = TransMatToDistanceMat(/*OPTIONAL*/NJ->transmat);
2293 	RecomputeProfiles(NJ, /*OPTIONAL*/tmatAsDist);
2294 	tmatAsDist = myfree(tmatAsDist, sizeof(distance_matrix_t));
2295 	double lastloglk = -1e20;
2296 	nni_stats_t *nni_stats = InitNNIStats(NJ);
2297 	bool resetGtr = nCodes == 4 && bUseGtr && !bUseGtrRates;
2298 
2299 	if (MLlen) {
2300 	  int iRound;
2301 	  int maxRound = (int)(0.5 + log(NJ->nSeq)/log(2));
2302 	  double dLastLogLk = -1e20;
2303 	  for (iRound = 1; iRound <= maxRound; iRound++) {
2304 	    int node;
2305 	    numeric_t *oldlength = (numeric_t*)mymalloc(sizeof(numeric_t)*NJ->maxnodes);
2306 	    for (node = 0; node < NJ->maxnode; node++)
2307 	      oldlength[node] = NJ->branchlength[node];
2308 	    OptimizeAllBranchLengths(/*IN/OUT*/NJ);
2309 	    LogTree("ML_Lengths",iRound, fpLog, NJ, aln->names, unique, bQuote);
2310 	    double dMaxChange = 0; /* biggest change in branch length */
2311 	    for (node = 0; node < NJ->maxnode; node++) {
2312 	      double d = fabs(oldlength[node] - NJ->branchlength[node]);
2313 	      if (dMaxChange < d)
2314 		dMaxChange = d;
2315 	    }
2316 	    oldlength = myfree(oldlength, sizeof(numeric_t)*NJ->maxnodes);
2317 	    double loglk = TreeLogLk(NJ, /*site_likelihoods*/NULL);
2318 	    bool bConverged = iRound > 1 && (dMaxChange < 0.001 || loglk < (dLastLogLk+treeLogLkDelta));
2319 	    if (verbose)
2320 	      fprintf(stderr, "%d rounds ML lengths: LogLk %s= %.3lf Max-change %.4lf%s Time %.2f\n",
2321 		      iRound,
2322 		      exactML || nCodes != 20 ? "" : "~",
2323 		      loglk,
2324 		      dMaxChange,
2325 		      bConverged ? " (converged)" : "",
2326 		      clockDiff(&clock_start));
2327 	    if (fpLog)
2328 	      fprintf(fpLog, "TreeLogLk\tLength%d\t%.4lf\tMaxChange\t%.4lf\n",
2329 		      iRound, loglk, dMaxChange);
2330 	    if (iRound == 1) {
2331 	      if (resetGtr)
2332 		SetMLGtr(/*IN/OUT*/NJ, bUseGtrFreq ? gtrfreq : NULL, fpLog);
2333 	      SetMLRates(/*IN/OUT*/NJ, nRateCats);
2334 	      LogMLRates(fpLog, NJ);
2335 	    }
2336 	    if (bConverged)
2337 	      break;
2338 	  }
2339 	}
2340 
2341 	if (MLnniToDo > 0) {
2342 	  /* This may help us converge faster, and is fast */
2343 	  OptimizeAllBranchLengths(/*IN/OUT*/NJ);
2344 	  LogTree("ML_Lengths%d",1, fpLog, NJ, aln->names, unique, bQuote);
2345 	}
2346 
2347 	int iMLnni;
2348 	double maxDelta;
2349 	bool bConverged = false;
2350 	for (iMLnni = 0; iMLnni < MLnniToDo; iMLnni++) {
2351 	  int changes = NNI(/*IN/OUT*/NJ, iMLnni, MLnniToDo, /*use ml*/true, /*IN/OUT*/nni_stats, /*OUT*/&maxDelta);
2352 	  LogTree("ML_NNI%d",iMLnni+1, fpLog, NJ, aln->names, unique, bQuote);
2353 	  double loglk = TreeLogLk(NJ, /*site_likelihoods*/NULL);
2354 	  bool bConvergedHere = (iMLnni > 0) && ((loglk < lastloglk + treeLogLkDelta) || maxDelta < treeLogLkDelta);
2355 	  if (verbose)
2356 	    fprintf(stderr, "ML-NNI round %d: LogLk %s= %.3f NNIs %d max delta %.2f Time %.2f%s\n",
2357 		    iMLnni+1,
2358 		    exactML || nCodes != 20 ? "" : "~",
2359 		    loglk, changes, maxDelta,  clockDiff(&clock_start),
2360 		    bConverged ? " (final)" : "");
2361 	  if (fpLog)
2362 	    fprintf(fpLog, "TreeLogLk\tML_NNI%d\t%.4lf\tMaxChange\t%.4lf\n", iMLnni+1, loglk, maxDelta);
2363 	  if (bConverged)
2364 	    break;		/* we did our extra round */
2365 	  if (bConvergedHere)
2366 	    bConverged = true;
2367 	  if (bConverged || iMLnni == MLnniToDo-2) {
2368 	    /* last round uses high-accuracy seettings -- reset NNI stats to tone down heuristics */
2369 	    nni_stats = FreeNNIStats(nni_stats, NJ);
2370 	    nni_stats = InitNNIStats(NJ);
2371 	    if (verbose)
2372 	      fprintf(stderr, "Turning off heuristics for final round of ML NNIs%s\n",
2373 		      bConvergedHere? " (converged)" : "");
2374 	    if (fpLog)
2375 	      fprintf(fpLog, "Turning off heuristics for final round of ML NNIs%s\n",
2376 		      bConvergedHere? " (converged)" : "");
2377 	  }
2378 	  lastloglk = loglk;
2379 	  if (iMLnni == 0 && NJ->rates.nRateCategories == 1) {
2380 	    if (resetGtr)
2381 	      SetMLGtr(/*IN/OUT*/NJ, bUseGtrFreq ? gtrfreq : NULL, fpLog);
2382 	    SetMLRates(/*IN/OUT*/NJ, nRateCats);
2383 	    LogMLRates(fpLog, NJ);
2384 	  }
2385 	}
2386 	nni_stats = FreeNNIStats(nni_stats, NJ);
2387 
2388 	/* This does not take long and improves the results */
2389 	if (MLnniToDo > 0) {
2390 	  OptimizeAllBranchLengths(/*IN/OUT*/NJ);
2391 	  LogTree("ML_Lengths%d",2, fpLog, NJ, aln->names, unique, bQuote);
2392 	  if (verbose || fpLog) {
2393 	    double loglk = TreeLogLk(NJ, /*site_likelihoods*/NULL);
2394 	    if (verbose)
2395 	      fprintf(stderr, "Optimize all lengths: LogLk %s= %.3f Time %.2f\n",
2396 		      exactML || nCodes != 20 ? "" : "~",
2397 		      loglk,
2398 		      clockDiff(&clock_start));
2399 	    if (fpLog) {
2400 	      fprintf(fpLog, "TreeLogLk\tML_Lengths%d\t%.4f\n", 2, loglk);
2401 	      fflush(fpLog);
2402 	    }
2403 	  }
2404 	}
2405 
2406 	/* Count bad splits and compute SH-like supports if desired */
2407 	if ((MLnniToDo > 0 && !fastest) || nBootstrap > 0)
2408 	  TestSplitsML(NJ, /*OUT*/&splitcount, nBootstrap);
2409 
2410 	/* Compute gamma-based likelihood? */
2411 	if (gammaLogLk && nRateCats > 1) {
2412 	  numeric_t *rates = MLSiteRates(nRateCats);
2413 	  double *site_loglk = MLSiteLikelihoodsByRate(NJ, rates, nRateCats);
2414 	  double scale = RescaleGammaLogLk(NJ->nPos, nRateCats, rates, /*IN*/site_loglk, /*OPTIONAL*/fpLog);
2415 	  rates = myfree(rates, sizeof(numeric_t) * nRateCats);
2416 	  site_loglk = myfree(site_loglk, sizeof(double) * nRateCats * NJ->nPos);
2417 
2418 	  for (i = 0; i < NJ->maxnodes; i++)
2419 	    NJ->branchlength[i] *= scale;
2420 	}
2421       } else {
2422 	/* Minimum evolution supports */
2423 	TestSplitsMinEvo(NJ, /*OUT*/&splitcount);
2424 	if (nBootstrap > 0)
2425 	  ReliabilityNJ(NJ, nBootstrap);
2426       }
2427 
2428       for (i = 0; i < nFPs; i++) {
2429 	FILE *fp = fps[i];
2430 	fprintf(fp, "Total time: %.2f seconds Unique: %d/%d Bad splits: %d/%d",
2431 		clockDiff(&clock_start),
2432 		NJ->nSeq, aln->nSeq,
2433 		splitcount.nBadSplits, splitcount.nSplits);
2434 	if (splitcount.dWorstDeltaUnconstrained >  0)
2435 	  fprintf(fp, " Worst %sdelta-%s %.3f",
2436 		  uniqConstraints != NULL ? "unconstrained " : "",
2437 		  (MLnniToDo > 0 || MLlen) ? "LogLk" : "Len",
2438 		  splitcount.dWorstDeltaUnconstrained);
2439 	fprintf(fp,"\n");
2440 	if (NJ->nSeq > 3 && NJ->nConstraints > 0) {
2441 	    fprintf(fp, "Violating constraints: %d both bad: %d",
2442 		    splitcount.nConstraintViolations, splitcount.nBadBoth);
2443 	    if (splitcount.dWorstDeltaConstrained >  0)
2444 	      fprintf(fp, " Worst delta-%s due to constraints: %.3f",
2445 		      (MLnniToDo > 0 || MLlen) ? "LogLk" : "Len",
2446 		      splitcount.dWorstDeltaConstrained);
2447 	    fprintf(fp,"\n");
2448 	}
2449 	if (verbose > 1 || fp == fpLog) {
2450 	  double dN2 = NJ->nSeq*(double)NJ->nSeq;
2451 	  fprintf(fp, "Dist/N**2: by-profile %.3f (out %.3f) by-leaf %.3f avg-prof %.3f\n",
2452 		  profileOps/dN2, outprofileOps/dN2, seqOps/dN2, profileAvgOps/dN2);
2453 	  if (nCloseUsed>0 || nClose2Used > 0 || nRefreshTopHits>0)
2454 	    fprintf(fp, "Top hits: close neighbors %ld/%d 2nd-level %ld refreshes %ld",
2455 		    nCloseUsed, NJ->nSeq, nClose2Used, nRefreshTopHits);
2456 	  if(!slow) fprintf(fp, " Hill-climb: %ld Update-best: %ld\n", nHillBetter, nVisibleUpdate);
2457 	  if (nniToDo > 0 || spr > 0 || MLnniToDo > 0)
2458 	    fprintf(fp, "NNI: %ld SPR: %ld ML-NNI: %ld\n", nNNI, nSPR, nML_NNI);
2459 	  if (MLnniToDo > 0) {
2460 	    fprintf(fp, "Max-lk operations: lk %ld posterior %ld", nLkCompute, nPosteriorCompute);
2461 	    if (nAAPosteriorExact > 0 || nAAPosteriorRough > 0)
2462 	      fprintf(fp, " approximate-posteriors %.2f%%",
2463 		      (100.0*nAAPosteriorRough)/(double)(nAAPosteriorExact+nAAPosteriorRough));
2464 	    if (mlAccuracy < 2)
2465 	      fprintf(fp, " star-only %ld", nStarTests);
2466 	    fprintf(fp, "\n");
2467 	  }
2468 	}
2469 #ifdef TRACK_MEMORY
2470 	fprintf(fp, "Memory: %.2f MB (%.1f byte/pos) ",
2471 		maxmallocHeap/1.0e6, maxmallocHeap/(double)(aln->nSeq*(double)aln->nPos));
2472 	/* Only report numbers from before we do reliability estimates */
2473 	fprintf(fp, "profile-freq-alloc %ld avoided %.2f%%\n",
2474 		svProfileFreqAlloc,
2475 		svProfileFreqAvoid > 0 ?
2476 		100.0*svProfileFreqAvoid/(double)(svProfileFreqAlloc+svProfileFreqAvoid)
2477 		: 0);
2478 #endif
2479 	fflush(fp);
2480       }
2481       PrintNJ(fpOut, NJ, aln->names, unique, /*support*/nBootstrap > 0, bQuote);
2482       fflush(fpOut);
2483       if (fpLog) {
2484 	fprintf(fpLog,"TreeCompleted\n");
2485 	fflush(fpLog);
2486       }
2487       FreeNJ(NJ);
2488       if (uniqConstraints != NULL)
2489 	uniqConstraints = myfree(uniqConstraints, sizeof(char*) * unique->nUnique);
2490       constraints = FreeAlignment(constraints);
2491       unique = FreeUniquify(unique);
2492     } /* end build tree */
2493     hashnames = FreeHashtable(hashnames);
2494     aln = FreeAlignment(aln);
2495   } /* end loop over alignments */
2496   if (fpLog != NULL)
2497     fclose(fpLog);
2498   if (fpOut != stdout) fclose(fpOut);
2499   exit(0);
2500 }
2501 
ProgressReport(char * format,int i1,int i2,int i3,int i4)2502 void ProgressReport(char *format, int i1, int i2, int i3, int i4) {
2503   static bool time_set = false;
2504   static struct timeval time_last;
2505   static struct timeval time_begin;
2506 
2507   if (!showProgress)
2508     return;
2509 
2510   static struct timeval time_now;
2511   gettimeofday(&time_now,NULL);
2512   if (!time_set) {
2513     time_begin = time_last = time_now;
2514     time_set = true;
2515   }
2516   static struct timeval elapsed;
2517   timeval_subtract(&elapsed,&time_now,&time_last);
2518 
2519   if (elapsed.tv_sec > 1 || elapsed.tv_usec > 100*1000 || verbose > 1) {
2520     timeval_subtract(&elapsed,&time_now,&time_begin);
2521     fprintf(stderr, "%7i.%2.2i seconds: ", (int)elapsed.tv_sec, (int)(elapsed.tv_usec/10000));
2522     fprintf(stderr, format, i1, i2, i3, i4);
2523     if (verbose > 1 || !isatty(STDERR_FILENO)) {
2524       fprintf(stderr, "\n");
2525     } else {
2526       fprintf(stderr, "   \r");
2527     }
2528     fflush(stderr);
2529     time_last = time_now;
2530   }
2531 }
2532 
LogMLRates(FILE * fpLog,NJ_t * NJ)2533 void LogMLRates(/*OPTIONAL WRITE*/FILE *fpLog, NJ_t *NJ) {
2534   if (fpLog != NULL) {
2535     rates_t *rates = &NJ->rates;
2536     fprintf(fpLog, "NCategories\t%d\nRates",rates->nRateCategories);
2537     assert(rates->nRateCategories > 0);
2538     int iRate;
2539     for (iRate = 0; iRate < rates->nRateCategories; iRate++)
2540       fprintf(fpLog, " %f", rates->rates[iRate]);
2541     fprintf(fpLog,"\nSiteCategories");
2542     int iPos;
2543     for (iPos = 0; iPos < NJ->nPos; iPos++) {
2544       iRate = rates->ratecat[iPos];
2545       fprintf(fpLog," %d",iRate+1);
2546     }
2547     fprintf(fpLog,"\n");
2548     fflush(fpLog);
2549   }
2550 }
2551 
LogTree(char * format,int i,FILE * fpLog,NJ_t * NJ,char ** names,uniquify_t * unique,bool bQuote)2552 void LogTree(char *format, int i, /*OPTIONAL WRITE*/FILE *fpLog, NJ_t *NJ, char **names, uniquify_t *unique, bool bQuote) {
2553   if(fpLog != NULL) {
2554     fprintf(fpLog, format, i);
2555     fprintf(fpLog, "\t");
2556     PrintNJ(fpLog, NJ, names, unique, /*support*/false, bQuote);
2557     fflush(fpLog);
2558   }
2559 }
2560 
InitNJ(char ** sequences,int nSeq,int nPos,char ** constraintSeqs,int nConstraints,distance_matrix_t * distance_matrix,transition_matrix_t * transmat)2561 NJ_t *InitNJ(char **sequences, int nSeq, int nPos,
2562 	     /*OPTIONAL*/char **constraintSeqs, int nConstraints,
2563 	     /*OPTIONAL*/distance_matrix_t *distance_matrix,
2564 	     /*OPTIONAL*/transition_matrix_t *transmat) {
2565   int iNode;
2566 
2567   NJ_t *NJ = (NJ_t*)mymalloc(sizeof(NJ_t));
2568   NJ->root = -1; 		/* set at end of FastNJ() */
2569   NJ->maxnode = NJ->nSeq = nSeq;
2570   NJ->nPos = nPos;
2571   NJ->maxnodes = 2*nSeq;
2572   NJ->seqs = sequences;
2573   NJ->distance_matrix = distance_matrix;
2574   NJ->transmat = transmat;
2575   NJ->nConstraints = nConstraints;
2576   NJ->constraintSeqs = constraintSeqs;
2577 
2578   NJ->profiles = (profile_t **)mymalloc(sizeof(profile_t*) * NJ->maxnodes);
2579 
2580   unsigned long counts[256];
2581   int i;
2582   for (i = 0; i < 256; i++)
2583     counts[i] = 0;
2584   for (iNode = 0; iNode < NJ->nSeq; iNode++) {
2585     NJ->profiles[iNode] = SeqToProfile(NJ, NJ->seqs[iNode], nPos,
2586 				       constraintSeqs != NULL ? constraintSeqs[iNode] : NULL,
2587 				       nConstraints,
2588 				       iNode,
2589 				       /*IN/OUT*/counts);
2590   }
2591   unsigned long totCount = 0;
2592   for (i = 0; i < 256; i++)
2593     totCount += counts[i];
2594 
2595   /* warnings about unknown characters */
2596   for (i = 0; i < 256; i++) {
2597     if (counts[i] == 0 || i == '.' || i == '-')
2598       continue;
2599     unsigned char *codesP;
2600     bool bMatched = false;
2601     for (codesP = codesString; *codesP != '\0'; codesP++) {
2602       if (*codesP == i || tolower(*codesP) == i) {
2603 	bMatched = true;
2604 	break;
2605       }
2606     }
2607     if (!bMatched)
2608       fprintf(stderr, "Ignored unknown character %c (seen %lu times)\n", i, counts[i]);
2609   }
2610 
2611 
2612   /* warnings about the counts */
2613   double fACGTUN = (counts['A'] + counts['C'] + counts['G'] + counts['T'] + counts['U'] + counts['N']
2614 		    + counts['a'] + counts['c'] + counts['g'] + counts['t'] + counts['u'] + counts['n'])
2615     / (double)(totCount - counts['-'] - counts['.']);
2616   if (nCodes == 4 && fACGTUN < 0.9)
2617     fprintf(stderr, "WARNING! ONLY %.1f%% NUCLEOTIDE CHARACTERS -- IS THIS REALLY A NUCLEOTIDE ALIGNMENT?\n",
2618 	    100.0 * fACGTUN);
2619   else if (nCodes == 20 && fACGTUN >= 0.9)
2620     fprintf(stderr, "WARNING! %.1f%% NUCLEOTIDE CHARACTERS -- IS THIS REALLY A PROTEIN ALIGNMENT?\n",
2621 	    100.0 * fACGTUN);
2622 
2623   if(verbose>10) fprintf(stderr,"Made sequence profiles\n");
2624   for (iNode = NJ->nSeq; iNode < NJ->maxnodes; iNode++)
2625     NJ->profiles[iNode] = NULL; /* not yet exists */
2626 
2627   NJ->outprofile = OutProfile(NJ->profiles, NJ->nSeq,
2628 			      NJ->nPos, NJ->nConstraints,
2629 			      NJ->distance_matrix);
2630   if(verbose>10) fprintf(stderr,"Made out-profile\n");
2631 
2632   NJ->totdiam = 0.0;
2633 
2634   NJ->diameter = (numeric_t *)mymalloc(sizeof(numeric_t)*NJ->maxnodes);
2635   for (iNode = 0; iNode < NJ->maxnodes; iNode++) NJ->diameter[iNode] = 0;
2636 
2637   NJ->varDiameter = (numeric_t *)mymalloc(sizeof(numeric_t)*NJ->maxnodes);
2638   for (iNode = 0; iNode < NJ->maxnodes; iNode++) NJ->varDiameter[iNode] = 0;
2639 
2640   NJ->selfdist = (numeric_t *)mymalloc(sizeof(numeric_t)*NJ->maxnodes);
2641   for (iNode = 0; iNode < NJ->maxnodes; iNode++) NJ->selfdist[iNode] = 0;
2642 
2643   NJ->selfweight = (numeric_t *)mymalloc(sizeof(numeric_t)*NJ->maxnodes);
2644   for (iNode = 0; iNode < NJ->nSeq; iNode++)
2645     NJ->selfweight[iNode] = NJ->nPos - NGaps(NJ,iNode);
2646 
2647   NJ->outDistances = (numeric_t *)mymalloc(sizeof(numeric_t)*NJ->maxnodes);
2648   NJ->nOutDistActive = (int *)mymalloc(sizeof(int)*NJ->maxnodes);
2649   for (iNode = 0; iNode < NJ->maxnodes; iNode++)
2650     NJ->nOutDistActive[iNode] = NJ->nSeq * 10; /* unreasonably high value */
2651   NJ->parent = NULL;		/* so SetOutDistance ignores it */
2652   for (iNode = 0; iNode < NJ->nSeq; iNode++)
2653     SetOutDistance(/*IN/UPDATE*/NJ, iNode, /*nActive*/NJ->nSeq);
2654 
2655   if (verbose>2) {
2656     for (iNode = 0; iNode < 4 && iNode < NJ->nSeq; iNode++)
2657       fprintf(stderr, "Node %d outdist %f\n", iNode, NJ->outDistances[iNode]);
2658   }
2659 
2660   NJ->parent = (int *)mymalloc(sizeof(int)*NJ->maxnodes);
2661   for (iNode = 0; iNode < NJ->maxnodes; iNode++) NJ->parent[iNode] = -1;
2662 
2663   NJ->branchlength = (numeric_t *)mymalloc(sizeof(numeric_t)*NJ->maxnodes); /* distance to parent */
2664   for (iNode = 0; iNode < NJ->maxnodes; iNode++) NJ->branchlength[iNode] = 0;
2665 
2666   NJ->support = (numeric_t *)mymalloc(sizeof(numeric_t)*NJ->maxnodes);
2667   for (iNode = 0; iNode < NJ->maxnodes; iNode++) NJ->support[iNode] = -1.0;
2668 
2669   NJ->child = (children_t*)mymalloc(sizeof(children_t)*NJ->maxnodes);
2670   for (iNode= 0; iNode < NJ->maxnode; iNode++) NJ->child[iNode].nChild = 0;
2671 
2672   NJ->rates.nRateCategories = 0;
2673   NJ->rates.rates = NULL;
2674   NJ->rates.ratecat = NULL;
2675   AllocRateCategories(&NJ->rates, 1, NJ->nPos);
2676   return(NJ);
2677 }
2678 
FreeNJ(NJ_t * NJ)2679 NJ_t *FreeNJ(NJ_t *NJ) {
2680   if (NJ==NULL)
2681     return(NJ);
2682 
2683   int i;
2684   for (i=0; i < NJ->maxnode; i++)
2685     NJ->profiles[i] = FreeProfile(NJ->profiles[i], NJ->nPos, NJ->nConstraints);
2686   NJ->profiles = myfree(NJ->profiles, sizeof(profile_t*) * NJ->maxnodes);
2687   NJ->outprofile = FreeProfile(NJ->outprofile, NJ->nPos, NJ->nConstraints);
2688   NJ->diameter = myfree(NJ->diameter, sizeof(numeric_t)*NJ->maxnodes);
2689   NJ->varDiameter = myfree(NJ->varDiameter, sizeof(numeric_t)*NJ->maxnodes);
2690   NJ->selfdist = myfree(NJ->selfdist, sizeof(numeric_t)*NJ->maxnodes);
2691   NJ->selfweight = myfree(NJ->selfweight, sizeof(numeric_t)*NJ->maxnodes);
2692   NJ->outDistances = myfree(NJ->outDistances, sizeof(numeric_t)*NJ->maxnodes);
2693   NJ->nOutDistActive = myfree(NJ->nOutDistActive, sizeof(int)*NJ->maxnodes);
2694   NJ->parent = myfree(NJ->parent, sizeof(int)*NJ->maxnodes);
2695   NJ->branchlength = myfree(NJ->branchlength, sizeof(numeric_t)*NJ->maxnodes);
2696   NJ->support = myfree(NJ->support, sizeof(numeric_t)*NJ->maxnodes);
2697   NJ->child = myfree(NJ->child, sizeof(children_t)*NJ->maxnodes);
2698   NJ->transmat = myfree(NJ->transmat, sizeof(transition_matrix_t));
2699   AllocRateCategories(&NJ->rates, 0, NJ->nPos);
2700   return(myfree(NJ, sizeof(NJ_t)));
2701 }
2702 
2703 /* Allocate or reallocate the rate categories, and set every position
2704    to category 0 and every category's rate to 1.0
2705    If nRateCategories=0, just deallocate
2706 */
AllocRateCategories(rates_t * rates,int nRateCategories,int nPos)2707 void AllocRateCategories(/*IN/OUT*/rates_t *rates, int nRateCategories, int nPos) {
2708   assert(nRateCategories >= 0);
2709   rates->rates = myfree(rates->rates, sizeof(numeric_t)*rates->nRateCategories);
2710   rates->ratecat = myfree(rates->ratecat, sizeof(unsigned int)*nPos);
2711   rates->nRateCategories = nRateCategories;
2712   if (rates->nRateCategories > 0) {
2713     rates->rates = (numeric_t*)mymalloc(sizeof(numeric_t)*rates->nRateCategories);
2714     int i;
2715     for (i = 0; i < nRateCategories; i++)
2716       rates->rates[i] = 1.0;
2717     rates->ratecat = (unsigned int *)mymalloc(sizeof(unsigned int)*nPos);
2718     for (i = 0; i < nPos; i++)
2719       rates->ratecat[i] = 0;
2720   }
2721 }
2722 
FastNJ(NJ_t * NJ)2723 void FastNJ(NJ_t *NJ) {
2724   int iNode;
2725 
2726   assert(NJ->nSeq >= 1);
2727   if (NJ->nSeq < 3) {
2728     NJ->root = NJ->maxnode++;
2729     NJ->child[NJ->root].nChild = NJ->nSeq;
2730     for (iNode = 0; iNode < NJ->nSeq; iNode++) {
2731       NJ->parent[iNode] = NJ->root;
2732       NJ->child[NJ->root].child[iNode] = iNode;
2733     }
2734     if (NJ->nSeq == 1) {
2735       NJ->branchlength[0] = 0;
2736     } else {
2737       assert (NJ->nSeq == 2);
2738       besthit_t hit;
2739       SeqDist(NJ->profiles[0]->codes,NJ->profiles[1]->codes,NJ->nPos,NJ->distance_matrix,/*OUT*/&hit);
2740       NJ->branchlength[0] = hit.dist/2.0;
2741       NJ->branchlength[1] = hit.dist/2.0;
2742     }
2743     return;
2744   }
2745 
2746   /* else 3 or more sequences */
2747 
2748   /* The visible set stores the best hit of each node (unless using top hits, in which case
2749      it is handled by the top hits routines) */
2750   besthit_t *visible = NULL;	/* Not used if doing top hits */
2751   besthit_t *besthitNew = NULL;	/* All hits of new node -- not used if doing top-hits */
2752 
2753   /* The top-hits lists, with the key parameter m = length of each top-hit list */
2754   top_hits_t *tophits = NULL;
2755   int m = 0;			/* maximum length of a top-hits list */
2756   if (tophitsMult > 0) {
2757     m = (int)(0.5 + tophitsMult*sqrt(NJ->nSeq));
2758     if(m<4 || 2*m >= NJ->nSeq) {
2759       m=0;
2760       if(verbose>1) fprintf(stderr,"Too few leaves, turning off top-hits\n");
2761     } else {
2762       if(verbose>2) fprintf(stderr,"Top-hit-list size = %d of %d\n", m, NJ->nSeq);
2763     }
2764   }
2765   assert(!(slow && m>0));
2766 
2767   /* Initialize top-hits or visible set */
2768   if (m>0) {
2769     tophits = InitTopHits(NJ, m);
2770     SetAllLeafTopHits(/*IN/UPDATE*/NJ, /*OUT*/tophits);
2771     ResetTopVisible(/*IN/UPDATE*/NJ, /*nActive*/NJ->nSeq, /*IN/OUT*/tophits);
2772   } else if (!slow) {
2773     visible = (besthit_t*)mymalloc(sizeof(besthit_t)*NJ->maxnodes);
2774     besthitNew = (besthit_t*)mymalloc(sizeof(besthit_t)*NJ->maxnodes);
2775     for (iNode = 0; iNode < NJ->nSeq; iNode++)
2776       SetBestHit(iNode, NJ, /*nActive*/NJ->nSeq, /*OUT*/&visible[iNode], /*OUT IGNORED*/NULL);
2777   }
2778 
2779   /* Iterate over joins */
2780   int nActiveOutProfileReset = NJ->nSeq;
2781   int nActive;
2782   for (nActive = NJ->nSeq; nActive > 3; nActive--) {
2783     int nJoinsDone = NJ->nSeq - nActive;
2784     if (nJoinsDone > 0 && (nJoinsDone % 100) == 0)
2785       ProgressReport("Joined %6d of %6d", nJoinsDone, NJ->nSeq-3, 0, 0);
2786 
2787     besthit_t join; 		/* the join to do */
2788     if (slow) {
2789       ExhaustiveNJSearch(NJ,nActive,/*OUT*/&join);
2790     } else if (m>0) {
2791       TopHitNJSearch(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/tophits, /*OUT*/&join);
2792     } else {
2793       FastNJSearch(NJ, nActive, /*IN/OUT*/visible, /*OUT*/&join);
2794     }
2795 
2796     if (verbose>2) {
2797       double penalty = constraintWeight
2798 	* (double)JoinConstraintPenalty(NJ, join.i, join.j);
2799       if (penalty > 0.001) {
2800 	fprintf(stderr, "Constraint violation during neighbor-joining %d %d into %d penalty %.3f\n",
2801 		join.i, join.j, NJ->maxnode, penalty);
2802 	int iC;
2803 	for (iC = 0; iC < NJ->nConstraints; iC++) {
2804 	  int local = JoinConstraintPenaltyPiece(NJ, join.i, join.j, iC);
2805 	  if (local > 0)
2806 	    fprintf(stderr, "Constraint %d piece %d %d/%d %d/%d %d/%d\n", iC, local,
2807 		    NJ->profiles[join.i]->nOn[iC],
2808 		    NJ->profiles[join.i]->nOff[iC],
2809 		    NJ->profiles[join.j]->nOn[iC],
2810 		    NJ->profiles[join.j]->nOff[iC],
2811 		    NJ->outprofile->nOn[iC] - NJ->profiles[join.i]->nOn[iC] - NJ->profiles[join.j]->nOn[iC],
2812 		    NJ->outprofile->nOff[iC] - NJ->profiles[join.i]->nOff[iC] - NJ->profiles[join.j]->nOff[iC]);
2813 	}
2814       }
2815     }
2816 
2817     /* because of the stale out-distance heuristic, make sure that these are up-to-date */
2818     SetOutDistance(NJ, join.i, nActive);
2819     SetOutDistance(NJ, join.j, nActive);
2820     /* Make sure weight is set and criterion is up to date */
2821     SetDistCriterion(NJ, nActive, /*IN/OUT*/&join);
2822     assert(NJ->nOutDistActive[join.i] == nActive);
2823     assert(NJ->nOutDistActive[join.j] == nActive);
2824 
2825     int newnode = NJ->maxnode++;
2826     NJ->parent[join.i] = newnode;
2827     NJ->parent[join.j] = newnode;
2828     NJ->child[newnode].nChild = 2;
2829     NJ->child[newnode].child[0] = join.i < join.j ? join.i : join.j;
2830     NJ->child[newnode].child[1] = join.i > join.j ? join.i : join.j;
2831 
2832     double rawIJ = join.dist + NJ->diameter[join.i] + NJ->diameter[join.j];
2833     double distIJ = join.dist;
2834 
2835     double deltaDist = (NJ->outDistances[join.i]-NJ->outDistances[join.j])/(double)(nActive-2);
2836     NJ->branchlength[join.i] = (distIJ + deltaDist)/2;
2837     NJ->branchlength[join.j] = (distIJ - deltaDist)/2;
2838 
2839     double bionjWeight = 0.5;	/* IJ = bionjWeight*I + (1-bionjWeight)*J */
2840     double varIJ = rawIJ - NJ->varDiameter[join.i] - NJ->varDiameter[join.j];
2841 
2842     if (bionj && join.weight > 0.01 && varIJ > 0.001) {
2843       /* Set bionjWeight according to the BIONJ formula, where
2844 	 the variance matrix is approximated by
2845 
2846 	 Vij = ProfileVar(i,j) - varDiameter(i) - varDiameter(j)
2847 	 ProfileVar(i,j) = distance(i,j) = top(i,j)/weight(i,j)
2848 
2849 	 (The node's distance diameter does not affect the variances.)
2850 
2851 	 The BIONJ formula is equation 9 from Gascuel 1997:
2852 
2853 	 bionjWeight = 1/2 + sum(k!=i,j) (Vjk - Vik) / ((nActive-2)*Vij)
2854 	 sum(k!=i,j) (Vjk - Vik) = sum(k!=i,j) Vik - varDiameter(j) + varDiameter(i)
2855 	 = sum(k!=i,j) ProfileVar(j,k) - sum(k!=i,j) ProfileVar(i,k) + (nActive-2)*(varDiameter(i)-varDiameter(j))
2856 
2857 	 sum(k!=i,j) ProfileVar(i,k)
2858 	 ~= (sum(k!=i,j) distance(i,k) * weight(i,k))/(mean(k!=i,j) weight(i,k))
2859 	 ~= (N-2) * top(i, Out-i-j) / weight(i, Out-i-j)
2860 
2861 	 weight(i, Out-i-j) = N*weight(i,Out) - weight(i,i) - weight(i,j)
2862 	 top(i, Out-i-j) = N*top(i,Out) - top(i,i) - top(i,j)
2863       */
2864       besthit_t outI;
2865       besthit_t outJ;
2866       ProfileDist(NJ->profiles[join.i],NJ->outprofile,NJ->nPos,NJ->distance_matrix,/*OUT*/&outI);
2867       ProfileDist(NJ->profiles[join.j],NJ->outprofile,NJ->nPos,NJ->distance_matrix,/*OUT*/&outJ);
2868       outprofileOps += 2;
2869 
2870       double varIWeight = (nActive * outI.weight - NJ->selfweight[join.i] - join.weight);
2871       double varJWeight = (nActive * outJ.weight - NJ->selfweight[join.j] - join.weight);
2872 
2873       double varITop = outI.dist * outI.weight * nActive
2874 	- NJ->selfdist[join.i] * NJ->selfweight[join.i] - rawIJ * join.weight;
2875       double varJTop = outJ.dist * outJ.weight * nActive
2876 	- NJ->selfdist[join.j] * NJ->selfweight[join.j] - rawIJ * join.weight;
2877 
2878       double deltaProfileVarOut = (nActive-2) * (varJTop/varJWeight - varITop/varIWeight);
2879       double deltaVarDiam = (nActive-2)*(NJ->varDiameter[join.i] - NJ->varDiameter[join.j]);
2880       if (varJWeight > 0.01 && varIWeight > 0.01)
2881 	bionjWeight = 0.5 + (deltaProfileVarOut+deltaVarDiam)/(2*(nActive-2)*varIJ);
2882       if(bionjWeight<0) bionjWeight=0;
2883       if(bionjWeight>1) bionjWeight=1;
2884       if (verbose>2) fprintf(stderr,"dVarO %f dVarDiam %f varIJ %f from dist %f weight %f (pos %d) bionjWeight %f %f\n",
2885 			     deltaProfileVarOut, deltaVarDiam,
2886 			     varIJ, join.dist, join.weight, NJ->nPos,
2887 			     bionjWeight, 1-bionjWeight);
2888       if (verbose>3 && (newnode%5) == 0) {
2889 	/* Compare weight estimated from outprofiles from weight made by summing over other nodes */
2890 	double deltaProfileVarTot = 0;
2891 	for (iNode = 0; iNode < newnode; iNode++) {
2892 	  if (NJ->parent[iNode] < 0) { /* excludes join.i, join.j */
2893 	    besthit_t di, dj;
2894 	    ProfileDist(NJ->profiles[join.i],NJ->profiles[iNode],NJ->nPos,NJ->distance_matrix,/*OUT*/&di);
2895 	    ProfileDist(NJ->profiles[join.j],NJ->profiles[iNode],NJ->nPos,NJ->distance_matrix,/*OUT*/&dj);
2896 	    deltaProfileVarTot += dj.dist - di.dist;
2897 	  }
2898 	}
2899 	double lambdaTot = 0.5 + (deltaProfileVarTot+deltaVarDiam)/(2*(nActive-2)*varIJ);
2900 	if (lambdaTot < 0) lambdaTot = 0;
2901 	if (lambdaTot > 1) lambdaTot = 1;
2902 	if (fabs(bionjWeight-lambdaTot) > 0.01 || verbose > 4)
2903 	  fprintf(stderr, "deltaProfileVar actual %.6f estimated %.6f lambda actual %.3f estimated %.3f\n",
2904 		  deltaProfileVarTot,deltaProfileVarOut,lambdaTot,bionjWeight);
2905       }
2906     }
2907     if (verbose > 2) fprintf(stderr, "Join\t%d\t%d\t%.6f\tlambda\t%.6f\tselfw\t%.3f\t%.3f\tnew\t%d\n",
2908 			      join.i < join.j ? join.i : join.j,
2909 			      join.i < join.j ? join.j : join.i,
2910 			      join.criterion, bionjWeight,
2911 			      NJ->selfweight[join.i < join.j ? join.i : join.j],
2912 			      NJ->selfweight[join.i < join.j ? join.j : join.i],
2913 			      newnode);
2914 
2915     NJ->diameter[newnode] = bionjWeight * (NJ->branchlength[join.i] + NJ->diameter[join.i])
2916       + (1-bionjWeight) * (NJ->branchlength[join.j] + NJ->diameter[join.j]);
2917     NJ->varDiameter[newnode] = bionjWeight * NJ->varDiameter[join.i]
2918       + (1-bionjWeight) * NJ->varDiameter[join.j]
2919       + bionjWeight * (1-bionjWeight) * varIJ;
2920 
2921     NJ->profiles[newnode] = AverageProfile(NJ->profiles[join.i],NJ->profiles[join.j],
2922 					   NJ->nPos, NJ->nConstraints,
2923 					   NJ->distance_matrix,
2924 					   bionj ? bionjWeight : /*noweight*/-1.0);
2925 
2926     /* Update out-distances and total diameters */
2927     int changedActiveOutProfile = nActiveOutProfileReset - (nActive-1);
2928     if (changedActiveOutProfile >= nResetOutProfile
2929 	&& changedActiveOutProfile >= fResetOutProfile * nActiveOutProfileReset) {
2930       /* Recompute the outprofile from scratch to avoid roundoff error */
2931       profile_t **activeProfiles = (profile_t**)mymalloc(sizeof(profile_t*)*(nActive-1));
2932       int nSaved = 0;
2933       NJ->totdiam = 0;
2934       for (iNode=0;iNode<NJ->maxnode;iNode++) {
2935 	if (NJ->parent[iNode]<0) {
2936 	  assert(nSaved < nActive-1);
2937 	  activeProfiles[nSaved++] = NJ->profiles[iNode];
2938 	  NJ->totdiam += NJ->diameter[iNode];
2939 	}
2940       }
2941       assert(nSaved==nActive-1);
2942       FreeProfile(NJ->outprofile, NJ->nPos, NJ->nConstraints);
2943       if(verbose>2) fprintf(stderr,"Recomputing outprofile %d %d\n",nActiveOutProfileReset,nActive-1);
2944       NJ->outprofile = OutProfile(activeProfiles, nSaved,
2945 				  NJ->nPos, NJ->nConstraints,
2946 				  NJ->distance_matrix);
2947       activeProfiles = myfree(activeProfiles, sizeof(profile_t*)*(nActive-1));
2948       nActiveOutProfileReset = nActive-1;
2949     } else {
2950       UpdateOutProfile(/*OUT*/NJ->outprofile,
2951 		       NJ->profiles[join.i], NJ->profiles[join.j], NJ->profiles[newnode],
2952 		       nActive,
2953 		       NJ->nPos, NJ->nConstraints,
2954 		       NJ->distance_matrix);
2955       NJ->totdiam += NJ->diameter[newnode] - NJ->diameter[join.i] - NJ->diameter[join.j];
2956     }
2957 
2958     /* Store self-dist for use in other computations */
2959     besthit_t selfdist;
2960     ProfileDist(NJ->profiles[newnode],NJ->profiles[newnode],NJ->nPos,NJ->distance_matrix,/*OUT*/&selfdist);
2961     NJ->selfdist[newnode] = selfdist.dist;
2962     NJ->selfweight[newnode] = selfdist.weight;
2963 
2964     /* Find the best hit of the joined node IJ */
2965     if (m>0) {
2966       TopHitJoin(newnode, /*IN/UPDATE*/NJ, nActive-1, /*IN/OUT*/tophits);
2967     } else {
2968       /* Not using top-hits, so we update all out-distances */
2969       for (iNode = 0; iNode < NJ->maxnode; iNode++) {
2970 	if (NJ->parent[iNode] < 0) {
2971 	  /* True nActive is now nActive-1 */
2972 	  SetOutDistance(/*IN/UPDATE*/NJ, iNode, nActive-1);
2973 	}
2974       }
2975 
2976       if(visible != NULL) {
2977 	SetBestHit(newnode, NJ, nActive-1, /*OUT*/&visible[newnode], /*OUT OPTIONAL*/besthitNew);
2978 	if (verbose>2)
2979 	  fprintf(stderr,"Visible %d %d %f %f\n",
2980 		  visible[newnode].i, visible[newnode].j,
2981 		  visible[newnode].dist, visible[newnode].criterion);
2982 	if (besthitNew != NULL) {
2983 	  /* Use distances to new node to update visible set entries that are non-optimal */
2984 	  for (iNode = 0; iNode < NJ->maxnode; iNode++) {
2985 	    if (NJ->parent[iNode] >= 0 || iNode == newnode)
2986 	      continue;
2987 	    int iOldVisible = visible[iNode].j;
2988 	    assert(iOldVisible>=0);
2989 	    assert(visible[iNode].i == iNode);
2990 
2991 	    /* Update the criterion; use nActive-1 because haven't decremented nActive yet */
2992 	    if (NJ->parent[iOldVisible] < 0)
2993 	      SetCriterion(/*IN/OUT*/NJ, nActive-1, &visible[iNode]);
2994 
2995 	    if (NJ->parent[iOldVisible] >= 0
2996 		|| besthitNew[iNode].criterion < visible[iNode].criterion) {
2997 	      if(verbose>3) fprintf(stderr,"Visible %d reset from %d to %d (%f vs. %f)\n",
2998 				     iNode, iOldVisible,
2999 				     newnode, visible[iNode].criterion, besthitNew[iNode].criterion);
3000 	      if(NJ->parent[iOldVisible] < 0) nVisibleUpdate++;
3001 	      visible[iNode].j = newnode;
3002 	      visible[iNode].dist = besthitNew[iNode].dist;
3003 	      visible[iNode].criterion = besthitNew[iNode].criterion;
3004 	    }
3005 	  } /* end loop over all nodes */
3006 	} /* end if recording all hits of new node */
3007       } /* end if keeping a visible set */
3008     } /* end else (m==0) */
3009   } /* end loop over nActive */
3010 
3011 #ifdef TRACK_MEMORY
3012   if (verbose>1) {
3013     struct mallinfo mi = mallinfo();
3014     fprintf(stderr, "Memory @ end of FastNJ(): %.2f MB (%.1f byte/pos) useful %.2f expected %.2f\n",
3015 	    (mi.arena+mi.hblkhd)/1.0e6, (mi.arena+mi.hblkhd)/(double)(NJ->nSeq*(double)NJ->nPos),
3016 	    mi.uordblks/1.0e6, mymallocUsed/1e6);
3017   }
3018 #endif
3019 
3020   /* We no longer need the tophits, visible set, etc. */
3021   if (visible != NULL) visible = myfree(visible,sizeof(besthit_t)*NJ->maxnodes);
3022   if (besthitNew != NULL) besthitNew = myfree(besthitNew,sizeof(besthit_t)*NJ->maxnodes);
3023   tophits = FreeTopHits(tophits);
3024 
3025   /* Add a root for the 3 remaining nodes */
3026   int top[3];
3027   int nTop = 0;
3028   for (iNode = 0; iNode < NJ->maxnode; iNode++) {
3029     if (NJ->parent[iNode] < 0) {
3030       assert(nTop <= 2);
3031       top[nTop++] = iNode;
3032     }
3033   }
3034   assert(nTop==3);
3035 
3036   NJ->root = NJ->maxnode++;
3037   NJ->child[NJ->root].nChild = 3;
3038   for (nTop = 0; nTop < 3; nTop++) {
3039     NJ->parent[top[nTop]] = NJ->root;
3040     NJ->child[NJ->root].child[nTop] = top[nTop];
3041   }
3042 
3043   besthit_t dist01, dist02, dist12;
3044   ProfileDist(NJ->profiles[top[0]], NJ->profiles[top[1]], NJ->nPos, NJ->distance_matrix, /*OUT*/&dist01);
3045   ProfileDist(NJ->profiles[top[0]], NJ->profiles[top[2]], NJ->nPos, NJ->distance_matrix, /*OUT*/&dist02);
3046   ProfileDist(NJ->profiles[top[1]], NJ->profiles[top[2]], NJ->nPos, NJ->distance_matrix, /*OUT*/&dist12);
3047 
3048   double d01 = dist01.dist - NJ->diameter[top[0]] - NJ->diameter[top[1]];
3049   double d02 = dist02.dist - NJ->diameter[top[0]] - NJ->diameter[top[2]];
3050   double d12 = dist12.dist - NJ->diameter[top[1]] - NJ->diameter[top[2]];
3051   NJ->branchlength[top[0]] = (d01 + d02 - d12)/2;
3052   NJ->branchlength[top[1]] = (d01 + d12 - d02)/2;
3053   NJ->branchlength[top[2]] = (d02 + d12 - d01)/2;
3054 
3055   /* Check how accurate the outprofile is */
3056   if (verbose>2) {
3057     profile_t *p[3] = {NJ->profiles[top[0]], NJ->profiles[top[1]], NJ->profiles[top[2]]};
3058     profile_t *out = OutProfile(p, 3, NJ->nPos, NJ->nConstraints, NJ->distance_matrix);
3059     int i;
3060     double freqerror = 0;
3061     double weighterror = 0;
3062     for (i=0;i<NJ->nPos;i++) {
3063       weighterror += fabs(out->weights[i] - NJ->outprofile->weights[i]);
3064       int k;
3065       for(k=0;k<nCodes;k++)
3066 	freqerror += fabs(out->vectors[nCodes*i+k] - NJ->outprofile->vectors[nCodes*i+k]);
3067     }
3068     fprintf(stderr,"Roundoff error in outprofile@end: WeightError %f FreqError %f\n", weighterror, freqerror);
3069     FreeProfile(out, NJ->nPos, NJ->nConstraints);
3070   }
3071   return;
3072 }
3073 
ExhaustiveNJSearch(NJ_t * NJ,int nActive,besthit_t * join)3074 void ExhaustiveNJSearch(NJ_t *NJ, int nActive, /*OUT*/besthit_t *join) {
3075   join->i = -1;
3076   join->j = -1;
3077   join->weight = 0;
3078   join->dist = 1e20;
3079   join->criterion = 1e20;
3080   double bestCriterion = 1e20;
3081 
3082   int i, j;
3083   for (i = 0; i < NJ->maxnode-1; i++) {
3084     if (NJ->parent[i] < 0) {
3085       for (j = i+1; j < NJ->maxnode; j++) {
3086 	if (NJ->parent[j] < 0) {
3087 	  besthit_t hit;
3088 	  hit.i = i;
3089 	  hit.j = j;
3090 	  SetDistCriterion(NJ, nActive, /*IN/OUT*/&hit);
3091 	  if (hit.criterion < bestCriterion) {
3092 	    *join = hit;
3093 	    bestCriterion = hit.criterion;
3094 	  }
3095 	}
3096       }
3097     }
3098   }
3099   assert (join->i >= 0 && join->j >= 0);
3100 }
3101 
FastNJSearch(NJ_t * NJ,int nActive,besthit_t * besthits,besthit_t * join)3102 void FastNJSearch(NJ_t *NJ, int nActive, /*IN/OUT*/besthit_t *besthits, /*OUT*/besthit_t *join) {
3103   join->i = -1;
3104   join->j = -1;
3105   join->dist = 1e20;
3106   join->weight = 0;
3107   join->criterion = 1e20;
3108   int iNode;
3109   for (iNode = 0; iNode < NJ->maxnode; iNode++) {
3110     int jNode = besthits[iNode].j;
3111     if (NJ->parent[iNode] < 0 && NJ->parent[jNode] < 0) { /* both i and j still active */
3112       /* recompute criterion to reflect the current out-distances */
3113       SetCriterion(NJ, nActive, /*IN/OUT*/&besthits[iNode]);
3114       if (besthits[iNode].criterion < join->criterion)
3115 	*join = besthits[iNode];
3116     }
3117   }
3118 
3119   if(!fastest) {
3120     int changed;
3121     do {
3122       changed = 0;
3123       assert(join->i >= 0 && join->j >= 0);
3124       SetBestHit(join->i, NJ, nActive, /*OUT*/&besthits[join->i], /*OUT IGNORED*/NULL);
3125       if (besthits[join->i].j != join->j) {
3126 	changed = 1;
3127 	if (verbose>2)
3128 	  fprintf(stderr,"BetterI\t%d\t%d\t%d\t%d\t%f\t%f\n",
3129 		  join->i,join->j,besthits[join->i].i,besthits[join->i].j,
3130 		  join->criterion,besthits[join->i].criterion);
3131       }
3132 
3133       /* Save the best hit either way, because the out-distance has probably changed
3134 	 since we started the computation. */
3135       join->j = besthits[join->i].j;
3136       join->weight = besthits[join->i].weight;
3137       join->dist = besthits[join->i].dist;
3138       join->criterion = besthits[join->i].criterion;
3139 
3140       SetBestHit(join->j, NJ, nActive, /*OUT*/&besthits[join->j], /*OUT IGNORE*/NULL);
3141       if (besthits[join->j].j != join->i) {
3142 	changed = 1;
3143 	if (verbose>2)
3144 	  fprintf(stderr,"BetterJ\t%d\t%d\t%d\t%d\t%f\t%f\n",
3145 		  join->i,join->j,besthits[join->j].i,besthits[join->j].j,
3146 		  join->criterion,besthits[join->j].criterion);
3147 	join->i = besthits[join->j].j;
3148 	join->weight = besthits[join->j].weight;
3149 	join->dist = besthits[join->j].dist;
3150 	join->criterion = besthits[join->j].criterion;
3151       }
3152       if(changed) nHillBetter++;
3153     } while(changed);
3154   }
3155 }
3156 
3157 /* A token is one of ():;, or an alphanumeric string without whitespace
3158    Any whitespace between tokens is ignored */
ReadTreeToken(FILE * fp)3159 char *ReadTreeToken(FILE *fp) {
3160   static char buf[BUFFER_SIZE];
3161   int len = 0;
3162   int c;
3163   for (c = fgetc(fp); c != EOF; c = fgetc(fp)) {
3164     if (c == '(' || c == ')' || c == ':' || c == ';' || c == ',') {
3165       /* standalone token */
3166       if (len == 0) {
3167 	buf[len++] = c;
3168 	buf[len] = '\0';
3169 	return(buf);
3170       } else {
3171 	ungetc(c, fp);
3172 	buf[len] = '\0';
3173 	return(buf);
3174       }
3175     } else if (isspace(c)) {
3176       if (len > 0) {
3177 	buf[len] = '\0';
3178 	return(buf);
3179       }
3180       /* else ignore whitespace at beginning of token */
3181     } else {
3182       /* not whitespace or standalone token */
3183       buf[len++] = c;
3184       if (len >= BUFFER_SIZE) {
3185 	buf[BUFFER_SIZE-1] = '\0';
3186 	fprintf(stderr, "Token too long in tree file, token begins with\n%s\n", buf);
3187 	exit(1);
3188       }
3189     }
3190   }
3191   if (len > 0) {
3192     /* return the token we have so far */
3193     buf[len] = '\0';
3194     return(buf);
3195   }
3196   /* else */
3197   return(NULL);
3198 }
3199 
ReadTreeError(char * err,char * token)3200 void ReadTreeError(char *err, char *token) {
3201   fprintf(stderr, "Tree parse error: unexpected token '%s' -- %s\n",
3202 	  token == NULL ? "(End of file)" : token,
3203 	  err);
3204   exit(1);
3205 }
3206 
ReadTreeAddChild(int parent,int child,int * parents,children_t * children)3207 void ReadTreeAddChild(int parent, int child, /*IN/OUT*/int *parents, /*IN/OUT*/children_t *children) {
3208   assert(parent >= 0);
3209   assert(child >= 0);
3210   assert(parents[child] < 0);
3211   assert(children[parent].nChild < 3);
3212   parents[child] = parent;
3213   children[parent].child[children[parent].nChild++] = child;
3214 }
3215 
ReadTreeMaybeAddLeaf(int parent,char * name,hashstrings_t * hashnames,uniquify_t * unique,int * parents,children_t * children)3216 void ReadTreeMaybeAddLeaf(int parent, char *name,
3217 			  hashstrings_t *hashnames, uniquify_t *unique,
3218 			  /*IN/OUT*/int *parents, /*IN/OUT*/children_t *children) {
3219   hashiterator_t hi = FindMatch(hashnames,name);
3220   if (HashCount(hashnames,hi) != 1)
3221     ReadTreeError("not recognized as a sequence name", name);
3222 
3223   int iSeqNonunique = HashFirst(hashnames,hi);
3224   assert(iSeqNonunique >= 0 && iSeqNonunique < unique->nSeq);
3225   int iSeqUnique = unique->alnToUniq[iSeqNonunique];
3226   assert(iSeqUnique >= 0 && iSeqUnique < unique->nUnique);
3227   /* Either record this leaves' parent (if it is -1) or ignore this leaf (if already seen) */
3228   if (parents[iSeqUnique] < 0) {
3229     ReadTreeAddChild(parent, iSeqUnique, /*IN/OUT*/parents, /*IN/OUT*/children);
3230     if(verbose > 5)
3231       fprintf(stderr, "Found leaf uniq%d name %s child of %d\n", iSeqUnique, name, parent);
3232   } else {
3233     if (verbose > 5)
3234       fprintf(stderr, "Skipped redundant leaf uniq%d name %s\n", iSeqUnique, name);
3235   }
3236 }
3237 
ReadTreeRemove(int * parents,children_t * children,int node)3238 void ReadTreeRemove(/*IN/OUT*/int *parents, /*IN/OUT*/children_t *children, int node) {
3239   if(verbose > 5)
3240     fprintf(stderr,"Removing node %d parent %d\n", node, parents[node]);
3241   assert(parents[node] >= 0);
3242   int parent = parents[node];
3243   parents[node] = -1;
3244   children_t *pc = &children[parent];
3245   int oldn;
3246   for (oldn = 0; oldn < pc->nChild; oldn++) {
3247     if (pc->child[oldn] == node)
3248       break;
3249   }
3250   assert(oldn < pc->nChild);
3251 
3252   /* move successor nodes back in child list and shorten list */
3253   int i;
3254   for (i = oldn; i < pc->nChild-1; i++)
3255     pc->child[i] = pc->child[i+1];
3256   pc->nChild--;
3257 
3258   /* add its children to parent's child list */
3259   children_t *nc = &children[node];
3260   if (nc->nChild > 0) {
3261     assert(nc->nChild<=2);
3262     assert(pc->nChild < 3);
3263     assert(pc->nChild + nc->nChild <= 3);
3264     int j;
3265     for (j = 0; j < nc->nChild; j++) {
3266       if(verbose > 5)
3267 	fprintf(stderr,"Repointing parent %d to child %d\n", parent, nc->child[j]);
3268       pc->child[pc->nChild++] = nc->child[j];
3269       parents[nc->child[j]] = parent;
3270     }
3271     nc->nChild = 0;
3272   }
3273 }
3274 
ReadTree(NJ_t * NJ,uniquify_t * unique,hashstrings_t * hashnames,FILE * fpInTree)3275 void ReadTree(/*IN/OUT*/NJ_t *NJ,
3276 	      /*IN*/uniquify_t *unique,
3277 	      /*IN*/hashstrings_t *hashnames,
3278 	      /*READ*/FILE *fpInTree) {
3279   assert(NJ->nSeq == unique->nUnique);
3280   /* First, do a preliminary parse of the tree to with non-unique leaves ignored
3281      We need to store this separately from NJ because it may have too many internal nodes
3282      (matching sequences show up once in the NJ but could be in multiple places in the tree)
3283      Will use iUnique as the index of nodes, as in the NJ structure
3284   */
3285   int maxnodes = unique->nSeq*2;
3286   int maxnode = unique->nSeq;
3287   int *parent = (int*)mymalloc(sizeof(int)*maxnodes);
3288   children_t *children = (children_t *)mymalloc(sizeof(children_t)*maxnodes);
3289   int root = maxnode++;
3290   int i;
3291   for (i = 0; i < maxnodes; i++) {
3292     parent[i] = -1;
3293     children[i].nChild = 0;
3294   }
3295 
3296   /* The stack is the current path to the root, with the root at the first (top) position */
3297   int stack_size = 1;
3298   int *stack = (int*)mymalloc(sizeof(int)*maxnodes);
3299   stack[0] = root;
3300   int nDown = 0;
3301   int nUp = 0;
3302 
3303   char *token;
3304   token = ReadTreeToken(fpInTree);
3305   if (token == NULL || *token != '(')
3306     ReadTreeError("No '(' at start", token);
3307   /* nDown is still 0 because we have created the root */
3308 
3309   while ((token = ReadTreeToken(fpInTree)) != NULL) {
3310     if (nDown > 0) {		/* In a stream of parentheses */
3311       if (*token == '(')
3312 	nDown++;
3313       else if (*token == ',' || *token == ';' || *token == ':' || *token == ')')
3314 	ReadTreeError("while reading parentheses", token);
3315       else {
3316 	/* Add intermediate nodes if nDown was > 1 (for nDown=1, the only new node is the leaf) */
3317 	while (nDown-- > 0) {
3318 	  int new = maxnode++;
3319 	  assert(new < maxnodes);
3320 	  ReadTreeAddChild(stack[stack_size-1], new, /*IN/OUT*/parent, /*IN/OUT*/children);
3321 	  if(verbose > 5)
3322 	    fprintf(stderr, "Added internal child %d of %d, stack size increase to %d\n",
3323 		    new, stack[stack_size-1],stack_size+1);
3324 	  stack[stack_size++] = new;
3325 	  assert(stack_size < maxnodes);
3326 	}
3327 	ReadTreeMaybeAddLeaf(stack[stack_size-1], token,
3328 			     hashnames, unique,
3329 			     /*IN/OUT*/parent, /*IN/OUT*/children);
3330       }
3331     } else if (nUp > 0) {
3332       if (*token == ';') {	/* end the tree? */
3333 	if (nUp != stack_size)
3334 	  ReadTreeError("unbalanced parentheses", token);
3335 	else
3336 	  break;
3337       } else if (*token == ')')
3338 	nUp++;
3339       else if (*token == '(')
3340 	ReadTreeError("unexpected '(' after ')'", token);
3341       else if (*token == ':') {
3342 	token = ReadTreeToken(fpInTree);
3343 	/* Read the branch length and ignore it */
3344 	if (token == NULL || (*token != '-' && !isdigit(*token)))
3345 	  ReadTreeError("not recognized as a branch length", token);
3346       } else if (*token == ',') {
3347 	/* Go back up the stack the correct #times */
3348 	while (nUp-- > 0) {
3349 	  stack_size--;
3350 	  if(verbose > 5)
3351 	    fprintf(stderr, "Up to nUp=%d stack size %d at %d\n",
3352 		    nUp, stack_size, stack[stack_size-1]);
3353 	  if (stack_size <= 0)
3354 	    ReadTreeError("too many ')'", token);
3355 	}
3356 	nUp = 0;
3357       } else if (*token == '-' || isdigit(*token))
3358 	; 			/* ignore bootstrap value */
3359       else
3360 	fprintf(stderr, "Warning while parsing tree: non-numeric label %s for internal node\n",
3361 		token);
3362     } else if (*token == '(') {
3363       nDown = 1;
3364     } else if (*token == ')') {
3365       nUp = 1;
3366     } else if (*token == ':') {
3367       token = ReadTreeToken(fpInTree);
3368       if (token == NULL || (*token != '-' && !isdigit(*token)))
3369 	ReadTreeError("not recognized as a branch length", token);
3370     } else if (*token == ',') {
3371       ;				/* do nothing */
3372     } else if (*token == ';')
3373       ReadTreeError("unexpected token", token);
3374     else
3375       ReadTreeMaybeAddLeaf(stack[stack_size-1], token,
3376 			   hashnames, unique,
3377 			   /*IN/OUT*/parent, /*IN/OUT*/children);
3378   }
3379 
3380   /* Verify that all sequences were seen */
3381   for (i = 0; i < unique->nUnique; i++) {
3382     if (parent[i] < 0) {
3383       fprintf(stderr, "Alignment sequence %d (unique %d) absent from input tree\n"
3384 	      "The starting tree (the argument to -intree) must include all sequences in the alignment!\n",
3385 	      unique->uniqueFirst[i], i);
3386       exit(1);
3387     }
3388   }
3389 
3390   /* Simplify the tree -- remove all internal nodes with < 2 children
3391      Keep trying until no nodes get removed
3392   */
3393   int nRemoved;
3394   do {
3395     nRemoved = 0;
3396     /* Here stack is the list of nodes we haven't visited yet while doing
3397        a tree traversal */
3398     stack_size = 1;
3399     stack[0] = root;
3400     while (stack_size > 0) {
3401       int node = stack[--stack_size];
3402       if (node >= unique->nUnique) { /* internal node */
3403 	if (children[node].nChild <= 1) {
3404 	  if (node != root) {
3405 	    ReadTreeRemove(/*IN/OUT*/parent,/*IN/OUT*/children,node);
3406 	    nRemoved++;
3407 	  } else if (node == root && children[node].nChild == 1) {
3408 	    int newroot = children[node].child[0];
3409 	    parent[newroot] = -1;
3410 	    children[root].nChild = 0;
3411 	    nRemoved++;
3412 	    if(verbose > 5)
3413 	      fprintf(stderr,"Changed root from %d to %d\n",root,newroot);
3414 	    root = newroot;
3415 	    stack[stack_size++] = newroot;
3416 	  }
3417 	} else {
3418 	  int j;
3419 	  for (j = 0; j < children[node].nChild; j++) {
3420 	    assert(stack_size < maxnodes);
3421 	    stack[stack_size++] = children[node].child[j];
3422 	    if(verbose > 5)
3423 	      fprintf(stderr,"Added %d to stack\n", stack[stack_size-1]);
3424 	  }
3425 	}
3426       }
3427     }
3428   } while (nRemoved > 0);
3429 
3430   /* Simplify the root node to 3 children if it has 2 */
3431   if (children[root].nChild == 2) {
3432     for (i = 0; i < 2; i++) {
3433       int child = children[root].child[i];
3434       assert(child >= 0 && child < maxnodes);
3435       if (children[child].nChild == 2) {
3436 	ReadTreeRemove(parent,children,child); /* replace root -> child -> A,B with root->A,B */
3437 	break;
3438       }
3439     }
3440   }
3441 
3442   for (i = 0; i < maxnodes; i++)
3443     if(verbose > 5)
3444       fprintf(stderr,"Simplfied node %d has parent %d nchild %d\n",
3445 	      i, parent[i], children[i].nChild);
3446 
3447   /* Map the remaining internal nodes to NJ nodes */
3448   int *map = (int*)mymalloc(sizeof(int)*maxnodes);
3449   for (i = 0; i < unique->nUnique; i++)
3450     map[i] = i;
3451   for (i = unique->nUnique; i < maxnodes; i++)
3452     map[i] = -1;
3453   stack_size = 1;
3454   stack[0] = root;
3455   while (stack_size > 0) {
3456     int node = stack[--stack_size];
3457     if (node >= unique->nUnique) { /* internal node */
3458       assert(node == root || children[node].nChild > 1);
3459       map[node] =  NJ->maxnode++;
3460       for (i = 0; i < children[node].nChild; i++) {
3461 	assert(stack_size < maxnodes);
3462 	stack[stack_size++] = children[node].child[i];
3463       }
3464     }
3465   }
3466   for (i = 0; i < maxnodes; i++)
3467     if(verbose > 5)
3468       fprintf(stderr,"Map %d to %d (parent %d nchild %d)\n",
3469 	      i, map[i], parent[i], children[i].nChild);
3470 
3471   /* Set NJ->parent, NJ->children, NJ->root */
3472   NJ->root = map[root];
3473   int node;
3474   for (node = 0; node < maxnodes; node++) {
3475     int njnode = map[node];
3476     if (njnode >= 0) {
3477       NJ->child[njnode].nChild = children[node].nChild;
3478       for (i = 0; i < children[node].nChild; i++) {
3479 	assert(children[node].child[i] >= 0 && children[node].child[i] < maxnodes);
3480 	NJ->child[njnode].child[i] = map[children[node].child[i]];
3481       }
3482       if (parent[node] >= 0)
3483 	NJ->parent[njnode] = map[parent[node]];
3484     }
3485   }
3486 
3487   /* Make sure that parent/child relationships match */
3488   for (i = 0; i < NJ->maxnode; i++) {
3489     children_t *c = &NJ->child[i];
3490     int j;
3491     for (j = 0; j < c->nChild;j++)
3492       assert(c->child[j] >= 0 && c->child[j] < NJ->maxnode && NJ->parent[c->child[j]] == i);
3493   }
3494   assert(NJ->parent[NJ->root] < 0);
3495 
3496   map = myfree(map,sizeof(int)*maxnodes);
3497   stack = myfree(stack,sizeof(int)*maxnodes);
3498   children = myfree(children,sizeof(children_t)*maxnodes);
3499   parent = myfree(parent,sizeof(int)*maxnodes);
3500 
3501   /* Compute profiles as balanced -- the NNI stage will recompute these
3502      profiles anyway
3503   */
3504   traversal_t traversal = InitTraversal(NJ);
3505   node = NJ->root;
3506   while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
3507     if (node >= NJ->nSeq && node != NJ->root)
3508       SetProfile(/*IN/OUT*/NJ, node, /*noweight*/-1.0);
3509   }
3510   traversal = FreeTraversal(traversal,NJ);
3511 }
3512 
3513 /* Print topology using node indices as node names */
PrintNJInternal(FILE * fp,NJ_t * NJ,bool useLen)3514 void PrintNJInternal(FILE *fp, NJ_t *NJ, bool useLen) {
3515   if (NJ->nSeq < 4) {
3516     return;
3517   }
3518   typedef struct { int node; int end; } stack_t;
3519   stack_t *stack = (stack_t *)mymalloc(sizeof(stack_t)*NJ->maxnodes);
3520   int stackSize = 1;
3521   stack[0].node = NJ->root;
3522   stack[0].end = 0;
3523 
3524   while(stackSize>0) {
3525     stack_t *last = &stack[stackSize-1];
3526     stackSize--;
3527     /* Save last, as we are about to overwrite it */
3528     int node = last->node;
3529     int end = last->end;
3530 
3531     if (node < NJ->nSeq) {
3532       if (NJ->child[NJ->parent[node]].child[0] != node) fputs(",",fp);
3533       fprintf(fp, "%d", node);
3534       if (useLen)
3535 	fprintf(fp, ":%.4f", NJ->branchlength[node]);
3536     } else if (end) {
3537       fprintf(fp, ")%d", node);
3538       if (useLen)
3539 	fprintf(fp, ":%.4f", NJ->branchlength[node]);
3540     } else {
3541             if (node != NJ->root && NJ->child[NJ->parent[node]].child[0] != node) fprintf(fp, ",");
3542       fprintf(fp, "(");
3543       stackSize++;
3544       stack[stackSize-1].node = node;
3545       stack[stackSize-1].end = 1;
3546       children_t *c = &NJ->child[node];
3547       /* put children on in reverse order because we use the last one first */
3548       int i;
3549       for (i = c->nChild-1; i >=0; i--) {
3550 	stackSize++;
3551 	stack[stackSize-1].node = c->child[i];
3552 	stack[stackSize-1].end = 0;
3553       }
3554     }
3555   }
3556   fprintf(fp, ";\n");
3557   stack = myfree(stack, sizeof(stack_t)*NJ->maxnodes);
3558 }
3559 
PrintNJ(FILE * fp,NJ_t * NJ,char ** names,uniquify_t * unique,bool bShowSupport,bool bQuote)3560 void PrintNJ(FILE *fp, NJ_t *NJ, char **names, uniquify_t *unique, bool bShowSupport, bool bQuote) {
3561   /* And print the tree: depth first search
3562    * The stack contains
3563    * list of remaining children with their depth
3564    * parent node, with a flag of -1 so I know to print right-paren
3565    */
3566   if (NJ->nSeq==1 && unique->alnNext[unique->uniqueFirst[0]] >= 0) {
3567     /* Special case -- otherwise we end up with double parens */
3568     int first = unique->uniqueFirst[0];
3569     assert(first >= 0 && first < unique->nSeq);
3570     fprintf(fp, bQuote ? "('%s':0.0" : "(%s:0.0", names[first]);
3571     int iName = unique->alnNext[first];
3572     while (iName >= 0) {
3573       assert(iName < unique->nSeq);
3574       fprintf(fp, bQuote ? ",'%s':0.0" : ",%s:0.0", names[iName]);
3575       iName = unique->alnNext[iName];
3576     }
3577     fprintf(fp,");\n");
3578     return;
3579   }
3580 
3581   typedef struct { int node; int end; } stack_t;
3582   stack_t *stack = (stack_t *)mymalloc(sizeof(stack_t)*NJ->maxnodes);
3583   int stackSize = 1;
3584   stack[0].node = NJ->root;
3585   stack[0].end = 0;
3586 
3587   while(stackSize>0) {
3588     stack_t *last = &stack[stackSize-1];
3589     stackSize--;
3590     /* Save last, as we are about to overwrite it */
3591     int node = last->node;
3592     int end = last->end;
3593 
3594     if (node < NJ->nSeq) {
3595       if (NJ->child[NJ->parent[node]].child[0] != node) fputs(",",fp);
3596       int first = unique->uniqueFirst[node];
3597       assert(first >= 0 && first < unique->nSeq);
3598       /* Print the name, or the subtree of duplicate names */
3599       if (unique->alnNext[first] == -1) {
3600 	fprintf(fp, bQuote ? "'%s'" : "%s", names[first]);
3601       } else {
3602 	fprintf(fp, bQuote ? "('%s':0.0" : "(%s:0.0", names[first]);
3603 	int iName = unique->alnNext[first];
3604 	while (iName >= 0) {
3605 	  assert(iName < unique->nSeq);
3606 	  fprintf(fp, bQuote ? ",'%s':0.0" : ",%s:0.0", names[iName]);
3607 	  iName = unique->alnNext[iName];
3608 	}
3609 	fprintf(fp,")");
3610       }
3611       /* Print the branch length */
3612 #ifdef USE_DOUBLE
3613 #define FP_FORMAT "%.9f"
3614 #else
3615 #define FP_FORMAT "%.5f"
3616 #endif
3617       fprintf(fp, ":" FP_FORMAT, NJ->branchlength[node]);
3618     } else if (end) {
3619       if (node == NJ->root)
3620 	fprintf(fp, ")");
3621       else if (bShowSupport)
3622 	fprintf(fp, ")%.3f:" FP_FORMAT, NJ->support[node], NJ->branchlength[node]);
3623       else
3624 	fprintf(fp, "):" FP_FORMAT, NJ->branchlength[node]);
3625     } else {
3626       if (node != NJ->root && NJ->child[NJ->parent[node]].child[0] != node) fprintf(fp, ",");
3627       fprintf(fp, "(");
3628       stackSize++;
3629       stack[stackSize-1].node = node;
3630       stack[stackSize-1].end = 1;
3631       children_t *c = &NJ->child[node];
3632       /* put children on in reverse order because we use the last one first */
3633       int i;
3634       for (i = c->nChild-1; i >=0; i--) {
3635 	stackSize++;
3636 	stack[stackSize-1].node = c->child[i];
3637 	stack[stackSize-1].end = 0;
3638       }
3639     }
3640   }
3641   fprintf(fp, ";\n");
3642   stack = myfree(stack, sizeof(stack_t)*NJ->maxnodes);
3643 }
3644 
ReadAlignment(FILE * fp,bool bQuote)3645 alignment_t *ReadAlignment(/*IN*/FILE *fp, bool bQuote) {
3646   /* bQuote supports the -quote option */
3647   int nSeq = 0;
3648   int nPos = 0;
3649   char **names = NULL;
3650   char **seqs = NULL;
3651   char buf[BUFFER_SIZE] = "";
3652   if (fgets(buf,sizeof(buf),fp) == NULL) {
3653     fprintf(stderr, "Error reading header line\n");
3654     exit(1);
3655   }
3656   int nSaved = 100;
3657   if (buf[0] == '>') {
3658     /* FASTA, truncate names at any of these */
3659     char *nameStop = bQuote ? "'\t\r\n" : "(),: \t\r\n";
3660     char *seqSkip = " \t\r\n";	/* skip these characters in the sequence */
3661     seqs = (char**)mymalloc(sizeof(char*) * nSaved);
3662     names = (char**)mymalloc(sizeof(char*) * nSaved);
3663 
3664     do {
3665       /* loop over lines */
3666       if (buf[0] == '>') {
3667 	/* truncate the name */
3668 	char *p, *q;
3669 	for (p = buf+1; *p != '\0'; p++) {
3670 	  for (q = nameStop; *q != '\0'; q++) {
3671 	    if (*p == *q) {
3672 	      *p = '\0';
3673 	      break;
3674 	    }
3675 	  }
3676 	  if (*p == '\0') break;
3677 	}
3678 
3679 	/* allocate space for another sequence */
3680 	nSeq++;
3681 	if (nSeq > nSaved) {
3682 	  int nNewSaved = nSaved*2;
3683 	  seqs = myrealloc(seqs,sizeof(char*)*nSaved,sizeof(char*)*nNewSaved, /*copy*/false);
3684 	  names = myrealloc(names,sizeof(char*)*nSaved,sizeof(char*)*nNewSaved, /*copy*/false);
3685 	  nSaved = nNewSaved;
3686 	}
3687 	names[nSeq-1] = (char*)mymemdup(buf+1,strlen(buf));
3688 	seqs[nSeq-1] = NULL;
3689       } else {
3690 	/* count non-space characters and append to sequence */
3691 	int nKeep = 0;
3692 	char *p, *q;
3693 	for (p=buf; *p != '\0'; p++) {
3694 	  for (q=seqSkip; *q != '\0'; q++) {
3695 	    if (*p == *q)
3696 	      break;
3697 	  }
3698 	  if (*p != *q)
3699 	    nKeep++;
3700 	}
3701 	int nOld = (seqs[nSeq-1] == NULL) ? 0 : strlen(seqs[nSeq-1]);
3702 	seqs[nSeq-1] = (char*)myrealloc(seqs[nSeq-1], nOld, nOld+nKeep+1, /*copy*/false);
3703 	if (nOld+nKeep > nPos)
3704 	  nPos = nOld + nKeep;
3705 	char *out = seqs[nSeq-1] + nOld;
3706 	for (p=buf; *p != '\0'; p++) {
3707 	  for (q=seqSkip; *q != '\0'; q++) {
3708 	    if (*p == *q)
3709 	      break;
3710 	  }
3711 	  if (*p != *q) {
3712 	    *out = *p;
3713 	    out++;
3714 	  }
3715 	}
3716 	assert(out-seqs[nSeq-1] == nKeep + nOld);
3717 	*out = '\0';
3718       }
3719     } while(fgets(buf,sizeof(buf),fp) != NULL);
3720 
3721     if (seqs[nSeq-1] == NULL) {
3722       fprintf(stderr, "No sequence data for last entry %s\n",names[nSeq-1]);
3723       exit(1);
3724     }
3725     names = myrealloc(names,sizeof(char*)*nSaved,sizeof(char*)*nSeq, /*copy*/false);
3726     seqs = myrealloc(seqs,sizeof(char*)*nSaved,sizeof(char*)*nSeq, /*copy*/false);
3727   } else {
3728     /* PHYLIP interleaved-like format
3729        Allow arbitrary length names, require spaces between names and sequences
3730        Allow multiple alignments, either separated by a single empty line (e.g. seqboot output)
3731        or not.
3732      */
3733     if (buf[0] == '\n' || buf[0] == '\r') {
3734       if (fgets(buf,sizeof(buf),fp) == NULL) {
3735 	fprintf(stderr, "Empty header line followed by EOF\n");
3736 	exit(1);
3737       }
3738     }
3739     if (sscanf(buf, "%d%d", &nSeq, &nPos) != 2
3740       || nSeq < 1 || nPos < 1) {
3741       fprintf(stderr, "Error parsing header line:%s\n", buf);
3742       exit(1);
3743     }
3744     names = (char **)mymalloc(sizeof(char*) * nSeq);
3745     seqs = (char **)mymalloc(sizeof(char*) * nSeq);
3746     nSaved = nSeq;
3747 
3748     int i;
3749     for (i = 0; i < nSeq; i++) {
3750       names[i] = NULL;
3751       seqs[i] = (char *)mymalloc(nPos+1);	/* null-terminate */
3752       seqs[i][0] = '\0';
3753     }
3754     int iSeq = 0;
3755 
3756     while(fgets(buf,sizeof(buf),fp)) {
3757       if ((buf[0] == '\n' || buf[0] == '\r') && (iSeq == nSeq || iSeq == 0)) {
3758 	iSeq = 0;
3759       } else {
3760 	int j = 0; /* character just past end of name */
3761 	if (buf[0] == ' ') {
3762 	  if (names[iSeq] == NULL) {
3763 	    fprintf(stderr, "No name in phylip line %s", buf);
3764 	    exit(1);
3765 	  }
3766 	} else {
3767 	  while (buf[j] != '\n' && buf[j] != '\0' && buf[j] != ' ')
3768 	    j++;
3769 	  if (buf[j] != ' ' || j == 0) {
3770 	    fprintf(stderr, "No sequence in phylip line %s", buf);
3771 	    exit(1);
3772 	  }
3773 	  if (iSeq >= nSeq) {
3774 	    fprintf(stderr, "No empty line between sequence blocks (is the sequence count wrong?)\n");
3775 	    exit(1);
3776 	  }
3777 	  if (names[iSeq] == NULL) {
3778 	    /* save the name */
3779 	    names[iSeq] = (char *)mymalloc(j+1);
3780 	    int k;
3781 	    for (k = 0; k < j; k++) names[iSeq][k] = buf[k];
3782 	    names[iSeq][j] = '\0';
3783 	  } else {
3784 	    /* check the name */
3785 	    int k;
3786 	    int match = 1;
3787 	    for (k = 0; k < j; k++) {
3788 	      if (names[iSeq][k] != buf[k]) {
3789 		match = 0;
3790 		break;
3791 	      }
3792 	    }
3793 	    if (!match || names[iSeq][j] != '\0') {
3794 	      fprintf(stderr, "Wrong name in phylip line %s\nExpected %s\n", buf, names[iSeq]);
3795 	      exit(1);
3796 	    }
3797 	  }
3798 	}
3799 	int seqlen = strlen(seqs[iSeq]);
3800 	for (; buf[j] != '\n' && buf[j] != '\0'; j++) {
3801 	  if (buf[j] != ' ') {
3802 	    if (seqlen >= nPos) {
3803 	      fprintf(stderr, "Too many characters (expected %d) for sequence named %s\nSo far have:\n%s\n",
3804 		      nPos, names[iSeq], seqs[iSeq]);
3805 	      exit(1);
3806 	    }
3807 	    seqs[iSeq][seqlen++] = toupper(buf[j]);
3808 	  }
3809 	}
3810 	seqs[iSeq][seqlen] = '\0'; /* null-terminate */
3811 	if(verbose>10) fprintf(stderr,"Read iSeq %d name %s seqsofar %s\n", iSeq, names[iSeq], seqs[iSeq]);
3812 	iSeq++;
3813 	if (iSeq == nSeq && strlen(seqs[0]) == nPos)
3814 	  break; /* finished alignment */
3815       } /* end else non-empty phylip line */
3816     }
3817     if (iSeq != nSeq && iSeq != 0) {
3818       fprintf(stderr, "Wrong number of sequences: expected %d\n", nSeq);
3819       exit(1);
3820     }
3821   }
3822   /* Check lengths of sequences */
3823   int i;
3824   for (i = 0; i < nSeq; i++) {
3825     int seqlen = strlen(seqs[i]);
3826     if (seqlen != nPos) {
3827       fprintf(stderr, "Wrong number of characters for %s: expected %d but have %d instead.\n"
3828 	      "This sequence may be truncated, or another sequence may be too long.\n",
3829 	      names[i], nPos, seqlen);
3830       exit(1);
3831     }
3832   }
3833   /* Replace "." with "-" and warn if we find any */
3834   /* If nucleotide sequences, replace U with T and N with X */
3835   bool findDot = false;
3836   for (i = 0; i < nSeq; i++) {
3837     char *p;
3838     for (p = seqs[i]; *p != '\0'; p++) {
3839       if (*p == '.') {
3840 	findDot = true;
3841 	*p = '-';
3842       }
3843       if (nCodes == 4 && *p == 'U')
3844 	*p = 'T';
3845       if (nCodes == 4 && *p == 'N')
3846 	*p = 'X';
3847     }
3848   }
3849   if (findDot)
3850     fprintf(stderr, "Warning! Found \".\" character(s). These are treated as gaps\n");
3851 
3852   if (ferror(fp)) {
3853     fprintf(stderr, "Error reading input file\n");
3854     exit(1);
3855   }
3856 
3857   alignment_t *align = (alignment_t*)mymalloc(sizeof(alignment_t));
3858   align->nSeq = nSeq;
3859   align->nPos = nPos;
3860   align->names = names;
3861   align->seqs = seqs;
3862   align->nSaved = nSaved;
3863   return(align);
3864 }
3865 
FreeAlignmentSeqs(alignment_t * aln)3866 void FreeAlignmentSeqs(/*IN/OUT*/alignment_t *aln) {
3867   assert(aln != NULL);
3868   int i;
3869   for (i = 0; i < aln->nSeq; i++)
3870     aln->seqs[i] = myfree(aln->seqs[i], aln->nPos+1);
3871 }
3872 
FreeAlignment(alignment_t * aln)3873 alignment_t *FreeAlignment(alignment_t *aln) {
3874   if(aln==NULL)
3875     return(NULL);
3876   int i;
3877   for (i = 0; i < aln->nSeq; i++) {
3878     aln->names[i] = myfree(aln->names[i],strlen(aln->names[i])+1);
3879     aln->seqs[i] = myfree(aln->seqs[i], aln->nPos+1);
3880   }
3881   aln->names = myfree(aln->names, sizeof(char*)*aln->nSaved);
3882   aln->seqs = myfree(aln->seqs, sizeof(char*)*aln->nSaved);
3883   myfree(aln, sizeof(alignment_t));
3884   return(NULL);
3885 }
3886 
AlnToConstraints(alignment_t * constraints,uniquify_t * unique,hashstrings_t * hashnames)3887 char **AlnToConstraints(alignment_t *constraints, uniquify_t *unique, hashstrings_t *hashnames) {
3888   /* look up constraints as names and map to unique-space */
3889   char **  uniqConstraints = (char**)mymalloc(sizeof(char*) * unique->nUnique);
3890   int i;
3891   for (i = 0; i < unique->nUnique; i++)
3892     uniqConstraints[i] = NULL;
3893   for (i = 0; i < constraints->nSeq; i++) {
3894     char *name = constraints->names[i];
3895     char *constraintSeq = constraints->seqs[i];
3896     hashiterator_t hi = FindMatch(hashnames,name);
3897     if (HashCount(hashnames,hi) != 1) {
3898       fprintf(stderr, "Sequence %s from constraints file is not in the alignment\n", name);
3899       exit(1);
3900     }
3901     int iSeqNonunique = HashFirst(hashnames,hi);
3902     assert(iSeqNonunique >= 0 && iSeqNonunique < unique->nSeq);
3903     int iSeqUnique = unique->alnToUniq[iSeqNonunique];
3904     assert(iSeqUnique >= 0 && iSeqUnique < unique->nUnique);
3905     if (uniqConstraints[iSeqUnique] != NULL) {
3906       /* Already set a constraint for this group of sequences!
3907 	 Warn that we are ignoring this one unless the constraints match */
3908       if (strcmp(uniqConstraints[iSeqUnique],constraintSeq) != 0) {
3909 	fprintf(stderr,
3910 		"Warning: ignoring constraints for %s:\n%s\n"
3911 		"Another sequence has the same sequence but different constraints\n",
3912 		name, constraintSeq);
3913       }
3914     } else {
3915       uniqConstraints[iSeqUnique] = constraintSeq;
3916     }
3917   }
3918   return(uniqConstraints);
3919 }
3920 
3921 
SeqToProfile(NJ_t * NJ,char * seq,int nPos,char * constraintSeq,int nConstraints,int iNode,unsigned long counts[256])3922 profile_t *SeqToProfile(/*IN/OUT*/NJ_t *NJ,
3923 			char *seq, int nPos,
3924 			/*OPTIONAL*/char *constraintSeq, int nConstraints,
3925 			int iNode,
3926 			unsigned long counts[256]) {
3927   static unsigned char charToCode[256];
3928   static int codeSet = 0;
3929   int c, i;
3930 
3931   if (!codeSet) {
3932     for (c = 0; c < 256; c++) {
3933       charToCode[c] = nCodes;
3934     }
3935     for (i = 0; codesString[i]; i++) {
3936       charToCode[codesString[i]] = i;
3937       charToCode[tolower(codesString[i])] = i;
3938     }
3939     charToCode['-'] = NOCODE;
3940     codeSet=1;
3941   }
3942 
3943   assert(strlen(seq) == nPos);
3944   profile_t *profile = NewProfile(nPos,nConstraints);
3945 
3946   for (i = 0; i < nPos; i++) {
3947     unsigned int character = (unsigned int) seq[i];
3948     counts[character]++;
3949     c = charToCode[character];
3950     if(verbose>10 && i < 2) fprintf(stderr,"pos %d char %c code %d\n", i, seq[i], c);
3951     /* treat unknowns as gaps */
3952     if (c == nCodes || c == NOCODE) {
3953       profile->codes[i] = NOCODE;
3954       profile->weights[i] = 0.0;
3955     } else {
3956       profile->codes[i] = c;
3957       profile->weights[i] = 1.0;
3958     }
3959   }
3960   if (nConstraints > 0) {
3961     for (i = 0; i < nConstraints; i++) {
3962       profile->nOn[i] = 0;
3963       profile->nOff[i] = 0;
3964     }
3965     bool bWarn = false;
3966     if (constraintSeq != NULL) {
3967       assert(strlen(constraintSeq) == nConstraints);
3968       for (i = 0; i < nConstraints; i++) {
3969 	if (constraintSeq[i] == '1') {
3970 	  profile->nOn[i] = 1;
3971 	} else if (constraintSeq[i] == '0') {
3972 	  profile->nOff[i] = 1;
3973 	} else if (constraintSeq[i] != '-') {
3974 	  if (!bWarn) {
3975 	    fprintf(stderr, "Constraint characters in unique sequence %d replaced with gap:", iNode+1);
3976 	    bWarn = true;
3977 	  }
3978 	  fprintf(stderr, " %c%d", constraintSeq[i], i+1);
3979 	  /* For the benefit of ConstraintSequencePenalty -- this is a bit of a hack, as
3980 	     this modifies the value read from the alignment
3981 	  */
3982 	  constraintSeq[i] = '-';
3983 	}
3984       }
3985       if (bWarn)
3986 	fprintf(stderr, "\n");
3987     }
3988   }
3989   return profile;
3990 }
3991 
SeqDist(unsigned char * codes1,unsigned char * codes2,int nPos,distance_matrix_t * dmat,besthit_t * hit)3992 void SeqDist(unsigned char *codes1, unsigned char *codes2, int nPos,
3993 	     distance_matrix_t *dmat,
3994 	     /*OUT*/besthit_t *hit) {
3995   double top = 0;		/* summed over positions */
3996   int nUse = 0;
3997   int i;
3998   if (dmat==NULL) {
3999     int nDiff = 0;
4000     for (i = 0; i < nPos; i++) {
4001       if (codes1[i] != NOCODE && codes2[i] != NOCODE) {
4002 	nUse++;
4003 	if (codes1[i] != codes2[i]) nDiff++;
4004       }
4005     }
4006     top = (double)nDiff;
4007   } else {
4008     for (i = 0; i < nPos; i++) {
4009       if (codes1[i] != NOCODE && codes2[i] != NOCODE) {
4010 	nUse++;
4011 	top += dmat->distances[(unsigned int)codes1[i]][(unsigned int)codes2[i]];
4012       }
4013     }
4014   }
4015   hit->weight = (double)nUse;
4016   hit->dist = nUse > 0 ? top/(double)nUse : 1.0;
4017   seqOps++;
4018 }
4019 
CorrectedPairDistances(profile_t ** profiles,int nProfiles,distance_matrix_t * distance_matrix,int nPos,double * distances)4020 void CorrectedPairDistances(profile_t **profiles, int nProfiles,
4021 			    /*OPTIONAL*/distance_matrix_t *distance_matrix,
4022 			    int nPos,
4023 			    /*OUT*/double *distances) {
4024   assert(distances != NULL);
4025   assert(profiles != NULL);
4026   assert(nProfiles>1 && nProfiles <= 4);
4027   besthit_t hit[6];
4028   int iHit,i,j;
4029 
4030   for (iHit=0, i=0; i < nProfiles; i++) {
4031     for (j=i+1; j < nProfiles; j++, iHit++) {
4032       ProfileDist(profiles[i],profiles[j],nPos,distance_matrix,/*OUT*/&hit[iHit]);
4033       distances[iHit] = hit[iHit].dist;
4034     }
4035   }
4036   if (pseudoWeight > 0) {
4037     /* Estimate the prior distance */
4038     double dTop = 0;
4039     double dBottom = 0;
4040     for (iHit=0; iHit < (nProfiles*(nProfiles-1))/2; iHit++) {
4041       dTop += hit[iHit].dist * hit[iHit].weight;
4042       dBottom += hit[iHit].weight;
4043     }
4044     double prior = (dBottom > 0.01) ? dTop/dBottom : 3.0;
4045     for (iHit=0; iHit < (nProfiles*(nProfiles-1))/2; iHit++)
4046       distances[iHit] = (distances[iHit] * hit[iHit].weight + prior * pseudoWeight)
4047 	/ (hit[iHit].weight + pseudoWeight);
4048   }
4049   if (logdist) {
4050     for (iHit=0; iHit < (nProfiles*(nProfiles-1))/2; iHit++)
4051       distances[iHit] = LogCorrect(distances[iHit]);
4052   }
4053 }
4054 
4055 /* During the neighbor-joining phase, a join only violates our constraints if
4056    node1, node2, and other are all represented in the constraint
4057    and if one of the 3 is split and the other two do not agree
4058  */
JoinConstraintPenalty(NJ_t * NJ,int node1,int node2)4059 int JoinConstraintPenalty(/*IN*/NJ_t *NJ, int node1, int node2) {
4060   if (NJ->nConstraints == 0)
4061     return(0.0);
4062   int penalty = 0;
4063   int iC;
4064   for (iC = 0; iC < NJ->nConstraints; iC++)
4065     penalty += JoinConstraintPenaltyPiece(NJ, node1, node2, iC);
4066   return(penalty);
4067 }
4068 
JoinConstraintPenaltyPiece(NJ_t * NJ,int node1,int node2,int iC)4069 int JoinConstraintPenaltyPiece(NJ_t *NJ, int node1, int node2, int iC) {
4070   profile_t *pOut = NJ->outprofile;
4071   profile_t *p1 = NJ->profiles[node1];
4072   profile_t *p2 = NJ->profiles[node2];
4073   int nOn1 = p1->nOn[iC];
4074   int nOff1 = p1->nOff[iC];
4075   int nOn2 = p2->nOn[iC];
4076   int nOff2 = p2->nOff[iC];
4077   int nOnOut = pOut->nOn[iC] - nOn1 - nOn2;
4078   int nOffOut = pOut->nOff[iC] - nOff1 - nOff2;
4079 
4080   if ((nOn1+nOff1) > 0 && (nOn2+nOff2) > 0 && (nOnOut+nOffOut) > 0) {
4081     /* code is -1 for split, 0 for off, 1 for on */
4082     int code1 = (nOn1 > 0 && nOff1 > 0) ? -1 : (nOn1 > 0 ? 1 : 0);
4083     int code2 = (nOn2 > 0 && nOff2 > 0) ? -1 : (nOn2 > 0 ? 1 : 0);
4084     int code3 = (nOnOut > 0 && nOffOut) > 0 ? -1 : (nOnOut > 0 ? 1 : 0);
4085     int nSplit = (code1 == -1 ? 1 : 0) + (code2 == -1 ? 1 : 0) + (code3 == -1 ? 1 : 0);
4086     int nOn = (code1 == 1 ? 1 : 0) + (code2 == 1 ? 1 : 0) + (code3 == 1 ? 1 : 0);
4087     if (nSplit == 1 && nOn == 1)
4088       return(SplitConstraintPenalty(nOn1+nOn2, nOff1+nOff2, nOnOut, nOffOut));
4089   }
4090   /* else */
4091   return(0);
4092 }
4093 
QuartetConstraintPenalties(profile_t * profiles[4],int nConstraints,double penalty[3])4094 void QuartetConstraintPenalties(profile_t *profiles[4], int nConstraints, /*OUT*/double penalty[3]) {
4095   int i;
4096   for (i=0; i < 3; i++)
4097     penalty[i] = 0.0;
4098   if(nConstraints == 0)
4099     return;
4100   int iC;
4101   for (iC = 0; iC < nConstraints; iC++) {
4102     double part[3];
4103     if (QuartetConstraintPenaltiesPiece(profiles, iC, /*OUT*/part)) {
4104       for (i=0;i<3;i++)
4105 	penalty[i] += part[i];
4106 
4107       if (verbose>2
4108 	  && (fabs(part[ABvsCD]-part[ACvsBD]) > 0.001 || fabs(part[ABvsCD]-part[ADvsBC]) > 0.001))
4109 	fprintf(stderr, "Constraint Penalties at %d: ABvsCD %.3f ACvsBD %.3f ADvsBC %.3f %d/%d %d/%d %d/%d %d/%d\n",
4110 		iC, part[ABvsCD], part[ACvsBD], part[ADvsBC],
4111 		profiles[0]->nOn[iC], profiles[0]->nOff[iC],
4112 		profiles[1]->nOn[iC], profiles[1]->nOff[iC],
4113 		profiles[2]->nOn[iC], profiles[2]->nOff[iC],
4114 		profiles[3]->nOn[iC], profiles[3]->nOff[iC]);
4115     }
4116   }
4117   if (verbose>2)
4118     fprintf(stderr, "Total Constraint Penalties: ABvsCD %.3f ACvsBD %.3f ADvsBC %.3f\n",
4119 	    penalty[ABvsCD], penalty[ACvsBD], penalty[ADvsBC]);
4120 }
4121 
PairConstraintDistance(int nOn1,int nOff1,int nOn2,int nOff2)4122 double PairConstraintDistance(int nOn1, int nOff1, int nOn2, int nOff2) {
4123   double f1 = nOn1/(double)(nOn1+nOff1);
4124   double f2 = nOn2/(double)(nOn2+nOff2);
4125   /* 1 - f1 * f2 - (1-f1)*(1-f2) = 1 - f1 * f2 - 1 + f1 + f2 - f1 * f2 */
4126   return(f1 + f2 - 2.0 * f1 * f2);
4127 }
4128 
QuartetConstraintPenaltiesPiece(profile_t * profiles[4],int iC,double piece[3])4129 bool QuartetConstraintPenaltiesPiece(profile_t *profiles[4], int iC, /*OUT*/double piece[3]) {
4130   int nOn[4];
4131   int nOff[4];
4132   int i;
4133   int nSplit = 0;
4134   int nPlus = 0;
4135   int nMinus = 0;
4136 
4137   for (i=0; i < 4; i++) {
4138     nOn[i] = profiles[i]->nOn[iC];
4139     nOff[i] = profiles[i]->nOff[iC];
4140     if (nOn[i] + nOff[i] == 0)
4141       return(false);		/* ignore */
4142     else if (nOn[i] > 0 && nOff[i] > 0)
4143       nSplit++;
4144     else if (nOn[i] > 0)
4145       nPlus++;
4146     else
4147       nMinus++;
4148   }
4149   /* If just one of them is split or on the other side and the others all agree, also ignore */
4150   if (nPlus >= 3 || nMinus >= 3)
4151     return(false);
4152   piece[ABvsCD] = constraintWeight
4153     * (PairConstraintDistance(nOn[0],nOff[0],nOn[1],nOff[1])
4154        + PairConstraintDistance(nOn[2],nOff[2],nOn[3],nOff[3]));
4155   piece[ACvsBD] = constraintWeight
4156     * (PairConstraintDistance(nOn[0],nOff[0],nOn[2],nOff[2])
4157        + PairConstraintDistance(nOn[1],nOff[1],nOn[3],nOff[3]));
4158   piece[ADvsBC] = constraintWeight
4159     * (PairConstraintDistance(nOn[0],nOff[0],nOn[3],nOff[3])
4160        + PairConstraintDistance(nOn[2],nOff[2],nOn[1],nOff[1]));
4161   return(true);
4162 }
4163 
4164 /* Minimum number of constrained leaves that need to be moved
4165    to satisfy the constraint (or 0 if constraint is satisfied)
4166    Defining it this way should ensure that SPR moves that break
4167    constraints get a penalty
4168 */
SplitConstraintPenalty(int nOn1,int nOff1,int nOn2,int nOff2)4169 int SplitConstraintPenalty(int nOn1, int nOff1, int nOn2, int nOff2) {
4170   return(nOn1 + nOff2 < nOn2 + nOff1 ?
4171 	 (nOn1 < nOff2 ? nOn1 : nOff2)
4172 	 : (nOn2 < nOff1 ? nOn2 : nOff1));
4173 }
4174 
SplitViolatesConstraint(profile_t * profiles[4],int iConstraint)4175 bool SplitViolatesConstraint(profile_t *profiles[4], int iConstraint) {
4176   int i;
4177   int codes[4]; /* 0 for off, 1 for on, -1 for split (quit if not constrained at all) */
4178   for (i = 0; i < 4; i++) {
4179     if (profiles[i]->nOn[iConstraint] + profiles[i]->nOff[iConstraint] == 0)
4180       return(false);
4181     else if (profiles[i]->nOn[iConstraint] > 0 && profiles[i]->nOff[iConstraint] == 0)
4182       codes[i] = 1;
4183     else if (profiles[i]->nOn[iConstraint] == 0 && profiles[i]->nOff[iConstraint] > 0)
4184       codes[i] = 0;
4185     else
4186       codes[i] = -1;
4187   }
4188   int n0 = 0;
4189   int n1 = 0;
4190   for (i = 0; i < 4; i++) {
4191     if (codes[i] == 0)
4192       n0++;
4193     else if (codes[i] == 1)
4194       n1++;
4195   }
4196   /* 3 on one side means no violation, even if other is code -1
4197      otherwise must have code != -1 and agreement on the split
4198    */
4199   if (n0 >= 3 || n1 >= 3)
4200     return(false);
4201   if (n0==2 && n1==2 && codes[0] == codes[1] && codes[2] == codes[3])
4202     return(false);
4203   return(true);
4204 }
4205 
LogCorrect(double dist)4206 double LogCorrect(double dist) {
4207   const double maxscore = 3.0;
4208   if (nCodes == 4 && !useMatrix) { /* Jukes-Cantor */
4209     dist = dist < 0.74 ? -0.75*log(1.0 - dist * 4.0/3.0) : maxscore;
4210   } else {			/* scoredist-like */
4211     dist = dist < 0.99 ? -1.3*log(1.0 - dist) : maxscore;
4212   }
4213   return (dist < maxscore ? dist : maxscore);
4214 }
4215 
4216 /* A helper function -- f1 and f2 can be NULL if the corresponding code != NOCODE
4217 */
ProfileDistPiece(unsigned int code1,unsigned int code2,numeric_t * f1,numeric_t * f2,distance_matrix_t * dmat,numeric_t * codeDist2)4218 double ProfileDistPiece(unsigned int code1, unsigned int code2,
4219 			numeric_t *f1, numeric_t *f2,
4220 			/*OPTIONAL*/distance_matrix_t *dmat,
4221 			/*OPTIONAL*/numeric_t *codeDist2) {
4222   if (dmat) {
4223     if (code1 != NOCODE && code2 != NOCODE) { /* code1 vs code2 */
4224       return(dmat->distances[code1][code2]);
4225     } else if (codeDist2 != NULL && code1 != NOCODE) { /* code1 vs. codeDist2 */
4226       return(codeDist2[code1]);
4227     } else { /* f1 vs f2 */
4228       if (f1 == NULL) {
4229 	if(code1 == NOCODE) return(10.0);
4230 	f1 = &dmat->codeFreq[code1][0];
4231       }
4232       if (f2 == NULL) {
4233 	if(code2 == NOCODE) return(10.0);
4234 	f2 = &dmat->codeFreq[code2][0];
4235       }
4236       return(vector_multiply3_sum(f1,f2,dmat->eigenval,nCodes));
4237     }
4238   } else {
4239     /* no matrix */
4240     if (code1 != NOCODE) {
4241       if (code2 != NOCODE) {
4242 	return(code1 == code2 ? 0.0 : 1.0); /* code1 vs code2 */
4243       } else {
4244 	if(f2 == NULL) return(10.0);
4245 	return(1.0 - f2[code1]); /* code1 vs. f2 */
4246       }
4247     } else {
4248       if (code2 != NOCODE) {
4249 	if(f1 == NULL) return(10.0);
4250 	return(1.0 - f1[code2]); /* f1 vs code2 */
4251       } else { /* f1 vs. f2 */
4252 	if (f1 == NULL || f2 == NULL) return(10.0);
4253 	double piece = 1.0;
4254 	int k;
4255 	for (k = 0; k < nCodes; k++) {
4256 	  piece -= f1[k] * f2[k];
4257 	}
4258 	return(piece);
4259       }
4260     }
4261   }
4262   assert(0);
4263 }
4264 
4265 /* E.g. GET_FREQ(profile,iPos,iVector)
4266    Gets the next element of the vectors (and updates iVector), or
4267    returns NULL if we didn't store a vector
4268 */
4269 #define GET_FREQ(P,I,IVECTOR) \
4270 (P->weights[I] > 0 && P->codes[I] == NOCODE ? &P->vectors[nCodes*(IVECTOR++)] : NULL)
4271 
ProfileDist(profile_t * profile1,profile_t * profile2,int nPos,distance_matrix_t * dmat,besthit_t * hit)4272 void ProfileDist(profile_t *profile1, profile_t *profile2, int nPos,
4273 		 /*OPTIONAL*/distance_matrix_t *dmat,
4274 		 /*OUT*/besthit_t *hit) {
4275   double top = 0;
4276   double denom = 0;
4277   int iFreq1 = 0;
4278   int iFreq2 = 0;
4279   int i = 0;
4280   for (i = 0; i < nPos; i++) {
4281       numeric_t *f1 = GET_FREQ(profile1,i,/*IN/OUT*/iFreq1);
4282       numeric_t *f2 = GET_FREQ(profile2,i,/*IN/OUT*/iFreq2);
4283       if (profile1->weights[i] > 0 && profile2->weights[i] > 0) {
4284 	double weight = profile1->weights[i] * profile2->weights[i];
4285 	denom += weight;
4286 	double piece = ProfileDistPiece(profile1->codes[i],profile2->codes[i],f1,f2,dmat,
4287 					profile2->codeDist ? &profile2->codeDist[i*nCodes] : NULL);
4288 	top += weight * piece;
4289       }
4290   }
4291   assert(iFreq1 == profile1->nVectors);
4292   assert(iFreq2 == profile2->nVectors);
4293   hit->weight = denom > 0 ? denom : 0.01; /* 0.01 is an arbitrarily low value of weight (normally >>1) */
4294   hit->dist = denom > 0 ? top/denom : 1;
4295   profileOps++;
4296 }
4297 
4298 /* This should not be called if the update weight is 0, as
4299    in that case code==NOCODE and in=NULL is possible, and then
4300    it will fail.
4301 */
AddToFreq(numeric_t * fOut,double weight,unsigned int codeIn,numeric_t * fIn,distance_matrix_t * dmat)4302 void AddToFreq(/*IN/OUT*/numeric_t *fOut,
4303 	       double weight,
4304 	       unsigned int codeIn, /*OPTIONAL*/numeric_t *fIn,
4305 	       /*OPTIONAL*/distance_matrix_t *dmat) {
4306   assert(fOut != NULL);
4307   if (fIn != NULL) {
4308     vector_add_mult(fOut, fIn, weight, nCodes);
4309   } else if (dmat) {
4310     assert(codeIn != NOCODE);
4311     vector_add_mult(fOut, dmat->codeFreq[codeIn], weight, nCodes);
4312   } else {
4313     assert(codeIn != NOCODE);
4314     fOut[codeIn] += weight;
4315   }
4316 }
4317 
SetProfile(NJ_t * NJ,int node,double weight1)4318 void SetProfile(/*IN/OUT*/NJ_t *NJ, int node, double weight1) {
4319     children_t *c = &NJ->child[node];
4320     assert(c->nChild == 2);
4321     assert(NJ->profiles[c->child[0]] != NULL);
4322     assert(NJ->profiles[c->child[1]] != NULL);
4323     if (NJ->profiles[node] != NULL)
4324       FreeProfile(NJ->profiles[node], NJ->nPos, NJ->nConstraints);
4325     NJ->profiles[node] = AverageProfile(NJ->profiles[c->child[0]],
4326 					NJ->profiles[c->child[1]],
4327 					NJ->nPos, NJ->nConstraints,
4328 					NJ->distance_matrix,
4329 					weight1);
4330 }
4331 
4332 /* bionjWeight is the weight of the first sequence (between 0 and 1),
4333    or -1 to do the average.
4334    */
AverageProfile(profile_t * profile1,profile_t * profile2,int nPos,int nConstraints,distance_matrix_t * dmat,double bionjWeight)4335 profile_t *AverageProfile(profile_t *profile1, profile_t *profile2,
4336 			  int nPos, int nConstraints,
4337 			  distance_matrix_t *dmat,
4338 			  double bionjWeight) {
4339   int i;
4340   if (bionjWeight < 0) {
4341     bionjWeight = 0.5;
4342   }
4343 
4344   /* First, set codes and weights and see how big vectors will be */
4345   profile_t *out = NewProfile(nPos, nConstraints);
4346 
4347   for (i = 0; i < nPos; i++) {
4348     out->weights[i] = bionjWeight * profile1->weights[i]
4349       + (1-bionjWeight) * profile2->weights[i];
4350     out->codes[i] = NOCODE;
4351     if (out->weights[i] > 0) {
4352       if (profile1->weights[i] > 0 && profile1->codes[i] != NOCODE
4353 	  && (profile2->weights[i] <= 0 || profile1->codes[i] == profile2->codes[i])) {
4354 	out->codes[i] = profile1->codes[i];
4355       } else if (profile1->weights[i] <= 0
4356 		 && profile2->weights[i] > 0
4357 		 && profile2->codes[i] != NOCODE) {
4358 	out->codes[i] = profile2->codes[i];
4359       }
4360       if (out->codes[i] == NOCODE) out->nVectors++;
4361     }
4362   }
4363 
4364   /* Allocate and set the vectors */
4365   out->vectors = (numeric_t*)mymalloc(sizeof(numeric_t)*nCodes*out->nVectors);
4366   for (i = 0; i < nCodes * out->nVectors; i++) out->vectors[i] = 0;
4367   nProfileFreqAlloc += out->nVectors;
4368   nProfileFreqAvoid += nPos - out->nVectors;
4369   int iFreqOut = 0;
4370   int iFreq1 = 0;
4371   int iFreq2 = 0;
4372   for (i=0; i < nPos; i++) {
4373     numeric_t *f = GET_FREQ(out,i,/*IN/OUT*/iFreqOut);
4374     numeric_t *f1 = GET_FREQ(profile1,i,/*IN/OUT*/iFreq1);
4375     numeric_t *f2 = GET_FREQ(profile2,i,/*IN/OUT*/iFreq2);
4376     if (f != NULL) {
4377       if (profile1->weights[i] > 0)
4378 	AddToFreq(/*IN/OUT*/f, profile1->weights[i] * bionjWeight,
4379 		  profile1->codes[i], f1, dmat);
4380       if (profile2->weights[i] > 0)
4381 	AddToFreq(/*IN/OUT*/f, profile2->weights[i] * (1.0-bionjWeight),
4382 		  profile2->codes[i], f2, dmat);
4383       NormalizeFreq(/*IN/OUT*/f, dmat);
4384     } /* end if computing f */
4385     if (verbose > 10 && i < 5) {
4386       fprintf(stderr,"Average profiles: pos %d in-w1 %f in-w2 %f bionjWeight %f to weight %f code %d\n",
4387 	      i, profile1->weights[i], profile2->weights[i], bionjWeight,
4388 	      out->weights[i], out->codes[i]);
4389       if (f!= NULL) {
4390 	int k;
4391 	for (k = 0; k < nCodes; k++)
4392 	  fprintf(stderr, "\t%c:%f", codesString[k], f ? f[k] : -1.0);
4393 	fprintf(stderr,"\n");
4394       }
4395     }
4396   } /* end loop over positions */
4397   assert(iFreq1 == profile1->nVectors);
4398   assert(iFreq2 == profile2->nVectors);
4399   assert(iFreqOut == out->nVectors);
4400 
4401   /* compute total constraints */
4402   for (i = 0; i < nConstraints; i++) {
4403     out->nOn[i] = profile1->nOn[i] + profile2->nOn[i];
4404     out->nOff[i] = profile1->nOff[i] + profile2->nOff[i];
4405   }
4406   profileAvgOps++;
4407   return(out);
4408 }
4409 
4410 /* Make the (unrotated) frequencies sum to 1
4411    Simply dividing by total_weight is not ideal because of roundoff error
4412    So compute total_freq instead
4413 */
NormalizeFreq(numeric_t * freq,distance_matrix_t * dmat)4414 void NormalizeFreq(/*IN/OUT*/numeric_t *freq, distance_matrix_t *dmat) {
4415   double total_freq = 0;
4416   int k;
4417   if (dmat != NULL) {
4418     /* The total frequency is dot_product(true_frequencies, 1)
4419        So we rotate the 1 vector by eigeninv (stored in eigentot)
4420     */
4421     total_freq = vector_multiply_sum(freq, dmat->eigentot, nCodes);
4422   } else {
4423     for (k = 0; k < nCodes; k++)
4424       total_freq += freq[k];
4425   }
4426   if (total_freq > fPostTotalTolerance) {
4427     numeric_t inverse_weight = 1.0/total_freq;
4428     vector_multiply_by(/*IN/OUT*/freq, inverse_weight, nCodes);
4429   } else {
4430     /* This can happen if we are in a very low-weight region, e.g. if a mostly-gap position gets weighted down
4431        repeatedly; just set them all to arbitrary but legal values */
4432     if (dmat == NULL) {
4433       for (k = 0; k < nCodes; k++)
4434 	freq[k] = 1.0/nCodes;
4435     } else {
4436       for (k = 0; k < nCodes; k++)
4437 	freq[k] = dmat->codeFreq[0][k];
4438     }
4439   }
4440 }
4441 
4442 /* OutProfile() computes the out-profile */
OutProfile(profile_t ** profiles,int nProfiles,int nPos,int nConstraints,distance_matrix_t * dmat)4443 profile_t *OutProfile(profile_t **profiles, int nProfiles,
4444 		      int nPos, int nConstraints,
4445 		      distance_matrix_t *dmat) {
4446   int i;			/* position */
4447   int in;			/* profile */
4448   profile_t *out = NewProfile(nPos, nConstraints);
4449 
4450   double inweight = 1.0/(double)nProfiles;   /* The maximal output weight is 1.0 */
4451 
4452   /* First, set weights -- code is always NOCODE, prevent weight=0 */
4453   for (i = 0; i < nPos; i++) {
4454     out->weights[i] = 0;
4455     for (in = 0; in < nProfiles; in++)
4456       out->weights[i] += profiles[in]->weights[i] * inweight;
4457     if (out->weights[i] <= 0) out->weights[i] = 1e-20; /* always store a vector */
4458     out->nVectors++;
4459     out->codes[i] = NOCODE;		/* outprofile is normally complicated */
4460   }
4461 
4462   /* Initialize the frequencies to 0 */
4463   out->vectors = (numeric_t*)mymalloc(sizeof(numeric_t)*nCodes*out->nVectors);
4464   for (i = 0; i < nCodes*out->nVectors; i++)
4465     out->vectors[i] = 0;
4466 
4467   /* Add up the weights, going through each sequence in turn */
4468   for (in = 0; in < nProfiles; in++) {
4469     int iFreqOut = 0;
4470     int iFreqIn = 0;
4471     for (i = 0; i < nPos; i++) {
4472       numeric_t *fIn = GET_FREQ(profiles[in],i,/*IN/OUT*/iFreqIn);
4473       numeric_t *fOut = GET_FREQ(out,i,/*IN/OUT*/iFreqOut);
4474       if (profiles[in]->weights[i] > 0)
4475 	AddToFreq(/*IN/OUT*/fOut, profiles[in]->weights[i],
4476 		  profiles[in]->codes[i], fIn, dmat);
4477     }
4478     assert(iFreqOut == out->nVectors);
4479     assert(iFreqIn == profiles[in]->nVectors);
4480   }
4481 
4482   /* And normalize the frequencies to sum to 1 */
4483   int iFreqOut = 0;
4484   for (i = 0; i < nPos; i++) {
4485     numeric_t *fOut = GET_FREQ(out,i,/*IN/OUT*/iFreqOut);
4486     if (fOut)
4487       NormalizeFreq(/*IN/OUT*/fOut, dmat);
4488   }
4489   assert(iFreqOut == out->nVectors);
4490   if (verbose > 10) fprintf(stderr,"Average %d profiles\n", nProfiles);
4491   if(dmat)
4492     SetCodeDist(/*IN/OUT*/out, nPos, dmat);
4493 
4494   /* Compute constraints */
4495   for (i = 0; i < nConstraints; i++) {
4496     out->nOn[i] = 0;
4497     out->nOff[i] = 0;
4498     for (in = 0; in < nProfiles; in++) {
4499       out->nOn[i] += profiles[in]->nOn[i];
4500       out->nOff[i] += profiles[in]->nOff[i];
4501     }
4502   }
4503   return(out);
4504 }
4505 
UpdateOutProfile(profile_t * out,profile_t * old1,profile_t * old2,profile_t * new,int nActiveOld,int nPos,int nConstraints,distance_matrix_t * dmat)4506 void UpdateOutProfile(/*IN/OUT*/profile_t *out, profile_t *old1, profile_t *old2,
4507 		      profile_t *new, int nActiveOld,
4508 		      int nPos, int nConstraints,
4509 		      distance_matrix_t *dmat) {
4510   int i, k;
4511   int iFreqOut = 0;
4512   int iFreq1 = 0;
4513   int iFreq2 = 0;
4514   int iFreqNew = 0;
4515   assert(nActiveOld > 0);
4516 
4517   for (i = 0; i < nPos; i++) {
4518     numeric_t *fOut = GET_FREQ(out,i,/*IN/OUT*/iFreqOut);
4519     numeric_t *fOld1 = GET_FREQ(old1,i,/*IN/OUT*/iFreq1);
4520     numeric_t *fOld2 = GET_FREQ(old2,i,/*IN/OUT*/iFreq2);
4521     numeric_t *fNew = GET_FREQ(new,i,/*IN/OUT*/iFreqNew);
4522 
4523     assert(out->codes[i] == NOCODE && fOut != NULL); /* No no-vector optimization for outprofiles */
4524     if (verbose > 3 && i < 3) {
4525       fprintf(stderr,"Updating out-profile position %d weight %f (mult %f)\n",
4526 	      i, out->weights[i], out->weights[i]*nActiveOld);
4527     }
4528     double originalMult = out->weights[i]*nActiveOld;
4529     double newMult = originalMult + new->weights[i] - old1->weights[i] - old2->weights[i];
4530     out->weights[i] = newMult/(nActiveOld-1);
4531     if (out->weights[i] <= 0) out->weights[i] = 1e-20; /* always use the vector */
4532 
4533     for (k = 0; k < nCodes; k++) fOut[k] *= originalMult;
4534 
4535     if (old1->weights[i] > 0)
4536       AddToFreq(/*IN/OUT*/fOut, -old1->weights[i], old1->codes[i], fOld1, dmat);
4537     if (old2->weights[i] > 0)
4538       AddToFreq(/*IN/OUT*/fOut, -old2->weights[i], old2->codes[i], fOld2, dmat);
4539     if (new->weights[i] > 0)
4540       AddToFreq(/*IN/OUT*/fOut, new->weights[i], new->codes[i], fNew, dmat);
4541 
4542     /* And renormalize */
4543     NormalizeFreq(/*IN/OUT*/fOut, dmat);
4544 
4545     if (verbose > 2 && i < 3) {
4546       fprintf(stderr,"Updated out-profile position %d weight %f (mult %f)",
4547 	      i, out->weights[i], out->weights[i]*nActiveOld);
4548       if(out->weights[i] > 0)
4549 	for (k=0;k<nCodes;k++)
4550 	  fprintf(stderr, " %c:%f", dmat?'?':codesString[k], fOut[k]);
4551       fprintf(stderr,"\n");
4552     }
4553   }
4554   assert(iFreqOut == out->nVectors);
4555   assert(iFreq1 == old1->nVectors);
4556   assert(iFreq2 == old2->nVectors);
4557   assert(iFreqNew == new->nVectors);
4558   if(dmat)
4559     SetCodeDist(/*IN/OUT*/out,nPos,dmat);
4560 
4561   /* update constraints -- note in practice this should be a no-op */
4562   for (i = 0; i < nConstraints; i++) {
4563     out->nOn[i] += new->nOn[i] - old1->nOn[i] - old2->nOn[i];
4564     out->nOff[i] += new->nOff[i] - old1->nOff[i] - old2->nOff[i];
4565   }
4566 }
4567 
SetCodeDist(profile_t * profile,int nPos,distance_matrix_t * dmat)4568 void SetCodeDist(/*IN/OUT*/profile_t *profile, int nPos,
4569 			   distance_matrix_t *dmat) {
4570   if (profile->codeDist == NULL)
4571     profile->codeDist = (numeric_t*)mymalloc(sizeof(numeric_t)*nPos*nCodes);
4572   int i;
4573   int iFreq = 0;
4574   for (i = 0; i < nPos; i++) {
4575     numeric_t *f = GET_FREQ(profile,i,/*IN/OUT*/iFreq);
4576 
4577     int k;
4578     for (k = 0; k < nCodes; k++)
4579       profile->codeDist[i*nCodes+k] = ProfileDistPiece(/*code1*/profile->codes[i], /*code2*/k,
4580 						       /*f1*/f, /*f2*/NULL,
4581 						       dmat, NULL);
4582   }
4583   assert(iFreq==profile->nVectors);
4584 }
4585 
4586 
SetBestHit(int node,NJ_t * NJ,int nActive,besthit_t * bestjoin,besthit_t * allhits)4587 void SetBestHit(int node, NJ_t *NJ, int nActive,
4588 		/*OUT*/besthit_t *bestjoin, /*OUT OPTIONAL*/besthit_t *allhits) {
4589   assert(NJ->parent[node] <  0);
4590 
4591   bestjoin->i = node;
4592   bestjoin->j = -1;
4593   bestjoin->dist = 1e20;
4594   bestjoin->criterion = 1e20;
4595 
4596   int j;
4597   besthit_t tmp;
4598 
4599 #ifdef OPENMP
4600   /* Note -- if we are already in a parallel region, this will be ignored */
4601   #pragma omp parallel for schedule(dynamic, 50)
4602 #endif
4603   for (j = 0; j < NJ->maxnode; j++) {
4604     besthit_t *sv = allhits != NULL ? &allhits[j] : &tmp;
4605     sv->i = node;
4606     sv->j = j;
4607     if (NJ->parent[j] >= 0) {
4608       sv->i = -1;		/* illegal/empty join */
4609       sv->weight = 0.0;
4610       sv->criterion = sv->dist = 1e20;
4611       continue;
4612     }
4613     /* Note that we compute self-distances (allow j==node) because the top-hit heuristic
4614        expects self to be within its top hits, but we exclude those from the bestjoin
4615        that we return...
4616     */
4617     SetDistCriterion(NJ, nActive, /*IN/OUT*/sv);
4618     if (sv->criterion < bestjoin->criterion && node != j)
4619       *bestjoin = *sv;
4620   }
4621   if (verbose>5) {
4622     fprintf(stderr, "SetBestHit %d %d %f %f\n", bestjoin->i, bestjoin->j, bestjoin->dist, bestjoin->criterion);
4623   }
4624 }
4625 
ReadMatrix(char * filename,numeric_t codes[MAXCODES][MAXCODES],bool checkCodes)4626 void ReadMatrix(char *filename, /*OUT*/numeric_t codes[MAXCODES][MAXCODES], bool checkCodes) {
4627   char buf[BUFFER_SIZE] = "";
4628   FILE *fp = fopen(filename, "r");
4629   if (fp == NULL) {
4630     fprintf(stderr, "Cannot read %s\n",filename);
4631     exit(1);
4632   }
4633   if (fgets(buf,sizeof(buf),fp) == NULL) {
4634     fprintf(stderr, "Error reading header line for %s:\n%s\n", filename, buf);
4635     exit(1);
4636   }
4637   if (checkCodes) {
4638     int i;
4639     int iBufPos;
4640     for (iBufPos=0,i=0;i<nCodes;i++,iBufPos++) {
4641       if(buf[iBufPos] != codesString[i]) {
4642 	fprintf(stderr,"Header line\n%s\nin file %s does not have expected code %c # %d in %s\n",
4643 		buf, filename, codesString[i], i, codesString);
4644 	exit(1);
4645       }
4646       iBufPos++;
4647       if(buf[iBufPos] != '\n' && buf[iBufPos] != '\r' && buf[iBufPos] != '\0' && buf[iBufPos] != '\t') {
4648 	fprintf(stderr, "Header line in %s should be tab-delimited\n", filename);
4649 	exit(1);
4650       }
4651       if (buf[iBufPos] == '\0' && i < nCodes-1) {
4652 	fprintf(stderr, "Header line in %s ends prematurely\n",filename);
4653 	exit(1);
4654       }
4655     } /* end loop over codes */
4656     /* Should be at end, but allow \n because of potential DOS \r\n */
4657     if(buf[iBufPos] != '\0' && buf[iBufPos] != '\n' && buf[iBufPos] != '\r') {
4658       fprintf(stderr, "Header line in %s has too many entries\n", filename);
4659       exit(1);
4660     }
4661   }
4662   int iLine;
4663   for (iLine = 0; iLine < nCodes; iLine++) {
4664     buf[0] = '\0';
4665     if (fgets(buf,sizeof(buf),fp) == NULL) {
4666       fprintf(stderr, "Cannot read line %d from file %s\n", iLine+2, filename);
4667       exit(1);
4668     }
4669     char *field = strtok(buf,"\t\r\n");
4670     field = strtok(NULL, "\t");	/* ignore first column */
4671     int iColumn;
4672     for (iColumn = 0; iColumn < nCodes && field != NULL; iColumn++, field = strtok(NULL,"\t")) {
4673       if(sscanf(field,ScanNumericSpec,&codes[iLine][iColumn]) != 1) {
4674 	fprintf(stderr,"Cannot parse field %s in file %s\n", field, filename);
4675 	exit(1);
4676       }
4677     }
4678   }
4679 }
4680 
ReadVector(char * filename,numeric_t codes[MAXCODES])4681 void ReadVector(char *filename, /*OUT*/numeric_t codes[MAXCODES]) {
4682   FILE *fp = fopen(filename,"r");
4683   if (fp == NULL) {
4684     fprintf(stderr, "Cannot read %s\n",filename);
4685     exit(1);
4686   }
4687   int i;
4688   for (i = 0; i < nCodes; i++) {
4689     if (fscanf(fp,ScanNumericSpec,&codes[i]) != 1) {
4690       fprintf(stderr,"Cannot read %d entry of %s\n",i+1,filename);
4691       exit(1);
4692     }
4693   }
4694   if (fclose(fp) != 0) {
4695     fprintf(stderr, "Error reading %s\n",filename);
4696     exit(1);
4697   }
4698 }
4699 
ReadDistanceMatrix(char * prefix)4700 distance_matrix_t *ReadDistanceMatrix(char *prefix) {
4701   char buffer[BUFFER_SIZE];
4702   distance_matrix_t *dmat = (distance_matrix_t*)mymalloc(sizeof(distance_matrix_t));
4703 
4704   if(strlen(prefix) > BUFFER_SIZE-20) {
4705     fprintf(stderr,"Filename %s too long\n", prefix);
4706     exit(1);
4707   }
4708 
4709   strcpy(buffer, prefix);
4710   strcat(buffer, ".distances");
4711   ReadMatrix(buffer, /*OUT*/dmat->distances, /*checkCodes*/true);
4712 
4713   strcpy(buffer, prefix);
4714   strcat(buffer, ".inverses");
4715   ReadMatrix(buffer, /*OUT*/dmat->eigeninv, /*checkCodes*/false);
4716 
4717   strcpy(buffer, prefix);
4718   strcat(buffer, ".eigenvalues");
4719   ReadVector(buffer, /*OUT*/dmat->eigenval);
4720 
4721   if(verbose>1) fprintf(stderr, "Read distance matrix from %s\n",prefix);
4722   SetupDistanceMatrix(/*IN/OUT*/dmat);
4723   return(dmat);
4724 }
4725 
SetupDistanceMatrix(distance_matrix_t * dmat)4726 void SetupDistanceMatrix(/*IN/OUT*/distance_matrix_t *dmat) {
4727   /* Check that the eigenvalues and eigen-inverse are consistent with the
4728      distance matrix and that the matrix is symmetric */
4729   int i,j,k;
4730   for (i = 0; i < nCodes; i++) {
4731     for (j = 0; j < nCodes; j++) {
4732       if(fabs(dmat->distances[i][j]-dmat->distances[j][i]) > 1e-6) {
4733 	fprintf(stderr,"Distance matrix not symmetric for %d,%d: %f vs %f\n",
4734 		i+1,j+1,
4735 		dmat->distances[i][j],
4736 		dmat->distances[j][i]);
4737 	exit(1);
4738       }
4739       double total = 0.0;
4740       for (k = 0; k < nCodes; k++)
4741 	total += dmat->eigenval[k] * dmat->eigeninv[k][i] * dmat->eigeninv[k][j];
4742       if(fabs(total - dmat->distances[i][j]) > 1e-6) {
4743 	fprintf(stderr,"Distance matrix entry %d,%d should be %f but eigen-representation gives %f\n",
4744 		i+1,j+1,dmat->distances[i][j],total);
4745 	exit(1);
4746       }
4747     }
4748   }
4749 
4750   /* And compute eigentot */
4751   for (k = 0; k < nCodes; k++) {
4752     dmat->eigentot[k] = 0.;
4753     int j;
4754     for (j = 0; j < nCodes; j++)
4755       dmat->eigentot[k] += dmat->eigeninv[k][j];
4756   }
4757 
4758   /* And compute codeFreq */
4759   int code;
4760   for(code = 0; code < nCodes; code++) {
4761     for (k = 0; k < nCodes; k++) {
4762       dmat->codeFreq[code][k] = dmat->eigeninv[k][code];
4763     }
4764   }
4765   /* And gapFreq */
4766   for(code = 0; code < nCodes; code++) {
4767     double gapFreq = 0.0;
4768     for (k = 0; k < nCodes; k++)
4769       gapFreq += dmat->codeFreq[k][code];
4770     dmat->gapFreq[code] = gapFreq / nCodes;
4771   }
4772 
4773   if(verbose>10) fprintf(stderr, "Made codeFreq\n");
4774 }
4775 
ChooseNNI(profile_t * profiles[4],distance_matrix_t * dmat,int nPos,int nConstraints,double criteria[3])4776 nni_t ChooseNNI(profile_t *profiles[4],
4777 		/*OPTIONAL*/distance_matrix_t *dmat,
4778 		int nPos, int nConstraints,
4779 		/*OUT*/double criteria[3]) {
4780   double d[6];
4781   CorrectedPairDistances(profiles, 4, dmat, nPos, /*OUT*/d);
4782   double penalty[3]; 		/* indexed as nni_t */
4783   QuartetConstraintPenalties(profiles, nConstraints, /*OUT*/penalty);
4784   criteria[ABvsCD] = d[qAB] + d[qCD] + penalty[ABvsCD];
4785   criteria[ACvsBD] = d[qAC] + d[qBD] + penalty[ACvsBD];
4786   criteria[ADvsBC] = d[qAD] + d[qBC] + penalty[ADvsBC];
4787 
4788   nni_t choice = ABvsCD;
4789   if (criteria[ACvsBD] < criteria[ABvsCD] && criteria[ACvsBD] <= criteria[ADvsBC]) {
4790     choice = ACvsBD;
4791   } else if (criteria[ADvsBC] < criteria[ABvsCD] && criteria[ADvsBC] <= criteria[ACvsBD]) {
4792     choice = ADvsBC;
4793   }
4794   if (verbose > 1 && penalty[choice] > penalty[ABvsCD] + 1e-6) {
4795     fprintf(stderr, "Worsen constraint: from %.3f to %.3f distance %.3f to %.3f: ",
4796 	    penalty[ABvsCD], penalty[choice],
4797 	    criteria[ABvsCD], choice == ACvsBD ? criteria[ACvsBD] : criteria[ADvsBC]);
4798     int iC;
4799     for (iC = 0; iC < nConstraints; iC++) {
4800       double ppart[3];
4801       if (QuartetConstraintPenaltiesPiece(profiles, iC, /*OUT*/ppart)) {
4802 	double old_penalty = ppart[ABvsCD];
4803 	double new_penalty = ppart[choice];
4804 	if (new_penalty > old_penalty + 1e-6)
4805 	  fprintf(stderr, " %d (%d/%d %d/%d %d/%d %d/%d)", iC,
4806 		  profiles[0]->nOn[iC], profiles[0]->nOff[iC],
4807 		  profiles[1]->nOn[iC], profiles[1]->nOff[iC],
4808 		  profiles[2]->nOn[iC], profiles[2]->nOff[iC],
4809 		  profiles[3]->nOn[iC], profiles[3]->nOff[iC]);
4810       }
4811     }
4812     fprintf(stderr,"\n");
4813   }
4814   if (verbose > 3)
4815     fprintf(stderr, "NNI scores ABvsCD %.5f ACvsBD %.5f ADvsBC %.5f choice %s\n",
4816 	    criteria[ABvsCD], criteria[ACvsBD], criteria[ADvsBC],
4817 	    choice == ABvsCD ? "AB|CD" : (choice == ACvsBD ? "AC|BD" : "AD|BC"));
4818   return(choice);
4819 }
4820 
PosteriorProfile(profile_t * p1,profile_t * p2,double len1,double len2,transition_matrix_t * transmat,rates_t * rates,int nPos,int nConstraints)4821 profile_t *PosteriorProfile(profile_t *p1, profile_t *p2,
4822 			    double len1, double len2,
4823 			    /*OPTIONAL*/transition_matrix_t *transmat,
4824 			    rates_t *rates,
4825 			    int nPos, int nConstraints) {
4826   if (len1 < MLMinBranchLength)
4827     len1 = MLMinBranchLength;
4828   if (len2 < MLMinBranchLength)
4829     len2 = MLMinBranchLength;
4830 
4831   int i,j,k;
4832   profile_t *out = NewProfile(nPos, nConstraints);
4833   for (i = 0; i < nPos; i++) {
4834     out->codes[i] = NOCODE;
4835     out->weights[i] = 1.0;
4836   }
4837   out->nVectors = nPos;
4838   out->vectors = (numeric_t*)mymalloc(sizeof(numeric_t)*nCodes*out->nVectors);
4839   for (i = 0; i < nCodes * out->nVectors; i++) out->vectors[i] = 0;
4840   int iFreqOut = 0;
4841   int iFreq1 = 0;
4842   int iFreq2 = 0;
4843   numeric_t *expeigenRates1 = NULL, *expeigenRates2 = NULL;
4844 
4845   if (transmat != NULL) {
4846     expeigenRates1 = ExpEigenRates(len1, transmat, rates);
4847     expeigenRates2 = ExpEigenRates(len2, transmat, rates);
4848   }
4849 
4850   if (transmat == NULL) {	/* Jukes-Cantor */
4851     assert(nCodes == 4);
4852 
4853     double *PSame1 = PSameVector(len1, rates);
4854     double *PDiff1 = PDiffVector(PSame1, rates);
4855     double *PSame2 = PSameVector(len2, rates);
4856     double *PDiff2 = PDiffVector(PSame2, rates);
4857 
4858     numeric_t mix1[4], mix2[4];
4859 
4860     for (i=0; i < nPos; i++) {
4861       int iRate = rates->ratecat[i];
4862       double w1 = p1->weights[i];
4863       double w2 = p2->weights[i];
4864       int code1 = p1->codes[i];
4865       int code2 = p2->codes[i];
4866       numeric_t *f1 = GET_FREQ(p1,i,/*IN/OUT*/iFreq1);
4867       numeric_t *f2 = GET_FREQ(p2,i,/*IN/OUT*/iFreq2);
4868 
4869       /* First try to store a simple profile */
4870       if (f1 == NULL && f2 == NULL) {
4871 	if (code1 == NOCODE && code2 == NOCODE) {
4872 	  out->codes[i] = NOCODE;
4873 	  out->weights[i] = 0.0;
4874 	  continue;
4875 	} else if (code1 == NOCODE) {
4876 	  /* Posterior(parent | character & gap, len1, len2) = Posterior(parent | character, len1)
4877 	     = PSame() for matching characters and 1-PSame() for the rest
4878 	     = (pSame - pDiff) * character + (1-(pSame-pDiff)) * gap
4879 	  */
4880 	  out->codes[i] = code2;
4881 	  out->weights[i] = w2 * (PSame2[iRate] - PDiff2[iRate]);
4882 	  continue;
4883 	} else if (code2 == NOCODE) {
4884 	  out->codes[i] = code1;
4885 	  out->weights[i] = w1 * (PSame1[iRate] - PDiff1[iRate]);
4886 	  continue;
4887 	} else if (code1 == code2) {
4888 	  out->codes[i] = code1;
4889 	  double f12code = (w1*PSame1[iRate] + (1-w1)*0.25) * (w2*PSame2[iRate] + (1-w2)*0.25);
4890 	  double f12other = (w1*PDiff1[iRate] + (1-w1)*0.25) * (w2*PDiff2[iRate] + (1-w2)*0.25);
4891 	  /* posterior probability of code1/code2 after scaling */
4892 	  double pcode = f12code/(f12code+3*f12other);
4893 	  /* Now f = w * (code ? 1 : 0) + (1-w) * 0.25, so to get pcode we need
4894 	     fcode = 1/4 + w1*3/4 or w = (f-1/4)*4/3
4895 	   */
4896 	  out->weights[i] = (pcode - 0.25) * 4.0/3.0;
4897 	  /* This can be zero because of numerical problems, I think */
4898 	  if (out->weights[i] < 1e-6) {
4899 	    if (verbose > 1)
4900 	      fprintf(stderr, "Replaced weight %f with %f from w1 %f w2 %f PSame %f %f f12code %f f12other %f\n",
4901 		      out->weights[i], 1e-6,
4902 		      w1, w2,
4903 		      PSame1[iRate], PSame2[iRate],
4904 		      f12code, f12other);
4905 	    out->weights[i] = 1e-6;
4906 	  }
4907 	  continue;
4908 	}
4909       }
4910       /* if we did not compute a simple profile, then do the full computation and
4911          store the full vector
4912       */
4913       if (f1 == NULL) {
4914 	for (j = 0; j < 4; j++)
4915 	  mix1[j] = (1-w1)*0.25;
4916 	if(code1 != NOCODE)
4917 	  mix1[code1] += w1;
4918 	f1 = mix1;
4919       }
4920       if (f2 == NULL) {
4921 	for (j = 0; j < 4; j++)
4922 	  mix2[j] = (1-w2)*0.25;
4923 	if(code2 != NOCODE)
4924 	  mix2[code2] += w2;
4925 	f2 = mix2;
4926       }
4927       out->codes[i] = NOCODE;
4928       out->weights[i] = 1.0;
4929       numeric_t *f = GET_FREQ(out,i,/*IN/OUT*/iFreqOut);
4930       double lkAB = 0;
4931       for (j = 0; j < 4; j++) {
4932 	f[j] = (f1[j] * PSame1[iRate] + (1.0-f1[j]) * PDiff1[iRate])
4933 	  * (f2[j] * PSame2[iRate] + (1.0-f2[j]) * PDiff2[iRate]);
4934 	lkAB += f[j];
4935       }
4936       double lkABInv = 1.0/lkAB;
4937       for (j = 0; j < 4; j++)
4938 	f[j] *= lkABInv;
4939     }
4940     PSame1 = myfree(PSame1, sizeof(double) * rates->nRateCategories);
4941     PSame2 = myfree(PSame2, sizeof(double) * rates->nRateCategories);
4942     PDiff1 = myfree(PDiff1, sizeof(double) * rates->nRateCategories);
4943     PDiff2 = myfree(PDiff2, sizeof(double) * rates->nRateCategories);
4944   } else if (nCodes == 4) {	/* matrix model on nucleotides */
4945     numeric_t *fGap = &transmat->codeFreq[NOCODE][0];
4946     numeric_t f1mix[4], f2mix[4];
4947 
4948     for (i=0; i < nPos; i++) {
4949       if (p1->codes[i] == NOCODE && p2->codes[i] == NOCODE
4950 	  && p1->weights[i] == 0 && p2->weights[i] == 0) {
4951 	/* aligning gap with gap -- just output a gap
4952 	   out->codes[i] is already set to NOCODE so need not set that */
4953 	out->weights[i] = 0;
4954 	continue;
4955       }
4956       int iRate = rates->ratecat[i];
4957       numeric_t *expeigen1 = &expeigenRates1[iRate*4];
4958       numeric_t *expeigen2 = &expeigenRates2[iRate*4];
4959       numeric_t *f1 = GET_FREQ(p1,i,/*IN/OUT*/iFreq1);
4960       numeric_t *f2 = GET_FREQ(p2,i,/*IN/OUT*/iFreq2);
4961       numeric_t *fOut = GET_FREQ(out,i,/*IN/OUT*/iFreqOut);
4962       assert(fOut != NULL);
4963 
4964       if (f1 == NULL) {
4965 	f1 = &transmat->codeFreq[p1->codes[i]][0]; /* codeFreq includes an entry for NOCODE */
4966 	double w = p1->weights[i];
4967 	if (w > 0.0 && w < 1.0) {
4968 	  for (j = 0; j < 4; j++)
4969 	    f1mix[j] = w * f1[j] + (1.0-w) * fGap[j];
4970 	  f1 = f1mix;
4971 	}
4972       }
4973       if (f2 == NULL) {
4974 	f2 = &transmat->codeFreq[p2->codes[i]][0];
4975 	double w = p2->weights[i];
4976 	if (w > 0.0 && w < 1.0) {
4977 	  for (j = 0; j < 4; j++)
4978 	    f2mix[j] = w * f2[j] + (1.0-w) * fGap[j];
4979 	  f2 = f2mix;
4980 	}
4981       }
4982       numeric_t fMult1[4] ALIGNED;	/* rotated1 * expeigen1 */
4983       numeric_t fMult2[4] ALIGNED;	/* rotated2 * expeigen2 */
4984 #if 0 /* SSE3 is slower */
4985       vector_multiply(f1, expeigen1, 4, /*OUT*/fMult1);
4986       vector_multiply(f2, expeigen2, 4, /*OUT*/fMult2);
4987 #else
4988       for (j = 0; j < 4; j++) {
4989 	fMult1[j] = f1[j]*expeigen1[j];
4990 	fMult2[j] = f2[j]*expeigen2[j];
4991       }
4992 #endif
4993       numeric_t fPost[4] ALIGNED;		/* in  unrotated space */
4994       for (j = 0; j < 4; j++) {
4995 #if 0 /* SSE3 is slower */
4996 	fPost[j] = vector_dot_product_rot(fMult1, fMult2, &transmat->codeFreq[j][0], 4)
4997 	  * transmat->statinv[j]; */
4998 #else
4999 	double out1 = 0;
5000 	double out2 = 0;
5001 	for (k = 0; k < 4; k++) {
5002 	  out1 += fMult1[k] * transmat->codeFreq[j][k];
5003 	  out2 += fMult2[k] * transmat->codeFreq[j][k];
5004 	}
5005 	fPost[j] = out1*out2*transmat->statinv[j];
5006 #endif
5007       }
5008       double fPostTot = 0;
5009       for (j = 0; j < 4; j++)
5010 	fPostTot += fPost[j];
5011       assert(fPostTot > fPostTotalTolerance);
5012       double fPostInv = 1.0/fPostTot;
5013 #if 0 /* SSE3 is slower */
5014       vector_multiply_by(fPost, fPostInv, 4);
5015 #else
5016       for (j = 0; j < 4; j++)
5017 	fPost[j] *= fPostInv;
5018 #endif
5019 
5020       /* and finally, divide by stat again & rotate to give the new frequencies */
5021       matrixt_by_vector4(transmat->eigeninvT, fPost, /*OUT*/fOut);
5022     }  /* end loop over position i */
5023   } else if (nCodes == 20) {	/* matrix model on amino acids */
5024     numeric_t *fGap = &transmat->codeFreq[NOCODE][0];
5025     numeric_t f1mix[20] ALIGNED;
5026     numeric_t f2mix[20] ALIGNED;
5027 
5028     for (i=0; i < nPos; i++) {
5029       if (p1->codes[i] == NOCODE && p2->codes[i] == NOCODE
5030 	  && p1->weights[i] == 0 && p2->weights[i] == 0) {
5031 	/* aligning gap with gap -- just output a gap
5032 	   out->codes[i] is already set to NOCODE so need not set that */
5033 	out->weights[i] = 0;
5034 	continue;
5035       }
5036       int iRate = rates->ratecat[i];
5037       numeric_t *expeigen1 = &expeigenRates1[iRate*20];
5038       numeric_t *expeigen2 = &expeigenRates2[iRate*20];
5039       numeric_t *f1 = GET_FREQ(p1,i,/*IN/OUT*/iFreq1);
5040       numeric_t *f2 = GET_FREQ(p2,i,/*IN/OUT*/iFreq2);
5041       numeric_t *fOut = GET_FREQ(out,i,/*IN/OUT*/iFreqOut);
5042       assert(fOut != NULL);
5043 
5044       if (f1 == NULL) {
5045 	f1 = &transmat->codeFreq[p1->codes[i]][0]; /* codeFreq includes an entry for NOCODE */
5046 	double w = p1->weights[i];
5047 	if (w > 0.0 && w < 1.0) {
5048 	  for (j = 0; j < 20; j++)
5049 	    f1mix[j] = w * f1[j] + (1.0-w) * fGap[j];
5050 	  f1 = f1mix;
5051 	}
5052       }
5053       if (f2 == NULL) {
5054 	f2 = &transmat->codeFreq[p2->codes[i]][0];
5055 	double w = p2->weights[i];
5056 	if (w > 0.0 && w < 1.0) {
5057 	  for (j = 0; j < 20; j++)
5058 	    f2mix[j] = w * f2[j] + (1.0-w) * fGap[j];
5059 	  f2 = f2mix;
5060 	}
5061       }
5062       numeric_t fMult1[20] ALIGNED;	/* rotated1 * expeigen1 */
5063       numeric_t fMult2[20] ALIGNED;	/* rotated2 * expeigen2 */
5064       vector_multiply(f1, expeigen1, 20, /*OUT*/fMult1);
5065       vector_multiply(f2, expeigen2, 20, /*OUT*/fMult2);
5066       numeric_t fPost[20] ALIGNED;		/* in  unrotated space */
5067       for (j = 0; j < 20; j++) {
5068 	numeric_t value = vector_dot_product_rot(fMult1, fMult2, &transmat->codeFreq[j][0], 20)
5069 	  * transmat->statinv[j];
5070 	/* Added this logic try to avoid rare numerical problems */
5071 	fPost[j] = value >= 0 ? value : 0;
5072       }
5073       double fPostTot = vector_sum(fPost, 20);
5074       assert(fPostTot > fPostTotalTolerance);
5075       double fPostInv = 1.0/fPostTot;
5076       vector_multiply_by(/*IN/OUT*/fPost, fPostInv, 20);
5077       int ch = -1;		/* the dominant character, if any */
5078       if (!exactML) {
5079 	for (j = 0; j < 20; j++) {
5080 	  if (fPost[j] >= approxMLminf) {
5081 	    ch = j;
5082 	    break;
5083 	  }
5084 	}
5085       }
5086 
5087       /* now, see if we can use the approximation
5088 	 fPost ~= (1 or 0) * w + nearP * (1-w)
5089 	 to avoid rotating */
5090       double w = 0;
5091       if (ch >= 0) {
5092 	w = (fPost[ch] - transmat->nearP[ch][ch]) / (1.0 - transmat->nearP[ch][ch]);
5093 	for (j = 0; j < 20; j++) {
5094 	  if (j != ch) {
5095 	    double fRough = (1.0-w) * transmat->nearP[ch][j];
5096 	    if (fRough < fPost[j]  * approxMLminratio) {
5097 	      ch = -1;		/* give up on the approximation */
5098 	      break;
5099 	    }
5100 	  }
5101 	}
5102       }
5103       if (ch >= 0) {
5104 	nAAPosteriorRough++;
5105 	double wInvStat = w * transmat->statinv[ch];
5106 	for (j = 0; j < 20; j++)
5107 	  fOut[j] = wInvStat * transmat->codeFreq[ch][j] + (1.0-w) * transmat->nearFreq[ch][j];
5108       } else {
5109 	/* and finally, divide by stat again & rotate to give the new frequencies */
5110 	nAAPosteriorExact++;
5111 	for (j = 0; j < 20; j++)
5112 	  fOut[j] = vector_multiply_sum(fPost, &transmat->eigeninv[j][0], 20);
5113       }
5114     } /* end loop over position i */
5115   } else {
5116     assert(0);			/* illegal nCodes */
5117   }
5118 
5119   if (transmat != NULL) {
5120     expeigenRates1 = myfree(expeigenRates1, sizeof(numeric_t) * rates->nRateCategories * nCodes);
5121     expeigenRates2 = myfree(expeigenRates2, sizeof(numeric_t) * rates->nRateCategories * nCodes);
5122   }
5123 
5124   /* Reallocate out->vectors to be the right size */
5125   out->nVectors = iFreqOut;
5126   if (out->nVectors == 0)
5127     out->vectors = (numeric_t*)myfree(out->vectors, sizeof(numeric_t)*nCodes*nPos);
5128   else
5129     out->vectors = (numeric_t*)myrealloc(out->vectors,
5130 				     /*OLDSIZE*/sizeof(numeric_t)*nCodes*nPos,
5131 				     /*NEWSIZE*/sizeof(numeric_t)*nCodes*out->nVectors,
5132 				     /*copy*/true); /* try to save space */
5133   nProfileFreqAlloc += out->nVectors;
5134   nProfileFreqAvoid += nPos - out->nVectors;
5135 
5136   /* compute total constraints */
5137   for (i = 0; i < nConstraints; i++) {
5138     out->nOn[i] = p1->nOn[i] + p2->nOn[i];
5139     out->nOff[i] = p1->nOff[i] + p2->nOff[i];
5140   }
5141   nPosteriorCompute++;
5142   return(out);
5143 }
5144 
PSameVector(double length,rates_t * rates)5145 double *PSameVector(double length, rates_t *rates) {
5146   double *pSame = mymalloc(sizeof(double) * rates->nRateCategories);
5147   int iRate;
5148   for (iRate = 0; iRate < rates->nRateCategories; iRate++)
5149     pSame[iRate] = 0.25 + 0.75 * exp((-4.0/3.0) * fabs(length*rates->rates[iRate]));
5150   return(pSame);
5151 }
5152 
PDiffVector(double * pSame,rates_t * rates)5153 double *PDiffVector(double *pSame, rates_t *rates) {
5154   double *pDiff = mymalloc(sizeof(double) * rates->nRateCategories);
5155   int iRate;
5156   for (iRate = 0; iRate < rates->nRateCategories; iRate++)
5157     pDiff[iRate] = (1.0 - pSame[iRate])/3.0;
5158   return(pDiff);
5159 }
5160 
ExpEigenRates(double length,transition_matrix_t * transmat,rates_t * rates)5161 numeric_t *ExpEigenRates(double length, transition_matrix_t *transmat, rates_t *rates) {
5162   numeric_t *expeigen = mymalloc(sizeof(numeric_t) * nCodes * rates->nRateCategories);
5163   int iRate, j;
5164   for (iRate = 0; iRate < rates->nRateCategories; iRate++) {
5165     for (j = 0; j < nCodes; j++) {
5166       double relLen = length * rates->rates[iRate];
5167       /* very short branch lengths lead to numerical problems so prevent them */
5168       if (relLen < MLMinRelBranchLength)
5169 	relLen  = MLMinRelBranchLength;
5170       expeigen[iRate*nCodes + j] = exp(relLen * transmat->eigenval[j]);
5171     }
5172   }
5173   return(expeigen);
5174 }
5175 
PairLogLk(profile_t * pA,profile_t * pB,double length,int nPos,transition_matrix_t * transmat,rates_t * rates,double * site_likelihoods)5176 double PairLogLk(profile_t *pA, profile_t *pB, double length, int nPos,
5177 		 /*OPTIONAL*/transition_matrix_t *transmat,
5178 		 rates_t *rates,
5179 		 /*OPTIONAL IN/OUT*/double *site_likelihoods) {
5180   double lk = 1.0;
5181   double loglk = 0.0;		/* stores underflow of lk during the loop over positions */
5182   int i,j;
5183   assert(rates != NULL && rates->nRateCategories > 0);
5184   numeric_t *expeigenRates = NULL;
5185   if (transmat != NULL)
5186     expeigenRates = ExpEigenRates(length, transmat, rates);
5187 
5188   if (transmat == NULL) {	/* Jukes-Cantor */
5189     assert (nCodes == 4);
5190     double *pSame = PSameVector(length, rates);
5191     double *pDiff = PDiffVector(pSame, rates);
5192 
5193     int iFreqA = 0;
5194     int iFreqB = 0;
5195     for (i = 0; i < nPos; i++) {
5196       int iRate = rates->ratecat[i];
5197       double wA = pA->weights[i];
5198       double wB = pB->weights[i];
5199       int codeA = pA->codes[i];
5200       int codeB = pB->codes[i];
5201       numeric_t *fA = GET_FREQ(pA,i,/*IN/OUT*/iFreqA);
5202       numeric_t *fB = GET_FREQ(pB,i,/*IN/OUT*/iFreqB);
5203       double lkAB = 0;
5204 
5205       if (fA == NULL && fB == NULL) {
5206 	if (codeA == NOCODE) {	/* A is all gaps */
5207 	  /* gap to gap is sum(j) 0.25 * (0.25 * pSame + 0.75 * pDiff) = sum(i) 0.25*0.25 = 0.25
5208 	     gap to any character gives the same result
5209 	  */
5210 	  lkAB = 0.25;
5211 	} else if (codeB == NOCODE) { /* B is all gaps */
5212 	  lkAB = 0.25;
5213 	} else if (codeA == codeB) { /* A and B match */
5214 	  lkAB = pSame[iRate] * wA*wB + 0.25 * (1-wA*wB);
5215 	} else {		/* codeA != codeB */
5216 	  lkAB = pDiff[iRate] * wA*wB + 0.25 * (1-wA*wB);
5217 	}
5218       } else if (fA == NULL) {
5219 	/* Compare codeA to profile of B */
5220 	if (codeA == NOCODE)
5221 	  lkAB = 0.25;
5222 	else
5223 	  lkAB = wA * (pDiff[iRate] + fB[codeA] * (pSame[iRate]-pDiff[iRate])) + (1.0-wA) * 0.25;
5224 	/* because lkAB = wA * P(codeA->B) + (1-wA) * 0.25
5225 	   P(codeA -> B) = sum(j) P(B==j) * (j==codeA ? pSame : pDiff)
5226 	   = sum(j) P(B==j) * pDiff +
5227 	   = pDiff + P(B==codeA) * (pSame-pDiff)
5228 	*/
5229       } else if (fB == NULL) { /* Compare codeB to profile of A */
5230 	if (codeB == NOCODE)
5231 	  lkAB = 0.25;
5232 	else
5233 	  lkAB = wB * (pDiff[iRate] + fA[codeB] * (pSame[iRate]-pDiff[iRate])) + (1.0-wB) * 0.25;
5234       } else { /* both are full profiles */
5235 	for (j = 0; j < 4; j++)
5236 	  lkAB += fB[j] * (fA[j] * pSame[iRate] + (1-fA[j])* pDiff[iRate]); /* P(A|B) */
5237       }
5238       assert(lkAB > 0);
5239       lk *= lkAB;
5240       while (lk < LkUnderflow) {
5241 	lk *= LkUnderflowInv;
5242 	loglk -= LogLkUnderflow;
5243       }
5244       if (site_likelihoods != NULL)
5245 	site_likelihoods[i] *= lkAB;
5246     }
5247     pSame = myfree(pSame, sizeof(double) * rates->nRateCategories);
5248     pDiff = myfree(pDiff, sizeof(double) * rates->nRateCategories);
5249   } else if (nCodes == 4) {	/* matrix model on nucleotides */
5250     int iFreqA = 0;
5251     int iFreqB = 0;
5252     numeric_t fAmix[4], fBmix[4];
5253     numeric_t *fGap = &transmat->codeFreq[NOCODE][0];
5254 
5255     for (i = 0; i < nPos; i++) {
5256       int iRate = rates->ratecat[i];
5257       numeric_t *expeigen = &expeigenRates[iRate*4];
5258       double wA = pA->weights[i];
5259       double wB = pB->weights[i];
5260       if (wA == 0 && wB == 0 && pA->codes[i] == NOCODE && pB->codes[i] == NOCODE) {
5261 	/* Likelihood of A vs B is 1, so nothing changes
5262 	   Do not need to advance iFreqA or iFreqB */
5263 	continue;
5264       }
5265       numeric_t *fA = GET_FREQ(pA,i,/*IN/OUT*/iFreqA);
5266       numeric_t *fB = GET_FREQ(pB,i,/*IN/OUT*/iFreqB);
5267       if (fA == NULL)
5268 	fA = &transmat->codeFreq[pA->codes[i]][0];
5269       if (wA > 0.0 && wA < 1.0) {
5270 	for (j  = 0; j < 4; j++)
5271 	  fAmix[j] = wA*fA[j] + (1.0-wA)*fGap[j];
5272 	fA = fAmix;
5273       }
5274       if (fB == NULL)
5275 	fB = &transmat->codeFreq[pB->codes[i]][0];
5276       if (wB > 0.0 && wB < 1.0) {
5277 	for (j  = 0; j < 4; j++)
5278 	  fBmix[j] = wB*fB[j] + (1.0-wB)*fGap[j];
5279 	fB = fBmix;
5280       }
5281       /* SSE3 instructions do not speed this step up:
5282 	 numeric_t lkAB = vector_multiply3_sum(expeigen, fA, fB); */
5283 		// dsp this is where check for <=0 was added in 2.1.1.LG
5284       double lkAB = 0;
5285       for (j = 0; j < 4; j++)
5286 	lkAB += expeigen[j]*fA[j]*fB[j];
5287       assert(lkAB > 0);
5288       if (site_likelihoods != NULL)
5289 	site_likelihoods[i] *= lkAB;
5290       lk *= lkAB;
5291       while (lk < LkUnderflow) {
5292 	lk *= LkUnderflowInv;
5293 	loglk -= LogLkUnderflow;
5294       }
5295       while (lk > LkUnderflowInv) {
5296 	lk *= LkUnderflow;
5297 	loglk += LogLkUnderflow;
5298       }
5299     }
5300   } else if (nCodes == 20) {	/* matrix model on amino acids */
5301     int iFreqA = 0;
5302     int iFreqB = 0;
5303     numeric_t fAmix[20], fBmix[20];
5304     numeric_t *fGap = &transmat->codeFreq[NOCODE][0];
5305 
5306     for (i = 0; i < nPos; i++) {
5307       int iRate = rates->ratecat[i];
5308       numeric_t *expeigen = &expeigenRates[iRate*20];
5309       double wA = pA->weights[i];
5310       double wB = pB->weights[i];
5311       if (wA == 0 && wB == 0 && pA->codes[i] == NOCODE && pB->codes[i] == NOCODE) {
5312 	/* Likelihood of A vs B is 1, so nothing changes
5313 	   Do not need to advance iFreqA or iFreqB */
5314 	continue;
5315       }
5316       numeric_t *fA = GET_FREQ(pA,i,/*IN/OUT*/iFreqA);
5317       numeric_t *fB = GET_FREQ(pB,i,/*IN/OUT*/iFreqB);
5318       if (fA == NULL)
5319 	fA = &transmat->codeFreq[pA->codes[i]][0];
5320       if (wA > 0.0 && wA < 1.0) {
5321 	for (j  = 0; j < 20; j++)
5322 	  fAmix[j] = wA*fA[j] + (1.0-wA)*fGap[j];
5323 	fA = fAmix;
5324       }
5325       if (fB == NULL)
5326 	fB = &transmat->codeFreq[pB->codes[i]][0];
5327       if (wB > 0.0 && wB < 1.0) {
5328 	for (j  = 0; j < 20; j++)
5329 	  fBmix[j] = wB*fB[j] + (1.0-wB)*fGap[j];
5330 	fB = fBmix;
5331       }
5332       numeric_t lkAB = vector_multiply3_sum(expeigen, fA, fB, 20);
5333       if (!(lkAB > 0)) {
5334 	/* If this happens, it indicates a numerical problem that needs to be addressed elsewhere,
5335 	   so report all the details */
5336 	fprintf(stderr, "# FastTree.c::PairLogLk -- numerical problem!\n");
5337 	fprintf(stderr, "# This block is intended for loading into R\n");
5338 
5339 	fprintf(stderr, "lkAB = %.8g\n", lkAB);
5340 	fprintf(stderr, "Branch_length= %.8g\nalignment_position=%d\nnCodes=%d\nrate_category=%d\nrate=%.8g\n",
5341 		length, i, nCodes, iRate, rates->rates[iRate]);
5342 	fprintf(stderr, "wA=%.8g\nwB=%.8g\n", wA, wB);
5343 	fprintf(stderr, "codeA = %d\ncodeB = %d\n", pA->codes[i], pB->codes[i]);
5344 
5345 	fprintf(stderr, "fA = c(");
5346 	for (j = 0; j < nCodes; j++) fprintf(stderr, "%s %.8g", j==0?"":",", fA[j]);
5347 	fprintf(stderr,")\n");
5348 
5349 	fprintf(stderr, "fB = c(");
5350 	for (j = 0; j < nCodes; j++) fprintf(stderr, "%s %.8g", j==0?"":",", fB[j]);
5351 	fprintf(stderr,")\n");
5352 
5353 	fprintf(stderr, "stat = c(");
5354 	for (j = 0; j < nCodes; j++) fprintf(stderr, "%s %.8g", j==0?"":",", transmat->stat[j]);
5355 	fprintf(stderr,")\n");
5356 
5357 	fprintf(stderr, "eigenval = c(");
5358 	for (j = 0; j < nCodes; j++) fprintf(stderr, "%s %.8g", j==0?"":",", transmat->eigenval[j]);
5359 	fprintf(stderr,")\n");
5360 
5361 	fprintf(stderr, "expeigen = c(");
5362 	for (j = 0; j < nCodes; j++) fprintf(stderr, "%s %.8g", j==0?"":",", expeigen[j]);
5363 	fprintf(stderr,")\n");
5364 
5365 	int k;
5366 	fprintf(stderr, "codeFreq = c(");
5367 	for (j = 0; j < nCodes; j++) for(k = 0; k < nCodes; k++) fprintf(stderr, "%s %.8g", j==0 && k==0?"":",",
5368 									     transmat->codeFreq[j][k]);
5369 	fprintf(stderr,")\n");
5370 
5371 	fprintf(stderr, "eigeninv = c(");
5372 	for (j = 0; j < nCodes; j++) for(k = 0; k < nCodes; k++) fprintf(stderr, "%s %.8g", j==0 && k==0?"":",",
5373 									     transmat->eigeninv[j][k]);
5374 	fprintf(stderr,")\n");
5375 
5376 	fprintf(stderr, "# Transform into matrices and compute un-rotated vectors for profiles A and B\n");
5377 	fprintf(stderr, "codeFreq = matrix(codeFreq,nrow=20);\n");
5378 	fprintf(stderr, "eigeninv = matrix(eigeninv,nrow=20);\n");
5379 	fputs("unrotA = stat * (eigeninv %*% fA)\n", stderr);
5380 	fputs("unrotB = stat * (eigeninv %*% fB)\n", stderr);
5381 	fprintf(stderr,"# End of R block\n");
5382       }
5383       assert(lkAB > 0);
5384       if (site_likelihoods != NULL)
5385 	site_likelihoods[i] *= lkAB;
5386       lk *= lkAB;
5387       while (lk < LkUnderflow) {
5388 	lk *= LkUnderflowInv;
5389 	loglk -= LogLkUnderflow;
5390       }
5391       while (lk > LkUnderflowInv) {
5392 	lk *= LkUnderflow;
5393 	loglk += LogLkUnderflow;
5394       }
5395     }
5396   } else {
5397     assert(0);			/* illegal nCodes */
5398   }
5399   if (transmat != NULL)
5400     expeigenRates = myfree(expeigenRates, sizeof(numeric_t) * rates->nRateCategories * 20);
5401   loglk += log(lk);
5402   nLkCompute++;
5403   return(loglk);
5404 }
5405 
MLQuartetLogLk(profile_t * pA,profile_t * pB,profile_t * pC,profile_t * pD,int nPos,transition_matrix_t * transmat,rates_t * rates,double branch_lengths[5],double * site_likelihoods)5406 double MLQuartetLogLk(profile_t *pA, profile_t *pB, profile_t *pC, profile_t *pD,
5407 		      int nPos, /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
5408 		      /*IN*/double branch_lengths[5],
5409 		      /*OPTIONAL OUT*/double *site_likelihoods) {
5410   profile_t *pAB = PosteriorProfile(pA, pB,
5411 				    branch_lengths[0], branch_lengths[1],
5412 				    transmat,
5413 				    rates,
5414 				    nPos, /*nConstraints*/0);
5415   profile_t *pCD = PosteriorProfile(pC, pD,
5416 				    branch_lengths[2], branch_lengths[3],
5417 				    transmat,
5418 				    rates,
5419 				    nPos, /*nConstraints*/0);
5420   if (site_likelihoods != NULL) {
5421     int i;
5422     for (i = 0; i < nPos; i++)
5423       site_likelihoods[i] = 1.0;
5424   }
5425   /* Roughly, P(A,B,C,D) = P(A) P(B|A) P(D|C) P(AB | CD) */
5426   double loglk = PairLogLk(pA, pB, branch_lengths[0]+branch_lengths[1],
5427 			   nPos, transmat, rates, /*OPTIONAL IN/OUT*/site_likelihoods)
5428     + PairLogLk(pC, pD, branch_lengths[2]+branch_lengths[3],
5429 		nPos, transmat, rates, /*OPTIONAL IN/OUT*/site_likelihoods)
5430     + PairLogLk(pAB, pCD, branch_lengths[4],
5431 		nPos, transmat, rates, /*OPTIONAL IN/OUT*/site_likelihoods);
5432   pAB = FreeProfile(pAB, nPos, /*nConstraints*/0);
5433   pCD = FreeProfile(pCD, nPos, /*nConstraints*/0);
5434   return(loglk);
5435 }
5436 
PairNegLogLk(double x,void * data)5437 double PairNegLogLk(double x, void *data) {
5438   quartet_opt_t *qo = (quartet_opt_t *)data;
5439   assert(qo != NULL);
5440   assert(qo->pair1 != NULL && qo->pair2 != NULL);
5441   qo->nEval++;
5442   double loglk = PairLogLk(qo->pair1, qo->pair2, x, qo->nPos, qo->transmat, qo->rates, /*site_lk*/NULL);
5443   assert(loglk < 1e100);
5444   if (verbose > 5)
5445     fprintf(stderr, "PairLogLk(%.4f) =  %.4f\n", x, loglk);
5446   return(-loglk);
5447 }
5448 
MLQuartetOptimize(profile_t * pA,profile_t * pB,profile_t * pC,profile_t * pD,int nPos,transition_matrix_t * transmat,rates_t * rates,double branch_lengths[5],bool * pStarTest,double * site_likelihoods)5449 double MLQuartetOptimize(profile_t *pA, profile_t *pB, profile_t *pC, profile_t *pD,
5450 			 int nPos, /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
5451 			 /*IN/OUT*/double branch_lengths[5],
5452 			 /*OPTIONAL OUT*/bool *pStarTest,
5453 			 /*OPTIONAL OUT*/double *site_likelihoods) {
5454   int j;
5455   double start_length[5];
5456   for (j = 0; j < 5; j++) {
5457     start_length[j] = branch_lengths[j];
5458     if (branch_lengths[j] < MLMinBranchLength)
5459       branch_lengths[j] = MLMinBranchLength;
5460   }
5461   quartet_opt_t qopt = { nPos, transmat, rates, /*nEval*/0,
5462 			 /*pair1*/NULL, /*pair2*/NULL };
5463   double f2x, negloglk;
5464 
5465   if (pStarTest != NULL)
5466     *pStarTest = false;
5467 
5468   /* First optimize internal branch, then branch to A, B, C, D, in turn
5469      May use star test to quit after internal branch
5470    */
5471   profile_t *pAB = PosteriorProfile(pA, pB,
5472 				    branch_lengths[LEN_A], branch_lengths[LEN_B],
5473 				    transmat, rates, nPos, /*nConstraints*/0);
5474   profile_t *pCD = PosteriorProfile(pC, pD,
5475 				    branch_lengths[LEN_C], branch_lengths[LEN_D],
5476 				    transmat, rates, nPos, /*nConstraints*/0);
5477   qopt.pair1 = pAB;
5478   qopt.pair2 = pCD;
5479   branch_lengths[LEN_I] = onedimenmin(/*xmin*/MLMinBranchLength,
5480 				      /*xguess*/branch_lengths[LEN_I],
5481 				      /*xmax*/6.0,
5482 				      PairNegLogLk,
5483 				      /*data*/&qopt,
5484 				      /*ftol*/MLFTolBranchLength,
5485 				      /*atol*/MLMinBranchLengthTolerance,
5486 				      /*OUT*/&negloglk,
5487 				      /*OUT*/&f2x);
5488 
5489   if (pStarTest != NULL) {
5490     assert(site_likelihoods == NULL);
5491     double loglkStar = -PairNegLogLk(MLMinBranchLength, &qopt);
5492     if (loglkStar < -negloglk - closeLogLkLimit) {
5493       *pStarTest = true;
5494       double off = PairLogLk(pA, pB,
5495 			     branch_lengths[LEN_A] + branch_lengths[LEN_B],
5496 			     qopt.nPos, qopt.transmat, qopt.rates, /*site_lk*/NULL)
5497 	+ PairLogLk(pC, pD,
5498 		    branch_lengths[LEN_C] + branch_lengths[LEN_D],
5499 		    qopt.nPos, qopt.transmat, qopt.rates, /*site_lk*/NULL);
5500       pAB = FreeProfile(pAB, nPos, /*nConstraints*/0);
5501       pCD = FreeProfile(pCD, nPos, /*nConstraints*/0);
5502       return (-negloglk + off);
5503     }
5504   }
5505   pAB = FreeProfile(pAB, nPos, /*nConstraints*/0);
5506   profile_t *pBCD = PosteriorProfile(pB, pCD,
5507 				     branch_lengths[LEN_B], branch_lengths[LEN_I],
5508 				     transmat, rates, nPos, /*nConstraints*/0);
5509   qopt.pair1 = pA;
5510   qopt.pair2 = pBCD;
5511   branch_lengths[LEN_A] = onedimenmin(/*xmin*/MLMinBranchLength,
5512 				      /*xguess*/branch_lengths[LEN_A],
5513 				      /*xmax*/6.0,
5514 				      PairNegLogLk,
5515 				      /*data*/&qopt,
5516 				      /*ftol*/MLFTolBranchLength,
5517 				      /*atol*/MLMinBranchLengthTolerance,
5518 				      /*OUT*/&negloglk,
5519 				      /*OUT*/&f2x);
5520   pBCD = FreeProfile(pBCD, nPos, /*nConstraints*/0);
5521   profile_t *pACD = PosteriorProfile(pA, pCD,
5522 				     branch_lengths[LEN_A], branch_lengths[LEN_I],
5523 				     transmat, rates, nPos, /*nConstraints*/0);
5524   qopt.pair1 = pB;
5525   qopt.pair2 = pACD;
5526   branch_lengths[LEN_B] = onedimenmin(/*xmin*/MLMinBranchLength,
5527 				      /*xguess*/branch_lengths[LEN_B],
5528 				      /*xmax*/6.0,
5529 				      PairNegLogLk,
5530 				      /*data*/&qopt,
5531 				      /*ftol*/MLFTolBranchLength,
5532 				      /*atol*/MLMinBranchLengthTolerance,
5533 				      /*OUT*/&negloglk,
5534 				      /*OUT*/&f2x);
5535   pACD = FreeProfile(pACD, nPos, /*nConstraints*/0);
5536   pCD = FreeProfile(pCD, nPos, /*nConstraints*/0);
5537   pAB = PosteriorProfile(pA, pB,
5538 			 branch_lengths[LEN_A], branch_lengths[LEN_B],
5539 			 transmat, rates, nPos, /*nConstraints*/0);
5540   profile_t *pABD = PosteriorProfile(pAB, pD,
5541 				     branch_lengths[LEN_I], branch_lengths[LEN_D],
5542 				     transmat, rates, nPos, /*nConstraints*/0);
5543   qopt.pair1 = pC;
5544   qopt.pair2 = pABD;
5545   branch_lengths[LEN_C] = onedimenmin(/*xmin*/MLMinBranchLength,
5546 				      /*xguess*/branch_lengths[LEN_C],
5547 				      /*xmax*/6.0,
5548 				      PairNegLogLk,
5549 				      /*data*/&qopt,
5550 				      /*ftol*/MLFTolBranchLength,
5551 				      /*atol*/MLMinBranchLengthTolerance,
5552 				      /*OUT*/&negloglk,
5553 				      /*OUT*/&f2x);
5554   pABD = FreeProfile(pABD, nPos, /*nConstraints*/0);
5555   profile_t *pABC = PosteriorProfile(pAB, pC,
5556 				     branch_lengths[LEN_I], branch_lengths[LEN_C],
5557 				     transmat, rates, nPos, /*nConstraints*/0);
5558   qopt.pair1 = pD;
5559   qopt.pair2 = pABC;
5560   branch_lengths[LEN_D] = onedimenmin(/*xmin*/MLMinBranchLength,
5561 				      /*xguess*/branch_lengths[LEN_D],
5562 				      /*xmax*/6.0,
5563 				      PairNegLogLk,
5564 				      /*data*/&qopt,
5565 				      /*ftol*/MLFTolBranchLength,
5566 				      /*atol*/MLMinBranchLengthTolerance,
5567 				      /*OUT*/&negloglk,
5568 				      /*OUT*/&f2x);
5569 
5570   /* Compute the total quartet likelihood
5571      PairLogLk(ABC,D) + PairLogLk(AB,C) + PairLogLk(A,B)
5572    */
5573   double loglkABCvsD = -negloglk;
5574   if (site_likelihoods) {
5575     for (j = 0; j < nPos; j++)
5576       site_likelihoods[j] = 1.0;
5577     PairLogLk(pABC, pD, branch_lengths[LEN_D],
5578 	      qopt.nPos, qopt.transmat, qopt.rates, /*IN/OUT*/site_likelihoods);
5579   }
5580   double quartetloglk = loglkABCvsD
5581     + PairLogLk(pAB, pC, branch_lengths[LEN_I] + branch_lengths[LEN_C],
5582 		qopt.nPos, qopt.transmat, qopt.rates,
5583 		/*IN/OUT*/site_likelihoods)
5584     + PairLogLk(pA, pB, branch_lengths[LEN_A] + branch_lengths[LEN_B],
5585 		qopt.nPos, qopt.transmat, qopt.rates,
5586 		/*IN/OUT*/site_likelihoods);
5587 
5588   pABC = FreeProfile(pABC, nPos, /*nConstraints*/0);
5589   pAB = FreeProfile(pAB, nPos, /*nConstraints*/0);
5590 
5591   if (verbose > 3) {
5592     double loglkStart = MLQuartetLogLk(pA, pB, pC, pD, nPos, transmat, rates, start_length, /*site_lk*/NULL);
5593     fprintf(stderr, "Optimize loglk from %.5f to %.5f eval %d lengths from\n"
5594 	    "   %.5f %.5f %.5f %.5f %.5f to\n"
5595 	    "   %.5f %.5f %.5f %.5f %.5f\n",
5596 	    loglkStart, quartetloglk, qopt.nEval,
5597 	    start_length[0], start_length[1], start_length[2], start_length[3], start_length[4],
5598 	    branch_lengths[0], branch_lengths[1], branch_lengths[2], branch_lengths[3], branch_lengths[4]);
5599   }
5600   return(quartetloglk);
5601 }
5602 
MLQuartetNNI(profile_t * profiles[4],transition_matrix_t * transmat,rates_t * rates,int nPos,int nConstraints,double criteria[3],numeric_t len[5],bool bFast)5603 nni_t MLQuartetNNI(profile_t *profiles[4],
5604 		   /*OPTIONAL*/transition_matrix_t *transmat,
5605 		   rates_t *rates,
5606 		   int nPos, int nConstraints,
5607 		   /*OUT*/double criteria[3], /* The three potential quartet log-likelihoods */
5608 		   /*IN/OUT*/numeric_t len[5],
5609 		   bool bFast)
5610 {
5611   int i;
5612   double lenABvsCD[5] = {len[LEN_A], len[LEN_B], len[LEN_C], len[LEN_D], len[LEN_I]};
5613   double lenACvsBD[5] = {len[LEN_A], len[LEN_C], len[LEN_B], len[LEN_D], len[LEN_I]};   /* Swap B & C */
5614   double lenADvsBC[5] = {len[LEN_A], len[LEN_D], len[LEN_C], len[LEN_B], len[LEN_I]};   /* Swap B & D */
5615   bool bConsiderAC = true;
5616   bool bConsiderAD = true;
5617   int iRound;
5618   int nRounds = mlAccuracy < 2 ? 2 : mlAccuracy;
5619   double penalty[3];
5620   QuartetConstraintPenalties(profiles, nConstraints, /*OUT*/penalty);
5621   if (penalty[ABvsCD] > penalty[ACvsBD] || penalty[ABvsCD] > penalty[ADvsBC])
5622     bFast = false;
5623 #ifdef OPENMP
5624       bFast = false;		/* turn off star topology test */
5625 #endif
5626 
5627   for (iRound = 0; iRound < nRounds; iRound++) {
5628     bool bStarTest = false;
5629     {
5630 #ifdef OPENMP
5631       #pragma omp parallel
5632       #pragma omp sections
5633 #endif
5634       {
5635 #ifdef OPENMP
5636         #pragma omp section
5637 #endif
5638 	{
5639 	  criteria[ABvsCD] = MLQuartetOptimize(profiles[0], profiles[1], profiles[2], profiles[3],
5640 					       nPos, transmat, rates,
5641 					       /*IN/OUT*/lenABvsCD,
5642 					       bFast ? &bStarTest : NULL,
5643 					       /*site_likelihoods*/NULL)
5644 	    - penalty[ABvsCD];	/* subtract penalty b/c we are trying to maximize log lk */
5645 	}
5646 
5647 #ifdef OPENMP
5648         #pragma omp section
5649 #else
5650 	if (bStarTest) {
5651 	  nStarTests++;
5652 	  criteria[ACvsBD] = -1e20;
5653 	  criteria[ADvsBC] = -1e20;
5654 	  len[LEN_I] = lenABvsCD[LEN_I];
5655 	  return(ABvsCD);
5656 	}
5657 #endif
5658 	{
5659 	  if (bConsiderAC)
5660 	    criteria[ACvsBD] = MLQuartetOptimize(profiles[0], profiles[2], profiles[1], profiles[3],
5661 						 nPos, transmat, rates,
5662 						 /*IN/OUT*/lenACvsBD, NULL, /*site_likelihoods*/NULL)
5663 	      - penalty[ACvsBD];
5664 	}
5665 
5666 #ifdef OPENMP
5667         #pragma omp section
5668 #endif
5669 	{
5670 	  if (bConsiderAD)
5671 	    criteria[ADvsBC] = MLQuartetOptimize(profiles[0], profiles[3], profiles[2], profiles[1],
5672 						 nPos, transmat, rates,
5673 						 /*IN/OUT*/lenADvsBC, NULL, /*site_likelihoods*/NULL)
5674 	      - penalty[ADvsBC];
5675 	}
5676       }
5677     } /* end parallel sections */
5678     if (mlAccuracy < 2) {
5679       /* If clearly worse then ABvsCD, or have short internal branch length and worse, then
5680          give up */
5681       if (criteria[ACvsBD] < criteria[ABvsCD] - closeLogLkLimit
5682 	  || (lenACvsBD[LEN_I] <= 2.0*MLMinBranchLength && criteria[ACvsBD] < criteria[ABvsCD]))
5683 	bConsiderAC = false;
5684       if (criteria[ADvsBC] < criteria[ABvsCD] - closeLogLkLimit
5685 	  || (lenADvsBC[LEN_I] <= 2.0*MLMinBranchLength && criteria[ADvsBC] < criteria[ABvsCD]))
5686 	bConsiderAD = false;
5687       if (!bConsiderAC && !bConsiderAD)
5688 	break;
5689       /* If clearly better than either alternative, then give up
5690          (Comparison is probably biased in favor of ABvsCD anyway) */
5691       if (criteria[ACvsBD] > criteria[ABvsCD] + closeLogLkLimit
5692 	  && criteria[ACvsBD] > criteria[ADvsBC] + closeLogLkLimit)
5693 	break;
5694       if (criteria[ADvsBC] > criteria[ABvsCD] + closeLogLkLimit
5695 	  && criteria[ADvsBC] > criteria[ACvsBD] + closeLogLkLimit)
5696 	break;
5697     }
5698   } /* end loop over rounds */
5699 
5700   if (verbose > 2) {
5701     fprintf(stderr, "Optimized quartet for %d rounds: ABvsCD %.5f ACvsBD %.5f ADvsBC %.5f\n",
5702 	    iRound, criteria[ABvsCD], criteria[ACvsBD], criteria[ADvsBC]);
5703   }
5704   if (criteria[ACvsBD] > criteria[ABvsCD] && criteria[ACvsBD] > criteria[ADvsBC]) {
5705     for (i = 0; i < 5; i++) len[i] = lenACvsBD[i];
5706     return(ACvsBD);
5707   } else if (criteria[ADvsBC] > criteria[ABvsCD] && criteria[ADvsBC] > criteria[ACvsBD]) {
5708     for (i = 0; i < 5; i++) len[i] = lenADvsBC[i];
5709     return(ADvsBC);
5710   } else {
5711     for (i = 0; i < 5; i++) len[i] = lenABvsCD[i];
5712     return(ABvsCD);
5713   }
5714 }
5715 
TreeLength(NJ_t * NJ,bool recomputeProfiles)5716 double TreeLength(/*IN/OUT*/NJ_t *NJ, bool recomputeProfiles) {
5717   if (recomputeProfiles) {
5718     traversal_t traversal2 = InitTraversal(NJ);
5719     int j = NJ->root;
5720     while((j = TraversePostorder(j, NJ, /*IN/OUT*/traversal2, /*pUp*/NULL)) >= 0) {
5721       /* nothing to do for leaves or root */
5722       if (j >= NJ->nSeq && j != NJ->root)
5723 	SetProfile(/*IN/OUT*/NJ, j, /*noweight*/-1.0);
5724     }
5725     traversal2 = FreeTraversal(traversal2,NJ);
5726   }
5727   UpdateBranchLengths(/*IN/OUT*/NJ);
5728   double total_len = 0;
5729   int iNode;
5730   for (iNode = 0; iNode < NJ->maxnode; iNode++)
5731     total_len += NJ->branchlength[iNode];
5732   return(total_len);
5733 }
5734 
TreeLogLk(NJ_t * NJ,double * site_loglk)5735 double TreeLogLk(/*IN*/NJ_t *NJ, /*OPTIONAL OUT*/double *site_loglk) {
5736   int i;
5737   if (NJ->nSeq < 2)
5738     return(0.0);
5739   double loglk = 0.0;
5740   double *site_likelihood = NULL;
5741   if (site_loglk != NULL) {
5742     site_likelihood = mymalloc(sizeof(double)*NJ->nPos);
5743     for (i = 0; i < NJ->nPos; i++) {
5744       site_likelihood[i] = 1.0;
5745       site_loglk[i] = 0.0;
5746     }
5747   }
5748   traversal_t traversal = InitTraversal(NJ);
5749   int node = NJ->root;
5750   while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
5751     int nChild = NJ->child[node].nChild;
5752     if (nChild == 0)
5753       continue;
5754     assert(nChild >= 2);
5755     int *children = NJ->child[node].child;
5756     double loglkchild = PairLogLk(NJ->profiles[children[0]], NJ->profiles[children[1]],
5757 				  NJ->branchlength[children[0]]+NJ->branchlength[children[1]],
5758 				  NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/site_likelihood);
5759     loglk += loglkchild;
5760     if (site_likelihood != NULL) {
5761       /* prevent underflows */
5762       for (i = 0; i < NJ->nPos; i++) {
5763 	while(site_likelihood[i] < LkUnderflow) {
5764 	  site_likelihood[i] *= LkUnderflowInv;
5765 	  site_loglk[i] -= LogLkUnderflow;
5766 	}
5767       }
5768     }
5769     if (verbose > 2)
5770       fprintf(stderr, "At %d: LogLk(%d:%.4f,%d:%.4f) = %.3f\n",
5771 	      node,
5772 	      children[0], NJ->branchlength[children[0]],
5773 	      children[1], NJ->branchlength[children[1]],
5774 	      loglkchild);
5775     if (NJ->child[node].nChild == 3) {
5776       assert(node == NJ->root);
5777       /* Infer the common parent of the 1st two to define the third... */
5778       profile_t *pAB = PosteriorProfile(NJ->profiles[children[0]],
5779 					NJ->profiles[children[1]],
5780 					NJ->branchlength[children[0]],
5781 					NJ->branchlength[children[1]],
5782 					NJ->transmat, &NJ->rates,
5783 					NJ->nPos, /*nConstraints*/0);
5784       double loglkup = PairLogLk(pAB, NJ->profiles[children[2]],
5785 				 NJ->branchlength[children[2]],
5786 				 NJ->nPos, NJ->transmat, &NJ->rates,
5787 				 /*IN/OUT*/site_likelihood);
5788       loglk += loglkup;
5789       if (verbose > 2)
5790 	fprintf(stderr, "At root %d: LogLk((%d/%d),%d:%.3f) = %.3f\n",
5791 		node, children[0], children[1], children[2],
5792 		NJ->branchlength[children[2]],
5793 		loglkup);
5794       pAB = FreeProfile(pAB, NJ->nPos, NJ->nConstraints);
5795     }
5796   }
5797   traversal = FreeTraversal(traversal,NJ);
5798   if (site_likelihood != NULL) {
5799     for (i = 0; i < NJ->nPos; i++) {
5800       site_loglk[i] += log(site_likelihood[i]);
5801     }
5802     site_likelihood = myfree(site_likelihood, sizeof(double)*NJ->nPos);
5803   }
5804 
5805   /* For Jukes-Cantor, with a tree of size 4, if the children of the root are
5806      (A,B), C, and D, then
5807      P(ABCD) = P(A) P(B|A) P(C|AB) P(D|ABC)
5808 
5809      Above we compute P(B|A) P(C|AB) P(D|ABC) -- note P(B|A) is at the child of root
5810      and P(C|AB) P(D|ABC) is at root.
5811 
5812      Similarly if the children of the root are C, D, and (A,B), then
5813      P(ABCD) = P(C|D) P(A|B) P(AB|CD) P(D), and above we compute that except for P(D)
5814 
5815      So we need to multiply by P(A) = 0.25, so we pay log(4) at each position
5816      (if ungapped). Each gapped position in any sequence reduces the payment by log(4)
5817 
5818      For JTT or GTR, we are computing P(A & B) and the posterior profiles are scaled to take
5819      the prior into account, so we do not need any correction.
5820      codeFreq[NOCODE] is scaled x higher so that P(-) = 1 not P(-)=1/nCodes, so gaps
5821      do not need to be corrected either.
5822    */
5823 
5824   if (nCodes == 4 && NJ->transmat == NULL) {
5825     int nGaps = 0;
5826     double logNCodes = log((double)nCodes);
5827     for (i = 0; i < NJ->nPos; i++) {
5828       int nGapsThisPos = 0;
5829       for (node = 0; node < NJ->nSeq; node++) {
5830 	unsigned char *codes = NJ->profiles[node]->codes;
5831 	if (codes[i] == NOCODE)
5832 	  nGapsThisPos++;
5833       }
5834       nGaps += nGapsThisPos;
5835       if (site_loglk != NULL) {
5836 	site_loglk[i] += nGapsThisPos * logNCodes;
5837 	if (nCodes == 4 && NJ->transmat == NULL)
5838 	  site_loglk[i] -= logNCodes;
5839       }
5840     }
5841     loglk -= NJ->nPos * logNCodes;
5842     loglk += nGaps * logNCodes;	/* do not pay for gaps -- only Jukes-Cantor */
5843   }
5844   return(loglk);
5845 }
5846 
SetMLGtr(NJ_t * NJ,double * freq_in,FILE * fpLog)5847 void SetMLGtr(/*IN/OUT*/NJ_t *NJ, /*OPTIONAL IN*/double *freq_in, /*OPTIONAL WRITE*/FILE *fpLog) {
5848   int i;
5849   assert(nCodes==4);
5850   gtr_opt_t gtr;
5851   gtr.NJ = NJ;
5852   gtr.fpLog = fpLog;
5853   if (freq_in != NULL) {
5854     for (i=0; i<4; i++)
5855       gtr.freq[i]=freq_in[i];
5856   } else {
5857     /* n[] and sum were int in FastTree 2.1.9 and earlier -- this
5858        caused gtr analyses to fail on analyses with >2e9 positions */
5859     long n[4] = {1,1,1,1};	/* pseudocounts */
5860     for (i=0; i<NJ->nSeq; i++) {
5861       unsigned char *codes = NJ->profiles[i]->codes;
5862       int iPos;
5863       for (iPos=0; iPos<NJ->nPos; iPos++)
5864 	if (codes[iPos] < 4)
5865 	  n[codes[iPos]]++;
5866     }
5867     long sum = n[0]+n[1]+n[2]+n[3];
5868     for (i=0; i<4; i++)
5869       gtr.freq[i] = n[i]/(double)sum;
5870   }
5871   for (i=0; i<6; i++)
5872     gtr.rates[i] = 1.0;
5873   int nRounds = mlAccuracy < 2 ? 2 : mlAccuracy;
5874   for (i = 0; i < nRounds; i++) {
5875     for (gtr.iRate = 0; gtr.iRate < 6; gtr.iRate++) {
5876       ProgressReport("Optimizing GTR model, step %d of %d", i*6+gtr.iRate+1, 12, 0, 0);
5877       double negloglk, f2x;
5878       gtr.rates[gtr.iRate] = onedimenmin(/*xmin*/0.05,
5879 					 /*xguess*/gtr.rates[gtr.iRate],
5880 					 /*xmax*/20.0,
5881 					 GTRNegLogLk,
5882 					 /*data*/&gtr,
5883 					 /*ftol*/0.001,
5884 					 /*atol*/0.0001,
5885 					 /*OUT*/&negloglk,
5886 					 /*OUT*/&f2x);
5887     }
5888   }
5889   /* normalize gtr so last rate is 1 -- specifying that rate separately is useful for optimization only */
5890   for (i = 0; i < 5; i++)
5891     gtr.rates[i] /= gtr.rates[5];
5892   gtr.rates[5] = 1.0;
5893   if (verbose) {
5894     fprintf(stderr, "GTR Frequencies: %.4f %.4f %.4f %.4f\n", gtr.freq[0], gtr.freq[1], gtr.freq[2], gtr.freq[3]);
5895     fprintf(stderr, "GTR rates(ac ag at cg ct gt) %.4f %.4f %.4f %.4f %.4f %.4f\n",
5896 	    gtr.rates[0],gtr.rates[1],gtr.rates[2],gtr.rates[3],gtr.rates[4],gtr.rates[5]);
5897   }
5898   if (fpLog != NULL) {
5899     fprintf(fpLog, "GTRFreq\t%.4f\t%.4f\t%.4f\t%.4f\n", gtr.freq[0], gtr.freq[1], gtr.freq[2], gtr.freq[3]);
5900     fprintf(fpLog, "GTRRates\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n",
5901 	    gtr.rates[0],gtr.rates[1],gtr.rates[2],gtr.rates[3],gtr.rates[4],gtr.rates[5]);
5902   }
5903   myfree(NJ->transmat, sizeof(transition_matrix_t));
5904   NJ->transmat = CreateGTR(gtr.rates, gtr.freq);
5905   RecomputeMLProfiles(/*IN/OUT*/NJ);
5906   OptimizeAllBranchLengths(/*IN/OUT*/NJ);
5907 }
5908 
GTRNegLogLk(double x,void * data)5909 double GTRNegLogLk(double x, void *data) {
5910 
5911   gtr_opt_t *gtr = (gtr_opt_t*)data;
5912   assert(nCodes == 4);
5913   assert(gtr->NJ != NULL);
5914   assert(gtr->iRate >= 0 && gtr->iRate < 6);
5915   assert(x > 0);
5916   transition_matrix_t *old = gtr->NJ->transmat;
5917   double rates[6];
5918   int i;
5919   for (i = 0; i < 6; i++)
5920     rates[i] = gtr->rates[i];
5921   rates[gtr->iRate] = x;
5922 
5923   FILE *fpLog = gtr->fpLog;
5924   if (fpLog)
5925     fprintf(fpLog, "GTR_Opt\tfreq %.5f %.5f %.5f %.5f rates %.5f %.5f %.5f %.5f %.5f %.5f\n",
5926           gtr->freq[0], gtr->freq[1], gtr->freq[2], gtr->freq[3],
5927           rates[0], rates[1], rates[2], rates[3], rates[4], rates[5]);
5928 
5929   gtr->NJ->transmat = CreateGTR(rates, gtr->freq);
5930   RecomputeMLProfiles(/*IN/OUT*/gtr->NJ);
5931   double loglk = TreeLogLk(gtr->NJ, /*site_loglk*/NULL);
5932   myfree(gtr->NJ->transmat, sizeof(transition_matrix_t));
5933   gtr->NJ->transmat = old;
5934   /* Do not recompute profiles -- assume the caller will do that */
5935   if (verbose > 2)
5936     fprintf(stderr, "GTR LogLk(%.5f %.5f %.5f %.5f %.5f %.5f) = %f\n",
5937 	    rates[0], rates[1], rates[2], rates[3], rates[4], rates[5], loglk);
5938   if (fpLog)
5939     fprintf(fpLog, "GTR_Opt\tGTR LogLk(%.5f %.5f %.5f %.5f %.5f %.5f) = %f\n",
5940 	    rates[0], rates[1], rates[2], rates[3], rates[4], rates[5], loglk);
5941   return(-loglk);
5942 }
5943 
5944 /* Caller must free the resulting vector of n rates */
MLSiteRates(int nRateCategories)5945 numeric_t *MLSiteRates(int nRateCategories) {
5946   /* Even spacing from 1/nRate to nRate */
5947   double logNCat = log((double)nRateCategories);
5948   double logMinRate = -logNCat;
5949   double logMaxRate = logNCat;
5950   double logd = (logMaxRate-logMinRate)/(double)(nRateCategories-1);
5951 
5952   numeric_t *rates = mymalloc(sizeof(numeric_t)*nRateCategories);
5953   int i;
5954   for (i = 0; i < nRateCategories; i++)
5955     rates[i] = exp(logMinRate + logd*(double)i);
5956   return(rates);
5957 }
5958 
MLSiteLikelihoodsByRate(NJ_t * NJ,numeric_t * rates,int nRateCategories)5959 double *MLSiteLikelihoodsByRate(/*IN*/NJ_t *NJ, /*IN*/numeric_t *rates, int nRateCategories) {
5960   double *site_loglk = mymalloc(sizeof(double)*NJ->nPos*nRateCategories);
5961 
5962   /* save the original rates */
5963   assert(NJ->rates.nRateCategories > 0);
5964   numeric_t *oldRates = NJ->rates.rates;
5965   NJ->rates.rates = mymalloc(sizeof(numeric_t) * NJ->rates.nRateCategories);
5966 
5967   /* Compute site likelihood for each rate */
5968   int iPos;
5969   int iRate;
5970   for (iRate = 0; iRate  < nRateCategories; iRate++) {
5971     int i;
5972     for (i = 0; i < NJ->rates.nRateCategories; i++)
5973       NJ->rates.rates[i] = rates[iRate];
5974     RecomputeMLProfiles(/*IN/OUT*/NJ);
5975     double loglk = TreeLogLk(NJ, /*OUT*/&site_loglk[NJ->nPos*iRate]);
5976     ProgressReport("Site likelihoods with rate category %d of %d", iRate+1, nRateCategories, 0, 0);
5977     if(verbose > 2) {
5978       fprintf(stderr, "Rate %.3f Loglk %.3f SiteLogLk", rates[iRate], loglk);
5979       for (iPos = 0; iPos < NJ->nPos; iPos++)
5980 	fprintf(stderr,"\t%.3f", site_loglk[NJ->nPos*iRate + iPos]);
5981       fprintf(stderr,"\n");
5982     }
5983   }
5984 
5985   /* restore original rates and profiles */
5986   myfree(NJ->rates.rates, sizeof(numeric_t) * NJ->rates.nRateCategories);
5987   NJ->rates.rates = oldRates;
5988   RecomputeMLProfiles(/*IN/OUT*/NJ);
5989 
5990   return(site_loglk);
5991 }
5992 
SetMLRates(NJ_t * NJ,int nRateCategories)5993 void SetMLRates(/*IN/OUT*/NJ_t *NJ, int nRateCategories) {
5994   assert(nRateCategories > 0);
5995   AllocRateCategories(/*IN/OUT*/&NJ->rates, 1, NJ->nPos); /* set to 1 category of rate 1 */
5996   if (nRateCategories == 1) {
5997     RecomputeMLProfiles(/*IN/OUT*/NJ);
5998     return;
5999   }
6000   numeric_t *rates = MLSiteRates(nRateCategories);
6001   double *site_loglk = MLSiteLikelihoodsByRate(/*IN*/NJ, /*IN*/rates, nRateCategories);
6002 
6003   /* Select best rate for each site, correcting for the prior
6004      For a prior, use a gamma distribution with shape parameter 3, scale 1/3, so
6005      Prior(rate) ~ rate**2 * exp(-3*rate)
6006      log Prior(rate) = C + 2 * log(rate) - 3 * rate
6007   */
6008   double sumRates = 0;
6009   int iPos;
6010   int iRate;
6011   for (iPos = 0; iPos < NJ->nPos; iPos++) {
6012     int iBest = -1;
6013     double dBest = -1e20;
6014     for (iRate = 0; iRate < nRateCategories; iRate++) {
6015       double site_loglk_with_prior = site_loglk[NJ->nPos*iRate + iPos]
6016 	+ 2.0 * log(rates[iRate]) - 3.0 * rates[iRate];
6017       if (site_loglk_with_prior > dBest) {
6018 	iBest = iRate;
6019 	dBest = site_loglk_with_prior;
6020       }
6021     }
6022     if (verbose > 2)
6023       fprintf(stderr, "Selected rate category %d rate %.3f for position %d\n",
6024 	      iBest, rates[iBest], iPos+1);
6025     NJ->rates.ratecat[iPos] = iBest;
6026     sumRates += rates[iBest];
6027   }
6028   site_loglk = myfree(site_loglk, sizeof(double)*NJ->nPos*nRateCategories);
6029 
6030   /* Force the rates to average to 1 */
6031   double avgRate = sumRates/NJ->nPos;
6032   for (iRate = 0; iRate < nRateCategories; iRate++)
6033     rates[iRate] /= avgRate;
6034 
6035   /* Save the rates */
6036   NJ->rates.rates = myfree(NJ->rates.rates, sizeof(numeric_t) * NJ->rates.nRateCategories);
6037   NJ->rates.rates = rates;
6038   NJ->rates.nRateCategories = nRateCategories;
6039 
6040   /* Update profiles based on rates */
6041   RecomputeMLProfiles(/*IN/OUT*/NJ);
6042 
6043   if (verbose) {
6044     fprintf(stderr, "Switched to using %d rate categories (CAT approximation)\n", nRateCategories);
6045     fprintf(stderr, "Rate categories were divided by %.3f so that average rate = 1.0\n", avgRate);
6046     fprintf(stderr, "CAT-based log-likelihoods may not be comparable across runs\n");
6047     if (!gammaLogLk)
6048       fprintf(stderr, "Use -gamma for approximate but comparable Gamma(20) log-likelihoods\n");
6049   }
6050 }
6051 
GammaLogLk(siteratelk_t * s,double * gamma_loglk_sites)6052 double GammaLogLk(/*IN*/siteratelk_t *s, /*OPTIONAL OUT*/double *gamma_loglk_sites) {
6053   int iRate, iPos;
6054   double *dRate = mymalloc(sizeof(double) * s->nRateCats);
6055   for (iRate = 0; iRate < s->nRateCats; iRate++) {
6056     /* The probability density for each rate is approximated by the total
6057        density between the midpoints */
6058     double pMin = iRate == 0 ? 0.0 :
6059       PGamma(s->mult * (s->rates[iRate-1] + s->rates[iRate])/2.0, s->alpha);
6060     double pMax = iRate == s->nRateCats-1 ? 1.0 :
6061       PGamma(s->mult * (s->rates[iRate]+s->rates[iRate+1])/2.0, s->alpha);
6062     dRate[iRate] = pMax-pMin;
6063   }
6064 
6065   double loglk = 0.0;
6066   for (iPos = 0; iPos < s->nPos; iPos++) {
6067     /* Prevent underflow on large trees by comparing to maximum loglk */
6068     double maxloglk = -1e20;
6069     for (iRate = 0; iRate < s->nRateCats; iRate++) {
6070       double site_loglk = s->site_loglk[s->nPos*iRate + iPos];
6071       if (site_loglk > maxloglk)
6072 	maxloglk = site_loglk;
6073     }
6074     double rellk = 0; /* likelihood scaled by exp(maxloglk) */
6075     for (iRate = 0; iRate < s->nRateCats; iRate++) {
6076       double lk = exp(s->site_loglk[s->nPos*iRate + iPos] - maxloglk);
6077       rellk += lk * dRate[iRate];
6078     }
6079     double loglk_site = maxloglk + log(rellk);
6080     loglk += loglk_site;
6081     if (gamma_loglk_sites != NULL)
6082       gamma_loglk_sites[iPos] = loglk_site;
6083   }
6084   dRate = myfree(dRate, sizeof(double)*s->nRateCats);
6085   return(loglk);
6086 }
6087 
OptAlpha(double alpha,void * data)6088 double OptAlpha(double alpha, void *data) {
6089   siteratelk_t *s = (siteratelk_t *)data;
6090   s->alpha = alpha;
6091   return(-GammaLogLk(s, NULL));
6092 }
6093 
OptMult(double mult,void * data)6094 double OptMult(double mult, void *data) {
6095   siteratelk_t *s = (siteratelk_t *)data;
6096   s->mult = mult;
6097   return(-GammaLogLk(s, NULL));
6098 }
6099 
6100 /* Input site_loglk must be for each rate */
RescaleGammaLogLk(int nPos,int nRateCats,numeric_t * rates,double * site_loglk,FILE * fpLog)6101 double RescaleGammaLogLk(int nPos, int nRateCats, /*IN*/numeric_t *rates, /*IN*/double *site_loglk,
6102 			 /*OPTIONAL*/FILE *fpLog) {
6103   siteratelk_t s = { /*mult*/1.0, /*alpha*/1.0, nPos, nRateCats, rates, site_loglk };
6104   double fx, f2x;
6105   int i;
6106   fx = -GammaLogLk(&s, NULL);
6107   if (verbose>2)
6108     fprintf(stderr, "Optimizing alpha, starting at loglk %.3f\n", -fx);
6109   for (i = 0; i < 10; i++) {
6110     ProgressReport("Optimizing alpha round %d", i+1, 0, 0, 0);
6111     double start = fx;
6112     s.alpha = onedimenmin(0.01, s.alpha, 10.0, OptAlpha, &s, 0.001, 0.001, &fx, &f2x);
6113     if (verbose>2)
6114       fprintf(stderr, "Optimize alpha round %d to %.3f lk %.3f\n", i+1, s.alpha, -fx);
6115     s.mult = onedimenmin(0.01, s.mult, 10.0, OptMult, &s, 0.001, 0.001, &fx, &f2x);
6116     if (verbose>2)
6117       fprintf(stderr, "Optimize mult round %d to %.3f lk %.3f\n", i+1, s.mult, -fx);
6118     if (fx > start - 0.001) {
6119       if (verbose>2)
6120 	fprintf(stderr, "Optimizing alpha & mult converged\n");
6121       break;
6122     }
6123   }
6124 
6125   double *gamma_loglk_sites = mymalloc(sizeof(double) * nPos);
6126   double gammaLogLk = GammaLogLk(&s, /*OUT*/gamma_loglk_sites);
6127   if (verbose > 0)
6128     fprintf(stderr, "Gamma(%d) LogLk = %.3f alpha = %.3f rescaling lengths by %.3f\n",
6129 	    nRateCats, gammaLogLk, s.alpha, 1/s.mult);
6130   if (fpLog) {
6131     int iPos;
6132     int iRate;
6133     fprintf(fpLog, "Gamma%dLogLk\t%.3f\tApproximate\tAlpha\t%.3f\tRescale\t%.3f\n",
6134 	    nRateCats, gammaLogLk, s.alpha, 1/s.mult);
6135     fprintf(fpLog, "Gamma%d\tSite\tLogLk", nRateCats);
6136     for (iRate = 0; iRate < nRateCats; iRate++)
6137       fprintf(fpLog, "\tr=%.3f", rates[iRate]/s.mult);
6138     fprintf(fpLog,"\n");
6139     for (iPos = 0; iPos < nPos; iPos++) {
6140       fprintf(fpLog, "Gamma%d\t%d\t%.3f", nRateCats, iPos, gamma_loglk_sites[iPos]);
6141       for (iRate = 0; iRate < nRateCats; iRate++)
6142 	fprintf(fpLog, "\t%.3f", site_loglk[nPos*iRate + iPos]);
6143       fprintf(fpLog,"\n");
6144     }
6145   }
6146   gamma_loglk_sites = myfree(gamma_loglk_sites, sizeof(double) * nPos);
6147   return(1.0/s.mult);
6148 }
6149 
MLPairOptimize(profile_t * pA,profile_t * pB,int nPos,transition_matrix_t * transmat,rates_t * rates,double * branch_length)6150 double MLPairOptimize(profile_t *pA, profile_t *pB,
6151 		      int nPos, /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
6152 		      /*IN/OUT*/double *branch_length) {
6153   quartet_opt_t qopt = { nPos, transmat, rates,
6154 			 /*nEval*/0, /*pair1*/pA, /*pair2*/pB };
6155   double f2x,negloglk;
6156   *branch_length = onedimenmin(/*xmin*/MLMinBranchLength,
6157 			       /*xguess*/*branch_length,
6158 			       /*xmax*/6.0,
6159 			       PairNegLogLk,
6160 			       /*data*/&qopt,
6161 			       /*ftol*/MLFTolBranchLength,
6162 			       /*atol*/MLMinBranchLengthTolerance,
6163 			       /*OUT*/&negloglk,
6164 			       /*OUT*/&f2x);
6165   return(-negloglk);		/* the log likelihood */
6166 }
6167 
OptimizeAllBranchLengths(NJ_t * NJ)6168 void OptimizeAllBranchLengths(/*IN/OUT*/NJ_t *NJ) {
6169   if (NJ->nSeq < 2)
6170     return;
6171   if (NJ->nSeq == 2) {
6172     int parent = NJ->root;
6173     assert(NJ->child[parent].nChild==2);
6174     int nodes[2] = { NJ->child[parent].child[0], NJ->child[parent].child[1] };
6175     double length = 1.0;
6176     (void)MLPairOptimize(NJ->profiles[nodes[0]], NJ->profiles[nodes[1]],
6177 			 NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/&length);
6178     NJ->branchlength[nodes[0]] = length/2.0;
6179     NJ->branchlength[nodes[1]] = length/2.0;
6180     return;
6181   };
6182 
6183   traversal_t traversal = InitTraversal(NJ);
6184   profile_t **upProfiles = UpProfiles(NJ);
6185   int node = NJ->root;
6186   int iDone = 0;
6187   while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
6188     int nChild = NJ->child[node].nChild;
6189     if (nChild > 0) {
6190       if ((iDone % 100) == 0)
6191 	ProgressReport("ML Lengths %d of %d splits", iDone+1, NJ->maxnode - NJ->nSeq, 0, 0);
6192       iDone++;
6193 
6194       /* optimize the branch lengths between self, parent, and children,
6195          with two iterations
6196       */
6197       assert(nChild == 2 || nChild == 3);
6198       int nodes[3] = { NJ->child[node].child[0],
6199 		       NJ->child[node].child[1],
6200 		       nChild == 3 ? NJ->child[node].child[2] : node };
6201       profile_t *profiles[3] = { NJ->profiles[nodes[0]],
6202 			   NJ->profiles[nodes[1]],
6203 			   nChild == 3 ? NJ->profiles[nodes[2]]
6204 			   : GetUpProfile(/*IN/OUT*/upProfiles, NJ, node, /*useML*/true) };
6205       int iter;
6206       for (iter = 0; iter < 2; iter++) {
6207 	int i;
6208 	for (i = 0; i < 3; i++) {
6209 	  profile_t *pA = profiles[i];
6210 	  int b1 = (i+1) % 3;
6211 	  int b2 = (i+2) % 3;
6212 	  profile_t *pB = PosteriorProfile(profiles[b1], profiles[b2],
6213 					   NJ->branchlength[nodes[b1]],
6214 					   NJ->branchlength[nodes[b2]],
6215 					   NJ->transmat, &NJ->rates, NJ->nPos, /*nConstraints*/0);
6216 	  double len = NJ->branchlength[nodes[i]];
6217 	  if (len < MLMinBranchLength)
6218 	    len = MLMinBranchLength;
6219 	  (void)MLPairOptimize(pA, pB, NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/&len);
6220 	  NJ->branchlength[nodes[i]] = len;
6221 	  pB = FreeProfile(pB, NJ->nPos, /*nConstraints*/0);
6222 	  if (verbose>3)
6223 	    fprintf(stderr, "Optimize length for %d to %.3f\n",
6224 		    nodes[i], NJ->branchlength[nodes[i]]);
6225 	}
6226       }
6227       if (node != NJ->root) {
6228 	RecomputeProfile(/*IN/OUT*/NJ, /*IN/OUT*/upProfiles, node, /*useML*/true);
6229 	DeleteUpProfile(upProfiles, NJ, node);
6230       }
6231     }
6232   }
6233   traversal = FreeTraversal(traversal,NJ);
6234   upProfiles = FreeUpProfiles(upProfiles,NJ);
6235 }
6236 
RecomputeMLProfiles(NJ_t * NJ)6237 void RecomputeMLProfiles(/*IN/OUT*/NJ_t *NJ) {
6238   traversal_t traversal = InitTraversal(NJ);
6239   int node = NJ->root;
6240   while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
6241     if (NJ->child[node].nChild == 2) {
6242       NJ->profiles[node] = FreeProfile(NJ->profiles[node], NJ->nPos, NJ->nConstraints);
6243       int *children = NJ->child[node].child;
6244       NJ->profiles[node] = PosteriorProfile(NJ->profiles[children[0]], NJ->profiles[children[1]],
6245 					    NJ->branchlength[children[0]], NJ->branchlength[children[1]],
6246 					    NJ->transmat, &NJ->rates, NJ->nPos, NJ->nConstraints);
6247     }
6248   }
6249   traversal = FreeTraversal(traversal, NJ);
6250 }
6251 
RecomputeProfiles(NJ_t * NJ,distance_matrix_t * dmat)6252 void RecomputeProfiles(/*IN/OUT*/NJ_t *NJ, /*OPTIONAL*/distance_matrix_t *dmat) {
6253   traversal_t traversal = InitTraversal(NJ);
6254   int node = NJ->root;
6255   while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
6256     if (NJ->child[node].nChild == 2) {
6257       int *child = NJ->child[node].child;
6258       NJ->profiles[node] = FreeProfile(NJ->profiles[node], NJ->nPos, NJ->nConstraints);
6259       NJ->profiles[node] = AverageProfile(NJ->profiles[child[0]], NJ->profiles[child[1]],
6260 					  NJ->nPos, NJ->nConstraints,
6261 					  dmat, /*unweighted*/-1.0);
6262     }
6263   }
6264   traversal = FreeTraversal(traversal,NJ);
6265 }
6266 
NNI(NJ_t * NJ,int iRound,int nRounds,bool useML,nni_stats_t * stats,double * dMaxDelta)6267 int NNI(/*IN/OUT*/NJ_t *NJ, int iRound, int nRounds, bool useML,
6268 	/*IN/OUT*/nni_stats_t *stats,
6269 	/*OUT*/double *dMaxDelta) {
6270   /* For each non-root node N, with children A,B, sibling C, and uncle D,
6271      we compare the current topology AB|CD to the alternate topologies
6272      AC|BD and AD|BC, by using the 4 relevant profiles.
6273 
6274      If useML is true, it uses quartet maximum likelihood, and it
6275      updates branch lengths as it goes.
6276 
6277      If useML is false, it uses the minimum-evolution criterion with
6278      log-corrected distances on profiles.  (If logdist is false, then
6279      the log correction is not done.) If useML is false, then NNI()
6280      does NOT modify the branch lengths.
6281 
6282      Regardless of whether it changes the topology, it recomputes the
6283      profile for the node, using the pairwise distances and BIONJ-like
6284      weightings (if bionj is set). The parent's profile has changed,
6285      but recomputing it is not necessary because we will visit it
6286      before we need it (we use postorder, so we may visit the sibling
6287      and its children before we visit the parent, but we never
6288      consider an ancestor's profile, so that is OK). When we change
6289      the parent's profile, this alters the uncle's up-profile, so we
6290      remove that.  Finally, if the topology has changed, we remove the
6291      up-profiles of the nodes.
6292 
6293      If we do an NNI during post-order traversal, the result is a bit
6294      tricky. E.g. if we are at node N, and have visited its children A
6295      and B but not its uncle C, and we do an NNI that swaps B & C,
6296      then the post-order traversal will visit C, and its children, but
6297      then on the way back up, it will skip N, as it has already
6298      visited it.  So, the profile of N will not be recomputed: any
6299      changes beneath C will not be reflected in the profile of N, and
6300      the profile of N will be slightly stale. This will be corrected
6301      on the next round of NNIs.
6302   */
6303   double supportThreshold = useML ? treeLogLkDelta : MEMinDelta;
6304   int i;
6305   *dMaxDelta = 0.0;
6306   int nNNIThisRound = 0;
6307 
6308   if (NJ->nSeq <= 3)
6309     return(0);			/* nothing to do */
6310   if (verbose > 2) {
6311     fprintf(stderr, "Beginning round %d of NNIs with ml? %d\n", iRound, useML?1:0);
6312     PrintNJInternal(/*WRITE*/stderr, NJ, /*useLen*/useML && iRound > 0 ? 1 : 0);
6313   }
6314   /* For each node the upProfile or NULL */
6315   profile_t **upProfiles = UpProfiles(NJ);
6316 
6317   traversal_t traversal = InitTraversal(NJ);
6318 
6319   /* Identify nodes we can skip traversing into */
6320   int node;
6321   if (fastNNI) {
6322     for (node = 0; node < NJ->maxnode; node++) {
6323       if (node != NJ->root
6324 	  && node >= NJ->nSeq
6325 	  && stats[node].age >= 2
6326 	  && stats[node].subtreeAge >= 2
6327 	  && stats[node].support > supportThreshold) {
6328 	int nodeABCD[4];
6329 	SetupABCD(NJ, node, NULL, NULL, /*OUT*/nodeABCD, useML);
6330 	for (i = 0; i < 4; i++)
6331 	  if (stats[nodeABCD[i]].age == 0 && stats[nodeABCD[i]].support > supportThreshold)
6332 	    break;
6333 	if (i == 4) {
6334 	  SkipTraversalInto(node, /*IN/OUT*/traversal);
6335 	  if (verbose > 2)
6336 	    fprintf(stderr, "Skipping subtree at %d: child %d %d parent %d age %d subtreeAge %d support %.3f\n",
6337 		    node, nodeABCD[0], nodeABCD[1], NJ->parent[node],
6338 		    stats[node].age, stats[node].subtreeAge, stats[node].support);
6339 	}
6340       }
6341     }
6342   }
6343 
6344   int iDone = 0;
6345   bool bUp;
6346   node = NJ->root;
6347   while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, &bUp)) >= 0) {
6348     if (node < NJ->nSeq || node == NJ->root)
6349       continue; /* nothing to do for leaves or root */
6350     if (bUp) {
6351       if(verbose > 2)
6352 	fprintf(stderr, "Going up back to node %d\n", node);
6353       /* No longer needed */
6354       for (i = 0; i < NJ->child[node].nChild; i++)
6355 	DeleteUpProfile(upProfiles, NJ, NJ->child[node].child[i]);
6356       DeleteUpProfile(upProfiles, NJ, node);
6357       RecomputeProfile(/*IN/OUT*/NJ, /*IN/OUT*/upProfiles, node, useML);
6358       continue;
6359     }
6360     if ((iDone % 100) == 0) {
6361       char buf[100];
6362       sprintf(buf, "%s NNI round %%d of %%d, %%d of %%d splits", useML ? "ML" : "ME");
6363       if (iDone > 0)
6364 	sprintf(buf+strlen(buf), ", %d changes", nNNIThisRound);
6365       if (nNNIThisRound > 0)
6366 	sprintf(buf+strlen(buf), " (max delta %.3f)", *dMaxDelta);
6367       ProgressReport(buf, iRound+1, nRounds, iDone+1, NJ->maxnode - NJ->nSeq);
6368     }
6369     iDone++;
6370 
6371     profile_t *profiles[4];
6372     int nodeABCD[4];
6373     /* Note -- during the first round of ML NNIs, we use the min-evo-based branch lengths,
6374        which may be suboptimal */
6375     SetupABCD(NJ, node, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, useML);
6376 
6377     /* Given our 4 profiles, consider doing a swap */
6378     int nodeA = nodeABCD[0];
6379     int nodeB = nodeABCD[1];
6380     int nodeC = nodeABCD[2];
6381     int nodeD = nodeABCD[3];
6382 
6383     nni_t choice = ABvsCD;
6384 
6385     if (verbose > 2)
6386       fprintf(stderr,"Considering NNI around %d: Swap A=%d B=%d C=%d D=up(%d) or parent %d\n",
6387 	      node, nodeA, nodeB, nodeC, nodeD, NJ->parent[node]);
6388     if (verbose > 3 && useML) {
6389       double len[5] = { NJ->branchlength[nodeA], NJ->branchlength[nodeB], NJ->branchlength[nodeC], NJ->branchlength[nodeD],
6390 			NJ->branchlength[node] };
6391       for (i=0; i < 5; i++)
6392 	if (len[i] < MLMinBranchLength)
6393 	  len[i] = MLMinBranchLength;
6394       fprintf(stderr, "Starting quartet likelihood %.3f len %.3f %.3f %.3f %.3f %.3f\n",
6395 	      MLQuartetLogLk(profiles[0],profiles[1],profiles[2],profiles[3],NJ->nPos,NJ->transmat,&NJ->rates,len, /*site_lk*/NULL),
6396 	      len[0], len[1], len[2], len[3], len[4]);
6397     }
6398 
6399     numeric_t newlength[5];
6400     double criteria[3];
6401     if (useML) {
6402       for (i = 0; i < 4; i++)
6403 	newlength[i] = NJ->branchlength[nodeABCD[i]];
6404       newlength[4] = NJ->branchlength[node];
6405       bool bFast = mlAccuracy < 2 && stats[node].age > 0;
6406       choice = MLQuartetNNI(profiles, NJ->transmat, &NJ->rates, NJ->nPos, NJ->nConstraints,
6407 			    /*OUT*/criteria, /*IN/OUT*/newlength, bFast);
6408     } else {
6409       choice = ChooseNNI(profiles, NJ->distance_matrix, NJ->nPos, NJ->nConstraints,
6410 			 /*OUT*/criteria);
6411       /* invert criteria so that higher is better, as in ML case, to simplify code below */
6412       for (i = 0; i < 3; i++)
6413 	criteria[i] = -criteria[i];
6414     }
6415 
6416     if (choice == ACvsBD) {
6417       /* swap B and C */
6418       ReplaceChild(/*IN/OUT*/NJ, node, nodeB, nodeC);
6419       ReplaceChild(/*IN/OUT*/NJ, NJ->parent[node], nodeC, nodeB);
6420     } else if (choice == ADvsBC) {
6421       /* swap A and C */
6422       ReplaceChild(/*IN/OUT*/NJ, node, nodeA, nodeC);
6423       ReplaceChild(/*IN/OUT*/NJ, NJ->parent[node], nodeC, nodeA);
6424     }
6425 
6426     if (useML) {
6427       /* update branch length for the internal branch, and of any
6428 	 branches that lead to leaves, b/c those will not are not
6429 	 the internal branch for NNI and would not otherwise be set.
6430       */
6431       if (choice == ADvsBC) {
6432 	/* For ADvsBC, MLQuartetNNI swaps B with D, but we swap A with C */
6433 	double length2[5] = { newlength[LEN_C], newlength[LEN_D],
6434 			      newlength[LEN_A], newlength[LEN_B],
6435 			      newlength[LEN_I] };
6436 	int i;
6437 	for (i = 0; i < 5; i++) newlength[i] = length2[i];
6438 	/* and swap A and C */
6439 	double tmp = newlength[LEN_A];
6440 	newlength[LEN_A] = newlength[LEN_C];
6441 	newlength[LEN_C] = tmp;
6442       } else if (choice == ACvsBD) {
6443 	/* swap B and C */
6444 	double tmp = newlength[LEN_B];
6445 	newlength[LEN_B] = newlength[LEN_C];
6446 	newlength[LEN_C] = tmp;
6447       }
6448 
6449       NJ->branchlength[node] = newlength[LEN_I];
6450       NJ->branchlength[nodeA] = newlength[LEN_A];
6451       NJ->branchlength[nodeB] = newlength[LEN_B];
6452       NJ->branchlength[nodeC] = newlength[LEN_C];
6453       NJ->branchlength[nodeD] = newlength[LEN_D];
6454     }
6455 
6456     if (verbose>2 && (choice != ABvsCD || verbose > 2))
6457       fprintf(stderr,"NNI around %d: Swap A=%d B=%d C=%d D=out(C) -- choose %s %s %.4f\n",
6458 	      node, nodeA, nodeB, nodeC,
6459 	      choice == ACvsBD ? "AC|BD" : (choice == ABvsCD ? "AB|CD" : "AD|BC"),
6460 	      useML ? "delta-loglk" : "-deltaLen",
6461 	      criteria[choice] - criteria[ABvsCD]);
6462     if(verbose >= 3 && slow && useML)
6463       fprintf(stderr, "Old tree lk -- %.4f\n", TreeLogLk(NJ, /*site_likelihoods*/NULL));
6464 
6465     /* update stats, *dMaxDelta, etc. */
6466     if (choice == ABvsCD) {
6467       stats[node].age++;
6468     } else {
6469       if (useML)
6470 	nML_NNI++;
6471       else
6472 	nNNI++;
6473       nNNIThisRound++;
6474       stats[node].age = 0;
6475       stats[nodeA].age = 0;
6476       stats[nodeB].age = 0;
6477       stats[nodeC].age = 0;
6478       stats[nodeD].age = 0;
6479     }
6480     stats[node].delta = criteria[choice] - criteria[ABvsCD]; /* 0 if ABvsCD */
6481     if (stats[node].delta > *dMaxDelta)
6482       *dMaxDelta = stats[node].delta;
6483 
6484     /* support is improvement of score for self over better of alternatives */
6485     stats[node].support = 1e20;
6486     for (i = 0; i < 3; i++)
6487       if (choice != i && criteria[choice]-criteria[i] < stats[node].support)
6488 	stats[node].support = criteria[choice]-criteria[i];
6489 
6490     /* subtreeAge is the number of rounds since self or descendent had a significant improvement */
6491     if (stats[node].delta > supportThreshold)
6492       stats[node].subtreeAge = 0;
6493     else {
6494       stats[node].subtreeAge++;
6495       for (i = 0; i < 2; i++) {
6496 	int child = NJ->child[node].child[i];
6497 	if (stats[node].subtreeAge > stats[child].subtreeAge)
6498 	  stats[node].subtreeAge = stats[child].subtreeAge;
6499       }
6500     }
6501 
6502     /* update profiles and free up unneeded up-profiles */
6503     if (choice == ABvsCD) {
6504       /* No longer needed */
6505       DeleteUpProfile(upProfiles, NJ, nodeA);
6506       DeleteUpProfile(upProfiles, NJ, nodeB);
6507       DeleteUpProfile(upProfiles, NJ, nodeC);
6508       RecomputeProfile(/*IN/OUT*/NJ, /*IN/OUT*/upProfiles, node, useML);
6509       if(slow && useML)
6510 	UpdateForNNI(NJ, node, upProfiles, useML);
6511     } else {
6512       UpdateForNNI(NJ, node, upProfiles, useML);
6513     }
6514     if(verbose > 2 && slow && useML) {
6515       /* Note we recomputed profiles back up to root already if slow */
6516       PrintNJInternal(/*WRITE*/stderr, NJ, /*useLen*/true);
6517       fprintf(stderr, "New tree lk -- %.4f\n", TreeLogLk(NJ, /*site_likelihoods*/NULL));
6518     }
6519   } /* end postorder traversal */
6520   traversal = FreeTraversal(traversal,NJ);
6521   if (verbose>=2) {
6522     int nUp = 0;
6523     for (i = 0; i < NJ->maxnodes; i++)
6524       if (upProfiles[i] != NULL)
6525 	nUp++;
6526     fprintf(stderr, "N up profiles at end of NNI:  %d\n", nUp);
6527   }
6528   upProfiles = FreeUpProfiles(upProfiles,NJ);
6529   return(nNNIThisRound);
6530 }
6531 
InitNNIStats(NJ_t * NJ)6532 nni_stats_t *InitNNIStats(NJ_t *NJ) {
6533   nni_stats_t *stats = mymalloc(sizeof(nni_stats_t)*NJ->maxnode);
6534   const int LargeAge = 1000000;
6535   int i;
6536   for (i = 0; i < NJ->maxnode; i++) {
6537     stats[i].delta = 0;
6538     stats[i].support = 0;
6539     if (i == NJ->root || i < NJ->nSeq) {
6540       stats[i].age = LargeAge;
6541       stats[i].subtreeAge = LargeAge;
6542     } else {
6543       stats[i].age = 0;
6544       stats[i].subtreeAge = 0;
6545     }
6546   }
6547   return(stats);
6548 }
6549 
FreeNNIStats(nni_stats_t * stats,NJ_t * NJ)6550 nni_stats_t *FreeNNIStats(nni_stats_t *stats, NJ_t *NJ) {
6551   return(myfree(stats, sizeof(nni_stats_t)*NJ->maxnode));
6552 }
6553 
FindSPRSteps(NJ_t * NJ,int nodeMove,int nodeAround,profile_t ** upProfiles,spr_step_t * steps,int maxSteps,bool bFirstAC)6554 int FindSPRSteps(/*IN/OUT*/NJ_t *NJ,
6555 		 int nodeMove,	 /* the node to move multiple times */
6556 		 int nodeAround, /* sibling or parent of node to NNI to start the chain */
6557 		 /*IN/OUT*/profile_t **upProfiles,
6558 		 /*OUT*/spr_step_t *steps,
6559 		 int maxSteps,
6560 		 bool bFirstAC) {
6561   int iStep;
6562   for (iStep = 0; iStep < maxSteps; iStep++) {
6563     if (NJ->child[nodeAround].nChild != 2)
6564       break;			/* no further to go */
6565 
6566     /* Consider the NNIs around nodeAround */
6567     profile_t *profiles[4];
6568     int nodeABCD[4];
6569     SetupABCD(NJ, nodeAround, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, /*useML*/false);
6570     double criteria[3];
6571     (void) ChooseNNI(profiles, NJ->distance_matrix, NJ->nPos, NJ->nConstraints,
6572 		     /*OUT*/criteria);
6573 
6574     /* Do & save the swap */
6575     spr_step_t *step = &steps[iStep];
6576     if (iStep == 0 ? bFirstAC : criteria[ACvsBD] < criteria[ADvsBC]) {
6577       /* swap B & C to put AC together */
6578       step->deltaLength = criteria[ACvsBD] - criteria[ABvsCD];
6579       step->nodes[0] = nodeABCD[1];
6580       step->nodes[1] = nodeABCD[2];
6581     } else {
6582       /* swap AC to put AD together */
6583       step->deltaLength = criteria[ADvsBC] - criteria[ABvsCD];
6584       step->nodes[0] = nodeABCD[0];
6585       step->nodes[1] = nodeABCD[2];
6586     }
6587 
6588     if (verbose>3) {
6589       fprintf(stderr, "SPR chain step %d for %d around %d swap %d %d deltaLen %.5f\n",
6590 	      iStep+1, nodeAround, nodeMove, step->nodes[0], step->nodes[1], step->deltaLength);
6591       if (verbose>4)
6592 	PrintNJInternal(stderr, NJ, /*useLen*/false);
6593     }
6594     ReplaceChild(/*IN/OUT*/NJ, nodeAround, step->nodes[0], step->nodes[1]);
6595     ReplaceChild(/*IN/OUT*/NJ, NJ->parent[nodeAround], step->nodes[1], step->nodes[0]);
6596     UpdateForNNI(/*IN/OUT*/NJ, nodeAround, /*IN/OUT*/upProfiles, /*useML*/false);
6597 
6598     /* set the new nodeAround -- either parent(nodeMove) or sibling(nodeMove) --
6599        so that it different from current nodeAround
6600      */
6601     int newAround[2] = { NJ->parent[nodeMove], Sibling(NJ, nodeMove) };
6602     if (NJ->parent[nodeMove] == NJ->root)
6603       RootSiblings(NJ, nodeMove, /*OUT*/newAround);
6604     assert(newAround[0] == nodeAround || newAround[1] == nodeAround);
6605     assert(newAround[0] != newAround[1]);
6606     nodeAround = newAround[newAround[0] == nodeAround ? 1 : 0];
6607   }
6608   return(iStep);
6609 }
6610 
UnwindSPRStep(NJ_t * NJ,spr_step_t * step,profile_t ** upProfiles)6611 void UnwindSPRStep(/*IN/OUT*/NJ_t *NJ,
6612 		   /*IN*/spr_step_t *step,
6613 		   /*IN/OUT*/profile_t **upProfiles) {
6614   int parents[2];
6615   int i;
6616   for (i = 0; i < 2; i++) {
6617     assert(step->nodes[i] >= 0 && step->nodes[i] < NJ->maxnodes);
6618     parents[i] = NJ->parent[step->nodes[i]];
6619     assert(parents[i] >= 0);
6620   }
6621   assert(parents[0] != parents[1]);
6622   ReplaceChild(/*IN/OUT*/NJ, parents[0], step->nodes[0], step->nodes[1]);
6623   ReplaceChild(/*IN/OUT*/NJ, parents[1], step->nodes[1], step->nodes[0]);
6624   int iYounger = 0;
6625   if (NJ->parent[parents[0]] == parents[1]) {
6626     iYounger = 0;
6627   } else {
6628     assert(NJ->parent[parents[1]] == parents[0]);
6629     iYounger = 1;
6630   }
6631   UpdateForNNI(/*IN/OUT*/NJ, parents[iYounger], /*IN/OUT*/upProfiles, /*useML*/false);
6632 }
6633 
6634 /* Update the profile of node and its ancestor, and delete nearby out-profiles */
UpdateForNNI(NJ_t * NJ,int node,profile_t ** upProfiles,bool useML)6635 void UpdateForNNI(/*IN/OUT*/NJ_t *NJ, int node, /*IN/OUT*/profile_t **upProfiles,
6636 		  bool useML) {
6637   int i;
6638   if (slow) {
6639     /* exhaustive update */
6640     for (i = 0; i < NJ->maxnodes; i++)
6641       DeleteUpProfile(upProfiles, NJ, i);
6642 
6643     /* update profiles back to root */
6644     int ancestor;
6645     for (ancestor = node; ancestor >= 0; ancestor = NJ->parent[ancestor])
6646       RecomputeProfile(/*IN/OUT*/NJ, upProfiles, ancestor, useML);
6647 
6648     /* remove any up-profiles made while doing that*/
6649     for (i = 0; i < NJ->maxnodes; i++)
6650       DeleteUpProfile(upProfiles, NJ, i);
6651   } else {
6652     /* if fast, only update around self
6653        note that upProfile(parent) is still OK after an NNI, but
6654        up-profiles of uncles may not be
6655     */
6656     DeleteUpProfile(upProfiles, NJ, node);
6657     for (i = 0; i < NJ->child[node].nChild; i++)
6658       DeleteUpProfile(upProfiles, NJ, NJ->child[node].child[i]);
6659     assert(node != NJ->root);
6660     int parent = NJ->parent[node];
6661     int neighbors[2] = { parent, Sibling(NJ, node) };
6662     if (parent == NJ->root)
6663       RootSiblings(NJ, node, /*OUT*/neighbors);
6664     DeleteUpProfile(upProfiles, NJ, neighbors[0]);
6665     DeleteUpProfile(upProfiles, NJ, neighbors[1]);
6666     int uncle = Sibling(NJ, parent);
6667     if (uncle >= 0)
6668       DeleteUpProfile(upProfiles, NJ, uncle);
6669     RecomputeProfile(/*IN/OUT*/NJ, upProfiles, node, useML);
6670     RecomputeProfile(/*IN/OUT*/NJ, upProfiles, parent, useML);
6671   }
6672 }
6673 
SPR(NJ_t * NJ,int maxSPRLength,int iRound,int nRounds)6674 void SPR(/*IN/OUT*/NJ_t *NJ, int maxSPRLength, int iRound, int nRounds) {
6675   /* Given a non-root node N with children A,B, sibling C, and uncle D,
6676      we can try to move A by doing three types of moves (4 choices):
6677      "down" -- swap A with a child of B (if B is not a leaf) [2 choices]
6678      "over" -- swap B with C
6679      "up" -- swap A with D
6680      We follow down moves with down moves, over moves with down moves, and
6681      up moves with either up or over moves. (Other choices are just backing
6682      up and hence useless.)
6683 
6684      As with NNIs, we keep track of up-profiles as we go. However, some of the regular
6685      profiles may also become "stale" so it is a bit trickier.
6686 
6687      We store the traversal before we do SPRs to avoid any possible infinite loop
6688   */
6689   double last_tot_len = 0.0;
6690   if (NJ->nSeq <= 3 || maxSPRLength < 1)
6691     return;
6692   if (slow)
6693     last_tot_len = TreeLength(NJ, /*recomputeLengths*/true);
6694   int *nodeList = mymalloc(sizeof(int) * NJ->maxnodes);
6695   int nodeListLen = 0;
6696   traversal_t traversal = InitTraversal(NJ);
6697   int node = NJ->root;
6698   while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
6699     nodeList[nodeListLen++] = node;
6700   }
6701   assert(nodeListLen == NJ->maxnode);
6702   traversal = FreeTraversal(traversal,NJ);
6703 
6704   profile_t **upProfiles = UpProfiles(NJ);
6705   spr_step_t *steps = mymalloc(sizeof(spr_step_t) * maxSPRLength); /* current chain of SPRs */
6706 
6707   int i;
6708   for (i = 0; i < nodeListLen; i++) {
6709     node = nodeList[i];
6710     if ((i % 100) == 0)
6711       ProgressReport("SPR round %3d of %3d, %d of %d nodes",
6712 		     iRound+1, nRounds, i+1, nodeListLen);
6713     if (node == NJ->root)
6714       continue; /* nothing to do for root */
6715     /* The nodes to NNI around */
6716     int nodeAround[2] = { NJ->parent[node], Sibling(NJ, node) };
6717     if (NJ->parent[node] == NJ->root) {
6718       /* NNI around both siblings instead */
6719       RootSiblings(NJ, node, /*OUT*/nodeAround);
6720     }
6721     bool bChanged = false;
6722     int iAround;
6723     for (iAround = 0; iAround < 2 && bChanged == false; iAround++) {
6724       int ACFirst;
6725       for (ACFirst = 0; ACFirst < 2 && bChanged == false; ACFirst++) {
6726 	if(verbose > 3)
6727 	  PrintNJInternal(stderr, NJ, /*useLen*/false);
6728 	int chainLength = FindSPRSteps(/*IN/OUT*/NJ, node, nodeAround[iAround],
6729 				       upProfiles, /*OUT*/steps, maxSPRLength, (bool)ACFirst);
6730 	double dMinDelta = 0.0;
6731 	int iCBest = -1;
6732 	double dTotDelta = 0.0;
6733 	int iC;
6734 	for (iC = 0; iC < chainLength; iC++) {
6735 	  dTotDelta += steps[iC].deltaLength;
6736 	  if (dTotDelta < dMinDelta) {
6737 	    dMinDelta = dTotDelta;
6738 	    iCBest = iC;
6739 	  }
6740 	}
6741 
6742 	if (verbose>3) {
6743 	  fprintf(stderr, "SPR %s %d around %d chainLength %d of %d deltaLength %.5f swaps:",
6744 		  iCBest >= 0 ? "move" : "abandoned",
6745 		  node,nodeAround[iAround],iCBest+1,chainLength,dMinDelta);
6746 	  for (iC = 0; iC < chainLength; iC++)
6747 	    fprintf(stderr, " (%d,%d)%.4f", steps[iC].nodes[0], steps[iC].nodes[1], steps[iC].deltaLength);
6748 	  fprintf(stderr,"\n");
6749 	}
6750 	for (iC = chainLength - 1; iC > iCBest; iC--)
6751 	  UnwindSPRStep(/*IN/OUT*/NJ, /*IN*/&steps[iC], /*IN/OUT*/upProfiles);
6752 	if(verbose > 3)
6753 	  PrintNJInternal(stderr, NJ, /*useLen*/false);
6754 	while (slow && iCBest >= 0) {
6755 	  double expected_tot_len = last_tot_len + dMinDelta;
6756 	  double new_tot_len = TreeLength(NJ, /*recompute*/true);
6757 	  if (verbose > 2)
6758 	    fprintf(stderr, "Total branch-length is now %.4f was %.4f expected %.4f\n",
6759 		    new_tot_len, last_tot_len, expected_tot_len);
6760 	  if (new_tot_len < last_tot_len) {
6761 	    last_tot_len = new_tot_len;
6762 	    break;		/* no rewinding necessary */
6763 	  }
6764 	  if (verbose > 2)
6765 	    fprintf(stderr, "Rewinding SPR to %d\n",iCBest);
6766 	  UnwindSPRStep(/*IN/OUT*/NJ, /*IN*/&steps[iCBest], /*IN/OUT*/upProfiles);
6767 	  dMinDelta -= steps[iCBest].deltaLength;
6768 	  iCBest--;
6769 	}
6770 	if (iCBest >= 0)
6771 	  bChanged = true;
6772       }	/* loop over which step to take at 1st NNI */
6773     } /* loop over which node to pivot around */
6774 
6775     if (bChanged) {
6776       nSPR++;		/* the SPR move is OK */
6777       /* make sure all the profiles are OK */
6778       int j;
6779       for (j = 0; j < NJ->maxnodes; j++)
6780 	DeleteUpProfile(upProfiles, NJ, j);
6781       int ancestor;
6782       for (ancestor = NJ->parent[node]; ancestor >= 0; ancestor = NJ->parent[ancestor])
6783 	RecomputeProfile(/*IN/OUT*/NJ, upProfiles, ancestor, /*useML*/false);
6784     }
6785   } /* end loop over subtrees to prune & regraft */
6786   steps = myfree(steps, sizeof(spr_step_t) * maxSPRLength);
6787   upProfiles = FreeUpProfiles(upProfiles,NJ);
6788   nodeList = myfree(nodeList, sizeof(int) * NJ->maxnodes);
6789 }
6790 
RecomputeProfile(NJ_t * NJ,profile_t ** upProfiles,int node,bool useML)6791 void RecomputeProfile(/*IN/OUT*/NJ_t *NJ, /*IN/OUT*/profile_t **upProfiles, int node,
6792 		      bool useML) {
6793   if (node < NJ->nSeq || node == NJ->root)
6794     return;			/* no profile to compute */
6795   assert(NJ->child[node].nChild==2);
6796 
6797   profile_t *profiles[4];
6798   double weight = 0.5;
6799   if (useML || !bionj) {
6800     profiles[0] = NJ->profiles[NJ->child[node].child[0]];
6801     profiles[1] = NJ->profiles[NJ->child[node].child[1]];
6802   } else {
6803     int nodeABCD[4];
6804     SetupABCD(NJ, node, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, useML);
6805     weight = QuartetWeight(profiles, NJ->distance_matrix, NJ->nPos);
6806   }
6807   if (verbose>3) {
6808     if (useML) {
6809       fprintf(stderr, "Recompute %d from %d %d lengths %.4f %.4f\n",
6810 	      node,
6811 	      NJ->child[node].child[0],
6812 	      NJ->child[node].child[1],
6813 	      NJ->branchlength[NJ->child[node].child[0]],
6814 	      NJ->branchlength[NJ->child[node].child[1]]);
6815     } else {
6816       fprintf(stderr, "Recompute %d from %d %d weight %.3f\n",
6817 	      node, NJ->child[node].child[0], NJ->child[node].child[1], weight);
6818     }
6819   }
6820   NJ->profiles[node] = FreeProfile(NJ->profiles[node], NJ->nPos, NJ->nConstraints);
6821   if (useML) {
6822     NJ->profiles[node] = PosteriorProfile(profiles[0], profiles[1],
6823 					  NJ->branchlength[NJ->child[node].child[0]],
6824 					  NJ->branchlength[NJ->child[node].child[1]],
6825 					  NJ->transmat, &NJ->rates, NJ->nPos, NJ->nConstraints);
6826   } else {
6827     NJ->profiles[node] = AverageProfile(profiles[0], profiles[1],
6828 					NJ->nPos, NJ->nConstraints,
6829 					NJ->distance_matrix, weight);
6830   }
6831 }
6832 
6833 /* The BIONJ-like formula for the weight of A when building a profile for AB is
6834      1/2 + (avgD(B,CD) - avgD(A,CD))/(2*d(A,B))
6835 */
QuartetWeight(profile_t * profiles[4],distance_matrix_t * dmat,int nPos)6836 double QuartetWeight(profile_t *profiles[4], distance_matrix_t *dmat, int nPos) {
6837   if (!bionj)
6838     return(-1.0); /* even weighting */
6839   double d[6];
6840   CorrectedPairDistances(profiles, 4, dmat, nPos, /*OUT*/d);
6841   if (d[qAB] < 0.01)
6842     return -1.0;
6843   double weight = 0.5 + ((d[qBC]+d[qBD])-(d[qAC]+d[qAD]))/(4*d[qAB]);
6844   if (weight < 0)
6845     weight = 0;
6846   if (weight > 1)
6847     weight = 1;
6848   return (weight);
6849 }
6850 
6851 /* Resets the children entry of parent and also the parent entry of newchild */
ReplaceChild(NJ_t * NJ,int parent,int oldchild,int newchild)6852 void ReplaceChild(/*IN/OUT*/NJ_t *NJ, int parent, int oldchild, int newchild) {
6853   NJ->parent[newchild] = parent;
6854 
6855   int iChild;
6856   for (iChild = 0; iChild < NJ->child[parent].nChild; iChild++) {
6857     if (NJ->child[parent].child[iChild] == oldchild) {
6858       NJ->child[parent].child[iChild] = newchild;
6859       return;
6860     }
6861   }
6862   assert(0);
6863 }
6864 
6865 /* Recomputes all branch lengths
6866 
6867    For internal branches such as (A,B) vs. (C,D), uses the formula
6868 
6869    length(AB|CD) = (d(A,C)+d(A,D)+d(B,C)+d(B,D))/4 - d(A,B)/2 - d(C,D)/2
6870 
6871    (where all distances are profile distances - diameters).
6872 
6873    For external branches (e.g. to leaves) A vs. (B,C), use the formula
6874 
6875    length(A|BC) = (d(A,B)+d(A,C)-d(B,C))/2
6876 */
UpdateBranchLengths(NJ_t * NJ)6877 void UpdateBranchLengths(/*IN/OUT*/NJ_t *NJ) {
6878   if (NJ->nSeq < 2)
6879     return;
6880   else if (NJ->nSeq == 2) {
6881     int root = NJ->root;
6882     int nodeA = NJ->child[root].child[0];
6883     int nodeB = NJ->child[root].child[1];
6884     besthit_t h;
6885     ProfileDist(NJ->profiles[nodeA],NJ->profiles[nodeB],
6886 		NJ->nPos, NJ->distance_matrix, /*OUT*/&h);
6887     if (logdist)
6888       h.dist = LogCorrect(h.dist);
6889     NJ->branchlength[nodeA] = h.dist/2.0;
6890     NJ->branchlength[nodeB] = h.dist/2.0;
6891     return;
6892   }
6893 
6894   profile_t **upProfiles = UpProfiles(NJ);
6895   traversal_t traversal = InitTraversal(NJ);
6896   int node = NJ->root;
6897 
6898   while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
6899     /* reset branch length of node (distance to its parent) */
6900     if (node == NJ->root)
6901       continue; /* no branch length to set */
6902     if (node < NJ->nSeq) { /* a leaf */
6903       profile_t *profileA = NJ->profiles[node];
6904       profile_t *profileB = NULL;
6905       profile_t *profileC = NULL;
6906 
6907       int sib = Sibling(NJ,node);
6908       if (sib == -1) { /* at root, have 2 siblings */
6909 	int sibs[2];
6910 	RootSiblings(NJ, node, /*OUT*/sibs);
6911 	profileB = NJ->profiles[sibs[0]];
6912 	profileC = NJ->profiles[sibs[1]];
6913       } else {
6914 	profileB = NJ->profiles[sib];
6915 	profileC = GetUpProfile(/*IN/OUT*/upProfiles, NJ, NJ->parent[node], /*useML*/false);
6916       }
6917       profile_t *profiles[3] = {profileA,profileB,profileC};
6918       double d[3]; /*AB,AC,BC*/
6919       CorrectedPairDistances(profiles, 3, NJ->distance_matrix, NJ->nPos, /*OUT*/d);
6920       /* d(A,BC) = (dAB+dAC-dBC)/2 */
6921       NJ->branchlength[node] = (d[0]+d[1]-d[2])/2.0;
6922     } else {
6923       profile_t *profiles[4];
6924       int nodeABCD[4];
6925       SetupABCD(NJ, node, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, /*useML*/false);
6926       double d[6];
6927       CorrectedPairDistances(profiles, 4, NJ->distance_matrix, NJ->nPos, /*OUT*/d);
6928       NJ->branchlength[node] = (d[qAC]+d[qAD]+d[qBC]+d[qBD])/4.0 - (d[qAB]+d[qCD])/2.0;
6929 
6930       /* no longer needed */
6931       DeleteUpProfile(upProfiles, NJ, nodeABCD[0]);
6932       DeleteUpProfile(upProfiles, NJ, nodeABCD[1]);
6933     }
6934   }
6935   traversal = FreeTraversal(traversal,NJ);
6936   upProfiles = FreeUpProfiles(upProfiles,NJ);
6937 }
6938 
6939 /* Pick columns for resampling, stored as returned_vector[iBoot*nPos + j] */
ResampleColumns(int nPos,int nBootstrap)6940 int *ResampleColumns(int nPos, int nBootstrap) {
6941   long lPos = nPos; /* to prevent overflow on very long alignments when multiplying nPos * nBootstrap */
6942   int *col = (int*)mymalloc(sizeof(int)*lPos*(size_t)nBootstrap);
6943   int i;
6944   for (i = 0; i < nBootstrap; i++) {
6945     int j;
6946     for (j = 0; j < nPos; j++) {
6947       int pos   = (int)(knuth_rand() * nPos);
6948       if (pos<0)
6949 	pos = 0;
6950       else if (pos == nPos)
6951 	pos = nPos-1;
6952       col[i*lPos + j] = pos;
6953     }
6954   }
6955   if (verbose > 5) {
6956     for (i=0; i < 3 && i < nBootstrap; i++) {
6957       fprintf(stderr,"Boot%d",i);
6958       int j;
6959       for (j = 0; j < nPos; j++) {
6960 	fprintf(stderr,"\t%d",col[i*lPos+j]);
6961       }
6962       fprintf(stderr,"\n");
6963     }
6964   }
6965   return(col);
6966 }
6967 
ReliabilityNJ(NJ_t * NJ,int nBootstrap)6968 void ReliabilityNJ(/*IN/OUT*/NJ_t *NJ, int nBootstrap) {
6969   /* For each non-root node N, with children A,B, parent P, sibling C, and grandparent G,
6970      we test the reliability of the split (A,B) versus rest by comparing the profiles
6971      of A, B, C, and the "up-profile" of P.
6972 
6973      Each node's upProfile is the average of its sibling's (down)-profile + its parent's up-profile
6974      (If node's parent is the root, then there are two siblings and we don't need an up-profile)
6975 
6976      To save memory, we do depth-first-search down from the root, and we only keep
6977      up-profiles for nodes in the active path.
6978   */
6979   if (NJ->nSeq <= 3 || nBootstrap <= 0)
6980     return;			/* nothing to do */
6981   int *col = ResampleColumns(NJ->nPos, nBootstrap);
6982 
6983   profile_t **upProfiles = UpProfiles(NJ);
6984   traversal_t traversal = InitTraversal(NJ);
6985   int node = NJ->root;
6986   int iNodesDone = 0;
6987   while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
6988     if (node < NJ->nSeq || node == NJ->root)
6989       continue; /* nothing to do for leaves or root */
6990 
6991     if(iNodesDone > 0 && (iNodesDone % 100) == 0)
6992       ProgressReport("Local bootstrap for %6d of %6d internal splits", iNodesDone, NJ->nSeq-3, 0, 0);
6993     iNodesDone++;
6994 
6995     profile_t *profiles[4];
6996     int nodeABCD[4];
6997     SetupABCD(NJ, node, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, /*useML*/false);
6998 
6999     NJ->support[node] = SplitSupport(profiles[0], profiles[1], profiles[2], profiles[3],
7000 				     NJ->distance_matrix,
7001 				     NJ->nPos,
7002 				     nBootstrap,
7003 				     col);
7004 
7005     /* no longer needed */
7006     DeleteUpProfile(upProfiles, NJ, nodeABCD[0]);
7007     DeleteUpProfile(upProfiles, NJ, nodeABCD[1]);
7008     DeleteUpProfile(upProfiles, NJ, nodeABCD[2]);
7009   }
7010   traversal = FreeTraversal(traversal,NJ);
7011   upProfiles = FreeUpProfiles(upProfiles,NJ);
7012   col = myfree(col, sizeof(int)*((size_t)NJ->nPos)*nBootstrap);
7013 }
7014 
NewProfile(int nPos,int nConstraints)7015 profile_t *NewProfile(int nPos, int nConstraints) {
7016   profile_t *profile = (profile_t *)mymalloc(sizeof(profile_t));
7017   profile->weights = mymalloc(sizeof(numeric_t)*nPos);
7018   profile->codes = mymalloc(sizeof(unsigned char)*nPos);
7019   profile->vectors = NULL;
7020   profile->nVectors = 0;
7021   profile->codeDist = NULL;
7022   if (nConstraints == 0) {
7023     profile->nOn = NULL;
7024     profile->nOff = NULL;
7025   } else {
7026     profile->nOn = mymalloc(sizeof(int)*nConstraints);
7027     profile->nOff = mymalloc(sizeof(int)*nConstraints);
7028   }
7029   return(profile);
7030 }
7031 
FreeProfile(profile_t * profile,int nPos,int nConstraints)7032 profile_t *FreeProfile(profile_t *profile, int nPos, int nConstraints) {
7033     if(profile==NULL) return(NULL);
7034     myfree(profile->codes, nPos);
7035     myfree(profile->weights, nPos);
7036     myfree(profile->vectors, sizeof(numeric_t)*nCodes*profile->nVectors);
7037     myfree(profile->codeDist, sizeof(numeric_t)*nCodes*nPos);
7038     if (nConstraints > 0) {
7039       myfree(profile->nOn, sizeof(int)*nConstraints);
7040       myfree(profile->nOff,  sizeof(int)*nConstraints);
7041     }
7042     return(myfree(profile, sizeof(profile_t)));
7043 }
7044 
SetupABCD(NJ_t * NJ,int node,profile_t * profiles[4],profile_t ** upProfiles,int nodeABCD[4],bool useML)7045 void SetupABCD(NJ_t *NJ, int node,
7046 	       /* the 4 profiles; the last one is an outprofile */
7047 	       /*OPTIONAL OUT*/profile_t *profiles[4],
7048 	       /*OPTIONAL IN/OUT*/profile_t **upProfiles,
7049 	       /*OUT*/int nodeABCD[4],
7050 	       bool useML) {
7051   int parent = NJ->parent[node];
7052   assert(parent >= 0);
7053   assert(NJ->child[node].nChild == 2);
7054   nodeABCD[0] = NJ->child[node].child[0]; /*A*/
7055   nodeABCD[1] = NJ->child[node].child[1]; /*B*/
7056 
7057   profile_t *profile4 = NULL;
7058   if (parent == NJ->root) {
7059     int sibs[2];
7060     RootSiblings(NJ, node, /*OUT*/sibs);
7061     nodeABCD[2] = sibs[0];
7062     nodeABCD[3] = sibs[1];
7063     if (profiles == NULL)
7064       return;
7065     profile4 = NJ->profiles[sibs[1]];
7066   } else {
7067     nodeABCD[2] = Sibling(NJ,node);
7068     assert(nodeABCD[2] >= 0);
7069     nodeABCD[3] = parent;
7070     if (profiles == NULL)
7071       return;
7072     profile4 = GetUpProfile(upProfiles,NJ,parent,useML);
7073   }
7074   assert(upProfiles != NULL);
7075   int i;
7076   for (i = 0; i < 3; i++)
7077     profiles[i] = NJ->profiles[nodeABCD[i]];
7078   profiles[3] = profile4;
7079 }
7080 
7081 
Sibling(NJ_t * NJ,int node)7082 int Sibling(NJ_t *NJ, int node) {
7083   int parent = NJ->parent[node];
7084   if (parent < 0 || parent == NJ->root)
7085     return(-1);
7086   int iChild;
7087   for(iChild=0;iChild<NJ->child[parent].nChild;iChild++) {
7088     if(NJ->child[parent].child[iChild] != node)
7089       return (NJ->child[parent].child[iChild]);
7090   }
7091   assert(0);
7092   return(-1);
7093 }
7094 
RootSiblings(NJ_t * NJ,int node,int sibs[2])7095 void RootSiblings(NJ_t *NJ, int node, /*OUT*/int sibs[2]) {
7096   assert(NJ->parent[node] == NJ->root);
7097   assert(NJ->child[NJ->root].nChild == 3);
7098 
7099   int nSibs = 0;
7100   int iChild;
7101   for(iChild=0; iChild < NJ->child[NJ->root].nChild; iChild++) {
7102     int child = NJ->child[NJ->root].child[iChild];
7103     if (child != node) sibs[nSibs++] = child;
7104   }
7105   assert(nSibs==2);
7106 }
7107 
TestSplitsML(NJ_t * NJ,SplitCount_t * splitcount,int nBootstrap)7108 void TestSplitsML(/*IN/OUT*/NJ_t *NJ, /*OUT*/SplitCount_t *splitcount, int nBootstrap) {
7109   const double tolerance = 1e-6;
7110   splitcount->nBadSplits = 0;
7111   splitcount->nConstraintViolations = 0;
7112   splitcount->nBadBoth = 0;
7113   splitcount->nSplits = 0;
7114   splitcount->dWorstDeltaUnconstrained = 0;
7115   splitcount->dWorstDeltaConstrained = 0;
7116 
7117   profile_t **upProfiles = UpProfiles(NJ);
7118   traversal_t traversal = InitTraversal(NJ);
7119   int node = NJ->root;
7120 
7121   int *col = nBootstrap > 0 ? ResampleColumns(NJ->nPos, nBootstrap) : NULL;
7122   double *site_likelihoods[3];
7123   int choice;
7124   for (choice = 0; choice < 3; choice++)
7125     site_likelihoods[choice] = mymalloc(sizeof(double)*NJ->nPos);
7126 
7127   int iNodesDone = 0;
7128   while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
7129     if (node < NJ->nSeq || node == NJ->root)
7130       continue; /* nothing to do for leaves or root */
7131 
7132     if(iNodesDone > 0 && (iNodesDone % 100) == 0)
7133       ProgressReport("ML split tests for %6d of %6d internal splits", iNodesDone, NJ->nSeq-3, 0, 0);
7134     iNodesDone++;
7135 
7136     profile_t *profiles[4];
7137     int nodeABCD[4];
7138     SetupABCD(NJ, node, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, /*useML*/true);
7139     double loglk[3];
7140     double len[5];
7141     int i;
7142     for (i = 0; i < 4; i++)
7143       len[i] = NJ->branchlength[nodeABCD[i]];
7144     len[4] = NJ->branchlength[node];
7145     double lenABvsCD[5] = {len[LEN_A], len[LEN_B], len[LEN_C], len[LEN_D], len[LEN_I]};
7146     double lenACvsBD[5] = {len[LEN_A], len[LEN_C], len[LEN_B], len[LEN_D], len[LEN_I]};   /* Swap B & C */
7147     double lenADvsBC[5] = {len[LEN_A], len[LEN_D], len[LEN_C], len[LEN_B], len[LEN_I]};   /* Swap B & D */
7148 
7149     {
7150 #ifdef OPENMP
7151       #pragma omp parallel
7152       #pragma omp sections
7153 #endif
7154       {
7155 #ifdef OPENMP
7156       #pragma omp section
7157 #endif
7158 	{
7159 	  /* Lengths are already optimized for ABvsCD */
7160 	  loglk[ABvsCD] = MLQuartetLogLk(profiles[0], profiles[1], profiles[2], profiles[3],
7161 					 NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/lenABvsCD,
7162 					 /*OUT*/site_likelihoods[ABvsCD]);
7163 	}
7164 
7165 #ifdef OPENMP
7166       #pragma omp section
7167 #endif
7168 	{
7169 	  loglk[ACvsBD] = MLQuartetOptimize(profiles[0], profiles[2], profiles[1], profiles[3],
7170 					    NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/lenACvsBD, /*pStarTest*/NULL,
7171 					    /*OUT*/site_likelihoods[ACvsBD]);
7172 	}
7173 
7174 #ifdef OPENMP
7175       #pragma omp section
7176 #endif
7177 	{
7178 	  loglk[ADvsBC] = MLQuartetOptimize(profiles[0], profiles[3], profiles[2], profiles[1],
7179 					    NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/lenADvsBC, /*pStarTest*/NULL,
7180 					    /*OUT*/site_likelihoods[ADvsBC]);
7181 	}
7182       }
7183     }
7184 
7185     /* do a second pass on the better alternative if it is close */
7186     if (loglk[ACvsBD] > loglk[ADvsBC]) {
7187       if (mlAccuracy > 1 || loglk[ACvsBD] > loglk[ABvsCD] - closeLogLkLimit) {
7188 	loglk[ACvsBD] = MLQuartetOptimize(profiles[0], profiles[2], profiles[1], profiles[3],
7189 					  NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/lenACvsBD, /*pStarTest*/NULL,
7190 					  /*OUT*/site_likelihoods[ACvsBD]);
7191       }
7192     } else {
7193       if (mlAccuracy > 1 || loglk[ADvsBC] > loglk[ABvsCD] - closeLogLkLimit) {
7194 	loglk[ADvsBC] = MLQuartetOptimize(profiles[0], profiles[3], profiles[2], profiles[1],
7195 					  NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/lenADvsBC, /*pStarTest*/NULL,
7196 					  /*OUT*/site_likelihoods[ADvsBC]);
7197       }
7198     }
7199 
7200     if (loglk[ABvsCD] >= loglk[ACvsBD] && loglk[ABvsCD] >= loglk[ADvsBC])
7201       choice = ABvsCD;
7202     else if (loglk[ACvsBD] >= loglk[ABvsCD] && loglk[ACvsBD] >= loglk[ADvsBC])
7203       choice = ACvsBD;
7204     else
7205       choice = ADvsBC;
7206     bool badSplit = loglk[choice] > loglk[ABvsCD] + treeLogLkDelta; /* ignore small changes in likelihood */
7207 
7208     /* constraint penalties, indexed by nni_t (lower is better) */
7209     double p[3];
7210     QuartetConstraintPenalties(profiles, NJ->nConstraints, /*OUT*/p);
7211     bool bBadConstr = p[ABvsCD] > p[ACvsBD] + tolerance || p[ABvsCD] > p[ADvsBC] + tolerance;
7212     bool violateConstraint = false;
7213     int iC;
7214     for (iC=0; iC < NJ->nConstraints; iC++) {
7215       if (SplitViolatesConstraint(profiles, iC)) {
7216 	violateConstraint = true;
7217 	break;
7218       }
7219     }
7220     splitcount->nSplits++;
7221     if (violateConstraint)
7222       splitcount->nConstraintViolations++;
7223     if (badSplit)
7224       splitcount->nBadSplits++;
7225     if (badSplit && bBadConstr)
7226       splitcount->nBadBoth++;
7227     if (badSplit) {
7228       double delta = loglk[choice] - loglk[ABvsCD];
7229       /* If ABvsCD is favored over the more likely NNI by constraints,
7230 	 then this is probably a bad split because of the constraint */
7231       if (p[choice] > p[ABvsCD] + tolerance)
7232 	splitcount->dWorstDeltaConstrained = MAX(delta, splitcount->dWorstDeltaConstrained);
7233       else
7234 	splitcount->dWorstDeltaUnconstrained = MAX(delta, splitcount->dWorstDeltaUnconstrained);
7235     }
7236     if (nBootstrap>0)
7237       NJ->support[node] = badSplit ? 0.0 : SHSupport(NJ->nPos, nBootstrap, col, loglk, site_likelihoods);
7238 
7239     /* No longer needed */
7240     DeleteUpProfile(upProfiles, NJ, nodeABCD[0]);
7241     DeleteUpProfile(upProfiles, NJ, nodeABCD[1]);
7242     DeleteUpProfile(upProfiles, NJ, nodeABCD[2]);
7243   }
7244   traversal = FreeTraversal(traversal,NJ);
7245   upProfiles = FreeUpProfiles(upProfiles,NJ);
7246   if (nBootstrap>0)
7247     col = myfree(col, sizeof(int)*((size_t)NJ->nPos)*nBootstrap);
7248   for (choice = 0; choice < 3; choice++)
7249     site_likelihoods[choice] = myfree(site_likelihoods[choice], sizeof(double)*NJ->nPos);
7250 }
7251 
7252 
TestSplitsMinEvo(NJ_t * NJ,SplitCount_t * splitcount)7253 void TestSplitsMinEvo(NJ_t *NJ, /*OUT*/SplitCount_t *splitcount) {
7254   const double tolerance = 1e-6;
7255   splitcount->nBadSplits = 0;
7256   splitcount->nConstraintViolations = 0;
7257   splitcount->nBadBoth = 0;
7258   splitcount->nSplits = 0;
7259   splitcount->dWorstDeltaUnconstrained = 0.0;
7260   splitcount->dWorstDeltaConstrained = 0.0;
7261 
7262   profile_t **upProfiles = UpProfiles(NJ);
7263   traversal_t traversal = InitTraversal(NJ);
7264   int node = NJ->root;
7265 
7266   while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
7267     if (node < NJ->nSeq || node == NJ->root)
7268       continue; /* nothing to do for leaves or root */
7269 
7270     profile_t *profiles[4];
7271     int nodeABCD[4];
7272     SetupABCD(NJ, node, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, /*useML*/false);
7273 
7274     if (verbose>2)
7275       fprintf(stderr,"Testing Split around %d: A=%d B=%d C=%d D=up(%d) or node parent %d\n",
7276 	      node, nodeABCD[0], nodeABCD[1], nodeABCD[2], nodeABCD[3], NJ->parent[node]);
7277 
7278     double d[6];		/* distances, perhaps log-corrected distances, no constraint penalties */
7279     CorrectedPairDistances(profiles, 4, NJ->distance_matrix, NJ->nPos, /*OUT*/d);
7280 
7281     /* alignment-based scores for each split (lower is better) */
7282     double sABvsCD = d[qAB] + d[qCD];
7283     double sACvsBD = d[qAC] + d[qBD];
7284     double sADvsBC = d[qAD] + d[qBC];
7285 
7286     /* constraint penalties, indexed by nni_t (lower is better) */
7287     double p[3];
7288     QuartetConstraintPenalties(profiles, NJ->nConstraints, /*OUT*/p);
7289 
7290     int nConstraintsViolated = 0;
7291     int iC;
7292     for (iC=0; iC < NJ->nConstraints; iC++) {
7293       if (SplitViolatesConstraint(profiles, iC)) {
7294 	nConstraintsViolated++;
7295 	if (verbose > 2) {
7296 	  double penalty[3] = {0.0,0.0,0.0};
7297 	  (void)QuartetConstraintPenaltiesPiece(profiles, iC, /*OUT*/penalty);
7298 	  fprintf(stderr, "Violate constraint %d at %d (children %d %d) penalties %.3f %.3f %.3f %d/%d %d/%d %d/%d %d/%d\n",
7299 		  iC, node, NJ->child[node].child[0], NJ->child[node].child[1],
7300 		  penalty[ABvsCD], penalty[ACvsBD], penalty[ADvsBC],
7301 		  profiles[0]->nOn[iC], profiles[0]->nOff[iC],
7302 		  profiles[1]->nOn[iC], profiles[1]->nOff[iC],
7303 		  profiles[2]->nOn[iC], profiles[2]->nOff[iC],
7304 		  profiles[3]->nOn[iC], profiles[3]->nOff[iC]);
7305 	}
7306       }
7307     }
7308 
7309     double delta = sABvsCD - MIN(sACvsBD,sADvsBC);
7310     bool bBadDist = delta > tolerance;
7311     bool bBadConstr = p[ABvsCD] > p[ACvsBD] + tolerance || p[ABvsCD] > p[ADvsBC] + tolerance;
7312 
7313     splitcount->nSplits++;
7314     if (bBadDist) {
7315       nni_t choice = sACvsBD < sADvsBC ? ACvsBD : ADvsBC;
7316       /* If ABvsCD is favored over the shorter NNI by constraints,
7317 	 then this is probably a bad split because of the constraint */
7318       if (p[choice] > p[ABvsCD] + tolerance)
7319 	splitcount->dWorstDeltaConstrained = MAX(delta, splitcount->dWorstDeltaConstrained);
7320       else
7321 	splitcount->dWorstDeltaUnconstrained = MAX(delta, splitcount->dWorstDeltaUnconstrained);
7322     }
7323 
7324     if (nConstraintsViolated > 0)
7325       splitcount->nConstraintViolations++; /* count splits with any violations, not #constraints in a splits */
7326     if (bBadDist)
7327       splitcount->nBadSplits++;
7328     if (bBadDist && bBadConstr)
7329       splitcount->nBadBoth++;
7330     if (bBadConstr && verbose > 2) {
7331       /* Which NNI would be better */
7332       double dist_advantage = 0;
7333       double constraint_penalty = 0;
7334       if (p[ACvsBD] < p[ADvsBC]) {
7335 	dist_advantage = sACvsBD - sABvsCD;
7336 	constraint_penalty = p[ABvsCD] - p[ACvsBD];
7337       } else {
7338 	dist_advantage = sADvsBC - sABvsCD;
7339 	constraint_penalty = p[ABvsCD] - p[ADvsBC];
7340       }
7341       fprintf(stderr, "Violate constraints %d distance_advantage %.3f constraint_penalty %.3f (children %d %d):",
7342 	      node, dist_advantage, constraint_penalty,
7343 	      NJ->child[node].child[0], NJ->child[node].child[1]);
7344       /* list the constraints with a penalty, meaning that ABCD all have non-zero
7345          values and that AB|CD worse than others */
7346       for (iC = 0; iC < NJ->nConstraints; iC++) {
7347 	double ppart[6];
7348 	if (QuartetConstraintPenaltiesPiece(profiles, iC, /*OUT*/ppart)) {
7349 	  if (ppart[qAB] + ppart[qCD] > ppart[qAD] + ppart[qBC] + tolerance
7350 	      || ppart[qAB] + ppart[qCD] > ppart[qAC] + ppart[qBD] + tolerance)
7351 	    fprintf(stderr, " %d (%d/%d %d/%d %d/%d %d/%d)", iC,
7352 		    profiles[0]->nOn[iC], profiles[0]->nOff[iC],
7353 		    profiles[1]->nOn[iC], profiles[1]->nOff[iC],
7354 		    profiles[2]->nOn[iC], profiles[2]->nOff[iC],
7355 		    profiles[3]->nOn[iC], profiles[3]->nOff[iC]);
7356 	}
7357       }
7358       fprintf(stderr, "\n");
7359     }
7360 
7361     /* no longer needed */
7362     DeleteUpProfile(upProfiles, NJ, nodeABCD[0]);
7363     DeleteUpProfile(upProfiles, NJ, nodeABCD[1]);
7364   }
7365   traversal = FreeTraversal(traversal,NJ);
7366   upProfiles = FreeUpProfiles(upProfiles,NJ);
7367 }
7368 
7369 /* Computes support for (A,B),(C,D) compared to that for (A,C),(B,D) and (A,D),(B,C) */
SplitSupport(profile_t * pA,profile_t * pB,profile_t * pC,profile_t * pD,distance_matrix_t * dmat,int nPos,int nBootstrap,int * col)7370 double SplitSupport(profile_t *pA, profile_t *pB, profile_t *pC, profile_t *pD,
7371 		    /*OPTIONAL*/distance_matrix_t *dmat,
7372 		    int nPos,
7373 		    int nBootstrap,
7374 		    int *col) {
7375   int i,j;
7376   long lPos = nPos; 		/* to avoid overflow when multiplying */
7377 
7378   /* Note distpieces are weighted */
7379   double *distpieces[6];
7380   double *weights[6];
7381   for (j = 0; j < 6; j++) {
7382     distpieces[j] = (double*)mymalloc(sizeof(double)*nPos);
7383     weights[j] = (double*)mymalloc(sizeof(double)*nPos);
7384   }
7385 
7386   int iFreqA = 0;
7387   int iFreqB = 0;
7388   int iFreqC = 0;
7389   int iFreqD = 0;
7390   for (i = 0; i < nPos; i++) {
7391     numeric_t *fA = GET_FREQ(pA, i, /*IN/OUT*/iFreqA);
7392     numeric_t *fB = GET_FREQ(pB, i, /*IN/OUT*/iFreqB);
7393     numeric_t *fC = GET_FREQ(pC, i, /*IN/OUT*/iFreqC);
7394     numeric_t *fD = GET_FREQ(pD, i, /*IN/OUT*/iFreqD);
7395 
7396     weights[qAB][i] = pA->weights[i] * pB->weights[i];
7397     weights[qAC][i] = pA->weights[i] * pC->weights[i];
7398     weights[qAD][i] = pA->weights[i] * pD->weights[i];
7399     weights[qBC][i] = pB->weights[i] * pC->weights[i];
7400     weights[qBD][i] = pB->weights[i] * pD->weights[i];
7401     weights[qCD][i] = pC->weights[i] * pD->weights[i];
7402 
7403     distpieces[qAB][i] = weights[qAB][i] * ProfileDistPiece(pA->codes[i], pB->codes[i], fA, fB, dmat, NULL);
7404     distpieces[qAC][i] = weights[qAC][i] * ProfileDistPiece(pA->codes[i], pC->codes[i], fA, fC, dmat, NULL);
7405     distpieces[qAD][i] = weights[qAD][i] * ProfileDistPiece(pA->codes[i], pD->codes[i], fA, fD, dmat, NULL);
7406     distpieces[qBC][i] = weights[qBC][i] * ProfileDistPiece(pB->codes[i], pC->codes[i], fB, fC, dmat, NULL);
7407     distpieces[qBD][i] = weights[qBD][i] * ProfileDistPiece(pB->codes[i], pD->codes[i], fB, fD, dmat, NULL);
7408     distpieces[qCD][i] = weights[qCD][i] * ProfileDistPiece(pC->codes[i], pD->codes[i], fC, fD, dmat, NULL);
7409   }
7410   assert(iFreqA == pA->nVectors);
7411   assert(iFreqB == pB->nVectors);
7412   assert(iFreqC == pC->nVectors);
7413   assert(iFreqD == pD->nVectors);
7414 
7415   double totpieces[6];
7416   double totweights[6];
7417   double dists[6];
7418   for (j = 0; j < 6; j++) {
7419     totpieces[j] = 0.0;
7420     totweights[j] = 0.0;
7421     for (i = 0; i < nPos; i++) {
7422       totpieces[j] += distpieces[j][i];
7423       totweights[j] += weights[j][i];
7424     }
7425     dists[j] = totweights[j] > 0.01 ? totpieces[j]/totweights[j] : 3.0;
7426     if (logdist)
7427       dists[j] = LogCorrect(dists[j]);
7428   }
7429 
7430   /* Support1 = Support(AB|CD over AC|BD) = d(A,C)+d(B,D)-d(A,B)-d(C,D)
7431      Support2 = Support(AB|CD over AD|BC) = d(A,D)+d(B,C)-d(A,B)-d(C,D)
7432   */
7433   double support1 = dists[qAC] + dists[qBD] - dists[qAB] - dists[qCD];
7434   double support2 = dists[qAD] + dists[qBC] - dists[qAB] - dists[qCD];
7435 
7436   if (support1 < 0 || support2 < 0) {
7437     nSuboptimalSplits++;	/* Another split seems superior */
7438   }
7439 
7440   assert(nBootstrap > 0);
7441   int nSupport = 0;
7442 
7443   int iBoot;
7444   for (iBoot=0;iBoot<nBootstrap;iBoot++) {
7445     int *colw = &col[lPos*iBoot];
7446 
7447     for (j = 0; j < 6; j++) {
7448       double totp = 0;
7449       double totw = 0;
7450       double *d = distpieces[j];
7451       double *w = weights[j];
7452       for (i=0; i<nPos; i++) {
7453 	int c = colw[i];
7454 	totp += d[c];
7455 	totw += w[c];
7456       }
7457       dists[j] = totw > 0.01 ? totp/totw : 3.0;
7458       if (logdist)
7459 	dists[j] = LogCorrect(dists[j]);
7460     }
7461     support1 = dists[qAC] + dists[qBD] - dists[qAB] - dists[qCD];
7462     support2 = dists[qAD] + dists[qBC] - dists[qAB] - dists[qCD];
7463     if (support1 > 0 && support2 > 0)
7464       nSupport++;
7465   } /* end loop over bootstrap replicates */
7466 
7467   for (j = 0; j < 6; j++) {
7468     distpieces[j] = myfree(distpieces[j], sizeof(double)*nPos);
7469     weights[j] = myfree(weights[j], sizeof(double)*nPos);
7470   }
7471   return( nSupport/(double)nBootstrap );
7472 }
7473 
SHSupport(int nPos,int nBootstrap,int * col,double loglk[3],double * site_likelihoods[3])7474 double SHSupport(int nPos, int nBootstrap, int *col, double loglk[3], double *site_likelihoods[3]) {
7475   long lPos = nPos;		/* to avoid overflow when multiplying */
7476   assert(nBootstrap>0);
7477   double delta1 = loglk[0]-loglk[1];
7478   double delta2 = loglk[0]-loglk[2];
7479   double delta = delta1 < delta2 ? delta1 : delta2;
7480 
7481   double *siteloglk[3];
7482   int i,j;
7483   for (i = 0; i < 3; i++) {
7484     siteloglk[i] = mymalloc(sizeof(double)*nPos);
7485     for (j = 0; j < nPos; j++)
7486       siteloglk[i][j] = log(site_likelihoods[i][j]);
7487   }
7488 
7489   int nSupport = 0;
7490   int iBoot;
7491   for (iBoot = 0; iBoot < nBootstrap; iBoot++) {
7492     double resampled[3];
7493     for (i = 0; i < 3; i++)
7494       resampled[i] = -loglk[i];
7495     for (j = 0; j < nPos; j++) {
7496       int pos = col[iBoot*lPos+j];
7497       for (i = 0; i < 3; i++)
7498 	resampled[i] += siteloglk[i][pos];
7499     }
7500     int iBest = 0;
7501     for (i = 1; i < 3; i++)
7502       if (resampled[i] > resampled[iBest])
7503 	iBest = i;
7504     double resample1 = resampled[iBest] - resampled[(iBest+1)%3];
7505     double resample2 = resampled[iBest] - resampled[(iBest+2)%3];
7506     double resampleDelta = resample1 < resample2 ? resample1 : resample2;
7507     if (resampleDelta < delta)
7508       nSupport++;
7509   }
7510   for (i=0;i<3;i++)
7511     siteloglk[i] = myfree(siteloglk[i], sizeof(double)*nPos);
7512   return(nSupport/(double)nBootstrap);
7513 }
7514 
7515 
SetDistCriterion(NJ_t * NJ,int nActive,besthit_t * hit)7516 void SetDistCriterion(/*IN/OUT*/NJ_t *NJ, int nActive, /*IN/OUT*/besthit_t *hit) {
7517   if (hit->i < NJ->nSeq && hit->j < NJ->nSeq) {
7518     SeqDist(NJ->profiles[hit->i]->codes,
7519 	    NJ->profiles[hit->j]->codes,
7520 	    NJ->nPos, NJ->distance_matrix, /*OUT*/hit);
7521   } else {
7522     ProfileDist(NJ->profiles[hit->i],
7523 		NJ->profiles[hit->j],
7524 		NJ->nPos, NJ->distance_matrix, /*OUT*/hit);
7525     hit->dist -= (NJ->diameter[hit->i] + NJ->diameter[hit->j]);
7526   }
7527   hit->dist += constraintWeight
7528     * (double)JoinConstraintPenalty(NJ, hit->i, hit->j);
7529   SetCriterion(NJ,nActive,/*IN/OUT*/hit);
7530 }
7531 
SetCriterion(NJ_t * NJ,int nActive,besthit_t * join)7532 void SetCriterion(/*IN/UPDATE*/NJ_t *NJ, int nActive, /*IN/OUT*/besthit_t *join) {
7533   if(join->i < 0
7534      || join->j < 0
7535      || NJ->parent[join->i] >= 0
7536      || NJ->parent[join->j] >= 0)
7537     return;
7538   assert(NJ->nOutDistActive[join->i] >= nActive);
7539   assert(NJ->nOutDistActive[join->j] >= nActive);
7540 
7541   int nDiffAllow = tophitsMult > 0 ? (int)(nActive*staleOutLimit) : 0;
7542   if (NJ->nOutDistActive[join->i] - nActive > nDiffAllow)
7543     SetOutDistance(NJ, join->i, nActive);
7544   if (NJ->nOutDistActive[join->j] - nActive > nDiffAllow)
7545     SetOutDistance(NJ, join->j, nActive);
7546   double outI = NJ->outDistances[join->i];
7547   if (NJ->nOutDistActive[join->i] != nActive)
7548     outI *= (nActive-1)/(double)(NJ->nOutDistActive[join->i]-1);
7549   double outJ = NJ->outDistances[join->j];
7550   if (NJ->nOutDistActive[join->j] != nActive)
7551     outJ *= (nActive-1)/(double)(NJ->nOutDistActive[join->j]-1);
7552   join->criterion = join->dist - (outI+outJ)/(double)(nActive-2);
7553   if (verbose > 2 && nActive <= 5) {
7554     fprintf(stderr, "Set Criterion to join %d %d with nActive=%d dist+penalty %.3f criterion %.3f\n",
7555 	    join->i, join->j, nActive, join->dist, join->criterion);
7556   }
7557 }
7558 
SetOutDistance(NJ_t * NJ,int iNode,int nActive)7559 void SetOutDistance(NJ_t *NJ, int iNode, int nActive) {
7560   if (NJ->nOutDistActive[iNode] == nActive)
7561     return;
7562 
7563   /* May be called by InitNJ before we have parents */
7564   assert(iNode>=0 && (NJ->parent == NULL || NJ->parent[iNode]<0));
7565   besthit_t dist;
7566   ProfileDist(NJ->profiles[iNode], NJ->outprofile, NJ->nPos, NJ->distance_matrix, &dist);
7567   outprofileOps++;
7568 
7569   /* out(A) = sum(X!=A) d(A,X)
7570      = sum(X!=A) (profiledist(A,X) - diam(A) - diam(X))
7571      = sum(X!=A) profiledist(A,X) - (N-1)*diam(A) - (totdiam - diam(A))
7572 
7573      in the absence of gaps:
7574      profiledist(A,out) = mean profiledist(A, all active nodes)
7575      sum(X!=A) profiledist(A,X) = N * profiledist(A,out) - profiledist(A,A)
7576 
7577      With gaps, we need to take the weights of the comparisons into account, where
7578      w(Ai) is the weight of position i in profile A:
7579      w(A,B) = sum_i w(Ai) * w(Bi)
7580      d(A,B) = sum_i w(Ai) * w(Bi) * d(Ai,Bi) / w(A,B)
7581 
7582      sum(X!=A) profiledist(A,X) ~= (N-1) * profiledist(A, Out w/o A)
7583      profiledist(A, Out w/o A) = sum_X!=A sum_i d(Ai,Xi) * w(Ai) * w(Bi) / ( sum_X!=A sum_i w(Ai) * w(Bi) )
7584      d(A, Out) = sum_A sum_i d(Ai,Xi) * w(Ai) * w(Bi) / ( sum_X sum_i w(Ai) * w(Bi) )
7585 
7586      and so we get
7587      profiledist(A,out w/o A) = (top of d(A,Out) - top of d(A,A)) / (weight of d(A,Out) - weight of d(A,A))
7588      top = dist * weight
7589      with another correction of nActive because the weight of the out-profile is the average
7590      weight not the total weight.
7591   */
7592   double top = (nActive-1)
7593     * (dist.dist * dist.weight * nActive - NJ->selfweight[iNode] * NJ->selfdist[iNode]);
7594   double bottom = (dist.weight * nActive - NJ->selfweight[iNode]);
7595   double pdistOutWithoutA = top/bottom;
7596   NJ->outDistances[iNode] =  bottom > 0.01 ?
7597     pdistOutWithoutA - NJ->diameter[iNode] * (nActive-1) - (NJ->totdiam - NJ->diameter[iNode])
7598     : 3.0;
7599   NJ->nOutDistActive[iNode] = nActive;
7600 
7601   if(verbose>3 && iNode < 5)
7602     fprintf(stderr,"NewOutDist for %d %f from dist %f selfd %f diam %f totdiam %f newActive %d\n",
7603 	    iNode, NJ->outDistances[iNode], dist.dist, NJ->selfdist[iNode], NJ->diameter[iNode],
7604 	    NJ->totdiam, nActive);
7605   if (verbose>6 && (iNode % 10) == 0) {
7606     /* Compute the actual out-distance and compare */
7607     double total = 0.0;
7608     double total_pd = 0.0;
7609     int j;
7610     for (j=0;j<NJ->maxnode;j++) {
7611       if (j!=iNode && (NJ->parent==NULL || NJ->parent[j]<0)) {
7612 	besthit_t bh;
7613 	ProfileDist(NJ->profiles[iNode], NJ->profiles[j], NJ->nPos, NJ->distance_matrix, /*OUT*/&bh);
7614 	total_pd += bh.dist;
7615 	total += bh.dist - (NJ->diameter[iNode] + NJ->diameter[j]);
7616       }
7617     }
7618     fprintf(stderr,"OutDist for Node %d %f truth %f profiled %f truth %f pd_err %f\n",
7619 	    iNode, NJ->outDistances[iNode], total, pdistOutWithoutA, total_pd,fabs(pdistOutWithoutA-total_pd));
7620   }
7621 }
7622 
FreeTopHits(top_hits_t * tophits)7623 top_hits_t *FreeTopHits(top_hits_t *tophits) {
7624   if (tophits == NULL)
7625     return(NULL);
7626   int iNode;
7627   for (iNode = 0; iNode < tophits->maxnodes; iNode++) {
7628     top_hits_list_t *l = &tophits->top_hits_lists[iNode];
7629     if (l->hits != NULL)
7630       l->hits = myfree(l->hits, sizeof(hit_t) * l->nHits);
7631   }
7632   tophits->top_hits_lists = myfree(tophits->top_hits_lists, sizeof(top_hits_list_t) * tophits->maxnodes);
7633   tophits->visible = myfree(tophits->visible, sizeof(hit_t*) * tophits->maxnodes);
7634   tophits->topvisible = myfree(tophits->topvisible, sizeof(int) * tophits->nTopVisible);
7635 #ifdef OPENMP
7636   for (iNode = 0; iNode < tophits->maxnodes; iNode++)
7637     omp_destroy_lock(&tophits->locks[iNode]);
7638   tophits->locks = myfree(tophits->locks, sizeof(omp_lock_t) * tophits->maxnodes);
7639 #endif
7640   return(myfree(tophits, sizeof(top_hits_t)));
7641 }
7642 
InitTopHits(NJ_t * NJ,int m)7643 top_hits_t *InitTopHits(NJ_t *NJ, int m) {
7644   int iNode;
7645   assert(m > 0);
7646   top_hits_t *tophits = mymalloc(sizeof(top_hits_t));
7647   tophits->m = m;
7648   tophits->q = (int)(0.5 + tophits2Mult * sqrt(tophits->m));
7649   if (!useTopHits2nd || tophits->q >= tophits->m)
7650     tophits->q = 0;
7651   tophits->maxnodes = NJ->maxnodes;
7652   tophits->top_hits_lists = mymalloc(sizeof(top_hits_list_t) * tophits->maxnodes);
7653   tophits->visible = mymalloc(sizeof(hit_t) * tophits->maxnodes);
7654   tophits->nTopVisible = (int)(0.5 + topvisibleMult*m);
7655   tophits->topvisible = mymalloc(sizeof(int) * tophits->nTopVisible);
7656 #ifdef OPENMP
7657   tophits->locks = mymalloc(sizeof(omp_lock_t) * tophits->maxnodes);
7658   for (iNode = 0; iNode < tophits->maxnodes; iNode++)
7659     omp_init_lock(&tophits->locks[iNode]);
7660 #endif
7661   int i;
7662   for (i = 0; i < tophits->nTopVisible; i++)
7663     tophits->topvisible[i] = -1; /* empty */
7664   tophits->topvisibleAge = 0;
7665 
7666   for (iNode = 0; iNode < tophits->maxnodes; iNode++) {
7667     top_hits_list_t *l = &tophits->top_hits_lists[iNode];
7668     l->nHits = 0;
7669     l->hits = NULL;
7670     l->hitSource = -1;
7671     l->age = 0;
7672     hit_t *v = &tophits->visible[iNode];
7673     v->j = -1;
7674     v->dist = 1e20;
7675   }
7676   return(tophits);
7677 }
7678 
7679 /* Helper function for sorting in SetAllLeafTopHits,
7680    and the global variables it needs
7681 */
7682 NJ_t *CompareSeedNJ = NULL;
7683 int *CompareSeedGaps = NULL;
CompareSeeds(const void * c1,const void * c2)7684 int CompareSeeds(const void *c1, const void *c2) {
7685   int seed1 = *(int *)c1;
7686   int seed2 = *(int *)c2;
7687   int gapdiff = CompareSeedGaps[seed1] - CompareSeedGaps[seed2];
7688   if (gapdiff != 0) return(gapdiff);	/* fewer gaps is better */
7689   double outdiff = CompareSeedNJ->outDistances[seed1] - CompareSeedNJ->outDistances[seed2];
7690   if(outdiff < 0) return(-1);	/* closer to more nodes is better */
7691   if(outdiff > 0) return(1);
7692   return(0);
7693 }
7694 
7695 /* Using the seed heuristic and the close global variable */
SetAllLeafTopHits(NJ_t * NJ,top_hits_t * tophits)7696 void SetAllLeafTopHits(/*IN/UPDATE*/NJ_t *NJ, /*IN/OUT*/top_hits_t *tophits) {
7697   double close = tophitsClose;
7698   if (close < 0) {
7699     if (fastest && NJ->nSeq >= 50000) {
7700       close = 0.99;
7701     } else {
7702       double logN = log((double)NJ->nSeq)/log(2.0);
7703       close = logN/(logN+2.0);
7704     }
7705   }
7706   /* Sort the potential seeds, by a combination of nGaps and NJ->outDistances
7707      We don't store nGaps so we need to compute that
7708   */
7709   int *nGaps = (int*)mymalloc(sizeof(int)*NJ->nSeq);
7710   int iNode;
7711   for(iNode=0; iNode<NJ->nSeq; iNode++) {
7712     nGaps[iNode] = (int)(0.5 + NJ->nPos - NJ->selfweight[iNode]);
7713   }
7714   int *seeds = (int*)mymalloc(sizeof(int)*NJ->nSeq);
7715   for (iNode=0; iNode<NJ->nSeq; iNode++) seeds[iNode] = iNode;
7716   CompareSeedNJ = NJ;
7717   CompareSeedGaps = nGaps;
7718   qsort(/*IN/OUT*/seeds, NJ->nSeq, sizeof(int), CompareSeeds);
7719   CompareSeedNJ = NULL;
7720   CompareSeedGaps = NULL;
7721 
7722   /* For each seed, save its top 2*m hits and then look for close neighbors */
7723   assert(2 * tophits->m <= NJ->nSeq);
7724   int iSeed;
7725   int nHasTopHits = 0;
7726 #ifdef OPENMP
7727   #pragma omp parallel for schedule(dynamic, 50)
7728 #endif
7729   for(iSeed=0; iSeed < NJ->nSeq; iSeed++) {
7730     int seed = seeds[iSeed];
7731     if (iSeed > 0 && (iSeed % 100) == 0) {
7732 #ifdef OPENMP
7733       #pragma omp critical
7734 #endif
7735       ProgressReport("Top hits for %6d of %6d seqs (at seed %6d)",
7736 		     nHasTopHits, NJ->nSeq,
7737 		     iSeed, 0);
7738     }
7739     if (tophits->top_hits_lists[seed].nHits > 0) {
7740       if(verbose>2) fprintf(stderr, "Skipping seed %d\n", seed);
7741       continue;
7742     }
7743 
7744     besthit_t *besthitsSeed = (besthit_t*)mymalloc(sizeof(besthit_t)*NJ->nSeq);
7745     besthit_t *besthitsNeighbor = (besthit_t*)mymalloc(sizeof(besthit_t) * 2 * tophits->m);
7746     besthit_t bestjoin;
7747 
7748     if(verbose>2) fprintf(stderr,"Trying seed %d\n", seed);
7749     SetBestHit(seed, NJ, /*nActive*/NJ->nSeq, /*OUT*/&bestjoin, /*OUT*/besthitsSeed);
7750 
7751     /* sort & save top hits of self. besthitsSeed is now sorted. */
7752     SortSaveBestHits(seed, /*IN/SORT*/besthitsSeed, /*IN-SIZE*/NJ->nSeq,
7753 		     /*OUT-SIZE*/tophits->m, /*IN/OUT*/tophits);
7754     nHasTopHits++;
7755 
7756     /* find "close" neighbors and compute their top hits */
7757     double neardist = besthitsSeed[2 * tophits->m - 1].dist * close;
7758     /* must have at least average weight, rem higher is better
7759        and allow a bit more than average, e.g. if we are looking for within 30% away,
7760        20% more gaps than usual seems OK
7761        Alternatively, have a coverage requirement in case neighbor is short
7762        If fastest, consider the top q/2 hits to be close neighbors, regardless
7763     */
7764     double nearweight = 0;
7765     int iClose;
7766     for (iClose = 0; iClose < 2 * tophits->m; iClose++)
7767       nearweight += besthitsSeed[iClose].weight;
7768     nearweight = nearweight/(2.0 * tophits->m); /* average */
7769     nearweight *= (1.0-2.0*neardist/3.0);
7770     double nearcover = 1.0 - neardist/2.0;
7771 
7772     if(verbose>2) fprintf(stderr,"Distance limit for close neighbors %f weight %f ungapped %d\n",
7773 			  neardist, nearweight, NJ->nPos-nGaps[seed]);
7774     for (iClose = 0; iClose < tophits->m; iClose++) {
7775       besthit_t *closehit = &besthitsSeed[iClose];
7776       int closeNode = closehit->j;
7777       if (tophits->top_hits_lists[closeNode].nHits > 0)
7778 	continue;
7779 
7780       /* If within close-distance, or identical, use as close neighbor */
7781       bool close = closehit->dist <= neardist
7782 	&& (closehit->weight >= nearweight
7783 	    || closehit->weight >= (NJ->nPos-nGaps[closeNode])*nearcover);
7784       bool identical = closehit->dist < 1e-6
7785 	&& fabs(closehit->weight - (NJ->nPos - nGaps[seed])) < 1e-5
7786 	&& fabs(closehit->weight - (NJ->nPos - nGaps[closeNode])) < 1e-5;
7787       if (useTopHits2nd && iClose < tophits->q && (close || identical)) {
7788 	nHasTopHits++;
7789 	nClose2Used++;
7790 	int nUse = MIN(tophits->q * tophits2Safety, 2 * tophits->m);
7791 	besthit_t *besthitsClose = mymalloc(sizeof(besthit_t) * nUse);
7792 	TransferBestHits(NJ, /*nActive*/NJ->nSeq,
7793 			 closeNode,
7794 			 /*IN*/besthitsSeed, /*SIZE*/nUse,
7795 			 /*OUT*/besthitsClose,
7796 			 /*updateDistance*/true);
7797 	SortSaveBestHits(closeNode, /*IN/SORT*/besthitsClose,
7798 			 /*IN-SIZE*/nUse, /*OUT-SIZE*/tophits->q,
7799 			 /*IN/OUT*/tophits);
7800 	tophits->top_hits_lists[closeNode].hitSource = seed;
7801 	besthitsClose = myfree(besthitsClose, sizeof(besthit_t) * nUse);
7802       } else if (close || identical || (fastest && iClose < (tophits->q+1)/2)) {
7803 	nHasTopHits++;
7804 	nCloseUsed++;
7805 	if(verbose>2) fprintf(stderr, "Near neighbor %d (rank %d weight %f ungapped %d %d)\n",
7806 			      closeNode, iClose, besthitsSeed[iClose].weight,
7807 			      NJ->nPos-nGaps[seed],
7808 			      NJ->nPos-nGaps[closeNode]);
7809 
7810 	/* compute top 2*m hits */
7811 	TransferBestHits(NJ, /*nActive*/NJ->nSeq,
7812 			 closeNode,
7813 			 /*IN*/besthitsSeed, /*SIZE*/2 * tophits->m,
7814 			 /*OUT*/besthitsNeighbor,
7815 			 /*updateDistance*/true);
7816 	SortSaveBestHits(closeNode, /*IN/SORT*/besthitsNeighbor,
7817 			 /*IN-SIZE*/2 * tophits->m, /*OUT-SIZE*/tophits->m,
7818 			 /*IN/OUT*/tophits);
7819 
7820 	/* And then try for a second level of transfer. We assume we
7821 	   are in a good area, because of the 1st
7822 	   level of transfer, and in a small neighborhood, because q is
7823 	   small (32 for 1 million sequences), so we do not make any close checks.
7824 	 */
7825 	int iClose2;
7826 	for (iClose2 = 0; iClose2 < tophits->q && iClose2 < 2 * tophits->m; iClose2++) {
7827 	  int closeNode2 = besthitsNeighbor[iClose2].j;
7828 	  assert(closeNode2 >= 0);
7829 	  if (tophits->top_hits_lists[closeNode2].hits == NULL) {
7830 	    nClose2Used++;
7831 	    nHasTopHits++;
7832 	    int nUse = MIN(tophits->q * tophits2Safety, 2 * tophits->m);
7833 	    besthit_t *besthitsClose2 = mymalloc(sizeof(besthit_t) * nUse);
7834 	    TransferBestHits(NJ, /*nActive*/NJ->nSeq,
7835 			     closeNode2,
7836 			     /*IN*/besthitsNeighbor, /*SIZE*/nUse,
7837 			     /*OUT*/besthitsClose2,
7838 			     /*updateDistance*/true);
7839 	    SortSaveBestHits(closeNode2, /*IN/SORT*/besthitsClose2,
7840 			     /*IN-SIZE*/nUse, /*OUT-SIZE*/tophits->q,
7841 			     /*IN/OUT*/tophits);
7842 	    tophits->top_hits_lists[closeNode2].hitSource = closeNode;
7843 	    besthitsClose2 = myfree(besthitsClose2, sizeof(besthit_t) * nUse);
7844 	  } /* end if should do 2nd-level transfer */
7845 	}
7846       }
7847     } /* end loop over close candidates */
7848     besthitsSeed = myfree(besthitsSeed, sizeof(besthit_t)*NJ->nSeq);
7849     besthitsNeighbor = myfree(besthitsNeighbor, sizeof(besthit_t) * 2 * tophits->m);
7850   } /* end loop over seeds */
7851 
7852   for (iNode=0; iNode<NJ->nSeq; iNode++) {
7853     top_hits_list_t *l = &tophits->top_hits_lists[iNode];
7854     assert(l->hits != NULL);
7855     assert(l->hits[0].j >= 0);
7856     assert(l->hits[0].j < NJ->nSeq);
7857     assert(l->hits[0].j != iNode);
7858     tophits->visible[iNode] = l->hits[0];
7859   }
7860 
7861   if (verbose >= 2) fprintf(stderr, "#Close neighbors among leaves: 1st-level %ld 2nd-level %ld seeds %ld\n",
7862 			    nCloseUsed, nClose2Used, NJ->nSeq-nCloseUsed-nClose2Used);
7863   nGaps = myfree(nGaps, sizeof(int)*NJ->nSeq);
7864   seeds = myfree(seeds, sizeof(int)*NJ->nSeq);
7865 
7866   /* Now add a "checking phase" where we ensure that the q or 2*sqrt(m) hits
7867      of i are represented in j (if they should be)
7868    */
7869   long lReplace = 0;
7870   int nCheck = tophits->q > 0 ? tophits->q : (int)(0.5 + 2.0*sqrt(tophits->m));
7871   for (iNode = 0; iNode < NJ->nSeq; iNode++) {
7872     if ((iNode % 100) == 0)
7873       ProgressReport("Checking top hits for %6d of %6d seqs",
7874 		     iNode+1, NJ->nSeq, 0, 0);
7875     top_hits_list_t *lNode = &tophits->top_hits_lists[iNode];
7876     int iHit;
7877     for (iHit = 0; iHit < nCheck && iHit < lNode->nHits; iHit++) {
7878       besthit_t bh = HitToBestHit(iNode, lNode->hits[iHit]);
7879       SetCriterion(NJ, /*nActive*/NJ->nSeq, /*IN/OUT*/&bh);
7880       top_hits_list_t *lTarget = &tophits->top_hits_lists[bh.j];
7881 
7882       /* If this criterion is worse than the nCheck-1 entry of the target,
7883 	 then skip the check.
7884 	 This logic is based on assuming that the list is sorted,
7885 	 which is true initially but may not be true later.
7886 	 Still, is a good heuristic.
7887       */
7888       assert(nCheck > 0);
7889       assert(nCheck <= lTarget->nHits);
7890       besthit_t bhCheck = HitToBestHit(bh.j, lTarget->hits[nCheck-1]);
7891       SetCriterion(NJ, /*nActive*/NJ->nSeq, /*IN/OUT*/&bhCheck);
7892       if (bhCheck.criterion < bh.criterion)
7893 	continue;		/* no check needed */
7894 
7895       /* Check if this is present in the top-hit list */
7896       int iHit2;
7897       bool bFound = false;
7898       for (iHit2 = 0; iHit2 < lTarget->nHits && !bFound; iHit2++)
7899 	if (lTarget->hits[iHit2].j == iNode)
7900 	  bFound = true;
7901       if (!bFound) {
7902 	/* Find the hit with the worst criterion and replace it with this one */
7903 	int iWorst = -1;
7904 	double dWorstCriterion = -1e20;
7905 	for (iHit2 = 0; iHit2 < lTarget->nHits; iHit2++) {
7906 	  besthit_t bh2 = HitToBestHit(bh.j, lTarget->hits[iHit2]);
7907 	  SetCriterion(NJ, /*nActive*/NJ->nSeq, /*IN/OUT*/&bh2);
7908 	  if (bh2.criterion > dWorstCriterion) {
7909 	    iWorst = iHit2;
7910 	    dWorstCriterion = bh2.criterion;
7911 	  }
7912 	}
7913 	if (dWorstCriterion > bh.criterion) {
7914 	  assert(iWorst >= 0);
7915 	  lTarget->hits[iWorst].j = iNode;
7916 	  lTarget->hits[iWorst].dist = bh.dist;
7917 	  lReplace++;
7918 	  /* and perhaps update visible */
7919 	  besthit_t v;
7920 	  bool bSuccess = GetVisible(NJ, /*nActive*/NJ->nSeq, tophits, bh.j, /*OUT*/&v);
7921 	  assert(bSuccess);
7922 	  if (bh.criterion < v.criterion)
7923 	    tophits->visible[bh.j] = lTarget->hits[iWorst];
7924 	}
7925       }
7926     }
7927   }
7928 
7929   if (verbose >= 2)
7930     fprintf(stderr, "Replaced %ld top hit entries\n", lReplace);
7931 }
7932 
7933 /* Updates out-distances but does not reset or update visible set */
GetBestFromTopHits(int iNode,NJ_t * NJ,int nActive,top_hits_t * tophits,besthit_t * bestjoin)7934 void GetBestFromTopHits(int iNode,
7935 			/*IN/UPDATE*/NJ_t *NJ,
7936 			int nActive,
7937 			/*IN*/top_hits_t *tophits,
7938 			/*OUT*/besthit_t *bestjoin) {
7939   assert(iNode >= 0);
7940   assert(NJ->parent[iNode] < 0);
7941   top_hits_list_t *l = &tophits->top_hits_lists[iNode];
7942   assert(l->nHits > 0);
7943   assert(l->hits != NULL);
7944 
7945   if(!fastest)
7946     SetOutDistance(NJ, iNode, nActive); /* ensure out-distances are not stale */
7947 
7948   bestjoin->i = -1;
7949   bestjoin->j = -1;
7950   bestjoin->dist = 1e20;
7951   bestjoin->criterion = 1e20;
7952 
7953   int iBest;
7954   for(iBest=0; iBest < l->nHits; iBest++) {
7955     besthit_t bh = HitToBestHit(iNode, l->hits[iBest]);
7956     if (UpdateBestHit(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/&bh, /*update dist*/true)) {
7957       SetCriterion(/*IN/OUT*/NJ, nActive, /*IN/OUT*/&bh); /* make sure criterion is correct */
7958       if (bh.criterion < bestjoin->criterion)
7959 	*bestjoin = bh;
7960     }
7961   }
7962   assert(bestjoin->j >= 0);	/* a hit was found */
7963   assert(bestjoin->i == iNode);
7964 }
7965 
ActiveAncestor(NJ_t * NJ,int iNode)7966 int ActiveAncestor(/*IN*/NJ_t *NJ, int iNode) {
7967   if (iNode < 0)
7968     return(iNode);
7969   while(NJ->parent[iNode] >= 0)
7970     iNode = NJ->parent[iNode];
7971   return(iNode);
7972 }
7973 
UpdateBestHit(NJ_t * NJ,int nActive,besthit_t * hit,bool bUpdateDist)7974 bool UpdateBestHit(/*IN/UPDATE*/NJ_t *NJ, int nActive, /*IN/OUT*/besthit_t *hit,
7975 		   bool bUpdateDist) {
7976   int i = ActiveAncestor(/*IN*/NJ, hit->i);
7977   int j = ActiveAncestor(/*IN*/NJ, hit->j);
7978   if (i < 0 || j < 0 || i == j) {
7979     hit->i = -1;
7980     hit->j = -1;
7981     hit->weight = 0;
7982     hit->dist = 1e20;
7983     hit->criterion = 1e20;
7984     return(false);
7985   }
7986   if (i != hit->i || j != hit->j) {
7987     hit->i = i;
7988     hit->j = j;
7989     if (bUpdateDist) {
7990       SetDistCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/hit);
7991     } else {
7992       hit->dist = -1e20;
7993       hit->criterion = 1e20;
7994     }
7995   }
7996   return(true);
7997 }
7998 
GetVisible(NJ_t * NJ,int nActive,top_hits_t * tophits,int iNode,besthit_t * visible)7999 bool GetVisible(/*IN/UPDATE*/NJ_t *NJ, int nActive,
8000 		/*IN/OUT*/top_hits_t *tophits,
8001 		int iNode, /*OUT*/besthit_t *visible) {
8002   if (iNode < 0 || NJ->parent[iNode] >= 0)
8003     return(false);
8004   hit_t *v = &tophits->visible[iNode];
8005   if (v->j < 0 || NJ->parent[v->j] >= 0)
8006     return(false);
8007   *visible = HitToBestHit(iNode, *v);
8008   SetCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/visible);
8009   return(true);
8010 }
8011 
UniqueBestHits(NJ_t * NJ,int nActive,besthit_t * combined,int nCombined,int * nUniqueOut)8012 besthit_t *UniqueBestHits(/*IN/UPDATE*/NJ_t *NJ, int nActive,
8013 			  /*IN/SORT*/besthit_t *combined, int nCombined,
8014 			  /*OUT*/int *nUniqueOut) {
8015   int iHit;
8016   for (iHit = 0; iHit < nCombined; iHit++) {
8017     besthit_t *hit = &combined[iHit];
8018     UpdateBestHit(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/hit, /*update*/false);
8019   }
8020   qsort(/*IN/OUT*/combined, nCombined, sizeof(besthit_t), CompareHitsByIJ);
8021 
8022   besthit_t *uniqueList = (besthit_t*)mymalloc(sizeof(besthit_t)*nCombined);
8023   int nUnique = 0;
8024   int iSavedLast = -1;
8025 
8026   /* First build the new list */
8027   for (iHit = 0; iHit < nCombined; iHit++) {
8028     besthit_t *hit = &combined[iHit];
8029     if (hit->i < 0 || hit->j < 0)
8030       continue;
8031     if (iSavedLast >= 0) {
8032       /* toss out duplicates */
8033       besthit_t *saved = &combined[iSavedLast];
8034       if (saved->i == hit->i && saved->j == hit->j)
8035 	continue;
8036     }
8037     assert(nUnique < nCombined);
8038     assert(hit->j >= 0 && NJ->parent[hit->j] < 0);
8039     uniqueList[nUnique++] = *hit;
8040     iSavedLast = iHit;
8041   }
8042   *nUniqueOut = nUnique;
8043 
8044   /* Then do any updates to the criterion or the distances in parallel */
8045 #ifdef OPENMP
8046     #pragma omp parallel for schedule(dynamic, 50)
8047 #endif
8048   for (iHit = 0; iHit < nUnique; iHit++) {
8049     besthit_t *hit = &uniqueList[iHit];
8050     if (hit->dist < 0.0)
8051       SetDistCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/hit);
8052     else
8053       SetCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/hit);
8054   }
8055   return(uniqueList);
8056 }
8057 
8058 /*
8059   Create a top hit list for the new node, either
8060   from children (if there are enough best hits left) or by a "refresh"
8061   Also set visible set for newnode
8062   Also update visible set for other nodes if we stumble across a "better" hit
8063 */
8064 
TopHitJoin(int newnode,NJ_t * NJ,int nActive,top_hits_t * tophits)8065 void TopHitJoin(int newnode,
8066 		/*IN/UPDATE*/NJ_t *NJ,
8067 		int nActive,
8068 		/*IN/OUT*/top_hits_t *tophits) {
8069   long startProfileOps = profileOps;
8070   long startOutProfileOps = outprofileOps;
8071   assert(NJ->child[newnode].nChild == 2);
8072   top_hits_list_t *lNew = &tophits->top_hits_lists[newnode];
8073   assert(lNew->hits == NULL);
8074 
8075   /* Copy the hits */
8076   int i;
8077   top_hits_list_t *lChild[2];
8078   for (i = 0; i< 2; i++) {
8079     lChild[i] = &tophits->top_hits_lists[NJ->child[newnode].child[i]];
8080     assert(lChild[i]->hits != NULL && lChild[i]->nHits > 0);
8081   }
8082   int nCombined = lChild[0]->nHits + lChild[1]->nHits;
8083   besthit_t *combinedList = (besthit_t*)mymalloc(sizeof(besthit_t)*nCombined);
8084   HitsToBestHits(lChild[0]->hits, lChild[0]->nHits, NJ->child[newnode].child[0],
8085 		 /*OUT*/combinedList);
8086   HitsToBestHits(lChild[1]->hits, lChild[1]->nHits, NJ->child[newnode].child[1],
8087 		 /*OUT*/combinedList + lChild[0]->nHits);
8088   int nUnique;
8089   /* UniqueBestHits() replaces children (used in the calls to HitsToBestHits)
8090      with active ancestors, so all distances & criteria will be recomputed */
8091   besthit_t *uniqueList = UniqueBestHits(/*IN/UPDATE*/NJ, nActive,
8092 					 /*IN/SORT*/combinedList,
8093 					 nCombined,
8094 					 /*OUT*/&nUnique);
8095   int nUniqueAlloc = nCombined;
8096   combinedList = myfree(combinedList, sizeof(besthit_t)*nCombined);
8097 
8098   /* Forget the top-hit lists of the joined nodes */
8099   for (i = 0; i < 2; i++) {
8100     lChild[i]->hits = myfree(lChild[i]->hits, sizeof(hit_t) * lChild[i]->nHits);
8101     lChild[i]->nHits = 0;
8102   }
8103 
8104   /* Use the average age, rounded up, by 1 Versions 2.0 and earlier
8105      used the maximum age, which leads to more refreshes without
8106      improving the accuracy of the NJ phase. Intuitively, if one of
8107      them was just refreshed then another refresh is unlikely to help.
8108    */
8109   lNew->age = (lChild[0]->age+lChild[1]->age+1)/2 + 1;
8110 
8111   /* If top hit ages always match (perfectly balanced), then a
8112      limit of log2(m) would mean a refresh after
8113      m joins, which is about what we want.
8114   */
8115   int tophitAgeLimit = MAX(1, (int)(0.5 + log((double)tophits->m)/log(2.0)));
8116 
8117   /* Either use the merged list as candidate top hits, or
8118      move from 2nd level to 1st level, or do a refresh
8119      UniqueBestHits eliminates hits to self, so if nUnique==nActive-1,
8120      we've already done the exhaustive search.
8121 
8122      Either way, we set tophits, visible(newnode), update visible of its top hits,
8123      and modify topvisible: if we do a refresh, then we reset it, otherwise we update
8124   */
8125   bool bSecondLevel = lChild[0]->hitSource >= 0 && lChild[1]->hitSource >= 0;
8126   bool bUseUnique = nUnique==nActive-1
8127     || (lNew->age <= tophitAgeLimit
8128 	&& nUnique >= (bSecondLevel ? (int)(0.5 + tophits2Refresh * tophits->q)
8129 		       : (int)(0.5 + tophits->m * tophitsRefresh) ));
8130   if (bUseUnique && verbose > 2)
8131     fprintf(stderr,"Top hits for %d from combined %d nActive=%d tophitsage %d %s\n",
8132 	    newnode,nUnique,nActive,lNew->age,
8133 	    bSecondLevel ? "2ndlevel" : "1stlevel");
8134 
8135   if (!bUseUnique
8136       && bSecondLevel
8137       && lNew->age <= tophitAgeLimit) {
8138     int source = ActiveAncestor(NJ, lChild[0]->hitSource);
8139     if (source == newnode)
8140       source = ActiveAncestor(NJ, lChild[1]->hitSource);
8141     /* In parallel mode, it is possible that we would select a node as the
8142        hit-source and then over-write that top hit with a short list.
8143        So we need this sanity check.
8144     */
8145     if (source != newnode
8146 	&& source >= 0
8147 	&& tophits->top_hits_lists[source].hitSource < 0) {
8148 
8149       /* switch from 2nd-level to 1st-level top hits -- compute top hits list
8150 	 of node from what we have so far plus the active source plus its top hits */
8151       top_hits_list_t *lSource = &tophits->top_hits_lists[source];
8152       assert(lSource->hitSource < 0);
8153       assert(lSource->nHits > 0);
8154       int nMerge = 1 + lSource->nHits + nUnique;
8155       besthit_t *mergeList = mymalloc(sizeof(besthit_t) * nMerge);
8156       memcpy(/*to*/mergeList, /*from*/uniqueList, nUnique * sizeof(besthit_t));
8157 
8158       int iMerge = nUnique;
8159       mergeList[iMerge].i = newnode;
8160       mergeList[iMerge].j = source;
8161       SetDistCriterion(NJ, nActive, /*IN/OUT*/&mergeList[iMerge]);
8162       iMerge++;
8163       HitsToBestHits(lSource->hits, lSource->nHits, newnode, /*OUT*/mergeList+iMerge);
8164       for (i = 0; i < lSource->nHits; i++) {
8165 	SetDistCriterion(NJ, nActive, /*IN/OUT*/&mergeList[iMerge]);
8166 	iMerge++;
8167       }
8168       assert(iMerge == nMerge);
8169 
8170       uniqueList = myfree(uniqueList, nUniqueAlloc * sizeof(besthit_t));
8171       uniqueList = UniqueBestHits(/*IN/UPDATE*/NJ, nActive,
8172 				  /*IN/SORT*/mergeList,
8173 				  nMerge,
8174 				  /*OUT*/&nUnique);
8175       nUniqueAlloc = nMerge;
8176       mergeList = myfree(mergeList, sizeof(besthit_t)*nMerge);
8177 
8178       assert(nUnique > 0);
8179       bUseUnique = nUnique >= (int)(0.5 + tophits->m * tophitsRefresh);
8180       bSecondLevel = false;
8181 
8182       if (bUseUnique && verbose > 2)
8183 	fprintf(stderr, "Top hits for %d from children and source %d's %d hits, nUnique %d\n",
8184 		newnode, source, lSource->nHits, nUnique);
8185     }
8186   }
8187 
8188   if (bUseUnique) {
8189     if (bSecondLevel) {
8190       /* pick arbitrarily */
8191       lNew->hitSource = lChild[0]->hitSource;
8192     }
8193     int nSave = MIN(nUnique, bSecondLevel ? tophits->q : tophits->m);
8194     assert(nSave>0);
8195     if (verbose > 2)
8196       fprintf(stderr, "Combined %d ops so far %ld\n", nUnique, profileOps - startProfileOps);
8197     SortSaveBestHits(newnode, /*IN/SORT*/uniqueList, /*nIn*/nUnique,
8198 		     /*nOut*/nSave, /*IN/OUT*/tophits);
8199     assert(lNew->hits != NULL); /* set by sort/save */
8200     tophits->visible[newnode] = lNew->hits[0];
8201     UpdateTopVisible(/*IN*/NJ, nActive, newnode, &tophits->visible[newnode],
8202 		     /*IN/OUT*/tophits);
8203     UpdateVisible(/*IN/UPDATE*/NJ, nActive, /*IN*/uniqueList, nSave, /*IN/OUT*/tophits);
8204   } else {
8205     /* need to refresh: set top hits for node and for its top hits */
8206     if(verbose > 2) fprintf(stderr,"Top hits for %d by refresh (%d unique age %d) nActive=%d\n",
8207 			  newnode,nUnique,lNew->age,nActive);
8208     nRefreshTopHits++;
8209     lNew->age = 0;
8210 
8211     int iNode;
8212     /* ensure all out-distances are up to date ahead of time
8213        to avoid any data overwriting issues.
8214     */
8215 #ifdef OPENMP
8216     #pragma omp parallel for schedule(dynamic, 50)
8217 #endif
8218     for (iNode = 0; iNode < NJ->maxnode; iNode++) {
8219       if (NJ->parent[iNode] < 0) {
8220 	if (fastest) {
8221 	  besthit_t bh;
8222 	  bh.i = iNode;
8223 	  bh.j = iNode;
8224 	  bh.dist = 0;
8225 	  SetCriterion(/*IN/UPDATE*/NJ, nActive, &bh);
8226 	} else {
8227 	  SetOutDistance(/*IN/UDPATE*/NJ, iNode, nActive);
8228 	}
8229       }
8230     }
8231 
8232     /* exhaustively get the best 2*m hits for newnode, set visible, and save the top m */
8233     besthit_t *allhits = (besthit_t*)mymalloc(sizeof(besthit_t)*NJ->maxnode);
8234     assert(2 * tophits->m <= NJ->maxnode);
8235     besthit_t bh;
8236     SetBestHit(newnode, NJ, nActive, /*OUT*/&bh, /*OUT*/allhits);
8237     qsort(/*IN/OUT*/allhits, NJ->maxnode, sizeof(besthit_t), CompareHitsByCriterion);
8238     SortSaveBestHits(newnode, /*IN/SORT*/allhits, /*nIn*/NJ->maxnode,
8239 		     /*nOut*/tophits->m, /*IN/OUT*/tophits);
8240 
8241     /* Do not need to call UpdateVisible because we set visible below */
8242 
8243     /* And use the top 2*m entries to expand other best-hit lists, but only for top m */
8244     int iHit;
8245 #ifdef OPENMP
8246     #pragma omp parallel for schedule(dynamic, 50)
8247 #endif
8248     for (iHit=0; iHit < tophits->m; iHit++) {
8249       if (allhits[iHit].i < 0) continue;
8250       int iNode = allhits[iHit].j;
8251       assert(iNode>=0);
8252       if (NJ->parent[iNode] >= 0) continue;
8253       top_hits_list_t *l = &tophits->top_hits_lists[iNode];
8254       int nHitsOld = l->nHits;
8255       assert(nHitsOld <= tophits->m);
8256       l->age = 0;
8257 
8258       /* Merge: old hits into 0->nHitsOld and hits from iNode above that */
8259       besthit_t *bothList = (besthit_t*)mymalloc(sizeof(besthit_t) * 3 * tophits->m);
8260       HitsToBestHits(/*IN*/l->hits, nHitsOld, iNode, /*OUT*/bothList); /* does not compute criterion */
8261       for (i = 0; i < nHitsOld; i++)
8262 	SetCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/&bothList[i]);
8263       if (nActive <= 2 * tophits->m)
8264 	l->hitSource = -1;	/* abandon the 2nd-level top-hits heuristic */
8265       int nNewHits = l->hitSource >= 0 ? tophits->q : tophits->m;
8266       assert(nNewHits > 0);
8267 
8268       TransferBestHits(/*IN/UPDATE*/NJ, nActive, iNode,
8269 		       /*IN*/allhits, /*nOldHits*/2 * nNewHits,
8270 		       /*OUT*/&bothList[nHitsOld],
8271 		       /*updateDist*/false); /* rely on UniqueBestHits to update dist and/or criterion */
8272       int nUnique2;
8273       besthit_t *uniqueList2 = UniqueBestHits(/*IN/UPDATE*/NJ, nActive,
8274 					      /*IN/SORT*/bothList, nHitsOld + 2 * nNewHits,
8275 					      /*OUT*/&nUnique2);
8276       assert(nUnique2 > 0);
8277       bothList = myfree(bothList,3 * tophits->m * sizeof(besthit_t));
8278 
8279       /* Note this will overwrite l, but we saved nHitsOld */
8280       SortSaveBestHits(iNode, /*IN/SORT*/uniqueList2, /*nIn*/nUnique2,
8281 		       /*nOut*/nNewHits, /*IN/OUT*/tophits);
8282       /* will update topvisible below */
8283       tophits->visible[iNode] = tophits->top_hits_lists[iNode].hits[0];
8284       uniqueList2 = myfree(uniqueList2, (nHitsOld + 2 * tophits->m) * sizeof(besthit_t));
8285     }
8286 
8287     ResetTopVisible(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/tophits); /* outside of the parallel phase */
8288     allhits = myfree(allhits,sizeof(besthit_t)*NJ->maxnode);
8289   }
8290   uniqueList = myfree(uniqueList, nUniqueAlloc * sizeof(besthit_t));
8291   if (verbose > 2) {
8292     fprintf(stderr, "New top-hit list for %d profile-ops %ld (out-ops %ld): source %d age %d members ",
8293 	    newnode,
8294 	    profileOps - startProfileOps,
8295 	    outprofileOps - startOutProfileOps,
8296 	    lNew->hitSource, lNew->age);
8297 
8298     int i;
8299     for (i = 0; i < lNew->nHits; i++)
8300       fprintf(stderr, " %d", lNew->hits[i].j);
8301     fprintf(stderr,"\n");
8302   }
8303 }
8304 
UpdateVisible(NJ_t * NJ,int nActive,besthit_t * tophitsNode,int nTopHits,top_hits_t * tophits)8305 void UpdateVisible(/*IN/UPDATE*/NJ_t *NJ, int nActive,
8306 		   /*IN*/besthit_t *tophitsNode,
8307 		   int nTopHits,
8308 		  /*IN/OUT*/top_hits_t *tophits) {
8309   int iHit;
8310 
8311   for(iHit = 0; iHit < nTopHits; iHit++) {
8312     besthit_t *hit = &tophitsNode[iHit];
8313     if (hit->i < 0) continue;	/* possible empty entries */
8314     assert(NJ->parent[hit->i] < 0);
8315     assert(hit->j >= 0 && NJ->parent[hit->j] < 0);
8316     besthit_t visible;
8317     bool bSuccess = GetVisible(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/tophits, hit->j, /*OUT*/&visible);
8318     if (!bSuccess || hit->criterion < visible.criterion) {
8319       if (bSuccess)
8320 	nVisibleUpdate++;
8321       hit_t *v = &tophits->visible[hit->j];
8322       v->j = hit->i;
8323       v->dist = hit->dist;
8324       UpdateTopVisible(NJ, nActive, hit->j, v, /*IN/OUT*/tophits);
8325       if(verbose>5) fprintf(stderr,"NewVisible %d %d %f\n",
8326 			    hit->j,v->j,v->dist);
8327     }
8328   } /* end loop over hits */
8329 }
8330 
8331 /* Update the top-visible list to perhaps include visible[iNode] */
UpdateTopVisible(NJ_t * NJ,int nActive,int iIn,hit_t * hit,top_hits_t * tophits)8332 void UpdateTopVisible(/*IN*/NJ_t * NJ, int nActive,
8333 		      int iIn, /*IN*/hit_t *hit,
8334 		      /*IN/OUT*/top_hits_t *tophits) {
8335   assert(tophits != NULL);
8336   bool bIn = false; 		/* placed in the list */
8337   int i;
8338 
8339   /* First, if the list is not full, put it in somewhere */
8340   for (i = 0; i < tophits->nTopVisible && !bIn; i++) {
8341     int iNode = tophits->topvisible[i];
8342     if (iNode == iIn) {
8343       /* this node is already in the top hit list */
8344       bIn = true;
8345     } else if (iNode < 0 || NJ->parent[iNode] >= 0) {
8346       /* found an empty spot */
8347       bIn = true;
8348       tophits->topvisible[i] = iIn;
8349     }
8350   }
8351 
8352   int iPosWorst = -1;
8353   double dCriterionWorst = -1e20;
8354   if (!bIn) {
8355     /* Search for the worst hit */
8356     for (i = 0; i < tophits->nTopVisible && !bIn; i++) {
8357       int iNode = tophits->topvisible[i];
8358       assert(iNode >= 0 && NJ->parent[iNode] < 0 && iNode != iIn);
8359       besthit_t visible;
8360       if (!GetVisible(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/tophits, iNode, /*OUT*/&visible)) {
8361 	/* found an empty spot */
8362 	tophits->topvisible[i] = iIn;
8363 	bIn = true;
8364       } else if (visible.i == hit->j && visible.j == iIn) {
8365 	/* the reverse hit is already in the top hit list */
8366 	bIn = true;
8367       } else if (visible.criterion >= dCriterionWorst) {
8368 	iPosWorst = i;
8369 	dCriterionWorst = visible.criterion;
8370       }
8371     }
8372   }
8373 
8374   if (!bIn && iPosWorst >= 0) {
8375     besthit_t visible = HitToBestHit(iIn, *hit);
8376     SetCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/&visible);
8377     if (visible.criterion < dCriterionWorst) {
8378       if (verbose > 2) {
8379 	int iOld = tophits->topvisible[iPosWorst];
8380 	fprintf(stderr, "TopVisible replace %d=>%d with %d=>%d\n",
8381 		iOld, tophits->visible[iOld].j, visible.i, visible.j);
8382       }
8383       tophits->topvisible[iPosWorst] = iIn;
8384     }
8385   }
8386 
8387   if (verbose > 2) {
8388     fprintf(stderr, "Updated TopVisible: ");
8389     for (i = 0; i < tophits->nTopVisible; i++) {
8390       int iNode = tophits->topvisible[i];
8391       if (iNode >= 0 && NJ->parent[iNode] < 0) {
8392 	besthit_t bh = HitToBestHit(iNode, tophits->visible[iNode]);
8393 	SetDistCriterion(NJ, nActive, &bh);
8394 	fprintf(stderr, " %d=>%d:%.4f", bh.i, bh.j, bh.criterion);
8395       }
8396     }
8397     fprintf(stderr,"\n");
8398   }
8399 }
8400 
8401 /* Recompute the topvisible list */
ResetTopVisible(NJ_t * NJ,int nActive,top_hits_t * tophits)8402 void ResetTopVisible(/*IN/UPDATE*/NJ_t *NJ,
8403 		     int nActive,
8404 		     /*IN/OUT*/top_hits_t *tophits) {
8405   besthit_t *visibleSorted = mymalloc(sizeof(besthit_t)*nActive);
8406   int nVisible = 0;		/* #entries in visibleSorted */
8407   int iNode;
8408   for (iNode = 0; iNode < NJ->maxnode; iNode++) {
8409     /* skip joins involving stale nodes */
8410     if (NJ->parent[iNode] >= 0)
8411       continue;
8412     besthit_t v;
8413     if (GetVisible(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/tophits, iNode, /*OUT*/&v)) {
8414       assert(nVisible < nActive);
8415       visibleSorted[nVisible++] = v;
8416     }
8417   }
8418   assert(nVisible > 0);
8419 
8420   qsort(/*IN/OUT*/visibleSorted,nVisible,sizeof(besthit_t),CompareHitsByCriterion);
8421 
8422   /* Only keep the top m items, and try to avoid duplicating i->j with j->i
8423      Note that visible(i) -> j does not necessarily imply visible(j) -> i,
8424      so we store what the pairing was (or -1 for not used yet)
8425    */
8426   int *inTopVisible = malloc(sizeof(int) * NJ->maxnodes);
8427   int i;
8428   for (i = 0; i < NJ->maxnodes; i++)
8429     inTopVisible[i] = -1;
8430 
8431   if (verbose > 2)
8432     fprintf(stderr, "top-hit search: nActive %d nVisible %d considering up to %d items\n",
8433 	    nActive, nVisible, tophits->m);
8434 
8435   /* save the sorted indices in topvisible */
8436   int iSave = 0;
8437   for (i = 0; i < nVisible && iSave < tophits->nTopVisible; i++) {
8438     besthit_t *v = &visibleSorted[i];
8439     if (inTopVisible[v->i] != v->j) { /* not seen already */
8440       tophits->topvisible[iSave++] = v->i;
8441       inTopVisible[v->i] = v->j;
8442       inTopVisible[v->j] = v->i;
8443     }
8444   }
8445   while(iSave < tophits->nTopVisible)
8446     tophits->topvisible[iSave++] = -1;
8447   myfree(visibleSorted, sizeof(besthit_t)*nActive);
8448   myfree(inTopVisible, sizeof(int) * NJ->maxnodes);
8449   tophits->topvisibleAge = 0;
8450   if (verbose > 2) {
8451     fprintf(stderr, "Reset TopVisible: ");
8452     for (i = 0; i < tophits->nTopVisible; i++) {
8453       int iNode = tophits->topvisible[i];
8454       if (iNode < 0)
8455 	break;
8456       fprintf(stderr, " %d=>%d", iNode, tophits->visible[iNode].j);
8457     }
8458     fprintf(stderr,"\n");
8459   }
8460 }
8461 
8462 /*
8463   Find best hit to do in O(N*log(N) + m*L*log(N)) time, by
8464   copying and sorting the visible list
8465   updating out-distances for the top (up to m) candidates
8466   selecting the best hit
8467   if !fastest then
8468   	local hill-climbing for a better join,
8469 	using best-hit lists only, and updating
8470 	all out-distances in every best-hit list
8471 */
TopHitNJSearch(NJ_t * NJ,int nActive,top_hits_t * tophits,besthit_t * join)8472 void TopHitNJSearch(/*IN/UPDATE*/NJ_t *NJ, int nActive,
8473 		    /*IN/OUT*/top_hits_t *tophits,
8474 		    /*OUT*/besthit_t *join) {
8475   /* first, do we have at least m/2 candidates in topvisible?
8476      And remember the best one */
8477   int nCandidate = 0;
8478   int iNodeBestCandidate = -1;
8479   double dBestCriterion = 1e20;
8480 
8481   int i;
8482   for (i = 0; i < tophits->nTopVisible; i++) {
8483     int iNode = tophits->topvisible[i];
8484     besthit_t visible;
8485     if (GetVisible(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/tophits, iNode, /*OUT*/&visible)) {
8486       nCandidate++;
8487       if (iNodeBestCandidate < 0 || visible.criterion < dBestCriterion) {
8488 	iNodeBestCandidate = iNode;
8489 	dBestCriterion = visible.criterion;
8490       }
8491     }
8492   }
8493 
8494   tophits->topvisibleAge++;
8495   /* Note we may have only nActive/2 joins b/c we try to store them once */
8496   if (2 * tophits->topvisibleAge > tophits->m
8497       || (3*nCandidate < tophits->nTopVisible && 3*nCandidate < nActive)) {
8498     /* recompute top visible */
8499     if (verbose > 2)
8500       fprintf(stderr, "Resetting the top-visible list at nActive=%d\n",nActive);
8501 
8502     /* If age is low, then our visible set is becoming too sparse, because we have
8503        recently recomputed the top visible subset. This is very rare but can happen
8504        with -fastest. A quick-and-dirty solution is to walk up
8505        the parents to get additional entries in top hit lists. To ensure that the
8506        visible set becomes full, pick an arbitrary node if walking up terminates at self.
8507     */
8508     if (tophits->topvisibleAge <= 2) {
8509       if (verbose > 2)
8510 	fprintf(stderr, "Expanding visible set by walking up to active nodes at nActive=%d\n", nActive);
8511       int iNode;
8512       for (iNode = 0; iNode < NJ->maxnode; iNode++) {
8513 	if (NJ->parent[iNode] >= 0)
8514 	  continue;
8515 	hit_t *v = &tophits->visible[iNode];
8516 	int newj = ActiveAncestor(NJ, v->j);
8517 	if (newj >= 0 && newj != v->j) {
8518 	  if (newj == iNode) {
8519 	    /* pick arbitrarily */
8520 	    newj = 0;
8521 	    while (NJ->parent[newj] >= 0 || newj == iNode)
8522 	      newj++;
8523 	  }
8524 	  assert(newj >= 0 && newj < NJ->maxnodes
8525 		 && newj != iNode
8526 		 && NJ->parent[newj] < 0);
8527 
8528 	  /* Set v to point to newj */
8529 	  besthit_t bh = { iNode, newj, -1e20, -1e20, -1e20 };
8530 	  SetDistCriterion(NJ, nActive, /*IN/OUT*/&bh);
8531 	  v->j = newj;
8532 	  v->dist = bh.dist;
8533 	}
8534       }
8535     }
8536     ResetTopVisible(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/tophits);
8537     /* and recurse to try again */
8538     TopHitNJSearch(NJ, nActive, tophits, join);
8539     return;
8540   }
8541   if (verbose > 2)
8542     fprintf(stderr, "Top-visible list size %d (nActive %d m %d)\n",
8543 	    nCandidate, nActive, tophits->m);
8544   assert(iNodeBestCandidate >= 0 && NJ->parent[iNodeBestCandidate] < 0);
8545   bool bSuccess = GetVisible(NJ, nActive, tophits, iNodeBestCandidate, /*OUT*/join);
8546   assert(bSuccess);
8547   assert(join->i >= 0 && NJ->parent[join->i] < 0);
8548   assert(join->j >= 0 && NJ->parent[join->j] < 0);
8549 
8550   if(fastest)
8551     return;
8552 
8553   int changed;
8554   do {
8555     changed = 0;
8556 
8557     besthit_t bestI;
8558     GetBestFromTopHits(join->i, NJ, nActive, tophits, /*OUT*/&bestI);
8559     assert(bestI.i == join->i);
8560     if (bestI.j != join->j && bestI.criterion < join->criterion) {
8561       changed = 1;
8562       if (verbose>2)
8563 	fprintf(stderr,"BetterI\t%d\t%d\t%d\t%d\t%f\t%f\n",
8564 		join->i,join->j,bestI.i,bestI.j,
8565 		join->criterion,bestI.criterion);
8566       *join = bestI;
8567     }
8568 
8569     besthit_t bestJ;
8570     GetBestFromTopHits(join->j, NJ, nActive, tophits, /*OUT*/&bestJ);
8571     assert(bestJ.i == join->j);
8572     if (bestJ.j != join->i && bestJ.criterion < join->criterion) {
8573       changed = 1;
8574       if (verbose>2)
8575 	fprintf(stderr,"BetterJ\t%d\t%d\t%d\t%d\t%f\t%f\n",
8576 		join->i,join->j,bestJ.i,bestJ.j,
8577 		join->criterion,bestJ.criterion);
8578       *join = bestJ;
8579     }
8580     if(changed) nHillBetter++;
8581   } while(changed);
8582 }
8583 
NGaps(NJ_t * NJ,int iNode)8584 int NGaps(/*IN*/NJ_t *NJ, int iNode) {
8585   assert(iNode < NJ->nSeq);
8586   int nGaps = 0;
8587   int p;
8588   for(p=0; p<NJ->nPos; p++) {
8589     if (NJ->profiles[iNode]->codes[p] == NOCODE)
8590       nGaps++;
8591   }
8592   return(nGaps);
8593 }
8594 
CompareHitsByCriterion(const void * c1,const void * c2)8595 int CompareHitsByCriterion(const void *c1, const void *c2) {
8596   const besthit_t *hit1 = (besthit_t*)c1;
8597   const besthit_t *hit2 = (besthit_t*)c2;
8598   if (hit1->criterion < hit2->criterion) return(-1);
8599   if (hit1->criterion > hit2->criterion) return(1);
8600   return(0);
8601 }
8602 
CompareHitsByIJ(const void * c1,const void * c2)8603 int CompareHitsByIJ(const void *c1, const void *c2) {
8604   const besthit_t *hit1 = (besthit_t*)c1;
8605   const besthit_t *hit2 = (besthit_t*)c2;
8606   return hit1->i != hit2->i ? hit1->i - hit2->i : hit1->j - hit2->j;
8607 }
8608 
SortSaveBestHits(int iNode,besthit_t * besthits,int nIn,int nOut,top_hits_t * tophits)8609 void SortSaveBestHits(int iNode, /*IN/SORT*/besthit_t *besthits,
8610 		      int nIn, int nOut,
8611 		      /*IN/OUT*/top_hits_t *tophits) {
8612   assert(nIn > 0);
8613   assert(nOut > 0);
8614   top_hits_list_t *l = &tophits->top_hits_lists[iNode];
8615   /*  */
8616   qsort(/*IN/OUT*/besthits,nIn,sizeof(besthit_t),CompareHitsByCriterion);
8617 
8618   /* First count how many we will save
8619      Not sure if removing duplicates is actually necessary.
8620    */
8621   int nSave = 0;
8622   int jLast = -1;
8623   int iBest;
8624   for (iBest = 0; iBest < nIn && nSave < nOut; iBest++) {
8625     if (besthits[iBest].i < 0)
8626       continue;
8627     assert(besthits[iBest].i == iNode);
8628     int j = besthits[iBest].j;
8629     if (j != iNode && j != jLast && j >= 0) {
8630       nSave++;
8631       jLast = j;
8632     }
8633   }
8634 
8635   assert(nSave > 0);
8636 
8637 #ifdef OPENMP
8638   omp_set_lock(&tophits->locks[iNode]);
8639 #endif
8640   if (l->hits != NULL) {
8641     l->hits = myfree(l->hits, l->nHits * sizeof(hit_t));
8642     l->nHits = 0;
8643   }
8644   l->hits = mymalloc(sizeof(hit_t) * nSave);
8645   l->nHits = nSave;
8646   int iSave = 0;
8647   jLast = -1;
8648   for (iBest = 0; iBest < nIn && iSave < nSave; iBest++) {
8649     int j = besthits[iBest].j;
8650     if (j != iNode && j != jLast && j >= 0) {
8651       l->hits[iSave].j = j;
8652       l->hits[iSave].dist = besthits[iBest].dist;
8653       iSave++;
8654       jLast = j;
8655     }
8656   }
8657 #ifdef OPENMP
8658   omp_unset_lock(&tophits->locks[iNode]);
8659 #endif
8660   assert(iSave == nSave);
8661 }
8662 
TransferBestHits(NJ_t * NJ,int nActive,int iNode,besthit_t * oldhits,int nOldHits,besthit_t * newhits,bool updateDistances)8663 void TransferBestHits(/*IN/UPDATE*/NJ_t *NJ,
8664 		       int nActive,
8665 		      int iNode,
8666 		      /*IN*/besthit_t *oldhits,
8667 		      int nOldHits,
8668 		      /*OUT*/besthit_t *newhits,
8669 		      bool updateDistances) {
8670   assert(iNode >= 0);
8671   assert(NJ->parent[iNode] < 0);
8672 
8673   int iBest;
8674   for(iBest = 0; iBest < nOldHits; iBest++) {
8675     besthit_t *old = &oldhits[iBest];
8676     besthit_t *new = &newhits[iBest];
8677     new->i = iNode;
8678     new->j = ActiveAncestor(/*IN*/NJ, old->j);
8679     new->dist = old->dist;	/* may get reset below */
8680     new->weight = old->weight;
8681     new->criterion = old->criterion;
8682 
8683     if(new->j < 0 || new->j == iNode) {
8684       new->weight = 0;
8685       new->dist = -1e20;
8686       new->criterion = 1e20;
8687     } else if (new->i != old->i || new->j != old->j) {
8688       if (updateDistances)
8689 	SetDistCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/new);
8690       else {
8691 	new->dist = -1e20;
8692 	new->criterion = 1e20;
8693       }
8694     } else {
8695       if (updateDistances)
8696 	SetCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/new);
8697       else
8698 	new->criterion = 1e20;	/* leave dist alone */
8699     }
8700   }
8701 }
8702 
HitsToBestHits(hit_t * hits,int nHits,int iNode,besthit_t * newhits)8703 void HitsToBestHits(/*IN*/hit_t *hits, int nHits, int iNode, /*OUT*/besthit_t *newhits) {
8704   int i;
8705   for (i = 0; i < nHits; i++) {
8706     hit_t *hit = &hits[i];
8707     besthit_t *bh = &newhits[i];
8708     bh->i = iNode;
8709     bh->j = hit->j;
8710     bh->dist = hit->dist;
8711     bh->criterion = 1e20;
8712     bh->weight = -1;		/* not the true value -- we compute these directly when needed */
8713   }
8714 }
8715 
HitToBestHit(int i,hit_t hit)8716 besthit_t HitToBestHit(int i, hit_t hit) {
8717   besthit_t bh;
8718   bh.i = i;
8719   bh.j = hit.j;
8720   bh.dist = hit.dist;
8721   bh.criterion = 1e20;
8722   bh.weight = -1;
8723   return(bh);
8724 }
8725 
OpenMPString(void)8726 char *OpenMPString(void) {
8727 #ifdef OPENMP
8728   static char buf[100];
8729   sprintf(buf, ", OpenMP (%d threads)", omp_get_max_threads());
8730   return(buf);
8731 #else
8732   return("");
8733 #endif
8734 }
8735 
8736 /* Algorithm 26.2.17 from Abromowitz and Stegun, Handbook of Mathematical Functions
8737    Absolute accuracy of only about 1e-7, which is enough for us
8738 */
pnorm(double x)8739 double pnorm(double x)
8740 {
8741   double b1 =  0.319381530;
8742   double b2 = -0.356563782;
8743   double b3 =  1.781477937;
8744   double b4 = -1.821255978;
8745   double b5 =  1.330274429;
8746   double p  =  0.2316419;
8747   double c  =  0.39894228;
8748 
8749   if(x >= 0.0) {
8750     double t = 1.0 / ( 1.0 + p * x );
8751     return (1.0 - c * exp( -x * x / 2.0 ) * t *
8752 	    ( t *( t * ( t * ( t * b5 + b4 ) + b3 ) + b2 ) + b1 ));
8753   }
8754   /*else*/
8755   double t = 1.0 / ( 1.0 - p * x );
8756   return ( c * exp( -x * x / 2.0 ) * t *
8757 	   ( t *( t * ( t * ( t * b5 + b4 ) + b3 ) + b2 ) + b1 ));
8758 }
8759 
mymalloc(size_t sz)8760 void *mymalloc(size_t sz) {
8761   if (sz == 0) return(NULL);
8762   void *new = aligned_alloc(16, sz);
8763   if (new == NULL) {
8764     fprintf(stderr, "Out of memory\n");
8765     exit(1);
8766   }
8767   szAllAlloc += sz;
8768   mymallocUsed += sz;
8769 #ifdef TRACK_MEMORY
8770   struct mallinfo mi = mallinfo();
8771   if (mi.arena+mi.hblkhd > maxmallocHeap)
8772     maxmallocHeap = mi.arena+mi.hblkhd;
8773 #endif
8774   /* gcc malloc should always return 16-byte-aligned values... */
8775   assert(IS_ALIGNED(new));
8776   return (new);
8777 }
8778 
mymemdup(void * data,size_t sz)8779 void *mymemdup(void *data, size_t sz) {
8780   if(data==NULL) return(NULL);
8781   void *new = mymalloc(sz);
8782   memcpy(/*to*/new, /*from*/data, sz);
8783   return(new);
8784 }
8785 
myrealloc(void * data,size_t szOld,size_t szNew,bool bCopy)8786 void *myrealloc(void *data, size_t szOld, size_t szNew, bool bCopy) {
8787   if (data == NULL && szOld == 0)
8788     return(mymalloc(szNew));
8789   if (data == NULL || szOld == 0 || szNew == 0) {
8790     fprintf(stderr,"Empty myrealloc\n");
8791     exit(1);
8792   }
8793   if (szOld == szNew)
8794     return(data);
8795   void *new = NULL;
8796   if (bCopy) {
8797     /* Try to reduce memory fragmentation by allocating anew and copying
8798        Seems to help in practice */
8799     new = mymemdup(data, szNew);
8800     myfree(data, szOld);
8801   } else {
8802     new = realloc(data,szNew);
8803     if (new == NULL) {
8804       fprintf(stderr, "Out of memory\n");
8805       exit(1);
8806     }
8807     assert(IS_ALIGNED(new));
8808     szAllAlloc += (szNew-szOld);
8809     mymallocUsed += (szNew-szOld);
8810 #ifdef TRACK_MEMORY
8811     struct mallinfo mi = mallinfo();
8812     if (mi.arena+mi.hblkhd > maxmallocHeap)
8813       maxmallocHeap = mi.arena+mi.hblkhd;
8814 #endif
8815   }
8816   return(new);
8817 }
8818 
myfree(void * p,size_t sz)8819 void *myfree(void *p, size_t sz) {
8820   if(p==NULL) return(NULL);
8821   free(p);
8822   mymallocUsed -= sz;
8823   return(NULL);
8824 }
8825 
8826 /******************************************************************************/
8827 /* Minimization of a 1-dimensional function by Brent's method (Numerical Recipes)
8828  * Borrowed from Tree-Puzzle 5.1 util.c under GPL
8829  * Modified by M.N.P to pass in the accessory data for the optimization function,
8830  * to use 2x bounds around the starting guess and expand them if necessary,
8831  * and to use both a fractional and an absolute tolerance
8832  */
8833 
8834 #define ITMAX 100
8835 #define CGOLD 0.3819660
8836 #define TINY 1.0e-20
8837 #define ZEPS 1.0e-10
8838 #define SHFT(a,b,c,d) (a)=(b);(b)=(c);(c)=(d);
8839 #define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a))
8840 
8841 /* Brents method in one dimension */
brent(double ax,double bx,double cx,double (* f)(double,void *),void * data,double ftol,double atol,double * foptx,double * f2optx,double fax,double fbx,double fcx)8842 double brent(double ax, double bx, double cx, double (*f)(double, void *), void *data,
8843 	     double ftol, double atol,
8844 	     double *foptx, double *f2optx, double fax, double fbx, double fcx)
8845 {
8846 	int iter;
8847 	double a,b,d=0,etemp,fu,fv,fw,fx,p,q,r,tol1,tol2,u,v,w,x,xm;
8848 	double xw,wv,vx;
8849 	double e=0.0;
8850 
8851 	a=(ax < cx ? ax : cx);
8852 	b=(ax > cx ? ax : cx);
8853 	x=bx;
8854 	fx=fbx;
8855 	if (fax < fcx) {
8856 		w=ax;
8857 		fw=fax;
8858 		v=cx;
8859 		fv=fcx;
8860 	} else {
8861 		w=cx;
8862 		fw=fcx;
8863 		v=ax;
8864 		fv=fax;
8865 	}
8866 	for (iter=1;iter<=ITMAX;iter++) {
8867 		xm=0.5*(a+b);
8868 		tol1=ftol*fabs(x);
8869 		tol2=2.0*(tol1+ZEPS);
8870 		if (fabs(x-xm) <= (tol2-0.5*(b-a))
8871 		    || fabs(a-b) < atol) {
8872 			*foptx = fx;
8873 			xw = x-w;
8874 			wv = w-v;
8875 			vx = v-x;
8876 			*f2optx = 2.0*(fv*xw + fx*wv + fw*vx)/
8877 				(v*v*xw + x*x*wv + w*w*vx);
8878 			return x;
8879 		}
8880 		if (fabs(e) > tol1) {
8881 			r=(x-w)*(fx-fv);
8882 			q=(x-v)*(fx-fw);
8883 			p=(x-v)*q-(x-w)*r;
8884 			q=2.0*(q-r);
8885 			if (q > 0.0) p = -p;
8886 			q=fabs(q);
8887 			etemp=e;
8888 			e=d;
8889 			if (fabs(p) >= fabs(0.5*q*etemp) || p <= q*(a-x) || p >= q*(b-x))
8890 				d=CGOLD*(e=(x >= xm ? a-x : b-x));
8891 			else {
8892 				d=p/q;
8893 				u=x+d;
8894 				if (u-a < tol2 || b-u < tol2)
8895 					d=SIGN(tol1,xm-x);
8896 			}
8897 		} else {
8898 			d=CGOLD*(e=(x >= xm ? a-x : b-x));
8899 		}
8900 		u=(fabs(d) >= tol1 ? x+d : x+SIGN(tol1,d));
8901 		fu=(*f)(u,data);
8902 		if (fu <= fx) {
8903 			if (u >= x) a=x; else b=x;
8904 			SHFT(v,w,x,u)
8905 			SHFT(fv,fw,fx,fu)
8906 		} else {
8907 			if (u < x) a=u; else b=u;
8908 			if (fu <= fw || w == x) {
8909 				v=w;
8910 				w=u;
8911 				fv=fw;
8912 				fw=fu;
8913 			} else if (fu <= fv || v == x || v == w) {
8914 				v=u;
8915 				fv=fu;
8916 			}
8917 		}
8918 	}
8919 	*foptx = fx;
8920 	xw = x-w;
8921 	wv = w-v;
8922 	vx = v-x;
8923 	*f2optx = 2.0*(fv*xw + fx*wv + fw*vx)/
8924 		(v*v*xw + x*x*wv + w*w*vx);
8925 	return x;
8926 } /* brent */
8927 #undef ITMAX
8928 #undef CGOLD
8929 #undef ZEPS
8930 #undef SHFT
8931 #undef SIGN
8932 
8933 /* one-dimensional minimization - as input a lower and an upper limit and a trial
8934   value for the minimum is needed: xmin < xguess < xmax
8935   the function and a fractional tolerance has to be specified
8936   onedimenmin returns the optimal x value and the value of the function
8937   and its second derivative at this point
8938   */
onedimenmin(double xmin,double xguess,double xmax,double (* f)(double,void *),void * data,double ftol,double atol,double * fx,double * f2x)8939 double onedimenmin(double xmin, double xguess, double xmax, double (*f)(double,void*), void *data,
8940 		   double ftol, double atol,
8941 		   /*OUT*/double *fx, /*OUT*/double *f2x)
8942 {
8943 	double optx, ax, bx, cx, fa, fb, fc;
8944 
8945 	/* first attempt to bracketize minimum */
8946 	if (xguess == xmin) {
8947 	  ax = xmin;
8948 	  bx = 2.0*xguess;
8949 	  cx = 10.0*xguess;
8950 	} else if (xguess <= 2.0 * xmin) {
8951 	  ax = xmin;
8952 	  bx = xguess;
8953 	  cx = 5.0*xguess;
8954 	} else {
8955 	  ax = 0.5*xguess;
8956 	  bx = xguess;
8957 	  cx = 2.0*xguess;
8958 	}
8959 	if (cx > xmax)
8960 	  cx = xmax;
8961 	if (bx >= cx)
8962 	  bx = 0.5*(ax+cx);
8963 	if (verbose > 4)
8964 	  fprintf(stderr, "onedimenmin lo %.4f guess %.4f hi %.4f range %.4f %.4f\n",
8965 		  ax, bx, cx, xmin, xmax);
8966 	/* ideally this range includes the true minimum, i.e.,
8967 	   fb < fa and fb < fc
8968 	   if not, we gradually expand the boundaries until it does,
8969 	   or we near the boundary of the allowed range and use that
8970 	*/
8971 	fa = (*f)(ax,data);
8972 	fb = (*f)(bx,data);
8973 	fc = (*f)(cx,data);
8974 	while(fa < fb && ax > xmin) {
8975 	  ax = (ax+xmin)/2.0;
8976 	  if (ax < 2.0*xmin)	/* give up on shrinking the region */
8977 	    ax = xmin;
8978 	  fa = (*f)(ax,data);
8979 	}
8980 	while(fc < fb && cx < xmax) {
8981 	  cx = (cx+xmax)/2.0;
8982 	  if (cx > xmax * 0.95)
8983 	    cx = xmax;
8984 	  fc = (*f)(cx,data);
8985 	}
8986 	optx = brent(ax, bx, cx, f, data, ftol, atol, fx, f2x, fa, fb, fc);
8987 
8988 	if (verbose > 4)
8989 	  fprintf(stderr, "onedimenmin reaches optimum f(%.4f) = %.4f f2x %.4f\n", optx, *fx, *f2x);
8990 	return optx; /* return optimal x */
8991 } /* onedimenmin */
8992 
8993 /* Numerical code for the gamma distribution is modified from the PhyML 3 code
8994    (GNU public license) of Stephane Guindon
8995 */
8996 
LnGamma(double alpha)8997 double LnGamma (double alpha)
8998 {
8999 /* returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places.
9000    Stirling's formula is used for the central polynomial part of the procedure.
9001    Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function.
9002    Communications of the Association for Computing Machinery, 9:684
9003 */
9004    double x=alpha, f=0, z;
9005    if (x<7) {
9006       f=1;  z=x-1;
9007       while (++z<7)  f*=z;
9008       x=z;   f=-(double)log(f);
9009    }
9010    z = 1/(x*x);
9011    return  f + (x-0.5)*(double)log(x) - x + .918938533204673
9012 	  + (((-.000595238095238*z+.000793650793651)*z-.002777777777778)*z
9013 	       +.083333333333333)/x;
9014 }
9015 
IncompleteGamma(double x,double alpha,double ln_gamma_alpha)9016 double IncompleteGamma(double x, double alpha, double ln_gamma_alpha)
9017 {
9018 /* returns the incomplete gamma ratio I(x,alpha) where x is the upper
9019 	   limit of the integration and alpha is the shape parameter.
9020    returns (-1) if in error
9021    ln_gamma_alpha = ln(Gamma(alpha)), is almost redundant.
9022    (1) series expansion     if (alpha>x || x<=1)
9023    (2) continued fraction   otherwise
9024    RATNEST FORTRAN by
9025    Bhattacharjee GP (1970) The incomplete gamma integral.  Applied Statistics,
9026    19: 285-287 (AS32)
9027 */
9028    int i;
9029    double p=alpha, g=ln_gamma_alpha;
9030    double accurate=1e-8, overflow=1e30;
9031    double factor, gin=0, rn=0, a=0,b=0,an=0,dif=0, term=0, pn[6];
9032 
9033    if (x==0) return (0);
9034    if (x<0 || p<=0) return (-1);
9035 
9036    factor=(double)exp(p*(double)log(x)-x-g);
9037    if (x>1 && x>=p) goto l30;
9038    /* (1) series expansion */
9039    gin=1;  term=1;  rn=p;
9040  l20:
9041    rn++;
9042    term*=x/rn;   gin+=term;
9043 
9044    if (term > accurate) goto l20;
9045    gin*=factor/p;
9046    goto l50;
9047  l30:
9048    /* (2) continued fraction */
9049    a=1-p;   b=a+x+1;  term=0;
9050    pn[0]=1;  pn[1]=x;  pn[2]=x+1;  pn[3]=x*b;
9051    gin=pn[2]/pn[3];
9052  l32:
9053    a++;  b+=2;  term++;   an=a*term;
9054    for (i=0; i<2; i++) pn[i+4]=b*pn[i+2]-an*pn[i];
9055    if (pn[5] == 0) goto l35;
9056    rn=pn[4]/pn[5];   dif=fabs(gin-rn);
9057    if (dif>accurate) goto l34;
9058    if (dif<=accurate*rn) goto l42;
9059  l34:
9060    gin=rn;
9061  l35:
9062    for (i=0; i<4; i++) pn[i]=pn[i+2];
9063    if (fabs(pn[4]) < overflow) goto l32;
9064    for (i=0; i<4; i++) pn[i]/=overflow;
9065    goto l32;
9066  l42:
9067    gin=1-factor*gin;
9068 
9069  l50:
9070    return (gin);
9071 }
9072 
PGamma(double x,double alpha)9073 double PGamma(double x, double alpha)
9074 {
9075   /* scale = 1/alpha */
9076   return IncompleteGamma(x*alpha,alpha,LnGamma(alpha));
9077 }
9078 
9079 /* helper function to subtract timval structures */
9080 /* Subtract the `struct timeval' values X and Y,
9081         storing the result in RESULT.
9082         Return 1 if the difference is negative, otherwise 0.  */
timeval_subtract(struct timeval * result,struct timeval * x,struct timeval * y)9083 int     timeval_subtract (struct timeval *result, struct timeval *x, struct timeval *y)
9084 {
9085   /* Perform the carry for the later subtraction by updating y. */
9086   if (x->tv_usec < y->tv_usec) {
9087     int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
9088     y->tv_usec -= 1000000 * nsec;
9089     y->tv_sec += nsec;
9090   }
9091   if (x->tv_usec - y->tv_usec > 1000000) {
9092     int nsec = (x->tv_usec - y->tv_usec) / 1000000;
9093     y->tv_usec += 1000000 * nsec;
9094     y->tv_sec -= nsec;
9095   }
9096 
9097   /* Compute the time remaining to wait.
9098      tv_usec is certainly positive. */
9099   result->tv_sec = x->tv_sec - y->tv_sec;
9100   result->tv_usec = x->tv_usec - y->tv_usec;
9101 
9102   /* Return 1 if result is negative. */
9103   return x->tv_sec < y->tv_sec;
9104 }
9105 
clockDiff(struct timeval * clock_start)9106 double clockDiff(/*IN*/struct timeval *clock_start) {
9107   struct timeval time_now, elapsed;
9108   gettimeofday(/*OUT*/&time_now,NULL);
9109   timeval_subtract(/*OUT*/&elapsed,/*IN*/&time_now,/*IN*/clock_start);
9110   return(elapsed.tv_sec + elapsed.tv_usec*1e-6);
9111 }
9112 
9113 
9114 /* The random number generator is taken from D E Knuth
9115    http://www-cs-faculty.stanford.edu/~knuth/taocp.html
9116 */
9117 
9118 /*    This program by D E Knuth is in the public domain and freely copyable.
9119  *    It is explained in Seminumerical Algorithms, 3rd edition, Section 3.6
9120  *    (or in the errata to the 2nd edition --- see
9121  *        http://www-cs-faculty.stanford.edu/~knuth/taocp.html
9122  *    in the changes to Volume 2 on pages 171 and following).              */
9123 
9124 /*    N.B. The MODIFICATIONS introduced in the 9th printing (2002) are
9125       included here; there's no backwards compatibility with the original. */
9126 
9127 /*    This version also adopts Brendan McKay's suggestion to
9128       accommodate naive users who forget to call ran_start(seed).          */
9129 
9130 /*    If you find any bugs, please report them immediately to
9131  *                 taocp@cs.stanford.edu
9132  *    (and you will be rewarded if the bug is genuine). Thanks!            */
9133 
9134 /************ see the book for explanations and caveats! *******************/
9135 /************ in particular, you need two's complement arithmetic **********/
9136 
9137 #define KK 100                     /* the long lag */
9138 #define LL  37                     /* the short lag */
9139 #define MM (1L<<30)                 /* the modulus */
9140 #define mod_diff(x,y) (((x)-(y))&(MM-1)) /* subtraction mod MM */
9141 
9142 long ran_x[KK];                    /* the generator state */
9143 
9144 #ifdef __STDC__
ran_array(long aa[],int n)9145 void ran_array(long aa[],int n)
9146 #else
9147      void ran_array(aa,n)    /* put n new random numbers in aa */
9148      long *aa;   /* destination */
9149      int n;      /* array length (must be at least KK) */
9150 #endif
9151 {
9152   register int i,j;
9153   for (j=0;j<KK;j++) aa[j]=ran_x[j];
9154   for (;j<n;j++) aa[j]=mod_diff(aa[j-KK],aa[j-LL]);
9155   for (i=0;i<LL;i++,j++) ran_x[i]=mod_diff(aa[j-KK],aa[j-LL]);
9156   for (;i<KK;i++,j++) ran_x[i]=mod_diff(aa[j-KK],ran_x[i-LL]);
9157 }
9158 
9159 /* the following routines are from exercise 3.6--15 */
9160 /* after calling ran_start, get new randoms by, e.g., "x=ran_arr_next()" */
9161 
9162 #define QUALITY 1009 /* recommended quality level for high-res use */
9163 long ran_arr_buf[QUALITY];
9164 long ran_arr_dummy=-1, ran_arr_started=-1;
9165 long *ran_arr_ptr=&ran_arr_dummy; /* the next random number, or -1 */
9166 
9167 #define TT  70   /* guaranteed separation between streams */
9168 #define is_odd(x)  ((x)&1)          /* units bit of x */
9169 
9170 #ifdef __STDC__
ran_start(long seed)9171 void ran_start(long seed)
9172 #else
9173      void ran_start(seed)    /* do this before using ran_array */
9174      long seed;            /* selector for different streams */
9175 #endif
9176 {
9177   register int t,j;
9178   long x[KK+KK-1];              /* the preparation buffer */
9179   register long ss=(seed+2)&(MM-2);
9180   for (j=0;j<KK;j++) {
9181     x[j]=ss;                      /* bootstrap the buffer */
9182     ss<<=1; if (ss>=MM) ss-=MM-2; /* cyclic shift 29 bits */
9183   }
9184   x[1]++;              /* make x[1] (and only x[1]) odd */
9185   for (ss=seed&(MM-1),t=TT-1; t; ) {
9186     for (j=KK-1;j>0;j--) x[j+j]=x[j], x[j+j-1]=0; /* "square" */
9187     for (j=KK+KK-2;j>=KK;j--)
9188       x[j-(KK-LL)]=mod_diff(x[j-(KK-LL)],x[j]),
9189 	x[j-KK]=mod_diff(x[j-KK],x[j]);
9190     if (is_odd(ss)) {              /* "multiply by z" */
9191       for (j=KK;j>0;j--)  x[j]=x[j-1];
9192       x[0]=x[KK];            /* shift the buffer cyclically */
9193       x[LL]=mod_diff(x[LL],x[KK]);
9194     }
9195     if (ss) ss>>=1; else t--;
9196   }
9197   for (j=0;j<LL;j++) ran_x[j+KK-LL]=x[j];
9198   for (;j<KK;j++) ran_x[j-LL]=x[j];
9199   for (j=0;j<10;j++) ran_array(x,KK+KK-1); /* warm things up */
9200   ran_arr_ptr=&ran_arr_started;
9201 }
9202 
9203 #define ran_arr_next() (*ran_arr_ptr>=0? *ran_arr_ptr++: ran_arr_cycle())
ran_arr_cycle()9204 long ran_arr_cycle()
9205 {
9206   if (ran_arr_ptr==&ran_arr_dummy)
9207     ran_start(314159L); /* the user forgot to initialize */
9208   ran_array(ran_arr_buf,QUALITY);
9209   ran_arr_buf[KK]=-1;
9210   ran_arr_ptr=ran_arr_buf+1;
9211   return ran_arr_buf[0];
9212 }
9213 
9214 /* end of code from Knuth */
9215 
knuth_rand()9216 double knuth_rand() {
9217   return(9.31322574615479e-10 * ran_arr_next()); /* multiply by 2**-30 */
9218 }
9219 
MakeHashtable(char ** strings,int nStrings)9220 hashstrings_t *MakeHashtable(char **strings, int nStrings) {
9221   hashstrings_t *hash = (hashstrings_t*)mymalloc(sizeof(hashstrings_t));
9222   hash->nBuckets = 8*nStrings;
9223   hash->buckets = (hashbucket_t*)mymalloc(sizeof(hashbucket_t) * hash->nBuckets);
9224   int i;
9225   for (i=0; i < hash->nBuckets; i++) {
9226     hash->buckets[i].string = NULL;
9227     hash->buckets[i].nCount = 0;
9228     hash->buckets[i].first = -1;
9229   }
9230   for (i=0; i < nStrings; i++) {
9231     hashiterator_t hi = FindMatch(hash, strings[i]);
9232     if (hash->buckets[hi].string == NULL) {
9233       /* save a unique entry */
9234       assert(hash->buckets[hi].nCount == 0);
9235       hash->buckets[hi].string = strings[i];
9236       hash->buckets[hi].nCount = 1;
9237       hash->buckets[hi].first = i;
9238     } else {
9239       /* record a duplicate entry */
9240       assert(hash->buckets[hi].string != NULL);
9241       assert(strcmp(hash->buckets[hi].string, strings[i]) == 0);
9242       assert(hash->buckets[hi].first >= 0);
9243       hash->buckets[hi].nCount++;
9244     }
9245   }
9246   return(hash);
9247 }
9248 
FreeHashtable(hashstrings_t * hash)9249 hashstrings_t *FreeHashtable(hashstrings_t* hash) {
9250   if (hash != NULL) {
9251     myfree(hash->buckets, sizeof(hashbucket_t) * hash->nBuckets);
9252     myfree(hash, sizeof(hashstrings_t));
9253   }
9254   return(NULL);
9255 }
9256 
9257 #define MAXADLER 65521
FindMatch(hashstrings_t * hash,char * string)9258 hashiterator_t FindMatch(hashstrings_t *hash, char *string) {
9259   /* Adler-32 checksum */
9260   unsigned int hashA = 1;
9261   unsigned int hashB = 0;
9262   char *p;
9263   for (p = string; *p != '\0'; p++) {
9264     hashA = ((unsigned int)*p + hashA);
9265     hashB = hashA+hashB;
9266   }
9267   hashA %= MAXADLER;
9268   hashB %= MAXADLER;
9269   hashiterator_t hi = (hashB*65536+hashA) % hash->nBuckets;
9270   while(hash->buckets[hi].string != NULL
9271 	&& strcmp(hash->buckets[hi].string, string) != 0) {
9272     hi++;
9273     if (hi >= hash->nBuckets)
9274       hi = 0;
9275   }
9276   return(hi);
9277 }
9278 
GetHashString(hashstrings_t * hash,hashiterator_t hi)9279 char *GetHashString(hashstrings_t *hash, hashiterator_t hi) {
9280   return(hash->buckets[hi].string);
9281 }
9282 
HashCount(hashstrings_t * hash,hashiterator_t hi)9283 int HashCount(hashstrings_t *hash, hashiterator_t hi) {
9284   return(hash->buckets[hi].nCount);
9285 }
9286 
HashFirst(hashstrings_t * hash,hashiterator_t hi)9287 int HashFirst(hashstrings_t *hash, hashiterator_t hi) {
9288   return(hash->buckets[hi].first);
9289 }
9290 
UniquifyAln(alignment_t * aln)9291 uniquify_t *UniquifyAln(alignment_t *aln) {
9292     int nUniqueSeq = 0;
9293     char **uniqueSeq = (char**)mymalloc(aln->nSeq * sizeof(char*)); /* iUnique -> seq */
9294     int *uniqueFirst = (int*)mymalloc(aln->nSeq * sizeof(int)); /* iUnique -> iFirst in aln */
9295     int *alnNext = (int*)mymalloc(aln->nSeq * sizeof(int)); /* i in aln -> next, or -1 */
9296     int *alnToUniq = (int*)mymalloc(aln->nSeq * sizeof(int)); /* i in aln -> iUnique; many -> -1 */
9297 
9298     int i;
9299     for (i = 0; i < aln->nSeq; i++) {
9300       uniqueSeq[i] = NULL;
9301       uniqueFirst[i] = -1;
9302       alnNext[i] = -1;
9303       alnToUniq[i] = -1;
9304     }
9305     hashstrings_t *hashseqs = MakeHashtable(aln->seqs, aln->nSeq);
9306     for (i=0; i<aln->nSeq; i++) {
9307       hashiterator_t hi = FindMatch(hashseqs,aln->seqs[i]);
9308       int first = HashFirst(hashseqs,hi);
9309       if (first == i) {
9310 	uniqueSeq[nUniqueSeq] = aln->seqs[i];
9311 	uniqueFirst[nUniqueSeq] = i;
9312 	alnToUniq[i] = nUniqueSeq;
9313 	nUniqueSeq++;
9314       } else {
9315 	int last = first;
9316 	while (alnNext[last] != -1)
9317 	  last = alnNext[last];
9318 	assert(last>=0);
9319 	alnNext[last] = i;
9320 	assert(alnToUniq[last] >= 0 && alnToUniq[last] < nUniqueSeq);
9321 	alnToUniq[i] = alnToUniq[last];
9322       }
9323     }
9324     assert(nUniqueSeq>0);
9325     hashseqs = FreeHashtable(hashseqs);
9326 
9327     uniquify_t *uniquify = (uniquify_t*)mymalloc(sizeof(uniquify_t));
9328     uniquify->nSeq = aln->nSeq;
9329     uniquify->nUnique = nUniqueSeq;
9330     uniquify->uniqueFirst = uniqueFirst;
9331     uniquify->alnNext = alnNext;
9332     uniquify->alnToUniq = alnToUniq;
9333     uniquify->uniqueSeq = uniqueSeq;
9334     return(uniquify);
9335 }
9336 
FreeUniquify(uniquify_t * unique)9337 uniquify_t *FreeUniquify(uniquify_t *unique) {
9338   if (unique != NULL) {
9339     myfree(unique->uniqueFirst, sizeof(int)*unique->nSeq);
9340     myfree(unique->alnNext, sizeof(int)*unique->nSeq);
9341     myfree(unique->alnToUniq, sizeof(int)*unique->nSeq);
9342     myfree(unique->uniqueSeq, sizeof(char*)*unique->nSeq);
9343     myfree(unique,sizeof(uniquify_t));
9344     unique = NULL;
9345   }
9346   return(unique);
9347 }
9348 
InitTraversal(NJ_t * NJ)9349 traversal_t InitTraversal(NJ_t *NJ) {
9350   traversal_t worked = (bool*)mymalloc(sizeof(bool)*NJ->maxnodes);
9351   int i;
9352   for (i=0; i<NJ->maxnodes; i++)
9353     worked[i] = false;
9354   return(worked);
9355 }
9356 
SkipTraversalInto(int node,traversal_t traversal)9357 void SkipTraversalInto(int node, /*IN/OUT*/traversal_t traversal) {
9358   traversal[node] = true;
9359 }
9360 
TraversePostorder(int node,NJ_t * NJ,traversal_t traversal,bool * pUp)9361 int TraversePostorder(int node, NJ_t *NJ, /*IN/OUT*/traversal_t traversal,
9362 		      /*OPTIONAL OUT*/bool *pUp) {
9363   if (pUp)
9364     *pUp = false;
9365   while(1) {
9366     assert(node >= 0);
9367 
9368     /* move to a child if possible */
9369     bool found = false;
9370     int iChild;
9371     for (iChild=0; iChild < NJ->child[node].nChild; iChild++) {
9372       int child = NJ->child[node].child[iChild];
9373       if (!traversal[child]) {
9374 	node = child;
9375 	found = true;
9376 	break;
9377       }
9378     }
9379     if (found)
9380       continue; /* keep moving down */
9381     if (!traversal[node]) {
9382       traversal[node] = true;
9383       return(node);
9384     }
9385     /* If we've already done this node, need to move up */
9386     if (node == NJ->root)
9387       return(-1); /* nowhere to go -- done traversing */
9388     node = NJ->parent[node];
9389     /* If we go up to someplace that was already marked as visited, this is due
9390        to a change in topology, so return it marked as "up" */
9391     if (pUp && traversal[node]) {
9392       *pUp = true;
9393       return(node);
9394     }
9395   }
9396 }
9397 
FreeTraversal(traversal_t traversal,NJ_t * NJ)9398 traversal_t FreeTraversal(traversal_t traversal, NJ_t *NJ) {
9399   myfree(traversal, sizeof(bool)*NJ->maxnodes);
9400   return(NULL);
9401 }
9402 
UpProfiles(NJ_t * NJ)9403 profile_t **UpProfiles(NJ_t *NJ) {
9404   profile_t **upProfiles = (profile_t**)mymalloc(sizeof(profile_t*)*NJ->maxnodes);
9405   int i;
9406   for (i=0; i<NJ->maxnodes; i++) upProfiles[i] = NULL;
9407   return(upProfiles);
9408 }
9409 
GetUpProfile(profile_t ** upProfiles,NJ_t * NJ,int outnode,bool useML)9410 profile_t *GetUpProfile(/*IN/OUT*/profile_t **upProfiles, NJ_t *NJ, int outnode, bool useML) {
9411   assert(outnode != NJ->root && outnode >= NJ->nSeq); /* not for root or leaves */
9412   if (upProfiles[outnode] != NULL)
9413     return(upProfiles[outnode]);
9414 
9415   int depth;
9416   int *pathToRoot = PathToRoot(NJ, outnode, /*OUT*/&depth);
9417   int i;
9418   /* depth-1 is root */
9419   for (i = depth-2; i>=0; i--) {
9420     int node = pathToRoot[i];
9421 
9422     if (upProfiles[node] == NULL) {
9423       /* Note -- SetupABCD may call GetUpProfile, but it should do it farther
9424 	 up in the path to the root
9425       */
9426       profile_t *profiles[4];
9427       int nodeABCD[4];
9428       SetupABCD(NJ, node, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, useML);
9429       if (useML) {
9430 	/* If node is a child of root, then the 4th profile is of the 2nd root-sibling of node
9431 	   Otherwise, the 4th profile is the up-profile of the parent of node, and that
9432 	   is the branch-length we need
9433 	 */
9434 	double lenC = NJ->branchlength[nodeABCD[2]];
9435 	double lenD = NJ->branchlength[nodeABCD[3]];
9436 	if (verbose > 3) {
9437 	  fprintf(stderr, "Computing UpProfile for node %d with lenC %.4f lenD %.4f pair-loglk %.3f\n",
9438 		  node, lenC, lenD,
9439 		  PairLogLk(profiles[2],profiles[3],lenC+lenD,NJ->nPos,NJ->transmat,&NJ->rates, /*site_lk*/NULL));
9440 	  PrintNJInternal(stderr, NJ, /*useLen*/true);
9441 	}
9442 	upProfiles[node] = PosteriorProfile(/*C*/profiles[2], /*D*/profiles[3],
9443 					    lenC, lenD,
9444 					    NJ->transmat, &NJ->rates, NJ->nPos, NJ->nConstraints);
9445       } else {
9446 	profile_t *profilesCDAB[4] = { profiles[2], profiles[3], profiles[0], profiles[1] };
9447 	double weight = QuartetWeight(profilesCDAB, NJ->distance_matrix, NJ->nPos);
9448 	if (verbose>3)
9449 	  fprintf(stderr, "Compute upprofile of %d from %d and parents (vs. children %d %d) with weight %.3f\n",
9450 		  node, nodeABCD[2], nodeABCD[0], nodeABCD[1], weight);
9451 	upProfiles[node] = AverageProfile(profiles[2], profiles[3],
9452 					  NJ->nPos, NJ->nConstraints,
9453 					  NJ->distance_matrix,
9454 					  weight);
9455       }
9456     }
9457   }
9458   FreePath(pathToRoot,NJ);
9459   assert(upProfiles[outnode] != NULL);
9460   return(upProfiles[outnode]);
9461 }
9462 
DeleteUpProfile(profile_t ** upProfiles,NJ_t * NJ,int node)9463 profile_t *DeleteUpProfile(/*IN/OUT*/profile_t **upProfiles, NJ_t *NJ, int node) {
9464   assert(node>=0 && node < NJ->maxnodes);
9465   if (upProfiles[node] != NULL)
9466     upProfiles[node] = FreeProfile(upProfiles[node], NJ->nPos, NJ->nConstraints); /* returns NULL */
9467   return(NULL);
9468 }
9469 
FreeUpProfiles(profile_t ** upProfiles,NJ_t * NJ)9470 profile_t **FreeUpProfiles(profile_t **upProfiles, NJ_t *NJ) {
9471   int i;
9472   int nUsed = 0;
9473   for (i=0; i < NJ->maxnodes; i++) {
9474     if (upProfiles[i] != NULL)
9475       nUsed++;
9476     DeleteUpProfile(upProfiles, NJ, i);
9477   }
9478   myfree(upProfiles, sizeof(profile_t*)*NJ->maxnodes);
9479   if (verbose >= 3)
9480     fprintf(stderr,"FreeUpProfiles -- freed %d\n", nUsed);
9481   return(NULL);
9482 }
9483 
PathToRoot(NJ_t * NJ,int node,int * outDepth)9484 int *PathToRoot(NJ_t *NJ, int node, /*OUT*/int *outDepth) {
9485   int *pathToRoot = (int*)mymalloc(sizeof(int)*NJ->maxnodes);
9486   int depth = 0;
9487   int ancestor = node;
9488   while(ancestor >= 0) {
9489     pathToRoot[depth] = ancestor;
9490     ancestor = NJ->parent[ancestor];
9491     depth++;
9492   }
9493   *outDepth = depth;
9494   return(pathToRoot);
9495 }
9496 
FreePath(int * path,NJ_t * NJ)9497 int *FreePath(int *path, NJ_t *NJ) {
9498   myfree(path, sizeof(int)*NJ->maxnodes);
9499   return(NULL);
9500 }
9501 
CreateGTR(double * r,double * f)9502 transition_matrix_t *CreateGTR(double *r/*ac ag at cg ct gt*/, double *f/*acgt*/) {
9503   double matrix[4][MAXCODES];
9504   assert(nCodes==4);
9505   int i, j;
9506   /* Place rates onto a symmetric matrix, but correct by f(target), so that
9507      stationary distribution f[] is maintained
9508      Leave diagonals as 0 (CreateTransitionMatrix will fix them)
9509   */
9510   int imat = 0;
9511   for (i = 0; i < nCodes; i++) {
9512     matrix[i][i] = 0;
9513     for (j = i+1; j < nCodes; j++) {
9514       double rate = r[imat++];
9515       assert(rate > 0);
9516       /* Want t(matrix) * f to be 0 */
9517       matrix[i][j] = rate * f[i];
9518       matrix[j][i] = rate * f[j];
9519     }
9520   }
9521   /* Compute average mutation rate */
9522   double total_rate = 0;
9523   for (i = 0; i < nCodes; i++)
9524     for (j = 0; j < nCodes; j++)
9525       total_rate += f[i] * matrix[i][j];
9526   assert(total_rate > 1e-6);
9527   double inv = 1.0/total_rate;
9528   for (i = 0; i < nCodes; i++)
9529     for (j = 0; j < nCodes; j++)
9530       matrix[i][j] *= inv;
9531   return(CreateTransitionMatrix(matrix,f));
9532 }
9533 
CreateTransitionMatrix(double matrix[MAXCODES][MAXCODES],double stat[MAXCODES])9534 transition_matrix_t *CreateTransitionMatrix(/*IN*/double matrix[MAXCODES][MAXCODES],
9535 					    /*IN*/double stat[MAXCODES]) {
9536   int i,j,k;
9537   transition_matrix_t *transmat = mymalloc(sizeof(transition_matrix_t));
9538   double sqrtstat[20];
9539   for (i = 0; i < nCodes; i++) {
9540     transmat->stat[i] = stat[i];
9541     transmat->statinv[i] = 1.0/stat[i];
9542     sqrtstat[i] = sqrt(stat[i]);
9543   }
9544 
9545   double sym[20*20];		/* symmetrized matrix M' */
9546   /* set diagonals so columns sums are 0 before symmetrization */
9547   for (i = 0; i < nCodes; i++)
9548     for (j = 0; j < nCodes; j++)
9549       sym[nCodes*i+j] = matrix[i][j];
9550   for (j = 0; j < nCodes; j++) {
9551     double sum = 0;
9552     sym[nCodes*j+j] = 0;
9553     for (i = 0; i < nCodes; i++)
9554       sum += sym[nCodes*i+j];
9555     sym[nCodes*j+j] = -sum;
9556   }
9557   /* M' = S**-1 M S */
9558   for (i = 0; i < nCodes; i++)
9559     for (j = 0; j < nCodes; j++)
9560       sym[nCodes*i+j] *= sqrtstat[j]/sqrtstat[i];
9561 
9562   /* eigen decomposition of M' -- note that eigenW is the transpose of what we want,
9563      which is eigenvectors in columns */
9564   double eigenW[20*20], eval[20], e[20];
9565   for (i = 0; i < nCodes*nCodes; i++)
9566     eigenW[i] = sym[i];
9567   tred2(eigenW, nCodes, nCodes, eval, e);
9568   tqli(eval, e, nCodes , nCodes, eigenW);
9569 
9570   /* save eigenvalues */
9571   for (i = 0; i < nCodes; i++)
9572     transmat->eigenval[i] = eval[i];
9573 
9574   /* compute eigen decomposition of M into t(codeFreq): V = S*W */
9575   /* compute inverse of V in eigeninv: V**-1 = t(W) S**-1  */
9576   for (i = 0; i < nCodes; i++) {
9577     for (j = 0; j < nCodes; j++) {
9578       transmat->eigeninv[i][j] = eigenW[nCodes*i+j] / sqrtstat[j];
9579       transmat->eigeninvT[j][i] = transmat->eigeninv[i][j];
9580     }
9581   }
9582   for (i = 0; i < nCodes; i++)
9583     for (j = 0; j < nCodes; j++)
9584       transmat->codeFreq[i][j] = eigenW[j*nCodes+i] * sqrtstat[i];
9585   /* codeFreq[NOCODE] is the rotation of (1,1,...) not (1/nCodes,1/nCodes,...), which
9586      gives correct posterior probabilities
9587   */
9588   for (j = 0; j < nCodes; j++) {
9589     transmat->codeFreq[NOCODE][j] = 0.0;
9590     for (i = 0; i < nCodes; i++)
9591       transmat->codeFreq[NOCODE][j] += transmat->codeFreq[i][j];
9592   }
9593   /* save some posterior probabilities for approximating later:
9594      first, we compute P(B | A, t) for t = approxMLnearT, by using
9595      V * exp(L*t) * V**-1 */
9596   double expvalues[MAXCODES];
9597   for (i = 0; i < nCodes; i++)
9598     expvalues[i] = exp(approxMLnearT * transmat->eigenval[i]);
9599   double LVinv[MAXCODES][MAXCODES]; /* exp(L*t) * V**-1 */
9600   for (i = 0; i < nCodes; i++) {
9601     for (j = 0; j < nCodes; j++)
9602       LVinv[i][j] = transmat->eigeninv[i][j] * expvalues[i];
9603   }
9604   /* matrix transform for converting A -> B given t: transt[i][j] = P(j->i | t) */
9605   double transt[MAXCODES][MAXCODES];
9606   for (i = 0; i < nCodes; i++) {
9607     for (j = 0; j < nCodes; j++) {
9608       transt[i][j] = 0;
9609       for (k = 0; k < nCodes; k++)
9610 	transt[i][j] += transmat->codeFreq[i][k] * LVinv[k][j];
9611     }
9612   }
9613   /* nearP[i][j] = P(parent = j | both children are i) = P(j | i,i) ~ stat(j) * P(j->i | t)**2 */
9614   for (i = 0; i < nCodes; i++) {
9615     double nearP[MAXCODES];
9616     double tot = 0;
9617     for (j = 0; j < nCodes; j++) {
9618       assert(transt[j][i] > 0);
9619       assert(transmat->stat[j] > 0);
9620       nearP[j] = transmat->stat[j] * transt[i][j] * transt[i][j];
9621       tot += nearP[j];
9622     }
9623     assert(tot > 0);
9624     for (j = 0; j < nCodes; j++)
9625       nearP[j] *= 1.0/tot;
9626     /* save nearP in transmat->nearP[i][] */
9627     for (j = 0; j < nCodes; j++)
9628       transmat->nearP[i][j] = nearP[j];
9629     /* multiply by 1/stat and rotate nearP */
9630     for (j = 0; j < nCodes; j++)
9631       nearP[j] /= transmat->stat[j];
9632     for (j = 0; j < nCodes; j++) {
9633       double rot = 0;
9634       for (k = 0; k < nCodes; k++)
9635 	rot += nearP[k] * transmat->codeFreq[i][j];
9636       transmat->nearFreq[i][j] = rot;
9637     }
9638   }
9639   return(transmat);
9640   assert(0);
9641 }
9642 
TransMatToDistanceMat(transition_matrix_t * transmat)9643 distance_matrix_t *TransMatToDistanceMat(transition_matrix_t *transmat) {
9644   if (transmat == NULL)
9645     return(NULL);
9646   distance_matrix_t *dmat = mymalloc(sizeof(distance_matrix_t));
9647   int i, j;
9648   for (i=0; i<nCodes; i++) {
9649     for (j=0; j<nCodes; j++) {
9650       dmat->distances[i][j] = 0;	/* never actually used */
9651       dmat->eigeninv[i][j] = transmat->eigeninv[i][j];
9652       dmat->codeFreq[i][j] = transmat->codeFreq[i][j];
9653     }
9654   }
9655   /* eigentot . rotated-vector is the total frequency of the unrotated vector
9656      (used to normalize in NormalizeFreq()
9657      For transition matrices, we rotate by transpose of eigenvectors, so
9658      we need to multiply by the inverse matrix by 1....1 to get this vector,
9659      or in other words, sum the columns
9660   */
9661   for(i = 0; i<nCodes; i++) {
9662       dmat->eigentot[i] = 0.0;
9663       for (j = 0; j<nCodes; j++)
9664 	dmat->eigentot[i] += transmat->eigeninv[i][j];
9665   }
9666   return(dmat);
9667 }
9668 
9669 /* Numerical recipes code for eigen decomposition (actually taken from RAxML rev_functions.c) */
tred2(double * a,const int n,const int np,double * d,double * e)9670 void tred2 (double *a, const int n, const int np, double *d, double *e)
9671 {
9672 #define a(i,j) a[(j-1)*np + (i-1)]
9673 #define e(i)   e[i-1]
9674 #define d(i)   d[i-1]
9675   int i, j, k, l;
9676   double f, g, h, hh, scale;
9677   for (i = n; i > 1; i--) {
9678     l = i-1;
9679     h = 0;
9680     scale = 0;
9681     if ( l > 1 ) {
9682       for ( k = 1; k <= l; k++ )
9683 	scale += fabs(a(i,k));
9684       if (scale == 0)
9685 	e(i) = a(i,l);
9686       else {
9687 	for (k = 1; k <= l; k++) {
9688 	  a(i,k) /= scale;
9689 	  h += a(i,k) * a(i,k);
9690 	}
9691 	f = a(i,l);
9692 	g = -sqrt(h);
9693 	if (f < 0) g = -g;
9694 	e(i) = scale *g;
9695 	h -= f*g;
9696 	a(i,l) = f-g;
9697 	f = 0;
9698 	for (j = 1; j <=l ; j++) {
9699 	  a(j,i) = a(i,j) / h;
9700 	  g = 0;
9701 	  for (k = 1; k <= j; k++)
9702 	    g += a(j,k)*a(i,k);
9703 	  for (k = j+1; k <= l; k++)
9704 	    g += a(k,j)*a(i,k);
9705 	  e(j) = g/h;
9706 	  f += e(j)*a(i,j);
9707 	}
9708 	hh = f/(h+h);
9709 	for (j = 1; j <= l; j++) {
9710 	  f = a(i,j);
9711 	  g = e(j) - hh * f;
9712 	  e(j) = g;
9713 	  for (k = 1; k <= j; k++)
9714 	    a(j,k) -= f*e(k) + g*a(i,k);
9715 	}
9716       }
9717     } else
9718       e(i) = a(i,l);
9719     d(i) = h;
9720   }
9721   d(1) = 0;
9722   e(1) = 0;
9723   for (i = 1; i <= n; i++) {
9724     l = i-1;
9725     if (d(i) != 0) {
9726       for (j = 1; j <=l; j++) {
9727 	g = 0;
9728 	for (k = 1; k <= l; k++)
9729 	  g += a(i,k)*a(k,j);
9730 	for (k=1; k <=l; k++)
9731 	  a(k,j) -= g * a(k,i);
9732       }
9733     }
9734     d(i) = a(i,i);
9735     a(i,i) = 1;
9736     for (j=1; j<=l; j++)
9737       a(i,j) = a(j,i) = 0;
9738   }
9739 
9740   return;
9741 #undef a
9742 #undef e
9743 #undef d
9744 }
9745 
pythag(double a,double b)9746 double pythag(double a, double b) {
9747   double absa = fabs(a), absb = fabs(b);
9748   return (absa > absb) ?
9749        absa * sqrt(1+ (absb/absa)*(absb/absa)) :
9750     absb == 0 ?
9751        0 :
9752        absb * sqrt(1+ (absa/absb)*(absa/absb));
9753 }
9754 
tqli(double * d,double * e,int n,int np,double * z)9755 void tqli(double *d, double *e, int n, int np, double *z)
9756 {
9757 #define z(i,j) z[(j-1)*np + (i-1)]
9758 #define e(i)   e[i-1]
9759 #define d(i)   d[i-1]
9760 
9761   int i = 0, iter = 0, k = 0, l = 0, m = 0;
9762   double b = 0, c = 0, dd = 0, f = 0, g = 0, p = 0, r = 0, s = 0;
9763 
9764   for(i=2; i<=n; i++)
9765     e(i-1) = e(i);
9766   e(n) = 0;
9767 
9768   for (l = 1; l <= n; l++)
9769     {
9770       iter = 0;
9771     labelExtra:
9772 
9773       for (m = l; (m < n); m++)
9774 	{
9775 	  dd = fabs(d(m))+fabs(d(m+1));
9776 
9777 	  if (fabs(e(m))+dd == dd)
9778 	    break;
9779 	}
9780 
9781       if (m != l)
9782 	{
9783 	  assert(iter < 30);
9784 
9785 	  iter++;
9786 	  g = (d(l+1)-d(l))/(2*e(l));
9787 	  r = pythag(g,1.);
9788 	  g = d(m)-d(l)+e(l)/(g+(g<0?-r:r));
9789 	  s = 1;
9790 	  c = 1;
9791 	  p = 0;
9792 
9793 	  for (i = m-1; i>=l; i--)
9794 	    {
9795 	      f = s*e(i);
9796 	      b = c*e(i);
9797 	      r = pythag(f,g);
9798 
9799 	      e(i+1) = r;
9800 	      if (r == 0)
9801 		{
9802 		  d (i+1) -= p;
9803 		  e (m) = 0;
9804 
9805 		  goto labelExtra;
9806 		}
9807 	      s = f/r;
9808 	      c = g/r;
9809 	      g = d(i+1)-p;
9810 	      r = (d(i)-g)*s + 2*c*b;
9811 	      p = s*r;
9812 	      d(i+1) = g + p;
9813 	      g = c*r - b;
9814 	      for (k=1; k <= n; k++)
9815 		{
9816 		  f = z(k,i+1);
9817 		  z(k,i+1) = s * z(k,i) + c*f;
9818 		  z(k,i) = c * z(k,i) - s*f;
9819 		}
9820 	    }
9821 	  d(l) -= p;
9822 	  e(l) = g;
9823 	  e(m) = 0;
9824 
9825 	  goto labelExtra;
9826 	}
9827     }
9828 
9829   return;
9830 #undef z
9831 #undef e
9832 #undef d
9833 
9834 }
9835 
9836 #ifdef USE_SSE3
mm_sum(register __m128 sum)9837 inline float mm_sum(register __m128 sum) {
9838 #if 1
9839   /* stupider but faster */
9840   float f[4] ALIGNED;
9841   _mm_store_ps(f,sum);
9842   return(f[0]+f[1]+f[2]+f[3]);
9843 #else
9844   /* first we get sum[0]+sum[1], sum[2]+sum[3] by selecting 0/1 and 2/3 */
9845   sum = _mm_add_ps(sum,_mm_shuffle_ps(sum,sum,_MM_SHUFFLE(0,1,2,3)));
9846   /* then get sum[0]+sum[1]+sum[2]+sum[3] by selecting 0/1 and 0/1 */
9847   sum = _mm_add_ps(sum,_mm_shuffle_ps(sum,sum,_MM_SHUFFLE(0,1,0,1)));
9848   float f;
9849   _mm_store_ss(&f, sum);	/* save the lowest word */
9850   return(f);
9851 #endif
9852 }
9853 #endif
9854 
vector_multiply(numeric_t * f1,numeric_t * f2,int n,numeric_t * fOut)9855 void vector_multiply(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, int n, /*OUT*/numeric_t *fOut) {
9856 #ifdef USE_SSE3
9857   int i;
9858   for (i = 0; i < n; i += 4) {
9859     __m128 a, b, c;
9860     a = _mm_load_ps(f1+i);
9861     b = _mm_load_ps(f2+i);
9862     c = _mm_mul_ps(a, b);
9863     _mm_store_ps(fOut+i,c);
9864   }
9865 #else
9866   int i;
9867   for (i = 0; i < n; i++)
9868     fOut[i] = f1[i]*f2[i];
9869 #endif
9870 }
9871 
vector_multiply_sum(numeric_t * f1,numeric_t * f2,int n)9872 numeric_t vector_multiply_sum(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, int n) {
9873 #ifdef USE_SSE3
9874   if (n == 4)
9875     return(f1[0]*f2[0]+f1[1]*f2[1]+f1[2]*f2[2]+f1[3]*f2[3]);
9876   __m128 sum = _mm_setzero_ps();
9877   int i;
9878   for (i = 0; i < n; i += 4) {
9879     __m128 a, b, c;
9880     a = _mm_load_ps(f1+i);
9881     b = _mm_load_ps(f2+i);
9882     c = _mm_mul_ps(a, b);
9883     sum = _mm_add_ps(c, sum);
9884   }
9885   return(mm_sum(sum));
9886 #else
9887   int i;
9888   numeric_t out = 0.0;
9889   for (i=0; i < n; i++)
9890     out += f1[i]*f2[i];
9891   return(out);
9892 #endif
9893 }
9894 
9895 /* sum(f1*f2*f3) */
vector_multiply3_sum(numeric_t * f1,numeric_t * f2,numeric_t * f3,int n)9896 numeric_t vector_multiply3_sum(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, /*IN*/numeric_t* f3, int n) {
9897 #ifdef USE_SSE3
9898   __m128 sum = _mm_setzero_ps();
9899   int i;
9900   for (i = 0; i < n; i += 4) {
9901     __m128 a1, a2, a3;
9902     a1 = _mm_load_ps(f1+i);
9903     a2 = _mm_load_ps(f2+i);
9904     a3 = _mm_load_ps(f3+i);
9905     sum = _mm_add_ps(_mm_mul_ps(_mm_mul_ps(a1,a2),a3),sum);
9906   }
9907   return(mm_sum(sum));
9908 #else
9909   int i;
9910   numeric_t sum = 0.0;
9911   for (i = 0; i < n; i++)
9912     sum += f1[i]*f2[i]*f3[i];
9913   return(sum);
9914 #endif
9915 }
9916 
vector_dot_product_rot(numeric_t * f1,numeric_t * f2,numeric_t * fBy,int n)9917 numeric_t vector_dot_product_rot(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, /*IN*/numeric_t *fBy, int n) {
9918 #ifdef USE_SSE3
9919   __m128 sum1 = _mm_setzero_ps();
9920   __m128 sum2 = _mm_setzero_ps();
9921   int i;
9922   for (i = 0; i < n; i += 4) {
9923     __m128 a1, a2, aBy;
9924     a1 = _mm_load_ps(f1+i);
9925     a2 = _mm_load_ps(f2+i);
9926     aBy = _mm_load_ps(fBy+i);
9927     sum1 = _mm_add_ps(_mm_mul_ps(a1, aBy), sum1);
9928     sum2 = _mm_add_ps(_mm_mul_ps(a2, aBy), sum2);
9929   }
9930   return(mm_sum(sum1)*mm_sum(sum2));
9931 #else
9932   int i;
9933   numeric_t out1 = 0.0;
9934   numeric_t out2 = 0.0;
9935   for (i=0; i < n; i++) {
9936     out1 += f1[i]*fBy[i];
9937     out2 += f2[i]*fBy[i];
9938   }
9939   return(out1*out2);
9940 #endif
9941 }
9942 
vector_sum(numeric_t * f1,int n)9943 numeric_t vector_sum(/*IN*/numeric_t *f1, int n) {
9944 #ifdef USE_SSE3
9945   if (n==4)
9946     return(f1[0]+f1[1]+f1[2]+f1[3]);
9947   __m128 sum = _mm_setzero_ps();
9948   int i;
9949   for (i = 0; i < n; i+=4) {
9950     __m128 a;
9951     a = _mm_load_ps(f1+i);
9952     sum = _mm_add_ps(a, sum);
9953   }
9954   return(mm_sum(sum));
9955 #else
9956   numeric_t out = 0.0;
9957   int i;
9958   for (i = 0; i < n; i++)
9959     out += f1[i];
9960   return(out);
9961 #endif
9962 }
9963 
vector_multiply_by(numeric_t * f,numeric_t fBy,int n)9964 void vector_multiply_by(/*IN/OUT*/numeric_t *f, /*IN*/numeric_t fBy, int n) {
9965   int i;
9966 #ifdef USE_SSE3
9967   __m128 c = _mm_set1_ps(fBy);
9968   for (i = 0; i < n; i += 4) {
9969     __m128 a, b;
9970     a = _mm_load_ps(f+i);
9971     b = _mm_mul_ps(a,c);
9972     _mm_store_ps(f+i,b);
9973   }
9974 #else
9975   for (i = 0; i < n; i++)
9976     f[i] *= fBy;
9977 #endif
9978 }
9979 
vector_add_mult(numeric_t * fTot,numeric_t * fAdd,numeric_t weight,int n)9980 void vector_add_mult(/*IN/OUT*/numeric_t *fTot, /*IN*/numeric_t *fAdd, numeric_t weight, int n) {
9981 #ifdef USE_SSE3
9982   int i;
9983   __m128 w = _mm_set1_ps(weight);
9984   for (i = 0; i < n; i += 4) {
9985     __m128 tot, add;
9986     tot = _mm_load_ps(fTot+i);
9987     add = _mm_load_ps(fAdd+i);
9988     _mm_store_ps(fTot+i, _mm_add_ps(tot, _mm_mul_ps(add,w)));
9989   }
9990 #else
9991   int i;
9992   for (i = 0; i < n; i++)
9993     fTot[i] += fAdd[i] * weight;
9994 #endif
9995 }
9996 
matrixt_by_vector4(numeric_t mat[4][MAXCODES],numeric_t vec[4],numeric_t out[4])9997 void matrixt_by_vector4(/*IN*/numeric_t mat[4][MAXCODES], /*IN*/numeric_t vec[4], /*OUT*/numeric_t out[4]) {
9998 #ifdef USE_SSE3
9999   /*__m128 v = _mm_load_ps(vec);*/
10000   __m128 o = _mm_setzero_ps();
10001   int j;
10002   /* result is a sum of vectors: sum(k) v[k] * mat[k][] */
10003   for (j = 0; j < 4; j++) {
10004     __m128 m = _mm_load_ps(&mat[j][0]);
10005     __m128 vj = _mm_load1_ps(&vec[j]);	/* is it faster to shuffle v? */
10006     o = _mm_add_ps(o, _mm_mul_ps(vj,m));
10007   }
10008   _mm_store_ps(out, o);
10009 #else
10010   int j,k;
10011   for (j = 0; j < 4; j++) {
10012     double sum = 0;
10013     for (k = 0; k < 4; k++)
10014       sum += vec[k] * mat[k][j];
10015     out[j] = sum;
10016   }
10017 #endif
10018 }
10019 
10020 distance_matrix_t matrixBLOSUM45 =
10021   {
10022     /*distances*/
10023     {
10024       {0, 1.31097856157468, 1.06573001937323, 1.2682782988532, 0.90471293383305, 1.05855446876905, 1.05232790675508, 0.769574440593014, 1.27579668305679, 0.964604099952603, 0.987178199640556, 1.05007594438157, 1.05464162250736, 1.1985987403937, 0.967404475245526, 0.700490199584332, 0.880060189098976, 1.09748548316685, 1.28141710375267, 0.800038509951648},
10025       {1.31097856157468, 0, 0.8010890222701, 0.953340718498495, 1.36011107208122, 0.631543775840481, 0.791014908659279, 1.15694899265629, 0.761152570032029, 1.45014917711188, 1.17792001455227, 0.394661075648738, 0.998807558909651, 1.135143404599, 1.15432562628921, 1.05309036790541, 1.05010474413616, 1.03938321130789, 0.963216908696184, 1.20274751778601},
10026       {1.06573001937323, 0.8010890222701, 0, 0.488217214273568, 1.10567116937273, 0.814970207038261, 0.810176440932339, 0.746487413974582, 0.61876156253224, 1.17886558630004, 1.52003670190022, 0.808442678243754, 1.2889025816028, 1.16264109995678, 1.18228799147301, 0.679475681649858, 0.853658619686283, 1.68988558988005, 1.24297493464833, 1.55207513886163},
10027       {1.2682782988532, 0.953340718498495, 0.488217214273568, 0, 1.31581050011876, 0.769778474953791, 0.482077627352988, 0.888361752320536, 0.736360849050364, 1.76756333403346, 1.43574761894039, 0.763612910719347, 1.53386612356483, 1.74323672079854, 0.886347403928663, 0.808614044804528, 1.01590147813779, 1.59617804551619, 1.1740494822217, 1.46600946033173},
10028       {0.90471293383305, 1.36011107208122, 1.10567116937273, 1.31581050011876, 0, 1.3836789310481, 1.37553994252576, 1.26740695314856, 1.32361065635259, 1.26087264215993, 1.02417540515351, 1.37259631233791, 1.09416720447891, 0.986982088723923, 1.59321190226694, 0.915638787768407, 0.913042853922533, 1.80744143643002, 1.3294417177004, 0.830022143283238},
10029       {1.05855446876905, 0.631543775840481, 0.814970207038261, 0.769778474953791, 1.3836789310481, 0, 0.506942797642807, 1.17699648087288, 0.614595446514896, 1.17092829494457, 1.19833088638994, 0.637341078675405, 0.806490842729072, 1.83315144709714, 0.932064479113502, 0.850321696813199, 1.06830084665916, 1.05739353225849, 0.979907428113788, 1.5416250309563},
10030       {1.05232790675508, 0.791014908659279, 0.810176440932339, 0.482077627352988, 1.37553994252576, 0.506942797642807, 0, 1.17007322676118, 0.769786956320484, 1.46659942462342, 1.19128214039009, 0.633592151371708, 1.27269395724349, 1.44641491621774, 0.735428579892476, 0.845319988414402, 1.06201695511881, 1.324395996498, 1.22734387448031, 1.53255698189437},
10031       {0.769574440593014, 1.15694899265629, 0.746487413974582, 0.888361752320536, 1.26740695314856, 1.17699648087288, 1.17007322676118, 0, 1.1259007054424, 1.7025415585924, 1.38293205218175, 1.16756929156758, 1.17264582493965, 1.33271035269688, 1.07564768421292, 0.778868281341681, 1.23287107008366, 0.968539655354582, 1.42479529031801, 1.41208067821187},
10032       {1.27579668305679, 0.761152570032029, 0.61876156253224, 0.736360849050364, 1.32361065635259, 0.614595446514896, 0.769786956320484, 1.1259007054424, 0, 1.4112324673522, 1.14630894167097, 0.967795284542623, 0.771479459384692, 1.10468029976148, 1.12334774065132, 1.02482926701639, 1.28754326478771, 1.27439749294131, 0.468683841672724, 1.47469999960758},
10033       {0.964604099952603, 1.45014917711188, 1.17886558630004, 1.76756333403346, 1.26087264215993, 1.17092829494457, 1.46659942462342, 1.7025415585924, 1.4112324673522, 0, 0.433350517223017, 1.463460928818, 0.462965544381851, 0.66291968000662, 1.07010201755441, 1.23000200130049, 0.973485453109068, 0.963546200571036, 0.708724769805536, 0.351200119909572},
10034       {0.987178199640556, 1.17792001455227, 1.52003670190022, 1.43574761894039, 1.02417540515351, 1.19833088638994, 1.19128214039009, 1.38293205218175, 1.14630894167097, 0.433350517223017, 0, 1.49770950074319, 0.473800072611076, 0.538473125003292, 1.37979627224964, 1.5859723170438, 0.996267398224516, 0.986095542821092, 0.725310666139274, 0.570542199221932},
10035       {1.05007594438157, 0.394661075648738, 0.808442678243754, 0.763612910719347, 1.37259631233791, 0.637341078675405, 0.633592151371708, 1.16756929156758, 0.967795284542623, 1.463460928818, 1.49770950074319, 0, 1.0079761868248, 1.44331961488922, 0.924599080166146, 1.06275728888356, 1.05974425835993, 1.04892430642749, 0.972058829603409, 1.21378822764856},
10036       {1.05464162250736, 0.998807558909651, 1.2889025816028, 1.53386612356483, 1.09416720447891, 0.806490842729072, 1.27269395724349, 1.17264582493965, 0.771479459384692, 0.462965544381851, 0.473800072611076, 1.0079761868248, 0, 0.72479754849538, 1.1699868662153, 1.34481214251794, 1.06435197383538, 1.05348497728858, 0.774878150710318, 0.609532859331199},
10037       {1.1985987403937, 1.135143404599, 1.16264109995678, 1.74323672079854, 0.986982088723923, 1.83315144709714, 1.44641491621774, 1.33271035269688, 1.10468029976148, 0.66291968000662, 0.538473125003292, 1.44331961488922, 0.72479754849538, 0, 1.32968844979665, 1.21307373491949, 0.960087571600877, 0.475142555482979, 0.349485367759138, 0.692733248746636},
10038       {0.967404475245526, 1.15432562628921, 1.18228799147301, 0.886347403928663, 1.59321190226694, 0.932064479113502, 0.735428579892476, 1.07564768421292, 1.12334774065132, 1.07010201755441, 1.37979627224964, 0.924599080166146, 1.1699868662153, 1.32968844979665, 0, 0.979087429691819, 0.97631161216338, 1.21751652292503, 1.42156458605332, 1.40887880416009},
10039       {0.700490199584332, 1.05309036790541, 0.679475681649858, 0.808614044804528, 0.915638787768407, 0.850321696813199, 0.845319988414402, 0.778868281341681, 1.02482926701639, 1.23000200130049, 1.5859723170438, 1.06275728888356, 1.34481214251794, 1.21307373491949, 0.979087429691819, 0, 0.56109848274013, 1.76318885009194, 1.29689226231656, 1.02015839286433},
10040       {0.880060189098976, 1.05010474413616, 0.853658619686283, 1.01590147813779, 0.913042853922533, 1.06830084665916, 1.06201695511881, 1.23287107008366, 1.28754326478771, 0.973485453109068, 0.996267398224516, 1.05974425835993, 1.06435197383538, 0.960087571600877, 0.97631161216338, 0.56109848274013, 0, 1.39547634461879, 1.02642577026706, 0.807404666228614},
10041       {1.09748548316685, 1.03938321130789, 1.68988558988005, 1.59617804551619, 1.80744143643002, 1.05739353225849, 1.324395996498, 0.968539655354582, 1.27439749294131, 0.963546200571036, 0.986095542821092, 1.04892430642749, 1.05348497728858, 0.475142555482979, 1.21751652292503, 1.76318885009194, 1.39547634461879, 0, 0.320002937404137, 1.268589159299},
10042       {1.28141710375267, 0.963216908696184, 1.24297493464833, 1.1740494822217, 1.3294417177004, 0.979907428113788, 1.22734387448031, 1.42479529031801, 0.468683841672724, 0.708724769805536, 0.725310666139274, 0.972058829603409, 0.774878150710318, 0.349485367759138, 1.42156458605332, 1.29689226231656, 1.02642577026706, 0.320002937404137, 0, 0.933095433689795},
10043       {0.800038509951648, 1.20274751778601, 1.55207513886163, 1.46600946033173, 0.830022143283238, 1.5416250309563, 1.53255698189437, 1.41208067821187, 1.47469999960758, 0.351200119909572, 0.570542199221932, 1.21378822764856, 0.609532859331199, 0.692733248746636, 1.40887880416009, 1.02015839286433, 0.807404666228614, 1.268589159299, 0.933095433689795, 0}
10044     },
10045     /*eigeninv*/
10046     {
10047       {-0.216311217101265, -0.215171653035930, -0.217000020881064, -0.232890860601250, -0.25403526530177, -0.211569372858927, -0.218073620637049, -0.240585637190076, -0.214507049619293, -0.228476323330312, -0.223235445346107, -0.216116483840334, -0.206903836810903, -0.223553828183343, -0.236937609127783, -0.217652789023588, -0.211982652566286, -0.245995223308316, -0.206187718714279, -0.227670670439422},
10048       {-0.0843931919568687, -0.0342164464991033, 0.393702284928246, -0.166018266253027, 0.0500896782860136, -0.262731388032538, 0.030139964190519, -0.253997503551094, -0.0932603349591988, -0.32884667697173, 0.199966846276877, -0.117543453869516, 0.196248237055757, -0.456448703853250, 0.139286961076387, 0.241166801918811, -0.0783508285295053, 0.377438091416498, 0.109499076984234, 0.128581669647144},
10049       {-0.0690428674271772, 0.0133858672878363, -0.208289917312908, 0.161232925220819, 0.0735806288007248, -0.316269599838174, -0.0640708424745702, -0.117078801507436, 0.360805085405857, 0.336899760384943, 0.0332447078185156, 0.132954055834276, 0.00595209121998118, -0.157755611190327, -0.199839273133436, 0.193688928807663, 0.0970290928040946, 0.374683975138541, -0.478110944870958, -0.243290196936098},
10050       {0.117284581850481, 0.310399467781876, -0.143513477698805, 0.088808130300351, 0.105747812943691, -0.373871701179853, 0.189069306295134, 0.133258225034741, -0.213043549687694, 0.301303731259140, -0.182085224761849, -0.161971915020789, 0.229301173581378, -0.293586313243755, -0.0260480060747498, -0.0217953684540699, 0.0202675755458796, -0.160134624443657, 0.431950096999465, -0.329885160320501},
10051       {0.256496969244703, 0.0907408349583135, 0.0135731083898029, 0.477557831930769, -0.0727379669280703, 0.101732675207959, -0.147293025369251, -0.348325291603251, -0.255678082078362, -0.187092643740172, -0.177164064346593, -0.225921480146133, 0.422318841046522, 0.319959853469398, -0.0623652546300045, 0.0824203908606883, -0.102057926881110, 0.120728407576411, -0.156845807891241, -0.123528163091204},
10052       {-0.00906668858975576, -0.0814722888231236, -0.0762715085459023, 0.055819989938286, -0.0540516675257271, -0.0070589302769034, -0.315813159989213, -0.0103527463419808, -0.194634331372293, -0.0185860407566822, 0.50134169352609, 0.384531812730061, -0.0405008616742061, 0.0781033650669525, 0.069334900096687, 0.396455180448549, -0.204065801866462, -0.215272089630713, 0.171046818996465, -0.396393364716348},
10053       {0.201971098571663, 0.489747667606921, 0.00226258734592836, 0.0969514005747054, 0.0853921636903791, 0.0862068740282345, -0.465412154271164, -0.130516676347786, 0.165513616974634, 0.0712238027886633, 0.140746943067963, -0.325919272273406, -0.421213488261598, -0.163508199065965, 0.269695802810568, -0.110296405171437, -0.106834099902202, 0.00509414588152415, 0.00909215239544615, 0.0500401865589727},
10054       {0.515854176692456, -0.087468413428258, 0.102796468891449, -0.06046105990993, -0.212014383772414, -0.259853648383794, -0.0997372883043333, -0.109934574535736, 0.284891018406112, -0.250578342940183, 0.142174204994568, 0.210384918947619, 0.118803190788946, -0.0268434355996836, 0.0103721198836548, -0.355555176478458, 0.428042332431476, -0.150610175411631, 0.0464090887952940, -0.140238796382057},
10055       {-0.239392215229762, -0.315483492656425, 0.100205194952396, 0.197830195325302, 0.40178804665223, 0.195809461460298, -0.407817115321684, 0.0226836686147386, -0.169780276210306, 0.0818161585952184, -0.172886230584939, 0.174982644851064, 0.0868786992159535, -0.198450519980824, 0.168581078329968, -0.361514336004068, 0.238668430084722, 0.165494019791904, 0.110437707249228, -0.169592003035203},
10056       {-0.313151735678025, 0.10757884850664, -0.49249098807229, 0.0993472335619114, -0.148695715250836, 0.0573801136941699, -0.190040373500722, 0.254848437434773, 0.134147888304352, -0.352719341442756, 0.0839609323513986, -0.207904182300122, 0.253940523323376, -0.109832138553288, 0.0980084518687944, 0.209026594443723, 0.406236051871548, -0.0521120230935943, 0.0554108014592302, 0.134681046631955},
10057       {-0.102905214421384, 0.235803606800009, 0.213414976431981, -0.253606415825635, 0.00945656859370683, 0.259551282655855, 0.159527348902192, 0.083218761193016, -0.286815935191867, 0.0135069477264877, 0.336758103107357, -0.271707359524149, -0.0400009875851839, 0.0871186292716414, -0.171506310409388, -0.0954276577211755, 0.393467571460712, 0.111732846649458, -0.239886066474217, -0.426474828195231},
10058       {-0.0130795552324104, 0.0758967690968058, -0.165099404017689, -0.46035152559912, 0.409888158016031, -0.0235053940299396, 0.0699393201709723, -0.161320910316996, 0.226111732196825, -0.177811841258496, -0.219073917645916, -0.00703219376737286, 0.162831878334912, 0.271670554900684, 0.451033612762052, 0.0820942662443393, -0.0904983490498446, -0.0587000279313978, -0.0938852980928252, -0.306078621571843},
10059       {0.345092040577428, -0.257721588971295, -0.301689123771848, -0.0875212184538126, 0.161012613069275, 0.385104899829821, 0.118355290985046, -0.241723794416731, 0.083201920119646, -0.0809095291508749, -0.0820275390511991, -0.115569770103317, -0.250105681098033, -0.164197583037664, -0.299481453795592, 0.255906951902366, 0.129042051416371, 0.203761730442746, 0.347550071284268, -0.109264854744020},
10060       {0.056345924962239, 0.072536751679082, 0.303127492633681, -0.368877185781648, -0.343024497082421, 0.206879529669083, -0.413012709639426, 0.078538816203612, 0.103382383425097, 0.288319996147499, -0.392663258459423, 0.0319588502083897, 0.220316797792669, -0.0563686494606947, -0.0869286063283735, 0.323677017794391, 0.0984875197088935, -0.0303289828821742, 0.0450197853450979, -0.0261771221270139},
10061       {-0.253701638374729, -0.148922815783583, 0.111794052194159, 0.157313977830326, -0.269846001260543, -0.222989872703583, 0.115441028189268, -0.350456582262355, -0.0409581422905941, 0.174078744248002, -0.130673397086811, -0.123963802708056, -0.351609207081548, 0.281548012920868, 0.340382662112428, 0.180262131025562, 0.3895263830793, 0.0121546812430960, 0.214830943227063, -0.0617782909660214},
10062       {-0.025854479416026, 0.480654788977767, -0.138024550829229, -0.130191670810919, 0.107816875829919, -0.111243997319276, -0.0679814460571245, -0.183167991080677, -0.363355166018786, -0.183934891092050, -0.216097125080962, 0.520240628803255, -0.179616013606479, 0.0664131536100941, -0.178350708111064, 0.0352047611606709, 0.223857228692892, 0.128363679623513, -0.000403433628490731, 0.224972110977704},
10063       {0.159207394033448, -0.0371517305736114, -0.294302634912281, -0.0866954375908417, -0.259998567870054, 0.284966673982689, 0.205356416771391, -0.257613708650298, -0.264820519037270, 0.293359248624603, 0.0997476397434102, 0.151390539497369, 0.165571346773648, -0.347569523551258, 0.43792310820533, -0.0723248163210163, 0.0379214984816955, -0.0542758730251438, -0.258020301801603, 0.128680501102363},
10064       {0.316853842351797, -0.153950010941153, -0.13387065213508, -0.0702971390607613, -0.202558481846057, -0.172941438694837, -0.068882524588574, 0.524738203063889, -0.271670479920716, -0.112864756695310, -0.146831636946145, -0.0352336188578041, -0.211108490884767, 0.097857111349555, 0.276459740956662, 0.0231297536754823, -0.0773173324868396, 0.487208384389438, -0.0734191389266824, -0.113198765573319},
10065       {-0.274285525741087, 0.227334266052039, -0.0973746625709059, -0.00965256583655389, -0.402438444750043, 0.198586229519026, 0.0958135064575833, -0.108934376958686, 0.253641732094319, -0.0551918478254021, 0.0243640218331436, 0.181936272247179, 0.090952738347629, 0.0603352483029044, -0.0043821671755761, -0.347720824658591, -0.267879988539971, 0.403804652116592, 0.337654323971186, -0.241509293972297},
10066       {-0.0197089518344238, 0.139681034626696, 0.251980475788267, 0.341846624362846, -0.075141195125153, 0.2184951591319, 0.268870823491343, 0.150392399018138, 0.134592404015057, -0.337050200539163, -0.313109373497998, 0.201993318439135, -0.217140733851970, -0.337622749083808, 0.135253284365068, 0.181729249828045, -0.00627813335422765, -0.197218833324039, -0.194060005031698, -0.303055888528004}
10067     },
10068     /*eigenval*/
10069     {
10070       20.29131, 0.5045685, 0.2769945, 0.1551147, 0.03235484, -0.04127639, -0.3516426, -0.469973, -0.5835191, -0.6913107, -0.7207972, -0.7907875, -0.9524307, -1.095310, -1.402153, -1.424179, -1.936704, -2.037965, -3.273561, -5.488734
10071     },
10072     /*eigentot and codeFreq left out, these are initialized elsewhere*/
10073   };
10074 
10075 /* The JTT92 matrix, D. T. Jones, W. R. Taylor, & J. M. Thorton, CABIOS 8:275 (1992)
10076    Derived from the PhyML source code (models.c) by filling in the other side of the symmetric matrix,
10077    scaling the entries by the stationary rate (to give the rate of a->b not b|a), to set the diagonals
10078    so the rows sum to 0, to rescale the matrix so that the implied rate of evolution is 1.
10079    The resulting matrix is the transpose (I think).
10080 */
10081 #if 0
10082 {
10083   int i,j;
10084   for (i=0; i<20; i++)  for (j=0; j<i; j++)  daa[j*20+i] = daa[i*20+j];
10085   for (i = 0; i < 20; i++) for (j = 0; j < 20; j++) daa[i*20+j] *= pi[j] / 100.0;
10086   double mr = 0;		/* mean rate */
10087   for (i = 0; i < 20; i++) {
10088     double sum = 0;
10089     for (j = 0; j < 20; j++)
10090     sum += daa[i*20+j];
10091     daa[i*20+i] = -sum;
10092     mr += pi[i] * sum;
10093   }
10094   for (i = 0; i < 20*20; i++)
10095     daa[i] /= mr;
10096 }
10097 #endif
10098 
10099 double statJTT92[MAXCODES] = {0.07674789,0.05169087,0.04264509,0.05154407,0.01980301,0.04075195,0.06182989,0.07315199,0.02294399,0.05376110,0.09190390,0.05867583,0.02382594,0.04012589,0.05090097,0.06876503,0.05856501,0.01426057,0.03210196,0.06600504};
10100 double matrixJTT92[MAXCODES][MAXCODES] = {
10101   { -1.247831,0.044229,0.041179,0.061769,0.042704,0.043467,0.08007,0.136501,0.02059,0.027453,0.022877,0.02669,0.041179,0.011439,0.14794,0.288253,0.362223,0.006863,0.008388,0.227247 },
10102   { 0.029789,-1.025965,0.023112,0.008218,0.058038,0.159218,0.014895,0.070364,0.168463,0.011299,0.019517,0.33179,0.022599,0.002568,0.038007,0.051874,0.032871,0.064714,0.010272,0.008731 },
10103   { 0.022881,0.019068,-1.280568,0.223727,0.014407,0.03644,0.024576,0.034322,0.165676,0.019915,0.005085,0.11144,0.012712,0.004237,0.006356,0.213134,0.098304,0.00339,0.029661,0.00678 },
10104   { 0.041484,0.008194,0.270413,-1.044903,0.005121,0.025095,0.392816,0.066579,0.05736,0.005634,0.003585,0.013316,0.007682,0.002049,0.007682,0.030217,0.019462,0.002049,0.023559,0.015877 },
10105   { 0.011019,0.022234,0.00669,0.001968,-0.56571,0.001771,0.000984,0.011609,0.013577,0.003345,0.004526,0.001377,0.0061,0.015348,0.002755,0.043878,0.008264,0.022628,0.041124,0.012199 },
10106   { 0.02308,0.125524,0.034823,0.019841,0.003644,-1.04415,0.130788,0.010528,0.241735,0.003644,0.029154,0.118235,0.017411,0.00162,0.066406,0.021461,0.020651,0.007288,0.009718,0.008098 },
10107   { 0.064507,0.017816,0.035632,0.471205,0.003072,0.198435,-0.944343,0.073107,0.015973,0.007372,0.005529,0.111197,0.011058,0.003072,0.011058,0.01843,0.019659,0.006143,0.0043,0.027646 },
10108   { 0.130105,0.099578,0.058874,0.09449,0.042884,0.018898,0.086495,-0.647831,0.016717,0.004361,0.004361,0.019625,0.010176,0.003634,0.017444,0.146096,0.023986,0.039976,0.005815,0.034162 },
10109   { 0.006155,0.074775,0.089138,0.025533,0.01573,0.1361,0.005927,0.005243,-1.135695,0.003648,0.012767,0.010259,0.007523,0.009119,0.026217,0.016642,0.010487,0.001824,0.130629,0.002508 },
10110   { 0.01923,0.011752,0.025106,0.005876,0.009081,0.004808,0.00641,0.003205,0.008547,-1.273602,0.122326,0.011218,0.25587,0.047542,0.005342,0.021367,0.130873,0.004808,0.017094,0.513342 },
10111   { 0.027395,0.0347,0.010958,0.006392,0.021003,0.065748,0.008219,0.005479,0.051137,0.209115,-0.668139,0.012784,0.354309,0.226465,0.093143,0.053877,0.022829,0.047485,0.021916,0.16437 },
10112   { 0.020405,0.376625,0.153332,0.015158,0.004081,0.170239,0.105525,0.015741,0.026235,0.012243,0.008162,-0.900734,0.037896,0.002332,0.012243,0.027401,0.06005,0.00583,0.004664,0.008162 },
10113   { 0.012784,0.010416,0.007102,0.003551,0.007339,0.01018,0.004261,0.003314,0.007812,0.113397,0.091854,0.015388,-1.182051,0.01018,0.003788,0.006865,0.053503,0.005682,0.004261,0.076466 },
10114   { 0.00598,0.001993,0.003987,0.001595,0.031098,0.001595,0.001993,0.001993,0.015948,0.035484,0.098877,0.001595,0.017144,-0.637182,0.006778,0.03668,0.004784,0.021131,0.213701,0.024719 },
10115   { 0.098117,0.037426,0.007586,0.007586,0.007081,0.082944,0.009104,0.012138,0.058162,0.005058,0.051587,0.010621,0.008092,0.008598,-0.727675,0.144141,0.059679,0.003035,0.005058,0.011632 },
10116   { 0.258271,0.069009,0.343678,0.040312,0.152366,0.036213,0.020498,0.137334,0.049878,0.02733,0.040312,0.032113,0.019814,0.06286,0.194728,-1.447863,0.325913,0.023914,0.043045,0.025964 },
10117   { 0.276406,0.037242,0.135003,0.022112,0.02444,0.029677,0.018621,0.019203,0.026768,0.142567,0.014548,0.059936,0.131511,0.006983,0.068665,0.27757,-1.335389,0.006983,0.01222,0.065174 },
10118   { 0.001275,0.017854,0.001134,0.000567,0.016295,0.002551,0.001417,0.007793,0.001134,0.001275,0.007368,0.001417,0.003401,0.00751,0.00085,0.004959,0.0017,-0.312785,0.010061,0.003542 },
10119   { 0.003509,0.006379,0.022328,0.014673,0.066664,0.007655,0.002233,0.002552,0.182769,0.010207,0.007655,0.002552,0.005741,0.170967,0.00319,0.020095,0.006698,0.022647,-0.605978,0.005103 },
10120   { 0.195438,0.011149,0.010493,0.020331,0.040662,0.013117,0.029512,0.030824,0.007214,0.630254,0.11805,0.009182,0.211834,0.040662,0.015084,0.024922,0.073453,0.016396,0.010493,-1.241722 }
10121 };
10122 
10123 double statWAG01[MAXCODES] = {0.0866279,0.043972, 0.0390894,0.0570451,0.0193078,0.0367281,0.0580589,0.0832518,0.0244314,0.048466, 0.086209, 0.0620286,0.0195027,0.0384319,0.0457631,0.0695179,0.0610127,0.0143859,0.0352742,0.0708956};
10124 double matrixWAG01[MAXCODES][MAXCODES] = {
10125 	{-1.117151, 0.050147, 0.046354, 0.067188, 0.093376, 0.082607, 0.143908, 0.128804, 0.028817, 0.017577, 0.036177, 0.082395, 0.081234, 0.019138, 0.130789, 0.306463, 0.192846, 0.010286, 0.021887, 0.182381},
10126 	{0.025455, -0.974318, 0.029321, 0.006798, 0.024376, 0.140086, 0.020267, 0.026982, 0.098628, 0.008629, 0.022967, 0.246964, 0.031527, 0.004740, 0.031358, 0.056495, 0.025586, 0.053714, 0.017607, 0.011623},
10127 	{0.020916, 0.026065, -1.452438, 0.222741, 0.010882, 0.063328, 0.038859, 0.046176, 0.162306, 0.022737, 0.005396, 0.123567, 0.008132, 0.003945, 0.008003, 0.163042, 0.083283, 0.002950, 0.044553, 0.008051},
10128 	{0.044244, 0.008819, 0.325058, -0.989665, 0.001814, 0.036927, 0.369645, 0.051822, 0.055719, 0.002361, 0.005077, 0.028729, 0.006212, 0.002798, 0.025384, 0.064166, 0.022443, 0.007769, 0.019500, 0.009120},
10129 	{0.020812, 0.010703, 0.005375, 0.000614, -0.487357, 0.002002, 0.000433, 0.006214, 0.005045, 0.003448, 0.007787, 0.001500, 0.007913, 0.008065, 0.002217, 0.028525, 0.010395, 0.014531, 0.011020, 0.020307},
10130 	{0.035023, 0.117008, 0.059502, 0.023775, 0.003809, -1.379785, 0.210830, 0.012722, 0.165524, 0.004391, 0.033516, 0.150135, 0.059565, 0.003852, 0.035978, 0.039660, 0.033070, 0.008316, 0.008777, 0.011613},
10131 	{0.096449, 0.026759, 0.057716, 0.376214, 0.001301, 0.333275, -1.236894, 0.034593, 0.034734, 0.007763, 0.009400, 0.157479, 0.019202, 0.004944, 0.041578, 0.042955, 0.050134, 0.009540, 0.011961, 0.035874},
10132 	{0.123784, 0.051085, 0.098345, 0.075630, 0.026795, 0.028838, 0.049604, -0.497615, 0.021792, 0.002661, 0.005356, 0.032639, 0.015212, 0.004363, 0.021282, 0.117240, 0.019732, 0.029444, 0.009052, 0.016361},
10133 	{0.008127, 0.054799, 0.101443, 0.023863, 0.006384, 0.110105, 0.014616, 0.006395, -0.992342, 0.003543, 0.012807, 0.022832, 0.010363, 0.017420, 0.017851, 0.018979, 0.012136, 0.006733, 0.099319, 0.003035},
10134 	{0.009834, 0.009511, 0.028192, 0.002006, 0.008654, 0.005794, 0.006480, 0.001549, 0.007029, -1.233162, 0.161294, 0.016472, 0.216559, 0.053891, 0.005083, 0.016249, 0.074170, 0.010808, 0.021372, 0.397837},
10135 	{0.036002, 0.045028, 0.011900, 0.007673, 0.034769, 0.078669, 0.013957, 0.005547, 0.045190, 0.286902, -0.726011, 0.023303, 0.439180, 0.191376, 0.037625, 0.031191, 0.029552, 0.060196, 0.036066, 0.162890},
10136 	{0.058998, 0.348377, 0.196082, 0.031239, 0.004820, 0.253558, 0.168246, 0.024319, 0.057967, 0.021081, 0.016767, -1.124580, 0.060821, 0.005783, 0.036254, 0.062960, 0.090292, 0.008952, 0.008675, 0.019884},
10137 	{0.018288, 0.013983, 0.004057, 0.002124, 0.007993, 0.031629, 0.006450, 0.003564, 0.008272, 0.087143, 0.099354, 0.019123, -1.322098, 0.024370, 0.003507, 0.010109, 0.031033, 0.010556, 0.008769, 0.042133},
10138 	{0.008490, 0.004143, 0.003879, 0.001885, 0.016054, 0.004030, 0.003273, 0.002014, 0.027402, 0.042734, 0.085315, 0.003583, 0.048024, -0.713669, 0.006512, 0.022020, 0.006934, 0.061698, 0.260332, 0.026213},
10139 	{0.069092, 0.032635, 0.009370, 0.020364, 0.005255, 0.044829, 0.032773, 0.011698, 0.033438, 0.004799, 0.019973, 0.026747, 0.008229, 0.007754, -0.605590, 0.077484, 0.038202, 0.006695, 0.010376, 0.015124},
10140 	{0.245933, 0.089317, 0.289960, 0.078196, 0.102703, 0.075066, 0.051432, 0.097899, 0.054003, 0.023306, 0.025152, 0.070562, 0.036035, 0.039831, 0.117705, -1.392239, 0.319421, 0.038212, 0.057419, 0.016981},
10141 	{0.135823, 0.035501, 0.129992, 0.024004, 0.032848, 0.054936, 0.052685, 0.014461, 0.030308, 0.093371, 0.020915, 0.088814, 0.097083, 0.011008, 0.050931, 0.280341, -1.154973, 0.007099, 0.018643, 0.088894},
10142 	{0.001708, 0.017573, 0.001086, 0.001959, 0.010826, 0.003257, 0.002364, 0.005088, 0.003964, 0.003208, 0.010045, 0.002076, 0.007786, 0.023095, 0.002105, 0.007908, 0.001674, -0.466694, 0.037525, 0.005516},
10143 	{0.008912, 0.014125, 0.040205, 0.012058, 0.020133, 0.008430, 0.007267, 0.003836, 0.143398, 0.015555, 0.014757, 0.004934, 0.015861, 0.238943, 0.007998, 0.029135, 0.010779, 0.092011, -0.726275, 0.011652},
10144 	{0.149259, 0.018739, 0.014602, 0.011335, 0.074565, 0.022417, 0.043805, 0.013932, 0.008807, 0.581952, 0.133956, 0.022726, 0.153161, 0.048356, 0.023429, 0.017317, 0.103293, 0.027186, 0.023418, -1.085487},
10145 };
10146 
10147 /* Le-Gascuel 2008 model data from Harry Yoo
10148    https://github.com/hyoo/FastTree
10149 */
10150 double statLG08[MAXCODES] = {0.079066, 0.055941, 0.041977, 0.053052, 0.012937, 0.040767, 0.071586, 0.057337, 0.022355, 0.062157, 0.099081, 0.0646, 0.022951, 0.042302, 0.04404, 0.061197, 0.053287, 0.012066, 0.034155, 0.069147};
10151 
10152 double matrixLG08[MAXCODES][MAXCODES] = {
10153    {-1.08959879,0.03361031,0.02188683,0.03124237,0.19680136,0.07668542,0.08211337,0.16335306,0.02837339,0.01184642,0.03125763,0.04242021,0.08887270,0.02005907,0.09311189,0.37375830,0.16916131,0.01428853,0.01731216,0.20144931},
10154    {0.02378006,-0.88334349,0.04206069,0.00693409,0.02990323,0.15707674,0.02036079,0.02182767,0.13574610,0.00710398,0.01688563,0.35388551,0.02708281,0.00294931,0.01860218,0.04800569,0.03238902,0.03320688,0.01759004,0.00955956},
10155    {0.01161996,0.03156149,-1.18705869,0.21308090,0.02219603,0.07118238,0.02273938,0.06034785,0.18928374,0.00803870,0.00287235,0.09004368,0.01557359,0.00375798,0.00679131,0.16825837,0.08398226,0.00190474,0.02569090,0.00351296},
10156    {0.02096312,0.00657599,0.26929909,-0.86328733,0.00331871,0.02776660,0.27819699,0.04482489,0.04918511,0.00056712,0.00079981,0.01501150,0.00135537,0.00092395,0.02092662,0.06579888,0.02259266,0.00158572,0.00716768,0.00201422},
10157    {0.03220119,0.00691547,0.00684065,0.00080928,-0.86781864,0.00109716,0.00004527,0.00736456,0.00828668,0.00414794,0.00768465,0.00017162,0.01156150,0.01429859,0.00097521,0.03602269,0.01479316,0.00866942,0.01507844,0.02534728},
10158    {0.03953956,0.11446966,0.06913053,0.02133682,0.00345736,-1.24953177,0.16830979,0.01092385,0.19623161,0.00297003,0.02374496,0.13185209,0.06818543,0.00146170,0.02545052,0.04989165,0.04403378,0.00962910,0.01049079,0.00857458},
10159    {0.07434507,0.02605508,0.03877888,0.37538659,0.00025048,0.29554848,-0.84254259,0.02497249,0.03034386,0.00316875,0.00498760,0.12936820,0.01243696,0.00134660,0.03002373,0.04380857,0.04327684,0.00557310,0.00859294,0.01754095},
10160    {0.11846020,0.02237238,0.08243001,0.04844538,0.03263985,0.01536392,0.02000178,-0.50414422,0.01785951,0.00049912,0.00253779,0.01700817,0.00800067,0.00513658,0.01129312,0.09976552,0.00744439,0.01539442,0.00313512,0.00439779},
10161    {0.00802225,0.05424651,0.10080372,0.02072557,0.01431930,0.10760560,0.00947583,0.00696321,-1.09324335,0.00243405,0.00818899,0.01558729,0.00989143,0.01524917,0.01137533,0.02213166,0.01306114,0.01334710,0.11863394,0.00266053},
10162    {0.00931296,0.00789336,0.01190322,0.00066446,0.01992916,0.00452837,0.00275137,0.00054108,0.00676776,-1.41499789,0.25764421,0.00988722,0.26563382,0.06916358,0.00486570,0.00398456,0.06425393,0.00694043,0.01445289,0.66191466},
10163    {0.03917027,0.02990732,0.00677980,0.00149374,0.05885464,0.05771026,0.00690325,0.00438541,0.03629495,0.41069624,-0.79375308,0.01362360,0.62543296,0.25688578,0.02467704,0.01806113,0.03001512,0.06139358,0.02968934,0.16870919},
10164    {0.03465896,0.40866276,0.13857164,0.01827910,0.00085698,0.20893479,0.11674330,0.01916263,0.04504313,0.01027583,0.00888247,-0.97644156,0.04241650,0.00154510,0.02521473,0.04836478,0.07344114,0.00322392,0.00852278,0.01196402},
10165    {0.02579765,0.01111131,0.00851489,0.00058635,0.02051079,0.03838702,0.00398738,0.00320253,0.01015515,0.09808327,0.14487451,0.01506968,-1.54195698,0.04128536,0.00229163,0.00796306,0.04636929,0.01597787,0.01104642,0.04357735},
10166    {0.01073203,0.00223024,0.00378708,0.00073673,0.04675419,0.00151673,0.00079574,0.00378966,0.02885576,0.04707045,0.10967574,0.00101178,0.07609486,-0.81061579,0.00399600,0.01530562,0.00697985,0.10394083,0.33011973,0.02769432},
10167    {0.05186360,0.01464471,0.00712508,0.01737179,0.00331981,0.02749383,0.01847072,0.00867414,0.02240973,0.00344749,0.01096857,0.01718973,0.00439734,0.00416018,-0.41664685,0.05893117,0.02516738,0.00418956,0.00394655,0.01305787},
10168    {0.28928853,0.05251612,0.24529879,0.07590089,0.17040121,0.07489439,0.03745080,0.10648187,0.06058559,0.00392302,0.01115539,0.04581702,0.02123285,0.02214217,0.08188943,-1.42842431,0.39608294,0.01522956,0.02451220,0.00601987},
10169    {0.11400727,0.03085239,0.10660988,0.02269274,0.06093244,0.05755704,0.03221430,0.00691855,0.03113348,0.05508469,0.01614250,0.06057985,0.10765893,0.00879238,0.03045173,0.34488735,-1.23444419,0.00750412,0.01310009,0.11660005},
10170    {0.00218053,0.00716244,0.00054751,0.00036065,0.00808574,0.00284997,0.00093936,0.00323960,0.00720403,0.00134729,0.00747646,0.00060216,0.00840002,0.02964754,0.00114785,0.00300276,0.00169919,-0.44275283,0.03802969,0.00228662},
10171    {0.00747852,0.01073967,0.02090366,0.00461457,0.03980863,0.00878929,0.00409985,0.00186756,0.18125441,0.00794180,0.01023445,0.00450612,0.01643896,0.26654152,0.00306072,0.01368064,0.00839668,0.10764993,-0.71435091,0.00851526},
10172    {0.17617706,0.01181629,0.00578676,0.00262530,0.13547871,0.01454379,0.01694332,0.00530363,0.00822937,0.73635171,0.11773937,0.01280613,0.13129028,0.04526924,0.02050210,0.00680190,0.15130413,0.01310401,0.01723920,-1.33539639}
10173 };
10174