1 /*
2 * FastTree -- inferring approximately-maximum-likelihood trees for large
3 * multiple sequence alignments.
4 *
5 * Morgan N. Price
6 * http://www.microbesonline.org/fasttree/
7 *
8 * Thanks to Jim Hester of the Cleveland Clinic Foundation for
9 * providing the first parallel (OpenMP) code, Siavash Mirarab of
10 * UT Austin for implementing the WAG option, Samuel Shepard
11 * at the CDC for suggesting and helping with the -quote option, and
12 * Aaron Darling (University of Technology, Sydney) for numerical changes
13 * for wide alignments of closely-related sequences.
14 *
15 * Copyright (C) 2008-2015 The Regents of the University of California
16 * All rights reserved.
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License along
29 * with this program; if not, write to the Free Software Foundation, Inc.,
30 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
31 * or visit http://www.gnu.org/copyleft/gpl.html
32 *
33 * Disclaimer
34 *
35 * NEITHER THE UNITED STATES NOR THE UNITED STATES DEPARTMENT OF ENERGY,
36 * NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED,
37 * OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
38 * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
39 * OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE
40 * PRIVATELY OWNED RIGHTS.
41 */
42
43 /*
44 * To compile FastTree, do:
45 * gcc -Wall -O3 -finline-functions -funroll-loops -o FastTree -lm FastTree.c
46 * Use -DNO_SSE to turn off use of SSE3 instructions
47 * (should not be necessary because compiler should not set __SSE__ if
48 * not available, and modern mallocs should return 16-byte-aligned values)
49 * Use -DOPENMP -fopenmp to use multiple threads (note, old versions of gcc
50 * may not support -fopenmp)
51 * Use -DTRACK_MEMORY if you want detailed reports of memory usage,
52 * but results are not correct above 4GB because mallinfo stores int values.
53 * It also makes FastTree run significantly slower.
54 *
55 * To get usage guidance, do:
56 * FastTree -help
57 *
58 * FastTree uses profiles instead of a distance matrix, and computes
59 * support values for each split from the profiles of the 4 nodes
60 * around the split. It stores a profile for each node and a average
61 * profile over all active nodes (the "out-profile" for computing the
62 * total sum of distance to other nodes). The neighbor joining phase
63 * requires O(N*L*a) space, where N is the number of sequences, L is
64 * the alignment width, and a is the alphabet size. The top-hits
65 * heuristic requires an additional O(N sqrt(N)) memory. After
66 * neighbor-joining, FastTree improves the topology with
67 * nearest-neighbor interchanges (NNIs) and subtree-prune-regraft
68 * moves (SPRs), which does not have a significant additional memory
69 * requirement. (We need only store "up-profiles" on the path from our
70 * current traversal point to the root.) These take O(NLa) time per
71 * round, and with default settings, O(N log(N) L a) time total.
72 * FastTree further improves the topology with maximum-likelihood
73 * NNIs, using similar data structures and complexity, but with a
74 * higher constant factor, and now the "profiles" are actually
75 * posterior distributions for that subtree. Finally, FastTree
76 * resamples the site likelihoods around each NNI and uses
77 * the Shimodaira Hasegawa test to estimate the reliability of each split.
78 *
79 * Overview of the neighbor-joining phase:
80 *
81 * Although FastTree uses a log correction on profile distances to
82 * account for multiple substitutions when doing NNIs and SPRs, the
83 * operations on the profiles themselves involve "additive" distances
84 * -- either %different (for nucleotide) or by using an amino acid
85 * similarity matrix (for proteins). If we are using %different as
86 * our distance matrix then
87 *
88 * Profile_distance(A,B) = 1 - sum over characters of freq(A)*freq(B)
89 *
90 * and we can average this value over positions. Positions with gaps
91 * are weighted by %ungapped(A) * %ungapped(B).
92 *
93 * If we are using an amino acid dissimilarity matrix D(i,j) then at
94 * each position
95 *
96 * Profile_distance(A,B) = sum(i,j) freq(A==i) * freq(B==j) * D(i,j)
97 * = sum(k) Ak * Bk * Lambda(k)
98 *
99 * where k iterates over 20 eigenvectors, Lambda(k) is the eigenvalue,
100 * and if A==i, then Ak is the kth column of the inverse of the
101 * eigenvector matrix.
102 *
103 * The exhaustive approach (-slow) takes O(N**3*L*a) time, but
104 * this can be reduced to as little as O(N**(3/2)*log(N)*L*a) time
105 * by using heuristics.
106 *
107 * It uses a combination of three heuristics: a visible set similar to
108 * that of FastTree (Elias & Lagergren 2005), a local hill-climbing
109 * search for a better join (as in relaxed neighbor-joining, Evans et
110 * al. 2006), and a top-hit list to reduce the search space (see
111 * below).
112 *
113 * The "visible" set stores, for each node, the best join for that
114 * node, as identified at some point in the past
115 *
116 * If top-hits are not being used, then the neighbor-joining phase can
117 * be summarized as:
118 *
119 * Compute the out-profile by averaging the leaves
120 * Compute the out-distance of each leaf quickly, using the out-profile
121 * Compute the visible set (or approximate it using top-hits, see below)
122 * Until we're down to 3 active nodes:
123 * Find the best join in the visible set
124 * (This involves recomputing the neighbor-joining criterion,
125 * as out-distances and #active nodes may have changed)
126 * Follow a chain of best hits (again recomputing the criterion)
127 * until we find a locally best join, as in relaxed neighbor joining
128 * Create a profile of the parent node, either using simple averages (default)
129 * or using weighted joining as in BIONJ (if -bionj was specified)
130 * Update the out-profile and the out-distances
131 * Update the visible set:
132 * find the best join for the new joined node
133 * replace hits to the joined children with hits to the parent
134 * if we stumble across a join for the new node that is better
135 * than the corresponding entry in the visible set, "reset"
136 * that entry.
137 *
138 * For each iteration, this method does
139 * O(N) work to find the best hit in the visible set
140 * O(L*N*a*log(N)) work to do the local search, where log(N)
141 * is a pessimistic estimate of the number of iterations. In
142 * practice, we average <1 iteration for 2,000 sequences.
143 * With -fastest, this step is omitted.
144 * O(N*a) work to compute the joined profile and update the out-profile
145 * O(L*N*a) work to update the out-distances
146 * O(L*N*a) work to compare the joined profile to the other nodes
147 * (to find the new entry in the visible set)
148 *
149 * and there are N-3 iterations, so it takes O(N**2 * L * log(N) * a) time.
150 *
151 * The profile distances give exactly the same result as matrix
152 * distances in neighbor-joining or BIONJ would if there are no gaps
153 * in the alignment. If there are gaps, then it is an
154 * approximation. To get the same result we also store a "diameter"
155 * for each node (diameter is 0 for leaves).
156 *
157 * In the simpler case (NJ rather than BIONJ), when we join A and B to
158 * give a new node AB,
159 *
160 * Profile(AB) = (A+B)/2
161 * Profile_distance(AB,C) = (Profile_distance(A,C)+Profile_distance(B,C))/2
162 * because the formulas above are linear
163 *
164 * And according to the neighor-joining rule,
165 * d(AB,C) = (d(A,C)+d(B,C)-d(A,B))/2
166 *
167 * and we can achieve the same value by writing
168 * diameter(AB) = pd(A,B)/2
169 * diameter(leaf) = 0
170 * d(A,B) = pd(A,B) - diameter(A) - diameter(B)
171 *
172 * because
173 * d(AB,C) = (d(A,C)+d(B,C)-d(A,B))/2
174 * = (pd(A,C)-diam(A)-diam(C)+pd(B,C)-diam(B)-diam(C)-d(A,B)+diam(A)+diam(B))/2
175 * = (pd(A,C)+pd(B,C))/2 - diam(C) - pd(A,B)
176 * = pd(AB,C) - diam(AB) - diam(C)
177 *
178 * If we are using BIONJ, with weight lambda for the join:
179 * Profile(AB) = lambda*A + (1-lambda)*B
180 * then a similar argument gives
181 * diam(AB) = lambda*diam(A) + (1-lambda)*diam(B) + lambda*d(A,AB) + (1-lambda)*d(B,AB),
182 *
183 * where, as in neighbor joining,
184 * d(A,AB) = d(A,B) + (total out_distance(A) - total out_distance(B))/(n-2)
185 *
186 * A similar recursion formula works for the "variance" matrix of BIONJ,
187 * var(AB,C) = lambda*var(A,C) + (1-lambda)*var(B,C) - lambda*(1-lambda)*var(A,B)
188 * is equivalent to
189 * var(A,B) = pv(A,B) - vd(A) - vd(B), where
190 * pv(A,B) = pd(A,B)
191 * vd(A) = 0 for leaves
192 * vd(AB) = lambda*vd(A) + (1-lambda)*vd(B) + lambda*(1-lambda)*var(A,B)
193 *
194 * The top-hist heuristic to reduce the work below O(N**2*L) stores a top-hit
195 * list of size m=sqrt(N) for each active node.
196 *
197 * The list can be initialized for all the leaves in sub (N**2 * L) time as follows:
198 * Pick a "seed" sequence and compare it to all others
199 * Store the top m hits of the seed as its top-hit list
200 * Take "close" hits of the seed(within the top m, and see the "close" parameter),
201 * and assume that their top m hits lie within the top 2*m hits of the seed.
202 * So, compare them to the seed's neighors (if they do not already
203 * have a top hit list) and set their top hits.
204 *
205 * This method does O(N*L) work for each seed, or O(N**(3/2)*L) work total.
206 *
207 * To avoid doing O(N*L) work at each iteration, we need to avoid
208 * updating the visible set and the out-distances. So, we use "stale"
209 * out-distances, and when searching the visible set for the best hit,
210 * we only inspect the top m=sqrt(N) entries. We then update those
211 * out-distances (up to 2*m*L*a work) and then find the best hit.
212 *
213 * To avoid searching the entire visible set, FastTree keeps
214 * and updates a list of the top sqrt(N) entries in the visible set.
215 * This costs O(sqrt(N)) time per join to find the best entry and to
216 * update, or (N sqrt(N)) time overall.
217 *
218 * Similarly, when doing the local hill-climbing, we avoid O(N*L) work
219 * by only considering the top-hits for the current node. So this adds
220 * O(m*a*log(N)) work per iteration.
221 *
222 * When we join two nodes, we compute profiles and update the
223 * out-profile as before. We need to compute the best hits of the node
224 * -- we merge the lists for the children and select the best up-to-m
225 * hits. If the top hit list contains a stale node we replace it with
226 * its parent. If we still have <m/2 entries, we do a "refresh".
227 *
228 * In a "refresh", similar to the fast top-hit computation above, we
229 * compare the "seed", in this case the new joined node, to all other
230 * nodes. We compare its close neighbors (the top m hits) to all
231 * neighbors (the top 2*m hits) and update the top-hit lists of all
232 * neighbors (by merging to give a list of 3*m entries and then
233 * selecting the best m entries).
234 *
235 * Finally, during these processes we update the visible sets for
236 * other nodes with better hits if we find them, and we set the
237 * visible entry for the new joined node to the best entry in its
238 * top-hit list. (And whenever we update a visible entry, we
239 * do O(sqrt(N)) work to update the top-visible list.)
240 * These udpates are not common so they do not alter the
241 * O(N sqrt(N) log(N) L a) total running time for the joining phase.
242 *
243 * Second-level top hits
244 *
245 * With -fastest or with -2nd, FastTree uses an additional "2nd-level" top hits
246 * heuristic to reduce the running time for the top-hits phase to
247 * O(N**1.25 L) and for the neighbor-joining phase to O(N**1.25 L a).
248 * This also reduces the memory usage for the top-hits lists to
249 * O(N**1.25), which is important for alignments with a million
250 * sequences. The key idea is to store just q = sqrt(m) top hits for
251 * most sequences.
252 *
253 * Given the neighbors of A -- either for a seed or for a neighbor
254 * from the top-hits heuristic, if B is within the top q hits of A, we
255 * set top-hits(B) from the top 3*q top-hits of A. And, we record that
256 * A is the "source" of the hits for B, so if we run low on hits for
257 * B, instead of doing a full refresh, we can do top-hits(B) :=
258 * top-hits(B) union top-hits(active_ancestor(A)).
259 * During a refresh, these "2nd-level" top hits are updated just as
260 * normal, but the source is maintained and only q entries are stored,
261 * until we near the end of the neighbor joining phase (until the
262 * root as 2*m children or less).
263 *
264 * Parallel execution with OpenMP
265 *
266 * If you compile FastTree with OpenMP support, it will take
267 * advantage of multiple CPUs on one machine. It will parallelize:
268 *
269 * The top hits phase
270 * Comparing one node to many others during the NJ phase (the simplest kind of join)
271 * The refresh phase
272 * Optimizing likelihoods for 3 alternate topologies during ML NNIs and ML supports
273 * (only 3 threads can be used)
274 *
275 * This accounts for most of the O(N L a) or slower steps except for
276 * minimum-evolution NNIs (which are fast anyway), minimum-evolution SPRs,
277 * selecting per-site rates, and optimizing branch lengths outside of ML NNIs.
278 *
279 * Parallelizing the top hits phase may lead to a slight change in the tree,
280 * as some top hits are computed from different (and potentially less optimal source).
281 * This means that results on repeated runs may not be 100% identical.
282 * However, this should not have any significant effect on tree quality
283 * after the NNIs and SPRs.
284 *
285 * The OpenMP code also turns off the star-topology test during ML
286 * NNIs, which may lead to slight improvements in likelihood.
287 */
288
289 #include <stdio.h>
290 #include <stdbool.h>
291 #include <string.h>
292 #include <assert.h>
293 #include <math.h>
294 #include <stdlib.h>
295 #include <sys/time.h>
296 #include <ctype.h>
297 #include <unistd.h>
298 #ifdef TRACK_MEMORY
299 /* malloc.h apparently doesn't exist on MacOS */
300 #include <malloc.h>
301 #endif
302
303 /* Compile with -DOPENMP to turn on multithreading */
304 #ifdef OPENMP
305 #include <omp.h>
306 #endif
307
308 /* By default, tries to compile with SSE instructions for greater speed.
309 But if compiled with -DUSE_DOUBLE, uses double precision instead of single-precision
310 floating point (2x memory required), does not use SSE, and allows much shorter
311 branch lengths.
312 */
313 #ifdef __SSE__
314 #if !defined(NO_SSE) && !defined(USE_DOUBLE)
315 #define USE_SSE3
316 #endif
317 #endif
318
319
320 #ifdef USE_DOUBLE
321 #define SSE_STRING "Double precision (No SSE3)"
322 typedef double numeric_t;
323 #define ScanNumericSpec "%lf"
324 #else
325 typedef float numeric_t;
326 #define ScanNumericSpec "%f"
327 #endif
328
329 #ifdef USE_SSE3
330 #define SSE_STRING "SSE3"
331 #define ALIGNED __attribute__((aligned(16)))
332 #define IS_ALIGNED(X) ((((unsigned long) new) & 15L) == 0L)
333 #include <xmmintrin.h>
334
335 #else
336
337 #define ALIGNED
338 #define IS_ALIGNED(X) 1
339
340 #ifndef USE_DOUBLE
341 #define SSE_STRING "No SSE3"
342 #endif
343
344 #endif /* USE_SSE3 */
345
346 #define FT_VERSION "2.1.10"
347
348 char *usage =
349 " FastTree protein_alignment > tree\n"
350 " FastTree < protein_alignment > tree\n"
351 " FastTree -out tree protein_alignment\n"
352 " FastTree -nt nucleotide_alignment > tree\n"
353 " FastTree -nt -gtr < nucleotide_alignment > tree\n"
354 " FastTree < nucleotide_alignment > tree\n"
355 "FastTree accepts alignments in fasta or phylip interleaved formats\n"
356 "\n"
357 "Common options (must be before the alignment file):\n"
358 " -quiet to suppress reporting information\n"
359 " -nopr to suppress progress indicator\n"
360 " -log logfile -- save intermediate trees, settings, and model details\n"
361 " -fastest -- speed up the neighbor joining phase & reduce memory usage\n"
362 " (recommended for >50,000 sequences)\n"
363 " -n <number> to analyze multiple alignments (phylip format only)\n"
364 " (use for global bootstrap, with seqboot and CompareToBootstrap.pl)\n"
365 " -nosupport to not compute support values\n"
366 " -intree newick_file to set the starting tree(s)\n"
367 " -intree1 newick_file to use this starting tree for all the alignments\n"
368 " (for faster global bootstrap on huge alignments)\n"
369 " -pseudo to use pseudocounts (recommended for highly gapped sequences)\n"
370 " -gtr -- generalized time-reversible model (nucleotide alignments only)\n"
371 " -lg -- Le-Gascuel 2008 model (amino acid alignments only)\n"
372 " -wag -- Whelan-And-Goldman 2001 model (amino acid alignments only)\n"
373 " -quote -- allow spaces and other restricted characters (but not ' ) in\n"
374 " sequence names and quote names in the output tree (fasta input only;\n"
375 " FastTree will not be able to read these trees back in)\n"
376 " -noml to turn off maximum-likelihood\n"
377 " -nome to turn off minimum-evolution NNIs and SPRs\n"
378 " (recommended if running additional ML NNIs with -intree)\n"
379 " -nome -mllen with -intree to optimize branch lengths for a fixed topology\n"
380 " -cat # to specify the number of rate categories of sites (default 20)\n"
381 " or -nocat to use constant rates\n"
382 " -gamma -- after optimizing the tree under the CAT approximation,\n"
383 " rescale the lengths to optimize the Gamma20 likelihood\n"
384 " -constraints constraintAlignment to constrain the topology search\n"
385 " constraintAlignment should have 1s or 0s to indicates splits\n"
386 " -expert -- see more options\n"
387 "For more information, see http://www.microbesonline.org/fasttree/\n";
388
389 char *expertUsage =
390 "FastTree [-nt] [-n 100] [-quote] [-pseudo | -pseudo 1.0]\n"
391 " [-boot 1000 | -nosupport]\n"
392 " [-intree starting_trees_file | -intree1 starting_tree_file]\n"
393 " [-quiet | -nopr]\n"
394 " [-nni 10] [-spr 2] [-noml | -mllen | -mlnni 10]\n"
395 " [-mlacc 2] [-cat 20 | -nocat] [-gamma]\n"
396 " [-slow | -fastest] [-2nd | -no2nd] [-slownni] [-seed 1253] \n"
397 " [-top | -notop] [-topm 1.0 [-close 0.75] [-refresh 0.8]]\n"
398 " [-matrix Matrix | -nomatrix] [-nj | -bionj]\n"
399 " [-lg] [-wag] [-nt] [-gtr] [-gtrrates ac ag at cg ct gt] [-gtrfreq A C G T]\n"
400 " [ -constraints constraintAlignment [ -constraintWeight 100.0 ] ]\n"
401 " [-log logfile]\n"
402 " [ alignment_file ]\n"
403 " [ -out output_newick_file | > newick_tree]\n"
404 "\n"
405 "or\n"
406 "\n"
407 "FastTree [-nt] [-matrix Matrix | -nomatrix] [-rawdist] -makematrix [alignment]\n"
408 " [-n 100] > phylip_distance_matrix\n"
409 "\n"
410 " FastTree supports fasta or phylip interleaved alignments\n"
411 " By default FastTree expects protein alignments, use -nt for nucleotides\n"
412 " FastTree reads standard input if no alignment file is given\n"
413 "\n"
414 "Input/output options:\n"
415 " -n -- read in multiple alignments in. This only\n"
416 " works with phylip interleaved format. For example, you can\n"
417 " use it with the output from phylip's seqboot. If you use -n, FastTree\n"
418 " will write 1 tree per line to standard output.\n"
419 " -intree newickfile -- read the starting tree in from newickfile.\n"
420 " Any branch lengths in the starting trees are ignored.\n"
421 " -intree with -n will read a separate starting tree for each alignment.\n"
422 " -intree1 newickfile -- read the same starting tree for each alignment\n"
423 " -quiet -- do not write to standard error during normal operation (no progress\n"
424 " indicator, no options summary, no likelihood values, etc.)\n"
425 " -nopr -- do not write the progress indicator to stderr\n"
426 " -log logfile -- save intermediate trees so you can extract\n"
427 " the trees and restart long-running jobs if they crash\n"
428 " -log also reports the per-site rates (1 means slowest category)\n"
429 " -quote -- quote sequence names in the output and allow spaces, commas,\n"
430 " parentheses, and colons in them but not ' characters (fasta files only)\n"
431 "\n"
432 "Distances:\n"
433 " Default: For protein sequences, log-corrected distances and an\n"
434 " amino acid dissimilarity matrix derived from BLOSUM45\n"
435 " or for nucleotide sequences, Jukes-Cantor distances\n"
436 " To specify a different matrix, use -matrix FilePrefix or -nomatrix\n"
437 " Use -rawdist to turn the log-correction off\n"
438 " or to use %different instead of Jukes-Cantor\n"
439 "\n"
440 " -pseudo [weight] -- Use pseudocounts to estimate distances between\n"
441 " sequences with little or no overlap. (Off by default.) Recommended\n"
442 " if analyzing the alignment has sequences with little or no overlap.\n"
443 " If the weight is not specified, it is 1.0\n"
444 "\n"
445 "Topology refinement:\n"
446 " By default, FastTree tries to improve the tree with up to 4*log2(N)\n"
447 " rounds of minimum-evolution nearest-neighbor interchanges (NNI),\n"
448 " where N is the number of unique sequences, 2 rounds of\n"
449 " subtree-prune-regraft (SPR) moves (also min. evo.), and\n"
450 " up to 2*log(N) rounds of maximum-likelihood NNIs.\n"
451 " Use -nni to set the number of rounds of min. evo. NNIs,\n"
452 " and -spr to set the rounds of SPRs.\n"
453 " Use -noml to turn off both min-evo NNIs and SPRs (useful if refining\n"
454 " an approximately maximum-likelihood tree with further NNIs)\n"
455 " Use -sprlength set the maximum length of a SPR move (default 10)\n"
456 " Use -mlnni to set the number of rounds of maximum-likelihood NNIs\n"
457 " Use -mlacc 2 or -mlacc 3 to always optimize all 5 branches at each NNI,\n"
458 " and to optimize all 5 branches in 2 or 3 rounds\n"
459 " Use -mllen to optimize branch lengths without ML NNIs\n"
460 " Use -mllen -nome with -intree to optimize branch lengths on a fixed topology\n"
461 " Use -slownni to turn off heuristics to avoid constant subtrees (affects both\n"
462 " ML and ME NNIs)\n"
463 "\n"
464 "Maximum likelihood model options:\n"
465 " -lg -- Le-Gascuel 2008 model instead of (default) Jones-Taylor-Thorton 1992 model (a.a. only)\n"
466 " -wag -- Whelan-And-Goldman 2001 model instead of (default) Jones-Taylor-Thorton 1992 model (a.a. only)\n"
467 " -gtr -- generalized time-reversible instead of (default) Jukes-Cantor (nt only)\n"
468 " -cat # -- specify the number of rate categories of sites (default 20)\n"
469 " -nocat -- no CAT model (just 1 category)\n"
470 " -gamma -- after the final round of optimizing branch lengths with the CAT model,\n"
471 " report the likelihood under the discrete gamma model with the same\n"
472 " number of categories. FastTree uses the same branch lengths but\n"
473 " optimizes the gamma shape parameter and the scale of the lengths.\n"
474 " The final tree will have rescaled lengths. Used with -log, this\n"
475 " also generates per-site likelihoods for use with CONSEL, see\n"
476 " GammaLogToPaup.pl and documentation on the FastTree web site.\n"
477 "\n"
478 "Support value options:\n"
479 " By default, FastTree computes local support values by resampling the site\n"
480 " likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,\n"
481 " it will compute minimum-evolution bootstrap supports instead\n"
482 " In either case, the support values are proportions ranging from 0 to 1\n"
483 "\n"
484 " Use -nosupport to turn off support values or -boot 100 to use just 100 resamples\n"
485 " Use -seed to initialize the random number generator\n"
486 "\n"
487 "Searching for the best join:\n"
488 " By default, FastTree combines the 'visible set' of fast neighbor-joining with\n"
489 " local hill-climbing as in relaxed neighbor-joining\n"
490 " -slow -- exhaustive search (like NJ or BIONJ, but different gap handling)\n"
491 " -slow takes half an hour instead of 8 seconds for 1,250 proteins\n"
492 " -fastest -- search the visible set (the top hit for each node) only\n"
493 " Unlike the original fast neighbor-joining, -fastest updates visible(C)\n"
494 " after joining A and B if join(AB,C) is better than join(C,visible(C))\n"
495 " -fastest also updates out-distances in a very lazy way,\n"
496 " -fastest sets -2nd on as well, use -fastest -no2nd to avoid this\n"
497 "\n"
498 "Top-hit heuristics:\n"
499 " By default, FastTree uses a top-hit list to speed up search\n"
500 " Use -notop (or -slow) to turn this feature off\n"
501 " and compare all leaves to each other,\n"
502 " and all new joined nodes to each other\n"
503 " -topm 1.0 -- set the top-hit list size to parameter*sqrt(N)\n"
504 " FastTree estimates the top m hits of a leaf from the\n"
505 " top 2*m hits of a 'close' neighbor, where close is\n"
506 " defined as d(seed,close) < 0.75 * d(seed, hit of rank 2*m),\n"
507 " and updates the top-hits as joins proceed\n"
508 " -close 0.75 -- modify the close heuristic, lower is more conservative\n"
509 " -refresh 0.8 -- compare a joined node to all other nodes if its\n"
510 " top-hit list is less than 80% of the desired length,\n"
511 " or if the age of the top-hit list is log2(m) or greater\n"
512 " -2nd or -no2nd to turn 2nd-level top hits heuristic on or off\n"
513 " This reduces memory usage and running time but may lead to\n"
514 " marginal reductions in tree quality.\n"
515 " (By default, -fastest turns on -2nd.)\n"
516 "\n"
517 "Join options:\n"
518 " -nj: regular (unweighted) neighbor-joining (default)\n"
519 " -bionj: weighted joins as in BIONJ\n"
520 " FastTree will also weight joins during NNIs\n"
521 "\n"
522 "Constrained topology search options:\n"
523 " -constraints alignmentfile -- an alignment with values of 0, 1, and -\n"
524 " Not all sequences need be present. A column of 0s and 1s defines a\n"
525 " constrained split. Some constraints may be violated\n"
526 " (see 'violating constraints:' in standard error).\n"
527 " -constraintWeight -- how strongly to weight the constraints. A value of 1\n"
528 " means a penalty of 1 in tree length for violating a constraint\n"
529 " Default: 100.0\n"
530 "\n"
531 "For more information, see http://www.microbesonline.org/fasttree/\n"
532 " or the comments in the source code\n";
533 ;
534
535
536 #define MAXCODES 20
537 #define NOCODE 127
538 /* Note -- sequence lines longer than BUFFER_SIZE are
539 allowed, but FASTA header lines must be within this limit */
540 #define BUFFER_SIZE 5000
541 #define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
542 #define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
543
544 typedef struct {
545 int nPos;
546 int nSeq;
547 char **names;
548 char **seqs;
549 int nSaved; /* actual allocated size of names and seqs */
550 } alignment_t;
551
552 /* For each position in a profile, we have a weight (% non-gapped) and a
553 frequency vector. (If using a matrix, the frequency vector is in eigenspace).
554 We also store codes for simple profile positions (all gaps or only 1 value)
555 If weight[pos] > 0 && codes[pos] == NOCODE then we store the vector
556 vectors itself is sets of nCodes long, so the vector for the ith nonconstant position
557 starts at &vectors[nCodes*i]
558
559 To speed up comparison of outprofile to a sequence or other simple profile, we also
560 (for outprofiles) store codeDist[iPos*nCodes+k] = dist(k,profile[iPos])
561
562 For constraints, we store a vector of nOn and nOff
563 If not using constraints, those will be NULL
564 */
565 typedef struct {
566 /* alignment profile */
567 numeric_t *weights;
568 unsigned char *codes;
569 numeric_t *vectors; /* NULL if no non-constant positions, e.g. for leaves */
570 int nVectors;
571 numeric_t *codeDist; /* Optional -- distance to each code at each position */
572
573 /* constraint profile */
574 int *nOn;
575 int *nOff;
576 } profile_t;
577
578 /* A visible node is a pair of nodes i, j such that j is the best hit of i,
579 using the neighbor-joining criterion, at the time the comparison was made,
580 or approximately so since then.
581
582 Note that variance = dist because in BIONJ, constant factors of variance do not matter,
583 and because we weight ungapped sequences higher naturally when averaging profiles,
584 so we do not take this into account in the computation of "lambda" for BIONJ.
585
586 For the top-hit list heuristic, if the top hit list becomes "too short",
587 we store invalid entries with i=j=-1 and dist/criterion very high.
588 */
589 typedef struct {
590 int i, j;
591 numeric_t weight; /* Total product of weights (maximum value is nPos)
592 This is needed for weighted joins and for pseudocounts,
593 but not in most other places.
594 For example, it is not maintained by the top hits code */
595 numeric_t dist; /* The uncorrected distance (includes diameter correction) */
596 numeric_t criterion; /* changes when we update the out-profile or change nActive */
597 } besthit_t;
598
599 typedef struct {
600 int nChild;
601 int child[3];
602 } children_t;
603
604 typedef struct {
605 /* Distances between amino acids */
606 numeric_t distances[MAXCODES][MAXCODES];
607
608 /* Inverse of the eigenvalue matrix, for rotating a frequency vector
609 into eigenspace so that profile similarity computations are
610 O(alphabet) not O(alphabet*alphabet) time.
611 */
612 numeric_t eigeninv[MAXCODES][MAXCODES];
613 numeric_t eigenval[MAXCODES]; /* eigenvalues */
614
615
616 /* eigentot=eigeninv times the all-1s frequency vector
617 useful for normalizing rotated frequency vectors
618 */
619 numeric_t eigentot[MAXCODES];
620
621 /* codeFreq is the transpose of the eigeninv matrix is
622 the rotated frequency vector for each code */
623 numeric_t codeFreq[MAXCODES][MAXCODES];
624 numeric_t gapFreq[MAXCODES];
625 } distance_matrix_t;
626
627
628 /* A transition matrix gives the instantaneous rate of change of frequencies
629 df/dt = M . f
630 which is solved by
631 f(t) = exp(M) . f(0)
632 and which is not a symmetric matrix because of
633 non-uniform stationary frequencies stat, so that
634 M stat = 0
635 M(i,j) is instantaneous rate of j -> i, not of i -> j
636
637 S = diag(sqrt(stat)) is a correction so that
638 M' = S**-1 M S is symmetric
639 Let W L W**-1 = M' be an eigendecomposition of M'
640 Because M' is symmetric, W can be a rotation, and W**-1 = t(W)
641 Set V = S*W
642 M = V L V**-1 is an eigendecomposition of M
643 Note V**-1 = W**-1 S**-1 = t(W) S**-1
644
645 Evolution by time t is given by
646
647 exp(M*t) = V exp(L*t) V**-1
648 P(A & B | t) = B . exp(M*t) . (A * stat)
649 note this is *not* the same as P(A->B | t)
650
651 and we can reduce some of the computations from O(a**2) to O(a) time,
652 where a is the alphabet size, by storing frequency vectors as
653 t(V) . f = t(W) . t(S) . f
654
655 Then
656 P(f0 & f1 | t) = f1 . exp(M*t) . f0 * (f0 . stat) = sum(r0j * r1j * exp(l_j*t))
657 where r0 and r1 are the transformed vectors
658
659 Posterior distribution of P given children f0 and f1 is given by
660 P(i | f0, f1, t0, t1) = stat * P(i->f0 | t0) * P(i->f1 | t1)
661 = P(i & f0 | t0) * P(i & f1 | t1) / stat
662 ~ (V . exp(t0*L) . r0) * (V . exp(t1*L) . r1) / stat
663
664 When normalize this posterior distribution (to sum to 1), divide by stat,
665 and transform by t(V) -- this is the "profile" of internal nodes
666
667 To eliminate the O(N**2) step of transforming by t(V), if the posterior
668 distribution of an amino acid is near 1 then we can approximate it by
669 P(i) ~= (i==A) * w + nearP(i) * (1-w), where
670 w is fit so that P(i==A) is correct
671 nearP = Posterior(i | i, i, 0.1, 0.1) [0.1 is an arbitrary choice]
672 and we confirm that the approximation works well before we use it.
673
674 Given this parameter w we can set
675 rotated_posterior = rotation(w * (i==A)/stat + (1-w) * nearP/stat)
676 = codeFreq(A) * w/stat(A) + nearFreq(A) * (1-w)
677 */
678 typedef struct {
679 numeric_t stat[MAXCODES]; /* The stationary distribution */
680 numeric_t statinv[MAXCODES]; /* 1/stat */
681 /* the eigenmatrix, with the eigenvectors as columns and rotations of individual
682 characters as rows. Also includes a NOCODE entry for gaps */
683 numeric_t codeFreq[NOCODE+1][MAXCODES];
684 numeric_t eigeninv[MAXCODES][MAXCODES]; /* Inverse of eigenmatrix */
685 numeric_t eigeninvT[MAXCODES][MAXCODES]; /* transpose of eigeninv */
686 numeric_t eigenval[MAXCODES]; /* Eigenvalues */
687 /* These are for approximate posteriors (off by default) */
688 numeric_t nearP[MAXCODES][MAXCODES]; /* nearP[i][j] = P(parent=j | both children are i, both lengths are 0.1 */
689 numeric_t nearFreq[MAXCODES][MAXCODES]; /* rotation of nearP/stat */
690 } transition_matrix_t;
691
692 typedef struct {
693 int nRateCategories;
694 numeric_t *rates; /* 1 per rate category */
695 unsigned int *ratecat; /* 1 category per position */
696 } rates_t;
697
698 typedef struct {
699 /* The input */
700 int nSeq;
701 int nPos;
702 char **seqs; /* the aligment sequences array (not reallocated) */
703 distance_matrix_t *distance_matrix; /* a pointer (not reallocated), or NULL if using %identity distance */
704 transition_matrix_t *transmat; /* a pointer (is allocated), or NULL for Jukes-Cantor */
705 /* Topological constraints are represented for each sequence as binary characters
706 with values of '0', '1', or '-' (for missing data)
707 Sequences that have no constraint may have a NULL string
708 */
709 int nConstraints;
710 char **constraintSeqs;
711
712 /* The profile data structures */
713 int maxnode; /* The next index to allocate */
714 int maxnodes; /* Space allocated in data structures below */
715 profile_t **profiles; /* Profiles of leaves and intermediate nodes */
716 numeric_t *diameter; /* To correct for distance "up" from children (if any) */
717 numeric_t *varDiameter; /* To correct variances for distance "up" */
718 numeric_t *selfdist; /* Saved for use in some formulas */
719 numeric_t *selfweight; /* Saved for use in some formulas */
720
721 /* Average profile of all active nodes, the "outprofile"
722 * If all inputs are ungapped, this has weight 1 (not nSequences) at each position
723 * The frequencies all sum to one (or that is implied by the eigen-representation)
724 */
725 profile_t *outprofile;
726 double totdiam;
727
728 /* We sometimes use stale out-distances, so we remember what nActive was */
729 numeric_t *outDistances; /* Sum of distances to other active (parent==-1) nodes */
730 int *nOutDistActive; /* What nActive was when this outDistance was computed */
731
732 /* the inferred tree */
733 int root; /* index of the root. Unlike other internal nodes, it has 3 children */
734 int *parent; /* -1 or index of parent */
735 children_t *child;
736 numeric_t *branchlength; /* Distance to parent */
737 numeric_t *support; /* 1 for high-confidence nodes */
738
739 /* auxilliary data for maximum likelihood (defaults to 1 category of rate=1.0) */
740 rates_t rates;
741 } NJ_t;
742
743 /* Uniquify sequences in an alignment -- map from indices
744 in the alignment to unique indicies in a NJ_t
745 */
746 typedef struct {
747 int nSeq;
748 int nUnique;
749 int *uniqueFirst; /* iUnique -> iAln */
750 int *alnNext; /* iAln -> next, or -1 */
751 int *alnToUniq; /* iAln -> iUnique, or -1 if another was the exemplar */
752 char **uniqueSeq; /* indexed by iUniq -- points to strings allocated elsewhere */
753 } uniquify_t;
754
755 /* Describes which switch to do */
756 typedef enum {ABvsCD,ACvsBD,ADvsBC} nni_t;
757
758 /* A list of these describes a chain of NNI moves in a rooted tree,
759 making up, in total, an SPR move
760 */
761 typedef struct {
762 int nodes[2];
763 double deltaLength; /* change in tree length for this step (lower is better) */
764 } spr_step_t;
765
766 /* Keep track of hits for the top-hits heuristic without wasting memory
767 j = -1 means empty
768 If j is an inactive node, this may be replaced by that node's parent (and dist recomputed)
769 */
770 typedef struct {
771 int j;
772 numeric_t dist;
773 } hit_t;
774
775 typedef struct {
776 int nHits; /* the allocated and desired size; some of them may be empty */
777 hit_t *hits;
778 int hitSource; /* where to refresh hits from if a 2nd-level top-hit list, or -1 */
779 int age; /* number of joins since a refresh */
780 } top_hits_list_t;
781
782 typedef struct {
783 int m; /* size of a full top hits list, usually sqrt(N) */
784 int q; /* size of a 2nd-level top hits, usually sqrt(m) */
785 int maxnodes;
786 top_hits_list_t *top_hits_lists; /* one per node */
787 hit_t *visible; /* the "visible" (very best) hit for each node */
788
789 /* The top-visible set is a subset, usually of size m, of the visible set --
790 it is the set of joins to select from
791 Each entry is either a node whose visible set entry has a good (low) criterion,
792 or -1 for empty, or is an obsolete node (which is effectively the same).
793 Whenever we update the visible set, should also call UpdateTopVisible()
794 which ensures that none of the topvisible set are stale (that is, they
795 all point to an active node).
796 */
797 int nTopVisible; /* nTopVisible = m * topvisibleMult */
798 int *topvisible;
799
800 int topvisibleAge; /* joins since the top-visible list was recomputed */
801
802 #ifdef OPENMP
803 /* 1 lock to read or write any top hits list, no thread grabs more than one */
804 omp_lock_t *locks;
805 #endif
806 } top_hits_t;
807
808 /* Global variables */
809 /* Options */
810 int verbose = 1;
811 int showProgress = 1;
812 int slow = 0;
813 int fastest = 0;
814 bool useTopHits2nd = false; /* use the second-level top hits heuristic? */
815 int bionj = 0;
816 double tophitsMult = 1.0; /* 0 means compare nodes to all other nodes */
817 double tophitsClose = -1.0; /* Parameter for how close is close; also used as a coverage req. */
818 double topvisibleMult = 1.5; /* nTopVisible = m * topvisibleMult; 1 or 2 did not make much difference
819 in either running time or accuracy so I chose a compromise. */
820
821 double tophitsRefresh = 0.8; /* Refresh if fraction of top-hit-length drops to this */
822 double tophits2Mult = 1.0; /* Second-level top heuristic -- only with -fastest */
823 int tophits2Safety = 3; /* Safety factor for second level of top-hits heuristic */
824 double tophits2Refresh = 0.6; /* Refresh 2nd-level top hits if drops down to this fraction of length */
825
826 double staleOutLimit = 0.01; /* nActive changes by at most this amount before we recompute
827 an out-distance. (Only applies if using the top-hits heuristic) */
828 double fResetOutProfile = 0.02; /* Recompute out profile from scratch if nActive has changed
829 by more than this proportion, and */
830 int nResetOutProfile = 200; /* nActive has also changed more than this amount */
831 int nCodes=20; /* 20 if protein, 4 if nucleotide */
832 bool useMatrix=true; /* If false, use %different as the uncorrected distance */
833 bool logdist = true; /* If true, do a log-correction (scoredist-like or Jukes-Cantor)
834 but only during NNIs and support values, not during neighbor-joining */
835 double pseudoWeight = 0.0; /* The weight of pseudocounts to avoid artificial long branches when
836 nearby sequences in the tree have little or no overlap
837 (off by default). The prior distance is based on
838 all overlapping positions among the quartet or triplet under
839 consideration. The log correction takes place after the
840 pseudocount is used. */
841 double constraintWeight = 100.0;/* Cost of violation of a topological constraint in evolutionary distance
842 or likelihood */
843 double MEMinDelta = 1.0e-4; /* Changes of less than this in tree-length are discounted for
844 purposes of identifying fixed subtrees */
845 bool fastNNI = true;
846 bool gammaLogLk = false; /* compute gamma likelihood without reoptimizing branch lengths? */
847
848 /* Maximum likelihood options and constants */
849 /* These are used to rescale likelihood values and avoid taking a logarithm at each position */
850 const double LkUnderflow = 1.0e-4;
851 const double LkUnderflowInv = 1.0e4;
852 const double LogLkUnderflow = 9.21034037197618; /* -log(LkUnderflowInv) */
853 const double Log2 = 0.693147180559945;
854 /* These are used to limit the optimization of branch lengths.
855 Also very short branch lengths can create numerical problems.
856 In version 2.1.7, the minimum branch lengths (MLMinBranchLength and MLMinRelBranchLength)
857 were increased to prevent numerical problems in rare cases.
858 In version 2.1.8, to provide useful branch lengths for genome-wide alignments,
859 the minimum branch lengths were dramatically decreased if USE_DOUBLE is defined.
860 */
861 #ifndef USE_DOUBLE
862 const double MLMinBranchLengthTolerance = 1.0e-4; /* absolute tolerance for optimizing branch lengths */
863 const double MLFTolBranchLength = 0.001; /* fractional tolerance for optimizing branch lengths */
864 const double MLMinBranchLength = 5.0e-4; /* minimum value for branch length */
865 const double MLMinRelBranchLength = 2.5e-4; /* minimum of rate * length */
866 const double fPostTotalTolerance = 1.0e-10; /* posterior vector must sum to at least this before rescaling */
867 #else
868 const double MLMinBranchLengthTolerance = 1.0e-9;
869 const double MLFTolBranchLength = 0.001;
870 const double MLMinBranchLength = 5.0e-9;
871 const double MLMinRelBranchLength = 2.5e-9;
872 const double fPostTotalTolerance = 1.0e-20;
873 #endif
874
875 int mlAccuracy = 1; /* Rounds of optimization of branch lengths; 1 means do 2nd round only if close */
876 double closeLogLkLimit = 5.0; /* If partial optimization of an NNI looks like it would decrease the log likelihood
877 by this much or more then do not optimize it further */
878 double treeLogLkDelta = 0.1; /* Give up if tree log-lk changes by less than this; NNIs that change
879 likelihood by less than this also are considered unimportant
880 by some heuristics */
881 bool exactML = true; /* Exact or approximate posterior distributions for a.a.s */
882 double approxMLminf = 0.95; /* Only try to approximate posterior distributions if max. value is at least this high */
883 double approxMLminratio = 2/3.0;/* Ratio of approximated/true posterior values must be at least this high */
884 double approxMLnearT = 0.2; /* 2nd component of near-constant posterior distribution uses this time scale */
885 const int nDefaultRateCats = 20;
886
887 /* Performance and memory usage */
888 long profileOps = 0; /* Full profile-based distance operations */
889 long outprofileOps = 0; /* How many of profileOps are comparisons to outprofile */
890 long seqOps = 0; /* Faster leaf-based distance operations */
891 long profileAvgOps = 0; /* Number of profile-average steps */
892 long nHillBetter = 0; /* Number of hill-climbing steps */
893 long nCloseUsed = 0; /* Number of "close" neighbors we avoid full search for */
894 long nClose2Used = 0; /* Number of "close" neighbors we use 2nd-level top hits for */
895 long nRefreshTopHits = 0; /* Number of full-blown searches (interior nodes) */
896 long nVisibleUpdate = 0; /* Number of updates of the visible set */
897 long nNNI = 0; /* Number of NNI changes performed */
898 long nSPR = 0; /* Number of SPR changes performed */
899 long nML_NNI = 0; /* Number of max-lik. NNI changes performed */
900 long nSuboptimalSplits = 0; /* # of splits that are rejected given final tree (during bootstrap) */
901 long nSuboptimalConstrained = 0; /* Bad splits that are due to constraints */
902 long nConstraintViolations = 0; /* Number of constraint violations */
903 long nProfileFreqAlloc = 0;
904 long nProfileFreqAvoid = 0;
905 long szAllAlloc = 0;
906 long mymallocUsed = 0; /* useful allocations by mymalloc */
907 long maxmallocHeap = 0; /* Maximum of mi.arena+mi.hblkhd from mallinfo (actual mem usage) */
908 long nLkCompute = 0; /* # of likelihood computations for pairs of probability vectors */
909 long nPosteriorCompute = 0; /* # of computations of posterior probabilities */
910 long nAAPosteriorExact = 0; /* # of times compute exact AA posterior */
911 long nAAPosteriorRough = 0; /* # of times use rough approximation */
912 long nStarTests = 0; /* # of times we use star test to avoid testing an NNI */
913
914 /* Protein character set */
915 unsigned char *codesStringAA = (unsigned char*) "ARNDCQEGHILKMFPSTWYV";
916 unsigned char *codesStringNT = (unsigned char*) "ACGT";
917 unsigned char *codesString = NULL;
918
919 distance_matrix_t *ReadDistanceMatrix(char *prefix);
920 void SetupDistanceMatrix(/*IN/OUT*/distance_matrix_t *); /* set eigentot, codeFreq, gapFreq */
921 void ReadMatrix(char *filename, /*OUT*/numeric_t codes[MAXCODES][MAXCODES], bool check_codes);
922 void ReadVector(char *filename, /*OUT*/numeric_t codes[MAXCODES]);
923 alignment_t *ReadAlignment(/*READ*/FILE *fp, bool bQuote); /* Returns a list of strings (exits on failure) */
924 alignment_t *FreeAlignment(alignment_t *); /* returns NULL */
925 void FreeAlignmentSeqs(/*IN/OUT*/alignment_t *);
926
927 /* Takes as input the transpose of the matrix V, with i -> j
928 This routine takes care of setting the diagonals
929 */
930 transition_matrix_t *CreateTransitionMatrix(/*IN*/double matrix[MAXCODES][MAXCODES],
931 /*IN*/double stat[MAXCODES]);
932 transition_matrix_t *CreateGTR(double *gtrrates/*ac,ag,at,cg,ct,gt*/, double *gtrfreq/*ACGT*/);
933
934 /* For converting profiles from 1 rotation to another, or converts NULL to NULL */
935 distance_matrix_t *TransMatToDistanceMat(transition_matrix_t *transmat);
936
937 /* Allocates memory, initializes leaf profiles */
938 NJ_t *InitNJ(char **sequences, int nSeqs, int nPos,
939 /*IN OPTIONAL*/char **constraintSeqs, int nConstraints,
940 /*IN OPTIONAL*/distance_matrix_t *,
941 /*IN OPTIONAL*/transition_matrix_t *);
942
943 NJ_t *FreeNJ(NJ_t *NJ); /* returns NULL */
944 void FastNJ(/*IN/OUT*/NJ_t *NJ); /* Does the joins */
945 void ReliabilityNJ(/*IN/OUT*/NJ_t *NJ, int nBootstrap); /* Estimates the reliability of the joins */
946
947 /* nni_stats_t is meaningless for leaves and root, so all of those entries
948 will just be high (for age) or 0 (for delta)
949 */
950 typedef struct {
951 int age; /* number of rounds since this node was modified by an NNI */
952 int subtreeAge; /* number of rounds since self or descendent had a significant improvement */
953 double delta; /* improvement in score for this node (or 0 if no change) */
954 double support; /* improvement of score for self over better of alternatives */
955 } nni_stats_t;
956
957 /* One round of nearest-neighbor interchanges according to the
958 minimum-evolution or approximate maximum-likelihood criterion.
959 If doing maximum likelihood then this modifies the branch lengths.
960 age is the # of rounds since a node was NNId
961 Returns the # of topological changes performed
962 */
963 int NNI(/*IN/OUT*/NJ_t *NJ, int iRound, int nRounds, bool useML,
964 /*IN/OUT*/nni_stats_t *stats,
965 /*OUT*/double *maxDeltaCriterion);
966 nni_stats_t *InitNNIStats(NJ_t *NJ);
967 nni_stats_t *FreeNNIStats(nni_stats_t *, NJ_t *NJ); /* returns NULL */
968
969 /* One round of subtree-prune-regraft moves (minimum evolution) */
970 void SPR(/*IN/OUT*/NJ_t *NJ, int maxSPRLength, int iRound, int nRounds);
971
972 /* Recomputes all branch lengths by minimum evolution criterion*/
973 void UpdateBranchLengths(/*IN/OUT*/NJ_t *NJ);
974
975 /* Recomputes all branch lengths and, optionally, internal profiles */
976 double TreeLength(/*IN/OUT*/NJ_t *NJ, bool recomputeProfiles);
977
978 typedef struct {
979 int nBadSplits;
980 int nConstraintViolations;
981 int nBadBoth;
982 int nSplits;
983 /* How much length would be reduce or likelihood would be increased by the
984 best NNI we find (the worst "miss") */
985 double dWorstDeltaUnconstrained;
986 double dWorstDeltaConstrained;
987 } SplitCount_t;
988
989 void TestSplitsMinEvo(NJ_t *NJ, /*OUT*/SplitCount_t *splitcount);
990
991 /* Sets SH-like support values if nBootstrap>0 */
992 void TestSplitsML(/*IN/OUT*/NJ_t *NJ, /*OUT*/SplitCount_t *splitcount, int nBootstrap);
993
994 /* Pick columns for resampling, stored as returned_vector[iBoot*nPos + j] */
995 int *ResampleColumns(int nPos, int nBootstrap);
996
997 /* Use out-profile and NJ->totdiam to recompute out-distance for node iNode
998 Only does this computation if the out-distance is "stale" (nOutDistActive[iNode] != nActive)
999 Note "IN/UPDATE" for NJ always means that we may update out-distances but otherwise
1000 make no changes.
1001 */
1002 void SetOutDistance(/*IN/UPDATE*/NJ_t *NJ, int iNode, int nActive);
1003
1004 /* Always sets join->criterion; may update NJ->outDistance and NJ->nOutDistActive,
1005 assumes join's weight and distance are already set,
1006 and that the constraint penalty (if any) is included in the distance
1007 */
1008 void SetCriterion(/*IN/UPDATE*/NJ_t *NJ, int nActive, /*IN/OUT*/besthit_t *join);
1009
1010 /* Computes weight and distance (which includes the constraint penalty)
1011 and then sets the criterion (maybe update out-distances)
1012 */
1013 void SetDistCriterion(/*IN/UPDATE*/NJ_t *NJ, int nActive, /*IN/OUT*/besthit_t *join);
1014
1015 /* If join->i or join->j are inactive nodes, replaces them with their active ancestors.
1016 After doing this, if i == j, or either is -1, sets weight to 0 and dist and criterion to 1e20
1017 and returns false (not a valid join)
1018 Otherwise, if i or j changed, recomputes the distance and criterion.
1019 Note that if i and j are unchanged then the criterion could be stale
1020 If bUpdateDist is false, and i or j change, then it just sets dist to a negative number
1021 */
1022 bool UpdateBestHit(/*IN/UPDATE*/NJ_t *NJ, int nActive, /*IN/OUT*/besthit_t *join,
1023 bool bUpdateDist);
1024
1025 /* This recomputes the criterion, or returns false if the visible node
1026 is no longer active.
1027 */
1028 bool GetVisible(/*IN/UPDATE*/NJ_t *NJ, int nActive, /*IN/OUT*/top_hits_t *tophits,
1029 int iNode, /*OUT*/besthit_t *visible);
1030
1031 int ActiveAncestor(/*IN*/NJ_t *NJ, int node);
1032
1033 /* Compute the constraint penalty for a join. This is added to the "distance"
1034 by SetCriterion */
1035 int JoinConstraintPenalty(/*IN*/NJ_t *NJ, int node1, int node2);
1036 int JoinConstraintPenaltyPiece(NJ_t *NJ, int node1, int node2, int iConstraint);
1037
1038 /* Helper function for computing the number of constraints violated by
1039 a split, represented as counts of on and off on each side */
1040 int SplitConstraintPenalty(int nOn1, int nOff1, int nOn2, int nOff2);
1041
1042 /* Reports the (min. evo.) support for the (1,2) vs. (3,4) split
1043 col[iBoot*nPos+j] is column j for bootstrap iBoot
1044 */
1045 double SplitSupport(profile_t *p1, profile_t *p2, profile_t *p3, profile_t *p4,
1046 /*OPTIONAL*/distance_matrix_t *dmat,
1047 int nPos,
1048 int nBootstrap,
1049 int *col);
1050
1051 /* Returns SH-like support given resampling spec. (in col) and site likelihods
1052 for the three quartets
1053 */
1054 double SHSupport(int nPos, int nBoostrap, int *col, double loglk[3], double *site_likelihoods[3]);
1055
1056 profile_t *SeqToProfile(/*IN/OUT*/NJ_t *NJ,
1057 char *seq, int nPos,
1058 /*OPTIONAL*/char *constraintSeqs, int nConstraints,
1059 int iNode,
1060 unsigned long counts[256]);
1061
1062 /* ProfileDist and SeqDist only set the dist and weight fields
1063 If using an outprofile, use the second argument of ProfileDist
1064 for better performance.
1065
1066 These produce uncorrected distances.
1067 */
1068 void ProfileDist(profile_t *profile1, profile_t *profile2, int nPos,
1069 /*OPTIONAL*/distance_matrix_t *distance_matrix,
1070 /*OUT*/besthit_t *hit);
1071 void SeqDist(unsigned char *codes1, unsigned char *codes2, int nPos,
1072 /*OPTIONAL*/distance_matrix_t *distance_matrix,
1073 /*OUT*/besthit_t *hit);
1074
1075 /* Computes all pairs of profile distances, applies pseudocounts
1076 if pseudoWeight > 0, and applies log-correction if logdist is true.
1077 The lower index is compared to the higher index, e.g. for profiles
1078 A,B,C,D the comparison will be as in quartet_pair_t
1079 */
1080 typedef enum {qAB,qAC,qAD,qBC,qBD,qCD} quartet_pair_t;
1081 void CorrectedPairDistances(profile_t **profiles, int nProfiles,
1082 /*OPTIONAL*/distance_matrix_t *distance_matrix,
1083 int nPos,
1084 /*OUT*/double *distances);
1085
1086 /* output is indexed by nni_t
1087 To ensure good behavior while evaluating a subtree-prune-regraft move as a series
1088 of nearest-neighbor interchanges, this uses a distance-ish model of constraints,
1089 as given by PairConstraintDistance(), rather than
1090 counting the number of violated splits (which is what FastTree does
1091 during neighbor-joining).
1092 Thus, penalty values may well be >0 even if no constraints are violated, but the
1093 relative scores for the three NNIs will be correct.
1094 */
1095 void QuartetConstraintPenalties(profile_t *profiles[4], int nConstraints, /*OUT*/double d[3]);
1096
1097 double PairConstraintDistance(int nOn1, int nOff1, int nOn2, int nOff2);
1098
1099 /* the split is consistent with the constraint if any of the profiles have no data
1100 or if three of the profiles have the same uniform value (all on or all off)
1101 or if AB|CD = 00|11 or 11|00 (all uniform)
1102 */
1103 bool SplitViolatesConstraint(profile_t *profiles[4], int iConstraint);
1104
1105 /* If false, no values were set because this constraint was not relevant.
1106 output is for the 3 splits
1107 */
1108 bool QuartetConstraintPenaltiesPiece(profile_t *profiles[4], int iConstraint, /*OUT*/double penalty[3]);
1109
1110 /* Apply Jukes-Cantor or scoredist-like log(1-d) transform
1111 to correct the distance for multiple substitutions.
1112 */
1113 double LogCorrect(double distance);
1114
1115 /* AverageProfile is used to do a weighted combination of nodes
1116 when doing a join. If weight is negative, then the value is ignored and the profiles
1117 are averaged. The weight is *not* adjusted for the gap content of the nodes.
1118 Also, the weight does not affect the representation of the constraints
1119 */
1120 profile_t *AverageProfile(profile_t *profile1, profile_t *profile2,
1121 int nPos, int nConstraints,
1122 distance_matrix_t *distance_matrix,
1123 double weight1);
1124
1125 /* PosteriorProfile() is like AverageProfile() but it computes posterior probabilities
1126 rather than an average
1127 */
1128 profile_t *PosteriorProfile(profile_t *profile1, profile_t *profile2,
1129 double len1, double len2,
1130 /*OPTIONAL*/transition_matrix_t *transmat,
1131 rates_t *rates,
1132 int nPos, int nConstraints);
1133
1134 /* Set a node's profile from its children.
1135 Deletes the previous profile if it exists
1136 Use -1.0 for a balanced join
1137 Fails unless the node has two children (e.g., no leaves or root)
1138 */
1139 void SetProfile(/*IN/OUT*/NJ_t *NJ, int node, double weight1);
1140
1141 /* OutProfile does an unweighted combination of nodes to create the
1142 out-profile. It always sets code to NOCODE so that UpdateOutProfile
1143 can work.
1144 */
1145 profile_t *OutProfile(profile_t **profiles, int nProfiles,
1146 int nPos, int nConstraints,
1147 distance_matrix_t *distance_matrix);
1148
1149 void UpdateOutProfile(/*UPDATE*/profile_t *out, profile_t *old1, profile_t *old2,
1150 profile_t *new, int nActiveOld,
1151 int nPos, int nConstraints,
1152 distance_matrix_t *distance_matrix);
1153
1154 profile_t *NewProfile(int nPos, int nConstraints); /* returned has no vectors */
1155 profile_t *FreeProfile(profile_t *profile, int nPos, int nConstraints); /* returns NULL */
1156
1157 void AllocRateCategories(/*IN/OUT*/rates_t *rates, int nRateCategories, int nPos);
1158
1159 /* f1 can be NULL if code1 != NOCODE, and similarly for f2
1160 Or, if (say) weight1 was 0, then can have code1==NOCODE *and* f1==NULL
1161 In that case, returns an arbitrary large number.
1162 */
1163 double ProfileDistPiece(unsigned int code1, unsigned int code2,
1164 numeric_t *f1, numeric_t *f2,
1165 /*OPTIONAL*/distance_matrix_t *dmat,
1166 /*OPTIONAL*/numeric_t *codeDist2);
1167
1168 /* Adds (or subtracts, if weight is negative) fIn/codeIn from fOut
1169 fOut is assumed to exist (as from an outprofile)
1170 do not call unless weight of input profile > 0
1171 */
1172 void AddToFreq(/*IN/OUT*/numeric_t *fOut, double weight,
1173 unsigned int codeIn, /*OPTIONAL*/numeric_t *fIn,
1174 /*OPTIONAL*/distance_matrix_t *dmat);
1175
1176 /* Divide the vector (of length nCodes) by a constant
1177 so that the total (unrotated) frequency is 1.0 */
1178 void NormalizeFreq(/*IN/OUT*/numeric_t *freq, distance_matrix_t *distance_matrix);
1179
1180 /* Allocate, if necessary, and recompute the codeDist*/
1181 void SetCodeDist(/*IN/OUT*/profile_t *profile, int nPos, distance_matrix_t *dmat);
1182
1183 /* The allhits list contains the distances of the node to all other active nodes
1184 This is useful for the "reset" improvement to the visible set
1185 Note that the following routines do not handle the tophits heuristic
1186 and assume that out-distances are up to date.
1187 */
1188 void SetBestHit(int node, NJ_t *NJ, int nActive,
1189 /*OUT*/besthit_t *bestjoin,
1190 /*OUT OPTIONAL*/besthit_t *allhits);
1191 void ExhaustiveNJSearch(NJ_t *NJ, int nActive, /*OUT*/besthit_t *bestjoin);
1192
1193 /* Searches the visible set */
1194 void FastNJSearch(NJ_t *NJ, int nActive, /*UPDATE*/besthit_t *visible, /*OUT*/besthit_t *bestjoin);
1195
1196 /* Subroutines for handling the tophits heuristic */
1197
1198 top_hits_t *InitTopHits(NJ_t *NJ, int m);
1199 top_hits_t *FreeTopHits(top_hits_t *tophits); /* returns NULL */
1200
1201 /* Before we do any joins -- sets tophits and visible
1202 NJ may be modified by setting out-distances
1203 */
1204 void SetAllLeafTopHits(/*IN/UPDATE*/NJ_t *NJ, /*IN/OUT*/top_hits_t *tophits);
1205
1206 /* Find the best join to do. */
1207 void TopHitNJSearch(/*IN/UPDATE*/NJ_t *NJ,
1208 int nActive,
1209 /*IN/OUT*/top_hits_t *tophits,
1210 /*OUT*/besthit_t *bestjoin);
1211
1212 /* Returns the best hit within top hits
1213 NJ may be modified because it updates out-distances if they are too stale
1214 Does *not* update visible set
1215 */
1216 void GetBestFromTopHits(int iNode, /*IN/UPDATE*/NJ_t *NJ, int nActive,
1217 /*IN*/top_hits_t *tophits,
1218 /*OUT*/besthit_t *bestjoin);
1219
1220 /* visible set is modifiable so that we can reset it more globally when we do
1221 a "refresh", but we also set the visible set for newnode and do any
1222 "reset" updates too. And, we update many outdistances.
1223 */
1224 void TopHitJoin(int newnode,
1225 /*IN/UPDATE*/NJ_t *NJ, int nActive,
1226 /*IN/OUT*/top_hits_t *tophits);
1227
1228 /* Sort the input besthits by criterion
1229 and save the best nOut hits as a new array in top_hits_lists
1230 Does not update criterion or out-distances
1231 Ignores (silently removes) hit to self
1232 Saved list may be shorter than requested if there are insufficient entries
1233 */
1234 void SortSaveBestHits(int iNode, /*IN/SORT*/besthit_t *besthits,
1235 int nIn, int nOut,
1236 /*IN/OUT*/top_hits_t *tophits);
1237
1238 /* Given candidate hits from one node, "transfer" them to another node:
1239 Stores them in a new place in the same order
1240 searches up to active nodes if hits involve non-active nodes
1241 If update flag is set, it also recomputes distance and criterion
1242 (and ensures that out-distances are updated); otherwise
1243 it sets dist to -1e20 and criterion to 1e20
1244
1245 */
1246 void TransferBestHits(/*IN/UPDATE*/NJ_t *NJ, int nActive,
1247 int iNode,
1248 /*IN*/besthit_t *oldhits,
1249 int nOldHits,
1250 /*OUT*/besthit_t *newhits,
1251 bool updateDistance);
1252
1253 /* Create best hit objects from 1 or more hits. Do not update out-distances or set criteria */
1254 void HitsToBestHits(/*IN*/hit_t *hits, int nHits, int iNode, /*OUT*/besthit_t *newhits);
1255 besthit_t HitToBestHit(int i, hit_t hit);
1256
1257 /* Given a set of besthit entries,
1258 look for improvements to the visible set of the j entries.
1259 Updates out-distances as it goes.
1260 Also replaces stale nodes with this node, because a join is usually
1261 how this happens (i.e. it does not need to walk up to ancestors).
1262 Note this calls UpdateTopVisible() on any change
1263 */
1264 void UpdateVisible(/*IN/UPDATE*/NJ_t *NJ, int nActive,
1265 /*IN*/besthit_t *tophitsNode,
1266 int nTopHits,
1267 /*IN/OUT*/top_hits_t *tophits);
1268
1269 /* Update the top-visible list to perhaps include this hit (O(sqrt(N)) time) */
1270 void UpdateTopVisible(/*IN*/NJ_t * NJ, int nActive,
1271 int iNode, /*IN*/hit_t *hit,
1272 /*IN/OUT*/top_hits_t *tophits);
1273
1274 /* Recompute the top-visible subset of the visible set */
1275 void ResetTopVisible(/*IN/UPDATE*/NJ_t *NJ,
1276 int nActive,
1277 /*IN/OUT*/top_hits_t *tophits);
1278
1279 /* Make a shorter list with only unique entries.
1280 Replaces any "dead" hits to nodes that have parents with their active ancestors
1281 and ignores any that become dead.
1282 Updates all criteria.
1283 Combined gets sorted by i & j
1284 The returned list is allocated to nCombined even though only *nUniqueOut entries are filled
1285 */
1286 besthit_t *UniqueBestHits(/*IN/UPDATE*/NJ_t *NJ, int nActive,
1287 /*IN/SORT*/besthit_t *combined, int nCombined,
1288 /*OUT*/int *nUniqueOut);
1289
1290 nni_t ChooseNNI(profile_t *profiles[4],
1291 /*OPTIONAL*/distance_matrix_t *dmat,
1292 int nPos, int nConstraints,
1293 /*OUT*/double criteria[3]); /* The three internal branch lengths or log likelihoods*/
1294
1295 /* length[] is ordered as described by quartet_length_t, but after we do the swap
1296 of B with C (to give AC|BD) or B with D (to get AD|BC), if that is the returned choice
1297 bFast means do not consider NNIs if AB|CD is noticeably better than the star topology
1298 (as implemented by MLQuartetOptimize).
1299 If there are constraints, then the constraint penalty is included in criteria[]
1300 */
1301 nni_t MLQuartetNNI(profile_t *profiles[4],
1302 /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
1303 int nPos, int nConstraints,
1304 /*OUT*/double criteria[3], /* The three potential quartet log-likelihoods */
1305 /*IN/OUT*/numeric_t length[5],
1306 bool bFast);
1307
1308 void OptimizeAllBranchLengths(/*IN/OUT*/NJ_t *NJ);
1309 double TreeLogLk(/*IN*/NJ_t *NJ, /*OPTIONAL OUT*/double *site_loglk);
1310 double MLQuartetLogLk(profile_t *pA, profile_t *pB, profile_t *pC, profile_t *pD,
1311 int nPos, /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
1312 /*IN*/double branch_lengths[5],
1313 /*OPTIONAL OUT*/double *site_likelihoods);
1314
1315 /* Given a topology and branch lengths, estimate rates & recompute profiles */
1316 void SetMLRates(/*IN/OUT*/NJ_t *NJ, int nRateCategories);
1317
1318 /* Returns a set of nRateCategories potential rates; the caller must free it */
1319 numeric_t *MLSiteRates(int nRateCategories);
1320
1321 /* returns site_loglk so that
1322 site_loglk[nPos*iRate + j] is the log likelihood of site j with rate iRate
1323 The caller must free it.
1324 */
1325 double *MLSiteLikelihoodsByRate(/*IN*/NJ_t *NJ, /*IN*/numeric_t *rates, int nRateCategories);
1326
1327 typedef struct {
1328 double mult; /* multiplier for the rates / divisor for the tree-length */
1329 double alpha;
1330 int nPos;
1331 int nRateCats;
1332 numeric_t *rates;
1333 double *site_loglk;
1334 } siteratelk_t;
1335
1336 double GammaLogLk(/*IN*/siteratelk_t *s, /*OPTIONAL OUT*/double *gamma_loglk_sites);
1337
1338 /* Input site_loglk must be for each rate. Note that FastTree does not reoptimize
1339 the branch lengths under the Gamma model -- it optimizes the overall scale.
1340 Reports the gamma log likelihhod (and logs site likelihoods if fpLog is set),
1341 and reports the rescaling value.
1342 */
1343 double RescaleGammaLogLk(int nPos, int nRateCats,
1344 /*IN*/numeric_t *rates, /*IN*/double *site_loglk,
1345 /*OPTIONAL*/FILE *fpLog);
1346
1347 /* P(value<=x) for the gamma distribution with shape parameter alpha and scale 1/alpha */
1348 double PGamma(double x, double alpha);
1349
1350 /* Given a topology and branch lengths, optimize GTR rates and quickly reoptimize branch lengths
1351 If gtrfreq is NULL, then empirical frequencies are used
1352 */
1353 void SetMLGtr(/*IN/OUT*/NJ_t *NJ, /*OPTIONAL IN*/double *gtrfreq, /*OPTIONAL WRITE*/FILE *fpLog);
1354
1355 /* P(A & B | len) = P(B | A, len) * P(A)
1356 If site_likelihoods is present, multiplies those values by the site likelihood at each point
1357 (Note it does not handle underflow)
1358 */
1359 double PairLogLk(/*IN*/profile_t *p1, /*IN*/profile_t *p2, double length,
1360 int nPos, /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
1361 /*OPTIONAL IN/OUT*/double *site_likelihoods);
1362
1363 /* Branch lengths for 4-taxon tree ((A,B),C,D); I means internal */
1364 typedef enum {LEN_A,LEN_B,LEN_C,LEN_D,LEN_I} quartet_length_t;
1365
1366 typedef struct {
1367 int nPos;
1368 transition_matrix_t *transmat;
1369 rates_t *rates;
1370 int nEval; /* number of likelihood evaluations */
1371 /* The pair to optimize */
1372 profile_t *pair1;
1373 profile_t *pair2;
1374 } quartet_opt_t;
1375
1376 double PairNegLogLk(double x, void *data); /* data must be a quartet_opt_t */
1377
1378 typedef struct {
1379 NJ_t *NJ;
1380 double freq[4];
1381 double rates[6];
1382 int iRate; /* which rate to set x from */
1383 FILE *fpLog; /* OPTIONAL WRITE */
1384 } gtr_opt_t;
1385
1386 /* Returns -log_likelihood for the tree with the given rates
1387 data must be a gtr_opt_t and x is used to set rate iRate
1388 Does not recompute profiles -- assumes that the caller will
1389 */
1390 double GTRNegLogLk(double x, void *data);
1391
1392 /* Returns the resulting log likelihood. Optionally returns whether other
1393 topologies should be abandoned, based on the difference between AB|CD and
1394 the "star topology" (AB|CD with a branch length of MLMinBranchLength) exceeding
1395 closeLogLkLimit.
1396 If bStarTest is passed in, it only optimized the internal branch if
1397 the star test is true. Otherwise, it optimized all 5 branch lengths
1398 in turn.
1399 */
1400 double MLQuartetOptimize(profile_t *pA, profile_t *pB, profile_t *pC, profile_t *pD,
1401 int nPos, /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
1402 /*IN/OUT*/double branch_lengths[5],
1403 /*OPTIONAL OUT*/bool *pStarTest,
1404 /*OPTIONAL OUT*/double *site_likelihoods);
1405
1406 /* Returns the resulting log likelihood */
1407 double MLPairOptimize(profile_t *pA, profile_t *pB,
1408 int nPos, /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
1409 /*IN/OUT*/double *branch_length);
1410
1411 /* Returns the number of steps considered, with the actual steps in steps[]
1412 Modifies the tree by this chain of NNIs
1413 */
1414 int FindSPRSteps(/*IN/OUT*/NJ_t *NJ,
1415 int node,
1416 int parent, /* sibling or parent of node to NNI to start the chain */
1417 /*IN/OUT*/profile_t **upProfiles,
1418 /*OUT*/spr_step_t *steps,
1419 int maxSteps,
1420 bool bFirstAC);
1421
1422 /* Undo a single NNI */
1423 void UnwindSPRStep(/*IN/OUT*/NJ_t *NJ,
1424 /*IN*/spr_step_t *step,
1425 /*IN/OUT*/profile_t **upProfiles);
1426
1427
1428 /* Update the profile of node and its ancestor, and delete nearby out-profiles */
1429 void UpdateForNNI(/*IN/OUT*/NJ_t *NJ, int node, /*IN/OUT*/profile_t **upProfiles, bool useML);
1430
1431 /* Sets NJ->parent[newchild] and replaces oldchild with newchild
1432 in the list of children of parent
1433 */
1434 void ReplaceChild(/*IN/OUT*/NJ_t *NJ, int parent, int oldchild, int newchild);
1435
1436 int CompareHitsByCriterion(const void *c1, const void *c2);
1437 int CompareHitsByIJ(const void *c1, const void *c2);
1438
1439 int NGaps(NJ_t *NJ, int node); /* only handles leaf sequences */
1440
1441 /* node is the parent of AB, sibling of C
1442 node cannot be root or a leaf
1443 If node is the child of root, then D is the other sibling of node,
1444 and the 4th profile is D's profile.
1445 Otherwise, D is the parent of node, and we use its upprofile
1446 Call this with profiles=NULL to get the nodes, without fetching or
1447 computing profiles
1448 */
1449 void SetupABCD(NJ_t *NJ, int node,
1450 /* the 4 profiles for ABCD; the last one is an upprofile */
1451 /*OPTIONAL OUT*/profile_t *profiles[4],
1452 /*OPTIONAL IN/OUT*/profile_t **upProfiles,
1453 /*OUT*/int nodeABCD[4],
1454 bool useML);
1455
1456 int Sibling(NJ_t *NJ, int node); /* At root, no unique sibling so returns -1 */
1457 void RootSiblings(NJ_t *NJ, int node, /*OUT*/int sibs[2]);
1458
1459 /* JC probability of nucleotide not changing, for each rate category */
1460 double *PSameVector(double length, rates_t *rates);
1461
1462 /* JC probability of nucleotide not changing, for each rate category */
1463 double *PDiffVector(double *pSame, rates_t *rates);
1464
1465 /* expeigen[iRate*nCodes + j] = exp(length * rate iRate * eigenvalue j) */
1466 numeric_t *ExpEigenRates(double length, transition_matrix_t *transmat, rates_t *rates);
1467
1468 /* Print a progress report if more than 0.1 second has gone by since the progress report */
1469 /* Format should include 0-4 %d references and no newlines */
1470 void ProgressReport(char *format, int iArg1, int iArg2, int iArg3, int iArg4);
1471 void LogTree(char *format, int round, /*OPTIONAL WRITE*/FILE *fp, NJ_t *NJ, char **names, uniquify_t *unique, bool bQuote);
1472 void LogMLRates(/*OPTIONAL WRITE*/FILE *fpLog, NJ_t *NJ);
1473
1474 void *mymalloc(size_t sz); /* Prints "Out of memory" and exits on failure */
1475 void *myfree(void *, size_t sz); /* Always returns NULL */
1476
1477 /* One-dimensional minimization using brent's function, with
1478 a fractional and an absolute tolerance */
1479 double onedimenmin(double xmin, double xguess, double xmax, double (*f)(double,void*), void *data,
1480 double ftol, double atol,
1481 /*OUT*/double *fx, /*OUT*/double *f2x);
1482
1483 double brent(double ax, double bx, double cx, double (*f)(double, void *), void *data,
1484 double ftol, double atol,
1485 double *foptx, double *f2optx, double fax, double fbx, double fcx);
1486
1487 /* Vector operations, either using SSE3 or not
1488 Code assumes that vectors are a multiple of 4 in size
1489 */
1490 void vector_multiply(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, int n, /*OUT*/numeric_t *fOut);
1491 numeric_t vector_multiply_sum(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, int n);
1492 void vector_add_mult(/*IN/OUT*/numeric_t *f, /*IN*/numeric_t *add, numeric_t weight, int n);
1493
1494 /* multiply the transpose of a matrix by a vector */
1495 void matrixt_by_vector4(/*IN*/numeric_t mat[4][MAXCODES], /*IN*/numeric_t vec[4], /*OUT*/numeric_t out[4]);
1496
1497 /* sum(f1*fBy)*sum(f2*fBy) */
1498 numeric_t vector_dot_product_rot(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, /*IN*/numeric_t* fBy, int n);
1499
1500 /* sum(f1*f2*f3) */
1501 numeric_t vector_multiply3_sum(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, /*IN*/numeric_t* f3, int n);
1502
1503 numeric_t vector_sum(/*IN*/numeric_t *f1, int n);
1504 void vector_multiply_by(/*IN/OUT*/numeric_t *f, /*IN*/numeric_t fBy, int n);
1505
1506 double clockDiff(/*IN*/struct timeval *clock_start);
1507 int timeval_subtract (/*OUT*/struct timeval *result, /*IN*/struct timeval *x, /*IN*/struct timeval *y);
1508
1509 char *OpenMPString(void);
1510
1511 void ran_start(long seed);
1512 double knuth_rand(); /* Random number between 0 and 1 */
1513 void tred2 (double *a, const int n, const int np, double *d, double *e);
1514 double pythag(double a, double b);
1515 void tqli(double *d, double *e, int n, int np, double *z);
1516
1517 /* Like mymalloc; duplicates the input (returns NULL if given NULL) */
1518 void *mymemdup(void *data, size_t sz);
1519 void *myrealloc(void *data, size_t szOld, size_t szNew, bool bCopy);
1520
1521 double pnorm(double z); /* Probability(value <=z) */
1522
1523 /* Hashtable functions */
1524 typedef struct
1525 {
1526 char *string;
1527 int nCount; /* number of times this entry was seen */
1528 int first; /* index of first entry with this value */
1529 } hashbucket_t;
1530
1531 typedef struct {
1532 int nBuckets;
1533 /* hashvalue -> bucket. Or look in bucket + 1, +2, etc., till you hit a NULL string */
1534 hashbucket_t *buckets;
1535 } hashstrings_t;
1536 typedef int hashiterator_t;
1537
1538 hashstrings_t *MakeHashtable(char **strings, int nStrings);
1539 hashstrings_t *FreeHashtable(hashstrings_t* hash); /*returns NULL*/
1540 hashiterator_t FindMatch(hashstrings_t *hash, char *string);
1541
1542 /* Return NULL if we have run out of values */
1543 char *GetHashString(hashstrings_t *hash, hashiterator_t hi);
1544 int HashCount(hashstrings_t *hash, hashiterator_t hi);
1545 int HashFirst(hashstrings_t *hash, hashiterator_t hi);
1546
1547 void PrintNJ(/*WRITE*/FILE *, NJ_t *NJ, char **names, uniquify_t *unique, bool bShowSupport, bool bQuoteNames);
1548
1549 /* Print topology using node indices as node names */
1550 void PrintNJInternal(/*WRITE*/FILE *, NJ_t *NJ, bool useLen);
1551
1552 uniquify_t *UniquifyAln(/*IN*/alignment_t *aln);
1553 uniquify_t *FreeUniquify(uniquify_t *); /* returns NULL */
1554
1555 /* Convert a constraint alignment to a list of sequences. The returned array is indexed
1556 by iUnique and points to values in the input alignment
1557 */
1558 char **AlnToConstraints(alignment_t *constraints, uniquify_t *unique, hashstrings_t *hashnames);
1559
1560 /* ReadTree ignores non-unique leaves after the first instance.
1561 At the end, it prunes the tree to ignore empty children and it
1562 unroots the tree if necessary.
1563 */
1564 void ReadTree(/*IN/OUT*/NJ_t *NJ,
1565 /*IN*/uniquify_t *unique,
1566 /*IN*/hashstrings_t *hashnames,
1567 /*READ*/FILE *fpInTree);
1568 char *ReadTreeToken(/*READ*/FILE *fp); /* returns a static array, or NULL on EOF */
1569 void ReadTreeAddChild(int parent, int child, /*IN/OUT*/int *parents, /*IN/OUT*/children_t *children);
1570 /* Do not add the leaf if we already set this unique-set to another parent */
1571 void ReadTreeMaybeAddLeaf(int parent, char *name,
1572 hashstrings_t *hashnames, uniquify_t *unique,
1573 /*IN/OUT*/int *parents, /*IN/OUT*/children_t *children);
1574 void ReadTreeRemove(/*IN/OUT*/int *parents, /*IN/OUT*/children_t *children, int node);
1575
1576 /* Routines to support tree traversal and prevent visiting a node >1 time
1577 (esp. if topology changes).
1578 */
1579 typedef bool *traversal_t;
1580 traversal_t InitTraversal(NJ_t*);
1581 void SkipTraversalInto(int node, /*IN/OUT*/traversal_t traversal);
1582 traversal_t FreeTraversal(traversal_t, NJ_t*); /*returns NULL*/
1583
1584 /* returns new node, or -1 if nothing left to do. Use root for the first call.
1585 Will return every node and then root.
1586 Uses postorder tree traversal (depth-first search going down to leaves first)
1587 Keeps track of which nodes are visited, so even after an NNI that swaps a
1588 visited child with an unvisited uncle, the next call will visit the
1589 was-uncle-now-child. (However, after SPR moves, there is no such guarantee.)
1590
1591 If pUp is not NULL, then, if going "back up" through a previously visited node
1592 (presumably due to an NNI), then it will return the node another time,
1593 with *pUp = true.
1594 */
1595 int TraversePostorder(int lastnode, NJ_t *NJ, /*IN/OUT*/traversal_t,
1596 /*OUT OPTIONAL*/bool *pUp);
1597
1598 /* Routines to support storing up-profiles during tree traversal
1599 Eventually these should be smart enough to do weighted joins and
1600 to minimize memory usage
1601 */
1602 profile_t **UpProfiles(NJ_t *NJ);
1603 profile_t *GetUpProfile(/*IN/OUT*/profile_t **upProfiles, NJ_t *NJ, int node, bool useML);
1604 profile_t *DeleteUpProfile(/*IN/OUT*/profile_t **upProfiles, NJ_t *NJ, int node); /* returns NULL */
1605 profile_t **FreeUpProfiles(profile_t **upProfiles, NJ_t *NJ); /* returns NULL */
1606
1607 /* Recomputes the profile for a node, presumably to reflect topology changes
1608 If bionj is set, does a weighted join -- which requires using upProfiles
1609 If useML is set, computes the posterior probability instead of averaging
1610 */
1611 void RecomputeProfile(/*IN/OUT*/NJ_t *NJ, /*IN/OUT*/profile_t **upProfiles, int node, bool useML);
1612
1613 /* Recompute profiles going up from the leaves, using the provided distance matrix
1614 and unweighted joins
1615 */
1616 void RecomputeProfiles(/*IN/OUT*/NJ_t *NJ, /*OPTIONAL*/distance_matrix_t *dmat);
1617
1618 void RecomputeMLProfiles(/*IN/OUT*/NJ_t *NJ);
1619
1620 /* If bionj is set, computes the weight to be given to A when computing the
1621 profile for the ancestor of A and B. C and D are the other profiles in the quartet
1622 If bionj is not set, returns -1 (which means unweighted in AverageProfile).
1623 (A and B are the first two profiles in the array)
1624 */
1625 double QuartetWeight(profile_t *profiles[4], distance_matrix_t *dmat, int nPos);
1626
1627 /* Returns a list of nodes, starting with node and ending with root */
1628 int *PathToRoot(NJ_t *NJ, int node, /*OUT*/int *depth);
1629 int *FreePath(int *path, NJ_t *NJ); /* returns NULL */
1630
1631 /* The default amino acid distance matrix, derived from the BLOSUM45 similarity matrix */
1632 distance_matrix_t matrixBLOSUM45;
1633
1634 /* The default amino acid transition matrix (Jones Taylor Thorton 1992) */
1635 double matrixJTT92[MAXCODES][MAXCODES];
1636 double statJTT92[MAXCODES];
1637
1638 /* The Le-Gascuel 2008 amino acid transition matrix */
1639 double matrixLG08[MAXCODES][MAXCODES];
1640 double statLG08[MAXCODES];
1641
1642 /* The WAG amino acid transition matrix (Whelan-And-Goldman 2001) */
1643 double matrixWAG01[MAXCODES][MAXCODES];
1644 double statWAG01[MAXCODES];
1645
1646
main(int argc,char ** argv)1647 int main(int argc, char **argv) {
1648 int nAlign = 1; /* number of alignments to read */
1649 int iArg;
1650 char *matrixPrefix = NULL;
1651 distance_matrix_t *distance_matrix = NULL;
1652 bool make_matrix = false;
1653 char *constraintsFile = NULL;
1654 char *intreeFile = NULL;
1655 bool intree1 = false; /* the same starting tree each round */
1656 int nni = -1; /* number of rounds of NNI, defaults to 4*log2(n) */
1657 int spr = 2; /* number of rounds of SPR */
1658 int maxSPRLength = 10; /* maximum distance to move a node */
1659 int MLnni = -1; /* number of rounds of ML NNI, defaults to 2*log2(n) */
1660 bool MLlen = false; /* optimize branch lengths; no topology changes */
1661 int nBootstrap = 1000; /* If set, number of replicates of local bootstrap to do */
1662 int nRateCats = nDefaultRateCats;
1663 char *logfile = NULL;
1664 bool bUseGtr = false;
1665 bool bUseLg = false;
1666 bool bUseWag = false;
1667 bool bUseGtrRates = false;
1668 double gtrrates[6] = {1,1,1,1,1,1};
1669 bool bUseGtrFreq = false;
1670 double gtrfreq[4] = {0.25,0.25,0.25,0.25};
1671 bool bQuote = false;
1672 FILE *fpOut = stdout;
1673
1674 if (isatty(STDIN_FILENO) && argc == 1) {
1675 fprintf(stderr,"Usage for FastTree version %s %s%s:\n%s",
1676 FT_VERSION, SSE_STRING, OpenMPString(), usage);
1677 #if (defined _WIN32 || defined WIN32 || defined WIN64 || defined _WIN64)
1678 fprintf(stderr, "Windows users: Please remember to run this inside a command shell\n");
1679 fprintf(stderr,"Hit return to continue\n");
1680 fgetc(stdin);
1681 #endif
1682 exit(0);
1683 }
1684 for (iArg = 1; iArg < argc; iArg++) {
1685 if (strcmp(argv[iArg],"-makematrix") == 0) {
1686 make_matrix = true;
1687 } else if (strcmp(argv[iArg],"-logdist") == 0) {
1688 fprintf(stderr, "Warning: logdist is now on by default and obsolete\n");
1689 } else if (strcmp(argv[iArg],"-rawdist") == 0) {
1690 logdist = false;
1691 } else if (strcmp(argv[iArg],"-verbose") == 0 && iArg < argc-1) {
1692 verbose = atoi(argv[++iArg]);
1693 } else if (strcmp(argv[iArg],"-quiet") == 0) {
1694 verbose = 0;
1695 showProgress = 0;
1696 } else if (strcmp(argv[iArg],"-nopr") == 0) {
1697 showProgress = 0;
1698 } else if (strcmp(argv[iArg],"-slow") == 0) {
1699 slow = 1;
1700 } else if (strcmp(argv[iArg],"-fastest") == 0) {
1701 fastest = 1;
1702 tophitsRefresh = 0.5;
1703 useTopHits2nd = true;
1704 } else if (strcmp(argv[iArg],"-2nd") == 0) {
1705 useTopHits2nd = true;
1706 } else if (strcmp(argv[iArg],"-no2nd") == 0) {
1707 useTopHits2nd = false;
1708 } else if (strcmp(argv[iArg],"-slownni") == 0) {
1709 fastNNI = false;
1710 } else if (strcmp(argv[iArg], "-matrix") == 0 && iArg < argc-1) {
1711 iArg++;
1712 matrixPrefix = argv[iArg];
1713 } else if (strcmp(argv[iArg], "-nomatrix") == 0) {
1714 useMatrix = false;
1715 } else if (strcmp(argv[iArg], "-n") == 0 && iArg < argc-1) {
1716 iArg++;
1717 nAlign = atoi(argv[iArg]);
1718 if (nAlign < 1) {
1719 fprintf(stderr, "-n argument for #input alignments must be > 0 not %s\n", argv[iArg]);
1720 exit(1);
1721 }
1722 } else if (strcmp(argv[iArg], "-quote") == 0) {
1723 bQuote = true;
1724 } else if (strcmp(argv[iArg], "-nt") == 0) {
1725 nCodes = 4;
1726 } else if (strcmp(argv[iArg], "-intree") == 0 && iArg < argc-1) {
1727 iArg++;
1728 intreeFile = argv[iArg];
1729 } else if (strcmp(argv[iArg], "-intree1") == 0 && iArg < argc-1) {
1730 iArg++;
1731 intreeFile = argv[iArg];
1732 intree1 = true;
1733 } else if (strcmp(argv[iArg], "-nj") == 0) {
1734 bionj = 0;
1735 } else if (strcmp(argv[iArg], "-bionj") == 0) {
1736 bionj = 1;
1737 } else if (strcmp(argv[iArg], "-boot") == 0 && iArg < argc-1) {
1738 iArg++;
1739 nBootstrap = atoi(argv[iArg]);
1740 } else if (strcmp(argv[iArg], "-noboot") == 0 || strcmp(argv[iArg], "-nosupport") == 0) {
1741 nBootstrap = 0;
1742 } else if (strcmp(argv[iArg], "-seed") == 0 && iArg < argc-1) {
1743 iArg++;
1744 long seed = atol(argv[iArg]);
1745 ran_start(seed);
1746 } else if (strcmp(argv[iArg],"-top") == 0) {
1747 if(tophitsMult < 0.01)
1748 tophitsMult = 1.0;
1749 } else if (strcmp(argv[iArg],"-notop") == 0) {
1750 tophitsMult = 0.0;
1751 } else if (strcmp(argv[iArg], "-topm") == 0 && iArg < argc-1) {
1752 iArg++;
1753 tophitsMult = atof(argv[iArg]);
1754 } else if (strcmp(argv[iArg], "-close") == 0 && iArg < argc-1) {
1755 iArg++;
1756 tophitsClose = atof(argv[iArg]);
1757 if (tophitsMult <= 0) {
1758 fprintf(stderr, "Cannot use -close unless -top is set above 0\n");
1759 exit(1);
1760 }
1761 if (tophitsClose <= 0 || tophitsClose >= 1) {
1762 fprintf(stderr, "-close argument must be between 0 and 1\n");
1763 exit(1);
1764 }
1765 } else if (strcmp(argv[iArg], "-refresh") == 0 && iArg < argc-1) {
1766 iArg++;
1767 tophitsRefresh = atof(argv[iArg]);
1768 if (tophitsMult <= 0) {
1769 fprintf(stderr, "Cannot use -refresh unless -top is set above 0\n");
1770 exit(1);
1771 }
1772 if (tophitsRefresh <= 0 || tophitsRefresh >= 1) {
1773 fprintf(stderr, "-refresh argument must be between 0 and 1\n");
1774 exit(1);
1775 }
1776 } else if (strcmp(argv[iArg],"-nni") == 0 && iArg < argc-1) {
1777 iArg++;
1778 nni = atoi(argv[iArg]);
1779 if (nni == 0)
1780 spr = 0;
1781 } else if (strcmp(argv[iArg],"-spr") == 0 && iArg < argc-1) {
1782 iArg++;
1783 spr = atoi(argv[iArg]);
1784 } else if (strcmp(argv[iArg],"-sprlength") == 0 && iArg < argc-1) {
1785 iArg++;
1786 maxSPRLength = atoi(argv[iArg]);
1787 } else if (strcmp(argv[iArg],"-mlnni") == 0 && iArg < argc-1) {
1788 iArg++;
1789 MLnni = atoi(argv[iArg]);
1790 } else if (strcmp(argv[iArg],"-noml") == 0) {
1791 MLnni = 0;
1792 } else if (strcmp(argv[iArg],"-mllen") == 0) {
1793 MLnni = 0;
1794 MLlen = true;
1795 } else if (strcmp(argv[iArg],"-nome") == 0) {
1796 spr = 0;
1797 nni = 0;
1798 } else if (strcmp(argv[iArg],"-help") == 0) {
1799 fprintf(stderr,"FastTree %s %s%s:\n%s", FT_VERSION, SSE_STRING, OpenMPString(), usage);
1800 exit(0);
1801 } else if (strcmp(argv[iArg],"-expert") == 0) {
1802 fprintf(stderr, "Detailed usage for FastTree %s %s%s:\n%s",
1803 FT_VERSION, SSE_STRING, OpenMPString(), expertUsage);
1804 exit(0);
1805 } else if (strcmp(argv[iArg],"-pseudo") == 0) {
1806 if (iArg < argc-1 && isdigit(argv[iArg+1][0])) {
1807 iArg++;
1808 pseudoWeight = atof(argv[iArg]);
1809 if (pseudoWeight < 0.0) {
1810 fprintf(stderr,"Illegal argument to -pseudo: %s\n", argv[iArg]);
1811 exit(1);
1812 }
1813 } else {
1814 pseudoWeight = 1.0;
1815 }
1816 } else if (strcmp(argv[iArg],"-constraints") == 0 && iArg < argc-1) {
1817 iArg++;
1818 constraintsFile = argv[iArg];
1819 } else if (strcmp(argv[iArg],"-constraintWeight") == 0 && iArg < argc-1) {
1820 iArg++;
1821 constraintWeight = atof(argv[iArg]);
1822 if (constraintWeight <= 0.0) {
1823 fprintf(stderr, "Illegal argument to -constraintWeight (must be greater than zero): %s\n", argv[iArg]);
1824 exit(1);
1825 }
1826 } else if (strcmp(argv[iArg],"-mlacc") == 0 && iArg < argc-1) {
1827 iArg++;
1828 mlAccuracy = atoi(argv[iArg]);
1829 if (mlAccuracy < 1) {
1830 fprintf(stderr, "Illlegal -mlacc argument: %s\n", argv[iArg]);
1831 exit(1);
1832 }
1833 } else if (strcmp(argv[iArg],"-exactml") == 0 || strcmp(argv[iArg],"-mlexact") == 0) {
1834 fprintf(stderr,"-exactml is not required -- exact posteriors is the default now\n");
1835 } else if (strcmp(argv[iArg],"-approxml") == 0 || strcmp(argv[iArg],"-mlapprox") == 0) {
1836 exactML = false;
1837 } else if (strcmp(argv[iArg],"-cat") == 0 && iArg < argc-1) {
1838 iArg++;
1839 nRateCats = atoi(argv[iArg]);
1840 if (nRateCats < 1) {
1841 fprintf(stderr, "Illlegal argument to -ncat (must be greater than zero): %s\n", argv[iArg]);
1842 exit(1);
1843 }
1844 } else if (strcmp(argv[iArg],"-nocat") == 0) {
1845 nRateCats = 1;
1846 } else if (strcmp(argv[iArg], "-lg") == 0) {
1847 bUseLg = true;
1848 } else if (strcmp(argv[iArg], "-wag") == 0) {
1849 bUseWag = true;
1850 } else if (strcmp(argv[iArg], "-gtr") == 0) {
1851 bUseGtr = true;
1852 } else if (strcmp(argv[iArg], "-gtrrates") == 0 && iArg < argc-6) {
1853 bUseGtr = true;
1854 bUseGtrRates = true;
1855 int i;
1856 for (i = 0; i < 6; i++) {
1857 gtrrates[i] = atof(argv[++iArg]);
1858 if (gtrrates[i] < 1e-5) {
1859 fprintf(stderr, "Illegal or too small value of GTR rate: %s\n", argv[iArg]);
1860 exit(1);
1861 }
1862 }
1863 } else if (strcmp(argv[iArg],"-gtrfreq") == 0 && iArg < argc-4) {
1864 bUseGtr = true;
1865 bUseGtrFreq = true;
1866 int i;
1867 double sum = 0;
1868 for (i = 0; i < 4; i++) {
1869 gtrfreq[i] = atof(argv[++iArg]);
1870 sum += gtrfreq[i];
1871 if (gtrfreq[i] < 1e-5) {
1872 fprintf(stderr, "Illegal or too small value of GTR frequency: %s\n", argv[iArg]);
1873 exit(1);
1874 }
1875 }
1876 if (fabs(1.0-sum) > 0.01) {
1877 fprintf(stderr, "-gtrfreq values do not sum to 1\n");
1878 exit(1);
1879 }
1880 for (i = 0; i < 4; i++)
1881 gtrfreq[i] /= sum;
1882 } else if (strcmp(argv[iArg],"-log") == 0 && iArg < argc-1) {
1883 iArg++;
1884 logfile = argv[iArg];
1885 } else if (strcmp(argv[iArg],"-gamma") == 0) {
1886 gammaLogLk = true;
1887 } else if (strcmp(argv[iArg],"-out") == 0 && iArg < argc-1) {
1888 iArg++;
1889 fpOut = fopen(argv[iArg],"w");
1890 if(fpOut==NULL) {
1891 fprintf(stderr,"Cannot write to %s\n",argv[iArg]);
1892 exit(1);
1893 }
1894 } else if (argv[iArg][0] == '-') {
1895 fprintf(stderr, "Unknown or incorrect use of option %s\n%s", argv[iArg], usage);
1896 exit(1);
1897 } else
1898 break;
1899 }
1900 if(iArg < argc-1) {
1901 fprintf(stderr, "%s", usage);
1902 exit(1);
1903 }
1904
1905 codesString = nCodes == 20 ? codesStringAA : codesStringNT;
1906 if (nCodes == 4 && matrixPrefix == NULL)
1907 useMatrix = false; /* no default nucleotide matrix */
1908
1909 char *fileName = iArg == (argc-1) ? argv[argc-1] : NULL;
1910
1911 if (slow && fastest) {
1912 fprintf(stderr,"Cannot be both slow and fastest\n");
1913 exit(1);
1914 }
1915 if (slow && tophitsMult > 0) {
1916 tophitsMult = 0.0;
1917 }
1918
1919 FILE *fpLog = NULL;
1920 if (logfile != NULL) {
1921 fpLog = fopen(logfile, "w");
1922 if (fpLog == NULL) {
1923 fprintf(stderr, "Cannot write to: %s\n", logfile);
1924 exit(1);
1925 }
1926 fprintf(fpLog, "Command:");
1927 int i;
1928 for (i=0; i < argc; i++)
1929 fprintf(fpLog, " %s", argv[i]);
1930 fprintf(fpLog,"\n");
1931 fflush(fpLog);
1932 }
1933
1934 int i;
1935 FILE *fps[2] = {NULL,NULL};
1936 int nFPs = 0;
1937 if (verbose)
1938 fps[nFPs++] = stderr;
1939 if (fpLog != NULL)
1940 fps[nFPs++] = fpLog;
1941
1942 if (!make_matrix) { /* Report settings */
1943 char tophitString[100] = "no";
1944 char tophitsCloseStr[100] = "default";
1945 if(tophitsClose > 0) sprintf(tophitsCloseStr,"%.2f",tophitsClose);
1946 if(tophitsMult>0) sprintf(tophitString,"%.2f*sqrtN close=%s refresh=%.2f",
1947 tophitsMult, tophitsCloseStr, tophitsRefresh);
1948 char supportString[100] = "none";
1949 if (nBootstrap>0) {
1950 if (MLnni != 0 || MLlen)
1951 sprintf(supportString, "SH-like %d", nBootstrap);
1952 else
1953 sprintf(supportString,"Local boot %d",nBootstrap);
1954 }
1955 char nniString[100] = "(no NNI)";
1956 if (nni > 0)
1957 sprintf(nniString, "+NNI (%d rounds)", nni);
1958 if (nni == -1)
1959 strcpy(nniString, "+NNI");
1960 char sprString[100] = "(no SPR)";
1961 if (spr > 0)
1962 sprintf(sprString, "+SPR (%d rounds range %d)", spr, maxSPRLength);
1963 char mlnniString[100] = "(no ML-NNI)";
1964 if(MLnni > 0)
1965 sprintf(mlnniString, "+ML-NNI (%d rounds)", MLnni);
1966 else if (MLnni == -1)
1967 sprintf(mlnniString, "+ML-NNI");
1968 else if (MLlen)
1969 sprintf(mlnniString, "+ML branch lengths");
1970 if ((MLlen || MLnni != 0) && !exactML)
1971 strcat(mlnniString, " approx");
1972 if (MLnni != 0)
1973 sprintf(mlnniString+strlen(mlnniString), " opt-each=%d",mlAccuracy);
1974
1975 for (i = 0; i < nFPs; i++) {
1976 FILE *fp = fps[i];
1977 fprintf(fp,"FastTree Version %s %s%s\nAlignment: %s",
1978 FT_VERSION, SSE_STRING, OpenMPString(), fileName != NULL ? fileName : "standard input");
1979 if (nAlign>1)
1980 fprintf(fp, " (%d alignments)", nAlign);
1981 fprintf(fp,"\n%s distances: %s Joins: %s Support: %s\n",
1982 nCodes == 20 ? "Amino acid" : "Nucleotide",
1983 matrixPrefix ? matrixPrefix : (useMatrix? "BLOSUM45"
1984 : (nCodes==4 && logdist ? "Jukes-Cantor" : "%different")),
1985 bionj ? "weighted" : "balanced" ,
1986 supportString);
1987 if (intreeFile == NULL)
1988 fprintf(fp, "Search: %s%s %s %s %s\nTopHits: %s\n",
1989 slow?"Exhaustive (slow)" : (fastest ? "Fastest" : "Normal"),
1990 useTopHits2nd ? "+2nd" : "",
1991 nniString, sprString, mlnniString,
1992 tophitString);
1993 else
1994 fprintf(fp, "Start at tree from %s %s %s\n", intreeFile, nniString, sprString);
1995
1996 if (MLnni != 0 || MLlen) {
1997 fprintf(fp, "ML Model: %s,",
1998 (nCodes == 4) ?
1999 (bUseGtr ? "Generalized Time-Reversible" : "Jukes-Cantor") :
2000 (bUseLg ? "Le-Gascuel 2008" : (bUseWag ? "Whelan-And-Goldman" : "Jones-Taylor-Thorton"))
2001
2002 );
2003 if (nRateCats == 1)
2004 fprintf(fp, " No rate variation across sites");
2005 else
2006 fprintf(fp, " CAT approximation with %d rate categories", nRateCats);
2007 fprintf(fp, "\n");
2008 if (nCodes == 4 && bUseGtrRates)
2009 fprintf(fp, "GTR rates(ac ag at cg ct gt) %.4f %.4f %.4f %.4f %.4f %.4f\n",
2010 gtrrates[0],gtrrates[1],gtrrates[2],gtrrates[3],gtrrates[4],gtrrates[5]);
2011 if (nCodes == 4 && bUseGtrFreq)
2012 fprintf(fp, "GTR frequencies(A C G T) %.4f %.4f %.4f %.4f\n",
2013 gtrfreq[0],gtrfreq[1],gtrfreq[2],gtrfreq[3]);
2014 }
2015 if (constraintsFile != NULL)
2016 fprintf(fp, "Constraints: %s Weight: %.3f\n", constraintsFile, constraintWeight);
2017 if (pseudoWeight > 0)
2018 fprintf(fp, "Pseudocount weight for comparing sequences with little overlap: %.3lf\n",pseudoWeight);
2019 fflush(fp);
2020 }
2021 }
2022 if (matrixPrefix != NULL) {
2023 if (!useMatrix) {
2024 fprintf(stderr,"Cannot use both -matrix and -nomatrix arguments!");
2025 exit(1);
2026 }
2027 distance_matrix = ReadDistanceMatrix(matrixPrefix);
2028 } else if (useMatrix) { /* use default matrix */
2029 assert(nCodes==20);
2030 distance_matrix = &matrixBLOSUM45;
2031 SetupDistanceMatrix(distance_matrix);
2032 } else {
2033 distance_matrix = NULL;
2034 }
2035
2036 int iAln;
2037 FILE *fpIn = fileName != NULL ? fopen(fileName, "r") : stdin;
2038 if (fpIn == NULL) {
2039 fprintf(stderr, "Cannot read %s\n", fileName);
2040 exit(1);
2041 }
2042 FILE *fpConstraints = NULL;
2043 if (constraintsFile != NULL) {
2044 fpConstraints = fopen(constraintsFile, "r");
2045 if (fpConstraints == NULL) {
2046 fprintf(stderr, "Cannot read %s\n", constraintsFile);
2047 exit(1);
2048 }
2049 }
2050
2051 FILE *fpInTree = NULL;
2052 if (intreeFile != NULL) {
2053 fpInTree = fopen(intreeFile,"r");
2054 if (fpInTree == NULL) {
2055 fprintf(stderr, "Cannot read %s\n", intreeFile);
2056 exit(1);
2057 }
2058 }
2059
2060 for(iAln = 0; iAln < nAlign; iAln++) {
2061 alignment_t *aln = ReadAlignment(fpIn, bQuote);
2062 if (aln->nSeq < 1) {
2063 fprintf(stderr, "No alignment sequences\n");
2064 exit(1);
2065 }
2066 if (fpLog) {
2067 fprintf(fpLog, "Read %d sequences, %d positions\n", aln->nSeq, aln->nPos);
2068 fflush(fpLog);
2069 }
2070
2071 struct timeval clock_start;
2072 gettimeofday(&clock_start,NULL);
2073 ProgressReport("Read alignment",0,0,0,0);
2074
2075 /* Check that all names in alignment are unique */
2076 hashstrings_t *hashnames = MakeHashtable(aln->names, aln->nSeq);
2077 int i;
2078 for (i=0; i<aln->nSeq; i++) {
2079 hashiterator_t hi = FindMatch(hashnames,aln->names[i]);
2080 if (HashCount(hashnames,hi) != 1) {
2081 fprintf(stderr,"Non-unique name '%s' in the alignment\n",aln->names[i]);
2082 exit(1);
2083 }
2084 }
2085
2086 /* Make a list of unique sequences -- note some lists are bigger than required */
2087 ProgressReport("Hashed the names",0,0,0,0);
2088 if (make_matrix) {
2089 NJ_t *NJ = InitNJ(aln->seqs, aln->nSeq, aln->nPos,
2090 /*constraintSeqs*/NULL, /*nConstraints*/0,
2091 distance_matrix, /*transmat*/NULL);
2092 printf(" %d\n",aln->nSeq);
2093 int i,j;
2094 for(i = 0; i < NJ->nSeq; i++) {
2095 printf("%s",aln->names[i]);
2096 for (j = 0; j < NJ->nSeq; j++) {
2097 besthit_t hit;
2098 SeqDist(NJ->profiles[i]->codes,NJ->profiles[j]->codes,NJ->nPos,NJ->distance_matrix,/*OUT*/&hit);
2099 if (logdist)
2100 hit.dist = LogCorrect(hit.dist);
2101 /* Make sure -0 prints as 0 */
2102 printf(" %f", hit.dist <= 0.0 ? 0.0 : hit.dist);
2103 }
2104 printf("\n");
2105 }
2106 } else {
2107 /* reset counters*/
2108 profileOps = 0;
2109 outprofileOps = 0;
2110 seqOps = 0;
2111 profileAvgOps = 0;
2112 nHillBetter = 0;
2113 nCloseUsed = 0;
2114 nClose2Used = 0;
2115 nRefreshTopHits = 0;
2116 nVisibleUpdate = 0;
2117 nNNI = 0;
2118 nML_NNI = 0;
2119 nProfileFreqAlloc = 0;
2120 nProfileFreqAvoid = 0;
2121 szAllAlloc = 0;
2122 mymallocUsed = 0;
2123 maxmallocHeap = 0;
2124 nLkCompute = 0;
2125 nPosteriorCompute = 0;
2126 nAAPosteriorExact = 0;
2127 nAAPosteriorRough = 0;
2128 nStarTests = 0;
2129
2130 uniquify_t *unique = UniquifyAln(aln);
2131 ProgressReport("Identified unique sequences",0,0,0,0);
2132
2133 /* read constraints */
2134 alignment_t *constraints = NULL;
2135 char **uniqConstraints = NULL;
2136 if (constraintsFile != NULL) {
2137 constraints = ReadAlignment(fpConstraints, bQuote);
2138 if (constraints->nSeq < 4) {
2139 fprintf(stderr, "Warning: constraints file with less than 4 sequences ignored:\nalignment #%d in %s\n",
2140 iAln+1, constraintsFile);
2141 constraints = FreeAlignment(constraints);
2142 } else {
2143 uniqConstraints = AlnToConstraints(constraints, unique, hashnames);
2144 ProgressReport("Read the constraints",0,0,0,0);
2145 }
2146 } /* end load constraints */
2147
2148 transition_matrix_t *transmat = NULL;
2149 if (nCodes == 20) {
2150 transmat = bUseLg? CreateTransitionMatrix(matrixLG08,statLG08) :
2151 (bUseWag? CreateTransitionMatrix(matrixWAG01,statWAG01) :
2152 CreateTransitionMatrix(matrixJTT92,statJTT92));
2153 } else if (nCodes == 4 && bUseGtr && (bUseGtrRates || bUseGtrFreq)) {
2154 transmat = CreateGTR(gtrrates,gtrfreq);
2155 }
2156 NJ_t *NJ = InitNJ(unique->uniqueSeq, unique->nUnique, aln->nPos,
2157 uniqConstraints,
2158 uniqConstraints != NULL ? constraints->nPos : 0, /* nConstraints */
2159 distance_matrix,
2160 transmat);
2161 if (verbose>2) fprintf(stderr, "read %s seqs %d (%d unique) positions %d nameLast %s seqLast %s\n",
2162 fileName ? fileName : "standard input",
2163 aln->nSeq, unique->nUnique, aln->nPos, aln->names[aln->nSeq-1], aln->seqs[aln->nSeq-1]);
2164 FreeAlignmentSeqs(/*IN/OUT*/aln); /*no longer needed*/
2165 if (fpInTree != NULL) {
2166 if (intree1)
2167 fseek(fpInTree, 0L, SEEK_SET);
2168 ReadTree(/*IN/OUT*/NJ, /*IN*/unique, /*IN*/hashnames, /*READ*/fpInTree);
2169 if (verbose > 2)
2170 fprintf(stderr, "Read tree from %s\n", intreeFile);
2171 if (verbose > 2)
2172 PrintNJ(stderr, NJ, aln->names, unique, /*support*/false, bQuote);
2173 } else {
2174 FastNJ(NJ);
2175 }
2176 LogTree("NJ", 0, fpLog, NJ, aln->names, unique, bQuote);
2177
2178 /* profile-frequencies for the "up-profiles" in ReliabilityNJ take only diameter(Tree)*L*a
2179 space not N*L*a space, because we can free them as we go.
2180 And up-profile by their nature tend to be complicated.
2181 So save the profile-frequency memory allocation counters now to exclude later results.
2182 */
2183 #ifdef TRACK_MEMORY
2184 long svProfileFreqAlloc = nProfileFreqAlloc;
2185 long svProfileFreqAvoid = nProfileFreqAvoid;
2186 #endif
2187 int nniToDo = nni == -1 ? (int)(0.5 + 4.0 * log(NJ->nSeq)/log(2)) : nni;
2188 int sprRemaining = spr;
2189 int MLnniToDo = (MLnni != -1) ? MLnni : (int)(0.5 + 2.0*log(NJ->nSeq)/log(2));
2190 if(verbose>0) {
2191 if (fpInTree == NULL)
2192 fprintf(stderr, "Initial topology in %.2f seconds\n", clockDiff(&clock_start));
2193 if (spr > 0 || nniToDo > 0 || MLnniToDo > 0)
2194 fprintf(stderr,"Refining topology: %d rounds ME-NNIs, %d rounds ME-SPRs, %d rounds ML-NNIs\n", nniToDo, spr, MLnniToDo);
2195 }
2196
2197 if (nniToDo>0) {
2198 int i;
2199 bool bConverged = false;
2200 nni_stats_t *nni_stats = InitNNIStats(NJ);
2201 for (i=0; i < nniToDo; i++) {
2202 double maxDelta;
2203 if (!bConverged) {
2204 int nChange = NNI(/*IN/OUT*/NJ, i, nniToDo, /*use ml*/false, /*IN/OUT*/nni_stats, /*OUT*/&maxDelta);
2205 LogTree("ME_NNI%d",i+1, fpLog, NJ, aln->names, unique, bQuote);
2206 if (nChange == 0) {
2207 bConverged = true;
2208 if (verbose>1)
2209 fprintf(stderr, "Min_evolution NNIs converged at round %d -- skipping some rounds\n", i+1);
2210 if (fpLog)
2211 fprintf(fpLog, "Min_evolution NNIs converged at round %d -- skipping some rounds\n", i+1);
2212 }
2213 }
2214
2215 /* Interleave SPRs with NNIs (typically 1/3rd NNI, SPR, 1/3rd NNI, SPR, 1/3rd NNI */
2216 if (sprRemaining > 0 && (nniToDo/(spr+1) > 0 && ((i+1) % (nniToDo/(spr+1))) == 0)) {
2217 SPR(/*IN/OUT*/NJ, maxSPRLength, spr-sprRemaining, spr);
2218 LogTree("ME_SPR%d",spr-sprRemaining+1, fpLog, NJ, aln->names, unique, bQuote);
2219 sprRemaining--;
2220 /* Restart the NNIs -- set all ages to 0, etc. */
2221 bConverged = false;
2222 nni_stats = FreeNNIStats(nni_stats, NJ);
2223 nni_stats = InitNNIStats(NJ);
2224 }
2225 }
2226 nni_stats = FreeNNIStats(nni_stats, NJ);
2227 }
2228 while(sprRemaining > 0) { /* do any remaining SPR rounds */
2229 SPR(/*IN/OUT*/NJ, maxSPRLength, spr-sprRemaining, spr);
2230 LogTree("ME_SPR%d",spr-sprRemaining+1, fpLog, NJ, aln->names, unique, bQuote);
2231 sprRemaining--;
2232 }
2233
2234 /* In minimum-evolution mode, update branch lengths, even if no NNIs or SPRs,
2235 so that they are log-corrected, do not include penalties from constraints,
2236 and avoid errors due to approximation of out-distances.
2237 If doing maximum-likelihood NNIs, then we'll also use these
2238 to get estimates of starting distances for quartets, etc.
2239 */
2240 UpdateBranchLengths(/*IN/OUT*/NJ);
2241 LogTree("ME_Lengths",0, fpLog, NJ, aln->names, unique, bQuote);
2242
2243 double total_len = 0;
2244 int iNode;
2245 for (iNode = 0; iNode < NJ->maxnode; iNode++)
2246 total_len += fabs(NJ->branchlength[iNode]);
2247
2248 if (verbose>0) {
2249 fprintf(stderr, "Total branch-length %.3f after %.2f sec\n",
2250 total_len, clockDiff(&clock_start));
2251 fflush(stderr);
2252 }
2253 if (fpLog) {
2254 fprintf(fpLog, "Total branch-length %.3f after %.2f sec\n",
2255 total_len, clockDiff(&clock_start));
2256 fflush(stderr);
2257 }
2258
2259 #ifdef TRACK_MEMORY
2260 if (verbose>1) {
2261 struct mallinfo mi = mallinfo();
2262 fprintf(stderr, "Memory @ end of ME phase: %.2f MB (%.1f byte/pos) useful %.2f expected %.2f\n",
2263 (mi.arena+mi.hblkhd)/1.0e6, (mi.arena+mi.hblkhd)/(double)(NJ->nSeq*(double)NJ->nPos),
2264 mi.uordblks/1.0e6, mymallocUsed/1e6);
2265 }
2266 #endif
2267
2268 SplitCount_t splitcount = {0,0,0,0,0.0,0.0};
2269
2270 if (MLnniToDo > 0 || MLlen) {
2271 bool warn_len = total_len/NJ->maxnode < 0.001 && MLMinBranchLengthTolerance > 1.0/aln->nPos;
2272 bool warn = warn_len || (total_len/NJ->maxnode < 0.001 && aln->nPos >= 10000);
2273 if (warn)
2274 fprintf(stderr, "\nWARNING! This alignment consists of closely-related and very-long sequences.\n");
2275 if (warn_len)
2276 fprintf(stderr,
2277 "This version of FastTree may not report reasonable branch lengths!\n"
2278 #ifdef USE_DOUBLE
2279 "Consider changing MLMinBranchLengthTolerance.\n"
2280 #else
2281 "Consider recompiling FastTree with -DUSE_DOUBLE.\n"
2282 #endif
2283 "For more information, visit\n"
2284 "http://www.microbesonline.org/fasttree/#BranchLen\n\n");
2285 if (warn)
2286 fprintf(stderr, "WARNING! FastTree (or other standard maximum-likelihood tools)\n"
2287 "may not be appropriate for aligments of very closely-related sequences\n"
2288 "like this one, as FastTree does not account for recombination or gene conversion\n\n");
2289
2290 /* Do maximum-likelihood computations */
2291 /* Convert profiles to use the transition matrix */
2292 distance_matrix_t *tmatAsDist = TransMatToDistanceMat(/*OPTIONAL*/NJ->transmat);
2293 RecomputeProfiles(NJ, /*OPTIONAL*/tmatAsDist);
2294 tmatAsDist = myfree(tmatAsDist, sizeof(distance_matrix_t));
2295 double lastloglk = -1e20;
2296 nni_stats_t *nni_stats = InitNNIStats(NJ);
2297 bool resetGtr = nCodes == 4 && bUseGtr && !bUseGtrRates;
2298
2299 if (MLlen) {
2300 int iRound;
2301 int maxRound = (int)(0.5 + log(NJ->nSeq)/log(2));
2302 double dLastLogLk = -1e20;
2303 for (iRound = 1; iRound <= maxRound; iRound++) {
2304 int node;
2305 numeric_t *oldlength = (numeric_t*)mymalloc(sizeof(numeric_t)*NJ->maxnodes);
2306 for (node = 0; node < NJ->maxnode; node++)
2307 oldlength[node] = NJ->branchlength[node];
2308 OptimizeAllBranchLengths(/*IN/OUT*/NJ);
2309 LogTree("ML_Lengths",iRound, fpLog, NJ, aln->names, unique, bQuote);
2310 double dMaxChange = 0; /* biggest change in branch length */
2311 for (node = 0; node < NJ->maxnode; node++) {
2312 double d = fabs(oldlength[node] - NJ->branchlength[node]);
2313 if (dMaxChange < d)
2314 dMaxChange = d;
2315 }
2316 oldlength = myfree(oldlength, sizeof(numeric_t)*NJ->maxnodes);
2317 double loglk = TreeLogLk(NJ, /*site_likelihoods*/NULL);
2318 bool bConverged = iRound > 1 && (dMaxChange < 0.001 || loglk < (dLastLogLk+treeLogLkDelta));
2319 if (verbose)
2320 fprintf(stderr, "%d rounds ML lengths: LogLk %s= %.3lf Max-change %.4lf%s Time %.2f\n",
2321 iRound,
2322 exactML || nCodes != 20 ? "" : "~",
2323 loglk,
2324 dMaxChange,
2325 bConverged ? " (converged)" : "",
2326 clockDiff(&clock_start));
2327 if (fpLog)
2328 fprintf(fpLog, "TreeLogLk\tLength%d\t%.4lf\tMaxChange\t%.4lf\n",
2329 iRound, loglk, dMaxChange);
2330 if (iRound == 1) {
2331 if (resetGtr)
2332 SetMLGtr(/*IN/OUT*/NJ, bUseGtrFreq ? gtrfreq : NULL, fpLog);
2333 SetMLRates(/*IN/OUT*/NJ, nRateCats);
2334 LogMLRates(fpLog, NJ);
2335 }
2336 if (bConverged)
2337 break;
2338 }
2339 }
2340
2341 if (MLnniToDo > 0) {
2342 /* This may help us converge faster, and is fast */
2343 OptimizeAllBranchLengths(/*IN/OUT*/NJ);
2344 LogTree("ML_Lengths%d",1, fpLog, NJ, aln->names, unique, bQuote);
2345 }
2346
2347 int iMLnni;
2348 double maxDelta;
2349 bool bConverged = false;
2350 for (iMLnni = 0; iMLnni < MLnniToDo; iMLnni++) {
2351 int changes = NNI(/*IN/OUT*/NJ, iMLnni, MLnniToDo, /*use ml*/true, /*IN/OUT*/nni_stats, /*OUT*/&maxDelta);
2352 LogTree("ML_NNI%d",iMLnni+1, fpLog, NJ, aln->names, unique, bQuote);
2353 double loglk = TreeLogLk(NJ, /*site_likelihoods*/NULL);
2354 bool bConvergedHere = (iMLnni > 0) && ((loglk < lastloglk + treeLogLkDelta) || maxDelta < treeLogLkDelta);
2355 if (verbose)
2356 fprintf(stderr, "ML-NNI round %d: LogLk %s= %.3f NNIs %d max delta %.2f Time %.2f%s\n",
2357 iMLnni+1,
2358 exactML || nCodes != 20 ? "" : "~",
2359 loglk, changes, maxDelta, clockDiff(&clock_start),
2360 bConverged ? " (final)" : "");
2361 if (fpLog)
2362 fprintf(fpLog, "TreeLogLk\tML_NNI%d\t%.4lf\tMaxChange\t%.4lf\n", iMLnni+1, loglk, maxDelta);
2363 if (bConverged)
2364 break; /* we did our extra round */
2365 if (bConvergedHere)
2366 bConverged = true;
2367 if (bConverged || iMLnni == MLnniToDo-2) {
2368 /* last round uses high-accuracy seettings -- reset NNI stats to tone down heuristics */
2369 nni_stats = FreeNNIStats(nni_stats, NJ);
2370 nni_stats = InitNNIStats(NJ);
2371 if (verbose)
2372 fprintf(stderr, "Turning off heuristics for final round of ML NNIs%s\n",
2373 bConvergedHere? " (converged)" : "");
2374 if (fpLog)
2375 fprintf(fpLog, "Turning off heuristics for final round of ML NNIs%s\n",
2376 bConvergedHere? " (converged)" : "");
2377 }
2378 lastloglk = loglk;
2379 if (iMLnni == 0 && NJ->rates.nRateCategories == 1) {
2380 if (resetGtr)
2381 SetMLGtr(/*IN/OUT*/NJ, bUseGtrFreq ? gtrfreq : NULL, fpLog);
2382 SetMLRates(/*IN/OUT*/NJ, nRateCats);
2383 LogMLRates(fpLog, NJ);
2384 }
2385 }
2386 nni_stats = FreeNNIStats(nni_stats, NJ);
2387
2388 /* This does not take long and improves the results */
2389 if (MLnniToDo > 0) {
2390 OptimizeAllBranchLengths(/*IN/OUT*/NJ);
2391 LogTree("ML_Lengths%d",2, fpLog, NJ, aln->names, unique, bQuote);
2392 if (verbose || fpLog) {
2393 double loglk = TreeLogLk(NJ, /*site_likelihoods*/NULL);
2394 if (verbose)
2395 fprintf(stderr, "Optimize all lengths: LogLk %s= %.3f Time %.2f\n",
2396 exactML || nCodes != 20 ? "" : "~",
2397 loglk,
2398 clockDiff(&clock_start));
2399 if (fpLog) {
2400 fprintf(fpLog, "TreeLogLk\tML_Lengths%d\t%.4f\n", 2, loglk);
2401 fflush(fpLog);
2402 }
2403 }
2404 }
2405
2406 /* Count bad splits and compute SH-like supports if desired */
2407 if ((MLnniToDo > 0 && !fastest) || nBootstrap > 0)
2408 TestSplitsML(NJ, /*OUT*/&splitcount, nBootstrap);
2409
2410 /* Compute gamma-based likelihood? */
2411 if (gammaLogLk && nRateCats > 1) {
2412 numeric_t *rates = MLSiteRates(nRateCats);
2413 double *site_loglk = MLSiteLikelihoodsByRate(NJ, rates, nRateCats);
2414 double scale = RescaleGammaLogLk(NJ->nPos, nRateCats, rates, /*IN*/site_loglk, /*OPTIONAL*/fpLog);
2415 rates = myfree(rates, sizeof(numeric_t) * nRateCats);
2416 site_loglk = myfree(site_loglk, sizeof(double) * nRateCats * NJ->nPos);
2417
2418 for (i = 0; i < NJ->maxnodes; i++)
2419 NJ->branchlength[i] *= scale;
2420 }
2421 } else {
2422 /* Minimum evolution supports */
2423 TestSplitsMinEvo(NJ, /*OUT*/&splitcount);
2424 if (nBootstrap > 0)
2425 ReliabilityNJ(NJ, nBootstrap);
2426 }
2427
2428 for (i = 0; i < nFPs; i++) {
2429 FILE *fp = fps[i];
2430 fprintf(fp, "Total time: %.2f seconds Unique: %d/%d Bad splits: %d/%d",
2431 clockDiff(&clock_start),
2432 NJ->nSeq, aln->nSeq,
2433 splitcount.nBadSplits, splitcount.nSplits);
2434 if (splitcount.dWorstDeltaUnconstrained > 0)
2435 fprintf(fp, " Worst %sdelta-%s %.3f",
2436 uniqConstraints != NULL ? "unconstrained " : "",
2437 (MLnniToDo > 0 || MLlen) ? "LogLk" : "Len",
2438 splitcount.dWorstDeltaUnconstrained);
2439 fprintf(fp,"\n");
2440 if (NJ->nSeq > 3 && NJ->nConstraints > 0) {
2441 fprintf(fp, "Violating constraints: %d both bad: %d",
2442 splitcount.nConstraintViolations, splitcount.nBadBoth);
2443 if (splitcount.dWorstDeltaConstrained > 0)
2444 fprintf(fp, " Worst delta-%s due to constraints: %.3f",
2445 (MLnniToDo > 0 || MLlen) ? "LogLk" : "Len",
2446 splitcount.dWorstDeltaConstrained);
2447 fprintf(fp,"\n");
2448 }
2449 if (verbose > 1 || fp == fpLog) {
2450 double dN2 = NJ->nSeq*(double)NJ->nSeq;
2451 fprintf(fp, "Dist/N**2: by-profile %.3f (out %.3f) by-leaf %.3f avg-prof %.3f\n",
2452 profileOps/dN2, outprofileOps/dN2, seqOps/dN2, profileAvgOps/dN2);
2453 if (nCloseUsed>0 || nClose2Used > 0 || nRefreshTopHits>0)
2454 fprintf(fp, "Top hits: close neighbors %ld/%d 2nd-level %ld refreshes %ld",
2455 nCloseUsed, NJ->nSeq, nClose2Used, nRefreshTopHits);
2456 if(!slow) fprintf(fp, " Hill-climb: %ld Update-best: %ld\n", nHillBetter, nVisibleUpdate);
2457 if (nniToDo > 0 || spr > 0 || MLnniToDo > 0)
2458 fprintf(fp, "NNI: %ld SPR: %ld ML-NNI: %ld\n", nNNI, nSPR, nML_NNI);
2459 if (MLnniToDo > 0) {
2460 fprintf(fp, "Max-lk operations: lk %ld posterior %ld", nLkCompute, nPosteriorCompute);
2461 if (nAAPosteriorExact > 0 || nAAPosteriorRough > 0)
2462 fprintf(fp, " approximate-posteriors %.2f%%",
2463 (100.0*nAAPosteriorRough)/(double)(nAAPosteriorExact+nAAPosteriorRough));
2464 if (mlAccuracy < 2)
2465 fprintf(fp, " star-only %ld", nStarTests);
2466 fprintf(fp, "\n");
2467 }
2468 }
2469 #ifdef TRACK_MEMORY
2470 fprintf(fp, "Memory: %.2f MB (%.1f byte/pos) ",
2471 maxmallocHeap/1.0e6, maxmallocHeap/(double)(aln->nSeq*(double)aln->nPos));
2472 /* Only report numbers from before we do reliability estimates */
2473 fprintf(fp, "profile-freq-alloc %ld avoided %.2f%%\n",
2474 svProfileFreqAlloc,
2475 svProfileFreqAvoid > 0 ?
2476 100.0*svProfileFreqAvoid/(double)(svProfileFreqAlloc+svProfileFreqAvoid)
2477 : 0);
2478 #endif
2479 fflush(fp);
2480 }
2481 PrintNJ(fpOut, NJ, aln->names, unique, /*support*/nBootstrap > 0, bQuote);
2482 fflush(fpOut);
2483 if (fpLog) {
2484 fprintf(fpLog,"TreeCompleted\n");
2485 fflush(fpLog);
2486 }
2487 FreeNJ(NJ);
2488 if (uniqConstraints != NULL)
2489 uniqConstraints = myfree(uniqConstraints, sizeof(char*) * unique->nUnique);
2490 constraints = FreeAlignment(constraints);
2491 unique = FreeUniquify(unique);
2492 } /* end build tree */
2493 hashnames = FreeHashtable(hashnames);
2494 aln = FreeAlignment(aln);
2495 } /* end loop over alignments */
2496 if (fpLog != NULL)
2497 fclose(fpLog);
2498 if (fpOut != stdout) fclose(fpOut);
2499 exit(0);
2500 }
2501
ProgressReport(char * format,int i1,int i2,int i3,int i4)2502 void ProgressReport(char *format, int i1, int i2, int i3, int i4) {
2503 static bool time_set = false;
2504 static struct timeval time_last;
2505 static struct timeval time_begin;
2506
2507 if (!showProgress)
2508 return;
2509
2510 static struct timeval time_now;
2511 gettimeofday(&time_now,NULL);
2512 if (!time_set) {
2513 time_begin = time_last = time_now;
2514 time_set = true;
2515 }
2516 static struct timeval elapsed;
2517 timeval_subtract(&elapsed,&time_now,&time_last);
2518
2519 if (elapsed.tv_sec > 1 || elapsed.tv_usec > 100*1000 || verbose > 1) {
2520 timeval_subtract(&elapsed,&time_now,&time_begin);
2521 fprintf(stderr, "%7i.%2.2i seconds: ", (int)elapsed.tv_sec, (int)(elapsed.tv_usec/10000));
2522 fprintf(stderr, format, i1, i2, i3, i4);
2523 if (verbose > 1 || !isatty(STDERR_FILENO)) {
2524 fprintf(stderr, "\n");
2525 } else {
2526 fprintf(stderr, " \r");
2527 }
2528 fflush(stderr);
2529 time_last = time_now;
2530 }
2531 }
2532
LogMLRates(FILE * fpLog,NJ_t * NJ)2533 void LogMLRates(/*OPTIONAL WRITE*/FILE *fpLog, NJ_t *NJ) {
2534 if (fpLog != NULL) {
2535 rates_t *rates = &NJ->rates;
2536 fprintf(fpLog, "NCategories\t%d\nRates",rates->nRateCategories);
2537 assert(rates->nRateCategories > 0);
2538 int iRate;
2539 for (iRate = 0; iRate < rates->nRateCategories; iRate++)
2540 fprintf(fpLog, " %f", rates->rates[iRate]);
2541 fprintf(fpLog,"\nSiteCategories");
2542 int iPos;
2543 for (iPos = 0; iPos < NJ->nPos; iPos++) {
2544 iRate = rates->ratecat[iPos];
2545 fprintf(fpLog," %d",iRate+1);
2546 }
2547 fprintf(fpLog,"\n");
2548 fflush(fpLog);
2549 }
2550 }
2551
LogTree(char * format,int i,FILE * fpLog,NJ_t * NJ,char ** names,uniquify_t * unique,bool bQuote)2552 void LogTree(char *format, int i, /*OPTIONAL WRITE*/FILE *fpLog, NJ_t *NJ, char **names, uniquify_t *unique, bool bQuote) {
2553 if(fpLog != NULL) {
2554 fprintf(fpLog, format, i);
2555 fprintf(fpLog, "\t");
2556 PrintNJ(fpLog, NJ, names, unique, /*support*/false, bQuote);
2557 fflush(fpLog);
2558 }
2559 }
2560
InitNJ(char ** sequences,int nSeq,int nPos,char ** constraintSeqs,int nConstraints,distance_matrix_t * distance_matrix,transition_matrix_t * transmat)2561 NJ_t *InitNJ(char **sequences, int nSeq, int nPos,
2562 /*OPTIONAL*/char **constraintSeqs, int nConstraints,
2563 /*OPTIONAL*/distance_matrix_t *distance_matrix,
2564 /*OPTIONAL*/transition_matrix_t *transmat) {
2565 int iNode;
2566
2567 NJ_t *NJ = (NJ_t*)mymalloc(sizeof(NJ_t));
2568 NJ->root = -1; /* set at end of FastNJ() */
2569 NJ->maxnode = NJ->nSeq = nSeq;
2570 NJ->nPos = nPos;
2571 NJ->maxnodes = 2*nSeq;
2572 NJ->seqs = sequences;
2573 NJ->distance_matrix = distance_matrix;
2574 NJ->transmat = transmat;
2575 NJ->nConstraints = nConstraints;
2576 NJ->constraintSeqs = constraintSeqs;
2577
2578 NJ->profiles = (profile_t **)mymalloc(sizeof(profile_t*) * NJ->maxnodes);
2579
2580 unsigned long counts[256];
2581 int i;
2582 for (i = 0; i < 256; i++)
2583 counts[i] = 0;
2584 for (iNode = 0; iNode < NJ->nSeq; iNode++) {
2585 NJ->profiles[iNode] = SeqToProfile(NJ, NJ->seqs[iNode], nPos,
2586 constraintSeqs != NULL ? constraintSeqs[iNode] : NULL,
2587 nConstraints,
2588 iNode,
2589 /*IN/OUT*/counts);
2590 }
2591 unsigned long totCount = 0;
2592 for (i = 0; i < 256; i++)
2593 totCount += counts[i];
2594
2595 /* warnings about unknown characters */
2596 for (i = 0; i < 256; i++) {
2597 if (counts[i] == 0 || i == '.' || i == '-')
2598 continue;
2599 unsigned char *codesP;
2600 bool bMatched = false;
2601 for (codesP = codesString; *codesP != '\0'; codesP++) {
2602 if (*codesP == i || tolower(*codesP) == i) {
2603 bMatched = true;
2604 break;
2605 }
2606 }
2607 if (!bMatched)
2608 fprintf(stderr, "Ignored unknown character %c (seen %lu times)\n", i, counts[i]);
2609 }
2610
2611
2612 /* warnings about the counts */
2613 double fACGTUN = (counts['A'] + counts['C'] + counts['G'] + counts['T'] + counts['U'] + counts['N']
2614 + counts['a'] + counts['c'] + counts['g'] + counts['t'] + counts['u'] + counts['n'])
2615 / (double)(totCount - counts['-'] - counts['.']);
2616 if (nCodes == 4 && fACGTUN < 0.9)
2617 fprintf(stderr, "WARNING! ONLY %.1f%% NUCLEOTIDE CHARACTERS -- IS THIS REALLY A NUCLEOTIDE ALIGNMENT?\n",
2618 100.0 * fACGTUN);
2619 else if (nCodes == 20 && fACGTUN >= 0.9)
2620 fprintf(stderr, "WARNING! %.1f%% NUCLEOTIDE CHARACTERS -- IS THIS REALLY A PROTEIN ALIGNMENT?\n",
2621 100.0 * fACGTUN);
2622
2623 if(verbose>10) fprintf(stderr,"Made sequence profiles\n");
2624 for (iNode = NJ->nSeq; iNode < NJ->maxnodes; iNode++)
2625 NJ->profiles[iNode] = NULL; /* not yet exists */
2626
2627 NJ->outprofile = OutProfile(NJ->profiles, NJ->nSeq,
2628 NJ->nPos, NJ->nConstraints,
2629 NJ->distance_matrix);
2630 if(verbose>10) fprintf(stderr,"Made out-profile\n");
2631
2632 NJ->totdiam = 0.0;
2633
2634 NJ->diameter = (numeric_t *)mymalloc(sizeof(numeric_t)*NJ->maxnodes);
2635 for (iNode = 0; iNode < NJ->maxnodes; iNode++) NJ->diameter[iNode] = 0;
2636
2637 NJ->varDiameter = (numeric_t *)mymalloc(sizeof(numeric_t)*NJ->maxnodes);
2638 for (iNode = 0; iNode < NJ->maxnodes; iNode++) NJ->varDiameter[iNode] = 0;
2639
2640 NJ->selfdist = (numeric_t *)mymalloc(sizeof(numeric_t)*NJ->maxnodes);
2641 for (iNode = 0; iNode < NJ->maxnodes; iNode++) NJ->selfdist[iNode] = 0;
2642
2643 NJ->selfweight = (numeric_t *)mymalloc(sizeof(numeric_t)*NJ->maxnodes);
2644 for (iNode = 0; iNode < NJ->nSeq; iNode++)
2645 NJ->selfweight[iNode] = NJ->nPos - NGaps(NJ,iNode);
2646
2647 NJ->outDistances = (numeric_t *)mymalloc(sizeof(numeric_t)*NJ->maxnodes);
2648 NJ->nOutDistActive = (int *)mymalloc(sizeof(int)*NJ->maxnodes);
2649 for (iNode = 0; iNode < NJ->maxnodes; iNode++)
2650 NJ->nOutDistActive[iNode] = NJ->nSeq * 10; /* unreasonably high value */
2651 NJ->parent = NULL; /* so SetOutDistance ignores it */
2652 for (iNode = 0; iNode < NJ->nSeq; iNode++)
2653 SetOutDistance(/*IN/UPDATE*/NJ, iNode, /*nActive*/NJ->nSeq);
2654
2655 if (verbose>2) {
2656 for (iNode = 0; iNode < 4 && iNode < NJ->nSeq; iNode++)
2657 fprintf(stderr, "Node %d outdist %f\n", iNode, NJ->outDistances[iNode]);
2658 }
2659
2660 NJ->parent = (int *)mymalloc(sizeof(int)*NJ->maxnodes);
2661 for (iNode = 0; iNode < NJ->maxnodes; iNode++) NJ->parent[iNode] = -1;
2662
2663 NJ->branchlength = (numeric_t *)mymalloc(sizeof(numeric_t)*NJ->maxnodes); /* distance to parent */
2664 for (iNode = 0; iNode < NJ->maxnodes; iNode++) NJ->branchlength[iNode] = 0;
2665
2666 NJ->support = (numeric_t *)mymalloc(sizeof(numeric_t)*NJ->maxnodes);
2667 for (iNode = 0; iNode < NJ->maxnodes; iNode++) NJ->support[iNode] = -1.0;
2668
2669 NJ->child = (children_t*)mymalloc(sizeof(children_t)*NJ->maxnodes);
2670 for (iNode= 0; iNode < NJ->maxnode; iNode++) NJ->child[iNode].nChild = 0;
2671
2672 NJ->rates.nRateCategories = 0;
2673 NJ->rates.rates = NULL;
2674 NJ->rates.ratecat = NULL;
2675 AllocRateCategories(&NJ->rates, 1, NJ->nPos);
2676 return(NJ);
2677 }
2678
FreeNJ(NJ_t * NJ)2679 NJ_t *FreeNJ(NJ_t *NJ) {
2680 if (NJ==NULL)
2681 return(NJ);
2682
2683 int i;
2684 for (i=0; i < NJ->maxnode; i++)
2685 NJ->profiles[i] = FreeProfile(NJ->profiles[i], NJ->nPos, NJ->nConstraints);
2686 NJ->profiles = myfree(NJ->profiles, sizeof(profile_t*) * NJ->maxnodes);
2687 NJ->outprofile = FreeProfile(NJ->outprofile, NJ->nPos, NJ->nConstraints);
2688 NJ->diameter = myfree(NJ->diameter, sizeof(numeric_t)*NJ->maxnodes);
2689 NJ->varDiameter = myfree(NJ->varDiameter, sizeof(numeric_t)*NJ->maxnodes);
2690 NJ->selfdist = myfree(NJ->selfdist, sizeof(numeric_t)*NJ->maxnodes);
2691 NJ->selfweight = myfree(NJ->selfweight, sizeof(numeric_t)*NJ->maxnodes);
2692 NJ->outDistances = myfree(NJ->outDistances, sizeof(numeric_t)*NJ->maxnodes);
2693 NJ->nOutDistActive = myfree(NJ->nOutDistActive, sizeof(int)*NJ->maxnodes);
2694 NJ->parent = myfree(NJ->parent, sizeof(int)*NJ->maxnodes);
2695 NJ->branchlength = myfree(NJ->branchlength, sizeof(numeric_t)*NJ->maxnodes);
2696 NJ->support = myfree(NJ->support, sizeof(numeric_t)*NJ->maxnodes);
2697 NJ->child = myfree(NJ->child, sizeof(children_t)*NJ->maxnodes);
2698 NJ->transmat = myfree(NJ->transmat, sizeof(transition_matrix_t));
2699 AllocRateCategories(&NJ->rates, 0, NJ->nPos);
2700 return(myfree(NJ, sizeof(NJ_t)));
2701 }
2702
2703 /* Allocate or reallocate the rate categories, and set every position
2704 to category 0 and every category's rate to 1.0
2705 If nRateCategories=0, just deallocate
2706 */
AllocRateCategories(rates_t * rates,int nRateCategories,int nPos)2707 void AllocRateCategories(/*IN/OUT*/rates_t *rates, int nRateCategories, int nPos) {
2708 assert(nRateCategories >= 0);
2709 rates->rates = myfree(rates->rates, sizeof(numeric_t)*rates->nRateCategories);
2710 rates->ratecat = myfree(rates->ratecat, sizeof(unsigned int)*nPos);
2711 rates->nRateCategories = nRateCategories;
2712 if (rates->nRateCategories > 0) {
2713 rates->rates = (numeric_t*)mymalloc(sizeof(numeric_t)*rates->nRateCategories);
2714 int i;
2715 for (i = 0; i < nRateCategories; i++)
2716 rates->rates[i] = 1.0;
2717 rates->ratecat = (unsigned int *)mymalloc(sizeof(unsigned int)*nPos);
2718 for (i = 0; i < nPos; i++)
2719 rates->ratecat[i] = 0;
2720 }
2721 }
2722
FastNJ(NJ_t * NJ)2723 void FastNJ(NJ_t *NJ) {
2724 int iNode;
2725
2726 assert(NJ->nSeq >= 1);
2727 if (NJ->nSeq < 3) {
2728 NJ->root = NJ->maxnode++;
2729 NJ->child[NJ->root].nChild = NJ->nSeq;
2730 for (iNode = 0; iNode < NJ->nSeq; iNode++) {
2731 NJ->parent[iNode] = NJ->root;
2732 NJ->child[NJ->root].child[iNode] = iNode;
2733 }
2734 if (NJ->nSeq == 1) {
2735 NJ->branchlength[0] = 0;
2736 } else {
2737 assert (NJ->nSeq == 2);
2738 besthit_t hit;
2739 SeqDist(NJ->profiles[0]->codes,NJ->profiles[1]->codes,NJ->nPos,NJ->distance_matrix,/*OUT*/&hit);
2740 NJ->branchlength[0] = hit.dist/2.0;
2741 NJ->branchlength[1] = hit.dist/2.0;
2742 }
2743 return;
2744 }
2745
2746 /* else 3 or more sequences */
2747
2748 /* The visible set stores the best hit of each node (unless using top hits, in which case
2749 it is handled by the top hits routines) */
2750 besthit_t *visible = NULL; /* Not used if doing top hits */
2751 besthit_t *besthitNew = NULL; /* All hits of new node -- not used if doing top-hits */
2752
2753 /* The top-hits lists, with the key parameter m = length of each top-hit list */
2754 top_hits_t *tophits = NULL;
2755 int m = 0; /* maximum length of a top-hits list */
2756 if (tophitsMult > 0) {
2757 m = (int)(0.5 + tophitsMult*sqrt(NJ->nSeq));
2758 if(m<4 || 2*m >= NJ->nSeq) {
2759 m=0;
2760 if(verbose>1) fprintf(stderr,"Too few leaves, turning off top-hits\n");
2761 } else {
2762 if(verbose>2) fprintf(stderr,"Top-hit-list size = %d of %d\n", m, NJ->nSeq);
2763 }
2764 }
2765 assert(!(slow && m>0));
2766
2767 /* Initialize top-hits or visible set */
2768 if (m>0) {
2769 tophits = InitTopHits(NJ, m);
2770 SetAllLeafTopHits(/*IN/UPDATE*/NJ, /*OUT*/tophits);
2771 ResetTopVisible(/*IN/UPDATE*/NJ, /*nActive*/NJ->nSeq, /*IN/OUT*/tophits);
2772 } else if (!slow) {
2773 visible = (besthit_t*)mymalloc(sizeof(besthit_t)*NJ->maxnodes);
2774 besthitNew = (besthit_t*)mymalloc(sizeof(besthit_t)*NJ->maxnodes);
2775 for (iNode = 0; iNode < NJ->nSeq; iNode++)
2776 SetBestHit(iNode, NJ, /*nActive*/NJ->nSeq, /*OUT*/&visible[iNode], /*OUT IGNORED*/NULL);
2777 }
2778
2779 /* Iterate over joins */
2780 int nActiveOutProfileReset = NJ->nSeq;
2781 int nActive;
2782 for (nActive = NJ->nSeq; nActive > 3; nActive--) {
2783 int nJoinsDone = NJ->nSeq - nActive;
2784 if (nJoinsDone > 0 && (nJoinsDone % 100) == 0)
2785 ProgressReport("Joined %6d of %6d", nJoinsDone, NJ->nSeq-3, 0, 0);
2786
2787 besthit_t join; /* the join to do */
2788 if (slow) {
2789 ExhaustiveNJSearch(NJ,nActive,/*OUT*/&join);
2790 } else if (m>0) {
2791 TopHitNJSearch(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/tophits, /*OUT*/&join);
2792 } else {
2793 FastNJSearch(NJ, nActive, /*IN/OUT*/visible, /*OUT*/&join);
2794 }
2795
2796 if (verbose>2) {
2797 double penalty = constraintWeight
2798 * (double)JoinConstraintPenalty(NJ, join.i, join.j);
2799 if (penalty > 0.001) {
2800 fprintf(stderr, "Constraint violation during neighbor-joining %d %d into %d penalty %.3f\n",
2801 join.i, join.j, NJ->maxnode, penalty);
2802 int iC;
2803 for (iC = 0; iC < NJ->nConstraints; iC++) {
2804 int local = JoinConstraintPenaltyPiece(NJ, join.i, join.j, iC);
2805 if (local > 0)
2806 fprintf(stderr, "Constraint %d piece %d %d/%d %d/%d %d/%d\n", iC, local,
2807 NJ->profiles[join.i]->nOn[iC],
2808 NJ->profiles[join.i]->nOff[iC],
2809 NJ->profiles[join.j]->nOn[iC],
2810 NJ->profiles[join.j]->nOff[iC],
2811 NJ->outprofile->nOn[iC] - NJ->profiles[join.i]->nOn[iC] - NJ->profiles[join.j]->nOn[iC],
2812 NJ->outprofile->nOff[iC] - NJ->profiles[join.i]->nOff[iC] - NJ->profiles[join.j]->nOff[iC]);
2813 }
2814 }
2815 }
2816
2817 /* because of the stale out-distance heuristic, make sure that these are up-to-date */
2818 SetOutDistance(NJ, join.i, nActive);
2819 SetOutDistance(NJ, join.j, nActive);
2820 /* Make sure weight is set and criterion is up to date */
2821 SetDistCriterion(NJ, nActive, /*IN/OUT*/&join);
2822 assert(NJ->nOutDistActive[join.i] == nActive);
2823 assert(NJ->nOutDistActive[join.j] == nActive);
2824
2825 int newnode = NJ->maxnode++;
2826 NJ->parent[join.i] = newnode;
2827 NJ->parent[join.j] = newnode;
2828 NJ->child[newnode].nChild = 2;
2829 NJ->child[newnode].child[0] = join.i < join.j ? join.i : join.j;
2830 NJ->child[newnode].child[1] = join.i > join.j ? join.i : join.j;
2831
2832 double rawIJ = join.dist + NJ->diameter[join.i] + NJ->diameter[join.j];
2833 double distIJ = join.dist;
2834
2835 double deltaDist = (NJ->outDistances[join.i]-NJ->outDistances[join.j])/(double)(nActive-2);
2836 NJ->branchlength[join.i] = (distIJ + deltaDist)/2;
2837 NJ->branchlength[join.j] = (distIJ - deltaDist)/2;
2838
2839 double bionjWeight = 0.5; /* IJ = bionjWeight*I + (1-bionjWeight)*J */
2840 double varIJ = rawIJ - NJ->varDiameter[join.i] - NJ->varDiameter[join.j];
2841
2842 if (bionj && join.weight > 0.01 && varIJ > 0.001) {
2843 /* Set bionjWeight according to the BIONJ formula, where
2844 the variance matrix is approximated by
2845
2846 Vij = ProfileVar(i,j) - varDiameter(i) - varDiameter(j)
2847 ProfileVar(i,j) = distance(i,j) = top(i,j)/weight(i,j)
2848
2849 (The node's distance diameter does not affect the variances.)
2850
2851 The BIONJ formula is equation 9 from Gascuel 1997:
2852
2853 bionjWeight = 1/2 + sum(k!=i,j) (Vjk - Vik) / ((nActive-2)*Vij)
2854 sum(k!=i,j) (Vjk - Vik) = sum(k!=i,j) Vik - varDiameter(j) + varDiameter(i)
2855 = sum(k!=i,j) ProfileVar(j,k) - sum(k!=i,j) ProfileVar(i,k) + (nActive-2)*(varDiameter(i)-varDiameter(j))
2856
2857 sum(k!=i,j) ProfileVar(i,k)
2858 ~= (sum(k!=i,j) distance(i,k) * weight(i,k))/(mean(k!=i,j) weight(i,k))
2859 ~= (N-2) * top(i, Out-i-j) / weight(i, Out-i-j)
2860
2861 weight(i, Out-i-j) = N*weight(i,Out) - weight(i,i) - weight(i,j)
2862 top(i, Out-i-j) = N*top(i,Out) - top(i,i) - top(i,j)
2863 */
2864 besthit_t outI;
2865 besthit_t outJ;
2866 ProfileDist(NJ->profiles[join.i],NJ->outprofile,NJ->nPos,NJ->distance_matrix,/*OUT*/&outI);
2867 ProfileDist(NJ->profiles[join.j],NJ->outprofile,NJ->nPos,NJ->distance_matrix,/*OUT*/&outJ);
2868 outprofileOps += 2;
2869
2870 double varIWeight = (nActive * outI.weight - NJ->selfweight[join.i] - join.weight);
2871 double varJWeight = (nActive * outJ.weight - NJ->selfweight[join.j] - join.weight);
2872
2873 double varITop = outI.dist * outI.weight * nActive
2874 - NJ->selfdist[join.i] * NJ->selfweight[join.i] - rawIJ * join.weight;
2875 double varJTop = outJ.dist * outJ.weight * nActive
2876 - NJ->selfdist[join.j] * NJ->selfweight[join.j] - rawIJ * join.weight;
2877
2878 double deltaProfileVarOut = (nActive-2) * (varJTop/varJWeight - varITop/varIWeight);
2879 double deltaVarDiam = (nActive-2)*(NJ->varDiameter[join.i] - NJ->varDiameter[join.j]);
2880 if (varJWeight > 0.01 && varIWeight > 0.01)
2881 bionjWeight = 0.5 + (deltaProfileVarOut+deltaVarDiam)/(2*(nActive-2)*varIJ);
2882 if(bionjWeight<0) bionjWeight=0;
2883 if(bionjWeight>1) bionjWeight=1;
2884 if (verbose>2) fprintf(stderr,"dVarO %f dVarDiam %f varIJ %f from dist %f weight %f (pos %d) bionjWeight %f %f\n",
2885 deltaProfileVarOut, deltaVarDiam,
2886 varIJ, join.dist, join.weight, NJ->nPos,
2887 bionjWeight, 1-bionjWeight);
2888 if (verbose>3 && (newnode%5) == 0) {
2889 /* Compare weight estimated from outprofiles from weight made by summing over other nodes */
2890 double deltaProfileVarTot = 0;
2891 for (iNode = 0; iNode < newnode; iNode++) {
2892 if (NJ->parent[iNode] < 0) { /* excludes join.i, join.j */
2893 besthit_t di, dj;
2894 ProfileDist(NJ->profiles[join.i],NJ->profiles[iNode],NJ->nPos,NJ->distance_matrix,/*OUT*/&di);
2895 ProfileDist(NJ->profiles[join.j],NJ->profiles[iNode],NJ->nPos,NJ->distance_matrix,/*OUT*/&dj);
2896 deltaProfileVarTot += dj.dist - di.dist;
2897 }
2898 }
2899 double lambdaTot = 0.5 + (deltaProfileVarTot+deltaVarDiam)/(2*(nActive-2)*varIJ);
2900 if (lambdaTot < 0) lambdaTot = 0;
2901 if (lambdaTot > 1) lambdaTot = 1;
2902 if (fabs(bionjWeight-lambdaTot) > 0.01 || verbose > 4)
2903 fprintf(stderr, "deltaProfileVar actual %.6f estimated %.6f lambda actual %.3f estimated %.3f\n",
2904 deltaProfileVarTot,deltaProfileVarOut,lambdaTot,bionjWeight);
2905 }
2906 }
2907 if (verbose > 2) fprintf(stderr, "Join\t%d\t%d\t%.6f\tlambda\t%.6f\tselfw\t%.3f\t%.3f\tnew\t%d\n",
2908 join.i < join.j ? join.i : join.j,
2909 join.i < join.j ? join.j : join.i,
2910 join.criterion, bionjWeight,
2911 NJ->selfweight[join.i < join.j ? join.i : join.j],
2912 NJ->selfweight[join.i < join.j ? join.j : join.i],
2913 newnode);
2914
2915 NJ->diameter[newnode] = bionjWeight * (NJ->branchlength[join.i] + NJ->diameter[join.i])
2916 + (1-bionjWeight) * (NJ->branchlength[join.j] + NJ->diameter[join.j]);
2917 NJ->varDiameter[newnode] = bionjWeight * NJ->varDiameter[join.i]
2918 + (1-bionjWeight) * NJ->varDiameter[join.j]
2919 + bionjWeight * (1-bionjWeight) * varIJ;
2920
2921 NJ->profiles[newnode] = AverageProfile(NJ->profiles[join.i],NJ->profiles[join.j],
2922 NJ->nPos, NJ->nConstraints,
2923 NJ->distance_matrix,
2924 bionj ? bionjWeight : /*noweight*/-1.0);
2925
2926 /* Update out-distances and total diameters */
2927 int changedActiveOutProfile = nActiveOutProfileReset - (nActive-1);
2928 if (changedActiveOutProfile >= nResetOutProfile
2929 && changedActiveOutProfile >= fResetOutProfile * nActiveOutProfileReset) {
2930 /* Recompute the outprofile from scratch to avoid roundoff error */
2931 profile_t **activeProfiles = (profile_t**)mymalloc(sizeof(profile_t*)*(nActive-1));
2932 int nSaved = 0;
2933 NJ->totdiam = 0;
2934 for (iNode=0;iNode<NJ->maxnode;iNode++) {
2935 if (NJ->parent[iNode]<0) {
2936 assert(nSaved < nActive-1);
2937 activeProfiles[nSaved++] = NJ->profiles[iNode];
2938 NJ->totdiam += NJ->diameter[iNode];
2939 }
2940 }
2941 assert(nSaved==nActive-1);
2942 FreeProfile(NJ->outprofile, NJ->nPos, NJ->nConstraints);
2943 if(verbose>2) fprintf(stderr,"Recomputing outprofile %d %d\n",nActiveOutProfileReset,nActive-1);
2944 NJ->outprofile = OutProfile(activeProfiles, nSaved,
2945 NJ->nPos, NJ->nConstraints,
2946 NJ->distance_matrix);
2947 activeProfiles = myfree(activeProfiles, sizeof(profile_t*)*(nActive-1));
2948 nActiveOutProfileReset = nActive-1;
2949 } else {
2950 UpdateOutProfile(/*OUT*/NJ->outprofile,
2951 NJ->profiles[join.i], NJ->profiles[join.j], NJ->profiles[newnode],
2952 nActive,
2953 NJ->nPos, NJ->nConstraints,
2954 NJ->distance_matrix);
2955 NJ->totdiam += NJ->diameter[newnode] - NJ->diameter[join.i] - NJ->diameter[join.j];
2956 }
2957
2958 /* Store self-dist for use in other computations */
2959 besthit_t selfdist;
2960 ProfileDist(NJ->profiles[newnode],NJ->profiles[newnode],NJ->nPos,NJ->distance_matrix,/*OUT*/&selfdist);
2961 NJ->selfdist[newnode] = selfdist.dist;
2962 NJ->selfweight[newnode] = selfdist.weight;
2963
2964 /* Find the best hit of the joined node IJ */
2965 if (m>0) {
2966 TopHitJoin(newnode, /*IN/UPDATE*/NJ, nActive-1, /*IN/OUT*/tophits);
2967 } else {
2968 /* Not using top-hits, so we update all out-distances */
2969 for (iNode = 0; iNode < NJ->maxnode; iNode++) {
2970 if (NJ->parent[iNode] < 0) {
2971 /* True nActive is now nActive-1 */
2972 SetOutDistance(/*IN/UPDATE*/NJ, iNode, nActive-1);
2973 }
2974 }
2975
2976 if(visible != NULL) {
2977 SetBestHit(newnode, NJ, nActive-1, /*OUT*/&visible[newnode], /*OUT OPTIONAL*/besthitNew);
2978 if (verbose>2)
2979 fprintf(stderr,"Visible %d %d %f %f\n",
2980 visible[newnode].i, visible[newnode].j,
2981 visible[newnode].dist, visible[newnode].criterion);
2982 if (besthitNew != NULL) {
2983 /* Use distances to new node to update visible set entries that are non-optimal */
2984 for (iNode = 0; iNode < NJ->maxnode; iNode++) {
2985 if (NJ->parent[iNode] >= 0 || iNode == newnode)
2986 continue;
2987 int iOldVisible = visible[iNode].j;
2988 assert(iOldVisible>=0);
2989 assert(visible[iNode].i == iNode);
2990
2991 /* Update the criterion; use nActive-1 because haven't decremented nActive yet */
2992 if (NJ->parent[iOldVisible] < 0)
2993 SetCriterion(/*IN/OUT*/NJ, nActive-1, &visible[iNode]);
2994
2995 if (NJ->parent[iOldVisible] >= 0
2996 || besthitNew[iNode].criterion < visible[iNode].criterion) {
2997 if(verbose>3) fprintf(stderr,"Visible %d reset from %d to %d (%f vs. %f)\n",
2998 iNode, iOldVisible,
2999 newnode, visible[iNode].criterion, besthitNew[iNode].criterion);
3000 if(NJ->parent[iOldVisible] < 0) nVisibleUpdate++;
3001 visible[iNode].j = newnode;
3002 visible[iNode].dist = besthitNew[iNode].dist;
3003 visible[iNode].criterion = besthitNew[iNode].criterion;
3004 }
3005 } /* end loop over all nodes */
3006 } /* end if recording all hits of new node */
3007 } /* end if keeping a visible set */
3008 } /* end else (m==0) */
3009 } /* end loop over nActive */
3010
3011 #ifdef TRACK_MEMORY
3012 if (verbose>1) {
3013 struct mallinfo mi = mallinfo();
3014 fprintf(stderr, "Memory @ end of FastNJ(): %.2f MB (%.1f byte/pos) useful %.2f expected %.2f\n",
3015 (mi.arena+mi.hblkhd)/1.0e6, (mi.arena+mi.hblkhd)/(double)(NJ->nSeq*(double)NJ->nPos),
3016 mi.uordblks/1.0e6, mymallocUsed/1e6);
3017 }
3018 #endif
3019
3020 /* We no longer need the tophits, visible set, etc. */
3021 if (visible != NULL) visible = myfree(visible,sizeof(besthit_t)*NJ->maxnodes);
3022 if (besthitNew != NULL) besthitNew = myfree(besthitNew,sizeof(besthit_t)*NJ->maxnodes);
3023 tophits = FreeTopHits(tophits);
3024
3025 /* Add a root for the 3 remaining nodes */
3026 int top[3];
3027 int nTop = 0;
3028 for (iNode = 0; iNode < NJ->maxnode; iNode++) {
3029 if (NJ->parent[iNode] < 0) {
3030 assert(nTop <= 2);
3031 top[nTop++] = iNode;
3032 }
3033 }
3034 assert(nTop==3);
3035
3036 NJ->root = NJ->maxnode++;
3037 NJ->child[NJ->root].nChild = 3;
3038 for (nTop = 0; nTop < 3; nTop++) {
3039 NJ->parent[top[nTop]] = NJ->root;
3040 NJ->child[NJ->root].child[nTop] = top[nTop];
3041 }
3042
3043 besthit_t dist01, dist02, dist12;
3044 ProfileDist(NJ->profiles[top[0]], NJ->profiles[top[1]], NJ->nPos, NJ->distance_matrix, /*OUT*/&dist01);
3045 ProfileDist(NJ->profiles[top[0]], NJ->profiles[top[2]], NJ->nPos, NJ->distance_matrix, /*OUT*/&dist02);
3046 ProfileDist(NJ->profiles[top[1]], NJ->profiles[top[2]], NJ->nPos, NJ->distance_matrix, /*OUT*/&dist12);
3047
3048 double d01 = dist01.dist - NJ->diameter[top[0]] - NJ->diameter[top[1]];
3049 double d02 = dist02.dist - NJ->diameter[top[0]] - NJ->diameter[top[2]];
3050 double d12 = dist12.dist - NJ->diameter[top[1]] - NJ->diameter[top[2]];
3051 NJ->branchlength[top[0]] = (d01 + d02 - d12)/2;
3052 NJ->branchlength[top[1]] = (d01 + d12 - d02)/2;
3053 NJ->branchlength[top[2]] = (d02 + d12 - d01)/2;
3054
3055 /* Check how accurate the outprofile is */
3056 if (verbose>2) {
3057 profile_t *p[3] = {NJ->profiles[top[0]], NJ->profiles[top[1]], NJ->profiles[top[2]]};
3058 profile_t *out = OutProfile(p, 3, NJ->nPos, NJ->nConstraints, NJ->distance_matrix);
3059 int i;
3060 double freqerror = 0;
3061 double weighterror = 0;
3062 for (i=0;i<NJ->nPos;i++) {
3063 weighterror += fabs(out->weights[i] - NJ->outprofile->weights[i]);
3064 int k;
3065 for(k=0;k<nCodes;k++)
3066 freqerror += fabs(out->vectors[nCodes*i+k] - NJ->outprofile->vectors[nCodes*i+k]);
3067 }
3068 fprintf(stderr,"Roundoff error in outprofile@end: WeightError %f FreqError %f\n", weighterror, freqerror);
3069 FreeProfile(out, NJ->nPos, NJ->nConstraints);
3070 }
3071 return;
3072 }
3073
ExhaustiveNJSearch(NJ_t * NJ,int nActive,besthit_t * join)3074 void ExhaustiveNJSearch(NJ_t *NJ, int nActive, /*OUT*/besthit_t *join) {
3075 join->i = -1;
3076 join->j = -1;
3077 join->weight = 0;
3078 join->dist = 1e20;
3079 join->criterion = 1e20;
3080 double bestCriterion = 1e20;
3081
3082 int i, j;
3083 for (i = 0; i < NJ->maxnode-1; i++) {
3084 if (NJ->parent[i] < 0) {
3085 for (j = i+1; j < NJ->maxnode; j++) {
3086 if (NJ->parent[j] < 0) {
3087 besthit_t hit;
3088 hit.i = i;
3089 hit.j = j;
3090 SetDistCriterion(NJ, nActive, /*IN/OUT*/&hit);
3091 if (hit.criterion < bestCriterion) {
3092 *join = hit;
3093 bestCriterion = hit.criterion;
3094 }
3095 }
3096 }
3097 }
3098 }
3099 assert (join->i >= 0 && join->j >= 0);
3100 }
3101
FastNJSearch(NJ_t * NJ,int nActive,besthit_t * besthits,besthit_t * join)3102 void FastNJSearch(NJ_t *NJ, int nActive, /*IN/OUT*/besthit_t *besthits, /*OUT*/besthit_t *join) {
3103 join->i = -1;
3104 join->j = -1;
3105 join->dist = 1e20;
3106 join->weight = 0;
3107 join->criterion = 1e20;
3108 int iNode;
3109 for (iNode = 0; iNode < NJ->maxnode; iNode++) {
3110 int jNode = besthits[iNode].j;
3111 if (NJ->parent[iNode] < 0 && NJ->parent[jNode] < 0) { /* both i and j still active */
3112 /* recompute criterion to reflect the current out-distances */
3113 SetCriterion(NJ, nActive, /*IN/OUT*/&besthits[iNode]);
3114 if (besthits[iNode].criterion < join->criterion)
3115 *join = besthits[iNode];
3116 }
3117 }
3118
3119 if(!fastest) {
3120 int changed;
3121 do {
3122 changed = 0;
3123 assert(join->i >= 0 && join->j >= 0);
3124 SetBestHit(join->i, NJ, nActive, /*OUT*/&besthits[join->i], /*OUT IGNORED*/NULL);
3125 if (besthits[join->i].j != join->j) {
3126 changed = 1;
3127 if (verbose>2)
3128 fprintf(stderr,"BetterI\t%d\t%d\t%d\t%d\t%f\t%f\n",
3129 join->i,join->j,besthits[join->i].i,besthits[join->i].j,
3130 join->criterion,besthits[join->i].criterion);
3131 }
3132
3133 /* Save the best hit either way, because the out-distance has probably changed
3134 since we started the computation. */
3135 join->j = besthits[join->i].j;
3136 join->weight = besthits[join->i].weight;
3137 join->dist = besthits[join->i].dist;
3138 join->criterion = besthits[join->i].criterion;
3139
3140 SetBestHit(join->j, NJ, nActive, /*OUT*/&besthits[join->j], /*OUT IGNORE*/NULL);
3141 if (besthits[join->j].j != join->i) {
3142 changed = 1;
3143 if (verbose>2)
3144 fprintf(stderr,"BetterJ\t%d\t%d\t%d\t%d\t%f\t%f\n",
3145 join->i,join->j,besthits[join->j].i,besthits[join->j].j,
3146 join->criterion,besthits[join->j].criterion);
3147 join->i = besthits[join->j].j;
3148 join->weight = besthits[join->j].weight;
3149 join->dist = besthits[join->j].dist;
3150 join->criterion = besthits[join->j].criterion;
3151 }
3152 if(changed) nHillBetter++;
3153 } while(changed);
3154 }
3155 }
3156
3157 /* A token is one of ():;, or an alphanumeric string without whitespace
3158 Any whitespace between tokens is ignored */
ReadTreeToken(FILE * fp)3159 char *ReadTreeToken(FILE *fp) {
3160 static char buf[BUFFER_SIZE];
3161 int len = 0;
3162 int c;
3163 for (c = fgetc(fp); c != EOF; c = fgetc(fp)) {
3164 if (c == '(' || c == ')' || c == ':' || c == ';' || c == ',') {
3165 /* standalone token */
3166 if (len == 0) {
3167 buf[len++] = c;
3168 buf[len] = '\0';
3169 return(buf);
3170 } else {
3171 ungetc(c, fp);
3172 buf[len] = '\0';
3173 return(buf);
3174 }
3175 } else if (isspace(c)) {
3176 if (len > 0) {
3177 buf[len] = '\0';
3178 return(buf);
3179 }
3180 /* else ignore whitespace at beginning of token */
3181 } else {
3182 /* not whitespace or standalone token */
3183 buf[len++] = c;
3184 if (len >= BUFFER_SIZE) {
3185 buf[BUFFER_SIZE-1] = '\0';
3186 fprintf(stderr, "Token too long in tree file, token begins with\n%s\n", buf);
3187 exit(1);
3188 }
3189 }
3190 }
3191 if (len > 0) {
3192 /* return the token we have so far */
3193 buf[len] = '\0';
3194 return(buf);
3195 }
3196 /* else */
3197 return(NULL);
3198 }
3199
ReadTreeError(char * err,char * token)3200 void ReadTreeError(char *err, char *token) {
3201 fprintf(stderr, "Tree parse error: unexpected token '%s' -- %s\n",
3202 token == NULL ? "(End of file)" : token,
3203 err);
3204 exit(1);
3205 }
3206
ReadTreeAddChild(int parent,int child,int * parents,children_t * children)3207 void ReadTreeAddChild(int parent, int child, /*IN/OUT*/int *parents, /*IN/OUT*/children_t *children) {
3208 assert(parent >= 0);
3209 assert(child >= 0);
3210 assert(parents[child] < 0);
3211 assert(children[parent].nChild < 3);
3212 parents[child] = parent;
3213 children[parent].child[children[parent].nChild++] = child;
3214 }
3215
ReadTreeMaybeAddLeaf(int parent,char * name,hashstrings_t * hashnames,uniquify_t * unique,int * parents,children_t * children)3216 void ReadTreeMaybeAddLeaf(int parent, char *name,
3217 hashstrings_t *hashnames, uniquify_t *unique,
3218 /*IN/OUT*/int *parents, /*IN/OUT*/children_t *children) {
3219 hashiterator_t hi = FindMatch(hashnames,name);
3220 if (HashCount(hashnames,hi) != 1)
3221 ReadTreeError("not recognized as a sequence name", name);
3222
3223 int iSeqNonunique = HashFirst(hashnames,hi);
3224 assert(iSeqNonunique >= 0 && iSeqNonunique < unique->nSeq);
3225 int iSeqUnique = unique->alnToUniq[iSeqNonunique];
3226 assert(iSeqUnique >= 0 && iSeqUnique < unique->nUnique);
3227 /* Either record this leaves' parent (if it is -1) or ignore this leaf (if already seen) */
3228 if (parents[iSeqUnique] < 0) {
3229 ReadTreeAddChild(parent, iSeqUnique, /*IN/OUT*/parents, /*IN/OUT*/children);
3230 if(verbose > 5)
3231 fprintf(stderr, "Found leaf uniq%d name %s child of %d\n", iSeqUnique, name, parent);
3232 } else {
3233 if (verbose > 5)
3234 fprintf(stderr, "Skipped redundant leaf uniq%d name %s\n", iSeqUnique, name);
3235 }
3236 }
3237
ReadTreeRemove(int * parents,children_t * children,int node)3238 void ReadTreeRemove(/*IN/OUT*/int *parents, /*IN/OUT*/children_t *children, int node) {
3239 if(verbose > 5)
3240 fprintf(stderr,"Removing node %d parent %d\n", node, parents[node]);
3241 assert(parents[node] >= 0);
3242 int parent = parents[node];
3243 parents[node] = -1;
3244 children_t *pc = &children[parent];
3245 int oldn;
3246 for (oldn = 0; oldn < pc->nChild; oldn++) {
3247 if (pc->child[oldn] == node)
3248 break;
3249 }
3250 assert(oldn < pc->nChild);
3251
3252 /* move successor nodes back in child list and shorten list */
3253 int i;
3254 for (i = oldn; i < pc->nChild-1; i++)
3255 pc->child[i] = pc->child[i+1];
3256 pc->nChild--;
3257
3258 /* add its children to parent's child list */
3259 children_t *nc = &children[node];
3260 if (nc->nChild > 0) {
3261 assert(nc->nChild<=2);
3262 assert(pc->nChild < 3);
3263 assert(pc->nChild + nc->nChild <= 3);
3264 int j;
3265 for (j = 0; j < nc->nChild; j++) {
3266 if(verbose > 5)
3267 fprintf(stderr,"Repointing parent %d to child %d\n", parent, nc->child[j]);
3268 pc->child[pc->nChild++] = nc->child[j];
3269 parents[nc->child[j]] = parent;
3270 }
3271 nc->nChild = 0;
3272 }
3273 }
3274
ReadTree(NJ_t * NJ,uniquify_t * unique,hashstrings_t * hashnames,FILE * fpInTree)3275 void ReadTree(/*IN/OUT*/NJ_t *NJ,
3276 /*IN*/uniquify_t *unique,
3277 /*IN*/hashstrings_t *hashnames,
3278 /*READ*/FILE *fpInTree) {
3279 assert(NJ->nSeq == unique->nUnique);
3280 /* First, do a preliminary parse of the tree to with non-unique leaves ignored
3281 We need to store this separately from NJ because it may have too many internal nodes
3282 (matching sequences show up once in the NJ but could be in multiple places in the tree)
3283 Will use iUnique as the index of nodes, as in the NJ structure
3284 */
3285 int maxnodes = unique->nSeq*2;
3286 int maxnode = unique->nSeq;
3287 int *parent = (int*)mymalloc(sizeof(int)*maxnodes);
3288 children_t *children = (children_t *)mymalloc(sizeof(children_t)*maxnodes);
3289 int root = maxnode++;
3290 int i;
3291 for (i = 0; i < maxnodes; i++) {
3292 parent[i] = -1;
3293 children[i].nChild = 0;
3294 }
3295
3296 /* The stack is the current path to the root, with the root at the first (top) position */
3297 int stack_size = 1;
3298 int *stack = (int*)mymalloc(sizeof(int)*maxnodes);
3299 stack[0] = root;
3300 int nDown = 0;
3301 int nUp = 0;
3302
3303 char *token;
3304 token = ReadTreeToken(fpInTree);
3305 if (token == NULL || *token != '(')
3306 ReadTreeError("No '(' at start", token);
3307 /* nDown is still 0 because we have created the root */
3308
3309 while ((token = ReadTreeToken(fpInTree)) != NULL) {
3310 if (nDown > 0) { /* In a stream of parentheses */
3311 if (*token == '(')
3312 nDown++;
3313 else if (*token == ',' || *token == ';' || *token == ':' || *token == ')')
3314 ReadTreeError("while reading parentheses", token);
3315 else {
3316 /* Add intermediate nodes if nDown was > 1 (for nDown=1, the only new node is the leaf) */
3317 while (nDown-- > 0) {
3318 int new = maxnode++;
3319 assert(new < maxnodes);
3320 ReadTreeAddChild(stack[stack_size-1], new, /*IN/OUT*/parent, /*IN/OUT*/children);
3321 if(verbose > 5)
3322 fprintf(stderr, "Added internal child %d of %d, stack size increase to %d\n",
3323 new, stack[stack_size-1],stack_size+1);
3324 stack[stack_size++] = new;
3325 assert(stack_size < maxnodes);
3326 }
3327 ReadTreeMaybeAddLeaf(stack[stack_size-1], token,
3328 hashnames, unique,
3329 /*IN/OUT*/parent, /*IN/OUT*/children);
3330 }
3331 } else if (nUp > 0) {
3332 if (*token == ';') { /* end the tree? */
3333 if (nUp != stack_size)
3334 ReadTreeError("unbalanced parentheses", token);
3335 else
3336 break;
3337 } else if (*token == ')')
3338 nUp++;
3339 else if (*token == '(')
3340 ReadTreeError("unexpected '(' after ')'", token);
3341 else if (*token == ':') {
3342 token = ReadTreeToken(fpInTree);
3343 /* Read the branch length and ignore it */
3344 if (token == NULL || (*token != '-' && !isdigit(*token)))
3345 ReadTreeError("not recognized as a branch length", token);
3346 } else if (*token == ',') {
3347 /* Go back up the stack the correct #times */
3348 while (nUp-- > 0) {
3349 stack_size--;
3350 if(verbose > 5)
3351 fprintf(stderr, "Up to nUp=%d stack size %d at %d\n",
3352 nUp, stack_size, stack[stack_size-1]);
3353 if (stack_size <= 0)
3354 ReadTreeError("too many ')'", token);
3355 }
3356 nUp = 0;
3357 } else if (*token == '-' || isdigit(*token))
3358 ; /* ignore bootstrap value */
3359 else
3360 fprintf(stderr, "Warning while parsing tree: non-numeric label %s for internal node\n",
3361 token);
3362 } else if (*token == '(') {
3363 nDown = 1;
3364 } else if (*token == ')') {
3365 nUp = 1;
3366 } else if (*token == ':') {
3367 token = ReadTreeToken(fpInTree);
3368 if (token == NULL || (*token != '-' && !isdigit(*token)))
3369 ReadTreeError("not recognized as a branch length", token);
3370 } else if (*token == ',') {
3371 ; /* do nothing */
3372 } else if (*token == ';')
3373 ReadTreeError("unexpected token", token);
3374 else
3375 ReadTreeMaybeAddLeaf(stack[stack_size-1], token,
3376 hashnames, unique,
3377 /*IN/OUT*/parent, /*IN/OUT*/children);
3378 }
3379
3380 /* Verify that all sequences were seen */
3381 for (i = 0; i < unique->nUnique; i++) {
3382 if (parent[i] < 0) {
3383 fprintf(stderr, "Alignment sequence %d (unique %d) absent from input tree\n"
3384 "The starting tree (the argument to -intree) must include all sequences in the alignment!\n",
3385 unique->uniqueFirst[i], i);
3386 exit(1);
3387 }
3388 }
3389
3390 /* Simplify the tree -- remove all internal nodes with < 2 children
3391 Keep trying until no nodes get removed
3392 */
3393 int nRemoved;
3394 do {
3395 nRemoved = 0;
3396 /* Here stack is the list of nodes we haven't visited yet while doing
3397 a tree traversal */
3398 stack_size = 1;
3399 stack[0] = root;
3400 while (stack_size > 0) {
3401 int node = stack[--stack_size];
3402 if (node >= unique->nUnique) { /* internal node */
3403 if (children[node].nChild <= 1) {
3404 if (node != root) {
3405 ReadTreeRemove(/*IN/OUT*/parent,/*IN/OUT*/children,node);
3406 nRemoved++;
3407 } else if (node == root && children[node].nChild == 1) {
3408 int newroot = children[node].child[0];
3409 parent[newroot] = -1;
3410 children[root].nChild = 0;
3411 nRemoved++;
3412 if(verbose > 5)
3413 fprintf(stderr,"Changed root from %d to %d\n",root,newroot);
3414 root = newroot;
3415 stack[stack_size++] = newroot;
3416 }
3417 } else {
3418 int j;
3419 for (j = 0; j < children[node].nChild; j++) {
3420 assert(stack_size < maxnodes);
3421 stack[stack_size++] = children[node].child[j];
3422 if(verbose > 5)
3423 fprintf(stderr,"Added %d to stack\n", stack[stack_size-1]);
3424 }
3425 }
3426 }
3427 }
3428 } while (nRemoved > 0);
3429
3430 /* Simplify the root node to 3 children if it has 2 */
3431 if (children[root].nChild == 2) {
3432 for (i = 0; i < 2; i++) {
3433 int child = children[root].child[i];
3434 assert(child >= 0 && child < maxnodes);
3435 if (children[child].nChild == 2) {
3436 ReadTreeRemove(parent,children,child); /* replace root -> child -> A,B with root->A,B */
3437 break;
3438 }
3439 }
3440 }
3441
3442 for (i = 0; i < maxnodes; i++)
3443 if(verbose > 5)
3444 fprintf(stderr,"Simplfied node %d has parent %d nchild %d\n",
3445 i, parent[i], children[i].nChild);
3446
3447 /* Map the remaining internal nodes to NJ nodes */
3448 int *map = (int*)mymalloc(sizeof(int)*maxnodes);
3449 for (i = 0; i < unique->nUnique; i++)
3450 map[i] = i;
3451 for (i = unique->nUnique; i < maxnodes; i++)
3452 map[i] = -1;
3453 stack_size = 1;
3454 stack[0] = root;
3455 while (stack_size > 0) {
3456 int node = stack[--stack_size];
3457 if (node >= unique->nUnique) { /* internal node */
3458 assert(node == root || children[node].nChild > 1);
3459 map[node] = NJ->maxnode++;
3460 for (i = 0; i < children[node].nChild; i++) {
3461 assert(stack_size < maxnodes);
3462 stack[stack_size++] = children[node].child[i];
3463 }
3464 }
3465 }
3466 for (i = 0; i < maxnodes; i++)
3467 if(verbose > 5)
3468 fprintf(stderr,"Map %d to %d (parent %d nchild %d)\n",
3469 i, map[i], parent[i], children[i].nChild);
3470
3471 /* Set NJ->parent, NJ->children, NJ->root */
3472 NJ->root = map[root];
3473 int node;
3474 for (node = 0; node < maxnodes; node++) {
3475 int njnode = map[node];
3476 if (njnode >= 0) {
3477 NJ->child[njnode].nChild = children[node].nChild;
3478 for (i = 0; i < children[node].nChild; i++) {
3479 assert(children[node].child[i] >= 0 && children[node].child[i] < maxnodes);
3480 NJ->child[njnode].child[i] = map[children[node].child[i]];
3481 }
3482 if (parent[node] >= 0)
3483 NJ->parent[njnode] = map[parent[node]];
3484 }
3485 }
3486
3487 /* Make sure that parent/child relationships match */
3488 for (i = 0; i < NJ->maxnode; i++) {
3489 children_t *c = &NJ->child[i];
3490 int j;
3491 for (j = 0; j < c->nChild;j++)
3492 assert(c->child[j] >= 0 && c->child[j] < NJ->maxnode && NJ->parent[c->child[j]] == i);
3493 }
3494 assert(NJ->parent[NJ->root] < 0);
3495
3496 map = myfree(map,sizeof(int)*maxnodes);
3497 stack = myfree(stack,sizeof(int)*maxnodes);
3498 children = myfree(children,sizeof(children_t)*maxnodes);
3499 parent = myfree(parent,sizeof(int)*maxnodes);
3500
3501 /* Compute profiles as balanced -- the NNI stage will recompute these
3502 profiles anyway
3503 */
3504 traversal_t traversal = InitTraversal(NJ);
3505 node = NJ->root;
3506 while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
3507 if (node >= NJ->nSeq && node != NJ->root)
3508 SetProfile(/*IN/OUT*/NJ, node, /*noweight*/-1.0);
3509 }
3510 traversal = FreeTraversal(traversal,NJ);
3511 }
3512
3513 /* Print topology using node indices as node names */
PrintNJInternal(FILE * fp,NJ_t * NJ,bool useLen)3514 void PrintNJInternal(FILE *fp, NJ_t *NJ, bool useLen) {
3515 if (NJ->nSeq < 4) {
3516 return;
3517 }
3518 typedef struct { int node; int end; } stack_t;
3519 stack_t *stack = (stack_t *)mymalloc(sizeof(stack_t)*NJ->maxnodes);
3520 int stackSize = 1;
3521 stack[0].node = NJ->root;
3522 stack[0].end = 0;
3523
3524 while(stackSize>0) {
3525 stack_t *last = &stack[stackSize-1];
3526 stackSize--;
3527 /* Save last, as we are about to overwrite it */
3528 int node = last->node;
3529 int end = last->end;
3530
3531 if (node < NJ->nSeq) {
3532 if (NJ->child[NJ->parent[node]].child[0] != node) fputs(",",fp);
3533 fprintf(fp, "%d", node);
3534 if (useLen)
3535 fprintf(fp, ":%.4f", NJ->branchlength[node]);
3536 } else if (end) {
3537 fprintf(fp, ")%d", node);
3538 if (useLen)
3539 fprintf(fp, ":%.4f", NJ->branchlength[node]);
3540 } else {
3541 if (node != NJ->root && NJ->child[NJ->parent[node]].child[0] != node) fprintf(fp, ",");
3542 fprintf(fp, "(");
3543 stackSize++;
3544 stack[stackSize-1].node = node;
3545 stack[stackSize-1].end = 1;
3546 children_t *c = &NJ->child[node];
3547 /* put children on in reverse order because we use the last one first */
3548 int i;
3549 for (i = c->nChild-1; i >=0; i--) {
3550 stackSize++;
3551 stack[stackSize-1].node = c->child[i];
3552 stack[stackSize-1].end = 0;
3553 }
3554 }
3555 }
3556 fprintf(fp, ";\n");
3557 stack = myfree(stack, sizeof(stack_t)*NJ->maxnodes);
3558 }
3559
PrintNJ(FILE * fp,NJ_t * NJ,char ** names,uniquify_t * unique,bool bShowSupport,bool bQuote)3560 void PrintNJ(FILE *fp, NJ_t *NJ, char **names, uniquify_t *unique, bool bShowSupport, bool bQuote) {
3561 /* And print the tree: depth first search
3562 * The stack contains
3563 * list of remaining children with their depth
3564 * parent node, with a flag of -1 so I know to print right-paren
3565 */
3566 if (NJ->nSeq==1 && unique->alnNext[unique->uniqueFirst[0]] >= 0) {
3567 /* Special case -- otherwise we end up with double parens */
3568 int first = unique->uniqueFirst[0];
3569 assert(first >= 0 && first < unique->nSeq);
3570 fprintf(fp, bQuote ? "('%s':0.0" : "(%s:0.0", names[first]);
3571 int iName = unique->alnNext[first];
3572 while (iName >= 0) {
3573 assert(iName < unique->nSeq);
3574 fprintf(fp, bQuote ? ",'%s':0.0" : ",%s:0.0", names[iName]);
3575 iName = unique->alnNext[iName];
3576 }
3577 fprintf(fp,");\n");
3578 return;
3579 }
3580
3581 typedef struct { int node; int end; } stack_t;
3582 stack_t *stack = (stack_t *)mymalloc(sizeof(stack_t)*NJ->maxnodes);
3583 int stackSize = 1;
3584 stack[0].node = NJ->root;
3585 stack[0].end = 0;
3586
3587 while(stackSize>0) {
3588 stack_t *last = &stack[stackSize-1];
3589 stackSize--;
3590 /* Save last, as we are about to overwrite it */
3591 int node = last->node;
3592 int end = last->end;
3593
3594 if (node < NJ->nSeq) {
3595 if (NJ->child[NJ->parent[node]].child[0] != node) fputs(",",fp);
3596 int first = unique->uniqueFirst[node];
3597 assert(first >= 0 && first < unique->nSeq);
3598 /* Print the name, or the subtree of duplicate names */
3599 if (unique->alnNext[first] == -1) {
3600 fprintf(fp, bQuote ? "'%s'" : "%s", names[first]);
3601 } else {
3602 fprintf(fp, bQuote ? "('%s':0.0" : "(%s:0.0", names[first]);
3603 int iName = unique->alnNext[first];
3604 while (iName >= 0) {
3605 assert(iName < unique->nSeq);
3606 fprintf(fp, bQuote ? ",'%s':0.0" : ",%s:0.0", names[iName]);
3607 iName = unique->alnNext[iName];
3608 }
3609 fprintf(fp,")");
3610 }
3611 /* Print the branch length */
3612 #ifdef USE_DOUBLE
3613 #define FP_FORMAT "%.9f"
3614 #else
3615 #define FP_FORMAT "%.5f"
3616 #endif
3617 fprintf(fp, ":" FP_FORMAT, NJ->branchlength[node]);
3618 } else if (end) {
3619 if (node == NJ->root)
3620 fprintf(fp, ")");
3621 else if (bShowSupport)
3622 fprintf(fp, ")%.3f:" FP_FORMAT, NJ->support[node], NJ->branchlength[node]);
3623 else
3624 fprintf(fp, "):" FP_FORMAT, NJ->branchlength[node]);
3625 } else {
3626 if (node != NJ->root && NJ->child[NJ->parent[node]].child[0] != node) fprintf(fp, ",");
3627 fprintf(fp, "(");
3628 stackSize++;
3629 stack[stackSize-1].node = node;
3630 stack[stackSize-1].end = 1;
3631 children_t *c = &NJ->child[node];
3632 /* put children on in reverse order because we use the last one first */
3633 int i;
3634 for (i = c->nChild-1; i >=0; i--) {
3635 stackSize++;
3636 stack[stackSize-1].node = c->child[i];
3637 stack[stackSize-1].end = 0;
3638 }
3639 }
3640 }
3641 fprintf(fp, ";\n");
3642 stack = myfree(stack, sizeof(stack_t)*NJ->maxnodes);
3643 }
3644
ReadAlignment(FILE * fp,bool bQuote)3645 alignment_t *ReadAlignment(/*IN*/FILE *fp, bool bQuote) {
3646 /* bQuote supports the -quote option */
3647 int nSeq = 0;
3648 int nPos = 0;
3649 char **names = NULL;
3650 char **seqs = NULL;
3651 char buf[BUFFER_SIZE] = "";
3652 if (fgets(buf,sizeof(buf),fp) == NULL) {
3653 fprintf(stderr, "Error reading header line\n");
3654 exit(1);
3655 }
3656 int nSaved = 100;
3657 if (buf[0] == '>') {
3658 /* FASTA, truncate names at any of these */
3659 char *nameStop = bQuote ? "'\t\r\n" : "(),: \t\r\n";
3660 char *seqSkip = " \t\r\n"; /* skip these characters in the sequence */
3661 seqs = (char**)mymalloc(sizeof(char*) * nSaved);
3662 names = (char**)mymalloc(sizeof(char*) * nSaved);
3663
3664 do {
3665 /* loop over lines */
3666 if (buf[0] == '>') {
3667 /* truncate the name */
3668 char *p, *q;
3669 for (p = buf+1; *p != '\0'; p++) {
3670 for (q = nameStop; *q != '\0'; q++) {
3671 if (*p == *q) {
3672 *p = '\0';
3673 break;
3674 }
3675 }
3676 if (*p == '\0') break;
3677 }
3678
3679 /* allocate space for another sequence */
3680 nSeq++;
3681 if (nSeq > nSaved) {
3682 int nNewSaved = nSaved*2;
3683 seqs = myrealloc(seqs,sizeof(char*)*nSaved,sizeof(char*)*nNewSaved, /*copy*/false);
3684 names = myrealloc(names,sizeof(char*)*nSaved,sizeof(char*)*nNewSaved, /*copy*/false);
3685 nSaved = nNewSaved;
3686 }
3687 names[nSeq-1] = (char*)mymemdup(buf+1,strlen(buf));
3688 seqs[nSeq-1] = NULL;
3689 } else {
3690 /* count non-space characters and append to sequence */
3691 int nKeep = 0;
3692 char *p, *q;
3693 for (p=buf; *p != '\0'; p++) {
3694 for (q=seqSkip; *q != '\0'; q++) {
3695 if (*p == *q)
3696 break;
3697 }
3698 if (*p != *q)
3699 nKeep++;
3700 }
3701 int nOld = (seqs[nSeq-1] == NULL) ? 0 : strlen(seqs[nSeq-1]);
3702 seqs[nSeq-1] = (char*)myrealloc(seqs[nSeq-1], nOld, nOld+nKeep+1, /*copy*/false);
3703 if (nOld+nKeep > nPos)
3704 nPos = nOld + nKeep;
3705 char *out = seqs[nSeq-1] + nOld;
3706 for (p=buf; *p != '\0'; p++) {
3707 for (q=seqSkip; *q != '\0'; q++) {
3708 if (*p == *q)
3709 break;
3710 }
3711 if (*p != *q) {
3712 *out = *p;
3713 out++;
3714 }
3715 }
3716 assert(out-seqs[nSeq-1] == nKeep + nOld);
3717 *out = '\0';
3718 }
3719 } while(fgets(buf,sizeof(buf),fp) != NULL);
3720
3721 if (seqs[nSeq-1] == NULL) {
3722 fprintf(stderr, "No sequence data for last entry %s\n",names[nSeq-1]);
3723 exit(1);
3724 }
3725 names = myrealloc(names,sizeof(char*)*nSaved,sizeof(char*)*nSeq, /*copy*/false);
3726 seqs = myrealloc(seqs,sizeof(char*)*nSaved,sizeof(char*)*nSeq, /*copy*/false);
3727 } else {
3728 /* PHYLIP interleaved-like format
3729 Allow arbitrary length names, require spaces between names and sequences
3730 Allow multiple alignments, either separated by a single empty line (e.g. seqboot output)
3731 or not.
3732 */
3733 if (buf[0] == '\n' || buf[0] == '\r') {
3734 if (fgets(buf,sizeof(buf),fp) == NULL) {
3735 fprintf(stderr, "Empty header line followed by EOF\n");
3736 exit(1);
3737 }
3738 }
3739 if (sscanf(buf, "%d%d", &nSeq, &nPos) != 2
3740 || nSeq < 1 || nPos < 1) {
3741 fprintf(stderr, "Error parsing header line:%s\n", buf);
3742 exit(1);
3743 }
3744 names = (char **)mymalloc(sizeof(char*) * nSeq);
3745 seqs = (char **)mymalloc(sizeof(char*) * nSeq);
3746 nSaved = nSeq;
3747
3748 int i;
3749 for (i = 0; i < nSeq; i++) {
3750 names[i] = NULL;
3751 seqs[i] = (char *)mymalloc(nPos+1); /* null-terminate */
3752 seqs[i][0] = '\0';
3753 }
3754 int iSeq = 0;
3755
3756 while(fgets(buf,sizeof(buf),fp)) {
3757 if ((buf[0] == '\n' || buf[0] == '\r') && (iSeq == nSeq || iSeq == 0)) {
3758 iSeq = 0;
3759 } else {
3760 int j = 0; /* character just past end of name */
3761 if (buf[0] == ' ') {
3762 if (names[iSeq] == NULL) {
3763 fprintf(stderr, "No name in phylip line %s", buf);
3764 exit(1);
3765 }
3766 } else {
3767 while (buf[j] != '\n' && buf[j] != '\0' && buf[j] != ' ')
3768 j++;
3769 if (buf[j] != ' ' || j == 0) {
3770 fprintf(stderr, "No sequence in phylip line %s", buf);
3771 exit(1);
3772 }
3773 if (iSeq >= nSeq) {
3774 fprintf(stderr, "No empty line between sequence blocks (is the sequence count wrong?)\n");
3775 exit(1);
3776 }
3777 if (names[iSeq] == NULL) {
3778 /* save the name */
3779 names[iSeq] = (char *)mymalloc(j+1);
3780 int k;
3781 for (k = 0; k < j; k++) names[iSeq][k] = buf[k];
3782 names[iSeq][j] = '\0';
3783 } else {
3784 /* check the name */
3785 int k;
3786 int match = 1;
3787 for (k = 0; k < j; k++) {
3788 if (names[iSeq][k] != buf[k]) {
3789 match = 0;
3790 break;
3791 }
3792 }
3793 if (!match || names[iSeq][j] != '\0') {
3794 fprintf(stderr, "Wrong name in phylip line %s\nExpected %s\n", buf, names[iSeq]);
3795 exit(1);
3796 }
3797 }
3798 }
3799 int seqlen = strlen(seqs[iSeq]);
3800 for (; buf[j] != '\n' && buf[j] != '\0'; j++) {
3801 if (buf[j] != ' ') {
3802 if (seqlen >= nPos) {
3803 fprintf(stderr, "Too many characters (expected %d) for sequence named %s\nSo far have:\n%s\n",
3804 nPos, names[iSeq], seqs[iSeq]);
3805 exit(1);
3806 }
3807 seqs[iSeq][seqlen++] = toupper(buf[j]);
3808 }
3809 }
3810 seqs[iSeq][seqlen] = '\0'; /* null-terminate */
3811 if(verbose>10) fprintf(stderr,"Read iSeq %d name %s seqsofar %s\n", iSeq, names[iSeq], seqs[iSeq]);
3812 iSeq++;
3813 if (iSeq == nSeq && strlen(seqs[0]) == nPos)
3814 break; /* finished alignment */
3815 } /* end else non-empty phylip line */
3816 }
3817 if (iSeq != nSeq && iSeq != 0) {
3818 fprintf(stderr, "Wrong number of sequences: expected %d\n", nSeq);
3819 exit(1);
3820 }
3821 }
3822 /* Check lengths of sequences */
3823 int i;
3824 for (i = 0; i < nSeq; i++) {
3825 int seqlen = strlen(seqs[i]);
3826 if (seqlen != nPos) {
3827 fprintf(stderr, "Wrong number of characters for %s: expected %d but have %d instead.\n"
3828 "This sequence may be truncated, or another sequence may be too long.\n",
3829 names[i], nPos, seqlen);
3830 exit(1);
3831 }
3832 }
3833 /* Replace "." with "-" and warn if we find any */
3834 /* If nucleotide sequences, replace U with T and N with X */
3835 bool findDot = false;
3836 for (i = 0; i < nSeq; i++) {
3837 char *p;
3838 for (p = seqs[i]; *p != '\0'; p++) {
3839 if (*p == '.') {
3840 findDot = true;
3841 *p = '-';
3842 }
3843 if (nCodes == 4 && *p == 'U')
3844 *p = 'T';
3845 if (nCodes == 4 && *p == 'N')
3846 *p = 'X';
3847 }
3848 }
3849 if (findDot)
3850 fprintf(stderr, "Warning! Found \".\" character(s). These are treated as gaps\n");
3851
3852 if (ferror(fp)) {
3853 fprintf(stderr, "Error reading input file\n");
3854 exit(1);
3855 }
3856
3857 alignment_t *align = (alignment_t*)mymalloc(sizeof(alignment_t));
3858 align->nSeq = nSeq;
3859 align->nPos = nPos;
3860 align->names = names;
3861 align->seqs = seqs;
3862 align->nSaved = nSaved;
3863 return(align);
3864 }
3865
FreeAlignmentSeqs(alignment_t * aln)3866 void FreeAlignmentSeqs(/*IN/OUT*/alignment_t *aln) {
3867 assert(aln != NULL);
3868 int i;
3869 for (i = 0; i < aln->nSeq; i++)
3870 aln->seqs[i] = myfree(aln->seqs[i], aln->nPos+1);
3871 }
3872
FreeAlignment(alignment_t * aln)3873 alignment_t *FreeAlignment(alignment_t *aln) {
3874 if(aln==NULL)
3875 return(NULL);
3876 int i;
3877 for (i = 0; i < aln->nSeq; i++) {
3878 aln->names[i] = myfree(aln->names[i],strlen(aln->names[i])+1);
3879 aln->seqs[i] = myfree(aln->seqs[i], aln->nPos+1);
3880 }
3881 aln->names = myfree(aln->names, sizeof(char*)*aln->nSaved);
3882 aln->seqs = myfree(aln->seqs, sizeof(char*)*aln->nSaved);
3883 myfree(aln, sizeof(alignment_t));
3884 return(NULL);
3885 }
3886
AlnToConstraints(alignment_t * constraints,uniquify_t * unique,hashstrings_t * hashnames)3887 char **AlnToConstraints(alignment_t *constraints, uniquify_t *unique, hashstrings_t *hashnames) {
3888 /* look up constraints as names and map to unique-space */
3889 char ** uniqConstraints = (char**)mymalloc(sizeof(char*) * unique->nUnique);
3890 int i;
3891 for (i = 0; i < unique->nUnique; i++)
3892 uniqConstraints[i] = NULL;
3893 for (i = 0; i < constraints->nSeq; i++) {
3894 char *name = constraints->names[i];
3895 char *constraintSeq = constraints->seqs[i];
3896 hashiterator_t hi = FindMatch(hashnames,name);
3897 if (HashCount(hashnames,hi) != 1) {
3898 fprintf(stderr, "Sequence %s from constraints file is not in the alignment\n", name);
3899 exit(1);
3900 }
3901 int iSeqNonunique = HashFirst(hashnames,hi);
3902 assert(iSeqNonunique >= 0 && iSeqNonunique < unique->nSeq);
3903 int iSeqUnique = unique->alnToUniq[iSeqNonunique];
3904 assert(iSeqUnique >= 0 && iSeqUnique < unique->nUnique);
3905 if (uniqConstraints[iSeqUnique] != NULL) {
3906 /* Already set a constraint for this group of sequences!
3907 Warn that we are ignoring this one unless the constraints match */
3908 if (strcmp(uniqConstraints[iSeqUnique],constraintSeq) != 0) {
3909 fprintf(stderr,
3910 "Warning: ignoring constraints for %s:\n%s\n"
3911 "Another sequence has the same sequence but different constraints\n",
3912 name, constraintSeq);
3913 }
3914 } else {
3915 uniqConstraints[iSeqUnique] = constraintSeq;
3916 }
3917 }
3918 return(uniqConstraints);
3919 }
3920
3921
SeqToProfile(NJ_t * NJ,char * seq,int nPos,char * constraintSeq,int nConstraints,int iNode,unsigned long counts[256])3922 profile_t *SeqToProfile(/*IN/OUT*/NJ_t *NJ,
3923 char *seq, int nPos,
3924 /*OPTIONAL*/char *constraintSeq, int nConstraints,
3925 int iNode,
3926 unsigned long counts[256]) {
3927 static unsigned char charToCode[256];
3928 static int codeSet = 0;
3929 int c, i;
3930
3931 if (!codeSet) {
3932 for (c = 0; c < 256; c++) {
3933 charToCode[c] = nCodes;
3934 }
3935 for (i = 0; codesString[i]; i++) {
3936 charToCode[codesString[i]] = i;
3937 charToCode[tolower(codesString[i])] = i;
3938 }
3939 charToCode['-'] = NOCODE;
3940 codeSet=1;
3941 }
3942
3943 assert(strlen(seq) == nPos);
3944 profile_t *profile = NewProfile(nPos,nConstraints);
3945
3946 for (i = 0; i < nPos; i++) {
3947 unsigned int character = (unsigned int) seq[i];
3948 counts[character]++;
3949 c = charToCode[character];
3950 if(verbose>10 && i < 2) fprintf(stderr,"pos %d char %c code %d\n", i, seq[i], c);
3951 /* treat unknowns as gaps */
3952 if (c == nCodes || c == NOCODE) {
3953 profile->codes[i] = NOCODE;
3954 profile->weights[i] = 0.0;
3955 } else {
3956 profile->codes[i] = c;
3957 profile->weights[i] = 1.0;
3958 }
3959 }
3960 if (nConstraints > 0) {
3961 for (i = 0; i < nConstraints; i++) {
3962 profile->nOn[i] = 0;
3963 profile->nOff[i] = 0;
3964 }
3965 bool bWarn = false;
3966 if (constraintSeq != NULL) {
3967 assert(strlen(constraintSeq) == nConstraints);
3968 for (i = 0; i < nConstraints; i++) {
3969 if (constraintSeq[i] == '1') {
3970 profile->nOn[i] = 1;
3971 } else if (constraintSeq[i] == '0') {
3972 profile->nOff[i] = 1;
3973 } else if (constraintSeq[i] != '-') {
3974 if (!bWarn) {
3975 fprintf(stderr, "Constraint characters in unique sequence %d replaced with gap:", iNode+1);
3976 bWarn = true;
3977 }
3978 fprintf(stderr, " %c%d", constraintSeq[i], i+1);
3979 /* For the benefit of ConstraintSequencePenalty -- this is a bit of a hack, as
3980 this modifies the value read from the alignment
3981 */
3982 constraintSeq[i] = '-';
3983 }
3984 }
3985 if (bWarn)
3986 fprintf(stderr, "\n");
3987 }
3988 }
3989 return profile;
3990 }
3991
SeqDist(unsigned char * codes1,unsigned char * codes2,int nPos,distance_matrix_t * dmat,besthit_t * hit)3992 void SeqDist(unsigned char *codes1, unsigned char *codes2, int nPos,
3993 distance_matrix_t *dmat,
3994 /*OUT*/besthit_t *hit) {
3995 double top = 0; /* summed over positions */
3996 int nUse = 0;
3997 int i;
3998 if (dmat==NULL) {
3999 int nDiff = 0;
4000 for (i = 0; i < nPos; i++) {
4001 if (codes1[i] != NOCODE && codes2[i] != NOCODE) {
4002 nUse++;
4003 if (codes1[i] != codes2[i]) nDiff++;
4004 }
4005 }
4006 top = (double)nDiff;
4007 } else {
4008 for (i = 0; i < nPos; i++) {
4009 if (codes1[i] != NOCODE && codes2[i] != NOCODE) {
4010 nUse++;
4011 top += dmat->distances[(unsigned int)codes1[i]][(unsigned int)codes2[i]];
4012 }
4013 }
4014 }
4015 hit->weight = (double)nUse;
4016 hit->dist = nUse > 0 ? top/(double)nUse : 1.0;
4017 seqOps++;
4018 }
4019
CorrectedPairDistances(profile_t ** profiles,int nProfiles,distance_matrix_t * distance_matrix,int nPos,double * distances)4020 void CorrectedPairDistances(profile_t **profiles, int nProfiles,
4021 /*OPTIONAL*/distance_matrix_t *distance_matrix,
4022 int nPos,
4023 /*OUT*/double *distances) {
4024 assert(distances != NULL);
4025 assert(profiles != NULL);
4026 assert(nProfiles>1 && nProfiles <= 4);
4027 besthit_t hit[6];
4028 int iHit,i,j;
4029
4030 for (iHit=0, i=0; i < nProfiles; i++) {
4031 for (j=i+1; j < nProfiles; j++, iHit++) {
4032 ProfileDist(profiles[i],profiles[j],nPos,distance_matrix,/*OUT*/&hit[iHit]);
4033 distances[iHit] = hit[iHit].dist;
4034 }
4035 }
4036 if (pseudoWeight > 0) {
4037 /* Estimate the prior distance */
4038 double dTop = 0;
4039 double dBottom = 0;
4040 for (iHit=0; iHit < (nProfiles*(nProfiles-1))/2; iHit++) {
4041 dTop += hit[iHit].dist * hit[iHit].weight;
4042 dBottom += hit[iHit].weight;
4043 }
4044 double prior = (dBottom > 0.01) ? dTop/dBottom : 3.0;
4045 for (iHit=0; iHit < (nProfiles*(nProfiles-1))/2; iHit++)
4046 distances[iHit] = (distances[iHit] * hit[iHit].weight + prior * pseudoWeight)
4047 / (hit[iHit].weight + pseudoWeight);
4048 }
4049 if (logdist) {
4050 for (iHit=0; iHit < (nProfiles*(nProfiles-1))/2; iHit++)
4051 distances[iHit] = LogCorrect(distances[iHit]);
4052 }
4053 }
4054
4055 /* During the neighbor-joining phase, a join only violates our constraints if
4056 node1, node2, and other are all represented in the constraint
4057 and if one of the 3 is split and the other two do not agree
4058 */
JoinConstraintPenalty(NJ_t * NJ,int node1,int node2)4059 int JoinConstraintPenalty(/*IN*/NJ_t *NJ, int node1, int node2) {
4060 if (NJ->nConstraints == 0)
4061 return(0.0);
4062 int penalty = 0;
4063 int iC;
4064 for (iC = 0; iC < NJ->nConstraints; iC++)
4065 penalty += JoinConstraintPenaltyPiece(NJ, node1, node2, iC);
4066 return(penalty);
4067 }
4068
JoinConstraintPenaltyPiece(NJ_t * NJ,int node1,int node2,int iC)4069 int JoinConstraintPenaltyPiece(NJ_t *NJ, int node1, int node2, int iC) {
4070 profile_t *pOut = NJ->outprofile;
4071 profile_t *p1 = NJ->profiles[node1];
4072 profile_t *p2 = NJ->profiles[node2];
4073 int nOn1 = p1->nOn[iC];
4074 int nOff1 = p1->nOff[iC];
4075 int nOn2 = p2->nOn[iC];
4076 int nOff2 = p2->nOff[iC];
4077 int nOnOut = pOut->nOn[iC] - nOn1 - nOn2;
4078 int nOffOut = pOut->nOff[iC] - nOff1 - nOff2;
4079
4080 if ((nOn1+nOff1) > 0 && (nOn2+nOff2) > 0 && (nOnOut+nOffOut) > 0) {
4081 /* code is -1 for split, 0 for off, 1 for on */
4082 int code1 = (nOn1 > 0 && nOff1 > 0) ? -1 : (nOn1 > 0 ? 1 : 0);
4083 int code2 = (nOn2 > 0 && nOff2 > 0) ? -1 : (nOn2 > 0 ? 1 : 0);
4084 int code3 = (nOnOut > 0 && nOffOut) > 0 ? -1 : (nOnOut > 0 ? 1 : 0);
4085 int nSplit = (code1 == -1 ? 1 : 0) + (code2 == -1 ? 1 : 0) + (code3 == -1 ? 1 : 0);
4086 int nOn = (code1 == 1 ? 1 : 0) + (code2 == 1 ? 1 : 0) + (code3 == 1 ? 1 : 0);
4087 if (nSplit == 1 && nOn == 1)
4088 return(SplitConstraintPenalty(nOn1+nOn2, nOff1+nOff2, nOnOut, nOffOut));
4089 }
4090 /* else */
4091 return(0);
4092 }
4093
QuartetConstraintPenalties(profile_t * profiles[4],int nConstraints,double penalty[3])4094 void QuartetConstraintPenalties(profile_t *profiles[4], int nConstraints, /*OUT*/double penalty[3]) {
4095 int i;
4096 for (i=0; i < 3; i++)
4097 penalty[i] = 0.0;
4098 if(nConstraints == 0)
4099 return;
4100 int iC;
4101 for (iC = 0; iC < nConstraints; iC++) {
4102 double part[3];
4103 if (QuartetConstraintPenaltiesPiece(profiles, iC, /*OUT*/part)) {
4104 for (i=0;i<3;i++)
4105 penalty[i] += part[i];
4106
4107 if (verbose>2
4108 && (fabs(part[ABvsCD]-part[ACvsBD]) > 0.001 || fabs(part[ABvsCD]-part[ADvsBC]) > 0.001))
4109 fprintf(stderr, "Constraint Penalties at %d: ABvsCD %.3f ACvsBD %.3f ADvsBC %.3f %d/%d %d/%d %d/%d %d/%d\n",
4110 iC, part[ABvsCD], part[ACvsBD], part[ADvsBC],
4111 profiles[0]->nOn[iC], profiles[0]->nOff[iC],
4112 profiles[1]->nOn[iC], profiles[1]->nOff[iC],
4113 profiles[2]->nOn[iC], profiles[2]->nOff[iC],
4114 profiles[3]->nOn[iC], profiles[3]->nOff[iC]);
4115 }
4116 }
4117 if (verbose>2)
4118 fprintf(stderr, "Total Constraint Penalties: ABvsCD %.3f ACvsBD %.3f ADvsBC %.3f\n",
4119 penalty[ABvsCD], penalty[ACvsBD], penalty[ADvsBC]);
4120 }
4121
PairConstraintDistance(int nOn1,int nOff1,int nOn2,int nOff2)4122 double PairConstraintDistance(int nOn1, int nOff1, int nOn2, int nOff2) {
4123 double f1 = nOn1/(double)(nOn1+nOff1);
4124 double f2 = nOn2/(double)(nOn2+nOff2);
4125 /* 1 - f1 * f2 - (1-f1)*(1-f2) = 1 - f1 * f2 - 1 + f1 + f2 - f1 * f2 */
4126 return(f1 + f2 - 2.0 * f1 * f2);
4127 }
4128
QuartetConstraintPenaltiesPiece(profile_t * profiles[4],int iC,double piece[3])4129 bool QuartetConstraintPenaltiesPiece(profile_t *profiles[4], int iC, /*OUT*/double piece[3]) {
4130 int nOn[4];
4131 int nOff[4];
4132 int i;
4133 int nSplit = 0;
4134 int nPlus = 0;
4135 int nMinus = 0;
4136
4137 for (i=0; i < 4; i++) {
4138 nOn[i] = profiles[i]->nOn[iC];
4139 nOff[i] = profiles[i]->nOff[iC];
4140 if (nOn[i] + nOff[i] == 0)
4141 return(false); /* ignore */
4142 else if (nOn[i] > 0 && nOff[i] > 0)
4143 nSplit++;
4144 else if (nOn[i] > 0)
4145 nPlus++;
4146 else
4147 nMinus++;
4148 }
4149 /* If just one of them is split or on the other side and the others all agree, also ignore */
4150 if (nPlus >= 3 || nMinus >= 3)
4151 return(false);
4152 piece[ABvsCD] = constraintWeight
4153 * (PairConstraintDistance(nOn[0],nOff[0],nOn[1],nOff[1])
4154 + PairConstraintDistance(nOn[2],nOff[2],nOn[3],nOff[3]));
4155 piece[ACvsBD] = constraintWeight
4156 * (PairConstraintDistance(nOn[0],nOff[0],nOn[2],nOff[2])
4157 + PairConstraintDistance(nOn[1],nOff[1],nOn[3],nOff[3]));
4158 piece[ADvsBC] = constraintWeight
4159 * (PairConstraintDistance(nOn[0],nOff[0],nOn[3],nOff[3])
4160 + PairConstraintDistance(nOn[2],nOff[2],nOn[1],nOff[1]));
4161 return(true);
4162 }
4163
4164 /* Minimum number of constrained leaves that need to be moved
4165 to satisfy the constraint (or 0 if constraint is satisfied)
4166 Defining it this way should ensure that SPR moves that break
4167 constraints get a penalty
4168 */
SplitConstraintPenalty(int nOn1,int nOff1,int nOn2,int nOff2)4169 int SplitConstraintPenalty(int nOn1, int nOff1, int nOn2, int nOff2) {
4170 return(nOn1 + nOff2 < nOn2 + nOff1 ?
4171 (nOn1 < nOff2 ? nOn1 : nOff2)
4172 : (nOn2 < nOff1 ? nOn2 : nOff1));
4173 }
4174
SplitViolatesConstraint(profile_t * profiles[4],int iConstraint)4175 bool SplitViolatesConstraint(profile_t *profiles[4], int iConstraint) {
4176 int i;
4177 int codes[4]; /* 0 for off, 1 for on, -1 for split (quit if not constrained at all) */
4178 for (i = 0; i < 4; i++) {
4179 if (profiles[i]->nOn[iConstraint] + profiles[i]->nOff[iConstraint] == 0)
4180 return(false);
4181 else if (profiles[i]->nOn[iConstraint] > 0 && profiles[i]->nOff[iConstraint] == 0)
4182 codes[i] = 1;
4183 else if (profiles[i]->nOn[iConstraint] == 0 && profiles[i]->nOff[iConstraint] > 0)
4184 codes[i] = 0;
4185 else
4186 codes[i] = -1;
4187 }
4188 int n0 = 0;
4189 int n1 = 0;
4190 for (i = 0; i < 4; i++) {
4191 if (codes[i] == 0)
4192 n0++;
4193 else if (codes[i] == 1)
4194 n1++;
4195 }
4196 /* 3 on one side means no violation, even if other is code -1
4197 otherwise must have code != -1 and agreement on the split
4198 */
4199 if (n0 >= 3 || n1 >= 3)
4200 return(false);
4201 if (n0==2 && n1==2 && codes[0] == codes[1] && codes[2] == codes[3])
4202 return(false);
4203 return(true);
4204 }
4205
LogCorrect(double dist)4206 double LogCorrect(double dist) {
4207 const double maxscore = 3.0;
4208 if (nCodes == 4 && !useMatrix) { /* Jukes-Cantor */
4209 dist = dist < 0.74 ? -0.75*log(1.0 - dist * 4.0/3.0) : maxscore;
4210 } else { /* scoredist-like */
4211 dist = dist < 0.99 ? -1.3*log(1.0 - dist) : maxscore;
4212 }
4213 return (dist < maxscore ? dist : maxscore);
4214 }
4215
4216 /* A helper function -- f1 and f2 can be NULL if the corresponding code != NOCODE
4217 */
ProfileDistPiece(unsigned int code1,unsigned int code2,numeric_t * f1,numeric_t * f2,distance_matrix_t * dmat,numeric_t * codeDist2)4218 double ProfileDistPiece(unsigned int code1, unsigned int code2,
4219 numeric_t *f1, numeric_t *f2,
4220 /*OPTIONAL*/distance_matrix_t *dmat,
4221 /*OPTIONAL*/numeric_t *codeDist2) {
4222 if (dmat) {
4223 if (code1 != NOCODE && code2 != NOCODE) { /* code1 vs code2 */
4224 return(dmat->distances[code1][code2]);
4225 } else if (codeDist2 != NULL && code1 != NOCODE) { /* code1 vs. codeDist2 */
4226 return(codeDist2[code1]);
4227 } else { /* f1 vs f2 */
4228 if (f1 == NULL) {
4229 if(code1 == NOCODE) return(10.0);
4230 f1 = &dmat->codeFreq[code1][0];
4231 }
4232 if (f2 == NULL) {
4233 if(code2 == NOCODE) return(10.0);
4234 f2 = &dmat->codeFreq[code2][0];
4235 }
4236 return(vector_multiply3_sum(f1,f2,dmat->eigenval,nCodes));
4237 }
4238 } else {
4239 /* no matrix */
4240 if (code1 != NOCODE) {
4241 if (code2 != NOCODE) {
4242 return(code1 == code2 ? 0.0 : 1.0); /* code1 vs code2 */
4243 } else {
4244 if(f2 == NULL) return(10.0);
4245 return(1.0 - f2[code1]); /* code1 vs. f2 */
4246 }
4247 } else {
4248 if (code2 != NOCODE) {
4249 if(f1 == NULL) return(10.0);
4250 return(1.0 - f1[code2]); /* f1 vs code2 */
4251 } else { /* f1 vs. f2 */
4252 if (f1 == NULL || f2 == NULL) return(10.0);
4253 double piece = 1.0;
4254 int k;
4255 for (k = 0; k < nCodes; k++) {
4256 piece -= f1[k] * f2[k];
4257 }
4258 return(piece);
4259 }
4260 }
4261 }
4262 assert(0);
4263 }
4264
4265 /* E.g. GET_FREQ(profile,iPos,iVector)
4266 Gets the next element of the vectors (and updates iVector), or
4267 returns NULL if we didn't store a vector
4268 */
4269 #define GET_FREQ(P,I,IVECTOR) \
4270 (P->weights[I] > 0 && P->codes[I] == NOCODE ? &P->vectors[nCodes*(IVECTOR++)] : NULL)
4271
ProfileDist(profile_t * profile1,profile_t * profile2,int nPos,distance_matrix_t * dmat,besthit_t * hit)4272 void ProfileDist(profile_t *profile1, profile_t *profile2, int nPos,
4273 /*OPTIONAL*/distance_matrix_t *dmat,
4274 /*OUT*/besthit_t *hit) {
4275 double top = 0;
4276 double denom = 0;
4277 int iFreq1 = 0;
4278 int iFreq2 = 0;
4279 int i = 0;
4280 for (i = 0; i < nPos; i++) {
4281 numeric_t *f1 = GET_FREQ(profile1,i,/*IN/OUT*/iFreq1);
4282 numeric_t *f2 = GET_FREQ(profile2,i,/*IN/OUT*/iFreq2);
4283 if (profile1->weights[i] > 0 && profile2->weights[i] > 0) {
4284 double weight = profile1->weights[i] * profile2->weights[i];
4285 denom += weight;
4286 double piece = ProfileDistPiece(profile1->codes[i],profile2->codes[i],f1,f2,dmat,
4287 profile2->codeDist ? &profile2->codeDist[i*nCodes] : NULL);
4288 top += weight * piece;
4289 }
4290 }
4291 assert(iFreq1 == profile1->nVectors);
4292 assert(iFreq2 == profile2->nVectors);
4293 hit->weight = denom > 0 ? denom : 0.01; /* 0.01 is an arbitrarily low value of weight (normally >>1) */
4294 hit->dist = denom > 0 ? top/denom : 1;
4295 profileOps++;
4296 }
4297
4298 /* This should not be called if the update weight is 0, as
4299 in that case code==NOCODE and in=NULL is possible, and then
4300 it will fail.
4301 */
AddToFreq(numeric_t * fOut,double weight,unsigned int codeIn,numeric_t * fIn,distance_matrix_t * dmat)4302 void AddToFreq(/*IN/OUT*/numeric_t *fOut,
4303 double weight,
4304 unsigned int codeIn, /*OPTIONAL*/numeric_t *fIn,
4305 /*OPTIONAL*/distance_matrix_t *dmat) {
4306 assert(fOut != NULL);
4307 if (fIn != NULL) {
4308 vector_add_mult(fOut, fIn, weight, nCodes);
4309 } else if (dmat) {
4310 assert(codeIn != NOCODE);
4311 vector_add_mult(fOut, dmat->codeFreq[codeIn], weight, nCodes);
4312 } else {
4313 assert(codeIn != NOCODE);
4314 fOut[codeIn] += weight;
4315 }
4316 }
4317
SetProfile(NJ_t * NJ,int node,double weight1)4318 void SetProfile(/*IN/OUT*/NJ_t *NJ, int node, double weight1) {
4319 children_t *c = &NJ->child[node];
4320 assert(c->nChild == 2);
4321 assert(NJ->profiles[c->child[0]] != NULL);
4322 assert(NJ->profiles[c->child[1]] != NULL);
4323 if (NJ->profiles[node] != NULL)
4324 FreeProfile(NJ->profiles[node], NJ->nPos, NJ->nConstraints);
4325 NJ->profiles[node] = AverageProfile(NJ->profiles[c->child[0]],
4326 NJ->profiles[c->child[1]],
4327 NJ->nPos, NJ->nConstraints,
4328 NJ->distance_matrix,
4329 weight1);
4330 }
4331
4332 /* bionjWeight is the weight of the first sequence (between 0 and 1),
4333 or -1 to do the average.
4334 */
AverageProfile(profile_t * profile1,profile_t * profile2,int nPos,int nConstraints,distance_matrix_t * dmat,double bionjWeight)4335 profile_t *AverageProfile(profile_t *profile1, profile_t *profile2,
4336 int nPos, int nConstraints,
4337 distance_matrix_t *dmat,
4338 double bionjWeight) {
4339 int i;
4340 if (bionjWeight < 0) {
4341 bionjWeight = 0.5;
4342 }
4343
4344 /* First, set codes and weights and see how big vectors will be */
4345 profile_t *out = NewProfile(nPos, nConstraints);
4346
4347 for (i = 0; i < nPos; i++) {
4348 out->weights[i] = bionjWeight * profile1->weights[i]
4349 + (1-bionjWeight) * profile2->weights[i];
4350 out->codes[i] = NOCODE;
4351 if (out->weights[i] > 0) {
4352 if (profile1->weights[i] > 0 && profile1->codes[i] != NOCODE
4353 && (profile2->weights[i] <= 0 || profile1->codes[i] == profile2->codes[i])) {
4354 out->codes[i] = profile1->codes[i];
4355 } else if (profile1->weights[i] <= 0
4356 && profile2->weights[i] > 0
4357 && profile2->codes[i] != NOCODE) {
4358 out->codes[i] = profile2->codes[i];
4359 }
4360 if (out->codes[i] == NOCODE) out->nVectors++;
4361 }
4362 }
4363
4364 /* Allocate and set the vectors */
4365 out->vectors = (numeric_t*)mymalloc(sizeof(numeric_t)*nCodes*out->nVectors);
4366 for (i = 0; i < nCodes * out->nVectors; i++) out->vectors[i] = 0;
4367 nProfileFreqAlloc += out->nVectors;
4368 nProfileFreqAvoid += nPos - out->nVectors;
4369 int iFreqOut = 0;
4370 int iFreq1 = 0;
4371 int iFreq2 = 0;
4372 for (i=0; i < nPos; i++) {
4373 numeric_t *f = GET_FREQ(out,i,/*IN/OUT*/iFreqOut);
4374 numeric_t *f1 = GET_FREQ(profile1,i,/*IN/OUT*/iFreq1);
4375 numeric_t *f2 = GET_FREQ(profile2,i,/*IN/OUT*/iFreq2);
4376 if (f != NULL) {
4377 if (profile1->weights[i] > 0)
4378 AddToFreq(/*IN/OUT*/f, profile1->weights[i] * bionjWeight,
4379 profile1->codes[i], f1, dmat);
4380 if (profile2->weights[i] > 0)
4381 AddToFreq(/*IN/OUT*/f, profile2->weights[i] * (1.0-bionjWeight),
4382 profile2->codes[i], f2, dmat);
4383 NormalizeFreq(/*IN/OUT*/f, dmat);
4384 } /* end if computing f */
4385 if (verbose > 10 && i < 5) {
4386 fprintf(stderr,"Average profiles: pos %d in-w1 %f in-w2 %f bionjWeight %f to weight %f code %d\n",
4387 i, profile1->weights[i], profile2->weights[i], bionjWeight,
4388 out->weights[i], out->codes[i]);
4389 if (f!= NULL) {
4390 int k;
4391 for (k = 0; k < nCodes; k++)
4392 fprintf(stderr, "\t%c:%f", codesString[k], f ? f[k] : -1.0);
4393 fprintf(stderr,"\n");
4394 }
4395 }
4396 } /* end loop over positions */
4397 assert(iFreq1 == profile1->nVectors);
4398 assert(iFreq2 == profile2->nVectors);
4399 assert(iFreqOut == out->nVectors);
4400
4401 /* compute total constraints */
4402 for (i = 0; i < nConstraints; i++) {
4403 out->nOn[i] = profile1->nOn[i] + profile2->nOn[i];
4404 out->nOff[i] = profile1->nOff[i] + profile2->nOff[i];
4405 }
4406 profileAvgOps++;
4407 return(out);
4408 }
4409
4410 /* Make the (unrotated) frequencies sum to 1
4411 Simply dividing by total_weight is not ideal because of roundoff error
4412 So compute total_freq instead
4413 */
NormalizeFreq(numeric_t * freq,distance_matrix_t * dmat)4414 void NormalizeFreq(/*IN/OUT*/numeric_t *freq, distance_matrix_t *dmat) {
4415 double total_freq = 0;
4416 int k;
4417 if (dmat != NULL) {
4418 /* The total frequency is dot_product(true_frequencies, 1)
4419 So we rotate the 1 vector by eigeninv (stored in eigentot)
4420 */
4421 total_freq = vector_multiply_sum(freq, dmat->eigentot, nCodes);
4422 } else {
4423 for (k = 0; k < nCodes; k++)
4424 total_freq += freq[k];
4425 }
4426 if (total_freq > fPostTotalTolerance) {
4427 numeric_t inverse_weight = 1.0/total_freq;
4428 vector_multiply_by(/*IN/OUT*/freq, inverse_weight, nCodes);
4429 } else {
4430 /* This can happen if we are in a very low-weight region, e.g. if a mostly-gap position gets weighted down
4431 repeatedly; just set them all to arbitrary but legal values */
4432 if (dmat == NULL) {
4433 for (k = 0; k < nCodes; k++)
4434 freq[k] = 1.0/nCodes;
4435 } else {
4436 for (k = 0; k < nCodes; k++)
4437 freq[k] = dmat->codeFreq[0][k];
4438 }
4439 }
4440 }
4441
4442 /* OutProfile() computes the out-profile */
OutProfile(profile_t ** profiles,int nProfiles,int nPos,int nConstraints,distance_matrix_t * dmat)4443 profile_t *OutProfile(profile_t **profiles, int nProfiles,
4444 int nPos, int nConstraints,
4445 distance_matrix_t *dmat) {
4446 int i; /* position */
4447 int in; /* profile */
4448 profile_t *out = NewProfile(nPos, nConstraints);
4449
4450 double inweight = 1.0/(double)nProfiles; /* The maximal output weight is 1.0 */
4451
4452 /* First, set weights -- code is always NOCODE, prevent weight=0 */
4453 for (i = 0; i < nPos; i++) {
4454 out->weights[i] = 0;
4455 for (in = 0; in < nProfiles; in++)
4456 out->weights[i] += profiles[in]->weights[i] * inweight;
4457 if (out->weights[i] <= 0) out->weights[i] = 1e-20; /* always store a vector */
4458 out->nVectors++;
4459 out->codes[i] = NOCODE; /* outprofile is normally complicated */
4460 }
4461
4462 /* Initialize the frequencies to 0 */
4463 out->vectors = (numeric_t*)mymalloc(sizeof(numeric_t)*nCodes*out->nVectors);
4464 for (i = 0; i < nCodes*out->nVectors; i++)
4465 out->vectors[i] = 0;
4466
4467 /* Add up the weights, going through each sequence in turn */
4468 for (in = 0; in < nProfiles; in++) {
4469 int iFreqOut = 0;
4470 int iFreqIn = 0;
4471 for (i = 0; i < nPos; i++) {
4472 numeric_t *fIn = GET_FREQ(profiles[in],i,/*IN/OUT*/iFreqIn);
4473 numeric_t *fOut = GET_FREQ(out,i,/*IN/OUT*/iFreqOut);
4474 if (profiles[in]->weights[i] > 0)
4475 AddToFreq(/*IN/OUT*/fOut, profiles[in]->weights[i],
4476 profiles[in]->codes[i], fIn, dmat);
4477 }
4478 assert(iFreqOut == out->nVectors);
4479 assert(iFreqIn == profiles[in]->nVectors);
4480 }
4481
4482 /* And normalize the frequencies to sum to 1 */
4483 int iFreqOut = 0;
4484 for (i = 0; i < nPos; i++) {
4485 numeric_t *fOut = GET_FREQ(out,i,/*IN/OUT*/iFreqOut);
4486 if (fOut)
4487 NormalizeFreq(/*IN/OUT*/fOut, dmat);
4488 }
4489 assert(iFreqOut == out->nVectors);
4490 if (verbose > 10) fprintf(stderr,"Average %d profiles\n", nProfiles);
4491 if(dmat)
4492 SetCodeDist(/*IN/OUT*/out, nPos, dmat);
4493
4494 /* Compute constraints */
4495 for (i = 0; i < nConstraints; i++) {
4496 out->nOn[i] = 0;
4497 out->nOff[i] = 0;
4498 for (in = 0; in < nProfiles; in++) {
4499 out->nOn[i] += profiles[in]->nOn[i];
4500 out->nOff[i] += profiles[in]->nOff[i];
4501 }
4502 }
4503 return(out);
4504 }
4505
UpdateOutProfile(profile_t * out,profile_t * old1,profile_t * old2,profile_t * new,int nActiveOld,int nPos,int nConstraints,distance_matrix_t * dmat)4506 void UpdateOutProfile(/*IN/OUT*/profile_t *out, profile_t *old1, profile_t *old2,
4507 profile_t *new, int nActiveOld,
4508 int nPos, int nConstraints,
4509 distance_matrix_t *dmat) {
4510 int i, k;
4511 int iFreqOut = 0;
4512 int iFreq1 = 0;
4513 int iFreq2 = 0;
4514 int iFreqNew = 0;
4515 assert(nActiveOld > 0);
4516
4517 for (i = 0; i < nPos; i++) {
4518 numeric_t *fOut = GET_FREQ(out,i,/*IN/OUT*/iFreqOut);
4519 numeric_t *fOld1 = GET_FREQ(old1,i,/*IN/OUT*/iFreq1);
4520 numeric_t *fOld2 = GET_FREQ(old2,i,/*IN/OUT*/iFreq2);
4521 numeric_t *fNew = GET_FREQ(new,i,/*IN/OUT*/iFreqNew);
4522
4523 assert(out->codes[i] == NOCODE && fOut != NULL); /* No no-vector optimization for outprofiles */
4524 if (verbose > 3 && i < 3) {
4525 fprintf(stderr,"Updating out-profile position %d weight %f (mult %f)\n",
4526 i, out->weights[i], out->weights[i]*nActiveOld);
4527 }
4528 double originalMult = out->weights[i]*nActiveOld;
4529 double newMult = originalMult + new->weights[i] - old1->weights[i] - old2->weights[i];
4530 out->weights[i] = newMult/(nActiveOld-1);
4531 if (out->weights[i] <= 0) out->weights[i] = 1e-20; /* always use the vector */
4532
4533 for (k = 0; k < nCodes; k++) fOut[k] *= originalMult;
4534
4535 if (old1->weights[i] > 0)
4536 AddToFreq(/*IN/OUT*/fOut, -old1->weights[i], old1->codes[i], fOld1, dmat);
4537 if (old2->weights[i] > 0)
4538 AddToFreq(/*IN/OUT*/fOut, -old2->weights[i], old2->codes[i], fOld2, dmat);
4539 if (new->weights[i] > 0)
4540 AddToFreq(/*IN/OUT*/fOut, new->weights[i], new->codes[i], fNew, dmat);
4541
4542 /* And renormalize */
4543 NormalizeFreq(/*IN/OUT*/fOut, dmat);
4544
4545 if (verbose > 2 && i < 3) {
4546 fprintf(stderr,"Updated out-profile position %d weight %f (mult %f)",
4547 i, out->weights[i], out->weights[i]*nActiveOld);
4548 if(out->weights[i] > 0)
4549 for (k=0;k<nCodes;k++)
4550 fprintf(stderr, " %c:%f", dmat?'?':codesString[k], fOut[k]);
4551 fprintf(stderr,"\n");
4552 }
4553 }
4554 assert(iFreqOut == out->nVectors);
4555 assert(iFreq1 == old1->nVectors);
4556 assert(iFreq2 == old2->nVectors);
4557 assert(iFreqNew == new->nVectors);
4558 if(dmat)
4559 SetCodeDist(/*IN/OUT*/out,nPos,dmat);
4560
4561 /* update constraints -- note in practice this should be a no-op */
4562 for (i = 0; i < nConstraints; i++) {
4563 out->nOn[i] += new->nOn[i] - old1->nOn[i] - old2->nOn[i];
4564 out->nOff[i] += new->nOff[i] - old1->nOff[i] - old2->nOff[i];
4565 }
4566 }
4567
SetCodeDist(profile_t * profile,int nPos,distance_matrix_t * dmat)4568 void SetCodeDist(/*IN/OUT*/profile_t *profile, int nPos,
4569 distance_matrix_t *dmat) {
4570 if (profile->codeDist == NULL)
4571 profile->codeDist = (numeric_t*)mymalloc(sizeof(numeric_t)*nPos*nCodes);
4572 int i;
4573 int iFreq = 0;
4574 for (i = 0; i < nPos; i++) {
4575 numeric_t *f = GET_FREQ(profile,i,/*IN/OUT*/iFreq);
4576
4577 int k;
4578 for (k = 0; k < nCodes; k++)
4579 profile->codeDist[i*nCodes+k] = ProfileDistPiece(/*code1*/profile->codes[i], /*code2*/k,
4580 /*f1*/f, /*f2*/NULL,
4581 dmat, NULL);
4582 }
4583 assert(iFreq==profile->nVectors);
4584 }
4585
4586
SetBestHit(int node,NJ_t * NJ,int nActive,besthit_t * bestjoin,besthit_t * allhits)4587 void SetBestHit(int node, NJ_t *NJ, int nActive,
4588 /*OUT*/besthit_t *bestjoin, /*OUT OPTIONAL*/besthit_t *allhits) {
4589 assert(NJ->parent[node] < 0);
4590
4591 bestjoin->i = node;
4592 bestjoin->j = -1;
4593 bestjoin->dist = 1e20;
4594 bestjoin->criterion = 1e20;
4595
4596 int j;
4597 besthit_t tmp;
4598
4599 #ifdef OPENMP
4600 /* Note -- if we are already in a parallel region, this will be ignored */
4601 #pragma omp parallel for schedule(dynamic, 50)
4602 #endif
4603 for (j = 0; j < NJ->maxnode; j++) {
4604 besthit_t *sv = allhits != NULL ? &allhits[j] : &tmp;
4605 sv->i = node;
4606 sv->j = j;
4607 if (NJ->parent[j] >= 0) {
4608 sv->i = -1; /* illegal/empty join */
4609 sv->weight = 0.0;
4610 sv->criterion = sv->dist = 1e20;
4611 continue;
4612 }
4613 /* Note that we compute self-distances (allow j==node) because the top-hit heuristic
4614 expects self to be within its top hits, but we exclude those from the bestjoin
4615 that we return...
4616 */
4617 SetDistCriterion(NJ, nActive, /*IN/OUT*/sv);
4618 if (sv->criterion < bestjoin->criterion && node != j)
4619 *bestjoin = *sv;
4620 }
4621 if (verbose>5) {
4622 fprintf(stderr, "SetBestHit %d %d %f %f\n", bestjoin->i, bestjoin->j, bestjoin->dist, bestjoin->criterion);
4623 }
4624 }
4625
ReadMatrix(char * filename,numeric_t codes[MAXCODES][MAXCODES],bool checkCodes)4626 void ReadMatrix(char *filename, /*OUT*/numeric_t codes[MAXCODES][MAXCODES], bool checkCodes) {
4627 char buf[BUFFER_SIZE] = "";
4628 FILE *fp = fopen(filename, "r");
4629 if (fp == NULL) {
4630 fprintf(stderr, "Cannot read %s\n",filename);
4631 exit(1);
4632 }
4633 if (fgets(buf,sizeof(buf),fp) == NULL) {
4634 fprintf(stderr, "Error reading header line for %s:\n%s\n", filename, buf);
4635 exit(1);
4636 }
4637 if (checkCodes) {
4638 int i;
4639 int iBufPos;
4640 for (iBufPos=0,i=0;i<nCodes;i++,iBufPos++) {
4641 if(buf[iBufPos] != codesString[i]) {
4642 fprintf(stderr,"Header line\n%s\nin file %s does not have expected code %c # %d in %s\n",
4643 buf, filename, codesString[i], i, codesString);
4644 exit(1);
4645 }
4646 iBufPos++;
4647 if(buf[iBufPos] != '\n' && buf[iBufPos] != '\r' && buf[iBufPos] != '\0' && buf[iBufPos] != '\t') {
4648 fprintf(stderr, "Header line in %s should be tab-delimited\n", filename);
4649 exit(1);
4650 }
4651 if (buf[iBufPos] == '\0' && i < nCodes-1) {
4652 fprintf(stderr, "Header line in %s ends prematurely\n",filename);
4653 exit(1);
4654 }
4655 } /* end loop over codes */
4656 /* Should be at end, but allow \n because of potential DOS \r\n */
4657 if(buf[iBufPos] != '\0' && buf[iBufPos] != '\n' && buf[iBufPos] != '\r') {
4658 fprintf(stderr, "Header line in %s has too many entries\n", filename);
4659 exit(1);
4660 }
4661 }
4662 int iLine;
4663 for (iLine = 0; iLine < nCodes; iLine++) {
4664 buf[0] = '\0';
4665 if (fgets(buf,sizeof(buf),fp) == NULL) {
4666 fprintf(stderr, "Cannot read line %d from file %s\n", iLine+2, filename);
4667 exit(1);
4668 }
4669 char *field = strtok(buf,"\t\r\n");
4670 field = strtok(NULL, "\t"); /* ignore first column */
4671 int iColumn;
4672 for (iColumn = 0; iColumn < nCodes && field != NULL; iColumn++, field = strtok(NULL,"\t")) {
4673 if(sscanf(field,ScanNumericSpec,&codes[iLine][iColumn]) != 1) {
4674 fprintf(stderr,"Cannot parse field %s in file %s\n", field, filename);
4675 exit(1);
4676 }
4677 }
4678 }
4679 }
4680
ReadVector(char * filename,numeric_t codes[MAXCODES])4681 void ReadVector(char *filename, /*OUT*/numeric_t codes[MAXCODES]) {
4682 FILE *fp = fopen(filename,"r");
4683 if (fp == NULL) {
4684 fprintf(stderr, "Cannot read %s\n",filename);
4685 exit(1);
4686 }
4687 int i;
4688 for (i = 0; i < nCodes; i++) {
4689 if (fscanf(fp,ScanNumericSpec,&codes[i]) != 1) {
4690 fprintf(stderr,"Cannot read %d entry of %s\n",i+1,filename);
4691 exit(1);
4692 }
4693 }
4694 if (fclose(fp) != 0) {
4695 fprintf(stderr, "Error reading %s\n",filename);
4696 exit(1);
4697 }
4698 }
4699
ReadDistanceMatrix(char * prefix)4700 distance_matrix_t *ReadDistanceMatrix(char *prefix) {
4701 char buffer[BUFFER_SIZE];
4702 distance_matrix_t *dmat = (distance_matrix_t*)mymalloc(sizeof(distance_matrix_t));
4703
4704 if(strlen(prefix) > BUFFER_SIZE-20) {
4705 fprintf(stderr,"Filename %s too long\n", prefix);
4706 exit(1);
4707 }
4708
4709 strcpy(buffer, prefix);
4710 strcat(buffer, ".distances");
4711 ReadMatrix(buffer, /*OUT*/dmat->distances, /*checkCodes*/true);
4712
4713 strcpy(buffer, prefix);
4714 strcat(buffer, ".inverses");
4715 ReadMatrix(buffer, /*OUT*/dmat->eigeninv, /*checkCodes*/false);
4716
4717 strcpy(buffer, prefix);
4718 strcat(buffer, ".eigenvalues");
4719 ReadVector(buffer, /*OUT*/dmat->eigenval);
4720
4721 if(verbose>1) fprintf(stderr, "Read distance matrix from %s\n",prefix);
4722 SetupDistanceMatrix(/*IN/OUT*/dmat);
4723 return(dmat);
4724 }
4725
SetupDistanceMatrix(distance_matrix_t * dmat)4726 void SetupDistanceMatrix(/*IN/OUT*/distance_matrix_t *dmat) {
4727 /* Check that the eigenvalues and eigen-inverse are consistent with the
4728 distance matrix and that the matrix is symmetric */
4729 int i,j,k;
4730 for (i = 0; i < nCodes; i++) {
4731 for (j = 0; j < nCodes; j++) {
4732 if(fabs(dmat->distances[i][j]-dmat->distances[j][i]) > 1e-6) {
4733 fprintf(stderr,"Distance matrix not symmetric for %d,%d: %f vs %f\n",
4734 i+1,j+1,
4735 dmat->distances[i][j],
4736 dmat->distances[j][i]);
4737 exit(1);
4738 }
4739 double total = 0.0;
4740 for (k = 0; k < nCodes; k++)
4741 total += dmat->eigenval[k] * dmat->eigeninv[k][i] * dmat->eigeninv[k][j];
4742 if(fabs(total - dmat->distances[i][j]) > 1e-6) {
4743 fprintf(stderr,"Distance matrix entry %d,%d should be %f but eigen-representation gives %f\n",
4744 i+1,j+1,dmat->distances[i][j],total);
4745 exit(1);
4746 }
4747 }
4748 }
4749
4750 /* And compute eigentot */
4751 for (k = 0; k < nCodes; k++) {
4752 dmat->eigentot[k] = 0.;
4753 int j;
4754 for (j = 0; j < nCodes; j++)
4755 dmat->eigentot[k] += dmat->eigeninv[k][j];
4756 }
4757
4758 /* And compute codeFreq */
4759 int code;
4760 for(code = 0; code < nCodes; code++) {
4761 for (k = 0; k < nCodes; k++) {
4762 dmat->codeFreq[code][k] = dmat->eigeninv[k][code];
4763 }
4764 }
4765 /* And gapFreq */
4766 for(code = 0; code < nCodes; code++) {
4767 double gapFreq = 0.0;
4768 for (k = 0; k < nCodes; k++)
4769 gapFreq += dmat->codeFreq[k][code];
4770 dmat->gapFreq[code] = gapFreq / nCodes;
4771 }
4772
4773 if(verbose>10) fprintf(stderr, "Made codeFreq\n");
4774 }
4775
ChooseNNI(profile_t * profiles[4],distance_matrix_t * dmat,int nPos,int nConstraints,double criteria[3])4776 nni_t ChooseNNI(profile_t *profiles[4],
4777 /*OPTIONAL*/distance_matrix_t *dmat,
4778 int nPos, int nConstraints,
4779 /*OUT*/double criteria[3]) {
4780 double d[6];
4781 CorrectedPairDistances(profiles, 4, dmat, nPos, /*OUT*/d);
4782 double penalty[3]; /* indexed as nni_t */
4783 QuartetConstraintPenalties(profiles, nConstraints, /*OUT*/penalty);
4784 criteria[ABvsCD] = d[qAB] + d[qCD] + penalty[ABvsCD];
4785 criteria[ACvsBD] = d[qAC] + d[qBD] + penalty[ACvsBD];
4786 criteria[ADvsBC] = d[qAD] + d[qBC] + penalty[ADvsBC];
4787
4788 nni_t choice = ABvsCD;
4789 if (criteria[ACvsBD] < criteria[ABvsCD] && criteria[ACvsBD] <= criteria[ADvsBC]) {
4790 choice = ACvsBD;
4791 } else if (criteria[ADvsBC] < criteria[ABvsCD] && criteria[ADvsBC] <= criteria[ACvsBD]) {
4792 choice = ADvsBC;
4793 }
4794 if (verbose > 1 && penalty[choice] > penalty[ABvsCD] + 1e-6) {
4795 fprintf(stderr, "Worsen constraint: from %.3f to %.3f distance %.3f to %.3f: ",
4796 penalty[ABvsCD], penalty[choice],
4797 criteria[ABvsCD], choice == ACvsBD ? criteria[ACvsBD] : criteria[ADvsBC]);
4798 int iC;
4799 for (iC = 0; iC < nConstraints; iC++) {
4800 double ppart[3];
4801 if (QuartetConstraintPenaltiesPiece(profiles, iC, /*OUT*/ppart)) {
4802 double old_penalty = ppart[ABvsCD];
4803 double new_penalty = ppart[choice];
4804 if (new_penalty > old_penalty + 1e-6)
4805 fprintf(stderr, " %d (%d/%d %d/%d %d/%d %d/%d)", iC,
4806 profiles[0]->nOn[iC], profiles[0]->nOff[iC],
4807 profiles[1]->nOn[iC], profiles[1]->nOff[iC],
4808 profiles[2]->nOn[iC], profiles[2]->nOff[iC],
4809 profiles[3]->nOn[iC], profiles[3]->nOff[iC]);
4810 }
4811 }
4812 fprintf(stderr,"\n");
4813 }
4814 if (verbose > 3)
4815 fprintf(stderr, "NNI scores ABvsCD %.5f ACvsBD %.5f ADvsBC %.5f choice %s\n",
4816 criteria[ABvsCD], criteria[ACvsBD], criteria[ADvsBC],
4817 choice == ABvsCD ? "AB|CD" : (choice == ACvsBD ? "AC|BD" : "AD|BC"));
4818 return(choice);
4819 }
4820
PosteriorProfile(profile_t * p1,profile_t * p2,double len1,double len2,transition_matrix_t * transmat,rates_t * rates,int nPos,int nConstraints)4821 profile_t *PosteriorProfile(profile_t *p1, profile_t *p2,
4822 double len1, double len2,
4823 /*OPTIONAL*/transition_matrix_t *transmat,
4824 rates_t *rates,
4825 int nPos, int nConstraints) {
4826 if (len1 < MLMinBranchLength)
4827 len1 = MLMinBranchLength;
4828 if (len2 < MLMinBranchLength)
4829 len2 = MLMinBranchLength;
4830
4831 int i,j,k;
4832 profile_t *out = NewProfile(nPos, nConstraints);
4833 for (i = 0; i < nPos; i++) {
4834 out->codes[i] = NOCODE;
4835 out->weights[i] = 1.0;
4836 }
4837 out->nVectors = nPos;
4838 out->vectors = (numeric_t*)mymalloc(sizeof(numeric_t)*nCodes*out->nVectors);
4839 for (i = 0; i < nCodes * out->nVectors; i++) out->vectors[i] = 0;
4840 int iFreqOut = 0;
4841 int iFreq1 = 0;
4842 int iFreq2 = 0;
4843 numeric_t *expeigenRates1 = NULL, *expeigenRates2 = NULL;
4844
4845 if (transmat != NULL) {
4846 expeigenRates1 = ExpEigenRates(len1, transmat, rates);
4847 expeigenRates2 = ExpEigenRates(len2, transmat, rates);
4848 }
4849
4850 if (transmat == NULL) { /* Jukes-Cantor */
4851 assert(nCodes == 4);
4852
4853 double *PSame1 = PSameVector(len1, rates);
4854 double *PDiff1 = PDiffVector(PSame1, rates);
4855 double *PSame2 = PSameVector(len2, rates);
4856 double *PDiff2 = PDiffVector(PSame2, rates);
4857
4858 numeric_t mix1[4], mix2[4];
4859
4860 for (i=0; i < nPos; i++) {
4861 int iRate = rates->ratecat[i];
4862 double w1 = p1->weights[i];
4863 double w2 = p2->weights[i];
4864 int code1 = p1->codes[i];
4865 int code2 = p2->codes[i];
4866 numeric_t *f1 = GET_FREQ(p1,i,/*IN/OUT*/iFreq1);
4867 numeric_t *f2 = GET_FREQ(p2,i,/*IN/OUT*/iFreq2);
4868
4869 /* First try to store a simple profile */
4870 if (f1 == NULL && f2 == NULL) {
4871 if (code1 == NOCODE && code2 == NOCODE) {
4872 out->codes[i] = NOCODE;
4873 out->weights[i] = 0.0;
4874 continue;
4875 } else if (code1 == NOCODE) {
4876 /* Posterior(parent | character & gap, len1, len2) = Posterior(parent | character, len1)
4877 = PSame() for matching characters and 1-PSame() for the rest
4878 = (pSame - pDiff) * character + (1-(pSame-pDiff)) * gap
4879 */
4880 out->codes[i] = code2;
4881 out->weights[i] = w2 * (PSame2[iRate] - PDiff2[iRate]);
4882 continue;
4883 } else if (code2 == NOCODE) {
4884 out->codes[i] = code1;
4885 out->weights[i] = w1 * (PSame1[iRate] - PDiff1[iRate]);
4886 continue;
4887 } else if (code1 == code2) {
4888 out->codes[i] = code1;
4889 double f12code = (w1*PSame1[iRate] + (1-w1)*0.25) * (w2*PSame2[iRate] + (1-w2)*0.25);
4890 double f12other = (w1*PDiff1[iRate] + (1-w1)*0.25) * (w2*PDiff2[iRate] + (1-w2)*0.25);
4891 /* posterior probability of code1/code2 after scaling */
4892 double pcode = f12code/(f12code+3*f12other);
4893 /* Now f = w * (code ? 1 : 0) + (1-w) * 0.25, so to get pcode we need
4894 fcode = 1/4 + w1*3/4 or w = (f-1/4)*4/3
4895 */
4896 out->weights[i] = (pcode - 0.25) * 4.0/3.0;
4897 /* This can be zero because of numerical problems, I think */
4898 if (out->weights[i] < 1e-6) {
4899 if (verbose > 1)
4900 fprintf(stderr, "Replaced weight %f with %f from w1 %f w2 %f PSame %f %f f12code %f f12other %f\n",
4901 out->weights[i], 1e-6,
4902 w1, w2,
4903 PSame1[iRate], PSame2[iRate],
4904 f12code, f12other);
4905 out->weights[i] = 1e-6;
4906 }
4907 continue;
4908 }
4909 }
4910 /* if we did not compute a simple profile, then do the full computation and
4911 store the full vector
4912 */
4913 if (f1 == NULL) {
4914 for (j = 0; j < 4; j++)
4915 mix1[j] = (1-w1)*0.25;
4916 if(code1 != NOCODE)
4917 mix1[code1] += w1;
4918 f1 = mix1;
4919 }
4920 if (f2 == NULL) {
4921 for (j = 0; j < 4; j++)
4922 mix2[j] = (1-w2)*0.25;
4923 if(code2 != NOCODE)
4924 mix2[code2] += w2;
4925 f2 = mix2;
4926 }
4927 out->codes[i] = NOCODE;
4928 out->weights[i] = 1.0;
4929 numeric_t *f = GET_FREQ(out,i,/*IN/OUT*/iFreqOut);
4930 double lkAB = 0;
4931 for (j = 0; j < 4; j++) {
4932 f[j] = (f1[j] * PSame1[iRate] + (1.0-f1[j]) * PDiff1[iRate])
4933 * (f2[j] * PSame2[iRate] + (1.0-f2[j]) * PDiff2[iRate]);
4934 lkAB += f[j];
4935 }
4936 double lkABInv = 1.0/lkAB;
4937 for (j = 0; j < 4; j++)
4938 f[j] *= lkABInv;
4939 }
4940 PSame1 = myfree(PSame1, sizeof(double) * rates->nRateCategories);
4941 PSame2 = myfree(PSame2, sizeof(double) * rates->nRateCategories);
4942 PDiff1 = myfree(PDiff1, sizeof(double) * rates->nRateCategories);
4943 PDiff2 = myfree(PDiff2, sizeof(double) * rates->nRateCategories);
4944 } else if (nCodes == 4) { /* matrix model on nucleotides */
4945 numeric_t *fGap = &transmat->codeFreq[NOCODE][0];
4946 numeric_t f1mix[4], f2mix[4];
4947
4948 for (i=0; i < nPos; i++) {
4949 if (p1->codes[i] == NOCODE && p2->codes[i] == NOCODE
4950 && p1->weights[i] == 0 && p2->weights[i] == 0) {
4951 /* aligning gap with gap -- just output a gap
4952 out->codes[i] is already set to NOCODE so need not set that */
4953 out->weights[i] = 0;
4954 continue;
4955 }
4956 int iRate = rates->ratecat[i];
4957 numeric_t *expeigen1 = &expeigenRates1[iRate*4];
4958 numeric_t *expeigen2 = &expeigenRates2[iRate*4];
4959 numeric_t *f1 = GET_FREQ(p1,i,/*IN/OUT*/iFreq1);
4960 numeric_t *f2 = GET_FREQ(p2,i,/*IN/OUT*/iFreq2);
4961 numeric_t *fOut = GET_FREQ(out,i,/*IN/OUT*/iFreqOut);
4962 assert(fOut != NULL);
4963
4964 if (f1 == NULL) {
4965 f1 = &transmat->codeFreq[p1->codes[i]][0]; /* codeFreq includes an entry for NOCODE */
4966 double w = p1->weights[i];
4967 if (w > 0.0 && w < 1.0) {
4968 for (j = 0; j < 4; j++)
4969 f1mix[j] = w * f1[j] + (1.0-w) * fGap[j];
4970 f1 = f1mix;
4971 }
4972 }
4973 if (f2 == NULL) {
4974 f2 = &transmat->codeFreq[p2->codes[i]][0];
4975 double w = p2->weights[i];
4976 if (w > 0.0 && w < 1.0) {
4977 for (j = 0; j < 4; j++)
4978 f2mix[j] = w * f2[j] + (1.0-w) * fGap[j];
4979 f2 = f2mix;
4980 }
4981 }
4982 numeric_t fMult1[4] ALIGNED; /* rotated1 * expeigen1 */
4983 numeric_t fMult2[4] ALIGNED; /* rotated2 * expeigen2 */
4984 #if 0 /* SSE3 is slower */
4985 vector_multiply(f1, expeigen1, 4, /*OUT*/fMult1);
4986 vector_multiply(f2, expeigen2, 4, /*OUT*/fMult2);
4987 #else
4988 for (j = 0; j < 4; j++) {
4989 fMult1[j] = f1[j]*expeigen1[j];
4990 fMult2[j] = f2[j]*expeigen2[j];
4991 }
4992 #endif
4993 numeric_t fPost[4] ALIGNED; /* in unrotated space */
4994 for (j = 0; j < 4; j++) {
4995 #if 0 /* SSE3 is slower */
4996 fPost[j] = vector_dot_product_rot(fMult1, fMult2, &transmat->codeFreq[j][0], 4)
4997 * transmat->statinv[j]; */
4998 #else
4999 double out1 = 0;
5000 double out2 = 0;
5001 for (k = 0; k < 4; k++) {
5002 out1 += fMult1[k] * transmat->codeFreq[j][k];
5003 out2 += fMult2[k] * transmat->codeFreq[j][k];
5004 }
5005 fPost[j] = out1*out2*transmat->statinv[j];
5006 #endif
5007 }
5008 double fPostTot = 0;
5009 for (j = 0; j < 4; j++)
5010 fPostTot += fPost[j];
5011 assert(fPostTot > fPostTotalTolerance);
5012 double fPostInv = 1.0/fPostTot;
5013 #if 0 /* SSE3 is slower */
5014 vector_multiply_by(fPost, fPostInv, 4);
5015 #else
5016 for (j = 0; j < 4; j++)
5017 fPost[j] *= fPostInv;
5018 #endif
5019
5020 /* and finally, divide by stat again & rotate to give the new frequencies */
5021 matrixt_by_vector4(transmat->eigeninvT, fPost, /*OUT*/fOut);
5022 } /* end loop over position i */
5023 } else if (nCodes == 20) { /* matrix model on amino acids */
5024 numeric_t *fGap = &transmat->codeFreq[NOCODE][0];
5025 numeric_t f1mix[20] ALIGNED;
5026 numeric_t f2mix[20] ALIGNED;
5027
5028 for (i=0; i < nPos; i++) {
5029 if (p1->codes[i] == NOCODE && p2->codes[i] == NOCODE
5030 && p1->weights[i] == 0 && p2->weights[i] == 0) {
5031 /* aligning gap with gap -- just output a gap
5032 out->codes[i] is already set to NOCODE so need not set that */
5033 out->weights[i] = 0;
5034 continue;
5035 }
5036 int iRate = rates->ratecat[i];
5037 numeric_t *expeigen1 = &expeigenRates1[iRate*20];
5038 numeric_t *expeigen2 = &expeigenRates2[iRate*20];
5039 numeric_t *f1 = GET_FREQ(p1,i,/*IN/OUT*/iFreq1);
5040 numeric_t *f2 = GET_FREQ(p2,i,/*IN/OUT*/iFreq2);
5041 numeric_t *fOut = GET_FREQ(out,i,/*IN/OUT*/iFreqOut);
5042 assert(fOut != NULL);
5043
5044 if (f1 == NULL) {
5045 f1 = &transmat->codeFreq[p1->codes[i]][0]; /* codeFreq includes an entry for NOCODE */
5046 double w = p1->weights[i];
5047 if (w > 0.0 && w < 1.0) {
5048 for (j = 0; j < 20; j++)
5049 f1mix[j] = w * f1[j] + (1.0-w) * fGap[j];
5050 f1 = f1mix;
5051 }
5052 }
5053 if (f2 == NULL) {
5054 f2 = &transmat->codeFreq[p2->codes[i]][0];
5055 double w = p2->weights[i];
5056 if (w > 0.0 && w < 1.0) {
5057 for (j = 0; j < 20; j++)
5058 f2mix[j] = w * f2[j] + (1.0-w) * fGap[j];
5059 f2 = f2mix;
5060 }
5061 }
5062 numeric_t fMult1[20] ALIGNED; /* rotated1 * expeigen1 */
5063 numeric_t fMult2[20] ALIGNED; /* rotated2 * expeigen2 */
5064 vector_multiply(f1, expeigen1, 20, /*OUT*/fMult1);
5065 vector_multiply(f2, expeigen2, 20, /*OUT*/fMult2);
5066 numeric_t fPost[20] ALIGNED; /* in unrotated space */
5067 for (j = 0; j < 20; j++) {
5068 numeric_t value = vector_dot_product_rot(fMult1, fMult2, &transmat->codeFreq[j][0], 20)
5069 * transmat->statinv[j];
5070 /* Added this logic try to avoid rare numerical problems */
5071 fPost[j] = value >= 0 ? value : 0;
5072 }
5073 double fPostTot = vector_sum(fPost, 20);
5074 assert(fPostTot > fPostTotalTolerance);
5075 double fPostInv = 1.0/fPostTot;
5076 vector_multiply_by(/*IN/OUT*/fPost, fPostInv, 20);
5077 int ch = -1; /* the dominant character, if any */
5078 if (!exactML) {
5079 for (j = 0; j < 20; j++) {
5080 if (fPost[j] >= approxMLminf) {
5081 ch = j;
5082 break;
5083 }
5084 }
5085 }
5086
5087 /* now, see if we can use the approximation
5088 fPost ~= (1 or 0) * w + nearP * (1-w)
5089 to avoid rotating */
5090 double w = 0;
5091 if (ch >= 0) {
5092 w = (fPost[ch] - transmat->nearP[ch][ch]) / (1.0 - transmat->nearP[ch][ch]);
5093 for (j = 0; j < 20; j++) {
5094 if (j != ch) {
5095 double fRough = (1.0-w) * transmat->nearP[ch][j];
5096 if (fRough < fPost[j] * approxMLminratio) {
5097 ch = -1; /* give up on the approximation */
5098 break;
5099 }
5100 }
5101 }
5102 }
5103 if (ch >= 0) {
5104 nAAPosteriorRough++;
5105 double wInvStat = w * transmat->statinv[ch];
5106 for (j = 0; j < 20; j++)
5107 fOut[j] = wInvStat * transmat->codeFreq[ch][j] + (1.0-w) * transmat->nearFreq[ch][j];
5108 } else {
5109 /* and finally, divide by stat again & rotate to give the new frequencies */
5110 nAAPosteriorExact++;
5111 for (j = 0; j < 20; j++)
5112 fOut[j] = vector_multiply_sum(fPost, &transmat->eigeninv[j][0], 20);
5113 }
5114 } /* end loop over position i */
5115 } else {
5116 assert(0); /* illegal nCodes */
5117 }
5118
5119 if (transmat != NULL) {
5120 expeigenRates1 = myfree(expeigenRates1, sizeof(numeric_t) * rates->nRateCategories * nCodes);
5121 expeigenRates2 = myfree(expeigenRates2, sizeof(numeric_t) * rates->nRateCategories * nCodes);
5122 }
5123
5124 /* Reallocate out->vectors to be the right size */
5125 out->nVectors = iFreqOut;
5126 if (out->nVectors == 0)
5127 out->vectors = (numeric_t*)myfree(out->vectors, sizeof(numeric_t)*nCodes*nPos);
5128 else
5129 out->vectors = (numeric_t*)myrealloc(out->vectors,
5130 /*OLDSIZE*/sizeof(numeric_t)*nCodes*nPos,
5131 /*NEWSIZE*/sizeof(numeric_t)*nCodes*out->nVectors,
5132 /*copy*/true); /* try to save space */
5133 nProfileFreqAlloc += out->nVectors;
5134 nProfileFreqAvoid += nPos - out->nVectors;
5135
5136 /* compute total constraints */
5137 for (i = 0; i < nConstraints; i++) {
5138 out->nOn[i] = p1->nOn[i] + p2->nOn[i];
5139 out->nOff[i] = p1->nOff[i] + p2->nOff[i];
5140 }
5141 nPosteriorCompute++;
5142 return(out);
5143 }
5144
PSameVector(double length,rates_t * rates)5145 double *PSameVector(double length, rates_t *rates) {
5146 double *pSame = mymalloc(sizeof(double) * rates->nRateCategories);
5147 int iRate;
5148 for (iRate = 0; iRate < rates->nRateCategories; iRate++)
5149 pSame[iRate] = 0.25 + 0.75 * exp((-4.0/3.0) * fabs(length*rates->rates[iRate]));
5150 return(pSame);
5151 }
5152
PDiffVector(double * pSame,rates_t * rates)5153 double *PDiffVector(double *pSame, rates_t *rates) {
5154 double *pDiff = mymalloc(sizeof(double) * rates->nRateCategories);
5155 int iRate;
5156 for (iRate = 0; iRate < rates->nRateCategories; iRate++)
5157 pDiff[iRate] = (1.0 - pSame[iRate])/3.0;
5158 return(pDiff);
5159 }
5160
ExpEigenRates(double length,transition_matrix_t * transmat,rates_t * rates)5161 numeric_t *ExpEigenRates(double length, transition_matrix_t *transmat, rates_t *rates) {
5162 numeric_t *expeigen = mymalloc(sizeof(numeric_t) * nCodes * rates->nRateCategories);
5163 int iRate, j;
5164 for (iRate = 0; iRate < rates->nRateCategories; iRate++) {
5165 for (j = 0; j < nCodes; j++) {
5166 double relLen = length * rates->rates[iRate];
5167 /* very short branch lengths lead to numerical problems so prevent them */
5168 if (relLen < MLMinRelBranchLength)
5169 relLen = MLMinRelBranchLength;
5170 expeigen[iRate*nCodes + j] = exp(relLen * transmat->eigenval[j]);
5171 }
5172 }
5173 return(expeigen);
5174 }
5175
PairLogLk(profile_t * pA,profile_t * pB,double length,int nPos,transition_matrix_t * transmat,rates_t * rates,double * site_likelihoods)5176 double PairLogLk(profile_t *pA, profile_t *pB, double length, int nPos,
5177 /*OPTIONAL*/transition_matrix_t *transmat,
5178 rates_t *rates,
5179 /*OPTIONAL IN/OUT*/double *site_likelihoods) {
5180 double lk = 1.0;
5181 double loglk = 0.0; /* stores underflow of lk during the loop over positions */
5182 int i,j;
5183 assert(rates != NULL && rates->nRateCategories > 0);
5184 numeric_t *expeigenRates = NULL;
5185 if (transmat != NULL)
5186 expeigenRates = ExpEigenRates(length, transmat, rates);
5187
5188 if (transmat == NULL) { /* Jukes-Cantor */
5189 assert (nCodes == 4);
5190 double *pSame = PSameVector(length, rates);
5191 double *pDiff = PDiffVector(pSame, rates);
5192
5193 int iFreqA = 0;
5194 int iFreqB = 0;
5195 for (i = 0; i < nPos; i++) {
5196 int iRate = rates->ratecat[i];
5197 double wA = pA->weights[i];
5198 double wB = pB->weights[i];
5199 int codeA = pA->codes[i];
5200 int codeB = pB->codes[i];
5201 numeric_t *fA = GET_FREQ(pA,i,/*IN/OUT*/iFreqA);
5202 numeric_t *fB = GET_FREQ(pB,i,/*IN/OUT*/iFreqB);
5203 double lkAB = 0;
5204
5205 if (fA == NULL && fB == NULL) {
5206 if (codeA == NOCODE) { /* A is all gaps */
5207 /* gap to gap is sum(j) 0.25 * (0.25 * pSame + 0.75 * pDiff) = sum(i) 0.25*0.25 = 0.25
5208 gap to any character gives the same result
5209 */
5210 lkAB = 0.25;
5211 } else if (codeB == NOCODE) { /* B is all gaps */
5212 lkAB = 0.25;
5213 } else if (codeA == codeB) { /* A and B match */
5214 lkAB = pSame[iRate] * wA*wB + 0.25 * (1-wA*wB);
5215 } else { /* codeA != codeB */
5216 lkAB = pDiff[iRate] * wA*wB + 0.25 * (1-wA*wB);
5217 }
5218 } else if (fA == NULL) {
5219 /* Compare codeA to profile of B */
5220 if (codeA == NOCODE)
5221 lkAB = 0.25;
5222 else
5223 lkAB = wA * (pDiff[iRate] + fB[codeA] * (pSame[iRate]-pDiff[iRate])) + (1.0-wA) * 0.25;
5224 /* because lkAB = wA * P(codeA->B) + (1-wA) * 0.25
5225 P(codeA -> B) = sum(j) P(B==j) * (j==codeA ? pSame : pDiff)
5226 = sum(j) P(B==j) * pDiff +
5227 = pDiff + P(B==codeA) * (pSame-pDiff)
5228 */
5229 } else if (fB == NULL) { /* Compare codeB to profile of A */
5230 if (codeB == NOCODE)
5231 lkAB = 0.25;
5232 else
5233 lkAB = wB * (pDiff[iRate] + fA[codeB] * (pSame[iRate]-pDiff[iRate])) + (1.0-wB) * 0.25;
5234 } else { /* both are full profiles */
5235 for (j = 0; j < 4; j++)
5236 lkAB += fB[j] * (fA[j] * pSame[iRate] + (1-fA[j])* pDiff[iRate]); /* P(A|B) */
5237 }
5238 assert(lkAB > 0);
5239 lk *= lkAB;
5240 while (lk < LkUnderflow) {
5241 lk *= LkUnderflowInv;
5242 loglk -= LogLkUnderflow;
5243 }
5244 if (site_likelihoods != NULL)
5245 site_likelihoods[i] *= lkAB;
5246 }
5247 pSame = myfree(pSame, sizeof(double) * rates->nRateCategories);
5248 pDiff = myfree(pDiff, sizeof(double) * rates->nRateCategories);
5249 } else if (nCodes == 4) { /* matrix model on nucleotides */
5250 int iFreqA = 0;
5251 int iFreqB = 0;
5252 numeric_t fAmix[4], fBmix[4];
5253 numeric_t *fGap = &transmat->codeFreq[NOCODE][0];
5254
5255 for (i = 0; i < nPos; i++) {
5256 int iRate = rates->ratecat[i];
5257 numeric_t *expeigen = &expeigenRates[iRate*4];
5258 double wA = pA->weights[i];
5259 double wB = pB->weights[i];
5260 if (wA == 0 && wB == 0 && pA->codes[i] == NOCODE && pB->codes[i] == NOCODE) {
5261 /* Likelihood of A vs B is 1, so nothing changes
5262 Do not need to advance iFreqA or iFreqB */
5263 continue;
5264 }
5265 numeric_t *fA = GET_FREQ(pA,i,/*IN/OUT*/iFreqA);
5266 numeric_t *fB = GET_FREQ(pB,i,/*IN/OUT*/iFreqB);
5267 if (fA == NULL)
5268 fA = &transmat->codeFreq[pA->codes[i]][0];
5269 if (wA > 0.0 && wA < 1.0) {
5270 for (j = 0; j < 4; j++)
5271 fAmix[j] = wA*fA[j] + (1.0-wA)*fGap[j];
5272 fA = fAmix;
5273 }
5274 if (fB == NULL)
5275 fB = &transmat->codeFreq[pB->codes[i]][0];
5276 if (wB > 0.0 && wB < 1.0) {
5277 for (j = 0; j < 4; j++)
5278 fBmix[j] = wB*fB[j] + (1.0-wB)*fGap[j];
5279 fB = fBmix;
5280 }
5281 /* SSE3 instructions do not speed this step up:
5282 numeric_t lkAB = vector_multiply3_sum(expeigen, fA, fB); */
5283 // dsp this is where check for <=0 was added in 2.1.1.LG
5284 double lkAB = 0;
5285 for (j = 0; j < 4; j++)
5286 lkAB += expeigen[j]*fA[j]*fB[j];
5287 assert(lkAB > 0);
5288 if (site_likelihoods != NULL)
5289 site_likelihoods[i] *= lkAB;
5290 lk *= lkAB;
5291 while (lk < LkUnderflow) {
5292 lk *= LkUnderflowInv;
5293 loglk -= LogLkUnderflow;
5294 }
5295 while (lk > LkUnderflowInv) {
5296 lk *= LkUnderflow;
5297 loglk += LogLkUnderflow;
5298 }
5299 }
5300 } else if (nCodes == 20) { /* matrix model on amino acids */
5301 int iFreqA = 0;
5302 int iFreqB = 0;
5303 numeric_t fAmix[20], fBmix[20];
5304 numeric_t *fGap = &transmat->codeFreq[NOCODE][0];
5305
5306 for (i = 0; i < nPos; i++) {
5307 int iRate = rates->ratecat[i];
5308 numeric_t *expeigen = &expeigenRates[iRate*20];
5309 double wA = pA->weights[i];
5310 double wB = pB->weights[i];
5311 if (wA == 0 && wB == 0 && pA->codes[i] == NOCODE && pB->codes[i] == NOCODE) {
5312 /* Likelihood of A vs B is 1, so nothing changes
5313 Do not need to advance iFreqA or iFreqB */
5314 continue;
5315 }
5316 numeric_t *fA = GET_FREQ(pA,i,/*IN/OUT*/iFreqA);
5317 numeric_t *fB = GET_FREQ(pB,i,/*IN/OUT*/iFreqB);
5318 if (fA == NULL)
5319 fA = &transmat->codeFreq[pA->codes[i]][0];
5320 if (wA > 0.0 && wA < 1.0) {
5321 for (j = 0; j < 20; j++)
5322 fAmix[j] = wA*fA[j] + (1.0-wA)*fGap[j];
5323 fA = fAmix;
5324 }
5325 if (fB == NULL)
5326 fB = &transmat->codeFreq[pB->codes[i]][0];
5327 if (wB > 0.0 && wB < 1.0) {
5328 for (j = 0; j < 20; j++)
5329 fBmix[j] = wB*fB[j] + (1.0-wB)*fGap[j];
5330 fB = fBmix;
5331 }
5332 numeric_t lkAB = vector_multiply3_sum(expeigen, fA, fB, 20);
5333 if (!(lkAB > 0)) {
5334 /* If this happens, it indicates a numerical problem that needs to be addressed elsewhere,
5335 so report all the details */
5336 fprintf(stderr, "# FastTree.c::PairLogLk -- numerical problem!\n");
5337 fprintf(stderr, "# This block is intended for loading into R\n");
5338
5339 fprintf(stderr, "lkAB = %.8g\n", lkAB);
5340 fprintf(stderr, "Branch_length= %.8g\nalignment_position=%d\nnCodes=%d\nrate_category=%d\nrate=%.8g\n",
5341 length, i, nCodes, iRate, rates->rates[iRate]);
5342 fprintf(stderr, "wA=%.8g\nwB=%.8g\n", wA, wB);
5343 fprintf(stderr, "codeA = %d\ncodeB = %d\n", pA->codes[i], pB->codes[i]);
5344
5345 fprintf(stderr, "fA = c(");
5346 for (j = 0; j < nCodes; j++) fprintf(stderr, "%s %.8g", j==0?"":",", fA[j]);
5347 fprintf(stderr,")\n");
5348
5349 fprintf(stderr, "fB = c(");
5350 for (j = 0; j < nCodes; j++) fprintf(stderr, "%s %.8g", j==0?"":",", fB[j]);
5351 fprintf(stderr,")\n");
5352
5353 fprintf(stderr, "stat = c(");
5354 for (j = 0; j < nCodes; j++) fprintf(stderr, "%s %.8g", j==0?"":",", transmat->stat[j]);
5355 fprintf(stderr,")\n");
5356
5357 fprintf(stderr, "eigenval = c(");
5358 for (j = 0; j < nCodes; j++) fprintf(stderr, "%s %.8g", j==0?"":",", transmat->eigenval[j]);
5359 fprintf(stderr,")\n");
5360
5361 fprintf(stderr, "expeigen = c(");
5362 for (j = 0; j < nCodes; j++) fprintf(stderr, "%s %.8g", j==0?"":",", expeigen[j]);
5363 fprintf(stderr,")\n");
5364
5365 int k;
5366 fprintf(stderr, "codeFreq = c(");
5367 for (j = 0; j < nCodes; j++) for(k = 0; k < nCodes; k++) fprintf(stderr, "%s %.8g", j==0 && k==0?"":",",
5368 transmat->codeFreq[j][k]);
5369 fprintf(stderr,")\n");
5370
5371 fprintf(stderr, "eigeninv = c(");
5372 for (j = 0; j < nCodes; j++) for(k = 0; k < nCodes; k++) fprintf(stderr, "%s %.8g", j==0 && k==0?"":",",
5373 transmat->eigeninv[j][k]);
5374 fprintf(stderr,")\n");
5375
5376 fprintf(stderr, "# Transform into matrices and compute un-rotated vectors for profiles A and B\n");
5377 fprintf(stderr, "codeFreq = matrix(codeFreq,nrow=20);\n");
5378 fprintf(stderr, "eigeninv = matrix(eigeninv,nrow=20);\n");
5379 fputs("unrotA = stat * (eigeninv %*% fA)\n", stderr);
5380 fputs("unrotB = stat * (eigeninv %*% fB)\n", stderr);
5381 fprintf(stderr,"# End of R block\n");
5382 }
5383 assert(lkAB > 0);
5384 if (site_likelihoods != NULL)
5385 site_likelihoods[i] *= lkAB;
5386 lk *= lkAB;
5387 while (lk < LkUnderflow) {
5388 lk *= LkUnderflowInv;
5389 loglk -= LogLkUnderflow;
5390 }
5391 while (lk > LkUnderflowInv) {
5392 lk *= LkUnderflow;
5393 loglk += LogLkUnderflow;
5394 }
5395 }
5396 } else {
5397 assert(0); /* illegal nCodes */
5398 }
5399 if (transmat != NULL)
5400 expeigenRates = myfree(expeigenRates, sizeof(numeric_t) * rates->nRateCategories * 20);
5401 loglk += log(lk);
5402 nLkCompute++;
5403 return(loglk);
5404 }
5405
MLQuartetLogLk(profile_t * pA,profile_t * pB,profile_t * pC,profile_t * pD,int nPos,transition_matrix_t * transmat,rates_t * rates,double branch_lengths[5],double * site_likelihoods)5406 double MLQuartetLogLk(profile_t *pA, profile_t *pB, profile_t *pC, profile_t *pD,
5407 int nPos, /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
5408 /*IN*/double branch_lengths[5],
5409 /*OPTIONAL OUT*/double *site_likelihoods) {
5410 profile_t *pAB = PosteriorProfile(pA, pB,
5411 branch_lengths[0], branch_lengths[1],
5412 transmat,
5413 rates,
5414 nPos, /*nConstraints*/0);
5415 profile_t *pCD = PosteriorProfile(pC, pD,
5416 branch_lengths[2], branch_lengths[3],
5417 transmat,
5418 rates,
5419 nPos, /*nConstraints*/0);
5420 if (site_likelihoods != NULL) {
5421 int i;
5422 for (i = 0; i < nPos; i++)
5423 site_likelihoods[i] = 1.0;
5424 }
5425 /* Roughly, P(A,B,C,D) = P(A) P(B|A) P(D|C) P(AB | CD) */
5426 double loglk = PairLogLk(pA, pB, branch_lengths[0]+branch_lengths[1],
5427 nPos, transmat, rates, /*OPTIONAL IN/OUT*/site_likelihoods)
5428 + PairLogLk(pC, pD, branch_lengths[2]+branch_lengths[3],
5429 nPos, transmat, rates, /*OPTIONAL IN/OUT*/site_likelihoods)
5430 + PairLogLk(pAB, pCD, branch_lengths[4],
5431 nPos, transmat, rates, /*OPTIONAL IN/OUT*/site_likelihoods);
5432 pAB = FreeProfile(pAB, nPos, /*nConstraints*/0);
5433 pCD = FreeProfile(pCD, nPos, /*nConstraints*/0);
5434 return(loglk);
5435 }
5436
PairNegLogLk(double x,void * data)5437 double PairNegLogLk(double x, void *data) {
5438 quartet_opt_t *qo = (quartet_opt_t *)data;
5439 assert(qo != NULL);
5440 assert(qo->pair1 != NULL && qo->pair2 != NULL);
5441 qo->nEval++;
5442 double loglk = PairLogLk(qo->pair1, qo->pair2, x, qo->nPos, qo->transmat, qo->rates, /*site_lk*/NULL);
5443 assert(loglk < 1e100);
5444 if (verbose > 5)
5445 fprintf(stderr, "PairLogLk(%.4f) = %.4f\n", x, loglk);
5446 return(-loglk);
5447 }
5448
MLQuartetOptimize(profile_t * pA,profile_t * pB,profile_t * pC,profile_t * pD,int nPos,transition_matrix_t * transmat,rates_t * rates,double branch_lengths[5],bool * pStarTest,double * site_likelihoods)5449 double MLQuartetOptimize(profile_t *pA, profile_t *pB, profile_t *pC, profile_t *pD,
5450 int nPos, /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
5451 /*IN/OUT*/double branch_lengths[5],
5452 /*OPTIONAL OUT*/bool *pStarTest,
5453 /*OPTIONAL OUT*/double *site_likelihoods) {
5454 int j;
5455 double start_length[5];
5456 for (j = 0; j < 5; j++) {
5457 start_length[j] = branch_lengths[j];
5458 if (branch_lengths[j] < MLMinBranchLength)
5459 branch_lengths[j] = MLMinBranchLength;
5460 }
5461 quartet_opt_t qopt = { nPos, transmat, rates, /*nEval*/0,
5462 /*pair1*/NULL, /*pair2*/NULL };
5463 double f2x, negloglk;
5464
5465 if (pStarTest != NULL)
5466 *pStarTest = false;
5467
5468 /* First optimize internal branch, then branch to A, B, C, D, in turn
5469 May use star test to quit after internal branch
5470 */
5471 profile_t *pAB = PosteriorProfile(pA, pB,
5472 branch_lengths[LEN_A], branch_lengths[LEN_B],
5473 transmat, rates, nPos, /*nConstraints*/0);
5474 profile_t *pCD = PosteriorProfile(pC, pD,
5475 branch_lengths[LEN_C], branch_lengths[LEN_D],
5476 transmat, rates, nPos, /*nConstraints*/0);
5477 qopt.pair1 = pAB;
5478 qopt.pair2 = pCD;
5479 branch_lengths[LEN_I] = onedimenmin(/*xmin*/MLMinBranchLength,
5480 /*xguess*/branch_lengths[LEN_I],
5481 /*xmax*/6.0,
5482 PairNegLogLk,
5483 /*data*/&qopt,
5484 /*ftol*/MLFTolBranchLength,
5485 /*atol*/MLMinBranchLengthTolerance,
5486 /*OUT*/&negloglk,
5487 /*OUT*/&f2x);
5488
5489 if (pStarTest != NULL) {
5490 assert(site_likelihoods == NULL);
5491 double loglkStar = -PairNegLogLk(MLMinBranchLength, &qopt);
5492 if (loglkStar < -negloglk - closeLogLkLimit) {
5493 *pStarTest = true;
5494 double off = PairLogLk(pA, pB,
5495 branch_lengths[LEN_A] + branch_lengths[LEN_B],
5496 qopt.nPos, qopt.transmat, qopt.rates, /*site_lk*/NULL)
5497 + PairLogLk(pC, pD,
5498 branch_lengths[LEN_C] + branch_lengths[LEN_D],
5499 qopt.nPos, qopt.transmat, qopt.rates, /*site_lk*/NULL);
5500 pAB = FreeProfile(pAB, nPos, /*nConstraints*/0);
5501 pCD = FreeProfile(pCD, nPos, /*nConstraints*/0);
5502 return (-negloglk + off);
5503 }
5504 }
5505 pAB = FreeProfile(pAB, nPos, /*nConstraints*/0);
5506 profile_t *pBCD = PosteriorProfile(pB, pCD,
5507 branch_lengths[LEN_B], branch_lengths[LEN_I],
5508 transmat, rates, nPos, /*nConstraints*/0);
5509 qopt.pair1 = pA;
5510 qopt.pair2 = pBCD;
5511 branch_lengths[LEN_A] = onedimenmin(/*xmin*/MLMinBranchLength,
5512 /*xguess*/branch_lengths[LEN_A],
5513 /*xmax*/6.0,
5514 PairNegLogLk,
5515 /*data*/&qopt,
5516 /*ftol*/MLFTolBranchLength,
5517 /*atol*/MLMinBranchLengthTolerance,
5518 /*OUT*/&negloglk,
5519 /*OUT*/&f2x);
5520 pBCD = FreeProfile(pBCD, nPos, /*nConstraints*/0);
5521 profile_t *pACD = PosteriorProfile(pA, pCD,
5522 branch_lengths[LEN_A], branch_lengths[LEN_I],
5523 transmat, rates, nPos, /*nConstraints*/0);
5524 qopt.pair1 = pB;
5525 qopt.pair2 = pACD;
5526 branch_lengths[LEN_B] = onedimenmin(/*xmin*/MLMinBranchLength,
5527 /*xguess*/branch_lengths[LEN_B],
5528 /*xmax*/6.0,
5529 PairNegLogLk,
5530 /*data*/&qopt,
5531 /*ftol*/MLFTolBranchLength,
5532 /*atol*/MLMinBranchLengthTolerance,
5533 /*OUT*/&negloglk,
5534 /*OUT*/&f2x);
5535 pACD = FreeProfile(pACD, nPos, /*nConstraints*/0);
5536 pCD = FreeProfile(pCD, nPos, /*nConstraints*/0);
5537 pAB = PosteriorProfile(pA, pB,
5538 branch_lengths[LEN_A], branch_lengths[LEN_B],
5539 transmat, rates, nPos, /*nConstraints*/0);
5540 profile_t *pABD = PosteriorProfile(pAB, pD,
5541 branch_lengths[LEN_I], branch_lengths[LEN_D],
5542 transmat, rates, nPos, /*nConstraints*/0);
5543 qopt.pair1 = pC;
5544 qopt.pair2 = pABD;
5545 branch_lengths[LEN_C] = onedimenmin(/*xmin*/MLMinBranchLength,
5546 /*xguess*/branch_lengths[LEN_C],
5547 /*xmax*/6.0,
5548 PairNegLogLk,
5549 /*data*/&qopt,
5550 /*ftol*/MLFTolBranchLength,
5551 /*atol*/MLMinBranchLengthTolerance,
5552 /*OUT*/&negloglk,
5553 /*OUT*/&f2x);
5554 pABD = FreeProfile(pABD, nPos, /*nConstraints*/0);
5555 profile_t *pABC = PosteriorProfile(pAB, pC,
5556 branch_lengths[LEN_I], branch_lengths[LEN_C],
5557 transmat, rates, nPos, /*nConstraints*/0);
5558 qopt.pair1 = pD;
5559 qopt.pair2 = pABC;
5560 branch_lengths[LEN_D] = onedimenmin(/*xmin*/MLMinBranchLength,
5561 /*xguess*/branch_lengths[LEN_D],
5562 /*xmax*/6.0,
5563 PairNegLogLk,
5564 /*data*/&qopt,
5565 /*ftol*/MLFTolBranchLength,
5566 /*atol*/MLMinBranchLengthTolerance,
5567 /*OUT*/&negloglk,
5568 /*OUT*/&f2x);
5569
5570 /* Compute the total quartet likelihood
5571 PairLogLk(ABC,D) + PairLogLk(AB,C) + PairLogLk(A,B)
5572 */
5573 double loglkABCvsD = -negloglk;
5574 if (site_likelihoods) {
5575 for (j = 0; j < nPos; j++)
5576 site_likelihoods[j] = 1.0;
5577 PairLogLk(pABC, pD, branch_lengths[LEN_D],
5578 qopt.nPos, qopt.transmat, qopt.rates, /*IN/OUT*/site_likelihoods);
5579 }
5580 double quartetloglk = loglkABCvsD
5581 + PairLogLk(pAB, pC, branch_lengths[LEN_I] + branch_lengths[LEN_C],
5582 qopt.nPos, qopt.transmat, qopt.rates,
5583 /*IN/OUT*/site_likelihoods)
5584 + PairLogLk(pA, pB, branch_lengths[LEN_A] + branch_lengths[LEN_B],
5585 qopt.nPos, qopt.transmat, qopt.rates,
5586 /*IN/OUT*/site_likelihoods);
5587
5588 pABC = FreeProfile(pABC, nPos, /*nConstraints*/0);
5589 pAB = FreeProfile(pAB, nPos, /*nConstraints*/0);
5590
5591 if (verbose > 3) {
5592 double loglkStart = MLQuartetLogLk(pA, pB, pC, pD, nPos, transmat, rates, start_length, /*site_lk*/NULL);
5593 fprintf(stderr, "Optimize loglk from %.5f to %.5f eval %d lengths from\n"
5594 " %.5f %.5f %.5f %.5f %.5f to\n"
5595 " %.5f %.5f %.5f %.5f %.5f\n",
5596 loglkStart, quartetloglk, qopt.nEval,
5597 start_length[0], start_length[1], start_length[2], start_length[3], start_length[4],
5598 branch_lengths[0], branch_lengths[1], branch_lengths[2], branch_lengths[3], branch_lengths[4]);
5599 }
5600 return(quartetloglk);
5601 }
5602
MLQuartetNNI(profile_t * profiles[4],transition_matrix_t * transmat,rates_t * rates,int nPos,int nConstraints,double criteria[3],numeric_t len[5],bool bFast)5603 nni_t MLQuartetNNI(profile_t *profiles[4],
5604 /*OPTIONAL*/transition_matrix_t *transmat,
5605 rates_t *rates,
5606 int nPos, int nConstraints,
5607 /*OUT*/double criteria[3], /* The three potential quartet log-likelihoods */
5608 /*IN/OUT*/numeric_t len[5],
5609 bool bFast)
5610 {
5611 int i;
5612 double lenABvsCD[5] = {len[LEN_A], len[LEN_B], len[LEN_C], len[LEN_D], len[LEN_I]};
5613 double lenACvsBD[5] = {len[LEN_A], len[LEN_C], len[LEN_B], len[LEN_D], len[LEN_I]}; /* Swap B & C */
5614 double lenADvsBC[5] = {len[LEN_A], len[LEN_D], len[LEN_C], len[LEN_B], len[LEN_I]}; /* Swap B & D */
5615 bool bConsiderAC = true;
5616 bool bConsiderAD = true;
5617 int iRound;
5618 int nRounds = mlAccuracy < 2 ? 2 : mlAccuracy;
5619 double penalty[3];
5620 QuartetConstraintPenalties(profiles, nConstraints, /*OUT*/penalty);
5621 if (penalty[ABvsCD] > penalty[ACvsBD] || penalty[ABvsCD] > penalty[ADvsBC])
5622 bFast = false;
5623 #ifdef OPENMP
5624 bFast = false; /* turn off star topology test */
5625 #endif
5626
5627 for (iRound = 0; iRound < nRounds; iRound++) {
5628 bool bStarTest = false;
5629 {
5630 #ifdef OPENMP
5631 #pragma omp parallel
5632 #pragma omp sections
5633 #endif
5634 {
5635 #ifdef OPENMP
5636 #pragma omp section
5637 #endif
5638 {
5639 criteria[ABvsCD] = MLQuartetOptimize(profiles[0], profiles[1], profiles[2], profiles[3],
5640 nPos, transmat, rates,
5641 /*IN/OUT*/lenABvsCD,
5642 bFast ? &bStarTest : NULL,
5643 /*site_likelihoods*/NULL)
5644 - penalty[ABvsCD]; /* subtract penalty b/c we are trying to maximize log lk */
5645 }
5646
5647 #ifdef OPENMP
5648 #pragma omp section
5649 #else
5650 if (bStarTest) {
5651 nStarTests++;
5652 criteria[ACvsBD] = -1e20;
5653 criteria[ADvsBC] = -1e20;
5654 len[LEN_I] = lenABvsCD[LEN_I];
5655 return(ABvsCD);
5656 }
5657 #endif
5658 {
5659 if (bConsiderAC)
5660 criteria[ACvsBD] = MLQuartetOptimize(profiles[0], profiles[2], profiles[1], profiles[3],
5661 nPos, transmat, rates,
5662 /*IN/OUT*/lenACvsBD, NULL, /*site_likelihoods*/NULL)
5663 - penalty[ACvsBD];
5664 }
5665
5666 #ifdef OPENMP
5667 #pragma omp section
5668 #endif
5669 {
5670 if (bConsiderAD)
5671 criteria[ADvsBC] = MLQuartetOptimize(profiles[0], profiles[3], profiles[2], profiles[1],
5672 nPos, transmat, rates,
5673 /*IN/OUT*/lenADvsBC, NULL, /*site_likelihoods*/NULL)
5674 - penalty[ADvsBC];
5675 }
5676 }
5677 } /* end parallel sections */
5678 if (mlAccuracy < 2) {
5679 /* If clearly worse then ABvsCD, or have short internal branch length and worse, then
5680 give up */
5681 if (criteria[ACvsBD] < criteria[ABvsCD] - closeLogLkLimit
5682 || (lenACvsBD[LEN_I] <= 2.0*MLMinBranchLength && criteria[ACvsBD] < criteria[ABvsCD]))
5683 bConsiderAC = false;
5684 if (criteria[ADvsBC] < criteria[ABvsCD] - closeLogLkLimit
5685 || (lenADvsBC[LEN_I] <= 2.0*MLMinBranchLength && criteria[ADvsBC] < criteria[ABvsCD]))
5686 bConsiderAD = false;
5687 if (!bConsiderAC && !bConsiderAD)
5688 break;
5689 /* If clearly better than either alternative, then give up
5690 (Comparison is probably biased in favor of ABvsCD anyway) */
5691 if (criteria[ACvsBD] > criteria[ABvsCD] + closeLogLkLimit
5692 && criteria[ACvsBD] > criteria[ADvsBC] + closeLogLkLimit)
5693 break;
5694 if (criteria[ADvsBC] > criteria[ABvsCD] + closeLogLkLimit
5695 && criteria[ADvsBC] > criteria[ACvsBD] + closeLogLkLimit)
5696 break;
5697 }
5698 } /* end loop over rounds */
5699
5700 if (verbose > 2) {
5701 fprintf(stderr, "Optimized quartet for %d rounds: ABvsCD %.5f ACvsBD %.5f ADvsBC %.5f\n",
5702 iRound, criteria[ABvsCD], criteria[ACvsBD], criteria[ADvsBC]);
5703 }
5704 if (criteria[ACvsBD] > criteria[ABvsCD] && criteria[ACvsBD] > criteria[ADvsBC]) {
5705 for (i = 0; i < 5; i++) len[i] = lenACvsBD[i];
5706 return(ACvsBD);
5707 } else if (criteria[ADvsBC] > criteria[ABvsCD] && criteria[ADvsBC] > criteria[ACvsBD]) {
5708 for (i = 0; i < 5; i++) len[i] = lenADvsBC[i];
5709 return(ADvsBC);
5710 } else {
5711 for (i = 0; i < 5; i++) len[i] = lenABvsCD[i];
5712 return(ABvsCD);
5713 }
5714 }
5715
TreeLength(NJ_t * NJ,bool recomputeProfiles)5716 double TreeLength(/*IN/OUT*/NJ_t *NJ, bool recomputeProfiles) {
5717 if (recomputeProfiles) {
5718 traversal_t traversal2 = InitTraversal(NJ);
5719 int j = NJ->root;
5720 while((j = TraversePostorder(j, NJ, /*IN/OUT*/traversal2, /*pUp*/NULL)) >= 0) {
5721 /* nothing to do for leaves or root */
5722 if (j >= NJ->nSeq && j != NJ->root)
5723 SetProfile(/*IN/OUT*/NJ, j, /*noweight*/-1.0);
5724 }
5725 traversal2 = FreeTraversal(traversal2,NJ);
5726 }
5727 UpdateBranchLengths(/*IN/OUT*/NJ);
5728 double total_len = 0;
5729 int iNode;
5730 for (iNode = 0; iNode < NJ->maxnode; iNode++)
5731 total_len += NJ->branchlength[iNode];
5732 return(total_len);
5733 }
5734
TreeLogLk(NJ_t * NJ,double * site_loglk)5735 double TreeLogLk(/*IN*/NJ_t *NJ, /*OPTIONAL OUT*/double *site_loglk) {
5736 int i;
5737 if (NJ->nSeq < 2)
5738 return(0.0);
5739 double loglk = 0.0;
5740 double *site_likelihood = NULL;
5741 if (site_loglk != NULL) {
5742 site_likelihood = mymalloc(sizeof(double)*NJ->nPos);
5743 for (i = 0; i < NJ->nPos; i++) {
5744 site_likelihood[i] = 1.0;
5745 site_loglk[i] = 0.0;
5746 }
5747 }
5748 traversal_t traversal = InitTraversal(NJ);
5749 int node = NJ->root;
5750 while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
5751 int nChild = NJ->child[node].nChild;
5752 if (nChild == 0)
5753 continue;
5754 assert(nChild >= 2);
5755 int *children = NJ->child[node].child;
5756 double loglkchild = PairLogLk(NJ->profiles[children[0]], NJ->profiles[children[1]],
5757 NJ->branchlength[children[0]]+NJ->branchlength[children[1]],
5758 NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/site_likelihood);
5759 loglk += loglkchild;
5760 if (site_likelihood != NULL) {
5761 /* prevent underflows */
5762 for (i = 0; i < NJ->nPos; i++) {
5763 while(site_likelihood[i] < LkUnderflow) {
5764 site_likelihood[i] *= LkUnderflowInv;
5765 site_loglk[i] -= LogLkUnderflow;
5766 }
5767 }
5768 }
5769 if (verbose > 2)
5770 fprintf(stderr, "At %d: LogLk(%d:%.4f,%d:%.4f) = %.3f\n",
5771 node,
5772 children[0], NJ->branchlength[children[0]],
5773 children[1], NJ->branchlength[children[1]],
5774 loglkchild);
5775 if (NJ->child[node].nChild == 3) {
5776 assert(node == NJ->root);
5777 /* Infer the common parent of the 1st two to define the third... */
5778 profile_t *pAB = PosteriorProfile(NJ->profiles[children[0]],
5779 NJ->profiles[children[1]],
5780 NJ->branchlength[children[0]],
5781 NJ->branchlength[children[1]],
5782 NJ->transmat, &NJ->rates,
5783 NJ->nPos, /*nConstraints*/0);
5784 double loglkup = PairLogLk(pAB, NJ->profiles[children[2]],
5785 NJ->branchlength[children[2]],
5786 NJ->nPos, NJ->transmat, &NJ->rates,
5787 /*IN/OUT*/site_likelihood);
5788 loglk += loglkup;
5789 if (verbose > 2)
5790 fprintf(stderr, "At root %d: LogLk((%d/%d),%d:%.3f) = %.3f\n",
5791 node, children[0], children[1], children[2],
5792 NJ->branchlength[children[2]],
5793 loglkup);
5794 pAB = FreeProfile(pAB, NJ->nPos, NJ->nConstraints);
5795 }
5796 }
5797 traversal = FreeTraversal(traversal,NJ);
5798 if (site_likelihood != NULL) {
5799 for (i = 0; i < NJ->nPos; i++) {
5800 site_loglk[i] += log(site_likelihood[i]);
5801 }
5802 site_likelihood = myfree(site_likelihood, sizeof(double)*NJ->nPos);
5803 }
5804
5805 /* For Jukes-Cantor, with a tree of size 4, if the children of the root are
5806 (A,B), C, and D, then
5807 P(ABCD) = P(A) P(B|A) P(C|AB) P(D|ABC)
5808
5809 Above we compute P(B|A) P(C|AB) P(D|ABC) -- note P(B|A) is at the child of root
5810 and P(C|AB) P(D|ABC) is at root.
5811
5812 Similarly if the children of the root are C, D, and (A,B), then
5813 P(ABCD) = P(C|D) P(A|B) P(AB|CD) P(D), and above we compute that except for P(D)
5814
5815 So we need to multiply by P(A) = 0.25, so we pay log(4) at each position
5816 (if ungapped). Each gapped position in any sequence reduces the payment by log(4)
5817
5818 For JTT or GTR, we are computing P(A & B) and the posterior profiles are scaled to take
5819 the prior into account, so we do not need any correction.
5820 codeFreq[NOCODE] is scaled x higher so that P(-) = 1 not P(-)=1/nCodes, so gaps
5821 do not need to be corrected either.
5822 */
5823
5824 if (nCodes == 4 && NJ->transmat == NULL) {
5825 int nGaps = 0;
5826 double logNCodes = log((double)nCodes);
5827 for (i = 0; i < NJ->nPos; i++) {
5828 int nGapsThisPos = 0;
5829 for (node = 0; node < NJ->nSeq; node++) {
5830 unsigned char *codes = NJ->profiles[node]->codes;
5831 if (codes[i] == NOCODE)
5832 nGapsThisPos++;
5833 }
5834 nGaps += nGapsThisPos;
5835 if (site_loglk != NULL) {
5836 site_loglk[i] += nGapsThisPos * logNCodes;
5837 if (nCodes == 4 && NJ->transmat == NULL)
5838 site_loglk[i] -= logNCodes;
5839 }
5840 }
5841 loglk -= NJ->nPos * logNCodes;
5842 loglk += nGaps * logNCodes; /* do not pay for gaps -- only Jukes-Cantor */
5843 }
5844 return(loglk);
5845 }
5846
SetMLGtr(NJ_t * NJ,double * freq_in,FILE * fpLog)5847 void SetMLGtr(/*IN/OUT*/NJ_t *NJ, /*OPTIONAL IN*/double *freq_in, /*OPTIONAL WRITE*/FILE *fpLog) {
5848 int i;
5849 assert(nCodes==4);
5850 gtr_opt_t gtr;
5851 gtr.NJ = NJ;
5852 gtr.fpLog = fpLog;
5853 if (freq_in != NULL) {
5854 for (i=0; i<4; i++)
5855 gtr.freq[i]=freq_in[i];
5856 } else {
5857 /* n[] and sum were int in FastTree 2.1.9 and earlier -- this
5858 caused gtr analyses to fail on analyses with >2e9 positions */
5859 long n[4] = {1,1,1,1}; /* pseudocounts */
5860 for (i=0; i<NJ->nSeq; i++) {
5861 unsigned char *codes = NJ->profiles[i]->codes;
5862 int iPos;
5863 for (iPos=0; iPos<NJ->nPos; iPos++)
5864 if (codes[iPos] < 4)
5865 n[codes[iPos]]++;
5866 }
5867 long sum = n[0]+n[1]+n[2]+n[3];
5868 for (i=0; i<4; i++)
5869 gtr.freq[i] = n[i]/(double)sum;
5870 }
5871 for (i=0; i<6; i++)
5872 gtr.rates[i] = 1.0;
5873 int nRounds = mlAccuracy < 2 ? 2 : mlAccuracy;
5874 for (i = 0; i < nRounds; i++) {
5875 for (gtr.iRate = 0; gtr.iRate < 6; gtr.iRate++) {
5876 ProgressReport("Optimizing GTR model, step %d of %d", i*6+gtr.iRate+1, 12, 0, 0);
5877 double negloglk, f2x;
5878 gtr.rates[gtr.iRate] = onedimenmin(/*xmin*/0.05,
5879 /*xguess*/gtr.rates[gtr.iRate],
5880 /*xmax*/20.0,
5881 GTRNegLogLk,
5882 /*data*/>r,
5883 /*ftol*/0.001,
5884 /*atol*/0.0001,
5885 /*OUT*/&negloglk,
5886 /*OUT*/&f2x);
5887 }
5888 }
5889 /* normalize gtr so last rate is 1 -- specifying that rate separately is useful for optimization only */
5890 for (i = 0; i < 5; i++)
5891 gtr.rates[i] /= gtr.rates[5];
5892 gtr.rates[5] = 1.0;
5893 if (verbose) {
5894 fprintf(stderr, "GTR Frequencies: %.4f %.4f %.4f %.4f\n", gtr.freq[0], gtr.freq[1], gtr.freq[2], gtr.freq[3]);
5895 fprintf(stderr, "GTR rates(ac ag at cg ct gt) %.4f %.4f %.4f %.4f %.4f %.4f\n",
5896 gtr.rates[0],gtr.rates[1],gtr.rates[2],gtr.rates[3],gtr.rates[4],gtr.rates[5]);
5897 }
5898 if (fpLog != NULL) {
5899 fprintf(fpLog, "GTRFreq\t%.4f\t%.4f\t%.4f\t%.4f\n", gtr.freq[0], gtr.freq[1], gtr.freq[2], gtr.freq[3]);
5900 fprintf(fpLog, "GTRRates\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n",
5901 gtr.rates[0],gtr.rates[1],gtr.rates[2],gtr.rates[3],gtr.rates[4],gtr.rates[5]);
5902 }
5903 myfree(NJ->transmat, sizeof(transition_matrix_t));
5904 NJ->transmat = CreateGTR(gtr.rates, gtr.freq);
5905 RecomputeMLProfiles(/*IN/OUT*/NJ);
5906 OptimizeAllBranchLengths(/*IN/OUT*/NJ);
5907 }
5908
GTRNegLogLk(double x,void * data)5909 double GTRNegLogLk(double x, void *data) {
5910
5911 gtr_opt_t *gtr = (gtr_opt_t*)data;
5912 assert(nCodes == 4);
5913 assert(gtr->NJ != NULL);
5914 assert(gtr->iRate >= 0 && gtr->iRate < 6);
5915 assert(x > 0);
5916 transition_matrix_t *old = gtr->NJ->transmat;
5917 double rates[6];
5918 int i;
5919 for (i = 0; i < 6; i++)
5920 rates[i] = gtr->rates[i];
5921 rates[gtr->iRate] = x;
5922
5923 FILE *fpLog = gtr->fpLog;
5924 if (fpLog)
5925 fprintf(fpLog, "GTR_Opt\tfreq %.5f %.5f %.5f %.5f rates %.5f %.5f %.5f %.5f %.5f %.5f\n",
5926 gtr->freq[0], gtr->freq[1], gtr->freq[2], gtr->freq[3],
5927 rates[0], rates[1], rates[2], rates[3], rates[4], rates[5]);
5928
5929 gtr->NJ->transmat = CreateGTR(rates, gtr->freq);
5930 RecomputeMLProfiles(/*IN/OUT*/gtr->NJ);
5931 double loglk = TreeLogLk(gtr->NJ, /*site_loglk*/NULL);
5932 myfree(gtr->NJ->transmat, sizeof(transition_matrix_t));
5933 gtr->NJ->transmat = old;
5934 /* Do not recompute profiles -- assume the caller will do that */
5935 if (verbose > 2)
5936 fprintf(stderr, "GTR LogLk(%.5f %.5f %.5f %.5f %.5f %.5f) = %f\n",
5937 rates[0], rates[1], rates[2], rates[3], rates[4], rates[5], loglk);
5938 if (fpLog)
5939 fprintf(fpLog, "GTR_Opt\tGTR LogLk(%.5f %.5f %.5f %.5f %.5f %.5f) = %f\n",
5940 rates[0], rates[1], rates[2], rates[3], rates[4], rates[5], loglk);
5941 return(-loglk);
5942 }
5943
5944 /* Caller must free the resulting vector of n rates */
MLSiteRates(int nRateCategories)5945 numeric_t *MLSiteRates(int nRateCategories) {
5946 /* Even spacing from 1/nRate to nRate */
5947 double logNCat = log((double)nRateCategories);
5948 double logMinRate = -logNCat;
5949 double logMaxRate = logNCat;
5950 double logd = (logMaxRate-logMinRate)/(double)(nRateCategories-1);
5951
5952 numeric_t *rates = mymalloc(sizeof(numeric_t)*nRateCategories);
5953 int i;
5954 for (i = 0; i < nRateCategories; i++)
5955 rates[i] = exp(logMinRate + logd*(double)i);
5956 return(rates);
5957 }
5958
MLSiteLikelihoodsByRate(NJ_t * NJ,numeric_t * rates,int nRateCategories)5959 double *MLSiteLikelihoodsByRate(/*IN*/NJ_t *NJ, /*IN*/numeric_t *rates, int nRateCategories) {
5960 double *site_loglk = mymalloc(sizeof(double)*NJ->nPos*nRateCategories);
5961
5962 /* save the original rates */
5963 assert(NJ->rates.nRateCategories > 0);
5964 numeric_t *oldRates = NJ->rates.rates;
5965 NJ->rates.rates = mymalloc(sizeof(numeric_t) * NJ->rates.nRateCategories);
5966
5967 /* Compute site likelihood for each rate */
5968 int iPos;
5969 int iRate;
5970 for (iRate = 0; iRate < nRateCategories; iRate++) {
5971 int i;
5972 for (i = 0; i < NJ->rates.nRateCategories; i++)
5973 NJ->rates.rates[i] = rates[iRate];
5974 RecomputeMLProfiles(/*IN/OUT*/NJ);
5975 double loglk = TreeLogLk(NJ, /*OUT*/&site_loglk[NJ->nPos*iRate]);
5976 ProgressReport("Site likelihoods with rate category %d of %d", iRate+1, nRateCategories, 0, 0);
5977 if(verbose > 2) {
5978 fprintf(stderr, "Rate %.3f Loglk %.3f SiteLogLk", rates[iRate], loglk);
5979 for (iPos = 0; iPos < NJ->nPos; iPos++)
5980 fprintf(stderr,"\t%.3f", site_loglk[NJ->nPos*iRate + iPos]);
5981 fprintf(stderr,"\n");
5982 }
5983 }
5984
5985 /* restore original rates and profiles */
5986 myfree(NJ->rates.rates, sizeof(numeric_t) * NJ->rates.nRateCategories);
5987 NJ->rates.rates = oldRates;
5988 RecomputeMLProfiles(/*IN/OUT*/NJ);
5989
5990 return(site_loglk);
5991 }
5992
SetMLRates(NJ_t * NJ,int nRateCategories)5993 void SetMLRates(/*IN/OUT*/NJ_t *NJ, int nRateCategories) {
5994 assert(nRateCategories > 0);
5995 AllocRateCategories(/*IN/OUT*/&NJ->rates, 1, NJ->nPos); /* set to 1 category of rate 1 */
5996 if (nRateCategories == 1) {
5997 RecomputeMLProfiles(/*IN/OUT*/NJ);
5998 return;
5999 }
6000 numeric_t *rates = MLSiteRates(nRateCategories);
6001 double *site_loglk = MLSiteLikelihoodsByRate(/*IN*/NJ, /*IN*/rates, nRateCategories);
6002
6003 /* Select best rate for each site, correcting for the prior
6004 For a prior, use a gamma distribution with shape parameter 3, scale 1/3, so
6005 Prior(rate) ~ rate**2 * exp(-3*rate)
6006 log Prior(rate) = C + 2 * log(rate) - 3 * rate
6007 */
6008 double sumRates = 0;
6009 int iPos;
6010 int iRate;
6011 for (iPos = 0; iPos < NJ->nPos; iPos++) {
6012 int iBest = -1;
6013 double dBest = -1e20;
6014 for (iRate = 0; iRate < nRateCategories; iRate++) {
6015 double site_loglk_with_prior = site_loglk[NJ->nPos*iRate + iPos]
6016 + 2.0 * log(rates[iRate]) - 3.0 * rates[iRate];
6017 if (site_loglk_with_prior > dBest) {
6018 iBest = iRate;
6019 dBest = site_loglk_with_prior;
6020 }
6021 }
6022 if (verbose > 2)
6023 fprintf(stderr, "Selected rate category %d rate %.3f for position %d\n",
6024 iBest, rates[iBest], iPos+1);
6025 NJ->rates.ratecat[iPos] = iBest;
6026 sumRates += rates[iBest];
6027 }
6028 site_loglk = myfree(site_loglk, sizeof(double)*NJ->nPos*nRateCategories);
6029
6030 /* Force the rates to average to 1 */
6031 double avgRate = sumRates/NJ->nPos;
6032 for (iRate = 0; iRate < nRateCategories; iRate++)
6033 rates[iRate] /= avgRate;
6034
6035 /* Save the rates */
6036 NJ->rates.rates = myfree(NJ->rates.rates, sizeof(numeric_t) * NJ->rates.nRateCategories);
6037 NJ->rates.rates = rates;
6038 NJ->rates.nRateCategories = nRateCategories;
6039
6040 /* Update profiles based on rates */
6041 RecomputeMLProfiles(/*IN/OUT*/NJ);
6042
6043 if (verbose) {
6044 fprintf(stderr, "Switched to using %d rate categories (CAT approximation)\n", nRateCategories);
6045 fprintf(stderr, "Rate categories were divided by %.3f so that average rate = 1.0\n", avgRate);
6046 fprintf(stderr, "CAT-based log-likelihoods may not be comparable across runs\n");
6047 if (!gammaLogLk)
6048 fprintf(stderr, "Use -gamma for approximate but comparable Gamma(20) log-likelihoods\n");
6049 }
6050 }
6051
GammaLogLk(siteratelk_t * s,double * gamma_loglk_sites)6052 double GammaLogLk(/*IN*/siteratelk_t *s, /*OPTIONAL OUT*/double *gamma_loglk_sites) {
6053 int iRate, iPos;
6054 double *dRate = mymalloc(sizeof(double) * s->nRateCats);
6055 for (iRate = 0; iRate < s->nRateCats; iRate++) {
6056 /* The probability density for each rate is approximated by the total
6057 density between the midpoints */
6058 double pMin = iRate == 0 ? 0.0 :
6059 PGamma(s->mult * (s->rates[iRate-1] + s->rates[iRate])/2.0, s->alpha);
6060 double pMax = iRate == s->nRateCats-1 ? 1.0 :
6061 PGamma(s->mult * (s->rates[iRate]+s->rates[iRate+1])/2.0, s->alpha);
6062 dRate[iRate] = pMax-pMin;
6063 }
6064
6065 double loglk = 0.0;
6066 for (iPos = 0; iPos < s->nPos; iPos++) {
6067 /* Prevent underflow on large trees by comparing to maximum loglk */
6068 double maxloglk = -1e20;
6069 for (iRate = 0; iRate < s->nRateCats; iRate++) {
6070 double site_loglk = s->site_loglk[s->nPos*iRate + iPos];
6071 if (site_loglk > maxloglk)
6072 maxloglk = site_loglk;
6073 }
6074 double rellk = 0; /* likelihood scaled by exp(maxloglk) */
6075 for (iRate = 0; iRate < s->nRateCats; iRate++) {
6076 double lk = exp(s->site_loglk[s->nPos*iRate + iPos] - maxloglk);
6077 rellk += lk * dRate[iRate];
6078 }
6079 double loglk_site = maxloglk + log(rellk);
6080 loglk += loglk_site;
6081 if (gamma_loglk_sites != NULL)
6082 gamma_loglk_sites[iPos] = loglk_site;
6083 }
6084 dRate = myfree(dRate, sizeof(double)*s->nRateCats);
6085 return(loglk);
6086 }
6087
OptAlpha(double alpha,void * data)6088 double OptAlpha(double alpha, void *data) {
6089 siteratelk_t *s = (siteratelk_t *)data;
6090 s->alpha = alpha;
6091 return(-GammaLogLk(s, NULL));
6092 }
6093
OptMult(double mult,void * data)6094 double OptMult(double mult, void *data) {
6095 siteratelk_t *s = (siteratelk_t *)data;
6096 s->mult = mult;
6097 return(-GammaLogLk(s, NULL));
6098 }
6099
6100 /* Input site_loglk must be for each rate */
RescaleGammaLogLk(int nPos,int nRateCats,numeric_t * rates,double * site_loglk,FILE * fpLog)6101 double RescaleGammaLogLk(int nPos, int nRateCats, /*IN*/numeric_t *rates, /*IN*/double *site_loglk,
6102 /*OPTIONAL*/FILE *fpLog) {
6103 siteratelk_t s = { /*mult*/1.0, /*alpha*/1.0, nPos, nRateCats, rates, site_loglk };
6104 double fx, f2x;
6105 int i;
6106 fx = -GammaLogLk(&s, NULL);
6107 if (verbose>2)
6108 fprintf(stderr, "Optimizing alpha, starting at loglk %.3f\n", -fx);
6109 for (i = 0; i < 10; i++) {
6110 ProgressReport("Optimizing alpha round %d", i+1, 0, 0, 0);
6111 double start = fx;
6112 s.alpha = onedimenmin(0.01, s.alpha, 10.0, OptAlpha, &s, 0.001, 0.001, &fx, &f2x);
6113 if (verbose>2)
6114 fprintf(stderr, "Optimize alpha round %d to %.3f lk %.3f\n", i+1, s.alpha, -fx);
6115 s.mult = onedimenmin(0.01, s.mult, 10.0, OptMult, &s, 0.001, 0.001, &fx, &f2x);
6116 if (verbose>2)
6117 fprintf(stderr, "Optimize mult round %d to %.3f lk %.3f\n", i+1, s.mult, -fx);
6118 if (fx > start - 0.001) {
6119 if (verbose>2)
6120 fprintf(stderr, "Optimizing alpha & mult converged\n");
6121 break;
6122 }
6123 }
6124
6125 double *gamma_loglk_sites = mymalloc(sizeof(double) * nPos);
6126 double gammaLogLk = GammaLogLk(&s, /*OUT*/gamma_loglk_sites);
6127 if (verbose > 0)
6128 fprintf(stderr, "Gamma(%d) LogLk = %.3f alpha = %.3f rescaling lengths by %.3f\n",
6129 nRateCats, gammaLogLk, s.alpha, 1/s.mult);
6130 if (fpLog) {
6131 int iPos;
6132 int iRate;
6133 fprintf(fpLog, "Gamma%dLogLk\t%.3f\tApproximate\tAlpha\t%.3f\tRescale\t%.3f\n",
6134 nRateCats, gammaLogLk, s.alpha, 1/s.mult);
6135 fprintf(fpLog, "Gamma%d\tSite\tLogLk", nRateCats);
6136 for (iRate = 0; iRate < nRateCats; iRate++)
6137 fprintf(fpLog, "\tr=%.3f", rates[iRate]/s.mult);
6138 fprintf(fpLog,"\n");
6139 for (iPos = 0; iPos < nPos; iPos++) {
6140 fprintf(fpLog, "Gamma%d\t%d\t%.3f", nRateCats, iPos, gamma_loglk_sites[iPos]);
6141 for (iRate = 0; iRate < nRateCats; iRate++)
6142 fprintf(fpLog, "\t%.3f", site_loglk[nPos*iRate + iPos]);
6143 fprintf(fpLog,"\n");
6144 }
6145 }
6146 gamma_loglk_sites = myfree(gamma_loglk_sites, sizeof(double) * nPos);
6147 return(1.0/s.mult);
6148 }
6149
MLPairOptimize(profile_t * pA,profile_t * pB,int nPos,transition_matrix_t * transmat,rates_t * rates,double * branch_length)6150 double MLPairOptimize(profile_t *pA, profile_t *pB,
6151 int nPos, /*OPTIONAL*/transition_matrix_t *transmat, rates_t *rates,
6152 /*IN/OUT*/double *branch_length) {
6153 quartet_opt_t qopt = { nPos, transmat, rates,
6154 /*nEval*/0, /*pair1*/pA, /*pair2*/pB };
6155 double f2x,negloglk;
6156 *branch_length = onedimenmin(/*xmin*/MLMinBranchLength,
6157 /*xguess*/*branch_length,
6158 /*xmax*/6.0,
6159 PairNegLogLk,
6160 /*data*/&qopt,
6161 /*ftol*/MLFTolBranchLength,
6162 /*atol*/MLMinBranchLengthTolerance,
6163 /*OUT*/&negloglk,
6164 /*OUT*/&f2x);
6165 return(-negloglk); /* the log likelihood */
6166 }
6167
OptimizeAllBranchLengths(NJ_t * NJ)6168 void OptimizeAllBranchLengths(/*IN/OUT*/NJ_t *NJ) {
6169 if (NJ->nSeq < 2)
6170 return;
6171 if (NJ->nSeq == 2) {
6172 int parent = NJ->root;
6173 assert(NJ->child[parent].nChild==2);
6174 int nodes[2] = { NJ->child[parent].child[0], NJ->child[parent].child[1] };
6175 double length = 1.0;
6176 (void)MLPairOptimize(NJ->profiles[nodes[0]], NJ->profiles[nodes[1]],
6177 NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/&length);
6178 NJ->branchlength[nodes[0]] = length/2.0;
6179 NJ->branchlength[nodes[1]] = length/2.0;
6180 return;
6181 };
6182
6183 traversal_t traversal = InitTraversal(NJ);
6184 profile_t **upProfiles = UpProfiles(NJ);
6185 int node = NJ->root;
6186 int iDone = 0;
6187 while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
6188 int nChild = NJ->child[node].nChild;
6189 if (nChild > 0) {
6190 if ((iDone % 100) == 0)
6191 ProgressReport("ML Lengths %d of %d splits", iDone+1, NJ->maxnode - NJ->nSeq, 0, 0);
6192 iDone++;
6193
6194 /* optimize the branch lengths between self, parent, and children,
6195 with two iterations
6196 */
6197 assert(nChild == 2 || nChild == 3);
6198 int nodes[3] = { NJ->child[node].child[0],
6199 NJ->child[node].child[1],
6200 nChild == 3 ? NJ->child[node].child[2] : node };
6201 profile_t *profiles[3] = { NJ->profiles[nodes[0]],
6202 NJ->profiles[nodes[1]],
6203 nChild == 3 ? NJ->profiles[nodes[2]]
6204 : GetUpProfile(/*IN/OUT*/upProfiles, NJ, node, /*useML*/true) };
6205 int iter;
6206 for (iter = 0; iter < 2; iter++) {
6207 int i;
6208 for (i = 0; i < 3; i++) {
6209 profile_t *pA = profiles[i];
6210 int b1 = (i+1) % 3;
6211 int b2 = (i+2) % 3;
6212 profile_t *pB = PosteriorProfile(profiles[b1], profiles[b2],
6213 NJ->branchlength[nodes[b1]],
6214 NJ->branchlength[nodes[b2]],
6215 NJ->transmat, &NJ->rates, NJ->nPos, /*nConstraints*/0);
6216 double len = NJ->branchlength[nodes[i]];
6217 if (len < MLMinBranchLength)
6218 len = MLMinBranchLength;
6219 (void)MLPairOptimize(pA, pB, NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/&len);
6220 NJ->branchlength[nodes[i]] = len;
6221 pB = FreeProfile(pB, NJ->nPos, /*nConstraints*/0);
6222 if (verbose>3)
6223 fprintf(stderr, "Optimize length for %d to %.3f\n",
6224 nodes[i], NJ->branchlength[nodes[i]]);
6225 }
6226 }
6227 if (node != NJ->root) {
6228 RecomputeProfile(/*IN/OUT*/NJ, /*IN/OUT*/upProfiles, node, /*useML*/true);
6229 DeleteUpProfile(upProfiles, NJ, node);
6230 }
6231 }
6232 }
6233 traversal = FreeTraversal(traversal,NJ);
6234 upProfiles = FreeUpProfiles(upProfiles,NJ);
6235 }
6236
RecomputeMLProfiles(NJ_t * NJ)6237 void RecomputeMLProfiles(/*IN/OUT*/NJ_t *NJ) {
6238 traversal_t traversal = InitTraversal(NJ);
6239 int node = NJ->root;
6240 while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
6241 if (NJ->child[node].nChild == 2) {
6242 NJ->profiles[node] = FreeProfile(NJ->profiles[node], NJ->nPos, NJ->nConstraints);
6243 int *children = NJ->child[node].child;
6244 NJ->profiles[node] = PosteriorProfile(NJ->profiles[children[0]], NJ->profiles[children[1]],
6245 NJ->branchlength[children[0]], NJ->branchlength[children[1]],
6246 NJ->transmat, &NJ->rates, NJ->nPos, NJ->nConstraints);
6247 }
6248 }
6249 traversal = FreeTraversal(traversal, NJ);
6250 }
6251
RecomputeProfiles(NJ_t * NJ,distance_matrix_t * dmat)6252 void RecomputeProfiles(/*IN/OUT*/NJ_t *NJ, /*OPTIONAL*/distance_matrix_t *dmat) {
6253 traversal_t traversal = InitTraversal(NJ);
6254 int node = NJ->root;
6255 while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
6256 if (NJ->child[node].nChild == 2) {
6257 int *child = NJ->child[node].child;
6258 NJ->profiles[node] = FreeProfile(NJ->profiles[node], NJ->nPos, NJ->nConstraints);
6259 NJ->profiles[node] = AverageProfile(NJ->profiles[child[0]], NJ->profiles[child[1]],
6260 NJ->nPos, NJ->nConstraints,
6261 dmat, /*unweighted*/-1.0);
6262 }
6263 }
6264 traversal = FreeTraversal(traversal,NJ);
6265 }
6266
NNI(NJ_t * NJ,int iRound,int nRounds,bool useML,nni_stats_t * stats,double * dMaxDelta)6267 int NNI(/*IN/OUT*/NJ_t *NJ, int iRound, int nRounds, bool useML,
6268 /*IN/OUT*/nni_stats_t *stats,
6269 /*OUT*/double *dMaxDelta) {
6270 /* For each non-root node N, with children A,B, sibling C, and uncle D,
6271 we compare the current topology AB|CD to the alternate topologies
6272 AC|BD and AD|BC, by using the 4 relevant profiles.
6273
6274 If useML is true, it uses quartet maximum likelihood, and it
6275 updates branch lengths as it goes.
6276
6277 If useML is false, it uses the minimum-evolution criterion with
6278 log-corrected distances on profiles. (If logdist is false, then
6279 the log correction is not done.) If useML is false, then NNI()
6280 does NOT modify the branch lengths.
6281
6282 Regardless of whether it changes the topology, it recomputes the
6283 profile for the node, using the pairwise distances and BIONJ-like
6284 weightings (if bionj is set). The parent's profile has changed,
6285 but recomputing it is not necessary because we will visit it
6286 before we need it (we use postorder, so we may visit the sibling
6287 and its children before we visit the parent, but we never
6288 consider an ancestor's profile, so that is OK). When we change
6289 the parent's profile, this alters the uncle's up-profile, so we
6290 remove that. Finally, if the topology has changed, we remove the
6291 up-profiles of the nodes.
6292
6293 If we do an NNI during post-order traversal, the result is a bit
6294 tricky. E.g. if we are at node N, and have visited its children A
6295 and B but not its uncle C, and we do an NNI that swaps B & C,
6296 then the post-order traversal will visit C, and its children, but
6297 then on the way back up, it will skip N, as it has already
6298 visited it. So, the profile of N will not be recomputed: any
6299 changes beneath C will not be reflected in the profile of N, and
6300 the profile of N will be slightly stale. This will be corrected
6301 on the next round of NNIs.
6302 */
6303 double supportThreshold = useML ? treeLogLkDelta : MEMinDelta;
6304 int i;
6305 *dMaxDelta = 0.0;
6306 int nNNIThisRound = 0;
6307
6308 if (NJ->nSeq <= 3)
6309 return(0); /* nothing to do */
6310 if (verbose > 2) {
6311 fprintf(stderr, "Beginning round %d of NNIs with ml? %d\n", iRound, useML?1:0);
6312 PrintNJInternal(/*WRITE*/stderr, NJ, /*useLen*/useML && iRound > 0 ? 1 : 0);
6313 }
6314 /* For each node the upProfile or NULL */
6315 profile_t **upProfiles = UpProfiles(NJ);
6316
6317 traversal_t traversal = InitTraversal(NJ);
6318
6319 /* Identify nodes we can skip traversing into */
6320 int node;
6321 if (fastNNI) {
6322 for (node = 0; node < NJ->maxnode; node++) {
6323 if (node != NJ->root
6324 && node >= NJ->nSeq
6325 && stats[node].age >= 2
6326 && stats[node].subtreeAge >= 2
6327 && stats[node].support > supportThreshold) {
6328 int nodeABCD[4];
6329 SetupABCD(NJ, node, NULL, NULL, /*OUT*/nodeABCD, useML);
6330 for (i = 0; i < 4; i++)
6331 if (stats[nodeABCD[i]].age == 0 && stats[nodeABCD[i]].support > supportThreshold)
6332 break;
6333 if (i == 4) {
6334 SkipTraversalInto(node, /*IN/OUT*/traversal);
6335 if (verbose > 2)
6336 fprintf(stderr, "Skipping subtree at %d: child %d %d parent %d age %d subtreeAge %d support %.3f\n",
6337 node, nodeABCD[0], nodeABCD[1], NJ->parent[node],
6338 stats[node].age, stats[node].subtreeAge, stats[node].support);
6339 }
6340 }
6341 }
6342 }
6343
6344 int iDone = 0;
6345 bool bUp;
6346 node = NJ->root;
6347 while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, &bUp)) >= 0) {
6348 if (node < NJ->nSeq || node == NJ->root)
6349 continue; /* nothing to do for leaves or root */
6350 if (bUp) {
6351 if(verbose > 2)
6352 fprintf(stderr, "Going up back to node %d\n", node);
6353 /* No longer needed */
6354 for (i = 0; i < NJ->child[node].nChild; i++)
6355 DeleteUpProfile(upProfiles, NJ, NJ->child[node].child[i]);
6356 DeleteUpProfile(upProfiles, NJ, node);
6357 RecomputeProfile(/*IN/OUT*/NJ, /*IN/OUT*/upProfiles, node, useML);
6358 continue;
6359 }
6360 if ((iDone % 100) == 0) {
6361 char buf[100];
6362 sprintf(buf, "%s NNI round %%d of %%d, %%d of %%d splits", useML ? "ML" : "ME");
6363 if (iDone > 0)
6364 sprintf(buf+strlen(buf), ", %d changes", nNNIThisRound);
6365 if (nNNIThisRound > 0)
6366 sprintf(buf+strlen(buf), " (max delta %.3f)", *dMaxDelta);
6367 ProgressReport(buf, iRound+1, nRounds, iDone+1, NJ->maxnode - NJ->nSeq);
6368 }
6369 iDone++;
6370
6371 profile_t *profiles[4];
6372 int nodeABCD[4];
6373 /* Note -- during the first round of ML NNIs, we use the min-evo-based branch lengths,
6374 which may be suboptimal */
6375 SetupABCD(NJ, node, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, useML);
6376
6377 /* Given our 4 profiles, consider doing a swap */
6378 int nodeA = nodeABCD[0];
6379 int nodeB = nodeABCD[1];
6380 int nodeC = nodeABCD[2];
6381 int nodeD = nodeABCD[3];
6382
6383 nni_t choice = ABvsCD;
6384
6385 if (verbose > 2)
6386 fprintf(stderr,"Considering NNI around %d: Swap A=%d B=%d C=%d D=up(%d) or parent %d\n",
6387 node, nodeA, nodeB, nodeC, nodeD, NJ->parent[node]);
6388 if (verbose > 3 && useML) {
6389 double len[5] = { NJ->branchlength[nodeA], NJ->branchlength[nodeB], NJ->branchlength[nodeC], NJ->branchlength[nodeD],
6390 NJ->branchlength[node] };
6391 for (i=0; i < 5; i++)
6392 if (len[i] < MLMinBranchLength)
6393 len[i] = MLMinBranchLength;
6394 fprintf(stderr, "Starting quartet likelihood %.3f len %.3f %.3f %.3f %.3f %.3f\n",
6395 MLQuartetLogLk(profiles[0],profiles[1],profiles[2],profiles[3],NJ->nPos,NJ->transmat,&NJ->rates,len, /*site_lk*/NULL),
6396 len[0], len[1], len[2], len[3], len[4]);
6397 }
6398
6399 numeric_t newlength[5];
6400 double criteria[3];
6401 if (useML) {
6402 for (i = 0; i < 4; i++)
6403 newlength[i] = NJ->branchlength[nodeABCD[i]];
6404 newlength[4] = NJ->branchlength[node];
6405 bool bFast = mlAccuracy < 2 && stats[node].age > 0;
6406 choice = MLQuartetNNI(profiles, NJ->transmat, &NJ->rates, NJ->nPos, NJ->nConstraints,
6407 /*OUT*/criteria, /*IN/OUT*/newlength, bFast);
6408 } else {
6409 choice = ChooseNNI(profiles, NJ->distance_matrix, NJ->nPos, NJ->nConstraints,
6410 /*OUT*/criteria);
6411 /* invert criteria so that higher is better, as in ML case, to simplify code below */
6412 for (i = 0; i < 3; i++)
6413 criteria[i] = -criteria[i];
6414 }
6415
6416 if (choice == ACvsBD) {
6417 /* swap B and C */
6418 ReplaceChild(/*IN/OUT*/NJ, node, nodeB, nodeC);
6419 ReplaceChild(/*IN/OUT*/NJ, NJ->parent[node], nodeC, nodeB);
6420 } else if (choice == ADvsBC) {
6421 /* swap A and C */
6422 ReplaceChild(/*IN/OUT*/NJ, node, nodeA, nodeC);
6423 ReplaceChild(/*IN/OUT*/NJ, NJ->parent[node], nodeC, nodeA);
6424 }
6425
6426 if (useML) {
6427 /* update branch length for the internal branch, and of any
6428 branches that lead to leaves, b/c those will not are not
6429 the internal branch for NNI and would not otherwise be set.
6430 */
6431 if (choice == ADvsBC) {
6432 /* For ADvsBC, MLQuartetNNI swaps B with D, but we swap A with C */
6433 double length2[5] = { newlength[LEN_C], newlength[LEN_D],
6434 newlength[LEN_A], newlength[LEN_B],
6435 newlength[LEN_I] };
6436 int i;
6437 for (i = 0; i < 5; i++) newlength[i] = length2[i];
6438 /* and swap A and C */
6439 double tmp = newlength[LEN_A];
6440 newlength[LEN_A] = newlength[LEN_C];
6441 newlength[LEN_C] = tmp;
6442 } else if (choice == ACvsBD) {
6443 /* swap B and C */
6444 double tmp = newlength[LEN_B];
6445 newlength[LEN_B] = newlength[LEN_C];
6446 newlength[LEN_C] = tmp;
6447 }
6448
6449 NJ->branchlength[node] = newlength[LEN_I];
6450 NJ->branchlength[nodeA] = newlength[LEN_A];
6451 NJ->branchlength[nodeB] = newlength[LEN_B];
6452 NJ->branchlength[nodeC] = newlength[LEN_C];
6453 NJ->branchlength[nodeD] = newlength[LEN_D];
6454 }
6455
6456 if (verbose>2 && (choice != ABvsCD || verbose > 2))
6457 fprintf(stderr,"NNI around %d: Swap A=%d B=%d C=%d D=out(C) -- choose %s %s %.4f\n",
6458 node, nodeA, nodeB, nodeC,
6459 choice == ACvsBD ? "AC|BD" : (choice == ABvsCD ? "AB|CD" : "AD|BC"),
6460 useML ? "delta-loglk" : "-deltaLen",
6461 criteria[choice] - criteria[ABvsCD]);
6462 if(verbose >= 3 && slow && useML)
6463 fprintf(stderr, "Old tree lk -- %.4f\n", TreeLogLk(NJ, /*site_likelihoods*/NULL));
6464
6465 /* update stats, *dMaxDelta, etc. */
6466 if (choice == ABvsCD) {
6467 stats[node].age++;
6468 } else {
6469 if (useML)
6470 nML_NNI++;
6471 else
6472 nNNI++;
6473 nNNIThisRound++;
6474 stats[node].age = 0;
6475 stats[nodeA].age = 0;
6476 stats[nodeB].age = 0;
6477 stats[nodeC].age = 0;
6478 stats[nodeD].age = 0;
6479 }
6480 stats[node].delta = criteria[choice] - criteria[ABvsCD]; /* 0 if ABvsCD */
6481 if (stats[node].delta > *dMaxDelta)
6482 *dMaxDelta = stats[node].delta;
6483
6484 /* support is improvement of score for self over better of alternatives */
6485 stats[node].support = 1e20;
6486 for (i = 0; i < 3; i++)
6487 if (choice != i && criteria[choice]-criteria[i] < stats[node].support)
6488 stats[node].support = criteria[choice]-criteria[i];
6489
6490 /* subtreeAge is the number of rounds since self or descendent had a significant improvement */
6491 if (stats[node].delta > supportThreshold)
6492 stats[node].subtreeAge = 0;
6493 else {
6494 stats[node].subtreeAge++;
6495 for (i = 0; i < 2; i++) {
6496 int child = NJ->child[node].child[i];
6497 if (stats[node].subtreeAge > stats[child].subtreeAge)
6498 stats[node].subtreeAge = stats[child].subtreeAge;
6499 }
6500 }
6501
6502 /* update profiles and free up unneeded up-profiles */
6503 if (choice == ABvsCD) {
6504 /* No longer needed */
6505 DeleteUpProfile(upProfiles, NJ, nodeA);
6506 DeleteUpProfile(upProfiles, NJ, nodeB);
6507 DeleteUpProfile(upProfiles, NJ, nodeC);
6508 RecomputeProfile(/*IN/OUT*/NJ, /*IN/OUT*/upProfiles, node, useML);
6509 if(slow && useML)
6510 UpdateForNNI(NJ, node, upProfiles, useML);
6511 } else {
6512 UpdateForNNI(NJ, node, upProfiles, useML);
6513 }
6514 if(verbose > 2 && slow && useML) {
6515 /* Note we recomputed profiles back up to root already if slow */
6516 PrintNJInternal(/*WRITE*/stderr, NJ, /*useLen*/true);
6517 fprintf(stderr, "New tree lk -- %.4f\n", TreeLogLk(NJ, /*site_likelihoods*/NULL));
6518 }
6519 } /* end postorder traversal */
6520 traversal = FreeTraversal(traversal,NJ);
6521 if (verbose>=2) {
6522 int nUp = 0;
6523 for (i = 0; i < NJ->maxnodes; i++)
6524 if (upProfiles[i] != NULL)
6525 nUp++;
6526 fprintf(stderr, "N up profiles at end of NNI: %d\n", nUp);
6527 }
6528 upProfiles = FreeUpProfiles(upProfiles,NJ);
6529 return(nNNIThisRound);
6530 }
6531
InitNNIStats(NJ_t * NJ)6532 nni_stats_t *InitNNIStats(NJ_t *NJ) {
6533 nni_stats_t *stats = mymalloc(sizeof(nni_stats_t)*NJ->maxnode);
6534 const int LargeAge = 1000000;
6535 int i;
6536 for (i = 0; i < NJ->maxnode; i++) {
6537 stats[i].delta = 0;
6538 stats[i].support = 0;
6539 if (i == NJ->root || i < NJ->nSeq) {
6540 stats[i].age = LargeAge;
6541 stats[i].subtreeAge = LargeAge;
6542 } else {
6543 stats[i].age = 0;
6544 stats[i].subtreeAge = 0;
6545 }
6546 }
6547 return(stats);
6548 }
6549
FreeNNIStats(nni_stats_t * stats,NJ_t * NJ)6550 nni_stats_t *FreeNNIStats(nni_stats_t *stats, NJ_t *NJ) {
6551 return(myfree(stats, sizeof(nni_stats_t)*NJ->maxnode));
6552 }
6553
FindSPRSteps(NJ_t * NJ,int nodeMove,int nodeAround,profile_t ** upProfiles,spr_step_t * steps,int maxSteps,bool bFirstAC)6554 int FindSPRSteps(/*IN/OUT*/NJ_t *NJ,
6555 int nodeMove, /* the node to move multiple times */
6556 int nodeAround, /* sibling or parent of node to NNI to start the chain */
6557 /*IN/OUT*/profile_t **upProfiles,
6558 /*OUT*/spr_step_t *steps,
6559 int maxSteps,
6560 bool bFirstAC) {
6561 int iStep;
6562 for (iStep = 0; iStep < maxSteps; iStep++) {
6563 if (NJ->child[nodeAround].nChild != 2)
6564 break; /* no further to go */
6565
6566 /* Consider the NNIs around nodeAround */
6567 profile_t *profiles[4];
6568 int nodeABCD[4];
6569 SetupABCD(NJ, nodeAround, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, /*useML*/false);
6570 double criteria[3];
6571 (void) ChooseNNI(profiles, NJ->distance_matrix, NJ->nPos, NJ->nConstraints,
6572 /*OUT*/criteria);
6573
6574 /* Do & save the swap */
6575 spr_step_t *step = &steps[iStep];
6576 if (iStep == 0 ? bFirstAC : criteria[ACvsBD] < criteria[ADvsBC]) {
6577 /* swap B & C to put AC together */
6578 step->deltaLength = criteria[ACvsBD] - criteria[ABvsCD];
6579 step->nodes[0] = nodeABCD[1];
6580 step->nodes[1] = nodeABCD[2];
6581 } else {
6582 /* swap AC to put AD together */
6583 step->deltaLength = criteria[ADvsBC] - criteria[ABvsCD];
6584 step->nodes[0] = nodeABCD[0];
6585 step->nodes[1] = nodeABCD[2];
6586 }
6587
6588 if (verbose>3) {
6589 fprintf(stderr, "SPR chain step %d for %d around %d swap %d %d deltaLen %.5f\n",
6590 iStep+1, nodeAround, nodeMove, step->nodes[0], step->nodes[1], step->deltaLength);
6591 if (verbose>4)
6592 PrintNJInternal(stderr, NJ, /*useLen*/false);
6593 }
6594 ReplaceChild(/*IN/OUT*/NJ, nodeAround, step->nodes[0], step->nodes[1]);
6595 ReplaceChild(/*IN/OUT*/NJ, NJ->parent[nodeAround], step->nodes[1], step->nodes[0]);
6596 UpdateForNNI(/*IN/OUT*/NJ, nodeAround, /*IN/OUT*/upProfiles, /*useML*/false);
6597
6598 /* set the new nodeAround -- either parent(nodeMove) or sibling(nodeMove) --
6599 so that it different from current nodeAround
6600 */
6601 int newAround[2] = { NJ->parent[nodeMove], Sibling(NJ, nodeMove) };
6602 if (NJ->parent[nodeMove] == NJ->root)
6603 RootSiblings(NJ, nodeMove, /*OUT*/newAround);
6604 assert(newAround[0] == nodeAround || newAround[1] == nodeAround);
6605 assert(newAround[0] != newAround[1]);
6606 nodeAround = newAround[newAround[0] == nodeAround ? 1 : 0];
6607 }
6608 return(iStep);
6609 }
6610
UnwindSPRStep(NJ_t * NJ,spr_step_t * step,profile_t ** upProfiles)6611 void UnwindSPRStep(/*IN/OUT*/NJ_t *NJ,
6612 /*IN*/spr_step_t *step,
6613 /*IN/OUT*/profile_t **upProfiles) {
6614 int parents[2];
6615 int i;
6616 for (i = 0; i < 2; i++) {
6617 assert(step->nodes[i] >= 0 && step->nodes[i] < NJ->maxnodes);
6618 parents[i] = NJ->parent[step->nodes[i]];
6619 assert(parents[i] >= 0);
6620 }
6621 assert(parents[0] != parents[1]);
6622 ReplaceChild(/*IN/OUT*/NJ, parents[0], step->nodes[0], step->nodes[1]);
6623 ReplaceChild(/*IN/OUT*/NJ, parents[1], step->nodes[1], step->nodes[0]);
6624 int iYounger = 0;
6625 if (NJ->parent[parents[0]] == parents[1]) {
6626 iYounger = 0;
6627 } else {
6628 assert(NJ->parent[parents[1]] == parents[0]);
6629 iYounger = 1;
6630 }
6631 UpdateForNNI(/*IN/OUT*/NJ, parents[iYounger], /*IN/OUT*/upProfiles, /*useML*/false);
6632 }
6633
6634 /* Update the profile of node and its ancestor, and delete nearby out-profiles */
UpdateForNNI(NJ_t * NJ,int node,profile_t ** upProfiles,bool useML)6635 void UpdateForNNI(/*IN/OUT*/NJ_t *NJ, int node, /*IN/OUT*/profile_t **upProfiles,
6636 bool useML) {
6637 int i;
6638 if (slow) {
6639 /* exhaustive update */
6640 for (i = 0; i < NJ->maxnodes; i++)
6641 DeleteUpProfile(upProfiles, NJ, i);
6642
6643 /* update profiles back to root */
6644 int ancestor;
6645 for (ancestor = node; ancestor >= 0; ancestor = NJ->parent[ancestor])
6646 RecomputeProfile(/*IN/OUT*/NJ, upProfiles, ancestor, useML);
6647
6648 /* remove any up-profiles made while doing that*/
6649 for (i = 0; i < NJ->maxnodes; i++)
6650 DeleteUpProfile(upProfiles, NJ, i);
6651 } else {
6652 /* if fast, only update around self
6653 note that upProfile(parent) is still OK after an NNI, but
6654 up-profiles of uncles may not be
6655 */
6656 DeleteUpProfile(upProfiles, NJ, node);
6657 for (i = 0; i < NJ->child[node].nChild; i++)
6658 DeleteUpProfile(upProfiles, NJ, NJ->child[node].child[i]);
6659 assert(node != NJ->root);
6660 int parent = NJ->parent[node];
6661 int neighbors[2] = { parent, Sibling(NJ, node) };
6662 if (parent == NJ->root)
6663 RootSiblings(NJ, node, /*OUT*/neighbors);
6664 DeleteUpProfile(upProfiles, NJ, neighbors[0]);
6665 DeleteUpProfile(upProfiles, NJ, neighbors[1]);
6666 int uncle = Sibling(NJ, parent);
6667 if (uncle >= 0)
6668 DeleteUpProfile(upProfiles, NJ, uncle);
6669 RecomputeProfile(/*IN/OUT*/NJ, upProfiles, node, useML);
6670 RecomputeProfile(/*IN/OUT*/NJ, upProfiles, parent, useML);
6671 }
6672 }
6673
SPR(NJ_t * NJ,int maxSPRLength,int iRound,int nRounds)6674 void SPR(/*IN/OUT*/NJ_t *NJ, int maxSPRLength, int iRound, int nRounds) {
6675 /* Given a non-root node N with children A,B, sibling C, and uncle D,
6676 we can try to move A by doing three types of moves (4 choices):
6677 "down" -- swap A with a child of B (if B is not a leaf) [2 choices]
6678 "over" -- swap B with C
6679 "up" -- swap A with D
6680 We follow down moves with down moves, over moves with down moves, and
6681 up moves with either up or over moves. (Other choices are just backing
6682 up and hence useless.)
6683
6684 As with NNIs, we keep track of up-profiles as we go. However, some of the regular
6685 profiles may also become "stale" so it is a bit trickier.
6686
6687 We store the traversal before we do SPRs to avoid any possible infinite loop
6688 */
6689 double last_tot_len = 0.0;
6690 if (NJ->nSeq <= 3 || maxSPRLength < 1)
6691 return;
6692 if (slow)
6693 last_tot_len = TreeLength(NJ, /*recomputeLengths*/true);
6694 int *nodeList = mymalloc(sizeof(int) * NJ->maxnodes);
6695 int nodeListLen = 0;
6696 traversal_t traversal = InitTraversal(NJ);
6697 int node = NJ->root;
6698 while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
6699 nodeList[nodeListLen++] = node;
6700 }
6701 assert(nodeListLen == NJ->maxnode);
6702 traversal = FreeTraversal(traversal,NJ);
6703
6704 profile_t **upProfiles = UpProfiles(NJ);
6705 spr_step_t *steps = mymalloc(sizeof(spr_step_t) * maxSPRLength); /* current chain of SPRs */
6706
6707 int i;
6708 for (i = 0; i < nodeListLen; i++) {
6709 node = nodeList[i];
6710 if ((i % 100) == 0)
6711 ProgressReport("SPR round %3d of %3d, %d of %d nodes",
6712 iRound+1, nRounds, i+1, nodeListLen);
6713 if (node == NJ->root)
6714 continue; /* nothing to do for root */
6715 /* The nodes to NNI around */
6716 int nodeAround[2] = { NJ->parent[node], Sibling(NJ, node) };
6717 if (NJ->parent[node] == NJ->root) {
6718 /* NNI around both siblings instead */
6719 RootSiblings(NJ, node, /*OUT*/nodeAround);
6720 }
6721 bool bChanged = false;
6722 int iAround;
6723 for (iAround = 0; iAround < 2 && bChanged == false; iAround++) {
6724 int ACFirst;
6725 for (ACFirst = 0; ACFirst < 2 && bChanged == false; ACFirst++) {
6726 if(verbose > 3)
6727 PrintNJInternal(stderr, NJ, /*useLen*/false);
6728 int chainLength = FindSPRSteps(/*IN/OUT*/NJ, node, nodeAround[iAround],
6729 upProfiles, /*OUT*/steps, maxSPRLength, (bool)ACFirst);
6730 double dMinDelta = 0.0;
6731 int iCBest = -1;
6732 double dTotDelta = 0.0;
6733 int iC;
6734 for (iC = 0; iC < chainLength; iC++) {
6735 dTotDelta += steps[iC].deltaLength;
6736 if (dTotDelta < dMinDelta) {
6737 dMinDelta = dTotDelta;
6738 iCBest = iC;
6739 }
6740 }
6741
6742 if (verbose>3) {
6743 fprintf(stderr, "SPR %s %d around %d chainLength %d of %d deltaLength %.5f swaps:",
6744 iCBest >= 0 ? "move" : "abandoned",
6745 node,nodeAround[iAround],iCBest+1,chainLength,dMinDelta);
6746 for (iC = 0; iC < chainLength; iC++)
6747 fprintf(stderr, " (%d,%d)%.4f", steps[iC].nodes[0], steps[iC].nodes[1], steps[iC].deltaLength);
6748 fprintf(stderr,"\n");
6749 }
6750 for (iC = chainLength - 1; iC > iCBest; iC--)
6751 UnwindSPRStep(/*IN/OUT*/NJ, /*IN*/&steps[iC], /*IN/OUT*/upProfiles);
6752 if(verbose > 3)
6753 PrintNJInternal(stderr, NJ, /*useLen*/false);
6754 while (slow && iCBest >= 0) {
6755 double expected_tot_len = last_tot_len + dMinDelta;
6756 double new_tot_len = TreeLength(NJ, /*recompute*/true);
6757 if (verbose > 2)
6758 fprintf(stderr, "Total branch-length is now %.4f was %.4f expected %.4f\n",
6759 new_tot_len, last_tot_len, expected_tot_len);
6760 if (new_tot_len < last_tot_len) {
6761 last_tot_len = new_tot_len;
6762 break; /* no rewinding necessary */
6763 }
6764 if (verbose > 2)
6765 fprintf(stderr, "Rewinding SPR to %d\n",iCBest);
6766 UnwindSPRStep(/*IN/OUT*/NJ, /*IN*/&steps[iCBest], /*IN/OUT*/upProfiles);
6767 dMinDelta -= steps[iCBest].deltaLength;
6768 iCBest--;
6769 }
6770 if (iCBest >= 0)
6771 bChanged = true;
6772 } /* loop over which step to take at 1st NNI */
6773 } /* loop over which node to pivot around */
6774
6775 if (bChanged) {
6776 nSPR++; /* the SPR move is OK */
6777 /* make sure all the profiles are OK */
6778 int j;
6779 for (j = 0; j < NJ->maxnodes; j++)
6780 DeleteUpProfile(upProfiles, NJ, j);
6781 int ancestor;
6782 for (ancestor = NJ->parent[node]; ancestor >= 0; ancestor = NJ->parent[ancestor])
6783 RecomputeProfile(/*IN/OUT*/NJ, upProfiles, ancestor, /*useML*/false);
6784 }
6785 } /* end loop over subtrees to prune & regraft */
6786 steps = myfree(steps, sizeof(spr_step_t) * maxSPRLength);
6787 upProfiles = FreeUpProfiles(upProfiles,NJ);
6788 nodeList = myfree(nodeList, sizeof(int) * NJ->maxnodes);
6789 }
6790
RecomputeProfile(NJ_t * NJ,profile_t ** upProfiles,int node,bool useML)6791 void RecomputeProfile(/*IN/OUT*/NJ_t *NJ, /*IN/OUT*/profile_t **upProfiles, int node,
6792 bool useML) {
6793 if (node < NJ->nSeq || node == NJ->root)
6794 return; /* no profile to compute */
6795 assert(NJ->child[node].nChild==2);
6796
6797 profile_t *profiles[4];
6798 double weight = 0.5;
6799 if (useML || !bionj) {
6800 profiles[0] = NJ->profiles[NJ->child[node].child[0]];
6801 profiles[1] = NJ->profiles[NJ->child[node].child[1]];
6802 } else {
6803 int nodeABCD[4];
6804 SetupABCD(NJ, node, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, useML);
6805 weight = QuartetWeight(profiles, NJ->distance_matrix, NJ->nPos);
6806 }
6807 if (verbose>3) {
6808 if (useML) {
6809 fprintf(stderr, "Recompute %d from %d %d lengths %.4f %.4f\n",
6810 node,
6811 NJ->child[node].child[0],
6812 NJ->child[node].child[1],
6813 NJ->branchlength[NJ->child[node].child[0]],
6814 NJ->branchlength[NJ->child[node].child[1]]);
6815 } else {
6816 fprintf(stderr, "Recompute %d from %d %d weight %.3f\n",
6817 node, NJ->child[node].child[0], NJ->child[node].child[1], weight);
6818 }
6819 }
6820 NJ->profiles[node] = FreeProfile(NJ->profiles[node], NJ->nPos, NJ->nConstraints);
6821 if (useML) {
6822 NJ->profiles[node] = PosteriorProfile(profiles[0], profiles[1],
6823 NJ->branchlength[NJ->child[node].child[0]],
6824 NJ->branchlength[NJ->child[node].child[1]],
6825 NJ->transmat, &NJ->rates, NJ->nPos, NJ->nConstraints);
6826 } else {
6827 NJ->profiles[node] = AverageProfile(profiles[0], profiles[1],
6828 NJ->nPos, NJ->nConstraints,
6829 NJ->distance_matrix, weight);
6830 }
6831 }
6832
6833 /* The BIONJ-like formula for the weight of A when building a profile for AB is
6834 1/2 + (avgD(B,CD) - avgD(A,CD))/(2*d(A,B))
6835 */
QuartetWeight(profile_t * profiles[4],distance_matrix_t * dmat,int nPos)6836 double QuartetWeight(profile_t *profiles[4], distance_matrix_t *dmat, int nPos) {
6837 if (!bionj)
6838 return(-1.0); /* even weighting */
6839 double d[6];
6840 CorrectedPairDistances(profiles, 4, dmat, nPos, /*OUT*/d);
6841 if (d[qAB] < 0.01)
6842 return -1.0;
6843 double weight = 0.5 + ((d[qBC]+d[qBD])-(d[qAC]+d[qAD]))/(4*d[qAB]);
6844 if (weight < 0)
6845 weight = 0;
6846 if (weight > 1)
6847 weight = 1;
6848 return (weight);
6849 }
6850
6851 /* Resets the children entry of parent and also the parent entry of newchild */
ReplaceChild(NJ_t * NJ,int parent,int oldchild,int newchild)6852 void ReplaceChild(/*IN/OUT*/NJ_t *NJ, int parent, int oldchild, int newchild) {
6853 NJ->parent[newchild] = parent;
6854
6855 int iChild;
6856 for (iChild = 0; iChild < NJ->child[parent].nChild; iChild++) {
6857 if (NJ->child[parent].child[iChild] == oldchild) {
6858 NJ->child[parent].child[iChild] = newchild;
6859 return;
6860 }
6861 }
6862 assert(0);
6863 }
6864
6865 /* Recomputes all branch lengths
6866
6867 For internal branches such as (A,B) vs. (C,D), uses the formula
6868
6869 length(AB|CD) = (d(A,C)+d(A,D)+d(B,C)+d(B,D))/4 - d(A,B)/2 - d(C,D)/2
6870
6871 (where all distances are profile distances - diameters).
6872
6873 For external branches (e.g. to leaves) A vs. (B,C), use the formula
6874
6875 length(A|BC) = (d(A,B)+d(A,C)-d(B,C))/2
6876 */
UpdateBranchLengths(NJ_t * NJ)6877 void UpdateBranchLengths(/*IN/OUT*/NJ_t *NJ) {
6878 if (NJ->nSeq < 2)
6879 return;
6880 else if (NJ->nSeq == 2) {
6881 int root = NJ->root;
6882 int nodeA = NJ->child[root].child[0];
6883 int nodeB = NJ->child[root].child[1];
6884 besthit_t h;
6885 ProfileDist(NJ->profiles[nodeA],NJ->profiles[nodeB],
6886 NJ->nPos, NJ->distance_matrix, /*OUT*/&h);
6887 if (logdist)
6888 h.dist = LogCorrect(h.dist);
6889 NJ->branchlength[nodeA] = h.dist/2.0;
6890 NJ->branchlength[nodeB] = h.dist/2.0;
6891 return;
6892 }
6893
6894 profile_t **upProfiles = UpProfiles(NJ);
6895 traversal_t traversal = InitTraversal(NJ);
6896 int node = NJ->root;
6897
6898 while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
6899 /* reset branch length of node (distance to its parent) */
6900 if (node == NJ->root)
6901 continue; /* no branch length to set */
6902 if (node < NJ->nSeq) { /* a leaf */
6903 profile_t *profileA = NJ->profiles[node];
6904 profile_t *profileB = NULL;
6905 profile_t *profileC = NULL;
6906
6907 int sib = Sibling(NJ,node);
6908 if (sib == -1) { /* at root, have 2 siblings */
6909 int sibs[2];
6910 RootSiblings(NJ, node, /*OUT*/sibs);
6911 profileB = NJ->profiles[sibs[0]];
6912 profileC = NJ->profiles[sibs[1]];
6913 } else {
6914 profileB = NJ->profiles[sib];
6915 profileC = GetUpProfile(/*IN/OUT*/upProfiles, NJ, NJ->parent[node], /*useML*/false);
6916 }
6917 profile_t *profiles[3] = {profileA,profileB,profileC};
6918 double d[3]; /*AB,AC,BC*/
6919 CorrectedPairDistances(profiles, 3, NJ->distance_matrix, NJ->nPos, /*OUT*/d);
6920 /* d(A,BC) = (dAB+dAC-dBC)/2 */
6921 NJ->branchlength[node] = (d[0]+d[1]-d[2])/2.0;
6922 } else {
6923 profile_t *profiles[4];
6924 int nodeABCD[4];
6925 SetupABCD(NJ, node, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, /*useML*/false);
6926 double d[6];
6927 CorrectedPairDistances(profiles, 4, NJ->distance_matrix, NJ->nPos, /*OUT*/d);
6928 NJ->branchlength[node] = (d[qAC]+d[qAD]+d[qBC]+d[qBD])/4.0 - (d[qAB]+d[qCD])/2.0;
6929
6930 /* no longer needed */
6931 DeleteUpProfile(upProfiles, NJ, nodeABCD[0]);
6932 DeleteUpProfile(upProfiles, NJ, nodeABCD[1]);
6933 }
6934 }
6935 traversal = FreeTraversal(traversal,NJ);
6936 upProfiles = FreeUpProfiles(upProfiles,NJ);
6937 }
6938
6939 /* Pick columns for resampling, stored as returned_vector[iBoot*nPos + j] */
ResampleColumns(int nPos,int nBootstrap)6940 int *ResampleColumns(int nPos, int nBootstrap) {
6941 long lPos = nPos; /* to prevent overflow on very long alignments when multiplying nPos * nBootstrap */
6942 int *col = (int*)mymalloc(sizeof(int)*lPos*(size_t)nBootstrap);
6943 int i;
6944 for (i = 0; i < nBootstrap; i++) {
6945 int j;
6946 for (j = 0; j < nPos; j++) {
6947 int pos = (int)(knuth_rand() * nPos);
6948 if (pos<0)
6949 pos = 0;
6950 else if (pos == nPos)
6951 pos = nPos-1;
6952 col[i*lPos + j] = pos;
6953 }
6954 }
6955 if (verbose > 5) {
6956 for (i=0; i < 3 && i < nBootstrap; i++) {
6957 fprintf(stderr,"Boot%d",i);
6958 int j;
6959 for (j = 0; j < nPos; j++) {
6960 fprintf(stderr,"\t%d",col[i*lPos+j]);
6961 }
6962 fprintf(stderr,"\n");
6963 }
6964 }
6965 return(col);
6966 }
6967
ReliabilityNJ(NJ_t * NJ,int nBootstrap)6968 void ReliabilityNJ(/*IN/OUT*/NJ_t *NJ, int nBootstrap) {
6969 /* For each non-root node N, with children A,B, parent P, sibling C, and grandparent G,
6970 we test the reliability of the split (A,B) versus rest by comparing the profiles
6971 of A, B, C, and the "up-profile" of P.
6972
6973 Each node's upProfile is the average of its sibling's (down)-profile + its parent's up-profile
6974 (If node's parent is the root, then there are two siblings and we don't need an up-profile)
6975
6976 To save memory, we do depth-first-search down from the root, and we only keep
6977 up-profiles for nodes in the active path.
6978 */
6979 if (NJ->nSeq <= 3 || nBootstrap <= 0)
6980 return; /* nothing to do */
6981 int *col = ResampleColumns(NJ->nPos, nBootstrap);
6982
6983 profile_t **upProfiles = UpProfiles(NJ);
6984 traversal_t traversal = InitTraversal(NJ);
6985 int node = NJ->root;
6986 int iNodesDone = 0;
6987 while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
6988 if (node < NJ->nSeq || node == NJ->root)
6989 continue; /* nothing to do for leaves or root */
6990
6991 if(iNodesDone > 0 && (iNodesDone % 100) == 0)
6992 ProgressReport("Local bootstrap for %6d of %6d internal splits", iNodesDone, NJ->nSeq-3, 0, 0);
6993 iNodesDone++;
6994
6995 profile_t *profiles[4];
6996 int nodeABCD[4];
6997 SetupABCD(NJ, node, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, /*useML*/false);
6998
6999 NJ->support[node] = SplitSupport(profiles[0], profiles[1], profiles[2], profiles[3],
7000 NJ->distance_matrix,
7001 NJ->nPos,
7002 nBootstrap,
7003 col);
7004
7005 /* no longer needed */
7006 DeleteUpProfile(upProfiles, NJ, nodeABCD[0]);
7007 DeleteUpProfile(upProfiles, NJ, nodeABCD[1]);
7008 DeleteUpProfile(upProfiles, NJ, nodeABCD[2]);
7009 }
7010 traversal = FreeTraversal(traversal,NJ);
7011 upProfiles = FreeUpProfiles(upProfiles,NJ);
7012 col = myfree(col, sizeof(int)*((size_t)NJ->nPos)*nBootstrap);
7013 }
7014
NewProfile(int nPos,int nConstraints)7015 profile_t *NewProfile(int nPos, int nConstraints) {
7016 profile_t *profile = (profile_t *)mymalloc(sizeof(profile_t));
7017 profile->weights = mymalloc(sizeof(numeric_t)*nPos);
7018 profile->codes = mymalloc(sizeof(unsigned char)*nPos);
7019 profile->vectors = NULL;
7020 profile->nVectors = 0;
7021 profile->codeDist = NULL;
7022 if (nConstraints == 0) {
7023 profile->nOn = NULL;
7024 profile->nOff = NULL;
7025 } else {
7026 profile->nOn = mymalloc(sizeof(int)*nConstraints);
7027 profile->nOff = mymalloc(sizeof(int)*nConstraints);
7028 }
7029 return(profile);
7030 }
7031
FreeProfile(profile_t * profile,int nPos,int nConstraints)7032 profile_t *FreeProfile(profile_t *profile, int nPos, int nConstraints) {
7033 if(profile==NULL) return(NULL);
7034 myfree(profile->codes, nPos);
7035 myfree(profile->weights, nPos);
7036 myfree(profile->vectors, sizeof(numeric_t)*nCodes*profile->nVectors);
7037 myfree(profile->codeDist, sizeof(numeric_t)*nCodes*nPos);
7038 if (nConstraints > 0) {
7039 myfree(profile->nOn, sizeof(int)*nConstraints);
7040 myfree(profile->nOff, sizeof(int)*nConstraints);
7041 }
7042 return(myfree(profile, sizeof(profile_t)));
7043 }
7044
SetupABCD(NJ_t * NJ,int node,profile_t * profiles[4],profile_t ** upProfiles,int nodeABCD[4],bool useML)7045 void SetupABCD(NJ_t *NJ, int node,
7046 /* the 4 profiles; the last one is an outprofile */
7047 /*OPTIONAL OUT*/profile_t *profiles[4],
7048 /*OPTIONAL IN/OUT*/profile_t **upProfiles,
7049 /*OUT*/int nodeABCD[4],
7050 bool useML) {
7051 int parent = NJ->parent[node];
7052 assert(parent >= 0);
7053 assert(NJ->child[node].nChild == 2);
7054 nodeABCD[0] = NJ->child[node].child[0]; /*A*/
7055 nodeABCD[1] = NJ->child[node].child[1]; /*B*/
7056
7057 profile_t *profile4 = NULL;
7058 if (parent == NJ->root) {
7059 int sibs[2];
7060 RootSiblings(NJ, node, /*OUT*/sibs);
7061 nodeABCD[2] = sibs[0];
7062 nodeABCD[3] = sibs[1];
7063 if (profiles == NULL)
7064 return;
7065 profile4 = NJ->profiles[sibs[1]];
7066 } else {
7067 nodeABCD[2] = Sibling(NJ,node);
7068 assert(nodeABCD[2] >= 0);
7069 nodeABCD[3] = parent;
7070 if (profiles == NULL)
7071 return;
7072 profile4 = GetUpProfile(upProfiles,NJ,parent,useML);
7073 }
7074 assert(upProfiles != NULL);
7075 int i;
7076 for (i = 0; i < 3; i++)
7077 profiles[i] = NJ->profiles[nodeABCD[i]];
7078 profiles[3] = profile4;
7079 }
7080
7081
Sibling(NJ_t * NJ,int node)7082 int Sibling(NJ_t *NJ, int node) {
7083 int parent = NJ->parent[node];
7084 if (parent < 0 || parent == NJ->root)
7085 return(-1);
7086 int iChild;
7087 for(iChild=0;iChild<NJ->child[parent].nChild;iChild++) {
7088 if(NJ->child[parent].child[iChild] != node)
7089 return (NJ->child[parent].child[iChild]);
7090 }
7091 assert(0);
7092 return(-1);
7093 }
7094
RootSiblings(NJ_t * NJ,int node,int sibs[2])7095 void RootSiblings(NJ_t *NJ, int node, /*OUT*/int sibs[2]) {
7096 assert(NJ->parent[node] == NJ->root);
7097 assert(NJ->child[NJ->root].nChild == 3);
7098
7099 int nSibs = 0;
7100 int iChild;
7101 for(iChild=0; iChild < NJ->child[NJ->root].nChild; iChild++) {
7102 int child = NJ->child[NJ->root].child[iChild];
7103 if (child != node) sibs[nSibs++] = child;
7104 }
7105 assert(nSibs==2);
7106 }
7107
TestSplitsML(NJ_t * NJ,SplitCount_t * splitcount,int nBootstrap)7108 void TestSplitsML(/*IN/OUT*/NJ_t *NJ, /*OUT*/SplitCount_t *splitcount, int nBootstrap) {
7109 const double tolerance = 1e-6;
7110 splitcount->nBadSplits = 0;
7111 splitcount->nConstraintViolations = 0;
7112 splitcount->nBadBoth = 0;
7113 splitcount->nSplits = 0;
7114 splitcount->dWorstDeltaUnconstrained = 0;
7115 splitcount->dWorstDeltaConstrained = 0;
7116
7117 profile_t **upProfiles = UpProfiles(NJ);
7118 traversal_t traversal = InitTraversal(NJ);
7119 int node = NJ->root;
7120
7121 int *col = nBootstrap > 0 ? ResampleColumns(NJ->nPos, nBootstrap) : NULL;
7122 double *site_likelihoods[3];
7123 int choice;
7124 for (choice = 0; choice < 3; choice++)
7125 site_likelihoods[choice] = mymalloc(sizeof(double)*NJ->nPos);
7126
7127 int iNodesDone = 0;
7128 while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
7129 if (node < NJ->nSeq || node == NJ->root)
7130 continue; /* nothing to do for leaves or root */
7131
7132 if(iNodesDone > 0 && (iNodesDone % 100) == 0)
7133 ProgressReport("ML split tests for %6d of %6d internal splits", iNodesDone, NJ->nSeq-3, 0, 0);
7134 iNodesDone++;
7135
7136 profile_t *profiles[4];
7137 int nodeABCD[4];
7138 SetupABCD(NJ, node, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, /*useML*/true);
7139 double loglk[3];
7140 double len[5];
7141 int i;
7142 for (i = 0; i < 4; i++)
7143 len[i] = NJ->branchlength[nodeABCD[i]];
7144 len[4] = NJ->branchlength[node];
7145 double lenABvsCD[5] = {len[LEN_A], len[LEN_B], len[LEN_C], len[LEN_D], len[LEN_I]};
7146 double lenACvsBD[5] = {len[LEN_A], len[LEN_C], len[LEN_B], len[LEN_D], len[LEN_I]}; /* Swap B & C */
7147 double lenADvsBC[5] = {len[LEN_A], len[LEN_D], len[LEN_C], len[LEN_B], len[LEN_I]}; /* Swap B & D */
7148
7149 {
7150 #ifdef OPENMP
7151 #pragma omp parallel
7152 #pragma omp sections
7153 #endif
7154 {
7155 #ifdef OPENMP
7156 #pragma omp section
7157 #endif
7158 {
7159 /* Lengths are already optimized for ABvsCD */
7160 loglk[ABvsCD] = MLQuartetLogLk(profiles[0], profiles[1], profiles[2], profiles[3],
7161 NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/lenABvsCD,
7162 /*OUT*/site_likelihoods[ABvsCD]);
7163 }
7164
7165 #ifdef OPENMP
7166 #pragma omp section
7167 #endif
7168 {
7169 loglk[ACvsBD] = MLQuartetOptimize(profiles[0], profiles[2], profiles[1], profiles[3],
7170 NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/lenACvsBD, /*pStarTest*/NULL,
7171 /*OUT*/site_likelihoods[ACvsBD]);
7172 }
7173
7174 #ifdef OPENMP
7175 #pragma omp section
7176 #endif
7177 {
7178 loglk[ADvsBC] = MLQuartetOptimize(profiles[0], profiles[3], profiles[2], profiles[1],
7179 NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/lenADvsBC, /*pStarTest*/NULL,
7180 /*OUT*/site_likelihoods[ADvsBC]);
7181 }
7182 }
7183 }
7184
7185 /* do a second pass on the better alternative if it is close */
7186 if (loglk[ACvsBD] > loglk[ADvsBC]) {
7187 if (mlAccuracy > 1 || loglk[ACvsBD] > loglk[ABvsCD] - closeLogLkLimit) {
7188 loglk[ACvsBD] = MLQuartetOptimize(profiles[0], profiles[2], profiles[1], profiles[3],
7189 NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/lenACvsBD, /*pStarTest*/NULL,
7190 /*OUT*/site_likelihoods[ACvsBD]);
7191 }
7192 } else {
7193 if (mlAccuracy > 1 || loglk[ADvsBC] > loglk[ABvsCD] - closeLogLkLimit) {
7194 loglk[ADvsBC] = MLQuartetOptimize(profiles[0], profiles[3], profiles[2], profiles[1],
7195 NJ->nPos, NJ->transmat, &NJ->rates, /*IN/OUT*/lenADvsBC, /*pStarTest*/NULL,
7196 /*OUT*/site_likelihoods[ADvsBC]);
7197 }
7198 }
7199
7200 if (loglk[ABvsCD] >= loglk[ACvsBD] && loglk[ABvsCD] >= loglk[ADvsBC])
7201 choice = ABvsCD;
7202 else if (loglk[ACvsBD] >= loglk[ABvsCD] && loglk[ACvsBD] >= loglk[ADvsBC])
7203 choice = ACvsBD;
7204 else
7205 choice = ADvsBC;
7206 bool badSplit = loglk[choice] > loglk[ABvsCD] + treeLogLkDelta; /* ignore small changes in likelihood */
7207
7208 /* constraint penalties, indexed by nni_t (lower is better) */
7209 double p[3];
7210 QuartetConstraintPenalties(profiles, NJ->nConstraints, /*OUT*/p);
7211 bool bBadConstr = p[ABvsCD] > p[ACvsBD] + tolerance || p[ABvsCD] > p[ADvsBC] + tolerance;
7212 bool violateConstraint = false;
7213 int iC;
7214 for (iC=0; iC < NJ->nConstraints; iC++) {
7215 if (SplitViolatesConstraint(profiles, iC)) {
7216 violateConstraint = true;
7217 break;
7218 }
7219 }
7220 splitcount->nSplits++;
7221 if (violateConstraint)
7222 splitcount->nConstraintViolations++;
7223 if (badSplit)
7224 splitcount->nBadSplits++;
7225 if (badSplit && bBadConstr)
7226 splitcount->nBadBoth++;
7227 if (badSplit) {
7228 double delta = loglk[choice] - loglk[ABvsCD];
7229 /* If ABvsCD is favored over the more likely NNI by constraints,
7230 then this is probably a bad split because of the constraint */
7231 if (p[choice] > p[ABvsCD] + tolerance)
7232 splitcount->dWorstDeltaConstrained = MAX(delta, splitcount->dWorstDeltaConstrained);
7233 else
7234 splitcount->dWorstDeltaUnconstrained = MAX(delta, splitcount->dWorstDeltaUnconstrained);
7235 }
7236 if (nBootstrap>0)
7237 NJ->support[node] = badSplit ? 0.0 : SHSupport(NJ->nPos, nBootstrap, col, loglk, site_likelihoods);
7238
7239 /* No longer needed */
7240 DeleteUpProfile(upProfiles, NJ, nodeABCD[0]);
7241 DeleteUpProfile(upProfiles, NJ, nodeABCD[1]);
7242 DeleteUpProfile(upProfiles, NJ, nodeABCD[2]);
7243 }
7244 traversal = FreeTraversal(traversal,NJ);
7245 upProfiles = FreeUpProfiles(upProfiles,NJ);
7246 if (nBootstrap>0)
7247 col = myfree(col, sizeof(int)*((size_t)NJ->nPos)*nBootstrap);
7248 for (choice = 0; choice < 3; choice++)
7249 site_likelihoods[choice] = myfree(site_likelihoods[choice], sizeof(double)*NJ->nPos);
7250 }
7251
7252
TestSplitsMinEvo(NJ_t * NJ,SplitCount_t * splitcount)7253 void TestSplitsMinEvo(NJ_t *NJ, /*OUT*/SplitCount_t *splitcount) {
7254 const double tolerance = 1e-6;
7255 splitcount->nBadSplits = 0;
7256 splitcount->nConstraintViolations = 0;
7257 splitcount->nBadBoth = 0;
7258 splitcount->nSplits = 0;
7259 splitcount->dWorstDeltaUnconstrained = 0.0;
7260 splitcount->dWorstDeltaConstrained = 0.0;
7261
7262 profile_t **upProfiles = UpProfiles(NJ);
7263 traversal_t traversal = InitTraversal(NJ);
7264 int node = NJ->root;
7265
7266 while((node = TraversePostorder(node, NJ, /*IN/OUT*/traversal, /*pUp*/NULL)) >= 0) {
7267 if (node < NJ->nSeq || node == NJ->root)
7268 continue; /* nothing to do for leaves or root */
7269
7270 profile_t *profiles[4];
7271 int nodeABCD[4];
7272 SetupABCD(NJ, node, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, /*useML*/false);
7273
7274 if (verbose>2)
7275 fprintf(stderr,"Testing Split around %d: A=%d B=%d C=%d D=up(%d) or node parent %d\n",
7276 node, nodeABCD[0], nodeABCD[1], nodeABCD[2], nodeABCD[3], NJ->parent[node]);
7277
7278 double d[6]; /* distances, perhaps log-corrected distances, no constraint penalties */
7279 CorrectedPairDistances(profiles, 4, NJ->distance_matrix, NJ->nPos, /*OUT*/d);
7280
7281 /* alignment-based scores for each split (lower is better) */
7282 double sABvsCD = d[qAB] + d[qCD];
7283 double sACvsBD = d[qAC] + d[qBD];
7284 double sADvsBC = d[qAD] + d[qBC];
7285
7286 /* constraint penalties, indexed by nni_t (lower is better) */
7287 double p[3];
7288 QuartetConstraintPenalties(profiles, NJ->nConstraints, /*OUT*/p);
7289
7290 int nConstraintsViolated = 0;
7291 int iC;
7292 for (iC=0; iC < NJ->nConstraints; iC++) {
7293 if (SplitViolatesConstraint(profiles, iC)) {
7294 nConstraintsViolated++;
7295 if (verbose > 2) {
7296 double penalty[3] = {0.0,0.0,0.0};
7297 (void)QuartetConstraintPenaltiesPiece(profiles, iC, /*OUT*/penalty);
7298 fprintf(stderr, "Violate constraint %d at %d (children %d %d) penalties %.3f %.3f %.3f %d/%d %d/%d %d/%d %d/%d\n",
7299 iC, node, NJ->child[node].child[0], NJ->child[node].child[1],
7300 penalty[ABvsCD], penalty[ACvsBD], penalty[ADvsBC],
7301 profiles[0]->nOn[iC], profiles[0]->nOff[iC],
7302 profiles[1]->nOn[iC], profiles[1]->nOff[iC],
7303 profiles[2]->nOn[iC], profiles[2]->nOff[iC],
7304 profiles[3]->nOn[iC], profiles[3]->nOff[iC]);
7305 }
7306 }
7307 }
7308
7309 double delta = sABvsCD - MIN(sACvsBD,sADvsBC);
7310 bool bBadDist = delta > tolerance;
7311 bool bBadConstr = p[ABvsCD] > p[ACvsBD] + tolerance || p[ABvsCD] > p[ADvsBC] + tolerance;
7312
7313 splitcount->nSplits++;
7314 if (bBadDist) {
7315 nni_t choice = sACvsBD < sADvsBC ? ACvsBD : ADvsBC;
7316 /* If ABvsCD is favored over the shorter NNI by constraints,
7317 then this is probably a bad split because of the constraint */
7318 if (p[choice] > p[ABvsCD] + tolerance)
7319 splitcount->dWorstDeltaConstrained = MAX(delta, splitcount->dWorstDeltaConstrained);
7320 else
7321 splitcount->dWorstDeltaUnconstrained = MAX(delta, splitcount->dWorstDeltaUnconstrained);
7322 }
7323
7324 if (nConstraintsViolated > 0)
7325 splitcount->nConstraintViolations++; /* count splits with any violations, not #constraints in a splits */
7326 if (bBadDist)
7327 splitcount->nBadSplits++;
7328 if (bBadDist && bBadConstr)
7329 splitcount->nBadBoth++;
7330 if (bBadConstr && verbose > 2) {
7331 /* Which NNI would be better */
7332 double dist_advantage = 0;
7333 double constraint_penalty = 0;
7334 if (p[ACvsBD] < p[ADvsBC]) {
7335 dist_advantage = sACvsBD - sABvsCD;
7336 constraint_penalty = p[ABvsCD] - p[ACvsBD];
7337 } else {
7338 dist_advantage = sADvsBC - sABvsCD;
7339 constraint_penalty = p[ABvsCD] - p[ADvsBC];
7340 }
7341 fprintf(stderr, "Violate constraints %d distance_advantage %.3f constraint_penalty %.3f (children %d %d):",
7342 node, dist_advantage, constraint_penalty,
7343 NJ->child[node].child[0], NJ->child[node].child[1]);
7344 /* list the constraints with a penalty, meaning that ABCD all have non-zero
7345 values and that AB|CD worse than others */
7346 for (iC = 0; iC < NJ->nConstraints; iC++) {
7347 double ppart[6];
7348 if (QuartetConstraintPenaltiesPiece(profiles, iC, /*OUT*/ppart)) {
7349 if (ppart[qAB] + ppart[qCD] > ppart[qAD] + ppart[qBC] + tolerance
7350 || ppart[qAB] + ppart[qCD] > ppart[qAC] + ppart[qBD] + tolerance)
7351 fprintf(stderr, " %d (%d/%d %d/%d %d/%d %d/%d)", iC,
7352 profiles[0]->nOn[iC], profiles[0]->nOff[iC],
7353 profiles[1]->nOn[iC], profiles[1]->nOff[iC],
7354 profiles[2]->nOn[iC], profiles[2]->nOff[iC],
7355 profiles[3]->nOn[iC], profiles[3]->nOff[iC]);
7356 }
7357 }
7358 fprintf(stderr, "\n");
7359 }
7360
7361 /* no longer needed */
7362 DeleteUpProfile(upProfiles, NJ, nodeABCD[0]);
7363 DeleteUpProfile(upProfiles, NJ, nodeABCD[1]);
7364 }
7365 traversal = FreeTraversal(traversal,NJ);
7366 upProfiles = FreeUpProfiles(upProfiles,NJ);
7367 }
7368
7369 /* Computes support for (A,B),(C,D) compared to that for (A,C),(B,D) and (A,D),(B,C) */
SplitSupport(profile_t * pA,profile_t * pB,profile_t * pC,profile_t * pD,distance_matrix_t * dmat,int nPos,int nBootstrap,int * col)7370 double SplitSupport(profile_t *pA, profile_t *pB, profile_t *pC, profile_t *pD,
7371 /*OPTIONAL*/distance_matrix_t *dmat,
7372 int nPos,
7373 int nBootstrap,
7374 int *col) {
7375 int i,j;
7376 long lPos = nPos; /* to avoid overflow when multiplying */
7377
7378 /* Note distpieces are weighted */
7379 double *distpieces[6];
7380 double *weights[6];
7381 for (j = 0; j < 6; j++) {
7382 distpieces[j] = (double*)mymalloc(sizeof(double)*nPos);
7383 weights[j] = (double*)mymalloc(sizeof(double)*nPos);
7384 }
7385
7386 int iFreqA = 0;
7387 int iFreqB = 0;
7388 int iFreqC = 0;
7389 int iFreqD = 0;
7390 for (i = 0; i < nPos; i++) {
7391 numeric_t *fA = GET_FREQ(pA, i, /*IN/OUT*/iFreqA);
7392 numeric_t *fB = GET_FREQ(pB, i, /*IN/OUT*/iFreqB);
7393 numeric_t *fC = GET_FREQ(pC, i, /*IN/OUT*/iFreqC);
7394 numeric_t *fD = GET_FREQ(pD, i, /*IN/OUT*/iFreqD);
7395
7396 weights[qAB][i] = pA->weights[i] * pB->weights[i];
7397 weights[qAC][i] = pA->weights[i] * pC->weights[i];
7398 weights[qAD][i] = pA->weights[i] * pD->weights[i];
7399 weights[qBC][i] = pB->weights[i] * pC->weights[i];
7400 weights[qBD][i] = pB->weights[i] * pD->weights[i];
7401 weights[qCD][i] = pC->weights[i] * pD->weights[i];
7402
7403 distpieces[qAB][i] = weights[qAB][i] * ProfileDistPiece(pA->codes[i], pB->codes[i], fA, fB, dmat, NULL);
7404 distpieces[qAC][i] = weights[qAC][i] * ProfileDistPiece(pA->codes[i], pC->codes[i], fA, fC, dmat, NULL);
7405 distpieces[qAD][i] = weights[qAD][i] * ProfileDistPiece(pA->codes[i], pD->codes[i], fA, fD, dmat, NULL);
7406 distpieces[qBC][i] = weights[qBC][i] * ProfileDistPiece(pB->codes[i], pC->codes[i], fB, fC, dmat, NULL);
7407 distpieces[qBD][i] = weights[qBD][i] * ProfileDistPiece(pB->codes[i], pD->codes[i], fB, fD, dmat, NULL);
7408 distpieces[qCD][i] = weights[qCD][i] * ProfileDistPiece(pC->codes[i], pD->codes[i], fC, fD, dmat, NULL);
7409 }
7410 assert(iFreqA == pA->nVectors);
7411 assert(iFreqB == pB->nVectors);
7412 assert(iFreqC == pC->nVectors);
7413 assert(iFreqD == pD->nVectors);
7414
7415 double totpieces[6];
7416 double totweights[6];
7417 double dists[6];
7418 for (j = 0; j < 6; j++) {
7419 totpieces[j] = 0.0;
7420 totweights[j] = 0.0;
7421 for (i = 0; i < nPos; i++) {
7422 totpieces[j] += distpieces[j][i];
7423 totweights[j] += weights[j][i];
7424 }
7425 dists[j] = totweights[j] > 0.01 ? totpieces[j]/totweights[j] : 3.0;
7426 if (logdist)
7427 dists[j] = LogCorrect(dists[j]);
7428 }
7429
7430 /* Support1 = Support(AB|CD over AC|BD) = d(A,C)+d(B,D)-d(A,B)-d(C,D)
7431 Support2 = Support(AB|CD over AD|BC) = d(A,D)+d(B,C)-d(A,B)-d(C,D)
7432 */
7433 double support1 = dists[qAC] + dists[qBD] - dists[qAB] - dists[qCD];
7434 double support2 = dists[qAD] + dists[qBC] - dists[qAB] - dists[qCD];
7435
7436 if (support1 < 0 || support2 < 0) {
7437 nSuboptimalSplits++; /* Another split seems superior */
7438 }
7439
7440 assert(nBootstrap > 0);
7441 int nSupport = 0;
7442
7443 int iBoot;
7444 for (iBoot=0;iBoot<nBootstrap;iBoot++) {
7445 int *colw = &col[lPos*iBoot];
7446
7447 for (j = 0; j < 6; j++) {
7448 double totp = 0;
7449 double totw = 0;
7450 double *d = distpieces[j];
7451 double *w = weights[j];
7452 for (i=0; i<nPos; i++) {
7453 int c = colw[i];
7454 totp += d[c];
7455 totw += w[c];
7456 }
7457 dists[j] = totw > 0.01 ? totp/totw : 3.0;
7458 if (logdist)
7459 dists[j] = LogCorrect(dists[j]);
7460 }
7461 support1 = dists[qAC] + dists[qBD] - dists[qAB] - dists[qCD];
7462 support2 = dists[qAD] + dists[qBC] - dists[qAB] - dists[qCD];
7463 if (support1 > 0 && support2 > 0)
7464 nSupport++;
7465 } /* end loop over bootstrap replicates */
7466
7467 for (j = 0; j < 6; j++) {
7468 distpieces[j] = myfree(distpieces[j], sizeof(double)*nPos);
7469 weights[j] = myfree(weights[j], sizeof(double)*nPos);
7470 }
7471 return( nSupport/(double)nBootstrap );
7472 }
7473
SHSupport(int nPos,int nBootstrap,int * col,double loglk[3],double * site_likelihoods[3])7474 double SHSupport(int nPos, int nBootstrap, int *col, double loglk[3], double *site_likelihoods[3]) {
7475 long lPos = nPos; /* to avoid overflow when multiplying */
7476 assert(nBootstrap>0);
7477 double delta1 = loglk[0]-loglk[1];
7478 double delta2 = loglk[0]-loglk[2];
7479 double delta = delta1 < delta2 ? delta1 : delta2;
7480
7481 double *siteloglk[3];
7482 int i,j;
7483 for (i = 0; i < 3; i++) {
7484 siteloglk[i] = mymalloc(sizeof(double)*nPos);
7485 for (j = 0; j < nPos; j++)
7486 siteloglk[i][j] = log(site_likelihoods[i][j]);
7487 }
7488
7489 int nSupport = 0;
7490 int iBoot;
7491 for (iBoot = 0; iBoot < nBootstrap; iBoot++) {
7492 double resampled[3];
7493 for (i = 0; i < 3; i++)
7494 resampled[i] = -loglk[i];
7495 for (j = 0; j < nPos; j++) {
7496 int pos = col[iBoot*lPos+j];
7497 for (i = 0; i < 3; i++)
7498 resampled[i] += siteloglk[i][pos];
7499 }
7500 int iBest = 0;
7501 for (i = 1; i < 3; i++)
7502 if (resampled[i] > resampled[iBest])
7503 iBest = i;
7504 double resample1 = resampled[iBest] - resampled[(iBest+1)%3];
7505 double resample2 = resampled[iBest] - resampled[(iBest+2)%3];
7506 double resampleDelta = resample1 < resample2 ? resample1 : resample2;
7507 if (resampleDelta < delta)
7508 nSupport++;
7509 }
7510 for (i=0;i<3;i++)
7511 siteloglk[i] = myfree(siteloglk[i], sizeof(double)*nPos);
7512 return(nSupport/(double)nBootstrap);
7513 }
7514
7515
SetDistCriterion(NJ_t * NJ,int nActive,besthit_t * hit)7516 void SetDistCriterion(/*IN/OUT*/NJ_t *NJ, int nActive, /*IN/OUT*/besthit_t *hit) {
7517 if (hit->i < NJ->nSeq && hit->j < NJ->nSeq) {
7518 SeqDist(NJ->profiles[hit->i]->codes,
7519 NJ->profiles[hit->j]->codes,
7520 NJ->nPos, NJ->distance_matrix, /*OUT*/hit);
7521 } else {
7522 ProfileDist(NJ->profiles[hit->i],
7523 NJ->profiles[hit->j],
7524 NJ->nPos, NJ->distance_matrix, /*OUT*/hit);
7525 hit->dist -= (NJ->diameter[hit->i] + NJ->diameter[hit->j]);
7526 }
7527 hit->dist += constraintWeight
7528 * (double)JoinConstraintPenalty(NJ, hit->i, hit->j);
7529 SetCriterion(NJ,nActive,/*IN/OUT*/hit);
7530 }
7531
SetCriterion(NJ_t * NJ,int nActive,besthit_t * join)7532 void SetCriterion(/*IN/UPDATE*/NJ_t *NJ, int nActive, /*IN/OUT*/besthit_t *join) {
7533 if(join->i < 0
7534 || join->j < 0
7535 || NJ->parent[join->i] >= 0
7536 || NJ->parent[join->j] >= 0)
7537 return;
7538 assert(NJ->nOutDistActive[join->i] >= nActive);
7539 assert(NJ->nOutDistActive[join->j] >= nActive);
7540
7541 int nDiffAllow = tophitsMult > 0 ? (int)(nActive*staleOutLimit) : 0;
7542 if (NJ->nOutDistActive[join->i] - nActive > nDiffAllow)
7543 SetOutDistance(NJ, join->i, nActive);
7544 if (NJ->nOutDistActive[join->j] - nActive > nDiffAllow)
7545 SetOutDistance(NJ, join->j, nActive);
7546 double outI = NJ->outDistances[join->i];
7547 if (NJ->nOutDistActive[join->i] != nActive)
7548 outI *= (nActive-1)/(double)(NJ->nOutDistActive[join->i]-1);
7549 double outJ = NJ->outDistances[join->j];
7550 if (NJ->nOutDistActive[join->j] != nActive)
7551 outJ *= (nActive-1)/(double)(NJ->nOutDistActive[join->j]-1);
7552 join->criterion = join->dist - (outI+outJ)/(double)(nActive-2);
7553 if (verbose > 2 && nActive <= 5) {
7554 fprintf(stderr, "Set Criterion to join %d %d with nActive=%d dist+penalty %.3f criterion %.3f\n",
7555 join->i, join->j, nActive, join->dist, join->criterion);
7556 }
7557 }
7558
SetOutDistance(NJ_t * NJ,int iNode,int nActive)7559 void SetOutDistance(NJ_t *NJ, int iNode, int nActive) {
7560 if (NJ->nOutDistActive[iNode] == nActive)
7561 return;
7562
7563 /* May be called by InitNJ before we have parents */
7564 assert(iNode>=0 && (NJ->parent == NULL || NJ->parent[iNode]<0));
7565 besthit_t dist;
7566 ProfileDist(NJ->profiles[iNode], NJ->outprofile, NJ->nPos, NJ->distance_matrix, &dist);
7567 outprofileOps++;
7568
7569 /* out(A) = sum(X!=A) d(A,X)
7570 = sum(X!=A) (profiledist(A,X) - diam(A) - diam(X))
7571 = sum(X!=A) profiledist(A,X) - (N-1)*diam(A) - (totdiam - diam(A))
7572
7573 in the absence of gaps:
7574 profiledist(A,out) = mean profiledist(A, all active nodes)
7575 sum(X!=A) profiledist(A,X) = N * profiledist(A,out) - profiledist(A,A)
7576
7577 With gaps, we need to take the weights of the comparisons into account, where
7578 w(Ai) is the weight of position i in profile A:
7579 w(A,B) = sum_i w(Ai) * w(Bi)
7580 d(A,B) = sum_i w(Ai) * w(Bi) * d(Ai,Bi) / w(A,B)
7581
7582 sum(X!=A) profiledist(A,X) ~= (N-1) * profiledist(A, Out w/o A)
7583 profiledist(A, Out w/o A) = sum_X!=A sum_i d(Ai,Xi) * w(Ai) * w(Bi) / ( sum_X!=A sum_i w(Ai) * w(Bi) )
7584 d(A, Out) = sum_A sum_i d(Ai,Xi) * w(Ai) * w(Bi) / ( sum_X sum_i w(Ai) * w(Bi) )
7585
7586 and so we get
7587 profiledist(A,out w/o A) = (top of d(A,Out) - top of d(A,A)) / (weight of d(A,Out) - weight of d(A,A))
7588 top = dist * weight
7589 with another correction of nActive because the weight of the out-profile is the average
7590 weight not the total weight.
7591 */
7592 double top = (nActive-1)
7593 * (dist.dist * dist.weight * nActive - NJ->selfweight[iNode] * NJ->selfdist[iNode]);
7594 double bottom = (dist.weight * nActive - NJ->selfweight[iNode]);
7595 double pdistOutWithoutA = top/bottom;
7596 NJ->outDistances[iNode] = bottom > 0.01 ?
7597 pdistOutWithoutA - NJ->diameter[iNode] * (nActive-1) - (NJ->totdiam - NJ->diameter[iNode])
7598 : 3.0;
7599 NJ->nOutDistActive[iNode] = nActive;
7600
7601 if(verbose>3 && iNode < 5)
7602 fprintf(stderr,"NewOutDist for %d %f from dist %f selfd %f diam %f totdiam %f newActive %d\n",
7603 iNode, NJ->outDistances[iNode], dist.dist, NJ->selfdist[iNode], NJ->diameter[iNode],
7604 NJ->totdiam, nActive);
7605 if (verbose>6 && (iNode % 10) == 0) {
7606 /* Compute the actual out-distance and compare */
7607 double total = 0.0;
7608 double total_pd = 0.0;
7609 int j;
7610 for (j=0;j<NJ->maxnode;j++) {
7611 if (j!=iNode && (NJ->parent==NULL || NJ->parent[j]<0)) {
7612 besthit_t bh;
7613 ProfileDist(NJ->profiles[iNode], NJ->profiles[j], NJ->nPos, NJ->distance_matrix, /*OUT*/&bh);
7614 total_pd += bh.dist;
7615 total += bh.dist - (NJ->diameter[iNode] + NJ->diameter[j]);
7616 }
7617 }
7618 fprintf(stderr,"OutDist for Node %d %f truth %f profiled %f truth %f pd_err %f\n",
7619 iNode, NJ->outDistances[iNode], total, pdistOutWithoutA, total_pd,fabs(pdistOutWithoutA-total_pd));
7620 }
7621 }
7622
FreeTopHits(top_hits_t * tophits)7623 top_hits_t *FreeTopHits(top_hits_t *tophits) {
7624 if (tophits == NULL)
7625 return(NULL);
7626 int iNode;
7627 for (iNode = 0; iNode < tophits->maxnodes; iNode++) {
7628 top_hits_list_t *l = &tophits->top_hits_lists[iNode];
7629 if (l->hits != NULL)
7630 l->hits = myfree(l->hits, sizeof(hit_t) * l->nHits);
7631 }
7632 tophits->top_hits_lists = myfree(tophits->top_hits_lists, sizeof(top_hits_list_t) * tophits->maxnodes);
7633 tophits->visible = myfree(tophits->visible, sizeof(hit_t*) * tophits->maxnodes);
7634 tophits->topvisible = myfree(tophits->topvisible, sizeof(int) * tophits->nTopVisible);
7635 #ifdef OPENMP
7636 for (iNode = 0; iNode < tophits->maxnodes; iNode++)
7637 omp_destroy_lock(&tophits->locks[iNode]);
7638 tophits->locks = myfree(tophits->locks, sizeof(omp_lock_t) * tophits->maxnodes);
7639 #endif
7640 return(myfree(tophits, sizeof(top_hits_t)));
7641 }
7642
InitTopHits(NJ_t * NJ,int m)7643 top_hits_t *InitTopHits(NJ_t *NJ, int m) {
7644 int iNode;
7645 assert(m > 0);
7646 top_hits_t *tophits = mymalloc(sizeof(top_hits_t));
7647 tophits->m = m;
7648 tophits->q = (int)(0.5 + tophits2Mult * sqrt(tophits->m));
7649 if (!useTopHits2nd || tophits->q >= tophits->m)
7650 tophits->q = 0;
7651 tophits->maxnodes = NJ->maxnodes;
7652 tophits->top_hits_lists = mymalloc(sizeof(top_hits_list_t) * tophits->maxnodes);
7653 tophits->visible = mymalloc(sizeof(hit_t) * tophits->maxnodes);
7654 tophits->nTopVisible = (int)(0.5 + topvisibleMult*m);
7655 tophits->topvisible = mymalloc(sizeof(int) * tophits->nTopVisible);
7656 #ifdef OPENMP
7657 tophits->locks = mymalloc(sizeof(omp_lock_t) * tophits->maxnodes);
7658 for (iNode = 0; iNode < tophits->maxnodes; iNode++)
7659 omp_init_lock(&tophits->locks[iNode]);
7660 #endif
7661 int i;
7662 for (i = 0; i < tophits->nTopVisible; i++)
7663 tophits->topvisible[i] = -1; /* empty */
7664 tophits->topvisibleAge = 0;
7665
7666 for (iNode = 0; iNode < tophits->maxnodes; iNode++) {
7667 top_hits_list_t *l = &tophits->top_hits_lists[iNode];
7668 l->nHits = 0;
7669 l->hits = NULL;
7670 l->hitSource = -1;
7671 l->age = 0;
7672 hit_t *v = &tophits->visible[iNode];
7673 v->j = -1;
7674 v->dist = 1e20;
7675 }
7676 return(tophits);
7677 }
7678
7679 /* Helper function for sorting in SetAllLeafTopHits,
7680 and the global variables it needs
7681 */
7682 NJ_t *CompareSeedNJ = NULL;
7683 int *CompareSeedGaps = NULL;
CompareSeeds(const void * c1,const void * c2)7684 int CompareSeeds(const void *c1, const void *c2) {
7685 int seed1 = *(int *)c1;
7686 int seed2 = *(int *)c2;
7687 int gapdiff = CompareSeedGaps[seed1] - CompareSeedGaps[seed2];
7688 if (gapdiff != 0) return(gapdiff); /* fewer gaps is better */
7689 double outdiff = CompareSeedNJ->outDistances[seed1] - CompareSeedNJ->outDistances[seed2];
7690 if(outdiff < 0) return(-1); /* closer to more nodes is better */
7691 if(outdiff > 0) return(1);
7692 return(0);
7693 }
7694
7695 /* Using the seed heuristic and the close global variable */
SetAllLeafTopHits(NJ_t * NJ,top_hits_t * tophits)7696 void SetAllLeafTopHits(/*IN/UPDATE*/NJ_t *NJ, /*IN/OUT*/top_hits_t *tophits) {
7697 double close = tophitsClose;
7698 if (close < 0) {
7699 if (fastest && NJ->nSeq >= 50000) {
7700 close = 0.99;
7701 } else {
7702 double logN = log((double)NJ->nSeq)/log(2.0);
7703 close = logN/(logN+2.0);
7704 }
7705 }
7706 /* Sort the potential seeds, by a combination of nGaps and NJ->outDistances
7707 We don't store nGaps so we need to compute that
7708 */
7709 int *nGaps = (int*)mymalloc(sizeof(int)*NJ->nSeq);
7710 int iNode;
7711 for(iNode=0; iNode<NJ->nSeq; iNode++) {
7712 nGaps[iNode] = (int)(0.5 + NJ->nPos - NJ->selfweight[iNode]);
7713 }
7714 int *seeds = (int*)mymalloc(sizeof(int)*NJ->nSeq);
7715 for (iNode=0; iNode<NJ->nSeq; iNode++) seeds[iNode] = iNode;
7716 CompareSeedNJ = NJ;
7717 CompareSeedGaps = nGaps;
7718 qsort(/*IN/OUT*/seeds, NJ->nSeq, sizeof(int), CompareSeeds);
7719 CompareSeedNJ = NULL;
7720 CompareSeedGaps = NULL;
7721
7722 /* For each seed, save its top 2*m hits and then look for close neighbors */
7723 assert(2 * tophits->m <= NJ->nSeq);
7724 int iSeed;
7725 int nHasTopHits = 0;
7726 #ifdef OPENMP
7727 #pragma omp parallel for schedule(dynamic, 50)
7728 #endif
7729 for(iSeed=0; iSeed < NJ->nSeq; iSeed++) {
7730 int seed = seeds[iSeed];
7731 if (iSeed > 0 && (iSeed % 100) == 0) {
7732 #ifdef OPENMP
7733 #pragma omp critical
7734 #endif
7735 ProgressReport("Top hits for %6d of %6d seqs (at seed %6d)",
7736 nHasTopHits, NJ->nSeq,
7737 iSeed, 0);
7738 }
7739 if (tophits->top_hits_lists[seed].nHits > 0) {
7740 if(verbose>2) fprintf(stderr, "Skipping seed %d\n", seed);
7741 continue;
7742 }
7743
7744 besthit_t *besthitsSeed = (besthit_t*)mymalloc(sizeof(besthit_t)*NJ->nSeq);
7745 besthit_t *besthitsNeighbor = (besthit_t*)mymalloc(sizeof(besthit_t) * 2 * tophits->m);
7746 besthit_t bestjoin;
7747
7748 if(verbose>2) fprintf(stderr,"Trying seed %d\n", seed);
7749 SetBestHit(seed, NJ, /*nActive*/NJ->nSeq, /*OUT*/&bestjoin, /*OUT*/besthitsSeed);
7750
7751 /* sort & save top hits of self. besthitsSeed is now sorted. */
7752 SortSaveBestHits(seed, /*IN/SORT*/besthitsSeed, /*IN-SIZE*/NJ->nSeq,
7753 /*OUT-SIZE*/tophits->m, /*IN/OUT*/tophits);
7754 nHasTopHits++;
7755
7756 /* find "close" neighbors and compute their top hits */
7757 double neardist = besthitsSeed[2 * tophits->m - 1].dist * close;
7758 /* must have at least average weight, rem higher is better
7759 and allow a bit more than average, e.g. if we are looking for within 30% away,
7760 20% more gaps than usual seems OK
7761 Alternatively, have a coverage requirement in case neighbor is short
7762 If fastest, consider the top q/2 hits to be close neighbors, regardless
7763 */
7764 double nearweight = 0;
7765 int iClose;
7766 for (iClose = 0; iClose < 2 * tophits->m; iClose++)
7767 nearweight += besthitsSeed[iClose].weight;
7768 nearweight = nearweight/(2.0 * tophits->m); /* average */
7769 nearweight *= (1.0-2.0*neardist/3.0);
7770 double nearcover = 1.0 - neardist/2.0;
7771
7772 if(verbose>2) fprintf(stderr,"Distance limit for close neighbors %f weight %f ungapped %d\n",
7773 neardist, nearweight, NJ->nPos-nGaps[seed]);
7774 for (iClose = 0; iClose < tophits->m; iClose++) {
7775 besthit_t *closehit = &besthitsSeed[iClose];
7776 int closeNode = closehit->j;
7777 if (tophits->top_hits_lists[closeNode].nHits > 0)
7778 continue;
7779
7780 /* If within close-distance, or identical, use as close neighbor */
7781 bool close = closehit->dist <= neardist
7782 && (closehit->weight >= nearweight
7783 || closehit->weight >= (NJ->nPos-nGaps[closeNode])*nearcover);
7784 bool identical = closehit->dist < 1e-6
7785 && fabs(closehit->weight - (NJ->nPos - nGaps[seed])) < 1e-5
7786 && fabs(closehit->weight - (NJ->nPos - nGaps[closeNode])) < 1e-5;
7787 if (useTopHits2nd && iClose < tophits->q && (close || identical)) {
7788 nHasTopHits++;
7789 nClose2Used++;
7790 int nUse = MIN(tophits->q * tophits2Safety, 2 * tophits->m);
7791 besthit_t *besthitsClose = mymalloc(sizeof(besthit_t) * nUse);
7792 TransferBestHits(NJ, /*nActive*/NJ->nSeq,
7793 closeNode,
7794 /*IN*/besthitsSeed, /*SIZE*/nUse,
7795 /*OUT*/besthitsClose,
7796 /*updateDistance*/true);
7797 SortSaveBestHits(closeNode, /*IN/SORT*/besthitsClose,
7798 /*IN-SIZE*/nUse, /*OUT-SIZE*/tophits->q,
7799 /*IN/OUT*/tophits);
7800 tophits->top_hits_lists[closeNode].hitSource = seed;
7801 besthitsClose = myfree(besthitsClose, sizeof(besthit_t) * nUse);
7802 } else if (close || identical || (fastest && iClose < (tophits->q+1)/2)) {
7803 nHasTopHits++;
7804 nCloseUsed++;
7805 if(verbose>2) fprintf(stderr, "Near neighbor %d (rank %d weight %f ungapped %d %d)\n",
7806 closeNode, iClose, besthitsSeed[iClose].weight,
7807 NJ->nPos-nGaps[seed],
7808 NJ->nPos-nGaps[closeNode]);
7809
7810 /* compute top 2*m hits */
7811 TransferBestHits(NJ, /*nActive*/NJ->nSeq,
7812 closeNode,
7813 /*IN*/besthitsSeed, /*SIZE*/2 * tophits->m,
7814 /*OUT*/besthitsNeighbor,
7815 /*updateDistance*/true);
7816 SortSaveBestHits(closeNode, /*IN/SORT*/besthitsNeighbor,
7817 /*IN-SIZE*/2 * tophits->m, /*OUT-SIZE*/tophits->m,
7818 /*IN/OUT*/tophits);
7819
7820 /* And then try for a second level of transfer. We assume we
7821 are in a good area, because of the 1st
7822 level of transfer, and in a small neighborhood, because q is
7823 small (32 for 1 million sequences), so we do not make any close checks.
7824 */
7825 int iClose2;
7826 for (iClose2 = 0; iClose2 < tophits->q && iClose2 < 2 * tophits->m; iClose2++) {
7827 int closeNode2 = besthitsNeighbor[iClose2].j;
7828 assert(closeNode2 >= 0);
7829 if (tophits->top_hits_lists[closeNode2].hits == NULL) {
7830 nClose2Used++;
7831 nHasTopHits++;
7832 int nUse = MIN(tophits->q * tophits2Safety, 2 * tophits->m);
7833 besthit_t *besthitsClose2 = mymalloc(sizeof(besthit_t) * nUse);
7834 TransferBestHits(NJ, /*nActive*/NJ->nSeq,
7835 closeNode2,
7836 /*IN*/besthitsNeighbor, /*SIZE*/nUse,
7837 /*OUT*/besthitsClose2,
7838 /*updateDistance*/true);
7839 SortSaveBestHits(closeNode2, /*IN/SORT*/besthitsClose2,
7840 /*IN-SIZE*/nUse, /*OUT-SIZE*/tophits->q,
7841 /*IN/OUT*/tophits);
7842 tophits->top_hits_lists[closeNode2].hitSource = closeNode;
7843 besthitsClose2 = myfree(besthitsClose2, sizeof(besthit_t) * nUse);
7844 } /* end if should do 2nd-level transfer */
7845 }
7846 }
7847 } /* end loop over close candidates */
7848 besthitsSeed = myfree(besthitsSeed, sizeof(besthit_t)*NJ->nSeq);
7849 besthitsNeighbor = myfree(besthitsNeighbor, sizeof(besthit_t) * 2 * tophits->m);
7850 } /* end loop over seeds */
7851
7852 for (iNode=0; iNode<NJ->nSeq; iNode++) {
7853 top_hits_list_t *l = &tophits->top_hits_lists[iNode];
7854 assert(l->hits != NULL);
7855 assert(l->hits[0].j >= 0);
7856 assert(l->hits[0].j < NJ->nSeq);
7857 assert(l->hits[0].j != iNode);
7858 tophits->visible[iNode] = l->hits[0];
7859 }
7860
7861 if (verbose >= 2) fprintf(stderr, "#Close neighbors among leaves: 1st-level %ld 2nd-level %ld seeds %ld\n",
7862 nCloseUsed, nClose2Used, NJ->nSeq-nCloseUsed-nClose2Used);
7863 nGaps = myfree(nGaps, sizeof(int)*NJ->nSeq);
7864 seeds = myfree(seeds, sizeof(int)*NJ->nSeq);
7865
7866 /* Now add a "checking phase" where we ensure that the q or 2*sqrt(m) hits
7867 of i are represented in j (if they should be)
7868 */
7869 long lReplace = 0;
7870 int nCheck = tophits->q > 0 ? tophits->q : (int)(0.5 + 2.0*sqrt(tophits->m));
7871 for (iNode = 0; iNode < NJ->nSeq; iNode++) {
7872 if ((iNode % 100) == 0)
7873 ProgressReport("Checking top hits for %6d of %6d seqs",
7874 iNode+1, NJ->nSeq, 0, 0);
7875 top_hits_list_t *lNode = &tophits->top_hits_lists[iNode];
7876 int iHit;
7877 for (iHit = 0; iHit < nCheck && iHit < lNode->nHits; iHit++) {
7878 besthit_t bh = HitToBestHit(iNode, lNode->hits[iHit]);
7879 SetCriterion(NJ, /*nActive*/NJ->nSeq, /*IN/OUT*/&bh);
7880 top_hits_list_t *lTarget = &tophits->top_hits_lists[bh.j];
7881
7882 /* If this criterion is worse than the nCheck-1 entry of the target,
7883 then skip the check.
7884 This logic is based on assuming that the list is sorted,
7885 which is true initially but may not be true later.
7886 Still, is a good heuristic.
7887 */
7888 assert(nCheck > 0);
7889 assert(nCheck <= lTarget->nHits);
7890 besthit_t bhCheck = HitToBestHit(bh.j, lTarget->hits[nCheck-1]);
7891 SetCriterion(NJ, /*nActive*/NJ->nSeq, /*IN/OUT*/&bhCheck);
7892 if (bhCheck.criterion < bh.criterion)
7893 continue; /* no check needed */
7894
7895 /* Check if this is present in the top-hit list */
7896 int iHit2;
7897 bool bFound = false;
7898 for (iHit2 = 0; iHit2 < lTarget->nHits && !bFound; iHit2++)
7899 if (lTarget->hits[iHit2].j == iNode)
7900 bFound = true;
7901 if (!bFound) {
7902 /* Find the hit with the worst criterion and replace it with this one */
7903 int iWorst = -1;
7904 double dWorstCriterion = -1e20;
7905 for (iHit2 = 0; iHit2 < lTarget->nHits; iHit2++) {
7906 besthit_t bh2 = HitToBestHit(bh.j, lTarget->hits[iHit2]);
7907 SetCriterion(NJ, /*nActive*/NJ->nSeq, /*IN/OUT*/&bh2);
7908 if (bh2.criterion > dWorstCriterion) {
7909 iWorst = iHit2;
7910 dWorstCriterion = bh2.criterion;
7911 }
7912 }
7913 if (dWorstCriterion > bh.criterion) {
7914 assert(iWorst >= 0);
7915 lTarget->hits[iWorst].j = iNode;
7916 lTarget->hits[iWorst].dist = bh.dist;
7917 lReplace++;
7918 /* and perhaps update visible */
7919 besthit_t v;
7920 bool bSuccess = GetVisible(NJ, /*nActive*/NJ->nSeq, tophits, bh.j, /*OUT*/&v);
7921 assert(bSuccess);
7922 if (bh.criterion < v.criterion)
7923 tophits->visible[bh.j] = lTarget->hits[iWorst];
7924 }
7925 }
7926 }
7927 }
7928
7929 if (verbose >= 2)
7930 fprintf(stderr, "Replaced %ld top hit entries\n", lReplace);
7931 }
7932
7933 /* Updates out-distances but does not reset or update visible set */
GetBestFromTopHits(int iNode,NJ_t * NJ,int nActive,top_hits_t * tophits,besthit_t * bestjoin)7934 void GetBestFromTopHits(int iNode,
7935 /*IN/UPDATE*/NJ_t *NJ,
7936 int nActive,
7937 /*IN*/top_hits_t *tophits,
7938 /*OUT*/besthit_t *bestjoin) {
7939 assert(iNode >= 0);
7940 assert(NJ->parent[iNode] < 0);
7941 top_hits_list_t *l = &tophits->top_hits_lists[iNode];
7942 assert(l->nHits > 0);
7943 assert(l->hits != NULL);
7944
7945 if(!fastest)
7946 SetOutDistance(NJ, iNode, nActive); /* ensure out-distances are not stale */
7947
7948 bestjoin->i = -1;
7949 bestjoin->j = -1;
7950 bestjoin->dist = 1e20;
7951 bestjoin->criterion = 1e20;
7952
7953 int iBest;
7954 for(iBest=0; iBest < l->nHits; iBest++) {
7955 besthit_t bh = HitToBestHit(iNode, l->hits[iBest]);
7956 if (UpdateBestHit(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/&bh, /*update dist*/true)) {
7957 SetCriterion(/*IN/OUT*/NJ, nActive, /*IN/OUT*/&bh); /* make sure criterion is correct */
7958 if (bh.criterion < bestjoin->criterion)
7959 *bestjoin = bh;
7960 }
7961 }
7962 assert(bestjoin->j >= 0); /* a hit was found */
7963 assert(bestjoin->i == iNode);
7964 }
7965
ActiveAncestor(NJ_t * NJ,int iNode)7966 int ActiveAncestor(/*IN*/NJ_t *NJ, int iNode) {
7967 if (iNode < 0)
7968 return(iNode);
7969 while(NJ->parent[iNode] >= 0)
7970 iNode = NJ->parent[iNode];
7971 return(iNode);
7972 }
7973
UpdateBestHit(NJ_t * NJ,int nActive,besthit_t * hit,bool bUpdateDist)7974 bool UpdateBestHit(/*IN/UPDATE*/NJ_t *NJ, int nActive, /*IN/OUT*/besthit_t *hit,
7975 bool bUpdateDist) {
7976 int i = ActiveAncestor(/*IN*/NJ, hit->i);
7977 int j = ActiveAncestor(/*IN*/NJ, hit->j);
7978 if (i < 0 || j < 0 || i == j) {
7979 hit->i = -1;
7980 hit->j = -1;
7981 hit->weight = 0;
7982 hit->dist = 1e20;
7983 hit->criterion = 1e20;
7984 return(false);
7985 }
7986 if (i != hit->i || j != hit->j) {
7987 hit->i = i;
7988 hit->j = j;
7989 if (bUpdateDist) {
7990 SetDistCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/hit);
7991 } else {
7992 hit->dist = -1e20;
7993 hit->criterion = 1e20;
7994 }
7995 }
7996 return(true);
7997 }
7998
GetVisible(NJ_t * NJ,int nActive,top_hits_t * tophits,int iNode,besthit_t * visible)7999 bool GetVisible(/*IN/UPDATE*/NJ_t *NJ, int nActive,
8000 /*IN/OUT*/top_hits_t *tophits,
8001 int iNode, /*OUT*/besthit_t *visible) {
8002 if (iNode < 0 || NJ->parent[iNode] >= 0)
8003 return(false);
8004 hit_t *v = &tophits->visible[iNode];
8005 if (v->j < 0 || NJ->parent[v->j] >= 0)
8006 return(false);
8007 *visible = HitToBestHit(iNode, *v);
8008 SetCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/visible);
8009 return(true);
8010 }
8011
UniqueBestHits(NJ_t * NJ,int nActive,besthit_t * combined,int nCombined,int * nUniqueOut)8012 besthit_t *UniqueBestHits(/*IN/UPDATE*/NJ_t *NJ, int nActive,
8013 /*IN/SORT*/besthit_t *combined, int nCombined,
8014 /*OUT*/int *nUniqueOut) {
8015 int iHit;
8016 for (iHit = 0; iHit < nCombined; iHit++) {
8017 besthit_t *hit = &combined[iHit];
8018 UpdateBestHit(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/hit, /*update*/false);
8019 }
8020 qsort(/*IN/OUT*/combined, nCombined, sizeof(besthit_t), CompareHitsByIJ);
8021
8022 besthit_t *uniqueList = (besthit_t*)mymalloc(sizeof(besthit_t)*nCombined);
8023 int nUnique = 0;
8024 int iSavedLast = -1;
8025
8026 /* First build the new list */
8027 for (iHit = 0; iHit < nCombined; iHit++) {
8028 besthit_t *hit = &combined[iHit];
8029 if (hit->i < 0 || hit->j < 0)
8030 continue;
8031 if (iSavedLast >= 0) {
8032 /* toss out duplicates */
8033 besthit_t *saved = &combined[iSavedLast];
8034 if (saved->i == hit->i && saved->j == hit->j)
8035 continue;
8036 }
8037 assert(nUnique < nCombined);
8038 assert(hit->j >= 0 && NJ->parent[hit->j] < 0);
8039 uniqueList[nUnique++] = *hit;
8040 iSavedLast = iHit;
8041 }
8042 *nUniqueOut = nUnique;
8043
8044 /* Then do any updates to the criterion or the distances in parallel */
8045 #ifdef OPENMP
8046 #pragma omp parallel for schedule(dynamic, 50)
8047 #endif
8048 for (iHit = 0; iHit < nUnique; iHit++) {
8049 besthit_t *hit = &uniqueList[iHit];
8050 if (hit->dist < 0.0)
8051 SetDistCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/hit);
8052 else
8053 SetCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/hit);
8054 }
8055 return(uniqueList);
8056 }
8057
8058 /*
8059 Create a top hit list for the new node, either
8060 from children (if there are enough best hits left) or by a "refresh"
8061 Also set visible set for newnode
8062 Also update visible set for other nodes if we stumble across a "better" hit
8063 */
8064
TopHitJoin(int newnode,NJ_t * NJ,int nActive,top_hits_t * tophits)8065 void TopHitJoin(int newnode,
8066 /*IN/UPDATE*/NJ_t *NJ,
8067 int nActive,
8068 /*IN/OUT*/top_hits_t *tophits) {
8069 long startProfileOps = profileOps;
8070 long startOutProfileOps = outprofileOps;
8071 assert(NJ->child[newnode].nChild == 2);
8072 top_hits_list_t *lNew = &tophits->top_hits_lists[newnode];
8073 assert(lNew->hits == NULL);
8074
8075 /* Copy the hits */
8076 int i;
8077 top_hits_list_t *lChild[2];
8078 for (i = 0; i< 2; i++) {
8079 lChild[i] = &tophits->top_hits_lists[NJ->child[newnode].child[i]];
8080 assert(lChild[i]->hits != NULL && lChild[i]->nHits > 0);
8081 }
8082 int nCombined = lChild[0]->nHits + lChild[1]->nHits;
8083 besthit_t *combinedList = (besthit_t*)mymalloc(sizeof(besthit_t)*nCombined);
8084 HitsToBestHits(lChild[0]->hits, lChild[0]->nHits, NJ->child[newnode].child[0],
8085 /*OUT*/combinedList);
8086 HitsToBestHits(lChild[1]->hits, lChild[1]->nHits, NJ->child[newnode].child[1],
8087 /*OUT*/combinedList + lChild[0]->nHits);
8088 int nUnique;
8089 /* UniqueBestHits() replaces children (used in the calls to HitsToBestHits)
8090 with active ancestors, so all distances & criteria will be recomputed */
8091 besthit_t *uniqueList = UniqueBestHits(/*IN/UPDATE*/NJ, nActive,
8092 /*IN/SORT*/combinedList,
8093 nCombined,
8094 /*OUT*/&nUnique);
8095 int nUniqueAlloc = nCombined;
8096 combinedList = myfree(combinedList, sizeof(besthit_t)*nCombined);
8097
8098 /* Forget the top-hit lists of the joined nodes */
8099 for (i = 0; i < 2; i++) {
8100 lChild[i]->hits = myfree(lChild[i]->hits, sizeof(hit_t) * lChild[i]->nHits);
8101 lChild[i]->nHits = 0;
8102 }
8103
8104 /* Use the average age, rounded up, by 1 Versions 2.0 and earlier
8105 used the maximum age, which leads to more refreshes without
8106 improving the accuracy of the NJ phase. Intuitively, if one of
8107 them was just refreshed then another refresh is unlikely to help.
8108 */
8109 lNew->age = (lChild[0]->age+lChild[1]->age+1)/2 + 1;
8110
8111 /* If top hit ages always match (perfectly balanced), then a
8112 limit of log2(m) would mean a refresh after
8113 m joins, which is about what we want.
8114 */
8115 int tophitAgeLimit = MAX(1, (int)(0.5 + log((double)tophits->m)/log(2.0)));
8116
8117 /* Either use the merged list as candidate top hits, or
8118 move from 2nd level to 1st level, or do a refresh
8119 UniqueBestHits eliminates hits to self, so if nUnique==nActive-1,
8120 we've already done the exhaustive search.
8121
8122 Either way, we set tophits, visible(newnode), update visible of its top hits,
8123 and modify topvisible: if we do a refresh, then we reset it, otherwise we update
8124 */
8125 bool bSecondLevel = lChild[0]->hitSource >= 0 && lChild[1]->hitSource >= 0;
8126 bool bUseUnique = nUnique==nActive-1
8127 || (lNew->age <= tophitAgeLimit
8128 && nUnique >= (bSecondLevel ? (int)(0.5 + tophits2Refresh * tophits->q)
8129 : (int)(0.5 + tophits->m * tophitsRefresh) ));
8130 if (bUseUnique && verbose > 2)
8131 fprintf(stderr,"Top hits for %d from combined %d nActive=%d tophitsage %d %s\n",
8132 newnode,nUnique,nActive,lNew->age,
8133 bSecondLevel ? "2ndlevel" : "1stlevel");
8134
8135 if (!bUseUnique
8136 && bSecondLevel
8137 && lNew->age <= tophitAgeLimit) {
8138 int source = ActiveAncestor(NJ, lChild[0]->hitSource);
8139 if (source == newnode)
8140 source = ActiveAncestor(NJ, lChild[1]->hitSource);
8141 /* In parallel mode, it is possible that we would select a node as the
8142 hit-source and then over-write that top hit with a short list.
8143 So we need this sanity check.
8144 */
8145 if (source != newnode
8146 && source >= 0
8147 && tophits->top_hits_lists[source].hitSource < 0) {
8148
8149 /* switch from 2nd-level to 1st-level top hits -- compute top hits list
8150 of node from what we have so far plus the active source plus its top hits */
8151 top_hits_list_t *lSource = &tophits->top_hits_lists[source];
8152 assert(lSource->hitSource < 0);
8153 assert(lSource->nHits > 0);
8154 int nMerge = 1 + lSource->nHits + nUnique;
8155 besthit_t *mergeList = mymalloc(sizeof(besthit_t) * nMerge);
8156 memcpy(/*to*/mergeList, /*from*/uniqueList, nUnique * sizeof(besthit_t));
8157
8158 int iMerge = nUnique;
8159 mergeList[iMerge].i = newnode;
8160 mergeList[iMerge].j = source;
8161 SetDistCriterion(NJ, nActive, /*IN/OUT*/&mergeList[iMerge]);
8162 iMerge++;
8163 HitsToBestHits(lSource->hits, lSource->nHits, newnode, /*OUT*/mergeList+iMerge);
8164 for (i = 0; i < lSource->nHits; i++) {
8165 SetDistCriterion(NJ, nActive, /*IN/OUT*/&mergeList[iMerge]);
8166 iMerge++;
8167 }
8168 assert(iMerge == nMerge);
8169
8170 uniqueList = myfree(uniqueList, nUniqueAlloc * sizeof(besthit_t));
8171 uniqueList = UniqueBestHits(/*IN/UPDATE*/NJ, nActive,
8172 /*IN/SORT*/mergeList,
8173 nMerge,
8174 /*OUT*/&nUnique);
8175 nUniqueAlloc = nMerge;
8176 mergeList = myfree(mergeList, sizeof(besthit_t)*nMerge);
8177
8178 assert(nUnique > 0);
8179 bUseUnique = nUnique >= (int)(0.5 + tophits->m * tophitsRefresh);
8180 bSecondLevel = false;
8181
8182 if (bUseUnique && verbose > 2)
8183 fprintf(stderr, "Top hits for %d from children and source %d's %d hits, nUnique %d\n",
8184 newnode, source, lSource->nHits, nUnique);
8185 }
8186 }
8187
8188 if (bUseUnique) {
8189 if (bSecondLevel) {
8190 /* pick arbitrarily */
8191 lNew->hitSource = lChild[0]->hitSource;
8192 }
8193 int nSave = MIN(nUnique, bSecondLevel ? tophits->q : tophits->m);
8194 assert(nSave>0);
8195 if (verbose > 2)
8196 fprintf(stderr, "Combined %d ops so far %ld\n", nUnique, profileOps - startProfileOps);
8197 SortSaveBestHits(newnode, /*IN/SORT*/uniqueList, /*nIn*/nUnique,
8198 /*nOut*/nSave, /*IN/OUT*/tophits);
8199 assert(lNew->hits != NULL); /* set by sort/save */
8200 tophits->visible[newnode] = lNew->hits[0];
8201 UpdateTopVisible(/*IN*/NJ, nActive, newnode, &tophits->visible[newnode],
8202 /*IN/OUT*/tophits);
8203 UpdateVisible(/*IN/UPDATE*/NJ, nActive, /*IN*/uniqueList, nSave, /*IN/OUT*/tophits);
8204 } else {
8205 /* need to refresh: set top hits for node and for its top hits */
8206 if(verbose > 2) fprintf(stderr,"Top hits for %d by refresh (%d unique age %d) nActive=%d\n",
8207 newnode,nUnique,lNew->age,nActive);
8208 nRefreshTopHits++;
8209 lNew->age = 0;
8210
8211 int iNode;
8212 /* ensure all out-distances are up to date ahead of time
8213 to avoid any data overwriting issues.
8214 */
8215 #ifdef OPENMP
8216 #pragma omp parallel for schedule(dynamic, 50)
8217 #endif
8218 for (iNode = 0; iNode < NJ->maxnode; iNode++) {
8219 if (NJ->parent[iNode] < 0) {
8220 if (fastest) {
8221 besthit_t bh;
8222 bh.i = iNode;
8223 bh.j = iNode;
8224 bh.dist = 0;
8225 SetCriterion(/*IN/UPDATE*/NJ, nActive, &bh);
8226 } else {
8227 SetOutDistance(/*IN/UDPATE*/NJ, iNode, nActive);
8228 }
8229 }
8230 }
8231
8232 /* exhaustively get the best 2*m hits for newnode, set visible, and save the top m */
8233 besthit_t *allhits = (besthit_t*)mymalloc(sizeof(besthit_t)*NJ->maxnode);
8234 assert(2 * tophits->m <= NJ->maxnode);
8235 besthit_t bh;
8236 SetBestHit(newnode, NJ, nActive, /*OUT*/&bh, /*OUT*/allhits);
8237 qsort(/*IN/OUT*/allhits, NJ->maxnode, sizeof(besthit_t), CompareHitsByCriterion);
8238 SortSaveBestHits(newnode, /*IN/SORT*/allhits, /*nIn*/NJ->maxnode,
8239 /*nOut*/tophits->m, /*IN/OUT*/tophits);
8240
8241 /* Do not need to call UpdateVisible because we set visible below */
8242
8243 /* And use the top 2*m entries to expand other best-hit lists, but only for top m */
8244 int iHit;
8245 #ifdef OPENMP
8246 #pragma omp parallel for schedule(dynamic, 50)
8247 #endif
8248 for (iHit=0; iHit < tophits->m; iHit++) {
8249 if (allhits[iHit].i < 0) continue;
8250 int iNode = allhits[iHit].j;
8251 assert(iNode>=0);
8252 if (NJ->parent[iNode] >= 0) continue;
8253 top_hits_list_t *l = &tophits->top_hits_lists[iNode];
8254 int nHitsOld = l->nHits;
8255 assert(nHitsOld <= tophits->m);
8256 l->age = 0;
8257
8258 /* Merge: old hits into 0->nHitsOld and hits from iNode above that */
8259 besthit_t *bothList = (besthit_t*)mymalloc(sizeof(besthit_t) * 3 * tophits->m);
8260 HitsToBestHits(/*IN*/l->hits, nHitsOld, iNode, /*OUT*/bothList); /* does not compute criterion */
8261 for (i = 0; i < nHitsOld; i++)
8262 SetCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/&bothList[i]);
8263 if (nActive <= 2 * tophits->m)
8264 l->hitSource = -1; /* abandon the 2nd-level top-hits heuristic */
8265 int nNewHits = l->hitSource >= 0 ? tophits->q : tophits->m;
8266 assert(nNewHits > 0);
8267
8268 TransferBestHits(/*IN/UPDATE*/NJ, nActive, iNode,
8269 /*IN*/allhits, /*nOldHits*/2 * nNewHits,
8270 /*OUT*/&bothList[nHitsOld],
8271 /*updateDist*/false); /* rely on UniqueBestHits to update dist and/or criterion */
8272 int nUnique2;
8273 besthit_t *uniqueList2 = UniqueBestHits(/*IN/UPDATE*/NJ, nActive,
8274 /*IN/SORT*/bothList, nHitsOld + 2 * nNewHits,
8275 /*OUT*/&nUnique2);
8276 assert(nUnique2 > 0);
8277 bothList = myfree(bothList,3 * tophits->m * sizeof(besthit_t));
8278
8279 /* Note this will overwrite l, but we saved nHitsOld */
8280 SortSaveBestHits(iNode, /*IN/SORT*/uniqueList2, /*nIn*/nUnique2,
8281 /*nOut*/nNewHits, /*IN/OUT*/tophits);
8282 /* will update topvisible below */
8283 tophits->visible[iNode] = tophits->top_hits_lists[iNode].hits[0];
8284 uniqueList2 = myfree(uniqueList2, (nHitsOld + 2 * tophits->m) * sizeof(besthit_t));
8285 }
8286
8287 ResetTopVisible(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/tophits); /* outside of the parallel phase */
8288 allhits = myfree(allhits,sizeof(besthit_t)*NJ->maxnode);
8289 }
8290 uniqueList = myfree(uniqueList, nUniqueAlloc * sizeof(besthit_t));
8291 if (verbose > 2) {
8292 fprintf(stderr, "New top-hit list for %d profile-ops %ld (out-ops %ld): source %d age %d members ",
8293 newnode,
8294 profileOps - startProfileOps,
8295 outprofileOps - startOutProfileOps,
8296 lNew->hitSource, lNew->age);
8297
8298 int i;
8299 for (i = 0; i < lNew->nHits; i++)
8300 fprintf(stderr, " %d", lNew->hits[i].j);
8301 fprintf(stderr,"\n");
8302 }
8303 }
8304
UpdateVisible(NJ_t * NJ,int nActive,besthit_t * tophitsNode,int nTopHits,top_hits_t * tophits)8305 void UpdateVisible(/*IN/UPDATE*/NJ_t *NJ, int nActive,
8306 /*IN*/besthit_t *tophitsNode,
8307 int nTopHits,
8308 /*IN/OUT*/top_hits_t *tophits) {
8309 int iHit;
8310
8311 for(iHit = 0; iHit < nTopHits; iHit++) {
8312 besthit_t *hit = &tophitsNode[iHit];
8313 if (hit->i < 0) continue; /* possible empty entries */
8314 assert(NJ->parent[hit->i] < 0);
8315 assert(hit->j >= 0 && NJ->parent[hit->j] < 0);
8316 besthit_t visible;
8317 bool bSuccess = GetVisible(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/tophits, hit->j, /*OUT*/&visible);
8318 if (!bSuccess || hit->criterion < visible.criterion) {
8319 if (bSuccess)
8320 nVisibleUpdate++;
8321 hit_t *v = &tophits->visible[hit->j];
8322 v->j = hit->i;
8323 v->dist = hit->dist;
8324 UpdateTopVisible(NJ, nActive, hit->j, v, /*IN/OUT*/tophits);
8325 if(verbose>5) fprintf(stderr,"NewVisible %d %d %f\n",
8326 hit->j,v->j,v->dist);
8327 }
8328 } /* end loop over hits */
8329 }
8330
8331 /* Update the top-visible list to perhaps include visible[iNode] */
UpdateTopVisible(NJ_t * NJ,int nActive,int iIn,hit_t * hit,top_hits_t * tophits)8332 void UpdateTopVisible(/*IN*/NJ_t * NJ, int nActive,
8333 int iIn, /*IN*/hit_t *hit,
8334 /*IN/OUT*/top_hits_t *tophits) {
8335 assert(tophits != NULL);
8336 bool bIn = false; /* placed in the list */
8337 int i;
8338
8339 /* First, if the list is not full, put it in somewhere */
8340 for (i = 0; i < tophits->nTopVisible && !bIn; i++) {
8341 int iNode = tophits->topvisible[i];
8342 if (iNode == iIn) {
8343 /* this node is already in the top hit list */
8344 bIn = true;
8345 } else if (iNode < 0 || NJ->parent[iNode] >= 0) {
8346 /* found an empty spot */
8347 bIn = true;
8348 tophits->topvisible[i] = iIn;
8349 }
8350 }
8351
8352 int iPosWorst = -1;
8353 double dCriterionWorst = -1e20;
8354 if (!bIn) {
8355 /* Search for the worst hit */
8356 for (i = 0; i < tophits->nTopVisible && !bIn; i++) {
8357 int iNode = tophits->topvisible[i];
8358 assert(iNode >= 0 && NJ->parent[iNode] < 0 && iNode != iIn);
8359 besthit_t visible;
8360 if (!GetVisible(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/tophits, iNode, /*OUT*/&visible)) {
8361 /* found an empty spot */
8362 tophits->topvisible[i] = iIn;
8363 bIn = true;
8364 } else if (visible.i == hit->j && visible.j == iIn) {
8365 /* the reverse hit is already in the top hit list */
8366 bIn = true;
8367 } else if (visible.criterion >= dCriterionWorst) {
8368 iPosWorst = i;
8369 dCriterionWorst = visible.criterion;
8370 }
8371 }
8372 }
8373
8374 if (!bIn && iPosWorst >= 0) {
8375 besthit_t visible = HitToBestHit(iIn, *hit);
8376 SetCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/&visible);
8377 if (visible.criterion < dCriterionWorst) {
8378 if (verbose > 2) {
8379 int iOld = tophits->topvisible[iPosWorst];
8380 fprintf(stderr, "TopVisible replace %d=>%d with %d=>%d\n",
8381 iOld, tophits->visible[iOld].j, visible.i, visible.j);
8382 }
8383 tophits->topvisible[iPosWorst] = iIn;
8384 }
8385 }
8386
8387 if (verbose > 2) {
8388 fprintf(stderr, "Updated TopVisible: ");
8389 for (i = 0; i < tophits->nTopVisible; i++) {
8390 int iNode = tophits->topvisible[i];
8391 if (iNode >= 0 && NJ->parent[iNode] < 0) {
8392 besthit_t bh = HitToBestHit(iNode, tophits->visible[iNode]);
8393 SetDistCriterion(NJ, nActive, &bh);
8394 fprintf(stderr, " %d=>%d:%.4f", bh.i, bh.j, bh.criterion);
8395 }
8396 }
8397 fprintf(stderr,"\n");
8398 }
8399 }
8400
8401 /* Recompute the topvisible list */
ResetTopVisible(NJ_t * NJ,int nActive,top_hits_t * tophits)8402 void ResetTopVisible(/*IN/UPDATE*/NJ_t *NJ,
8403 int nActive,
8404 /*IN/OUT*/top_hits_t *tophits) {
8405 besthit_t *visibleSorted = mymalloc(sizeof(besthit_t)*nActive);
8406 int nVisible = 0; /* #entries in visibleSorted */
8407 int iNode;
8408 for (iNode = 0; iNode < NJ->maxnode; iNode++) {
8409 /* skip joins involving stale nodes */
8410 if (NJ->parent[iNode] >= 0)
8411 continue;
8412 besthit_t v;
8413 if (GetVisible(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/tophits, iNode, /*OUT*/&v)) {
8414 assert(nVisible < nActive);
8415 visibleSorted[nVisible++] = v;
8416 }
8417 }
8418 assert(nVisible > 0);
8419
8420 qsort(/*IN/OUT*/visibleSorted,nVisible,sizeof(besthit_t),CompareHitsByCriterion);
8421
8422 /* Only keep the top m items, and try to avoid duplicating i->j with j->i
8423 Note that visible(i) -> j does not necessarily imply visible(j) -> i,
8424 so we store what the pairing was (or -1 for not used yet)
8425 */
8426 int *inTopVisible = malloc(sizeof(int) * NJ->maxnodes);
8427 int i;
8428 for (i = 0; i < NJ->maxnodes; i++)
8429 inTopVisible[i] = -1;
8430
8431 if (verbose > 2)
8432 fprintf(stderr, "top-hit search: nActive %d nVisible %d considering up to %d items\n",
8433 nActive, nVisible, tophits->m);
8434
8435 /* save the sorted indices in topvisible */
8436 int iSave = 0;
8437 for (i = 0; i < nVisible && iSave < tophits->nTopVisible; i++) {
8438 besthit_t *v = &visibleSorted[i];
8439 if (inTopVisible[v->i] != v->j) { /* not seen already */
8440 tophits->topvisible[iSave++] = v->i;
8441 inTopVisible[v->i] = v->j;
8442 inTopVisible[v->j] = v->i;
8443 }
8444 }
8445 while(iSave < tophits->nTopVisible)
8446 tophits->topvisible[iSave++] = -1;
8447 myfree(visibleSorted, sizeof(besthit_t)*nActive);
8448 myfree(inTopVisible, sizeof(int) * NJ->maxnodes);
8449 tophits->topvisibleAge = 0;
8450 if (verbose > 2) {
8451 fprintf(stderr, "Reset TopVisible: ");
8452 for (i = 0; i < tophits->nTopVisible; i++) {
8453 int iNode = tophits->topvisible[i];
8454 if (iNode < 0)
8455 break;
8456 fprintf(stderr, " %d=>%d", iNode, tophits->visible[iNode].j);
8457 }
8458 fprintf(stderr,"\n");
8459 }
8460 }
8461
8462 /*
8463 Find best hit to do in O(N*log(N) + m*L*log(N)) time, by
8464 copying and sorting the visible list
8465 updating out-distances for the top (up to m) candidates
8466 selecting the best hit
8467 if !fastest then
8468 local hill-climbing for a better join,
8469 using best-hit lists only, and updating
8470 all out-distances in every best-hit list
8471 */
TopHitNJSearch(NJ_t * NJ,int nActive,top_hits_t * tophits,besthit_t * join)8472 void TopHitNJSearch(/*IN/UPDATE*/NJ_t *NJ, int nActive,
8473 /*IN/OUT*/top_hits_t *tophits,
8474 /*OUT*/besthit_t *join) {
8475 /* first, do we have at least m/2 candidates in topvisible?
8476 And remember the best one */
8477 int nCandidate = 0;
8478 int iNodeBestCandidate = -1;
8479 double dBestCriterion = 1e20;
8480
8481 int i;
8482 for (i = 0; i < tophits->nTopVisible; i++) {
8483 int iNode = tophits->topvisible[i];
8484 besthit_t visible;
8485 if (GetVisible(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/tophits, iNode, /*OUT*/&visible)) {
8486 nCandidate++;
8487 if (iNodeBestCandidate < 0 || visible.criterion < dBestCriterion) {
8488 iNodeBestCandidate = iNode;
8489 dBestCriterion = visible.criterion;
8490 }
8491 }
8492 }
8493
8494 tophits->topvisibleAge++;
8495 /* Note we may have only nActive/2 joins b/c we try to store them once */
8496 if (2 * tophits->topvisibleAge > tophits->m
8497 || (3*nCandidate < tophits->nTopVisible && 3*nCandidate < nActive)) {
8498 /* recompute top visible */
8499 if (verbose > 2)
8500 fprintf(stderr, "Resetting the top-visible list at nActive=%d\n",nActive);
8501
8502 /* If age is low, then our visible set is becoming too sparse, because we have
8503 recently recomputed the top visible subset. This is very rare but can happen
8504 with -fastest. A quick-and-dirty solution is to walk up
8505 the parents to get additional entries in top hit lists. To ensure that the
8506 visible set becomes full, pick an arbitrary node if walking up terminates at self.
8507 */
8508 if (tophits->topvisibleAge <= 2) {
8509 if (verbose > 2)
8510 fprintf(stderr, "Expanding visible set by walking up to active nodes at nActive=%d\n", nActive);
8511 int iNode;
8512 for (iNode = 0; iNode < NJ->maxnode; iNode++) {
8513 if (NJ->parent[iNode] >= 0)
8514 continue;
8515 hit_t *v = &tophits->visible[iNode];
8516 int newj = ActiveAncestor(NJ, v->j);
8517 if (newj >= 0 && newj != v->j) {
8518 if (newj == iNode) {
8519 /* pick arbitrarily */
8520 newj = 0;
8521 while (NJ->parent[newj] >= 0 || newj == iNode)
8522 newj++;
8523 }
8524 assert(newj >= 0 && newj < NJ->maxnodes
8525 && newj != iNode
8526 && NJ->parent[newj] < 0);
8527
8528 /* Set v to point to newj */
8529 besthit_t bh = { iNode, newj, -1e20, -1e20, -1e20 };
8530 SetDistCriterion(NJ, nActive, /*IN/OUT*/&bh);
8531 v->j = newj;
8532 v->dist = bh.dist;
8533 }
8534 }
8535 }
8536 ResetTopVisible(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/tophits);
8537 /* and recurse to try again */
8538 TopHitNJSearch(NJ, nActive, tophits, join);
8539 return;
8540 }
8541 if (verbose > 2)
8542 fprintf(stderr, "Top-visible list size %d (nActive %d m %d)\n",
8543 nCandidate, nActive, tophits->m);
8544 assert(iNodeBestCandidate >= 0 && NJ->parent[iNodeBestCandidate] < 0);
8545 bool bSuccess = GetVisible(NJ, nActive, tophits, iNodeBestCandidate, /*OUT*/join);
8546 assert(bSuccess);
8547 assert(join->i >= 0 && NJ->parent[join->i] < 0);
8548 assert(join->j >= 0 && NJ->parent[join->j] < 0);
8549
8550 if(fastest)
8551 return;
8552
8553 int changed;
8554 do {
8555 changed = 0;
8556
8557 besthit_t bestI;
8558 GetBestFromTopHits(join->i, NJ, nActive, tophits, /*OUT*/&bestI);
8559 assert(bestI.i == join->i);
8560 if (bestI.j != join->j && bestI.criterion < join->criterion) {
8561 changed = 1;
8562 if (verbose>2)
8563 fprintf(stderr,"BetterI\t%d\t%d\t%d\t%d\t%f\t%f\n",
8564 join->i,join->j,bestI.i,bestI.j,
8565 join->criterion,bestI.criterion);
8566 *join = bestI;
8567 }
8568
8569 besthit_t bestJ;
8570 GetBestFromTopHits(join->j, NJ, nActive, tophits, /*OUT*/&bestJ);
8571 assert(bestJ.i == join->j);
8572 if (bestJ.j != join->i && bestJ.criterion < join->criterion) {
8573 changed = 1;
8574 if (verbose>2)
8575 fprintf(stderr,"BetterJ\t%d\t%d\t%d\t%d\t%f\t%f\n",
8576 join->i,join->j,bestJ.i,bestJ.j,
8577 join->criterion,bestJ.criterion);
8578 *join = bestJ;
8579 }
8580 if(changed) nHillBetter++;
8581 } while(changed);
8582 }
8583
NGaps(NJ_t * NJ,int iNode)8584 int NGaps(/*IN*/NJ_t *NJ, int iNode) {
8585 assert(iNode < NJ->nSeq);
8586 int nGaps = 0;
8587 int p;
8588 for(p=0; p<NJ->nPos; p++) {
8589 if (NJ->profiles[iNode]->codes[p] == NOCODE)
8590 nGaps++;
8591 }
8592 return(nGaps);
8593 }
8594
CompareHitsByCriterion(const void * c1,const void * c2)8595 int CompareHitsByCriterion(const void *c1, const void *c2) {
8596 const besthit_t *hit1 = (besthit_t*)c1;
8597 const besthit_t *hit2 = (besthit_t*)c2;
8598 if (hit1->criterion < hit2->criterion) return(-1);
8599 if (hit1->criterion > hit2->criterion) return(1);
8600 return(0);
8601 }
8602
CompareHitsByIJ(const void * c1,const void * c2)8603 int CompareHitsByIJ(const void *c1, const void *c2) {
8604 const besthit_t *hit1 = (besthit_t*)c1;
8605 const besthit_t *hit2 = (besthit_t*)c2;
8606 return hit1->i != hit2->i ? hit1->i - hit2->i : hit1->j - hit2->j;
8607 }
8608
SortSaveBestHits(int iNode,besthit_t * besthits,int nIn,int nOut,top_hits_t * tophits)8609 void SortSaveBestHits(int iNode, /*IN/SORT*/besthit_t *besthits,
8610 int nIn, int nOut,
8611 /*IN/OUT*/top_hits_t *tophits) {
8612 assert(nIn > 0);
8613 assert(nOut > 0);
8614 top_hits_list_t *l = &tophits->top_hits_lists[iNode];
8615 /* */
8616 qsort(/*IN/OUT*/besthits,nIn,sizeof(besthit_t),CompareHitsByCriterion);
8617
8618 /* First count how many we will save
8619 Not sure if removing duplicates is actually necessary.
8620 */
8621 int nSave = 0;
8622 int jLast = -1;
8623 int iBest;
8624 for (iBest = 0; iBest < nIn && nSave < nOut; iBest++) {
8625 if (besthits[iBest].i < 0)
8626 continue;
8627 assert(besthits[iBest].i == iNode);
8628 int j = besthits[iBest].j;
8629 if (j != iNode && j != jLast && j >= 0) {
8630 nSave++;
8631 jLast = j;
8632 }
8633 }
8634
8635 assert(nSave > 0);
8636
8637 #ifdef OPENMP
8638 omp_set_lock(&tophits->locks[iNode]);
8639 #endif
8640 if (l->hits != NULL) {
8641 l->hits = myfree(l->hits, l->nHits * sizeof(hit_t));
8642 l->nHits = 0;
8643 }
8644 l->hits = mymalloc(sizeof(hit_t) * nSave);
8645 l->nHits = nSave;
8646 int iSave = 0;
8647 jLast = -1;
8648 for (iBest = 0; iBest < nIn && iSave < nSave; iBest++) {
8649 int j = besthits[iBest].j;
8650 if (j != iNode && j != jLast && j >= 0) {
8651 l->hits[iSave].j = j;
8652 l->hits[iSave].dist = besthits[iBest].dist;
8653 iSave++;
8654 jLast = j;
8655 }
8656 }
8657 #ifdef OPENMP
8658 omp_unset_lock(&tophits->locks[iNode]);
8659 #endif
8660 assert(iSave == nSave);
8661 }
8662
TransferBestHits(NJ_t * NJ,int nActive,int iNode,besthit_t * oldhits,int nOldHits,besthit_t * newhits,bool updateDistances)8663 void TransferBestHits(/*IN/UPDATE*/NJ_t *NJ,
8664 int nActive,
8665 int iNode,
8666 /*IN*/besthit_t *oldhits,
8667 int nOldHits,
8668 /*OUT*/besthit_t *newhits,
8669 bool updateDistances) {
8670 assert(iNode >= 0);
8671 assert(NJ->parent[iNode] < 0);
8672
8673 int iBest;
8674 for(iBest = 0; iBest < nOldHits; iBest++) {
8675 besthit_t *old = &oldhits[iBest];
8676 besthit_t *new = &newhits[iBest];
8677 new->i = iNode;
8678 new->j = ActiveAncestor(/*IN*/NJ, old->j);
8679 new->dist = old->dist; /* may get reset below */
8680 new->weight = old->weight;
8681 new->criterion = old->criterion;
8682
8683 if(new->j < 0 || new->j == iNode) {
8684 new->weight = 0;
8685 new->dist = -1e20;
8686 new->criterion = 1e20;
8687 } else if (new->i != old->i || new->j != old->j) {
8688 if (updateDistances)
8689 SetDistCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/new);
8690 else {
8691 new->dist = -1e20;
8692 new->criterion = 1e20;
8693 }
8694 } else {
8695 if (updateDistances)
8696 SetCriterion(/*IN/UPDATE*/NJ, nActive, /*IN/OUT*/new);
8697 else
8698 new->criterion = 1e20; /* leave dist alone */
8699 }
8700 }
8701 }
8702
HitsToBestHits(hit_t * hits,int nHits,int iNode,besthit_t * newhits)8703 void HitsToBestHits(/*IN*/hit_t *hits, int nHits, int iNode, /*OUT*/besthit_t *newhits) {
8704 int i;
8705 for (i = 0; i < nHits; i++) {
8706 hit_t *hit = &hits[i];
8707 besthit_t *bh = &newhits[i];
8708 bh->i = iNode;
8709 bh->j = hit->j;
8710 bh->dist = hit->dist;
8711 bh->criterion = 1e20;
8712 bh->weight = -1; /* not the true value -- we compute these directly when needed */
8713 }
8714 }
8715
HitToBestHit(int i,hit_t hit)8716 besthit_t HitToBestHit(int i, hit_t hit) {
8717 besthit_t bh;
8718 bh.i = i;
8719 bh.j = hit.j;
8720 bh.dist = hit.dist;
8721 bh.criterion = 1e20;
8722 bh.weight = -1;
8723 return(bh);
8724 }
8725
OpenMPString(void)8726 char *OpenMPString(void) {
8727 #ifdef OPENMP
8728 static char buf[100];
8729 sprintf(buf, ", OpenMP (%d threads)", omp_get_max_threads());
8730 return(buf);
8731 #else
8732 return("");
8733 #endif
8734 }
8735
8736 /* Algorithm 26.2.17 from Abromowitz and Stegun, Handbook of Mathematical Functions
8737 Absolute accuracy of only about 1e-7, which is enough for us
8738 */
pnorm(double x)8739 double pnorm(double x)
8740 {
8741 double b1 = 0.319381530;
8742 double b2 = -0.356563782;
8743 double b3 = 1.781477937;
8744 double b4 = -1.821255978;
8745 double b5 = 1.330274429;
8746 double p = 0.2316419;
8747 double c = 0.39894228;
8748
8749 if(x >= 0.0) {
8750 double t = 1.0 / ( 1.0 + p * x );
8751 return (1.0 - c * exp( -x * x / 2.0 ) * t *
8752 ( t *( t * ( t * ( t * b5 + b4 ) + b3 ) + b2 ) + b1 ));
8753 }
8754 /*else*/
8755 double t = 1.0 / ( 1.0 - p * x );
8756 return ( c * exp( -x * x / 2.0 ) * t *
8757 ( t *( t * ( t * ( t * b5 + b4 ) + b3 ) + b2 ) + b1 ));
8758 }
8759
mymalloc(size_t sz)8760 void *mymalloc(size_t sz) {
8761 if (sz == 0) return(NULL);
8762 void *new = aligned_alloc(16, sz);
8763 if (new == NULL) {
8764 fprintf(stderr, "Out of memory\n");
8765 exit(1);
8766 }
8767 szAllAlloc += sz;
8768 mymallocUsed += sz;
8769 #ifdef TRACK_MEMORY
8770 struct mallinfo mi = mallinfo();
8771 if (mi.arena+mi.hblkhd > maxmallocHeap)
8772 maxmallocHeap = mi.arena+mi.hblkhd;
8773 #endif
8774 /* gcc malloc should always return 16-byte-aligned values... */
8775 assert(IS_ALIGNED(new));
8776 return (new);
8777 }
8778
mymemdup(void * data,size_t sz)8779 void *mymemdup(void *data, size_t sz) {
8780 if(data==NULL) return(NULL);
8781 void *new = mymalloc(sz);
8782 memcpy(/*to*/new, /*from*/data, sz);
8783 return(new);
8784 }
8785
myrealloc(void * data,size_t szOld,size_t szNew,bool bCopy)8786 void *myrealloc(void *data, size_t szOld, size_t szNew, bool bCopy) {
8787 if (data == NULL && szOld == 0)
8788 return(mymalloc(szNew));
8789 if (data == NULL || szOld == 0 || szNew == 0) {
8790 fprintf(stderr,"Empty myrealloc\n");
8791 exit(1);
8792 }
8793 if (szOld == szNew)
8794 return(data);
8795 void *new = NULL;
8796 if (bCopy) {
8797 /* Try to reduce memory fragmentation by allocating anew and copying
8798 Seems to help in practice */
8799 new = mymemdup(data, szNew);
8800 myfree(data, szOld);
8801 } else {
8802 new = realloc(data,szNew);
8803 if (new == NULL) {
8804 fprintf(stderr, "Out of memory\n");
8805 exit(1);
8806 }
8807 assert(IS_ALIGNED(new));
8808 szAllAlloc += (szNew-szOld);
8809 mymallocUsed += (szNew-szOld);
8810 #ifdef TRACK_MEMORY
8811 struct mallinfo mi = mallinfo();
8812 if (mi.arena+mi.hblkhd > maxmallocHeap)
8813 maxmallocHeap = mi.arena+mi.hblkhd;
8814 #endif
8815 }
8816 return(new);
8817 }
8818
myfree(void * p,size_t sz)8819 void *myfree(void *p, size_t sz) {
8820 if(p==NULL) return(NULL);
8821 free(p);
8822 mymallocUsed -= sz;
8823 return(NULL);
8824 }
8825
8826 /******************************************************************************/
8827 /* Minimization of a 1-dimensional function by Brent's method (Numerical Recipes)
8828 * Borrowed from Tree-Puzzle 5.1 util.c under GPL
8829 * Modified by M.N.P to pass in the accessory data for the optimization function,
8830 * to use 2x bounds around the starting guess and expand them if necessary,
8831 * and to use both a fractional and an absolute tolerance
8832 */
8833
8834 #define ITMAX 100
8835 #define CGOLD 0.3819660
8836 #define TINY 1.0e-20
8837 #define ZEPS 1.0e-10
8838 #define SHFT(a,b,c,d) (a)=(b);(b)=(c);(c)=(d);
8839 #define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a))
8840
8841 /* Brents method in one dimension */
brent(double ax,double bx,double cx,double (* f)(double,void *),void * data,double ftol,double atol,double * foptx,double * f2optx,double fax,double fbx,double fcx)8842 double brent(double ax, double bx, double cx, double (*f)(double, void *), void *data,
8843 double ftol, double atol,
8844 double *foptx, double *f2optx, double fax, double fbx, double fcx)
8845 {
8846 int iter;
8847 double a,b,d=0,etemp,fu,fv,fw,fx,p,q,r,tol1,tol2,u,v,w,x,xm;
8848 double xw,wv,vx;
8849 double e=0.0;
8850
8851 a=(ax < cx ? ax : cx);
8852 b=(ax > cx ? ax : cx);
8853 x=bx;
8854 fx=fbx;
8855 if (fax < fcx) {
8856 w=ax;
8857 fw=fax;
8858 v=cx;
8859 fv=fcx;
8860 } else {
8861 w=cx;
8862 fw=fcx;
8863 v=ax;
8864 fv=fax;
8865 }
8866 for (iter=1;iter<=ITMAX;iter++) {
8867 xm=0.5*(a+b);
8868 tol1=ftol*fabs(x);
8869 tol2=2.0*(tol1+ZEPS);
8870 if (fabs(x-xm) <= (tol2-0.5*(b-a))
8871 || fabs(a-b) < atol) {
8872 *foptx = fx;
8873 xw = x-w;
8874 wv = w-v;
8875 vx = v-x;
8876 *f2optx = 2.0*(fv*xw + fx*wv + fw*vx)/
8877 (v*v*xw + x*x*wv + w*w*vx);
8878 return x;
8879 }
8880 if (fabs(e) > tol1) {
8881 r=(x-w)*(fx-fv);
8882 q=(x-v)*(fx-fw);
8883 p=(x-v)*q-(x-w)*r;
8884 q=2.0*(q-r);
8885 if (q > 0.0) p = -p;
8886 q=fabs(q);
8887 etemp=e;
8888 e=d;
8889 if (fabs(p) >= fabs(0.5*q*etemp) || p <= q*(a-x) || p >= q*(b-x))
8890 d=CGOLD*(e=(x >= xm ? a-x : b-x));
8891 else {
8892 d=p/q;
8893 u=x+d;
8894 if (u-a < tol2 || b-u < tol2)
8895 d=SIGN(tol1,xm-x);
8896 }
8897 } else {
8898 d=CGOLD*(e=(x >= xm ? a-x : b-x));
8899 }
8900 u=(fabs(d) >= tol1 ? x+d : x+SIGN(tol1,d));
8901 fu=(*f)(u,data);
8902 if (fu <= fx) {
8903 if (u >= x) a=x; else b=x;
8904 SHFT(v,w,x,u)
8905 SHFT(fv,fw,fx,fu)
8906 } else {
8907 if (u < x) a=u; else b=u;
8908 if (fu <= fw || w == x) {
8909 v=w;
8910 w=u;
8911 fv=fw;
8912 fw=fu;
8913 } else if (fu <= fv || v == x || v == w) {
8914 v=u;
8915 fv=fu;
8916 }
8917 }
8918 }
8919 *foptx = fx;
8920 xw = x-w;
8921 wv = w-v;
8922 vx = v-x;
8923 *f2optx = 2.0*(fv*xw + fx*wv + fw*vx)/
8924 (v*v*xw + x*x*wv + w*w*vx);
8925 return x;
8926 } /* brent */
8927 #undef ITMAX
8928 #undef CGOLD
8929 #undef ZEPS
8930 #undef SHFT
8931 #undef SIGN
8932
8933 /* one-dimensional minimization - as input a lower and an upper limit and a trial
8934 value for the minimum is needed: xmin < xguess < xmax
8935 the function and a fractional tolerance has to be specified
8936 onedimenmin returns the optimal x value and the value of the function
8937 and its second derivative at this point
8938 */
onedimenmin(double xmin,double xguess,double xmax,double (* f)(double,void *),void * data,double ftol,double atol,double * fx,double * f2x)8939 double onedimenmin(double xmin, double xguess, double xmax, double (*f)(double,void*), void *data,
8940 double ftol, double atol,
8941 /*OUT*/double *fx, /*OUT*/double *f2x)
8942 {
8943 double optx, ax, bx, cx, fa, fb, fc;
8944
8945 /* first attempt to bracketize minimum */
8946 if (xguess == xmin) {
8947 ax = xmin;
8948 bx = 2.0*xguess;
8949 cx = 10.0*xguess;
8950 } else if (xguess <= 2.0 * xmin) {
8951 ax = xmin;
8952 bx = xguess;
8953 cx = 5.0*xguess;
8954 } else {
8955 ax = 0.5*xguess;
8956 bx = xguess;
8957 cx = 2.0*xguess;
8958 }
8959 if (cx > xmax)
8960 cx = xmax;
8961 if (bx >= cx)
8962 bx = 0.5*(ax+cx);
8963 if (verbose > 4)
8964 fprintf(stderr, "onedimenmin lo %.4f guess %.4f hi %.4f range %.4f %.4f\n",
8965 ax, bx, cx, xmin, xmax);
8966 /* ideally this range includes the true minimum, i.e.,
8967 fb < fa and fb < fc
8968 if not, we gradually expand the boundaries until it does,
8969 or we near the boundary of the allowed range and use that
8970 */
8971 fa = (*f)(ax,data);
8972 fb = (*f)(bx,data);
8973 fc = (*f)(cx,data);
8974 while(fa < fb && ax > xmin) {
8975 ax = (ax+xmin)/2.0;
8976 if (ax < 2.0*xmin) /* give up on shrinking the region */
8977 ax = xmin;
8978 fa = (*f)(ax,data);
8979 }
8980 while(fc < fb && cx < xmax) {
8981 cx = (cx+xmax)/2.0;
8982 if (cx > xmax * 0.95)
8983 cx = xmax;
8984 fc = (*f)(cx,data);
8985 }
8986 optx = brent(ax, bx, cx, f, data, ftol, atol, fx, f2x, fa, fb, fc);
8987
8988 if (verbose > 4)
8989 fprintf(stderr, "onedimenmin reaches optimum f(%.4f) = %.4f f2x %.4f\n", optx, *fx, *f2x);
8990 return optx; /* return optimal x */
8991 } /* onedimenmin */
8992
8993 /* Numerical code for the gamma distribution is modified from the PhyML 3 code
8994 (GNU public license) of Stephane Guindon
8995 */
8996
LnGamma(double alpha)8997 double LnGamma (double alpha)
8998 {
8999 /* returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places.
9000 Stirling's formula is used for the central polynomial part of the procedure.
9001 Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function.
9002 Communications of the Association for Computing Machinery, 9:684
9003 */
9004 double x=alpha, f=0, z;
9005 if (x<7) {
9006 f=1; z=x-1;
9007 while (++z<7) f*=z;
9008 x=z; f=-(double)log(f);
9009 }
9010 z = 1/(x*x);
9011 return f + (x-0.5)*(double)log(x) - x + .918938533204673
9012 + (((-.000595238095238*z+.000793650793651)*z-.002777777777778)*z
9013 +.083333333333333)/x;
9014 }
9015
IncompleteGamma(double x,double alpha,double ln_gamma_alpha)9016 double IncompleteGamma(double x, double alpha, double ln_gamma_alpha)
9017 {
9018 /* returns the incomplete gamma ratio I(x,alpha) where x is the upper
9019 limit of the integration and alpha is the shape parameter.
9020 returns (-1) if in error
9021 ln_gamma_alpha = ln(Gamma(alpha)), is almost redundant.
9022 (1) series expansion if (alpha>x || x<=1)
9023 (2) continued fraction otherwise
9024 RATNEST FORTRAN by
9025 Bhattacharjee GP (1970) The incomplete gamma integral. Applied Statistics,
9026 19: 285-287 (AS32)
9027 */
9028 int i;
9029 double p=alpha, g=ln_gamma_alpha;
9030 double accurate=1e-8, overflow=1e30;
9031 double factor, gin=0, rn=0, a=0,b=0,an=0,dif=0, term=0, pn[6];
9032
9033 if (x==0) return (0);
9034 if (x<0 || p<=0) return (-1);
9035
9036 factor=(double)exp(p*(double)log(x)-x-g);
9037 if (x>1 && x>=p) goto l30;
9038 /* (1) series expansion */
9039 gin=1; term=1; rn=p;
9040 l20:
9041 rn++;
9042 term*=x/rn; gin+=term;
9043
9044 if (term > accurate) goto l20;
9045 gin*=factor/p;
9046 goto l50;
9047 l30:
9048 /* (2) continued fraction */
9049 a=1-p; b=a+x+1; term=0;
9050 pn[0]=1; pn[1]=x; pn[2]=x+1; pn[3]=x*b;
9051 gin=pn[2]/pn[3];
9052 l32:
9053 a++; b+=2; term++; an=a*term;
9054 for (i=0; i<2; i++) pn[i+4]=b*pn[i+2]-an*pn[i];
9055 if (pn[5] == 0) goto l35;
9056 rn=pn[4]/pn[5]; dif=fabs(gin-rn);
9057 if (dif>accurate) goto l34;
9058 if (dif<=accurate*rn) goto l42;
9059 l34:
9060 gin=rn;
9061 l35:
9062 for (i=0; i<4; i++) pn[i]=pn[i+2];
9063 if (fabs(pn[4]) < overflow) goto l32;
9064 for (i=0; i<4; i++) pn[i]/=overflow;
9065 goto l32;
9066 l42:
9067 gin=1-factor*gin;
9068
9069 l50:
9070 return (gin);
9071 }
9072
PGamma(double x,double alpha)9073 double PGamma(double x, double alpha)
9074 {
9075 /* scale = 1/alpha */
9076 return IncompleteGamma(x*alpha,alpha,LnGamma(alpha));
9077 }
9078
9079 /* helper function to subtract timval structures */
9080 /* Subtract the `struct timeval' values X and Y,
9081 storing the result in RESULT.
9082 Return 1 if the difference is negative, otherwise 0. */
timeval_subtract(struct timeval * result,struct timeval * x,struct timeval * y)9083 int timeval_subtract (struct timeval *result, struct timeval *x, struct timeval *y)
9084 {
9085 /* Perform the carry for the later subtraction by updating y. */
9086 if (x->tv_usec < y->tv_usec) {
9087 int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
9088 y->tv_usec -= 1000000 * nsec;
9089 y->tv_sec += nsec;
9090 }
9091 if (x->tv_usec - y->tv_usec > 1000000) {
9092 int nsec = (x->tv_usec - y->tv_usec) / 1000000;
9093 y->tv_usec += 1000000 * nsec;
9094 y->tv_sec -= nsec;
9095 }
9096
9097 /* Compute the time remaining to wait.
9098 tv_usec is certainly positive. */
9099 result->tv_sec = x->tv_sec - y->tv_sec;
9100 result->tv_usec = x->tv_usec - y->tv_usec;
9101
9102 /* Return 1 if result is negative. */
9103 return x->tv_sec < y->tv_sec;
9104 }
9105
clockDiff(struct timeval * clock_start)9106 double clockDiff(/*IN*/struct timeval *clock_start) {
9107 struct timeval time_now, elapsed;
9108 gettimeofday(/*OUT*/&time_now,NULL);
9109 timeval_subtract(/*OUT*/&elapsed,/*IN*/&time_now,/*IN*/clock_start);
9110 return(elapsed.tv_sec + elapsed.tv_usec*1e-6);
9111 }
9112
9113
9114 /* The random number generator is taken from D E Knuth
9115 http://www-cs-faculty.stanford.edu/~knuth/taocp.html
9116 */
9117
9118 /* This program by D E Knuth is in the public domain and freely copyable.
9119 * It is explained in Seminumerical Algorithms, 3rd edition, Section 3.6
9120 * (or in the errata to the 2nd edition --- see
9121 * http://www-cs-faculty.stanford.edu/~knuth/taocp.html
9122 * in the changes to Volume 2 on pages 171 and following). */
9123
9124 /* N.B. The MODIFICATIONS introduced in the 9th printing (2002) are
9125 included here; there's no backwards compatibility with the original. */
9126
9127 /* This version also adopts Brendan McKay's suggestion to
9128 accommodate naive users who forget to call ran_start(seed). */
9129
9130 /* If you find any bugs, please report them immediately to
9131 * taocp@cs.stanford.edu
9132 * (and you will be rewarded if the bug is genuine). Thanks! */
9133
9134 /************ see the book for explanations and caveats! *******************/
9135 /************ in particular, you need two's complement arithmetic **********/
9136
9137 #define KK 100 /* the long lag */
9138 #define LL 37 /* the short lag */
9139 #define MM (1L<<30) /* the modulus */
9140 #define mod_diff(x,y) (((x)-(y))&(MM-1)) /* subtraction mod MM */
9141
9142 long ran_x[KK]; /* the generator state */
9143
9144 #ifdef __STDC__
ran_array(long aa[],int n)9145 void ran_array(long aa[],int n)
9146 #else
9147 void ran_array(aa,n) /* put n new random numbers in aa */
9148 long *aa; /* destination */
9149 int n; /* array length (must be at least KK) */
9150 #endif
9151 {
9152 register int i,j;
9153 for (j=0;j<KK;j++) aa[j]=ran_x[j];
9154 for (;j<n;j++) aa[j]=mod_diff(aa[j-KK],aa[j-LL]);
9155 for (i=0;i<LL;i++,j++) ran_x[i]=mod_diff(aa[j-KK],aa[j-LL]);
9156 for (;i<KK;i++,j++) ran_x[i]=mod_diff(aa[j-KK],ran_x[i-LL]);
9157 }
9158
9159 /* the following routines are from exercise 3.6--15 */
9160 /* after calling ran_start, get new randoms by, e.g., "x=ran_arr_next()" */
9161
9162 #define QUALITY 1009 /* recommended quality level for high-res use */
9163 long ran_arr_buf[QUALITY];
9164 long ran_arr_dummy=-1, ran_arr_started=-1;
9165 long *ran_arr_ptr=&ran_arr_dummy; /* the next random number, or -1 */
9166
9167 #define TT 70 /* guaranteed separation between streams */
9168 #define is_odd(x) ((x)&1) /* units bit of x */
9169
9170 #ifdef __STDC__
ran_start(long seed)9171 void ran_start(long seed)
9172 #else
9173 void ran_start(seed) /* do this before using ran_array */
9174 long seed; /* selector for different streams */
9175 #endif
9176 {
9177 register int t,j;
9178 long x[KK+KK-1]; /* the preparation buffer */
9179 register long ss=(seed+2)&(MM-2);
9180 for (j=0;j<KK;j++) {
9181 x[j]=ss; /* bootstrap the buffer */
9182 ss<<=1; if (ss>=MM) ss-=MM-2; /* cyclic shift 29 bits */
9183 }
9184 x[1]++; /* make x[1] (and only x[1]) odd */
9185 for (ss=seed&(MM-1),t=TT-1; t; ) {
9186 for (j=KK-1;j>0;j--) x[j+j]=x[j], x[j+j-1]=0; /* "square" */
9187 for (j=KK+KK-2;j>=KK;j--)
9188 x[j-(KK-LL)]=mod_diff(x[j-(KK-LL)],x[j]),
9189 x[j-KK]=mod_diff(x[j-KK],x[j]);
9190 if (is_odd(ss)) { /* "multiply by z" */
9191 for (j=KK;j>0;j--) x[j]=x[j-1];
9192 x[0]=x[KK]; /* shift the buffer cyclically */
9193 x[LL]=mod_diff(x[LL],x[KK]);
9194 }
9195 if (ss) ss>>=1; else t--;
9196 }
9197 for (j=0;j<LL;j++) ran_x[j+KK-LL]=x[j];
9198 for (;j<KK;j++) ran_x[j-LL]=x[j];
9199 for (j=0;j<10;j++) ran_array(x,KK+KK-1); /* warm things up */
9200 ran_arr_ptr=&ran_arr_started;
9201 }
9202
9203 #define ran_arr_next() (*ran_arr_ptr>=0? *ran_arr_ptr++: ran_arr_cycle())
ran_arr_cycle()9204 long ran_arr_cycle()
9205 {
9206 if (ran_arr_ptr==&ran_arr_dummy)
9207 ran_start(314159L); /* the user forgot to initialize */
9208 ran_array(ran_arr_buf,QUALITY);
9209 ran_arr_buf[KK]=-1;
9210 ran_arr_ptr=ran_arr_buf+1;
9211 return ran_arr_buf[0];
9212 }
9213
9214 /* end of code from Knuth */
9215
knuth_rand()9216 double knuth_rand() {
9217 return(9.31322574615479e-10 * ran_arr_next()); /* multiply by 2**-30 */
9218 }
9219
MakeHashtable(char ** strings,int nStrings)9220 hashstrings_t *MakeHashtable(char **strings, int nStrings) {
9221 hashstrings_t *hash = (hashstrings_t*)mymalloc(sizeof(hashstrings_t));
9222 hash->nBuckets = 8*nStrings;
9223 hash->buckets = (hashbucket_t*)mymalloc(sizeof(hashbucket_t) * hash->nBuckets);
9224 int i;
9225 for (i=0; i < hash->nBuckets; i++) {
9226 hash->buckets[i].string = NULL;
9227 hash->buckets[i].nCount = 0;
9228 hash->buckets[i].first = -1;
9229 }
9230 for (i=0; i < nStrings; i++) {
9231 hashiterator_t hi = FindMatch(hash, strings[i]);
9232 if (hash->buckets[hi].string == NULL) {
9233 /* save a unique entry */
9234 assert(hash->buckets[hi].nCount == 0);
9235 hash->buckets[hi].string = strings[i];
9236 hash->buckets[hi].nCount = 1;
9237 hash->buckets[hi].first = i;
9238 } else {
9239 /* record a duplicate entry */
9240 assert(hash->buckets[hi].string != NULL);
9241 assert(strcmp(hash->buckets[hi].string, strings[i]) == 0);
9242 assert(hash->buckets[hi].first >= 0);
9243 hash->buckets[hi].nCount++;
9244 }
9245 }
9246 return(hash);
9247 }
9248
FreeHashtable(hashstrings_t * hash)9249 hashstrings_t *FreeHashtable(hashstrings_t* hash) {
9250 if (hash != NULL) {
9251 myfree(hash->buckets, sizeof(hashbucket_t) * hash->nBuckets);
9252 myfree(hash, sizeof(hashstrings_t));
9253 }
9254 return(NULL);
9255 }
9256
9257 #define MAXADLER 65521
FindMatch(hashstrings_t * hash,char * string)9258 hashiterator_t FindMatch(hashstrings_t *hash, char *string) {
9259 /* Adler-32 checksum */
9260 unsigned int hashA = 1;
9261 unsigned int hashB = 0;
9262 char *p;
9263 for (p = string; *p != '\0'; p++) {
9264 hashA = ((unsigned int)*p + hashA);
9265 hashB = hashA+hashB;
9266 }
9267 hashA %= MAXADLER;
9268 hashB %= MAXADLER;
9269 hashiterator_t hi = (hashB*65536+hashA) % hash->nBuckets;
9270 while(hash->buckets[hi].string != NULL
9271 && strcmp(hash->buckets[hi].string, string) != 0) {
9272 hi++;
9273 if (hi >= hash->nBuckets)
9274 hi = 0;
9275 }
9276 return(hi);
9277 }
9278
GetHashString(hashstrings_t * hash,hashiterator_t hi)9279 char *GetHashString(hashstrings_t *hash, hashiterator_t hi) {
9280 return(hash->buckets[hi].string);
9281 }
9282
HashCount(hashstrings_t * hash,hashiterator_t hi)9283 int HashCount(hashstrings_t *hash, hashiterator_t hi) {
9284 return(hash->buckets[hi].nCount);
9285 }
9286
HashFirst(hashstrings_t * hash,hashiterator_t hi)9287 int HashFirst(hashstrings_t *hash, hashiterator_t hi) {
9288 return(hash->buckets[hi].first);
9289 }
9290
UniquifyAln(alignment_t * aln)9291 uniquify_t *UniquifyAln(alignment_t *aln) {
9292 int nUniqueSeq = 0;
9293 char **uniqueSeq = (char**)mymalloc(aln->nSeq * sizeof(char*)); /* iUnique -> seq */
9294 int *uniqueFirst = (int*)mymalloc(aln->nSeq * sizeof(int)); /* iUnique -> iFirst in aln */
9295 int *alnNext = (int*)mymalloc(aln->nSeq * sizeof(int)); /* i in aln -> next, or -1 */
9296 int *alnToUniq = (int*)mymalloc(aln->nSeq * sizeof(int)); /* i in aln -> iUnique; many -> -1 */
9297
9298 int i;
9299 for (i = 0; i < aln->nSeq; i++) {
9300 uniqueSeq[i] = NULL;
9301 uniqueFirst[i] = -1;
9302 alnNext[i] = -1;
9303 alnToUniq[i] = -1;
9304 }
9305 hashstrings_t *hashseqs = MakeHashtable(aln->seqs, aln->nSeq);
9306 for (i=0; i<aln->nSeq; i++) {
9307 hashiterator_t hi = FindMatch(hashseqs,aln->seqs[i]);
9308 int first = HashFirst(hashseqs,hi);
9309 if (first == i) {
9310 uniqueSeq[nUniqueSeq] = aln->seqs[i];
9311 uniqueFirst[nUniqueSeq] = i;
9312 alnToUniq[i] = nUniqueSeq;
9313 nUniqueSeq++;
9314 } else {
9315 int last = first;
9316 while (alnNext[last] != -1)
9317 last = alnNext[last];
9318 assert(last>=0);
9319 alnNext[last] = i;
9320 assert(alnToUniq[last] >= 0 && alnToUniq[last] < nUniqueSeq);
9321 alnToUniq[i] = alnToUniq[last];
9322 }
9323 }
9324 assert(nUniqueSeq>0);
9325 hashseqs = FreeHashtable(hashseqs);
9326
9327 uniquify_t *uniquify = (uniquify_t*)mymalloc(sizeof(uniquify_t));
9328 uniquify->nSeq = aln->nSeq;
9329 uniquify->nUnique = nUniqueSeq;
9330 uniquify->uniqueFirst = uniqueFirst;
9331 uniquify->alnNext = alnNext;
9332 uniquify->alnToUniq = alnToUniq;
9333 uniquify->uniqueSeq = uniqueSeq;
9334 return(uniquify);
9335 }
9336
FreeUniquify(uniquify_t * unique)9337 uniquify_t *FreeUniquify(uniquify_t *unique) {
9338 if (unique != NULL) {
9339 myfree(unique->uniqueFirst, sizeof(int)*unique->nSeq);
9340 myfree(unique->alnNext, sizeof(int)*unique->nSeq);
9341 myfree(unique->alnToUniq, sizeof(int)*unique->nSeq);
9342 myfree(unique->uniqueSeq, sizeof(char*)*unique->nSeq);
9343 myfree(unique,sizeof(uniquify_t));
9344 unique = NULL;
9345 }
9346 return(unique);
9347 }
9348
InitTraversal(NJ_t * NJ)9349 traversal_t InitTraversal(NJ_t *NJ) {
9350 traversal_t worked = (bool*)mymalloc(sizeof(bool)*NJ->maxnodes);
9351 int i;
9352 for (i=0; i<NJ->maxnodes; i++)
9353 worked[i] = false;
9354 return(worked);
9355 }
9356
SkipTraversalInto(int node,traversal_t traversal)9357 void SkipTraversalInto(int node, /*IN/OUT*/traversal_t traversal) {
9358 traversal[node] = true;
9359 }
9360
TraversePostorder(int node,NJ_t * NJ,traversal_t traversal,bool * pUp)9361 int TraversePostorder(int node, NJ_t *NJ, /*IN/OUT*/traversal_t traversal,
9362 /*OPTIONAL OUT*/bool *pUp) {
9363 if (pUp)
9364 *pUp = false;
9365 while(1) {
9366 assert(node >= 0);
9367
9368 /* move to a child if possible */
9369 bool found = false;
9370 int iChild;
9371 for (iChild=0; iChild < NJ->child[node].nChild; iChild++) {
9372 int child = NJ->child[node].child[iChild];
9373 if (!traversal[child]) {
9374 node = child;
9375 found = true;
9376 break;
9377 }
9378 }
9379 if (found)
9380 continue; /* keep moving down */
9381 if (!traversal[node]) {
9382 traversal[node] = true;
9383 return(node);
9384 }
9385 /* If we've already done this node, need to move up */
9386 if (node == NJ->root)
9387 return(-1); /* nowhere to go -- done traversing */
9388 node = NJ->parent[node];
9389 /* If we go up to someplace that was already marked as visited, this is due
9390 to a change in topology, so return it marked as "up" */
9391 if (pUp && traversal[node]) {
9392 *pUp = true;
9393 return(node);
9394 }
9395 }
9396 }
9397
FreeTraversal(traversal_t traversal,NJ_t * NJ)9398 traversal_t FreeTraversal(traversal_t traversal, NJ_t *NJ) {
9399 myfree(traversal, sizeof(bool)*NJ->maxnodes);
9400 return(NULL);
9401 }
9402
UpProfiles(NJ_t * NJ)9403 profile_t **UpProfiles(NJ_t *NJ) {
9404 profile_t **upProfiles = (profile_t**)mymalloc(sizeof(profile_t*)*NJ->maxnodes);
9405 int i;
9406 for (i=0; i<NJ->maxnodes; i++) upProfiles[i] = NULL;
9407 return(upProfiles);
9408 }
9409
GetUpProfile(profile_t ** upProfiles,NJ_t * NJ,int outnode,bool useML)9410 profile_t *GetUpProfile(/*IN/OUT*/profile_t **upProfiles, NJ_t *NJ, int outnode, bool useML) {
9411 assert(outnode != NJ->root && outnode >= NJ->nSeq); /* not for root or leaves */
9412 if (upProfiles[outnode] != NULL)
9413 return(upProfiles[outnode]);
9414
9415 int depth;
9416 int *pathToRoot = PathToRoot(NJ, outnode, /*OUT*/&depth);
9417 int i;
9418 /* depth-1 is root */
9419 for (i = depth-2; i>=0; i--) {
9420 int node = pathToRoot[i];
9421
9422 if (upProfiles[node] == NULL) {
9423 /* Note -- SetupABCD may call GetUpProfile, but it should do it farther
9424 up in the path to the root
9425 */
9426 profile_t *profiles[4];
9427 int nodeABCD[4];
9428 SetupABCD(NJ, node, /*OUT*/profiles, /*IN/OUT*/upProfiles, /*OUT*/nodeABCD, useML);
9429 if (useML) {
9430 /* If node is a child of root, then the 4th profile is of the 2nd root-sibling of node
9431 Otherwise, the 4th profile is the up-profile of the parent of node, and that
9432 is the branch-length we need
9433 */
9434 double lenC = NJ->branchlength[nodeABCD[2]];
9435 double lenD = NJ->branchlength[nodeABCD[3]];
9436 if (verbose > 3) {
9437 fprintf(stderr, "Computing UpProfile for node %d with lenC %.4f lenD %.4f pair-loglk %.3f\n",
9438 node, lenC, lenD,
9439 PairLogLk(profiles[2],profiles[3],lenC+lenD,NJ->nPos,NJ->transmat,&NJ->rates, /*site_lk*/NULL));
9440 PrintNJInternal(stderr, NJ, /*useLen*/true);
9441 }
9442 upProfiles[node] = PosteriorProfile(/*C*/profiles[2], /*D*/profiles[3],
9443 lenC, lenD,
9444 NJ->transmat, &NJ->rates, NJ->nPos, NJ->nConstraints);
9445 } else {
9446 profile_t *profilesCDAB[4] = { profiles[2], profiles[3], profiles[0], profiles[1] };
9447 double weight = QuartetWeight(profilesCDAB, NJ->distance_matrix, NJ->nPos);
9448 if (verbose>3)
9449 fprintf(stderr, "Compute upprofile of %d from %d and parents (vs. children %d %d) with weight %.3f\n",
9450 node, nodeABCD[2], nodeABCD[0], nodeABCD[1], weight);
9451 upProfiles[node] = AverageProfile(profiles[2], profiles[3],
9452 NJ->nPos, NJ->nConstraints,
9453 NJ->distance_matrix,
9454 weight);
9455 }
9456 }
9457 }
9458 FreePath(pathToRoot,NJ);
9459 assert(upProfiles[outnode] != NULL);
9460 return(upProfiles[outnode]);
9461 }
9462
DeleteUpProfile(profile_t ** upProfiles,NJ_t * NJ,int node)9463 profile_t *DeleteUpProfile(/*IN/OUT*/profile_t **upProfiles, NJ_t *NJ, int node) {
9464 assert(node>=0 && node < NJ->maxnodes);
9465 if (upProfiles[node] != NULL)
9466 upProfiles[node] = FreeProfile(upProfiles[node], NJ->nPos, NJ->nConstraints); /* returns NULL */
9467 return(NULL);
9468 }
9469
FreeUpProfiles(profile_t ** upProfiles,NJ_t * NJ)9470 profile_t **FreeUpProfiles(profile_t **upProfiles, NJ_t *NJ) {
9471 int i;
9472 int nUsed = 0;
9473 for (i=0; i < NJ->maxnodes; i++) {
9474 if (upProfiles[i] != NULL)
9475 nUsed++;
9476 DeleteUpProfile(upProfiles, NJ, i);
9477 }
9478 myfree(upProfiles, sizeof(profile_t*)*NJ->maxnodes);
9479 if (verbose >= 3)
9480 fprintf(stderr,"FreeUpProfiles -- freed %d\n", nUsed);
9481 return(NULL);
9482 }
9483
PathToRoot(NJ_t * NJ,int node,int * outDepth)9484 int *PathToRoot(NJ_t *NJ, int node, /*OUT*/int *outDepth) {
9485 int *pathToRoot = (int*)mymalloc(sizeof(int)*NJ->maxnodes);
9486 int depth = 0;
9487 int ancestor = node;
9488 while(ancestor >= 0) {
9489 pathToRoot[depth] = ancestor;
9490 ancestor = NJ->parent[ancestor];
9491 depth++;
9492 }
9493 *outDepth = depth;
9494 return(pathToRoot);
9495 }
9496
FreePath(int * path,NJ_t * NJ)9497 int *FreePath(int *path, NJ_t *NJ) {
9498 myfree(path, sizeof(int)*NJ->maxnodes);
9499 return(NULL);
9500 }
9501
CreateGTR(double * r,double * f)9502 transition_matrix_t *CreateGTR(double *r/*ac ag at cg ct gt*/, double *f/*acgt*/) {
9503 double matrix[4][MAXCODES];
9504 assert(nCodes==4);
9505 int i, j;
9506 /* Place rates onto a symmetric matrix, but correct by f(target), so that
9507 stationary distribution f[] is maintained
9508 Leave diagonals as 0 (CreateTransitionMatrix will fix them)
9509 */
9510 int imat = 0;
9511 for (i = 0; i < nCodes; i++) {
9512 matrix[i][i] = 0;
9513 for (j = i+1; j < nCodes; j++) {
9514 double rate = r[imat++];
9515 assert(rate > 0);
9516 /* Want t(matrix) * f to be 0 */
9517 matrix[i][j] = rate * f[i];
9518 matrix[j][i] = rate * f[j];
9519 }
9520 }
9521 /* Compute average mutation rate */
9522 double total_rate = 0;
9523 for (i = 0; i < nCodes; i++)
9524 for (j = 0; j < nCodes; j++)
9525 total_rate += f[i] * matrix[i][j];
9526 assert(total_rate > 1e-6);
9527 double inv = 1.0/total_rate;
9528 for (i = 0; i < nCodes; i++)
9529 for (j = 0; j < nCodes; j++)
9530 matrix[i][j] *= inv;
9531 return(CreateTransitionMatrix(matrix,f));
9532 }
9533
CreateTransitionMatrix(double matrix[MAXCODES][MAXCODES],double stat[MAXCODES])9534 transition_matrix_t *CreateTransitionMatrix(/*IN*/double matrix[MAXCODES][MAXCODES],
9535 /*IN*/double stat[MAXCODES]) {
9536 int i,j,k;
9537 transition_matrix_t *transmat = mymalloc(sizeof(transition_matrix_t));
9538 double sqrtstat[20];
9539 for (i = 0; i < nCodes; i++) {
9540 transmat->stat[i] = stat[i];
9541 transmat->statinv[i] = 1.0/stat[i];
9542 sqrtstat[i] = sqrt(stat[i]);
9543 }
9544
9545 double sym[20*20]; /* symmetrized matrix M' */
9546 /* set diagonals so columns sums are 0 before symmetrization */
9547 for (i = 0; i < nCodes; i++)
9548 for (j = 0; j < nCodes; j++)
9549 sym[nCodes*i+j] = matrix[i][j];
9550 for (j = 0; j < nCodes; j++) {
9551 double sum = 0;
9552 sym[nCodes*j+j] = 0;
9553 for (i = 0; i < nCodes; i++)
9554 sum += sym[nCodes*i+j];
9555 sym[nCodes*j+j] = -sum;
9556 }
9557 /* M' = S**-1 M S */
9558 for (i = 0; i < nCodes; i++)
9559 for (j = 0; j < nCodes; j++)
9560 sym[nCodes*i+j] *= sqrtstat[j]/sqrtstat[i];
9561
9562 /* eigen decomposition of M' -- note that eigenW is the transpose of what we want,
9563 which is eigenvectors in columns */
9564 double eigenW[20*20], eval[20], e[20];
9565 for (i = 0; i < nCodes*nCodes; i++)
9566 eigenW[i] = sym[i];
9567 tred2(eigenW, nCodes, nCodes, eval, e);
9568 tqli(eval, e, nCodes , nCodes, eigenW);
9569
9570 /* save eigenvalues */
9571 for (i = 0; i < nCodes; i++)
9572 transmat->eigenval[i] = eval[i];
9573
9574 /* compute eigen decomposition of M into t(codeFreq): V = S*W */
9575 /* compute inverse of V in eigeninv: V**-1 = t(W) S**-1 */
9576 for (i = 0; i < nCodes; i++) {
9577 for (j = 0; j < nCodes; j++) {
9578 transmat->eigeninv[i][j] = eigenW[nCodes*i+j] / sqrtstat[j];
9579 transmat->eigeninvT[j][i] = transmat->eigeninv[i][j];
9580 }
9581 }
9582 for (i = 0; i < nCodes; i++)
9583 for (j = 0; j < nCodes; j++)
9584 transmat->codeFreq[i][j] = eigenW[j*nCodes+i] * sqrtstat[i];
9585 /* codeFreq[NOCODE] is the rotation of (1,1,...) not (1/nCodes,1/nCodes,...), which
9586 gives correct posterior probabilities
9587 */
9588 for (j = 0; j < nCodes; j++) {
9589 transmat->codeFreq[NOCODE][j] = 0.0;
9590 for (i = 0; i < nCodes; i++)
9591 transmat->codeFreq[NOCODE][j] += transmat->codeFreq[i][j];
9592 }
9593 /* save some posterior probabilities for approximating later:
9594 first, we compute P(B | A, t) for t = approxMLnearT, by using
9595 V * exp(L*t) * V**-1 */
9596 double expvalues[MAXCODES];
9597 for (i = 0; i < nCodes; i++)
9598 expvalues[i] = exp(approxMLnearT * transmat->eigenval[i]);
9599 double LVinv[MAXCODES][MAXCODES]; /* exp(L*t) * V**-1 */
9600 for (i = 0; i < nCodes; i++) {
9601 for (j = 0; j < nCodes; j++)
9602 LVinv[i][j] = transmat->eigeninv[i][j] * expvalues[i];
9603 }
9604 /* matrix transform for converting A -> B given t: transt[i][j] = P(j->i | t) */
9605 double transt[MAXCODES][MAXCODES];
9606 for (i = 0; i < nCodes; i++) {
9607 for (j = 0; j < nCodes; j++) {
9608 transt[i][j] = 0;
9609 for (k = 0; k < nCodes; k++)
9610 transt[i][j] += transmat->codeFreq[i][k] * LVinv[k][j];
9611 }
9612 }
9613 /* nearP[i][j] = P(parent = j | both children are i) = P(j | i,i) ~ stat(j) * P(j->i | t)**2 */
9614 for (i = 0; i < nCodes; i++) {
9615 double nearP[MAXCODES];
9616 double tot = 0;
9617 for (j = 0; j < nCodes; j++) {
9618 assert(transt[j][i] > 0);
9619 assert(transmat->stat[j] > 0);
9620 nearP[j] = transmat->stat[j] * transt[i][j] * transt[i][j];
9621 tot += nearP[j];
9622 }
9623 assert(tot > 0);
9624 for (j = 0; j < nCodes; j++)
9625 nearP[j] *= 1.0/tot;
9626 /* save nearP in transmat->nearP[i][] */
9627 for (j = 0; j < nCodes; j++)
9628 transmat->nearP[i][j] = nearP[j];
9629 /* multiply by 1/stat and rotate nearP */
9630 for (j = 0; j < nCodes; j++)
9631 nearP[j] /= transmat->stat[j];
9632 for (j = 0; j < nCodes; j++) {
9633 double rot = 0;
9634 for (k = 0; k < nCodes; k++)
9635 rot += nearP[k] * transmat->codeFreq[i][j];
9636 transmat->nearFreq[i][j] = rot;
9637 }
9638 }
9639 return(transmat);
9640 assert(0);
9641 }
9642
TransMatToDistanceMat(transition_matrix_t * transmat)9643 distance_matrix_t *TransMatToDistanceMat(transition_matrix_t *transmat) {
9644 if (transmat == NULL)
9645 return(NULL);
9646 distance_matrix_t *dmat = mymalloc(sizeof(distance_matrix_t));
9647 int i, j;
9648 for (i=0; i<nCodes; i++) {
9649 for (j=0; j<nCodes; j++) {
9650 dmat->distances[i][j] = 0; /* never actually used */
9651 dmat->eigeninv[i][j] = transmat->eigeninv[i][j];
9652 dmat->codeFreq[i][j] = transmat->codeFreq[i][j];
9653 }
9654 }
9655 /* eigentot . rotated-vector is the total frequency of the unrotated vector
9656 (used to normalize in NormalizeFreq()
9657 For transition matrices, we rotate by transpose of eigenvectors, so
9658 we need to multiply by the inverse matrix by 1....1 to get this vector,
9659 or in other words, sum the columns
9660 */
9661 for(i = 0; i<nCodes; i++) {
9662 dmat->eigentot[i] = 0.0;
9663 for (j = 0; j<nCodes; j++)
9664 dmat->eigentot[i] += transmat->eigeninv[i][j];
9665 }
9666 return(dmat);
9667 }
9668
9669 /* Numerical recipes code for eigen decomposition (actually taken from RAxML rev_functions.c) */
tred2(double * a,const int n,const int np,double * d,double * e)9670 void tred2 (double *a, const int n, const int np, double *d, double *e)
9671 {
9672 #define a(i,j) a[(j-1)*np + (i-1)]
9673 #define e(i) e[i-1]
9674 #define d(i) d[i-1]
9675 int i, j, k, l;
9676 double f, g, h, hh, scale;
9677 for (i = n; i > 1; i--) {
9678 l = i-1;
9679 h = 0;
9680 scale = 0;
9681 if ( l > 1 ) {
9682 for ( k = 1; k <= l; k++ )
9683 scale += fabs(a(i,k));
9684 if (scale == 0)
9685 e(i) = a(i,l);
9686 else {
9687 for (k = 1; k <= l; k++) {
9688 a(i,k) /= scale;
9689 h += a(i,k) * a(i,k);
9690 }
9691 f = a(i,l);
9692 g = -sqrt(h);
9693 if (f < 0) g = -g;
9694 e(i) = scale *g;
9695 h -= f*g;
9696 a(i,l) = f-g;
9697 f = 0;
9698 for (j = 1; j <=l ; j++) {
9699 a(j,i) = a(i,j) / h;
9700 g = 0;
9701 for (k = 1; k <= j; k++)
9702 g += a(j,k)*a(i,k);
9703 for (k = j+1; k <= l; k++)
9704 g += a(k,j)*a(i,k);
9705 e(j) = g/h;
9706 f += e(j)*a(i,j);
9707 }
9708 hh = f/(h+h);
9709 for (j = 1; j <= l; j++) {
9710 f = a(i,j);
9711 g = e(j) - hh * f;
9712 e(j) = g;
9713 for (k = 1; k <= j; k++)
9714 a(j,k) -= f*e(k) + g*a(i,k);
9715 }
9716 }
9717 } else
9718 e(i) = a(i,l);
9719 d(i) = h;
9720 }
9721 d(1) = 0;
9722 e(1) = 0;
9723 for (i = 1; i <= n; i++) {
9724 l = i-1;
9725 if (d(i) != 0) {
9726 for (j = 1; j <=l; j++) {
9727 g = 0;
9728 for (k = 1; k <= l; k++)
9729 g += a(i,k)*a(k,j);
9730 for (k=1; k <=l; k++)
9731 a(k,j) -= g * a(k,i);
9732 }
9733 }
9734 d(i) = a(i,i);
9735 a(i,i) = 1;
9736 for (j=1; j<=l; j++)
9737 a(i,j) = a(j,i) = 0;
9738 }
9739
9740 return;
9741 #undef a
9742 #undef e
9743 #undef d
9744 }
9745
pythag(double a,double b)9746 double pythag(double a, double b) {
9747 double absa = fabs(a), absb = fabs(b);
9748 return (absa > absb) ?
9749 absa * sqrt(1+ (absb/absa)*(absb/absa)) :
9750 absb == 0 ?
9751 0 :
9752 absb * sqrt(1+ (absa/absb)*(absa/absb));
9753 }
9754
tqli(double * d,double * e,int n,int np,double * z)9755 void tqli(double *d, double *e, int n, int np, double *z)
9756 {
9757 #define z(i,j) z[(j-1)*np + (i-1)]
9758 #define e(i) e[i-1]
9759 #define d(i) d[i-1]
9760
9761 int i = 0, iter = 0, k = 0, l = 0, m = 0;
9762 double b = 0, c = 0, dd = 0, f = 0, g = 0, p = 0, r = 0, s = 0;
9763
9764 for(i=2; i<=n; i++)
9765 e(i-1) = e(i);
9766 e(n) = 0;
9767
9768 for (l = 1; l <= n; l++)
9769 {
9770 iter = 0;
9771 labelExtra:
9772
9773 for (m = l; (m < n); m++)
9774 {
9775 dd = fabs(d(m))+fabs(d(m+1));
9776
9777 if (fabs(e(m))+dd == dd)
9778 break;
9779 }
9780
9781 if (m != l)
9782 {
9783 assert(iter < 30);
9784
9785 iter++;
9786 g = (d(l+1)-d(l))/(2*e(l));
9787 r = pythag(g,1.);
9788 g = d(m)-d(l)+e(l)/(g+(g<0?-r:r));
9789 s = 1;
9790 c = 1;
9791 p = 0;
9792
9793 for (i = m-1; i>=l; i--)
9794 {
9795 f = s*e(i);
9796 b = c*e(i);
9797 r = pythag(f,g);
9798
9799 e(i+1) = r;
9800 if (r == 0)
9801 {
9802 d (i+1) -= p;
9803 e (m) = 0;
9804
9805 goto labelExtra;
9806 }
9807 s = f/r;
9808 c = g/r;
9809 g = d(i+1)-p;
9810 r = (d(i)-g)*s + 2*c*b;
9811 p = s*r;
9812 d(i+1) = g + p;
9813 g = c*r - b;
9814 for (k=1; k <= n; k++)
9815 {
9816 f = z(k,i+1);
9817 z(k,i+1) = s * z(k,i) + c*f;
9818 z(k,i) = c * z(k,i) - s*f;
9819 }
9820 }
9821 d(l) -= p;
9822 e(l) = g;
9823 e(m) = 0;
9824
9825 goto labelExtra;
9826 }
9827 }
9828
9829 return;
9830 #undef z
9831 #undef e
9832 #undef d
9833
9834 }
9835
9836 #ifdef USE_SSE3
mm_sum(register __m128 sum)9837 inline float mm_sum(register __m128 sum) {
9838 #if 1
9839 /* stupider but faster */
9840 float f[4] ALIGNED;
9841 _mm_store_ps(f,sum);
9842 return(f[0]+f[1]+f[2]+f[3]);
9843 #else
9844 /* first we get sum[0]+sum[1], sum[2]+sum[3] by selecting 0/1 and 2/3 */
9845 sum = _mm_add_ps(sum,_mm_shuffle_ps(sum,sum,_MM_SHUFFLE(0,1,2,3)));
9846 /* then get sum[0]+sum[1]+sum[2]+sum[3] by selecting 0/1 and 0/1 */
9847 sum = _mm_add_ps(sum,_mm_shuffle_ps(sum,sum,_MM_SHUFFLE(0,1,0,1)));
9848 float f;
9849 _mm_store_ss(&f, sum); /* save the lowest word */
9850 return(f);
9851 #endif
9852 }
9853 #endif
9854
vector_multiply(numeric_t * f1,numeric_t * f2,int n,numeric_t * fOut)9855 void vector_multiply(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, int n, /*OUT*/numeric_t *fOut) {
9856 #ifdef USE_SSE3
9857 int i;
9858 for (i = 0; i < n; i += 4) {
9859 __m128 a, b, c;
9860 a = _mm_load_ps(f1+i);
9861 b = _mm_load_ps(f2+i);
9862 c = _mm_mul_ps(a, b);
9863 _mm_store_ps(fOut+i,c);
9864 }
9865 #else
9866 int i;
9867 for (i = 0; i < n; i++)
9868 fOut[i] = f1[i]*f2[i];
9869 #endif
9870 }
9871
vector_multiply_sum(numeric_t * f1,numeric_t * f2,int n)9872 numeric_t vector_multiply_sum(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, int n) {
9873 #ifdef USE_SSE3
9874 if (n == 4)
9875 return(f1[0]*f2[0]+f1[1]*f2[1]+f1[2]*f2[2]+f1[3]*f2[3]);
9876 __m128 sum = _mm_setzero_ps();
9877 int i;
9878 for (i = 0; i < n; i += 4) {
9879 __m128 a, b, c;
9880 a = _mm_load_ps(f1+i);
9881 b = _mm_load_ps(f2+i);
9882 c = _mm_mul_ps(a, b);
9883 sum = _mm_add_ps(c, sum);
9884 }
9885 return(mm_sum(sum));
9886 #else
9887 int i;
9888 numeric_t out = 0.0;
9889 for (i=0; i < n; i++)
9890 out += f1[i]*f2[i];
9891 return(out);
9892 #endif
9893 }
9894
9895 /* sum(f1*f2*f3) */
vector_multiply3_sum(numeric_t * f1,numeric_t * f2,numeric_t * f3,int n)9896 numeric_t vector_multiply3_sum(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, /*IN*/numeric_t* f3, int n) {
9897 #ifdef USE_SSE3
9898 __m128 sum = _mm_setzero_ps();
9899 int i;
9900 for (i = 0; i < n; i += 4) {
9901 __m128 a1, a2, a3;
9902 a1 = _mm_load_ps(f1+i);
9903 a2 = _mm_load_ps(f2+i);
9904 a3 = _mm_load_ps(f3+i);
9905 sum = _mm_add_ps(_mm_mul_ps(_mm_mul_ps(a1,a2),a3),sum);
9906 }
9907 return(mm_sum(sum));
9908 #else
9909 int i;
9910 numeric_t sum = 0.0;
9911 for (i = 0; i < n; i++)
9912 sum += f1[i]*f2[i]*f3[i];
9913 return(sum);
9914 #endif
9915 }
9916
vector_dot_product_rot(numeric_t * f1,numeric_t * f2,numeric_t * fBy,int n)9917 numeric_t vector_dot_product_rot(/*IN*/numeric_t *f1, /*IN*/numeric_t *f2, /*IN*/numeric_t *fBy, int n) {
9918 #ifdef USE_SSE3
9919 __m128 sum1 = _mm_setzero_ps();
9920 __m128 sum2 = _mm_setzero_ps();
9921 int i;
9922 for (i = 0; i < n; i += 4) {
9923 __m128 a1, a2, aBy;
9924 a1 = _mm_load_ps(f1+i);
9925 a2 = _mm_load_ps(f2+i);
9926 aBy = _mm_load_ps(fBy+i);
9927 sum1 = _mm_add_ps(_mm_mul_ps(a1, aBy), sum1);
9928 sum2 = _mm_add_ps(_mm_mul_ps(a2, aBy), sum2);
9929 }
9930 return(mm_sum(sum1)*mm_sum(sum2));
9931 #else
9932 int i;
9933 numeric_t out1 = 0.0;
9934 numeric_t out2 = 0.0;
9935 for (i=0; i < n; i++) {
9936 out1 += f1[i]*fBy[i];
9937 out2 += f2[i]*fBy[i];
9938 }
9939 return(out1*out2);
9940 #endif
9941 }
9942
vector_sum(numeric_t * f1,int n)9943 numeric_t vector_sum(/*IN*/numeric_t *f1, int n) {
9944 #ifdef USE_SSE3
9945 if (n==4)
9946 return(f1[0]+f1[1]+f1[2]+f1[3]);
9947 __m128 sum = _mm_setzero_ps();
9948 int i;
9949 for (i = 0; i < n; i+=4) {
9950 __m128 a;
9951 a = _mm_load_ps(f1+i);
9952 sum = _mm_add_ps(a, sum);
9953 }
9954 return(mm_sum(sum));
9955 #else
9956 numeric_t out = 0.0;
9957 int i;
9958 for (i = 0; i < n; i++)
9959 out += f1[i];
9960 return(out);
9961 #endif
9962 }
9963
vector_multiply_by(numeric_t * f,numeric_t fBy,int n)9964 void vector_multiply_by(/*IN/OUT*/numeric_t *f, /*IN*/numeric_t fBy, int n) {
9965 int i;
9966 #ifdef USE_SSE3
9967 __m128 c = _mm_set1_ps(fBy);
9968 for (i = 0; i < n; i += 4) {
9969 __m128 a, b;
9970 a = _mm_load_ps(f+i);
9971 b = _mm_mul_ps(a,c);
9972 _mm_store_ps(f+i,b);
9973 }
9974 #else
9975 for (i = 0; i < n; i++)
9976 f[i] *= fBy;
9977 #endif
9978 }
9979
vector_add_mult(numeric_t * fTot,numeric_t * fAdd,numeric_t weight,int n)9980 void vector_add_mult(/*IN/OUT*/numeric_t *fTot, /*IN*/numeric_t *fAdd, numeric_t weight, int n) {
9981 #ifdef USE_SSE3
9982 int i;
9983 __m128 w = _mm_set1_ps(weight);
9984 for (i = 0; i < n; i += 4) {
9985 __m128 tot, add;
9986 tot = _mm_load_ps(fTot+i);
9987 add = _mm_load_ps(fAdd+i);
9988 _mm_store_ps(fTot+i, _mm_add_ps(tot, _mm_mul_ps(add,w)));
9989 }
9990 #else
9991 int i;
9992 for (i = 0; i < n; i++)
9993 fTot[i] += fAdd[i] * weight;
9994 #endif
9995 }
9996
matrixt_by_vector4(numeric_t mat[4][MAXCODES],numeric_t vec[4],numeric_t out[4])9997 void matrixt_by_vector4(/*IN*/numeric_t mat[4][MAXCODES], /*IN*/numeric_t vec[4], /*OUT*/numeric_t out[4]) {
9998 #ifdef USE_SSE3
9999 /*__m128 v = _mm_load_ps(vec);*/
10000 __m128 o = _mm_setzero_ps();
10001 int j;
10002 /* result is a sum of vectors: sum(k) v[k] * mat[k][] */
10003 for (j = 0; j < 4; j++) {
10004 __m128 m = _mm_load_ps(&mat[j][0]);
10005 __m128 vj = _mm_load1_ps(&vec[j]); /* is it faster to shuffle v? */
10006 o = _mm_add_ps(o, _mm_mul_ps(vj,m));
10007 }
10008 _mm_store_ps(out, o);
10009 #else
10010 int j,k;
10011 for (j = 0; j < 4; j++) {
10012 double sum = 0;
10013 for (k = 0; k < 4; k++)
10014 sum += vec[k] * mat[k][j];
10015 out[j] = sum;
10016 }
10017 #endif
10018 }
10019
10020 distance_matrix_t matrixBLOSUM45 =
10021 {
10022 /*distances*/
10023 {
10024 {0, 1.31097856157468, 1.06573001937323, 1.2682782988532, 0.90471293383305, 1.05855446876905, 1.05232790675508, 0.769574440593014, 1.27579668305679, 0.964604099952603, 0.987178199640556, 1.05007594438157, 1.05464162250736, 1.1985987403937, 0.967404475245526, 0.700490199584332, 0.880060189098976, 1.09748548316685, 1.28141710375267, 0.800038509951648},
10025 {1.31097856157468, 0, 0.8010890222701, 0.953340718498495, 1.36011107208122, 0.631543775840481, 0.791014908659279, 1.15694899265629, 0.761152570032029, 1.45014917711188, 1.17792001455227, 0.394661075648738, 0.998807558909651, 1.135143404599, 1.15432562628921, 1.05309036790541, 1.05010474413616, 1.03938321130789, 0.963216908696184, 1.20274751778601},
10026 {1.06573001937323, 0.8010890222701, 0, 0.488217214273568, 1.10567116937273, 0.814970207038261, 0.810176440932339, 0.746487413974582, 0.61876156253224, 1.17886558630004, 1.52003670190022, 0.808442678243754, 1.2889025816028, 1.16264109995678, 1.18228799147301, 0.679475681649858, 0.853658619686283, 1.68988558988005, 1.24297493464833, 1.55207513886163},
10027 {1.2682782988532, 0.953340718498495, 0.488217214273568, 0, 1.31581050011876, 0.769778474953791, 0.482077627352988, 0.888361752320536, 0.736360849050364, 1.76756333403346, 1.43574761894039, 0.763612910719347, 1.53386612356483, 1.74323672079854, 0.886347403928663, 0.808614044804528, 1.01590147813779, 1.59617804551619, 1.1740494822217, 1.46600946033173},
10028 {0.90471293383305, 1.36011107208122, 1.10567116937273, 1.31581050011876, 0, 1.3836789310481, 1.37553994252576, 1.26740695314856, 1.32361065635259, 1.26087264215993, 1.02417540515351, 1.37259631233791, 1.09416720447891, 0.986982088723923, 1.59321190226694, 0.915638787768407, 0.913042853922533, 1.80744143643002, 1.3294417177004, 0.830022143283238},
10029 {1.05855446876905, 0.631543775840481, 0.814970207038261, 0.769778474953791, 1.3836789310481, 0, 0.506942797642807, 1.17699648087288, 0.614595446514896, 1.17092829494457, 1.19833088638994, 0.637341078675405, 0.806490842729072, 1.83315144709714, 0.932064479113502, 0.850321696813199, 1.06830084665916, 1.05739353225849, 0.979907428113788, 1.5416250309563},
10030 {1.05232790675508, 0.791014908659279, 0.810176440932339, 0.482077627352988, 1.37553994252576, 0.506942797642807, 0, 1.17007322676118, 0.769786956320484, 1.46659942462342, 1.19128214039009, 0.633592151371708, 1.27269395724349, 1.44641491621774, 0.735428579892476, 0.845319988414402, 1.06201695511881, 1.324395996498, 1.22734387448031, 1.53255698189437},
10031 {0.769574440593014, 1.15694899265629, 0.746487413974582, 0.888361752320536, 1.26740695314856, 1.17699648087288, 1.17007322676118, 0, 1.1259007054424, 1.7025415585924, 1.38293205218175, 1.16756929156758, 1.17264582493965, 1.33271035269688, 1.07564768421292, 0.778868281341681, 1.23287107008366, 0.968539655354582, 1.42479529031801, 1.41208067821187},
10032 {1.27579668305679, 0.761152570032029, 0.61876156253224, 0.736360849050364, 1.32361065635259, 0.614595446514896, 0.769786956320484, 1.1259007054424, 0, 1.4112324673522, 1.14630894167097, 0.967795284542623, 0.771479459384692, 1.10468029976148, 1.12334774065132, 1.02482926701639, 1.28754326478771, 1.27439749294131, 0.468683841672724, 1.47469999960758},
10033 {0.964604099952603, 1.45014917711188, 1.17886558630004, 1.76756333403346, 1.26087264215993, 1.17092829494457, 1.46659942462342, 1.7025415585924, 1.4112324673522, 0, 0.433350517223017, 1.463460928818, 0.462965544381851, 0.66291968000662, 1.07010201755441, 1.23000200130049, 0.973485453109068, 0.963546200571036, 0.708724769805536, 0.351200119909572},
10034 {0.987178199640556, 1.17792001455227, 1.52003670190022, 1.43574761894039, 1.02417540515351, 1.19833088638994, 1.19128214039009, 1.38293205218175, 1.14630894167097, 0.433350517223017, 0, 1.49770950074319, 0.473800072611076, 0.538473125003292, 1.37979627224964, 1.5859723170438, 0.996267398224516, 0.986095542821092, 0.725310666139274, 0.570542199221932},
10035 {1.05007594438157, 0.394661075648738, 0.808442678243754, 0.763612910719347, 1.37259631233791, 0.637341078675405, 0.633592151371708, 1.16756929156758, 0.967795284542623, 1.463460928818, 1.49770950074319, 0, 1.0079761868248, 1.44331961488922, 0.924599080166146, 1.06275728888356, 1.05974425835993, 1.04892430642749, 0.972058829603409, 1.21378822764856},
10036 {1.05464162250736, 0.998807558909651, 1.2889025816028, 1.53386612356483, 1.09416720447891, 0.806490842729072, 1.27269395724349, 1.17264582493965, 0.771479459384692, 0.462965544381851, 0.473800072611076, 1.0079761868248, 0, 0.72479754849538, 1.1699868662153, 1.34481214251794, 1.06435197383538, 1.05348497728858, 0.774878150710318, 0.609532859331199},
10037 {1.1985987403937, 1.135143404599, 1.16264109995678, 1.74323672079854, 0.986982088723923, 1.83315144709714, 1.44641491621774, 1.33271035269688, 1.10468029976148, 0.66291968000662, 0.538473125003292, 1.44331961488922, 0.72479754849538, 0, 1.32968844979665, 1.21307373491949, 0.960087571600877, 0.475142555482979, 0.349485367759138, 0.692733248746636},
10038 {0.967404475245526, 1.15432562628921, 1.18228799147301, 0.886347403928663, 1.59321190226694, 0.932064479113502, 0.735428579892476, 1.07564768421292, 1.12334774065132, 1.07010201755441, 1.37979627224964, 0.924599080166146, 1.1699868662153, 1.32968844979665, 0, 0.979087429691819, 0.97631161216338, 1.21751652292503, 1.42156458605332, 1.40887880416009},
10039 {0.700490199584332, 1.05309036790541, 0.679475681649858, 0.808614044804528, 0.915638787768407, 0.850321696813199, 0.845319988414402, 0.778868281341681, 1.02482926701639, 1.23000200130049, 1.5859723170438, 1.06275728888356, 1.34481214251794, 1.21307373491949, 0.979087429691819, 0, 0.56109848274013, 1.76318885009194, 1.29689226231656, 1.02015839286433},
10040 {0.880060189098976, 1.05010474413616, 0.853658619686283, 1.01590147813779, 0.913042853922533, 1.06830084665916, 1.06201695511881, 1.23287107008366, 1.28754326478771, 0.973485453109068, 0.996267398224516, 1.05974425835993, 1.06435197383538, 0.960087571600877, 0.97631161216338, 0.56109848274013, 0, 1.39547634461879, 1.02642577026706, 0.807404666228614},
10041 {1.09748548316685, 1.03938321130789, 1.68988558988005, 1.59617804551619, 1.80744143643002, 1.05739353225849, 1.324395996498, 0.968539655354582, 1.27439749294131, 0.963546200571036, 0.986095542821092, 1.04892430642749, 1.05348497728858, 0.475142555482979, 1.21751652292503, 1.76318885009194, 1.39547634461879, 0, 0.320002937404137, 1.268589159299},
10042 {1.28141710375267, 0.963216908696184, 1.24297493464833, 1.1740494822217, 1.3294417177004, 0.979907428113788, 1.22734387448031, 1.42479529031801, 0.468683841672724, 0.708724769805536, 0.725310666139274, 0.972058829603409, 0.774878150710318, 0.349485367759138, 1.42156458605332, 1.29689226231656, 1.02642577026706, 0.320002937404137, 0, 0.933095433689795},
10043 {0.800038509951648, 1.20274751778601, 1.55207513886163, 1.46600946033173, 0.830022143283238, 1.5416250309563, 1.53255698189437, 1.41208067821187, 1.47469999960758, 0.351200119909572, 0.570542199221932, 1.21378822764856, 0.609532859331199, 0.692733248746636, 1.40887880416009, 1.02015839286433, 0.807404666228614, 1.268589159299, 0.933095433689795, 0}
10044 },
10045 /*eigeninv*/
10046 {
10047 {-0.216311217101265, -0.215171653035930, -0.217000020881064, -0.232890860601250, -0.25403526530177, -0.211569372858927, -0.218073620637049, -0.240585637190076, -0.214507049619293, -0.228476323330312, -0.223235445346107, -0.216116483840334, -0.206903836810903, -0.223553828183343, -0.236937609127783, -0.217652789023588, -0.211982652566286, -0.245995223308316, -0.206187718714279, -0.227670670439422},
10048 {-0.0843931919568687, -0.0342164464991033, 0.393702284928246, -0.166018266253027, 0.0500896782860136, -0.262731388032538, 0.030139964190519, -0.253997503551094, -0.0932603349591988, -0.32884667697173, 0.199966846276877, -0.117543453869516, 0.196248237055757, -0.456448703853250, 0.139286961076387, 0.241166801918811, -0.0783508285295053, 0.377438091416498, 0.109499076984234, 0.128581669647144},
10049 {-0.0690428674271772, 0.0133858672878363, -0.208289917312908, 0.161232925220819, 0.0735806288007248, -0.316269599838174, -0.0640708424745702, -0.117078801507436, 0.360805085405857, 0.336899760384943, 0.0332447078185156, 0.132954055834276, 0.00595209121998118, -0.157755611190327, -0.199839273133436, 0.193688928807663, 0.0970290928040946, 0.374683975138541, -0.478110944870958, -0.243290196936098},
10050 {0.117284581850481, 0.310399467781876, -0.143513477698805, 0.088808130300351, 0.105747812943691, -0.373871701179853, 0.189069306295134, 0.133258225034741, -0.213043549687694, 0.301303731259140, -0.182085224761849, -0.161971915020789, 0.229301173581378, -0.293586313243755, -0.0260480060747498, -0.0217953684540699, 0.0202675755458796, -0.160134624443657, 0.431950096999465, -0.329885160320501},
10051 {0.256496969244703, 0.0907408349583135, 0.0135731083898029, 0.477557831930769, -0.0727379669280703, 0.101732675207959, -0.147293025369251, -0.348325291603251, -0.255678082078362, -0.187092643740172, -0.177164064346593, -0.225921480146133, 0.422318841046522, 0.319959853469398, -0.0623652546300045, 0.0824203908606883, -0.102057926881110, 0.120728407576411, -0.156845807891241, -0.123528163091204},
10052 {-0.00906668858975576, -0.0814722888231236, -0.0762715085459023, 0.055819989938286, -0.0540516675257271, -0.0070589302769034, -0.315813159989213, -0.0103527463419808, -0.194634331372293, -0.0185860407566822, 0.50134169352609, 0.384531812730061, -0.0405008616742061, 0.0781033650669525, 0.069334900096687, 0.396455180448549, -0.204065801866462, -0.215272089630713, 0.171046818996465, -0.396393364716348},
10053 {0.201971098571663, 0.489747667606921, 0.00226258734592836, 0.0969514005747054, 0.0853921636903791, 0.0862068740282345, -0.465412154271164, -0.130516676347786, 0.165513616974634, 0.0712238027886633, 0.140746943067963, -0.325919272273406, -0.421213488261598, -0.163508199065965, 0.269695802810568, -0.110296405171437, -0.106834099902202, 0.00509414588152415, 0.00909215239544615, 0.0500401865589727},
10054 {0.515854176692456, -0.087468413428258, 0.102796468891449, -0.06046105990993, -0.212014383772414, -0.259853648383794, -0.0997372883043333, -0.109934574535736, 0.284891018406112, -0.250578342940183, 0.142174204994568, 0.210384918947619, 0.118803190788946, -0.0268434355996836, 0.0103721198836548, -0.355555176478458, 0.428042332431476, -0.150610175411631, 0.0464090887952940, -0.140238796382057},
10055 {-0.239392215229762, -0.315483492656425, 0.100205194952396, 0.197830195325302, 0.40178804665223, 0.195809461460298, -0.407817115321684, 0.0226836686147386, -0.169780276210306, 0.0818161585952184, -0.172886230584939, 0.174982644851064, 0.0868786992159535, -0.198450519980824, 0.168581078329968, -0.361514336004068, 0.238668430084722, 0.165494019791904, 0.110437707249228, -0.169592003035203},
10056 {-0.313151735678025, 0.10757884850664, -0.49249098807229, 0.0993472335619114, -0.148695715250836, 0.0573801136941699, -0.190040373500722, 0.254848437434773, 0.134147888304352, -0.352719341442756, 0.0839609323513986, -0.207904182300122, 0.253940523323376, -0.109832138553288, 0.0980084518687944, 0.209026594443723, 0.406236051871548, -0.0521120230935943, 0.0554108014592302, 0.134681046631955},
10057 {-0.102905214421384, 0.235803606800009, 0.213414976431981, -0.253606415825635, 0.00945656859370683, 0.259551282655855, 0.159527348902192, 0.083218761193016, -0.286815935191867, 0.0135069477264877, 0.336758103107357, -0.271707359524149, -0.0400009875851839, 0.0871186292716414, -0.171506310409388, -0.0954276577211755, 0.393467571460712, 0.111732846649458, -0.239886066474217, -0.426474828195231},
10058 {-0.0130795552324104, 0.0758967690968058, -0.165099404017689, -0.46035152559912, 0.409888158016031, -0.0235053940299396, 0.0699393201709723, -0.161320910316996, 0.226111732196825, -0.177811841258496, -0.219073917645916, -0.00703219376737286, 0.162831878334912, 0.271670554900684, 0.451033612762052, 0.0820942662443393, -0.0904983490498446, -0.0587000279313978, -0.0938852980928252, -0.306078621571843},
10059 {0.345092040577428, -0.257721588971295, -0.301689123771848, -0.0875212184538126, 0.161012613069275, 0.385104899829821, 0.118355290985046, -0.241723794416731, 0.083201920119646, -0.0809095291508749, -0.0820275390511991, -0.115569770103317, -0.250105681098033, -0.164197583037664, -0.299481453795592, 0.255906951902366, 0.129042051416371, 0.203761730442746, 0.347550071284268, -0.109264854744020},
10060 {0.056345924962239, 0.072536751679082, 0.303127492633681, -0.368877185781648, -0.343024497082421, 0.206879529669083, -0.413012709639426, 0.078538816203612, 0.103382383425097, 0.288319996147499, -0.392663258459423, 0.0319588502083897, 0.220316797792669, -0.0563686494606947, -0.0869286063283735, 0.323677017794391, 0.0984875197088935, -0.0303289828821742, 0.0450197853450979, -0.0261771221270139},
10061 {-0.253701638374729, -0.148922815783583, 0.111794052194159, 0.157313977830326, -0.269846001260543, -0.222989872703583, 0.115441028189268, -0.350456582262355, -0.0409581422905941, 0.174078744248002, -0.130673397086811, -0.123963802708056, -0.351609207081548, 0.281548012920868, 0.340382662112428, 0.180262131025562, 0.3895263830793, 0.0121546812430960, 0.214830943227063, -0.0617782909660214},
10062 {-0.025854479416026, 0.480654788977767, -0.138024550829229, -0.130191670810919, 0.107816875829919, -0.111243997319276, -0.0679814460571245, -0.183167991080677, -0.363355166018786, -0.183934891092050, -0.216097125080962, 0.520240628803255, -0.179616013606479, 0.0664131536100941, -0.178350708111064, 0.0352047611606709, 0.223857228692892, 0.128363679623513, -0.000403433628490731, 0.224972110977704},
10063 {0.159207394033448, -0.0371517305736114, -0.294302634912281, -0.0866954375908417, -0.259998567870054, 0.284966673982689, 0.205356416771391, -0.257613708650298, -0.264820519037270, 0.293359248624603, 0.0997476397434102, 0.151390539497369, 0.165571346773648, -0.347569523551258, 0.43792310820533, -0.0723248163210163, 0.0379214984816955, -0.0542758730251438, -0.258020301801603, 0.128680501102363},
10064 {0.316853842351797, -0.153950010941153, -0.13387065213508, -0.0702971390607613, -0.202558481846057, -0.172941438694837, -0.068882524588574, 0.524738203063889, -0.271670479920716, -0.112864756695310, -0.146831636946145, -0.0352336188578041, -0.211108490884767, 0.097857111349555, 0.276459740956662, 0.0231297536754823, -0.0773173324868396, 0.487208384389438, -0.0734191389266824, -0.113198765573319},
10065 {-0.274285525741087, 0.227334266052039, -0.0973746625709059, -0.00965256583655389, -0.402438444750043, 0.198586229519026, 0.0958135064575833, -0.108934376958686, 0.253641732094319, -0.0551918478254021, 0.0243640218331436, 0.181936272247179, 0.090952738347629, 0.0603352483029044, -0.0043821671755761, -0.347720824658591, -0.267879988539971, 0.403804652116592, 0.337654323971186, -0.241509293972297},
10066 {-0.0197089518344238, 0.139681034626696, 0.251980475788267, 0.341846624362846, -0.075141195125153, 0.2184951591319, 0.268870823491343, 0.150392399018138, 0.134592404015057, -0.337050200539163, -0.313109373497998, 0.201993318439135, -0.217140733851970, -0.337622749083808, 0.135253284365068, 0.181729249828045, -0.00627813335422765, -0.197218833324039, -0.194060005031698, -0.303055888528004}
10067 },
10068 /*eigenval*/
10069 {
10070 20.29131, 0.5045685, 0.2769945, 0.1551147, 0.03235484, -0.04127639, -0.3516426, -0.469973, -0.5835191, -0.6913107, -0.7207972, -0.7907875, -0.9524307, -1.095310, -1.402153, -1.424179, -1.936704, -2.037965, -3.273561, -5.488734
10071 },
10072 /*eigentot and codeFreq left out, these are initialized elsewhere*/
10073 };
10074
10075 /* The JTT92 matrix, D. T. Jones, W. R. Taylor, & J. M. Thorton, CABIOS 8:275 (1992)
10076 Derived from the PhyML source code (models.c) by filling in the other side of the symmetric matrix,
10077 scaling the entries by the stationary rate (to give the rate of a->b not b|a), to set the diagonals
10078 so the rows sum to 0, to rescale the matrix so that the implied rate of evolution is 1.
10079 The resulting matrix is the transpose (I think).
10080 */
10081 #if 0
10082 {
10083 int i,j;
10084 for (i=0; i<20; i++) for (j=0; j<i; j++) daa[j*20+i] = daa[i*20+j];
10085 for (i = 0; i < 20; i++) for (j = 0; j < 20; j++) daa[i*20+j] *= pi[j] / 100.0;
10086 double mr = 0; /* mean rate */
10087 for (i = 0; i < 20; i++) {
10088 double sum = 0;
10089 for (j = 0; j < 20; j++)
10090 sum += daa[i*20+j];
10091 daa[i*20+i] = -sum;
10092 mr += pi[i] * sum;
10093 }
10094 for (i = 0; i < 20*20; i++)
10095 daa[i] /= mr;
10096 }
10097 #endif
10098
10099 double statJTT92[MAXCODES] = {0.07674789,0.05169087,0.04264509,0.05154407,0.01980301,0.04075195,0.06182989,0.07315199,0.02294399,0.05376110,0.09190390,0.05867583,0.02382594,0.04012589,0.05090097,0.06876503,0.05856501,0.01426057,0.03210196,0.06600504};
10100 double matrixJTT92[MAXCODES][MAXCODES] = {
10101 { -1.247831,0.044229,0.041179,0.061769,0.042704,0.043467,0.08007,0.136501,0.02059,0.027453,0.022877,0.02669,0.041179,0.011439,0.14794,0.288253,0.362223,0.006863,0.008388,0.227247 },
10102 { 0.029789,-1.025965,0.023112,0.008218,0.058038,0.159218,0.014895,0.070364,0.168463,0.011299,0.019517,0.33179,0.022599,0.002568,0.038007,0.051874,0.032871,0.064714,0.010272,0.008731 },
10103 { 0.022881,0.019068,-1.280568,0.223727,0.014407,0.03644,0.024576,0.034322,0.165676,0.019915,0.005085,0.11144,0.012712,0.004237,0.006356,0.213134,0.098304,0.00339,0.029661,0.00678 },
10104 { 0.041484,0.008194,0.270413,-1.044903,0.005121,0.025095,0.392816,0.066579,0.05736,0.005634,0.003585,0.013316,0.007682,0.002049,0.007682,0.030217,0.019462,0.002049,0.023559,0.015877 },
10105 { 0.011019,0.022234,0.00669,0.001968,-0.56571,0.001771,0.000984,0.011609,0.013577,0.003345,0.004526,0.001377,0.0061,0.015348,0.002755,0.043878,0.008264,0.022628,0.041124,0.012199 },
10106 { 0.02308,0.125524,0.034823,0.019841,0.003644,-1.04415,0.130788,0.010528,0.241735,0.003644,0.029154,0.118235,0.017411,0.00162,0.066406,0.021461,0.020651,0.007288,0.009718,0.008098 },
10107 { 0.064507,0.017816,0.035632,0.471205,0.003072,0.198435,-0.944343,0.073107,0.015973,0.007372,0.005529,0.111197,0.011058,0.003072,0.011058,0.01843,0.019659,0.006143,0.0043,0.027646 },
10108 { 0.130105,0.099578,0.058874,0.09449,0.042884,0.018898,0.086495,-0.647831,0.016717,0.004361,0.004361,0.019625,0.010176,0.003634,0.017444,0.146096,0.023986,0.039976,0.005815,0.034162 },
10109 { 0.006155,0.074775,0.089138,0.025533,0.01573,0.1361,0.005927,0.005243,-1.135695,0.003648,0.012767,0.010259,0.007523,0.009119,0.026217,0.016642,0.010487,0.001824,0.130629,0.002508 },
10110 { 0.01923,0.011752,0.025106,0.005876,0.009081,0.004808,0.00641,0.003205,0.008547,-1.273602,0.122326,0.011218,0.25587,0.047542,0.005342,0.021367,0.130873,0.004808,0.017094,0.513342 },
10111 { 0.027395,0.0347,0.010958,0.006392,0.021003,0.065748,0.008219,0.005479,0.051137,0.209115,-0.668139,0.012784,0.354309,0.226465,0.093143,0.053877,0.022829,0.047485,0.021916,0.16437 },
10112 { 0.020405,0.376625,0.153332,0.015158,0.004081,0.170239,0.105525,0.015741,0.026235,0.012243,0.008162,-0.900734,0.037896,0.002332,0.012243,0.027401,0.06005,0.00583,0.004664,0.008162 },
10113 { 0.012784,0.010416,0.007102,0.003551,0.007339,0.01018,0.004261,0.003314,0.007812,0.113397,0.091854,0.015388,-1.182051,0.01018,0.003788,0.006865,0.053503,0.005682,0.004261,0.076466 },
10114 { 0.00598,0.001993,0.003987,0.001595,0.031098,0.001595,0.001993,0.001993,0.015948,0.035484,0.098877,0.001595,0.017144,-0.637182,0.006778,0.03668,0.004784,0.021131,0.213701,0.024719 },
10115 { 0.098117,0.037426,0.007586,0.007586,0.007081,0.082944,0.009104,0.012138,0.058162,0.005058,0.051587,0.010621,0.008092,0.008598,-0.727675,0.144141,0.059679,0.003035,0.005058,0.011632 },
10116 { 0.258271,0.069009,0.343678,0.040312,0.152366,0.036213,0.020498,0.137334,0.049878,0.02733,0.040312,0.032113,0.019814,0.06286,0.194728,-1.447863,0.325913,0.023914,0.043045,0.025964 },
10117 { 0.276406,0.037242,0.135003,0.022112,0.02444,0.029677,0.018621,0.019203,0.026768,0.142567,0.014548,0.059936,0.131511,0.006983,0.068665,0.27757,-1.335389,0.006983,0.01222,0.065174 },
10118 { 0.001275,0.017854,0.001134,0.000567,0.016295,0.002551,0.001417,0.007793,0.001134,0.001275,0.007368,0.001417,0.003401,0.00751,0.00085,0.004959,0.0017,-0.312785,0.010061,0.003542 },
10119 { 0.003509,0.006379,0.022328,0.014673,0.066664,0.007655,0.002233,0.002552,0.182769,0.010207,0.007655,0.002552,0.005741,0.170967,0.00319,0.020095,0.006698,0.022647,-0.605978,0.005103 },
10120 { 0.195438,0.011149,0.010493,0.020331,0.040662,0.013117,0.029512,0.030824,0.007214,0.630254,0.11805,0.009182,0.211834,0.040662,0.015084,0.024922,0.073453,0.016396,0.010493,-1.241722 }
10121 };
10122
10123 double statWAG01[MAXCODES] = {0.0866279,0.043972, 0.0390894,0.0570451,0.0193078,0.0367281,0.0580589,0.0832518,0.0244314,0.048466, 0.086209, 0.0620286,0.0195027,0.0384319,0.0457631,0.0695179,0.0610127,0.0143859,0.0352742,0.0708956};
10124 double matrixWAG01[MAXCODES][MAXCODES] = {
10125 {-1.117151, 0.050147, 0.046354, 0.067188, 0.093376, 0.082607, 0.143908, 0.128804, 0.028817, 0.017577, 0.036177, 0.082395, 0.081234, 0.019138, 0.130789, 0.306463, 0.192846, 0.010286, 0.021887, 0.182381},
10126 {0.025455, -0.974318, 0.029321, 0.006798, 0.024376, 0.140086, 0.020267, 0.026982, 0.098628, 0.008629, 0.022967, 0.246964, 0.031527, 0.004740, 0.031358, 0.056495, 0.025586, 0.053714, 0.017607, 0.011623},
10127 {0.020916, 0.026065, -1.452438, 0.222741, 0.010882, 0.063328, 0.038859, 0.046176, 0.162306, 0.022737, 0.005396, 0.123567, 0.008132, 0.003945, 0.008003, 0.163042, 0.083283, 0.002950, 0.044553, 0.008051},
10128 {0.044244, 0.008819, 0.325058, -0.989665, 0.001814, 0.036927, 0.369645, 0.051822, 0.055719, 0.002361, 0.005077, 0.028729, 0.006212, 0.002798, 0.025384, 0.064166, 0.022443, 0.007769, 0.019500, 0.009120},
10129 {0.020812, 0.010703, 0.005375, 0.000614, -0.487357, 0.002002, 0.000433, 0.006214, 0.005045, 0.003448, 0.007787, 0.001500, 0.007913, 0.008065, 0.002217, 0.028525, 0.010395, 0.014531, 0.011020, 0.020307},
10130 {0.035023, 0.117008, 0.059502, 0.023775, 0.003809, -1.379785, 0.210830, 0.012722, 0.165524, 0.004391, 0.033516, 0.150135, 0.059565, 0.003852, 0.035978, 0.039660, 0.033070, 0.008316, 0.008777, 0.011613},
10131 {0.096449, 0.026759, 0.057716, 0.376214, 0.001301, 0.333275, -1.236894, 0.034593, 0.034734, 0.007763, 0.009400, 0.157479, 0.019202, 0.004944, 0.041578, 0.042955, 0.050134, 0.009540, 0.011961, 0.035874},
10132 {0.123784, 0.051085, 0.098345, 0.075630, 0.026795, 0.028838, 0.049604, -0.497615, 0.021792, 0.002661, 0.005356, 0.032639, 0.015212, 0.004363, 0.021282, 0.117240, 0.019732, 0.029444, 0.009052, 0.016361},
10133 {0.008127, 0.054799, 0.101443, 0.023863, 0.006384, 0.110105, 0.014616, 0.006395, -0.992342, 0.003543, 0.012807, 0.022832, 0.010363, 0.017420, 0.017851, 0.018979, 0.012136, 0.006733, 0.099319, 0.003035},
10134 {0.009834, 0.009511, 0.028192, 0.002006, 0.008654, 0.005794, 0.006480, 0.001549, 0.007029, -1.233162, 0.161294, 0.016472, 0.216559, 0.053891, 0.005083, 0.016249, 0.074170, 0.010808, 0.021372, 0.397837},
10135 {0.036002, 0.045028, 0.011900, 0.007673, 0.034769, 0.078669, 0.013957, 0.005547, 0.045190, 0.286902, -0.726011, 0.023303, 0.439180, 0.191376, 0.037625, 0.031191, 0.029552, 0.060196, 0.036066, 0.162890},
10136 {0.058998, 0.348377, 0.196082, 0.031239, 0.004820, 0.253558, 0.168246, 0.024319, 0.057967, 0.021081, 0.016767, -1.124580, 0.060821, 0.005783, 0.036254, 0.062960, 0.090292, 0.008952, 0.008675, 0.019884},
10137 {0.018288, 0.013983, 0.004057, 0.002124, 0.007993, 0.031629, 0.006450, 0.003564, 0.008272, 0.087143, 0.099354, 0.019123, -1.322098, 0.024370, 0.003507, 0.010109, 0.031033, 0.010556, 0.008769, 0.042133},
10138 {0.008490, 0.004143, 0.003879, 0.001885, 0.016054, 0.004030, 0.003273, 0.002014, 0.027402, 0.042734, 0.085315, 0.003583, 0.048024, -0.713669, 0.006512, 0.022020, 0.006934, 0.061698, 0.260332, 0.026213},
10139 {0.069092, 0.032635, 0.009370, 0.020364, 0.005255, 0.044829, 0.032773, 0.011698, 0.033438, 0.004799, 0.019973, 0.026747, 0.008229, 0.007754, -0.605590, 0.077484, 0.038202, 0.006695, 0.010376, 0.015124},
10140 {0.245933, 0.089317, 0.289960, 0.078196, 0.102703, 0.075066, 0.051432, 0.097899, 0.054003, 0.023306, 0.025152, 0.070562, 0.036035, 0.039831, 0.117705, -1.392239, 0.319421, 0.038212, 0.057419, 0.016981},
10141 {0.135823, 0.035501, 0.129992, 0.024004, 0.032848, 0.054936, 0.052685, 0.014461, 0.030308, 0.093371, 0.020915, 0.088814, 0.097083, 0.011008, 0.050931, 0.280341, -1.154973, 0.007099, 0.018643, 0.088894},
10142 {0.001708, 0.017573, 0.001086, 0.001959, 0.010826, 0.003257, 0.002364, 0.005088, 0.003964, 0.003208, 0.010045, 0.002076, 0.007786, 0.023095, 0.002105, 0.007908, 0.001674, -0.466694, 0.037525, 0.005516},
10143 {0.008912, 0.014125, 0.040205, 0.012058, 0.020133, 0.008430, 0.007267, 0.003836, 0.143398, 0.015555, 0.014757, 0.004934, 0.015861, 0.238943, 0.007998, 0.029135, 0.010779, 0.092011, -0.726275, 0.011652},
10144 {0.149259, 0.018739, 0.014602, 0.011335, 0.074565, 0.022417, 0.043805, 0.013932, 0.008807, 0.581952, 0.133956, 0.022726, 0.153161, 0.048356, 0.023429, 0.017317, 0.103293, 0.027186, 0.023418, -1.085487},
10145 };
10146
10147 /* Le-Gascuel 2008 model data from Harry Yoo
10148 https://github.com/hyoo/FastTree
10149 */
10150 double statLG08[MAXCODES] = {0.079066, 0.055941, 0.041977, 0.053052, 0.012937, 0.040767, 0.071586, 0.057337, 0.022355, 0.062157, 0.099081, 0.0646, 0.022951, 0.042302, 0.04404, 0.061197, 0.053287, 0.012066, 0.034155, 0.069147};
10151
10152 double matrixLG08[MAXCODES][MAXCODES] = {
10153 {-1.08959879,0.03361031,0.02188683,0.03124237,0.19680136,0.07668542,0.08211337,0.16335306,0.02837339,0.01184642,0.03125763,0.04242021,0.08887270,0.02005907,0.09311189,0.37375830,0.16916131,0.01428853,0.01731216,0.20144931},
10154 {0.02378006,-0.88334349,0.04206069,0.00693409,0.02990323,0.15707674,0.02036079,0.02182767,0.13574610,0.00710398,0.01688563,0.35388551,0.02708281,0.00294931,0.01860218,0.04800569,0.03238902,0.03320688,0.01759004,0.00955956},
10155 {0.01161996,0.03156149,-1.18705869,0.21308090,0.02219603,0.07118238,0.02273938,0.06034785,0.18928374,0.00803870,0.00287235,0.09004368,0.01557359,0.00375798,0.00679131,0.16825837,0.08398226,0.00190474,0.02569090,0.00351296},
10156 {0.02096312,0.00657599,0.26929909,-0.86328733,0.00331871,0.02776660,0.27819699,0.04482489,0.04918511,0.00056712,0.00079981,0.01501150,0.00135537,0.00092395,0.02092662,0.06579888,0.02259266,0.00158572,0.00716768,0.00201422},
10157 {0.03220119,0.00691547,0.00684065,0.00080928,-0.86781864,0.00109716,0.00004527,0.00736456,0.00828668,0.00414794,0.00768465,0.00017162,0.01156150,0.01429859,0.00097521,0.03602269,0.01479316,0.00866942,0.01507844,0.02534728},
10158 {0.03953956,0.11446966,0.06913053,0.02133682,0.00345736,-1.24953177,0.16830979,0.01092385,0.19623161,0.00297003,0.02374496,0.13185209,0.06818543,0.00146170,0.02545052,0.04989165,0.04403378,0.00962910,0.01049079,0.00857458},
10159 {0.07434507,0.02605508,0.03877888,0.37538659,0.00025048,0.29554848,-0.84254259,0.02497249,0.03034386,0.00316875,0.00498760,0.12936820,0.01243696,0.00134660,0.03002373,0.04380857,0.04327684,0.00557310,0.00859294,0.01754095},
10160 {0.11846020,0.02237238,0.08243001,0.04844538,0.03263985,0.01536392,0.02000178,-0.50414422,0.01785951,0.00049912,0.00253779,0.01700817,0.00800067,0.00513658,0.01129312,0.09976552,0.00744439,0.01539442,0.00313512,0.00439779},
10161 {0.00802225,0.05424651,0.10080372,0.02072557,0.01431930,0.10760560,0.00947583,0.00696321,-1.09324335,0.00243405,0.00818899,0.01558729,0.00989143,0.01524917,0.01137533,0.02213166,0.01306114,0.01334710,0.11863394,0.00266053},
10162 {0.00931296,0.00789336,0.01190322,0.00066446,0.01992916,0.00452837,0.00275137,0.00054108,0.00676776,-1.41499789,0.25764421,0.00988722,0.26563382,0.06916358,0.00486570,0.00398456,0.06425393,0.00694043,0.01445289,0.66191466},
10163 {0.03917027,0.02990732,0.00677980,0.00149374,0.05885464,0.05771026,0.00690325,0.00438541,0.03629495,0.41069624,-0.79375308,0.01362360,0.62543296,0.25688578,0.02467704,0.01806113,0.03001512,0.06139358,0.02968934,0.16870919},
10164 {0.03465896,0.40866276,0.13857164,0.01827910,0.00085698,0.20893479,0.11674330,0.01916263,0.04504313,0.01027583,0.00888247,-0.97644156,0.04241650,0.00154510,0.02521473,0.04836478,0.07344114,0.00322392,0.00852278,0.01196402},
10165 {0.02579765,0.01111131,0.00851489,0.00058635,0.02051079,0.03838702,0.00398738,0.00320253,0.01015515,0.09808327,0.14487451,0.01506968,-1.54195698,0.04128536,0.00229163,0.00796306,0.04636929,0.01597787,0.01104642,0.04357735},
10166 {0.01073203,0.00223024,0.00378708,0.00073673,0.04675419,0.00151673,0.00079574,0.00378966,0.02885576,0.04707045,0.10967574,0.00101178,0.07609486,-0.81061579,0.00399600,0.01530562,0.00697985,0.10394083,0.33011973,0.02769432},
10167 {0.05186360,0.01464471,0.00712508,0.01737179,0.00331981,0.02749383,0.01847072,0.00867414,0.02240973,0.00344749,0.01096857,0.01718973,0.00439734,0.00416018,-0.41664685,0.05893117,0.02516738,0.00418956,0.00394655,0.01305787},
10168 {0.28928853,0.05251612,0.24529879,0.07590089,0.17040121,0.07489439,0.03745080,0.10648187,0.06058559,0.00392302,0.01115539,0.04581702,0.02123285,0.02214217,0.08188943,-1.42842431,0.39608294,0.01522956,0.02451220,0.00601987},
10169 {0.11400727,0.03085239,0.10660988,0.02269274,0.06093244,0.05755704,0.03221430,0.00691855,0.03113348,0.05508469,0.01614250,0.06057985,0.10765893,0.00879238,0.03045173,0.34488735,-1.23444419,0.00750412,0.01310009,0.11660005},
10170 {0.00218053,0.00716244,0.00054751,0.00036065,0.00808574,0.00284997,0.00093936,0.00323960,0.00720403,0.00134729,0.00747646,0.00060216,0.00840002,0.02964754,0.00114785,0.00300276,0.00169919,-0.44275283,0.03802969,0.00228662},
10171 {0.00747852,0.01073967,0.02090366,0.00461457,0.03980863,0.00878929,0.00409985,0.00186756,0.18125441,0.00794180,0.01023445,0.00450612,0.01643896,0.26654152,0.00306072,0.01368064,0.00839668,0.10764993,-0.71435091,0.00851526},
10172 {0.17617706,0.01181629,0.00578676,0.00262530,0.13547871,0.01454379,0.01694332,0.00530363,0.00822937,0.73635171,0.11773937,0.01280613,0.13129028,0.04526924,0.02050210,0.00680190,0.15130413,0.01310401,0.01723920,-1.33539639}
10173 };
10174