1 // $Id: plforces.cpp,v 1.74 2011/04/23 02:02:49 bobgian Exp $
2
3 /*
4 Copyright 2002 Peter Beerli, Mary Kuhner, Jon Yamato and Joseph Felsenstein
5
6 This software is distributed free of charge for non-commercial use
7 and is copyrighted. Of course, we do not guarantee that the software
8 works, and are not responsible for any damage you may cause or have.
9 */
10
11 // Posterior Likelihood forces classes
12 // - CoalescePL
13 // - coalesce with no-growth [using compressed summaries]
14 // - CoalesceGrowPL
15 // - coalesce with exponential growth [using non-compressed summaries]
16 // - GrowPL
17 // - coalesce with exponential growth [using non-compressed summaries]
18 // - MigratePL
19 // - RecombinePL
20 // - SelectPL [stubs]
21
22 //------------------------------------------------------------------------------------
23
24 #include <cassert>
25
26 #include "local_build.h"
27
28 #include "mathx.h"
29 #include "plforces.h"
30 #include "runreport.h"
31 #include "summary.h"
32 #include "vectorx.h"
33 #include "force.h"
34 #include "timemanager.h" // for CoalesceGrowPL::lnPoint()
35
36 #ifdef DMALLOC_FUNC_CHECK
37 #include <dmalloc.h>
38 #endif
39
40 // DEBUG CODE HELPERS
41 #include <iostream>
42 #include <fstream>
43 #include <sstream>
44 std::ofstream numfile;
45
46 using namespace std;
47
48 //------------------------------------------------------------------------------------
49
50 const double LOG_ONEPLUSEPSILON = log(1.0 + numeric_limits<double>::epsilon());
51
52 //------------------------------------------------------------------------------------
53 // DiseasePL: disease force specific waiting time and
54 // point probabilites plus derivatives
55 //------------------------------------------------------------------------------------
56
lnWait(const vector<double> & param,const TreeSummary * treedata)57 double DiseasePL::lnWait (const vector < double >¶m, const TreeSummary * treedata)
58 {
59 vector < double >::iterator i;
60 vector < double >::const_iterator pstart;
61 vector < double >::const_iterator pend;
62
63 const vector<double>& mWait = treedata->GetDiseaseSummary()->GetShortWait();
64 // precomputing the transition rates to a status Sum[d_ji] into
65 // membervariable m_msum
66 for (i = m_msum.begin (),
67 pstart = param.begin () + m_start,
68 pend = param.begin () + m_start + m_nPop;
69 i != m_msum.end (); ++i, pstart += m_nPop, pend += m_nPop)
70 {
71 (*i) = accumulate (pstart, pend, 0.0);
72 }
73
74 // sum_pop(SM_status *kt)
75 return -1.0 * inner_product (mWait.begin (),
76 mWait.end (), m_msum.begin (), 0.0);
77 }
78
79 //------------------------------------------------------------------------------------
80
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)81 double DiseasePL::lnPoint (const vector < double >¶m, const vector < double >&lparam,
82 const TreeSummary * treedata)
83 {
84 // Sum_j(Sum_i(disevent[j,i] * log(M[j,i]))
85 const vector<double>& nevents = treedata->GetDiseaseSummary()->GetShortPoint();
86 return inner_product (nevents.begin (), nevents.end (),
87 lparam.begin () + m_start, 0.0);
88 }
89
90 //------------------------------------------------------------------------------------
91
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)92 double DiseasePL::DlnWait (const vector < double >¶m, const TreeSummary * treedata, const long int &whichparam)
93 {
94 long int which = (whichparam - m_start) / m_nPop;
95 const vector<double>& mWait = treedata->GetDiseaseSummary()->GetShortWait();
96 return -mWait[which];
97 }
98
99 //------------------------------------------------------------------------------------
100
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)101 double DiseasePL::DlnPoint (const vector < double >¶m, const TreeSummary * treedata, const long int &whichparam)
102 {
103 long int which = whichparam - m_start;
104 const vector<double>& nmig = treedata->GetDiseaseSummary()->GetShortPoint();
105 return SafeDivide (nmig[which], param[whichparam]);
106 }
107
108 //------------------------------------------------------------------------------------
109
lnWait(const vector<double> & param,const TreeSummary * treedata)110 double DiseaseLogisticSelectionPL::lnWait(const vector<double>& param, const TreeSummary * treedata)
111 {
112 return 0.0; // contained in CoalesceLogisticSelectionPL::lnWait(), for speed
113 }
114
115 //------------------------------------------------------------------------------------
116 // Note: DlnPoint() is selection-independent, because the point terms
117 // separate into a sum of logarithms. So, the DiseasePL method is used for that.
118
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)119 double DiseaseLogisticSelectionPL::lnPoint(const vector<double>& param, const vector<double>& lparam,
120 const TreeSummary * treedata)
121 {
122 const Interval *treesum = treedata->GetDiseaseSummary()->GetLongPoint();
123 const Interval *pTreesum;
124
125 double logTheta_A0(lparam[0]), logTheta_a0(lparam[1]), s(param[m_s_is_here]);
126 double log_mu_into_A_from_a(lparam[3]), log_mu_into_a_from_A(lparam[4]);
127
128 if (0 != param[2] || 0 != param[5] || 0 == param[3] || 0 == param[4])
129 throw implementation_error("Error parsing disease rates in DiseaseLogisticSelectionPL::lnPoint()");
130
131 double result = 0.0;
132
133 for(pTreesum = treesum; pTreesum != NULL; pTreesum = pTreesum->next)
134 {
135 if (0L == pTreesum->oldstatus)
136 result += log_mu_into_A_from_a + logTheta_a0 - logTheta_A0 + s*pTreesum->endtime;
137 else if (1L == pTreesum->oldstatus)
138 result += log_mu_into_a_from_A + logTheta_A0 - logTheta_a0 - s*pTreesum->endtime;
139 else
140 {
141 string msg = "DiseaseLogisticSelectionPL::lnPoint(), received a TreeSummary ";
142 msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
143 msg += ". \"oldstatus\" must be either 0 or 1, reflecting a coalescence in either ";
144 msg += "the subpopulation with allele A or the subpopulation with allele a.";
145 throw implementation_error(msg);
146 }
147 }
148
149 return result;
150 }
151
152 //------------------------------------------------------------------------------------
153
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)154 double DiseaseLogisticSelectionPL::DlnWait(const vector<double>¶m, const TreeSummary * treedata,
155 const long int &whichparam)
156 {
157 if (2 != m_nPop)
158 {
159 string msg = "DiseaseLogisticSelectionPL::DlnWait() called with m_nPop = ";
160 msg += ToString(m_nPop);
161 msg += ". m_nPop must equal 2, reflecting one population with the major ";
162 msg += "allele and one population with the minor allele.";
163 throw implementation_error(msg);
164 }
165
166 if (3 != whichparam && 4 != whichparam)
167 throw implementation_error("DiseaseLogisticSelectionPL::DlnWait(), bad \"whichparam\"");
168
169 const list<Interval>& treesum = treedata->GetDiseaseSummary()->GetLongWait();
170 list<Interval>::const_iterator treesum_it;
171
172 double result(0.0), s(param[m_s_is_here]);
173 double theta_A0(param[m_start]), theta_a0(param[m_start+1]);
174
175 if (theta_a0 <= 0.0 || theta_A0 <= 0.0)
176 {
177 string msg = "DiseaseLogisticSelectionPL::DlnWait(), received an invalid Theta value ";
178 msg += "(theta_A0 = " + ToString(theta_A0) + ", theta_a0 = " + ToString(theta_a0) + ").";
179 throw impossible_error(msg);
180 }
181
182 if (fabs(s) < LOGISTIC_SELECTION_COEFFICIENT_EPSILON)
183 return DlnWaitForTinyLogisticSelectionCoefficient(param, treedata, whichparam);
184
185 if (fabs(s) >= DBL_BIG)
186 return -DBL_BIG; // unavoidable overflow
187
188 double t_e, t_s(0.0), dt, e_toThe_sts(1.0), e_toThe_ste, term(0.0);
189 double term_A(0.0), term_a(0.0);
190 double factor_A(theta_a0/(s*theta_A0)), factor_a(theta_A0/(-s*theta_a0));
191 bool derivativeWithRespectTo_mu_into_A_from_a = (3 == whichparam ? true : false);
192
193 for (treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
194 {
195 t_e = treesum_it->endtime;
196 dt = t_e - t_s;
197 term_A = term_a = 0.0;
198 const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
199 const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
200
201 if (s > 0.0)
202 {
203 // Use overflow protection for term_A; if term_a underflows, that's okay.
204 if (s*dt > LOG_ONEPLUSEPSILON)
205 {
206 term = SafeProductWithExp(factor_A,s*dt);
207 if (term >= DBL_BIG && derivativeWithRespectTo_mu_into_A_from_a)
208 return -DBL_BIG; // unavoidable overflow
209 e_toThe_ste = (term/factor_A)*e_toThe_sts;
210 if (k_A > 0.0 && derivativeWithRespectTo_mu_into_A_from_a)
211 {
212 term_A = k_A*(term - factor_A)*e_toThe_sts;
213 if (term_A >= DBL_BIG)
214 return -DBL_BIG; // unavoidable overflow
215 }
216 }
217 else
218 {
219 if (s*dt >= numeric_limits<double>::epsilon())
220 e_toThe_ste = (1.0 + s*dt)*e_toThe_sts;
221 else
222 e_toThe_ste = SafeProductWithExp(1.0,s*t_e);
223 if (k_A > 0.0 && derivativeWithRespectTo_mu_into_A_from_a)
224 {
225 term_A = k_A*factor_A*s*dt*e_toThe_sts;
226 if (term_A >= DBL_BIG)
227 return -DBL_BIG; // unavoidable overflow
228 }
229 }
230 if (k_a > 0.0 && !derivativeWithRespectTo_mu_into_A_from_a)
231 term_a = k_a*factor_a*(1.0/e_toThe_ste - 1.0/e_toThe_sts); // note: factor_a includes minus sign
232 }
233 else // s < 0
234 {
235 // Use overflow protection for term_a; if term_A underflows, that's okay.
236
237 if (-s*dt > LOG_ONEPLUSEPSILON)
238 {
239 term = SafeProductWithExp(factor_a,-s*dt);
240 if (term >= DBL_BIG && !derivativeWithRespectTo_mu_into_A_from_a)
241 return -DBL_BIG; // unavoidable overflow
242 e_toThe_ste = e_toThe_sts/(term/factor_a);
243 if (k_a > 0.0 && !derivativeWithRespectTo_mu_into_A_from_a)
244 {
245 term_a = k_a*(term - factor_a)/e_toThe_sts;
246 if (term_a >= DBL_BIG)
247 return -DBL_BIG; // unavoidable overflow
248 }
249 }
250 else
251 {
252 if (-s*dt >= numeric_limits<double>::epsilon())
253 e_toThe_ste = (1.0 + s*dt)*e_toThe_sts;
254 else
255 e_toThe_ste = SafeProductWithExp(1.0,s*t_e);
256 if (k_a > 0.0 && !derivativeWithRespectTo_mu_into_A_from_a)
257 {
258 term_a = k_a*factor_a*(-s)*dt/e_toThe_sts;
259 if (term_a >= DBL_BIG)
260 return -DBL_BIG; // unavoidable overflow
261 }
262 }
263 if (k_A > 0.0 && derivativeWithRespectTo_mu_into_A_from_a)
264 term_A = k_A*factor_A*(e_toThe_ste - e_toThe_sts);
265 }
266
267 if (derivativeWithRespectTo_mu_into_A_from_a)
268 result -= term_A;
269 else
270 result -= term_a;
271 if (result <= -DBL_BIG)
272 return -DBL_BIG;
273
274 t_s = t_e;
275 e_toThe_sts = e_toThe_ste;
276 }
277
278 return result;
279 }
280
281 //------------------------------------------------------------------------------------
282
DlnWaitForTinyLogisticSelectionCoefficient(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)283 double DiseaseLogisticSelectionPL::DlnWaitForTinyLogisticSelectionCoefficient(const vector<double>& param,
284 const TreeSummary *treedata,
285 const long int &whichparam)
286 {
287 const list<Interval>& treesum = treedata->GetDiseaseSummary()->GetLongWait();
288 list<Interval>::const_iterator treesum_it;
289 double result(0.0), s(param[m_s_is_here]);
290 double theta_A0(param[m_start]), theta_a0(param[m_start+1]);
291 double t_e, t_s(0.0);
292 bool derivativeWithRespectTo_mu_into_A_from_a = (4 == whichparam ? true : false);
293
294 for (treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
295 {
296 t_e = treesum_it->endtime;
297 const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
298 const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
299
300 if (derivativeWithRespectTo_mu_into_A_from_a)
301 result -= (k_A*theta_a0/theta_A0)*((t_e - t_s) + 0.5*s*(t_e*t_e - t_s*t_s));
302 else
303 result -= (k_a*theta_A0/theta_a0)*((t_e - t_s) - 0.5*s*(t_e*t_e - t_s*t_s));
304
305 t_s = t_e;
306 }
307
308 return result;
309 }
310
311 //------------------------------------------------------------------------------------
312 // MigratePL: migration forces specific waiting time and
313 // point probabilites and its derivatives
314
lnWait(const vector<double> & param,const TreeSummary * treedata)315 double MigratePL::lnWait (const vector < double >¶m, const TreeSummary * treedata)
316 {
317 vector < double >::iterator i;
318 vector < double >::const_iterator pstart;
319 vector < double >::const_iterator pend;
320
321 const vector<double>& mWait = treedata->GetMigSummary()->GetShortWait();
322
323 // precomputing the immigration rates Sum[m_ji] into membervariable m_msum
324 for (i = m_msum.begin (),
325 pstart = param.begin () + m_start,
326 pend = param.begin () + m_start + m_nPop;
327 i != m_msum.end (); ++i, pstart += m_nPop, pend += m_nPop)
328 {
329 (*i) = accumulate (pstart, pend, 0.0);
330 }
331
332 // sum_pop(SM_pop *kt)
333 return -1.0 * inner_product (mWait.begin (),
334 mWait.end (), m_msum.begin (), 0.0);
335 }
336
337 //------------------------------------------------------------------------------------
338
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)339 double MigratePL::lnPoint (const vector < double >¶m, const vector < double >&lparam,
340 const TreeSummary * treedata)
341 {
342 // Sum_j(Sum_i(migevent[j,i] * log(M[j,i]))
343 const vector<double>& nmig = treedata->GetMigSummary()->GetShortPoint();
344
345 return inner_product (nmig.begin(), nmig.end(), lparam.begin() + m_start,
346 0.0);
347 }
348
349 //------------------------------------------------------------------------------------
350
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)351 double MigratePL::DlnWait (const vector < double >¶m, const TreeSummary * treedata, const long int & whichparam)
352 {
353 long int which = (whichparam - m_start) / m_nPop;
354 const vector<double>& mWait = treedata->GetMigSummary()->GetShortWait();
355 return -mWait[which];
356 }
357
358 //------------------------------------------------------------------------------------
359
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)360 double MigratePL::DlnPoint (const vector < double >¶m, const TreeSummary * treedata, const long int & whichparam)
361 {
362 long int which = whichparam - m_start;
363 const vector<double>& nmig = treedata->GetMigSummary()->GetShortPoint();
364
365 return SafeDivide (nmig[which], param[whichparam]);
366 }
367
368 //------------------------------------------------------------------------------------
369
370 // CoalescePL: coalescence forces specific waiting time and
371 // point probabilites and its derivatives
372
373 // CalculateScaledLPCounts returns the log-likelihood of all
374 // of the choices of recombination-branch partition assignment
375 // made in a genealogy. It does this by summing the log of (the
376 // number of times each partition was chosen times the relative
377 // frequency of that partition).
378 //
379 // JDEBUG--: This code cannot handle migration. To do so, it
380 // will be necessary to (a) store the partition in which each
381 // recombination happened, and (b) make the relative frequency
382 // relative to the partition, not the whole. For example, the
383 // term for choice of a disease state in Boston should be
384 // Theta(disease & Boston) over Theta(Boston). Currently it
385 // is Theta(disease) over Theta(total).
386
CalculateScaledLPCounts(const DoubleVec1d & params,const LongVec2d & picks) const387 double CoalescePL::CalculateScaledLPCounts(const DoubleVec1d& params, const LongVec2d& picks) const
388 {
389 double total_theta(std::accumulate(params.begin()+m_start,
390 params.begin()+m_end,0.0));
391 DoubleVec1d myths(params.begin()+m_start,params.begin()+m_end);
392 double answ(0.0);
393 LongVec2d::size_type lpforce;
394 for(lpforce = 0; lpforce < picks.size(); ++lpforce)
395 {
396 LocalPartitionForce* pforce(dynamic_cast<LocalPartitionForce*>
397 (m_localpartforces[lpforce]));
398 assert(pforce); // it wasn't a local partition force?
399 DoubleVec1d part_thetas(pforce->SumXPartsToParts(myths));
400 LongVec1d::size_type partition;
401 for(partition = 0; partition < picks[lpforce].size(); ++partition)
402 {
403 answ += log(picks[lpforce][partition] * part_thetas[partition] / total_theta);
404 }
405 }
406 return answ;
407 }
408
409 //------------------------------------------------------------------------------------
410
SetLocalPartForces(const ForceSummary & fs)411 void CoalescePL::SetLocalPartForces(const ForceSummary& fs)
412 {
413 m_localpartforces = fs.GetLocalPartitionForces();
414 if (m_localpartforces.empty()) return;
415
416 // set up vector which answers the following question:
417 // for each local partition force
418 // for any xpartition theta (identified by index),
419 // what partition (of the given force) is it?
420
421 // This code is copied approximately from PartitionForce::
422 // SumXPartsToParts().
423 ForceVec::const_iterator pforce;
424 const ForceVec& partforces = fs.GetPartitionForces();
425 LongVec1d indicator(partforces.size(), 0L);
426 LongVec1d nparts(registry.GetDataPack().GetAllNPartitions());
427 DoubleVec1d::size_type xpart;
428 long partindex;
429 for (pforce = partforces.begin(), partindex = 0;
430 pforce != partforces.end(); ++pforce, ++partindex)
431 {
432 vector<DoubleVec1d::size_type> indices(m_nPop, 0);
433 for(xpart = 0; xpart < static_cast<DoubleVec1d::size_type>(m_nPop); ++xpart)
434 {
435 indices[xpart] = indicator[partindex];
436 long int part;
437 for (part = nparts.size() - 1; part >= 0; --part)
438 {
439 ++indicator[part];
440 if (indicator[part] < nparts[part]) break;
441 indicator[part] = 0;
442 }
443 }
444 // initialize xparts vectors
445 vector<DoubleVec1d::size_type> emptyvec;
446 vector<vector<DoubleVec1d::size_type> > partvec(nparts[partindex], emptyvec);
447
448 if ((*pforce)->IsLocalPartitionForce())
449 {
450 m_whichlocalpart.push_back(indices);
451 m_whichlocalxparts.push_back(partvec);
452 }
453 m_whichpart.push_back(indices);
454 m_whichxparts.push_back(partvec);
455 }
456
457 // now construct the vector mapping partition to a set of xpartitions.
458 //
459 // we will do this by going through the vector mapping xpartition to
460 // partition we just constructed, letting its contents tell us which
461 // partition to add the parameter (xpartition) to and keeping a
462 // counter to let us know which xpartition to add.
463 long lpindex = 0;
464 for (pforce = partforces.begin(), partindex = 0;
465 pforce != partforces.end(); ++pforce, ++partindex)
466 {
467 for(xpart = 0; xpart < m_whichpart[partindex].size(); ++xpart)
468 m_whichxparts[partindex][m_whichpart[partindex][xpart]].push_back(xpart);
469
470 if ((*pforce)->IsLocalPartitionForce())
471 {
472 for(xpart = 0; xpart < m_whichpart[partindex].size(); ++xpart)
473 {
474 m_whichlocalxparts[lpindex][m_whichpart[partindex][xpart]].
475 push_back(xpart);
476 }
477 ++lpindex;
478 }
479 }
480 } // SetLocalPartForces
481
482 //------------------------------------------------------------------------------------
483 // COMPRESSED DATA (SHORT) SUMMARIES
484
lnWait(const vector<double> & param,const TreeSummary * treedata)485 double CoalescePL::lnWait (const vector < double >¶m, const TreeSummary * treedata)
486 {
487 // result = sum_pop(k(k-1)t/theta_pop)
488 const vector<double>& cWait = treedata->GetCoalSummary()->GetShortWait();
489
490 return -1.0 * inner_product (cWait.begin (), cWait.end (),
491 param.begin () + m_start,
492 0.0, plus < double >(),
493 divides < double >());
494 }
495
496 //------------------------------------------------------------------------------------
497
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)498 double CoalescePL::lnPoint (const vector < double >¶m, const vector < double >&lparam,
499 const TreeSummary * treedata)
500 {
501 // in general it is Sum_j(coalesceevent[j] * (log(2) - log(theta_j)))
502 // sumcpoint = Sum_j(coalesceevent[j] * log(2))
503 // - Sum_j(coalesceevent[j] * log(theta_j))
504 const vector<double>& ncoal = treedata->GetCoalSummary()->GetShortPoint();
505
506 double point = (LOG2 * accumulate (ncoal.begin (), ncoal.end (), 0.0) -
507 inner_product (ncoal.begin (), ncoal.end (),
508 lparam.begin () + m_start, 0.0));
509 const LongVec2d& picks = treedata->GetCoalSummary()->GetShortPicks();
510 if (picks.empty()) return point;
511 else return point + CalculateScaledLPCounts(param, picks);
512 }
513
514 //------------------------------------------------------------------------------------
515
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)516 double CoalescePL::DlnWait (const vector < double >¶m, const TreeSummary * treedata, const long int & whichparam)
517 {
518 long int which = whichparam - m_start;
519 const vector<double>& cWait = treedata->GetCoalSummary()->GetShortWait();
520
521 return cWait[which] / (param[whichparam] * param[whichparam]);
522 }
523
524 //------------------------------------------------------------------------------------
525 // erynes 2004/03/11 -- This method presently returns the following:
526 //
527 // -ncoal/theta,
528 //
529 // where theta is the current estimate of parameter theta for the
530 // given population ("whichparam"), and ncoal is the total number
531 // of coalescent events for this population (i.e., one less than the
532 // total number of tips that the population has in this genealogy tree,
533 // unless recombinations are present).
534 //
535 // Note: CoalescePL::DlnPoint calculates the partial derivative of log(Point)
536 // with respect to theta for the given population ("whichparam"),
537 // in both the absence and presence of growth.
538 // When growth is present, CoalesceGrowPL::DlnWait does this for log(Wait).
539 // Partial derivatives with respect to growth are found in GrowPL.
540 //
541 // jay 2007/03/22 -- added support for disease w/ recombination
542 //
543 // JDEBUG--: only in absence of migration
544
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)545 double CoalescePL::DlnPoint (const vector < double >¶m, const TreeSummary * treedata, const long int & whichparam)
546 {
547 long int which = whichparam - m_start;
548 const vector<double>& ncoal = treedata->GetCoalSummary()->GetShortPoint();
549
550 double answ(-ncoal[which] / param[whichparam]);
551
552 const LongVec2d& picks = treedata->GetCoalSummary()->GetShortPicks();
553 if (picks.empty()) return answ;
554 else {
555 // count recs in crosspartition corresponding to whichparam
556 const RecSummary* rsum = dynamic_cast<const RecSummary*>(treedata->GetSummary(force_REC));
557 long recs_in_xpart = rsum->GetNRecsByXPart()[whichparam];
558 // divide by whichparam
559 double xpart_theta = param[whichparam];
560 answ += recs_in_xpart / xpart_theta;
561 // from that, subtract number of recs (should be "in that partition")
562 long nrecs = rsum->GetNRecs();
563 // divided by sum of thetas (should be "for that partition")
564 double total_theta(std::accumulate(param.begin()+m_start,
565 param.begin()+m_end,0.0));
566 answ -= nrecs/total_theta;
567 }
568
569 #if 0 // this was what CVS merged into here.....
570 if (!m_localpartforces.empty())
571 {
572 double total_theta(std::accumulate(param.begin()+m_start,
573 param.begin()+m_end,0.0));
574 DoubleVec1d myths(param.begin()+m_start,param.begin()+m_end);
575 long int total_npartitions(0);
576 LongVec2d::size_type lpforce;
577 for(lpforce = 0; lpforce < m_localpartforces.size(); ++lpforce)
578 {
579 LocalPartitionForce* pforce(dynamic_cast<LocalPartitionForce*>
580 (m_localpartforces[lpforce]));
581 assert(pforce); // it wasn't a local partition force?
582 total_npartitions += pforce->GetNPartitions();
583
584 const vector<DoubleVec1d::size_type>& indices(
585 m_whichxparts[lpforce][m_whichpart[lpforce][which]]);
586 vector<DoubleVec1d::size_type>::const_iterator index;
587 double part_theta(0.0);
588 for(index = indices.begin(); index != indices.end(); ++index)
589 {
590 part_theta += myths[*index];
591 }
592 answ += 1.0 / part_theta;
593
594 }
595 answ -= total_npartitions / total_theta;
596 }
597 #endif
598
599 return answ;
600 }
601
602 //------------------------------------------------------------------------------------
603 // CoalesceGrowPL: coalescence forces specific waiting time and
604 // point probabilites and its derivatives
605 // LONG DATA SUMMARIES
606 // [only in effect when GROWTH force is turned on]
607
608 //------------------------------------------------------------------------------------
609 // erynes 2004/03/11 -- This method presently returns the following:
610 //
611 // sum_over_populations(sum_over_each_population's_coalescent_time_intervals(
612 // (k_i*(k_i - 1)/(theta_p * g_p)) * (exp(g_p * t_start)- exp(g_p * t_end))
613 // )
614 // )
615 //
616 // where
617 // k_i is the number of lineages of population p in time interval i
618 // theta_p is the present estimate of parameter theta for population p
619 // g_p is the present estimate of the growth parameter for population p
620 // t_start is the timepoint at the start of time interval i
621 // t_end is the timepoint at the end of time interval i
622 //
623 // This quantity is equal to log(WaitProb(G|P)), where WaitProb(G|P) is the
624 // "waiting" probability of the input genealogy G (parameter "treedata")
625 // given the parameters P (parameter "param").
626 // Prob(G|P) = PointProb(G|P) * WaitProb(G|P).
627 // See CoalesceGrowPL::Point() for the point probability term.
628
lnWait(const vector<double> & param,const TreeSummary * treedata)629 double CoalesceGrowPL::lnWait (const vector < double >¶m, const TreeSummary * treedata)
630 {
631 const list<Interval>& treesum=treedata->GetCoalSummary()->GetLongWait();
632 list <Interval> :: const_iterator tit;
633 vector < double > :: const_iterator pit = param.begin() + m_start;
634 vector < double > :: const_iterator pend = param.begin() + m_start + m_nPop;
635 vector < double > :: const_iterator git = param.begin() + m_growthstart;
636
637 unsigned long int xpartition = 0L;
638 double growth, theta, gts, gte;
639 double coeff_s, coeff_e, arg1_s, arg1_e;
640 double result = 0.0;
641 double starttime;
642
643 for( ; pit != pend; ++pit, ++git, ++xpartition)
644 {
645 growth = *git;
646 theta = *pit;
647
648 if (fabs(growth) < GROWTH_EPSILON)
649 {
650 result += lnWaitForTinyGrowth(theta, growth, xpartition, treesum);
651 continue;
652 }
653
654 starttime = 0.0;
655 for(tit = treesum.begin(); tit != treesum.end(); ++tit)
656 {
657 gts = growth * starttime;
658 gte = growth * (*tit).endtime;
659
660 coeff_s = coeff_e = arg1_s = arg1_e = 1.0;
661 const double& k = (*tit).xpartlines[xpartition];
662
663 // IMPORTANT NOTE: The following over/underflow checking
664 // assumes that each starttime and endtime is always >= 0.
665 // Hence, e.g., gts < 0 means growth < 0.
666
667 if (gts > 1.0)
668 {
669 coeff_s = k * (k - 1.0);
670 if (growth > 1.0)
671 arg1_s /= growth;
672 else
673 coeff_s /= growth;
674 if (theta > 1.0)
675 arg1_s /= theta;
676 else
677 coeff_s /= theta;
678 }
679 else if (gts < -1.0)
680 {
681 arg1_s *= k * (k - 1.0);
682 if (growth < -1.0)
683 coeff_s /= growth;
684 else
685 arg1_s /= growth;
686 if (theta > 1.0)
687 coeff_s /= theta;
688 else
689 arg1_s /= theta;
690 }
691 else // no over/underflow in exp()
692 coeff_s = k * (k - 1.0)/(theta * growth); // arg1_s already set
693
694 // BUGBUG erynes -- what follows is easy to understand,
695 // but we should revisit this and reuse some of what we
696 // learned about growth and theta above, while considering gts.
697
698 if (gte > 1.0)
699 {
700 coeff_e = k * (k - 1.0);
701 if (growth > 1.0)
702 arg1_e /= growth;
703 else
704 coeff_e /= growth;
705 if (theta > 1.0)
706 arg1_e /= theta;
707 else
708 coeff_e /= theta;
709 }
710 else if (gte < -1.0)
711 {
712 arg1_e *= k * (k - 1.0);
713 if (growth < -1.0)
714 coeff_e /= growth;
715 else
716 arg1_e /= growth;
717 if (theta > 1.0)
718 coeff_e /= theta;
719 else
720 arg1_e /= theta;
721 }
722 else // no over/underflow in exp()
723 coeff_e = k * (k - 1.0)/(theta * growth); // arg1_e already set
724
725 double incrementalResult =
726 coeff_s * SafeProductWithExp(arg1_s, gts) -
727 coeff_e * SafeProductWithExp(arg1_e, gte);
728
729 if (incrementalResult > 0.0)
730 {
731 // We have precision problems,
732 // because we expect te > ts always,
733 // and hence gte > gts always.
734 // During debugging, we have encountered precision problems
735 // in which gdb claims gts == gte and arg1_s == arg1_e
736 // but incrementalResult > 0, typically reflecting
737 // a difference near the 15th digit.
738 // (Note that te == ts corresponds to two events
739 // occurring simultaneously--e.g., k = 1 for a really long
740 // time in each of two populations, then one finally
741 // migrates to the other, then they coalesce instantaneously.)
742 // It should be safe to set incrementalResult to zero in cases of
743 // roundoff error. If incrementalResult is positive,
744 // but _not_ tiny compared with either of the terms
745 // whose difference yielded incrementalResult,
746 // then something is _very_ wrong--probably te < ts,
747 // which should never happen. We assert in this case;
748 // this will enable us to see the problem, but the end user
749 // will not see the assertion, unless s/he's running in debug mode.
750 if (incrementalResult/fabs(coeff_s * SafeProductWithExp(arg1_s, gts)) > 1e-10)
751 {
752 string msg = "incrementalResult in plforces.cpp is greater "
753 "than zero. coeff_s = "
754 + ToString(coeff_s) + ", coeff_e = " + ToString(coeff_e)
755 + ", theta = " + ToString(theta) + ", growth = "
756 + ToString(growth) + ", and incrementalResult is "
757 + ToString(incrementalResult);
758 registry.GetRunReport().ReportDebug(msg);
759 //assert(0);
760 }
761 incrementalResult = 0.0;
762 }
763
764 starttime = (*tit).endtime;
765 result += incrementalResult;
766
767 }
768
769 }
770
771 return result;
772 }
773
774 //------------------------------------------------------------------------------------
775 // erynes 2004/05/20 -- This method presently returns the following:
776 //
777 // sum_over_a_population's_coalescent_time_intervals(
778 // (k_i*(k_i - 1)/theta_p) * ( (tis - tie) + (1/2)*g_p*(tis^2 - tie^2) )
779 // )
780 //
781 // where
782 // k_i is the number of lineages of population p in time interval i
783 // theta_p is the present estimate of parameter theta for population p
784 // g_p is the present estimate of the growth parameter for population p
785 // tis is the timepoint at the start of time interval i
786 // tie is the timepoint at the end of time interval i
787 //
788 // The population "p" is determined in Wait() by iterating over all
789 // populations. That method calls this method with the appropriate
790 // parameters for the population in question.
791 //
792 // This non-public method is used to handle the case in which fabs(g)
793 // is tiny--that is, less than GROWTH_EPSILON. It is derived from the
794 // formula used in Wait by expanding exp(g*t) in a Taylor series
795 // about g == 0, multiplying this by the term k(k-1)/(g*theta),
796 // and dropping all terms of order g^2 and above.
797 // Brief tests with Mathematica suggest that the original formula
798 // converges very nicely to this linear formula for normal values
799 // of the timepoints, the difference being less than one part in one
800 // billion, dropping down to zero difference for g == 0 (using
801 // L'Hopital's rule).
802 //
803 // Please also see the comments for DWait.
804
lnWaitForTinyGrowth(const double theta,const double growth,const unsigned long int xpartition,const list<Interval> & treesummary)805 double CoalesceGrowPL::lnWaitForTinyGrowth(const double theta, const double growth,
806 const unsigned long int xpartition,
807 const list<Interval>& treesummary)
808 {
809 assert(fabs(growth) < GROWTH_EPSILON); // otherwise use Wait
810
811 list<Interval>::const_iterator tit;
812 double k, te;
813
814 double result = 0.0;
815 // ts starts at 0 and subsequently is the previous cycle's te
816 double ts = 0.0;
817
818 for(tit = treesummary.begin(); tit != treesummary.end(); ++tit)
819 {
820 te = (*tit).endtime;
821 k = (*tit).xpartlines[xpartition];
822
823 result += (k * (k - 1.0) / theta) *
824 ((ts - te) + 0.5 * growth * (ts * ts - te * te));
825 ts = te;
826 }
827
828 return result;
829 }
830
831 //------------------------------------------------------------------------------------
832 // erynes 2004/03/11 -- This method presently returns the following:
833 //
834 // sum_over_populations(sum_over_each_population's_coalescent_time_intervals(
835 // log(2) + g_p*t_i_end - log(theta_p)
836 // )
837 // )
838 //
839 // where
840 // theta_p is the present estimate of parameter theta for population p
841 // g_p is the present estimate of the growth parameter for population p
842 // t_i_end is the timepoint at the end of time interval i
843 //
844 // This quantity is equal to log(PointProb(G|P)), where PointProb(G|P) is the
845 // "point" probability of the input genealogy G (parameter "treedata")
846 // given the parameters P (parameter "param").
847 // Prob(G|P) = PointProb(G|P) * WaitProb(G|P).
848 // See CoalesceGrowPL::lnWait() for the wait probability term.
849
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)850 double CoalesceGrowPL::lnPoint (const vector < double >¶m, const vector < double >&lparam,
851 const TreeSummary * treedata)
852
853 {
854 const Interval * treesum = treedata->GetCoalSummary()->GetLongPoint();
855 const Interval * tit;
856 vector < double > :: const_iterator pit = lparam.begin() + m_start;
857 vector < double > :: const_iterator pend = lparam.begin () + m_start + m_nPop;
858 vector < double > :: const_iterator git = param.begin() + m_growthstart;
859
860 long int xpartition = 0L;
861 double result = 0.0;
862
863 for( ; pit != pend; ++pit, ++git, ++xpartition)
864 {
865 for(tit= treesum; tit != NULL; tit = tit->next)
866 {
867 if(xpartition==(*tit).oldstatus)
868 result += LOG2 + (*tit).endtime * (*git) - (*pit);
869 }
870 }
871
872 // we pull this array only to check for emptyness, it is not used!
873 // JDEBUG--: not correct in presence of migration
874 const LongVec2d& picks = treedata->GetCoalSummary()->GetShortPicks();
875 if (!picks.empty()) // partner-picks must be accomodated
876 {
877 DoubleVec1d timesizes(m_nPop, 0.0);
878 ForceParameters fp(unknown_region);
879 DoubleVec1d myths(param.begin()+m_start,param.begin()+m_end);
880 fp.SetRegionalThetas(myths);
881 DoubleVec1d mygs(param.begin()+m_growthstart,
882 param.begin()+m_growthstart+m_nPop);
883 fp.SetGrowthRates(mygs);
884 // now for every recombination event....
885 treesum = treedata->GetRecSummary()->GetLongPoint();
886 for (tit = treesum; tit != NULL; tit = tit->next)
887 {
888 // compute partner-picks denominator
889 timesizes = m_timesize->XpartThetasAtT(tit->endtime,fp);
890 assert(timesizes.size() == static_cast<DoubleVec1d::size_type>(m_nPop));
891 double denominator(std::accumulate(timesizes.begin(), timesizes.end(),0.0));
892
893 // compute partner-picks numerator
894 ForceVec::size_type lpforce = 0;
895 // assuming no migration and only one local partition force
896 long whichtheta =
897 m_whichlocalxparts[lpforce][tit->partnerpicks[lpforce]][0];
898 result += log(timesizes[whichtheta] / denominator);
899 }
900 }
901
902 #if 0 // JDEBUG--JWARNING this is the logic that was in Jim's original commit
903 // compute partner-picks numerator
904 for(lpforce = 0; lpforce < m_localpartforces.size(); ++lpforce)
905 {
906 LocalPartitionForce* pforce(dynamic_cast<LocalPartitionForce*>
907 (m_localpartforces[lpforce]));
908 assert(pforce); // it wasn't a local partition force?
909 DoubleVec1d part_thetas(pforce->SumXPartsToParts(myths, mygs, tit->endtime));
910 DoubleVec1d::size_type partition;
911 for(partition = 0; partition < part_thetas.size(); ++partition)
912 {
913 result += log(part_thetas[partition] / denominator);
914 }
915 }
916 #endif
917
918 return result;
919 }
920
921 //------------------------------------------------------------------------------------
922 // erynes 2004/03/11 -- This method currently returns the following:
923 //
924 // sum_over_coalescent_time_intervals_of_population_whichparam(
925 // -(k_i(k_i - 1)/(g*theta*theta))*(exp(g*t_i_start) - exp(g*t_i_end))
926 // )
927 // where
928 // k_i is the number of lineages of this population in time interval i
929 // g is the current estimate of the growth parameter for this pop.
930 // theta is the current estimate of the theta parameter for this pop.
931 // t_i_start is the timepoint at the beginning of time interval i
932 // t_i_end is the timepoint at the end of time interval i
933 //
934 // IMPORTANT NOTE:
935 // Wait() and Point() return log(WaitProb(G|P)) and log(PointProb(G|P)),
936 // and PostLike::DCalculate() currently assumes that DWait() and DPoint()
937 // return the derivatives of log(WaitProb(G|P)) and log(PointProb(G|P)),
938 // instead of returning the derivatives of WaitProb/PointProb directly.
939 // As long as everything is kept consistent, this is all very good
940 // for reducing complexity and improving speed.
941 //
942 // Note: CoalesceGrowPL::DWait calculates the partial derivative of Wait
943 // with respect to theta for the given population ("whichparam").
944 // CoalescePL::DPoint does the same for Point.
945 // Partial derivatives with respect to growth are found in GrowPL.
946
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)947 double CoalesceGrowPL::DlnWait (const vector < double >¶m, const TreeSummary * treedata,
948 const long int & whichparam)
949 {
950 const double growth = param[whichparam + m_growthstart];
951 if(fabs(growth) < GROWTH_EPSILON)
952 return DlnWaitForTinyGrowth(param, treedata, whichparam);
953
954 double result = 0.0;
955 long int which = whichparam - m_start;
956 const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
957 list <Interval> :: const_iterator tit=treesum.begin();
958 const double theta = param[whichparam];
959 double coeff_s, coeff_e, arg1_s, arg1_e;
960 double starttime = 0.0;
961
962 for( ; tit != treesum.end(); ++tit)
963 {
964 double gts = growth * starttime;
965 double gte = growth * (*tit).endtime;
966
967 const long int & k = (*tit).xpartlines[which];
968
969 coeff_s = coeff_e = arg1_s = arg1_e = 1.0;
970
971 // IMPORTANT NOTE: The following over/underflow checking
972 // assumes that each starttime and endtime is always >= 0.
973 // Hence, e.g., gts < 0 means growth < 0.
974
975 if (gts > 1.0)
976 {
977 coeff_s = -k * (k - 1.0);
978 if (growth > 1.0)
979 arg1_s /= growth;
980 else
981 coeff_s /= growth;
982 if (theta > 1.0)
983 arg1_s /= theta*theta;
984 else
985 coeff_s /= theta*theta;
986 }
987 else if (gts < -1.0)
988 {
989 arg1_s *= -k * (k - 1.0);
990 if (growth < -1.0)
991 coeff_s /= growth;
992 else
993 arg1_s /= growth;
994 if (theta > 1.0)
995 coeff_s /= theta*theta;
996 else
997 arg1_s /= theta*theta;
998 }
999 else // no over/underflow in exp()
1000 coeff_s = -k * (k - 1.0)/(theta * theta * growth); // arg1_s already set
1001
1002 // BUGBUG erynes -- what follows is easy to understand,
1003 // but we should revisit this and reuse some of what we
1004 // learned about growth and theta above, while considering gts.
1005
1006 if (gte > 1.0)
1007 {
1008 coeff_e = -k * (k - 1.0);
1009 if (growth > 1.0)
1010 arg1_e /= growth;
1011 else
1012 coeff_e /= growth;
1013 if (theta > 1.0)
1014 arg1_e /= theta*theta;
1015 else
1016 coeff_e /= theta*theta;
1017 }
1018 else if (gte < -1.0)
1019 {
1020 arg1_e *= -k * (k - 1.0);
1021 if (growth < -1.0)
1022 coeff_e /= growth;
1023 else
1024 arg1_e /= growth;
1025 if (theta > 1.0)
1026 coeff_e /= theta*theta;
1027 else
1028 arg1_e /= theta*theta;
1029 }
1030 else // no over/underflow in exp()
1031 coeff_e = -k * (k - 1.0)/(theta * theta * growth); // arg1_e already set
1032
1033 double incrementalResult =
1034 coeff_s * SafeProductWithExp(arg1_s, gts) -
1035 coeff_e * SafeProductWithExp(arg1_e, gte);
1036
1037 if (incrementalResult < 0.0)
1038 {
1039 // We have precision problems,
1040 // because we expect te > ts always,
1041 // and hence gte > gts always.
1042 // During debugging, we have encountered precision problems
1043 // in which gdb claims gts == gte and arg1_s == arg1_e
1044 // but incrementalResult < 0, typically reflecting
1045 // a difference near the 15th digit.
1046 // (Note that te == ts corresponds to two events
1047 // occurring simultaneously--e.g., k = 1 for a really long
1048 // time in each of two populations, then one finally
1049 // migrates to the other, then they coalesce instantaneously.)
1050 // It should be safe to set incrementalResult to zero in cases of
1051 // roundoff error. If incrementalResult is negative,
1052 // but _not_ tiny compared with either of the terms
1053 // whose difference yielded incrementalResult,
1054 // then something is _very_ wrong--probably te < ts,
1055 // which should never happen. We assert in this case;
1056 // this will enable us to see the problem, but the end user
1057 // will not see the assertion, unless s/he's running in debug mode.
1058 if (incrementalResult/fabs(coeff_s * SafeProductWithExp(arg1_s, gts))
1059 < -1e-10)
1060 assert(0);
1061 incrementalResult = 0.0;
1062 }
1063
1064 starttime = (*tit).endtime;
1065 result += incrementalResult;
1066
1067 }
1068
1069 return result;
1070 }
1071
1072 //------------------------------------------------------------------------------------
1073 // erynes 2004/05/20 -- This method currently returns the following:
1074 //
1075 // sum_over_coalescent_time_intervals_of_population_whichparam(
1076 // -(k_i(k_i - 1)/(theta*theta))*( (t_i_start - t_i_end) +
1077 // (1/2)*g*(t_i_start^2 - t_i_end^2) )
1078 // )
1079 // where
1080 // k_i is the number of lineages of this population in time interval i
1081 // g is the current estimate of the growth parameter for this pop.
1082 // theta is the current estimate of the theta parameter for this pop.
1083 // t_i_start is the timepoint at the beginning of time interval i
1084 // t_i_end is the timepoint at the end of time interval i
1085 //
1086 // This non-public method is used to handle the case in which fabs(g)
1087 // is tiny--that is, less than GROWTH_EPSILON. It is derived from the
1088 // formula used in DWait by expanding exp(g*t) in a Taylor series
1089 // about g == 0, multiplying this by the term -k(k-1)/(g*theta*theta),
1090 // and dropping all terms of order g^2 and above.
1091 // Brief tests with Mathematica suggest that the original formula
1092 // converges very nicely to this linear formula for normal values
1093 // of the timepoints, the difference being less than one part in one
1094 // billion, dropping down to zero difference for g == 0 (using
1095 // L'Hopital's rule).
1096 //
1097 // Please also see the comments for DWait.
1098
DlnWaitForTinyGrowth(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1099 double CoalesceGrowPL::DlnWaitForTinyGrowth(const vector < double >¶m, const TreeSummary * treedata,
1100 const long int & whichparam)
1101 {
1102 const double growth = param[whichparam + m_growthstart];
1103 assert(fabs(growth) < GROWTH_EPSILON); // otherwise use DWait()
1104 const double theta = param[whichparam];
1105 double k, te;
1106 long int which = whichparam - m_start;
1107 const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
1108 list<Interval>::const_iterator tit = treesum.begin();
1109
1110 double result = 0.0;
1111 double ts = 0.0;
1112
1113 for( ; tit != treesum.end(); ++tit)
1114 {
1115 k = (*tit).xpartlines[which];
1116 te = (*tit).endtime;
1117
1118 result += -(k * (k - 1.0) / (theta * theta)) *
1119 ((ts - te) + 0.5 * growth * (ts * ts - te * te));
1120 ts = te;
1121 }
1122
1123 return result;
1124 }
1125
1126 //------------------------------------------------------------------------------------
1127 // Mary 2007/08/13 -- added support for disease w/ recombination
1128 //
1129 // JDEBUG--: only in absence of migration
1130
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long & whichparam)1131 double CoalesceGrowPL::DlnPoint (const vector < double >¶m, const TreeSummary * treedata, const long &whichparam)
1132 {
1133 long which = whichparam - m_start;
1134 const vector<double>& ncoal = treedata->GetCoalSummary()->GetShortPoint();
1135
1136 double answ(-ncoal[which] / param[whichparam]);
1137
1138 const LongVec2d& picks = treedata->GetCoalSummary()->GetShortPicks();
1139 if (picks.empty()) return answ;
1140 else {
1141 DoubleVec1d timesizes(m_nPop, 0.0);
1142 ForceParameters fp(unknown_region);
1143 DoubleVec1d myths(param.begin()+m_start,param.begin()+m_end);
1144 fp.SetRegionalThetas(myths);
1145 DoubleVec1d mygs(param.begin()+m_growthstart,
1146 param.begin()+m_growthstart+m_nPop);
1147 fp.SetGrowthRates(mygs);
1148 // now for every recombination event....
1149 const Interval* reclist = treedata->GetRecSummary()->GetLongPoint();
1150 const Interval* tit;
1151 for (tit = reclist; tit != NULL; tit = tit->next)
1152 {
1153 // compute partner-picks denominator
1154 timesizes = m_timesize->XpartThetasAtT(tit->endtime,fp);
1155 assert(timesizes.size() == static_cast<DoubleVec1d::size_type>(m_nPop));
1156 double denominator(std::accumulate(timesizes.begin(), timesizes.end(),0.0));
1157
1158 // compute partner-picks numerator
1159 // ALL assuming no migration and only one local partition force
1160 // therefore xpart == part
1161 answ -= 1.0/denominator;
1162 if (tit->partnerpicks[0] == which)
1163 {
1164 answ += 1.0/timesizes[which];
1165 }
1166 }
1167 }
1168
1169 return answ;
1170 } // CoalesceGrowPL::DlnPoint
1171
1172 //------------------------------------------------------------------------------------
1173 // GrowPL: exponential growth forces specific waiting time (=0) and
1174 // point probabilites and its derivatives
1175 // LONG DATA SUMMARIES
1176
lnWait(const vector<double> & param,const TreeSummary * treedata)1177 double GrowPL::lnWait (const vector < double >¶m, const TreeSummary * treedata)
1178 {
1179 // Growth is a modifier of theta, hence its contribution
1180 // is included in CoalesceGrowPL::Wait.
1181 return 0.0;
1182 }
1183
1184 //------------------------------------------------------------------------------------
1185
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)1186 double GrowPL::lnPoint (const vector < double >¶m, const vector < double >&lparam, const TreeSummary * treedata)
1187 {
1188 // Growth is a modifier of theta, hence its contribution
1189 // is included in CoalesceGrowPL::Point.
1190 return 0.0;
1191 }
1192
1193 //------------------------------------------------------------------------------------
1194 // erynes 2004/04/09 -- This method currently returns the following:
1195 //
1196 // sum_over_coalescent_time_intervals_of_population_whichparam(
1197 // -(k_i(k_i - 1)/(g*g*theta))*(
1198 // (1 - g*t_i_start)*exp(g*t_i_start) -
1199 // (1 - g*t_i_end) *exp(g*t_i_end)
1200 // )
1201 // )
1202 // where
1203 // k_i is the number of lineages of this population in time interval i
1204 // g is the current estimate of the growth parameter for this pop.
1205 // theta is the current estimate of the theta parameter for this pop.
1206 // t_i_start is the timepoint at the beginning of time interval i
1207 // t_i_end is the timepoint at the end of time interval i
1208
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1209 double GrowPL::DlnWait (const vector < double >¶m, const TreeSummary * treedata, const long int & whichparam)
1210 {
1211 const double growth = param[whichparam];
1212 if (fabs(growth) < GROWTH_EPSILON)
1213 return DlnWaitForTinyGrowth(param, treedata, whichparam);
1214
1215 const long int whicht = m_thetastart + whichparam - m_start;
1216 const double theta = param[whicht];
1217 assert(theta); // we divide by it below
1218 double gte, gts, arg1_s, arg1_e, coeff_s, coeff_e;
1219 long int k;
1220
1221 const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
1222 list <Interval> :: const_iterator tit = treesum.begin();
1223
1224 double result = 0.0;
1225 double starttime = 0.0;
1226
1227 for( ; tit != treesum.end(); ++tit)
1228 {
1229 gts = growth * starttime;
1230 gte = growth * (*tit).endtime;
1231
1232 k = (*tit).xpartlines[whicht];
1233
1234 arg1_s = 1.0 - gts;
1235 arg1_e = 1.0 - gte;
1236 coeff_s = 1.0;
1237 coeff_e = 1.0;
1238
1239 // IMPORTANT NOTE: The following over/underflow checking
1240 // assumes that each starttime and endtime is always >= 0.
1241 // Hence, e.g., gts < 0 means growth < 0.
1242
1243 if (gts > 1.0)
1244 {
1245 coeff_s = -k * (k - 1.0);
1246 if (growth > 1.0)
1247 arg1_s /= growth*growth;
1248 else
1249 coeff_s /= growth*growth;
1250 if (theta > 1.0)
1251 arg1_s /= theta;
1252 else
1253 coeff_s /= theta;
1254 }
1255 else if (gts < -1.0)
1256 {
1257 arg1_s *= -k*(k-1.0);
1258 if (growth < -1.0)
1259 coeff_s /= growth*growth;
1260 else
1261 arg1_s /= growth*growth;
1262 if (theta > 1.0)
1263 coeff_s /= theta;
1264 else
1265 arg1_s /= theta;
1266 }
1267 else // no over/underflow in exp()
1268 coeff_s = -k*(k-1.0)/(theta*growth*growth); // arg1_s already set
1269
1270 // BUGBUG erynes -- this is easy to follow,
1271 // but we should revisit it and possibly reuse
1272 // the growth and theta checks that we made for the gts case.
1273
1274 if (gte > 1.0)
1275 {
1276 coeff_e = -k*(k-1.0);
1277 if (growth > 1.0)
1278 arg1_e /= growth*growth;
1279 else
1280 coeff_e /= growth*growth;
1281 if (theta > 1.0)
1282 arg1_e /= theta;
1283 else
1284 coeff_e /= theta;
1285 }
1286 else if (gte < -1.0)
1287 {
1288 arg1_e *= -k*(k-1.0);
1289 if (growth < -1.0)
1290 coeff_e /= growth*growth;
1291 else
1292 arg1_e /= growth*growth;
1293 if (theta > 1.0)
1294 coeff_e /= theta;
1295 else
1296 arg1_e /= theta;
1297 }
1298 else // no over/underflow in exp()
1299 coeff_e = -k*(k-1.0)/(theta*growth*growth); // arg1_e already set
1300
1301 starttime = (*tit).endtime;
1302 result += coeff_s * SafeProductWithExp(arg1_s, gts)
1303 - coeff_e * SafeProductWithExp(arg1_e, gte);
1304 }
1305
1306 return result;
1307 }
1308
1309 //------------------------------------------------------------------------------------
1310 // erynes 2004/05/20 -- This method currently returns the following:
1311 //
1312 // sum_over_coalescent_time_intervals_of_population_whichparam(
1313 // (k_i(k_i - 1)/(2*theta))*(
1314 // tis^2 * (1 + g*tis) - tie^2 * (1 + g*tie)
1315 // )
1316 // )
1317 // where
1318 // k_i is the number of lineages of this population in time interval i
1319 // g is the current estimate of the growth parameter for this pop.
1320 // theta is the current estimate of the theta parameter for this pop.
1321 // tis is the timepoint at the beginning of time interval i
1322 // tie is the timepoint at the end of time interval i
1323 //
1324 // This non-public method is used to handle the case in which fabs(g)
1325 // is tiny--that is, less than GROWTH_EPSILON. It is derived from the
1326 // formula used in DWait by expanding exp(g*t) in a Taylor series
1327 // about g == 0, multiplying through in the original formula,
1328 // and dropping all terms of order g^2 and above.
1329 // Brief tests with Mathematica suggest that the original formula
1330 // converges very nicely to this linear formula for normal values
1331 // of the timepoints, the difference being less than one part in one
1332 // billion, dropping down to zero difference for g == 0 (using
1333 // L'Hopital's rule).
1334 //
1335 // Please also see the comments for DWait.
1336
DlnWaitForTinyGrowth(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1337 double GrowPL::DlnWaitForTinyGrowth(const vector < double >¶m, const TreeSummary * treedata,
1338 const long int & whichparam)
1339 {
1340 const double growth = param[whichparam];
1341 assert(fabs(growth) < GROWTH_EPSILON); // otherwise use DWait
1342
1343 const long int whicht = m_thetastart + whichparam - m_start;
1344 const double theta = param[whicht];
1345 assert(theta); // we divide by it below
1346 double k, te;
1347 const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
1348 list <Interval> :: const_iterator tit = treesum.begin();
1349
1350 double result = 0.0;
1351 double ts = 0.0;
1352
1353 for( ; tit != treesum.end(); ++tit)
1354 {
1355 te = (*tit).endtime;
1356 k = (*tit).xpartlines[whicht];
1357
1358 result += (k * (k - 1.0) / (2.0 * theta)) *
1359 (ts * ts * (1.0 + growth * ts) - te * te * (1.0 + growth * te));
1360 ts = te;
1361 }
1362
1363 return result;
1364 }
1365
1366 //------------------------------------------------------------------------------------
1367 // This method presently returns:
1368 //
1369 // sum_over_time_intervals_of_this_population's_coalescences(endtime)
1370 //
1371 // This is the derivative of log(Point) with respect to growth.
1372
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1373 double GrowPL::DlnPoint (const vector < double >¶m, const TreeSummary * treedata, const long int & whichparam)
1374 {
1375 const Interval * treesum = treedata->GetCoalSummary()->GetLongPoint();
1376 const Interval * tit;
1377 double result = 0.0 ;
1378 // indicator to Theta, we need this to get the correct
1379 // time intervals in treesum
1380 long int whicht = m_thetastart + whichparam - m_start;
1381
1382 for(tit = treesum; tit != NULL; tit = tit->next)
1383 {
1384 if(whicht == (*tit).oldstatus)
1385 result += (*tit).endtime;
1386 }
1387
1388 return result;
1389 }
1390
1391 //------------------------------------------------------------------------------------
1392 //------------------------------------------------------------------------------------
1393
lnWait(const vector<double> & param,const TreeSummary * treedata)1394 double DivMigPL::lnWait(const vector<double>& param, const TreeSummary* treedata)
1395 {
1396 const vector<double>& mWait = treedata->GetDivMigSummary()->GetShortWait();
1397
1398 // sum_pop(SM_pop *kt)
1399 return -1.0 * inner_product (mWait.begin (),
1400 mWait.end (), param.begin() + m_start, 0.0);
1401
1402 }
1403
1404 //------------------------------------------------------------------------------------
1405
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)1406 double DivMigPL::lnPoint(const vector<double>& param, const vector<double>& lparam, const TreeSummary* treedata)
1407 {
1408 // Sum_j(Sum_i(migevent[j,i] * log(M[j,i]))
1409 const vector<double>& nmig = treedata->GetDivMigSummary()->GetShortPoint();
1410
1411 return inner_product (nmig.begin(), nmig.end(), lparam.begin() + m_start,
1412 0.0);
1413 }
1414
1415 //------------------------------------------------------------------------------------
1416 //------------------------------------------------------------------------------------
1417
1418 // RecombinePL: recombination forces specific waiting time and
1419 // point probabilites and its derivatives
1420
lnWait(const vector<double> & param,const TreeSummary * treedata)1421 double RecombinePL::lnWait (const vector < double >¶m, const TreeSummary * treedata)
1422 {
1423 const vector<double>& rWait = treedata->GetRecSummary()->GetShortWait();
1424 return -rWait[0] * *(param.begin () + m_start);
1425 }
1426
1427 //------------------------------------------------------------------------------------
1428
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)1429 double RecombinePL::lnPoint (const vector < double >¶m, const vector < double >&lparam,
1430 const TreeSummary * treedata)
1431 {
1432 // Sum_j(recevents * log(r))
1433 const vector<double>& nrec = treedata->GetRecSummary()->GetShortPoint();
1434 return nrec[0] * *(lparam.begin () + m_start);
1435 }
1436
1437 //------------------------------------------------------------------------------------
1438
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1439 double RecombinePL::DlnWait (const vector < double >¶m, const TreeSummary * treedata,
1440 const long int & whichparam)
1441 {
1442 const vector<double>& rWait = treedata->GetRecSummary()->GetShortWait();
1443 return -rWait[0];
1444 }
1445
1446 //------------------------------------------------------------------------------------
1447
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1448 double RecombinePL::DlnPoint (const vector < double >¶m, const TreeSummary * treedata,
1449 const long int & whichparam)
1450 {
1451 const vector<double>& nrecs = treedata->GetRecSummary()->GetShortPoint();
1452 if(nrecs[0] == 0)
1453 {
1454 return 0.0;
1455 }
1456 else
1457 {
1458 return SafeDivide(nrecs[0], *(param.begin() + m_start));
1459 }
1460 }
1461
1462 //------------------------------------------------------------------------------------
1463 // SelectPL: selection forces specific waiting time and
1464 // point probabilites and its derivatives
1465
lnWait(const vector<double> & param,const TreeSummary * treedata)1466 double SelectPL::lnWait (const vector < double >¶m, const TreeSummary * treedata)
1467 {
1468 return 0.0;
1469 }
1470
1471 //------------------------------------------------------------------------------------
1472
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)1473 double SelectPL::lnPoint (const vector < double >¶m, const vector < double >&lparam, const TreeSummary * treedata)
1474 {
1475 return 0.0;
1476 }
1477
1478 //------------------------------------------------------------------------------------
1479
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1480 double SelectPL::DlnWait (const vector < double >¶m, const TreeSummary * treedata, const long int & whichparam)
1481 {
1482 return 0.0;
1483 }
1484
1485 //------------------------------------------------------------------------------------
1486
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1487 double SelectPL::DlnPoint (const vector < double >¶m, const TreeSummary * treedata, const long int & whichparam)
1488 {
1489 return 0.0;
1490 }
1491
1492 //------------------------------------------------------------------------------------
1493 // This function computes the "waiting time" contribution to the log-likelihood.
1494 // It computes and returns
1495 //
1496 // sum_over_all_timeIntervals_i( -kA_i*(kA_i - 1)*dtauA_i/thetaA0 -ka_i*(ka_i - 1)*dtaua_i/thetaa0 ),
1497 //
1498 // where "A" is the favored allele, "a" is the disfavored allele, "k" is the number of lineages
1499 // in population "A" or "a," and dtau is the length of the time interval in the "Kingman reference frame"
1500 // (i.e., time running as usual and pop. size remaining constant). The transformations from
1501 // "magic time" dtau to real time "dt," where dt = te - ts = endtime - starttime, are:
1502 //
1503 // dtau_A = (theta_A0*dt + (theta_a0/s)*(exp(s*te) - exp(s*ts)))/(theta_A0 + theta_a0),
1504 // dtau_a = (theta_a0*dt - (theta_A0/s)*(exp(-s*te) - exp(-s*ts)))/(theta_A0 + theta_a0).
1505
lnWait(const vector<double> & param,const TreeSummary * treedata)1506 double CoalesceLogisticSelectionPL::lnWait(const vector<double>& param, const TreeSummary *treedata)
1507 {
1508 if (2 != m_nPop)
1509 {
1510 string msg = "CoalesceLogisticSelectionPL::lnWait() called with m_nPop = ";
1511 msg += ToString(m_nPop);
1512 msg += ". m_nPop must equal 2, reflecting one population with the major ";
1513 msg += "allele and one population with the minor allele.";
1514 throw implementation_error(msg);
1515 }
1516 double mu_into_A_from_a(param[3]), mu_into_a_from_A(param[4]);
1517
1518 if (0 != param[2] || 0 != param[5] || 0 == param[3] || 0 == param[4])
1519 throw implementation_error("Error parsing disease rates in CoalesceLogisticSelectionPL::lnWait()");
1520
1521 const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
1522 list<Interval>::const_iterator treesum_it;
1523
1524 double result(0.0), s(param[m_s_is_here]);
1525 double theta_A0(param[m_start]), theta_a0(param[m_start+1]);
1526 double totalTheta_0(theta_A0 + theta_a0);
1527
1528 if (theta_a0 <= 0.0 || theta_A0 <= 0.0)
1529 {
1530 string msg = "CoalesceLogisticSelectionPL::lnWait(), received an invalid Theta value ";
1531 msg += "(theta_A0 = " + ToString(theta_A0) + ", theta_a0 = " + ToString(theta_a0) + ").";
1532 throw impossible_error(msg);
1533 }
1534
1535 if (fabs(s) < LOGISTIC_SELECTION_COEFFICIENT_EPSILON)
1536 return lnWaitForTinyLogisticSelectionCoefficient(param, treedata);
1537
1538 if (fabs(s) >= DBL_BIG)
1539 return -DBL_BIG; // unavoidable overflow
1540
1541 double factor_A(theta_a0/(s*theta_A0)), factor_a(theta_A0/(-s*theta_a0));
1542 double t_e, t_s(0.0), dt, e_toThe_sts(1.0), e_toThe_ste, term;
1543 double term_A(0.0), term_a(0.0);
1544
1545 for (treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
1546 {
1547 t_e = treesum_it->endtime;
1548 dt = t_e - t_s;
1549 term_A = term_a = 0.0;
1550 const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
1551 const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
1552
1553 if (s > 0.0)
1554 {
1555 // Use overflow protection for term_A; if term_a underflows, that's okay.
1556
1557 if (s*dt > LOG_ONEPLUSEPSILON)
1558 {
1559 term = SafeProductWithExp(factor_A, s*dt); // coefficient guaranteed finite and positive
1560 if (term >= DBL_BIG)
1561 return -DBL_BIG; // unavoidable overflow
1562 e_toThe_ste = (term/factor_A)*e_toThe_sts;
1563 if (k_A > 0.0)
1564 {
1565 term_A = k_A*(term - factor_A)*e_toThe_sts*((k_A - 1.0)/totalTheta_0 + mu_into_A_from_a)
1566 + k_A*(k_A - 1.0)*dt/totalTheta_0;
1567 // Note: A factor of theta_A0 cancels in the "dt" term.
1568 if (term_A >= DBL_BIG)
1569 return -DBL_BIG; // unavoidable overflow
1570 }
1571 }
1572 else
1573 {
1574 if (s*dt >= numeric_limits<double>::epsilon())
1575 e_toThe_ste = (1.0 + s*dt)*e_toThe_sts;
1576 else
1577 e_toThe_ste = SafeProductWithExp(1.0,s*t_e);
1578 if (k_A > 0.0)
1579 {
1580 term_A = k_A*factor_A*s*dt*e_toThe_sts*((k_A - 1.0)/totalTheta_0 + mu_into_A_from_a)
1581 + k_A*(k_A - 1.0)*dt/totalTheta_0;
1582 // Note: A factor of theta_A0 cancels in the "dt" term.
1583 if (term_A >= DBL_BIG)
1584 return -DBL_BIG; // unavoidable overflow
1585 }
1586 }
1587 if (k_a > 0.0)
1588 term_a = k_a * factor_a
1589 * (1.0/e_toThe_ste - 1.0/e_toThe_sts)
1590 * ((k_a - 1.0)/totalTheta_0 + mu_into_a_from_A)
1591 + k_a*(k_a - 1.0)*dt/totalTheta_0;
1592 // Note: A factor of theta_a0 cancels in the "dt" term.
1593 }
1594 else // s < 0
1595 {
1596 // Use overflow protection for term_a; if term_A underflows, that's okay.
1597
1598 if (-s*dt > LOG_ONEPLUSEPSILON)
1599 {
1600 term = SafeProductWithExp(factor_a,-s*dt); // coefficient guaranteed finite and positive
1601 if (term >= DBL_BIG)
1602 return -DBL_BIG; // unavoidable overflow
1603 e_toThe_ste = e_toThe_sts/(term/factor_a);
1604 if (k_a > 0.0)
1605 {
1606 term_a = k_a*((term - factor_a)/e_toThe_sts)*((k_a - 1.0)/totalTheta_0 + mu_into_a_from_A)
1607 + k_a*(k_a - 1.0)*dt/totalTheta_0;
1608 // Note: A factor of theta_a0 cancels in the "dt" term.
1609 if (term_a >= DBL_BIG)
1610 return -DBL_BIG; // unavoidable overflow
1611 }
1612 }
1613 else
1614 {
1615 if (-s*dt >= numeric_limits<double>::epsilon())
1616 e_toThe_ste = (1.0 + s*dt)*e_toThe_sts;
1617 else
1618 e_toThe_ste = SafeProductWithExp(1.0,s*t_e);
1619 if (k_a > 0.0)
1620 {
1621 term_a = k_a*(factor_a*(-s)*dt/e_toThe_sts)*((k_a - 1.0)/totalTheta_0 + mu_into_a_from_A)
1622 + k_a*(k_a - 1.0)*dt/totalTheta_0;
1623 // Note: A factor of theta_a0 cancels in the "dt" term.
1624 if (term_a >= DBL_BIG)
1625 return -DBL_BIG; // unavoidable overflow
1626 }
1627 }
1628 if (k_A > 0.0)
1629 term_A = k_A*factor_A*(e_toThe_ste - e_toThe_sts)*((k_A - 1.0)/totalTheta_0 + mu_into_A_from_a)
1630 + k_A*(k_A - 1.0)*dt/totalTheta_0;
1631 // Note: A factor of theta_a0 cancels in the "dt" term.
1632 }
1633
1634 result -= term_A + term_a;
1635 if (result <= -DBL_BIG)
1636 return -DBL_BIG;
1637
1638 t_s = t_e;
1639 e_toThe_sts = e_toThe_ste;
1640 }
1641
1642 return result;
1643 }
1644
1645 //------------------------------------------------------------------------------------
1646 // See comment for lnWait(), above.
1647 // This function expands lnWait() in a Taylor series about s = 0,
1648 // so that we can smoothly handle the factor of 1/s in lnWait().
1649 // This version retains only the first two terms in
1650 // (exp(s*te) - exp(s*ts))/s = (te-ts) + (s/2!)(te^2 - ts^2) + (s^2/3!)(te^3 - ts^3) + ....
1651
lnWaitForTinyLogisticSelectionCoefficient(const vector<double> & param,const TreeSummary * treedata)1652 double CoalesceLogisticSelectionPL::lnWaitForTinyLogisticSelectionCoefficient(const vector<double>& param,
1653 const TreeSummary* treedata)
1654 {
1655 double mu_into_A_from_a(param[3]), mu_into_a_from_A(param[4]);
1656
1657 if (0 != param[2] || 0 != param[5] || 0 == param[3] || 0 == param[4])
1658 throw implementation_error("Error parsing disease rates in "
1659 "CoalesceLogisticSelectionPL::lnWaitForTinyLogisticSelectionCoefficient()");
1660
1661 const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
1662 list<Interval>::const_iterator treesum_it;
1663
1664 double result(0.0), t_e, t_s(0.0), s(param[m_s_is_here]), dt, term;
1665 double theta_A0(param[m_start]), theta_a0(param[m_start+1]), totalTheta_0(theta_A0+theta_a0);
1666
1667 for (treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
1668 {
1669 t_e = treesum_it->endtime;
1670 const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
1671 const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
1672 dt = t_e - t_s;
1673
1674 term = 0.5*s*(t_e*t_e - t_s*t_s);
1675
1676 result += -(k_A*dt/theta_A0)*((k_A - 1.0) + theta_a0*mu_into_A_from_a)
1677 - (k_A*theta_a0*term/theta_A0)*((k_A - 1.0)/totalTheta_0 + mu_into_A_from_a);
1678
1679 result += -(k_a*dt/theta_a0)*((k_a - 1.0) + theta_A0*mu_into_a_from_A)
1680 + (k_a*theta_A0*term/theta_a0)*((k_a - 1.0)/totalTheta_0 + mu_into_a_from_A);
1681
1682 t_s = t_e;
1683 }
1684
1685 return result;
1686 }
1687
1688 //------------------------------------------------------------------------------------
1689 // This function computes the "point" contribution to the log-likelihood.
1690 // It computes and returns
1691 //
1692 // sum_over_all_coalescent_times_in_A( ln(2/theta_A(te_A)) )
1693 // + sum_over_all_coalescent_times_in_a( ln(2/theta_a(te_a)) ),
1694 //
1695 // where te_A is the time (te = "endtime") at the bottom of a time interval
1696 // which ends with a coalescence in the population with allele "A," and
1697 // similarly for the pop. with allele "a."
1698 // theta_A(t) + theta_a(t) = theta_A(0) + theta_a(0) for all times t;
1699 // that is, the overall population size is assumed constant. We have
1700 //
1701 // theta_A(t) = ((theta_A0+theta_a0)/(theta_A0+theta_a0*exp(s*t)))*theta_A0,
1702 // theta_a(t) = ((theta_A0+theta_a0)/(theta_A0+theta_a0*exp(s*t)))*theta_a0*exp(s*t).
1703
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)1704 double CoalesceLogisticSelectionPL::lnPoint(const vector<double>& param, const vector<double>& lparam,
1705 const TreeSummary* treedata)
1706 {
1707 if (2 != m_nPop)
1708 {
1709 string msg = "CoalesceLogisticSelectionPL::lnPoint() called with m_nPop = ";
1710 msg += ToString(m_nPop);
1711 msg += ". m_nPop must equal 2, reflecting one population with the major ";
1712 msg += "allele and one population with the minor allele.";
1713 throw implementation_error(msg);
1714 }
1715
1716 const Interval *treesum = treedata->GetCoalSummary()->GetLongPoint();
1717 const Interval *pTreesum;
1718 double theta_A0(param[0]), theta_a0(param[1]), logTheta_A0(lparam[0]),
1719 logTheta_a0(lparam[1]), logThetaTotal_0, s(param[m_s_is_here]);
1720
1721 if (theta_A0 > 0.0 && theta_a0 > 0.0)
1722 logThetaTotal_0 = log(theta_A0 + theta_a0);
1723 else
1724 {
1725 // This really, really should never ever happen.
1726 string msg = "CoalesceLogisticSelectionPL::lnPoint(), received an invalid value ";
1727 msg += "(theta_A0 = ";
1728 msg += ToString(theta_A0);
1729 msg += " and theta_a0 = " + ToString(theta_a0) + "); s = ";
1730 msg += ToString(s) + ".";
1731 throw impossible_error(msg);
1732 }
1733
1734 double result = 0.0;
1735
1736 for(pTreesum = treesum; pTreesum != NULL; pTreesum = pTreesum->next)
1737 {
1738 double thetaDenominator = theta_A0 +
1739 SafeProductWithExp(theta_a0, s * pTreesum->endtime); // sum is guaranteed to be > 0
1740
1741 if (thetaDenominator < DBL_BIG)
1742 result += LOG2 - logThetaTotal_0 + log(thetaDenominator);
1743 else
1744 result += LOG2 - logThetaTotal_0 + logTheta_a0 + s*pTreesum->endtime;
1745 if (0L == pTreesum->oldstatus)
1746 result -= logTheta_A0;
1747 else if (1L == pTreesum->oldstatus)
1748 result -= logTheta_a0 + (pTreesum->endtime * s);
1749 else
1750 {
1751 string msg = "CoalesceLogisticSelectionPL::lnPoint(), received a TreeSummary ";
1752 msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
1753 msg += ". \"oldstatus\" must be either 0 or 1, reflecting a coalescene in either ";
1754 msg += "the population with allele A or the population with allele a.";
1755 throw implementation_error(msg);
1756 }
1757 }
1758
1759 return result;
1760 }
1761
1762 //------------------------------------------------------------------------------------
1763
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1764 double CoalesceLogisticSelectionPL::DlnWait(const vector<double>& param, const TreeSummary * treedata,
1765 const long int & whichparam)
1766 {
1767 if (2 != m_nPop)
1768 {
1769 string msg = "CoalesceLogisticSelectionPL::DlnWait() called with m_nPop = ";
1770 msg += ToString(m_nPop);
1771 msg += ". m_nPop must equal 2, reflecting one population with the major ";
1772 msg += "allele and one population with the minor allele.";
1773 throw implementation_error(msg);
1774 }
1775
1776 double s(param[m_s_is_here]);
1777
1778 if (fabs(s) < LOGISTIC_SELECTION_COEFFICIENT_EPSILON)
1779 return DlnWaitForTinyLogisticSelectionCoefficient(param, treedata, whichparam);
1780
1781 bool derivativeWithRespectTo_A;
1782
1783 if (0 == whichparam)
1784 derivativeWithRespectTo_A = true;
1785 else if (1 == whichparam)
1786 derivativeWithRespectTo_A = false;
1787 else
1788 {
1789 string msg = "CoalesceLogisticSelectionPL::DlnWait() called with whichparam = ";
1790 msg += ToString(whichparam);
1791 msg += ". \"whichparam\" must equal 0 or 1, reflecting one population with the favored ";
1792 msg += "allele and one population with the disfavored allele.";
1793 throw implementation_error(msg);
1794 }
1795
1796 if (s >= DBL_BIG)
1797 return derivativeWithRespectTo_A ? DBL_BIG : -DBL_BIG;
1798 if (s <= -DBL_BIG)
1799 return derivativeWithRespectTo_A ? -DBL_BIG : DBL_BIG;
1800
1801 if (0 != param[2] || 0 != param[5] || 0 == param[0] || 0 == param[1] ||
1802 0 == param[3] || 0 == param[4])
1803 throw implementation_error("Bad param received by CoalesceLogisticSelectionPL::DlnWait().");
1804
1805 const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
1806 list<Interval>::const_iterator treesum_it;
1807 double theta_A0(param[0]), theta_a0(param[1]), totalTheta_0(theta_A0+theta_a0);
1808 double term_A(0.0), term_a(0.0), term, result(0.0);
1809 double t_s(0.0),t_e,dt,e_toThe_sts(1.0),e_toThe_ste;
1810 double factor = 1.0/(totalTheta_0*totalTheta_0);
1811 double thetaFactor_A = theta_a0/(theta_A0*theta_A0);
1812 double thetaFactor_a = theta_A0/(theta_a0*theta_a0);
1813 double mu_into_A_from_a(param[3]), mu_into_a_from_A(param[4]);
1814
1815 for (treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
1816 {
1817 t_e = treesum_it->endtime;
1818 dt = t_e - t_s;
1819 term_A = term_a = 0.0;
1820 const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
1821 const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
1822
1823 if (s > 0.0)
1824 {
1825 // Use overflow protection for term_A; if term_a underflows, that's okay.
1826
1827 if (s*dt > LOG_ONEPLUSEPSILON)
1828 {
1829 term = SafeProductWithExp(factor/s,s*dt); // quotient guaranteed finite and positive
1830 if (term >= DBL_BIG)
1831 return derivativeWithRespectTo_A ? DBL_BIG : -DBL_BIG; // unavoidable overflow
1832 e_toThe_ste = (term/(factor/s))*e_toThe_sts;
1833 if (k_A > 0)
1834 {
1835 if (derivativeWithRespectTo_A)
1836 term_A = (term - factor/s)*thetaFactor_A*e_toThe_sts;
1837 else
1838 term_A = (term - factor/s)*e_toThe_sts;
1839 if (term_A >= DBL_BIG)
1840 return derivativeWithRespectTo_A ? DBL_BIG : -DBL_BIG; // unavoidable overflow
1841 }
1842 }
1843 else
1844 {
1845 if (s*dt >= numeric_limits<double>::epsilon())
1846 e_toThe_ste = (1.0 + s*dt)*e_toThe_sts;
1847 else
1848 e_toThe_ste = SafeProductWithExp(1.0,s*t_e);
1849 if (k_A > 0)
1850 {
1851 if (derivativeWithRespectTo_A)
1852 term_A = factor*dt*thetaFactor_A*e_toThe_sts;
1853 else
1854 term_A = factor*dt*e_toThe_sts;
1855 if (term_A >= DBL_BIG)
1856 return derivativeWithRespectTo_A ? DBL_BIG : -DBL_BIG; // unavoidable overflow
1857 }
1858 }
1859 if (k_a > 0)
1860 {
1861 if (derivativeWithRespectTo_A)
1862 term_a = (factor/(-s))*(1.0/e_toThe_ste - 1.0/e_toThe_sts);
1863 else
1864 term_a = (factor/(-s))*thetaFactor_a*(1.0/e_toThe_ste - 1.0/e_toThe_sts);
1865 }
1866 }
1867
1868 else // s < 0
1869 {
1870 // Use overflow protection for dtau_a; if dtau_A underflows, that's okay.
1871
1872 if (-s*dt > LOG_ONEPLUSEPSILON)
1873 {
1874 term = SafeProductWithExp(factor/(-s),-s*dt);// quotient guaranteed finite and positive
1875 if (term >= DBL_BIG)
1876 return derivativeWithRespectTo_A ? -DBL_BIG : DBL_BIG; // unavoidable overflow
1877 e_toThe_ste = e_toThe_sts/(term/(factor/(-s)));
1878 if (k_a > 0)
1879 {
1880 if (derivativeWithRespectTo_A)
1881 term_a = (term - factor/(-s))/e_toThe_sts;
1882 else
1883 term_a = (term - factor/(-s))*thetaFactor_a/e_toThe_sts;
1884 if (term_a >= DBL_BIG)
1885 return derivativeWithRespectTo_A ? -DBL_BIG : DBL_BIG; // unavoidable overflow
1886 }
1887 }
1888 else
1889 {
1890 if (-s*dt >= numeric_limits<double>::epsilon())
1891 e_toThe_ste = (1.0 + s*dt)*e_toThe_sts;
1892 else
1893 e_toThe_ste = SafeProductWithExp(1.0,s*t_e);
1894 if (k_a > 0)
1895 {
1896 if (derivativeWithRespectTo_A)
1897 term_a = factor*dt/e_toThe_sts;
1898 else
1899 term_a = factor*dt*thetaFactor_a/e_toThe_sts;
1900 if (term_a >= DBL_BIG)
1901 return derivativeWithRespectTo_A ? -DBL_BIG : DBL_BIG; // unavoidable overflow
1902 }
1903 }
1904 if (k_A > 0)
1905 {
1906 if (derivativeWithRespectTo_A)
1907 term_A = (factor/s)*thetaFactor_A*(e_toThe_ste - e_toThe_sts);
1908 else
1909 term_A = (factor/s)*(e_toThe_ste - e_toThe_sts);
1910 }
1911 }
1912
1913 if (derivativeWithRespectTo_A)
1914 result += k_A*term_A*((k_A-1.0)*(theta_A0+totalTheta_0) + mu_into_A_from_a/factor)
1915 - k_a*term_a*((k_a-1.0) + mu_into_a_from_A/(factor*theta_a0))
1916 + dt*factor*(k_A*(k_A-1.0) + k_a*(k_a-1.0));
1917 else
1918 result += -k_A*term_A*((k_A-1.0) + mu_into_A_from_a/(factor*theta_A0))
1919 + k_a*term_a*((k_a-1.0)*(theta_a0+totalTheta_0) + mu_into_a_from_A/factor)
1920 + dt*factor*(k_A*(k_A-1.0) + k_a*(k_a-1.0));
1921
1922 if (result >= DBL_BIG)
1923 return DBL_BIG;
1924 if (result <= -DBL_BIG)
1925 return -DBL_BIG;
1926
1927 t_s = t_e;
1928 e_toThe_sts = e_toThe_ste;
1929 }
1930
1931 return result;
1932 }
1933
1934 //------------------------------------------------------------------------------------
1935
DlnWaitForTinyLogisticSelectionCoefficient(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1936 double CoalesceLogisticSelectionPL::DlnWaitForTinyLogisticSelectionCoefficient(const vector<double>& param,
1937 const TreeSummary *treedata,
1938 const long int & whichparam)
1939 {
1940 bool derivativeWithRespectTo_A;
1941
1942 if (0 == whichparam)
1943 derivativeWithRespectTo_A = true;
1944 else if (1 == whichparam)
1945 derivativeWithRespectTo_A = false;
1946 else
1947 {
1948 string msg = "CoalesceLogisticSelectionPL::DlnWaitForTinyLogisticSelectionCoefficient() ";
1949 msg += "called with whichparam = ";
1950 msg += ToString(whichparam);
1951 msg += ". \"whichparam\" must equal 0 or 1, reflecting one subpopulation with the favored ";
1952 msg += "allele and one subpopulation with the disfavored allele.";
1953 throw implementation_error(msg);
1954 }
1955
1956 if (0 != param[2] || 0 != param[5] || 0 == param[0] || 0 == param[1] ||
1957 0 == param[3] || 0 == param[4])
1958 throw implementation_error
1959 ("Bad param received by CoalesceLogisticSelectionPL::DlnWaitForTinyLogisticSelectionCoefficient().");
1960
1961 const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
1962 list<Interval>::const_iterator treesum_it;
1963 double theta_A0(param[0]), theta_a0(param[1]), totalTheta_0(theta_A0+theta_a0);
1964 double t_e, t_s(0.0), resultTimesDenominator(0.0), s(param[m_s_is_here]);
1965 double mu_into_A_from_a(param[3]), mu_into_a_from_A(param[4]);
1966 double denominator(totalTheta_0*totalTheta_0), term_A, term_a;
1967 double factor_A(theta_a0/(theta_A0*theta_A0)), factor_a(theta_A0/(theta_a0*theta_a0));
1968
1969 for(treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
1970 {
1971 t_e = treesum_it->endtime;
1972 double dt = t_e - t_s;
1973 const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
1974 const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
1975
1976 if (derivativeWithRespectTo_A)
1977 {
1978 term_A = k_A*factor_A*(dt + 0.5*s*(t_e*t_e - t_s*t_s))*((k_A-1.0)*(theta_A0+totalTheta_0)
1979 + mu_into_A_from_a*denominator);
1980 term_a = -k_a*(dt - 0.5*s*(t_e*t_e - t_s*t_s))*((k_a-1.0) + mu_into_a_from_A*denominator);
1981 }
1982 else
1983 {
1984 term_A = -k_A*(dt + 0.5*s*(t_e*t_e - t_s*t_s))*((k_A-1.0) + mu_into_A_from_a*denominator);
1985 term_a = k_a*factor_a*(dt = 0.5*s*(t_e*t_e - t_s*t_s))*((k_a-1.0)*(theta_a0+totalTheta_0)
1986 + mu_into_a_from_A*denominator);
1987 }
1988
1989 resultTimesDenominator += term_A + term_a + dt*(k_A*(k_A-1.0) + k_a*(k_a-1.0));
1990
1991 t_s = t_e;
1992 }
1993
1994 return resultTimesDenominator/denominator;
1995 }
1996
1997 //------------------------------------------------------------------------------------
1998
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1999 double CoalesceLogisticSelectionPL::DlnPoint(const vector<double>& param, const TreeSummary *treedata,
2000 const long int & whichparam)
2001 {
2002 if (2 != m_nPop)
2003 {
2004 string msg = "CoalesceLogisticSelectionPL::DlnPoint() called with m_nPop = ";
2005 msg += ToString(m_nPop);
2006 msg += ". m_nPop must equal 2, reflecting one population with the major ";
2007 msg += "allele and one population with the minor allele.";
2008 throw implementation_error(msg);
2009 }
2010
2011 bool derivativeWithRespectTo_A;
2012
2013 if (0 == whichparam)
2014 derivativeWithRespectTo_A = true;
2015 else if (1 == whichparam)
2016 derivativeWithRespectTo_A = false;
2017 else
2018 {
2019 string msg = "CoalesceLogisticSelectionPL::DlnPoint() called with whichparam = ";
2020 msg += ToString(whichparam);
2021 msg += ". \"whichparam\" must equal 0 or 1, reflecting one population with the major ";
2022 msg += "allele and one population with the minor allele.";
2023 throw implementation_error(msg);
2024 }
2025
2026 const Interval *treesum = treedata->GetCoalSummary()->GetLongPoint();
2027 const Interval *pTreesum;
2028 double theta_A0(param[0]), theta_a0(param[1]), s(param[m_s_is_here]),
2029 totalTheta_0(theta_A0+theta_a0), theta_A0squared(theta_A0*theta_A0),
2030 theta_a0squared(theta_a0*theta_a0), result(0.0), x;
2031
2032 if (theta_a0 <= 0.0 || theta_A0 <= 0.0)
2033 {
2034 string msg = "CoalesceLogisticSelectionPL::DlnPoint(), received an invalid Theta value ";
2035 msg += "(theta_A0 = " + ToString(theta_A0) + ", theta_a0 = " + ToString(theta_a0) + ").";
2036 throw impossible_error(msg);
2037 }
2038
2039 // First, add up the contributions from the coalescent events.
2040
2041 if (s >= 0.0)
2042 {
2043 for (pTreesum = treesum; pTreesum != NULL; pTreesum = pTreesum->next)
2044 {
2045 double t_e = pTreesum->endtime;
2046 if (derivativeWithRespectTo_A)
2047 {
2048 if (0L == pTreesum->oldstatus) // coal. in allele A
2049 {
2050 x = SafeProductWithExp(theta_a0*(theta_A0+totalTheta_0),s*t_e);
2051 if (x < DBL_BIG)
2052 result -= (theta_A0squared + x) /
2053 ((theta_A0*totalTheta_0)*(theta_A0 + x/(theta_A0+totalTheta_0)));
2054 else
2055 result -= (theta_A0 + totalTheta_0)/(theta_A0*totalTheta_0);
2056 }
2057 else if (1L == pTreesum->oldstatus) // coal. in allele a
2058 {
2059 x = SafeProductWithExp(theta_a0,s*t_e);
2060 if (x < DBL_BIG)
2061 result -= (x - theta_a0)/(totalTheta_0*(theta_A0 + x));
2062 else
2063 result -= 1.0/totalTheta_0;
2064 }
2065 else
2066 {
2067 string msg = "CoalesceLogisticSelectionPL::DlnPoint(), received a TreeSummary ";
2068 msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
2069 msg += ". \"oldstatus\" must be either 0 or 1, reflecting a coalescene in either ";
2070 msg += "the population with allele A or the population with allele a.";
2071 throw implementation_error(msg);
2072 }
2073 }
2074 else // derivative with repect to theta_a0
2075 {
2076 if (0L == pTreesum->oldstatus) // coal. in allele A
2077 {
2078 x = SafeProductWithExp(theta_A0,s*t_e);
2079 if (x < DBL_BIG)
2080 result += (x - theta_A0) /
2081 (totalTheta_0*(theta_A0 + x*theta_a0/theta_A0));
2082 else
2083 result += theta_A0/(theta_a0*totalTheta_0);
2084 }
2085 else if (1L == pTreesum->oldstatus) // coal. in allele a
2086 {
2087 x = SafeProductWithExp(theta_a0squared,s*t_e);
2088 if (x < DBL_BIG)
2089 result -= (theta_A0*(theta_a0+totalTheta_0) + x) /
2090 (totalTheta_0*(theta_A0*theta_a0 + x));
2091 else
2092 result -= 1.0/totalTheta_0;
2093 }
2094 else
2095 {
2096 string msg = "CoalesceLogisticSelectionPL::DlnPoint(), received a TreeSummary ";
2097 msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
2098 msg += ". \"oldstatus\" must be either 0 or 1, reflecting a coalescene in either ";
2099 msg += "the population with allele A or the population with allele a.";
2100 throw implementation_error(msg);
2101 }
2102 }
2103 }
2104 }
2105 else // s < 0
2106 {
2107 for (pTreesum = treesum; pTreesum != NULL; pTreesum = pTreesum->next)
2108 {
2109 double t_e = pTreesum->endtime;
2110 if (derivativeWithRespectTo_A)
2111 {
2112 if (0L == pTreesum->oldstatus) // coal. in allele A
2113 {
2114 x = SafeProductWithExp(theta_A0squared,-s*t_e);
2115 if (x < DBL_BIG)
2116 result -= (theta_a0*(theta_A0+totalTheta_0) + x) /
2117 (totalTheta_0*(theta_A0*theta_a0 + x));
2118 else
2119 result -= 1.0/totalTheta_0;
2120 }
2121 else if (1L == pTreesum->oldstatus) // coal. in allele a
2122 {
2123 x = SafeProductWithExp(theta_a0,-s*t_e);
2124 if (x < DBL_BIG)
2125 result += (x - theta_a0) / (totalTheta_0*(theta_a0 + x*theta_A0/theta_a0));
2126 else
2127 result += theta_a0/(theta_A0*totalTheta_0);
2128 }
2129 else
2130 {
2131 string msg = "CoalesceLogisticSelectionPL::DlnPoint(), received a TreeSummary ";
2132 msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
2133 msg += ". \"oldstatus\" must be either 0 or 1, reflecting a coalescene in either ";
2134 msg += "the population with allele A or the population with allele a.";
2135 throw implementation_error(msg);
2136 }
2137 }
2138 else // derivative with respect to theta_a0
2139 {
2140 if (0L == pTreesum->oldstatus) // coal. in allele A
2141 {
2142 x = SafeProductWithExp(theta_A0,-s*t_e);
2143 if (x < DBL_BIG)
2144 result -= (x - theta_A0) / (totalTheta_0*(theta_a0 + x));
2145 else
2146 result -= 1.0/totalTheta_0;
2147 }
2148 else if (1L == pTreesum->oldstatus) // coal. in allele a
2149 {
2150 x = SafeProductWithExp(theta_A0*(theta_a0+totalTheta_0),-s*t_e);
2151 if (x < DBL_BIG)
2152 result -= (x + theta_a0squared) /
2153 (theta_a0*totalTheta_0*(theta_a0 + x/(theta_a0+totalTheta_0)));
2154 else
2155 result -= (theta_a0+totalTheta_0)/(theta_a0*totalTheta_0);
2156 }
2157 else
2158 {
2159 string msg = "CoalesceLogisticSelectionPL::DlnPoint(), received a TreeSummary ";
2160 msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
2161 msg += ". \"oldstatus\" must be either 0 or 1, reflecting a coalescene in either ";
2162 msg += "the population with allele A or the population with allele a.";
2163 throw implementation_error(msg);
2164 }
2165 }
2166 }
2167 }
2168
2169 // Now, add the contributions from the mutation events a --> A and A --> a.
2170 treesum = treedata->GetDiseaseSummary()->GetLongPoint();
2171
2172 double one__over__theta_A0(1.0/theta_A0), one__over__theta_a0(1.0/theta_a0);
2173 for(pTreesum = treesum; pTreesum != NULL; pTreesum = pTreesum->next)
2174 {
2175 if (0L == pTreesum->oldstatus)
2176 {
2177 if (derivativeWithRespectTo_A)
2178 result -= one__over__theta_A0;
2179 else
2180 result += one__over__theta_a0;
2181 }
2182 else if (1L == pTreesum->oldstatus)
2183 {
2184 if (derivativeWithRespectTo_A)
2185 result += one__over__theta_A0;
2186 else
2187 result -= one__over__theta_a0;
2188 }
2189 else
2190 {
2191 string msg = "DiseaseLogisticSelectionPL::DlnPoint(), received a TreeSummary ";
2192 msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
2193 msg += ". \"oldstatus\" must be either 0 or 1, reflecting a mutation to either ";
2194 msg += "allele A or allele a.";
2195 throw implementation_error(msg);
2196 }
2197 }
2198
2199 return result;
2200
2201 }
2202
2203 //------------------------------------------------------------------------------------
2204
lnWait(const vector<double> & param,const TreeSummary * treedata)2205 double LogisticSelectionPL::lnWait(const vector<double>& param, const TreeSummary *treedata)
2206 {
2207 return 0.0;
2208 }
2209
2210 //------------------------------------------------------------------------------------
2211
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)2212 double LogisticSelectionPL::lnPoint(const vector<double>& param, const vector<double>& lparam,
2213 const TreeSummary *treedata)
2214 {
2215 return 0.0;
2216 }
2217
2218 //------------------------------------------------------------------------------------
2219
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)2220 double LogisticSelectionPL::DlnWait(const vector<double>& param, const TreeSummary *treedata,
2221 const long int & whichparam)
2222 {
2223 if (whichparam != m_s_is_here)
2224 {
2225 string msg = "LogisticSelectionPL::DlnWait() called with whichparam = ";
2226 msg += ToString(whichparam);
2227 msg += ". \"whichparam\" must equal " + ToString(m_s_is_here);
2228 msg += ", which corresponds to the selection coefficient \"s.\"";
2229 throw implementation_error(msg);
2230 }
2231
2232 double s(param[m_s_is_here]);
2233
2234 if (fabs(s) < LOGISTIC_SELECTION_COEFFICIENT_EPSILON)
2235 return DlnWaitForTinyLogisticSelectionCoefficient(param, treedata, whichparam);
2236
2237 double theta_A0(param[0]), theta_a0(param[1]), mu_into_A_from_a(param[3]),
2238 mu_into_a_from_A(param[4]);
2239
2240 if (0.0 == theta_A0 || 0.0 == theta_a0)
2241 {
2242 string msg = "LogisticSelectionPL::DlnWait(), theta_A0 = ";
2243 msg += ToString(theta_A0) + ", theta_a0 = " + ToString(theta_a0);
2244 msg += "; attempted to divide by zero.";
2245 throw impossible_error(msg);
2246 }
2247
2248 const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
2249 list<Interval>::const_iterator treesum_it;
2250 double t_s(0.0), t_e, term_s, term_e, denominator(s*s*(theta_A0+theta_a0)),
2251 dtau_A__ds(0.0), dtau_a__ds(0.0), result(0.0);
2252 double mu_term_A(mu_into_A_from_a*(theta_A0+theta_a0)), mu_term_a(mu_into_a_from_A*(theta_A0+theta_a0));
2253 term_s = s > 0.0 ? -theta_a0/denominator : theta_A0/denominator;
2254
2255 for(treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
2256 {
2257 t_e = treesum_it->endtime;
2258 const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
2259 const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
2260 dtau_A__ds = dtau_a__ds = 0;
2261
2262 if (s > 0.0)
2263 {
2264 term_e = SafeProductWithExp(theta_a0*(s*t_e - 1.0)/denominator,s*t_e);
2265 if (term_e >= DBL_BIG)
2266 return -DBL_BIG;
2267 if (k_A > 0)
2268 dtau_A__ds = term_e - term_s;
2269 if (k_a > 0)
2270 dtau_a__ds = ((theta_A0*theta_a0)/(denominator*denominator)) *
2271 ((s*s*t_e*t_e - 1.0)/term_e - (s*s*t_s*t_s - 1.0)/term_s);
2272 }
2273 else
2274 {
2275 term_e = SafeProductWithExp(theta_A0*(s*t_e + 1.0)/denominator,-s*t_e);
2276 if (term_e <= -DBL_BIG)
2277 return DBL_BIG;
2278 if (k_a > 0)
2279 dtau_a__ds = term_e - term_s;
2280 if (k_A > 0)
2281 dtau_A__ds = ((theta_A0*theta_a0)/(denominator*denominator)) *
2282 ((s*s*t_e*t_e - 1.0)/term_e - (s*s*t_s*t_s - 1.0)/term_s);
2283 }
2284
2285 result -= (k_A*(k_A - 1.0 + mu_term_A)/theta_A0)*dtau_A__ds
2286 + (k_a*(k_a - 1.0 + mu_term_a)/theta_a0)*dtau_a__ds;
2287 if (result <= -DBL_BIG)
2288 return -DBL_BIG;
2289 if (result >= DBL_BIG)
2290 return DBL_BIG;
2291
2292 t_s = t_e;
2293 term_s = term_e;
2294 }
2295
2296 return result;
2297
2298 }
2299
2300 //------------------------------------------------------------------------------------
2301
DlnWaitForTinyLogisticSelectionCoefficient(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)2302 double LogisticSelectionPL::DlnWaitForTinyLogisticSelectionCoefficient(const vector<double> & param,
2303 const TreeSummary * treedata,
2304 const long int & whichparam)
2305 {
2306 const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
2307 list<Interval>::const_iterator treesum_it;
2308 double theta_A0(param[0]), theta_a0(param[1]), theta_A0_over_theta_a0;
2309 double ts(0.0), te, result(0.0), s(param[m_s_is_here]);
2310 double ts_squared(0.0), te_squared;
2311
2312 if (0.0 == theta_A0 || 0.0 == theta_a0 || 0.0 == theta_A0 + theta_a0)
2313 {
2314 string msg = "LogisticSelectionPL::DlnWaitForTinySelectionCoefficient(), theta_A0 = ";
2315 msg += ToString(theta_A0) + ", theta_a0 = " + ToString(theta_a0);
2316 msg += "; attempted to divide by zero.";
2317 throw impossible_error(msg);
2318 }
2319
2320 theta_A0_over_theta_a0 = theta_A0/theta_a0;
2321 double mu_into_A_from_a(param[3]), mu_into_a_from_A(param[4]),
2322 mu_term_A(mu_into_A_from_a*(theta_A0+theta_a0)), mu_term_a(mu_into_a_from_A*(theta_A0+theta_a0));
2323
2324 for(treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
2325 {
2326 te = treesum_it->endtime;
2327 te_squared = te*te;
2328 const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
2329 const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
2330
2331 result -= 0.5*(te_squared - ts_squared)*(k_A*(k_A-1.0+mu_term_A)/theta_A0_over_theta_a0 -
2332 k_a*(k_a-1.0+mu_term_a)*theta_A0_over_theta_a0);
2333 result -= (s/3.0)*(te_squared*te - ts_squared*ts)*(k_A*(k_A-1.0+mu_term_A)/theta_A0_over_theta_a0 +
2334 k_a*(k_a-1.0+mu_term_a)*theta_A0_over_theta_a0);
2335 ts= te;
2336 ts_squared = te_squared;
2337 }
2338
2339 return result/(theta_A0 + theta_a0);
2340 }
2341
2342 //------------------------------------------------------------------------------------
2343
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)2344 double LogisticSelectionPL::DlnPoint(const vector<double> & param, const TreeSummary * treedata,
2345 const long int & whichparam)
2346 {
2347 if (whichparam != m_s_is_here)
2348 {
2349 string msg = "LogisticSelectionPL::DlnPoint() called with whichparam = ";
2350 msg += ToString(whichparam);
2351 msg += ". \"whichparam\" must equal " + ToString(m_s_is_here);
2352 msg += ", which corresponds to the selection coefficient \"s.\"";
2353 throw implementation_error(msg);
2354 }
2355
2356 const Interval *treesum = treedata->GetCoalSummary()->GetLongPoint();
2357 const Interval *pTreesum;
2358 double theta_A0(param[0]), theta_a0(param[1]), s(param[m_s_is_here]);
2359 double result(0.0), term_a;
2360
2361 // First, add up the contributions from coalescent events.
2362
2363 for (pTreesum = treesum; pTreesum != NULL; pTreesum = pTreesum->next)
2364 {
2365 const double& te = pTreesum->endtime;
2366 term_a = SafeProductWithExp(theta_a0, s*te);
2367
2368 if (0L == pTreesum->oldstatus) // coal. in allele A
2369 {
2370 if (term_a < DBL_BIG)
2371 result += te*term_a/(theta_A0 + term_a); // rhs = 0 for s << 0
2372 else
2373 result += te;
2374 }
2375
2376 else if (1L == pTreesum->oldstatus) // coal. in allele a
2377 result -= te/(term_a/theta_A0 + 1.0);
2378
2379 else
2380 {
2381 string msg = "LogisticSelectionPL::DlnPoint(), received a TreeSummary ";
2382 msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
2383 msg += ". \"oldstatus\" must be either 0 or 1, reflecting a coalescence in either ";
2384 msg += "the subpopulation with allele A or the subpopulation with allele a.";
2385 throw implementation_error(msg);
2386 }
2387 }
2388
2389 // Now, add the contributions from the mutation events a --> A and A --> a.
2390 treesum = treedata->GetDiseaseSummary()->GetLongPoint();
2391
2392 for(pTreesum = treesum; pTreesum != NULL; pTreesum = pTreesum->next)
2393 {
2394 const double& te = pTreesum->endtime;
2395
2396 if (0L == pTreesum->oldstatus)
2397 result += te;
2398 else if (1L == pTreesum->oldstatus)
2399 result -= te;
2400 else
2401 {
2402 string msg = "LogisticSelectionPL::DlnPoint(), received a TreeSummary ";
2403 msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
2404 msg += ". \"oldstatus\" must be either 0 or 1, reflecting a mutation to either ";
2405 msg += "allele A or allele a.";
2406 throw implementation_error(msg);
2407 }
2408 }
2409
2410 return result;
2411 }
2412
2413 //------------------------------------------------------------------------------------
2414
StickSelectPL(const ForceSummary & fs)2415 StickSelectPL::StickSelectPL(const ForceSummary& fs)
2416 : PLForces(0), // MDEBUG arbitrary value, is that okay?
2417 m_thetastart(0),
2418 m_thetaend(0),
2419 m_toBigA_here(FLAGLONG),
2420 m_toSmallA_here(FLAGLONG),
2421 m_s_here(0),
2422 m_r_here(FLAGLONG),
2423 m_disstart(FLAGLONG)
2424 {
2425 m_minuslnsqrt2pi = -log(sqrt(2*PI));
2426 m_nxparts = fs.GetNParameters(force_COAL);
2427 // JDEBUG--when switch to allowing migration remove the assert
2428 assert(fs.GetNonLocalPartitionIndexes().size() < 2);
2429 m_partindex = fs.GetNonLocalPartitionIndexes();
2430 // JDEBUG--this doesn't work due to invalid parameters
2431 // m_ndis = fs.GetNParameters(force_DISEASE);
2432 m_ndis = 2L;
2433 m_disindex = fs.GetPartIndex(force_DISEASE);
2434 assert(m_ndis == 2);
2435 assert(m_nxparts == 2);
2436 }
2437
2438 //------------------------------------------------------------------------------------
2439
~StickSelectPL()2440 StickSelectPL::~StickSelectPL()
2441 {
2442 // intentionally blank
2443 }
2444
2445 //------------------------------------------------------------------------------------
2446
SetToBigAIndex(long index)2447 void StickSelectPL::SetToBigAIndex(long index)
2448 {
2449 m_toBigA_here = index;
2450 if (m_disstart == FLAGLONG)
2451 // if toBig or toSmall isn't set, m_disstart will end
2452 // up as FLAGLONG again, as that's smaller than all legal values
2453 m_disstart = std::min(m_toBigA_here,m_toSmallA_here);
2454 }
2455
2456 //------------------------------------------------------------------------------------
2457
SetToSmallAIndex(long index)2458 void StickSelectPL::SetToSmallAIndex(long index)
2459 {
2460 m_toSmallA_here = index;
2461 if (m_disstart == FLAGLONG)
2462 // if toBig or toSmall isn't set, m_disstart will end
2463 // up as FLAGLONG again, as that's smaller than all legal values
2464 m_disstart = std::min(m_toBigA_here,m_toSmallA_here);
2465 }
2466
2467 //------------------------------------------------------------------------------------
2468
SetLocalPartForces(const ForceSummary & fs)2469 void StickSelectPL::SetLocalPartForces(const ForceSummary& fs)
2470 {
2471 // JDEBUG--: This code is cut and paste from CoalescePL
2472 // and smells pretty bad as a result.
2473
2474 m_localpartforces = fs.GetLocalPartitionForces();
2475 assert(!m_localpartforces.empty());
2476
2477 // set up vector which answers the following question:
2478 // for each local partition force
2479 // for any xpartition theta (identified by index),
2480 // what partition (of the given force) is it?
2481
2482 // This code is copied approximately from PartitionForce::
2483 // SumXPartsToParts().
2484
2485 ForceVec::const_iterator pforce;
2486 const ForceVec& partforces = fs.GetPartitionForces();
2487 LongVec1d indicator(partforces.size(), 0L);
2488 LongVec1d nparts(registry.GetDataPack().GetAllNPartitions());
2489 DoubleVec1d::size_type xpart;
2490 long partindex;
2491 for (pforce = partforces.begin(), partindex = 0;
2492 pforce != partforces.end(); ++pforce, ++partindex)
2493 {
2494 vector<DoubleVec1d::size_type> indices(m_nxparts, 0);
2495 for(xpart = 0; xpart < static_cast<DoubleVec1d::size_type>(m_nxparts); ++xpart)
2496 {
2497 indices[xpart] = indicator[partindex];
2498 long int part;
2499 for (part = nparts.size() - 1; part >= 0; --part)
2500 {
2501 ++indicator[part];
2502 if (indicator[part] < nparts[part]) break;
2503 indicator[part] = 0;
2504 }
2505 }
2506 // initialize xparts vectors
2507 vector<DoubleVec1d::size_type> emptyvec;
2508 vector<vector<DoubleVec1d::size_type> > partvec(nparts[partindex], emptyvec);
2509
2510 if ((*pforce)->IsLocalPartitionForce())
2511 {
2512 m_whichlocalpart.push_back(indices);
2513 m_whichlocalxparts.push_back(partvec);
2514 }
2515 m_whichpart.push_back(indices);
2516 m_whichxparts.push_back(partvec);
2517 }
2518
2519 // now construct the vector mapping partition to a set of xpartitions.
2520 //
2521 // we will do this by going through the vector mapping xpartition to
2522 // partition we just constructed, letting its contents tell us which
2523 // partition to add the parameter (xpartition) to and keeping a
2524 // counter to let us know which xpartition to add.
2525 long lpindex = 0;
2526 for (pforce = partforces.begin(), partindex = 0;
2527 pforce != partforces.end(); ++pforce, ++partindex)
2528 {
2529 for(xpart = 0; xpart < m_whichpart[partindex].size(); ++xpart)
2530 m_whichxparts[partindex][m_whichpart[partindex][xpart]].
2531 push_back(xpart);
2532 if ((*pforce)->IsLocalPartitionForce())
2533 {
2534 for(xpart = 0; xpart < m_whichpart[partindex].size(); ++xpart)
2535 {
2536 m_whichlocalxparts[lpindex][m_whichpart[partindex][xpart]].push_back(xpart);
2537 }
2538 ++lpindex;
2539 }
2540 }
2541 } // SetLocalPartForces
2542
2543 //------------------------------------------------------------------------------------
2544
StickMean(double freqA,double s,double toA,double toa) const2545 double StickSelectPL::StickMean(double freqA, double s, double toA, double toa) const
2546 {
2547 return s*freqA*(1-freqA) + toA*(1-freqA) - toa*freqA;
2548 }
2549
2550 //------------------------------------------------------------------------------------
2551
StickVar(double freqA,double tipfreqA,double thetaA) const2552 double StickSelectPL::StickVar(double freqA, double tipfreqA, double thetaA) const
2553 {
2554 return 2.0*freqA*(1-freqA)*tipfreqA / thetaA;
2555 }
2556
2557 //------------------------------------------------------------------------------------
2558
CommonFunctional(double freqBigA,double prevfreqBigA,double s,double toBigA,double toSmallA,double length) const2559 double StickSelectPL::CommonFunctional(double freqBigA, double prevfreqBigA, double s,
2560 double toBigA, double toSmallA, double length) const
2561 {
2562 // this implements the un-squared numerator of the posterior on stick selection:
2563 return freqBigA - prevfreqBigA - length *
2564 StickMean(prevfreqBigA,s,toBigA,toSmallA);
2565 }
2566
2567 //------------------------------------------------------------------------------------
2568
lnPTreeStick(const DoubleVec1d & param,const DoubleVec1d & lparam,const TreeSummary * treedata)2569 double StickSelectPL::lnPTreeStick(const DoubleVec1d& param, const DoubleVec1d& lparam, const TreeSummary* treedata)
2570 {
2571 double answ(lnWait(param,treedata));
2572
2573 answ += lnPointTreeStick(param,lparam,treedata);
2574
2575 return answ;
2576 }
2577
2578 //------------------------------------------------------------------------------------
2579
lnWait(const DoubleVec1d & param,const TreeSummary * treedata)2580 double StickSelectPL::lnWait(const DoubleVec1d& param, const TreeSummary* treedata)
2581 {
2582 assert(m_disstart != FLAGLONG);
2583 double answ(0.0);
2584
2585 assert(m_ndis == 2);
2586 assert(m_ndis == m_nxparts);
2587
2588 // compute waiting times for theta and disease
2589 // all LongWait() are the same....
2590 const list<Interval>& coalsum(treedata->GetCoalSummary()->GetLongWait());
2591 list<Interval>::const_iterator interval(coalsum.begin());
2592 const DoubleVec1d& stairlengths(treedata->GetStickSummary().lengths);
2593 const DoubleVec2d& stairfreqs(treedata->GetStickSummary().freqs);
2594 double tipthetaA(param[m_thetastart]);
2595 double jointend(stairlengths[0]),intervalend(interval->endtime);
2596 double jointstart(0.0),intervalstart(0.0);
2597 long joint(0);
2598
2599 while (interval != coalsum.end())
2600 {
2601 // how long is the bit of tree where nothing changes?
2602 double lengthininterval = std::min(jointend, intervalend) -
2603 std::max(jointstart, intervalstart);
2604
2605 // adjust for coalescent contribution
2606 long xpart;
2607 for(xpart = 0; xpart < m_nxparts; ++xpart)
2608 {
2609 answ -= lengthininterval * stairfreqs[0][0] *
2610 (interval->xpartlines[xpart])*(interval->xpartlines[xpart]-1) /
2611 // because there is no migration we can use xpart, otherwise
2612 // we'll need to add a loop in local partitions
2613 (stairfreqs[joint][xpart] * tipthetaA);
2614 }
2615
2616 // adjust for trait mutation contribution
2617 long part;
2618 for(part = 0; part < m_ndis; ++part)
2619 {
2620 answ -= lengthininterval * interval->partlines[m_disindex][part] *
2621 param[(part ? m_disstart+1 : m_disstart)] *
2622 // both macros rely on only two disease states present
2623 // equation relies on nxparts == nparts!
2624 stairfreqs[joint][(part ? 0 : 1)]/stairfreqs[joint][part];
2625 }
2626
2627 if (jointend < intervalend) // joint ends first
2628 {
2629 ++joint;
2630 jointstart = jointend;
2631 jointend += stairlengths[joint];
2632 }
2633 else
2634 if (intervalend < jointend) // else interval ends first
2635 {
2636 ++interval;
2637 intervalstart = intervalend;
2638 intervalend = interval->endtime;
2639 }
2640 else // they both ended at the exact same time!
2641 {
2642 ++joint;
2643 jointstart = jointend;
2644 jointend += stairlengths[joint];
2645 ++interval;
2646 intervalstart = intervalend;
2647 intervalend = interval->endtime;
2648 }
2649 }
2650
2651 // compute rec-rate contrib
2652 if (!m_localpartforces.empty())
2653 {
2654 const vector<double>& rWait = treedata->GetRecSummary()->GetShortWait();
2655 answ -= rWait[0] * param[m_r_here];
2656 }
2657
2658 // there aren't separate "wait" and "point" terms wrt. sticks,
2659 // therefore all the actual calculations are handled in "point".
2660
2661 return answ;
2662 }
2663
2664 //------------------------------------------------------------------------------------
2665
lnPointTreeStick(const DoubleVec1d & param,const DoubleVec1d & lparam,const TreeSummary * treedata)2666 double StickSelectPL::lnPointTreeStick(const DoubleVec1d& param, const DoubleVec1d& lparam,
2667 const TreeSummary* treedata)
2668 // there aren't separate "wait" and "point" terms with stick selection,
2669 // all the actual calculations are handled in "point".
2670 //
2671 // The actual equation for the log tree posterior likelihood:
2672 // Sum_Over_All_Stick_Intervals[
2673 // -log(sqrt(2*pi))-log(sqrt(Var(curr_freq)*int_length))-Exponent]
2674 //
2675 // Exponent =
2676 // (curr_freq-(prev_freq*Mean(prev_freq)*int_length))**2
2677 // ------------------------------------------------------------
2678 // 2*Var(prev_freq)*int_length
2679 //
2680 // Mean(freq) = sel_coeff*freq*(1-freq)+mu_to*(1-freq)-mu_from*freq
2681 // Var(freq) = freq*(1-freq) / theta
2682 {
2683 assert(m_disstart != FLAGLONG);
2684 double answ(0.0);
2685
2686 // stuff useful for all stick terms
2687 const Interval * treesum = treedata->GetCoalSummary()->GetLongPoint();
2688 const Interval * tit(treesum);
2689 const DoubleVec1d& stairlengths(treedata->GetStickSummary().lengths);
2690 const DoubleVec2d& stairfreqs(treedata->GetStickSummary().freqs);
2691 const DoubleVec2d& stairlnfreqs(treedata->GetStickSummary().lnfreqs);
2692 double tipprobA(stairfreqs[0][0]);
2693 double lnthA(lparam[m_thetastart]), lnprA(log(tipprobA));
2694 double jointend(stairlengths[0]);
2695 long joint(0);
2696
2697 // compute theta contrib
2698 do {
2699 while (jointend < tit->endtime)
2700 {
2701 ++joint;
2702 jointend += stairlengths[joint];
2703 }
2704
2705 answ += LOG2 + lnprA - lnthA - stairlnfreqs[joint][tit->oldstatus];
2706
2707 tit = tit->next;
2708 } while(tit != NULL);
2709
2710 // compute trait mutation contrib
2711 treesum = treedata->GetDiseaseSummary()->GetLongPoint();
2712 tit = treesum;
2713 jointend = stairlengths[0];
2714 joint = 0;
2715 do {
2716 while (jointend < tit->endtime)
2717 {
2718 ++joint;
2719 jointend += stairlengths[joint];
2720 }
2721 // JDEBUG--is the macro correct??
2722 answ += lparam[(tit->newstatus ? m_disstart+1 : m_disstart)] +
2723 stairlnfreqs[joint][tit->oldstatus]-stairlnfreqs[joint][tit->newstatus];
2724
2725 tit = tit->next;
2726 } while(tit != NULL);
2727
2728 // compute rec-rate contrib--Sum_j(recevents * log(r))
2729 // as noted earlier (see Likelihood::FillForces()), the existence
2730 // of a localpartforces vector is used to flag the presence/absence
2731 // of recombination
2732
2733 // MDEBUG MREVIEW let's review this code--Mary doesn't get it.
2734 if (!m_localpartforces.empty())
2735 {
2736 treesum = treedata->GetRecSummary()->GetLongPoint();
2737 tit = treesum;
2738 jointend = stairlengths[0];
2739 joint = 0;
2740 do {
2741 while (jointend < tit->endtime)
2742 {
2743 ++joint;
2744 jointend += stairlengths[joint];
2745 }
2746 // we can use partnerpicks[0] because there is only one lpforce
2747 // in existence
2748 answ += lparam[m_r_here] +
2749 stairlnfreqs[joint][treesum->partnerpicks[0]];
2750 tit = tit->next;
2751 } while(tit != NULL);
2752 }
2753
2754 return answ;
2755 }
2756
2757 //------------------------------------------------------------------------------------
2758
lnPoint(const DoubleVec1d & param,const DoubleVec1d & lparam,const TreeSummary * treedata)2759 double StickSelectPL::lnPoint(const DoubleVec1d& param, const DoubleVec1d& lparam, const TreeSummary* treedata)
2760 // there aren't separate "wait" and "point" terms with stick selection,
2761 // all the actual calculations are handled in "point".
2762 //
2763 // The actual equation for the log tree posterior likelihood:
2764 // Sum_Over_All_Stick_Intervals[
2765 // -log(sqrt(2*pi))-log(sqrt(Var(prev_freq)*int_length))-Exponent]
2766 //
2767 // Exponent =
2768 // (curr_freq-(prev_freq-Mean(prev_freq)*int_length))**2
2769 // ------------------------------------------------------------
2770 // 2*Var(prev_freq)*int_length
2771 //
2772 // Mean(freq) = sel_coeff*freq*(1-freq)+mu_to*(1-freq)-mu_from*freq
2773 // Var(freq) = freq*(1-freq) / theta_total
2774 {
2775 assert(m_disstart != FLAGLONG);
2776 double answ(lnPointTreeStick(param,lparam,treedata));
2777
2778 const DoubleVec1d& stairlengths(treedata->GetStickSummary().lengths);
2779 const DoubleVec2d& stairfreqs(treedata->GetStickSummary().freqs);
2780 double tipthetaA(param[m_thetastart]);
2781 double tipprobA(stairfreqs[0][0]);
2782 double s(param[m_s_here]),toBigA(param[m_toBigA_here]),
2783 toSmallA(param[m_toSmallA_here]);
2784 assert(stairfreqs.size() == stairlengths.size());
2785
2786 DoubleVec1d::size_type step;
2787 for(step = 1; step < stairfreqs.size(); ++step)
2788 {
2789 double variance = StickVar(stairfreqs[step-1][0],tipprobA,tipthetaA);
2790 answ += m_minuslnsqrt2pi - 0.5 * log(variance*stairlengths[step-1]);
2791 double numer = CommonFunctional(stairfreqs[step][0],
2792 stairfreqs[step-1][0],s,toBigA,toSmallA,stairlengths[step-1]);
2793 double denom(2.0*variance*stairlengths[step-1]);
2794 numer = -1.0 * numer * numer;
2795 answ += numer/denom;
2796 }
2797
2798 return answ;
2799 }
2800
2801 //------------------------------------------------------------------------------------
2802
DlnWait(const DoubleVec1d & param,const TreeSummary * treedata,const long & whichparam)2803 double StickSelectPL::DlnWait(const DoubleVec1d& param, const TreeSummary* treedata, const long& whichparam)
2804 {
2805 long which;
2806 double answ(0.0);
2807
2808 // compute theta contrib
2809 if (m_thetastart <= whichparam && whichparam < m_thetastart + m_nxparts)
2810 {
2811 const DoubleVec2d& stairfreqs(treedata->GetStickSummary().freqs);
2812 const DoubleVec1d& stairlengths(treedata->GetStickSummary().lengths);
2813 const list<Interval>& treesum(treedata->GetCoalSummary()->GetLongWait());
2814 list<Interval>::const_iterator interval(treesum.begin());
2815 double tipthetaA(param[m_thetastart]);
2816 double tipprobA(stairfreqs[0][0]);
2817 double jointend(stairlengths[0]),intervalend(interval->endtime);
2818 double jointstart(0.0),intervalstart(0.0);
2819 long joint(0);
2820 which = whichparam-m_thetastart;
2821 while (interval != treesum.end())
2822 {
2823 // how long is the bit of tree where nothing changes?
2824 double lengthininterval = std::min(jointend, intervalend) -
2825 std::max(jointstart, intervalstart);
2826
2827 answ += lengthininterval * tipprobA *
2828 (interval->xpartlines[which])*(interval->xpartlines[which]-1)
2829 // because there is no migration we can use xpart, otherwise
2830 // we'll need to add a loop in local partitions
2831 / (tipthetaA * tipthetaA * stairfreqs[joint][which]);
2832
2833 if (jointend < intervalend) // joint ends first
2834 {
2835 ++joint;
2836 jointstart = jointend;
2837 jointend += stairlengths[joint];
2838 }
2839 else
2840 if (intervalend < jointend) // else interval ends first
2841 {
2842 ++interval;
2843 intervalstart = intervalend;
2844 intervalend = interval->endtime;
2845 }
2846 else // they both ended at the exact same time!
2847 {
2848 ++joint;
2849 jointstart = jointend;
2850 jointend += stairlengths[joint];
2851 ++interval;
2852 intervalstart = intervalend;
2853 intervalend = interval->endtime;
2854 }
2855 }
2856
2857 return answ;
2858 }
2859
2860 // compute disease contrib
2861 if (m_disstart <= whichparam && whichparam < m_disstart + m_ndis)
2862 {
2863 assert(m_disstart != FLAGLONG);
2864 if (whichparam == m_disstart) which = 0;
2865 else which = 1;
2866 const DoubleVec2d& stairfreqs(treedata->GetStickSummary().freqs);
2867 const DoubleVec1d& stairlengths(treedata->GetStickSummary().lengths);
2868 const list<Interval>& treesum(treedata->GetDiseaseSummary()->GetLongWait());
2869 list<Interval>::const_iterator interval(treesum.begin());
2870 double jointend(stairlengths[0]),intervalend(interval->endtime);
2871 double jointstart(0.0),intervalstart(0.0);
2872 long joint(0);
2873
2874 while (interval != treesum.end())
2875 {
2876 // how long is the bit of tree where nothing changes?
2877 double lengthininterval = std::min(jointend, intervalend) -
2878 std::max(jointstart, intervalstart);
2879
2880 answ -= lengthininterval * interval->partlines[m_disindex][which] *
2881 // the macro relies on only two disease states present
2882 // equation relies on nxparts == nparts!
2883 stairfreqs[joint][(which ? 0 : 1)]/stairfreqs[joint][which];
2884
2885 if (jointend < intervalend) // joint ends first
2886 {
2887 ++joint;
2888 jointstart = jointend;
2889 jointend += stairlengths[joint];
2890 }
2891 else
2892 if (intervalend < jointend) // else interval ends first
2893 {
2894 ++interval;
2895 intervalstart = intervalend;
2896 intervalend = interval->endtime;
2897 }
2898 else // they both ended at the exact same time!
2899 {
2900 ++joint;
2901 jointstart = jointend;
2902 jointend += stairlengths[joint];
2903 ++interval;
2904 intervalstart = intervalend;
2905 intervalend = interval->endtime;
2906 }
2907 }
2908
2909 return answ;
2910 }
2911
2912 // compute recombination contrib
2913 if (whichparam == m_r_here)
2914 {
2915 assert(!m_localpartforces.empty());
2916 const vector<double>& rWait = treedata->GetRecSummary()->GetShortWait();
2917 return -rWait[0];
2918 }
2919
2920 // We compute all terms involving the stick in lnPoint and DlnPoint
2921 // (arbitrarily) and thus return 0 if asked to take dWait of those parameters
2922 return answ;
2923 }
2924
2925 //------------------------------------------------------------------------------------
2926
DlnPoint(const DoubleVec1d & param,const TreeSummary * treedata,const long & whichparam)2927 double StickSelectPL::DlnPoint(const DoubleVec1d& param, const TreeSummary* treedata, const long& whichparam)
2928 {
2929 // NB Assumes only one localpartforce and no migration, no growth!
2930 long which;
2931
2932 // compute recombination contrib (does not involve stick)
2933 if (whichparam == m_r_here)
2934 {
2935 const vector<double>& nrecs = treedata->GetRecSummary()->GetShortPoint();
2936 if(nrecs[0]==0)
2937 {
2938 return 0.0;
2939 }
2940 else
2941 {
2942 return SafeDivide(nrecs[0], *(param.begin() + m_r_here));
2943 }
2944 }
2945
2946 // useful variables for all stick computations
2947 const DoubleVec2d& stairfreqs(treedata->GetStickSummary().freqs);
2948 const DoubleVec1d& stairlengths(treedata->GetStickSummary().lengths);
2949 double s(param[m_s_here]),toBigA(param[m_toBigA_here]),
2950 toSmallA(param[m_toSmallA_here]);
2951 double tipthetaA(param[m_thetastart]);
2952 double tipprobA(stairfreqs[0][0]);
2953 assert(stairfreqs.size() == stairlengths.size());
2954 double answ(0.0);
2955
2956 // compute theta contrib
2957 if (m_thetastart <= whichparam && whichparam < m_thetastart + m_nxparts)
2958 {
2959 which = whichparam-m_thetastart;
2960 const DoubleVec1d& ncoal = treedata->GetCoalSummary()->GetShortPoint();
2961
2962 answ = -ncoal[which] / tipthetaA;
2963
2964 // deal with the stick contribution to theta
2965 DoubleVec1d::size_type step;
2966 for(step = 1; step < stairfreqs.size(); ++step)
2967 {
2968 double numer = CommonFunctional(stairfreqs[step][0],
2969 stairfreqs[step-1][0],s,toBigA,toSmallA,stairlengths[step-1]);
2970 numer *= numer;
2971 answ -= numer / (4.0 * stairfreqs[step-1][0] * tipprobA *
2972 (1.0 - stairfreqs[step-1][0]) * stairlengths[step-1]);
2973 answ += 1.0 / (2 * tipthetaA);
2974 }
2975
2976 return answ;
2977 }
2978
2979 // compute disease contrib
2980 if (m_disstart <= whichparam && whichparam < m_disstart + m_ndis)
2981 {
2982 // non-stick terms
2983 assert(m_disstart != FLAGLONG);
2984 if (whichparam == m_disstart) which = 0;
2985 else which = 1;
2986
2987 answ += (treedata->GetDiseaseSummary()->GetShortPoint()[which]) / param[whichparam];
2988
2989 // stick terms
2990 const DoubleVec2d& stairfreqs(treedata->GetStickSummary().freqs);
2991 DoubleVec1d::size_type step;
2992 // terms in toBigA
2993 if (whichparam == m_toBigA_here)
2994 {
2995 for(step = 1; step < stairfreqs.size(); ++step)
2996 {
2997 answ += CommonFunctional(stairfreqs[step][0],stairfreqs[step-1][0],s,
2998 toBigA,toSmallA,stairlengths[step-1]) * tipthetaA /
2999 (2.0 * tipprobA * stairfreqs[step-1][0]);
3000 }
3001 return answ;
3002 }
3003 // terms in toSmallA
3004 if (whichparam == m_toSmallA_here)
3005 {
3006 for(step = 1; step < stairfreqs.size(); ++step)
3007 {
3008 answ -= CommonFunctional(stairfreqs[step][0],stairfreqs[step-1][0],s,
3009 toBigA,toSmallA,stairlengths[step-1]) * tipthetaA /
3010 ((2.0 * tipprobA) * (1.0 - stairfreqs[step-1][0]));
3011 }
3012 return answ;
3013 }
3014 assert(false); // neither toBigA nor toSmallA?!
3015 }
3016
3017 // compute s contrib
3018 if (whichparam == m_s_here)
3019 {
3020 DoubleVec1d::size_type step;
3021 for(step = 1; step < stairfreqs.size(); ++step)
3022 {
3023 answ += CommonFunctional(stairfreqs[step][0],stairfreqs[step-1][0],
3024 s,toBigA,toSmallA,stairlengths[step-1]) * tipthetaA /
3025 (2.0 * tipprobA);
3026 }
3027 return answ;
3028 }
3029
3030 assert(false); // should have computed something!
3031 return answ;
3032 }
3033
3034 //____________________________________________________________________________________
3035