1 // $Id: plforces.cpp,v 1.74 2011/04/23 02:02:49 bobgian Exp $
2 
3 /*
4   Copyright 2002  Peter Beerli, Mary Kuhner, Jon Yamato and Joseph Felsenstein
5 
6   This software is distributed free of charge for non-commercial use
7   and is copyrighted.  Of course, we do not guarantee that the software
8   works, and are not responsible for any damage you may cause or have.
9 */
10 
11 // Posterior Likelihood forces classes
12 // - CoalescePL
13 //   - coalesce with no-growth [using compressed summaries]
14 // - CoalesceGrowPL
15 //   - coalesce with exponential growth [using non-compressed summaries]
16 // - GrowPL
17 //   - coalesce with exponential growth [using non-compressed summaries]
18 // - MigratePL
19 // - RecombinePL
20 // - SelectPL [stubs]
21 
22 //------------------------------------------------------------------------------------
23 
24 #include <cassert>
25 
26 #include "local_build.h"
27 
28 #include "mathx.h"
29 #include "plforces.h"
30 #include "runreport.h"
31 #include "summary.h"
32 #include "vectorx.h"
33 #include "force.h"
34 #include "timemanager.h"                // for CoalesceGrowPL::lnPoint()
35 
36 #ifdef DMALLOC_FUNC_CHECK
37 #include <dmalloc.h>
38 #endif
39 
40 // DEBUG CODE HELPERS
41 #include <iostream>
42 #include <fstream>
43 #include <sstream>
44 std::ofstream numfile;
45 
46 using namespace std;
47 
48 //------------------------------------------------------------------------------------
49 
50 const double LOG_ONEPLUSEPSILON = log(1.0 + numeric_limits<double>::epsilon());
51 
52 //------------------------------------------------------------------------------------
53 // DiseasePL: disease force specific waiting time and
54 //            point probabilites plus derivatives
55 //------------------------------------------------------------------------------------
56 
lnWait(const vector<double> & param,const TreeSummary * treedata)57 double DiseasePL::lnWait (const vector < double >&param, const TreeSummary * treedata)
58 {
59     vector < double >::iterator i;
60     vector < double >::const_iterator pstart;
61     vector < double >::const_iterator pend;
62 
63     const vector<double>& mWait = treedata->GetDiseaseSummary()->GetShortWait();
64     // precomputing the transition rates to a status Sum[d_ji] into
65     // membervariable m_msum
66     for (i = m_msum.begin (),
67              pstart = param.begin () + m_start,
68              pend = param.begin () + m_start + m_nPop;
69          i != m_msum.end (); ++i, pstart += m_nPop, pend += m_nPop)
70     {
71         (*i) = accumulate (pstart, pend, 0.0);
72     }
73 
74     // sum_pop(SM_status *kt)
75     return  -1.0 * inner_product (mWait.begin (),
76                                   mWait.end (), m_msum.begin (), 0.0);
77 }
78 
79 //------------------------------------------------------------------------------------
80 
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)81 double DiseasePL::lnPoint (const vector < double >&param, const vector < double >&lparam,
82                            const TreeSummary * treedata)
83 {
84     //  Sum_j(Sum_i(disevent[j,i] * log(M[j,i]))
85     const vector<double>& nevents = treedata->GetDiseaseSummary()->GetShortPoint();
86     return  inner_product (nevents.begin (), nevents.end (),
87                            lparam.begin () + m_start, 0.0);
88 }
89 
90 //------------------------------------------------------------------------------------
91 
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)92 double DiseasePL::DlnWait (const vector < double >&param, const TreeSummary * treedata, const long int &whichparam)
93 {
94     long int which = (whichparam - m_start) / m_nPop;
95     const vector<double>& mWait = treedata->GetDiseaseSummary()->GetShortWait();
96     return -mWait[which];
97 }
98 
99 //------------------------------------------------------------------------------------
100 
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)101 double DiseasePL::DlnPoint (const vector < double >&param, const TreeSummary * treedata, const long int &whichparam)
102 {
103     long int which = whichparam - m_start;
104     const vector<double>& nmig = treedata->GetDiseaseSummary()->GetShortPoint();
105     return SafeDivide (nmig[which], param[whichparam]);
106 }
107 
108 //------------------------------------------------------------------------------------
109 
lnWait(const vector<double> & param,const TreeSummary * treedata)110 double DiseaseLogisticSelectionPL::lnWait(const vector<double>& param, const TreeSummary * treedata)
111 {
112     return 0.0; // contained in CoalesceLogisticSelectionPL::lnWait(), for speed
113 }
114 
115 //------------------------------------------------------------------------------------
116 // Note:  DlnPoint() is selection-independent, because the point terms
117 // separate into a sum of logarithms.  So, the DiseasePL method is used for that.
118 
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)119 double DiseaseLogisticSelectionPL::lnPoint(const vector<double>& param, const vector<double>& lparam,
120                                            const TreeSummary * treedata)
121 {
122     const Interval *treesum = treedata->GetDiseaseSummary()->GetLongPoint();
123     const Interval *pTreesum;
124 
125     double logTheta_A0(lparam[0]), logTheta_a0(lparam[1]), s(param[m_s_is_here]);
126     double log_mu_into_A_from_a(lparam[3]), log_mu_into_a_from_A(lparam[4]);
127 
128     if (0 != param[2] || 0 != param[5] || 0 == param[3] || 0 == param[4])
129         throw implementation_error("Error parsing disease rates in DiseaseLogisticSelectionPL::lnPoint()");
130 
131     double result = 0.0;
132 
133     for(pTreesum = treesum; pTreesum != NULL; pTreesum = pTreesum->next)
134     {
135         if (0L == pTreesum->oldstatus)
136             result += log_mu_into_A_from_a + logTheta_a0 - logTheta_A0 + s*pTreesum->endtime;
137         else if (1L == pTreesum->oldstatus)
138             result += log_mu_into_a_from_A + logTheta_A0 - logTheta_a0 - s*pTreesum->endtime;
139         else
140         {
141             string msg = "DiseaseLogisticSelectionPL::lnPoint(), received a TreeSummary ";
142             msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
143             msg += ".  \"oldstatus\" must be either 0 or 1, reflecting a coalescence in either ";
144             msg += "the subpopulation with allele A or the subpopulation with allele a.";
145             throw implementation_error(msg);
146         }
147     }
148 
149     return result;
150 }
151 
152 //------------------------------------------------------------------------------------
153 
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)154 double DiseaseLogisticSelectionPL::DlnWait(const vector<double>&param, const TreeSummary * treedata,
155                                            const long int &whichparam)
156 {
157     if (2 != m_nPop)
158     {
159         string msg = "DiseaseLogisticSelectionPL::DlnWait() called with m_nPop = ";
160         msg += ToString(m_nPop);
161         msg += ".  m_nPop must equal 2, reflecting one population with the major ";
162         msg += "allele and one population with the minor allele.";
163         throw implementation_error(msg);
164     }
165 
166     if (3 != whichparam && 4 != whichparam)
167         throw implementation_error("DiseaseLogisticSelectionPL::DlnWait(), bad \"whichparam\"");
168 
169     const list<Interval>& treesum = treedata->GetDiseaseSummary()->GetLongWait();
170     list<Interval>::const_iterator treesum_it;
171 
172     double result(0.0), s(param[m_s_is_here]);
173     double theta_A0(param[m_start]), theta_a0(param[m_start+1]);
174 
175     if (theta_a0 <= 0.0 || theta_A0 <= 0.0)
176     {
177         string msg = "DiseaseLogisticSelectionPL::DlnWait(), received an invalid Theta value ";
178         msg += "(theta_A0 = " + ToString(theta_A0) + ", theta_a0 = " + ToString(theta_a0) + ").";
179         throw impossible_error(msg);
180     }
181 
182     if (fabs(s) < LOGISTIC_SELECTION_COEFFICIENT_EPSILON)
183         return DlnWaitForTinyLogisticSelectionCoefficient(param, treedata, whichparam);
184 
185     if (fabs(s) >= DBL_BIG)
186         return -DBL_BIG; // unavoidable overflow
187 
188     double t_e, t_s(0.0), dt, e_toThe_sts(1.0), e_toThe_ste, term(0.0);
189     double term_A(0.0), term_a(0.0);
190     double factor_A(theta_a0/(s*theta_A0)), factor_a(theta_A0/(-s*theta_a0));
191     bool derivativeWithRespectTo_mu_into_A_from_a = (3 == whichparam ? true : false);
192 
193     for (treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
194     {
195         t_e = treesum_it->endtime;
196         dt = t_e - t_s;
197         term_A = term_a = 0.0;
198         const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
199         const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
200 
201         if (s > 0.0)
202         {
203             // Use overflow protection for term_A; if term_a underflows, that's okay.
204             if (s*dt > LOG_ONEPLUSEPSILON)
205             {
206                 term = SafeProductWithExp(factor_A,s*dt);
207                 if (term >= DBL_BIG && derivativeWithRespectTo_mu_into_A_from_a)
208                     return -DBL_BIG; // unavoidable overflow
209                 e_toThe_ste = (term/factor_A)*e_toThe_sts;
210                 if (k_A > 0.0 && derivativeWithRespectTo_mu_into_A_from_a)
211                 {
212                     term_A = k_A*(term - factor_A)*e_toThe_sts;
213                     if (term_A >= DBL_BIG)
214                         return -DBL_BIG; // unavoidable overflow
215                 }
216             }
217             else
218             {
219                 if (s*dt >= numeric_limits<double>::epsilon())
220                     e_toThe_ste = (1.0 + s*dt)*e_toThe_sts;
221                 else
222                     e_toThe_ste = SafeProductWithExp(1.0,s*t_e);
223                 if (k_A > 0.0 && derivativeWithRespectTo_mu_into_A_from_a)
224                 {
225                     term_A = k_A*factor_A*s*dt*e_toThe_sts;
226                     if (term_A >= DBL_BIG)
227                         return -DBL_BIG; // unavoidable overflow
228                 }
229             }
230             if (k_a > 0.0 && !derivativeWithRespectTo_mu_into_A_from_a)
231                 term_a = k_a*factor_a*(1.0/e_toThe_ste - 1.0/e_toThe_sts); // note: factor_a includes minus sign
232         }
233         else // s < 0
234         {
235             // Use overflow protection for term_a; if term_A underflows, that's okay.
236 
237             if (-s*dt > LOG_ONEPLUSEPSILON)
238             {
239                 term = SafeProductWithExp(factor_a,-s*dt);
240                 if (term >= DBL_BIG && !derivativeWithRespectTo_mu_into_A_from_a)
241                     return -DBL_BIG; // unavoidable overflow
242                 e_toThe_ste = e_toThe_sts/(term/factor_a);
243                 if (k_a > 0.0 && !derivativeWithRespectTo_mu_into_A_from_a)
244                 {
245                     term_a = k_a*(term - factor_a)/e_toThe_sts;
246                     if (term_a >= DBL_BIG)
247                         return -DBL_BIG; // unavoidable overflow
248                 }
249             }
250             else
251             {
252                 if (-s*dt >= numeric_limits<double>::epsilon())
253                     e_toThe_ste = (1.0 + s*dt)*e_toThe_sts;
254                 else
255                     e_toThe_ste = SafeProductWithExp(1.0,s*t_e);
256                 if (k_a > 0.0 && !derivativeWithRespectTo_mu_into_A_from_a)
257                 {
258                     term_a = k_a*factor_a*(-s)*dt/e_toThe_sts;
259                     if (term_a >= DBL_BIG)
260                         return -DBL_BIG; // unavoidable overflow
261                 }
262             }
263             if (k_A > 0.0 && derivativeWithRespectTo_mu_into_A_from_a)
264                 term_A = k_A*factor_A*(e_toThe_ste - e_toThe_sts);
265         }
266 
267         if (derivativeWithRespectTo_mu_into_A_from_a)
268             result -= term_A;
269         else
270             result -= term_a;
271         if (result <= -DBL_BIG)
272             return -DBL_BIG;
273 
274         t_s = t_e;
275         e_toThe_sts = e_toThe_ste;
276     }
277 
278     return result;
279 }
280 
281 //------------------------------------------------------------------------------------
282 
DlnWaitForTinyLogisticSelectionCoefficient(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)283 double DiseaseLogisticSelectionPL::DlnWaitForTinyLogisticSelectionCoefficient(const vector<double>& param,
284                                                                               const TreeSummary *treedata,
285                                                                               const long int &whichparam)
286 {
287     const list<Interval>& treesum = treedata->GetDiseaseSummary()->GetLongWait();
288     list<Interval>::const_iterator treesum_it;
289     double result(0.0), s(param[m_s_is_here]);
290     double theta_A0(param[m_start]), theta_a0(param[m_start+1]);
291     double t_e, t_s(0.0);
292     bool derivativeWithRespectTo_mu_into_A_from_a = (4 == whichparam ? true : false);
293 
294     for (treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
295     {
296         t_e = treesum_it->endtime;
297         const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
298         const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
299 
300         if (derivativeWithRespectTo_mu_into_A_from_a)
301             result -= (k_A*theta_a0/theta_A0)*((t_e - t_s) + 0.5*s*(t_e*t_e - t_s*t_s));
302         else
303             result -= (k_a*theta_A0/theta_a0)*((t_e - t_s) - 0.5*s*(t_e*t_e - t_s*t_s));
304 
305         t_s = t_e;
306     }
307 
308     return result;
309 }
310 
311 //------------------------------------------------------------------------------------
312 // MigratePL: migration forces specific waiting time and
313 //            point probabilites and its derivatives
314 
lnWait(const vector<double> & param,const TreeSummary * treedata)315 double MigratePL::lnWait (const vector < double >&param, const TreeSummary * treedata)
316 {
317     vector < double >::iterator i;
318     vector < double >::const_iterator pstart;
319     vector < double >::const_iterator pend;
320 
321     const vector<double>& mWait = treedata->GetMigSummary()->GetShortWait();
322 
323     // precomputing the immigration rates Sum[m_ji] into membervariable m_msum
324     for (i = m_msum.begin (),
325              pstart = param.begin () + m_start,
326              pend = param.begin () + m_start + m_nPop;
327          i != m_msum.end (); ++i, pstart += m_nPop, pend += m_nPop)
328     {
329         (*i) = accumulate (pstart, pend, 0.0);
330     }
331 
332     // sum_pop(SM_pop *kt)
333     return  -1.0 * inner_product (mWait.begin (),
334                                   mWait.end (), m_msum.begin (), 0.0);
335 }
336 
337 //------------------------------------------------------------------------------------
338 
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)339 double MigratePL::lnPoint (const vector < double >&param, const vector < double >&lparam,
340                            const TreeSummary * treedata)
341 {
342     //  Sum_j(Sum_i(migevent[j,i] * log(M[j,i]))
343     const vector<double>& nmig = treedata->GetMigSummary()->GetShortPoint();
344 
345     return  inner_product (nmig.begin(), nmig.end(), lparam.begin() + m_start,
346                            0.0);
347 }
348 
349 //------------------------------------------------------------------------------------
350 
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)351 double MigratePL::DlnWait (const vector < double >&param, const TreeSummary * treedata, const long int & whichparam)
352 {
353     long int which = (whichparam - m_start) / m_nPop;
354     const vector<double>& mWait = treedata->GetMigSummary()->GetShortWait();
355     return -mWait[which];
356 }
357 
358 //------------------------------------------------------------------------------------
359 
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)360 double MigratePL::DlnPoint (const vector < double >&param, const TreeSummary * treedata, const long int & whichparam)
361 {
362     long int which = whichparam - m_start;
363     const vector<double>& nmig = treedata->GetMigSummary()->GetShortPoint();
364 
365     return SafeDivide (nmig[which], param[whichparam]);
366 }
367 
368 //------------------------------------------------------------------------------------
369 
370 // CoalescePL: coalescence forces specific waiting time and
371 //             point probabilites and its derivatives
372 
373 // CalculateScaledLPCounts returns the log-likelihood of all
374 // of the choices of recombination-branch partition assignment
375 // made in a genealogy.  It does this by summing the log of (the
376 // number of times each partition was chosen times the relative
377 // frequency of that partition).
378 //
379 // JDEBUG--:  This code cannot handle migration.  To do so, it
380 // will be necessary to (a) store the partition in which each
381 // recombination happened, and (b) make the relative frequency
382 // relative to the partition, not the whole.  For example, the
383 // term for choice of a disease state in Boston should be
384 // Theta(disease & Boston) over Theta(Boston).  Currently it
385 // is Theta(disease) over Theta(total).
386 
CalculateScaledLPCounts(const DoubleVec1d & params,const LongVec2d & picks) const387 double CoalescePL::CalculateScaledLPCounts(const DoubleVec1d& params, const LongVec2d& picks) const
388 {
389     double total_theta(std::accumulate(params.begin()+m_start,
390                                        params.begin()+m_end,0.0));
391     DoubleVec1d myths(params.begin()+m_start,params.begin()+m_end);
392     double answ(0.0);
393     LongVec2d::size_type lpforce;
394     for(lpforce = 0; lpforce < picks.size(); ++lpforce)
395     {
396         LocalPartitionForce* pforce(dynamic_cast<LocalPartitionForce*>
397                                     (m_localpartforces[lpforce]));
398         assert(pforce);  // it wasn't a local partition force?
399         DoubleVec1d part_thetas(pforce->SumXPartsToParts(myths));
400         LongVec1d::size_type partition;
401         for(partition = 0; partition < picks[lpforce].size(); ++partition)
402         {
403             answ += log(picks[lpforce][partition] * part_thetas[partition] / total_theta);
404         }
405     }
406     return answ;
407 }
408 
409 //------------------------------------------------------------------------------------
410 
SetLocalPartForces(const ForceSummary & fs)411 void CoalescePL::SetLocalPartForces(const ForceSummary& fs)
412 {
413     m_localpartforces = fs.GetLocalPartitionForces();
414     if (m_localpartforces.empty()) return;
415 
416     // set up vector which answers the following question:
417     // for each local partition force
418     //   for any xpartition theta (identified by index),
419     //   what partition (of the given force) is it?
420 
421     // This code is copied approximately from PartitionForce::
422     // SumXPartsToParts().
423     ForceVec::const_iterator pforce;
424     const ForceVec& partforces = fs.GetPartitionForces();
425     LongVec1d indicator(partforces.size(), 0L);
426     LongVec1d nparts(registry.GetDataPack().GetAllNPartitions());
427     DoubleVec1d::size_type xpart;
428     long partindex;
429     for (pforce = partforces.begin(), partindex = 0;
430          pforce != partforces.end(); ++pforce, ++partindex)
431     {
432         vector<DoubleVec1d::size_type> indices(m_nPop, 0);
433         for(xpart = 0; xpart < static_cast<DoubleVec1d::size_type>(m_nPop); ++xpart)
434         {
435             indices[xpart] = indicator[partindex];
436             long int part;
437             for (part = nparts.size() - 1; part >= 0; --part)
438             {
439                 ++indicator[part];
440                 if (indicator[part] < nparts[part]) break;
441                 indicator[part] = 0;
442             }
443         }
444         // initialize xparts vectors
445         vector<DoubleVec1d::size_type> emptyvec;
446         vector<vector<DoubleVec1d::size_type> > partvec(nparts[partindex], emptyvec);
447 
448         if ((*pforce)->IsLocalPartitionForce())
449         {
450             m_whichlocalpart.push_back(indices);
451             m_whichlocalxparts.push_back(partvec);
452         }
453         m_whichpart.push_back(indices);
454         m_whichxparts.push_back(partvec);
455     }
456 
457     // now construct the vector mapping partition to a set of xpartitions.
458     //
459     // we will do this by going through the vector mapping xpartition to
460     // partition we just constructed, letting its contents tell us which
461     // partition to add the parameter (xpartition) to and keeping a
462     // counter to let us know which xpartition to add.
463     long lpindex = 0;
464     for (pforce = partforces.begin(), partindex = 0;
465          pforce != partforces.end(); ++pforce, ++partindex)
466     {
467         for(xpart = 0; xpart < m_whichpart[partindex].size(); ++xpart)
468             m_whichxparts[partindex][m_whichpart[partindex][xpart]].push_back(xpart);
469 
470         if ((*pforce)->IsLocalPartitionForce())
471         {
472             for(xpart = 0; xpart < m_whichpart[partindex].size(); ++xpart)
473             {
474                 m_whichlocalxparts[lpindex][m_whichpart[partindex][xpart]].
475                     push_back(xpart);
476             }
477             ++lpindex;
478         }
479     }
480 } // SetLocalPartForces
481 
482 //------------------------------------------------------------------------------------
483 // COMPRESSED DATA (SHORT) SUMMARIES
484 
lnWait(const vector<double> & param,const TreeSummary * treedata)485 double CoalescePL::lnWait (const vector < double >&param, const TreeSummary * treedata)
486 {
487     // result = sum_pop(k(k-1)t/theta_pop)
488     const vector<double>& cWait = treedata->GetCoalSummary()->GetShortWait();
489 
490     return -1.0 * inner_product (cWait.begin (), cWait.end (),
491                                  param.begin () + m_start,
492                                  0.0, plus < double >(),
493                                  divides < double >());
494 }
495 
496 //------------------------------------------------------------------------------------
497 
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)498 double CoalescePL::lnPoint (const vector < double >&param, const vector < double >&lparam,
499                             const TreeSummary * treedata)
500 {
501     //  in general it is Sum_j(coalesceevent[j] * (log(2) - log(theta_j)))
502     //  sumcpoint = Sum_j(coalesceevent[j] * log(2))
503     //   - Sum_j(coalesceevent[j] * log(theta_j))
504     const vector<double>& ncoal = treedata->GetCoalSummary()->GetShortPoint();
505 
506     double point = (LOG2 * accumulate (ncoal.begin (), ncoal.end (), 0.0) -
507                     inner_product (ncoal.begin (), ncoal.end (),
508                                    lparam.begin () + m_start, 0.0));
509     const LongVec2d& picks = treedata->GetCoalSummary()->GetShortPicks();
510     if (picks.empty()) return point;
511     else return point + CalculateScaledLPCounts(param, picks);
512 }
513 
514 //------------------------------------------------------------------------------------
515 
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)516 double CoalescePL::DlnWait (const vector < double >&param, const TreeSummary * treedata, const long int & whichparam)
517 {
518     long int which = whichparam - m_start;
519     const vector<double>& cWait = treedata->GetCoalSummary()->GetShortWait();
520 
521     return cWait[which] / (param[whichparam] * param[whichparam]);
522 }
523 
524 //------------------------------------------------------------------------------------
525 // erynes 2004/03/11 -- This method presently returns the following:
526 //
527 //          -ncoal/theta,
528 //
529 // where theta is the current estimate of parameter theta for the
530 // given population ("whichparam"), and ncoal is the total number
531 // of coalescent events for this population (i.e., one less than the
532 // total number of tips that the population has in this genealogy tree,
533 // unless recombinations are present).
534 //
535 // Note:  CoalescePL::DlnPoint calculates the partial derivative of log(Point)
536 // with respect to theta for the given population ("whichparam"),
537 // in both the absence and presence of growth.
538 // When growth is present, CoalesceGrowPL::DlnWait does this for log(Wait).
539 // Partial derivatives with respect to growth are found in GrowPL.
540 //
541 // jay 2007/03/22 -- added support for disease w/ recombination
542 //
543 // JDEBUG--:  only in absence of migration
544 
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)545 double CoalescePL::DlnPoint (const vector < double >&param, const TreeSummary * treedata, const long int & whichparam)
546 {
547     long int which = whichparam - m_start;
548     const vector<double>& ncoal = treedata->GetCoalSummary()->GetShortPoint();
549 
550     double answ(-ncoal[which] / param[whichparam]);
551 
552     const LongVec2d& picks = treedata->GetCoalSummary()->GetShortPicks();
553     if (picks.empty()) return answ;
554     else  {
555         // count recs in crosspartition corresponding to whichparam
556         const RecSummary* rsum = dynamic_cast<const RecSummary*>(treedata->GetSummary(force_REC));
557         long recs_in_xpart = rsum->GetNRecsByXPart()[whichparam];
558         // divide by whichparam
559         double xpart_theta = param[whichparam];
560         answ += recs_in_xpart / xpart_theta;
561         // from that, subtract number of recs (should be "in that partition")
562         long nrecs = rsum->GetNRecs();
563         // divided by sum of thetas (should be "for that partition")
564         double total_theta(std::accumulate(param.begin()+m_start,
565                                            param.begin()+m_end,0.0));
566         answ -= nrecs/total_theta;
567     }
568 
569 #if 0 // this was what CVS merged into here.....
570     if (!m_localpartforces.empty())
571     {
572         double total_theta(std::accumulate(param.begin()+m_start,
573                                            param.begin()+m_end,0.0));
574         DoubleVec1d myths(param.begin()+m_start,param.begin()+m_end);
575         long int total_npartitions(0);
576         LongVec2d::size_type lpforce;
577         for(lpforce = 0; lpforce < m_localpartforces.size(); ++lpforce)
578         {
579             LocalPartitionForce* pforce(dynamic_cast<LocalPartitionForce*>
580                                         (m_localpartforces[lpforce]));
581             assert(pforce);  // it wasn't a local partition force?
582             total_npartitions += pforce->GetNPartitions();
583 
584             const vector<DoubleVec1d::size_type>& indices(
585                 m_whichxparts[lpforce][m_whichpart[lpforce][which]]);
586             vector<DoubleVec1d::size_type>::const_iterator index;
587             double part_theta(0.0);
588             for(index = indices.begin(); index != indices.end(); ++index)
589             {
590                 part_theta += myths[*index];
591             }
592             answ += 1.0 / part_theta;
593 
594         }
595         answ -= total_npartitions / total_theta;
596     }
597 #endif
598 
599     return answ;
600 }
601 
602 //------------------------------------------------------------------------------------
603 // CoalesceGrowPL: coalescence forces specific waiting time and
604 //             point probabilites and its derivatives
605 // LONG DATA SUMMARIES
606 // [only in effect when GROWTH force is turned on]
607 
608 //------------------------------------------------------------------------------------
609 // erynes 2004/03/11 -- This method presently returns the following:
610 //
611 // sum_over_populations(sum_over_each_population's_coalescent_time_intervals(
612 //  (k_i*(k_i - 1)/(theta_p * g_p)) * (exp(g_p * t_start)- exp(g_p * t_end))
613 //                                                                          )
614 //                     )
615 //
616 // where
617 //        k_i is the number of lineages of population p in time interval i
618 //        theta_p is the present estimate of parameter theta for population p
619 //        g_p is the present estimate of the growth parameter for population p
620 //        t_start is the timepoint at the start of time interval i
621 //        t_end is the timepoint at the end of time interval i
622 //
623 // This quantity is equal to log(WaitProb(G|P)), where WaitProb(G|P) is the
624 // "waiting" probability of the input genealogy G (parameter "treedata")
625 // given the parameters P (parameter "param").
626 // Prob(G|P) = PointProb(G|P) * WaitProb(G|P).
627 // See CoalesceGrowPL::Point() for the point probability term.
628 
lnWait(const vector<double> & param,const TreeSummary * treedata)629 double CoalesceGrowPL::lnWait (const vector < double >&param, const TreeSummary * treedata)
630 {
631     const list<Interval>& treesum=treedata->GetCoalSummary()->GetLongWait();
632     list <Interval> :: const_iterator tit;
633     vector < double > :: const_iterator pit = param.begin() + m_start;
634     vector < double > :: const_iterator pend = param.begin() + m_start + m_nPop;
635     vector < double > :: const_iterator git = param.begin() + m_growthstart;
636 
637     unsigned long int xpartition = 0L;
638     double growth, theta, gts, gte;
639     double coeff_s, coeff_e, arg1_s, arg1_e;
640     double result = 0.0;
641     double starttime;
642 
643     for( ; pit != pend; ++pit, ++git, ++xpartition)
644     {
645         growth = *git;
646         theta = *pit;
647 
648         if (fabs(growth) < GROWTH_EPSILON)
649         {
650             result += lnWaitForTinyGrowth(theta, growth, xpartition, treesum);
651             continue;
652         }
653 
654         starttime = 0.0;
655         for(tit = treesum.begin(); tit != treesum.end(); ++tit)
656         {
657             gts = growth * starttime;
658             gte = growth * (*tit).endtime;
659 
660             coeff_s = coeff_e = arg1_s = arg1_e = 1.0;
661             const double& k = (*tit).xpartlines[xpartition];
662 
663             // IMPORTANT NOTE:  The following over/underflow checking
664             // assumes that each starttime and endtime is always >= 0.
665             // Hence, e.g., gts < 0 means growth < 0.
666 
667             if (gts > 1.0)
668             {
669                 coeff_s = k * (k - 1.0);
670                 if (growth > 1.0)
671                     arg1_s /= growth;
672                 else
673                     coeff_s /= growth;
674                 if (theta > 1.0)
675                     arg1_s /= theta;
676                 else
677                     coeff_s /= theta;
678             }
679             else if (gts < -1.0)
680             {
681                 arg1_s *= k * (k - 1.0);
682                 if (growth < -1.0)
683                     coeff_s /= growth;
684                 else
685                     arg1_s /= growth;
686                 if (theta > 1.0)
687                     coeff_s /= theta;
688                 else
689                     arg1_s /= theta;
690             }
691             else // no over/underflow in exp()
692                 coeff_s = k * (k - 1.0)/(theta * growth); // arg1_s already set
693 
694             // BUGBUG erynes -- what follows is easy to understand,
695             // but we should revisit this and reuse some of what we
696             // learned about growth and theta above, while considering gts.
697 
698             if (gte > 1.0)
699             {
700                 coeff_e = k * (k - 1.0);
701                 if (growth > 1.0)
702                     arg1_e /= growth;
703                 else
704                     coeff_e /= growth;
705                 if (theta > 1.0)
706                     arg1_e /= theta;
707                 else
708                     coeff_e /= theta;
709             }
710             else if (gte < -1.0)
711             {
712                 arg1_e *= k * (k - 1.0);
713                 if (growth < -1.0)
714                     coeff_e /= growth;
715                 else
716                     arg1_e /= growth;
717                 if (theta > 1.0)
718                     coeff_e /= theta;
719                 else
720                     arg1_e /= theta;
721             }
722             else // no over/underflow in exp()
723                 coeff_e = k * (k - 1.0)/(theta * growth); // arg1_e already set
724 
725             double incrementalResult =
726                 coeff_s * SafeProductWithExp(arg1_s, gts) -
727                 coeff_e * SafeProductWithExp(arg1_e, gte);
728 
729             if (incrementalResult > 0.0)
730             {
731                 // We have precision problems,
732                 // because we expect te > ts always,
733                 // and hence gte > gts always.
734                 // During debugging, we have encountered precision problems
735                 // in which gdb claims gts == gte and arg1_s == arg1_e
736                 // but incrementalResult > 0, typically reflecting
737                 // a difference near the 15th digit.
738                 // (Note that te == ts corresponds to two events
739                 // occurring simultaneously--e.g., k = 1 for a really long
740                 // time in each of two populations, then one finally
741                 // migrates to the other, then they coalesce instantaneously.)
742                 // It should be safe to set incrementalResult to zero in cases of
743                 // roundoff error.  If incrementalResult is positive,
744                 // but _not_ tiny compared with either of the terms
745                 // whose difference yielded incrementalResult,
746                 // then something is _very_ wrong--probably te < ts,
747                 // which should never happen.  We assert in this case;
748                 // this will enable us to see the problem, but the end user
749                 // will not see the assertion, unless s/he's running in debug mode.
750                 if (incrementalResult/fabs(coeff_s * SafeProductWithExp(arg1_s, gts)) > 1e-10)
751                 {
752                     string msg = "incrementalResult in plforces.cpp is greater "
753                         "than zero.  coeff_s = "
754                         + ToString(coeff_s) + ", coeff_e = " + ToString(coeff_e)
755                         + ", theta = " + ToString(theta) + ", growth = "
756                         + ToString(growth) + ", and incrementalResult is "
757                         + ToString(incrementalResult);
758                     registry.GetRunReport().ReportDebug(msg);
759                     //assert(0);
760                 }
761                 incrementalResult = 0.0;
762             }
763 
764             starttime = (*tit).endtime;
765             result += incrementalResult;
766 
767         }
768 
769     }
770 
771     return result;
772 }
773 
774 //------------------------------------------------------------------------------------
775 // erynes 2004/05/20 -- This method presently returns the following:
776 //
777 // sum_over_a_population's_coalescent_time_intervals(
778 //  (k_i*(k_i - 1)/theta_p) * ( (tis - tie) + (1/2)*g_p*(tis^2 - tie^2) )
779 //                                                  )
780 //
781 // where
782 //        k_i is the number of lineages of population p in time interval i
783 //        theta_p is the present estimate of parameter theta for population p
784 //        g_p is the present estimate of the growth parameter for population p
785 //        tis is the timepoint at the start of time interval i
786 //        tie is the timepoint at the end of time interval i
787 //
788 // The population "p" is determined in Wait() by iterating over all
789 // populations.  That method calls this method with the appropriate
790 // parameters for the population in question.
791 //
792 // This non-public method is used to handle the case in which fabs(g)
793 // is tiny--that is, less than GROWTH_EPSILON.  It is derived from the
794 // formula used in Wait by expanding exp(g*t) in a Taylor series
795 // about g == 0, multiplying this by the term k(k-1)/(g*theta),
796 // and dropping all terms of order g^2 and above.
797 // Brief tests with Mathematica suggest that the original formula
798 // converges very nicely to this linear formula for normal values
799 // of the timepoints, the difference being less than one part in one
800 // billion, dropping down to zero difference for g == 0 (using
801 // L'Hopital's rule).
802 //
803 // Please also see the comments for DWait.
804 
lnWaitForTinyGrowth(const double theta,const double growth,const unsigned long int xpartition,const list<Interval> & treesummary)805 double CoalesceGrowPL::lnWaitForTinyGrowth(const double theta, const double growth,
806                                            const unsigned long int xpartition,
807                                            const list<Interval>& treesummary)
808 {
809     assert(fabs(growth) < GROWTH_EPSILON); // otherwise use Wait
810 
811     list<Interval>::const_iterator tit;
812     double k, te;
813 
814     double result = 0.0;
815     // ts starts at 0 and subsequently is the previous cycle's te
816     double ts = 0.0;
817 
818     for(tit = treesummary.begin(); tit != treesummary.end(); ++tit)
819     {
820         te = (*tit).endtime;
821         k  = (*tit).xpartlines[xpartition];
822 
823         result += (k * (k - 1.0) / theta) *
824             ((ts - te) + 0.5 * growth * (ts * ts - te * te));
825         ts = te;
826     }
827 
828     return result;
829 }
830 
831 //------------------------------------------------------------------------------------
832 // erynes 2004/03/11 -- This method presently returns the following:
833 //
834 // sum_over_populations(sum_over_each_population's_coalescent_time_intervals(
835 //                              log(2) + g_p*t_i_end - log(theta_p)
836 //                                                                          )
837 //                     )
838 //
839 // where
840 //        theta_p is the present estimate of parameter theta for population p
841 //        g_p is the present estimate of the growth parameter for population p
842 //        t_i_end is the timepoint at the end of time interval i
843 //
844 // This quantity is equal to log(PointProb(G|P)), where PointProb(G|P) is the
845 // "point" probability of the input genealogy G (parameter "treedata")
846 // given the parameters P (parameter "param").
847 // Prob(G|P) = PointProb(G|P) * WaitProb(G|P).
848 // See CoalesceGrowPL::lnWait() for the wait probability term.
849 
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)850 double CoalesceGrowPL::lnPoint (const vector < double >&param, const vector < double >&lparam,
851                                 const TreeSummary * treedata)
852 
853 {
854     const Interval *  treesum = treedata->GetCoalSummary()->GetLongPoint();
855     const Interval * tit;
856     vector < double > :: const_iterator pit = lparam.begin() + m_start;
857     vector < double > :: const_iterator pend = lparam.begin () + m_start + m_nPop;
858     vector < double > :: const_iterator git = param.begin() + m_growthstart;
859 
860     long int xpartition = 0L;
861     double result = 0.0;
862 
863     for( ; pit != pend; ++pit, ++git, ++xpartition)
864     {
865         for(tit= treesum; tit != NULL; tit = tit->next)
866         {
867             if(xpartition==(*tit).oldstatus)
868                 result +=  LOG2 + (*tit).endtime * (*git) - (*pit);
869         }
870     }
871 
872     // we pull this array only to check for emptyness, it is not used!
873     // JDEBUG--:  not correct in presence of migration
874     const LongVec2d& picks = treedata->GetCoalSummary()->GetShortPicks();
875     if (!picks.empty())                 // partner-picks must be accomodated
876     {
877         DoubleVec1d timesizes(m_nPop, 0.0);
878         ForceParameters fp(unknown_region);
879         DoubleVec1d myths(param.begin()+m_start,param.begin()+m_end);
880         fp.SetRegionalThetas(myths);
881         DoubleVec1d mygs(param.begin()+m_growthstart,
882                          param.begin()+m_growthstart+m_nPop);
883         fp.SetGrowthRates(mygs);
884         // now for every recombination event....
885         treesum = treedata->GetRecSummary()->GetLongPoint();
886         for (tit = treesum; tit != NULL; tit = tit->next)
887         {
888             // compute partner-picks denominator
889             timesizes = m_timesize->XpartThetasAtT(tit->endtime,fp);
890             assert(timesizes.size() == static_cast<DoubleVec1d::size_type>(m_nPop));
891             double denominator(std::accumulate(timesizes.begin(), timesizes.end(),0.0));
892 
893             // compute partner-picks numerator
894             ForceVec::size_type lpforce = 0;
895             // assuming no migration and only one local partition force
896             long whichtheta =
897                 m_whichlocalxparts[lpforce][tit->partnerpicks[lpforce]][0];
898             result += log(timesizes[whichtheta] / denominator);
899         }
900     }
901 
902 #if 0 // JDEBUG--JWARNING this is the logic that was in Jim's original commit
903     // compute partner-picks numerator
904     for(lpforce = 0; lpforce < m_localpartforces.size(); ++lpforce)
905     {
906         LocalPartitionForce* pforce(dynamic_cast<LocalPartitionForce*>
907                                     (m_localpartforces[lpforce]));
908         assert(pforce);  // it wasn't a local partition force?
909         DoubleVec1d part_thetas(pforce->SumXPartsToParts(myths, mygs, tit->endtime));
910         DoubleVec1d::size_type partition;
911         for(partition = 0; partition < part_thetas.size(); ++partition)
912         {
913             result += log(part_thetas[partition] / denominator);
914         }
915     }
916 #endif
917 
918     return result;
919 }
920 
921 //------------------------------------------------------------------------------------
922 // erynes 2004/03/11 -- This method currently returns the following:
923 //
924 // sum_over_coalescent_time_intervals_of_population_whichparam(
925 //   -(k_i(k_i - 1)/(g*theta*theta))*(exp(g*t_i_start) - exp(g*t_i_end))
926 //                                                            )
927 // where
928 //   k_i is the number of lineages of this population in time interval i
929 //   g   is the current estimate of the growth parameter for this pop.
930 //   theta is the current estimate of the theta parameter for this pop.
931 //   t_i_start is the timepoint at the beginning of time interval i
932 //   t_i_end is the timepoint at the end of time interval i
933 //
934 // IMPORTANT NOTE:
935 // Wait() and Point() return log(WaitProb(G|P)) and log(PointProb(G|P)),
936 // and PostLike::DCalculate() currently assumes that DWait() and DPoint()
937 // return the derivatives of log(WaitProb(G|P)) and log(PointProb(G|P)),
938 // instead of returning the derivatives of WaitProb/PointProb directly.
939 // As long as everything is kept consistent, this is all very good
940 // for reducing complexity and improving speed.
941 //
942 // Note:  CoalesceGrowPL::DWait calculates the partial derivative of Wait
943 // with respect to theta for the given population ("whichparam").
944 // CoalescePL::DPoint does the same for Point.
945 // Partial derivatives with respect to growth are found in GrowPL.
946 
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)947 double CoalesceGrowPL::DlnWait (const vector < double >&param, const TreeSummary * treedata,
948                                 const long int & whichparam)
949 {
950     const double growth = param[whichparam + m_growthstart];
951     if(fabs(growth) < GROWTH_EPSILON)
952         return DlnWaitForTinyGrowth(param, treedata, whichparam);
953 
954     double result = 0.0;
955     long int which = whichparam - m_start;
956     const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
957     list <Interval> :: const_iterator tit=treesum.begin();
958     const double theta = param[whichparam];
959     double coeff_s, coeff_e, arg1_s, arg1_e;
960     double starttime = 0.0;
961 
962     for( ; tit != treesum.end(); ++tit)
963     {
964         double gts = growth * starttime;
965         double gte = growth * (*tit).endtime;
966 
967         const long int & k = (*tit).xpartlines[which];
968 
969         coeff_s = coeff_e = arg1_s = arg1_e = 1.0;
970 
971         // IMPORTANT NOTE:  The following over/underflow checking
972         // assumes that each starttime and endtime is always >= 0.
973         // Hence, e.g., gts < 0 means growth < 0.
974 
975         if (gts > 1.0)
976         {
977             coeff_s = -k * (k - 1.0);
978             if (growth > 1.0)
979                 arg1_s /= growth;
980             else
981                 coeff_s /= growth;
982             if (theta > 1.0)
983                 arg1_s /= theta*theta;
984             else
985                 coeff_s /= theta*theta;
986         }
987         else if (gts < -1.0)
988         {
989             arg1_s *= -k * (k - 1.0);
990             if (growth < -1.0)
991                 coeff_s /= growth;
992             else
993                 arg1_s /= growth;
994             if (theta > 1.0)
995                 coeff_s /= theta*theta;
996             else
997                 arg1_s /= theta*theta;
998         }
999         else // no over/underflow in exp()
1000             coeff_s = -k * (k - 1.0)/(theta * theta * growth); // arg1_s already set
1001 
1002         // BUGBUG erynes -- what follows is easy to understand,
1003         // but we should revisit this and reuse some of what we
1004         // learned about growth and theta above, while considering gts.
1005 
1006         if (gte > 1.0)
1007         {
1008             coeff_e = -k * (k - 1.0);
1009             if (growth > 1.0)
1010                 arg1_e /= growth;
1011             else
1012                 coeff_e /= growth;
1013             if (theta > 1.0)
1014                 arg1_e /= theta*theta;
1015             else
1016                 coeff_e /= theta*theta;
1017         }
1018         else if (gte < -1.0)
1019         {
1020             arg1_e *= -k * (k - 1.0);
1021             if (growth < -1.0)
1022                 coeff_e /= growth;
1023             else
1024                 arg1_e /= growth;
1025             if (theta > 1.0)
1026                 coeff_e /= theta*theta;
1027             else
1028                 arg1_e /= theta*theta;
1029         }
1030         else // no over/underflow in exp()
1031             coeff_e = -k * (k - 1.0)/(theta * theta * growth); // arg1_e already set
1032 
1033         double incrementalResult =
1034             coeff_s * SafeProductWithExp(arg1_s, gts) -
1035             coeff_e * SafeProductWithExp(arg1_e, gte);
1036 
1037         if (incrementalResult < 0.0)
1038         {
1039             // We have precision problems,
1040             // because we expect te > ts always,
1041             // and hence gte > gts always.
1042             // During debugging, we have encountered precision problems
1043             // in which gdb claims gts == gte and arg1_s == arg1_e
1044             // but incrementalResult < 0, typically reflecting
1045             // a difference near the 15th digit.
1046             // (Note that te == ts corresponds to two events
1047             // occurring simultaneously--e.g., k = 1 for a really long
1048             // time in each of two populations, then one finally
1049             // migrates to the other, then they coalesce instantaneously.)
1050             // It should be safe to set incrementalResult to zero in cases of
1051             // roundoff error.  If incrementalResult is negative,
1052             // but _not_ tiny compared with either of the terms
1053             // whose difference yielded incrementalResult,
1054             // then something is _very_ wrong--probably te < ts,
1055             // which should never happen.  We assert in this case;
1056             // this will enable us to see the problem, but the end user
1057             // will not see the assertion, unless s/he's running in debug mode.
1058             if (incrementalResult/fabs(coeff_s * SafeProductWithExp(arg1_s, gts))
1059                 < -1e-10)
1060                 assert(0);
1061             incrementalResult = 0.0;
1062         }
1063 
1064         starttime = (*tit).endtime;
1065         result += incrementalResult;
1066 
1067     }
1068 
1069     return result;
1070 }
1071 
1072 //------------------------------------------------------------------------------------
1073 // erynes 2004/05/20 -- This method currently returns the following:
1074 //
1075 // sum_over_coalescent_time_intervals_of_population_whichparam(
1076 //   -(k_i(k_i - 1)/(theta*theta))*( (t_i_start - t_i_end) +
1077 //                                   (1/2)*g*(t_i_start^2 - t_i_end^2) )
1078 //                                                            )
1079 // where
1080 //   k_i is the number of lineages of this population in time interval i
1081 //   g   is the current estimate of the growth parameter for this pop.
1082 //   theta is the current estimate of the theta parameter for this pop.
1083 //   t_i_start is the timepoint at the beginning of time interval i
1084 //   t_i_end is the timepoint at the end of time interval i
1085 //
1086 // This non-public method is used to handle the case in which fabs(g)
1087 // is tiny--that is, less than GROWTH_EPSILON.  It is derived from the
1088 // formula used in DWait by expanding exp(g*t) in a Taylor series
1089 // about g == 0, multiplying this by the term -k(k-1)/(g*theta*theta),
1090 // and dropping all terms of order g^2 and above.
1091 // Brief tests with Mathematica suggest that the original formula
1092 // converges very nicely to this linear formula for normal values
1093 // of the timepoints, the difference being less than one part in one
1094 // billion, dropping down to zero difference for g == 0 (using
1095 // L'Hopital's rule).
1096 //
1097 // Please also see the comments for DWait.
1098 
DlnWaitForTinyGrowth(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1099 double CoalesceGrowPL::DlnWaitForTinyGrowth(const vector < double >&param, const TreeSummary * treedata,
1100                                             const long int & whichparam)
1101 {
1102     const double growth = param[whichparam + m_growthstart];
1103     assert(fabs(growth) < GROWTH_EPSILON); // otherwise use DWait()
1104     const double theta = param[whichparam];
1105     double k, te;
1106     long int which = whichparam - m_start;
1107     const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
1108     list<Interval>::const_iterator tit = treesum.begin();
1109 
1110     double result = 0.0;
1111     double ts = 0.0;
1112 
1113     for( ; tit != treesum.end(); ++tit)
1114     {
1115         k  = (*tit).xpartlines[which];
1116         te = (*tit).endtime;
1117 
1118         result += -(k * (k - 1.0) / (theta * theta)) *
1119             ((ts - te) + 0.5 * growth * (ts * ts - te * te));
1120         ts = te;
1121     }
1122 
1123     return result;
1124 }
1125 
1126 //------------------------------------------------------------------------------------
1127 // Mary 2007/08/13 -- added support for disease w/ recombination
1128 //
1129 // JDEBUG--:  only in absence of migration
1130 
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long & whichparam)1131 double CoalesceGrowPL::DlnPoint (const vector < double >&param, const TreeSummary * treedata, const long &whichparam)
1132 {
1133     long which = whichparam - m_start;
1134     const vector<double>& ncoal = treedata->GetCoalSummary()->GetShortPoint();
1135 
1136     double answ(-ncoal[which] / param[whichparam]);
1137 
1138     const LongVec2d& picks = treedata->GetCoalSummary()->GetShortPicks();
1139     if (picks.empty()) return answ;
1140     else  {
1141         DoubleVec1d timesizes(m_nPop, 0.0);
1142         ForceParameters fp(unknown_region);
1143         DoubleVec1d myths(param.begin()+m_start,param.begin()+m_end);
1144         fp.SetRegionalThetas(myths);
1145         DoubleVec1d mygs(param.begin()+m_growthstart,
1146                          param.begin()+m_growthstart+m_nPop);
1147         fp.SetGrowthRates(mygs);
1148         // now for every recombination event....
1149         const Interval* reclist  = treedata->GetRecSummary()->GetLongPoint();
1150         const Interval* tit;
1151         for (tit = reclist; tit != NULL; tit = tit->next)
1152         {
1153             // compute partner-picks denominator
1154             timesizes = m_timesize->XpartThetasAtT(tit->endtime,fp);
1155             assert(timesizes.size() == static_cast<DoubleVec1d::size_type>(m_nPop));
1156             double denominator(std::accumulate(timesizes.begin(), timesizes.end(),0.0));
1157 
1158             // compute partner-picks numerator
1159             // ALL assuming no migration and only one local partition force
1160             // therefore xpart == part
1161             answ -= 1.0/denominator;
1162             if (tit->partnerpicks[0] == which)
1163             {
1164                 answ += 1.0/timesizes[which];
1165             }
1166         }
1167     }
1168 
1169     return answ;
1170 } // CoalesceGrowPL::DlnPoint
1171 
1172 //------------------------------------------------------------------------------------
1173 // GrowPL: exponential growth forces specific waiting time (=0) and
1174 //              point probabilites and its derivatives
1175 // LONG DATA SUMMARIES
1176 
lnWait(const vector<double> & param,const TreeSummary * treedata)1177 double GrowPL::lnWait (const vector < double >&param, const TreeSummary * treedata)
1178 {
1179     // Growth is a modifier of theta, hence its contribution
1180     // is included in CoalesceGrowPL::Wait.
1181     return 0.0;
1182 }
1183 
1184 //------------------------------------------------------------------------------------
1185 
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)1186 double GrowPL::lnPoint (const vector < double >&param, const vector < double >&lparam, const TreeSummary * treedata)
1187 {
1188     // Growth is a modifier of theta, hence its contribution
1189     // is included in CoalesceGrowPL::Point.
1190     return 0.0;
1191 }
1192 
1193 //------------------------------------------------------------------------------------
1194 // erynes 2004/04/09 -- This method currently returns the following:
1195 //
1196 // sum_over_coalescent_time_intervals_of_population_whichparam(
1197 //   -(k_i(k_i - 1)/(g*g*theta))*(
1198 //             (1 - g*t_i_start)*exp(g*t_i_start) -
1199 //             (1 - g*t_i_end)  *exp(g*t_i_end)
1200 //                               )
1201 //                                                            )
1202 // where
1203 //   k_i is the number of lineages of this population in time interval i
1204 //   g   is the current estimate of the growth parameter for this pop.
1205 //   theta is the current estimate of the theta parameter for this pop.
1206 //   t_i_start is the timepoint at the beginning of time interval i
1207 //   t_i_end is the timepoint at the end of time interval i
1208 
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1209 double GrowPL::DlnWait (const vector < double >&param, const TreeSummary * treedata, const long int & whichparam)
1210 {
1211     const double growth = param[whichparam];
1212     if (fabs(growth) < GROWTH_EPSILON)
1213         return DlnWaitForTinyGrowth(param, treedata, whichparam);
1214 
1215     const long int whicht = m_thetastart + whichparam - m_start;
1216     const double theta = param[whicht];
1217     assert(theta); // we divide by it below
1218     double gte, gts, arg1_s, arg1_e, coeff_s, coeff_e;
1219     long int k;
1220 
1221     const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
1222     list <Interval> :: const_iterator tit = treesum.begin();
1223 
1224     double result = 0.0;
1225     double starttime = 0.0;
1226 
1227     for( ; tit != treesum.end(); ++tit)
1228     {
1229         gts = growth * starttime;
1230         gte = growth * (*tit).endtime;
1231 
1232         k = (*tit).xpartlines[whicht];
1233 
1234         arg1_s = 1.0 - gts;
1235         arg1_e = 1.0 - gte;
1236         coeff_s = 1.0;
1237         coeff_e = 1.0;
1238 
1239         // IMPORTANT NOTE:  The following over/underflow checking
1240         // assumes that each starttime and endtime is always >= 0.
1241         // Hence, e.g., gts < 0 means growth < 0.
1242 
1243         if (gts > 1.0)
1244         {
1245             coeff_s = -k * (k - 1.0);
1246             if (growth > 1.0)
1247                 arg1_s /= growth*growth;
1248             else
1249                 coeff_s /= growth*growth;
1250             if (theta > 1.0)
1251                 arg1_s /= theta;
1252             else
1253                 coeff_s /= theta;
1254         }
1255         else if (gts < -1.0)
1256         {
1257             arg1_s *= -k*(k-1.0);
1258             if (growth < -1.0)
1259                 coeff_s /= growth*growth;
1260             else
1261                 arg1_s /= growth*growth;
1262             if (theta > 1.0)
1263                 coeff_s /= theta;
1264             else
1265                 arg1_s /= theta;
1266         }
1267         else // no over/underflow in exp()
1268             coeff_s = -k*(k-1.0)/(theta*growth*growth); // arg1_s already set
1269 
1270         // BUGBUG erynes -- this is easy to follow,
1271         // but we should revisit it and possibly reuse
1272         // the growth and theta checks that we made for the gts case.
1273 
1274         if (gte > 1.0)
1275         {
1276             coeff_e = -k*(k-1.0);
1277             if (growth > 1.0)
1278                 arg1_e /= growth*growth;
1279             else
1280                 coeff_e /= growth*growth;
1281             if (theta > 1.0)
1282                 arg1_e /= theta;
1283             else
1284                 coeff_e /= theta;
1285         }
1286         else if (gte < -1.0)
1287         {
1288             arg1_e *= -k*(k-1.0);
1289             if (growth < -1.0)
1290                 coeff_e /= growth*growth;
1291             else
1292                 arg1_e /= growth*growth;
1293             if (theta > 1.0)
1294                 coeff_e /= theta;
1295             else
1296                 arg1_e /= theta;
1297         }
1298         else // no over/underflow in exp()
1299             coeff_e = -k*(k-1.0)/(theta*growth*growth); // arg1_e already set
1300 
1301         starttime = (*tit).endtime;
1302         result +=  coeff_s * SafeProductWithExp(arg1_s, gts)
1303             - coeff_e * SafeProductWithExp(arg1_e, gte);
1304     }
1305 
1306     return result;
1307 }
1308 
1309 //------------------------------------------------------------------------------------
1310 // erynes 2004/05/20 -- This method currently returns the following:
1311 //
1312 // sum_over_coalescent_time_intervals_of_population_whichparam(
1313 //    (k_i(k_i - 1)/(2*theta))*(
1314 //                  tis^2 * (1 + g*tis)  -  tie^2 * (1 + g*tie)
1315 //                             )
1316 //                                                            )
1317 // where
1318 //   k_i is the number of lineages of this population in time interval i
1319 //   g   is the current estimate of the growth parameter for this pop.
1320 //   theta is the current estimate of the theta parameter for this pop.
1321 //   tis is the timepoint at the beginning of time interval i
1322 //   tie is the timepoint at the end of time interval i
1323 //
1324 // This non-public method is used to handle the case in which fabs(g)
1325 // is tiny--that is, less than GROWTH_EPSILON.  It is derived from the
1326 // formula used in DWait by expanding exp(g*t) in a Taylor series
1327 // about g == 0, multiplying through in the original formula,
1328 // and dropping all terms of order g^2 and above.
1329 // Brief tests with Mathematica suggest that the original formula
1330 // converges very nicely to this linear formula for normal values
1331 // of the timepoints, the difference being less than one part in one
1332 // billion, dropping down to zero difference for g == 0 (using
1333 // L'Hopital's rule).
1334 //
1335 // Please also see the comments for DWait.
1336 
DlnWaitForTinyGrowth(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1337 double GrowPL::DlnWaitForTinyGrowth(const vector < double >&param, const TreeSummary * treedata,
1338                                     const long int & whichparam)
1339 {
1340     const double growth = param[whichparam];
1341     assert(fabs(growth) < GROWTH_EPSILON); // otherwise use DWait
1342 
1343     const long int whicht = m_thetastart + whichparam - m_start;
1344     const double theta = param[whicht];
1345     assert(theta); // we divide by it below
1346     double k, te;
1347     const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
1348     list <Interval> :: const_iterator tit = treesum.begin();
1349 
1350     double result = 0.0;
1351     double ts = 0.0;
1352 
1353     for( ; tit != treesum.end(); ++tit)
1354     {
1355         te = (*tit).endtime;
1356         k = (*tit).xpartlines[whicht];
1357 
1358         result += (k * (k - 1.0) / (2.0 * theta)) *
1359             (ts * ts * (1.0 + growth * ts) - te * te * (1.0 + growth * te));
1360         ts = te;
1361     }
1362 
1363     return result;
1364 }
1365 
1366 //------------------------------------------------------------------------------------
1367 // This method presently returns:
1368 //
1369 // sum_over_time_intervals_of_this_population's_coalescences(endtime)
1370 //
1371 // This is the derivative of log(Point) with respect to growth.
1372 
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1373 double GrowPL::DlnPoint (const vector < double >&param, const TreeSummary * treedata, const long int & whichparam)
1374 {
1375     const Interval * treesum = treedata->GetCoalSummary()->GetLongPoint();
1376     const Interval * tit;
1377     double result = 0.0 ;
1378     // indicator to Theta, we need this to get the correct
1379     // time intervals in treesum
1380     long int whicht = m_thetastart + whichparam - m_start;
1381 
1382     for(tit = treesum; tit != NULL; tit = tit->next)
1383     {
1384         if(whicht == (*tit).oldstatus)
1385             result += (*tit).endtime;
1386     }
1387 
1388     return result;
1389 }
1390 
1391 //------------------------------------------------------------------------------------
1392 //------------------------------------------------------------------------------------
1393 
lnWait(const vector<double> & param,const TreeSummary * treedata)1394 double DivMigPL::lnWait(const vector<double>& param, const TreeSummary* treedata)
1395 {
1396     const vector<double>& mWait = treedata->GetDivMigSummary()->GetShortWait();
1397 
1398     // sum_pop(SM_pop *kt)
1399     return  -1.0 * inner_product (mWait.begin (),
1400                                   mWait.end (), param.begin() + m_start, 0.0);
1401 
1402 }
1403 
1404 //------------------------------------------------------------------------------------
1405 
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)1406 double DivMigPL::lnPoint(const vector<double>& param, const vector<double>& lparam, const TreeSummary* treedata)
1407 {
1408     //  Sum_j(Sum_i(migevent[j,i] * log(M[j,i]))
1409     const vector<double>& nmig = treedata->GetDivMigSummary()->GetShortPoint();
1410 
1411     return  inner_product (nmig.begin(), nmig.end(), lparam.begin() + m_start,
1412                            0.0);
1413 }
1414 
1415 //------------------------------------------------------------------------------------
1416 //------------------------------------------------------------------------------------
1417 
1418 // RecombinePL: recombination forces specific waiting time and
1419 //              point probabilites and its derivatives
1420 
lnWait(const vector<double> & param,const TreeSummary * treedata)1421 double RecombinePL::lnWait (const vector < double >&param, const TreeSummary * treedata)
1422 {
1423     const vector<double>& rWait = treedata->GetRecSummary()->GetShortWait();
1424     return  -rWait[0] * *(param.begin () + m_start);
1425 }
1426 
1427 //------------------------------------------------------------------------------------
1428 
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)1429 double RecombinePL::lnPoint (const vector < double >&param, const vector < double >&lparam,
1430                              const TreeSummary * treedata)
1431 {
1432     //  Sum_j(recevents * log(r))
1433     const vector<double>& nrec = treedata->GetRecSummary()->GetShortPoint();
1434     return nrec[0] * *(lparam.begin () + m_start);
1435 }
1436 
1437 //------------------------------------------------------------------------------------
1438 
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1439 double RecombinePL::DlnWait (const vector < double >&param, const TreeSummary * treedata,
1440                              const long int & whichparam)
1441 {
1442     const vector<double>& rWait = treedata->GetRecSummary()->GetShortWait();
1443     return -rWait[0];
1444 }
1445 
1446 //------------------------------------------------------------------------------------
1447 
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1448 double RecombinePL::DlnPoint (const vector < double >&param, const TreeSummary * treedata,
1449                               const long int & whichparam)
1450 {
1451     const vector<double>& nrecs = treedata->GetRecSummary()->GetShortPoint();
1452     if(nrecs[0] == 0)
1453     {
1454         return 0.0;
1455     }
1456     else
1457     {
1458         return SafeDivide(nrecs[0], *(param.begin() + m_start));
1459     }
1460 }
1461 
1462 //------------------------------------------------------------------------------------
1463 // SelectPL: selection forces specific waiting time and
1464 //              point probabilites and its derivatives
1465 
lnWait(const vector<double> & param,const TreeSummary * treedata)1466 double SelectPL::lnWait (const vector < double >&param, const TreeSummary * treedata)
1467 {
1468     return 0.0;
1469 }
1470 
1471 //------------------------------------------------------------------------------------
1472 
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)1473 double SelectPL::lnPoint (const vector < double >&param, const vector < double >&lparam, const TreeSummary * treedata)
1474 {
1475     return 0.0;
1476 }
1477 
1478 //------------------------------------------------------------------------------------
1479 
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1480 double SelectPL::DlnWait (const vector < double >&param, const TreeSummary * treedata, const long int & whichparam)
1481 {
1482     return 0.0;
1483 }
1484 
1485 //------------------------------------------------------------------------------------
1486 
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1487 double SelectPL::DlnPoint (const vector < double >&param, const TreeSummary * treedata, const long int & whichparam)
1488 {
1489     return 0.0;
1490 }
1491 
1492 //------------------------------------------------------------------------------------
1493 // This function computes the "waiting time" contribution to the log-likelihood.
1494 // It computes and returns
1495 //
1496 // sum_over_all_timeIntervals_i( -kA_i*(kA_i - 1)*dtauA_i/thetaA0 -ka_i*(ka_i - 1)*dtaua_i/thetaa0 ),
1497 //
1498 // where "A" is the favored allele, "a" is the disfavored allele, "k" is the number of lineages
1499 // in population "A" or "a," and dtau is the length of the time interval in the "Kingman reference frame"
1500 // (i.e., time running as usual and pop. size remaining constant).  The transformations from
1501 // "magic time" dtau to real time "dt," where dt = te - ts = endtime - starttime, are:
1502 //
1503 // dtau_A = (theta_A0*dt + (theta_a0/s)*(exp(s*te) - exp(s*ts)))/(theta_A0 + theta_a0),
1504 // dtau_a = (theta_a0*dt - (theta_A0/s)*(exp(-s*te) - exp(-s*ts)))/(theta_A0 + theta_a0).
1505 
lnWait(const vector<double> & param,const TreeSummary * treedata)1506 double CoalesceLogisticSelectionPL::lnWait(const vector<double>& param, const TreeSummary *treedata)
1507 {
1508     if (2 != m_nPop)
1509     {
1510         string msg = "CoalesceLogisticSelectionPL::lnWait() called with m_nPop = ";
1511         msg += ToString(m_nPop);
1512         msg += ".  m_nPop must equal 2, reflecting one population with the major ";
1513         msg += "allele and one population with the minor allele.";
1514         throw implementation_error(msg);
1515     }
1516     double mu_into_A_from_a(param[3]), mu_into_a_from_A(param[4]);
1517 
1518     if (0 != param[2] || 0 != param[5] || 0 == param[3] || 0 == param[4])
1519         throw implementation_error("Error parsing disease rates in CoalesceLogisticSelectionPL::lnWait()");
1520 
1521     const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
1522     list<Interval>::const_iterator treesum_it;
1523 
1524     double result(0.0), s(param[m_s_is_here]);
1525     double theta_A0(param[m_start]), theta_a0(param[m_start+1]);
1526     double totalTheta_0(theta_A0 + theta_a0);
1527 
1528     if (theta_a0 <= 0.0 || theta_A0 <= 0.0)
1529     {
1530         string msg = "CoalesceLogisticSelectionPL::lnWait(), received an invalid Theta value ";
1531         msg += "(theta_A0 = " + ToString(theta_A0) + ", theta_a0 = " + ToString(theta_a0) + ").";
1532         throw impossible_error(msg);
1533     }
1534 
1535     if (fabs(s) < LOGISTIC_SELECTION_COEFFICIENT_EPSILON)
1536         return lnWaitForTinyLogisticSelectionCoefficient(param, treedata);
1537 
1538     if (fabs(s) >= DBL_BIG)
1539         return -DBL_BIG; // unavoidable overflow
1540 
1541     double factor_A(theta_a0/(s*theta_A0)), factor_a(theta_A0/(-s*theta_a0));
1542     double t_e, t_s(0.0), dt, e_toThe_sts(1.0), e_toThe_ste, term;
1543     double term_A(0.0), term_a(0.0);
1544 
1545     for (treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
1546     {
1547         t_e = treesum_it->endtime;
1548         dt = t_e - t_s;
1549         term_A = term_a = 0.0;
1550         const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
1551         const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
1552 
1553         if (s > 0.0)
1554         {
1555             // Use overflow protection for term_A; if term_a underflows, that's okay.
1556 
1557             if (s*dt > LOG_ONEPLUSEPSILON)
1558             {
1559                 term = SafeProductWithExp(factor_A, s*dt); // coefficient guaranteed finite and positive
1560                 if (term >= DBL_BIG)
1561                     return -DBL_BIG; // unavoidable overflow
1562                 e_toThe_ste = (term/factor_A)*e_toThe_sts;
1563                 if (k_A > 0.0)
1564                 {
1565                     term_A = k_A*(term - factor_A)*e_toThe_sts*((k_A - 1.0)/totalTheta_0 + mu_into_A_from_a)
1566                         + k_A*(k_A - 1.0)*dt/totalTheta_0;
1567                     // Note:  A factor of theta_A0 cancels in the "dt" term.
1568                     if (term_A >= DBL_BIG)
1569                         return -DBL_BIG; // unavoidable overflow
1570                 }
1571             }
1572             else
1573             {
1574                 if (s*dt >= numeric_limits<double>::epsilon())
1575                     e_toThe_ste = (1.0 + s*dt)*e_toThe_sts;
1576                 else
1577                     e_toThe_ste = SafeProductWithExp(1.0,s*t_e);
1578                 if (k_A > 0.0)
1579                 {
1580                     term_A = k_A*factor_A*s*dt*e_toThe_sts*((k_A - 1.0)/totalTheta_0 + mu_into_A_from_a)
1581                         + k_A*(k_A - 1.0)*dt/totalTheta_0;
1582                     // Note:  A factor of theta_A0 cancels in the "dt" term.
1583                     if (term_A >= DBL_BIG)
1584                         return -DBL_BIG; // unavoidable overflow
1585                 }
1586             }
1587             if (k_a > 0.0)
1588                 term_a = k_a * factor_a
1589                     * (1.0/e_toThe_ste - 1.0/e_toThe_sts)
1590                     * ((k_a - 1.0)/totalTheta_0 + mu_into_a_from_A)
1591                     + k_a*(k_a - 1.0)*dt/totalTheta_0;
1592             // Note:  A factor of theta_a0 cancels in the "dt" term.
1593         }
1594         else // s < 0
1595         {
1596             // Use overflow protection for term_a; if term_A underflows, that's okay.
1597 
1598             if (-s*dt > LOG_ONEPLUSEPSILON)
1599             {
1600                 term = SafeProductWithExp(factor_a,-s*dt); // coefficient guaranteed finite and positive
1601                 if (term >= DBL_BIG)
1602                     return -DBL_BIG; // unavoidable overflow
1603                 e_toThe_ste = e_toThe_sts/(term/factor_a);
1604                 if (k_a > 0.0)
1605                 {
1606                     term_a = k_a*((term - factor_a)/e_toThe_sts)*((k_a - 1.0)/totalTheta_0 + mu_into_a_from_A)
1607                         + k_a*(k_a - 1.0)*dt/totalTheta_0;
1608                     // Note:  A factor of theta_a0 cancels in the "dt" term.
1609                     if (term_a >= DBL_BIG)
1610                         return -DBL_BIG; // unavoidable overflow
1611                 }
1612             }
1613             else
1614             {
1615                 if (-s*dt >= numeric_limits<double>::epsilon())
1616                     e_toThe_ste = (1.0 + s*dt)*e_toThe_sts;
1617                 else
1618                     e_toThe_ste = SafeProductWithExp(1.0,s*t_e);
1619                 if (k_a > 0.0)
1620                 {
1621                     term_a = k_a*(factor_a*(-s)*dt/e_toThe_sts)*((k_a - 1.0)/totalTheta_0 + mu_into_a_from_A)
1622                         + k_a*(k_a - 1.0)*dt/totalTheta_0;
1623                     // Note:  A factor of theta_a0 cancels in the "dt" term.
1624                     if (term_a >= DBL_BIG)
1625                         return -DBL_BIG; // unavoidable overflow
1626                 }
1627             }
1628             if (k_A > 0.0)
1629                 term_A = k_A*factor_A*(e_toThe_ste - e_toThe_sts)*((k_A - 1.0)/totalTheta_0 + mu_into_A_from_a)
1630                     + k_A*(k_A - 1.0)*dt/totalTheta_0;
1631             // Note:  A factor of theta_a0 cancels in the "dt" term.
1632         }
1633 
1634         result -= term_A + term_a;
1635         if (result <= -DBL_BIG)
1636             return -DBL_BIG;
1637 
1638         t_s = t_e;
1639         e_toThe_sts = e_toThe_ste;
1640     }
1641 
1642     return result;
1643 }
1644 
1645 //------------------------------------------------------------------------------------
1646 // See comment for lnWait(), above.
1647 // This function expands lnWait() in a Taylor series about s = 0,
1648 // so that we can smoothly handle the factor of 1/s in lnWait().
1649 // This version retains only the first two terms in
1650 // (exp(s*te) - exp(s*ts))/s = (te-ts) + (s/2!)(te^2 - ts^2) + (s^2/3!)(te^3 - ts^3) + ....
1651 
lnWaitForTinyLogisticSelectionCoefficient(const vector<double> & param,const TreeSummary * treedata)1652 double CoalesceLogisticSelectionPL::lnWaitForTinyLogisticSelectionCoefficient(const vector<double>& param,
1653                                                                               const TreeSummary* treedata)
1654 {
1655     double mu_into_A_from_a(param[3]), mu_into_a_from_A(param[4]);
1656 
1657     if (0 != param[2] || 0 != param[5] || 0 == param[3] || 0 == param[4])
1658         throw implementation_error("Error parsing disease rates in "
1659                                    "CoalesceLogisticSelectionPL::lnWaitForTinyLogisticSelectionCoefficient()");
1660 
1661     const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
1662     list<Interval>::const_iterator treesum_it;
1663 
1664     double result(0.0), t_e, t_s(0.0), s(param[m_s_is_here]), dt, term;
1665     double theta_A0(param[m_start]), theta_a0(param[m_start+1]), totalTheta_0(theta_A0+theta_a0);
1666 
1667     for (treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
1668     {
1669         t_e = treesum_it->endtime;
1670         const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
1671         const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
1672         dt = t_e - t_s;
1673 
1674         term = 0.5*s*(t_e*t_e - t_s*t_s);
1675 
1676         result += -(k_A*dt/theta_A0)*((k_A - 1.0) + theta_a0*mu_into_A_from_a)
1677             - (k_A*theta_a0*term/theta_A0)*((k_A - 1.0)/totalTheta_0 + mu_into_A_from_a);
1678 
1679         result += -(k_a*dt/theta_a0)*((k_a - 1.0) + theta_A0*mu_into_a_from_A)
1680             + (k_a*theta_A0*term/theta_a0)*((k_a - 1.0)/totalTheta_0 + mu_into_a_from_A);
1681 
1682         t_s = t_e;
1683     }
1684 
1685     return result;
1686 }
1687 
1688 //------------------------------------------------------------------------------------
1689 // This function computes the "point" contribution to the log-likelihood.
1690 // It computes and returns
1691 //
1692 // sum_over_all_coalescent_times_in_A( ln(2/theta_A(te_A)) )
1693 //  + sum_over_all_coalescent_times_in_a( ln(2/theta_a(te_a)) ),
1694 //
1695 // where te_A is the time (te = "endtime") at the bottom of a time interval
1696 // which ends with a coalescence in the population with allele "A," and
1697 // similarly for the pop. with allele "a."
1698 // theta_A(t) + theta_a(t) = theta_A(0) + theta_a(0) for all times t;
1699 // that is, the overall population size is assumed constant.  We have
1700 //
1701 // theta_A(t) = ((theta_A0+theta_a0)/(theta_A0+theta_a0*exp(s*t)))*theta_A0,
1702 // theta_a(t) = ((theta_A0+theta_a0)/(theta_A0+theta_a0*exp(s*t)))*theta_a0*exp(s*t).
1703 
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)1704 double CoalesceLogisticSelectionPL::lnPoint(const vector<double>& param, const vector<double>& lparam,
1705                                             const TreeSummary* treedata)
1706 {
1707     if (2 != m_nPop)
1708     {
1709         string msg = "CoalesceLogisticSelectionPL::lnPoint() called with m_nPop = ";
1710         msg += ToString(m_nPop);
1711         msg += ".  m_nPop must equal 2, reflecting one population with the major ";
1712         msg += "allele and one population with the minor allele.";
1713         throw implementation_error(msg);
1714     }
1715 
1716     const Interval *treesum = treedata->GetCoalSummary()->GetLongPoint();
1717     const Interval *pTreesum;
1718     double theta_A0(param[0]), theta_a0(param[1]), logTheta_A0(lparam[0]),
1719         logTheta_a0(lparam[1]), logThetaTotal_0, s(param[m_s_is_here]);
1720 
1721     if (theta_A0 > 0.0 && theta_a0 > 0.0)
1722         logThetaTotal_0 = log(theta_A0 + theta_a0);
1723     else
1724     {
1725         // This really, really should never ever happen.
1726         string msg = "CoalesceLogisticSelectionPL::lnPoint(), received an invalid value ";
1727         msg += "(theta_A0 = ";
1728         msg += ToString(theta_A0);
1729         msg += " and theta_a0 = " + ToString(theta_a0) + "); s = ";
1730         msg += ToString(s) + ".";
1731         throw impossible_error(msg);
1732     }
1733 
1734     double result = 0.0;
1735 
1736     for(pTreesum = treesum; pTreesum != NULL; pTreesum = pTreesum->next)
1737     {
1738         double thetaDenominator = theta_A0 +
1739             SafeProductWithExp(theta_a0, s * pTreesum->endtime); // sum is guaranteed to be > 0
1740 
1741         if (thetaDenominator < DBL_BIG)
1742             result += LOG2 - logThetaTotal_0 + log(thetaDenominator);
1743         else
1744             result += LOG2 - logThetaTotal_0 + logTheta_a0 + s*pTreesum->endtime;
1745         if (0L == pTreesum->oldstatus)
1746             result -= logTheta_A0;
1747         else if (1L == pTreesum->oldstatus)
1748             result -= logTheta_a0 + (pTreesum->endtime * s);
1749         else
1750         {
1751             string msg = "CoalesceLogisticSelectionPL::lnPoint(), received a TreeSummary ";
1752             msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
1753             msg += ".  \"oldstatus\" must be either 0 or 1, reflecting a coalescene in either ";
1754             msg += "the population with allele A or the population with allele a.";
1755             throw implementation_error(msg);
1756         }
1757     }
1758 
1759     return result;
1760 }
1761 
1762 //------------------------------------------------------------------------------------
1763 
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1764 double CoalesceLogisticSelectionPL::DlnWait(const vector<double>& param, const TreeSummary * treedata,
1765                                             const long int & whichparam)
1766 {
1767     if (2 != m_nPop)
1768     {
1769         string msg = "CoalesceLogisticSelectionPL::DlnWait() called with m_nPop = ";
1770         msg += ToString(m_nPop);
1771         msg += ".  m_nPop must equal 2, reflecting one population with the major ";
1772         msg += "allele and one population with the minor allele.";
1773         throw implementation_error(msg);
1774     }
1775 
1776     double s(param[m_s_is_here]);
1777 
1778     if (fabs(s) < LOGISTIC_SELECTION_COEFFICIENT_EPSILON)
1779         return DlnWaitForTinyLogisticSelectionCoefficient(param, treedata, whichparam);
1780 
1781     bool derivativeWithRespectTo_A;
1782 
1783     if (0 == whichparam)
1784         derivativeWithRespectTo_A = true;
1785     else if (1 == whichparam)
1786         derivativeWithRespectTo_A = false;
1787     else
1788     {
1789         string msg = "CoalesceLogisticSelectionPL::DlnWait() called with whichparam = ";
1790         msg += ToString(whichparam);
1791         msg += ".  \"whichparam\" must equal 0 or 1, reflecting one population with the favored ";
1792         msg += "allele and one population with the disfavored allele.";
1793         throw implementation_error(msg);
1794     }
1795 
1796     if (s >= DBL_BIG)
1797         return derivativeWithRespectTo_A ? DBL_BIG : -DBL_BIG;
1798     if (s <= -DBL_BIG)
1799         return derivativeWithRespectTo_A ? -DBL_BIG : DBL_BIG;
1800 
1801     if (0 != param[2] || 0 != param[5] || 0 == param[0] || 0 == param[1] ||
1802         0 == param[3] || 0 == param[4])
1803         throw implementation_error("Bad param received by CoalesceLogisticSelectionPL::DlnWait().");
1804 
1805     const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
1806     list<Interval>::const_iterator treesum_it;
1807     double theta_A0(param[0]), theta_a0(param[1]), totalTheta_0(theta_A0+theta_a0);
1808     double term_A(0.0), term_a(0.0), term, result(0.0);
1809     double t_s(0.0),t_e,dt,e_toThe_sts(1.0),e_toThe_ste;
1810     double factor = 1.0/(totalTheta_0*totalTheta_0);
1811     double thetaFactor_A = theta_a0/(theta_A0*theta_A0);
1812     double thetaFactor_a = theta_A0/(theta_a0*theta_a0);
1813     double mu_into_A_from_a(param[3]), mu_into_a_from_A(param[4]);
1814 
1815     for (treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
1816     {
1817         t_e = treesum_it->endtime;
1818         dt = t_e - t_s;
1819         term_A = term_a = 0.0;
1820         const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
1821         const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
1822 
1823         if (s > 0.0)
1824         {
1825             // Use overflow protection for term_A; if term_a underflows, that's okay.
1826 
1827             if (s*dt > LOG_ONEPLUSEPSILON)
1828             {
1829                 term = SafeProductWithExp(factor/s,s*dt); // quotient guaranteed finite and positive
1830                 if (term >= DBL_BIG)
1831                     return derivativeWithRespectTo_A ? DBL_BIG : -DBL_BIG; // unavoidable overflow
1832                 e_toThe_ste = (term/(factor/s))*e_toThe_sts;
1833                 if (k_A > 0)
1834                 {
1835                     if (derivativeWithRespectTo_A)
1836                         term_A = (term - factor/s)*thetaFactor_A*e_toThe_sts;
1837                     else
1838                         term_A = (term - factor/s)*e_toThe_sts;
1839                     if (term_A >= DBL_BIG)
1840                         return derivativeWithRespectTo_A ? DBL_BIG : -DBL_BIG; // unavoidable overflow
1841                 }
1842             }
1843             else
1844             {
1845                 if (s*dt >= numeric_limits<double>::epsilon())
1846                     e_toThe_ste = (1.0 + s*dt)*e_toThe_sts;
1847                 else
1848                     e_toThe_ste = SafeProductWithExp(1.0,s*t_e);
1849                 if (k_A > 0)
1850                 {
1851                     if (derivativeWithRespectTo_A)
1852                         term_A = factor*dt*thetaFactor_A*e_toThe_sts;
1853                     else
1854                         term_A = factor*dt*e_toThe_sts;
1855                     if (term_A >= DBL_BIG)
1856                         return derivativeWithRespectTo_A ? DBL_BIG : -DBL_BIG; // unavoidable overflow
1857                 }
1858             }
1859             if (k_a > 0)
1860             {
1861                 if (derivativeWithRespectTo_A)
1862                     term_a = (factor/(-s))*(1.0/e_toThe_ste - 1.0/e_toThe_sts);
1863                 else
1864                     term_a = (factor/(-s))*thetaFactor_a*(1.0/e_toThe_ste - 1.0/e_toThe_sts);
1865             }
1866         }
1867 
1868         else // s < 0
1869         {
1870             // Use overflow protection for dtau_a; if dtau_A underflows, that's okay.
1871 
1872             if (-s*dt > LOG_ONEPLUSEPSILON)
1873             {
1874                 term = SafeProductWithExp(factor/(-s),-s*dt);// quotient guaranteed finite and positive
1875                 if (term >= DBL_BIG)
1876                     return derivativeWithRespectTo_A ? -DBL_BIG : DBL_BIG; // unavoidable overflow
1877                 e_toThe_ste = e_toThe_sts/(term/(factor/(-s)));
1878                 if (k_a > 0)
1879                 {
1880                     if (derivativeWithRespectTo_A)
1881                         term_a = (term - factor/(-s))/e_toThe_sts;
1882                     else
1883                         term_a = (term - factor/(-s))*thetaFactor_a/e_toThe_sts;
1884                     if (term_a >= DBL_BIG)
1885                         return derivativeWithRespectTo_A ? -DBL_BIG : DBL_BIG; // unavoidable overflow
1886                 }
1887             }
1888             else
1889             {
1890                 if (-s*dt >= numeric_limits<double>::epsilon())
1891                     e_toThe_ste = (1.0 + s*dt)*e_toThe_sts;
1892                 else
1893                     e_toThe_ste = SafeProductWithExp(1.0,s*t_e);
1894                 if (k_a > 0)
1895                 {
1896                     if (derivativeWithRespectTo_A)
1897                         term_a = factor*dt/e_toThe_sts;
1898                     else
1899                         term_a = factor*dt*thetaFactor_a/e_toThe_sts;
1900                     if (term_a >= DBL_BIG)
1901                         return derivativeWithRespectTo_A ? -DBL_BIG : DBL_BIG; // unavoidable overflow
1902                 }
1903             }
1904             if (k_A > 0)
1905             {
1906                 if (derivativeWithRespectTo_A)
1907                     term_A = (factor/s)*thetaFactor_A*(e_toThe_ste - e_toThe_sts);
1908                 else
1909                     term_A = (factor/s)*(e_toThe_ste - e_toThe_sts);
1910             }
1911         }
1912 
1913         if (derivativeWithRespectTo_A)
1914             result += k_A*term_A*((k_A-1.0)*(theta_A0+totalTheta_0) + mu_into_A_from_a/factor)
1915                 - k_a*term_a*((k_a-1.0) + mu_into_a_from_A/(factor*theta_a0))
1916                 + dt*factor*(k_A*(k_A-1.0) + k_a*(k_a-1.0));
1917         else
1918             result += -k_A*term_A*((k_A-1.0) + mu_into_A_from_a/(factor*theta_A0))
1919                 + k_a*term_a*((k_a-1.0)*(theta_a0+totalTheta_0) + mu_into_a_from_A/factor)
1920                 + dt*factor*(k_A*(k_A-1.0) + k_a*(k_a-1.0));
1921 
1922         if (result >= DBL_BIG)
1923             return DBL_BIG;
1924         if (result <= -DBL_BIG)
1925             return -DBL_BIG;
1926 
1927         t_s = t_e;
1928         e_toThe_sts = e_toThe_ste;
1929     }
1930 
1931     return result;
1932 }
1933 
1934 //------------------------------------------------------------------------------------
1935 
DlnWaitForTinyLogisticSelectionCoefficient(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1936 double CoalesceLogisticSelectionPL::DlnWaitForTinyLogisticSelectionCoefficient(const vector<double>& param,
1937                                                                                const TreeSummary *treedata,
1938                                                                                const long int & whichparam)
1939 {
1940     bool derivativeWithRespectTo_A;
1941 
1942     if (0 == whichparam)
1943         derivativeWithRespectTo_A = true;
1944     else if (1 == whichparam)
1945         derivativeWithRespectTo_A = false;
1946     else
1947     {
1948         string msg = "CoalesceLogisticSelectionPL::DlnWaitForTinyLogisticSelectionCoefficient() ";
1949         msg += "called with whichparam = ";
1950         msg += ToString(whichparam);
1951         msg += ".  \"whichparam\" must equal 0 or 1, reflecting one subpopulation with the favored ";
1952         msg += "allele and one subpopulation with the disfavored allele.";
1953         throw implementation_error(msg);
1954     }
1955 
1956     if (0 != param[2] || 0 != param[5] || 0 == param[0] || 0 == param[1] ||
1957         0 == param[3] || 0 == param[4])
1958         throw implementation_error
1959             ("Bad param received by CoalesceLogisticSelectionPL::DlnWaitForTinyLogisticSelectionCoefficient().");
1960 
1961     const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
1962     list<Interval>::const_iterator treesum_it;
1963     double theta_A0(param[0]), theta_a0(param[1]), totalTheta_0(theta_A0+theta_a0);
1964     double t_e, t_s(0.0), resultTimesDenominator(0.0), s(param[m_s_is_here]);
1965     double mu_into_A_from_a(param[3]), mu_into_a_from_A(param[4]);
1966     double denominator(totalTheta_0*totalTheta_0), term_A, term_a;
1967     double factor_A(theta_a0/(theta_A0*theta_A0)), factor_a(theta_A0/(theta_a0*theta_a0));
1968 
1969     for(treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
1970     {
1971         t_e = treesum_it->endtime;
1972         double dt = t_e - t_s;
1973         const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
1974         const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
1975 
1976         if (derivativeWithRespectTo_A)
1977         {
1978             term_A = k_A*factor_A*(dt + 0.5*s*(t_e*t_e - t_s*t_s))*((k_A-1.0)*(theta_A0+totalTheta_0)
1979                                                                     + mu_into_A_from_a*denominator);
1980             term_a = -k_a*(dt - 0.5*s*(t_e*t_e - t_s*t_s))*((k_a-1.0) + mu_into_a_from_A*denominator);
1981         }
1982         else
1983         {
1984             term_A = -k_A*(dt + 0.5*s*(t_e*t_e - t_s*t_s))*((k_A-1.0) + mu_into_A_from_a*denominator);
1985             term_a = k_a*factor_a*(dt = 0.5*s*(t_e*t_e - t_s*t_s))*((k_a-1.0)*(theta_a0+totalTheta_0)
1986                                                                     + mu_into_a_from_A*denominator);
1987         }
1988 
1989         resultTimesDenominator += term_A + term_a + dt*(k_A*(k_A-1.0) + k_a*(k_a-1.0));
1990 
1991         t_s = t_e;
1992     }
1993 
1994     return resultTimesDenominator/denominator;
1995 }
1996 
1997 //------------------------------------------------------------------------------------
1998 
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)1999 double CoalesceLogisticSelectionPL::DlnPoint(const vector<double>& param, const TreeSummary *treedata,
2000                                              const long int & whichparam)
2001 {
2002     if (2 != m_nPop)
2003     {
2004         string msg = "CoalesceLogisticSelectionPL::DlnPoint() called with m_nPop = ";
2005         msg += ToString(m_nPop);
2006         msg += ".  m_nPop must equal 2, reflecting one population with the major ";
2007         msg += "allele and one population with the minor allele.";
2008         throw implementation_error(msg);
2009     }
2010 
2011     bool derivativeWithRespectTo_A;
2012 
2013     if (0 == whichparam)
2014         derivativeWithRespectTo_A = true;
2015     else if (1 == whichparam)
2016         derivativeWithRespectTo_A = false;
2017     else
2018     {
2019         string msg = "CoalesceLogisticSelectionPL::DlnPoint() called with whichparam = ";
2020         msg += ToString(whichparam);
2021         msg += ".  \"whichparam\" must equal 0 or 1, reflecting one population with the major ";
2022         msg += "allele and one population with the minor allele.";
2023         throw implementation_error(msg);
2024     }
2025 
2026     const Interval *treesum = treedata->GetCoalSummary()->GetLongPoint();
2027     const Interval *pTreesum;
2028     double theta_A0(param[0]), theta_a0(param[1]), s(param[m_s_is_here]),
2029         totalTheta_0(theta_A0+theta_a0), theta_A0squared(theta_A0*theta_A0),
2030         theta_a0squared(theta_a0*theta_a0), result(0.0), x;
2031 
2032     if (theta_a0 <= 0.0 || theta_A0 <= 0.0)
2033     {
2034         string msg = "CoalesceLogisticSelectionPL::DlnPoint(), received an invalid Theta value ";
2035         msg += "(theta_A0 = " + ToString(theta_A0) + ", theta_a0 = " + ToString(theta_a0) + ").";
2036         throw impossible_error(msg);
2037     }
2038 
2039     // First, add up the contributions from the coalescent events.
2040 
2041     if (s >= 0.0)
2042     {
2043         for (pTreesum = treesum; pTreesum != NULL; pTreesum = pTreesum->next)
2044         {
2045             double t_e = pTreesum->endtime;
2046             if (derivativeWithRespectTo_A)
2047             {
2048                 if (0L == pTreesum->oldstatus) // coal. in allele A
2049                 {
2050                     x = SafeProductWithExp(theta_a0*(theta_A0+totalTheta_0),s*t_e);
2051                     if (x < DBL_BIG)
2052                         result -= (theta_A0squared + x) /
2053                             ((theta_A0*totalTheta_0)*(theta_A0 + x/(theta_A0+totalTheta_0)));
2054                     else
2055                         result -= (theta_A0 + totalTheta_0)/(theta_A0*totalTheta_0);
2056                 }
2057                 else if (1L == pTreesum->oldstatus) // coal. in allele a
2058                 {
2059                     x = SafeProductWithExp(theta_a0,s*t_e);
2060                     if (x < DBL_BIG)
2061                         result -= (x - theta_a0)/(totalTheta_0*(theta_A0 + x));
2062                     else
2063                         result -= 1.0/totalTheta_0;
2064                 }
2065                 else
2066                 {
2067                     string msg = "CoalesceLogisticSelectionPL::DlnPoint(), received a TreeSummary ";
2068                     msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
2069                     msg += ".  \"oldstatus\" must be either 0 or 1, reflecting a coalescene in either ";
2070                     msg += "the population with allele A or the population with allele a.";
2071                     throw implementation_error(msg);
2072                 }
2073             }
2074             else // derivative with repect to theta_a0
2075             {
2076                 if (0L == pTreesum->oldstatus) // coal. in allele A
2077                 {
2078                     x = SafeProductWithExp(theta_A0,s*t_e);
2079                     if (x < DBL_BIG)
2080                         result += (x - theta_A0) /
2081                             (totalTheta_0*(theta_A0 + x*theta_a0/theta_A0));
2082                     else
2083                         result += theta_A0/(theta_a0*totalTheta_0);
2084                 }
2085                 else if (1L == pTreesum->oldstatus) // coal. in allele a
2086                 {
2087                     x = SafeProductWithExp(theta_a0squared,s*t_e);
2088                     if (x < DBL_BIG)
2089                         result -= (theta_A0*(theta_a0+totalTheta_0) + x) /
2090                             (totalTheta_0*(theta_A0*theta_a0 + x));
2091                     else
2092                         result -= 1.0/totalTheta_0;
2093                 }
2094                 else
2095                 {
2096                     string msg = "CoalesceLogisticSelectionPL::DlnPoint(), received a TreeSummary ";
2097                     msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
2098                     msg += ".  \"oldstatus\" must be either 0 or 1, reflecting a coalescene in either ";
2099                     msg += "the population with allele A or the population with allele a.";
2100                     throw implementation_error(msg);
2101                 }
2102             }
2103         }
2104     }
2105     else // s < 0
2106     {
2107         for (pTreesum = treesum; pTreesum != NULL; pTreesum = pTreesum->next)
2108         {
2109             double t_e = pTreesum->endtime;
2110             if (derivativeWithRespectTo_A)
2111             {
2112                 if (0L == pTreesum->oldstatus) // coal. in allele A
2113                 {
2114                     x = SafeProductWithExp(theta_A0squared,-s*t_e);
2115                     if (x < DBL_BIG)
2116                         result -= (theta_a0*(theta_A0+totalTheta_0) + x) /
2117                             (totalTheta_0*(theta_A0*theta_a0 + x));
2118                     else
2119                         result -= 1.0/totalTheta_0;
2120                 }
2121                 else if (1L == pTreesum->oldstatus) // coal. in allele a
2122                 {
2123                     x = SafeProductWithExp(theta_a0,-s*t_e);
2124                     if (x < DBL_BIG)
2125                         result += (x - theta_a0) / (totalTheta_0*(theta_a0 + x*theta_A0/theta_a0));
2126                     else
2127                         result += theta_a0/(theta_A0*totalTheta_0);
2128                 }
2129                 else
2130                 {
2131                     string msg = "CoalesceLogisticSelectionPL::DlnPoint(), received a TreeSummary ";
2132                     msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
2133                     msg += ".  \"oldstatus\" must be either 0 or 1, reflecting a coalescene in either ";
2134                     msg += "the population with allele A or the population with allele a.";
2135                     throw implementation_error(msg);
2136                 }
2137             }
2138             else // derivative with respect to theta_a0
2139             {
2140                 if (0L == pTreesum->oldstatus) // coal. in allele A
2141                 {
2142                     x = SafeProductWithExp(theta_A0,-s*t_e);
2143                     if (x < DBL_BIG)
2144                         result -= (x - theta_A0) / (totalTheta_0*(theta_a0 + x));
2145                     else
2146                         result -= 1.0/totalTheta_0;
2147                 }
2148                 else if (1L == pTreesum->oldstatus) // coal. in allele a
2149                 {
2150                     x = SafeProductWithExp(theta_A0*(theta_a0+totalTheta_0),-s*t_e);
2151                     if (x < DBL_BIG)
2152                         result -= (x + theta_a0squared) /
2153                             (theta_a0*totalTheta_0*(theta_a0 + x/(theta_a0+totalTheta_0)));
2154                     else
2155                         result -= (theta_a0+totalTheta_0)/(theta_a0*totalTheta_0);
2156                 }
2157                 else
2158                 {
2159                     string msg = "CoalesceLogisticSelectionPL::DlnPoint(), received a TreeSummary ";
2160                     msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
2161                     msg += ".  \"oldstatus\" must be either 0 or 1, reflecting a coalescene in either ";
2162                     msg += "the population with allele A or the population with allele a.";
2163                     throw implementation_error(msg);
2164                 }
2165             }
2166         }
2167     }
2168 
2169     // Now, add the contributions from the mutation events a --> A and A --> a.
2170     treesum = treedata->GetDiseaseSummary()->GetLongPoint();
2171 
2172     double one__over__theta_A0(1.0/theta_A0), one__over__theta_a0(1.0/theta_a0);
2173     for(pTreesum = treesum; pTreesum != NULL; pTreesum = pTreesum->next)
2174     {
2175         if (0L == pTreesum->oldstatus)
2176         {
2177             if (derivativeWithRespectTo_A)
2178                 result -= one__over__theta_A0;
2179             else
2180                 result += one__over__theta_a0;
2181         }
2182         else if (1L == pTreesum->oldstatus)
2183         {
2184             if (derivativeWithRespectTo_A)
2185                 result += one__over__theta_A0;
2186             else
2187                 result -= one__over__theta_a0;
2188         }
2189         else
2190         {
2191             string msg = "DiseaseLogisticSelectionPL::DlnPoint(), received a TreeSummary ";
2192             msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
2193             msg += ".  \"oldstatus\" must be either 0 or 1, reflecting a mutation to either ";
2194             msg += "allele A or allele a.";
2195             throw implementation_error(msg);
2196         }
2197     }
2198 
2199     return result;
2200 
2201 }
2202 
2203 //------------------------------------------------------------------------------------
2204 
lnWait(const vector<double> & param,const TreeSummary * treedata)2205 double LogisticSelectionPL::lnWait(const vector<double>& param, const TreeSummary *treedata)
2206 {
2207     return 0.0;
2208 }
2209 
2210 //------------------------------------------------------------------------------------
2211 
lnPoint(const vector<double> & param,const vector<double> & lparam,const TreeSummary * treedata)2212 double LogisticSelectionPL::lnPoint(const vector<double>& param, const vector<double>& lparam,
2213                                     const TreeSummary *treedata)
2214 {
2215     return 0.0;
2216 }
2217 
2218 //------------------------------------------------------------------------------------
2219 
DlnWait(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)2220 double LogisticSelectionPL::DlnWait(const vector<double>& param, const TreeSummary *treedata,
2221                                     const long int & whichparam)
2222 {
2223     if (whichparam != m_s_is_here)
2224     {
2225         string msg = "LogisticSelectionPL::DlnWait() called with whichparam = ";
2226         msg += ToString(whichparam);
2227         msg += ".  \"whichparam\" must equal " + ToString(m_s_is_here);
2228         msg += ", which corresponds to the selection coefficient \"s.\"";
2229         throw implementation_error(msg);
2230     }
2231 
2232     double s(param[m_s_is_here]);
2233 
2234     if (fabs(s) < LOGISTIC_SELECTION_COEFFICIENT_EPSILON)
2235         return DlnWaitForTinyLogisticSelectionCoefficient(param, treedata, whichparam);
2236 
2237     double theta_A0(param[0]), theta_a0(param[1]), mu_into_A_from_a(param[3]),
2238         mu_into_a_from_A(param[4]);
2239 
2240     if (0.0 == theta_A0 || 0.0 == theta_a0)
2241     {
2242         string msg = "LogisticSelectionPL::DlnWait(), theta_A0 = ";
2243         msg += ToString(theta_A0) + ", theta_a0 = " + ToString(theta_a0);
2244         msg += "; attempted to divide by zero.";
2245         throw impossible_error(msg);
2246     }
2247 
2248     const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
2249     list<Interval>::const_iterator treesum_it;
2250     double t_s(0.0), t_e, term_s, term_e, denominator(s*s*(theta_A0+theta_a0)),
2251         dtau_A__ds(0.0), dtau_a__ds(0.0), result(0.0);
2252     double mu_term_A(mu_into_A_from_a*(theta_A0+theta_a0)), mu_term_a(mu_into_a_from_A*(theta_A0+theta_a0));
2253     term_s = s > 0.0 ? -theta_a0/denominator : theta_A0/denominator;
2254 
2255     for(treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
2256     {
2257         t_e = treesum_it->endtime;
2258         const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
2259         const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
2260         dtau_A__ds = dtau_a__ds = 0;
2261 
2262         if (s > 0.0)
2263         {
2264             term_e = SafeProductWithExp(theta_a0*(s*t_e - 1.0)/denominator,s*t_e);
2265             if (term_e >= DBL_BIG)
2266                 return -DBL_BIG;
2267             if (k_A > 0)
2268                 dtau_A__ds = term_e - term_s;
2269             if (k_a > 0)
2270                 dtau_a__ds = ((theta_A0*theta_a0)/(denominator*denominator)) *
2271                     ((s*s*t_e*t_e - 1.0)/term_e - (s*s*t_s*t_s - 1.0)/term_s);
2272         }
2273         else
2274         {
2275             term_e = SafeProductWithExp(theta_A0*(s*t_e + 1.0)/denominator,-s*t_e);
2276             if (term_e <= -DBL_BIG)
2277                 return DBL_BIG;
2278             if (k_a > 0)
2279                 dtau_a__ds = term_e - term_s;
2280             if (k_A > 0)
2281                 dtau_A__ds = ((theta_A0*theta_a0)/(denominator*denominator)) *
2282                     ((s*s*t_e*t_e - 1.0)/term_e - (s*s*t_s*t_s - 1.0)/term_s);
2283         }
2284 
2285         result -= (k_A*(k_A - 1.0 + mu_term_A)/theta_A0)*dtau_A__ds
2286             + (k_a*(k_a - 1.0 + mu_term_a)/theta_a0)*dtau_a__ds;
2287         if (result <= -DBL_BIG)
2288             return -DBL_BIG;
2289         if (result >= DBL_BIG)
2290             return DBL_BIG;
2291 
2292         t_s = t_e;
2293         term_s = term_e;
2294     }
2295 
2296     return result;
2297 
2298 }
2299 
2300 //------------------------------------------------------------------------------------
2301 
DlnWaitForTinyLogisticSelectionCoefficient(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)2302 double LogisticSelectionPL::DlnWaitForTinyLogisticSelectionCoefficient(const vector<double> & param,
2303                                                                        const TreeSummary * treedata,
2304                                                                        const long int & whichparam)
2305 {
2306     const list<Interval>& treesum = treedata->GetCoalSummary()->GetLongWait();
2307     list<Interval>::const_iterator treesum_it;
2308     double theta_A0(param[0]), theta_a0(param[1]), theta_A0_over_theta_a0;
2309     double ts(0.0), te, result(0.0), s(param[m_s_is_here]);
2310     double ts_squared(0.0), te_squared;
2311 
2312     if (0.0 == theta_A0 || 0.0 == theta_a0 || 0.0 == theta_A0 + theta_a0)
2313     {
2314         string msg = "LogisticSelectionPL::DlnWaitForTinySelectionCoefficient(), theta_A0 = ";
2315         msg += ToString(theta_A0) + ", theta_a0 = " + ToString(theta_a0);
2316         msg += "; attempted to divide by zero.";
2317         throw impossible_error(msg);
2318     }
2319 
2320     theta_A0_over_theta_a0 = theta_A0/theta_a0;
2321     double mu_into_A_from_a(param[3]), mu_into_a_from_A(param[4]),
2322         mu_term_A(mu_into_A_from_a*(theta_A0+theta_a0)), mu_term_a(mu_into_a_from_A*(theta_A0+theta_a0));
2323 
2324     for(treesum_it = treesum.begin(); treesum_it != treesum.end(); treesum_it++)
2325     {
2326         te = treesum_it->endtime;
2327         te_squared = te*te;
2328         const double& k_A = treesum_it->xpartlines[0]; // num. lineages, allele A
2329         const double& k_a = treesum_it->xpartlines[1]; // num. lineages, allele a
2330 
2331         result -= 0.5*(te_squared - ts_squared)*(k_A*(k_A-1.0+mu_term_A)/theta_A0_over_theta_a0 -
2332                                                  k_a*(k_a-1.0+mu_term_a)*theta_A0_over_theta_a0);
2333         result -= (s/3.0)*(te_squared*te - ts_squared*ts)*(k_A*(k_A-1.0+mu_term_A)/theta_A0_over_theta_a0 +
2334                                                            k_a*(k_a-1.0+mu_term_a)*theta_A0_over_theta_a0);
2335         ts= te;
2336         ts_squared = te_squared;
2337     }
2338 
2339     return result/(theta_A0 + theta_a0);
2340 }
2341 
2342 //------------------------------------------------------------------------------------
2343 
DlnPoint(const vector<double> & param,const TreeSummary * treedata,const long int & whichparam)2344 double LogisticSelectionPL::DlnPoint(const vector<double> & param, const TreeSummary * treedata,
2345                                      const long int & whichparam)
2346 {
2347     if (whichparam != m_s_is_here)
2348     {
2349         string msg = "LogisticSelectionPL::DlnPoint() called with whichparam = ";
2350         msg += ToString(whichparam);
2351         msg += ".  \"whichparam\" must equal " + ToString(m_s_is_here);
2352         msg += ", which corresponds to the selection coefficient \"s.\"";
2353         throw implementation_error(msg);
2354     }
2355 
2356     const Interval *treesum = treedata->GetCoalSummary()->GetLongPoint();
2357     const Interval *pTreesum;
2358     double theta_A0(param[0]), theta_a0(param[1]), s(param[m_s_is_here]);
2359     double result(0.0), term_a;
2360 
2361     // First, add up the contributions from coalescent events.
2362 
2363     for (pTreesum = treesum; pTreesum != NULL; pTreesum = pTreesum->next)
2364     {
2365         const double& te = pTreesum->endtime;
2366         term_a = SafeProductWithExp(theta_a0, s*te);
2367 
2368         if (0L == pTreesum->oldstatus) // coal. in allele A
2369         {
2370             if (term_a < DBL_BIG)
2371                 result += te*term_a/(theta_A0 + term_a); // rhs = 0 for s << 0
2372             else
2373                 result += te;
2374         }
2375 
2376         else if (1L == pTreesum->oldstatus) // coal. in allele a
2377             result -= te/(term_a/theta_A0 + 1.0);
2378 
2379         else
2380         {
2381             string msg = "LogisticSelectionPL::DlnPoint(), received a TreeSummary ";
2382             msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
2383             msg += ".  \"oldstatus\" must be either 0 or 1, reflecting a coalescence in either ";
2384             msg += "the subpopulation with allele A or the subpopulation with allele a.";
2385             throw implementation_error(msg);
2386         }
2387     }
2388 
2389     // Now, add the contributions from the mutation events a --> A and A --> a.
2390     treesum = treedata->GetDiseaseSummary()->GetLongPoint();
2391 
2392     for(pTreesum = treesum; pTreesum != NULL; pTreesum = pTreesum->next)
2393     {
2394         const double& te = pTreesum->endtime;
2395 
2396         if (0L == pTreesum->oldstatus)
2397             result += te;
2398         else if (1L == pTreesum->oldstatus)
2399             result -= te;
2400         else
2401         {
2402             string msg = "LogisticSelectionPL::DlnPoint(), received a TreeSummary ";
2403             msg += "containing an event with oldstatus = " + ToString(pTreesum->oldstatus);
2404             msg += ".  \"oldstatus\" must be either 0 or 1, reflecting a mutation to either ";
2405             msg += "allele A or allele a.";
2406             throw implementation_error(msg);
2407         }
2408     }
2409 
2410     return result;
2411 }
2412 
2413 //------------------------------------------------------------------------------------
2414 
StickSelectPL(const ForceSummary & fs)2415 StickSelectPL::StickSelectPL(const ForceSummary& fs)
2416     : PLForces(0),  // MDEBUG arbitrary value, is that okay?
2417       m_thetastart(0),
2418       m_thetaend(0),
2419       m_toBigA_here(FLAGLONG),
2420       m_toSmallA_here(FLAGLONG),
2421       m_s_here(0),
2422       m_r_here(FLAGLONG),
2423       m_disstart(FLAGLONG)
2424 {
2425     m_minuslnsqrt2pi = -log(sqrt(2*PI));
2426     m_nxparts = fs.GetNParameters(force_COAL);
2427     // JDEBUG--when switch to allowing migration remove the assert
2428     assert(fs.GetNonLocalPartitionIndexes().size() < 2);
2429     m_partindex = fs.GetNonLocalPartitionIndexes();
2430     // JDEBUG--this doesn't work due to invalid parameters
2431     // m_ndis = fs.GetNParameters(force_DISEASE);
2432     m_ndis = 2L;
2433     m_disindex = fs.GetPartIndex(force_DISEASE);
2434     assert(m_ndis == 2);
2435     assert(m_nxparts == 2);
2436 }
2437 
2438 //------------------------------------------------------------------------------------
2439 
~StickSelectPL()2440 StickSelectPL::~StickSelectPL()
2441 {
2442     // intentionally blank
2443 }
2444 
2445 //------------------------------------------------------------------------------------
2446 
SetToBigAIndex(long index)2447 void StickSelectPL::SetToBigAIndex(long index)
2448 {
2449     m_toBigA_here = index;
2450     if (m_disstart == FLAGLONG)
2451         // if toBig or toSmall isn't set, m_disstart will end
2452         // up as FLAGLONG again, as that's smaller than all legal values
2453         m_disstart = std::min(m_toBigA_here,m_toSmallA_here);
2454 }
2455 
2456 //------------------------------------------------------------------------------------
2457 
SetToSmallAIndex(long index)2458 void StickSelectPL::SetToSmallAIndex(long index)
2459 {
2460     m_toSmallA_here = index;
2461     if (m_disstart == FLAGLONG)
2462         // if toBig or toSmall isn't set, m_disstart will end
2463         // up as FLAGLONG again, as that's smaller than all legal values
2464         m_disstart = std::min(m_toBigA_here,m_toSmallA_here);
2465 }
2466 
2467 //------------------------------------------------------------------------------------
2468 
SetLocalPartForces(const ForceSummary & fs)2469 void StickSelectPL::SetLocalPartForces(const ForceSummary& fs)
2470 {
2471     // JDEBUG--:  This code is cut and paste from CoalescePL
2472     // and smells pretty bad as a result.
2473 
2474     m_localpartforces = fs.GetLocalPartitionForces();
2475     assert(!m_localpartforces.empty());
2476 
2477     // set up vector which answers the following question:
2478     // for each local partition force
2479     //   for any xpartition theta (identified by index),
2480     //   what partition (of the given force) is it?
2481 
2482     // This code is copied approximately from PartitionForce::
2483     // SumXPartsToParts().
2484 
2485     ForceVec::const_iterator pforce;
2486     const ForceVec& partforces = fs.GetPartitionForces();
2487     LongVec1d indicator(partforces.size(), 0L);
2488     LongVec1d nparts(registry.GetDataPack().GetAllNPartitions());
2489     DoubleVec1d::size_type xpart;
2490     long partindex;
2491     for (pforce = partforces.begin(), partindex = 0;
2492          pforce != partforces.end(); ++pforce, ++partindex)
2493     {
2494         vector<DoubleVec1d::size_type> indices(m_nxparts, 0);
2495         for(xpart = 0; xpart < static_cast<DoubleVec1d::size_type>(m_nxparts); ++xpart)
2496         {
2497             indices[xpart] = indicator[partindex];
2498             long int part;
2499             for (part = nparts.size() - 1; part >= 0; --part)
2500             {
2501                 ++indicator[part];
2502                 if (indicator[part] < nparts[part]) break;
2503                 indicator[part] = 0;
2504             }
2505         }
2506         // initialize xparts vectors
2507         vector<DoubleVec1d::size_type> emptyvec;
2508         vector<vector<DoubleVec1d::size_type> > partvec(nparts[partindex], emptyvec);
2509 
2510         if ((*pforce)->IsLocalPartitionForce())
2511         {
2512             m_whichlocalpart.push_back(indices);
2513             m_whichlocalxparts.push_back(partvec);
2514         }
2515         m_whichpart.push_back(indices);
2516         m_whichxparts.push_back(partvec);
2517     }
2518 
2519     // now construct the vector mapping partition to a set of xpartitions.
2520     //
2521     // we will do this by going through the vector mapping xpartition to
2522     // partition we just constructed, letting its contents tell us which
2523     // partition to add the parameter (xpartition) to and keeping a
2524     // counter to let us know which xpartition to add.
2525     long lpindex = 0;
2526     for (pforce = partforces.begin(), partindex = 0;
2527          pforce != partforces.end(); ++pforce, ++partindex)
2528     {
2529         for(xpart = 0; xpart < m_whichpart[partindex].size(); ++xpart)
2530             m_whichxparts[partindex][m_whichpart[partindex][xpart]].
2531                 push_back(xpart);
2532         if ((*pforce)->IsLocalPartitionForce())
2533         {
2534             for(xpart = 0; xpart < m_whichpart[partindex].size(); ++xpart)
2535             {
2536                 m_whichlocalxparts[lpindex][m_whichpart[partindex][xpart]].push_back(xpart);
2537             }
2538             ++lpindex;
2539         }
2540     }
2541 } // SetLocalPartForces
2542 
2543 //------------------------------------------------------------------------------------
2544 
StickMean(double freqA,double s,double toA,double toa) const2545 double StickSelectPL::StickMean(double freqA, double s, double toA, double toa) const
2546 {
2547     return s*freqA*(1-freqA) + toA*(1-freqA) - toa*freqA;
2548 }
2549 
2550 //------------------------------------------------------------------------------------
2551 
StickVar(double freqA,double tipfreqA,double thetaA) const2552 double StickSelectPL::StickVar(double freqA, double tipfreqA, double thetaA) const
2553 {
2554     return 2.0*freqA*(1-freqA)*tipfreqA / thetaA;
2555 }
2556 
2557 //------------------------------------------------------------------------------------
2558 
CommonFunctional(double freqBigA,double prevfreqBigA,double s,double toBigA,double toSmallA,double length) const2559 double StickSelectPL::CommonFunctional(double freqBigA, double prevfreqBigA, double s,
2560                                        double toBigA, double toSmallA, double length) const
2561 {
2562     // this implements the un-squared numerator of the posterior on stick selection:
2563     return freqBigA - prevfreqBigA - length *
2564         StickMean(prevfreqBigA,s,toBigA,toSmallA);
2565 }
2566 
2567 //------------------------------------------------------------------------------------
2568 
lnPTreeStick(const DoubleVec1d & param,const DoubleVec1d & lparam,const TreeSummary * treedata)2569 double StickSelectPL::lnPTreeStick(const DoubleVec1d& param, const DoubleVec1d& lparam, const TreeSummary* treedata)
2570 {
2571     double answ(lnWait(param,treedata));
2572 
2573     answ += lnPointTreeStick(param,lparam,treedata);
2574 
2575     return answ;
2576 }
2577 
2578 //------------------------------------------------------------------------------------
2579 
lnWait(const DoubleVec1d & param,const TreeSummary * treedata)2580 double StickSelectPL::lnWait(const DoubleVec1d& param, const TreeSummary* treedata)
2581 {
2582     assert(m_disstart != FLAGLONG);
2583     double answ(0.0);
2584 
2585     assert(m_ndis == 2);
2586     assert(m_ndis == m_nxparts);
2587 
2588     // compute waiting times for theta and disease
2589     // all LongWait() are the same....
2590     const list<Interval>& coalsum(treedata->GetCoalSummary()->GetLongWait());
2591     list<Interval>::const_iterator interval(coalsum.begin());
2592     const DoubleVec1d& stairlengths(treedata->GetStickSummary().lengths);
2593     const DoubleVec2d& stairfreqs(treedata->GetStickSummary().freqs);
2594     double tipthetaA(param[m_thetastart]);
2595     double jointend(stairlengths[0]),intervalend(interval->endtime);
2596     double jointstart(0.0),intervalstart(0.0);
2597     long joint(0);
2598 
2599     while (interval != coalsum.end())
2600     {
2601         // how long is the bit of tree where nothing changes?
2602         double lengthininterval = std::min(jointend, intervalend) -
2603             std::max(jointstart, intervalstart);
2604 
2605         // adjust for coalescent contribution
2606         long xpart;
2607         for(xpart = 0; xpart < m_nxparts; ++xpart)
2608         {
2609             answ -= lengthininterval * stairfreqs[0][0] *
2610                 (interval->xpartlines[xpart])*(interval->xpartlines[xpart]-1) /
2611                 // because there is no migration we can use xpart, otherwise
2612                 // we'll need to add a loop in local partitions
2613                 (stairfreqs[joint][xpart] * tipthetaA);
2614         }
2615 
2616         // adjust for trait mutation contribution
2617         long part;
2618         for(part = 0; part < m_ndis; ++part)
2619         {
2620             answ -= lengthininterval * interval->partlines[m_disindex][part] *
2621                 param[(part ? m_disstart+1 : m_disstart)] *
2622                 // both macros rely on only two disease states present
2623                 // equation relies on nxparts == nparts!
2624                 stairfreqs[joint][(part ? 0 : 1)]/stairfreqs[joint][part];
2625         }
2626 
2627         if (jointend < intervalend)     // joint ends first
2628         {
2629             ++joint;
2630             jointstart = jointend;
2631             jointend += stairlengths[joint];
2632         }
2633         else
2634             if (intervalend < jointend) // else interval ends first
2635             {
2636                 ++interval;
2637                 intervalstart = intervalend;
2638                 intervalend = interval->endtime;
2639             }
2640             else                          // they both ended at the exact same time!
2641             {
2642                 ++joint;
2643                 jointstart = jointend;
2644                 jointend += stairlengths[joint];
2645                 ++interval;
2646                 intervalstart = intervalend;
2647                 intervalend = interval->endtime;
2648             }
2649     }
2650 
2651     // compute rec-rate contrib
2652     if (!m_localpartforces.empty())
2653     {
2654         const vector<double>& rWait = treedata->GetRecSummary()->GetShortWait();
2655         answ -= rWait[0] * param[m_r_here];
2656     }
2657 
2658     // there aren't separate "wait" and "point" terms wrt. sticks,
2659     // therefore all the actual calculations are handled in "point".
2660 
2661     return answ;
2662 }
2663 
2664 //------------------------------------------------------------------------------------
2665 
lnPointTreeStick(const DoubleVec1d & param,const DoubleVec1d & lparam,const TreeSummary * treedata)2666 double StickSelectPL::lnPointTreeStick(const DoubleVec1d& param, const DoubleVec1d& lparam,
2667                                        const TreeSummary* treedata)
2668 // there aren't separate "wait" and "point" terms with stick selection,
2669 // all the actual calculations are handled in "point".
2670 //
2671 // The actual equation for the log tree posterior likelihood:
2672 // Sum_Over_All_Stick_Intervals[
2673 //    -log(sqrt(2*pi))-log(sqrt(Var(curr_freq)*int_length))-Exponent]
2674 //
2675 // Exponent =
2676 //    (curr_freq-(prev_freq*Mean(prev_freq)*int_length))**2
2677 //    ------------------------------------------------------------
2678 //             2*Var(prev_freq)*int_length
2679 //
2680 // Mean(freq) = sel_coeff*freq*(1-freq)+mu_to*(1-freq)-mu_from*freq
2681 // Var(freq) = freq*(1-freq) / theta
2682 {
2683     assert(m_disstart != FLAGLONG);
2684     double answ(0.0);
2685 
2686     // stuff useful for all stick terms
2687     const Interval * treesum = treedata->GetCoalSummary()->GetLongPoint();
2688     const Interval * tit(treesum);
2689     const DoubleVec1d& stairlengths(treedata->GetStickSummary().lengths);
2690     const DoubleVec2d& stairfreqs(treedata->GetStickSummary().freqs);
2691     const DoubleVec2d& stairlnfreqs(treedata->GetStickSummary().lnfreqs);
2692     double tipprobA(stairfreqs[0][0]);
2693     double lnthA(lparam[m_thetastart]), lnprA(log(tipprobA));
2694     double jointend(stairlengths[0]);
2695     long joint(0);
2696 
2697     // compute theta contrib
2698     do {
2699         while (jointend < tit->endtime)
2700         {
2701             ++joint;
2702             jointend += stairlengths[joint];
2703         }
2704 
2705         answ +=  LOG2 + lnprA - lnthA - stairlnfreqs[joint][tit->oldstatus];
2706 
2707         tit = tit->next;
2708     } while(tit != NULL);
2709 
2710     // compute trait mutation contrib
2711     treesum = treedata->GetDiseaseSummary()->GetLongPoint();
2712     tit = treesum;
2713     jointend = stairlengths[0];
2714     joint = 0;
2715     do {
2716         while (jointend < tit->endtime)
2717         {
2718             ++joint;
2719             jointend += stairlengths[joint];
2720         }
2721         // JDEBUG--is the macro correct??
2722         answ +=  lparam[(tit->newstatus ? m_disstart+1 : m_disstart)] +
2723             stairlnfreqs[joint][tit->oldstatus]-stairlnfreqs[joint][tit->newstatus];
2724 
2725         tit = tit->next;
2726     } while(tit != NULL);
2727 
2728     // compute rec-rate contrib--Sum_j(recevents * log(r))
2729     // as noted earlier (see Likelihood::FillForces()), the existence
2730     // of a localpartforces vector is used to flag the presence/absence
2731     // of recombination
2732 
2733     // MDEBUG MREVIEW let's review this code--Mary doesn't get it.
2734     if (!m_localpartforces.empty())
2735     {
2736         treesum = treedata->GetRecSummary()->GetLongPoint();
2737         tit = treesum;
2738         jointend = stairlengths[0];
2739         joint = 0;
2740         do {
2741             while (jointend < tit->endtime)
2742             {
2743                 ++joint;
2744                 jointend += stairlengths[joint];
2745             }
2746             // we can use partnerpicks[0] because there is only one lpforce
2747             // in existence
2748             answ += lparam[m_r_here] +
2749                 stairlnfreqs[joint][treesum->partnerpicks[0]];
2750             tit = tit->next;
2751         } while(tit != NULL);
2752     }
2753 
2754     return answ;
2755 }
2756 
2757 //------------------------------------------------------------------------------------
2758 
lnPoint(const DoubleVec1d & param,const DoubleVec1d & lparam,const TreeSummary * treedata)2759 double StickSelectPL::lnPoint(const DoubleVec1d& param, const DoubleVec1d& lparam, const TreeSummary* treedata)
2760 // there aren't separate "wait" and "point" terms with stick selection,
2761 // all the actual calculations are handled in "point".
2762 //
2763 // The actual equation for the log tree posterior likelihood:
2764 // Sum_Over_All_Stick_Intervals[
2765 //    -log(sqrt(2*pi))-log(sqrt(Var(prev_freq)*int_length))-Exponent]
2766 //
2767 // Exponent =
2768 //    (curr_freq-(prev_freq-Mean(prev_freq)*int_length))**2
2769 //    ------------------------------------------------------------
2770 //             2*Var(prev_freq)*int_length
2771 //
2772 // Mean(freq) = sel_coeff*freq*(1-freq)+mu_to*(1-freq)-mu_from*freq
2773 // Var(freq) = freq*(1-freq) / theta_total
2774 {
2775     assert(m_disstart != FLAGLONG);
2776     double answ(lnPointTreeStick(param,lparam,treedata));
2777 
2778     const DoubleVec1d& stairlengths(treedata->GetStickSummary().lengths);
2779     const DoubleVec2d& stairfreqs(treedata->GetStickSummary().freqs);
2780     double tipthetaA(param[m_thetastart]);
2781     double tipprobA(stairfreqs[0][0]);
2782     double s(param[m_s_here]),toBigA(param[m_toBigA_here]),
2783         toSmallA(param[m_toSmallA_here]);
2784     assert(stairfreqs.size() == stairlengths.size());
2785 
2786     DoubleVec1d::size_type step;
2787     for(step = 1; step < stairfreqs.size(); ++step)
2788     {
2789         double variance = StickVar(stairfreqs[step-1][0],tipprobA,tipthetaA);
2790         answ += m_minuslnsqrt2pi - 0.5 * log(variance*stairlengths[step-1]);
2791         double numer = CommonFunctional(stairfreqs[step][0],
2792                                         stairfreqs[step-1][0],s,toBigA,toSmallA,stairlengths[step-1]);
2793         double denom(2.0*variance*stairlengths[step-1]);
2794         numer = -1.0 * numer * numer;
2795         answ += numer/denom;
2796     }
2797 
2798     return answ;
2799 }
2800 
2801 //------------------------------------------------------------------------------------
2802 
DlnWait(const DoubleVec1d & param,const TreeSummary * treedata,const long & whichparam)2803 double StickSelectPL::DlnWait(const DoubleVec1d& param, const TreeSummary* treedata, const long& whichparam)
2804 {
2805     long which;
2806     double answ(0.0);
2807 
2808     // compute theta contrib
2809     if (m_thetastart <= whichparam && whichparam < m_thetastart + m_nxparts)
2810     {
2811         const DoubleVec2d& stairfreqs(treedata->GetStickSummary().freqs);
2812         const DoubleVec1d& stairlengths(treedata->GetStickSummary().lengths);
2813         const list<Interval>& treesum(treedata->GetCoalSummary()->GetLongWait());
2814         list<Interval>::const_iterator interval(treesum.begin());
2815         double tipthetaA(param[m_thetastart]);
2816         double tipprobA(stairfreqs[0][0]);
2817         double jointend(stairlengths[0]),intervalend(interval->endtime);
2818         double jointstart(0.0),intervalstart(0.0);
2819         long joint(0);
2820         which = whichparam-m_thetastart;
2821         while (interval != treesum.end())
2822         {
2823             // how long is the bit of tree where nothing changes?
2824             double lengthininterval = std::min(jointend, intervalend) -
2825                 std::max(jointstart, intervalstart);
2826 
2827             answ += lengthininterval * tipprobA *
2828                 (interval->xpartlines[which])*(interval->xpartlines[which]-1)
2829                 // because there is no migration we can use xpart, otherwise
2830                 // we'll need to add a loop in local partitions
2831                 / (tipthetaA * tipthetaA * stairfreqs[joint][which]);
2832 
2833             if (jointend < intervalend) // joint ends first
2834             {
2835                 ++joint;
2836                 jointstart = jointend;
2837                 jointend += stairlengths[joint];
2838             }
2839             else
2840                 if (intervalend < jointend) // else interval ends first
2841                 {
2842                     ++interval;
2843                     intervalstart = intervalend;
2844                     intervalend = interval->endtime;
2845                 }
2846                 else                      // they both ended at the exact same time!
2847                 {
2848                     ++joint;
2849                     jointstart = jointend;
2850                     jointend += stairlengths[joint];
2851                     ++interval;
2852                     intervalstart = intervalend;
2853                     intervalend = interval->endtime;
2854                 }
2855         }
2856 
2857         return answ;
2858     }
2859 
2860     // compute disease contrib
2861     if (m_disstart <= whichparam && whichparam < m_disstart + m_ndis)
2862     {
2863         assert(m_disstart != FLAGLONG);
2864         if (whichparam == m_disstart) which = 0;
2865         else which = 1;
2866         const DoubleVec2d& stairfreqs(treedata->GetStickSummary().freqs);
2867         const DoubleVec1d& stairlengths(treedata->GetStickSummary().lengths);
2868         const list<Interval>& treesum(treedata->GetDiseaseSummary()->GetLongWait());
2869         list<Interval>::const_iterator interval(treesum.begin());
2870         double jointend(stairlengths[0]),intervalend(interval->endtime);
2871         double jointstart(0.0),intervalstart(0.0);
2872         long joint(0);
2873 
2874         while (interval != treesum.end())
2875         {
2876             // how long is the bit of tree where nothing changes?
2877             double lengthininterval = std::min(jointend, intervalend) -
2878                 std::max(jointstart, intervalstart);
2879 
2880             answ -= lengthininterval * interval->partlines[m_disindex][which] *
2881                 // the macro relies on only two disease states present
2882                 // equation relies on nxparts == nparts!
2883                 stairfreqs[joint][(which ? 0 : 1)]/stairfreqs[joint][which];
2884 
2885             if (jointend < intervalend) // joint ends first
2886             {
2887                 ++joint;
2888                 jointstart = jointend;
2889                 jointend += stairlengths[joint];
2890             }
2891             else
2892                 if (intervalend < jointend) // else interval ends first
2893                 {
2894                     ++interval;
2895                     intervalstart = intervalend;
2896                     intervalend = interval->endtime;
2897                 }
2898                 else                      // they both ended at the exact same time!
2899                 {
2900                     ++joint;
2901                     jointstart = jointend;
2902                     jointend += stairlengths[joint];
2903                     ++interval;
2904                     intervalstart = intervalend;
2905                     intervalend = interval->endtime;
2906                 }
2907         }
2908 
2909         return answ;
2910     }
2911 
2912     // compute recombination contrib
2913     if (whichparam == m_r_here)
2914     {
2915         assert(!m_localpartforces.empty());
2916         const vector<double>& rWait = treedata->GetRecSummary()->GetShortWait();
2917         return -rWait[0];
2918     }
2919 
2920     // We compute all terms involving the stick in lnPoint and DlnPoint
2921     // (arbitrarily) and thus return 0 if asked to take dWait of those parameters
2922     return answ;
2923 }
2924 
2925 //------------------------------------------------------------------------------------
2926 
DlnPoint(const DoubleVec1d & param,const TreeSummary * treedata,const long & whichparam)2927 double StickSelectPL::DlnPoint(const DoubleVec1d& param, const TreeSummary* treedata, const long& whichparam)
2928 {
2929     // NB Assumes only one localpartforce and no migration, no growth!
2930     long which;
2931 
2932     // compute recombination contrib (does not involve stick)
2933     if (whichparam == m_r_here)
2934     {
2935         const vector<double>& nrecs = treedata->GetRecSummary()->GetShortPoint();
2936         if(nrecs[0]==0)
2937         {
2938             return 0.0;
2939         }
2940         else
2941         {
2942             return SafeDivide(nrecs[0], *(param.begin() + m_r_here));
2943         }
2944     }
2945 
2946     // useful variables for all stick computations
2947     const DoubleVec2d& stairfreqs(treedata->GetStickSummary().freqs);
2948     const DoubleVec1d& stairlengths(treedata->GetStickSummary().lengths);
2949     double s(param[m_s_here]),toBigA(param[m_toBigA_here]),
2950         toSmallA(param[m_toSmallA_here]);
2951     double tipthetaA(param[m_thetastart]);
2952     double tipprobA(stairfreqs[0][0]);
2953     assert(stairfreqs.size() == stairlengths.size());
2954     double answ(0.0);
2955 
2956     // compute theta contrib
2957     if (m_thetastart <= whichparam && whichparam < m_thetastart + m_nxparts)
2958     {
2959         which = whichparam-m_thetastart;
2960         const DoubleVec1d& ncoal = treedata->GetCoalSummary()->GetShortPoint();
2961 
2962         answ = -ncoal[which] / tipthetaA;
2963 
2964         // deal with the stick contribution to theta
2965         DoubleVec1d::size_type step;
2966         for(step = 1; step < stairfreqs.size(); ++step)
2967         {
2968             double numer = CommonFunctional(stairfreqs[step][0],
2969                                             stairfreqs[step-1][0],s,toBigA,toSmallA,stairlengths[step-1]);
2970             numer *= numer;
2971             answ -= numer / (4.0 * stairfreqs[step-1][0] * tipprobA *
2972                              (1.0 - stairfreqs[step-1][0]) * stairlengths[step-1]);
2973             answ += 1.0 / (2 * tipthetaA);
2974         }
2975 
2976         return answ;
2977     }
2978 
2979     // compute disease contrib
2980     if (m_disstart <= whichparam && whichparam < m_disstart + m_ndis)
2981     {
2982         // non-stick terms
2983         assert(m_disstart != FLAGLONG);
2984         if (whichparam == m_disstart) which = 0;
2985         else which = 1;
2986 
2987         answ += (treedata->GetDiseaseSummary()->GetShortPoint()[which]) / param[whichparam];
2988 
2989         // stick terms
2990         const DoubleVec2d& stairfreqs(treedata->GetStickSummary().freqs);
2991         DoubleVec1d::size_type step;
2992         // terms in toBigA
2993         if (whichparam == m_toBigA_here)
2994         {
2995             for(step = 1; step < stairfreqs.size(); ++step)
2996             {
2997                 answ += CommonFunctional(stairfreqs[step][0],stairfreqs[step-1][0],s,
2998                                          toBigA,toSmallA,stairlengths[step-1]) * tipthetaA /
2999                     (2.0 * tipprobA * stairfreqs[step-1][0]);
3000             }
3001             return answ;
3002         }
3003         // terms in toSmallA
3004         if (whichparam == m_toSmallA_here)
3005         {
3006             for(step = 1; step < stairfreqs.size(); ++step)
3007             {
3008                 answ -= CommonFunctional(stairfreqs[step][0],stairfreqs[step-1][0],s,
3009                                          toBigA,toSmallA,stairlengths[step-1]) * tipthetaA /
3010                     ((2.0 * tipprobA) * (1.0 - stairfreqs[step-1][0]));
3011             }
3012             return answ;
3013         }
3014         assert(false); // neither toBigA nor toSmallA?!
3015     }
3016 
3017     // compute s contrib
3018     if (whichparam == m_s_here)
3019     {
3020         DoubleVec1d::size_type step;
3021         for(step = 1; step < stairfreqs.size(); ++step)
3022         {
3023             answ += CommonFunctional(stairfreqs[step][0],stairfreqs[step-1][0],
3024                                      s,toBigA,toSmallA,stairlengths[step-1]) * tipthetaA /
3025                 (2.0 * tipprobA);
3026         }
3027         return answ;
3028     }
3029 
3030     assert(false); // should have computed something!
3031     return answ;
3032 }
3033 
3034 //____________________________________________________________________________________
3035