1 // $Id: rectree.cpp,v 1.77 2012/02/16 19:10:59 bobgian Exp $
2 
3 /*
4   Copyright 2002  Peter Beerli, Mary Kuhner, Jon Yamato and Joseph Felsenstein
5 
6   This software is distributed free of charge for non-commercial use
7   and is copyrighted.  Of course, we do not guarantee that the software
8   works, and are not responsible for any damage you may cause or have.
9 */
10 
11 #include <cassert>
12 
13 #include "local_build.h"
14 
15 #include "errhandling.h"                // Can throw overrun_error.
16 #include "force.h"                      // For TimeSize object.
17 #include "range.h"
18 #include "runreport.h"
19 #include "tree.h"
20 #include "mathx.h"
21 #include "branchbuffer.h"               // Used in SetTargetLinksFrom for BranchBuffer::ExtractConstBranches.
22 #include "fc_status.h"                  // Used in Prune() to determine what recombinations can be removed.
23 
24 #ifdef DMALLOC_FUNC_CHECK
25 #include "/usr/local/include/dmalloc.h"
26 #endif
27 
28 using namespace std;
29 
30 // Note that some functions/variables defined here have "_Tr" (for "Tree") as a suffix in their name.
31 // This is to distinguish them from functions/variables in class Range (or RecRange) of the same name, which use "_Rg".
32 
33 //------------------------------------------------------------------------------------
34 
35 typedef boost::shared_ptr<RBranch> RBranch_ptr;
36 
37 //------------------------------------------------------------------------------------
38 
RecTree()39 RecTree::RecTree()
40     : Tree(),
41       m_pMovingLocusVec(),
42       m_protoMovingCells()
43 {
44     m_numTargetLinks_Tr = 0;
45     m_numNewTargetLinks_Tr = 0;
46 }
47 
48 //------------------------------------------------------------------------------------
49 
RecTree(const RecTree & tree,bool makestump)50 RecTree::RecTree(const RecTree & tree, bool makestump)
51     : Tree(tree, makestump),
52       m_pMovingLocusVec(tree.m_pMovingLocusVec),
53       m_protoMovingCells(tree.m_protoMovingCells)
54 
55 {
56     m_numTargetLinks_Tr = 0;
57     m_numNewTargetLinks_Tr = 0;
58 }
59 
60 //------------------------------------------------------------------------------------
61 
Clone() const62 Tree * RecTree::Clone() const
63 {
64     RecTree * tree = new RecTree(*this, false);
65     return tree;
66 } // RecTree::Clone
67 
68 //------------------------------------------------------------------------------------
69 
MakeStump() const70 Tree * RecTree::MakeStump() const
71 {
72     RecTree * tree = new RecTree(*this, true);
73     return tree;
74 }
75 
76 //------------------------------------------------------------------------------------
77 
Clear()78 void RecTree::Clear()
79 {
80     Tree::Clear();
81     m_numTargetLinks_Tr = 0;
82     m_numNewTargetLinks_Tr = 0;
83 }
84 
85 //------------------------------------------------------------------------------------
86 
CopyTips(const Tree * tree)87 void RecTree::CopyTips(const Tree * tree)
88 {
89     Tree::CopyTips(tree);
90     m_numTargetLinks_Tr = 0;
91     m_numNewTargetLinks_Tr = 0;
92     for(vector<Locus>::const_iterator locus = m_pMovingLocusVec->begin(); locus != m_pMovingLocusVec->end(); ++locus)
93     {
94         m_aliases.push_back(locus->GetDLCalc()->RecalculateAliases(*this, *locus));
95     }
96 }
97 
98 //------------------------------------------------------------------------------------
99 
CopyBody(const Tree * tree)100 void RecTree::CopyBody(const Tree * tree)
101 {
102     Tree::CopyBody(tree);
103     m_numTargetLinks_Tr = 0;
104     m_numNewTargetLinks_Tr = 0;
105 }
106 
107 //------------------------------------------------------------------------------------
108 
CopyPartialBody(const Tree * tree)109 void RecTree::CopyPartialBody(const Tree * tree)
110 {
111     Tree::CopyPartialBody(tree);
112     m_numTargetLinks_Tr = 0;
113     m_numNewTargetLinks_Tr = 0;
114 }
115 
116 //------------------------------------------------------------------------------------
117 
Break(Branch_ptr pBranch)118 void RecTree::Break(Branch_ptr pBranch)
119 {
120     Branch_ptr pParent = pBranch->Parent(0);
121 
122     if (pParent->CanRemove(pBranch))
123     {
124         Break(pParent);
125         m_timeList.Remove(pParent);
126 
127         pParent = pBranch->Parent(1);
128         if (pParent)
129         {
130             Break(pParent);
131             m_timeList.Remove(pParent);
132         }
133     }
134 }
135 
136 //------------------------------------------------------------------------------------
137 
CollectMovingCells()138 const vector<LocusCell> & RecTree::CollectMovingCells()
139 {
140     if (m_protoMovingCells.empty())
141     {
142         unsigned long i;
143         for (i = 0; i < m_pMovingLocusVec->size(); ++i)
144         {
145             m_protoMovingCells.push_back((*m_pMovingLocusVec)[i].GetProtoCell());
146         }
147     }
148     return m_protoMovingCells;
149 } // CollectCells
150 
151 //------------------------------------------------------------------------------------
152 
ActivateTips(Tree * othertree)153 vector<Branch_ptr> RecTree::ActivateTips(Tree * othertree)
154 {
155     vector<Branch_ptr> tips = Tree::ActivateTips(othertree);
156     m_numTargetLinks_Tr = m_timeList.GetNTips() * (m_totalSites - 1); // -1 because there is no link after last site.
157     m_numNewTargetLinks_Tr = 0;
158     return tips;
159 }
160 
161 //------------------------------------------------------------------------------------
162 
ActivateBranch(Tree * othertree)163 Branch_ptr RecTree::ActivateBranch(Tree * othertree)
164 {
165     Branch_ptr pActive = Tree::ActivateBranch(othertree);
166 
167     m_numTargetLinks_Tr += pActive->m_rangePtr->NumTargetLinks_Rg();
168     return pActive;
169 }
170 
171 //------------------------------------------------------------------------------------
172 
ActivateRoot(FC_Status & fcstatus)173 Branch_ptr RecTree::ActivateRoot(FC_Status & fcstatus)
174 {
175     Branch_ptr pRoot = Tree::ActivateRoot(fcstatus);
176     m_numTargetLinks_Tr += pRoot->m_rangePtr->NumTargetLinks_Rg();
177     m_numNewTargetLinks_Tr = 0;
178     return pRoot;
179 }
180 
181 //------------------------------------------------------------------------------------
182 
AttachBase(Branch_ptr newroot)183 void RecTree::AttachBase(Branch_ptr newroot)
184 {
185     Tree::AttachBase(newroot);
186     m_numTargetLinks_Tr = 0;
187 }
188 
189 //------------------------------------------------------------------------------------
190 
FirstInterval(double eventT)191 vector<Branch_ptr> RecTree::FirstInterval(double eventT)
192 {
193     Branch_ptr pBranch;
194 
195     vector<Branch_ptr> newinactives = Tree::FirstInterval(eventT);
196 
197     m_numNewTargetLinks_Tr = 0;         // Initialize the count of open links.
198 
199     return newinactives;
200 }
201 
202 //------------------------------------------------------------------------------------
203 
NextInterval(Branch_ptr pBranch)204 void RecTree::NextInterval(Branch_ptr pBranch)
205 {
206     Branch_ptr pParent = pBranch->Parent(0);
207 
208     m_numNewTargetLinks_Tr -= pParent->Child(0)->m_rangePtr->NumNewTargetLinks_Rg();
209 
210     // If the branch has a sibling, we must remove it as well (it's a coalescence event replaced by its parent).
211     if (pParent->Child(1))
212     {
213         m_numNewTargetLinks_Tr -= pParent->Child(1)->m_rangePtr->NumNewTargetLinks_Rg();
214     }
215     else if (pBranch->Parent(1))        // If the branch has a second parent, insert it.
216     {
217         m_numNewTargetLinks_Tr += pBranch->Parent(1)->m_rangePtr->NumNewTargetLinks_Rg();
218     }
219 
220     m_numNewTargetLinks_Tr += pParent->m_rangePtr->NumNewTargetLinks_Rg();
221 }
222 
223 //------------------------------------------------------------------------------------
224 
Prune()225 void RecTree::Prune()
226 {
227     // This routine DEMANDS that all current-state Range information in the tree is correct!  Thus:
228     assert(m_timeList.RevalidateAllRanges());
229 
230     // This routine locates recombinations which are no longer relevant due to occuring at links that
231     // are no longer targettable.  It removes such recombinations and everything logically dependent
232     // on them.  It also makes all "old state" information in the Ranges equal to the current
233     // state, in preparation for the next cycle.
234 
235     m_numNewTargetLinks_Tr = 0;
236     Branch_ptr pBranch, pChild;
237     // The original cut branch never needs to be updated or removed by Prune().
238     Branchiter start(m_timeList.NextNonTimeTiedBranch(m_firstInvalid));
239 
240     FC_Status fcstatus;
241     vector<Branch_ptr> branches(FindBranchesImmediatelyTipwardOf(start));
242     vector<Branch_ptr>::iterator branch;
243 
244 #if LAMARC_QA_FC_ON
245     for(branch = branches.begin(); branch != branches.end(); ++branch)
246     {
247         fcstatus.Increment_FC_Counts((*branch)->GetLiveSites_Br());
248     }
249 #endif
250 
251     // Loop over pBranch starting from cut point.
252     Branchiter brit;
253     for (brit = start; brit != m_timeList.EndBranch(); /* increments inside loop */ )
254     {
255         pBranch = *brit;
256 
257 #if LAMARC_QA_FC_ON
258         // If we're a fully functional, two-legged coalescence ...
259         if (pBranch->Event() == btypeCoal && !pBranch->m_marked)
260         {
261             rangeset coalesced_sites =
262                 Intersection(pBranch->Child(0)->GetLiveSites_Br(), pBranch->Child(1)->GetLiveSites_Br());
263             fcstatus.Decrement_FC_Counts(coalesced_sites);
264         }
265 #endif
266 
267         rangeset fcsites;
268 #if LAMARC_QA_FC_ON
269         fcsites = fcstatus.Coalesced_Sites();
270 #endif
271 
272         // Update this branch's oldtargetsites info, which is no longer needed in this cycle,
273         // so that it will be right for next cycle.  Done here because it requires FC information.
274         pBranch->ResetOldTargetSites_Br(fcsites);
275 
276         if (pBranch->IsRemovableRecombinationLeg(fcsites))
277         {
278 #ifndef NDEBUG
279             // validation check:
280             Range * childrangeptr = pBranch->Child(0)->m_rangePtr;
281             rangeset childtargetsites(Union(childrangeptr->GetDiseaseSites_Rg(),
282                                             RemoveRangeFromRange(fcsites, childrangeptr->GetLiveSites_Rg())));
283             assert(TargetLinksFrom(childtargetsites) == childrangeptr->GetTargetLinks_Rg());
284 #endif
285             Branch_ptr pSpouse = pBranch->GetRecPartner();
286             Branch_ptr pRemove(pBranch), pRemain(pSpouse);
287             // Which one to remove completely?
288             if (pSpouse->IsRemovableRecombinationLeg(fcsites))
289             {
290                 // They are both removable based on targetable sites.
291                 if (pBranch->m_rangePtr->GetLiveSites_Rg().empty() && pSpouse->m_rangePtr->GetLiveSites_Rg().empty())
292                 {
293                     // ... and we can't break the tie on live sites, so we randomize.
294                     bool removespouse = registry.GetRandom().Bool();
295                     if (removespouse)
296                     {
297                         pRemove = pSpouse;
298                         pRemain = pBranch;
299                     }
300                 }
301                 else
302                 {
303                     // ... and we break the tie on live sites.
304                     if (pSpouse->m_rangePtr->GetLiveSites_Rg().empty())
305                     {
306                         pRemove = pSpouse;
307                         pRemain = pBranch;
308                     }
309                 }
310             }
311 
312             // Unhook branch which will be completely removed.
313             Branch_ptr pChild = pRemove->Child(0);
314             if (pChild->Parent(0) == pRemove)
315             {
316                 pChild->SetParent(0, pChild->Parent(1));
317             }
318             pChild->SetParent(1, Branch::NONBRANCH);
319 
320             // Remove descending material below it.
321             Break(pRemove);
322 
323             // Get an interator to the next still-valid interval.
324             brit = m_timeList.NextBody(brit);
325             if (*brit == pSpouse) brit = m_timeList.NextBody(brit);
326 
327             // Splice out branch which remains in tree.
328             Branch_ptr pParent0 = pRemain->Parent(0);
329             pParent0->ReplaceChild(pRemain, pRemain->Child(0));
330             m_timeList.SetUpdateDLs(pParent0);
331             Branch_ptr pParent1 = pRemain->Parent(1);
332             if (pParent1)
333             {
334                 pParent1->ReplaceChild(pRemain, pRemain->Child(0));
335                 m_timeList.SetUpdateDLs(pParent1);
336             }
337 
338             // Remove the dead stuff.
339             m_timeList.Remove(pRemove);
340             m_timeList.Remove(pRemain);
341         }
342         else
343         {
344 #if LAMARC_QA_FC_ON
345             pBranch->UpdateBranchRange(fcsites, true);
346 #else
347             pBranch->UpdateBranchRange(fcsites, false);
348 #endif
349             brit = m_timeList.NextBody(brit);
350         }
351     }
352 
353     Tree::Prune();
354     // MDEBUG validating the ranges
355     assert(m_timeList.RevalidateAllRanges());
356 
357 } // RecTree::Prune
358 
359 //------------------------------------------------------------------------------------
360 
ReassignDLsFor(string lname,long marker,long ind)361 void RecTree::ReassignDLsFor(string lname, long marker, long ind)
362 {
363     // Find out which locus we're dealing with.  We have its name.  If the locus in question
364     // is in m_pLocusVec, we call through to Tree::ReassignDLsFor.
365     long locus = FLAGLONG;
366     for (unsigned long lnum = 0; lnum < m_pMovingLocusVec->size(); lnum++)
367     {
368         if ((*m_pMovingLocusVec)[lnum].GetName() == lname)
369         {
370             locus = lnum;
371         }
372     }
373     if (locus == FLAGLONG)
374     {
375         // The locus must be in m_pLocusVec--call the base function.
376         return Tree::ReassignDLsFor(lname, marker, ind);
377     }
378 
379     vector<Branch_ptr> haps = m_individuals[ind].GetAllTips();
380     vector<LocusCell> cells = m_individuals[ind].GetLocusCellsFor(lname, marker);
381 
382     for (unsigned long tip = 0; tip < haps.size(); tip++)
383     {
384         Cell_ptr origcell = haps[tip]->GetDLCell(locus, markerCell, true);
385         Cell_ptr newcell  = cells[tip][0];
386         origcell->SetSiteDLs(marker, newcell->GetSiteDLs(marker));
387         MarkForDLRecalc(haps[tip]);
388     }
389 
390     // Don't recalculate aliases, since there are no aliases for the moving locus.
391 } // RecTree::ReassignDLsFor
392 
393 //------------------------------------------------------------------------------------
394 
CoalesceActive(double eventT,Branch_ptr active1,Branch_ptr active2,const rangeset & fcsites)395 Branch_ptr RecTree::CoalesceActive(double eventT, Branch_ptr active1, Branch_ptr active2, const rangeset & fcsites)
396 {
397     Branch_ptr pBranch = Tree::CoalesceActive(eventT, active1, active2, fcsites);
398 
399     pBranch->SetMovingDLCells(CollectMovingCells());
400 
401     m_numTargetLinks_Tr -= active1->m_rangePtr->NumTargetLinks_Rg();
402     m_numTargetLinks_Tr -= active2->m_rangePtr->NumTargetLinks_Rg();
403     m_numTargetLinks_Tr += pBranch->m_rangePtr->NumTargetLinks_Rg();
404 
405     return pBranch;
406 }
407 
408 //------------------------------------------------------------------------------------
409 
CoalesceInactive(double eventT,Branch_ptr active,Branch_ptr inactive,const rangeset & fcsites)410 Branch_ptr RecTree::CoalesceInactive(double eventT, Branch_ptr active, Branch_ptr inactive, const rangeset & fcsites)
411 {
412     Branch_ptr pBranch = Tree::CoalesceInactive(eventT, active, inactive, fcsites);
413 
414     pBranch->SetMovingDLCells(CollectMovingCells());
415 
416     m_numNewTargetLinks_Tr -= inactive->m_rangePtr->NumNewTargetLinks_Rg();
417     m_numTargetLinks_Tr    -= active->m_rangePtr->NumTargetLinks_Rg();
418     m_numNewTargetLinks_Tr += pBranch->m_rangePtr->NumNewTargetLinks_Rg();
419     return pBranch;
420 }
421 
422 //------------------------------------------------------------------------------------
423 
Migrate(double eventT,long topop,long maxEvents,Branch_ptr active)424 Branch_ptr RecTree::Migrate(double eventT, long topop, long maxEvents, Branch_ptr active)
425 {
426     Branch_ptr pBranch = Tree::Migrate(eventT, topop, maxEvents, active);
427 
428     //    pBranch->m_rangePtr->SetMRange(pBranch->Child(0)->m_rangePtr);  // PRUNE?
429     // A migration does not change the active or newly active links.
430     return pBranch;
431 }
432 
433 //------------------------------------------------------------------------------------
434 
DiseaseMutate(double eventT,long endstatus,long maxEvents,Branch_ptr active)435 Branch_ptr RecTree::DiseaseMutate(double eventT, long endstatus, long maxEvents, Branch_ptr active)
436 {
437     Branch_ptr pBranch = Tree::DiseaseMutate(eventT, endstatus, maxEvents, active);
438 
439     //    pBranch->m_rangePtr->SetMRange(pBranch->Child(0)->m_rangePtr);  // PRUNE?
440     // A disease mutation does not change the active or newly active links.
441     return pBranch;
442 }
443 
444 //------------------------------------------------------------------------------------
445 
RecombineActive(double eventT,long maxEvents,FPartMap fparts,Branch_ptr pActive,long rec,const rangeset & fcsites)446 branchpair RecTree::RecombineActive(double eventT, long maxEvents, FPartMap fparts,
447                                     Branch_ptr pActive, long rec, const rangeset & fcsites)
448 {
449     if (NumberOfRecombinations() >= maxEvents)
450     {
451         rec_overrun e;
452         throw e;
453     }
454 
455     // Create the left parent of the active branch, saving partition handling until end.
456 
457     bool newbranchisinactive(false);
458     rangeset transmittedsites;
459     rangepair transmit(0L, rec + 1);
460     transmittedsites = AddPairToRange(transmit, transmittedsites);
461     RBranch_ptr pParentA = RBranch_ptr(new RBranch(pActive->m_rangePtr, newbranchisinactive, transmittedsites, fcsites));
462     pParentA->m_eventTime  = eventT;    // Set the event time.
463     pParentA->SetChild(0, pActive);
464     pActive->SetParent(0, pParentA);
465 
466     // Create the right parent of the active branch, saving partition handling until end.
467 #if 0 // JDEBUG, apparently can't do this in stl
468     transmittedsites.begin()->first = rec + 1;
469     transmittedsites.begin()->second = m_totalSites;
470 #endif
471     // Can we just change transmit?
472     rangeset transmittedsites2;
473     rangepair transmit2(rec + 1, m_totalSites);
474     transmittedsites2 = AddPairToRange(transmit2, transmittedsites2);
475     RBranch_ptr pParentB = RBranch_ptr(new RBranch(pActive->m_rangePtr, newbranchisinactive, transmittedsites2, fcsites));
476     pParentB->m_eventTime  = eventT;    // Set the event time.
477     pParentB->SetChild(0, pActive);
478     pActive->SetParent(1, pParentB);
479 
480     // Now deal with partitions.
481     if (fparts.empty())
482     {
483         pParentA->CopyPartitionsFrom(pActive);
484         pParentB->CopyPartitionsFrom(pActive);
485     }
486     else
487     {
488         pParentA->RecCopyPartitionsFrom(pActive, fparts, true);
489         pParentB->RecCopyPartitionsFrom(pActive, fparts, false);
490     }
491 
492     m_numTargetLinks_Tr -= pActive->m_rangePtr->NumTargetLinks_Rg();
493     m_numTargetLinks_Tr += pParentA->m_rangePtr->NumTargetLinks_Rg();
494     m_numTargetLinks_Tr += pParentB->m_rangePtr->NumTargetLinks_Rg();
495 
496     m_timeList.Collate(pParentA, pParentB);
497     return make_pair(pParentA, pParentB);
498 }
499 
500 //------------------------------------------------------------------------------------
501 
RecombineInactive(double eventT,long maxEvents,FPartMap fparts,Branch_ptr pBranch,long rec,const rangeset & fcsites)502 branchpair RecTree::RecombineInactive(double eventT, long maxEvents, FPartMap fparts,
503                                       Branch_ptr pBranch, long rec, const rangeset & fcsites)
504 {
505     if (NumberOfRecombinations() >= maxEvents)
506     {
507         rec_overrun e;
508         throw e;
509     }
510 
511     // This is the branch already in the tree.
512     bool inactive_is_low = pBranch->m_rangePtr->AreLowSitesOnInactiveBranch_Rg(rec);
513 
514     // All this might eventually be one constructor call.
515     bool newbranchisinactive(true);
516     rangepair transmit;
517     if (inactive_is_low)
518     {
519         transmit.first = 0L;
520         transmit.second = rec + 1;
521     }
522     else
523     {
524         transmit.first = rec + 1;
525         transmit.second = m_totalSites;
526     }
527     rangeset transmittedsitesInac;
528     transmittedsitesInac = AddPairToRange(transmit, transmittedsitesInac);
529     RBranch_ptr pParentInac = RBranch_ptr(new RBranch(pBranch->m_rangePtr, newbranchisinactive, transmittedsitesInac, fcsites));
530     pParentInac->m_eventTime  = eventT;
531     pParentInac->CopyPartitionsFrom(pBranch);
532 
533     pParentInac->SetChild(0, pBranch);
534     pBranch->Parent(0)->ReplaceChild(pBranch, pParentInac);
535     pBranch->SetParent(0, pParentInac);
536     if (pBranch->Parent(1))
537     {
538         pBranch->Parent(1)->ReplaceChild(pBranch, pParentInac);
539         pBranch->SetParent(1, Branch::NONBRANCH);
540     }
541 
542     // This is the new, active branch.
543     newbranchisinactive = false;
544     if (!inactive_is_low)
545     {
546         transmit.first = 0L;
547         transmit.second = rec + 1;
548     }
549     else
550     {
551         transmit.first = rec + 1;
552         transmit.second = m_totalSites;
553     }
554     rangeset transmittedsitesAc;
555     transmittedsitesAc = AddPairToRange(transmit, transmittedsitesAc);
556     assert(Intersection(transmittedsitesAc, pBranch->m_rangePtr->GetOldTargetSites_Rg()).empty());
557     RBranch_ptr pParentAc = RBranch_ptr(new RBranch(pBranch->m_rangePtr, newbranchisinactive, transmittedsitesAc, fcsites));
558     pParentAc->m_eventTime  = eventT;
559 
560     pParentAc->SetChild(0, pBranch);
561     pBranch->SetParent(1, pParentAc);
562 
563     if (fparts.empty())
564     {
565         pParentAc->CopyPartitionsFrom(pBranch);
566     }
567     else
568     {
569         pParentAc->RecCopyPartitionsFrom(pBranch, fparts, !inactive_is_low);
570     }
571 
572     // Now put both branches in place
573     m_timeList.Collate(pParentInac, pParentAc);
574 
575     m_numNewTargetLinks_Tr -= pBranch->m_rangePtr->NumNewTargetLinks_Rg();
576     m_numNewTargetLinks_Tr += pParentInac->m_rangePtr->NumNewTargetLinks_Rg();
577     m_numTargetLinks_Tr    += pParentAc->m_rangePtr->NumTargetLinks_Rg();
578 
579     return make_pair(pParentInac, pParentAc);
580 }
581 
582 //------------------------------------------------------------------------------------
583 
GetLocusSubtrees(rangepair span) const584 rangevector RecTree::GetLocusSubtrees(rangepair span) const
585 {
586     // Get recombination breakpoints
587     set<long> breakpt(GetBreakpoints());
588 
589     // Push rangepairs representing each breakpoint into subtree vector.  The (pt + 1) avoids creating a rangepair
590     // starting at the last site.  The strange iterator maneuver is because (pt + 1) is not allowed in g++.  We do
591     // not risk an empty set; we know it contains at least 2 elements.
592 
593     rangevector subtrees;
594     set<long>::const_iterator pt = breakpt.begin();
595     set<long>::const_iterator nextpt = pt;
596     ++nextpt;
597     long begin = span.first;
598     long end = span.second;
599 
600     for ( ; nextpt != breakpt.end(); ++pt, ++nextpt)
601     {
602         // No overlap so we don't add it.
603         if (*nextpt <= begin || *pt >= end) continue;
604         // Some overlap, but ends may need adjustment.
605         long first = *pt;
606         long last = *nextpt;
607         if (first < begin) first = begin;
608         if (last > end) last = end;
609         subtrees.push_back(rangepair(first, last));
610     }
611     return subtrees;
612 } // GetLocusSubtrees
613 
614 //------------------------------------------------------------------------------------
615 
SetMovingLocusVec(vector<Locus> * loc)616 void RecTree::SetMovingLocusVec(vector<Locus> * loc)
617 {
618     m_pMovingLocusVec = loc;
619     // We don't have to worry about the range--the moving loci move around within the range of the fixed loci.
620 } // SetMovingLocusVec
621 
622 //------------------------------------------------------------------------------------
623 
SetMovingMapposition(long mloc,long site)624 void RecTree::SetMovingMapposition(long mloc, long site)
625 {
626     assert(mloc < static_cast<long>(m_pMovingLocusVec->size()));
627     (*m_pMovingLocusVec)[mloc].SetRegionalMapposition(site);
628 }
629 
630 //------------------------------------------------------------------------------------
631 
CreateTip(const TipData & tipdata,const vector<LocusCell> & cells,const vector<LocusCell> & movingcells,const rangeset & diseasesites)632 TBranch_ptr RecTree::CreateTip(const TipData & tipdata, const vector<LocusCell> & cells,
633                                const vector<LocusCell> & movingcells, const rangeset & diseasesites)
634 {
635     TBranch_ptr pTip = m_timeList.CreateTip(tipdata, cells, movingcells, m_totalSites, diseasesites);
636     return pTip;
637 }
638 
639 //------------------------------------------------------------------------------------
640 
CreateTip(const TipData & tipdata,const vector<LocusCell> & cells,const vector<LocusCell> & movingcells,const rangeset & diseasesites,const vector<Locus> & loci)641 TBranch_ptr RecTree::CreateTip(const TipData & tipdata, const vector<LocusCell> & cells,
642                                const vector<LocusCell> & movingcells, const rangeset & diseasesites,
643                                const vector<Locus> & loci)
644 {
645     TBranch_ptr pTip = m_timeList.CreateTip(tipdata, cells, movingcells, m_totalSites, diseasesites, loci);
646     return pTip;
647 }
648 
649 //------------------------------------------------------------------------------------
650 
DoesThisLocusJump(long mloc) const651 bool RecTree::DoesThisLocusJump(long mloc) const
652 {
653     assert(mloc < static_cast<long>(m_pMovingLocusVec->size()));
654     return ((*m_pMovingLocusVec)[mloc].GetAnalysisType() == mloc_mapjump);
655 }
656 
657 //------------------------------------------------------------------------------------
658 
AnyRelativeHaplotypes() const659 bool RecTree::AnyRelativeHaplotypes() const
660 {
661     for (IndVec::const_iterator ind = m_individuals.begin(); ind != m_individuals.end(); ind++)
662     {
663         if (ind->MultipleTraitHaplotypes()) return true;
664     }
665     return false;
666 }
667 
668 //------------------------------------------------------------------------------------
669 
CalculateDataLikes()670 void RecTree::CalculateDataLikes()
671 {
672     m_overallDL = CalculateDataLikesForFixedLoci();
673     //The base function accumulates a likelihood into m_overallDL.
674     for (unsigned long loc = 0; loc < m_pMovingLocusVec->size(); ++loc)
675     {
676         if ((*m_pMovingLocusVec)[loc].GetAnalysisType() == mloc_mapjump)
677         {
678             m_overallDL += CalculateDataLikesForMovingLocus(loc);
679         }
680     }
681     m_timeList.ClearUpdateDLs();        // Reset the updating flags.
682 }
683 
684 //------------------------------------------------------------------------------------
685 
CalculateDataLikesForMovingLocus(long loc)686 double RecTree::CalculateDataLikesForMovingLocus(long loc)
687 {
688     double likelihood;
689 
690     const Locus & locus = (*m_pMovingLocusVec)[loc];
691     try                                 // Check for need to switch to normalization.
692     {
693         likelihood = locus.CalculateDataLikelihood(*this, true);
694     }
695 
696     catch (datalikenorm_error & ex)     // Normalization is set by thrower.
697     {
698         m_timeList.SetAllUpdateDLs();   // All subsequent loci will recalculate the entire tree.
699         RunReport & runreport = registry.GetRunReport();
700         runreport.ReportChat("\n", 0);
701         runreport.ReportChat("Subtree of likelihood 0.0 found:  Turning on normalization and re-calculating.");
702 
703         likelihood = locus.CalculateDataLikelihood(*this, true);
704     }
705 
706     return likelihood;
707 } // CalculateDataLikesForMovingLocus
708 
709 //------------------------------------------------------------------------------------
710 
GetBreakpoints() const711 set<long> RecTree::GetBreakpoints() const
712 {
713     // Create a (sorted) set containing all recombination breakpoints.
714     set<long> breakpt;
715     breakpt.insert(0);
716     breakpt.insert(m_totalSites);
717 
718     Branchconstiter brit;
719     long site;
720     for (brit = m_timeList.FirstBody(); brit != m_timeList.EndBranch(); brit = m_timeList.NextBody(brit))
721     {
722         site = (*brit)->GetRecSite_Br();
723         if (site != FLAGLONG)           // FLAGLONG means there is no recombination here
724             breakpt.insert(site + 1);   // inserting site + 1 because of open interval conventions.
725     }
726 
727     return breakpt;
728 } // GetBreakpoints
729 
730 //------------------------------------------------------------------------------------
731 
GetMapSummary()732 DoubleVec2d RecTree::GetMapSummary()
733 {
734     DoubleVec2d retvec;
735     DoubleVec1d zeroes(m_totalSites, 0.0);
736     for (unsigned long mloc = 0; mloc < m_pMovingLocusVec->size(); mloc++)
737     {
738         DoubleVec1d mlikes = zeroes;
739         long currentsite;
740         switch((*m_pMovingLocusVec)[mloc].GetAnalysisType())
741         {
742             case mloc_mapjump:
743                 //Find out where it is, make the likelihood at that value 1.
744                 currentsite = (*m_pMovingLocusVec)[mloc].GetRegionalMapposition();
745                 mlikes[currentsite] = 1.0;
746                 break;
747             case mloc_mapfloat:
748                 //Calculate the likelihoods over all subtrees, return vectors with it.
749                 //mlikes = CalculateDataLikesWithRandomHaplotypesForFloatingLocus(mloc);
750                 CalculateDataLikesForAllHaplotypesForFloatingLocus(mloc, mlikes);
751                 break;
752             case mloc_data:
753             case mloc_partition:
754                 assert(false);
755                 throw implementation_error("We seem to want to collect mapping data for a segment without that type"
756                                            " of analysis.  This is our fault; e-mail us at lamarc@gs.washington.edu");
757         }
758         retvec.push_back(mlikes);
759     }
760 
761     return retvec;
762 }
763 
764 //------------------------------------------------------------------------------------
765 
CalculateDataLikesWithRandomHaplotypesForFloatingLocus(long mloc)766 DoubleVec1d RecTree::CalculateDataLikesWithRandomHaplotypesForFloatingLocus(long mloc)
767 {
768     RandomizeMovingHaplotypes(mloc);
769     return CalculateDataLikesForFloatingLocus(mloc);
770 }
771 
772 //------------------------------------------------------------------------------------
773 
CalculateDataLikesForAllHaplotypesForFloatingLocus(long mloc,DoubleVec1d & mlikes)774 void RecTree::CalculateDataLikesForAllHaplotypesForFloatingLocus(long mloc, DoubleVec1d & mlikes)
775 {
776     long ind = 0;
777     //We need to change the vector of mlikes from zeroes to EXPMINS.
778     mlikes = SafeLog(mlikes);
779     UpdateDataLikesForIndividualsFrom(ind, mloc, mlikes);
780     //LS DEBUG
781     //PrintTipData(mloc, 0);
782     //LS TEST
783     //cout << "Sum:  " << endl << ToString(mlikes, 5) << endl;
784 }
785 
786 //------------------------------------------------------------------------------------
787 
UpdateDataLikesForIndividualsFrom(long ind,long mloc,DoubleVec1d & mlikes)788 bool RecTree::UpdateDataLikesForIndividualsFrom(long ind, long mloc, DoubleVec1d & mlikes)
789 {
790     if (static_cast<unsigned long>(ind) == m_individuals.size()) return true;
791     string lname = (*m_pMovingLocusVec)[mloc].GetName();
792     //LS NOTE: If this ASSERTs, we are mapping something with more than one marker.
793     // Might be OK, but should be checked.
794     assert ((*m_pMovingLocusVec)[mloc].GetNmarkers() == 1);
795     for (long marker = 0; marker < (*m_pMovingLocusVec)[mloc].GetNmarkers(); marker++)
796     {
797         bool newhaps = true;
798         for (m_individuals[ind].ChooseFirstHaplotypeFor(lname, marker);
799              newhaps;
800              newhaps = m_individuals[ind].ChooseNextHaplotypeFor(lname, marker))
801         {
802             ReassignDLsFor(lname, marker, ind);
803             if (UpdateDataLikesForIndividualsFrom(ind + 1, mloc, mlikes))
804             {
805                 //We're on the last one--update the data likelihood.
806                 DoubleVec1d newlikes = CalculateDataLikesForFloatingLocus(mloc);
807                 //LS TEST:
808                 //cout << ToString(newlikes, 5) << endl;
809                 mlikes = AddValsOfLogs(mlikes, newlikes);
810             }
811         }
812     }
813     return false;
814 }
815 
816 //------------------------------------------------------------------------------------
817 
CalculateDataLikesForFloatingLocus(long mloc)818 DoubleVec1d RecTree::CalculateDataLikesForFloatingLocus(long mloc)
819 {
820     m_timeList.SetAllUpdateDLs();
821 
822     // We always need these on because there's only enough storage for a single subtree,
823     // and the last subtree we used was at the other end of the range.
824     DoubleVec1d datalikes(m_totalSites);
825     rangeset allowedranges = (*m_pMovingLocusVec)[mloc].GetAllowedRange();
826     set<long> breakpoints = GetBreakpoints();
827     breakpoints = IgnoreDisallowedSubTrees(breakpoints, allowedranges);
828     set<long>::iterator left(breakpoints.begin()), right(breakpoints.begin());
829 
830     right++;
831 
832     for ( ; right != breakpoints.end(); left++, right++ )
833     {
834         (*m_pMovingLocusVec)[mloc].SetRegionalMapposition(*left);
835         double datalike = CalculateDataLikesForMovingLocus(mloc);
836         for (long siteindex = *left; siteindex<*right; siteindex++)
837         {
838             datalikes[siteindex] = datalike;
839         }
840     }
841 
842     datalikes = ZeroDisallowedSites(datalikes, allowedranges);
843     m_timeList.ClearUpdateDLs();
844 
845     //LS NOTE: We could theoretically not call ClearUpdateDLs if there was only one subtree,
846     // but only if the *next* call to this function also had the exact same subtree, which
847     // we can't enforce.  So, always call ClearUpdateDLs
848     return datalikes;
849 }
850 
851 //------------------------------------------------------------------------------------
852 
IgnoreDisallowedSubTrees(set<long> breakpoints,rangeset allowedranges)853 set<long> RecTree::IgnoreDisallowedSubTrees(set<long> breakpoints, rangeset allowedranges)
854 {
855     set<long> newbreakpoints;
856     newbreakpoints.insert(allowedranges.begin()->first);
857     newbreakpoints.insert(m_totalSites); // To close out the final range.
858     for (rangeset::iterator range = allowedranges.begin(); range != allowedranges.end(); range++)
859     {
860         for (long site = range->first; site < range->second; site++)
861         {
862             if (breakpoints.find(site) != breakpoints.end())
863             {
864                 newbreakpoints.insert(site);
865             }
866         }
867     }
868     return newbreakpoints;
869 }
870 
871 //------------------------------------------------------------------------------------
872 
ZeroDisallowedSites(DoubleVec1d datalikes,rangeset allowedranges)873 DoubleVec1d RecTree::ZeroDisallowedSites(DoubleVec1d datalikes, rangeset allowedranges)
874 {
875     DoubleVec1d newdatalikes(datalikes.size(), -DBL_BIG);
876     for (rangeset::iterator range = allowedranges.begin(); range != allowedranges.end(); range++)
877     {
878         assert(range->first >= 0);
879         assert(static_cast<unsigned long int>(range->second) <= datalikes.size());
880         for (long int site = range->first; site < range->second; site++)
881         {
882             newdatalikes[site] = datalikes[site];
883         }
884     }
885     return newdatalikes;
886 }
887 
888 //------------------------------------------------------------------------------------
889 
RandomizeMovingHaplotypes(long mlocus)890 void RecTree::RandomizeMovingHaplotypes(long mlocus)
891 {
892     //This function loops over all individuals in m_individuals and randomizes the haplotypes.
893     string lname = (*m_pMovingLocusVec)[mlocus].GetName();
894     long nmarkers = (*m_pMovingLocusVec)[mlocus].GetNmarkers();
895     for (long marker = 0; marker < nmarkers; marker++)
896     {
897         for (unsigned long ind = 0; ind < m_individuals.size(); ind++)
898         {
899             if (m_individuals[ind].ChooseRandomHaplotypesFor(lname, marker))
900             {
901                 //The chosen one is different from last time.
902                 ReassignDLsFor(lname, marker, ind);
903             }
904         }
905     }
906 }
907 
908 //------------------------------------------------------------------------------------
909 
SimulateDataIfNeeded()910 bool RecTree::SimulateDataIfNeeded()
911 {
912     if (Tree::SimulateDataIfNeeded())
913     {
914         //LS DEBUG: Current version uses same data for all moving loci.  If we want to simulate data individually,
915         // uncomment the following lines and replace all the subsequent loops over lnums to locus.whatever calls.
916         //
917         //  for (unsigned long mloc = 0; mloc < m_pMovingLocusVec->size(); mloc++)
918         //  {
919         //      Locus & locus = (*m_pMovingLocusVec)[mloc];
920         if ((*m_pMovingLocusVec).size() > 0)
921         {
922             Locus & locus = (*m_pMovingLocusVec)[0];
923             if (locus.GetShouldSimulate())
924             {
925                 for (size_t lnum = 0; lnum < (*m_pMovingLocusVec).size(); lnum++)
926                 {
927                     (*m_pMovingLocusVec)[lnum].ClearVariability();
928                     (*m_pMovingLocusVec)[lnum].SetNewMappositionIfMoving();
929                 }
930 
931                 // Check to see if this locus's site was already simulated.
932                 Locus * simlocus = NULL; //to prevent compiler warnings
933                 for (unsigned long loc = 0; loc < m_pLocusVec->size(); loc++)
934                 {
935                     Locus & oldlocus = (*m_pLocusVec)[loc];
936                     if (oldlocus.GetShouldSimulate())
937                     {
938                         if (oldlocus.SiteInLocus(locus.GetRegionalMapposition()))
939                         {
940                             simlocus = & oldlocus;
941                         }
942                     }
943                 }
944                 if (simlocus != NULL)
945                 {
946                     //If there was at least one site in the original that overlapped with the range on the moving
947                     // locus, we take that chances as the chance that we hit the original at all, but we want
948                     // to choose a variable site from that range.  This introduces an ascertainment bias that I'm
949                     // not exactly sure how to compensate for, or if it needs to be compensated for.  But anyway.
950                     long newsite = simlocus->ChooseVariableSiteFrom(locus.GetAllowedRange());
951                     if (newsite != FLAGLONG)
952                     {
953                         for (size_t lnum = 0; lnum < (*m_pMovingLocusVec).size(); lnum++)
954                         {
955                             (*m_pMovingLocusVec)[lnum].SetRegionalMapposition(newsite);
956                             (*m_pMovingLocusVec)[lnum].SetTrueSite(newsite);
957                             //LS TEST
958                             //simlocus->PrintOnesAndZeroesForVariableSites();
959                             (*m_pMovingLocusVec)[lnum].SetVariableRange(simlocus->CalculateVariableRange());
960                         }
961                         //LS DEBUG:  Not really for release.
962                         //simlocus->CalculateDisEqFor(newsite);
963                         //If there were no variable sites in the original, we keep the originally chosen site.
964                         // Our trait isn't going to be variable, but them's the breaks.
965                     }
966                     for (size_t lnum = 0; lnum < (*m_pMovingLocusVec).size(); lnum++)
967                     {
968                         (*m_pMovingLocusVec)[lnum].CopyDataFrom(*simlocus, *this);
969                     }
970                     if (locus.IsNighInvariant())
971                     {
972                         registry.GetRunReport().ReportNormal("All simulated data was nigh invariant for the"
973                                                              " source segment, giving us nigh invariant data"
974                                                              " for the simulated segment "
975                                                              + locus.GetName() + " as well.");
976                     }
977                     //LS DEBUG SIM:  This would delete information again, for simulations where
978                     // the data exists but you never sequenced it.
979                     // simlocus->RandomizeHalf(*this);
980                 }
981                 else
982                 {
983                     locus.SimulateData(*this, m_totalSites);
984                     // Now copy this data into any other moving loci.
985                     long newsite = locus.GetRegionalMapposition();
986                     for (size_t lnum = 1; lnum < (*m_pMovingLocusVec).size(); lnum++)
987                     {
988                         (*m_pMovingLocusVec)[lnum].SetRegionalMapposition(newsite);
989                         (*m_pMovingLocusVec)[lnum].SetTrueSite(newsite);
990                         (*m_pMovingLocusVec)[lnum].CopyDataFrom(locus, *this);
991                     }
992                 }
993                 for (size_t lnum = 0; lnum < (*m_pMovingLocusVec).size(); lnum++)
994                 {
995                     (*m_pMovingLocusVec)[lnum].MakePhenotypesFor(m_individuals);
996                 }
997             }
998         }
999         //Redo the aliases, in case we randomized the data in the original locus.
1000         SetupAliases(*m_pLocusVec);
1001         return true;
1002     }
1003     return false;
1004 }
1005 
1006 //------------------------------------------------------------------------------------
1007 
NumberOfRecombinations()1008 long RecTree::NumberOfRecombinations()
1009 {
1010     return (m_timeList.HowMany(btypeRec) / 2);
1011 }
1012 
1013 //------------------------------------------------------------------------------------
1014 
SetTargetLinksFrom(const BranchBuffer & brbuffer)1015 void RecTree::SetTargetLinksFrom(const BranchBuffer & brbuffer)
1016 {
1017     m_numTargetLinks_Tr = 0;
1018 
1019     vector<Branch_ptr> branches(brbuffer.ExtractConstBranches());
1020     vector<Branch_ptr>::const_iterator branch;
1021     for(branch = branches.begin(); branch != branches.end(); ++branch)
1022     {
1023         m_numTargetLinks_Tr += (*branch)->m_rangePtr->NumTargetLinks_Rg();
1024     }
1025 }
1026 
1027 //------------------------------------------------------------------------------------
1028 // JDEBUG--this is a Debugging function.
1029 
SetNewTargetLinksFrom(const BranchBuffer & brbuffer)1030 void RecTree::SetNewTargetLinksFrom(const BranchBuffer & brbuffer)
1031 {
1032     m_numNewTargetLinks_Tr = 0;
1033 
1034     vector<Branch_ptr> branches(brbuffer.ExtractConstBranches());
1035     vector<Branch_ptr>::const_iterator branch;
1036     for(branch = branches.begin(); branch != branches.end(); ++branch)
1037     {
1038         m_numNewTargetLinks_Tr += (*branch)->m_rangePtr->NumNewTargetLinks_Rg();
1039     }
1040 }
1041 
1042 //------------------------------------------------------------------------------------
1043 // Debugging function.
1044 
PrintTipData(long mloc,long marker)1045 void RecTree::PrintTipData(long mloc, long marker)
1046 {
1047     cout << endl << "Tip data:" << endl;
1048     for (Branchconstiter tip = m_timeList.FirstTip(); tip != m_timeList.FirstBody(); tip++)
1049     {
1050         cout << " " << (*m_pMovingLocusVec)[mloc].GetDataModel()->CellToData((*tip)->GetDLCell(mloc, marker, true), marker);
1051     }
1052     cout << endl;
1053 }
1054 
1055 //------------------------------------------------------------------------------------
1056 // Debugging function.
1057 
PrintRangeSetCount(const rangesetcount & rsc)1058 void RecTree::PrintRangeSetCount(const rangesetcount & rsc)
1059 {
1060     for (RSCcIter rsci = rsc.begin(); rsci != rsc.end(); rsci++)
1061     {
1062         cout << (*rsci).first << ": " << ToString((*rsci).second) << endl;
1063     }
1064 }
1065 
1066 //------------------------------------------------------------------------------------
1067 // Debugging function.
1068 
RemoveEmpty(const rangesetcount & rsc)1069 rangesetcount RecTree::RemoveEmpty(const rangesetcount & rsc)
1070 {
1071     rangesetcount result;
1072     for (RSCcIter rsci = rsc.begin(); rsci != rsc.end(); rsci++)
1073     {
1074         if (!(*rsci).second.empty())
1075         {
1076             result.insert(make_pair((*rsci).first, (*rsci).second));
1077         }
1078     }
1079     return result;
1080 }
1081 
1082 //____________________________________________________________________________________
1083