1 // $Id: rectree.cpp,v 1.77 2012/02/16 19:10:59 bobgian Exp $
2
3 /*
4 Copyright 2002 Peter Beerli, Mary Kuhner, Jon Yamato and Joseph Felsenstein
5
6 This software is distributed free of charge for non-commercial use
7 and is copyrighted. Of course, we do not guarantee that the software
8 works, and are not responsible for any damage you may cause or have.
9 */
10
11 #include <cassert>
12
13 #include "local_build.h"
14
15 #include "errhandling.h" // Can throw overrun_error.
16 #include "force.h" // For TimeSize object.
17 #include "range.h"
18 #include "runreport.h"
19 #include "tree.h"
20 #include "mathx.h"
21 #include "branchbuffer.h" // Used in SetTargetLinksFrom for BranchBuffer::ExtractConstBranches.
22 #include "fc_status.h" // Used in Prune() to determine what recombinations can be removed.
23
24 #ifdef DMALLOC_FUNC_CHECK
25 #include "/usr/local/include/dmalloc.h"
26 #endif
27
28 using namespace std;
29
30 // Note that some functions/variables defined here have "_Tr" (for "Tree") as a suffix in their name.
31 // This is to distinguish them from functions/variables in class Range (or RecRange) of the same name, which use "_Rg".
32
33 //------------------------------------------------------------------------------------
34
35 typedef boost::shared_ptr<RBranch> RBranch_ptr;
36
37 //------------------------------------------------------------------------------------
38
RecTree()39 RecTree::RecTree()
40 : Tree(),
41 m_pMovingLocusVec(),
42 m_protoMovingCells()
43 {
44 m_numTargetLinks_Tr = 0;
45 m_numNewTargetLinks_Tr = 0;
46 }
47
48 //------------------------------------------------------------------------------------
49
RecTree(const RecTree & tree,bool makestump)50 RecTree::RecTree(const RecTree & tree, bool makestump)
51 : Tree(tree, makestump),
52 m_pMovingLocusVec(tree.m_pMovingLocusVec),
53 m_protoMovingCells(tree.m_protoMovingCells)
54
55 {
56 m_numTargetLinks_Tr = 0;
57 m_numNewTargetLinks_Tr = 0;
58 }
59
60 //------------------------------------------------------------------------------------
61
Clone() const62 Tree * RecTree::Clone() const
63 {
64 RecTree * tree = new RecTree(*this, false);
65 return tree;
66 } // RecTree::Clone
67
68 //------------------------------------------------------------------------------------
69
MakeStump() const70 Tree * RecTree::MakeStump() const
71 {
72 RecTree * tree = new RecTree(*this, true);
73 return tree;
74 }
75
76 //------------------------------------------------------------------------------------
77
Clear()78 void RecTree::Clear()
79 {
80 Tree::Clear();
81 m_numTargetLinks_Tr = 0;
82 m_numNewTargetLinks_Tr = 0;
83 }
84
85 //------------------------------------------------------------------------------------
86
CopyTips(const Tree * tree)87 void RecTree::CopyTips(const Tree * tree)
88 {
89 Tree::CopyTips(tree);
90 m_numTargetLinks_Tr = 0;
91 m_numNewTargetLinks_Tr = 0;
92 for(vector<Locus>::const_iterator locus = m_pMovingLocusVec->begin(); locus != m_pMovingLocusVec->end(); ++locus)
93 {
94 m_aliases.push_back(locus->GetDLCalc()->RecalculateAliases(*this, *locus));
95 }
96 }
97
98 //------------------------------------------------------------------------------------
99
CopyBody(const Tree * tree)100 void RecTree::CopyBody(const Tree * tree)
101 {
102 Tree::CopyBody(tree);
103 m_numTargetLinks_Tr = 0;
104 m_numNewTargetLinks_Tr = 0;
105 }
106
107 //------------------------------------------------------------------------------------
108
CopyPartialBody(const Tree * tree)109 void RecTree::CopyPartialBody(const Tree * tree)
110 {
111 Tree::CopyPartialBody(tree);
112 m_numTargetLinks_Tr = 0;
113 m_numNewTargetLinks_Tr = 0;
114 }
115
116 //------------------------------------------------------------------------------------
117
Break(Branch_ptr pBranch)118 void RecTree::Break(Branch_ptr pBranch)
119 {
120 Branch_ptr pParent = pBranch->Parent(0);
121
122 if (pParent->CanRemove(pBranch))
123 {
124 Break(pParent);
125 m_timeList.Remove(pParent);
126
127 pParent = pBranch->Parent(1);
128 if (pParent)
129 {
130 Break(pParent);
131 m_timeList.Remove(pParent);
132 }
133 }
134 }
135
136 //------------------------------------------------------------------------------------
137
CollectMovingCells()138 const vector<LocusCell> & RecTree::CollectMovingCells()
139 {
140 if (m_protoMovingCells.empty())
141 {
142 unsigned long i;
143 for (i = 0; i < m_pMovingLocusVec->size(); ++i)
144 {
145 m_protoMovingCells.push_back((*m_pMovingLocusVec)[i].GetProtoCell());
146 }
147 }
148 return m_protoMovingCells;
149 } // CollectCells
150
151 //------------------------------------------------------------------------------------
152
ActivateTips(Tree * othertree)153 vector<Branch_ptr> RecTree::ActivateTips(Tree * othertree)
154 {
155 vector<Branch_ptr> tips = Tree::ActivateTips(othertree);
156 m_numTargetLinks_Tr = m_timeList.GetNTips() * (m_totalSites - 1); // -1 because there is no link after last site.
157 m_numNewTargetLinks_Tr = 0;
158 return tips;
159 }
160
161 //------------------------------------------------------------------------------------
162
ActivateBranch(Tree * othertree)163 Branch_ptr RecTree::ActivateBranch(Tree * othertree)
164 {
165 Branch_ptr pActive = Tree::ActivateBranch(othertree);
166
167 m_numTargetLinks_Tr += pActive->m_rangePtr->NumTargetLinks_Rg();
168 return pActive;
169 }
170
171 //------------------------------------------------------------------------------------
172
ActivateRoot(FC_Status & fcstatus)173 Branch_ptr RecTree::ActivateRoot(FC_Status & fcstatus)
174 {
175 Branch_ptr pRoot = Tree::ActivateRoot(fcstatus);
176 m_numTargetLinks_Tr += pRoot->m_rangePtr->NumTargetLinks_Rg();
177 m_numNewTargetLinks_Tr = 0;
178 return pRoot;
179 }
180
181 //------------------------------------------------------------------------------------
182
AttachBase(Branch_ptr newroot)183 void RecTree::AttachBase(Branch_ptr newroot)
184 {
185 Tree::AttachBase(newroot);
186 m_numTargetLinks_Tr = 0;
187 }
188
189 //------------------------------------------------------------------------------------
190
FirstInterval(double eventT)191 vector<Branch_ptr> RecTree::FirstInterval(double eventT)
192 {
193 Branch_ptr pBranch;
194
195 vector<Branch_ptr> newinactives = Tree::FirstInterval(eventT);
196
197 m_numNewTargetLinks_Tr = 0; // Initialize the count of open links.
198
199 return newinactives;
200 }
201
202 //------------------------------------------------------------------------------------
203
NextInterval(Branch_ptr pBranch)204 void RecTree::NextInterval(Branch_ptr pBranch)
205 {
206 Branch_ptr pParent = pBranch->Parent(0);
207
208 m_numNewTargetLinks_Tr -= pParent->Child(0)->m_rangePtr->NumNewTargetLinks_Rg();
209
210 // If the branch has a sibling, we must remove it as well (it's a coalescence event replaced by its parent).
211 if (pParent->Child(1))
212 {
213 m_numNewTargetLinks_Tr -= pParent->Child(1)->m_rangePtr->NumNewTargetLinks_Rg();
214 }
215 else if (pBranch->Parent(1)) // If the branch has a second parent, insert it.
216 {
217 m_numNewTargetLinks_Tr += pBranch->Parent(1)->m_rangePtr->NumNewTargetLinks_Rg();
218 }
219
220 m_numNewTargetLinks_Tr += pParent->m_rangePtr->NumNewTargetLinks_Rg();
221 }
222
223 //------------------------------------------------------------------------------------
224
Prune()225 void RecTree::Prune()
226 {
227 // This routine DEMANDS that all current-state Range information in the tree is correct! Thus:
228 assert(m_timeList.RevalidateAllRanges());
229
230 // This routine locates recombinations which are no longer relevant due to occuring at links that
231 // are no longer targettable. It removes such recombinations and everything logically dependent
232 // on them. It also makes all "old state" information in the Ranges equal to the current
233 // state, in preparation for the next cycle.
234
235 m_numNewTargetLinks_Tr = 0;
236 Branch_ptr pBranch, pChild;
237 // The original cut branch never needs to be updated or removed by Prune().
238 Branchiter start(m_timeList.NextNonTimeTiedBranch(m_firstInvalid));
239
240 FC_Status fcstatus;
241 vector<Branch_ptr> branches(FindBranchesImmediatelyTipwardOf(start));
242 vector<Branch_ptr>::iterator branch;
243
244 #if LAMARC_QA_FC_ON
245 for(branch = branches.begin(); branch != branches.end(); ++branch)
246 {
247 fcstatus.Increment_FC_Counts((*branch)->GetLiveSites_Br());
248 }
249 #endif
250
251 // Loop over pBranch starting from cut point.
252 Branchiter brit;
253 for (brit = start; brit != m_timeList.EndBranch(); /* increments inside loop */ )
254 {
255 pBranch = *brit;
256
257 #if LAMARC_QA_FC_ON
258 // If we're a fully functional, two-legged coalescence ...
259 if (pBranch->Event() == btypeCoal && !pBranch->m_marked)
260 {
261 rangeset coalesced_sites =
262 Intersection(pBranch->Child(0)->GetLiveSites_Br(), pBranch->Child(1)->GetLiveSites_Br());
263 fcstatus.Decrement_FC_Counts(coalesced_sites);
264 }
265 #endif
266
267 rangeset fcsites;
268 #if LAMARC_QA_FC_ON
269 fcsites = fcstatus.Coalesced_Sites();
270 #endif
271
272 // Update this branch's oldtargetsites info, which is no longer needed in this cycle,
273 // so that it will be right for next cycle. Done here because it requires FC information.
274 pBranch->ResetOldTargetSites_Br(fcsites);
275
276 if (pBranch->IsRemovableRecombinationLeg(fcsites))
277 {
278 #ifndef NDEBUG
279 // validation check:
280 Range * childrangeptr = pBranch->Child(0)->m_rangePtr;
281 rangeset childtargetsites(Union(childrangeptr->GetDiseaseSites_Rg(),
282 RemoveRangeFromRange(fcsites, childrangeptr->GetLiveSites_Rg())));
283 assert(TargetLinksFrom(childtargetsites) == childrangeptr->GetTargetLinks_Rg());
284 #endif
285 Branch_ptr pSpouse = pBranch->GetRecPartner();
286 Branch_ptr pRemove(pBranch), pRemain(pSpouse);
287 // Which one to remove completely?
288 if (pSpouse->IsRemovableRecombinationLeg(fcsites))
289 {
290 // They are both removable based on targetable sites.
291 if (pBranch->m_rangePtr->GetLiveSites_Rg().empty() && pSpouse->m_rangePtr->GetLiveSites_Rg().empty())
292 {
293 // ... and we can't break the tie on live sites, so we randomize.
294 bool removespouse = registry.GetRandom().Bool();
295 if (removespouse)
296 {
297 pRemove = pSpouse;
298 pRemain = pBranch;
299 }
300 }
301 else
302 {
303 // ... and we break the tie on live sites.
304 if (pSpouse->m_rangePtr->GetLiveSites_Rg().empty())
305 {
306 pRemove = pSpouse;
307 pRemain = pBranch;
308 }
309 }
310 }
311
312 // Unhook branch which will be completely removed.
313 Branch_ptr pChild = pRemove->Child(0);
314 if (pChild->Parent(0) == pRemove)
315 {
316 pChild->SetParent(0, pChild->Parent(1));
317 }
318 pChild->SetParent(1, Branch::NONBRANCH);
319
320 // Remove descending material below it.
321 Break(pRemove);
322
323 // Get an interator to the next still-valid interval.
324 brit = m_timeList.NextBody(brit);
325 if (*brit == pSpouse) brit = m_timeList.NextBody(brit);
326
327 // Splice out branch which remains in tree.
328 Branch_ptr pParent0 = pRemain->Parent(0);
329 pParent0->ReplaceChild(pRemain, pRemain->Child(0));
330 m_timeList.SetUpdateDLs(pParent0);
331 Branch_ptr pParent1 = pRemain->Parent(1);
332 if (pParent1)
333 {
334 pParent1->ReplaceChild(pRemain, pRemain->Child(0));
335 m_timeList.SetUpdateDLs(pParent1);
336 }
337
338 // Remove the dead stuff.
339 m_timeList.Remove(pRemove);
340 m_timeList.Remove(pRemain);
341 }
342 else
343 {
344 #if LAMARC_QA_FC_ON
345 pBranch->UpdateBranchRange(fcsites, true);
346 #else
347 pBranch->UpdateBranchRange(fcsites, false);
348 #endif
349 brit = m_timeList.NextBody(brit);
350 }
351 }
352
353 Tree::Prune();
354 // MDEBUG validating the ranges
355 assert(m_timeList.RevalidateAllRanges());
356
357 } // RecTree::Prune
358
359 //------------------------------------------------------------------------------------
360
ReassignDLsFor(string lname,long marker,long ind)361 void RecTree::ReassignDLsFor(string lname, long marker, long ind)
362 {
363 // Find out which locus we're dealing with. We have its name. If the locus in question
364 // is in m_pLocusVec, we call through to Tree::ReassignDLsFor.
365 long locus = FLAGLONG;
366 for (unsigned long lnum = 0; lnum < m_pMovingLocusVec->size(); lnum++)
367 {
368 if ((*m_pMovingLocusVec)[lnum].GetName() == lname)
369 {
370 locus = lnum;
371 }
372 }
373 if (locus == FLAGLONG)
374 {
375 // The locus must be in m_pLocusVec--call the base function.
376 return Tree::ReassignDLsFor(lname, marker, ind);
377 }
378
379 vector<Branch_ptr> haps = m_individuals[ind].GetAllTips();
380 vector<LocusCell> cells = m_individuals[ind].GetLocusCellsFor(lname, marker);
381
382 for (unsigned long tip = 0; tip < haps.size(); tip++)
383 {
384 Cell_ptr origcell = haps[tip]->GetDLCell(locus, markerCell, true);
385 Cell_ptr newcell = cells[tip][0];
386 origcell->SetSiteDLs(marker, newcell->GetSiteDLs(marker));
387 MarkForDLRecalc(haps[tip]);
388 }
389
390 // Don't recalculate aliases, since there are no aliases for the moving locus.
391 } // RecTree::ReassignDLsFor
392
393 //------------------------------------------------------------------------------------
394
CoalesceActive(double eventT,Branch_ptr active1,Branch_ptr active2,const rangeset & fcsites)395 Branch_ptr RecTree::CoalesceActive(double eventT, Branch_ptr active1, Branch_ptr active2, const rangeset & fcsites)
396 {
397 Branch_ptr pBranch = Tree::CoalesceActive(eventT, active1, active2, fcsites);
398
399 pBranch->SetMovingDLCells(CollectMovingCells());
400
401 m_numTargetLinks_Tr -= active1->m_rangePtr->NumTargetLinks_Rg();
402 m_numTargetLinks_Tr -= active2->m_rangePtr->NumTargetLinks_Rg();
403 m_numTargetLinks_Tr += pBranch->m_rangePtr->NumTargetLinks_Rg();
404
405 return pBranch;
406 }
407
408 //------------------------------------------------------------------------------------
409
CoalesceInactive(double eventT,Branch_ptr active,Branch_ptr inactive,const rangeset & fcsites)410 Branch_ptr RecTree::CoalesceInactive(double eventT, Branch_ptr active, Branch_ptr inactive, const rangeset & fcsites)
411 {
412 Branch_ptr pBranch = Tree::CoalesceInactive(eventT, active, inactive, fcsites);
413
414 pBranch->SetMovingDLCells(CollectMovingCells());
415
416 m_numNewTargetLinks_Tr -= inactive->m_rangePtr->NumNewTargetLinks_Rg();
417 m_numTargetLinks_Tr -= active->m_rangePtr->NumTargetLinks_Rg();
418 m_numNewTargetLinks_Tr += pBranch->m_rangePtr->NumNewTargetLinks_Rg();
419 return pBranch;
420 }
421
422 //------------------------------------------------------------------------------------
423
Migrate(double eventT,long topop,long maxEvents,Branch_ptr active)424 Branch_ptr RecTree::Migrate(double eventT, long topop, long maxEvents, Branch_ptr active)
425 {
426 Branch_ptr pBranch = Tree::Migrate(eventT, topop, maxEvents, active);
427
428 // pBranch->m_rangePtr->SetMRange(pBranch->Child(0)->m_rangePtr); // PRUNE?
429 // A migration does not change the active or newly active links.
430 return pBranch;
431 }
432
433 //------------------------------------------------------------------------------------
434
DiseaseMutate(double eventT,long endstatus,long maxEvents,Branch_ptr active)435 Branch_ptr RecTree::DiseaseMutate(double eventT, long endstatus, long maxEvents, Branch_ptr active)
436 {
437 Branch_ptr pBranch = Tree::DiseaseMutate(eventT, endstatus, maxEvents, active);
438
439 // pBranch->m_rangePtr->SetMRange(pBranch->Child(0)->m_rangePtr); // PRUNE?
440 // A disease mutation does not change the active or newly active links.
441 return pBranch;
442 }
443
444 //------------------------------------------------------------------------------------
445
RecombineActive(double eventT,long maxEvents,FPartMap fparts,Branch_ptr pActive,long rec,const rangeset & fcsites)446 branchpair RecTree::RecombineActive(double eventT, long maxEvents, FPartMap fparts,
447 Branch_ptr pActive, long rec, const rangeset & fcsites)
448 {
449 if (NumberOfRecombinations() >= maxEvents)
450 {
451 rec_overrun e;
452 throw e;
453 }
454
455 // Create the left parent of the active branch, saving partition handling until end.
456
457 bool newbranchisinactive(false);
458 rangeset transmittedsites;
459 rangepair transmit(0L, rec + 1);
460 transmittedsites = AddPairToRange(transmit, transmittedsites);
461 RBranch_ptr pParentA = RBranch_ptr(new RBranch(pActive->m_rangePtr, newbranchisinactive, transmittedsites, fcsites));
462 pParentA->m_eventTime = eventT; // Set the event time.
463 pParentA->SetChild(0, pActive);
464 pActive->SetParent(0, pParentA);
465
466 // Create the right parent of the active branch, saving partition handling until end.
467 #if 0 // JDEBUG, apparently can't do this in stl
468 transmittedsites.begin()->first = rec + 1;
469 transmittedsites.begin()->second = m_totalSites;
470 #endif
471 // Can we just change transmit?
472 rangeset transmittedsites2;
473 rangepair transmit2(rec + 1, m_totalSites);
474 transmittedsites2 = AddPairToRange(transmit2, transmittedsites2);
475 RBranch_ptr pParentB = RBranch_ptr(new RBranch(pActive->m_rangePtr, newbranchisinactive, transmittedsites2, fcsites));
476 pParentB->m_eventTime = eventT; // Set the event time.
477 pParentB->SetChild(0, pActive);
478 pActive->SetParent(1, pParentB);
479
480 // Now deal with partitions.
481 if (fparts.empty())
482 {
483 pParentA->CopyPartitionsFrom(pActive);
484 pParentB->CopyPartitionsFrom(pActive);
485 }
486 else
487 {
488 pParentA->RecCopyPartitionsFrom(pActive, fparts, true);
489 pParentB->RecCopyPartitionsFrom(pActive, fparts, false);
490 }
491
492 m_numTargetLinks_Tr -= pActive->m_rangePtr->NumTargetLinks_Rg();
493 m_numTargetLinks_Tr += pParentA->m_rangePtr->NumTargetLinks_Rg();
494 m_numTargetLinks_Tr += pParentB->m_rangePtr->NumTargetLinks_Rg();
495
496 m_timeList.Collate(pParentA, pParentB);
497 return make_pair(pParentA, pParentB);
498 }
499
500 //------------------------------------------------------------------------------------
501
RecombineInactive(double eventT,long maxEvents,FPartMap fparts,Branch_ptr pBranch,long rec,const rangeset & fcsites)502 branchpair RecTree::RecombineInactive(double eventT, long maxEvents, FPartMap fparts,
503 Branch_ptr pBranch, long rec, const rangeset & fcsites)
504 {
505 if (NumberOfRecombinations() >= maxEvents)
506 {
507 rec_overrun e;
508 throw e;
509 }
510
511 // This is the branch already in the tree.
512 bool inactive_is_low = pBranch->m_rangePtr->AreLowSitesOnInactiveBranch_Rg(rec);
513
514 // All this might eventually be one constructor call.
515 bool newbranchisinactive(true);
516 rangepair transmit;
517 if (inactive_is_low)
518 {
519 transmit.first = 0L;
520 transmit.second = rec + 1;
521 }
522 else
523 {
524 transmit.first = rec + 1;
525 transmit.second = m_totalSites;
526 }
527 rangeset transmittedsitesInac;
528 transmittedsitesInac = AddPairToRange(transmit, transmittedsitesInac);
529 RBranch_ptr pParentInac = RBranch_ptr(new RBranch(pBranch->m_rangePtr, newbranchisinactive, transmittedsitesInac, fcsites));
530 pParentInac->m_eventTime = eventT;
531 pParentInac->CopyPartitionsFrom(pBranch);
532
533 pParentInac->SetChild(0, pBranch);
534 pBranch->Parent(0)->ReplaceChild(pBranch, pParentInac);
535 pBranch->SetParent(0, pParentInac);
536 if (pBranch->Parent(1))
537 {
538 pBranch->Parent(1)->ReplaceChild(pBranch, pParentInac);
539 pBranch->SetParent(1, Branch::NONBRANCH);
540 }
541
542 // This is the new, active branch.
543 newbranchisinactive = false;
544 if (!inactive_is_low)
545 {
546 transmit.first = 0L;
547 transmit.second = rec + 1;
548 }
549 else
550 {
551 transmit.first = rec + 1;
552 transmit.second = m_totalSites;
553 }
554 rangeset transmittedsitesAc;
555 transmittedsitesAc = AddPairToRange(transmit, transmittedsitesAc);
556 assert(Intersection(transmittedsitesAc, pBranch->m_rangePtr->GetOldTargetSites_Rg()).empty());
557 RBranch_ptr pParentAc = RBranch_ptr(new RBranch(pBranch->m_rangePtr, newbranchisinactive, transmittedsitesAc, fcsites));
558 pParentAc->m_eventTime = eventT;
559
560 pParentAc->SetChild(0, pBranch);
561 pBranch->SetParent(1, pParentAc);
562
563 if (fparts.empty())
564 {
565 pParentAc->CopyPartitionsFrom(pBranch);
566 }
567 else
568 {
569 pParentAc->RecCopyPartitionsFrom(pBranch, fparts, !inactive_is_low);
570 }
571
572 // Now put both branches in place
573 m_timeList.Collate(pParentInac, pParentAc);
574
575 m_numNewTargetLinks_Tr -= pBranch->m_rangePtr->NumNewTargetLinks_Rg();
576 m_numNewTargetLinks_Tr += pParentInac->m_rangePtr->NumNewTargetLinks_Rg();
577 m_numTargetLinks_Tr += pParentAc->m_rangePtr->NumTargetLinks_Rg();
578
579 return make_pair(pParentInac, pParentAc);
580 }
581
582 //------------------------------------------------------------------------------------
583
GetLocusSubtrees(rangepair span) const584 rangevector RecTree::GetLocusSubtrees(rangepair span) const
585 {
586 // Get recombination breakpoints
587 set<long> breakpt(GetBreakpoints());
588
589 // Push rangepairs representing each breakpoint into subtree vector. The (pt + 1) avoids creating a rangepair
590 // starting at the last site. The strange iterator maneuver is because (pt + 1) is not allowed in g++. We do
591 // not risk an empty set; we know it contains at least 2 elements.
592
593 rangevector subtrees;
594 set<long>::const_iterator pt = breakpt.begin();
595 set<long>::const_iterator nextpt = pt;
596 ++nextpt;
597 long begin = span.first;
598 long end = span.second;
599
600 for ( ; nextpt != breakpt.end(); ++pt, ++nextpt)
601 {
602 // No overlap so we don't add it.
603 if (*nextpt <= begin || *pt >= end) continue;
604 // Some overlap, but ends may need adjustment.
605 long first = *pt;
606 long last = *nextpt;
607 if (first < begin) first = begin;
608 if (last > end) last = end;
609 subtrees.push_back(rangepair(first, last));
610 }
611 return subtrees;
612 } // GetLocusSubtrees
613
614 //------------------------------------------------------------------------------------
615
SetMovingLocusVec(vector<Locus> * loc)616 void RecTree::SetMovingLocusVec(vector<Locus> * loc)
617 {
618 m_pMovingLocusVec = loc;
619 // We don't have to worry about the range--the moving loci move around within the range of the fixed loci.
620 } // SetMovingLocusVec
621
622 //------------------------------------------------------------------------------------
623
SetMovingMapposition(long mloc,long site)624 void RecTree::SetMovingMapposition(long mloc, long site)
625 {
626 assert(mloc < static_cast<long>(m_pMovingLocusVec->size()));
627 (*m_pMovingLocusVec)[mloc].SetRegionalMapposition(site);
628 }
629
630 //------------------------------------------------------------------------------------
631
CreateTip(const TipData & tipdata,const vector<LocusCell> & cells,const vector<LocusCell> & movingcells,const rangeset & diseasesites)632 TBranch_ptr RecTree::CreateTip(const TipData & tipdata, const vector<LocusCell> & cells,
633 const vector<LocusCell> & movingcells, const rangeset & diseasesites)
634 {
635 TBranch_ptr pTip = m_timeList.CreateTip(tipdata, cells, movingcells, m_totalSites, diseasesites);
636 return pTip;
637 }
638
639 //------------------------------------------------------------------------------------
640
CreateTip(const TipData & tipdata,const vector<LocusCell> & cells,const vector<LocusCell> & movingcells,const rangeset & diseasesites,const vector<Locus> & loci)641 TBranch_ptr RecTree::CreateTip(const TipData & tipdata, const vector<LocusCell> & cells,
642 const vector<LocusCell> & movingcells, const rangeset & diseasesites,
643 const vector<Locus> & loci)
644 {
645 TBranch_ptr pTip = m_timeList.CreateTip(tipdata, cells, movingcells, m_totalSites, diseasesites, loci);
646 return pTip;
647 }
648
649 //------------------------------------------------------------------------------------
650
DoesThisLocusJump(long mloc) const651 bool RecTree::DoesThisLocusJump(long mloc) const
652 {
653 assert(mloc < static_cast<long>(m_pMovingLocusVec->size()));
654 return ((*m_pMovingLocusVec)[mloc].GetAnalysisType() == mloc_mapjump);
655 }
656
657 //------------------------------------------------------------------------------------
658
AnyRelativeHaplotypes() const659 bool RecTree::AnyRelativeHaplotypes() const
660 {
661 for (IndVec::const_iterator ind = m_individuals.begin(); ind != m_individuals.end(); ind++)
662 {
663 if (ind->MultipleTraitHaplotypes()) return true;
664 }
665 return false;
666 }
667
668 //------------------------------------------------------------------------------------
669
CalculateDataLikes()670 void RecTree::CalculateDataLikes()
671 {
672 m_overallDL = CalculateDataLikesForFixedLoci();
673 //The base function accumulates a likelihood into m_overallDL.
674 for (unsigned long loc = 0; loc < m_pMovingLocusVec->size(); ++loc)
675 {
676 if ((*m_pMovingLocusVec)[loc].GetAnalysisType() == mloc_mapjump)
677 {
678 m_overallDL += CalculateDataLikesForMovingLocus(loc);
679 }
680 }
681 m_timeList.ClearUpdateDLs(); // Reset the updating flags.
682 }
683
684 //------------------------------------------------------------------------------------
685
CalculateDataLikesForMovingLocus(long loc)686 double RecTree::CalculateDataLikesForMovingLocus(long loc)
687 {
688 double likelihood;
689
690 const Locus & locus = (*m_pMovingLocusVec)[loc];
691 try // Check for need to switch to normalization.
692 {
693 likelihood = locus.CalculateDataLikelihood(*this, true);
694 }
695
696 catch (datalikenorm_error & ex) // Normalization is set by thrower.
697 {
698 m_timeList.SetAllUpdateDLs(); // All subsequent loci will recalculate the entire tree.
699 RunReport & runreport = registry.GetRunReport();
700 runreport.ReportChat("\n", 0);
701 runreport.ReportChat("Subtree of likelihood 0.0 found: Turning on normalization and re-calculating.");
702
703 likelihood = locus.CalculateDataLikelihood(*this, true);
704 }
705
706 return likelihood;
707 } // CalculateDataLikesForMovingLocus
708
709 //------------------------------------------------------------------------------------
710
GetBreakpoints() const711 set<long> RecTree::GetBreakpoints() const
712 {
713 // Create a (sorted) set containing all recombination breakpoints.
714 set<long> breakpt;
715 breakpt.insert(0);
716 breakpt.insert(m_totalSites);
717
718 Branchconstiter brit;
719 long site;
720 for (brit = m_timeList.FirstBody(); brit != m_timeList.EndBranch(); brit = m_timeList.NextBody(brit))
721 {
722 site = (*brit)->GetRecSite_Br();
723 if (site != FLAGLONG) // FLAGLONG means there is no recombination here
724 breakpt.insert(site + 1); // inserting site + 1 because of open interval conventions.
725 }
726
727 return breakpt;
728 } // GetBreakpoints
729
730 //------------------------------------------------------------------------------------
731
GetMapSummary()732 DoubleVec2d RecTree::GetMapSummary()
733 {
734 DoubleVec2d retvec;
735 DoubleVec1d zeroes(m_totalSites, 0.0);
736 for (unsigned long mloc = 0; mloc < m_pMovingLocusVec->size(); mloc++)
737 {
738 DoubleVec1d mlikes = zeroes;
739 long currentsite;
740 switch((*m_pMovingLocusVec)[mloc].GetAnalysisType())
741 {
742 case mloc_mapjump:
743 //Find out where it is, make the likelihood at that value 1.
744 currentsite = (*m_pMovingLocusVec)[mloc].GetRegionalMapposition();
745 mlikes[currentsite] = 1.0;
746 break;
747 case mloc_mapfloat:
748 //Calculate the likelihoods over all subtrees, return vectors with it.
749 //mlikes = CalculateDataLikesWithRandomHaplotypesForFloatingLocus(mloc);
750 CalculateDataLikesForAllHaplotypesForFloatingLocus(mloc, mlikes);
751 break;
752 case mloc_data:
753 case mloc_partition:
754 assert(false);
755 throw implementation_error("We seem to want to collect mapping data for a segment without that type"
756 " of analysis. This is our fault; e-mail us at lamarc@gs.washington.edu");
757 }
758 retvec.push_back(mlikes);
759 }
760
761 return retvec;
762 }
763
764 //------------------------------------------------------------------------------------
765
CalculateDataLikesWithRandomHaplotypesForFloatingLocus(long mloc)766 DoubleVec1d RecTree::CalculateDataLikesWithRandomHaplotypesForFloatingLocus(long mloc)
767 {
768 RandomizeMovingHaplotypes(mloc);
769 return CalculateDataLikesForFloatingLocus(mloc);
770 }
771
772 //------------------------------------------------------------------------------------
773
CalculateDataLikesForAllHaplotypesForFloatingLocus(long mloc,DoubleVec1d & mlikes)774 void RecTree::CalculateDataLikesForAllHaplotypesForFloatingLocus(long mloc, DoubleVec1d & mlikes)
775 {
776 long ind = 0;
777 //We need to change the vector of mlikes from zeroes to EXPMINS.
778 mlikes = SafeLog(mlikes);
779 UpdateDataLikesForIndividualsFrom(ind, mloc, mlikes);
780 //LS DEBUG
781 //PrintTipData(mloc, 0);
782 //LS TEST
783 //cout << "Sum: " << endl << ToString(mlikes, 5) << endl;
784 }
785
786 //------------------------------------------------------------------------------------
787
UpdateDataLikesForIndividualsFrom(long ind,long mloc,DoubleVec1d & mlikes)788 bool RecTree::UpdateDataLikesForIndividualsFrom(long ind, long mloc, DoubleVec1d & mlikes)
789 {
790 if (static_cast<unsigned long>(ind) == m_individuals.size()) return true;
791 string lname = (*m_pMovingLocusVec)[mloc].GetName();
792 //LS NOTE: If this ASSERTs, we are mapping something with more than one marker.
793 // Might be OK, but should be checked.
794 assert ((*m_pMovingLocusVec)[mloc].GetNmarkers() == 1);
795 for (long marker = 0; marker < (*m_pMovingLocusVec)[mloc].GetNmarkers(); marker++)
796 {
797 bool newhaps = true;
798 for (m_individuals[ind].ChooseFirstHaplotypeFor(lname, marker);
799 newhaps;
800 newhaps = m_individuals[ind].ChooseNextHaplotypeFor(lname, marker))
801 {
802 ReassignDLsFor(lname, marker, ind);
803 if (UpdateDataLikesForIndividualsFrom(ind + 1, mloc, mlikes))
804 {
805 //We're on the last one--update the data likelihood.
806 DoubleVec1d newlikes = CalculateDataLikesForFloatingLocus(mloc);
807 //LS TEST:
808 //cout << ToString(newlikes, 5) << endl;
809 mlikes = AddValsOfLogs(mlikes, newlikes);
810 }
811 }
812 }
813 return false;
814 }
815
816 //------------------------------------------------------------------------------------
817
CalculateDataLikesForFloatingLocus(long mloc)818 DoubleVec1d RecTree::CalculateDataLikesForFloatingLocus(long mloc)
819 {
820 m_timeList.SetAllUpdateDLs();
821
822 // We always need these on because there's only enough storage for a single subtree,
823 // and the last subtree we used was at the other end of the range.
824 DoubleVec1d datalikes(m_totalSites);
825 rangeset allowedranges = (*m_pMovingLocusVec)[mloc].GetAllowedRange();
826 set<long> breakpoints = GetBreakpoints();
827 breakpoints = IgnoreDisallowedSubTrees(breakpoints, allowedranges);
828 set<long>::iterator left(breakpoints.begin()), right(breakpoints.begin());
829
830 right++;
831
832 for ( ; right != breakpoints.end(); left++, right++ )
833 {
834 (*m_pMovingLocusVec)[mloc].SetRegionalMapposition(*left);
835 double datalike = CalculateDataLikesForMovingLocus(mloc);
836 for (long siteindex = *left; siteindex<*right; siteindex++)
837 {
838 datalikes[siteindex] = datalike;
839 }
840 }
841
842 datalikes = ZeroDisallowedSites(datalikes, allowedranges);
843 m_timeList.ClearUpdateDLs();
844
845 //LS NOTE: We could theoretically not call ClearUpdateDLs if there was only one subtree,
846 // but only if the *next* call to this function also had the exact same subtree, which
847 // we can't enforce. So, always call ClearUpdateDLs
848 return datalikes;
849 }
850
851 //------------------------------------------------------------------------------------
852
IgnoreDisallowedSubTrees(set<long> breakpoints,rangeset allowedranges)853 set<long> RecTree::IgnoreDisallowedSubTrees(set<long> breakpoints, rangeset allowedranges)
854 {
855 set<long> newbreakpoints;
856 newbreakpoints.insert(allowedranges.begin()->first);
857 newbreakpoints.insert(m_totalSites); // To close out the final range.
858 for (rangeset::iterator range = allowedranges.begin(); range != allowedranges.end(); range++)
859 {
860 for (long site = range->first; site < range->second; site++)
861 {
862 if (breakpoints.find(site) != breakpoints.end())
863 {
864 newbreakpoints.insert(site);
865 }
866 }
867 }
868 return newbreakpoints;
869 }
870
871 //------------------------------------------------------------------------------------
872
ZeroDisallowedSites(DoubleVec1d datalikes,rangeset allowedranges)873 DoubleVec1d RecTree::ZeroDisallowedSites(DoubleVec1d datalikes, rangeset allowedranges)
874 {
875 DoubleVec1d newdatalikes(datalikes.size(), -DBL_BIG);
876 for (rangeset::iterator range = allowedranges.begin(); range != allowedranges.end(); range++)
877 {
878 assert(range->first >= 0);
879 assert(static_cast<unsigned long int>(range->second) <= datalikes.size());
880 for (long int site = range->first; site < range->second; site++)
881 {
882 newdatalikes[site] = datalikes[site];
883 }
884 }
885 return newdatalikes;
886 }
887
888 //------------------------------------------------------------------------------------
889
RandomizeMovingHaplotypes(long mlocus)890 void RecTree::RandomizeMovingHaplotypes(long mlocus)
891 {
892 //This function loops over all individuals in m_individuals and randomizes the haplotypes.
893 string lname = (*m_pMovingLocusVec)[mlocus].GetName();
894 long nmarkers = (*m_pMovingLocusVec)[mlocus].GetNmarkers();
895 for (long marker = 0; marker < nmarkers; marker++)
896 {
897 for (unsigned long ind = 0; ind < m_individuals.size(); ind++)
898 {
899 if (m_individuals[ind].ChooseRandomHaplotypesFor(lname, marker))
900 {
901 //The chosen one is different from last time.
902 ReassignDLsFor(lname, marker, ind);
903 }
904 }
905 }
906 }
907
908 //------------------------------------------------------------------------------------
909
SimulateDataIfNeeded()910 bool RecTree::SimulateDataIfNeeded()
911 {
912 if (Tree::SimulateDataIfNeeded())
913 {
914 //LS DEBUG: Current version uses same data for all moving loci. If we want to simulate data individually,
915 // uncomment the following lines and replace all the subsequent loops over lnums to locus.whatever calls.
916 //
917 // for (unsigned long mloc = 0; mloc < m_pMovingLocusVec->size(); mloc++)
918 // {
919 // Locus & locus = (*m_pMovingLocusVec)[mloc];
920 if ((*m_pMovingLocusVec).size() > 0)
921 {
922 Locus & locus = (*m_pMovingLocusVec)[0];
923 if (locus.GetShouldSimulate())
924 {
925 for (size_t lnum = 0; lnum < (*m_pMovingLocusVec).size(); lnum++)
926 {
927 (*m_pMovingLocusVec)[lnum].ClearVariability();
928 (*m_pMovingLocusVec)[lnum].SetNewMappositionIfMoving();
929 }
930
931 // Check to see if this locus's site was already simulated.
932 Locus * simlocus = NULL; //to prevent compiler warnings
933 for (unsigned long loc = 0; loc < m_pLocusVec->size(); loc++)
934 {
935 Locus & oldlocus = (*m_pLocusVec)[loc];
936 if (oldlocus.GetShouldSimulate())
937 {
938 if (oldlocus.SiteInLocus(locus.GetRegionalMapposition()))
939 {
940 simlocus = & oldlocus;
941 }
942 }
943 }
944 if (simlocus != NULL)
945 {
946 //If there was at least one site in the original that overlapped with the range on the moving
947 // locus, we take that chances as the chance that we hit the original at all, but we want
948 // to choose a variable site from that range. This introduces an ascertainment bias that I'm
949 // not exactly sure how to compensate for, or if it needs to be compensated for. But anyway.
950 long newsite = simlocus->ChooseVariableSiteFrom(locus.GetAllowedRange());
951 if (newsite != FLAGLONG)
952 {
953 for (size_t lnum = 0; lnum < (*m_pMovingLocusVec).size(); lnum++)
954 {
955 (*m_pMovingLocusVec)[lnum].SetRegionalMapposition(newsite);
956 (*m_pMovingLocusVec)[lnum].SetTrueSite(newsite);
957 //LS TEST
958 //simlocus->PrintOnesAndZeroesForVariableSites();
959 (*m_pMovingLocusVec)[lnum].SetVariableRange(simlocus->CalculateVariableRange());
960 }
961 //LS DEBUG: Not really for release.
962 //simlocus->CalculateDisEqFor(newsite);
963 //If there were no variable sites in the original, we keep the originally chosen site.
964 // Our trait isn't going to be variable, but them's the breaks.
965 }
966 for (size_t lnum = 0; lnum < (*m_pMovingLocusVec).size(); lnum++)
967 {
968 (*m_pMovingLocusVec)[lnum].CopyDataFrom(*simlocus, *this);
969 }
970 if (locus.IsNighInvariant())
971 {
972 registry.GetRunReport().ReportNormal("All simulated data was nigh invariant for the"
973 " source segment, giving us nigh invariant data"
974 " for the simulated segment "
975 + locus.GetName() + " as well.");
976 }
977 //LS DEBUG SIM: This would delete information again, for simulations where
978 // the data exists but you never sequenced it.
979 // simlocus->RandomizeHalf(*this);
980 }
981 else
982 {
983 locus.SimulateData(*this, m_totalSites);
984 // Now copy this data into any other moving loci.
985 long newsite = locus.GetRegionalMapposition();
986 for (size_t lnum = 1; lnum < (*m_pMovingLocusVec).size(); lnum++)
987 {
988 (*m_pMovingLocusVec)[lnum].SetRegionalMapposition(newsite);
989 (*m_pMovingLocusVec)[lnum].SetTrueSite(newsite);
990 (*m_pMovingLocusVec)[lnum].CopyDataFrom(locus, *this);
991 }
992 }
993 for (size_t lnum = 0; lnum < (*m_pMovingLocusVec).size(); lnum++)
994 {
995 (*m_pMovingLocusVec)[lnum].MakePhenotypesFor(m_individuals);
996 }
997 }
998 }
999 //Redo the aliases, in case we randomized the data in the original locus.
1000 SetupAliases(*m_pLocusVec);
1001 return true;
1002 }
1003 return false;
1004 }
1005
1006 //------------------------------------------------------------------------------------
1007
NumberOfRecombinations()1008 long RecTree::NumberOfRecombinations()
1009 {
1010 return (m_timeList.HowMany(btypeRec) / 2);
1011 }
1012
1013 //------------------------------------------------------------------------------------
1014
SetTargetLinksFrom(const BranchBuffer & brbuffer)1015 void RecTree::SetTargetLinksFrom(const BranchBuffer & brbuffer)
1016 {
1017 m_numTargetLinks_Tr = 0;
1018
1019 vector<Branch_ptr> branches(brbuffer.ExtractConstBranches());
1020 vector<Branch_ptr>::const_iterator branch;
1021 for(branch = branches.begin(); branch != branches.end(); ++branch)
1022 {
1023 m_numTargetLinks_Tr += (*branch)->m_rangePtr->NumTargetLinks_Rg();
1024 }
1025 }
1026
1027 //------------------------------------------------------------------------------------
1028 // JDEBUG--this is a Debugging function.
1029
SetNewTargetLinksFrom(const BranchBuffer & brbuffer)1030 void RecTree::SetNewTargetLinksFrom(const BranchBuffer & brbuffer)
1031 {
1032 m_numNewTargetLinks_Tr = 0;
1033
1034 vector<Branch_ptr> branches(brbuffer.ExtractConstBranches());
1035 vector<Branch_ptr>::const_iterator branch;
1036 for(branch = branches.begin(); branch != branches.end(); ++branch)
1037 {
1038 m_numNewTargetLinks_Tr += (*branch)->m_rangePtr->NumNewTargetLinks_Rg();
1039 }
1040 }
1041
1042 //------------------------------------------------------------------------------------
1043 // Debugging function.
1044
PrintTipData(long mloc,long marker)1045 void RecTree::PrintTipData(long mloc, long marker)
1046 {
1047 cout << endl << "Tip data:" << endl;
1048 for (Branchconstiter tip = m_timeList.FirstTip(); tip != m_timeList.FirstBody(); tip++)
1049 {
1050 cout << " " << (*m_pMovingLocusVec)[mloc].GetDataModel()->CellToData((*tip)->GetDLCell(mloc, marker, true), marker);
1051 }
1052 cout << endl;
1053 }
1054
1055 //------------------------------------------------------------------------------------
1056 // Debugging function.
1057
PrintRangeSetCount(const rangesetcount & rsc)1058 void RecTree::PrintRangeSetCount(const rangesetcount & rsc)
1059 {
1060 for (RSCcIter rsci = rsc.begin(); rsci != rsc.end(); rsci++)
1061 {
1062 cout << (*rsci).first << ": " << ToString((*rsci).second) << endl;
1063 }
1064 }
1065
1066 //------------------------------------------------------------------------------------
1067 // Debugging function.
1068
RemoveEmpty(const rangesetcount & rsc)1069 rangesetcount RecTree::RemoveEmpty(const rangesetcount & rsc)
1070 {
1071 rangesetcount result;
1072 for (RSCcIter rsci = rsc.begin(); rsci != rsc.end(); rsci++)
1073 {
1074 if (!(*rsci).second.empty())
1075 {
1076 result.insert(make_pair((*rsci).first, (*rsci).second));
1077 }
1078 }
1079 return result;
1080 }
1081
1082 //____________________________________________________________________________________
1083