1 // $Id: tree.h,v 1.79 2011/08/18 16:57:57 jyamato Exp $ 2 3 /* 4 Copyright 2002 Peter Beerli, Mary Kuhner, Jon Yamato and Joseph Felsenstein 5 6 This software is distributed free of charge for non-commercial use 7 and is copyrighted. Of course, we do not guarantee that the software 8 works, and are not responsible for any damage you may cause or have. 9 */ 10 11 /******************************************************************* 12 Class Tree represents a genealogy (not necessarily a "tree" in the recombinant cases). It has two subtypes, 13 a PlainTree (no recombination) and a RecTree (with recombination) because the recombination machinery is too 14 expensive to carry if you don't need it. 15 16 Normally all Trees used during rearrangement come from either copying an existing tree 17 or copying the prototype tree in Registry. 18 19 DEBUG: This class is a monster. Anything to make it simpler would be good. 20 21 Written by Jim Sloan, heavily revised by Mary Kuhner 22 23 02/08/05 Pulled out locus specific stuff into LocusLike class -- Mary 24 2004/9/15 Killed the LocusLike class, merging into Locus 25 2005/3/1 Moved alias info, reluctantly, from DLCalc to Tree (because 26 DLCalc is a shared object and the alias is not shareable!) -- Mary 27 2006/02/13 Begun to add jointed stick 28 *******************************************************************/ 29 30 #ifndef TREE_H 31 #define TREE_H 32 33 #include <cmath> 34 #include <string> 35 #include <vector> 36 37 #include "timelist.h" // for TimeList member 38 #include "individual.h" // for IndVec member 39 #include "locus.h" 40 #include "vectorx.h" 41 #include "constants.h" 42 #include "definitions.h" 43 #include "rangex.h" 44 #include "locus.h" // for DataModel 45 46 // Note that some functions/variables defined here have "_Tr" (for "Tree") as a suffix in their name. 47 // This is to distinguish them from functions/variables in class Range (or RecRange) of the same name, which use "_Rg". 48 49 //------------------------------------------------------------------------------------ 50 51 // typedef also used by Event and Arranger 52 typedef std::pair<Branch_ptr, Branch_ptr> branchpair; 53 54 class TreeSummary; // return type of SummarizeTree() 55 class TipData; 56 class Random; 57 class ForceParameters; 58 class TimeManager; 59 60 //------------------------------------------------------------------------------------ 61 62 class Tree 63 { 64 private: 65 Tree(const Tree &); // undefined 66 Tree & operator=(const Tree &); // undefined 67 68 vector<LocusCell> m_protoCells; // cache of locuscells for all loci 69 70 vector<Branch_ptr> FindAllBranchesAtTime(double eventT); 71 72 protected: 73 Tree(); 74 Tree(const Tree & tree, bool makestump); 75 IndVec m_individuals; // associates tips from same individual 76 double m_overallDL; // data log-likelihood 77 LongVec2d m_aliases; // dim: loci x markers 78 Random * m_randomSource; // non-owning pointer 79 TimeList m_timeList; // all lineages 80 vector<Locus> * m_pLocusVec; // not owning. Also, not const because of simulation 81 long m_totalSites; // span of entire region, including gaps 82 TimeManager * m_timeManager; // our timemanager.... 83 bool m_hasSnpPanel; // the region that created this tree includes SNP panel data 84 85 // A "timelist" iterator to the the branch after the cut during rearrangement. Set by ActivateBranch(). 86 // Used during mid-rearrangement so that CopyPartialBody() & Rectree::Prune() can be done quicker. 87 // This member is not copied! 88 Branchiter m_firstInvalid; 89 90 // Protected rearrangement primitives. 91 virtual void Break(Branch_ptr pBranch) = 0; 92 void SetFirstInvalidFrom(Branchconstiter & target); SetFirstInvalid()93 void SetFirstInvalid() { m_firstInvalid = m_timeList.BeginBranch(); }; 94 void MarkForDLRecalc(Branch_ptr br); 95 96 // Protected Branch-management primitives. 97 const vector<LocusCell> & CollectCells(); // Get locuscells for all loci. 98 vector<Branch_ptr> GetTips(StringVec1d & names) const; 99 100 public: 101 // Creation and destruction. 102 virtual ~Tree(); 103 virtual Tree *Clone() const = 0; 104 virtual Tree *MakeStump() const = 0; 105 virtual void Clear(); 106 virtual void CopyTips(const Tree * tree); 107 virtual void CopyBody(const Tree * tree); 108 virtual void CopyStick(const Tree * tree); 109 virtual void CopyPartialBody(const Tree * tree); 110 virtual TBranch_ptr CreateTip(const TipData & tipdata, const vector<LocusCell> & cells, 111 const vector<LocusCell> & movingcells, const rangeset & diseasesites); 112 virtual TBranch_ptr CreateTip(const TipData & tipdata, const vector<LocusCell> & cells, 113 const vector<LocusCell> & movingcells, const rangeset & diseasesites, 114 const vector<Locus> & loci); 115 void SetTreeTimeManager(TimeManager * tm); 116 117 // Getters. GetTimeList()118 TimeList & GetTimeList() { return m_timeList; }; GetTimeList()119 const TimeList & GetTimeList() const { return m_timeList; }; 120 virtual rangevector GetLocusSubtrees(rangepair span) const; 121 Branch_ptr GetTip(const std::string & name) const; GetTimeManager()122 TimeManager * GetTimeManager() { return m_timeManager; }; GetTimeManager()123 const TimeManager * GetTimeManager() const { return m_timeManager; }; RootTime()124 double RootTime() const { return m_timeList.RootTime(); }; 125 bool NoPhaseUnknownSites() const; 126 long GetNsites() const; 127 128 bool GroomForGrowth(const DoubleVec1d & thetas, const DoubleVec1d & growths, double temperature); 129 bool GroomForLogisticSelection(const DoubleVec1d & thetas, 130 double s, // logistic selection coefficient or zero 131 double temperature); GetSnpPanelFlag()132 bool GetSnpPanelFlag() { return m_hasSnpPanel; }; 133 134 // Setters. 135 void SetIndividualsWithTips(const vector<Individual> & indvec); 136 void SetLocusVec(vector<Locus> * loc); SetSnpPanelFlag(bool flag)137 void SetSnpPanelFlag(bool flag) { m_hasSnpPanel = flag; }; 138 139 // Likelihood manipulation. 140 virtual void CalculateDataLikes() = 0; 141 virtual double CalculateDataLikesForFixedLoci(); GetDLValue()142 double GetDLValue() const { return m_overallDL; }; GetAliases(long loc)143 const LongVec1d & GetAliases(long loc) const { return m_aliases[loc]; }; 144 void SetupAliases(const std::vector<Locus> & loci); 145 146 // Rearrangement primitives. 147 virtual vector<Branch_ptr> ActivateTips(Tree * othertree); 148 virtual Branch_ptr ActivateBranch(Tree * othertree); 149 virtual Branch_ptr ActivateRoot(FC_Status & fcstatus); 150 151 // ChoosePreferentiallyTowardsRoot does not break the tree, but does set m_firstInvalid. 152 // to the interval containing the return value. So does ChooseFirstBranchInEpoch. 153 Branch_ptr ChoosePreferentiallyTowardsRoot(Tree * othertree); 154 Branch_ptr ChooseFirstBranchInEpoch(double targettime, Tree * othertree); 155 156 virtual void AttachBase(Branch_ptr newroot); 157 virtual vector<Branch_ptr> FindBranchesImmediatelyTipwardOf(Branchiter start); 158 vector<Branch_ptr> FindBranchesBetween(double startinterval, double stopinterval); 159 virtual vector<Branch_ptr> FirstInterval(double eventT); 160 virtual void NextInterval(Branch_ptr branch ); 161 virtual void Prune(); 162 void TrimStickToRoot(); 163 void SwapSiteDLs(); 164 virtual void PickNewSiteDLs(); 165 virtual void ReassignDLsFor(std::string lname, long marker, long ind); 166 void SetNewTimesFrom(Branchiter start, const DoubleVec1d & newtimes); 167 Branch_ptr TransitionEpoch(double eventT, long newpop, long maxevents, Branch_ptr pActive); 168 vector<Branch_ptr> FindBranchesStartingOnOpenInterval(double starttime, double endtime); 169 vector<Branch_ptr> FindEpochBranchesAt(double time); 170 vector<Branch_ptr> FindBranchesStartingRootwardOf(double time); 171 172 // Force-specific rearrangement primitives. 173 Branch_ptr Coalesce(Branch_ptr child1, Branch_ptr child2, double tevent, const rangeset & fcsites); 174 175 virtual Branch_ptr CoalesceActive(double eventT, Branch_ptr active1, 176 Branch_ptr active2, const rangeset & fcsites); 177 virtual Branch_ptr CoalesceInactive(double eventT, Branch_ptr active, 178 Branch_ptr inactive, const rangeset & fcsites); 179 virtual Branch_ptr Migrate(double eventT, long topop, long maxEvents, Branch_ptr active); 180 virtual Branch_ptr DiseaseMutate(double eventT, long endstatus, long maxEvents, Branch_ptr active); 181 182 // TreeSummaryFactory. 183 TreeSummary * SummarizeTree() const; 184 185 // Invariant checking. 186 bool IsValidTree() const; // check invariants (debugging function) 187 bool ConsistentWithParameters(const ForceParameters& fp) const; // debugging function 188 bool operator==(const Tree & src) const; // compare trees 189 bool operator!=(const Tree & src) const { return !(*this == src); }; 190 191 // TimeManager call throughs (we provide a public front for TimeManager). CopyStick() is also in this category. 192 void DestroyStick(); 193 void SetStickParams(const ForceParameters & fp); 194 bool UsingStick() const; 195 void ScoreStick(TreeSummary & treesum) const; 196 DoubleVec1d XpartThetasAtT(double time, const ForceParameters & fp) const; 197 DoubleVec1d PartitionThetasAtT(double time, force_type force, const ForceParameters & fp) const; 198 199 // TreeSizeArranger. SetTargetLinksFrom(const BranchBuffer & brbuffer)200 virtual void SetTargetLinksFrom(const BranchBuffer & brbuffer) { }; // no-op ClearTargetLinks()201 virtual void ClearTargetLinks() { }; // no-op 202 203 // Simulation. 204 virtual bool SimulateDataIfNeeded(); 205 virtual long NumberOfRecombinations() = 0; 206 207 // Debugging functions. 208 // MakeCoalescent strips all migration nodes from the tree and forces all remaining 209 // coalescences to their expectation times. DO NOT use with migration! MakeCoalescent(double theta)210 void MakeCoalescent(double theta) { m_timeList.MakeCoalescent(theta); }; 211 // 212 // DLCheck returns an error message with the first marker at which each branch pair differs; 213 // unmentioned branches don't differ. 214 void DLCheck(const Tree & other) const; 215 // 216 // Call through debugging code. 217 void PrintStickThetasToFile(std::ofstream & of) const; 218 void PrintStickFreqsToFile(std::ofstream & of) const; 219 void PrintStickFreqsToFileAtTime(std::ofstream & of, double time) const; 220 void PrintStickThetasToFileForJoint300(std::ofstream & of) const; 221 void PrintStickToFile(std::ofstream & of) const; 222 void PrintDirectionalMutationEventCountsToFile(std::ofstream & of) const; 223 void PrintTimeTilFirstEventToFile(std::ofstream & of) const; 224 void PrintTraitPhenotypeAtLastCoalescence(std::ofstream & of) const; 225 }; 226 227 //------------------------------------------------------------------------------------ 228 //------------------------------------------------------------------------------------ 229 230 class PlainTree : public Tree 231 { 232 public: 233 // Creation and destruction. PlainTree()234 PlainTree() : Tree() {}; PlainTree(const Tree & tree,bool makestump)235 PlainTree(const Tree & tree, bool makestump) : 236 Tree(tree, makestump) {}; ~PlainTree()237 virtual ~PlainTree() {}; 238 virtual Tree *Clone() const; 239 virtual Tree *MakeStump() const; 240 virtual void CalculateDataLikes(); 241 virtual void Break(Branch_ptr pBranch); NumberOfRecombinations()242 virtual long NumberOfRecombinations() { return 0L; }; 243 244 // Yes, everything else is the same as Tree. 245 }; 246 247 //------------------------------------------------------------------------------------ 248 //------------------------------------------------------------------------------------ 249 250 // To keep track of final coalescences: 251 typedef std::map<long, rangeset> rangesetcount; 252 typedef std::map<long, rangeset>::iterator RSCIter; 253 typedef std::map<long, rangeset>::const_iterator RSCcIter; 254 typedef std::list<std::pair<double, rangesetcount> > sitecountlist; 255 256 //------------------------------------------------------------------------------------ 257 258 class RecTree : public Tree 259 { 260 private: 261 long m_numTargetLinks_Tr; 262 long m_numNewTargetLinks_Tr; 263 std::set<long> GetBreakpoints() const; 264 265 protected: 266 vector<Locus> * m_pMovingLocusVec; // Not owning. But we can change 'em! 267 vector<LocusCell> m_protoMovingCells; 268 269 virtual void Break(Branch_ptr branch); 270 const vector<LocusCell> & CollectMovingCells(); 271 272 public: 273 // Creation and destruction. 274 RecTree(); 275 RecTree(const RecTree & tree, bool makestump); ~RecTree()276 ~RecTree() {}; 277 278 virtual Tree *Clone() const; 279 virtual Tree *MakeStump() const; 280 virtual void Clear(); 281 virtual void CopyTips(const Tree * tree); 282 virtual void CopyBody(const Tree * tree); 283 virtual void CopyPartialBody(const Tree * tree); 284 285 // Getters. 286 virtual rangevector GetLocusSubtrees(rangepair span) const; NumTargetLinks_Tr()287 long NumTargetLinks_Tr() const { return m_numTargetLinks_Tr; }; NumNewTargetLinks_Tr()288 long NumNewTargetLinks_Tr() const { return m_numNewTargetLinks_Tr; }; GetNumMovingLoci()289 unsigned long GetNumMovingLoci() { return m_pMovingLocusVec->size(); }; 290 virtual bool DoesThisLocusJump(long mloc) const; 291 virtual bool AnyRelativeHaplotypes() const; 292 293 // Setters. 294 void SetMovingLocusVec(vector<Locus> * locs); 295 void SetMovingMapposition(long mloc, long site); 296 297 virtual TBranch_ptr CreateTip(const TipData & tipdata, const vector<LocusCell> & cells, 298 const vector<LocusCell> & movingcells, const rangeset & diseasesites); 299 300 virtual TBranch_ptr CreateTip(const TipData & tipdata, const vector<LocusCell> & cells, 301 const vector<LocusCell> & movingcells, const rangeset & diseasesites, 302 const vector<Locus> & loci); 303 304 // Likelihood manipulation. 305 virtual void CalculateDataLikes(); 306 virtual double CalculateDataLikesForMovingLocus(long mloc); 307 virtual DoubleVec1d CalculateDataLikesForFloatingLocus(long mloc); 308 virtual DoubleVec1d CalculateDataLikesWithRandomHaplotypesForFloatingLocus(long mloc); 309 virtual void CalculateDataLikesForAllHaplotypesForFloatingLocus(long mloc, DoubleVec1d & mlikes); 310 virtual bool UpdateDataLikesForIndividualsFrom(long ind, long mloc, DoubleVec1d & mlikes); 311 312 // Rearrangement primitives. 313 virtual vector<Branch_ptr> ActivateTips(Tree * othertree); 314 virtual Branch_ptr ActivateBranch(Tree * othertree); 315 virtual Branch_ptr ActivateRoot(FC_Status & fcstatus); 316 virtual void AttachBase(Branch_ptr newroot); 317 virtual vector<Branch_ptr> FirstInterval(double eventT); 318 virtual void NextInterval(Branch_ptr branch); 319 virtual void Prune(); 320 virtual void ReassignDLsFor(std::string lname, long marker, long ind); 321 322 // Force-specific rearrangement primitives. 323 virtual Branch_ptr CoalesceActive(double eventT, Branch_ptr active1, 324 Branch_ptr active2, const rangeset & fcsites); 325 virtual Branch_ptr CoalesceInactive(double eventT, Branch_ptr active, 326 Branch_ptr inactive, const rangeset & fcsites); 327 virtual Branch_ptr Migrate(double eventT, long topop, long maxEvents, Branch_ptr active); 328 virtual Branch_ptr DiseaseMutate (double eventT, long endstatus, long maxEvents, Branch_ptr active); 329 branchpair RecombineActive(double eventT, long maxEvents, FPartMap fparts, 330 Branch_ptr active, long recsite, const rangeset & fcsites); 331 branchpair RecombineInactive(double eventT, long maxEvents, FPartMap fparts, 332 Branch_ptr active, long recsite, const rangeset & fcsites); 333 334 // Map Summary. 335 DoubleVec2d GetMapSummary(); 336 std::set<long> IgnoreDisallowedSubTrees(std::set<long> breakpoints, rangeset allowedranges); 337 DoubleVec1d ZeroDisallowedSites(DoubleVec1d datalikes, rangeset allowedranges); 338 void RandomizeMovingHaplotypes(long mlocus); 339 340 // Simulation. 341 virtual bool SimulateDataIfNeeded(); 342 virtual long NumberOfRecombinations(); 343 344 // TreeSizeArranger. 345 virtual void SetTargetLinksFrom(const BranchBuffer & brbuffer); ClearTargetLinks()346 virtual void ClearTargetLinks() { m_numTargetLinks_Tr = 0; }; 347 348 // Debugging functions. 349 virtual void SetNewTargetLinksFrom(const BranchBuffer & brbuffer); 350 virtual void PrintTipData(long mloc, long marker); 351 virtual void PrintRangeSetCount(const rangesetcount & rsc); 352 virtual rangesetcount RemoveEmpty(const rangesetcount & rsc); 353 }; 354 355 #endif // TREE_H 356 357 //____________________________________________________________________________________ 358