1 // $Id: tree.h,v 1.79 2011/08/18 16:57:57 jyamato Exp $
2 
3 /*
4   Copyright 2002  Peter Beerli, Mary Kuhner, Jon Yamato and Joseph Felsenstein
5 
6   This software is distributed free of charge for non-commercial use
7   and is copyrighted.  Of course, we do not guarantee that the software
8   works, and are not responsible for any damage you may cause or have.
9 */
10 
11 /*******************************************************************
12  Class Tree represents a genealogy (not necessarily a "tree" in the recombinant cases).  It has two subtypes,
13  a PlainTree (no recombination) and a RecTree (with recombination) because the recombination machinery is too
14  expensive to carry if you don't need it.
15 
16  Normally all Trees used during rearrangement come from either copying an existing tree
17  or copying the prototype tree in Registry.
18 
19  DEBUG:  This class is a monster.  Anything to make it simpler would be good.
20 
21  Written by Jim Sloan, heavily revised by Mary Kuhner
22 
23  02/08/05 Pulled out locus specific stuff into LocusLike class -- Mary
24  2004/9/15 Killed the LocusLike class, merging into Locus
25  2005/3/1 Moved alias info, reluctantly, from DLCalc to Tree (because
26     DLCalc is a shared object and the alias is not shareable!) -- Mary
27  2006/02/13 Begun to add jointed stick
28 *******************************************************************/
29 
30 #ifndef TREE_H
31 #define TREE_H
32 
33 #include <cmath>
34 #include <string>
35 #include <vector>
36 
37 #include "timelist.h"                   // for TimeList member
38 #include "individual.h"                 // for IndVec member
39 #include "locus.h"
40 #include "vectorx.h"
41 #include "constants.h"
42 #include "definitions.h"
43 #include "rangex.h"
44 #include "locus.h"      // for DataModel
45 
46 // Note that some functions/variables defined here have "_Tr" (for "Tree") as a suffix in their name.
47 // This is to distinguish them from functions/variables in class Range (or RecRange) of the same name, which use "_Rg".
48 
49 //------------------------------------------------------------------------------------
50 
51 // typedef also used by Event and Arranger
52 typedef std::pair<Branch_ptr, Branch_ptr> branchpair;
53 
54 class TreeSummary;                      // return type of SummarizeTree()
55 class TipData;
56 class Random;
57 class ForceParameters;
58 class TimeManager;
59 
60 //------------------------------------------------------------------------------------
61 
62 class Tree
63 {
64   private:
65     Tree(const Tree &);                 // undefined
66     Tree & operator=(const Tree &);     // undefined
67 
68     vector<LocusCell> m_protoCells;     // cache of locuscells for all loci
69 
70     vector<Branch_ptr> FindAllBranchesAtTime(double eventT);
71 
72   protected:
73     Tree();
74     Tree(const Tree & tree, bool makestump);
75     IndVec          m_individuals;        // associates tips from same individual
76     double          m_overallDL;          // data log-likelihood
77     LongVec2d       m_aliases;            // dim: loci x markers
78     Random *        m_randomSource;       // non-owning pointer
79     TimeList        m_timeList;           // all lineages
80     vector<Locus> * m_pLocusVec;          // not owning.  Also, not const because of simulation
81     long            m_totalSites;         // span of entire region, including gaps
82     TimeManager *   m_timeManager;        // our timemanager....
83     bool            m_hasSnpPanel;        // the region that created this tree includes SNP panel data
84 
85     // A "timelist" iterator to the the branch after the cut during rearrangement. Set by ActivateBranch().
86     // Used during mid-rearrangement so that CopyPartialBody() & Rectree::Prune() can be done quicker.
87     // This member is not copied!
88     Branchiter    m_firstInvalid;
89 
90     // Protected rearrangement primitives.
91     virtual void    Break(Branch_ptr pBranch) = 0;
92     void    SetFirstInvalidFrom(Branchconstiter & target);
SetFirstInvalid()93     void    SetFirstInvalid() { m_firstInvalid = m_timeList.BeginBranch(); };
94     void    MarkForDLRecalc(Branch_ptr br);
95 
96     // Protected Branch-management primitives.
97     const vector<LocusCell> & CollectCells(); // Get locuscells for all loci.
98     vector<Branch_ptr>  GetTips(StringVec1d & names)  const;
99 
100   public:
101     // Creation and destruction.
102     virtual           ~Tree();
103     virtual Tree     *Clone()                 const = 0;
104     virtual Tree     *MakeStump()             const = 0;
105     virtual void      Clear();
106     virtual void      CopyTips(const Tree * tree);
107     virtual void      CopyBody(const Tree * tree);
108     virtual void      CopyStick(const Tree * tree);
109     virtual void      CopyPartialBody(const Tree * tree);
110     virtual TBranch_ptr CreateTip(const TipData & tipdata, const vector<LocusCell> & cells,
111                                   const vector<LocusCell> & movingcells, const rangeset & diseasesites);
112     virtual TBranch_ptr CreateTip(const TipData & tipdata, const vector<LocusCell> & cells,
113                                   const vector<LocusCell> & movingcells, const rangeset & diseasesites,
114                                   const vector<Locus> & loci);
115     void      SetTreeTimeManager(TimeManager * tm);
116 
117     // Getters.
GetTimeList()118     TimeList & GetTimeList()                 { return m_timeList; };
GetTimeList()119     const TimeList & GetTimeList()           const { return m_timeList; };
120     virtual rangevector GetLocusSubtrees(rangepair span) const;
121     Branch_ptr     GetTip(const std::string & name) const;
GetTimeManager()122     TimeManager * GetTimeManager()           { return m_timeManager; };
GetTimeManager()123     const TimeManager * GetTimeManager()     const { return m_timeManager; };
RootTime()124     double      RootTime()                  const { return m_timeList.RootTime(); };
125     bool        NoPhaseUnknownSites()       const;
126     long        GetNsites()                 const;
127 
128     bool GroomForGrowth(const DoubleVec1d & thetas, const DoubleVec1d & growths, double temperature);
129     bool GroomForLogisticSelection(const DoubleVec1d & thetas,
130                                    double s, // logistic selection coefficient or zero
131                                    double temperature);
GetSnpPanelFlag()132     bool GetSnpPanelFlag() { return m_hasSnpPanel; };
133 
134     // Setters.
135     void SetIndividualsWithTips(const vector<Individual> & indvec);
136     void SetLocusVec(vector<Locus> * loc);
SetSnpPanelFlag(bool flag)137     void SetSnpPanelFlag(bool flag) { m_hasSnpPanel = flag; };
138 
139     // Likelihood manipulation.
140     virtual void      CalculateDataLikes() = 0;
141     virtual double    CalculateDataLikesForFixedLoci();
GetDLValue()142     double    GetDLValue()            const { return m_overallDL; };
GetAliases(long loc)143     const LongVec1d & GetAliases(long loc) const { return m_aliases[loc]; };
144     void      SetupAliases(const std::vector<Locus> & loci);
145 
146     // Rearrangement primitives.
147     virtual vector<Branch_ptr> ActivateTips(Tree * othertree);
148     virtual Branch_ptr   ActivateBranch(Tree * othertree);
149     virtual Branch_ptr   ActivateRoot(FC_Status & fcstatus);
150 
151     // ChoosePreferentiallyTowardsRoot does not break the tree, but does set m_firstInvalid.
152     // to the interval containing the return value.  So does ChooseFirstBranchInEpoch.
153     Branch_ptr   ChoosePreferentiallyTowardsRoot(Tree * othertree);
154     Branch_ptr   ChooseFirstBranchInEpoch(double targettime, Tree * othertree);
155 
156     virtual void      AttachBase(Branch_ptr newroot);
157     virtual vector<Branch_ptr> FindBranchesImmediatelyTipwardOf(Branchiter start);
158     vector<Branch_ptr> FindBranchesBetween(double startinterval, double stopinterval);
159     virtual vector<Branch_ptr> FirstInterval(double eventT);
160     virtual void      NextInterval(Branch_ptr branch );
161     virtual void      Prune();
162     void      TrimStickToRoot();
163     void      SwapSiteDLs();
164     virtual void      PickNewSiteDLs();
165     virtual void      ReassignDLsFor(std::string lname, long marker, long ind);
166     void      SetNewTimesFrom(Branchiter start, const DoubleVec1d & newtimes);
167     Branch_ptr TransitionEpoch(double eventT, long newpop, long maxevents, Branch_ptr pActive);
168     vector<Branch_ptr> FindBranchesStartingOnOpenInterval(double starttime, double endtime);
169     vector<Branch_ptr> FindEpochBranchesAt(double time);
170     vector<Branch_ptr> FindBranchesStartingRootwardOf(double time);
171 
172     // Force-specific rearrangement primitives.
173     Branch_ptr   Coalesce(Branch_ptr child1, Branch_ptr child2, double tevent, const rangeset & fcsites);
174 
175     virtual Branch_ptr   CoalesceActive(double eventT, Branch_ptr active1,
176                                         Branch_ptr active2, const rangeset & fcsites);
177     virtual Branch_ptr   CoalesceInactive(double eventT, Branch_ptr active,
178                                           Branch_ptr inactive, const rangeset & fcsites);
179     virtual Branch_ptr   Migrate(double eventT, long topop, long maxEvents, Branch_ptr active);
180     virtual Branch_ptr   DiseaseMutate(double eventT, long endstatus, long maxEvents, Branch_ptr active);
181 
182     // TreeSummaryFactory.
183     TreeSummary * SummarizeTree() const;
184 
185     // Invariant checking.
186     bool      IsValidTree()               const;  // check invariants (debugging function)
187     bool      ConsistentWithParameters(const ForceParameters& fp) const;  // debugging function
188     bool      operator==(const Tree & src) const; // compare trees
189     bool      operator!=(const Tree & src) const { return !(*this == src); };
190 
191     // TimeManager call throughs (we provide a public front for TimeManager).  CopyStick() is also in this category.
192     void DestroyStick();
193     void SetStickParams(const ForceParameters & fp);
194     bool UsingStick() const;
195     void ScoreStick(TreeSummary & treesum) const;
196     DoubleVec1d XpartThetasAtT(double time, const ForceParameters & fp) const;
197     DoubleVec1d PartitionThetasAtT(double time, force_type force, const ForceParameters & fp) const;
198 
199     // TreeSizeArranger.
SetTargetLinksFrom(const BranchBuffer & brbuffer)200     virtual void  SetTargetLinksFrom(const BranchBuffer & brbuffer) { }; // no-op
ClearTargetLinks()201     virtual void  ClearTargetLinks() { };                                // no-op
202 
203     // Simulation.
204     virtual bool  SimulateDataIfNeeded();
205     virtual long  NumberOfRecombinations() = 0;
206 
207     // Debugging functions.
208     // MakeCoalescent strips all migration nodes from the tree and forces all remaining
209     // coalescences to their expectation times.  DO NOT use with migration!
MakeCoalescent(double theta)210     void MakeCoalescent(double theta) { m_timeList.MakeCoalescent(theta); };
211     //
212     // DLCheck returns an error message with the first marker at which each branch pair differs;
213     // unmentioned branches don't differ.
214     void DLCheck(const Tree & other) const;
215     //
216     // Call through debugging code.
217     void PrintStickThetasToFile(std::ofstream & of) const;
218     void PrintStickFreqsToFile(std::ofstream & of) const;
219     void PrintStickFreqsToFileAtTime(std::ofstream & of, double time) const;
220     void PrintStickThetasToFileForJoint300(std::ofstream & of) const;
221     void PrintStickToFile(std::ofstream & of) const;
222     void PrintDirectionalMutationEventCountsToFile(std::ofstream & of) const;
223     void PrintTimeTilFirstEventToFile(std::ofstream & of) const;
224     void PrintTraitPhenotypeAtLastCoalescence(std::ofstream & of) const;
225 };
226 
227 //------------------------------------------------------------------------------------
228 //------------------------------------------------------------------------------------
229 
230 class PlainTree : public Tree
231 {
232   public:
233     // Creation and destruction.
PlainTree()234     PlainTree() : Tree()  {};
PlainTree(const Tree & tree,bool makestump)235     PlainTree(const Tree & tree, bool makestump) :
236         Tree(tree, makestump)       {};
~PlainTree()237     virtual          ~PlainTree()        {};
238     virtual Tree     *Clone()                 const;
239     virtual Tree     *MakeStump()             const;
240     virtual void      CalculateDataLikes();
241     virtual void      Break(Branch_ptr pBranch);
NumberOfRecombinations()242     virtual long      NumberOfRecombinations() { return 0L; };
243 
244     // Yes, everything else is the same as Tree.
245 };
246 
247 //------------------------------------------------------------------------------------
248 //------------------------------------------------------------------------------------
249 
250 // To keep track of final coalescences:
251 typedef std::map<long, rangeset> rangesetcount;
252 typedef std::map<long, rangeset>::iterator RSCIter;
253 typedef std::map<long, rangeset>::const_iterator RSCcIter;
254 typedef std::list<std::pair<double, rangesetcount> > sitecountlist;
255 
256 //------------------------------------------------------------------------------------
257 
258 class RecTree : public Tree
259 {
260   private:
261     long      m_numTargetLinks_Tr;
262     long      m_numNewTargetLinks_Tr;
263     std::set<long> GetBreakpoints() const;
264 
265   protected:
266     vector<Locus> *    m_pMovingLocusVec; // Not owning.  But we can change 'em!
267     vector<LocusCell> m_protoMovingCells;
268 
269     virtual  void     Break(Branch_ptr branch);
270     const vector<LocusCell> & CollectMovingCells();
271 
272   public:
273     // Creation and destruction.
274     RecTree();
275     RecTree(const RecTree & tree, bool makestump);
~RecTree()276     ~RecTree() {};
277 
278     virtual  Tree    *Clone()     const;
279     virtual  Tree    *MakeStump() const;
280     virtual  void     Clear();
281     virtual  void     CopyTips(const Tree * tree);
282     virtual  void     CopyBody(const Tree * tree);
283     virtual  void     CopyPartialBody(const Tree * tree);
284 
285     // Getters.
286     virtual  rangevector GetLocusSubtrees(rangepair span) const;
NumTargetLinks_Tr()287     long     NumTargetLinks_Tr() const    { return m_numTargetLinks_Tr; };
NumNewTargetLinks_Tr()288     long     NumNewTargetLinks_Tr() const { return m_numNewTargetLinks_Tr; };
GetNumMovingLoci()289     unsigned long GetNumMovingLoci()      { return m_pMovingLocusVec->size(); };
290     virtual  bool DoesThisLocusJump(long mloc) const;
291     virtual  bool AnyRelativeHaplotypes() const;
292 
293     // Setters.
294     void      SetMovingLocusVec(vector<Locus> * locs);
295     void      SetMovingMapposition(long mloc, long site);
296 
297     virtual TBranch_ptr CreateTip(const TipData & tipdata, const vector<LocusCell> & cells,
298                                   const vector<LocusCell> & movingcells, const rangeset & diseasesites);
299 
300     virtual TBranch_ptr CreateTip(const TipData & tipdata, const vector<LocusCell> & cells,
301                                   const vector<LocusCell> & movingcells, const rangeset & diseasesites,
302                                   const vector<Locus> & loci);
303 
304     // Likelihood manipulation.
305     virtual void        CalculateDataLikes();
306     virtual double      CalculateDataLikesForMovingLocus(long mloc);
307     virtual DoubleVec1d CalculateDataLikesForFloatingLocus(long mloc);
308     virtual DoubleVec1d CalculateDataLikesWithRandomHaplotypesForFloatingLocus(long mloc);
309     virtual void CalculateDataLikesForAllHaplotypesForFloatingLocus(long mloc, DoubleVec1d & mlikes);
310     virtual bool UpdateDataLikesForIndividualsFrom(long ind, long mloc, DoubleVec1d & mlikes);
311 
312     // Rearrangement primitives.
313     virtual  vector<Branch_ptr> ActivateTips(Tree * othertree);
314     virtual  Branch_ptr  ActivateBranch(Tree * othertree);
315     virtual  Branch_ptr  ActivateRoot(FC_Status & fcstatus);
316     virtual  void     AttachBase(Branch_ptr newroot);
317     virtual  vector<Branch_ptr> FirstInterval(double eventT);
318     virtual  void     NextInterval(Branch_ptr branch);
319     virtual  void     Prune();
320     virtual  void     ReassignDLsFor(std::string lname, long marker, long ind);
321 
322     // Force-specific rearrangement primitives.
323     virtual Branch_ptr   CoalesceActive(double eventT, Branch_ptr active1,
324                                         Branch_ptr active2, const rangeset & fcsites);
325     virtual Branch_ptr   CoalesceInactive(double eventT, Branch_ptr active,
326                                           Branch_ptr inactive, const rangeset & fcsites);
327     virtual Branch_ptr   Migrate(double eventT, long topop, long maxEvents, Branch_ptr active);
328     virtual Branch_ptr   DiseaseMutate (double eventT, long endstatus, long maxEvents, Branch_ptr active);
329     branchpair RecombineActive(double eventT, long maxEvents, FPartMap fparts,
330                                Branch_ptr active, long recsite, const rangeset & fcsites);
331     branchpair RecombineInactive(double eventT, long maxEvents, FPartMap fparts,
332                                  Branch_ptr active, long recsite, const rangeset & fcsites);
333 
334     // Map Summary.
335     DoubleVec2d  GetMapSummary();
336     std::set<long> IgnoreDisallowedSubTrees(std::set<long> breakpoints, rangeset allowedranges);
337     DoubleVec1d ZeroDisallowedSites(DoubleVec1d datalikes, rangeset allowedranges);
338     void RandomizeMovingHaplotypes(long mlocus);
339 
340     // Simulation.
341     virtual bool SimulateDataIfNeeded();
342     virtual long NumberOfRecombinations();
343 
344     // TreeSizeArranger.
345     virtual void  SetTargetLinksFrom(const BranchBuffer & brbuffer);
ClearTargetLinks()346     virtual void  ClearTargetLinks() { m_numTargetLinks_Tr = 0; };
347 
348     // Debugging functions.
349     virtual void  SetNewTargetLinksFrom(const BranchBuffer & brbuffer);
350     virtual void PrintTipData(long mloc, long marker);
351     virtual void PrintRangeSetCount(const rangesetcount & rsc);
352     virtual rangesetcount RemoveEmpty(const rangesetcount & rsc);
353 };
354 
355 #endif // TREE_H
356 
357 //____________________________________________________________________________________
358