1 /* 2 * scrm is an implementation of the Sequential-Coalescent-with-Recombination Model. 3 * 4 * Copyright (C) 2013, 2014 Paul R. Staab, Sha (Joe) Zhu, Dirk Metzler and Gerton Lunter 5 * 6 * This file is part of scrm. 7 * 8 * scrm is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 21 */ 22 23 /*! 24 * \file forest.h 25 * \brief Contains the class Forest, which is the central class in scrm 26 * 27 * The central data structure of scrm is a forest, which is a collection of 28 * trees. This class on the one hand contains a NodeContainer object with all 29 * nodes building the trees, and on the other hand functions to manipulate to 30 * forest. 31 * 32 * Most functions are defined in forest.cc with exception of pure debugging 33 * functions, with are in forest-debug.cc. 34 */ 35 36 #ifndef scrm_src_forest 37 #define scrm_src_forest 38 39 #include <string> 40 41 #include "macros.h" // Needs to be before cassert 42 43 #include <vector> 44 #include <unordered_set> 45 #include <stdexcept> 46 #include <cassert> 47 #include <iostream> // ostreams 48 #include <iomanip> // Used for debug output 49 #include <sstream> // Used for debug output 50 51 #include "contemporaries_container.h" 52 #include "event.h" 53 #include "model.h" 54 #include "macros.h" 55 #include "node.h" 56 #include "node_container.h" 57 #include "time_interval.h" 58 #include "tree_point.h" 59 #include "random/random_generator.h" 60 #include "summary_statistics/summary_statistic.h" 61 62 class TimeInterval; 63 class TimeIntervalIterator; 64 enum eventCode { NOEVENT, EVENT, INIT_NULL}; 65 66 class Forest 67 { 68 public: 69 Node* readNewickNode( std::string &in_str, std::string::iterator ¤t_it, size_t parenthesis_balance = 0, Node* parent = NULL ); 70 void readNewick(std::string &in_str); contemporaries()71 ContemporariesContainer* contemporaries() {return &this->contemporaries_;}; 72 73 #ifdef UNITTEST 74 friend class TestForest; 75 friend class TestNode; 76 friend class TestTimeInterval; 77 friend class TestModel; 78 friend class TestNodeContainer; 79 #endif 80 friend class TimeInterval; 81 friend class TimeIntervalIterator; 82 83 Forest(Model *model, RandomGenerator *random_generator); 84 Forest(const Forest ¤t_forest); ~Forest()85 virtual ~Forest() {}; 86 87 //Getters & Setters model()88 const Model &model() const { return *model_; } set_model(Model * model)89 void set_model(Model* model) { this->model_ = model; } writable_model()90 Model* writable_model() { return this->model_; }; 91 local_root()92 Node* local_root() const { return local_root_; } set_local_root(Node * local_root)93 void set_local_root(Node* local_root) { local_root_ = local_root; }; 94 primary_root()95 Node* primary_root() const { return primary_root_; } set_primary_root(Node * primary_root)96 void set_primary_root(Node* primary_root) { primary_root_ = primary_root; }; 97 sample_size()98 size_t sample_size() const { 99 if (sample_size_ == 0) return model().sample_size(); 100 return this->sample_size_; 101 } set_sample_size(const size_t size)102 void set_sample_size(const size_t size ) { sample_size_ = size; } 103 segment_count()104 size_t segment_count() const { return current_rec_; } 105 106 void sampleNextBase(); 107 108 /** 109 * @brief Returns the length of the sequence for with the current tree is 110 * valid 111 * 112 * @param finite_sites If 'true', the length is measured in number of bases 113 * (e.g. integer sequence positions) for which the tree is valid. Otherwise, 114 * the length is measured real valued number on a continuous chromosome. 115 * 116 * @return The length of the current segment (see above for its unit) 117 */ calcSegmentLength()118 double calcSegmentLength() const { 119 if (model().getSequenceScaling() == relative) { 120 return (next_base() - current_base()) / model().loci_length(); 121 } else { 122 return ceil(next_base()) - ceil(current_base()); 123 } 124 } 125 set_random_generator(RandomGenerator * rg)126 void set_random_generator(RandomGenerator *rg) { 127 this->random_generator_ = rg; 128 } random_generator()129 RandomGenerator* random_generator() const { return this->random_generator_; } 130 getNodes()131 NodeContainer const *getNodes() const { return &nodes_; }; 132 133 // Central functions 134 void buildInitialTree(); 135 void sampleNextGenealogy(); 136 TreePoint samplePoint(Node* node = NULL, double length_left = -1) const; 137 138 void clear(); 139 140 //Debugging Tools 141 void addNodeToTree(Node *node, Node *parent, Node *first_child, Node *second_child); 142 void createExampleTree(); 143 void createScaledExampleTree(); 144 bool checkLeafsOnLocalTree(Node const* node=NULL) const; 145 bool checkTree(Node const* root = NULL) const; 146 double calcTreeLength() const; 147 bool checkTreeLength() const; 148 bool checkInvariants(Node const* node = NULL) const; 149 bool checkNodeProperties() const; 150 bool checkContemporaries(const double time) const; 151 bool printNodes() const; 152 bool checkForNodeAtHeight(const double height) const; 153 bool checkRootIsRegistered(Node const* node) const; 154 bool checkRoots() const; 155 156 //Debug Tree Printing 157 int countLinesLeft(Node const* node) const; 158 int countLinesRight(Node const* node) const; 159 int countBelowLinesLeft(Node const* node) const; 160 int countBelowLinesRight(Node const* node) const; 161 bool printTree() const; 162 std::vector<Node const*> determinePositions() const; 163 void printPositions(const std::vector<Node const*> &positions) const; 164 nodes()165 NodeContainer *nodes() { return &(this->nodes_); } 166 167 double getTMRCA(const bool &scaled = false) const { 168 if (scaled) return local_root()->height() / (4 * this->model_->default_pop_size()); 169 else return local_root()->height(); 170 } 171 172 double getLocalTreeLength(const bool &scaled = false) const { 173 if (scaled) return local_root()->length_below() / (4 * this->model_->default_pop_size()); 174 else return local_root()->length_below(); 175 } 176 177 //derived class from Forest record_Recombevent(size_t pop_i,double opportunity,eventCode event_code)178 virtual void record_Recombevent(size_t pop_i, 179 double opportunity, 180 eventCode event_code){ 181 (void)pop_i; 182 (void)opportunity; 183 (void)event_code; 184 } record_all_event(TimeInterval const & ti)185 virtual void record_all_event( TimeInterval const &ti){ 186 (void) ti; 187 } 188 189 // Calc & Print Summary Statistics 190 void calcSegmentSumStats(); 191 void clearSumStats(); 192 void printLocusSumStats(std::ostream &output) const; 193 void printSegmentSumStats(std::ostream &output) const; 194 get_rec_base(const size_t idx)195 double get_rec_base(const size_t idx) const { 196 return rec_bases_.at(idx); 197 } 198 current_base()199 double current_base() const { return get_rec_base(current_rec_); } next_base()200 double next_base() const { return get_rec_base(current_rec_ + 1); } set_current_base(double const base)201 void set_current_base(double const base) { rec_bases_[current_rec_] = base; }; set_next_base(double const base)202 void set_next_base(double const base) { rec_bases_.push_back(base); }; 203 current_rec()204 size_t current_rec() const { return current_rec_; }; 205 coalescence_finished()206 bool coalescence_finished() const { return this->coalescence_finished_; } 207 208 private: Forest()209 Forest() { this->initialize(); } 210 211 //Operations on the Tree 212 Node* cut(const TreePoint &cut_point); 213 214 void updateAbove(Node* node, 215 bool above_local_root = false, 216 const bool &recursive = true, 217 const bool &invariants_only = false); 218 219 // Tools for doing coalescence & recombination 220 void sampleCoalescences(Node *start_node); 221 size_t getNodeState(Node const *node, const double current_time) const; 222 Node* updateBranchBelowEvent(Node* node, const TreePoint &event_point); 223 Node* possiblyMoveUpwards(Node* node, const TimeInterval &event); 224 225 // Implementation of the different events 226 void implementNoEvent(const TimeInterval &ti, bool &coalescence_finished); 227 void implementCoalescence(const Event &event, TimeIntervalIterator &tii); 228 void implementPwCoalescence(Node* root_1, Node* root_2, const double time); 229 void implementRecombination(const Event &event, TimeIntervalIterator &tii); 230 void implementMigration(const Event &event, const bool &recalculate, TimeIntervalIterator &tii); 231 void implementFixedTimeEvent(TimeIntervalIterator &ti); 232 233 // Pruning nodeIsOld(Node const * node)234 bool nodeIsOld(Node const* node) const { 235 if ( node->local() ) return false; 236 if ( node->is_root() ) return false; 237 if ( model().has_window_rec() && 238 segment_count() - node->last_update() > model().window_length_rec()) { 239 return true; 240 } 241 if ( model().has_window_seq() && 242 current_base() - get_rec_base(node->last_update()) > model().window_length_seq()) { 243 return true; 244 } 245 return false; 246 } 247 nodeIsActive(Node const * node)248 bool nodeIsActive(Node const* node) const { 249 return (node == active_node(0) || node == active_node(1)); 250 } 251 252 bool pruneNodeIfNeeded(Node* node, const bool prune_orphans = true); 253 254 // Calculation of Rates 255 double calcCoalescenceRate(const size_t pop, const TimeInterval &ti) const; 256 double calcPwCoalescenceRate(const size_t pop, const TimeInterval &ti) const; 257 double calcRecombinationRate(Node const* node) const; 258 void calcRates(const TimeInterval &ti); 259 260 void sampleEvent(const TimeInterval &ti, double &event_time, Event &return_event) const; 261 262 void sampleEventType(const double time, const size_t time_line, 263 const TimeInterval &ti, Event &return_event) const; 264 265 void selectFirstTime(const double new_time, const size_t time_line, 266 double ¤t_time, size_t ¤t_time_line) const; 267 getTimeLineGrowth(const size_t time_line)268 double getTimeLineGrowth(const size_t time_line) const { 269 if (time_line == 0) return 0.0; 270 else if (time_line == 1) return model().growth_rate(active_node(0)->population()); 271 else if (time_line == 2) return model().growth_rate(active_node(1)->population()); 272 else throw std::out_of_range("Trying to get growthrate of unknown time line."); 273 } 274 275 276 // Private Members 277 NodeContainer nodes_; // The nodes of the Tree/Forest 278 279 // We have 3 different kind of roots that are important: 280 // local root: root of the smallest subtree containing all local sequences 281 Node* local_root_; 282 283 // primary root: root of the tree that contains all local sequences 284 Node* primary_root_; 285 286 // secondary roots: roots of trees that contain only non-local nodes 287 // std::unordered_set<Node*> secondary_roots_; 288 289 size_t sample_size_; // The number of sampled nodes (changes while building the initial tree) 290 size_t current_rec_; // A counter for recombinations 291 292 std::vector<double> rec_bases_; // Genetic positions of the recombinations 293 294 Model* model_; 295 RandomGenerator* random_generator_; 296 297 void initialize(Model *model = new Model(), 298 RandomGenerator *rg = NULL); 299 300 void createSampleNodes(); 301 302 // Temporarily used when doing the coalescence 303 304 // Rates: 305 double rates_[3]; 306 307 // States: Each (branch above an) active node can either be in state 308 // - 0 = off (the other coalescence has not reached it yet) or 309 // - 1 = potentially coalescing in a time interval or 310 // - 2 = potentially recombining in a time interval 311 size_t states_[2]; 312 Node* active_nodes_[2]; 313 Event tmp_event_; 314 double tmp_event_time_; 315 ContemporariesContainer contemporaries_; 316 317 // These are pointers to the up to two active nodes during a coalescence 318 size_t active_nodes_timelines_[2]; active_node(size_t nr)319 Node* active_node(size_t nr) const { return active_nodes_[nr]; }; set_active_node(size_t nr,Node * node)320 void set_active_node(size_t nr, Node* node) { active_nodes_[nr] = node; }; 321 getEventNode()322 Node* getEventNode() const { return active_node(tmp_event_.active_node_nr()); } getEventNodesState()323 size_t getEventNodesState() const { return states_[tmp_event_.active_node_nr()]; }; getOtherNode()324 Node* getOtherNode() const { return active_node(1-tmp_event_.active_node_nr()); }; getOtherNodesState()325 size_t getOtherNodesState() const { return states_[1-tmp_event_.active_node_nr()]; }; 326 327 bool coalescence_finished_; 328 }; 329 330 #endif 331