1 /* 2 * scrm is an implementation of the Sequential-Coalescent-with-Recombination Model. 3 * 4 * Copyright (C) 2013, 2014 Paul R. Staab, Sha (Joe) Zhu, Dirk Metzler and Gerton Lunter 5 * 6 * This file is part of scrm. 7 * 8 * scrm is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 21 */ 22 23 /*! 24 * \file model.h 25 * 26 * \brief This file contains the class Model, which is a simple container object for 27 * model parameters. 28 * 29 */ 30 31 #ifndef scrm_src_model 32 #define scrm_src_model 33 #pragma GCC diagnostic ignored "-Wsign-compare" 34 35 #include "macros.h" // Needs to be before cassert 36 37 #include <cstddef> 38 #include <vector> 39 #include <cfloat> 40 #include <algorithm> 41 #include <iostream> 42 #include <stdexcept> 43 #include <cassert> 44 #include <cmath> 45 #include <iomanip> 46 #include <memory> 47 #include <sstream> 48 49 #include "summary_statistics/summary_statistic.h" 50 51 class Param; 52 53 struct MigEvent { 54 size_t source_pop; 55 size_t sink_pop; 56 double prob; 57 }; 58 59 enum SeqScale { relative, absolute, ms }; 60 61 class Model 62 { 63 public: 64 #ifdef UNITTEST 65 friend class TestModel; 66 friend class TestTimeInterval; 67 friend class TestParam; 68 friend class TestForest; 69 #endif 70 friend class Forest; 71 friend class Param; 72 73 Model(); 74 Model(size_t sample_size); 75 76 // Default values; 77 constexpr static double default_pop_size_ = 10000.0; 78 constexpr static double default_growth_rate = 0.0; 79 constexpr static double default_mig_rate = 0.0; 80 constexpr static double scaling_factor_ = 1.0 / (4 * default_pop_size_); 81 82 // Getters & Setters default_pop_size()83 double default_pop_size() const { return Model::default_pop_size_; }; 84 85 86 /** 87 * @brief Returns the scaling factor for times and many parameters 88 * 89 * @return 1 / ( 4 * default_pop_size); 90 */ scaling_factor()91 double scaling_factor() const { return scaling_factor_; }; 92 93 /** 94 * @brief Returns the mutation rate per base pair per generation for the 95 * currently active sequence position. 96 * 97 * @return The mutation rate per base per generation 98 */ mutation_rate()99 double mutation_rate() const { return mutation_rates_.at(current_seq_idx_); } 100 101 /** 102 * @brief Returns the recombination rate per base pair per generation for the 103 * currently active sequence positions. 104 * 105 * @return The recombination rate per base pair per generation 106 */ 107 double recombination_rate(const size_t idx = -1) const { 108 if (idx == -1) return recombination_rates_.at(current_seq_idx_); 109 else return recombination_rates_.at(idx); 110 } 111 112 /** 113 * @brief Returns if the model has recombination. 114 * 115 * @return true if the model has recombination, false otherwise 116 */ has_recombination()117 bool has_recombination() const { 118 return has_recombination_; 119 }; 120 121 122 /** 123 * @brief Returns the length of all loci, in base pairs 124 * 125 * @return length of all loci, in base pairs 126 */ loci_length()127 size_t loci_length() const { return loci_length_; } 128 129 /** 130 * @brief Getter for the current growth rate of a subpopulation 131 * 132 * This returns the growth rate for a population for the current time of the 133 * model. Use resetTime() and increaseTime() to set the model to the time you 134 * want. 135 * 136 * @param pop The population for which the growth rate is returned. 137 * 138 * @return The growth rate. 139 */ 140 double growth_rate(size_t pop = 0) const { 141 if (current_growth_rates_ == NULL) return default_growth_rate; 142 return current_growth_rates_->at(pop); 143 } 144 145 146 /** 147 * @brief Getter for the current diploid size of a (sub-)population. 148 * 149 * This returns the size of a population for the current time of the 150 * model. Use resetTime() and increaseTime() to set the model to the time you 151 * want. In case of a growing population, you can use the time parameter to 152 * specify for which time of the current model stop you want to get the 153 * population size. 154 * 155 * @param pop The population for which the size is returned. 156 * @param time The time inside the current model step for which to return the 157 * size. This will only make a difference if the model is growing. 158 * 159 * @return The size of the sub population 160 */ 161 double population_size(const size_t pop = 0, const double time = -1) const { 162 return 0.5 / inv_double_pop_size(pop, time); 163 } 164 165 double inv_double_pop_size(const size_t pop = 0, const double time = -1) const { 166 double pop_size; 167 if (current_pop_sizes_ == NULL) 168 pop_size = 1/(2*default_pop_size()); 169 else 170 pop_size = current_pop_sizes_->at(pop); // population size basal to this segment 171 172 if (time >= 0 && growth_rate(pop) != 0.0) { 173 assert( time >= getCurrentTime() && time <= getNextTime() ); 174 pop_size *= std::exp(growth_rate(pop) * (time - getCurrentTime())); 175 } 176 177 return pop_size; 178 } 179 180 /** 181 * @brief Returns the current migration rate for a given pair of populations. 182 * 183 * The migration rate is returned unscaled, e.g. in migrants per generation. 184 * The rate is for the current time of the 185 * model. Use resetTime() and increaseTime() to set the model to the time you 186 * want. 187 * 188 * @param source The population from which the migrants come (when looking 189 * backwards in time!) 190 * @param sink The population that the migration goes to. 191 * 192 * @return The current unscaled, backwards migration rate. 193 */ migration_rate(const size_t source,const size_t sink)194 double migration_rate(const size_t source, const size_t sink) const { 195 if (sink == source) return 0.0; 196 if (current_mig_rates_ == NULL) return default_mig_rate; 197 return current_mig_rates_->at( getMigMatrixIndex(source, sink) ); 198 }; 199 200 /** 201 * @brief Getter for the current total rate of migrating out of one 202 * population (viewed backwards in time). 203 * 204 * 205 * The migration rate is returned unscaled, e.g. in migrants per generation. 206 * The rate is for the current time of the 207 * model. Use resetTime() and increaseTime() to set the model to the time you 208 * want. 209 * 210 * @param source The population for which the rate is given. 211 * 212 * @return The total current, unscaled rate of migration out if the population. 213 */ total_migration_rate(const size_t source)214 double total_migration_rate(const size_t source) const { 215 if (current_total_mig_rates_ == NULL) return default_mig_rate; 216 return current_total_mig_rates_->at(source); 217 }; 218 219 /** 220 * @brief Getter for the probability of spontaneous migration at the 221 * beginning of the current time interval. 222 * 223 * Use resetTime() and increaseTime() to set the model to the time interval you 224 * want. You can use hasFixedTimeEvent() to check if there is any single migration event. 225 * 226 * @param source The source population of the migration. 227 * @param sink The sink population of the migration. 228 * 229 * @return The probability/fraction of migration. 230 */ single_mig_events()231 std::vector<MigEvent> single_mig_events() const { 232 return single_mig_list_.at(current_time_idx_); 233 } 234 235 void setMutationRate(double rate, 236 const bool &per_locus = false, 237 const bool &scaled = false, 238 const double seq_position = 0); 239 240 void setRecombinationRate(double rate, 241 const bool &per_locus = false, 242 const bool &scaled = false, 243 const double seq_position = 0); 244 hasFixedTimeEvent(const double at_time)245 bool hasFixedTimeEvent(const double at_time) const { 246 if (single_mig_list_.at(current_time_idx_).empty()) return false; 247 if (getCurrentTime() != at_time) return false; 248 return true; 249 } 250 sample_size()251 size_t sample_size() const { return sample_times_.size(); }; sample_population(size_t sample_id)252 size_t sample_population(size_t sample_id) const { return sample_populations_.at(sample_id); }; sample_time(size_t sample_id)253 double sample_time(size_t sample_id) const { return sample_times_.at(sample_id); }; 254 population_number()255 size_t population_number() const { return pop_number_; } 256 257 getCurrentTime()258 double getCurrentTime() const { return change_times_.at(current_time_idx_); } getNextTime()259 double getNextTime() const { 260 if ( current_time_idx_ + 1 >= change_times_.size() ) return DBL_MAX; 261 else return change_times_.at(current_time_idx_ + 1); 262 } 263 getCurrentSequencePosition()264 double getCurrentSequencePosition() const { 265 if ( current_seq_idx_ >= change_position_.size() ) return loci_length(); 266 return change_position_.at(current_seq_idx_); 267 } 268 getNextSequencePosition()269 double getNextSequencePosition() const { 270 if ( current_seq_idx_ + 1 >= change_position_.size() ) return loci_length(); 271 else return change_position_.at(current_seq_idx_ + 1); 272 } 273 window_length_seq()274 double window_length_seq() const { return window_length_seq_; } window_length_rec()275 size_t window_length_rec() const { return window_length_rec_; } has_window_rec()276 bool has_window_rec() const { return has_window_rec_; } has_window_seq()277 bool has_window_seq() const { return has_window_seq_; } has_approximation()278 bool has_approximation() const { return has_appr_; } set_window_length_seq(const double ewl)279 void set_window_length_seq(const double ewl) { 280 if (ewl < 0) throw std::invalid_argument("Exact window length can not be negative"); 281 window_length_seq_ = ewl; 282 has_window_seq_ = true; 283 has_window_rec_ = false; 284 has_appr_ = true; 285 } set_window_length_rec(const size_t ewl)286 void set_window_length_rec(const size_t ewl) { 287 window_length_rec_ = ewl; 288 has_window_seq_ = false; 289 has_window_rec_ = true; 290 has_appr_ = true; 291 } disable_approximation()292 void disable_approximation() { 293 has_appr_ = false; 294 has_window_rec_ = false; 295 has_window_seq_ = false; 296 } 297 set_population_number(const size_t pop_number)298 void set_population_number(const size_t pop_number) { 299 pop_number_ = pop_number; 300 if (pop_number_<1) throw std::out_of_range("Population number out of range"); 301 } 302 resetTime()303 void resetTime() { 304 if (pop_sizes_list_[0].empty()) current_pop_sizes_ = NULL; 305 else current_pop_sizes_ = &(pop_sizes_list_[0]); 306 307 if (growth_rates_list_[0].empty()) current_growth_rates_ = NULL; 308 else current_growth_rates_ = &(growth_rates_list_[0]); 309 310 if (mig_rates_list_[0].empty()) current_mig_rates_ = NULL; 311 else current_mig_rates_ = &(mig_rates_list_[0]); 312 313 if (total_mig_rates_list_[0].empty()) current_total_mig_rates_ = NULL; 314 else current_total_mig_rates_ = &(total_mig_rates_list_[0]); 315 316 current_time_idx_ = 0; 317 }; 318 resetSequencePosition()319 void resetSequencePosition() { 320 current_seq_idx_ = 0; 321 } 322 increaseTime()323 void increaseTime() { 324 if ( current_time_idx_ == change_times_.size() ) throw std::out_of_range("Model change times out of range"); 325 ++current_time_idx_; 326 327 if ( ! pop_sizes_list_.at(current_time_idx_).empty() ) 328 current_pop_sizes_ = &(pop_sizes_list_.at(current_time_idx_)); 329 if ( ! growth_rates_list_.at(current_time_idx_).empty() ) 330 current_growth_rates_ = &(growth_rates_list_.at(current_time_idx_)); 331 if ( ! mig_rates_list_.at(current_time_idx_).empty() ) 332 current_mig_rates_ = &(mig_rates_list_.at(current_time_idx_)); 333 if ( ! total_mig_rates_list_.at(current_time_idx_).empty() ) 334 current_total_mig_rates_ = &(total_mig_rates_list_.at(current_time_idx_)); 335 }; 336 increaseSequencePosition()337 void increaseSequencePosition() { 338 ++current_seq_idx_; 339 } 340 countChangeTimes()341 size_t countChangeTimes() const { return change_times_.size(); } countChangePositions()342 size_t countChangePositions() const { return change_position_.size(); } 343 344 void print(std::ostream &os) const; 345 346 loci_number()347 size_t loci_number() const { return loci_number_; }; set_loci_number(size_t loci_number)348 void set_loci_number(size_t loci_number) { loci_number_ = loci_number; }; 349 350 // Add populations size changes 351 void addPopulationSizes(double time, const std::vector<double> &pop_sizes, 352 const bool &time_scaled = false, const bool &relative = false); 353 354 void addPopulationSizes(const double time, const double pop_size, 355 const bool &time_scaled = false, const bool &relative = false); 356 357 void addPopulationSize(const double time, const size_t pop, double population_sizes, 358 const bool &time_scaled = false, const bool &relative = false); 359 360 // Add exponential growth 361 void addGrowthRates(const double time, const std::vector<double> &growth_rates, 362 const bool &time_scaled = false, 363 const bool &rate_scaled = false); 364 365 void addGrowthRates(const double time, const double growth_rates, 366 const bool &time_scaled = false, 367 const bool &rate_scaled = false); 368 369 void addGrowthRate(const double time, const size_t population, 370 double growth_rates, const bool &time_scaled = false, 371 const bool &rate_scaled = false); 372 373 void addSampleSizes(double time, const std::vector<size_t> &samples_sizes, 374 const bool &scaled = false); 375 376 // functions to add Migration 377 void addMigrationRates(double time, const std::vector<double> &mig_rates, 378 const bool &time_scaled = false, const bool &rate_scaled = false); 379 380 void addMigrationRate(double time, size_t source, size_t sink, double mig_rate, 381 const bool &time_scaled = false, const bool &rate_scaled = false); 382 383 void addSymmetricMigration(const double time, const double mig_rate, 384 const bool &time_scaled = false, const bool &rate_scaled = false); 385 386 void addSingleMigrationEvent(const double time, const size_t source_pop, 387 const size_t sink_pop, const double fraction, 388 const bool &time_scaled = false); 389 390 void finalize(); 391 392 void check(); 393 //void reset(); 394 countSummaryStatistics()395 size_t countSummaryStatistics() const { 396 return summary_statistics_.size(); 397 } 398 getSummaryStatistic(const size_t i)399 SummaryStatistic* getSummaryStatistic(const size_t i) const { 400 return summary_statistics_.at(i).get(); 401 } 402 addSummaryStatistic(std::shared_ptr<SummaryStatistic> sum_stat)403 void addSummaryStatistic(std::shared_ptr<SummaryStatistic> sum_stat) { 404 summary_statistics_.push_back(sum_stat); 405 } 406 407 void addPopulation(); 408 getSequenceScaling()409 SeqScale getSequenceScaling() const { return seq_scale_; } setSequenceScaling(SeqScale seq_scale)410 void setSequenceScaling(SeqScale seq_scale) { seq_scale_ = seq_scale; }; 411 setLocusLength(const size_t length)412 void setLocusLength(const size_t length) { 413 // Rescale the rates that are per base pair 414 for (size_t i = 0; i < change_position_.size(); ++i) { 415 mutation_rates_.at(i) *= (double)loci_length() / length; 416 recombination_rates_.at(i) *= (double)(loci_length()-1) / (length-1); 417 } 418 loci_length_ = length; 419 } 420 421 private: 422 std::vector<double> change_times_; 423 change_position(size_t idx)424 double change_position(size_t idx) const { 425 return this->change_position_.at(idx); 426 } 427 get_position_index()428 size_t get_position_index() const { 429 return this->current_seq_idx_; 430 } 431 432 size_t addChangeTime(double time, const bool &scaled = false); 433 size_t addChangePosition(const double position); 434 435 template <typename T> deleteParList(std::vector<T * > & parList)436 void deleteParList(std::vector<T*> &parList) { 437 typename std::vector<T*>::iterator it; 438 for (it = parList.begin(); it != parList.end(); ++it) { 439 if (*it != NULL) delete *it; 440 } 441 parList.clear(); 442 } 443 444 void updateTotalMigRates(const size_t position); has_migration()445 bool has_migration() { return has_migration_; }; 446 447 void fillVectorList(std::vector<std::vector<double> > &vector_list, const double default_value); 448 void calcPopSizes(); checkPopulation(const size_t pop)449 void checkPopulation(const size_t pop) { 450 if (pop >= this->population_number()) 451 throw std::invalid_argument("Invalid population"); 452 } 453 454 friend void swap(Model& first, Model& second); 455 getMigMatrixIndex(const size_t i,const size_t j)456 size_t getMigMatrixIndex(const size_t i, const size_t j) const { 457 assert(i != j); 458 return i * (population_number()-1) + j - ( i < j ); 459 } 460 461 void addPopToMatrixList(std::vector<std::vector<double> > &vector_list, 462 size_t new_pop, 463 double default_value = nan("value to replace")); 464 void addPopToVectorList(std::vector<std::vector<double> > &vector_list); 465 466 // Stores information about samples. Each index represents a sample. 467 std::vector<size_t> sample_populations_; 468 std::vector<double> sample_times_; 469 470 // Stores the time and sequences positions where the model changes. 471 std::vector<double> change_position_; 472 473 // These pointer vectors hold the actual model parameters that can change in 474 // time. Each index represents one period in time within which the model 475 // parameters are constant. NULL means that the parameters do not change. 476 std::vector<std::vector<double> > growth_rates_list_; 477 std::vector<std::vector<double> > mig_rates_list_; 478 std::vector<std::vector<double> > total_mig_rates_list_; 479 480 std::vector<std::vector<MigEvent> > single_mig_list_; 481 482 // Population sizes are saved as 1/(2N), where N is the actual population 483 // size (do to fast multiplication rather than slow division in the 484 // algorithm) 485 std::vector<std::vector<double> > pop_sizes_list_; 486 487 // These vectors contain the model parameters that may change along the sequence. 488 // Again, each index represents an sequence segment within with the model 489 // does not change. 490 std::vector<double> recombination_rates_; /*!< Unit: Recombinations per base per generation */ 491 std::vector<double> mutation_rates_; /*!< Unit: Mutations per base per generation */ 492 493 // The index of the time and sequence segment currently active. 494 size_t current_time_idx_; 495 size_t current_seq_idx_; 496 497 // Direct pointers to the currently active model parameters. 498 std::vector<double>* current_pop_sizes_; 499 std::vector<double>* current_growth_rates_; 500 std::vector<double>* current_mig_rates_; 501 std::vector<double>* current_total_mig_rates_; 502 503 size_t pop_number_; 504 505 size_t loci_number_; 506 size_t loci_length_; 507 508 double window_length_seq_; 509 size_t window_length_rec_; 510 bool has_window_seq_; 511 bool has_window_rec_; 512 bool has_appr_; 513 514 bool has_migration_; 515 bool has_recombination_; 516 517 SeqScale seq_scale_; 518 519 std::vector<std::shared_ptr<SummaryStatistic> > summary_statistics_; 520 }; 521 522 523 524 std::ostream& operator<<(std::ostream& os, Model& model); 525 526 template <typename T> 527 std::ostream& operator<<(std::ostream& os, const std::vector<T> &vec) { 528 typename std::vector<T>::const_iterator it; 529 for (it = vec.begin(); it != vec.end(); ++it) os << *it << " "; 530 return os; 531 } 532 533 534 535 #endif 536