1 /*
2  * scrm is an implementation of the Sequential-Coalescent-with-Recombination Model.
3  *
4  * Copyright (C) 2013, 2014 Paul R. Staab, Sha (Joe) Zhu, Dirk Metzler and Gerton Lunter
5  *
6  * This file is part of scrm.
7  *
8  * scrm is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 
21 */
22 
23 /*!
24  * \file model.h
25  *
26  * \brief This file contains the class Model, which is a simple container object for
27  * model parameters.
28  *
29  */
30 
31 #ifndef scrm_src_model
32 #define scrm_src_model
33 #pragma GCC diagnostic ignored "-Wsign-compare"
34 
35 #include "macros.h" // Needs to be before cassert
36 
37 #include <cstddef>
38 #include <vector>
39 #include <cfloat>
40 #include <algorithm>
41 #include <iostream>
42 #include <stdexcept>
43 #include <cassert>
44 #include <cmath>
45 #include <iomanip>
46 #include <memory>
47 #include <sstream>
48 
49 #include "summary_statistics/summary_statistic.h"
50 
51 class Param;
52 
53 struct MigEvent {
54   size_t source_pop;
55   size_t sink_pop;
56   double prob;
57 };
58 
59 enum SeqScale { relative, absolute, ms };
60 
61 class Model
62 {
63   public:
64 #ifdef UNITTEST
65    friend class TestModel;
66    friend class TestTimeInterval;
67    friend class TestParam;
68    friend class TestForest;
69 #endif
70    friend class Forest;
71    friend class Param;
72 
73    Model();
74    Model(size_t sample_size);
75 
76    // Default values;
77    constexpr static double default_pop_size_ = 10000.0;
78    constexpr static double default_growth_rate = 0.0;
79    constexpr static double default_mig_rate = 0.0;
80    constexpr static double scaling_factor_ = 1.0 / (4 * default_pop_size_);
81 
82    // Getters & Setters
default_pop_size()83    double default_pop_size() const { return Model::default_pop_size_; };
84 
85 
86    /**
87     * @brief Returns the scaling factor for times and many parameters
88     *
89     * @return 1 / ( 4 * default_pop_size);
90     */
scaling_factor()91    double scaling_factor() const { return scaling_factor_; };
92 
93    /**
94     * @brief Returns the mutation rate per base pair per generation for the
95     * currently active sequence position.
96     *
97     * @return The mutation rate per base per generation
98     */
mutation_rate()99    double mutation_rate() const { return mutation_rates_.at(current_seq_idx_); }
100 
101    /**
102     * @brief Returns the recombination rate per base pair per generation for the
103     * currently active sequence positions.
104     *
105     * @return The recombination rate per base pair per generation
106     */
107    double recombination_rate(const size_t idx = -1) const {
108      if (idx == -1) return recombination_rates_.at(current_seq_idx_);
109      else return recombination_rates_.at(idx);
110    }
111 
112    /**
113     * @brief Returns if the model has recombination.
114     *
115     * @return true if the model has recombination, false otherwise
116     */
has_recombination()117    bool has_recombination() const {
118      return has_recombination_;
119    };
120 
121 
122    /**
123     * @brief Returns the length of all loci, in base pairs
124     *
125     * @return length of all loci, in base pairs
126     */
loci_length()127    size_t loci_length() const { return loci_length_; }
128 
129    /**
130     * @brief Getter for the current growth rate of a subpopulation
131     *
132     * This returns the growth rate for a population for the current time of the
133     * model. Use resetTime() and increaseTime() to set the model to the time you
134     * want.
135     *
136     * @param pop The population for which the growth rate is returned.
137     *
138     * @return The growth rate.
139     */
140    double growth_rate(size_t pop = 0) const {
141      if (current_growth_rates_ == NULL) return default_growth_rate;
142      return current_growth_rates_->at(pop);
143    }
144 
145 
146    /**
147     * @brief Getter for the current diploid size of a (sub-)population.
148     *
149     * This returns the size of a population for the current time of the
150     * model. Use resetTime() and increaseTime() to set the model to the time you
151     * want. In case of a growing population, you can use the time parameter to
152     * specify for which time of the current model stop you want to get the
153     * population size.
154     *
155     * @param pop The population for which the size is returned.
156     * @param time The time inside the current model step for which to return the
157     * size. This will only make a difference if the model is growing.
158     *
159     * @return The size of the sub population
160     */
161    double population_size(const size_t pop = 0, const double time = -1) const {
162      return 0.5 / inv_double_pop_size(pop, time);
163    }
164 
165    double inv_double_pop_size(const size_t pop = 0, const double time = -1) const {
166      double pop_size;
167      if (current_pop_sizes_ == NULL)
168        pop_size = 1/(2*default_pop_size());
169      else
170        pop_size = current_pop_sizes_->at(pop);   // population size basal to this segment
171 
172      if (time >= 0 && growth_rate(pop) != 0.0) {
173        assert( time >= getCurrentTime() && time <= getNextTime() );
174        pop_size *= std::exp(growth_rate(pop) * (time - getCurrentTime()));
175      }
176 
177      return pop_size;
178    }
179 
180    /**
181     * @brief Returns the current migration rate for a given pair of populations.
182     *
183     * The migration rate is returned unscaled, e.g. in migrants per generation.
184     * The rate is for the current time of the
185     * model. Use resetTime() and increaseTime() to set the model to the time you
186     * want.
187     *
188     * @param source The population from which the migrants come (when looking
189     *               backwards in time!)
190     * @param sink The population that the migration goes to.
191     *
192     * @return The current unscaled, backwards migration rate.
193     */
migration_rate(const size_t source,const size_t sink)194    double migration_rate(const size_t source, const size_t sink) const {
195      if (sink == source) return 0.0;
196      if (current_mig_rates_ == NULL) return default_mig_rate;
197      return current_mig_rates_->at( getMigMatrixIndex(source, sink) );
198    };
199 
200    /**
201     * @brief Getter for the current total rate of migrating out of one
202     * population (viewed backwards in time).
203     *
204     *
205     * The migration rate is returned unscaled, e.g. in migrants per generation.
206     * The rate is for the current time of the
207     * model. Use resetTime() and increaseTime() to set the model to the time you
208     * want.
209     *
210     * @param source The population for which the rate is given.
211     *
212     * @return The total current, unscaled rate of migration out if the population.
213     */
total_migration_rate(const size_t source)214    double total_migration_rate(const size_t source) const {
215      if (current_total_mig_rates_ == NULL) return default_mig_rate;
216      return current_total_mig_rates_->at(source);
217    };
218 
219    /**
220     * @brief Getter for the probability of spontaneous migration at the
221     * beginning of the current time interval.
222     *
223     * Use resetTime() and increaseTime() to set the model to the time interval you
224     * want. You can use hasFixedTimeEvent() to check if there is any single migration event.
225     *
226     * @param source The source population of the migration.
227     * @param sink The sink population of the migration.
228     *
229     * @return The probability/fraction of migration.
230     */
single_mig_events()231    std::vector<MigEvent> single_mig_events() const {
232     return single_mig_list_.at(current_time_idx_);
233    }
234 
235    void setMutationRate(double rate,
236                         const bool &per_locus = false,
237                         const bool &scaled = false,
238                         const double seq_position = 0);
239 
240    void setRecombinationRate(double rate,
241                              const bool &per_locus = false,
242                              const bool &scaled = false,
243                              const double seq_position = 0);
244 
hasFixedTimeEvent(const double at_time)245    bool hasFixedTimeEvent(const double at_time) const {
246      if (single_mig_list_.at(current_time_idx_).empty()) return false;
247      if (getCurrentTime() != at_time) return false;
248      return true;
249    }
250 
sample_size()251    size_t sample_size() const { return sample_times_.size(); };
sample_population(size_t sample_id)252    size_t sample_population(size_t sample_id) const { return sample_populations_.at(sample_id); };
sample_time(size_t sample_id)253    double sample_time(size_t sample_id) const { return sample_times_.at(sample_id); };
254 
population_number()255    size_t population_number() const { return pop_number_; }
256 
257 
getCurrentTime()258    double getCurrentTime() const { return change_times_.at(current_time_idx_); }
getNextTime()259    double getNextTime() const {
260     if ( current_time_idx_ + 1 >= change_times_.size() ) return DBL_MAX;
261     else return change_times_.at(current_time_idx_ + 1);
262    }
263 
getCurrentSequencePosition()264    double getCurrentSequencePosition() const {
265     if ( current_seq_idx_ >= change_position_.size() ) return loci_length();
266     return change_position_.at(current_seq_idx_);
267    }
268 
getNextSequencePosition()269    double getNextSequencePosition() const {
270     if ( current_seq_idx_ + 1 >= change_position_.size() ) return loci_length();
271     else return change_position_.at(current_seq_idx_ + 1);
272    }
273 
window_length_seq()274    double window_length_seq() const { return window_length_seq_; }
window_length_rec()275    size_t window_length_rec() const { return window_length_rec_; }
has_window_rec()276    bool has_window_rec() const { return has_window_rec_; }
has_window_seq()277    bool has_window_seq() const { return has_window_seq_; }
has_approximation()278    bool has_approximation() const { return has_appr_; }
set_window_length_seq(const double ewl)279    void set_window_length_seq(const double ewl) {
280      if (ewl < 0) throw std::invalid_argument("Exact window length can not be negative");
281      window_length_seq_ = ewl;
282      has_window_seq_ = true;
283      has_window_rec_ = false;
284      has_appr_ = true;
285    }
set_window_length_rec(const size_t ewl)286    void set_window_length_rec(const size_t ewl) {
287      window_length_rec_ = ewl;
288      has_window_seq_ = false;
289      has_window_rec_ = true;
290      has_appr_ = true;
291    }
disable_approximation()292    void disable_approximation() {
293      has_appr_ = false;
294      has_window_rec_ = false;
295      has_window_seq_ = false;
296    }
297 
set_population_number(const size_t pop_number)298    void set_population_number(const size_t pop_number) {
299     pop_number_ = pop_number;
300     if (pop_number_<1) throw std::out_of_range("Population number out of range");
301    }
302 
resetTime()303    void resetTime() {
304      if (pop_sizes_list_[0].empty()) current_pop_sizes_ = NULL;
305      else current_pop_sizes_ = &(pop_sizes_list_[0]);
306 
307      if (growth_rates_list_[0].empty()) current_growth_rates_ = NULL;
308      else current_growth_rates_ = &(growth_rates_list_[0]);
309 
310      if (mig_rates_list_[0].empty()) current_mig_rates_ = NULL;
311      else current_mig_rates_ = &(mig_rates_list_[0]);
312 
313      if (total_mig_rates_list_[0].empty()) current_total_mig_rates_ = NULL;
314      else current_total_mig_rates_ = &(total_mig_rates_list_[0]);
315 
316      current_time_idx_ = 0;
317    };
318 
resetSequencePosition()319    void resetSequencePosition() {
320      current_seq_idx_ = 0;
321    }
322 
increaseTime()323    void increaseTime() {
324      if ( current_time_idx_ == change_times_.size() ) throw std::out_of_range("Model change times out of range");
325      ++current_time_idx_;
326 
327      if ( ! pop_sizes_list_.at(current_time_idx_).empty() )
328        current_pop_sizes_ = &(pop_sizes_list_.at(current_time_idx_));
329      if ( ! growth_rates_list_.at(current_time_idx_).empty() )
330        current_growth_rates_ = &(growth_rates_list_.at(current_time_idx_));
331      if ( ! mig_rates_list_.at(current_time_idx_).empty() )
332        current_mig_rates_ = &(mig_rates_list_.at(current_time_idx_));
333      if ( ! total_mig_rates_list_.at(current_time_idx_).empty() )
334        current_total_mig_rates_ = &(total_mig_rates_list_.at(current_time_idx_));
335    };
336 
increaseSequencePosition()337    void increaseSequencePosition() {
338     ++current_seq_idx_;
339    }
340 
countChangeTimes()341    size_t countChangeTimes() const { return change_times_.size(); }
countChangePositions()342    size_t countChangePositions() const { return change_position_.size(); }
343 
344    void print(std::ostream &os) const;
345 
346 
loci_number()347    size_t loci_number() const { return loci_number_; };
set_loci_number(size_t loci_number)348    void set_loci_number(size_t loci_number) { loci_number_ = loci_number; };
349 
350    // Add populations size changes
351    void addPopulationSizes(double time, const std::vector<double> &pop_sizes,
352                            const bool &time_scaled = false, const bool &relative = false);
353 
354    void addPopulationSizes(const double time, const double pop_size,
355                            const bool &time_scaled = false, const bool &relative = false);
356 
357    void addPopulationSize(const double time, const size_t pop, double population_sizes,
358                           const bool &time_scaled = false, const bool &relative = false);
359 
360    // Add exponential growth
361    void addGrowthRates(const double time, const std::vector<double> &growth_rates,
362                        const bool &time_scaled = false,
363                        const bool &rate_scaled = false);
364 
365    void addGrowthRates(const double time, const double growth_rates,
366                        const bool &time_scaled = false,
367                        const bool &rate_scaled = false);
368 
369    void addGrowthRate(const double time, const size_t population,
370                       double growth_rates, const bool &time_scaled = false,
371                       const bool &rate_scaled = false);
372 
373    void addSampleSizes(double time, const std::vector<size_t> &samples_sizes,
374                          const bool &scaled = false);
375 
376    // functions to add Migration
377    void addMigrationRates(double time, const std::vector<double> &mig_rates,
378                           const bool &time_scaled = false, const bool &rate_scaled = false);
379 
380    void addMigrationRate(double time, size_t source, size_t sink, double mig_rate,
381                          const bool &time_scaled = false, const bool &rate_scaled = false);
382 
383    void addSymmetricMigration(const double time, const double mig_rate,
384                               const bool &time_scaled = false, const bool &rate_scaled = false);
385 
386    void addSingleMigrationEvent(const double time, const size_t source_pop,
387                                 const size_t sink_pop, const double fraction,
388                                 const bool &time_scaled = false);
389 
390    void finalize();
391 
392    void check();
393    //void reset();
394 
countSummaryStatistics()395    size_t countSummaryStatistics() const {
396      return summary_statistics_.size();
397    }
398 
getSummaryStatistic(const size_t i)399    SummaryStatistic* getSummaryStatistic(const size_t i) const {
400      return summary_statistics_.at(i).get();
401    }
402 
addSummaryStatistic(std::shared_ptr<SummaryStatistic> sum_stat)403    void addSummaryStatistic(std::shared_ptr<SummaryStatistic> sum_stat) {
404      summary_statistics_.push_back(sum_stat);
405    }
406 
407   void addPopulation();
408 
getSequenceScaling()409   SeqScale getSequenceScaling() const { return seq_scale_; }
setSequenceScaling(SeqScale seq_scale)410   void setSequenceScaling(SeqScale seq_scale) { seq_scale_ = seq_scale; };
411 
setLocusLength(const size_t length)412    void setLocusLength(const size_t length) {
413     // Rescale the rates that are per base pair
414     for (size_t i = 0; i < change_position_.size(); ++i) {
415       mutation_rates_.at(i) *= (double)loci_length() / length;
416       recombination_rates_.at(i) *= (double)(loci_length()-1) / (length-1);
417     }
418     loci_length_ = length;
419    }
420 
421   private:
422    std::vector<double> change_times_;
423 
change_position(size_t idx)424    double change_position(size_t idx) const {
425     return this->change_position_.at(idx);
426    }
427 
get_position_index()428    size_t get_position_index() const {
429     return this->current_seq_idx_;
430    }
431 
432    size_t addChangeTime(double time, const bool &scaled = false);
433    size_t addChangePosition(const double position);
434 
435    template <typename T>
deleteParList(std::vector<T * > & parList)436    void deleteParList(std::vector<T*> &parList) {
437     typename std::vector<T*>::iterator it;
438     for (it = parList.begin(); it != parList.end(); ++it) {
439       if (*it != NULL) delete *it;
440     }
441     parList.clear();
442    }
443 
444    void updateTotalMigRates(const size_t position);
has_migration()445    bool has_migration() { return has_migration_; };
446 
447   void fillVectorList(std::vector<std::vector<double> > &vector_list, const double default_value);
448   void calcPopSizes();
checkPopulation(const size_t pop)449   void checkPopulation(const size_t pop) {
450     if (pop >= this->population_number())
451       throw std::invalid_argument("Invalid population");
452   }
453 
454   friend void swap(Model& first, Model& second);
455 
getMigMatrixIndex(const size_t i,const size_t j)456   size_t getMigMatrixIndex(const size_t i, const size_t j) const {
457     assert(i != j);
458     return i * (population_number()-1) + j - ( i < j );
459   }
460 
461   void addPopToMatrixList(std::vector<std::vector<double> > &vector_list,
462                           size_t new_pop,
463                           double default_value = nan("value to replace"));
464   void addPopToVectorList(std::vector<std::vector<double> > &vector_list);
465 
466    // Stores information about samples. Each index represents a sample.
467    std::vector<size_t> sample_populations_;
468    std::vector<double> sample_times_;
469 
470    // Stores the time and sequences positions where the model changes.
471    std::vector<double> change_position_;
472 
473    // These pointer vectors hold the actual model parameters that can change in
474    // time. Each index represents one period in time within which the model
475    // parameters are constant. NULL means that the parameters do not change.
476    std::vector<std::vector<double> > growth_rates_list_;
477    std::vector<std::vector<double> > mig_rates_list_;
478    std::vector<std::vector<double> > total_mig_rates_list_;
479 
480    std::vector<std::vector<MigEvent> > single_mig_list_;
481 
482    // Population sizes are saved as 1/(2N), where N is the actual population
483    // size (do to fast multiplication rather than slow division in the
484    // algorithm)
485    std::vector<std::vector<double> > pop_sizes_list_;
486 
487    // These vectors contain the model parameters that may change along the sequence.
488    // Again, each index represents an sequence segment within with the model
489    // does not change.
490    std::vector<double> recombination_rates_;       /*!< Unit: Recombinations per base per generation */
491    std::vector<double> mutation_rates_;           /*!< Unit: Mutations per base per generation */
492 
493    // The index of the time and sequence segment currently active.
494    size_t current_time_idx_;
495    size_t current_seq_idx_;
496 
497    // Direct pointers to the currently active model parameters.
498    std::vector<double>* current_pop_sizes_;
499    std::vector<double>* current_growth_rates_;
500    std::vector<double>* current_mig_rates_;
501    std::vector<double>* current_total_mig_rates_;
502 
503    size_t pop_number_;
504 
505    size_t loci_number_;
506    size_t loci_length_;
507 
508    double window_length_seq_;
509    size_t window_length_rec_;
510    bool has_window_seq_;
511    bool has_window_rec_;
512    bool has_appr_;
513 
514    bool has_migration_;
515    bool has_recombination_;
516 
517    SeqScale seq_scale_;
518 
519    std::vector<std::shared_ptr<SummaryStatistic> > summary_statistics_;
520 };
521 
522 
523 
524 std::ostream& operator<<(std::ostream& os, Model& model);
525 
526 template <typename T>
527 std::ostream& operator<<(std::ostream& os, const std::vector<T> &vec) {
528   typename std::vector<T>::const_iterator it;
529   for (it = vec.begin(); it != vec.end(); ++it) os << *it << " ";
530   return os;
531 }
532 
533 
534 
535 #endif
536