1 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
2 //
3 // Copyright 2014-2018, Julian Catchen <jcatchen@illinois.edu>
4 //
5 // This file is part of Stacks.
6 //
7 // Stacks is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // Stacks is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with Stacks. If not, see <http://www.gnu.org/licenses/>.
19 //
20
21 #ifndef __SMOOTHING_H__
22 #define __SMOOTHING_H__
23
24 #include <cmath>
25 #include "smoothing_utils.h"
26
27 template<class StatT=PopStat>
28 class KSmooth {
29 uint size; // Number of elements expected in the StatT class to smooth.
30 double *weights; // Weight matrix to apply while smoothing.
31
32 public:
KSmooth(int size)33 KSmooth(int size) {
34 this->size = size;
35 this->weights = calc_weights();
36
37 }
~KSmooth()38 ~KSmooth() {
39 delete [] this->weights;
40 }
41
42 int smooth(vector<const StatT *> &popstats);
43 };
44
45 template<class StatT>
46 int
smooth(vector<const StatT * > & popstats)47 KSmooth<StatT>::smooth(vector<const StatT *> &popstats)
48 {
49 //
50 // To generate smooth genome-wide distributions of Fst, we calculate a kernel-smoothing
51 // moving average of Fst values along each ordered chromosome.
52 //
53 // For each genomic region centered on a nucleotide position c, the contribution of the population
54 // genetic statistic at position p to the region average was weighted by the Gaussian function:
55 // exp( (-1 * (p - c)^2) / (2 * sigma^2))
56 //
57 // In addition, we weight each position according to (n_k - 1), where n_k is the number of alleles
58 // sampled at that location.
59 //
60 // By default, sigma = 150Kb, for computational efficiency, only calculate average out to 3sigma.
61 //
62 #pragma omp parallel
63 {
64 int limit = 3 * sigma;
65 int dist;
66 uint pos_l, pos_u;
67 double *sum, final_weight;
68 const PopStat *c, *p;
69
70 sum = new double[this->size];
71 pos_l = 0;
72 pos_u = 0;
73
74 #pragma omp for schedule(dynamic, 1)
75 for (uint pos_c = 0; pos_c < popstats.size(); pos_c++) {
76 c = popstats[pos_c];
77
78 if (c == NULL || c->fixed == true)
79 continue;
80
81 for (uint i = 0; i < this->size; i++) {
82 c->smoothed[i] = 0.0;
83 sum[i] = 0.0;
84 }
85
86 determine_window_limits(popstats, c->bp, pos_l, pos_u);
87
88 for (uint pos_p = pos_l; pos_p < pos_u; pos_p++) {
89 p = popstats[pos_p];
90
91 if (p == NULL)
92 continue;
93
94 dist = p->bp > c->bp ? p->bp - c->bp : c->bp - p->bp;
95
96 if (dist > limit || dist < 0) {
97 #pragma omp critical
98 {
99 cerr << "ERROR: current basepair is out of the sliding window.\n"
100 << " Calculating sliding window; start position: " << pos_l << ", " << (popstats[pos_l] == NULL ? -1 : popstats[pos_l]->bp +1) << "bp; end position: "
101 << pos_u << ", " << (popstats[pos_u] == NULL ? -1 : popstats[pos_u]->bp +1) << "bp; center: "
102 << pos_c << ", " << popstats[pos_c]->bp +1 << "bp\n"
103 << " Current position: " << pos_p << ", " << popstats[pos_p]->bp +1 << "; Dist: " << dist << "\n"
104 << " Window positions:\n";
105
106 for (uint j = pos_l; j < pos_u; j++) {
107 p = popstats[j];
108 if (p == NULL) continue;
109 cerr << " Position: " << j << "; " << p->bp + 1 << "bp\n";
110 }
111 }
112 continue;
113 }
114
115 // sites_cnt++;
116
117 final_weight = (p->alleles - 1) * this->weights[dist];
118 for (uint i = 0; i < this->size; i++) {
119 if (p->stat[i] > -7.0) {
120 c->smoothed[i] += p->stat[i] * final_weight;
121 sum[i] += final_weight;
122 }
123 }
124 // if (c->loc_id == 9314) {
125 // cerr << " id: " << p->loc_id
126 // << "; dist: " << dist
127 // << "; weight: " << weights[dist]
128 // << "; final_weight: " << final_weight
129 // << "; fst': " << p->stat[3]
130 // << "; sum: " << sum
131 // << "; smoothed: " << c->smoothed[3] << "\n";
132 // }
133 }
134
135 // sites_per_snp += (sites_cnt / snp_cnt);
136 // tot_windows++;
137 //
138 // if (snp_cnt < max_snp_dist) {
139 // #pragma omp atomic
140 // snp_dist[snp_cnt]++;
141 // }
142 // c->snp_cnt = snp_cnt;
143
144 for (uint i = 0; i < this->size; i++)
145 c->smoothed[i] /= sum[i];
146 }
147
148 delete [] sum;
149 }
150
151 return 0;
152 }
153
154 #endif // __SMOOTHING_H__
155