1 /*
2 Genome-wide Efficient Mixed Model Association (GEMMA)
3 Copyright © 2011-2017, Xiang Zhou
4 Copyright © 2017, Peter Carbonetto
5 Copyright © 2017-2018, Pjotr Prins
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include <bitset>
22 #include <cmath>
23 #include <cstring>
24 #include <fstream>
25 #include <iomanip>
26 #include <iostream>
27 #include <limits.h>
28 #include <map>
29 #include <regex>
30 #include <set>
31 #include <sstream>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string>
35 #include <tuple>
36 #include <vector>
37
38 // #include "Eigen/Dense"
39
40 #include "gsl/gsl_version.h"
41
42 #if GSL_MAJOR_VERSION < 2
43 #pragma message "GSL version " GSL_VERSION
44 #endif
45
46 #include "gsl/gsl_sys.h" // for gsl_isnan, gsl_isinf, gsl_isfinite
47 #include "gsl/gsl_blas.h"
48 #include "gsl/gsl_cdf.h"
49 #include "gsl/gsl_linalg.h"
50 #include "gsl/gsl_matrix.h"
51 #include "gsl/gsl_vector.h"
52 #include "gsl/gsl_eigen.h"
53
54 #include "debug.h"
55 // #include "eigenlib.h"
56 #include "fastblas.h"
57 #include "lapack.h"
58 #include "mathfunc.h"
59
60 using namespace std;
61 // using namespace Eigen;
62
has_nan(const vector<double> v)63 bool has_nan(const vector<double> v) {
64 if (!is_check_mode()) return false;
65
66 for (const auto& e: v) {
67 if (is_nan(e))
68 return true;
69 }
70 return false;
71 }
72
has_nan(const gsl_vector * v)73 bool has_nan(const gsl_vector *v) {
74 if (!is_check_mode()) return false;
75
76 for (size_t i = 0; i < v->size; ++i)
77 if (is_nan(gsl_vector_get(v,i))) return true;
78 return false;
79 }
has_inf(const gsl_vector * v)80 bool has_inf(const gsl_vector *v) {
81 if (!is_check_mode()) return false;
82
83 for (size_t i = 0; i < v->size; ++i) {
84 auto value = gsl_vector_get(v,i);
85 if (is_inf(value) != 0) return true;
86 }
87 return false;
88 }
has_nan(const gsl_matrix * m)89 bool has_nan(const gsl_matrix *m) {
90 if (!is_check_mode()) return false;
91
92 for (size_t i = 0; i < m->size1; ++i)
93 for (size_t j = 0; j < m->size2; ++j)
94 if (is_nan(gsl_matrix_get(m,i,j))) return true;
95 return false;
96 }
has_inf(const gsl_matrix * m)97 bool has_inf(const gsl_matrix *m) {
98 if (!is_check_mode()) return false;
99
100 for (size_t i = 0; i < m->size1; ++i)
101 for (size_t j = 0; j < m->size2; ++j) {
102 auto value = gsl_matrix_get(m,i,j);
103 if (is_inf(value) != 0) return true;
104 }
105 return false;
106 }
107
is_integer(const std::string & s)108 bool is_integer(const std::string & s){
109 return std::regex_match(s, std::regex("^[0-9]+$"));
110 }
111
is_float(const std::string & s)112 bool is_float(const std::string & s){
113 return std::regex_match(s, std::regex("^[+-]?([0-9]*[.])?[0-9]+$"));
114 }
115
safe_log(const double d)116 double safe_log(const double d) {
117 if (!is_legacy_mode() && (is_check_mode() || is_debug_mode()))
118 enforce_msg(d > 0.0, (std::string("Trying to take the log of ") + std::to_string(d)).c_str());
119 return log(d);
120 }
121
safe_sqrt(const double d)122 double safe_sqrt(const double d) {
123 double d1 = d;
124 if (fabs(d < 0.001))
125 d1 = fabs(d);
126 if (!is_legacy_mode() && (is_check_mode() || is_debug_mode()))
127 enforce_msg(d1 >= 0.0, (std::string("Trying to take the sqrt of ") + std::to_string(d)).c_str());
128 if (d1 < 0.0 )
129 return nan("");
130 return sqrt(d1);
131 }
132
133 // calculate variance of a vector
VectorVar(const gsl_vector * v)134 double VectorVar(const gsl_vector *v) {
135 double d, m = 0.0, m2 = 0.0;
136 for (size_t i = 0; i < v->size; ++i) {
137 d = gsl_vector_get(v, i);
138 m += d;
139 m2 += d * d;
140 }
141 m /= (double)v->size;
142 m2 /= (double)v->size;
143 return m2 - m * m;
144 }
145
146 // Center the matrix G.
CenterMatrix(gsl_matrix * G)147 void CenterMatrix(gsl_matrix *G) {
148 double d;
149 gsl_vector *w = gsl_vector_safe_alloc(G->size1);
150 gsl_vector *Gw = gsl_vector_safe_alloc(G->size1);
151 gsl_vector_set_all(w, 1.0);
152
153 // y := alpha*A*x+ beta*y or Gw = G*w
154 gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
155
156 // int gsl_blas_dsyr2(CBLAS_UPLO_t Uplo, double alpha, const gsl_vector * x, const gsl_vector * y, gsl_matrix * A)
157 // compute the symmetric rank-2 update A = \alpha x y^T + \alpha y x^T + A of the symmetric matrix A. Since the matrix A is symmetric only its upper half or lower half need to be stored
158 // or G = (UpperTriangle) alpha*Gw*w' + alpha*w*Gw' + G
159 gsl_blas_dsyr2(CblasUpper, -1.0 / (double)G->size1, Gw, w, G);
160 // compute dot product of vectors w.Gw and store in d
161 gsl_blas_ddot(w, Gw, &d);
162 // G = (upper) alpha w*w' + G
163 gsl_blas_dsyr(CblasUpper, d / ((double)G->size1 * (double)G->size1), w, G);
164
165 // Transpose the matrix
166 for (size_t i = 0; i < G->size1; ++i) {
167 for (size_t j = 0; j < i; ++j) {
168 d = gsl_matrix_get(G, j, i);
169 gsl_matrix_set(G, i, j, d);
170 }
171 }
172
173 gsl_vector_safe_free(w);
174 gsl_vector_safe_free(Gw);
175
176 return;
177 }
178
179 // Center the matrix G.
180 // Only used in vc
CenterMatrix(gsl_matrix * G,const gsl_vector * w)181 void CenterMatrix(gsl_matrix *G, const gsl_vector *w) {
182 double d, wtw;
183 gsl_vector *Gw = gsl_vector_safe_alloc(G->size1);
184
185 gsl_blas_ddot(w, w, &wtw);
186 gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
187 gsl_blas_dsyr2(CblasUpper, -1.0 / wtw, Gw, w, G);
188 gsl_blas_ddot(w, Gw, &d);
189 gsl_blas_dsyr(CblasUpper, d / (wtw * wtw), w, G);
190
191 for (size_t i = 0; i < G->size1; ++i) {
192 for (size_t j = 0; j < i; ++j) {
193 d = gsl_matrix_get(G, j, i);
194 gsl_matrix_set(G, i, j, d);
195 }
196 }
197
198 gsl_vector_safe_free(Gw);
199
200 return;
201 }
202
203 // Center the matrix G.
204 // Only used in vc
CenterMatrix(gsl_matrix * G,const gsl_matrix * W)205 void CenterMatrix(gsl_matrix *G, const gsl_matrix *W) {
206 gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
207 gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
208 gsl_matrix *WtWiWt = gsl_matrix_safe_alloc(W->size2, G->size1);
209 gsl_matrix *GW = gsl_matrix_safe_alloc(G->size1, W->size2);
210 gsl_matrix *WtGW = gsl_matrix_safe_alloc(W->size2, W->size2);
211 gsl_matrix *Gtmp = gsl_matrix_safe_alloc(G->size1, G->size1);
212
213 gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
214
215 int sig;
216 gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
217 LUDecomp(WtW, pmt, &sig);
218 LUInvert(WtW, pmt, WtWi);
219
220 gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, WtWi, W, 0.0, WtWiWt);
221 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, G, W, 0.0, GW);
222 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0, Gtmp);
223
224 gsl_matrix_sub(G, Gtmp);
225 gsl_matrix_transpose(Gtmp);
226 gsl_matrix_sub(G, Gtmp);
227
228 gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, GW, 0.0, WtGW);
229 // GW is destroyed.
230 gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, WtWiWt, WtGW, 0.0, GW);
231 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0, Gtmp);
232
233 gsl_matrix_add(G, Gtmp);
234
235 gsl_matrix_safe_free(WtW);
236 gsl_matrix_safe_free(WtWi);
237 gsl_matrix_safe_free(WtWiWt);
238 gsl_matrix_safe_free(GW);
239 gsl_matrix_safe_free(WtGW);
240 gsl_matrix_safe_free(Gtmp);
241
242 return;
243 }
244
245 // "Standardize" the matrix G such that all diagonal elements = 1.
246 // (only used by vc)
StandardizeMatrix(gsl_matrix * G)247 void StandardizeMatrix(gsl_matrix *G) {
248 double d = 0.0;
249 vector<double> vec_d;
250
251 for (size_t i = 0; i < G->size1; ++i) {
252 vec_d.push_back(gsl_matrix_get(G, i, i));
253 }
254 for (size_t i = 0; i < G->size1; ++i) {
255 for (size_t j = i; j < G->size2; ++j) {
256 if (j == i) {
257 gsl_matrix_set(G, i, j, 1);
258 } else {
259 d = gsl_matrix_get(G, i, j);
260 d /= sqrt(vec_d[i] * vec_d[j]);
261 gsl_matrix_set(G, i, j, d);
262 gsl_matrix_set(G, j, i, d);
263 }
264 }
265 }
266
267 return;
268 }
269
270 // Scale the matrix G such that the mean diagonal = 1.
ScaleMatrix(gsl_matrix * G)271 double ScaleMatrix(gsl_matrix *G) {
272 double d = 0.0;
273
274 // Compute mean of diagonal
275 for (size_t i = 0; i < G->size1; ++i) {
276 d += gsl_matrix_get(G, i, i);
277 }
278 d /= (double)G->size1;
279
280 // Scale the matrix using the diagonal mean
281 if (d != 0) {
282 gsl_matrix_scale(G, 1.0 / d);
283 }
284
285 return d;
286 }
287
isMatrixSymmetric(const gsl_matrix * G)288 bool isMatrixSymmetric(const gsl_matrix *G) {
289 enforce(G->size1 == G->size2);
290 auto m = G->data;
291 // upper triangle
292 auto size = G->size1;
293 for(size_t c = 0; c < size; c++) {
294 for(size_t r = 0; r < c; r++) {
295 int x1 = c, y1 = r, x2 = r, y2 = c;
296 auto idx1 = y1*size+x1, idx2 = y2*size+x2;
297 // printf("(%d,%d %f - %d,%d %f)",x1,y1,m[idx1],x2,y2,m[idx2]);
298 if(m[idx1] != m[idx2]) {
299 cout << "Mismatch coordinates (" << c << "," << r << ")" << m[idx1] << ":" << m[idx2] << "!" << endl;
300 return false;
301 }
302 }
303 }
304 return true;
305 }
306
isMatrixPositiveDefinite(const gsl_matrix * G)307 bool isMatrixPositiveDefinite(const gsl_matrix *G) {
308 enforce(G->size1 == G->size2);
309 auto G2 = gsl_matrix_safe_alloc(G->size1, G->size2);
310 enforce_gsl(gsl_matrix_safe_memcpy(G2,G));
311 auto handler = gsl_set_error_handler_off();
312 #if GSL_MAJOR_VERSION >= 2 && GSL_MINOR_VERSION >= 3
313 auto s = gsl_linalg_cholesky_decomp1(G2);
314 #else
315 auto s = gsl_linalg_cholesky_decomp(G2);
316 #endif
317 gsl_set_error_handler(handler);
318 if (s == GSL_SUCCESS) {
319 gsl_matrix_safe_free(G2);
320 return true;
321 }
322 gsl_matrix_free(G2);
323 return (false);
324 }
325
getEigenValues(const gsl_matrix * G)326 gsl_vector *getEigenValues(const gsl_matrix *G) {
327 enforce(G->size1 == G->size2);
328 auto G2 = gsl_matrix_safe_alloc(G->size1, G->size2);
329 enforce_gsl(gsl_matrix_safe_memcpy(G2,G));
330 auto eworkspace = gsl_eigen_symm_alloc(G->size1);
331 enforce(eworkspace);
332 gsl_vector *eigenvalues = gsl_vector_safe_alloc(G->size1);
333 enforce_gsl(gsl_eigen_symm(G2, eigenvalues, eworkspace));
334 gsl_eigen_symm_free(eworkspace);
335 gsl_matrix_safe_free(G2);
336 return eigenvalues;
337 }
338
339 // Check whether eigen values are larger than *min*
340 // by default 1E-5.
341 // Returns success, eigen min, eigen min-but-1, eigen max
342
minmax(const gsl_vector * v)343 tuple<double, double, double> minmax(const gsl_vector *v) {
344 auto min = v->data[0];
345 auto min1 = min;
346 auto max = min;
347 for (size_t i=1; i<v->size; i++) {
348 auto value = std::abs(v->data[i]);
349 if (value < min) {
350 min1 = min;
351 min = value;
352 }
353 if (value > max)
354 max = value;
355 }
356 return std::make_tuple(min, min1, max);
357 }
358
abs_minmax(const gsl_vector * v)359 tuple<double, double, double> abs_minmax(const gsl_vector *v) {
360 auto min = std::abs(v->data[0]);
361 auto min1 = min;
362 auto max = min;
363 for (size_t i=1; i<v->size; i++) {
364 auto value = std::abs(v->data[i]);
365 if (value < min) {
366 min1 = min;
367 min = value;
368 }
369 if (value > max)
370 max = value;
371 }
372 return std::make_tuple(min, min1, max);
373 }
374
375 // Check for negative values. skip_min will leave out
376 // the lowest value
has_negative_values_but_one(const gsl_vector * v)377 bool has_negative_values_but_one(const gsl_vector *v) {
378 bool one_skipped = false;
379 for (size_t i=0; i<v->size; i++) {
380 if (v->data[i] < -EIGEN_MINVALUE) {
381 if (one_skipped)
382 return true;
383 one_skipped = true;
384 }
385 }
386 return false;
387 }
388
count_abs_small_values(const gsl_vector * v,double min)389 uint count_abs_small_values(const gsl_vector *v, double min) {
390 uint count = 0;
391 for (size_t i=0; i<v->size; i++) {
392 if (std::abs(v->data[i]) < min) {
393 count += 1;
394 }
395 }
396 return count;
397 }
398
399 // Check for matrix being ill conditioned by taking the eigen values
400 // and the ratio of max and min but one (min is expected to be zero).
isMatrixIllConditioned(const gsl_vector * eigenvalues,double max_ratio)401 bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio) {
402 auto t = abs_minmax(eigenvalues);
403
404 #if !defined NDEBUG
405 auto absmin = get<0>(t);
406 #endif
407
408 auto absmin1 = get<1>(t);
409 auto absmax = get<2>(t);
410 if (absmax/absmin1 > max_ratio) {
411 #if !NDEBUG
412 cerr << "**** DEBUG: Ratio |eigenmax|/|eigenmin| suggests matrix is ill conditioned for double precision" << endl;
413 auto t = minmax(eigenvalues);
414 auto min = get<0>(t);
415 auto min1 = get<1>(t);
416 auto max = get<2>(t);
417 cerr << "**** DEBUG: Abs eigenvalues [Min " << absmin << ", " << absmin1 << " ... " << absmax << " Max] Ratio (" << absmax << "/" << absmin1 << ") = " << absmax/absmin1 << endl;
418 cerr << "**** DEBUG: Eigenvalues [Min " << min << ", " << min1 << " ... " << max << " Max]" << endl;
419 #endif
420 return true;
421 }
422 return false;
423 }
424
sum(const double * m,size_t rows,size_t cols)425 double sum(const double *m, size_t rows, size_t cols) {
426 double sum = 0.0;
427 for (size_t i = 0; i<rows*cols; i++)
428 sum += m[i];
429 return sum;
430 }
431
SumVector(const gsl_vector * v)432 double SumVector(const gsl_vector *v) {
433 double sum = 0;
434 for (size_t i = 0; i < v->size; i++ ) {
435 sum += gsl_vector_get(v, i);
436 }
437 return( sum );
438 }
439
440 // Center the vector y.
CenterVector(gsl_vector * y)441 double CenterVector(gsl_vector *y) {
442 double d = 0.0;
443
444 for (size_t i = 0; i < y->size; ++i) {
445 d += gsl_vector_get(y, i);
446 }
447 d /= (double)y->size;
448
449 gsl_vector_add_constant(y, -1.0 * d);
450
451 return d;
452 }
453
454 // Center the vector y.
CenterVector(gsl_vector * y,const gsl_matrix * W)455 void CenterVector(gsl_vector *y, const gsl_matrix *W) {
456 gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
457 gsl_vector *Wty = gsl_vector_safe_alloc(W->size2);
458 gsl_vector *WtWiWty = gsl_vector_safe_alloc(W->size2);
459
460 gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
461 gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
462
463 int sig;
464 gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
465 LUDecomp(WtW, pmt, &sig);
466 LUSolve(WtW, pmt, Wty, WtWiWty);
467
468 gsl_blas_dgemv(CblasNoTrans, -1.0, W, WtWiWty, 1.0, y);
469
470 gsl_matrix_safe_free(WtW);
471 gsl_vector_safe_free(Wty);
472 gsl_vector_safe_free(WtWiWty);
473
474 return;
475 }
476
477 // "Standardize" vector y to have mean 0 and y^ty/n=1.
StandardizeVector(gsl_vector * y)478 void StandardizeVector(gsl_vector *y) {
479 double d = 0.0, m = 0.0, v = 0.0;
480
481 for (size_t i = 0; i < y->size; ++i) {
482 d = gsl_vector_get(y, i);
483 m += d;
484 v += d * d;
485 }
486 m /= (double)y->size;
487 v /= (double)y->size;
488 v -= m * m;
489
490 gsl_vector_add_constant(y, -1.0 * m);
491 gsl_vector_scale(y, 1.0 / sqrt(v));
492
493 return;
494 }
495
496 // Calculate UtX (U gets transposed)
CalcUtX(const gsl_matrix * U,gsl_matrix * UtX)497 void CalcUtX(const gsl_matrix *U, gsl_matrix *UtX) {
498 gsl_matrix *X = gsl_matrix_safe_alloc(UtX->size1, UtX->size2);
499 gsl_matrix_safe_memcpy(X, UtX);
500 fast_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
501 gsl_matrix_safe_free(X);
502 }
503
CalcUtX(const gsl_matrix * U,const gsl_matrix * X,gsl_matrix * UtX)504 void CalcUtX(const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX) {
505 fast_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
506 }
507
CalcUtX(const gsl_matrix * U,const gsl_vector * x,gsl_vector * Utx)508 void CalcUtX(const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx) {
509 gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx);
510 }
511
512 // Kronecker product.
Kronecker(const gsl_matrix * K,const gsl_matrix * V,gsl_matrix * H)513 void Kronecker(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) {
514 for (size_t i = 0; i < K->size1; i++) {
515 for (size_t j = 0; j < K->size2; j++) {
516 gsl_matrix_view H_sub = gsl_matrix_submatrix(
517 H, i * V->size1, j * V->size2, V->size1, V->size2);
518 gsl_matrix_safe_memcpy(&H_sub.matrix, V);
519 gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j));
520 }
521 }
522 return;
523 }
524
525 // Symmetric K matrix.
KroneckerSym(const gsl_matrix * K,const gsl_matrix * V,gsl_matrix * H)526 void KroneckerSym(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) {
527 for (size_t i = 0; i < K->size1; i++) {
528 for (size_t j = i; j < K->size2; j++) {
529 gsl_matrix_view H_sub = gsl_matrix_submatrix(
530 H, i * V->size1, j * V->size2, V->size1, V->size2);
531 gsl_matrix_safe_memcpy(&H_sub.matrix, V);
532 gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j));
533
534 if (i != j) {
535 gsl_matrix_view H_sub_sym = gsl_matrix_submatrix(
536 H, j * V->size1, i * V->size2, V->size1, V->size2);
537 gsl_matrix_safe_memcpy(&H_sub_sym.matrix, &H_sub.matrix);
538 }
539 }
540 }
541 return;
542 }
543
544 // This function calculates HWE p value with methods described in
545 // Wigginton et al. (2005) AJHG; it is based on the code in plink 1.07.
CalcHWE(const size_t n_hom1,const size_t n_hom2,const size_t n_ab)546 double CalcHWE(const size_t n_hom1, const size_t n_hom2, const size_t n_ab) {
547 if ((n_hom1 + n_hom2 + n_ab) == 0) {
548 return 1;
549 }
550
551 // "AA" is the rare allele.
552 int n_aa = n_hom1 < n_hom2 ? n_hom1 : n_hom2;
553 int n_bb = n_hom1 < n_hom2 ? n_hom2 : n_hom1;
554
555 int rare_copies = 2 * n_aa + n_ab;
556 int genotypes = n_ab + n_bb + n_aa;
557
558 double *het_probs = (double *)malloc((rare_copies + 1) * sizeof(double));
559 if (het_probs == NULL)
560 cout << "Internal error: SNP-HWE: Unable to allocate array" << endl;
561
562 int i;
563 for (i = 0; i <= rare_copies; i++)
564 het_probs[i] = 0.0;
565
566 // Start at midpoint.
567 // XZ modified to add (long int)
568 int mid = ((long int)rare_copies *
569 (2 * (long int)genotypes - (long int)rare_copies)) /
570 (2 * (long int)genotypes);
571
572 // Check to ensure that midpoint and rare alleles have same
573 // parity.
574 if ((rare_copies & 1) ^ (mid & 1))
575 mid++;
576
577 int curr_hets = mid;
578 int curr_homr = (rare_copies - mid) / 2;
579 int curr_homc = genotypes - curr_hets - curr_homr;
580
581 het_probs[mid] = 1.0;
582 double sum = het_probs[mid];
583 for (curr_hets = mid; curr_hets > 1; curr_hets -= 2) {
584 het_probs[curr_hets - 2] = het_probs[curr_hets] * curr_hets *
585 (curr_hets - 1.0) /
586 (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0));
587 sum += het_probs[curr_hets - 2];
588
589 // Two fewer heterozygotes for next iteration; add one
590 // rare, one common homozygote.
591 curr_homr++;
592 curr_homc++;
593 }
594
595 curr_hets = mid;
596 curr_homr = (rare_copies - mid) / 2;
597 curr_homc = genotypes - curr_hets - curr_homr;
598 for (curr_hets = mid; curr_hets <= rare_copies - 2; curr_hets += 2) {
599 het_probs[curr_hets + 2] = het_probs[curr_hets] * 4.0 * curr_homr *
600 curr_homc /
601 ((curr_hets + 2.0) * (curr_hets + 1.0));
602 sum += het_probs[curr_hets + 2];
603
604 // Add 2 heterozygotes for next iteration; subtract
605 // one rare, one common homozygote.
606 curr_homr--;
607 curr_homc--;
608 }
609
610 for (i = 0; i <= rare_copies; i++)
611 het_probs[i] /= sum;
612
613 double p_hwe = 0.0;
614
615 // p-value calculation for p_hwe.
616 for (i = 0; i <= rare_copies; i++) {
617 if (het_probs[i] > het_probs[n_ab])
618 continue;
619 p_hwe += het_probs[i];
620 }
621
622 p_hwe = p_hwe > 1.0 ? 1.0 : p_hwe;
623
624 free(het_probs);
625
626 return p_hwe;
627 }
628
UcharToDouble02(const unsigned char c)629 double UcharToDouble02(const unsigned char c) { return (double)c * 0.01; }
630
Double02ToUchar(const double dosage)631 unsigned char Double02ToUchar(const double dosage) {
632 return (int)(dosage * 100);
633 }
634