1 /*
2     Genome-wide Efficient Mixed Model Association (GEMMA)
3     Copyright © 2011-2017, Xiang Zhou
4     Copyright © 2017, Peter Carbonetto
5     Copyright © 2017-2018, Pjotr Prins
6 
7     This program is free software: you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation, either version 3 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include <bitset>
22 #include <cmath>
23 #include <cstring>
24 #include <fstream>
25 #include <iomanip>
26 #include <iostream>
27 #include <limits.h>
28 #include <map>
29 #include <regex>
30 #include <set>
31 #include <sstream>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string>
35 #include <tuple>
36 #include <vector>
37 
38 // #include "Eigen/Dense"
39 
40 #include "gsl/gsl_version.h"
41 
42 #if GSL_MAJOR_VERSION < 2
43 #pragma message "GSL version " GSL_VERSION
44 #endif
45 
46 #include "gsl/gsl_sys.h" // for gsl_isnan, gsl_isinf, gsl_isfinite
47 #include "gsl/gsl_blas.h"
48 #include "gsl/gsl_cdf.h"
49 #include "gsl/gsl_linalg.h"
50 #include "gsl/gsl_matrix.h"
51 #include "gsl/gsl_vector.h"
52 #include "gsl/gsl_eigen.h"
53 
54 #include "debug.h"
55 // #include "eigenlib.h"
56 #include "fastblas.h"
57 #include "lapack.h"
58 #include "mathfunc.h"
59 
60 using namespace std;
61 // using namespace Eigen;
62 
has_nan(const vector<double> v)63 bool has_nan(const vector<double> v) {
64   if (!is_check_mode()) return false;
65 
66   for (const auto& e: v) {
67     if (is_nan(e))
68       return true;
69   }
70   return false;
71 }
72 
has_nan(const gsl_vector * v)73 bool has_nan(const gsl_vector *v) {
74   if (!is_check_mode()) return false;
75 
76   for (size_t i = 0; i < v->size; ++i)
77     if (is_nan(gsl_vector_get(v,i))) return true;
78   return false;
79 }
has_inf(const gsl_vector * v)80 bool has_inf(const gsl_vector *v) {
81   if (!is_check_mode()) return false;
82 
83   for (size_t i = 0; i < v->size; ++i) {
84     auto value = gsl_vector_get(v,i);
85     if (is_inf(value) != 0) return true;
86   }
87   return false;
88 }
has_nan(const gsl_matrix * m)89 bool has_nan(const gsl_matrix *m) {
90   if (!is_check_mode()) return false;
91 
92   for (size_t i = 0; i < m->size1; ++i)
93     for (size_t j = 0; j < m->size2; ++j)
94       if (is_nan(gsl_matrix_get(m,i,j))) return true;
95   return false;
96 }
has_inf(const gsl_matrix * m)97 bool has_inf(const gsl_matrix *m) {
98   if (!is_check_mode()) return false;
99 
100   for (size_t i = 0; i < m->size1; ++i)
101     for (size_t j = 0; j < m->size2; ++j) {
102       auto value = gsl_matrix_get(m,i,j);
103       if (is_inf(value) != 0) return true;
104     }
105   return false;
106 }
107 
is_integer(const std::string & s)108 bool is_integer(const std::string & s){
109     return std::regex_match(s, std::regex("^[0-9]+$"));
110 }
111 
is_float(const std::string & s)112 bool is_float(const std::string & s){
113     return std::regex_match(s, std::regex("^[+-]?([0-9]*[.])?[0-9]+$"));
114 }
115 
safe_log(const double d)116 double safe_log(const double d) {
117   if (!is_legacy_mode() && (is_check_mode() || is_debug_mode()))
118     enforce_msg(d > 0.0, (std::string("Trying to take the log of ") + std::to_string(d)).c_str());
119   return log(d);
120 }
121 
safe_sqrt(const double d)122 double safe_sqrt(const double d) {
123   double d1 = d;
124   if (fabs(d < 0.001))
125     d1 = fabs(d);
126   if (!is_legacy_mode() && (is_check_mode() || is_debug_mode()))
127     enforce_msg(d1 >= 0.0, (std::string("Trying to take the sqrt of ") + std::to_string(d)).c_str());
128   if (d1 < 0.0 )
129     return nan("");
130   return sqrt(d1);
131 }
132 
133 // calculate variance of a vector
VectorVar(const gsl_vector * v)134 double VectorVar(const gsl_vector *v) {
135   double d, m = 0.0, m2 = 0.0;
136   for (size_t i = 0; i < v->size; ++i) {
137     d = gsl_vector_get(v, i);
138     m += d;
139     m2 += d * d;
140   }
141   m /= (double)v->size;
142   m2 /= (double)v->size;
143   return m2 - m * m;
144 }
145 
146 // Center the matrix G.
CenterMatrix(gsl_matrix * G)147 void CenterMatrix(gsl_matrix *G) {
148   double d;
149   gsl_vector *w = gsl_vector_safe_alloc(G->size1);
150   gsl_vector *Gw = gsl_vector_safe_alloc(G->size1);
151   gsl_vector_set_all(w, 1.0);
152 
153   //  y := alpha*A*x+ beta*y or Gw = G*w
154   gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
155 
156   // int gsl_blas_dsyr2(CBLAS_UPLO_t Uplo, double alpha, const gsl_vector * x, const gsl_vector * y, gsl_matrix * A)
157   // compute the symmetric rank-2 update A = \alpha x y^T + \alpha y x^T + A of the symmetric matrix A. Since the matrix A is symmetric only its upper half or lower half need to be stored
158   // or G = (UpperTriangle) alpha*Gw*w' + alpha*w*Gw' + G
159   gsl_blas_dsyr2(CblasUpper, -1.0 / (double)G->size1, Gw, w, G);
160   // compute dot product of vectors w.Gw and store in d
161   gsl_blas_ddot(w, Gw, &d);
162   // G = (upper) alpha w*w' + G
163   gsl_blas_dsyr(CblasUpper, d / ((double)G->size1 * (double)G->size1), w, G);
164 
165   // Transpose the matrix
166   for (size_t i = 0; i < G->size1; ++i) {
167     for (size_t j = 0; j < i; ++j) {
168       d = gsl_matrix_get(G, j, i);
169       gsl_matrix_set(G, i, j, d);
170     }
171   }
172 
173   gsl_vector_safe_free(w);
174   gsl_vector_safe_free(Gw);
175 
176   return;
177 }
178 
179 // Center the matrix G.
180 // Only used in vc
CenterMatrix(gsl_matrix * G,const gsl_vector * w)181 void CenterMatrix(gsl_matrix *G, const gsl_vector *w) {
182   double d, wtw;
183   gsl_vector *Gw = gsl_vector_safe_alloc(G->size1);
184 
185   gsl_blas_ddot(w, w, &wtw);
186   gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
187   gsl_blas_dsyr2(CblasUpper, -1.0 / wtw, Gw, w, G);
188   gsl_blas_ddot(w, Gw, &d);
189   gsl_blas_dsyr(CblasUpper, d / (wtw * wtw), w, G);
190 
191   for (size_t i = 0; i < G->size1; ++i) {
192     for (size_t j = 0; j < i; ++j) {
193       d = gsl_matrix_get(G, j, i);
194       gsl_matrix_set(G, i, j, d);
195     }
196   }
197 
198   gsl_vector_safe_free(Gw);
199 
200   return;
201 }
202 
203 // Center the matrix G.
204 // Only used in vc
CenterMatrix(gsl_matrix * G,const gsl_matrix * W)205 void CenterMatrix(gsl_matrix *G, const gsl_matrix *W) {
206   gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
207   gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
208   gsl_matrix *WtWiWt = gsl_matrix_safe_alloc(W->size2, G->size1);
209   gsl_matrix *GW = gsl_matrix_safe_alloc(G->size1, W->size2);
210   gsl_matrix *WtGW = gsl_matrix_safe_alloc(W->size2, W->size2);
211   gsl_matrix *Gtmp = gsl_matrix_safe_alloc(G->size1, G->size1);
212 
213   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
214 
215   int sig;
216   gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
217   LUDecomp(WtW, pmt, &sig);
218   LUInvert(WtW, pmt, WtWi);
219 
220   gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, WtWi, W, 0.0, WtWiWt);
221   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, G, W, 0.0, GW);
222   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0, Gtmp);
223 
224   gsl_matrix_sub(G, Gtmp);
225   gsl_matrix_transpose(Gtmp);
226   gsl_matrix_sub(G, Gtmp);
227 
228   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, GW, 0.0, WtGW);
229   // GW is destroyed.
230   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, WtWiWt, WtGW, 0.0, GW);
231   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0, Gtmp);
232 
233   gsl_matrix_add(G, Gtmp);
234 
235   gsl_matrix_safe_free(WtW);
236   gsl_matrix_safe_free(WtWi);
237   gsl_matrix_safe_free(WtWiWt);
238   gsl_matrix_safe_free(GW);
239   gsl_matrix_safe_free(WtGW);
240   gsl_matrix_safe_free(Gtmp);
241 
242   return;
243 }
244 
245 // "Standardize" the matrix G such that all diagonal elements = 1.
246 // (only used by vc)
StandardizeMatrix(gsl_matrix * G)247 void StandardizeMatrix(gsl_matrix *G) {
248   double d = 0.0;
249   vector<double> vec_d;
250 
251   for (size_t i = 0; i < G->size1; ++i) {
252     vec_d.push_back(gsl_matrix_get(G, i, i));
253   }
254   for (size_t i = 0; i < G->size1; ++i) {
255     for (size_t j = i; j < G->size2; ++j) {
256       if (j == i) {
257         gsl_matrix_set(G, i, j, 1);
258       } else {
259         d = gsl_matrix_get(G, i, j);
260         d /= sqrt(vec_d[i] * vec_d[j]);
261         gsl_matrix_set(G, i, j, d);
262         gsl_matrix_set(G, j, i, d);
263       }
264     }
265   }
266 
267   return;
268 }
269 
270 // Scale the matrix G such that the mean diagonal = 1.
ScaleMatrix(gsl_matrix * G)271 double ScaleMatrix(gsl_matrix *G) {
272   double d = 0.0;
273 
274   // Compute mean of diagonal
275   for (size_t i = 0; i < G->size1; ++i) {
276     d += gsl_matrix_get(G, i, i);
277   }
278   d /= (double)G->size1;
279 
280   // Scale the matrix using the diagonal mean
281   if (d != 0) {
282     gsl_matrix_scale(G, 1.0 / d);
283   }
284 
285   return d;
286 }
287 
isMatrixSymmetric(const gsl_matrix * G)288 bool isMatrixSymmetric(const gsl_matrix *G) {
289   enforce(G->size1 == G->size2);
290   auto m = G->data;
291   // upper triangle
292   auto size = G->size1;
293   for(size_t c = 0; c < size; c++) {
294     for(size_t r = 0; r < c; r++) {
295       int x1 = c, y1 = r, x2 = r, y2 = c;
296       auto idx1 = y1*size+x1, idx2 = y2*size+x2;
297       // printf("(%d,%d %f - %d,%d %f)",x1,y1,m[idx1],x2,y2,m[idx2]);
298       if(m[idx1] != m[idx2]) {
299         cout << "Mismatch coordinates (" << c << "," << r << ")" << m[idx1] << ":" << m[idx2] << "!" << endl;
300         return false;
301       }
302     }
303   }
304   return true;
305 }
306 
isMatrixPositiveDefinite(const gsl_matrix * G)307 bool isMatrixPositiveDefinite(const gsl_matrix *G) {
308   enforce(G->size1 == G->size2);
309   auto G2 = gsl_matrix_safe_alloc(G->size1, G->size2);
310   enforce_gsl(gsl_matrix_safe_memcpy(G2,G));
311   auto handler = gsl_set_error_handler_off();
312 #if GSL_MAJOR_VERSION >= 2 && GSL_MINOR_VERSION >= 3
313   auto s = gsl_linalg_cholesky_decomp1(G2);
314 #else
315   auto s = gsl_linalg_cholesky_decomp(G2);
316 #endif
317   gsl_set_error_handler(handler);
318   if (s == GSL_SUCCESS) {
319     gsl_matrix_safe_free(G2);
320     return true;
321   }
322   gsl_matrix_free(G2);
323   return (false);
324 }
325 
getEigenValues(const gsl_matrix * G)326 gsl_vector *getEigenValues(const gsl_matrix *G) {
327   enforce(G->size1 == G->size2);
328   auto G2 = gsl_matrix_safe_alloc(G->size1, G->size2);
329   enforce_gsl(gsl_matrix_safe_memcpy(G2,G));
330   auto eworkspace = gsl_eigen_symm_alloc(G->size1);
331   enforce(eworkspace);
332   gsl_vector *eigenvalues = gsl_vector_safe_alloc(G->size1);
333   enforce_gsl(gsl_eigen_symm(G2, eigenvalues, eworkspace));
334   gsl_eigen_symm_free(eworkspace);
335   gsl_matrix_safe_free(G2);
336   return eigenvalues;
337 }
338 
339 // Check whether eigen values are larger than *min*
340 // by default 1E-5.
341 // Returns success, eigen min, eigen min-but-1, eigen max
342 
minmax(const gsl_vector * v)343 tuple<double, double, double> minmax(const gsl_vector *v) {
344   auto min  = v->data[0];
345   auto min1 = min;
346   auto max  = min;
347   for (size_t i=1; i<v->size; i++) {
348     auto value = std::abs(v->data[i]);
349     if (value < min) {
350       min1 = min;
351       min = value;
352     }
353     if (value > max)
354       max = value;
355   }
356   return std::make_tuple(min, min1, max);
357 }
358 
abs_minmax(const gsl_vector * v)359 tuple<double, double, double> abs_minmax(const gsl_vector *v) {
360   auto min  = std::abs(v->data[0]);
361   auto min1 = min;
362   auto max  = min;
363   for (size_t i=1; i<v->size; i++) {
364     auto value = std::abs(v->data[i]);
365     if (value < min) {
366       min1 = min;
367       min = value;
368     }
369     if (value > max)
370       max = value;
371   }
372   return std::make_tuple(min, min1, max);
373 }
374 
375 // Check for negative values. skip_min will leave out
376 // the lowest value
has_negative_values_but_one(const gsl_vector * v)377 bool has_negative_values_but_one(const gsl_vector *v) {
378   bool one_skipped = false;
379   for (size_t i=0; i<v->size; i++) {
380     if (v->data[i] < -EIGEN_MINVALUE) {
381       if (one_skipped)
382         return true;
383       one_skipped = true;
384     }
385   }
386   return false;
387 }
388 
count_abs_small_values(const gsl_vector * v,double min)389 uint count_abs_small_values(const gsl_vector *v, double min) {
390   uint count = 0;
391   for (size_t i=0; i<v->size; i++) {
392     if (std::abs(v->data[i]) < min) {
393       count += 1;
394     }
395   }
396   return count;
397 }
398 
399 // Check for matrix being ill conditioned by taking the eigen values
400 // and the ratio of max and min but one (min is expected to be zero).
isMatrixIllConditioned(const gsl_vector * eigenvalues,double max_ratio)401 bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio) {
402   auto t = abs_minmax(eigenvalues);
403 
404 #if !defined NDEBUG
405   auto absmin = get<0>(t);
406 #endif
407 
408   auto absmin1 = get<1>(t);
409   auto absmax = get<2>(t);
410   if (absmax/absmin1 > max_ratio) {
411     #if !NDEBUG
412     cerr << "**** DEBUG: Ratio |eigenmax|/|eigenmin| suggests matrix is ill conditioned for double precision" << endl;
413     auto t = minmax(eigenvalues);
414     auto min = get<0>(t);
415     auto min1 = get<1>(t);
416     auto max = get<2>(t);
417     cerr << "**** DEBUG: Abs eigenvalues [Min " << absmin << ", " << absmin1 << " ... " << absmax << " Max] Ratio (" << absmax << "/" << absmin1 << ") = " << absmax/absmin1 << endl;
418     cerr << "**** DEBUG: Eigenvalues [Min " << min << ", " << min1 << " ... " << max << " Max]" << endl;
419     #endif
420     return true;
421   }
422   return false;
423 }
424 
sum(const double * m,size_t rows,size_t cols)425 double sum(const double *m, size_t rows, size_t cols) {
426   double sum = 0.0;
427   for (size_t i = 0; i<rows*cols; i++)
428     sum += m[i];
429   return sum;
430 }
431 
SumVector(const gsl_vector * v)432 double SumVector(const gsl_vector *v) {
433   double sum = 0;
434   for (size_t i = 0; i < v->size; i++ ) {
435     sum += gsl_vector_get(v, i);
436   }
437   return( sum );
438 }
439 
440 // Center the vector y.
CenterVector(gsl_vector * y)441 double CenterVector(gsl_vector *y) {
442   double d = 0.0;
443 
444   for (size_t i = 0; i < y->size; ++i) {
445     d += gsl_vector_get(y, i);
446   }
447   d /= (double)y->size;
448 
449   gsl_vector_add_constant(y, -1.0 * d);
450 
451   return d;
452 }
453 
454 // Center the vector y.
CenterVector(gsl_vector * y,const gsl_matrix * W)455 void CenterVector(gsl_vector *y, const gsl_matrix *W) {
456   gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
457   gsl_vector *Wty = gsl_vector_safe_alloc(W->size2);
458   gsl_vector *WtWiWty = gsl_vector_safe_alloc(W->size2);
459 
460   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
461   gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
462 
463   int sig;
464   gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
465   LUDecomp(WtW, pmt, &sig);
466   LUSolve(WtW, pmt, Wty, WtWiWty);
467 
468   gsl_blas_dgemv(CblasNoTrans, -1.0, W, WtWiWty, 1.0, y);
469 
470   gsl_matrix_safe_free(WtW);
471   gsl_vector_safe_free(Wty);
472   gsl_vector_safe_free(WtWiWty);
473 
474   return;
475 }
476 
477 // "Standardize" vector y to have mean 0 and y^ty/n=1.
StandardizeVector(gsl_vector * y)478 void StandardizeVector(gsl_vector *y) {
479   double d = 0.0, m = 0.0, v = 0.0;
480 
481   for (size_t i = 0; i < y->size; ++i) {
482     d = gsl_vector_get(y, i);
483     m += d;
484     v += d * d;
485   }
486   m /= (double)y->size;
487   v /= (double)y->size;
488   v -= m * m;
489 
490   gsl_vector_add_constant(y, -1.0 * m);
491   gsl_vector_scale(y, 1.0 / sqrt(v));
492 
493   return;
494 }
495 
496 // Calculate UtX (U gets transposed)
CalcUtX(const gsl_matrix * U,gsl_matrix * UtX)497 void CalcUtX(const gsl_matrix *U, gsl_matrix *UtX) {
498   gsl_matrix *X = gsl_matrix_safe_alloc(UtX->size1, UtX->size2);
499   gsl_matrix_safe_memcpy(X, UtX);
500   fast_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
501   gsl_matrix_safe_free(X);
502 }
503 
CalcUtX(const gsl_matrix * U,const gsl_matrix * X,gsl_matrix * UtX)504 void CalcUtX(const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX) {
505   fast_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
506 }
507 
CalcUtX(const gsl_matrix * U,const gsl_vector * x,gsl_vector * Utx)508 void CalcUtX(const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx) {
509   gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx);
510 }
511 
512 // Kronecker product.
Kronecker(const gsl_matrix * K,const gsl_matrix * V,gsl_matrix * H)513 void Kronecker(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) {
514   for (size_t i = 0; i < K->size1; i++) {
515     for (size_t j = 0; j < K->size2; j++) {
516       gsl_matrix_view H_sub = gsl_matrix_submatrix(
517           H, i * V->size1, j * V->size2, V->size1, V->size2);
518       gsl_matrix_safe_memcpy(&H_sub.matrix, V);
519       gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j));
520     }
521   }
522   return;
523 }
524 
525 // Symmetric K matrix.
KroneckerSym(const gsl_matrix * K,const gsl_matrix * V,gsl_matrix * H)526 void KroneckerSym(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) {
527   for (size_t i = 0; i < K->size1; i++) {
528     for (size_t j = i; j < K->size2; j++) {
529       gsl_matrix_view H_sub = gsl_matrix_submatrix(
530           H, i * V->size1, j * V->size2, V->size1, V->size2);
531       gsl_matrix_safe_memcpy(&H_sub.matrix, V);
532       gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j));
533 
534       if (i != j) {
535         gsl_matrix_view H_sub_sym = gsl_matrix_submatrix(
536             H, j * V->size1, i * V->size2, V->size1, V->size2);
537         gsl_matrix_safe_memcpy(&H_sub_sym.matrix, &H_sub.matrix);
538       }
539     }
540   }
541   return;
542 }
543 
544 // This function calculates HWE p value with methods described in
545 // Wigginton et al. (2005) AJHG; it is based on the code in plink 1.07.
CalcHWE(const size_t n_hom1,const size_t n_hom2,const size_t n_ab)546 double CalcHWE(const size_t n_hom1, const size_t n_hom2, const size_t n_ab) {
547   if ((n_hom1 + n_hom2 + n_ab) == 0) {
548     return 1;
549   }
550 
551   // "AA" is the rare allele.
552   int n_aa = n_hom1 < n_hom2 ? n_hom1 : n_hom2;
553   int n_bb = n_hom1 < n_hom2 ? n_hom2 : n_hom1;
554 
555   int rare_copies = 2 * n_aa + n_ab;
556   int genotypes = n_ab + n_bb + n_aa;
557 
558   double *het_probs = (double *)malloc((rare_copies + 1) * sizeof(double));
559   if (het_probs == NULL)
560     cout << "Internal error: SNP-HWE: Unable to allocate array" << endl;
561 
562   int i;
563   for (i = 0; i <= rare_copies; i++)
564     het_probs[i] = 0.0;
565 
566   // Start at midpoint.
567   // XZ modified to add (long int)
568   int mid = ((long int)rare_copies *
569              (2 * (long int)genotypes - (long int)rare_copies)) /
570             (2 * (long int)genotypes);
571 
572   // Check to ensure that midpoint and rare alleles have same
573   // parity.
574   if ((rare_copies & 1) ^ (mid & 1))
575     mid++;
576 
577   int curr_hets = mid;
578   int curr_homr = (rare_copies - mid) / 2;
579   int curr_homc = genotypes - curr_hets - curr_homr;
580 
581   het_probs[mid] = 1.0;
582   double sum = het_probs[mid];
583   for (curr_hets = mid; curr_hets > 1; curr_hets -= 2) {
584     het_probs[curr_hets - 2] = het_probs[curr_hets] * curr_hets *
585                                (curr_hets - 1.0) /
586                                (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0));
587     sum += het_probs[curr_hets - 2];
588 
589     // Two fewer heterozygotes for next iteration; add one
590     // rare, one common homozygote.
591     curr_homr++;
592     curr_homc++;
593   }
594 
595   curr_hets = mid;
596   curr_homr = (rare_copies - mid) / 2;
597   curr_homc = genotypes - curr_hets - curr_homr;
598   for (curr_hets = mid; curr_hets <= rare_copies - 2; curr_hets += 2) {
599     het_probs[curr_hets + 2] = het_probs[curr_hets] * 4.0 * curr_homr *
600                                curr_homc /
601                                ((curr_hets + 2.0) * (curr_hets + 1.0));
602     sum += het_probs[curr_hets + 2];
603 
604     // Add 2 heterozygotes for next iteration; subtract
605     // one rare, one common homozygote.
606     curr_homr--;
607     curr_homc--;
608   }
609 
610   for (i = 0; i <= rare_copies; i++)
611     het_probs[i] /= sum;
612 
613   double p_hwe = 0.0;
614 
615   // p-value calculation for p_hwe.
616   for (i = 0; i <= rare_copies; i++) {
617     if (het_probs[i] > het_probs[n_ab])
618       continue;
619     p_hwe += het_probs[i];
620   }
621 
622   p_hwe = p_hwe > 1.0 ? 1.0 : p_hwe;
623 
624   free(het_probs);
625 
626   return p_hwe;
627 }
628 
UcharToDouble02(const unsigned char c)629 double UcharToDouble02(const unsigned char c) { return (double)c * 0.01; }
630 
Double02ToUchar(const double dosage)631 unsigned char Double02ToUchar(const double dosage) {
632   return (int)(dosage * 100);
633 }
634