1 /*
2 * mkl.cpp
3 * gcta
4 *
5 * Created by Jian Yang on 24/01/13.
6 * Copyright 2013 QIMR. All rights reserved.
7 *
8 */
9
10 #include "gcta.h"
11
12 /////////////////
13 // data functions
14
make_XMat_mkl(float * X,bool grm_d_flag)15 void gcta::make_XMat_mkl(float *X, bool grm_d_flag)
16 {
17 if (_mu.empty()) calcu_mu();
18
19 cout << "Recoding genotypes (individual major mode) ..." << endl;
20 unsigned long i = 0, j = 0, k = 0, n = _keep.size(), m = _include.size();
21
22 if (!grm_d_flag) {
23 #pragma omp parallel for private(j)
24 for (i = 0; i < n; i++) {
25 if (_dosage_flag) {
26 for (j = 0; j < m; j++) {
27 if (_geno_dose[_keep[i]][_include[j]] < 1e5) {
28 if (_allele1[_include[j]] == _ref_A[_include[j]]) X[i * m + j] = _geno_dose[_keep[i]][_include[j]];
29 else X[i * m + j] = 2.0 - _geno_dose[_keep[i]][_include[j]];
30 } else X[i * m + j] = 1e6;
31 }
32 _geno_dose[i].clear();
33 } else {
34 for (j = 0; j < _include.size(); j++) {
35 if (!_snp_1[_include[j]][_keep[i]] || _snp_2[_include[j]][_keep[i]]) {
36 if (_allele1[_include[j]] == _ref_A[_include[j]]) X[i * m + j] = _snp_1[_include[j]][_keep[i]] + _snp_2[_include[j]][_keep[i]];
37 else X[i * m + j] = 2.0 - (_snp_1[_include[j]][_keep[i]] + _snp_2[_include[j]][_keep[i]]);
38 } else X[i * m + j] = 1e6;
39 }
40 }
41 }
42 }
43 else {
44 #pragma omp parallel for private(j, k)
45 for (i = 0; i < n; i++) {
46 if (_dosage_flag) {
47 for (j = 0; j < m; j++) {
48 if (_geno_dose[_keep[i]][_include[j]] < 1e5) {
49 k = i * m + j;
50 if (_allele1[_include[j]] == _ref_A[_include[j]]) X[k] = _geno_dose[_keep[i]][_include[j]];
51 else X[k] = 2.0 - _geno_dose[_keep[i]][_include[j]];
52 if (X[k] < 0.5) X[k] = 0.0;
53 else if (X[k] < 1.5) X[k] = _mu[_include[j]];
54 else X[k] = (2.0 * _mu[_include[j]] - 2.0);
55 } else X[k] = 1e6;
56 }
57 _geno_dose[i].clear();
58 }
59 else {
60 for (j = 0; j < _include.size(); j++) {
61 if (!_snp_1[_include[j]][_keep[i]] || _snp_2[_include[j]][_keep[i]]) {
62 k = i * m + j;
63 if (_allele1[_include[j]] == _ref_A[_include[j]]) X[k] = _snp_1[_include[j]][_keep[i]] + _snp_2[_include[j]][_keep[i]];
64 else X[k] = 2.0 - (_snp_1[_include[j]][_keep[i]] + _snp_2[_include[j]][_keep[i]]);
65 if (X[k] < 0.5) X[k] = 0.0;
66 else if (X[k] < 1.5) X[k] = _mu[_include[j]];
67 else X[k] = (2.0 * _mu[_include[j]] - 2.0);
68 } else X[i * m + j] = 1e6;
69 }
70 }
71 }
72 }
73 }
74
std_XMat_mkl(float * X,vector<double> & sd_SNP,bool grm_xchr_flag,bool miss_with_mu,bool divid_by_std)75 void gcta::std_XMat_mkl(float *X, vector<double> &sd_SNP, bool grm_xchr_flag, bool miss_with_mu, bool divid_by_std) {
76 if (_mu.empty()) calcu_mu();
77
78 unsigned long i = 0, j = 0, k = 0, n = _keep.size(), m = _include.size();
79 sd_SNP.clear();
80 sd_SNP.resize(m);
81 if (_dosage_flag) {
82 #pragma omp parallel for private(i)
83 for (j = 0; j < m; j++) {
84 for (i = 0; i < n; i++) {
85 double d_buf = X[i * m + j] - _mu[_include[j]];
86 sd_SNP[j] += d_buf*d_buf;
87 }
88 sd_SNP[j] /= (n - 1.0);
89 }
90 } else {
91 for (j = 0; j < m; j++) sd_SNP[j] = _mu[_include[j]]*(1.0 - 0.5 * _mu[_include[j]]);
92 }
93 if (divid_by_std) {
94 for (j = 0; j < m; j++) {
95 if (fabs(sd_SNP[j]) < 1.0e-50) sd_SNP[j] = 0.0;
96 else sd_SNP[j] = sqrt(1.0 / sd_SNP[j]);
97 }
98 }
99
100 #pragma omp parallel for private(j, k)
101 for (i = 0; i < n; i++) {
102 for (j = 0; j < m; j++) {
103 k = i * m + j;
104 if (X[k] < 1e5) {
105 X[k] -= _mu[_include[j]];
106 if (divid_by_std) X[k] *= sd_SNP[j];
107 } else if (miss_with_mu) X[k] = 0.0;
108 }
109 }
110
111 if (!grm_xchr_flag) return;
112 // for the X-chromosome
113 check_sex();
114 double f_buf = sqrt(0.5);
115
116 #pragma omp parallel for private(j, k)
117 for (i = 0; i < n; i++) {
118 if (_sex[_keep[i]] == 1) {
119 for (j = 0; j < m; j++) {
120 k = i * m + j;
121 if (X[k] < 1e5) X[k] *= f_buf;
122 else if (miss_with_mu) X[k] = 0.0;
123 }
124 }
125 }
126 }
127
std_XMat_d_mkl(float * X,vector<double> & sd_SNP,bool miss_with_mu,bool divid_by_std)128 void gcta::std_XMat_d_mkl(float *X, vector<double> &sd_SNP, bool miss_with_mu, bool divid_by_std) {
129 if (_mu.empty()) calcu_mu();
130
131 unsigned long i = 0, j = 0, k = 0, n = _keep.size(), m = _include.size();
132 sd_SNP.clear();
133 sd_SNP.resize(m);
134 if (_dosage_flag) {
135 #pragma omp parallel for private(i)
136 for (j = 0; j < m; j++) {
137 for (i = 0; i < n; i++) {
138 double d_buf = (X[i * m + j] - _mu[_include[j]]);
139 sd_SNP[j] += d_buf*d_buf;
140 }
141 sd_SNP[j] /= (n - 1.0);
142 }
143 } else {
144 for (j = 0; j < m; j++) sd_SNP[j] = _mu[_include[j]]*(1.0 - 0.5 * _mu[_include[j]]);
145 }
146 if (divid_by_std) {
147 for (j = 0; j < m; j++) {
148 if (fabs(sd_SNP[j]) < 1.0e-50) sd_SNP[j] = 0.0;
149 else sd_SNP[j] = 1.0 / sd_SNP[j];
150 }
151 } else {
152 for (j = 0; j < m; j++) sd_SNP[j] = sd_SNP[j] * sd_SNP[j];
153 }
154 vector<double> psq(m);
155 for (j = 0; j < m; j++) psq[j] = 0.5 * _mu[_include[j]] * _mu[_include[j]];
156
157 #pragma omp parallel for private(j, k)
158 for (i = 0; i < n; i++) {
159 for (j = 0; j < m; j++) {
160 k = i * m + j;
161 if (X[k] < 1e5) {
162 X[k] -= psq[j];
163 if (divid_by_std) X[k] *= sd_SNP[j];
164 } else if (miss_with_mu) X[k] = 0.0;
165 }
166 }
167 }
168
169 ////////////
170 // GRM
make_grm_mkl(bool grm_d_flag,bool grm_xchr_flag,bool inbred,bool output_bin,int grm_mtd,bool mlmassoc,bool diag_f3_flag)171 void gcta::make_grm_mkl(bool grm_d_flag, bool grm_xchr_flag, bool inbred, bool output_bin, int grm_mtd, bool mlmassoc, bool diag_f3_flag)
172 {
173 if (!grm_d_flag && grm_xchr_flag) check_chrX();
174 else check_autosome();
175
176 unsigned long i = 0, j = 0, k = 0, l = 0, n = _keep.size(), m = _include.size();
177 _geno_mkl = new float[n * m]; // alloc memory to X matrix
178
179 make_XMat_mkl(_geno_mkl, grm_d_flag);
180 vector<double> sd_SNP, sd_SNP_buf;
181 if (grm_mtd == 0) {
182 if (grm_d_flag) std_XMat_d_mkl(_geno_mkl, sd_SNP, false, true);
183 else std_XMat_mkl(_geno_mkl, sd_SNP, grm_xchr_flag, false, true);
184 }
185 else {
186 if (grm_d_flag) std_XMat_d_mkl(_geno_mkl, sd_SNP, false, false);
187 else std_XMat_mkl(_geno_mkl, sd_SNP, grm_xchr_flag, false, false);
188 }
189
190 if (!mlmassoc) cout << "\nCalculating the" << ((grm_d_flag) ? " dominance" : "") << " genetic relationship matrix (GRM)" << (grm_xchr_flag ? " for the X chromosome" : "") << (_dosage_flag ? " using imputed dosage data" : "") << " ... (Note: default speed-optimized mode, may use huge RAM)" << endl;
191 else cout << "\nCalculating the genetic relationship matrix (GRM) ... " << endl;
192
193 // count the number of missing genotypes
194 vector< vector<int> > miss_pos(n);
195 bool * X_bool = new bool[n * m];
196 for (i = 0; i < n; i++) {
197 for (j = 0; j < m; j++) {
198 k = i * m + j;
199 if (_geno_mkl[k] < 1e5) X_bool[k] = true;
200 else {
201 _geno_mkl[k] = 0.0;
202 miss_pos[i].push_back(j);
203 X_bool[k] = false;
204 }
205 }
206 }
207
208 // Calculate A_N matrix
209 _grm_N.resize(n, n);
210 #pragma omp parallel for private(j, k)
211 for (i = 0; i < n; i++) {
212 for (j = 0; j <= i; j++) {
213 int miss_j = 0;
214 for (k = 0; k < miss_pos[j].size(); k++) miss_j += (int) X_bool[i * m + miss_pos[j][k]];
215 _grm_N(i,j) = m - miss_pos[i].size() - miss_j;
216 }
217 }
218
219 // Calculate sum of LD weights
220 if (grm_mtd == 1) {
221 double denominator = 0.0;
222 for (j = 0; j < m; j++) denominator += sd_SNP[j];
223 denominator = denominator / (double)m;
224 #pragma omp parallel for private(j, k)
225 for (i = 0; i < n; i++) {
226 for (j = 0; j <= i; j++) _grm_N(i,j) = _grm_N(i,j) * denominator;
227 }
228 }
229
230 // Calcuate WW'
231 _grm_mkl = new float[n * n]; // alloc memory to A
232 cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, n, n, m, 1.0, _geno_mkl, m, _geno_mkl, m, 0.0, _grm_mkl, n);
233
234 // re-calcuate the diagonals (Fhat3+1)
235 if (diag_f3_flag) {
236 #pragma omp parallel for private(j,k,l)
237 for (i = 0; i < n; i++) {
238 l = i * n + i;
239 _grm_mkl[l] = 0.0;
240 for (j = 0; j < m; j++) {
241 k = i * m + j;
242 _grm_mkl[l] += _geno_mkl[k]*(_geno_mkl[k]+(_mu[_include[j]] - 1.0) * sd_SNP[j]);
243 }
244 }
245 }
246
247 // Calculate A matrix
248 #pragma omp parallel for private(j)
249 for (i = 0; i < n; i++) {
250 for (j = 0; j <= i; j++) {
251 if (_grm_N(i,j) > 0.0) _grm_mkl[i * n + j] /= _grm_N(i,j);
252 else _grm_mkl[i * n + j] = 0.0;
253 }
254 }
255
256 if (inbred) {
257 #pragma omp parallel for private(j)
258 for (i = 0; i < n; i++) {
259 for (j = 0; j <= i; j++) _grm_mkl[i * n + j] *= 0.5;
260 }
261 }
262
263 if (mlmassoc && grm_mtd == 0) {
264 for (j = 0; j < m; j++) {
265 if (fabs(sd_SNP[j]) < 1.0e-50) sd_SNP[j] = 0.0;
266 else sd_SNP[j] = 1.0 / sd_SNP[j];
267 }
268 #pragma omp parallel for private(j, k)
269 for (i = 0; i < n; i++) {
270 for (j = 0; j < m; j++) {
271 k = i * m + j;
272 if (_geno_mkl[k] < 1e5) _geno_mkl[k] *= sd_SNP[j];
273 else _geno_mkl[k] = 0.0;
274 }
275 }
276 delete[] X_bool;
277 } else {
278 // Output A_N and A
279 string out_buf = _out;
280 if (grm_d_flag) _out += ".d";
281 output_grm_mkl(_grm_mkl, output_bin);
282 _out = out_buf;
283
284 // free memory
285 delete[] _geno_mkl;
286 delete[] X_bool;
287 delete[] _grm_mkl;
288 }
289 }
290
output_grm_mkl(float * A,bool output_grm_bin)291 void gcta::output_grm_mkl(float* A, bool output_grm_bin)
292 {
293 unsigned long i = 0, j = 0, n = _keep.size();
294 string grm_file;
295
296 if (output_grm_bin) {
297 // Save matrix A in binary file
298 grm_file = _out + ".grm.bin";
299 fstream A_Bin(grm_file.c_str(), ios::out | ios::binary);
300 if (!A_Bin) throw ("Error: can not open the file [" + grm_file + "] to write.");
301 int size = sizeof (float);
302 for (i = 0; i < n; i++) {
303 for (j = 0; j <= i; j++) A_Bin.write((char*) &(A[i * n + j]), size);
304 }
305 A_Bin.close();
306 cout << "GRM of " << n << " individuals has been saved in the file [" + grm_file + "] (in binary format)." << endl;
307
308 string grm_N_file = _out + ".grm.N.bin";
309 fstream N_Bin(grm_N_file.c_str(), ios::out | ios::binary);
310 if (!N_Bin) throw ("Error: can not open the file [" + grm_N_file + "] to write.");
311 size = sizeof (float);
312 for (i = 0; i < n; i++) {
313 for (j = 0; j <= i; j++) N_Bin.write((char*) &(_grm_N(i,j)), size);
314 }
315 N_Bin.close();
316 cout << "Number of SNPs to calcuate the genetic relationship between each pair of individuals has been saved in the file [" + grm_N_file + "] (in binary format)." << endl;
317 } else {
318 // Save A matrix in txt format
319 grm_file = _out + ".grm.gz";
320 gzofstream zoutf;
321 zoutf.open(grm_file.c_str());
322 if (!zoutf.is_open()) throw ("Error: can not open the file [" + grm_file + "] to write.");
323 cout << "Saving the genetic relationship matrix to the file [" + grm_file + "] (in compressed text format)." << endl;
324 zoutf.setf(ios::scientific);
325 zoutf.precision(6);
326 for (i = 0; i < n; i++) {
327 for (j = 0; j <= i; j++) zoutf << i + 1 << '\t' << j + 1 << '\t' << _grm_N(i,j) << '\t' << A[i * n + j] << endl;
328 }
329 zoutf.close();
330 cout << "The genetic relationship matrix has been saved in the file [" + grm_file + "] (in compressed text format)." << endl;
331 }
332
333 string famfile = _out + ".grm.id";
334 ofstream Fam(famfile.c_str());
335 if (!Fam) throw ("Error: can not open the file [" + famfile + "] to write.");
336 for (i = 0; i < n; i++) Fam << _fid[_keep[i]] + "\t" + _pid[_keep[i]] << endl;
337 Fam.close();
338 cout << "IDs for the GRM file [" + grm_file + "] have been saved in the file [" + famfile + "]." << endl;
339 }
340
341 ///////////
342 // reml
343
comput_inverse_logdet_LDLT_mkl(eigenMatrix & Vi,double & logdet)344 bool gcta::comput_inverse_logdet_LDLT_mkl(eigenMatrix &Vi, double &logdet)
345 {
346 unsigned long i = 0, j = 0, n = Vi.cols();
347 double* Vi_mkl = new double[n * n];
348 //float* Vi_mkl=new float[n*n];
349
350 #pragma omp parallel for private(j)
351 for (i = 0; i < n; i++) {
352 for (j = 0; j < n; j++) {
353 Vi_mkl[i * n + j] = Vi(i, j);
354 }
355 }
356
357 // MKL's Cholesky decomposition
358 int info = 0, int_n = (int) n;
359 char uplo = 'L';
360 dpotrf_(&uplo, &int_n, Vi_mkl, &int_n, &info);
361 //spotrf( &uplo, &n, Vi_mkl, &n, &info );
362 if (info < 0) throw ("Error: Cholesky decomposition failed. Invalid values found in the matrix.\n");
363 else if (info > 0) return false;
364 else {
365 logdet = 0.0;
366 for (i = 0; i < n; i++) {
367 double d_buf = Vi_mkl[i * n + i];
368 logdet += log(d_buf * d_buf);
369 }
370
371 // Calcualte V inverse
372 dpotri_(&uplo, &int_n, Vi_mkl, &int_n, &info);
373 //spotri( &uplo, &n, Vi_mkl, &n, &info );
374 if (info < 0) throw ("Error: invalid values found in the varaince-covaraince (V) matrix.\n");
375 else if (info > 0) return false;
376 else {
377 #pragma omp parallel for private(j)
378 for (j = 0; j < n; j++) {
379 for (i = 0; i <= j; i++) Vi(i, j) = Vi(j, i) = Vi_mkl[i * n + j];
380 }
381 }
382 }
383
384 // free memory
385 delete[] Vi_mkl;
386
387 return true;
388
389 }
390
comput_inverse_logdet_LU_mkl(eigenMatrix & Vi,double & logdet)391 bool gcta::comput_inverse_logdet_LU_mkl(eigenMatrix &Vi, double &logdet)
392 {
393 unsigned long i = 0, j = 0, n = Vi.cols();
394 double* Vi_mkl = new double[n * n];
395
396 #pragma omp parallel for private(j)
397 for (i = 0; i < n; i++) {
398 for (j = 0; j < n; j++) {
399 Vi_mkl[i * n + j] = Vi(i, j);
400 }
401 }
402
403 int N = (int) n;
404 int *IPIV = new int[n + 1];
405 int LWORK = N*N;
406 double *WORK = new double[n * n];
407 int INFO;
408 dgetrf_(&N, &N, Vi_mkl, &N, IPIV, &INFO);
409 if (INFO < 0) throw ("Error: LU decomposition failed. Invalid values found in the matrix.\n");
410 else if (INFO > 0) {
411 delete[] Vi_mkl;
412 return false;
413 } else {
414 logdet = 0.0;
415 for (i = 0; i < n; i++) {
416 double d_buf = Vi_mkl[i * n + i];
417 logdet += log(fabs(d_buf));
418 }
419
420 // Calcualte V inverse
421 dgetri_(&N, Vi_mkl, &N, IPIV, WORK, &LWORK, &INFO);
422 if (INFO < 0) throw ("Error: invalid values found in the varaince-covaraince (V) matrix.\n");
423 else if (INFO > 0) return false;
424 else {
425 #pragma omp parallel for private(j)
426 for (j = 0; j < n; j++) {
427 for (i = 0; i <= j; i++) Vi(i, j) = Vi(j, i) = Vi_mkl[i * n + j];
428 }
429 }
430 }
431
432 // free memory
433 delete[] Vi_mkl;
434 delete[] IPIV;
435 delete[] WORK;
436
437 return true;
438 }
439
comput_inverse_logdet_LU_mkl_array(int n,float * Vi,double & logdet)440 bool gcta::comput_inverse_logdet_LU_mkl_array(int n, float *Vi, double &logdet) {
441 unsigned long i = 0, j = 0;
442 double* Vi_mkl = new double[n * n];
443
444 #pragma omp parallel for private(j)
445 for (i = 0; i < n; i++) {
446 for (j = 0; j < n; j++) {
447 Vi_mkl[i * n + j] = Vi[i * n + j];
448 }
449 }
450
451 int N = (int) n;
452 int *IPIV = new int[n + 1];
453 int LWORK = N*N;
454 double *WORK = new double[n * n];
455 int INFO;
456 dgetrf_(&N, &N, Vi_mkl, &N, IPIV, &INFO);
457 if (INFO < 0) throw ("Error: LU decomposition failed. Invalid values found in the matrix.\n");
458 else if (INFO > 0) {
459 delete[] Vi_mkl;
460 return (false); //Vi.diagonal()=Vi.diagonal().array()+Vi.diagonal().mean()*1e-3;
461 }
462 else {
463 logdet = 0.0;
464 for (i = 0; i < n; i++) {
465 double d_buf = Vi_mkl[i * n + i];
466 logdet += log(fabs(d_buf));
467 }
468
469 // Calcualte V inverse
470 dgetri_(&N, Vi_mkl, &N, IPIV, WORK, &LWORK, &INFO);
471 if (INFO < 0) throw ("Error: invalid values found in the varaince-covaraince (V) matrix.\n");
472 else if (INFO > 0) return (false); // Vi.diagonal()=Vi.diagonal().array()+Vi.diagonal().mean()*1e-3;
473 else {
474 #pragma omp parallel for private(j)
475 for (j = 0; j < n; j++) {
476 for (i = 0; i < n; i++) Vi[i * n + j] = Vi_mkl[i * n + j];
477 }
478 }
479 }
480
481 // free memory
482 delete[] Vi_mkl;
483 delete[] IPIV;
484 delete[] WORK;
485
486 return true;
487 }
488
489 //////////////
490 // LD
491
LD_pruning_mkl(double rsq_cutoff,int wind_size)492 void gcta::LD_pruning_mkl(double rsq_cutoff, int wind_size) {
493 check_autosome();
494
495 unsigned long i = 0, j = 0, k = 0, l = 0, n = _keep.size(), m = _include.size();
496 _geno_mkl = new float[n * m]; // alloc memory to X matrix
497 make_XMat_mkl(_geno_mkl, false);
498 vector<double> sd_SNP;
499 std_XMat_mkl(_geno_mkl, sd_SNP, false, true, true);
500
501 cout << "\nPruning SNPs for LD ..." << endl;
502 vector<int> brk_pnt1, brk_pnt2, brk_pnt3;
503 get_ld_blk_pnt(brk_pnt1, brk_pnt2, brk_pnt3, wind_size*2);
504
505 vector<int> rm_snp_indx;
506 LD_pruning_blk_mkl(_geno_mkl, brk_pnt1, rsq_cutoff, rm_snp_indx);
507 if (brk_pnt2.size() > 1) LD_pruning_blk_mkl(_geno_mkl, brk_pnt2, rsq_cutoff, rm_snp_indx);
508 stable_sort(rm_snp_indx.begin(), rm_snp_indx.end());
509 rm_snp_indx.erase(unique(rm_snp_indx.begin(), rm_snp_indx.end()), rm_snp_indx.end());
510 int m_sub = rm_snp_indx.size();
511 vector<string> rm_snp_name(m_sub);
512 #pragma omp parallel for
513 for (i = 0; i < m_sub; i++) rm_snp_name[i] = _snp_name[_include[rm_snp_indx[i]]];
514 update_id_map_rm(rm_snp_name, _snp_name_map, _include);
515 m = _include.size();
516
517 cout << "After LD-pruning, " << m << " SNPs are remaining." << endl;
518 string pruned_file = _out + ".prune.in";
519 ofstream oprune(pruned_file.data());
520 for (i = 0; i < m; i++) oprune << _snp_name[_include[i]] << endl;
521 oprune << endl;
522 cout << "The list of " << m << " LD-pruned SNPs (pruned in) have been saved in the file [" + pruned_file + "]." << endl;
523 }
524
LD_pruning_blk_mkl(float * X,vector<int> & brk_pnt,double rsq_cutoff,vector<int> & rm_snp_ID1)525 void gcta::LD_pruning_blk_mkl(float *X, vector<int> &brk_pnt, double rsq_cutoff, vector<int> &rm_snp_ID1)
526 {
527 unsigned long i = 0, j = 0, k = 0, l = 0, n = _keep.size(), m = _include.size(), size = 0;
528
529 for (i = 0; i < brk_pnt.size() - 1; i++) {
530 if (_chr[_include[brk_pnt[i]]] != _chr[_include[brk_pnt[i + 1]]]) continue;
531 size = brk_pnt[i + 1] - brk_pnt[i] + 1;
532 if (size < 3) continue;
533
534 float *X_sub = new float[n * size];
535 #pragma omp parallel for private(k, l)
536 for (j = 0; j < n; j++) {
537 for (k = 0, l = brk_pnt[i]; k < size; k++, l++) X_sub[j * size + k] = X[j * m + l];
538 }
539
540 float *rsq_sub = new float[size * size];
541 cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans, size, size, n, 1.0, X_sub, size, X_sub, size, 0.0, rsq_sub, size);
542 #pragma omp parallel for private(k, l)
543 for (j = 0; j < size; j++) {
544 for (k = 0; k < size; k++) {
545 l = j * size + k;
546 rsq_sub[l] /= (float) n;
547 rsq_sub[l] = rsq_sub[l] * rsq_sub[l];
548 }
549 }
550 vector<int> rm_snp_buf;
551 rm_cor_snp((int) size, brk_pnt[i], rsq_sub, rsq_cutoff, rm_snp_buf);
552 rm_snp_ID1.insert(rm_snp_ID1.end(), rm_snp_buf.begin(), rm_snp_buf.end());
553
554 delete[] X_sub;
555 delete[] rsq_sub;
556 }
557 }
558
calcu_mean_rsq_mkl(int wind_size,double rsq_cutoff)559 void gcta::calcu_mean_rsq_mkl(int wind_size, double rsq_cutoff)
560 {
561 check_autosome();
562
563 unsigned long i = 0, j = 0, k = 0, l = 0, n = _keep.size(), m = _include.size();
564 _geno_mkl = new float[n * m];
565 make_XMat_mkl(_geno_mkl, false);
566 vector<double> sd_SNP;
567 std_XMat_mkl(_geno_mkl, sd_SNP, false, true, true);
568 calcu_ssx_sqrt_i_mkl(_geno_mkl, sd_SNP);
569
570 cout << "Calculating mean and maximum LD rsq (window size = at least " << wind_size / 1000 << "Kb in either direction; LD rsq threshold = " << rsq_cutoff << ") ... " << endl;
571 vector<int> brk_pnt1, brk_pnt2, brk_pnt3;
572 get_ld_blk_pnt(brk_pnt1, brk_pnt2, brk_pnt3, wind_size*2);
573
574 eigenVector mean_rsq = eigenVector::Zero(m);
575 eigenVector snp_num = eigenVector::Zero(m);
576 eigenVector max_rsq = eigenVector::Zero(m);
577 calcu_ld_blk_mkl(_geno_mkl, sd_SNP, brk_pnt1, brk_pnt3, mean_rsq, snp_num, max_rsq, false, rsq_cutoff);
578 if (brk_pnt2.size() > 1) calcu_ld_blk_mkl(_geno_mkl, sd_SNP, brk_pnt2, brk_pnt3, mean_rsq, snp_num, max_rsq, true, rsq_cutoff);
579
580 string mrsq_file = _out + ".mrsq.ld";
581 ofstream o_mrsq(mrsq_file.data());
582 for (i = 0; i < m; i++) o_mrsq << _snp_name[_include[i]] << " " << 0.5 * _mu[_include[i]] << " " << mean_rsq[i] << " " << snp_num[i] << " " << max_rsq[i] << endl;
583 o_mrsq << endl;
584 cout << "Mean and maximum LD rsq for " << m << " SNPs have been saved in the file [" + mrsq_file + "]." << endl;
585 }
586
calcu_ssx_sqrt_i_mkl(float * X_std,vector<double> & ssx_sqrt_i)587 void gcta::calcu_ssx_sqrt_i_mkl(float *X_std, vector<double> &ssx_sqrt_i)
588 {
589 unsigned long i = 0, j = 0, k = 0, m = _include.size(), n = _keep.size();
590
591 ssx_sqrt_i.clear();
592 ssx_sqrt_i.resize(m);
593 #pragma omp parallel for private(i,k)
594 for (j = 0; j < m; j++) {
595 ssx_sqrt_i[j] = 0.0;
596 for (i = 0; i < n; i++) {
597 k = i * m + j;
598 ssx_sqrt_i[j] += X_std[k] * X_std[k];
599 }
600 ssx_sqrt_i[j] = sqrt(ssx_sqrt_i[j]);
601 if (ssx_sqrt_i[j] < 1.0e-50) ssx_sqrt_i[j] = 0.0;
602 else ssx_sqrt_i[j] = 1.0 / ssx_sqrt_i[j];
603 }
604 }
605
calcu_ld_blk_mkl(float * X,vector<double> & ssx,vector<int> & brk_pnt,vector<int> & brk_pnt3,eigenVector & mean_rsq,eigenVector & snp_num,eigenVector & max_rsq,bool second,double rsq_cutoff)606 void gcta::calcu_ld_blk_mkl(float *X, vector<double> &ssx, vector<int> &brk_pnt, vector<int> &brk_pnt3, eigenVector &mean_rsq, eigenVector &snp_num, eigenVector &max_rsq, bool second, double rsq_cutoff)
607 {
608 unsigned long i = 0, j = 0, k = 0, l = 0, s1 = 0, s2 = 0, n = _keep.size(), m = _include.size(), size = 0, size_limit = 10000;
609
610 for (i = 0; i < brk_pnt.size() - 1; i++) {
611 if (_chr[_include[brk_pnt[i]]] != _chr[_include[brk_pnt[i + 1]]]) continue;
612 size = brk_pnt[i + 1] - brk_pnt[i] + 1;
613 if (size < 3) continue;
614 if (second) {
615 s1 = brk_pnt3[i] - brk_pnt[i];
616 s2 = s1 + 1;
617 }
618 else {
619 s1 = 0;
620 s2 = size - 1;
621 }
622
623 float *X_sub = new float[n * size];
624 #pragma omp parallel for private(k,l)
625 for (j = 0; j < n; j++) {
626 for (k = 0, l = brk_pnt[i]; k < size; k++, l++) X_sub[j * size + k] = X[j * m + l];
627 }
628 vector<double> ssx_sub(size);
629 for (k = 0, l = brk_pnt[i]; k < size; k++, l++) ssx_sub[k] = ssx[l];
630 vector<double> rsq_size(size), mean_rsq_sub(size), max_rsq_sub(size);
631 for(j = 0; j < size; j++) max_rsq_sub[j] = -1.0;
632
633 if (size > size_limit) calcu_ld_blk_split_mkl(size, size_limit, X_sub, ssx_sub, rsq_cutoff, rsq_size, mean_rsq_sub, max_rsq_sub, s1, s2, second);
634 else {
635 float *rsq_sub = new float[size * size];
636 cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans, size, size, n, 1.0, X_sub, size, X_sub, size, 0.0, rsq_sub, size);
637 #pragma omp parallel for private(k,l)
638 for (j = 0; j < size; j++) {
639 rsq_size[j] = 0.0;
640 mean_rsq_sub[j] = 0.0;
641 for (k = 0; k < size; k++) {
642 if (second) {
643 if (j <= s1 && k <= s1) continue;
644 if (j >= s2 && k >= s2) continue;
645 }
646 if (k == j) continue;
647 l = j * size + k;
648 rsq_sub[l] *= (ssx_sub[j] * ssx_sub[k]);
649 if (rsq_sub[l] > 1.0) rsq_sub[l] = 1.0;
650 rsq_sub[l] = rsq_sub[l] * rsq_sub[l];
651 if (rsq_sub[l] >= rsq_cutoff) {
652 mean_rsq_sub[j] += rsq_sub[l];
653 rsq_size[j] += 1.0;
654 }
655 if (rsq_sub[l] > max_rsq_sub[j]) max_rsq_sub[j] = rsq_sub[l];
656 }
657 if (rsq_size[j] > 0.0) mean_rsq_sub[j] /= rsq_size[j];
658 }
659 delete[] rsq_sub;
660 }
661 delete[] X_sub;
662
663 for (j = 0, k = brk_pnt[i]; j < size; j++, k++) {
664 if (second) {
665 if (rsq_size[j] > 0.0) {
666 mean_rsq[k] = (mean_rsq[k] * snp_num[k] + mean_rsq_sub[j] * rsq_size[j]) / (snp_num[k] + rsq_size[j]);
667 snp_num[k] = (snp_num[k] + rsq_size[j]);
668 if(max_rsq[k] < max_rsq_sub[j]) max_rsq[k] = max_rsq_sub[j];
669 }
670 }
671 else {
672 mean_rsq[k] = mean_rsq_sub[j];
673 snp_num[k] = rsq_size[j];
674 max_rsq[k] = max_rsq_sub[j];
675 }
676 }
677 }
678 }
679
calcu_ld_blk_split_mkl(int size,int size_limit,float * X_sub,vector<double> & ssx_sub,double rsq_cutoff,vector<double> & rsq_size,vector<double> & mean_rsq_sub,vector<double> & max_rsq_sub,int s1,int s2,bool second)680 void gcta::calcu_ld_blk_split_mkl(int size, int size_limit, float *X_sub, vector<double> &ssx_sub, double rsq_cutoff, vector<double> &rsq_size, vector<double> &mean_rsq_sub, vector<double> &max_rsq_sub, int s1, int s2, bool second)
681 {
682 unsigned long i = 0, j = 0, k = 0, l = 0, m = 0, n = _keep.size();
683 vector<int> brk_pnt;
684 brk_pnt.push_back(0);
685 for (i = size_limit; i < size - size_limit; i += size_limit) {
686 brk_pnt.push_back(i - 1);
687 brk_pnt.push_back(i);
688 j = i;
689 }
690 j = (size - j) / 2 + j;
691 brk_pnt.push_back(j - 1);
692 brk_pnt.push_back(j);
693 brk_pnt.push_back(size - 1);
694
695 for (i = 0; i < brk_pnt.size() - 1; i++) {
696 int size_sub = brk_pnt[i + 1] - brk_pnt[i] + 1;
697 if (size_sub < 3) continue;
698
699 float *X_sub_sub = new float[size_sub * n];
700 #pragma omp parallel for private(k,l)
701 for (j = 0; j < n; j++) {
702 for (k = 0, l = brk_pnt[i]; k < size_sub; k++, l++) X_sub_sub[k * n + j] = X_sub[j * size + l];
703 }
704 vector<double> ssx_sub_sub(size_sub);
705 for (k = 0, l = brk_pnt[i]; k < size_sub; k++, l++) ssx_sub_sub[k] = ssx_sub[l];
706
707 float *rsq_sub_sub = new float[size_sub * size];
708 cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, size_sub, size, n, 1.0, X_sub_sub, n, X_sub, size, 0.0, rsq_sub_sub, size);
709 delete[] X_sub_sub;
710
711 vector<double> rsq_size_sub(size_sub), mean_rsq_sub_sub(size_sub), max_rsq_sub_sub(size_sub);
712 for (j = 0; j < size_sub; j++) max_rsq_sub_sub[j] = -1.0;
713 #pragma omp parallel for private(k,l)
714 for (j = 0; j < size_sub; j++) {
715 unsigned long s = j + brk_pnt[i];
716 rsq_size_sub[j] = 0.0;
717 mean_rsq_sub_sub[j] = 0.0;
718 for (k = 0; k < size; k++) {
719 if (second) {
720 if (s <= s1 && k <= s1) continue;
721 if (s >= s2 && k >= s2) continue;
722 }
723 if (k == s) continue;
724 l = j * size + k;
725 rsq_sub_sub[l] *= (ssx_sub_sub[j] * ssx_sub[k]);
726 rsq_sub_sub[l] = rsq_sub_sub[l] * rsq_sub_sub[l];
727 if (rsq_sub_sub[l] > 1.0) rsq_sub_sub[l] = 1.0;
728 if (rsq_sub_sub[l] >= rsq_cutoff) {
729 mean_rsq_sub_sub[j] += rsq_sub_sub[l];
730 rsq_size_sub[j] += 1.0;
731 }
732 if(rsq_sub_sub[l] > max_rsq_sub_sub[j]) max_rsq_sub_sub[j] = rsq_sub_sub[l];
733 }
734
735 if (rsq_size_sub[j] > 0.0) mean_rsq_sub_sub[j] /= rsq_size_sub[j];
736 }
737 delete[] rsq_sub_sub;
738
739 for (j = 0, k = brk_pnt[i]; j < size_sub; j++, k++) {
740 mean_rsq_sub[k] = mean_rsq_sub_sub[j];
741 rsq_size[k] = rsq_size_sub[j];
742 max_rsq_sub[k] = max_rsq_sub_sub[j];
743 }
744 }
745 }
746
747
748