1 // This file is part of PLINK 1.90, copyright (C) 2005-2020 Shaun Purcell,
2 // Christopher Chang.
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17
18 #include "plink_common.h"
19
20 #include <stddef.h>
21 #include "plink_assoc.h"
22 #include "plink_glm.h"
23 #include "plink_ld.h"
24 #include "plink_stats.h"
25 #include "pigz.h"
26
27 #define MULTIPLEX_LD 1920
28 #define MULTIPLEX_2LD (MULTIPLEX_LD * 2)
29
// Fills the three option structs with their built-in default values.
//
// * ldip: LD-related settings (pruning, windowing, haplotype blocks,
//   flip-scan, show-tags).
// * epi_ip: epistasis-related settings.
// * clump_ip: clumping-related settings.
//
// Every pointer member is set to nullptr, and ldip->snps_rl is reset via
// range_list_init().
void ld_epi_init(Ld_info* ldip, Epi_info* epi_ip, Clump_info* clump_ip) {
  // All-ones 32-bit value shared by several window/limit fields below.
  const uint32_t u32_all_ones = 0xffffffffU;

  // ---- Ld_info: pointer members and range list ----
  ldip->snpstr = nullptr;
  ldip->show_tags_fname = nullptr;
  range_list_init(&(ldip->snps_rl));

  // ---- Ld_info: flags and pruning ----
  ldip->modifier = 0;
  ldip->prune_window_size = 0;
  ldip->prune_window_incr = 0;
  ldip->prune_last_param = 0.0;

  // ---- Ld_info: pairwise-report window ----
  ldip->window_size = u32_all_ones;
  ldip->window_bp = u32_all_ones;
  ldip->window_cm = -1;
  ldip->window_r2 = 0.2;

  // ---- Ld_info: haplotype blocks ----
  ldip->blocks_max_bp = u32_all_ones;
  ldip->blocks_min_maf = 0.05;
  ldip->blocks_strong_lowci_outer = 71;
  ldip->blocks_strong_lowci = 72;
  ldip->blocks_strong_highci = 97;
  ldip->blocks_recomb_highci = 89;
  ldip->blocks_inform_frac = 0.95;

  // ---- Ld_info: flip-scan and show-tags ----
  ldip->flipscan_window_size = 10;
  ldip->flipscan_window_bp = u32_all_ones;
  ldip->flipscan_thresh = 0.5;
  ldip->show_tags_bp = 250000;
  ldip->show_tags_r2 = 0.8;

  // ---- Epi_info ----
  epi_ip->ld_mkr1 = nullptr;
  epi_ip->ld_mkr2 = nullptr;
  epi_ip->twolocus_mkr1 = nullptr;
  epi_ip->twolocus_mkr2 = nullptr;
  epi_ip->summary_merge_prefix = nullptr;
  epi_ip->modifier = 0;
  epi_ip->case_only_gap = 1000000;
  epi_ip->epi1 = 0.0;
  epi_ip->epi2 = 0.01;
  epi_ip->je_cellmin = 5;

  // ---- Clump_info ----
  clump_ip->fnames_flattened = nullptr;
  clump_ip->annotate_flattened = nullptr;
  clump_ip->snpfield_search_order = nullptr;
  clump_ip->pfield_search_order = nullptr;
  clump_ip->range_fname = nullptr;
  clump_ip->modifier = 0;
  clump_ip->fname_ct = 0;
  clump_ip->bp_radius = 249999;
  clump_ip->range_border = 0;
  clump_ip->p1 = 1e-4;
  clump_ip->p2 = 1e-2;
  clump_ip->r2 = 0.5;
}
77
// Releases the dynamically allocated members of the three option structs
// that ld_epi_init() sets to nullptr / initializes.
//
// NOTE(review): free_cond() is presumably a null-tolerant free; confirm
// against plink_common.h.  The structs themselves are not freed here.
void ld_epi_cleanup(Ld_info* ldip, Epi_info* epi_ip, Clump_info* clump_ip) {
  // Clump_info strings.
  free_cond(clump_ip->range_fname);
  free_cond(clump_ip->pfield_search_order);
  free_cond(clump_ip->snpfield_search_order);
  free_cond(clump_ip->annotate_flattened);
  free_cond(clump_ip->fnames_flattened);

  // Epi_info strings.
  free_cond(epi_ip->summary_merge_prefix);
  free_cond(epi_ip->twolocus_mkr2);
  free_cond(epi_ip->twolocus_mkr1);
  free_cond(epi_ip->ld_mkr2);
  free_cond(epi_ip->ld_mkr1);

  // Ld_info strings and variant range list.
  free_range_list(&(ldip->snps_rl));
  free_cond(ldip->show_tags_fname);
  free_cond(ldip->snpstr);
}
93
94 #ifdef __LP64__
static inline void ld_dot_prod_batch(__m128i* vec1, __m128i* vec2, __m128i* mask1, __m128i* mask2, int32_t* return_vals, uint32_t iters) {
  // Main routine for computation of \sum_i^M (x_i - \mu_x)(y_i - \mu_y), where
  // x_i, y_i \in \{-1, 0, 1\}, but there are missing values.
  //
  // We decompose this sum into
  //   \sum_i x_iy_i - \mu_y\sum_i x_i - \mu_x\sum_i y_i +
  //   (M - # missing)\mu_x\mu_y.
  // *Without* missing values, this can be handled very cleanly.  The last
  // three terms can all be precomputed, and \sum_i x_iy_i can be handled in a
  // manner very similar to bitwise Hamming distance.  This is several times as
  // fast as the lookup tables used for relationship matrices.
  //
  // Unfortunately, when missing values are present,
  // \mu_y\sum_{i: nonmissing from y} x_i and
  // \mu_x\sum_{i: nonmissing from x} y_i must also be evaluated (and, in
  // practice, \mu_y\sum_{i: nonmissing from y} x_i^2 and
  // \mu_x\sum_{i: nonmissing from x} y_i^2 should be determined here as well);
  // this removes much of the speed advantage, and the best applications of the
  // underlying ternary dot product algorithm used here lie elsewhere.
  // Nevertheless, it is still faster, so we use it.
  // (possible todo: accelerated function when there really are no missing
  // values, similar to what is now done for --fast-epistasis)
  //
  // Input:
  // * vec1 and vec2 are encoded -1 -> 00, 0/missing -> 01, 1 -> 10.
  // * mask1 and mask2 mask out missing values (i.e. 00 for missing, 11 for
  //   nonmissing).
  // * return_vals provides space for return values; its five entries are
  //   updated in place (not overwritten).
  // * iters is the number of 48-byte windows to process, anywhere from 1 to 10
  //   inclusive.  It must be nonzero: the loop below is a do-while, so it
  //   always executes at least once.
  //
  // This function performs the update
  //   return_vals[0] += (-N) + \sum_i x_iy_i
  //   return_vals[1] += N_y + \sum_{i: nonmissing from y} x_i
  //   return_vals[2] += N_x + \sum_{i: nonmissing from x} y_i
  //   return_vals[3] += N_y - \sum_{i: nonmissing from y} x_i^2
  //   return_vals[4] += N_x - \sum_{i: nonmissing from x} y_i^2
  // where N is the number of samples processed after applying the missingness
  // masks indicated by the subscripts.
  //
  // Computation of terms [1]-[4] is based on the identity
  //   N_y + \sum_{i: nonmissing from y} x_i = popcount2(vec1 & mask2)
  // where "popcount2" refers to starting with two-bit integers instead of
  // one-bit integers in our summing process (this allows us to skip a few
  // operations).  (Once we can assume the presence of hardware popcount, a
  // slightly different implementation may be better.)
  //
  // The trickier [0] computation currently proceeds as follows:
  //
  // 1. zcheck := (vec1 | vec2) & 0x5555...
  // Detects whether at least one member of the pair has a 0/missing value.
  //
  // 2. popcount2(((vec1 ^ vec2) & (0xaaaa... - zcheck)) | zcheck)
  // Subtracting this *from* a bias will give us our desired \sum_i x_iy_i dot
  // product.
  //
  // MULTIPLEX_LD sets of values are usually handled per function call.  If
  // fewer values are present, the ends of all input vectors should be zeroed
  // out.

  // Field-merging masks: m1 selects the low bit of every 2-bit field, m2 the
  // low 2 bits of every nybble, m4 the low nybble of every byte.
  const __m128i m1 = {FIVEMASK, FIVEMASK};
  const __m128i m2 = {0x3333333333333333LLU, 0x3333333333333333LLU};
  const __m128i m4 = {0x0f0f0f0f0f0f0f0fLLU, 0x0f0f0f0f0f0f0f0fLLU};
  __m128i loader1;
  __m128i loader2;
  __m128i sum1;
  __m128i sum2;
  __m128i sum11;
  __m128i sum22;
  __m128i sum12;
  __m128i tmp_sum1;
  __m128i tmp_sum2;
  __m128i tmp_sum12;
  // Byte-wide running totals; acc feeds return_vals[0], acc1/acc2 feed
  // [1]/[2], acc11/acc22 feed [3]/[4].
  __univec acc;
  __univec acc1;
  __univec acc2;
  __univec acc11;
  __univec acc22;
  acc.vi = _mm_setzero_si128();
  acc1.vi = _mm_setzero_si128();
  acc2.vi = _mm_setzero_si128();
  acc11.vi = _mm_setzero_si128();
  acc22.vi = _mm_setzero_si128();
  do {
    // ---- first 16-byte vector of this 48-byte window ----
    loader1 = *vec1++;
    loader2 = *vec2++;
    // note the cross-pairing: vec1 is masked by mask2 and vec2 by mask1
    sum1 = *mask2++;
    sum2 = *mask1++;
    // zcheck (step 1 of the header comment)
    sum12 = _mm_and_si128(_mm_or_si128(loader1, loader2), m1);
    sum1 = _mm_and_si128(sum1, loader1);
    sum2 = _mm_and_si128(sum2, loader2);
    // low bit of each masked field is set exactly when the value is 01
    sum11 = _mm_and_si128(sum1, m1);
    sum22 = _mm_and_si128(sum2, m1);
    // use andnot to eliminate need for 0xaaaa... to occupy an xmm register
    // (~(0x5555... + zcheck) has the same 2-bit fields as 0xaaaa... - zcheck)
    loader1 = _mm_andnot_si128(_mm_add_epi64(m1, sum12), _mm_xor_si128(loader1, loader2));
    sum12 = _mm_or_si128(sum12, loader1);

    // sum1, sum2, and sum12 now store the (biased) two-bit sums of
    // interest; merge to 4 bits to prevent overflow.  this merge can be
    // postponed for sum11 and sum22 because the individual terms are 0/1
    // instead of 0/1/2.
    sum1 = _mm_add_epi64(_mm_and_si128(sum1, m2), _mm_and_si128(_mm_srli_epi64(sum1, 2), m2));
    sum2 = _mm_add_epi64(_mm_and_si128(sum2, m2), _mm_and_si128(_mm_srli_epi64(sum2, 2), m2));
    sum12 = _mm_add_epi64(_mm_and_si128(sum12, m2), _mm_and_si128(_mm_srli_epi64(sum12, 2), m2));

    // ---- second vector: same computation, accumulated into the sums ----
    loader1 = *vec1++;
    loader2 = *vec2++;
    tmp_sum1 = *mask2++;
    tmp_sum2 = *mask1++;
    tmp_sum12 = _mm_and_si128(_mm_or_si128(loader1, loader2), m1);
    tmp_sum1 = _mm_and_si128(tmp_sum1, loader1);
    tmp_sum2 = _mm_and_si128(tmp_sum2, loader2);
    sum11 = _mm_add_epi64(sum11, _mm_and_si128(tmp_sum1, m1));
    sum22 = _mm_add_epi64(sum22, _mm_and_si128(tmp_sum2, m1));
    loader1 = _mm_andnot_si128(_mm_add_epi64(m1, tmp_sum12), _mm_xor_si128(loader1, loader2));
    tmp_sum12 = _mm_or_si128(loader1, tmp_sum12);

    sum1 = _mm_add_epi64(sum1, _mm_add_epi64(_mm_and_si128(tmp_sum1, m2), _mm_and_si128(_mm_srli_epi64(tmp_sum1, 2), m2)));
    sum2 = _mm_add_epi64(sum2, _mm_add_epi64(_mm_and_si128(tmp_sum2, m2), _mm_and_si128(_mm_srli_epi64(tmp_sum2, 2), m2)));
    sum12 = _mm_add_epi64(sum12, _mm_add_epi64(_mm_and_si128(tmp_sum12, m2), _mm_and_si128(_mm_srli_epi64(tmp_sum12, 2), m2)));

    // ---- third vector ----
    loader1 = *vec1++;
    loader2 = *vec2++;
    tmp_sum1 = *mask2++;
    tmp_sum2 = *mask1++;
    tmp_sum12 = _mm_and_si128(_mm_or_si128(loader1, loader2), m1);
    tmp_sum1 = _mm_and_si128(tmp_sum1, loader1);
    tmp_sum2 = _mm_and_si128(tmp_sum2, loader2);
    sum11 = _mm_add_epi64(sum11, _mm_and_si128(tmp_sum1, m1));
    sum22 = _mm_add_epi64(sum22, _mm_and_si128(tmp_sum2, m1));
    loader1 = _mm_andnot_si128(_mm_add_epi64(m1, tmp_sum12), _mm_xor_si128(loader1, loader2));
    tmp_sum12 = _mm_or_si128(loader1, tmp_sum12);

    sum1 = _mm_add_epi64(sum1, _mm_add_epi64(_mm_and_si128(tmp_sum1, m2), _mm_and_si128(_mm_srli_epi64(tmp_sum1, 2), m2)));
    sum2 = _mm_add_epi64(sum2, _mm_add_epi64(_mm_and_si128(tmp_sum2, m2), _mm_and_si128(_mm_srli_epi64(tmp_sum2, 2), m2)));
    // sum11/sum22 hold at most 3 per 2-bit field here, so this is the last
    // point at which their postponed 2-bit -> 4-bit merge is still safe
    sum11 = _mm_add_epi64(_mm_and_si128(sum11, m2), _mm_and_si128(_mm_srli_epi64(sum11, 2), m2));
    sum22 = _mm_add_epi64(_mm_and_si128(sum22, m2), _mm_and_si128(_mm_srli_epi64(sum22, 2), m2));
    sum12 = _mm_add_epi64(sum12, _mm_add_epi64(_mm_and_si128(tmp_sum12, m2), _mm_and_si128(_mm_srli_epi64(tmp_sum12, 2), m2)));

    // widen the per-window nybble sums to bytes and add to the accumulators
    acc1.vi = _mm_add_epi64(acc1.vi, _mm_add_epi64(_mm_and_si128(sum1, m4), _mm_and_si128(_mm_srli_epi64(sum1, 4), m4)));
    acc2.vi = _mm_add_epi64(acc2.vi, _mm_add_epi64(_mm_and_si128(sum2, m4), _mm_and_si128(_mm_srli_epi64(sum2, 4), m4)));
    acc11.vi = _mm_add_epi64(acc11.vi, _mm_add_epi64(_mm_and_si128(sum11, m4), _mm_and_si128(_mm_srli_epi64(sum11, 4), m4)));
    acc22.vi = _mm_add_epi64(acc22.vi, _mm_add_epi64(_mm_and_si128(sum22, m4), _mm_and_si128(_mm_srli_epi64(sum22, 4), m4)));
    acc.vi = _mm_add_epi64(acc.vi, _mm_add_epi64(_mm_and_si128(sum12, m4), _mm_and_si128(_mm_srli_epi64(sum12, 4), m4)));
  } while (--iters);
  // moved down because we've almost certainly run out of xmm registers
  const __m128i m8 = {0x00ff00ff00ff00ffLLU, 0x00ff00ff00ff00ffLLU};
  // Byte -> 16-bit merge.  With the larger batch size, the byte totals of the
  // 0/1/2-valued accumulators are masked *before* adding so that the addition
  // cannot carry out of a byte; the cheaper add-then-mask form is used when
  // the totals are known to be small enough.
#if MULTIPLEX_LD > 960
  acc1.vi = _mm_add_epi64(_mm_and_si128(acc1.vi, m8), _mm_and_si128(_mm_srli_epi64(acc1.vi, 8), m8));
  acc2.vi = _mm_add_epi64(_mm_and_si128(acc2.vi, m8), _mm_and_si128(_mm_srli_epi64(acc2.vi, 8), m8));
  acc.vi = _mm_add_epi64(_mm_and_si128(acc.vi, m8), _mm_and_si128(_mm_srli_epi64(acc.vi, 8), m8));
#else
  acc1.vi = _mm_and_si128(_mm_add_epi64(acc1.vi, _mm_srli_epi64(acc1.vi, 8)), m8);
  acc2.vi = _mm_and_si128(_mm_add_epi64(acc2.vi, _mm_srli_epi64(acc2.vi, 8)), m8);
  acc.vi = _mm_and_si128(_mm_add_epi64(acc.vi, _mm_srli_epi64(acc.vi, 8)), m8);
#endif
  // the 0/1-valued accumulators always use the add-then-mask form
  acc11.vi = _mm_and_si128(_mm_add_epi64(acc11.vi, _mm_srli_epi64(acc11.vi, 8)), m8);
  acc22.vi = _mm_and_si128(_mm_add_epi64(acc22.vi, _mm_srli_epi64(acc22.vi, 8)), m8);

  // Horizontal sum: add the two 64-bit lanes, then multiply by
  // 0x0001000100010001 so that the top 16 bits hold the sum of the four
  // 16-bit fields.
  return_vals[0] -= ((acc.u8[0] + acc.u8[1]) * 0x1000100010001LLU) >> 48;
  return_vals[1] += ((acc1.u8[0] + acc1.u8[1]) * 0x1000100010001LLU) >> 48;
  return_vals[2] += ((acc2.u8[0] + acc2.u8[1]) * 0x1000100010001LLU) >> 48;
  return_vals[3] += ((acc11.u8[0] + acc11.u8[1]) * 0x1000100010001LLU) >> 48;
  return_vals[4] += ((acc22.u8[0] + acc22.u8[1]) * 0x1000100010001LLU) >> 48;
}
264
ld_dot_prod(uintptr_t * vec1,uintptr_t * vec2,uintptr_t * mask1,uintptr_t * mask2,int32_t * return_vals,uint32_t batch_ct_m1,uint32_t last_batch_size)265 void ld_dot_prod(uintptr_t* vec1, uintptr_t* vec2, uintptr_t* mask1, uintptr_t* mask2, int32_t* return_vals, uint32_t batch_ct_m1, uint32_t last_batch_size) {
266 while (batch_ct_m1--) {
267 ld_dot_prod_batch((__m128i*)vec1, (__m128i*)vec2, (__m128i*)mask1, (__m128i*)mask2, return_vals, MULTIPLEX_LD / 192);
268 vec1 = &(vec1[MULTIPLEX_LD / BITCT2]);
269 vec2 = &(vec2[MULTIPLEX_LD / BITCT2]);
270 mask1 = &(mask1[MULTIPLEX_LD / BITCT2]);
271 mask2 = &(mask2[MULTIPLEX_LD / BITCT2]);
272 }
273 ld_dot_prod_batch((__m128i*)vec1, (__m128i*)vec2, (__m128i*)mask1, (__m128i*)mask2, return_vals, last_batch_size);
274 }
275
ld_dot_prod_nm_batch(__m128i * vec1,__m128i * vec2,uint32_t iters)276 static inline int32_t ld_dot_prod_nm_batch(__m128i* vec1, __m128i* vec2, uint32_t iters) {
277 // faster ld_dot_prod_batch() for no-missing-calls case.
278 const __m128i m1 = {FIVEMASK, FIVEMASK};
279 const __m128i m2 = {0x3333333333333333LLU, 0x3333333333333333LLU};
280 const __m128i m4 = {0x0f0f0f0f0f0f0f0fLLU, 0x0f0f0f0f0f0f0f0fLLU};
281 const __m128i m8 = {0x00ff00ff00ff00ffLLU, 0x00ff00ff00ff00ffLLU};
282 __m128i loader1;
283 __m128i loader2;
284 __m128i sum12;
285 __m128i tmp_sum12;
286 __univec acc;
287 acc.vi = _mm_setzero_si128();
288 do {
289 loader1 = *vec1++;
290 loader2 = *vec2++;
291 sum12 = _mm_and_si128(_mm_or_si128(loader1, loader2), m1);
292 loader1 = _mm_andnot_si128(_mm_add_epi64(m1, sum12), _mm_xor_si128(loader1, loader2));
293 sum12 = _mm_or_si128(sum12, loader1);
294 sum12 = _mm_add_epi64(_mm_and_si128(sum12, m2), _mm_and_si128(_mm_srli_epi64(sum12, 2), m2));
295
296 loader1 = *vec1++;
297 loader2 = *vec2++;
298 tmp_sum12 = _mm_and_si128(_mm_or_si128(loader1, loader2), m1);
299 loader1 = _mm_andnot_si128(_mm_add_epi64(m1, tmp_sum12), _mm_xor_si128(loader1, loader2));
300 tmp_sum12 = _mm_or_si128(loader1, tmp_sum12);
301 sum12 = _mm_add_epi64(sum12, _mm_add_epi64(_mm_and_si128(tmp_sum12, m2), _mm_and_si128(_mm_srli_epi64(tmp_sum12, 2), m2)));
302
303 loader1 = *vec1++;
304 loader2 = *vec2++;
305 tmp_sum12 = _mm_and_si128(_mm_or_si128(loader1, loader2), m1);
306 loader1 = _mm_andnot_si128(_mm_add_epi64(m1, tmp_sum12), _mm_xor_si128(loader1, loader2));
307 tmp_sum12 = _mm_or_si128(loader1, tmp_sum12);
308 sum12 = _mm_add_epi64(sum12, _mm_add_epi64(_mm_and_si128(tmp_sum12, m2), _mm_and_si128(_mm_srli_epi64(tmp_sum12, 2), m2)));
309
310 acc.vi = _mm_add_epi64(acc.vi, _mm_add_epi64(_mm_and_si128(sum12, m4), _mm_and_si128(_mm_srli_epi64(sum12, 4), m4)));
311 } while (--iters);
312 #if MULTIPLEX_LD > 960
313 acc.vi = _mm_add_epi64(_mm_and_si128(acc.vi, m8), _mm_and_si128(_mm_srli_epi64(acc.vi, 8), m8));
314 #else
315 acc.vi = _mm_and_si128(_mm_add_epi64(acc.vi, _mm_srli_epi64(acc.vi, 8)), m8);
316 #endif
317 return (uint32_t)(((acc.u8[0] + acc.u8[1]) * 0x1000100010001LLU) >> 48);
318 }
319
ld_dot_prod_nm(uintptr_t * vec1,uintptr_t * vec2,uint32_t founder_ct,uint32_t batch_ct_m1,uint32_t last_batch_size)320 int32_t ld_dot_prod_nm(uintptr_t* vec1, uintptr_t* vec2, uint32_t founder_ct, uint32_t batch_ct_m1, uint32_t last_batch_size) {
321 // accelerated implementation for no-missing-loci case
322 int32_t result = (int32_t)founder_ct;
323 while (batch_ct_m1--) {
324 result -= ld_dot_prod_nm_batch((__m128i*)vec1, (__m128i*)vec2, MULTIPLEX_LD / 192);
325 vec1 = &(vec1[MULTIPLEX_LD / BITCT2]);
326 vec2 = &(vec2[MULTIPLEX_LD / BITCT2]);
327 }
328 result -= ld_dot_prod_nm_batch((__m128i*)vec1, (__m128i*)vec2, last_batch_size);
329 return result;
330 }
331 #else
ld_dot_prod_batch(uintptr_t * vec1,uintptr_t * vec2,uintptr_t * mask1,uintptr_t * mask2,int32_t * return_vals,uint32_t iters)332 static inline void ld_dot_prod_batch(uintptr_t* vec1, uintptr_t* vec2, uintptr_t* mask1, uintptr_t* mask2, int32_t* return_vals, uint32_t iters) {
333 uint32_t final_sum1 = 0;
334 uint32_t final_sum2 = 0;
335 uint32_t final_sum11 = 0;
336 uint32_t final_sum22 = 0;
337 uint32_t final_sum12 = 0;
338 uintptr_t loader1;
339 uintptr_t loader2;
340 uintptr_t sum1;
341 uintptr_t sum2;
342 uintptr_t sum11;
343 uintptr_t sum22;
344 uintptr_t sum12;
345 uintptr_t tmp_sum1;
346 uintptr_t tmp_sum2;
347 uintptr_t tmp_sum12;
348 do {
349 // (The important part of the header comment on the 64-bit version is
350 // copied below.)
351 //
352 // Input:
353 // * vec1 and vec2 are encoded -1 -> 00, 0/missing -> 01, 1 -> 10.
354 // * mask1 and mask2 mask out missing values (i.e. 00 for missing, 11 for
355 // nonmissing).
356 // * return_vals provides space for return values.
357 // * iters is the number of 12-byte windows to process, anywhere from 1 to
358 // 40 inclusive. (No, this is not the interface you'd use for a
359 // general-purpose library.) [32- and 64-bit differ here.]
360 //
361 // This function performs the update
362 // return_vals[0] += (-N) + \sum_i x_iy_i
363 // return_vals[1] += N_y + \sum_{i: nonmissing from y} x_i
364 // return_vals[2] += N_x + \sum_{i: nonmissing from x} y_i
365 // return_vals[3] += N_y - \sum_{i: nonmissing from y} x_i^2
366 // return_vals[4] += N_x - \sum_{i: nonmissing from x} y_i^2
367 // where N is the number of samples processed after applying the
368 // missingness masks indicated by the subscripts.
369 //
370 // Computation of terms [1]-[4] is based on the identity
371 // N_y + \sum_{i: nonmissing from y} x_i = popcount2(vec1 & mask2)
372 // where "popcount2" refers to starting with two-bit integers instead of
373 // one-bit integers in our summing process (this allows us to skip a few
374 // operations). (Once we can assume the presence of hardware popcount, a
375 // slightly different implementation may be better.)
376 //
377 // The trickier [0] computation currently proceeds as follows:
378 //
379 // 1. zcheck := (vec1 | vec2) & 0x5555...
380 // Detects whether at least one member of the pair has a 0/missing value.
381 //
382 // 2. popcount2(((vec1 ^ vec2) & (0xaaaa... - zcheck)) | zcheck)
383 // Subtracting this *from* a bias will give us our desired \sum_i x_iy_i
384 // dot product.
385
386
387 loader1 = *vec1++;
388 loader2 = *vec2++;
389 sum1 = *mask2++;
390 sum2 = *mask1++;
391 sum12 = (loader1 | loader2) & FIVEMASK;
392
393 sum1 = sum1 & loader1;
394 sum2 = sum2 & loader2;
395 loader1 = (loader1 ^ loader2) & (AAAAMASK - sum12);
396 sum12 = sum12 | loader1;
397 sum11 = sum1 & FIVEMASK;
398 sum22 = sum2 & FIVEMASK;
399
400 sum1 = (sum1 & 0x33333333) + ((sum1 >> 2) & 0x33333333);
401 sum2 = (sum2 & 0x33333333) + ((sum2 >> 2) & 0x33333333);
402 sum12 = (sum12 & 0x33333333) + ((sum12 >> 2) & 0x33333333);
403
404 loader1 = *vec1++;
405 loader2 = *vec2++;
406 tmp_sum1 = *mask2++;
407 tmp_sum2 = *mask1++;
408 tmp_sum12 = (loader1 | loader2) & FIVEMASK;
409 tmp_sum1 = tmp_sum1 & loader1;
410 tmp_sum2 = tmp_sum2 & loader2;
411
412 loader1 = (loader1 ^ loader2) & (AAAAMASK - tmp_sum12);
413 tmp_sum12 = tmp_sum12 | loader1;
414 sum11 += tmp_sum1 & FIVEMASK;
415 sum22 += tmp_sum2 & FIVEMASK;
416
417 sum1 += (tmp_sum1 & 0x33333333) + ((tmp_sum1 >> 2) & 0x33333333);
418 sum2 += (tmp_sum2 & 0x33333333) + ((tmp_sum2 >> 2) & 0x33333333);
419 sum12 += (tmp_sum12 & 0x33333333) + ((tmp_sum12 >> 2) & 0x33333333);
420
421 loader1 = *vec1++;
422 loader2 = *vec2++;
423 tmp_sum1 = *mask2++;
424 tmp_sum2 = *mask1++;
425 tmp_sum12 = (loader1 | loader2) & FIVEMASK;
426
427 tmp_sum1 = tmp_sum1 & loader1;
428 tmp_sum2 = tmp_sum2 & loader2;
429 loader1 = (loader1 ^ loader2) & (AAAAMASK - tmp_sum12);
430 tmp_sum12 = tmp_sum12 | loader1;
431 sum11 += tmp_sum1 & FIVEMASK;
432 sum22 += tmp_sum2 & FIVEMASK;
433
434 sum1 += (tmp_sum1 & 0x33333333) + ((tmp_sum1 >> 2) & 0x33333333);
435 sum2 += (tmp_sum2 & 0x33333333) + ((tmp_sum2 >> 2) & 0x33333333);
436 sum11 = (sum11 & 0x33333333) + ((sum11 >> 2) & 0x33333333);
437 sum22 = (sum22 & 0x33333333) + ((sum22 >> 2) & 0x33333333);
438 sum12 += (tmp_sum12 & 0x33333333) + ((tmp_sum12 >> 2) & 0x33333333);
439
440 sum1 = (sum1 & 0x0f0f0f0f) + ((sum1 >> 4) & 0x0f0f0f0f);
441 sum2 = (sum2 & 0x0f0f0f0f) + ((sum2 >> 4) & 0x0f0f0f0f);
442 sum11 = (sum11 & 0x0f0f0f0f) + ((sum11 >> 4) & 0x0f0f0f0f);
443 sum22 = (sum22 & 0x0f0f0f0f) + ((sum22 >> 4) & 0x0f0f0f0f);
444 sum12 = (sum12 & 0x0f0f0f0f) + ((sum12 >> 4) & 0x0f0f0f0f);
445
446 // technically could do the multiply-and-shift only once every two rounds
447 final_sum1 += (sum1 * 0x01010101) >> 24;
448 final_sum2 += (sum2 * 0x01010101) >> 24;
449 final_sum11 += (sum11 * 0x01010101) >> 24;
450 final_sum22 += (sum22 * 0x01010101) >> 24;
451 final_sum12 += (sum12 * 0x01010101) >> 24;
452 } while (--iters);
453 return_vals[0] -= final_sum12;
454 return_vals[1] += final_sum1;
455 return_vals[2] += final_sum2;
456 return_vals[3] += final_sum11;
457 return_vals[4] += final_sum22;
458 }
459
ld_dot_prod(uintptr_t * vec1,uintptr_t * vec2,uintptr_t * mask1,uintptr_t * mask2,int32_t * return_vals,uint32_t batch_ct_m1,uint32_t last_batch_size)460 void ld_dot_prod(uintptr_t* vec1, uintptr_t* vec2, uintptr_t* mask1, uintptr_t* mask2, int32_t* return_vals, uint32_t batch_ct_m1, uint32_t last_batch_size) {
461 while (batch_ct_m1--) {
462 ld_dot_prod_batch(vec1, vec2, mask1, mask2, return_vals, MULTIPLEX_LD / 48);
463 vec1 = &(vec1[MULTIPLEX_LD / BITCT2]);
464 vec2 = &(vec2[MULTIPLEX_LD / BITCT2]);
465 mask1 = &(mask1[MULTIPLEX_LD / BITCT2]);
466 mask2 = &(mask2[MULTIPLEX_LD / BITCT2]);
467 }
468 ld_dot_prod_batch(vec1, vec2, mask1, mask2, return_vals, last_batch_size);
469 }
470
ld_dot_prod_nm_batch(uintptr_t * vec1,uintptr_t * vec2,uint32_t iters)471 static inline int32_t ld_dot_prod_nm_batch(uintptr_t* vec1, uintptr_t* vec2, uint32_t iters) {
472 uint32_t final_sum12 = 0;
473 uintptr_t loader1;
474 uintptr_t loader2;
475 uintptr_t sum12;
476 uintptr_t tmp_sum12;
477 do {
478 loader1 = *vec1++;
479 loader2 = *vec2++;
480 sum12 = (loader1 | loader2) & FIVEMASK;
481 loader1 = (loader1 ^ loader2) & (AAAAMASK - sum12);
482 sum12 = sum12 | loader1;
483 sum12 = (sum12 & 0x33333333) + ((sum12 >> 2) & 0x33333333);
484
485 loader1 = *vec1++;
486 loader2 = *vec2++;
487 tmp_sum12 = (loader1 | loader2) & FIVEMASK;
488 loader1 = (loader1 ^ loader2) & (AAAAMASK - tmp_sum12);
489 tmp_sum12 = tmp_sum12 | loader1;
490 sum12 += (tmp_sum12 & 0x33333333) + ((tmp_sum12 >> 2) & 0x33333333);
491
492 loader1 = *vec1++;
493 loader2 = *vec2++;
494 tmp_sum12 = (loader1 | loader2) & FIVEMASK;
495 loader1 = (loader1 ^ loader2) & (AAAAMASK - tmp_sum12);
496 tmp_sum12 = tmp_sum12 | loader1;
497 sum12 += (tmp_sum12 & 0x33333333) + ((tmp_sum12 >> 2) & 0x33333333);
498 sum12 = (sum12 & 0x0f0f0f0f) + ((sum12 >> 4) & 0x0f0f0f0f);
499
500 final_sum12 += (sum12 * 0x01010101) >> 24;
501 } while (--iters);
502 return final_sum12;
503 }
504
ld_dot_prod_nm(uintptr_t * vec1,uintptr_t * vec2,uint32_t founder_ct,uint32_t batch_ct_m1,uint32_t last_batch_size)505 int32_t ld_dot_prod_nm(uintptr_t* vec1, uintptr_t* vec2, uint32_t founder_ct, uint32_t batch_ct_m1, uint32_t last_batch_size) {
506 int32_t result = (int32_t)founder_ct;
507 while (batch_ct_m1--) {
508 result -= ld_dot_prod_nm_batch(vec1, vec2, MULTIPLEX_LD / 48);
509 vec1 = &(vec1[MULTIPLEX_LD / BITCT2]);
510 vec2 = &(vec2[MULTIPLEX_LD / BITCT2]);
511 }
512 result -= ld_dot_prod_nm_batch(vec1, vec2, last_batch_size);
513 return result;
514 }
515 #endif // __LP64__
516
// Recodes one variant's founder genotypes, in place, from PLINK 2-bit
// encoding to the {00, 01, 10} encoding that ld_dot_prod_batch() documents,
// and gathers the per-variant statistics used for correlation computation.
//
// * geno_buf: QUATERCT_TO_WORDCT(founder_ct) words of PLINK-encoded
//   genotypes on entry; overwritten with the new encoding.
// * mask_buf: receives 11 for each nonmissing genotype and 00 for each
//   missing one; bits past founder_ct in the last word are cleared.
// * missing_buf: receives one bit per sample (1 = missing); two genotype
//   words map to one output word.
// * missing_ct_ptr: receives the missing-call count.  NOT written when the
//   no-missing fast path returns 0.
// * sum_ptr, variance_recip_ptr: written only on the no-missing fast path
//   (no missing calls, and not the weighted-X case), with the genotype sum
//   and 1 / (ssq * founder_ct - sum^2) respectively.
// * founder_ct: number of founders; the loops below assume it is >= 1.
// * is_x, weighted_x: select the X-chromosome special cases described
//   inline.
// * nonmale_founder_ct: not referenced in this function.
// * founder_male_include2: read only when is_x is set; combined with the
//   genotype words as a 2-bit-per-sample mask.  NOTE(review): presumably 01
//   marks male founders -- confirm against the caller.
// * nonmale_geno, nonmale_masks, nonmale_offset: used only when is_x and
//   weighted_x are both set; output is written starting at word
//   nonmale_offset of each array.
//
// Returns 0 if the variance term (ssq * n - sum^2) is zero, 1 otherwise.
uint32_t ld_process_load(uintptr_t* geno_buf, uintptr_t* mask_buf, uintptr_t* missing_buf, uint32_t* missing_ct_ptr, double* sum_ptr, double* variance_recip_ptr, uint32_t founder_ct, uint32_t is_x, uint32_t weighted_x, uint32_t nonmale_founder_ct, uintptr_t* founder_male_include2, uintptr_t* nonmale_geno, uintptr_t* nonmale_masks, uintptr_t nonmale_offset) {
  uintptr_t* geno_ptr = geno_buf;
  uintptr_t founder_ctl2 = QUATERCT_TO_WORDCT(founder_ct);
  uintptr_t* geno_end = &(geno_buf[founder_ctl2]);
  uintptr_t* mask_buf_ptr = mask_buf;
  uintptr_t* missing_ptr = missing_buf;
  uintptr_t new_missing = 0;
  int64_t llii;
  uint32_t missing_bit_offset = 0;
  uint32_t ssq = 0;
  uint32_t missing_ct = 0;
  // sum starts at -founder_ct because each popcount2() term added below is
  // (x_i + 1) for a genotype value x_i in {-1, 0, 1}
  int32_t sum = -founder_ct;
  uintptr_t* nm_mask_ptr;
  uintptr_t cur_geno;
  uintptr_t shifted_masked_geno;
  uintptr_t new_geno;
  uintptr_t new_mask;
  while (1) {
    // Desired encodings:
    // new_geno: nonset homozygote -> 00
    //           het/missing       -> 01
    //           set homozygote    -> 10
    // Given PLINK encoding xx, this is (xx - ((xx >> 1) & FIVEMASK)).
    //
    // new_mask: missing   -> 00
    //           otherwise -> 11
    // ...and this is (((xx >> 1) & FIVEMASK) | ((~xx) & FIVEMASK)) * 3.
    //
    // new_missing: missing   -> 1
    //              otherwise -> 0
    // This can be assembled via repeated CTZLU on ~new_mask.
    cur_geno = *geno_ptr;
    shifted_masked_geno = (cur_geno >> 1) & FIVEMASK;
    new_geno = cur_geno - shifted_masked_geno;
    *geno_ptr++ = new_geno;
    new_mask = (((~cur_geno) & FIVEMASK) | shifted_masked_geno) * 3;
    *mask_buf_ptr++ = new_mask;
    // new_mask is reused here: one set bit per missing genotype
    new_mask = (~new_mask) & FIVEMASK;
    while (new_mask) {
      new_missing |= ONELU << (missing_bit_offset + (CTZLU(new_mask) / 2));
      missing_ct++;
      // clear the lowest set bit
      new_mask &= new_mask - 1;
    }
    if (geno_ptr == geno_end) {
      break;
    }
    // a genotype word covers BITCT2 samples, so each missing_buf word is
    // flushed after every second genotype word
    if (missing_bit_offset) {
      missing_bit_offset = 0;
      *missing_ptr++ = new_missing;
      new_missing = 0;
    } else {
      missing_bit_offset = BITCT2;
    }
  }
  // flush the final (possibly half-filled) missing word
  *missing_ptr = new_missing;
  if (is_x && (!weighted_x)) {
    // special case #1: recode male clear homozygotes to 01 on X chromosome,
    // for backwards compatibility
    //
    // this is a bit ugly (e.g. results are actually affected by which allele
    // is A1), so may want to switch the default to mode 3
    geno_ptr = geno_buf;
    do {
      new_geno = *geno_ptr;
      // adds 1 to each 00 field that is flagged in founder_male_include2
      *geno_ptr++ = new_geno + ((~(new_geno | (new_geno >> 1))) & (*founder_male_include2++));
    } while (geno_ptr < geno_end);
  }
  geno_ptr = geno_buf;
  while (1) {
    new_geno = *geno_ptr++;
    sum += popcount2_long(new_geno);
    // low bit set wherever the genotype is 00 or 10, i.e. x_i^2 == 1
    new_geno = (new_geno ^ FIVEMASK) & FIVEMASK;
    if (geno_ptr == geno_end) {
      // the last word is handled after the loop
      break;
    }
    ssq += popcount2_long(new_geno);
  }
  // have to be careful with trailing zeroes here
  // (padding fields past founder_ct are 00 and would otherwise be counted as
  // homozygotes, so they are shifted out of the last word first)
  ssq += popcount2_long(new_geno << (BITCT - 2 * (1 + ((founder_ct - 1) % BITCT2))));
  if (founder_ct % BITCT2) {
    // clear mask bits belonging to padding fields in the last word
    mask_buf[founder_ct / BITCT2] &= (ONELU << (2 * (founder_ct % BITCT2))) - ONELU;
  }
  if (is_x && weighted_x) {
    // special case #2: double-count nonmales
    //
    // A second copy of the genotypes is written to nonmale_geno in which
    // every sample flagged in founder_male_include2 is forced to 01 and
    // masked out in nonmale_masks; its sum/ssq contributions are added on top
    // of the totals computed above.
    geno_ptr = geno_buf;
    sum -= founder_ct;
    nonmale_geno = &(nonmale_geno[nonmale_offset]);
    nonmale_masks = &(nonmale_masks[nonmale_offset]);
    mask_buf_ptr = mask_buf;
    nm_mask_ptr = nonmale_masks;
    while (1) {
      new_mask = ~((*founder_male_include2) * 3);
      new_geno = ((*geno_ptr++) & new_mask) | (*founder_male_include2++);
      *nonmale_geno++ = new_geno;
      *nm_mask_ptr++ = new_mask & (*mask_buf_ptr++);
      sum += popcount2_long(new_geno);
      new_geno = (new_geno ^ FIVEMASK) & FIVEMASK;
      if (geno_ptr == geno_end) {
        break;
      }
      ssq += popcount2_long(new_geno);
    }
    ssq += popcount2_long(new_geno << (BITCT - 2 * (1 + ((founder_ct - 1) % BITCT2))));
    // every sample not set in nonmale_masks counts as missing in the
    // second copy
    missing_ct += founder_ct - (popcount_longs(nonmale_masks, founder_ctl2) / 2);
    founder_ct *= 2;
  } else if (!missing_ct) {
    // save sum and (n^2)/variance, for faster processing of pairwise
    // no-missing-calls case
    llii = (int64_t)((uint64_t)ssq) * founder_ct - ((int64_t)sum) * sum;
    if (!llii) {
      // zero variance; note that *missing_ct_ptr is left untouched here
      return 0;
    }
    *missing_ct_ptr = 0;
    *sum_ptr = (double)sum;
    *variance_recip_ptr = 1.0 / ((double)llii);
    return 1;
  }
  *missing_ct_ptr = missing_ct;
  return (((int64_t)((uint64_t)ssq)) * (founder_ct - missing_ct) - ((int64_t)sum) * sum)? 1 : 0;
}
637
// Starting the search at cur_uidx, returns the index of the first
// non-excluded variant that lies on a chromosome with a nonzero code below
// chrom_code_end.  Variants on any other chromosome are skipped a whole
// chromosome at a time.  If no such variant exists, the value returned is
// >= unfiltered_marker_ct.
uint32_t ld_prune_next_valid_chrom_start(uintptr_t* marker_exclude, uint32_t cur_uidx, Chrom_info* chrom_info_ptr, uint32_t chrom_code_end, uint32_t unfiltered_marker_ct) {
  uint32_t variant_uidx = next_unset(marker_exclude, cur_uidx, unfiltered_marker_ct);
  while (variant_uidx < unfiltered_marker_ct) {
    const uint32_t chrom_code = get_variant_chrom(chrom_info_ptr, variant_uidx);
    // code 0 is rejected explicitly (--aec 0 support)
    if ((chrom_code != 0) && (chrom_code < chrom_code_end)) {
      break;
    }
    // not a usable chromosome: jump to the first non-excluded variant at or
    // after its end
    const uint32_t skip_to_uidx = get_chrom_end_vidx(chrom_info_ptr, chrom_code);
    variant_uidx = next_unset(marker_exclude, skip_to_uidx, unfiltered_marker_ct);
  }
  return variant_uidx;
}
651
// Sets up the initial pruning window for the chromosome that contains
// window_unfiltered_start.
//
// * ld_window_kb: if nonzero, ld_window_size is a width in kilobases and the
//   window holds every non-excluded variant within 1000 * ld_window_size bp
//   of the first one; otherwise ld_window_size is a variant count.
// * live_indices[0..k-1] receive the variant indices placed in the window
//   (k is the value stored to *cur_window_size_ptr).
// * start_arr[i - 1] receives the same index as live_indices[i] for
//   1 <= i < k, and start_arr[k - 1] receives the final window end.
// * *cur_chrom_ptr, *chrom_end_ptr, *window_unfiltered_end_ptr receive the
//   chromosome code, its end index, and the index of the first non-excluded
//   variant past the window (or the chromosome end).
// * *is_haploid_ptr, *is_x_ptr, *is_y_ptr receive flags describing the
//   chromosome.
void ld_prune_start_chrom(uint32_t ld_window_kb, uint32_t* cur_chrom_ptr, uint32_t* chrom_end_ptr, uint32_t window_unfiltered_start, uint32_t* live_indices, uint32_t* start_arr, uint32_t* window_unfiltered_end_ptr, uint32_t ld_window_size, uint32_t* cur_window_size_ptr, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, Chrom_info* chrom_info_ptr, uint32_t* marker_pos, uint32_t* is_haploid_ptr, uint32_t* is_x_ptr, uint32_t* is_y_ptr) {
  const uint32_t chrom_code = get_variant_chrom(chrom_info_ptr, window_unfiltered_start);
  const uint32_t chrom_stop = get_chrom_end_vidx(chrom_info_ptr, chrom_code);
  uint32_t next_uidx = window_unfiltered_start + 1;
  uint32_t window_cap = ld_window_size;
  uint32_t filled_ct = 1;

  // The first variant is always in the window.
  live_indices[0] = window_unfiltered_start;
  next_unset_ck(marker_exclude, chrom_stop, &next_uidx);

  if (ld_window_kb) {
    // Translate the kb width into a variant count by scanning ahead.
    const uint32_t bp_span = 1000 * ld_window_size;
    uint32_t scan_uidx = next_uidx;
    window_cap = 1;
    while ((scan_uidx < chrom_stop) && (marker_pos[scan_uidx] <= marker_pos[window_unfiltered_start] + bp_span)) {
      window_cap++;
      scan_uidx++;
      next_unset_ck(marker_exclude, chrom_stop, &scan_uidx);
    }
  }

  // Fill the remaining slots until the cap or the chromosome end is reached.
  while ((filled_ct < window_cap) && (next_uidx != chrom_stop)) {
    start_arr[filled_ct - 1] = next_uidx;
    live_indices[filled_ct] = next_uidx;
    next_uidx++;
    next_unset_ck(marker_exclude, chrom_stop, &next_uidx);
    filled_ct++;
  }

  *cur_window_size_ptr = filled_ct;
  start_arr[filled_ct - 1] = next_uidx;
  *cur_chrom_ptr = chrom_code;
  *chrom_end_ptr = chrom_stop;
  *window_unfiltered_end_ptr = next_uidx;
  *is_haploid_ptr = IS_SET(chrom_info_ptr->haploid_mask, chrom_code);
  *is_x_ptr = (((int32_t)chrom_code) == chrom_info_ptr->xymt_codes[X_OFFSET]);
  *is_y_ptr = (((int32_t)chrom_code) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
}
690
// Writes one variant-ID list (one ID per line) to the file named fname.
// With write_removed == 0, every variant whose bit is clear in pruned_arr is
// written (pruned_arr starts out as a copy of marker_exclude, so these are the
// retained variants).  With write_removed != 0, every variant that is set in
// pruned_arr but clear in marker_exclude is written (variants removed by
// pruning).  Only chromosome codes in [1, chrom_code_end) that are set in
// chrom_mask are visited.
// Returns 0, RET_OPEN_FAIL, or RET_WRITE_FAIL.
static int32_t ld_prune_write_list(char* fname, uint32_t write_removed, uintptr_t* marker_exclude, uintptr_t* pruned_arr, char* marker_ids, uintptr_t max_marker_id_len, Chrom_info* chrom_info_ptr, uint32_t chrom_code_end) {
  FILE* listfile = nullptr;
  int32_t status = 0;
  if (fopen_checked(fname, "w", &listfile)) {
    status = RET_OPEN_FAIL;
  } else {
    uint32_t chrom_idx;
    for (chrom_idx = 1; chrom_idx < chrom_code_end; chrom_idx++) {
      if (is_set(chrom_info_ptr->chrom_mask, chrom_idx)) {
        const uint32_t uidx_stop = get_chrom_end_vidx(chrom_info_ptr, chrom_idx);
        uint32_t uidx = get_chrom_start_vidx(chrom_info_ptr, chrom_idx);
        for (; uidx < uidx_stop; uidx++) {
          uint32_t emit;
          if (write_removed) {
            emit = (!IS_SET(marker_exclude, uidx)) && IS_SET(pruned_arr, uidx);
          } else {
            emit = !IS_SET(pruned_arr, uidx);
          }
          if (emit) {
            fputs(&(marker_ids[uidx * max_marker_id_len]), listfile);
            putc_unlocked('\n', listfile);
          }
        }
      }
    }
    if (fclose_null(&listfile)) {
      status = RET_WRITE_FAIL;
    }
  }
  fclose_cond(listfile);
  return status;
}

int32_t ld_prune_write(char* outname, char* outname_end, uintptr_t* marker_exclude, uintptr_t* pruned_arr, char* marker_ids, uintptr_t max_marker_id_len, Chrom_info* chrom_info_ptr, uint32_t chrom_code_end) {
  // Emits the two pruning result lists: <outname>.prune.in (variants kept) and
  // <outname>.prune.out (variants removed by pruning).  outname_end points at
  // the position in outname where the extension is written; it is reset to an
  // empty extension only when both files were written successfully.
  // Returns 0 on success, RET_OPEN_FAIL if a file cannot be opened, or
  // RET_WRITE_FAIL if closing a file reports an error.
  int32_t retval;
  fputs("Writing...", stdout);
  fflush(stdout);
  strcpy(outname_end, ".prune.in");
  retval = ld_prune_write_list(outname, 0, marker_exclude, pruned_arr, marker_ids, max_marker_id_len, chrom_info_ptr, chrom_code_end);
  if (!retval) {
    strcpy(outname_end, ".prune.out");
    retval = ld_prune_write_list(outname, 1, marker_exclude, pruned_arr, marker_ids, max_marker_id_len, chrom_info_ptr, chrom_code_end);
    if (!retval) {
      *outname_end = '\0';
      putc_unlocked('\r', stdout);
      LOGPRINTFWW("Marker lists written to %s.prune.in and %s.prune.out .\n", outname, outname);
    }
  }
  return retval;
}
751
// LD-based variant pruning (--indep / --indep-pairwise).
//
// Slides a window along each included chromosome (chromosome 0 is ignored),
// loading founder genotypes from bedfile, and marks variants for removal in a
// private copy of marker_exclude:
//   * a variant is dropped immediately when ld_process_load() returns zero
//     for it;
//   * within a window, whenever a pair's statistic exceeds the threshold, the
//     variant with the lower get_maf() value is dropped.  The statistic is r^2
//     against ldip->prune_last_param in pairwise mode, and r against 0.999999
//     otherwise;
//   * in non-pairwise mode, the correlation matrix of the survivors is then
//     inverted repeatedly; while the largest diagonal entry of the inverse
//     exceeds ldip->prune_last_param, the corresponding variant is dropped.
//     Apparently-singular matrices are handled by pruning one offending
//     variant at a time.
// The results are written via ld_prune_write() to <outname>.prune.in and
// <outname>.prune.out.
//
// Parameters (as used below):
//   ldip               window size/increment, last parameter, modifier flags
//   bedfile/bed_offset genotype file; variant uidx starts at
//                      bed_offset + uidx * ((unfiltered_sample_ct + 3) / 4)
//   marker_ct          number of included variants (used for messages/checks)
//   marker_exclude     bitfield of variants excluded before pruning
//   marker_reverse     per-variant flag forwarded to load_and_collapse_incl()
//   set_allele_freqs   per-variant frequencies; decide which variant of a
//                      correlated pair is removed
//   marker_pos         positions, used for kb-based windows
//   founder_info       bitfield selecting the samples that are analysed
//   sex_male, hh_exists  used for haploid/X handling
//
// Returns 0 on success (also when skipped because there are fewer than two
// founders), otherwise a RET_* error code.  All bigstack allocations made here
// are released before returning.
int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, Chrom_info* chrom_info_ptr, double* set_allele_freqs, uint32_t* marker_pos, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uintptr_t* sex_male, char* outname, char* outname_end, uint32_t hh_exists) {
  // Results are slightly different from PLINK 1.07 when missing data is
  // present, but that's due to a minor bug in 1.07 (sample per-marker
  // variances don't exclude the missing markers).

  // for future consideration: chromosome-based multithread/parallel?
  unsigned char* bigstack_mark = g_bigstack_base;
  uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
  uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
  uintptr_t unfiltered_sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(unfiltered_sample_ct);
  uintptr_t founder_ct = popcount_longs(founder_info, unfiltered_sample_ctv2 / 2);
  uintptr_t founder_ctl = BITCT_TO_WORDCT(founder_ct);
#ifdef __LP64__
  uintptr_t founder_ctv = BITCT_TO_ALIGNED_WORDCT(founder_ct);
#else
  uintptr_t founder_ctv = founder_ctl;
#endif
  uintptr_t founder_ct_mld = (founder_ct + MULTIPLEX_LD - 1) / MULTIPLEX_LD;
  uint32_t founder_ct_mld_m1 = ((uint32_t)founder_ct_mld) - 1;
#ifdef __LP64__
  uint32_t founder_ct_mld_rem = (MULTIPLEX_LD / 192) - (founder_ct_mld * MULTIPLEX_LD - founder_ct) / 192;
#else
  uint32_t founder_ct_mld_rem = (MULTIPLEX_LD / 48) - (founder_ct_mld * MULTIPLEX_LD - founder_ct) / 48;
#endif
  // per-variant stride (in words) of the geno/geno_masks buffers
  uintptr_t founder_ct_192_long = founder_ct_mld_m1 * (MULTIPLEX_LD / BITCT2) + founder_ct_mld_rem * (192 / BITCT2);
  uintptr_t final_mask = get_final_mask(founder_ct);
  uint32_t weighted_founder_ct = founder_ct;
  uint32_t founder_trail_ct = founder_ct_192_long - founder_ctl * 2;
  uint32_t pairwise = (ldip->modifier / LD_PRUNE_PAIRWISE) & 1;
  uint32_t ignore_x = (ldip->modifier / LD_IGNORE_X) & 1;
  uint32_t weighted_x = (ldip->modifier / LD_WEIGHTED_X) & 1;
  uint32_t window_is_kb = (ldip->modifier / LD_PRUNE_KB_WINDOW) & 1;
  uint32_t ld_window_size = ldip->prune_window_size;
  uint32_t ld_window_incr = ldip->prune_window_incr;
  double ld_last_param = ldip->prune_last_param;
  uint32_t nonmale_founder_ct = 0;
  uintptr_t window_max = 1;
  uintptr_t* geno = nullptr;
  uintptr_t* founder_include2 = nullptr;
  uintptr_t* founder_male_include2 = nullptr;
  uintptr_t* nonmale_geno = nullptr;
  uintptr_t* nonmale_masks = nullptr;
  double* cov_matrix = nullptr;
  double* new_cov_matrix = nullptr;
  MATRIX_INVERT_BUF1_TYPE* irow = nullptr;
  double* work = nullptr;
  uint32_t* idx_remap = nullptr;
  uint32_t tot_exclude_ct = 0;
  uint32_t at_least_one_prune = 0;
  uint32_t chrom_code_end = chrom_info_ptr->max_code + 1 + chrom_info_ptr->name_ct;
  int32_t retval = 0;
  uintptr_t* geno_masks;
  uintptr_t* geno_mmasks;
  uintptr_t* pruned_arr;
  uint32_t* live_indices;
  uint32_t* start_arr;
  uint32_t pct;
  uint32_t pct_thresh;
  uint32_t window_unfiltered_start;
  uint32_t window_unfiltered_end;
  uint32_t cur_window_size;
  uint32_t old_window_size;
  uint32_t uii;
  uint32_t ujj;
  uint32_t ukk;
  int32_t ii;
  uint32_t cur_chrom;
  uint32_t chrom_start;
  uint32_t chrom_end;
  uint32_t is_haploid;
  uint32_t is_x;
  uint32_t is_y;
  uintptr_t* loadbuf;
  double* sums;
  double* variance_recips; // entries are actually n^2 / variance
  uint32_t* missing_cts;
  uint32_t fixed_missing_ct;
  uintptr_t ulii;
  double dxx;
  double dyy;
  double cov12;
  uint32_t fixed_non_missing_ct;
  uint32_t non_missing_ct;
  int32_t dp_result[5];
  double non_missing_ctd;
  uintptr_t* geno_fixed_vec_ptr;
  uintptr_t* geno_var_vec_ptr;
  uintptr_t* mask_fixed_vec_ptr;
  uintptr_t* mask_var_vec_ptr;
  uintptr_t cur_exclude_ct;
  uint32_t prev_end;
  __CLPK_integer window_rem_li;
  uint32_t old_window_rem;
  uint32_t window_rem;
  uint32_t bsearch_min;
  uint32_t bsearch_max;
  uint32_t bsearch_cur;
  double prune_ld_thresh;

  if (founder_ct < 2) {
    // deliberate soft skip: warn and report success
    LOGERRPRINTF("Warning: Skipping --indep%s since there are less than two founders.\n(--make-founders may come in handy here.)\n", pairwise? "-pairwise" : "");
    goto ld_prune_ret_1;
  }
  if (is_set(chrom_info_ptr->chrom_mask, 0)) {
    ulii = count_chrom_markers(chrom_info_ptr, marker_exclude, 0);
    if (chrom_info_ptr->zero_extra_chroms) {
      for (uii = chrom_info_ptr->max_code + 1; uii < chrom_code_end; uii++) {
        ulii += count_chrom_markers(chrom_info_ptr, marker_exclude, uii);
      }
      chrom_code_end = chrom_info_ptr->max_code + 1;
    }
    marker_ct -= ulii;
    LOGPRINTF("--indep%s: Ignoring %" PRIuPTR " chromosome 0 variant%s.\n", pairwise? "-pairwise" : "", ulii, (ulii == 1)? "" : "s");
  }
  if (marker_ct < 2) {
    LOGERRPRINTF("Error: Too few valid variants for --indep%s.\n", pairwise? "-pairwise" : "");
    goto ld_prune_ret_INVALID_FORMAT;
  }

  // force founder_male_include2 allocation
  if (alloc_collapsed_haploid_filters(founder_info, sex_male, unfiltered_sample_ct, founder_ct, XMHH_EXISTS | hh_exists, 1, &founder_include2, &founder_male_include2)) {
    goto ld_prune_ret_NOMEM;
  }
  if (weighted_x) {
    nonmale_founder_ct = founder_ct - popcount01_longs(founder_male_include2, founder_ctl);
    if (founder_ct + nonmale_founder_ct > 0x7fffffff) {
      // no, this shouldn't ever happen, but may as well document that there
      // theoretically is a 32-bit integer range issue here
      logerrprint("Error: Too many founders for --indep[-pairwise] + --ld-xchr 3.\n");
      // This is reported as an error, so it must not return success.
      retval = RET_INVALID_CMDLINE;
      goto ld_prune_ret_1;
    }
  }

  if (window_is_kb) {
    // determine maximum number of markers that may need to be loaded at once
    for (cur_chrom = 1; cur_chrom < chrom_code_end; cur_chrom++) {
      if (is_set(chrom_info_ptr->chrom_mask, cur_chrom)) {
        window_max = chrom_window_max(marker_pos, marker_exclude, chrom_info_ptr, cur_chrom, 0x7fffffff, ld_window_size * 1000, window_max);
      }
    }
  }
  if (pairwise) {
    prune_ld_thresh = ld_last_param * (1 + SMALL_EPSILON);
  } else {
#ifdef __LP64__
    if (window_max > 46340) {
      // todo: check what LAPACK's matrix inversion limit actually is.  Guess
      // sqrt(2^31 - 1) for now.
      logerrprint("Error: --indep does not currently support window sizes > 46340.\n");
      goto ld_prune_ret_INVALID_CMDLINE;
    }
#endif
    // r, not r2, in this case
    prune_ld_thresh = 0.999999;
  }

  window_unfiltered_start = ld_prune_next_valid_chrom_start(marker_exclude, 0, chrom_info_ptr, chrom_code_end, unfiltered_marker_ct);

  if (bigstack_alloc_ul(unfiltered_marker_ctl, &pruned_arr)) {
    goto ld_prune_ret_NOMEM;
  }

  // pruned_arr = marker_exclude plus everything removed by pruning
  memcpy(pruned_arr, marker_exclude, unfiltered_marker_ctl * sizeof(intptr_t));

  if (!window_is_kb) {
    window_max = ld_window_size;
  }
  if (bigstack_alloc_ui(window_max, &live_indices) ||
      bigstack_alloc_ui(window_max, &start_arr) ||
      bigstack_alloc_ul(unfiltered_sample_ctv2, &loadbuf) ||
      bigstack_alloc_ul(window_max * founder_ct_192_long, &geno) ||
      bigstack_alloc_ul(window_max * founder_ct_192_long, &geno_masks) ||
      bigstack_alloc_ul(window_max * founder_ctv, &geno_mmasks) ||
      bigstack_alloc_ui(window_max, &missing_cts) ||
      bigstack_alloc_d(window_max, &sums) ||
      bigstack_alloc_d(window_max, &variance_recips)) {
    goto ld_prune_ret_NOMEM;
  }
  if (weighted_x) {
    if (bigstack_alloc_ul(window_max * founder_ct_192_long, &nonmale_geno) ||
        bigstack_alloc_ul(window_max * founder_ct_192_long, &nonmale_masks)) {
      goto ld_prune_ret_NOMEM;
    }
  }
  // zero the trailing words of every per-variant slot once, up front
  for (ulii = 1; ulii <= window_max; ulii++) {
    fill_ulong_zero(founder_trail_ct + 2, &(geno[ulii * founder_ct_192_long - founder_trail_ct - 2]));
    fill_ulong_zero(founder_trail_ct + 2, &(geno_masks[ulii * founder_ct_192_long - founder_trail_ct - 2]));
    if (weighted_x) {
      fill_ulong_zero(founder_trail_ct + 2, &(nonmale_geno[ulii * founder_ct_192_long - founder_trail_ct - 2]));
      fill_ulong_zero(founder_trail_ct + 2, &(nonmale_masks[ulii * founder_ct_192_long - founder_trail_ct - 2]));
    }
  }
  if (!pairwise) {
    if (bigstack_alloc_d(window_max * window_max, &cov_matrix) ||
        bigstack_alloc_d(window_max * window_max, &new_cov_matrix) ||
        bigstack_alloc_ui(window_max, &idx_remap)) {
      goto ld_prune_ret_NOMEM;
    }

    irow = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(window_max * MATRIX_INVERT_BUF1_CHECKED_ALLOC);
    if (!irow) {
      goto ld_prune_ret_NOMEM;
    }

    if (window_max < 4) {
      ulii = 4;
    } else {
      ulii = window_max;
    }
    if (bigstack_alloc_d(ulii * window_max, &work)) {
      goto ld_prune_ret_NOMEM;
    }
  }
  // one iteration per chromosome
  do {
    prev_end = 0;
    ld_prune_start_chrom(window_is_kb, &cur_chrom, &chrom_end, window_unfiltered_start, live_indices, start_arr, &window_unfiltered_end, ld_window_size, &cur_window_size, unfiltered_marker_ct, pruned_arr, chrom_info_ptr, marker_pos, &is_haploid, &is_x, &is_y);
    if (weighted_x) {
      if (is_x) {
        weighted_founder_ct = 2 * founder_ct;
      } else {
        weighted_founder_ct = founder_ct;
      }
    }
    old_window_size = 0;
    cur_exclude_ct = 0;
    if (cur_window_size > 1) {
      // load the initial window
      for (ulii = 0; ulii < (uintptr_t)cur_window_size; ulii++) {
        uii = live_indices[ulii];
        if (fseeko(bedfile, bed_offset + (uii * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
          goto ld_prune_ret_READ_FAIL;
        }
        if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, uii), bedfile, loadbuf, &(geno[ulii * founder_ct_192_long]))) {
          goto ld_prune_ret_READ_FAIL;
        }
        if (is_haploid && hh_exists) {
          haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)(&(geno[ulii * founder_ct_192_long])));
        }
        if (!ld_process_load(&(geno[ulii * founder_ct_192_long]), &(geno_masks[ulii * founder_ct_192_long]), &(geno_mmasks[ulii * founder_ctv]), &(missing_cts[ulii]), &(sums[ulii]), &(variance_recips[ulii]), founder_ct, is_x && (!ignore_x), weighted_x, nonmale_founder_ct, founder_male_include2, nonmale_geno, nonmale_masks, ulii * founder_ct_192_long)) {
          SET_BIT(uii, pruned_arr);
          cur_exclude_ct++;
        }
      }
    }
    pct = 1;
    chrom_start = get_chrom_start_vidx(chrom_info_ptr, cur_chrom);
    pct_thresh = window_unfiltered_start + ((uint64_t)pct * (chrom_end - chrom_start)) / 100;
    while ((window_unfiltered_start < chrom_end) || (cur_window_size > 1)) {
      if (cur_window_size > 1) {
        // pairwise pass: repeat until a full sweep removes nothing
        do {
          at_least_one_prune = 0;
          for (uii = 0; uii < cur_window_size - 1; uii++) {
            if (IS_SET(pruned_arr, live_indices[uii])) {
              continue;
            }
            fixed_missing_ct = missing_cts[uii];
            fixed_non_missing_ct = weighted_founder_ct - fixed_missing_ct;
            geno_fixed_vec_ptr = &(geno[uii * founder_ct_192_long]);
            mask_fixed_vec_ptr = &(geno_masks[uii * founder_ct_192_long]);
            ujj = uii + 1;
            // skip partners already compared against uii
            while (live_indices[ujj] < start_arr[uii]) {
              if (++ujj == cur_window_size) {
                break;
              }
            }
            for (; ujj < cur_window_size; ujj++) {
              if (IS_SET(pruned_arr, live_indices[ujj])) {
                continue;
              }
              geno_var_vec_ptr = &(geno[ujj * founder_ct_192_long]);
              if ((!fixed_missing_ct) && (!missing_cts[ujj]) && ((!is_x) || (!weighted_x))) {
                cov12 = (double)(ld_dot_prod_nm(geno_fixed_vec_ptr, geno_var_vec_ptr, weighted_founder_ct, founder_ct_mld_m1, founder_ct_mld_rem) * ((int64_t)founder_ct)) - sums[uii] * sums[ujj];
                dxx = variance_recips[uii] * variance_recips[ujj];
              } else {
                mask_var_vec_ptr = &(geno_masks[ujj * founder_ct_192_long]);
                dp_result[0] = weighted_founder_ct;
                // reversed from what I initially thought because I'm passing
                // the ujj-associated buffers before the uii-associated ones.
                dp_result[1] = -((int32_t)fixed_non_missing_ct);
                dp_result[2] = missing_cts[ujj] - weighted_founder_ct;
                dp_result[3] = dp_result[1];
                dp_result[4] = dp_result[2];
                ld_dot_prod(geno_var_vec_ptr, geno_fixed_vec_ptr, mask_var_vec_ptr, mask_fixed_vec_ptr, dp_result, founder_ct_mld_m1, founder_ct_mld_rem);
                if (is_x && weighted_x) {
                  non_missing_ct = (popcount_longs_intersect(&(nonmale_masks[uii * founder_ct_192_long]), &(nonmale_masks[ujj * founder_ct_192_long]), 2 * founder_ctl) + popcount_longs_intersect(mask_fixed_vec_ptr, mask_var_vec_ptr, 2 * founder_ctl)) / 2;
                  ld_dot_prod(&(nonmale_geno[ujj * founder_ct_192_long]), &(nonmale_geno[uii * founder_ct_192_long]), &(nonmale_masks[ujj * founder_ct_192_long]), &(nonmale_masks[uii * founder_ct_192_long]), dp_result, founder_ct_mld_m1, founder_ct_mld_rem);
                } else {
                  non_missing_ct = fixed_non_missing_ct - missing_cts[ujj];
                  if (fixed_missing_ct && missing_cts[ujj]) {
                    non_missing_ct += popcount_longs_intersect(&(geno_mmasks[uii * founder_ctv]), &(geno_mmasks[ujj * founder_ctv]), founder_ctl);
                  }
                }
                non_missing_ctd = (double)((int32_t)non_missing_ct);
                dxx = dp_result[1];
                dyy = dp_result[2];
                // n * sum(xy) - sum(x) * sum(y)
                cov12 = dp_result[0] * non_missing_ctd - dxx * dyy;
                // reciprocal of the product of the two scaled variances,
                // each n * sum(x^2) - sum(x)^2.  The squared sums must be
                // subtracted; adding them inflates the denominator and
                // understates the correlation.
                dxx = 1.0 / ((dp_result[3] * non_missing_ctd - dxx * dxx) * (dp_result[4] * non_missing_ctd - dyy * dyy));
              }
              if (!pairwise) {
                dxx = cov12 * sqrt(dxx);
                if (dxx != dxx) {
                  // force prune if 0/0 for now
                  dxx = 1.0;
                }
                cov_matrix[uii * window_max + ujj] = dxx;
              } else {
                dxx = cov12 * cov12 * dxx;
              }
              if (dxx > prune_ld_thresh) {
                at_least_one_prune = 1;
                cur_exclude_ct++;
                // remove marker with lower MAF
                // could cache MAFs of all current-window variants, but
                // get_maf() is too cheap for this to make a noticeable
                // difference
                if (get_maf(set_allele_freqs[live_indices[uii]]) < (1 - SMALL_EPSILON) * get_maf(set_allele_freqs[live_indices[ujj]])) {
                  /*
                  if (debug_print) {
                    printf("removed %u, kept %u, MAFs %g/%g, r2 %g\n", live_indices[uii], live_indices[ujj], get_maf(set_allele_freqs[live_indices[uii]]), get_maf(set_allele_freqs[live_indices[ujj]]), dxx);
                  }
                  */
                  SET_BIT(live_indices[uii], pruned_arr);
                } else {
                  /*
                  if (debug_print) {
                    printf("removed %u, kept %u, MAFs %g/%g, r2 %g\n", live_indices[ujj], live_indices[uii], get_maf(set_allele_freqs[live_indices[ujj]]), get_maf(set_allele_freqs[live_indices[uii]]), dxx);
                  }
                  */
                  SET_BIT(live_indices[ujj], pruned_arr);
                  ujj++;
                  while (ujj < cur_window_size) {
                    if (!IS_SET(pruned_arr, live_indices[ujj])) {
                      break;
                    }
                    ujj++;
                  }
                  if (ujj < cur_window_size) {
                    start_arr[uii] = live_indices[ujj];
                  }
                }
                break;
              }
            }
            if (ujj == cur_window_size) {
              start_arr[uii] = window_unfiltered_end;
            }
          }
        } while (at_least_one_prune);
        if (!pairwise) {
          // matrix-inversion pass over the survivors; idx_remap maps compact
          // matrix rows to window slots
          window_rem = 0;
          for (uii = 0; uii < old_window_size; uii++) {
            if (IS_SET(pruned_arr, live_indices[uii])) {
              continue;
            }
            idx_remap[window_rem++] = uii;
          }
          old_window_rem = window_rem;
          for (; uii < cur_window_size; uii++) {
            if (IS_SET(pruned_arr, live_indices[uii])) {
              continue;
            }
            idx_remap[window_rem++] = uii;
          }
          while (window_rem > 1) {
            new_cov_matrix[0] = 1.0;
            for (uii = 1; uii < window_rem; uii++) {
              ukk = idx_remap[uii];
              for (ujj = 0; ujj < uii; ujj++) {
                dxx = cov_matrix[idx_remap[ujj] * window_max + ukk];
                new_cov_matrix[ujj * window_rem + uii] = dxx;
                new_cov_matrix[uii * window_rem + ujj] = dxx;
              }
              new_cov_matrix[uii * (window_rem + 1)] = 1.0;
            }
            window_rem_li = window_rem;
            ii = invert_matrix_checked(window_rem_li, new_cov_matrix, irow, work);
            while (ii) {
#ifdef NOLAPACK
              if (ii == -1) {
                goto ld_prune_ret_NOMEM;
              }
#endif
              // 1. binary search for minimum number of bottom right rows/
              //    columns that must be trimmed to get a nonsingular matrix
              bsearch_max = window_rem - 1;
              if (old_window_rem > bsearch_max) {
                // Normally we can assume that only loci not in the previous
                // window need to be considered here.  But, thanks to numeric
                // instability, we might still need to properly handle an
                // apparently-singular old submatrix?
                old_window_size = 0;
                old_window_rem = 0;
              }
              bsearch_min = old_window_rem;
              while (bsearch_min < bsearch_max) {
                bsearch_cur = (bsearch_min + bsearch_max) / 2;
                new_cov_matrix[0] = 1.0;
                for (uii = 1; uii < bsearch_cur; uii++) {
                  ukk = idx_remap[uii];
                  for (ujj = 0; ujj < uii; ujj++) {
                    dxx = cov_matrix[idx_remap[ujj] * window_max + ukk];
                    new_cov_matrix[ujj * bsearch_cur + uii] = dxx;
                    new_cov_matrix[uii * bsearch_cur + ujj] = dxx;
                  }
                  new_cov_matrix[uii * (bsearch_cur + 1)] = 1.0;
                }
                if (bsearch_cur) {
                  window_rem_li = bsearch_cur;
                  ii = invert_matrix_checked(window_rem_li, new_cov_matrix, irow, work);
                  if (!ii) {
                    bsearch_min = bsearch_cur + 1;
                  } else {
                    bsearch_max = bsearch_cur;
                  }
                } else {
                  bsearch_min = 1;
                }
              }

              // 2. the last trimmed row/column must be part of some linear
              //    combination.  prune *just* that, and retry.
              ujj = bsearch_min;
              // bug reported by Kaustubh was a violation of this:
              // assert(!IS_SET(pruned_arr, live_indices[idx_remap[ujj]]));
              SET_BIT(live_indices[idx_remap[ujj]], pruned_arr);
              cur_exclude_ct++;
              window_rem--;
              for (uii = ujj; uii < window_rem; uii++) {
                idx_remap[uii] = idx_remap[uii + 1];
              }
              new_cov_matrix[0] = 1.0;
              for (uii = 1; uii < window_rem; uii++) {
                ukk = idx_remap[uii];
                for (ujj = 0; ujj < uii; ujj++) {
                  dxx = cov_matrix[idx_remap[ujj] * window_max + ukk];
                  new_cov_matrix[ujj * window_rem + uii] = dxx;
                  new_cov_matrix[uii * window_rem + ujj] = dxx;
                }
                new_cov_matrix[uii * (window_rem + 1)] = 1.0;
              }
              window_rem_li = window_rem;
              ii = invert_matrix_checked(window_rem_li, new_cov_matrix, irow, work);
            }
            // find the largest diagonal entry of the inverse
            dxx = new_cov_matrix[0];
            ujj = 0;
            for (uii = 1; uii < window_rem; uii++) {
              if (new_cov_matrix[uii * (window_rem + 1)] > dxx) {
                dxx = new_cov_matrix[uii * (window_rem + 1)];
                ujj = uii;
              }
            }
            if (dxx > ld_last_param) {
              SET_BIT(live_indices[idx_remap[ujj]], pruned_arr);
              cur_exclude_ct++;
              window_rem--;
              if (idx_remap[ujj] < (uint32_t)old_window_size) {
                old_window_rem--;
              }
              for (uii = ujj; uii < window_rem; uii++) {
                idx_remap[uii] = idx_remap[uii + 1];
              }
            } else {
              // break out
              window_rem = 1;
            }
          }
        }
      }
      // advance the window start by up to ld_window_incr non-excluded variants
      for (uii = 0; uii < ld_window_incr; uii++) {
        if (window_unfiltered_start == chrom_end) {
          break;
        }
        window_unfiltered_start++;
        next_unset_ck(marker_exclude, chrom_end, &window_unfiltered_start);
      }
      if (window_unfiltered_start == chrom_end) {
        break;
      }
      if (window_unfiltered_start >= pct_thresh) {
        pct = ((window_unfiltered_start - chrom_start) * 100LLU) / (chrom_end - chrom_start);
        printf("\r%u%%", pct++);
        fflush(stdout);
        pct_thresh = chrom_start + (((uint64_t)pct * (chrom_end - chrom_start)) / 100);
      }
      ujj = 0;

      if (window_unfiltered_end < window_unfiltered_start) {
        window_unfiltered_end = window_unfiltered_start;
      }

      // copy back previously loaded/computed results
      while (live_indices[ujj] < window_unfiltered_start) {
        ujj++;
        if (ujj == cur_window_size) {
          break;
        }
      }
      for (uii = 0; ujj < cur_window_size; ujj++) {
        if (IS_SET(pruned_arr, live_indices[ujj])) {
          continue;
        }
        memcpy(&(geno[uii * founder_ct_192_long]), &(geno[ujj * founder_ct_192_long]), founder_ct_192_long * sizeof(intptr_t));
        memcpy(&(geno_masks[uii * founder_ct_192_long]), &(geno_masks[ujj * founder_ct_192_long]), founder_ct_192_long * sizeof(intptr_t));
        if (is_x && weighted_x) {
          memcpy(&(nonmale_geno[uii * founder_ct_192_long]), &(nonmale_geno[ujj * founder_ct_192_long]), founder_ct_192_long * sizeof(intptr_t));
          memcpy(&(nonmale_masks[uii * founder_ct_192_long]), &(nonmale_masks[ujj * founder_ct_192_long]), founder_ct_192_long * sizeof(intptr_t));
        }
        memcpy(&(geno_mmasks[uii * founder_ctv]), &(geno_mmasks[ujj * founder_ctv]), founder_ctl * sizeof(intptr_t));
        live_indices[uii] = live_indices[ujj];
        start_arr[uii] = start_arr[ujj];
        missing_cts[uii] = missing_cts[ujj];
        sums[uii] = sums[ujj];
        variance_recips[uii] = variance_recips[ujj];
        if (!pairwise) {
          for (ukk = 0; ukk < uii; ukk++) {
            cov_matrix[ukk * window_max + uii] = cov_matrix[idx_remap[ukk] * window_max + ujj];
          }
          idx_remap[uii] = ujj;
        }
        uii++;
      }

      prev_end = uii;
      cur_window_size = uii;
      // ujj = number of new variants to append to the window
      if (window_is_kb) {
        ujj = 0;
        ukk = window_unfiltered_end;
        while ((ukk < chrom_end) && (marker_pos[ukk] <= marker_pos[window_unfiltered_start] + (1000 * ld_window_size))) {
          ujj++;
          ukk++;
          next_unset_ck(marker_exclude, chrom_end, &ukk);
        }
      } else {
        ujj = ld_window_incr;
      }
      old_window_size = cur_window_size;
      for (uii = 0; uii < ujj; uii++) {
        if (window_unfiltered_end == chrom_end) {
          break;
        }
        live_indices[cur_window_size] = window_unfiltered_end;
        if (cur_window_size > prev_end) {
          start_arr[cur_window_size - 1] = window_unfiltered_end;
        }
        if (fseeko(bedfile, bed_offset + (window_unfiltered_end * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
          goto ld_prune_ret_READ_FAIL;
        }
        if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, window_unfiltered_end), bedfile, loadbuf, &(geno[cur_window_size * founder_ct_192_long]))) {
          goto ld_prune_ret_READ_FAIL;
        }
        if (is_haploid && hh_exists) {
          haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)(&(geno[cur_window_size * founder_ct_192_long])));
        }
        if (!ld_process_load(&(geno[cur_window_size * founder_ct_192_long]), &(geno_masks[cur_window_size * founder_ct_192_long]), &(geno_mmasks[cur_window_size * founder_ctv]), &(missing_cts[cur_window_size]), &(sums[cur_window_size]), &(variance_recips[cur_window_size]), founder_ct, is_x && (!ignore_x), weighted_x, nonmale_founder_ct, founder_male_include2, nonmale_geno, nonmale_masks, cur_window_size * founder_ct_192_long)) {
          SET_BIT(window_unfiltered_end, pruned_arr);
          cur_exclude_ct++;
        }
        cur_window_size++;
        window_unfiltered_end++;
        next_unset_ck(marker_exclude, chrom_end, &window_unfiltered_end);
      }
      if (cur_window_size > prev_end) {
        start_arr[cur_window_size - 1] = window_unfiltered_end;
      }
    }
    putc_unlocked('\r', stdout);
    LOGPRINTF("Pruned %" PRIuPTR " variant%s from chromosome %u, leaving %" PRIuPTR ".\n", cur_exclude_ct, (cur_exclude_ct == 1)? "" : "s", cur_chrom, chrom_end - chrom_start - popcount_bit_idx(marker_exclude, chrom_start, chrom_end) - cur_exclude_ct);
    tot_exclude_ct += cur_exclude_ct;

    // advance chromosomes as necessary
    window_unfiltered_start = ld_prune_next_valid_chrom_start(pruned_arr, window_unfiltered_start, chrom_info_ptr, chrom_code_end, unfiltered_marker_ct);
  } while (window_unfiltered_start < unfiltered_marker_ct);

  LOGPRINTF("Pruning complete. %u of %" PRIuPTR " variants removed.\n", tot_exclude_ct, marker_ct);
  retval = ld_prune_write(outname, outname_end, marker_exclude, pruned_arr, marker_ids, max_marker_id_len, chrom_info_ptr, chrom_code_end);
  if (retval) {
    goto ld_prune_ret_1;
  }

  while (0) {
  ld_prune_ret_NOMEM:
    retval = RET_NOMEM;
    break;
  ld_prune_ret_READ_FAIL:
    retval = RET_READ_FAIL;
    break;
  ld_prune_ret_INVALID_FORMAT:
    retval = RET_INVALID_FORMAT;
    break;
#ifdef __LP64__
  ld_prune_ret_INVALID_CMDLINE:
    retval = RET_INVALID_CMDLINE;
    break;
#endif
  }
 ld_prune_ret_1:
  bigstack_reset(bigstack_mark);
  return retval;
}
1350
ld_process_load2(uintptr_t * geno_buf,uintptr_t * mask_buf,uint32_t * missing_ct_ptr,uint32_t founder_ct,uint32_t is_x,uintptr_t * founder_male_include2)1351 void ld_process_load2(uintptr_t* geno_buf, uintptr_t* mask_buf, uint32_t* missing_ct_ptr, uint32_t founder_ct, uint32_t is_x, uintptr_t* founder_male_include2) {
1352 // ld_process_load(), except no missing_buf[] to conserve memory (and no
1353 // --ld-xchr 3 support yet), and no zero-variance check (we just want to
1354 // dump nans in that case)
1355 uintptr_t* geno_ptr = geno_buf;
1356 uintptr_t founder_ctl2 = QUATERCT_TO_WORDCT(founder_ct);
1357 uintptr_t* geno_end = &(geno_buf[founder_ctl2]);
1358 uintptr_t* mask_buf_ptr = mask_buf;
1359 uintptr_t cur_geno;
1360 uintptr_t shifted_masked_geno;
1361 uintptr_t new_geno;
1362 uintptr_t new_mask;
1363 do {
1364 cur_geno = *geno_ptr;
1365 shifted_masked_geno = (cur_geno >> 1) & FIVEMASK;
1366 new_geno = cur_geno - shifted_masked_geno;
1367 *geno_ptr++ = new_geno;
1368 new_mask = (((~cur_geno) & FIVEMASK) | shifted_masked_geno) * 3;
1369 *mask_buf_ptr++ = new_mask;
1370 } while (geno_ptr < geno_end);
1371 if (is_x) {
1372 geno_ptr = geno_buf;
1373 do {
1374 new_geno = *geno_ptr;
1375 *geno_ptr++ = new_geno + ((~(new_geno | (new_geno >> 1))) & (*founder_male_include2++));
1376 } while (geno_ptr < geno_end);
1377 }
1378 if (founder_ct % BITCT2) {
1379 mask_buf[founder_ct / BITCT2] &= (ONELU << (2 * (founder_ct % BITCT2))) - ONELU;
1380 }
1381 *missing_ct_ptr = founder_ct - (popcount_longs(mask_buf, founder_ctl2) / 2);
1382 }
1383
ld_missing_ct_intersect(uintptr_t * lptr1,uintptr_t * lptr2,uintptr_t word12_ct,uintptr_t word12_rem,uintptr_t lshift_last)1384 uint32_t ld_missing_ct_intersect(uintptr_t* lptr1, uintptr_t* lptr2, uintptr_t word12_ct, uintptr_t word12_rem, uintptr_t lshift_last) {
1385 // variant of popcount_longs_intersect()
1386 uintptr_t tot = 0;
1387 uintptr_t* lptr1_end2;
1388 #ifdef __LP64__
1389 const __m128i m1 = {FIVEMASK, FIVEMASK};
1390 const __m128i m2 = {0x3333333333333333LLU, 0x3333333333333333LLU};
1391 const __m128i m4 = {0x0f0f0f0f0f0f0f0fLLU, 0x0f0f0f0f0f0f0f0fLLU};
1392 const __m128i m8 = {0x00ff00ff00ff00ffLLU, 0x00ff00ff00ff00ffLLU};
1393 __m128i* vptr1 = (__m128i*)lptr1;
1394 __m128i* vptr2 = (__m128i*)lptr2;
1395 __m128i* vend1;
1396 __m128i loader1;
1397 __m128i loader2;
1398 __univec acc;
1399
1400 while (word12_ct >= 10) {
1401 word12_ct -= 10;
1402 vend1 = &(vptr1[60]);
1403 ld_missing_ct_intersect_main_loop:
1404 acc.vi = _mm_setzero_si128();
1405 do {
1406 loader1 = _mm_andnot_si128(_mm_or_si128(*vptr2++, *vptr1++), m1);
1407 loader2 = _mm_andnot_si128(_mm_or_si128(*vptr2++, *vptr1++), m1);
1408 loader1 = _mm_add_epi64(loader1, _mm_andnot_si128(_mm_or_si128(*vptr2++, *vptr1++), m1));
1409 loader2 = _mm_add_epi64(loader2, _mm_andnot_si128(_mm_or_si128(*vptr2++, *vptr1++), m1));
1410 loader1 = _mm_add_epi64(loader1, _mm_andnot_si128(_mm_or_si128(*vptr2++, *vptr1++), m1));
1411 loader2 = _mm_add_epi64(loader2, _mm_andnot_si128(_mm_or_si128(*vptr2++, *vptr1++), m1));
1412 loader1 = _mm_add_epi64(_mm_and_si128(loader1, m2), _mm_and_si128(_mm_srli_epi64(loader1, 2), m2));
1413 loader1 = _mm_add_epi64(loader1, _mm_add_epi64(_mm_and_si128(loader2, m2), _mm_and_si128(_mm_srli_epi64(loader2, 2), m2)));
1414 acc.vi = _mm_add_epi64(acc.vi, _mm_add_epi64(_mm_and_si128(loader1, m4), _mm_and_si128(_mm_srli_epi64(loader1, 4), m4)));
1415 } while (vptr1 < vend1);
1416 acc.vi = _mm_add_epi64(_mm_and_si128(acc.vi, m8), _mm_and_si128(_mm_srli_epi64(acc.vi, 8), m8));
1417 tot += ((acc.u8[0] + acc.u8[1]) * 0x1000100010001LLU) >> 48;
1418 }
1419 if (word12_ct) {
1420 vend1 = &(vptr1[word12_ct * 6]);
1421 word12_ct = 0;
1422 goto ld_missing_ct_intersect_main_loop;
1423 }
1424 lptr1 = (uintptr_t*)vptr1;
1425 lptr2 = (uintptr_t*)vptr2;
1426 #else
1427 uintptr_t* lptr1_end = &(lptr1[word12_ct * 12]);
1428 uintptr_t tmp_stor;
1429 uintptr_t loader1;
1430 uintptr_t loader2;
1431 while (lptr1 < lptr1_end) {
1432 loader1 = (~((*lptr1++) | (*lptr2++))) & FIVEMASK;
1433 loader2 = (~((*lptr1++) | (*lptr2++))) & FIVEMASK;
1434 loader1 += (~((*lptr1++) | (*lptr2++))) & FIVEMASK;
1435 loader2 += (~((*lptr1++) | (*lptr2++))) & FIVEMASK;
1436 loader1 += (~((*lptr1++) | (*lptr2++))) & FIVEMASK;
1437 loader2 += (~((*lptr1++) | (*lptr2++))) & FIVEMASK;
1438 loader1 = (loader1 & 0x33333333) + ((loader1 >> 2) & 0x33333333);
1439 loader1 += (loader2 & 0x33333333) + ((loader2 >> 2) & 0x33333333);
1440 tmp_stor = (loader1 & 0x0f0f0f0f) + ((loader1 >> 4) & 0x0f0f0f0f);
1441
1442 loader1 = (~((*lptr1++) | (*lptr2++))) & FIVEMASK;
1443 loader2 = (~((*lptr1++) | (*lptr2++))) & FIVEMASK;
1444 loader1 += (~((*lptr1++) | (*lptr2++))) & FIVEMASK;
1445 loader2 += (~((*lptr1++) | (*lptr2++))) & FIVEMASK;
1446 loader1 += (~((*lptr1++) | (*lptr2++))) & FIVEMASK;
1447 loader2 += (~((*lptr1++) | (*lptr2++))) & FIVEMASK;
1448 loader1 = (loader1 & 0x33333333) + ((loader1 >> 2) & 0x33333333);
1449 loader1 += (loader2 & 0x33333333) + ((loader2 >> 2) & 0x33333333);
1450 tmp_stor += (loader1 & 0x0f0f0f0f) + ((loader1 >> 4) & 0x0f0f0f0f);
1451 tot += (tmp_stor * 0x01010101) >> 24;
1452 }
1453 #endif
1454 lptr1_end2 = &(lptr1[word12_rem]);
1455 while (lptr1 < lptr1_end2) {
1456 tot += popcount2_long((~((*lptr1++) | (*lptr2++))) & FIVEMASK);
1457 }
1458 if (lshift_last) {
1459 tot += popcount2_long(((~((*lptr1) | (*lptr2))) & FIVEMASK) << lshift_last);
1460 }
1461 return tot;
1462 }
1463
// Implements --flip-scan.
//
// Founders with a nonmissing phenotype are split into controls (index 0) and
// cases (index 1).  For every pair of variants falling in the same sliding
// window, the signed genotype correlation is computed separately in each
// group.  For each variant, window partners whose |r| reaches
// ldip->flipscan_thresh in at least one group are tallied as "positive" when
// the case and control correlations have the same sign (or either is zero),
// and as "negative" otherwise.
//
// Output:
//   <outname>.flipscan          one line per variant
//   <outname>.flipscan.verbose  per-partner detail for variants with at least
//                               one negative match (LD_FLIPSCAN_VERBOSE only)
// outname_end must point at the position in outname where the extension is
// to be written.
//
// Returns 0 on success, otherwise RET_NOMEM, RET_OPEN_FAIL, RET_READ_FAIL,
// RET_WRITE_FAIL, or RET_INVALID_CMDLINE (no founders, or no founder
// case/control).  All bigstack memory allocated here is released, and both
// output files are closed, before returning.
int32_t flipscan(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, Chrom_info* chrom_info_ptr, double* set_allele_freqs, uint32_t* marker_pos, uintptr_t unfiltered_sample_ct, uintptr_t* pheno_nm, uintptr_t* pheno_c, uintptr_t* founder_info, uintptr_t* sex_male, char* outname, char* outname_end, uint32_t hh_exists) {
  unsigned char* bigstack_mark = g_bigstack_base;
  FILE* outfile = nullptr;
  FILE* outfile_verbose = nullptr;
  uintptr_t* sample_include2 = nullptr;
  uintptr_t* sample_male_include2 = nullptr;
  double min_corr = ldip->flipscan_thresh * (1 - SMALL_EPSILON);
  uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
  uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
  uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
  uintptr_t marker_idx = 0;
  uintptr_t max_window_size = 1;
  uintptr_t pct = 1;
  uintptr_t pct_thresh = marker_ct / 100;
  uint32_t verbose = (ldip->modifier / LD_FLIPSCAN_VERBOSE) & 1;
  uint32_t ignore_x = (ldip->modifier / LD_IGNORE_X) & 1;
  uint32_t max_window_locus_ct = ldip->flipscan_window_size - 1;
  uint32_t window_bp = ldip->flipscan_window_bp;
  uint32_t problem_ct = 0;
  int32_t retval = 0;
  // Two-element arrays below are indexed by is_case: [0] = controls,
  // [1] = cases.
  uintptr_t* founder_phenos[2];
  uintptr_t* pheno_male_include2[2];
  uintptr_t* window_geno[2];
  uintptr_t* window_mask[2];
  uintptr_t pheno_ct[2];
  uintptr_t pheno_ct_192_long[2];
  uint32_t pheno_ctl[2];
  uint32_t pheno_ct_mld_m1[2];
  uint32_t pheno_ct_mld_rem[2];
  int32_t dp_result[5];
  double* r_matrix;
  double* r_matrix_ptr;
  double* r_row_ptr;
  uintptr_t* loadbuf_raw;
  uintptr_t* window_geno_ptr;
  uintptr_t* window_mask_ptr;
  uintptr_t* geno_fixed_vec_ptr;
  uintptr_t* mask_fixed_vec_ptr;
  uintptr_t* geno_var_vec_ptr;
  uintptr_t* mask_var_vec_ptr;
  uint32_t* window_uidxs;
  uint32_t* window_cidx_starts;
  uint32_t* neg_uidx_buf;
  uint32_t* missing_cts;
  uint32_t* missing_cts_ptr;
  char* textbuf;
  char* wptr;
  char* wptr_start;
  char* wptr_start2;
  double pos_r_tot;
  double neg_r_tot;
  double ctrl_pheno;
  double case_pheno;
  double non_missing_ctd;
  double cov12;
  double dxx;
  double dyy;
  uintptr_t marker_uidx;
  uintptr_t cur_pheno_ct;
  uintptr_t window_cidx;
  uintptr_t window_cidx2;
  uintptr_t window_cidx3;
  uintptr_t marker_uidx2;
  uintptr_t marker_uidx3;
  uintptr_t cur_192_long;
  uintptr_t cur_ctwd12;
  uintptr_t cur_ctwd12_rem;
  uintptr_t lshift_last;
  uintptr_t ulii;
  uintptr_t uljj;
  uint32_t chrom_fo_idx;
  uint32_t chrom_idx;
  uint32_t chrom_end;
  uint32_t chrom_marker_ct;
  uint32_t chrom_marker_idx;
  uint32_t is_haploid;
  uint32_t is_x;
  uint32_t is_y;
  uint32_t is_case;
  uint32_t marker_pos_thresh;
  uint32_t pos_r_ct;
  uint32_t neg_r_ct;
  uint32_t fixed_missing_ct;
  uint32_t fixed_non_missing_ct;
  uint32_t non_missing_ct;
  uint32_t cur_mld_m1;
  uint32_t cur_mld_rem;
  uint32_t uii;

  // Output line buffer: use the shared g_textbuf unless variant IDs / allele
  // codes are long enough to need a dedicated allocation.
  ulii = 2 * (max_marker_allele_len + plink_maxsnp) + 256;
  if (ulii <= MAXLINELEN) {
    textbuf = g_textbuf;
  } else {
    if (bigstack_alloc_c(ulii, &textbuf)) {
      goto flipscan_ret_NOMEM;
    }
  }
  if (bigstack_alloc_ul(unfiltered_sample_ctl, &(founder_phenos[0])) ||
      bigstack_alloc_ul(unfiltered_sample_ctl, &(founder_phenos[1])) ||
      bigstack_alloc_ul(unfiltered_sample_ctl2, &loadbuf_raw)) {
    goto flipscan_ret_NOMEM;
  }
  loadbuf_raw[unfiltered_sample_ctl2 - 1] = 0;

  // founder_phenos[0] starts out as (founder AND phenotype-nonmissing); it is
  // then split into cases ([1], pheno_c set) and controls ([0], pheno_c
  // clear).
  memcpy(founder_phenos[0], founder_info, unfiltered_sample_ctl * sizeof(intptr_t));
  bitvec_and(pheno_nm, unfiltered_sample_ctl, founder_phenos[0]);
  if (alloc_raw_haploid_filters(unfiltered_sample_ct, hh_exists, 0, founder_phenos[0], sex_male, &sample_include2, &sample_male_include2)) {
    goto flipscan_ret_NOMEM;
  }
  memcpy(founder_phenos[1], founder_phenos[0], unfiltered_sample_ctl * sizeof(intptr_t));
  bitvec_and(pheno_c, unfiltered_sample_ctl, founder_phenos[1]);
  bitvec_andnot(pheno_c, unfiltered_sample_ctl, founder_phenos[0]);
  pheno_ct[0] = popcount_longs(founder_phenos[0], unfiltered_sample_ctl);
  pheno_ct[1] = popcount_longs(founder_phenos[1], unfiltered_sample_ctl);
  if ((!pheno_ct[0]) || (!pheno_ct[1])) {
    if (popcount_longs(founder_info, unfiltered_sample_ctl)) {
      logerrprint("Error: --flip-scan requires at least one case and one control, and only\nconsiders founders.\n");
    } else {
      logerrprint("Error: --flip-scan requires founders.  (--make-founders may come in handy\nhere.)\n");
    }
    goto flipscan_ret_INVALID_CMDLINE;
  }
  for (is_case = 0; is_case < 2; is_case++) {
    pheno_ctl[is_case] = BITCT_TO_WORDCT(pheno_ct[is_case]);

    // ulii == total number of blocks, all but last is size MULTIPLEX_LD
    ulii = (pheno_ct[is_case] + MULTIPLEX_LD - 1) / MULTIPLEX_LD;
    pheno_ct_mld_m1[is_case] = ulii - 1;

    // number of size-{48,192} sub-blocks in trailing block
#ifdef __LP64__
    pheno_ct_mld_rem[is_case] = (MULTIPLEX_LD / 192) - (ulii * MULTIPLEX_LD - pheno_ct[is_case]) / 192;
#else
    pheno_ct_mld_rem[is_case] = (MULTIPLEX_LD / 48) - (ulii * MULTIPLEX_LD - pheno_ct[is_case]) / 48;
#endif

    // number of genotype words per variant, rounded up to the next 192-sample
    // boundary
    pheno_ct_192_long[is_case] = pheno_ct_mld_m1[is_case] * (MULTIPLEX_LD / BITCT2) + pheno_ct_mld_rem[is_case] * (192 / BITCT2);
  }
  // Size the circular window buffers for the largest window on any
  // chromosome.
  for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
    max_window_size = chrom_window_max(marker_pos, marker_exclude, chrom_info_ptr, chrom_info_ptr->chrom_file_order[chrom_fo_idx], max_window_locus_ct * 2 + 1, window_bp * 2, max_window_size);
  }
  if (bigstack_alloc_ui(max_window_size, &window_uidxs) ||
      bigstack_alloc_ui(max_window_size, &window_cidx_starts) ||
      bigstack_alloc_ui(max_window_size, &neg_uidx_buf) ||
      bigstack_alloc_ul(pheno_ctl[0] * 2, &(pheno_male_include2[0])) ||
      bigstack_alloc_ul(pheno_ctl[1] * 2, &(pheno_male_include2[1])) ||
      bigstack_alloc_ui(max_window_size * 2, &missing_cts) ||
      bigstack_alloc_ul(max_window_size * pheno_ct_192_long[0], &(window_geno[0])) ||
      bigstack_alloc_ul(max_window_size * pheno_ct_192_long[0], &(window_mask[0])) ||
      bigstack_alloc_ul(max_window_size * pheno_ct_192_long[1], &(window_geno[1])) ||
      bigstack_alloc_ul(max_window_size * pheno_ct_192_long[1], &(window_mask[1])) ||
      // not advantageous to choose a very large block size here, so O(n^2)
      // memory is fine (though it can be avoided by calculating each
      // correlation twice).
      bigstack_alloc_d(max_window_size * max_window_size * 2, &r_matrix)) {
    goto flipscan_ret_NOMEM;
  }
  // r_matrix interleaves the two groups: entry (i, j) for controls lives at
  // r_matrix[2 * (i * max_window_size + j)], and for cases at the following
  // slot.  Only off-diagonal entries are ever computed, but the scan loops
  // below also visit each variant's own entry, so the diagonal of both
  // matrices must be zeroed here.
  //
  // neg_uidx_buf deliberately gets no initialization: it holds just
  // max_window_size entries, each of which is written before it is read.
  // (Storing through it with the strided diagonal index, as this loop used to
  // do, writes far outside the allocation.)
  ulii = (max_window_size + 1) * 2;
  for (uljj = 0; uljj < max_window_size; uljj++) {
    r_matrix[uljj * ulii] = 0.0;
    r_matrix[uljj * ulii + 1] = 0.0;
  }
  for (is_case = 0; is_case < 2; is_case++) {
    quaterarr_collapse_init(sex_male, unfiltered_sample_ct, founder_phenos[is_case], pheno_ct[is_case], pheno_male_include2[is_case]);
    window_geno_ptr = window_geno[is_case];
    window_mask_ptr = window_mask[is_case];
    cur_192_long = pheno_ct_192_long[is_case];
    // Zero the trailing words of every per-variant slot once, up front.
    ulii = 2 + pheno_ct_192_long[is_case] - pheno_ctl[is_case] * 2;
    for (uljj = 1; uljj <= max_window_size; uljj++) {
      fill_ulong_zero(ulii, &(window_geno_ptr[uljj * cur_192_long - ulii]));
      fill_ulong_zero(ulii, &(window_mask_ptr[uljj * cur_192_long - ulii]));
    }
  }

  // NOTE(review): the byte counts passed to memcpya() in this function are
  // expected to equal the lengths of the (space-padded) literals next to
  // them; re-check the column padding whenever these strings are touched.
  memcpy(outname_end, ".flipscan", 10);
  if (fopen_checked(outname, "w", &outfile)) {
    goto flipscan_ret_OPEN_FAIL;
  }
  wptr = memcpya(textbuf, " CHR ", 7);
  wptr = fw_strcpyn(plink_maxsnp, 3, "SNP", wptr);
  wptr = strcpya(wptr, " BP A1 A2 F POS R_POS NEG R_NEG NEGSNPS\n");
  if (fwrite_checked(textbuf, wptr - textbuf, outfile)) {
    goto flipscan_ret_WRITE_FAIL;
  }
  if (verbose) {
    memcpy(&(outname_end[9]), ".verbose", 9);
    if (fopen_checked(outname, "w", &outfile_verbose)) {
      goto flipscan_ret_OPEN_FAIL;
    }
    // restore outname to the main report's name for the final log message
    outname_end[9] = '\0';
    // er, this is a misalignment disaster
    wptr = memcpya(textbuf, "CHR_INDX ", 9);
    wptr = fw_strcpyn(plink_maxsnp, 8, "SNP_INDX", wptr);
    wptr = memcpya(wptr, " BP_INDX A1_INDX ", 22);
    wptr = fw_strcpyn(plink_maxsnp, 8, "SNP_PAIR", wptr);
    wptr = strcpya(wptr, " BP_PAIR A1_PAIR R_A R_U\n");
    if (fwrite_checked(textbuf, wptr - textbuf, outfile_verbose)) {
      goto flipscan_ret_WRITE_FAIL;
    }
  }
  printf("--flip-scan%s: 0%%", verbose? " verbose" : "");
  fflush(stdout);
  for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
    chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
    chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
    marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end);
    chrom_marker_ct = chrom_end - marker_uidx - popcount_bit_idx(marker_exclude, marker_uidx, chrom_end);
    if (chrom_marker_ct < 2) {
      // no pairs on this chromosome; still count it toward progress
      marker_idx += chrom_marker_ct;
      continue;
    }
    // The chromosome column is identical for every line of this chromosome,
    // so it is rendered once at the front of textbuf.
    wptr_start = width_force(6, textbuf, chrom_name_write(chrom_info_ptr, chrom_idx, textbuf));
    *wptr_start++ = ' ';
    is_haploid = is_set(chrom_info_ptr->haploid_mask, chrom_idx);
    is_x = (chrom_idx == ((uint32_t)chrom_info_ptr->xymt_codes[X_OFFSET]));
    is_y = (chrom_idx == ((uint32_t)chrom_info_ptr->xymt_codes[Y_OFFSET]));
    if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
      goto flipscan_ret_READ_FAIL;
    }
    chrom_marker_idx = 0;
    // window_cidx: circular slot of the most recently loaded variant.
    // window_cidx2: circular slot of the oldest variant not yet reported.
    window_cidx = max_window_size - 1;
    window_cidx2 = 0;
    do {
      if (++window_cidx == max_window_size) {
        window_cidx = 0;
      }
      window_uidxs[window_cidx] = marker_uidx;

      // circular index of beginning of window starting at current marker
      window_cidx_starts[window_cidx] = window_cidx2;
      if (load_raw(unfiltered_sample_ct4, bedfile, loadbuf_raw)) {
        goto flipscan_ret_READ_FAIL;
      }
      if (IS_SET(marker_reverse, marker_uidx)) {
        reverse_loadbuf(unfiltered_sample_ct, (unsigned char*)loadbuf_raw);
      }
      if (is_haploid && hh_exists) {
        haploid_fix(hh_exists, sample_include2, sample_male_include2, unfiltered_sample_ct, is_x, is_y, (unsigned char*)loadbuf_raw);
      }
      for (is_case = 0; is_case < 2; is_case++) {
        // similar to ld_block_thread() below
        cur_pheno_ct = pheno_ct[is_case];
        uii = cur_pheno_ct / BITCT2;
        cur_ctwd12 = uii / 12;
        cur_ctwd12_rem = uii - (12 * cur_ctwd12);
        lshift_last = 2 * ((0x7fffffc0 - cur_pheno_ct) % BITCT2);
        cur_mld_m1 = pheno_ct_mld_m1[is_case];
        cur_mld_rem = pheno_ct_mld_rem[is_case];
        cur_192_long = pheno_ct_192_long[is_case];
        window_geno_ptr = window_geno[is_case];
        window_mask_ptr = window_mask[is_case];
        missing_cts_ptr = &(missing_cts[is_case * max_window_size]);
        r_matrix_ptr = &(r_matrix[is_case]);
        geno_fixed_vec_ptr = &(window_geno_ptr[window_cidx * cur_192_long]);
        mask_fixed_vec_ptr = &(window_mask_ptr[window_cidx * cur_192_long]);
        copy_quaterarr_nonempty_subset(loadbuf_raw, founder_phenos[is_case], unfiltered_sample_ct, cur_pheno_ct, geno_fixed_vec_ptr);
        ld_process_load2(geno_fixed_vec_ptr, mask_fixed_vec_ptr, &fixed_missing_ct, cur_pheno_ct, is_x && (!ignore_x), pheno_male_include2[is_case]);
        fixed_non_missing_ct = cur_pheno_ct - fixed_missing_ct;
        missing_cts_ptr[window_cidx] = fixed_missing_ct;
        // Correlate the new variant against every older variant still in the
        // window, storing the result symmetrically.
        window_cidx3 = window_cidx2;
        while (window_cidx3 != window_cidx) {
          geno_var_vec_ptr = &(window_geno_ptr[window_cidx3 * cur_192_long]);
          mask_var_vec_ptr = &(window_mask_ptr[window_cidx3 * cur_192_long]);
          non_missing_ct = fixed_non_missing_ct - missing_cts_ptr[window_cidx3];
          if (fixed_missing_ct && missing_cts_ptr[window_cidx3]) {
            // samples missing at both variants were subtracted twice
            non_missing_ct += ld_missing_ct_intersect(mask_var_vec_ptr, mask_fixed_vec_ptr, cur_ctwd12, cur_ctwd12_rem, lshift_last);
          }
          if (non_missing_ct) {
            dp_result[0] = cur_pheno_ct;
            dp_result[1] = -((int32_t)fixed_non_missing_ct);
            dp_result[2] = missing_cts_ptr[window_cidx3] - cur_pheno_ct;
            dp_result[3] = dp_result[1];
            dp_result[4] = dp_result[2];
            ld_dot_prod(geno_var_vec_ptr, geno_fixed_vec_ptr, mask_var_vec_ptr, mask_fixed_vec_ptr, dp_result, cur_mld_m1, cur_mld_rem);
            non_missing_ctd = (double)((int32_t)non_missing_ct);
            dxx = dp_result[1];
            dyy = dp_result[2];
            cov12 = dp_result[0] * non_missing_ctd - dxx * dyy;
            dxx = (dp_result[3] * non_missing_ctd + dxx * dxx) * (dp_result[4] * non_missing_ctd + dyy * dyy);
            dxx = cov12 / sqrt(dxx);
          } else {
            dxx = 0.0;
          }
          r_matrix_ptr[2 * (window_cidx3 * max_window_size + window_cidx)] = dxx;
          r_matrix_ptr[2 * (window_cidx * max_window_size + window_cidx3)] = dxx;
          if (++window_cidx3 == max_window_size) {
            window_cidx3 = 0;
          }
        }
      }

      if (++chrom_marker_idx < chrom_marker_ct) {
        marker_uidx++;
        if (IS_SET(marker_exclude, marker_uidx)) {
          marker_uidx = next_unset_ul_unsafe(marker_exclude, marker_uidx);
          if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
            goto flipscan_ret_READ_FAIL;
          }
        }
        // Variants positioned below (next variant's position - window_bp)
        // cannot pair with anything still to be loaded.
        marker_pos_thresh = marker_pos[marker_uidx];
        if (marker_pos_thresh < window_bp) {
          marker_pos_thresh = 0;
        } else {
          marker_pos_thresh -= window_bp;
        }
      } else {
        // close out the chromosome
        marker_pos_thresh = 0x80000000U;
      }
      // only need to enforce window locus count constraint during first loop
      // iteration
      ulii = window_cidx2 + max_window_locus_ct;
      if (ulii >= max_window_size) {
        ulii -= max_window_size;
      }
      marker_uidx2 = window_uidxs[window_cidx2];
      if ((ulii == window_cidx) || (marker_pos[marker_uidx2] < marker_pos_thresh)) {
        // Report (and retire) every variant whose window is now complete.
        do {
          pos_r_tot = 0.0;
          neg_r_tot = 0.0;
          pos_r_ct = 0;
          neg_r_ct = 0;
          r_row_ptr = &(r_matrix[2 * max_window_size * window_cidx2]);
          window_cidx3 = window_cidx_starts[window_cidx2];
          while (1) {
            ctrl_pheno = r_row_ptr[2 * window_cidx3];
            case_pheno = r_row_ptr[2 * window_cidx3 + 1];
            if ((fabs(ctrl_pheno) >= min_corr) || (fabs(case_pheno) >= min_corr)) {
              dxx = fabs(ctrl_pheno) + fabs(case_pheno);
              if (case_pheno * ctrl_pheno >= 0.0) {
                pos_r_ct++;
                pos_r_tot += dxx;
              } else {
                neg_uidx_buf[neg_r_ct++] = window_uidxs[window_cidx3];
                neg_r_tot += dxx;
              }
            }
            if (window_cidx3 == window_cidx) {
              break;
            }
            if (++window_cidx3 == max_window_size) {
              window_cidx3 = 0;
            }
          }
          // wptr_start2 marks the end of the CHR/SNP/BP/A1 prefix, which the
          // verbose report reuses for each of its lines.
          wptr_start2 = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx2 * max_marker_id_len]), wptr_start);
          wptr_start2 = memseta(wptr_start2, 32, 3);
          wptr_start2 = uint32toa_w10x(marker_pos[marker_uidx2], ' ', wptr_start2);
          wptr_start2 = fw_strcpy(4, marker_allele_ptrs[2 * marker_uidx2], wptr_start2);
          *wptr_start2++ = ' ';
          wptr = fw_strcpy(4, marker_allele_ptrs[2 * marker_uidx2 + 1], wptr_start2);
          *wptr++ = ' ';
          wptr = dtoa_g_wxp3x(1.0 - set_allele_freqs[marker_uidx2], 8, ' ', wptr);
          wptr = uint32toa_w6x(pos_r_ct, ' ', wptr);
          if (!pos_r_ct) {
            wptr = memcpya(wptr, " NA", 8);
          } else {
            // each tallied partner contributed |r_ctrl| + |r_case|
            wptr = dtoa_g_wxp3(pos_r_tot / ((int32_t)(pos_r_ct * 2)), 8, wptr);
          }
          *wptr++ = ' ';
          wptr = uint32toa_w6x(neg_r_ct, ' ', wptr);
          if (!neg_r_ct) {
            wptr = memcpya(wptr, " NA", 8);
          } else {
            wptr = dtoa_g_wxp3(neg_r_tot / ((int32_t)(neg_r_ct * 2)), 8, wptr);
          }
          *wptr++ = ' ';
          if (fwrite_checked(textbuf, wptr - textbuf, outfile)) {
            goto flipscan_ret_WRITE_FAIL;
          }
          if (neg_r_ct) {
            // NEGSNPS column: '|'-separated IDs of the negative partners
            for (ulii = 0; ulii < neg_r_ct; ulii++) {
              if (ulii) {
                putc_unlocked('|', outfile);
              }
              fputs(&(marker_ids[neg_uidx_buf[ulii] * max_marker_id_len]), outfile);
            }
            problem_ct++;
            if (verbose) {
              // Second pass over the same window: one detail line per partner
              // that cleared the threshold (positive or negative).
              window_cidx3 = window_cidx_starts[window_cidx2];
              while (1) {
                ctrl_pheno = r_row_ptr[2 * window_cidx3];
                case_pheno = r_row_ptr[2 * window_cidx3 + 1];
                if ((fabs(ctrl_pheno) >= min_corr) || (fabs(case_pheno) >= min_corr)) {
                  marker_uidx3 = window_uidxs[window_cidx3];
                  wptr = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx3 * max_marker_id_len]), wptr_start2);
                  wptr = memseta(wptr, 32, 3);
                  wptr = uint32toa_w10x(marker_pos[marker_uidx3], ' ', wptr);
                  wptr = fw_strcpy(4, marker_allele_ptrs[2 * marker_uidx3], wptr);
                  *wptr++ = ' ';
                  wptr = dtoa_g_wxp3x(case_pheno, 8, ' ', wptr);
                  wptr = dtoa_g_wxp3x(ctrl_pheno, 8, '\n', wptr);
                  if (fwrite_checked(textbuf, wptr - textbuf, outfile_verbose)) {
                    goto flipscan_ret_WRITE_FAIL;
                  }
                }
                if (window_cidx3 == window_cidx) {
                  break;
                }
                if (++window_cidx3 == max_window_size) {
                  window_cidx3 = 0;
                }
              }
            }
          }
          putc_unlocked('\n', outfile);
          if (++marker_idx >= pct_thresh) {
            if (pct > 10) {
              putc_unlocked('\b', stdout);
            }
            pct = (marker_idx * 100LLU) / marker_ct;
            if (pct < 100) {
              printf("\b\b%" PRIuPTR "%%", pct);
              fflush(stdout);
              pct_thresh = ((++pct) * ((uint64_t)marker_ct)) / 100;
            }
          }
          // better to perform this comparison first
          if (window_cidx2 == window_cidx) {
            if (++window_cidx2 == max_window_size) {
              window_cidx2 = 0;
            }
            break;
          }
          if (++window_cidx2 == max_window_size) {
            window_cidx2 = 0;
          }
          marker_uidx2 = window_uidxs[window_cidx2];
        } while (marker_pos[marker_uidx2] < marker_pos_thresh);
      }
    } while (chrom_marker_idx < chrom_marker_ct);
  }
  if (fclose_null(&outfile)) {
    goto flipscan_ret_WRITE_FAIL;
  }
  if (verbose) {
    if (fclose_null(&outfile_verbose)) {
      goto flipscan_ret_WRITE_FAIL;
    }
  }
  putc_unlocked('\r', stdout);
  // not actually possible to have exactly one problem variant, heh
  LOGPRINTF("--flip-scan%s: %u variants with at least one negative LD match.\n", verbose? " verbose" : "", problem_ct);
  if (verbose) {
    LOGPRINTFWW("Report written to %s ; neg-match details written to %s.verbose .\n", outname, outname);
  } else {
    LOGPRINTFWW("Report written to %s .\n", outname);
  }
  // Error labels live inside a never-entered loop so that the success path
  // falls straight through to the shared cleanup below.
  while (0) {
  flipscan_ret_NOMEM:
    retval = RET_NOMEM;
    break;
  flipscan_ret_OPEN_FAIL:
    retval = RET_OPEN_FAIL;
    break;
  flipscan_ret_READ_FAIL:
    retval = RET_READ_FAIL;
    break;
  flipscan_ret_WRITE_FAIL:
    retval = RET_WRITE_FAIL;
    break;
  flipscan_ret_INVALID_CMDLINE:
    retval = RET_INVALID_CMDLINE;
    break;
  }
  bigstack_reset(bigstack_mark);
  fclose_cond(outfile);
  fclose_cond(outfile_verbose);
  return retval;
}
1936
// LD multithread globals
//
// Shared state read by ld_block_thread() and ld_matrix_emitn().  Per-variant
// genotype/mask buffers are strided by g_ld_founder_ct_192_long words; the
// "1" buffers are indexed by idx1-block position and the "2" buffers by
// idx2-block position.
static uintptr_t* g_ld_geno1;
static uintptr_t* g_ld_geno2;
static uintptr_t* g_ld_geno_masks1;
static uintptr_t* g_ld_geno_masks2;
static uint32_t* g_ld_missing_cts1;
static uint32_t* g_ld_missing_cts2;
// Two entries per idx1-block variant: [2k] is the start and [2k + 1] the end
// of the idx2 range that ld_block_thread() evaluates for variant k.
static uint32_t* g_ld_interval1;
// ld_block_thread() writes to g_ld_results when it is non-null, and to
// g_ld_results_f otherwise.  Rows are g_ld_marker_ctm8 entries apart.
static double* g_ld_results;
static float* g_ld_results_f;
static double* g_ld_set_allele_freqs;
static uintptr_t g_ld_idx1_block_size;
// g_ld_idx2_block_size, g_ld_idx2_block_start and g_ld_block_idx1 are also
// used by ld_matrix_emitn() as its resume state (current column end, current
// column, and current row, respectively) between calls.
static uintptr_t g_ld_idx2_block_size;
static uintptr_t g_ld_idx2_block_start;
static uintptr_t g_ld_block_idx1;
static uintptr_t g_ld_marker_ct;
static uintptr_t g_ld_marker_ctm8;
static uintptr_t g_ld_founder_ct;
static uintptr_t g_ld_founder_ct_192_long;
static uint32_t g_ld_founder_ct_mld_m1;
static uint32_t g_ld_founder_ct_mld_rem;
// Nonzero: ld_block_thread() stores r^2 (signed when g_ld_keep_sign is set);
// zero: it stores r.
static uint32_t g_ld_is_r2;
static uint32_t g_ld_thread_ct;

// with '--r2 dprime', males should be downweighted by a factor of 2 when
// considering two X chromosome variants, and by a factor of sqrt(2) when doing
// an inter-chromosome evaluation involving a single Xchr variant.  (The
// sqrt(2) factor is not implemented by PLINK 1.07, but the math compels its
// use.)
static uintptr_t* g_ld_sex_male;
static uintptr_t* g_ld_thread_wkspace;
static uint32_t g_ld_xstart1;
static uint32_t g_ld_xend1;
static uint32_t g_ld_xstart2;
static uint32_t g_ld_xend2;

// Reporting state.  g_ld_delimiter and g_ld_modifier are read by
// ld_matrix_emitn(); the remaining variables are not referenced in the
// functions visible alongside these declarations.
static char g_ld_delimiter;
static uint32_t g_ld_plink_maxsnp;
static char* g_ld_marker_ids;
static Chrom_info* g_ld_chrom_info_ptr;
static uint32_t* g_ld_marker_pos;
static double* g_ld_marker_cms;
static uintptr_t* g_ld_marker_exclude_idx1;
static uintptr_t* g_ld_marker_exclude;
static char** g_ld_marker_allele_ptrs;
static uintptr_t g_ld_max_marker_id_len;
static uintptr_t g_ld_marker_uidx1;
static uintptr_t g_ld_uidx2_start;
static uintptr_t g_ld_marker_uidx2;
static uintptr_t g_ld_block_idx2;
static double g_ld_window_r2;
static uint32_t g_ld_is_first_block;
static uint32_t g_ld_is_inter_chr;
static uint32_t g_ld_prefix_len;
static uint32_t g_ld_keep_sign;
static uint32_t g_ld_modifier;
1993
ld_block_thread(void * arg)1994 THREAD_RET_TYPE ld_block_thread(void* arg) {
1995 uintptr_t tidx = (uintptr_t)arg;
1996 uint32_t thread_ct = g_ld_thread_ct;
1997 uintptr_t block_idx1_start = (tidx * g_ld_idx1_block_size) / thread_ct;
1998 uintptr_t block_idx1_end = ((tidx + 1) * g_ld_idx1_block_size) / thread_ct;
1999 uintptr_t marker_idx2_maxw = g_ld_marker_ctm8;
2000 uintptr_t founder_ct = g_ld_founder_ct;
2001 uintptr_t founder_ctwd = founder_ct / BITCT2;
2002 uintptr_t founder_ctwd12 = founder_ctwd / 12;
2003 uintptr_t founder_ctwd12_rem = founder_ctwd - (12 * founder_ctwd12);
2004 uintptr_t lshift_last = 2 * ((0x7fffffc0 - founder_ct) % BITCT2);
2005 uintptr_t founder_ct_192_long = g_ld_founder_ct_192_long;
2006 uintptr_t* geno1 = g_ld_geno1;
2007 uintptr_t* geno_masks1 = g_ld_geno_masks1;
2008 uint32_t* missing_cts1 = g_ld_missing_cts1;
2009 uint32_t* ld_interval1 = g_ld_interval1;
2010 uint32_t founder_ct_mld_m1 = g_ld_founder_ct_mld_m1;
2011 uint32_t founder_ct_mld_rem = g_ld_founder_ct_mld_rem;
2012 uint32_t is_r2 = g_ld_is_r2;
2013 uint32_t keep_sign = g_ld_keep_sign;
2014 double* results = g_ld_results;
2015 float* results_f = g_ld_results_f;
2016 double* rptr = nullptr;
2017 float* rptr_f = nullptr;
2018 int32_t dp_result[5];
2019 uintptr_t* geno_fixed_vec_ptr;
2020 uintptr_t* geno_var_vec_ptr;
2021 uintptr_t* mask_fixed_vec_ptr;
2022 uintptr_t* mask_var_vec_ptr;
2023 uintptr_t* geno2;
2024 uintptr_t* geno_masks2;
2025 uint32_t* missing_cts2;
2026 uintptr_t idx2_block_size;
2027 uintptr_t idx2_block_start;
2028 uintptr_t block_idx1;
2029 uintptr_t block_idx2;
2030 uintptr_t cur_block_idx2_end;
2031 double non_missing_ctd;
2032 double cov12;
2033 double dxx;
2034 double dyy;
2035 float non_missing_ctf;
2036 float cov12_f;
2037 float fxx;
2038 float fyy;
2039 uint32_t fixed_missing_ct;
2040 uint32_t fixed_non_missing_ct;
2041 uint32_t non_missing_ct;
2042 while (1) {
2043 idx2_block_size = g_ld_idx2_block_size;
2044 idx2_block_start = g_ld_idx2_block_start;
2045 geno2 = g_ld_geno2;
2046 geno_masks2 = g_ld_geno_masks2;
2047 missing_cts2 = g_ld_missing_cts2;
2048 for (block_idx1 = block_idx1_start; block_idx1 < block_idx1_end; block_idx1++) {
2049 fixed_non_missing_ct = ld_interval1[block_idx1 * 2]; // temporary redefine
2050 block_idx2 = fixed_non_missing_ct;
2051 cur_block_idx2_end = ld_interval1[block_idx1 * 2 + 1];
2052 if (block_idx2 < idx2_block_start) {
2053 if (cur_block_idx2_end <= idx2_block_start) {
2054 continue;
2055 }
2056 block_idx2 = 0;
2057 } else {
2058 block_idx2 -= idx2_block_start;
2059 if (block_idx2 >= idx2_block_size) {
2060 // nondecreasing, so we can safely exit
2061 break;
2062 }
2063 }
2064 cur_block_idx2_end -= idx2_block_start;
2065 if (cur_block_idx2_end > idx2_block_size) {
2066 cur_block_idx2_end = idx2_block_size;
2067 }
2068 if (results) {
2069 rptr = &(results[block_idx1 * marker_idx2_maxw + block_idx2 + idx2_block_start - fixed_non_missing_ct]);
2070 } else {
2071 rptr_f = &(results_f[block_idx1 * marker_idx2_maxw + block_idx2 + idx2_block_start - fixed_non_missing_ct]);
2072 }
2073 fixed_missing_ct = missing_cts1[block_idx1];
2074 fixed_non_missing_ct = founder_ct - fixed_missing_ct;
2075 geno_fixed_vec_ptr = &(geno1[block_idx1 * founder_ct_192_long]);
2076 mask_fixed_vec_ptr = &(geno_masks1[block_idx1 * founder_ct_192_long]);
2077 for (; block_idx2 < cur_block_idx2_end; block_idx2++) {
2078 geno_var_vec_ptr = &(geno2[block_idx2 * founder_ct_192_long]);
2079 mask_var_vec_ptr = &(geno_masks2[block_idx2 * founder_ct_192_long]);
2080 non_missing_ct = fixed_non_missing_ct - missing_cts2[block_idx2];
2081 if (fixed_missing_ct && missing_cts2[block_idx2]) {
2082 non_missing_ct += ld_missing_ct_intersect(mask_var_vec_ptr, mask_fixed_vec_ptr, founder_ctwd12, founder_ctwd12_rem, lshift_last);
2083 }
2084 dp_result[0] = founder_ct;
2085 dp_result[1] = -fixed_non_missing_ct;
2086 dp_result[2] = missing_cts2[block_idx2] - founder_ct;
2087 dp_result[3] = dp_result[1];
2088 dp_result[4] = dp_result[2];
2089 ld_dot_prod(geno_var_vec_ptr, geno_fixed_vec_ptr, mask_var_vec_ptr, mask_fixed_vec_ptr, dp_result, founder_ct_mld_m1, founder_ct_mld_rem);
2090 if (results) {
2091 non_missing_ctd = (double)((int32_t)non_missing_ct);
2092 dxx = dp_result[1];
2093 dyy = dp_result[2];
2094 cov12 = dp_result[0] * non_missing_ctd - dxx * dyy;
2095 dxx = (dp_result[3] * non_missing_ctd + dxx * dxx) * (dp_result[4] * non_missing_ctd + dyy * dyy);
2096 if (!is_r2) {
2097 dxx = cov12 / sqrt(dxx);
2098 } else if (!keep_sign) {
2099 dxx = (cov12 * cov12) / dxx;
2100 } else {
2101 dxx = (fabs(cov12) * cov12) / dxx;
2102 }
2103 *rptr++ = dxx;
2104 } else {
2105 non_missing_ctf = (float)((int32_t)non_missing_ct);
2106 fxx = dp_result[1];
2107 fyy = dp_result[2];
2108 cov12_f = dp_result[0] * non_missing_ctf - fxx * fyy;
2109 fxx = (dp_result[3] * non_missing_ctf + fxx * fxx) * (dp_result[4] * non_missing_ctf + fyy * fyy);
2110 if (!is_r2) {
2111 fxx = cov12_f / sqrt(fxx);
2112 } else if (!keep_sign) {
2113 fxx = (cov12_f * cov12_f) / fxx;
2114 } else {
2115 fxx = (fabs(cov12_f) * cov12_f) / fxx;
2116 }
2117 *rptr_f++ = fxx;
2118 }
2119 }
2120 }
2121 if ((!tidx) || g_is_last_thread_block) {
2122 THREAD_RETURN;
2123 }
2124 THREAD_BLOCK_FINISH(tidx);
2125 }
2126 }
2127
ld_matrix_emitn(uint32_t overflow_ct,unsigned char * readbuf)2128 uint32_t ld_matrix_emitn(uint32_t overflow_ct, unsigned char* readbuf) {
2129 char* sptr_cur = (char*)(&(readbuf[overflow_ct]));
2130 char* readbuf_end = (char*)(&(readbuf[PIGZ_BLOCK_SIZE]));
2131 uintptr_t block_size1 = g_ld_idx1_block_size;
2132 uintptr_t marker_ct = g_ld_marker_ct;
2133 uintptr_t marker_ctm8 = g_ld_marker_ctm8;
2134 uintptr_t block_idx1 = g_ld_block_idx1;
2135 uintptr_t marker_idx = g_ld_idx2_block_start;
2136 uintptr_t marker_idx_end = g_ld_idx2_block_size;
2137 uint32_t is_square = ((g_ld_modifier & LD_MATRIX_SHAPEMASK) == LD_MATRIX_SQ);
2138 uint32_t is_square0 = ((g_ld_modifier & LD_MATRIX_SHAPEMASK) == LD_MATRIX_SQ0);
2139 char delimiter = g_ld_delimiter;
2140 double* results = g_ld_results;
2141 double* dptr;
2142 uintptr_t ulii;
2143 while (block_idx1 < block_size1) {
2144 dptr = &(results[block_idx1 * marker_ctm8 + marker_idx]);
2145 while (marker_idx < marker_idx_end) {
2146 sptr_cur = dtoa_gx(*dptr++, delimiter, sptr_cur);
2147 marker_idx++;
2148 if (sptr_cur > readbuf_end) {
2149 goto ld_matrix_emitn_ret;
2150 }
2151 }
2152 if (is_square0 && (marker_idx < marker_ct)) {
2153 ulii = (((uintptr_t)(readbuf_end - sptr_cur)) + 1) / 2;
2154 // bugfix: can't be <= since tab delimiter wouldn't be handled correctly
2155 // on subsequent pass
2156 if (ulii < marker_ct - marker_idx) {
2157 sptr_cur = memcpya(sptr_cur, g_textbuf, ulii * 2);
2158 marker_idx += ulii;
2159 goto ld_matrix_emitn_ret;
2160 } else {
2161 sptr_cur = memcpya(sptr_cur, g_textbuf, (marker_ct - marker_idx) * 2);
2162 marker_idx = marker_ct;
2163 }
2164 }
2165 if (delimiter == '\t') {
2166 sptr_cur--;
2167 }
2168 *sptr_cur++ = '\n';
2169 marker_idx = 0;
2170 if (!is_square) {
2171 marker_idx_end++;
2172 }
2173 block_idx1++;
2174 }
2175 ld_matrix_emitn_ret:
2176 g_ld_block_idx1 = block_idx1;
2177 g_ld_idx2_block_start = marker_idx;
2178 g_ld_idx2_block_size = marker_idx_end;
2179 return (uintptr_t)(((unsigned char*)sptr_cur) - readbuf);
2180 }
2181
// Computes the --r/--r2 matrix for the loaded founders and writes it to
// outname in square, square0, or triangle shape, as delimited text
// (optionally gzipped) or as raw binary doubles ("bin") / floats ("bin4").
//
// Work is tiled: an "idx1" block of rows is loaded and kept resident, then
// every needed column is streamed past it in "idx2" chunks, with
// ld_block_thread() filling g_ld_results / g_ld_results_f for each chunk.
// Once all columns for the idx1 block are done, its rows are written out and
// the next idx1 block is loaded.
//
// With --parallel (parallel_tot > 1), only the row range belonging to
// parallel_idx is processed; for non-square shapes the range is chosen so
// that each piece covers roughly the same number of triangle cells.
//
// Returns 0 on success, or RET_NOMEM / RET_OPEN_FAIL / RET_READ_FAIL /
// RET_WRITE_FAIL / RET_INVALID_CMDLINE / RET_THREAD_CREATE_FAIL.  bigstack
// memory allocated here is left for the caller to release.
int32_t ld_report_matrix(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uint32_t parallel_idx, uint32_t parallel_tot, uintptr_t* sex_male, uintptr_t* founder_include2, uintptr_t* founder_male_include2, uintptr_t* loadbuf, char* outname, uint32_t hh_exists) {
  FILE* outfile = nullptr;
  uint32_t ld_modifier = ldip->modifier;
  uint32_t is_binary = ld_modifier & (LD_MATRIX_BIN | LD_MATRIX_BIN4);
  uint32_t is_square = ((ld_modifier & LD_MATRIX_SHAPEMASK) == LD_MATRIX_SQ);
  uint32_t is_square0 = ((ld_modifier & LD_MATRIX_SHAPEMASK) == LD_MATRIX_SQ0);
  uint32_t output_single_prec = (ld_modifier / LD_MATRIX_BIN4) & 1;
  uint32_t output_gz = ld_modifier & LD_REPORT_GZ;
  uint32_t ignore_x = (ld_modifier / LD_IGNORE_X) & 1;
  uintptr_t marker_ct = g_ld_marker_ct;
  uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
  uintptr_t marker_ctm8 = round_up_pow2(marker_ct, 8);
  uintptr_t founder_ct = g_ld_founder_ct;
  uintptr_t founder_ctl = BITCT_TO_WORDCT(founder_ct);
  uintptr_t founder_ct_192_long = g_ld_founder_ct_192_long;
  uintptr_t final_mask = get_final_mask(founder_ct);
  uintptr_t marker_uidx_base = next_unset_unsafe(marker_exclude, 0);
  uintptr_t marker_uidx1 = marker_uidx_base;
  uintptr_t marker_idx1_start = (((uint64_t)parallel_idx) * marker_ct) / parallel_tot;
  uintptr_t marker_idx1 = marker_idx1_start;
  uintptr_t marker_idx1_end = (((uint64_t)(parallel_idx + 1)) * marker_ct) / parallel_tot;
  uintptr_t pct = 1;
  uint64_t job_size = marker_idx1_end - marker_idx1_start;
  uint64_t pct_thresh = job_size / 100;
  Chrom_info* chrom_info_ptr = g_ld_chrom_info_ptr;
  uint32_t founder_trail_ct = founder_ct_192_long - founder_ctl * 2;
  uint32_t thread_ct = g_ld_thread_ct;
  uint32_t chrom_fo_idx = 0;
  uint32_t is_haploid = 0;
  uint32_t is_x = 0;
  uint32_t is_y = 0;
  uint32_t not_first_write = 0;
  int32_t retval = 0;
  unsigned char* bigstack_mark2;
  uintptr_t* ulptr;
  unsigned char* overflow_buf;
  uint64_t tests_completed;
  uintptr_t thread_workload;
  uintptr_t cur_idx2_block_size;
  uintptr_t marker_idx2_end;
  uintptr_t block_idx1;
  uintptr_t marker_uidx2;
  uintptr_t marker_idx2;
  uintptr_t block_idx2;
  uintptr_t idx1_block_size;
  uintptr_t idx2_block_size;
  uintptr_t ulii;
  uintptr_t uljj;
  uint32_t chrom_idx;
  uint32_t chrom_end;
  uint32_t is_last_block;

  if (bigstack_alloc_uc(262144, &overflow_buf)) {
    goto ld_report_matrix_ret_NOMEM;
  }
  if (output_single_prec) {
    // force divisibility by 16 instead (cacheline = 64 bytes, float = 4)
    marker_ctm8 = (marker_ctm8 + 8) & (~15);
  }
  if (is_binary) {
    // text output is opened by the writer routines further down instead
    if (fopen_checked(outname, FOPEN_WB, &outfile)) {
      goto ld_report_matrix_ret_OPEN_FAIL;
    }
  }
  // claim up to half of memory with idx1 bufs; each marker costs
  //   founder_ct_192_long * sizeof(intptr_t) for genotype buffer
  // + founder_ct_192_long * sizeof(intptr_t) for missing mask buffer
  // + sizeof(int32_t) for g_ld_missing_cts1 entry
  // + 2 * sizeof(int32_t) for g_ld_interval1
  // + marker_ctm8 * sizeof(double) or marker_ctm16 * sizeof(float) for
  //   g_ld_results buffer
  // round down to multiple of thread_ct for better workload distribution
  ulii = founder_ct_192_long * 2 * sizeof(intptr_t) + 3 * sizeof(int32_t) + marker_ctm8 * (8 - 4 * output_single_prec);
  idx1_block_size = bigstack_left() / (ulii * 2);
  thread_workload = idx1_block_size / thread_ct;
  if (!thread_workload) {
    goto ld_report_matrix_ret_NOMEM;
  }
  idx1_block_size = thread_workload * thread_ct;
  if ((parallel_tot > 1) && (marker_ct < 2 * parallel_tot)) {
    LOGERRPRINTF("Error: Too few variants in --r%s run for --parallel %u %u.\n", g_ld_is_r2? "2" : "", parallel_idx + 1, parallel_tot);
    goto ld_report_matrix_ret_INVALID_CMDLINE;
  }
  if (!is_square) {
    // Only the lower triangle is computed, so split the row range by
    // triangle area rather than by row count.
    job_size = ((uint64_t)marker_ct) * (marker_ct + 1);
    if (parallel_tot > 1) {
      job_size /= parallel_tot;
      marker_idx1_start = triangle_divide(job_size * parallel_idx, 1);
      if (parallel_idx + 1 < parallel_tot) {
        marker_idx1_end = triangle_divide(job_size * (parallel_idx + 1), 1);
      }
      job_size = ((((uint64_t)marker_idx1_end) * (marker_idx1_end + 1)) - (((uint64_t)marker_idx1_start) * (marker_idx1_start + 1))) / 2;
    } else {
      job_size /= 2;
    }
  }
  pct_thresh = job_size / 100;
  if (idx1_block_size > marker_idx1_end - marker_idx1_start) {
    idx1_block_size = marker_idx1_end - marker_idx1_start;
  }
  // The block size was derived from bigstack_left(), so these are expected to
  // succeed, but a failure here must not be followed by writes through an
  // unset pointer.
  if (bigstack_alloc_ul(founder_ct_192_long * idx1_block_size, &g_ld_geno1) ||
      bigstack_alloc_ul(founder_ct_192_long * idx1_block_size, &g_ld_geno_masks1) ||
      bigstack_alloc_ui(idx1_block_size, &g_ld_missing_cts1) ||
      bigstack_alloc_ui(idx1_block_size * 2, &g_ld_interval1)) {
    goto ld_report_matrix_ret_NOMEM;
  }

  if (!output_single_prec) {
    // may want to set g_ld_results_f to nullptr
    if (bigstack_alloc_d(marker_ctm8 * idx1_block_size, &g_ld_results)) {
      goto ld_report_matrix_ret_NOMEM;
    }
  } else {
    g_ld_results = nullptr;
    if (bigstack_alloc_f(marker_ctm8 * idx1_block_size, &g_ld_results_f)) {
      goto ld_report_matrix_ret_NOMEM;
    }
  }

  // claim the other half with idx2 buffer
  // (per-marker cost no longer includes the results row or interval pair)
  ulii -= marker_ctm8 * (8 - 4 * output_single_prec) + 2 * sizeof(int32_t);
  if (!output_single_prec) {
    idx2_block_size = (bigstack_left() / ulii) & (~(7 * ONELU));
  } else {
    idx2_block_size = (bigstack_left() / ulii) & (~(15 * ONELU));
  }
  if (idx2_block_size > marker_ctm8) {
    idx2_block_size = marker_ctm8;
  }
  bigstack_mark2 = g_bigstack_base;
  // Shrink the idx2 chunk by one alignment unit at a time until all three
  // idx2 buffers fit.
  while (1) {
    if (!idx2_block_size) {
      goto ld_report_matrix_ret_NOMEM;
    }
    if (!(bigstack_alloc_ul(founder_ct_192_long * idx2_block_size, &g_ld_geno2) ||
          bigstack_alloc_ul(founder_ct_192_long * idx2_block_size, &g_ld_geno_masks2) ||
          bigstack_alloc_ui(idx2_block_size, &g_ld_missing_cts2))) {
      break;
    }
    bigstack_reset(bigstack_mark2);
    if (!output_single_prec) {
      idx2_block_size -= 8;
    } else {
      idx2_block_size -= 16;
    }
  }
  // Zero the trailing words of every per-marker genotype/mask slot once up
  // front.
  uljj = founder_trail_ct + 2;
  for (ulii = 1; ulii <= idx1_block_size; ulii++) {
    fill_ulong_zero(uljj, &(g_ld_geno1[ulii * founder_ct_192_long - uljj]));
    fill_ulong_zero(uljj, &(g_ld_geno_masks1[ulii * founder_ct_192_long - uljj]));
  }
  for (ulii = 1; ulii <= idx2_block_size; ulii++) {
    fill_ulong_zero(uljj, &(g_ld_geno2[ulii * founder_ct_192_long - uljj]));
    fill_ulong_zero(uljj, &(g_ld_geno_masks2[ulii * founder_ct_192_long - uljj]));
  }
  if (is_square) {
    // every row is compared against all columns
    for (ulii = 0; ulii < idx1_block_size; ulii++) {
      g_ld_interval1[ulii * 2] = 0;
      g_ld_interval1[ulii * 2 + 1] = marker_ct;
    }
    g_ld_marker_ctm8 = marker_ctm8;
  } else {
    // interval ends are row-dependent and filled in per idx1 block below
    for (ulii = 0; ulii < idx1_block_size; ulii++) {
      g_ld_interval1[ulii * 2] = 0;
    }
    if (is_square0) {
      // Pre-fill g_textbuf with the zero padding written after the diagonal
      // of each square0 row.
      if (is_binary) {
        if (!output_single_prec) {
          fill_double_zero(MAXLINELEN / sizeof(double), (double*)g_textbuf);
        } else {
          fill_float_zero(MAXLINELEN / sizeof(float), (float*)g_textbuf);
        }
      } else {
        ulptr = (uintptr_t*)g_textbuf;
        // assume little-endian
        // 0[delim]0[delim]...
#ifdef __LP64__
        ulii = 0x30003000300030LLU | (0x100010001000100LLU * ((unsigned char)g_ld_delimiter));
#else
        ulii = 0x300030 | (0x1000100 * ((unsigned char)g_ld_delimiter));
#endif
        for (uljj = 0; uljj < MAXLINELEN / sizeof(intptr_t); uljj++) {
          *ulptr++ = ulii;
        }
      }
    }
  }
  if (marker_idx1) {
    // skip ahead to the first row of this --parallel piece
    marker_uidx1 = jump_forward_unset_unsafe(marker_exclude, marker_uidx1 + 1, marker_idx1);
  }
  g_ld_keep_sign = 0;
  sprintf(g_logbuf, "--r%s %s%s to %s ... ", g_ld_is_r2? "2" : "", is_square? "square" : (is_square0? "square0" : "triangle"), is_binary? (output_single_prec? " bin4" : " bin") : (output_gz? " gz" : ""), outname);
  wordwrapb(16); // strlen("99% [processing]")
  logprintb();
  fputs("0%", stdout);
  do {
    fputs(" [processing]", stdout);
    fflush(stdout);
    if (idx1_block_size > marker_idx1_end - marker_idx1) {
      // final, short idx1 block
      idx1_block_size = marker_idx1_end - marker_idx1;
      if (idx1_block_size < thread_ct) {
        thread_ct = idx1_block_size;
        g_ld_thread_ct = thread_ct;
      }
    }
    g_ld_idx1_block_size = idx1_block_size;
    if (fseeko(bedfile, bed_offset + (marker_uidx1 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
      goto ld_report_matrix_ret_READ_FAIL;
    }
    // chrom_end == 0 forces a chromosome lookup on the first variant
    chrom_end = 0;
    for (block_idx1 = 0; block_idx1 < idx1_block_size; marker_uidx1++, block_idx1++) {
      if (IS_SET(marker_exclude, marker_uidx1)) {
        marker_uidx1 = next_unset_ul_unsafe(marker_exclude, marker_uidx1);
        if (fseeko(bedfile, bed_offset + (marker_uidx1 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
          goto ld_report_matrix_ret_READ_FAIL;
        }
      }
      if (marker_uidx1 >= chrom_end) {
        chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1);
        chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
        // remember where this chromosome ends so the lookup is only repeated
        // when a boundary is crossed
        chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
        is_haploid = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx);
        is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
        is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
      }
      if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, marker_uidx1), bedfile, loadbuf, &(g_ld_geno1[block_idx1 * founder_ct_192_long]))) {
        goto ld_report_matrix_ret_READ_FAIL;
      }
      if (is_haploid && hh_exists) {
        haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)(&(g_ld_geno1[block_idx1 * founder_ct_192_long])));
      }
      ld_process_load2(&(g_ld_geno1[block_idx1 * founder_ct_192_long]), &(g_ld_geno_masks1[block_idx1 * founder_ct_192_long]), &(g_ld_missing_cts1[block_idx1]), founder_ct, is_x && (!ignore_x), founder_male_include2);
    }
    marker_uidx2 = marker_uidx_base;
    marker_idx2 = 0;
    if (is_square) {
      marker_idx2_end = marker_ct;
    } else {
      // row i of the triangle needs columns [0, i]
      marker_idx2_end = marker_idx1 + idx1_block_size;
      for (ulii = 1; ulii <= idx1_block_size; ulii++) {
        g_ld_interval1[2 * ulii - 1] = ulii + marker_idx1;
      }
      if (!output_single_prec) {
        marker_ctm8 = round_up_pow2(marker_idx2_end, 8);
      } else {
        marker_ctm8 = round_up_pow2(marker_idx2_end, 16);
      }
      g_ld_marker_ctm8 = marker_ctm8;
    }
    chrom_end = 0;
    if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
      goto ld_report_matrix_ret_READ_FAIL;
    }
    cur_idx2_block_size = idx2_block_size;
    do {
      if (cur_idx2_block_size > marker_idx2_end - marker_idx2) {
        cur_idx2_block_size = marker_idx2_end - marker_idx2;
      }
      for (block_idx2 = 0; block_idx2 < cur_idx2_block_size; marker_uidx2++, block_idx2++) {
        if (IS_SET(marker_exclude, marker_uidx2)) {
          marker_uidx2 = next_unset_ul_unsafe(marker_exclude, marker_uidx2);
          if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
            goto ld_report_matrix_ret_READ_FAIL;
          }
        }
        if (marker_uidx2 >= chrom_end) {
          chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2);
          chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
          chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
          is_haploid = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx);
          is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
          is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
        }
        if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, marker_uidx2), bedfile, loadbuf, &(g_ld_geno2[block_idx2 * founder_ct_192_long]))) {
          goto ld_report_matrix_ret_READ_FAIL;
        }
        if (is_haploid && hh_exists) {
          haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)(&(g_ld_geno2[block_idx2 * founder_ct_192_long])));
        }
        ld_process_load2(&(g_ld_geno2[block_idx2 * founder_ct_192_long]), &(g_ld_geno_masks2[block_idx2 * founder_ct_192_long]), &(g_ld_missing_cts2[block_idx2]), founder_ct, is_x && (!ignore_x), founder_male_include2);
      }
      g_ld_idx2_block_size = cur_idx2_block_size;
      g_ld_idx2_block_start = marker_idx2;
      marker_idx2 += cur_idx2_block_size;
      is_last_block = (marker_idx2 >= marker_idx2_end);
      if (spawn_threads2(threads, &ld_block_thread, thread_ct, is_last_block)) {
        goto ld_report_matrix_ret_THREAD_CREATE_FAIL;
      }
      // the calling thread does worker 0's share
      ld_block_thread((void*)0);
      join_threads2(threads, thread_ct, is_last_block);
    } while (!is_last_block);
    // overwrite "processing]" (11 chars) with "writing]" (8 chars), blank the
    // 3 leftover characters, and step back over the blanks
    fputs("\b\b\b\b\b\b\b\b\b\b\bwriting]   \b\b\b", stdout);
    fflush(stdout);
    if (is_binary) {
      if (!output_single_prec) {
        if (is_square) {
          // Each row holds marker_ct values.  cur_idx2_block_size is only the
          // size of the final idx2 chunk, which is smaller than marker_ct
          // whenever the columns were processed in more than one chunk.
          for (block_idx1 = 0; block_idx1 < idx1_block_size; block_idx1++) {
            if (fwrite_checked(&(g_ld_results[block_idx1 * marker_ctm8]), marker_ct * sizeof(double), outfile)) {
              goto ld_report_matrix_ret_WRITE_FAIL;
            }
          }
        } else {
          for (block_idx1 = 0; block_idx1 < idx1_block_size; block_idx1++) {
            if (fwrite_checked(&(g_ld_results[block_idx1 * marker_ctm8]), (block_idx1 + marker_idx1 + 1) * sizeof(double), outfile)) {
              goto ld_report_matrix_ret_WRITE_FAIL;
            }
            if (is_square0) {
              // pad the rest of the row with zeroes, MAXLINELEN bytes at a
              // time
              ulii = marker_ct - block_idx1 - marker_idx1 - 1;
              while (ulii) {
                if (ulii > MAXLINELEN / sizeof(double)) {
                  uljj = MAXLINELEN / sizeof(double);
                  ulii -= MAXLINELEN / sizeof(double);
                } else {
                  uljj = ulii;
                  ulii = 0;
                }
                if (fwrite_checked(g_textbuf, uljj * sizeof(double), outfile)) {
                  goto ld_report_matrix_ret_WRITE_FAIL;
                }
              }
            }
          }
        }
      } else {
        if (is_square) {
          // full rows here too; see the double-precision branch above
          for (block_idx1 = 0; block_idx1 < idx1_block_size; block_idx1++) {
            if (fwrite_checked(&(g_ld_results_f[block_idx1 * marker_ctm8]), marker_ct * sizeof(float), outfile)) {
              goto ld_report_matrix_ret_WRITE_FAIL;
            }
          }
        } else {
          for (block_idx1 = 0; block_idx1 < idx1_block_size; block_idx1++) {
            if (fwrite_checked(&(g_ld_results_f[block_idx1 * marker_ctm8]), (block_idx1 + marker_idx1 + 1) * sizeof(float), outfile)) {
              goto ld_report_matrix_ret_WRITE_FAIL;
            }
            if (is_square0) {
              ulii = marker_ct - block_idx1 - marker_idx1 - 1;
              while (ulii) {
                if (ulii > MAXLINELEN / sizeof(float)) {
                  uljj = MAXLINELEN / sizeof(float);
                  ulii -= MAXLINELEN / sizeof(float);
                } else {
                  uljj = ulii;
                  ulii = 0;
                }
                if (fwrite_checked(g_textbuf, uljj * sizeof(float), outfile)) {
                  goto ld_report_matrix_ret_WRITE_FAIL;
                }
              }
            }
          }
        }
      }
    } else {
      // text output: ld_matrix_emitn formats the rows from these globals
      g_ld_block_idx1 = 0;
      g_ld_idx2_block_start = 0;
      if (is_square) {
        g_ld_idx2_block_size = marker_ct;
      } else {
        g_ld_idx2_block_size = marker_idx1 + 1;
      }
      // NOTE(review): the writers' return values are not inspected here;
      // confirm against their declarations whether a failure should abort.
      if (output_gz) {
        parallel_compress(outname, overflow_buf, not_first_write, ld_matrix_emitn);
      } else {
        write_uncompressed(outname, overflow_buf, not_first_write, ld_matrix_emitn);
      }
      not_first_write = 1;
    }
    marker_idx1 += idx1_block_size;
    // erase " [writing]" (10 chars): back up, blank it out, back up again
    fputs("\b\b\b\b\b\b\b\b\b\b          \b\b\b\b\b\b\b\b\b\b", stdout);
    if (is_square) {
      tests_completed = marker_idx1 - marker_idx1_start;
    } else {
      tests_completed = ((((uint64_t)marker_idx1) * (marker_idx1 + 1)) - (((uint64_t)marker_idx1_start) * (marker_idx1_start + 1))) / 2;
    }
    if (tests_completed >= pct_thresh) {
      if (pct > 10) {
        // previous percentage had two digits
        putc_unlocked('\b', stdout);
      }
      pct = (tests_completed * 100LLU) / job_size;
      if (pct < 100) {
        printf("\b\b%" PRIuPTR "%%", pct);
        fflush(stdout);
        pct_thresh = ((++pct) * ((uint64_t)job_size)) / 100;
      }
    }
  } while (marker_idx1 < marker_idx1_end);
  fputs("\b\b", stdout);
  logprint("done.\n");
  if (is_binary) {
    if (fclose_null(&outfile)) {
      goto ld_report_matrix_ret_WRITE_FAIL;
    }
  }
  while (0) {
  ld_report_matrix_ret_NOMEM:
    retval = RET_NOMEM;
    break;
  ld_report_matrix_ret_OPEN_FAIL:
    retval = RET_OPEN_FAIL;
    break;
  ld_report_matrix_ret_READ_FAIL:
    retval = RET_READ_FAIL;
    break;
  ld_report_matrix_ret_WRITE_FAIL:
    retval = RET_WRITE_FAIL;
    break;
  ld_report_matrix_ret_INVALID_CMDLINE:
    retval = RET_INVALID_CMDLINE;
    break;
  ld_report_matrix_ret_THREAD_CREATE_FAIL:
    retval = RET_THREAD_CREATE_FAIL;
    break;
  }
  fclose_cond(outfile);
  // trust parent to free memory
  return retval;
}
2597
ld_regular_emitn(uint32_t overflow_ct,unsigned char * readbuf)2598 uint32_t ld_regular_emitn(uint32_t overflow_ct, unsigned char* readbuf) {
2599 char* sptr_cur = (char*)(&(readbuf[overflow_ct]));
2600 char* readbuf_end = (char*)(&(readbuf[PIGZ_BLOCK_SIZE]));
2601 Chrom_info* chrom_info_ptr = g_ld_chrom_info_ptr;
2602 uintptr_t* marker_exclude_idx1 = g_ld_marker_exclude_idx1;
2603 uintptr_t* marker_exclude = g_ld_marker_exclude;
2604 uint32_t* marker_pos = g_ld_marker_pos;
2605 char* marker_ids = g_ld_marker_ids;
2606 char** marker_allele_ptrs = g_ld_marker_allele_ptrs;
2607 uint32_t* ld_interval1 = g_ld_interval1;
2608 double* results = g_ld_results;
2609 double* set_allele_freqs = g_ld_set_allele_freqs;
2610 char* fixed_a1 = nullptr;
2611 char* fixed_a2 = nullptr;
2612 uintptr_t max_marker_id_len = g_ld_max_marker_id_len;
2613 uintptr_t marker_uidx1 = g_ld_marker_uidx1;
2614 uintptr_t block_idx1 = g_ld_block_idx1;
2615 uintptr_t block_size1 = g_ld_idx1_block_size;
2616 uintptr_t marker_uidx2_start = g_ld_uidx2_start;
2617 uintptr_t block_idx2_start = g_ld_idx2_block_start;
2618 uintptr_t block_idx2 = g_ld_block_idx2;
2619 uintptr_t marker_idx2_maxw = g_ld_marker_ctm8;
2620 uintptr_t marker_uidx2 = g_ld_marker_uidx2;
2621 double window_r2 = g_ld_window_r2;
2622 uint32_t plink_maxsnp = g_ld_plink_maxsnp;
2623 uint32_t is_inter_chr = g_ld_is_inter_chr;
2624
2625 // 0 = not d/dprime/dprime-signed
2626 uint32_t dprime_type = g_ld_modifier & LD_DX;
2627
2628 uint32_t is_r2 = g_ld_is_r2;
2629 uint32_t prefix_len = g_ld_prefix_len;
2630 uint32_t chrom_fo_idx1 = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1);
2631 uint32_t chrom_idx1 = chrom_info_ptr->chrom_file_order[chrom_fo_idx1];
2632 uint32_t chrom_end1 = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx1 + 1];
2633 uint32_t chrom_fo_idx2 = 0;
2634 uint32_t chrom_idx2 = 0;
2635 uint32_t fixed_a1_len = 0;
2636 uint32_t fixed_a2_len = 0;
2637 uintptr_t block_end2;
2638 uint32_t coupling;
2639 uint32_t chrom_end2;
2640 char* sptr2;
2641 double* dptr;
2642 double dxx;
2643 if (block_idx1 == block_size1) {
2644 goto ld_regular_emitn_ret;
2645 }
2646 if (block_idx2) {
2647 goto ld_regular_emitn_start_2;
2648 }
2649 // block_idx2 is only zero on initial call, never on reentry
2650 if (g_ld_is_first_block) {
2651 sptr_cur = memcpya(sptr_cur, " CHR_A BP_A ", 20);
2652 sptr_cur = fw_strcpyn(g_ld_plink_maxsnp, 5, "SNP_A", sptr_cur);
2653 if (set_allele_freqs) {
2654 sptr_cur = memcpya(sptr_cur, " MAF_A", 11);
2655 }
2656 sptr_cur = memcpya(sptr_cur, " CHR_B BP_B ", 21);
2657 sptr_cur = fw_strcpyn(g_ld_plink_maxsnp, 5, "SNP_B", sptr_cur);
2658 if (marker_allele_ptrs) {
2659 sptr_cur = memcpya(sptr_cur, " PHASE", 11);
2660 }
2661 if (set_allele_freqs) {
2662 sptr_cur = memcpya(sptr_cur, " MAF_B", 11);
2663 }
2664 sptr_cur = memseta(sptr_cur, 32, 11);
2665 sptr_cur = memcpyl3a(sptr_cur, is_r2? "R2 " : " R ");
2666 if (dprime_type) {
2667 sptr_cur = memcpya(sptr_cur, (dprime_type == LD_D)? " D " : " DP ", 13);
2668 }
2669 *sptr_cur++ = '\n';
2670 }
2671 goto ld_regular_emitn_start;
2672 do {
2673 marker_uidx1++;
2674 next_unset_ul_unsafe_ck(marker_exclude_idx1, &marker_uidx1);
2675 if (marker_uidx1 >= chrom_end1) {
2676 chrom_fo_idx1 = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1);
2677 chrom_idx1 = chrom_info_ptr->chrom_file_order[chrom_fo_idx1];
2678 chrom_end1 = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx1 + 1];
2679 }
2680 block_idx2 = ld_interval1[2 * block_idx1];
2681 if (block_idx2_start < block_idx2) {
2682 marker_uidx2_start = jump_forward_unset_unsafe(marker_exclude, marker_uidx2_start + 1, block_idx2 - block_idx2_start);
2683 block_idx2_start = block_idx2;
2684 }
2685 ld_regular_emitn_start:
2686 marker_uidx2 = marker_uidx2_start;
2687 sptr2 = width_force(6, g_textbuf, chrom_name_write(chrom_info_ptr, chrom_idx1, g_textbuf));
2688 sptr2 = memseta(sptr2, 32, 3);
2689 sptr2 = uint32toa_w10x(marker_pos[marker_uidx1], ' ', sptr2);
2690 sptr2 = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx1 * max_marker_id_len]), sptr2);
2691 *sptr2++ = ' ';
2692 if (set_allele_freqs) {
2693 sptr2 = width_force(10, sptr2, dtoa_g(1.0 - set_allele_freqs[marker_uidx1], sptr2));
2694 *sptr2++ = ' ';
2695 }
2696 if (!is_inter_chr) {
2697 sptr2 = width_force(6, sptr2, chrom_name_write(chrom_info_ptr, chrom_idx1, sptr2));
2698 sptr2 = memseta(sptr2, 32, 3);
2699 }
2700 prefix_len = (uintptr_t)(sptr2 - g_textbuf);
2701 ld_regular_emitn_start_2:
2702 if (marker_allele_ptrs) {
2703 fixed_a1 = marker_allele_ptrs[2 * marker_uidx1];
2704 fixed_a2 = marker_allele_ptrs[2 * marker_uidx1 + 1];
2705 fixed_a1_len = strlen(fixed_a1);
2706 fixed_a2_len = strlen(fixed_a2);
2707 }
2708 chrom_end2 = 0;
2709 block_end2 = ld_interval1[2 * block_idx1 + 1];
2710 dptr = &(results[(block_idx1 * marker_idx2_maxw + block_idx2 - block_idx2_start) * (1 + (dprime_type != 0))]);
2711 while (block_idx2 < block_end2) {
2712 next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx2);
2713 dxx = *dptr++;
2714 if (fabs(dxx) >= window_r2) {
2715 sptr_cur = memcpya(sptr_cur, g_textbuf, prefix_len);
2716 if (is_inter_chr) {
2717 if (marker_uidx2 >= chrom_end2) {
2718 chrom_fo_idx2 = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2);
2719 chrom_idx2 = chrom_info_ptr->chrom_file_order[chrom_fo_idx2];
2720 chrom_end2 = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx2 + 1];
2721 }
2722 sptr_cur = width_force(6, sptr_cur, chrom_name_write(chrom_info_ptr, chrom_idx2, sptr_cur));
2723 sptr_cur = memseta(sptr_cur, 32, 3);
2724 }
2725 sptr_cur = uint32toa_w10x(marker_pos[marker_uidx2], ' ', sptr_cur);
2726 sptr_cur = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx2 * max_marker_id_len]), sptr_cur);
2727 *sptr_cur++ = ' ';
2728 if (marker_allele_ptrs) {
2729 coupling = (dxx > 0);
2730 sptr2 = memcpya(sptr_cur, fixed_a1, fixed_a1_len);
2731 sptr2 = strcpyax(sptr2, marker_allele_ptrs[2 * marker_uidx2 + (1 - coupling)], '/');
2732 sptr2 = memcpya(sptr2, fixed_a2, fixed_a2_len);
2733 sptr2 = strcpya(sptr2, marker_allele_ptrs[2 * marker_uidx2 + coupling]);
2734 sptr_cur = width_force(10, sptr_cur, sptr2);
2735 *sptr_cur++ = ' ';
2736 }
2737 if (set_allele_freqs) {
2738 sptr_cur = width_force(10, sptr_cur, dtoa_g(1.0 - set_allele_freqs[marker_uidx2], sptr_cur));
2739 *sptr_cur++ = ' ';
2740 }
2741 if (is_r2) {
2742 dxx = fabs(dxx);
2743 }
2744 sptr_cur = width_force(12, sptr_cur, dtoa_g(dxx, sptr_cur));
2745 if (dprime_type) {
2746 *sptr_cur++ = ' ';
2747 sptr_cur = width_force(12, sptr_cur, dtoa_g(*dptr++, sptr_cur));
2748 }
2749 sptr_cur = memcpya(sptr_cur, " \n", 2);
2750 } else if (dprime_type) {
2751 dptr++;
2752 }
2753 block_idx2++;
2754 marker_uidx2++;
2755 if (sptr_cur > readbuf_end) {
2756 goto ld_regular_emitn_ret;
2757 }
2758 }
2759 } while (++block_idx1 < block_size1);
2760 ld_regular_emitn_ret:
2761 g_ld_marker_uidx1 = marker_uidx1;
2762 g_ld_block_idx1 = block_idx1;
2763 g_ld_prefix_len = prefix_len;
2764 g_ld_uidx2_start = marker_uidx2_start;
2765 g_ld_idx2_block_start = block_idx2_start;
2766 g_ld_marker_uidx2 = marker_uidx2;
2767 g_ld_block_idx2 = block_idx2;
2768 return (uintptr_t)(((unsigned char*)sptr_cur) - readbuf);
2769 }
2770
2771 // The following three functions are built around a data representation
2772 // introduced by Xiang Yan et al.'s BOOST software (the original bitwise
2773 // representation I came up with was less efficient); see
2774 // http://bioinformatics.ust.hk/BOOST.html .
2775 //
2776 // The BOOST implementation just evaluated four contingency table values; when
2777 // there is no missing data, the other five can be determined via subtraction.
// The two_locus_3x3_zmiss_tablev() function handles this case. However, with
2779 // *only* that logic, all sites with missing data must be thrown out.
2780 // two_locus_3x3_tablev() handles the other cases, directly summing 6 or 9
2781 // table values when necessary.
2782 //
2783 // If permutation testing is added later, it should exploit the fact that
2784 // [cell xy value in case 3x3 table] + [cell xy value in ctrl 3x3 table]
2785 // is constant across permutations; i.e. we just need to determine the new case
2786 // contingency table, and then the control table falls out via subtraction.
2787 // Several ideas from PERMORY could also be applied.
load_and_split3(FILE * bedfile,uintptr_t * rawbuf,uint32_t unfiltered_sample_ct,uintptr_t * casebuf,uintptr_t * pheno_nm,uintptr_t * pheno_c,uint32_t case_ctv,uint32_t ctrl_ctv,uint32_t do_reverse,uint32_t is_case_only,uintptr_t * nm_info_ptr)2788 uint32_t load_and_split3(FILE* bedfile, uintptr_t* rawbuf, uint32_t unfiltered_sample_ct, uintptr_t* casebuf, uintptr_t* pheno_nm, uintptr_t* pheno_c, uint32_t case_ctv, uint32_t ctrl_ctv, uint32_t do_reverse, uint32_t is_case_only, uintptr_t* nm_info_ptr) {
2789 uintptr_t* rawbuf_end = &(rawbuf[unfiltered_sample_ct / BITCT2]);
2790 uintptr_t* ctrlbuf = &(casebuf[3 * case_ctv]);
2791 uintptr_t case_words[4];
2792 uintptr_t ctrl_words[4];
2793 uint32_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
2794 uint32_t case_rem = 0;
2795 uint32_t ctrl_rem = 0;
2796 uint32_t read_shift_max = BITCT2;
2797 uint32_t sample_uidx = 0;
2798 uint32_t offset0_case = do_reverse * 2 * case_ctv;
2799 uint32_t offset2_case = (1 - do_reverse) * 2 * case_ctv;
2800 uint32_t offset0_ctrl = do_reverse * 2 * ctrl_ctv;
2801 uint32_t offset2_ctrl = (1 - do_reverse) * 2 * ctrl_ctv;
2802 uint32_t read_shift;
2803 uintptr_t read_word;
2804 uintptr_t ulii;
2805 if (bedfile) {
2806 // ld_report_dprime() preloads this and does het. haploid handling, etc.
2807 if (load_raw(unfiltered_sample_ct4, bedfile, rawbuf)) {
2808 return RET_READ_FAIL;
2809 }
2810 }
2811 case_words[0] = 0;
2812 case_words[1] = 0;
2813 case_words[2] = 0;
2814 case_words[3] = 0;
2815 ctrl_words[0] = 0;
2816 ctrl_words[1] = 0;
2817 ctrl_words[2] = 0;
2818 ctrl_words[3] = 0;
2819 while (1) {
2820 while (rawbuf < rawbuf_end) {
2821 read_word = *rawbuf++;
2822 for (read_shift = 0; read_shift < read_shift_max; sample_uidx++, read_shift++) {
2823 if (is_set(pheno_nm, sample_uidx)) {
2824 ulii = read_word & 3;
2825 if (is_set(pheno_c, sample_uidx)) {
2826 case_words[ulii] |= ONELU << case_rem;
2827 if (++case_rem == BITCT) {
2828 casebuf[offset0_case] = case_words[0];
2829 casebuf[case_ctv] = case_words[2];
2830 casebuf[offset2_case] = case_words[3];
2831 casebuf++;
2832 case_words[0] = 0;
2833 case_words[2] = 0;
2834 case_words[3] = 0;
2835 case_rem = 0;
2836 }
2837 } else if (!is_case_only) {
2838 ctrl_words[ulii] |= ONELU << ctrl_rem;
2839 if (++ctrl_rem == BITCT) {
2840 ctrlbuf[offset0_ctrl] = ctrl_words[0];
2841 ctrlbuf[ctrl_ctv] = ctrl_words[2];
2842 ctrlbuf[offset2_ctrl] = ctrl_words[3];
2843 ctrlbuf++;
2844 ctrl_words[0] = 0;
2845 ctrl_words[2] = 0;
2846 ctrl_words[3] = 0;
2847 ctrl_rem = 0;
2848 }
2849 }
2850 }
2851 read_word >>= 2;
2852 }
2853 }
2854 if (sample_uidx == unfiltered_sample_ct) {
2855 if (case_rem) {
2856 casebuf[offset0_case] = case_words[0];
2857 casebuf[case_ctv] = case_words[2];
2858 casebuf[offset2_case] = case_words[3];
2859 }
2860 if (ctrl_rem) {
2861 ctrlbuf[offset0_ctrl] = ctrl_words[0];
2862 ctrlbuf[ctrl_ctv] = ctrl_words[2];
2863 ctrlbuf[offset2_ctrl] = ctrl_words[3];
2864 }
2865 ulii = 3;
2866 if (case_words[1]) {
2867 ulii -= 1;
2868 }
2869 if (ctrl_words[1]) {
2870 ulii -= 2;
2871 }
2872 *nm_info_ptr = ulii;
2873 return 0;
2874 }
2875 rawbuf_end++;
2876 read_shift_max = unfiltered_sample_ct % BITCT2;
2877 }
2878 }
2879
2880 #ifdef __LP64__
// Vectorized popcount kernel for the 3x3 two-locus contingency table.
//
// For each of iter_ct passes, the next sample_ctv6 vectors of vec1 are ANDed
// against three consecutive sample_ctv6-vector segments of vec2 (vec20,
// vec21, vec22), and the number of set bits in each AND is added to
// counts_3x3[0], [1], and [2] respectively; counts_3x3 then advances by 3.
// vec1 keeps advancing from pass to pass, while vec2 is rewound to its start
// at the top of every pass.  Counts are accumulated (+=), so the caller is
// responsible for their initial values.
//
// The inner loop consumes three vectors per trip, and full-size batches are
// 30 vectors (10 trips).  Each trip adds at most 24 to any byte of the
// accumulators, so 10 trips stay within 240 -- presumably the reason for the
// batch size.  A final batch of fewer than 30 vectors is handled by jumping
// into the loop body, part-way in when the vector count is not a multiple of
// 3.
static void two_locus_3x3_tablev(__m128i* vec1, __m128i* vec2, uint32_t* counts_3x3, uint32_t sample_ctv6, uint32_t iter_ct) {
  const __m128i m1 = {FIVEMASK, FIVEMASK};
  const __m128i m2 = {0x3333333333333333LLU, 0x3333333333333333LLU};
  const __m128i m4 = {0x0f0f0f0f0f0f0f0fLLU, 0x0f0f0f0f0f0f0f0fLLU};
  __m128i* vec20;
  __m128i* vec21;
  __m128i* vec22;
  __m128i* vend1;
  __m128i loader1;
  __m128i loader20;
  __m128i loader21;
  __m128i loader22;
  __m128i count10;
  __m128i count11;
  __m128i count12;
  __m128i count20;
  __m128i count21;
  __m128i count22;
  __univec acc0;
  __univec acc1;
  __univec acc2;
  uint32_t ct;
  uint32_t ct2;
  while (iter_ct--) {
    ct = sample_ctv6;
    vec20 = vec2;
    vec21 = &(vec20[sample_ctv6]);
    vec22 = &(vec20[2 * sample_ctv6]);
    while (ct >= 30) {
      ct -= 30;
      vend1 = &(vec1[30]);
      acc0.vi = _mm_setzero_si128();
      acc1.vi = _mm_setzero_si128();
      acc2.vi = _mm_setzero_si128();
      do {
      two_locus_3x3_tablev_outer:
        // first vector of the triple: AND, then reduce each bit pair to a
        // 2-bit count (x - ((x >> 1) & 0x55...))
        loader1 = *vec1++;
        loader20 = *vec20++;
        loader21 = *vec21++;
        loader22 = *vec22++;
        count10 = _mm_and_si128(loader1, loader20);
        count11 = _mm_and_si128(loader1, loader21);
        count12 = _mm_and_si128(loader1, loader22);
        count10 = _mm_sub_epi64(count10, _mm_and_si128(_mm_srli_epi64(count10, 1), m1));
        count11 = _mm_sub_epi64(count11, _mm_and_si128(_mm_srli_epi64(count11, 1), m1));
        count12 = _mm_sub_epi64(count12, _mm_and_si128(_mm_srli_epi64(count12, 1), m1));
      two_locus_3x3_tablev_two_left:
        // unlike the zmiss variant, this apparently does not suffer from
        // enough register spill to justify shrinking the inner loop
        // second vector of the triple: same reduction into count2x
        loader1 = *vec1++;
        loader20 = *vec20++;
        loader21 = *vec21++;
        loader22 = *vec22++;
        count20 = _mm_and_si128(loader1, loader20);
        count21 = _mm_and_si128(loader1, loader21);
        count22 = _mm_and_si128(loader1, loader22);
        count20 = _mm_sub_epi64(count20, _mm_and_si128(_mm_srli_epi64(count20, 1), m1));
        count21 = _mm_sub_epi64(count21, _mm_and_si128(_mm_srli_epi64(count21, 1), m1));
        count22 = _mm_sub_epi64(count22, _mm_and_si128(_mm_srli_epi64(count22, 1), m1));
      two_locus_3x3_tablev_one_left:
        // third vector of the triple: its even-position bits are added to
        // count1x and its odd-position bits to count2x, so each 2-bit field
        // holds at most 3
        loader1 = *vec1++;
        loader20 = *vec20++;
        loader21 = _mm_and_si128(loader1, loader20); // half1
        loader22 = _mm_and_si128(_mm_srli_epi64(loader21, 1), m1); // half2
        count10 = _mm_add_epi64(count10, _mm_and_si128(loader21, m1));
        count20 = _mm_add_epi64(count20, loader22);
        loader20 = *vec21++;
        loader21 = _mm_and_si128(loader1, loader20);
        loader22 = _mm_and_si128(_mm_srli_epi64(loader21, 1), m1);
        count11 = _mm_add_epi64(count11, _mm_and_si128(loader21, m1));
        count21 = _mm_add_epi64(count21, loader22);
        loader20 = *vec22++;
        loader21 = _mm_and_si128(loader1, loader20);
        loader22 = _mm_and_si128(_mm_srli_epi64(loader21, 1), m1);
        count12 = _mm_add_epi64(count12, _mm_and_si128(loader21, m1));
        count22 = _mm_add_epi64(count22, loader22);

        // widen 2-bit fields to 4-bit fields, merge count2x into count1x,
        // then widen to bytes and add to the running accumulators
        count10 = _mm_add_epi64(_mm_and_si128(count10, m2), _mm_and_si128(_mm_srli_epi64(count10, 2), m2));
        count11 = _mm_add_epi64(_mm_and_si128(count11, m2), _mm_and_si128(_mm_srli_epi64(count11, 2), m2));
        count12 = _mm_add_epi64(_mm_and_si128(count12, m2), _mm_and_si128(_mm_srli_epi64(count12, 2), m2));
        count10 = _mm_add_epi64(count10, _mm_add_epi64(_mm_and_si128(count20, m2), _mm_and_si128(_mm_srli_epi64(count20, 2), m2)));
        count11 = _mm_add_epi64(count11, _mm_add_epi64(_mm_and_si128(count21, m2), _mm_and_si128(_mm_srli_epi64(count21, 2), m2)));
        count12 = _mm_add_epi64(count12, _mm_add_epi64(_mm_and_si128(count22, m2), _mm_and_si128(_mm_srli_epi64(count22, 2), m2)));
        acc0.vi = _mm_add_epi64(acc0.vi, _mm_add_epi64(_mm_and_si128(count10, m4), _mm_and_si128(_mm_srli_epi64(count10, 4), m4)));
        acc1.vi = _mm_add_epi64(acc1.vi, _mm_add_epi64(_mm_and_si128(count11, m4), _mm_and_si128(_mm_srli_epi64(count11, 4), m4)));
        acc2.vi = _mm_add_epi64(acc2.vi, _mm_add_epi64(_mm_and_si128(count12, m4), _mm_and_si128(_mm_srli_epi64(count12, 4), m4)));
      } while (vec1 < vend1);
      // Horizontal sum: widen byte counts to 16-bit fields, add the two
      // halves of the vector (u8[] appears to be its 64-bit-lane view), then
      // the multiply-and-shift adds up the four 16-bit fields.
      const __m128i m8 = {0x00ff00ff00ff00ffLLU, 0x00ff00ff00ff00ffLLU};
      acc0.vi = _mm_add_epi64(_mm_and_si128(acc0.vi, m8), _mm_and_si128(_mm_srli_epi64(acc0.vi, 8), m8));
      acc1.vi = _mm_add_epi64(_mm_and_si128(acc1.vi, m8), _mm_and_si128(_mm_srli_epi64(acc1.vi, 8), m8));
      acc2.vi = _mm_add_epi64(_mm_and_si128(acc2.vi, m8), _mm_and_si128(_mm_srli_epi64(acc2.vi, 8), m8));
      counts_3x3[0] += ((acc0.u8[0] + acc0.u8[1]) * 0x1000100010001LLU) >> 48;
      counts_3x3[1] += ((acc1.u8[0] + acc1.u8[1]) * 0x1000100010001LLU) >> 48;
      counts_3x3[2] += ((acc2.u8[0] + acc2.u8[1]) * 0x1000100010001LLU) >> 48;
    }
    if (ct) {
      // Final short batch (fewer than 30 vectors): set up the loop state by
      // hand and jump into the loop body above.  ct is zeroed first so that
      // the enclosing while (ct >= 30) exits once that batch is summed, and
      // this branch is not taken again.
      vend1 = &(vec1[ct]);
      ct2 = ct % 3;
      acc0.vi = _mm_setzero_si128();
      acc1.vi = _mm_setzero_si128();
      acc2.vi = _mm_setzero_si128();
      ct = 0;
      if (ct2) {
        // Vector count is not a multiple of 3: enter part-way through the
        // triple, with the partial counts of the skipped steps zeroed.
        count10 = _mm_setzero_si128();
        count11 = _mm_setzero_si128();
        count12 = _mm_setzero_si128();
        if (ct2 == 2) {
          goto two_locus_3x3_tablev_two_left;
        }
        count20 = _mm_setzero_si128();
        count21 = _mm_setzero_si128();
        count22 = _mm_setzero_si128();
        goto two_locus_3x3_tablev_one_left;
      }
      goto two_locus_3x3_tablev_outer;
    }
    counts_3x3 = &(counts_3x3[3]);
  }
}
3000
two_locus_3x3_zmiss_tablev(__m128i * veca0,__m128i * vecb0,uint32_t * counts_3x3,uint32_t sample_ctv6)3001 static inline void two_locus_3x3_zmiss_tablev(__m128i* veca0, __m128i* vecb0, uint32_t* counts_3x3, uint32_t sample_ctv6) {
3002 const __m128i m1 = {FIVEMASK, FIVEMASK};
3003 const __m128i m2 = {0x3333333333333333LLU, 0x3333333333333333LLU};
3004 const __m128i m4 = {0x0f0f0f0f0f0f0f0fLLU, 0x0f0f0f0f0f0f0f0fLLU};
3005 __m128i* vecb1 = &(vecb0[sample_ctv6]);
3006 __m128i* veca1 = &(veca0[sample_ctv6]);
3007 __m128i* vend;
3008 __m128i loadera0;
3009 __m128i loaderb0;
3010 __m128i loaderb1;
3011 __m128i loadera1;
3012 __m128i countx00;
3013 __m128i countx01;
3014 __m128i countx11;
3015 __m128i countx10;
3016 __m128i county00;
3017 __m128i county01;
3018 __m128i county11;
3019 __m128i county10;
3020 __univec acc00;
3021 __univec acc01;
3022 __univec acc11;
3023 __univec acc10;
3024 uint32_t ct2;
3025 while (sample_ctv6 >= 30) {
3026 sample_ctv6 -= 30;
3027 vend = &(veca0[30]);
3028 acc00.vi = _mm_setzero_si128();
3029 acc01.vi = _mm_setzero_si128();
3030 acc11.vi = _mm_setzero_si128();
3031 acc10.vi = _mm_setzero_si128();
3032 do {
3033 two_locus_3x3_zmiss_tablev_outer:
3034 loadera0 = *veca0++;
3035 loaderb0 = *vecb0++;
3036 loaderb1 = *vecb1++;
3037 loadera1 = *veca1++;
3038 countx00 = _mm_and_si128(loadera0, loaderb0);
3039 countx01 = _mm_and_si128(loadera0, loaderb1);
3040 countx11 = _mm_and_si128(loadera1, loaderb1);
3041 countx10 = _mm_and_si128(loadera1, loaderb0);
3042 countx00 = _mm_sub_epi64(countx00, _mm_and_si128(_mm_srli_epi64(countx00, 1), m1));
3043 countx01 = _mm_sub_epi64(countx01, _mm_and_si128(_mm_srli_epi64(countx01, 1), m1));
3044 countx11 = _mm_sub_epi64(countx11, _mm_and_si128(_mm_srli_epi64(countx11, 1), m1));
3045 countx10 = _mm_sub_epi64(countx10, _mm_and_si128(_mm_srli_epi64(countx10, 1), m1));
3046 countx00 = _mm_add_epi64(_mm_and_si128(countx00, m2), _mm_and_si128(_mm_srli_epi64(countx00, 2), m2));
3047 countx01 = _mm_add_epi64(_mm_and_si128(countx01, m2), _mm_and_si128(_mm_srli_epi64(countx01, 2), m2));
3048 countx11 = _mm_add_epi64(_mm_and_si128(countx11, m2), _mm_and_si128(_mm_srli_epi64(countx11, 2), m2));
3049 countx10 = _mm_add_epi64(_mm_and_si128(countx10, m2), _mm_and_si128(_mm_srli_epi64(countx10, 2), m2));
3050 two_locus_3x3_zmiss_tablev_one_left:
3051 loadera0 = *veca0++;
3052 loaderb0 = *vecb0++;
3053 loaderb1 = *vecb1++;
3054 loadera1 = *veca1++;
3055 county00 = _mm_and_si128(loadera0, loaderb0);
3056 county01 = _mm_and_si128(loadera0, loaderb1);
3057 county11 = _mm_and_si128(loadera1, loaderb1);
3058 county10 = _mm_and_si128(loadera1, loaderb0);
3059 county00 = _mm_sub_epi64(county00, _mm_and_si128(_mm_srli_epi64(county00, 1), m1));
3060 county01 = _mm_sub_epi64(county01, _mm_and_si128(_mm_srli_epi64(county01, 1), m1));
3061 county11 = _mm_sub_epi64(county11, _mm_and_si128(_mm_srli_epi64(county11, 1), m1));
3062 county10 = _mm_sub_epi64(county10, _mm_and_si128(_mm_srli_epi64(county10, 1), m1));
3063 countx00 = _mm_add_epi64(countx00, _mm_add_epi64(_mm_and_si128(county00, m2), _mm_and_si128(_mm_srli_epi64(county00, 2), m2)));
3064 countx01 = _mm_add_epi64(countx01, _mm_add_epi64(_mm_and_si128(county01, m2), _mm_and_si128(_mm_srli_epi64(county01, 2), m2)));
3065 countx11 = _mm_add_epi64(countx11, _mm_add_epi64(_mm_and_si128(county11, m2), _mm_and_si128(_mm_srli_epi64(county11, 2), m2)));
3066 countx10 = _mm_add_epi64(countx10, _mm_add_epi64(_mm_and_si128(county10, m2), _mm_and_si128(_mm_srli_epi64(county10, 2), m2)));
3067 acc00.vi = _mm_add_epi64(acc00.vi, _mm_add_epi64(_mm_and_si128(countx00, m4), _mm_and_si128(_mm_srli_epi64(countx00, 4), m4)));
3068 acc01.vi = _mm_add_epi64(acc01.vi, _mm_add_epi64(_mm_and_si128(countx01, m4), _mm_and_si128(_mm_srli_epi64(countx01, 4), m4)));
3069 acc11.vi = _mm_add_epi64(acc11.vi, _mm_add_epi64(_mm_and_si128(countx11, m4), _mm_and_si128(_mm_srli_epi64(countx11, 4), m4)));
3070 acc10.vi = _mm_add_epi64(acc10.vi, _mm_add_epi64(_mm_and_si128(countx10, m4), _mm_and_si128(_mm_srli_epi64(countx10, 4), m4)));
3071 } while (veca0 < vend);
3072 const __m128i m8 = {0x00ff00ff00ff00ffLLU, 0x00ff00ff00ff00ffLLU};
3073 acc00.vi = _mm_add_epi64(_mm_and_si128(acc00.vi, m8), _mm_and_si128(_mm_srli_epi64(acc00.vi, 8), m8));
3074 acc01.vi = _mm_add_epi64(_mm_and_si128(acc01.vi, m8), _mm_and_si128(_mm_srli_epi64(acc01.vi, 8), m8));
3075 acc11.vi = _mm_add_epi64(_mm_and_si128(acc11.vi, m8), _mm_and_si128(_mm_srli_epi64(acc11.vi, 8), m8));
3076 acc10.vi = _mm_add_epi64(_mm_and_si128(acc10.vi, m8), _mm_and_si128(_mm_srli_epi64(acc10.vi, 8), m8));
3077 counts_3x3[0] += ((acc00.u8[0] + acc00.u8[1]) * 0x1000100010001LLU) >> 48;
3078 counts_3x3[1] += ((acc01.u8[0] + acc01.u8[1]) * 0x1000100010001LLU) >> 48;
3079 counts_3x3[4] += ((acc11.u8[0] + acc11.u8[1]) * 0x1000100010001LLU) >> 48;
3080 counts_3x3[3] += ((acc10.u8[0] + acc10.u8[1]) * 0x1000100010001LLU) >> 48;
3081 }
3082 if (sample_ctv6) {
3083 vend = &(veca0[sample_ctv6]);
3084 ct2 = sample_ctv6 % 2;
3085 sample_ctv6 = 0;
3086 acc00.vi = _mm_setzero_si128();
3087 acc01.vi = _mm_setzero_si128();
3088 acc11.vi = _mm_setzero_si128();
3089 acc10.vi = _mm_setzero_si128();
3090 if (ct2) {
3091 countx00 = _mm_setzero_si128();
3092 countx01 = _mm_setzero_si128();
3093 countx11 = _mm_setzero_si128();
3094 countx10 = _mm_setzero_si128();
3095 goto two_locus_3x3_zmiss_tablev_one_left;
3096 }
3097 goto two_locus_3x3_zmiss_tablev_outer;
3098 }
3099 }
3100 #endif
3101
two_locus_count_table_zmiss1(uintptr_t * lptr1,uintptr_t * lptr2,uint32_t * counts_3x3,uint32_t sample_ctv3,uint32_t is_zmiss2)3102 static void two_locus_count_table_zmiss1(uintptr_t* lptr1, uintptr_t* lptr2, uint32_t* counts_3x3, uint32_t sample_ctv3, uint32_t is_zmiss2) {
3103 #ifdef __LP64__
3104 fill_uint_zero(6, counts_3x3);
3105 if (is_zmiss2) {
3106 two_locus_3x3_zmiss_tablev((__m128i*)lptr1, (__m128i*)lptr2, counts_3x3, sample_ctv3 / 2);
3107 } else {
3108 two_locus_3x3_tablev((__m128i*)lptr1, (__m128i*)lptr2, counts_3x3, sample_ctv3 / 2, 2);
3109 }
3110 #else
3111 counts_3x3[0] = popcount_longs_intersect(lptr1, lptr2, sample_ctv3);
3112 counts_3x3[1] = popcount_longs_intersect(lptr1, &(lptr2[sample_ctv3]), sample_ctv3);
3113 if (!is_zmiss2) {
3114 counts_3x3[2] = popcount_longs_intersect(lptr1, &(lptr2[2 * sample_ctv3]), sample_ctv3);
3115 counts_3x3[5] = popcount_longs_intersect(&(lptr1[sample_ctv3]), &(lptr2[2 * sample_ctv3]), sample_ctv3);
3116 }
3117 lptr1 = &(lptr1[sample_ctv3]);
3118 counts_3x3[3] = popcount_longs_intersect(lptr1, lptr2, sample_ctv3);
3119 counts_3x3[4] = popcount_longs_intersect(lptr1, &(lptr2[sample_ctv3]), sample_ctv3);
3120 #endif
3121 }
3122
two_locus_count_table(uintptr_t * lptr1,uintptr_t * lptr2,uint32_t * counts_3x3,uint32_t sample_ctv3,uint32_t is_zmiss2)3123 static void two_locus_count_table(uintptr_t* lptr1, uintptr_t* lptr2, uint32_t* counts_3x3, uint32_t sample_ctv3, uint32_t is_zmiss2) {
3124 #ifdef __LP64__
3125 uint32_t uii;
3126 fill_uint_zero(9, counts_3x3);
3127 if (!is_zmiss2) {
3128 two_locus_3x3_tablev((__m128i*)lptr1, (__m128i*)lptr2, counts_3x3, sample_ctv3 / 2, 3);
3129 } else {
3130 two_locus_3x3_tablev((__m128i*)lptr2, (__m128i*)lptr1, counts_3x3, sample_ctv3 / 2, 2);
3131 uii = counts_3x3[1];
3132 counts_3x3[1] = counts_3x3[3];
3133 counts_3x3[3] = uii;
3134 counts_3x3[6] = counts_3x3[2];
3135 counts_3x3[7] = counts_3x3[5];
3136 }
3137 #else
3138 counts_3x3[0] = popcount_longs_intersect(lptr2, lptr1, sample_ctv3);
3139 counts_3x3[3] = popcount_longs_intersect(lptr2, &(lptr1[sample_ctv3]), sample_ctv3);
3140 counts_3x3[6] = popcount_longs_intersect(lptr2, &(lptr1[2 * sample_ctv3]), sample_ctv3);
3141 lptr2 = &(lptr2[sample_ctv3]);
3142 counts_3x3[1] = popcount_longs_intersect(lptr2, lptr1, sample_ctv3);
3143 counts_3x3[4] = popcount_longs_intersect(lptr2, &(lptr1[sample_ctv3]), sample_ctv3);
3144 counts_3x3[7] = popcount_longs_intersect(lptr2, &(lptr1[2 * sample_ctv3]), sample_ctv3);
3145 if (!is_zmiss2) {
3146 lptr2 = &(lptr2[sample_ctv3]);
3147 counts_3x3[2] = popcount_longs_intersect(lptr2, lptr1, sample_ctv3);
3148 counts_3x3[5] = popcount_longs_intersect(lptr2, &(lptr1[sample_ctv3]), sample_ctv3);
3149 counts_3x3[8] = popcount_longs_intersect(lptr2, &(lptr1[2 * sample_ctv3]), sample_ctv3);
3150 }
3151 #endif
3152 }
3153
fepi_counts_to_joint_effects_stats(uint32_t group_ct,uint32_t * counts,double * diff_ptr,double * case_var_ptr,double * ctrl_var_ptr)3154 void fepi_counts_to_joint_effects_stats(uint32_t group_ct, uint32_t* counts, double* diff_ptr, double* case_var_ptr, double* ctrl_var_ptr) {
3155 // See JointEffects::evaluateStatistic(). This is slightly reordered to
3156 // avoid a bit of redundant calculation, but the logic is otherwise
3157 // identical.
3158 //
3159 // Two adjustments to the raw counts are applied:
3160 // 1. If any cell in either the case or control tables is zero, we add 0.5 to
3161 // all cells in both tables.
3162 // 2. Then, if the [hom A2 x hom B2] cell in either the case or control table
3163 // is less than 1% of the total (very unlikely since A2/B2 are normally
3164 // major), multiply all other cells by a reduction factor and increase the
3165 // [hom A2 x hom B2] cell by the total reduction (choosing the factor such
3166 // that the [hom A2 x hom B2] cell ends up at about 1%).
3167 //
3168 // Then, we define
3169 // i22_case := [hom A1 x hom B1] * [hom A2 x hom B2] /
3170 // ([hom A1 x hom B2] * [hom A2 x hom B1])
3171 // i21_case := [hom A1 x het] * [hom A2 x hom B2] /
3172 // ([hom A1 x hom B2] * [hom A2 x het])
3173 // i12_case := [het x hom B1] * [hom A2 x hom B2] /
3174 // ([het x hom B2] * [hom A2 x hom B1])
3175 // i11_case := [het x het] * [hom A2 x hom B2] /
3176 // ([het x hom B2] * [hom A2 x het])
3177 // (analogously for controls)
3178 //
3179 // At this point, two formulas may be applied to the (adjusted) counts:
3180 // 1. If i11 is greater than 0.5 for both cases and controls (this is usually
3181 // true),
3182 // xi0 := 0.5
3183 // xi1 := 1.0
3184 // xi2 := 1.0
3185 // xi3 := 2 * i11_case / (2 * i11_case - 1)
3186 // invq00 := 1.0 / [hom A2 x hom B2]
3187 // invq01 := 1.0 / [hom A2 x het]
3188 // ...
3189 // inverse_matrix := [ (invq22+invq02+invq20+invq00)*xi0*xi0 (invq20+invq00)*xi0*xi1 (invq02+invq00)*xi0*xi2 invq00*xi0*xi3 ]^-1
3190 // [ ... (invq21+invq20+invq01+invq00)*xi1*xi1 invq00*xi1*xi2 (invq01+invq00)*xi1*xi3 ]
3191 // [ ... ... (invq12+invq10+invq02+invq00)*xi2*xi2 (invq10+invq00)*xi2*xi3 ]
3192 // [ ... ... ... (invq11+invq10+invq01+invq00)*xi3*xi3 ]
3193 // (bottom left is symmetric copy of upper right)
3194 // row_totals_case[i] := sum(row i of inverse_matrix_case)
3195 // total_inv_v_case := 1.0 / (row_totals_case[0] + [1] + [2] + [3])
3196 // lambda_case := row_totals_case[0] * log(i22_case) * 0.5 +
3197 // row_totals_case[1] * log(i21_case) +
3198 // row_totals_case[2] * log(i12_case) +
3199 // row_totals_case[3] * log(2 * i11_case - 1)
3200 // (analogous formulas for lambda_ctrl)
3201 // diff := lambda_case * total_inv_v_case -
3202 // lambda_ctrl * total_inv_v_ctrl
3203 // chisq := diff * diff / (total_inv_v_case + total_inv_v_ctrl)
3204 //
3205 // 2. Otherwise,
3206 // xi0 := sqrt(i22) / (2 * sqrt(i22) + 2)
3207 // xi1 := i21 / (i21 + 1)
3208 // xi2 := i12 / (i12 + 1)
3209 // xi3 := 1.0
3210 // (inverse_matrix, row_totals, total_inv_v defined as before)
3211 // mu_case := row_totals_case[0] * log((sqrt(i22_case) + 1) * 0.5) +
3212 // row_totals_case[1] * log((i21_case + 1) * 0.5) +
3213 // row_totals_case[2] * log((i12_case + 1) * 0.5) +
3214 // row_totals_case[3] * log(i11_case)
3215 // (similar for mu_ctrl)
3216 // diff := mu_case * total_inv_v_case - mu_ctrl * total_inv_v_ctrl
3217 double dcounts[18];
3218 double invcounts[18];
3219 double ivv[8]; // i22_case in [0], i21_case in [1], ..., i22_ctrl in [4]...
3220 double xiv[8];
3221 double row_totals[8];
3222 double to_invert[16];
3223 MATRIX_INVERT_BUF1_TYPE int_1d_buf[4];
3224 double dbl_2d_buf[16];
3225 double tot_inv_v[2];
3226 double lambda_or_mu[2];
3227 double dxx;
3228 double dyy;
3229 double* dptr;
3230 double* dptr2;
3231 double* dptr3;
3232 uint32_t use_reg_stat;
3233 uint32_t uii;
3234 uint32_t ujj;
3235 uint32_t ukk;
3236 tot_inv_v[0] = 0.0; // gcc7 maybe-uninitialized warning
3237 dptr = dcounts;
3238 if (counts[0] && counts[1] && counts[2] && counts[3] && counts[4] && counts[5] && counts[6] && counts[7] && counts[8] && ((group_ct == 1) || (counts[9] && counts[10] && counts[11] && counts[12] && counts[13] && counts[14] && counts[15] && counts[16] && counts[17]))) {
3239 for (uii = 0; uii < group_ct; uii++) {
3240 dxx = 0;
3241 for (ujj = 0; ujj < 9; ujj++) {
3242 dyy = (double)((int32_t)(*counts++));
3243 *dptr++ = dyy;
3244 dxx += dyy;
3245 }
3246 if (dyy * 100 < dxx) {
3247 // This tends to come up with adjacent pairs of markers where MAF
3248 // "flips" from one side of 0.5 to the other. Is this really a good
3249 // way to handle it?
3250 dyy = dxx / (1.01 * dxx - dyy);
3251 dptr = &(dptr[-9]);
3252 for (ujj = 0; ujj < 8; ujj++) {
3253 *dptr *= dyy;
3254 dptr++;
3255 }
3256 *dptr++ = 0.01 * dyy * dxx;
3257 }
3258 }
3259 } else {
3260 for (uii = 0; uii < group_ct; uii++) {
3261 dxx = -4.5;
3262 for (ujj = 0; ujj < 9; ujj++) {
3263 dyy = 0.5 + (double)((int32_t)(*counts++));
3264 *dptr++ = dyy;
3265 dxx += dyy;
3266 }
3267 if (dyy * 100 < dxx) {
3268 dyy = dxx / (1.01 * dxx - dyy + 4.5);
3269 dptr = &(dptr[-9]);
3270 for (ujj = 0; ujj < 8; ujj++) {
3271 *dptr *= dyy;
3272 dptr++;
3273 }
3274 *dptr++ = 0.01 * dyy * dxx;
3275 }
3276 }
3277 }
3278 dptr = dcounts;
3279 dptr2 = invcounts;
3280 for (uii = 0; uii < group_ct; uii++) {
3281 for (ujj = 0; ujj < 9; ujj++) {
3282 *dptr2++ = 1.0 / (*dptr++);
3283 }
3284 }
3285 dptr2 = ivv;
3286 uii = 0;
3287 do {
3288 dptr = &(dcounts[uii * 9]);
3289 dptr3 = &(invcounts[uii * 9]);
3290 dxx = dptr[8];
3291 *dptr2++ = dxx * dptr[0] * dptr3[2] * dptr3[6];
3292 *dptr2++ = dxx * dptr[1] * dptr3[2] * dptr3[7];
3293 *dptr2++ = dxx * dptr[3] * dptr3[5] * dptr3[6];
3294 *dptr2++ = dxx * dptr[4] * dptr3[5] * dptr3[7];
3295 } while (++uii < group_ct);
3296 use_reg_stat = (ivv[3] > 0.5) && ((group_ct == 1) || (ivv[7] > 0.5));
3297 if (use_reg_stat) {
3298 dptr2 = xiv;
3299 for (uii = 0; uii < group_ct; uii++) {
3300 dxx = 2 * ivv[3 + 4 * uii];
3301 *dptr2++ = 0.5;
3302 *dptr2++ = 1.0;
3303 *dptr2++ = 1.0;
3304 *dptr2++ = dxx / (dxx - 1);
3305 }
3306 } else {
3307 for (uii = 0; uii < group_ct; uii++) {
3308 dptr = &(ivv[uii * 4]);
3309 dptr2 = &(xiv[uii * 4]);
3310 dxx = sqrt(dptr[0]);
3311 dptr2[1] = dptr[1] / (dptr[1] + 1);
3312 dptr2[2] = dptr[2] / (dptr[2] + 1);
3313 dptr2[3] = 1.0;
3314 dptr2[0] = dxx / (2 * dxx + 2);
3315 dptr[0] = dxx; // original i22 is not used from here on
3316 }
3317 }
3318 for (uii = 0; uii < group_ct; uii++) {
3319 dptr = &(invcounts[uii * 9]);
3320 dptr2 = &(xiv[uii * 4]);
3321 // invq00 = dptr[8]
3322 // invq01 = dptr[7]
3323 // ...
3324 // thank god this code doesn't need to be edited every day
3325 dxx = dptr[8];
3326 dyy = dptr2[0];
3327 to_invert[0] = (dptr[0] + dptr[2] + dptr[6] + dxx) * dyy * dyy;
3328 to_invert[1] = (dptr[2] + dxx) * dyy * dptr2[1];
3329 to_invert[2] = (dptr[6] + dxx) * dyy * dptr2[2];
3330 to_invert[3] = dxx * dyy * dptr2[3];
3331 dyy = dptr2[1];
3332 to_invert[4] = to_invert[1];
3333 to_invert[5] = (dptr[1] + dptr[2] + dptr[7] + dxx) * dyy * dyy;
3334 to_invert[6] = dxx * dyy * dptr2[2];
3335 to_invert[7] = (dptr[7] + dxx) * dyy * dptr2[3];
3336 dyy = dptr2[2];
3337 to_invert[8] = to_invert[2];
3338 to_invert[9] = to_invert[6];
3339 to_invert[10] = (dptr[3] + dptr[5] + dptr[6] + dxx) * dyy * dyy;
3340 to_invert[11] = (dptr[5] + dxx) * dyy * dptr2[3];
3341 dyy = dptr2[3];
3342 to_invert[12] = to_invert[3];
3343 to_invert[13] = to_invert[7];
3344 to_invert[14] = to_invert[11];
3345 to_invert[15] = (dptr[4] + dptr[5] + dptr[7] + dxx) * dyy * dyy;
3346 invert_matrix(4, to_invert, int_1d_buf, dbl_2d_buf);
3347 dptr = to_invert;
3348 dptr2 = &(row_totals[uii * 4]);
3349 dxx = 0;
3350 for (ujj = 0; ujj < 4; ujj++) {
3351 dyy = 0;
3352 for (ukk = 0; ukk < 4; ukk++) {
3353 dyy += (*dptr++);
3354 }
3355 *dptr2++ = dyy;
3356 dxx += dyy;
3357 }
3358 tot_inv_v[uii] = dxx;
3359 }
3360 if (use_reg_stat) {
3361 for (uii = 0; uii < group_ct; uii++) {
3362 dptr = &(row_totals[uii * 4]);
3363 dptr2 = &(ivv[uii * 4]);
3364 lambda_or_mu[uii] = dptr[0] * log(dptr2[0]) * 0.5 +
3365 dptr[1] * log(dptr2[1]) +
3366 dptr[2] * log(dptr2[2]) +
3367 dptr[3] * log(2 * dptr2[3] - 1);
3368 }
3369 } else {
3370 for (uii = 0; uii < group_ct; uii++) {
3371 dptr = &(row_totals[uii * 4]);
3372 dptr2 = &(ivv[uii * 4]);
3373 // note that dptr2[0] has sqrt(i22) instead of i22
3374 // really minor thing to check: cheaper to subtract log(2) than multiply
3375 // by 0.5 inside log? (I wouldn't think so: multiplication-by-0.5 is the
3376 // sort of thing which looks like it's eligible for automatic
3377 // optimization.)
3378 lambda_or_mu[uii] = dptr[0] * log((dptr2[0] + 1) * 0.5) +
3379 dptr[1] * log((dptr2[1] + 1) * 0.5) +
3380 dptr[2] * log((dptr2[2] + 1) * 0.5) +
3381 dptr[3] * log(dptr2[3]);
3382 }
3383 }
3384 dxx = tot_inv_v[0];
3385 if (group_ct == 1) {
3386 *case_var_ptr = dxx;
3387 *diff_ptr = lambda_or_mu[0];
3388 return;
3389 }
3390 dxx = 1.0 / dxx;
3391 dyy = 1.0 / tot_inv_v[1];
3392 *diff_ptr = lambda_or_mu[0] * dxx - lambda_or_mu[1] * dyy;
3393 *case_var_ptr = dxx;
3394 *ctrl_var_ptr = dyy;
3395 }
3396
3397 // epistasis multithread globals
3398 static uint32_t* g_epi_geno1_offsets;
3399 static double* g_epi_all_chisq;
3400 static uintptr_t* g_epi_geno1;
3401 static uintptr_t* g_epi_zmiss1;
3402 static uint32_t* g_epi_idx1_block_bounds;
3403 static uint32_t* g_epi_idx1_block_bounds16;
3404 static double* g_epi_best_chisq1;
3405 static uint32_t* g_epi_best_id1; // best partner ID
3406 static uint32_t* g_epi_n_sig_ct1;
3407 static uint32_t* g_epi_fail_ct1;
3408 static uintptr_t* g_epi_geno2;
3409 static uintptr_t* g_epi_zmiss2;
3410 static uint32_t* g_epi_tot2;
3411 static double* g_epi_boost_precalc2 = nullptr;
3412 static double* g_epi_best_chisq2;
3413 static uint32_t* g_epi_best_id2;
3414 static uint32_t* g_epi_n_sig_ct2;
3415 static uint32_t* g_epi_fail_ct2;
3416 static double* g_epi_recip_cache;
3417 static uint32_t g_epi_thread_ct;
3418 static uint32_t g_epi_case_ct;
3419 static uint32_t g_epi_ctrl_ct;
3420 static uint32_t g_epi_flag;
3421 static uint32_t g_epi_cellmin;
3422 static uintptr_t g_epi_marker_ct;
3423 static uintptr_t g_epi_marker_idx1;
3424 static uintptr_t g_epi_idx2_block_size;
3425 static uintptr_t g_epi_idx2_block_start;
3426 static double g_epi_alpha1sq[3];
3427 static double g_epi_alpha2sq[3];
3428
3429 // The following two functions are essentially ported from Statistics.cpp in
3430 // Richard Howey's CASSI software
3431 // (http://www.staff.ncl.ac.uk/richard.howey/cassi/index.html). (CASSI is also
3432 // GPLv3-licensed; just remember to give credit to Howey if you redistribute a
3433 // variant of this code. This would have been a friggin' nightmare to debug if
3434 // he hadn't already done all the real work.)
fepi_counts_to_stats(uint32_t * counts_3x3,uint32_t no_ueki,double * or_ptr,double * var_ptr)3435 static void fepi_counts_to_stats(uint32_t* counts_3x3, uint32_t no_ueki, double* or_ptr, double* var_ptr) {
3436 double c11 = (double)((int32_t)(4 * counts_3x3[0] + 2 * (counts_3x3[1] + counts_3x3[3]) + counts_3x3[4]));
3437 double c12 = (double)((int32_t)(4 * counts_3x3[2] + 2 * (counts_3x3[1] + counts_3x3[5]) + counts_3x3[4]));
3438 double c21 = (double)((int32_t)(4 * counts_3x3[6] + 2 * (counts_3x3[3] + counts_3x3[7]) + counts_3x3[4]));
3439 double c22 = (double)((int32_t)(4 * counts_3x3[8] + 2 * (counts_3x3[5] + counts_3x3[7]) + counts_3x3[4]));
3440 double rc11;
3441 double rc12;
3442 double rc21;
3443 double rc22;
3444 double dxx;
3445 uint32_t no_adj;
3446 if (!no_ueki) {
3447 // See AdjustedFastEpistasis::calculateLogOddsAdjustedVariance().
3448 no_adj = (counts_3x3[0] && counts_3x3[1] && counts_3x3[2] && counts_3x3[3] && counts_3x3[4] && counts_3x3[5] && counts_3x3[6] && counts_3x3[7] && counts_3x3[8]);
3449 if (!no_adj) {
3450 c11 += 4.5;
3451 c12 += 4.5;
3452 c21 += 4.5;
3453 c22 += 4.5;
3454 }
3455 rc11 = 1.0 / c11;
3456 rc12 = 1.0 / c12;
3457 rc21 = 1.0 / c21;
3458 rc22 = 1.0 / c22;
3459 *or_ptr = log(c11 * c22 * rc12 * rc21);
3460
3461 c11 = rc11 - rc12; // bit2
3462 c12 = rc11 - rc21; // bit3
3463 dxx = rc11 - rc12 - rc21 + rc22; // bit5
3464 c21 = rc22 - rc12; // bit6
3465 c22 = rc22 - rc21; // bit8
3466
3467 rc11 *= rc11;
3468 rc12 *= rc12;
3469 rc21 *= rc21;
3470 rc22 *= rc22;
3471 c11 *= c11;
3472 c12 *= c12;
3473 c21 *= c21;
3474 c22 *= c22;
3475 dxx *= dxx;
3476
3477 if (no_adj) {
3478 *var_ptr = 4 * (4 * (rc11 * (double)((int32_t)counts_3x3[0]) +
3479 rc12 * (double)((int32_t)counts_3x3[2]) +
3480 rc21 * (double)((int32_t)counts_3x3[6]) +
3481 rc22 * (double)((int32_t)counts_3x3[8])) +
3482 c11 * (double)((int32_t)counts_3x3[1]) +
3483 c12 * (double)((int32_t)counts_3x3[3]) +
3484 c21 * (double)((int32_t)counts_3x3[5]) +
3485 c22 * (double)((int32_t)counts_3x3[7])) +
3486 dxx * (double)((int32_t)counts_3x3[4]);
3487 } else {
3488 *var_ptr = 4 * (4 * (rc11 * ((double)((int32_t)counts_3x3[0]) + 0.5) +
3489 rc12 * ((double)((int32_t)counts_3x3[2]) + 0.5) +
3490 rc21 * ((double)((int32_t)counts_3x3[6]) + 0.5) +
3491 rc22 * ((double)((int32_t)counts_3x3[8]) + 0.5)) +
3492 c11 * ((double)((int32_t)counts_3x3[1]) + 0.5) +
3493 c12 * ((double)((int32_t)counts_3x3[3]) + 0.5) +
3494 c21 * ((double)((int32_t)counts_3x3[5]) + 0.5) +
3495 c22 * ((double)((int32_t)counts_3x3[7]) + 0.5)) +
3496 dxx * ((double)((int32_t)counts_3x3[4]) + 0.5);
3497 }
3498 } else {
3499 rc11 = 1.0 / c11;
3500 rc12 = 1.0 / c12;
3501 rc21 = 1.0 / c21;
3502 rc22 = 1.0 / c22;
3503 *or_ptr = log(c11 * c22 * rc12 * rc21);
3504 *var_ptr = rc11 + rc12 + rc21 + rc22;
3505 }
3506 }
3507
boost_calc_p_bc(uint32_t case0_ct,uint32_t case1_ct,uint32_t case2_ct,uint32_t ctrl0_ct,uint32_t ctrl1_ct,uint32_t ctrl2_ct,double * p_bc)3508 void boost_calc_p_bc(uint32_t case0_ct, uint32_t case1_ct, uint32_t case2_ct, uint32_t ctrl0_ct, uint32_t ctrl1_ct, uint32_t ctrl2_ct, double* p_bc) {
3509 double* recip_cache = g_epi_recip_cache;
3510 double tot_recip = recip_cache[case0_ct + case1_ct + case2_ct];
3511 p_bc[0] = ((int32_t)case0_ct) * tot_recip;
3512 p_bc[1] = ((int32_t)case1_ct) * tot_recip;
3513 p_bc[2] = ((int32_t)case2_ct) * tot_recip;
3514 tot_recip = recip_cache[ctrl0_ct + ctrl1_ct + ctrl2_ct];
3515 p_bc[3] = ((int32_t)ctrl0_ct) * tot_recip;
3516 p_bc[4] = ((int32_t)ctrl1_ct) * tot_recip;
3517 p_bc[5] = ((int32_t)ctrl2_ct) * tot_recip;
3518 }
3519
boost_calc_p_ca(uint32_t case0_ct,uint32_t case1_ct,uint32_t case2_ct,uint32_t ctrl0_ct,uint32_t ctrl1_ct,uint32_t ctrl2_ct,double * p_ca,uint32_t * df_adj_ptr)3520 uint32_t boost_calc_p_ca(uint32_t case0_ct, uint32_t case1_ct, uint32_t case2_ct, uint32_t ctrl0_ct, uint32_t ctrl1_ct, uint32_t ctrl2_ct, double* p_ca, uint32_t* df_adj_ptr) {
3521 double* recip_cache = g_epi_recip_cache;
3522 uint32_t uii = case0_ct + ctrl0_ct;
3523 uint32_t df_adj = 0;
3524 double tot_recip;
3525 tot_recip = recip_cache[uii];
3526 if (!uii) {
3527 df_adj++;
3528 }
3529 p_ca[0] = ((int32_t)case0_ct) * tot_recip;
3530 p_ca[1] = ((int32_t)ctrl0_ct) * tot_recip;
3531 uii = case1_ct + ctrl1_ct;
3532 tot_recip = recip_cache[uii];
3533 if (!uii) {
3534 df_adj++;
3535 }
3536 p_ca[2] = ((int32_t)case1_ct) * tot_recip;
3537 p_ca[3] = ((int32_t)ctrl1_ct) * tot_recip;
3538 uii = case2_ct + ctrl2_ct;
3539 tot_recip = recip_cache[uii];
3540 if (!uii) {
3541 df_adj++;
3542 }
3543 p_ca[4] = ((int32_t)case2_ct) * tot_recip;
3544 p_ca[5] = ((int32_t)ctrl2_ct) * tot_recip;
3545 *df_adj_ptr = df_adj;
3546 return (df_adj > 1);
3547 }
3548
fepi_counts_to_boost_chisq(uint32_t * counts,double * p_bc,double * p_ca,double * alpha1sq_ptr,double * alpha2sq_ptr,uintptr_t df_adj,double * chisq_ptr,uint32_t * sig_ct1_ptr,uint32_t * sig_ct2_ptr)3549 double fepi_counts_to_boost_chisq(uint32_t* counts, double* p_bc, double* p_ca, double* alpha1sq_ptr, double* alpha2sq_ptr, uintptr_t df_adj, double* chisq_ptr, uint32_t* sig_ct1_ptr, uint32_t* sig_ct2_ptr) {
3550 // see BOOSTx64.c lines 625-903.
3551 double interaction_measure = 0.0;
3552 double tau = 0.0;
3553 double* recip_cache = g_epi_recip_cache;
3554 uint32_t* uiptr = counts;
3555 uint32_t sum = 0;
3556 uint32_t uoo = 0;
3557 double mu_xx[9]; // initially p_ab
3558 double mu_tmp[18];
3559 double mu0_tmp[18];
3560 double* dptr = mu_xx;
3561 double sum_recip;
3562 double dxx;
3563 double dyy;
3564 double mu_error;
3565
3566 // dirty hack: encode df adjustment in low bits of *chisq_ptr
3567 uintptr_t ularr[sizeof(double) / BYTECT];
3568
3569 uint32_t uii;
3570 uint32_t ujj;
3571 uint32_t ukk;
3572 uint32_t umm;
3573 uint32_t unn;
3574 for (uii = 0; uii < 3; uii++) {
3575 ujj = counts[uii] + counts[uii + 9];
3576 ukk = counts[uii + 3] + counts[uii + 12];
3577 umm = counts[uii + 6] + counts[uii + 15];
3578 unn = ujj + ukk + umm;
3579 if (!unn) {
3580 if (uoo++) {
3581 return NAN;
3582 }
3583 df_adj++;
3584 }
3585 sum += unn;
3586 dxx = recip_cache[unn];
3587 *dptr++ = ((int32_t)ujj) * dxx;
3588 *dptr++ = ((int32_t)ukk) * dxx;
3589 *dptr++ = ((int32_t)umm) * dxx;
3590 }
3591 for (ukk = 0; ukk < 2; ukk++) {
3592 for (uii = 0; uii < 3; uii++) {
3593 dyy = p_ca[2 * uii + ukk];
3594 dptr = &(p_bc[3 * ukk]);
3595 dxx = mu_xx[uii] * (*dptr++) * dyy;
3596 tau += dxx;
3597 umm = *uiptr++;
3598 if (umm) {
3599 if (dxx != 0.0) {
3600 // Cx * log(Cx / y)
3601 // = Cx * (log(C) + log(x / y))
3602 // = Cx * log(C) + Cx * log(x / y)
3603
3604 // caching entropy as well would merely reduce a multiplication to
3605 // an addition, which is almost certainly not worth the cost
3606 interaction_measure -= ((int32_t)umm) * log(dxx * recip_cache[umm]);
3607 } else {
3608 dxx = (double)((int32_t)umm);
3609 interaction_measure += dxx * log(dxx);
3610 }
3611 }
3612 dxx = mu_xx[uii + 3] * (*dptr++) * dyy;
3613 tau += dxx;
3614 umm = *uiptr++;
3615 if (umm) {
3616 if (dxx != 0.0) {
3617 interaction_measure -= ((int32_t)umm) * log(dxx * recip_cache[umm]);
3618 } else {
3619 dxx = (double)((int32_t)umm);
3620 interaction_measure += dxx * log(dxx);
3621 }
3622 }
3623 dxx = mu_xx[uii + 6] * (*dptr++) * dyy;
3624 tau += dxx;
3625 umm = *uiptr++;
3626 if (umm) {
3627 if (dxx != 0.0) {
3628 interaction_measure -= ((int32_t)umm) * log(dxx * recip_cache[umm]);
3629 } else {
3630 dxx = (double)((int32_t)umm);
3631 interaction_measure += dxx * log(dxx);
3632 }
3633 }
3634 }
3635 }
3636 // interaction_measure = interaction_measure / sum - log(sum);
3637 // interaction_measure = (interaction_measure + log(tau)) * sum * 2;
3638 sum_recip = recip_cache[sum];
3639 interaction_measure = 2 * (interaction_measure + ((int32_t)sum) * log(tau * sum_recip));
3640 // > instead of >= for maximum compatibility, I guess
3641 if (interaction_measure > alpha1sq_ptr[df_adj]) {
3642 for (uii = 0; uii < 18; uii++) {
3643 mu_tmp[uii] = 1.0;
3644 }
3645 do {
3646 memcpy(mu0_tmp, mu_tmp, 18 * sizeof(double));
3647 dptr = mu_xx; // mu_ij
3648 for (uii = 0; uii < 18; uii += 2) {
3649 *dptr++ = mu_tmp[uii] + mu_tmp[uii + 1];
3650 }
3651 dptr = mu_tmp;
3652 for (uii = 0; uii < 9; uii++) {
3653 dxx = mu_xx[uii];
3654 if (dxx != 0.0) {
3655 dxx = (double)((int32_t)(counts[uii] + counts[uii + 9])) / dxx;
3656 }
3657 *dptr *= dxx;
3658 dptr++;
3659 *dptr *= dxx;
3660 dptr++;
3661 }
3662 dptr = mu_xx; // mu_ik
3663 for (uii = 0; uii < 18; uii += 6) {
3664 for (ukk = uii; ukk < uii + 2; ukk++) {
3665 *dptr++ = mu_tmp[ukk] + mu_tmp[ukk + 2] + mu_tmp[ukk + 4];
3666 }
3667 }
3668 for (uii = 0; uii < 3; uii++) {
3669 for (ukk = 0; ukk < 2; ukk++) {
3670 dxx = mu_xx[uii * 2 + ukk];
3671 if (dxx != 0.0) {
3672 dxx = ((double)((int32_t)(counts[ukk * 9 + uii * 3] + counts[ukk * 9 + uii * 3 + 1] + counts[ukk * 9 + uii * 3 + 2]))) / dxx;
3673 }
3674 mu_tmp[uii * 6 + ukk] *= dxx;
3675 mu_tmp[uii * 6 + ukk + 2] *= dxx;
3676 mu_tmp[uii * 6 + ukk + 4] *= dxx;
3677 }
3678 }
3679 dptr = mu_xx; // mu_jk
3680 for (uii = 0; uii < 6; uii++) {
3681 *dptr = mu_tmp[uii] + mu_tmp[uii + 6] + mu_tmp[uii + 12];
3682 dptr++;
3683 }
3684 for (ujj = 0; ujj < 3; ujj++) {
3685 for (ukk = 0; ukk < 2; ukk++) {
3686 dxx = mu_xx[ujj * 2 + ukk];
3687 if (dxx != 0.0) {
3688 dxx = ((double)((int32_t)(counts[ukk * 9 + ujj] + counts[ukk * 9 + ujj + 3] + counts[ukk * 9 + ujj + 6]))) / dxx;
3689 }
3690 mu_tmp[ujj * 2 + ukk] *= dxx;
3691 mu_tmp[ujj * 2 + ukk + 6] *= dxx;
3692 mu_tmp[ujj * 2 + ukk + 12] *= dxx;
3693 }
3694 }
3695 mu_error = 0.0;
3696 for (uii = 0; uii < 18; uii++) {
3697 mu_error += fabs(mu_tmp[uii] - mu0_tmp[uii]);
3698 }
3699 } while (mu_error > 0.001);
3700 tau = 0.0;
3701 interaction_measure = 0.0;
3702 uiptr = counts;
3703 for (ukk = 0; ukk < 2; ukk++) {
3704 for (uii = 0; uii < 3; uii++) {
3705 for (ujj = 0; ujj < 3; ujj++) {
3706 dxx = ((double)((int32_t)(*uiptr++))) * sum_recip;
3707 dyy = mu_tmp[uii * 6 + ujj * 2 + ukk] * sum_recip;
3708 if (dxx != 0.0) {
3709 if (dyy != 0.0) {
3710 interaction_measure += dxx * log(dxx / dyy);
3711 } else {
3712 interaction_measure += dxx * log(dxx);
3713 }
3714 }
3715 tau += dyy;
3716 }
3717 }
3718 }
3719 interaction_measure = (interaction_measure + log(tau)) * ((int32_t)(sum * 2));
3720 memcpy(ularr, &interaction_measure, sizeof(double));
3721 // save df_adj in low two bits
3722 ularr[0] &= ~(3 * ONELU);
3723 ularr[0] |= df_adj;
3724 memcpy(chisq_ptr, ularr, sizeof(double));
3725 if (interaction_measure < alpha1sq_ptr[df_adj]) {
3726 interaction_measure = alpha1sq_ptr[df_adj];
3727 }
3728 }
3729 if (interaction_measure >= alpha2sq_ptr[df_adj]) {
3730 *sig_ct1_ptr += 1;
3731 *sig_ct2_ptr += 1;
3732 }
3733 return interaction_measure;
3734 }
3735
// Worker-thread body for the contingency-table-based epistasis scan.
//
// arg carries the thread index (cast to a pointer).  The thread handles the
// index-1 markers [g_epi_idx1_block_bounds[tidx],
// g_epi_idx1_block_bounds[tidx + 1]) and pairs each of them with the markers
// of the current index-2 block, whose position and size are reread from
// g_epi_idx2_block_start / g_epi_idx2_block_size on every pass of the outer
// loop.
//
// For each marker pair, a 3x3 table of joint counts is assembled for cases
// (counts[0..8]) and, unless EPI_FAST_CASE_ONLY is set, for controls
// (counts[9..17]).  The table is then turned into a test statistic by
// fepi_counts_to_stats(), fepi_counts_to_joint_effects_stats(), or
// fepi_counts_to_boost_chisq(), depending on g_epi_flag.
//
// Outputs, all written through the g_epi_* arrays:
//   - the statistic itself, when it is >= the first alpha1sq threshold (NAN is
//     stored for failed pairs when that threshold is exactly 0.0);
//   - per-marker counts of statistics reaching the alpha2sq threshold;
//   - per-marker failure counts;
//   - per-marker best statistic and the index of the partner achieving it.
// Index-2-side accumulators are addressed at tidx * idx2_block_sizea16, i.e.
// each thread writes to its own slice of them.
THREAD_RET_TYPE fast_epi_thread(void* arg) {
  uintptr_t tidx = (uintptr_t)arg;
  uintptr_t block_idx1_start = g_epi_idx1_block_bounds[tidx];
  uintptr_t block_idx1_end = g_epi_idx1_block_bounds[tidx + 1];
  uintptr_t idx1_block_start16 = g_epi_idx1_block_bounds16[tidx];
  uintptr_t marker_idx1 = g_epi_marker_idx1 + block_idx1_start;
  uintptr_t marker_ct = g_epi_marker_ct;
  uint32_t case_ct = g_epi_case_ct;
  uint32_t ctrl_ct = g_epi_ctrl_ct;
  // Each marker's data is laid out as three case_ctv3-word segments followed
  // by three ctrl_ctv3-word segments (see the popcount_longs() calls below).
  uint32_t case_ctv3 = BITCT_TO_ALIGNED_WORDCT(case_ct);
  uint32_t ctrl_ctv3 = BITCT_TO_ALIGNED_WORDCT(ctrl_ct);
  uint32_t case_ctsplit = 3 * case_ctv3;
  uint32_t ctrl_ctsplit = 3 * ctrl_ctv3;
  uint32_t tot_ctsplit = case_ctsplit + ctrl_ctsplit;
  // NOTE(review): the divide-then-mask idiom extracts a single flag bit only
  // if each EPI_FAST_* constant is a power of two -- confirm in the header.
  uint32_t is_case_only = (g_epi_flag / EPI_FAST_CASE_ONLY) & 1;
  uint32_t group_ct = 2 - is_case_only;
  // number of per-marker totals stored in tot2 (3 per group)
  uint32_t tot_stride = group_ct * 3;
  uint32_t no_ueki = (g_epi_flag / EPI_FAST_NO_UEKI) & 1;
  uint32_t is_boost = (g_epi_flag / EPI_FAST_BOOST) & 1;
  uint32_t do_joint_effects = (g_epi_flag / EPI_FAST_JOINT_EFFECTS) & 1;
  uint32_t cellmin = g_epi_cellmin;
  uint32_t best_id_fixed = 0;
  uint32_t is_first_half = 0;
  uintptr_t* geno1 = g_epi_geno1;
  uintptr_t* zmiss1 = g_epi_zmiss1;
  uintptr_t* cur_geno1 = nullptr;
  uintptr_t* cur_geno1_ctrls = nullptr;
  double* cur_boost_precalc2 = nullptr;
  double* p_bc_ptr = nullptr;
  uint32_t* geno1_offsets = g_epi_geno1_offsets;
  uint32_t* best_id1 = &(g_epi_best_id1[idx1_block_start16]);
  double* alpha1sq_ptr = g_epi_alpha1sq;
  double* alpha2sq_ptr = g_epi_alpha2sq;
  // Only element 0 of each threshold array is used outside the boost path;
  // the boost path hands the whole arrays to fepi_counts_to_boost_chisq().
  double alpha1sq = alpha1sq_ptr[0];
  double alpha2sq = alpha2sq_ptr[0];
  // stays 0 in case-only mode, so zsq's denominator is then just case_var
  double ctrl_var = 0;
  uint32_t tot1[6];
  uint32_t counts[18];
  double p_bc_tmp[6];
  double p_ca_fixed[6];
  double p_ca_tmp[6];
  uintptr_t* geno2;
  uintptr_t* zmiss2;
  uintptr_t* cur_geno2;
  double* all_chisq_write;
  double* chisq2_ptr;
  double* boost_precalc2;
  double* all_chisq;
  double* best_chisq1;
  double* best_chisq2;
  double* p_ca_ptr;
  uint32_t* n_sig_ct1;
  uint32_t* fail_ct1;
  uint32_t* best_id2;
  uint32_t* n_sig_ct2;
  uint32_t* fail_ct2;
  uint32_t* tot2;
  uint32_t* cur_tot2;
  uintptr_t idx2_block_size;
  uintptr_t cur_idx2_block_size;
  uintptr_t idx2_block_start;
  uintptr_t idx2_block_end;
  uintptr_t idx2_block_sizea16;
  uintptr_t block_idx1;
  uintptr_t block_delta1;
  uintptr_t block_idx2;
  uintptr_t cur_zmiss2;
  uintptr_t cur_zmiss2_tmp;
  uintptr_t ulii;
  double best_chisq_fixed;
  double case_var;
  double ctrl_or;
  double dxx;
  double zsq;
  uint32_t nm_case_fixed;
  uint32_t nm_ctrl_fixed;
  uint32_t nm_fixed;
  uint32_t n_sig_ct_fixed;
  uint32_t fail_ct_fixed;
  uint32_t df_adj_base;
  uint32_t df_adj;
  tot1[3] = 0; // suppress warning
  tot1[4] = 0;
  tot1[5] = 0;
  // One pass of this loop per index-2 block.
  while (1) {
    // Reload everything describing the current index-2 block.
    idx2_block_size = g_epi_idx2_block_size;
    cur_idx2_block_size = idx2_block_size;
    idx2_block_start = g_epi_idx2_block_start;
    idx2_block_end = idx2_block_start + idx2_block_size;
    idx2_block_sizea16 = round_up_pow2(idx2_block_size, 16);
    geno2 = g_epi_geno2;
    zmiss2 = g_epi_zmiss2;
    tot2 = g_epi_tot2;
    boost_precalc2 = g_epi_boost_precalc2;
    all_chisq = &(g_epi_all_chisq[idx2_block_start]);
    best_chisq1 = &(g_epi_best_chisq1[idx1_block_start16]);
    best_chisq2 = &(g_epi_best_chisq2[tidx * idx2_block_sizea16]);
    n_sig_ct1 = &(g_epi_n_sig_ct1[idx1_block_start16]);
    fail_ct1 = &(g_epi_fail_ct1[idx1_block_start16]);
    best_id2 = &(g_epi_best_id2[tidx * idx2_block_sizea16]);
    n_sig_ct2 = &(g_epi_n_sig_ct2[tidx * idx2_block_sizea16]);
    fail_ct2 = &(g_epi_fail_ct2[tidx * idx2_block_sizea16]);
    for (block_idx1 = block_idx1_start; block_idx1 < block_idx1_end; block_idx1++, marker_idx1++) {
      // geno1_offsets[2 * i] and [2 * i + 1] delimit a range of index-2
      // markers that is not tested against index-1 marker i.  The part of the
      // current block below that range is the "first half"; the part at or
      // above its end is the "second half".
      ulii = geno1_offsets[2 * block_idx1];
      if (ulii > idx2_block_start) {
        block_idx2 = 0;
        cur_idx2_block_size = ulii - idx2_block_start;
        if (cur_idx2_block_size >= idx2_block_size) {
          // excluded range starts at or past the end of this block
          cur_idx2_block_size = idx2_block_size;
        } else {
          is_first_half = 1;
        }
      } else {
        ulii = geno1_offsets[2 * block_idx1 + 1];
        if (ulii >= idx2_block_end) {
          // may not be done in set1 x all or set1 x set2 cases
          continue;
        } else {
          if (ulii <= idx2_block_start) {
            block_idx2 = 0;
          } else {
            block_idx2 = ulii - idx2_block_start;
          }
        }
      }
      cur_geno1 = &(geno1[block_idx1 * tot_ctsplit]);
      n_sig_ct_fixed = 0;
      fail_ct_fixed = 0;
      // zmiss1 holds two bits per index-1 marker: bit 2i for cases, bit
      // 2i + 1 for controls.  NOTE(review): the "nm" naming and the choice of
      // counting helper below suggest a set bit means "no missing calls in
      // this group" -- confirm where zmiss1 is populated.
      nm_case_fixed = is_set_ul(zmiss1, block_idx1 * 2);
      nm_ctrl_fixed = is_set_ul(zmiss1, block_idx1 * 2 + 1);
      nm_fixed = nm_case_fixed & nm_ctrl_fixed;
      // per-segment set-bit totals of the fixed marker; they serve as the
      // row sums of the count tables
      tot1[0] = popcount_longs(cur_geno1, case_ctv3);
      tot1[1] = popcount_longs(&(cur_geno1[case_ctv3]), case_ctv3);
      tot1[2] = popcount_longs(&(cur_geno1[2 * case_ctv3]), case_ctv3);
      if (!is_case_only) {
        cur_geno1_ctrls = &(cur_geno1[case_ctsplit]);
        tot1[3] = popcount_longs(cur_geno1_ctrls, ctrl_ctv3);
        tot1[4] = popcount_longs(&(cur_geno1_ctrls[ctrl_ctv3]), ctrl_ctv3);
        tot1[5] = popcount_longs(&(cur_geno1_ctrls[2 * ctrl_ctv3]), ctrl_ctv3);
        if (is_boost) {
          if (nm_fixed) {
            // six precomputed doubles per index-2 marker are consumed in
            // sequence by the inner loop
            cur_boost_precalc2 = &(boost_precalc2[block_idx2 * 6]);
          } else {
            // otherwise boost_calc_p_bc() recomputes them per pair into
            // p_bc_tmp
            p_bc_ptr = p_bc_tmp;
          }
          boost_calc_p_ca(tot1[0], tot1[1], tot1[2], tot1[3], tot1[4], tot1[5], p_ca_fixed, &df_adj_base);
        }
      }
      block_delta1 = block_idx1 - block_idx1_start;
      best_chisq_fixed = best_chisq1[block_delta1];
      all_chisq_write = &(all_chisq[block_idx1 * marker_ct]);
    fast_epi_thread_second_half:
      // Reached by fallthrough for the first (or only) range, and by goto
      // from below for the range past the excluded markers.
      cur_geno2 = &(geno2[block_idx2 * tot_ctsplit]);
      chisq2_ptr = &(best_chisq2[block_idx2]);
      for (; block_idx2 < cur_idx2_block_size; block_idx2++, chisq2_ptr++, cur_geno2 = &(cur_geno2[tot_ctsplit])) {
        cur_tot2 = &(tot2[block_idx2 * tot_stride]);
        // this operation isn't extracting a 2-bit genotype, so don't use the
        // macro
        cur_zmiss2 = (zmiss2[block_idx2 / BITCT2] >> (2 * (block_idx2 % BITCT2))) & 3;
        // low bit: cases; high bit (used further down): controls
        cur_zmiss2_tmp = cur_zmiss2 & 1;
        // Case table, counts[0..8].  Row r sums to tot1[r] and column c sums
        // to cur_tot2[c]; those identities are used to fill in whichever
        // cells the counting helper is not relied on for.
        if (nm_case_fixed) {
          two_locus_count_table_zmiss1(cur_geno1, cur_geno2, counts, case_ctv3, cur_zmiss2_tmp);
          if (cur_zmiss2_tmp) {
            counts[2] = tot1[0] - counts[0] - counts[1];
            counts[5] = tot1[1] - counts[3] - counts[4];
          }
          counts[6] = cur_tot2[0] - counts[0] - counts[3];
          counts[7] = cur_tot2[1] - counts[1] - counts[4];
          counts[8] = cur_tot2[2] - counts[2] - counts[5];
        } else {
          two_locus_count_table(cur_geno1, cur_geno2, counts, case_ctv3, cur_zmiss2_tmp);
          if (cur_zmiss2_tmp) {
            counts[2] = tot1[0] - counts[0] - counts[1];
            counts[5] = tot1[1] - counts[3] - counts[4];
            counts[8] = tot1[2] - counts[6] - counts[7];
          }
        }
        // Control table, counts[9..17]; same scheme with the control-side
        // totals tot1[3..5] / cur_tot2[3..5].
        if (!is_case_only) {
          cur_zmiss2_tmp = cur_zmiss2 >> 1;
          if (nm_ctrl_fixed) {
            two_locus_count_table_zmiss1(cur_geno1_ctrls, &(cur_geno2[case_ctsplit]), &(counts[9]), ctrl_ctv3, cur_zmiss2_tmp);
            if (cur_zmiss2_tmp) {
              counts[11] = tot1[3] - counts[9] - counts[10];
              counts[14] = tot1[4] - counts[12] - counts[13];
            }
            counts[15] = cur_tot2[3] - counts[9] - counts[12];
            counts[16] = cur_tot2[4] - counts[10] - counts[13];
            counts[17] = cur_tot2[5] - counts[11] - counts[14];
          } else {
            two_locus_count_table(cur_geno1_ctrls, &(cur_geno2[case_ctsplit]), &(counts[9]), ctrl_ctv3, cur_zmiss2_tmp);
            if (cur_zmiss2_tmp) {
              counts[11] = tot1[3] - counts[9] - counts[10];
              counts[14] = tot1[4] - counts[12] - counts[13];
              counts[17] = tot1[5] - counts[15] - counts[16];
            }
          }
        }
        if (!is_boost) {
          if (!do_joint_effects) {
            fepi_counts_to_stats(counts, no_ueki, &dxx, &case_var);
            if (!is_case_only) {
              // the tested quantity is the case value minus the control value
              fepi_counts_to_stats(&(counts[9]), no_ueki, &ctrl_or, &ctrl_var);
              dxx -= ctrl_or;
            }
          } else {
            if (cellmin) {
              // every cell of every table in use must hold at least cellmin
              // observations, otherwise the pair counts as a failure
              if ((counts[0] < cellmin) || (counts[1] < cellmin) || (counts[2] < cellmin) || (counts[3] < cellmin) || (counts[4] < cellmin) || (counts[5] < cellmin) || (counts[6] < cellmin) || (counts[7] < cellmin) || (counts[8] < cellmin)) {
                goto fast_epi_thread_fail;
              }
              if (!is_case_only) {
                if ((counts[9] < cellmin) || (counts[10] < cellmin) || (counts[11] < cellmin) || (counts[12] < cellmin) || (counts[13] < cellmin) || (counts[14] < cellmin) || (counts[15] < cellmin) || (counts[16] < cellmin) || (counts[17] < cellmin)) {
                  goto fast_epi_thread_fail;
                }
              }
            }
            fepi_counts_to_joint_effects_stats(group_ct, counts, &dxx, &case_var, &ctrl_var);
          }
          zsq = dxx * dxx / (case_var + ctrl_var);
          if (!realnum(zsq)) {
            goto fast_epi_thread_fail;
          }
          // first threshold: record the statistic
          if (zsq >= alpha1sq) {
            all_chisq_write[block_idx2] = zsq;
          }
          // second threshold: count the pair for both markers
          if (zsq >= alpha2sq) {
            n_sig_ct_fixed++;
            n_sig_ct2[block_idx2] += 1;
          }
        fast_epi_thread_boost_save:
          // Shared tail, also entered by goto from the boost branch: track
          // the best statistic for the fixed marker and for the index-2
          // marker.
          if (zsq > best_chisq_fixed) {
            best_chisq_fixed = zsq;
            best_id_fixed = block_idx2 + idx2_block_start;
          }
          dxx = *chisq2_ptr;
          if (zsq > dxx) {
            *chisq2_ptr = zsq;
            best_id2[block_idx2] = marker_idx1;
          }
        } else {
          if (nm_fixed) {
            // take the next six precomputed values and advance
            p_bc_ptr = cur_boost_precalc2;
            cur_boost_precalc2 = &(cur_boost_precalc2[6]);
          } else {
            // arguments are the column sums of the case and control tables
            boost_calc_p_bc(counts[0] + counts[3] + counts[6], counts[1] + counts[4] + counts[7], counts[2] + counts[5] + counts[8], counts[9] + counts[12] + counts[15], counts[10] + counts[13] + counts[16], counts[11] + counts[14] + counts[17], p_bc_ptr);
          }
          if (cur_zmiss2 == 3) {
            // both bits set for the index-2 marker: reuse the values computed
            // once from tot1 for the fixed marker
            p_ca_ptr = p_ca_fixed;
            df_adj = df_adj_base;
          } else {
            // arguments are the row sums of the case and control tables
            if (boost_calc_p_ca(counts[0] + counts[1] + counts[2], counts[3] + counts[4] + counts[5], counts[6] + counts[7] + counts[8], counts[9] + counts[10] + counts[11], counts[12] + counts[13] + counts[14], counts[15] + counts[16] + counts[17], p_ca_tmp, &df_adj)) {
              goto fast_epi_thread_fail;
            }
            p_ca_ptr = p_ca_tmp;
          }

          // if approximate zsq >= epi1 threshold but more accurate value is
          // not, we still want to save the more accurate value
          // also, we want epi2 counting to be df-sensitive
          // (punt on df/best_chisq for now)
          zsq = fepi_counts_to_boost_chisq(counts, p_bc_ptr, p_ca_ptr, alpha1sq_ptr, alpha2sq_ptr, df_adj, &(all_chisq_write[block_idx2]), &n_sig_ct_fixed, &(n_sig_ct2[block_idx2]));
          if (realnum(zsq)) {
            goto fast_epi_thread_boost_save;
          }
        fast_epi_thread_fail:
          // Failure tail, also entered by goto from the non-boost branch.
          fail_ct_fixed++;
          fail_ct2[block_idx2] += 1;
          if (alpha1sq == 0.0) {
            // special case: log NA when '--epi1 1' specified
            all_chisq_write[block_idx2] = NAN;
          }
        }
      }
      if (is_first_half) {
        // First range done; continue with the markers past the excluded
        // range, if any of them lie inside this block.
        is_first_half = 0;
        ulii = geno1_offsets[2 * block_idx1 + 1];
        cur_idx2_block_size = idx2_block_size;
        if (ulii < idx2_block_end) {
          // guaranteed to be larger than idx2_block_start, otherwise there
          // would have been no first half
          block_idx2 = ulii - idx2_block_start;
          if (is_boost && nm_fixed) {
            // re-sync the precalc cursor with the new block_idx2
            cur_boost_precalc2 = &(boost_precalc2[block_idx2 * 6]);
          }
          goto fast_epi_thread_second_half;
        }
      }
      // Publish the fixed marker's results for this index-2 block.
      if (best_chisq_fixed > best_chisq1[block_delta1]) {
        best_chisq1[block_delta1] = best_chisq_fixed;
        best_id1[block_delta1] = best_id_fixed;
      }
      n_sig_ct1[block_delta1] = n_sig_ct_fixed;
      if (fail_ct_fixed) {
        fail_ct1[block_delta1] = fail_ct_fixed;
      }
    }
    // Thread 0, or any thread on the last block, returns here.
    // NOTE(review): THREAD_BLOCK_FINISH presumably parks the thread until the
    // next index-2 block is ready -- confirm against its definition.
    if ((!tidx) || g_is_last_thread_block) {
      THREAD_RETURN;
    }
    THREAD_BLOCK_FINISH(tidx);
  }
}
4037
// epistasis linear/logistic regression multithread globals

// Per-sample phenotype values; epi_linear_thread() indexes this array by
// sample index.
static double* g_epi_pheno_d2;
// Per-marker starting values for sum(A * pheno) (index-1 markers) and
// sum(B * pheno) (index-2 markers) in epi_linear_thread().
static double* g_epi_phenogeno1;
static double* g_epi_phenogeno2;
// Two entries per marker: element [2 * i] is the starting value for the sum
// of genotype values and element [2 * i + 1] for the sum of their squares.
static uint32_t* g_epi_genosums1;
static uint32_t* g_epi_genosums2;
// Starting values for sum(pheno) and sum(pheno * pheno); epi_linear_thread()
// subtracts the contribution of each sample it drops for a given marker pair.
static double g_epi_pheno_sum;
static double g_epi_pheno_ssq;
// Threshold used by the VIF check in epi_linear_thread().
static double g_epi_vif_thresh;

// Sample count; the regression threads derive the per-marker word count from
// it via QUATERCT_TO_WORDCT().
static uint32_t g_epi_pheno_nm_ct;

// Float work buffers for epi_logistic_thread().  That function selects its
// own entry with g_epi_logistic_mt[tidx], so there is one instance per
// thread.
// NOTE(review): the role and size of each buffer is set where they are
// allocated and consumed, which is outside this part of the file.
typedef struct epi_logistic_multithread_struct {
  float* cur_covars_cov_major;
  float* coef;
  float* pp;
  float* sample_1d_buf;
  float* pheno_buf;
  float* param_1d_buf;
  float* param_1d_buf2;
  float* param_2d_buf;
  float* param_2d_buf2;
} Epi_logistic_multithread;

// Array of per-thread buffer sets, indexed by thread index.
static Epi_logistic_multithread* g_epi_logistic_mt;
// Read by epi_logistic_thread() as pheno_c.
static uintptr_t* g_epi_pheno_c;
// Single-precision result arrays; epi_logistic_thread() offsets them the
// same way epi_linear_thread() offsets g_epi_all_chisq, g_epi_best_chisq1
// and g_epi_best_chisq2.
static float* g_epi_all_chisq_f;
static float* g_epi_best_chisq_f1;
static float* g_epi_best_chisq_f2;
4068
// Inverts a symmetric 4x4 matrix in place using the cofactor (adjugate)
// formula.
//
// dmatrix is a 16-element row-major array.  Only its upper triangle (indices
// 0-3, 5-7, 10-11 and 15) is read.  On success every element is overwritten
// with the inverse, the lower triangle being mirrored from the upper one.
//
// Returns 0 on success.  Returns 1, leaving dmatrix untouched, when the
// absolute value of the determinant is below EPSILON.
uint32_t matrix_invert_4x4symm(double* dmatrix) {
  // Upper-triangle entries, named m<row><col> (0-based).
  const double m00 = dmatrix[0];
  const double m01 = dmatrix[1];
  const double m02 = dmatrix[2];
  const double m03 = dmatrix[3];
  const double m11 = dmatrix[5];
  const double m12 = dmatrix[6];
  const double m13 = dmatrix[7];
  const double m22 = dmatrix[10];
  const double m23 = dmatrix[11];
  const double m33 = dmatrix[15];

  // 2x2 minors that are shared between several cofactors, named
  // det_r<rows>_c<cols> after the rows and columns they are taken from.
  const double det_r13_c12 = m11 * m23 - m12 * m13;
  const double det_r23_c12 = m12 * m23 - m13 * m22;
  const double det_r23_c13 = m12 * m33 - m13 * m23;
  const double det_r23_c23 = m22 * m33 - m23 * m23;
  const double det_r12_c12 = m11 * m22 - m12 * m12;

  // First row of the adjugate; it also yields the determinant by expansion
  // along the first row of the input.
  const double adj00 = m11 * det_r23_c23
                     - m12 * det_r23_c13
                     + m13 * det_r23_c12;
  const double adj01 = m02 * det_r23_c13
                     - m01 * det_r23_c23
                     - m03 * det_r23_c12;
  const double adj02 = m01 * det_r23_c13
                     + m02 * (m13 * m13 - m11 * m33)
                     + m03 * det_r13_c12;
  const double adj03 = m02 * det_r13_c12
                     - m01 * det_r23_c12
                     - m03 * det_r12_c12;
  const double det = m00 * adj00 + m01 * adj01 + m02 * adj02 + m03 * adj03;
  if (fabs(det) < EPSILON) {
    return 1;
  }

  // Minors that involve row 0; only needed for the remaining cofactors.
  const double det_r03_c13 = m01 * m33 - m03 * m13;
  const double det_r03_c12 = m01 * m23 - m02 * m13;
  const double det_r02_c12 = m01 * m22 - m02 * m12;
  const double det_r01_c12 = m01 * m12 - m02 * m11;

  const double adj11 = m00 * det_r23_c23
                     + m02 * (m03 * m23 - m02 * m33)
                     + m03 * (m02 * m23 - m03 * m22);
  const double adj12 = m02 * det_r03_c13
                     - m00 * det_r23_c13
                     + m03 * (m03 * m12 - m01 * m23);
  const double adj13 = m00 * det_r23_c12
                     - m02 * det_r03_c12
                     + m03 * det_r02_c12;
  const double adj22 = m00 * (m11 * m33 - m13 * m13)
                     - m01 * det_r03_c13
                     + m03 * (m01 * m13 - m03 * m11);
  const double adj23 = m01 * det_r03_c12
                     - m00 * det_r13_c12
                     - m03 * det_r01_c12;
  const double adj33 = m00 * det_r12_c12
                     - m01 * det_r02_c12
                     + m02 * det_r01_c12;

  // Scale the adjugate by 1/det and store it, mirroring across the diagonal.
  const double det_recip = 1.0 / det;
  const double inv01 = adj01 * det_recip;
  const double inv02 = adj02 * det_recip;
  const double inv03 = adj03 * det_recip;
  const double inv12 = adj12 * det_recip;
  const double inv13 = adj13 * det_recip;
  const double inv23 = adj23 * det_recip;

  dmatrix[0] = adj00 * det_recip;
  dmatrix[1] = inv01;
  dmatrix[2] = inv02;
  dmatrix[3] = inv03;

  dmatrix[4] = inv01;
  dmatrix[5] = adj11 * det_recip;
  dmatrix[6] = inv12;
  dmatrix[7] = inv13;

  dmatrix[8] = inv02;
  dmatrix[9] = inv12;
  dmatrix[10] = adj22 * det_recip;
  dmatrix[11] = inv23;

  dmatrix[12] = inv03;
  dmatrix[13] = inv13;
  dmatrix[14] = inv23;
  dmatrix[15] = adj33 * det_recip;
  return 0;
}
4140
// Worker-thread body for the linear-regression epistasis scan.
//
// arg carries the thread index (cast to a pointer).  The thread handles the
// index-1 markers [g_epi_idx1_block_bounds[tidx],
// g_epi_idx1_block_bounds[tidx + 1]) and pairs each of them with the markers
// of the current index-2 block, whose position and size are reread from
// g_epi_idx2_block_start / g_epi_idx2_block_size on every pass of the outer
// loop.
//
// For each pair, pheno ~ 1 + A + B + A*B is fitted by ordinary least squares
// from sufficient statistics only.  Genotypes are read as 2-bit fields: values
// 1 and 2 enter the sums as such, 0 contributes nothing, and a sample whose
// field is 3 at either marker is dropped from that pair's regression.
//
// The pair is counted as a failure if the VIF / collinearity checks reject
// it, if the 4x4 normal matrix cannot be inverted, if at most 4 samples
// remain, or if the residual variance falls below min_sigma.
//
// Outputs, all written through the g_epi_* arrays: two doubles per pair
// (chi-square, then the interaction beta) when the chi-square is >= alpha1sq
// (NAN for failed pairs when alpha1sq is exactly 0.0), per-marker counts of
// statistics reaching alpha2sq, per-marker failure counts, and per-marker
// best statistic with the index of the partner achieving it.  Index-2-side
// accumulators are addressed at tidx * idx2_block_sizea16, i.e. each thread
// writes to its own slice of them.
THREAD_RET_TYPE epi_linear_thread(void* arg) {
  uintptr_t tidx = (uintptr_t)arg;
  uintptr_t block_idx1_start = g_epi_idx1_block_bounds[tidx];
  uintptr_t block_idx1_end = g_epi_idx1_block_bounds[tidx + 1];
  uintptr_t idx1_block_start16 = g_epi_idx1_block_bounds16[tidx];
  uintptr_t marker_idx1 = g_epi_marker_idx1 + block_idx1_start;
  uintptr_t marker_ct = g_epi_marker_ct;
  double alpha1sq = g_epi_alpha1sq[0];
  double alpha2sq = g_epi_alpha2sq[0];
  double pheno_sum = g_epi_pheno_sum;
  double pheno_ssq = g_epi_pheno_ssq;
  double vif_thresh = g_epi_vif_thresh;
  uint32_t pheno_nm_ct = g_epi_pheno_nm_ct;
  uint32_t best_id_fixed = 0;
  uint32_t is_first_half = 0;
  // number of words holding one marker's 2-bit genotypes
  uintptr_t pheno_nm_ctl2 = QUATERCT_TO_WORDCT(pheno_nm_ct);
  uintptr_t* geno1 = g_epi_geno1;
  double* pheno_d2 = g_epi_pheno_d2;
  uint32_t* geno1_offsets = g_epi_geno1_offsets;
  uint32_t* best_id1 = &(g_epi_best_id1[idx1_block_start16]);
  // A * B for the genotype combinations 1/1, 1/2, 2/1, 2/2, in the same order
  // as cur_minor_cts
  const double dconst[] = {1.0, 2.0, 2.0, 4.0};
  double dmatrix_buf[16];
  double dmatrix_buf2[4];

  // sum(aa), sum(ab), sum(bb), sum(aab), sum(abb), and sum(aabb) can all be
  // derived from these four quantities.
  uint32_t cur_minor_cts[4]; // 11, 12, 21, 22

  uintptr_t* cur_geno1;
  uintptr_t* geno2;
  uintptr_t* cur_geno2;
  double* phenogeno1;
  double* phenogeno2;
  double* all_chisq_write;
  double* chisq2_ptr;
  double* all_chisq;
  double* best_chisq1;
  double* best_chisq2;
  double* dptr;
  double* dptr2;
  uint32_t* n_sig_ct1;
  uint32_t* fail_ct1;
  uint32_t* best_id2;
  uint32_t* n_sig_ct2;
  uint32_t* fail_ct2;
  uint32_t* genosums1;
  uint32_t* genosums2;
  uintptr_t idx2_block_size;
  uintptr_t cur_idx2_block_size;
  uintptr_t idx2_block_start;
  uintptr_t idx2_block_end;
  uintptr_t idx2_block_sizea16;
  uintptr_t block_idx1;
  uintptr_t block_delta1;
  uintptr_t block_idx2;
  uintptr_t cur_word1;
  uintptr_t cur_word2;
  uintptr_t active_mask;
  uintptr_t param_idx;
  uintptr_t param_idx2;
  uintptr_t cur_sum_aab;
  uintptr_t cur_sum_abb;
  uintptr_t cur_sum_aabb;
  uintptr_t ulii;
  uintptr_t uljj;
  double best_chisq_fixed;
  double sum_a_pheno_base;
  double cur_pheno_sum;
  double cur_pheno_ssq;
  double cur_sum_a_pheno;
  double cur_sum_b_pheno;
  double cur_sum_ab_pheno;
  double sample_ctd;
  double sample_ct_recip;
  double sample_ct_m1_recip;
  double cur_sum_ad;
  double cur_sum_bd;
  double cur_sum_abd;
  double determinant;
  double min_sigma;
  double sigma;
  double dxx;
  double dyy;
  double dzz;
  double dww;
  double dvv;
  double duu;
  double zsq;
  uint32_t n_sig_ct_fixed;
  uint32_t fail_ct_fixed;

  uint32_t sum_a_base;
  uint32_t sum_aa_base;
  uint32_t cur_sum_a;
  uint32_t cur_sum_aa;
  uint32_t cur_sum_b;
  uint32_t cur_sum_bb;
  uint32_t cur_sum_ab;
  uint32_t widx;
  uint32_t sample_idx;
  uint32_t cur_sample_ct;
  uint32_t woffset;
  // One pass of this loop per index-2 block.
  while (1) {
    // Reload everything describing the current index-2 block.
    idx2_block_size = g_epi_idx2_block_size;
    cur_idx2_block_size = idx2_block_size;
    idx2_block_start = g_epi_idx2_block_start;
    idx2_block_end = idx2_block_start + idx2_block_size;
    idx2_block_sizea16 = round_up_pow2(idx2_block_size, 16);
    geno2 = g_epi_geno2;
    phenogeno1 = g_epi_phenogeno1;
    phenogeno2 = g_epi_phenogeno2;
    genosums1 = g_epi_genosums1;
    genosums2 = g_epi_genosums2;
    // two doubles per marker pair, hence the factor of 2
    all_chisq = &(g_epi_all_chisq[2 * idx2_block_start]);
    best_chisq1 = &(g_epi_best_chisq1[idx1_block_start16]);
    best_chisq2 = &(g_epi_best_chisq2[tidx * idx2_block_sizea16]);
    n_sig_ct1 = &(g_epi_n_sig_ct1[idx1_block_start16]);
    fail_ct1 = &(g_epi_fail_ct1[idx1_block_start16]);
    best_id2 = &(g_epi_best_id2[tidx * idx2_block_sizea16]);
    n_sig_ct2 = &(g_epi_n_sig_ct2[tidx * idx2_block_sizea16]);
    fail_ct2 = &(g_epi_fail_ct2[tidx * idx2_block_sizea16]);
    for (block_idx1 = block_idx1_start; block_idx1 < block_idx1_end; block_idx1++, marker_idx1++) {
      // geno1_offsets[2 * i] and [2 * i + 1] delimit a range of index-2
      // markers that is not tested against index-1 marker i.  The part of the
      // current block below that range is the "first half"; the part at or
      // above its end is the "second half".
      ulii = geno1_offsets[2 * block_idx1];
      if (ulii > idx2_block_start) {
        block_idx2 = 0;
        cur_idx2_block_size = ulii - idx2_block_start;
        if (cur_idx2_block_size >= idx2_block_size) {
          // excluded range starts at or past the end of this block
          cur_idx2_block_size = idx2_block_size;
        } else {
          is_first_half = 1;
        }
      } else {
        ulii = geno1_offsets[2 * block_idx1 + 1];
        if (ulii >= idx2_block_end) {
          // may not be done in set1 x all or set1 x set2 cases
          continue;
        } else {
          if (ulii <= idx2_block_start) {
            block_idx2 = 0;
          } else {
            block_idx2 = ulii - idx2_block_start;
          }
        }
      }
      cur_geno1 = &(geno1[block_idx1 * pheno_nm_ctl2]);
      n_sig_ct_fixed = 0;
      fail_ct_fixed = 0;
      block_delta1 = block_idx1 - block_idx1_start;
      best_chisq_fixed = best_chisq1[block_delta1];
      // precomputed marker-A statistics, used as starting values per pair
      sum_a_pheno_base = phenogeno1[block_idx1];
      sum_a_base = genosums1[2 * block_idx1];
      sum_aa_base = genosums1[2 * block_idx1 + 1];

      // [0] = chisq, [1] = beta
      all_chisq_write = &(all_chisq[block_idx1 * marker_ct * 2]);

    epi_linear_thread_second_half:
      // Reached by fallthrough for the first (or only) range, and by goto
      // from below for the range past the excluded markers.
      cur_geno2 = &(geno2[block_idx2 * pheno_nm_ctl2]);
      chisq2_ptr = &(best_chisq2[block_idx2]);
      for (; block_idx2 < cur_idx2_block_size; block_idx2++, chisq2_ptr++, cur_geno2 = &(cur_geno2[pheno_nm_ctl2])) {
        // Our covariates are 1, genotype A (in {0, 1, 2}), genotype B, and
        //   [genotype A] * [genotype B].
        // The ordinary least squares solution to this system is
        //   (X^T X)^{-1} X^T Y
        // where X^T X is the following 4x4 matrix (where n = # of samples):
        //   [ n       sum(A)   sum(B)   sum(AB)   ]
        //   [ sum(A)  sum(AA)  sum(AB)  sum(AAB)  ]
        //   [ sum(B)  sum(AB)  sum(BB)  sum(ABB)  ]
        //   [ sum(AB) sum(AAB) sum(ABB) sum(AABB) ]
        // (sum(.) denotes the sum of that (product of) genotypes, across all
        // samples.)
        // Meanwhile, X^T Y is the following 4x1 matrix:
        //   [ sum(pheno)      ]
        //   [ sum(A * pheno)  ]
        //   [ sum(B * pheno)  ]
        //   [ sum(AB * pheno) ]
        // Crucially, the VIF and valid parameters checks can also operate
        // purely on the terms above and sum(pheno * pheno).

        // these nine values can be mostly precomputed; just need to subtract
        // from them sometimes when missing values are present.
        cur_pheno_sum = pheno_sum;
        cur_pheno_ssq = pheno_ssq;
        cur_sum_a_pheno = sum_a_pheno_base;
        cur_sum_b_pheno = phenogeno2[block_idx2];
        cur_sum_a = sum_a_base;
        cur_sum_aa = sum_aa_base;
        cur_sum_b = genosums2[block_idx2 * 2];
        cur_sum_bb = genosums2[block_idx2 * 2 + 1];
        cur_sample_ct = pheno_nm_ct;

        cur_sum_ab_pheno = 0.0;
        fill_uint_zero(4, cur_minor_cts);
        for (widx = 0; widx < pheno_nm_ctl2; widx++) {
          sample_idx = widx * BITCT2;
          cur_word1 = cur_geno1[widx];
          cur_word2 = cur_geno2[widx];
          // we can entirely skip 5 common cases: 00/00, 00/01, 00/10, 01/00,
          // 10/00.
          active_mask = cur_word1 | cur_word2;
          active_mask = (active_mask & (active_mask >> 1) & FIVEMASK) | (cur_word1 & cur_word2);
          dptr = &(pheno_d2[sample_idx]);
          while (active_mask) {
            // position of the next sample that needs attention
            woffset = CTZLU(active_mask) / 2;
            // its phenotype value
            dxx = dptr[woffset];
            woffset *= 2;
            ulii = (cur_word1 >> woffset) & (3 * ONELU);
            uljj = (cur_word2 >> woffset) & (3 * ONELU);
            // clear both bits of this sample so it is not revisited
            active_mask &= ~((3 * ONELU) << woffset);
            if (ulii && uljj) {
              if (ulii == 3) {
                // A is 3: the sample is dropped, so take its B contribution
                // back out of the precomputed B sums
                if (uljj == 1) {
                  cur_sum_b_pheno -= dxx;
                  cur_sum_b--;
                  cur_sum_bb--;
                } else if (uljj == 2) {
                  cur_sum_b_pheno -= 2 * dxx;
                  cur_sum_b -= 2;
                  cur_sum_bb -= 4;
                }
              } else if (uljj == 3) {
                // ulii must be 1 or 2
                cur_sum_a_pheno -= dxx;
                if (ulii == 2) {
                  cur_sum_a_pheno -= dxx;
                }
                cur_sum_a -= ulii;
                cur_sum_aa -= ulii * ulii;
              } else {
                // both genotypes are 1 or 2: map (A, B) to index 0..3 and
                // accumulate the interaction terms; the sample is kept
                ulii = ulii * 2 + uljj - 3;
                cur_sum_ab_pheno += dconst[ulii] * dxx;
                cur_minor_cts[ulii] += 1;
                continue;
              }
            }
            // reached only when at least one of the two fields is 3: remove
            // the sample from the phenotype sums and the sample count
            cur_pheno_sum -= dxx;
            cur_pheno_ssq -= dxx * dxx;
            cur_sample_ct--;
          }
        }
        // four parameters are fitted, so more than four samples are needed
        if (cur_sample_ct <= 4) {
          goto epi_linear_thread_regression_fail;
        }
        // VIF check.  Mirrors glm_check_vif(), but param_ct is hardcoded to 4
        // and we avoid additional iteration over the sample_idxs.
        sample_ctd = (double)((int32_t)cur_sample_ct);
        sample_ct_recip = 1.0 / sample_ctd;
        sample_ct_m1_recip = 1.0 / ((double)((int32_t)(cur_sample_ct - 1)));
        // each sum weights the four combination counts by the value of the
        // corresponding genotype product (e.g. A*A*B = 1, 2, 4, 8)
        cur_sum_ab = cur_minor_cts[0] + 2 * (cur_minor_cts[1] + cur_minor_cts[2]) + 4 * cur_minor_cts[3];
        cur_sum_aab = cur_minor_cts[0] + 2 * cur_minor_cts[1] + 4 * cur_minor_cts[2] + (8 * ONELU) * cur_minor_cts[3];
        cur_sum_abb = cur_minor_cts[0] + 4 * cur_minor_cts[1] + 2 * cur_minor_cts[2] + (8 * ONELU) * cur_minor_cts[3];
        cur_sum_aabb = cur_minor_cts[0] + 4 * (cur_minor_cts[1] + cur_minor_cts[2]) + (16 * ONELU) * cur_minor_cts[3];

        cur_sum_ad = (double)((int32_t)cur_sum_a);
        cur_sum_bd = (double)((int32_t)cur_sum_b);
        cur_sum_abd = (double)((int32_t)cur_sum_ab);

        // some genotype means
        dxx = cur_sum_bd * sample_ct_recip;
        dyy = cur_sum_abd * sample_ct_recip;

        // centered sums of squares of A, B and AB; a non-positive value means
        // that covariate has no variation
        dww = ((double)((int32_t)cur_sum_aa)) - cur_sum_ad * cur_sum_ad * sample_ct_recip;
        dvv = ((double)((int32_t)cur_sum_bb)) - cur_sum_bd * dxx;
        duu = ((double)((intptr_t)cur_sum_aabb)) - cur_sum_abd * dyy;
        if ((dww <= 0) || (dvv <= 0) || (duu <= 0)) {
          goto epi_linear_thread_regression_fail;
        }
        // now reciprocal standard deviations
        dww = 1.0 / sqrt(dww * sample_ct_m1_recip);
        dvv = 1.0 / sqrt(dvv * sample_ct_m1_recip);
        duu = 1.0 / sqrt(duu * sample_ct_m1_recip);

        dxx = (cur_sum_abd - cur_sum_ad * dxx) * sample_ct_m1_recip;
        dzz = (((double)((intptr_t)cur_sum_abb)) - cur_sum_bd * dyy) * sample_ct_m1_recip;
        dyy = (((double)((intptr_t)cur_sum_aab)) - cur_sum_ad * dyy) * sample_ct_m1_recip;
        // now dxx = A_{12}, dyy = A_{13}, dzz = A_{23}

        // scale the covariances into correlations
        dxx *= dww * dvv;
        dyy *= dww * duu;
        dzz *= dvv * duu;
        if ((dxx > 0.999) || (dyy > 0.999) || (dzz > 0.999)) {
          goto epi_linear_thread_regression_fail;
        }
        // Use analytic formula for 3x3 symmetric matrix inverse.
        // det A = A_{11}A_{22}A_{33} + 2 * A_{12}A_{13}A_{23}
        //       - A_{11}(A_{23}^2) - A_{22}(A_{13}^2) - A_{33}(A_{12}^2)
        // upper left of inverse = (A_{22}A_{33} - (A_{23}^2))(det A)^{-1}
        // middle = (A_{11}A_{33} - (A_{13}^2))(det A)^{-1}
        // lower right = (A_{11}A_{22} - (A_{12}^2))(det A)^{-1}
        dww = dxx * dxx;
        dvv = dyy * dyy;
        duu = dzz * dzz;
        determinant = 1 + 2 * dxx * dyy * dzz - dww - dvv - duu;
        if (fabs(determinant) < EPSILON) {
          goto epi_linear_thread_regression_fail;
        }
        // (1 - x^2)/det > vif_thresh
        // if det > 0:
        //   1 - x^2 > vif_thresh * det
        //   1 - vif_thresh * det > x^2
        // otherwise:
        //   1 - x^2 < vif_thresh * det
        //   1 - vif_thresh * det < x^2
        dxx = 1 - vif_thresh * determinant; // now a threshold
        if (((determinant > 0) && ((dxx > dww) || (dxx > dvv) || (dxx > duu))) || ((determinant < 0) && ((dxx < dww) || (dxx < dvv) || (dxx < duu)))) {
          goto epi_linear_thread_regression_fail;
        }

        // VIF check done, now perform linear regression
        // Only the upper triangle of X^T X is filled in; that is all
        // matrix_invert_4x4symm() reads.
        dmatrix_buf[0] = sample_ctd;
        dmatrix_buf[1] = cur_sum_ad;
        dmatrix_buf[2] = cur_sum_bd;
        dmatrix_buf[3] = cur_sum_abd;
        dmatrix_buf[5] = (double)((int32_t)cur_sum_aa);
        dmatrix_buf[6] = cur_sum_abd;
        dmatrix_buf[7] = (double)((intptr_t)cur_sum_aab);
        dmatrix_buf[10] = (double)((int32_t)cur_sum_bb);
        dmatrix_buf[11] = (double)((intptr_t)cur_sum_abb);
        dmatrix_buf[15] = (double)((intptr_t)cur_sum_aabb);
        if (matrix_invert_4x4symm(dmatrix_buf)) {
          goto epi_linear_thread_regression_fail;
        }

        // square roots of the inverse's diagonal
        for (param_idx = 0; param_idx < 4; param_idx++) {
          dmatrix_buf2[param_idx] = sqrt(dmatrix_buf[param_idx * 5]);
        }
        // reject the pair when an off-diagonal entry of the inverse exceeds
        // 0.99999 times the product of the two corresponding square roots
        for (param_idx = 1; param_idx < 4; param_idx++) {
          dxx = 0.99999 * dmatrix_buf2[param_idx];
          dptr = &(dmatrix_buf[param_idx * 4]);
          dptr2 = dmatrix_buf2;
          for (param_idx2 = 0; param_idx2 < param_idx; param_idx2++) {
            if ((*dptr++) > dxx * (*dptr2++)) {
              goto epi_linear_thread_regression_fail;
            }
          }
        }
        // largest of the three non-intercept diagonal entries sets the floor
        // below which sigma is rejected
        min_sigma = MAXV(dmatrix_buf[5], dmatrix_buf[10]);
        if (dmatrix_buf[15] > min_sigma) {
          min_sigma = dmatrix_buf[15];
        }
        min_sigma = 1e-20 / min_sigma;

        // coefficients = (X^T X)^{-1} (X^T Y)
        for (param_idx = 0; param_idx < 4; param_idx++) {
          dptr = &(dmatrix_buf[param_idx * 4]);
          dmatrix_buf2[param_idx] = cur_pheno_sum * dptr[0] + cur_sum_a_pheno * dptr[1] + cur_sum_b_pheno * dptr[2] + cur_sum_ab_pheno * dptr[3];
        }
        // dmatrix_buf2[0..3] now has linear regression result

        // partial = coef[0] + A * coef[1] + B * coef[2] + AB * coef[3] - pheno
        // sigma = \sum_{all samples} (partial * partial)
        //       = \sum (coef[0]^2
        //               + 2 * A * coef[0] * coef[1]
        //               + 2 * B * coef[0] * coef[2]
        //               + 2 * AB * coef[0] * coef[3]
        //               - 2 * coef[0] * pheno
        //               + AA * coef[1]^2
        //               + 2 * AB * coef[1] * coef[2]
        //               + 2 * AAB * coef[1] * coef[3]
        //               - 2 * A * coef[1] * pheno
        //               + BB * coef[2]^2
        //               + 2 * ABB * coef[2] * coef[3]
        //               - 2 * B * coef[2] * pheno
        //               + AABB * coef[3]^2
        //               - 2 * AB * coef[3] * pheno
        //               + pheno * pheno
        sigma = dmatrix_buf2[0] * dmatrix_buf2[0] * sample_ctd
              + dmatrix_buf2[1] * dmatrix_buf2[1] * ((double)((int32_t)cur_sum_aa))
              + dmatrix_buf2[2] * dmatrix_buf2[2] * ((double)((int32_t)cur_sum_bb))
              + dmatrix_buf2[3] * dmatrix_buf2[3] * ((double)((intptr_t)cur_sum_aabb))
              + cur_pheno_ssq
              + 2 * (dmatrix_buf2[0] * (dmatrix_buf2[1] * cur_sum_ad
                                      + dmatrix_buf2[2] * cur_sum_bd
                                      + dmatrix_buf2[3] * cur_sum_abd
                                      - cur_pheno_sum)
                   + dmatrix_buf2[1] * (dmatrix_buf2[2] * cur_sum_abd
                                      + dmatrix_buf2[3] * ((double)((intptr_t)cur_sum_aab))
                                      - cur_sum_a_pheno)
                   + dmatrix_buf2[2] * (dmatrix_buf2[3] * ((double)((intptr_t)cur_sum_abb))
                                      - cur_sum_b_pheno)
                   - dmatrix_buf2[3] * cur_sum_ab_pheno);
        // residual sum of squares -> variance estimate with n - 4 degrees of
        // freedom
        sigma /= (double)((int32_t)(cur_sample_ct - 4));
        if (sigma < min_sigma) {
          goto epi_linear_thread_regression_fail;
        }

        // dmatrix_buf2[3] = linear regression beta for AB term
        // sqrt(dmatrix_buf[15] * sigma) = standard error for AB term
        dxx = dmatrix_buf2[3];
        zsq = (dxx * dxx) / (dmatrix_buf[15] * sigma);
        // first threshold: record statistic and beta
        if (zsq >= alpha1sq) {
          all_chisq_write[2 * block_idx2] = zsq;
          all_chisq_write[2 * block_idx2 + 1] = dxx;
        }
        // second threshold: count the pair for both markers
        if (zsq >= alpha2sq) {
          n_sig_ct_fixed++;
          n_sig_ct2[block_idx2] += 1;
        }
        if (zsq > best_chisq_fixed) {
          best_chisq_fixed = zsq;
          best_id_fixed = block_idx2 + idx2_block_start;
        }
        dxx = *chisq2_ptr;
        if (zsq > dxx) {
          *chisq2_ptr = zsq;
          best_id2[block_idx2] = marker_idx1;
        }
        // The while (0) body never runs on fallthrough; it is only entered
        // through the label, by the failure gotos above.
        while (0) {
        epi_linear_thread_regression_fail:
          zsq = 0;
          fail_ct_fixed++;
          fail_ct2[block_idx2] += 1;
          if (alpha1sq == 0.0) {
            // special case: log NA when '--epi1 1' specified
            all_chisq_write[block_idx2 * 2] = NAN;
            all_chisq_write[block_idx2 * 2 + 1] = NAN;
          }
        }
      }
      if (is_first_half) {
        // First range done; continue with the markers past the excluded
        // range, if any of them lie inside this block.
        is_first_half = 0;
        ulii = geno1_offsets[2 * block_idx1 + 1];
        cur_idx2_block_size = idx2_block_size;
        if (ulii < idx2_block_end) {
          // guaranteed to be larger than idx2_block_start, otherwise there
          // would have been no first half
          block_idx2 = ulii - idx2_block_start;
          goto epi_linear_thread_second_half;
        }
      }
      // Publish the fixed marker's results for this index-2 block.
      if (best_chisq_fixed > best_chisq1[block_delta1]) {
        best_chisq1[block_delta1] = best_chisq_fixed;
        best_id1[block_delta1] = best_id_fixed;
      }
      n_sig_ct1[block_delta1] = n_sig_ct_fixed;
      if (fail_ct_fixed) {
        fail_ct1[block_delta1] = fail_ct_fixed;
      }
    }
    // Thread 0, or any thread on the last block, returns here.
    // NOTE(review): THREAD_BLOCK_FINISH presumably parks the thread until the
    // next index-2 block is ready -- confirm against its definition.
    if ((!tidx) || g_is_last_thread_block) {
      THREAD_RETURN;
    }
    THREAD_BLOCK_FINISH(tidx);
  }
}
4584
// Worker thread for the logistic-regression epistasis scan.
//
// arg is the thread index (tidx) cast to a pointer.  Each thread owns the
// idx1 markers [g_epi_idx1_block_bounds[tidx],
// g_epi_idx1_block_bounds[tidx + 1]) and, for each of them, fits
//   pheno ~ 1 + geno1 + geno2 + geno1 * geno2
// against the idx2 markers of the current idx2 block.  The squared z-score of
// the interaction coefficient is the test statistic.
//
// Per-idx1 summaries (best statistic/partner, significant-pair count, failure
// count) are written at offset g_epi_idx1_block_bounds16[tidx] into the
// shared *1 arrays; per-idx2 summaries go into the thread-private slices of
// the *2 arrays starting at tidx * idx2_block_sizea16.
//
// The function loops until THREAD_RETURN is reached: thread 0 returns after
// every pass, other threads return only once g_is_last_thread_block is set,
// and otherwise call THREAD_BLOCK_FINISH(tidx) before the next pass.
THREAD_RET_TYPE epi_logistic_thread(void* arg) {
  uintptr_t tidx = (uintptr_t)arg;
  uintptr_t block_idx1_start = g_epi_idx1_block_bounds[tidx];
  uintptr_t block_idx1_end = g_epi_idx1_block_bounds[tidx + 1];
  uintptr_t idx1_block_start16 = g_epi_idx1_block_bounds16[tidx];
  // NOTE(review): marker_idx1 is initialized once here and only ever
  // incremented by the idx1 loop below; it is not reset when the outer
  // while(1) loop runs another pass.  Confirm this is intended for threads
  // that survive more than one pass.
  uintptr_t marker_idx1 = g_epi_marker_idx1 + block_idx1_start;
  uintptr_t marker_ct = g_epi_marker_ct;
  // alpha1sq: threshold for recording a pair in the all-pairs table.
  // alpha2sq: threshold for counting a pair as significant.
  float alpha1sq = (float)g_epi_alpha1sq[0];
  float alpha2sq = (float)g_epi_alpha2sq[0];
  uint32_t pheno_nm_ct = g_epi_pheno_nm_ct;
  uint32_t best_id_fixed = 0;
  // Set while processing the first of two disjoint idx2 segments for the
  // current idx1 marker.
  uint32_t is_first_half = 0;
  // Words per genotype vector (2 bits per sample).
  uintptr_t pheno_nm_ctl2 = QUATERCT_TO_WORDCT(pheno_nm_ct);
  uintptr_t* geno1 = g_epi_geno1;
  uintptr_t* pheno_c = g_epi_pheno_c;
  // Thread-private regression workspace.
  float* covars_cov_major = g_epi_logistic_mt[tidx].cur_covars_cov_major;
  float* coef = g_epi_logistic_mt[tidx].coef;
  float* pp = g_epi_logistic_mt[tidx].pp;
  float* sample_1d_buf = g_epi_logistic_mt[tidx].sample_1d_buf;
  float* pheno_buf = g_epi_logistic_mt[tidx].pheno_buf;
  float* param_1d_buf = g_epi_logistic_mt[tidx].param_1d_buf;
  float* param_1d_buf2 = g_epi_logistic_mt[tidx].param_1d_buf2;
  float* param_2d_buf = g_epi_logistic_mt[tidx].param_2d_buf;
  float* param_2d_buf2 = g_epi_logistic_mt[tidx].param_2d_buf2;
  // Two entries per idx1 marker; idx2 markers in
  // [geno1_offsets[2k], geno1_offsets[2k + 1]) are skipped for marker k.
  uint32_t* geno1_offsets = g_epi_geno1_offsets;
  uint32_t* best_id1 = &(g_epi_best_id1[idx1_block_start16]);
  uintptr_t* cur_geno1;
  uintptr_t* geno2;
  uintptr_t* cur_geno2;
  float* all_chisq_write;
  float* chisq2_ptr;
  float* all_chisq;
  float* best_chisq1;
  float* best_chisq2;
  float* fptr;
  float* fptr2;
  uint32_t* n_sig_ct1;
  uint32_t* fail_ct1;
  uint32_t* best_id2;
  uint32_t* n_sig_ct2;
  uint32_t* fail_ct2;
  uintptr_t idx2_block_size;
  uintptr_t cur_idx2_block_size;
  uintptr_t idx2_block_start;
  uintptr_t idx2_block_end;
  uintptr_t idx2_block_sizea16;
  uintptr_t block_idx1;
  uintptr_t block_delta1;
  uintptr_t block_idx2;
  uintptr_t cur_word1;
  uintptr_t cur_word2;
  uintptr_t param_idx;
  uintptr_t param_idx2;
  uintptr_t cur_sample_cta4;
  uintptr_t ulii;
  uintptr_t uljj;
  float best_chisq_fixed;
  // todo
  float fxx;
  float fyy;
  float zsq;
  uint32_t n_sig_ct_fixed;
  uint32_t fail_ct_fixed;
  uint32_t widx;
  uint32_t loop_end;
  uint32_t sample_idx;
  uint32_t cur_sample_ct;
  while (1) {
    // Re-read the per-pass globals describing the current idx2 block.
    idx2_block_size = g_epi_idx2_block_size;
    cur_idx2_block_size = idx2_block_size;
    idx2_block_start = g_epi_idx2_block_start;
    idx2_block_end = idx2_block_start + idx2_block_size;
    idx2_block_sizea16 = round_up_pow2(idx2_block_size, 16);
    geno2 = g_epi_geno2;
    all_chisq = &(g_epi_all_chisq_f[2 * idx2_block_start]);
    best_chisq1 = &(g_epi_best_chisq_f1[idx1_block_start16]);
    best_chisq2 = &(g_epi_best_chisq_f2[tidx * idx2_block_sizea16]);
    n_sig_ct1 = &(g_epi_n_sig_ct1[idx1_block_start16]);
    fail_ct1 = &(g_epi_fail_ct1[idx1_block_start16]);
    best_id2 = &(g_epi_best_id2[tidx * idx2_block_sizea16]);
    n_sig_ct2 = &(g_epi_n_sig_ct2[tidx * idx2_block_sizea16]);
    fail_ct2 = &(g_epi_fail_ct2[tidx * idx2_block_sizea16]);
    for (block_idx1 = block_idx1_start; block_idx1 < block_idx1_end; block_idx1++, marker_idx1++) {
      // Work out which part(s) of the idx2 block apply to this idx1 marker.
      ulii = geno1_offsets[2 * block_idx1];
      if (ulii > idx2_block_start) {
        // The segment before the skipped range overlaps this block.
        block_idx2 = 0;
        cur_idx2_block_size = ulii - idx2_block_start;
        if (cur_idx2_block_size >= idx2_block_size) {
          cur_idx2_block_size = idx2_block_size;
        } else {
          // The skipped range starts inside this block; the segment after it
          // (if any) is handled by jumping back to the second_half label.
          is_first_half = 1;
        }
      } else {
        ulii = geno1_offsets[2 * block_idx1 + 1];
        if (ulii >= idx2_block_end) {
          // may not be done in set1 x all or set1 x set2 cases
          continue;
        } else {
          if (ulii <= idx2_block_start) {
            block_idx2 = 0;
          } else {
            block_idx2 = ulii - idx2_block_start;
          }
        }
      }
      cur_geno1 = &(geno1[block_idx1 * pheno_nm_ctl2]);
      n_sig_ct_fixed = 0;
      fail_ct_fixed = 0;
      block_delta1 = block_idx1 - block_idx1_start;
      best_chisq_fixed = best_chisq1[block_delta1];

      // [0] = chisq, [1] = beta
      all_chisq_write = &(all_chisq[block_idx1 * marker_ct * 2]);

    epi_logistic_thread_second_half:
      cur_geno2 = &(geno2[block_idx2 * pheno_nm_ctl2]);
      chisq2_ptr = &(best_chisq2[block_idx2]);
      for (; block_idx2 < cur_idx2_block_size; block_idx2++, chisq2_ptr++, cur_geno2 = &(cur_geno2[pheno_nm_ctl2])) {
        fptr = covars_cov_major;
        fptr2 = pheno_buf;
        cur_sample_ct = pheno_nm_ct;
        // this part is similar to glm_logistic().

        // 1. determine number of samples with at least one missing genotype
        // (a 2-bit genotype value of 3, i.e. both bits set, is treated as
        // missing here)
        for (widx = 0; widx < pheno_nm_ctl2; widx++) {
          cur_word1 = cur_geno1[widx];
          cur_word2 = cur_geno2[widx];
          cur_word1 = cur_word1 & (cur_word1 >> 1);
          cur_word2 = cur_word2 & (cur_word2 >> 1);
          cur_sample_ct -= popcount2_long((cur_word1 | cur_word2) & FIVEMASK);
        }
        // Indexed by geno1 + 4 * geno2; tracks which cells of the 3x3
        // genotype contingency table are nonempty.
        unsigned char geno_pair_present[12];
        if (cur_sample_ct <= 4) {
          // Too few complete samples to fit four parameters.
          goto epi_logistic_thread_regression_fail;
        }
        // 2. now populate covariate-major matrix with 16-byte-aligned,
        //    trailing-entries-zeroed rows
        // quasi-bugfix (13 Sep 2018): reliably detect when this matrix is not
        // of full rank, and skip the regression in that case.
        memset(geno_pair_present, 0, 12);
        cur_sample_cta4 = round_up_pow2(cur_sample_ct, 4);
        for (widx = 0; widx < pheno_nm_ctl2; widx++) {
          sample_idx = widx * BITCT2;
          cur_word1 = cur_geno1[widx];
          cur_word2 = cur_geno2[widx];
          loop_end = sample_idx + BITCT2;
          if (loop_end > pheno_nm_ct) {
            loop_end = pheno_nm_ct;
          }
          for (; sample_idx < loop_end; sample_idx++) {
            ulii = cur_word1 & (3 * ONELU);
            uljj = cur_word2 & (3 * ONELU);
            if ((ulii != 3) && (uljj != 3)) {
              // Row 0: intercept; row 1: geno1; row 2: geno2; row 3: product.
              *fptr = 1.0;
              geno_pair_present[ulii + uljj * 4] = 1;
              fxx = (float)((intptr_t)ulii);
              fyy = (float)((intptr_t)uljj);
              // maybe this is faster with continuous writes instead of
              // continuous reads?  can experiment later
              fptr[cur_sample_cta4] = fxx;
              fptr[2 * cur_sample_cta4] = fyy;
              fptr[3 * cur_sample_cta4] = fxx * fyy;
              fptr++;
              *fptr2++ = (float)((int32_t)is_set(pheno_c, sample_idx));
            }
            cur_word1 >>= 2;
            cur_word2 >>= 2;
          }
        }
        if (!geno_pair_present[5]) {
          // not full rank if any 2x2 square in the 3x3 contingency table is
          // empty.
          // (Every 2x2 square contains the center cell [5], so only the other
          // three cells of each square need to be checked here.)
          if (((!geno_pair_present[0]) && (!geno_pair_present[1]) && (!geno_pair_present[4])) ||
              ((!geno_pair_present[1]) && (!geno_pair_present[2]) && (!geno_pair_present[6])) ||
              ((!geno_pair_present[4]) && (!geno_pair_present[8]) && (!geno_pair_present[9])) ||
              ((!geno_pair_present[6]) && (!geno_pair_present[9]) && (!geno_pair_present[10]))) {
            goto epi_logistic_thread_regression_fail;
          }
        }
        if (cur_sample_ct < cur_sample_cta4) {
          // Zero the padding entries at the end of each matrix row and of the
          // phenotype vector.  fptr/fptr2 now point just past the last real
          // sample.
          loop_end = cur_sample_cta4 - cur_sample_ct;
          fill_float_zero(loop_end, fptr);
          fill_float_zero(loop_end, &(fptr[cur_sample_cta4]));
          fill_float_zero(loop_end, &(fptr[2 * cur_sample_cta4]));
          fill_float_zero(loop_end, &(fptr[3 * cur_sample_cta4]));
          fill_float_zero(loop_end, fptr2);
        }

        // Start the fit from all-zero coefficients; a nonzero return value is
        // treated as a failed regression.
        fill_float_zero(4, coef);
        if (logistic_regression(cur_sample_ct, 4, sample_1d_buf, param_2d_buf, param_1d_buf, param_2d_buf2, param_1d_buf2, covars_cov_major, pheno_buf, coef, pp)) {
          goto epi_logistic_thread_regression_fail;
        }

        // compute S
        // (one linear solve per unit vector; each solution becomes one
        // 4-entry row of param_2d_buf)
        for (param_idx = 0; param_idx < 4; param_idx++) {
          fill_float_zero(4, param_1d_buf);
          param_1d_buf[param_idx] = 1.0;
          solve_linear_system(param_2d_buf2, param_1d_buf, param_1d_buf2, 4);
          memcpy(&(param_2d_buf[param_idx * 4]), param_1d_buf2, 4 * sizeof(float));
        }
        // Reject tiny or non-real diagonal entries (index param_idx * 5 is
        // the diagonal of the 4x4 matrix), then reuse param_2d_buf2 to hold
        // the square roots of the diagonal.
        for (param_idx = 1; param_idx < 4; param_idx++) {
          fxx = param_2d_buf[param_idx * 5];
          if ((fxx < 1e-20) || (!realnum(fxx))) {
            goto epi_logistic_thread_regression_fail;
          }
          param_2d_buf2[param_idx] = sqrtf(fxx);
        }
        param_2d_buf2[0] = sqrtf(param_2d_buf[0]);
        // Fail if any below-diagonal entry exceeds 0.99999 times the product
        // of the two corresponding diagonal square roots.
        for (param_idx = 1; param_idx < 4; param_idx++) {
          fxx = 0.99999 * param_2d_buf2[param_idx];
          fptr = &(param_2d_buf[param_idx * 4]);
          fptr2 = param_2d_buf2;
          for (param_idx2 = 0; param_idx2 < param_idx; param_idx2++) {
            if ((*fptr++) > fxx * (*fptr2++)) {
              goto epi_logistic_thread_regression_fail;
            }
          }
        }

        // coef[3] = logistic regression beta for AB term
        // sqrt(param_2d_buf[15]) = standard error for AB term
        zsq = coef[3] * coef[3] / param_2d_buf[15];
        if (zsq >= alpha1sq) {
          all_chisq_write[2 * block_idx2] = zsq;
          all_chisq_write[2 * block_idx2 + 1] = coef[3];
        }
        if (zsq >= alpha2sq) {
          n_sig_ct_fixed++;
          n_sig_ct2[block_idx2] += 1;
        }
        if (zsq > best_chisq_fixed) {
          best_chisq_fixed = zsq;
          best_id_fixed = block_idx2 + idx2_block_start;
        }
        fxx = *chisq2_ptr;
        if (zsq > fxx) {
          *chisq2_ptr = zsq;
          best_id2[block_idx2] = marker_idx1;
        }
        // The body of this while (0) never runs by fallthrough; it is only
        // entered through the goto label, after which control continues with
        // the next idx2 marker.
        while (0) {
        epi_logistic_thread_regression_fail:
          zsq = 0;
          fail_ct_fixed++;
          fail_ct2[block_idx2] += 1;
          if (alpha1sq == 0.0) {
            // special case: log NA when '--epi1 1' specified
            all_chisq_write[block_idx2 * 2] = NAN;
            all_chisq_write[block_idx2 * 2 + 1] = NAN;
          }
        }
      }
      if (is_first_half) {
        // First segment done; process the segment after the skipped range if
        // it starts inside this idx2 block.
        is_first_half = 0;
        ulii = geno1_offsets[2 * block_idx1 + 1];
        cur_idx2_block_size = idx2_block_size;
        if (ulii < idx2_block_end) {
          block_idx2 = ulii - idx2_block_start;
          goto epi_logistic_thread_second_half;
        }
      }
      // Publish the per-idx1 summaries.
      if (best_chisq_fixed > best_chisq1[block_delta1]) {
        best_chisq1[block_delta1] = best_chisq_fixed;
        best_id1[block_delta1] = best_id_fixed;
      }
      n_sig_ct1[block_delta1] = n_sig_ct_fixed;
      if (fail_ct_fixed) {
        fail_ct1[block_delta1] = fail_ct_fixed;
      }
    }
    if ((!tidx) || g_is_last_thread_block) {
      THREAD_RETURN;
    }
    THREAD_BLOCK_FINISH(tidx);
  }
}
4860
calc_lnlike(double known11,double known12,double known21,double known22,double center_ct_d,double freq11,double freq12,double freq21,double freq22,double half_hethet_share,double freq11_incr)4861 double calc_lnlike(double known11, double known12, double known21, double known22, double center_ct_d, double freq11, double freq12, double freq21, double freq22, double half_hethet_share, double freq11_incr) {
4862 double lnlike;
4863 freq11 += freq11_incr;
4864 freq22 += freq11_incr;
4865 freq12 += half_hethet_share - freq11_incr;
4866 freq21 += half_hethet_share - freq11_incr;
4867 lnlike = center_ct_d * log(freq11 * freq22 + freq12 * freq21);
4868 if (known11 != 0.0) {
4869 lnlike += known11 * log(freq11);
4870 }
4871 if (known12 != 0.0) {
4872 lnlike += known12 * log(freq12);
4873 }
4874 if (known21 != 0.0) {
4875 lnlike += known21 * log(freq21);
4876 }
4877 if (known22 != 0.0) {
4878 lnlike += known22 * log(freq22);
4879 }
4880 return lnlike;
4881 }
4882
em_phase_hethet(double known11,double known12,double known21,double known22,uint32_t center_ct,double * freq1x_ptr,double * freq2x_ptr,double * freqx1_ptr,double * freqx2_ptr,double * freq11_ptr,uint32_t * onside_sol_ct_ptr)4883 uint32_t em_phase_hethet(double known11, double known12, double known21, double known22, uint32_t center_ct, double* freq1x_ptr, double* freq2x_ptr, double* freqx1_ptr, double* freqx2_ptr, double* freq11_ptr, uint32_t* onside_sol_ct_ptr) {
4884 // Returns 1 if at least one SNP is monomorphic over all valid observations;
4885 // returns 0 otherwise, and fills all frequencies using the maximum
4886 // likelihood solution to the cubic equation.
4887 // (We're discontinuing most use of EM phasing since better algorithms have
4888 // been developed, but the two marker case is mathematically clean and fast
4889 // enough that it'll probably remain useful as an input for some of those
4890 // better algorithms...)
4891 double center_ct_d = (int32_t)center_ct;
4892 double twice_tot = known11 + known12 + known21 + known22 + 2 * center_ct_d;
4893 uint32_t sol_start_idx = 0;
4894 uint32_t sol_end_idx = 1;
4895 double solutions[3];
4896 double twice_tot_recip;
4897 double half_hethet_share;
4898 double freq11;
4899 double freq12;
4900 double freq21;
4901 double freq22;
4902 double prod_1122;
4903 double prod_1221;
4904 double incr_1122;
4905 double best_sol;
4906 double best_lnlike;
4907 double cur_lnlike;
4908 double freq1x;
4909 double freq2x;
4910 double freqx1;
4911 double freqx2;
4912 double lbound;
4913 double dxx;
4914 uint32_t cur_sol_idx;
4915 // shouldn't have to worry about subtractive cancellation problems here
4916 if (twice_tot == 0.0) {
4917 return 1;
4918 }
4919 twice_tot_recip = 1.0 / twice_tot;
4920 freq11 = known11 * twice_tot_recip;
4921 freq12 = known12 * twice_tot_recip;
4922 freq21 = known21 * twice_tot_recip;
4923 freq22 = known22 * twice_tot_recip;
4924 prod_1122 = freq11 * freq22;
4925 prod_1221 = freq12 * freq21;
4926 half_hethet_share = center_ct_d * twice_tot_recip;
4927 // the following four values should all be guaranteed nonzero except in the
4928 // NAN case
4929 freq1x = freq11 + freq12 + half_hethet_share;
4930 freq2x = 1.0 - freq1x;
4931 freqx1 = freq11 + freq21 + half_hethet_share;
4932 freqx2 = 1.0 - freqx1;
4933 if (center_ct) {
4934 if ((prod_1122 != 0.0) || (prod_1221 != 0.0)) {
4935 sol_end_idx = cubic_real_roots(0.5 * (freq11 + freq22 - freq12 - freq21 - 3 * half_hethet_share), 0.5 * (prod_1122 + prod_1221 + half_hethet_share * (freq12 + freq21 - freq11 - freq22 + half_hethet_share)), -0.5 * half_hethet_share * prod_1122, solutions);
4936 while (sol_end_idx && (solutions[sol_end_idx - 1] > half_hethet_share + SMALLISH_EPSILON)) {
4937 sol_end_idx--;
4938 }
4939 while ((sol_start_idx < sol_end_idx) && (solutions[sol_start_idx] < -SMALLISH_EPSILON)) {
4940 sol_start_idx++;
4941 }
4942 if (sol_start_idx == sol_end_idx) {
4943 // Lost a planet Master Obi-Wan has. How embarrassing...
4944 // lost root must be a double root at one of the boundary points, just
4945 // check their likelihoods
4946 sol_start_idx = 0;
4947 sol_end_idx = 2;
4948 solutions[0] = 0;
4949 solutions[1] = half_hethet_share;
4950 } else {
4951 if (solutions[sol_start_idx] < 0) {
4952 solutions[sol_start_idx] = 0;
4953 }
4954 if (solutions[sol_end_idx - 1] > half_hethet_share) {
4955 solutions[sol_end_idx - 1] = half_hethet_share;
4956 }
4957 }
4958 } else {
4959 solutions[0] = 0;
4960 // bugfix (6 Oct 2017): need to use all nonzero values here
4961 const double nonzero_freq_xx = freq11 + freq22;
4962 const double nonzero_freq_xy = freq12 + freq21;
4963 if ((nonzero_freq_xx + SMALLISH_EPSILON < half_hethet_share + nonzero_freq_xy) && (nonzero_freq_xy + SMALLISH_EPSILON < half_hethet_share + nonzero_freq_xx)) {
4964 sol_end_idx = 3;
4965 solutions[1] = (half_hethet_share + nonzero_freq_xy - nonzero_freq_xx) * 0.5;
4966 solutions[2] = half_hethet_share;
4967 } else {
4968 sol_end_idx = 2;
4969 solutions[1] = half_hethet_share;
4970 }
4971 }
4972 best_sol = solutions[sol_start_idx];
4973 if (sol_end_idx > sol_start_idx + 1) {
4974 // select largest log likelihood
4975 best_lnlike = calc_lnlike(known11, known12, known21, known22, center_ct_d, freq11, freq12, freq21, freq22, half_hethet_share, best_sol);
4976 cur_sol_idx = sol_start_idx + 1;
4977 do {
4978 incr_1122 = solutions[cur_sol_idx];
4979 cur_lnlike = calc_lnlike(known11, known12, known21, known22, center_ct_d, freq11, freq12, freq21, freq22, half_hethet_share, incr_1122);
4980 if (cur_lnlike > best_lnlike) {
4981 cur_lnlike = best_lnlike;
4982 best_sol = incr_1122;
4983 }
4984 } while (++cur_sol_idx < sol_end_idx);
4985 }
4986 if (onside_sol_ct_ptr && (sol_end_idx > sol_start_idx + 1)) {
4987 if (freqx1 * freq1x >= freq11) {
4988 dxx = freq1x * freqx1 - freq11;
4989 if (dxx > half_hethet_share) {
4990 dxx = half_hethet_share;
4991 }
4992 } else {
4993 dxx = 0.0;
4994 }
4995 // okay to NOT count suboptimal boundary points because they don't permit
4996 // direction changes within the main interval
4997 // this should exactly match haploview_blocks_classify()'s D sign check
4998 if ((freq11 + best_sol) - freqx1 * freq1x >= 0.0) {
4999 if (best_sol > dxx + SMALLISH_EPSILON) {
5000 lbound = dxx + SMALLISH_EPSILON;
5001 } else {
5002 lbound = dxx;
5003 }
5004 if (best_sol < half_hethet_share - SMALLISH_EPSILON) {
5005 half_hethet_share -= SMALLISH_EPSILON;
5006 }
5007 } else {
5008 if (best_sol > SMALLISH_EPSILON) {
5009 lbound = SMALLISH_EPSILON;
5010 } else {
5011 lbound = 0.0;
5012 }
5013 if (best_sol < dxx - SMALLISH_EPSILON) {
5014 half_hethet_share = dxx - SMALLISH_EPSILON;
5015 } else {
5016 half_hethet_share = dxx;
5017 }
5018 }
5019 for (cur_sol_idx = sol_start_idx; cur_sol_idx < sol_end_idx; cur_sol_idx++) {
5020 if (solutions[cur_sol_idx] < lbound) {
5021 sol_start_idx++;
5022 }
5023 if (solutions[cur_sol_idx] > half_hethet_share) {
5024 break;
5025 }
5026 }
5027 if (cur_sol_idx >= sol_start_idx + 2) {
5028 *onside_sol_ct_ptr = cur_sol_idx - sol_start_idx;
5029 }
5030 }
5031 freq11 += best_sol;
5032 } else if ((prod_1122 == 0.0) && (prod_1221 == 0.0)) {
5033 return 1;
5034 }
5035 *freq1x_ptr = freq1x;
5036 *freq2x_ptr = freq2x;
5037 *freqx1_ptr = freqx1;
5038 *freqx2_ptr = freqx2;
5039 *freq11_ptr = freq11;
5040 return 0;
5041 }
5042
em_phase_hethet_nobase(uint32_t * counts,uint32_t is_x1,uint32_t is_x2,double * freq1x_ptr,double * freq2x_ptr,double * freqx1_ptr,double * freqx2_ptr,double * freq11_ptr)5043 uint32_t em_phase_hethet_nobase(uint32_t* counts, uint32_t is_x1, uint32_t is_x2, double* freq1x_ptr, double* freq2x_ptr, double* freqx1_ptr, double* freqx2_ptr, double* freq11_ptr) {
5044 // if is_x1 and/or is_x2 is set, counts[9]..[17] are male-only counts.
5045 double known11 = (double)(2 * counts[0] + counts[1] + counts[3]);
5046 double known12 = (double)(2 * counts[2] + counts[1] + counts[5]);
5047 double known21 = (double)(2 * counts[6] + counts[3] + counts[7]);
5048 double known22 = (double)(2 * counts[8] + counts[5] + counts[7]);
5049 if (is_x1 || is_x2) {
5050 if (is_x1 && is_x2) {
5051 known11 -= (double)((int32_t)counts[9]);
5052 known12 -= (double)((int32_t)counts[11]);
5053 known21 -= (double)((int32_t)counts[15]);
5054 known22 -= (double)((int32_t)counts[17]);
5055 } else if (is_x1) {
5056 known11 -= ((double)(2 * counts[9] + counts[10])) * (1.0 - SQRT_HALF);
5057 known12 -= ((double)(2 * counts[11] + counts[10])) * (1.0 - SQRT_HALF);
5058 known21 -= ((double)(2 * counts[15] + counts[16])) * (1.0 - SQRT_HALF);
5059 known22 -= ((double)(2 * counts[17] + counts[16])) * (1.0 - SQRT_HALF);
5060 } else {
5061 known11 -= ((double)(2 * counts[9] + counts[12])) * (1.0 - SQRT_HALF);
5062 known12 -= ((double)(2 * counts[11] + counts[12])) * (1.0 - SQRT_HALF);
5063 known21 -= ((double)(2 * counts[15] + counts[14])) * (1.0 - SQRT_HALF);
5064 known22 -= ((double)(2 * counts[17] + counts[14])) * (1.0 - SQRT_HALF);
5065 }
5066 }
5067 return em_phase_hethet(known11, known12, known21, known22, counts[4], freq1x_ptr, freq2x_ptr, freqx1_ptr, freqx2_ptr, freq11_ptr, nullptr);
5068 }
5069
ld_dprime_thread(void * arg)5070 THREAD_RET_TYPE ld_dprime_thread(void* arg) {
5071 uintptr_t tidx = (uintptr_t)arg;
5072 uintptr_t block_idx1_start = (tidx * g_ld_idx1_block_size) / g_ld_thread_ct;
5073 uintptr_t block_idx1_end = ((tidx + 1) * g_ld_idx1_block_size) / g_ld_thread_ct;
5074 uintptr_t marker_idx2_maxw = g_ld_marker_ctm8;
5075 uintptr_t founder_ct = g_ld_founder_ct;
5076 uint32_t founder_ctv3 = BITCT_TO_ALIGNED_WORDCT(founder_ct);
5077 uint32_t founder_ctsplit = 3 * founder_ctv3;
5078 uintptr_t* geno1 = g_ld_geno1;
5079 uintptr_t* zmiss1 = g_epi_zmiss1;
5080 uintptr_t* sex_male = g_ld_sex_male;
5081 uintptr_t* cur_geno1_male = nullptr;
5082 uint32_t* ld_interval1 = g_ld_interval1;
5083 uint32_t is_dprime = g_ld_modifier & (LD_DPRIME | LD_DPRIME_SIGNED);
5084 uint32_t is_dprime_unsigned = g_ld_modifier & LD_DPRIME;
5085 uint32_t is_r2 = g_ld_is_r2;
5086 uint32_t xstart1 = g_ld_xstart1;
5087 uint32_t xend1 = g_ld_xend1;
5088 double* results = g_ld_results;
5089 uint32_t tot1[6];
5090 uint32_t counts[18];
5091 uintptr_t* cur_geno1;
5092 uintptr_t* cur_geno2;
5093 uintptr_t* geno2;
5094 uintptr_t* zmiss2;
5095 double* rptr;
5096 uint32_t* tot2;
5097 uint32_t* cur_tot2;
5098 uintptr_t idx2_block_size;
5099 uintptr_t idx2_block_start;
5100 uintptr_t block_idx1;
5101 uintptr_t block_idx2;
5102 uintptr_t cur_zmiss2;
5103 uintptr_t cur_block_idx2_end;
5104 double freq11;
5105 double freq11_expected;
5106 double freq1x;
5107 double freq2x;
5108 double freqx1;
5109 double freqx2;
5110 double dxx;
5111 uint32_t xstart2;
5112 uint32_t xend2;
5113 uint32_t x2_present;
5114 uint32_t is_x1;
5115 uint32_t is_x2;
5116 uint32_t nm_fixed;
5117 if (g_ld_thread_wkspace) {
5118 cur_geno1_male = &(g_ld_thread_wkspace[tidx * round_up_pow2(founder_ctsplit, CACHELINE_WORD)]);
5119 }
5120 // suppress warning
5121 fill_uint_zero(3, &(tot1[3]));
5122 while (1) {
5123 idx2_block_size = g_ld_idx2_block_size;
5124 idx2_block_start = g_ld_idx2_block_start;
5125 geno2 = g_ld_geno2;
5126 zmiss2 = g_epi_zmiss2;
5127 tot2 = g_epi_tot2;
5128 xstart2 = g_ld_xstart2;
5129 xend2 = g_ld_xend2;
5130 x2_present = (g_ld_thread_wkspace && (idx2_block_start < xend2) && (idx2_block_start + idx2_block_size > xstart2));
5131 for (block_idx1 = block_idx1_start; block_idx1 < block_idx1_end; block_idx1++) {
5132 cur_zmiss2 = ld_interval1[block_idx1 * 2];
5133 block_idx2 = cur_zmiss2;
5134 cur_block_idx2_end = ld_interval1[block_idx1 * 2 + 1];
5135 if (block_idx2 < idx2_block_start) {
5136 if (cur_block_idx2_end <= idx2_block_start) {
5137 continue;
5138 }
5139 block_idx2 = 0;
5140 } else {
5141 block_idx2 -= idx2_block_start;
5142 if (block_idx2 >= idx2_block_size) {
5143 break;
5144 }
5145 }
5146 cur_block_idx2_end -= idx2_block_start;
5147 if (cur_block_idx2_end > idx2_block_size) {
5148 cur_block_idx2_end = idx2_block_size;
5149 }
5150 is_x1 = (block_idx1 >= xstart1) && (block_idx1 < xend1);
5151 nm_fixed = is_set_ul(zmiss1, block_idx1);
5152 cur_geno1 = &(geno1[block_idx1 * founder_ctsplit]);
5153 tot1[0] = popcount_longs(cur_geno1, founder_ctv3);
5154 tot1[1] = popcount_longs(&(cur_geno1[founder_ctv3]), founder_ctv3);
5155 tot1[2] = popcount_longs(&(cur_geno1[2 * founder_ctv3]), founder_ctv3);
5156 if (is_x1 || x2_present) {
5157 memcpy(cur_geno1_male, cur_geno1, founder_ctsplit * sizeof(intptr_t));
5158 bitvec_and(sex_male, founder_ctv3, cur_geno1_male);
5159 tot1[3] = popcount_longs(cur_geno1_male, founder_ctv3);
5160 bitvec_and(sex_male, founder_ctv3, &(cur_geno1_male[founder_ctv3]));
5161 tot1[4] = popcount_longs(&(cur_geno1_male[founder_ctv3]), founder_ctv3);
5162 bitvec_and(sex_male, founder_ctv3, &(cur_geno1_male[2 * founder_ctv3]));
5163 tot1[5] = popcount_longs(&(cur_geno1_male[2 * founder_ctv3]), founder_ctv3);
5164 }
5165 cur_geno2 = &(geno2[block_idx2 * founder_ctsplit]);
5166 rptr = &(results[2 * block_idx1 * marker_idx2_maxw]);
5167 for (; block_idx2 < cur_block_idx2_end; block_idx2++, cur_geno2 = &(cur_geno2[founder_ctsplit])) {
5168 cur_tot2 = &(tot2[block_idx2 * 3]);
5169 cur_zmiss2 = is_set_ul(zmiss2, block_idx2);
5170 if (nm_fixed) {
5171 two_locus_count_table_zmiss1(cur_geno1, cur_geno2, counts, founder_ctv3, cur_zmiss2);
5172 if (cur_zmiss2) {
5173 counts[2] = tot1[0] - counts[0] - counts[1];
5174 counts[5] = tot1[1] - counts[3] - counts[4];
5175 }
5176 counts[6] = cur_tot2[0] - counts[0] - counts[3];
5177 counts[7] = cur_tot2[1] - counts[1] - counts[4];
5178 counts[8] = cur_tot2[2] - counts[2] - counts[5];
5179 } else {
5180 two_locus_count_table(cur_geno1, cur_geno2, counts, founder_ctv3, cur_zmiss2);
5181 if (cur_zmiss2) {
5182 counts[2] = tot1[0] - counts[0] - counts[1];
5183 counts[5] = tot1[1] - counts[3] - counts[4];
5184 counts[8] = tot1[2] - counts[6] - counts[7];
5185 }
5186 }
5187 is_x2 = ((block_idx2 < xend2) && (block_idx2 >= xstart2));
5188 if (is_x1 || is_x2) {
5189 two_locus_count_table(cur_geno1_male, cur_geno2, &(counts[9]), founder_ctv3, cur_zmiss2);
5190 if (cur_zmiss2) {
5191 counts[11] = tot1[3] - counts[9] - counts[10];
5192 counts[14] = tot1[4] - counts[12] - counts[13];
5193 counts[17] = tot1[5] - counts[15] - counts[16];
5194 }
5195 }
5196 if (em_phase_hethet_nobase(counts, is_x1, is_x2, &freq1x, &freq2x, &freqx1, &freqx2, &freq11)) {
5197 *rptr++ = NAN;
5198 *rptr++ = NAN;
5199 continue;
5200 }
5201 freq11_expected = freqx1 * freq1x; // fA * fB temp var
5202 // a bit of numeric instability here, but not tragic since this is the
5203 // end of the calculation
5204 dxx = freq11 - freq11_expected; // D
5205 if (fabs(dxx) < SMALL_EPSILON) {
5206 *rptr++ = 0;
5207 *rptr = 0;
5208 } else {
5209 if (is_r2) {
5210 *rptr = fabs(dxx) * dxx / (freq11_expected * freq2x * freqx2);
5211 } else {
5212 *rptr = dxx / sqrt(freq11_expected * freq2x * freqx2);
5213 }
5214 rptr++;
5215 if (is_dprime) {
5216 if (dxx >= 0) {
5217 dxx /= MINV(freqx1 * freq2x, freqx2 * freq1x);
5218 } else {
5219 if (is_dprime_unsigned) {
5220 dxx = -dxx;
5221 }
5222 dxx /= MINV(freq11_expected, freqx2 * freq2x);
5223 }
5224 }
5225 *rptr = dxx;
5226 }
5227 rptr++;
5228 }
5229 }
5230 if ((!tidx) || g_is_last_thread_block) {
5231 THREAD_RETURN;
5232 }
5233 THREAD_BLOCK_FINISH(tidx);
5234 }
5235 }
5236
ld_report_dprime(pthread_t * threads,Ld_info * ldip,FILE * bedfile,uintptr_t bed_offset,uintptr_t * marker_reverse,uintptr_t unfiltered_sample_ct,uintptr_t * founder_info,uintptr_t * sex_male,uintptr_t * founder_include2,uintptr_t * founder_male_include2,uintptr_t * loadbuf_raw,char * outname,uint32_t hh_exists,uintptr_t marker_idx1_start,uintptr_t marker_idx1_end)5237 int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uintptr_t* sex_male, uintptr_t* founder_include2, uintptr_t* founder_male_include2, uintptr_t* loadbuf_raw, char* outname, uint32_t hh_exists, uintptr_t marker_idx1_start, uintptr_t marker_idx1_end) {
5238 Chrom_info* chrom_info_ptr = g_ld_chrom_info_ptr;
5239 uintptr_t* marker_exclude_idx1 = g_ld_marker_exclude_idx1;
5240 uintptr_t* marker_exclude = g_ld_marker_exclude;
5241 uint32_t* marker_pos = g_ld_marker_pos;
5242 double* marker_cms = g_ld_marker_cms;
5243 uintptr_t marker_ct = g_ld_marker_ct;
5244 uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
5245 uintptr_t founder_ct = g_ld_founder_ct;
5246 uintptr_t founder_ctl = BITCT_TO_WORDCT(founder_ct);
5247 uintptr_t founder_ctv3 = BITCT_TO_ALIGNED_WORDCT(founder_ct);
5248 uintptr_t founder_ctsplit = 3 * founder_ctv3;
5249 uintptr_t final_mask = get_final_mask(founder_ct);
5250 uintptr_t orig_marker_ctm8 = g_ld_marker_ctm8;
5251 uintptr_t marker_idx2_maxw = orig_marker_ctm8;
5252 uintptr_t marker_idx1 = marker_idx1_start;
5253 uintptr_t job_size = marker_idx1_end - marker_idx1_start;
5254 uintptr_t pct_thresh = job_size / 100;
5255 uintptr_t pct = 1;
5256 uintptr_t ulii = founder_ctsplit * sizeof(intptr_t) + 2 * sizeof(int32_t) + marker_idx2_maxw * 2 * sizeof(double);
5257 uint32_t output_gz = ldip->modifier & LD_REPORT_GZ;
5258 uint32_t is_inter_chr = g_ld_is_inter_chr;
5259 uint32_t idx1_subset = (ldip->snpstr || ldip->snps_rl.name_ct);
5260 uint32_t window_size_m1 = ldip->window_size - 1;
5261 uint32_t window_bp = ldip->window_bp;
5262 double window_cm = ldip->window_cm;
5263 uint32_t thread_ct = g_ld_thread_ct;
5264 uint32_t chrom_fo_idx = 0;
5265 uint32_t is_haploid = 0;
5266 uint32_t is_x = 0;
5267 uint32_t is_y = 0;
5268 uint32_t not_first_write = 0;
5269 uint32_t chrom_last = 0;
5270 uint32_t marker_uidx2_back = 0;
5271 uint32_t marker_uidx2_fwd = 0;
5272 uint32_t marker_uidx2_fwd2 = 0;
5273 uint32_t window_trail_ct = 0;
5274 uint32_t window_lead_ct = 0;
5275 int32_t x_code = chrom_info_ptr->xymt_codes[X_OFFSET];
5276 uint32_t xstart = 0;
5277 uint32_t xend = 0;
5278 int32_t retval = 0;
5279 uintptr_t* loadbuf;
5280 uintptr_t* dummy_nm;
5281 uintptr_t* ulptr;
5282 uint32_t* uiptr;
5283 unsigned char* overflow_buf;
5284 unsigned char* bigstack_mark2;
5285 uintptr_t cur_bigstack_left;
5286 uintptr_t thread_workload;
5287 uintptr_t idx1_block_size;
5288 uintptr_t idx2_block_size;
5289 uintptr_t cur_idx2_block_size;
5290 uintptr_t marker_idx2;
5291 uintptr_t marker_uidx1;
5292 uintptr_t marker_uidx1_tmp;
5293 uintptr_t marker_uidx2_base;
5294 uintptr_t marker_uidx2;
5295 uintptr_t marker_idx2_base;
5296 uintptr_t marker_idx2_end;
5297 uintptr_t block_idx1;
5298 uintptr_t block_idx2;
5299 uintptr_t uljj;
5300 uint32_t chrom_idx;
5301 uint32_t chrom_end;
5302 uint32_t cur_marker_pos;
5303 double cur_marker_cm;
5304 uint32_t is_last_block;
5305 uint32_t uii;
5306 if (bigstack_alloc_uc(262144, &overflow_buf) ||
5307 bigstack_alloc_ul(founder_ctl * 2, &loadbuf) ||
5308 bigstack_alloc_ul(founder_ctl, &dummy_nm)) {
5309 goto ld_report_dprime_ret_NOMEM;
5310 }
5311 loadbuf[founder_ctl * 2 - 2] = 0;
5312 loadbuf[founder_ctl * 2 - 1] = 0;
5313 fill_all_bits(founder_ct, dummy_nm);
5314 g_ld_thread_wkspace = nullptr;
5315 if ((x_code != -2) && is_set(chrom_info_ptr->chrom_mask, x_code)) {
5316 uii = get_chrom_start_vidx(chrom_info_ptr, (uint32_t)x_code);
5317 chrom_end = get_chrom_end_vidx(chrom_info_ptr, (uint32_t)x_code);
5318 chrom_end = chrom_end - uii - popcount_bit_idx(marker_exclude, uii, chrom_end);
5319 if (chrom_end) {
5320 if (bigstack_alloc_ul(round_up_pow2(founder_ctsplit, CACHELINE_WORD) * thread_ct, &g_ld_thread_wkspace)) {
5321 goto ld_report_dprime_ret_NOMEM;
5322 }
5323 xstart = uii - popcount_bit_idx(marker_exclude, 0, uii);
5324 xend = xstart + chrom_end;
5325 g_ld_sex_male = sex_male;
5326 }
5327 }
5328 cur_bigstack_left = bigstack_left();
5329 if (cur_bigstack_left < 2 * CACHELINE) {
5330 goto ld_report_dprime_ret_NOMEM;
5331 }
5332 idx1_block_size = (cur_bigstack_left - 2 * CACHELINE) / (ulii * 2 + 1);
5333 thread_workload = idx1_block_size / thread_ct;
5334 if (!thread_workload) {
5335 goto ld_report_dprime_ret_NOMEM;
5336 }
5337 idx1_block_size = thread_workload * thread_ct;
5338 if (idx1_block_size > job_size) {
5339 idx1_block_size = job_size;
5340 }
5341 if (bigstack_alloc_ul(founder_ctsplit * idx1_block_size, &g_ld_geno1) ||
5342 bigstack_alloc_ul(BITCT_TO_WORDCT(idx1_block_size), &g_epi_zmiss1) ||
5343 bigstack_alloc_ui(idx1_block_size * 2, &g_ld_interval1) ||
5344 // double size since both r/r^2 and dprime are needed
5345 // (marker_idx2_maxw only needs to be divisible by 4 as a result)
5346 bigstack_alloc_d(marker_idx2_maxw * 2 * idx1_block_size, &g_ld_results)) {
5347 goto ld_report_dprime_ret_NOMEM;
5348 }
5349 for (block_idx1 = 0; block_idx1 < idx1_block_size; block_idx1++) {
5350 g_ld_geno1[block_idx1 * founder_ctsplit + founder_ctv3 - 1] = 0;
5351 g_ld_geno1[block_idx1 * founder_ctsplit + 2 * founder_ctv3 - 1] = 0;
5352 g_ld_geno1[block_idx1 * founder_ctsplit + founder_ctsplit - 1] = 0;
5353 }
5354
5355 ulii = founder_ctsplit * sizeof(intptr_t) + 1 + 3 * sizeof(int32_t);
5356 cur_bigstack_left = bigstack_left();
5357 if (cur_bigstack_left >= CACHELINE) {
5358 cur_bigstack_left -= CACHELINE;
5359 }
5360 idx2_block_size = (cur_bigstack_left / ulii) & (~(7 * ONELU));
5361 if (idx2_block_size > marker_ct) {
5362 idx2_block_size = round_up_pow2(marker_ct, 8);
5363 }
5364 bigstack_mark2 = g_bigstack_base;
5365 while (1) {
5366 if (!idx2_block_size) {
5367 goto ld_report_dprime_ret_NOMEM;
5368 }
5369 if (!(bigstack_alloc_ul(founder_ctsplit * idx2_block_size, &g_ld_geno2) ||
5370 bigstack_alloc_ul(BITCT_TO_WORDCT(idx2_block_size), &g_epi_zmiss2) ||
5371 bigstack_alloc_ui(idx2_block_size * 3, &g_epi_tot2))) {
5372 break;
5373 }
5374 bigstack_reset(bigstack_mark2);
5375 idx2_block_size -= 4;
5376 }
5377 for (block_idx2 = 0; block_idx2 < idx2_block_size; block_idx2++) {
5378 g_ld_geno2[block_idx2 * founder_ctsplit + founder_ctv3 - 1] = 0;
5379 g_ld_geno2[block_idx2 * founder_ctsplit + 2 * founder_ctv3 - 1] = 0;
5380 g_ld_geno2[block_idx2 * founder_ctsplit + founder_ctsplit - 1] = 0;
5381 }
5382 marker_uidx1 = next_unset_unsafe(marker_exclude_idx1, 0);
5383 if (marker_idx1) {
5384 marker_uidx1 = jump_forward_unset_unsafe(marker_exclude_idx1, marker_uidx1 + 1, marker_idx1);
5385 }
5386 LOGPRINTF("--r%s%s%s d%s%s...", g_ld_is_r2? "2" : "", is_inter_chr? " inter-chr" : "", g_ld_marker_allele_ptrs? " in-phase" : "", (g_ld_modifier & LD_D)? "" : ((g_ld_modifier & LD_DPRIME)? "prime" : "prime-signed"), g_ld_set_allele_freqs? " with-freqs" : "");
5387 fputs(" 0%", stdout);
5388 while (1) {
5389 fputs(" [processing]", stdout);
5390 fflush(stdout);
5391 if (idx1_block_size > marker_idx1_end - marker_idx1) {
5392 idx1_block_size = marker_idx1_end - marker_idx1;
5393 if (idx1_block_size < thread_ct) {
5394 thread_ct = idx1_block_size;
5395 g_ld_thread_ct = thread_ct;
5396 }
5397 }
5398 g_ld_idx1_block_size = idx1_block_size;
5399 marker_uidx1_tmp = marker_uidx1;
5400 if ((marker_idx1 < xend) && (marker_idx1 + idx1_block_size > xstart)) {
5401 uii = MAXV(marker_idx1, xstart);
5402 g_ld_xstart1 = uii - marker_idx1;
5403 g_ld_xend1 = MINV(xend, marker_idx1 + idx1_block_size) - uii;
5404 }
5405
5406 if (idx1_subset) {
5407 if (!is_inter_chr) {
5408 chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1_tmp);
5409 marker_uidx2_base = window_back(marker_pos, marker_cms, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1, window_size_m1, window_bp, window_cm, &uii);
5410 marker_idx2_base = marker_uidx2_base - popcount_bit_idx(marker_exclude, 0, marker_uidx2_base);
5411 marker_idx2 = marker_idx2_base + uii;
5412 } else {
5413 marker_uidx2_base = next_unset_unsafe(marker_exclude, 0);
5414 marker_idx2_base = 0;
5415 marker_idx2 = 0;
5416 }
5417 } else {
5418 marker_idx2_base = marker_uidx1 + 1 - popcount_bit_idx(marker_exclude, 0, marker_uidx1);
5419 if (marker_idx2_base == marker_ct) {
5420 goto ld_report_dprime_done;
5421 }
5422 marker_idx2 = marker_idx2_base - 1;
5423 marker_uidx2_base = next_unset_unsafe(marker_exclude, marker_uidx1 + 1);
5424 }
5425 if (fseeko(bedfile, bed_offset + (marker_uidx1 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
5426 goto ld_report_dprime_ret_READ_FAIL;
5427 }
5428 chrom_end = 0;
5429 fill_ulong_zero(BITCT_TO_WORDCT(idx1_block_size), g_epi_zmiss1);
5430 for (block_idx1 = 0; block_idx1 < idx1_block_size; marker_uidx1_tmp++, block_idx1++, marker_idx2++) {
5431 if (IS_SET(marker_exclude_idx1, marker_uidx1_tmp)) {
5432 ulii = next_unset_ul_unsafe(marker_exclude_idx1, marker_uidx1_tmp);
5433 uljj = ulii - marker_uidx1_tmp - popcount_bit_idx(marker_exclude, marker_uidx1_tmp, ulii);
5434 if (uljj) {
5435 uii = 1;
5436 marker_idx2 += uljj;
5437 }
5438 marker_uidx1_tmp = ulii;
5439 if (fseeko(bedfile, bed_offset + (marker_uidx1_tmp * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
5440 goto ld_report_dprime_ret_READ_FAIL;
5441 }
5442 }
5443 if (marker_uidx1_tmp >= chrom_end) {
5444 chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1_tmp);
5445 chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
5446 chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
5447 chrom_last = prev_unset_unsafe(marker_exclude, chrom_end);
5448 is_haploid = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx);
5449 is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
5450 is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
5451 uii = 1;
5452 }
5453 if (!is_inter_chr) {
5454 // uii == 0 if we can perform an incremental update, 1 if we need
5455 // fully-powered window_back()/window_forward()
5456 if (uii) {
5457 if (idx1_subset) {
5458 marker_uidx2_back = window_back(marker_pos, marker_cms, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1_tmp, window_size_m1, window_bp, window_cm, &window_trail_ct);
5459 }
5460 marker_uidx2_fwd = window_forward(marker_pos, marker_cms, marker_exclude, marker_uidx1_tmp, chrom_last, window_size_m1, window_bp, window_cm, &window_lead_ct);
5461 marker_uidx2_fwd2 = marker_uidx2_fwd;
5462 if (marker_uidx2_fwd < chrom_last) {
5463 marker_uidx2_fwd2++;
5464 next_unset_unsafe_ck(marker_exclude, &marker_uidx2_fwd2);
5465 }
5466 uii = 0;
5467 } else {
5468 if (idx1_subset) {
5469 if (window_trail_ct == window_size_m1) {
5470 marker_uidx2_back++;
5471 next_unset_unsafe_ck(marker_exclude, &marker_uidx2_back);
5472 } else {
5473 window_trail_ct++;
5474 }
5475 cur_marker_pos = marker_pos[marker_uidx1_tmp];
5476 if (cur_marker_pos > window_bp) {
5477 cur_marker_pos -= window_bp;
5478 while (marker_pos[marker_uidx2_back] < cur_marker_pos) {
5479 window_trail_ct--;
5480 marker_uidx2_back++;
5481 next_unset_unsafe_ck(marker_exclude, &marker_uidx2_back);
5482 }
5483 }
5484 if (marker_cms) {
5485 cur_marker_cm = marker_cms[marker_uidx1_tmp] - window_cm;
5486 while (marker_cms[marker_uidx2_back] < cur_marker_cm) {
5487 window_trail_ct--;
5488 marker_uidx2_back++;
5489 next_unset_unsafe_ck(marker_exclude, &marker_uidx2_back);
5490 }
5491 }
5492 }
5493 if (marker_uidx2_fwd < chrom_last) {
5494 cur_marker_pos = marker_pos[marker_uidx1_tmp] + window_bp;
5495 if (!marker_cms) {
5496 while (marker_pos[marker_uidx2_fwd2] <= cur_marker_pos) {
5497 marker_uidx2_fwd = marker_uidx2_fwd2;
5498 window_lead_ct++;
5499 if (marker_uidx2_fwd == chrom_last) {
5500 break;
5501 }
5502 marker_uidx2_fwd2++;
5503 next_unset_unsafe_ck(marker_exclude, &marker_uidx2_fwd2);
5504 if (window_lead_ct > window_size_m1) {
5505 break;
5506 }
5507 }
5508 } else {
5509 cur_marker_cm = marker_cms[marker_uidx1_tmp] + window_cm;
5510 while ((marker_pos[marker_uidx2_fwd2] <= cur_marker_pos) && (marker_cms[marker_uidx2_fwd2] <= window_cm)) {
5511 marker_uidx2_fwd = marker_uidx2_fwd2;
5512 window_lead_ct++;
5513 if (marker_uidx2_fwd == chrom_last) {
5514 break;
5515 }
5516 marker_uidx2_fwd2++;
5517 next_unset_unsafe_ck(marker_exclude, &marker_uidx2_fwd2);
5518 if (window_lead_ct > window_size_m1) {
5519 break;
5520 }
5521 }
5522 }
5523 }
5524 window_lead_ct--;
5525 }
5526 }
5527 if (!is_inter_chr) {
5528 if (idx1_subset) {
5529 g_ld_interval1[block_idx1 * 2] = marker_idx2 - window_trail_ct - marker_idx2_base;
5530 } else {
5531 g_ld_interval1[block_idx1 * 2] = marker_idx2 + 1 - marker_idx2_base;
5532 }
5533 g_ld_interval1[block_idx1 * 2 + 1] = marker_idx2 + window_lead_ct + 1 - marker_idx2_base;
5534 } else {
5535 if (!idx1_subset) {
5536 g_ld_interval1[block_idx1 * 2] = marker_idx2 + 1 - marker_idx2_base;
5537 } else {
5538 g_ld_interval1[block_idx1 * 2] = 0;
5539 }
5540 g_ld_interval1[block_idx1 * 2 + 1] = marker_ct - marker_idx2_base;
5541 }
5542
5543 if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, marker_uidx1_tmp), bedfile, loadbuf_raw, loadbuf)) {
5544 goto ld_report_dprime_ret_READ_FAIL;
5545 }
5546 if (is_haploid && hh_exists) {
5547 haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)loadbuf);
5548 }
5549 load_and_split3(nullptr, loadbuf, founder_ct, &(g_ld_geno1[block_idx1 * founder_ctsplit]), dummy_nm, dummy_nm, founder_ctv3, 0, 0, 1, &ulii);
5550 if (ulii == 3) {
5551 SET_BIT(block_idx1, g_epi_zmiss1);
5552 }
5553 }
5554 marker_uidx2 = marker_uidx2_base;
5555 if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
5556 goto ld_report_dprime_ret_READ_FAIL;
5557 }
5558
5559 cur_idx2_block_size = idx2_block_size;
5560 uljj = g_ld_interval1[2 * idx1_block_size - 1];
5561 marker_idx2_end = uljj + marker_idx2_base;
5562 marker_idx2_maxw = round_up_pow2(uljj, 4);
5563 if (marker_idx2_maxw > orig_marker_ctm8) {
5564 marker_idx2_maxw = orig_marker_ctm8;
5565 }
5566 g_ld_marker_ctm8 = marker_idx2_maxw;
5567 marker_idx2 = marker_idx2_base;
5568 do {
5569 if (cur_idx2_block_size > marker_idx2_end - marker_idx2) {
5570 cur_idx2_block_size = marker_idx2_end - marker_idx2;
5571 }
5572 if ((marker_idx2 < xend) && (marker_idx2 + cur_idx2_block_size > xstart)) {
5573 uii = MAXV(marker_idx2, xstart);
5574 g_ld_xstart2 = uii - marker_idx2;
5575 g_ld_xend2 = MINV(xend, marker_idx2 + cur_idx2_block_size) - uii;
5576 }
5577 fill_ulong_zero(BITCT_TO_WORDCT(cur_idx2_block_size), g_epi_zmiss2);
5578 for (block_idx2 = 0; block_idx2 < cur_idx2_block_size; marker_uidx2++, block_idx2++) {
5579 if (IS_SET(marker_exclude, marker_uidx2)) {
5580 marker_uidx2 = next_unset_ul_unsafe(marker_exclude, marker_uidx2);
5581 if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
5582 goto ld_report_dprime_ret_READ_FAIL;
5583 }
5584 }
5585 if (marker_uidx2 >= chrom_end) {
5586 chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2);
5587 chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
5588 is_haploid = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx);
5589 is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
5590 is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
5591 }
5592 if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, marker_uidx2), bedfile, loadbuf_raw, loadbuf)) {
5593 goto ld_report_dprime_ret_READ_FAIL;
5594 }
5595 if (is_haploid && hh_exists) {
5596 haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)loadbuf);
5597 }
5598 ulptr = &(g_ld_geno2[block_idx2 * founder_ctsplit]);
5599 load_and_split3(nullptr, loadbuf, founder_ct, ulptr, dummy_nm, dummy_nm, founder_ctv3, 0, 0, 1, &ulii);
5600 uiptr = &(g_epi_tot2[block_idx2 * 3]);
5601 uiptr[0] = popcount_longs(ulptr, founder_ctv3);
5602 uiptr[1] = popcount_longs(&(ulptr[founder_ctv3]), founder_ctv3);
5603 uiptr[2] = popcount_longs(&(ulptr[2 * founder_ctv3]), founder_ctv3);
5604 if (ulii == 3) {
5605 SET_BIT(block_idx2, g_epi_zmiss2);
5606 }
5607 }
5608 g_ld_idx2_block_size = cur_idx2_block_size;
5609 g_ld_idx2_block_start = marker_idx2 - marker_idx2_base;
5610 marker_idx2 += cur_idx2_block_size;
5611 is_last_block = (marker_idx2 >= marker_idx2_end);
5612 if (spawn_threads2(threads, &ld_dprime_thread, thread_ct, is_last_block)) {
5613 goto ld_report_dprime_ret_THREAD_CREATE_FAIL;
5614 }
5615 ld_dprime_thread((void*)0);
5616 join_threads2(threads, thread_ct, is_last_block);
5617 } while (!is_last_block);
5618
5619 fputs("\b\b\b\b\b\b\b\b\b\b\bwriting] \b\b\b", stdout);
5620 fflush(stdout);
5621 g_ld_marker_uidx1 = marker_uidx1;
5622 g_ld_block_idx1 = 0;
5623 g_ld_uidx2_start = marker_uidx2_base;
5624 g_ld_idx2_block_start = 0;
5625 g_ld_block_idx2 = 0;
5626 if (output_gz) {
5627 parallel_compress(outname, overflow_buf, not_first_write, ld_regular_emitn);
5628 } else {
5629 write_uncompressed(outname, overflow_buf, not_first_write, ld_regular_emitn);
5630 }
5631 not_first_write = 1;
5632 g_ld_is_first_block = 0;
5633 ld_report_dprime_done:
5634 marker_idx1 += idx1_block_size;
5635 fputs("\b\b\b\b\b\b\b\b\b\b \b\b\b\b\b\b\b\b\b\b", stdout);
5636 if (marker_idx1 >= pct_thresh) {
5637 if (pct > 10) {
5638 putc_unlocked('\b', stdout);
5639 }
5640 pct = ((marker_idx1 - marker_idx1_start) * 100LLU) / job_size;
5641 if (pct < 100) {
5642 printf("\b\b%" PRIuPTR "%%", pct);
5643 fflush(stdout);
5644 pct_thresh = marker_idx1_start + ((++pct) * ((uint64_t)job_size)) / 100;
5645 }
5646 }
5647 if (marker_idx1 == marker_idx1_end) {
5648 break;
5649 }
5650 marker_uidx1 = jump_forward_unset_unsafe(marker_exclude_idx1, marker_uidx1 + 1, idx1_block_size);
5651 }
5652 fputs("\b\b\b", stdout);
5653 logprint(" done.\n");
5654 LOGPRINTFWW("Results written to %s .\n", outname);
5655
5656 while (0) {
5657 ld_report_dprime_ret_NOMEM:
5658 retval = RET_NOMEM;
5659 break;
5660 ld_report_dprime_ret_READ_FAIL:
5661 retval = RET_READ_FAIL;
5662 break;
5663 ld_report_dprime_ret_THREAD_CREATE_FAIL:
5664 retval = RET_THREAD_CREATE_FAIL;
5665 break;
5666 }
5667 return retval;
5668 }
5669
// Driver for the "regular" --r/--r2 report that is written to outname.
//
// Outline:
//   1. When --ld-snp, --ld-snps or --ld-snp-list is in effect (idx1_subset),
//      build marker_exclude_idx1, a separate exclusion bitmask for the first
//      ("index") variant of each pair.  Otherwise the first variant ranges
//      over the same set as the second (marker_exclude).
//   2. Pick this job's [marker_idx1_start, marker_idx1_end) share of the
//      index variants from parallel_idx/parallel_tot.
//   3. Bound the number of second variants a single index variant can be
//      paired with (marker_idx2_maxw).
//   4. If LD_DX is set, hand everything off to ld_report_dprime() and return
//      its result.
//   5. Otherwise allocate the work buffers from the bigstack, then repeatedly
//      load a block of index variants, stream the matching second variants
//      through ld_block_thread(), and emit the block with ld_regular_emitn.
//
// Parameters (only what this function itself does with them):
//   threads               - passed to spawn_threads2()/join_threads2().
//   ldip                  - supplies modifier flags, window settings and the
//                           --ld-snp/--ld-snps/--ld-snp-list specification.
//   bedfile, bed_offset   - genotype file and the byte offset of its first
//                           variant record; records are
//                           (unfiltered_sample_ct + 3) / 4 bytes apart.
//   unfiltered_marker_ct  - size (in bits) of the marker bitmasks.
//   marker_reverse        - per-variant bit forwarded to
//                           load_and_collapse_incl().
//   unfiltered_sample_ct, founder_info, founder_include2,
//   founder_male_include2 - forwarded to the load/haploid_fix/ld_process_load2
//                           helpers.
//   parallel_idx/_tot     - 0-based job index and job count.
//   sex_male              - only forwarded to ld_report_dprime().
//   loadbuf               - scratch buffer for raw genotype loads.
//   outname               - output path (also printed in the log line).
//   hh_exists             - when nonzero, haploid_fix() is applied to variants
//                           on chromosomes flagged in haploid_mask.
//
// Returns 0 on success, one of the RET_* codes set at the bottom of the
// function on failure, or whatever nonzero code a delegated helper returned.
// Bigstack memory allocated here is deliberately not released; the caller is
// trusted to reset it.
int32_t ld_report_regular(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t unfiltered_marker_ct, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uint32_t parallel_idx, uint32_t parallel_tot, uintptr_t* sex_male, uintptr_t* founder_include2, uintptr_t* founder_male_include2, uintptr_t* loadbuf, char* outname, uint32_t hh_exists) {
  FILE* infile = nullptr;
  uintptr_t* marker_exclude = g_ld_marker_exclude;
  char* marker_ids = g_ld_marker_ids;
  uintptr_t max_marker_id_len = g_ld_max_marker_id_len;
  uint32_t ld_modifier = ldip->modifier;
  uint32_t output_gz = ld_modifier & LD_REPORT_GZ;
  // Reduce the flag to 0/1.  The earlier "(ld_modifier & LD_IGNORE_X) & 1"
  // evaluates to 0 whenever LD_IGNORE_X is not the lowest bit, which would
  // silently disable the flag in the "is_x && (!ignore_x)" tests below.
  uint32_t ignore_x = ((ld_modifier & LD_IGNORE_X) != 0);
  uint32_t is_inter_chr = ld_modifier & LD_INTER_CHR;
  uint32_t snp_list_file = ld_modifier & LD_SNP_LIST_FILE;
  uintptr_t marker_ct = g_ld_marker_ct;
  uintptr_t marker_ct1 = marker_ct;
  uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
  uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
  uintptr_t founder_ct = g_ld_founder_ct;
  uintptr_t founder_ctl = BITCT_TO_WORDCT(founder_ct);
  // per-variant stride (in words) of the genotype and mask buffers
  uintptr_t founder_ct_192_long = g_ld_founder_ct_mld_m1 * (MULTIPLEX_LD / BITCT2) + g_ld_founder_ct_mld_rem * (192 / BITCT2);
  uintptr_t final_mask = get_final_mask(founder_ct);
  uintptr_t pct = 1;
  uintptr_t marker_idx2_maxw = 1;
  Chrom_info* chrom_info_ptr = g_ld_chrom_info_ptr;
  uintptr_t* marker_exclude_idx1 = marker_exclude;
  uint32_t* marker_pos = g_ld_marker_pos;
  double* marker_cms = g_ld_marker_cms;
  // words at the end of each per-variant slot beyond the 2 * founder_ctl
  // words holding actual genotype data
  uint32_t founder_trail_ct = founder_ct_192_long - founder_ctl * 2;
  uint32_t idx1_subset = (ldip->snpstr || ldip->snps_rl.name_ct);
  uint32_t window_size_m1 = ldip->window_size - 1;
  uint32_t window_bp = ldip->window_bp;
  double window_cm = ldip->window_cm;
  uint32_t thread_ct = g_ld_thread_ct;
  uint32_t chrom_fo_idx = 0;
  uint32_t chrom_fo_idx2 = 0;
  uint32_t is_haploid = 0;
  uint32_t is_x = 0;
  uint32_t is_y = 0;
  uint32_t not_first_write = 0;
  uint32_t marker_uidx2_back = 0;
  uint32_t marker_uidx2_fwd = 0;
  uint32_t marker_uidx2_fwd2 = 0;
  uint32_t window_trail_ct = 0;
  uint32_t window_lead_ct = 0;
  uint32_t chrom_last = 0;
  int32_t retval = 0;
  unsigned char* bigstack_mark2;
  unsigned char* overflow_buf;
  uint32_t* id_map;
  char* sorted_ids;
  char* bufptr;
  uintptr_t thread_workload;
  uintptr_t idx1_block_size;
  uintptr_t idx2_block_size;
  uintptr_t cur_idx2_block_size;
  uintptr_t orig_marker_ctm8;
  uintptr_t marker_idx1_start;
  uintptr_t marker_idx1;
  uintptr_t marker_idx1_end;
  uintptr_t marker_idx2;
  uintptr_t job_size;
  uintptr_t pct_thresh;
  uintptr_t marker_uidx1;
  uintptr_t marker_uidx1_tmp;
  uintptr_t marker_uidx2_base;
  uintptr_t marker_uidx2;
  uintptr_t marker_idx2_base;
  uintptr_t marker_idx2_end;
  uintptr_t block_idx1;
  uintptr_t block_idx2;
  uintptr_t snplist_ct;
  uintptr_t max_snplist_id_len;
  uintptr_t ulii;
  uintptr_t uljj;
  uint32_t window_size_ceil;
  uint32_t chrom_idx;
  uint32_t chrom_end;
  uint32_t chrom_idx2;
  uint32_t chrom_end2;
  uint32_t cur_marker_pos;
  double cur_marker_cm;
  uint32_t is_last_block;
  uint32_t uii;
  int32_t ii;
  if (bigstack_alloc_uc(262144, &overflow_buf)) {
    goto ld_report_regular_ret_NOMEM;
  }
  if (idx1_subset) {
    // Start with every variant excluded from the index side, then clear the
    // bits of the requested variants.
    if (bigstack_alloc_ul(unfiltered_marker_ctl, &marker_exclude_idx1)) {
      goto ld_report_regular_ret_NOMEM;
    }
    fill_all_bits(unfiltered_marker_ct, marker_exclude_idx1);
    marker_uidx1 = next_unset_unsafe(marker_exclude, 0);
    if (ldip->snpstr && (!snp_list_file)) {
      // Single variant ID: linear scan over the non-excluded variants.
      bufptr = ldip->snpstr;
      uii = strlen(bufptr) + 1;
      if (uii > max_marker_id_len) {
        // longer than every stored ID, so it cannot match
        goto ld_report_regular_ret_EMPTY_SET1;
      }
      for (marker_idx1 = 0; marker_idx1 < marker_ct; marker_uidx1++, marker_idx1++) {
        next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx1);
        if (!memcmp(&(marker_ids[marker_uidx1 * max_marker_id_len]), bufptr, uii)) {
          break;
        }
      }
      if (marker_idx1 == marker_ct) {
        goto ld_report_regular_ret_EMPTY_SET1;
      }
      clear_bit_ul(marker_uidx1, marker_exclude_idx1);
      marker_ct1 = 1;
    } else {
      // Multiple IDs: look them up in a sorted copy of the ID table.
      marker_ct1 = 0;
      retval = sort_item_ids(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_ct, marker_ids, max_marker_id_len, 0, 0, strcmp_deref, &sorted_ids, &id_map);
      if (retval) {
        goto ld_report_regular_ret_1;
      }
      if (snp_list_file) {
        // ldip->snpstr names a file of whitespace-delimited tokens; first pass
        // counts/sizes them, second pass reads them into bufptr.
        if (fopen_checked(ldip->snpstr, FOPEN_RB, &infile)) {
          goto ld_report_regular_ret_OPEN_FAIL;
        }
        snplist_ct = 0;
        max_snplist_id_len = 0;
        retval = scan_token_ct_len(MAXLINELEN, infile, g_textbuf, &snplist_ct, &max_snplist_id_len);
        if (retval) {
          goto ld_report_regular_ret_1;
        }
        if (!snplist_ct) {
          goto ld_report_regular_ret_EMPTY_SET1;
        }
        if (bigstack_alloc_c(snplist_ct * max_snplist_id_len, &bufptr)) {
          goto ld_report_regular_ret_NOMEM;
        }
        rewind(infile);
        retval = read_tokens(MAXLINELEN, snplist_ct, max_snplist_id_len, infile, g_textbuf, bufptr);
        if (retval) {
          goto ld_report_regular_ret_1;
        }
        if (fclose_null(&infile)) {
          goto ld_report_regular_ret_READ_FAIL;
        }
        for (marker_idx1 = 0; marker_idx1 < snplist_ct; marker_idx1++) {
          ii = bsearch_str_nl(&(bufptr[marker_idx1 * max_snplist_id_len]), sorted_ids, max_marker_id_len, marker_ct);
          // IDs that are not found (-1) are skipped without an error
          if (ii != -1) {
            uii = id_map[(uint32_t)ii];
            // bit already cleared => this ID appeared earlier in the file
            if (!is_set(marker_exclude_idx1, uii)) {
              logerrprint("Error: Duplicate variant ID in --ld-snp-list file.\n");
              goto ld_report_regular_ret_INVALID_FORMAT;
            }
            clear_bit(uii, marker_exclude_idx1);
            marker_ct1++;
          }
        }
      } else {
        retval = string_range_list_to_bitarr2(sorted_ids, id_map, marker_ct, max_marker_id_len, &(ldip->snps_rl), "ld-snps", marker_exclude_idx1);
        if (retval) {
          goto ld_report_regular_ret_1;
        }
        bitvec_or(marker_exclude, unfiltered_marker_ctl, marker_exclude_idx1);
        // bugfix, 13 Jan 2017
        // another bugfix, 28 Mar 2017: popcounted the wrong array...
        marker_ct1 = unfiltered_marker_ct - popcount_longs(marker_exclude_idx1, unfiltered_marker_ctl);
      }
      if (!marker_ct1) {
        goto ld_report_regular_ret_EMPTY_SET1;
      }
      bigstack_reset(id_map);
    }
  }
  if ((parallel_tot > 1) && (marker_ct1 < 2 * parallel_tot)) {
    LOGERRPRINTF("Error: Too few variants in --r%s run for --parallel %u %u.\n", g_ld_is_r2? "2" : "", parallel_idx + 1, parallel_tot);
    goto ld_report_regular_ret_INVALID_CMDLINE;
  }
  // yeah, this is uneven in the inter-chr case
  marker_idx1_start = (((uint64_t)parallel_idx) * marker_ct1) / parallel_tot;
  marker_idx1 = marker_idx1_start;
  marker_idx1_end = (((uint64_t)(parallel_idx + 1)) * marker_ct1) / parallel_tot;
  job_size = marker_idx1_end - marker_idx1_start;
  pct_thresh = job_size / 100;

  // Upper bound on the number of second variants per index variant.
  if (is_inter_chr) {
    marker_idx2_maxw = marker_ct + idx1_subset - 1;
  } else {
    // with an index subset the window extends in both directions
    window_size_ceil = (idx1_subset + 1) * (window_size_m1 + 1) - 1;
    if ((window_size_m1 < 12) || ((!idx1_subset) && (window_size_m1 <= 16))) {
      // small count-based window: use it directly instead of scanning
      marker_idx2_maxw = window_size_ceil;
    } else {
      for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
        marker_idx2_maxw = chrom_window_max(marker_pos, marker_exclude, chrom_info_ptr, chrom_info_ptr->chrom_file_order[chrom_fo_idx], window_size_ceil, window_bp * (idx1_subset + 1), marker_idx2_maxw);
      }
    }
  }

  // Publish settings read by the worker threads and the emit callback.
  g_ld_marker_exclude_idx1 = marker_exclude_idx1;
  g_ld_marker_exclude = marker_exclude;
  g_ld_is_inter_chr = is_inter_chr;

  g_ld_is_first_block = (!parallel_idx);
  if (g_ld_is_r2) {
    g_ld_window_r2 = ldip->window_r2;
  } else {
    g_ld_window_r2 = sqrt(ldip->window_r2);
  }
  if (ld_modifier & LD_DX) {
    // this is more like --fast-epistasis under the hood, since it requires the
    // entire 3x3 table
    g_ld_marker_ctm8 = round_up_pow2(marker_idx2_maxw, 4);
    retval = ld_report_dprime(threads, ldip, bedfile, bed_offset, marker_reverse, unfiltered_sample_ct, founder_info, sex_male, founder_include2, founder_male_include2, loadbuf, outname, hh_exists, marker_idx1_start, marker_idx1_end);
    goto ld_report_regular_ret_1;
  }
  marker_idx2_maxw = round_up_pow2(marker_idx2_maxw, 8);
  orig_marker_ctm8 = marker_idx2_maxw;
  g_ld_marker_ctm8 = marker_idx2_maxw;
  g_ld_keep_sign = 1;
  // each marker costs
  //   founder_ct_192_long * sizeof(intptr_t) for genotype buffer
  // + founder_ct_192_long * sizeof(intptr_t) for missing mask buffer
  // + sizeof(int32_t) for g_ld_missing_cts1 entry
  // + 2 * sizeof(int32_t) for window offset and size
  // + marker_idx2_maxw * sizeof(double) for g_ld_results buffer
  // round down to multiple of thread_ct for better workload distribution
  ulii = founder_ct_192_long * 2 * sizeof(intptr_t) + 3 * sizeof(int32_t) + marker_idx2_maxw * sizeof(double);
  // only budget half of the remaining memory for the index-side buffers
  idx1_block_size = bigstack_left() / (ulii * 2);
  thread_workload = idx1_block_size / thread_ct;
  if (!thread_workload) {
    goto ld_report_regular_ret_NOMEM;
  }
  idx1_block_size = thread_workload * thread_ct;
  if (idx1_block_size > job_size) {
    idx1_block_size = job_size;
  }
  // Every allocation is checked; previously only the last one was, so a
  // failure of an earlier call would have gone unnoticed here.
  if (bigstack_alloc_ul(founder_ct_192_long * idx1_block_size, &g_ld_geno1) ||
      bigstack_alloc_ul(founder_ct_192_long * idx1_block_size, &g_ld_geno_masks1) ||
      bigstack_alloc_ui(idx1_block_size, &g_ld_missing_cts1) ||
      bigstack_alloc_ui(idx1_block_size * 2, &g_ld_interval1) ||
      bigstack_alloc_d(marker_idx2_maxw * idx1_block_size, &g_ld_results)) {
    goto ld_report_regular_ret_NOMEM;
  }

  // Second-side variants need no interval or result storage.
  ulii -= 2 * sizeof(int32_t) + marker_idx2_maxw * sizeof(double);
  idx2_block_size = (bigstack_left() / ulii) & (~(7 * ONELU));
  if (idx2_block_size > marker_ct) {
    idx2_block_size = round_up_pow2(marker_ct, 8);
  }
  bigstack_mark2 = g_bigstack_base;
  // Shrink the second-side block in steps of 8 until all three buffers fit.
  while (1) {
    if (!idx2_block_size) {
      goto ld_report_regular_ret_NOMEM;
    }
    if (!(bigstack_alloc_ul(founder_ct_192_long * idx2_block_size, &g_ld_geno2) ||
          bigstack_alloc_ul(founder_ct_192_long * idx2_block_size, &g_ld_geno_masks2) ||
          bigstack_alloc_ui(idx2_block_size, &g_ld_missing_cts2))) {
      break;
    }
    bigstack_reset(bigstack_mark2);
    idx2_block_size -= 8;
  }
  // Zero the last (founder_trail_ct + 2) words of every per-variant slot once
  // up front.
  uljj = founder_trail_ct + 2;
  for (ulii = 1; ulii <= idx1_block_size; ulii++) {
    fill_ulong_zero(uljj, &(g_ld_geno1[ulii * founder_ct_192_long - uljj]));
    fill_ulong_zero(uljj, &(g_ld_geno_masks1[ulii * founder_ct_192_long - uljj]));
  }
  for (ulii = 1; ulii <= idx2_block_size; ulii++) {
    fill_ulong_zero(uljj, &(g_ld_geno2[ulii * founder_ct_192_long - uljj]));
    fill_ulong_zero(uljj, &(g_ld_geno_masks2[ulii * founder_ct_192_long - uljj]));
  }
  // Position marker_uidx1 on this job's first index variant.
  marker_uidx1 = next_unset_unsafe(marker_exclude_idx1, 0);
  if (marker_idx1) {
    marker_uidx1 = jump_forward_unset_unsafe(marker_exclude_idx1, marker_uidx1 + 1, marker_idx1);
  }
  sprintf(g_logbuf, "--r%s%s%s%s to %s ... ", g_ld_is_r2? "2" : "", is_inter_chr? " inter-chr" : "", g_ld_marker_allele_ptrs? " in-phase" : "", g_ld_set_allele_freqs? " with-freqs" : "", outname);
  wordwrapb(16); // strlen("99% [processing]")
  logprintb();
  fputs("0%", stdout);
  // One iteration per block of index variants.
  while (1) {
    fputs(" [processing]", stdout);
    fflush(stdout);
    if (idx1_block_size > marker_idx1_end - marker_idx1) {
      // final, short block
      idx1_block_size = marker_idx1_end - marker_idx1;
      if (idx1_block_size < thread_ct) {
        thread_ct = idx1_block_size;
        g_ld_thread_ct = thread_ct;
      }
    }
    g_ld_idx1_block_size = idx1_block_size;
    marker_uidx1_tmp = marker_uidx1;

    // Determine where the second-side range for this block starts
    // (marker_uidx2_base / marker_idx2_base) and seed marker_idx2, the
    // filtered index of the current index variant within the full set.
    if (idx1_subset) {
      if (!is_inter_chr) {
        chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1_tmp);
        marker_uidx2_base = window_back(marker_pos, marker_cms, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1, window_size_m1, window_bp, window_cm, &uii);
        marker_idx2_base = marker_uidx2_base - popcount_bit_idx(marker_exclude, 0, marker_uidx2_base);
        marker_idx2 = marker_idx2_base + uii;
      } else {
        marker_uidx2_base = next_unset_unsafe(marker_exclude, 0);
        marker_idx2_base = 0;
        marker_idx2 = 0; // ignored
      }
    } else {
      // pairs are only formed with later variants
      marker_idx2_base = marker_uidx1 + 1 - popcount_bit_idx(marker_exclude, 0, marker_uidx1);
      if (marker_idx2_base == marker_ct) {
        // the index variant is the last one; nothing to pair it with
        goto ld_report_regular_done;
      }
      marker_idx2 = marker_idx2_base - 1;
      marker_uidx2_base = next_unset_unsafe(marker_exclude, marker_uidx1 + 1);
    }
    if (fseeko(bedfile, bed_offset + (marker_uidx1 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
      goto ld_report_regular_ret_READ_FAIL;
    }
    // chrom_end == 0 forces the chromosome lookup (and uii = 1) on the first
    // pass through the loop below
    chrom_end = 0;
    for (block_idx1 = 0; block_idx1 < idx1_block_size; marker_uidx1_tmp++, block_idx1++, marker_idx2++) {
      if (IS_SET(marker_exclude_idx1, marker_uidx1_tmp)) {
        ulii = next_unset_ul_unsafe(marker_exclude_idx1, marker_uidx1_tmp);
        // number of second-side variants skipped over by this jump
        uljj = ulii - marker_uidx1_tmp - popcount_bit_idx(marker_exclude, marker_uidx1_tmp, ulii);
        if (uljj) {
          uii = 1; // recalculate window beginning/end from scratch
          marker_idx2 += uljj;
        }
        marker_uidx1_tmp = ulii;
        if (fseeko(bedfile, bed_offset + (marker_uidx1_tmp * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
          goto ld_report_regular_ret_READ_FAIL;
        }
      }
      if (marker_uidx1_tmp >= chrom_end) {
        // entered a new chromosome: refresh the cached per-chromosome state
        chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1_tmp);
        chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
        chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
        chrom_last = prev_unset_unsafe(marker_exclude, chrom_end);
        is_haploid = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx);
        is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
        is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
        uii = 1;
      }
      if (!is_inter_chr) {
        // uii == 0 if we can perform an incremental update, 1 if we need
        // fully-powered window_back()/window_forward()
        if (uii) {
          if (idx1_subset) {
            marker_uidx2_back = window_back(marker_pos, marker_cms, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1_tmp, window_size_m1, window_bp, window_cm, &window_trail_ct);
          }
          marker_uidx2_fwd = window_forward(marker_pos, marker_cms, marker_exclude, marker_uidx1_tmp, chrom_last, window_size_m1, window_bp, window_cm, &window_lead_ct);
          // marker_uidx2_fwd2 is the first variant past the window's end, or
          // equal to marker_uidx2_fwd when the window reaches chrom_last
          marker_uidx2_fwd2 = marker_uidx2_fwd;
          if (marker_uidx2_fwd < chrom_last) {
            marker_uidx2_fwd2++;
            next_unset_unsafe_ck(marker_exclude, &marker_uidx2_fwd2);
          }
          uii = 0;
        } else {
          if (idx1_subset) {
            // Trailing edge: the index variant advanced by one, so either the
            // window grows by one or (at the count limit) its start slides.
            if (window_trail_ct == window_size_m1) {
              marker_uidx2_back++;
              next_unset_unsafe_ck(marker_exclude, &marker_uidx2_back);
            } else {
              window_trail_ct++;
            }
            // then drop trailing variants beyond the bp limit...
            cur_marker_pos = marker_pos[marker_uidx1_tmp];
            if (cur_marker_pos > window_bp) {
              cur_marker_pos -= window_bp;
              while (marker_pos[marker_uidx2_back] < cur_marker_pos) {
                window_trail_ct--;
                marker_uidx2_back++;
                next_unset_unsafe_ck(marker_exclude, &marker_uidx2_back);
              }
            }
            // ...and beyond the cM limit, when cM positions are available
            if (marker_cms) {
              cur_marker_cm = marker_cms[marker_uidx1_tmp] - window_cm;
              while (marker_cms[marker_uidx2_back] < cur_marker_cm) {
                window_trail_ct--;
                marker_uidx2_back++;
                next_unset_unsafe_ck(marker_exclude, &marker_uidx2_back);
              }
            }
          }
          // Leading edge: extend while the next candidate is within the
          // distance limit(s) and the count limit has not been exceeded.
          if (marker_uidx2_fwd < chrom_last) {
            cur_marker_pos = marker_pos[marker_uidx1_tmp] + window_bp;
            if (!marker_cms) {
              while (marker_pos[marker_uidx2_fwd2] <= cur_marker_pos) {
                marker_uidx2_fwd = marker_uidx2_fwd2;
                window_lead_ct++;
                if (marker_uidx2_fwd == chrom_last) {
                  break;
                }
                marker_uidx2_fwd2++;
                next_unset_unsafe_ck(marker_exclude, &marker_uidx2_fwd2);
                if (window_lead_ct > window_size_m1) {
                  break;
                }
              }
            } else {
              cur_marker_cm = marker_cms[marker_uidx1_tmp] + window_cm;
              while ((marker_pos[marker_uidx2_fwd2] <= cur_marker_pos) && (marker_cms[marker_uidx2_fwd2] <= cur_marker_cm)) {
                marker_uidx2_fwd = marker_uidx2_fwd2;
                window_lead_ct++;
                if (marker_uidx2_fwd == chrom_last) {
                  break;
                }
                marker_uidx2_fwd2++;
                next_unset_unsafe_ck(marker_exclude, &marker_uidx2_fwd2);
                if (window_lead_ct > window_size_m1) {
                  break;
                }
              }
            }
          }
          // the index variant itself moved forward by one
          window_lead_ct--;
        }
      }
      // Record this index variant's start/end offsets into the second-side
      // range, both relative to marker_idx2_base.
      if (!is_inter_chr) {
        if (idx1_subset) {
          g_ld_interval1[block_idx1 * 2] = marker_idx2 - window_trail_ct - marker_idx2_base;
        } else {
          g_ld_interval1[block_idx1 * 2] = marker_idx2 + 1 - marker_idx2_base;
        }
        g_ld_interval1[block_idx1 * 2 + 1] = marker_idx2 + window_lead_ct + 1 - marker_idx2_base;
      } else {
        if (!idx1_subset) {
          g_ld_interval1[block_idx1 * 2] = marker_idx2 + 1 - marker_idx2_base;
        } else {
          g_ld_interval1[block_idx1 * 2] = 0;
        }
        g_ld_interval1[block_idx1 * 2 + 1] = marker_ct - marker_idx2_base;
      }

      // Load the index variant's genotypes into its slot and preprocess them.
      if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, marker_uidx1_tmp), bedfile, loadbuf, &(g_ld_geno1[block_idx1 * founder_ct_192_long]))) {
        goto ld_report_regular_ret_READ_FAIL;
      }
      if (is_haploid && hh_exists) {
        haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)(&(g_ld_geno1[block_idx1 * founder_ct_192_long])));
      }
      ld_process_load2(&(g_ld_geno1[block_idx1 * founder_ct_192_long]), &(g_ld_geno_masks1[block_idx1 * founder_ct_192_long]), &(g_ld_missing_cts1[block_idx1]), founder_ct, is_x && (!ignore_x), founder_male_include2);
    }
    marker_uidx2 = marker_uidx2_base;
    if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
      goto ld_report_regular_ret_READ_FAIL;
    }

    cur_idx2_block_size = idx2_block_size;
    // end offset of the block's last index variant bounds the whole range
    uljj = g_ld_interval1[2 * idx1_block_size - 1];
    marker_idx2_end = uljj + marker_idx2_base;
    marker_idx2_maxw = round_up_pow2(uljj, 8);
    if (marker_idx2_maxw > orig_marker_ctm8) {
      marker_idx2_maxw = orig_marker_ctm8;
    }
    g_ld_marker_ctm8 = marker_idx2_maxw;
    marker_idx2 = marker_idx2_base;
    chrom_end2 = 0;
    // Stream the second-side variants through in sub-blocks.
    do {
      if (cur_idx2_block_size > marker_idx2_end - marker_idx2) {
        cur_idx2_block_size = marker_idx2_end - marker_idx2;
      }

      for (block_idx2 = 0; block_idx2 < cur_idx2_block_size; marker_uidx2++, block_idx2++) {
        // todo: when set has big holes in the middle, do not load everything
        if (IS_SET(marker_exclude, marker_uidx2)) {
          marker_uidx2 = next_unset_ul_unsafe(marker_exclude, marker_uidx2);
          if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
            goto ld_report_regular_ret_READ_FAIL;
          }
        }
        if (marker_uidx2 >= chrom_end2) {
          chrom_fo_idx2 = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2);
          chrom_idx2 = chrom_info_ptr->chrom_file_order[chrom_fo_idx2];
          chrom_end2 = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx2 + 1];
          is_haploid = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx2);
          is_x = (((int32_t)chrom_idx2) == chrom_info_ptr->xymt_codes[X_OFFSET]);
          is_y = (((int32_t)chrom_idx2) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
        }
        if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, marker_uidx2), bedfile, loadbuf, &(g_ld_geno2[block_idx2 * founder_ct_192_long]))) {
          goto ld_report_regular_ret_READ_FAIL;
        }
        if (is_haploid && hh_exists) {
          haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)(&(g_ld_geno2[block_idx2 * founder_ct_192_long])));
        }
        ld_process_load2(&(g_ld_geno2[block_idx2 * founder_ct_192_long]), &(g_ld_geno_masks2[block_idx2 * founder_ct_192_long]), &(g_ld_missing_cts2[block_idx2]), founder_ct, is_x && (!ignore_x), founder_male_include2);
      }

      g_ld_idx2_block_size = cur_idx2_block_size;
      g_ld_idx2_block_start = marker_idx2 - marker_idx2_base;
      marker_idx2 += cur_idx2_block_size;
      is_last_block = (marker_idx2 >= marker_idx2_end);
      if (spawn_threads2(threads, &ld_block_thread, thread_ct, is_last_block)) {
        goto ld_report_regular_ret_THREAD_CREATE_FAIL;
      }
      // the calling thread acts as worker 0
      ld_block_thread((void*)0);
      join_threads2(threads, thread_ct, is_last_block);
    } while (!is_last_block);

    // Overwrite "processing]" (11 chars) with "writing]" (8 chars): three
    // blanks erase the leftover characters and three backspaces return the
    // cursor to just after the ']'.
    fputs("\b\b\b\b\b\b\b\b\b\b\bwriting]   \b\b\b", stdout);
    fflush(stdout);
    // Reset the cursor state consumed by ld_regular_emitn, then write.
    g_ld_marker_uidx1 = marker_uidx1;
    g_ld_block_idx1 = 0;
    g_ld_uidx2_start = marker_uidx2_base;
    g_ld_idx2_block_start = 0;
    g_ld_block_idx2 = 0;
    if (output_gz) {
      parallel_compress(outname, overflow_buf, not_first_write, ld_regular_emitn);
    } else {
      write_uncompressed(outname, overflow_buf, not_first_write, ld_regular_emitn);
    }
    not_first_write = 1;
    g_ld_is_first_block = 0;
  ld_report_regular_done:
    marker_idx1 += idx1_block_size;
    // Erase the 10-character " [writing]" status: back up, blank it out, and
    // back up again.
    fputs("\b\b\b\b\b\b\b\b\b\b          \b\b\b\b\b\b\b\b\b\b", stdout);
    if (marker_idx1 >= pct_thresh) {
      if (pct > 10) {
        // previous percentage had two digits, so one more char to back over
        putc_unlocked('\b', stdout);
      }
      pct = ((marker_idx1 - marker_idx1_start) * 100LLU) / job_size;
      if (pct < 100) {
        printf("\b\b%" PRIuPTR "%%", pct);
        fflush(stdout);
        pct_thresh = marker_idx1_start + ((++pct) * ((uint64_t)job_size)) / 100;
      }
    }
    if (marker_idx1 == marker_idx1_end) {
      break;
    }
    marker_uidx1 = jump_forward_unset_unsafe(marker_exclude_idx1, marker_uidx1 + 1, idx1_block_size);
  }
  fputs("\b\b", stdout);
  logprint("done.\n");
  // Error exits: each label sets retval and breaks out of the dummy loop so
  // that all paths share the cleanup below.
  while (0) {
  ld_report_regular_ret_NOMEM:
    retval = RET_NOMEM;
    break;
  ld_report_regular_ret_OPEN_FAIL:
    retval = RET_OPEN_FAIL;
    break;
  ld_report_regular_ret_READ_FAIL:
    retval = RET_READ_FAIL;
    break;
  ld_report_regular_ret_EMPTY_SET1:
    logerrprint("Error: No valid variants specified by --ld-snp/--ld-snps/--ld-snp-list.\n");
    // deliberate fallthrough: an empty set is reported as a command-line error
  ld_report_regular_ret_INVALID_CMDLINE:
    retval = RET_INVALID_CMDLINE;
    break;
  ld_report_regular_ret_INVALID_FORMAT:
    retval = RET_INVALID_FORMAT;
    break;
  ld_report_regular_ret_THREAD_CREATE_FAIL:
    retval = RET_THREAD_CREATE_FAIL;
    break;
  }
 ld_report_regular_ret_1:
  fclose_cond(infile);
  // trust parent to free memory
  return retval;
}
6215
// Top-level driver for --r/--r2 reports.
//
// Publishes the run configuration to the g_ld_* globals, validates the
// request, allocates the collapsed founder filters and the raw load buffer,
// builds the output filename in place at outname_end, and then hands off to
// ld_report_matrix() (square/square0/triangle shapes) or ld_report_regular()
// (everything else).
//
// The filename suffix is ".ld", then ".bin" for binary output, then
// ".<parallel_idx + 1>" when parallel_tot > 1, then ".gz" for compressed text
// output.
//
// Returns 0 on success (including the "fewer than two founders" skip),
// RET_INVALID_CMDLINE or RET_NOMEM for failures detected here, or whatever the
// selected worker returns.  All bigstack allocations made here are released
// before returning.
int32_t ld_report(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, double* set_allele_freqs, Chrom_info* chrom_info_ptr, uint32_t* marker_pos, double* marker_cms, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uint32_t parallel_idx, uint32_t parallel_tot, uintptr_t* sex_male, char* outname, char* outname_end, uint32_t hh_exists) {
  unsigned char* stack_mark = g_bigstack_base;
  uintptr_t raw_sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(unfiltered_sample_ct);
  uintptr_t founder_ct = popcount_longs(founder_info, raw_sample_ctv2 / 2);
  uintptr_t* founder_include2 = nullptr;
  uintptr_t* founder_male_include2 = nullptr;
  // Number of MULTIPLEX_LD-sample blocks needed to hold all founders, and the
  // number of unused sample slots in the last such block.
  uintptr_t mld_block_ct = (founder_ct + MULTIPLEX_LD - 1) / MULTIPLEX_LD;
  uintptr_t mld_pad_ct = mld_block_ct * MULTIPLEX_LD - founder_ct;
  uint32_t founder_ct_mld_m1 = ((uint32_t)mld_block_ct) - 1;
#ifdef __LP64__
  uint32_t founder_ct_mld_rem = (MULTIPLEX_LD / 192) - mld_pad_ct / 192;
#else
  uint32_t founder_ct_mld_rem = (MULTIPLEX_LD / 48) - mld_pad_ct / 48;
#endif
  uintptr_t founder_ct_192_long = founder_ct_mld_m1 * (MULTIPLEX_LD / BITCT2) + founder_ct_mld_rem * (192 / BITCT2);
  uint32_t ld_modifier = ldip->modifier;
  uint32_t binary_output = ld_modifier & (LD_MATRIX_BIN | LD_MATRIX_BIN4);
  uint32_t inphase = ld_modifier & LD_INPHASE;
  char* suffix_iter = memcpyl3a(outname_end, ".ld");
  int32_t retval = 0;
  uintptr_t* raw_loadbuf;

  // Worker threads and the emit callbacks read their configuration from
  // these globals, so they are filled in before any validation.
  g_ld_modifier = ld_modifier;
  g_ld_is_r2 = ld_modifier & LD_R2;
  g_ld_set_allele_freqs = (ld_modifier & LD_WITH_FREQS)? set_allele_freqs : nullptr;
  g_ld_marker_ct = marker_ct;
  g_ld_chrom_info_ptr = chrom_info_ptr;
  g_ld_thread_ct = g_thread_ct;
  g_ld_founder_ct = founder_ct;
  g_ld_founder_ct_192_long = founder_ct_192_long;
  g_ld_founder_ct_mld_m1 = founder_ct_mld_m1;
  g_ld_founder_ct_mld_rem = founder_ct_mld_rem;

  if (founder_ct < 2) {
    // Not an error: the report is skipped and 0 is returned.
    LOGERRPRINTF("Warning: Skipping --r%s since there are less than two founders.\n(--make-founders may come in handy here.)\n", g_ld_is_r2? "2" : "");
    goto ld_report_ret_1;
  }
  if (founder_ct >= 0x20000000) {
    logerrprint("Error: --r/--r2 does not support >= 2^29 samples.\n");
    retval = RET_INVALID_CMDLINE;
    goto ld_report_ret_1;
  }
  // Refuse huge single-process jobs that have no effective output filter,
  // unless the user passed 'yes-really'.
  if ((marker_ct > 400000) && (parallel_tot == 1) && (!(ld_modifier & LD_YES_REALLY))) {
    if ((ld_modifier & LD_MATRIX_SHAPEMASK) || ((ld_modifier & LD_INTER_CHR) && (!ldip->snpstr) && (!ldip->snps_rl.name_ct) && ((!g_ld_is_r2) || (ldip->window_r2 == 0.0)))) {
      logerrprint("Error: Gigantic (over 400k loci) --r/--r2 unfiltered, non-distributed\ncomputation. Rerun with the 'yes-really' modifier if you are SURE you have\nenough hard drive space and want to do this.\n");
      retval = RET_INVALID_CMDLINE;
      goto ld_report_ret_1;
    }
  }
  if (alloc_collapsed_haploid_filters(founder_info, sex_male, unfiltered_sample_ct, founder_ct, XMHH_EXISTS | hh_exists, 1, &founder_include2, &founder_male_include2) ||
      bigstack_alloc_ul(raw_sample_ctv2, &raw_loadbuf)) {
    retval = RET_NOMEM;
    goto ld_report_ret_1;
  }
  // Clear the last two words of the load buffer up front.
  raw_loadbuf[raw_sample_ctv2 - 1] = 0;
  raw_loadbuf[raw_sample_ctv2 - 2] = 0;
  // possible todo: throw out all monomorphic sites (and, in at least the
  // matrix case, dump a list of expelled site IDs)

  // Finish the output filename.
  if (binary_output) {
    suffix_iter = memcpya(suffix_iter, ".bin", 4);
  } else {
    g_ld_delimiter = (ld_modifier & LD_MATRIX_SPACES)? ' ' : '\t';
  }
  if (parallel_tot > 1) {
    *suffix_iter++ = '.';
    suffix_iter = uint32toa(parallel_idx + 1, suffix_iter);
  }
  if ((!binary_output) && (ld_modifier & LD_REPORT_GZ)) {
    suffix_iter = memcpyl3a(suffix_iter, ".gz");
  }
  *suffix_iter = '\0';

  // In-phase output prints allele codes, so the line-length bound is checked
  // against MAXLINELEN before the allele pointers are published.
  if (inphase && (max_marker_allele_len * 4 + plink_maxsnp * 2 + get_max_chrom_slen(chrom_info_ptr) * 2 + 128 > MAXLINELEN)) {
    logerrprint("Error: --r/--r2 in-phase does not support very long allele codes.\n");
    retval = RET_INVALID_CMDLINE;
    goto ld_report_ret_1;
  }
  g_ld_marker_allele_ptrs = inphase? marker_allele_ptrs : nullptr;

  if (!(ld_modifier & (LD_MATRIX_SQ | LD_MATRIX_SQ0 | LD_MATRIX_TRI))) {
    // Table-style report; it needs the variant metadata as well.
    g_ld_plink_maxsnp = plink_maxsnp;
    g_ld_marker_ids = marker_ids;
    g_ld_max_marker_id_len = max_marker_id_len;
    g_ld_marker_pos = marker_pos;
    g_ld_marker_cms = (ldip->window_cm == -1)? nullptr : marker_cms;
    g_ld_marker_exclude = marker_exclude;
    retval = ld_report_regular(threads, ldip, bedfile, bed_offset, unfiltered_marker_ct, marker_reverse, unfiltered_sample_ct, founder_info, parallel_idx, parallel_tot, sex_male, founder_include2, founder_male_include2, raw_loadbuf, outname, hh_exists);
  } else {
    retval = ld_report_matrix(threads, ldip, bedfile, bed_offset, unfiltered_marker_ct, marker_exclude, marker_reverse, unfiltered_sample_ct, founder_info, parallel_idx, parallel_tot, sex_male, founder_include2, founder_male_include2, raw_loadbuf, outname, hh_exists);
  }
 ld_report_ret_1:
  bigstack_reset(stack_mark);
  return retval;
}
6314
// Implements --show-tags.
//
// For every "target" variant, finds the other variants on the same chromosome
// which are within ldip->show_tags_bp of it and whose squared correlation with
// it exceeds ldip->show_tags_r2 (scaled by (1 - SMALL_EPSILON)).  Only
// founders are used.
//
// Targets:
//   - if ldip->show_tags_fname is set, the variant IDs listed in that file
//     (in two-column mode, only lines whose second token is "1");
//     unrecognized IDs are counted and reported in a warning, duplicate IDs
//     are an error;
//   - otherwise, every non-excluded variant.
//
// Output (outname is rewritten in place starting at outname_end):
//   - <outname>.tags.list : per-target report, written when no target file
//     was given or LD_SHOW_TAGS_LIST_ALL is set;
//   - <outname>.tags      : targets plus their tags, written when a target
//     file was given (ID list, or "ID<tab>0/1" for all variants in two-column
//     mode).
//
// Returns 0 on success (including the "fewer than two founders" skip), or
// RET_NOMEM / RET_OPEN_FAIL / RET_READ_FAIL / RET_WRITE_FAIL /
// RET_INVALID_FORMAT (or sort_item_ids()'s error code).  All bigstack
// allocations and any open files are released before returning.
int32_t show_tags(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, uint32_t* marker_pos, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uintptr_t* sex_male, char* outname, char* outname_end, uint32_t hh_exists) {
  // Similar to ld_prune() and flipscan().
  unsigned char* bigstack_mark = g_bigstack_base;
  uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
  uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
  uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
  uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
  uintptr_t founder_ct = popcount_longs(founder_info, unfiltered_sample_ctl);
  uintptr_t founder_ctl = BITCT_TO_WORDCT(founder_ct);
  uintptr_t final_mask = get_final_mask(founder_ct);
  uintptr_t marker_idx = 0;
  uintptr_t max_window_size = 1;
  uintptr_t pct = 1;
  uintptr_t pct_thresh = marker_ct / 100;
  FILE* infile = nullptr;
  FILE* outfile = nullptr;
  // final_set stays null in "all variants are targets" mode; several branches
  // below use that to distinguish the two modes.
  uintptr_t* final_set = nullptr;
  uintptr_t* founder_include2 = nullptr;
  uintptr_t* founder_male_include2 = nullptr;
  char* chrom_name_ptr = nullptr;
  double tag_thresh = ldip->show_tags_r2 * (1 - SMALL_EPSILON);
  uint32_t tags_list = (ldip->modifier & LD_SHOW_TAGS_LIST_ALL) || (!ldip->show_tags_fname);
  uint32_t twocolumn = ldip->modifier & LD_SHOW_TAGS_MODE2;
  uint32_t ignore_x = (ldip->modifier & LD_IGNORE_X) & 1;
  uint32_t window_bp = ldip->show_tags_bp;
  uint32_t target_ct = 0;
  uint32_t chrom_name_len = 0;
  int32_t retval = 0;
  char chrom_name_buf[3 + MAX_CHROM_TEXTNUM_SLEN];
  int32_t dp_result[5];
  uintptr_t founder_ct_192_long;
  uintptr_t founder_ctwd12;
  uintptr_t founder_ctwd12_rem;
  uintptr_t lshift_last;
  uintptr_t line_idx;
  uintptr_t unrecog_ct;
  uintptr_t max_window_ctal;
  uintptr_t max_window_ctl;
  uintptr_t marker_uidx;
  uintptr_t window_cidx;
  uintptr_t window_cidx2;
  uintptr_t window_cidx3;
  uintptr_t marker_uidx2;
  uintptr_t ulii;
  uintptr_t* targets;
  uintptr_t* loadbuf_raw;
  uintptr_t* geno;
  uintptr_t* geno_masks;
  uintptr_t* geno_fixed_vec_ptr;
  uintptr_t* mask_fixed_vec_ptr;
  uintptr_t* geno_var_vec_ptr;
  uintptr_t* mask_var_vec_ptr;
  uintptr_t* cur_targets;
  uintptr_t* tag_matrix;
  uintptr_t* tag_matrix_row_ptr;
  char* sorted_marker_ids;
  char* bufptr;
  char* bufptr2;
  uint32_t* marker_id_map;
  uint32_t* window_uidxs;
  uint32_t* window_cidx_starts;
  uint32_t* missing_cts;
  double non_missing_ctd;
  double cov12;
  double dxx;
  double dyy;
  uint32_t founder_ct_mld_m1;
  uint32_t founder_ct_mld_rem;
  uint32_t chrom_fo_idx;
  uint32_t chrom_idx;
  uint32_t chrom_end;
  uint32_t chrom_marker_ct;
  uint32_t chrom_marker_idx;
  uint32_t is_haploid;
  uint32_t is_x;
  uint32_t is_y;
  uint32_t is_target;
  uint32_t marker_pos_thresh;
  uint32_t fixed_missing_ct;
  uint32_t fixed_non_missing_ct;
  uint32_t non_missing_ct;
  uint32_t slen;
  uint32_t tag_ct;
  uint32_t marker_uidx3;
  uint32_t min_bp;
  uint32_t max_bp;
  uint32_t cur_bp;
  uint32_t uii;
  int32_t ii;
  if (founder_ct < 2) {
    // Not an error: the command is skipped and 0 is returned.
    logerrprint("Warning: Skipping --show-tags since there are less than two founders.\n(--make-founders may come in handy here.)\n");
    goto show_tags_ret_1;
  }
  if (bigstack_alloc_ul(unfiltered_marker_ctl, &targets) ||
      bigstack_alloc_ul(unfiltered_sample_ctl2, &loadbuf_raw)) {
    goto show_tags_ret_NOMEM;
  }
  loadbuf_raw[unfiltered_sample_ctl2 - 1] = 0;
  if (ldip->show_tags_fname) {
    // Explicit target list: resolve each ID through a sorted copy of the
    // variant IDs.
    fill_ulong_zero(unfiltered_marker_ctl, targets);
    retval = sort_item_ids(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_ct, marker_ids, max_marker_id_len, 0, 0, strcmp_deref, &sorted_marker_ids, &marker_id_map);
    if (retval) {
      goto show_tags_ret_1;
    }
    if (fopen_checked(ldip->show_tags_fname, "r", &infile)) {
      goto show_tags_ret_OPEN_FAIL;
    }
    // Sentinel: fgets() overwrites this byte with '\0' only when a line fills
    // the whole buffer.
    g_textbuf[MAXLINELEN - 1] = ' ';
    line_idx = 0;
    unrecog_ct = 0;
    while (fgets(g_textbuf, MAXLINELEN, infile)) {
      line_idx++;
      if (!g_textbuf[MAXLINELEN - 1]) {
        LOGERRPRINTF("Error: Line %" PRIuPTR " of --show-tags file is pathologically long.\n", line_idx);
        goto show_tags_ret_INVALID_FORMAT;
      }
      bufptr = skip_initial_spaces(g_textbuf);
      if (is_eoln_kns(*bufptr)) {
        continue;
      }
      slen = strlen_se(bufptr);
      if (twocolumn) {
        bufptr2 = skip_initial_spaces(&(bufptr[slen]));
        // NOTE(review): this only fires if skip_initial_spaces() can return
        // null; if it never does, a line with a missing second token falls
        // through to the check below and is silently skipped.  Confirm
        // against the helper's definition.
        if (!bufptr2) {
          LOGERRPRINTF("Error: Line %" PRIuPTR " of --show-tags file has fewer tokens than expected.\n", line_idx);
          goto show_tags_ret_INVALID_FORMAT;
        }
        // Only lines flagged with exactly "1" name a target.
        if ((*bufptr2 != '1') || (!is_space_or_eoln(bufptr2[1]))) {
          continue;
        }
      }
      ii = bsearch_str(bufptr, slen, sorted_marker_ids, max_marker_id_len, marker_ct);
      if (ii == -1) {
        unrecog_ct++;
        continue;
      }
      marker_uidx = marker_id_map[(uint32_t)ii];
      if (IS_SET(targets, marker_uidx)) {
        bufptr[slen] = '\0';
        LOGERRPRINTF("Error: Duplicate variant ID '%s' in --show-tags file.\n", bufptr);
        goto show_tags_ret_INVALID_FORMAT;
      }
      SET_BIT(marker_uidx, targets);
    }
    if (fclose_null(&infile)) {
      goto show_tags_ret_READ_FAIL;
    }
    bigstack_reset((unsigned char*)marker_id_map);
    target_ct = popcount_longs(targets, unfiltered_marker_ctl);
    if (!target_ct) {
      logerrprint("Error: No recognized variant IDs in --show-tags file.\n");
      goto show_tags_ret_INVALID_FORMAT;
    }
    if (bigstack_alloc_ul(unfiltered_marker_ctl, &final_set)) {
      goto show_tags_ret_NOMEM;
    }
    // final_set starts as the target set; tags are added as they are found.
    memcpy(final_set, targets, unfiltered_marker_ctl * sizeof(intptr_t));
    LOGPRINTF("--show-tags: %u target variant%s loaded.\n", target_ct, (target_ct == 1)? "" : "s");
    if (unrecog_ct) {
      LOGERRPRINTF("Warning: %" PRIuPTR " unrecognized variant ID%s in --show-tags file.\n", unrecog_ct, (unrecog_ct == 1)? "" : "s");
    }
  } else {
    // No target file: every non-excluded variant is a target.
    bitarr_invert_copy(marker_exclude, unfiltered_marker_ct, targets);
  }
  // force founder_male_include2 allocation
  if (alloc_collapsed_haploid_filters(founder_info, sex_male, unfiltered_sample_ct, founder_ct, XMHH_EXISTS | hh_exists, 1, &founder_include2, &founder_male_include2)) {
    goto show_tags_ret_NOMEM;
  }
  // Same genotype-vector geometry as ld_report() computes.
  founder_ct_mld_m1 = (founder_ct - 1) / MULTIPLEX_LD;
  ulii = founder_ct_mld_m1 + 1;
#ifdef __LP64__
  founder_ct_mld_rem = (MULTIPLEX_LD / 192) - (ulii * MULTIPLEX_LD - founder_ct) / 192;
#else
  founder_ct_mld_rem = (MULTIPLEX_LD / 48) - (ulii * MULTIPLEX_LD - founder_ct) / 48;
#endif
  founder_ct_192_long = founder_ct_mld_m1 * (MULTIPLEX_LD / BITCT2) + founder_ct_mld_rem * (192 / BITCT2);
  uii = founder_ct / BITCT2;
  founder_ctwd12 = uii / 12;
  founder_ctwd12_rem = uii - (12 * founder_ctwd12);
  lshift_last = 2 * ((0x7fffffc0 - founder_ct) % BITCT2);
  // Size the circular window for the largest bp-span window on any
  // chromosome.
  for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
    max_window_size = chrom_window_max(marker_pos, marker_exclude, chrom_info_ptr, chrom_info_ptr->chrom_file_order[chrom_fo_idx], 0x7fffffff, window_bp * 2, max_window_size);
  }
  max_window_ctl = BITCT_TO_WORDCT(max_window_size);
  max_window_ctal = max_window_ctl * BITCT;
  if (bigstack_alloc_ui(max_window_size, &window_uidxs) ||
      bigstack_alloc_ui(max_window_size, &window_cidx_starts) ||
      bigstack_alloc_ui(max_window_size, &missing_cts) ||
      bigstack_alloc_ul(max_window_size * founder_ct_192_long, &geno) ||
      bigstack_alloc_ul(max_window_size * founder_ct_192_long, &geno_masks) ||
      bigstack_alloc_ul(max_window_ctl, &cur_targets) ||
      bigstack_alloc_ul(max_window_size * max_window_ctl, &tag_matrix)) {
    goto show_tags_ret_NOMEM;
  }
  // Zero the trailing words of every genotype/mask slot once, up front.
  uii = 2 + founder_ct_192_long - founder_ctl * 2;
  for (ulii = 1; ulii <= max_window_size; ulii++) {
    fill_ulong_zero(uii, &(geno[ulii * founder_ct_192_long - uii]));
    fill_ulong_zero(uii, &(geno_masks[ulii * founder_ct_192_long - uii]));
  }

  if (tags_list) {
    memcpy(outname_end, ".tags.list", 11);
    // Failure to open the output is an open failure, matching the handling of
    // the .tags file below.
    if (fopen_checked(outname, "w", &outfile)) {
      goto show_tags_ret_OPEN_FAIL;
    }
    // Build the header format at runtime so the SNP column is plink_maxsnp
    // characters wide.
    sprintf(g_textbuf, "%%%us CHR BP NTAG LEFT RIGHT KBSPAN TAGS\n", plink_maxsnp);
    fprintf(outfile, g_textbuf, "SNP");
  }
  printf("--show-tags%s: 0%%", final_set? "" : " all");
  for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
    chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
    chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
    marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end);
    chrom_marker_ct = chrom_end - marker_uidx - popcount_bit_idx(marker_exclude, marker_uidx, chrom_end);
    if (chrom_marker_ct < 2) {
      // No pairs possible on this chromosome; just advance the progress
      // counter.
      marker_idx += chrom_marker_ct;
      continue;
    }
    chrom_name_ptr = chrom_name_buf5w4write(chrom_info_ptr, chrom_idx, &chrom_name_len, chrom_name_buf);
    is_haploid = is_set(chrom_info_ptr->haploid_mask, chrom_idx);
    is_x = (chrom_idx == ((uint32_t)chrom_info_ptr->xymt_codes[X_OFFSET]));
    is_y = (chrom_idx == ((uint32_t)chrom_info_ptr->xymt_codes[Y_OFFSET]));
    if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
      goto show_tags_ret_READ_FAIL;
    }
    chrom_marker_idx = 0;
    // window_cidx: circular slot of the variant being loaded.
    // window_cidx2: circular slot of the oldest variant still in the window.
    window_cidx = max_window_size - 1;
    window_cidx2 = 0;
    do {
      if (++window_cidx == max_window_size) {
        window_cidx = 0;
      }
      window_uidxs[window_cidx] = marker_uidx;
      is_target = IS_SET(targets, marker_uidx);
      if (is_target) {
        SET_BIT(window_cidx, cur_targets);
      } else {
        CLEAR_BIT(window_cidx, cur_targets);
      }

      // circular index of beginning of window starting at current marker
      window_cidx_starts[window_cidx] = window_cidx2;
      geno_fixed_vec_ptr = &(geno[window_cidx * founder_ct_192_long]);
      mask_fixed_vec_ptr = &(geno_masks[window_cidx * founder_ct_192_long]);
      fill_ulong_zero(max_window_ctl, &(tag_matrix[window_cidx * max_window_ctl]));
      if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, marker_uidx), bedfile, loadbuf_raw, geno_fixed_vec_ptr)) {
        goto show_tags_ret_READ_FAIL;
      }
      if (is_haploid && hh_exists) {
        // bugfix: apply the haploid fix to the collapsed founder genotypes
        // that ld_process_load2() consumes.  loadbuf_raw still holds the
        // uncollapsed record (which the founder-collapsed masks do not
        // describe) and is not read again before the next load.
        haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)geno_fixed_vec_ptr);
      }
      ld_process_load2(geno_fixed_vec_ptr, mask_fixed_vec_ptr, &fixed_missing_ct, founder_ct, is_x && (!ignore_x), founder_male_include2);
      fixed_non_missing_ct = founder_ct - fixed_missing_ct;
      missing_cts[window_cidx] = fixed_missing_ct;
      // Compare the new variant against every older variant still in the
      // window.
      window_cidx3 = window_cidx2;
      while (window_cidx3 != window_cidx) {
        if (is_target || IS_SET(cur_targets, window_cidx3)) {
          // don't bother computing r^2 if no target variant involved
          geno_var_vec_ptr = &(geno[window_cidx3 * founder_ct_192_long]);
          mask_var_vec_ptr = &(geno_masks[window_cidx3 * founder_ct_192_long]);
          non_missing_ct = fixed_non_missing_ct - missing_cts[window_cidx3];
          if (fixed_missing_ct && missing_cts[window_cidx3]) {
            // Samples missing at both variants were subtracted twice.
            non_missing_ct += ld_missing_ct_intersect(mask_var_vec_ptr, mask_fixed_vec_ptr, founder_ctwd12, founder_ctwd12_rem, lshift_last);
          }
          if (non_missing_ct) {
            dp_result[0] = founder_ct;
            dp_result[1] = -((int32_t)fixed_non_missing_ct);
            dp_result[2] = missing_cts[window_cidx3] - founder_ct;
            dp_result[3] = dp_result[1];
            dp_result[4] = dp_result[2];
            ld_dot_prod(geno_var_vec_ptr, geno_fixed_vec_ptr, mask_var_vec_ptr, mask_fixed_vec_ptr, dp_result, founder_ct_mld_m1, founder_ct_mld_rem);
            non_missing_ctd = (double)((int32_t)non_missing_ct);
            dxx = dp_result[1];
            dyy = dp_result[2];
            cov12 = dp_result[0] * non_missing_ctd - dxx * dyy;
            // dxx is reused for the product of the two variance terms.
            dxx = (dp_result[3] * non_missing_ctd + dxx * dxx) * (dp_result[4] * non_missing_ctd + dyy * dyy);
            // Equivalent to r^2 > tag_thresh without the division.
            if (cov12 * cov12 > dxx * tag_thresh) {
              set_bit_ul(window_cidx * max_window_ctal + window_cidx3, tag_matrix);
              set_bit_ul(window_cidx3 * max_window_ctal + window_cidx, tag_matrix);
            }
          }
        }
        if (++window_cidx3 == max_window_size) {
          window_cidx3 = 0;
        }
      }
      if (++chrom_marker_idx < chrom_marker_ct) {
        // Advance to the next variant; anything more than window_bp before
        // it can be finalized.
        marker_uidx++;
        if (IS_SET(marker_exclude, marker_uidx)) {
          marker_uidx = next_unset_ul_unsafe(marker_exclude, marker_uidx);
          if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
            goto show_tags_ret_READ_FAIL;
          }
        }
        marker_pos_thresh = marker_pos[marker_uidx];
        if (marker_pos_thresh < window_bp) {
          marker_pos_thresh = 0;
        } else {
          marker_pos_thresh -= window_bp;
        }
      } else {
        // close out the chromosome
        marker_pos_thresh = 0x80000000U;
      }
      // Flush every variant that has fallen out of the window.
      marker_uidx2 = window_uidxs[window_cidx2];
      while (marker_pos[marker_uidx2] < marker_pos_thresh) {
        if (IS_SET(cur_targets, window_cidx2)) {
          // bugfix: tag_matrix_row_ptr is not always 16-byte aligned.
          tag_ct = popcount_longs_nzbase(tag_matrix, window_cidx2 * max_window_ctl, (window_cidx2 + 1) * max_window_ctl);
          tag_matrix_row_ptr = &(tag_matrix[window_cidx2 * max_window_ctl]);
          min_bp = marker_pos[marker_uidx2];
          max_bp = marker_pos[marker_uidx2];
          // First pass over this target's tags: record them in final_set and
          // track the bp range.  Iteration starts at the window start and
          // wraps around the circular buffer.
          window_cidx3 = window_cidx_starts[window_cidx2];
          for (uii = 0; uii < tag_ct; uii++, window_cidx3++) {
            next_set_ul_ck(tag_matrix_row_ptr, max_window_size, &window_cidx3);
            if (window_cidx3 == max_window_size) {
              window_cidx3 = next_set_unsafe(tag_matrix_row_ptr, 0);
            }
            marker_uidx3 = window_uidxs[window_cidx3];
            if (final_set) {
              SET_BIT(marker_uidx3, final_set);
            }
            if (tags_list) {
              cur_bp = marker_pos[marker_uidx3];
              if (cur_bp < min_bp) {
                min_bp = cur_bp;
              } else if (cur_bp > max_bp) {
                max_bp = cur_bp;
              }
            }
          }
          if (tags_list) {
            bufptr = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx2 * max_marker_id_len]), g_textbuf);
            *bufptr++ = ' ';
            bufptr = memcpyax(bufptr, chrom_name_ptr, chrom_name_len, ' ');
            bufptr = uint32toa_w10x(marker_pos[marker_uidx2], ' ', bufptr);
            bufptr = uint32toa_w4x(tag_ct, ' ', bufptr);
            bufptr = uint32toa_w10x(min_bp, ' ', bufptr);
            bufptr = uint32toa_w10x(max_bp, ' ', bufptr);
            bufptr = width_force(8, bufptr, dtoa_g(((int32_t)(max_bp - min_bp + 1)) * 0.001, bufptr));
            *bufptr++ = ' ';
            if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
              goto show_tags_ret_WRITE_FAIL;
            }
            // Second pass: write the '|'-separated tag IDs in the same order.
            window_cidx3 = window_cidx_starts[window_cidx2];
            for (uii = 0; uii < tag_ct; uii++, window_cidx3++) {
              next_set_ul_ck(tag_matrix_row_ptr, max_window_size, &window_cidx3);
              if (window_cidx3 == max_window_size) {
                window_cidx3 = next_set_unsafe(tag_matrix_row_ptr, 0);
              }
              if (uii) {
                putc_unlocked('|', outfile);
              }
              fputs(&(marker_ids[window_uidxs[window_cidx3] * max_marker_id_len]), outfile);
            }
            if (!tag_ct) {
              fputs("NONE", outfile);
            }
            putc_unlocked('\n', outfile);
          }
        }
        // Progress display.
        if (++marker_idx >= pct_thresh) {
          if (pct > 10) {
            putc_unlocked('\b', stdout);
          }
          pct = (marker_idx * 100LLU) / marker_ct;
          if (pct < 100) {
            printf("\b\b%" PRIuPTR "%%", pct);
            fflush(stdout);
            pct_thresh = ((++pct) * ((uint64_t)marker_ct)) / 100;
          }
        }
        if (window_cidx2 == window_cidx) {
          // The newest variant was just flushed, so the window is now empty.
          if (++window_cidx2 == max_window_size) {
            window_cidx2 = 0;
          }
          break;
        }
        if (++window_cidx2 == max_window_size) {
          window_cidx2 = 0;
        }
        marker_uidx2 = window_uidxs[window_cidx2];
      }
    } while (chrom_marker_idx < chrom_marker_ct);
  }
  putc_unlocked('\r', stdout);
  if (tags_list) {
    if (fclose_null(&outfile)) {
      goto show_tags_ret_WRITE_FAIL;
    }
    if (!final_set) {
      LOGPRINTFWW("--show-tags all: Report written to %s .\n", outname);
    }
  }
  if (final_set) {
    // Overwrites the ".tags.list" suffix (if any) with ".tags".
    memcpy(outname_end, ".tags", 6);
    if (fopen_checked(outname, "w", &outfile)) {
      goto show_tags_ret_OPEN_FAIL;
    }
    if (!twocolumn) {
      // One ID per line, targets and tags only.
      marker_uidx = next_set(final_set, 0, unfiltered_marker_ct);
      while (marker_uidx < unfiltered_marker_ct) {
        fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
        putc_unlocked('\n', outfile);
        marker_uidx++;
        next_set_ul_ck(final_set, unfiltered_marker_ct, &marker_uidx);
      }
    } else {
      // Every non-excluded variant, flagged 1 if it is a target or a tag.
      for (marker_uidx = 0, marker_idx = 0; marker_idx < marker_ct; marker_uidx++, marker_idx++) {
        next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx);
        fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
        putc_unlocked('\t', outfile);
        putc_unlocked('0' + IS_SET(final_set, marker_uidx), outfile);
        putc_unlocked('\n', outfile);
      }
    }
    if (fclose_null(&outfile)) {
      goto show_tags_ret_WRITE_FAIL;
    }
    // Number of variants added to the original target set.
    uii = popcount_longs(final_set, unfiltered_marker_ctl) - target_ct;
    if (tags_list) {
      LOGPRINTFWW("--show-tags: Main report written to %s.list , and simple tag ID list (%u tag%s added) written to %s .\n", outname, uii, (uii == 1)? "" : "s", outname);
    } else {
      LOGPRINTFWW("--show-tags: Simple tag ID list (%u tag%s added) written to %s .\n", uii, (uii == 1)? "" : "s", outname);
    }
  }
  while (0) {
  show_tags_ret_NOMEM:
    retval = RET_NOMEM;
    break;
  show_tags_ret_OPEN_FAIL:
    retval = RET_OPEN_FAIL;
    break;
  show_tags_ret_READ_FAIL:
    retval = RET_READ_FAIL;
    break;
  show_tags_ret_WRITE_FAIL:
    retval = RET_WRITE_FAIL;
    break;
  show_tags_ret_INVALID_FORMAT:
    retval = RET_INVALID_FORMAT;
    break;
  }
 show_tags_ret_1:
  bigstack_reset(bigstack_mark);
  fclose_cond(infile);
  fclose_cond(outfile);
  return retval;
}
6764
calc_lnlike_quantile(double known11,double known12,double known21,double known22,double unknown_dh,double freqx1,double freq1x,double freq2x,double freq11_expected,double denom,int32_t quantile)6765 double calc_lnlike_quantile(double known11, double known12, double known21, double known22, double unknown_dh, double freqx1, double freq1x, double freq2x, double freq11_expected, double denom, int32_t quantile) {
6766 // almost identical to calc_lnlike, but we can skip the equal-to-zero checks
6767 // when quantile isn't 100
6768 double tmp11 = quantile * denom + freq11_expected;
6769 double tmp12 = freq1x - tmp11;
6770 double tmp21 = freqx1 - tmp11;
6771 double tmp22 = freq2x - tmp21;
6772 if (quantile == 100) {
6773 // One of these values will be ~zero, and we want to ensure its logarithm
6774 // is treated as a very negative number instead of nan. May as well do it
6775 // the same way as Haploview.
6776 if (tmp11 < 1e-10) {
6777 tmp11 = 1e-10;
6778 }
6779 if (tmp12 < 1e-10) {
6780 tmp12 = 1e-10;
6781 }
6782 if (tmp21 < 1e-10) {
6783 tmp21 = 1e-10;
6784 }
6785 if (tmp22 < 1e-10) {
6786 tmp22 = 1e-10;
6787 }
6788 }
6789 return known11 * log(tmp11) + known12 * log(tmp12) + known21 * log(tmp21) + known22 * log(tmp22) + unknown_dh * log(tmp11 * tmp22 + tmp12 * tmp21);
6790 }
6791
haploview_blocks_classify(uint32_t * counts,uint32_t lowci_max,uint32_t lowci_min,uint32_t recomb_highci,uint32_t strong_highci,uint32_t strong_lowci,uint32_t strong_lowci_outer,uint32_t is_x,double recomb_fast_ln_thresh)6792 uint32_t haploview_blocks_classify(uint32_t* counts, uint32_t lowci_max, uint32_t lowci_min, uint32_t recomb_highci, uint32_t strong_highci, uint32_t strong_lowci, uint32_t strong_lowci_outer, uint32_t is_x, double recomb_fast_ln_thresh) {
6793 // See comments in the middle of haploview_blocks(). The key insight is that
6794 // we only need to classify the D' confidence intervals into a few types, and
6795 // this almost never requires evaluation of all 101 log likelihoods.
6796
6797 // Note that lowCI and highCI are *one-sided* 95% confidence bounds, i.e.
6798 // together, they form a 90% confidence interval.
6799 double known11 = (double)(2 * counts[0] + counts[1] + counts[3]);
6800 double known12 = (double)(2 * counts[2] + counts[1] + counts[5]);
6801 double known21 = (double)(2 * counts[6] + counts[3] + counts[7]);
6802 double known22 = (double)(2 * counts[8] + counts[5] + counts[7]);
6803 double total_prob = 0.0;
6804 double lnsurf_highstrong_thresh = 0.0;
6805 uint32_t onside_sol_ct = 1;
6806 double right_sum[83];
6807 double freq1x;
6808 double freq2x;
6809 double freqx1;
6810 double freqx2;
6811 double freq11_expected;
6812 double unknown_dh;
6813 double denom;
6814 double lnlike1;
6815 double lnsurf_highindiff_thresh;
6816 double dxx;
6817 double dyy;
6818 double dzz;
6819 uint32_t quantile;
6820 uint32_t center;
6821 if (is_x) {
6822 known11 -= (double)((int32_t)counts[9]);
6823 known12 -= (double)((int32_t)counts[11]);
6824 known21 -= (double)((int32_t)counts[12]);
6825 known22 -= (double)((int32_t)counts[14]);
6826 }
6827 if (em_phase_hethet(known11, known12, known21, known22, counts[4], &freq1x, &freq2x, &freqx1, &freqx2, &dzz, &onside_sol_ct)) {
6828 return 1;
6829 }
6830 freq11_expected = freqx1 * freq1x;
6831 dxx = dzz - freq11_expected;
6832 if (dxx < 0.0) {
6833 // D < 0, flip (1,1)<->(1,2) and (2,1)<->(2,2) to make D positive
6834 dyy = known11;
6835 known11 = known12;
6836 known12 = dyy;
6837 dyy = known21;
6838 known21 = known22;
6839 known22 = dyy;
6840 freq11_expected = freqx2 * freq1x;
6841 dyy = freqx1;
6842 freqx1 = freqx2;
6843 freqx2 = dyy;
6844 dxx = -dxx;
6845 }
6846 dyy = MINV(freqx1 * freq2x, freqx2 * freq1x);
6847 // this will always be in a term with a 0.01 multiplier from now on, so may
6848 // as well premultiply.
6849 denom = 0.01 * dyy;
6850 unknown_dh = (double)((int32_t)counts[4]);
6851
6852 // force this to an actual likelihood array entry, so we know for sure
6853 // total_prob >= 1.0 and can use that inequality for both early exit and
6854 // determining the "futility threshold" (terms smaller than 2^{-53} / 19 are
6855 // too small to matter).
6856 center = (int32_t)(((dxx / dyy) * 100) + 0.5);
6857
6858 lnlike1 = calc_lnlike_quantile(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, center);
6859
6860 // Previously assumed log likelihood was always concave, and used geometric
6861 // series bounds... then I realized this was NOT a safe assumption to make.
6862 // See e.g. rs9435793 and rs7531410 in 1000 Genomes phase 1.
6863 // So, instead, we only use an aggressive approach when onside_sol_ct == 1
6864 // (fortunately, that is almost always the case).
6865 if (onside_sol_ct == 1) {
6866 // It's not actually necessary to keep the entire likelihood array in
6867 // memory. This is similar to the HWE and Fisher's exact test
6868 // calculations: we can get away with tracking a few partial sums, and
6869 // exploit unimodality, fixed direction on both sides of the center,
6870 // knowledge of the center's location, and the fact that we only need to
6871 // classify the CI rather than fully evaluate it.
6872 //
6873 // Specifically, we need to determine the following:
6874 // 1. Is highCI >= 0.98? Or < 0.90?
6875 // 2. If highCI >= 0.98, is lowCI >= 0.81? In [0.71, 0.81)? Equal to
6876 // 0.70? In [0.51, 0.70)? In [0.01, 0.51)? Or < 0.01?
6877 // (Crucially, if highCI < 0.98, we don't actually need to determine
6878 // lowCI at all.)
6879 // To make this classification with as few relative likelihood evaluations
6880 // as possible (5 logs, an exp call, 8 multiplies, 9 adds... that's kinda
6881 // heavy for an inner loop operation), we distinguish the following cases:
6882 // a. D' >= 0.41. We first try to quickly rule out highCI >= 0.98 by
6883 // inspection of f(0.97). Then,
6884 // * If it's below the futility threshold, jump to case (b).
6885 // * Otherwise, sum f(0.98)..f(1.00), and then sum other likelihoods
6886 // from f(0.96) on down.
6887 // b. D' < 0.41. highCI >= 0.98 is impossible since f(0.41) >= f(0.42) >=
6888 // ...; goal is to quickly establish highCI < 0.90. A large fraction of
6889 // the time, this can be accomplished simply by inspecting f(0.89); if
6890 // it's less than 1/220, we're done because we know there's a 1
6891 // somewhere in the array, and the sum of the likelihoods between
6892 // f(0.89) and whatever that 1 entry is is bounded above by 12 * (1/220)
6893 // due to fixed direction. Otherwise, we sum from the top down.
6894 // This should be good for a ~10x speedup on the larger datasets where it's
6895 // most wanted.
6896 if (100 - center < 20 * (100 - strong_highci)) {
6897 dxx = calc_lnlike_quantile(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, strong_highci) - lnlike1;
6898 // ln(2^{-53} / 19) is just under -39.6812
6899 if ((center > strong_highci) || (dxx > -39.6812)) {
6900 total_prob = exp(dxx);
6901 for (quantile = 100; quantile > strong_highci; quantile--) {
6902 total_prob += exp(calc_lnlike_quantile(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, quantile) - lnlike1);
6903 }
6904 if (total_prob > (1.0 / 19.0)) {
6905 // branch 1: highCI might be >= 0.98
6906 lnsurf_highstrong_thresh = total_prob * 20;
6907 for (quantile = strong_highci - 1; quantile >= recomb_highci; quantile--) {
6908 total_prob += exp(calc_lnlike_quantile(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, quantile) - lnlike1);
6909 }
6910 lnsurf_highindiff_thresh = total_prob * 20;
6911 while (1) {
6912 dxx = exp(calc_lnlike_quantile(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, quantile) - lnlike1);
6913 total_prob += dxx;
6914 // see comments on branch 2. this is more complicated because we
6915 // still have work to do after resolving whether highCI >= 0.98,
6916 // but the reasoning is similar.
6917 if (total_prob >= lnsurf_highstrong_thresh) {
6918 if (quantile >= center) {
6919 goto haploview_blocks_classify_no_highstrong_1;
6920 }
6921 goto haploview_blocks_classify_no_highstrong_2;
6922 }
6923 if ((quantile <= lowci_max) && (quantile >= lowci_min)) {
6924 // We actually only need the [52..100], [71..100], [72..100], and
6925 // [82..100] right sums, but saving a few extra values is
6926 // probably more efficient than making this if-statement more
6927 // complicated. [99 - quantile] rather than e.g. [quantile]
6928 // is used so memory writes go to sequentially increasing rather
6929 // than decreasing addresses. (okay, this shouldn't matter since
6930 // everything should be in L1 cache, but there's negligible
6931 // opportunity cost)
6932 right_sum[quantile] = total_prob;
6933 }
6934 dxx *= ((int32_t)quantile);
6935 if (total_prob + dxx < lnsurf_highstrong_thresh) {
6936 while (1) {
6937 // Now we want to bound lowCI, optimizing for being able to
6938 // quickly establish lowCI >= 0.71.
6939 if (dxx * 19 < total_prob) {
6940 // less than 5% remaining on left tail
6941 if (quantile >= lowci_max) {
6942 return 6;
6943 }
6944 while (quantile > lowci_min) {
6945 quantile--;
6946 total_prob += exp(calc_lnlike_quantile(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, quantile) - lnlike1);
6947 if (quantile <= lowci_max) {
6948 right_sum[quantile] = total_prob;
6949 }
6950 }
6951 dyy = right_sum[lowci_min] * (20.0 / 19.0);
6952 while (total_prob < dyy) {
6953 if ((!quantile) || (dxx <= RECIP_2_53)) {
6954 total_prob *= 0.95;
6955 if (total_prob >= right_sum[strong_lowci_outer]) {
6956 // lowCI < 0.70
6957 // -> f(0.00) + f(0.01) + ... + f(0.70) > 0.05 * total
6958 return 3;
6959 } else if (total_prob < right_sum[lowci_max]) {
6960 return 6;
6961 } else if ((lowci_max > strong_lowci) && (total_prob < right_sum[strong_lowci])) {
6962 return 5;
6963 }
6964 return 4;
6965 }
6966 quantile--;
6967 dxx = exp(calc_lnlike_quantile(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, quantile) - lnlike1);
6968 total_prob += dxx;
6969 }
6970 return 2;
6971 }
6972 quantile--;
6973 dxx = exp(calc_lnlike_quantile(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, quantile) - lnlike1);
6974 total_prob += dxx;
6975 if ((quantile <= lowci_max) && (quantile >= lowci_min)) {
6976 right_sum[quantile] = total_prob;
6977 }
6978 dxx *= ((int32_t)quantile);
6979 }
6980 }
6981 quantile--;
6982 }
6983 }
6984 }
6985 quantile = strong_highci - 1;
6986 } else {
6987 quantile = 100;
6988 }
6989 // branch 2: highCI guaranteed less than 0.98. If D' <= 0.875, try to
6990 // quickly establish highCI < 0.90.
6991 dxx = calc_lnlike_quantile(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, recomb_highci) - lnlike1;
6992 if ((center < recomb_highci) && (dxx < recomb_fast_ln_thresh)) {
6993 return 0;
6994 }
6995 // okay, we'll sum the whole right tail. May as well sum from the outside
6996 // in here for a bit more numerical stability, instead of adding exp(dxx)
6997 // first.
6998 do {
6999 total_prob += exp(calc_lnlike_quantile(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, quantile) - lnlike1);
7000 } while (--quantile > recomb_highci);
7001 total_prob += exp(dxx);
7002 lnsurf_highindiff_thresh = total_prob * 20;
7003 haploview_blocks_classify_no_highstrong_1:
7004 quantile--;
7005 if (center < recomb_highci) {
7006 // if we know there's a 1.0 ahead in the likelihood array, may as well
7007 // take advantage of that
7008 lnsurf_highstrong_thresh = lnsurf_highindiff_thresh - 1.0;
7009 while (quantile > center) {
7010 total_prob += exp(calc_lnlike_quantile(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, quantile) - lnlike1);
7011 if (total_prob >= lnsurf_highstrong_thresh) {
7012 return 0;
7013 }
7014 quantile--;
7015 }
7016 if (!center) {
7017 return 1;
7018 }
7019 total_prob += 1;
7020 quantile--;
7021 }
7022 // likelihoods are now declining, try to exploit that to exit early
7023 // (it's okay if the first likelihood does not represent a decline)
7024 while (1) {
7025 dxx = exp(calc_lnlike_quantile(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, quantile) - lnlike1);
7026 total_prob += dxx;
7027 haploview_blocks_classify_no_highstrong_2:
7028 if (total_prob >= lnsurf_highindiff_thresh) {
7029 return 0;
7030 }
7031 if (total_prob + ((int32_t)quantile) * dxx < lnsurf_highindiff_thresh) {
7032 // guaranteed to catch quantile == 0
7033 return 1;
7034 }
7035 quantile--;
7036 }
7037 }
7038 for (quantile = 100; quantile >= recomb_highci; quantile--) {
7039 total_prob += exp(calc_lnlike_quantile(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, quantile) - lnlike1);
7040 if (quantile == strong_highci) {
7041 lnsurf_highstrong_thresh = total_prob * 20;
7042 }
7043 }
7044 if (total_prob < (1.0 / 19.0)) {
7045 return 0;
7046 }
7047 lnsurf_highindiff_thresh = total_prob * 20;
7048 while (1) {
7049 total_prob += exp(calc_lnlike_quantile(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, quantile) - lnlike1);
7050 if (total_prob >= lnsurf_highindiff_thresh) {
7051 return 0;
7052 }
7053 if (quantile <= lowci_max) {
7054 if (quantile >= lowci_min) {
7055 right_sum[quantile] = total_prob;
7056 } else if (!quantile) {
7057 break;
7058 }
7059 }
7060 quantile--;
7061 }
7062 if (total_prob >= lnsurf_highstrong_thresh) {
7063 return 1;
7064 }
7065 total_prob *= 0.95;
7066 if (total_prob < right_sum[strong_lowci]) {
7067 if ((lowci_max > strong_lowci) && (total_prob >= right_sum[lowci_max])) {
7068 return 5;
7069 }
7070 return 6;
7071 }
7072 if (total_prob >= right_sum[strong_lowci_outer]) {
7073 if ((lowci_min < strong_lowci_outer) && (total_prob >= right_sum[lowci_min])) {
7074 return 2;
7075 }
7076 return 3;
7077 }
7078 return 4;
7079 }
7080
// Implements --blocks: estimates haplotype blocks on each chromosome and
// writes them to <outname>.blocks (one '*'-prefixed line of variant IDs per
// block) and <outname>.blocks.det (per-block CHR/BP1/BP2/KB/NSNPS/SNPS table).
//
// Only founders are used; unless the LD_BLOCKS_NO_PHENO_REQ modifier is set,
// they must also have a nonmissing phenotype (pheno_nm).  Variants whose
// set_allele_freqs[] entry is outside [blocks_min_maf, 1 - blocks_min_maf]
// (up to a small epsilon) are excluded before any pair is evaluated.
//
// outname_end must point at the end of the output filename prefix inside
// outname; this function overwrites it with ".blocks.det" and later truncates
// it to ".blocks".
//
// Returns 0 on success, and also when the run is skipped with a warning
// (fewer than two usable founders or variants).  Otherwise returns
// RET_NOMEM, RET_OPEN_FAIL, RET_READ_FAIL, RET_WRITE_FAIL, or (32-bit builds
// only) RET_INVALID_CMDLINE.  All bigstack allocations made here are released
// before returning, and any still-open output file is closed.
int32_t haploview_blocks(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude_orig, char* marker_ids, uintptr_t max_marker_id_len, uint32_t* marker_pos, Chrom_info* chrom_info_ptr, double* set_allele_freqs, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uintptr_t* pheno_nm, uintptr_t* sex_male, char* outname, char* outname_end, uint32_t hh_exists) {
  // See Plink::mkBlks() in blox.cpp (which is, in turn, a port of doGabriel()
  // in FindBlocks.java and computeDPrime() in HaploData.java from Haploview).
  // No unwindowed/inter-chr mode, so little point in bothering with
  // multithreading.
  //
  // MAF < 0.05 markers have a minor effect on PLINK 1.07 --blocks's behavior
  // when present, while Haploview completely ignores them.  We replicate
  // Haploview's behavior.
  unsigned char* bigstack_mark = g_bigstack_base;
  uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
  uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
  uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
  uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
  FILE* outfile = nullptr;
  FILE* outfile_det = nullptr;
  // circular.  [2n] = numStrong, [2n+1] = numRec
  uintptr_t* strong_rec_cts = nullptr;
  uintptr_t* founder_include2 = nullptr;
  uintptr_t* founder_male_include2 = nullptr;
  uintptr_t marker_uidx = 0;
  uintptr_t block_idx_first = 0;
  uintptr_t block_uidx_first = 0;
  uintptr_t block_pos_first = 0;
  uintptr_t prev_strong = 0;
  uintptr_t prev_rec = 0;
  uintptr_t markers_done = 0;
  uint32_t no_pheno_req = ldip->modifier & LD_BLOCKS_NO_PHENO_REQ;
  uint32_t max_window_bp = ldip->blocks_max_bp;
  // span limits applied to 2-variant and 3-variant candidate blocks,
  // respectively (lifted below by LD_BLOCKS_NO_SMALL_MAX_SPAN)
  uint32_t max_window_bp1 = 20000;
  uint32_t max_window_bp2 = 30000;
  uint32_t recomb_highci = ldip->blocks_recomb_highci;
  uint32_t strong_highci = ldip->blocks_strong_highci;
  uint32_t strong_lowci = ldip->blocks_strong_lowci;
  uint32_t strong_lowci_outer = ldip->blocks_strong_lowci_outer;
  uint32_t block_ct = 0;
  uint32_t maxspan = 0;
  uint32_t pct = 0;
  int32_t retval = 0;
  double recomb_fast_ln_thresh = -log((int32_t)((100 - recomb_highci) * 20));
  double inform_frac = ldip->blocks_inform_frac + SMALLISH_EPSILON;
  uint32_t inform_thresh_two = 1 + ((int32_t)(3 * inform_frac));
  uint32_t inform_thresh_three = (int32_t)(6 * inform_frac);
  uint32_t counts[15];
  // [0]: (m, m-1)
  // [1]: (m, m-2)
  // [2]: (m-1, m-2)
  // [3]: (m-1, m-3)
  // [4]: (m-2, m-3)
  uint32_t recent_ci_types[5];
  uint32_t index_tots[5];
  uintptr_t* founder_pnm;
  uintptr_t* marker_exclude;
  uintptr_t* in_haploblock;
  uintptr_t* loadbuf_raw;
  uintptr_t* index_data;
  uintptr_t* window_data;
  uintptr_t* window_data_ptr;
  unsigned char* bigstack_mark2;
  uint32_t* block_uidxs;
  uint32_t* forward_block_sizes;
  uint32_t* candidate_pairs;
  char* wptr_start;
  char* wptr;
  char* sptr;
  uintptr_t cur_marker_ct;
  uintptr_t max_block_size;
  uintptr_t marker_idx;
  uintptr_t cur_block_size;
  uintptr_t last_block_size;
  uintptr_t founder_ct;
  uintptr_t founder_ctl2;
  uintptr_t founder_ctv2;
  uintptr_t final_mask;
  uintptr_t futility_rec;
  uintptr_t max_candidates;
  uintptr_t candidate_ct;
  uintptr_t candidate_idx;
  uintptr_t delta;
  uintptr_t pct_thresh;
  uintptr_t ulii;
  double min_maf;
  double max_maf;
  double dxx;
  uint32_t chrom_fo_idx;
  uint32_t chrom_idx;
  uint32_t chrom_start;
  uint32_t chrom_end;
  uint32_t is_haploid;
  uint32_t is_x;
  uint32_t is_y;
  uint32_t marker_pos_thresh;
  uint32_t forward_scan_uidx;
  uint32_t block_cidx;
  uint32_t block_cidx2;
  uint32_t cur_strong;
  uint32_t cur_rec;
  uint32_t lowci_max;
  uint32_t lowci_min;
  uint32_t cur_ci_type;
  uint32_t cur_marker_pos;
  uint32_t uii;
  uint32_t ujj;
  // suppress warning
  index_tots[3] = 0;
  index_tots[4] = 0;
  if (ldip->modifier & LD_BLOCKS_NO_SMALL_MAX_SPAN) {
    max_window_bp1 = 0x7fffffff;
    max_window_bp2 = 0x7fffffff;
  }

  // First enforce MAF 0.05 minimum; then, on each chromosome:
  // 1. Determine maximum number of markers that might need to be loaded at
  //    once on current chromosome, and then (re)allocate memory buffers.
  // 2. Find all pairs of markers satisfying the "strong LD" and informative
  //    fraction criteria.  (The original algorithm deferred the informative
  //    fraction calculation; we don't do that because it forces nonsequential
  //    file access.)
  // 3. Sort the pairs in decreasing order primarily by bp distance, and
  //    secondarily by start uidx.
  // 4. Greedily construct blocks from the sorted list (i.e. form largest
  //    blocks first).
  if (bigstack_alloc_ul(unfiltered_sample_ctl, &founder_pnm) ||
      bigstack_alloc_ul(unfiltered_marker_ctl, &marker_exclude) ||
      bigstack_alloc_ul(unfiltered_marker_ctl, &in_haploblock) ||
      bigstack_alloc_ul(unfiltered_sample_ctl2, &loadbuf_raw)) {
    goto haploview_blocks_ret_NOMEM;
  }
  // founder_pnm := founders, restricted to nonmissing phenotypes unless
  // no-pheno-req was specified.  This is the sample mask used for everything
  // below.
  memcpy(founder_pnm, founder_info, unfiltered_sample_ctl * sizeof(intptr_t));
  if (!no_pheno_req) {
    bitvec_and(pheno_nm, unfiltered_sample_ctl, founder_pnm);
  }
  founder_ct = popcount_longs(founder_pnm, unfiltered_sample_ctl);
  if (founder_ct < 2) {
    if ((!no_pheno_req) && (!popcount_longs(pheno_nm, unfiltered_sample_ctl))) {
      logerrprint("Warning: Skipping --blocks, since there are less than two founders with\nnonmissing phenotypes. (The 'no-pheno-req' modifier removes the phenotype\nrestriction.)\n");
    } else {
      logerrprint("Warning: Skipping --blocks, since there are less than two founders with\nnonmissing phenotypes. (--make-founders may come in handy here.)\n");
    }
    goto haploview_blocks_ret_1;
  }
  final_mask = get_final_mask(founder_ct);
  // Work on a private copy of the exclusion mask, so the MAF filter does not
  // leak back to the caller.
  memcpy(marker_exclude, marker_exclude_orig, unfiltered_marker_ctl * sizeof(intptr_t));
  if (ldip->blocks_min_maf > 0.0) {
    min_maf = ldip->blocks_min_maf * (1 - SMALL_EPSILON);
    max_maf = 1 - min_maf;
    for (marker_idx = 0; marker_idx < marker_ct; marker_uidx++, marker_idx++) {
      next_unset_ul_unsafe_ck(marker_exclude_orig, &marker_uidx);
      dxx = set_allele_freqs[marker_uidx];
      if ((dxx < min_maf) || (dxx > max_maf)) {
        set_bit_ul(marker_uidx, marker_exclude);
      }
    }
    marker_ct = unfiltered_marker_ct - popcount_longs(marker_exclude, unfiltered_marker_ctl);
  }
  if (marker_ct < 2) {
    logerrprint("Warning: Skipping --blocks since there are too few variants with MAF >= 0.05.\n");
    goto haploview_blocks_ret_1;
  }
  pct_thresh = marker_ct / 100;
  fill_ulong_zero(unfiltered_marker_ctl, in_haploblock);
  loadbuf_raw[unfiltered_sample_ctl2 - 1] = 0;
  founder_ctl2 = QUATERCT_TO_WORDCT(founder_ct);
  founder_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(founder_ct);
  if (bigstack_alloc_ul(5 * founder_ctv2, &index_data)) {
    goto haploview_blocks_ret_NOMEM;
  }
  // The haploid filters must be built from founder_pnm, not founder_info:
  // founder_ct is the popcount of founder_pnm, and genotype data is collapsed
  // with founder_pnm by load_and_collapse_incl() below.  Using founder_info
  // here would mismatch mask and count whenever a founder has a missing
  // phenotype and no-pheno-req is absent.
  if (alloc_collapsed_haploid_filters(founder_pnm, sex_male, unfiltered_sample_ct, founder_ct, Y_FIX_NEEDED, 1, &founder_include2, &founder_male_include2)) {
    goto haploview_blocks_ret_NOMEM;
  }
  // 12 bytes = ".blocks.det" plus null terminator
  memcpy(outname_end, ".blocks.det", 12);
  if (fopen_checked(outname, "w", &outfile_det)) {
    goto haploview_blocks_ret_OPEN_FAIL;
  }
  if (fputs_checked(" CHR BP1 BP2 KB NSNPS SNPS\n", outfile_det)) {
    goto haploview_blocks_ret_WRITE_FAIL;
  }
  // truncate to ".blocks"
  outname_end[7] = '\0';
  if (fopen_checked(outname, "w", &outfile)) {
    goto haploview_blocks_ret_OPEN_FAIL;
  }
  // everything allocated past this point is per-chromosome, and is released
  // at the start of the next chromosome's iteration
  bigstack_mark2 = g_bigstack_base;
  fputs("--blocks: 0%", stdout);
  fflush(stdout);
  for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++, markers_done += cur_marker_ct) {
    chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
    chrom_start = next_unset(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end);
    cur_marker_ct = chrom_end - chrom_start - popcount_bit_idx(marker_exclude, chrom_start, chrom_end);
    if (cur_marker_ct < 2) {
      continue;
    }
    marker_uidx = chrom_start;
    chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
    max_block_size = chrom_window_max(marker_pos, marker_exclude, chrom_info_ptr, chrom_idx, 0x7fffffff, max_window_bp, 1);
    if (max_block_size < 2) {
      continue;
    }
#ifndef __LP64__
    if (max_block_size > 65536) {
      logprint("\n");
      logerrprint("Error: 32-bit --blocks cannot analyze potential blocks with more than 65536\nvariants. Use a 64-bit PLINK build or a smaller --blocks-window-kb value.\n");
      goto haploview_blocks_ret_INVALID_CMDLINE;
    }
#endif
    is_haploid = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx);
    is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
    is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
    bigstack_reset(bigstack_mark2);
    // Need to compute full 3x3 count tables, but only for a limited window;
    // more similar to --clump than --fast-epistasis, so we don't bother with
    // precomputing 0-only/1-only/2-only bitfields or multithreading for now.

    // For each pair, we just need to know 100x the Haploview lowCI and highCI
    // values; lod and dp are unnecessary since the CI value also tracks bad
    // pairs.  More precisely, there are only seven types of CIs worth
    // distinguishing:
    //   0. non-bad pair, and highCI < recHighCI (0.90)
    //   1. "null" (bad pair, or highCI in [0.90, 0.98))
    //   2. highCI >= 0.98, and lowCI < 0.51
    //      (treated the same as type 1, but it takes no additional effort to
    //      distinguish this case)
    //   3. highCI >= 0.98, and lowCI in [0.51, 0.70)
    //   4. highCI >= 0.98, and lowCI == 0.70
    //      (turns out (double)70 / 100.0 compares exactly equal to 0.70, so
    //      Haploview's use of < cutLowCI in its initial "strong LD" check
    //      actually behaves differently from the later "not > cutLowCI" check)
    //   5. highCI >= 0.98, and lowCI in [0.71, 0.81)
    //   6. highCI >= 0.98, and lowCI in [0.81, 1]
    // And it gets better than that: given block size n, we just need to
    // maintain #(type 0) and #(type 4/5/6) arrays (and a tiny array with more
    // detailed information on the most recent blocks) to find all potentially
    // valid blocks in a single pass.  So we can use practically all our memory
    // to track and sort those blocks by bp length.
    if (bigstack_alloc_ui(max_block_size, &block_uidxs) ||
        bigstack_alloc_ui(max_block_size, &forward_block_sizes) ||
        bigstack_alloc_ul(max_block_size * founder_ctv2, &window_data)) {
      goto haploview_blocks_ret_NOMEM;
    }
    if (max_block_size >= 4) {
      // After marker m is fully processed,
      //   strong_rec_cts[(block_cidx + delta) * 2] = numStrong, and
      //   strong_rec_cts[(block_cidx + delta) * 2 + 1] = numRec
      // for the potential [m - delta, m] block, taking array indices modulo
      // max_block_size * 2.
      if (bigstack_alloc_ul(max_block_size * 2, &strong_rec_cts)) {
        goto haploview_blocks_ret_NOMEM;
      }
    }
    // zero the last two words of every window_data slot
    window_data_ptr = &(window_data[founder_ctv2 - 2]);
    for (ulii = 0; ulii < max_block_size; ulii++) {
      window_data_ptr[0] = 0;
      window_data_ptr[1] = 0;
      window_data_ptr = &(window_data_ptr[founder_ctv2]);
    }
    block_idx_first = 0;
    block_uidx_first = chrom_start;
    marker_uidx = chrom_start;
    block_pos_first = marker_pos[chrom_start];
    // Devote all remaining workspace to the candidate list; each candidate is
    // a (bp span, first uidx, last uidx) triple of uint32s.
    max_candidates = bigstack_left() / (3 * sizeof(int32_t));
    bigstack_alloc_ui(max_candidates * 3, &candidate_pairs);
    candidate_ct = 0;
    cur_block_size = 0;
    fill_uint_zero(3, recent_ci_types);
    // count down instead of up so more memory accesses are sequential
    block_cidx = max_block_size;
    forward_scan_uidx = marker_uidx;
    if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
      goto haploview_blocks_ret_READ_FAIL;
    }
    for (marker_idx = 0; marker_idx < cur_marker_ct; marker_uidx++, marker_idx++) {
      // advance (downward, with wraparound) to the circular-buffer slot for
      // the current marker
      if (block_cidx) {
        block_cidx--;
      } else {
        block_cidx = max_block_size - 1;
      }
      window_data_ptr = &(window_data[block_cidx * founder_ctv2]);
      if (IS_SET(marker_exclude, marker_uidx)) {
        marker_uidx = next_unset_ul_unsafe(marker_exclude, marker_uidx);
        if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
          goto haploview_blocks_ret_READ_FAIL;
        }
      }
      block_uidxs[block_cidx] = marker_uidx;
      if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_pnm, final_mask, 0, bedfile, loadbuf_raw, window_data_ptr)) {
        goto haploview_blocks_ret_READ_FAIL;
      }
      if (is_haploid) {
        haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)window_data_ptr);
      }
      cur_marker_pos = marker_pos[marker_uidx];
      // lowest bp position still within max_window_bp of the current marker
      // (clamped at 0)
      marker_pos_thresh = cur_marker_pos;
      if (marker_pos_thresh < max_window_bp) {
        marker_pos_thresh = 0;
      } else {
        marker_pos_thresh -= max_window_bp;
      }
      // slide the window start forward past markers that are now too far away
      if (marker_pos_thresh > block_pos_first) {
        do {
          block_uidx_first++;
          next_unset_ul_unsafe_ck(marker_exclude, &block_uidx_first);
          block_pos_first = marker_pos[block_uidx_first];
          block_idx_first++;
        } while (marker_pos_thresh > block_pos_first);
      }
      last_block_size = cur_block_size;
      cur_block_size = marker_idx - block_idx_first;
      // shift the recent-pair CI types by one marker: (m, m-1) -> (m-1, m-2),
      // etc.  [2] must be copied to [4] before it is overwritten.
      recent_ci_types[4] = recent_ci_types[2];
      recent_ci_types[2] = recent_ci_types[0];
      recent_ci_types[3] = recent_ci_types[1];
      if (cur_block_size > last_block_size) {
        cur_block_size = last_block_size + 1;
      }
      // now determine maximum local block size, so we can set futility_rec
      // efficiently.
      marker_pos_thresh = cur_marker_pos + max_window_bp;
      if (forward_scan_uidx < marker_uidx) {
        forward_scan_uidx = marker_uidx;
      }
      while (marker_pos_thresh >= marker_pos[forward_scan_uidx]) {
        uii = forward_scan_uidx + 1;
        next_unset_ck(marker_exclude, chrom_end, &uii);
        if (uii == chrom_end) {
          break;
        }
        forward_scan_uidx = uii;
      }
      uii = forward_scan_uidx + 1 - marker_uidx - popcount_bit_idx(marker_exclude, marker_uidx, forward_scan_uidx);
      forward_block_sizes[block_cidx] = uii;
      if (!cur_block_size) {
        continue;
      }
      // uii := largest forward block size among the current marker and all
      // earlier markers in the window
      block_cidx2 = block_cidx + 1;
      for (delta = 1; delta <= cur_block_size; delta++, block_cidx2++) {
        if (block_cidx2 == max_block_size) {
          block_cidx2 = 0;
        }
        if (forward_block_sizes[block_cidx2] > uii) {
          uii = forward_block_sizes[block_cidx2];
        }
      }
      ulii = uii;
      // If numRec ever reaches this value, we can just move on to the next
      // marker (even skipping the remaining D' evaluations).
      futility_rec = 1 + (((double)((intptr_t)((ulii * (ulii - 1)) / 2))) * (1.0 - inform_frac));
      block_cidx2 = block_cidx + 1;
      cur_strong = 0;
      cur_rec = 0;
      // Split the current marker's genotypes into per-genotype-code masks
      // (codes 0, 2, 3), and record each mask's sample count, so the 3x3
      // table against any other marker reduces to genovec_3freq() calls.
      vec_datamask(founder_ct, 0, window_data_ptr, founder_include2, index_data);
      index_tots[0] = popcount2_longs(index_data, founder_ctl2);
      vec_datamask(founder_ct, 2, window_data_ptr, founder_include2, &(index_data[founder_ctv2]));
      index_tots[1] = popcount2_longs(&(index_data[founder_ctv2]), founder_ctl2);
      vec_datamask(founder_ct, 3, window_data_ptr, founder_include2, &(index_data[2 * founder_ctv2]));
      index_tots[2] = popcount2_longs(&(index_data[2 * founder_ctv2]), founder_ctl2);
      if (is_x) {
        // male-only masks (codes 0 and 3) for chrX
        vec_datamask(founder_ct, 0, window_data_ptr, founder_male_include2, &(index_data[3 * founder_ctv2]));
        index_tots[3] = popcount2_longs(&(index_data[3 * founder_ctv2]), founder_ctl2);
        vec_datamask(founder_ct, 3, window_data_ptr, founder_male_include2, &(index_data[4 * founder_ctv2]));
        index_tots[4] = popcount2_longs(&(index_data[4 * founder_ctv2]), founder_ctl2);
      }
      lowci_max = 82;
      lowci_min = 52;
      // Evaluate the current marker against each earlier marker in the
      // window, nearest first.
      for (delta = 1; delta <= cur_block_size; delta++, block_cidx2++) {
        if (block_cidx2 == max_block_size) {
          block_cidx2 = 0;
        }
        if (delta >= 4) {
          prev_rec = strong_rec_cts[block_cidx2 * 2 + 1];
          if (cur_rec + prev_rec >= futility_rec) {
            cur_block_size = delta - 1;
            break;
          }
          prev_strong = strong_rec_cts[block_cidx2 * 2];
        }
        window_data_ptr = &(window_data[block_cidx2 * founder_ctv2]);
        // For each row, genovec_3freq() fills in three of the four cells; the
        // remaining one is recovered by subtracting from the row total.
        genovec_3freq(window_data_ptr, index_data, founder_ctl2, &(counts[0]), &(counts[1]), &(counts[2]));
        counts[0] = index_tots[0] - counts[0] - counts[1] - counts[2];
        genovec_3freq(window_data_ptr, &(index_data[founder_ctv2]), founder_ctl2, &(counts[3]), &(counts[4]), &(counts[5]));
        counts[3] = index_tots[1] - counts[3] - counts[4] - counts[5];
        genovec_3freq(window_data_ptr, &(index_data[2 * founder_ctv2]), founder_ctl2, &(counts[6]), &(counts[7]), &(counts[8]));
        counts[6] = index_tots[2] - counts[6] - counts[7] - counts[8];
        if (is_x) {
          genovec_3freq(window_data_ptr, &(index_data[3 * founder_ctv2]), founder_ctl2, &(counts[9]), &(counts[10]), &(counts[11]));
          // counts[10] should always be zero
          counts[9] = index_tots[3] - counts[9] - counts[11];
          genovec_3freq(window_data_ptr, &(index_data[4 * founder_ctv2]), founder_ctl2, &(counts[12]), &(counts[13]), &(counts[14]));
          counts[12] = index_tots[4] - counts[12] - counts[14];
        }
        cur_ci_type = haploview_blocks_classify(counts, lowci_max, lowci_min, recomb_highci, strong_highci, strong_lowci, strong_lowci_outer, is_x, recomb_fast_ln_thresh);
        if (cur_ci_type > 4) {
          cur_strong++;
        } else if (!cur_ci_type) {
          cur_rec++;
        }
        if (delta < 4) {
          // Blocks of 2-4 variants are handled explicitly via
          // recent_ci_types[]; larger blocks use the running strong_rec_cts[]
          // totals.
          if (delta == 1) {
            lowci_max = strong_lowci;
            recent_ci_types[0] = cur_ci_type;
            if ((cur_ci_type == 6) && (cur_marker_pos - marker_pos[block_uidxs[block_cidx2]] <= max_window_bp1)) {
              goto haploview_blocks_save_candidate;
            }
          } else if (delta == 2) {
            recent_ci_types[1] = cur_ci_type;
            if ((cur_ci_type >= 4) && (cur_marker_pos - marker_pos[block_uidxs[block_cidx2]] <= max_window_bp2)) {
              uii = 1;
              if (recent_ci_types[0] >= 3) {
                uii++;
              }
              if (recent_ci_types[2] >= 3) {
                uii++;
              }
              if (uii >= inform_thresh_two) {
                goto haploview_blocks_save_candidate;
              }
            }
          } else {
            lowci_min = strong_lowci_outer;
            prev_strong = 0; // 5+
            uii = 0; // 3+, not counting cur_ci_type
            prev_rec = 0;
            if (cur_ci_type > 4) {
              prev_strong++;
            } else if (!cur_ci_type) {
              prev_rec++;
            }
            for (ujj = 0; ujj < 5; ujj++) {
              if (recent_ci_types[ujj] >= 3) {
                uii++;
                if (recent_ci_types[ujj] > 4) {
                  prev_strong++;
                }
              } else if (!recent_ci_types[ujj]) {
                prev_rec++;
              }
            }
            // seed the running totals for the [m - 3, m] block
            strong_rec_cts[block_cidx2 * 2] = prev_strong;
            strong_rec_cts[block_cidx2 * 2 + 1] = prev_rec;
            if ((cur_ci_type >= 4) && (uii >= inform_thresh_three)) {
              goto haploview_blocks_save_candidate;
            }
          }
        } else {
          // extend the stored [m - delta, m - 1] totals with the pairs
          // involving the current marker
          prev_strong += cur_strong;
          prev_rec += cur_rec;
          strong_rec_cts[block_cidx2 * 2] = prev_strong;
          strong_rec_cts[block_cidx2 * 2 + 1] = prev_rec;
          ulii = prev_strong + prev_rec;
          if ((cur_ci_type >= 4) && (ulii >= 6) && (((intptr_t)ulii) * inform_frac < ((double)((intptr_t)prev_strong)))) {
          haploview_blocks_save_candidate:
            if (candidate_ct == max_candidates) {
              goto haploview_blocks_ret_NOMEM;
            }
            uii = block_uidxs[block_cidx2];
            candidate_pairs[3 * candidate_ct] = cur_marker_pos - marker_pos[uii];
            candidate_pairs[3 * candidate_ct + 1] = uii;
            candidate_pairs[3 * candidate_ct + 2] = marker_uidx;
            candidate_ct++;
          }
        }
      }
      if (markers_done + marker_idx >= pct_thresh) {
        // erase the previous percentage (one extra backspace once it has two
        // digits) and print the new one
        if (pct > 10) {
          putc_unlocked('\b', stdout);
        }
        pct = ((markers_done + marker_idx) * 100LLU) / marker_ct;
        printf("\b\b%u%%", pct++);
        fflush(stdout);
        pct_thresh = (pct * marker_ct) / 100;
      }
    }
    if (!candidate_ct) {
      continue;
    }
    // 12 = size of one (span, first uidx, last uidx) triple
    qsort(candidate_pairs, candidate_ct, 12, intcmp3_decr);
    if (candidate_pairs[0] > maxspan) {
      maxspan = candidate_pairs[0];
    }
    // Greedy selection, largest span first: accept a candidate iff neither
    // endpoint is already inside an accepted block.  Accepted (first, last)
    // pairs are compacted in place at the front of candidate_pairs; the write
    // position never passes the read position.
    ulii = 0; // final haploblock count
    for (candidate_idx = 0; candidate_idx < candidate_ct; candidate_idx++) {
      block_cidx = candidate_pairs[candidate_idx * 3 + 1];
      if (is_set(in_haploblock, block_cidx)) {
        continue;
      }
      block_cidx2 = candidate_pairs[candidate_idx * 3 + 2];
      if (is_set(in_haploblock, block_cidx2)) {
        continue;
      }
      candidate_pairs[2 * ulii] = block_cidx;
      candidate_pairs[2 * ulii + 1] = block_cidx2;
      fill_bits(block_cidx, block_cidx2 + 1 - block_cidx, in_haploblock);
      ulii++;
    }
    // Put the accepted blocks into output order by sorting each
    // (first, last) uint32 pair as a single 64-bit integer.
#ifdef __cplusplus
    std::sort((int64_t*)candidate_pairs, (int64_t*)(&(candidate_pairs[ulii * 2])));
#else
    qsort(candidate_pairs, ulii, sizeof(int64_t), llcmp);
#endif
    // the chromosome column is identical for every block on this chromosome,
    // so render it once
    wptr_start = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, chrom_idx, g_textbuf));
    wptr_start = memseta(wptr_start, 32, 3);
    for (candidate_idx = 0; candidate_idx < ulii; candidate_idx++) {
      putc_unlocked('*', outfile);
      block_cidx = candidate_pairs[2 * candidate_idx];
      block_cidx2 = candidate_pairs[2 * candidate_idx + 1];
      marker_uidx = block_cidx;
      wptr = uint32toa_w10(marker_pos[block_cidx], wptr_start);
      wptr = memseta(wptr, 32, 3);
      wptr = uint32toa_w10x(marker_pos[block_cidx2], ' ', wptr);
      wptr = width_force(12, wptr, dtoa_g(((int32_t)(marker_pos[block_cidx2] + 1 - marker_pos[block_cidx])) * 0.001, wptr));
      *wptr++ = ' ';
      wptr = uint32toa_w6x(block_cidx2 + 1 - block_cidx - popcount_bit_idx(marker_exclude, block_cidx, block_cidx2), ' ', wptr);
      if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile_det)) {
        goto haploview_blocks_ret_WRITE_FAIL;
      }
      // variant IDs: space-separated in .blocks, '|'-separated in .blocks.det
      for (marker_uidx = block_cidx; marker_uidx <= block_cidx2; marker_uidx++) {
        next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx);
        sptr = &(marker_ids[marker_uidx * max_marker_id_len]);
        putc_unlocked(' ', outfile);
        fputs(sptr, outfile);
        if (marker_uidx != block_cidx) {
          putc_unlocked('|', outfile_det);
        }
        fputs(sptr, outfile_det);
      }
      putc_unlocked('\n', outfile);
      putc_unlocked('\n', outfile_det);
    }
    block_ct += ulii;
  }
  if (fclose_null(&outfile)) {
    goto haploview_blocks_ret_WRITE_FAIL;
  }
  if (fclose_null(&outfile_det)) {
    goto haploview_blocks_ret_WRITE_FAIL;
  }
  putc_unlocked('\r', stdout);
  LOGPRINTFWW("--blocks: %u haploblock%s written to %s .\n", block_ct, (block_ct == 1)? "" : "s", outname);
  LOGPRINTFWW("Extra block details written to %s.det .\n", outname);
  if (block_ct) {
    LOGPRINTF("Longest span: %gkb.\n", ((double)(maxspan + 1)) * 0.001);
  }
  // Error exits: each label sets retval and falls out of the never-entered
  // loop into the shared cleanup below.
  while (0) {
  haploview_blocks_ret_NOMEM:
    retval = RET_NOMEM;
    break;
  haploview_blocks_ret_OPEN_FAIL:
    retval = RET_OPEN_FAIL;
    break;
  haploview_blocks_ret_READ_FAIL:
    retval = RET_READ_FAIL;
    break;
  haploview_blocks_ret_WRITE_FAIL:
    retval = RET_WRITE_FAIL;
    break;
#ifndef __LP64__
  haploview_blocks_ret_INVALID_CMDLINE:
    retval = RET_INVALID_CMDLINE;
    break;
#endif
  }
 haploview_blocks_ret_1:
  bigstack_reset(bigstack_mark);
  fclose_cond(outfile);
  fclose_cond(outfile_det);
  return retval;
}
7645
twolocus_write_table(FILE * outfile,uint32_t * counts,uint32_t plink_maxsnp,char * mkr1,char * mkr2,char * allele00,char * allele01,char * allele10,char * allele11,uint32_t alen00,uint32_t alen01,uint32_t alen10,uint32_t alen11)7646 void twolocus_write_table(FILE* outfile, uint32_t* counts, uint32_t plink_maxsnp, char* mkr1, char* mkr2, char* allele00, char* allele01, char* allele10, char* allele11, uint32_t alen00, uint32_t alen01, uint32_t alen10, uint32_t alen11) {
7647 // PLINK 1.07's print settings for this function don't handle large numbers
7648 // well so we break byte-for-byte compatibility.
7649 char* bufptr = memseta(g_textbuf, 32, plink_maxsnp + 14);
7650 uint32_t* uiptr = counts;
7651 uint32_t total = 0;
7652 uint32_t marg_a[4];
7653 uint32_t marg_b[4];
7654 char spaces[7];
7655 double tot_recip;
7656 uint32_t uii;
7657 uint32_t ujj;
7658 uint32_t ukk;
7659 uint32_t umm;
7660 fill_uint_zero(4, marg_b);
7661 memset(spaces, 32, 7);
7662 for (uii = 0; uii < 4; uii++) {
7663 ukk = 0;
7664 for (ujj = 0; ujj < 4; ujj++) {
7665 umm = *uiptr++;
7666 ukk += umm;
7667 marg_b[ujj] += umm;
7668 }
7669 marg_a[uii] = ukk;
7670 total += ukk;
7671 }
7672 tot_recip = 1.0 / ((double)((int32_t)total));
7673 bufptr = strcpyax(bufptr, mkr2, '\n');
7674 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7675 fwrite(g_textbuf, 1, plink_maxsnp + 7, outfile);
7676 if (alen10 < 4) {
7677 fwrite(spaces, 1, 9 - 2 * alen10, outfile);
7678 }
7679 fputs(allele10, outfile);
7680 putc_unlocked('/', outfile);
7681 fputs(allele10, outfile);
7682 putc_unlocked(' ', outfile);
7683 if (alen10 + alen11 < 7) {
7684 fwrite(spaces, 1, 9 - alen10 - alen11, outfile);
7685 }
7686 fputs(allele10, outfile);
7687 putc_unlocked('/', outfile);
7688 fputs(allele11, outfile);
7689 putc_unlocked(' ', outfile);
7690 if (alen11 < 4) {
7691 fwrite(spaces, 1, 9 - 2 * alen11, outfile);
7692 }
7693 fputs(allele11, outfile);
7694 putc_unlocked('/', outfile);
7695 fputs(allele11, outfile);
7696 fputs(" 0/0 */*\n", outfile);
7697
7698 bufptr = fw_strcpy(plink_maxsnp, mkr1, g_textbuf);
7699 *bufptr++ = ' ';
7700 if (alen00 == 1) {
7701 bufptr = memseta(bufptr, 32, 2);
7702 }
7703 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7704 fputs(allele00, outfile);
7705 putc_unlocked('/', outfile);
7706 fputs(allele00, outfile);
7707 bufptr = g_textbuf;
7708 *bufptr++ = ' ';
7709 bufptr = uint32toa_w10x(counts[0], ' ', bufptr);
7710 bufptr = uint32toa_w10x(counts[2], ' ', bufptr);
7711 bufptr = uint32toa_w10x(counts[3], ' ', bufptr);
7712 bufptr = uint32toa_w10x(counts[1], ' ', bufptr);
7713 bufptr = uint32toa_w10x(marg_a[0], '\n', bufptr);
7714 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7715
7716 bufptr = memseta(g_textbuf, 32, plink_maxsnp + 1);
7717 if (alen00 + alen01 < 4) {
7718 bufptr = memseta(bufptr, 32, 4 - alen00 - alen01);
7719 }
7720 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7721 fputs(allele00, outfile);
7722 putc_unlocked('/', outfile);
7723 fputs(allele01, outfile);
7724 bufptr = g_textbuf;
7725 *bufptr++ = ' ';
7726 bufptr = uint32toa_w10x(counts[8], ' ', bufptr);
7727 bufptr = uint32toa_w10x(counts[10], ' ', bufptr);
7728 bufptr = uint32toa_w10x(counts[11], ' ', bufptr);
7729 bufptr = uint32toa_w10x(counts[9], ' ', bufptr);
7730 bufptr = uint32toa_w10x(marg_a[2], '\n', bufptr);
7731 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7732
7733 bufptr = memseta(g_textbuf, 32, plink_maxsnp + 1);
7734 if (alen01 == 1) {
7735 bufptr = memseta(bufptr, 32, 2);
7736 }
7737 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7738 fputs(allele01, outfile);
7739 putc_unlocked('/', outfile);
7740 fputs(allele01, outfile);
7741 bufptr = g_textbuf;
7742 *bufptr++ = ' ';
7743 bufptr = uint32toa_w10x(counts[12], ' ', bufptr);
7744 bufptr = uint32toa_w10x(counts[14], ' ', bufptr);
7745 bufptr = uint32toa_w10x(counts[15], ' ', bufptr);
7746 bufptr = uint32toa_w10x(counts[13], ' ', bufptr);
7747 bufptr = uint32toa_w10x(marg_a[3], '\n', bufptr);
7748 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7749
7750 bufptr = memseta(g_textbuf, 32, plink_maxsnp + 3);
7751 bufptr = memcpya(bufptr, "0/0 ", 4);
7752 bufptr = uint32toa_w10x(counts[4], ' ', bufptr);
7753 bufptr = uint32toa_w10x(counts[6], ' ', bufptr);
7754 bufptr = uint32toa_w10x(counts[7], ' ', bufptr);
7755 bufptr = uint32toa_w10x(counts[5], ' ', bufptr);
7756 bufptr = uint32toa_w10x(marg_a[1], '\n', bufptr);
7757 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7758
7759 bufptr = memseta(g_textbuf, 32, plink_maxsnp + 3);
7760 bufptr = memcpya(bufptr, "*/* ", 4);
7761 bufptr = uint32toa_w10x(marg_b[0], ' ', bufptr);
7762 bufptr = uint32toa_w10x(marg_b[2], ' ', bufptr);
7763 bufptr = uint32toa_w10x(marg_b[3], ' ', bufptr);
7764 bufptr = uint32toa_w10x(marg_b[1], ' ', bufptr);
7765 bufptr = uint32toa_w10x(total, '\n', bufptr);
7766 *bufptr++ = '\n';
7767 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7768
7769 bufptr = memseta(g_textbuf, 32, plink_maxsnp + 14);
7770 bufptr = strcpyax(bufptr, mkr2, '\n');
7771 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7772 fwrite(g_textbuf, 1, plink_maxsnp + 9, outfile);
7773 fputs(allele10, outfile);
7774 putc_unlocked('/', outfile);
7775 fputs(allele10, outfile);
7776 if (alen10 < 4) {
7777 fwrite(spaces, 1, 9 - 2 * alen10, outfile);
7778 }
7779 putc_unlocked(' ', outfile);
7780 fputs(allele10, outfile);
7781 putc_unlocked('/', outfile);
7782 fputs(allele11, outfile);
7783 if (alen10 + alen11 < 7) {
7784 fwrite(spaces, 1, 9 - alen10 - alen11, outfile);
7785 }
7786 putc_unlocked(' ', outfile);
7787 fputs(allele11, outfile);
7788 putc_unlocked('/', outfile);
7789 fputs(allele11, outfile);
7790 if (alen11 < 4) {
7791 fwrite(spaces, 1, 9 - 2 * alen11, outfile);
7792 }
7793 fputs(" 0/0 */*\n", outfile);
7794
7795 bufptr = fw_strcpy(plink_maxsnp, mkr1, g_textbuf);
7796 *bufptr++ = ' ';
7797 if (alen00 == 1) {
7798 bufptr = memseta(bufptr, 32, 2);
7799 }
7800 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7801 fputs(allele00, outfile);
7802 putc_unlocked('/', outfile);
7803 fputs(allele00, outfile);
7804 bufptr = memseta(g_textbuf, 32, 2);
7805 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[0]) * tot_recip, bufptr);
7806 bufptr = memseta(bufptr, 32, 2);
7807 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[2]) * tot_recip, bufptr);
7808 bufptr = memseta(bufptr, 32, 2);
7809 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[3]) * tot_recip, bufptr);
7810 bufptr = memseta(bufptr, 32, 2);
7811 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[1]) * tot_recip, bufptr);
7812 bufptr = memseta(bufptr, 32, 2);
7813 bufptr = dtoa_f_w9p6_clipped(((int32_t)marg_a[0]) * tot_recip, bufptr);
7814 *bufptr++ = '\n';
7815 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7816
7817 bufptr = memseta(g_textbuf, 32, plink_maxsnp + 1);
7818 if (alen00 + alen01 < 4) {
7819 bufptr = memseta(bufptr, 32, 4 - alen00 - alen01);
7820 }
7821 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7822 fputs(allele00, outfile);
7823 putc_unlocked('/', outfile);
7824 fputs(allele01, outfile);
7825 bufptr = memseta(g_textbuf, 32, 2);
7826 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[8]) * tot_recip, bufptr);
7827 bufptr = memseta(bufptr, 32, 2);
7828 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[10]) * tot_recip, bufptr);
7829 bufptr = memseta(bufptr, 32, 2);
7830 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[11]) * tot_recip, bufptr);
7831 bufptr = memseta(bufptr, 32, 2);
7832 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[9]) * tot_recip, bufptr);
7833 bufptr = memseta(bufptr, 32, 2);
7834 bufptr = dtoa_f_w9p6_clipped(((int32_t)marg_a[2]) * tot_recip, bufptr);
7835 *bufptr++ = '\n';
7836 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7837
7838 bufptr = memseta(g_textbuf, 32, plink_maxsnp + 1);
7839 if (alen01 == 1) {
7840 bufptr = memseta(bufptr, 32, 2);
7841 }
7842 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7843 fputs(allele01, outfile);
7844 putc_unlocked('/', outfile);
7845 fputs(allele01, outfile);
7846 bufptr = memseta(g_textbuf, 32, 2);
7847 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[12]) * tot_recip, bufptr);
7848 bufptr = memseta(bufptr, 32, 2);
7849 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[14]) * tot_recip, bufptr);
7850 bufptr = memseta(bufptr, 32, 2);
7851 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[15]) * tot_recip, bufptr);
7852 bufptr = memseta(bufptr, 32, 2);
7853 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[13]) * tot_recip, bufptr);
7854 bufptr = memseta(bufptr, 32, 2);
7855 bufptr = dtoa_f_w9p6_clipped(((int32_t)marg_a[3]) * tot_recip, bufptr);
7856 *bufptr++ = '\n';
7857 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7858
7859 bufptr = memseta(g_textbuf, 32, plink_maxsnp + 3);
7860 bufptr = memcpya(bufptr, "0/0 ", 5);
7861 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[4]) * tot_recip, bufptr);
7862 bufptr = memseta(bufptr, 32, 2);
7863 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[6]) * tot_recip, bufptr);
7864 bufptr = memseta(bufptr, 32, 2);
7865 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[7]) * tot_recip, bufptr);
7866 bufptr = memseta(bufptr, 32, 2);
7867 bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[5]) * tot_recip, bufptr);
7868 bufptr = memseta(bufptr, 32, 2);
7869 bufptr = dtoa_f_w9p6_clipped(((int32_t)marg_a[1]) * tot_recip, bufptr);
7870 *bufptr++ = '\n';
7871 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7872
7873 bufptr = memseta(g_textbuf, 32, plink_maxsnp + 3);
7874 bufptr = memcpya(bufptr, "*/* ", 5);
7875 bufptr = dtoa_f_w9p6_spaced(((int32_t)marg_b[0]) * tot_recip, bufptr);
7876 bufptr = memseta(bufptr, 32, 2);
7877 bufptr = dtoa_f_w9p6_spaced(((int32_t)marg_b[2]) * tot_recip, bufptr);
7878 bufptr = memseta(bufptr, 32, 2);
7879 bufptr = dtoa_f_w9p6_spaced(((int32_t)marg_b[3]) * tot_recip, bufptr);
7880 bufptr = memseta(bufptr, 32, 2);
7881 bufptr = dtoa_f_w9p6_spaced(((int32_t)marg_b[1]) * tot_recip, bufptr);
7882 bufptr = memcpya(bufptr, " 1\n\n", 6);
7883 fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
7884 }
7885
// Shared implementation of --twolocus (outname non-null) and --ld (outname
// null) for a single pair of variants.
//
// Both modes locate the two named variants, load their genotypes, and build
// a 4x4 table of joint genotype counts.  --twolocus writes that table (plus
// case/control breakdowns when pheno_c is provided) to <outname>.twolocus.
// --ld instead estimates haplotype frequencies from the table and logs
// R-squared, D', and the haplotype frequencies for every admissible phasing
// solution.
//
// Notes on arguments whose role differs between modes:
//   sample_exclude  in --ld mode the bitmap passed here is treated as an
//                   *inclusion* mask: its set bits are counted to obtain
//                   sample_ct and an inverted copy is used as the exclusion
//                   mask (presumably the founder mask, given the warning
//                   text below -- confirm against the caller).
//   pheno_nm/pheno_c  in --ld mode these are replaced by sex_male when
//                   either variant is on the X chromosome, and pheno_c is
//                   cleared otherwise, so that counts_cc[16..31] holds the
//                   male-only table.
//
// Returns 0 on success or a RET_* error code.  All bigstack allocations made
// here are released before returning.
int32_t twolocus(Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, uintptr_t* pheno_nm, uint32_t pheno_nm_ct, uint32_t pheno_ctrl_ct, uintptr_t* pheno_c, uintptr_t* sex_male, char* outname, char* outname_end, uint32_t hh_exists) {
  unsigned char* bigstack_mark = g_bigstack_base;
  FILE* outfile = nullptr;
  char* mkr1 = outname? epi_ip->twolocus_mkr1 : epi_ip->ld_mkr1;
  char* mkr2 = outname? epi_ip->twolocus_mkr2 : epi_ip->ld_mkr2;
  uintptr_t* sample_include2 = nullptr;
  uintptr_t* sample_male_include2 = nullptr;
  uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
  uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
  // ID lengths including the terminating NUL; zeroed once the ID is found.
  uintptr_t mkr1_blen = strlen(mkr1) + 1;
  uintptr_t mkr2_blen = strlen(mkr2) + 1;
  uint32_t hwe_midp = epi_ip->modifier & EPI_HWE_MIDP;
  int32_t retval = 0;
  // Joint genotype counts, indexed (code at mkr1) * 4 + (code at mkr2).
  uint32_t counts_all[16];
  // [0..15]: pheno_c bit clear, [16..31]: pheno_c bit set (nonmissing only).
  uint32_t counts_cc[32];
  uintptr_t* loadbufs[2];
  uintptr_t marker_uidxs[2];
  double solutions[3];
  uint32_t is_haploid[2];
  uint32_t is_x[2];
  // None of the locals below carry initializers, so that no goto in this
  // function jumps across an initialization.
  uintptr_t* loadbuf_raw;
  uintptr_t* loadbuf0_ptr;
  uintptr_t* loadbuf1_ptr;
  uintptr_t* loadbuf0_end;
  char* allele00;
  char* allele01;
  char* allele10;
  char* allele11;
  char* bufptr;
  char* bufptr2;
  uintptr_t sample_ctl;
  uintptr_t sample_ctl2;
  uintptr_t final_mask;
  uintptr_t marker_uidx;
  uintptr_t marker_idx;
  uintptr_t sample_uidx;
  uintptr_t sample_idx;
  uintptr_t sample_idx_end;
  uintptr_t cur_word0;
  uintptr_t cur_word1;
  uintptr_t cell_idx;
  uintptr_t sol_start;
  uintptr_t sol_end;
  uintptr_t sol_idx;
  double twice_tot_recip;
  double half_hethet_share;
  double freq11;
  double freq12;
  double freq21;
  double freq22;
  double freq1x;
  double freq2x;
  double freqx1;
  double freqx2;
  double dxx;
  uint32_t chrom_fo_idx;
  uint32_t chrom_idx;
  uint32_t is_y;
  uint32_t alen00;
  uint32_t alen01;
  uint32_t alen10;
  uint32_t alen11;
  uint32_t count_total;
  if (!outname) {
    // --ld: count the set bits of the incoming mask to get the sample count,
    // then switch to an inverted copy so the rest of the function can treat
    // sample_exclude as a true exclusion mask.
    sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
    sample_ct = popcount_longs(sample_exclude, sample_ctl);
    if (!sample_ct) {
      logerrprint("Warning: Skipping --ld since there are no founders. (--make-founders may come\nin handy here.)\n");
      goto twolocus_ret_1;
    }
    if (bigstack_alloc_ul(sample_ctl, &loadbuf_raw)) {
      goto twolocus_ret_NOMEM;
    }
    bitarr_invert_copy(sample_exclude, unfiltered_sample_ct, loadbuf_raw);
    sample_exclude = loadbuf_raw;
  }
  sample_ctl2 = QUATERCT_TO_WORDCT(sample_ct);
  final_mask = get_final_mask(sample_ct);
  // An ID longer than any stored ID cannot match.
  if ((mkr1_blen > max_marker_id_len) || (mkr2_blen > max_marker_id_len)) {
    goto twolocus_ret_MARKER_NOT_FOUND;
  }
  // Single pass over the included variants looking for both IDs.  The loop
  // breaks as soon as the second of the two is found, so marker_idx only
  // reaches marker_ct when at least one ID is absent.
  marker_uidxs[0] = 0;
  marker_uidxs[1] = 0;
  for (marker_uidx = 0, marker_idx = 0; marker_idx < marker_ct; marker_uidx++, marker_idx++) {
    next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx);
    bufptr = &(marker_ids[marker_uidx * max_marker_id_len]);
    if (mkr1_blen && (!memcmp(mkr1, bufptr, mkr1_blen))) {
      marker_uidxs[0] = marker_uidx;
      if (!mkr2_blen) {
        break;
      }
      mkr1_blen = 0;
    } else if (mkr2_blen && (!memcmp(mkr2, bufptr, mkr2_blen))) {
      marker_uidxs[1] = marker_uidx;
      if (!mkr1_blen) {
        break;
      }
      mkr2_blen = 0;
    }
  }
  if (marker_idx == marker_ct) {
    goto twolocus_ret_MARKER_NOT_FOUND;
  }
  if (bigstack_alloc_ul(unfiltered_sample_ctl2, &loadbuf_raw) ||
      bigstack_alloc_ul(sample_ctl2, &loadbufs[0]) ||
      bigstack_alloc_ul(sample_ctl2, &loadbufs[1])) {
    goto twolocus_ret_NOMEM;
  }
  // Clear the last word of each buffer up front; a load may not touch all of
  // its bits.
  loadbuf_raw[unfiltered_sample_ctl2 - 1] = 0;
  loadbufs[0][sample_ctl2 - 1] = 0;
  loadbufs[1][sample_ctl2 - 1] = 0;
  if (alloc_collapsed_haploid_filters(sample_exclude, sex_male, unfiltered_sample_ct, sample_ct, hh_exists, 0, &sample_include2, &sample_male_include2)) {
    goto twolocus_ret_NOMEM;
  }
  is_haploid[0] = 0;
  is_haploid[1] = 0;
  is_x[0] = 0;
  is_x[1] = 0;
  // Load both variants, applying haploid cleanup where the chromosome calls
  // for it.
  for (marker_idx = 0; marker_idx < 2; marker_idx++) {
    marker_uidx = marker_uidxs[marker_idx];
    if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
      goto twolocus_ret_READ_FAIL;
    }
    if (load_and_collapse(unfiltered_sample_ct, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx), bedfile, loadbuf_raw, loadbufs[marker_idx])) {
      goto twolocus_ret_READ_FAIL;
    }
    chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx);
    chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
    is_haploid[marker_idx] = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx);
    if (is_haploid[marker_idx]) {
      is_x[marker_idx] = (chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[X_OFFSET]);
      is_y = (chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[Y_OFFSET]);
      haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x[marker_idx], is_y, (unsigned char*)(loadbufs[marker_idx]));
    }
  }
  if (!outname) {
    // --ld needs X chromosome sex stratification instead of --twolocus's
    // case/control stratification
    if (is_x[0] || is_x[1]) {
      pheno_c = sex_male;
      pheno_nm = sex_male;
    } else {
      pheno_c = nullptr;
    }
  }
  // Tally joint genotypes.  The inner while-loop consumes the full words;
  // the outer loop then extends the end pointer by one word to pick up a
  // trailing partial word, if any.
  fill_uint_zero(16, counts_all);
  fill_uint_zero(32, counts_cc);
  loadbuf0_ptr = loadbufs[0];
  loadbuf1_ptr = loadbufs[1];
  loadbuf0_end = &(loadbuf0_ptr[sample_ct / BITCT2]);
  sample_uidx = 0;
  sample_idx = 0;
  sample_idx_end = BITCT2;
  while (1) {
    while (loadbuf0_ptr < loadbuf0_end) {
      cur_word0 = *loadbuf0_ptr++;
      cur_word1 = *loadbuf1_ptr++;
      if (pheno_c) {
        // The stratification bitmaps are indexed by unfiltered sample index,
        // so that index is tracked alongside the collapsed one.
        for (; sample_idx < sample_idx_end; sample_uidx++, sample_idx++) {
          next_unset_ul_unsafe_ck(sample_exclude, &sample_uidx);
          cell_idx = ((cur_word0 & 3) << 2) | (cur_word1 & 3);
          cur_word0 >>= 2;
          cur_word1 >>= 2;
          counts_all[cell_idx] += 1;
          if (IS_SET(pheno_nm, sample_uidx)) {
            counts_cc[(16 * IS_SET(pheno_c, sample_uidx)) + cell_idx] += 1;
          }
        }
      } else {
        for (; sample_idx < sample_idx_end; sample_idx++) {
          cell_idx = ((cur_word0 & 3) << 2) | (cur_word1 & 3);
          cur_word0 >>= 2;
          cur_word1 >>= 2;
          counts_all[cell_idx] += 1;
        }
      }
      sample_idx_end += BITCT2;
    }
    if (sample_idx == sample_ct) {
      break;
    }
    loadbuf0_end++;
    sample_idx_end = sample_ct;
  }

  allele00 = marker_allele_ptrs[2 * marker_uidxs[0]];
  allele01 = marker_allele_ptrs[2 * marker_uidxs[0] + 1];
  allele10 = marker_allele_ptrs[2 * marker_uidxs[1]];
  allele11 = marker_allele_ptrs[2 * marker_uidxs[1] + 1];
  alen00 = strlen(allele00);
  alen01 = strlen(allele01);
  alen10 = strlen(allele10);
  alen11 = strlen(allele11);
  if (outname) {
    // --twolocus report.
    memcpy(outname_end, ".twolocus", 10);
    if (fopen_checked(outname, "w", &outfile)) {
      goto twolocus_ret_OPEN_FAIL;
    }
    fputs("\nAll individuals\n===============\n", outfile);
    twolocus_write_table(outfile, counts_all, plink_maxsnp, mkr1, mkr2, allele00, allele01, allele10, allele11, alen00, alen01, alen10, alen11);
    if (pheno_c) {
      if (pheno_nm_ct != pheno_ctrl_ct) {
        fputs("\nCases\n=====\n", outfile);
        twolocus_write_table(outfile, &(counts_cc[16]), plink_maxsnp, mkr1, mkr2, allele00, allele01, allele10, allele11, alen00, alen01, alen10, alen11);
      }
      if (pheno_ctrl_ct) {
        fputs("\nControls\n========\n", outfile);
        twolocus_write_table(outfile, counts_cc, plink_maxsnp, mkr1, mkr2, allele00, allele01, allele10, allele11, alen00, alen01, alen10, alen11);
      }
    }
    putc_unlocked('\n', outfile);
    if (fclose_null(&outfile)) {
      goto twolocus_ret_WRITE_FAIL;
    }
    LOGPRINTFWW("--twolocus: Report written to %s .\n", outname);
  } else {
    // low counts_cc[] values aren't used, so may as well store marginal counts
    // there
    // ([0],[2],[3]: mkr1 genotype totals; [4],[6],[7]: mkr2 genotype totals;
    // all restricted to samples nonmissing at both variants.)
    counts_cc[0] = counts_all[0] + counts_all[2] + counts_all[3];
    counts_cc[2] = counts_all[8] + counts_all[10] + counts_all[11];
    counts_cc[3] = counts_all[12] + counts_all[14] + counts_all[15];
    counts_cc[4] = counts_all[0] + counts_all[8] + counts_all[12];
    counts_cc[6] = counts_all[2] + counts_all[10] + counts_all[14];
    counts_cc[7] = counts_all[3] + counts_all[11] + counts_all[15];
    count_total = counts_cc[0] + counts_cc[2] + counts_cc[3];
    if (!count_total) {
      logerrprint("Error: No valid observations for --ld.\n");
      goto twolocus_ret_INVALID_CMDLINE;
    }
    if ((!counts_cc[2]) && ((!counts_cc[0]) || (!counts_cc[3]))) {
      LOGPREPRINTFWW("Error: %s is monomorphic across all valid observations.\n", mkr1);
      goto twolocus_ret_INVALID_CMDLINE_2;
    } else if ((!counts_cc[6]) && ((!counts_cc[4]) || (!counts_cc[7]))) {
      LOGPREPRINTFWW("Error: %s is monomorphic across all valid observations.\n", mkr2);
      goto twolocus_ret_INVALID_CMDLINE_2;
    } else if ((alen00 > (MAXLINELEN / 4) - 16) || (alen01 > (MAXLINELEN / 4) - 16)) {
      // Allele codes are copied into g_logbuf below, so their length must be
      // bounded.
      LOGPREPRINTFWW("Error: %s has a pathologically long allele code.\n", mkr1);
      goto twolocus_ret_INVALID_CMDLINE_2;
    } else if ((alen10 > (MAXLINELEN / 4) - 16) || (alen11 > (MAXLINELEN / 4) - 16)) {
      LOGPREPRINTFWW("Error: %s has a pathologically long allele code.\n", mkr2);
      goto twolocus_ret_INVALID_CMDLINE_2;
    }
    LOGPRINTF("\n--ld %s %s:\n", mkr1, mkr2);
    sol_start = 0;
    // possible todo: factor out redundancy with other D-prime calculations
    // Haplotype counts implied by every genotype pair except the double
    // heterozygote (cell 10), whose phase is unknown.
    freq11 = (double)(2 * counts_all[0] + counts_all[2] + counts_all[8]);
    freq12 = (double)(2 * counts_all[3] + counts_all[2] + counts_all[11]);
    freq21 = (double)(2 * counts_all[12] + counts_all[8] + counts_all[14]);
    freq22 = (double)(2 * counts_all[15] + counts_all[11] + counts_all[14]);
    if (is_x[0] || is_x[1]) {
      // counts_cc[16..31] is the male-only table here; reduce the weight of
      // male observations on the X chromosome.
      if (is_x[0] && is_x[1]) {
        freq11 -= (double)((int32_t)counts_cc[16]);
        freq12 -= (double)((int32_t)counts_cc[19]);
        freq21 -= (double)((int32_t)counts_cc[28]);
        freq22 -= (double)((int32_t)counts_cc[31]);
      } else if (is_x[0]) {
        freq11 -= ((double)(2 * counts_cc[16] + counts_cc[18])) * (1.0 - SQRT_HALF);
        freq12 -= ((double)(2 * counts_cc[19] + counts_cc[18])) * (1.0 - SQRT_HALF);
        freq21 -= ((double)(2 * counts_cc[28] + counts_cc[30])) * (1.0 - SQRT_HALF);
        freq22 -= ((double)(2 * counts_cc[31] + counts_cc[30])) * (1.0 - SQRT_HALF);
      } else {
        freq11 -= ((double)(2 * counts_cc[16] + counts_cc[24])) * (1.0 - SQRT_HALF);
        freq12 -= ((double)(2 * counts_cc[19] + counts_cc[27])) * (1.0 - SQRT_HALF);
        freq21 -= ((double)(2 * counts_cc[28] + counts_cc[24])) * (1.0 - SQRT_HALF);
        freq22 -= ((double)(2 * counts_cc[31] + counts_cc[27])) * (1.0 - SQRT_HALF);
      }
    }
    twice_tot_recip = 1.0 / (freq11 + freq12 + freq21 + freq22 + 2 * ((int32_t)counts_all[10]));
    freq11 *= twice_tot_recip;
    freq12 *= twice_tot_recip;
    freq21 *= twice_tot_recip;
    freq22 *= twice_tot_recip;
    // K in the derivations below: the haplotype-frequency mass contributed
    // by double heterozygotes to each of the two possible phasings.
    half_hethet_share = ((int32_t)counts_all[10]) * twice_tot_recip;
    freq1x = freq11 + freq12 + half_hethet_share;
    freq2x = 1.0 - freq1x;
    freqx1 = freq11 + freq21 + half_hethet_share;
    freqx2 = 1.0 - freqx1;
    if (counts_all[10]) {
      // detect degenerate cases to avoid e-17 ugliness
      // possible todo: when there are multiple solutions, compute log
      // likelihood for each and mark the EM solution in some manner
      if ((freq11 * freq22 != 0.0) || (freq12 * freq21 != 0.0)) {
        // (f11 + x)(f22 + x)(K - x) = x(f12 + K - x)(f21 + K - x)
        // (x - K)(x + f11)(x + f22) + x(x - K - f12)(x - K - f21) = 0
        //   x^3 + (f11 + f22 - K)x^2 + (f11*f22 - K*f11 - K*f22)x
        // - K*f11*f22 + x^3 - (2K + f12 + f21)x^2 + (K + f12)(K + f21)x = 0
        sol_end = cubic_real_roots(0.5 * (freq11 + freq22 - freq12 - freq21 - 3 * half_hethet_share), 0.5 * (freq11 * freq22 + freq12 * freq21 + half_hethet_share * (freq12 + freq21 - freq11 - freq22 + half_hethet_share)), -0.5 * half_hethet_share * freq11 * freq22, solutions);
        if (sol_end > 1) {
          // Keep only roots inside [0, K], snapping near-boundary roots onto
          // the boundary.  This presumes the roots are returned in
          // increasing order (NOTE(review): confirm against
          // cubic_real_roots).  The index guards keep the scans inside
          // solutions[] even if no root lands in range numerically.
          while ((sol_end > 1) && (solutions[sol_end - 1] > half_hethet_share + SMALLISH_EPSILON)) {
            sol_end--;
          }
          if (solutions[sol_end - 1] > half_hethet_share - SMALLISH_EPSILON) {
            solutions[sol_end - 1] = half_hethet_share;
          }
          while ((sol_start + 1 < sol_end) && (solutions[sol_start] < -SMALLISH_EPSILON)) {
            sol_start++;
          }
          if (solutions[sol_start] < SMALLISH_EPSILON) {
            solutions[sol_start] = 0;
          }
        }
      } else {
        // bugfix (6 Oct 2017):
        // At least one of {f11, f22} is zero, and one of {f12, f21} is zero.
        // Initially suppose that the zero-values are f11 and f12.  Then the
        // equality becomes
        //   x(f22 + x)(K - x) = x(K - x)(f21 + K - x)
        // x=0 and x=K are always solutions; the rest becomes
        //     f22 + x = f21 + K - x
        //     2x = K + f21 - f22
        //     x = (K + f21 - f22)/2; in-range iff (f21 - f22) in (-K, K).
        // So far so good.  However, this code used to *always* check
        // (f21 - f22), when it's necessary to use all the nonzero values.
        // (this still works if three or all four values are zero)
        solutions[0] = 0;
        const double nonzero_freq_xx = freq11 + freq22;
        const double nonzero_freq_xy = freq12 + freq21;
        if ((nonzero_freq_xx + SMALLISH_EPSILON < half_hethet_share + nonzero_freq_xy) && (nonzero_freq_xy + SMALLISH_EPSILON < half_hethet_share + nonzero_freq_xx)) {
          sol_end = 3;
          solutions[1] = (half_hethet_share + nonzero_freq_xy - nonzero_freq_xx) * 0.5;
          solutions[2] = half_hethet_share;
        } else {
          sol_end = 2;
          solutions[1] = half_hethet_share;
        }
      }
      if (sol_end > sol_start + 1) {
        // not Xchr/haploid-sensitive yet
        logprint("Multiple haplotype phasing solutions; sample size, HWE, or random mating\nassumption may be violated.\n\nHWE exact test p-values\n-----------------------\n");
        if (is_haploid[0] && (!is_x[0])) {
          LOGPRINTF(" %s: n/a\n", mkr1);
        } else {
          LOGPRINTF(" %s: %g\n", mkr1, SNPHWE2(counts_cc[2] + counts_all[9], counts_cc[0] + counts_all[1] - 2 * (counts_cc[16] + counts_cc[19]), counts_cc[3] + counts_all[13] - 2 * (counts_cc[28] + counts_cc[31]), hwe_midp));
        }
        if (is_haploid[1] && (!is_x[1])) {
          LOGPRINTF(" %s: n/a\n", mkr2);
        } else {
          LOGPRINTF(" %s: %g\n\n", mkr2, SNPHWE2(counts_cc[6] + counts_all[6], counts_cc[4] + counts_all[4] - 2 * (counts_cc[16] + counts_cc[28]), counts_cc[7] + counts_all[7] - 2 * (counts_cc[19] + counts_cc[31]), hwe_midp));
        }
      }
    } else {
      // No double heterozygotes: phase is fully determined.
      sol_end = 1;
      solutions[0] = 0.0;
    }
    if (sol_end == sol_start + 1) {
      logprint("\n");
    }
    for (sol_idx = sol_start; sol_idx < sol_end; sol_idx++) {
      if (sol_end - sol_start > 1) {
        LOGPRINTF("Solution #%" PRIuPTR ":\n", sol_idx + 1 - sol_start);
      }
      dxx = freq11 + solutions[sol_idx] - freqx1 * freq1x; // D
      if (fabs(dxx) < SMALL_EPSILON) {
        dxx = 0;
      }
      // Every line of this report is indented by three spaces; the literal
      // is exactly 10 characters to match the copy length.
      bufptr = memcpya(g_logbuf, "   R-sq = ", 10);
      bufptr2 = dtoa_g(dxx * dxx / (freq1x * freqx1 * freq2x * freqx2), bufptr);
      // assumes bufptr2 - bufptr < 15
      bufptr = memseta(bufptr2, 32, 15 - ((uintptr_t)(bufptr2 - bufptr)));
      bufptr = memcpya(bufptr, "D' = ", 5);
      if (dxx >= 0) {
        bufptr = dtoa_g(dxx / MINV(freqx1 * freq2x, freqx2 * freq1x), bufptr);
      } else {
        bufptr = dtoa_g(-dxx / MINV(freqx1 * freq1x, freqx2 * freq2x), bufptr);
      }
      // Copy lengths below include the string terminator.
      bufptr = memcpya(bufptr, "\n\n", 3);
      logprintb();
      // Headings follow the row layout built below: 3-space indent, a
      // 9-character haplotype field, 5 spaces, a 9-character frequency, and
      // an expectation right-aligned to a field ending 24 characters later.
      logprint("   Haplotype     Frequency    Expectation under LE\n");
      logprint("   ---------     ---------    --------------------\n");
      bufptr = memseta(g_logbuf, 32, 3);
      if (alen00 + alen10 < 9) {
        bufptr = memseta(bufptr, 32, 9 - alen00 - alen10);
      }
      bufptr = memcpya(bufptr, allele00, alen00);
      bufptr = memcpya(bufptr, allele10, alen10);
      bufptr = memseta(bufptr, 32, 5);
      bufptr = dtoa_f_w9p6_spaced(freq11 + solutions[sol_idx], bufptr);
      bufptr = memseta(bufptr, 32, 15);
      bufptr = dtoa_f_w9p6_clipped(freqx1 * freq1x, bufptr);
      bufptr = memcpya(bufptr, "\n", 2);
      logprintb();
      // The remaining rows reuse the three indent spaces already at the
      // start of g_logbuf.
      bufptr = &(g_logbuf[3]);
      if (alen01 + alen10 < 9) {
        bufptr = memseta(bufptr, 32, 9 - alen01 - alen10);
      }
      bufptr = memcpya(bufptr, allele01, alen01);
      bufptr = memcpya(bufptr, allele10, alen10);
      bufptr = memseta(bufptr, 32, 5);
      bufptr = dtoa_f_w9p6_spaced(freq21 + half_hethet_share - solutions[sol_idx], bufptr);
      bufptr = memseta(bufptr, 32, 15);
      bufptr = dtoa_f_w9p6_clipped(freqx1 * freq2x, bufptr);
      bufptr = memcpya(bufptr, "\n", 2);
      logprintb();
      bufptr = &(g_logbuf[3]);
      if (alen00 + alen11 < 9) {
        bufptr = memseta(bufptr, 32, 9 - alen00 - alen11);
      }
      bufptr = memcpya(bufptr, allele00, alen00);
      bufptr = memcpya(bufptr, allele11, alen11);
      bufptr = memseta(bufptr, 32, 5);
      bufptr = dtoa_f_w9p6_spaced(freq12 + half_hethet_share - solutions[sol_idx], bufptr);
      bufptr = memseta(bufptr, 32, 15);
      bufptr = dtoa_f_w9p6_clipped(freqx2 * freq1x, bufptr);
      bufptr = memcpya(bufptr, "\n", 2);
      logprintb();
      bufptr = &(g_logbuf[3]);
      if (alen01 + alen11 < 9) {
        bufptr = memseta(bufptr, 32, 9 - alen01 - alen11);
      }
      bufptr = memcpya(bufptr, allele01, alen01);
      bufptr = memcpya(bufptr, allele11, alen11);
      bufptr = memseta(bufptr, 32, 5);
      bufptr = dtoa_f_w9p6_spaced(freq22 + solutions[sol_idx], bufptr);
      bufptr = memseta(bufptr, 32, 15);
      bufptr = dtoa_f_w9p6_clipped(freqx2 * freq2x, bufptr);
      bufptr = memcpyl3a(bufptr, "\n\n");
      logprintb();
      bufptr = &(g_logbuf[3]);
      bufptr = memcpya(bufptr, "In phase alleles are ", 21);
      // Positive D pairs first-with-first and second-with-second alleles;
      // otherwise the cross pairing is reported.
      if (dxx > 0) {
        bufptr = memcpya(bufptr, allele00, alen00);
        bufptr = memcpyax(bufptr, allele10, alen10, '/');
        bufptr = memcpya(bufptr, allele01, alen01);
        bufptr = memcpya(bufptr, allele11, alen11);
      } else {
        bufptr = memcpya(bufptr, allele00, alen00);
        bufptr = memcpyax(bufptr, allele11, alen11, '/');
        bufptr = memcpya(bufptr, allele01, alen01);
        bufptr = memcpya(bufptr, allele10, alen10);
      }
      bufptr = memcpyl3a(bufptr, "\n\n");
      logprintb();
    }
  }
  // Error labels live inside a never-entered loop so that normal flow skips
  // them; each sets retval and breaks out to the shared cleanup.
  while (0) {
  twolocus_ret_NOMEM:
    retval = RET_NOMEM;
    break;
  twolocus_ret_OPEN_FAIL:
    retval = RET_OPEN_FAIL;
    break;
  twolocus_ret_READ_FAIL:
    retval = RET_READ_FAIL;
    break;
  twolocus_ret_WRITE_FAIL:
    retval = RET_WRITE_FAIL;
    break;
  twolocus_ret_MARKER_NOT_FOUND:
    if (outname) {
      logerrprint("Error: --twolocus variant name not found.\n");
    } else {
      logerrprint("Error: --ld variant name not found.\n");
    }
    retval = RET_INVALID_CMDLINE;
    break;
  twolocus_ret_INVALID_CMDLINE_2:
    // Message was already formatted into the log buffer by the caller of
    // this label.
    logerrprintb();
  twolocus_ret_INVALID_CMDLINE:
    retval = RET_INVALID_CMDLINE;
    break;
  }
 twolocus_ret_1:
  fclose_cond(outfile);
  bigstack_reset(bigstack_mark);
  return retval;
}
8339
rotate_loadbuf_and_compute_phenogeno(uintptr_t * loadbuf,double * pheno_d2,uint32_t pheno_nm_ct,uintptr_t * loadbuf_write,double * phenogeno,uint32_t * genosums)8340 void rotate_loadbuf_and_compute_phenogeno(uintptr_t* loadbuf, double* pheno_d2, uint32_t pheno_nm_ct, uintptr_t* loadbuf_write, double* phenogeno, uint32_t* genosums) {
8341 double cur_phenogeno = 0;
8342 uint32_t geno1_ct = 0;
8343 uint32_t geno2_ct = 0;
8344 uintptr_t cur_word;
8345 uintptr_t ulii;
8346 uintptr_t uljj;
8347 double dxx;
8348 uint32_t sample_idx;
8349 uint32_t sample_idx_base;
8350 uint32_t uii;
8351 for (sample_idx = 0; sample_idx < pheno_nm_ct;) {
8352 // we're interested in hom A1 (non-trailing 00) and het (10) bit
8353 // values here.
8354 cur_word = ~(*loadbuf++);
8355 sample_idx_base = sample_idx;
8356 sample_idx += BITCT2;
8357 if (sample_idx > pheno_nm_ct) {
8358 cur_word &= (ONELU << (2 * (pheno_nm_ct % BITCT2))) - ONELU;
8359 }
8360 // now hom A1 = 11 and het = 01. Temporarily erase the 10s.
8361 uljj = cur_word & FIVEMASK;
8362 ulii = uljj | (cur_word & (uljj << 1));
8363 while (ulii) {
8364 uii = CTZLU(ulii);
8365 dxx = pheno_d2[sample_idx_base + uii / 2];
8366 if (!((ulii >> (uii + 1)) & 1)) {
8367 // het
8368 cur_phenogeno += dxx;
8369 geno1_ct++;
8370 } else {
8371 // hom A1
8372 cur_phenogeno += 2 * dxx;
8373 geno2_ct++;
8374 }
8375 ulii &= ~((3 * ONELU) << uii);
8376 }
8377 // currently hom A1 = 11, missing = 10, het = 01, hom A2 = 00
8378 // rotate to hom A1 = 10, missing = 11, het = 01, hom A2 = 00
8379 // to allow inner loop to use ordinary multiplication
8380 *loadbuf_write++ = cur_word ^ ((cur_word >> 1) & FIVEMASK);
8381 }
8382 *phenogeno = cur_phenogeno;
8383 genosums[0] = geno1_ct + 2 * geno2_ct;
8384 genosums[1] = geno1_ct + 4 * geno2_ct;
8385 }
8386
// Quantitative-trait --epistasis scan.
//
// Copies the nonmissing phenotype values into a dense array (rejecting a
// constant phenotype), then walks the first variant set over
// [marker_idx1_start, marker_idx1_end) in memory-sized blocks.  For each such
// block, the second variant set is streamed through in blocks of its own,
// epi_linear_thread() is run across the worker threads, and the per-thread
// best chi-square / best partner / significant-count / failure-count scratch
// arrays are merged into best_chisq, best_ids, n_sig_cts and fail_cts.  After
// each first-set block, every stored result is written to the output file.
//
// Output goes to outname with outname_end overwritten by ".epi.qt" (followed
// by ".<parallel_idx + 1>" when parallel_tot > 1); the header line is only
// written when parallel_idx is zero.
//
// Returns 0 on success, otherwise one of RET_NOMEM, RET_OPEN_FAIL,
// RET_READ_FAIL, RET_WRITE_FAIL, RET_INVALID_CMDLINE (constant phenotype) or
// RET_THREAD_CREATE_FAIL.  The output file is closed on every path; bigstack
// allocations made here are left for the caller to release.
//
// epi_ip and marker_ct1 are not referenced by this function.
int32_t epistasis_linear_regression(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_t unfiltered_marker_ct, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, Chrom_info* chrom_info_ptr, uintptr_t marker_uidx_base, uintptr_t marker_ct1, uintptr_t* marker_exclude1, uintptr_t marker_idx1_start, uintptr_t marker_idx1_end, uintptr_t marker_ct2, uintptr_t* marker_exclude2, uint32_t is_triangular, uintptr_t job_size, uint64_t tests_expected, uintptr_t unfiltered_sample_ct, uintptr_t* pheno_nm, uint32_t pheno_nm_ct, double* pheno_d, uint32_t parallel_idx, uint32_t parallel_tot, char* outname, char* outname_end, double output_min_p, double glm_vif_thresh, uintptr_t* loadbuf_raw, uintptr_t* loadbuf, double* best_chisq, uint32_t* best_ids, uint32_t* n_sig_cts, uint32_t* fail_cts, uint32_t* gap_cts) {
  // We use QT --assoc's strategy for speeding up linear regression, since we
  // do not need to support arbitrary covariates.  It's more complicated here
  // because we have 3 covariates instead of one, but two of them are still
  // restricted to the values {0, 1, 2} and the last is the product of the
  // first two.  So we're able to use variations of the QT --assoc bit hacks.
  FILE* outfile = nullptr;
  uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
  uintptr_t pheno_nm_ctl2 = QUATERCT_TO_WORDCT(pheno_nm_ct);
  uintptr_t final_mask = get_final_mask(pheno_nm_ct);
  uintptr_t marker_uidx = marker_uidx_base;
  uintptr_t pct = 1;
  uintptr_t marker_uidx2 = 0;
  uintptr_t marker_idx1 = marker_idx1_start;
  uintptr_t marker_idx2 = 0;
  uint64_t pct_thresh = tests_expected / 100;
  uint64_t tests_complete = 0;
  uint32_t max_thread_ct = g_epi_thread_ct;
  uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
  uint32_t chrom_end = 0;
  uint32_t chrom_idx = 0;
  uint32_t chrom_idx2 = 0;
  int32_t retval = 0;
  unsigned char* bigstack_mark2;
  char* wptr_start;
  char* wptr_start2;
  char* wptr;
  double* pheno_d2;
  double* dptr;
  double* dptr2;
  uint32_t* uiptr;
  uint32_t* uiptr2;
  uint32_t* uiptr3;
  uint32_t* uiptr4;
  uint32_t* uiptr5;
  uintptr_t cur_bigstack_left;
  uintptr_t cur_workload;
  uintptr_t idx1_block_size;
  uintptr_t idx2_block_size;
  uintptr_t idx2_block_sizea16;
  uintptr_t marker_uidx_tmp;
  uintptr_t block_idx1;
  uintptr_t block_idx2;
  uintptr_t cur_idx2_block_size;
  uintptr_t chrom_end2;
  uintptr_t tidx;
  uintptr_t ulii;
  uintptr_t uljj;
  uintptr_t ulkk;
  double dxx;
  uint32_t chrom_fo_idx;
  uint32_t chrom_fo_idx2;
  uint32_t is_last_block;
  uint32_t sample_uidx;
  uint32_t sample_idx;
  uint32_t uii;
  uint32_t ujj;
  if (bigstack_alloc_d(pheno_nm_ct, &pheno_d2)) {
    goto epistasis_linear_regression_ret_NOMEM;
  }
  g_epi_pheno_d2 = pheno_d2;
  g_epi_pheno_nm_ct = pheno_nm_ct;
  dptr = pheno_d2;
  g_epi_pheno_sum = 0;
  g_epi_pheno_ssq = 0;
  // Densely pack the phenotypes of the samples flagged in pheno_nm, and
  // accumulate their sum and sum of squares.
  for (sample_uidx = 0, sample_idx = 0; sample_idx < pheno_nm_ct; sample_uidx++, sample_idx++) {
    next_set_unsafe_ck(pheno_nm, &sample_uidx);
    dxx = pheno_d[sample_uidx];
    g_epi_pheno_sum += dxx;
    g_epi_pheno_ssq += dxx * dxx;
    pheno_d2[sample_idx] = dxx;
  }
  // could add an epsilon here, but this is good enough to catch the most
  // common case (all phenotypes are the same integer near zero).
  if (g_epi_pheno_ssq * ((double)((int32_t)pheno_nm_ct)) == g_epi_pheno_sum * g_epi_pheno_sum) {
    logerrprint("Error: Phenotype is constant.\n");
    goto epistasis_linear_regression_ret_INVALID_CMDLINE;
  }
  g_epi_vif_thresh = glm_vif_thresh;

  // claim up to half of memory with idx1 bufs; each marker currently costs:
  //   pheno_nm_ctl2 * sizeof(intptr_t) for geno buf
  //   sizeof(double) for precomputed sum(phenotype * genotype) values
  //   2 * sizeof(int32_t) for precomputed sum(genotype) and sum(genotype^2)
  //     values
  //   4 * sizeof(int32_t) + sizeof(double) + marker_ct2 * 2 * sizeof(double)
  //     for other stuff (see epistasis_report() comment, starting from
  //     "offset"; main result buffer must be double-size to store both beta
  //     and chi-square stat)
  cur_bigstack_left = bigstack_left();
  ulii = 6 * CACHELINE + max_thread_ct * (5 * (CACHELINE - 4)) - 5 * sizeof(int32_t) - sizeof(double);
  if (cur_bigstack_left >= ulii) {
    cur_bigstack_left -= ulii;
  }
  ulii = pheno_nm_ctl2 * sizeof(intptr_t) + 6 * sizeof(int32_t) + 2 * sizeof(double) + marker_ct2 * 2 * sizeof(double);
  idx1_block_size = cur_bigstack_left / (ulii * 2 + 1);
  if (!idx1_block_size) {
    goto epistasis_linear_regression_ret_NOMEM;
  }
  if (idx1_block_size > job_size) {
    idx1_block_size = job_size;
  }
  // pad to avoid threads writing to same cacheline
  ulii = (max_thread_ct - 1) * 15 + idx1_block_size;
  // The block size above was chosen to make these fit, but fail cleanly
  // rather than dereference a null buffer if that estimate is ever off.
  // may be better to just recompute genosums values in inner loop?  can test
  // this later
  if (bigstack_alloc_ui(idx1_block_size * 2, &g_epi_geno1_offsets) ||
      bigstack_alloc_ul(pheno_nm_ctl2 * idx1_block_size, &g_epi_geno1) ||
      bigstack_alloc_d(idx1_block_size, &g_epi_phenogeno1) ||
      bigstack_alloc_ui(idx1_block_size * 2, &g_epi_genosums1) ||
      bigstack_alloc_d(idx1_block_size * marker_ct2 * 2, &g_epi_all_chisq) ||
      bigstack_alloc_d(ulii, &g_epi_best_chisq1) ||
      bigstack_alloc_ui(ulii, &g_epi_best_id1) ||
      bigstack_alloc_ui(ulii, &g_epi_n_sig_ct1) ||
      bigstack_alloc_ui(ulii, &g_epi_fail_ct1)) {
    goto epistasis_linear_regression_ret_NOMEM;
  }
  // zero the last word of every first-set genotype slot
  for (block_idx1 = 0; block_idx1 < idx1_block_size; block_idx1++) {
    g_epi_geno1[block_idx1 * pheno_nm_ctl2 + pheno_nm_ctl2 - 1] = 0;
  }
  if (is_triangular) {
    fill_uint_zero(2 * idx1_block_size, g_epi_geno1_offsets);
  }

  ulii = pheno_nm_ctl2 * sizeof(intptr_t) + 2 * sizeof(int32_t) + sizeof(double) + max_thread_ct * (3 * sizeof(int32_t) + sizeof(double));
  idx2_block_size = (bigstack_left() - (3 * CACHELINE - sizeof(intptr_t) - 2 * sizeof(int32_t) - sizeof(double)) - max_thread_ct * (3 * (CACHELINE - sizeof(int32_t)) + (CACHELINE - sizeof(double)))) / ulii;
  if (idx2_block_size > marker_ct2) {
    idx2_block_size = marker_ct2;
  }
  idx2_block_size = round_up_pow2(idx2_block_size, 16);

  memcpy(outname_end, ".epi.qt", 8);
  if (parallel_tot > 1) {
    outname_end[7] = '.';
    uint32toa_x(parallel_idx + 1, '\0', &(outname_end[8]));
  }
  if (fopen_checked(outname, "w", &outfile)) {
    goto epistasis_linear_regression_ret_OPEN_FAIL;
  }
  if (!parallel_idx) {
    wptr = memcpya(g_textbuf, "CHR1 ", 5);
    wptr = fw_strcpyn(plink_maxsnp, 4, "SNP1", wptr);
    wptr = memcpya(wptr, " CHR2 ", 6);
    wptr = fw_strcpyn(plink_maxsnp, 4, "SNP2", wptr);
    // 41 bytes: three right-aligned 12-character columns, each preceded by a
    // space, then a trailing space and newline (same layout as the data rows
    // written below).
    wptr = memcpya(wptr, "     BETA_INT         STAT            P \n", 41);
    if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
      goto epistasis_linear_regression_ret_WRITE_FAIL;
    }
  }

  // Shrink the second-set block 16 variants at a time until all of its
  // buffers fit.
  bigstack_mark2 = g_bigstack_base;
  while (1) {
    if (!idx2_block_size) {
      goto epistasis_linear_regression_ret_NOMEM;
    }
    if (!(bigstack_alloc_ul(pheno_nm_ctl2 * idx2_block_size, &g_epi_geno2) ||
          bigstack_alloc_d(idx2_block_size, &g_epi_phenogeno2) ||
          bigstack_alloc_ui(idx2_block_size * 2, &g_epi_genosums2) ||
          bigstack_alloc_d(max_thread_ct * idx2_block_size, &g_epi_best_chisq2) ||
          bigstack_alloc_ui(max_thread_ct * idx2_block_size, &g_epi_best_id2) ||
          bigstack_alloc_ui(max_thread_ct * idx2_block_size, &g_epi_n_sig_ct2) ||
          bigstack_alloc_ui(max_thread_ct * idx2_block_size, &g_epi_fail_ct2))) {
      break;
    }
    bigstack_reset(bigstack_mark2);
    idx2_block_size -= 16;
  }
  for (block_idx2 = 0; block_idx2 < idx2_block_size; block_idx2++) {
    g_epi_geno2[block_idx2 * pheno_nm_ctl2 + pheno_nm_ctl2 - 1] = 0;
  }
  marker_uidx = next_unset_ul_unsafe(marker_exclude1, marker_uidx_base);
  if (marker_idx1) {
    marker_uidx = jump_forward_unset_unsafe(marker_exclude1, marker_uidx + 1, marker_idx1);
  }
  wptr = memcpya(g_logbuf, "QT --epistasis to ", 18);
  wptr = strcpya(wptr, outname);
  memcpy(wptr, " ... ", 6);
  wordwrapb(16); // strlen("99% [processing]")
  logprintb();
  fputs("0%", stdout);
  do {
    fputs(" [processing]", stdout);
    fflush(stdout);
    if (idx1_block_size > marker_idx1_end - marker_idx1) {
      // final, shorter first-set block; never use more threads than variants
      idx1_block_size = marker_idx1_end - marker_idx1;
      if (idx1_block_size < max_thread_ct) {
        max_thread_ct = idx1_block_size;
        g_epi_thread_ct = max_thread_ct;
      }
    }
    g_epi_marker_idx1 = marker_idx1;
    // Results are stored as (chi-square, beta) pairs; a chi-square of -1 marks
    // "nothing to report".
    dptr = g_epi_all_chisq;
    dptr2 = &(g_epi_all_chisq[idx1_block_size * marker_ct2 * 2]);
    do {
      *dptr = -1;
      dptr = &(dptr[2]);
    } while (dptr < dptr2);
    marker_uidx_tmp = marker_uidx;
    if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
      goto epistasis_linear_regression_ret_READ_FAIL;
    }
    cur_workload = idx1_block_size * marker_ct2;
    if (is_triangular) {
      for (block_idx1 = 0; block_idx1 < idx1_block_size; block_idx1++) {
        ulii = block_idx1 + marker_idx1 + 1;
        cur_workload -= ulii;
        g_epi_geno1_offsets[2 * block_idx1 + 1] = ulii;
      }
    } else {
      fill_uint_zero(2 * idx1_block_size, g_epi_geno1_offsets);
      marker_uidx2 = marker_uidx_base;
      marker_idx2 = 0;
    }
    tests_complete += cur_workload;
    // Split the first-set block between threads so that each gets roughly the
    // same number of tests.
    ulii = 0; // total number of tests
    g_epi_idx1_block_bounds[0] = 0;
    g_epi_idx1_block_bounds16[0] = 0;
    block_idx1 = 0;
    for (tidx = 1; tidx < max_thread_ct; tidx++) {
      uljj = (((uint64_t)cur_workload) * tidx) / max_thread_ct;
      if (is_triangular) {
        do {
          ulii += marker_ct2 - g_epi_geno1_offsets[2 * block_idx1 + 1];
          block_idx1++;
        } while (ulii < uljj);
      } else {
        do {
          ulii += marker_ct2;
          block_idx1++;
        } while (ulii < uljj);
      }
      uii = block_idx1 - g_epi_idx1_block_bounds[tidx - 1];
      g_epi_idx1_block_bounds[tidx] = block_idx1;
      g_epi_idx1_block_bounds16[tidx] = g_epi_idx1_block_bounds16[tidx - 1] + round_up_pow2_ui(uii, 16);
    }
    g_epi_idx1_block_bounds[max_thread_ct] = idx1_block_size;
    chrom_end = 0;
    // Load and preprocess the genotypes of this first-set block.
    for (block_idx1 = 0; block_idx1 < idx1_block_size; marker_uidx_tmp++, block_idx1++) {
      if (IS_SET(marker_exclude1, marker_uidx_tmp)) {
        marker_uidx_tmp = next_unset_ul_unsafe(marker_exclude1, marker_uidx_tmp);
        if (fseeko(bedfile, bed_offset + (marker_uidx_tmp * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
          goto epistasis_linear_regression_ret_READ_FAIL;
        }
      }
      if (load_and_collapse_incl(unfiltered_sample_ct, pheno_nm_ct, pheno_nm, final_mask, IS_SET(marker_reverse, marker_uidx_tmp), bedfile, loadbuf_raw, loadbuf)) {
        goto epistasis_linear_regression_ret_READ_FAIL;
      }
      rotate_loadbuf_and_compute_phenogeno(loadbuf, pheno_d2, pheno_nm_ct, &(g_epi_geno1[block_idx1 * pheno_nm_ctl2]), &(g_epi_phenogeno1[block_idx1]), &(g_epi_genosums1[block_idx1 * 2]));
      if (!is_triangular) {
        if (!IS_SET(marker_exclude2, marker_uidx_tmp)) {
          // do not compare against self
          marker_idx2 += marker_uidx_tmp - marker_uidx2 - popcount_bit_idx(marker_exclude2, marker_uidx2, marker_uidx_tmp);
          marker_uidx2 = marker_uidx_tmp;
          g_epi_geno1_offsets[2 * block_idx1] = marker_idx2;
          g_epi_geno1_offsets[2 * block_idx1 + 1] = marker_idx2 + 1;
          gap_cts[block_idx1 + marker_idx1] = 1;
        }
      }
    }
    marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx_base);
    if (is_triangular) {
      marker_idx2 = marker_idx1 + 1;
      marker_uidx2 = jump_forward_unset_unsafe(marker_exclude2, marker_uidx2 + 1, marker_idx2);
    } else {
      marker_idx2 = 0;
    }
    if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
      goto epistasis_linear_regression_ret_READ_FAIL;
    }
    cur_idx2_block_size = idx2_block_size;
    do {
      if (cur_idx2_block_size > marker_ct2 - marker_idx2) {
        cur_idx2_block_size = marker_ct2 - marker_idx2;
      }
      // Load and preprocess the genotypes of this second-set block.
      for (block_idx2 = 0; block_idx2 < cur_idx2_block_size; marker_uidx2++, block_idx2++) {
        if (IS_SET(marker_exclude2, marker_uidx2)) {
          marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx2);
          if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
            goto epistasis_linear_regression_ret_READ_FAIL;
          }
        }
        if (load_and_collapse_incl(unfiltered_sample_ct, pheno_nm_ct, pheno_nm, final_mask, IS_SET(marker_reverse, marker_uidx2), bedfile, loadbuf_raw, loadbuf)) {
          goto epistasis_linear_regression_ret_READ_FAIL;
        }
        rotate_loadbuf_and_compute_phenogeno(loadbuf, pheno_d2, pheno_nm_ct, &(g_epi_geno2[block_idx2 * pheno_nm_ctl2]), &(g_epi_phenogeno2[block_idx2]), &(g_epi_genosums2[block_idx2 * 2]));
      }
      g_epi_idx2_block_size = cur_idx2_block_size;
      g_epi_idx2_block_start = marker_idx2;
      idx2_block_sizea16 = round_up_pow2(cur_idx2_block_size, 16);
      fill_uint_zero(idx1_block_size + 15 * (max_thread_ct - 1), g_epi_n_sig_ct1);
      fill_uint_zero(idx1_block_size + 15 * (max_thread_ct - 1), g_epi_fail_ct1);
      fill_uint_zero(idx2_block_sizea16 * max_thread_ct, g_epi_n_sig_ct2);
      fill_uint_zero(idx2_block_sizea16 * max_thread_ct, g_epi_fail_ct2);
      // Seed the per-thread best-chi-square scratch arrays.
      for (tidx = 0; tidx < max_thread_ct; tidx++) {
        ulii = g_epi_idx1_block_bounds[tidx];
        uljj = g_epi_idx1_block_bounds[tidx + 1] - ulii;
        // Each thread's slice of the *1 scratch arrays starts at its
        // 16-padded offset; the merge after the threads finish reads the
        // slices back from g_epi_idx1_block_bounds16[] as well, so the seed
        // values must be written there and not at the unpadded offset.
        dptr = &(g_epi_best_chisq1[g_epi_idx1_block_bounds16[tidx]]);
        dptr2 = &(g_epi_all_chisq[(marker_idx1 + ulii) * 2]);
        for (ulkk = 0; ulkk < uljj; ulkk++) {
          *dptr++ = dptr2[ulkk * 2];
        }
        ulii = g_epi_geno1_offsets[2 * ulii + 1];
        if (ulii < marker_idx2 + cur_idx2_block_size) {
          if (ulii <= marker_idx2) {
            ulii = 0;
          } else {
            ulii -= marker_idx2;
          }
          uljj = cur_idx2_block_size - ulii;
          dptr = &(g_epi_best_chisq2[tidx * idx2_block_sizea16 + ulii]);
          dptr2 = &(g_epi_all_chisq[(marker_idx2 + ulii) * 2]);
          for (ulkk = 0; ulkk < uljj; ulkk++) {
            *dptr++ = dptr2[ulkk * 2];
          }
        }
      }
      is_last_block = (marker_idx2 + cur_idx2_block_size >= marker_ct2);
      if (spawn_threads2(threads, &epi_linear_thread, max_thread_ct, is_last_block)) {
        goto epistasis_linear_regression_ret_THREAD_CREATE_FAIL;
      }
      // the calling thread does thread 0's share of the work itself
      epi_linear_thread((void*)0);
      join_threads2(threads, max_thread_ct, is_last_block);
      // merge best_chisq, best_ids, fail_cts
      for (tidx = 0; tidx < max_thread_ct; tidx++) {
        ulii = g_epi_idx1_block_bounds[tidx];
        uljj = g_epi_idx1_block_bounds[tidx + 1] - ulii;
        uii = g_epi_idx1_block_bounds16[tidx];
        dptr = &(g_epi_best_chisq1[uii]);
        uiptr = &(g_epi_best_id1[uii]);
        uiptr2 = &(g_epi_n_sig_ct1[uii]);
        uiptr3 = &(g_epi_fail_ct1[uii]);
        ulii += marker_idx1;
        dptr2 = &(best_chisq[ulii]);
        uiptr4 = &(n_sig_cts[ulii]);
        uiptr5 = &(fail_cts[ulii]);
        for (block_idx1 = 0; block_idx1 < uljj; block_idx1++, dptr2++, uiptr4++, uiptr5++) {
          dxx = *dptr++;
          if (dxx > (*dptr2)) {
            *dptr2 = dxx;
            best_ids[block_idx1 + ulii] = uiptr[block_idx1];
          }
          *uiptr4 += *uiptr2++;
          *uiptr5 += *uiptr3++;
        }
      }
      if (is_triangular) {
        // In the triangular case the second-set side of each pair is folded
        // into the same output arrays.
        for (tidx = 0; tidx < max_thread_ct; tidx++) {
          block_idx2 = g_epi_geno1_offsets[2 * g_epi_idx1_block_bounds[tidx] + 1];
          if (block_idx2 <= marker_idx2) {
            block_idx2 = 0;
          } else {
            block_idx2 -= marker_idx2;
          }
          dptr = &(g_epi_best_chisq2[tidx * idx2_block_sizea16 + block_idx2]);
          uiptr = &(g_epi_best_id2[tidx * idx2_block_sizea16]);
          uiptr2 = &(g_epi_n_sig_ct2[tidx * idx2_block_sizea16 + block_idx2]);
          uiptr3 = &(g_epi_fail_ct2[tidx * idx2_block_sizea16 + block_idx2]);
          dptr2 = &(best_chisq[block_idx2 + marker_idx2]);
          uiptr4 = &(n_sig_cts[block_idx2 + marker_idx2]);
          uiptr5 = &(fail_cts[block_idx2 + marker_idx2]);
          for (; block_idx2 < cur_idx2_block_size; block_idx2++, dptr2++, uiptr4++, uiptr5++) {
            dxx = *dptr++;
            if (dxx > (*dptr2)) {
              *dptr2 = dxx;
              best_ids[block_idx2 + marker_idx2] = uiptr[block_idx2];
            }
            *uiptr4 += *uiptr2++;
            *uiptr5 += *uiptr3++;
          }
        }
      }
      marker_idx2 += cur_idx2_block_size;
    } while (marker_idx2 < marker_ct2);
    // Overwrite "processing]" (11 characters) with "writing]" padded to the
    // same width, then step back over the three padding spaces.
    fputs("\b\b\b\b\b\b\b\b\b\b\bwriting]   \b\b\b", stdout);
    fflush(stdout);
    chrom_end = 0;
    block_idx1 = 0;
    // Write one line per stored result for every first-set variant in this
    // block.
    while (1) {
      next_unset_ul_unsafe_ck(marker_exclude1, &marker_uidx);
      ujj = g_epi_geno1_offsets[2 * block_idx1];
      marker_idx2 = 0;
      dptr = &(g_epi_all_chisq[block_idx1 * 2 * marker_ct2]);
      if (marker_uidx >= chrom_end) {
        chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx);
        chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
        chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
      }
      wptr_start = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, chrom_idx, g_textbuf));
      *wptr_start++ = ' ';
      wptr_start = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx * max_marker_id_len]), wptr_start);
      *wptr_start++ = ' ';
      marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx_base);
      for (chrom_fo_idx2 = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2); chrom_fo_idx2 < chrom_ct; chrom_fo_idx2++) {
        chrom_idx2 = chrom_info_ptr->chrom_file_order[chrom_fo_idx2];
        chrom_end2 = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx2 + 1];
        wptr_start2 = width_force(4, wptr_start, chrom_name_write(chrom_info_ptr, chrom_idx2, wptr_start));
        *wptr_start2++ = ' ';
        for (; marker_uidx2 < chrom_end2; ++marker_uidx2, next_unset_ul_ck(marker_exclude2, unfiltered_marker_ct, &marker_uidx2), ++marker_idx2, dptr = &(dptr[2])) {
          if (marker_idx2 == ujj) {
            // jump over the [offsets[2k], offsets[2k + 1]) range of second-set
            // variants recorded for this first-set variant
            marker_idx2 = g_epi_geno1_offsets[2 * block_idx1 + 1];
            if (marker_idx2 == marker_ct2) {
              goto epistasis_linear_regression_write_loop;
            }
            if (marker_idx2 > ujj) {
              marker_uidx2 = jump_forward_unset_unsafe(marker_exclude2, marker_uidx2 + 1, marker_idx2 - ujj);
              dptr = &(dptr[2 * (marker_idx2 - ujj)]);
              if (marker_uidx2 >= chrom_end2) {
                break;
              }
            }
          } else if (marker_idx2 == marker_ct2) {
            goto epistasis_linear_regression_write_loop;
          }
          dxx = *dptr;
          if (dxx != -1) {
            wptr = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx2 * max_marker_id_len]), wptr_start2);
            *wptr++ = ' ';
            // beta
            wptr = width_force(12, wptr, dtoa_g(dptr[1], wptr));
            *wptr++ = ' ';
            wptr = width_force(12, wptr, dtoa_g(dxx, wptr));
            *wptr++ = ' ';
            dxx = normdist(-sqrt(dxx)) * 2;
            wptr = dtoa_g_wxp4x(MAXV(dxx, output_min_p), 12, ' ', wptr);
            *wptr++ = '\n';
            if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
              goto epistasis_linear_regression_ret_WRITE_FAIL;
            }
            // could remove this writeback in --epi1 1 case
            *dptr = -1;
          }
        }
      }
    epistasis_linear_regression_write_loop:
      block_idx1++;
      marker_uidx++;
      if (block_idx1 >= idx1_block_size) {
        break;
      }
    }
    marker_idx1 += idx1_block_size;
    // Erase " [writing]" (10 characters): back up, blank it out, back up again.
    fputs("\b\b\b\b\b\b\b\b\b\b          \b\b\b\b\b\b\b\b\b\b", stdout);
    if (tests_complete >= pct_thresh) {
      if (pct > 10) {
        putc_unlocked('\b', stdout);
      }
      pct = (tests_complete * 100LLU) / tests_expected;
      if (pct < 100) {
        printf("\b\b%" PRIuPTR "%%", pct);
        fflush(stdout);
        pct_thresh = ((++pct) * ((uint64_t)tests_expected)) / 100;
      }
    }
  } while (marker_idx1 < marker_idx1_end);
  if (fclose_null(&outfile)) {
    goto epistasis_linear_regression_ret_WRITE_FAIL;
  }
  while (0) {
  epistasis_linear_regression_ret_NOMEM:
    retval = RET_NOMEM;
    break;
  epistasis_linear_regression_ret_OPEN_FAIL:
    retval = RET_OPEN_FAIL;
    break;
  epistasis_linear_regression_ret_READ_FAIL:
    retval = RET_READ_FAIL;
    break;
  epistasis_linear_regression_ret_WRITE_FAIL:
    retval = RET_WRITE_FAIL;
    break;
  epistasis_linear_regression_ret_INVALID_CMDLINE:
    retval = RET_INVALID_CMDLINE;
    break;
  epistasis_linear_regression_ret_THREAD_CREATE_FAIL:
    retval = RET_THREAD_CREATE_FAIL;
    break;
  }
  fclose_cond(outfile);
  // caller will free memory
  return retval;
}
8866
epistasis_logistic_regression(pthread_t * threads,Epi_info * epi_ip,FILE * bedfile,uintptr_t bed_offset,uintptr_t unfiltered_marker_ct,uintptr_t * marker_reverse,char * marker_ids,uintptr_t max_marker_id_len,uint32_t plink_maxsnp,Chrom_info * chrom_info_ptr,uintptr_t marker_uidx_base,uintptr_t marker_ct1,uintptr_t * marker_exclude1,uintptr_t marker_idx1_start,uintptr_t marker_idx1_end,uintptr_t marker_ct2,uintptr_t * marker_exclude2,uint32_t is_triangular,uintptr_t job_size,uint64_t tests_expected,uintptr_t unfiltered_sample_ct,uintptr_t * pheno_nm,uint32_t pheno_nm_ct,uintptr_t * pheno_c,uint32_t parallel_idx,uint32_t parallel_tot,char * outname,char * outname_end,double output_min_p,uintptr_t * loadbuf_raw,uintptr_t * loadbuf,double * best_chisq,uint32_t * best_ids,uint32_t * n_sig_cts,uint32_t * fail_cts,uint32_t * gap_cts)8867 int32_t epistasis_logistic_regression(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_t unfiltered_marker_ct, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, Chrom_info* chrom_info_ptr, uintptr_t marker_uidx_base, uintptr_t marker_ct1, uintptr_t* marker_exclude1, uintptr_t marker_idx1_start, uintptr_t marker_idx1_end, uintptr_t marker_ct2, uintptr_t* marker_exclude2, uint32_t is_triangular, uintptr_t job_size, uint64_t tests_expected, uintptr_t unfiltered_sample_ct, uintptr_t* pheno_nm, uint32_t pheno_nm_ct, uintptr_t* pheno_c, uint32_t parallel_idx, uint32_t parallel_tot, char* outname, char* outname_end, double output_min_p, uintptr_t* loadbuf_raw, uintptr_t* loadbuf, double* best_chisq, uint32_t* best_ids, uint32_t* n_sig_cts, uint32_t* fail_cts, uint32_t* gap_cts) {
8868 FILE* outfile = nullptr;
8869 uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
8870 uintptr_t pheno_nm_cta4 = round_up_pow2(pheno_nm_ct, 4);
8871 uintptr_t pheno_nm_ctl2 = QUATERCT_TO_WORDCT(pheno_nm_ct);
8872 uintptr_t final_mask = get_final_mask(pheno_nm_ct);
8873 uintptr_t marker_uidx = marker_uidx_base;
8874 uintptr_t pct = 1;
8875 uintptr_t marker_uidx2 = 0;
8876 uintptr_t marker_idx1 = marker_idx1_start;
8877 uintptr_t marker_idx2 = 0;
8878 uint64_t pct_thresh = tests_expected / 100;
8879 uint64_t tests_complete = 0;
8880 uint32_t max_thread_ct = g_epi_thread_ct;
8881 uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
8882 uint32_t chrom_end = 0;
8883 uint32_t chrom_idx = 0;
8884 uint32_t chrom_idx2 = 0;
8885 int32_t retval = 0;
8886 unsigned char* bigstack_mark2;
8887 uintptr_t* ulptr;
8888 char* wptr_start;
8889 char* wptr_start2;
8890 char* wptr;
8891 float* fptr;
8892 float* fptr2;
8893 double* dptr;
8894 uint32_t* uiptr;
8895 uint32_t* uiptr2;
8896 uint32_t* uiptr3;
8897 uint32_t* uiptr4;
8898 uint32_t* uiptr5;
8899 uintptr_t cur_bigstack_left;
8900 uintptr_t cur_workload;
8901 uintptr_t idx1_block_size;
8902 uintptr_t idx2_block_size;
8903 uintptr_t idx2_block_sizea16;
8904 uintptr_t marker_uidx_tmp;
8905 uintptr_t block_idx1;
8906 uintptr_t block_idx2;
8907 uintptr_t cur_idx2_block_size;
8908 uintptr_t chrom_end2;
8909 uintptr_t tidx;
8910 uintptr_t ulii;
8911 uintptr_t uljj;
8912 uintptr_t ulkk;
8913 double dxx;
8914 float fxx;
8915 uint32_t chrom_fo_idx;
8916 uint32_t chrom_fo_idx2;
8917 uint32_t is_last_block;
8918 uint32_t uii;
8919 uint32_t ujj;
8920 if (bigstack_alloc_ul(pheno_nm_ctl2, &g_epi_pheno_c)) {
8921 goto epistasis_logistic_regression_ret_NOMEM;
8922 }
8923 copy_bitarr_subset(pheno_c, pheno_nm, unfiltered_sample_ct, pheno_nm_ct, g_epi_pheno_c);
8924 g_epi_pheno_nm_ct = pheno_nm_ct;
8925 // per-thread buffers
8926 g_epi_logistic_mt = (Epi_logistic_multithread*)bigstack_alloc(max_thread_ct * sizeof(Epi_logistic_multithread));
8927 if (!g_epi_logistic_mt) {
8928 goto epistasis_logistic_regression_ret_NOMEM;
8929 }
8930 // param_ct_max = 4 (intercept, A, B, AB)
8931 for (tidx = 0; tidx < max_thread_ct; tidx++) {
8932 if (bigstack_alloc_f(pheno_nm_cta4 * 4, &(g_epi_logistic_mt[tidx].cur_covars_cov_major)) ||
8933 bigstack_alloc_f(4, &(g_epi_logistic_mt[tidx].coef)) ||
8934 bigstack_alloc_f(pheno_nm_cta4, &(g_epi_logistic_mt[tidx].pp)) ||
8935 bigstack_alloc_f(pheno_nm_ct, &(g_epi_logistic_mt[tidx].sample_1d_buf)) ||
8936 bigstack_alloc_f(pheno_nm_ct, &(g_epi_logistic_mt[tidx].pheno_buf)) ||
8937 bigstack_alloc_f(pheno_nm_ct * 4, &(g_epi_logistic_mt[tidx].param_1d_buf)) ||
8938 bigstack_alloc_f(pheno_nm_ct, &(g_epi_logistic_mt[tidx].param_1d_buf2)) ||
8939 bigstack_alloc_f(4 * 4, &(g_epi_logistic_mt[tidx].param_2d_buf)) ||
8940 bigstack_alloc_f(4 * 4, &(g_epi_logistic_mt[tidx].param_2d_buf2))) {
8941 goto epistasis_logistic_regression_ret_NOMEM;
8942 }
8943 }
8944
8945 // claim up to half of memory with idx1 bufs; each marker currently costs:
8946 // pheno_nm_ctl2 * sizeof(intptr_t) for geno buf
8947 // 4 * sizeof(int32_t) + sizeof(float) + marker_ct2 * 2 * sizeof(float)
8948 // for other stuff (see epistasis_report() comment, starting from
8949 // "offset"; main result buffer must be double-size to store both beta
8950 // and chi-square stat)
8951 cur_bigstack_left = bigstack_left();
8952 ulii = 4 * CACHELINE - 3 * sizeof(int32_t) + max_thread_ct * (5 * (CACHELINE - 4));
8953 if (cur_bigstack_left >= ulii) {
8954 cur_bigstack_left -= ulii;
8955 }
8956 ulii = pheno_nm_ctl2 * sizeof(intptr_t) + 4 * sizeof(int32_t) + sizeof(float) + marker_ct2 * 2 * sizeof(float);
8957 idx1_block_size = cur_bigstack_left / (ulii * 2 + 1);
8958 if (!idx1_block_size) {
8959 goto epistasis_logistic_regression_ret_NOMEM;
8960 }
8961 if (idx1_block_size > job_size) {
8962 idx1_block_size = job_size;
8963 }
8964 // pad to avoid threads writing to same cacheline
8965 ulii = (max_thread_ct - 1) * 15 + idx1_block_size;
8966 bigstack_alloc_ui(idx1_block_size * 2, &g_epi_geno1_offsets);
8967 bigstack_alloc_ul(pheno_nm_ctl2 * idx1_block_size, &g_epi_geno1);
8968 bigstack_alloc_f(idx1_block_size * marker_ct2 * 2, &g_epi_all_chisq_f);
8969 bigstack_alloc_f(ulii, &g_epi_best_chisq_f1);
8970 bigstack_alloc_ui(ulii, &g_epi_best_id1);
8971 bigstack_alloc_ui(ulii, &g_epi_n_sig_ct1);
8972 bigstack_alloc_ui(ulii, &g_epi_fail_ct1);
8973 for (block_idx1 = 0; block_idx1 < idx1_block_size; block_idx1++) {
8974 g_epi_geno1[block_idx1 * pheno_nm_ctl2 + pheno_nm_ctl2 - 1] = 0;
8975 }
8976 if (is_triangular) {
8977 fill_uint_zero(2 * idx1_block_size, g_epi_geno1_offsets);
8978 }
8979
8980 ulii = pheno_nm_ctl2 * sizeof(intptr_t) + max_thread_ct * (3 * sizeof(int32_t) + sizeof(double));
8981 idx2_block_size = (bigstack_left() - (CACHELINE - sizeof(intptr_t)) - max_thread_ct * (3 * (CACHELINE - sizeof(int32_t)) + (CACHELINE - sizeof(float)))) / ulii;
8982 if (idx2_block_size > marker_ct2) {
8983 idx2_block_size = marker_ct2;
8984 }
8985 idx2_block_size = round_up_pow2(idx2_block_size, 16);
8986
8987 memcpy(outname_end, ".epi.cc", 8);
8988 if (parallel_tot > 1) {
8989 outname_end[7] = '.';
8990 uint32toa_x(parallel_idx + 1, '\0', &(outname_end[8]));
8991 }
8992 if (fopen_checked(outname, "w", &outfile)) {
8993 goto epistasis_logistic_regression_ret_OPEN_FAIL;
8994 }
8995 if (!parallel_idx) {
8996 wptr = memcpya(g_textbuf, "CHR1 ", 5);
8997 wptr = fw_strcpyn(plink_maxsnp, 4, "SNP1", wptr);
8998 wptr = memcpya(wptr, " CHR2 ", 6);
8999 wptr = fw_strcpyn(plink_maxsnp, 4, "SNP2", wptr);
9000 wptr = memcpya(wptr, " OR_INT STAT P \n", 41);
9001 if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
9002 goto epistasis_logistic_regression_ret_WRITE_FAIL;
9003 }
9004 }
9005
9006 bigstack_mark2 = g_bigstack_base;
9007 while (1) {
9008 if (!idx2_block_size) {
9009 goto epistasis_logistic_regression_ret_NOMEM;
9010 }
9011 if (!(bigstack_alloc_ul(pheno_nm_ctl2 * idx2_block_size, &g_epi_geno2) ||
9012 bigstack_alloc_f(max_thread_ct * idx2_block_size, &g_epi_best_chisq_f2) ||
9013 bigstack_alloc_ui(max_thread_ct * idx2_block_size, &g_epi_best_id2) ||
9014 bigstack_alloc_ui(max_thread_ct * idx2_block_size, &g_epi_n_sig_ct2) ||
9015 bigstack_alloc_ui(max_thread_ct * idx2_block_size, &g_epi_fail_ct2))) {
9016 break;
9017 }
9018 bigstack_reset(bigstack_mark2);
9019 idx2_block_size -= 16;
9020 }
9021 for (block_idx2 = 0; block_idx2 < idx2_block_size; block_idx2++) {
9022 g_epi_geno2[block_idx2 * pheno_nm_ctl2 + pheno_nm_ctl2 - 1] = 0;
9023 }
9024 marker_uidx = next_unset_ul_unsafe(marker_exclude1, marker_uidx_base);
9025 if (marker_idx1) {
9026 marker_uidx = jump_forward_unset_unsafe(marker_exclude1, marker_uidx + 1, marker_idx1);
9027 }
9028 wptr = memcpya(g_logbuf, "C/C --epistasis to ", 19);
9029 wptr = strcpya(wptr, outname);
9030 memcpy(wptr, " ... ", 6);
9031 wordwrapb(16); // strlen("99% [processing]")
9032 logprintb();
9033 fputs("0%", stdout);
9034 do {
9035 fputs(" [processing]", stdout);
9036 fflush(stdout);
9037 if (idx1_block_size > marker_idx1_end - marker_idx1) {
9038 idx1_block_size = marker_idx1_end - marker_idx1;
9039 if (idx1_block_size < max_thread_ct) {
9040 max_thread_ct = idx1_block_size;
9041 g_epi_thread_ct = max_thread_ct;
9042 }
9043 }
9044 g_epi_marker_idx1 = marker_idx1;
9045 fptr = g_epi_all_chisq_f;
9046 fptr2 = &(g_epi_all_chisq_f[idx1_block_size * marker_ct2 * 2]);
9047 do {
9048 *fptr = -1;
9049 fptr = &(fptr[2]);
9050 } while (fptr < fptr2);
9051 marker_uidx_tmp = marker_uidx;
9052 if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
9053 goto epistasis_logistic_regression_ret_READ_FAIL;
9054 }
9055 cur_workload = idx1_block_size * marker_ct2;
9056 if (is_triangular) {
9057 for (block_idx1 = 0; block_idx1 < idx1_block_size; block_idx1++) {
9058 ulii = block_idx1 + marker_idx1 + 1;
9059 cur_workload -= ulii;
9060 g_epi_geno1_offsets[2 * block_idx1 + 1] = ulii;
9061 }
9062 } else {
9063 fill_uint_zero(2 * idx1_block_size, g_epi_geno1_offsets);
9064 marker_uidx2 = marker_uidx_base;
9065 marker_idx2 = 0;
9066 }
9067 tests_complete += cur_workload;
9068 ulii = 0; // total number of tests
9069 g_epi_idx1_block_bounds[0] = 0;
9070 g_epi_idx1_block_bounds16[0] = 0;
9071 block_idx1 = 0;
9072 for (tidx = 1; tidx < max_thread_ct; tidx++) {
9073 uljj = (((uint64_t)cur_workload) * tidx) / max_thread_ct;
9074 if (is_triangular) {
9075 do {
9076 ulii += marker_ct2 - g_epi_geno1_offsets[2 * block_idx1 + 1];
9077 block_idx1++;
9078 } while (ulii < uljj);
9079 } else {
9080 do {
9081 ulii += marker_ct2;
9082 block_idx1++;
9083 } while (ulii < uljj);
9084 }
9085 uii = block_idx1 - g_epi_idx1_block_bounds[tidx - 1];
9086 g_epi_idx1_block_bounds[tidx] = block_idx1;
9087 g_epi_idx1_block_bounds16[tidx] = g_epi_idx1_block_bounds16[tidx - 1] + round_up_pow2_ui(uii, 16);
9088 }
9089 g_epi_idx1_block_bounds[max_thread_ct] = idx1_block_size;
9090 chrom_end = 0;
9091 for (block_idx1 = 0; block_idx1 < idx1_block_size; marker_uidx_tmp++, block_idx1++) {
9092 if (IS_SET(marker_exclude1, marker_uidx_tmp)) {
9093 marker_uidx_tmp = next_unset_ul_unsafe(marker_exclude1, marker_uidx_tmp);
9094 if (fseeko(bedfile, bed_offset + (marker_uidx_tmp * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
9095 goto epistasis_logistic_regression_ret_READ_FAIL;
9096 }
9097 }
9098 // marker_reverse deliberately flipped
9099 if (load_and_collapse_incl(unfiltered_sample_ct, pheno_nm_ct, pheno_nm, final_mask, !IS_SET(marker_reverse, marker_uidx_tmp), bedfile, loadbuf_raw, loadbuf)) {
9100 goto epistasis_logistic_regression_ret_READ_FAIL;
9101 }
9102 // rotate to hom A1 = 10, het = 01, hom A2 = 00, missing = 11, to allow
9103 // inner loop to use ordinary multiplication
9104 // this is a bit redundant with the forced reverse, but it's not a
9105 // bottleneck
9106 rotate_plink1_to_a2ct_and_copy(loadbuf, &(g_epi_geno1[block_idx1 * pheno_nm_ctl2]), pheno_nm_ctl2);
9107 if (!is_triangular) {
9108 if (!IS_SET(marker_exclude2, marker_uidx_tmp)) {
9109 // do not compare against self
9110 marker_idx2 += marker_uidx_tmp - marker_uidx2 - popcount_bit_idx(marker_exclude2, marker_uidx2, marker_uidx_tmp);
9111 marker_uidx2 = marker_uidx_tmp;
9112 g_epi_geno1_offsets[2 * block_idx1] = marker_idx2;
9113 g_epi_geno1_offsets[2 * block_idx1 + 1] = marker_idx2 + 1;
9114 gap_cts[block_idx1 + marker_idx1] = 1;
9115 }
9116 }
9117 }
9118 marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx_base);
9119 if (is_triangular) {
9120 marker_idx2 = marker_idx1 + 1;
9121 marker_uidx2 = jump_forward_unset_unsafe(marker_exclude2, marker_uidx2 + 1, marker_idx2);
9122 } else {
9123 marker_idx2 = 0;
9124 }
9125 if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
9126 goto epistasis_logistic_regression_ret_READ_FAIL;
9127 }
9128 cur_idx2_block_size = idx2_block_size;
9129 do {
9130 if (cur_idx2_block_size > marker_ct2 - marker_idx2) {
9131 cur_idx2_block_size = marker_ct2 - marker_idx2;
9132 }
9133 for (block_idx2 = 0; block_idx2 < cur_idx2_block_size; marker_uidx2++, block_idx2++) {
9134 if (IS_SET(marker_exclude2, marker_uidx2)) {
9135 marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx2);
9136 if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
9137 goto epistasis_logistic_regression_ret_READ_FAIL;
9138 }
9139 }
9140 ulptr = &(g_epi_geno2[block_idx2 * pheno_nm_ctl2]);
9141 // marker_reverse deliberately flipped
9142 if (load_and_collapse_incl(unfiltered_sample_ct, pheno_nm_ct, pheno_nm, final_mask, !IS_SET(marker_reverse, marker_uidx2), bedfile, loadbuf_raw, loadbuf)) {
9143 goto epistasis_logistic_regression_ret_READ_FAIL;
9144 }
9145 rotate_plink1_to_a2ct_and_copy(loadbuf, ulptr, pheno_nm_ctl2);
9146 }
9147 g_epi_idx2_block_size = cur_idx2_block_size;
9148 g_epi_idx2_block_start = marker_idx2;
9149 idx2_block_sizea16 = round_up_pow2(cur_idx2_block_size, 16);
9150 fill_uint_zero(idx1_block_size + 15 * (max_thread_ct - 1), g_epi_n_sig_ct1);
9151 fill_uint_zero(idx1_block_size + 15 * (max_thread_ct - 1), g_epi_fail_ct1);
9152 fill_uint_zero(idx2_block_sizea16 * max_thread_ct, g_epi_n_sig_ct2);
9153 fill_uint_zero(idx2_block_sizea16 * max_thread_ct, g_epi_fail_ct2);
9154 for (tidx = 0; tidx < max_thread_ct; tidx++) {
9155 ulii = g_epi_idx1_block_bounds[tidx];
9156 uljj = g_epi_idx1_block_bounds[tidx + 1] - ulii;
9157 fptr = &(g_epi_best_chisq_f1[g_epi_idx1_block_bounds[tidx]]);
9158 fptr2 = &(g_epi_all_chisq_f[(marker_idx1 + ulii) * 2]);
9159 for (ulkk = 0; ulkk < uljj; ulkk++) {
9160 *fptr++ = fptr2[ulkk * 2];
9161 }
9162 ulii = g_epi_geno1_offsets[2 * ulii + 1];
9163 if (ulii < marker_idx2 + cur_idx2_block_size) {
9164 if (ulii <= marker_idx2) {
9165 ulii = 0;
9166 } else {
9167 ulii -= marker_idx2;
9168 }
9169 uljj = cur_idx2_block_size - ulii;
9170 fptr = &(g_epi_best_chisq_f2[tidx * idx2_block_sizea16 + ulii]);
9171 fptr2 = &(g_epi_all_chisq_f[(marker_idx2 + ulii) * 2]);
9172 for (ulkk = 0; ulkk < uljj; ulkk++) {
9173 *fptr++ = fptr2[ulkk * 2];
9174 }
9175 }
9176 }
9177 is_last_block = (marker_idx2 + cur_idx2_block_size >= marker_ct2);
9178 if (spawn_threads2(threads, &epi_logistic_thread, max_thread_ct, is_last_block)) {
9179 goto epistasis_logistic_regression_ret_THREAD_CREATE_FAIL;
9180 }
9181 epi_logistic_thread((void*)0);
9182 join_threads2(threads, max_thread_ct, is_last_block);
9183 // merge best_chisq, best_ids, fail_cts
9184 for (tidx = 0; tidx < max_thread_ct; tidx++) {
9185 ulii = g_epi_idx1_block_bounds[tidx];
9186 uljj = g_epi_idx1_block_bounds[tidx + 1] - ulii;
9187 uii = g_epi_idx1_block_bounds16[tidx];
9188 fptr = &(g_epi_best_chisq_f1[uii]);
9189 uiptr = &(g_epi_best_id1[uii]);
9190 uiptr2 = &(g_epi_n_sig_ct1[uii]);
9191 uiptr3 = &(g_epi_fail_ct1[uii]);
9192 ulii += marker_idx1;
9193 dptr = &(best_chisq[ulii]);
9194 uiptr4 = &(n_sig_cts[ulii]);
9195 uiptr5 = &(fail_cts[ulii]);
9196 for (block_idx1 = 0; block_idx1 < uljj; block_idx1++, dptr++, uiptr4++, uiptr5++) {
9197 dxx = (double)(*fptr++);
9198 if (dxx > (*dptr)) {
9199 *dptr = dxx;
9200 best_ids[block_idx1 + ulii] = uiptr[block_idx1];
9201 }
9202 *uiptr4 += *uiptr2++;
9203 *uiptr5 += *uiptr3++;
9204 }
9205 }
9206 if (is_triangular) {
9207 for (tidx = 0; tidx < max_thread_ct; tidx++) {
9208 block_idx2 = g_epi_geno1_offsets[2 * g_epi_idx1_block_bounds[tidx] + 1];
9209 if (block_idx2 <= marker_idx2) {
9210 block_idx2 = 0;
9211 } else {
9212 block_idx2 -= marker_idx2;
9213 }
9214 fptr = &(g_epi_best_chisq_f2[tidx * idx2_block_sizea16 + block_idx2]);
9215 uiptr = &(g_epi_best_id2[tidx * idx2_block_sizea16]);
9216 uiptr2 = &(g_epi_n_sig_ct2[tidx * idx2_block_sizea16 + block_idx2]);
9217 uiptr3 = &(g_epi_fail_ct2[tidx * idx2_block_sizea16 + block_idx2]);
9218 dptr = &(best_chisq[block_idx2 + marker_idx2]);
9219 uiptr4 = &(n_sig_cts[block_idx2 + marker_idx2]);
9220 uiptr5 = &(fail_cts[block_idx2 + marker_idx2]);
9221 for (; block_idx2 < cur_idx2_block_size; block_idx2++, dptr++, uiptr4++, uiptr5++) {
9222 dxx = (double)(*fptr++);
9223 if (dxx > (*dptr)) {
9224 *dptr = dxx;
9225 best_ids[block_idx2 + marker_idx2] = uiptr[block_idx2];
9226 }
9227 *uiptr4 += *uiptr2++;
9228 *uiptr5 += *uiptr3++;
9229 }
9230 }
9231 }
9232 marker_idx2 += cur_idx2_block_size;
9233 } while (marker_idx2 < marker_ct2);
9234 fputs("\b\b\b\b\b\b\b\b\b\b\bwriting] \b\b\b", stdout);
9235 fflush(stdout);
9236 chrom_end = 0;
9237 block_idx1 = 0;
9238 while (1) {
9239 next_unset_ul_unsafe_ck(marker_exclude1, &marker_uidx);
9240 ujj = g_epi_geno1_offsets[2 * block_idx1];
9241 marker_idx2 = 0;
9242 fptr = &(g_epi_all_chisq_f[block_idx1 * 2 * marker_ct2]);
9243 if (marker_uidx >= chrom_end) {
9244 chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx);
9245 chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
9246 chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
9247 }
9248 wptr_start = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, chrom_idx, g_textbuf));
9249 *wptr_start++ = ' ';
9250 wptr_start = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx * max_marker_id_len]), wptr_start);
9251 *wptr_start++ = ' ';
9252 marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx_base);
9253 for (chrom_fo_idx2 = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2); chrom_fo_idx2 < chrom_ct; chrom_fo_idx2++) {
9254 chrom_idx2 = chrom_info_ptr->chrom_file_order[chrom_fo_idx2];
9255 chrom_end2 = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx2 + 1];
9256 wptr_start2 = width_force(4, wptr_start, chrom_name_write(chrom_info_ptr, chrom_idx2, wptr_start));
9257 *wptr_start2++ = ' ';
9258 for (; marker_uidx2 < chrom_end2; ++marker_uidx2, next_unset_ul_ck(marker_exclude2, unfiltered_marker_ct, &marker_uidx2), ++marker_idx2, fptr = &(fptr[2])) {
9259 if (marker_idx2 == ujj) {
9260 marker_idx2 = g_epi_geno1_offsets[2 * block_idx1 + 1];
9261 if (marker_idx2 == marker_ct2) {
9262 goto epistasis_logistic_regression_write_loop;
9263 }
9264 if (marker_idx2 > ujj) {
9265 marker_uidx2 = jump_forward_unset_unsafe(marker_exclude2, marker_uidx2 + 1, marker_idx2 - ujj);
9266 fptr = &(fptr[2 * (marker_idx2 - ujj)]);
9267 if (marker_uidx2 >= chrom_end2) {
9268 break;
9269 }
9270 }
9271 } else if (marker_idx2 == marker_ct2) {
9272 goto epistasis_logistic_regression_write_loop;
9273 }
9274 fxx = *fptr;
9275 if (fxx != -1) {
9276 dxx = (double)fxx;
9277 wptr = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx2 * max_marker_id_len]), wptr_start2);
9278 *wptr++ = ' ';
9279 // odds ratio
9280 wptr = width_force(12, wptr, dtoa_g(exp((double)fptr[1]), wptr));
9281 *wptr++ = ' ';
9282 wptr = width_force(12, wptr, ftoa_g(fxx, wptr));
9283 *wptr++ = ' ';
9284 dxx = normdist(-sqrt(dxx)) * 2;
9285 wptr = dtoa_g_wxp4x(MAXV(dxx, output_min_p), 12, ' ', wptr);
9286 *wptr++ = '\n';
9287 if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
9288 goto epistasis_logistic_regression_ret_WRITE_FAIL;
9289 }
9290 // could remove this writeback in --epi1 1 case
9291 *fptr = -1;
9292 }
9293 }
9294 }
9295 epistasis_logistic_regression_write_loop:
9296 block_idx1++;
9297 marker_uidx++;
9298 if (block_idx1 >= idx1_block_size) {
9299 break;
9300 }
9301 }
9302 marker_idx1 += idx1_block_size;
9303 fputs("\b\b\b\b\b\b\b\b\b\b \b\b\b\b\b\b\b\b\b\b", stdout);
9304 if (tests_complete >= pct_thresh) {
9305 if (pct > 10) {
9306 putc_unlocked('\b', stdout);
9307 }
9308 pct = (tests_complete * 100LLU) / tests_expected;
9309 if (pct < 100) {
9310 printf("\b\b%" PRIuPTR "%%", pct);
9311 fflush(stdout);
9312 pct_thresh = ((++pct) * ((uint64_t)tests_expected)) / 100;
9313 }
9314 }
9315 } while (marker_idx1 < marker_idx1_end);
9316 if (fclose_null(&outfile)) {
9317 goto epistasis_logistic_regression_ret_WRITE_FAIL;
9318 }
9319 while (0) {
9320 epistasis_logistic_regression_ret_NOMEM:
9321 retval = RET_NOMEM;
9322 break;
9323 epistasis_logistic_regression_ret_OPEN_FAIL:
9324 retval = RET_OPEN_FAIL;
9325 break;
9326 epistasis_logistic_regression_ret_READ_FAIL:
9327 retval = RET_READ_FAIL;
9328 break;
9329 epistasis_logistic_regression_ret_WRITE_FAIL:
9330 retval = RET_WRITE_FAIL;
9331 break;
9332 epistasis_logistic_regression_ret_THREAD_CREATE_FAIL:
9333 retval = RET_THREAD_CREATE_FAIL;
9334 break;
9335 }
9336 fclose_cond(outfile);
9337 // caller will free memory
9338 return retval;
9339 }
9340
epistasis_report(pthread_t * threads,Epi_info * epi_ip,FILE * bedfile,uintptr_t bed_offset,uintptr_t marker_ct2,uintptr_t unfiltered_marker_ct,uintptr_t * marker_exclude,uintptr_t * marker_reverse,char * marker_ids,uintptr_t max_marker_id_len,uint32_t * marker_pos,uint32_t plink_maxsnp,Chrom_info * chrom_info_ptr,uintptr_t unfiltered_sample_ct,uintptr_t * pheno_nm,uint32_t pheno_nm_ct,uint32_t ctrl_ct,uintptr_t * pheno_c,double * pheno_d,uint32_t parallel_idx,uint32_t parallel_tot,char * outname,char * outname_end,double output_min_p,double glm_vif_thresh,Set_info * sip)9341 int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct2, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t* marker_pos, uint32_t plink_maxsnp, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* pheno_nm, uint32_t pheno_nm_ct, uint32_t ctrl_ct, uintptr_t* pheno_c, double* pheno_d, uint32_t parallel_idx, uint32_t parallel_tot, char* outname, char* outname_end, double output_min_p, double glm_vif_thresh, Set_info* sip) {
9342 unsigned char* bigstack_mark = g_bigstack_base;
9343 FILE* outfile = nullptr;
9344 uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
9345 uintptr_t unfiltered_sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(unfiltered_sample_ct);
9346 uintptr_t final_mask = get_final_mask(pheno_nm_ct);
9347 uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
9348 uintptr_t marker_uidx_base = next_unset_unsafe(marker_exclude, 0);
9349 uintptr_t marker_uidx = marker_uidx_base;
9350 uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
9351 uint32_t modifier = epi_ip->modifier;
9352 uint32_t is_fast = modifier & EPI_FAST;
9353 uint32_t is_boost = (modifier / EPI_FAST_BOOST) & 1;
9354 uint32_t do_joint_effects = modifier & EPI_FAST_JOINT_EFFECTS;
9355 uint32_t no_ueki = modifier & EPI_FAST_NO_UEKI;
9356 uint32_t is_case_only = (modifier / EPI_FAST_CASE_ONLY) & 1;
9357 uint32_t is_triangular = 1;
9358 uint32_t is_custom_set1 = modifier & (EPI_SET_BY_SET | EPI_SET_BY_ALL)? 1 : 0;
9359 uint32_t is_set_by_set = modifier & EPI_SET_BY_SET;
9360 uint32_t tot_stride = 6 - 3 * is_case_only;
9361 uint32_t no_p_value = modifier & EPI_FAST_NO_P_VALUE;
9362 uint32_t case_only_gap = epi_ip->case_only_gap;
9363 uint32_t is_case_only_window = (is_case_only && case_only_gap);
9364 uint32_t case_ct = pheno_nm_ct - ctrl_ct;
9365 uint32_t cellminx3 = 0;
9366 uintptr_t case_ctl2 = QUATERCT_TO_WORDCT(case_ct);
9367 uintptr_t case_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(case_ct);
9368 uintptr_t ctrl_ctl2 = QUATERCT_TO_WORDCT(ctrl_ct);
9369 uintptr_t case_ctv3 = BITCT_TO_ALIGNED_WORDCT(case_ct);
9370 uintptr_t ctrl_ctv3 = BITCT_TO_ALIGNED_WORDCT(ctrl_ct);
9371 uintptr_t case_ctsplit = 3 * case_ctv3;
9372 uintptr_t ctrl_ctsplit = 3 * ctrl_ctv3;
9373 uintptr_t pct = 1;
9374 uintptr_t marker_uidx2 = 0;
9375 uintptr_t marker_uidx2_trail = 0;
9376 uintptr_t marker_idx2 = 0;
9377 uintptr_t marker_idx2_trail = 0;
9378 uint64_t tests_thrown_out = 0;
9379 uint64_t tests_complete = 0;
9380 uint32_t max_thread_ct = g_thread_ct;
9381 uint32_t chrom_idx = 0;
9382 uint32_t chrom_end = 0;
9383 uint32_t last_pos = 0;
9384 uint32_t first_pos = 0;
9385 uint32_t uii = 0;
9386 int32_t retval = 0;
9387 uint32_t* gap_cts = nullptr;
9388 uintptr_t* ctrlbuf = nullptr;
9389 uintptr_t* marker_exclude1 = nullptr;
9390 uintptr_t* ulptr = nullptr;
9391 uintptr_t ularr[sizeof(double) / BYTECT];
9392 uintptr_t* casebuf;
9393 uintptr_t* loadbuf;
9394 uintptr_t* marker_exclude2;
9395 double* best_chisq;
9396 uint32_t* best_ids;
9397 uint32_t* n_sig_cts;
9398 uint32_t* fail_cts;
9399 uint32_t* marker_idx_to_uidx;
9400 unsigned char* bigstack_mark2;
9401 unsigned char* bigstack_mark3;
9402 char* wptr_start;
9403 char* wptr_start2;
9404 char* wptr;
9405 double* dptr;
9406 double* dptr2;
9407 uint32_t* uiptr;
9408 uint32_t* uiptr2;
9409 uint32_t* uiptr3;
9410 uint32_t* uiptr4;
9411 uint32_t* uiptr5;
9412 uint64_t tests_expected;
9413 uint64_t pct_thresh;
9414 double dxx;
9415 uintptr_t marker_ct1;
9416 uintptr_t tot_ctsplit;
9417 uintptr_t job_size;
9418 uintptr_t cur_bigstack_left;
9419 uintptr_t cur_workload;
9420 uintptr_t marker_idx1_start;
9421 uintptr_t marker_idx1;
9422 uintptr_t marker_idx1_end;
9423 uintptr_t idx1_block_size;
9424 uintptr_t idx2_block_size;
9425 uintptr_t idx2_block_sizea16;
9426 uintptr_t marker_uidx_tmp;
9427 uintptr_t block_idx1;
9428 uintptr_t block_idx2;
9429 uintptr_t cur_idx2_block_size;
9430 uintptr_t tidx;
9431 uintptr_t ulii;
9432 uintptr_t uljj;
9433 uintptr_t chrom_end2;
9434 uint32_t chrom_fo_idx;
9435 uint32_t chrom_fo_idx2;
9436 uint32_t chrom_idx2;
9437 uint32_t cur_window_end;
9438 uint32_t is_last_block;
9439 uint32_t missing_ct;
9440 uint32_t ujj;
9441
9442 // common initialization between --epistasis and --fast-epistasis: remove
9443 // monomorphic and non-autosomal diploid sites
9444 if (is_custom_set1) {
9445 if (!sip->ct) {
9446 sprintf(g_logbuf, "Error: --%sepistasis set-by-%s requires a variant set to be loaded.\n", is_fast? "fast-" : "", is_set_by_set? "set" : "all");
9447 goto epistasis_report_ret_INVALID_CMDLINE_2;
9448 } else if (!is_set_by_set) {
9449 if (sip->ct > 1) {
9450 logerrprint("Error: --{fast-}epistasis set-by-all requires exactly one set. (--set-names or\n--set-collapse-all may be handy here.\n");
9451 goto epistasis_report_ret_INVALID_CMDLINE;
9452 }
9453 } else if (sip->ct > 2) {
9454 logerrprint("Error: --{fast-}epistasis set-by-set requires exactly one or two sets.\n(--set-names or --set-collapse-all may be handy here.)\n");
9455 goto epistasis_report_ret_INVALID_CMDLINE;
9456 }
9457 if (bigstack_alloc_ul(unfiltered_marker_ctl, &marker_exclude1)) {
9458 goto epistasis_report_ret_NOMEM;
9459 }
9460 unpack_set_unfiltered(marker_ct2, unfiltered_marker_ct, marker_exclude, sip->setdefs[0], marker_exclude1);
9461 if (is_set_by_set && (sip->ct == 1)) {
9462 marker_ct2 = unfiltered_marker_ct - popcount_longs(marker_exclude1, unfiltered_marker_ctl);
9463 } else {
9464 is_triangular = 0;
9465 }
9466 // if set-by-set with two sets, wait till after monomorphic sites are
9467 // removed to unpack 2nd set
9468 }
9469 if (pheno_nm_ct >= 0x20000000) {
9470 // may as well document the existence of sub-2b overflow conditions even
9471 // though they'll never come up
9472 logerrprint("Error: --{fast-}epistasis does not support >= 2^29 samples.\n");
9473 goto epistasis_report_ret_INVALID_CMDLINE;
9474 }
9475 if (!pheno_d) {
9476 if ((case_ct < 2) || ((!is_case_only) && (ctrl_ct < 2))) {
9477 sprintf(g_logbuf, "Error: --%sepistasis requires at least two cases%s.\n", is_fast? "fast-" : "", is_case_only? "" : " and two controls");
9478 goto epistasis_report_ret_INVALID_CMDLINE_2;
9479 }
9480 if (bigstack_alloc_ul(case_ctv2 + ctrl_ctl2, &casebuf)) {
9481 goto epistasis_report_ret_NOMEM;
9482 }
9483 ctrlbuf = &(casebuf[case_ctv2]);
9484 ctrlbuf[ctrl_ctl2 - 1] = 0;
9485 } else {
9486 case_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(pheno_nm_ct);
9487 if (bigstack_alloc_ul(case_ctv2, &casebuf)) {
9488 goto epistasis_report_ret_NOMEM;
9489 }
9490 }
9491 casebuf[case_ctv2 - 2] = 0;
9492 casebuf[case_ctv2 - 1] = 0;
9493 // marker_exclude2 should be on top since we might free it
9494 if (bigstack_alloc_ul(unfiltered_sample_ctv2, &loadbuf) ||
9495 bigstack_alloc_ul(unfiltered_marker_ctl, &marker_exclude2)) {
9496 goto epistasis_report_ret_NOMEM;
9497 }
9498 loadbuf[unfiltered_sample_ctv2 - 2] = 0;
9499 loadbuf[unfiltered_sample_ctv2 - 1] = 0;
9500 if ((!is_set_by_set) || (sip->ct == 2)) {
9501 memcpy(marker_exclude2, marker_exclude, unfiltered_marker_ctl * sizeof(intptr_t));
9502 } else {
9503 memcpy(marker_exclude2, marker_exclude1, unfiltered_marker_ctl * sizeof(intptr_t));
9504 }
9505 if (do_joint_effects && epi_ip->je_cellmin) {
9506 cellminx3 = epi_ip->je_cellmin * 3;
9507 if ((case_ct < cellminx3 * 3) || ((!is_case_only) && (ctrl_ct < cellminx3 * 3))) {
9508 sprintf(g_logbuf, "Error: Too few cases or controls for --je-cellmin %u.\n", epi_ip->je_cellmin);
9509 goto epistasis_report_ret_INVALID_CMDLINE_2;
9510 }
9511 ulii = case_ctl2;
9512 if ((!is_case_only) && (ctrl_ctl2 > case_ctl2)) {
9513 ulii = ctrl_ctl2;
9514 }
9515 if (bigstack_alloc_ul(ulii, &ulptr)) {
9516 goto epistasis_report_ret_NOMEM;
9517 }
9518 fill_quatervec_55(ulii * BITCT2, ulptr);
9519 }
9520 for (chrom_fo_idx = 0; chrom_fo_idx < chrom_ct; chrom_fo_idx++) {
9521 chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
9522 chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
9523 if (is_set(chrom_info_ptr->haploid_mask, chrom_idx)) {
9524 uii = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
9525 fill_bits(uii, chrom_end - uii, marker_exclude2);
9526 marker_uidx = chrom_end;
9527 continue;
9528 }
9529 // may want to keep two window sizes' raw data loaded for marker 1, to
9530 // halve the number of non-sequential seeks?
9531 if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
9532 goto epistasis_report_ret_READ_FAIL;
9533 }
9534 while (marker_uidx < chrom_end) {
9535 if (is_set(marker_exclude2, marker_uidx)) {
9536 marker_uidx = next_unset(marker_exclude2, marker_uidx, chrom_end);
9537 if (marker_uidx == chrom_end) {
9538 break;
9539 }
9540 if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
9541 goto epistasis_report_ret_READ_FAIL;
9542 }
9543 }
9544 if ((!no_ueki) && (!cellminx3)) {
9545 if (load_and_collapse_incl(unfiltered_sample_ct, pheno_nm_ct, pheno_nm, final_mask, 0, bedfile, loadbuf, casebuf)) {
9546 goto epistasis_report_ret_READ_FAIL;
9547 }
9548 if (is_boost) {
9549 if (less_than_two_genotypes(casebuf, pheno_nm_ct)) {
9550 SET_BIT(marker_uidx, marker_exclude2);
9551 }
9552 } else {
9553 if (is_monomorphic(casebuf, pheno_nm_ct)) {
9554 SET_BIT(marker_uidx, marker_exclude2);
9555 }
9556 }
9557 } else {
9558 if (load_and_split(unfiltered_sample_ct, pheno_nm, pheno_c, bedfile, loadbuf, casebuf, ctrlbuf)) {
9559 goto epistasis_report_ret_READ_FAIL;
9560 }
9561 if (no_ueki) {
9562 if (is_monomorphic(casebuf, case_ct) || ((!is_case_only) && is_monomorphic(ctrlbuf, ctrl_ct))) {
9563 SET_BIT(marker_uidx, marker_exclude2);
9564 }
9565 } else {
9566 genovec_3freq(casebuf, ulptr, case_ctl2, &missing_ct, &uii, &ujj);
9567 if ((uii < cellminx3) || (ujj < cellminx3) || (case_ct - uii - ujj - missing_ct < cellminx3)) {
9568 SET_BIT(marker_uidx, marker_exclude2);
9569 } else if (!is_case_only) {
9570 genovec_3freq(ctrlbuf, ulptr, ctrl_ctl2, &missing_ct, &uii, &ujj);
9571 if ((uii < cellminx3) || (ujj < cellminx3) || (ctrl_ct - uii - ujj - missing_ct < cellminx3)) {
9572 SET_BIT(marker_uidx, marker_exclude2);
9573 }
9574 }
9575 }
9576 }
9577 marker_uidx++;
9578 }
9579 }
9580 ulii = unfiltered_marker_ct - popcount_longs(marker_exclude2, unfiltered_marker_ctl);
9581 if ((!ulii) || ((ulii == 1) && is_triangular)) {
9582 goto epistasis_report_ret_TOO_FEW_MARKERS;
9583 }
9584 if (ulii != marker_ct2) {
9585 if (!cellminx3) {
9586 LOGPRINTF("--%sepistasis: Skipping %" PRIuPTR " monomorphic/non-autosomal site%s.\n", is_fast? "fast-" : "", marker_ct2 - ulii, (marker_ct2 - ulii == 1)? "" : "s");
9587 } else {
9588 LOGPRINTF("--%sepistasis: Skipping %" PRIuPTR " site%s due to --je-cellmin setting.\n", is_fast? "fast-" : "", marker_ct2 - ulii, (marker_ct2 - ulii == 1)? "" : "s");
9589 bigstack_reset(ulptr);
9590 }
9591 marker_uidx_base = next_unset_ul_unsafe(marker_exclude2, marker_uidx_base);
9592 } else if ((!is_custom_set1) || (!is_set_by_set)) {
9593 bigstack_reset(marker_exclude2);
9594 marker_exclude2 = marker_exclude;
9595 }
9596 if (is_triangular) {
9597 if (!marker_exclude1) {
9598 marker_exclude1 = marker_exclude2;
9599 }
9600 marker_ct1 = ulii;
9601 marker_ct2 = ulii;
9602 tests_expected = ((((uint64_t)marker_ct1) * (marker_ct1 - 1)) / 2);
9603 } else {
9604 bitvec_or(marker_exclude2, unfiltered_marker_ctl, marker_exclude1);
9605 marker_ct1 = unfiltered_marker_ct - popcount_longs(marker_exclude1, unfiltered_marker_ctl);
9606 if (sip->ct == 2) {
9607 if (bigstack_alloc_ul(unfiltered_marker_ctl, &ulptr)) {
9608 goto epistasis_report_ret_NOMEM;
9609 }
9610 memcpy(ulptr, marker_exclude2, unfiltered_marker_ctl * sizeof(intptr_t));
9611 unpack_set_unfiltered(marker_ct2, unfiltered_marker_ct, marker_exclude, sip->setdefs[1], marker_exclude2);
9612 bitvec_or(ulptr, unfiltered_marker_ctl, marker_exclude2);
9613 bigstack_reset(ulptr);
9614 marker_ct2 = unfiltered_marker_ct - popcount_longs(marker_exclude2, unfiltered_marker_ctl);
9615 } else {
9616 marker_ct2 = ulii;
9617 }
9618 tests_expected = ((uint64_t)marker_ct1) * marker_ct2;
9619 if (!tests_expected) {
9620 goto epistasis_report_ret_TOO_FEW_MARKERS;
9621 }
9622 }
9623 if (parallel_tot > 1) {
9624 if (marker_ct1 < (1 + is_triangular) * parallel_tot) {
9625 sprintf(g_logbuf, "Error: Too few loci remaining for --parallel %u %u + --%sepistasis.\n", parallel_idx + 1, parallel_tot, is_fast? "fast-" : "");
9626 goto epistasis_report_ret_INVALID_CMDLINE_2;
9627 }
9628 if (is_triangular) {
9629 // If there are n markers, and we're computing the usual upper right
9630 // triangle, first row has n-1 entries, second row has n-2, etc.
9631 // Total entry count is n(n-1)/2; total entry count starting from row r
9632 // is (n-r)(n-r-1)/2... upside-down triangle_divide() calls produce a
9633 // good partition.
9634 // Divide first to avoid 64-bit integer overflow (!) on really huge jobs.
9635 // (Multiply-by-2 is there because triangle_divide() takes n(n-1) instead
9636 // of n(n-1)/2 as first parameter.)
9637 pct_thresh = (2 * tests_expected) / parallel_tot;
9638 // If parallel_idx == 0, the marker_ct >= 2 * parallel_tot condition
9639 // ensures the precision loss from dividing and remultiplying does not
9640 // cause the first marker to be dropped.
9641 marker_idx1_start = triangle_divide(pct_thresh * (parallel_tot - parallel_idx), -1);
9642 marker_idx1_end = triangle_divide(pct_thresh * (parallel_tot - parallel_idx - 1), -1);
9643 tests_expected = ((((uint64_t)marker_idx1_start) * (marker_idx1_start - 1)) - (((uint64_t)marker_idx1_end) * (marker_idx1_end - 1))) / 2;
9644 marker_idx1_start = marker_ct1 - marker_idx1_start;
9645 marker_idx1_end = marker_ct1 - marker_idx1_end;
9646 } else {
9647 marker_idx1_start = (parallel_idx * ((uint64_t)marker_ct1)) / parallel_tot;
9648 marker_idx1_end = ((parallel_idx + 1) * ((uint64_t)marker_ct1)) / parallel_tot;
9649 tests_expected = (marker_idx1_end - marker_idx1_start) * ((uint64_t)marker_ct2);
9650 }
9651 } else {
9652 marker_idx1_start = 0;
9653 marker_idx1_end = marker_ct1;
9654 }
9655 marker_idx1 = marker_idx1_start;
9656 job_size = marker_idx1_end - marker_idx1_start;
9657 if (max_thread_ct > job_size) {
9658 max_thread_ct = job_size;
9659 }
9660 if (bigstack_calloc_d(marker_ct1, &best_chisq) ||
9661 bigstack_calloc_ui(marker_ct1, &best_ids) ||
9662 bigstack_calloc_ui(marker_ct1, &n_sig_cts) ||
9663 bigstack_calloc_ui(marker_ct1, &fail_cts) ||
9664 bigstack_alloc_ui(max_thread_ct + 1, &g_epi_idx1_block_bounds) ||
9665 bigstack_alloc_ui(max_thread_ct, &g_epi_idx1_block_bounds16)) {
9666 goto epistasis_report_ret_NOMEM;
9667 }
9668 if (is_case_only_window || (!is_triangular)) {
9669 if (bigstack_calloc_ui(marker_ct1, &gap_cts)) {
9670 goto epistasis_report_ret_NOMEM;
9671 }
9672 }
9673 bigstack_mark3 = g_bigstack_base;
9674
9675 g_epi_thread_ct = max_thread_ct;
9676 g_epi_case_ct = case_ct;
9677 g_epi_flag = modifier;
9678 g_epi_marker_ct = marker_ct2;
9679 g_epi_cellmin = cellminx3 / 3;
9680 // might want to provide a Bonferroni correction interface...
9681 if (is_boost) {
9682 if (epi_ip->epi1 == 0.0) {
9683 dxx = 0.000005;
9684 } else {
9685 dxx = epi_ip->epi1;
9686 }
9687 g_epi_alpha1sq[0] = inverse_chiprob(dxx, 4);
9688 g_epi_alpha1sq[1] = inverse_chiprob(dxx, 2);
9689 g_epi_alpha1sq[2] = inverse_chiprob(dxx, 1);
9690 g_epi_alpha2sq[0] = inverse_chiprob(epi_ip->epi2, 4);
9691 if (g_epi_alpha1sq[0] == g_epi_alpha2sq[0]) {
9692 // count final instead of screening p-value hits
9693 g_epi_alpha2sq[0] *= 1 + SMALL_EPSILON;
9694 g_epi_alpha2sq[1] = g_epi_alpha1sq[1] * (1 + SMALL_EPSILON);
9695 g_epi_alpha2sq[2] = g_epi_alpha1sq[2] * (1 + SMALL_EPSILON);
9696 } else {
9697 g_epi_alpha2sq[1] = inverse_chiprob(epi_ip->epi2, 2);
9698 g_epi_alpha2sq[2] = inverse_chiprob(epi_ip->epi2, 1);
9699 }
9700 if (bigstack_alloc_d(pheno_nm_ct + 1, &g_epi_recip_cache)) {
9701 goto epistasis_report_ret_NOMEM;
9702 }
9703 g_epi_recip_cache[0] = 0.0;
9704 for (uii = 1; uii <= pheno_nm_ct; uii++) {
9705 g_epi_recip_cache[uii] = 1.0 / ((double)((int32_t)uii));
9706 }
9707 } else {
9708 if (epi_ip->epi1 == 0.0) {
9709 dxx = 0.00005;
9710 } else {
9711 dxx = epi_ip->epi1 * 0.5;
9712 }
9713 dxx = ltqnorm(dxx);
9714 g_epi_alpha1sq[0] = dxx * dxx;
9715 dxx = ltqnorm(epi_ip->epi2 / 2);
9716 g_epi_alpha2sq[0] = dxx * dxx;
9717 }
9718 if (!is_fast) {
9719 if (pheno_d) {
9720 retval = epistasis_linear_regression(threads, epi_ip, bedfile, bed_offset, unfiltered_marker_ct, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, marker_uidx_base, marker_ct1, marker_exclude1, marker_idx1_start, marker_idx1_end, marker_ct2, marker_exclude2, is_triangular, job_size, tests_expected, unfiltered_sample_ct, pheno_nm, pheno_nm_ct, pheno_d, parallel_idx, parallel_tot, outname, outname_end, output_min_p, glm_vif_thresh, loadbuf, casebuf, best_chisq, best_ids, n_sig_cts, fail_cts, gap_cts);
9721 } else {
9722 retval = epistasis_logistic_regression(threads, epi_ip, bedfile, bed_offset, unfiltered_marker_ct, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, marker_uidx_base, marker_ct1, marker_exclude1, marker_idx1_start, marker_idx1_end, marker_ct2, marker_exclude2, is_triangular, job_size, tests_expected, unfiltered_sample_ct, pheno_nm, pheno_nm_ct, pheno_c, parallel_idx, parallel_tot, outname, outname_end, output_min_p, loadbuf, casebuf, best_chisq, best_ids, n_sig_cts, fail_cts, gap_cts);
9723 }
9724 if (retval) {
9725 goto epistasis_report_ret_1;
9726 }
9727 } else {
9728 pct_thresh = tests_expected / 100;
9729 if (is_case_only) {
9730 g_epi_ctrl_ct = 0;
9731 ctrl_ctv3 = 0;
9732 ctrl_ctsplit = 0;
9733 memcpy(outname_end, ".epi.co", 8);
9734 } else {
9735 g_epi_ctrl_ct = ctrl_ct;
9736 memcpy(outname_end, ".epi.cc", 8);
9737 }
9738 if (parallel_tot > 1) {
9739 outname_end[7] = '.';
9740 uint32toa_x(parallel_idx + 1, '\0', &(outname_end[8]));
9741 }
9742 tot_ctsplit = case_ctsplit + ctrl_ctsplit;
9743 if (fopen_checked(outname, "w", &outfile)) {
9744 goto epistasis_report_ret_OPEN_FAIL;
9745 }
9746 if (!parallel_idx) {
9747 wptr = memcpya(g_textbuf, "CHR1 ", 5);
9748 wptr = fw_strcpyn(plink_maxsnp, 4, "SNP1", wptr);
9749 wptr = memcpya(wptr, " CHR2 ", 6);
9750 wptr = fw_strcpyn(plink_maxsnp, 4, "SNP2", wptr);
9751 wptr = memcpya(wptr, " STAT ", 14);
9752 if (is_boost) {
9753 wptr = memcpya(wptr, " DF ", 5);
9754 }
9755 if (!no_p_value) {
9756 wptr = memcpya(wptr, " P ", 13);
9757 }
9758 *wptr++ = '\n';
9759 if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
9760 goto epistasis_report_ret_WRITE_FAIL;
9761 }
9762 }
9763 // claim up to half of memory with idx1 bufs; each marker currently costs:
9764 // (case_ctsplit + ctrl_ctsplit) * sizeof(intptr_t) for loose geno buf
9765 // 0.25 for missing tracker
9766 // sizeof(int32_t) for offset (to skip bottom left triangle, and/or
9767 // too-close pairs for case-only tests; will sometimes need to be
9768 // larger when sets come into the picture
9769 // sizeof(double) for best chisq,
9770 // sizeof(int32_t) for best opposite ID,
9771 // sizeof(int32_t) for N_SIG count,
9772 // sizeof(int32_t) for per-site fail counts, and (bleah)
9773 // marker_ct2 * sizeof(double) for the usually oversized results space
9774 cur_bigstack_left = bigstack_left();
9775 ulii = 4 * CACHELINE - 3 * sizeof(int32_t) + max_thread_ct * (5 * (CACHELINE - 4));
9776 if (cur_bigstack_left >= ulii) {
9777 cur_bigstack_left -= ulii;
9778 }
9779 ulii = tot_ctsplit * sizeof(intptr_t) + 4 * sizeof(int32_t) + sizeof(double) + marker_ct2 * sizeof(double);
9780 idx1_block_size = cur_bigstack_left / (ulii * 2 + 1);
9781 if (!idx1_block_size) {
9782 goto epistasis_report_ret_NOMEM;
9783 }
9784 if (idx1_block_size > job_size) {
9785 idx1_block_size = job_size;
9786 }
9787 // pad to avoid threads writing to same cacheline
9788 ulii = (max_thread_ct - 1) * 15 + idx1_block_size;
9789 // offsets[] isn't really needed, but barely takes any memory
9790 // if 'case-only', want two more offsets columns to store where the "too
9791 // close" variants are
9792 bigstack_alloc_ui(idx1_block_size * 2, &g_epi_geno1_offsets);
9793 bigstack_alloc_ul(tot_ctsplit * idx1_block_size, &g_epi_geno1);
9794 bigstack_alloc_ul(QUATERCT_TO_WORDCT(idx1_block_size), &g_epi_zmiss1);
9795 bigstack_alloc_d(idx1_block_size * marker_ct2, &g_epi_all_chisq);
9796 bigstack_alloc_d(ulii, &g_epi_best_chisq1);
9797 bigstack_alloc_ui(ulii, &g_epi_best_id1);
9798 bigstack_alloc_ui(ulii, &g_epi_n_sig_ct1);
9799 bigstack_alloc_ui(ulii, &g_epi_fail_ct1);
9800 for (block_idx1 = 0; block_idx1 < idx1_block_size; block_idx1++) {
9801 g_epi_geno1[block_idx1 * tot_ctsplit + case_ctv3 - 1] = 0;
9802 g_epi_geno1[block_idx1 * tot_ctsplit + 2 * case_ctv3 - 1] = 0;
9803 g_epi_geno1[block_idx1 * tot_ctsplit + case_ctsplit - 1] = 0;
9804 g_epi_geno1[block_idx1 * tot_ctsplit + case_ctsplit + ctrl_ctv3 - 1] = 0;
9805 g_epi_geno1[block_idx1 * tot_ctsplit + case_ctsplit + 2 * ctrl_ctv3 - 1] = 0;
9806 g_epi_geno1[block_idx1 * tot_ctsplit + tot_ctsplit - 1] = 0;
9807 }
9808 if (is_triangular) {
9809 fill_uint_zero(2 * idx1_block_size, g_epi_geno1_offsets);
9810 }
9811 // don't actually need best_chisq2, best_id2, n_sig_ct2, fail_ct2 if not
9812 // triangular, but rather not complicate/duplicate the common case inner
9813 // loop for now
9814 ulii = tot_ctsplit * sizeof(intptr_t) + 1 + is_boost * 6 * sizeof(double) + tot_stride * sizeof(int32_t) + max_thread_ct * (3 * sizeof(int32_t) + sizeof(double));
9815 idx2_block_size = (bigstack_left() - CACHELINE - is_boost * (CACHELINE - 8) - max_thread_ct * (5 * (CACHELINE - 4))) / ulii;
9816 if (idx2_block_size > marker_ct2) {
9817 idx2_block_size = marker_ct2;
9818 }
9819 idx2_block_size = round_up_pow2(idx2_block_size, 16);
9820 bigstack_mark2 = g_bigstack_base;
9821 while (1) {
9822 if (!idx2_block_size) {
9823 goto epistasis_report_ret_NOMEM;
9824 }
9825 if (!(bigstack_alloc_ul(tot_ctsplit * idx2_block_size, &g_epi_geno2) ||
9826 bigstack_alloc_ul(QUATERCT_TO_WORDCT(idx2_block_size), &g_epi_zmiss2) ||
9827 bigstack_alloc_ui(idx2_block_size * tot_stride, &g_epi_tot2) ||
9828 bigstack_alloc_d(max_thread_ct * idx2_block_size, &g_epi_best_chisq2) ||
9829 bigstack_alloc_ui(max_thread_ct * idx2_block_size, &g_epi_best_id2) ||
9830 bigstack_alloc_ui(max_thread_ct * idx2_block_size, &g_epi_n_sig_ct2) ||
9831 bigstack_alloc_ui(max_thread_ct * idx2_block_size, &g_epi_fail_ct2))) {
9832 if ((!is_boost) || (!bigstack_alloc_d(6 * idx2_block_size, &g_epi_boost_precalc2))) {
9833 break;
9834 }
9835 }
9836 bigstack_reset(bigstack_mark2);
9837 idx2_block_size -= 16;
9838 }
9839 for (block_idx2 = 0; block_idx2 < idx2_block_size; block_idx2++) {
9840 g_epi_geno2[block_idx2 * tot_ctsplit + case_ctv3 - 1] = 0;
9841 g_epi_geno2[block_idx2 * tot_ctsplit + 2 * case_ctv3 - 1] = 0;
9842 g_epi_geno2[block_idx2 * tot_ctsplit + case_ctsplit - 1] = 0;
9843 g_epi_geno2[block_idx2 * tot_ctsplit + case_ctsplit + ctrl_ctv3 - 1] = 0;
9844 g_epi_geno2[block_idx2 * tot_ctsplit + case_ctsplit + 2 * ctrl_ctv3 - 1] = 0;
9845 g_epi_geno2[block_idx2 * tot_ctsplit + tot_ctsplit - 1] = 0;
9846 }
9847 marker_uidx = next_unset_ul_unsafe(marker_exclude1, marker_uidx_base);
9848 if (marker_idx1) {
9849 marker_uidx = jump_forward_unset_unsafe(marker_exclude1, marker_uidx + 1, marker_idx1);
9850 }
9851 wptr = memcpya(g_logbuf, "--fast-epistasis", 16);
9852 if (is_boost) {
9853 wptr = memcpya(wptr, " boost", 6);
9854 } else if (no_ueki) {
9855 wptr = memcpya(wptr, " no-ueki", 8);
9856 } else if (do_joint_effects) {
9857 wptr = memcpya(wptr, " joint-effects", 14);
9858 }
9859 if (is_case_only) {
9860 wptr = memcpya(wptr, " case-only", 10);
9861 }
9862 wptr = memcpya(wptr, " to ", 4);
9863 wptr = strcpya(wptr, outname);
9864 memcpy(wptr, " ... ", 6);
9865 wordwrapb(16); // strlen("99% [processing]")
9866 logprintb();
9867 fputs("0%", stdout);
9868 do {
9869 fputs(" [processing]", stdout);
9870 fflush(stdout);
9871 if (idx1_block_size > marker_idx1_end - marker_idx1) {
9872 idx1_block_size = marker_idx1_end - marker_idx1;
9873 if (idx1_block_size < max_thread_ct) {
9874 max_thread_ct = idx1_block_size;
9875 g_epi_thread_ct = max_thread_ct;
9876 }
9877 }
9878 g_epi_marker_idx1 = marker_idx1;
9879 dptr = g_epi_all_chisq;
9880 dptr2 = &(g_epi_all_chisq[idx1_block_size * marker_ct2]);
9881 do {
9882 *dptr++ = -1;
9883 } while (dptr < dptr2);
9884 marker_uidx_tmp = marker_uidx;
9885 if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
9886 goto epistasis_report_ret_READ_FAIL;
9887 }
9888 cur_workload = idx1_block_size * marker_ct2;
9889 if (is_triangular) {
9890 for (block_idx1 = 0; block_idx1 < idx1_block_size; block_idx1++) {
9891 ulii = block_idx1 + marker_idx1 + 1;
9892 cur_workload -= ulii;
9893 // edit this during loading, when we have to know marker_uidx anyway,
9894 // if case-only
9895 g_epi_geno1_offsets[2 * block_idx1 + 1] = ulii;
9896 }
9897 } else {
9898 fill_uint_zero(2 * idx1_block_size, g_epi_geno1_offsets);
9899 marker_uidx2 = marker_uidx_base;
9900 marker_idx2 = 0;
9901 }
9902 tests_complete += cur_workload;
9903 ulii = 0; // total number of tests
9904 g_epi_idx1_block_bounds[0] = 0;
9905 g_epi_idx1_block_bounds16[0] = 0;
9906 block_idx1 = 0;
9907 for (tidx = 1; tidx < max_thread_ct; tidx++) {
9908 uljj = (((uint64_t)cur_workload) * tidx) / max_thread_ct;
9909 if (is_triangular) {
9910 do {
9911 // slightly inaccurate for case-only due to the way --gap is
9912 // supported, but this doesn't affect any calculation results, only
9913 // the progress display
9914 ulii += marker_ct2 - g_epi_geno1_offsets[2 * block_idx1 + 1];
9915 block_idx1++;
9916 } while (ulii < uljj);
9917 } else {
9918 do {
9919 ulii += marker_ct2;
9920 block_idx1++;
9921 } while (ulii < uljj);
9922 }
9923 uii = block_idx1 - g_epi_idx1_block_bounds[tidx - 1];
9924 g_epi_idx1_block_bounds[tidx] = block_idx1;
9925 g_epi_idx1_block_bounds16[tidx] = g_epi_idx1_block_bounds16[tidx - 1] + round_up_pow2_ui(uii, 16);
9926 }
9927 g_epi_idx1_block_bounds[max_thread_ct] = idx1_block_size;
9928 fill_ulong_zero(QUATERCT_TO_WORDCT(idx1_block_size), g_epi_zmiss1);
9929 chrom_end = 0;
9930 for (block_idx1 = 0; block_idx1 < idx1_block_size; marker_uidx_tmp++, block_idx1++) {
9931 if (IS_SET(marker_exclude1, marker_uidx_tmp)) {
9932 marker_uidx_tmp = next_unset_ul_unsafe(marker_exclude1, marker_uidx_tmp);
9933 if (fseeko(bedfile, bed_offset + (marker_uidx_tmp * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
9934 goto epistasis_report_ret_READ_FAIL;
9935 }
9936 }
9937 if (load_and_split3(bedfile, loadbuf, unfiltered_sample_ct, &(g_epi_geno1[block_idx1 * tot_ctsplit]), pheno_nm, pheno_c, case_ctv3, ctrl_ctv3, IS_SET(marker_reverse, marker_uidx_tmp), is_case_only, &ulii)) {
9938 goto epistasis_report_ret_READ_FAIL;
9939 }
9940 if (ulii) {
9941 g_epi_zmiss1[block_idx1 / BITCT2] |= ulii << (2 * (block_idx1 % BITCT2));
9942 // g_epi_tot1 doesn't need to exist, better for each thread to
9943 // determine those totals on the fly
9944 }
9945 if (is_case_only_window) {
9946 cur_window_end = marker_pos[marker_uidx_tmp] + case_only_gap;
9947 if (marker_uidx_tmp >= chrom_end) {
9948 chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx_tmp);
9949 chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
9950 if (is_triangular) {
9951 marker_uidx2 = marker_uidx_tmp;
9952 marker_idx2 = block_idx1 + marker_idx1;
9953 last_pos = marker_pos[marker_uidx_tmp];
9954 } else {
9955 uii = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
9956 if (marker_pos[marker_uidx_tmp] < case_only_gap) {
9957 ujj = 0;
9958 } else {
9959 ujj = marker_pos[marker_uidx_tmp] + 1 - case_only_gap;
9960 }
9961 marker_uidx2_trail = next_unset(marker_exclude2, uii + uint32arr_greater_than(&(marker_pos[uii]), marker_uidx_tmp + 1 - uii, ujj), chrom_end);
9962 marker_idx2_trail = marker_uidx2_trail - popcount_bit_idx(marker_exclude2, 0, marker_uidx2_trail);
9963 if (marker_uidx2_trail < chrom_end) {
9964 first_pos = marker_pos[marker_uidx2_trail];
9965 // this could be more efficient, but not a big deal since
9966 // there aren't many chromosomes
9967 marker_uidx2 = next_unset(marker_exclude2, uii + uint32arr_greater_than(&(marker_pos[marker_uidx_tmp]), chrom_end - marker_uidx_tmp, cur_window_end), chrom_end);
9968 } else {
9969 first_pos = 0x7fffffffU;
9970 marker_uidx2 = chrom_end;
9971 }
9972 marker_idx2 = marker_idx2_trail + marker_uidx2 - marker_uidx2_trail - popcount_bit_idx(marker_exclude2, marker_uidx2_trail, marker_uidx2);
9973 if (marker_uidx2 < chrom_end) {
9974 last_pos = marker_pos[marker_uidx2];
9975 } else {
9976 last_pos = 0xffffffffU;
9977 }
9978 }
9979 }
9980 while (last_pos < cur_window_end) {
9981 marker_idx2++;
9982 marker_uidx2++;
9983 next_unset_ul_ck(marker_exclude2, chrom_end, &marker_uidx2);
9984 if (marker_uidx2 != chrom_end) {
9985 last_pos = marker_pos[marker_uidx2];
9986 } else {
9987 last_pos = 0xffffffffU;
9988 }
9989 }
9990 if (is_triangular) {
9991 ulii = block_idx1 + marker_idx1;
9992 gap_cts[ulii] += marker_idx2 - ulii - 1;
9993 while (++ulii < marker_idx2) {
9994 gap_cts[ulii] += 1;
9995 }
9996 g_epi_geno1_offsets[2 * block_idx1 + 1] = marker_idx2;
9997 } else {
9998 uii = marker_pos[marker_uidx_tmp];
9999 while (first_pos + case_only_gap <= uii) {
10000 marker_idx2_trail++;
10001 marker_uidx2_trail++;
10002 next_unset_ul_ck(marker_exclude2, chrom_end, &marker_uidx2_trail);
10003 if (marker_uidx2_trail != chrom_end) {
10004 first_pos = marker_pos[marker_uidx2_trail];
10005 } else {
10006 first_pos = 0x7fffffffU;
10007 }
10008 }
10009 if (marker_idx2 > marker_idx2_trail) {
10010 g_epi_geno1_offsets[2 * block_idx1] = marker_idx2_trail;
10011 g_epi_geno1_offsets[2 * block_idx1 + 1] = marker_idx2;
10012 gap_cts[block_idx1 + marker_idx1] = marker_idx2 - marker_idx2_trail;
10013 }
10014 }
10015 } else if (!is_triangular) {
10016 if (!IS_SET(marker_exclude2, marker_uidx_tmp)) {
10017 // do not compare against self
10018 marker_idx2 += marker_uidx_tmp - marker_uidx2 - popcount_bit_idx(marker_exclude2, marker_uidx2, marker_uidx_tmp);
10019 marker_uidx2 = marker_uidx_tmp;
10020 g_epi_geno1_offsets[2 * block_idx1] = marker_idx2;
10021 g_epi_geno1_offsets[2 * block_idx1 + 1] = marker_idx2 + 1;
10022 gap_cts[block_idx1 + marker_idx1] = 1;
10023 }
10024 }
10025 }
10026 marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx_base);
10027 if (is_triangular) {
10028 marker_idx2 = marker_idx1 + 1;
10029 marker_uidx2 = jump_forward_unset_unsafe(marker_exclude2, marker_uidx2 + 1, marker_idx2);
10030 } else {
10031 marker_idx2 = 0;
10032 }
10033 if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
10034 goto epistasis_report_ret_READ_FAIL;
10035 }
10036 cur_idx2_block_size = idx2_block_size;
10037 do {
10038 if (cur_idx2_block_size > marker_ct2 - marker_idx2) {
10039 cur_idx2_block_size = marker_ct2 - marker_idx2;
10040 }
10041 fill_ulong_zero(QUATERCT_TO_WORDCT(cur_idx2_block_size), g_epi_zmiss2);
10042 for (block_idx2 = 0; block_idx2 < cur_idx2_block_size; marker_uidx2++, block_idx2++) {
10043 if (IS_SET(marker_exclude2, marker_uidx2)) {
10044 marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx2);
10045 if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
10046 goto epistasis_report_ret_READ_FAIL;
10047 }
10048 }
10049 ulptr = &(g_epi_geno2[block_idx2 * tot_ctsplit]);
10050 if (load_and_split3(bedfile, loadbuf, unfiltered_sample_ct, ulptr, pheno_nm, pheno_c, case_ctv3, ctrl_ctv3, IS_SET(marker_reverse, marker_uidx2), is_case_only, &ulii)) {
10051 goto epistasis_report_ret_READ_FAIL;
10052 }
10053 uiptr = &(g_epi_tot2[block_idx2 * tot_stride]);
10054 uiptr[0] = popcount_longs(ulptr, case_ctv3);
10055 uiptr[1] = popcount_longs(&(ulptr[case_ctv3]), case_ctv3);
10056 uiptr[2] = popcount_longs(&(ulptr[2 * case_ctv3]), case_ctv3);
10057 if (!is_case_only) {
10058 ulptr = &(ulptr[case_ctv3 * 3]);
10059 uiptr[3] = popcount_longs(ulptr, ctrl_ctv3);
10060 uiptr[4] = popcount_longs(&(ulptr[ctrl_ctv3]), ctrl_ctv3);
10061 uiptr[5] = popcount_longs(&(ulptr[2 * ctrl_ctv3]), ctrl_ctv3);
10062 if (is_boost) {
10063 boost_calc_p_bc(uiptr[0], uiptr[1], uiptr[2], uiptr[3], uiptr[4], uiptr[5], &(g_epi_boost_precalc2[block_idx2 * 6]));
10064 }
10065 }
10066 if (ulii) {
10067 g_epi_zmiss2[block_idx2 / BITCT2] |= ulii << (2 * (block_idx2 % BITCT2));
10068 }
10069 }
10070 g_epi_idx2_block_size = cur_idx2_block_size;
10071 g_epi_idx2_block_start = marker_idx2;
10072 idx2_block_sizea16 = round_up_pow2(cur_idx2_block_size, 16);
10073 fill_uint_zero(idx1_block_size + 15 * (max_thread_ct - 1), g_epi_n_sig_ct1);
10074 fill_uint_zero(idx1_block_size + 15 * (max_thread_ct - 1), g_epi_fail_ct1);
10075 fill_uint_zero(idx2_block_sizea16 * max_thread_ct, g_epi_n_sig_ct2);
10076 fill_uint_zero(idx2_block_sizea16 * max_thread_ct, g_epi_fail_ct2);
10077 for (tidx = 0; tidx < max_thread_ct; tidx++) {
10078 ulii = g_epi_idx1_block_bounds[tidx];
10079 uljj = g_epi_idx1_block_bounds[tidx + 1];
10080 memcpy(&(g_epi_best_chisq1[g_epi_idx1_block_bounds16[tidx]]), &(g_epi_all_chisq[marker_idx1 + ulii]), (uljj - ulii) * sizeof(double));
10081 ulii = g_epi_geno1_offsets[2 * ulii + 1];
10082 if (ulii < marker_idx2 + cur_idx2_block_size) {
10083 if (ulii <= marker_idx2) {
10084 ulii = 0;
10085 } else {
10086 ulii -= marker_idx2;
10087 }
10088 memcpy(&(g_epi_best_chisq2[tidx * idx2_block_sizea16 + ulii]), &(g_epi_all_chisq[marker_idx2 + ulii]), (cur_idx2_block_size - ulii) * sizeof(double));
10089 }
10090 // no need to initialize IDs since they are only referenced when a
10091 // higher chisq value is present, and when that happens an ID is
10092 // always written
10093 }
10094 is_last_block = (marker_idx2 + cur_idx2_block_size >= marker_ct2);
10095 if (spawn_threads2(threads, &fast_epi_thread, max_thread_ct, is_last_block)) {
10096 goto epistasis_report_ret_THREAD_CREATE_FAIL;
10097 }
10098 fast_epi_thread((void*)0);
10099 join_threads2(threads, max_thread_ct, is_last_block);
10100 // merge best_chisq, best_ids, fail_cts
10101 for (tidx = 0; tidx < max_thread_ct; tidx++) {
10102 ulii = g_epi_idx1_block_bounds[tidx];
10103 uljj = g_epi_idx1_block_bounds[tidx + 1] - ulii;
10104 uii = g_epi_idx1_block_bounds16[tidx];
10105 dptr = &(g_epi_best_chisq1[uii]);
10106 uiptr = &(g_epi_best_id1[uii]);
10107 uiptr2 = &(g_epi_n_sig_ct1[uii]);
10108 uiptr3 = &(g_epi_fail_ct1[uii]);
10109 ulii += marker_idx1;
10110 dptr2 = &(best_chisq[ulii]);
10111 uiptr4 = &(n_sig_cts[ulii]);
10112 uiptr5 = &(fail_cts[ulii]);
10113 for (block_idx1 = 0; block_idx1 < uljj; block_idx1++, dptr2++, uiptr4++, uiptr5++) {
10114 dxx = *dptr++;
10115 if (dxx > (*dptr2)) {
10116 *dptr2 = dxx;
10117 best_ids[block_idx1 + ulii] = uiptr[block_idx1];
10118 }
10119 *uiptr4 += *uiptr2++;
10120 *uiptr5 += *uiptr3++;
10121 }
10122 }
10123 if (is_triangular) {
10124 for (tidx = 0; tidx < max_thread_ct; tidx++) {
10125 block_idx2 = g_epi_geno1_offsets[2 * g_epi_idx1_block_bounds[tidx] + 1];
10126 if (block_idx2 <= marker_idx2) {
10127 block_idx2 = 0;
10128 } else {
10129 block_idx2 -= marker_idx2;
10130 }
10131 dptr = &(g_epi_best_chisq2[tidx * idx2_block_sizea16 + block_idx2]);
10132 uiptr = &(g_epi_best_id2[tidx * idx2_block_sizea16]);
10133 uiptr2 = &(g_epi_n_sig_ct2[tidx * idx2_block_sizea16 + block_idx2]);
10134 uiptr3 = &(g_epi_fail_ct2[tidx * idx2_block_sizea16 + block_idx2]);
10135 dptr2 = &(best_chisq[block_idx2 + marker_idx2]);
10136 uiptr4 = &(n_sig_cts[block_idx2 + marker_idx2]);
10137 uiptr5 = &(fail_cts[block_idx2 + marker_idx2]);
10138 for (; block_idx2 < cur_idx2_block_size; block_idx2++, dptr2++, uiptr4++, uiptr5++) {
10139 dxx = *dptr++;
10140 if (dxx > (*dptr2)) {
10141 *dptr2 = dxx;
10142 best_ids[block_idx2 + marker_idx2] = uiptr[block_idx2];
10143 }
10144 *uiptr4 += *uiptr2++;
10145 *uiptr5 += *uiptr3++;
10146 }
10147 }
10148 }
10149 marker_idx2 += cur_idx2_block_size;
10150 } while (marker_idx2 < marker_ct2);
10151 fputs("\b\b\b\b\b\b\b\b\b\b\bwriting] \b\b\b", stdout);
10152 fflush(stdout);
10153 chrom_end = 0;
10154 block_idx1 = 0;
10155 while (1) {
10156 next_unset_ul_unsafe_ck(marker_exclude1, &marker_uidx);
10157 ujj = g_epi_geno1_offsets[2 * block_idx1];
10158 marker_idx2 = 0;
10159 dptr = &(g_epi_all_chisq[block_idx1 * marker_ct2]);
10160 if (marker_uidx >= chrom_end) {
10161 chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx);
10162 chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
10163 chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
10164 }
10165 wptr_start = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, chrom_idx, g_textbuf));
10166 *wptr_start++ = ' ';
10167 wptr_start = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx * max_marker_id_len]), wptr_start);
10168 *wptr_start++ = ' ';
10169 marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx_base);
10170 for (chrom_fo_idx2 = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2); chrom_fo_idx2 < chrom_ct; chrom_fo_idx2++) {
10171 chrom_end2 = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx2 + 1];
10172 if (marker_uidx2 >= chrom_end2) {
10173 continue;
10174 }
10175 chrom_idx2 = chrom_info_ptr->chrom_file_order[chrom_fo_idx2];
10176 wptr_start2 = width_force(4, wptr_start, chrom_name_write(chrom_info_ptr, chrom_idx2, wptr_start));
10177 *wptr_start2++ = ' ';
10178 for (; marker_uidx2 < chrom_end2; ++marker_uidx2, next_unset_ul_ck(marker_exclude2, unfiltered_marker_ct, &marker_uidx2), ++marker_idx2, ++dptr) {
10179 if (marker_idx2 == ujj) {
10180 marker_idx2 = g_epi_geno1_offsets[2 * block_idx1 + 1];
10181 if (marker_idx2 == marker_ct2) {
10182 goto epistasis_report_write_loop;
10183 }
10184 if (marker_idx2 > ujj) {
10185 marker_uidx2 = jump_forward_unset_unsafe(marker_exclude2, marker_uidx2 + 1, marker_idx2 - ujj);
10186 dptr = &(dptr[marker_idx2 - ujj]);
10187 if (marker_uidx2 >= chrom_end2) {
10188 break;
10189 }
10190 }
10191 } else if (marker_idx2 == marker_ct2) {
10192 goto epistasis_report_write_loop;
10193 }
10194 dxx = *dptr;
10195 if (dxx != -1) {
10196 wptr = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx2 * max_marker_id_len]), wptr_start2);
10197 *wptr++ = ' ';
10198 if (is_boost) {
10199 if (dxx == dxx) { // not nan
10200 memcpy(ularr, &dxx, sizeof(double));
10201 uii = 4 >> (ularr[0] & 3);
10202 // don't want ugly e-324s when zero belongs
10203 ularr[0] &= ~(3 * ONELU);
10204 memcpy(&dxx, ularr, sizeof(double));
10205 wptr = width_force(12, wptr, dtoa_g(dxx, wptr));
10206 wptr = memseta(wptr, 32, 4);
10207 *wptr++ = '0' + uii;
10208 *wptr++ = ' ';
10209 } else {
10210 wptr = memcpya(wptr, " nan 0 ", 18);
10211 uii = 0;
10212 }
10213 } else if (!no_ueki) {
10214 wptr = width_force(12, wptr, dtoa_g(dxx, wptr));
10215 *wptr++ = ' ';
10216 } else {
10217 // lower precision compatibility mode
10218 wptr = dtoa_g_wxp4x(dxx, 12, ' ', wptr);
10219 }
10220 if (!no_p_value) {
10221 if (!is_boost) {
10222 dxx = normdist(-sqrt(dxx)) * 2;
10223 wptr = dtoa_g_wxp4x(MAXV(dxx, output_min_p), 12, ' ', wptr);
10224 } else if (uii) {
10225 dxx = chiprob_p(dxx, uii);
10226 wptr = dtoa_g_wxp4x(MAXV(dxx, output_min_p), 12, ' ', wptr);
10227 } else {
10228 wptr = memcpya(wptr, " NA ", 13);
10229 }
10230 }
10231 *wptr++ = '\n';
10232 if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
10233 goto epistasis_report_ret_WRITE_FAIL;
10234 }
10235 // could remove this writeback in --epi1 1 case
10236 *dptr = -1;
10237 }
10238 }
10239 }
10240 epistasis_report_write_loop:
10241 block_idx1++;
10242 marker_uidx++;
10243 if (block_idx1 >= idx1_block_size) {
10244 break;
10245 }
10246 }
10247 marker_idx1 += idx1_block_size;
10248 fputs("\b\b\b\b\b\b\b\b\b\b \b\b\b\b\b\b\b\b\b\b", stdout);
10249 if (tests_complete >= pct_thresh) {
10250 if (pct > 10) {
10251 putc_unlocked('\b', stdout);
10252 }
10253 pct = (tests_complete * 100LLU) / tests_expected;
10254 if (pct < 100) {
10255 printf("\b\b%" PRIuPTR "%%", pct);
10256 fflush(stdout);
10257 pct_thresh = ((++pct) * ((uint64_t)tests_expected)) / 100;
10258 }
10259 }
10260 } while (marker_idx1 < marker_idx1_end);
10261 if (fclose_null(&outfile)) {
10262 goto epistasis_report_ret_WRITE_FAIL;
10263 }
10264 }
10265 memcpy(&(outname_end[7]), ".summary", 9);
10266 if (parallel_tot > 1) {
10267 outname_end[15] = '.';
10268 uint32toa_x(parallel_idx + 1, '\0', &(outname_end[16]));
10269 }
10270 if (fopen_checked(outname, "w", &outfile)) {
10271 goto epistasis_report_ret_OPEN_FAIL;
10272 }
10273 wptr = memcpya(g_textbuf, " CHR ", 5);
10274 wptr = fw_strcpyn(plink_maxsnp, 3, "SNP", wptr);
10275 if (parallel_tot == 1) {
10276 wptr = strcpya(wptr, " N_SIG N_TOT PROP BEST_CHISQ BEST_CHR ");
10277 } else {
10278 wptr = strcpya(wptr, " N_SIG N_TOT BEST_CHISQ BEST_CHR ");
10279 }
10280 wptr = fw_strcpyn(plink_maxsnp, 8, "BEST_SNP", wptr);
10281 wptr = memcpya(wptr, " \n", 2);
10282 if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
10283 goto epistasis_report_ret_WRITE_FAIL;
10284 }
10285 bigstack_reset(bigstack_mark3);
10286 if (bigstack_alloc_ui(marker_ct1, &marker_idx_to_uidx)) {
10287 goto epistasis_report_ret_NOMEM;
10288 }
10289 fill_idx_to_uidx(marker_exclude2, unfiltered_marker_ct, marker_ct2, marker_idx_to_uidx);
10290 marker_idx1 = marker_idx1_start;
10291 marker_uidx = next_unset_ul_unsafe(marker_exclude1, marker_uidx_base);
10292 if (marker_idx1) {
10293 marker_uidx = jump_forward_unset_unsafe(marker_exclude1, marker_uidx + 1, marker_idx1);
10294 }
10295 for (chrom_fo_idx = 0; chrom_fo_idx < chrom_ct; chrom_fo_idx++) {
10296 chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
10297 if (marker_uidx >= chrom_end) {
10298 continue;
10299 }
10300 chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
10301 wptr_start = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, chrom_idx, g_textbuf));
10302 *wptr_start++ = ' ';
10303 for (; marker_uidx < chrom_end; marker_uidx++, next_unset_ul_ck(marker_exclude1, unfiltered_marker_ct, &marker_uidx), marker_idx1++) {
10304 uii = n_sig_cts[marker_idx1];
10305 ujj = fail_cts[marker_idx1];
10306 if (gap_cts) {
10307 ujj += gap_cts[marker_idx1];
10308 }
10309 tests_thrown_out += (uint64_t)ujj;
10310 // number of tests attempted in this run:
10311 // * if set1 and set2 are identical, there are
10312 // marker_ct2 - 1 - marker_idx1_start cells between the row and the
10313 // same-index column
10314 // * otherwise, gap_cts[] counted the number of skipped cells
10315 if (marker_idx1 < marker_idx1_end) {
10316 if (is_triangular) {
10317 ujj = marker_ct2 - 1 - marker_idx1_start - ujj;
10318 } else {
10319 ujj = marker_ct2 - ujj;
10320 }
10321 } else {
10322 // --parallel bugfix
10323 ujj = job_size - ujj;
10324 }
10325 wptr = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx * max_marker_id_len]), wptr_start);
10326 wptr = memseta(wptr, 32, 3);
10327 wptr = uint32toa_w10(uii, wptr);
10328 wptr = memseta(wptr, 32, 3);
10329 wptr = uint32toa_w10x(ujj, ' ', wptr);
10330 if (parallel_tot == 1) {
10331 wptr = dtoa_g_wxp4x(((double)((int32_t)uii)) / ((double)((int32_t)ujj)), 12, ' ', wptr);
10332 }
10333 if (ujj) {
10334 if (parallel_tot == 1) {
10335 // or cat mode
10336 wptr = dtoa_g_wxp4x(best_chisq[marker_idx1], 12, ' ', wptr);
10337 } else {
10338 // greater precision for accurate merges
10339 wptr = dtoa_g_wxp8x(best_chisq[marker_idx1], 12, ' ', wptr);
10340 }
10341 uii = marker_idx_to_uidx[best_ids[marker_idx1]];
10342 wptr = width_force(4, wptr, chrom_name_write(chrom_info_ptr, get_variant_chrom(chrom_info_ptr, uii), wptr));
10343 *wptr++ = ' ';
10344 wptr = fw_strcpy(plink_maxsnp, &(marker_ids[uii * max_marker_id_len]), wptr);
10345 } else {
10346 wptr = memcpya(wptr, " NA NA", 17);
10347 wptr = memseta(wptr, 32, plink_maxsnp - 1);
10348 wptr = memcpya(wptr, "NA", 2);
10349 }
10350 wptr = memcpya(wptr, " \n", 2);
10351 if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
10352 goto epistasis_report_ret_WRITE_FAIL;
10353 }
10354 }
10355 }
10356 if (is_triangular) {
10357 tests_thrown_out /= 2; // all fails double-counted in triangle case
10358 }
10359 fputs("\b\b", stdout);
10360 LOGPRINTF("done.\n");
10361 LOGPRINTFWW("%" PRIu64 " valid test%s performed, summary written to %s .\n", tests_expected - tests_thrown_out, (tests_expected - tests_thrown_out == 1)? "" : "s", outname);
10362
10363 while (0) {
10364 epistasis_report_ret_NOMEM:
10365 retval = RET_NOMEM;
10366 break;
10367 epistasis_report_ret_OPEN_FAIL:
10368 retval = RET_OPEN_FAIL;
10369 break;
10370 epistasis_report_ret_READ_FAIL:
10371 retval = RET_READ_FAIL;
10372 break;
10373 epistasis_report_ret_WRITE_FAIL:
10374 retval = RET_WRITE_FAIL;
10375 break;
10376 epistasis_report_ret_TOO_FEW_MARKERS:
10377 if (pheno_d) {
10378 if (is_triangular) {
10379 logerrprint("Error: --epistasis requires 2+ non-monomorphic autosomal diploid loci.\n");
10380 } else {
10381 logerrprint("Error: Each --epistasis set must contain at least one non-monomorphic autosomal\ndiploid site.\n");
10382 }
10383 } else {
10384 if (is_triangular) {
10385 logerrprint("Error: --{fast-}epistasis requires 2+ autosomal diploid loci not monomorphic in\neither cases or controls.\n");
10386 } else {
10387 logerrprint("Error: Each --{fast-}epistasis set must contain at least one autosomal diploid\nlocus not monomorphic in either cases or controls.\n");
10388 }
10389 }
10390 retval = RET_INVALID_CMDLINE;
10391 break;
10392 epistasis_report_ret_INVALID_CMDLINE_2:
10393 logerrprintb();
10394 epistasis_report_ret_INVALID_CMDLINE:
10395 retval = RET_INVALID_CMDLINE;
10396 break;
10397 epistasis_report_ret_THREAD_CREATE_FAIL:
10398 retval = RET_THREAD_CREATE_FAIL;
10399 break;
10400 }
10401 epistasis_report_ret_1:
10402 fclose_cond(outfile);
10403 bigstack_reset(bigstack_mark);
10404 return retval;
10405 }
10406
indep_pairphase(Ld_info * ldip,FILE * bedfile,uintptr_t bed_offset,uintptr_t marker_ct,uintptr_t unfiltered_marker_ct,uintptr_t * marker_exclude,uintptr_t * marker_reverse,char * marker_ids,uintptr_t max_marker_id_len,Chrom_info * chrom_info_ptr,double * set_allele_freqs,uint32_t * marker_pos,uintptr_t unfiltered_sample_ct,uintptr_t * founder_info,uintptr_t * sex_male,char * outname,char * outname_end,uint32_t hh_exists)10407 int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, Chrom_info* chrom_info_ptr, double* set_allele_freqs, uint32_t* marker_pos, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uintptr_t* sex_male, char* outname, char* outname_end, uint32_t hh_exists) {
10408 // Like ld_prune(), except that it computes the full 3x3 contingency table,
10409 // and is always in pairwise mode.
10410 unsigned char* bigstack_mark = g_bigstack_base;
10411 uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
10412 uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
10413 uintptr_t unfiltered_sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(unfiltered_sample_ct);
10414 uintptr_t founder_ct = popcount_longs(founder_info, unfiltered_sample_ctv2 / 2);
10415 uintptr_t founder_ctl = BITCT_TO_WORDCT(founder_ct);
10416 uintptr_t founder_ctv3 = BITCT_TO_ALIGNED_WORDCT(founder_ct);
10417 // no actual case/control split here, but keep the variables the same to
10418 // minimize divergence from ld_report_dprime()
10419 uintptr_t founder_ctsplit = 3 * founder_ctv3;
10420 uintptr_t final_mask = get_final_mask(founder_ct);
10421 uintptr_t window_max = 1;
10422 uintptr_t* founder_include2 = nullptr;
10423 uintptr_t* founder_male_include2 = nullptr;
10424 uintptr_t* sex_male_collapsed = nullptr;
10425 uintptr_t* cur_geno1_male = nullptr;
10426 double prune_ld_thresh = ldip->prune_last_param * (1 + SMALL_EPSILON);
10427 uint32_t window_is_kb = (ldip->modifier / LD_PRUNE_KB_WINDOW) & 1;
10428 uint32_t ld_window_size = ldip->prune_window_size;
10429 uint32_t ld_window_incr = ldip->prune_window_incr;
10430 uint32_t tot_exclude_ct = 0;
10431 uint32_t at_least_one_prune = 0;
10432 uint32_t chrom_code_end = chrom_info_ptr->max_code + 1 + chrom_info_ptr->name_ct;
10433 int32_t retval = 0;
10434 uint32_t tot1[6];
10435 uint32_t counts[18];
10436 uintptr_t* loadbuf_raw;
10437 uintptr_t* loadbuf;
10438 uintptr_t* dummy_nm;
10439 uintptr_t* pruned_arr;
10440 uintptr_t* geno;
10441 uintptr_t* zmiss;
10442 uintptr_t* cur_geno1;
10443 uintptr_t* cur_geno2;
10444 uint32_t* live_indices;
10445 uint32_t* start_arr;
10446 uint32_t* cur_tots;
10447 uint32_t* cur_tot2;
10448 uintptr_t window_maxl;
10449 uintptr_t cur_exclude_ct;
10450 uintptr_t ulii;
10451 uintptr_t uljj;
10452 uintptr_t ulkk;
10453 double freq1x;
10454 double freq2x;
10455 double freqx1;
10456 double freqx2;
10457 double freq11;
10458 double freq11_expected;
10459 double rsq;
10460 uint32_t pct_thresh;
10461 uint32_t window_unfiltered_start;
10462 uint32_t window_unfiltered_end;
10463 uint32_t cur_window_size;
10464 uint32_t cur_chrom;
10465 uint32_t chrom_start;
10466 uint32_t chrom_end;
10467 uint32_t is_haploid;
10468 uint32_t is_x;
10469 uint32_t is_y;
10470 uint32_t prev_end;
10471 uint32_t nm_fixed;
10472 uint32_t cur_zmiss2;
10473 uint32_t pct;
10474 uint32_t uii;
10475 if (founder_ct < 2) {
10476 logerrprint("Warning: Skipping --indep-pairphase since there are less than two founders.\n(--make-founders may come in handy here.)\n");
10477 goto indep_pairphase_ret_1;
10478 }
10479 if (is_set(chrom_info_ptr->chrom_mask, 0)) {
10480 ulii = count_chrom_markers(chrom_info_ptr, marker_exclude, 0);
10481 if (chrom_info_ptr->zero_extra_chroms) {
10482 for (uii = chrom_info_ptr->max_code + 1; uii < chrom_code_end; uii++) {
10483 ulii += count_chrom_markers(chrom_info_ptr, marker_exclude, uii);
10484 }
10485 chrom_code_end = chrom_info_ptr->max_code + 1;
10486 }
10487 marker_ct -= ulii;
10488 LOGPRINTF("--indep-pairphase: Ignoring %" PRIuPTR " chromosome 0 variant%s.\n", ulii, (ulii == 1)? "" : "s");
10489 }
10490 if (marker_ct < 2) {
10491 logerrprint("Error: Too few variants for --indep-pairphase.\n");
10492 goto indep_pairphase_ret_INVALID_FORMAT;
10493 }
10494
10495 // no need to force founder_male_include2 initialization here
10496 if (alloc_collapsed_haploid_filters(founder_info, sex_male, unfiltered_sample_ct, founder_ct, hh_exists, 1, &founder_include2, &founder_male_include2)) {
10497 goto indep_pairphase_ret_NOMEM;
10498 }
10499
10500 if (window_is_kb) {
10501 // determine maximum number of markers that may need to be loaded at once
10502 for (cur_chrom = 1; cur_chrom < chrom_code_end; cur_chrom++) {
10503 if (is_set(chrom_info_ptr->chrom_mask, cur_chrom)) {
10504 window_max = chrom_window_max(marker_pos, marker_exclude, chrom_info_ptr, cur_chrom, 0x7fffffff, ld_window_size * 1000, window_max);
10505 }
10506 }
10507 }
10508
10509 window_unfiltered_start = ld_prune_next_valid_chrom_start(marker_exclude, 0, chrom_info_ptr, chrom_code_end, unfiltered_marker_ct);
10510
10511 if (bigstack_alloc_ul(unfiltered_marker_ctl, &pruned_arr)) {
10512 goto indep_pairphase_ret_NOMEM;
10513 }
10514
10515 memcpy(pruned_arr, marker_exclude, unfiltered_marker_ctl * sizeof(intptr_t));
10516
10517 if (!window_is_kb) {
10518 window_max = ld_window_size;
10519 }
10520 window_maxl = BITCT_TO_WORDCT(window_max);
10521 if (bigstack_alloc_ui(window_max, &live_indices) ||
10522 bigstack_alloc_ui(window_max, &start_arr) ||
10523 bigstack_alloc_ul(unfiltered_sample_ctv2, &loadbuf_raw) ||
10524 bigstack_alloc_ul(founder_ctl * 2, &loadbuf) ||
10525 bigstack_alloc_ul(founder_ctl, &dummy_nm) ||
10526 bigstack_alloc_ul(founder_ctsplit * window_max, &geno) ||
10527 bigstack_alloc_ul(window_maxl, &zmiss) ||
10528 bigstack_alloc_ui(window_max * 3, &cur_tots)) {
10529 goto indep_pairphase_ret_NOMEM;
10530 }
10531 loadbuf[founder_ctl * 2 - 2] = 0;
10532 loadbuf[founder_ctl * 2 - 1] = 0;
10533 fill_all_bits(founder_ct, dummy_nm);
10534 // bugfix: this loop must start at 0, not 1
10535 for (ulii = 0; ulii < window_max; ulii++) {
10536 geno[ulii * founder_ctsplit + founder_ctv3 - 1] = 0;
10537 geno[ulii * founder_ctsplit + 2 * founder_ctv3 - 1] = 0;
10538 geno[ulii * founder_ctsplit + founder_ctsplit - 1] = 0;
10539 }
10540 if ((chrom_info_ptr->xymt_codes[X_OFFSET] != -2) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[X_OFFSET])) {
10541 if (bigstack_alloc_ul(founder_ctl, &sex_male_collapsed) ||
10542 bigstack_alloc_ul(founder_ctsplit, &cur_geno1_male)) {
10543 goto indep_pairphase_ret_NOMEM;
10544 }
10545 copy_bitarr_subset(sex_male, founder_info, unfiltered_sample_ct, founder_ct, sex_male_collapsed);
10546 }
10547 do {
10548 prev_end = 0;
10549 ld_prune_start_chrom(window_is_kb, &cur_chrom, &chrom_end, window_unfiltered_start, live_indices, start_arr, &window_unfiltered_end, ld_window_size, &cur_window_size, unfiltered_marker_ct, pruned_arr, chrom_info_ptr, marker_pos, &is_haploid, &is_x, &is_y);
10550 cur_exclude_ct = 0;
10551 fill_ulong_zero(window_maxl, zmiss);
10552 if (cur_window_size > 1) {
10553 for (ulii = 0; ulii < (uintptr_t)cur_window_size; ulii++) {
10554 uljj = live_indices[ulii];
10555 if (fseeko(bedfile, bed_offset + (uljj * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
10556 goto indep_pairphase_ret_READ_FAIL;
10557 }
10558 if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, uljj), bedfile, loadbuf_raw, loadbuf)) {
10559 goto indep_pairphase_ret_READ_FAIL;
10560 }
10561 if (is_haploid && hh_exists) {
10562 haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)loadbuf);
10563 }
10564 cur_geno1 = &(geno[ulii * founder_ctsplit]);
10565 load_and_split3(nullptr, loadbuf, founder_ct, cur_geno1, dummy_nm, dummy_nm, founder_ctv3, 0, 0, 1, &ulkk);
10566 cur_tots[ulii * 3] = popcount_longs(cur_geno1, founder_ctv3);
10567 cur_tots[ulii * 3 + 1] = popcount_longs(&(cur_geno1[founder_ctv3]), founder_ctv3);
10568 cur_tots[ulii * 3 + 2] = popcount_longs(&(cur_geno1[2 * founder_ctv3]), founder_ctv3);
10569 if ((!cur_tots[ulii * 3 + 1]) && ((!cur_tots[ulii * 3]) || (!cur_tots[ulii * 3 + 2]))) {
10570 SET_BIT(uljj, pruned_arr);
10571 cur_exclude_ct++;
10572 } else if (ulkk == 3) {
10573 SET_BIT(ulii, zmiss);
10574 }
10575 }
10576 }
10577 pct = 1;
10578 chrom_start = get_chrom_start_vidx(chrom_info_ptr, cur_chrom);
10579 pct_thresh = window_unfiltered_start + ((uint64_t)pct * (chrom_end - chrom_start)) / 100;
10580 while ((window_unfiltered_start < chrom_end) || (cur_window_size > 1)) {
10581 if (cur_window_size > 1) {
10582 do {
10583 at_least_one_prune = 0;
10584 for (ulii = 0; ulii < cur_window_size - 1; ulii++) {
10585 if (IS_SET(pruned_arr, live_indices[ulii])) {
10586 continue;
10587 }
10588 uljj = ulii + 1;
10589 while (live_indices[uljj] < start_arr[ulii]) {
10590 if (++uljj == cur_window_size) {
10591 goto indep_pairphase_skip_marker;
10592 }
10593 }
10594 cur_geno1 = &(geno[ulii * founder_ctsplit]);
10595 memcpy(tot1, &(cur_tots[ulii * 3]), 3 * sizeof(int32_t));
10596 nm_fixed = is_set_ul(zmiss, ulii);
10597 if (is_x) {
10598 memcpy(cur_geno1_male, cur_geno1, founder_ctsplit * sizeof(intptr_t));
10599 bitvec_and(sex_male_collapsed, founder_ctv3, cur_geno1_male);
10600 tot1[3] = popcount_longs(cur_geno1_male, founder_ctv3);
10601 bitvec_and(sex_male_collapsed, founder_ctv3, &(cur_geno1_male[founder_ctv3]));
10602 tot1[4] = popcount_longs(&(cur_geno1_male[founder_ctv3]), founder_ctv3);
10603 bitvec_and(sex_male_collapsed, founder_ctv3, &(cur_geno1_male[2 * founder_ctv3]));
10604 tot1[5] = popcount_longs(&(cur_geno1_male[2 * founder_ctv3]), founder_ctv3);
10605 }
10606 for (; uljj < cur_window_size; uljj++) {
10607 if (IS_SET(pruned_arr, live_indices[uljj])) {
10608 continue;
10609 }
10610 cur_geno2 = &(geno[uljj * founder_ctsplit]);
10611 cur_tot2 = &(cur_tots[uljj * 3]);
10612 cur_zmiss2 = IS_SET(zmiss, uljj);
10613 if (nm_fixed) {
10614 two_locus_count_table_zmiss1(cur_geno1, cur_geno2, counts, founder_ctv3, cur_zmiss2);
10615 if (cur_zmiss2) {
10616 counts[2] = tot1[0] - counts[0] - counts[1];
10617 counts[5] = tot1[1] - counts[3] - counts[4];
10618 }
10619 counts[6] = cur_tot2[0] - counts[0] - counts[3];
10620 counts[7] = cur_tot2[1] - counts[1] - counts[4];
10621 counts[8] = cur_tot2[2] - counts[2] - counts[5];
10622 } else {
10623 two_locus_count_table(cur_geno1, cur_geno2, counts, founder_ctv3, cur_zmiss2);
10624 if (cur_zmiss2) {
10625 counts[2] = tot1[0] - counts[0] - counts[1];
10626 counts[5] = tot1[1] - counts[3] - counts[4];
10627 counts[8] = tot1[2] - counts[6] - counts[7];
10628 }
10629 }
10630 if (is_x) {
10631 two_locus_count_table(cur_geno1_male, cur_geno2, &(counts[9]), founder_ctv3, cur_zmiss2);
10632 if (cur_zmiss2) {
10633 counts[11] = tot1[3] - counts[9] - counts[10];
10634 counts[14] = tot1[4] - counts[12] - counts[13];
10635 counts[17] = tot1[5] - counts[15] - counts[16];
10636 }
10637 }
10638 if (!em_phase_hethet_nobase(counts, is_x, is_x, &freq1x, &freq2x, &freqx1, &freqx2, &freq11)) {
10639 freq11_expected = freqx1 * freq1x;
10640 rsq = freq11 - freq11_expected;
10641 rsq = rsq * rsq / (freq11_expected * freq2x * freqx2);
10642 if (rsq > prune_ld_thresh) {
10643 at_least_one_prune = 1;
10644 cur_exclude_ct++;
10645 // remove marker with lower MAF
10646 if (get_maf(set_allele_freqs[live_indices[ulii]]) < (1 - SMALL_EPSILON) * get_maf(set_allele_freqs[live_indices[uljj]])) {
10647 SET_BIT(live_indices[ulii], pruned_arr);
10648 } else {
10649 SET_BIT(live_indices[uljj], pruned_arr);
10650 uljj++;
10651 while (uljj < cur_window_size) {
10652 if (!IS_SET(pruned_arr, live_indices[uljj])) {
10653 break;
10654 }
10655 uljj++;
10656 }
10657 if (uljj < cur_window_size) {
10658 start_arr[ulii] = live_indices[uljj];
10659 }
10660 }
10661 break;
10662 }
10663 }
10664 }
10665 if (uljj == cur_window_size) {
10666 indep_pairphase_skip_marker:
10667 start_arr[ulii] = window_unfiltered_end;
10668 }
10669 }
10670 } while (at_least_one_prune);
10671 }
10672 for (uii = 0; uii < ld_window_incr; uii++) {
10673 if (window_unfiltered_start == chrom_end) {
10674 break;
10675 }
10676 window_unfiltered_start++;
10677 next_unset_ck(marker_exclude, chrom_end, &window_unfiltered_start);
10678 }
10679 if (window_unfiltered_start == chrom_end) {
10680 break;
10681 }
10682 if (window_unfiltered_start >= pct_thresh) {
10683 pct = ((window_unfiltered_start - chrom_start) * 100LLU) / (chrom_end - chrom_start);
10684 printf("\r%u%%", pct++);
10685 fflush(stdout);
10686 pct_thresh = chrom_start + (((uint64_t)pct * (chrom_end - chrom_start)) / 100);
10687 }
10688 uljj = 0;
10689 if (window_unfiltered_end < window_unfiltered_start) {
10690 window_unfiltered_end = window_unfiltered_start;
10691 }
10692 // copy back previously loaded/computed results
10693 while (live_indices[uljj] < window_unfiltered_start) {
10694 uljj++;
10695 if (uljj == cur_window_size) {
10696 break;
10697 }
10698 }
10699 for (ulii = 0; uljj < cur_window_size; uljj++) {
10700 if (IS_SET(pruned_arr, live_indices[uljj])) {
10701 continue;
10702 }
10703 memcpy(&(geno[ulii * founder_ctsplit]), &(geno[uljj * founder_ctsplit]), founder_ctsplit * sizeof(intptr_t));
10704 live_indices[ulii] = live_indices[uljj];
10705 start_arr[ulii] = start_arr[uljj];
10706 memcpy(&(cur_tots[ulii * 3]), &(cur_tots[uljj * 3]), 3 * sizeof(int32_t));
10707 // bugfix: forgot to update zmiss
10708 if (IS_SET(zmiss, uljj)) {
10709 SET_BIT(ulii, zmiss);
10710 } else {
10711 CLEAR_BIT(ulii, zmiss);
10712 }
10713 ulii++;
10714 }
10715 clear_bits(ulii, window_max, zmiss);
10716
10717 prev_end = ulii;
10718 cur_window_size = ulii;
10719 if (window_is_kb) {
10720 uljj = 0;
10721 ulkk = window_unfiltered_end;
10722 while ((window_unfiltered_end < chrom_end) && (marker_pos[window_unfiltered_end] <= marker_pos[window_unfiltered_start] + (1000 * ld_window_size))) {
10723 uljj++;
10724 window_unfiltered_end++;
10725 next_unset_ck(marker_exclude, chrom_end, &window_unfiltered_end);
10726 }
10727 window_unfiltered_end = ulkk;
10728 } else {
10729 uljj = ld_window_incr;
10730 }
10731 for (ulii = 0; ulii < uljj; ulii++) {
10732 if (window_unfiltered_end == chrom_end) {
10733 break;
10734 }
10735 live_indices[cur_window_size] = window_unfiltered_end;
10736 if (cur_window_size > prev_end) {
10737 start_arr[cur_window_size - 1] = window_unfiltered_end;
10738 }
10739 if (fseeko(bedfile, bed_offset + (window_unfiltered_end * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
10740 goto indep_pairphase_ret_READ_FAIL;
10741 }
10742 if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, window_unfiltered_end), bedfile, loadbuf_raw, loadbuf)) {
10743 goto indep_pairphase_ret_READ_FAIL;
10744 }
10745 if (is_haploid && hh_exists) {
10746 haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)loadbuf);
10747 }
10748 cur_geno1 = &(geno[cur_window_size * founder_ctsplit]);
10749 load_and_split3(nullptr, loadbuf, founder_ct, cur_geno1, dummy_nm, dummy_nm, founder_ctv3, 0, 0, 1, &ulkk);
10750 cur_tots[((uintptr_t)cur_window_size) * 3] = popcount_longs(cur_geno1, founder_ctv3);
10751 cur_tots[((uintptr_t)cur_window_size) * 3 + 1] = popcount_longs(&(cur_geno1[founder_ctv3]), founder_ctv3);
10752 cur_tots[((uintptr_t)cur_window_size) * 3 + 2] = popcount_longs(&(cur_geno1[2 * founder_ctv3]), founder_ctv3);
10753 if ((!cur_tots[((uintptr_t)cur_window_size) * 3 + 1]) && ((!cur_tots[((uintptr_t)cur_window_size) * 3]) || (!cur_tots[((uintptr_t)cur_window_size) * 3 + 2]))) {
10754 SET_BIT(window_unfiltered_end, pruned_arr);
10755 cur_exclude_ct++;
10756 } else if (ulkk == 3) {
10757 SET_BIT(cur_window_size, zmiss);
10758 }
10759 cur_window_size++;
10760 window_unfiltered_end++;
10761 next_unset_ck(marker_exclude, chrom_end, &window_unfiltered_end);
10762 }
10763 if (cur_window_size > prev_end) {
10764 start_arr[cur_window_size - 1] = window_unfiltered_end;
10765 }
10766 }
10767 putc_unlocked('\r', stdout);
10768 LOGPRINTF("Pruned %" PRIuPTR " variant%s from chromosome %u, leaving %" PRIuPTR ".\n", cur_exclude_ct, (cur_exclude_ct == 1)? "" : "s", cur_chrom, chrom_end - chrom_start - popcount_bit_idx(marker_exclude, chrom_start, chrom_end) - cur_exclude_ct);
10769 tot_exclude_ct += cur_exclude_ct;
10770
10771 // advance chromosomes as necessary
10772 window_unfiltered_start = ld_prune_next_valid_chrom_start(pruned_arr, window_unfiltered_start, chrom_info_ptr, chrom_code_end, unfiltered_marker_ct);
10773 } while (window_unfiltered_start < unfiltered_marker_ct);
10774
10775 LOGPRINTF("Pruning complete. %u of %" PRIuPTR " variants removed.\n", tot_exclude_ct, marker_ct);
10776 retval = ld_prune_write(outname, outname_end, marker_exclude, pruned_arr, marker_ids, max_marker_id_len, chrom_info_ptr, chrom_code_end);
10777 if (retval) {
10778 goto indep_pairphase_ret_1;
10779 }
10780
10781 while (0) {
10782 indep_pairphase_ret_NOMEM:
10783 retval = RET_NOMEM;
10784 break;
10785 indep_pairphase_ret_READ_FAIL:
10786 retval = RET_READ_FAIL;
10787 break;
10788 indep_pairphase_ret_INVALID_FORMAT:
10789 retval = RET_INVALID_FORMAT;
10790 break;
10791 }
10792 indep_pairphase_ret_1:
10793 bigstack_reset(bigstack_mark);
10794 return retval;
10795 }
10796
// Singly-linked list node holding one row of a merged
// --epistasis-summary-merge table.  Nodes are created by lle_alloc().
typedef struct ll_epi_summary_struct {
  // next row in file order; nullptr at the end of the list
  struct ll_epi_summary_struct* next;
  // largest BEST_CHISQ value seen so far for this variant
  double best_chisq;
  // "<chr>\t<variant ID>" of the best partner, followed by null padding and a
  // final tab byte marking the end of the buffer
  char* best_chr_and_snp; // separate allocation; tab-delimited
  // running N_SIG total across the merged files
  uint32_t n_sig;
  // running N_TOT total across the merged files
  uint32_t n_tot;
  // byte length of the variant ID stored at the start of strbuf
  uint32_t id_len; // variant ID NOT null-terminated
  // variant ID (id_len bytes), immediately followed by the null-terminated
  // chromosome string
  char strbuf[];
} Ll_epi_summary;
10806
10807 // N.B. moves g_bigstack_base in word-size instead of cacheline increments
lle_alloc(char * chrom_id,uint32_t chrom_len,char * marker_id,uint32_t marker_id_len,uint32_t nsig,uint32_t ntot,double chisq)10808 Ll_epi_summary* lle_alloc(char* chrom_id, uint32_t chrom_len, char* marker_id, uint32_t marker_id_len, uint32_t nsig, uint32_t ntot, double chisq) {
10809 uintptr_t alloc_size = (sizeof(Ll_epi_summary) + chrom_len + marker_id_len + sizeof(intptr_t)) & (~(sizeof(intptr_t) - ONELU));
10810 Ll_epi_summary* newptr = (Ll_epi_summary*)g_bigstack_base;
10811 if (bigstack_left() < alloc_size) {
10812 return nullptr;
10813 }
10814 g_bigstack_base = &(g_bigstack_base[alloc_size]);
10815 newptr->next = nullptr;
10816 newptr->best_chisq = chisq;
10817 newptr->n_sig = nsig;
10818 newptr->n_tot = ntot;
10819 newptr->id_len = marker_id_len;
10820 memcpy(newptr->strbuf, marker_id, marker_id_len);
10821 memcpyx(&(newptr->strbuf[marker_id_len]), chrom_id, chrom_len, '\0');
10822 return newptr;
10823 }
10824
validate_epistasis_summary_header(char * bufptr)10825 int32_t validate_epistasis_summary_header(char* bufptr) {
10826 uint32_t slen = strlen_se(bufptr);
10827 int32_t retval = 0;
10828 if ((slen != 3) || memcmp(bufptr, "CHR", 3)) {
10829 return RET_INVALID_FORMAT;
10830 }
10831 bufptr = skip_initial_spaces(&(bufptr[3]));
10832 slen = strlen_se(bufptr);
10833 if ((slen != 3) || memcmp(bufptr, "SNP", 3)) {
10834 return RET_INVALID_FORMAT;
10835 }
10836 bufptr = skip_initial_spaces(&(bufptr[3]));
10837 slen = strlen_se(bufptr);
10838 if ((slen != 5) || memcmp(bufptr, "N_SIG", 5)) {
10839 return RET_INVALID_FORMAT;
10840 }
10841 bufptr = skip_initial_spaces(&(bufptr[5]));
10842 slen = strlen_se(bufptr);
10843 if ((slen != 5) || memcmp(bufptr, "N_TOT", 5)) {
10844 return RET_INVALID_FORMAT;
10845 }
10846 bufptr = skip_initial_spaces(&(bufptr[5]));
10847 slen = strlen_se(bufptr);
10848 if (slen == 4) {
10849 if (memcmp(bufptr, "PROP", 4)) {
10850 return RET_INVALID_FORMAT;
10851 }
10852 retval = -1;
10853 bufptr = skip_initial_spaces(&(bufptr[4]));
10854 slen = strlen_se(bufptr);
10855 }
10856 if ((slen != 10) || memcmp(bufptr, "BEST_CHISQ", 10)) {
10857 return RET_INVALID_FORMAT;
10858 }
10859 bufptr = skip_initial_spaces(&(bufptr[10]));
10860 slen = strlen_se(bufptr);
10861 if ((slen != 8) || memcmp(bufptr, "BEST_CHR", 8)) {
10862 return RET_INVALID_FORMAT;
10863 }
10864 bufptr = skip_initial_spaces(&(bufptr[8]));
10865 slen = strlen_se(bufptr);
10866 if ((slen != 8) || memcmp(bufptr, "BEST_SNP", 8)) {
10867 return RET_INVALID_FORMAT;
10868 }
10869 bufptr = skip_initial_spaces(&(bufptr[8]));
10870 if (!is_eoln_kns(*bufptr)) {
10871 return RET_INVALID_FORMAT;
10872 }
10873 return retval;
10874 }
10875
// Implements --epistasis-summary-merge: combines the numbered partial files
// <prefix>.summary.1 .. <prefix>.summary.<summary_merge_ct> into one summary.
//
// epi_ip->summary_merge_prefix must end in ".epi.cc", ".epi.co" or ".epi.qt",
// optionally followed by ".summary" and/or a trailing '.'.  The buffer is
// modified in place (".summary.<index>" is appended) to build each input
// filename.  The matching ".epi.xx.summary" suffix is written at outname_end
// to form the output filename.
//
// If the first file's header contains a PROP column, the inputs are simply
// concatenated.  Otherwise the first file defines the row order, and every
// later file must list a contiguous run of those same rows; N_SIG/N_TOT are
// summed and the best chi-square (with its partner variant) is kept.
//
// Returns 0 on success, or a RET_* error code.  All bigstack memory allocated
// here is released before returning.
int32_t epi_summary_merge(Epi_info* epi_ip, char* outname, char* outname_end) {
  unsigned char* bigstack_mark = g_bigstack_base;
  FILE* infile = nullptr;
  FILE* outfile = nullptr;
  char* inprefix = epi_ip->summary_merge_prefix;
  char* inprefix_end = (char*)memchr(inprefix, 0, FNAMESIZE);
  Ll_epi_summary* list_start = nullptr;
  // first .3 entry is later than first .2 entry, etc., so we can save
  // ourselves some linked list traversal time by starting the first-entry scan
  // after where the last one left off.
  Ll_epi_summary* last_start = nullptr;
  Ll_epi_summary** lle_pp = &list_start; // end-of-list pointer for first file
  uint32_t file_ct = epi_ip->summary_merge_ct;
  int32_t retval = 0;
  char* bufptr;
  char* bufptr2;
  char* bufptr3;
  char* bufptr4;
  char* id_ptr;
  char* nsig_ptr;
  char* ntot_ptr;
  char* best_chisq_ptr;
  char* best_chr_ptr;
  char* best_marker_ptr;
  Ll_epi_summary* lle_ptr; // traverser for remaining files
  uintptr_t line_idx;
  uintptr_t ulii;
  double cur_chisq;
  uint32_t plink_maxsnp;
  uint32_t file_idx;
  uint32_t chrom_len;
  uint32_t id_len;
  uint32_t is_first_entry;
  int32_t nsig;
  int32_t ntot;
  // bugfix: don't look at inprefix_end[-1] when the prefix is empty
  if ((inprefix_end != inprefix) && (inprefix_end[-1] == '.')) {
    inprefix_end--;
  }
  ulii = (uintptr_t)(inprefix_end - inprefix);
  if ((ulii >= 16) && (!memcmp(".summary", &(inprefix[ulii - 8]), 8))) {
    inprefix_end -= 8;
    ulii -= 8;
  }
  // bugfix: the suffix check reads the last 7 bytes of the prefix, so a
  // shorter prefix must be rejected before indexing with (ulii - 7).
  if ((ulii < 7) || memcmp(".epi.", &(inprefix[ulii - 7]), 5) || (memcmp("cc", &(inprefix[ulii - 2]), 2) && memcmp("co", &(inprefix[ulii - 2]), 2) && memcmp("qt", &(inprefix[ulii - 2]), 2))) {
    LOGERRPRINTFWW("Error: Invalid --epistasis-summary-merge filename prefix '%s'. (*.epi.cc, *.epi.co, or *.epi.qt expected.)\n", inprefix);
    goto epi_summary_merge_ret_INVALID_CMDLINE;
  }
  inprefix_end = memcpya(inprefix_end, ".summary.", 9);
  // output name gets the 15-byte ".epi.xx.summary" suffix
  memcpyx(outname_end, &(inprefix[ulii - 7]), 15, '\0');
  // Started out using a hash table, but on second thought, it's unnecessary
  // given the possibilities for distributed .summary files.
  // 1. ALL x ALL, SET x SET: First file lists all marker IDs in the final
  //    order; first entry in remaining files should match an entry in the
  //    middle and the rest match sequentially from there.
  // 2. SET1 x ALL, SET1 x SET2: No duplication whatsoever.  Output will be
  //    such that cat actually works, but asking users to conditionally use cat
  //    would add confusion for little reason; instead we detect the telltale
  //    "PROP" in the first file's header line and switch to cat.

  // sentinel for long-line detection: fgets() overwrites this byte with a
  // null terminator only when a line fills the whole buffer
  g_textbuf[MAXLINELEN - 1] = ' ';
  memcpy(inprefix_end, "1", 2);
  if (fopen_checked(inprefix, "r", &infile)) {
    goto epi_summary_merge_ret_OPEN_FAIL;
  }
  retval = load_to_first_token(infile, MAXLINELEN, '\0', "--epistasis-summary-merge file", g_textbuf, &bufptr, &line_idx);
  if (retval) {
    goto epi_summary_merge_ret_1;
  }
  retval = validate_epistasis_summary_header(bufptr);
  if (retval) {
    if (retval == -1) {
      // switch to cat mode.  meow.
      fclose_null(&infile);
      if (fopen_checked(outname, FOPEN_WB, &outfile)) {
        goto epi_summary_merge_ret_OPEN_FAIL;
      }
      for (file_idx = 1; file_idx <= file_ct; file_idx++) {
        uint32toa_x(file_idx, '\0', inprefix_end);
        if (fopen_checked(inprefix, FOPEN_RB, &infile)) {
          goto epi_summary_merge_ret_OPEN_FAIL;
        }
        while (1) {
          ulii = fread(g_textbuf, 1, MAXLINELEN, infile);
          if (!ulii) {
            // bugfix: distinguish a read error from end-of-file
            if (ferror(infile)) {
              goto epi_summary_merge_ret_READ_FAIL;
            }
            break;
          }
          if (fwrite_checked(g_textbuf, ulii, outfile)) {
            goto epi_summary_merge_ret_WRITE_FAIL;
          }
        }
        if (fclose_null(&infile)) {
          goto epi_summary_merge_ret_READ_FAIL;
        }
      }
      retval = 0;
      goto epi_summary_merge_success;
    }
    goto epi_summary_merge_ret_INVALID_HEADER;
  }
  // recover the SNP column width from the first file's header: distance from
  // the end of "CHR" to the end of "SNP", minus the separating space
  bufptr2 = token_endnn(bufptr);
  bufptr = skip_initial_spaces(bufptr2);
  plink_maxsnp = ((uintptr_t)(token_endnn(bufptr) - bufptr2)) - 1;
  // first file: build the linked list, one node per row
  while (fgets(g_textbuf, MAXLINELEN, infile)) {
    line_idx++;
    if (!g_textbuf[MAXLINELEN - 1]) {
      goto epi_summary_merge_ret_LONG_LINE;
    }
    bufptr = skip_initial_spaces(g_textbuf);
    if (is_eoln_kns(*bufptr)) {
      continue;
    }
    chrom_len = strlen_se(bufptr);
    id_ptr = skip_initial_spaces(&(bufptr[chrom_len]));
    id_len = strlen_se(id_ptr);
    nsig_ptr = skip_initial_spaces(&(id_ptr[id_len]));
    ntot_ptr = next_token(nsig_ptr);
    best_chisq_ptr = next_token(ntot_ptr);
    best_chr_ptr = next_token(best_chisq_ptr);
    if (no_more_tokens_kns(best_chr_ptr)) {
      goto epi_summary_merge_ret_MISSING_TOKENS;
    }
    if (scan_uint_icap(nsig_ptr, (uint32_t*)&nsig)) {
      goto epi_summary_merge_ret_INVALID_NSIG;
    }
    if (scan_uint_icap(ntot_ptr, (uint32_t*)&ntot)) {
      goto epi_summary_merge_ret_INVALID_NTOT;
    }
    if (ntot) {
      if (scan_double(best_chisq_ptr, &cur_chisq)) {
        goto epi_summary_merge_ret_INVALID_CHISQ;
      }
    } else {
      // BEST_CHISQ is not parsed when N_TOT is zero
      cur_chisq = 0;
    }
    *lle_pp = lle_alloc(bufptr, chrom_len, id_ptr, id_len, nsig, ntot, cur_chisq);
    if (!(*lle_pp)) {
      goto epi_summary_merge_ret_NOMEM;
    }
    chrom_len = strlen_se(best_chr_ptr);
    best_marker_ptr = skip_initial_spaces(&(best_chr_ptr[chrom_len]));
    id_len = strlen_se(best_marker_ptr);
    if (!id_len) {
      goto epi_summary_merge_ret_MISSING_TOKENS;
    }
    // throw in an extra word, to reduce the need for reallocation
    ulii = (chrom_len + id_len + 1 + 2 * sizeof(intptr_t)) & (~(sizeof(intptr_t) - 1));
    if (ulii > bigstack_left()) {
      goto epi_summary_merge_ret_NOMEM;
    }
    bufptr = (char*)g_bigstack_base;
    memcpyx(bufptr, best_chr_ptr, chrom_len, '\t');
    memcpy(&(bufptr[chrom_len + 1]), best_marker_ptr, id_len);
    // pad with nulls then tab-terminate, so we can find the buffer end later
    memset(&(bufptr[chrom_len + id_len + 1]), 0, ulii - chrom_len - id_len - 2);
    bufptr[ulii - 1] = '\t';
    (*lle_pp)->best_chr_and_snp = bufptr;
    lle_pp = &((*lle_pp)->next);
    g_bigstack_base = &(g_bigstack_base[ulii]);
  }
  if (fclose_null(&infile)) {
    goto epi_summary_merge_ret_READ_FAIL;
  }
  if (!list_start) {
    LOGPREPRINTFWW("Error: %s has no entries.\n", inprefix);
    goto epi_summary_merge_ret_INVALID_FORMAT_2;
  }
  last_start = list_start->next;
  // remaining files: locate the first row in the list, then match the rest
  // sequentially and fold their statistics in
  for (file_idx = 2; file_idx <= file_ct; file_idx++) {
    uint32toa_x(file_idx, '\0', inprefix_end);
    if (fopen_checked(inprefix, "r", &infile)) {
      goto epi_summary_merge_ret_OPEN_FAIL;
    }
    retval = load_to_first_token(infile, MAXLINELEN, '\0', "--epistasis-summary-merge file", g_textbuf, &bufptr, &line_idx);
    if (retval) {
      goto epi_summary_merge_ret_1;
    }
    // -1 (PROP present) is also rejected here: the first file had no PROP
    retval = validate_epistasis_summary_header(bufptr);
    if (retval) {
      goto epi_summary_merge_ret_INVALID_HEADER;
    }
    lle_ptr = last_start;
    is_first_entry = 1;
    while (fgets(g_textbuf, MAXLINELEN, infile)) {
      line_idx++;
      if (!g_textbuf[MAXLINELEN - 1]) {
        goto epi_summary_merge_ret_LONG_LINE;
      }
      bufptr = skip_initial_spaces(g_textbuf);
      if (is_eoln_kns(*bufptr)) {
        continue;
      }
      if (!lle_ptr) {
        LOGPREPRINTFWW("Error: More lines than expected in %s.\n", inprefix);
        goto epi_summary_merge_ret_INVALID_FORMAT_2;
      }
      chrom_len = strlen_se(bufptr);
      id_ptr = skip_initial_spaces(&(bufptr[chrom_len]));
      id_len = strlen_se(id_ptr);
      nsig_ptr = skip_initial_spaces(&(id_ptr[id_len]));
      ntot_ptr = next_token(nsig_ptr);
      best_chisq_ptr = next_token(ntot_ptr);
      best_chr_ptr = next_token(best_chisq_ptr);
      if (no_more_tokens_kns(best_chr_ptr)) {
        goto epi_summary_merge_ret_MISSING_TOKENS;
      }
      if (scan_uint_icap(nsig_ptr, (uint32_t*)&nsig)) {
        goto epi_summary_merge_ret_INVALID_NSIG;
      }
      if (scan_uint_icap(ntot_ptr, (uint32_t*)&ntot)) {
        goto epi_summary_merge_ret_INVALID_NTOT;
      }
      if (!is_first_entry) {
        // every later row must match the next list node exactly
        if ((lle_ptr->id_len != id_len) || memcmp(lle_ptr->strbuf, id_ptr, id_len) || (strlen(&(lle_ptr->strbuf[id_len])) != chrom_len) || memcmp(&(lle_ptr->strbuf[id_len]), bufptr, chrom_len)) {
          goto epi_summary_merge_ret_MISMATCH;
        }
      } else {
        // scan forward for the node with this file's first variant ID
        while (1) {
          if (!lle_ptr) {
            goto epi_summary_merge_ret_MISMATCH;
          }
          if ((lle_ptr->id_len == id_len) && (!memcmp(lle_ptr->strbuf, id_ptr, id_len))) {
            break;
          }
          lle_ptr = lle_ptr->next;
        }
        if ((strlen(&(lle_ptr->strbuf[id_len])) != chrom_len) || memcmp(&(lle_ptr->strbuf[id_len]), bufptr, chrom_len)) {
          goto epi_summary_merge_ret_MISMATCH;
        }
        last_start = lle_ptr->next;
        is_first_entry = 0;
      }
      if (ntot) {
        if (scan_double(best_chisq_ptr, &cur_chisq)) {
          goto epi_summary_merge_ret_INVALID_CHISQ;
        }
        lle_ptr->n_sig += nsig;
        lle_ptr->n_tot += ntot;
        if (cur_chisq > lle_ptr->best_chisq) {
          chrom_len = strlen_se(best_chr_ptr);
          best_marker_ptr = skip_initial_spaces(&(best_chr_ptr[chrom_len]));
          id_len = strlen_se(best_marker_ptr);
          if (!id_len) {
            goto epi_summary_merge_ret_MISSING_TOKENS;
          }
          lle_ptr->best_chisq = cur_chisq;
          // Recover the existing buffer's layout: bufptr2 = start of the
          // variant ID, bufptr3 = first padding null, bufptr4 = final tab.
          bufptr = lle_ptr->best_chr_and_snp;
          bufptr2 = (char*)memchr(bufptr, '\t', MAXLINELEN);
          bufptr3 = (char*)memchr(++bufptr2, 0, MAXLINELEN);
          bufptr4 = (char*)memchr(bufptr3, '\t', MAXLINELEN);
          ulii = (uintptr_t)(bufptr4 - bufptr);
          if (ulii <= chrom_len + id_len + 1) {
            // Existing buffer can't hold the new text plus at least one null.
            // bugfix: size the replacement with the same extra-word slack as
            // the initial allocation.  Rounding (len + one word) down could
            // leave zero bytes between the text and the final tab, so no null
            // terminator would be present for later scans.
            ulii = (chrom_len + id_len + 1 + 2 * sizeof(intptr_t)) & (~(sizeof(intptr_t) - 1));
            if (ulii > bigstack_left()) {
              goto epi_summary_merge_ret_NOMEM;
            }
            bufptr = (char*)g_bigstack_base;
            bufptr3 = &(bufptr[ulii - 1]);
            *bufptr3 = '\t';
            lle_ptr->best_chr_and_snp = bufptr;
            g_bigstack_base = &(g_bigstack_base[ulii]);
          }
          bufptr = memcpyax(bufptr, best_chr_ptr, chrom_len, '\t');
          bufptr = memcpya(bufptr, best_marker_ptr, id_len);
          // clear leftovers of a longer previous value (or fresh memory)
          if (bufptr < bufptr3) {
            memset(bufptr, 0, bufptr3 - bufptr);
          }
        }
      }
      lle_ptr = lle_ptr->next;
    }
    if (fclose_null(&infile)) {
      goto epi_summary_merge_ret_READ_FAIL;
    }
  }

  if (fopen_checked(outname, "w", &outfile)) {
    goto epi_summary_merge_ret_OPEN_FAIL;
  }
  // header; column names are right-aligned to the data field widths used
  // below (3+10, 3+10, 1+12, 1+12)
  bufptr = memcpya(g_textbuf, " CHR ", 5);
  bufptr = fw_strcpyn(plink_maxsnp, 3, "SNP", bufptr);
  bufptr = strcpya(bufptr, "        N_SIG        N_TOT         PROP   BEST_CHISQ BEST_CHR ");
  bufptr = fw_strcpyn(plink_maxsnp, 8, "BEST_SNP", bufptr);
  bufptr = memcpya(bufptr, " \n", 2);
  if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
    goto epi_summary_merge_ret_WRITE_FAIL;
  }
  lle_ptr = list_start;
  do {
    bufptr2 = lle_ptr->strbuf;
    id_len = lle_ptr->id_len;
    bufptr3 = &(bufptr2[id_len]);
    bufptr = fw_strcpy(4, bufptr3, g_textbuf);
    *bufptr++ = ' ';
    bufptr = fw_strcpyn(plink_maxsnp, id_len, bufptr2, bufptr);
    nsig = lle_ptr->n_sig;
    ntot = lle_ptr->n_tot;
    bufptr = memseta(bufptr, 32, 3);
    bufptr = uint32toa_w10(nsig, bufptr);
    bufptr = memseta(bufptr, 32, 3);
    bufptr = uint32toa_w10x(ntot, ' ', bufptr);
    bufptr = dtoa_g_wxp4x(((double)((int32_t)nsig)) / ((double)((int32_t)ntot)), 12, ' ', bufptr);
    bufptr = dtoa_g_wxp4x(lle_ptr->best_chisq, 12, ' ', bufptr);
    // no need to special-case ntot == 0, this code correctly copies 'NA'
    bufptr2 = lle_ptr->best_chr_and_snp;
    bufptr3 = (char*)memchr(bufptr2, '\t', MAXLINELEN);
    ulii = (uintptr_t)(bufptr3 - bufptr2);
    bufptr = fw_strcpyn(4, ulii, bufptr2, bufptr);
    *bufptr++ = ' ';
    bufptr = fw_strcpy(plink_maxsnp, &(bufptr3[1]), bufptr);
    bufptr = memcpya(bufptr, " \n", 2);
    if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
      goto epi_summary_merge_ret_WRITE_FAIL;
    }
    lle_ptr = lle_ptr->next;
  } while (lle_ptr);

 epi_summary_merge_success:
  if (fclose_null(&outfile)) {
    // just kidding!  no success
    goto epi_summary_merge_ret_WRITE_FAIL;
  }
  LOGPRINTFWW("--epistasis-summary-merge: Merged summary written to %s .\n", outname);
  while (0) {
  epi_summary_merge_ret_NOMEM:
    retval = RET_NOMEM;
    break;
  epi_summary_merge_ret_OPEN_FAIL:
    retval = RET_OPEN_FAIL;
    break;
  epi_summary_merge_ret_READ_FAIL:
    retval = RET_READ_FAIL;
    break;
  epi_summary_merge_ret_WRITE_FAIL:
    retval = RET_WRITE_FAIL;
    break;
  epi_summary_merge_ret_INVALID_CMDLINE:
    retval = RET_INVALID_CMDLINE;
    break;
  epi_summary_merge_ret_MISMATCH:
    logerrprint("Error: --epistasis-summary-merge files were generated from different datasets\nand/or settings.\n");
    retval = RET_INVALID_FORMAT;
    break;
  epi_summary_merge_ret_INVALID_NSIG:
    LOGERRPRINTFWW("Error: Invalid N_SIG value on line %" PRIuPTR " of %s .\n", line_idx, inprefix);
    retval = RET_INVALID_FORMAT;
    break;
  epi_summary_merge_ret_INVALID_NTOT:
    // bugfix: this path previously reported N_SIG
    LOGERRPRINTFWW("Error: Invalid N_TOT value on line %" PRIuPTR " of %s .\n", line_idx, inprefix);
    retval = RET_INVALID_FORMAT;
    break;
  epi_summary_merge_ret_INVALID_CHISQ:
    LOGERRPRINTFWW("Error: Invalid BEST_CHISQ value on line %" PRIuPTR " of %s .\n", line_idx, inprefix);
    retval = RET_INVALID_FORMAT;
    break;
  epi_summary_merge_ret_MISSING_TOKENS:
    LOGERRPRINTFWW("Error: Line %" PRIuPTR " of %s has fewer tokens than expected.\n", line_idx, inprefix);
    retval = RET_INVALID_FORMAT;
    break;
  epi_summary_merge_ret_LONG_LINE:
    LOGERRPRINTFWW("Error: Line %" PRIuPTR " of %s is pathologically long.\n", line_idx, inprefix);
    retval = RET_INVALID_FORMAT;
    break;
  epi_summary_merge_ret_INVALID_HEADER:
    // bugfix: the format string must be the first macro argument, as in the
    // other LOGPREPRINTFWW calls in this function
    LOGPREPRINTFWW("Error: Invalid --epistasis-summary-merge header in %s.\n", inprefix);
  epi_summary_merge_ret_INVALID_FORMAT_2:
    logerrprintb();
    retval = RET_INVALID_FORMAT;
    break;
  }
 epi_summary_merge_ret_1:
  fclose_cond(infile);
  fclose_cond(outfile);
  bigstack_reset(bigstack_mark);
  return retval;
}
11252
test_mishap_write_line(FILE * outfile,char * wptr,uint32_t prev_alen,uint32_t next_alen,const char * prev_aptr,const char * next_aptr,double * total_cts,double * curhap_cts,double tot_recip,double output_min_p,char * flankstr,uint32_t flanklen)11253 void test_mishap_write_line(FILE* outfile, char* wptr, uint32_t prev_alen, uint32_t next_alen, const char* prev_aptr, const char* next_aptr, double* total_cts, double* curhap_cts, double tot_recip, double output_min_p, char* flankstr, uint32_t flanklen) {
11254 // total_cts[0] = caseN[0] + caseN[1]
11255 // total_cts[1] = controlN[0] + controlN[1]
11256 char* tbuf_cur = g_textbuf;
11257 double casen_1 = total_cts[0] - curhap_cts[0];
11258 double ctrln_1 = total_cts[1] - curhap_cts[1];
11259 uint32_t uii = prev_alen + next_alen;
11260 char* wptr2;
11261 double row_mult;
11262 double cur_expected;
11263 double dxx;
11264 double chisq;
11265 if (uii <= 10) {
11266 wptr = memseta(wptr, 32, 10 - uii);
11267 if (prev_alen) {
11268 wptr = memcpya(wptr, prev_aptr, prev_alen);
11269 }
11270 if (next_alen) {
11271 wptr = memcpya(wptr, next_aptr, next_alen);
11272 }
11273 } else {
11274 fwrite(g_textbuf, 1, (uintptr_t)(wptr - g_textbuf), outfile);
11275 if (prev_alen) {
11276 fputs(prev_aptr, outfile);
11277 }
11278 if (next_alen) {
11279 fputs(next_aptr, outfile);
11280 }
11281 tbuf_cur = wptr;
11282 }
11283 *wptr++ = ' ';
11284 if (total_cts[0] > 0.0) {
11285 wptr = dtoa_g_wxp3(curhap_cts[0] / total_cts[0], 8, wptr);
11286 } else {
11287 wptr = memcpya(wptr, " NA", 8);
11288 }
11289 *wptr++ = ' ';
11290 if (total_cts[1] > 0.0) {
11291 wptr = dtoa_g_wxp3(curhap_cts[1] / total_cts[1], 8, wptr);
11292 } else {
11293 wptr = memcpya(wptr, " NA", 8);
11294 }
11295 *wptr++ = ' ';
11296 wptr2 = dtoa_g(curhap_cts[0], wptr);
11297 *wptr2++ = '/';
11298 wptr2 = dtoa_g(curhap_cts[1], wptr2);
11299 wptr = width_force(20, wptr, wptr2);
11300 *wptr++ = ' ';
11301 wptr2 = dtoa_g(casen_1, wptr);
11302 *wptr2++ = '/';
11303 wptr2 = dtoa_g(ctrln_1, wptr2);
11304 wptr = width_force(20, wptr, wptr2);
11305 *wptr++ = ' ';
11306 if ((curhap_cts[0] > 0.0) && (curhap_cts[1] > 0.0) && (casen_1 > 0.0) && (ctrln_1 > 0.0)) {
11307 row_mult = (curhap_cts[0] + curhap_cts[1]) * tot_recip;
11308 cur_expected = row_mult * total_cts[0];
11309 dxx = curhap_cts[0] - cur_expected;
11310 chisq = dxx * dxx / cur_expected;
11311 cur_expected = row_mult * total_cts[1];
11312 dxx = curhap_cts[1] - cur_expected;
11313 chisq += dxx * dxx / cur_expected;
11314 row_mult = (total_cts[0] + total_cts[1]) * tot_recip - row_mult;
11315 cur_expected = row_mult * total_cts[0];
11316 dxx = casen_1 - cur_expected;
11317 chisq += dxx * dxx / cur_expected;
11318 cur_expected = row_mult * total_cts[1];
11319 dxx = ctrln_1 - cur_expected;
11320 chisq += dxx * dxx / cur_expected;
11321 wptr = dtoa_g_wxp3(chisq, 8, wptr);
11322 *wptr++ = ' ';
11323 dxx = chiprob_p(chisq, 1);
11324 wptr = dtoa_g_wxp3(MAXV(dxx, output_min_p), 8, wptr);
11325 } else {
11326 wptr = memcpya(wptr, " NA NA", 17);
11327 }
11328 wptr = memcpya(wptr, flankstr, flanklen);
11329 fwrite(tbuf_cur, 1, (uintptr_t)(wptr - tbuf_cur), outfile);
11330 }
11331
// Implements --test-mishap: for every included variant with at least 5
// missing calls, tests whether missingness at that variant is associated with
// the haplotypes formed by its two flanking included variants on the same
// chromosome, and writes the results to {outname}.missing.hap.
//
// For each reported variant, one row is written per flanking haplotype that
// clears the min_maf threshold, followed by a "HETERO" row comparing
// samples that are heterozygous at a flank against the rest.  The first and
// last included variants of a chromosome have only one flank, so their
// "haplotypes" are the single flanking alleles.
//
// Parameters of note:
//   bedfile / bed_offset   open .bed stream and byte offset of variant 0; each
//                          variant occupies (unfiltered_sample_ct + 3) / 4
//                          bytes.
//   outname / outname_end  output path buffer; ".missing.hap" is written at
//                          outname_end.
//   output_min_p           floor applied to reported p-values.
//   unfiltered_marker_ct   not used by this function.
//   plink_maxsnp           width of the SNP ID column.
//   min_maf                haplotypes with frequency below this (after a
//                          small epsilon adjustment) are dropped.
//
// Returns 0 on success, or RET_NOMEM / RET_OPEN_FAIL / RET_READ_FAIL /
// RET_WRITE_FAIL / RET_INVALID_CMDLINE.  Workspace taken from the bigstack is
// released before returning.
int32_t test_mishap(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, double output_min_p, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, double min_maf, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct) {
  unsigned char* bigstack_mark = g_bigstack_base;
  FILE* outfile = nullptr;
  uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
  uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
  uintptr_t sample_ctl2 = QUATERCT_TO_WORDCT(sample_ct);
  uintptr_t sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(sample_ct);
  uintptr_t final_mask = get_final_mask(sample_ct);
  // Second half of the text buffer holds the " prev|next\n" flanking string.
  char* tbuf2 = &(g_textbuf[MAXLINELEN]);
  char* wptr2 = nullptr;
  uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
  uint32_t inspected_ct = 0;
  uint32_t missing_ct_next = 0;
  uint32_t prev_a1len = 0;
  uint32_t prev_a2len = 0;
  int32_t retval = 0;
  // need following counts:
  //   all 9 flanking hap combinations | current missing
  //     [0]: prev = 00, next = 00
  //     [1]: prev = 00, next = 10
  //     [2]: prev = 00, next = 11
  //     [3]: prev = 10, next = 00
  //     ...
  //   all 9 flanking hap combinations | current nonmissing [9..17]
  //   [18..26]: scratch, sum of the two groups (input to EM phasing)
  uint32_t counts[27];
  // [0]: central call missing, all haps clearing --maf (caseN[0] + caseN[1])
  // [1]: central call nonmissing, all haps clear --maf (ctrlN[0] + ctrlN[1])
  // [2k], k in 1..4: caseN[0] for current hap
  // [2k+1]: controlN[0] for current hap
  // note that all numbers are actually double raw counts
  double hap_ct_table[10];
  uintptr_t* loadbuf_raw;
  uintptr_t* loadbuf;
  uintptr_t* loadbuf_end;
  uintptr_t* prevsnp_ptr;
  uintptr_t* cursnp_ptr;
  uintptr_t* nextsnp_ptr;
  uintptr_t* maskbuf_mid;
  uintptr_t* maskbuf;
  char* wptr;
  uint32_t* uiptr;
  uintptr_t marker_uidx_prev;
  uintptr_t marker_uidx_cur;
  uintptr_t marker_uidx_next;
  double freq11;
  double tot_recip;
  double dxx;
  double dyy;
  double dzz;
  double dww;
  double orig_cmiss_tot;
  double orig_cnm_tot;
  uint32_t flanklen;
  uint32_t missing_ct_cur;
  uint32_t chrom_fo_idx;
  uint32_t chrom_idx;
  uint32_t chrom_end;
  uint32_t next_a1len;
  uint32_t next_a2len;
  uint32_t uii;
  uint32_t ujj;
  uint32_t ukk;
  uint32_t umm;
  if (is_set(chrom_info_ptr->haploid_mask, 1)) {
    logerrprint("Error: --test-mishap can only be used on diploid genomes.\n");
    goto test_mishap_ret_INVALID_CMDLINE;
  }
  if (sample_ct >= 0x40000000) {
    logerrprint("Error: --test-mishap does not support >= 2^30 samples.\n");
    goto test_mishap_ret_INVALID_CMDLINE;
  }
  // loadbuf is a ring of three collapsed genotype vectors (prev/cur/next).
  if (bigstack_alloc_ul(unfiltered_sample_ctl2, &loadbuf_raw) ||
      bigstack_alloc_ul(sample_ctv2 * 3, &loadbuf) ||
      bigstack_alloc_ul(sample_ctv2, &maskbuf_mid) ||
      bigstack_alloc_ul(sample_ctv2, &maskbuf)) {
    goto test_mishap_ret_NOMEM;
  }
  // Zero the trailing words the loaders may leave untouched.
  loadbuf_raw[unfiltered_sample_ctl2 - 1] = 0;
  loadbuf[sample_ctv2 - 2] = 0;
  loadbuf[sample_ctv2 - 1] = 0;
  loadbuf[2 * sample_ctv2 - 2] = 0;
  loadbuf[2 * sample_ctv2 - 1] = 0;
  loadbuf[3 * sample_ctv2 - 2] = 0;
  loadbuf[3 * sample_ctv2 - 1] = 0;
  loadbuf_end = &(loadbuf[sample_ctv2 * 3]);
  tbuf2[0] = ' ';
  memcpy(outname_end, ".missing.hap", 13);
  if (fopen_checked(outname, "w", &outfile)) {
    goto test_mishap_ret_OPEN_FAIL;
  }
  // Header.  A literal format is used (rather than one assembled at runtime),
  // and the field widths mirror the fixed column widths emitted by
  // test_mishap_write_line(): 10, 8, 8, 20, 20, 8, 8.
  fprintf(outfile, "%*s %10s %8s %8s %20s %20s %8s %8s FLANKING\n", (int)plink_maxsnp, "SNP", "HAPLOTYPE", "F_0", "F_1", "M_H1", "M_H2", "CHISQ", "P");
  min_maf *= 1 - SMALL_EPSILON;
  for (chrom_fo_idx = 0; chrom_fo_idx < chrom_ct; chrom_fo_idx++) {
    chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
    if (is_set(chrom_info_ptr->haploid_mask, chrom_idx)) {
      continue;
    }
    marker_uidx_cur = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
    chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
    marker_uidx_cur = next_unset_ul(marker_exclude, marker_uidx_cur, chrom_end);
    if (marker_uidx_cur == chrom_end) {
      continue;
    }
    // Need at least two included variants on the chromosome.
    marker_uidx_next = next_unset_ul(marker_exclude, marker_uidx_cur + 1, chrom_end);
    if (marker_uidx_next == chrom_end) {
      continue;
    }
    // No previous variant yet: an all-zero vector stands in for it.
    prevsnp_ptr = loadbuf;
    fill_ulong_zero(sample_ctl2, prevsnp_ptr);
    cursnp_ptr = &(loadbuf[sample_ctv2]);
    if (fseeko(bedfile, bed_offset + marker_uidx_cur * ((uint64_t)unfiltered_sample_ct4), SEEK_SET)) {
      goto test_mishap_ret_READ_FAIL;
    }
    if (load_and_collapse(unfiltered_sample_ct, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx_cur), bedfile, loadbuf_raw, cursnp_ptr)) {
      goto test_mishap_ret_READ_FAIL;
    }
    if (marker_uidx_next != marker_uidx_cur + 1) {
      // The file is now positioned at marker_uidx_cur + 1.  The loop below
      // only seeks when the variant it lands on is excluded, and
      // marker_uidx_next is already known to be included, so skip any
      // excluded variants in between here.
      if (fseeko(bedfile, bed_offset + marker_uidx_next * ((uint64_t)unfiltered_sample_ct4), SEEK_SET)) {
        goto test_mishap_ret_READ_FAIL;
      }
    }
    missing_ct_cur = count_01(cursnp_ptr, sample_ctl2);
    marker_uidx_prev = ~ZEROLU;
    // Slide the prev/cur/next window along the chromosome.  The increment
    // clause must run on every `continue`.
    for (; marker_uidx_cur < chrom_end; marker_uidx_prev = marker_uidx_cur, marker_uidx_cur = marker_uidx_next, prevsnp_ptr = cursnp_ptr, cursnp_ptr = nextsnp_ptr, missing_ct_cur = missing_ct_next, marker_uidx_next++) {
      nextsnp_ptr = &(cursnp_ptr[sample_ctv2]);
      if (nextsnp_ptr == loadbuf_end) {
        nextsnp_ptr = loadbuf;
      }
      if (marker_uidx_next < chrom_end) {
        if (IS_SET(marker_exclude, marker_uidx_next)) {
          marker_uidx_next = next_unset_ul(marker_exclude, marker_uidx_next, chrom_end);
          if (marker_uidx_next == chrom_end) {
            goto test_mishap_last_chrom_snp;
          }
          if (fseeko(bedfile, bed_offset + marker_uidx_next * ((uint64_t)unfiltered_sample_ct4), SEEK_SET)) {
            goto test_mishap_ret_READ_FAIL;
          }
        }
        if (load_and_collapse(unfiltered_sample_ct, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx_next), bedfile, loadbuf_raw, nextsnp_ptr)) {
          goto test_mishap_ret_READ_FAIL;
        }
        missing_ct_next = count_01(nextsnp_ptr, sample_ctl2);
      } else {
      test_mishap_last_chrom_snp:
        // No following variant: an all-zero vector stands in for it.
        fill_ulong_zero(sample_ctl2, nextsnp_ptr);
      }
      if (missing_ct_cur < 5) {
        continue;
      }
      // Tabulate flanking genotype combinations, first among samples whose
      // current call is missing (uii == 0), then among the rest (uii == 1).
      // The genotype vectors are collapsed to sample_ct entries, so every
      // vector operation here is driven by sample_ct; using the unfiltered
      // count would run past the sample_ctv2-word buffers and mis-set the
      // trailing bits when the mask is inverted.
      quatervec_copy_only_01(cursnp_ptr, sample_ct, maskbuf_mid);
      uiptr = counts;
      for (uii = 0; uii < 2; uii++) {
        if (uii) {
          quatervec_01_invert(sample_ct, maskbuf_mid);
        }
        for (ujj = 0; ujj < 3; ujj++) {
          // ujj -> match value 0, 2, 3: skips 1, the missing-call code.
          vec_datamask(sample_ct, ujj + (ujj + 1) / 2, prevsnp_ptr, maskbuf_mid, maskbuf);
          ukk = popcount01_longs(maskbuf, sample_ctl2);
          genovec_3freq(nextsnp_ptr, maskbuf, sample_ctl2, &umm, &(uiptr[1]), &(uiptr[2]));
          uiptr[0] = ukk - umm - uiptr[1] - uiptr[2];
          uiptr = &(uiptr[3]);
        }
      }
      wptr = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx_cur * max_marker_id_len]), g_textbuf);
      *wptr++ = ' ';
      if (marker_uidx_prev != ~ZEROLU) {
        prev_a1len = strlen(marker_allele_ptrs[2 * marker_uidx_prev]);
        prev_a2len = strlen(marker_allele_ptrs[2 * marker_uidx_prev + 1]);
        wptr2 = strcpya(&(tbuf2[1]), &(marker_ids[marker_uidx_prev * max_marker_id_len]));
      }
      if (marker_uidx_next < chrom_end) {
        next_a1len = strlen(marker_allele_ptrs[2 * marker_uidx_next]);
        next_a2len = strlen(marker_allele_ptrs[2 * marker_uidx_next + 1]);
        if (marker_uidx_prev != ~ZEROLU) {
          // Both flanks present: four two-variant haplotypes.
          hap_ct_table[0] = (int32_t)(2 * (counts[0] + counts[1] + counts[2] + counts[3] + counts[4] + counts[5] + counts[6] + counts[7] + counts[8]));
          hap_ct_table[1] = (int32_t)(2 * (counts[9] + counts[10] + counts[11] + counts[12] + counts[13] + counts[14] + counts[15] + counts[16] + counts[17]));
          tot_recip = hap_ct_table[0] + hap_ct_table[1];
          if (tot_recip == 0.0) {
            // minor change: skip markers with zero observations.  (output
            // wouldn't match PLINK 1.07 anyway, due to EM phasing differences)
            continue;
          }
          orig_cmiss_tot = hap_ct_table[0];
          orig_cnm_tot = hap_ct_table[1];
          *wptr2++ = '|';
          wptr2 = strcpya(wptr2, &(marker_ids[marker_uidx_next * max_marker_id_len]));
          *wptr2++ = '\n';
          flanklen = (uintptr_t)(wptr2 - tbuf2);
          // Unambiguous haplotype counts (everything except double hets).
          hap_ct_table[2] = (int32_t)(2 * counts[0] + counts[1] + counts[3]);
          hap_ct_table[3] = (int32_t)(2 * counts[9] + counts[10] + counts[12]);
          hap_ct_table[4] = (int32_t)(2 * counts[2] + counts[1] + counts[5]);
          hap_ct_table[5] = (int32_t)(2 * counts[11] + counts[10] + counts[14]);
          hap_ct_table[6] = (int32_t)(2 * counts[6] + counts[3] + counts[7]);
          hap_ct_table[7] = (int32_t)(2 * counts[15] + counts[12] + counts[16]);
          hap_ct_table[8] = (int32_t)(2 * counts[8] + counts[5] + counts[7]);
          hap_ct_table[9] = (int32_t)(2 * counts[17] + counts[14] + counts[16]);
          if (counts[4] + counts[13]) {
            // Double heterozygotes are phase-ambiguous; apportion them using
            // the EM haplotype frequency estimate over the pooled counts.
            for (uii = 0; uii < 9; uii++) {
              counts[18 + uii] = counts[uii] + counts[9 + uii];
            }
            // no need to check return value
            em_phase_hethet_nobase(&(counts[18]), 0, 0, &dxx, &dyy, &dzz, &dww, &freq11);
            // share of counts[4]/counts[13] which goes to 11 or 22 haplotype
            // (0.5 - dxx) is share which goes to 12/21 haps
            // (conveniently, there's a 0.5 and a 2 which cancel out here)
            dxx = (freq11 * tot_recip - (hap_ct_table[2] + hap_ct_table[3])) / ((double)((int32_t)(counts[4] + counts[13])));
            dyy = ((int32_t)counts[4]) * dxx;
            dzz = ((int32_t)counts[13]) * dxx;
            hap_ct_table[2] += dyy;
            hap_ct_table[3] += dzz;
            hap_ct_table[8] += dyy;
            hap_ct_table[9] += dzz;
            dxx = 1.0 - dxx;
            dyy = ((int32_t)counts[4]) * dxx;
            dzz = ((int32_t)counts[13]) * dxx;
            hap_ct_table[4] += dyy;
            hap_ct_table[5] += dzz;
            hap_ct_table[6] += dyy;
            hap_ct_table[7] += dzz;
          }
          // Drop haplotypes below the frequency threshold from the totals.
          dxx = min_maf * tot_recip;
          if (hap_ct_table[2] + hap_ct_table[3] < dxx) {
            hap_ct_table[0] -= hap_ct_table[2];
            hap_ct_table[1] -= hap_ct_table[3];
            tot_recip -= hap_ct_table[2] + hap_ct_table[3];
          }
          if (hap_ct_table[4] + hap_ct_table[5] < dxx) {
            hap_ct_table[0] -= hap_ct_table[4];
            hap_ct_table[1] -= hap_ct_table[5];
            tot_recip -= hap_ct_table[4] + hap_ct_table[5];
          }
          if (hap_ct_table[6] + hap_ct_table[7] < dxx) {
            hap_ct_table[0] -= hap_ct_table[6];
            hap_ct_table[1] -= hap_ct_table[7];
            tot_recip -= hap_ct_table[6] + hap_ct_table[7];
          }
          if (hap_ct_table[8] + hap_ct_table[9] < dxx) {
            hap_ct_table[0] -= hap_ct_table[8];
            hap_ct_table[1] -= hap_ct_table[9];
            tot_recip -= hap_ct_table[8] + hap_ct_table[9];
          }
          tot_recip = 1.0 / tot_recip;
          // Row order: A1-A1, A2-A1, A1-A2, A2-A2.
          if (hap_ct_table[2] + hap_ct_table[3] >= dxx) {
            test_mishap_write_line(outfile, wptr, prev_a1len, next_a1len, marker_allele_ptrs[2 * marker_uidx_prev], marker_allele_ptrs[2 * marker_uidx_next], hap_ct_table, &(hap_ct_table[2]), tot_recip, output_min_p, tbuf2, flanklen);
          }
          if (hap_ct_table[6] + hap_ct_table[7] >= dxx) {
            test_mishap_write_line(outfile, wptr, prev_a2len, next_a1len, marker_allele_ptrs[2 * marker_uidx_prev + 1], marker_allele_ptrs[2 * marker_uidx_next], hap_ct_table, &(hap_ct_table[6]), tot_recip, output_min_p, tbuf2, flanklen);
          }
          if (hap_ct_table[4] + hap_ct_table[5] >= dxx) {
            test_mishap_write_line(outfile, wptr, prev_a1len, next_a2len, marker_allele_ptrs[2 * marker_uidx_prev], marker_allele_ptrs[2 * marker_uidx_next + 1], hap_ct_table, &(hap_ct_table[4]), tot_recip, output_min_p, tbuf2, flanklen);
          }
          if (hap_ct_table[8] + hap_ct_table[9] >= dxx) {
            test_mishap_write_line(outfile, wptr, prev_a2len, next_a2len, marker_allele_ptrs[2 * marker_uidx_prev + 1], marker_allele_ptrs[2 * marker_uidx_next + 1], hap_ct_table, &(hap_ct_table[8]), tot_recip, output_min_p, tbuf2, flanklen);
          }
        } else {
          // First included variant on the chromosome: only the next flank
          // exists (prev is the all-zero stand-in, so only counts[0..2] and
          // counts[9..11] are populated).
          hap_ct_table[0] = (int32_t)(2 * (counts[0] + counts[1] + counts[2]));
          hap_ct_table[1] = (int32_t)(2 * (counts[9] + counts[10] + counts[11]));
          tot_recip = hap_ct_table[0] + hap_ct_table[1];
          if (tot_recip == 0.0) {
            continue;
          }
          orig_cmiss_tot = hap_ct_table[0];
          orig_cnm_tot = hap_ct_table[1];
          wptr2 = strcpya(&(tbuf2[1]), &(marker_ids[marker_uidx_next * max_marker_id_len]));
          *wptr2++ = '\n';
          flanklen = (uintptr_t)(wptr2 - tbuf2);
          dxx = min_maf * tot_recip;
          hap_ct_table[2] = (int32_t)(counts[0] * 2 + counts[1]);
          hap_ct_table[3] = (int32_t)(counts[9] * 2 + counts[10]);
          hap_ct_table[4] = (int32_t)(counts[2] * 2 + counts[1]);
          hap_ct_table[5] = (int32_t)(counts[11] * 2 + counts[10]);
          if (hap_ct_table[4] + hap_ct_table[5] < dxx) {
            hap_ct_table[0] = hap_ct_table[2];
            hap_ct_table[1] = hap_ct_table[3];
            tot_recip = hap_ct_table[2] + hap_ct_table[3];
          } else if (hap_ct_table[2] + hap_ct_table[3] < dxx) {
            hap_ct_table[0] = hap_ct_table[4];
            hap_ct_table[1] = hap_ct_table[5];
            tot_recip = hap_ct_table[4] + hap_ct_table[5];
          }
          tot_recip = 1.0 / tot_recip;
          if (hap_ct_table[2] + hap_ct_table[3] >= dxx) {
            test_mishap_write_line(outfile, wptr, 0, next_a1len, nullptr, marker_allele_ptrs[2 * marker_uidx_next], hap_ct_table, &(hap_ct_table[2]), tot_recip, output_min_p, tbuf2, flanklen);
          }
          if (hap_ct_table[4] + hap_ct_table[5] >= dxx) {
            test_mishap_write_line(outfile, wptr, 0, next_a2len, nullptr, marker_allele_ptrs[2 * marker_uidx_next + 1], hap_ct_table, &(hap_ct_table[4]), tot_recip, output_min_p, tbuf2, flanklen);
          }
        }
      } else {
        // Last included variant on the chromosome: only the previous flank
        // exists (next is the all-zero stand-in, so only counts[0,3,6] and
        // counts[9,12,15] are populated).  wptr2 was set from the previous
        // variant's ID above; a chromosome always has >= 2 included variants
        // when this point is reached.
        hap_ct_table[0] = (int32_t)(2 * (counts[0] + counts[3] + counts[6]));
        hap_ct_table[1] = (int32_t)(2 * (counts[9] + counts[12] + counts[15]));
        tot_recip = hap_ct_table[0] + hap_ct_table[1];
        if (tot_recip == 0.0) {
          continue;
        }
        orig_cmiss_tot = hap_ct_table[0];
        orig_cnm_tot = hap_ct_table[1];
        *wptr2++ = '\n';
        flanklen = (uintptr_t)(wptr2 - tbuf2);
        dxx = min_maf * tot_recip;
        hap_ct_table[2] = (int32_t)(counts[0] * 2 + counts[3]);
        hap_ct_table[3] = (int32_t)(counts[9] * 2 + counts[12]);
        hap_ct_table[4] = (int32_t)(counts[6] * 2 + counts[3]);
        hap_ct_table[5] = (int32_t)(counts[15] * 2 + counts[12]);
        if (hap_ct_table[4] + hap_ct_table[5] < dxx) {
          hap_ct_table[0] = hap_ct_table[2];
          hap_ct_table[1] = hap_ct_table[3];
          tot_recip = hap_ct_table[2] + hap_ct_table[3];
        } else if (hap_ct_table[2] + hap_ct_table[3] < dxx) {
          hap_ct_table[0] = hap_ct_table[4];
          hap_ct_table[1] = hap_ct_table[5];
          tot_recip = hap_ct_table[4] + hap_ct_table[5];
        }
        tot_recip = 1.0 / tot_recip;
        if (hap_ct_table[2] + hap_ct_table[3] >= dxx) {
          test_mishap_write_line(outfile, wptr, prev_a1len, 0, marker_allele_ptrs[2 * marker_uidx_prev], nullptr, hap_ct_table, &(hap_ct_table[2]), tot_recip, output_min_p, tbuf2, flanklen);
        }
        if (hap_ct_table[4] + hap_ct_table[5] >= dxx) {
          test_mishap_write_line(outfile, wptr, prev_a2len, 0, marker_allele_ptrs[2 * marker_uidx_prev + 1], nullptr, hap_ct_table, &(hap_ct_table[4]), tot_recip, output_min_p, tbuf2, flanklen);
        }
      }
      // HETERO row: per-sample (not per-haplotype) totals, so the original
      // unfiltered haplotype totals are halved; the "hit" counts are samples
      // heterozygous at one or both flanks.
      hap_ct_table[0] = orig_cmiss_tot * 0.5;
      hap_ct_table[1] = orig_cnm_tot * 0.5;
      hap_ct_table[2] = (int32_t)(counts[1] + counts[3] + counts[4] + counts[5] + counts[7]);
      hap_ct_table[3] = (int32_t)(counts[10] + counts[12] + counts[13] + counts[14] + counts[16]);
      test_mishap_write_line(outfile, wptr, 6, 0, "HETERO", nullptr, hap_ct_table, &(hap_ct_table[2]), 1.0 / (hap_ct_table[0] + hap_ct_table[1]), output_min_p, tbuf2, flanklen);
      inspected_ct++;
      if (!(inspected_ct % 1000)) {
        printf("\r--test-mishap: %uk loci checked.", inspected_ct / 1000);
        fflush(stdout);
      }
    }
  }

  if (fclose_null(&outfile)) {
    goto test_mishap_ret_WRITE_FAIL;
  }
  putc_unlocked('\r', stdout);
  if (inspected_ct < marker_ct) {
    LOGPRINTF("--test-mishap: %u loc%s checked (%" PRIuPTR " skipped).\n", inspected_ct, (inspected_ct == 1)? "us" : "i", marker_ct - inspected_ct);
    LOGPREPRINTFWW("Report written to %s .\n", outname);
  } else {
    LOGPREPRINTFWW("--test-mishap: %u loc%s checked, report written to %s .\n", inspected_ct, (inspected_ct == 1)? "us" : "i", outname);
  }
  logprintb();

  // Error exits: each label sets retval and falls out to the shared cleanup.
  while (0) {
  test_mishap_ret_NOMEM:
    retval = RET_NOMEM;
    break;
  test_mishap_ret_OPEN_FAIL:
    retval = RET_OPEN_FAIL;
    break;
  test_mishap_ret_READ_FAIL:
    retval = RET_READ_FAIL;
    break;
  test_mishap_ret_WRITE_FAIL:
    retval = RET_WRITE_FAIL;
    break;
  test_mishap_ret_INVALID_CMDLINE:
    retval = RET_INVALID_CMDLINE;
    break;
  }
  fclose_cond(outfile);
  bigstack_reset(bigstack_mark);
  return retval;
}
11695
// Shared state handed from construct_ld_map() to ld_map_thread().
// g_ld_load2_bitfield: marker-indexed bitfield; ld_map_thread() popcounts
// ranges of it to turn a marker index into an index into the geno2 /
// geno_masks2 / missing_cts2 arrays.
static uintptr_t* g_ld_load2_bitfield;
// g_ld_result_bitfield: one row of marker_ctv words per first-window marker;
// ld_map_thread() clears a bit when that marker pair fails the r^2 threshold.
static uintptr_t* g_ld_result_bitfield;
11698
ld_map_thread(void * arg)11699 THREAD_RET_TYPE ld_map_thread(void* arg) {
11700 uintptr_t tidx = (uintptr_t)arg;
11701 uint32_t thread_ct = g_ld_thread_ct;
11702 // er, this use of "ctv" is nonstandard, probably want to fix this later
11703 uintptr_t marker_ctv = ((g_ld_marker_ct + 127) / 128) * (128 / BITCT);
11704 uintptr_t idx1_offset = g_ld_block_idx1;
11705 uintptr_t block_idx1_start = (tidx * g_ld_idx1_block_size) / thread_ct;
11706 uintptr_t block_idx1_end = ((tidx + 1) * g_ld_idx1_block_size) / thread_ct;
11707 uintptr_t founder_ct = g_ld_founder_ct;
11708 uintptr_t founder_ctwd = founder_ct / BITCT2;
11709 uintptr_t founder_ctwd12 = founder_ctwd / 12;
11710 uintptr_t founder_ctwd12_rem = founder_ctwd - (12 * founder_ctwd12);
11711 uintptr_t lshift_last = 2 * ((0x7fffffc0 - founder_ct) % BITCT2);
11712 uintptr_t founder_ct_192_long = g_ld_founder_ct_192_long;
11713 uintptr_t* geno1 = g_ld_geno1;
11714 uintptr_t* geno_masks1 = g_ld_geno_masks1;
11715 uint32_t* missing_cts1 = g_ld_missing_cts1;
11716 uint32_t founder_ct_mld_m1 = g_ld_founder_ct_mld_m1;
11717 uint32_t founder_ct_mld_rem = g_ld_founder_ct_mld_rem;
11718 uintptr_t* load2_bitfield = g_ld_load2_bitfield;
11719 uintptr_t* result_bitfield = g_ld_result_bitfield;
11720 double r2_thresh = g_ld_window_r2;
11721 int32_t dp_result[5];
11722 uintptr_t* geno_fixed_vec_ptr;
11723 uintptr_t* geno_var_vec_ptr;
11724 uintptr_t* mask_fixed_vec_ptr;
11725 uintptr_t* mask_var_vec_ptr;
11726 uintptr_t* geno2;
11727 uintptr_t* geno_masks2;
11728 uintptr_t* rb_cur;
11729 uint32_t* missing_cts2;
11730 uintptr_t block_idx1;
11731 uintptr_t block_idx2;
11732 double non_missing_ctd;
11733 double cov12;
11734 double dxx;
11735 double dyy;
11736 uint32_t marker_idx2_start;
11737 uint32_t marker_idx2;
11738 uint32_t marker_idx2_end;
11739 uint32_t fixed_missing_ct;
11740 uint32_t fixed_non_missing_ct;
11741 uint32_t non_missing_ct;
11742 uint32_t uii;
11743 while (1) {
11744 marker_idx2_start = g_ld_idx2_block_start;
11745 marker_idx2_end = g_ld_marker_ctm8;
11746 geno2 = g_ld_geno2;
11747 geno_masks2 = g_ld_geno_masks2;
11748 missing_cts2 = g_ld_missing_cts2;
11749 rb_cur = &(result_bitfield[block_idx1_start * marker_ctv]);
11750 for (block_idx1 = block_idx1_start; block_idx1 < block_idx1_end; block_idx1++, rb_cur = &(rb_cur[marker_ctv])) {
11751 marker_idx2 = block_idx1 + idx1_offset + 1;
11752 if (marker_idx2 < marker_idx2_start) {
11753 marker_idx2 = marker_idx2_start;
11754 } else if (marker_idx2 >= marker_idx2_end) {
11755 break;
11756 }
11757 marker_idx2 = next_set(rb_cur, marker_idx2, marker_idx2_end);
11758 if (marker_idx2 == marker_idx2_end) {
11759 continue;
11760 }
11761 fixed_missing_ct = missing_cts1[block_idx1];
11762 fixed_non_missing_ct = founder_ct - fixed_missing_ct;
11763 geno_fixed_vec_ptr = &(geno1[block_idx1 * founder_ct_192_long]);
11764 mask_fixed_vec_ptr = &(geno_masks1[block_idx1 * founder_ct_192_long]);
11765 block_idx2 = popcount_bit_idx(load2_bitfield, marker_idx2_start, marker_idx2);
11766 while (1) {
11767 geno_var_vec_ptr = &(geno2[block_idx2 * founder_ct_192_long]);
11768 mask_var_vec_ptr = &(geno_masks2[block_idx2 * founder_ct_192_long]);
11769 non_missing_ct = fixed_non_missing_ct - missing_cts2[block_idx2];
11770 if (fixed_missing_ct && missing_cts2[block_idx2]) {
11771 non_missing_ct += ld_missing_ct_intersect(mask_var_vec_ptr, mask_fixed_vec_ptr, founder_ctwd12, founder_ctwd12_rem, lshift_last);
11772 }
11773 dp_result[0] = founder_ct;
11774 dp_result[1] = -fixed_non_missing_ct;
11775 dp_result[2] = missing_cts2[block_idx2] - founder_ct;
11776 dp_result[3] = dp_result[1];
11777 dp_result[4] = dp_result[2];
11778 ld_dot_prod(geno_var_vec_ptr, geno_fixed_vec_ptr, mask_var_vec_ptr, mask_fixed_vec_ptr, dp_result, founder_ct_mld_m1, founder_ct_mld_rem);
11779 non_missing_ctd = (double)((int32_t)non_missing_ct);
11780 dxx = dp_result[1];
11781 dyy = dp_result[2];
11782 cov12 = dp_result[0] * non_missing_ctd - dxx * dyy;
11783 if (cov12 * cov12 <= r2_thresh * ((dp_result[3] * non_missing_ctd + dxx * dxx) * (dp_result[4] * non_missing_ctd + dyy * dyy))) {
11784 clear_bit(marker_idx2, rb_cur);
11785 }
11786 uii = marker_idx2++;
11787 if (is_set(rb_cur, marker_idx2)) {
11788 if (marker_idx2 == marker_idx2_end) {
11789 break;
11790 }
11791 block_idx2++;
11792 } else {
11793 marker_idx2 = next_set(rb_cur, marker_idx2, marker_idx2_end);
11794 if (marker_idx2 == marker_idx2_end) {
11795 break;
11796 }
11797 block_idx2 += popcount_bit_idx(load2_bitfield, uii, marker_idx2);
11798 }
11799 }
11800 }
11801 if ((!tidx) || g_is_last_thread_block) {
11802 THREAD_RETURN;
11803 }
11804 THREAD_BLOCK_FINISH(tidx);
11805 }
11806 }
11807
construct_ld_map(pthread_t * threads,FILE * bedfile,uintptr_t bed_offset,uintptr_t * marker_exclude,uintptr_t marker_ct,uintptr_t * marker_reverse,uint32_t * marker_idx_to_uidx,uintptr_t unfiltered_sample_ct,uintptr_t * founder_pnm,Set_info * sip,uintptr_t * set_incl,uintptr_t set_ct,uint32_t ** setdefs,char * outname,char * outname_end,char * marker_ids,uintptr_t max_marker_id_len,uintptr_t * sex_male,Chrom_info * chrom_info_ptr,uint32_t ignore_x,uint32_t hh_exists,uint32_t *** ld_map_ptr)11808 int32_t construct_ld_map(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uintptr_t* marker_exclude, uintptr_t marker_ct, uintptr_t* marker_reverse, uint32_t* marker_idx_to_uidx, uintptr_t unfiltered_sample_ct, uintptr_t* founder_pnm, Set_info* sip, uintptr_t* set_incl, uintptr_t set_ct, uint32_t** setdefs, char* outname, char* outname_end, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t* sex_male, Chrom_info* chrom_info_ptr, uint32_t ignore_x, uint32_t hh_exists, uint32_t*** ld_map_ptr) {
11809 // Takes a bunch of set definitions, and determines which pairs of same-set
11810 // markers reach/exceed the --set-r2 threshold, saving them (in setdef
11811 // format) to a newly stack-allocated ld_map[].
11812 // If --set-r2 write was specified, the map's contents are written to {output
11813 // prefix}.ldset.
11814 // Note that, when very large set(s) are present, and there's a moderate
11815 // amount of "random" long-range LD, the memory requirement may be huge.
11816 FILE* outfile = nullptr;
11817 unsigned char* bigstack_end_mark = g_bigstack_end;
11818 uintptr_t marker_ctv = ((marker_ct + 127) / 128) * (128 / BITCT);
11819 uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
11820 uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
11821 uintptr_t max_set_id_len = sip->max_name_len;
11822 uintptr_t founder_ct = popcount_longs(founder_pnm, unfiltered_sample_ctl);
11823 uintptr_t founder_ctl = BITCT_TO_WORDCT(founder_ct);
11824 uintptr_t founder_ctv2 = founder_ctl * 2;
11825 uintptr_t founder_ct_mld = (founder_ct + MULTIPLEX_LD - 1) / MULTIPLEX_LD;
11826 uint32_t founder_ct_mld_m1 = ((uint32_t)founder_ct_mld) - 1;
11827 #ifdef __LP64__
11828 uintptr_t founder_ct_mld_rem = (MULTIPLEX_LD / 192) - (founder_ct_mld * MULTIPLEX_LD - founder_ct) / 192;
11829 #else
11830 uintptr_t founder_ct_mld_rem = (MULTIPLEX_LD / 48) - (founder_ct_mld * MULTIPLEX_LD - founder_ct) / 48;
11831 #endif
11832 uintptr_t founder_ct_192_long = founder_ct_mld_m1 * (MULTIPLEX_LD / BITCT2) + founder_ct_mld_rem * (192 / BITCT2);
11833 uintptr_t final_mask = get_final_mask(founder_ct);
11834 uint32_t founder_trail_ct = founder_ct_192_long - founder_ctl * 2;
11835 uint32_t marker_idx = 0;
11836 uint32_t chrom_fo_idx = 0;
11837 uint32_t chrom_idx = 0;
11838 uint32_t is_haploid = 0;
11839 uint32_t is_x = 0;
11840 uint32_t is_y = 0;
11841 uint32_t range_end = 0;
11842 int32_t retval = 0;
11843 char charbuf[8];
11844 uintptr_t* loadbuf;
11845 uintptr_t* load2_bitfield;
11846 uintptr_t* founder_include2;
11847 uintptr_t* founder_male_include2;
11848 uintptr_t* tmp_set_bitfield;
11849 uintptr_t* geno1;
11850 uintptr_t* geno_masks1;
11851 uintptr_t* geno2;
11852 uintptr_t* geno_masks2;
11853 uintptr_t* result_bitfield;
11854 uintptr_t* rb_ptr;
11855 uintptr_t* loadbuf_ptr;
11856 uint32_t** ld_map;
11857 uint32_t* cur_setdef;
11858 uint32_t* cur_setdef2;
11859 char* sptr;
11860 char* wptr_start;
11861 char* wptr;
11862 uintptr_t memreq1;
11863 uintptr_t memreq2;
11864 uintptr_t minmem;
11865 uintptr_t idx1_block_size;
11866 uintptr_t idx2_block_size;
11867 uintptr_t cur_idx2_block_size;
11868 uintptr_t firstw;
11869 uintptr_t wlen;
11870 uintptr_t marker_uidx;
11871 uintptr_t marker_uidx2;
11872 uintptr_t ulii;
11873 uintptr_t uljj;
11874 uint32_t thread_ct;
11875 uint32_t chrom_end;
11876 uint32_t set_idx;
11877 uint32_t set_uidx;
11878 uint32_t idx1_block_end;
11879 uint32_t marker_idx2;
11880 uint32_t load_idx2_tot;
11881 uint32_t marker_load_idx2;
11882 uint32_t block_idx1;
11883 uint32_t block_idx2;
11884 uint32_t setdef_incr_aux;
11885 uint32_t setdef_incr_aux2;
11886 uint32_t is_last_block;
11887 uint32_t range_start;
11888 uint32_t uii;
11889 if (!founder_ct) {
11890 logerrprint("Error: Cannot construct LD map, since there are no founders with nonmissing\nphenotypes. (--make-founders may come in handy here.)\n");
11891 goto construct_ld_map_ret_INVALID_CMDLINE;
11892 }
11893 ld_map = (uint32_t**)bigstack_alloc(marker_ct * sizeof(intptr_t));
11894 if (!ld_map) {
11895 goto construct_ld_map_ret_NOMEM;
11896 }
11897 *ld_map_ptr = ld_map;
11898 // To avoid too much back-and-forth disk seeking for large datasets, we
11899 // construct the LD map in blocks, using similar logic to the --r/--r2 and
11900 // --fast-epistasis computations.
11901 // 1. bigstack_end_alloc space for main window markers' raw data, bitfields
11902 // for them listing intersecting markers in front (i.e. we only look at
11903 // the upper right triangle of the LD matrix), and another union bitfield.
11904 // Break the union into secondary windows, and for each secondary window:
11905 // a. bigstack_end_alloc secondary window markers' raw data
11906 // b. perform multithreaded LD calculations, saving results via in-place
11907 // clearing of the first markers' bitfields
11908 // Memory requirement per main window marker is:
11909 // 96 bytes per 192 founders for raw data (rounded up)
11910 // 32 bytes per 128 filtered markers (rounded up), for the results (16
11911 // working, 16 final)
11912 // 4 bytes for missing_ct
11913 // 16 extra bytes to ensure enough setdef compression workspace
11914 // Memory req. per secondary window marker is 4 + 96 bytes/192 founders.
11915 // To reduce false sharing risk, each thread is assigned at least 4
11916 // markers.
11917 // 2. populate the bottom left triangle of the result matrix by referring to
11918 // earlier results
11919 // 3. save final results for each marker in compressed setdef format at the
11920 // current workspace bottom
11921 // 4. dump .ldset file if necessary
11922 loadbuf = (uintptr_t*)bigstack_end_alloc(unfiltered_sample_ct4);
11923 if (!loadbuf) {
11924 // separate since unfiltered_sample_ct4 is a byte, not word, count
11925 goto construct_ld_map_ret_NOMEM;
11926 }
11927 if (bigstack_end_alloc_ul(marker_ctv, &load2_bitfield) ||
11928 bigstack_end_alloc_ul(marker_ctv, &tmp_set_bitfield) ||
11929 bigstack_end_alloc_ul(founder_ctv2, &founder_include2) ||
11930 bigstack_end_alloc_ul(founder_ctv2, &founder_male_include2)) {
11931 goto construct_ld_map_ret_NOMEM;
11932 }
11933 // bugfix: last word might not be initialized by unpack_set(). Also
11934 // initialize second-to-last word to defend against an unpack_set()
11935 // implementation change.
11936 #ifndef __LP64__
11937 // oh, this also matters in 32-bit case
11938 tmp_set_bitfield[marker_ctv - 4] = 0;
11939 tmp_set_bitfield[marker_ctv - 3] = 0;
11940 #endif
11941 tmp_set_bitfield[marker_ctv - 2] = 0;
11942 tmp_set_bitfield[marker_ctv - 1] = 0;
11943 g_ld_load2_bitfield = load2_bitfield;
11944 alloc_collapsed_haploid_filters(founder_pnm, sex_male, unfiltered_sample_ct, founder_ct, XMHH_EXISTS | hh_exists, 1, &founder_include2, &founder_male_include2);
11945 memreq2 = founder_ct_192_long * sizeof(intptr_t) * 2 + 4;
11946
11947 // this guarantees enough room for save_set_bitfield() worst case
11948 memreq1 = memreq2 + marker_ctv * sizeof(intptr_t) * 2 + 16;
11949
11950 minmem = memreq2 * BITCT;
11951 if (minmem < memreq1 * 4) {
11952 minmem = memreq1 * 4;
11953 }
11954 g_ld_marker_ct = marker_ct;
11955 g_ld_founder_ct = founder_ct;
11956 g_ld_founder_ct_192_long = founder_ct_192_long;
11957 g_ld_founder_ct_mld_m1 = founder_ct_mld_m1;
11958 g_ld_founder_ct_mld_rem = founder_ct_mld_rem;
11959 g_ld_window_r2 = sip->set_r2 * (1 - SMALL_EPSILON);
11960 do {
11961 ulii = bigstack_left() / 2;
11962 if (ulii < minmem) {
11963 goto construct_ld_map_ret_NOMEM;
11964 }
11965 idx1_block_size = (ulii / memreq1) & (~(3 * ONELU));
11966 if (idx1_block_size > marker_ct - marker_idx) {
11967 idx1_block_size = marker_ct - marker_idx;
11968 }
11969 thread_ct = g_thread_ct;
11970 if (thread_ct > idx1_block_size / 4) {
11971 thread_ct = idx1_block_size / 4;
11972 if (!thread_ct) {
11973 thread_ct = 1;
11974 }
11975 }
11976 g_ld_thread_ct = thread_ct;
11977 idx2_block_size = (ulii / memreq2) & (~(BITCT - ONELU));
11978 if (idx2_block_size > marker_ct) {
11979 idx2_block_size = marker_ct;
11980 }
11981 g_ld_block_idx1 = marker_idx;
11982 g_ld_idx1_block_size = idx1_block_size;
11983 bigstack_end_alloc_ul(idx1_block_size * founder_ct_192_long, &geno1);
11984 bigstack_end_alloc_ul(idx1_block_size * founder_ct_192_long, &geno_masks1);
11985 bigstack_end_alloc_ui(idx1_block_size, &g_ld_missing_cts1);
11986 bigstack_end_alloc_ul(idx2_block_size * founder_ct_192_long, &geno2);
11987 bigstack_end_alloc_ul(idx2_block_size * founder_ct_192_long, &geno_masks2);
11988 bigstack_end_alloc_ui(idx2_block_size, &g_ld_missing_cts2);
11989 bigstack_end_alloc_ul(idx1_block_size * marker_ctv, &result_bitfield);
11990 uljj = founder_trail_ct + 2;
11991 for (ulii = 1; ulii <= idx1_block_size; ulii++) {
11992 fill_ulong_zero(uljj, &(geno1[ulii * founder_ct_192_long - uljj]));
11993 fill_ulong_zero(uljj, &(geno_masks1[ulii * founder_ct_192_long - uljj]));
11994 }
11995 for (ulii = 1; ulii <= idx2_block_size; ulii++) {
11996 fill_ulong_zero(uljj, &(geno2[ulii * founder_ct_192_long - uljj]));
11997 fill_ulong_zero(uljj, &(geno_masks2[ulii * founder_ct_192_long - uljj]));
11998 }
11999 fill_ulong_zero(idx1_block_size * marker_ctv, result_bitfield);
12000 g_ld_geno1 = geno1;
12001 g_ld_geno_masks1 = geno_masks1;
12002 g_ld_geno2 = geno2;
12003 g_ld_geno_masks2 = geno_masks2;
12004 g_ld_result_bitfield = result_bitfield;
12005 idx1_block_end = marker_idx + idx1_block_size;
12006 fill_ulong_zero(marker_ctv, load2_bitfield);
12007 fill_ulong_zero(idx1_block_size * marker_ctv, result_bitfield);
12008 for (set_idx = 0; set_idx < set_ct; set_idx++) {
12009 cur_setdef = setdefs[set_idx];
12010 setdef_iter_init(cur_setdef, marker_ct, marker_idx, &marker_idx2, &setdef_incr_aux);
12011 if (setdef_iter(cur_setdef, &marker_idx2, &setdef_incr_aux) && (marker_idx2 < idx1_block_end)) {
12012 unpack_set(marker_ct, cur_setdef, tmp_set_bitfield);
12013 get_set_wrange_align(tmp_set_bitfield, marker_ctv, &firstw, &wlen);
12014 if (wlen) {
12015 uii = marker_idx2;
12016 do {
12017 bitvec_or(&(tmp_set_bitfield[firstw]), wlen, &(result_bitfield[((marker_idx2 - marker_idx) * marker_ctv + firstw)]));
12018 marker_idx2++;
12019 next_set_ck(tmp_set_bitfield, idx1_block_end, &marker_idx2);
12020 } while (marker_idx2 < idx1_block_end);
12021 // don't need to load the first intersecting member or anything
12022 // before it, since we're only traversing the upper right triangle
12023 wlen += firstw;
12024 #ifdef __LP64__
12025 firstw = 2 * (uii / 128);
12026 #else
12027 firstw = uii / 32;
12028 #endif
12029 clear_bits(0, uii + 1 - firstw * BITCT, &(tmp_set_bitfield[firstw]));
12030 bitvec_or(&(tmp_set_bitfield[firstw]), wlen - firstw, &(load2_bitfield[firstw]));
12031 }
12032 }
12033 }
12034 load_idx2_tot = popcount_longs(load2_bitfield, marker_ctv);
12035 if (!load_idx2_tot) {
12036 // no new r^2 computations to make at all!
12037 goto construct_ld_map_no_new;
12038 }
12039 marker_uidx = next_unset_unsafe(marker_exclude, 0);
12040 if (marker_idx) {
12041 marker_uidx = jump_forward_unset_unsafe(marker_exclude, marker_uidx + 1, marker_idx);
12042 }
12043 marker_uidx2 = marker_uidx;
12044 if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
12045 goto construct_ld_map_ret_READ_FAIL;
12046 }
12047 chrom_end = 0;
12048 for (block_idx1 = 0; block_idx1 < idx1_block_size; marker_uidx++, block_idx1++) {
12049 if (IS_SET(marker_exclude, marker_uidx)) {
12050 marker_uidx = next_unset_ul_unsafe(marker_exclude, marker_uidx);
12051 if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
12052 goto construct_ld_map_ret_READ_FAIL;
12053 }
12054 }
12055 if (marker_uidx >= chrom_end) {
12056 chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx);
12057 chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
12058 chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
12059 is_haploid = is_set(chrom_info_ptr->haploid_mask, chrom_idx);
12060 is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
12061 is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
12062 }
12063 ulii = block_idx1 * founder_ct_192_long;
12064 loadbuf_ptr = &(geno1[ulii]);
12065 if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_pnm, final_mask, IS_SET(marker_reverse, marker_uidx), bedfile, loadbuf, loadbuf_ptr)) {
12066 goto construct_ld_map_ret_READ_FAIL;
12067 }
12068 if (is_haploid && hh_exists) {
12069 haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)loadbuf_ptr);
12070 }
12071 ld_process_load2(loadbuf_ptr, &(geno_masks1[ulii]), &(g_ld_missing_cts1[block_idx1]), founder_ct, is_x && (!ignore_x), founder_male_include2);
12072 }
12073 chrom_end = 0;
12074 cur_idx2_block_size = idx2_block_size;
12075 marker_idx2 = next_set_unsafe(load2_bitfield, 0);
12076 marker_uidx2 = jump_forward_unset_unsafe(marker_exclude, marker_uidx2 + 1, marker_idx2 - marker_idx);
12077 marker_load_idx2 = 0;
12078 if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
12079 goto construct_ld_map_ret_READ_FAIL;
12080 }
12081 do {
12082 if (cur_idx2_block_size > load_idx2_tot - marker_load_idx2) {
12083 cur_idx2_block_size = load_idx2_tot - marker_load_idx2;
12084 }
12085 g_ld_idx2_block_start = marker_idx2;
12086 block_idx2 = 0;
12087 while (1) {
12088 if (marker_uidx2 >= chrom_end) {
12089 chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2);
12090 chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
12091 chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
12092 is_haploid = is_set(chrom_info_ptr->haploid_mask, chrom_idx);
12093 is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
12094 is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
12095 }
12096 ulii = block_idx2 * founder_ct_192_long;
12097 loadbuf_ptr = &(geno2[ulii]);
12098 if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_pnm, final_mask, IS_SET(marker_reverse, marker_uidx2), bedfile, loadbuf, loadbuf_ptr)) {
12099 goto construct_ld_map_ret_READ_FAIL;
12100 }
12101 if (is_haploid && hh_exists) {
12102 haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)loadbuf_ptr);
12103 }
12104 ld_process_load2(loadbuf_ptr, &(geno_masks2[ulii]), &(g_ld_missing_cts2[block_idx2]), founder_ct, is_x && (!ignore_x), founder_male_include2);
12105 if (++block_idx2 == cur_idx2_block_size) {
12106 break;
12107 }
12108 uii = marker_idx2++;
12109 ulii = ++marker_uidx2;
12110 if (is_set(load2_bitfield, marker_idx2)) {
12111 next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx2);
12112 } else {
12113 marker_idx2 = next_set_unsafe(load2_bitfield, marker_idx2);
12114 marker_uidx2 = jump_forward_unset_unsafe(marker_exclude, marker_uidx2, marker_idx2 - uii);
12115 }
12116 if (ulii < marker_uidx2) {
12117 if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
12118 goto construct_ld_map_ret_READ_FAIL;
12119 }
12120 }
12121 }
12122 g_ld_marker_ctm8 = marker_idx2 + 1; // repurposed
12123 marker_load_idx2 += cur_idx2_block_size;
12124 is_last_block = (marker_load_idx2 == load_idx2_tot);
12125 if (spawn_threads2(threads, &ld_map_thread, thread_ct, is_last_block)) {
12126 goto construct_ld_map_ret_THREAD_CREATE_FAIL;
12127 }
12128 ld_map_thread((void*)0);
12129 join_threads2(threads, thread_ct, is_last_block);
12130 } while (!is_last_block);
12131 construct_ld_map_no_new:
12132 for (block_idx1 = marker_idx; block_idx1 < idx1_block_end; block_idx1++) {
12133 rb_ptr = &(result_bitfield[(block_idx1 - marker_idx) * marker_ctv]);
12134 marker_idx2 = 0;
12135 while (1) {
12136 marker_idx2 = next_set(rb_ptr, marker_idx2, block_idx1);
12137 if (marker_idx2 == block_idx1) {
12138 clear_bit(block_idx1, rb_ptr);
12139 break;
12140 }
12141 if (!in_setdef(ld_map[marker_idx2], block_idx1)) {
12142 clear_bit(marker_idx2, rb_ptr);
12143 }
12144 marker_idx2++;
12145 }
12146 range_start = next_set(rb_ptr, 0, marker_ct);
12147 if (range_start != marker_ct) {
12148 range_end = last_set_bit(rb_ptr, marker_ctv) + 1;
12149 }
12150 save_set_bitfield(rb_ptr, marker_ct, range_start, range_end, 0, &(ld_map[block_idx1]));
12151 }
12152 // free previous round of allocations
12153 bigstack_end_reset(founder_male_include2);
12154 marker_idx = idx1_block_end;
12155 } while (marker_idx < marker_ct);
12156 if (sip->modifier & SET_R2_WRITE) {
12157 memcpy(charbuf, outname_end, 8);
12158 memcpy(outname_end, ".ldset", 7);
12159 if (fopen_checked(outname, "w", &outfile)) {
12160 goto construct_ld_map_ret_OPEN_FAIL;
12161 }
12162 set_uidx = 0;
12163 for (set_idx = 0; set_idx < set_ct; set_uidx++, set_idx++) {
12164 next_set_unsafe_ck(set_incl, &set_uidx);
12165 sptr = &(sip->names[set_uidx * max_set_id_len]);
12166 uii = strlen(sptr);
12167 wptr_start = memcpyax(g_textbuf, sptr, uii, ' ');
12168 cur_setdef = setdefs[set_idx];
12169 setdef_iter_init(cur_setdef, marker_ct, 0, &marker_idx, &setdef_incr_aux);
12170
12171 while (setdef_iter(cur_setdef, &marker_idx, &setdef_incr_aux)) {
12172 cur_setdef2 = ld_map[marker_idx];
12173 // cur_setdef2 can contain variants outside of the current set, so we
12174 // need to look at the intersection.
12175 setdef_iter_init(cur_setdef2, marker_ct, 0, &marker_idx2, &setdef_incr_aux2);
12176 uii = 0; // now this tracks whether a first match has been found
12177 while (setdef_iter(cur_setdef2, &marker_idx2, &setdef_incr_aux2)) {
12178 if (in_setdef(cur_setdef, marker_idx2)) {
12179 if (!uii) {
12180 uii = 1;
12181 wptr = strcpyax(wptr_start, &(marker_ids[marker_idx_to_uidx[marker_idx] * max_marker_id_len]), ' ');
12182 if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
12183 goto construct_ld_map_ret_WRITE_FAIL;
12184 }
12185 }
12186 fputs(&(marker_ids[marker_idx_to_uidx[marker_idx2] * max_marker_id_len]), outfile);
12187 putc_unlocked(' ', outfile);
12188 }
12189 marker_idx2++;
12190 }
12191 if (uii) {
12192 if (putc_checked('\n', outfile)) {
12193 goto construct_ld_map_ret_WRITE_FAIL;
12194 }
12195 }
12196 marker_idx++;
12197 }
12198 }
12199 if (fclose_null(&outfile)) {
12200 goto construct_ld_map_ret_WRITE_FAIL;
12201 }
12202 LOGPRINTFWW("--set-r2 write: LD map written to %s .\n", outname);
12203 memcpy(outname_end, charbuf, 8);
12204 } else {
12205 logprint("LD map constructed.\n");
12206 }
12207 while (0) {
12208 construct_ld_map_ret_NOMEM:
12209 retval = RET_NOMEM;
12210 break;
12211 construct_ld_map_ret_OPEN_FAIL:
12212 retval = RET_OPEN_FAIL;
12213 break;
12214 construct_ld_map_ret_READ_FAIL:
12215 retval = RET_READ_FAIL;
12216 break;
12217 construct_ld_map_ret_WRITE_FAIL:
12218 retval = RET_WRITE_FAIL;
12219 break;
12220 construct_ld_map_ret_INVALID_CMDLINE:
12221 retval = RET_INVALID_CMDLINE;
12222 break;
12223 construct_ld_map_ret_THREAD_CREATE_FAIL:
12224 retval = RET_THREAD_CREATE_FAIL;
12225 break;
12226 }
12227 fclose_cond(outfile);
12228 bigstack_end_reset(bigstack_end_mark);
12229 return retval;
12230 }
12231
set_test_score(uintptr_t marker_ct,double chisq_threshold,uint32_t set_max,double * chisq_arr,uint32_t ** ld_map,uint32_t * cur_setdef,double * sorted_chisq_buf,uint32_t * sorted_marker_idx_buf,uint32_t * proxy_arr,uint32_t * raw_sig_ct_ptr,uint32_t * final_sig_ct_ptr,double * set_score_ptr)12232 void set_test_score(uintptr_t marker_ct, double chisq_threshold, uint32_t set_max, double* chisq_arr, uint32_t** ld_map, uint32_t* cur_setdef, double* sorted_chisq_buf, uint32_t* sorted_marker_idx_buf, uint32_t* proxy_arr, uint32_t* raw_sig_ct_ptr, uint32_t* final_sig_ct_ptr, double* set_score_ptr) {
12233 // set score statistic = mean of chi-square statistics of set
12234 // representatives. --linear t statistics are converted to same-p-value 1df
12235 // chi-square stats out of necessity; in theory, this hack could be applied
12236 // to e.g. Fisher's exact test and the variable-df genotypic test as well,
12237 // but I'll hold off on that until/unless it's specifically requested.
12238
12239 // sort variants by p-value, then iterate over setdefs, greedily selecting up
12240 // to sip->set_max significant independent variants from each.
12241 double chi_sum = 0.0;
12242 uint32_t raw_sig_ct = 0;
12243 uint32_t final_sig_ct = 0;
12244 uint32_t marker_idx;
12245 uint32_t setdef_incr_aux;
12246 uint32_t raw_idx;
12247 uint32_t ld_conflict;
12248 uint32_t uii;
12249 setdef_iter_init(cur_setdef, marker_ct, 0, &marker_idx, &setdef_incr_aux);
12250 while (setdef_iter(cur_setdef, &marker_idx, &setdef_incr_aux)) {
12251 if (chisq_arr[marker_idx] >= chisq_threshold) {
12252 sorted_chisq_buf[raw_sig_ct] = chisq_arr[marker_idx];
12253 sorted_marker_idx_buf[raw_sig_ct] = marker_idx;
12254 raw_sig_ct++;
12255 }
12256 marker_idx++;
12257 }
12258 if (!raw_sig_ct) {
12259 // not possible for initial pass, so no need to set raw_sig_ct_ptr, etc.
12260 // bugfix: actually, that comment was incorrect
12261 *set_score_ptr = 0.0;
12262 return;
12263 }
12264 qsort_ext2((char*)sorted_chisq_buf, raw_sig_ct, sizeof(double), double_cmp_deref, (char*)sorted_marker_idx_buf, sizeof(int32_t), (char*)proxy_arr, sizeof(double) + sizeof(int32_t));
12265 raw_idx = raw_sig_ct;
12266 do {
12267 raw_idx--;
12268 ld_conflict = 0;
12269 marker_idx = sorted_marker_idx_buf[raw_idx];
12270 for (uii = 0; uii < final_sig_ct; uii++) {
12271 if (in_setdef(ld_map[proxy_arr[uii]], marker_idx)) {
12272 ld_conflict = 1;
12273 break;
12274 }
12275 }
12276 if (!ld_conflict) {
12277 proxy_arr[final_sig_ct] = marker_idx;
12278 chi_sum += sorted_chisq_buf[raw_idx];
12279 if (++final_sig_ct == set_max) {
12280 break;
12281 }
12282 }
12283 } while (raw_idx);
12284 *set_score_ptr = chi_sum / ((double)((int32_t)final_sig_ct));
12285 if (raw_sig_ct_ptr) {
12286 *raw_sig_ct_ptr = raw_sig_ct;
12287 *final_sig_ct_ptr = final_sig_ct;
12288 }
12289 }
12290
int32_t set_test_common_init(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude_orig, uintptr_t marker_ct_orig, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t* marker_reverse, double* orig_chisq, Set_info* sip, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* sex_male, uintptr_t* founder_pnm, uint32_t ld_ignore_x, uint32_t hh_exists, const char* flag_descrip, uintptr_t* marker_ct_ptr, uintptr_t** marker_exclude_ptr, uintptr_t** set_incl_ptr, uint32_t** marker_idx_to_uidx_ptr, uint32_t*** setdefs_ptr, uintptr_t* set_ct_ptr, uint32_t* max_sigset_size_ptr, uint32_t*** ld_map_ptr, double* chisq_threshold_ptr, double** orig_set_scores_ptr, double** sorted_chisq_buf_ptr, uint32_t** sorted_marker_idx_buf_ptr, uint32_t** proxy_arr_ptr, uintptr_t** perm_adapt_set_unstopped_ptr, uint32_t** perm_2success_ct_ptr, uint32_t** perm_attempt_ct_ptr, uintptr_t** unstopped_markers_ptr) {
  // Assumes *marker_ct_ptr has value marker_ct_mid, and marker_exclude_ptr
  // initially points to marker_exclude_mid.
  // Side effect: allocates set_incl, marker_idx_to_uidx, ld_map, and several
  // other arrays on stack
  //
  // Shared setup for the permutation-based set test.  In order:
  //   1. Flag (in set_incl) every set with at least one variant whose
  //      orig_chisq value is >= the threshold derived from sip->set_p.
  //   2. If some sets were dropped, shrink the variant collection to the
  //      union of the remaining sets and compress the set definitions.
  //   3. Build the LD map with construct_ld_map().
  //   4. Collapse orig_chisq to the final variant collection, allocate the
  //      scoring scratch buffers, and compute each set's original score.
  //   5. Allocate the per-set/per-variant permutation bookkeeping arrays.
  //
  // Returns 0 on success (including the "no significant variants" case, in
  // which *set_ct_ptr is 0 and nothing after step 1 happens), RET_NOMEM on
  // allocation failure, or construct_ld_map()'s error code.
  // *marker_ct_ptr, *set_ct_ptr, *max_sigset_size_ptr and
  // *chisq_threshold_ptr are written on every exit path.
  uintptr_t marker_ct_mid = *marker_ct_ptr;
  uintptr_t marker_ct = marker_ct_mid;
  uintptr_t raw_set_ct = sip->ct;
  uintptr_t raw_set_ctl = BITCT_TO_WORDCT(raw_set_ct);
  uintptr_t set_ct = 0;
  uintptr_t* marker_exclude_mid = *marker_exclude_ptr;
  double chisq_threshold = inverse_chiprob(sip->set_p, 1);
  uint32_t max_sigset_size = 0;
  int32_t retval = 0;
  uintptr_t marker_midx;
  uintptr_t set_uidx;
  uintptr_t* set_incl;
  uintptr_t* cur_bitfield;
  double* chisq_ptr;
  double* chisq_end;
  double* orig_set_scores;
  uint32_t** setdefs;
  uint32_t* marker_midx_to_idx;
  uint32_t* cur_setdef;
  uintptr_t marker_idx;
  uintptr_t set_idx;
  uint32_t range_ct;
  uint32_t range_idx;
  uint32_t range_offset;
  uint32_t range_stop;
  uint32_t include_out_of_bounds;
  uint32_t cur_set_size;
  uint32_t cur_range_size;
  uint32_t uii;
  // set_incl is allocated first and survives; marker_midx_to_idx sits above
  // it on the stack and is released by the bigstack_reset() further down.
  if (bigstack_calloc_ul(raw_set_ctl, set_incl_ptr) ||
      bigstack_alloc_ui(marker_ct_orig, &marker_midx_to_idx)) {
    goto set_test_common_init_ret_NOMEM;
  }
  set_incl = *set_incl_ptr;
  fill_midx_to_idx(marker_exclude_orig, marker_exclude_mid, marker_ct, marker_midx_to_idx);

  // determine which sets contain at least one significant marker.  do not
  // attempt to calculate the sum statistic yet: we need the LD map for that.
  for (set_uidx = 0; set_uidx < raw_set_ct; set_uidx++) {
    cur_setdef = sip->setdefs[set_uidx];
    range_ct = cur_setdef[0];
    cur_set_size = 0;
    uii = 0; // found a significant marker?
    if (range_ct != 0xffffffffU) {
      // Range-list encoding: cur_setdef[0] is the number of ranges, followed
      // by (start, stop) pairs; each range covers stop - start variants.
      for (range_idx = 0; range_idx < range_ct; range_idx++) {
        marker_midx = *(++cur_setdef);
        range_stop = *(++cur_setdef);
        cur_range_size = range_stop - marker_midx;
        cur_set_size += cur_range_size;
        if (!uii) {
          // Scans cur_range_size consecutive orig_chisq entries starting at
          // the translated range start.
          chisq_ptr = &(orig_chisq[marker_midx_to_idx[marker_midx]]);
          chisq_end = &(chisq_ptr[cur_range_size]);
          for (; chisq_ptr < chisq_end; chisq_ptr++) {
            if (*chisq_ptr >= chisq_threshold) {
              uii = 1;
              break;
            }
          }
        }
      }
    } else {
      // Bitfield encoding (first word is 0xffffffff): [1] = index of the
      // variant that bit 0 refers to, [2] = number of bits covered,
      // [3] = whether variants outside the covered window are members,
      // [4...] = the bitfield itself.
      range_offset = cur_setdef[1];
      range_stop = cur_setdef[2];
      include_out_of_bounds = cur_setdef[3];
      cur_bitfield = (uintptr_t*)(&(cur_setdef[4]));
      if (include_out_of_bounds && range_offset) {
        for (marker_midx = 0; marker_midx < range_offset; marker_midx++) {
          // all initial markers guaranteed to be in union, no
          // marker_midx_to_idx lookup needed
          if (orig_chisq[marker_midx] >= chisq_threshold) {
            uii = 1;
            break;
          }
        }
        cur_set_size += range_offset;
      }
      // The popcount length is rounded up to a multiple of 128 bits.
      cur_set_size += popcount_longs(cur_bitfield, ((range_stop + 127) / 128) * (128 / BITCT));
      if (!uii) {
        for (marker_midx = 0; marker_midx < range_stop; marker_midx++) {
          if (IS_SET(cur_bitfield, marker_midx)) {
            if (orig_chisq[marker_midx_to_idx[marker_midx + range_offset]] >= chisq_threshold) {
              uii = 1;
              break;
            }
          }
        }
      }
      if (include_out_of_bounds && (range_offset + range_stop < marker_ct_orig)) {
        cur_set_size += marker_ct_orig - range_offset - range_stop;
        if (!uii) {
          for (marker_idx = marker_midx_to_idx[range_offset + range_stop]; marker_idx < marker_ct; marker_idx++) {
            // all trailing markers guaranteed to be in union
            if (orig_chisq[marker_idx] >= chisq_threshold) {
              uii = 1;
              break;
            }
          }
        }
      }
    }
    if (uii) {
      SET_BIT(set_uidx, set_incl);
      set_ct++;
      // Track the largest retained set; it sizes the scoring scratch buffers
      // allocated later.
      if (cur_set_size > max_sigset_size) {
        max_sigset_size = cur_set_size;
      }
    }
  }
  if (!set_ct) {
    logerrprint("Warning: No significant variants in any set. Skipping permutation-based set\ntest.\n");
    goto set_test_common_init_ret_1;
  }
  LOGPRINTFWW("%s set test: Testing %" PRIuPTR " set%s with at least one significant variant.\n", flag_descrip, set_ct, (set_ct == 1)? "" : "s");
  // Release marker_midx_to_idx (and anything above it); set_incl stays.
  bigstack_reset((unsigned char*)marker_midx_to_idx);
  if (set_ct < raw_set_ct) {
    // Some sets were dropped: recompute the variant collection as the union
    // of the retained sets.  marker_ct is updated through the last argument.
    marker_ct = marker_ct_orig;
    if (extract_set_union_unfiltered(sip, set_incl, unfiltered_marker_ct, marker_exclude_orig, marker_exclude_ptr, &marker_ct)) {
      goto set_test_common_init_ret_NOMEM;
    }
  }
  // Okay, we've pruned all we can, now it's time to suck it up and construct
  // the potentially huge LD map
  if (bigstack_alloc_ui(marker_ct, marker_idx_to_uidx_ptr)) {
    goto set_test_common_init_ret_NOMEM;
  }
  fill_idx_to_uidx(*marker_exclude_ptr, unfiltered_marker_ct, marker_ct, *marker_idx_to_uidx_ptr);
  if (marker_ct < marker_ct_orig) {
    if (setdefs_compress(sip, set_incl, set_ct, unfiltered_marker_ct, marker_exclude_orig, marker_ct_orig, *marker_exclude_ptr, marker_ct, setdefs_ptr)) {
      goto set_test_common_init_ret_NOMEM;
    }
  } else {
    // Nothing was filtered out, so the original definitions are used as-is.
    *setdefs_ptr = sip->setdefs;
  }
  setdefs = *setdefs_ptr;
  retval = construct_ld_map(threads, bedfile, bed_offset, *marker_exclude_ptr, marker_ct, marker_reverse, *marker_idx_to_uidx_ptr, unfiltered_sample_ct, founder_pnm, sip, set_incl, set_ct, setdefs, outname, outname_end, marker_ids, max_marker_id_len, sex_male, chrom_info_ptr, ld_ignore_x, hh_exists, ld_map_ptr);
  if (retval) {
    goto set_test_common_init_ret_1;
  }
  if (marker_ct_mid != marker_ct) {
    // caller needs to collapse other arrays
    inplace_delta_collapse_arr((char*)orig_chisq, sizeof(double), marker_ct_mid, marker_ct, marker_exclude_mid, *marker_exclude_ptr);
  }
  if (bigstack_alloc_d(set_ct, orig_set_scores_ptr) ||
      bigstack_alloc_d(max_sigset_size, sorted_chisq_buf_ptr) ||
      bigstack_alloc_ui(max_sigset_size, sorted_marker_idx_buf_ptr) ||
      // 3 int32s = max(sizeof(double), sizeof(intptr_t)) + sizeof(int32_t)
      bigstack_alloc_ui(max_sigset_size * 3LU, proxy_arr_ptr)) {
    goto set_test_common_init_ret_NOMEM;
  }
  orig_set_scores = *orig_set_scores_ptr;
  for (set_idx = 0; set_idx < set_ct; set_idx++) {
    // we're calling this again during final write anyway, so don't bother
    // saving raw_sig_ct or final_sig_ct now
    set_test_score(marker_ct, chisq_threshold, sip->set_max, orig_chisq, *ld_map_ptr, setdefs[set_idx], *sorted_chisq_buf_ptr, *sorted_marker_idx_buf_ptr, *proxy_arr_ptr, nullptr, nullptr, &(orig_set_scores[set_idx]));
  }
  // just treat --mperm as --perm with min_perms == max_perms, since this isn't
  // a proper max(T) test
  // Note: perm_2success_ct is zero-initialized (calloc); perm_attempt_ct is
  // not initialized here, so the caller is presumably expected to fill it.
  if (bigstack_alloc_ul(BITCT_TO_WORDCT(set_ct), perm_adapt_set_unstopped_ptr) ||
      bigstack_calloc_ui(set_ct, perm_2success_ct_ptr) ||
      bigstack_alloc_ui(set_ct, perm_attempt_ct_ptr) ||
      bigstack_alloc_ul(BITCT_TO_WORDCT(marker_ct), unstopped_markers_ptr)) {
    goto set_test_common_init_ret_NOMEM;
  }
  // Every retained set and every retained variant starts out "unstopped".
  fill_all_bits(set_ct, *perm_adapt_set_unstopped_ptr);
  fill_all_bits(marker_ct, *unstopped_markers_ptr);
  // Error-label block: only reachable via goto.
  while (0) {
  set_test_common_init_ret_NOMEM:
    retval = RET_NOMEM;
    break;
  }
 set_test_common_init_ret_1:
  // Scalar outputs are published on success and on every failure path.
  *marker_ct_ptr = marker_ct;
  *set_ct_ptr = set_ct;
  *max_sigset_size_ptr = max_sigset_size;
  *chisq_threshold_ptr = chisq_threshold;
  return retval;
}
12473
void compute_set_scores(uintptr_t marker_ct, uintptr_t perm_vec_ct, uintptr_t set_ct, double* chisq_matrix, double* orig_set_scores, double* sorted_chisq_buf, uint32_t* sorted_marker_idx_buf, uint32_t* proxy_arr, uint32_t** setdefs, uint32_t** ld_map, Aperm_info* apip, double chisq_threshold, double adaptive_ci_zt, uint32_t first_adapt_check, uint32_t perms_done, uint32_t set_max, uintptr_t* perm_adapt_set_unstopped, uint32_t* perm_2success_ct, uint32_t* perm_attempt_ct) {
  // Scores every still-active set against each permutation of the batch that
  // just finished.  Row pidx of chisq_matrix (marker_ct entries per row) holds
  // the statistics of the pidx-th permutation in the batch.
  //
  // perm_2success_ct[] accumulates doubled success counts: +2 when the
  // permuted score exceeds the original by more than EPSILON, +1 when the two
  // are within EPSILON of each other.  At each adaptive checkpoint a set whose
  // confidence interval around the empirical p-value excludes apip->alpha is
  // retired: its bit in perm_adapt_set_unstopped is cleared and the checkpoint
  // is recorded in perm_attempt_ct[].
  const uint32_t batch_first_perm = perms_done - perm_vec_ct;
  uintptr_t set_idx;
  for (set_idx = 0; set_idx < set_ct; set_idx++) {
    double tie_upper;
    double tie_lower;
    double perm_score;
    double emp_pval;
    double ci_halfwidth;
    uint32_t checkpoint;
    uint32_t success_2ct;
    uint32_t pidx;
    if (!IS_SET(perm_adapt_set_unstopped, set_idx)) {
      // already retired in an earlier batch
      continue;
    }
    checkpoint = first_adapt_check;
    success_2ct = perm_2success_ct[set_idx];
    tie_upper = orig_set_scores[set_idx] + EPSILON;
    tie_lower = orig_set_scores[set_idx] - EPSILON;
    pidx = 0;
    while (pidx < perm_vec_ct) {
      set_test_score(marker_ct, chisq_threshold, set_max, &(chisq_matrix[pidx * marker_ct]), ld_map, setdefs[set_idx], sorted_chisq_buf, sorted_marker_idx_buf, proxy_arr, nullptr, nullptr, &perm_score);
      if (perm_score > tie_upper) {
        success_2ct += 2;
      } else if (perm_score > tie_lower) {
        success_2ct++;
      }
      pidx++;
      // checkpoint is a global permutation count; convert to a batch offset
      if (pidx != checkpoint - batch_first_perm) {
        continue;
      }
      if (success_2ct) {
        emp_pval = ((double)((int32_t)success_2ct + 2)) / ((double)(2 * ((int32_t)checkpoint + 1)));
        ci_halfwidth = adaptive_ci_zt * sqrt(emp_pval * (1 - emp_pval) / ((int32_t)checkpoint));
        if ((emp_pval - ci_halfwidth > apip->alpha) || (emp_pval + ci_halfwidth < apip->alpha)) {
          CLEAR_BIT(set_idx, perm_adapt_set_unstopped);
          perm_attempt_ct[set_idx] = checkpoint;
          break;
        }
      }
      checkpoint += (int32_t)(apip->init_interval + ((int32_t)checkpoint) * apip->interval_slope);
    }
    perm_2success_ct[set_idx] = success_2ct;
  }
}
12516
int32_t write_set_test_results(char* outname, char* outname_end2, Set_info* sip, uint32_t** ld_map, uint32_t** setdefs, uintptr_t* set_incl, uintptr_t set_ct, uintptr_t marker_ct_orig, uintptr_t marker_ct, uint32_t* marker_idx_to_uidx, char* marker_ids, uintptr_t max_marker_id_len, uint32_t* perm_2success_ct, uint32_t* perm_attempt_ct, uint32_t mtest_adjust, uint32_t perm_count, double pfilter, double output_min_p, double chisq_threshold, double* orig_stats, double* sorted_chisq_buf, uint32_t* sorted_marker_idx_buf, uint32_t* proxy_arr) {
  // assumes caller will free memory from stack
  //
  // Writes one report row per set in sip (tested or not) to outname, then,
  // when mtest_adjust is set and at least one set was tested, hands the
  // empirical p-values of the nonempty sets to multcomp().
  //
  // Row layout (fixed-width, matching the formatting calls below):
  //   SET(12) ' ' NSNP(6) ' ' NSIG(6) ' ' ISIG(6) ' ' EMP1(12)
  //   then either ' ' (perm_count == 0) or 3 spaces + NP(10) + ' ',
  //   then the '|'-separated IDs of the selected variants (or NA).
  //
  // Returns 0 on success, RET_NOMEM / RET_OPEN_FAIL / RET_WRITE_FAIL on the
  // corresponding failure, or multcomp()'s return value.

  // Pre-formatted tails for sets that were not tested.  Lengths are taken
  // from the literals themselves so the copy can never run past their end:
  //   6+1 + 6+1 + 12+1 + "NA\n"            = 30 chars
  //   6+1 + 6+1 + 12 + 3 + 10+1 + "NA\n"   = 43 chars
  static const char untested_tail[] = "     0      0            1 NA\n";
  static const char untested_tail_perm[] = "     0      0            0            0 NA\n";
  FILE* outfile = nullptr;
  uintptr_t* nonempty_set_incl = nullptr;
  double* empirical_pvals = nullptr;
  uintptr_t raw_set_ct = sip->ct;
  uintptr_t max_set_id_len = sip->max_name_len;
  uint32_t nonempty_set_ct = 0;
  int32_t retval = 0;
  uintptr_t set_uidx;
  uintptr_t set_idx;
  char* bufptr;
  uint32_t* nonempty_set_idx_to_uidx;
  double cur_score;
  double pval;
  uint32_t raw_sig_ct;
  uint32_t final_sig_ct;
  uint32_t set_midx;
  uint32_t uii;
  if (set_ct && mtest_adjust) {
    if (alloc_and_populate_nonempty_set_incl(sip, &nonempty_set_ct, &nonempty_set_incl)) {
      goto write_set_test_results_ret_NOMEM;
    }
    if (bigstack_alloc_d(nonempty_set_ct, &empirical_pvals)) {
      goto write_set_test_results_ret_NOMEM;
    }
  }
  if (fopen_checked(outname, "w", &outfile)) {
    goto write_set_test_results_ret_OPEN_FAIL;
  }
  // Header is padded to the same column widths as the data rows.
  fprintf(outfile, "         SET   NSNP   NSIG   ISIG         EMP1 %sSNPS\n", perm_count? "          NP " : "");
  // set_uidx walks all sets; set_idx counts tested sets (index into setdefs
  // and the perm_* arrays); set_midx counts entries written to
  // empirical_pvals.
  for (set_uidx = 0, set_midx = 0, set_idx = 0; set_uidx < raw_set_ct; set_uidx++) {
    bufptr = fw_strcpy(12, &(sip->names[set_uidx * max_set_id_len]), g_textbuf);
    *bufptr++ = ' ';
    bufptr = uint32toa_w6x(setdef_size(sip->setdefs[set_uidx], marker_ct_orig), ' ', bufptr);
    if (IS_SET(set_incl, set_uidx)) {
      // Zero the counts first so they are defined even if set_test_score()
      // returns without storing them (no variant at/above the threshold).
      raw_sig_ct = 0;
      final_sig_ct = 0;
      set_test_score(marker_ct, chisq_threshold, sip->set_max, orig_stats, ld_map, setdefs[set_idx], sorted_chisq_buf, sorted_marker_idx_buf, proxy_arr, &raw_sig_ct, &final_sig_ct, &cur_score);
      bufptr = uint32toa_w6x(raw_sig_ct, ' ', bufptr);
      bufptr = uint32toa_w6x(final_sig_ct, ' ', bufptr);
      // perm_2success_ct holds doubled success counts, hence the factor 2 in
      // the denominator; +2 / +1 is the usual add-one correction.
      pval = ((double)(perm_2success_ct[set_idx] + 2)) / ((double)(2 * (perm_attempt_ct[set_idx] + 1)));
      if (empirical_pvals) {
        empirical_pvals[set_midx] = pval;
      }
      if (pval <= pfilter) {
        if (!perm_count) {
          bufptr = dtoa_g_wxp4x(MAXV(pval, output_min_p), 12, ' ', bufptr);
        } else {
          bufptr = dtoa_g_wxp4(((double)perm_2success_ct[set_idx]) * 0.5, 12, bufptr);
          bufptr = memseta(bufptr, 32, 3);
          bufptr = uint32toa_w10x(perm_attempt_ct[set_idx], ' ', bufptr);
        }
        if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
          goto write_set_test_results_ret_WRITE_FAIL;
        }
        if (final_sig_ct) {
          // proxy_arr[0..final_sig_ct) holds the selected variants.
          fputs(&(marker_ids[marker_idx_to_uidx[proxy_arr[0]] * max_marker_id_len]), outfile);
          for (uii = 1; uii < final_sig_ct; uii++) {
            putc_unlocked('|', outfile);
            fputs(&(marker_ids[marker_idx_to_uidx[proxy_arr[uii]] * max_marker_id_len]), outfile);
          }
        } else {
          // Nothing was selected, so proxy_arr holds no valid entry to print.
          fputs("NA", outfile);
        }
        if (putc_checked('\n', outfile)) {
          goto write_set_test_results_ret_WRITE_FAIL;
        }
      }
      set_midx++;
      set_idx++;
    } else {
      if (!perm_count) {
        bufptr = memcpya(bufptr, untested_tail, sizeof(untested_tail) - 1);
      } else {
        bufptr = memcpya(bufptr, untested_tail_perm, sizeof(untested_tail_perm) - 1);
      }
      if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
        goto write_set_test_results_ret_WRITE_FAIL;
      }
      // Untested but nonempty sets still take part in the multiple-testing
      // correction, with p = 1.
      if (nonempty_set_incl && is_set(nonempty_set_incl, set_uidx)) {
        empirical_pvals[set_midx] = 1.0;
        set_midx++;
      }
    }
  }
  if (fclose_null(&outfile)) {
    goto write_set_test_results_ret_WRITE_FAIL;
  }
  LOGPRINTFWW("Set test results written to %s .\n", outname);
  if (empirical_pvals) {
    if (bigstack_alloc_ui(nonempty_set_ct, &nonempty_set_idx_to_uidx)) {
      goto write_set_test_results_ret_NOMEM;
    }
    fill_idx_to_uidx_incl(nonempty_set_incl, raw_set_ct, nonempty_set_ct, nonempty_set_idx_to_uidx);
    // .qassoc.set.adjusted instead of .set.mperm.adjusted, etc.
    *outname_end2 = '\0';
    retval = multcomp(outname, outname_end2, nonempty_set_idx_to_uidx, nonempty_set_ct, sip->names, max_set_id_len, 0, nullptr, nullptr, pfilter, output_min_p, mtest_adjust, 1, 0.0, nullptr, empirical_pvals);
  }
  // Error-label block: only reachable via goto.
  while (0) {
  write_set_test_results_ret_NOMEM:
    retval = RET_NOMEM;
    break;
  write_set_test_results_ret_OPEN_FAIL:
    retval = RET_OPEN_FAIL;
    break;
  write_set_test_results_ret_WRITE_FAIL:
    retval = RET_WRITE_FAIL;
    break;
  }
  fclose_cond(outfile);
  return retval;
}
12624
// Variable-length record holding one p-value plus inline annotation text.
// Records chain through `next`; presumably consumed by clump_reports() --
// confirm against its body.
typedef struct clump_entry_struct {
  double pval;                      // p-value carried by this record
  struct clump_entry_struct* next;  // next record in the chain
  uint32_t fidx;                    // presumably the index of the source file -- TODO confirm
  char annot[];                     // flexible array member: annotation bytes stored inline after the fixed fields
} Clump_entry;
12631
// Fixed-size (r2, marker_idx, fidx) triple.  Presumably describes one member
// of the clump currently being assembled -- confirm against clump_reports().
typedef struct cur_clump_info_struct {
  double r2;            // r^2 value associated with this entry
  uint32_t marker_idx;  // variant index of this entry
  uint32_t fidx;        // presumably the index of the source file -- TODO confirm
} Cur_clump_info;
12637
// Linked-list node for an index-eligible result line (p-value <= p1
// threshold) whose variant ID was not found in the current fileset.
// clump_reports() allocates these with malloc() rather than from the
// workspace and pushes each onto the front of not_found_list.
typedef struct clump_missing_id_struct {
  // p-value parsed from the result line
  double pval;
  // node that was previously at the head of the list (or null)
  struct clump_missing_id_struct* next;
  // Flexible array member: null-terminated copy of the variant ID.
  char idstr[];
} Clump_missing_id;
12643
// Adds one to the significance bin of histo[] that pval falls into.
//
// histo must have (at least) 5 elements, laid out as
//   [0] pval >= 0.05 (or not comparable, e.g. NaN)
//   [1] 0.01   <= pval < 0.05
//   [2] 0.001  <= pval < 0.01
//   [3] 0.0001 <= pval < 0.001
//   [4] pval < 0.0001
void update_clump_histo(double pval, uintptr_t* histo) {
  // Cutoffs are strictly decreasing, so the bin index is simply the number of
  // leading cutoffs that pval is strictly below.
  static const double bin_cutoffs[4] = {0.05, 0.01, 0.001, 0.0001};
  uint32_t bin_idx = 0;
  while ((bin_idx < 4) && (pval < bin_cutoffs[bin_idx])) {
    bin_idx++;
  }
  histo[bin_idx] += 1;
}
12659
clump_reports(FILE * bedfile,uintptr_t bed_offset,char * outname,char * outname_end,uintptr_t unfiltered_marker_ct,uintptr_t * marker_exclude,uintptr_t marker_ct,char * marker_ids,uintptr_t max_marker_id_len,uint32_t plink_maxsnp,uint32_t * marker_pos,char ** marker_allele_ptrs,uintptr_t * marker_reverse,Chrom_info * chrom_info_ptr,uintptr_t unfiltered_sample_ct,uintptr_t * founder_info,Clump_info * clump_ip,uintptr_t * sex_male,uint32_t hh_exists)12660 int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, uint32_t* marker_pos, char** marker_allele_ptrs, uintptr_t* marker_reverse, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, Clump_info* clump_ip, uintptr_t* sex_male, uint32_t hh_exists) {
12661 unsigned char* bigstack_mark = g_bigstack_base;
12662 unsigned char* bigstack_end_mark = g_bigstack_end;
12663 gzFile gz_infile = nullptr;
12664 FILE* outfile = nullptr;
12665 FILE* outfile_ranges = nullptr;
12666 FILE* outfile_best = nullptr;
12667 uintptr_t marker_ctl = BITCT_TO_WORDCT(marker_ct);
12668 uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
12669 uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
12670 uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
12671 uintptr_t founder_ct = popcount_longs(founder_info, unfiltered_sample_ctl);
12672 uintptr_t founder_ctl2 = QUATERCT_TO_WORDCT(founder_ct);
12673 uintptr_t founder_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(founder_ct);
12674 uintptr_t final_mask = get_final_mask(founder_ct);
12675 uintptr_t range_group_ct = 0;
12676 uintptr_t max_range_group_id_len = 0;
12677 uintptr_t max_header_len = 2;
12678 uintptr_t snpfield_search_ct = 1;
12679 uintptr_t pfield_search_ct = 1;
12680 uintptr_t annot_ct = 0;
12681 uintptr_t missing_variant_ct = 0;
12682 uintptr_t cur_rg_ct = 0;
12683 uintptr_t range_chrom_max = 0;
12684 uintptr_t unmatched_group_ct = 0;
12685 uintptr_t* haploid_mask = chrom_info_ptr->haploid_mask;
12686 char* range_group_names = nullptr;
12687 char* fname_ptr = nullptr;
12688 char* annot_flattened = clump_ip->annotate_flattened;
12689 char* tbuf2 = &(g_textbuf[MAXLINELEN]);
12690 char* header2_ptr = nullptr;
12691 char* annot_ptr = nullptr;
12692 char* cur_rg_names = nullptr;
12693 uintptr_t* founder_include2 = nullptr;
12694 uintptr_t* founder_male_include2 = nullptr;
12695 uintptr_t* rg_chrom_bounds = nullptr;
12696 uint32_t** rg_setdefs = nullptr;
12697 uint32_t** cur_rg_setdefs = nullptr;
12698 Clump_missing_id* not_found_list = nullptr;
12699 uintptr_t* rangematch_bitfield = nullptr;
12700 double p1_thresh = clump_ip->p1;
12701 double p2_thresh = clump_ip->p2;
12702 double load_pthresh = 0.05;
12703 double r2_thresh = clump_ip->r2;
12704 uint32_t allow_overlap = clump_ip->modifier & CLUMP_ALLOW_OVERLAP;
12705 uint32_t clump_index_first = clump_ip->modifier & CLUMP_INDEX_FIRST;
12706 uint32_t clump_best = clump_ip->modifier & CLUMP_BEST;
12707 uint32_t clump_verbose = clump_ip->modifier & CLUMP_VERBOSE;
12708 uint32_t bp_radius = clump_ip->bp_radius;
12709 uint32_t best_fidx_match = 0xffffffffU;
12710 uint32_t require_multifile = clump_ip->modifier & CLUMP_REPLICATE;
12711 uint32_t index_eligible = 1;
12712 uint32_t header1_len = 0;
12713 uint32_t header2_len = 0;
12714 uint32_t file_ct = 0;
12715 uint32_t final_clump_ct = 0;
12716 uint32_t max_missing_id_len = 0;
12717 int32_t retval = 0;
12718 uintptr_t histo[5]; // NSIG, S05, S01, S001, S0001
12719 uint32_t index_tots[5];
12720 uint32_t counts[18];
12721 Clump_entry** clump_entries;
12722 Clump_entry* clump_entry_ptr;
12723 Clump_entry* best_entry_ptr;
12724 Cur_clump_info* cur_clump_base;
12725 Cur_clump_info* cc_ptr;
12726 uintptr_t* col_bitfield;
12727 uintptr_t* cur_bitfield;
12728 uintptr_t* loadbuf_raw;
12729 uintptr_t* index_data;
12730 uintptr_t* window_data;
12731 uintptr_t* window_data_ptr;
12732 char* sorted_missing_variant_ids;
12733 char* sorted_header_dict;
12734 char* loadbuft; // t is for text
12735 char* cur_a1;
12736 char* cur_a2;
12737 char* bufptr;
12738 char* bufptr2;
12739 char* bufptr3;
12740 char* bufptr4;
12741 uint32_t* header_id_map;
12742 uint32_t* marker_id_htable;
12743 uint32_t* parse_table;
12744 uint32_t* cur_parse_info;
12745 uint32_t* nsig_arr;
12746 uint32_t* pval_map;
12747 uint32_t* marker_uidx_to_idx;
12748 uint32_t* marker_idx_to_uidx;
12749 double* sorted_pvals;
12750 Clump_missing_id* cm_ptr;
12751 uintptr_t header_dict_ct;
12752 uintptr_t extra_annot_space;
12753 uintptr_t cur_bigstack_left;
12754 uintptr_t loadbuft_size;
12755 uintptr_t marker_idx;
12756 uintptr_t last_marker_idx;
12757 uintptr_t max_window_size; // universal bound
12758 uintptr_t cur_window_size;
12759 uintptr_t line_idx;
12760 uintptr_t ulii;
12761 uintptr_t uljj;
12762 uintptr_t ulkk;
12763 uintptr_t ulmm;
12764 double pval;
12765 double freq1x;
12766 double freq2x;
12767 double freqx1;
12768 double freqx2;
12769 double freq11;
12770 double freq11_expected;
12771 double cur_r2;
12772 double max_r2;
12773 double dxx;
12774 uint32_t marker_id_htable_size;
12775 uint32_t annot_ct_p2;
12776 uint32_t annot_ct_p2_ctl;
12777 uint32_t cur_read_ct;
12778 uint32_t index_ct;
12779 uint32_t sp_idx;
12780 uint32_t file_idx;
12781 uint32_t ivar_idx;
12782 uint32_t ivar_uidx;
12783 uint32_t cur_bp;
12784 uint32_t min_bp;
12785 uint32_t max_bp;
12786 uint32_t clump_chrom_idx;
12787 uint32_t clump_uidx_first;
12788 uint32_t clump_uidx_last;
12789 uint32_t index_fidx;
12790 uint32_t marker_uidx;
12791 uint32_t max_r2_uidx;
12792 uint32_t is_haploid;
12793 uint32_t is_x;
12794 uint32_t is_y;
12795 uint32_t a1_len;
12796 uint32_t a2_len;
12797 uint32_t allele_padding;
12798 uint32_t uii;
12799 uint32_t ujj;
12800 uint32_t ukk;
12801 uint32_t umm;
12802 int32_t ii;
12803 // suppress warning
12804 index_tots[3] = 0;
12805 index_tots[4] = 0;
12806
12807 if (annot_flattened && (!clump_verbose) && (!clump_best)) {
12808 logerrprint("Error: --clump-annotate must be used with --clump-verbose or --clump-best.\n");
12809 goto clump_reports_ret_INVALID_CMDLINE;
12810 }
12811 if (!founder_ct) {
12812 logerrprint("Warning: Skipping --clump since there are no founders. (--make-founders may\ncome in handy here.)\n");
12813 goto clump_reports_ret_1;
12814 }
12815 if (clump_best) {
12816 load_pthresh = 1.0;
12817 } else {
12818 if (p2_thresh > load_pthresh) {
12819 load_pthresh = p2_thresh;
12820 }
12821 if (p1_thresh >= load_pthresh) {
12822 // may as well maximize backwards compatibility re: which comparisons are
12823 // > vs. >=
12824 load_pthresh = p1_thresh * (1 + SMALL_EPSILON);
12825 }
12826 }
12827 if (clump_ip->range_fname) {
12828 // 1. load range file, sort, etc.
12829 retval = load_range_list_sortpos(clump_ip->range_fname, clump_ip->range_border, 0, nullptr, 0, chrom_info_ptr, &range_group_ct, &range_group_names, &max_range_group_id_len, &rg_chrom_bounds, &rg_setdefs, &range_chrom_max, "--clump-range");
12830 if (retval) {
12831 goto clump_reports_ret_1;
12832 }
12833 }
12834 // 2. create marker ID hash table, allocate index-tracking bitfield
12835 retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, 0, &marker_id_htable_size, &marker_id_htable);
12836 if (retval) {
12837 goto clump_reports_ret_1;
12838 }
12839 if (bigstack_calloc_ul(marker_ctl, &cur_bitfield) ||
12840 bigstack_alloc_ui(unfiltered_marker_ct, &marker_uidx_to_idx)) {
12841 goto clump_reports_ret_NOMEM;
12842 }
12843 fill_uidx_to_idx(marker_exclude, unfiltered_marker_ct, marker_ct, marker_uidx_to_idx);
12844 if (clump_ip->snpfield_search_order) {
12845 snpfield_search_ct = count_and_measure_multistr(clump_ip->snpfield_search_order, &max_header_len);
12846 } else {
12847 max_header_len = 4; // 'SNP' + null terminator
12848 }
12849 if (clump_ip->pfield_search_order) {
12850 pfield_search_ct = count_and_measure_multistr(clump_ip->pfield_search_order, &max_header_len);
12851 }
12852 if (annot_flattened) {
12853 annot_ct = count_and_measure_multistr(annot_flattened, &max_header_len);
12854 }
12855 header_dict_ct = snpfield_search_ct + pfield_search_ct + annot_ct;
12856 // parse_table[2k + 1] stores the number of additional fields to skip before
12857 // reading that particular entry. For example, if variant IDs are in the
12858 // second column in the current file, while p-values are in the fifth column,
12859 // parse_table[1] is 1 and parse_table[3] = 2.
12860 // parse_table[2k] stores the type of field contents (0 = variant ID, 1 =
12861 // P-value, 2 or more = annotation).
  // In the main loop, cur_parse_info[2k] stores the in-loadbuft offset of the
  // string with that parse_table[2k] index, and cur_parse_info[2k + 1]
12864 // stores string length.
12865 annot_ct_p2 = 2 + annot_ct;
12866 annot_ct_p2_ctl = (annot_ct + (BITCT + 1)) / BITCT;
12867 if (bigstack_alloc_c(max_header_len * header_dict_ct, &sorted_header_dict) ||
12868 bigstack_alloc_ui(header_dict_ct, &header_id_map) ||
12869 bigstack_alloc_ul(annot_ct_p2_ctl, &col_bitfield) ||
12870 bigstack_alloc_ui(annot_ct_p2 * 2, &parse_table) ||
12871 bigstack_alloc_ui(annot_ct_p2 * 2, &cur_parse_info)) {
12872 goto clump_reports_ret_NOMEM;
12873 }
12874 ulii = 0; // write position
12875 if (clump_ip->snpfield_search_order) {
12876 bufptr = clump_ip->snpfield_search_order;
12877 uii = 0x40000000;
12878 do {
12879 ujj = strlen(bufptr) + 1;
12880 memcpy(&(sorted_header_dict[ulii * max_header_len]), bufptr, ujj);
12881 header_id_map[ulii++] = uii++;
12882 bufptr = &(bufptr[ujj]);
12883 } while (*bufptr);
12884 } else {
12885 memcpy(sorted_header_dict, "SNP", 4);
12886 header_id_map[0] = 0x40000000;
12887 ulii++;
12888 }
12889 if (clump_ip->pfield_search_order) {
12890 bufptr = clump_ip->pfield_search_order;
12891 uii = 0x20000000;
12892 do {
12893 ujj = strlen(bufptr) + 1;
12894 memcpy(&(sorted_header_dict[ulii * max_header_len]), bufptr, ujj);
12895 header_id_map[ulii++] = uii++;
12896 bufptr = &(bufptr[ujj]);
12897 } while (*bufptr);
12898 } else {
12899 memcpy(&(sorted_header_dict[ulii * max_header_len]), "P", 2);
12900 header_id_map[ulii++] = 0x20000000;
12901 }
12902 if (annot_flattened) {
12903 bufptr = annot_flattened;
12904 uii = 2;
12905 do {
12906 ujj = strlen(bufptr) + 1;
12907 memcpy(&(sorted_header_dict[ulii * max_header_len]), bufptr, ujj);
12908 header_id_map[ulii++] = uii++;
12909 bufptr = &(bufptr[ujj]);
12910 } while (*bufptr);
12911 }
12912 if (qsort_ext(sorted_header_dict, header_dict_ct, max_header_len, strcmp_deref, (char*)header_id_map, sizeof(int32_t))) {
12913 goto clump_reports_ret_NOMEM;
12914 }
12915 if (scan_for_duplicate_ids(sorted_header_dict, header_dict_ct, max_header_len)) {
12916 logerrprint("Error: Duplicate --clump-snp-field/--clump-field/--clump-annotate field name.\n");
12917 goto clump_reports_ret_INVALID_CMDLINE;
12918 }
12919
12920 if (bigstack_calloc_ui(marker_ct, &nsig_arr)) {
12921 goto clump_reports_ret_NOMEM;
12922 }
12923 clump_entries = (Clump_entry**)bigstack_alloc(marker_ct * sizeof(intptr_t));
12924 if (!clump_entries) {
12925 goto clump_reports_ret_NOMEM;
12926 }
12927 fill_ulong_zero(marker_ct, (uintptr_t*)clump_entries);
12928 // 3. load file(s) in sequence. start with array of null pointers, allocate
12929 // from bottom of stack (possibly need to save p-val, file number,
12930 // annotations, and/or pointer to next entry) while updating
12931 // p-val/reverse-lookup array
12932 bufptr = clump_ip->fnames_flattened;
12933 do {
12934 fname_ptr = bufptr;
12935 bufptr = strchr(bufptr, '\0');
12936 bufptr++;
12937 file_ct++;
12938 } while (*bufptr);
12939 loadbuft = (char*)g_bigstack_base;
12940 if (clump_best) {
12941 if (file_ct == 2) {
12942 if (!clump_index_first) {
12943 logerrprint("Error: --clump-best can no longer be used with two --clump files unless\n--clump-index-first is also specified. (Contact the developers if this is\nproblematic.)\n");
12944 goto clump_reports_ret_INVALID_CMDLINE;
12945 }
12946 } else if (file_ct > 2) {
12947 logerrprint("Error: --clump-best can no longer be used with more than two --clump files.\n(Contact the developers if this is problematic.)\n");
12948 goto clump_reports_ret_INVALID_CMDLINE;
12949 }
12950 // only draw proxies from this file
12951 best_fidx_match = file_ct;
12952 }
12953 // Suppose the current line has a super-long allele code which must be saved
12954 // (since it will go into the ANNOT field). Then the new allocation may need
12955 // to be the size of the entire line. So, to be safe, we require the current
12956 // line to fit in ~half of available workspace.
12957 // To reduce the risk of 32-bit integer overflow bugs, we cap line length at
12958 // a bit under 2^30 instead of 2^31 here.
12959 extra_annot_space = (48 + 2 * annot_ct) & (~(15 * ONELU));
12960 cur_bigstack_left = bigstack_left();
12961 if (cur_bigstack_left <= 2 * MAXLINELEN + extra_annot_space) {
12962 goto clump_reports_ret_NOMEM;
12963 } else if (cur_bigstack_left - extra_annot_space >= MAXLINEBUFLEN) {
12964 loadbuft[(MAXLINEBUFLEN / 2) - 1] = ' ';
12965 }
12966 if (clump_index_first && (file_ct > 1)) {
12967 index_eligible = 0;
12968 }
12969 // load in reverse order since we're adding to the front of the linked lists
12970 for (file_idx = file_ct; file_idx; file_idx--) {
12971 retval = gzopen_read_checked(fname_ptr, &gz_infile);
12972 if (retval) {
12973 goto clump_reports_ret_1;
12974 }
12975 loadbuft_size = bigstack_left();
12976 if (loadbuft_size <= 2 * MAXLINELEN + extra_annot_space) {
12977 goto clump_reports_ret_NOMEM;
12978 }
12979 loadbuft_size = (loadbuft_size - extra_annot_space) / 2;
12980 if (loadbuft_size >= MAXLINEBUFLEN / 2) {
12981 loadbuft_size = MAXLINEBUFLEN / 2;
12982 // no space-termination needed
12983 } else {
12984 loadbuft[loadbuft_size - 1] = ' ';
12985 }
12986 ukk = 0x7fffffff; // highest-precedence variant ID header seen so far
12987 umm = 0x7fffffff; // highest-precedence p-value header seen so far
12988 // load_to_first_token() with potentially gzipped input. Move this to
12989 // plink_common if anything else needs it.
12990 line_idx = 0;
12991 while (1) {
12992 line_idx++;
12993 if (!gzgets(gz_infile, loadbuft, loadbuft_size)) {
12994 if (!gzeof(gz_infile)) {
12995 goto clump_reports_ret_READ_FAIL;
12996 } else {
12997 LOGPREPRINTFWW("Error: Empty %s.\n", fname_ptr);
12998 goto clump_reports_ret_INVALID_FORMAT_2;
12999 }
13000 }
13001 if (!(loadbuft[loadbuft_size - 1])) {
13002 if (loadbuft_size == MAXLINEBUFLEN / 2) {
13003 LOGPREPRINTFWW("Error: Line %" PRIuPTR " of %s is pathologically long.\n", line_idx, fname_ptr);
13004 goto clump_reports_ret_INVALID_FORMAT_2;
13005 } else {
13006 goto clump_reports_ret_NOMEM;
13007 }
13008 }
13009 bufptr = skip_initial_spaces(loadbuft);
13010 if (!is_eoln_kns(*bufptr)) {
13011 break;
13012 }
13013 }
13014 fill_ulong_zero(annot_ct_p2_ctl, col_bitfield);
13015 uii = 0; // current 0-based column number
13016 // We don't know in advance when the highest-precedence SNP/p-val columns
13017 // will appear, so we initially populate parse_table with
13018 // [2k]: header type index (0 = variant ID, 1 = p-val, 2+ = annot)
13019 // [2k + 1]: 0-based column number
13020 // and then sort at the end.
13021 cur_read_ct = 2;
13022 parse_table[0] = 0;
13023 parse_table[2] = 1;
13024 do {
13025 bufptr2 = token_endnn(bufptr);
13026 ii = bsearch_str(bufptr, (uintptr_t)(bufptr2 - bufptr), sorted_header_dict, max_header_len, header_dict_ct);
13027 if (ii != -1) {
13028 ujj = header_id_map[(uint32_t)ii];
13029 if (ujj >= 0x40000000) {
13030 if (ujj < ukk) {
13031 // ignore title if higher-precedence title already seen
13032 set_bit(0, col_bitfield);
13033 ukk = ujj;
13034 parse_table[1] = uii; // temporary storage
13035 } else if (ujj == ukk) {
13036 goto clump_reports_ret_DUPLICATE_HEADER_COL;
13037 }
13038 } else if (ujj >= 0x20000000) {
13039 if (ujj < umm) {
13040 set_bit(1, col_bitfield);
13041 umm = ujj;
13042 parse_table[3] = uii;
13043 } else if (ujj == umm) {
13044 goto clump_reports_ret_DUPLICATE_HEADER_COL;
13045 }
13046 } else {
13047 if (is_set(col_bitfield, ujj)) {
13048 goto clump_reports_ret_DUPLICATE_HEADER_COL;
13049 }
13050 set_bit(ujj, col_bitfield);
13051 parse_table[cur_read_ct * 2 + 1] = uii;
13052 parse_table[cur_read_ct * 2] = ujj;
13053 cur_read_ct++;
13054 }
13055 }
13056 bufptr = skip_initial_spaces(bufptr2);
13057 uii++;
13058 } while (!is_eoln_kns(*bufptr));
13059 if (!is_set(col_bitfield, 0)) {
13060 LOGPREPRINTFWW("Error: No variant ID field found in %s.\n", fname_ptr);
13061 goto clump_reports_ret_INVALID_FORMAT_2;
13062 } else if (!is_set(col_bitfield, 1)) {
13063 LOGPREPRINTFWW("Error: No p-value field found in %s.\n", fname_ptr);
13064 goto clump_reports_ret_INVALID_FORMAT_2;
13065 }
13066 #ifdef __cplusplus
13067 std::sort((int64_t*)parse_table, (int64_t*)(&(parse_table[cur_read_ct * 2])));
13068 #else
13069 qsort((int64_t*)parse_table, cur_read_ct, sizeof(int64_t), llcmp);
13070 #endif
13071 for (uii = cur_read_ct - 1; uii; uii--) {
13072 parse_table[uii * 2 + 1] -= parse_table[uii * 2 - 1] + 1;
13073 }
13074 clump_reports_load_loop:
13075 while (1) {
13076 line_idx++;
13077 if (!gzgets(gz_infile, loadbuft, loadbuft_size)) {
13078 if (!gzeof(gz_infile)) {
13079 goto clump_reports_ret_READ_FAIL;
13080 }
13081 break;
13082 }
13083 if (!loadbuft[loadbuft_size - 1]) {
13084 if (loadbuft_size == MAXLINEBUFLEN / 2) {
13085 LOGPREPRINTFWW("Error: Line %" PRIuPTR " of %s is pathologically long.\n", line_idx, fname_ptr);
13086 goto clump_reports_ret_INVALID_FORMAT_2;
13087 }
13088 goto clump_reports_ret_NOMEM;
13089 }
13090 bufptr = skip_initial_spaces(loadbuft);
13091 if (is_eoln_kns(*bufptr)) {
13092 continue;
13093 }
13094 fill_uint_zero(annot_ct_p2 * 2, cur_parse_info);
13095 uii = 0;
13096 ukk = annot_ct * 2; // annotation string length
13097 for (; uii < cur_read_ct; uii++) {
13098 bufptr = next_token_multz(bufptr, parse_table[uii * 2 + 1]);
13099 if (no_more_tokens_kns(bufptr)) {
13100 // PLINK 1.07 --clump just skips the line in this situation, instead
13101 // of erroring out, so we replicate that
13102 goto clump_reports_load_loop;
13103 }
13104 bufptr2 = token_endnn(bufptr);
13105 ujj = parse_table[uii * 2] * 2;
13106 cur_parse_info[ujj] = (uintptr_t)(bufptr - loadbuft);
13107 cur_parse_info[ujj + 1] = (uintptr_t)(bufptr2 - bufptr);
13108 if (ujj > 2) {
13109 ukk += cur_parse_info[ujj + 1];
13110 }
13111 bufptr = skip_initial_spaces(bufptr2);
13112 }
13113 if (scan_double(&(loadbuft[cur_parse_info[2]]), &pval)) {
13114 continue;
13115 }
13116 if (pval < 0.0) {
13117 LOGPREPRINTFWW("Error: Negative p-value on line %" PRIuPTR " of %s.\n", line_idx, fname_ptr);
13118 goto clump_reports_ret_INVALID_FORMAT_2;
13119 }
13120 marker_uidx = id_htable_find(&(loadbuft[cur_parse_info[0]]), cur_parse_info[1], marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len);
13121 if (marker_uidx == 0xffffffffU) {
13122 // variant ID not in current fileset
13123 if ((pval <= p1_thresh) && index_eligible) {
13124 // actually a top variant, track it
13125 missing_variant_ct++;
13126 // screw it, just allocate these outside the workspace
13127 uii = cur_parse_info[1];
13128 if (uii >= max_missing_id_len) {
13129 max_missing_id_len = uii + 1;
13130 }
13131 cm_ptr = (Clump_missing_id*)malloc(offsetof(Clump_missing_id, idstr) + uii + 1);
13132 cm_ptr->pval = pval;
13133 cm_ptr->next = not_found_list;
13134 not_found_list = cm_ptr;
13135 memcpyx(cm_ptr->idstr, &(loadbuft[cur_parse_info[0]]), uii, '\0');
13136 }
13137 continue;
13138 }
13139 marker_idx = marker_uidx_to_idx[marker_uidx];
13140 if (pval > load_pthresh) {
13141 if (pval >= 0.05) {
13142 if (pval > 1) {
13143 LOGPREPRINTFWW("Error: p-value > 1 on line %" PRIuPTR " of %s.\n", line_idx, fname_ptr);
13144 goto clump_reports_ret_INVALID_FORMAT_2;
13145 }
13146 nsig_arr[marker_idx] += 1;
13147 }
13148 continue;
13149 }
13150 clump_entry_ptr = (Clump_entry*)bigstack_end_alloc(offsetof(Clump_entry, annot) + ukk - 1);
13151 if (!clump_entry_ptr) {
13152 goto clump_reports_ret_NOMEM;
13153 }
13154 clump_entry_ptr->pval = pval;
13155 clump_entry_ptr->next = clump_entries[marker_idx];
13156 clump_entry_ptr->fidx = file_idx;
13157 if (annot_ct) {
13158 bufptr = clump_entry_ptr->annot;
13159 uii = 2;
13160 while (1) {
13161 bufptr = memcpya(bufptr, &(loadbuft[cur_parse_info[uii * 2]]), cur_parse_info[uii * 2 + 1]);
13162 if (++uii == annot_ct_p2) {
13163 break;
13164 }
13165 bufptr = memcpya(bufptr, ", ", 2);
13166 }
13167 *bufptr = '\0';
13168 }
13169 clump_entries[marker_idx] = clump_entry_ptr;
13170 if ((pval <= p1_thresh) && index_eligible) {
13171 set_bit(marker_idx, cur_bitfield);
13172 }
13173 loadbuft_size = bigstack_left();
13174 if (loadbuft_size <= 2 * MAXLINELEN + extra_annot_space) {
13175 goto clump_reports_ret_NOMEM;
13176 }
13177 loadbuft_size = (loadbuft_size - extra_annot_space) / 2;
13178 if (loadbuft_size >= MAXLINEBUFLEN / 2) {
13179 loadbuft_size = MAXLINEBUFLEN / 2;
13180 // no space-termination needed
13181 } else {
13182 loadbuft[loadbuft_size - 1] = ' ';
13183 }
13184 }
13185 if (gzclose_null(&gz_infile)) {
13186 goto clump_reports_ret_READ_FAIL;
13187 }
13188 if (file_idx > 1) {
13189 fname_ptr = &(fname_ptr[-3]);
13190 while (*fname_ptr) {
13191 fname_ptr--;
13192 }
13193 fname_ptr++;
13194 if (clump_index_first && (file_idx == 2)) {
13195 index_eligible = 1;
13196 }
13197 }
13198 }
13199 // 4. sort p-val array, greedily form clumps
13200 index_ct = popcount_longs(cur_bitfield, marker_ctl);
13201 if (!index_ct) {
13202 logerrprint("Warning: No significant --clump results. Skipping.\n");
13203 goto clump_reports_ret_1;
13204 }
13205 if (bigstack_alloc_d(index_ct, &sorted_pvals) ||
13206 bigstack_alloc_ui(index_ct, &pval_map)) {
13207 goto clump_reports_ret_NOMEM;
13208 }
13209 marker_idx = 0;
13210 for (uii = 0; uii < index_ct; uii++, marker_idx++) {
13211 marker_idx = next_set_unsafe(cur_bitfield, marker_idx);
13212 clump_entry_ptr = clump_entries[marker_idx];
13213 pval = clump_entry_ptr->pval;
13214 if (!clump_index_first) {
13215 while (clump_entry_ptr->next) {
13216 clump_entry_ptr = clump_entry_ptr->next;
13217 if (clump_entry_ptr->pval < pval) {
13218 pval = clump_entry_ptr->pval;
13219 }
13220 }
13221 }
13222 sorted_pvals[uii] = pval;
13223 pval_map[uii] = marker_idx;
13224 }
13225 if (qsort_ext((char*)sorted_pvals, index_ct, sizeof(double), double_cmp_deref, (char*)pval_map, sizeof(int32_t))) {
13226 goto clump_reports_ret_NOMEM;
13227 }
13228 if (bigstack_alloc_ui(marker_ct, &marker_idx_to_uidx) ||
13229 bigstack_alloc_ul(unfiltered_sample_ctl2, &loadbuf_raw) ||
13230 bigstack_alloc_ul(5 * founder_ctv2, &index_data)) {
13231 goto clump_reports_ret_NOMEM;
13232 }
13233 for (uii = 1; uii <= 5; uii++) {
13234 index_data[uii * founder_ctv2 - 2] = 0;
13235 index_data[uii * founder_ctv2 - 1] = 0;
13236 }
13237 if (alloc_collapsed_haploid_filters(founder_info, sex_male, unfiltered_sample_ct, founder_ct, Y_FIX_NEEDED, 1, &founder_include2, &founder_male_include2)) {
13238 goto clump_reports_ret_NOMEM;
13239 }
13240 if (clump_verbose && rg_setdefs) {
13241 if (bigstack_alloc_ul(BITCT_TO_WORDCT(range_chrom_max), &rangematch_bitfield)) {
13242 goto clump_reports_ret_NOMEM;
13243 }
13244 }
13245 window_data = (uintptr_t*)g_bigstack_base;
13246 max_window_size = bigstack_left() / (founder_ctv2 * sizeof(intptr_t) + sizeof(Cur_clump_info));
13247 if (!max_window_size) {
13248 goto clump_reports_ret_NOMEM;
13249 }
13250 fill_idx_to_uidx(marker_exclude, unfiltered_marker_ct, marker_ct, marker_idx_to_uidx);
13251 loadbuf_raw[unfiltered_sample_ctl2 - 1] = 0;
13252 // now this indicates whether a variant has previously been in a clump
13253 fill_ulong_zero(marker_ctl, cur_bitfield);
13254 // 5. iterate through clumps, calculate r^2 and write output
13255 memcpy(outname_end, ".clumped", 9);
13256 if (fopen_checked(outname, "w", &outfile)) {
13257 goto clump_reports_ret_OPEN_FAIL;
13258 }
13259 bufptr = tbuf2;
13260 if (clump_verbose) {
13261 *bufptr++ = '\n';
13262 }
13263 bufptr = memcpya(bufptr, " CHR F ", 10);
13264 bufptr = fw_strcpyn(plink_maxsnp, 3, "SNP", bufptr);
13265 // replicate the misaligned non-verbose header for now
13266 bufptr = memcpya(bufptr, " BP ", clump_verbose? 21 : 19);
13267 bufptr = strcpya(bufptr, "P TOTAL NSIG S05 S01 S001 S0001");
13268 if (!clump_verbose) {
13269 bufptr = memcpya(bufptr, " SP2\n", 8);
13270 if (fwrite_checked(tbuf2, bufptr - tbuf2, outfile)) {
13271 goto clump_reports_ret_WRITE_FAIL;
13272 }
13273 if (rg_setdefs) {
13274 memcpy(&(outname_end[8]), ".ranges", 8);
13275 if (fopen_checked(outname, "w", &outfile_ranges)) {
13276 goto clump_reports_ret_OPEN_FAIL;
13277 }
13278 bufptr = fw_strcpyn(plink_maxsnp, 3, "SNP", &(tbuf2[5]));
13279 bufptr = strcpya(bufptr, " P N POS KB RANGES\n");
13280 if (fwrite_checked(tbuf2, bufptr - tbuf2, outfile_ranges)) {
13281 goto clump_reports_ret_WRITE_FAIL;
13282 }
13283 }
13284 } else {
13285 *bufptr++ = '\n';
13286 header2_ptr = bufptr;
13287 header1_len = (uintptr_t)(header2_ptr - tbuf2);
13288 *bufptr++ = '\n';
13289 bufptr = memseta(bufptr, 32, 19 + plink_maxsnp);
13290 bufptr = strcpya(bufptr, "KB RSQ ALLELES F P ");
13291 if (annot_flattened) {
13292 bufptr = memcpya(bufptr, " ANNOT", 12);
13293 }
13294 bufptr = memcpya(bufptr, "\n (INDEX) ", 11);
13295 header2_len = (uintptr_t)(bufptr - header2_ptr);
13296 }
13297 if (clump_best) {
13298 memcpy(&(outname_end[8]), ".best", 6);
13299 if (fopen_checked(outname, "w", &outfile_best)) {
13300 goto clump_reports_ret_OPEN_FAIL;
13301 }
13302 bufptr = fw_strcpyn(plink_maxsnp, 5, "INDEX", g_textbuf);
13303 *bufptr++ = ' ';
13304 bufptr = fw_strcpyn(plink_maxsnp, 4, "PSNP", bufptr);
13305 bufptr = strcpya(bufptr, " RSQ KB P ALLELES F\n");
13306 if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile_best)) {
13307 goto clump_reports_ret_WRITE_FAIL;
13308 }
13309 }
13310 for (sp_idx = 0; sp_idx < index_ct; sp_idx++) {
13311 ivar_idx = pval_map[sp_idx];
13312 if ((!clump_best) && is_set(cur_bitfield, ivar_idx)) {
13313 continue;
13314 }
13315 ivar_uidx = marker_idx_to_uidx[ivar_idx];
13316 cur_bp = marker_pos[ivar_uidx];
13317 uii = get_variant_chrom_fo_idx(chrom_info_ptr, ivar_uidx);
13318 clump_chrom_idx = chrom_info_ptr->chrom_file_order[uii];
13319 ujj = chrom_info_ptr->chrom_fo_vidx_start[uii];
13320 if (cur_bp < bp_radius) {
13321 clump_uidx_first = ujj;
13322 } else {
13323 clump_uidx_first = ujj + uint32arr_greater_than(&(marker_pos[ujj]), ivar_uidx + 1 - ujj, cur_bp - bp_radius);
13324 }
13325 next_unset_unsafe_ck(marker_exclude, &clump_uidx_first);
13326 clump_uidx_last = ivar_uidx + uint32arr_greater_than(&(marker_pos[ivar_uidx]), chrom_info_ptr->chrom_fo_vidx_start[uii + 1] - ivar_uidx, cur_bp + bp_radius + 1);
13327 prev_unset_unsafe_ck(marker_exclude, &clump_uidx_last);
13328 marker_uidx = clump_uidx_first;
13329 marker_idx = ivar_idx + popcount_bit_idx(marker_exclude, clump_uidx_first, ivar_uidx) + clump_uidx_first - ivar_uidx;
13330 // Don't want to seek backwards in the file any more than necessary, so
13331 // 1. load all clump-inclusion candidates before index variant
13332 // 2. load index variant, compute pairwise r^2s
13333 // 3. load one clump-inclusion at a time after index variant, compute r^2
13334 // 4. write main result
13335 cur_window_size = 0;
13336 is_haploid = is_set(haploid_mask, clump_chrom_idx);
13337 is_x = (clump_chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[X_OFFSET]);
13338 is_y = (clump_chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[Y_OFFSET]);
13339 window_data_ptr = window_data;
13340 for (; marker_idx < ivar_idx; marker_uidx++, marker_idx++) {
13341 next_unset_unsafe_ck(marker_exclude, &marker_uidx);
13342 if (((!allow_overlap) && is_set(cur_bitfield, marker_idx)) || ((!clump_entries[marker_idx]) && (!nsig_arr[marker_idx]))) {
13343 continue;
13344 }
13345 if (++cur_window_size == max_window_size) {
13346 goto clump_reports_ret_NOMEM;
13347 }
13348 if (fseeko(bedfile, bed_offset + marker_uidx * ((uint64_t)unfiltered_sample_ct4), SEEK_SET)) {
13349 goto clump_reports_ret_READ_FAIL;
13350 }
13351 window_data_ptr[founder_ctv2 - 2] = 0;
13352 window_data_ptr[founder_ctv2 - 1] = 0;
13353 if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, is_set(marker_reverse, marker_uidx), bedfile, loadbuf_raw, window_data_ptr)) {
13354 goto clump_reports_ret_READ_FAIL;
13355 }
13356 if (is_haploid) {
13357 haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)window_data_ptr);
13358 }
13359 window_data_ptr = &(window_data_ptr[founder_ctv2]);
13360 }
13361 next_unset_unsafe_ck(marker_exclude, &marker_uidx);
13362 if (fseeko(bedfile, bed_offset + marker_uidx * ((uint64_t)unfiltered_sample_ct4), SEEK_SET)) {
13363 goto clump_reports_ret_READ_FAIL;
13364 }
13365 window_data_ptr[founder_ctv2 - 2] = 0;
13366 window_data_ptr[founder_ctv2 - 1] = 0;
13367 if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, is_set(marker_reverse, marker_uidx), bedfile, loadbuf_raw, window_data_ptr)) {
13368 goto clump_reports_ret_READ_FAIL;
13369 }
13370 if (is_haploid) {
13371 haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)window_data_ptr);
13372 }
13373 vec_datamask(founder_ct, 0, window_data_ptr, founder_include2, index_data);
13374 index_tots[0] = popcount2_longs(index_data, founder_ctl2);
13375 vec_datamask(founder_ct, 2, window_data_ptr, founder_include2, &(index_data[founder_ctv2]));
13376 index_tots[1] = popcount2_longs(&(index_data[founder_ctv2]), founder_ctl2);
13377 vec_datamask(founder_ct, 3, window_data_ptr, founder_include2, &(index_data[2 * founder_ctv2]));
13378 index_tots[2] = popcount2_longs(&(index_data[2 * founder_ctv2]), founder_ctl2);
13379 if (is_x) {
13380 vec_datamask(founder_ct, 0, window_data_ptr, founder_male_include2, &(index_data[3 * founder_ctv2]));
13381 index_tots[3] = popcount2_longs(&(index_data[3 * founder_ctv2]), founder_ctl2);
13382 vec_datamask(founder_ct, 3, window_data_ptr, founder_male_include2, &(index_data[4 * founder_ctv2]));
13383 index_tots[4] = popcount2_longs(&(index_data[4 * founder_ctv2]), founder_ctl2);
13384 }
13385 if (!cur_window_size) {
13386 cur_clump_base = (Cur_clump_info*)(&(window_data[founder_ctv2]));
13387 } else {
13388 cur_clump_base = (Cur_clump_info*)window_data_ptr;
13389 }
13390 cc_ptr = cur_clump_base;
13391 window_data_ptr = window_data;
13392 marker_uidx = clump_uidx_first;
13393 marker_idx = ivar_idx + popcount_bit_idx(marker_exclude, clump_uidx_first, ivar_uidx) + clump_uidx_first - ivar_uidx;
13394 max_r2 = -1;
13395 max_r2_uidx = 0xffffffffU;
13396 fill_ulong_zero(5, histo);
13397 best_entry_ptr = nullptr;
13398 for (; marker_idx < ivar_idx; marker_uidx++, marker_idx++) {
13399 marker_uidx = next_unset_unsafe(marker_exclude, marker_uidx);
13400 clump_entry_ptr = clump_entries[marker_idx];
13401 if (((!allow_overlap) && is_set(cur_bitfield, marker_idx)) || ((!clump_entry_ptr) && (!nsig_arr[marker_idx]))) {
13402 continue;
13403 }
13404 genovec_3freq(window_data_ptr, index_data, founder_ctl2, &(counts[0]), &(counts[1]), &(counts[2]));
13405 counts[0] = index_tots[0] - counts[0] - counts[1] - counts[2];
13406 genovec_3freq(window_data_ptr, &(index_data[founder_ctv2]), founder_ctl2, &(counts[3]), &(counts[4]), &(counts[5]));
13407 counts[3] = index_tots[1] - counts[3] - counts[4] - counts[5];
13408 genovec_3freq(window_data_ptr, &(index_data[2 * founder_ctv2]), founder_ctl2, &(counts[6]), &(counts[7]), &(counts[8]));
13409 counts[6] = index_tots[2] - counts[6] - counts[7] - counts[8];
13410 if (is_x) {
13411 genovec_3freq(window_data_ptr, &(index_data[3 * founder_ctv2]), founder_ctl2, &(counts[9]), &(counts[10]), &(counts[11]));
13412 counts[9] = index_tots[3] - counts[9] - counts[11];
13413 genovec_3freq(window_data_ptr, &(index_data[4 * founder_ctv2]), founder_ctl2, &(counts[15]), &(counts[16]), &(counts[17]));
13414 counts[15] = index_tots[4] - counts[15] - counts[17];
13415 }
13416 if (!em_phase_hethet_nobase(counts, is_x, is_x, &freq1x, &freq2x, &freqx1, &freqx2, &freq11)) {
13417 freq11_expected = freqx1 * freq1x;
13418 dxx = freq11 - freq11_expected;
13419 cur_r2 = fabs(dxx);
13420 // if r^2 threshold is 0, let everything else through but exclude the
13421 // apparent zeroes. Zeroes *are* included if r2_thresh is negative,
13422 // though (only nans are rejected then).
13423 if (cur_r2 >= SMALL_EPSILON) {
13424 cur_r2 = cur_r2 * dxx / (freq11_expected * freq2x * freqx2);
13425 } else {
13426 cur_r2 = 0;
13427 }
13428 if (fabs(cur_r2) > r2_thresh) {
13429 while (clump_entry_ptr) {
13430 dxx = clump_entry_ptr->pval;
13431 update_clump_histo(dxx, histo);
13432 if (dxx < p2_thresh) {
13433 if (((unsigned char*)cc_ptr) >= g_bigstack_end) {
13434 goto clump_reports_ret_NOMEM;
13435 }
13436 cc_ptr->r2 = cur_r2;
13437 cc_ptr->marker_idx = marker_idx;
13438 uii = clump_entry_ptr->fidx;
13439 cc_ptr->fidx = uii;
13440 if ((uii == best_fidx_match) && (fabs(cur_r2) > max_r2)) {
13441 max_r2 = cur_r2;
13442 max_r2_uidx = marker_uidx;
13443 best_entry_ptr = clump_entry_ptr;
13444 }
13445 cc_ptr++;
13446 }
13447 clump_entry_ptr = clump_entry_ptr->next;
13448 }
13449 histo[0] += nsig_arr[marker_idx];
13450 set_bit(marker_idx, cur_bitfield);
13451 }
13452 }
13453 window_data_ptr = &(window_data_ptr[founder_ctv2]);
13454 }
13455 pval = sorted_pvals[sp_idx];
13456 clump_entry_ptr = clump_entries[ivar_idx];
13457 uii = 0;
13458 if (clump_entry_ptr->pval != pval) {
13459 uii = 1;
13460 do {
13461 dxx = clump_entry_ptr->pval;
13462 update_clump_histo(dxx, histo);
13463 if (dxx < p2_thresh) {
13464 if (((unsigned char*)cc_ptr) >= g_bigstack_end) {
13465 goto clump_reports_ret_NOMEM;
13466 }
13467 cc_ptr->r2 = 1;
13468 cc_ptr->marker_idx = ivar_idx;
13469 cc_ptr->fidx = clump_entry_ptr->fidx;
13470 // clump_best match should be impossible here
13471 cc_ptr++;
13472 }
13473 clump_entry_ptr = clump_entry_ptr->next;
13474 } while (clump_entry_ptr->pval != pval);
13475 }
13476 index_fidx = clump_entry_ptr->fidx;
13477 if (annot_flattened) {
13478 annot_ptr = clump_entry_ptr->annot;
13479 }
13480 if ((!clump_best) || allow_overlap || (!is_set(cur_bitfield, ivar_idx))) {
13481 if (clump_entry_ptr->next) {
13482 uii = 1;
13483 do {
13484 clump_entry_ptr = clump_entry_ptr->next;
13485 dxx = clump_entry_ptr->pval;
13486 update_clump_histo(dxx, histo);
13487 if (dxx < p2_thresh) {
13488 if (((unsigned char*)cc_ptr) >= g_bigstack_end) {
13489 goto clump_reports_ret_NOMEM;
13490 }
13491 cc_ptr->r2 = 1;
13492 cc_ptr->marker_idx = ivar_idx;
13493 cc_ptr->fidx = clump_entry_ptr->fidx;
13494 if (clump_best) {
13495 max_r2 = 1;
13496 max_r2_uidx = ivar_uidx;
13497 best_entry_ptr = clump_entry_ptr;
13498 }
13499 cc_ptr++;
13500 }
13501 } while (clump_entry_ptr->next);
13502 }
13503 }
13504 // include co-located entries in the clump and mark the position as clumped
13505 // iff
13506 // i. there were co-located entries in the first place, and either
13507 // ii-a. overlaps are permitted or
13508 // ii-b. index variant position was not previously clumped
13509 if ((uii || nsig_arr[ivar_idx]) && (allow_overlap || (!is_set(cur_bitfield, ivar_idx)))) {
13510 histo[0] += nsig_arr[ivar_idx];
13511 set_bit(ivar_idx, cur_bitfield);
13512 }
13513 marker_uidx = ivar_uidx;
13514 marker_idx = ivar_idx;
13515 while (marker_uidx < clump_uidx_last) {
13516 marker_uidx++;
13517 next_unset_unsafe_ck(marker_exclude, &marker_uidx);
13518 marker_idx++;
13519 clump_entry_ptr = clump_entries[marker_idx];
13520 if (((!allow_overlap) && is_set(cur_bitfield, marker_idx)) || ((!clump_entry_ptr) && (!nsig_arr[marker_idx]))) {
13521 continue;
13522 }
13523 if (fseeko(bedfile, bed_offset + marker_uidx * ((uint64_t)unfiltered_sample_ct4), SEEK_SET)) {
13524 goto clump_reports_ret_READ_FAIL;
13525 }
13526 window_data[founder_ctv2 - 2] = 0;
13527 window_data[founder_ctv2 - 1] = 0;
13528 if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, is_set(marker_reverse, marker_uidx), bedfile, loadbuf_raw, window_data)) {
13529 goto clump_reports_ret_READ_FAIL;
13530 }
13531 if (is_haploid) {
13532 haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)window_data);
13533 }
13534 genovec_3freq(window_data, index_data, founder_ctl2, &(counts[0]), &(counts[1]), &(counts[2]));
13535 counts[0] = index_tots[0] - counts[0] - counts[1] - counts[2];
13536 genovec_3freq(window_data, &(index_data[founder_ctv2]), founder_ctl2, &(counts[3]), &(counts[4]), &(counts[5]));
13537 counts[3] = index_tots[1] - counts[3] - counts[4] - counts[5];
13538 genovec_3freq(window_data, &(index_data[2 * founder_ctv2]), founder_ctl2, &(counts[6]), &(counts[7]), &(counts[8]));
13539 counts[6] = index_tots[2] - counts[6] - counts[7] - counts[8];
13540 if (is_x) {
13541 genovec_3freq(window_data, &(index_data[3 * founder_ctv2]), founder_ctl2, &(counts[9]), &(counts[10]), &(counts[11]));
13542 counts[9] = index_tots[3] - counts[9] - counts[11];
13543 genovec_3freq(window_data, &(index_data[4 * founder_ctv2]), founder_ctl2, &(counts[15]), &(counts[16]), &(counts[17]));
13544 counts[15] = index_tots[4] - counts[15] - counts[17];
13545 }
13546 if (!em_phase_hethet_nobase(counts, is_x, is_x, &freq1x, &freq2x, &freqx1, &freqx2, &freq11)) {
13547 freq11_expected = freqx1 * freq1x;
13548 dxx = freq11 - freq11_expected;
13549 cur_r2 = fabs(dxx);
13550 if (cur_r2 >= SMALL_EPSILON) {
13551 cur_r2 = cur_r2 * dxx / (freq11_expected * freq2x * freqx2);
13552 } else {
13553 cur_r2 = 0;
13554 }
13555 if (fabs(cur_r2) > r2_thresh) {
13556 while (clump_entry_ptr) {
13557 dxx = clump_entry_ptr->pval;
13558 update_clump_histo(dxx, histo);
13559 if (dxx < p2_thresh) {
13560 if (((unsigned char*)cc_ptr) >= g_bigstack_end) {
13561 goto clump_reports_ret_NOMEM;
13562 }
13563 cc_ptr->r2 = cur_r2;
13564 cc_ptr->marker_idx = marker_idx;
13565 uii = clump_entry_ptr->fidx;
13566 cc_ptr->fidx = uii;
13567 if ((uii == best_fidx_match) && (fabs(cur_r2) > max_r2)) {
13568 max_r2 = cur_r2;
13569 max_r2_uidx = marker_uidx;
13570 best_entry_ptr = clump_entry_ptr;
13571 }
13572 cc_ptr++;
13573 }
13574 clump_entry_ptr = clump_entry_ptr->next;
13575 }
13576 histo[0] += nsig_arr[marker_idx];
13577 set_bit(marker_idx, cur_bitfield);
13578 }
13579 }
13580 }
13581 cur_window_size = (uintptr_t)(cc_ptr - cur_clump_base);
13582 if (require_multifile) {
13583 if (cur_window_size < 2) {
13584 continue;
13585 }
13586 uii = cur_clump_base[0].fidx;
13587 for (ulii = 1; ulii < cur_window_size; ulii++) {
13588 if (uii != cur_clump_base[ulii].fidx) {
13589 break;
13590 }
13591 }
13592 if (ulii == cur_window_size) {
13593 continue;
13594 }
13595 }
13596 if (clump_best) {
13597 bufptr = fw_strcpy(plink_maxsnp, &(marker_ids[ivar_uidx * max_marker_id_len]), g_textbuf);
13598 *bufptr++ = ' ';
13599 if (best_entry_ptr) {
13600 bufptr = fw_strcpy(plink_maxsnp, &(marker_ids[max_r2_uidx * max_marker_id_len]), bufptr);
13601 *bufptr++ = ' ';
13602 if (max_r2_uidx == ivar_uidx) {
13603 bufptr = memcpya(bufptr, " *", 6);
13604 } else {
13605 bufptr = dtoa_g_wxp3(fabs(max_r2), 6, bufptr);
13606 }
13607 *bufptr++ = ' ';
13608 bufptr = dtoa_g_wxp3x(((double)((int32_t)(marker_pos[max_r2_uidx] - cur_bp))) * 0.001, 8, ' ', bufptr);
13609 bufptr = dtoa_g_wxp3x(best_entry_ptr->pval, 8, ' ', bufptr);
13610 if (max_r2 > 0) {
13611 uii = 0;
13612 } else {
13613 uii = 1;
13614 }
13615 cur_a1 = marker_allele_ptrs[2 * ivar_uidx];
13616 cur_a2 = marker_allele_ptrs[2 * ivar_uidx + 1];
13617 bufptr2 = marker_allele_ptrs[2 * max_r2_uidx + uii];
13618 bufptr3 = marker_allele_ptrs[2 * max_r2_uidx + 1 - uii];
13619 bufptr4 = cur_a1;
13620 for (uii = 3; uii; uii--) {
13621 if (!(*(++bufptr4))) {
13622 bufptr4 = cur_a2;
13623 for (; uii; uii--) {
13624 if (!(*(++bufptr4))) {
13625 bufptr4 = bufptr2;
13626 for (; uii; uii--) {
13627 if (!(*(++bufptr4))) {
13628 bufptr4 = bufptr3;
13629 for (; uii; uii--) {
13630 if (!(*(++bufptr4))) {
13631 bufptr = memseta(bufptr, 32, uii);
13632 break;
13633 }
13634 }
13635 break;
13636 }
13637 }
13638 break;
13639 }
13640 }
13641 break;
13642 }
13643 }
13644 if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile_best)) {
13645 goto clump_reports_ret_WRITE_FAIL;
13646 }
13647 fputs(cur_a1, outfile_best);
13648 fputs(bufptr2, outfile_best);
13649 putc_unlocked('/', outfile_best);
13650 fputs(cur_a2, outfile_best);
13651 fputs(bufptr3, outfile_best);
13652 g_textbuf[0] = ' ';
13653 bufptr = uint32toa_w8x(best_fidx_match, ' ', &(g_textbuf[1]));
13654 if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile_best)) {
13655 goto clump_reports_ret_WRITE_FAIL;
13656 }
13657 if (annot_flattened) {
13658 fputs(best_entry_ptr->annot, outfile_best);
13659 }
13660 putc_unlocked('\n', outfile_best);
13661 } else {
13662 bufptr = fw_strcpyn(plink_maxsnp, 2, "NA", bufptr);
13663 bufptr = memcpya(bufptr, " NA NA NA NA NA \n", 45);
13664 if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile_best)) {
13665 goto clump_reports_ret_WRITE_FAIL;
13666 }
13667 }
13668 }
13669 bufptr = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, clump_chrom_idx, g_textbuf));
13670 *bufptr++ = ' ';
13671 bufptr = uint32toa_w4(index_fidx, bufptr);
13672 *bufptr++ = ' ';
13673 bufptr = fw_strcpy(plink_maxsnp, &(marker_ids[ivar_uidx * max_marker_id_len]), bufptr);
13674 *bufptr++ = ' ';
13675 bufptr = uint32toa_w10x(cur_bp, ' ', bufptr);
13676 bufptr = dtoa_g_wxp3x(pval, 10, ' ', bufptr);
13677 #ifdef __LP64__
13678 // may as well be paranoid
13679 bufptr = width_force(8, bufptr, int64toa((int64_t)(histo[0] + histo[1] + histo[2] + histo[3] + histo[4]), bufptr));
13680 *bufptr++ = ' ';
13681 for (uii = 0; uii < 5; uii++) {
13682 bufptr = width_force(6, bufptr, int64toa((int64_t)((uintptr_t)histo[uii]), bufptr));
13683 *bufptr++ = ' ';
13684 }
13685 #else
13686 bufptr = uint32toa_w8x(histo[0] + histo[1] + histo[2] + histo[3] + histo[4], ' ', bufptr);
13687 for (uii = 0; uii < 5; uii++) {
13688 bufptr = uint32toa_w6x(histo[uii], ' ', bufptr);
13689 }
13690 #endif
13691 final_clump_ct++;
13692 min_bp = cur_bp;
13693 max_bp = cur_bp;
13694 if (cur_window_size) {
13695 marker_idx = cur_clump_base[0].marker_idx;
13696 if (marker_idx < ivar_idx) {
13697 min_bp = marker_pos[marker_idx_to_uidx[marker_idx]];
13698 }
13699 marker_idx = cur_clump_base[cur_window_size - 1].marker_idx;
13700 if (marker_idx > ivar_idx) {
13701 max_bp = marker_pos[marker_idx_to_uidx[marker_idx]];
13702 }
13703 }
13704 if (rg_setdefs) {
13705 ulii = rg_chrom_bounds[clump_chrom_idx];
13706 cur_rg_setdefs = &(rg_setdefs[ulii]);
13707 cur_rg_names = &(range_group_names[ulii * max_range_group_id_len + 4]);
13708 cur_rg_ct = rg_chrom_bounds[clump_chrom_idx + 1] - ulii;
13709 }
13710 if (!clump_verbose) {
13711 if (!cur_window_size) {
13712 bufptr = memcpya(bufptr, "NONE\n", 5);
13713 if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
13714 goto clump_reports_ret_WRITE_FAIL;
13715 }
13716 } else {
13717 // avoid buffer overflow
13718 if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
13719 goto clump_reports_ret_WRITE_FAIL;
13720 }
13721 g_textbuf[0] = '(';
13722 for (ulii = 0; ulii < cur_window_size;) {
13723 fputs(&(marker_ids[marker_idx_to_uidx[cur_clump_base[ulii].marker_idx] * max_marker_id_len]), outfile);
13724 bufptr = uint32toa_x(cur_clump_base[ulii].fidx, ')', &(g_textbuf[1]));
13725 ulii++;
13726 if (ulii != cur_window_size) {
13727 *bufptr++ = ',';
13728 }
13729 fwrite(g_textbuf, 1, (uintptr_t)(bufptr - g_textbuf), outfile);
13730 }
13731 if (putc_checked('\n', outfile)) {
13732 goto clump_reports_ret_WRITE_FAIL;
13733 }
13734 }
13735 if (rg_setdefs) {
13736 bufptr = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, clump_chrom_idx, g_textbuf));
13737 *bufptr++ = ' ';
13738 bufptr = fw_strcpy(plink_maxsnp, &(marker_ids[ivar_uidx * max_marker_id_len]), bufptr);
13739 *bufptr++ = ' ';
13740 bufptr = dtoa_g_wxp4x(pval, 10, ' ', bufptr);
13741 bufptr = uint32toa_w6x(cur_window_size + 1, ' ', bufptr);
13742 if (clump_chrom_idx <= chrom_info_ptr->max_code) {
13743 bufptr2 = memcpyl3a(bufptr, "chr");
13744 bufptr2 = uint32toa(clump_chrom_idx, bufptr2);
13745 } else if (chrom_info_ptr->zero_extra_chroms) {
13746 bufptr2 = memcpya(bufptr, "chr0", 4);
13747 } else {
13748 bufptr2 = strcpya(bufptr, chrom_info_ptr->nonstd_names[clump_chrom_idx]);
13749 }
13750 *bufptr2++ = ':';
13751 bufptr2 = uint32toa(min_bp, bufptr2);
13752 bufptr2 = memcpya(bufptr2, "..", 2);
13753 bufptr2 = uint32toa(max_bp, bufptr2);
13754 bufptr = width_force(28, bufptr, bufptr2);
13755 *bufptr++ = ' ';
13756 bufptr = width_force(10, bufptr, dtoa_g(((int32_t)(max_bp - min_bp + 1)) * 0.001, bufptr));
13757 bufptr = memcpya(bufptr, " [", 2);
13758 if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile_ranges)) {
13759 goto clump_reports_ret_WRITE_FAIL;
13760 }
13761 uljj = 0;
13762 for (ulii = 0; ulii < cur_rg_ct; ulii++) {
13763 if (interval_in_setdef(cur_rg_setdefs[ulii], min_bp, max_bp)) {
13764 if (uljj) {
13765 putc_unlocked(',', outfile_ranges);
13766 } else {
13767 uljj = 1;
13768 }
13769 fputs(&(cur_rg_names[ulii * max_range_group_id_len]), outfile_ranges);
13770 }
13771 }
13772 fputs("]\n", outfile_ranges);
13773 }
13774 } else {
13775 if (fwrite_checked(tbuf2, header1_len, outfile)) {
13776 goto clump_reports_ret_WRITE_FAIL;
13777 }
13778 *bufptr++ = '\n';
13779 if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
13780 goto clump_reports_ret_WRITE_FAIL;
13781 }
13782 if (cur_window_size) {
13783 if (fwrite_checked(header2_ptr, header2_len, outfile)) {
13784 goto clump_reports_ret_WRITE_FAIL;
13785 }
13786 bufptr = fw_strcpy(plink_maxsnp, &(marker_ids[ivar_uidx * max_marker_id_len]), g_textbuf);
13787 bufptr = memcpya(bufptr, " 0 1.000 ", 21);
13788 cur_a1 = marker_allele_ptrs[2 * ivar_uidx];
13789 a1_len = strlen(cur_a1);
13790 if (a1_len < 8) {
13791 bufptr = memseta(bufptr, 32, 8 - a1_len);
13792 }
13793 if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
13794 goto clump_reports_ret_WRITE_FAIL;
13795 }
13796 fwrite(cur_a1, 1, a1_len, outfile);
13797 cur_a2 = marker_allele_ptrs[2 * ivar_uidx + 1];
13798 a2_len = strlen(cur_a2);
13799 if (a1_len + a2_len < 5) {
13800 allele_padding = 5 - a1_len - a2_len;
13801 } else {
13802 allele_padding = 0;
13803 }
13804 g_textbuf[0] = ' ';
13805 bufptr = uint32toa_w4x(index_fidx, ' ', &(g_textbuf[1]));
13806 bufptr = dtoa_g_wxp3x(pval, 12, ' ', bufptr);
13807 if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
13808 goto clump_reports_ret_WRITE_FAIL;
13809 }
13810 if (annot_flattened) {
13811 bufptr2 = annot_ptr;
13812 for (uii = 11; uii; uii--) {
13813 if (!(*(++bufptr2))) {
13814 fwrite(" ", 1, uii, outfile);
13815 break;
13816 }
13817 }
13818 fputs(annot_ptr, outfile);
13819 }
13820 fputs("\n\n", outfile);
13821 last_marker_idx = ~ZEROLU;
13822 if (rg_setdefs) {
13823 fill_ulong_zero(BITCT_TO_WORDCT(cur_rg_ct), rangematch_bitfield);
13824 unmatched_group_ct = cur_rg_ct;
13825 }
13826 for (ulii = 0; ulii < cur_window_size; ulii++) {
13827 bufptr = memseta(g_textbuf, 32, 10);
13828 marker_idx = cur_clump_base[ulii].marker_idx;
13829 if (last_marker_idx != marker_idx) {
13830 marker_uidx = marker_idx_to_uidx[marker_idx];
13831 clump_entry_ptr = clump_entries[marker_idx];
13832 if (rg_setdefs) {
13833 uii = marker_pos[marker_uidx];
13834 uljj = 0; // range group idx
13835 ulkk = 0; // number of new matches
13836 for (ulmm = 0; ulmm < unmatched_group_ct; uljj++, ulmm++) {
13837 next_unset_ul_unsafe_ck(rangematch_bitfield, &uljj);
13838 if (interval_in_setdef(cur_rg_setdefs[uljj], uii, uii + 1)) {
13839 set_bit(uljj, rangematch_bitfield);
13840 ulkk++;
13841 }
13842 }
13843 unmatched_group_ct -= ulkk;
13844 }
13845 }
13846 ukk = cur_clump_base[ulii].fidx;
13847 while (clump_entry_ptr->fidx != ukk) {
13848 clump_entry_ptr = clump_entry_ptr->next;
13849 }
13850 bufptr = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx * max_marker_id_len]), bufptr);
13851 *bufptr++ = ' ';
13852 bufptr = dtoa_g_wxp3x(((double)(((int32_t)marker_pos[marker_uidx]) - ((int32_t)cur_bp))) * 0.001, 10, ' ', bufptr);
13853 cur_r2 = cur_clump_base[ulii].r2;
13854 if (cur_r2 > 0) {
13855 ujj = 0;
13856 } else {
13857 ujj = 1; // reversed phase
13858 }
13859 bufptr = dtoa_g_wxp3x(fabs(cur_r2), 8, ' ', bufptr);
13860 bufptr2 = marker_allele_ptrs[marker_uidx * 2 + ujj];
13861 bufptr3 = marker_allele_ptrs[marker_uidx * 2 + 1 - ujj];
13862 if (allele_padding) {
13863 bufptr4 = bufptr2;
13864 for (uii = allele_padding; uii; uii--) {
13865 // fast in common case, don't bother to compute strlen for long
13866 // indels
13867 if (!(*(++bufptr4))) {
13868 bufptr4 = bufptr3;
13869 for (; uii; uii--) {
13870 if (!(*(++bufptr4))) {
13871 bufptr = memseta(bufptr, 32, uii);
13872 break;
13873 }
13874 }
13875 break;
13876 }
13877 }
13878 }
13879 if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
13880 goto clump_reports_ret_WRITE_FAIL;
13881 }
13882 fwrite(cur_a1, 1, a1_len, outfile);
13883 fputs(bufptr2, outfile);
13884 putc_unlocked('/', outfile);
13885 fwrite(cur_a2, 1, a2_len, outfile);
13886 fputs(bufptr3, outfile);
13887 g_textbuf[0] = ' ';
13888 bufptr = uint32toa_w4x(cur_clump_base[ulii].fidx, ' ', &(g_textbuf[1]));
13889 bufptr = dtoa_g_wxp3x(clump_entry_ptr->pval, 12, ' ', bufptr);
13890 if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
13891 goto clump_reports_ret_WRITE_FAIL;
13892 }
13893 if (annot_flattened) {
13894 bufptr2 = clump_entry_ptr->annot;
13895 bufptr3 = bufptr2;
13896 for (uii = 11; uii; uii--) {
13897 if (!(*(++bufptr3))) {
13898 fwrite(" ", 1, uii, outfile);
13899 break;
13900 }
13901 }
13902 fputs(bufptr2, outfile);
13903 }
13904 putc_unlocked('\n', outfile);
13905 last_marker_idx = marker_idx;
13906 }
13907 bufptr = memcpya(g_textbuf, "\n RANGE: ", 18);
13908 if (clump_chrom_idx <= chrom_info_ptr->max_code) {
13909 bufptr = memcpyl3a(bufptr, "chr");
13910 bufptr = uint32toa(clump_chrom_idx, bufptr);
13911 } else if (chrom_info_ptr->zero_extra_chroms) {
13912 bufptr = memcpya(bufptr, "chr0", 4);
13913 } else {
13914 bufptr = strcpya(bufptr, chrom_info_ptr->nonstd_names[clump_chrom_idx]);
13915 }
13916 *bufptr++ = ':';
13917 bufptr = uint32toa(min_bp, bufptr);
13918 bufptr = memcpya(bufptr, "..", 2);
13919 bufptr = uint32toa(max_bp, bufptr);
13920 bufptr = memcpya(bufptr, "\n SPAN: ", 18);
13921 bufptr = uint32toa((max_bp - min_bp + 1) / 1000, bufptr);
13922 bufptr = memcpyl3a(bufptr, "kb\n");
13923 if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
13924 goto clump_reports_ret_WRITE_FAIL;
13925 }
13926 if (rg_setdefs) {
13927 fputs(" GENES w/SNPs: ", outfile);
13928 ulii = 0;
13929 uljj = 0;
13930 unmatched_group_ct = cur_rg_ct - unmatched_group_ct;
13931 if (unmatched_group_ct) {
13932 while (1) {
13933 uljj = next_set_ul_unsafe(rangematch_bitfield, uljj);
13934 fputs(&(cur_rg_names[uljj * max_range_group_id_len]), outfile);
13935 if (!(--unmatched_group_ct)) {
13936 break;
13937 }
13938 uljj++;
13939 putc_unlocked(',', outfile);
13940 }
13941 }
13942 putc_unlocked('\n', outfile);
13943 }
13944 }
13945 if (rg_setdefs) {
13946 if (!cur_window_size) {
13947 putc_unlocked('\n', outfile);
13948 }
13949 fputs(" GENES: ", outfile);
13950 uljj = 0;
13951 for (ulii = 0; ulii < cur_rg_ct; ulii++) {
13952 if (interval_in_setdef(cur_rg_setdefs[ulii], min_bp, max_bp)) {
13953 if (uljj) {
13954 if (uljj & 7) {
13955 putc_unlocked(',', outfile);
13956 } else {
13957 putc_unlocked('\n', outfile);
13958 }
13959 }
13960 fputs(&(cur_rg_names[ulii * max_range_group_id_len]), outfile);
13961 uljj++;
13962 }
13963 }
13964 putc_unlocked('\n', outfile);
13965 }
13966 if (fwrite_checked("\n------------------------------------------------------------------\n\n", 69, outfile)) {
13967 goto clump_reports_ret_WRITE_FAIL;
13968 }
13969 }
13970 }
13971 putc_unlocked('\n', outfile);
13972 if (missing_variant_ct) {
13973 // 1. sort by ID (could switch this to hash table-based too)
13974 // 2. pick smallest pval when duplicates present
13975 // 3. sort by pval
13976 // 4. write results
13977 bigstack_double_reset(bigstack_mark, bigstack_end_mark);
13978 if (bigstack_alloc_c(missing_variant_ct * max_missing_id_len, &sorted_missing_variant_ids) ||
13979 bigstack_alloc_d(missing_variant_ct, &sorted_pvals)) {
13980 goto clump_reports_ret_NOMEM;
13981 }
13982 for (ulii = 0; ulii < missing_variant_ct; ulii++) {
13983 cm_ptr = not_found_list;
13984 strcpy(&(sorted_missing_variant_ids[ulii * max_missing_id_len]), cm_ptr->idstr);
13985 sorted_pvals[ulii] = cm_ptr->pval;
13986 not_found_list = not_found_list->next;
13987 free(cm_ptr);
13988 }
13989 if (qsort_ext(sorted_missing_variant_ids, missing_variant_ct, max_missing_id_len, strcmp_deref, (char*)sorted_pvals, sizeof(double))) {
13990 goto clump_reports_ret_NOMEM;
13991 }
13992 bufptr = sorted_missing_variant_ids;
13993 uii = strlen(sorted_missing_variant_ids);
13994 for (ulii = 1; ulii < missing_variant_ct; ulii++) {
13995 bufptr2 = &(bufptr[max_missing_id_len]);
13996 ujj = strlen(bufptr2);
13997 if ((uii == ujj) && (!memcmp(bufptr, bufptr2, uii))) {
13998 uljj = ulii - 1; // write index
13999 pval = sorted_pvals[uljj];
14000 if (pval > sorted_pvals[ulii]) {
14001 pval = sorted_pvals[ulii];
14002 }
14003 while (++ulii < missing_variant_ct) {
14004 bufptr2 = &(bufptr2[max_missing_id_len]);
14005 ujj = strlen(bufptr2);
14006 if ((uii == ujj) && (!memcmp(bufptr, bufptr2, uii))) {
14007 if (pval > sorted_pvals[ulii]) {
14008 pval = sorted_pvals[ulii];
14009 }
14010 } else {
14011 sorted_pvals[uljj++] = pval;
14012 bufptr = &(bufptr[max_missing_id_len]);
14013 memcpy(bufptr, bufptr2, ujj + 1);
14014 pval = sorted_pvals[ulii];
14015 uii = ujj;
14016 }
14017 }
14018 sorted_pvals[uljj] = pval;
14019 ulii = uljj + 1; // save final array length
14020 break;
14021 }
14022 bufptr = bufptr2;
14023 uii = ujj;
14024 }
14025 missing_variant_ct = ulii;
14026 if (qsort_ext((char*)sorted_pvals, missing_variant_ct, sizeof(double), double_cmp_deref, sorted_missing_variant_ids, max_missing_id_len)) {
14027 goto clump_reports_ret_NOMEM;
14028 }
14029 if (clump_verbose) {
14030 for (ulii = 0; ulii < missing_variant_ct; ulii++) {
14031 fputs(&(sorted_missing_variant_ids[ulii * max_missing_id_len]), outfile);
14032 fputs(" not found in dataset\n", outfile);
14033 }
14034 LOGPRINTF("%" PRIuPTR " top variant ID%s missing; see the end of the .clumped file.\n", missing_variant_ct, (missing_variant_ct == 1)? "" : "s");
14035 } else {
14036 uljj = MINV(missing_variant_ct, 3);
14037 for (ulii = 0; ulii < uljj; ulii++) {
14038 LOGERRPRINTFWW("Warning: '%s' is missing from the main dataset, and is a top variant.\n", &(sorted_missing_variant_ids[ulii * max_missing_id_len]));
14039 }
14040 if (missing_variant_ct > 3) {
14041 fprintf(stderr, "%" PRIuPTR " more top variant ID%s missing; see log file.\n", missing_variant_ct - 3, (missing_variant_ct == 4)? "" : "s");
14042 for (ulii = 3; ulii < missing_variant_ct; ulii++) {
14043 LOGPREPRINTFWW("Warning: '%s' is missing from the main dataset, and is a top variant.\n", &(sorted_missing_variant_ids[ulii * max_missing_id_len]));
14044 logstr(g_logbuf);
14045 }
14046 }
14047 }
14048 }
14049 putc_unlocked('\n', outfile);
14050 if (fclose_null(&outfile)) {
14051 goto clump_reports_ret_WRITE_FAIL;
14052 }
14053 outname_end[8] = '\0';
14054 LOGPRINTF("--clump: %u clump%s formed from %u top variant%s.\n", final_clump_ct, (final_clump_ct == 1)? "" : "s", index_ct, (index_ct == 1)? "" : "s");
14055 LOGPRINTFWW("Results written to %s .\n", outname);
14056 if (rg_setdefs && (!clump_verbose)) {
14057 memcpy(&(outname_end[8]), ".ranges", 8);
14058 LOGPRINTFWW("--clump-range: Clump/region overlaps reported in %s .\n", outname);
14059 }
14060 if (clump_best) {
14061 memcpy(&(outname_end[8]), ".best", 6);
14062 LOGPRINTFWW("--clump-best: Best proxies written to %s .\n", outname);
14063 }
14064 while (0) {
14065 clump_reports_ret_NOMEM:
14066 retval = RET_NOMEM;
14067 break;
14068 clump_reports_ret_OPEN_FAIL:
14069 retval = RET_OPEN_FAIL;
14070 break;
14071 clump_reports_ret_READ_FAIL:
14072 retval = RET_READ_FAIL;
14073 break;
14074 clump_reports_ret_WRITE_FAIL:
14075 retval = RET_WRITE_FAIL;
14076 break;
14077 clump_reports_ret_INVALID_CMDLINE:
14078 retval = RET_INVALID_CMDLINE;
14079 break;
14080 clump_reports_ret_DUPLICATE_HEADER_COL:
14081 *bufptr2 = '\0';
14082 LOGPREPRINTFWW("Error: Duplicate column header '%s' in %s.\n", bufptr, fname_ptr);
14083 clump_reports_ret_INVALID_FORMAT_2:
14084 logerrprintb();
14085 retval = RET_INVALID_FORMAT;
14086 break;
14087 }
14088 clump_reports_ret_1:
14089 bigstack_double_reset(bigstack_mark, bigstack_end_mark);
14090 gzclose_cond(gz_infile);
14091 fclose_cond(outfile);
14092 fclose_cond(outfile_ranges);
14093 fclose_cond(outfile_best);
14094 while (not_found_list) {
14095 cm_ptr = not_found_list;
14096 not_found_list = not_found_list->next;
14097 free(cm_ptr);
14098 }
14099 return retval;
14100 }
14101