1 // This file is part of PLINK 2.00, copyright (C) 2005-2020 Shaun Purcell,
2 // Christopher Chang.
3 //
4 // This program is free software: you can redistribute it and/or modify it
5 // under the terms of the GNU General Public License as published by the Free
6 // Software Foundation, either version 3 of the License, or (at your option)
7 // any later version.
8 //
9 // This program is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 // more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17
18 #include "include/pgenlib_write.h"
19 #include "plink2_compress_stream.h"
20 #include "plink2_data.h"
21 #include "plink2_pvar.h"
22
23 #include <time.h>
24
25 #ifdef __cplusplus
26 namespace plink2 {
27 #endif
28
29 PglErr WriteMapOrBim(const char* outname, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const double* variant_cms, uint32_t variant_ct, uint32_t max_allele_slen, char delim, uint32_t output_zst, uint32_t thread_ct) {
30 // - Normally generates a .bim file. Set max_allele_slen to zero to generate
31 // a .map.
32 // - allele_presents must be nullptr unless we're trimming alt alleles.
33 // - Errors out when writing .bim if any remaining variant is multiallelic
34 // and refalt1_select is nullptr.
35 // - Multiallelic-split case is handled by WriteBimSplit().
36 unsigned char* bigstack_mark = g_bigstack_base;
37 char* cswritep = nullptr;
38 CompressStreamState css;
39 PglErr reterr = kPglRetSuccess;
40 PreinitCstream(&css);
41 {
42 const uint32_t max_chr_blen = GetMaxChrSlen(cip) + 1;
43 // includes trailing tab
44 char* chr_buf;
45 if (unlikely(bigstack_alloc_c(max_chr_blen, &chr_buf))) {
46 goto WriteMapOrBim_ret_NOMEM;
47 }
48 const uintptr_t overflow_buf_size = kCompressStreamBlock + kMaxIdSlen + 512 + 2 * max_allele_slen;
49 reterr = InitCstreamAlloc(outname, 0, output_zst, thread_ct, overflow_buf_size, &css, &cswritep);
50 if (unlikely(reterr)) {
51 goto WriteMapOrBim_ret_1;
52 }
53
54 const char output_missing_geno_char = *g_output_missing_geno_ptr;
55 uintptr_t variant_uidx_base = 0;
56 uintptr_t cur_bits = variant_include[0];
57 uint32_t chr_fo_idx = UINT32_MAX;
58 uint32_t chr_end = 0;
59 uint32_t chr_buf_blen = 0;
60 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
61 const uint32_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
62 if (variant_uidx >= chr_end) {
63 do {
64 ++chr_fo_idx;
65 chr_end = cip->chr_fo_vidx_start[chr_fo_idx + 1];
66 } while (variant_uidx >= chr_end);
67 char* chr_name_end = chrtoa(cip, cip->chr_file_order[chr_fo_idx], chr_buf);
68 *chr_name_end = delim;
69 chr_buf_blen = 1 + S_CAST(uintptr_t, chr_name_end - chr_buf);
70 }
71 cswritep = memcpya(cswritep, chr_buf, chr_buf_blen);
72 cswritep = strcpyax(cswritep, variant_ids[variant_uidx], delim);
73 if (!variant_cms) {
74 *cswritep++ = '0';
75 } else {
76 cswritep = dtoa_g_p8(variant_cms[variant_uidx], cswritep);
77 }
78 *cswritep++ = delim;
79 cswritep = u32toa(variant_bps[variant_uidx], cswritep);
80 if (max_allele_slen) {
81 *cswritep++ = delim;
82 uintptr_t allele_idx_offset_base = variant_uidx * 2;
83 if (allele_idx_offsets) {
84 allele_idx_offset_base = allele_idx_offsets[variant_uidx];
85 if (!refalt1_select) {
86 const uintptr_t allele_idx_offset_end = allele_idx_offsets[variant_uidx + 1];
87 if (allele_idx_offset_end != allele_idx_offset_base + 2) {
88 // not actually unlikely at this point, but simplest to stay
89 // consistent
90 if (unlikely((!allele_presents) || (!AllBitsAreZero(allele_presents, 2 + allele_idx_offset_base, allele_idx_offset_end)))) {
91 logputs("\n");
92 logerrprintfww("Error: %s cannot contain multiallelic variants.\n", outname);
93 goto WriteMapOrBim_ret_INCONSISTENT_INPUT;
94 }
95 }
96 }
97 }
98 const char* const* cur_alleles = &(allele_storage[allele_idx_offset_base]);
99 // note that VCF ref allele corresponds to A2, not A1
100 if (!refalt1_select) {
101 if ((!allele_presents) || IsSet(allele_presents, 1 + allele_idx_offset_base)) {
102 cswritep = strcpya(cswritep, cur_alleles[1]);
103 } else {
104 *cswritep++ = output_missing_geno_char;
105 }
106 *cswritep++ = delim;
107 cswritep = strcpya(cswritep, cur_alleles[0]);
108 } else {
109 STD_ARRAY_KREF(AlleleCode, 2) cur_refalt1_select = refalt1_select[variant_uidx];
110 if ((!allele_presents) || IsSet(allele_presents, cur_refalt1_select[1] + allele_idx_offset_base)) {
111 cswritep = strcpya(cswritep, cur_alleles[cur_refalt1_select[1]]);
112 } else {
113 *cswritep++ = output_missing_geno_char;
114 }
115 *cswritep++ = delim;
116 cswritep = strcpya(cswritep, cur_alleles[cur_refalt1_select[0]]);
117 }
118 }
119 AppendBinaryEoln(&cswritep);
120 if (unlikely(Cswrite(&css, &cswritep))) {
121 goto WriteMapOrBim_ret_WRITE_FAIL;
122 }
123 }
124 if (unlikely(CswriteCloseNull(&css, cswritep))) {
125 goto WriteMapOrBim_ret_WRITE_FAIL;
126 }
127 }
128 while (0) {
129 WriteMapOrBim_ret_NOMEM:
130 reterr = kPglRetNomem;
131 break;
132 WriteMapOrBim_ret_WRITE_FAIL:
133 reterr = kPglRetWriteFail;
134 break;
135 WriteMapOrBim_ret_INCONSISTENT_INPUT:
136 reterr = kPglRetInconsistentInput;
137 break;
138 }
139 WriteMapOrBim_ret_1:
140 CswriteCloseCond(&css, cswritep);
141 BigstackReset(bigstack_mark);
142 return reterr;
143 }
144
PvarInfoReloadHeader(TextStream * pvar_reload_txsp,char ** line_iterp,uint32_t * info_col_idx_ptr)145 PglErr PvarInfoReloadHeader(TextStream* pvar_reload_txsp, char** line_iterp, uint32_t* info_col_idx_ptr) {
146 char* line_iter;
147 do {
148 PglErr reterr = TextNextLineLstrip(pvar_reload_txsp, &line_iter);
149 if (unlikely(reterr)) {
150 return reterr;
151 }
152 } while (!StrStartsWithUnsafe(line_iter, "#CHROM"));
153 uint32_t info_col_idx = 0;
154 do {
155 line_iter = NextToken(line_iter);
156 ++info_col_idx;
157 } while (!tokequal_k(line_iter, "INFO"));
158 *line_iterp = line_iter;
159 *info_col_idx_ptr = info_col_idx;
160 return kPglRetSuccess;
161 }
162
163 // May use all remaining workspace memory.
PvarInfoOpenAndReloadHeader(const char * pvar_info_reload,uint32_t calc_thread_ct,TextStream * pvar_reload_txsp,char ** line_iterp,uint32_t * info_col_idx_ptr)164 PglErr PvarInfoOpenAndReloadHeader(const char* pvar_info_reload, uint32_t calc_thread_ct, TextStream* pvar_reload_txsp, char** line_iterp, uint32_t* info_col_idx_ptr) {
165 PglErr reterr = SizeAndInitTextStream(pvar_info_reload, bigstack_left(), calc_thread_ct, pvar_reload_txsp);
166 if (unlikely(reterr)) {
167 return reterr;
168 }
169 return PvarInfoReloadHeader(pvar_reload_txsp, line_iterp, info_col_idx_ptr);
170 }
171
PvarInfoWrite(uint32_t info_pr_flag_present,uint32_t is_pr,char * info_token,char ** write_iter_ptr)172 void PvarInfoWrite(uint32_t info_pr_flag_present, uint32_t is_pr, char* info_token, char** write_iter_ptr) {
173 char* info_token_end = CurTokenEnd(info_token);
174 uint32_t info_token_slen = info_token_end - info_token;
175 char* info_token_pr = nullptr;
176 if (info_pr_flag_present) {
177 info_token_pr = PrInInfoToken(info_token_slen, info_token);
178 }
179 char* write_iter = *write_iter_ptr;
180 if (is_pr || (!info_token_pr)) {
181 write_iter = memcpya(write_iter, info_token, info_token_slen);
182 if (is_pr && (!info_token_pr)) {
183 if ((info_token_slen == 1) && (info_token[0] == '.')) {
184 write_iter[-1] = 'P';
185 *write_iter++ = 'R';
186 } else {
187 write_iter = strcpya_k(write_iter, ";PR");
188 }
189 }
190 } else {
191 // possible with --real-ref-alleles/--ref-from-fa
192 if (info_token_pr == info_token) {
193 if (info_token_slen == 2) {
194 *write_iter++ = '.';
195 } else {
196 write_iter = memcpya(write_iter, &(info_token[3]), info_token_slen - 3);
197 }
198 } else {
199 write_iter = memcpya(write_iter, info_token, S_CAST(uintptr_t, info_token_pr - info_token) - 1);
200 const char* pr_end = &(info_token_pr[2]);
201 write_iter = memcpya(write_iter, pr_end, info_token_end - pr_end);
202 }
203 }
204 *write_iter_ptr = write_iter;
205 }
206
PvarInfoReload(uint32_t info_col_idx,uint32_t variant_uidx,TextStream * pvar_reload_txsp,char ** line_iterp,uint32_t * trs_variant_uidx_ptr)207 PglErr PvarInfoReload(uint32_t info_col_idx, uint32_t variant_uidx, TextStream* pvar_reload_txsp, char** line_iterp, uint32_t* trs_variant_uidx_ptr) {
208 uint32_t trs_variant_uidx = *trs_variant_uidx_ptr;
209 char* line_iter = AdvPastDelim(*line_iterp, '\n');
210 if (trs_variant_uidx < variant_uidx) {
211 TextSetPos(line_iter, pvar_reload_txsp);
212 PglErr reterr = TextSkipNz(variant_uidx - trs_variant_uidx, pvar_reload_txsp);
213 if (unlikely(reterr)) {
214 return reterr;
215 }
216 line_iter = TextLineEnd(pvar_reload_txsp);
217 trs_variant_uidx = variant_uidx;
218 }
219 PglErr reterr = TextNextLineLstripUnsafe(pvar_reload_txsp, &line_iter);
220 if (unlikely(reterr)) {
221 return reterr;
222 }
223 *line_iterp = NextTokenMultFar(line_iter, info_col_idx);
224
225 // index *after* just-loaded line.
226 *trs_variant_uidx_ptr = trs_variant_uidx + 1;
227 return kPglRetSuccess;
228 }
229
PvarInfoReloadAndWrite(uint32_t info_pr_flag_present,uint32_t info_col_idx,uint32_t variant_uidx,uint32_t is_pr,TextStream * pvar_reload_txsp,char ** line_iterp,char ** write_iter_ptr,uint32_t * trs_variant_uidx_ptr)230 PglErr PvarInfoReloadAndWrite(uint32_t info_pr_flag_present, uint32_t info_col_idx, uint32_t variant_uidx, uint32_t is_pr, TextStream* pvar_reload_txsp, char** line_iterp, char** write_iter_ptr, uint32_t* trs_variant_uidx_ptr) {
231 PglErr reterr = PvarInfoReload(info_col_idx, variant_uidx, pvar_reload_txsp, line_iterp, trs_variant_uidx_ptr);
232 if (unlikely(reterr)) {
233 return reterr;
234 }
235 PvarInfoWrite(info_pr_flag_present, is_pr, *line_iterp, write_iter_ptr);
236 return kPglRetSuccess;
237 }
238
AppendChrsetLine(const ChrInfo * cip,char ** write_iter_ptr)239 void AppendChrsetLine(const ChrInfo* cip, char** write_iter_ptr) {
240 char* write_iter = strcpya_k(*write_iter_ptr, "##chrSet=<");
241 if (!(cip->haploid_mask[0] & 1)) {
242 write_iter = strcpya_k(write_iter, "autosomePairCt=");
243 write_iter = u32toa(cip->autosome_ct, write_iter);
244 if (!IsI32Neg(cip->xymt_codes[kChrOffsetX])) {
245 write_iter = strcpya_k(write_iter, ",X");
246 }
247 if (!IsI32Neg(cip->xymt_codes[kChrOffsetY])) {
248 write_iter = strcpya_k(write_iter, ",Y");
249 }
250 if (!IsI32Neg(cip->xymt_codes[kChrOffsetXY])) {
251 write_iter = strcpya_k(write_iter, ",XY");
252 }
253 if (!IsI32Neg(cip->xymt_codes[kChrOffsetMT])) {
254 write_iter = strcpya_k(write_iter, ",M");
255 }
256 if (!IsI32Neg(cip->xymt_codes[kChrOffsetPAR1])) {
257 write_iter = strcpya_k(write_iter, ",PAR1");
258 }
259 if (!IsI32Neg(cip->xymt_codes[kChrOffsetPAR2])) {
260 write_iter = strcpya_k(write_iter, ",PAR2");
261 }
262 } else {
263 write_iter = strcpya_k(write_iter, "haploidAutosomeCt=");
264 write_iter = u32toa(cip->autosome_ct, write_iter);
265 }
266 *write_iter++ = '>';
267 *write_iter_ptr = write_iter;
268 AppendBinaryEoln(write_iter_ptr);
269 }
270
271 // fileformat, fileDate, source
AppendVcfHeaderStart(uint32_t v43,char ** cswritepp)272 void AppendVcfHeaderStart(uint32_t v43, char** cswritepp) {
273 char* cswritep = *cswritepp;
274 cswritep = strcpya_k(cswritep, "##fileformat=VCFv4.");
275 *cswritep++ = v43 + '2';
276 cswritep = strcpya_k(cswritep, EOLN_STR "##fileDate=");
277 time_t rawtime;
278 time(&rawtime);
279 const struct tm* loctime = localtime(&rawtime);
280 cswritep += strftime(cswritep, kMaxMediumLine, "%Y%m%d", loctime);
281 cswritep = strcpya_k(cswritep, EOLN_STR "##source=PLINKv2.00" EOLN_STR);
282 *cswritepp = cswritep;
283 return;
284 }
285
286 // Note that the order-of-operations page lists this as happening right after
287 // the filtering performed by LoadPvar(). Which is effectively true, since we
288 // ignore variant_include (this is safe since LoadPvar() always initializes
289 // all variant_bps[] and allele_storage[] entries appropriately).
290 // possible todo: ChrInfo can have a length field, which is initialized by the
291 // ##contig header line when possible, but when that doesn't exist LoadPvar()
292 // can conditionally detect INFO:END and take that into account. (Or a reason
293 // to keep the entire info_end array in memory may emerge.)
ChrLenLbound(const ChrInfo * cip,const uint32_t * variant_bps,const uintptr_t * allele_idx_offsets,const char * const * allele_storage,const uint32_t * new_variant_idx_to_old,uint32_t chr_fo_idx,uint32_t max_allele_slen,UnsortedVar vpos_sortstatus)294 uint32_t ChrLenLbound(const ChrInfo* cip, const uint32_t* variant_bps, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uint32_t* new_variant_idx_to_old, uint32_t chr_fo_idx, uint32_t max_allele_slen, UnsortedVar vpos_sortstatus) {
295 const uint32_t vidx_start = cip->chr_fo_vidx_start[chr_fo_idx];
296 const uint32_t vidx_end = cip->chr_fo_vidx_start[chr_fo_idx + 1];
297 assert(vidx_start != vidx_end);
298 if (!(vpos_sortstatus & kfUnsortedVarBp)) {
299 if (!new_variant_idx_to_old) {
300 if (max_allele_slen == 1) {
301 return variant_bps[vidx_end - 1];
302 }
303 uint32_t bp_end = 0;
304 for (uint32_t vidx = vidx_end; vidx != vidx_start; ) {
305 --vidx;
306 const uint32_t cur_bp = variant_bps[vidx];
307 if (cur_bp + max_allele_slen <= bp_end) {
308 break;
309 }
310 uintptr_t allele_idx_offset_base = vidx * 2;
311 if (allele_idx_offsets) {
312 allele_idx_offset_base = allele_idx_offsets[vidx];
313 }
314 // We only care about reference-allele length.
315 const uint32_t cur_bp_end = cur_bp + strlen(allele_storage[allele_idx_offset_base]) - 1;
316 if (cur_bp_end > bp_end) {
317 bp_end = cur_bp_end;
318 }
319 }
320 return bp_end;
321 }
322 if (max_allele_slen == 1) {
323 return variant_bps[new_variant_idx_to_old[vidx_end - 1]];
324 }
325 uint32_t bp_end = 0;
326 for (uint32_t new_vidx = vidx_end; new_vidx != vidx_start; ) {
327 --new_vidx;
328 const uint32_t old_vidx = new_variant_idx_to_old[new_vidx];
329 const uint32_t cur_bp = variant_bps[old_vidx];
330 if (cur_bp + max_allele_slen <= bp_end) {
331 break;
332 }
333 uintptr_t allele_idx_offset_base = old_vidx * 2;
334 if (allele_idx_offsets) {
335 allele_idx_offset_base = allele_idx_offsets[old_vidx];
336 }
337 const uint32_t cur_bp_end = cur_bp + strlen(allele_storage[allele_idx_offset_base]) - 1;
338 if (cur_bp_end > bp_end) {
339 bp_end = cur_bp_end;
340 }
341 }
342 return bp_end;
343 }
344 uint32_t bp_end = U32ArrMax(&(variant_bps[vidx_start]), vidx_end - vidx_start);
345 if (max_allele_slen == 1) {
346 return bp_end;
347 }
348 uint32_t min_check_bp = 0;
349 if (bp_end >= max_allele_slen) {
350 min_check_bp = bp_end + 1 - max_allele_slen;
351 }
352 for (uint32_t vidx = vidx_start; vidx != vidx_end; ++vidx) {
353 const uint32_t cur_bp = variant_bps[vidx];
354 if (cur_bp < min_check_bp) {
355 continue;
356 }
357 uintptr_t allele_idx_offset_base = vidx * 2;
358 if (allele_idx_offsets) {
359 allele_idx_offset_base = allele_idx_offsets[vidx];
360 }
361 const uint32_t cur_bp_end = cur_bp + strlen(allele_storage[allele_idx_offset_base]) - 1;
362 if (cur_bp_end > bp_end) {
363 bp_end = cur_bp_end;
364 }
365 }
366 return bp_end;
367 }
368
PvarXheaderWrite(const uintptr_t * variant_include,const ChrInfo * cip,const uint32_t * variant_bps,const uintptr_t * allele_idx_offsets,const char * const * allele_storage,const uint32_t * new_variant_idx_to_old,uintptr_t xheader_blen,uint32_t vcfheader,uint32_t write_filter,uint32_t write_info,uint32_t append_info_pr_header_line,uint32_t max_allele_slen,UnsortedVar vpos_sortstatus,char * xheader,CompressStreamState * css_ptr,char ** cswritepp)369 PglErr PvarXheaderWrite(const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uint32_t* new_variant_idx_to_old, uintptr_t xheader_blen, uint32_t vcfheader, uint32_t write_filter, uint32_t write_info, uint32_t append_info_pr_header_line, uint32_t max_allele_slen, UnsortedVar vpos_sortstatus, char* xheader, CompressStreamState* css_ptr, char** cswritepp) {
370 unsigned char* bigstack_mark = g_bigstack_base;
371 PglErr reterr = kPglRetSuccess;
372 {
373 if (!vcfheader) {
374 if (write_filter && write_info) {
375 if (unlikely(CsputsStd(xheader, xheader_blen, css_ptr, cswritepp))) {
376 goto PvarXheaderWrite_ret_WRITE_FAIL;
377 }
378 } else {
379 // Filter out FILTER/INFO definitions iff the corresponding column has
380 // been removed.
381 const char* copy_start = xheader;
382 const char* xheader_end = &(xheader[xheader_blen]);
383 for (const char* xheader_iter = xheader; xheader_iter != xheader_end; ) {
384 const char* next_line_start = AdvPastDelim(xheader_iter, '\n');
385 if (((!write_filter) && StrStartsWithUnsafe(xheader_iter, "##FILTER=<ID=")) ||
386 ((!write_info) && StrStartsWithUnsafe(xheader_iter, "##INFO=<ID="))) {
387 if (copy_start != xheader_iter) {
388 if (unlikely(CsputsStd(copy_start, xheader_iter - copy_start, css_ptr, cswritepp))) {
389 goto PvarXheaderWrite_ret_WRITE_FAIL;
390 }
391 }
392 copy_start = next_line_start;
393 }
394 xheader_iter = next_line_start;
395 }
396 if (copy_start != xheader_end) {
397 if (unlikely(CsputsStd(copy_start, xheader_end - copy_start, css_ptr, cswritepp))) {
398 goto PvarXheaderWrite_ret_WRITE_FAIL;
399 }
400 }
401 }
402 } else {
403 // See the start of ExportVcf().
404 AppendVcfHeaderStart(1, cswritepp);
405 const uint32_t chr_ctl = BitCtToWordCt(cip->chr_ct);
406 uintptr_t* written_contig_header_lines;
407 if (unlikely(bigstack_calloc_w(chr_ctl, &written_contig_header_lines))) {
408 goto PvarXheaderWrite_ret_NOMEM;
409 }
410 uint32_t contig_zero_written = 0;
411 char* cswritep = *cswritepp;
412 // ExportVcf() has to perform a customized --merge-par operation, so it
413 // has special handling of chrX/PAR1/PAR2 ##contig header lines. We omit
414 // that here.
415 char* xheader_end = &(xheader[xheader_blen]);
416 for (char* line_end = xheader; line_end != xheader_end; ) {
417 char* line_start = line_end;
418 line_end = AdvPastDelim(line_start, '\n');
419 const uint32_t slen = line_end - line_start;
420 if ((slen > 14) && StrStartsWithUnsafe(line_start, "##contig=<ID=")) {
421 char* contig_name_start = &(line_start[13]);
422 char* contig_name_end = S_CAST(char*, memchr(contig_name_start, ',', slen - 14));
423 if (!contig_name_end) {
424 // if this line is technically well-formed (ends in '>'), it's
425 // useless anyway, throw it out
426 continue;
427 }
428 const uint32_t chr_idx = GetChrCodeCounted(cip, contig_name_end - contig_name_start, contig_name_start);
429 if (IsI32Neg(chr_idx) || (!IsSet(cip->chr_mask, chr_idx))) {
430 continue;
431 }
432 const uint32_t chr_fo_idx = cip->chr_idx_to_foidx[chr_idx];
433 if (unlikely(IsSet(written_contig_header_lines, chr_fo_idx))) {
434 logerrputs("Error: Duplicate ##contig line in .pvar file.\n");
435 goto PvarXheaderWrite_ret_MALFORMED_INPUT;
436 }
437 SetBit(chr_fo_idx, written_contig_header_lines);
438 // if --output-chr was used at some point, we need to sync the
439 // ##contig chromosome code with the code in the .pvar body.
440 char* chr_name_write_start = strcpya_k(cswritep, "##contig=<ID=");
441 char* chr_name_write_end = chrtoa(cip, chr_idx, chr_name_write_start);
442 if ((*chr_name_write_start == '0') && (chr_name_write_end == &(chr_name_write_start[1]))) {
443 // --allow-extra-chr 0 special case
444 // note that cswritep has *not* been advanced
445 contig_zero_written = 1; // technically we write this a bit later
446 continue;
447 }
448 cswritep = chr_name_write_end;
449 if (unlikely(Cswrite(css_ptr, &cswritep))) {
450 goto PvarXheaderWrite_ret_WRITE_FAIL;
451 }
452 if (unlikely(CsputsStd(contig_name_end, line_end - contig_name_end, css_ptr, &cswritep))) {
453 goto PvarXheaderWrite_ret_WRITE_FAIL;
454 }
455 } else {
456 if (!write_filter) {
457 if (StrStartsWithUnsafe(line_start, "##FILTER=<ID=")) {
458 continue;
459 }
460 }
461 if (!write_info) {
462 if (StrStartsWithUnsafe(line_start, "##INFO=<ID=")) {
463 continue;
464 }
465 }
466 if (unlikely(CsputsStd(line_start, slen, css_ptr, &cswritep))) {
467 goto PvarXheaderWrite_ret_WRITE_FAIL;
468 }
469 }
470 }
471 // fill in the missing ##contig lines
472 if (contig_zero_written) {
473 cswritep = strcpya_k(cswritep, "##contig=<ID=0,length=2147483645>" EOLN_STR);
474 }
475 for (uint32_t chr_fo_idx = 0; chr_fo_idx != cip->chr_ct; ++chr_fo_idx) {
476 if (IsSet(written_contig_header_lines, chr_fo_idx)) {
477 continue;
478 }
479 const uint32_t chr_idx = cip->chr_file_order[chr_fo_idx];
480 // AllBitsAreZero() doesn't do what we want in the --sort-vars case,
481 // but fortunately we don't need it there.
482 if ((!IsSet(cip->chr_mask, chr_idx)) || (variant_include && AllBitsAreZero(variant_include, cip->chr_fo_vidx_start[chr_fo_idx], cip->chr_fo_vidx_start[chr_fo_idx + 1]))) {
483 continue;
484 }
485 char* chr_name_write_start = strcpya_k(cswritep, "##contig=<ID=");
486 char* chr_name_write_end = chrtoa(cip, chr_idx, chr_name_write_start);
487 if ((*chr_name_write_start == '0') && (chr_name_write_end == &(chr_name_write_start[1]))) {
488 // --allow-extra-chr 0 special case
489 if (contig_zero_written) {
490 continue;
491 }
492 contig_zero_written = 1;
493 cswritep = strcpya_k(chr_name_write_end, ",length=2147483645");
494 } else {
495 cswritep = strcpya_k(chr_name_write_end, ",length=");
496 const uint32_t pos_end = ChrLenLbound(cip, variant_bps, allele_idx_offsets, allele_storage, new_variant_idx_to_old, chr_fo_idx, max_allele_slen, vpos_sortstatus);
497 cswritep = u32toa(pos_end, cswritep);
498 }
499 *cswritep++ = '>';
500 AppendBinaryEoln(&cswritep);
501 if (unlikely(Cswrite(css_ptr, &cswritep))) {
502 goto PvarXheaderWrite_ret_WRITE_FAIL;
503 }
504 }
505 *cswritepp = cswritep;
506 }
507 if (append_info_pr_header_line) {
508 *cswritepp = strcpya_k(*cswritepp, "##INFO=<ID=PR,Number=0,Type=Flag,Description=\"Provisional reference allele, may not be based on real reference genome\">" EOLN_STR);
509 }
510 }
511 while (0) {
512 PvarXheaderWrite_ret_NOMEM:
513 reterr = kPglRetNomem;
514 break;
515 PvarXheaderWrite_ret_WRITE_FAIL:
516 reterr = kPglRetWriteFail;
517 break;
518 PvarXheaderWrite_ret_MALFORMED_INPUT:
519 reterr = kPglRetMalformedInput;
520 break;
521 }
522 BigstackReset(bigstack_mark);
523 return reterr;
524 }
525
526 PglErr WritePvar(const char* outname, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* qual_present, const float* quals, const uintptr_t* filter_present, const uintptr_t* filter_npass, const char* const* filter_storage, const uintptr_t* nonref_flags, const char* pvar_info_reload, const double* variant_cms, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_slen, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t nonref_flags_storage, uint32_t max_filter_slen, uint32_t info_reload_slen, UnsortedVar vpos_sortstatus, PvarPsamFlags pvar_psam_flags, uint32_t thread_ct, char* xheader) {
527 // allele_presents must be nullptr unless we're trimming alt alleles
528 // split/join cases handled by WritePvarSplit() and WritePvarJoin()
529 unsigned char* bigstack_mark = g_bigstack_base;
530 char* cswritep = nullptr;
531 PglErr reterr = kPglRetSuccess;
532 CompressStreamState css;
533 TextStream pvar_reload_txs;
534 PreinitCstream(&css);
535 PreinitTextStream(&pvar_reload_txs);
536 {
537 const uint32_t max_chr_blen = GetMaxChrSlen(cip) + 1;
538 // includes trailing tab
539 char* chr_buf;
540
541 if (unlikely(bigstack_alloc_c(max_chr_blen, &chr_buf))) {
542 goto WritePvar_ret_NOMEM;
543 }
544 uintptr_t overflow_buf_size = kCompressStreamBlock + kMaxIdSlen + 512 + 2 * max_allele_slen + max_filter_slen + info_reload_slen;
545 if (overflow_buf_size < 2 * kCompressStreamBlock) {
546 overflow_buf_size = 2 * kCompressStreamBlock;
547 }
548 const uint32_t output_zst = (pvar_psam_flags / kfPvarZs) & 1;
549 reterr = InitCstreamAlloc(outname, 0, output_zst, thread_ct, overflow_buf_size, &css, &cswritep);
550 if (unlikely(reterr)) {
551 goto WritePvar_ret_1;
552 }
553 const uint32_t raw_variant_ctl = BitCtToWordCt(raw_variant_ct);
554 const uint32_t all_nonref = (nonref_flags_storage == 2);
555 uint32_t write_info_pr = all_nonref;
556 uint32_t write_info = (pvar_psam_flags & kfPvarColInfo) || pvar_info_reload;
557 if (write_info && nonref_flags) {
558 write_info_pr = !IntersectionIsEmpty(variant_include, nonref_flags, raw_variant_ctl);
559 }
560 write_info_pr = write_info_pr && write_info;
561 if (unlikely(write_info_pr && (info_flags & kfInfoPrNonflagPresent))) {
562 logputs("\n");
563 logerrputs("Error: Conflicting INFO:PR definitions. Either fix all REF alleles so that the\n'provisional reference' flag is no longer needed, or remove/rename the other\nuse of the INFO:PR key.\n");
564 goto WritePvar_ret_INCONSISTENT_INPUT;
565 }
566
567 uint32_t write_filter = 0;
568 if (pvar_psam_flags & kfPvarColFilter) {
569 write_filter = 1;
570 } else if ((pvar_psam_flags & kfPvarColMaybefilter) && filter_present) {
571 write_filter = !IntersectionIsEmpty(variant_include, filter_present, raw_variant_ctl);
572 }
573 char* pvar_info_line_iter = nullptr;
574 uint32_t info_col_idx = 0; // could save this during first load instead
575 const uint32_t info_pr_flag_present = (info_flags / kfInfoPrFlagPresent) & 1;
576 if (pvar_psam_flags & (kfPvarColXheader | kfPvarColVcfheader)) {
577 reterr = PvarXheaderWrite(variant_include, cip, variant_bps, allele_idx_offsets, allele_storage, nullptr, xheader_blen, (pvar_psam_flags / kfPvarColVcfheader) & 1, write_filter, write_info, write_info_pr && (!info_pr_flag_present), max_allele_slen, vpos_sortstatus, xheader, &css, &cswritep);
578 if (unlikely(reterr)) {
579 goto WritePvar_ret_1;
580 }
581 }
582 // bugfix (30 Jul 2017): may be necessary to reload INFO when no ## lines
583 // are in the header... er, should we still allow this?
584 if (pvar_info_reload) {
585 reterr = PvarInfoOpenAndReloadHeader(pvar_info_reload, 1 + (thread_ct > 1), &pvar_reload_txs, &pvar_info_line_iter, &info_col_idx);
586 if (unlikely(reterr)) {
587 goto WritePvar_ret_TSTREAM_FAIL;
588 }
589 }
590 if (cip->chrset_source) {
591 AppendChrsetLine(cip, &cswritep);
592 }
593 cswritep = strcpya_k(cswritep, "#CHROM\tPOS\tID\tREF\tALT");
594
595 uint32_t write_qual = 0;
596 if (pvar_psam_flags & kfPvarColQual) {
597 write_qual = 1;
598 } else if ((pvar_psam_flags & kfPvarColMaybequal) && qual_present) {
599 write_qual = !IntersectionIsEmpty(variant_include, qual_present, raw_variant_ctl);
600 }
601 if (write_qual) {
602 cswritep = strcpya_k(cswritep, "\tQUAL");
603 }
604
605 if (write_filter) {
606 cswritep = strcpya_k(cswritep, "\tFILTER");
607 }
608
609 if (write_info) {
610 cswritep = strcpya_k(cswritep, "\tINFO");
611 }
612
613 uint32_t write_cm = 0;
614 if (pvar_psam_flags & kfPvarColCm) {
615 write_cm = 1;
616 } else if ((pvar_psam_flags & kfPvarColMaybecm) && variant_cms) {
617 if (raw_variant_ct == variant_ct) {
618 // nonzero_cm_present check was performed
619 write_cm = 1;
620 } else {
621 uintptr_t variant_uidx_base = 0;
622 uintptr_t cur_bits = variant_include[0];
623 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
624 const uintptr_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
625 if (variant_cms[variant_uidx] != 0.0) {
626 write_cm = 1;
627 break;
628 }
629 }
630 }
631 }
632 if (write_cm) {
633 cswritep = strcpya_k(cswritep, "\tCM");
634 }
635 AppendBinaryEoln(&cswritep);
636
637 const char output_missing_geno_char = *g_output_missing_geno_ptr;
638 uint32_t trs_variant_uidx = 0;
639 uintptr_t variant_uidx_base = 0;
640 uintptr_t cur_bits = variant_include[0];
641 uint32_t chr_fo_idx = UINT32_MAX;
642 uint32_t chr_end = 0;
643 uint32_t chr_buf_blen = 0;
644 uint32_t ref_allele_idx = 0;
645 uint32_t alt1_allele_idx = 1;
646 uint32_t cur_allele_ct = 2;
647 uint32_t pct = 0;
648 uint32_t next_print_variant_idx = variant_ct / 100;
649 fputs("0%", stdout);
650 fflush(stdout);
651 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
652 const uint32_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
653 if (variant_uidx >= chr_end) {
654 do {
655 ++chr_fo_idx;
656 chr_end = cip->chr_fo_vidx_start[chr_fo_idx + 1];
657 } while (variant_uidx >= chr_end);
658 char* chr_name_end = chrtoa(cip, cip->chr_file_order[chr_fo_idx], chr_buf);
659 *chr_name_end = '\t';
660 chr_buf_blen = 1 + S_CAST(uintptr_t, chr_name_end - chr_buf);
661 }
662 cswritep = memcpya(cswritep, chr_buf, chr_buf_blen);
663 cswritep = u32toa_x(variant_bps[variant_uidx], '\t', cswritep);
664 cswritep = strcpyax(cswritep, variant_ids[variant_uidx], '\t');
665 uintptr_t allele_idx_offset_base;
666 if (!allele_idx_offsets) {
667 allele_idx_offset_base = variant_uidx * 2;
668 } else {
669 allele_idx_offset_base = allele_idx_offsets[variant_uidx];
670 cur_allele_ct = allele_idx_offsets[variant_uidx + 1] - allele_idx_offset_base;
671 }
672 const char* const* cur_alleles = &(allele_storage[allele_idx_offset_base]);
673 if (refalt1_select) {
674 ref_allele_idx = refalt1_select[variant_uidx][0];
675 alt1_allele_idx = refalt1_select[variant_uidx][1];
676 }
677 cswritep = strcpyax(cswritep, cur_alleles[ref_allele_idx], '\t');
678 uint32_t alt_allele_written = 0;
679 if ((!allele_presents) || IsSet(allele_presents, allele_idx_offset_base + alt1_allele_idx)) {
680 cswritep = strcpya(cswritep, cur_alleles[alt1_allele_idx]);
681 alt_allele_written = 1;
682 }
683 if (unlikely(Cswrite(&css, &cswritep))) {
684 goto WritePvar_ret_WRITE_FAIL;
685 }
686 if (cur_allele_ct > 2) {
687 for (uint32_t allele_idx = 0; allele_idx != cur_allele_ct; ++allele_idx) {
688 if ((allele_idx == ref_allele_idx) || (allele_idx == alt1_allele_idx) || (allele_presents && (!IsSet(allele_presents, allele_idx_offset_base + allele_idx)))) {
689 continue;
690 }
691 if (alt_allele_written) {
692 *cswritep++ = ',';
693 }
694 alt_allele_written = 1;
695 cswritep = strcpya(cswritep, cur_alleles[allele_idx]);
696 if (unlikely(Cswrite(&css, &cswritep))) {
697 goto WritePvar_ret_WRITE_FAIL;
698 }
699 }
700 }
701 if (!alt_allele_written) {
702 *cswritep++ = output_missing_geno_char;
703 }
704
705 if (write_qual) {
706 *cswritep++ = '\t';
707 if ((!qual_present) || (!IsSet(qual_present, variant_uidx))) {
708 *cswritep++ = '.';
709 } else {
710 cswritep = ftoa_g(quals[variant_uidx], cswritep);
711 }
712 }
713
714 if (write_filter) {
715 *cswritep++ = '\t';
716 if ((!filter_present) || (!IsSet(filter_present, variant_uidx))) {
717 *cswritep++ = '.';
718 } else if (!IsSet(filter_npass, variant_uidx)) {
719 cswritep = strcpya_k(cswritep, "PASS");
720 } else {
721 cswritep = strcpya(cswritep, filter_storage[variant_uidx]);
722 }
723 }
724
725 if (write_info) {
726 *cswritep++ = '\t';
727 const uint32_t is_pr = all_nonref || (nonref_flags && IsSet(nonref_flags, variant_uidx));
728 if (pvar_info_line_iter) {
729 reterr = PvarInfoReloadAndWrite(info_pr_flag_present, info_col_idx, variant_uidx, is_pr, &pvar_reload_txs, &pvar_info_line_iter, &cswritep, &trs_variant_uidx);
730 if (unlikely(reterr)) {
731 goto WritePvar_ret_TSTREAM_FAIL;
732 }
733 } else {
734 if (is_pr) {
735 cswritep = strcpya_k(cswritep, "PR");
736 } else {
737 *cswritep++ = '.';
738 }
739 }
740 }
741
742 if (write_cm) {
743 *cswritep++ = '\t';
744 if (!variant_cms) {
745 *cswritep++ = '0';
746 } else {
747 cswritep = dtoa_g_p8(variant_cms[variant_uidx], cswritep);
748 }
749 }
750 AppendBinaryEoln(&cswritep);
751 if (variant_idx >= next_print_variant_idx) {
752 if (pct > 10) {
753 putc_unlocked('\b', stdout);
754 }
755 pct = (variant_idx * 100LLU) / variant_ct;
756 printf("\b\b%u%%", pct++);
757 fflush(stdout);
758 next_print_variant_idx = (pct * S_CAST(uint64_t, variant_ct)) / 100;
759 }
760 }
761 if (unlikely(CswriteCloseNull(&css, cswritep))) {
762 goto WritePvar_ret_WRITE_FAIL;
763 }
764 if (pct > 10) {
765 putc_unlocked('\b', stdout);
766 }
767 fputs("\b\b", stdout);
768 }
769 while (0) {
770 WritePvar_ret_NOMEM:
771 reterr = kPglRetNomem;
772 break;
773 WritePvar_ret_TSTREAM_FAIL:
774 TextStreamErrPrint(pvar_info_reload, &pvar_reload_txs);
775 break;
776 WritePvar_ret_WRITE_FAIL:
777 reterr = kPglRetWriteFail;
778 break;
779 WritePvar_ret_INCONSISTENT_INPUT:
780 reterr = kPglRetInconsistentInput;
781 break;
782 }
783 WritePvar_ret_1:
784 CswriteCloseCond(&css, cswritep);
785 CleanupTextStream2(pvar_info_reload, &pvar_reload_txs, &reterr);
786 BigstackReset(bigstack_mark);
787 return reterr;
788 }
789
WriteFam(const char * outname,const uintptr_t * sample_include,const PedigreeIdInfo * piip,const uintptr_t * sex_nm,const uintptr_t * sex_male,const PhenoCol * pheno_cols,const uint32_t * new_sample_idx_to_old,uint32_t sample_ct,uint32_t pheno_ct,char delim)790 PglErr WriteFam(const char* outname, const uintptr_t* sample_include, const PedigreeIdInfo* piip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const uint32_t* new_sample_idx_to_old, uint32_t sample_ct, uint32_t pheno_ct, char delim) {
791 FILE* outfile = nullptr;
792 PglErr reterr = kPglRetSuccess;
793 {
794 if (unlikely(fopen_checked(outname, FOPEN_WB, &outfile))) {
795 goto WriteFam_ret_OPEN_FAIL;
796 }
797 uintptr_t* pheno_nm = nullptr;
798 uintptr_t* pheno_cc = nullptr;
799 double* pheno_qt = nullptr;
800 // .fam files don't support categorical phenotypes
801 const uint32_t pheno_idx = FirstCcOrQtPhenoIdx(pheno_cols, pheno_ct);
802 if (pheno_idx != UINT32_MAX) {
803 const PhenoDtype type_code = pheno_cols[pheno_idx].type_code;
804 pheno_nm = pheno_cols[pheno_idx].nonmiss;
805 if (type_code == kPhenoDtypeCc) {
806 pheno_cc = pheno_cols[pheno_idx].data.cc;
807 } else {
808 pheno_qt = pheno_cols[pheno_idx].data.qt;
809 }
810 }
811 const char* legacy_output_missing_pheno = g_legacy_output_missing_pheno;
812 const uint32_t lomp_slen = strlen(legacy_output_missing_pheno);
813
814 // possible todo: warning if two sample IDs only differ in SID? (check for
815 // this if any file is being exported that can't have a SID column)
816 const char* sample_ids = piip->sii.sample_ids;
817 const char* paternal_ids = piip->parental_id_info.paternal_ids;
818 const char* maternal_ids = piip->parental_id_info.maternal_ids;
819 const uintptr_t max_sample_id_blen = piip->sii.max_sample_id_blen;
820 const uintptr_t max_paternal_id_blen = piip->parental_id_info.max_paternal_id_blen;
821 const uintptr_t max_maternal_id_blen = piip->parental_id_info.max_maternal_id_blen;
822 uintptr_t sample_uidx_base = 0;
823 uintptr_t cur_bits = sample_include[0];
824 uint32_t sample_uidx2 = 0;
825 char* write_iter = g_textbuf;
826 char* textbuf_flush = &(write_iter[kMaxMediumLine]);
827 // not really necessary to make sample_uidx increment dependent on
828 // new_sample_idx_to_old == nullptr
829 for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
830 uintptr_t sample_uidx;
831 if (!new_sample_idx_to_old) {
832 sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
833 } else {
834 do {
835 sample_uidx = new_sample_idx_to_old[sample_uidx2++];
836 } while (!IsSet(sample_include, sample_uidx));
837 }
838 const char* cur_sample_id = &(sample_ids[max_sample_id_blen * sample_uidx]);
839 if (delim == '\t') {
840 write_iter = strcpya(write_iter, cur_sample_id);
841 } else {
842 const char* fid_end = AdvToDelim(cur_sample_id, '\t');
843 write_iter = memcpyax(write_iter, cur_sample_id, fid_end - cur_sample_id, delim);
844 write_iter = strcpya(write_iter, &(fid_end[1]));
845 }
846 *write_iter++ = delim;
847 write_iter = strcpyax(write_iter, &(paternal_ids[max_paternal_id_blen * sample_uidx]), delim);
848 write_iter = strcpyax(write_iter, &(maternal_ids[max_maternal_id_blen * sample_uidx]), delim);
849 *write_iter++ = Sexchar(sex_nm, sex_male, sample_uidx);
850 *write_iter++ = delim;
851 if ((!pheno_nm) || (!IsSet(pheno_nm, sample_uidx))) {
852 write_iter = memcpya(write_iter, legacy_output_missing_pheno, lomp_slen);
853 } else if (pheno_cc) {
854 // do we want to allow user to force 0/1 output?
855 *write_iter++ = '1' + IsSet(pheno_cc, sample_uidx);
856 } else {
857 write_iter = dtoa_g(pheno_qt[sample_uidx], write_iter);
858 }
859 AppendBinaryEoln(&write_iter);
860 if (unlikely(fwrite_ck(textbuf_flush, outfile, &write_iter))) {
861 goto WriteFam_ret_WRITE_FAIL;
862 }
863 }
864 if (unlikely(fclose_flush_null(textbuf_flush, write_iter, &outfile))) {
865 goto WriteFam_ret_WRITE_FAIL;
866 }
867 }
868 while (0) {
869 WriteFam_ret_OPEN_FAIL:
870 reterr = kPglRetOpenFail;
871 break;
872 WriteFam_ret_WRITE_FAIL:
873 reterr = kPglRetWriteFail;
874 break;
875 }
876 fclose_cond(outfile);
877 return reterr;
878 }
879
DataFidColIsRequired(const uintptr_t * sample_include,const SampleIdInfo * siip,uint32_t sample_ct,uint32_t maybe_modifier)880 uint32_t DataFidColIsRequired(const uintptr_t* sample_include, const SampleIdInfo* siip, uint32_t sample_ct, uint32_t maybe_modifier) {
881 if (maybe_modifier & 2) {
882 return 1;
883 }
884 if ((!(maybe_modifier & 1)) || (!(siip->flags & kfSampleIdFidPresent))) {
885 return 0;
886 }
887 const char* sample_ids = siip->sample_ids;
888 const uintptr_t max_sample_id_blen = siip->max_sample_id_blen;
889 uintptr_t sample_uidx_base = 0;
890 uintptr_t cur_bits = sample_include[0];
891 for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
892 const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
893 if (!memequal_k(&(sample_ids[sample_uidx * max_sample_id_blen]), "0\t", 2)) {
894 return 1;
895 }
896 }
897 return 0;
898 }
899
DataSidColIsRequired(const uintptr_t * sample_include,const char * sids,uint32_t sample_ct,uint32_t max_sid_blen,uint32_t maybe_modifier)900 uint32_t DataSidColIsRequired(const uintptr_t* sample_include, const char* sids, uint32_t sample_ct, uint32_t max_sid_blen, uint32_t maybe_modifier) {
901 // note that MAYBESID and SID can both be set
902 if (maybe_modifier & 2) {
903 return 1;
904 }
905 if (sids && (maybe_modifier & 1)) {
906 uintptr_t sample_uidx_base = 0;
907 uintptr_t cur_bits = sample_include[0];
908 for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
909 const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
910 if (!memequal_k(&(sids[sample_uidx * max_sid_blen]), "0", 2)) {
911 return 1;
912 }
913 }
914 }
915 return 0;
916 }
917
DataParentalColsAreRequired(const uintptr_t * sample_include,const PedigreeIdInfo * piip,uint32_t sample_ct,uint32_t maybe_modifier)918 uint32_t DataParentalColsAreRequired(const uintptr_t* sample_include, const PedigreeIdInfo* piip, uint32_t sample_ct, uint32_t maybe_modifier) {
919 if (maybe_modifier & 2) {
920 return 1;
921 }
922 if ((!(maybe_modifier & 1)) || (!(piip->sii.flags & kfSampleIdParentsPresent))) {
923 return 0;
924 }
925 const char* paternal_ids = piip->parental_id_info.paternal_ids;
926 const char* maternal_ids = piip->parental_id_info.maternal_ids;
927 const uintptr_t max_paternal_id_blen = piip->parental_id_info.max_paternal_id_blen;
928 const uintptr_t max_maternal_id_blen = piip->parental_id_info.max_maternal_id_blen;
929 uintptr_t sample_uidx_base = 0;
930 uintptr_t cur_bits = sample_include[0];
931 for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
932 const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
933 if ((!strequal_k_unsafe(&(paternal_ids[sample_uidx * max_paternal_id_blen]), "0")) || (!strequal_k_unsafe(&(maternal_ids[sample_uidx * max_maternal_id_blen]), "0"))) {
934 return 1;
935 }
936 }
937 return 0;
938 }
939
AppendPhenoStr(const PhenoCol * pheno_col,const char * output_missing_pheno,uint32_t omp_slen,uint32_t sample_uidx,char * write_iter)940 char* AppendPhenoStr(const PhenoCol* pheno_col, const char* output_missing_pheno, uint32_t omp_slen, uint32_t sample_uidx, char* write_iter) {
941 const PhenoDtype type_code = pheno_col->type_code;
942 if (type_code <= kPhenoDtypeQt) {
943 if (!IsSet(pheno_col->nonmiss, sample_uidx)) {
944 write_iter = memcpya(write_iter, output_missing_pheno, omp_slen);
945 } else if (type_code == kPhenoDtypeCc) {
946 *write_iter++ = '1' + IsSet(pheno_col->data.cc, sample_uidx);
947 } else {
948 write_iter = dtoa_g(pheno_col->data.qt[sample_uidx], write_iter);
949 }
950 } else {
951 write_iter = strcpya(write_iter, pheno_col->category_names[pheno_col->data.cat[sample_uidx]]);
952 }
953 return write_iter;
954 }
955
WritePsam(const char * outname,const uintptr_t * sample_include,const PedigreeIdInfo * piip,const uintptr_t * sex_nm,const uintptr_t * sex_male,const PhenoCol * pheno_cols,const char * pheno_names,const uint32_t * new_sample_idx_to_old,uint32_t sample_ct,uint32_t pheno_ct,uintptr_t max_pheno_name_blen,PvarPsamFlags pvar_psam_flags)956 PglErr WritePsam(const char* outname, const uintptr_t* sample_include, const PedigreeIdInfo* piip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const char* pheno_names, const uint32_t* new_sample_idx_to_old, uint32_t sample_ct, uint32_t pheno_ct, uintptr_t max_pheno_name_blen, PvarPsamFlags pvar_psam_flags) {
957 FILE* outfile = nullptr;
958 PglErr reterr = kPglRetSuccess;
959 {
960 if (unlikely(fopen_checked(outname, FOPEN_WB, &outfile))) {
961 goto WritePsam_ret_OPEN_FAIL;
962 }
963 const char* output_missing_pheno = g_output_missing_pheno;
964 const uint32_t omp_slen = strlen(output_missing_pheno);
965
966 char* textbuf_flush = &(g_textbuf[kMaxMediumLine]);
967
968 const char* sample_ids = piip->sii.sample_ids;
969 const char* sids = piip->sii.sids;
970 const char* paternal_ids = piip->parental_id_info.paternal_ids;
971 const char* maternal_ids = piip->parental_id_info.maternal_ids;
972 const uintptr_t max_sample_id_blen = piip->sii.max_sample_id_blen;
973 const uintptr_t max_sid_blen = piip->sii.max_sid_blen;
974 const uintptr_t max_paternal_id_blen = piip->parental_id_info.max_paternal_id_blen;
975 const uintptr_t max_maternal_id_blen = piip->parental_id_info.max_maternal_id_blen;
976 const uint32_t write_fid = DataFidColIsRequired(sample_include, &(piip->sii), sample_ct, pvar_psam_flags / kfPsamColMaybefid);
977 const uint32_t write_sid = DataSidColIsRequired(sample_include, sids, sample_ct, max_sid_blen, pvar_psam_flags / kfPsamColMaybesid);
978 const uint32_t write_parents = DataParentalColsAreRequired(sample_include, piip, sample_ct, pvar_psam_flags / kfPsamColMaybeparents);
979 const uint32_t write_sex = (pvar_psam_flags / kfPsamColSex) & 1;
980 const uint32_t write_empty_pheno = (pvar_psam_flags & kfPsamColPheno1) && (!pheno_ct);
981 const uint32_t write_phenos = (pvar_psam_flags & (kfPsamColPheno1 | kfPsamColPhenos)) && pheno_ct;
982 if (write_phenos && (!(pvar_psam_flags & kfPsamColPhenos))) {
983 pheno_ct = 1;
984 }
985 char* write_iter = g_textbuf;
986 *write_iter++ = '#';
987 if (write_fid) {
988 write_iter = strcpya_k(write_iter, "FID\t");
989 }
990 write_iter = strcpya_k(write_iter, "IID");
991 if (write_sid) {
992 write_iter = strcpya_k(write_iter, "\tSID");
993 }
994 if (write_parents) {
995 write_iter = strcpya_k(write_iter, "\tPAT\tMAT");
996 }
997 if (write_sex) {
998 write_iter = strcpya_k(write_iter, "\tSEX");
999 }
1000 if (write_phenos) {
1001 for (uint32_t pheno_idx = 0; pheno_idx != pheno_ct; ++pheno_idx) {
1002 *write_iter++ = '\t';
1003 const char* cur_pheno_name = &(pheno_names[pheno_idx * max_pheno_name_blen]);
1004 const uint32_t cur_pheno_name_slen = strlen(cur_pheno_name);
1005 if (strequal_k(cur_pheno_name, "SEX", cur_pheno_name_slen)) {
1006 if (unlikely(write_sex)) {
1007 logerrputs("Error: .psam file cannot have both a regular SEX column and a phenotype named\n'SEX'. Exclude or rename one of these columns.\n");
1008 goto WritePsam_ret_INCONSISTENT_INPUT;
1009 }
1010 // does this phenotype column conform to the SEX column format?
1011 // case/control is always ok, but quantitative or categorical needs
1012 // to be checked
1013 const PhenoCol* sex_col = &(pheno_cols[pheno_idx]);
1014 if (sex_col->type_code != kPhenoDtypeCc) {
1015 // could bitwise-and sample_include and pheno_nm before the loop
1016 const uintptr_t* pheno_nm = sex_col->nonmiss;
1017 uintptr_t sample_uidx_base = 0;
1018 uintptr_t cur_bits = sample_include[0];
1019 if (sex_col->type_code == kPhenoDtypeQt) {
1020 const double* pheno_vals = sex_col->data.qt;
1021 for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
1022 const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
1023 if (IsSet(pheno_nm, sample_uidx)) {
1024 const double dxx = pheno_vals[sample_uidx];
1025 // tolerate '-9' and '0' as missing values, and anything in
1026 // [1, 2] (could be reasonable to represent XXY, etc. with
1027 // decimals).
1028 if (unlikely(((dxx < 1.0) && (dxx != -9.0) && (dxx != 0.0)) || (dxx > 2.0))) {
1029 logerrputs("Error: .psam numeric SEX values are expected to be in {-9, 0, 1, 2}.\n");
1030 goto WritePsam_ret_INCONSISTENT_INPUT;
1031 }
1032 }
1033 }
1034 } else {
1035 assert(sex_col->type_code == kPhenoDtypeCat);
1036 const uint32_t nonnull_cat_ct = sex_col->nonnull_category_ct;
1037 if (nonnull_cat_ct) {
1038 const char* const* cur_category_names = sex_col->category_names;
1039 // tolerate 'M' and 'm' being present simultaneously, etc.
1040 uint32_t male_cat_idx1 = 0;
1041 uint32_t male_cat_idx2 = 0;
1042 uint32_t female_cat_idx1 = 0;
1043 uint32_t female_cat_idx2 = 0;
1044 for (uint32_t cat_idx = 1; cat_idx <= nonnull_cat_ct; ++cat_idx) {
1045 const char* cur_cat_name = cur_category_names[cat_idx];
1046 if (!cur_cat_name[1]) {
1047 uint32_t first_char_code = ctou32(cur_cat_name[0]);
1048 first_char_code &= 0xdf;
1049 if (first_char_code == 70) {
1050 if (!female_cat_idx1) {
1051 female_cat_idx1 = cat_idx;
1052 } else {
1053 female_cat_idx2 = cat_idx;
1054 }
1055 } else if (first_char_code == 77) {
1056 if (!male_cat_idx1) {
1057 male_cat_idx1 = cat_idx;
1058 } else {
1059 male_cat_idx2 = cat_idx;
1060 }
1061 }
1062 }
1063 }
1064 if (S_CAST(uint32_t, (male_cat_idx1 != 0) + (male_cat_idx2 != 0) + (female_cat_idx1 != 0) + (female_cat_idx2 != 0)) < nonnull_cat_ct) {
1065 const uint32_t* pheno_vals = sex_col->data.cat;
1066 for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
1067 const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
1068 if (IsSet(pheno_nm, sample_uidx)) {
1069 const uint32_t cur_cat_idx = pheno_vals[sample_uidx];
1070 if (unlikely((cur_cat_idx != male_cat_idx1) && (cur_cat_idx != female_cat_idx1) && (cur_cat_idx != male_cat_idx2) && (cur_cat_idx != female_cat_idx2))) {
1071 logerrputs("Error: .psam alphabetic SEX values are expected to be in {'F', 'f', 'M', 'm'}.\n");
1072 goto WritePsam_ret_INCONSISTENT_INPUT;
1073 }
1074 }
1075 }
1076 }
1077 }
1078 }
1079 }
1080 }
1081 write_iter = memcpya(write_iter, cur_pheno_name, cur_pheno_name_slen);
1082 if (unlikely(fwrite_ck(textbuf_flush, outfile, &write_iter))) {
1083 goto WritePsam_ret_WRITE_FAIL;
1084 }
1085 }
1086 } else if (write_empty_pheno) {
1087 write_iter = strcpya_k(write_iter, "\tPHENO1");
1088 }
1089 AppendBinaryEoln(&write_iter);
1090
1091 uintptr_t sample_uidx_base = 0;
1092 uintptr_t cur_bits = sample_include[0];
1093 uint32_t sample_uidx2 = 0;
1094 // not really necessary to make sample_uidx increment dependent on
1095 // new_sample_idx_to_old == nullptr
1096 for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
1097 uintptr_t sample_uidx;
1098 if (!new_sample_idx_to_old) {
1099 sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
1100 } else {
1101 do {
1102 sample_uidx = new_sample_idx_to_old[sample_uidx2++];
1103 } while (!IsSet(sample_include, sample_uidx));
1104 }
1105 const char* cur_sample_id = &(sample_ids[max_sample_id_blen * sample_uidx]);
1106 if (!write_fid) {
1107 cur_sample_id = AdvPastDelim(cur_sample_id, '\t');
1108 }
1109 write_iter = strcpya(write_iter, cur_sample_id);
1110 if (write_sid) {
1111 *write_iter++ = '\t';
1112 if (sids) {
1113 write_iter = strcpya(write_iter, &(sids[max_sid_blen * sample_uidx]));
1114 } else {
1115 *write_iter++ = '0';
1116 }
1117 }
1118 if (write_parents) {
1119 *write_iter++ = '\t';
1120 write_iter = strcpyax(write_iter, &(paternal_ids[max_paternal_id_blen * sample_uidx]), '\t');
1121 write_iter = strcpya(write_iter, &(maternal_ids[max_maternal_id_blen * sample_uidx]));
1122 }
1123 if (write_sex) {
1124 *write_iter++ = '\t';
1125 if (IsSet(sex_nm, sample_uidx)) {
1126 *write_iter++ = '2' - IsSet(sex_male, sample_uidx);
1127 } else {
1128 // this is better than '0' since it allows the raw column to be used
1129 // as --covar input
1130 // (can't do this for .fam export, though: not worth the
1131 // compatibility issues)
1132 write_iter = strcpya_k(write_iter, "NA");
1133 }
1134 }
1135 if (write_phenos) {
1136 for (uint32_t pheno_idx = 0; pheno_idx != pheno_ct; ++pheno_idx) {
1137 *write_iter++ = '\t';
1138 write_iter = AppendPhenoStr(&(pheno_cols[pheno_idx]), output_missing_pheno, omp_slen, sample_uidx, write_iter);
1139 if (unlikely(fwrite_ck(textbuf_flush, outfile, &write_iter))) {
1140 goto WritePsam_ret_WRITE_FAIL;
1141 }
1142 }
1143 } else {
1144 if (write_empty_pheno) {
1145 *write_iter++ = '\t';
1146 write_iter = memcpya(write_iter, output_missing_pheno, omp_slen);
1147 }
1148 if (unlikely(fwrite_ck(textbuf_flush, outfile, &write_iter))) {
1149 goto WritePsam_ret_WRITE_FAIL;
1150 }
1151 }
1152 AppendBinaryEoln(&write_iter);
1153 }
1154 if (unlikely(fclose_flush_null(textbuf_flush, write_iter, &outfile))) {
1155 goto WritePsam_ret_WRITE_FAIL;
1156 }
1157 }
1158 while (0) {
1159 WritePsam_ret_OPEN_FAIL:
1160 reterr = kPglRetOpenFail;
1161 break;
1162 WritePsam_ret_WRITE_FAIL:
1163 reterr = kPglRetWriteFail;
1164 break;
1165 WritePsam_ret_INCONSISTENT_INPUT:
1166 reterr = kPglRetInconsistentInput;
1167 break;
1168 }
1169 fclose_cond(outfile);
1170 return reterr;
1171 }
1172
1173 /*
1174 #ifdef __arm__
1175 # error "Unaligned accesses in BitvecResort()."
1176 #endif
1177 void BitvecResort(const uintptr_t* bitvec, const uint32_t* new_sample_idx_to_old, uint32_t sample_ct, unsigned char* writebuf) {
1178 const uint32_t sample_ctl_m1 = BitCtToWordCt(sample_ct) - 1;
1179 uint32_t widx = 0;
1180 uint32_t cur_word_entry_ct = kBitsPerWord;
1181 const uint32_t* new_sample_idx_to_old_base = new_sample_idx_to_old;
1182 uintptr_t* writebuf_walias = (uintptr_t*)writebuf;
1183 while (1) {
1184 if (widx == sample_ctl_m1) {
1185 cur_word_entry_ct = 1 + ((sample_ct - 1) % kBitsPerWord);
1186 }
1187 uintptr_t cur_word = 0;
1188 for (uint32_t uii = 0; uii != cur_word_entry_ct; ++uii) {
1189 cur_word |= IsSet(bitvec, new_sample_idx_to_old_base[uii]) << uii;
1190 }
1191 if (widx == sample_ctl_m1) {
1192 memcpy(&(writebuf_walias[widx]), &cur_word, (cur_word_entry_ct + (CHAR_BIT - 1)) / CHAR_BIT);
1193 return;
1194 }
1195 writebuf_walias[widx++] = cur_word;
1196 new_sample_idx_to_old_base = &(new_sample_idx_to_old_base[kBitsPerWord]);
1197 }
1198 }
1199 */
1200
1201 #ifdef __arm__
1202 # error "Unaligned accesses in GenovecResort()."
1203 #endif
GenovecResort(const uintptr_t * genovec,const uint32_t * new_sample_idx_to_old,uint32_t sample_ct,void * writebuf)1204 void GenovecResort(const uintptr_t* genovec, const uint32_t* new_sample_idx_to_old, uint32_t sample_ct, void* writebuf) {
1205 // writebuf need not be word-aligned
1206 const uint32_t sample_ctl2_m1 = NypCtToWordCt(sample_ct) - 1;
1207 const uint32_t* new_sample_idx_to_old_iter = new_sample_idx_to_old;
1208 uintptr_t* writebuf_walias = S_CAST(uintptr_t*, writebuf);
1209 for (uint32_t widx = 0; widx != sample_ctl2_m1; ++widx) {
1210 uintptr_t cur_word = 0;
1211 // this is noticeably better than the ascending loop
1212 for (uint32_t uii = kBitsPerWordD2 - 1; ; --uii) {
1213 cur_word |= GetNyparrEntry(genovec, new_sample_idx_to_old_iter[uii]);
1214 if (!uii) {
1215 break;
1216 }
1217 cur_word = cur_word << 2;
1218 }
1219 writebuf_walias[widx] = cur_word;
1220 new_sample_idx_to_old_iter = &(new_sample_idx_to_old_iter[kBitsPerWordD2]);
1221 }
1222 const uint32_t cur_word_entry_ct = ModNz(sample_ct, kBitsPerWordD2);
1223 uintptr_t cur_word = 0;
1224 for (uint32_t uii = cur_word_entry_ct - 1; ; --uii) {
1225 cur_word |= GetNyparrEntry(genovec, new_sample_idx_to_old_iter[uii]);
1226 if (!uii) {
1227 break;
1228 }
1229 cur_word = cur_word << 2;
1230 }
1231 SubwordStore(cur_word, NypCtToByteCt(cur_word_entry_ct), &(writebuf_walias[sample_ctl2_m1]));
1232 }
1233
1234 // Revised phaseraw:
1235 // 4 byte het_ct, 4 byte explicit_phasepresent_ct
1236 // first half, up to (1 + (het_ct / kBitsPerWord)) words
1237 // second half, rounded up to vector boundary
UnpackHphase(const uintptr_t * __restrict all_hets,const uintptr_t * __restrict phaseraw,uint32_t raw_sample_ct,uintptr_t ** phasepresent_ptr,uintptr_t * __restrict phaseinfo)1238 void UnpackHphase(const uintptr_t* __restrict all_hets, const uintptr_t* __restrict phaseraw, uint32_t raw_sample_ct, uintptr_t** phasepresent_ptr, uintptr_t* __restrict phaseinfo) {
1239 const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
1240 const uint32_t het_ct = S_CAST(uint32_t, phaseraw[0]);
1241 const uintptr_t* aux2a = &(phaseraw[8 / kBytesPerWord]);
1242 if (!(aux2a[0] & 1)) {
1243 // phase always present
1244 *phasepresent_ptr = nullptr;
1245 ExpandBytearr(aux2a, all_hets, raw_sample_ctl, het_ct, 1, phaseinfo);
1246 } else {
1247 // bugfix (4 Mar 2018): need to pass raw_phasepresent_ct, not het_ct
1248 #ifdef __LP64__
1249 const uint32_t raw_phasepresent_ct = phaseraw[0] >> 32;
1250 #else
1251 const uint32_t raw_phasepresent_ct = phaseraw[1];
1252 #endif
1253 const uintptr_t* aux2b = &(aux2a[1 + (het_ct / kBitsPerWord)]);
1254 ExpandBytearrNested(aux2b, aux2a, all_hets, raw_sample_ctl, raw_phasepresent_ct, 1, *phasepresent_ptr, phaseinfo);
1255 }
1256 }
1257
UnpackHphaseSubset(const uintptr_t * __restrict all_hets,const uintptr_t * __restrict phaseraw,const uintptr_t * __restrict sample_include,uint32_t sample_ct,uintptr_t ** phasepresent_ptr,uintptr_t * __restrict phaseinfo)1258 void UnpackHphaseSubset(const uintptr_t* __restrict all_hets, const uintptr_t* __restrict phaseraw, const uintptr_t* __restrict sample_include, uint32_t sample_ct, uintptr_t** phasepresent_ptr, uintptr_t* __restrict phaseinfo) {
1259 // const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
1260 // const uint32_t het_ct = PopcountWords(all_hets, raw_sample_ctl);
1261 const uint32_t het_ct = S_CAST(uint32_t, phaseraw[0]);
1262 const uintptr_t* aux2a = &(phaseraw[8 / kBytesPerWord]);
1263 if (!(aux2a[0] & 1)) {
1264 // phase always present
1265 *phasepresent_ptr = nullptr;
1266 ExpandThenSubsetBytearr(aux2a, all_hets, sample_include, het_ct, sample_ct, 1, phaseinfo);
1267 } else {
1268 const uint32_t first_half_word_ct = 1 + (het_ct / kBitsPerWord);
1269 // const uint32_t raw_phasepresent_ct = PopcountWords(phaseraw, first_half_word_ct) - 1;
1270 #ifdef __LP64__
1271 const uint32_t raw_phasepresent_ct = phaseraw[0] >> 32;
1272 #else
1273 const uint32_t raw_phasepresent_ct = phaseraw[1];
1274 #endif
1275 const uintptr_t* aux2b = &(aux2a[first_half_word_ct]);
1276
1277 // see "if (explicit_phasepresent) {}" block in PgrGetRaw(). Could
1278 // change this convention.
1279 ExpandThenSubsetBytearrNested(aux2b, aux2a, all_hets, sample_include, sample_ct, raw_phasepresent_ct, 1, *phasepresent_ptr, phaseinfo);
1280 }
1281 }
1282
UnpackAndResortHphase(const uintptr_t * __restrict all_hets,const uintptr_t * __restrict phaseraw,const uintptr_t * sample_include,const uint32_t * old_sample_idx_to_new,uint32_t raw_sample_ct,uint32_t sample_ct,uintptr_t ** phasepresent_ptr,uintptr_t * __restrict phaseinfo)1283 void UnpackAndResortHphase(const uintptr_t* __restrict all_hets, const uintptr_t* __restrict phaseraw, const uintptr_t* sample_include, const uint32_t* old_sample_idx_to_new, uint32_t raw_sample_ct, uint32_t sample_ct, uintptr_t** phasepresent_ptr, uintptr_t* __restrict phaseinfo) {
1284 const uintptr_t* aux2a_iter = &(phaseraw[8 / kBytesPerWord]);
1285 const uint32_t* old_sample_idx_to_new_iter = old_sample_idx_to_new;
1286 const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
1287 const uint32_t sample_ctl = BitCtToWordCt(sample_ct);
1288 uintptr_t aux2a_word = *aux2a_iter++;
1289 uint32_t read_idx_lowbits = 1;
1290 ZeroWArr(sample_ctl, phaseinfo);
1291 if (!(aux2a_word & 1)) {
1292 // phase always present
1293 *phasepresent_ptr = nullptr;
1294 for (uint32_t widx = 0; widx != raw_sample_ctl; ++widx) {
1295 uintptr_t new_phasepresent_word = all_hets[widx];
1296 const uint32_t read_idx_lowbits_end = read_idx_lowbits + PopcountWord(new_phasepresent_word);
1297 uintptr_t tmp_phaseinfo_input_word = aux2a_word >> read_idx_lowbits;
1298 if (read_idx_lowbits_end >= kBitsPerWord) {
1299 // always safe to read an extra word off the end
1300 aux2a_word = *aux2a_iter++;
1301 if (read_idx_lowbits) {
1302 tmp_phaseinfo_input_word |= aux2a_word << (kBitsPerWord - read_idx_lowbits);
1303 }
1304 }
1305 // no need to mask off top bits of tmp_phaseinfo_input_word
1306 read_idx_lowbits = read_idx_lowbits_end % kBitsPerWord;
1307 if (!sample_include) {
1308 #ifdef USE_AVX2
1309 uintptr_t phaseinfo_bits_to_set = _pdep_u64(tmp_phaseinfo_input_word, new_phasepresent_word);
1310 while (phaseinfo_bits_to_set) {
1311 const uint32_t sample_uidx_lowbits = ctzw(phaseinfo_bits_to_set);
1312 SetBit(old_sample_idx_to_new_iter[sample_uidx_lowbits], phaseinfo);
1313 phaseinfo_bits_to_set &= phaseinfo_bits_to_set - 1;
1314 }
1315 #else
1316 while (new_phasepresent_word) {
1317 const uint32_t sample_uidx_lowbits = ctzw(new_phasepresent_word);
1318 if (tmp_phaseinfo_input_word & 1) {
1319 SetBit(old_sample_idx_to_new_iter[sample_uidx_lowbits], phaseinfo);
1320 }
1321 tmp_phaseinfo_input_word >>= 1;
1322 new_phasepresent_word &= new_phasepresent_word - 1;
1323 }
1324 #endif
1325 } else {
1326 #ifdef USE_AVX2
1327 uintptr_t phaseinfo_bits_to_set = _pdep_u64(tmp_phaseinfo_input_word, new_phasepresent_word) & sample_include[widx];
1328 while (phaseinfo_bits_to_set) {
1329 const uint32_t sample_uidx_lowbits = ctzw(phaseinfo_bits_to_set);
1330 SetBit(old_sample_idx_to_new_iter[sample_uidx_lowbits], phaseinfo);
1331 phaseinfo_bits_to_set &= phaseinfo_bits_to_set - 1;
1332 }
1333 #else
1334 uintptr_t masked_phasepresent_word = new_phasepresent_word & sample_include[widx];
1335 while (masked_phasepresent_word) {
1336 const uint32_t sample_uidx_lowbits = ctzw(masked_phasepresent_word);
1337 const uintptr_t lowmask = (k1LU << sample_uidx_lowbits) - k1LU;
1338 if ((tmp_phaseinfo_input_word >> PopcountWord(new_phasepresent_word & lowmask)) & 1) {
1339 SetBit(old_sample_idx_to_new_iter[sample_uidx_lowbits], phaseinfo);
1340 }
1341 masked_phasepresent_word &= masked_phasepresent_word - 1;
1342 }
1343 #endif
1344 }
1345 old_sample_idx_to_new_iter = &(old_sample_idx_to_new_iter[kBitsPerWord]);
1346 }
1347 return;
1348 }
1349 uintptr_t* phasepresent = *phasepresent_ptr;
1350 const uint32_t het_ct = S_CAST(uint32_t, phaseraw[0]);
1351 const uintptr_t* phaseinfo_read_iter = &(phaseraw[(8 / kBytesPerWord) + 1 + (het_ct / kBitsPerWord)]);
1352 uintptr_t phaseinfo_read_word = *phaseinfo_read_iter++;
1353 uint32_t phaseinfo_read_idx_lowbits = 0;
1354 ZeroWArr(sample_ctl, phasepresent);
1355 for (uint32_t widx = 0; widx != raw_sample_ctl; ++widx) {
1356 uintptr_t geno_hets = all_hets[widx];
1357 if (geno_hets) {
1358 const uint32_t read_idx_lowbits_end = read_idx_lowbits + PopcountWord(geno_hets);
1359 uintptr_t tmp_phasepresent_input_word = aux2a_word >> read_idx_lowbits;
1360 if (read_idx_lowbits_end >= kBitsPerWord) {
1361 // always safe to read an extra word off the end, when
1362 // read_idx_lowbits_end == kBitsPerWord and we're at the last word
1363 aux2a_word = *aux2a_iter++;
1364 if (read_idx_lowbits) {
1365 tmp_phasepresent_input_word |= aux2a_word << (kBitsPerWord - read_idx_lowbits);
1366 }
1367 }
1368 tmp_phasepresent_input_word = bzhi_max(tmp_phasepresent_input_word, read_idx_lowbits_end - read_idx_lowbits);
1369 read_idx_lowbits = read_idx_lowbits_end % kBitsPerWord;
1370 if (tmp_phasepresent_input_word) {
1371 const uint32_t read_phasepresent_ct = PopcountWord(tmp_phasepresent_input_word);
1372 uintptr_t tmp_phaseinfo_input_word;
1373 // avoid reading off end of phaseinfo here
1374 if (phaseinfo_read_idx_lowbits != kBitsPerWord) {
1375 const uint32_t phaseinfo_read_idx_lowbits_end = phaseinfo_read_idx_lowbits + read_phasepresent_ct;
1376 tmp_phaseinfo_input_word = phaseinfo_read_word >> phaseinfo_read_idx_lowbits;
1377 if (phaseinfo_read_idx_lowbits_end < kBitsPerWord) {
1378 phaseinfo_read_idx_lowbits = phaseinfo_read_idx_lowbits_end;
1379 } else {
1380 phaseinfo_read_word = *phaseinfo_read_iter++;
1381 tmp_phaseinfo_input_word |= phaseinfo_read_word << (kBitsPerWord - phaseinfo_read_idx_lowbits);
1382 phaseinfo_read_idx_lowbits = phaseinfo_read_idx_lowbits_end - kBitsPerWord;
1383 }
1384 } else {
1385 // special case, can't right-shift 64
1386 phaseinfo_read_word = *phaseinfo_read_iter++;
1387 phaseinfo_read_idx_lowbits = read_phasepresent_ct;
1388 tmp_phaseinfo_input_word = phaseinfo_read_word;
1389 }
1390 // no need to mask off top bits of tmp_phaseinfo_input_word
1391 if (!sample_include) {
1392 #ifdef USE_AVX2
1393 for (uintptr_t phasepresent_bits_to_set = _pdep_u64(tmp_phasepresent_input_word, geno_hets); ; ) {
1394 const uint32_t new_sample_idx = old_sample_idx_to_new_iter[ctzw(phasepresent_bits_to_set)];
1395 const uint32_t new_sample_widx = new_sample_idx / kBitsPerWord;
1396 const uint32_t new_sample_lowbits = new_sample_idx % kBitsPerWord;
1397 const uintptr_t shifted_bit = k1LU << new_sample_lowbits;
1398 phasepresent[new_sample_widx] |= shifted_bit;
1399 if (tmp_phaseinfo_input_word & 1) {
1400 phaseinfo[new_sample_widx] |= shifted_bit;
1401 }
1402 // branchless version doesn't seem to be any better here; probably
1403 // due to additional random memory access.
1404 // phaseinfo[new_sample_widx] |= (tmp_phaseinfo_input_word & 1) << new_sample_lowbits;
1405
1406 phasepresent_bits_to_set &= phasepresent_bits_to_set - 1;
1407 if (!phasepresent_bits_to_set) {
1408 break;
1409 }
1410 tmp_phaseinfo_input_word >>= 1;
1411 }
1412 #else
1413 for (; ; tmp_phasepresent_input_word >>= 1) {
1414 if (tmp_phasepresent_input_word & 1) {
1415 const uint32_t new_sample_idx = old_sample_idx_to_new_iter[ctzw(geno_hets)];
1416 const uint32_t new_sample_widx = new_sample_idx / kBitsPerWord;
1417 const uint32_t new_sample_lowbits = new_sample_idx % kBitsPerWord;
1418 const uintptr_t shifted_bit = k1LU << new_sample_lowbits;
1419 phasepresent[new_sample_widx] |= shifted_bit;
1420 if (tmp_phaseinfo_input_word & 1) {
1421 phaseinfo[new_sample_widx] |= shifted_bit;
1422 }
1423 if (tmp_phasepresent_input_word == 1) {
1424 break;
1425 }
1426 tmp_phaseinfo_input_word >>= 1;
1427 }
1428 geno_hets &= geno_hets - 1;
1429 }
1430 #endif
1431 } else {
1432 const uintptr_t sample_include_word = sample_include[widx];
1433 #ifdef USE_AVX2
1434 const uintptr_t phasepresent_word_expanded = _pdep_u64(tmp_phasepresent_input_word, geno_hets);
1435 uintptr_t phasepresent_bits_to_set = phasepresent_word_expanded & sample_include_word;
1436 if (phasepresent_bits_to_set) {
1437 // tmp_phaseinfo_input_word gives us the phasing state of the
1438 // positions in phasepresent_word_expanded.
1439 // However, we're only iterating over the positions in
1440 // (phasepresent_word_expanded & sample_include_word).
1441 // (can replace sample_include_word with phasepresent_bits_to_set
1442 // in this expression)
1443 uintptr_t collapsed_phaseinfo_input_word = _pext_u64(tmp_phaseinfo_input_word, _pext_u64(sample_include_word, phasepresent_word_expanded));
1444 while (1) {
1445 const uint32_t new_sample_idx = old_sample_idx_to_new_iter[ctzw(phasepresent_bits_to_set)];
1446 const uint32_t new_sample_widx = new_sample_idx / kBitsPerWord;
1447 const uint32_t new_sample_lowbits = new_sample_idx % kBitsPerWord;
1448 const uintptr_t shifted_bit = k1LU << new_sample_lowbits;
1449 phasepresent[new_sample_widx] |= shifted_bit;
1450 if (collapsed_phaseinfo_input_word & 1) {
1451 phaseinfo[new_sample_widx] |= shifted_bit;
1452 }
1453
1454 phasepresent_bits_to_set &= phasepresent_bits_to_set - 1;
1455 if (!phasepresent_bits_to_set) {
1456 break;
1457 }
1458 collapsed_phaseinfo_input_word >>= 1;
1459 }
1460 }
1461 #else
1462 for (; ; tmp_phasepresent_input_word >>= 1) {
1463 if (tmp_phasepresent_input_word & 1) {
1464 const uintptr_t geno_hets_lowbit = geno_hets & (-geno_hets);
1465 if (sample_include_word & geno_hets_lowbit) {
1466 const uint32_t sample_uidx_lowbits = ctzw(geno_hets_lowbit);
1467 const uint32_t new_sample_idx = old_sample_idx_to_new_iter[sample_uidx_lowbits];
1468 const uint32_t new_sample_widx = new_sample_idx / kBitsPerWord;
1469 const uint32_t new_sample_lowbits = new_sample_idx % kBitsPerWord;
1470 const uintptr_t shifted_bit = k1LU << new_sample_lowbits;
1471 phasepresent[new_sample_widx] |= shifted_bit;
1472 if (tmp_phaseinfo_input_word & 1) {
1473 phaseinfo[new_sample_widx] |= shifted_bit;
1474 }
1475 }
1476 if (tmp_phasepresent_input_word == 1) {
1477 break;
1478 }
1479 tmp_phaseinfo_input_word >>= 1;
1480 }
1481 geno_hets &= geno_hets - 1;
1482 }
1483 #endif
1484 }
1485 }
1486 }
1487 old_sample_idx_to_new_iter = &(old_sample_idx_to_new_iter[kBitsPerWord]);
1488 }
1489 }
1490
1491
1492 // these also work on dphaseraw
CopyDosage(const uintptr_t * __restrict read_dosagepresent,const Dosage * read_dosagevals,uint32_t raw_sample_ct,uint32_t dosage_ct,uintptr_t * __restrict write_dosagepresent,Dosage * write_dosagevals,uint32_t * write_dosage_ct_ptr)1493 void CopyDosage(const uintptr_t* __restrict read_dosagepresent, const Dosage* read_dosagevals, uint32_t raw_sample_ct, uint32_t dosage_ct, uintptr_t* __restrict write_dosagepresent, Dosage* write_dosagevals, uint32_t* write_dosage_ct_ptr) {
1494 const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
1495 *write_dosage_ct_ptr = dosage_ct;
1496 memcpy(write_dosagepresent, read_dosagepresent, raw_sample_ctl * sizeof(intptr_t));
1497 memcpy(write_dosagevals, read_dosagevals, dosage_ct * sizeof(Dosage));
1498 }
1499
CopyAndResort8bit(const uintptr_t * __restrict src_subset,const void * __restrict src_vals,const uint32_t * __restrict new_sample_idx_to_old,uint32_t raw_sample_ct,uint32_t sample_ct,uintptr_t * __restrict dst_subset,void * __restrict dst_vals,uint32_t * __restrict cumulative_popcount_buf)1500 uint32_t CopyAndResort8bit(const uintptr_t* __restrict src_subset, const void* __restrict src_vals, const uint32_t* __restrict new_sample_idx_to_old, uint32_t raw_sample_ct, uint32_t sample_ct, uintptr_t* __restrict dst_subset, void* __restrict dst_vals, uint32_t* __restrict cumulative_popcount_buf) {
1501 const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
1502 FillCumulativePopcounts(src_subset, raw_sample_ctl, cumulative_popcount_buf);
1503 const uint32_t sample_ctl = BitCtToWordCt(sample_ct);
1504 ZeroWArr(sample_ctl, dst_subset);
1505 const unsigned char* src_vals_uc = S_CAST(const unsigned char*, src_vals);
1506 unsigned char* dst_vals_uc = S_CAST(unsigned char*, dst_vals);
1507 unsigned char* dst_vals_iter = dst_vals_uc;
1508 // Tried word-based loop, was significantly worse
1509 for (uint32_t new_sample_idx = 0; new_sample_idx != sample_ct; ++new_sample_idx) {
1510 const uint32_t old_sample_idx = new_sample_idx_to_old[new_sample_idx];
1511 if (IsSet(src_subset, old_sample_idx)) {
1512 SetBit(new_sample_idx, dst_subset);
1513 const uint32_t old_dosagevals_idx = RawToSubsettedPos(src_subset, cumulative_popcount_buf, old_sample_idx);
1514 *dst_vals_iter++ = src_vals_uc[old_dosagevals_idx];
1515 }
1516 }
1517 return dst_vals_iter - dst_vals_uc;
1518 }
1519
CopyAndResort16bit(const uintptr_t * __restrict src_subset,const void * __restrict src_vals,const uint32_t * __restrict new_sample_idx_to_old,uint32_t raw_sample_ct,uint32_t sample_ct,uintptr_t * __restrict dst_subset,void * __restrict dst_vals,uint32_t * __restrict cumulative_popcount_buf)1520 uint32_t CopyAndResort16bit(const uintptr_t* __restrict src_subset, const void* __restrict src_vals, const uint32_t* __restrict new_sample_idx_to_old, uint32_t raw_sample_ct, uint32_t sample_ct, uintptr_t* __restrict dst_subset, void* __restrict dst_vals, uint32_t* __restrict cumulative_popcount_buf) {
1521 const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
1522 FillCumulativePopcounts(src_subset, raw_sample_ctl, cumulative_popcount_buf);
1523 const uint32_t sample_ctl = BitCtToWordCt(sample_ct);
1524 ZeroWArr(sample_ctl, dst_subset);
1525 const uint16_t* src_vals_u16 = S_CAST(const uint16_t*, src_vals);
1526 uint16_t* dst_vals_u16 = S_CAST(uint16_t*, dst_vals);
1527 uint16_t* dst_vals_iter = dst_vals_u16;
1528 // Tried word-based loop, was significantly worse
1529 for (uint32_t new_sample_idx = 0; new_sample_idx != sample_ct; ++new_sample_idx) {
1530 const uint32_t old_sample_idx = new_sample_idx_to_old[new_sample_idx];
1531 if (IsSet(src_subset, old_sample_idx)) {
1532 SetBit(new_sample_idx, dst_subset);
1533 const uint32_t old_dosagevals_idx = RawToSubsettedPos(src_subset, cumulative_popcount_buf, old_sample_idx);
1534 *dst_vals_iter++ = src_vals_u16[old_dosagevals_idx];
1535 }
1536 }
1537 return dst_vals_iter - dst_vals_u16;
1538 }
1539
1540 // Requires trailing bits of genovec to be zeroed out.
1541 // "Flat" = don't separate one_cts and two_cts.
1542 void GetMFlatCounts64(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict sample_include_interleaved_vec, const PgenVariant* pgvp, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t allele_ct, STD_ARRAY_REF(uint32_t, 4) genocounts, uint64_t* all_dosages) {
1543 if (sample_ct == raw_sample_ct) {
1544 GenoarrCountFreqsUnsafe(pgvp->genovec, sample_ct, genocounts);
1545 } else {
1546 GenoarrCountSubsetFreqs(pgvp->genovec, sample_include_interleaved_vec, raw_sample_ct, sample_ct, genocounts);
1547 }
1548 all_dosages[0] = 2 * genocounts[0] + genocounts[1];
1549 all_dosages[1] = 2 * genocounts[2] + genocounts[1];
1550 ZeroU64Arr(allele_ct - 2, &(all_dosages[2]));
1551 const AlleleCode* patch_01_vals = pgvp->patch_01_vals;
1552 const AlleleCode* patch_10_vals = pgvp->patch_10_vals;
1553 const uint32_t patch_01_ct = pgvp->patch_01_ct;
1554 const uint32_t patch_10_ct = pgvp->patch_10_ct;
1555 if (sample_ct == raw_sample_ct) {
1556 all_dosages[1] -= patch_01_ct + 2 * patch_10_ct;
1557 for (uint32_t uii = 0; uii != patch_01_ct; ++uii) {
1558 all_dosages[patch_01_vals[uii]] += 1;
1559 }
1560 const uint32_t patch_10_ct_x2 = patch_10_ct * 2;
1561 for (uint32_t uii = 0; uii != patch_10_ct_x2; ++uii) {
1562 all_dosages[patch_10_vals[uii]] += 1;
1563 }
1564 } else {
1565 if (patch_01_ct) {
1566 const uintptr_t* patch_01_set = pgvp->patch_01_set;
1567 uintptr_t sample_widx = 0;
1568 uintptr_t patch_01_bits = patch_01_set[0];
1569 uint32_t subsetted_patch_01_ct = 0;
1570 for (uint32_t uii = 0; uii != patch_01_ct; ++uii) {
1571 const uintptr_t lowbit = BitIter1y(patch_01_set, &sample_widx, &patch_01_bits);
1572 if (sample_include[sample_widx] & lowbit) {
1573 all_dosages[patch_01_vals[uii]] += 1;
1574 ++subsetted_patch_01_ct;
1575 }
1576 }
1577 all_dosages[1] -= subsetted_patch_01_ct;
1578 }
1579 if (patch_10_ct) {
1580 const uintptr_t* patch_10_set = pgvp->patch_10_set;
1581 uintptr_t sample_widx = 0;
1582 uintptr_t patch_10_bits = patch_10_set[0];
1583 uint32_t subsetted_patch_10_ct = 0;
1584 for (uint32_t uii = 0; uii != patch_10_ct; ++uii) {
1585 const uintptr_t lowbit = BitIter1y(patch_10_set, &sample_widx, &patch_10_bits);
1586 if (sample_include[sample_widx] & lowbit) {
1587 all_dosages[patch_10_vals[2 * uii]] += 1;
1588 all_dosages[patch_10_vals[2 * uii + 1]] += 1;
1589 ++subsetted_patch_10_ct;
1590 }
1591 }
1592 all_dosages[1] -= 2 * subsetted_patch_10_ct;
1593 }
1594 }
1595 }
1596
1597 void GetMCounts64(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict sample_include_interleaved_vec, const PgenVariant* pgvp, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t allele_ct, STD_ARRAY_REF(uint32_t, 4) genocounts, uint64_t* __restrict one_cts, uint64_t* __restrict two_cts) {
1598 // This mirrors GetMultiallelicCountsAndDosage16s().
1599 if (sample_ct == raw_sample_ct) {
1600 GenoarrCountFreqsUnsafe(pgvp->genovec, sample_ct, genocounts);
1601 } else {
1602 GenoarrCountSubsetFreqs(pgvp->genovec, sample_include_interleaved_vec, raw_sample_ct, sample_ct, genocounts);
1603 }
1604 one_cts[0] = genocounts[1];
1605 one_cts[1] = genocounts[1];
1606 ZeroU64Arr(allele_ct - 2, &(one_cts[2]));
1607 two_cts[0] = genocounts[0];
1608 two_cts[1] = genocounts[2];
1609 ZeroU64Arr(allele_ct - 2, &(two_cts[2]));
1610 const AlleleCode* patch_01_vals = pgvp->patch_01_vals;
1611 const AlleleCode* patch_10_vals = pgvp->patch_10_vals;
1612 const uint32_t patch_01_ct = pgvp->patch_01_ct;
1613 const uint32_t patch_10_ct = pgvp->patch_10_ct;
1614 if (sample_ct == raw_sample_ct) {
1615 one_cts[1] -= patch_01_ct;
1616 for (uint32_t uii = 0; uii != patch_01_ct; ++uii) {
1617 one_cts[patch_01_vals[uii]] += 1;
1618 }
1619 two_cts[1] -= patch_10_ct;
1620 const AlleleCode* patch_10_vals_iter = patch_10_vals;
1621 for (uint32_t uii = 0; uii != patch_10_ct; ++uii) {
1622 const AlleleCode code_lo = *patch_10_vals_iter++;
1623 const AlleleCode code_hi = *patch_10_vals_iter++;
1624 if (code_lo == code_hi) {
1625 two_cts[code_lo] += 1;
1626 } else {
1627 one_cts[code_lo] += 1;
1628 one_cts[code_hi] += 1;
1629 }
1630 }
1631 } else {
1632 if (patch_01_ct) {
1633 const uintptr_t* patch_01_set = pgvp->patch_01_set;
1634 uintptr_t sample_widx = 0;
1635 uintptr_t patch_01_bits = patch_01_set[0];
1636 uint32_t subsetted_patch_01_ct = 0;
1637 for (uint32_t uii = 0; uii != patch_01_ct; ++uii) {
1638 const uintptr_t lowbit = BitIter1y(patch_01_set, &sample_widx, &patch_01_bits);
1639 if (sample_include[sample_widx] & lowbit) {
1640 one_cts[patch_01_vals[uii]] += 1;
1641 ++subsetted_patch_01_ct;
1642 }
1643 }
1644 one_cts[1] -= subsetted_patch_01_ct;
1645 }
1646 if (patch_10_ct) {
1647 const uintptr_t* patch_10_set = pgvp->patch_10_set;
1648 uintptr_t sample_widx = 0;
1649 uintptr_t patch_10_bits = patch_10_set[0];
1650 uint32_t subsetted_patch_10_ct = 0;
1651 for (uint32_t uii = 0; uii != patch_10_ct; ++uii) {
1652 const uintptr_t lowbit = BitIter1y(patch_10_set, &sample_widx, &patch_10_bits);
1653 if (sample_include[sample_widx] & lowbit) {
1654 ++subsetted_patch_10_ct;
1655 const AlleleCode code_lo = patch_10_vals[2 * uii];
1656 const AlleleCode code_hi = patch_10_vals[2 * uii + 1];
1657 if (code_lo == code_hi) {
1658 two_cts[code_lo] += 1;
1659 } else {
1660 one_cts[code_lo] += 1;
1661 one_cts[code_hi] += 1;
1662 }
1663 }
1664 }
1665 two_cts[1] -= subsetted_patch_10_ct;
1666 }
1667 }
1668 }
1669
// Shared context handed to every LoadAlleleAndGenoCountsThread() worker.
// Fields fall into four groups: read-only inputs describing the variants and
// sample subsets, per-thread reader/scratch arrays indexed by thread index,
// per-block control state, and output arrays (most of which the worker
// null-checks and skips when absent).
typedef struct LoadAlleleAndGenoCountsCtxStruct {
  // Bitmask of variants to process; workers iterate over its set bits.
  const uintptr_t* variant_include;
  // Chromosome info; consulted for the X/Y chromosome codes, chromosome
  // boundaries, and the haploid mask.
  const ChrInfo* cip;
  // May be nullptr, in which case every variant is treated as having two
  // alleles, stored at offset 2 * variant_uidx.
  const uintptr_t* allele_idx_offsets;
  // Primary sample subset, in plain-bitmask, interleaved-vector, and
  // cumulative-popcount forms.
  const uintptr_t* sample_include;
  uintptr_t* sample_include_interleaved_vec;
  uint32_t* sample_include_cumulative_popcounts;
  // Male subset, same three forms; used for the chrX/chrY sex-specific
  // counts.
  const uintptr_t* sex_male;
  uintptr_t* sex_male_interleaved_vec;
  uint32_t* sex_male_cumulative_popcounts;
  // Unknown-sex subset; used when filling x_nosex_geno_cts.
  uintptr_t* nosex_interleaved_vec;
  // When founder_info is non-null, the worker makes two subset passes instead
  // of one.
  // NOTE(review): the founder_* fields below are presumably the founder-only
  // analogues of the sample_include/sex_male/nosex fields above, swapped in
  // for the second pass -- confirm against the thread function.
  const uintptr_t* founder_info;
  uintptr_t* founder_info_interleaved_vec;
  uint32_t* founder_info_cumulative_popcounts;
  uintptr_t* founder_male;
  uintptr_t* founder_male_interleaved_vec;
  uint32_t* founder_male_cumulative_popcounts;
  uintptr_t* founder_nosex_interleaved_vec;
  // Sample counts.  raw_sample_ct is the unsubsetted count; sample_ct,
  // male_ct, and nosex_ct are passed alongside the sample_include, sex_male,
  // and nosex masks respectively.
  uint32_t raw_sample_ct;
  uint32_t sample_ct;
  // NOTE(review): founder_ct, founder_male_ct, founder_nosex_ct presumably
  // pair with the founder_* masks in the same way -- confirm.
  uint32_t founder_ct;
  uint32_t male_ct;
  uint32_t nosex_ct;
  uint32_t founder_male_ct;
  uint32_t founder_nosex_ct;
  // NOTE(review): looks like the first variant index on a haploid
  // chromosome -- verify against the thread function and caller.
  uint32_t first_hap_uidx;
  // Nonzero = compute minimac3-r2 rather than mach-r2 imputation quality
  // (mach-r2 gets a 0.5 haploid correction in the worker; minimac3-r2 does
  // not).
  uint32_t is_minimac3_r2;

  // Per-thread readers, indexed by thread index.
  PgenReader** pgr_ptrs;

  // Per-thread scratch buffers, indexed by thread index.  dosage_presents
  // (and with it dosage_mains) and all_dosages may be nullptr.
  uintptr_t** genovecs;
  uintptr_t** thread_read_mhc;
  uintptr_t** dosage_presents;
  Dosage** dosage_mains;
  uint64_t** all_dosages;
  // Per-thread variant_uidx at which each worker starts iterating
  // variant_include for the current block.
  uint32_t* read_variant_uidx_starts;

  // shouldn't need array, or errno storage, since kPglRetMalformedInput is the
  // only possible error for now
  PglErr reterr;

  // Number of variants in the current block; thread tidx handles the index
  // range [tidx * cur_block_size / thread_ct,
  //        (tidx + 1) * cur_block_size / thread_ct).
  uint32_t cur_block_size;

  // Outputs.
  // One byte per allele (indexed by allele offset); set to 128 when the
  // allele is observed.  May be nullptr.
  unsigned char* allele_presents_bytearr;
  // Per-allele dosage totals, indexed by allele offset; diploid totals are
  // stored doubled so haploid and diploid entries share a scale.  May be
  // nullptr.
  uint64_t* allele_ddosages;
  // NOTE(review): raw_geno_cts and variant_missing_hc_cts are only copied to
  // locals in the visible part of the worker; presumably per-variant hardcall
  // genotype counts and missing-hardcall counts -- confirm.
  STD_ARRAY_PTR_DECL(uint32_t, 3, raw_geno_cts);
  uint32_t* variant_missing_hc_cts;
  // Per-variant (indexed by variant_uidx) missing-dosage count.  May be
  // nullptr.
  uint32_t* variant_missing_dosage_cts;
  // Per-variant heterozygous-haploid count.  May be nullptr.
  uint32_t* variant_hethap_cts;
  // NOTE(review): founder_* outputs are presumably the founder-only versions
  // of the corresponding arrays -- confirm.
  uint64_t* founder_allele_ddosages;
  STD_ARRAY_PTR_DECL(uint32_t, 3, founder_raw_geno_cts);
  // chrX-only genotype counts for males / unknown-sex samples, indexed by
  // (variant_uidx - first chrX variant_uidx).  May be nullptr.
  STD_ARRAY_PTR_DECL(uint32_t, 3, x_male_geno_cts);
  STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_male_geno_cts);
  STD_ARRAY_PTR_DECL(uint32_t, 3, x_nosex_geno_cts);
  STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_nosex_geno_cts);
  // Per-variant (indexed by variant_uidx) imputation r2.  May be nullptr.
  double* imp_r2_vals;
} LoadAlleleAndGenoCountsCtx;
1727
LoadAlleleAndGenoCountsThread(void * raw_arg)1728 THREAD_FUNC_DECL LoadAlleleAndGenoCountsThread(void* raw_arg) {
1729 ThreadGroupFuncArg* arg = S_CAST(ThreadGroupFuncArg*, raw_arg);
1730 const uintptr_t tidx = arg->tidx;
1731 LoadAlleleAndGenoCountsCtx* ctx = S_CAST(LoadAlleleAndGenoCountsCtx*, arg->sharedp->context);
1732
1733 const uintptr_t* variant_include = ctx->variant_include;
1734 const ChrInfo* cip = ctx->cip;
1735 const uintptr_t* allele_idx_offsets = ctx->allele_idx_offsets;
1736 const uint32_t thread_ct = GetThreadCt(arg->sharedp);
1737 const uint32_t subset_ct = (ctx->founder_info != nullptr) + 1;
1738 const uint32_t raw_sample_ct = ctx->raw_sample_ct;
1739 const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
1740 const uint32_t first_hap_uidx = ctx->first_hap_uidx;
1741 const uint32_t is_minimac3_r2 = ctx->is_minimac3_r2;
1742 const uint32_t y_code = cip->xymt_codes[kChrOffsetY];
1743 PgenReader* pgrp = ctx->pgr_ptrs[tidx];
1744 PgenVariant pgv;
1745 pgv.genovec = ctx->genovecs[tidx];
1746 SetPgvThreadMhcNull(raw_sample_ct, tidx, ctx->thread_read_mhc, &pgv);
1747 pgv.dosage_present = nullptr;
1748 pgv.dosage_main = nullptr;
1749 if (ctx->dosage_presents) {
1750 pgv.dosage_present = ctx->dosage_presents[tidx];
1751 pgv.dosage_main = ctx->dosage_mains[tidx];
1752 }
1753 uint64_t* all_dosages = nullptr;
1754 if (ctx->all_dosages) {
1755 all_dosages = ctx->all_dosages[tidx];
1756 }
1757 uint32_t is_y = 0;
1758 uint32_t is_nonxy_haploid = 0;
1759 uint32_t x_start = 0;
1760 uint32_t x_code;
1761 if (XymtExists(cip, kChrOffsetX, &x_code)) {
1762 const uint32_t x_chr_fo_idx = cip->chr_idx_to_foidx[x_code];
1763 x_start = cip->chr_fo_vidx_start[x_chr_fo_idx];
1764 }
1765 uint32_t allele_ct = 2;
1766 do {
1767 const uintptr_t cur_block_size = ctx->cur_block_size;
1768 // no overflow danger since cur_block_size <= 2^16, tidx < (2^16 - 1)
1769 const uint32_t cur_idx_end = ((tidx + 1) * cur_block_size) / thread_ct;
1770 const uintptr_t* sample_include = ctx->sample_include;
1771 const uintptr_t* sample_include_interleaved_vec = ctx->sample_include_interleaved_vec;
1772 const uint32_t* sample_include_cumulative_popcounts = ctx->sample_include_cumulative_popcounts;
1773 const uintptr_t* sex_male = ctx->sex_male;
1774 const uintptr_t* sex_male_interleaved_vec = ctx->sex_male_interleaved_vec;
1775 const uint32_t* sex_male_cumulative_popcounts = ctx->sex_male_cumulative_popcounts;
1776 const uintptr_t* nosex_interleaved_vec = ctx->nosex_interleaved_vec;
1777 uint32_t sample_ct = ctx->sample_ct;
1778 uint32_t male_ct = ctx->male_ct;
1779 uint32_t nosex_ct = ctx->nosex_ct;
1780 unsigned char* allele_presents_bytearr = ctx->allele_presents_bytearr;
1781 uint64_t* allele_ddosages = ctx->allele_ddosages;
1782 STD_ARRAY_PTR_DECL(uint32_t, 3, raw_geno_cts) = ctx->raw_geno_cts;
1783 uint32_t* variant_missing_hc_cts = ctx->variant_missing_hc_cts;
1784 uint32_t* variant_missing_dosage_cts = ctx->variant_missing_dosage_cts;
1785 uint32_t* variant_hethap_cts = ctx->variant_hethap_cts;
1786 STD_ARRAY_PTR_DECL(uint32_t, 3, x_male_geno_cts) = ctx->x_male_geno_cts;
1787 STD_ARRAY_PTR_DECL(uint32_t, 3, x_nosex_geno_cts) = ctx->x_nosex_geno_cts;
1788 double* imp_r2_vals = ctx->imp_r2_vals;
1789 pgv.dosage_ct = 0;
1790 for (uint32_t subset_idx = 0; ; ) {
1791 // bugfix (29 Dec 2019): this boolean can change with subset_idx
1792 const uint32_t no_multiallelic_branch = (!variant_hethap_cts) && (!allele_presents_bytearr) && (!allele_ddosages) && (!imp_r2_vals);
1793 PgrSampleSubsetIndex pssi;
1794 PgrSetSampleSubsetIndex(sample_include_cumulative_popcounts, pgrp, &pssi);
1795 uint32_t cur_idx = (tidx * cur_block_size) / thread_ct;
1796 uintptr_t variant_uidx_base;
1797 uintptr_t variant_include_bits;
1798 BitIter1Start(variant_include, ctx->read_variant_uidx_starts[tidx], &variant_uidx_base, &variant_include_bits);
1799 uint32_t chr_end = 0;
1800 uint32_t is_x_or_y = 0;
1801 PglErr reterr = kPglRetSuccess;
1802
1803 STD_ARRAY_DECL(uint32_t, 4, genocounts);
1804 STD_ARRAY_DECL(uint32_t, 4, sex_specific_genocounts);
1805 for (; cur_idx != cur_idx_end; ++cur_idx) {
1806 const uint32_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &variant_include_bits);
1807 if (variant_uidx >= chr_end) {
1808 const uint32_t chr_fo_idx = GetVariantChrFoIdx(cip, variant_uidx);
1809 const uint32_t chr_idx = cip->chr_file_order[chr_fo_idx];
1810 chr_end = cip->chr_fo_vidx_start[chr_fo_idx + 1];
1811 is_y = 0;
1812 is_nonxy_haploid = 0;
1813 if (chr_idx == x_code) {
1814 is_x_or_y = 1;
1815 PgrClearSampleSubsetIndex(pgrp, &pssi);
1816 } else if (chr_idx == y_code) {
1817 is_x_or_y = 1;
1818 is_y = 1;
1819 // ugh
1820 if ((!allele_presents_bytearr) || (sample_ct == male_ct)) {
1821 PgrSetSampleSubsetIndex(sex_male_cumulative_popcounts, pgrp, &pssi);
1822 } else {
1823 PgrClearSampleSubsetIndex(pgrp, &pssi);
1824 }
1825 } else {
1826 if (is_x_or_y) {
1827 PgrSetSampleSubsetIndex(sample_include_cumulative_popcounts, pgrp, &pssi);
1828 }
1829 is_x_or_y = 0;
1830 // true for MT
1831 is_nonxy_haploid = IsSet(cip->haploid_mask, chr_idx);
1832 }
1833 }
1834 uintptr_t cur_allele_idx_offset;
1835 if (!allele_idx_offsets) {
1836 cur_allele_idx_offset = 2 * variant_uidx;
1837 } else {
1838 cur_allele_idx_offset = allele_idx_offsets[variant_uidx];
1839 allele_ct = allele_idx_offsets[variant_uidx + 1] - cur_allele_idx_offset;
1840 }
1841 uint32_t hethap_ct;
1842 if ((allele_ct == 2) || no_multiallelic_branch) {
1843 uint64_t cur_dosages[2];
1844 if (!is_x_or_y) {
1845 reterr = PgrGetDCounts(sample_include, sample_include_interleaved_vec, pssi, sample_ct, variant_uidx, is_minimac3_r2, pgrp, imp_r2_vals? (&(imp_r2_vals[variant_uidx])) : nullptr, genocounts, cur_dosages);
1846 if (unlikely(reterr)) {
1847 ctx->reterr = reterr;
1848 break;
1849 }
1850 if (allele_presents_bytearr) {
1851 if (cur_dosages[0]) {
1852 allele_presents_bytearr[cur_allele_idx_offset] = 128;
1853 }
1854 if (cur_dosages[1]) {
1855 allele_presents_bytearr[cur_allele_idx_offset + 1] = 128;
1856 }
1857 }
1858 if (!is_nonxy_haploid) {
1859 hethap_ct = 0;
1860 if (allele_ddosages) {
1861 // ...but save all allele counts here.
1862 allele_ddosages[cur_allele_idx_offset] = cur_dosages[0] * 2;
1863 allele_ddosages[cur_allele_idx_offset + 1] = cur_dosages[1] * 2;
1864 }
1865 } else {
1866 // this hethap_ct can be inaccurate in multiallelic case
1867 hethap_ct = genocounts[1];
1868 if (imp_r2_vals && (!is_minimac3_r2)) {
1869 // Assuming the input data isn't malformed "phased haploid",
1870 // minimac3-r2 is independent of haploid/diploid state; only
1871 // mach-r2 requires a haploid correction.
1872 imp_r2_vals[variant_uidx] *= 0.5;
1873 }
1874 if (allele_ddosages) {
1875 allele_ddosages[cur_allele_idx_offset] = cur_dosages[0];
1876 allele_ddosages[cur_allele_idx_offset + 1] = cur_dosages[1];
1877 }
1878 }
1879 } else if (is_y) {
1880 if ((!allele_presents_bytearr) || (sample_ct == male_ct)) {
1881 reterr = PgrGetDCounts(sex_male, sex_male_interleaved_vec, pssi, male_ct, variant_uidx, 0, pgrp, imp_r2_vals? (&(imp_r2_vals[variant_uidx])) : nullptr, genocounts, cur_dosages);
1882 if (unlikely(reterr)) {
1883 ctx->reterr = reterr;
1884 break;
1885 }
1886 hethap_ct = genocounts[1];
1887 if (imp_r2_vals && (!is_minimac3_r2)) {
1888 // note that female/unknown-sex are not counted here
1889 imp_r2_vals[variant_uidx] *= 0.5;
1890 }
1891 if (allele_presents_bytearr) {
1892 if (cur_dosages[0]) {
1893 allele_presents_bytearr[cur_allele_idx_offset] = 128;
1894 }
1895 if (cur_dosages[1]) {
1896 allele_presents_bytearr[cur_allele_idx_offset + 1] = 128;
1897 }
1898 }
1899 if (allele_ddosages) {
1900 allele_ddosages[cur_allele_idx_offset] = cur_dosages[0];
1901 allele_ddosages[cur_allele_idx_offset + 1] = cur_dosages[1];
1902 }
1903 } else {
1904 // ugh, need to count female/unknown-sex for allele_presents and
1905 // ignore elsewhere
1906 reterr = PgrGetD(nullptr, pssi, raw_sample_ct, variant_uidx, pgrp, pgv.genovec, pgv.dosage_present, pgv.dosage_main, &pgv.dosage_ct);
1907 if (unlikely(reterr)) {
1908 ctx->reterr = reterr;
1909 break;
1910 }
1911 const uint32_t dosage_is_relevant = pgv.dosage_ct && ((sample_ct == raw_sample_ct) || (!IntersectionIsEmpty(sample_include, pgv.dosage_present, raw_sample_ctl)));
1912 if (dosage_is_relevant) {
1913 // at least one dosage value is present, that's all we need to
1914 // know
1915 allele_presents_bytearr[cur_allele_idx_offset] = 128;
1916 allele_presents_bytearr[cur_allele_idx_offset + 1] = 128;
1917 } else {
1918 // only hardcalls matter
1919 // bugfix (31 Jul 2018): forgot to initialize genocounts here
1920 // possible todo: use a specialized function which just checks
1921 // which alleles exist
1922 if (sample_ct == raw_sample_ct) {
1923 ZeroTrailingNyps(raw_sample_ct, pgv.genovec);
1924 GenoarrCountFreqsUnsafe(pgv.genovec, sample_ct, genocounts);
1925 } else {
1926 GenoarrCountSubsetFreqs(pgv.genovec, sample_include_interleaved_vec, raw_sample_ct, sample_ct, genocounts);
1927 }
1928 if (genocounts[0] || genocounts[1]) {
1929 allele_presents_bytearr[cur_allele_idx_offset] = 128;
1930 }
1931 if (genocounts[1] || genocounts[2]) {
1932 allele_presents_bytearr[cur_allele_idx_offset + 1] = 128;
1933 }
1934 }
1935 GenoarrCountSubsetFreqs(pgv.genovec, sex_male_interleaved_vec, raw_sample_ct, male_ct, genocounts);
1936 hethap_ct = genocounts[1];
1937 // x2, x4 since this is haploid
1938 uintptr_t alt1_ct_x2 = genocounts[2] * 2 + hethap_ct;
1939 uintptr_t alt1_sq_sum_x4 = genocounts[2] * (4 * k1LU) + hethap_ct;
1940 uint64_t alt1_ddosage = 0; // in 32768ths
1941 uint64_t alt1_ddosage_sq_sum = 0;
1942 uint32_t additional_dosage_ct = 0;
1943 if (dosage_is_relevant) {
1944 uintptr_t sample_widx = 0;
1945 uintptr_t dosage_present_bits = pgv.dosage_present[0];
1946 uint32_t sample_uidx = 0;
1947 for (uint32_t dosage_idx = 0; dosage_idx != pgv.dosage_ct; ++dosage_idx) {
1948 const uintptr_t lowbit = BitIter1y(pgv.dosage_present, &sample_widx, &dosage_present_bits);
1949 if (sample_include[sample_widx] & lowbit) {
1950 const uintptr_t cur_dosage_val = pgv.dosage_main[dosage_idx];
1951 alt1_ddosage += cur_dosage_val;
1952 alt1_ddosage_sq_sum += cur_dosage_val * cur_dosage_val;
1953 const uintptr_t hardcall_code = GetNyparrEntry(pgv.genovec, sample_uidx);
1954 if (hardcall_code != 3) {
1955 alt1_ct_x2 -= hardcall_code;
1956 alt1_sq_sum_x4 -= hardcall_code * hardcall_code;
1957 } else {
1958 ++additional_dosage_ct;
1959 }
1960 }
1961 }
1962 }
1963 const uintptr_t obs_ct = male_ct + additional_dosage_ct - genocounts[3];
1964 alt1_ddosage += alt1_ct_x2 * S_CAST(uint64_t, kDosageMid);
1965 alt1_ddosage_sq_sum += alt1_sq_sum_x4 * 0x10000000LLU;
1966 cur_dosages[0] = obs_ct * S_CAST(uint64_t, kDosageMax) - alt1_ddosage;
1967 cur_dosages[1] = alt1_ddosage;
1968 if (imp_r2_vals) {
1969 // minimac3-r2 and mach-r2 are identical in haploid case
1970 const double dosage_sumd = u63tod(alt1_ddosage);
1971 const double dosage_avg = dosage_sumd / u31tod(obs_ct);
1972 const double dosage_variance = u63tod(alt1_ddosage_sq_sum) - dosage_sumd * dosage_avg;
1973 imp_r2_vals[variant_uidx] = dosage_variance / (dosage_sumd * (32768 - dosage_avg));
1974 }
1975 if (allele_ddosages) {
1976 allele_ddosages[cur_allele_idx_offset] = cur_dosages[0];
1977 allele_ddosages[cur_allele_idx_offset + 1] = alt1_ddosage;
1978 }
1979 }
1980 } else {
1981 // chrX
1982 reterr = PgrGetD(nullptr, pssi, raw_sample_ct, variant_uidx, pgrp, pgv.genovec, pgv.dosage_present, pgv.dosage_main, &pgv.dosage_ct);
1983 if (unlikely(reterr)) {
1984 ctx->reterr = reterr;
1985 break;
1986 }
1987 if (sample_ct == raw_sample_ct) {
1988 ZeroTrailingNyps(raw_sample_ct, pgv.genovec);
1989 GenoarrCountFreqsUnsafe(pgv.genovec, sample_ct, genocounts);
1990 } else {
1991 GenoarrCountSubsetFreqs(pgv.genovec, sample_include_interleaved_vec, raw_sample_ct, sample_ct, genocounts);
1992 }
1993 GenoarrCountSubsetFreqs(pgv.genovec, sex_male_interleaved_vec, raw_sample_ct, male_ct, sex_specific_genocounts);
1994 hethap_ct = sex_specific_genocounts[1];
1995 // Could compute imputation r2 iff there are no unknown-sex
1996 // samples, but probably not worth it since larger datasets could
1997 // have a small number of Klinefelter syndrome cases, etc. coded as
1998 // unknown-sex, and we don't want to discourage their inclusion;
1999 // let's delegate that chrX filter to other software for now.
2000
2001 if (allele_presents_bytearr) {
2002 if (pgv.dosage_ct && ((sample_ct == raw_sample_ct) || (!IntersectionIsEmpty(sample_include, pgv.dosage_present, raw_sample_ctl)))) {
2003 // at least one dosage value is present, that's all we need to
2004 // know
2005 allele_presents_bytearr[cur_allele_idx_offset] = 128;
2006 allele_presents_bytearr[cur_allele_idx_offset + 1] = 128;
2007 } else {
2008 // only hardcalls matter
2009 if (genocounts[0] || genocounts[1]) {
2010 allele_presents_bytearr[cur_allele_idx_offset] = 128;
2011 }
2012 if (genocounts[1] || genocounts[2]) {
2013 allele_presents_bytearr[cur_allele_idx_offset + 1] = 128;
2014 }
2015 }
2016 }
2017 if (allele_ddosages) {
2018 uintptr_t alt1_ct = 4 * genocounts[2] + 2 * genocounts[1] - 2 * sex_specific_genocounts[2] - hethap_ct; // nonmales count twice
2019 uint64_t alt1_ddosage = 0; // in 32768ths, nonmales count twice
2020 uint32_t additional_dosage_ct = 0; // missing hardcalls only; nonmales count twice
2021 // bugfix (12 Jul 2018): dosage_present may be null if dosage_ct
2022 // == 0
2023 if (pgv.dosage_ct) {
2024 uintptr_t sample_uidx_base = 0;
2025 uintptr_t dosage_present_bits = pgv.dosage_present[0];
2026 if (sample_ct == raw_sample_ct) {
2027 for (uint32_t dosage_idx = 0; dosage_idx != pgv.dosage_ct; ++dosage_idx) {
2028 const uintptr_t sample_uidx = BitIter1(pgv.dosage_present, &sample_uidx_base, &dosage_present_bits);
2029 const uintptr_t cur_dosage_val = pgv.dosage_main[dosage_idx];
2030 const uintptr_t sex_multiplier = 2 - IsSet(sex_male, sample_uidx);
2031 alt1_ddosage += cur_dosage_val * sex_multiplier;
2032
2033 // could call GenoarrCountSubsetIntersectFreqs() twice
2034 // instead, but since we've already manually extracted the
2035 // sex bit it probably doesn't help?
2036 const uintptr_t hardcall_code = GetNyparrEntry(pgv.genovec, sample_uidx);
2037 if (hardcall_code != 3) {
2038 alt1_ct -= hardcall_code * sex_multiplier;
2039 } else {
2040 additional_dosage_ct += sex_multiplier;
2041 }
2042 }
2043 } else {
2044 for (uint32_t dosage_idx = 0; dosage_idx != pgv.dosage_ct; ++dosage_idx) {
2045 const uintptr_t sample_uidx = BitIter1(pgv.dosage_present, &sample_uidx_base, &dosage_present_bits);
2046 if (IsSet(sample_include, sample_uidx)) {
2047 const uintptr_t cur_dosage_val = pgv.dosage_main[dosage_idx];
2048 const uintptr_t sex_multiplier = 2 - IsSet(sex_male, sample_uidx);
2049 alt1_ddosage += cur_dosage_val * sex_multiplier;
2050 const uintptr_t hardcall_code = GetNyparrEntry(pgv.genovec, sample_uidx);
2051 if (hardcall_code != 3) {
2052 alt1_ct -= hardcall_code * sex_multiplier;
2053 } else {
2054 additional_dosage_ct += sex_multiplier;
2055 }
2056 }
2057 }
2058 }
2059 }
2060 alt1_ddosage += alt1_ct * S_CAST(uint64_t, kDosageMid);
2061
2062 // bugfix (14 May 2018): this didn't correctly distinguish
2063 // between missing vs. 'replaced' hardcalls
2064 const uintptr_t weighted_obs_ct = (2 * (sample_ct - genocounts[3]) - male_ct + sex_specific_genocounts[3] + additional_dosage_ct) * (2 * k1LU);
2065
2066 allele_ddosages[cur_allele_idx_offset] = weighted_obs_ct * S_CAST(uint64_t, kDosageMid) - alt1_ddosage;
2067 allele_ddosages[cur_allele_idx_offset + 1] = alt1_ddosage;
2068 }
2069 if (x_male_geno_cts) {
2070 STD_ARRAY_REF(uint32_t, 3) cur_x_male_geno_cts = x_male_geno_cts[variant_uidx - x_start];
2071 cur_x_male_geno_cts[0] = sex_specific_genocounts[0];
2072 cur_x_male_geno_cts[1] = sex_specific_genocounts[1];
2073 cur_x_male_geno_cts[2] = sex_specific_genocounts[2];
2074 if (x_nosex_geno_cts) {
2075 GenoarrCountSubsetFreqs(pgv.genovec, nosex_interleaved_vec, raw_sample_ct, nosex_ct, sex_specific_genocounts);
2076 STD_ARRAY_REF(uint32_t, 3) cur_nosex_geno_cts = x_nosex_geno_cts[variant_uidx - x_start];
2077 cur_nosex_geno_cts[0] = sex_specific_genocounts[0];
2078 cur_nosex_geno_cts[1] = sex_specific_genocounts[1];
2079 cur_nosex_geno_cts[2] = sex_specific_genocounts[2];
2080 }
2081 }
2082 }
2083 if (variant_missing_dosage_cts) {
2084 uint32_t missing_dosage_ct;
2085 if (!is_x_or_y) {
2086 missing_dosage_ct = sample_ct - ((cur_dosages[0] + cur_dosages[1]) / kDosageMax);
2087 } else if (is_y) {
2088 missing_dosage_ct = male_ct - ((cur_dosages[0] + cur_dosages[1]) / kDosageMax);
2089 } else {
2090 if (pgv.dosage_ct) {
2091 ZeroTrailingNyps(raw_sample_ct, pgv.genovec);
2092 missing_dosage_ct = GenoarrCountMissingInvsubsetUnsafe(pgv.genovec, pgv.dosage_present, raw_sample_ct);
2093 } else {
2094 missing_dosage_ct = genocounts[3];
2095 }
2096 }
2097 variant_missing_dosage_cts[variant_uidx] = missing_dosage_ct;
2098 }
2099 } else {
2100 // multiallelic cases
2101 if (!is_x_or_y) {
2102 reterr = PgrGetMDCounts(sample_include, sample_include_interleaved_vec, pssi, sample_ct, variant_uidx, is_minimac3_r2, pgrp, imp_r2_vals? (&(imp_r2_vals[variant_uidx])) : nullptr, &hethap_ct, genocounts, all_dosages);
2103 if (unlikely(reterr)) {
2104 ctx->reterr = reterr;
2105 break;
2106 }
2107 if (allele_presents_bytearr) {
2108 for (uintptr_t aidx = 0; aidx != allele_ct; ++aidx) {
2109 if (all_dosages[aidx]) {
2110 allele_presents_bytearr[cur_allele_idx_offset + aidx] = 128;
2111 }
2112 }
2113 }
2114 if (!is_nonxy_haploid) {
2115 hethap_ct = 0;
2116 if (allele_ddosages) {
2117 for (uintptr_t aidx = 0; aidx != allele_ct; ++aidx) {
2118 allele_ddosages[cur_allele_idx_offset + aidx] = all_dosages[aidx] * 2;
2119 }
2120 }
2121 } else {
2122 if (imp_r2_vals && (!is_minimac3_r2)) {
2123 imp_r2_vals[variant_uidx] *= 0.5;
2124 }
2125 if (allele_ddosages) {
2126 memcpy(&(allele_ddosages[cur_allele_idx_offset]), all_dosages, allele_ct * sizeof(int64_t));
2127 }
2128 }
2129 } else if (is_y) {
2130 if ((!allele_presents_bytearr) || (sample_ct == male_ct)) {
2131 reterr = PgrGetMDCounts(sex_male, sex_male_interleaved_vec, pssi, male_ct, variant_uidx, 0, pgrp, imp_r2_vals? (&(imp_r2_vals[variant_uidx])) : nullptr, &hethap_ct, genocounts, all_dosages);
2132 if (unlikely(reterr)) {
2133 ctx->reterr = reterr;
2134 break;
2135 }
2136 if (imp_r2_vals && (!is_minimac3_r2)) {
2137 imp_r2_vals[variant_uidx] *= 0.5;
2138 }
2139 if (allele_presents_bytearr) {
2140 for (uintptr_t aidx = 0; aidx != allele_ct; ++aidx) {
2141 if (all_dosages[aidx]) {
2142 allele_presents_bytearr[cur_allele_idx_offset + aidx] = 128;
2143 }
2144 }
2145 }
2146 if (allele_ddosages) {
2147 memcpy(&(allele_ddosages[cur_allele_idx_offset]), all_dosages, allele_ct * sizeof(int64_t));
2148 }
2149 } else {
2150 // need to count female/unknown-sex for allele_presents and
2151 // ignore elsewhere
2152 reterr = PgrGetM(nullptr, pssi, raw_sample_ct, variant_uidx, pgrp, &pgv);
2153 if (unlikely(reterr)) {
2154 ctx->reterr = reterr;
2155 break;
2156 }
2157 // possible todo: use a specialized function which just checks
2158 // which alleles exist
2159 ZeroTrailingNyps(raw_sample_ct, pgv.genovec);
2160 GetMFlatCounts64(sample_include, sample_include_interleaved_vec, &pgv, raw_sample_ct, sample_ct, allele_ct, genocounts, all_dosages);
2161 for (uintptr_t aidx = 0; aidx != allele_ct; ++aidx) {
2162 if (all_dosages[aidx]) {
2163 allele_presents_bytearr[cur_allele_idx_offset + aidx] = 128;
2164 }
2165 }
2166
2167 uint64_t* two_cts = &(all_dosages[allele_ct]);
2168 GetMCounts64(sex_male, sex_male_interleaved_vec, &pgv, raw_sample_ct, male_ct, allele_ct, genocounts, all_dosages, two_cts);
2169 uintptr_t hethap_x2 = 0;
2170 for (uint32_t aidx = 0; aidx != allele_ct; ++aidx) {
2171 hethap_x2 += all_dosages[aidx];
2172 }
2173 hethap_ct = hethap_x2 / 2;
2174 if (allele_ddosages) {
2175 for (uintptr_t aidx = 0; aidx != allele_ct; ++aidx) {
2176 allele_ddosages[cur_allele_idx_offset + aidx] = all_dosages[aidx] * kDosageMid + two_cts[aidx] * kDosageMax;
2177 }
2178 }
2179 if (imp_r2_vals) {
2180 for (uint32_t aidx = 0; aidx != allele_ct; ++aidx) {
2181 const uint64_t one_ct = allele_ddosages[aidx];
2182 const uint64_t two_ct = two_cts[aidx];
2183 // now sums
2184 allele_ddosages[aidx] = one_ct * kDosageMid + two_ct * kDosageMax;
2185 // now ssqs
2186 two_cts[aidx] = one_ct * kDosageMid * kDosageMid + two_ct * kDosageMax * kDosageMax;
2187 }
2188 imp_r2_vals[variant_uidx] = 0.5 * MultiallelicDiploidMachR2(all_dosages, two_cts, male_ct - genocounts[3], allele_ct);
2189 }
2190 }
2191 } else {
2192 // chrX
2193 // multiallelic dosages not supported yet
2194 reterr = PgrGetM(nullptr, pssi, raw_sample_ct, variant_uidx, pgrp, &pgv);
2195 if (unlikely(reterr)) {
2196 ctx->reterr = reterr;
2197 break;
2198 }
2199 ZeroTrailingNyps(raw_sample_ct, pgv.genovec);
2200 // We don't attempt to compute imp_r2 on chrX, so flat counts are
2201 // fine.
2202 GetMFlatCounts64(sample_include, sample_include_interleaved_vec, &pgv, raw_sample_ct, sample_ct, allele_ct, genocounts, all_dosages);
2203
2204 // Double all counts, then subtract male counts.
2205 for (uint32_t aidx = 0; aidx != allele_ct; ++aidx) {
2206 all_dosages[aidx] *= 2;
2207 }
2208 GenoarrCountSubsetFreqs(pgv.genovec, sex_male_interleaved_vec, raw_sample_ct, male_ct, sex_specific_genocounts);
2209 hethap_ct = sex_specific_genocounts[1];
2210 if (male_ct) {
2211 all_dosages[0] -= 2 * sex_specific_genocounts[0] + hethap_ct;
2212
2213 // may underflow
2214 all_dosages[1] -= 2 * sex_specific_genocounts[2] + hethap_ct;
2215
2216 if (pgv.patch_01_ct) {
2217 uintptr_t sample_widx = 0;
2218 uintptr_t patch_01_bits = pgv.patch_01_set[0];
2219 uint32_t male_patch_01_ct = 0;
2220 for (uint32_t uii = 0; uii != pgv.patch_01_ct; ++uii) {
2221 const uintptr_t lowbit = BitIter1y(pgv.patch_01_set, &sample_widx, &patch_01_bits);
2222 if (sex_male[sample_widx] & lowbit) {
2223 ++male_patch_01_ct;
2224 all_dosages[pgv.patch_01_vals[uii]] -= 1;
2225 }
2226 }
2227 all_dosages[1] += male_patch_01_ct;
2228 }
2229 if (pgv.patch_10_ct) {
2230 uintptr_t sample_widx = 0;
2231 uintptr_t patch_10_bits = pgv.patch_10_set[0];
2232 uint32_t male_patch_10_ct = 0;
2233 for (uint32_t uii = 0; uii != pgv.patch_10_ct; ++uii) {
2234 const uintptr_t lowbit = BitIter1y(pgv.patch_10_set, &sample_widx, &patch_10_bits);
2235 if (sex_male[sample_widx] & lowbit) {
2236 ++male_patch_10_ct;
2237 const AlleleCode code_lo = pgv.patch_10_vals[2 * uii];
2238 const AlleleCode code_hi = pgv.patch_10_vals[2 * uii + 1];
2239 all_dosages[code_lo] -= 1;
2240 all_dosages[code_hi] -= 1;
2241 hethap_ct += (code_lo != code_hi);
2242 }
2243 }
2244 all_dosages[1] += male_patch_10_ct * 2;
2245 }
2246 }
2247 if (allele_presents_bytearr) {
2248 for (uintptr_t allele_idx = 0; allele_idx != allele_ct; ++allele_idx) {
2249 if (all_dosages[allele_idx]) {
2250 allele_presents_bytearr[cur_allele_idx_offset + allele_idx] = 128;
2251 }
2252 }
2253 }
2254 if (allele_ddosages) {
2255 for (uintptr_t aidx = 0; aidx != allele_ct; ++aidx) {
2256 allele_ddosages[cur_allele_idx_offset + aidx] = all_dosages[aidx] * kDosageMid;
2257 }
2258 }
2259 if (x_male_geno_cts) {
2260 STD_ARRAY_REF(uint32_t, 3) cur_x_male_geno_cts = x_male_geno_cts[variant_uidx - x_start];
2261 cur_x_male_geno_cts[0] = sex_specific_genocounts[0];
2262 cur_x_male_geno_cts[1] = sex_specific_genocounts[1];
2263 cur_x_male_geno_cts[2] = sex_specific_genocounts[2];
2264 if (x_nosex_geno_cts) {
2265 GenoarrCountSubsetFreqs(pgv.genovec, nosex_interleaved_vec, raw_sample_ct, nosex_ct, sex_specific_genocounts);
2266 STD_ARRAY_REF(uint32_t, 3) cur_nosex_geno_cts = x_nosex_geno_cts[variant_uidx - x_start];
2267 cur_nosex_geno_cts[0] = sex_specific_genocounts[0];
2268 cur_nosex_geno_cts[1] = sex_specific_genocounts[1];
2269 cur_nosex_geno_cts[2] = sex_specific_genocounts[2];
2270 }
2271 }
2272 }
2273 if (variant_missing_dosage_cts) {
2274 // multiallelic dosage not supported yet
2275 variant_missing_dosage_cts[variant_uidx] = genocounts[3];
2276 }
2277 }
2278 if (raw_geno_cts) {
2279 STD_ARRAY_REF(uint32_t, 3) cur_raw_geno_cts = raw_geno_cts[variant_uidx];
2280 cur_raw_geno_cts[0] = genocounts[0];
2281 cur_raw_geno_cts[1] = genocounts[1];
2282 cur_raw_geno_cts[2] = genocounts[2];
2283 }
2284 if (variant_missing_hc_cts) {
2285 variant_missing_hc_cts[variant_uidx] = genocounts[3];
2286 if (variant_hethap_cts && (variant_uidx >= first_hap_uidx)) {
2287 variant_hethap_cts[variant_uidx - first_hap_uidx] = hethap_ct;
2288 }
2289 }
2290 }
2291 if ((++subset_idx == subset_ct) || reterr) {
2292 break;
2293 }
2294 sample_include = ctx->founder_info;
2295 sample_include_interleaved_vec = ctx->founder_info_interleaved_vec;
2296 sample_include_cumulative_popcounts = ctx->founder_info_cumulative_popcounts;
2297 sex_male = ctx->founder_male;
2298 sex_male_interleaved_vec = ctx->founder_male_interleaved_vec;
2299 sex_male_cumulative_popcounts = ctx->founder_male_cumulative_popcounts;
2300
2301 nosex_interleaved_vec = ctx->founder_nosex_interleaved_vec;
2302
2303 sample_ct = ctx->founder_ct;
2304 male_ct = ctx->founder_male_ct;
2305 nosex_ct = ctx->founder_nosex_ct;
2306 allele_presents_bytearr = nullptr;
2307 allele_ddosages = ctx->founder_allele_ddosages;
2308 variant_missing_hc_cts = nullptr;
2309 variant_missing_dosage_cts = nullptr;
2310 raw_geno_cts = ctx->founder_raw_geno_cts;
2311 x_male_geno_cts = ctx->founder_x_male_geno_cts;
2312 x_nosex_geno_cts = ctx->founder_x_nosex_geno_cts;
2313 imp_r2_vals = nullptr;
2314 }
2315 } while (!THREAD_BLOCK_FINISH(arg));
2316 THREAD_RETURN;
2317 }
2318
// LoadAlleleAndGenoCounts():
// Driver for the multithreaded allele/genotype counting pass.  It fills in a
// LoadAlleleAndGenoCountsCtx, sets up block-wise .pgen reading through
// PgenMtLoadInit()/MultireadNonempty(), and runs
// LoadAlleleAndGenoCountsThread on each block of variants.
//
// - Every output array pointer is tested against nullptr in this function
//   before it is used, so callers request outputs by passing non-null
//   pointers.
// - The primary pass uses sample_include, unless none of allele_presents,
//   allele_ddosages, raw_geno_cts, variant_missing_hc_cts, and
//   variant_missing_dosage_cts were requested; in that case the primary pass
//   uses founder_info and writes to the founder_* outputs.
// - Returns kPglRetSuccess immediately when variant_ct is zero.
// - When the primary subset is empty, the requested allele_presents,
//   allele_ddosages, and raw_geno_cts arrays are zeroed and the function
//   returns without reading any genotypes.
// - Returns kPglRetNomem when an allocation, PgenMtLoadInit(), or
//   SetThreadCt() fails, and kPglRetThreadCreateFail when SpawnThreads()
//   fails.  Read errors and worker-thread errors are passed to
//   PgenErrPrintN() and returned as-is.
// - On every exit path, both ends of bigstack are reset to their state on
//   entry, and pgfip->block_base is set to nullptr.
2319 PglErr LoadAlleleAndGenoCounts(const uintptr_t* sample_include, const uintptr_t* founder_info, const uintptr_t* sex_nm, const uintptr_t* sex_male, const uintptr_t* variant_include, const ChrInfo* cip, const uintptr_t* allele_idx_offsets, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t founder_ct, uint32_t male_ct, uint32_t nosex_ct, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t first_hap_uidx, uint32_t is_minimac3_r2, uint32_t max_thread_ct, uintptr_t pgr_alloc_cacheline_ct, PgenFileInfo* pgfip, uintptr_t* allele_presents, uint64_t* allele_ddosages, uint64_t* founder_allele_ddosages, uint32_t* variant_missing_hc_cts, uint32_t* variant_missing_dosage_cts, uint32_t* variant_hethap_cts, STD_ARRAY_PTR_DECL(uint32_t, 3, raw_geno_cts), STD_ARRAY_PTR_DECL(uint32_t, 3, founder_raw_geno_cts), STD_ARRAY_PTR_DECL(uint32_t, 3, x_male_geno_cts), STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_male_geno_cts), STD_ARRAY_PTR_DECL(uint32_t, 3, x_nosex_geno_cts), STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_nosex_geno_cts), double* imp_r2_vals) {
// Both bigstack ends are saved here: nosex_buf is allocated from the far end
// below, everything else from the near end.
2320 unsigned char* bigstack_mark = g_bigstack_base;
2321 unsigned char* bigstack_end_mark = g_bigstack_end;
2322 PglErr reterr = kPglRetSuccess;
2323 ThreadGroup tg;
2324 PreinitThreads(&tg);
2325 LoadAlleleAndGenoCountsCtx ctx;
2326 {
2327 if (!variant_ct) {
2328 goto LoadAlleleAndGenoCounts_ret_1;
2329 }
2330
2331 // four cases:
2332 // 1. allele_ddosages, raw_geno_cts, and/or variant_missing_{hc,dosage}_cts
2333 // required, and that's it
2334 // 2. founder_allele_ddosages and/or founder_raw_geno_cts required, and
2335 // that's it
2336 // 3. both required, and founder_ct != sample_ct.
2337 // 4. both required, and founder_ct == sample_ct. caller is expected to
2338 // make founder_allele_ddosages and allele_ddosages point to the same
2339 // memory, ditto for founder_raw_geno_cts/raw_geno_cts.
2340 const uint32_t only_founder_cts_required = (!allele_presents) && (!allele_ddosages) && (!raw_geno_cts) && (!variant_missing_hc_cts) && (!variant_missing_dosage_cts);
2341 const uint32_t two_subsets_required = (founder_ct != sample_ct) && (!only_founder_cts_required) && (founder_allele_ddosages || founder_raw_geno_cts);
// Primary-pass configuration: the founder_* inputs/outputs are substituted
// when only founder counts were requested.
2342 ctx.cip = cip;
2343 ctx.sample_include = only_founder_cts_required? founder_info : sample_include;
2344 ctx.raw_sample_ct = raw_sample_ct;
2345 ctx.sample_ct = only_founder_cts_required? founder_ct : sample_ct;
2346 ctx.male_ct = male_ct;
2347 ctx.allele_ddosages = only_founder_cts_required? founder_allele_ddosages : allele_ddosages;
2348 ctx.raw_geno_cts = only_founder_cts_required? founder_raw_geno_cts : raw_geno_cts;
2349 ctx.x_male_geno_cts = only_founder_cts_required? founder_x_male_geno_cts : x_male_geno_cts;
2350 ctx.x_nosex_geno_cts = only_founder_cts_required? founder_x_nosex_geno_cts : x_nosex_geno_cts;
2351 ctx.imp_r2_vals = imp_r2_vals;
2352 const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
2353 const uint32_t raw_sample_ctv = BitCtToVecCt(raw_sample_ct);
2354 if (unlikely(
2355 bigstack_alloc_w(raw_sample_ctv * kWordsPerVec, &ctx.sample_include_interleaved_vec) ||
2356 bigstack_alloc_u32(raw_sample_ctl, &ctx.sample_include_cumulative_popcounts) ||
2357 bigstack_alloc_w(raw_sample_ctv * kWordsPerVec, &ctx.sex_male_interleaved_vec) ||
2358 bigstack_alloc_u32(raw_sample_ctl, &ctx.sex_male_cumulative_popcounts))) {
2359 goto LoadAlleleAndGenoCounts_ret_NOMEM;
2360 }
2361 FillInterleavedMaskVec(ctx.sample_include, raw_sample_ctv, ctx.sample_include_interleaved_vec);
2362 FillCumulativePopcounts(ctx.sample_include, raw_sample_ctl, ctx.sample_include_cumulative_popcounts);
// sex_male is used unchanged unless the primary pass is founder-only and
// founders are a proper subset; then a founder-restricted copy is built.
2363 if ((founder_ct == sample_ct) || (!only_founder_cts_required)) {
2364 ctx.sex_male = sex_male;
2365 } else {
2366 // no nonfounder counts required
2367 uintptr_t* new_sex_male;
2368 if (unlikely(bigstack_alloc_w(raw_sample_ctl, &new_sex_male))) {
2369 goto LoadAlleleAndGenoCounts_ret_NOMEM;
2370 }
2371 BitvecAndCopy(sex_male, founder_info, raw_sample_ctl, new_sex_male);
2372 ZeroTrailingWords(raw_sample_ctl, new_sex_male);
2373 ctx.sex_male = new_sex_male;
2374 }
2375 FillInterleavedMaskVec(ctx.sex_male, raw_sample_ctv, ctx.sex_male_interleaved_vec);
2376 FillCumulativePopcounts(ctx.sex_male, raw_sample_ctl, ctx.sex_male_cumulative_popcounts);
// The nosex mask is only built when some x_nosex_geno_cts output was
// requested; otherwise nosex_ct is forced to zero.
2377 if (!(x_nosex_geno_cts || founder_x_nosex_geno_cts)) {
2378 nosex_ct = 0;
2379 }
2380 ctx.nosex_ct = nosex_ct;
2381 ctx.nosex_interleaved_vec = nullptr;
2382 uintptr_t* nosex_buf = nullptr;
2383 if (nosex_ct) {
// nosex_buf is temporary and comes from the far end of bigstack; it is
// released by the BigstackEndReset() call further down.
2384 if (unlikely(
2385 bigstack_end_alloc_w(raw_sample_ctl, &nosex_buf) ||
2386 bigstack_alloc_w(raw_sample_ctv * kWordsPerVec, &ctx.nosex_interleaved_vec))) {
2387 goto LoadAlleleAndGenoCounts_ret_NOMEM;
2388 }
2389 BitvecInvmaskCopy(ctx.sample_include, sex_nm, raw_sample_ctl, nosex_buf);
2390 ZeroTrailingWords(raw_sample_ctl, nosex_buf);
2391 FillInterleavedMaskVec(nosex_buf, raw_sample_ctv, ctx.nosex_interleaved_vec);
2392 }
2393
2394 ctx.variant_missing_hc_cts = variant_missing_hc_cts;
2395 ctx.variant_missing_dosage_cts = variant_missing_dosage_cts;
2396 ctx.variant_hethap_cts = variant_hethap_cts;
2397 ctx.first_hap_uidx = first_hap_uidx;
2398 ctx.is_minimac3_r2 = is_minimac3_r2;
2399
// Second-pass (founder) fields default to null/zero; they are only filled in
// when two_subsets_required and founder_ct is nonzero.
2400 ctx.founder_info = nullptr;
2401 ctx.founder_info_interleaved_vec = nullptr;
2402 ctx.founder_info_cumulative_popcounts = nullptr;
2403 ctx.founder_male = nullptr;
2404 ctx.founder_male_interleaved_vec = nullptr;
2405 ctx.founder_male_cumulative_popcounts = nullptr;
2406 ctx.founder_nosex_interleaved_vec = nullptr;
2407 ctx.founder_ct = 0;
2408 ctx.founder_male_ct = 0;
2409 ctx.founder_nosex_ct = 0;
2410 ctx.founder_allele_ddosages = nullptr;
2411 ctx.founder_raw_geno_cts = nullptr;
2412 ctx.founder_x_male_geno_cts = nullptr;
2413 ctx.founder_x_nosex_geno_cts = nullptr;
2414 if (two_subsets_required) {
2415 if (founder_ct) {
2416 ctx.founder_info = founder_info;
2417 if (unlikely(
2418 bigstack_alloc_w(raw_sample_ctv * kWordsPerVec, &ctx.founder_info_interleaved_vec) ||
2419 bigstack_alloc_u32(raw_sample_ctl, &ctx.founder_info_cumulative_popcounts) ||
2420 bigstack_alloc_w(raw_sample_ctl, &ctx.founder_male) ||
2421 bigstack_alloc_w(raw_sample_ctv * kWordsPerVec, &ctx.founder_male_interleaved_vec) ||
2422 bigstack_alloc_u32(raw_sample_ctl, &ctx.founder_male_cumulative_popcounts))) {
2423 goto LoadAlleleAndGenoCounts_ret_NOMEM;
2424 }
2425 FillInterleavedMaskVec(founder_info, raw_sample_ctv, ctx.founder_info_interleaved_vec);
2426 FillCumulativePopcounts(founder_info, raw_sample_ctl, ctx.founder_info_cumulative_popcounts);
2427 BitvecAndCopy(sex_male, founder_info, raw_sample_ctl, ctx.founder_male);
2428 ZeroTrailingWords(raw_sample_ctl, ctx.founder_male);
2429 FillInterleavedMaskVec(ctx.founder_male, raw_sample_ctv, ctx.founder_male_interleaved_vec);
2430 FillCumulativePopcounts(ctx.founder_male, raw_sample_ctl, ctx.founder_male_cumulative_popcounts);
2431 ctx.founder_ct = founder_ct;
// Total set bits = cumulative popcount entering the last word, plus the
// popcount of the last word itself.
2432 ctx.founder_male_ct = ctx.founder_male_cumulative_popcounts[raw_sample_ctl - 1] + PopcountWord(ctx.founder_male[raw_sample_ctl - 1]);
2433 ctx.founder_allele_ddosages = founder_allele_ddosages;
2434 ctx.founder_raw_geno_cts = founder_raw_geno_cts;
2435 ctx.founder_x_male_geno_cts = founder_x_male_geno_cts;
2436 if (nosex_ct) {
2437 // caller currently responsible for ensuring that when
2438 // founder_nosex_ct is zero, founder_x_nosex_geno_cts ==
2439 // nullptr
2440 if (unlikely(bigstack_alloc_w(raw_sample_ctv * kWordsPerVec, &ctx.founder_nosex_interleaved_vec))) {
2441 goto LoadAlleleAndGenoCounts_ret_NOMEM;
2442 }
// nosex_buf is modified in place here; ctx.nosex_interleaved_vec was already
// built from its previous contents.
2443 BitvecAnd(founder_info, raw_sample_ctl, nosex_buf);
2444 ctx.founder_nosex_ct = PopcountWords(nosex_buf, raw_sample_ctl);
2445 assert(ctx.founder_nosex_ct);
2446 ZeroTrailingWords(raw_sample_ctl, nosex_buf);
2447 FillInterleavedMaskVec(nosex_buf, raw_sample_ctv, ctx.founder_nosex_interleaved_vec);
2448 ctx.founder_x_nosex_geno_cts = founder_x_nosex_geno_cts;
2449 }
2450 } else {
// No founders: the founder outputs are zeroed here, and the founder fields
// of ctx keep their null/zero defaults.
2451 if (founder_allele_ddosages) {
2452 ZeroU64Arr(allele_idx_offsets? allele_idx_offsets[raw_variant_ct] : (2 * raw_variant_ct), founder_allele_ddosages);
2453 }
2454 if (founder_raw_geno_cts) {
2455 memset(founder_raw_geno_cts, 0, raw_variant_ct * (3 * sizeof(int32_t)));
2456 }
2457 }
2458 } else if (founder_ct == sample_ct) {
2459 // bugfix: some founder and some nonfounder counts required
2460 if ((!ctx.allele_ddosages) && founder_allele_ddosages) {
2461 ctx.allele_ddosages = founder_allele_ddosages;
2462 }
2463 if ((!ctx.raw_geno_cts) && founder_raw_geno_cts) {
2464 ctx.raw_geno_cts = founder_raw_geno_cts;
2465 }
2466 if ((!ctx.x_male_geno_cts) && founder_x_male_geno_cts) {
2467 ctx.x_male_geno_cts = founder_x_male_geno_cts;
2468 }
2469 if ((!ctx.x_nosex_geno_cts) && founder_x_nosex_geno_cts) {
2470 ctx.x_nosex_geno_cts = founder_x_nosex_geno_cts;
2471 }
2472 } else if (only_founder_cts_required) {
// Founder-only primary pass with founder_ct != sample_ct: recompute male and
// nosex counts from the founder-restricted masks built above.
2473 ctx.male_ct = ctx.sex_male_cumulative_popcounts[raw_sample_ctl - 1] + PopcountWord(ctx.sex_male[raw_sample_ctl - 1]);
2474 if (nosex_ct) {
2475 ctx.nosex_ct = PopcountWords(nosex_buf, raw_sample_ctl);
2476 }
2477 }
// Without allele_idx_offsets, every variant is treated as having exactly two
// alleles.
2478 const uintptr_t raw_allele_ct = allele_idx_offsets? allele_idx_offsets[raw_variant_ct] : (2 * raw_variant_ct);
2479 if (!ctx.sample_ct) {
2480 if (allele_presents) {
2481 ZeroWArr(BitCtToWordCt(raw_allele_ct), allele_presents);
2482 }
2483 if (ctx.allele_ddosages) {
2484 ZeroU64Arr(raw_allele_ct, ctx.allele_ddosages);
2485 }
2486 if (ctx.raw_geno_cts) {
2487 memset(ctx.raw_geno_cts, 0, raw_variant_ct * (3 * sizeof(int32_t)));
2488 }
2489 // early exit
2490 goto LoadAlleleAndGenoCounts_ret_1;
2491 }
2492 BigstackEndReset(bigstack_end_mark); // free nosex_buf
2493 if (allele_presents) {
2494 const uintptr_t raw_allele_ct_a64 = RoundUpPow2(raw_allele_ct, kCacheline);
2495 if (unlikely(bigstack_left() < raw_allele_ct_a64)) {
2496 goto LoadAlleleAndGenoCounts_ret_NOMEM;
2497 }
2498 // fill byte-array instead of bitarray so multithreading works
2499 ctx.allele_presents_bytearr = S_CAST(unsigned char*, bigstack_alloc_raw(raw_allele_ct_a64));
2500 memset(ctx.allele_presents_bytearr, 0, raw_allele_ct_a64);
2501 } else {
2502 ctx.allele_presents_bytearr = nullptr;
2503 }
2504
2505 uint32_t unused_chr_code;
2506 uint32_t unused_chr_code2;
// xy_complications_present: an allele/dosage-type output was requested and
// chrX exists, or allele_presents was requested with some non-male samples
// and chrY exists.
2507 uint32_t xy_complications_present = ((allele_presents || allele_ddosages || founder_allele_ddosages || variant_missing_dosage_cts) && XymtExists(cip, kChrOffsetX, &unused_chr_code)) || (allele_presents && (sample_ct != male_ct) && XymtExists(cip, kChrOffsetY, &unused_chr_code2));
2508 const uint32_t xy_dosages_needed = (pgfip->gflags & kfPgenGlobalDosagePresent) && xy_complications_present;
2509
2510 // todo: check when this saturates
2511 uint32_t calc_thread_ct = (max_thread_ct > 2)? (max_thread_ct - 1) : max_thread_ct;
2512 const uint32_t max_allele_ct = pgfip->max_allele_ct;
2513 uint32_t mhc_needed = 0;
2514 ctx.thread_read_mhc = nullptr;
// Per-thread all_dosages scratch buffers are only allocated when the file
// has a variant with more than two alleles and an output that needs them was
// requested.
2515 if ((max_allele_ct > 2) && (variant_hethap_cts || allele_presents || allele_ddosages || founder_allele_ddosages || imp_r2_vals)) {
2516 if (unlikely(
2517 bigstack_alloc_u64p(calc_thread_ct, &ctx.all_dosages))) {
2518 goto LoadAlleleAndGenoCounts_ret_NOMEM;
2519 }
2520 mhc_needed = (xy_complications_present || ((variant_hethap_cts || imp_r2_vals) && XymtExists(cip, kChrOffsetX, &unused_chr_code)));
2521 for (uint32_t tidx = 0; tidx != calc_thread_ct; ++tidx) {
2522 // double allocation size, to leave room for chrY ssqs
2523 if (unlikely(
2524 bigstack_alloc_u64(max_allele_ct * 2, &(ctx.all_dosages[tidx])))) {
2525 goto LoadAlleleAndGenoCounts_ret_NOMEM;
2526 }
2527 }
2528 } else {
2529 ctx.all_dosages = nullptr;
2530 }
2531 STD_ARRAY_DECL(unsigned char*, 2, main_loadbufs);
2532 // defensive
2533 ctx.dosage_presents = nullptr;
2534 ctx.dosage_mains = nullptr;
2535 uint32_t read_block_size;
2536 // todo: check if raw_sample_ct should be replaced with sample_ct here
// PgenMtLoadInit() receives &calc_thread_ct, so the thread count used below
// is whatever value it leaves there.  Any failure is reported as
// out-of-memory.
2537 if (unlikely(PgenMtLoadInit(variant_include, raw_sample_ct, variant_ct, bigstack_left(), pgr_alloc_cacheline_ct, 0, 0, 0, pgfip, &calc_thread_ct, &ctx.genovecs, mhc_needed? (&ctx.thread_read_mhc) : nullptr, nullptr, nullptr, xy_dosages_needed? (&ctx.dosage_presents) : nullptr, xy_dosages_needed? (&ctx.dosage_mains) : nullptr, nullptr, nullptr, &read_block_size, nullptr, main_loadbufs, &ctx.pgr_ptrs, &ctx.read_variant_uidx_starts))) {
2538 goto LoadAlleleAndGenoCounts_ret_NOMEM;
2539 }
2540 if (unlikely(SetThreadCt(calc_thread_ct, &tg))) {
2541 goto LoadAlleleAndGenoCounts_ret_NOMEM;
2542 }
2543 ctx.variant_include = variant_include;
2544 ctx.allele_idx_offsets = allele_idx_offsets;
2545 ctx.reterr = kPglRetSuccess;
2546 SetThreadFuncAndData(LoadAlleleAndGenoCountsThread, &ctx, &tg);
2547
2548 logputs("Calculating allele frequencies... ");
2549 fputs("0%", stdout);
2550 fflush(stdout);
2551 uint32_t pct = 0;
2552
2553 uint32_t parity = 0;
2554 uint32_t read_block_idx = 0;
2555 uint32_t next_print_variant_idx = variant_ct / 100;
// Main loop.  Each iteration reads a block, joins the workers spawned on the
// previous iteration (none on the first), and then spawns workers on the
// block just read.  variant_idx counts variants in blocks handed to workers
// on earlier iterations; the loop exits once it equals variant_ct.
2556 for (uint32_t variant_idx = 0; ; ) {
2557 const uint32_t cur_block_size = MultireadNonempty(variant_include, &tg, raw_variant_ct, read_block_size, pgfip, &read_block_idx, &reterr);
2558 if (unlikely(reterr)) {
2559 goto LoadAlleleAndGenoCounts_ret_PGR_FAIL;
2560 }
2561 if (variant_idx) {
2562 JoinThreads(&tg);
// Pick up any error a worker recorded in ctx.reterr.
2563 reterr = ctx.reterr;
2564 if (unlikely(reterr)) {
2565 goto LoadAlleleAndGenoCounts_ret_PGR_FAIL;
2566 }
2567 }
2568 if (!IsLastBlock(&tg)) {
2569 ctx.cur_block_size = cur_block_size;
2570 ComputeUidxStartPartition(variant_include, cur_block_size, calc_thread_ct, read_block_idx * read_block_size, ctx.read_variant_uidx_starts);
2571 PgrCopyBaseAndOffset(pgfip, calc_thread_ct, ctx.pgr_ptrs);
2572 if (variant_idx + cur_block_size == variant_ct) {
2573 DeclareLastThreadBlock(&tg);
2574 }
2575 if (unlikely(SpawnThreads(&tg))) {
2576 goto LoadAlleleAndGenoCounts_ret_THREAD_CREATE_FAIL;
2577 }
2578 }
2579
// Flip to the other entry of main_loadbufs; it becomes pgfip->block_base at
// the bottom of the loop.
2580 parity = 1 - parity;
2581 if (variant_idx == variant_ct) {
2582 break;
2583 }
// Progress display: backspaces erase the previous percentage (one extra
// backspace once two digits have been printed).
2584 if (variant_idx >= next_print_variant_idx) {
2585 if (pct > 10) {
2586 putc_unlocked('\b', stdout);
2587 }
2588 pct = (variant_idx * 100LLU) / variant_ct;
2589 printf("\b\b%u%%", pct++);
2590 fflush(stdout);
2591 next_print_variant_idx = (pct * S_CAST(uint64_t, variant_ct)) / 100;
2592 }
2593
2594 ++read_block_idx;
2595 variant_idx += cur_block_size;
2596 // crucially, this is independent of the PgenReader block_base
2597 // pointers
2598 pgfip->block_base = main_loadbufs[parity];
2599 }
// Pack the per-allele byte array (zeroed above, one byte per allele) into
// the allele_presents bitarray: each output bit is the high bit of the
// corresponding byte.
2600 if (allele_presents) {
2601 const uintptr_t raw_allele_ctl = BitCtToWordCt(raw_allele_ct);
// The last word is cleared first, since the packing loops below may not
// write all of its bytes.
2602 allele_presents[raw_allele_ctl - 1] = 0;
2603 #ifdef __LP64__
2604 const uintptr_t vec_ct = DivUp(raw_allele_ct, kBytesPerVec);
2605 VecUc* bytearr_alias = R_CAST(VecUc*, ctx.allele_presents_bytearr);
2606 Vec8thUint* allele_presents_alias = R_CAST(Vec8thUint*, allele_presents);
2607 for (uintptr_t vec_idx = 0; vec_idx != vec_ct; ++vec_idx) {
2608 allele_presents_alias[vec_idx] = vecuc_movemask(bytearr_alias[vec_idx]);
2609 }
2610 #else
// Non-LP64 path: each output byte is assembled from two input words (eight
// input bytes).
2611 const uintptr_t twovec_ct = DivUp(raw_allele_ct, 8);
2612 uintptr_t* bytearr_iter = R_CAST(uintptr_t*, ctx.allele_presents_bytearr);
2613 unsigned char* allele_presents_iter = R_CAST(unsigned char*, allele_presents);
2614 unsigned char* allele_presents_stop = &(allele_presents_iter[twovec_ct]);
2615 for (; allele_presents_iter != allele_presents_stop; ++allele_presents_iter) {
2616 // 31,23,15,7 -> 3,2,1,0: multiply by number with bits 0,7,14,21 set,
2617 // then right-shift
2618 uintptr_t cur_word = ((*bytearr_iter++) * 0x204081) >> 28;
2619 cur_word |= ((*bytearr_iter++) * 0x204081) >> 24;
2620 *allele_presents_iter = cur_word;
2621 }
2622 #endif
2623 }
2624 if (pct > 10) {
2625 putc_unlocked('\b', stdout);
2626 }
2627 fputs("\b\b", stdout);
2628 logputs("done.\n");
2629 }
// Error labels: this loop body is never entered from the top; it is only
// reached by goto, and each label sets reterr (or reports it) before
// breaking out to the shared cleanup below.
2630 while (0) {
2631 LoadAlleleAndGenoCounts_ret_NOMEM:
2632 reterr = kPglRetNomem;
2633 break;
2634 LoadAlleleAndGenoCounts_ret_PGR_FAIL:
2635 PgenErrPrintN(reterr);
2636 break;
2637 LoadAlleleAndGenoCounts_ret_THREAD_CREATE_FAIL:
2638 reterr = kPglRetThreadCreateFail;
2639 break;
2640 }
// Shared cleanup for success, early-exit, and error paths.
2641 LoadAlleleAndGenoCounts_ret_1:
2642 CleanupThreads(&tg);
2643 BigstackDoubleReset(bigstack_mark, bigstack_end_mark);
2644 pgfip->block_base = nullptr;
2645 return reterr;
2646 }
2647
ApplyHardCallThresh(const uintptr_t * dosage_present,const Dosage * dosage_main,uint32_t dosage_ct,uint32_t hard_call_halfdist,uintptr_t * genovec)2648 void ApplyHardCallThresh(const uintptr_t* dosage_present, const Dosage* dosage_main, uint32_t dosage_ct, uint32_t hard_call_halfdist, uintptr_t* genovec) {
2649 uintptr_t sample_uidx_base = 0;
2650 uintptr_t cur_bits = dosage_present[0];
2651 for (uint32_t dosage_idx = 0; dosage_idx != dosage_ct; ++dosage_idx) {
2652 const uintptr_t sample_uidx = BitIter1(dosage_present, &sample_uidx_base, &cur_bits);
2653 const uint32_t dosage_int = dosage_main[dosage_idx];
2654 const uint32_t halfdist = BiallelicDosageHalfdist(dosage_int);
2655 const uintptr_t widx = sample_uidx / kBitsPerWordD2;
2656 uintptr_t prev_geno_word = genovec[widx];
2657 const uint32_t shift = (sample_uidx % kBitsPerWordD2) * 2;
2658 uintptr_t new_geno;
2659 if (halfdist < hard_call_halfdist) {
2660 new_geno = 3;
2661 } else {
2662 new_geno = (dosage_int + kDosage4th) / kDosageMid;
2663 }
2664 const uintptr_t prev_geno = (prev_geno_word >> shift) & 3;
2665 const uintptr_t geno_xor = new_geno ^ prev_geno;
2666 if (geno_xor) {
2667 genovec[widx] = prev_geno_word ^ (geno_xor << shift);
2668 }
2669 }
2670 }
2671
ApplyHardCallThreshPhased(const uintptr_t * dosage_present,const Dosage * dosage_main,uint32_t dosage_ct,uint32_t hard_call_halfdist,uintptr_t * genovec,uintptr_t * phasepresent,uintptr_t * phaseinfo,uintptr_t * dphase_present,SDosage * dphase_delta,SDosage * tmp_dphase_delta)2672 uint32_t ApplyHardCallThreshPhased(const uintptr_t* dosage_present, const Dosage* dosage_main, uint32_t dosage_ct, uint32_t hard_call_halfdist, uintptr_t* genovec, uintptr_t* phasepresent, uintptr_t* phaseinfo, uintptr_t* dphase_present, SDosage* dphase_delta, SDosage* tmp_dphase_delta) {
2673 // Generate new hphase values when we're converting a hardcall from
2674 // missing/hom to het, and abs(dphase_delta) > 0.5. Erase explicit dphase in
2675 // that case if dphase_delta is maximal.
2676 //
2677 // Erase hphase value when we're converting a hardcall from het to
2678 // missing/hom. If hardcall was previously phased and no explicit dphase
2679 // value existed, add it.
2680 //
2681 // Since both insertions and deletions are possible, we write the updated
2682 // dphase_delta to a buffer and copy it back, instead of editing in place.
2683 //
2684 // Returns final dphase_ct.
2685 //
2686 // Some extraneous phaseinfo bits may be set on return.
2687 const SDosage* dphase_read_iter = dphase_delta;
2688 SDosage* dphase_write_iter = tmp_dphase_delta;
2689 uintptr_t sample_uidx_base = 0;
2690 uintptr_t cur_bits = dosage_present[0];
2691 for (uint32_t dosage_idx = 0; dosage_idx != dosage_ct; ++dosage_idx) {
2692 const uintptr_t sample_uidx = BitIter1(dosage_present, &sample_uidx_base, &cur_bits);
2693 const uint32_t dosage_int = dosage_main[dosage_idx];
2694 const uint32_t halfdist = BiallelicDosageHalfdist(dosage_int);
2695 const uintptr_t widx = sample_uidx / kBitsPerWordD2;
2696 uintptr_t prev_geno_word = genovec[widx];
2697 const uint32_t shift = (sample_uidx % kBitsPerWordD2) * 2;
2698 uintptr_t new_geno;
2699 if (halfdist < hard_call_halfdist) {
2700 new_geno = 3;
2701 } else {
2702 new_geno = (dosage_int + kDosage4th) / kDosageMid;
2703 }
2704 const uintptr_t prev_geno = (prev_geno_word >> shift) & 3;
2705 const uintptr_t geno_xor = new_geno ^ prev_geno;
2706 const uint32_t cur_hphase_present = IsSet(phasepresent, sample_uidx);
2707 if (IsSet(dphase_present, sample_uidx)) {
2708 int32_t dphase_delta_val = *dphase_read_iter++;
2709 *dphase_write_iter++ = dphase_delta_val;
2710 if (geno_xor) {
2711 if (new_geno == 1) {
2712 const uint32_t neg_sign_bit = -(S_CAST(uint32_t, dphase_delta_val) >> 31);
2713 const uint32_t abs_dphase_delta_val = (S_CAST(uint32_t, dphase_delta_val) ^ neg_sign_bit) - neg_sign_bit;
2714 if (abs_dphase_delta_val > kDosage4th) {
2715 SetBit(sample_uidx, phasepresent);
2716 AssignBit(sample_uidx, neg_sign_bit + 1, phaseinfo);
2717 // is dphase_delta maximal?
2718 if ((abs_dphase_delta_val == dosage_int) || (abs_dphase_delta_val + dosage_int == kDosageMax)) {
2719 ClearBit(sample_uidx, dphase_present);
2720 --dphase_write_iter;
2721 }
2722 }
2723 } else {
2724 ClearBit(sample_uidx, phasepresent);
2725 }
2726 genovec[widx] = prev_geno_word ^ (geno_xor << shift);
2727 }
2728 } else {
2729 if (geno_xor) {
2730 if (cur_hphase_present) {
2731 assert(new_geno != 1);
2732 ClearBit(sample_uidx, phasepresent);
2733 SetBit(sample_uidx, dphase_present);
2734 int32_t new_dphase_delta_val = DosageHomdist(dosage_int);
2735 if (!IsSet(phaseinfo, sample_uidx)) {
2736 new_dphase_delta_val = -new_dphase_delta_val;
2737 }
2738 *dphase_write_iter++ = new_dphase_delta_val;
2739 }
2740 genovec[widx] = prev_geno_word ^ (geno_xor << shift);
2741 }
2742 }
2743 }
2744 const uint32_t dphase_ct = dphase_write_iter - tmp_dphase_delta;
2745 memcpy(dphase_delta, tmp_dphase_delta, dphase_ct * sizeof(Dosage));
2746 return dphase_ct;
2747 }
2748
2749 uintptr_t InitWriteAlleleIdxOffsets(const uintptr_t* variant_include, const uintptr_t* allele_idx_offsets, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uint32_t* new_variant_idx_to_old, uint32_t variant_ct, uintptr_t* new_allele_idx_offsets) {
2750 uintptr_t cur_offset = 0;
2751 if (allele_presents) {
2752 uint32_t ref_allele_idx = 0;
2753 uintptr_t variant_uidx_base = 0;
2754 uintptr_t cur_bits = 0;
2755 if (!new_variant_idx_to_old) {
2756 cur_bits = variant_include[0];
2757 }
2758 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
2759 uint32_t variant_uidx;
2760 if (new_variant_idx_to_old) {
2761 variant_uidx = new_variant_idx_to_old[variant_idx];
2762 } else {
2763 variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
2764 }
2765 new_allele_idx_offsets[variant_idx] = cur_offset;
2766 const uintptr_t old_offset_start = allele_idx_offsets[variant_uidx];
2767 const uintptr_t old_offset_end = allele_idx_offsets[variant_uidx + 1];
2768 uint32_t cur_allele_ct = old_offset_end - old_offset_start;
2769 if (cur_allele_ct > 2) {
2770 cur_allele_ct = PopcountBitRange(allele_presents, old_offset_start, old_offset_end);
2771 if (refalt1_select) {
2772 ref_allele_idx = refalt1_select[variant_uidx][0];
2773 }
2774 if (!IsSet(allele_presents, old_offset_start + ref_allele_idx)) {
2775 ++cur_allele_ct;
2776 }
2777 if (cur_allele_ct < 2) {
2778 cur_allele_ct = 2;
2779 }
2780 }
2781 cur_offset += cur_allele_ct;
2782 }
2783 } else if (!new_variant_idx_to_old) {
2784 uintptr_t variant_uidx_base = 0;
2785 uintptr_t cur_bits = variant_include[0];
2786 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
2787 const uintptr_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
2788 new_allele_idx_offsets[variant_idx] = cur_offset;
2789 cur_offset += allele_idx_offsets[variant_uidx + 1] - allele_idx_offsets[variant_uidx];
2790 }
2791 } else {
2792 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
2793 const uint32_t variant_uidx = new_variant_idx_to_old[variant_idx];
2794 new_allele_idx_offsets[variant_idx] = cur_offset;
2795 cur_offset += allele_idx_offsets[variant_uidx + 1] - allele_idx_offsets[variant_uidx];
2796 }
2797 }
2798 return cur_offset;
2799 }
2800
2801 // Join behavior:
2802 // - Require sorted input .pvar for now, though it won't be difficult to lift
2803 // this restriction later. Ok for input to contain multiallelic variants.
2804 // - Don't need to do anything different when chr:pos only appears once in a
2805 // biallelic variant, or in a multiallelic variant in "+any" mode. For
2806 // multiallelic variants in +both/+snps mode, error out if the variant is
2807 // mixed SNP/non-SNP. In +both case, also error out if the variant is mixed
2808 // symbolic/non-symbolic, or it's symbolic and does not satisfy the REF or
2809 // INFO:END constraints.
2810 // - When multiple variants have the same chr:pos:
2811 // - In +snps mode, also don't need to do anything different for non-SNPs.
2812 // - Otherwise, create up to three linked lists of input variant records
2813 // (need to distinguish SNPs from non-SNPs in +both mode, and within the
2814 // non-SNP category, symbolic alleles are separate from non-symbolic).
2815 // The variant with symbolic alleles has additional constraints: a warning
2816 // is printed if INFO:END isn't defined, and an error occurs if either REF
2817 // is multi-character, or there's an INFO:END mismatch.
2818 // - Error out if REF alleles aren't all consistent, or any ALT allele is
2819 // duplicated (note that --pmerge must support the latter).
2820 // - For joined not-entirely-SNP non-symbolic variants, the final REF is the
2821 // longest of the original REFs; ALT alleles have bases added to the end if
2822 // necessary. (Yes, this causes SNPs to stop being visible to a strlen ==
2823 // 1 check in +any mode, which is why + is interpreted as +both instead.)
2824 // - Final ALT allele order is based on allele frequency (highest first),
2825 // with ties broken by natural-sort.
2826 // - ID: 1. If --set-all-var-ids specified, apply template.
2827 // 2. Otherwise, keep original variant ID if all sources identical and
2828 // nonmissing.
2829 // 3. Otherwise, if vid-join specified, check if all original IDs are
2830 // nonmissing and contain exactly one ';' per extra ALT allele. If
2831 // so, join on ';' in final ALT allele order.
2832 // 4. Otherwise, set to --set-missing-var-ids template or missing code.
2833 // - QUAL is minimum of inputs ('.' treated as positive infinity). FILTER is
2834 // natural-sorted union of non-PASS values; if all inputs were '.'/PASS,
2835 // output is PASS unless all inputs were missing.
2836 // - INFO join is based on the Number field in the key's header line. (If
2837 // there's no header line corresponding to a key, we error out.) For
2838 // Number=A, we join in the obvious manner. For Number=R (or Number=G in
2839 // the haploid case), we replace the reference allele entry with . iff
2840 // there's any string mismatch (e.g. '13' and '13.0' will be treated as
2841 // unequal), and print a warning (with more than 3 warnings, later warnings
2842 // are only written to log file). For diploid Number=G, we do the same for
2843 // the hom-ref entry. For Number=0, we error out if the header line Type
2844 // isn't Flag, and the final flag is set iff any of the original variants
2845 // have the flag set. For the other cases (Number=<fixed constant> or
2846 // '.'), we replace the value with '.' if there's any string mismatch.
2847 // The key won't appear at all iff it doesn't appear in any of the original
2848 // variants (not even a '.').
2849 // - We error out if the joined variant has total ALT dosage > ~2.02, or
2850 // either side of the total ALT phased dosage > ~1.01. (We scale the
2851 // components down if there's a <1% overflow.)
2852 // - Missing ALTs... ugh. Have to permit this in biallelic case, but don't
2853 // allow it elsewhere. This forces a SNP (if REF is single-char) and/or
2854 // non-SNP (if REF is multichar) entry to be written when no corresponding
2855 // regular ALT is present, genotype writing/merging will error out if a
2856 // missing allele has any dosage, and QUAL/FILTER/INFO is merged as usual
2857 // when there are multiple missing-ALT same-type variants at the same
2858 // position. However, when a regular ALT is present, the missing-ALT
2859 // variants are completely ignored.
2860
2861 // The simplest design involves precomputing the entire (new variant idx, new
2862 // allele idx) -> (old variant uidx, old allele idx) mapping here, and
2863 // referring to that in both the .pvar and .pgen writers. Unfortunately, that
2864 // has a rather high memory requirement of 5 bytes per allele (assuming
2865 // sizeof(AlleleCode) == 1). While that's smaller than the 8 bytes/allele we
2866 // pay for allele_storage[], and also practically always smaller than the
2867 // 21+[variant ID len] per variant we pay for variant_bps + variant_ids +
2868 // allele_idx_offsets, it's still worth some effort to avoid; in particular, we
2869 // want an 8 GiB workspace to be sufficient for most operations on the full
2870 // 1000 Genomes phase 3 variant set (~84.8 million), and that's barely true
2871 // right now, so a bit of additional complexity to avoid losing ~850 MB is
2872 // justified.
2873 //
2874 // Thus, we only save the number of alleles in each new variant here, and force
2875 // the which-allele-comes-from-where computation (as well as SNP vs. non-SNP
2876 // vs. symbolic handling) to be repeated in the .pvar and .pgen writers. This
2877 // sucks, but being forced to perform an ordinary analysis on a remote machine
2878 // rather than locally sucks a bit more.
2879 //
2880 // Incidentally, another place to look, if it's important to further reduce
2881 // memory requirements, is internal representation of the FILTER field. The
2882 // current design gains a bit of extra speed by simply storing the non-./PASS
2883 // strings without parsing them further; but we could put them into a temporary
2884 // storage location and then convert to bitarray + string table at the end of
2885 // LoadPvar(). (Note that we already use only bitarrays when all FILTER values
2886 // are ./PASS, though.)
2887
// Classification of a single variant record for multiallelic-join planning;
// this is the return type of JoinCount().
2888 ENUM_U31_DEF_START()
// Record cannot be joined: JoinCount() returns this for a '.' ALT in a record
// with more than two alleles, for symbolic ALTs mixed with non-symbolic ones,
// and for a symbolic ALT paired with a multi-character REF.
2889 kJoinVtypeError,
// Single-character REF, and every ALT is a single character (or the lone ALT
// is '.').
2890 kJoinVtypeSnp,
// Multi-character REF, or single-character REF where every ALT is
// multi-character and non-symbolic.
2891 kJoinVtypeNonsnp,
// Single-character REF with both single-character and multi-character ALTs.
2892 kJoinVtypeMixedSnpNonsnp,
// Single-character REF, and every ALT starts with '<'.
2893 kJoinVtypeSymbolic,
// NOTE(review): never returned by JoinCount(); presumably an end-of-enum
// sentinel -- confirm.
2894 kJoinVtypeEnd
2895 ENUM_U31_DEF_END(JoinVtype);
2896
// ALT-allele tallies by category.  JoinCount() fills one of these for a single
// variant record; PlanMultiallelicJoin() sums them over all records sharing a
// position before handing the totals to PlanJoinFlushPos().
2897 typedef struct JoinCountsStruct {
// Single-character, non-'.' ALT alleles (only counted when REF is a single
// character).
2898 uintptr_t snp_ct;
// Multi-character non-symbolic ALT alleles; when REF is multi-character, this
// is every ALT allele of the record.
2899 uintptr_t nonsnp_ct;
// ALT alleles starting with '<' (only counted when REF is a single character).
2900 uintptr_t symbolic_ct;
// Number of biallelic records whose only ALT is '.', with single-character
// REF.
2901 uint32_t missalt_snp_ct;
// Same as missalt_snp_ct, but for multi-character REF.
2902 uint32_t missalt_nonsnp_ct;
2903 } JoinCounts;
2904
JoinCount(const char * const * cur_alleles,uintptr_t allele_ct,JoinCounts * jcp)2905 JoinVtype JoinCount(const char* const* cur_alleles, uintptr_t allele_ct, JoinCounts* jcp) {
2906 jcp->snp_ct = 0;
2907 jcp->symbolic_ct = 0;
2908 jcp->missalt_snp_ct = 0;
2909 jcp->missalt_nonsnp_ct = 0;
2910 if (cur_alleles[0][1] == '\0') {
2911 jcp->nonsnp_ct = 0;
2912 for (uintptr_t allele_idx = 1; allele_idx != allele_ct; ++allele_idx) {
2913 const char* cur_allele = cur_alleles[allele_idx];
2914 if (cur_allele[0] == '<') {
2915 jcp->symbolic_ct += 1;
2916 } else if (cur_allele[1] == '\0') {
2917 if (cur_allele[0] == '.') {
2918 if (allele_ct == 2) {
2919 jcp->missalt_snp_ct = 1;
2920 return kJoinVtypeSnp;
2921 }
2922 return kJoinVtypeError;
2923 }
2924 jcp->snp_ct += 1;
2925 } else {
2926 jcp->nonsnp_ct += 1;
2927 }
2928 }
2929 if (jcp->symbolic_ct) {
2930 return (jcp->symbolic_ct == allele_ct - 1)? kJoinVtypeSymbolic : kJoinVtypeError;
2931 }
2932 if (jcp->nonsnp_ct) {
2933 return jcp->snp_ct? kJoinVtypeMixedSnpNonsnp : kJoinVtypeNonsnp;
2934 }
2935 return kJoinVtypeSnp;
2936 }
2937 for (uint32_t allele_idx = 1; allele_idx != allele_ct; ++allele_idx) {
2938 const char* cur_allele = cur_alleles[allele_idx];
2939 if (cur_allele[0] == '<') {
2940 return kJoinVtypeError;
2941 }
2942 if (memequal_k(cur_allele, ".", 2)) {
2943 if (allele_ct == 2) {
2944 jcp->nonsnp_ct = 0;
2945 jcp->missalt_nonsnp_ct = 1;
2946 return kJoinVtypeNonsnp;
2947 }
2948 return kJoinVtypeError;
2949 }
2950 }
2951 jcp->nonsnp_ct = allele_ct - 1;
2952 return kJoinVtypeNonsnp;
2953 }
2954
// Records one output variant in the allele-index-offsets array being built.
//
// The output variant gets REF plus max(1, cur_alt_allele_ct) ALT slots, i.e.
// at least one ALT slot is reserved even when cur_alt_allele_ct is 0.
// *cur_offsetp is advanced by that allele count and the new running total is
// stored at **write_allele_idx_offsets_iterp, after which the iterator is
// moved forward by one entry.  *max_write_allele_ctp is raised if this variant
// has more alleles than any seen so far.
void PlanJoinOne(uint32_t cur_alt_allele_ct, uintptr_t** write_allele_idx_offsets_iterp, uintptr_t* cur_offsetp, uint32_t* max_write_allele_ctp) {
  const uint32_t alt_slot_ct = (cur_alt_allele_ct > 1)? cur_alt_allele_ct : 1;
  const uint32_t write_allele_ct = 1 + alt_slot_ct;
  if ((*max_write_allele_ctp) < write_allele_ct) {
    *max_write_allele_ctp = write_allele_ct;
  }
  const uintptr_t new_offset = (*cur_offsetp) + write_allele_ct;
  *cur_offsetp = new_offset;
  **write_allele_idx_offsets_iterp = new_offset;
  *write_allele_idx_offsets_iterp += 1;
}
2965
PlanJoinFlushPos(const JoinCounts * jcp,MakePlink2Flags join_mode,uintptr_t ** write_allele_idx_offsets_iterp,uintptr_t * cur_offsetp,uint32_t * max_write_allele_ctp,uint32_t * max_missalt_ctp)2966 void PlanJoinFlushPos(const JoinCounts* jcp, MakePlink2Flags join_mode, uintptr_t** write_allele_idx_offsets_iterp, uintptr_t* cur_offsetp, uint32_t* max_write_allele_ctp, uint32_t* max_missalt_ctp) {
2967 if (join_mode == kfMakePlink2MJoinSnps) {
2968 if (!(jcp->snp_ct || jcp->missalt_snp_ct)) {
2969 // all non-SNPs at this position, which were already accounted for
2970 return;
2971 }
2972 PlanJoinOne(jcp->snp_ct, write_allele_idx_offsets_iterp, cur_offsetp, max_write_allele_ctp);
2973 if ((!jcp->snp_ct) && (jcp->missalt_snp_ct > (*max_missalt_ctp))) {
2974 *max_missalt_ctp = jcp->missalt_snp_ct;
2975 }
2976 return;
2977 }
2978 if (join_mode == kfMakePlink2MJoinBoth) {
2979 if (jcp->snp_ct || jcp->missalt_snp_ct) {
2980 PlanJoinOne(jcp->snp_ct, write_allele_idx_offsets_iterp, cur_offsetp, max_write_allele_ctp);
2981 if ((!jcp->snp_ct) && (jcp->missalt_snp_ct > (*max_missalt_ctp))) {
2982 *max_missalt_ctp = jcp->missalt_snp_ct;
2983 }
2984 }
2985 if (jcp->nonsnp_ct || jcp->missalt_nonsnp_ct) {
2986 PlanJoinOne(jcp->nonsnp_ct, write_allele_idx_offsets_iterp, cur_offsetp, max_write_allele_ctp);
2987 if ((!jcp->nonsnp_ct) && (jcp->missalt_nonsnp_ct > (*max_missalt_ctp))) {
2988 *max_missalt_ctp = jcp->missalt_nonsnp_ct;
2989 }
2990 }
2991 } else {
2992 if (jcp->snp_ct || jcp->nonsnp_ct || jcp->missalt_snp_ct || jcp->missalt_nonsnp_ct) {
2993 const uint32_t alt_allele_ct = jcp->snp_ct + jcp->nonsnp_ct;
2994 PlanJoinOne(alt_allele_ct, write_allele_idx_offsets_iterp, cur_offsetp, max_write_allele_ctp);
2995 const uint32_t missalt_ct = jcp->missalt_snp_ct + jcp->missalt_nonsnp_ct;
2996 if ((missalt_ct > (*max_missalt_ctp)) && (!alt_allele_ct)) {
2997 *max_missalt_ctp = missalt_ct;
2998 }
2999 }
3000 }
3001 if (jcp->symbolic_ct) {
3002 PlanJoinOne(jcp->symbolic_ct, write_allele_idx_offsets_iterp, cur_offsetp, max_write_allele_ctp);
3003 }
3004 }
3005
3006
3007 // *write_allele_idx_offsetsp is assumed to be initialized to nullptr.
3008 // *max_missalt_ctp is assumed to be initialized to 0.
PlanMultiallelicJoin(const uintptr_t * variant_include,const ChrInfo * cip,const uint32_t * variant_bps,const char * const * variant_ids,const uintptr_t * allele_idx_offsets,const char * const * allele_storage,MakePlink2Flags flags,uint32_t * write_variant_ctp,const uintptr_t ** write_allele_idx_offsetsp,uint32_t * max_write_allele_ctp,uint32_t * max_missalt_ctp)3009 PglErr PlanMultiallelicJoin(const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, MakePlink2Flags flags, uint32_t* write_variant_ctp, const uintptr_t** write_allele_idx_offsetsp, uint32_t* max_write_allele_ctp, uint32_t* max_missalt_ctp) {
3010 uint32_t variant_uidx = 0;
3011 PglErr reterr = kPglRetSuccess;
3012 {
3013 const uint32_t variant_ct = *write_variant_ctp;
3014 uintptr_t* write_allele_idx_offsets = R_CAST(uintptr_t*, g_bigstack_base);
3015 uintptr_t* write_allele_idx_offsets_stop = R_CAST(uintptr_t*, RoundDownPow2(R_CAST(uintptr_t, g_bigstack_end), kCacheline));
3016 if (write_allele_idx_offsets == write_allele_idx_offsets_stop) {
3017 goto PlanMultiallelicJoin_ret_NOMEM;
3018 }
3019 write_allele_idx_offsets_stop = &(write_allele_idx_offsets_stop[-4]);
3020 const MakePlink2Flags join_mode = flags & kfMakePlink2MMask;
3021 uintptr_t* write_allele_idx_offsets_iter = write_allele_idx_offsets;
3022 *write_allele_idx_offsets_iter++ = 0;
3023 uintptr_t cur_offset = 0;
3024 uintptr_t variant_uidx_base = 0;
3025 uintptr_t cur_bits = variant_include[0];
3026 uint32_t chr_fo_idx = UINT32_MAX;
3027 uint32_t chr_end = 0;
3028 uint32_t prev_bp = 0;
3029 uint32_t allele_ct = 2;
3030 uint32_t max_write_allele_ct = 2;
3031 JoinCounts jc;
3032 // possible todo: track max_write_allele_ct for each subcase, instead of
3033 // having a single value
3034 jc.snp_ct = 0;
3035 jc.nonsnp_ct = 0;
3036 jc.symbolic_ct = 0;
3037 jc.missalt_snp_ct = 0;
3038 jc.missalt_nonsnp_ct = 0;
3039 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
3040 variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
3041 if (variant_uidx >= chr_end) {
3042 do {
3043 ++chr_fo_idx;
3044 chr_end = cip->chr_fo_vidx_start[chr_fo_idx + 1];
3045 } while (variant_uidx >= chr_end);
3046 prev_bp = UINT32_MAX;
3047 }
3048 uintptr_t allele_idx_offset_base = variant_uidx * 2;
3049 if (allele_idx_offsets) {
3050 allele_idx_offset_base = allele_idx_offsets[variant_uidx];
3051 allele_ct = allele_idx_offsets[variant_uidx + 1] - allele_idx_offset_base;
3052 }
3053 const char* const* cur_alleles = &(allele_storage[allele_idx_offset_base]);
3054 JoinCounts cur_jc;
3055 JoinVtype jvt = JoinCount(cur_alleles, allele_ct, &cur_jc);
3056 if (unlikely(jvt == kJoinVtypeError)) {
3057 goto PlanMultiallelicJoin_ret_MIXED_SYMBOLIC;
3058 }
3059 if (unlikely((join_mode != kfMakePlink2MJoinAny) && (jvt == kJoinVtypeMixedSnpNonsnp))) {
3060 logerrprintfww("Error: Variant '%s' is mixed SNP/non-SNP; multiallelics=+both and +snps don't permit this.\n", variant_ids[variant_uidx]);
3061 goto PlanMultiallelicJoin_ret_INCONSISTENT_INPUT;
3062 }
3063 const uint32_t cur_bp = variant_bps[variant_uidx];
3064 if (cur_bp != prev_bp) {
3065 PlanJoinFlushPos(&jc, join_mode, &write_allele_idx_offsets_iter, &cur_offset, &max_write_allele_ct, max_missalt_ctp);
3066 if (join_mode == kfMakePlink2MJoinSnps) {
3067 if (cur_jc.nonsnp_ct || cur_jc.symbolic_ct || cur_jc.missalt_nonsnp_ct) {
3068 // Flush non-SNP immediately.
3069 const uint32_t cur_write_allele_ct = 1 + cur_jc.nonsnp_ct + cur_jc.symbolic_ct;
3070 cur_offset += cur_write_allele_ct;
3071 if (cur_write_allele_ct > max_write_allele_ct) {
3072 max_write_allele_ct = cur_write_allele_ct;
3073 }
3074 *write_allele_idx_offsets_iter++ = cur_offset;
3075 // Also need to reinitialize.
3076 jc.snp_ct = 0;
3077 jc.missalt_snp_ct = 0;
3078 } else {
3079 jc.snp_ct = cur_jc.snp_ct;
3080 jc.missalt_snp_ct = cur_jc.missalt_snp_ct;
3081 }
3082 } else {
3083 jc = cur_jc;
3084 }
3085 prev_bp = cur_bp;
3086 } else if ((join_mode == kfMakePlink2MJoinSnps) && (cur_jc.nonsnp_ct || cur_jc.symbolic_ct)) {
3087 // Flush non-SNP immediately.
3088 const uint32_t cur_write_allele_ct = 1 + cur_jc.nonsnp_ct + cur_jc.symbolic_ct;
3089 cur_offset += cur_write_allele_ct;
3090 if (cur_write_allele_ct > max_write_allele_ct) {
3091 max_write_allele_ct = cur_write_allele_ct;
3092 }
3093 *write_allele_idx_offsets_iter++ = cur_offset;
3094 } else {
3095 jc.snp_ct += cur_jc.snp_ct;
3096 jc.nonsnp_ct += cur_jc.nonsnp_ct;
3097 jc.symbolic_ct += cur_jc.symbolic_ct;
3098 jc.missalt_snp_ct += cur_jc.missalt_snp_ct;
3099 jc.missalt_nonsnp_ct += cur_jc.missalt_nonsnp_ct;
3100 continue;
3101 }
3102 if (write_allele_idx_offsets_iter > write_allele_idx_offsets_stop) {
3103 goto PlanMultiallelicJoin_ret_NOMEM;
3104 }
3105 }
3106 // Flush last position.
3107 PlanJoinFlushPos(&jc, join_mode, &write_allele_idx_offsets_iter, &cur_offset, &max_write_allele_ct, max_missalt_ctp);
3108 if (max_write_allele_ct > kPglMaxAltAlleleCt + 1) {
3109 goto PlanMultiallelicJoin_ret_TOO_MANY_ALTS;
3110 }
3111 *write_variant_ctp = S_CAST(uintptr_t, write_allele_idx_offsets_iter - write_allele_idx_offsets) - 1;
3112 *max_write_allele_ctp = max_write_allele_ct;
3113 if (max_write_allele_ct > 2) {
3114 BigstackBaseSet(write_allele_idx_offsets_iter);
3115 *write_allele_idx_offsetsp = write_allele_idx_offsets;
3116 }
3117 }
3118 while (0) {
3119 PlanMultiallelicJoin_ret_NOMEM:
3120 reterr = kPglRetNomem;
3121 break;
3122 PlanMultiallelicJoin_ret_MIXED_SYMBOLIC:
3123 logerrprintfww("Error: Variant '%s' mixes symbolic and non-symbolic alleles in an unsupported manner.\n", variant_ids[variant_uidx]);
3124 PlanMultiallelicJoin_ret_INCONSISTENT_INPUT:
3125 reterr = kPglRetInconsistentInput;
3126 break;
3127 PlanMultiallelicJoin_ret_TOO_MANY_ALTS:
3128 logerrprintf("Error: Variant-join would create a variant with too many ALT alleles for this\nplink2 build.\n");
3129 reterr = kPglRetNotYetSupported;
3130 break;
3131 }
3132 return reterr;
3133 }
3134
PlanMultiallelicSplit(const uintptr_t * variant_include,const uintptr_t * allele_idx_offsets,const char * const * allele_storage,uint32_t max_allele_ct,MakePlink2Flags flags,uint32_t * write_variant_ctp,const uintptr_t ** write_allele_idx_offsetsp)3135 PglErr PlanMultiallelicSplit(const uintptr_t* variant_include, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, uint32_t max_allele_ct, MakePlink2Flags flags, uint32_t* write_variant_ctp, const uintptr_t** write_allele_idx_offsetsp) {
3136 uint32_t variant_uidx = 0;
3137 PglErr reterr = kPglRetSuccess;
3138 {
3139 const uint32_t variant_ct = *write_variant_ctp;
3140 const uint32_t only_split_snps = ((flags & kfMakePlink2MMask) == kfMakePlink2MSplitSnps);
3141 uintptr_t* write_allele_idx_offsets = nullptr;
3142 uintptr_t* write_allele_idx_offsets_stop = nullptr;
3143 uintptr_t* write_allele_idx_offsets_iter = nullptr;
3144 if (only_split_snps) {
3145 write_allele_idx_offsets = R_CAST(uintptr_t*, g_bigstack_base);
3146 write_allele_idx_offsets_stop = R_CAST(uintptr_t*, RoundDownPow2(R_CAST(uintptr_t, g_bigstack_end), kCacheline));
3147 if (S_CAST(uintptr_t, write_allele_idx_offsets_stop - write_allele_idx_offsets) <= max_allele_ct) {
3148 goto PlanMultiallelicSplit_ret_NOMEM;
3149 }
3150 write_allele_idx_offsets_stop -= max_allele_ct;
3151 write_allele_idx_offsets_iter = write_allele_idx_offsets;
3152 *write_allele_idx_offsets_iter++ = 0;
3153 }
3154 uintptr_t cur_offset = 0;
3155 uintptr_t write_variant_ct = 0;
3156 uintptr_t variant_uidx_base = 0;
3157 uintptr_t cur_bits = variant_include[0];
3158 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
3159 variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
3160 const uintptr_t allele_idx_offset_base = allele_idx_offsets[variant_uidx];
3161 const uint32_t allele_ct = allele_idx_offsets[variant_uidx + 1] - allele_idx_offset_base;
3162 if (allele_ct == 2) {
3163 if (only_split_snps) {
3164 cur_offset += 2;
3165 *write_allele_idx_offsets_iter++ = cur_offset;
3166 }
3167 ++write_variant_ct;
3168 } else {
3169 if (only_split_snps) {
3170 const char* const* cur_alleles = &(allele_storage[allele_idx_offset_base]);
3171 uint32_t do_split = 1;
3172 for (uint32_t allele_idx = 0; allele_idx != allele_ct; ++allele_idx) {
3173 if (cur_alleles[allele_idx][1] != '\0') {
3174 do_split = 0;
3175 break;
3176 }
3177 }
3178 if (do_split) {
3179 for (uint32_t allele_idx = 1; allele_idx != allele_ct; ++allele_idx) {
3180 cur_offset += 2;
3181 *write_allele_idx_offsets_iter++ = cur_offset;
3182 }
3183 write_variant_ct += allele_ct - 1;
3184 } else {
3185 cur_offset += allele_ct;
3186 *write_allele_idx_offsets_iter++ = cur_offset;
3187 ++write_variant_ct;
3188 }
3189 if (write_allele_idx_offsets_iter > write_allele_idx_offsets_stop) {
3190 goto PlanMultiallelicSplit_ret_NOMEM;
3191 }
3192 } else {
3193 write_variant_ct += allele_ct - 1;
3194 }
3195 }
3196 }
3197 if (write_variant_ct > 0x7ffffffd) {
3198 logerrputs("Error: " PROG_NAME_STR " does not support more than 2^31 - 3 variants. We recommend using\nother software for very deep studies of small numbers of genomes.\n");
3199 goto PlanMultiallelicSplit_ret_INCONSISTENT_INPUT;
3200 }
3201 *write_variant_ctp = write_variant_ct;
3202 if (only_split_snps && (cur_offset != 2 * write_variant_ct)) {
3203 assert(cur_offset > 2 * write_variant_ct);
3204 BigstackBaseSet(write_allele_idx_offsets_iter);
3205 *write_allele_idx_offsetsp = write_allele_idx_offsets;
3206 }
3207 }
3208 while (0) {
3209 PlanMultiallelicSplit_ret_NOMEM:
3210 reterr = kPglRetNomem;
3211 break;
3212 PlanMultiallelicSplit_ret_INCONSISTENT_INPUT:
3213 reterr = kPglRetInconsistentInput;
3214 break;
3215 }
3216 return reterr;
3217 }
3218
// Checks whether orig_variant_id can be split on ';' into exactly
// (allele_ct - 1) nonempty tokens, one per ALT allele.  Equivalently: returns
// 1 iff the ID contains exactly (allele_ct - 2) semicolons, none of which is
// leading, trailing, or adjacent to another semicolon; returns 0 otherwise.
uint32_t VaridSplitOk(const char* orig_variant_id, uint32_t allele_ct) {
  const char* token_start = orig_variant_id;
  uint32_t aidx = 2;
  // Consume one "token;" prefix for each ALT allele other than the last.
  while (aidx != allele_ct) {
    const char* semicolon = strchr(token_start, ';');
    if (!semicolon) {
      // Too few tokens.
      return 0;
    }
    if (semicolon == token_start) {
      // Empty token.
      return 0;
    }
    token_start = &(semicolon[1]);
    ++aidx;
  }
  // The remainder must be a single nonempty token.
  if (*token_start == '\0') {
    return 0;
  }
  return strchr(token_start, ';')? 0 : 1;
}
3232
3233 // Similar to WriteMapOrBim(), but there are enough small differences to
3234 // justify making this a separate function instead of clogging the original
3235 // with more conditionals.
WriteBimSplit(const char * outname,const uintptr_t * variant_include,const ChrInfo * cip,const uint32_t * variant_bps,const char * const * variant_ids,const uintptr_t * allele_idx_offsets,const char * const * allele_storage,const double * variant_cms,const char * varid_template_str,const char * missing_varid_match,uint32_t variant_ct,uint32_t max_allele_slen,uint32_t new_variant_id_max_allele_slen,uint32_t varid_split,uint32_t varid_dup,MiscFlags misc_flags,uint32_t output_zst,uint32_t thread_ct)3236 PglErr WriteBimSplit(const char* outname, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const double* variant_cms, const char* varid_template_str, const char* missing_varid_match, uint32_t variant_ct, uint32_t max_allele_slen, uint32_t new_variant_id_max_allele_slen, uint32_t varid_split, uint32_t varid_dup, MiscFlags misc_flags, uint32_t output_zst, uint32_t thread_ct) {
3237 unsigned char* bigstack_mark = g_bigstack_base;
3238 char* cswritep = nullptr;
3239 CompressStreamState css;
3240 PglErr reterr = kPglRetSuccess;
3241 PreinitCstream(&css);
3242 {
3243 const uint32_t max_chr_blen = GetMaxChrSlen(cip) + 1;
3244 // includes trailing tab
3245 char* chr_buf;
3246 if (unlikely(bigstack_alloc_c(max_chr_blen, &chr_buf))) {
3247 goto WriteBimSplit_ret_NOMEM;
3248 }
3249 const uint32_t new_variant_id_overflow_missing = (misc_flags / kfMiscNewVarIdOverflowMissing) & 1;
3250 const uint32_t varid_dup_nosplit = varid_dup && (!varid_split);
3251 VaridTemplate* varid_templatep = nullptr;
3252 uint32_t missing_varid_slen = 0;
3253 uint32_t missing_varid_match_blen = 0; // nonzero iff --set-missing-var-ids
3254 if (varid_template_str) {
3255 if (!missing_varid_match) {
3256 missing_varid_match = &(g_one_char_strs[92]); // '.'
3257 }
3258 missing_varid_slen = strlen(missing_varid_match);
3259 if (misc_flags & kfMiscSetMissingVarIds) {
3260 missing_varid_match_blen = missing_varid_slen + 1;
3261 }
3262 if (unlikely(BIGSTACK_ALLOC_X(VaridTemplate, 1, &varid_templatep))) {
3263 goto WriteBimSplit_ret_NOMEM;
3264 }
3265 const uint32_t overflow_substitute_blen = new_variant_id_overflow_missing? (missing_varid_slen + 1) : 0;
3266 VaridTemplateInit(varid_template_str, missing_varid_match, chr_buf, new_variant_id_max_allele_slen, overflow_substitute_blen, varid_templatep);
3267 if (varid_dup) {
3268 for (uint32_t uii = 0; uii != varid_templatep->insert_ct; ++uii) {
3269 const uint32_t insert_type = varid_templatep->insert_types[uii];
3270 if ((insert_type == 3) || ((insert_type == 2) && (varid_templatep->alleles_needed & 4))) {
3271 // Could define what takes precedence here, but simpler to prohibit
3272 // this combination.
3273 logerrputs("Error: 'vid-[split-]dup' cannot be used with a --set-all-var-ids or\n--set-missing-var-ids template string containing a non-REF allele.\n");
3274 goto WriteBimSplit_ret_INVALID_CMDLINE;
3275 }
3276 }
3277 }
3278 }
3279 const uintptr_t overflow_buf_size = kCompressStreamBlock + kMaxIdSlen + 512 + 2 * max_allele_slen;
3280 reterr = InitCstreamAlloc(outname, 0, output_zst, thread_ct, overflow_buf_size, &css, &cswritep);
3281 if (unlikely(reterr)) {
3282 goto WriteBimSplit_ret_1;
3283 }
3284
3285 const VaridTemplate* cur_varid_templatep = nullptr;
3286 const char* varid_token_start = nullptr; // for vid-split
3287 uint32_t chr_fo_idx = UINT32_MAX;
3288 uint32_t chr_end = 0;
3289 uint32_t chr_buf_blen = 0;
3290 uintptr_t variant_uidx_base = 0;
3291 uintptr_t cur_bits = variant_include[0];
3292 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
3293 const uint32_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
3294 if (variant_uidx >= chr_end) {
3295 do {
3296 ++chr_fo_idx;
3297 chr_end = cip->chr_fo_vidx_start[chr_fo_idx + 1];
3298 } while (variant_uidx >= chr_end);
3299 char* chr_name_end = chrtoa(cip, cip->chr_file_order[chr_fo_idx], chr_buf);
3300 *chr_name_end = '\t';
3301 const uint32_t chr_slen = chr_name_end - chr_buf;
3302 chr_buf_blen = 1 + chr_slen;
3303 if (varid_templatep) {
3304 const int32_t chr_slen_delta = chr_slen - varid_templatep->chr_slen;
3305 varid_templatep->chr_slen = chr_slen;
3306 varid_templatep->base_len += chr_slen_delta;
3307 }
3308 }
3309 const uintptr_t allele_idx_offset_base = allele_idx_offsets[variant_uidx];
3310 const uint32_t orig_allele_ct = allele_idx_offsets[variant_uidx + 1] - allele_idx_offset_base;
3311 const char* const* cur_alleles = &(allele_storage[allele_idx_offset_base]);
3312 const char* orig_variant_id = variant_ids[variant_uidx];
3313 const char* ref_allele = cur_alleles[0];
3314 const uint32_t ref_allele_slen = strlen(ref_allele);
3315 uint32_t keep_orig_id = 1;
3316 if ((orig_allele_ct > 2) && (!varid_dup_nosplit)) {
3317 keep_orig_id = 0;
3318 if (varid_templatep && (!missing_varid_match_blen)) {
3319 cur_varid_templatep = varid_templatep;
3320 } else {
3321 cur_varid_templatep = nullptr;
3322 if (varid_split) {
3323 if (VaridSplitOk(orig_variant_id, orig_allele_ct)) {
3324 varid_token_start = orig_variant_id;
3325 } else if (varid_dup) {
3326 keep_orig_id = 1;
3327 } else {
3328 varid_token_start = nullptr;
3329 }
3330 }
3331 if ((!varid_token_start) && varid_templatep) {
3332 // --set-missing-var-ids usually applies here when it's specified;
3333 // the exceptions are when vid-split was also specified and the
3334 // split succeeded, or vid-split-dup was specified.
3335 // (In the latter case, this value is ignored anyway.)
3336 cur_varid_templatep = varid_templatep;
3337 }
3338 }
3339 }
3340 const uint32_t cur_bp = variant_bps[variant_uidx];
3341 // We already verified that no variants to be written have >2 alleles, so
3342 // we don't need to distinguish between '-' and '-snps'.
3343 for (uint32_t alt_allele_idx = 1; alt_allele_idx != orig_allele_ct; ++alt_allele_idx) {
3344 cswritep = memcpya(cswritep, chr_buf, chr_buf_blen);
3345 const char* cur_alt_allele = cur_alleles[alt_allele_idx];
3346 const uint32_t cur_alt_allele_slen = strlen(cur_alt_allele);
3347 if (keep_orig_id) {
3348 cswritep = strcpyax(cswritep, orig_variant_id, '\t');
3349 } else {
3350 if (cur_varid_templatep) {
3351 // Always true in --set-all-var-ids case. True in
3352 // --set-missing-var-ids case when vid-split unspecified, or split
3353 // failed.
3354 cswritep = VaridTemplateWrite(cur_varid_templatep, ref_allele, cur_alt_allele, cur_bp, ref_allele_slen, 0, cur_alt_allele_slen, cswritep);
3355 *cswritep++ = '\t';
3356 } else if (varid_token_start) {
3357 const char* varid_token_end = strchrnul(varid_token_start, ';');
3358 // If substring matches missing code and --set-missing-var-ids is
3359 // specified, we replace it.
3360 if (varid_templatep && (S_CAST(uintptr_t, varid_token_end - varid_token_start) == missing_varid_slen) && memequal(varid_token_start, missing_varid_match, missing_varid_slen)) {
3361 cswritep = VaridTemplateWrite(varid_templatep, ref_allele, cur_alt_allele, cur_bp, ref_allele_slen, 0, cur_alt_allele_slen, cswritep);
3362 } else {
3363 cswritep = memcpya(cswritep, varid_token_start, varid_token_end - varid_token_start);
3364 }
3365 *cswritep++ = '\t';
3366 varid_token_start = &(varid_token_end[1]);
3367 } else {
3368 cswritep = memcpyax(cswritep, missing_varid_match, missing_varid_slen, '\t');
3369 }
3370 }
3371 if (!variant_cms) {
3372 *cswritep++ = '0';
3373 } else {
3374 cswritep = dtoa_g_p8(variant_cms[variant_uidx], cswritep);
3375 }
3376 *cswritep++ = '\t';
3377 cswritep = u32toa(cur_bp, cswritep);
3378 *cswritep++ = '\t';
3379 // note that VCF ref allele corresponds to A2, not A1
3380 cswritep = memcpya(cswritep, cur_alt_allele, cur_alt_allele_slen);
3381 *cswritep++ = '\t';
3382 cswritep = memcpya(cswritep, ref_allele, ref_allele_slen);
3383 AppendBinaryEoln(&cswritep);
3384 if (unlikely(Cswrite(&css, &cswritep))) {
3385 goto WriteBimSplit_ret_WRITE_FAIL;
3386 }
3387 }
3388 }
3389 if (unlikely(CswriteCloseNull(&css, cswritep))) {
3390 goto WriteBimSplit_ret_WRITE_FAIL;
3391 }
3392 }
3393 while (0) {
3394 WriteBimSplit_ret_NOMEM:
3395 reterr = kPglRetNomem;
3396 break;
3397 WriteBimSplit_ret_WRITE_FAIL:
3398 reterr = kPglRetWriteFail;
3399 break;
3400 WriteBimSplit_ret_INVALID_CMDLINE:
3401 reterr = kPglRetInvalidCmdline;
3402 break;
3403 }
3404 WriteBimSplit_ret_1:
3405 CswriteCloseCond(&css, cswritep);
3406 BigstackReset(bigstack_mark);
3407 return reterr;
3408 }
3409
3410 // We only need to distinguish between the following INFO-value-type cases:
3411 // Number=0 (flag), Number=<positive integer>, Number=., Number=A, Number=R,
3412 // and Number=G. We use negative numbers to represent the last 4 cases in
3413 // InfoVtype.
// Negative codes stored in InfoVtype.num for the non-numeric Number= values;
// nonnegative values of that field are the literal Number= count (0 for a
// Flag).
// Number=.
3414 CONSTI32(kInfoVtypeUnknown, -1);
// Number=A
3415 CONSTI32(kInfoVtypeA, -2);
// Number=R
3416 CONSTI32(kInfoVtypeR, -3);
// Number=G
3417 CONSTI32(kInfoVtypeG, -4);
3418
3419 // Main fixed data structure when splitting/joining INFO is a hashmap of keys.
3420 // Behavior when splitting:
3421 // - Field order in the original variant is retained.
3422 // - Number >= 0 and Number=. don't require any special handling, just copy the
3423 // entire key=value pair (or lone key, in the Flag case).
3424 // - Number=A and Number=R require splitting the value on ',' and verifying the
3425 // comma count is correct, but are otherwise straightforward since alleles
3426 // can't be permuted.
3427 // - Number=G requires a bit more work but isn't fundamentally different from
3428 // A/R.
3429 // When joining:
3430 // - Field order is determined by header line order.
3431 // - Number=. and Number>0 just require a buffer of size ~info_reload_slen, and
3432 // a boolean indicating whether no mismatch has been found.
3433 // - Number=0 (Flag) requires a single boolean, we perform an or operation.
3434 // - Number=A/R/G are the messy ones: we need to have enough space for
3435 // max_write_allele_ct (or that minus 1) comma-separated values in the =A and
3436 // =R cases, and max_write_allele_ct * (max_write_allele_ct + 1) / 2 in the
3437 // diploid =G case.
3438 // Since we permit already-multiallelic variants to be part of a join, the =G
3439 // case may require a lot of working memory to handle. We reserve up to 1/16
3440 // of remaining workspace memory for this when we cannot prove that we can
3441 // get by with less.
3442
// One parsed ##INFO header entry: the key's Number= classification followed by
// the key string itself, stored inline.
3443 typedef struct InfoVtypeStruct {
// NOTE(review): presumably disables copying, since the struct ends in a
// variable-length member -- confirm against the macro definition.
3444 NONCOPYABLE(InfoVtypeStruct);
// Number= value: 0 for a Flag, or one of the negative kInfoVtype... codes for
// '.', A, R, and G.
3445 int32_t num;
// Null-terminated INFO key, stored directly after num (variable length).
3446 char key[];
3447 } InfoVtype;
3448
3449 // info_keys[] entries point to the (variable-size) key[] member of InfoVtype
3450 // structs. We use [const_]container_of(x)->num to look up the associated
3451 // Number= value.
ParseInfoHeader(const char * xheader,uintptr_t xheader_blen,const char * const ** info_keys_ptr,uint32_t * info_key_ctp,uint32_t ** info_keys_htablep,uint32_t * info_keys_htable_sizep)3452 PglErr ParseInfoHeader(const char* xheader, uintptr_t xheader_blen, const char* const** info_keys_ptr, uint32_t* info_key_ctp, uint32_t** info_keys_htablep, uint32_t* info_keys_htable_sizep) {
3453 unsigned char* bigstack_mark = g_bigstack_base;
3454 unsigned char* bigstack_end_mark = g_bigstack_end;
3455 PglErr reterr = kPglRetSuccess;
3456 {
3457 // Parsing loop is similar to that in ExportVcf().
3458 const char* xheader_iter = xheader;
3459 const char* xheader_end = &(xheader[xheader_blen]);
3460 const char* line_end = xheader;
3461 unsigned char* tmp_alloc_end = bigstack_end_mark;
3462 const char** info_keys = R_CAST(const char**, bigstack_mark);
3463 const char** info_keys_iter = info_keys;
3464 while (line_end != xheader_end) {
3465 xheader_iter = line_end;
3466 line_end = AdvPastDelim(xheader_iter, '\n');
3467 const uint32_t slen = line_end - xheader_iter;
3468 if ((slen <= 12) || (!StrStartsWithUnsafe(xheader_iter, "##INFO=<ID="))) {
3469 continue;
3470 }
3471 const char* key_start = &(xheader_iter[11]);
3472 const char* key_end = S_CAST(const char*, memchr(key_start, ',', slen - 12));
3473 if (unlikely((!key_end) || (!StrStartsWithUnsafe(key_end, ",Number=")))) {
3474 goto ParseInfoHeader_ret_MALFORMED_INFO_HEADER_LINE;
3475 }
3476 const uint32_t key_slen = key_end - key_start;
3477 if (key_slen > kMaxInfoKeySlen) {
3478 logerrputs("Error: " PROG_NAME_STR " does not support INFO keys longer than " MAX_INFO_KEY_SLEN_STR " characters.\n");
3479 // VCF spec doesn't specify a limit, so this isn't "malformed input".
3480 // We enforce a limit so we can safely print INFO keys in error
3481 // messages, etc.; it's trivial to increase the limit if it's ever
3482 // necessary.
3483 reterr = kPglRetNotYetSupported;
3484 goto ParseInfoHeader_ret_1;
3485 }
3486 const uintptr_t entry_byte_ct = RoundUpPow2(offsetof(InfoVtype, key) + 1 + key_slen, sizeof(intptr_t));
3487 if (S_CAST(uintptr_t, tmp_alloc_end - R_CAST(unsigned char*, info_keys_iter)) < entry_byte_ct + 8) {
3488 goto ParseInfoHeader_ret_NOMEM;
3489 }
3490 tmp_alloc_end -= entry_byte_ct;
3491 InfoVtype* new_entry = R_CAST(InfoVtype*, tmp_alloc_end);
3492 memcpyx(new_entry->key, key_start, key_slen, '\0');
3493 *info_keys_iter++ = new_entry->key;
3494
3495 const char* num_iter = &(key_end[8]);
3496 const unsigned char first_num_char = num_iter[0];
3497 if (first_num_char < '1') {
3498 if (first_num_char == '0') {
3499 // don't see a reason to tolerate Number=01, etc.
3500 if (unlikely(!StrStartsWithUnsafe(num_iter, "0,Type=Flag,"))) {
3501 goto ParseInfoHeader_ret_MALFORMED_INFO_HEADER_LINE;
3502 }
3503 new_entry->num = 0;
3504 } else if (likely(first_num_char == '.')) {
3505 new_entry->num = kInfoVtypeUnknown;
3506 } else {
3507 goto ParseInfoHeader_ret_MALFORMED_INFO_HEADER_LINE;
3508 }
3509 } else if (first_num_char > '9') {
3510 if (first_num_char == 'A') {
3511 new_entry->num = kInfoVtypeA;
3512 } else if (first_num_char == 'R') {
3513 new_entry->num = kInfoVtypeR;
3514 } else if (likely(first_num_char == 'G')) {
3515 new_entry->num = kInfoVtypeG;
3516 } else {
3517 goto ParseInfoHeader_ret_MALFORMED_INFO_HEADER_LINE;
3518 }
3519 } else {
3520 uint32_t val;
3521 if (unlikely(ScanmovPosintCapped(UINT32_MAX, &num_iter, &val) || (num_iter[0] != ','))) {
3522 goto ParseInfoHeader_ret_MALFORMED_INFO_HEADER_LINE;
3523 }
3524 new_entry->num = val;
3525 }
3526 }
3527 const uintptr_t info_key_ct = info_keys_iter - info_keys;
3528 #ifdef __LP64__
3529 if (unlikely(info_key_ct > 0x7ffffffdU)) {
3530 logerrputs("Error: " PROG_NAME_STR " does not support more than 2^31 - 3 INFO keys.\n");
3531 reterr = kPglRetMalformedInput;
3532 goto ParseInfoHeader_ret_1;
3533 }
3534 #endif
3535 assert(info_key_ct);
3536 *info_key_ctp = info_key_ct;
3537 BigstackBaseSet(info_keys_iter);
3538 BigstackEndSet(tmp_alloc_end);
3539 bigstack_end_mark = g_bigstack_end;
3540 const uintptr_t info_key_ctl = BitCtToWordCt(info_key_ct);
3541 uintptr_t* dummy_include;
3542 if (unlikely(
3543 (g_bigstack_base > g_bigstack_end) ||
3544 bigstack_end_alloc_w(info_key_ctl, &dummy_include))) {
3545 goto ParseInfoHeader_ret_NOMEM;
3546 }
3547 SetAllBits(info_key_ct, dummy_include);
3548 reterr = AllocAndPopulateIdHtableMt(dummy_include, info_keys, info_key_ct, bigstack_left() / 32, 1, info_keys_htablep, nullptr, info_keys_htable_sizep, nullptr);
3549 if (unlikely(reterr)) {
3550 goto ParseInfoHeader_ret_1;
3551 }
3552 *info_keys_ptr = info_keys;
3553 bigstack_mark = g_bigstack_base;
3554 }
3555 while (0) {
3556 ParseInfoHeader_ret_NOMEM:
3557 reterr = kPglRetNomem;
3558 break;
3559 ParseInfoHeader_ret_MALFORMED_INFO_HEADER_LINE:
3560 logputs("\n");
3561 logerrputs("Error: Malformed or unrecognized INFO header line.\n");
3562 reterr = kPglRetMalformedInput;
3563 break;
3564 }
3565 ParseInfoHeader_ret_1:
3566 BigstackDoubleReset(bigstack_mark, bigstack_end_mark);
3567 return reterr;
3568 }
3569
WritePvarSplit(const char * outname,const uintptr_t * variant_include,const ChrInfo * cip,const uint32_t * variant_bps,const char * const * variant_ids,const uintptr_t * allele_idx_offsets,const char * const * allele_storage,const uintptr_t * qual_present,const float * quals,const uintptr_t * filter_present,const uintptr_t * filter_npass,const char * const * filter_storage,const uintptr_t * nonref_flags,const char * pvar_info_reload,const double * variant_cms,const char * varid_template_str,const char * missing_varid_match,const char * const * info_keys,const uint32_t * info_keys_htable,uint32_t raw_variant_ct,uint32_t variant_ct,uint32_t max_allele_slen,uint32_t new_variant_id_max_allele_slen,uintptr_t xheader_blen,InfoFlags info_flags,uint32_t nonref_flags_storage,uint32_t max_filter_slen,uint32_t info_reload_slen,UnsortedVar vpos_sortstatus,uint32_t info_key_ct,uint32_t info_keys_htable_size,MiscFlags misc_flags,MakePlink2Flags make_plink2_flags,PvarPsamFlags pvar_psam_flags,uint32_t thread_ct,char * xheader)3570 PglErr WritePvarSplit(const char* outname, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* qual_present, const float* quals, const uintptr_t* filter_present, const uintptr_t* filter_npass, const char* const* filter_storage, const uintptr_t* nonref_flags, const char* pvar_info_reload, const double* variant_cms, const char* varid_template_str, const char* missing_varid_match, const char* const* info_keys, const uint32_t* info_keys_htable, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_slen, uint32_t new_variant_id_max_allele_slen, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t nonref_flags_storage, uint32_t max_filter_slen, uint32_t info_reload_slen, UnsortedVar vpos_sortstatus, uint32_t info_key_ct, uint32_t info_keys_htable_size, MiscFlags misc_flags, MakePlink2Flags 
make_plink2_flags, PvarPsamFlags pvar_psam_flags, uint32_t thread_ct, char* xheader) {
3571 unsigned char* bigstack_mark = g_bigstack_base;
3572 char* cswritep = nullptr;
3573 PglErr reterr = kPglRetSuccess;
3574 CompressStreamState css;
3575 TextStream pvar_reload_txs;
3576 PreinitCstream(&css);
3577 PreinitTextStream(&pvar_reload_txs);
3578 {
3579 const uint32_t max_chr_blen = GetMaxChrSlen(cip) + 1;
3580 // includes trailing tab
3581 char* chr_buf;
3582
3583 if (unlikely(bigstack_alloc_c(max_chr_blen, &chr_buf))) {
3584 goto WritePvarSplit_ret_NOMEM;
3585 }
3586 const uint32_t new_variant_id_overflow_missing = (misc_flags / kfMiscNewVarIdOverflowMissing) & 1;
3587 const uint32_t varid_dup = (make_plink2_flags / kfMakePlink2VaridDup) & 1;
3588 VaridTemplate* varid_templatep = nullptr;
3589 if (!missing_varid_match) {
3590 missing_varid_match = &(g_one_char_strs[92]); // '.'
3591 }
3592 uint32_t missing_varid_slen = strlen(missing_varid_match);
3593 uint32_t missing_varid_match_blen = 0; // nonzero iff --set-missing-var-ids
3594 if (varid_template_str) {
3595 if (misc_flags & kfMiscSetMissingVarIds) {
3596 missing_varid_match_blen = missing_varid_slen + 1;
3597 }
3598 if (unlikely(BIGSTACK_ALLOC_X(VaridTemplate, 1, &varid_templatep))) {
3599 goto WritePvarSplit_ret_NOMEM;
3600 }
3601 const uint32_t overflow_substitute_blen = new_variant_id_overflow_missing? (missing_varid_slen + 1) : 0;
3602 VaridTemplateInit(varid_template_str, missing_varid_match, chr_buf, new_variant_id_max_allele_slen, overflow_substitute_blen, varid_templatep);
3603 if (varid_dup) {
3604 for (uint32_t uii = 0; uii != varid_templatep->insert_ct; ++uii) {
3605 const uint32_t insert_type = varid_templatep->insert_types[uii];
3606 if ((insert_type == 3) || ((insert_type == 2) && (varid_templatep->alleles_needed & 4))) {
3607 // Could define what takes precedence here, but simpler to prohibit
3608 // this combination.
3609 logerrputs("Error: 'vid-[split-]dup' cannot be used with a --set-all-var-ids or\n--set-missing-var-ids template string containing a non-REF allele.\n");
3610 goto WritePvarSplit_ret_INVALID_CMDLINE;
3611 }
3612 }
3613 }
3614 }
3615
3616 uintptr_t overflow_buf_size = kCompressStreamBlock + kMaxIdSlen + 512 + 2 * max_allele_slen + max_filter_slen + info_reload_slen;
3617 if (overflow_buf_size < 2 * kCompressStreamBlock) {
3618 overflow_buf_size = 2 * kCompressStreamBlock;
3619 }
3620 const uint32_t output_zst = (pvar_psam_flags / kfPvarZs) & 1;
3621 reterr = InitCstreamAlloc(outname, 0, output_zst, thread_ct, overflow_buf_size, &css, &cswritep);
3622 if (unlikely(reterr)) {
3623 goto WritePvarSplit_ret_1;
3624 }
3625
3626 const uint32_t raw_variant_ctl = BitCtToWordCt(raw_variant_ct);
3627 const uint32_t all_nonref = (nonref_flags_storage == 2);
3628 uint32_t write_info_pr = all_nonref;
3629 uint32_t write_info = (pvar_psam_flags & kfPvarColInfo) || pvar_info_reload;
3630 if (write_info && nonref_flags) {
3631 write_info_pr = !IntersectionIsEmpty(variant_include, nonref_flags, raw_variant_ctl);
3632 }
3633 write_info_pr = write_info_pr && write_info;
3634 if (unlikely(write_info_pr && (info_flags & kfInfoPrNonflagPresent))) {
3635 logputs("\n");
3636 logerrputs("Error: Conflicting INFO:PR definitions. Either fix all REF alleles so that the\n'provisional reference' flag is no longer needed, or remove/rename the other\nuse of the INFO:PR key.\n");
3637 goto WritePvarSplit_ret_INCONSISTENT_INPUT;
3638 }
3639
3640 char* pvar_info_line_iter = nullptr;
3641 uint32_t write_filter = 0;
3642 if (pvar_psam_flags & kfPvarColFilter) {
3643 write_filter = 1;
3644 } else if ((pvar_psam_flags & kfPvarColMaybefilter) && filter_present) {
3645 write_filter = !IntersectionIsEmpty(variant_include, filter_present, raw_variant_ctl);
3646 }
3647 uint32_t info_col_idx = 0; // could save this during first load instead
3648 const uint32_t info_pr_flag_present = (info_flags / kfInfoPrFlagPresent) & 1;
3649 if (pvar_psam_flags & (kfPvarColXheader | kfPvarColVcfheader)) {
3650 reterr = PvarXheaderWrite(variant_include, cip, variant_bps, allele_idx_offsets, allele_storage, nullptr, xheader_blen, (pvar_psam_flags / kfPvarColVcfheader) & 1, write_filter, write_info, write_info_pr && (!info_pr_flag_present), max_allele_slen, vpos_sortstatus, xheader, &css, &cswritep);
3651 if (unlikely(reterr)) {
3652 goto WritePvarSplit_ret_1;
3653 }
3654 }
3655 // could also make this an array-of-structs
3656 uint32_t* info_key_order = nullptr;
3657 const char** info_starts = nullptr;
3658 const char** info_ends = nullptr;
3659 const char** info_curs = nullptr;
3660 uint32_t* info_ref_blens = nullptr;
3661 if (pvar_info_reload) {
3662 if (unlikely(
3663 bigstack_alloc_u32(info_key_ct, &info_key_order) ||
3664 bigstack_alloc_kcp(info_key_ct, &info_starts) ||
3665 bigstack_alloc_kcp(info_key_ct, &info_ends) ||
3666 bigstack_alloc_kcp(info_key_ct, &info_curs) ||
3667 bigstack_alloc_u32(info_key_ct, &info_ref_blens))) {
3668 goto WritePvarSplit_ret_NOMEM;
3669 }
3670 reterr = PvarInfoOpenAndReloadHeader(pvar_info_reload, 1 + (thread_ct > 1), &pvar_reload_txs, &pvar_info_line_iter, &info_col_idx);
3671 if (unlikely(reterr)) {
3672 goto WritePvarSplit_ret_TSTREAM_FAIL;
3673 }
3674 }
3675 if (cip->chrset_source) {
3676 AppendChrsetLine(cip, &cswritep);
3677 }
3678 cswritep = strcpya_k(cswritep, "#CHROM\tPOS\tID\tREF\tALT");
3679
3680 uint32_t write_qual = 0;
3681 if (pvar_psam_flags & kfPvarColQual) {
3682 write_qual = 1;
3683 } else if ((pvar_psam_flags & kfPvarColMaybequal) && qual_present) {
3684 write_qual = !IntersectionIsEmpty(variant_include, qual_present, raw_variant_ctl);
3685 }
3686 if (write_qual) {
3687 cswritep = strcpya_k(cswritep, "\tQUAL");
3688 }
3689 if (write_filter) {
3690 cswritep = strcpya_k(cswritep, "\tFILTER");
3691 }
3692 if (write_info) {
3693 cswritep = strcpya_k(cswritep, "\tINFO");
3694 }
3695
3696 uint32_t write_cm = 0;
3697 if (pvar_psam_flags & kfPvarColCm) {
3698 write_cm = 1;
3699 } else if ((pvar_psam_flags & kfPvarColMaybecm) && variant_cms) {
3700 if (raw_variant_ct == variant_ct) {
3701 // nonzero_cm_present check was performed
3702 write_cm = 1;
3703 } else {
3704 uintptr_t variant_uidx_base = 0;
3705 uintptr_t cur_bits = variant_include[0];
3706 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
3707 const uintptr_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
3708 if (variant_cms[variant_uidx] != 0.0) {
3709 write_cm = 1;
3710 break;
3711 }
3712 }
3713 }
3714 }
3715 if (write_cm) {
3716 cswritep = strcpya_k(cswritep, "\tCM");
3717 }
3718 AppendBinaryEoln(&cswritep);
3719
3720 const VaridTemplate* cur_varid_templatep = nullptr;
3721 const char* varid_token_start = nullptr; // for vid-split
3722 const uint32_t varid_split = (make_plink2_flags / kfMakePlink2VaridSemicolon) & 1;
3723 const uint32_t varid_dup_nosplit = varid_dup && (!varid_split);
3724 const uint32_t split_just_snps = ((make_plink2_flags & (kfMakePlink2MSplitBase * 3)) == kfMakePlink2MSplitSnps);
3725 uint32_t trs_variant_uidx = 0;
3726 uintptr_t variant_uidx_base = 0;
3727 uintptr_t cur_bits = variant_include[0];
3728 uint32_t chr_fo_idx = UINT32_MAX;
3729 uint32_t chr_end = 0;
3730 uint32_t chr_buf_blen = 0;
3731 uint32_t orig_allele_ct = 2;
3732 uint32_t cur_info_key_ct = 0;
3733 uint32_t pct = 0;
3734 uint32_t next_print_variant_idx = variant_ct / 100;
3735 fputs("0%", stdout);
3736 fflush(stdout);
3737 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
3738 const uint32_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
3739 if (variant_uidx >= chr_end) {
3740 do {
3741 ++chr_fo_idx;
3742 chr_end = cip->chr_fo_vidx_start[chr_fo_idx + 1];
3743 } while (variant_uidx >= chr_end);
3744 char* chr_name_end = chrtoa(cip, cip->chr_file_order[chr_fo_idx], chr_buf);
3745 *chr_name_end = '\t';
3746 const uint32_t chr_slen = chr_name_end - chr_buf;
3747 chr_buf_blen = 1 + chr_slen;
3748 if (varid_templatep) {
3749 const int32_t chr_slen_delta = chr_slen - varid_templatep->chr_slen;
3750 varid_templatep->chr_slen = chr_slen;
3751 varid_templatep->base_len += chr_slen_delta;
3752 }
3753 }
3754 uintptr_t allele_idx_offset_base;
3755 if (!allele_idx_offsets) {
3756 allele_idx_offset_base = variant_uidx * 2;
3757 } else {
3758 allele_idx_offset_base = allele_idx_offsets[variant_uidx];
3759 orig_allele_ct = allele_idx_offsets[variant_uidx + 1] - allele_idx_offset_base;
3760 }
3761 const char* const* cur_alleles = &(allele_storage[allele_idx_offset_base]);
3762 const char* orig_variant_id = variant_ids[variant_uidx];
3763 const char* ref_allele = cur_alleles[0];
3764 const uint32_t ref_allele_slen = strlen(ref_allele);
3765 const uint32_t cur_bp = variant_bps[variant_uidx];
3766 uint32_t split_ct_p1 = orig_allele_ct;
3767 uint32_t keep_orig_id = 1;
3768 if (orig_allele_ct > 2) {
3769 if (!varid_dup_nosplit) {
3770 keep_orig_id = 0;
3771 if (varid_templatep && (!missing_varid_match_blen)) {
3772 cur_varid_templatep = varid_templatep;
3773 } else {
3774 cur_varid_templatep = nullptr;
3775 if (varid_split) {
3776 if (VaridSplitOk(orig_variant_id, orig_allele_ct)) {
3777 varid_token_start = orig_variant_id;
3778 } else if (varid_dup) {
3779 keep_orig_id = 1;
3780 } else {
3781 varid_token_start = nullptr;
3782 }
3783 }
3784 if ((!varid_token_start) && varid_templatep) {
3785 // Note that --set-missing-var-ids almost always applies here
3786 // when it's specified; only exception is when vid-split was also
3787 // specified and the split succeeded.
3788 cur_varid_templatep = varid_templatep;
3789 }
3790 }
3791 }
3792 // Necessary to distinguish between '-' and '-snps' here.
3793 if (split_just_snps) {
3794 for (uint32_t uii = 0; uii != orig_allele_ct; ++uii) {
3795 if (cur_alleles[uii][1]) {
3796 split_ct_p1 = 2;
3797 break;
3798 }
3799 }
3800 }
3801 if ((split_ct_p1 != 2) && pvar_info_line_iter) {
3802 reterr = PvarInfoReload(info_col_idx, variant_uidx, &pvar_reload_txs, &pvar_info_line_iter, &trs_variant_uidx);
3803 if (unlikely(reterr)) {
3804 goto WritePvarSplit_ret_TSTREAM_FAIL;
3805 }
3806 char* info_subtoken_iter = pvar_info_line_iter;
3807 pvar_info_line_iter = CurTokenEnd(pvar_info_line_iter);
3808 cur_info_key_ct = 0;
3809 // special case: if entire info field is '.', treat as zero keys
3810 if ((info_subtoken_iter[0] != '.') || (pvar_info_line_iter != &(info_subtoken_iter[1]))) {
3811 while (1) {
3812 if (unlikely(cur_info_key_ct == info_key_ct)) {
3813 snprintf(g_logbuf, kLogbufSize, "Error: Too many INFO keys for variant ID '%s'.\n", orig_variant_id);
3814 goto WritePvarSplit_ret_MALFORMED_INPUT_WW;
3815 }
3816 char* info_subtoken_end = AdvToDelimOrEnd(info_subtoken_iter, pvar_info_line_iter, ';');
3817 char* key_end = AdvToDelimOrEnd(info_subtoken_iter, info_subtoken_end, '=');
3818 const uint32_t key_slen = key_end - info_subtoken_iter;
3819 const uint32_t kidx = IdHtableFindNnt(info_subtoken_iter, info_keys, info_keys_htable, key_slen, info_keys_htable_size);
3820 if (unlikely(kidx == UINT32_MAX)) {
3821 snprintf(g_logbuf, kLogbufSize, "Error: INFO key for variant ID '%s' missing from header.\n", orig_variant_id);
3822 goto WritePvarSplit_ret_MALFORMED_INPUT_WW;
3823 }
3824 info_key_order[cur_info_key_ct] = kidx;
3825 const int32_t knum = const_container_of(info_keys[kidx], InfoVtype, key)->num;
3826 if (key_end == info_subtoken_end) {
3827 if (unlikely(knum)) {
3828 snprintf(g_logbuf, kLogbufSize, "Error: INFO key '%s' for variant ID '%s' does not have an accompanying value.\n", info_keys[kidx], orig_variant_id);
3829 goto WritePvarSplit_ret_MALFORMED_INPUT_WW;
3830 }
3831 } else {
3832 if (unlikely(!knum)) {
3833 snprintf(g_logbuf, kLogbufSize, "Error: INFO key '%s' for variant ID '%s' has an accompanying value, despite being of type Flag.\n", info_keys[kidx], orig_variant_id);
3834 goto WritePvarSplit_ret_MALFORMED_INPUT_WW;
3835 }
3836 info_subtoken_iter = &(key_end[1]);
3837
3838 // don't actually need this for Number=A case
3839 info_starts[cur_info_key_ct] = info_subtoken_iter;
3840
3841 info_ends[cur_info_key_ct] = info_subtoken_end;
3842 if (knum <= kInfoVtypeA) {
3843 // (Don't need to do anything else for kInfoVtypeUnknown or
3844 // positive; we unconditionally copy all the text in those
3845 // cases.)
3846 if (knum == kInfoVtypeA) {
3847 info_curs[cur_info_key_ct] = info_subtoken_iter;
3848 } else {
3849 char* ref_value_end = S_CAST(char*, memchr(info_subtoken_iter, ',', info_subtoken_end - info_subtoken_iter));
3850 if (unlikely(!ref_value_end)) {
3851 snprintf(g_logbuf, kLogbufSize, "Error: Too few values for INFO key '%s', variant ID '%s'.\n", info_keys[kidx], orig_variant_id);
3852 goto WritePvarSplit_ret_MALFORMED_INPUT_WW;
3853 }
3854 ++ref_value_end;
3855 info_ref_blens[cur_info_key_ct] = ref_value_end - info_subtoken_iter;
3856 info_curs[cur_info_key_ct] = ref_value_end;
3857 }
3858 }
3859 }
3860 ++cur_info_key_ct;
3861 if (info_subtoken_end == pvar_info_line_iter) {
3862 break;
3863 }
3864 info_subtoken_iter = &(info_subtoken_end[1]);
3865 }
3866 }
3867 }
3868 }
3869 for (uint32_t alt_allele_idx = 1; alt_allele_idx != split_ct_p1; ++alt_allele_idx) {
3870 cswritep = memcpya(cswritep, chr_buf, chr_buf_blen);
3871 cswritep = u32toa_x(cur_bp, '\t', cswritep);
3872 const char* cur_alt_allele = cur_alleles[alt_allele_idx];
3873 const uint32_t cur_alt_allele_slen = strlen(cur_alt_allele);
3874 if ((split_ct_p1 == 2) || keep_orig_id) {
3875 cswritep = strcpyax(cswritep, orig_variant_id, '\t');
3876 cswritep = memcpyax(cswritep, ref_allele, ref_allele_slen, '\t');
3877 cswritep = memcpya(cswritep, cur_alt_allele, cur_alt_allele_slen);
3878 if (unlikely(Cswrite(&css, &cswritep))) {
3879 goto WritePvarSplit_ret_WRITE_FAIL;
3880 }
3881 if ((orig_allele_ct > 2) && (split_ct_p1 == 2)) {
3882 // -snps non-split case
3883 for (uint32_t allele_idx = 2; allele_idx != orig_allele_ct; ++allele_idx) {
3884 *cswritep++ = ',';
3885 cswritep = strcpya(cswritep, cur_alleles[allele_idx]);
3886 if (unlikely(Cswrite(&css, &cswritep))) {
3887 goto WritePvarSplit_ret_WRITE_FAIL;
3888 }
3889 }
3890 }
3891 } else {
3892 if (cur_varid_templatep) {
3893 // Always true in --set-all-var-ids case. True in
3894 // --set-missing-var-ids case when vid-split unspecified, or split
3895 // failed.
3896 cswritep = VaridTemplateWrite(cur_varid_templatep, ref_allele, cur_alt_allele, cur_bp, ref_allele_slen, 0, cur_alt_allele_slen, cswritep);
3897 *cswritep++ = '\t';
3898 } else if (varid_token_start) {
3899 const char* varid_token_end = strchrnul(varid_token_start, ';');
3900 // If substring matches missing code and --set-missing-var-ids is
3901 // specified, we replace it.
3902 if (varid_templatep && (S_CAST(uintptr_t, varid_token_end - varid_token_start) == missing_varid_slen) && memequal(varid_token_start, missing_varid_match, missing_varid_slen)) {
3903 cswritep = VaridTemplateWrite(varid_templatep, ref_allele, cur_alt_allele, cur_bp, ref_allele_slen, 0, cur_alt_allele_slen, cswritep);
3904 } else {
3905 cswritep = memcpya(cswritep, varid_token_start, varid_token_end - varid_token_start);
3906 }
3907 *cswritep++ = '\t';
3908 varid_token_start = &(varid_token_end[1]);
3909 } else {
3910 cswritep = memcpyax(cswritep, missing_varid_match, missing_varid_slen, '\t');
3911 }
3912 cswritep = memcpyax(cswritep, ref_allele, ref_allele_slen, '\t');
3913 cswritep = memcpya(cswritep, cur_alt_allele, cur_alt_allele_slen);
3914 if (unlikely(Cswrite(&css, &cswritep))) {
3915 goto WritePvarSplit_ret_WRITE_FAIL;
3916 }
3917 }
3918 if (write_qual) {
3919 *cswritep++ = '\t';
3920 if ((!qual_present) || (!IsSet(qual_present, variant_uidx))) {
3921 *cswritep++ = '.';
3922 } else {
3923 cswritep = ftoa_g(quals[variant_uidx], cswritep);
3924 }
3925 }
3926
3927 if (write_filter) {
3928 *cswritep++ = '\t';
3929 if ((!filter_present) || (!IsSet(filter_present, variant_uidx))) {
3930 *cswritep++ = '.';
3931 } else if (!IsSet(filter_npass, variant_uidx)) {
3932 cswritep = strcpya_k(cswritep, "PASS");
3933 } else {
3934 cswritep = strcpya(cswritep, filter_storage[variant_uidx]);
3935 }
3936 }
3937
3938 if (write_info) {
3939 *cswritep++ = '\t';
3940 const uint32_t is_pr = all_nonref || (nonref_flags && IsSet(nonref_flags, variant_uidx));
3941 if (pvar_info_line_iter) {
3942 if (split_ct_p1 == 2) {
3943 reterr = PvarInfoReloadAndWrite(info_pr_flag_present, info_col_idx, variant_uidx, is_pr, &pvar_reload_txs, &pvar_info_line_iter, &cswritep, &trs_variant_uidx);
3944 if (unlikely(reterr)) {
3945 goto WritePvarSplit_ret_TSTREAM_FAIL;
3946 }
3947 } else {
3948 if (!cur_info_key_ct) {
3949 *cswritep++ = '.';
3950 } else {
3951 const uint32_t is_last_allele = (alt_allele_idx + 1 == split_ct_p1);
3952 for (uint32_t kpos = 0; kpos != cur_info_key_ct; ++kpos) {
3953 const uint32_t kidx = info_key_order[kpos];
3954 const char* cur_key_str = info_keys[kidx];
3955 cswritep = strcpya(cswritep, cur_key_str);
3956 const int32_t knum = const_container_of(info_keys[kidx], InfoVtype, key)->num;
3957 if (knum) {
3958 *cswritep++ = '=';
3959 const char* cur_info_start = info_starts[kpos];
3960 const char* cur_info_end = info_ends[kpos];
3961 if (knum >= kInfoVtypeUnknown) {
3962 cswritep = memcpya(cswritep, cur_info_start, cur_info_end - cur_info_start);
3963 } else {
3964 if (knum != kInfoVtypeA) {
3965 cswritep = memcpya(cswritep, cur_info_start, info_ref_blens[kpos]);
3966 }
3967 // okay, this needs a better name
3968 const char* cur_info_cur = info_curs[kpos];
3969
3970 const char* subtoken_end = AdvToDelimOrEnd(cur_info_cur, cur_info_end, ',');
3971 if (knum == kInfoVtypeG) {
3972 if (unlikely(subtoken_end == cur_info_end)) {
3973 snprintf(g_logbuf, kLogbufSize, "Error: Too few values for INFO key '%s', variant ID '%s'.\n", cur_key_str, orig_variant_id);
3974 goto WritePvarSplit_ret_MALFORMED_INPUT_WW;
3975 }
3976 cswritep = memcpya(cswritep, cur_info_cur, 1 + S_CAST(uintptr_t, subtoken_end - cur_info_cur));
3977 cur_info_cur = subtoken_end;
3978 const uint32_t skip_ct = alt_allele_idx - 1;
3979 if (skip_ct) {
3980 cur_info_cur = AdvToNthDelimChecked(&(cur_info_cur[1]), cur_info_end, skip_ct, ',');
3981 if (unlikely(!subtoken_end)) {
3982 snprintf(g_logbuf, kLogbufSize, "Error: Too few values for INFO key '%s', variant ID '%s'.\n", cur_key_str, orig_variant_id);
3983 goto WritePvarSplit_ret_MALFORMED_INPUT_WW;
3984 }
3985 }
3986 ++cur_info_cur;
3987 subtoken_end = AdvToDelimOrEnd(cur_info_cur, cur_info_end, ',');
3988 }
3989 if (unlikely((subtoken_end == cur_info_end) != is_last_allele)) {
3990 snprintf(g_logbuf, kLogbufSize, "Error: Wrong number of values for INFO key '%s', variant ID '%s'.\n", cur_key_str, orig_variant_id);
3991 goto WritePvarSplit_ret_MALFORMED_INPUT_WW;
3992 }
3993 cswritep = memcpya(cswritep, cur_info_cur, subtoken_end - cur_info_cur);
3994 info_curs[kpos] = &(subtoken_end[1]);
3995 }
3996 }
3997 *cswritep++ = ';';
3998 }
3999 --cswritep;
4000 }
4001 }
4002 } else {
4003 if (is_pr) {
4004 cswritep = strcpya_k(cswritep, "PR");
4005 } else {
4006 *cswritep++ = '.';
4007 }
4008 }
4009 }
4010
4011 if (write_cm) {
4012 *cswritep++ = '\t';
4013 if (!variant_cms) {
4014 *cswritep++ = '0';
4015 } else {
4016 cswritep = dtoa_g_p8(variant_cms[variant_uidx], cswritep);
4017 }
4018 }
4019 AppendBinaryEoln(&cswritep);
4020 }
4021 if (variant_idx >= next_print_variant_idx) {
4022 if (pct > 10) {
4023 putc_unlocked('\b', stdout);
4024 }
4025 pct = (variant_idx * 100LLU) / variant_ct;
4026 printf("\b\b%u%%", pct++);
4027 fflush(stdout);
4028 next_print_variant_idx = (pct * S_CAST(uint64_t, variant_ct)) / 100;
4029 }
4030 }
4031 if (unlikely(CswriteCloseNull(&css, cswritep))) {
4032 goto WritePvarSplit_ret_WRITE_FAIL;
4033 }
4034 if (pct > 10) {
4035 putc_unlocked('\b', stdout);
4036 }
4037 fputs("\b\b", stdout);
4038 }
4039 while (0) {
4040 WritePvarSplit_ret_NOMEM:
4041 reterr = kPglRetNomem;
4042 break;
4043 WritePvarSplit_ret_TSTREAM_FAIL:
4044 TextStreamErrPrint(pvar_info_reload, &pvar_reload_txs);
4045 break;
4046 WritePvarSplit_ret_WRITE_FAIL:
4047 reterr = kPglRetWriteFail;
4048 break;
4049 WritePvarSplit_ret_INVALID_CMDLINE:
4050 reterr = kPglRetInvalidCmdline;
4051 break;
4052 WritePvarSplit_ret_MALFORMED_INPUT_WW:
4053 logputs("\n");
4054 WordWrapB(0);
4055 logerrputsb();
4056 reterr = kPglRetMalformedInput;
4057 break;
4058 WritePvarSplit_ret_INCONSISTENT_INPUT:
4059 reterr = kPglRetInconsistentInput;
4060 break;
4061 }
4062 WritePvarSplit_ret_1:
4063 CswriteCloseCond(&css, cswritep);
4064 CleanupTextStream2(pvar_info_reload, &pvar_reload_txs, &reterr);
4065 BigstackReset(bigstack_mark);
4066 return reterr;
4067 }
4068
4069 // Final filter_keys is natural-sorted.
4070 // Return values are allocated on bottom of bigstack.
4071 // Caller must initialize all return values to correspond to the null table.
MakeFilterHtable(const uintptr_t * variant_include,const uintptr_t * filter_npass,const char * const * filter_storage,uint32_t variant_ct,const char *** filter_keys_ptr,uint32_t ** filter_keys_htable_ptr,uint32_t * filter_key_ct_ptr,uint32_t * filter_keys_htable_size_ptr)4072 PglErr MakeFilterHtable(const uintptr_t* variant_include, const uintptr_t* filter_npass, const char* const* filter_storage, uint32_t variant_ct, const char*** filter_keys_ptr, uint32_t** filter_keys_htable_ptr, uint32_t* filter_key_ct_ptr, uint32_t* filter_keys_htable_size_ptr) {
4073 unsigned char* bigstack_end_mark = g_bigstack_end;
4074 PglErr reterr = kPglRetSuccess;
4075 {
4076 // Start with empty size-128 table, which will practically always be enough
4077 // while still being small relative to L1 cache. Double table size
4078 // whenever load factor reaches 0.25; there shouldn't be *that* many
4079 // distinct filters.
4080 // possible todo: multithread this scan, merge results at the end; can also
4081 // separate this stage from the rest of the function.
4082 uint32_t table_size = 128;
4083 uint32_t hash_shift = 25; // 32 - log2(table_size)
4084 uint32_t filter_key_ct = 0;
4085 char** filter_tokens;
4086 if (unlikely(
4087 bigstack_end_calloc_cp(table_size, &filter_tokens))) {
4088 goto MakeFilterHtable_ret_NOMEM;
4089 }
4090
4091 unsigned char* tmp_alloc_base = g_bigstack_base;
4092 unsigned char* tmp_alloc_end = g_bigstack_end;
4093 uintptr_t variant_widx = 0;
4094 uintptr_t cur_bits = variant_include[0];
4095 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
4096 const uintptr_t lowbit = BitIter1y(variant_include, &variant_widx, &cur_bits);
4097 if (lowbit & filter_npass[variant_widx]) {
4098 const char* filter_iter = filter_storage[variant_widx * kBitsPerWord + ctzw(lowbit)];
4099 while (1) {
4100 const char* token_end = strchrnul(filter_iter, ';');
4101 const uint32_t cur_id_slen = token_end - filter_iter;
4102 for (uint32_t hashval = Hash32(filter_iter, cur_id_slen) >> hash_shift; ; ) {
4103 char* cur_token_ptr = filter_tokens[hashval];
4104 if (!cur_token_ptr) {
4105 char* storage_loc;
4106 if (StoreStringAtBase(tmp_alloc_end, filter_iter, cur_id_slen, &tmp_alloc_base, &storage_loc)) {
4107 goto MakeFilterHtable_ret_NOMEM;
4108 }
4109 ++filter_key_ct;
4110 if (filter_key_ct * 4 < table_size) {
4111 filter_tokens[hashval] = storage_loc;
4112 break;
4113 }
4114 #ifdef __LP64__
4115 if (unlikely(hash_shift == 1)) {
4116 // this is technically "not yet supported", but I fail to see a
4117 // valid use case for >536 million distinct FILTER keys...
4118 logerrprintf("Error: Too many distinct FILTER keys (max 2^29 - 1).\n");
4119 goto MakeFilterHtable_ret_MALFORMED_INPUT;
4120 }
4121 #endif
4122 // It's fine for the new table to overlap the old table, since we
4123 // can iterate through all the strings by walking forward from
4124 // g_bigstack_base.
4125 const uintptr_t extra_byte_ct = table_size * sizeof(intptr_t);
4126 if (unlikely(S_CAST(uintptr_t, tmp_alloc_end - tmp_alloc_base) < extra_byte_ct)) {
4127 goto MakeFilterHtable_ret_NOMEM;
4128 }
4129 tmp_alloc_end -= extra_byte_ct;
4130 filter_tokens = R_CAST(char**, tmp_alloc_end);
4131 memset(filter_tokens, 0, 2 * extra_byte_ct);
4132 table_size *= 2;
4133 --hash_shift;
4134 char* rehash_iter = R_CAST(char*, g_bigstack_base);
4135 for (uint32_t uii = 0; uii != filter_key_ct; ++uii) {
4136 char* rehash_token_end = strnul(rehash_iter);
4137 const uint32_t rehash_id_slen = rehash_token_end - rehash_iter;
4138 for (uint32_t rehashval = Hash32(rehash_iter, rehash_id_slen) >> hash_shift; ; ) {
4139 if (!filter_tokens[rehashval]) {
4140 filter_tokens[rehashval] = rehash_iter;
4141 break;
4142 }
4143 if (++rehashval == table_size) {
4144 rehashval = 0;
4145 }
4146 }
4147 rehash_iter = &(rehash_token_end[1]);
4148 }
4149 break;
4150 }
4151 if ((!memcmp(filter_iter, cur_token_ptr, cur_id_slen)) && (!cur_token_ptr[cur_id_slen])) {
4152 break;
4153 }
4154 if (++hashval == table_size) {
4155 hashval = 0;
4156 }
4157 }
4158 if (!(*token_end)) {
4159 break;
4160 }
4161 filter_iter = &(token_end[1]);
4162 }
4163 }
4164 }
4165 if (!filter_key_ct) {
4166 // All nonpassing variants were already filtered out.
4167 // Caller already initialized null table.
4168 goto MakeFilterHtable_ret_1;
4169 }
4170 char* token_iter = R_CAST(char*, g_bigstack_base);
4171 BigstackBaseSet(tmp_alloc_base);
4172 const uint32_t filter_keys_htable_size = GetHtableFastSize(filter_key_ct);
4173 if (unlikely(
4174 bigstack_alloc_kcp(filter_key_ct, filter_keys_ptr) ||
4175 bigstack_alloc_u32(filter_keys_htable_size, filter_keys_htable_ptr))) {
4176 goto MakeFilterHtable_ret_NOMEM;
4177 }
4178 const char** filter_keys = *filter_keys_ptr;
4179 for (uint32_t uii = 0; uii != filter_key_ct; ++uii) {
4180 filter_keys[uii] = token_iter;
4181 char* token_end = strnul(token_iter);
4182 token_iter = &(token_end[1]);
4183 }
4184 StrptrArrNsort(filter_key_ct, filter_keys);
4185 *filter_key_ct_ptr = filter_key_ct;
4186 *filter_keys_htable_size_ptr = filter_keys_htable_size;
4187 uint32_t* filter_keys_htable = *filter_keys_htable_ptr;
4188 SetAllU32Arr(filter_keys_htable_size, filter_keys_htable);
4189 for (uint32_t uii = 0; uii != filter_key_ct; ++uii) {
4190 for (uint32_t hashval = Hashceil(filter_keys[uii], strlen(filter_keys[uii]), filter_keys_htable_size); ; ) {
4191 if (filter_keys_htable[hashval] == UINT32_MAX) {
4192 filter_keys_htable[hashval] = uii;
4193 }
4194 if (++hashval == filter_keys_htable_size) {
4195 hashval = 0;
4196 }
4197 }
4198 }
4199 }
4200 while (0) {
4201 MakeFilterHtable_ret_NOMEM:
4202 reterr = kPglRetNomem;
4203 break;
4204 #ifdef __LP64__
4205 MakeFilterHtable_ret_MALFORMED_INPUT:
4206 reterr = kPglRetMalformedInput;
4207 break;
4208 #endif
4209 }
4210 MakeFilterHtable_ret_1:
4211 BigstackEndReset(bigstack_end_mark);
4212 return reterr;
4213 }
4214
4215 /*
4216 PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* qual_present, const float* quals, const uintptr_t* filter_present, const uintptr_t* filter_npass, const char* const* filter_storage, const uintptr_t* nonref_flags, const char* pvar_info_reload, const double* variant_cms, const char* varid_template_str, const char* missing_varid_match, const char* const* info_keys, const uint32_t* info_keys_htable, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_slen, uint32_t new_variant_id_max_allele_slen, uint32_t max_write_allele_ct, uint32_t max_missalt_ct, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t nonref_flags_storage, uint32_t max_filter_slen, uint32_t info_reload_slen, UnsortedVar vpos_sortstatus, uint32_t info_key_ct, uint32_t info_keys_htable_size, MiscFlags misc_flags, MakePlink2Flags make_plink2_flags, PvarPsamFlags pvar_psam_flags, uint32_t thread_ct, char* xheader) {
4217 unsigned char* bigstack_mark = g_bigstack_base;
4218 char* cswritep = nullptr;
4219 PglErr reterr = kPglRetSuccess;
4220 CompressStreamState css;
4221 TextStream pvar_reload_txs;
4222 PreinitCstream(&css);
4223 PreinitTextStream(&pvar_reload_txs);
4224 {
4225 const uint32_t max_chr_blen = GetMaxChrSlen(cip) + 1;
4226 // includes trailing tab
4227 char* chr_buf;
4228
4229 if (unlikely(bigstack_alloc_c(max_chr_blen, &chr_buf))) {
4230 goto WritePvarJoin_ret_NOMEM;
4231 }
4232 const uint32_t new_variant_id_overflow_missing = (misc_flags / kfMiscNewVarIdOverflowMissing) & 1;
4233 const uint32_t varid_dup = (make_plink2_flags / kfMakePlink2VaridDup) & 1;
4234 VaridTemplate* varid_templatep = nullptr;
4235 if (!missing_varid_match) {
4236 missing_varid_match = &(g_one_char_strs[92]); // '.'
4237 }
4238 uint32_t missing_varid_slen = strlen(missing_varid_match);
4239 uint32_t missing_varid_match_blen = 0; // nonzero iff --set-missing-var-ids
4240 if (varid_template_str) {
4241 if (misc_flags & kfMiscSetMissingVarIds) {
4242 missing_varid_match_blen = missing_varid_slen + 1;
4243 }
4244 if (unlikely(BIGSTACK_ALLOC_X(VaridTemplate, 1, &varid_templatep))) {
4245 goto WritePvarJoin_ret_NOMEM;
4246 }
4247 const uint32_t overflow_substitute_blen = new_variant_id_overflow_missing? (missing_varid_slen + 1) : 0;
4248 VaridTemplateInit(varid_template_str, missing_varid_match, chr_buf, new_variant_id_max_allele_slen, overflow_substitute_blen, varid_templatep);
4249 if (varid_dup) {
4250 for (uint32_t uii = 0; uii != varid_templatep->insert_ct; ++uii) {
4251 const uint32_t insert_type = varid_templatep->insert_types[uii];
4252 if ((insert_type == 3) || ((insert_type == 2) && (varid_templatep->alleles_needed & 4))) {
4253 // Could define what takes precedence here, but simpler to prohibit
4254 // this combination.
4255 logerrputs("Error: 'vid-[split-]dup' cannot be used with a --set-all-var-ids or\n--set-missing-var-ids template string containing a non-REF allele.\n");
4256 goto WritePvarJoin_ret_INVALID_CMDLINE;
4257 }
4258 }
4259 }
4260 }
4261
4262 uintptr_t overflow_buf_size = kCompressStreamBlock + kMaxIdSlen + 512 + 2 * max_allele_slen + max_filter_slen + S_CAST(uintptr_t, info_reload_slen) * (max_write_allele_ct - 1);
4263 if (overflow_buf_size < 2 * kCompressStreamBlock) {
4264 overflow_buf_size = 2 * kCompressStreamBlock;
4265 }
4266 const uint32_t output_zst = (pvar_psam_flags / kfPvarZs) & 1;
4267 reterr = InitCstreamAlloc(outname, 0, output_zst, thread_ct, overflow_buf_size, &css, &cswritep);
4268 if (unlikely(reterr)) {
4269 goto WritePvarJoin_ret_1;
4270 }
4271
4272 const uint32_t raw_variant_ctl = BitCtToWordCt(raw_variant_ct);
4273 const uint32_t all_nonref = (nonref_flags_storage == 2);
4274 uint32_t write_info_pr = all_nonref;
4275 uint32_t write_info = (pvar_psam_flags & kfPvarColInfo) || pvar_info_reload;
4276 if (write_info && nonref_flags) {
4277 write_info_pr = !IntersectionIsEmpty(variant_include, nonref_flags, raw_variant_ctl);
4278 }
4279 write_info_pr = write_info_pr && write_info;
4280 if (unlikely(write_info_pr && (info_flags & kfInfoPrNonflagPresent))) {
4281 logputs("\n");
4282 logerrputs("Error: Conflicting INFO:PR definitions. Either fix all REF alleles so that the\n'provisional reference' flag is no longer needed, or remove/rename the other\nuse of the INFO:PR key.\n");
4283 goto WritePvarJoin_ret_INCONSISTENT_INPUT;
4284 }
4285
4286 char* pvar_info_line_iter = nullptr;
4287 uint32_t write_filter = 0;
4288 if (pvar_psam_flags & kfPvarColFilter) {
4289 write_filter = 1;
4290 } else if ((pvar_psam_flags & kfPvarColMaybefilter) && filter_present) {
4291 write_filter = !IntersectionIsEmpty(variant_include, filter_present, raw_variant_ctl);
4292 }
4293 uint32_t info_col_idx = 0; // could save this during first load instead
4294 const uint32_t info_pr_flag_present = (info_flags / kfInfoPrFlagPresent) & 1;
4295 if (pvar_psam_flags & (kfPvarColXheader | kfPvarColVcfheader)) {
4296 reterr = PvarXheaderWrite(variant_include, cip, variant_bps, allele_idx_offsets, allele_storage, nullptr, xheader_blen, (pvar_psam_flags / kfPvarColVcfheader) & 1, write_filter, write_info, write_info_pr && (!info_pr_flag_present), max_allele_slen, vpos_sortstatus, xheader, &css, &cswritep);
4297 if (unlikely(reterr)) {
4298 goto WritePvarJoin_ret_1;
4299 }
4300 }
4301 const uint32_t join_mode = (make_plink2_flags & (kfMakePlink2MSplitBase * 7));
4302 uintptr_t info_cache_size = max_missalt_ct + max_write_allele_ct - 1;
4303 if (join_mode != kfMakePlink2MJoinSnps) {
4304 info_cache_size *= 3;
4305 }
4306 #ifndef __LP64__
4307 if (S_CAST(uint64_t, info_cache_size) * info_key_ct * sizeof(intptr_t) > 0x7fffffff) {
4308 goto WritePvarJoin_ret_NOMEM;
4309 }
4310 #endif
4311
4312 if (cip->chrset_source) {
4313 AppendChrsetLine(cip, &cswritep);
4314 }
4315 cswritep = strcpya_k(cswritep, "#CHROM\tPOS\tID\tREF\tALT");
4316
4317 uint32_t write_qual = 0;
4318 if (pvar_psam_flags & kfPvarColQual) {
4319 write_qual = 1;
4320 } else if ((pvar_psam_flags & kfPvarColMaybequal) && qual_present) {
4321 write_qual = !IntersectionIsEmpty(variant_include, qual_present, raw_variant_ctl);
4322 }
4323 if (write_qual) {
4324 cswritep = strcpya_k(cswritep, "\tQUAL");
4325 }
4326 const char** filter_keys = nullptr;
4327 uint32_t* filter_keys_htable = nullptr;
4328 uintptr_t* cur_filter_keys = nullptr;
4329 uint32_t filter_keys_htable_size = 0;
4330 uint32_t filter_key_ct = 0;
4331 uint32_t filter_key_ctl = 0;
4332 if (write_filter) {
4333 // The VCF spec doesn't require ##FILTER= header lines, and unlike the
4334 // case with INFO Number=A/R/G, we can join correctly without header
4335 // information. It's slightly computationally more expensive, but INFO
4336 // and genotype joining costs are more significant.
4337 if (filter_npass) {
4338 reterr = MakeFilterHtable(variant_include, filter_npass, filter_storage, variant_ct, &filter_keys, &filter_keys_htable, &filter_key_ct, &filter_keys_htable_size);
4339 if (unlikely(reterr)) {
4340 goto WritePvarJoin_ret_1;
4341 }
4342 if (filter_key_ct) {
4343 filter_key_ctl = BitCtToWordCt(filter_key_ct);
4344 if (unlikely(bigstack_alloc_w(filter_key_ctl, &cur_filter_keys))) {
4345 goto WritePvarJoin_ret_1;
4346 }
4347 }
4348 }
4349 cswritep = strcpya_k(cswritep, "\tFILTER");
4350 }
4351
4352 char** info_bufs = nullptr;
4353 const char** info_starts = nullptr;
4354 const char** info_ends = nullptr; // ugh, this is not related to INFO:END
4355 const char** info_curs = nullptr;
4356 uint32_t info_end_key_idx = UINT32_MAX;
4357 if (pvar_info_reload) {
4358 if (unlikely(
4359 bigstack_alloc_cp(info_cache_size, &info_bufs) ||
4360 bigstack_alloc_kcp(info_key_ct * info_cache_size, &info_starts) ||
4361 bigstack_alloc_kcp(info_key_ct * info_cache_size, &info_ends) ||
4362 bigstack_alloc_kcp(info_key_ct * info_cache_size, &info_curs))) {
4363 goto WritePvarJoin_ret_NOMEM;
4364 }
4365 reterr = PvarInfoOpenAndReloadHeader(pvar_info_reload, 1 + (thread_ct > 1), &pvar_reload_txs, &pvar_info_line_iter, &info_col_idx);
4366 if (unlikely(reterr)) {
4367 goto WritePvarJoin_ret_TSTREAM_FAIL;
4368 }
4369 info_end_key_idx = IdHtableFind("END", info_keys, info_keys_htable, strlen("END"), info_keys_htable_size);
4370 if (info_end_key_idx != UINT32_MAX) {
4371 const int32_t knum = const_container_of(info_keys[info_end_key_idx], InfoVtype, key)->num;
4372 if ((knum != 1) && (knum != kInfoVtypeUnknown)) {
4373 // TODO: verify type instead.
4374 // but if number is not . or 1, this is not the INFO:END we're
4375 // looking for.
4376 info_end_key_idx = UINT32_MAX;
4377 }
4378 }
4379 }
4380 if (write_info) {
4381 cswritep = strcpya_k(cswritep, "\tINFO");
4382 }
4383
4384 uint32_t write_cm = 0;
4385 if (pvar_psam_flags & kfPvarColCm) {
4386 write_cm = 1;
4387 } else if ((pvar_psam_flags & kfPvarColMaybecm) && variant_cms) {
4388 if (raw_variant_ct == variant_ct) {
4389 // nonzero_cm_present check was performed
4390 write_cm = 1;
4391 } else {
4392 uintptr_t variant_uidx_base = 0;
4393 uintptr_t cur_bits = variant_include[0];
4394 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
4395 const uintptr_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
4396 if (variant_cms[variant_uidx] != 0.0) {
4397 write_cm = 1;
4398 break;
4399 }
4400 }
4401 }
4402 }
4403 if (write_cm) {
4404 cswritep = strcpya_k(cswritep, "\tCM");
4405 }
4406 AppendBinaryEoln(&cswritep);
4407
4408 const VaridTemplate* cur_varid_templatep = nullptr;
4409 const char* varid_token_start = nullptr; // for vid-split
4410 const uint32_t varid_split = (make_plink2_flags / kfMakePlink2VaridSemicolon) & 1;
4411 const uint32_t varid_dup_nosplit = varid_dup && (!varid_split);
4412 uint32_t next_variant_idx = 0;
4413 uint32_t trs_variant_uidx = 0;
4414 uint32_t next_variant_uidx = 0;
4415 uintptr_t next_variant_uidx_base = 0;
4416 uintptr_t next_bits = variant_include[0];
4417 uint32_t chr_fo_idx = UINT32_MAX;
4418 uint32_t chr_end = 0;
4419 uint32_t chr_buf_blen = 0;
4420 uint32_t prev_bp = 0;
4421 uint32_t cur_bp = 0;
4422 uint32_t bp_start_variant_idx = 0;
4423 uint32_t bp_start_variant_uidx = 0;
4424 uintptr_t bp_start_variant_uidx_base = 0;
4425 uintptr_t bp_start_bits = variant_include[0];
4426 uint32_t allele_ct = 2;
4427 uint32_t pct = 0;
4428 uint32_t next_print_variant_idx = variant_ct / 100;
4429 JoinCounts jc;
4430 jc.snp_ct = 0;
4431 jc.nonsnp_ct = 0;
4432 jc.symbolic_ct = 0;
4433 jc.missalt_snp_ct = 0;
4434 jc.missalt_nonsnp_ct = 0;
4435 JoinCounts next_jc = jc;
4436 fputs("0%", stdout);
4437 fflush(stdout);
4438 while (1) {
4439 for (; next_variant_idx != variant_ct; ++next_variant_idx) {
4440 next_variant_uidx = BitIter1(variant_include, &next_variant_uidx_base, &next_bits);
4441 if (next_variant_uidx >= chr_end) {
4442 do {
4443 ++chr_fo_idx;
4444 chr_end = cip->chr_fo_vidx_start[chr_fo_idx + 1];
4445 } while (next_variant_uidx >= chr_end);
4446 char* chr_name_end = chrtoa(cip, cip->chr_file_order[chr_fo_idx], chr_buf);
4447 *chr_name_end = '\t';
4448 const uint32_t chr_slen = chr_name_end - chr_buf;
4449 chr_buf_blen = 1 + chr_slen;
4450 if (varid_templatep) {
4451 const int32_t chr_slen_delta = chr_slen - varid_templatep->chr_slen;
4452 varid_templatep->chr_slen = chr_slen;
4453 varid_templatep->base_len += chr_slen_delta;
4454 }
4455 prev_bp = UINT32_MAX;
4456 }
4457 cur_bp = variant_bps[next_variant_uidx];
4458 if (cur_bp != prev_bp) {
4459 break;
4460 }
4461 uintptr_t allele_idx_offset_base;
4462 if (!allele_idx_offsets) {
4463 allele_idx_offset_base = next_variant_uidx * 2;
4464 } else {
4465 allele_idx_offset_base = allele_idx_offsets[next_variant_uidx];
4466 allele_ct = allele_idx_offsets[next_variant_uidx + 1] - allele_idx_offset_base;
4467 }
4468 const char* const* cur_alleles = &(allele_storage[allele_idx_offset_base]);
4469 JoinVtype jvt = JoinCount(cur_alleles, allele_ct, &next_jc);
4470 // previously validated
4471 // if ((join_mode == kfMakePlink2MJoinSnps) && ()) {
4472 // }
4473
4474 // TODO
4475 jc.snp_ct += next_jc.snp_ct;
4476 jc.nonsnp_ct += next_jc.nonsnp_ct;
4477 jc.symbolic_ct += next_jc.symbolic_ct;
4478 jc.missalt_snp_ct += next_jc.missalt_snp_ct;
4479 jc.missalt_nonsnp_ct += next_jc.missalt_nonsnp_ct;
4480 }
4481 if (next_variant_idx == bp_start_variant_idx + 1) {
4482 // No join needed. This is usually the common case, so we duplicate a
4483 // bunch of code for the sake of avoiding slowdown here.
4484 cswritep = memcpya(cswritep, chr_buf, chr_buf_blen);
4485 cswritep = u32toa_x(variant_bps[bp_start_variant_uidx], '\t', cswritep);
4486 cswritep = strcpyax(cswritep, variant_ids[bp_start_variant_uidx], '\t');
4487 uintptr_t allele_idx_offset_base;
4488 if (!allele_idx_offsets) {
4489 allele_idx_offset_base = bp_start_variant_uidx * 2;
4490 } else {
4491 allele_idx_offset_base = allele_idx_offsets[bp_start_variant_uidx];
4492 allele_ct = allele_idx_offsets[bp_start_variant_uidx + 1] - allele_idx_offset_base;
4493 }
4494 const char* const* cur_alleles = &(allele_storage[allele_idx_offset_base]);
4495 cswritep = strcpyax(cswritep, cur_alleles[0], '\t');
4496 cswritep = strcpya(cswritep, cur_alleles[1]);
4497 if (unlikely(Cswrite(&css, &cswritep))) {
4498 goto WritePvarJoin_ret_WRITE_FAIL;
4499 }
4500 for (uint32_t allele_idx = 2; allele_idx != allele_ct; ++allele_idx) {
4501 *cswritep++ = ',';
4502 cswritep = strcpya(cswritep, cur_alleles[allele_idx]);
4503 if (unlikely(Cswrite(&css, &cswritep))) {
4504 goto WritePvarJoin_ret_WRITE_FAIL;
4505 }
4506 }
4507
4508 if (write_qual) {
4509 *cswritep++ = '\t';
4510 if ((!qual_present) || (!IsSet(qual_present, bp_start_variant_uidx))) {
4511 *cswritep++ = '.';
4512 } else {
4513 cswritep = ftoa_g(quals[bp_start_variant_uidx], cswritep);
4514 }
4515 }
4516
4517 if (write_filter) {
4518 *cswritep++ = '\t';
4519 if ((!filter_present) || (!IsSet(filter_present, bp_start_variant_uidx))) {
4520 *cswritep++ = '.';
4521 } else if (!IsSet(filter_npass, bp_start_variant_uidx)) {
4522 cswritep = strcpya_k(cswritep, "PASS");
4523 } else {
4524 cswritep = strcpya(cswritep, filter_storage[bp_start_variant_uidx]);
4525 }
4526 }
4527
4528 if (write_info) {
4529 *cswritep++ = '\t';
4530 const uint32_t is_pr = all_nonref || (nonref_flags && IsSet(nonref_flags, bp_start_variant_uidx));
4531 if (pvar_info_line_iter) {
4532 reterr = PvarInfoReloadAndWrite(info_pr_flag_present, info_col_idx, bp_start_variant_uidx, is_pr, &pvar_reload_txs, &pvar_info_line_iter, &cswritep, &trs_variant_uidx);
4533 if (unlikely(reterr)) {
4534 goto WritePvarJoin_ret_TSTREAM_FAIL;
4535 }
4536 } else {
4537 if (is_pr) {
4538 cswritep = strcpya_k(cswritep, "PR");
4539 } else {
4540 *cswritep++ = '.';
4541 }
4542 }
4543 }
4544
4545 if (write_cm) {
4546 *cswritep++ = '\t';
4547 if (!variant_cms) {
4548 *cswritep++ = '0';
4549 } else {
4550 cswritep = dtoa_g_p8(variant_cms[bp_start_variant_uidx], cswritep);
4551 }
4552 }
4553 AppendBinaryEoln(&cswritep);
4554 // next_jc guaranteed to be zero-initialized
4555 } else if (next_variant_idx) {
4556 // TODO
4557 ;;;;
4558 const char* orig_variant_id = variant_ids[variant_uidx];
4559 const char* ref_allele = cur_alleles[0];
4560 const uint32_t ref_allele_slen = strlen(ref_allele);
4561 uint32_t split_ct_p1 = allele_ct;
4562 if (allele_ct > 2) {
4563 if (!varid_dup) {
4564 if (varid_templatep && (!missing_varid_match_blen)) {
4565 cur_varid_templatep = varid_templatep;
4566 } else {
4567 cur_varid_templatep = nullptr;
4568 if (varid_split) {
4569 if (VaridSplitOk(orig_variant_id, allele_ct)) {
4570 varid_token_start = orig_variant_id;
4571 } else {
4572 varid_token_start = nullptr;
4573 }
4574 }
4575 if ((!varid_token_start) && varid_templatep) {
4576 // Note that --set-missing-var-ids almost always applies here
4577 // when it's specified; only exception is when vid-split was also
4578 // specified and the split succeeded.
4579 cur_varid_templatep = varid_templatep;
4580 }
4581 }
4582 }
4583 }
4584 ;;;;
4585 next_jc.snp_ct = 0;
4586 next_jc.nonsnp_ct = 0;
4587 next_jc.symbolic_ct = 0;
4588 next_jc.missalt_snp_ct = 0;
4589 next_jc.missalt_nonsnp_ct = 0;
4590 }
4591 if (next_variant_idx == variant_ct) {
4592 break;
4593 }
4594 // this_pos_write_variant_ct = 0;
4595 jc = next_jc;
4596 prev_bp = cur_bp;
4597 bp_start_variant_idx = next_variant_idx;
4598 bp_start_variant_uidx = next_variant_uidx;
4599 if (next_variant_idx >= next_print_variant_idx) {
4600 if (pct > 10) {
4601 putc_unlocked('\b', stdout);
4602 }
4603 pct = (next_variant_idx * 100LLU) / variant_ct;
4604 printf("\b\b%u%%", pct++);
4605 fflush(stdout);
4606 next_print_variant_idx = (pct * S_CAST(uint64_t, variant_ct)) / 100;
4607 }
4608 }
4609 if (unlikely(CswriteCloseNull(&css, cswritep))) {
4610 goto WritePvarJoin_ret_WRITE_FAIL;
4611 }
4612 if (pct > 10) {
4613 putc_unlocked('\b', stdout);
4614 }
4615 fputs("\b\b", stdout);
4616 }
4617 while (0) {
4618 WritePvarJoin_ret_NOMEM:
4619 reterr = kPglRetNomem;
4620 break;
4621 WritePvarJoin_ret_TSTREAM_FAIL:
4622 TextStreamErrPrint(pvar_info_reload, &pvar_reload_txs);
4623 break;
4624 WritePvarJoin_ret_WRITE_FAIL:
4625 reterr = kPglRetWriteFail;
4626 break;
4627 WritePvarJoin_ret_INVALID_CMDLINE:
4628 reterr = kPglRetInvalidCmdline;
4629 break;
4630 WritePvarJoin_ret_INCONSISTENT_INPUT:
4631 reterr = kPglRetInconsistentInput;
4632 break;
4633 }
4634 WritePvarJoin_ret_1:
4635 CswriteCloseCond(&css, cswritep);
4636 CleanupTextStream2(pvar_info_reload, &pvar_reload_txs, &reterr);
4637 BigstackReset(bigstack_mark);
4638 return reterr;
4639 }
4640 */
4641
4642 FLAGSET_DEF_START()
4643 kfPlink2Write0,
4644 kfPlink2WriteSetHhMissing = (1 << 0),
4645 kfPlink2WriteSetHhMissingKeepDosage = (1 << 1),
4646 kfPlink2WriteSetMixedMtMissing = (1 << 2),
4647 kfPlink2WriteSetMixedMtMissingKeepDosage = (1 << 3),
4648 kfPlink2WriteMeMissing = (1 << 4),
4649 kfPlink2WriteZeroCluster = (1 << 5),
4650 kfPlink2WriteFillRef = (1 << 6),
4651 kfPlink2WriteLateDosageErase = (1 << 7),
4652 // no need for sample_sort, determined by collapsed_sort_map != nullptr?
4653 kfPlink2WritePlink1 = (1 << 8)
4654 FLAGSET_DEF_END(Plink2WriteFlags);
4655 // todo: add .pgen-specific stuff
4656
4657 typedef struct MakeCommonStruct {
4658 const ChrInfo* cip;
4659 const uintptr_t* sample_include;
4660 uintptr_t* sex_male_collapsed_interleaved;
4661 uintptr_t* sex_female_collapsed_interleaved;
4662 const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select);
4663 uint32_t raw_sample_ct;
4664 uint32_t sample_ct;
4665 Plink2WriteFlags plink2_write_flags;
4666 uint32_t hard_call_halfdist;
4667 } MakeCommon;
4668
4669 typedef struct MakeBedlikeCtxStruct {
4670 const MakeCommon* mcp;
4671
4672 const uintptr_t* variant_include;
4673 uint32_t* sample_include_cumulative_popcounts;
4674 const uint32_t* collapsed_sort_map;
4675
4676 PgenReader** pgr_ptrs;
4677
4678 uint32_t* read_variant_uidx_starts;
4679 uint32_t cur_block_write_ct;
4680
4681 uintptr_t** genovecs;
4682 uintptr_t** dosage_presents;
4683 Dosage** dosage_mains;
4684
4685 unsigned char* writebufs[2];
4686 PglErr reterr; // can only be kPglRetMalformedInput for now
4687 } MakeBedlikeCtx;
4688
4689
MakeBedlikeThread(void * raw_arg)4690 THREAD_FUNC_DECL MakeBedlikeThread(void* raw_arg) {
4691 ThreadGroupFuncArg* arg = S_CAST(ThreadGroupFuncArg*, raw_arg);
4692 const uintptr_t tidx = arg->tidx;
4693 MakeBedlikeCtx* ctx = S_CAST(MakeBedlikeCtx*, arg->sharedp->context);
4694
4695 PgenReader* pgrp = ctx->pgr_ptrs[tidx];
4696 uintptr_t* genovec = ctx->genovecs[tidx];
4697 const MakeCommon* mcp = ctx->mcp;
4698 uintptr_t* dosage_present = nullptr;
4699 Dosage* dosage_main = nullptr;
4700 uint32_t hard_call_halfdist = 0;
4701 if (ctx->dosage_presents) {
4702 dosage_present = ctx->dosage_presents[tidx];
4703 dosage_main = ctx->dosage_mains[tidx];
4704 hard_call_halfdist = mcp->hard_call_halfdist;
4705 }
4706 const uintptr_t* variant_include = ctx->variant_include;
4707 const ChrInfo* cip = mcp->cip;
4708 const uintptr_t* sample_include = mcp->sample_include;
4709 PgrSampleSubsetIndex pssi;
4710 PgrSetSampleSubsetIndex(ctx->sample_include_cumulative_popcounts, pgrp, &pssi);
4711 const uintptr_t* sex_male_collapsed_interleaved = mcp->sex_male_collapsed_interleaved;
4712 const uintptr_t* sex_female_collapsed_interleaved = mcp->sex_female_collapsed_interleaved;
4713 const uint32_t* collapsed_sort_map = ctx->collapsed_sort_map;
4714 const Plink2WriteFlags plink2_write_flags = mcp->plink2_write_flags;
4715 const uint32_t set_hh_missing = plink2_write_flags & kfPlink2WriteSetHhMissing;
4716 const uint32_t set_mixed_mt_missing = plink2_write_flags & kfPlink2WriteSetMixedMtMissing;
4717 const uint32_t write_plink1 = plink2_write_flags & kfPlink2WritePlink1;
4718 const uint32_t sample_ct = mcp->sample_ct;
4719 const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);
4720 const uint32_t sample_ctv2 = NypCtToVecCt(sample_ct);
4721 const uint32_t sample_ct4 = NypCtToByteCt(sample_ct);
4722 const uint32_t calc_thread_ct = GetThreadCt(arg->sharedp);
4723 const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select) = mcp->refalt1_select;
4724 const uint32_t x_code = cip->xymt_codes[kChrOffsetX];
4725 const uint32_t y_code = cip->xymt_codes[kChrOffsetY];
4726 const uint32_t mt_code = cip->xymt_codes[kChrOffsetMT];
4727 uint32_t parity = 0;
4728 do {
4729 const uintptr_t cur_block_write_ct = ctx->cur_block_write_ct;
4730 uint32_t write_idx = (tidx * cur_block_write_ct) / calc_thread_ct;
4731 const uint32_t write_idx_end = ((tidx + 1) * cur_block_write_ct) / calc_thread_ct;
4732 unsigned char* writebuf_iter = &(ctx->writebufs[parity][write_idx * sample_ct4]);
4733 uintptr_t variant_uidx_base;
4734 uintptr_t cur_bits;
4735 BitIter1Start(variant_include, ctx->read_variant_uidx_starts[tidx], &variant_uidx_base, &cur_bits);
4736 uint32_t chr_end = 0;
4737 uint32_t is_x = 0;
4738 uint32_t is_y = 0;
4739 uint32_t is_haploid_nonmt = 0;
4740 uint32_t is_mt = 0;
4741 for (; write_idx != write_idx_end; ++write_idx) {
4742 const uint32_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
4743 if (variant_uidx >= chr_end) {
4744 const uint32_t chr_fo_idx = GetVariantChrFoIdx(cip, variant_uidx);
4745 const uint32_t chr_idx = cip->chr_file_order[chr_fo_idx];
4746 chr_end = cip->chr_fo_vidx_start[chr_fo_idx + 1];
4747 is_x = (chr_idx == x_code);
4748 is_y = (chr_idx == y_code);
4749 is_mt = (chr_idx == mt_code);
4750 is_haploid_nonmt = IsSet(cip->haploid_mask, chr_idx) && (!is_mt);
4751 }
4752 // todo: Multiallelic -> two-specific-alleles downcode.
4753 // This is pretty straightforward if we're just saving hardcalls:
4754 // with 1 copy of one allele and zero copies of the other allele, we
4755 // default to saving a missing call (in the diploid case).
4756 // If dosages are involved, things are a bit less obvious: what if the
4757 // unincluded alleles have a total dosage of 0.1? 0.5? It'll be
4758 // necessary to define a new flag allowing this threshold to be
4759 // configured.
4760 // I'm currently inclined to set unincluded dosage >= 0.5 to missing, and
4761 // otherwise the dosages are scaled up to sum to 2.
4762 // (Note that the multiallelic split operation won't work this way; it
4763 // has to use the convention that REF = anything other than the current
4764 // ALT allele. Probably also want to support that here.)
4765 if (!hard_call_halfdist) {
4766 // if multiallelic:
4767 // if split: call PgrGet1()
4768 // otherwise, if erase-alt2+: call PgrGet2()
4769 // otherwise, error out
4770 PglErr reterr = PgrGet(sample_include, pssi, sample_ct, variant_uidx, pgrp, genovec);
4771 if (unlikely(reterr)) {
4772 ctx->reterr = reterr;
4773 break;
4774 }
4775 } else {
4776 // this isn't fully implemented yet.
4777
4778 // quasi-bugfix (4 Dec 2017): it's user-hostile to make
4779 // --hard-call-threshold not apply here.
4780 uint32_t dosage_ct;
4781 // if multiallelic:
4782 // if split: call PgrGet1D()
4783 // otherwise, if refalt1_select + erase-alt2+: call PgrGetMD(),
4784 // rescale
4785 // otherwise, error out
4786 PglErr reterr = PgrGetD(sample_include, pssi, sample_ct, variant_uidx, pgrp, genovec, dosage_present, dosage_main, &dosage_ct);
4787 if (unlikely(reterr)) {
4788 ctx->reterr = reterr;
4789 break;
4790 }
4791 ApplyHardCallThresh(dosage_present, dosage_main, dosage_ct, hard_call_halfdist, genovec);
4792 }
4793 // remove this when proper multiallelic logic implemented
4794 if (refalt1_select && (refalt1_select[variant_uidx][0] == 1)) {
4795 GenovecInvertUnsafe(sample_ct, genovec);
4796 }
4797 if (set_hh_missing && is_haploid_nonmt) {
4798 if (is_x) {
4799 SetMaleHetMissing(sex_male_collapsed_interleaved, sample_ctv2, genovec);
4800 } else {
4801 // all hets to missing
4802 SetHetMissing(sample_ctl2, genovec);
4803 if (is_y) {
4804 InterleavedSetMissing(sex_female_collapsed_interleaved, sample_ctv2, genovec);
4805 }
4806 }
4807 } else if (set_mixed_mt_missing && is_mt) {
4808 // all hets to missing
4809 SetHetMissing(sample_ctl2, genovec);
4810 }
4811 // todo: --set-me-missing, --zero-cluster, --fill-missing-with-ref
4812 // (--set-me-missing should happen after --set-hh-missing)
4813 if (write_plink1) {
4814 PgrPlink2ToPlink1InplaceUnsafe(sample_ct, genovec);
4815 }
4816 // trailing bytes don't matter, but trailing bits of last byte may
4817 ZeroTrailingNyps(sample_ct, genovec);
4818 if (!collapsed_sort_map) {
4819 writebuf_iter = memcpyua(writebuf_iter, genovec, sample_ct4);
4820 } else {
4821 GenovecResort(genovec, collapsed_sort_map, sample_ct, writebuf_iter);
4822 writebuf_iter = &(writebuf_iter[sample_ct4]);
4823 }
4824 }
4825 parity = 1 - parity;
4826 } while (!THREAD_BLOCK_FINISH(arg));
4827 THREAD_RETURN;
4828 }
4829
4830 // initialized mcp fields: cip, sex_male_collapsed_interleaved,
4831 // sex_female_collapsed_interleaved, raw_sample_ct, sample_ct,
4832 // plink2_write_flags
4833 PglErr MakeBedlikeMain(const uintptr_t* sample_include, const uint32_t* new_sample_idx_to_old, const uintptr_t* variant_include, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_thread_ct, uint32_t hard_call_thresh, MakePlink2Flags make_plink2_flags, uintptr_t pgr_alloc_cacheline_ct, PgenFileInfo* pgfip, MakeCommon* mcp, char* outname, char* outname_end) {
4834 FILE* outfile = nullptr;
4835 PglErr reterr = kPglRetSuccess;
4836 ThreadGroup tg;
4837 PreinitThreads(&tg);
4838 MakeBedlikeCtx ctx;
4839 {
4840 assert(variant_ct);
4841 const uint32_t sample_ct = mcp->sample_ct;
4842 assert(sample_ct);
4843 if (make_plink2_flags & kfMakePlink2MMask) {
4844 logerrputs("Error: Multiallelic-split fixed-width output is not implemented yet.\n");
4845 reterr = kPglRetNotYetSupported;
4846 goto MakeBedlikeMain_ret_1;
4847 }
4848 // fixed-width
4849 const uint32_t make_pgen = make_plink2_flags & kfMakePgen;
4850 if (make_pgen) {
4851 snprintf(outname_end, kMaxOutfnameExtBlen, ".pgen");
4852 } else {
4853 snprintf(outname_end, kMaxOutfnameExtBlen, ".bed");
4854 }
4855 if (unlikely(fopen_checked(outname, FOPEN_WB, &outfile))) {
4856 goto MakeBedlikeMain_ret_OPEN_FAIL;
4857 }
4858 if (make_pgen) {
4859 fwrite_unlocked("l\x1b\x02", 3, 1, outfile);
4860 fwrite_unlocked(&variant_ct, 4, 1, outfile);
4861 fwrite_unlocked(&sample_ct, 4, 1, outfile);
4862 if (!pgfip->nonref_flags) {
4863 const PgenGlobalFlags gflags = pgfip->gflags;
4864 uint32_t uii = 64;
4865 if (gflags & kfPgenGlobalAllNonref) {
4866 uii = 128;
4867 }
4868 putc_unlocked(uii, outfile);
4869 } else {
4870 putc_unlocked(192, outfile);
4871 fwrite_unlocked(pgfip->nonref_flags, DivUp(variant_ct, CHAR_BIT), 1, outfile);
4872 }
4873 if (unlikely(ferror_unlocked(outfile))) {
4874 goto MakeBedlikeMain_ret_WRITE_FAIL;
4875 }
4876 } else {
4877 if (unlikely(fwrite_checked("l\x1b\x01", 3, outfile))) {
4878 goto MakeBedlikeMain_ret_WRITE_FAIL;
4879 }
4880 }
4881 logprintfww5("Writing %s ... ", outname);
4882 fputs("0%", stdout);
4883 fflush(stdout);
4884 uint32_t pct = 0;
4885 const uint32_t raw_sample_ct = mcp->raw_sample_ct;
4886 const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
4887 const uintptr_t sample_ct4 = NypCtToByteCt(sample_ct);
4888 if (unlikely(bigstack_alloc_u32(raw_sample_ctl, &ctx.sample_include_cumulative_popcounts))) {
4889 goto MakeBedlikeMain_ret_NOMEM;
4890 }
4891 FillCumulativePopcounts(sample_include, raw_sample_ctl, ctx.sample_include_cumulative_popcounts);
4892 // tried more threads, pointless since this is too I/O-bound
4893 // (exception: reordering samples)
4894 uint32_t calc_thread_ct = (max_thread_ct > 2)? (max_thread_ct - 1) : max_thread_ct;
4895 ctx.collapsed_sort_map = new_sample_idx_to_old;
4896 if (!new_sample_idx_to_old) {
4897 // Without BMI2 instructions, subsetting is most expensive with
4898 // sample_ct near 2/3 of raw_sample_ct; up to ~7 compute threads are
4899 // useful in that case. (See CopyNyparrNonemptySubset().)
4900 // With them, 1-2 compute threads appear to suffice.
4901 #ifdef USE_AVX2
4902 const uint32_t calc_thread_max = 2;
4903 #else
4904 uint64_t numer;
4905 if (sample_ct * (3 * k1LU) <= raw_sample_ct * (2 * k1LU)) {
4906 numer = sample_ct * (9 * k1LU);
4907 } else {
4908 numer = (raw_sample_ct - sample_ct) * (18 * k1LU);
4909 }
4910 const uint32_t calc_thread_max = 1 + (numer / raw_sample_ct);
4911 #endif
4912 if (calc_thread_max < calc_thread_ct) {
4913 calc_thread_ct = calc_thread_max;
4914 }
4915 } else if (sample_ct < raw_sample_ct) {
4916 uint32_t* new_collapsed_sort_map;
4917 if (unlikely(bigstack_alloc_u32(sample_ct, &new_collapsed_sort_map))) {
4918 goto MakeBedlikeMain_ret_NOMEM;
4919 }
4920 // bugfix (26 Mar 2018): forgot to initialize this
4921 memcpy(new_collapsed_sort_map, new_sample_idx_to_old, sample_ct * sizeof(int32_t));
4922 UidxsToIdxs(sample_include, ctx.sample_include_cumulative_popcounts, sample_ct, new_collapsed_sort_map);
4923 ctx.collapsed_sort_map = new_collapsed_sort_map;
4924 }
4925
4926 if (make_plink2_flags & kfMakeBed) {
4927 mcp->plink2_write_flags |= kfPlink2WritePlink1;
4928 }
4929
4930 mcp->hard_call_halfdist = 0;
4931 if ((hard_call_thresh != UINT32_MAX) && (pgfip->gflags & (kfPgenGlobalDosagePresent | kfPgenGlobalDosagePhasePresent))) {
4932 mcp->hard_call_halfdist = kDosage4th - hard_call_thresh;
4933 }
4934 STD_ARRAY_DECL(unsigned char*, 2, main_loadbufs);
4935 ctx.dosage_presents = nullptr;
4936 ctx.dosage_mains = nullptr;
4937 uint32_t read_block_size;
4938 if (unlikely(PgenMtLoadInit(variant_include, sample_ct, variant_ct, bigstack_left(), pgr_alloc_cacheline_ct, 0, 2 * (sample_ct4 + 1), 0, pgfip, &calc_thread_ct, &ctx.genovecs, nullptr, nullptr, nullptr, mcp->hard_call_halfdist? (&ctx.dosage_presents) : nullptr, mcp->hard_call_halfdist? (&ctx.dosage_mains) : nullptr, nullptr, nullptr, &read_block_size, nullptr, main_loadbufs, &ctx.pgr_ptrs, &ctx.read_variant_uidx_starts))) {
4939 goto MakeBedlikeMain_ret_NOMEM;
4940 }
4941 if (unlikely(
4942 bigstack_alloc_uc(sample_ct4 * read_block_size, &(ctx.writebufs[0])) ||
4943 bigstack_alloc_uc(sample_ct4 * read_block_size, &(ctx.writebufs[1])))) {
4944 // shouldn't be possible for this to fail
4945 goto MakeBedlikeMain_ret_NOMEM;
4946 }
4947 if (unlikely(SetThreadCt(calc_thread_ct, &tg))) {
4948 goto MakeBedlikeMain_ret_NOMEM;
4949 }
4950
4951 ctx.variant_include = variant_include;
4952 mcp->refalt1_select = refalt1_select;
4953 mcp->sample_include = sample_include;
4954 mcp->sample_ct = sample_ct;
4955 ctx.mcp = mcp;
4956 ctx.reterr = kPglRetSuccess;
4957 SetThreadFuncAndData(MakeBedlikeThread, &ctx, &tg);
4958
4959 // Main workflow:
4960 // 1. Set n=0, load/skip block 0
4961 //
4962 // 2. Spawn threads processing block n
4963 // 3. If n>0, write results for block (n-1)
4964 // 4. Increment n by 1
4965 // 5. Load/skip block n unless eof
4966 // 6. Join threads
4967 // 7. Goto step 2 unless eof
4968 //
4969 // 8. Write results for last block
4970 uint32_t parity = 0;
4971 uint32_t read_block_idx = 0;
4972 uint32_t prev_variant_idx = 0;
4973 uint32_t next_print_variant_idx = variant_ct / 100;
4974 for (uint32_t variant_idx = 0; ; ) {
4975 const uint32_t cur_block_write_ct = MultireadNonempty(variant_include, &tg, raw_variant_ct, read_block_size, pgfip, &read_block_idx, &reterr);
4976 if (unlikely(reterr)) {
4977 goto MakeBedlikeMain_ret_PGR_FAIL;
4978 }
4979 if (variant_idx) {
4980 JoinThreads(&tg);
4981 reterr = ctx.reterr;
4982 if (unlikely(reterr)) {
4983 // this should only be possible in MakePgenRobust()
4984 assert(reterr != kPglRetWriteFail);
4985 goto MakeBedlikeMain_ret_PGR_FAIL;
4986 }
4987 }
4988 if (!IsLastBlock(&tg)) {
4989 ctx.cur_block_write_ct = cur_block_write_ct;
4990 ComputeUidxStartPartition(variant_include, cur_block_write_ct, calc_thread_ct, read_block_idx * read_block_size, ctx.read_variant_uidx_starts);
4991 PgrCopyBaseAndOffset(pgfip, calc_thread_ct, ctx.pgr_ptrs);
4992 if (variant_idx + cur_block_write_ct == variant_ct) {
4993 DeclareLastThreadBlock(&tg);
4994 }
4995 if (unlikely(SpawnThreads(&tg))) {
4996 goto MakeBedlikeMain_ret_THREAD_CREATE_FAIL;
4997 }
4998 }
4999 parity = 1 - parity;
5000 if (variant_idx) {
5001 // write *previous* block results
5002 if (unlikely(fwrite_checked(ctx.writebufs[parity], (variant_idx - prev_variant_idx) * sample_ct4, outfile))) {
5003 goto MakeBedlikeMain_ret_WRITE_FAIL;
5004 }
5005 if (variant_idx == variant_ct) {
5006 break;
5007 }
5008 if (variant_idx >= next_print_variant_idx) {
5009 if (pct > 10) {
5010 putc_unlocked('\b', stdout);
5011 }
5012 pct = (variant_idx * 100LLU) / variant_ct;
5013 printf("\b\b%u%%", pct++);
5014 fflush(stdout);
5015 next_print_variant_idx = (pct * S_CAST(uint64_t, variant_ct)) / 100;
5016 }
5017 prev_variant_idx = variant_idx;
5018 }
5019 ++read_block_idx;
5020 variant_idx += cur_block_write_ct;
5021 // crucially, this is independent of the PgenReader block_base pointers
5022 pgfip->block_base = main_loadbufs[parity];
5023 }
5024 if (unlikely(fclose_null(&outfile))) {
5025 goto MakeBedlikeMain_ret_WRITE_FAIL;
5026 }
5027 if (pct > 10) {
5028 putc_unlocked('\b', stdout);
5029 }
5030 fputs("\b\b", stdout);
5031 logputs("done.\n");
5032 // BigstackReset(bigstack_mark);
5033 }
5034 while (0) {
5035 MakeBedlikeMain_ret_NOMEM:
5036 reterr = kPglRetNomem;
5037 break;
5038 MakeBedlikeMain_ret_OPEN_FAIL:
5039 reterr = kPglRetOpenFail;
5040 break;
5041 MakeBedlikeMain_ret_PGR_FAIL:
5042 PgenErrPrintN(reterr);
5043 break;
5044 MakeBedlikeMain_ret_WRITE_FAIL:
5045 reterr = kPglRetWriteFail;
5046 break;
5047 MakeBedlikeMain_ret_THREAD_CREATE_FAIL:
5048 reterr = kPglRetThreadCreateFail;
5049 break;
5050 }
5051 MakeBedlikeMain_ret_1:
5052 CleanupThreads(&tg);
5053 fclose_cond(outfile);
5054 // parent will free memory
5055 return reterr;
5056 }
5057
// Shared context handed to every MakePgenThread() worker through
// arg->sharedp->context.  Workers index the per-thread arrays by their tidx.
typedef struct MakePgenCtxStruct {
  // Settings shared with the other Make* paths; MakePgenThread() reads cip,
  // refalt1_select, sample_include, the sample counts, the interleaved sex
  // masks, plink2_write_flags and hard_call_halfdist through this pointer.
  MakeCommon* mcp;

  // When non-null, workers take the sample-resort path (GenovecResort(),
  // CopyAndResort*()) instead of the plain subset/copy paths.
  const uint32_t* new_sample_idx_to_old;
  // Passed to UnpackAndResortHphase() on the resort path.
  uint32_t* old_sample_idx_to_new;
  // combine existing chr_mask/xymt_codes/haploid_mask/chr_idx_to_foidx with
  // new collapsed chromosome boundary table
  // (Workers search &write_chr_fo_vidx_start[1] with CountSortedSmallerU32()
  // to find the chromosome containing the current write index.)
  uint32_t* write_chr_fo_vidx_start;
  // When non-null, the allele count of a variant is the difference between
  // adjacent entries; when null, workers keep allele_ct == 2.
  const uintptr_t* write_allele_idx_offsets;
  // Sex bitarrays used by the chrX (male) and chrY (female) missing-call
  // handling in MakePgenThread().
  const uintptr_t* sex_male_collapsed;
  uintptr_t* sex_female_collapsed;
  // Dosage-erase pass only runs when this is < kDosage4th.
  uint32_t dosage_erase_halfdist;

  // Indexed [parity][tidx]: start of each worker's raw load buffer for the
  // current block.  Workers flip parity after every block.
  uintptr_t** loadbuf_thread_starts[2];
  // phase, dosage
  // Indexed [parity], then by within-block write index.  May be null, in
  // which case workers treat every variant's vrtype as 0.
  unsigned char* loaded_vrtypes[2];

  // Number of variants to write in the current block; re-read by workers at
  // the top of each block.
  uint32_t cur_block_write_ct;

  // When non-null, the worker writes through this single-threaded writer's
  // embedded PgenWriterCommon and flushes its fwrite buffer itself; otherwise
  // the worker uses pwcs[tidx].
  STPgenWriter* spgwp;
  PgenWriterCommon** pwcs;
  // Per-thread output workspaces, each indexed by tidx.  MakePgenThread()
  // null-checks thread_write_mhc, thread_write_phasepresents,
  // thread_all_hets, thread_write_dosagepresents and
  // thread_write_dphasepresents before use; the others are only read inside
  // those guards (or, for thread_write_genovecs, when resorting/subsetting).
  uintptr_t** thread_write_genovecs;
  uintptr_t** thread_write_mhc;
  // AlleleCode** thread_ac_rotate;
  uintptr_t** thread_write_phasepresents;
  uintptr_t** thread_write_phaseinfos;
  uintptr_t** thread_all_hets;
  uintptr_t** thread_write_dosagepresents;
  Dosage** thread_write_dosagevals;
  uintptr_t** thread_write_dphasepresents;
  SDosage** thread_write_dphasedeltas;
  uint32_t** thread_cumulative_popcount_bufs;
  // Error reporting from workers: write_reterr is set to kPglRetWriteFail
  // (with write_errno = errno) or kPglRetVarRecordTooLarge on failure.
  PglErr write_reterr;
  int32_t write_errno;
} MakePgenCtx;
5093
// One-thread-per-vblock is sensible for possibly-phased biallelic data, where
// subsetting and LD-compression are a substantial fraction of processing time,
// and memory requirements tend to be low enough that it's actually reasonable
// for each thread job to comprise 64k variants.
// Beyond that... the VCF/.pgen division of labor looks nice, but far too much
// of the work is usually being done in the initial PgrGetRaw() call, so just
// fall back on single-threaded invocation of the same function; only
// difference is that the thread owns the writer object.
//
// Worker entry point.  raw_arg is a ThreadGroupFuncArg* whose shared context
// is a MakePgenCtx.  For each block, thread tidx processes within-block write
// indices [tidx * kPglVblockSize, min(that + kPglVblockSize,
// ctx->cur_block_write_ct)).  For every variant it:
//   1. parses the raw load buffer (genovec, then optional multiallelic
//      patches, hardcall phase, dosage, dosage phase, selected by vrtype
//      bits);
//   2. resorts, subsets, or directly reuses the sample data;
//   3. applies ref/alt inversion, hard-call threshold, dosage erasure, and
//      het-haploid / mixed-MT missing-call rules;
//   4. appends the record to the PgenWriterCommon.
// Failures are reported through ctx->write_reterr (and ctx->write_errno for
// fwrite failures); the `break`s below leave only the per-variant loop.
THREAD_FUNC_DECL MakePgenThread(void* raw_arg) {
  ThreadGroupFuncArg* arg = S_CAST(ThreadGroupFuncArg*, raw_arg);
  const uintptr_t tidx = arg->tidx;
  MakePgenCtx* ctx = S_CAST(MakePgenCtx*, arg->sharedp->context);

  // Cache loop-invariant context fields in locals.
  const uint32_t* new_sample_idx_to_old = ctx->new_sample_idx_to_old;
  const uint32_t* old_sample_idx_to_new = ctx->old_sample_idx_to_new;
  const MakeCommon* mcp = ctx->mcp;
  const ChrInfo* cip = mcp->cip;
  const uint32_t* write_chr_fo_vidx_start = ctx->write_chr_fo_vidx_start;
  const uintptr_t* write_allele_idx_offsets = ctx->write_allele_idx_offsets;
  // Advanced by cur_block_write_ct at the end of each block, so it is always
  // indexed by the within-block write_idx.
  const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select_iter) = mcp->refalt1_select;
  const uintptr_t* sample_include = mcp->sample_include;

  const uintptr_t* sex_male_collapsed = ctx->sex_male_collapsed;

  const uintptr_t* sex_male_collapsed_interleaved = mcp->sex_male_collapsed_interleaved;
  const uintptr_t* sex_female_collapsed = ctx->sex_female_collapsed;
  const uintptr_t* sex_female_collapsed_interleaved = mcp->sex_female_collapsed_interleaved;
  const uint32_t raw_sample_ct = mcp->raw_sample_ct;
  const uint32_t sample_ct = mcp->sample_ct;
  const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);
  const uint32_t sample_ctv2 = NypCtToVecCt(sample_ct);
  const uint32_t raw_sample_ctaw2 = NypCtToAlignedWordCt(raw_sample_ct);
  const uint32_t raw_sample_ctaw = BitCtToAlignedWordCt(raw_sample_ct);
  const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
  const uint32_t sample_ctl = BitCtToWordCt(sample_ct);
  const uint32_t x_code = cip->xymt_codes[kChrOffsetX];
  const uint32_t y_code = cip->xymt_codes[kChrOffsetY];
  const uint32_t mt_code = cip->xymt_codes[kChrOffsetMT];

  const Plink2WriteFlags plink2_write_flags = mcp->plink2_write_flags;
  const uint32_t set_hh_missing = plink2_write_flags & kfPlink2WriteSetHhMissing;
  const uint32_t set_hh_missing_keep_dosage = plink2_write_flags & kfPlink2WriteSetHhMissingKeepDosage;
  const uint32_t set_mixed_mt_missing = plink2_write_flags & kfPlink2WriteSetMixedMtMissing;
  const uint32_t set_mixed_mt_missing_keep_dosage = plink2_write_flags & kfPlink2WriteSetMixedMtMissingKeepDosage;
  const uint32_t late_dosage_erase = plink2_write_flags & kfPlink2WriteLateDosageErase;

  const uint32_t hard_call_halfdist = mcp->hard_call_halfdist;
  const uint32_t dosage_erase_halfdist = ctx->dosage_erase_halfdist;
  // Number of words the load-buffer cursor advances past one dosage (or
  // dosage-phase) section: a presence bitarray plus one 16-bit value slot per
  // raw sample, both rounded up to whole vectors.
  const uintptr_t dosageraw_word_ct = kWordsPerVec * (BitCtToVecCt(raw_sample_ct) + DivUp(raw_sample_ct, (kBytesPerVec / sizeof(Dosage))));

  STPgenWriter* spgwp = ctx->spgwp;
  PgenWriterCommon* pwcp;
  if (spgwp) {
    // make this function stand out as an intrusive one
    pwcp = &GET_PRIVATE(*spgwp, pwc);
  } else {
    pwcp = ctx->pwcs[tidx];
  }
  uintptr_t* write_genovec = nullptr;
  // assumes sample_include == nullptr if sample_ct == raw_sample_ct
  if (new_sample_idx_to_old || sample_include) {
    write_genovec = ctx->thread_write_genovecs[tidx];
    // Pre-zero the last word of the private output genovec.
    write_genovec[sample_ctl2 - 1] = 0;
  }
  // Multiallelic patch output pointers; ExpandMhc() fills them in from this
  // thread's mhc workspace when one was allocated.
  uintptr_t* write_patch_01_set = nullptr;
  AlleleCode* write_patch_01_vals = nullptr;
  uintptr_t* write_patch_10_set = nullptr;
  AlleleCode* write_patch_10_vals = nullptr;
  if (ctx->thread_write_mhc) {
    ExpandMhc(sample_ct, ctx->thread_write_mhc[tidx], &write_patch_01_set, &write_patch_01_vals, &write_patch_10_set, &write_patch_10_vals);
  }
  uintptr_t* write_phasepresent = nullptr;
  uintptr_t* write_phaseinfo = nullptr;
  uintptr_t* all_hets = nullptr;
  if (ctx->thread_write_phasepresents) {
    write_phasepresent = ctx->thread_write_phasepresents[tidx];
    write_phaseinfo = ctx->thread_write_phaseinfos[tidx];
    if (ctx->thread_all_hets) {
      all_hets = ctx->thread_all_hets[tidx];
    }
  }
  uintptr_t* write_dosagepresent = nullptr;
  Dosage* write_dosagevals = nullptr;
  uintptr_t* write_dphasepresent = nullptr;
  SDosage* write_dphasedeltas = nullptr;
  SDosage* tmp_dphasedeltas = nullptr;
  uint32_t* cumulative_popcount_buf = nullptr;
  if (ctx->thread_write_dosagepresents) {
    write_dosagepresent = ctx->thread_write_dosagepresents[tidx];
    write_dosagevals = ctx->thread_write_dosagevals[tidx];
    if (ctx->thread_write_dphasepresents) {
      write_dphasepresent = ctx->thread_write_dphasepresents[tidx];
      write_dphasedeltas = ctx->thread_write_dphasedeltas[tidx];
      // Scratch region located inside the same write_dphasedeltas buffer,
      // RoundUpPow2(sample_ct, kCacheline / 2) entries past its start.
      tmp_dphasedeltas = &(write_dphasedeltas[RoundUpPow2(sample_ct, kCacheline / 2)]);
    }
  }
  // Only the CopyAndResort*() calls take this buffer, so it is only fetched
  // when resorting and there is patch or dosage data to resort.
  if ((ctx->thread_write_mhc || ctx->thread_write_dosagepresents) && new_sample_idx_to_old) {
    cumulative_popcount_buf = ctx->thread_cumulative_popcount_bufs[tidx];
  }
  // Number of variants written in all previous blocks; converts within-block
  // write_idx to a global write index.
  uint32_t variant_idx_offset = 0;
  uint32_t allele_ct = 2;
  uint32_t parity = 0;
  do {
    const uintptr_t cur_block_write_ct = ctx->cur_block_write_ct;
    uint32_t write_idx = tidx * kPglVblockSize;
    const uint32_t write_idx_end = MINV(write_idx + kPglVblockSize, cur_block_write_ct);
    uintptr_t* loadbuf_iter = ctx->loadbuf_thread_starts[parity][tidx];
    unsigned char* loaded_vrtypes = ctx->loaded_vrtypes[parity];
    uint32_t loaded_vrtype = 0;
    // Within-block index at which the current chromosome ends; 0 forces a
    // chromosome lookup on the first variant.
    uint32_t chr_end_bidx = 0;
    uint32_t is_x = 0;
    uint32_t is_y = 0;
    uint32_t is_haploid_nonmt = 0;
    uint32_t is_mt = 0;
    // write_idx may start larger than write_idx_end
    for (; write_idx < write_idx_end; ++write_idx) {
      if (loaded_vrtypes) {
        loaded_vrtype = loaded_vrtypes[write_idx];
      }
      if (write_idx >= chr_end_bidx) {
        // Crossed a chromosome boundary: refresh the chromosome-class flags.
        const uint32_t chr_fo_idx = CountSortedSmallerU32(&(write_chr_fo_vidx_start[1]), cip->chr_ct, write_idx + variant_idx_offset + 1);
        const uint32_t chr_idx = cip->chr_file_order[chr_fo_idx];
        chr_end_bidx = write_chr_fo_vidx_start[chr_fo_idx + 1] - variant_idx_offset;
        is_x = (chr_idx == x_code);
        is_y = (chr_idx == y_code);
        is_mt = (chr_idx == mt_code);
        is_haploid_nonmt = IsSet(cip->haploid_mask, chr_idx) && (!is_mt);
      }
      // ---- Parse the raw record.  cur_genovec_end is a cursor that starts
      // just past the aligned genovec and is advanced past each optional
      // section; its final value is the start of the next variant's record.
      uintptr_t* cur_genovec_end = &(loadbuf_iter[raw_sample_ctaw2]);
      if (write_allele_idx_offsets) {
        allele_ct = write_allele_idx_offsets[write_idx + variant_idx_offset + 1] - write_allele_idx_offsets[write_idx + variant_idx_offset];
      }
      // vrtype value 8 marks a record with multiallelic hardcall patches.
      const uint32_t is_mhc = loaded_vrtype & 8;
      uint32_t read_rare01_ct = 0;
      uint32_t read_rare10_ct = 0;
      uintptr_t* read_patch_01_set = nullptr;
      AlleleCode* read_patch_01_vals = nullptr;
      uintptr_t* read_patch_10_set = nullptr;
      AlleleCode* read_patch_10_vals = nullptr;
      if (is_mhc) {
        assert(allele_ct > 2);
        // Two leading words hold the patch counts, padded to a full vector.
        read_rare01_ct = cur_genovec_end[0];
        read_rare10_ct = cur_genovec_end[1];
        cur_genovec_end = &(cur_genovec_end[RoundUpPow2(2, kWordsPerVec)]);
        if (read_rare01_ct) {
          // Bitarray over raw samples, then one AlleleCode per set bit.
          read_patch_01_set = cur_genovec_end;
          cur_genovec_end = &(cur_genovec_end[raw_sample_ctl]);
          read_patch_01_vals = R_CAST(AlleleCode*, cur_genovec_end);
          cur_genovec_end = &(cur_genovec_end[DivUp(read_rare01_ct, kBytesPerWord / sizeof(AlleleCode))]);
          VecAlignUp64(&cur_genovec_end);
        }
        if (read_rare10_ct) {
          // Bitarray over raw samples, then two AlleleCodes per set bit.
          read_patch_10_set = cur_genovec_end;
          cur_genovec_end = &(cur_genovec_end[raw_sample_ctl]);
          read_patch_10_vals = R_CAST(AlleleCode*, cur_genovec_end);
          cur_genovec_end = &(cur_genovec_end[DivUp(read_rare10_ct, kBytesPerWord / (2 * sizeof(AlleleCode)))]);
          VecAlignUp64(&cur_genovec_end);
        }
      }
      // vrtype 0x10: hardcall phase present.  Not const: later steps may
      // clear or recompute it.
      uint32_t is_hphase = loaded_vrtype & 0x10;
      uintptr_t* cur_phaseraw = nullptr;
      if (is_hphase) {
        // tried skipping this and using ExpandThenSubsetBytearr in simplest
        // case, not worthwhile
        if (!read_rare10_ct) {
          PgrDetectGenoarrHets(loadbuf_iter, raw_sample_ct, all_hets);
        } else {
          PgrDetectGenoarrHetsMultiallelic(loadbuf_iter, read_patch_10_set, read_patch_10_vals, raw_sample_ct, all_hets);
        }
        cur_phaseraw = cur_genovec_end;
        // First 8 bytes of phaseraw: het count (low 32 bits), then the
        // explicit-phasepresent count (high 32 bits / second 32-bit word).
        const uint32_t het_ct = S_CAST(uint32_t, cur_phaseraw[0]);
#ifdef __LP64__
        const uint32_t explicit_phasepresent_ct = cur_phaseraw[0] >> 32;
#else
        const uint32_t explicit_phasepresent_ct = cur_phaseraw[1];
#endif
        const uint32_t phaseraw_word_ct = (8 / kBytesPerWord) + 1 + (het_ct / kBitsPerWord) + DivUp(explicit_phasepresent_ct, kBitsPerWord);
        cur_genovec_end = &(cur_genovec_end[RoundUpPow2(phaseraw_word_ct, kWordsPerVec)]);
      }
      // vrtype 0x60: dosage present; 0x80: dosage phase present.
      const uint32_t is_dosage = loaded_vrtype & 0x60;
      const uint32_t is_dphase = loaded_vrtype & 0x80;
      // The Unpack*Hphase*() helpers receive the address of this pointer;
      // code below treats a resulting nullptr as "no explicit phasepresent".
      uintptr_t* cur_write_phasepresent = write_phasepresent;
      uintptr_t* cur_dosagepresent = nullptr;
      Dosage* cur_dosagevals = nullptr;
      uintptr_t* cur_dphasepresent = nullptr;
      SDosage* cur_dphasedelta = nullptr;
      uint32_t read_dosage_ct = 0;
      uint32_t read_dphase_ct = 0;
      if (is_dosage) {
        // multiallelic dosage not implemented yet
        assert(allele_ct == 2);

        // this should have length dependent on dosage_ct
        cur_dosagepresent = cur_genovec_end;
        cur_dosagevals = R_CAST(Dosage*, &(cur_dosagepresent[raw_sample_ctaw]));
        read_dosage_ct = PopcountWords(cur_dosagepresent, raw_sample_ctl);

        // temporary
        cur_genovec_end = &(cur_genovec_end[dosageraw_word_ct]);

        if (is_dphase) {
          cur_dphasepresent = cur_genovec_end;
          cur_dphasedelta = R_CAST(SDosage*, &(cur_dphasepresent[raw_sample_ctaw]));
          read_dphase_ct = PopcountWords(cur_dphasepresent, raw_sample_ctl);

          // temporary
          cur_genovec_end = &(cur_genovec_end[dosageraw_word_ct]);
        }
      }
      // ---- Move sample data into write buffers: resort, subset, or reuse.
      uint32_t write_rare01_ct = 0;
      uint32_t write_rare10_ct = 0;
      uint32_t write_dosage_ct = 0;
      uint32_t write_dphase_ct = 0;
      if (new_sample_idx_to_old) {
        GenovecResort(loadbuf_iter, new_sample_idx_to_old, sample_ct, write_genovec);
        if (read_rare01_ct) {
          write_rare01_ct = CopyAndResort8bit(read_patch_01_set, read_patch_01_vals, new_sample_idx_to_old, raw_sample_ct, sample_ct, write_patch_01_set, write_patch_01_vals, cumulative_popcount_buf);
        }
        if (read_rare10_ct) {
          write_rare10_ct = CopyAndResort16bit(read_patch_10_set, read_patch_10_vals, new_sample_idx_to_old, raw_sample_ct, sample_ct, write_patch_10_set, write_patch_10_vals, cumulative_popcount_buf);
        }
        if (is_hphase) {
          UnpackAndResortHphase(all_hets, cur_phaseraw, sample_include, old_sample_idx_to_new, raw_sample_ct, sample_ct, &cur_write_phasepresent, write_phaseinfo);
        }
        if (is_dosage) {
          write_dosage_ct = CopyAndResort16bit(cur_dosagepresent, cur_dosagevals, new_sample_idx_to_old, raw_sample_ct, sample_ct, write_dosagepresent, write_dosagevals, cumulative_popcount_buf);
          if (is_dphase) {
            write_dphase_ct = CopyAndResort16bit(cur_dphasepresent, cur_dphasedelta, new_sample_idx_to_old, raw_sample_ct, sample_ct, write_dphasepresent, write_dphasedeltas, cumulative_popcount_buf);
          }
        }
      } else if (sample_include) {
        CopyNyparrNonemptySubset(loadbuf_iter, sample_include, raw_sample_ct, sample_ct, write_genovec);
        if (is_mhc) {
          // NOTE(review): unlike the resort path, these calls are not
          // guarded by read_rare01_ct / read_rare10_ct, so a null set/vals
          // pointer is passed when the corresponding count is 0 -- presumably
          // the helpers return early on a zero count; confirm.
          write_rare01_ct = Copy1bit8Subset(read_patch_01_set, read_patch_01_vals, sample_include, read_rare01_ct, sample_ct, write_patch_01_set, write_patch_01_vals);
          write_rare10_ct = Copy1bit16Subset(read_patch_10_set, read_patch_10_vals, sample_include, read_rare10_ct, sample_ct, write_patch_10_set, write_patch_10_vals);
        }
        if (is_hphase) {
          UnpackHphaseSubset(all_hets, cur_phaseraw, sample_include, sample_ct, &cur_write_phasepresent, write_phaseinfo);
        }
        if (is_dosage) {
          write_dosage_ct = Copy1bit16Subset(cur_dosagepresent, cur_dosagevals, sample_include, read_dosage_ct, sample_ct, write_dosagepresent, write_dosagevals);
          if (is_dphase) {
            write_dphase_ct = Copy1bit16Subset(cur_dphasepresent, cur_dphasedelta, sample_include, read_dphase_ct, sample_ct, write_dphasepresent, write_dphasedeltas);
          }
        }
      } else {
        // No resort or subset: write straight from the load buffer.  Later
        // in-place edits to write_genovec / write_patch_* therefore modify
        // the load buffer itself.
        write_genovec = loadbuf_iter;
        if (is_mhc) {
          // this doesn't work in refalt1_select case
          write_patch_01_set = read_patch_01_set;
          write_patch_01_vals = read_patch_01_vals;
          write_patch_10_set = read_patch_10_set;
          write_patch_10_vals = read_patch_10_vals;
          write_rare01_ct = read_rare01_ct;
          write_rare10_ct = read_rare10_ct;
        }
        if (is_hphase) {
          UnpackHphase(all_hets, cur_phaseraw, sample_ct, &cur_write_phasepresent, write_phaseinfo);
        }
        if (is_dosage) {
          CopyDosage(cur_dosagepresent, cur_dosagevals, sample_ct, read_dosage_ct, write_dosagepresent, write_dosagevals, &write_dosage_ct);
          if (is_dphase) {
            CopyDosage(cur_dphasepresent, R_CAST(Dosage*, cur_dphasedelta), sample_ct, read_dphase_ct, write_dphasepresent, R_CAST(Dosage*, write_dphasedeltas), &write_dphase_ct);
          }
        }
      }
      // multiallelic -> biallelic split:
      //   main thread will probably compute split mapping in advance (bitarray
      //   with filtered-and-split variant indices, set bit = unsplit variant
      //   or last variant in a split group)?  same pre-split variant can be
      //   loaded multiple times.
      // biallelic -> multiallelic merge:
      //   could require no multiallelic variants in remainder of dataset?
      //   if handled with pgenlib, PgfiMultiread,
      //   PgfiMultireadGetCachelineReq, and PgrGetRaw would need to be
      //   extended to take a merge-info parameter.  +both will be tricky...
      //   probably not worth it.
      //   compute merge pattern in MakePgenRobust() before main loop instead.
      //   main thread also performs the actual merge.
      // both should require sorted .pvar.
      // neither should require any handling in this function.

      // ---- Ref/alt rotation: anything other than the identity (0, 1)
      // selection requires rewriting the record.
      if (refalt1_select_iter && (refalt1_select_iter[write_idx][0] || (refalt1_select_iter[write_idx][1] != 1))) {
        if (allele_ct == 2) {
          GenovecInvertUnsafe(sample_ct, write_genovec);
          if (is_hphase) {
            // trailing bits don't matter
            BitvecInvert(sample_ctl, write_phaseinfo);
          }
          if (write_dosage_ct) {
            BiallelicDosage16Invert(write_dosage_ct, write_dosagevals);
            if (write_dphase_ct) {
              BiallelicDphase16Invert(write_dphase_ct, write_dphasedeltas);
            }
          }
        } else {
          // NOTE(review): terminates the whole process from a worker thread
          // rather than reporting through ctx->write_reterr; placeholder for
          // the unimplemented multiallelic rotation described below.
          exit(S_CAST(int32_t, kPglRetNotYetSupported));
          // this is the fun case
          // 1. fill length-(2 * sample_ct) AlleleCode[] buffer with codes
          // 2. fill lookup table describing remapping
          // 3. replace elements of table
          // 4. normalize order of each code pair, inverting a phaseinfo bit on
          //    each swap
          // 5. call PglMultiallelicDenseToSparse to write back
        }
      }
      // ---- Dosage post-processing: hard-call threshold, then dosage erase.
      if (write_dosage_ct) {
        assert((!write_rare01_ct) && (!write_rare10_ct));
        if (hard_call_halfdist || (dosage_erase_halfdist < kDosage4th)) {
          if (is_hphase && (!cur_write_phasepresent)) {
            // explicit phasepresent required for these
            cur_write_phasepresent = write_phasepresent;
            // unsafe to just copy all_hets, because we may have resorted
            // todo: multiallelic dosage
            PgrDetectGenoarrHets(write_genovec, sample_ct, write_phasepresent);
          }
          if (write_dphasepresent && is_hphase && (!write_dphase_ct)) {
            // bugfix (29 Apr 2019): write_dphasepresent not guaranteed to be
            // non-null.
            ZeroWArr(sample_ctl, write_dphasepresent);
          }
          if (hard_call_halfdist) {
            if ((!is_hphase) && (!write_dphase_ct)) {
              ApplyHardCallThresh(write_dosagepresent, write_dosagevals, write_dosage_ct, hard_call_halfdist, write_genovec);
            } else {
              if (!is_hphase) {
                ZeroWArr(sample_ctl, write_phasepresent);
              }
              write_dphase_ct = ApplyHardCallThreshPhased(write_dosagepresent, write_dosagevals, write_dosage_ct, hard_call_halfdist, write_genovec, write_phasepresent, write_phaseinfo, write_dphasepresent, write_dphasedeltas, tmp_dphasedeltas);
              // Hardcall phase survives only if some phasepresent bit is
              // still set afterwards.
              is_hphase = !AllWordsAreZero(write_phasepresent, sample_ctl);
            }
          }
          if (dosage_erase_halfdist < kDosage4th) {
            if (!is_hphase) {
              ZeroWArr(sample_ctl, write_phasepresent);
            }
            // In-place compaction of the dosage arrays.  Both branches use
            // the same two-stage shape: scan to the first entry that must be
            // erased (nothing needs to move before it), then continue with
            // separate read and write indices, copying kept entries down and
            // clearing the presence bit of each erased one.
            uint32_t dosage_read_idx = 0;
            uintptr_t sample_widx = 0;
            uintptr_t cur_bits = write_dosagepresent[0];
            uint32_t dosage_write_idx;
            if (!write_dphase_ct) {
              // If hardcall-phase and dosage present, threshold/2 applies
              // thanks to implicit dosage-phase value
              // const uint32_t dosage_erase_halfdist2 = (dosage_erase_halfdist + kDosage4th + 1) / 2;
              const uint32_t halfdist_extra = (kDosage4th + 1 - dosage_erase_halfdist) / 2;
              for (; dosage_read_idx != write_dosage_ct; ++dosage_read_idx) {
                const uint32_t sample_uidx_lowbits = BitIter1x(write_dosagepresent, &sample_widx, &cur_bits);
                const uint32_t dosage_int = write_dosagevals[dosage_read_idx];
                const uint32_t halfdist = BiallelicDosageHalfdist(dosage_int);
                // Threshold is raised by halfdist_extra when this sample's
                // phasepresent bit is set.
                if (halfdist >= dosage_erase_halfdist + ((write_phasepresent[sample_widx] >> sample_uidx_lowbits) & 1) * halfdist_extra) {
                  write_dosagepresent[sample_widx] ^= k1LU << sample_uidx_lowbits;
                  break;
                }
              }
              dosage_write_idx = dosage_read_idx;
              while (++dosage_read_idx < write_dosage_ct) {
                const uint32_t sample_uidx_lowbits = BitIter1x(write_dosagepresent, &sample_widx, &cur_bits);
                const uint32_t dosage_int = write_dosagevals[dosage_read_idx];
                const uint32_t halfdist = BiallelicDosageHalfdist(dosage_int);
                if (halfdist < dosage_erase_halfdist + ((write_phasepresent[sample_widx] >> sample_uidx_lowbits) & 1) * halfdist_extra) {
                  write_dosagevals[dosage_write_idx++] = dosage_int;
                } else {
                  write_dosagepresent[sample_widx] ^= k1LU << sample_uidx_lowbits;
                }
              }
            } else {
              // Only erase dosage if both sides are less than threshold/2
              // away from an integer.
              const uint32_t halfdist_extra = (kDosage4th + 1 - dosage_erase_halfdist) / 2;
              const uint32_t dosage_erase_halfdist2 = dosage_erase_halfdist + halfdist_extra;
              uint32_t dphase_read_idx = 0;
              uintptr_t lowbit = 0;
              for (; dosage_read_idx != write_dosage_ct; ++dosage_read_idx) {
                lowbit = BitIter1y(write_dosagepresent, &sample_widx, &cur_bits);
                const uint32_t dosage_int = write_dosagevals[dosage_read_idx];
                if (!(write_dphasepresent[sample_widx] & lowbit)) {
                  // necessary for this to be separate to handle odd
                  // dosage_int, missing phase case correctly
                  const uint32_t halfdist = BiallelicDosageHalfdist(dosage_int);
                  if (halfdist >= dosage_erase_halfdist + ((write_phasepresent[sample_widx] & lowbit) != 0) * halfdist_extra) {
                    break;
                  }
                } else {
                  const int32_t dphase_delta = write_dphasedeltas[dphase_read_idx++];
                  const uint32_t halfdist1 = HaploidDosageHalfdist((dosage_int + dphase_delta) >> 1);
                  const uint32_t halfdist2 = HaploidDosageHalfdist((dosage_int - dphase_delta) >> 1);
                  if ((halfdist1 >= dosage_erase_halfdist2) && (halfdist2 >= dosage_erase_halfdist2)) {
                    break;
                  }
                }
              }
              dosage_write_idx = dosage_read_idx;
              if (dosage_read_idx < write_dosage_ct) {
                // The scan stopped on an entry to erase.  If that entry had a
                // dphase delta, dphase_read_idx was already advanced past it,
                // so the dphase write index backs up by one.
                uint32_t dphase_write_idx = dphase_read_idx;
                if (write_dphasepresent[sample_widx] & lowbit) {
                  --dphase_write_idx;
                  write_dphasepresent[sample_widx] ^= lowbit;
                }
                write_dosagepresent[sample_widx] ^= lowbit;
                while (++dosage_read_idx < write_dosage_ct) {
                  lowbit = BitIter1y(write_dosagepresent, &sample_widx, &cur_bits);
                  const uint32_t dosage_int = write_dosagevals[dosage_read_idx];
                  if (!(write_dphasepresent[sample_widx] & lowbit)) {
                    const uint32_t halfdist = BiallelicDosageHalfdist(dosage_int);
                    if (halfdist < dosage_erase_halfdist + ((write_phasepresent[sample_widx] & lowbit) != 0) * halfdist_extra) {
                      write_dosagevals[dosage_write_idx++] = dosage_int;
                    } else {
                      write_dosagepresent[sample_widx] ^= lowbit;
                    }
                  } else {
                    const int32_t dphase_delta = write_dphasedeltas[dphase_read_idx++];
                    const uint32_t halfdist1 = HaploidDosageHalfdist((dosage_int + dphase_delta) >> 1);
                    const uint32_t halfdist2 = HaploidDosageHalfdist((dosage_int - dphase_delta) >> 1);
                    if ((halfdist1 < dosage_erase_halfdist2) || (halfdist2 < dosage_erase_halfdist2)) {
                      write_dosagevals[dosage_write_idx++] = dosage_int;
                      write_dphasedeltas[dphase_write_idx++] = dphase_delta;
                    } else {
                      write_dosagepresent[sample_widx] ^= lowbit;
                      write_dphasepresent[sample_widx] ^= lowbit;
                    }
                  }
                }
                write_dphase_ct = dphase_write_idx;
              }
            }
            write_dosage_ct = dosage_write_idx;
          }
        }
        if (late_dosage_erase) {
          // Dosage was only needed for the steps above; drop it from output.
          write_dosage_ct = 0;
          write_dphase_ct = 0;
        }
      }
      // ---- Het-haploid and mixed-MT handling.
      // moved after --hard-call-threshold, since it makes sense to
      // immediately erase fresh het haploid calls
      if (set_hh_missing && is_haploid_nonmt) {
        if (is_x) {
          // chrX: only male samples are affected.
          EraseMaleDphases(sex_male_collapsed, &write_dphase_ct, write_dphasepresent, write_dphasedeltas);
          if (!set_hh_missing_keep_dosage) {
            // need to erase dosages associated with the hardcalls we're
            // about to clear

            // male 0/x hets to missing
            SetMaleHetMissingCleardosage(sex_male_collapsed, sex_male_collapsed_interleaved, sample_ctv2, write_genovec, &write_dosage_ct, write_dosagepresent, write_dosagevals);
            // male x/y hets to missing
            // NOTE(review): this loop removes the patch_10 entry (set bit and
            // value pair) of each male sample with lo_code != hi_code, but no
            // write_genovec update is visible here -- confirm the hardcall is
            // set to missing elsewhere.
            if (write_rare10_ct) {
              uintptr_t sample_widx = 0;
              uintptr_t patch_10_bits = write_patch_10_set[0];
              uint32_t read_patch_10_idx = 0;
              // Same two-stage compaction as the dosage-erase code: find the
              // first entry to drop, then shift the surviving pairs down.
              for (; read_patch_10_idx != write_rare10_ct; ++read_patch_10_idx) {
                uintptr_t lowbit = BitIter1y(write_patch_10_set, &sample_widx, &patch_10_bits);
                AlleleCode lo_code = write_patch_10_vals[read_patch_10_idx * 2];
                AlleleCode hi_code = write_patch_10_vals[read_patch_10_idx * 2 + 1];
                if ((sex_male_collapsed[sample_widx] & lowbit) && (lo_code != hi_code)) {
                  write_patch_10_set[sample_widx] ^= lowbit;
                  uint32_t write_patch_10_idx = read_patch_10_idx;
                  ++read_patch_10_idx;
                  for (; read_patch_10_idx != write_rare10_ct; ++read_patch_10_idx) {
                    lowbit = BitIter1y(write_patch_10_set, &sample_widx, &patch_10_bits);
                    lo_code = write_patch_10_vals[read_patch_10_idx * 2];
                    hi_code = write_patch_10_vals[read_patch_10_idx * 2 + 1];
                    if ((sex_male_collapsed[sample_widx] & lowbit) && (lo_code != hi_code)) {
                      write_patch_10_set[sample_widx] ^= lowbit;
                    } else {
                      write_patch_10_vals[write_patch_10_idx * 2] = lo_code;
                      write_patch_10_vals[write_patch_10_idx * 2 + 1] = hi_code;
                      ++write_patch_10_idx;
                    }
                  }
                  write_rare10_ct = write_patch_10_idx;
                  break;
                }
              }
            }
          } else {
            assert(!write_rare01_ct);
            assert(!write_rare10_ct);
            // need to generate a new unphased dosage for each cleared
            // hardcall lacking a dosage entry
            SetMaleHetMissingKeepdosage(sex_male_collapsed, sex_male_collapsed_interleaved, sample_ctl2, write_genovec, &write_dosage_ct, write_dosagepresent, write_dosagevals);
          }
          if (is_hphase && cur_write_phasepresent) {
            // bugfix (28 Jul 2018): I was on crack when I moved this code
            // before SetMaleHetMissing{Clear,Keep}dosage() on 31 Mar
            if (!write_rare10_ct) {
              MaskGenoarrHetsUnsafe(write_genovec, sample_ctl2, cur_write_phasepresent);
            } else {
              MaskGenoarrHetsMultiallelicUnsafe(write_genovec, write_patch_10_set, write_patch_10_vals, sample_ctl2, cur_write_phasepresent);
            }
            is_hphase = !AllWordsAreZero(write_phasepresent, sample_ctl);
          }
          if (write_rare01_ct) {
            ClearGenoarrMissing1bit8Unsafe(write_genovec, &write_rare01_ct, write_patch_01_set, write_patch_01_vals);
          }
          if (write_rare10_ct) {
            ClearGenoarrMissing1bit16Unsafe(write_genovec, &write_rare10_ct, write_patch_10_set, write_patch_10_vals);
          }
        } else {
          // all hets to missing
          // may want to move is_hphase zeroing in front
          if (!set_hh_missing_keep_dosage) {
            SetHetMissingCleardosage(sample_ctl2, write_genovec, &write_dosage_ct, write_dosagepresent, write_dosagevals);
          } else {
            SetHetMissingKeepdosage(sample_ctl2, write_genovec, &write_dosage_ct, write_dosagepresent, write_dosagevals);
          }
          if (is_y) {
            // chrY: female samples are additionally set to missing.
            InterleavedSetMissingCleardosage(sex_female_collapsed, sex_female_collapsed_interleaved, sample_ctv2, write_genovec, &write_dosage_ct, write_dosagepresent, write_dosagevals);
          }
          // With every het gone, no phase or patch_01 information remains.
          is_hphase = 0;
          write_rare01_ct = 0;
          if (write_rare10_ct) {
            ClearGenoarrMissing1bit16Unsafe(write_genovec, &write_rare10_ct, write_patch_10_set, write_patch_10_vals);
          }
          write_dphase_ct = 0;
        }
      } else if (set_mixed_mt_missing && is_mt) {
        if (!set_mixed_mt_missing_keep_dosage) {
          // all hets to missing
          SetHetMissingCleardosage(sample_ctl2, write_genovec, &write_dosage_ct, write_dosagepresent, write_dosagevals);
        } else {
          SetHetMissingKeepdosage(sample_ctl2, write_genovec, &write_dosage_ct, write_dosagepresent, write_dosagevals);
        }
        is_hphase = 0;
        write_rare01_ct = 0;
        if (write_rare10_ct) {
          ClearGenoarrMissing1bit16Unsafe(write_genovec, &write_rare10_ct, write_patch_10_set, write_patch_10_vals);
        }
        write_dphase_ct = 0;
      }
      ZeroTrailingNyps(sample_ct, write_genovec);
      // todo: --set-me-missing, --zero-cluster, --fill-missing-with-ref
      // ---- Emit the record.
      if (spgwp) {
        // This thread owns the writer: flush its buffer to the output file
        // once at least kPglFwriteBlockSize bytes have accumulated.
        if (pwcp->fwrite_bufp >= &(pwcp->fwrite_buf[kPglFwriteBlockSize])) {
          const uintptr_t cur_byte_ct = pwcp->fwrite_bufp - pwcp->fwrite_buf;
          if (unlikely(fwrite_checked(pwcp->fwrite_buf, cur_byte_ct, GET_PRIVATE(*spgwp, pgen_outfile)))) {
            ctx->write_reterr = kPglRetWriteFail;
            ctx->write_errno = errno;
            break;
          }
          // printf("vblock_fpos_offset: %llu\n", pwcp->vblock_fpos_offset);
          pwcp->vblock_fpos_offset += cur_byte_ct;
          // printf("%u %llu\n", write_idx + variant_idx_offset, pwcp->vblock_fpos_offset);
          pwcp->fwrite_bufp = pwcp->fwrite_buf;
        }
      }
      // Choose the append routine from what survived: no multiallelic
      // patches -> biallelic paths (with or without phase); otherwise the
      // multiallelic paths.
      if ((!write_rare01_ct) && (!write_rare10_ct)) {
        if ((!is_hphase) && (!write_dphase_ct)) {
          if (unlikely(PwcAppendBiallelicGenovecDosage16(write_genovec, write_dosagepresent, write_dosagevals, write_dosage_ct, pwcp))) {
            ctx->write_reterr = kPglRetVarRecordTooLarge;
            break;
          }
        } else {
          if (!is_hphase) {
            ZeroWArr(sample_ctl, write_phasepresent);
          }
          // extraneous phaseinfo bits may be set
          if (unlikely(PwcAppendBiallelicGenovecDphase16(write_genovec, cur_write_phasepresent, write_phaseinfo, write_dosagepresent, write_dphasepresent, write_dosagevals, write_dphasedeltas, write_dosage_ct, write_dphase_ct, pwcp))) {
            ctx->write_reterr = kPglRetVarRecordTooLarge;
            break;
          }
        }
      } else {
        // multiallelic dosage not supported
        if (!is_hphase) {
          if (unlikely(PwcAppendMultiallelicSparse(write_genovec, write_patch_01_set, write_patch_01_vals, write_patch_10_set, write_patch_10_vals, write_rare01_ct, write_rare10_ct, pwcp))) {
            ctx->write_reterr = kPglRetVarRecordTooLarge;
            break;
          }
        } else {
          if (unlikely(PwcAppendMultiallelicGenovecHphase(write_genovec, write_patch_01_set, write_patch_01_vals, write_patch_10_set, write_patch_10_vals, cur_write_phasepresent, write_phaseinfo, write_rare01_ct, write_rare10_ct, pwcp))) {
            ctx->write_reterr = kPglRetVarRecordTooLarge;
            break;
          }
        }
      }
      // Advance to the next variant's raw record.
      loadbuf_iter = cur_genovec_end;
    }
    // End of block: switch to the other buffer set and advance the
    // block-relative bookkeeping.
    parity = 1 - parity;
    variant_idx_offset += cur_block_write_ct;
    if (refalt1_select_iter) {
      refalt1_select_iter = &(refalt1_select_iter[cur_block_write_ct]);
    }
  } while (!THREAD_BLOCK_FINISH(arg));
  THREAD_RETURN;
}
5678
GflagsVfilter(const uintptr_t * variant_include,const unsigned char * vrtypes,uint32_t raw_variant_ct,PgenGlobalFlags input_gflags)5679 PgenGlobalFlags GflagsVfilter(const uintptr_t* variant_include, const unsigned char* vrtypes, uint32_t raw_variant_ct, PgenGlobalFlags input_gflags) {
5680 PgenGlobalFlags read_gflags = kfPgenGlobal0;
5681 const uintptr_t* vrtypes_alias = R_CAST(const uintptr_t*, vrtypes);
5682 const uint32_t raw_variant_ctl = BitCtToWordCt(raw_variant_ct);
5683 uint32_t mask_multiply = ((input_gflags & kfPgenGlobalHardcallPhasePresent)? 0x10 : 0) + ((input_gflags & kfPgenGlobalDosagePresent)? 0x60 : 0) + ((input_gflags & kfPgenGlobalDosagePhasePresent)? 0x80 : 0);
5684 uintptr_t vrtypes_or = 0;
5685 // todo: try changing loop to be vec-based, use movemask to extract
5686 // information from vrtypes in 64-bit cases
5687 for (uint32_t widx = 0; widx != raw_variant_ctl; ++widx) {
5688 uintptr_t cur_variant_include_word = variant_include[widx];
5689 if (cur_variant_include_word) {
5690 // bugfix (20 Aug 2018): this needs to advance on every variant_include
5691 // word, not just the nonzero ones
5692 const uintptr_t* cur_vrtypes = &(vrtypes_alias[8 * widx]);
5693 #ifdef __LP64__
5694 for (uint32_t vi_byte_idx = 0; vi_byte_idx != 8; ++vi_byte_idx) {
5695 # ifdef USE_AVX2
5696 // this doesn't seem to be much faster than non-AVX2 code on my Mac...
5697 // inverse-movemask shouldn't be better than regular movemask here
5698 const uintptr_t cur_mask = _pdep_u64(cur_variant_include_word, kMask0101);
5699 # else
5700 // this operation maps binary hgfedcba to h0000000g0000000f...
5701 // ^ ^ ^
5702 // | | |
5703 // 56 48 40
5704 // 1. (cur_variant_include_word & 0xfe) gives us hgfedcb0;
5705 // necessary to avoid carryover.
5706 // 2. multiply by the number with bits 7, 14, 21, ..., 49 set, to
5707 // get hgfedcbhgfedcbhgf...
5708 // ^ ^ ^
5709 // | | |
5710 // 56 48 40
5711 // 3. mask out all but bits 8, 16, 24, ..., 56
5712 // todo: test if this actually beats the per-character loop...
5713 const uintptr_t cur_mask = (((cur_variant_include_word & 0xfe) * 0x2040810204080LLU) & kMask0101) | (cur_variant_include_word & 1);
5714 # endif
5715 vrtypes_or |= cur_vrtypes[vi_byte_idx] & (cur_mask * mask_multiply);
5716 cur_variant_include_word >>= 8;
5717 }
5718 #else
5719 for (uint32_t vi_hexa_idx = 0; vi_hexa_idx != 8; ++vi_hexa_idx) {
5720 // dcba -> d0000000c0000000b0000000a
5721 const uintptr_t cur_mask = ((cur_variant_include_word & 0xf) * 0x204081) & kMask0101;
5722 vrtypes_or |= cur_vrtypes[vi_hexa_idx] & (cur_mask * mask_multiply);
5723 cur_variant_include_word >>= 4;
5724 }
5725 #endif
5726 if (vrtypes_or) {
5727 // bugfix (8 Oct 2017): forgot to multiply by kMask0101
5728 if (vrtypes_or & (0x10 * kMask0101)) {
5729 read_gflags |= kfPgenGlobalHardcallPhasePresent;
5730 mask_multiply -= 0x10;
5731 }
5732 if (vrtypes_or & (0x60 * kMask0101)) {
5733 read_gflags |= kfPgenGlobalDosagePresent;
5734 mask_multiply -= 0x60;
5735 }
5736 if (vrtypes_or & (0x80 * kMask0101)) {
5737 read_gflags |= kfPgenGlobalDosagePhasePresent;
5738 mask_multiply -= 0x80;
5739 }
5740 if (!mask_multiply) {
5741 return read_gflags;
5742 }
5743 }
5744 }
5745 }
5746 return read_gflags;
5747 }
5748
SplitNonrefFlags()5749 void SplitNonrefFlags() {
5750 logerrputs("Provisional-reference flag split is not implemented yet.\n");
5751 exit(S_CAST(int32_t, kPglRetNotYetSupported));
5752 }
5753
JoinNonrefFlags()5754 void JoinNonrefFlags() {
5755 logerrputs("Provisional-reference flag join is not implemented yet.\n");
5756 exit(S_CAST(int32_t, kPglRetNotYetSupported));
5757 }
5758
5759 // Single-output-thread implementation. Allows variants to be unsorted.
5760 // (Note that MakePlink2NoVsort() currently requires enough memory for 64k * 2
5761 // variants per output thread, due to LD compression. This is faster in the
5762 // common case, but once you have 150k+ samples with dosage data...)
5763 //
5764 // initialized mcp fields: cip, sex_male_collapsed_interleaved,
5765 // sex_female_collapsed_interleaved, raw_sample_ct, sample_ct,
5766 // plink2_write_flags
5767 PglErr MakePgenRobust(const uintptr_t* sample_include, const uint32_t* new_sample_idx_to_old, const uintptr_t* variant_include, const uintptr_t* allele_idx_offsets, __maybe_unused const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* write_allele_idx_offsets, const uint32_t* new_variant_idx_to_old, const uintptr_t* sex_male_collapsed, uintptr_t* sex_female_collapsed, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t write_variant_ct, uint32_t max_read_allele_ct, uint32_t hard_call_thresh, uint32_t dosage_erase_thresh, MakePlink2Flags make_plink2_flags, MakeCommon* mcp, PgenReader* simple_pgrp, char* outname, char* outname_end) {
5768 // variant_uidx_new_to_old[] can be nullptr
5769
5770 unsigned char* bigstack_mark = g_bigstack_base;
5771 PglErr reterr = kPglRetSuccess;
5772 ThreadGroup tg;
5773 PreinitThreads(&tg);
5774 STPgenWriter spgw;
5775 PreinitSpgw(&spgw);
5776 MakePgenCtx ctx;
5777 {
5778 // plink2_write_flags assumed to include --set-hh-missing and
5779 // --set-mixed-mt-missing
5780 // sex_{fe}male_collapsed_interleaved assumed to be initialized if
5781 // necessary
5782
5783 if (unlikely(SetThreadCt(1, &tg))) {
5784 goto MakePgenRobust_ret_NOMEM;
5785 }
5786 ctx.spgwp = &spgw;
5787 const uint32_t raw_sample_ct = mcp->raw_sample_ct;
5788 const uint32_t sample_ct = mcp->sample_ct;
5789 const uint32_t subsetting_required = (sample_ct != raw_sample_ct);
5790 const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
5791 mcp->sample_include = subsetting_required? sample_include : nullptr;
5792 ctx.new_sample_idx_to_old = new_sample_idx_to_old;
5793 ctx.sex_male_collapsed = sex_male_collapsed;
5794 ctx.sex_female_collapsed = sex_female_collapsed;
5795 ctx.write_reterr = kPglRetSuccess;
5796 if ((make_plink2_flags & kfMakeBed) || ((make_plink2_flags & (kfMakePgen | (kfMakePgenFormatBase * 3))) == (kfMakePgen | kfMakePgenFormatBase))) {
5797 logerrputs("Error: Fixed-width .bed/.pgen output doesn't support sorting yet. Generate a\nregular sorted .pgen first, and then reformat it.\n");
5798 reterr = kPglRetNotYetSupported;
5799 goto MakePgenRobust_ret_1;
5800 } else {
5801 const uint32_t input_biallelic = (!allele_idx_offsets);
5802 // output_biallelic: test write_allele_idx_offsets equality to null
5803 ctx.write_allele_idx_offsets = write_allele_idx_offsets;
5804 if ((variant_ct == raw_variant_ct) || new_variant_idx_to_old) {
5805 ctx.write_chr_fo_vidx_start = mcp->cip->chr_fo_vidx_start;
5806 } else {
5807 if (unlikely(AllocAndFillSubsetChrFoVidxStart(variant_include, mcp->cip, &ctx.write_chr_fo_vidx_start))) {
5808 goto MakePgenRobust_ret_NOMEM;
5809 }
5810 }
5811 PgenGlobalFlags read_gflags = PgrGetGflags(simple_pgrp) & (kfPgenGlobalHardcallPhasePresent | kfPgenGlobalDosagePresent | kfPgenGlobalDosagePhasePresent);
5812 if (make_plink2_flags & (kfMakePlink2MJoin | kfMakePlink2EraseAlt2Plus)) {
5813 logerrputs("Error: multiallelic-join and 'erase-alt2+' modifiers are under development.\n");
5814 reterr = kPglRetNotYetSupported;
5815 goto MakePgenRobust_ret_1;
5816 }
5817 if (make_plink2_flags & kfMakePgenErasePhase) {
5818 read_gflags &= ~(kfPgenGlobalHardcallPhasePresent | kfPgenGlobalDosagePhasePresent);
5819 }
5820 if (make_plink2_flags & kfMakePgenEraseDosage) {
5821 if (hard_call_thresh == UINT32_MAX) {
5822 read_gflags &= ~(kfPgenGlobalDosagePresent | kfPgenGlobalDosagePhasePresent);
5823 } else {
5824 // bugfix (11 Apr 2018): this was in the wrong branch
5825 mcp->plink2_write_flags |= kfPlink2WriteLateDosageErase;
5826 }
5827 }
5828 if (read_gflags && (variant_ct < raw_variant_ct)) {
5829 read_gflags &= GflagsVfilter(variant_include, PgrGetVrtypes(simple_pgrp), raw_variant_ct, PgrGetGflags(simple_pgrp));
5830 }
5831 if (!input_biallelic) {
5832 // todo: conditional erase-alt2+ exception
5833 read_gflags |= kfPgenGlobalMultiallelicHardcallFound;
5834 }
5835 const uint32_t read_dosage_present = (read_gflags / kfPgenGlobalDosagePresent) & 1;
5836 // bugfix (25 Jul 2018): left expression needs ||, not &&
5837 mcp->hard_call_halfdist = ((hard_call_thresh == UINT32_MAX) || (!read_dosage_present))? 0 : (kDosage4th - hard_call_thresh);
5838 ctx.dosage_erase_halfdist = kDosage4th - dosage_erase_thresh;
5839 // bugfix/simplification (10 Mar 2020): it is possible for dosage-phase
5840 // to be present in the input without hardcall-phase. Don't try to treat
5841 // that differently than the usual scenario where hardcall-phase is
5842 // present.
5843 const uint32_t read_phase_present = !!(read_gflags & (kfPgenGlobalHardcallPhasePresent | kfPgenGlobalDosagePhasePresent));
5844 const uint32_t read_dphase_present = (read_gflags / kfPgenGlobalDosagePhasePresent) & 1;
5845 PgenGlobalFlags write_gflags = read_gflags;
5846 // When --hard-call-threshold is specified, if either hphase or dphase
5847 // values exist, the other can be generated.
5848 uint32_t read_or_write_phase_present = read_phase_present;
5849 uint32_t read_or_write_dphase_present = read_dphase_present;
5850 if (mcp->hard_call_halfdist && (read_phase_present || read_or_write_dphase_present)) {
5851 read_or_write_phase_present = 1;
5852 read_or_write_dphase_present = 1;
5853 write_gflags |= kfPgenGlobalHardcallPhasePresent | kfPgenGlobalDosagePhasePresent;
5854 } else if (dosage_erase_thresh && read_dosage_present) {
5855 // need write_phasepresent, pretty harmless to allocate write_phaseinfo
5856 read_or_write_phase_present = 1;
5857 }
5858 uint32_t read_or_write_dosage_present = read_dosage_present;
5859 if (mcp->plink2_write_flags & kfPlink2WriteLateDosageErase) {
5860 write_gflags &= ~(kfPgenGlobalDosagePresent | kfPgenGlobalDosagePhasePresent);
5861 } else if (mcp->plink2_write_flags & (kfPlink2WriteSetHhMissingKeepDosage | kfPlink2WriteSetMixedMtMissingKeepDosage)) {
5862 // bugfix (25 Jul 2018): this needs to check plink2_write_flags, not
5863 // make_plink2_flags
5864
5865 // command-line parser guarantees erase-dosage and
5866 // --set-hh-missing/--set-mixed-mt-missing keep-dosage aren't used
5867 // together
5868 read_or_write_dosage_present = 1;
5869
5870 // could verify at least one het haploid is present before setting this
5871 // flag...
5872 write_gflags |= kfPgenGlobalDosagePresent;
5873 }
5874 if ((write_gflags & (kfPgenGlobalMultiallelicHardcallFound | kfPgenGlobalDosagePresent)) == (kfPgenGlobalMultiallelicHardcallFound | kfPgenGlobalDosagePresent)) {
5875 logerrputs("Error: Multiallelic dosages aren't supported yet.\n");
5876 reterr = kPglRetNotYetSupported;
5877 goto MakePgenRobust_ret_1;
5878 }
5879
5880 uint32_t nonref_flags_storage = 3;
5881 uintptr_t* nonref_flags_write = PgrGetNonrefFlags(simple_pgrp);
5882 if (!nonref_flags_write) {
5883 nonref_flags_storage = (PgrGetGflags(simple_pgrp) & kfPgenGlobalAllNonref)? 2 : 1;
5884 } else if (variant_ct < raw_variant_ct) {
5885 const uint32_t write_variant_ctl = BitCtToWordCt(write_variant_ct);
5886 uintptr_t* old_nonref_flags = nonref_flags_write;
5887 if (bigstack_alloc_w(write_variant_ctl, &nonref_flags_write)) {
5888 goto MakePgenRobust_ret_NOMEM;
5889 }
5890 if ((variant_ct == write_variant_ct) && (!new_variant_idx_to_old)) {
5891 CopyBitarrSubset(old_nonref_flags, variant_include, variant_ct, nonref_flags_write);
5892 } else {
5893 ZeroWArr(write_variant_ctl, nonref_flags_write);
5894 if (variant_ct == write_variant_ct) {
5895 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
5896 const uintptr_t variant_uidx = new_variant_idx_to_old[variant_idx];
5897 if (IsSet(old_nonref_flags, variant_uidx)) {
5898 SetBit(variant_idx, nonref_flags_write);
5899 }
5900 }
5901 } else if (!write_allele_idx_offsets) {
5902 SplitNonrefFlags();
5903 } else {
5904 JoinNonrefFlags();
5905 }
5906 }
5907 if (nonref_flags_write[0] & 1) {
5908 if (AllBitsAreOne(nonref_flags_write, write_variant_ct)) {
5909 BigstackReset(nonref_flags_write);
5910 nonref_flags_write = nullptr;
5911 nonref_flags_storage = 2;
5912 }
5913 } else if (AllWordsAreZero(nonref_flags_write, write_variant_ctl)) {
5914 BigstackReset(nonref_flags_write);
5915 nonref_flags_write = nullptr;
5916 nonref_flags_storage = 1;
5917 }
5918 }
5919 snprintf(outname_end, kMaxOutfnameExtBlen, ".pgen");
5920 uintptr_t spgw_alloc_cacheline_ct;
5921 uint32_t max_vrec_len;
5922 reterr = SpgwInitPhase1(outname, write_allele_idx_offsets, nonref_flags_write, write_variant_ct, sample_ct, write_gflags, nonref_flags_storage, ctx.spgwp, &spgw_alloc_cacheline_ct, &max_vrec_len);
5923 if (unlikely(reterr)) {
5924 if (reterr == kPglRetOpenFail) {
5925 logerrprintfww(kErrprintfFopen, outname, strerror(errno));
5926 }
5927 goto MakePgenRobust_ret_1;
5928 }
5929 unsigned char* spgw_alloc;
5930 if (unlikely(
5931 bigstack_alloc_wp(1, &(ctx.loadbuf_thread_starts[0])) ||
5932 bigstack_alloc_wp(1, &(ctx.loadbuf_thread_starts[1])) ||
5933 bigstack_alloc_uc(spgw_alloc_cacheline_ct * kCacheline, &spgw_alloc))) {
5934 goto MakePgenRobust_ret_NOMEM;
5935 }
5936 SpgwInitPhase2(max_vrec_len, ctx.spgwp, spgw_alloc);
5937
5938 const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);
5939 const uint32_t sample_ctl = BitCtToWordCt(sample_ct);
5940 ctx.thread_write_genovecs = nullptr;
5941 uint32_t write_mhc_needed = 0;
5942 if (new_sample_idx_to_old || subsetting_required) {
5943 if (unlikely(bigstack_alloc_wp(1, &ctx.thread_write_genovecs))) {
5944 goto MakePgenRobust_ret_NOMEM;
5945 }
5946 if (read_phase_present && new_sample_idx_to_old) {
5947 if (unlikely(bigstack_alloc_u32(raw_sample_ct, &ctx.old_sample_idx_to_new))) {
5948 goto MakePgenRobust_ret_NOMEM;
5949 }
5950 for (uint32_t new_sample_idx = 0; new_sample_idx != sample_ct; ++new_sample_idx) {
5951 ctx.old_sample_idx_to_new[new_sample_idx_to_old[new_sample_idx]] = new_sample_idx;
5952 }
5953 }
5954 if (unlikely(bigstack_alloc_w(sample_ctl2, &(ctx.thread_write_genovecs[0])))) {
5955 goto MakePgenRobust_ret_NOMEM;
5956 }
5957 write_mhc_needed = 1;
5958 }
5959 ctx.thread_write_mhc = nullptr;
5960 if (write_mhc_needed) {
5961 if (unlikely(bigstack_alloc_wp(1, &ctx.thread_write_mhc))) {
5962 goto MakePgenRobust_ret_NOMEM;
5963 }
5964 // todo: refalt1_select
5965 const uintptr_t mhcwrite_word_ct = GetMhcWordCt(sample_ct);
5966 if (unlikely(bigstack_alloc_w(mhcwrite_word_ct, &(ctx.thread_write_mhc[0])))) {
5967 goto MakePgenRobust_ret_NOMEM;
5968 }
5969 }
5970 ctx.thread_write_phasepresents = nullptr;
5971 ctx.thread_all_hets = nullptr;
5972 if (read_or_write_phase_present) {
5973 if (unlikely(
5974 bigstack_alloc_wp(1, &ctx.thread_write_phasepresents) ||
5975 bigstack_alloc_wp(1, &ctx.thread_write_phaseinfos) ||
5976 bigstack_alloc_w(sample_ctl, &(ctx.thread_write_phasepresents[0])) ||
5977 bigstack_alloc_w(sample_ctl, &(ctx.thread_write_phaseinfos[0])))) {
5978 goto MakePgenRobust_ret_NOMEM;
5979 }
5980 if (read_phase_present) {
5981 if (unlikely(
5982 bigstack_alloc_wp(1, &ctx.thread_all_hets) ||
5983 bigstack_alloc_w(raw_sample_ctl, &(ctx.thread_all_hets[0])))) {
5984 goto MakePgenRobust_ret_NOMEM;
5985 }
5986 }
5987 }
5988 ctx.thread_write_dosagepresents = nullptr;
5989 ctx.thread_write_dphasepresents = nullptr;
5990 if (read_or_write_dosage_present) {
5991 if (unlikely(
5992 bigstack_alloc_wp(1, &ctx.thread_write_dosagepresents) ||
5993 bigstack_alloc_dosagep(1, &ctx.thread_write_dosagevals) ||
5994 bigstack_alloc_w(sample_ctl, &(ctx.thread_write_dosagepresents[0])) ||
5995 bigstack_alloc_dosage(sample_ct, &(ctx.thread_write_dosagevals[0])))) {
5996 goto MakePgenRobust_ret_NOMEM;
5997 }
5998 if (read_or_write_dphase_present) {
5999 if (unlikely(
6000 bigstack_alloc_wp(1, &ctx.thread_write_dphasepresents) ||
6001 bigstack_alloc_dphasep(1, &ctx.thread_write_dphasedeltas) ||
6002 bigstack_alloc_w(sample_ctl, &(ctx.thread_write_dphasepresents[0])) ||
6003 bigstack_alloc_dphase(sample_ct + RoundUpPow2(sample_ct, kCacheline / 2), &(ctx.thread_write_dphasedeltas[0])))) {
6004 goto MakePgenRobust_ret_NOMEM;
6005 }
6006 }
6007 }
6008 if ((write_mhc_needed || read_dosage_present) && new_sample_idx_to_old) {
6009 if (unlikely(
6010 bigstack_alloc_u32p(1, &ctx.thread_cumulative_popcount_bufs) ||
6011 bigstack_alloc_u32(raw_sample_ctl, &(ctx.thread_cumulative_popcount_bufs[0])))) {
6012 goto MakePgenRobust_ret_NOMEM;
6013 }
6014 }
6015 mcp->refalt1_select = refalt1_select;
6016 if (refalt1_select) {
6017 if (write_allele_idx_offsets) {
6018 // this will require write_mhc and an additional AlleleCode buffer
6019 logerrputs("Error: Multiallelic allele rotation is under development.\n");
6020 reterr = kPglRetNotYetSupported;
6021 goto MakePgenRobust_ret_1;
6022 }
6023 if (new_variant_idx_to_old || (variant_ct < raw_variant_ct)) {
6024 // might want inner loop to map variant uidx -> idx instead
6025 STD_ARRAY_PTR_DECL(AlleleCode, 2, tmp_refalt1_select);
6026 if (unlikely(BIGSTACK_ALLOC_STD_ARRAY(AlleleCode, 2, variant_ct, &tmp_refalt1_select))) {
6027 goto MakePgenRobust_ret_NOMEM;
6028 }
6029 if (new_variant_idx_to_old) {
6030 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
6031 const uintptr_t variant_uidx = new_variant_idx_to_old[variant_idx];
6032 STD_ARRAY_COPY(refalt1_select[variant_uidx], 2, tmp_refalt1_select[variant_idx]);
6033 }
6034 } else {
6035 uintptr_t variant_uidx_base = 0;
6036 uintptr_t cur_bits = variant_include[0];
6037 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
6038 const uintptr_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
6039 STD_ARRAY_COPY(refalt1_select[variant_uidx], 2, tmp_refalt1_select[variant_idx]);
6040 }
6041 }
6042 mcp->refalt1_select = tmp_refalt1_select;
6043 }
6044 }
6045 ctx.mcp = mcp;
6046 const uint32_t raw_sample_ctl2 = NypCtToWordCt(raw_sample_ct);
6047 PgenVariant pgv;
6048 PreinitPgv(&pgv);
6049 uint32_t* alt_regular_one_cts = nullptr;
6050 uint32_t* alt_invphase_one_cts = nullptr;
6051 uint32_t* alt_two_cts = nullptr;
6052 uint32_t* alt_sample_idx_buf = nullptr;
6053 uint32_t** alt_regular_one_sample_idx_starts = nullptr;
6054 uint32_t** alt_invphase_one_sample_idx_starts = nullptr;
6055 uint32_t** alt_two_sample_idx_starts = nullptr;
6056 if (make_plink2_flags & (kfMakePlink2MSplitBase * 7)) {
6057 // split or join
6058 // this is currently for split with no dosages
6059 if (unlikely(
6060 bigstack_alloc_w(raw_sample_ctl2, &pgv.genovec) ||
6061 bigstack_alloc_w(raw_sample_ctl, &pgv.patch_01_set) ||
6062 bigstack_alloc_ac(raw_sample_ct, &pgv.patch_01_vals) ||
6063 bigstack_alloc_w(raw_sample_ctl, &pgv.patch_10_set) ||
6064 bigstack_alloc_ac(2 * raw_sample_ct, &pgv.patch_10_vals) ||
6065 bigstack_alloc_u32(max_read_allele_ct, &alt_regular_one_cts) ||
6066 bigstack_alloc_u32(max_read_allele_ct, &alt_two_cts) ||
6067 bigstack_alloc_u32(2 * raw_sample_ct + 1, &alt_sample_idx_buf) ||
6068 bigstack_alloc_u32p(max_read_allele_ct + 1, &alt_regular_one_sample_idx_starts) ||
6069 bigstack_alloc_u32p(max_read_allele_ct + 1, &alt_two_sample_idx_starts))) {
6070 goto MakePgenRobust_ret_NOMEM;
6071 }
6072 if (read_phase_present) {
6073 if (unlikely(
6074 bigstack_alloc_w(raw_sample_ctl, &pgv.phasepresent) ||
6075 bigstack_alloc_w(raw_sample_ctl, &pgv.phaseinfo) ||
6076 bigstack_alloc_u32(max_read_allele_ct, &alt_invphase_one_cts) ||
6077 bigstack_alloc_u32p(max_read_allele_ct + 1, &alt_invphase_one_sample_idx_starts))) {
6078 goto MakePgenRobust_ret_NOMEM;
6079 }
6080 }
6081 if (read_dosage_present) {
6082 logerrputs("Error: Multiallelic dosages aren't supported yet.\n");
6083 reterr = kPglRetNotYetSupported;
6084 goto MakePgenRobust_ret_1;
6085 }
6086 }
6087
6088 const uint32_t raw_sample_ctv2 = NypCtToVecCt(raw_sample_ct);
6089 uintptr_t load_variant_vec_ct = raw_sample_ctv2;
6090 uint32_t loaded_vrtypes_needed = (read_gflags & kfPgenGlobalMultiallelicHardcallFound)? 1 : 0;
6091 if (read_phase_present || read_dosage_present) {
6092 loaded_vrtypes_needed = 1;
6093 if (read_phase_present) {
6094 // phaseraw has three parts:
6095 // 1. het_ct as uint32_t, and explicit_phasepresent_ct as uint32_t.
6096 // 2. vec-aligned bitarray of up to (raw_sample_ct + 1) bits. first
6097 // bit is set iff phasepresent is explicitly stored at all (if
6098 // not, all hets are assumed to be phased), if yes the remaining
6099 // bits store packed phasepresent values for all hets, if no the
6100 // remaining bits store packed phaseinfo values for all hets.
6101 // 3. word-aligned bitarray of up to raw_sample_ct bits, storing
6102 // phaseinfo values. (end of this array is vec-aligned.)
6103 const uintptr_t phaseraw_word_ct = (8 / kBytesPerWord) + kWordsPerVec + RoundDownPow2(raw_sample_ct / kBitsPerWordD2, kWordsPerVec);
6104 load_variant_vec_ct += WordCtToVecCt(phaseraw_word_ct);
6105 }
6106 if (read_dosage_present) {
6107 // biallelic dosageraw has two parts:
6108 // 1. vec-aligned bitarray of up to raw_sample_ct bits, storing which
6109 // samples have dosages.
6110 // 2. word-aligned array of uint16s with 0..32768 fixed-point
6111 // dosages.
6112 // dphaseraw has the same structure, with the uint16s replaced with
6113 // an int16 array of (left - right) values.
6114 const uintptr_t dosageraw_word_ct = kWordsPerVec * (BitCtToVecCt(raw_sample_ct) + DivUp(raw_sample_ct, (kBytesPerVec / sizeof(Dosage))));
6115 load_variant_vec_ct += WordCtToVecCt(dosageraw_word_ct) * (1 + read_dphase_present);
6116 }
6117 }
6118
6119 uintptr_t bytes_left = bigstack_left();
6120 if (unlikely(bytes_left < 7 * kCacheline)) {
6121 goto MakePgenRobust_ret_NOMEM;
6122 }
6123 bytes_left -= 7 * kCacheline; // defend against adverse rounding
6124 uintptr_t ulii = bytes_left / (2 * (kBytesPerVec * load_variant_vec_ct + loaded_vrtypes_needed));
6125 if (unlikely(!ulii)) {
6126 goto MakePgenRobust_ret_NOMEM;
6127 }
6128 if (ulii > MINV(kPglVblockSize, write_variant_ct)) {
6129 ulii = MINV(kPglVblockSize, write_variant_ct);
6130 }
6131 const uint32_t write_block_size = ulii;
6132 uintptr_t* main_loadbufs[2];
6133 main_loadbufs[0] = S_CAST(uintptr_t*, bigstack_alloc_raw_rd(load_variant_vec_ct * kBytesPerVec * write_block_size));
6134 main_loadbufs[1] = S_CAST(uintptr_t*, bigstack_alloc_raw_rd(load_variant_vec_ct * kBytesPerVec * write_block_size));
6135
6136 // todo: multiallelic trim-alts support
6137
6138 if (loaded_vrtypes_needed) {
6139 ctx.loaded_vrtypes[0] = S_CAST(unsigned char*, bigstack_alloc_raw_rd(write_block_size));
6140 ctx.loaded_vrtypes[1] = S_CAST(unsigned char*, bigstack_alloc_raw_rd(write_block_size));
6141 } else {
6142 ctx.loaded_vrtypes[0] = nullptr;
6143 ctx.loaded_vrtypes[1] = nullptr;
6144 }
6145 SetThreadFuncAndData(MakePgenThread, &ctx, &tg);
6146
6147 logprintfww5("Writing %s ... ", outname);
6148 fputs("0%", stdout);
6149 fflush(stdout);
6150
6151 // Main workflow:
6152 // 1. Set n=0, load first write_block_size post-filtering variants
6153 //
6154 // 2. Spawn single thread processing batch n
6155 // 3. Load batch (n+1) unless eof
6156 // 4. Join thread
6157 // 5. Increment n by 1
6158 // 6. Goto step 2 unless eof
6159 const uint32_t* new_variant_idx_to_old_iter = new_variant_idx_to_old;
6160 const uintptr_t* cur_write_allele_idx_offsets = nullptr;
6161 const uint32_t batch_ct_m1 = (write_variant_ct - 1) / write_block_size;
6162 uint32_t pct = 0;
6163 uint32_t parity = 0;
6164 uint32_t cur_batch_size = write_block_size;
6165 uint32_t next_print_write_variant_idx = write_variant_ct / 100;
6166 uint32_t cur_read_allele_ct = 2;
6167 uint32_t cur_write_allele_ct = 2;
6168 uint32_t cur_het_ct = 0;
6169 uintptr_t read_variant_uidx_base = 0;
6170
6171 // now need to retain these across loop iterations in case a split is
6172 // interrupted by batch-end
6173 uint32_t read_variant_uidx = 0;
6174 uint32_t write_aidx = 1;
6175
6176 uintptr_t cur_bits = variant_include[0];
6177 PgrSampleSubsetIndex null_pssi;
6178 PgrClearSampleSubsetIndex(simple_pgrp, &null_pssi);
6179 for (uint32_t read_batch_idx = 0; ; ++read_batch_idx) {
6180 if (!IsLastBlock(&tg)) {
6181 if (read_batch_idx == batch_ct_m1) {
6182 cur_batch_size = write_variant_ct - (read_batch_idx * write_block_size);
6183 }
6184 uintptr_t* cur_loadbuf = main_loadbufs[parity];
6185 uintptr_t* loadbuf_iter = cur_loadbuf;
6186 unsigned char* cur_loaded_vrtypes = ctx.loaded_vrtypes[parity];
6187 ctx.loadbuf_thread_starts[parity][0] = loadbuf_iter;
6188 if (write_allele_idx_offsets) {
6189 cur_write_allele_idx_offsets = &(write_allele_idx_offsets[read_batch_idx * write_block_size]);
6190 }
6191 for (uint32_t block_widx = 0; block_widx != cur_batch_size; ) {
6192 if (write_aidx == 1) {
6193 if (!new_variant_idx_to_old_iter) {
6194 read_variant_uidx = BitIter1(variant_include, &read_variant_uidx_base, &cur_bits);
6195 } else {
6196 read_variant_uidx = *new_variant_idx_to_old_iter++;
6197 }
6198 // todo: multiallelic trim-alts
6199 // todo: multiallelic merge
6200 // split: load to buffer instead of loadbuf_iter, have function
6201 // for writing to loadbuf_iter given buffer contents, this
6202 // should work if split is 'interrupted' by batch boundary
6203 // in middle
6204 // merge: track loadbuf_iter location at beginning of
6205 // same-position block... (finish writing this later)
6206 if (allele_idx_offsets) {
6207 cur_read_allele_ct = allele_idx_offsets[read_variant_uidx + 1] - allele_idx_offsets[read_variant_uidx];
6208 }
6209 }
6210 if (cur_write_allele_idx_offsets) {
6211 cur_write_allele_ct = cur_write_allele_idx_offsets[block_widx + 1] - cur_write_allele_idx_offsets[block_widx];
6212 }
6213 if (cur_read_allele_ct == cur_write_allele_ct) {
6214 reterr = PgrGetRaw(read_variant_uidx, read_gflags, simple_pgrp, &loadbuf_iter, cur_loaded_vrtypes? (&(cur_loaded_vrtypes[block_widx])) : nullptr);
6215 if (unlikely(reterr)) {
6216 goto MakePgenRobust_ret_PGR_FAIL;
6217 }
6218 ++block_widx;
6219 continue;
6220 } else if (cur_write_allele_ct == 2) {
6221 if (write_aidx == 1) {
6222 // 1. read into normal, not raw representation
6223 if (read_phase_present) {
6224 reterr = PgrGetMDp(nullptr, null_pssi, raw_sample_ct, read_variant_uidx, simple_pgrp, &pgv);
6225 } else {
6226 reterr = PgrGetMD(nullptr, null_pssi, raw_sample_ct, read_variant_uidx, simple_pgrp, &pgv);
6227 }
6228 if (unlikely(reterr)) {
6229 goto MakePgenRobust_ret_PGR_FAIL;
6230 }
6231
6232 // 2a. count # of each alt
6233 // 2b. create het and hom lists for each alt
6234 uintptr_t* genovec = pgv.genovec;
6235 ZeroTrailingNyps(raw_sample_ct, genovec);
6236 uint32_t raw_01_ct;
6237 uint32_t raw_10_ct;
6238 GenovecCount12Unsafe(genovec, raw_sample_ct, &raw_01_ct, &raw_10_ct);
6239 ZeroU32Arr(cur_read_allele_ct, alt_regular_one_cts);
6240 alt_regular_one_cts[1] = raw_01_ct - pgv.patch_01_ct;
6241 for (uint32_t rarealt_idx = 0; rarealt_idx != pgv.patch_01_ct; ++rarealt_idx) {
6242 alt_regular_one_cts[pgv.patch_01_vals[rarealt_idx]] += 1;
6243 }
6244 ZeroU32Arr(cur_read_allele_ct, alt_two_cts);
6245 if (!pgv.phasepresent_ct) {
6246 for (uint32_t uii = 0; uii != pgv.patch_10_ct; ++uii) {
6247 const AlleleCode ac0 = pgv.patch_10_vals[2 * uii];
6248 const AlleleCode ac1 = pgv.patch_10_vals[2 * uii + 1];
6249 if (ac0 == ac1) {
6250 alt_two_cts[ac0] += 1;
6251 } else {
6252 alt_regular_one_cts[ac0] += 1;
6253 alt_regular_one_cts[ac1] += 1;
6254 }
6255 }
6256 } else {
6257 ZeroU32Arr(cur_read_allele_ct, alt_invphase_one_cts);
6258 for (uint32_t uii = 0; uii != pgv.patch_10_ct; ++uii) {
6259 const AlleleCode ac0 = pgv.patch_10_vals[2 * uii];
6260 const AlleleCode ac1 = pgv.patch_10_vals[2 * uii + 1];
6261 if (ac0 == ac1) {
6262 alt_two_cts[ac0] += 1;
6263 } else {
6264 alt_invphase_one_cts[ac0] += 1;
6265 alt_regular_one_cts[ac1] += 1;
6266 }
6267 }
6268 }
6269
6270 alt_two_cts[1] = raw_10_ct - pgv.patch_10_ct;
6271 cur_het_ct = raw_01_ct + pgv.patch_10_ct;
6272 for (uint32_t aidx = 2; aidx != cur_read_allele_ct; ++aidx) {
6273 cur_het_ct -= alt_two_cts[aidx];
6274 }
6275
6276 uint32_t* sample_idx_buf_iter = alt_sample_idx_buf;
6277 alt_regular_one_sample_idx_starts[0] = alt_sample_idx_buf;
6278 for (uint32_t aidx = 1; aidx != cur_read_allele_ct; ++aidx) {
6279 alt_regular_one_sample_idx_starts[aidx] = sample_idx_buf_iter;
6280 sample_idx_buf_iter = &(sample_idx_buf_iter[alt_regular_one_cts[aidx]]);
6281 }
6282 alt_regular_one_sample_idx_starts[cur_read_allele_ct] = sample_idx_buf_iter;
6283 if (pgv.phasepresent_ct) {
6284 alt_invphase_one_sample_idx_starts[0] = sample_idx_buf_iter;
6285 for (uint32_t aidx = 1; aidx != cur_read_allele_ct - 1; ++aidx) {
6286 alt_invphase_one_sample_idx_starts[aidx] = sample_idx_buf_iter;
6287 sample_idx_buf_iter = &(sample_idx_buf_iter[alt_invphase_one_cts[aidx]]);
6288 }
6289 alt_invphase_one_sample_idx_starts[cur_read_allele_ct - 1] = sample_idx_buf_iter;
6290 alt_invphase_one_sample_idx_starts[cur_read_allele_ct] = sample_idx_buf_iter;
6291 }
6292 alt_two_sample_idx_starts[0] = sample_idx_buf_iter;
6293 for (uint32_t aidx = 1; aidx != cur_read_allele_ct; ++aidx) {
6294 alt_two_sample_idx_starts[aidx] = sample_idx_buf_iter;
6295 sample_idx_buf_iter = &(sample_idx_buf_iter[alt_two_cts[aidx]]);
6296 }
6297 alt_two_sample_idx_starts[cur_read_allele_ct] = sample_idx_buf_iter;
6298
6299 Halfword* patch_01_set_alias = R_CAST(Halfword*, pgv.patch_01_set);
6300 Halfword* patch_10_set_alias = R_CAST(Halfword*, pgv.patch_10_set);
6301 uint32_t idx_01 = 0;
6302 uint32_t idx_10 = 0;
6303 for (uint32_t widx = 0; widx != raw_sample_ctl2; ++widx) {
6304 const uintptr_t geno_word = genovec[widx];
6305 const uint32_t sample_idx_offset = widx * kBitsPerWordD2;
6306 uintptr_t geno_01 = Word01(geno_word);
6307 if (geno_01) {
6308 if (!pgv.patch_01_ct) {
6309 // patch_01_set not initialized in this case
6310 do {
6311 const uint32_t sample_idx = sample_idx_offset + ctzw(geno_01) / 2;
6312 alt_regular_one_sample_idx_starts[1][0] = sample_idx;
6313 alt_regular_one_sample_idx_starts[1] += 1;
6314 geno_01 &= geno_01 - 1;
6315 } while (geno_01);
6316 } else {
6317 uint32_t geno_01_hw = PackWordToHalfword(geno_01);
6318 const uint32_t patch_01_hw = patch_01_set_alias[widx];
6319 do {
6320 const uint32_t lowbit = geno_01_hw & (-geno_01_hw);
6321 const uint32_t sample_idx = sample_idx_offset + ctzu32(lowbit);
6322 if (lowbit & patch_01_hw) {
6323 AlleleCode ac = pgv.patch_01_vals[idx_01];
6324 alt_regular_one_sample_idx_starts[ac][0] = sample_idx;
6325 alt_regular_one_sample_idx_starts[ac] += 1;
6326 ++idx_01;
6327 } else {
6328 alt_regular_one_sample_idx_starts[1][0] = sample_idx;
6329 alt_regular_one_sample_idx_starts[1] += 1;
6330 }
6331 geno_01_hw ^= lowbit;
6332 } while (geno_01_hw);
6333 }
6334 }
6335 uintptr_t geno_10 = Word10(geno_word);
6336 if (geno_10) {
6337 if (!pgv.patch_10_ct) {
6338 // patch_10_set not initialized in this case
6339 do {
6340 const uint32_t sample_idx = sample_idx_offset + ctzw(geno_10) / 2;
6341 alt_two_sample_idx_starts[1][0] = sample_idx;
6342 alt_two_sample_idx_starts[1] += 1;
6343 geno_10 &= geno_10 - 1;
6344 } while (geno_10);
6345 } else {
6346 uint32_t geno_10_hw = PackWordToHalfword(geno_10);
6347 const uint32_t patch_10_hw = patch_10_set_alias[widx];
6348 if (!pgv.phasepresent_ct) {
6349 do {
6350 const uint32_t lowbit = geno_10_hw & (-geno_10_hw);
6351 const uint32_t sample_idx = sample_idx_offset + ctzu32(lowbit);
6352 if (lowbit & patch_10_hw) {
6353 AlleleCode ac0 = pgv.patch_10_vals[2 * idx_10];
6354 AlleleCode ac1 = pgv.patch_10_vals[2 * idx_10 + 1];
6355 if (ac0 == ac1) {
6356 alt_two_sample_idx_starts[ac0][0] = sample_idx;
6357 alt_two_sample_idx_starts[ac0] += 1;
6358 } else {
6359 alt_regular_one_sample_idx_starts[ac0][0] = sample_idx;
6360 alt_regular_one_sample_idx_starts[ac0] += 1;
6361 alt_regular_one_sample_idx_starts[ac1][0] = sample_idx;
6362 alt_regular_one_sample_idx_starts[ac1] += 1;
6363 }
6364 ++idx_10;
6365 } else {
6366 alt_two_sample_idx_starts[1][0] = sample_idx;
6367 alt_two_sample_idx_starts[1] += 1;
6368 }
6369 geno_10_hw ^= lowbit;
6370 } while (geno_10_hw);
6371 } else {
6372 do {
6373 const uint32_t lowbit = geno_10_hw & (-geno_10_hw);
6374 const uint32_t sample_idx = sample_idx_offset + ctzu32(lowbit);
6375 if (lowbit & patch_10_hw) {
6376 AlleleCode ac0 = pgv.patch_10_vals[2 * idx_10];
6377 AlleleCode ac1 = pgv.patch_10_vals[2 * idx_10 + 1];
6378 if (ac0 == ac1) {
6379 alt_two_sample_idx_starts[ac0][0] = sample_idx;
6380 alt_two_sample_idx_starts[ac0] += 1;
6381 } else {
6382 alt_invphase_one_sample_idx_starts[ac0][0] = sample_idx;
6383 alt_invphase_one_sample_idx_starts[ac0] += 1;
6384 alt_regular_one_sample_idx_starts[ac1][0] = sample_idx;
6385 alt_regular_one_sample_idx_starts[ac1] += 1;
6386 }
6387 ++idx_10;
6388 } else {
6389 alt_two_sample_idx_starts[1][0] = sample_idx;
6390 alt_two_sample_idx_starts[1] += 1;
6391 }
6392 geno_10_hw ^= lowbit;
6393 } while (geno_10_hw);
6394 }
6395 }
6396 }
6397 }
6398 for (uint32_t aidx = cur_read_allele_ct - 1; aidx; --aidx) {
6399 alt_regular_one_sample_idx_starts[aidx] = alt_regular_one_sample_idx_starts[aidx - 1];
6400 alt_two_sample_idx_starts[aidx] = alt_two_sample_idx_starts[aidx - 1];
6401 }
6402 if (pgv.phasepresent_ct) {
6403 for (uint32_t aidx = cur_read_allele_ct - 1; aidx; --aidx) {
6404 alt_invphase_one_sample_idx_starts[aidx] = alt_invphase_one_sample_idx_starts[aidx - 1];
6405 }
6406 }
6407 // todo: multiallelic dosage
6408
6409 for (uint32_t widx = 0; widx != raw_sample_ctl2; ++widx) {
6410 // keep 3s, set 1s and 2s to 0
6411 genovec[widx] = Word11(genovec[widx]) * 3;
6412 }
6413 }
6414 const uint32_t split_stop = MINV(cur_batch_size + 1 - block_widx, cur_read_allele_ct);
6415 for (; write_aidx != split_stop; ++write_aidx, ++block_widx) {
6416 // 3. synthesize raw
6417 // (save to loaded_vrtypes if necessary)
6418 // genovec, vector-aligned
6419 // if hphase present and relevant:
6420 // (compute het_ct; het_ctdl := het_ct / kBitsPerWord)
6421 // (first_half_byte_ct := 1 + (het_ct / CHAR_BIT))
6422 // <uint32 het_ct>
6423 // <uint32 raw_phasepresent_ct if explicit>
6424 // <first_half_byte_ct phasepresent or phaseinfo bytes>
6425 // <0-pad up to word boundary, to make popcount safe>
6426 // [if explicit phasepresent, i.e. lowest bit set:
6427 // (second_half_byte_ct := DivUp(raw_phasepresent_ct, 8))
6428 // <second_half_byte_ct phaseinfo contents>
6429 // ]
6430 // align up to vector boundary
6431 uintptr_t* new_genovec = loadbuf_iter;
6432 memcpy(new_genovec, pgv.genovec, raw_sample_ctl2 * sizeof(intptr_t));
6433 loadbuf_iter = &(loadbuf_iter[raw_sample_ctv2 * kWordsPerVec]);
6434 uint32_t new_phasepresent_ct = 0;
6435 uint32_t new_het_ct = 0;
6436 uint32_t* regular_stop = alt_regular_one_sample_idx_starts[write_aidx + 1];
6437 if (pgv.phasepresent_ct) {
6438 uint32_t* regular_iter = alt_regular_one_sample_idx_starts[write_aidx];
6439 uint32_t* invphase_iter = alt_invphase_one_sample_idx_starts[write_aidx];
6440 uint32_t* invphase_stop = alt_invphase_one_sample_idx_starts[write_aidx + 1];
6441 new_het_ct = (regular_stop - regular_iter) + (invphase_stop - invphase_iter);
6442 if (pgv.phasepresent_ct == cur_het_ct) {
6443 new_phasepresent_ct = new_het_ct;
6444 } else {
6445 uintptr_t* phasepresent = pgv.phasepresent;
6446 for (; regular_iter != regular_stop; ++regular_iter) {
6447 new_phasepresent_ct += IsSet(phasepresent, *regular_iter);
6448 }
6449 for (; invphase_iter != invphase_stop; ++invphase_iter) {
6450 new_phasepresent_ct += IsSet(phasepresent, *invphase_iter);
6451 }
6452 }
6453 }
6454 uint32_t* two_stop = alt_two_sample_idx_starts[write_aidx + 1];
6455 for (uint32_t* two_iter = alt_two_sample_idx_starts[write_aidx]; two_iter != two_stop; ++two_iter) {
6456 const uint32_t sample_uidx = *two_iter;
6457 SetBit(sample_uidx * 2 + 1, new_genovec);
6458 }
6459 uint32_t* regular_iter = alt_regular_one_sample_idx_starts[write_aidx];
6460 if (!new_phasepresent_ct) {
6461 for (; regular_iter != regular_stop; ++regular_iter) {
6462 const uint32_t sample_uidx = *regular_iter;
6463 SetBit(sample_uidx * 2, new_genovec);
6464 }
6465 if (pgv.phasepresent_ct) {
6466 uint32_t* invphase_stop = alt_invphase_one_sample_idx_starts[write_aidx + 1];
6467 for (uint32_t* invphase_iter = alt_invphase_one_sample_idx_starts[write_aidx]; invphase_iter != invphase_stop; ++invphase_iter) {
6468 const uint32_t sample_uidx = *invphase_iter;
6469 SetBit(sample_uidx * 2, new_genovec);
6470 }
6471 }
6472 if (cur_loaded_vrtypes) {
6473 cur_loaded_vrtypes[block_widx] = 0;
6474 }
6475 } else {
6476 // need to write raw hphase
6477 const uint32_t het_ctdl = new_het_ct / kBitsPerWord;
6478 uintptr_t* shifted_part1 = &(loadbuf_iter[8 / kBytesPerWord]);
6479 uintptr_t* part1_end = &(shifted_part1[1 + het_ctdl]);
6480 uint32_t* invphase_iter = alt_invphase_one_sample_idx_starts[write_aidx];
6481 uint32_t* invphase_stop = alt_invphase_one_sample_idx_starts[write_aidx + 1];
6482 const uint32_t orig_regular_end = *regular_stop;
6483 const uint32_t orig_invphase_end = *invphase_stop;
6484 // sentinel value to simplify the next loop.
6485 *invphase_stop = UINT32_MAX;
6486 // must grab this before setting *regular_stop, in case
6487 // they overlap; and after setting *invphase_stop, in case
6488 // this list is empty
6489 uint32_t invphase_idx = *invphase_iter++;
6490
6491 *regular_stop = UINT32_MAX;
6492 uint32_t regular_idx = *regular_iter++;
6493 uint32_t shifted_het_idx = 1;
6494 if (new_phasepresent_ct == new_het_ct) {
6495 loadbuf_iter[0] = new_het_ct;
6496 #ifndef __LP64__
6497 loadbuf_iter[1] = 0;
6498 #endif
6499 // shifted_part1 is phaseinfo
6500 shifted_part1[0] = 0;
6501 shifted_part1[het_ctdl] = 0;
6502 while (regular_idx != invphase_idx) {
6503 uintptr_t is_inverted = (invphase_idx < regular_idx);
6504 uint32_t sample_uidx;
6505 if (is_inverted) {
6506 sample_uidx = invphase_idx;
6507 invphase_idx = *invphase_iter++;
6508 } else {
6509 sample_uidx = regular_idx;
6510 regular_idx = *regular_iter++;
6511 }
6512 SetBit(sample_uidx * 2, new_genovec);
6513 AssignBit(shifted_het_idx, is_inverted ^ IsSet(pgv.phaseinfo, sample_uidx), shifted_part1);
6514 ++shifted_het_idx;
6515 }
6516 assert(shifted_het_idx == new_het_ct + 1);
6517 loadbuf_iter = part1_end;
6518 } else {
6519 #ifdef __LP64__
6520 loadbuf_iter[0] = new_het_ct | (S_CAST(uint64_t, new_phasepresent_ct) << 32);
6521 #else
6522 loadbuf_iter[0] = new_het_ct;
6523 loadbuf_iter[1] = new_phasepresent_ct;
6524 #endif
6525 shifted_part1[0] = 1;
6526 memset(shifted_part1, 0, (1 + het_ctdl) * sizeof(intptr_t));
6527 // shifted_part1 is phasepresent
6528 // part1_end is start of phaseinfo
6529 const uint32_t new_phasepresent_ctl = BitCtToWordCt(new_phasepresent_ct);
6530 part1_end[new_phasepresent_ctl - 1] = 0;
6531 uint32_t phasepresent_idx = 0;
6532 while (regular_idx != invphase_idx) {
6533 uintptr_t is_inverted = (invphase_idx < regular_idx);
6534 uint32_t sample_uidx;
6535 if (is_inverted) {
6536 sample_uidx = invphase_idx;
6537 invphase_idx = *invphase_iter++;
6538 } else {
6539 sample_uidx = regular_idx;
6540 regular_idx = *regular_iter++;
6541 }
6542 SetBit(sample_uidx * 2, new_genovec);
6543 if (IsSet(pgv.phasepresent, sample_uidx)) {
6544 SetBit(shifted_het_idx, shifted_part1);
6545 AssignBit(phasepresent_idx, is_inverted ^ IsSet(pgv.phaseinfo, sample_uidx), part1_end);
6546 ++phasepresent_idx;
6547 }
6548 ++shifted_het_idx;
6549 }
6550 assert(phasepresent_idx == new_phasepresent_ct);
6551 }
6552 assert(regular_idx == UINT32_MAX);
6553 *regular_stop = orig_regular_end;
6554 *invphase_stop = orig_invphase_end;
6555 VecAlignUp(&loadbuf_iter);
6556 if (cur_loaded_vrtypes) {
6557 cur_loaded_vrtypes[block_widx] = 0x10;
6558 }
6559 }
6560 }
6561 if (split_stop != cur_read_allele_ct) {
6562 break;
6563 }
6564 write_aidx = 1;
6565 } else {
6566 // merge; todo
6567 }
6568 }
6569 }
6570 if (read_batch_idx) {
6571 JoinThreads(&tg);
6572 reterr = ctx.write_reterr;
6573 if (unlikely(reterr)) {
6574 if (reterr == kPglRetWriteFail) {
6575 errno = ctx.write_errno;
6576 }
6577 goto MakePgenRobust_ret_1;
6578 }
6579 }
6580 if (!IsLastBlock(&tg)) {
6581 ctx.cur_block_write_ct = cur_batch_size;
6582 if (read_batch_idx == batch_ct_m1) {
6583 DeclareLastThreadBlock(&tg);
6584 }
6585 if (unlikely(SpawnThreads(&tg))) {
6586 goto MakePgenRobust_ret_THREAD_CREATE_FAIL;
6587 }
6588 }
6589 parity = 1 - parity;
6590 if (read_batch_idx) {
6591 if (read_batch_idx > batch_ct_m1) {
6592 break;
6593 }
6594 const uint32_t write_idx_end = read_batch_idx * write_block_size;
6595 if (write_idx_end >= next_print_write_variant_idx) {
6596 if (pct > 10) {
6597 putc_unlocked('\b', stdout);
6598 }
6599 pct = (write_idx_end * 100LLU) / write_variant_ct;
6600 printf("\b\b%u%%", pct++);
6601 fflush(stdout);
6602 next_print_write_variant_idx = (pct * S_CAST(uint64_t, write_variant_ct)) / 100;
6603 }
6604 }
6605 }
6606 SpgwFinish(ctx.spgwp);
6607 if (pct > 10) {
6608 putc_unlocked('\b', stdout);
6609 }
6610 fputs("\b\b", stdout);
6611 logputs("done.\n");
6612 }
6613 }
6614 while (0) {
6615 MakePgenRobust_ret_NOMEM:
6616 reterr = kPglRetNomem;
6617 break;
6618 MakePgenRobust_ret_PGR_FAIL:
6619 PgenErrPrintN(reterr);
6620 break;
6621 MakePgenRobust_ret_THREAD_CREATE_FAIL:
6622 reterr = kPglRetThreadCreateFail;
6623 break;
6624 }
6625 MakePgenRobust_ret_1:
6626 CleanupThreads(&tg);
6627 CleanupSpgw(&spgw, &reterr);
6628 BigstackReset(bigstack_mark);
6629 return reterr;
6630 }
6631
6632 // allele_presents should be nullptr iff trim_alts is false
6633 PglErr MakePlink2NoVsort(const uintptr_t* sample_include, const PedigreeIdInfo* piip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const char* pheno_names, const uint32_t* new_sample_idx_to_old, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* pvar_qual_present, const float* pvar_quals, const uintptr_t* pvar_filter_present, const uintptr_t* pvar_filter_npass, const char* const* pvar_filter_storage, const char* pvar_info_reload, const double* variant_cms, const char* varid_template_str, __maybe_unused const char* varid_multi_template_str, __maybe_unused const char* varid_multi_nonsnp_template_str, const char* missing_varid_match, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t pheno_ct, uintptr_t max_pheno_name_blen, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_ct, uint32_t max_allele_slen, uint32_t max_filter_slen, uint32_t info_reload_slen, UnsortedVar vpos_sortstatus, uint32_t max_thread_ct, uint32_t hard_call_thresh, uint32_t dosage_erase_thresh, uint32_t new_variant_id_max_allele_slen, MiscFlags misc_flags, MakePlink2Flags make_plink2_flags, PvarPsamFlags pvar_psam_flags, uintptr_t pgr_alloc_cacheline_ct, char* xheader, PgenFileInfo* pgfip, PgenReader* simple_pgrp, char* outname, char* outname_end) {
6634 unsigned char* bigstack_mark = g_bigstack_base;
6635 FILE* outfile = nullptr;
6636 PglErr reterr = kPglRetSuccess;
6637 ThreadGroup tg;
6638 PreinitThreads(&tg);
6639 MTPgenWriter* mpgwp = nullptr;
6640 MakePgenCtx ctx;
6641 {
6642 if (make_plink2_flags & kfMakeFam) {
6643 snprintf(outname_end, kMaxOutfnameExtBlen, ".fam");
6644 logprintfww5("Writing %s ... ", outname);
6645 fflush(stdout);
6646 reterr = WriteFam(outname, sample_include, piip, sex_nm, sex_male, pheno_cols, new_sample_idx_to_old, sample_ct, pheno_ct, '\t');
6647 if (unlikely(reterr)) {
6648 goto MakePlink2NoVsort_ret_1;
6649 }
6650 logputs("done.\n");
6651 }
6652 if (make_plink2_flags & kfMakePsam) {
6653 snprintf(outname_end, kMaxOutfnameExtBlen, ".psam");
6654 logprintfww5("Writing %s ... ", outname);
6655 fflush(stdout);
6656 reterr = WritePsam(outname, sample_include, piip, sex_nm, sex_male, pheno_cols, pheno_names, new_sample_idx_to_old, sample_ct, pheno_ct, max_pheno_name_blen, pvar_psam_flags);
6657 if (unlikely(reterr)) {
6658 goto MakePlink2NoVsort_ret_1;
6659 }
6660 logputs("done.\n");
6661 }
6662 const uint32_t input_biallelic = (!allele_idx_offsets);
6663 // output_biallelic: test write_allele_idx_offsets equality to null
6664 PgenGlobalFlags read_gflags = pgfip->gflags & (kfPgenGlobalHardcallPhasePresent | kfPgenGlobalDosagePresent | kfPgenGlobalDosagePhasePresent);
6665 if (!input_biallelic) {
6666 // Can only skip this when there are actually zero copies of alt2+.
6667 // Otherwise, even with erase-alt2+, we still need to distinguish alt1
6668 // from alt2 so we can set calls involving the latter to missing.
6669 read_gflags |= kfPgenGlobalMultiallelicHardcallFound;
6670 }
6671 const uintptr_t* write_allele_idx_offsets = nullptr;
6672 uint32_t write_variant_ct = variant_ct;
6673 uint32_t max_write_allele_ct = max_allele_ct;
6674 uint32_t max_missalt_ct = 0;
6675 if (make_plink2_flags & kfMakePlink2MMask) {
6676 // TODO: enforce on command line
6677 assert((!refalt1_select) && (!allele_presents));
6678 if (make_plink2_flags & kfMakePlink2MJoin) {
6679 reterr = PlanMultiallelicJoin(variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, make_plink2_flags, &write_variant_ct, &write_allele_idx_offsets, &max_write_allele_ct, &max_missalt_ct);
6680 } else if (!allele_idx_offsets) {
6681 // no splitting to do
6682 logputs("Note: All variants are biallelic; nothing to split.\n");
6683 } else {
6684 reterr = PlanMultiallelicSplit(variant_include, allele_idx_offsets, allele_storage, max_allele_ct, make_plink2_flags, &write_variant_ct, &write_allele_idx_offsets);
6685 }
6686 if (unlikely(reterr)) {
6687 goto MakePlink2NoVsort_ret_1;
6688 }
6689 } else if (allele_idx_offsets) {
6690 if (allele_presents) {
6691 fputs("multiallelic variants + trim-alts not yet supported\n", stderr);
6692 exit(S_CAST(int32_t, kPglRetNotYetSupported));
6693 }
6694 if (variant_ct < raw_variant_ct) {
6695 uintptr_t* new_allele_idx_offsets;
6696 if (bigstack_alloc_w(variant_ct + 1, &new_allele_idx_offsets)) {
6697 goto MakePlink2NoVsort_ret_NOMEM;
6698 }
6699 const uintptr_t final_offset = InitWriteAlleleIdxOffsets(variant_include, allele_idx_offsets, nullptr, refalt1_select, nullptr, variant_ct, new_allele_idx_offsets);
6700 if (final_offset != 2 * variant_ct) {
6701 new_allele_idx_offsets[variant_ct] = final_offset;
6702 write_allele_idx_offsets = new_allele_idx_offsets;
6703 } else {
6704 BigstackReset(new_allele_idx_offsets);
6705 }
6706 } else {
6707 write_allele_idx_offsets = allele_idx_offsets;
6708 }
6709 }
6710 if (make_plink2_flags & kfMakeBim) {
6711 const uint32_t bim_zst = (make_plink2_flags / kfMakeBimZs) & 1;
6712 OutnameZstSet(".bim", bim_zst, outname_end);
6713 logprintfww5("Writing %s ... ", outname);
6714 fflush(stdout);
6715 if (unlikely(write_allele_idx_offsets)) {
6716 logputs("\n");
6717 logerrprintf("Error: %s cannot contain multiallelic variants.\n", outname);
6718 goto MakePlink2NoVsort_ret_INCONSISTENT_INPUT;
6719 }
6720 if (write_variant_ct == variant_ct) {
6721 reterr = WriteMapOrBim(outname, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, allele_presents, refalt1_select, variant_cms, variant_ct, max_allele_slen, '\t', bim_zst, max_thread_ct);
6722 } else {
6723 assert(write_variant_ct > variant_ct);
6724 reterr = WriteBimSplit(outname, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, variant_cms, varid_template_str, missing_varid_match, variant_ct, max_allele_slen, new_variant_id_max_allele_slen, (make_plink2_flags / kfMakePlink2VaridSemicolon) & 1, (make_plink2_flags / kfMakePlink2VaridDup) & 1, misc_flags, bim_zst, max_thread_ct);
6725 }
6726 if (unlikely(reterr)) {
6727 goto MakePlink2NoVsort_ret_1;
6728 }
6729 logputs("done.\n");
6730 }
6731 if (make_plink2_flags & kfMakePvar) {
6732 OutnameZstSet(".pvar", pvar_psam_flags & kfPvarZs, outname_end);
6733 logprintfww5("Writing %s ... ", outname);
6734 fflush(stdout);
6735 uint32_t nonref_flags_storage = 3;
6736 if (!pgfip->nonref_flags) {
6737 nonref_flags_storage = (pgfip->gflags & kfPgenGlobalAllNonref)? 2 : 1;
6738 }
6739 if (write_variant_ct == variant_ct) {
6740 reterr = WritePvar(outname, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, allele_presents, refalt1_select, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, pgfip->nonref_flags, pvar_info_reload, variant_cms, raw_variant_ct, variant_ct, max_allele_slen, xheader_blen, info_flags, nonref_flags_storage, max_filter_slen, info_reload_slen, vpos_sortstatus, pvar_psam_flags, max_thread_ct, xheader);
6741 } else {
6742 const char* const* info_keys = nullptr;
6743 uint32_t info_key_ct = 0;
6744 uint32_t* info_keys_htable = nullptr;
6745 uint32_t info_keys_htable_size = 0;
6746 if (pvar_info_reload) {
6747 reterr = ParseInfoHeader(xheader, xheader_blen, &info_keys, &info_key_ct, &info_keys_htable, &info_keys_htable_size);
6748 if (reterr) {
6749 goto MakePlink2NoVsort_ret_1;
6750 }
6751 }
6752 if (write_variant_ct > variant_ct) {
6753 reterr = WritePvarSplit(outname, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, pgfip->nonref_flags, pvar_info_reload, variant_cms, varid_template_str, missing_varid_match, info_keys, info_keys_htable, raw_variant_ct, variant_ct, max_allele_slen, new_variant_id_max_allele_slen, xheader_blen, info_flags, nonref_flags_storage, max_filter_slen, info_reload_slen, vpos_sortstatus, info_key_ct, info_keys_htable_size, misc_flags, make_plink2_flags, pvar_psam_flags, max_thread_ct, xheader);
6754 } else {
6755 logerrputs("Error: Multiallelic join is under development.\n");
6756 reterr = kPglRetNotYetSupported;
6757 goto MakePlink2NoVsort_ret_1;
6758 // reterr = WritePvarJoin(outname, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, pgfip->nonref_flags, pvar_info_reload, variant_cms, varid_template_str, missing_varid_match, info_keys, info_keys_htable, raw_variant_ct, variant_ct, max_allele_slen, new_variant_id_max_allele_slen, max_write_allele_ct, max_missalt_ct, xheader_blen, info_flags, nonref_flags_storage, max_filter_slen, info_reload_slen, vpos_sortstatus, info_key_ct, info_keys_htable_size, misc_flags, make_plink2_flags, pvar_psam_flags, max_thread_ct, xheader);
6759 }
6760 }
6761 if (unlikely(reterr)) {
6762 goto MakePlink2NoVsort_ret_1;
6763 }
6764 logputs("done.\n");
6765 }
6766 MakeCommon mc;
6767 mc.plink2_write_flags = kfPlink2Write0;
6768 const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
6769 const uint32_t sample_ctl = BitCtToWordCt(sample_ct);
6770 ctx.sex_male_collapsed = nullptr; // defensive
6771 if (make_plink2_flags & kfMakePlink2SetHhMissing) {
6772 const uint32_t sample_ctv = BitCtToVecCt(sample_ct);
6773 uintptr_t* new_sex_male;
6774 if (unlikely(
6775 bigstack_alloc_w(sample_ctv * kWordsPerVec, &new_sex_male) ||
6776 bigstack_alloc_w(sample_ctv * kWordsPerVec, &mc.sex_male_collapsed_interleaved) ||
6777 bigstack_alloc_w(sample_ctv * kWordsPerVec, &ctx.sex_female_collapsed) ||
6778 bigstack_alloc_w(sample_ctv * kWordsPerVec, &mc.sex_female_collapsed_interleaved))) {
6779 goto MakePlink2NoVsort_ret_NOMEM;
6780 }
6781 CopyBitarrSubset(sex_male, sample_include, sample_ct, new_sex_male);
6782 ZeroTrailingWords(sample_ctl, new_sex_male);
6783 ctx.sex_male_collapsed = new_sex_male;
6784 FillInterleavedMaskVec(ctx.sex_male_collapsed, sample_ctv, mc.sex_male_collapsed_interleaved);
6785
6786 CopyBitarrSubset(sex_nm, sample_include, sample_ct, ctx.sex_female_collapsed);
6787 BitvecInvmask(new_sex_male, sample_ctl, ctx.sex_female_collapsed);
6788 ZeroTrailingWords(sample_ctl, ctx.sex_female_collapsed);
6789 FillInterleavedMaskVec(ctx.sex_female_collapsed, sample_ctv, mc.sex_female_collapsed_interleaved);
6790
6791 mc.plink2_write_flags |= kfPlink2WriteSetHhMissing;
6792 if (make_plink2_flags & kfMakePlink2SetHhMissingKeepDosage) {
6793 mc.plink2_write_flags |= kfPlink2WriteSetHhMissingKeepDosage;
6794 }
6795 } else {
6796 // defensive
6797 mc.sex_male_collapsed_interleaved = nullptr;
6798 mc.sex_female_collapsed_interleaved = nullptr;
6799 }
6800 if (make_plink2_flags & kfMakePlink2SetMixedMtMissing) {
6801 mc.plink2_write_flags |= kfPlink2WriteSetMixedMtMissing;
6802 if (make_plink2_flags & kfMakePlink2SetMixedMtMissingKeepDosage) {
6803 mc.plink2_write_flags |= kfPlink2WriteSetMixedMtMissingKeepDosage;
6804 }
6805 }
6806 mc.cip = cip;
6807 mc.raw_sample_ct = raw_sample_ct;
6808 mc.sample_ct = sample_ct;
6809 unsigned char* bigstack_mark2 = g_bigstack_base;
6810 const uint32_t make_pgen = make_plink2_flags & kfMakePgen;
6811 // todo: prohibit .pgen + .bim write when data is multiallelic without
6812 // either multiallelic split or erase-alt2+ specified
6813 // (--make-bed = automatic erase-alt2+?)
6814 if ((make_plink2_flags & kfMakeBed) || ((make_plink2_flags & (kfMakePgen | (kfMakePgenFormatBase * 3))) == (kfMakePgen | kfMakePgenFormatBase))) {
6815 reterr = MakeBedlikeMain(sample_include, new_sample_idx_to_old, variant_include, refalt1_select, raw_variant_ct, variant_ct, max_thread_ct, hard_call_thresh, make_plink2_flags, pgr_alloc_cacheline_ct, pgfip, &mc, outname, outname_end);
6816 } else if (make_pgen) {
6817 assert(variant_ct);
6818 assert(sample_ct);
6819 if (make_plink2_flags & (kfMakePlink2MSplitBase * 7)) {
6820 // don't duplicate complicated multiallelic split/merge/trim-alts logic
6821 // here for now.
6822 // (also auto-punt multiallelic dosage?)
6823 goto MakePlink2NoVsort_fallback;
6824 }
6825 ctx.write_allele_idx_offsets = write_allele_idx_offsets;
6826 if (variant_ct == raw_variant_ct) {
6827 ctx.write_chr_fo_vidx_start = cip->chr_fo_vidx_start;
6828 } else {
6829 if (AllocAndFillSubsetChrFoVidxStart(variant_include, cip, &ctx.write_chr_fo_vidx_start)) {
6830 goto MakePlink2NoVsort_fallback;
6831 }
6832 }
6833 if (make_plink2_flags & kfMakePgenErasePhase) {
6834 read_gflags &= ~(kfPgenGlobalHardcallPhasePresent | kfPgenGlobalDosagePhasePresent);
6835 }
6836 if (make_plink2_flags & kfMakePgenEraseDosage) {
6837 if (hard_call_thresh == UINT32_MAX) {
6838 read_gflags &= ~(kfPgenGlobalDosagePresent | kfPgenGlobalDosagePhasePresent);
6839 } else {
6840 // erase-dosage + --hard-call-threshold currently requires dosages to
6841 // be read, and only thrown away at the last minute
6842 // (alternatively, we could build --hard-call-threshold directly into
6843 // pgr_read_raw?)
6844 mc.plink2_write_flags |= kfPlink2WriteLateDosageErase;
6845 }
6846 }
6847 if (read_gflags && (variant_ct < raw_variant_ct)) {
6848 // did we e.g. filter out all the phased variants?
6849 // do not check for multiallelic-hc here for now
6850 // (write_allele_idx_offsets check above serves the same purpose)
6851 read_gflags &= kfPgenGlobalMultiallelicHardcallFound | GflagsVfilter(variant_include, pgfip->vrtypes, raw_variant_ct, pgfip->gflags);
6852 }
6853 // could check if all the phased samples were also filtered out, but
6854 // that's already caught by running --make-pgen twice, so not a big deal
6855
6856 const uint32_t read_dosage_present = (read_gflags / kfPgenGlobalDosagePresent) & 1;
6857 mc.hard_call_halfdist = ((hard_call_thresh == UINT32_MAX) || (!read_dosage_present))? 0 : (kDosage4th - hard_call_thresh);
6858 ctx.dosage_erase_halfdist = kDosage4th - dosage_erase_thresh;
6859 const uint32_t read_phase_present = !!(read_gflags & (kfPgenGlobalHardcallPhasePresent | kfPgenGlobalDosagePhasePresent));
6860 const uint32_t read_dphase_present = (read_gflags / kfPgenGlobalDosagePhasePresent) & 1;
6861 PgenGlobalFlags write_gflags = read_gflags;
6862 uint32_t read_or_write_phase_present = read_phase_present;
6863 uint32_t read_or_write_dphase_present = read_dphase_present;
6864 if (mc.hard_call_halfdist && (read_phase_present || read_or_write_dphase_present)) {
6865 read_or_write_phase_present = 1;
6866 read_or_write_dphase_present = 1;
6867 write_gflags |= kfPgenGlobalHardcallPhasePresent | kfPgenGlobalDosagePhasePresent;
6868 } else if (dosage_erase_thresh && read_dosage_present) {
6869 read_or_write_phase_present = 1;
6870 }
6871 uint32_t read_or_write_dosage_present = read_dosage_present;
6872 if (mc.plink2_write_flags & kfPlink2WriteLateDosageErase) {
6873 write_gflags &= ~(kfPgenGlobalDosagePresent | kfPgenGlobalDosagePhasePresent);
6874 } else if (mc.plink2_write_flags & (kfPlink2WriteSetHhMissingKeepDosage | kfPlink2WriteSetMixedMtMissingKeepDosage)) {
6875 read_or_write_dosage_present = 1;
6876 write_gflags |= kfPgenGlobalDosagePresent;
6877 }
6878 if ((write_gflags & (kfPgenGlobalMultiallelicHardcallFound | kfPgenGlobalDosagePresent)) == (kfPgenGlobalMultiallelicHardcallFound | kfPgenGlobalDosagePresent)) {
6879 logerrputs("Error: Multiallelic dosages aren't supported yet.\n");
6880 reterr = kPglRetNotYetSupported;
6881 goto MakePlink2NoVsort_ret_1;
6882 }
6883 write_gflags &= ~kfPgenGlobalMultiallelicHardcallFound;
6884 uintptr_t alloc_base_cacheline_ct;
6885 uint64_t mpgw_per_thread_cacheline_ct;
6886 uint32_t vrec_len_byte_ct;
6887 uint64_t vblock_cacheline_ct;
6888 // may want to have a load_sample_ct which is raw_sample_ct when e.g.
6889 // sample_ct > 0.1 * raw_sample_ct, and sample_ct otherwise.
6890 MpgwInitPhase1(write_allele_idx_offsets, variant_ct, sample_ct, write_gflags, &alloc_base_cacheline_ct, &mpgw_per_thread_cacheline_ct, &vrec_len_byte_ct, &vblock_cacheline_ct);
6891
6892 // bugfix: each variant currently needs to be vector-aligned
6893 // bugfix?: need to use raw_sample_ct here, not sample_ct
6894 const uint32_t raw_sample_ctv2 = NypCtToVecCt(raw_sample_ct);
6895 const uint32_t max_vblock_size = MINV(kPglVblockSize, variant_ct);
6896 uint64_t load_vblock_cacheline_ct = VecCtToCachelineCtU64(S_CAST(uint64_t, raw_sample_ctv2) * max_vblock_size);
6897
6898 if (make_plink2_flags & ((kfMakePlink2MSplitBase * 7) | kfMakePlink2EraseAlt2Plus)) {
6899 logerrputs("Error: 'multiallelics=' and 'erase-alt2+' modifiers are under development.\n");
6900 reterr = kPglRetNotYetSupported;
6901 goto MakePlink2NoVsort_ret_NOMEM;
6902 }
6903 if (read_gflags & kfPgenGlobalMultiallelicHardcallFound) {
6904 // raw multiallelic hardcall track has three parts:
6905 // 1. two words with rare01_ct and rare10_ct.
6906 // 2. (vector-aligned) patch_01_set and patch_01_vals.
6907 // 3. (vector-aligned) patch_10_set and patch_10_vals.
6908 const uintptr_t mhcraw_word_ct = RoundUpPow2(2, kWordsPerVec) + GetMhcWordCt(raw_sample_ct);
6909 load_vblock_cacheline_ct += WordCtToCachelineCtU64(S_CAST(uint64_t, mhcraw_word_ct) * max_vblock_size);
6910 }
6911 if (read_phase_present) {
6912 // could make this bound tighter when lots of unphased variants are
6913 // mixed in among the phased variants, but this isn't nearly as
6914 // important as the analogous multiallelic optimization
6915
6916 // phaseraw has three parts:
6917 // 1. het_ct as uint32_t, and explicit_phasepresent_ct as uint32_t.
6918 // 2. vec-aligned bitarray of up to (raw_sample_ct + 1) bits. first
6919 // bit is set iff phasepresent is explicitly stored at all (if not,
6920 // all hets are assumed to be phased), if yes the remaining bits
6921 // store packed phasepresent values for all hets, if no the
6922 // remaining bits store packed phaseinfo values for all hets.
6923 // 3. word-aligned bitarray of up to raw_sample_ct bits, storing
6924 // phaseinfo values. (end of this array is vec-aligned.)
6925 const uintptr_t phaseraw_word_ct = (8 / kBytesPerWord) + kWordsPerVec + RoundDownPow2(raw_sample_ct / kBitsPerWordD2, kWordsPerVec);
6926 load_vblock_cacheline_ct += WordCtToCachelineCtU64(S_CAST(uint64_t, phaseraw_word_ct) * max_vblock_size);
6927 }
6928 if (read_dosage_present) {
6929 // biallelic dosageraw has two parts:
6930 // 1. vec-aligned bitarray of up to raw_sample_ct bits, storing which
6931 // samples have dosages.
6932 // 2. word-aligned array of uint16s with 0..32768 fixed-point dosages.
6933 // dphaseraw has the same structure, with the uint16s replaced with an
6934 // int16 array of (left - right) values.
6935 const uintptr_t dosageraw_word_ct = kWordsPerVec * (BitCtToVecCt(raw_sample_ct) + DivUp(raw_sample_ct, kBytesPerVec / sizeof(Dosage)));
6936 load_vblock_cacheline_ct += WordCtToCachelineCtU64(dosageraw_word_ct * S_CAST(uint64_t, max_vblock_size)) * (1 + read_dphase_present);
6937 }
6938
6939 #ifndef __LP64__
6940 if ((mpgw_per_thread_cacheline_ct > (0x7fffffff / kCacheline)) || (load_vblock_cacheline_ct > (0x7fffffff / kCacheline))) {
6941 goto MakePlink2NoVsort_fallback;
6942 }
6943 #endif
6944 uint32_t calc_thread_ct = DivUp(variant_ct, kPglVblockSize);
6945 if (calc_thread_ct >= max_thread_ct) {
6946 calc_thread_ct = (max_thread_ct > 2)? (max_thread_ct - 1) : max_thread_ct;
6947 }
6948 const uint32_t subsetting_required = (sample_ct != raw_sample_ct);
6949 if (!new_sample_idx_to_old) {
6950 // hphase doesn't seem to affect read:write ratio much
6951 #ifdef USE_AVX2
6952 const uint32_t max_calc_thread_ct = 2;
6953 #else
6954 const uint32_t max_calc_thread_ct = 2 + subsetting_required;
6955 #endif
6956 if (calc_thread_ct > max_calc_thread_ct) {
6957 calc_thread_ct = max_calc_thread_ct;
6958 }
6959 }
6960 // this is frequently I/O-bound even when resorting, but I'll postpone
6961 // tuning thread count there
6962 mc.refalt1_select = refalt1_select;
6963 if (refalt1_select) {
6964 if (write_allele_idx_offsets) {
6965 // this will require write_mhc and an additional AlleleCode buffer
6966 logerrputs("Error: Multiallelic allele rotation is under development.\n");
6967 reterr = kPglRetNotYetSupported;
6968 goto MakePlink2NoVsort_ret_1;
6969 }
6970 if (variant_ct < raw_variant_ct) {
6971 // might want inner loop to map variant uidx -> idx instead
6972 STD_ARRAY_PTR_DECL(AlleleCode, 2, tmp_refalt1_select);
6973 if (BIGSTACK_ALLOC_STD_ARRAY(AlleleCode, 2, variant_ct, &tmp_refalt1_select)) {
6974 goto MakePlink2NoVsort_fallback;
6975 }
6976 uintptr_t variant_uidx_base = 0;
6977 uintptr_t cur_bits = variant_include[0];
6978 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
6979 const uintptr_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
6980 STD_ARRAY_COPY(refalt1_select[variant_uidx], 2, tmp_refalt1_select[variant_idx]);
6981 }
6982 mc.refalt1_select = tmp_refalt1_select;
6983 }
6984 }
6985 mpgwp = S_CAST(MTPgenWriter*, bigstack_alloc((calc_thread_ct + DivUp(sizeof(MTPgenWriter), kBytesPerWord)) * sizeof(intptr_t)));
6986 if (!mpgwp) {
6987 goto MakePlink2NoVsort_fallback;
6988 }
6989 mpgwp->pgen_outfile = nullptr;
6990 if (bigstack_alloc_wp(calc_thread_ct, &(ctx.loadbuf_thread_starts[0])) ||
6991 bigstack_alloc_wp(calc_thread_ct, &(ctx.loadbuf_thread_starts[1]))) {
6992 goto MakePlink2NoVsort_fallback;
6993 }
6994 uint32_t nonref_flags_storage = 3;
6995 uintptr_t* nonref_flags_write = pgfip->nonref_flags;
6996 if (!nonref_flags_write) {
6997 nonref_flags_storage = (pgfip->gflags & kfPgenGlobalAllNonref)? 2 : 1;
6998 } else if (variant_ct < raw_variant_ct) {
6999 const uint32_t write_variant_ctl = BitCtToWordCt(write_variant_ct);
7000 uintptr_t* old_nonref_flags = nonref_flags_write;
7001 if (bigstack_alloc_w(write_variant_ctl, &nonref_flags_write)) {
7002 goto MakePlink2NoVsort_fallback;
7003 }
7004 if (variant_ct == write_variant_ct) {
7005 CopyBitarrSubset(old_nonref_flags, variant_include, variant_ct, nonref_flags_write);
7006 } else {
7007 ZeroWArr(write_variant_ctl, nonref_flags_write);
7008 if (!write_allele_idx_offsets) {
7009 SplitNonrefFlags();
7010 } else {
7011 JoinNonrefFlags();
7012 }
7013 }
7014 if (nonref_flags_write[0] & 1) {
7015 if (AllBitsAreOne(nonref_flags_write, write_variant_ct)) {
7016 BigstackReset(nonref_flags_write);
7017 nonref_flags_write = nullptr;
7018 nonref_flags_storage = 2;
7019 }
7020 } else if (AllWordsAreZero(nonref_flags_write, write_variant_ctl)) {
7021 BigstackReset(nonref_flags_write);
7022 nonref_flags_write = nullptr;
7023 nonref_flags_storage = 1;
7024 }
7025 }
7026 ctx.pwcs = &(mpgwp->pwcs[0]);
7027 ctx.new_sample_idx_to_old = new_sample_idx_to_old;
7028 ctx.thread_write_genovecs = nullptr;
7029 ctx.thread_write_mhc = nullptr;
7030
7031 // Each worker thread handles with 64k loaded variants at a time, while
7032 // the I/O thread loads the next (64k * thread_ct).
7033 uintptr_t other_per_thread_cacheline_ct = 2 * load_vblock_cacheline_ct;
7034
7035 uint32_t write_mhc_needed = 0;
7036 if (new_sample_idx_to_old || subsetting_required) {
7037 if (bigstack_alloc_wp(calc_thread_ct, &ctx.thread_write_genovecs)) {
7038 goto MakePlink2NoVsort_fallback;
7039 }
7040 if (read_phase_present && new_sample_idx_to_old) {
7041 if (bigstack_alloc_u32(raw_sample_ct, &ctx.old_sample_idx_to_new)) {
7042 goto MakePlink2NoVsort_fallback;
7043 }
7044 for (uint32_t new_sample_idx = 0; new_sample_idx != sample_ct; ++new_sample_idx) {
7045 ctx.old_sample_idx_to_new[new_sample_idx_to_old[new_sample_idx]] = new_sample_idx;
7046 }
7047 }
7048 // ctx.thread_write_genovecs
7049 other_per_thread_cacheline_ct += NypCtToCachelineCt(sample_ct);
7050 write_mhc_needed = 1;
7051 }
7052 uintptr_t write_mhcraw_cacheline_ct = 0;
7053 if (write_mhc_needed) {
7054 if (bigstack_alloc_wp(calc_thread_ct, &ctx.thread_write_mhc)) {
7055 goto MakePlink2NoVsort_fallback;
7056 }
7057 // todo: refalt1_select
7058 const uintptr_t mhcwrite_word_ct = GetMhcWordCt(sample_ct);
7059 write_mhcraw_cacheline_ct = DivUp(mhcwrite_word_ct, kWordsPerCacheline);
7060 other_per_thread_cacheline_ct += write_mhcraw_cacheline_ct;
7061 }
7062 if ((write_mhc_needed || read_dosage_present) && new_sample_idx_to_old) {
7063 // ctx.thread_cumulative_popcount_bufs
7064 other_per_thread_cacheline_ct += Int32CtToCachelineCt(raw_sample_ctl);
7065 if (bigstack_alloc_u32p(calc_thread_ct, &ctx.thread_cumulative_popcount_bufs)) {
7066 goto MakePlink2NoVsort_fallback;
7067 }
7068 }
7069 ctx.thread_write_phasepresents = nullptr;
7070 ctx.thread_all_hets = nullptr;
7071 ctx.thread_write_dosagepresents = nullptr;
7072 ctx.thread_write_dphasepresents = nullptr;
7073 if (read_or_write_phase_present || read_or_write_dosage_present) {
7074 if (read_or_write_phase_present) {
7075 if (bigstack_alloc_wp(calc_thread_ct, &ctx.thread_write_phasepresents) ||
7076 bigstack_alloc_wp(calc_thread_ct, &ctx.thread_write_phaseinfos)) {
7077 goto MakePlink2NoVsort_fallback;
7078 }
7079 if (read_phase_present) {
7080 if (bigstack_alloc_wp(calc_thread_ct, &ctx.thread_all_hets)) {
7081 goto MakePlink2NoVsort_fallback;
7082 }
7083 other_per_thread_cacheline_ct += BitCtToCachelineCt(raw_sample_ct);
7084 }
7085 // phasepresent, phaseinfo
7086 other_per_thread_cacheline_ct += 2 * BitCtToCachelineCt(sample_ct);
7087 }
7088 if (read_or_write_dosage_present) {
7089 if (bigstack_alloc_wp(calc_thread_ct, &ctx.thread_write_dosagepresents) ||
7090 bigstack_alloc_dosagep(calc_thread_ct, &ctx.thread_write_dosagevals)) {
7091 goto MakePlink2NoVsort_fallback;
7092 }
7093 if (read_or_write_dphase_present) {
7094 if (bigstack_alloc_wp(calc_thread_ct, &ctx.thread_write_dphasepresents) ||
7095 bigstack_alloc_dphasep(calc_thread_ct, &ctx.thread_write_dphasedeltas)) {
7096 goto MakePlink2NoVsort_fallback;
7097 }
7098 }
7099 // dosage_present, dphase_present
7100 other_per_thread_cacheline_ct += BitCtToCachelineCt(sample_ct) * (1 + read_or_write_dphase_present);
7101
7102 // dosage_main, dphase_delta
7103 other_per_thread_cacheline_ct += DivUp(sample_ct, (kCacheline / sizeof(Dosage))) * (1 + 2 * read_or_write_dphase_present);
7104
7105 // todo: multiallelic dosage
7106 }
7107 }
7108 if (read_or_write_phase_present || read_dosage_present || (read_gflags & kfPgenGlobalMultiallelicHardcallFound)) {
7109 // ctx.loaded_vrtypes
7110 other_per_thread_cacheline_ct += 2 * (kPglVblockSize / kCacheline);
7111 }
7112 const uintptr_t cachelines_avail = bigstack_left() / kCacheline;
7113 if (cachelines_avail < alloc_base_cacheline_ct + (mpgw_per_thread_cacheline_ct + other_per_thread_cacheline_ct) * calc_thread_ct) {
7114 if (cachelines_avail < alloc_base_cacheline_ct + mpgw_per_thread_cacheline_ct + other_per_thread_cacheline_ct) {
7115 goto MakePlink2NoVsort_fallback;
7116 }
7117 calc_thread_ct = (cachelines_avail - alloc_base_cacheline_ct) / (mpgw_per_thread_cacheline_ct + other_per_thread_cacheline_ct);
7118 }
7119 uintptr_t* main_loadbufs[2];
7120 main_loadbufs[0] = S_CAST(uintptr_t*, bigstack_alloc_raw(load_vblock_cacheline_ct * calc_thread_ct * kCacheline));
7121 main_loadbufs[1] = S_CAST(uintptr_t*, bigstack_alloc_raw(load_vblock_cacheline_ct * calc_thread_ct * kCacheline));
7122 ctx.loaded_vrtypes[0] = nullptr;
7123 ctx.loaded_vrtypes[1] = nullptr;
7124 if (read_or_write_phase_present || read_dosage_present || (read_gflags & kfPgenGlobalMultiallelicHardcallFound)) {
7125 ctx.loaded_vrtypes[0] = S_CAST(unsigned char*, bigstack_alloc_raw(kPglVblockSize * calc_thread_ct));
7126 ctx.loaded_vrtypes[1] = S_CAST(unsigned char*, bigstack_alloc_raw(kPglVblockSize * calc_thread_ct));
7127 }
7128 if (read_or_write_phase_present || read_or_write_dosage_present) {
7129 const uint32_t bitvec_writebuf_byte_ct = BitCtToCachelineCt(sample_ct) * kCacheline;
7130 const uintptr_t dosagevals_writebuf_byte_ct = DivUp(sample_ct, (kCacheline / 2)) * kCacheline;
7131 for (uint32_t tidx = 0; tidx != calc_thread_ct; ++tidx) {
7132 if (read_or_write_phase_present) {
7133 ctx.thread_write_phasepresents[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(bitvec_writebuf_byte_ct));
7134 ctx.thread_write_phaseinfos[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(bitvec_writebuf_byte_ct));
7135
7136 if (read_phase_present) {
7137 ctx.thread_all_hets[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(BitCtToCachelineCt(raw_sample_ct) * kCacheline));
7138 }
7139 }
7140 if (read_or_write_dosage_present) {
7141 ctx.thread_write_dosagepresents[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(bitvec_writebuf_byte_ct));
7142 ctx.thread_write_dosagevals[tidx] = S_CAST(Dosage*, bigstack_alloc_raw(dosagevals_writebuf_byte_ct));
7143 if (read_or_write_dphase_present) {
7144 ctx.thread_write_dphasepresents[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(bitvec_writebuf_byte_ct));
7145 ctx.thread_write_dphasedeltas[tidx] = S_CAST(SDosage*, bigstack_alloc_raw(2 * dosagevals_writebuf_byte_ct));
7146 }
7147 }
7148 }
7149 }
7150 if (new_sample_idx_to_old || subsetting_required) {
7151 uintptr_t writebuf_byte_ct = input_biallelic? NypCtToByteCt(sample_ct) : (2 * sample_ct * sizeof(AlleleCode));
7152 writebuf_byte_ct = RoundUpPow2(writebuf_byte_ct, kCacheline);
7153 for (uint32_t tidx = 0; tidx != calc_thread_ct; ++tidx) {
7154 ctx.thread_write_genovecs[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(writebuf_byte_ct));
7155 if (write_mhc_needed) {
7156 ctx.thread_write_mhc[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(write_mhcraw_cacheline_ct * kCacheline));
7157 }
7158 if ((write_mhc_needed || read_dosage_present) && new_sample_idx_to_old) {
7159 ctx.thread_cumulative_popcount_bufs[tidx] = S_CAST(uint32_t*, bigstack_alloc_raw(Int32CtToCachelineCt(raw_sample_ctl) * kCacheline));
7160 }
7161 }
7162 }
7163 snprintf(outname_end, kMaxOutfnameExtBlen, ".pgen");
7164 logprintfww5("Writing %s ... ", outname);
7165 fputs("0%", stdout);
7166 fflush(stdout);
7167 unsigned char* mpgw_alloc = S_CAST(unsigned char*, bigstack_alloc_raw((alloc_base_cacheline_ct + mpgw_per_thread_cacheline_ct * calc_thread_ct) * kCacheline));
7168 assert(g_bigstack_base <= g_bigstack_end);
7169 reterr = MpgwInitPhase2(outname, write_allele_idx_offsets, nonref_flags_write, variant_ct, sample_ct, write_gflags, nonref_flags_storage, vrec_len_byte_ct, vblock_cacheline_ct, calc_thread_ct, mpgw_alloc, mpgwp);
7170 if (unlikely(reterr)) {
7171 if (reterr == kPglRetOpenFail) {
7172 logputs("\n");
7173 logerrprintfww(kErrprintfFopen, outname, strerror(errno));
7174 }
7175 goto MakePlink2NoVsort_ret_1;
7176 }
7177 if (unlikely(SetThreadCt(calc_thread_ct, &tg))) {
7178 goto MakePlink2NoVsort_ret_NOMEM;
7179 }
7180 mc.sample_include = subsetting_required? sample_include : nullptr;
7181 ctx.mcp = &mc;
7182 ctx.spgwp = nullptr;
7183 ctx.write_reterr = kPglRetSuccess;
7184 SetThreadFuncAndData(MakePgenThread, &ctx, &tg);
7185
7186 // Main workflow:
7187 // 1. Set n=0, load first calc_thread_ct * kPglVblockSize
7188 // *post-filtering* variants.
7189 // This doesn't play well with blockload when any variants are
7190 // filtered out, so we don't use it. (todo: look into special-casing
7191 // variant_ct == raw_variant_ct.)
7192 //
7193 // 2. Spawn threads processing batch n
7194 // 3. Load batch (n+1) unless eof
7195 // 4. Join threads
7196 // 5. Flush results for batch n (must happen here since we aren't using
7197 // two output buffers. this may be a mistake, revisit this choice...)
7198 // 6. Increment n by 1
7199 // 7. Goto step 2 unless eof
7200 const uint32_t batch_ct_m1 = (variant_ct - 1) / (kPglVblockSize * calc_thread_ct);
7201 uint32_t pct = 0;
7202 uint32_t parity = 0;
7203 uint32_t read_batch_idx = 0;
7204 uint32_t cur_batch_size = kPglVblockSize * calc_thread_ct;
7205 uint32_t next_print_variant_idx = variant_ct / 100;
7206 uintptr_t read_variant_uidx_base = 0;
7207 uintptr_t cur_bits = variant_include[0];
7208 PgrClearLdCache(simple_pgrp);
7209 for (uint32_t write_idx_end = 0; ; ++read_batch_idx, write_idx_end += cur_batch_size) {
7210 if (read_batch_idx) {
7211 ctx.cur_block_write_ct = cur_batch_size;
7212 if (write_idx_end == variant_ct) {
7213 DeclareLastThreadBlock(&tg);
7214 }
7215 if (unlikely(SpawnThreads(&tg))) {
7216 goto MakePlink2NoVsort_ret_THREAD_CREATE_FAIL;
7217 }
7218 }
7219 if (!IsLastBlock(&tg)) {
7220 if (read_batch_idx == batch_ct_m1) {
7221 cur_batch_size = variant_ct - (read_batch_idx * kPglVblockSize * calc_thread_ct);
7222 }
7223 uintptr_t* cur_loadbuf = main_loadbufs[parity];
7224 uintptr_t* loadbuf_iter = cur_loadbuf;
7225 unsigned char* cur_loaded_vrtypes = ctx.loaded_vrtypes[parity];
7226 for (uint32_t uii = 0; uii != cur_batch_size; ++uii) {
7227 if (!(uii % kPglVblockSize)) {
7228 ctx.loadbuf_thread_starts[parity][uii / kPglVblockSize] = loadbuf_iter;
7229 }
7230 const uintptr_t read_variant_uidx = BitIter1(variant_include, &read_variant_uidx_base, &cur_bits);
7231 reterr = PgrGetRaw(read_variant_uidx, read_gflags, simple_pgrp, &loadbuf_iter, cur_loaded_vrtypes? (&(cur_loaded_vrtypes[uii])) : nullptr);
7232 if (unlikely(reterr)) {
7233 goto MakePlink2NoVsort_ret_PGR_FAIL;
7234 }
7235 }
7236 }
7237 if (read_batch_idx) {
7238 JoinThreads(&tg);
7239 reterr = ctx.write_reterr;
7240 if (unlikely(reterr)) {
7241 // only possible error is kPglRetVarRecordTooLarge?
7242 goto MakePlink2NoVsort_ret_1;
7243 }
7244 }
7245 parity = 1 - parity;
7246 if (write_idx_end) {
7247 reterr = MpgwFlush(mpgwp);
7248 if (unlikely(reterr)) {
7249 goto MakePlink2NoVsort_ret_WRITE_FAIL;
7250 }
7251 if (write_idx_end == variant_ct) {
7252 mpgwp = nullptr;
7253 break;
7254 }
7255 if (write_idx_end >= next_print_variant_idx) {
7256 if (pct > 10) {
7257 putc_unlocked('\b', stdout);
7258 }
7259 pct = (write_idx_end * 100LLU) / variant_ct;
7260 printf("\b\b%u%%", pct++);
7261 fflush(stdout);
7262 next_print_variant_idx = (pct * S_CAST(uint64_t, variant_ct)) / 100;
7263 }
7264 }
7265 }
7266 if (pct > 10) {
7267 putc_unlocked('\b', stdout);
7268 }
7269 fputs("\b\b", stdout);
7270 logputs("done.\n");
7271 // BigstackReset(bigstack_mark);
7272 } else if (0) {
7273 MakePlink2NoVsort_fallback:
7274 g_failed_alloc_attempt_size = 0;
7275 mpgwp = nullptr;
7276 BigstackReset(bigstack_mark2);
7277 reterr = MakePgenRobust(sample_include, new_sample_idx_to_old, variant_include, allele_idx_offsets, allele_presents, refalt1_select, write_allele_idx_offsets, nullptr, ctx.sex_male_collapsed, ctx.sex_female_collapsed, raw_variant_ct, variant_ct, write_variant_ct, max_allele_ct, hard_call_thresh, dosage_erase_thresh, make_plink2_flags, &mc, simple_pgrp, outname, outname_end);
7278 if (unlikely(reterr)) {
7279 goto MakePlink2NoVsort_ret_1;
7280 }
7281 if (variant_ct != write_variant_ct) {
7282 logprintfww("Multiallelic %s: %u variant%s written.\n", (variant_ct < write_variant_ct)? "split" : "join", write_variant_ct, (write_variant_ct == 1)? "" : "s");
7283 }
7284 }
7285 }
7286 while (0) {
7287 MakePlink2NoVsort_ret_NOMEM:
7288 reterr = kPglRetNomem;
7289 break;
7290 MakePlink2NoVsort_ret_PGR_FAIL:
7291 PgenErrPrintN(reterr);
7292 break;
7293 MakePlink2NoVsort_ret_WRITE_FAIL:
7294 reterr = kPglRetWriteFail;
7295 break;
7296 MakePlink2NoVsort_ret_INCONSISTENT_INPUT:
7297 reterr = kPglRetInconsistentInput;
7298 break;
7299 MakePlink2NoVsort_ret_THREAD_CREATE_FAIL:
7300 reterr = kPglRetThreadCreateFail;
7301 break;
7302 }
7303 MakePlink2NoVsort_ret_1:
7304 CleanupMpgw(mpgwp, &reterr);
7305 CleanupThreads(&tg);
7306 fclose_cond(outfile);
7307 pgfip->block_base = nullptr;
7308 BigstackReset(bigstack_mark);
7309 return reterr;
7310 }
7311
7312
SortChr(const ChrInfo * cip,const uint32_t * chr_idx_to_size,uint32_t use_nsort,ChrInfo * write_cip)7313 BoolErr SortChr(const ChrInfo* cip, const uint32_t* chr_idx_to_size, uint32_t use_nsort, ChrInfo* write_cip) {
7314 // Finishes initialization of write_cip. Assumes chr_fo_vidx_start is
7315 // allocated and initialized to all-bits-one, chr_file_order/chr_idx_to_foidx
7316 // are unallocated, and chr_ct is uninitialized.
7317 const uint32_t max_code = cip->max_code;
7318 const uint32_t chr_code_end = max_code + 1 + cip->name_ct;
7319 uint32_t new_chr_ct = 0;
7320 for (uint32_t chr_idx = 0; chr_idx != chr_code_end; ++chr_idx) {
7321 const uint32_t cur_chr_size = chr_idx_to_size[chr_idx];
7322 if (cur_chr_size) {
7323 ++new_chr_ct;
7324 }
7325 }
7326 // bugfix (25 Nov 2019): must add 1 for chr_fo_vidx_start
7327 if (bigstack_alloc_u32(new_chr_ct, &(write_cip->chr_file_order)) ||
7328 bigstack_alloc_u32(new_chr_ct + 1, &(write_cip->chr_fo_vidx_start))) {
7329 return 1;
7330 }
7331 write_cip->chr_ct = new_chr_ct;
7332 // now for the actual sorting.
7333 // autosomes and PAR1/X/PAR2/Y/XY/MT come first, then contig names.
7334 const uint32_t autosome_ct = cip->autosome_ct;
7335 const uint32_t xymt_ct = max_code - autosome_ct;
7336 const uint32_t autosome_ct_p1 = autosome_ct + 1;
7337
7338 STD_ARRAY_KREF(uint32_t, kChrOffsetCt) xymt_codes = cip->xymt_codes;
7339 const uintptr_t xymt_idx_to_chr_sort_offset[kChrOffsetCt] = {1, 3, 4, 5, 0, 2};
7340
7341 // chr_sort_idx in high bits, original chr_idx in low
7342 uint64_t* std_sortbuf;
7343 uint64_t* std_sortbuf_iter;
7344 if (bigstack_alloc_u64(max_code + 1, &std_sortbuf)) {
7345 return 1;
7346 }
7347 std_sortbuf_iter = std_sortbuf;
7348 for (uintptr_t chr_idx = 0; chr_idx <= autosome_ct; ++chr_idx) {
7349 if (chr_idx_to_size[chr_idx]) {
7350 *std_sortbuf_iter++ = chr_idx * 0x100000001LLU;
7351 }
7352 }
7353 for (uint32_t xymt_idx = 0; xymt_idx != xymt_ct; ++xymt_idx) {
7354 const uint32_t xymt_code = xymt_codes[xymt_idx];
7355 if (!IsI32Neg(xymt_code)) {
7356 if (chr_idx_to_size[xymt_idx + autosome_ct_p1]) {
7357 *std_sortbuf_iter++ = (S_CAST(uint64_t, xymt_idx_to_chr_sort_offset[xymt_idx] + autosome_ct_p1) << 32) | (xymt_idx + autosome_ct_p1);
7358 }
7359 }
7360 }
7361 const uint32_t std_sortbuf_len = std_sortbuf_iter - std_sortbuf;
7362 STD_SORT(std_sortbuf_len, u64cmp, std_sortbuf);
7363 uint32_t write_vidx = 0;
7364 write_cip->chr_fo_vidx_start[0] = 0;
7365 for (uint32_t new_chr_fo_idx = 0; new_chr_fo_idx != std_sortbuf_len; ++new_chr_fo_idx) {
7366 const uint64_t cur_entry = std_sortbuf[new_chr_fo_idx];
7367 const uintptr_t chr_idx = S_CAST(uint32_t, cur_entry);
7368 const uint32_t chr_size = chr_idx_to_size[chr_idx];
7369 write_cip->chr_file_order[new_chr_fo_idx] = chr_idx;
7370 write_vidx += chr_size;
7371 write_cip->chr_fo_vidx_start[new_chr_fo_idx + 1] = write_vidx;
7372 write_cip->chr_idx_to_foidx[chr_idx] = new_chr_fo_idx;
7373 }
7374
7375 const uint32_t new_nonstd_ct = new_chr_ct - std_sortbuf_len;
7376 if (new_nonstd_ct) {
7377 StrSortIndexedDeref* nonstd_sort_buf = S_CAST(StrSortIndexedDeref*, bigstack_alloc_raw_rd(new_nonstd_ct * sizeof(StrSortIndexedDeref)));
7378 if (!nonstd_sort_buf) {
7379 return 1;
7380 }
7381 const char** nonstd_names = cip->nonstd_names;
7382 uint32_t str_idx = 0;
7383 for (uint32_t chr_idx = max_code + 1; chr_idx != chr_code_end; ++chr_idx) {
7384 if (chr_idx_to_size[chr_idx]) {
7385 nonstd_sort_buf[str_idx].strptr = nonstd_names[chr_idx];
7386 nonstd_sort_buf[str_idx].orig_idx = chr_idx;
7387 ++str_idx;
7388 }
7389 }
7390 assert(str_idx == new_nonstd_ct);
7391 // nonstd_names are not allocated in main workspace, so can't overread.
7392 StrptrArrSortMain(new_nonstd_ct, 0, use_nsort, nonstd_sort_buf);
7393 uint32_t new_chr_fo_idx = std_sortbuf_len;
7394 for (str_idx = 0; str_idx != new_nonstd_ct; ++str_idx, ++new_chr_fo_idx) {
7395 const uint32_t chr_idx = nonstd_sort_buf[str_idx].orig_idx;
7396 const uint32_t chr_size = chr_idx_to_size[chr_idx];
7397 write_cip->chr_file_order[new_chr_fo_idx] = chr_idx;
7398 write_vidx += chr_size;
7399 write_cip->chr_fo_vidx_start[new_chr_fo_idx + 1] = write_vidx;
7400 write_cip->chr_idx_to_foidx[chr_idx] = new_chr_fo_idx;
7401 }
7402 }
7403 BigstackReset(std_sortbuf);
7404 return 0;
7405 }
7406
7407 // hybrid of WriteMapOrBim() and write_pvar_resorted()
7408 PglErr WriteBimResorted(const char* outname, const ChrInfo* write_cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const double* variant_cms, const uint32_t* new_variant_idx_to_old, uint32_t variant_ct, uint32_t max_allele_slen, uint32_t output_zst, uint32_t thread_ct) {
7409 // allele_presents must be nullptr unless we're trimming alt alleles
7410 unsigned char* bigstack_mark = g_bigstack_base;
7411 char* cswritep = nullptr;
7412 CompressStreamState css;
7413 PglErr reterr = kPglRetSuccess;
7414 PreinitCstream(&css);
7415 {
7416 const uint32_t max_chr_blen = GetMaxChrSlen(write_cip) + 1;
7417 // includes trailing tab
7418 char* chr_buf;
7419
7420 if (unlikely(bigstack_alloc_c(max_chr_blen, &chr_buf))) {
7421 goto WriteBimResorted_ret_NOMEM;
7422 }
7423 const uintptr_t overflow_buf_size = kCompressStreamBlock + kMaxIdSlen + 512 + 2 * max_allele_slen;
7424 reterr = InitCstreamAlloc(outname, 0, output_zst, thread_ct, overflow_buf_size, &css, &cswritep);
7425 if (unlikely(reterr)) {
7426 goto WriteBimResorted_ret_1;
7427 }
7428
7429 const char output_missing_geno_char = *g_output_missing_geno_ptr;
7430 uint32_t chr_fo_idx = UINT32_MAX;
7431 uint32_t chr_end = 0;
7432 uint32_t chr_buf_blen = 0;
7433 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
7434 const uint32_t variant_uidx = new_variant_idx_to_old[variant_idx];
7435 if (variant_idx >= chr_end) {
7436 do {
7437 ++chr_fo_idx;
7438 chr_end = write_cip->chr_fo_vidx_start[chr_fo_idx + 1];
7439 } while (variant_idx >= chr_end);
7440 char* chr_name_end = chrtoa(write_cip, write_cip->chr_file_order[chr_fo_idx], chr_buf);
7441 *chr_name_end = '\t';
7442 chr_buf_blen = 1 + S_CAST(uintptr_t, chr_name_end - chr_buf);
7443 }
7444 cswritep = memcpya(cswritep, chr_buf, chr_buf_blen);
7445 cswritep = strcpyax(cswritep, variant_ids[variant_uidx], '\t');
7446 if (!variant_cms) {
7447 *cswritep++ = '0';
7448 } else {
7449 cswritep = dtoa_g_p8(variant_cms[variant_uidx], cswritep);
7450 }
7451 *cswritep++ = '\t';
7452 cswritep = u32toa(variant_bps[variant_uidx], cswritep);
7453 *cswritep++ = '\t';
7454 const uintptr_t allele_idx_offset_base = allele_idx_offsets? allele_idx_offsets[variant_uidx] : (variant_uidx * 2);
7455 const char* const* cur_alleles = &(allele_storage[allele_idx_offset_base]);
7456 // note that VCF ref allele corresponds to A2, not A1
7457 if (!refalt1_select) {
7458 if ((!allele_presents) || IsSet(allele_presents, 1 + allele_idx_offset_base)) {
7459 cswritep = strcpya(cswritep, cur_alleles[1]);
7460 } else {
7461 *cswritep++ = output_missing_geno_char;
7462 }
7463 *cswritep++ = '\t';
7464 cswritep = strcpya(cswritep, cur_alleles[0]);
7465 } else {
7466 STD_ARRAY_KREF(AlleleCode, 2) cur_refalt1_select = refalt1_select[variant_uidx];
7467 if ((!allele_presents) || IsSet(allele_presents, cur_refalt1_select[1] + allele_idx_offset_base)) {
7468 cswritep = strcpya(cswritep, cur_alleles[cur_refalt1_select[1]]);
7469 } else {
7470 *cswritep++ = output_missing_geno_char;
7471 }
7472 *cswritep++ = '\t';
7473 cswritep = strcpya(cswritep, cur_alleles[cur_refalt1_select[0]]);
7474 }
7475 AppendBinaryEoln(&cswritep);
7476 if (unlikely(Cswrite(&css, &cswritep))) {
7477 goto WriteBimResorted_ret_WRITE_FAIL;
7478 }
7479 }
7480 if (unlikely(CswriteCloseNull(&css, cswritep))) {
7481 goto WriteBimResorted_ret_WRITE_FAIL;
7482 }
7483 }
7484 while (0) {
7485 WriteBimResorted_ret_NOMEM:
7486 reterr = kPglRetNomem;
7487 break;
7488 WriteBimResorted_ret_WRITE_FAIL:
7489 reterr = kPglRetWriteFail;
7490 break;
7491 }
7492 WriteBimResorted_ret_1:
7493 CswriteCloseCond(&css, cswritep);
7494 BigstackReset(bigstack_mark);
7495 return reterr;
7496 }
7497
PvarInfoReloadInterval(const uint32_t * old_variant_uidx_to_new,uint32_t variant_idx_start,uint32_t variant_idx_end,TextStream * pvar_reload_txsp,char ** pvar_info_strs)7498 PglErr PvarInfoReloadInterval(const uint32_t* old_variant_uidx_to_new, uint32_t variant_idx_start, uint32_t variant_idx_end, TextStream* pvar_reload_txsp, char** pvar_info_strs) {
7499 // We assume the batch size was chosen such that there's no risk of
7500 // scribbling past g_bigstack_end (barring pathological cases like another
7501 // process modifying the .pvar file after initial load).
7502 // We also assume no more dynamic allocations are needed after this;
7503 // otherwise, str_store_iter should be returned.
7504 char* line_iter;
7505 // probable todo: avoid rewind when one batch is entirely after the previous
7506 // batch (this is likely when input was already almost-sorted, and just a few
7507 // coordinates changed due to e.g. --normalize)
7508 PglErr reterr = TextRewind(pvar_reload_txsp);
7509 if (unlikely(reterr)) {
7510 return reterr;
7511 }
7512 const uint32_t cur_batch_size = variant_idx_end - variant_idx_start;
7513 char* str_store_iter = R_CAST(char*, g_bigstack_base);
7514 uint32_t info_col_idx;
7515 reterr = PvarInfoReloadHeader(pvar_reload_txsp, &line_iter, &info_col_idx);
7516 if (unlikely(reterr)) {
7517 return reterr;
7518 }
7519 uint32_t variant_idx = 0;
7520 for (uint32_t variant_uidx = 0; ; ++variant_uidx) {
7521 reterr = TextNextLineLstrip(pvar_reload_txsp, &line_iter);
7522 if (unlikely(reterr)) {
7523 return reterr;
7524 }
7525 const uint32_t new_variant_idx_offset = old_variant_uidx_to_new[variant_uidx] - variant_idx_start;
7526 // exploit wraparound, UINT32_MAX null value
7527 if (new_variant_idx_offset >= cur_batch_size) {
7528 continue;
7529 }
7530 line_iter = NextTokenMultFar(line_iter, info_col_idx);
7531 if (!line_iter) {
7532 return kPglRetRewindFail;
7533 }
7534 char* info_end = CurTokenEnd(line_iter);
7535 const uint32_t info_slen = info_end - line_iter;
7536 pvar_info_strs[new_variant_idx_offset] = str_store_iter;
7537 str_store_iter = memcpyax(str_store_iter, line_iter, info_slen, '\0');
7538 line_iter = info_end;
7539 if (++variant_idx == cur_batch_size) {
7540 break;
7541 }
7542 }
7543 assert(str_store_iter <= R_CAST(char*, g_bigstack_end));
7544 return kPglRetSuccess;
7545 }
7546
7547 // could be BoolErr
7548 PglErr WritePvarResortedInterval(const ChrInfo* write_cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* qual_present, const float* quals, const uintptr_t* filter_present, const uintptr_t* filter_npass, const char* const* filter_storage, const uintptr_t* nonref_flags, const double* variant_cms, const uint32_t* new_variant_idx_to_old, uint32_t variant_idx_start, uint32_t variant_idx_end, uint32_t info_pr_flag_present, uint32_t write_qual, uint32_t write_filter, uint32_t write_info, uint32_t all_nonref, uint32_t write_cm, char** pvar_info_strs, CompressStreamState* cssp, char** cswritepp, uint32_t* chr_fo_idxp, uint32_t* chr_endp, uint32_t* chr_buf_blenp, char* chr_buf) {
7549 char* cswritep = *cswritepp;
7550 uint32_t chr_fo_idx = *chr_fo_idxp;
7551 uint32_t chr_end = *chr_endp;
7552 uint32_t chr_buf_blen = *chr_buf_blenp;
7553 PglErr reterr = kPglRetSuccess;
7554 {
7555 const char output_missing_geno_char = *g_output_missing_geno_ptr;
7556 uint32_t ref_allele_idx = 0;
7557 uint32_t alt1_allele_idx = 1;
7558 uint32_t cur_allele_ct = 2;
7559 for (uint32_t variant_idx = variant_idx_start; variant_idx != variant_idx_end; ++variant_idx) {
7560 const uint32_t variant_uidx = new_variant_idx_to_old[variant_idx];
7561 if (variant_idx == chr_end) {
7562 ++chr_fo_idx;
7563 chr_end = write_cip->chr_fo_vidx_start[chr_fo_idx + 1];
7564 assert(variant_idx < chr_end);
7565 char* chr_name_end = chrtoa(write_cip, write_cip->chr_file_order[chr_fo_idx], chr_buf);
7566 *chr_name_end = '\t';
7567 chr_buf_blen = 1 + S_CAST(uintptr_t, chr_name_end - chr_buf);
7568 }
7569 cswritep = memcpya(cswritep, chr_buf, chr_buf_blen);
7570 cswritep = u32toa_x(variant_bps[variant_uidx], '\t', cswritep);
7571 cswritep = strcpyax(cswritep, variant_ids[variant_uidx], '\t');
7572 uintptr_t allele_idx_offset_base;
7573 if (!allele_idx_offsets) {
7574 allele_idx_offset_base = variant_uidx * 2;
7575 } else {
7576 allele_idx_offset_base = allele_idx_offsets[variant_uidx];
7577 cur_allele_ct = allele_idx_offsets[variant_uidx + 1] - allele_idx_offset_base;
7578 }
7579 const char* const* cur_alleles = &(allele_storage[allele_idx_offset_base]);
7580 if (refalt1_select) {
7581 ref_allele_idx = refalt1_select[variant_uidx][0];
7582 alt1_allele_idx = refalt1_select[variant_uidx][1];
7583 }
7584 cswritep = strcpyax(cswritep, cur_alleles[ref_allele_idx], '\t');
7585 uint32_t alt_allele_written = 0;
7586 if ((!allele_presents) || IsSet(allele_presents, allele_idx_offset_base + alt1_allele_idx)) {
7587 cswritep = strcpya(cswritep, cur_alleles[alt1_allele_idx]);
7588 alt_allele_written = 1;
7589 }
7590 if (unlikely(Cswrite(cssp, &cswritep))) {
7591 goto WritePvarResortedInterval_ret_WRITE_FAIL;
7592 }
7593 if (cur_allele_ct > 2) {
7594 for (uint32_t allele_idx = 0; allele_idx != cur_allele_ct; ++allele_idx) {
7595 if ((allele_idx == ref_allele_idx) || (allele_idx == alt1_allele_idx) || (allele_presents && (!IsSet(allele_presents, allele_idx_offset_base + allele_idx)))) {
7596 continue;
7597 }
7598 if (alt_allele_written) {
7599 *cswritep++ = ',';
7600 }
7601 alt_allele_written = 1;
7602 cswritep = strcpya(cswritep, cur_alleles[allele_idx]);
7603 if (unlikely(Cswrite(cssp, &cswritep))) {
7604 goto WritePvarResortedInterval_ret_WRITE_FAIL;
7605 }
7606 }
7607 }
7608 if (!alt_allele_written) {
7609 *cswritep++ = output_missing_geno_char;
7610 }
7611
7612 if (write_qual) {
7613 *cswritep++ = '\t';
7614 if (!IsSet(qual_present, variant_uidx)) {
7615 *cswritep++ = '.';
7616 } else {
7617 cswritep = ftoa_g(quals[variant_uidx], cswritep);
7618 }
7619 }
7620
7621 if (write_filter) {
7622 *cswritep++ = '\t';
7623 if (!IsSet(filter_present, variant_uidx)) {
7624 *cswritep++ = '.';
7625 } else if (!IsSet(filter_npass, variant_uidx)) {
7626 cswritep = strcpya_k(cswritep, "PASS");
7627 } else {
7628 cswritep = strcpya(cswritep, filter_storage[variant_uidx]);
7629 }
7630 }
7631
7632 if (write_info) {
7633 *cswritep++ = '\t';
7634 const uint32_t is_pr = all_nonref || (nonref_flags && IsSet(nonref_flags, variant_uidx));
7635 if (pvar_info_strs) {
7636 PvarInfoWrite(info_pr_flag_present, is_pr, pvar_info_strs[variant_idx - variant_idx_start], &cswritep);
7637 } else {
7638 if (is_pr) {
7639 cswritep = strcpya_k(cswritep, "PR");
7640 } else {
7641 *cswritep++ = '.';
7642 }
7643 }
7644 }
7645
7646 if (write_cm) {
7647 *cswritep++ = '\t';
7648 if (!variant_cms) {
7649 *cswritep++ = '0';
7650 } else {
7651 cswritep = dtoa_g_p8(variant_cms[variant_uidx], cswritep);
7652 }
7653 }
7654 AppendBinaryEoln(&cswritep);
7655 }
7656
7657 }
7658 while (0) {
7659 WritePvarResortedInterval_ret_WRITE_FAIL:
7660 reterr = kPglRetWriteFail;
7661 break;
7662 }
7663 *cswritepp = cswritep;
7664 *chr_fo_idxp = chr_fo_idx;
7665 *chr_endp = chr_end;
7666 *chr_buf_blenp = chr_buf_blen;
7667 return reterr;
7668 }
7669
7670 // allele_presents must be nullptr unless we're trimming alt alleles.
7671 //
7672 // The annoying part of this is handling a sequence of INFO strings that don't
7673 // fit in memory; we use a multipass approach for that. File creation,
7674 // allocation of buffers, and generating the header line occurs directly in
7675 // this function, while loading the next pvar_info_strs batch and writing the
7676 // next .pvar line batch are one level down.
7677 PglErr WritePvarResorted(const char* outname, const uintptr_t* variant_include, const ChrInfo* write_cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* qual_present, const float* quals, const uintptr_t* filter_present, const uintptr_t* filter_npass, const char* const* filter_storage, const uintptr_t* nonref_flags, const char* pvar_info_reload, const double* variant_cms, const uint32_t* new_variant_idx_to_old, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_slen, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t nonref_flags_storage, uint32_t max_filter_slen, uint32_t info_reload_slen, PvarPsamFlags pvar_psam_flags, uint32_t thread_ct, char* xheader) {
7678 unsigned char* bigstack_mark = g_bigstack_base;
7679 char* cswritep = nullptr;
7680 PglErr reterr = kPglRetSuccess;
7681 CompressStreamState css;
7682 TextStream pvar_reload_txs;
7683 PreinitCstream(&css);
7684 PreinitTextStream(&pvar_reload_txs);
7685 {
7686 const uint32_t max_chr_blen = GetMaxChrSlen(write_cip) + 1;
7687 // includes trailing tab
7688 char* chr_buf;
7689
7690 if (unlikely(
7691 bigstack_alloc_c(max_chr_blen, &chr_buf))) {
7692 goto WritePvarResorted_ret_NOMEM;
7693 }
7694 uintptr_t overflow_buf_size = kCompressStreamBlock + kMaxIdSlen + 512 + 2 * max_allele_slen + max_filter_slen + info_reload_slen;
7695 if (overflow_buf_size < 2 * kCompressStreamBlock) {
7696 overflow_buf_size = 2 * kCompressStreamBlock;
7697 }
7698 const uint32_t output_zst = (pvar_psam_flags / kfPvarZs) & 1;
7699 reterr = InitCstreamAlloc(outname, 0, output_zst, thread_ct, overflow_buf_size, &css, &cswritep);
7700 if (unlikely(reterr)) {
7701 goto WritePvarResorted_ret_1;
7702 }
7703 const uint32_t raw_variant_ctl = BitCtToWordCt(raw_variant_ct);
7704 const uint32_t all_nonref = (nonref_flags_storage == 2);
7705 uint32_t write_info_pr = all_nonref;
7706 uint32_t write_info = (pvar_psam_flags & kfPvarColInfo) || pvar_info_reload;
7707 if (write_info && nonref_flags) {
7708 write_info_pr = !IntersectionIsEmpty(variant_include, nonref_flags, raw_variant_ctl);
7709 }
7710 write_info_pr = write_info_pr && write_info;
7711 if (unlikely(write_info_pr && (info_flags & kfInfoPrNonflagPresent))) {
7712 logputs("\n");
7713 logerrputs("Error: Conflicting INFO:PR definitions. Either fix all REF alleles so that the\n'provisional reference' flag is no longer needed, or remove/rename the other\nuse of the INFO:PR key.\n");
7714 goto WritePvarResorted_ret_INCONSISTENT_INPUT;
7715 }
7716
7717 uint32_t write_filter = 0;
7718 if (pvar_psam_flags & kfPvarColFilter) {
7719 write_filter = 1;
7720 } else if ((pvar_psam_flags & kfPvarColMaybefilter) && filter_present) {
7721 write_filter = !IntersectionIsEmpty(variant_include, filter_present, raw_variant_ctl);
7722 }
7723 const uint32_t info_pr_flag_present = (info_flags / kfInfoPrFlagPresent) & 1;
7724 if (pvar_psam_flags & (kfPvarColXheader | kfPvarColVcfheader)) {
7725 reterr = PvarXheaderWrite(nullptr, write_cip, variant_bps, allele_idx_offsets, allele_storage, new_variant_idx_to_old, xheader_blen, (pvar_psam_flags / kfPvarColVcfheader) & 1, write_filter, write_info, write_info_pr && (!info_pr_flag_present), max_allele_slen, kfUnsortedVar0, xheader, &css, &cswritep);
7726 if (unlikely(reterr)) {
7727 goto WritePvarResorted_ret_1;
7728 }
7729 }
7730 if (write_cip->chrset_source) {
7731 AppendChrsetLine(write_cip, &cswritep);
7732 }
7733 cswritep = strcpya_k(cswritep, "#CHROM\tPOS\tID\tREF\tALT");
7734
7735 uint32_t write_qual = 0;
7736 if (pvar_psam_flags & kfPvarColQual) {
7737 write_qual = 1;
7738 } else if ((pvar_psam_flags & kfPvarColMaybequal) && qual_present) {
7739 write_qual = !IntersectionIsEmpty(variant_include, qual_present, raw_variant_ctl);
7740 }
7741 if (write_qual) {
7742 cswritep = strcpya_k(cswritep, "\tQUAL");
7743 }
7744
7745 if (write_filter) {
7746 cswritep = strcpya_k(cswritep, "\tFILTER");
7747 }
7748
7749 if (write_info) {
7750 cswritep = strcpya_k(cswritep, "\tINFO");
7751 }
7752
7753 uint32_t write_cm = 0;
7754 if (pvar_psam_flags & kfPvarColCm) {
7755 write_cm = 1;
7756 } else if ((pvar_psam_flags & kfPvarColMaybecm) && variant_cms) {
7757 if (raw_variant_ct == variant_ct) {
7758 // nonzero_cm_present check was performed
7759 write_cm = 1;
7760 } else {
7761 uintptr_t variant_uidx_base = 0;
7762 uintptr_t cur_bits = variant_include[0];
7763 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
7764 const uintptr_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
7765 if (variant_cms[variant_uidx] != 0.0) {
7766 write_cm = 1;
7767 break;
7768 }
7769 }
7770 }
7771 }
7772 if (write_cm) {
7773 cswritep = strcpya_k(cswritep, "\tCM");
7774 }
7775 AppendBinaryEoln(&cswritep);
7776
7777 uint32_t* old_variant_uidx_to_new = nullptr;
7778 char** pvar_info_strs = nullptr;
7779 uint32_t batch_size = variant_ct;
7780 uint32_t batch_ct = 1;
7781 if (pvar_info_reload) {
7782 if (unlikely(bigstack_alloc_u32(raw_variant_ct, &old_variant_uidx_to_new))) {
7783 goto WritePvarResorted_ret_NOMEM;
7784 }
7785 SetAllU32Arr(raw_variant_ct, old_variant_uidx_to_new);
7786 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
7787 const uint32_t old_variant_uidx = new_variant_idx_to_old[variant_idx];
7788 old_variant_uidx_to_new[old_variant_uidx] = variant_idx;
7789 }
7790
7791 uint32_t decompress_thread_ct = 1;
7792 if (!output_zst) {
7793 decompress_thread_ct = thread_ct - 1;
7794 if (!decompress_thread_ct) {
7795 decompress_thread_ct = 1;
7796 }
7797 }
7798 reterr = SizeAndInitTextStream(pvar_info_reload, bigstack_left() / 4, decompress_thread_ct, &pvar_reload_txs);
7799 if (unlikely(reterr)) {
7800 goto WritePvarResorted_ret_TSTREAM_FAIL;
7801 }
7802
7803 // subtract kCacheline to allow for rounding
7804 uintptr_t bytes_left = bigstack_left() - kCacheline;
7805 uint32_t single_variant_byte_ct = info_reload_slen + 1 + sizeof(intptr_t);
7806 if (variant_ct * single_variant_byte_ct > bytes_left) {
7807 batch_size = bytes_left / single_variant_byte_ct;
7808 batch_ct = 1 + (variant_ct - 1) / batch_size;
7809 }
7810 pvar_info_strs = S_CAST(char**, bigstack_alloc_raw_rd(batch_size * sizeof(intptr_t)));
7811 }
7812
7813 uint32_t variant_idx_start = 0;
7814 uint32_t chr_fo_idx = UINT32_MAX;
7815 uint32_t chr_end = 0;
7816 uint32_t chr_buf_blen = 0;
7817 uint32_t pct = 0;
7818 uint32_t next_print_variant_idx = variant_ct / 100;
7819 fputs("0%", stdout);
7820 fflush(stdout);
7821 for (uint32_t batch_idx = 0; batch_idx != batch_ct; ++batch_idx) {
7822 if (variant_idx_start >= next_print_variant_idx) {
7823 if (pct > 10) {
7824 putc_unlocked('\b', stdout);
7825 }
7826 pct = (variant_idx_start * 100LLU) / variant_ct;
7827 printf("\b\b%u%%", pct++);
7828 fflush(stdout);
7829 next_print_variant_idx = (pct * S_CAST(uint64_t, variant_ct)) / 100;
7830 }
7831 uint32_t variant_idx_end = MINV(variant_idx_start + batch_size, variant_ct);
7832 if (pvar_info_reload) {
7833 reterr = PvarInfoReloadInterval(old_variant_uidx_to_new, variant_idx_start, variant_idx_end, &pvar_reload_txs, pvar_info_strs);
7834 if (unlikely(reterr)) {
7835 goto WritePvarResorted_ret_TSTREAM_FAIL;
7836 }
7837 }
7838 reterr = WritePvarResortedInterval(write_cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, allele_presents, refalt1_select, qual_present, quals, filter_present, filter_npass, filter_storage, nonref_flags, variant_cms, new_variant_idx_to_old, variant_idx_start, variant_idx_end, info_pr_flag_present, write_qual, write_filter, write_info, all_nonref, write_cm, pvar_info_strs, &css, &cswritep, &chr_fo_idx, &chr_end, &chr_buf_blen, chr_buf);
7839 if (unlikely(reterr)) {
7840 goto WritePvarResorted_ret_1;
7841 }
7842 variant_idx_start = variant_idx_end;
7843 }
7844
7845 if (unlikely(CswriteCloseNull(&css, cswritep))) {
7846 goto WritePvarResorted_ret_WRITE_FAIL;
7847 }
7848 if (pct > 10) {
7849 putc_unlocked('\b', stdout);
7850 }
7851 fputs("\b\b", stdout);
7852 }
7853 while (0) {
7854 WritePvarResorted_ret_NOMEM:
7855 reterr = kPglRetNomem;
7856 break;
7857 WritePvarResorted_ret_TSTREAM_FAIL:
7858 TextStreamErrPrint(pvar_info_reload, &pvar_reload_txs);
7859 break;
7860 WritePvarResorted_ret_WRITE_FAIL:
7861 reterr = kPglRetWriteFail;
7862 break;
7863 WritePvarResorted_ret_INCONSISTENT_INPUT:
7864 reterr = kPglRetInconsistentInput;
7865 break;
7866 }
7867 WritePvarResorted_ret_1:
7868 CswriteCloseCond(&css, cswritep);
7869 CleanupTextStream2(pvar_info_reload, &pvar_reload_txs, &reterr);
7870 BigstackReset(bigstack_mark);
7871 return reterr;
7872 }
7873
// Sorted-variant driver for the --make-... family of outputs.
// Computes new_variant_idx_to_old[] (variants ordered by chromosome, then
// base-pair position, then variant ID), and then writes whichever of
// .bim / .pvar / .fam / .psam / .bed-or-.pgen are requested in
// make_plink2_flags, passing the new ordering to the variant-file writers.
// - chr_idxs, when non-null, supplies a per-variant chromosome index which is
//   used in place of cip's chromosome boundaries.
// - use_nsort is forwarded to SortChr() and StrptrArrSortMain().
// - outname_end points at the position in outname where each output's file
//   extension is written.
// - Both ends of the bigstack arena are restored to their entry state before
//   returning.
PglErr MakePlink2Vsort(const uintptr_t* sample_include, const PedigreeIdInfo* piip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const char* pheno_names, const uint32_t* new_sample_idx_to_old, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* pvar_qual_present, const float* pvar_quals, const uintptr_t* pvar_filter_present, const uintptr_t* pvar_filter_npass, const char* const* pvar_filter_storage, const char* pvar_info_reload, const double* variant_cms, const ChrIdx* chr_idxs, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t pheno_ct, uintptr_t max_pheno_name_blen, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_ct, uint32_t max_allele_slen, uint32_t max_filter_slen, uint32_t info_reload_slen, uint32_t max_thread_ct, uint32_t hard_call_thresh, uint32_t dosage_erase_thresh, MakePlink2Flags make_plink2_flags, uint32_t use_nsort, PvarPsamFlags pvar_psam_flags, char* xheader, PgenReader* simple_pgrp, char* outname, char* outname_end) {
  unsigned char* bigstack_mark = g_bigstack_base;
  unsigned char* bigstack_end_mark = g_bigstack_end;
  PglErr reterr = kPglRetSuccess;
  {
    // Resort the variants.
    // 1. (todo) Apply --update-chr if necessary.
    // 2. Count number of remaining variants in each chromosome, then sort the
    //    chromosomes.
    // 3. Within each chromosome, sort by position.  Could add 0.5 for
    //    non-SNPs (not currently implemented)?  Could multithread this by
    //    chromosome, and/or use C++17 multithreaded sort, but INFO-reload is a
    //    much bigger bottleneck in practice.
    // 4. Scan for position ties, sort on ID (according to --sort-vars setting,
    //    defaults to natural-sort but can be ASCII).
    // 5. Fill new_variant_idx_to_old, free sort buffers.

    // possible todo: put this in a "copy constructor" function
    // write_chr_info starts as a shallow copy of *cip: pointer members alias
    // cip's data (constness is cast away), scalar members are copied.
    // chr_mask and chr_idx_to_foidx are freshly allocated below.
    // chr_file_order, chr_fo_vidx_start and chr_ct are not assigned in this
    // function but are read after the SortChr() call -- presumably SortChr()
    // fills them in; confirm against its definition.
    ChrInfo write_chr_info;

    write_chr_info.haploid_mask = K_CAST(uintptr_t*, cip->haploid_mask);
    write_chr_info.nonstd_names = K_CAST(const char**, cip->nonstd_names);
    write_chr_info.nonstd_id_htable = K_CAST(uint32_t*, cip->nonstd_id_htable);
    write_chr_info.chrset_source = cip->chrset_source;
    memcpy(write_chr_info.chr_exclude, cip->chr_exclude, kChrExcludeWords * sizeof(intptr_t));
    STD_ARRAY_COPY(cip->xymt_codes, kChrOffsetCt, write_chr_info.xymt_codes);
    write_chr_info.max_numeric_code = cip->max_numeric_code;
    write_chr_info.max_code = cip->max_code;
    write_chr_info.autosome_ct = cip->autosome_ct;
    write_chr_info.zero_extra_chrs = cip->zero_extra_chrs;
    write_chr_info.name_ct = cip->name_ct;
    write_chr_info.incl_excl_name_stack = K_CAST(LlStr*, cip->incl_excl_name_stack);
    write_chr_info.is_include_stack = cip->is_include_stack;
    write_chr_info.output_encoding = cip->output_encoding;

    const uint32_t chr_code_end = cip->max_code + 1 + cip->name_ct;
    // chr_idx_to_size[chr_idx] := number of included variants on that
    // chromosome.  It is zero-initialized and obtained from the "end" arena
    // allocator.
    uint32_t* chr_idx_to_size;
    if (unlikely(
            bigstack_calloc_w(kChrMaskWords, &write_chr_info.chr_mask) ||
            bigstack_alloc_u32(chr_code_end, &write_chr_info.chr_idx_to_foidx) ||
            bigstack_end_calloc_u32(chr_code_end, &chr_idx_to_size))) {
      goto MakePlink2Vsort_ret_NOMEM;
    }
    SetAllU32Arr(chr_code_end, write_chr_info.chr_idx_to_foidx);
    if (chr_idxs) {
      // Per-variant chromosome indexes were provided: count directly.
      uintptr_t variant_uidx_base = 0;
      uintptr_t cur_base = variant_include[0];
      for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
        const uintptr_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_base);
        chr_idx_to_size[chr_idxs[variant_uidx]] += 1;
      }
      for (uint32_t chr_idx = 0; chr_idx != chr_code_end; ++chr_idx) {
        if (chr_idx_to_size[chr_idx]) {
          SetBit(chr_idx, write_chr_info.chr_mask);
        }
      }
      // bugfix: chr_file_order is invalid
      // (Note that this branch reads only chr_idxs[], never
      // cip->chr_file_order.)
    } else {
      // Each chromosome occupies one contiguous range of variant indexes in
      // the input; count the included variants in each range.
      const uint32_t* chr_fo_vidx_start = cip->chr_fo_vidx_start;
      const uint32_t orig_chr_ct = cip->chr_ct;
      uint32_t vidx_start = 0;
      for (uint32_t chr_fo_idx = 0; chr_fo_idx != orig_chr_ct; ++chr_fo_idx) {
        const uint32_t vidx_end = chr_fo_vidx_start[chr_fo_idx + 1];
        const uint32_t chr_idx = cip->chr_file_order[chr_fo_idx];
        chr_idx_to_size[chr_idx] = PopcountBitRange(variant_include, vidx_start, vidx_end);
        if (chr_idx_to_size[chr_idx]) {
          SetBit(chr_idx, write_chr_info.chr_mask);
        }
        vidx_start = vidx_end;
      }
    }
    // Any SortChr() failure is reported as out-of-memory.
    if (unlikely(SortChr(cip, chr_idx_to_size, use_nsort, &write_chr_info))) {
      goto MakePlink2Vsort_ret_NOMEM;
    }

    uint32_t* new_variant_idx_to_old;

    // pos_vidx_sort_buf has variant_bp in high bits, variant_uidx in low
    uint64_t* pos_vidx_sort_buf;
    if (unlikely(
            bigstack_alloc_u32(variant_ct, &new_variant_idx_to_old) ||
            bigstack_alloc_u64(variant_ct + 1, &pos_vidx_sort_buf))) {
      goto MakePlink2Vsort_ret_NOMEM;
    }
    // One extra trailing entry, set to the maximum value, so that the
    // tie-scanning loop below can always read one element past a chromosome's
    // last variant.
    pos_vidx_sort_buf[variant_ct] = ~0LLU;
    const uint32_t new_chr_ct = write_chr_info.chr_ct;
    if (chr_idxs) {
      // Scatter variants into their chromosome's output range.
      // next_write_vidxs[chr_idx] is the next unfilled slot for that
      // chromosome; it is only initialized for chromosomes present in the new
      // file order.
      uint32_t* next_write_vidxs;
      if (unlikely(bigstack_alloc_u32(chr_code_end, &next_write_vidxs))) {
        goto MakePlink2Vsort_ret_NOMEM;
      }
      for (uint32_t new_chr_fo_idx = 0; new_chr_fo_idx != new_chr_ct; ++new_chr_fo_idx) {
        const uint32_t chr_idx = write_chr_info.chr_file_order[new_chr_fo_idx];
        next_write_vidxs[chr_idx] = write_chr_info.chr_fo_vidx_start[new_chr_fo_idx];
      }
      uintptr_t variant_uidx_base = 0;
      uintptr_t cur_bits = variant_include[0];
      for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
        const uintptr_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
        const uint32_t chr_idx = chr_idxs[variant_uidx];
        const uint32_t write_vidx = next_write_vidxs[chr_idx];
        pos_vidx_sort_buf[write_vidx] = (S_CAST(uint64_t, variant_bps[variant_uidx]) << 32) | variant_uidx;
        next_write_vidxs[chr_idx] += 1;
      }
      BigstackReset(next_write_vidxs);
    } else {
      // Walk the included variants in input order.  Whenever a chromosome
      // boundary is crossed, jump write_vidx to the start of that chromosome's
      // range in the new order; otherwise it just advances by one per variant.
      uint32_t old_chr_fo_idx = UINT32_MAX;
      uint32_t chr_end = 0;
      uintptr_t variant_uidx_base = 0;
      uintptr_t cur_bits = variant_include[0];
      uint32_t chr_idx = 0;
      uint32_t write_vidx = 0;
      for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx, ++write_vidx) {
        const uint32_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
        if (variant_uidx >= chr_end) {
          do {
            // The first increment wraps old_chr_fo_idx from UINT32_MAX to 0.
            ++old_chr_fo_idx;
            chr_end = cip->chr_fo_vidx_start[old_chr_fo_idx + 1];
          } while (variant_uidx >= chr_end);
          chr_idx = cip->chr_file_order[old_chr_fo_idx];
          // bugfix (8 Sep 2018): write_vidx was set to the wrong value here
          const uint32_t new_chr_fo_idx = write_chr_info.chr_idx_to_foidx[chr_idx];
          write_vidx = write_chr_info.chr_fo_vidx_start[new_chr_fo_idx];
        }
        pos_vidx_sort_buf[write_vidx] = (S_CAST(uint64_t, variant_bps[variant_uidx]) << 32) | variant_uidx;
      }
    }

    // All currently-unallocated arena memory serves as scratch space for
    // sorting same-position variants by ID.  Nothing is formally allocated, so
    // no further bottom-of-arena allocation occurs until the sort loop is
    // done.
    StrSortIndexedDeref* same_pos_sort_buf = R_CAST(StrSortIndexedDeref*, g_bigstack_base);
    const uintptr_t same_pos_sort_buf_size = bigstack_left() / sizeof(StrSortIndexedDeref);

    uint32_t vidx_start = 0;
    uint32_t* new_variant_idx_to_old_iter = new_variant_idx_to_old;
    for (uint32_t new_chr_fo_idx = 0; new_chr_fo_idx != new_chr_ct; ++new_chr_fo_idx) {
      const uint32_t vidx_end = write_chr_info.chr_fo_vidx_start[new_chr_fo_idx + 1];
      const uint32_t chr_size = vidx_end - vidx_start;
      // Temporarily overwrite the first entry past this chromosome (the next
      // chromosome's first entry, or the global trailing entry) with the
      // maximum value; the saved value is restored at the end of the
      // iteration.
      const uint64_t post_entry = pos_vidx_sort_buf[vidx_end];
      pos_vidx_sort_buf[vidx_end] = ~0LLU;  // simplify end-of-chromosome logic
      uint64_t* pos_vidx_sort_chr = &(pos_vidx_sort_buf[vidx_start]);
      STD_SORT_PAR_UNSEQ(chr_size, u64cmp, pos_vidx_sort_chr);
      // The "prev" variables describe the entry at index cidx - 1, which has
      // not been written to new_variant_idx_to_old[] yet.
      uint32_t prev_pos = pos_vidx_sort_chr[0] >> 32;
      uint32_t prev_variant_uidx = S_CAST(uint32_t, pos_vidx_sort_chr[0]);
      uint32_t prev_cidx = 0;
      uint32_t cidx = 1;
      // is chr_size == 0 possible here?  document if this code is revisited.
      for (; cidx < chr_size; ++cidx) {
        uint64_t cur_entry = pos_vidx_sort_chr[cidx];
        uint32_t cur_pos = cur_entry >> 32;
        if (cur_pos == prev_pos) {
          // Position tie: gather every entry at this position, starting with
          // the pending "prev" entry, and order them by variant ID.
          same_pos_sort_buf[0].strptr = variant_ids[prev_variant_uidx];
          same_pos_sort_buf[0].orig_idx = prev_variant_uidx;
          uint32_t equal_pos_ct = 1;
          const uint64_t* pos_vidx_sort_chr2 = &(pos_vidx_sort_chr[prev_cidx]);
          do {
            if (unlikely(equal_pos_ct >= same_pos_sort_buf_size)) {
              goto MakePlink2Vsort_ret_NOMEM;
            }
            const uint32_t variant_uidx = S_CAST(uint32_t, cur_entry);
            same_pos_sort_buf[equal_pos_ct].strptr = variant_ids[variant_uidx];
            same_pos_sort_buf[equal_pos_ct].orig_idx = variant_uidx;
            // This may read index chr_size, i.e. the max-valued entry placed
            // above.
            cur_entry = pos_vidx_sort_chr2[++equal_pos_ct];
            cur_pos = cur_entry >> 32;
          } while (cur_pos == prev_pos);
          StrptrArrSortMain(equal_pos_ct, 1, use_nsort, same_pos_sort_buf);
          for (uint32_t equal_pos_idx = 0; equal_pos_idx != equal_pos_ct; ++equal_pos_idx) {
            *new_variant_idx_to_old_iter++ = same_pos_sort_buf[equal_pos_idx].orig_idx;
          }
          // cidx now indexes the first entry after the tied group (that entry
          // is already loaded in cur_entry/cur_pos).
          cidx += equal_pos_ct - 1;
        } else {
          *new_variant_idx_to_old_iter++ = prev_variant_uidx;
        }
        prev_pos = cur_pos;
        prev_cidx = cidx;
        prev_variant_uidx = S_CAST(uint32_t, cur_entry);
      }
      if (cidx == chr_size) {
        // if [cidx - 1] is part of an identical-bp batch, cidx will actually
        // be chr_size + 1 after loop exit.  It's equal to chr_size iff we
        // haven't written the last entry to new_variant_idx_to_old[].
        *new_variant_idx_to_old_iter++ = prev_variant_uidx;
      }
      vidx_start = vidx_end;
      pos_vidx_sort_buf[vidx_end] = post_entry;
    }
    // Release the sort buffer; new_variant_idx_to_old[] was allocated before
    // it and is not affected by this reset.
    BigstackReset(pos_vidx_sort_buf);

    if (make_plink2_flags & kfMakeBim) {
      const uint32_t bim_zst = (make_plink2_flags / kfMakeBimZs) & 1;
      OutnameZstSet(".bim", bim_zst, outname_end);
      logprintfww5("Writing %s ... ", outname);
      fflush(stdout);

      reterr = WriteBimResorted(outname, &write_chr_info, variant_bps, variant_ids, allele_idx_offsets, allele_storage, allele_presents, refalt1_select, variant_cms, new_variant_idx_to_old, variant_ct, max_allele_slen, bim_zst, max_thread_ct);
      if (unlikely(reterr)) {
        goto MakePlink2Vsort_ret_1;
      }
      logputs("done.\n");
    }
    if (make_plink2_flags & kfMakePvar) {
      OutnameZstSet(".pvar", pvar_psam_flags & kfPvarZs, outname_end);
      logprintfww5("Writing %s ... ", outname);
      fflush(stdout);
      // nonref_flags_storage is 3 when the reader exposes a per-variant
      // nonref_flags array; otherwise it is 2 if the reader's global flags
      // include kfPgenGlobalAllNonref, and 1 if not.
      uint32_t nonref_flags_storage = 3;
      if (!PgrGetNonrefFlags(simple_pgrp)) {
        nonref_flags_storage = (PgrGetGflags(simple_pgrp) & kfPgenGlobalAllNonref)? 2 : 1;
      }
      reterr = WritePvarResorted(outname, variant_include, &write_chr_info, variant_bps, variant_ids, allele_idx_offsets, allele_storage, allele_presents, refalt1_select, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, PgrGetNonrefFlags(simple_pgrp), pvar_info_reload, variant_cms, new_variant_idx_to_old, raw_variant_ct, variant_ct, max_allele_slen, xheader_blen, info_flags, nonref_flags_storage, max_filter_slen, info_reload_slen, pvar_psam_flags, max_thread_ct, xheader);
      if (unlikely(reterr)) {
        goto MakePlink2Vsort_ret_1;
      }
      logputs("done.\n");
    }
    if (make_plink2_flags & kfMakeFam) {
      snprintf(outname_end, kMaxOutfnameExtBlen, ".fam");
      logprintfww5("Writing %s ... ", outname);
      fflush(stdout);
      reterr = WriteFam(outname, sample_include, piip, sex_nm, sex_male, pheno_cols, new_sample_idx_to_old, sample_ct, pheno_ct, '\t');
      if (unlikely(reterr)) {
        goto MakePlink2Vsort_ret_1;
      }
      logputs("done.\n");
    }
    if (make_plink2_flags & kfMakePsam) {
      snprintf(outname_end, kMaxOutfnameExtBlen, ".psam");
      logprintfww5("Writing %s ... ", outname);
      fflush(stdout);
      reterr = WritePsam(outname, sample_include, piip, sex_nm, sex_male, pheno_cols, pheno_names, new_sample_idx_to_old, sample_ct, pheno_ct, max_pheno_name_blen, pvar_psam_flags);
      if (unlikely(reterr)) {
        goto MakePlink2Vsort_ret_1;
      }
      logputs("done.\n");
    }
    if (make_plink2_flags & (kfMakeBed | kfMakePgen)) {
      // boilerplate from start of MakePlink2NoVsort()
      if (make_plink2_flags & kfMakePlink2MMask) {
        logerrputs("Error: --make-bed/--make-[b]pgen multiallelics= is currently under development.\n");
        reterr = kPglRetNotYetSupported;
        goto MakePlink2Vsort_ret_1;
      }
      MakeCommon mc;
      mc.plink2_write_flags = kfPlink2Write0;
      mc.raw_sample_ct = raw_sample_ct;
      mc.sample_ct = sample_ct;
      uintptr_t* sex_male_collapsed = nullptr;
      uintptr_t* sex_female_collapsed = nullptr;
      if (make_plink2_flags & kfMakePlink2SetHhMissing) {
        // Build male/female bitmasks over the included samples, plus the
        // interleaved versions stored in mc.
        const uint32_t sample_ctv = BitCtToVecCt(sample_ct);
        const uint32_t sample_ctl = BitCtToWordCt(sample_ct);
        uintptr_t* new_sex_male;
        if (unlikely(
                bigstack_alloc_w(sample_ctv * kWordsPerVec, &new_sex_male) ||
                bigstack_alloc_w(sample_ctv * kWordsPerVec, &mc.sex_male_collapsed_interleaved) ||
                bigstack_alloc_w(sample_ctv * kWordsPerVec, &sex_female_collapsed) ||
                bigstack_alloc_w(sample_ctv * kWordsPerVec, &mc.sex_female_collapsed_interleaved))) {
          goto MakePlink2Vsort_ret_NOMEM;
        }
        CopyBitarrSubset(sex_male, sample_include, sample_ct, new_sex_male);
        ZeroTrailingWords(sample_ctl, new_sex_male);
        sex_male_collapsed = new_sex_male;
        FillInterleavedMaskVec(sex_male_collapsed, sample_ctv, mc.sex_male_collapsed_interleaved);

        // The female mask starts as the collapsed sex_nm bits and is then
        // combined with the male mask via BitvecInvmask().
        CopyBitarrSubset(sex_nm, sample_include, sample_ct, sex_female_collapsed);
        BitvecInvmask(new_sex_male, sample_ctl, sex_female_collapsed);
        ZeroTrailingWords(sample_ctl, sex_female_collapsed);
        FillInterleavedMaskVec(sex_female_collapsed, sample_ctv, mc.sex_female_collapsed_interleaved);

        mc.plink2_write_flags |= kfPlink2WriteSetHhMissing;
      } else {
        // defensive
        mc.sex_male_collapsed_interleaved = nullptr;
        mc.sex_female_collapsed_interleaved = nullptr;
      }
      if (make_plink2_flags & kfMakePlink2SetMixedMtMissing) {
        mc.plink2_write_flags |= kfPlink2WriteSetMixedMtMissing;
      }
      mc.cip = &write_chr_info;
      // write_allele_idx_offsets stays null unless the input has an
      // allele_idx_offsets array and kfMakePlink2EraseAlt2Plus is unset.
      const uintptr_t* write_allele_idx_offsets = nullptr;
      if (allele_idx_offsets && (!(make_plink2_flags & kfMakePlink2EraseAlt2Plus))) {
        // new_variant_idx_to_old is always non-null at this point, so the
        // first branch is always taken in this function.
        if ((variant_ct < raw_variant_ct) || new_variant_idx_to_old) {
          uintptr_t* new_allele_idx_offsets;
          if (unlikely(bigstack_alloc_w(variant_ct + 1, &new_allele_idx_offsets))) {
            goto MakePlink2Vsort_ret_NOMEM;
          }
          const uintptr_t final_offset = InitWriteAlleleIdxOffsets(variant_include, allele_idx_offsets, allele_presents, refalt1_select, new_variant_idx_to_old, variant_ct, new_allele_idx_offsets);
          // A final offset of exactly 2 * variant_ct means every written
          // variant has two alleles, in which case the array is discarded and
          // the null pointer is kept.
          if (final_offset != 2 * variant_ct) {
            new_allele_idx_offsets[variant_ct] = final_offset;
            write_allele_idx_offsets = new_allele_idx_offsets;
          } else {
            BigstackReset(new_allele_idx_offsets);
          }
        } else {
          write_allele_idx_offsets = allele_idx_offsets;
        }
      }
      reterr = MakePgenRobust(sample_include, new_sample_idx_to_old, variant_include, allele_idx_offsets, allele_presents, refalt1_select, write_allele_idx_offsets, new_variant_idx_to_old, sex_male_collapsed, sex_female_collapsed, raw_variant_ct, variant_ct, variant_ct, max_allele_ct, hard_call_thresh, dosage_erase_thresh, make_plink2_flags, &mc, simple_pgrp, outname, outname_end);
      if (unlikely(reterr)) {
        goto MakePlink2Vsort_ret_1;
      }
    }
  }
  while (0) {
  MakePlink2Vsort_ret_NOMEM:
    reterr = kPglRetNomem;
    break;
  }
 MakePlink2Vsort_ret_1:
  BigstackDoubleReset(bigstack_mark, bigstack_end_mark);
  return reterr;
}
8183
SampleSortFileMap(const uintptr_t * sample_include,const SampleIdInfo * siip,const char * sample_sort_fname,uint32_t raw_sample_ct,uint32_t sample_ct,uint32_t ** new_sample_idx_to_old_ptr)8184 PglErr SampleSortFileMap(const uintptr_t* sample_include, const SampleIdInfo* siip, const char* sample_sort_fname, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t** new_sample_idx_to_old_ptr) {
8185 // assumes sample_ct >= 2 (enforced by caller)
8186 // return strbox is not collapsed
8187 unsigned char* bigstack_mark = g_bigstack_base;
8188 uintptr_t line_idx = 0;
8189 PglErr reterr = kPglRetSuccess;
8190 TextStream txs;
8191 PreinitTextStream(&txs);
8192 {
8193 char* idbuf;
8194 uintptr_t* already_seen;
8195 if (unlikely(
8196 bigstack_alloc_u32(raw_sample_ct, new_sample_idx_to_old_ptr) ||
8197 bigstack_alloc_c(siip->max_sample_id_blen, &idbuf) ||
8198 bigstack_calloc_w(BitCtToWordCt(raw_sample_ct), &already_seen))) {
8199 goto SampleSortFileMap_ret_NOMEM;
8200 }
8201
8202 uint32_t max_line_blen;
8203 if (unlikely(StandardizeMaxLineBlen(bigstack_left() - (bigstack_left() / 4), &max_line_blen))) {
8204 goto SampleSortFileMap_ret_NOMEM;
8205 }
8206 char* line_start;
8207 XidMode xid_mode;
8208 reterr = OpenAndLoadXidHeader(sample_sort_fname, "indiv-sort", (siip->sids || (siip->flags & kfSampleIdStrictSid0))? kfXidHeader0 : kfXidHeaderIgnoreSid, max_line_blen, &txs, &xid_mode, &line_idx, &line_start, nullptr);
8209 if (unlikely(reterr)) {
8210 if (reterr == kPglRetEof) {
8211 logerrputs("Error: --indiv-sort file is empty.\n");
8212 goto SampleSortFileMap_ret_MALFORMED_INPUT;
8213 }
8214 goto SampleSortFileMap_ret_TSTREAM_XID_FAIL;
8215 }
8216 uint32_t* xid_map;
8217 char* sorted_xidbox;
8218 uintptr_t max_xid_blen;
8219 reterr = SortedXidboxInitAlloc(sample_include, siip, sample_ct, 0, xid_mode, 0, &sorted_xidbox, &xid_map, &max_xid_blen);
8220 if (unlikely(reterr)) {
8221 goto SampleSortFileMap_ret_1;
8222 }
8223 uint32_t* new_sample_idx_to_old_iter = *new_sample_idx_to_old_ptr;
8224 if (*line_start == '#') {
8225 ++line_idx;
8226 line_start = TextGet(&txs);
8227 }
8228 for (; line_start; ++line_idx, line_start = TextGet(&txs)) {
8229 if (unlikely(line_start[0] == '#')) {
8230 snprintf(g_logbuf, kLogbufSize, "Error: Line %" PRIuPTR " of --indiv-sort file starts with a '#'. (This is only permitted before the first nonheader line, and if a #FID/IID header line is present it must denote the end of the header block.)\n", line_idx);
8231 goto SampleSortFileMap_ret_MALFORMED_INPUT_WW;
8232 }
8233 const char* linebuf_iter = line_start;
8234 uint32_t sample_uidx;
8235 if (!SortedXidboxReadFind(sorted_xidbox, xid_map, max_xid_blen, sample_ct, 0, xid_mode, &linebuf_iter, &sample_uidx, idbuf)) {
8236 if (unlikely(IsSet(already_seen, sample_uidx))) {
8237 char* tab_iter = AdvToDelim(idbuf, '\t');
8238 *tab_iter = ' ';
8239 if (xid_mode & kfXidModeFlagSid) {
8240 *AdvToDelim(&(tab_iter[1]), '\t') = ' ';
8241 }
8242 snprintf(g_logbuf, kLogbufSize, "Error: Duplicate sample ID '%s' in --indiv-sort file.\n", idbuf);
8243 goto SampleSortFileMap_ret_MALFORMED_INPUT_WW;
8244 }
8245 SetBit(sample_uidx, already_seen);
8246 *new_sample_idx_to_old_iter++ = sample_uidx;
8247 } else if (unlikely(!linebuf_iter)) {
8248 goto SampleSortFileMap_ret_MISSING_TOKENS;
8249 }
8250 }
8251 if (unlikely(TextStreamErrcode2(&txs, &reterr))) {
8252 goto SampleSortFileMap_ret_TSTREAM_FAIL;
8253 }
8254 if (unlikely(S_CAST(uintptr_t, new_sample_idx_to_old_iter - (*new_sample_idx_to_old_ptr)) != sample_ct)) {
8255 logerrputs("Error: --indiv-sort file does not contain all loaded sample IDs.\n");
8256 goto SampleSortFileMap_ret_INCONSISTENT_INPUT;
8257 }
8258 bigstack_mark = R_CAST(unsigned char*, idbuf);
8259 }
8260 while (0) {
8261 SampleSortFileMap_ret_NOMEM:
8262 reterr = kPglRetNomem;
8263 break;
8264 SampleSortFileMap_ret_MALFORMED_INPUT_WW:
8265 WordWrapB(0);
8266 logerrputsb();
8267 SampleSortFileMap_ret_MALFORMED_INPUT:
8268 reterr = kPglRetMalformedInput;
8269 break;
8270 SampleSortFileMap_ret_TSTREAM_XID_FAIL:
8271 if (!TextStreamErrcode(&txs)) {
8272 break;
8273 }
8274 SampleSortFileMap_ret_TSTREAM_FAIL:
8275 TextStreamErrPrint("--indiv-sort file", &txs);
8276 break;
8277 SampleSortFileMap_ret_MISSING_TOKENS:
8278 logerrprintf("Error: Line %" PRIuPTR " of --indiv-sort file has fewer tokens than expected.\n", line_idx);
8279 SampleSortFileMap_ret_INCONSISTENT_INPUT:
8280 reterr = kPglRetInconsistentInput;
8281 break;
8282 }
8283 SampleSortFileMap_ret_1:
8284 CleanupTextStream2("--indiv-sort file", &txs, &reterr);
8285 BigstackReset(bigstack_mark);
8286 return reterr;
8287 }
8288
8289 #ifdef __cplusplus
8290 } // namespace plink2
8291 #endif
8292