1 // This library is part of PLINK 2.00, copyright (C) 2005-2020 Shaun Purcell,
2 // Christopher Chang.
3 //
4 // This library is free software: you can redistribute it and/or modify it
5 // under the terms of the GNU Lesser General Public License as published by the
6 // Free Software Foundation; either version 3 of the License, or (at your
7 // option) any later version.
8 //
9 // This library is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
12 // for more details.
13 //
14 // You should have received a copy of the GNU Lesser General Public License
15 // along with this library. If not, see <http://www.gnu.org/licenses/>.
16
17
18 #include "plink2_common.h"
19
20 #ifdef __cplusplus
21 namespace plink2 {
22 #endif
23
InitPedigreeIdInfo(MiscFlags misc_flags,PedigreeIdInfo * piip)24 void InitPedigreeIdInfo(MiscFlags misc_flags, PedigreeIdInfo* piip) {
25 piip->sii.sample_ids = nullptr;
26 piip->sii.sids = nullptr;
27 piip->sii.max_sample_id_blen = 4;
28 piip->sii.max_sid_blen = 0;
29 piip->sii.flags = kfSampleId0;
30 if (misc_flags & kfMiscNoIdHeader) {
31 piip->sii.flags |= kfSampleIdNoIdHeader;
32 if (misc_flags & kfMiscNoIdHeaderIidOnly) {
33 piip->sii.flags |= kfSampleIdNoIdHeaderIidOnly;
34 }
35 }
36 if (misc_flags & kfMiscStrictSid0) {
37 piip->sii.flags |= kfSampleIdStrictSid0;
38 }
39 piip->parental_id_info.paternal_ids = nullptr;
40 piip->parental_id_info.maternal_ids = nullptr;
41 piip->parental_id_info.max_paternal_id_blen = 2;
42 piip->parental_id_info.max_maternal_id_blen = 2;
43 }
44
BigstackAllocPgv(uint32_t sample_ct,uint32_t multiallelic_needed,PgenGlobalFlags gflags,PgenVariant * pgvp)45 BoolErr BigstackAllocPgv(uint32_t sample_ct, uint32_t multiallelic_needed, PgenGlobalFlags gflags, PgenVariant* pgvp) {
46 const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);
47 if (unlikely(
48 bigstack_alloc_w(sample_ctl2, &(pgvp->genovec)))) {
49 return 1;
50 }
51 const uint32_t sample_ctl = BitCtToWordCt(sample_ct);
52 if (multiallelic_needed) {
53 if (unlikely(
54 bigstack_alloc_w(sample_ctl, &(pgvp->patch_01_set)) ||
55 bigstack_alloc_ac(sample_ct, &(pgvp->patch_01_vals)) ||
56 bigstack_alloc_w(sample_ctl, &(pgvp->patch_10_set)) ||
57 bigstack_alloc_ac(sample_ct * 2, &(pgvp->patch_10_vals)))) {
58 return 1;
59 }
60 } else {
61 // defensive
62 pgvp->patch_01_set = nullptr;
63 pgvp->patch_01_vals = nullptr;
64 pgvp->patch_10_set = nullptr;
65 pgvp->patch_10_vals = nullptr;
66 }
67 if (gflags & (kfPgenGlobalHardcallPhasePresent | kfPgenGlobalDosagePhasePresent)) {
68 if (unlikely(
69 bigstack_alloc_w(sample_ctl, &(pgvp->phasepresent)) ||
70 bigstack_alloc_w(sample_ctl, &(pgvp->phaseinfo)))) {
71 return 1;
72 }
73 } else {
74 pgvp->phasepresent = nullptr;
75 pgvp->phaseinfo = nullptr;
76 }
77 if (gflags & kfPgenGlobalDosagePresent) {
78 if (unlikely(
79 bigstack_alloc_w(sample_ctl, &(pgvp->dosage_present)) ||
80 bigstack_alloc_dosage(sample_ct, &(pgvp->dosage_main)))) {
81 return 1;
82 }
83 if (multiallelic_needed) {
84 // todo
85 }
86 if (gflags & kfPgenGlobalDosagePhasePresent) {
87 if (unlikely(
88 bigstack_alloc_w(sample_ctl, &(pgvp->dphase_present)) ||
89 bigstack_alloc_dphase(sample_ct, &(pgvp->dphase_delta)))) {
90 return 1;
91 }
92 if (multiallelic_needed) {
93 // todo
94 }
95 }
96 } else {
97 pgvp->dosage_present = nullptr;
98 pgvp->dosage_main = nullptr;
99 pgvp->dphase_present = nullptr;
100 pgvp->dphase_delta = nullptr;
101 // todo: multiallelic-dosage buffers
102 }
103 return 0;
104 }
105
106 static_assert(kDosageMid == 16384, "PrintDosageDecimal() needs to be updated.");
PrintDosageDecimal(uint32_t remainder,char * start)107 char* PrintDosageDecimal(uint32_t remainder, char* start) {
108 // Instead of constant 5-digit precision, we print fewer digits whenever that
109 // doesn't interfere with proper round-tripping. I.e. we search for the
110 // shortest string in
111 // ((n - 0.5)/16384, (n + 0.5)/16384).
112 // E.g. 3277/16384 is 0.20001 when printed with 5-digit precision, but we'd
113 // print that as 0.2 since that's still in (3276.5/16384, 3277.5/16384).
114 *start++ = '.';
115 // (remainder * 2) is in 32768ths
116 // 32768 * 625 = 20480k, smallest common denominator with 10^4
117
118 const uint32_t range_top_20480k = (remainder * 2 + 1) * 625;
119 // this is technically checking a half-open rather than a fully-open
120 // interval, but that's fine since we never hit the boundary points
121 if ((range_top_20480k % 2048) < 1250) {
122 // when this is true, the four-decimal-place approximation is in the range
123 // which round-trips back to our original number.
124 const uint32_t four_decimal_places = range_top_20480k / 2048;
125 return u32toa_trunc4(four_decimal_places, start);
126 }
127
128 // we wish to print (100000 * remainder + 8192) / 16384, left-0-padded. and
129 // may as well banker's round too.
130 //
131 // banker's rounding yields a different result than regular rounding for n/64
132 // when n is congruent to 1 mod 4:
133 // 1/64 = .015625 -> print 0.01562
134 // 3/64 = .046875 -> print 0.04688
135 // 5/64 = .078125 -> print 0.07812
136 const uint32_t five_decimal_places = ((3125 * remainder + 256) / 512) - ((remainder % 1024) == 256);
137 const uint32_t first_decimal_place = five_decimal_places / 10000;
138 *start++ = '0' + first_decimal_place;
139 const uint32_t last_four_digits = five_decimal_places - first_decimal_place * 10000;
140 if (last_four_digits) {
141 return u32toa_trunc4(last_four_digits, start);
142 }
143 return start;
144 }
145
146 static_assert(kDosageMax == 32768, "ddosagetoa() needs to be updated.");
ddosagetoa(uint64_t dosage,char * start)147 char* ddosagetoa(uint64_t dosage, char* start) {
148 // 3 digit precision seems like the best compromise between accuracy and
149 // avoidance of rounding ugliness
150 // (Rounding ugliness is not actually hidden for e.g. 1000 Genomes phase 1,
151 // since there are lots of 0.05 and 0.1 dosages which all get rounded in the
152 // same direction; oh well.)
153
154 // +16 since we need to round .99951 up to 1
155 const uint64_t dosage_p16 = dosage + 16;
156 start = u32toa(dosage_p16 / kDosageMax, start);
157 const uint32_t remainder_p16 = S_CAST(uint32_t, dosage_p16) % kDosageMax;
158 if (remainder_p16 < 33) {
159 return start;
160 }
161 // (1000 * remainder + 16384) / 32768
162 // 1/16 = .0625 -> print 0.062
163 // 3/16 = .1875 -> print 0.188
164 // 5/16 = .3125 -> print 0.312
165 // const uint32_t three_decimal_places = ((125 * remainder + 2048) / 4096) - ((remainder % 8192) == 2048);
166 const uint32_t three_decimal_places = ((125 * remainder_p16 + 48) / 4096) - ((remainder_p16 % 8192) == 4048);
167 // three_decimal_places guaranteed to be nonzero here
168 *start++ = '.';
169 const uint32_t first_decimal_place = three_decimal_places / 100;
170 *start++ = '0' + first_decimal_place;
171 const uint32_t last_two_digits = three_decimal_places - first_decimal_place * 100;
172 if (last_two_digits) {
173 memcpy_k(start, &(kDigitPair[last_two_digits]), 2);
174 return &(start[1 + (start[1] != '0')]);
175 }
176 return start;
177 }
178
179 static_assert(kDosageMax == 32768, "PrintDdosageDecimal() needs to be updated.");
PrintDdosageDecimal(uint32_t remainder,char * start)180 char* PrintDdosageDecimal(uint32_t remainder, char* start) {
181 // Instead of constant 5-digit precision, we print fewer digits whenever that
182 // doesn't interfere with proper round-tripping. I.e. we search for the
183 // shortest string in
184 // ((n - 0.5)/32768, (n + 0.5)/32768).
185 // E.g. 6554/32768 is 0.20001 when printed with 5-digit precision, but we'd
186 // print that as 0.2 since that's still in (6553.5/32768, 6554.5/32768).
187 // (This is more precise than necessary for most chromosomes, but it's
188 // necessary for chrX.)
189 *start++ = '.';
190 // (remainder * 2) is in 65536ths
191 // 65536 * 625 = 40960k, smallest common denominator with 10^4
192
193 const uint32_t range_top_40960k = (remainder * 2 + 1) * 625;
194 // this is technically checking a half-open rather than a fully-open
195 // interval, but that's fine since we never hit the boundary points
196 if ((range_top_40960k % 4096) < 1250) {
197 // when this is true, the four-decimal-place approximation is in the range
198 // which round-trips back to our original number.
199 const uint32_t four_decimal_places = range_top_40960k / 4096;
200 return u32toa_trunc4(four_decimal_places, start);
201 }
202
203 // we wish to print (100000 * remainder + 16384) / 32768, left-0-padded. and
204 // may as well banker's round too.
205 //
206 // banker's rounding yields a different result than regular rounding for n/64
207 // when n is congruent to 1 mod 4:
208 // 1/64 = .015625 -> print 0.01562
209 // 3/64 = .046875 -> print 0.04688
210 // 5/64 = .078125 -> print 0.07812
211 const uint32_t five_decimal_places = ((3125 * remainder + 512) / 1024) - ((remainder % 2048) == 512);
212 const uint32_t first_decimal_place = five_decimal_places / 10000;
213 *start++ = '0' + first_decimal_place;
214 const uint32_t last_four_digits = five_decimal_places - first_decimal_place * 10000;
215 if (last_four_digits) {
216 return u32toa_trunc4(last_four_digits, start);
217 }
218 return start;
219 }
220
221 static const uint16_t kHcToDosage[1024] = QUAD_TABLE256(0, kDosageMid, kDosageMax, kDosageMissing);
222
PopulateDenseDosage(const uintptr_t * genoarr,const uintptr_t * dosage_present,const Dosage * dosage_main,uint32_t sample_ct,uint32_t dosage_ct,Dosage * dense_dosage)223 void PopulateDenseDosage(const uintptr_t* genoarr, const uintptr_t* dosage_present, const Dosage* dosage_main, uint32_t sample_ct, uint32_t dosage_ct, Dosage* dense_dosage) {
224 GenoarrLookup256x2bx4(genoarr, kHcToDosage, sample_ct, dense_dosage);
225 // fill trailing bits with missing values so vector operations work
226 const uint32_t trailing_entry_ct = (-sample_ct) % kDosagePerVec;
227 if (trailing_entry_ct) {
228 Dosage* dense_dosage_end = &(dense_dosage[sample_ct]);
229 for (uint32_t uii = 0; uii != trailing_entry_ct; ++uii) {
230 dense_dosage_end[uii] = kDosageMissing;
231 }
232 }
233 if (dosage_ct) {
234 uintptr_t sample_idx_base = 0;
235 uintptr_t cur_bits = dosage_present[0];
236 for (uint32_t dosage_idx = 0; dosage_idx != dosage_ct; ++dosage_idx) {
237 const uintptr_t sample_idx = BitIter1(dosage_present, &sample_idx_base, &cur_bits);
238 dense_dosage[sample_idx] = dosage_main[dosage_idx];
239 }
240 }
241 }
242
PopulateRescaledDosage(const uintptr_t * genoarr,const uintptr_t * dosage_present,const Dosage * dosage_main,double slope,double intercept,double missing_val,uint32_t sample_ct,uint32_t dosage_ct,double * expanded_dosages)243 void PopulateRescaledDosage(const uintptr_t* genoarr, const uintptr_t* dosage_present, const Dosage* dosage_main, double slope, double intercept, double missing_val, uint32_t sample_ct, uint32_t dosage_ct, double* expanded_dosages) {
244 double lookup_vals[32] ALIGNV16;
245 lookup_vals[0] = intercept;
246 lookup_vals[2] = intercept + slope;
247 lookup_vals[4] = intercept + 2 * slope;
248 lookup_vals[6] = missing_val;
249 InitLookup16x8bx2(lookup_vals);
250 GenoarrLookup16x8bx2(genoarr, lookup_vals, sample_ct, expanded_dosages);
251 if (!dosage_ct) {
252 return;
253 }
254 slope *= kRecipDosageMid;
255 uintptr_t sample_uidx_base = 0;
256 uintptr_t cur_bits = dosage_present[0];
257 for (uint32_t dosage_idx = 0; dosage_idx != dosage_ct; ++dosage_idx) {
258 const uintptr_t sample_uidx = BitIter1(dosage_present, &sample_uidx_base, &cur_bits);
259 expanded_dosages[sample_uidx] = dosage_main[dosage_idx] * slope + intercept;
260 }
261 }
262
263 static_assert(sizeof(AlleleCode) == 1, "AtLeastOneMultiallelicHet() needs to be updated.");
AtLeastOneMultiallelicHet(const PgenVariant * pgvp,uint32_t sample_ct)264 uint32_t AtLeastOneMultiallelicHet(const PgenVariant* pgvp, uint32_t sample_ct) {
265 if (pgvp->patch_01_ct) {
266 return 1;
267 }
268 {
269 const uintptr_t* genoarr = pgvp->genovec;
270 const uint32_t fullvec_ct = sample_ct / kBitsPerWordD2;
271 for (uint32_t uii = 0; uii != fullvec_ct; ++uii) {
272 const uintptr_t geno_word = genoarr[uii];
273 if (Word01(geno_word)) {
274 return 1;
275 }
276 }
277 const uint32_t remainder = sample_ct % kBitsPerWordD2;
278 if (remainder) {
279 if (Word01(bzhi(genoarr[fullvec_ct], 2 * remainder))) {
280 return 1;
281 }
282 }
283 }
284 const uint32_t patch_10_ct = pgvp->patch_10_ct;
285 if (patch_10_ct) {
286 const AlleleCode* patch_10_vals = pgvp->patch_10_vals;
287 #ifdef __LP64__
288 const uint32_t fullvec_ct = patch_10_ct / (kBytesPerVec / 2);
289 const VecU16* patch_10_valias = R_CAST(const VecU16*, patch_10_vals);
290 const VecU16 m8 = vecu16_set1(0xff);
291 for (uint32_t vidx = 0; vidx != fullvec_ct; ++vidx) {
292 const VecU16 vv_orig = vecu16_loadu(&(patch_10_valias[vidx]));
293 const VecU16 vv_hi = vecu16_srli(vv_orig, 8);
294 const VecU16 vv_lo = vv_orig & m8;
295 const VecU16 vv_eq = (vv_hi == vv_lo);
296 if (vecu16_movemask(vv_eq) != kVec8thUintMax) {
297 return 1;
298 }
299 }
300 uint32_t uii = fullvec_ct * (kBytesPerVec / 2);
301 #else
302 uint32_t uii = 0;
303 #endif
304 for (; uii != patch_10_ct; ++uii) {
305 if (patch_10_vals[2 * uii] != patch_10_vals[2 * uii + 1]) {
306 return 1;
307 }
308 }
309 }
310 return 0;
311 }
312
SetHetMissing(uintptr_t word_ct,uintptr_t * genovec)313 void SetHetMissing(uintptr_t word_ct, uintptr_t* genovec) {
314 // 01 -> 11, nothing else changes
315 #ifdef __LP64__
316 const VecW m1 = VCONST_W(kMask5555);
317 VecW* geno_vvec_iter = R_CAST(VecW*, genovec);
318 const uintptr_t full_vec_ct = word_ct / kWordsPerVec;
319 if (full_vec_ct & 1) {
320 const VecW cur_geno_vword = *geno_vvec_iter;
321 const VecW cur_geno_vword_low_lshifted = vecw_slli(cur_geno_vword & m1, 1);
322 *geno_vvec_iter++ = cur_geno_vword | cur_geno_vword_low_lshifted;
323 }
324 if (full_vec_ct & 2) {
325 VecW cur_geno_vword = *geno_vvec_iter;
326 VecW cur_geno_vword_low_lshifted = vecw_slli(cur_geno_vword & m1, 1);
327 *geno_vvec_iter++ = cur_geno_vword | cur_geno_vword_low_lshifted;
328 cur_geno_vword = *geno_vvec_iter;
329 cur_geno_vword_low_lshifted = vecw_slli(cur_geno_vword & m1, 1);
330 *geno_vvec_iter++ = cur_geno_vword | cur_geno_vword_low_lshifted;
331 }
332 for (uintptr_t ulii = 3; ulii < full_vec_ct; ulii += 4) {
333 VecW cur_geno_vword = *geno_vvec_iter;
334 VecW cur_geno_vword_low_lshifted = vecw_slli(cur_geno_vword & m1, 1);
335 *geno_vvec_iter++ = cur_geno_vword | cur_geno_vword_low_lshifted;
336 cur_geno_vword = *geno_vvec_iter;
337 cur_geno_vword_low_lshifted = vecw_slli(cur_geno_vword & m1, 1);
338 *geno_vvec_iter++ = cur_geno_vword | cur_geno_vword_low_lshifted;
339 cur_geno_vword = *geno_vvec_iter;
340 cur_geno_vword_low_lshifted = vecw_slli(cur_geno_vword & m1, 1);
341 *geno_vvec_iter++ = cur_geno_vword | cur_geno_vword_low_lshifted;
342 cur_geno_vword = *geno_vvec_iter;
343 cur_geno_vword_low_lshifted = vecw_slli(cur_geno_vword & m1, 1);
344 *geno_vvec_iter++ = cur_geno_vword | cur_geno_vword_low_lshifted;
345 }
346 # ifdef USE_AVX2
347 if (word_ct & 2) {
348 const uintptr_t base_idx = full_vec_ct * kWordsPerVec;
349 uintptr_t geno_word = genovec[base_idx];
350 genovec[base_idx] = geno_word | ((geno_word & kMask5555) << 1);
351 geno_word = genovec[base_idx + 1];
352 genovec[base_idx + 1] = geno_word | ((geno_word & kMask5555) << 1);
353 }
354 # endif
355 if (word_ct & 1) {
356 const uintptr_t geno_word = genovec[word_ct - 1];
357 genovec[word_ct - 1] = geno_word | ((geno_word & kMask5555) << 1);
358 }
359 #else
360 for (uintptr_t widx = 0; widx != word_ct; ++widx) {
361 const uintptr_t geno_word = genovec[widx];
362 genovec[widx] = geno_word | ((geno_word & kMask5555) << 1);
363 }
364 #endif
365 }
366
SetHetMissingCleardosage(uintptr_t word_ct,uintptr_t * __restrict genovec,uint32_t * write_dosage_ct_ptr,uintptr_t * __restrict dosagepresent,Dosage * dosage_main)367 void SetHetMissingCleardosage(uintptr_t word_ct, uintptr_t* __restrict genovec, uint32_t* write_dosage_ct_ptr, uintptr_t* __restrict dosagepresent, Dosage* dosage_main) {
368 const uint32_t orig_write_dosage_ct = *write_dosage_ct_ptr;
369 if (orig_write_dosage_ct) {
370 uintptr_t sample_uidx_base = 0;
371 uintptr_t cur_bits = dosagepresent[0];
372 for (uint32_t dosage_read_idx = 0; dosage_read_idx != orig_write_dosage_ct; ++dosage_read_idx) {
373 const uintptr_t sample_uidx = BitIter1(dosagepresent, &sample_uidx_base, &cur_bits);
374 if (GetNyparrEntry(genovec, sample_uidx) == 1) {
375 ClearBit(sample_uidx, dosagepresent);
376 uint32_t dosage_write_idx = dosage_read_idx++;
377 for (; dosage_read_idx == orig_write_dosage_ct; ++dosage_read_idx) {
378 const uintptr_t sample_uidx2 = BitIter1(dosagepresent, &sample_uidx_base, &cur_bits);
379 if (GetNyparrEntry(genovec, sample_uidx2) == 1) {
380 ClearBit(sample_uidx2, dosagepresent);
381 } else {
382 dosage_main[dosage_write_idx++] = dosage_main[dosage_read_idx];
383 }
384 }
385 *write_dosage_ct_ptr = dosage_write_idx;
386 break;
387 }
388 }
389 }
390 SetHetMissing(word_ct, genovec);
391 }
392
SetHetMissingKeepdosage(uintptr_t word_ct,uintptr_t * __restrict genovec,uint32_t * write_dosage_ct_ptr,uintptr_t * __restrict dosagepresent,Dosage * dosage_main)393 void SetHetMissingKeepdosage(uintptr_t word_ct, uintptr_t* __restrict genovec, uint32_t* write_dosage_ct_ptr, uintptr_t* __restrict dosagepresent, Dosage* dosage_main) {
394 // count # of 1.00000 dosages we need to insert, and then rewrite dosage_main
395 // from back to front so we don't need temporary buffers.
396
397 const uint32_t orig_dosage_ct = *write_dosage_ct_ptr;
398 // can't assume dosagepresent is initialized in this case
399 if (!orig_dosage_ct) {
400 ZeroWArr(DivUp(word_ct, 2), dosagepresent);
401 }
402 Halfword* dosagepresent_alias = R_CAST(Halfword*, dosagepresent);
403 uint32_t new_dosage_ct = 0;
404 // todo: try vectorizing this loop
405 for (uintptr_t widx = 0; widx != word_ct; ++widx) {
406 const uintptr_t geno_hets = Word01(genovec[widx]);
407 const uintptr_t dosagepresent_word = UnpackHalfwordToWord(dosagepresent_alias[widx]);
408 new_dosage_ct += Popcount01Word(geno_hets & (~dosagepresent_word));
409 }
410 if (!new_dosage_ct) {
411 SetHetMissing(word_ct, genovec);
412 return;
413 }
414 uint32_t dosage_write_idx = orig_dosage_ct + new_dosage_ct;
415 uint32_t dosage_read_idx = orig_dosage_ct;
416 uint32_t widx = word_ct;
417 *write_dosage_ct_ptr = dosage_write_idx;
418 do {
419 --widx;
420 const uintptr_t geno_word = genovec[widx];
421 const uintptr_t dosagepresent_word = UnpackHalfwordToWord(dosagepresent_alias[widx]);
422 const uintptr_t geno_hets = Word01(geno_word);
423 uintptr_t new_dosagepresent_word = dosagepresent_word | geno_hets;
424 if (new_dosagepresent_word) {
425 dosagepresent_alias[widx] = PackWordToHalfword(new_dosagepresent_word);
426 do {
427 const uint32_t top_set_bit = bsrw(new_dosagepresent_word);
428 const uintptr_t cur_bit_word = k1LU << top_set_bit;
429 Dosage cur_dosage = kDosageMid;
430 if (cur_bit_word & dosagepresent_word) {
431 cur_dosage = dosage_main[--dosage_read_idx];
432 }
433 dosage_main[--dosage_write_idx] = cur_dosage;
434 new_dosagepresent_word -= cur_bit_word;
435 } while (new_dosagepresent_word);
436 genovec[widx] = geno_word | (geno_hets << 1);
437 }
438 } while (widx);
439 }
440
441 // todo: try vectorizing this
GenoarrToNonmissing(const uintptr_t * genoarr,uint32_t sample_ct,uintptr_t * nonmissing_bitarr)442 void GenoarrToNonmissing(const uintptr_t* genoarr, uint32_t sample_ct, uintptr_t* nonmissing_bitarr) {
443 const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);
444 const uintptr_t* genoarr_iter = genoarr;
445 Halfword* nonmissing_bitarr_iter = R_CAST(Halfword*, nonmissing_bitarr);
446 for (uint32_t widx = 0; widx != sample_ctl2; ++widx) {
447 uintptr_t ww = ~(*genoarr_iter++);
448 ww = ww | (ww >> 1);
449 *nonmissing_bitarr_iter++ = PackWordToHalfwordMask5555(ww);
450 }
451 // zero trailing bits up to word boundary, in a way that doesn't create
452 // aliasing issues
453 // (if zeroing is needed up to vector boundary, that's the caller's
454 // responsibility)
455 const uint32_t trail_ct = sample_ct % kBitsPerWordD2;
456 if (trail_ct) {
457 nonmissing_bitarr_iter[-1] &= (1U << trail_ct) - 1;
458 }
459 if (sample_ctl2 % 2) {
460 *nonmissing_bitarr_iter = 0;
461 }
462 }
463
464 // These functions may move to pgenlib_misc later.
CountMissingVec6(const VecW * __restrict geno_vvec,uint32_t vec_ct)465 uint32_t CountMissingVec6(const VecW* __restrict geno_vvec, uint32_t vec_ct) {
466 assert(!(vec_ct % 6));
467 const VecW m1 = VCONST_W(kMask5555);
468 const VecW m2 = VCONST_W(kMask3333);
469 const VecW m4 = VCONST_W(kMask0F0F);
470 const VecW* geno_vvec_iter = geno_vvec;
471 VecW acc_bothset = vecw_setzero();
472 uintptr_t cur_incr = 60;
473 for (; ; vec_ct -= cur_incr) {
474 if (vec_ct < 60) {
475 if (!vec_ct) {
476 return HsumW(acc_bothset);
477 }
478 cur_incr = vec_ct;
479 }
480 VecW inner_acc_bothset = vecw_setzero();
481 const VecW* geno_vvec_stop = &(geno_vvec_iter[cur_incr]);
482 do {
483 VecW cur_geno_vword = vecw_loadu(geno_vvec_iter);
484 ++geno_vvec_iter;
485 VecW bothset1 = m1 & cur_geno_vword & vecw_srli(cur_geno_vword, 1);
486
487 cur_geno_vword = vecw_loadu(geno_vvec_iter);
488 ++geno_vvec_iter;
489 bothset1 = bothset1 + (m1 & cur_geno_vword & vecw_srli(cur_geno_vword, 1));
490
491 cur_geno_vword = vecw_loadu(geno_vvec_iter);
492 ++geno_vvec_iter;
493 bothset1 = bothset1 + (m1 & cur_geno_vword & vecw_srli(cur_geno_vword, 1));
494 bothset1 = (bothset1 & m2) + (vecw_srli(bothset1, 2) & m2);
495
496 cur_geno_vword = vecw_loadu(geno_vvec_iter);
497 ++geno_vvec_iter;
498 VecW bothset2 = m1 & cur_geno_vword & vecw_srli(cur_geno_vword, 1);
499
500 cur_geno_vword = vecw_loadu(geno_vvec_iter);
501 ++geno_vvec_iter;
502 bothset2 = bothset2 + (m1 & cur_geno_vword & vecw_srli(cur_geno_vword, 1));
503
504 cur_geno_vword = vecw_loadu(geno_vvec_iter);
505 ++geno_vvec_iter;
506 bothset2 = bothset2 + (m1 & cur_geno_vword & vecw_srli(cur_geno_vword, 1));
507
508 bothset1 = bothset1 + (bothset2 & m2) + (vecw_srli(bothset2, 2) & m2);
509 // these now contain 4-bit values from 0-12
510
511 inner_acc_bothset = inner_acc_bothset + (bothset1 & m4) + (vecw_srli(bothset1, 4) & m4);
512 } while (geno_vvec_iter < geno_vvec_stop);
513 const VecW m0 = vecw_setzero();
514 acc_bothset = acc_bothset + vecw_bytesum(inner_acc_bothset, m0);
515 }
516 }
517
GenoarrCountMissingUnsafe(const uintptr_t * genoarr,uint32_t sample_ct)518 uint32_t GenoarrCountMissingUnsafe(const uintptr_t* genoarr, uint32_t sample_ct) {
519 const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);
520 uint32_t word_idx = sample_ctl2 - (sample_ctl2 % (6 * kWordsPerVec));
521 uint32_t missing_ct = CountMissingVec6(R_CAST(const VecW*, genoarr), word_idx / kWordsPerVec);
522 for (; word_idx != sample_ctl2; ++word_idx) {
523 uintptr_t ww = genoarr[word_idx];
524 ww = ww & (ww >> 1);
525 missing_ct += Popcount01Word(ww);
526 }
527 return missing_ct;
528 }
529
530 // geno_vvec allowed to be unaligned.
CountMissingMaskedVec6(const VecW * __restrict geno_vvec,const VecW * __restrict interleaved_mask_vvec,uint32_t vec_ct)531 uint32_t CountMissingMaskedVec6(const VecW* __restrict geno_vvec, const VecW* __restrict interleaved_mask_vvec, uint32_t vec_ct) {
532 assert(!(vec_ct % 6));
533 const VecW m1 = VCONST_W(kMask5555);
534 const VecW m2 = VCONST_W(kMask3333);
535 const VecW m4 = VCONST_W(kMask0F0F);
536 const VecW* geno_vvec_iter = geno_vvec;
537 const VecW* interleaved_mask_vvec_iter = interleaved_mask_vvec;
538 VecW acc_bothset = vecw_setzero();
539 uintptr_t cur_incr = 60;
540 for (; ; vec_ct -= cur_incr) {
541 if (vec_ct < 60) {
542 if (!vec_ct) {
543 return HsumW(acc_bothset);
544 }
545 cur_incr = vec_ct;
546 }
547 VecW inner_acc_bothset = vecw_setzero();
548 const VecW* geno_vvec_stop = &(geno_vvec_iter[cur_incr]);
549 do {
550 VecW interleaved_mask_vword = *interleaved_mask_vvec_iter++;
551 VecW cur_geno_vword = vecw_loadu(geno_vvec_iter);
552 ++geno_vvec_iter;
553 VecW cur_mask = interleaved_mask_vword & m1;
554 VecW bothset1 = cur_mask & cur_geno_vword & vecw_srli(cur_geno_vword, 1);
555
556 cur_mask = vecw_srli(interleaved_mask_vword, 1) & m1;
557 cur_geno_vword = vecw_loadu(geno_vvec_iter);
558 ++geno_vvec_iter;
559 bothset1 = bothset1 + (cur_mask & cur_geno_vword & vecw_srli(cur_geno_vword, 1));
560
561 interleaved_mask_vword = *interleaved_mask_vvec_iter++;
562 cur_mask = interleaved_mask_vword & m1;
563 cur_geno_vword = vecw_loadu(geno_vvec_iter);
564 ++geno_vvec_iter;
565 bothset1 = bothset1 + (cur_mask & cur_geno_vword & vecw_srli(cur_geno_vword, 1));
566 bothset1 = (bothset1 & m2) + (vecw_srli(bothset1, 2) & m2);
567
568 cur_mask = vecw_srli(interleaved_mask_vword, 1) & m1;
569 cur_geno_vword = vecw_loadu(geno_vvec_iter);
570 ++geno_vvec_iter;
571 VecW bothset2 = cur_mask & cur_geno_vword & vecw_srli(cur_geno_vword, 1);
572
573 interleaved_mask_vword = *interleaved_mask_vvec_iter++;
574 cur_mask = interleaved_mask_vword & m1;
575 cur_geno_vword = vecw_loadu(geno_vvec_iter);
576 ++geno_vvec_iter;
577 bothset2 = bothset2 + (cur_mask & cur_geno_vword & vecw_srli(cur_geno_vword, 1));
578
579 cur_mask = vecw_srli(interleaved_mask_vword, 1) & m1;
580 cur_geno_vword = vecw_loadu(geno_vvec_iter);
581 ++geno_vvec_iter;
582 bothset2 = bothset2 + (cur_mask & cur_geno_vword & vecw_srli(cur_geno_vword, 1));
583
584 bothset1 = bothset1 + (bothset2 & m2) + (vecw_srli(bothset2, 2) & m2);
585 // these now contain 4-bit values from 0-12
586
587 inner_acc_bothset = inner_acc_bothset + (bothset1 & m4) + (vecw_srli(bothset1, 4) & m4);
588 } while (geno_vvec_iter < geno_vvec_stop);
589 const VecW m0 = vecw_setzero();
590 acc_bothset = acc_bothset + vecw_bytesum(inner_acc_bothset, m0);
591 }
592 }
593
GenoarrCountMissingSubset(const uintptr_t * genoarr,const uintptr_t * interleaved_vec,uint32_t sample_ct)594 uint32_t GenoarrCountMissingSubset(const uintptr_t* genoarr, const uintptr_t* interleaved_vec, uint32_t sample_ct) {
595 // See GenoarrCountSubsetFreqs().
596 const uint32_t sample_ctv2 = NypCtToVecCt(sample_ct);
597 uint32_t missing_ct;
598 #ifdef __LP64__
599 uint32_t vec_idx = sample_ctv2 - (sample_ctv2 % 6);
600 missing_ct = CountMissingMaskedVec6(R_CAST(const VecW*, genoarr), R_CAST(const VecW*, interleaved_vec), vec_idx);
601 const uintptr_t* genoarr_iter = &(genoarr[kWordsPerVec * vec_idx]);
602 const uintptr_t* interleaved_mask_iter = &(interleaved_vec[(kWordsPerVec / 2) * vec_idx]);
603 # ifdef USE_AVX2
604 uintptr_t mask_base1 = 0;
605 uintptr_t mask_base2 = 0;
606 uintptr_t mask_base3 = 0;
607 uintptr_t mask_base4 = 0;
608 for (; vec_idx != sample_ctv2; ++vec_idx) {
609 uintptr_t mask_word1;
610 uintptr_t mask_word2;
611 uintptr_t mask_word3;
612 uintptr_t mask_word4;
613 if (!(vec_idx % 2)) {
614 mask_base1 = *interleaved_mask_iter++;
615 mask_base2 = *interleaved_mask_iter++;
616 mask_base3 = *interleaved_mask_iter++;
617 mask_base4 = *interleaved_mask_iter++;
618 mask_word1 = mask_base1 & kMask5555;
619 mask_word2 = mask_base2 & kMask5555;
620 mask_word3 = mask_base3 & kMask5555;
621 mask_word4 = mask_base4 & kMask5555;
622 } else {
623 mask_word1 = (mask_base1 >> 1) & kMask5555;
624 mask_word2 = (mask_base2 >> 1) & kMask5555;
625 mask_word3 = (mask_base3 >> 1) & kMask5555;
626 mask_word4 = (mask_base4 >> 1) & kMask5555;
627 }
628 for (uint32_t vechalf_idx = 0; ; ++vechalf_idx) {
629 const uintptr_t cur_geno_word1 = *genoarr_iter++;
630 const uintptr_t cur_geno_word2 = *genoarr_iter++;
631 const uintptr_t cur_geno_word1_high_masked = mask_word1 & (cur_geno_word1 >> 1);
632 const uintptr_t cur_geno_word2_high_masked = mask_word2 & (cur_geno_word2 >> 1);
633 missing_ct += PopcountWord(((cur_geno_word1 & cur_geno_word1_high_masked) << 1) | (cur_geno_word2 & cur_geno_word2_high_masked));
634 if (vechalf_idx) {
635 break;
636 }
637 mask_word1 = mask_word3;
638 mask_word2 = mask_word4;
639 }
640 }
641 # else // not USE_AVX2
642 uintptr_t mask_base1 = 0;
643 uintptr_t mask_base2 = 0;
644 for (; vec_idx != sample_ctv2; ++vec_idx) {
645 uintptr_t mask_word1;
646 uintptr_t mask_word2;
647 if (!(vec_idx % 2)) {
648 mask_base1 = *interleaved_mask_iter++;
649 mask_base2 = *interleaved_mask_iter++;
650 mask_word1 = mask_base1 & kMask5555;
651 mask_word2 = mask_base2 & kMask5555;
652 } else {
653 mask_word1 = (mask_base1 >> 1) & kMask5555;
654 mask_word2 = (mask_base2 >> 1) & kMask5555;
655 }
656 const uintptr_t cur_geno_word1 = *genoarr_iter++;
657 const uintptr_t cur_geno_word2 = *genoarr_iter++;
658 const uintptr_t cur_geno_word1_high_masked = mask_word1 & (cur_geno_word1 >> 1);
659 const uintptr_t cur_geno_word2_high_masked = mask_word2 & (cur_geno_word2 >> 1);
660 # ifdef USE_SSE42
661 missing_ct += PopcountWord(((cur_geno_word1 & cur_geno_word1_high_masked) << 1) | (cur_geno_word2 & cur_geno_word2_high_masked));
662 # else
663 missing_ct += NypsumWord((cur_geno_word1 & cur_geno_word1_high_masked) + (cur_geno_word2 & cur_geno_word2_high_masked));
664 # endif
665 }
666 # endif // not USE_AVX2
667 #else // not __LP64__
668 uint32_t word_idx = sample_ctv2 - (sample_ctv2 % 6);
669 missing_ct = CountMissingMaskedVec6(R_CAST(const VecW*, genoarr), R_CAST(const VecW*, interleaved_vec), word_idx);
670 const uintptr_t* interleaved_mask_iter = &(interleaved_vec[word_idx / 2]);
671 uintptr_t mask_base = 0;
672 for (; word_idx != sample_ctv2; ++word_idx) {
673 uintptr_t mask_word;
674 if (!(word_idx % 2)) {
675 mask_base = *interleaved_mask_iter++;
676 mask_word = mask_base & kMask5555;
677 } else {
678 mask_word = (mask_base >> 1) & kMask5555;
679 }
680 const uintptr_t cur_geno_word = genoarr[word_idx];
681 const uintptr_t cur_geno_word_high_masked = mask_word & (cur_geno_word >> 1);
682 missing_ct += Popcount01Word(cur_geno_word & cur_geno_word_high_masked);
683 }
684 #endif
685 return missing_ct;
686 }
687
688 // counts post-dosage missing, for which we don't have an interleaved mask
GenoarrCountMissingInvsubsetUnsafe(const uintptr_t * genoarr,const uintptr_t * exclude_mask,uint32_t sample_ct)689 uint32_t GenoarrCountMissingInvsubsetUnsafe(const uintptr_t* genoarr, const uintptr_t* exclude_mask, uint32_t sample_ct) {
690 const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);
691 const uintptr_t* genoarr_iter = genoarr;
692 const Halfword* exclude_alias_iter = R_CAST(const Halfword*, exclude_mask);
693 uint32_t missing_ct = 0;
694 for (uint32_t widx = 0; widx != sample_ctl2; ++widx) {
695 uintptr_t ww = *genoarr_iter++;
696 ww = ww & (ww >> 1);
697 const uint32_t include_mask = ~(*exclude_alias_iter++);
698 missing_ct += Popcount01Word(ww & UnpackHalfwordToWord(include_mask));
699 }
700 return missing_ct;
701 }
702
703
CollapsedSampleFmtidInit(const uintptr_t * sample_include,const SampleIdInfo * siip,uint32_t sample_ct,uint32_t include_fid,uint32_t include_sid,uintptr_t max_sample_fmtid_blen,char * collapsed_sample_fmtids_iter)704 void CollapsedSampleFmtidInit(const uintptr_t* sample_include, const SampleIdInfo* siip, uint32_t sample_ct, uint32_t include_fid, uint32_t include_sid, uintptr_t max_sample_fmtid_blen, char* collapsed_sample_fmtids_iter) {
705 const char* sample_ids = siip->sample_ids;
706 const char* sids = siip->sids;
707 const uintptr_t max_sample_id_blen = siip->max_sample_id_blen;
708 uintptr_t max_sid_blen = 0;
709 if (include_sid) {
710 max_sid_blen = sids? siip->max_sid_blen : 2;
711 }
712 uintptr_t sample_uidx_base = 0;
713 uintptr_t cur_bits = sample_include[0];
714 for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
715 const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
716 const char* cur_sample_id = &(sample_ids[sample_uidx * max_sample_id_blen]);
717 if (!include_fid) {
718 cur_sample_id = AdvPastDelim(cur_sample_id, '\t');
719 }
720 char* write_iter = strcpya(collapsed_sample_fmtids_iter, cur_sample_id);
721 if (include_sid) {
722 *write_iter++ = '\t';
723 if (sids) {
724 strcpy(write_iter, &(sids[sample_uidx * max_sid_blen]));
725 } else {
726 strcpy_k(write_iter, "0");
727 }
728 } else {
729 *write_iter = '\0';
730 }
731 collapsed_sample_fmtids_iter = &(collapsed_sample_fmtids_iter[max_sample_fmtid_blen]);
732 }
733 }
734
CollapsedSampleFmtidInitAlloc(const uintptr_t * sample_include,const SampleIdInfo * siip,uint32_t sample_ct,uint32_t include_fid,uint32_t include_sid,char ** collapsed_sample_fmtids_ptr,uintptr_t * max_sample_fmtid_blen_ptr)735 BoolErr CollapsedSampleFmtidInitAlloc(const uintptr_t* sample_include, const SampleIdInfo* siip, uint32_t sample_ct, uint32_t include_fid, uint32_t include_sid, char** collapsed_sample_fmtids_ptr, uintptr_t* max_sample_fmtid_blen_ptr) {
736 const uintptr_t max_sample_fmtid_blen = GetMaxSampleFmtidBlen(siip, include_fid, include_sid);
737 *max_sample_fmtid_blen_ptr = max_sample_fmtid_blen;
738 // might add sample_fmtid_map later
739 if (unlikely(bigstack_alloc_c(max_sample_fmtid_blen * sample_ct, collapsed_sample_fmtids_ptr))) {
740 return 1;
741 }
742 CollapsedSampleFmtidInit(sample_include, siip, sample_ct, include_fid, include_sid, max_sample_fmtid_blen, *collapsed_sample_fmtids_ptr);
743 return 0;
744 }
745
OnlyOneFid(const uintptr_t * sample_include,const SampleIdInfo * siip,uint32_t sample_ct)746 uint32_t OnlyOneFid(const uintptr_t* sample_include, const SampleIdInfo* siip, uint32_t sample_ct) {
747 if (!(siip->flags & kfSampleIdFidPresent)) {
748 return 1;
749 }
750 const char* sample_ids = siip->sample_ids;
751 const uintptr_t max_sample_id_blen = siip->max_sample_id_blen;
752 uintptr_t sample_uidx_base = 0;
753 uintptr_t cur_bits = sample_include[0];
754 const uintptr_t first_sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
755 const char* fid_start = &(sample_ids[first_sample_uidx * max_sample_id_blen]);
756 const uintptr_t fid_blen = AdvPastDelim(fid_start, '\t') - fid_start;
757 for (uint32_t sample_idx = 1; sample_idx != sample_ct; ++sample_idx) {
758 const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
759 const char* cur_fid_start = &(sample_ids[sample_uidx * max_sample_id_blen]);
760 if (!memequal(fid_start, cur_fid_start, fid_blen)) {
761 return 0;
762 }
763 }
764 return 1;
765 }
766
GetMajIdxMulti(const double * cur_allele_freqs,uint32_t cur_allele_ct)767 uint32_t GetMajIdxMulti(const double* cur_allele_freqs, uint32_t cur_allele_ct) {
768 assert(cur_allele_ct > 2);
769 double max_freq = cur_allele_freqs[1];
770 if (max_freq >= 0.5) {
771 return 1;
772 }
773 double tot_nonlast_freq = cur_allele_freqs[0];
774 uint32_t maj_allele_idx = 1;
775 if (tot_nonlast_freq >= max_freq) {
776 maj_allele_idx = 0;
777 max_freq = tot_nonlast_freq;
778 }
779 tot_nonlast_freq += max_freq;
780 const uint32_t cur_allele_ct_m1 = cur_allele_ct - 1;
781 for (uint32_t allele_idx = 2; allele_idx != cur_allele_ct_m1; ++allele_idx) {
782 const double cur_freq = cur_allele_freqs[allele_idx];
783 if (cur_freq > max_freq) {
784 maj_allele_idx = allele_idx;
785 max_freq = cur_freq;
786 }
787 tot_nonlast_freq += cur_freq;
788 }
789 if (max_freq + tot_nonlast_freq < 1.0 - kSmallEpsilon) {
790 return cur_allele_ct_m1;
791 }
792 return maj_allele_idx;
793 }
794
795 // forced SID '0' if sids == nullptr
796 // ok for sample_augid_map_ptr == nullptr
AugidInitAlloc(const uintptr_t * sample_include,const SampleIdInfo * siip,uint32_t sample_ct,uint32_t ** sample_augid_map_ptr,char ** sample_augids_ptr,uintptr_t * max_sample_augid_blen_ptr)797 PglErr AugidInitAlloc(const uintptr_t* sample_include, const SampleIdInfo* siip, uint32_t sample_ct, uint32_t** sample_augid_map_ptr, char** sample_augids_ptr, uintptr_t* max_sample_augid_blen_ptr) {
798 const char* sample_ids = siip->sample_ids;
799 const char* sids = siip->sids;
800 const uintptr_t max_sample_id_blen = siip->max_sample_id_blen;
801 uintptr_t max_sid_blen = siip->max_sid_blen;
802 if (!sids) {
803 max_sid_blen = 2;
804 }
805 const uintptr_t max_sample_augid_blen = max_sample_id_blen + max_sid_blen;
806 *max_sample_augid_blen_ptr = max_sample_augid_blen;
807 uint32_t* sample_augid_map = nullptr;
808 if (sample_augid_map_ptr) {
809 if (unlikely(bigstack_alloc_u32(sample_ct, sample_augid_map_ptr))) {
810 return kPglRetNomem;
811 }
812 sample_augid_map = *sample_augid_map_ptr;
813 }
814 if (unlikely(bigstack_alloc_c(max_sample_augid_blen * sample_ct, sample_augids_ptr))) {
815 return kPglRetNomem;
816 }
817 char* sample_augids_iter = *sample_augids_ptr;
818 uintptr_t sample_uidx_base = 0;
819 uintptr_t cur_bits = sample_include[0];
820 for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
821 const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
822 char* write_iter = strcpyax(sample_augids_iter, &(sample_ids[sample_uidx * max_sample_id_blen]), '\t');
823 if (sids) {
824 strcpy(write_iter, &(sids[sample_uidx * max_sid_blen]));
825 } else {
826 strcpy_k(write_iter, "0");
827 }
828 sample_augids_iter = &(sample_augids_iter[max_sample_augid_blen]);
829 if (sample_augid_map) {
830 sample_augid_map[sample_idx] = sample_uidx;
831 }
832 }
833 return kPglRetSuccess;
834 }
835
SortedXidboxInitAlloc(const uintptr_t * sample_include,const SampleIdInfo * siip,uint32_t sample_ct,uint32_t allow_dups,XidMode xid_mode,uint32_t use_nsort,char ** sorted_xidbox_ptr,uint32_t ** xid_map_ptr,uintptr_t * max_xid_blen_ptr)836 PglErr SortedXidboxInitAlloc(const uintptr_t* sample_include, const SampleIdInfo* siip, uint32_t sample_ct, uint32_t allow_dups, XidMode xid_mode, uint32_t use_nsort, char** sorted_xidbox_ptr, uint32_t** xid_map_ptr, uintptr_t* max_xid_blen_ptr) {
837 if (!(xid_mode & kfXidModeFlagSid)) {
838 // two fields
839 *max_xid_blen_ptr = siip->max_sample_id_blen;
840 return CopySortStrboxSubset(sample_include, siip->sample_ids, sample_ct, siip->max_sample_id_blen, allow_dups, 0, use_nsort, sorted_xidbox_ptr, xid_map_ptr);
841 }
842 // three fields
843 if (unlikely(AugidInitAlloc(sample_include, siip, sample_ct, xid_map_ptr, sorted_xidbox_ptr, max_xid_blen_ptr))) {
844 return kPglRetNomem;
845 }
846 if (unlikely(SortStrboxIndexed(sample_ct, *max_xid_blen_ptr, use_nsort, *sorted_xidbox_ptr, *xid_map_ptr))) {
847 return kPglRetNomem;
848 }
849 if (!allow_dups) {
850 char* dup_id = ScanForDuplicateIds(*sorted_xidbox_ptr, sample_ct, *max_xid_blen_ptr);
851 if (unlikely(dup_id)) {
852 char* tab_iter = AdvToDelim(dup_id, '\t');
853 *tab_iter = ' ';
854 tab_iter = AdvToDelim(&(tab_iter[1]), '\t');
855 *tab_iter = ' ';
856 logerrprintfww("Error: Duplicate ID '%s'.\n", dup_id);
857 return kPglRetMalformedInput;
858 }
859 }
860 return kPglRetSuccess;
861 }
862
XidRead(uintptr_t max_xid_blen,uint32_t comma_delim,XidMode xid_mode,const char ** read_pp,char * __restrict idbuf)863 uint32_t XidRead(uintptr_t max_xid_blen, uint32_t comma_delim, XidMode xid_mode, const char** read_pp, char* __restrict idbuf) {
864 // Saves normalized tokens pointed to by *read_pp to idbuf, and advances
865 // *read_pp.
866 //
867 // Input *read_pp must point to beginning of sample ID; this is a change from
868 // plink 1.9.
869 //
870 // *read_pp is now set to point to the end of the last parsed token instead
871 // of the beginning of the next; this is another change from plink 1.9.
872 //
873 // Returns 0 on missing token *or* guaranteed sample ID mismatch (longer than
874 // max_xid_blen); cases can be distinguished by checking whether *read_pp ==
875 // nullptr. Otherwise, returns slen of the parsed ID saved to idbuf. (idbuf
876 // is not null-terminated.)
877 //
878 // Alpha 2 behavior change: If there's no FID column, it's now set to 0
879 // instead of the IID value.
880 const char* first_token_start = *read_pp;
881 uintptr_t slen_fid = 0; // now zero if no FID column
882 uintptr_t blen_sid = 0;
883 const char* sid_ptr = nullptr;
884 const char* token_iter;
885 const char* iid_ptr;
886 uintptr_t slen_iid;
887 if (comma_delim) {
888 token_iter = first_token_start;
889 unsigned char ucc = *token_iter;
890 while (ucc != ',') {
891 if (ucc < 32) {
892 if (unlikely(!(xid_mode & kfXidModeFlagOneTokenOk))) {
893 *read_pp = nullptr;
894 return 0;
895 }
896 goto XidRead_comma_single_token;
897 }
898 ucc = *(++token_iter);
899 }
900 if (xid_mode & kfXidModeFlagNeverFid) {
901 XidRead_comma_single_token:
902 iid_ptr = first_token_start;
903 slen_iid = token_iter - first_token_start;
904 } else {
905 slen_fid = token_iter - first_token_start;
906 do {
907 ucc = *(++token_iter);
908 } while ((ucc == ' ') || (ucc == '\t'));
909 iid_ptr = token_iter;
910 while ((ucc >= 32) && (ucc != ',')) {
911 ucc = *(++token_iter);
912 }
913 slen_iid = token_iter - iid_ptr;
914 }
915 // token_iter now points to comma/eoln at end of IID
916 if (xid_mode & kfXidModeFlagSid) {
917 if (*token_iter != ',') {
918 return 0;
919 }
920 do {
921 ucc = *(++token_iter);
922 } while ((ucc == ' ') || (ucc == '\t'));
923 sid_ptr = token_iter;
924 while ((ucc >= 32) && (ucc != ',')) {
925 ucc = *(++token_iter);
926 }
927 blen_sid = 1 + S_CAST(uintptr_t, token_iter - sid_ptr);
928 if (token_iter == sid_ptr) {
929 // special case: treat missing SID as '0'
930 blen_sid = 2;
931 sid_ptr = &(g_one_char_strs[96]);
932 }
933 }
934 } else {
935 assert(!IsEolnKns(*first_token_start));
936 token_iter = CurTokenEnd(first_token_start);
937 if (xid_mode & kfXidModeFlagNeverFid) {
938 XidRead_space_single_token:
939 iid_ptr = first_token_start;
940 slen_iid = token_iter - first_token_start;
941 } else {
942 slen_fid = token_iter - first_token_start;
943 token_iter = FirstNonTspace(token_iter);
944 if (IsEolnKns(*token_iter)) {
945 if (unlikely(!(xid_mode & kfXidModeFlagOneTokenOk))) {
946 *read_pp = nullptr;
947 return 0;
948 }
949 // need to backtrack
950 token_iter = &(first_token_start[slen_fid]);
951 // bugfix (26 Feb 2018): forgot to zero this
952 slen_fid = 0;
953 goto XidRead_space_single_token;
954 }
955 iid_ptr = token_iter;
956 token_iter = CurTokenEnd(token_iter);
957 slen_iid = token_iter - iid_ptr;
958 }
959 // token_iter now points to space/eoln at end of IID
960 if (xid_mode & kfXidModeFlagSid) {
961 token_iter = FirstNonTspace(token_iter);
962 if (unlikely(IsEolnKns(*token_iter))) {
963 *read_pp = nullptr;
964 return 0;
965 }
966 sid_ptr = token_iter;
967 token_iter = CurTokenEnd(token_iter);
968 blen_sid = 1 + S_CAST(uintptr_t, token_iter - sid_ptr);
969 }
970 }
971 *read_pp = token_iter;
972 const uintptr_t slen_final = slen_fid + (slen_fid == 0) + slen_iid + blen_sid + 1;
973 if (slen_final >= max_xid_blen) {
974 // avoid buffer overflow
975 return 0;
976 }
977 char* idbuf_iter = idbuf;
978 if (slen_fid) {
979 idbuf_iter = memcpya(idbuf, first_token_start, slen_fid);
980 } else {
981 *idbuf_iter++ = '0';
982 }
983 *idbuf_iter++ = '\t';
984 idbuf_iter = memcpya(idbuf_iter, iid_ptr, slen_iid);
985 if (blen_sid) {
986 *idbuf_iter++ = '\t';
987 memcpy(idbuf_iter, sid_ptr, blen_sid - 1);
988 }
989 return slen_final;
990 }
991
SortedXidboxReadMultifind(const char * __restrict sorted_xidbox,uintptr_t max_xid_blen,uintptr_t xid_ct,uint32_t comma_delim,XidMode xid_mode,const char ** read_pp,uint32_t * __restrict xid_idx_start_ptr,uint32_t * __restrict xid_idx_end_ptr,char * __restrict idbuf)992 BoolErr SortedXidboxReadMultifind(const char* __restrict sorted_xidbox, uintptr_t max_xid_blen, uintptr_t xid_ct, uint32_t comma_delim, XidMode xid_mode, const char** read_pp, uint32_t* __restrict xid_idx_start_ptr, uint32_t* __restrict xid_idx_end_ptr, char* __restrict idbuf) {
993 // sorted_xidbox = packed, sorted list of ID strings to search over.
994 const uint32_t slen_final = XidRead(max_xid_blen, comma_delim, xid_mode, read_pp, idbuf);
995 if (!slen_final) {
996 return 1;
997 }
998 const uint32_t lb_idx = bsearch_str_lb(idbuf, sorted_xidbox, slen_final, max_xid_blen, xid_ct);
999 idbuf[slen_final] = ' ';
1000 const uint32_t ub_idx = ExpsearchStrLb(idbuf, sorted_xidbox, slen_final + 1, max_xid_blen, xid_ct, lb_idx);
1001 if (lb_idx == ub_idx) {
1002 return 1;
1003 }
1004 *xid_idx_start_ptr = lb_idx;
1005 *xid_idx_end_ptr = ub_idx;
1006 return 0;
1007 }
1008
LoadXidHeader(const char * flag_name,XidHeaderFlags xid_header_flags,uintptr_t * line_idx_ptr,TextStream * txsp,XidMode * xid_mode_ptr,char ** line_startp,char ** line_iterp)1009 PglErr LoadXidHeader(const char* flag_name, XidHeaderFlags xid_header_flags, uintptr_t* line_idx_ptr, TextStream* txsp, XidMode* xid_mode_ptr, char** line_startp, char** line_iterp) {
1010 // possible todo: support comma delimiter
1011 uintptr_t line_idx = *line_idx_ptr;
1012 char* line_iter;
1013 uint32_t is_header_line;
1014 do {
1015 ++line_idx;
1016 line_iter = TextGet(txsp);
1017 // eof may be ok
1018 if (!line_iter) {
1019 return TextStreamRawErrcode(txsp);
1020 }
1021 is_header_line = (line_iter[0] == '#');
1022 } while (is_header_line && (!tokequal_k(&(line_iter[1]), "FID")) && (!tokequal_k(&(line_iter[1]), "IID")));
1023 if (line_startp) {
1024 *line_startp = line_iter;
1025 }
1026 XidMode xid_mode = kfXidMode0;
1027 if (is_header_line) {
1028 // The following header leading columns are supported:
1029 // #FID IID
1030 // #FID IID SID (SID ignored if kfXidHeaderIgnoreSid set)
1031 // #IID
1032 // #IID SID
1033 char* linebuf_iter = &(line_iter[4]);
1034 if (line_iter[1] == 'I') {
1035 xid_mode = kfXidModeFlagNeverFid;
1036 } else {
1037 linebuf_iter = FirstNonTspace(linebuf_iter);
1038 if (unlikely(!tokequal_k(linebuf_iter, "IID"))) {
1039 logerrprintf("Error: No IID column on line %" PRIuPTR " of --%s file.\n", line_idx, flag_name);
1040 return kPglRetMalformedInput;
1041 }
1042 linebuf_iter = &(linebuf_iter[3]);
1043 }
1044 linebuf_iter = FirstNonTspace(linebuf_iter);
1045 if (tokequal_k(linebuf_iter, "SID")) {
1046 if (!(xid_header_flags & kfXidHeaderIgnoreSid)) {
1047 xid_mode |= kfXidModeFlagSid;
1048 }
1049 linebuf_iter = FirstNonTspace(&(linebuf_iter[3]));
1050 } else if (xid_mode == kfXidModeFlagNeverFid) {
1051 xid_mode = kfXidModeIid;
1052 }
1053 line_iter = linebuf_iter;
1054 } else {
1055 xid_mode = (xid_header_flags & kfXidHeaderFixedWidth)? kfXidModeFidIid : kfXidModeFidIidOrIid;
1056 }
1057 if (line_iterp) {
1058 *line_iterp = line_iter;
1059 }
1060 *line_idx_ptr = line_idx;
1061 *xid_mode_ptr = xid_mode;
1062 return kPglRetSuccess;
1063 }
1064
OpenAndLoadXidHeader(const char * fname,const char * flag_name,XidHeaderFlags xid_header_flags,uint32_t max_line_blen,TextStream * txsp,XidMode * xid_mode_ptr,uintptr_t * line_idx_ptr,char ** line_startp,char ** line_iterp)1065 PglErr OpenAndLoadXidHeader(const char* fname, const char* flag_name, XidHeaderFlags xid_header_flags, uint32_t max_line_blen, TextStream* txsp, XidMode* xid_mode_ptr, uintptr_t* line_idx_ptr, char** line_startp, char** line_iterp) {
1066 PglErr reterr = InitTextStream(fname, max_line_blen, 1, txsp);
1067 // remove unlikely() if any caller treats eof as ok
1068 if (unlikely(reterr)) {
1069 return reterr;
1070 }
1071 *line_idx_ptr = 0;
1072 return LoadXidHeader(flag_name, xid_header_flags, line_idx_ptr, txsp, xid_mode_ptr, line_startp, line_iterp);
1073 }
1074
LoadXidHeaderPair(const char * flag_name,uint32_t sid_over_fid,uintptr_t * line_idx_ptr,TextStream * txsp,XidMode * xid_mode_ptr,char ** line_startp,char ** line_iterp)1075 PglErr LoadXidHeaderPair(const char* flag_name, uint32_t sid_over_fid, uintptr_t* line_idx_ptr, TextStream* txsp, XidMode* xid_mode_ptr, char** line_startp, char** line_iterp) {
1076 uintptr_t line_idx = *line_idx_ptr;
1077 char* line_iter;
1078 uint32_t is_header_line;
1079 do {
1080 ++line_idx;
1081 line_iter = TextGet(txsp);
1082 // eof may be ok
1083 if (!line_iter) {
1084 return TextStreamRawErrcode(txsp);
1085 }
1086 is_header_line = (line_iter[0] == '#');
1087 } while (is_header_line && (!tokequal_k(&(line_iter[1]), "FID1")) && (!tokequal_k(&(line_iter[1]), "ID1")) && (!tokequal_k(&(line_iter[1]), "IID1")));
1088 if (line_startp) {
1089 *line_startp = line_iter;
1090 }
1091 XidMode xid_mode = kfXidMode0;
1092 if (is_header_line) {
1093 // The following header leading columns are supported:
1094 // #FID1 {I}ID1 FID2 {I}ID2
1095 // #FID1 {I}ID1 SID1 FID2 {I}ID2 SID2
1096 // #{I}ID1 {I}ID2
1097 // #{I}ID1 SID1 {I}ID2 SID2
1098 char* linebuf_iter = &(line_iter[4]);
1099 if (line_iter[1] == 'I') {
1100 xid_mode = kfXidModeFlagNeverFid;
1101 } else {
1102 linebuf_iter = FirstNonTspace(linebuf_iter);
1103 if (tokequal_k(linebuf_iter, "ID1")) {
1104 linebuf_iter = &(linebuf_iter[3]);
1105 } else if (likely(tokequal_k(linebuf_iter, "IID1"))) {
1106 linebuf_iter = &(linebuf_iter[4]);
1107 } else {
1108 logerrprintf("Error: No [I]ID1 column on line %" PRIuPTR " of --%s file.\n", line_idx, flag_name);
1109 return kPglRetMalformedInput;
1110 }
1111 }
1112 linebuf_iter = FirstNonTspace(linebuf_iter);
1113 if (tokequal_k(linebuf_iter, "SID1")) {
1114 xid_mode |= kfXidModeFlagSid;
1115 linebuf_iter = FirstNonTspace(&(linebuf_iter[4]));
1116 }
1117 // Now verify the expected ID2 token sequence follows.
1118 if (!(xid_mode & kfXidModeFlagNeverFid)) {
1119 if (unlikely(!tokequal_k(linebuf_iter, "FID2"))) {
1120 logerrprintf("Error: No FID2 column on line %" PRIuPTR " of --%s file.\n", line_idx, flag_name);
1121 return kPglRetMalformedInput;
1122 }
1123 linebuf_iter = FirstNonTspace(&(linebuf_iter[4]));
1124 }
1125 if (tokequal_k(linebuf_iter, "ID2")) {
1126 linebuf_iter = &(linebuf_iter[3]);
1127 } else if (likely(tokequal_k(linebuf_iter, "IID2"))) {
1128 linebuf_iter = &(linebuf_iter[4]);
1129 } else {
1130 logerrprintf("Error: No [I]ID2 column on line %" PRIuPTR " of --%s file.\n", line_idx, flag_name);
1131 return kPglRetMalformedInput;
1132 }
1133 if (xid_mode & kfXidModeFlagSid) {
1134 linebuf_iter = FirstNonTspace(linebuf_iter);
1135 if (unlikely(!tokequal_k(linebuf_iter, "SID2"))) {
1136 logerrprintf("Error: No SID2 column on line %" PRIuPTR " of --%s file.\n", line_idx, flag_name);
1137 return kPglRetMalformedInput;
1138 }
1139 linebuf_iter = &(linebuf_iter[4]);
1140 }
1141 line_iter = linebuf_iter;
1142 } else {
1143 // Assume the file contains nothing but ID pairs.
1144 const uint32_t token_ct = CountTokens(line_iter);
1145 if (token_ct == 2) {
1146 xid_mode = kfXidModeIid;
1147 } else if (token_ct == 4) {
1148 xid_mode = sid_over_fid? kfXidModeIidSid : kfXidModeFidIid;
1149 } else if (likely(token_ct == 6)) {
1150 xid_mode = kfXidModeFidIidSid;
1151 } else {
1152 logerrprintf("Error: Unexpected token count on line %" PRIuPTR " of --%s file.\n", line_idx, flag_name);
1153 return kPglRetMalformedInput;
1154 }
1155 }
1156 if (line_iter) {
1157 *line_iterp = line_iter;
1158 }
1159 *line_idx_ptr = line_idx;
1160 *xid_mode_ptr = xid_mode;
1161 return kPglRetSuccess;
1162 }
1163
1164
1165 const char g_xymt_log_names[kChrOffsetCt][5] = {"chrX", "chrY", "XY", "chrM", "PAR1", "PAR2"};
1166
1167 static_assert(!(kChrRawEnd % kBytesPerVec), "kChrRawEnd must be a multiple of kBytesPerVec.");
InitChrInfo(ChrInfo * cip)1168 PglErr InitChrInfo(ChrInfo* cip) {
1169 // "constructor". initializes with maximum capacity. doesn't use bigstack.
1170 // chr_mask, haploid_mask: bits
1171 // chr_file_order, chr_idx_to_foidx: uint32s
1172 // chr_fo_vidx_start: uint32s, with an extra trailing element
1173 // nonstd_names: intptr_ts
1174 // nonstd_id_htable: kChrHtableSize uint32s
1175
1176 // this assumes kChrRawEnd is divisible by kBytesPerVec
1177 const uintptr_t vecs_required = 2 * BitCtToVecCt(kChrRawEnd) + 3 * (kChrRawEnd / kInt32PerVec) + 1 + (kChrRawEnd / kWordsPerVec) + Int32CtToVecCt(kChrHtableSize);
1178
1179 // needed for proper cleanup
1180 cip->name_ct = 0;
1181 cip->incl_excl_name_stack = nullptr;
1182 if (unlikely(vecaligned_malloc(vecs_required * kBytesPerVec, &(cip->chr_mask)))) {
1183 return kPglRetNomem;
1184 }
1185 uintptr_t* alloc_iter = &(cip->chr_mask[BitCtToVecCt(kChrRawEnd) * kWordsPerVec]);
1186 cip->haploid_mask = alloc_iter;
1187 alloc_iter = &(alloc_iter[BitCtToVecCt(kChrRawEnd) * kWordsPerVec]);
1188 cip->chr_file_order = R_CAST(uint32_t*, alloc_iter);
1189 alloc_iter = &(alloc_iter[(kChrRawEnd / kInt32PerVec) * kWordsPerVec]);
1190 cip->chr_fo_vidx_start = R_CAST(uint32_t*, alloc_iter);
1191 alloc_iter = &(alloc_iter[((kChrRawEnd / kInt32PerVec) + 1) * kWordsPerVec]);
1192 cip->chr_idx_to_foidx = R_CAST(uint32_t*, alloc_iter);
1193 alloc_iter = &(alloc_iter[(kChrRawEnd / kInt32PerVec) * kWordsPerVec]);
1194 cip->nonstd_names = R_CAST(const char**, alloc_iter);
1195 alloc_iter = &(alloc_iter[kChrRawEnd]);
1196 cip->nonstd_id_htable = R_CAST(uint32_t*, alloc_iter);
1197 // alloc_iter = &(alloc_iter[((kChrHtableSize + (kInt32PerVec - 1)) / kInt32PerVec) * kWordsPerVec]);
1198 // SetAllU32Arr(kChrHtableSize, cip->nonstd_id_htable);
1199
1200 ZeroWArr(kChrMaskWords, cip->chr_mask);
1201 ZeroWArr(kChrExcludeWords, cip->chr_exclude);
1202
1203 // This is a change from plink 1.x. MT > M since the former matches Ensembl,
1204 // while the latter doesn't match any major resource. no "chr" to reduce
1205 // file sizes and reduce the impact of this change.
1206 cip->output_encoding = kfChrOutputMT;
1207
1208 cip->zero_extra_chrs = 0;
1209 cip->is_include_stack = 0;
1210 cip->chrset_source = kChrsetSourceDefault;
1211 cip->autosome_ct = 22;
1212 for (uint32_t xymt_idx = 0; xymt_idx != kChrOffsetCt; ++xymt_idx) {
1213 cip->xymt_codes[xymt_idx] = 23 + xymt_idx;
1214 }
1215 // Now includes MT again, as of alpha 2.
1216 cip->haploid_mask[0] = 0x5800000;
1217 ZeroWArr(kChrMaskWords - 1, &(cip->haploid_mask[1]));
1218 return kPglRetSuccess;
1219 }
1220
1221 // explicit plink 1.07 species (now initialized by command line parser):
1222 // human: 22, X, Y, XY, MT, PAR1, PAR2 (PAR1/PAR2 added, XY deprecated in plink
1223 // 2.0)
1224 // cow: 29, X, Y, MT
1225 // dog: 38, X, Y, XY, MT
1226 // horse: 31, X, Y
1227 // mouse: 19, X, Y
1228 // rice: 12
1229 // sheep: 26, X, Y
1230
1231 // must be safe to call this twice.
FinalizeChrset(MiscFlags misc_flags,ChrInfo * cip)1232 void FinalizeChrset(MiscFlags misc_flags, ChrInfo* cip) {
1233 uint32_t autosome_ct = cip->autosome_ct;
1234 uint32_t max_code = autosome_ct;
1235 for (uint32_t xymt_idx_p1 = kChrOffsetCt; xymt_idx_p1; --xymt_idx_p1) {
1236 if (!IsI32Neg(cip->xymt_codes[xymt_idx_p1 - 1])) {
1237 max_code = autosome_ct + xymt_idx_p1;
1238 break;
1239 }
1240 }
1241
1242 // could initialize haploid_mask bits (after the first) here, instead of
1243 // earlier...
1244
1245 cip->max_numeric_code = MINV(max_code, autosome_ct + 4);
1246 cip->max_code = max_code;
1247 uintptr_t* chr_mask = cip->chr_mask;
1248 uintptr_t last_chr_mask_word = chr_mask[kChrMaskWords - 1];
1249 STD_ARRAY_REF(uint32_t, kChrOffsetCt) xymt_codes = cip->xymt_codes;
1250 if (last_chr_mask_word) {
1251 // avoids repeating some work if this is called twice
1252 chr_mask[kChrMaskWords - 1] = 0;
1253
1254 uint32_t xymt_include = last_chr_mask_word >> (kBitsPerWord - kChrOffsetCt);
1255 do {
1256 const uint32_t xymt_idx = ctzu32(xymt_include);
1257 const uint32_t cur_chr_code = xymt_codes[xymt_idx];
1258 if (!IsI32Neg(cur_chr_code)) {
1259 SetBit(cur_chr_code, chr_mask);
1260 }
1261 xymt_include &= xymt_include - 1;
1262 } while (xymt_include);
1263 } else if (AllWordsAreZero(chr_mask, kChrExcludeWords) && (!cip->is_include_stack)) {
1264 // init_default_chr_mask()
1265 SetAllBits(cip->autosome_ct + 1, chr_mask);
1266 for (uint32_t xymt_idx = 0; xymt_idx != kChrOffsetCt; ++xymt_idx) {
1267 const uint32_t cur_chr_code = cip->xymt_codes[xymt_idx];
1268 if (!IsI32Neg(cur_chr_code)) {
1269 SetBit(cur_chr_code, chr_mask);
1270 }
1271 }
1272 } else if (misc_flags & (kfMiscAutosomePar | kfMiscAutosomeOnly)) {
1273 FillBitsNz(1, cip->autosome_ct + 1, chr_mask);
1274 ClearBitsNz(cip->autosome_ct + 1, kChrExcludeWords * kBitsPerWord, chr_mask);
1275 if (misc_flags & kfMiscAutosomePar) {
1276 uint32_t par_chr_code = cip->xymt_codes[kChrOffsetXY];
1277 if (!IsI32Neg(par_chr_code)) {
1278 SetBit(par_chr_code, chr_mask);
1279 }
1280 par_chr_code = cip->xymt_codes[kChrOffsetPAR1];
1281 if (!IsI32Neg(par_chr_code)) {
1282 SetBit(par_chr_code, chr_mask);
1283 }
1284 par_chr_code = cip->xymt_codes[kChrOffsetPAR2];
1285 if (!IsI32Neg(par_chr_code)) {
1286 SetBit(par_chr_code, chr_mask);
1287 }
1288 }
1289 }
1290
1291 uintptr_t* chr_exclude = cip->chr_exclude;
1292 uintptr_t last_chr_exclude_word = chr_exclude[kChrExcludeWords - 1];
1293 uint32_t xymt_exclude = last_chr_exclude_word >> (kBitsPerWord - kChrOffsetCt);
1294 last_chr_exclude_word = bzhi(last_chr_exclude_word, kBitsPerWord - kChrOffsetCt);
1295 for (uint32_t widx = 0; widx != kChrExcludeWords - 1; ++widx) {
1296 chr_mask[widx] &= ~chr_exclude[widx];
1297 }
1298 chr_mask[kChrExcludeWords - 1] &= ~last_chr_exclude_word;
1299 if (xymt_exclude) {
1300 do {
1301 const uint32_t xymt_idx = ctzu32(xymt_exclude);
1302 const uint32_t cur_chr_code = xymt_codes[xymt_idx];
1303 if (!IsI32Neg(cur_chr_code)) {
1304 ClearBit(cur_chr_code, chr_mask);
1305 }
1306 xymt_exclude &= xymt_exclude - 1;
1307 } while (xymt_exclude);
1308 }
1309 SetAllU32Arr(max_code + 1, cip->chr_idx_to_foidx);
1310 }
1311
ForgetExtraChrNames(uint32_t reinitialize,ChrInfo * cip)1312 void ForgetExtraChrNames(uint32_t reinitialize, ChrInfo* cip) {
1313 const uint32_t name_ct = cip->name_ct;
1314 if (name_ct) {
1315 const char** nonstd_names = cip->nonstd_names;
1316 const uint32_t chr_idx_end = cip->max_code + 1 + name_ct;
1317 for (uint32_t chr_idx = cip->max_code + 1; chr_idx != chr_idx_end; ++chr_idx) {
1318 free_const(nonstd_names[chr_idx]);
1319 nonstd_names[chr_idx] = nullptr;
1320 }
1321 if (reinitialize) {
1322 // SetAllU32Arr(kChrHtableSize, cip->nonstd_id_htable);
1323 cip->name_ct = 0;
1324 }
1325 }
1326 }
1327
1328 // not currently called. might want to do so in the future.
FinalizeChrInfo(ChrInfo * cip)1329 PglErr FinalizeChrInfo(ChrInfo* cip) {
1330 const uint32_t chr_ct = cip->chr_ct;
1331 const uint32_t name_ct = cip->name_ct;
1332 const uint32_t chr_code_end = cip->max_code + 1 + name_ct;
1333 const uint32_t chr_code_bitvec_ct = BitCtToVecCt(chr_code_end);
1334 const uint32_t chr_ct_int32vec_ct = Int32CtToVecCt(chr_ct);
1335 const uint32_t chr_ct_p1_int32vec_ct = 1 + (chr_ct / kInt32PerVec);
1336 const uint32_t chr_code_end_int32vec_ct = Int32CtToVecCt(chr_code_end);
1337 const uint32_t chr_code_end_wordvec_ct = WordCtToVecCt(chr_code_end);
1338 uint32_t final_vecs_required = 2 * chr_code_bitvec_ct + chr_ct_int32vec_ct + chr_ct_p1_int32vec_ct + chr_code_end_int32vec_ct;
1339 if (name_ct) {
1340 final_vecs_required += chr_code_end_wordvec_ct + Int32CtToVecCt(kChrHtableSize);
1341 }
1342 uintptr_t* new_alloc;
1343 if (unlikely(vecaligned_malloc(final_vecs_required * kBytesPerVec, &new_alloc))) {
1344 return kPglRetNomem;
1345 }
1346 uintptr_t* old_alloc = cip->chr_mask;
1347 uintptr_t* new_alloc_iter = new_alloc;
1348
1349 memcpy(new_alloc_iter, cip->chr_mask, chr_code_bitvec_ct * kBytesPerVec);
1350 cip->chr_mask = new_alloc_iter;
1351 new_alloc_iter = &(new_alloc_iter[chr_code_bitvec_ct * kWordsPerVec]);
1352
1353 memcpy(new_alloc_iter, cip->haploid_mask, chr_code_bitvec_ct * kBytesPerVec);
1354 cip->haploid_mask = new_alloc_iter;
1355 new_alloc_iter = &(new_alloc_iter[chr_code_bitvec_ct * kWordsPerVec]);
1356
1357 memcpy(new_alloc_iter, cip->chr_file_order, chr_ct_int32vec_ct * kBytesPerVec);
1358 cip->chr_file_order = R_CAST(uint32_t*, new_alloc_iter);
1359 new_alloc_iter = &(new_alloc_iter[chr_ct_int32vec_ct * kWordsPerVec]);
1360
1361 memcpy(new_alloc_iter, cip->chr_fo_vidx_start, chr_ct_p1_int32vec_ct * kBytesPerVec);
1362 cip->chr_fo_vidx_start = R_CAST(uint32_t*, new_alloc_iter);
1363 new_alloc_iter = &(new_alloc_iter[chr_ct_p1_int32vec_ct * kWordsPerVec]);
1364
1365 memcpy(new_alloc_iter, cip->chr_idx_to_foidx, chr_code_end_int32vec_ct * kBytesPerVec);
1366 cip->chr_idx_to_foidx = R_CAST(uint32_t*, new_alloc_iter);
1367
1368 if (!name_ct) {
1369 cip->nonstd_names = nullptr;
1370 cip->nonstd_id_htable = nullptr;
1371 } else {
1372 new_alloc_iter = &(new_alloc_iter[chr_code_end_int32vec_ct * kWordsPerVec]);
1373
1374 memcpy(new_alloc_iter, cip->nonstd_names, chr_code_end_wordvec_ct * kBytesPerVec);
1375 cip->nonstd_names = R_CAST(const char**, new_alloc_iter);
1376 new_alloc_iter = &(new_alloc_iter[chr_code_end_wordvec_ct * kWordsPerVec]);
1377
1378 memcpy(new_alloc_iter, cip->nonstd_id_htable, kChrHtableSize * sizeof(int32_t));
1379 cip->nonstd_id_htable = R_CAST(uint32_t*, new_alloc_iter);
1380 }
1381 vecaligned_free(old_alloc);
1382 return kPglRetSuccess;
1383 }
1384
CleanupChrInfo(ChrInfo * cip)1385 void CleanupChrInfo(ChrInfo* cip) {
1386 if (cip->chr_mask) {
1387 ForgetExtraChrNames(0, cip);
1388 vecaligned_free(cip->chr_mask);
1389 cip->chr_mask = nullptr;
1390 }
1391 LlStr* llstr_ptr = cip->incl_excl_name_stack;
1392 while (llstr_ptr) {
1393 LlStr* next_ptr = llstr_ptr->next;
1394 free(llstr_ptr);
1395 llstr_ptr = next_ptr;
1396 }
1397 cip->incl_excl_name_stack = nullptr;
1398 }
1399
ChrNameStd(const ChrInfo * cip,uint32_t chr_idx,char * buf)1400 char* ChrNameStd(const ChrInfo* cip, uint32_t chr_idx, char* buf) {
1401 const uint32_t output_encoding = cip->output_encoding;
1402 if (chr_idx > cip->max_numeric_code) {
1403 // This is usually encoding-independent; no real numeric representation of
1404 // PAR1/PAR2 is defined. However, since there'd otherwise be no way to
1405 // include the pseudoautosomal regions in e.g. ADMIXTURE, we render them
1406 // them as 25 (in the human case) when "--output-chr 26" is specified.
1407 if (output_encoding) {
1408 const uint32_t parx = 'P' + ('A' << 8) + ('R' << 16) + ('0' << 24) + ((chr_idx - cip->max_numeric_code) << 24);
1409 return memcpya(buf, &parx, 4);
1410 }
1411 return u32toa(cip->autosome_ct + (kChrOffsetXY + 1), buf);
1412 }
1413 if (output_encoding & (kfChrOutputPrefix | kfChrOutput0M)) {
1414 if (output_encoding == kfChrOutput0M) {
1415 // force two chars
1416 if (chr_idx <= cip->autosome_ct) {
1417 buf = memcpya_k(buf, &(kDigitPair[chr_idx]), 2);
1418 } else if (chr_idx == cip->xymt_codes[kChrOffsetY]) {
1419 buf = strcpya_k(buf, "XY");
1420 } else {
1421 *buf++ = '0';
1422 if (chr_idx == cip->xymt_codes[kChrOffsetX]) {
1423 *buf++ = 'X';
1424 } else {
1425 // assumes only X/Y/XY/MT defined
1426 *buf++ = (chr_idx == cip->xymt_codes[kChrOffsetY])? 'Y' : 'M';
1427 }
1428 }
1429 return buf;
1430 }
1431 buf = strcpya_k(buf, "chr");
1432 }
1433 if ((!(output_encoding & (kfChrOutputM | kfChrOutputMT))) || (chr_idx <= cip->autosome_ct)) {
1434 return u32toa(chr_idx, buf);
1435 }
1436 if (chr_idx == cip->xymt_codes[kChrOffsetX]) {
1437 *buf++ = 'X';
1438 } else if (chr_idx == cip->xymt_codes[kChrOffsetY]) {
1439 *buf++ = 'Y';
1440 } else if (chr_idx == cip->xymt_codes[kChrOffsetXY]) {
1441 buf = strcpya_k(buf, "XY");
1442 } else {
1443 *buf++ = 'M';
1444 if (output_encoding & kfChrOutputMT) {
1445 *buf++ = 'T';
1446 }
1447 }
1448 return buf;
1449 }
1450
chrtoa(const ChrInfo * cip,uint32_t chr_idx,char * buf)1451 char* chrtoa(const ChrInfo* cip, uint32_t chr_idx, char* buf) {
1452 // assumes chr_idx is valid
1453 if (!chr_idx) {
1454 // Note that this never has a 'chr' prefix. Which is probably fine, it
1455 // isn't a real chromosome.
1456 *buf++ = '0';
1457 return buf;
1458 }
1459 if (chr_idx <= cip->max_code) {
1460 return ChrNameStd(cip, chr_idx, buf);
1461 }
1462 if (cip->zero_extra_chrs) {
1463 *buf++ = '0';
1464 return buf;
1465 }
1466 return strcpya(buf, cip->nonstd_names[chr_idx]);
1467 }
1468
GetMaxChrSlen(const ChrInfo * cip)1469 uint32_t GetMaxChrSlen(const ChrInfo* cip) {
1470 // does not include trailing null
1471 // can be overestimate
1472 // if more functions start calling this, it should just be built into
1473 // load_bim() instead
1474 if (cip->zero_extra_chrs) {
1475 return 3 + kMaxChrTextnum;
1476 }
1477 const uint32_t chr_ct = cip->chr_ct;
1478 const uint32_t max_code = cip->max_code;
1479 uint32_t max_chr_slen = 3 + kMaxChrTextnum;
1480 for (uint32_t chr_fo_idx = 0; chr_fo_idx != chr_ct; ++chr_fo_idx) {
1481 const uint32_t chr_idx = cip->chr_file_order[chr_fo_idx];
1482 if (!IsSet(cip->chr_mask, chr_idx)) {
1483 continue;
1484 }
1485 if (chr_idx > max_code) {
1486 const uint32_t name_slen = strlen(cip->nonstd_names[chr_idx]);
1487 if (name_slen > max_chr_slen) {
1488 max_chr_slen = name_slen;
1489 }
1490 }
1491 }
1492 return max_chr_slen;
1493 }
1494
IsHaploidChrPresent(const ChrInfo * cip)1495 uint32_t IsHaploidChrPresent(const ChrInfo* cip) {
1496 const uintptr_t* chr_mask = cip->chr_mask;
1497 const uintptr_t* haploid_mask = cip->haploid_mask;
1498 // since we don't load haploid vs. diploid info from ##contig header lines,
1499 // this is sufficient
1500 for (uint32_t widx = 0; widx != kChrExcludeWords; ++widx) {
1501 if (chr_mask[widx] & haploid_mask[widx]) {
1502 return 1;
1503 }
1504 }
1505 return 0;
1506 }
1507
IsAutosomalDiploidChrPresent(const ChrInfo * cip)1508 uint32_t IsAutosomalDiploidChrPresent(const ChrInfo* cip) {
1509 const uintptr_t* haploid_mask = cip->haploid_mask;
1510 if (haploid_mask[0] & 1) {
1511 return 0;
1512 }
1513 const uint32_t max_code_p1 = cip->max_code + 1;
1514 const uint32_t name_ct = cip->name_ct;
1515 const uint32_t chr_code_end = max_code_p1 + name_ct;
1516 const uint32_t word_ct = BitCtToWordCt(chr_code_end);
1517 const uintptr_t* chr_mask = cip->chr_mask;
1518 for (uint32_t widx = 0; widx != word_ct; ++widx) {
1519 if (chr_mask[widx] & (~haploid_mask[widx])) {
1520 return 1;
1521 }
1522 }
1523 return 0;
1524 }
1525
SingleCapLetterChrCode(uint32_t cap_letter)1526 static inline uint32_t SingleCapLetterChrCode(uint32_t cap_letter) {
1527 if (cap_letter == 'X') {
1528 return kChrRawX;
1529 }
1530 if (cap_letter == 'Y') {
1531 return kChrRawY;
1532 }
1533 if (cap_letter == 'M') {
1534 return kChrRawMT;
1535 }
1536 return UINT32_MAX;
1537 }
1538
1539 static_assert(kMaxChrTextnumSlen == 2, "GetChrCodeRaw() must be updated.");
GetChrCodeRaw(const char * str_iter)1540 uint32_t GetChrCodeRaw(const char* str_iter) {
1541 // any character <= ' ' is considered a terminator
1542 // note that char arithmetic tends to be compiled to uint32 operations, so we
1543 // mostly work with ints here
1544 uint32_t first_char_code = ctou32(str_iter[0]);
1545 uint32_t first_char_toi;
1546 if (first_char_code < 58) {
1547 GetChrCodeRaw_digits:
1548 first_char_toi = first_char_code - '0';
1549 if (first_char_toi < 10) {
1550 const uint32_t second_char_code = ctou32(str_iter[1]);
1551 if (second_char_code <= ' ') {
1552 return first_char_toi;
1553 }
1554 if (ctou32(str_iter[2]) <= ' ') {
1555 const uint32_t second_char_toi = second_char_code - '0';
1556 if (second_char_toi < 10) {
1557 return first_char_toi * 10 + second_char_toi;
1558 }
1559 if (!first_char_toi) {
1560 // accept '0X', '0Y', '0M' emitted by Oxford software
1561 return SingleCapLetterChrCode(second_char_code & 0xdf);
1562 }
1563 }
1564 }
1565 return UINT32_MAX;
1566 }
1567 first_char_code &= 0xdf;
1568 uint32_t second_char_code = ctou32(str_iter[1]);
1569 if (first_char_code == 'P') {
1570 // chrPAR1 *not* supported; has to be PAR1 by itself.
1571 // can't do uint16_t compare of multiple characters, since we could be
1572 // dealing with a length-1 null-terminated string; that IS faster when it's
1573 // safe, though
1574 if (((second_char_code & 0xdf) == 'A') && ((ctou32(str_iter[2]) & 0xdf) == 'R')) {
1575 const uint32_t par_idx_m1 = ctou32(str_iter[3]) - '1';
1576 if ((par_idx_m1 < 2) && (ctou32(str_iter[4]) <= ' ')) {
1577 return kChrRawPAR1 + par_idx_m1;
1578 }
1579 }
1580 return UINT32_MAX;
1581 }
1582 if (first_char_code == 'C') {
1583 if (((second_char_code & 0xdf) != 'H') || ((ctou32(str_iter[2]) & 0xdf) != 'R')) {
1584 return UINT32_MAX;
1585 }
1586 str_iter = &(str_iter[3]);
1587 first_char_code = ctou32(str_iter[0]);
1588 if (first_char_code < 58) {
1589 goto GetChrCodeRaw_digits;
1590 }
1591 first_char_code &= 0xdf;
1592 second_char_code = ctou32(str_iter[1]);
1593 }
1594 if (second_char_code <= ' ') {
1595 return SingleCapLetterChrCode(first_char_code);
1596 }
1597 if (ctou32(str_iter[2]) <= ' ') {
1598 second_char_code &= 0xdf;
1599 if ((first_char_code == 'X') && (second_char_code == 'Y')) {
1600 return kChrRawXY;
1601 } else if ((first_char_code == 'M') && (second_char_code == 'T')) {
1602 return kChrRawMT;
1603 }
1604 }
1605 return UINT32_MAX;
1606 }
1607
GetChrCode(const char * chr_name,const ChrInfo * cip,uint32_t name_slen)1608 uint32_t GetChrCode(const char* chr_name, const ChrInfo* cip, uint32_t name_slen) {
1609 // requires chr_name to be null-terminated
1610 // in practice, name_slen will usually already be known, may as well avoid
1611 // redundant strlen() calls even though this uglifies the interface
1612 // does not perform exhaustive error-checking
1613 // UINT32_MAX = --allow-extra-chr ok, UINT32_MAXM1 = total fail
1614 uint32_t chr_code_raw = GetChrCodeRaw(chr_name);
1615 if (chr_code_raw <= cip->max_code) {
1616 return chr_code_raw;
1617 }
1618 if (chr_code_raw != UINT32_MAX) {
1619 if (chr_code_raw >= kMaxContigs) {
1620 return cip->xymt_codes[chr_code_raw - kMaxContigs];
1621 }
1622 return UINT32_MAXM1;
1623 }
1624 if (!cip->name_ct) {
1625 return UINT32_MAX;
1626 }
1627 // note that IdHtableFind returns UINT32_MAX if name not found
1628 // can't overread, nonstd_names not in main workspace
1629 return IdHtableFind(chr_name, cip->nonstd_names, cip->nonstd_id_htable, name_slen, kChrHtableSize);
1630 }
1631
GetChrCodeCounted(const ChrInfo * cip,uint32_t name_slen,char * chr_name)1632 uint32_t GetChrCodeCounted(const ChrInfo* cip, uint32_t name_slen, char* chr_name) {
1633 // When the chromosome name isn't null-terminated.
1634 // Yeah, probably want to revise this so that chr_name doesn't need to be
1635 // mutable here. However, that currently requires new substitutes for both
1636 // GetChrCodeRaw AND IdHtableFind (IdHtableFindNnt doesn't work due to
1637 // overread); when either of those are needed for some other reason, that's a
1638 // good time to revise this function.
1639 char* s_end = &(chr_name[name_slen]);
1640 const char tmpc = *s_end;
1641 *s_end = '\0';
1642 const uint32_t chr_code = GetChrCode(chr_name, cip, name_slen);
1643 *s_end = tmpc;
1644 return chr_code;
1645 }
1646
ChrError(const char * chr_name,const char * file_descrip,const ChrInfo * cip,uintptr_t line_idx,uint32_t error_code)1647 void ChrError(const char* chr_name, const char* file_descrip, const ChrInfo* cip, uintptr_t line_idx, uint32_t error_code) {
1648 // assumes chr_name is null-terminated
1649 const uint32_t raw_code = GetChrCodeRaw(chr_name);
1650 logputs("\n");
1651 if (line_idx) {
1652 logerrprintfww("Error: Invalid chromosome code '%s' on line %" PRIuPTR " of %s.\n", chr_name, line_idx, file_descrip);
1653 } else {
1654 logerrprintfww("Error: Invalid chromosome code '%s' in %s.\n", chr_name, file_descrip);
1655 }
1656 if ((S_CAST(int32_t, raw_code) > S_CAST(int32_t, cip->max_code)) && ((raw_code <= kMaxChrTextnum + kChrOffsetCt) || (raw_code >= kMaxContigs))) {
1657 // numeric code or X/Y/MT/PAR
1658 if (cip->chrset_source == kChrsetSourceDefault) {
1659 logerrputs("(This is disallowed for humans. Check if the problem is with your data, or if\nyou forgot to define a different chromosome set with e.g. --chr-set.).\n");
1660 } else if (cip->chrset_source == kChrsetSourceCmdline) {
1661 logerrputs("(This is disallowed by your command-line flags.)\n");
1662 } else {
1663 // kChrsetSourceFile
1664 logerrputs("(This is disallowed by the file's own ##chrSet header line.)\n");
1665 }
1666 // maybe want to print message(s) depending on whether chromosome set was
1667 // defined on the command line or by the input file?
1668 } else if (error_code == UINT32_MAX) {
1669 logerrputs("(Use --allow-extra-chr to force it to be accepted.)\n");
1670 }
1671 }
1672
TryToAddChrName(const char * chr_name,const char * file_descrip,uintptr_t line_idx,uint32_t name_slen,uint32_t allow_extra_chrs,uint32_t * chr_idx_ptr,ChrInfo * cip)1673 PglErr TryToAddChrName(const char* chr_name, const char* file_descrip, uintptr_t line_idx, uint32_t name_slen, uint32_t allow_extra_chrs, uint32_t* chr_idx_ptr, ChrInfo* cip) {
1674 // assumes chr_name is either nonstandard (i.e. not "2", "chr2", "chrX",
1675 // etc.), or a rejected xymt.
1676 // requires chr_name to be null-terminated
1677 // assumes chr_idx currently has the return value of GetChrCode()
1678 if (unlikely((!allow_extra_chrs) || ((*chr_idx_ptr) == UINT32_MAXM1))) {
1679 ChrError(chr_name, file_descrip, cip, line_idx, *chr_idx_ptr);
1680 return kPglRetMalformedInput;
1681 }
1682
1683 // quasi-bugfix: remove redundant hash table check
1684
1685 if (unlikely(chr_name[0] == '#')) {
1686 // redundant with some of the comment-skipping loaders, but this isn't
1687 // performance-critical
1688 logputs("\n");
1689 logerrputs("Error: Chromosome/contig names may not begin with '#'.\n");
1690 return kPglRetMalformedInput;
1691 }
1692 if (unlikely(name_slen > kMaxIdSlen)) {
1693 logputs("\n");
1694 if (line_idx) {
1695 logerrprintfww("Error: Line %" PRIuPTR " of %s has an excessively long chromosome/contig name. (The " PROG_NAME_STR " limit is " MAX_ID_SLEN_STR " characters.)\n", line_idx, file_descrip);
1696 } else {
1697 logerrprintfww("Error: Excessively long chromosome/contig name in %s. (The " PROG_NAME_STR " limit is " MAX_ID_SLEN_STR " characters.)\n", file_descrip);
1698 }
1699 return kPglRetMalformedInput;
1700 }
1701 const uint32_t max_code_p1 = cip->max_code + 1;
1702 const uint32_t name_ct = cip->name_ct;
1703 const uint32_t chr_code_end = max_code_p1 + name_ct;
1704 if (unlikely(chr_code_end == kMaxContigs)) {
1705 logputs("\n");
1706 logerrputs("Error: Too many distinct nonstandard chromosome/contig names.\n");
1707 return kPglRetMalformedInput;
1708 }
1709 if (!name_ct) {
1710 // lazy initialization
1711 SetAllU32Arr(kChrHtableSize, cip->nonstd_id_htable);
1712 }
1713 char* new_nonstd_name;
1714 if (unlikely(pgl_malloc(name_slen + 1, &new_nonstd_name))) {
1715 return kPglRetNomem;
1716 }
1717 LlStr* name_stack_ptr = cip->incl_excl_name_stack;
1718 uint32_t in_name_stack = 0;
1719 while (name_stack_ptr) {
1720 // there shouldn't be many of these, so sorting is unimportant
1721 if (!strcmp(chr_name, name_stack_ptr->str)) {
1722 in_name_stack = 1;
1723 break;
1724 }
1725 name_stack_ptr = name_stack_ptr->next;
1726 }
1727 if ((in_name_stack && cip->is_include_stack) || ((!in_name_stack) && (!cip->is_include_stack))) {
1728 SetBit(chr_code_end, cip->chr_mask);
1729 if (cip->haploid_mask[0] & 1) {
1730 SetBit(chr_code_end, cip->haploid_mask);
1731 }
1732 }
1733 memcpy(new_nonstd_name, chr_name, name_slen + 1);
1734 cip->nonstd_names[chr_code_end] = new_nonstd_name;
1735 *chr_idx_ptr = chr_code_end;
1736 cip->name_ct = name_ct + 1;
1737 uint32_t* id_htable = cip->nonstd_id_htable;
1738 for (uint32_t hashval = Hashceil(chr_name, name_slen, kChrHtableSize); ; ) {
1739 if (id_htable[hashval] == UINT32_MAX) {
1740 id_htable[hashval] = chr_code_end;
1741 return kPglRetSuccess;
1742 }
1743 if (++hashval == kChrHtableSize) {
1744 hashval = 0;
1745 }
1746 }
1747 }
1748
1749
1750 /*
1751 uintptr_t count_11_vecs(const VecW* geno_vvec, uintptr_t vec_ct) {
1752 // Counts number of aligned 11s in vptr[0..(vec_ct-1)]. Assumes vec_ct is a
1753 // multiple of 6 (0 ok).
1754 assert(!(vec_ct % 6));
1755 const VecW m1 = VCONST_W(kMask5555);
1756 const VecW m2 = VCONST_W(kMask3333);
1757 const VecW m4 = VCONST_W(kMask0F0F);
1758 const VecW m8 = VCONST_W(kMask00FF);
1759 const VecW* geno_vvec_iter = geno_vvec;
1760 const VecW* geno_vvec_end = &(geno_vvec[vec_ct]);
1761 uintptr_t tot = 0;
1762
1763 while (1) {
1764 const VecW* geno_vvec_stop = &(geno_vvec_iter[60]);
1765
1766 UniVec acc;
1767 acc.vw = vecw_setzero();
1768
1769 if (geno_vvec_stop > geno_vvec_end) {
1770 if (geno_vvec_iter == geno_vvec_end) {
1771 return tot;
1772 }
1773 geno_vvec_stop = geno_vvec_end;
1774 }
1775 do {
1776 VecW cur_geno_vword = *geno_vvec_iter++;
1777 VecW count1 = cur_geno_vword & m1;
1778 count1 = count1 & vecw_srli(cur_geno_vword, 1);
1779
1780 cur_geno_vword = *geno_vvec_iter++;
1781 VecW cur_11 = cur_geno_vword & m1;
1782 cur_11 = cur_11 & vecw_srli(cur_geno_vword, 1);
1783 count1 = count1 + cur_11;
1784
1785 cur_geno_vword = *geno_vvec_iter++;
1786 cur_11 = cur_geno_vword & m1;
1787 cur_11 = cur_11 & vecw_srli(cur_geno_vword, 1);
1788 count1 = count1 + cur_11;
1789 count1 = (count1 & m2) + (vecw_srli(count1, 2) & m2);
1790
1791 cur_geno_vword = *geno_vvec_iter++;
1792 VecW count2 = cur_geno_vword & m1;
1793 count2 = count2 & vecw_srli(cur_geno_vword, 1);
1794
1795 cur_geno_vword = *geno_vvec_iter++;
1796 VecW cur_11 = cur_geno_vword & m1;
1797 cur_11 = cur_11 & vecw_srli(cur_geno_vword, 1);
1798 count2 = count2 + cur_11;
1799
1800 cur_geno_vword = *geno_vvec_iter++;
1801 cur_11 = cur_geno_vword & m1;
1802 cur_11 = cur_11 & vecw_srli(cur_geno_vword, 1);
1803 count2 = count2 + cur_11;
1804 count1 = count1 + (count2 & m2) + (vecw_srli(count2, 2) & m2);
1805
1806 acc.vw = acc.vw + (count1 & m4) + (vecw_srli(count1, 4) & m4);
1807 } while (geno_vvec_iter < geno_vvec_stop);
1808 acc.vw = (acc.vw & m8) + (vecw_srli(acc.vw, 8) & m8);
1809 tot += UniVecHsum16(acc);
1810 }
1811 }
1812
1813 uintptr_t count_11_longs(const uintptr_t* genovec, uintptr_t word_ct) {
1814 uintptr_t tot = 0;
1815 if (word_ct >= (6 * kWordsPerVec)) {
1816 assert(VecIsAligned(genovec));
1817 const uintptr_t remainder = word_ct % (6 * kWordsPerVec);
1818 const uintptr_t main_block_word_ct = word_ct - remainder;
1819 tot = count_11_vecs((const VecW*)genovec, main_block_word_ct / kWordsPerVec);
1820 word_ct = remainder;
1821 genovec = &(genovec[main_block_word_ct]);
1822 }
1823 for (uintptr_t trailing_word_idx = 0; trailing_word_idx != word_ct; ++trailing_word_idx) {
1824 const uintptr_t cur_geno_word = genovec[trailing_word_idx];
1825 tot += Popcount01Word(Word11(cur_geno_word));
1826 }
1827 }
1828 */
1829
AllGenoEqual(const uintptr_t * genoarr,uint32_t sample_ct)1830 uint32_t AllGenoEqual(const uintptr_t* genoarr, uint32_t sample_ct) {
1831 const uint32_t word_ct_m1 = (sample_ct - 1) / kBitsPerWordD2;
1832 const uintptr_t match_word = (genoarr[0] & 3) * kMask5555;
1833 for (uint32_t widx = 0; widx != word_ct_m1; ++widx) {
1834 if (genoarr[widx] != match_word) {
1835 return 0;
1836 }
1837 }
1838 const uint32_t remainder = ModNz(sample_ct, kBitsPerWordD2);
1839 return !bzhi_max(genoarr[word_ct_m1] ^ match_word, 2 * remainder);
1840 }
1841
InterleavedMaskZero(const uintptr_t * __restrict interleaved_mask,uintptr_t vec_ct,uintptr_t * __restrict genovec)1842 void InterleavedMaskZero(const uintptr_t* __restrict interleaved_mask, uintptr_t vec_ct, uintptr_t* __restrict genovec) {
1843 const uintptr_t twovec_ct = vec_ct / 2;
1844 #ifdef __LP64__
1845 const VecW m1 = VCONST_W(kMask5555);
1846 const VecW* interleaved_mask_iter = R_CAST(const VecW*, interleaved_mask);
1847 VecW* genovvec_iter = R_CAST(VecW*, genovec);
1848 for (uintptr_t twovec_idx = 0; twovec_idx != twovec_ct; ++twovec_idx) {
1849 const VecW mask_vvec = *interleaved_mask_iter++;
1850 VecW mask_first = mask_vvec & m1;
1851 mask_first = mask_first | vecw_slli(mask_first, 1);
1852 VecW mask_second = vecw_and_notfirst(m1, mask_vvec);
1853 mask_second = mask_second | vecw_srli(mask_second, 1);
1854 *genovvec_iter = (*genovvec_iter) & mask_first;
1855 ++genovvec_iter;
1856 *genovvec_iter = (*genovvec_iter) & mask_second;
1857 ++genovvec_iter;
1858 }
1859 if (vec_ct & 1) {
1860 VecW mask_first = *interleaved_mask_iter;
1861 mask_first = mask_first | vecw_slli(mask_first, 1);
1862 *genovvec_iter = (*genovvec_iter) & mask_first;
1863 }
1864 #else
1865 const uintptr_t* interleaved_mask_iter = interleaved_mask;
1866 uintptr_t* genovec_iter = genovec;
1867 for (uintptr_t twovec_idx = 0; twovec_idx != twovec_ct; ++twovec_idx) {
1868 const uintptr_t mask_word = *interleaved_mask_iter++;
1869 *genovec_iter &= (mask_word & kMask5555) * 3;
1870 ++genovec_iter;
1871 *genovec_iter &= ((mask_word >> 1) & kMask5555) * 3;
1872 ++genovec_iter;
1873 }
1874 if (vec_ct & 1) {
1875 const uintptr_t mask_word = *interleaved_mask_iter;
1876 *genovec_iter &= mask_word * 3;
1877 }
1878 #endif
1879 }
1880
InterleavedMaskMissing(const uintptr_t * __restrict interleaved_mask,uintptr_t vec_ct,uintptr_t * __restrict genovec)1881 void InterleavedMaskMissing(const uintptr_t* __restrict interleaved_mask, uintptr_t vec_ct, uintptr_t* __restrict genovec) {
1882 const uintptr_t twovec_ct = vec_ct / 2;
1883 #ifdef __LP64__
1884 const VecW m1 = VCONST_W(kMask5555);
1885 const VecW inv_m1 = VCONST_W(kMaskAAAA);
1886 const VecW* interleaved_mask_iter = R_CAST(const VecW*, interleaved_mask);
1887 VecW* genovvec_iter = R_CAST(VecW*, genovec);
1888 for (uintptr_t twovec_idx = 0; twovec_idx != twovec_ct; ++twovec_idx) {
1889 const VecW mask_vvec = *interleaved_mask_iter++;
1890 VecW set_first = vecw_and_notfirst(mask_vvec, m1);
1891 set_first = set_first | vecw_slli(set_first, 1);
1892 VecW set_second = vecw_and_notfirst(mask_vvec, inv_m1);
1893 set_second = set_second | vecw_srli(set_second, 1);
1894 *genovvec_iter = (*genovvec_iter) | set_first;
1895 ++genovvec_iter;
1896 *genovvec_iter = (*genovvec_iter) | set_second;
1897 ++genovvec_iter;
1898 }
1899 if (vec_ct & 1) {
1900 VecW set_first = vecw_and_notfirst(*interleaved_mask_iter, m1);
1901 set_first = set_first | vecw_slli(set_first, 1);
1902 *genovvec_iter = (*genovvec_iter) | set_first;
1903 }
1904 #else
1905 const uintptr_t* interleaved_mask_iter = interleaved_mask;
1906 uintptr_t* genovec_iter = genovec;
1907 for (uintptr_t twovec_idx = 0; twovec_idx != twovec_ct; ++twovec_idx) {
1908 const uintptr_t set_invword = *interleaved_mask_iter++;
1909 *genovec_iter |= ((~set_invword) & kMask5555) * 3;
1910 ++genovec_iter;
1911 *genovec_iter |= (((~set_invword) >> 1) & kMask5555) * 3;
1912 ++genovec_iter;
1913 }
1914 if (vec_ct & 1) {
1915 const uintptr_t set_invword = *interleaved_mask_iter;
1916 *genovec_iter |= ((~set_invword) & kMask5555) * 3;
1917 }
1918 #endif
1919 }
1920
InterleavedSetMissing(const uintptr_t * __restrict interleaved_set,uintptr_t vec_ct,uintptr_t * __restrict genovec)1921 void InterleavedSetMissing(const uintptr_t* __restrict interleaved_set, uintptr_t vec_ct, uintptr_t* __restrict genovec) {
1922 const uintptr_t twovec_ct = vec_ct / 2;
1923 #ifdef __LP64__
1924 const VecW m1 = VCONST_W(kMask5555);
1925 const VecW* interleaved_set_iter = R_CAST(const VecW*, interleaved_set);
1926 VecW* genovvec_iter = R_CAST(VecW*, genovec);
1927 for (uintptr_t twovec_idx = 0; twovec_idx != twovec_ct; ++twovec_idx) {
1928 const VecW set_vvec = *interleaved_set_iter++;
1929 VecW set_first = set_vvec & m1;
1930 set_first = set_first | vecw_slli(set_first, 1);
1931 VecW set_second = vecw_and_notfirst(m1, set_vvec);
1932 set_second = set_second | vecw_srli(set_second, 1);
1933 *genovvec_iter = (*genovvec_iter) | set_first;
1934 ++genovvec_iter;
1935 *genovvec_iter = (*genovvec_iter) | set_second;
1936 ++genovvec_iter;
1937 }
1938 if (vec_ct & 1) {
1939 VecW set_first = *interleaved_set_iter;
1940 set_first = set_first | vecw_slli(set_first, 1);
1941 *genovvec_iter = (*genovvec_iter) | set_first;
1942 }
1943 #else
1944 const uintptr_t* interleaved_set_iter = interleaved_set;
1945 uintptr_t* genovec_iter = genovec;
1946 for (uintptr_t twovec_idx = 0; twovec_idx != twovec_ct; ++twovec_idx) {
1947 const uintptr_t set_word = *interleaved_set_iter++;
1948 *genovec_iter |= (set_word & kMask5555) * 3;
1949 ++genovec_iter;
1950 *genovec_iter |= ((set_word >> 1) & kMask5555) * 3;
1951 ++genovec_iter;
1952 }
1953 if (vec_ct & 1) {
1954 const uintptr_t set_word = *interleaved_set_iter;
1955 *genovec_iter |= set_word * 3;
1956 }
1957 #endif
1958 }
1959
InterleavedSetMissingCleardosage(const uintptr_t * __restrict orig_set,const uintptr_t * __restrict interleaved_set,uintptr_t vec_ct,uintptr_t * __restrict genovec,uint32_t * __restrict write_dosage_ct_ptr,uintptr_t * __restrict dosagepresent,Dosage * dosage_main)1960 void InterleavedSetMissingCleardosage(const uintptr_t* __restrict orig_set, const uintptr_t* __restrict interleaved_set, uintptr_t vec_ct, uintptr_t* __restrict genovec, uint32_t* __restrict write_dosage_ct_ptr, uintptr_t* __restrict dosagepresent, Dosage* dosage_main) {
1961 const uint32_t orig_write_dosage_ct = *write_dosage_ct_ptr;
1962 if (orig_write_dosage_ct) {
1963 uintptr_t sample_widx = 0;
1964 uintptr_t cur_bits = dosagepresent[0];
1965 for (uint32_t dosage_read_idx = 0; dosage_read_idx != orig_write_dosage_ct; ++dosage_read_idx) {
1966 const uintptr_t lowbit = BitIter1y(dosagepresent, &sample_widx, &cur_bits);
1967 if (orig_set[sample_widx] & lowbit) {
1968 dosagepresent[sample_widx] ^= lowbit;
1969 uint32_t dosage_write_idx = dosage_read_idx++;
1970 for (; dosage_read_idx == orig_write_dosage_ct; ++dosage_read_idx) {
1971 const uintptr_t lowbit2 = BitIter1y(dosagepresent, &sample_widx, &cur_bits);
1972 if (orig_set[sample_widx] & lowbit2) {
1973 dosagepresent[sample_widx] ^= lowbit2;
1974 } else {
1975 dosage_main[dosage_write_idx++] = dosage_main[dosage_read_idx];
1976 }
1977 }
1978 *write_dosage_ct_ptr = dosage_write_idx;
1979 break;
1980 }
1981 }
1982 }
1983 InterleavedSetMissing(interleaved_set, vec_ct, genovec);
1984 }
1985
SetMaleHetMissing(const uintptr_t * __restrict sex_male_interleaved,uint32_t vec_ct,uintptr_t * __restrict genovec)1986 void SetMaleHetMissing(const uintptr_t* __restrict sex_male_interleaved, uint32_t vec_ct, uintptr_t* __restrict genovec) {
1987 const uint32_t twovec_ct = vec_ct / 2;
1988 #ifdef __LP64__
1989 const VecW m1 = VCONST_W(kMask5555);
1990 const VecW* sex_male_interleaved_iter = R_CAST(const VecW*, sex_male_interleaved);
1991 VecW* genovvec_iter = R_CAST(VecW*, genovec);
1992 for (uint32_t twovec_idx = 0; twovec_idx != twovec_ct; ++twovec_idx) {
1993 const VecW sex_male_vvec = *sex_male_interleaved_iter++;
1994 // we wish to bitwise-or with (sex_male_nypvec_01 & genovec) << 1
1995 const VecW sex_male_first = sex_male_vvec & m1;
1996 const VecW sex_male_second_shifted = vecw_and_notfirst(m1, sex_male_vvec);
1997 VecW cur_geno_vword = *genovvec_iter;
1998
1999 const VecW missing_male_vword = sex_male_first & cur_geno_vword;
2000
2001 *genovvec_iter++ = cur_geno_vword | vecw_slli(missing_male_vword, 1);
2002 cur_geno_vword = *genovvec_iter;
2003 *genovvec_iter++ = cur_geno_vword | (sex_male_second_shifted & vecw_slli(cur_geno_vword, 1));
2004 }
2005 if (vec_ct & 1) {
2006 const VecW sex_male_first = (*sex_male_interleaved_iter) & m1;
2007 const VecW cur_geno_vword = *genovvec_iter;
2008 const VecW missing_male_vword = sex_male_first & cur_geno_vword;
2009 *genovvec_iter = cur_geno_vword | vecw_slli(missing_male_vword, 1);
2010 }
2011 #else
2012 const uintptr_t* sex_male_interleaved_iter = sex_male_interleaved;
2013 uintptr_t* genovec_iter = genovec;
2014 for (uint32_t twovec_idx = 0; twovec_idx != twovec_ct; ++twovec_idx) {
2015 const uintptr_t sex_male_word = *sex_male_interleaved_iter++;
2016 uintptr_t cur_geno_word = *genovec_iter;
2017 *genovec_iter++ = cur_geno_word | ((sex_male_word & kMask5555 & cur_geno_word) << 1);
2018 cur_geno_word = *genovec_iter;
2019 *genovec_iter++ = cur_geno_word | (sex_male_word & kMaskAAAA & (cur_geno_word << 1));
2020 }
2021 if (vec_ct & 1) {
2022 const uintptr_t sex_male_word = *sex_male_interleaved_iter;
2023 uintptr_t cur_geno_word = *genovec_iter;
2024 *genovec_iter = cur_geno_word | ((sex_male_word & kMask5555 & cur_geno_word) << 1);
2025 }
2026 #endif
2027 }
2028
EraseMaleHetDosages(const uintptr_t * __restrict sex_male,const uintptr_t * __restrict genoarr,uint32_t * __restrict write_dosage_ct_ptr,uintptr_t * __restrict dosagepresent,Dosage * dosage_main)2029 void EraseMaleHetDosages(const uintptr_t* __restrict sex_male, const uintptr_t* __restrict genoarr, uint32_t* __restrict write_dosage_ct_ptr, uintptr_t* __restrict dosagepresent, Dosage* dosage_main) {
2030 const uint32_t orig_write_dosage_ct = *write_dosage_ct_ptr;
2031 if (!orig_write_dosage_ct) {
2032 return;
2033 }
2034 uintptr_t sample_uidx_base = 0;
2035 uintptr_t cur_bits = dosagepresent[0];
2036 for (uint32_t dosage_read_idx = 0; dosage_read_idx != orig_write_dosage_ct; ++dosage_read_idx) {
2037 const uintptr_t sample_uidx = BitIter1(dosagepresent, &sample_uidx_base, &cur_bits);
2038 if (IsSet(sex_male, sample_uidx) && (GetNyparrEntry(genoarr, sample_uidx) == 1)) {
2039 ClearBit(sample_uidx, dosagepresent);
2040 uint32_t dosage_write_idx = dosage_read_idx++;
2041 for (; dosage_read_idx != orig_write_dosage_ct; ++dosage_read_idx) {
2042 const uintptr_t sample_uidx2 = BitIter1(dosagepresent, &sample_uidx_base, &cur_bits);
2043 if (IsSet(sex_male, sample_uidx2) && (GetNyparrEntry(genoarr, sample_uidx2) == 1)) {
2044 ClearBit(sample_uidx2, dosagepresent);
2045 } else {
2046 dosage_main[dosage_write_idx++] = dosage_main[dosage_read_idx];
2047 }
2048 }
2049 *write_dosage_ct_ptr = dosage_write_idx;
2050 return;
2051 }
2052 }
2053 }
2054
EraseMaleDphases(const uintptr_t * __restrict sex_male,uint32_t * __restrict write_dphase_ct_ptr,uintptr_t * __restrict dphasepresent,SDosage * dphase_delta)2055 void EraseMaleDphases(const uintptr_t* __restrict sex_male, uint32_t* __restrict write_dphase_ct_ptr, uintptr_t* __restrict dphasepresent, SDosage* dphase_delta) {
2056 const uint32_t orig_write_dphase_ct = *write_dphase_ct_ptr;
2057 if (!orig_write_dphase_ct) {
2058 return;
2059 }
2060 uintptr_t sample_widx = 0;
2061 uintptr_t cur_bits = dphasepresent[0];
2062 for (uint32_t dphase_read_idx = 0; dphase_read_idx != orig_write_dphase_ct; ++dphase_read_idx) {
2063 const uintptr_t lowbit = BitIter1y(dphasepresent, &sample_widx, &cur_bits);
2064 if (sex_male[sample_widx] & lowbit) {
2065 dphasepresent[sample_widx] ^= lowbit;
2066 uint32_t dphase_write_idx = dphase_read_idx++;
2067 for (; dphase_read_idx == orig_write_dphase_ct; ++dphase_read_idx) {
2068 const uintptr_t lowbit2 = BitIter1y(dphasepresent, &sample_widx, &cur_bits);
2069 if (sex_male[sample_widx] & lowbit2) {
2070 dphasepresent[sample_widx] ^= lowbit2;
2071 } else {
2072 dphase_delta[dphase_write_idx++] = dphase_delta[dphase_read_idx];
2073 }
2074 }
2075 *write_dphase_ct_ptr = dphase_write_idx;
2076 return;
2077 }
2078 }
2079 }
2080
SetMaleHetMissingCleardosage(const uintptr_t * __restrict sex_male,const uintptr_t * __restrict sex_male_interleaved,uint32_t vec_ct,uintptr_t * __restrict genovec,uint32_t * __restrict write_dosage_ct_ptr,uintptr_t * __restrict dosagepresent,Dosage * dosage_main)2081 void SetMaleHetMissingCleardosage(const uintptr_t* __restrict sex_male, const uintptr_t* __restrict sex_male_interleaved, uint32_t vec_ct, uintptr_t* __restrict genovec, uint32_t* __restrict write_dosage_ct_ptr, uintptr_t* __restrict dosagepresent, Dosage* dosage_main) {
2082 if (*write_dosage_ct_ptr) {
2083 EraseMaleHetDosages(sex_male, genovec, write_dosage_ct_ptr, dosagepresent, dosage_main);
2084 }
2085 SetMaleHetMissing(sex_male_interleaved, vec_ct, genovec);
2086 }
2087
SetMaleHetMissingKeepdosage(const uintptr_t * __restrict sex_male,const uintptr_t * __restrict sex_male_interleaved,uint32_t word_ct,uintptr_t * __restrict genovec,uint32_t * __restrict write_dosage_ct_ptr,uintptr_t * __restrict dosagepresent,Dosage * dosage_main)2088 void SetMaleHetMissingKeepdosage(const uintptr_t* __restrict sex_male, const uintptr_t* __restrict sex_male_interleaved, uint32_t word_ct, uintptr_t* __restrict genovec, uint32_t* __restrict write_dosage_ct_ptr, uintptr_t* __restrict dosagepresent, Dosage* dosage_main) {
2089 // count # of 1.00000 dosages we need to insert, and then rewrite dosage_main
2090 // from back to front so we don't need temporary buffers.
2091 const uint32_t orig_dosage_ct = *write_dosage_ct_ptr;
2092 if (!orig_dosage_ct) {
2093 // can't assume dosagepresent is initialized in this case
2094 ZeroWArr(DivUp(word_ct, 2), dosagepresent);
2095 }
2096 const Halfword* sex_male_alias = R_CAST(const Halfword*, sex_male);
2097 Halfword* dosagepresent_alias = R_CAST(Halfword*, dosagepresent);
2098 uint32_t new_dosage_ct = 0;
2099 // todo: try vectorizing this loop
2100 for (uintptr_t widx = 0; widx != word_ct; ++widx) {
2101 const uintptr_t geno_hets = Word01(genovec[widx]);
2102 const uintptr_t male_nodosage_word = UnpackHalfwordToWord(sex_male_alias[widx] & (~dosagepresent_alias[widx]));
2103 new_dosage_ct += Popcount01Word(geno_hets & male_nodosage_word);
2104 }
2105 if (!new_dosage_ct) {
2106 SetMaleHetMissing(sex_male_interleaved, WordCtToVecCt(word_ct), genovec);
2107 return;
2108 }
2109 uint32_t dosage_write_idx = orig_dosage_ct + new_dosage_ct;
2110 uint32_t dosage_read_idx = orig_dosage_ct;
2111 uint32_t widx = word_ct;
2112 *write_dosage_ct_ptr = dosage_write_idx;
2113 do {
2114 --widx;
2115 const uintptr_t geno_word = genovec[widx];
2116 const uintptr_t male_geno_hets = geno_word & (~(geno_word >> 1)) & UnpackHalfwordToWord(sex_male_alias[widx]);
2117 const uintptr_t dosagepresent_word = UnpackHalfwordToWord(dosagepresent_alias[widx]);
2118 uintptr_t new_dosagepresent_word = dosagepresent_word | male_geno_hets;
2119 if (new_dosagepresent_word) {
2120 dosagepresent_alias[widx] = PackWordToHalfword(new_dosagepresent_word);
2121 do {
2122 const uint32_t top_set_bit = bsrw(new_dosagepresent_word);
2123 const uintptr_t cur_bit_word = k1LU << top_set_bit;
2124 Dosage cur_dosage = kDosageMid;
2125 if (cur_bit_word & dosagepresent_word) {
2126 cur_dosage = dosage_main[--dosage_read_idx];
2127 }
2128 dosage_main[--dosage_write_idx] = cur_dosage;
2129 new_dosagepresent_word -= cur_bit_word;
2130 } while (new_dosagepresent_word);
2131 genovec[widx] = geno_word | (male_geno_hets << 1);
2132 }
2133 } while (widx);
2134 }
2135
2136 // Clears each bit in bitarr which doesn't correspond to a genovec het.
2137 // Assumes that either trailing bits of bitarr are already zero, or trailing
2138 // bits of genovec are zero.
2139 //
2140 // Similar to PgrDetectGenoarrHetsUnsafe().
2141 //
2142 // todo: try vectorizing this
MaskGenoarrHetsUnsafe(const uintptr_t * __restrict genoarr,uint32_t raw_sample_ctl2,uintptr_t * __restrict bitarr)2143 void MaskGenoarrHetsUnsafe(const uintptr_t* __restrict genoarr, uint32_t raw_sample_ctl2, uintptr_t* __restrict bitarr) {
2144 Halfword* bitarr_alias = R_CAST(Halfword*, bitarr);
2145 for (uint32_t widx = 0; widx != raw_sample_ctl2; ++widx) {
2146 const uintptr_t cur_word = genoarr[widx];
2147 uintptr_t ww = (~(cur_word >> 1)) & cur_word; // low 1, high 0
2148 bitarr_alias[widx] &= PackWordToHalfwordMask5555(ww);
2149 }
2150 }
2151
MaskGenoarrHetsMultiallelicUnsafe(const uintptr_t * __restrict genoarr,const uintptr_t * __restrict patch_10_set,const AlleleCode * __restrict patch_10_vals,uint32_t raw_sample_ctl2,uintptr_t * __restrict bitarr)2152 void MaskGenoarrHetsMultiallelicUnsafe(const uintptr_t* __restrict genoarr, const uintptr_t* __restrict patch_10_set, const AlleleCode* __restrict patch_10_vals, uint32_t raw_sample_ctl2, uintptr_t* __restrict bitarr) {
2153 // Related to PgrDetectGenoarrHetsMultiallelic().
2154 const Halfword* patch_10_set_alias = R_CAST(const Halfword*, patch_10_set);
2155 const AlleleCode* patch_10_vals_iter = patch_10_vals;
2156 Halfword* bitarr_alias = R_CAST(Halfword*, bitarr);
2157 for (uint32_t widx = 0; widx != raw_sample_ctl2; ++widx) {
2158 const uintptr_t cur_word = genoarr[widx];
2159 uint32_t patch_10_hw = patch_10_set_alias[widx];
2160 uint32_t cur_hets = Pack01ToHalfword(cur_word);
2161 while (patch_10_hw) {
2162 const AlleleCode code1 = *patch_10_vals_iter++;
2163 const AlleleCode code2 = *patch_10_vals_iter++;
2164 if (code1 != code2) {
2165 cur_hets |= ctzu32(patch_10_hw);
2166 }
2167 patch_10_hw &= patch_10_hw - 1;
2168 }
2169 bitarr_alias[widx] &= cur_hets;
2170 }
2171 }
2172
2173 /*
2174 uint32_t chr_window_max(const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bp, uint32_t chr_fo_idx, uint32_t ct_max, uint32_t bp_max, uint32_t cur_window_max) {
2175 if (cur_window_max >= ct_max) {
2176 return ct_max;
2177 }
2178 const uint32_t chr_end = cip->chr_fo_vidx_start[chr_fo_idx + 1];
2179 uint32_t variant_uidx = AdvBoundedTo1Bit(variant_include, cip->chr_fo_vidx_start[chr_fo_idx], chr_end);
2180 const uint32_t variant_ct = PopcountBitRange(variant_include, variant_uidx, chr_end);
2181 if (variant_ct <= cur_window_max) {
2182 return cur_window_max;
2183 }
2184 uint32_t window_idx_first = 0;
2185 uint32_t window_uidx_first = variant_uidx;
2186 uint32_t window_bp_first = variant_bp[variant_uidx];
2187 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_uidx, ++variant_idx) {
2188 MovU32To1Bit(variant_include, &variant_uidx);
2189 uint32_t variant_bp_thresh = variant_bp[variant_uidx];
2190 if (variant_bp_thresh < bp_max) {
2191 variant_bp_thresh = 0;
2192 } else {
2193 variant_bp_thresh -= bp_max;
2194 }
2195 if (variant_bp_thresh > window_bp_first) {
2196 do {
2197 ++window_uidx_first;
2198 MovU32To1Bit(variant_include, &window_uidx_first);
2199 window_bp_first = variant_bp[window_uidx_first];
2200 ++window_idx_first;
2201 } while (variant_bp_thresh > window_bp_first);
2202 } else if (variant_idx - window_idx_first == cur_window_max) {
2203 if (++cur_window_max == ct_max) {
2204 return cur_window_max;
2205 }
2206 }
2207 }
2208 return cur_window_max;
2209 }
2210 */
2211
NotOnlyXymt(const uintptr_t * variant_include,const ChrInfo * cip,uint32_t raw_variant_ct,uint32_t xymt_offset)2212 uint32_t NotOnlyXymt(const uintptr_t* variant_include, const ChrInfo* cip, uint32_t raw_variant_ct, uint32_t xymt_offset) {
2213 const uint32_t xymt_code = cip->xymt_codes[xymt_offset];
2214 assert(!IsI32Neg(xymt_code));
2215 const uint32_t cur_chr_fo_idx = cip->chr_idx_to_foidx[xymt_code];
2216 const uint32_t chr_start = cip->chr_fo_vidx_start[cur_chr_fo_idx];
2217 if (chr_start) {
2218 const uint32_t first_uidx = AdvTo1Bit(variant_include, 0);
2219 if (first_uidx < chr_start) {
2220 return 1;
2221 }
2222 }
2223 const uint32_t chr_end = cip->chr_fo_vidx_start[cur_chr_fo_idx + 1];
2224 return (chr_end < raw_variant_ct) && (!AllBitsAreZero(variant_include, chr_end, raw_variant_ct));
2225 }
2226
CountNonAutosomalVariants(const uintptr_t * variant_include,const ChrInfo * cip,uint32_t count_x,uint32_t count_mt)2227 uint32_t CountNonAutosomalVariants(const uintptr_t* variant_include, const ChrInfo* cip, uint32_t count_x, uint32_t count_mt) {
2228 // for backward compatibility, unplaced markers are considered to be
2229 // autosomal here
2230 uint32_t ct = 0;
2231 if (count_x) {
2232 uint32_t x_code;
2233 if (XymtExists(cip, kChrOffsetX, &x_code)) {
2234 ct += CountChrVariantsUnsafe(variant_include, cip, x_code);
2235 }
2236 }
2237 uint32_t y_code;
2238 if (XymtExists(cip, kChrOffsetY, &y_code)) {
2239 ct += CountChrVariantsUnsafe(variant_include, cip, y_code);
2240 }
2241 if (count_mt) {
2242 uint32_t mt_code;
2243 if (XymtExists(cip, kChrOffsetMT, &mt_code)) {
2244 ct += CountChrVariantsUnsafe(variant_include, cip, mt_code);
2245 }
2246 }
2247 return ct;
2248 }
2249
ExcludeNonAutosomalVariants(const ChrInfo * cip,uintptr_t * variant_include)2250 void ExcludeNonAutosomalVariants(const ChrInfo* cip, uintptr_t* variant_include) {
2251 uint32_t x_code;
2252 if (XymtExists(cip, kChrOffsetX, &x_code)) {
2253 const uint32_t chr_fo_idx = cip->chr_idx_to_foidx[x_code];
2254 ClearBitsNz(cip->chr_fo_vidx_start[chr_fo_idx], cip->chr_fo_vidx_start[chr_fo_idx + 1], variant_include);
2255 }
2256 uint32_t y_code;
2257 if (XymtExists(cip, kChrOffsetY, &y_code)) {
2258 const uint32_t chr_fo_idx = cip->chr_idx_to_foidx[y_code];
2259 ClearBitsNz(cip->chr_fo_vidx_start[chr_fo_idx], cip->chr_fo_vidx_start[chr_fo_idx + 1], variant_include);
2260 }
2261 uint32_t mt_code;
2262 if (XymtExists(cip, kChrOffsetMT, &mt_code)) {
2263 const uint32_t chr_fo_idx = cip->chr_idx_to_foidx[mt_code];
2264 ClearBitsNz(cip->chr_fo_vidx_start[chr_fo_idx], cip->chr_fo_vidx_start[chr_fo_idx + 1], variant_include);
2265 }
2266 }
2267
ConditionalAllocateNonAutosomalVariants(const ChrInfo * cip,const char * calc_descrip,uint32_t raw_variant_ct,const uintptr_t ** variant_include_ptr,uint32_t * variant_ct_ptr)2268 PglErr ConditionalAllocateNonAutosomalVariants(const ChrInfo* cip, const char* calc_descrip, uint32_t raw_variant_ct, const uintptr_t** variant_include_ptr, uint32_t* variant_ct_ptr) {
2269 const uint32_t non_autosomal_variant_ct = CountNonAutosomalVariants(*variant_include_ptr, cip, 1, 1);
2270 if (!non_autosomal_variant_ct) {
2271 return kPglRetSuccess;
2272 }
2273 logprintf("Excluding %u variant%s on non-autosomes from %s.\n", non_autosomal_variant_ct, (non_autosomal_variant_ct == 1)? "" : "s", calc_descrip);
2274 *variant_ct_ptr -= non_autosomal_variant_ct;
2275 if (unlikely(!(*variant_ct_ptr))) {
2276 // this may not always be an error condition, probably add a flag later to
2277 // control printing of this error message, etc.
2278 logerrprintf("Error: No variants remaining for %s.\n", calc_descrip);
2279 return kPglRetDegenerateData;
2280 }
2281 const uint32_t raw_variant_ctl = BitCtToWordCt(raw_variant_ct);
2282 uintptr_t* working_variant_include;
2283 if (unlikely(bigstack_alloc_w(raw_variant_ctl, &working_variant_include))) {
2284 return kPglRetNomem;
2285 }
2286 memcpy(working_variant_include, *variant_include_ptr, raw_variant_ctl * sizeof(intptr_t));
2287 ExcludeNonAutosomalVariants(cip, working_variant_include);
2288 *variant_include_ptr = working_variant_include;
2289 return kPglRetSuccess;
2290 }
2291
FillSubsetChrFoVidxStart(const uintptr_t * variant_include,const ChrInfo * cip,uint32_t * subset_chr_fo_vidx_start)2292 void FillSubsetChrFoVidxStart(const uintptr_t* variant_include, const ChrInfo* cip, uint32_t* subset_chr_fo_vidx_start) {
2293 const uint32_t chr_ct = cip->chr_ct;
2294 subset_chr_fo_vidx_start[0] = 0;
2295 uint32_t variant_uidx = 0;
2296 uint32_t variant_idx = 0;
2297 for (uint32_t chr_fo_idx = 1; chr_fo_idx <= chr_ct; ++chr_fo_idx) {
2298 const uint32_t chr_end_variant_uidx = cip->chr_fo_vidx_start[chr_fo_idx];
2299 variant_idx += PopcountBitRange(variant_include, variant_uidx, chr_end_variant_uidx);
2300 subset_chr_fo_vidx_start[chr_fo_idx] = variant_idx;
2301 variant_uidx = chr_end_variant_uidx;
2302 }
2303 }
2304
2305 /*
2306 BoolErr allele_set(const char* newval, uint32_t allele_slen, char** allele_ptr) {
2307 char* newptr;
2308 if (allele_slen == 1) {
2309 // const_cast
2310 newptr = (char*)((uintptr_t)(&(g_one_char_strs[((unsigned char)(*newval)) * 2])));
2311 } else {
2312 char* new_alloc;
2313 if (pgl_malloc(allele_slen + 1, &new_alloc)) {
2314 return 1;
2315 }
2316 memcpyx(new_alloc, newval, allele_slen, '\0');
2317 newptr = new_alloc;
2318 }
2319 *allele_ptr = newptr;
2320 return 0;
2321 }
2322
2323 BoolErr allele_reset(const char* newval, uint32_t allele_slen, char** allele_ptr) {
2324 char* newptr;
2325 if (allele_slen == 1) {
2326 // const_cast
2327 newptr = (char*)((uintptr_t)(&(g_one_char_strs[((unsigned char)(*newval)) * 2])));
2328 } else {
2329 char* new_alloc;
2330 if (pgl_malloc(allele_slen + 1, &new_alloc)) {
2331 return 1;
2332 }
2333 memcpyx(new_alloc, newval, allele_slen, '\0');
2334 newptr = new_alloc;
2335 }
2336 const uintptr_t bigstack_end_addr = (uintptr_t)g_bigstack_end;
2337 const uintptr_t maxdiff = ((uintptr_t)(&(g_one_char_strs[512]))) - bigstack_end_addr;
2338 // take advantage of unsigned wraparound
2339 if ((((uintptr_t)(*allele_ptr)) - bigstack_end_addr) >= maxdiff) {
2340 free(*allele_ptr);
2341 }
2342 *allele_ptr = newptr;
2343 return 0;
2344 }
2345
2346 void cleanup_allele_storage(uint32_t max_allele_slen, uintptr_t allele_storage_entry_ct, const char** allele_storage) {
2347 // Now doesn't improperly free bigstack allocations (as long as they aren't
2348 // past g_bigstack_end), and doesn't need to be called at all most of the
2349 // time.
2350
2351 // An alternative representation: have a separate bitarray which indicates
2352 // whether the allele_storage[] element should be interpreted as a heap
2353 // pointer or an in-place zero-terminated string (i.e. string length can be
2354 // up to 7 on 64-bit systems). I expect that to be more efficient for new
2355 // datasets, but let's get the simple (and 1.9-codebase-compatible)
2356 // implementation working first, and then benchmark the fancier code later.
2357 if (allele_storage && (max_allele_slen > 1)) {
2358 const uintptr_t bigstack_end_addr = (uintptr_t)g_bigstack_end;
2359 const uintptr_t maxdiff = ((uintptr_t)(&(g_one_char_strs[512]))) - bigstack_end_addr;
2360 for (uintptr_t idx = 0; idx != allele_storage_entry_ct; ++idx) {
2361 const char* cur_entry = allele_storage[idx];
2362 assert(cur_entry);
2363 // take advantage of unsigned wraparound
2364 if ((((uintptr_t)cur_entry) - bigstack_end_addr) >= maxdiff) {
2365 free_const(cur_entry);
2366 }
2367 }
2368 }
2369 }
2370 */
2371
2372 char g_missing_catname[kMaxMissingPhenostrBlen];
2373 char g_output_missing_pheno[kMaxMissingPhenostrBlen];
2374 char g_legacy_output_missing_pheno[kMaxMissingPhenostrBlen];
2375
InitPheno()2376 void InitPheno() {
2377 snprintf(g_missing_catname, kMaxMissingPhenostrBlen, "NONE");
2378 snprintf(g_output_missing_pheno, kMaxMissingPhenostrBlen, "NA");
2379 snprintf(g_legacy_output_missing_pheno, kMaxMissingPhenostrBlen, "-9");
2380 }
2381
IsCategoricalPhenostr(const char * phenostr_iter)2382 uint32_t IsCategoricalPhenostr(const char* phenostr_iter) {
2383 uint32_t first_char_code = ctou32(*phenostr_iter++);
2384 // allow leading +/-
2385 if ((first_char_code == 43) || (first_char_code == 45)) {
2386 first_char_code = ctou32(*phenostr_iter++);
2387 }
2388 if (((first_char_code - 48) < 10) || (first_char_code == 44) || (first_char_code < 32)) {
2389 // the last two conditions are for detecting CSV empty strings
2390 return 0;
2391 }
2392 if (first_char_code == 46) {
2393 // decimal point. classify based on whether next character is a digit.
2394 const uint32_t second_char_code = ctou32(phenostr_iter[0]);
2395 return ((second_char_code - 48) >= 10);
2396 }
2397 // allow any capitalization of "NA"/"nan", but not "inf"
2398 if ((first_char_code & 0xdf) != 78) {
2399 return 1;
2400 }
2401 const uint32_t second_char_code = ctou32(phenostr_iter[0]);
2402 if ((second_char_code & 0xdf) != 65) {
2403 return 1;
2404 }
2405 const uint32_t third_char_code = ctou32(phenostr_iter[1]);
2406 if ((third_char_code & 0xdf) == 78) {
2407 return (ctou32(phenostr_iter[2]) > ' ');
2408 }
2409 return (third_char_code > 32);
2410 }
2411
IsCategoricalPhenostrNocsv(const char * phenostr_iter)2412 uint32_t IsCategoricalPhenostrNocsv(const char* phenostr_iter) {
2413 uint32_t first_char_code = ctou32(*phenostr_iter++);
2414 // allow leading +/-
2415 if ((first_char_code == 43) || (first_char_code == 45)) {
2416 first_char_code = ctou32(*phenostr_iter++);
2417 }
2418 if ((first_char_code - 48) < 10) {
2419 return 0;
2420 }
2421 if (first_char_code == 46) {
2422 // decimal point. classify based on whether next character is a digit.
2423 const uint32_t second_char_code = ctou32(phenostr_iter[0]);
2424 return ((second_char_code - 48) >= 10);
2425 }
2426 // allow any capitalization of "NA"/"nan", but not "inf"
2427 if ((first_char_code & 0xdf) != 78) {
2428 return 1;
2429 }
2430 const uint32_t second_char_code = ctou32(phenostr_iter[0]);
2431 if ((second_char_code & 0xdf) != 65) {
2432 return 1;
2433 }
2434 const uint32_t third_char_code = ctou32(phenostr_iter[1]);
2435 if ((third_char_code & 0xdf) == 78) {
2436 return (ctou32(phenostr_iter[2]) > ' ');
2437 }
2438 return (third_char_code > 32);
2439 }
2440
FirstCcOrQtPhenoIdx(const PhenoCol * pheno_cols,uint32_t pheno_ct)2441 uint32_t FirstCcOrQtPhenoIdx(const PhenoCol* pheno_cols, uint32_t pheno_ct) {
2442 for (uint32_t pheno_idx = 0; pheno_idx != pheno_ct; ++pheno_idx) {
2443 if (pheno_cols[pheno_idx].type_code < kPhenoDtypeCat) {
2444 return pheno_idx;
2445 }
2446 }
2447 return UINT32_MAX;
2448 }
2449
IsConstCovar(const PhenoCol * covar_col,const uintptr_t * sample_include,uint32_t sample_ct)2450 uint32_t IsConstCovar(const PhenoCol* covar_col, const uintptr_t* sample_include, uint32_t sample_ct) {
2451 if (sample_ct < 2) {
2452 return 1;
2453 }
2454 uintptr_t sample_uidx_base = 0;
2455 uintptr_t cur_bits = sample_include[0];
2456 const uintptr_t initial_sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
2457 if (covar_col->type_code == kPhenoDtypeQt) {
2458 const double* covar_vals = covar_col->data.qt;
2459 const double first_covar_val = covar_vals[initial_sample_uidx];
2460 for (uint32_t sample_idx = 1; sample_idx != sample_ct; ++sample_idx) {
2461 const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
2462 if (covar_vals[sample_uidx] != first_covar_val) {
2463 return 0;
2464 }
2465 }
2466 return 1;
2467 }
2468 assert(covar_col->type_code == kPhenoDtypeCat);
2469 const uint32_t* covar_cats = covar_col->data.cat;
2470 const uint32_t first_covar_cat = covar_cats[initial_sample_uidx];
2471 for (uint32_t sample_idx = 1; sample_idx != sample_ct; ++sample_idx) {
2472 const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
2473 if (covar_cats[sample_uidx] != first_covar_cat) {
2474 return 0;
2475 }
2476 }
2477 return 1;
2478 }
2479
IdentifyRemainingCats(const uintptr_t * sample_include,const PhenoCol * covar_col,uint32_t sample_ct,uintptr_t * observed_cat_bitarr)2480 uint32_t IdentifyRemainingCats(const uintptr_t* sample_include, const PhenoCol* covar_col, uint32_t sample_ct, uintptr_t* observed_cat_bitarr) {
2481 // assumes covar_col->type_code == kPhenoTypeCat
2482 const uint32_t nonnull_cat_ct = covar_col->nonnull_category_ct;
2483 const uint32_t* covar_cats = covar_col->data.cat;
2484 const uint32_t word_ct = 1 + (nonnull_cat_ct / kBitsPerWord);
2485 ZeroWArr(word_ct, observed_cat_bitarr);
2486 uintptr_t sample_uidx_base = 0;
2487 uintptr_t cur_bits = sample_include[0];
2488 for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
2489 const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
2490 SetBit(covar_cats[sample_uidx], observed_cat_bitarr);
2491 }
2492 return PopcountWords(observed_cat_bitarr, word_ct);
2493 }
2494
2495 // returns index of most common category
IdentifyRemainingCatsAndMostCommon(const uintptr_t * sample_include,const PhenoCol * covar_col,uint32_t sample_ct,uintptr_t * observed_cat_bitarr,uint32_t * cat_obs_buf)2496 uint32_t IdentifyRemainingCatsAndMostCommon(const uintptr_t* sample_include, const PhenoCol* covar_col, uint32_t sample_ct, uintptr_t* observed_cat_bitarr, uint32_t* cat_obs_buf) {
2497 // assumes covar_col->type_code == kPhenoTypeCat
2498 const uint32_t nonnull_cat_ct = covar_col->nonnull_category_ct;
2499 const uint32_t* covar_cats = covar_col->data.cat;
2500 const uint32_t word_ct = 1 + (nonnull_cat_ct / kBitsPerWord);
2501 ZeroWArr(word_ct, observed_cat_bitarr);
2502 ZeroU32Arr(nonnull_cat_ct + 1, cat_obs_buf);
2503 uintptr_t sample_uidx_base = 0;
2504 uintptr_t cur_bits = sample_include[0];
2505 for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
2506 const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
2507 cat_obs_buf[covar_cats[sample_uidx]] += 1;
2508 SetBit(covar_cats[sample_uidx], observed_cat_bitarr);
2509 }
2510 if (cat_obs_buf[0]) {
2511 // don't actually need this for now since we're only calling this with the
2512 // missing category excluded, but let's be consistent.
2513 SetBit(0, observed_cat_bitarr);
2514 }
2515 uint32_t best_cat_idx = 0;
2516 uint32_t best_cat_obs_ct = 0;
2517 for (uint32_t cat_idx = 1; cat_idx <= nonnull_cat_ct; ++cat_idx) {
2518 const uint32_t cat_obs_ct = cat_obs_buf[cat_idx];
2519 if (cat_obs_ct) {
2520 SetBit(cat_idx, observed_cat_bitarr);
2521 if (cat_obs_ct > best_cat_obs_ct) {
2522 best_cat_idx = cat_idx;
2523 best_cat_obs_ct = cat_obs_ct;
2524 }
2525 }
2526 }
2527 return best_cat_idx;
2528 }
2529
GetCatSamples(const uintptr_t * sample_include_base,const PhenoCol * cat_pheno_col,uint32_t raw_sample_ctl,uint32_t sample_ct,uint32_t cat_uidx,uintptr_t * cur_cat_samples)2530 uint32_t GetCatSamples(const uintptr_t* sample_include_base, const PhenoCol* cat_pheno_col, uint32_t raw_sample_ctl, uint32_t sample_ct, uint32_t cat_uidx, uintptr_t* cur_cat_samples) {
2531 ZeroWArr(raw_sample_ctl, cur_cat_samples);
2532 const uint32_t* cat_vals = cat_pheno_col->data.cat;
2533 uintptr_t sample_uidx_base = 0;
2534 uintptr_t cur_bits = sample_include_base[0];
2535 for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
2536 const uintptr_t sample_uidx = BitIter1(sample_include_base, &sample_uidx_base, &cur_bits);
2537 if (cat_vals[sample_uidx] == cat_uidx) {
2538 SetBit(sample_uidx, cur_cat_samples);
2539 }
2540 }
2541 return PopcountWords(cur_cat_samples, raw_sample_ctl);
2542 }
2543
CleanupPhenoCols(uint32_t pheno_ct,PhenoCol * pheno_cols)2544 void CleanupPhenoCols(uint32_t pheno_ct, PhenoCol* pheno_cols) {
2545 if (pheno_cols) {
2546 for (uint32_t pheno_idx = 0; pheno_idx != pheno_ct; ++pheno_idx) {
2547 vecaligned_free_cond(pheno_cols[pheno_idx].nonmiss);
2548 }
2549 free(pheno_cols);
2550 }
2551 }
2552
ParseChrRanges(const char * const * argvk,const char * flagname_p,const char * errstr_append,uint32_t param_ct,uint32_t allow_extra_chrs,uint32_t xymt_subtract,char range_delim,ChrInfo * cip,uintptr_t * chr_mask)2553 PglErr ParseChrRanges(const char* const* argvk, const char* flagname_p, const char* errstr_append, uint32_t param_ct, uint32_t allow_extra_chrs, uint32_t xymt_subtract, char range_delim, ChrInfo* cip, uintptr_t* chr_mask) {
2554 PglErr reterr = kPglRetSuccess;
2555 {
2556 const char* cur_arg_ptr = argvk[1];
2557 char* token_buf = g_textbuf;
2558 const char* range_end = nullptr;
2559 uint32_t cur_param_idx = 1;
2560 uint32_t rs_len = 0;
2561 uint32_t re_len = 0;
2562 while (1) {
2563 const char* range_start;
2564 if (unlikely(ParseNextRange(argvk, param_ct, range_delim, &cur_param_idx, &cur_arg_ptr, &range_start, &rs_len, &range_end, &re_len))) {
2565 snprintf(g_logbuf, kLogbufSize, "Error: Invalid --%s parameter '%s'.\n", flagname_p, argvk[cur_param_idx]);
2566 goto ParseChrRanges_ret_INVALID_CMDLINE_WWA;
2567 }
2568 if (!range_start) {
2569 break;
2570 }
2571 // Could avoid this copy, but only at the cost of mutating argv. Which
2572 // isn't actually problematic--it's not like we have multiple threads
2573 // parsing the command line--but it's not like this is a performance
2574 // bottleneck either.
2575 // ParseNextRange() prevents buffer overflow.
2576 memcpyx(token_buf, range_start, rs_len, '\0');
2577 uint32_t chr_code_start = GetChrCodeRaw(token_buf);
2578 if (IsI32Neg(chr_code_start)) {
2579 if (unlikely(!allow_extra_chrs)) {
2580 snprintf(g_logbuf, kLogbufSize, "Error: Invalid --%s chromosome code '%s'.\n", flagname_p, token_buf);
2581 goto ParseChrRanges_ret_INVALID_CMDLINE_WWA;
2582 }
2583 if (unlikely(range_end)) {
2584 goto ParseChrRanges_ret_INVALID_CMDLINE_NONSTD;
2585 }
2586 if (unlikely(PushLlStr(token_buf, &(cip->incl_excl_name_stack)))) {
2587 goto ParseChrRanges_ret_NOMEM;
2588 }
2589 } else {
2590 if (chr_code_start >= kMaxContigs) {
2591 chr_code_start -= xymt_subtract;
2592 }
2593 if (range_end) {
2594 memcpyx(token_buf, range_end, re_len, '\0');
2595 uint32_t chr_code_end = GetChrCodeRaw(token_buf);
2596 if (unlikely(IsI32Neg(chr_code_end))) {
2597 if (!allow_extra_chrs) {
2598 snprintf(g_logbuf, kLogbufSize, "Error: Invalid --%s chromosome code '%s'.\n", flagname_p, range_end);
2599 goto ParseChrRanges_ret_INVALID_CMDLINE_WWA;
2600 }
2601 goto ParseChrRanges_ret_INVALID_CMDLINE_NONSTD;
2602 }
2603 if (unlikely(chr_code_end >= kMaxContigs)) {
2604 // prohibit stuff like "--chr par1-par2", "--chr x-y", "--chr x-26"
2605 snprintf(g_logbuf, kLogbufSize, "Error: --%s chromosome code '%s' cannot be the end of a range.\n", flagname_p, range_end);
2606 goto ParseChrRanges_ret_INVALID_CMDLINE_WWA;
2607 }
2608 if (unlikely(chr_code_end <= chr_code_start)) {
2609 snprintf(g_logbuf, kLogbufSize, "Error: --%s chromosome code '%s' is not greater than '%s'.\n", flagname_p, range_end, range_start);
2610 goto ParseChrRanges_ret_INVALID_CMDLINE_WWA;
2611 }
2612 FillBitsNz(chr_code_start, chr_code_end + 1, chr_mask);
2613 } else {
2614 SetBit(chr_code_start, chr_mask);
2615 }
2616 }
2617 }
2618 // no compelling reason to prohibit "--not-chr ,"
2619 }
2620 while (0) {
2621 ParseChrRanges_ret_NOMEM:
2622 reterr = kPglRetNomem;
2623 break;
2624 ParseChrRanges_ret_INVALID_CMDLINE_NONSTD:
2625 logerrputs("Error: Chromosome ranges cannot include nonstandard names.\n");
2626 reterr = kPglRetInvalidCmdline;
2627 break;
2628 ParseChrRanges_ret_INVALID_CMDLINE_WWA:
2629 WordWrapB(0);
2630 logerrputsb();
2631 logerrputs(errstr_append);
2632 reterr = kPglRetInvalidCmdline;
2633 break;
2634 }
2635 return reterr;
2636 }
2637
2638 /*
2639 uint32_t MultiallelicVariantPresent(const uintptr_t* variant_include, const uintptr_t* allele_idx_offsets, uint32_t variant_ct) {
2640 if (!allele_idx_offsets) {
2641 return 0;
2642 }
2643 uintptr_t variant_uidx_base = 0;
2644 uintptr_t cur_bits = variant_include[0];
2645 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
2646 const uintptr_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
2647 if (allele_idx_offsets[variant_uidx + 1] != allele_idx_offsets[variant_uidx] + 2) {
2648 return 1;
2649 }
2650 }
2651 return 0;
2652 }
2653 */
2654
CountBiallelicVariants(const uintptr_t * variant_include,const uintptr_t * allele_idx_offsets,uint32_t variant_ct)2655 uint32_t CountBiallelicVariants(const uintptr_t* variant_include, const uintptr_t* allele_idx_offsets, uint32_t variant_ct) {
2656 if (!allele_idx_offsets) {
2657 return variant_ct;
2658 }
2659 uint32_t biallelic_variant_ct = 0;
2660 uintptr_t variant_uidx_base = 0;
2661 uintptr_t cur_bits = variant_include[0];
2662 for (uint32_t variant_idx = 0; variant_idx != variant_ct; ++variant_idx) {
2663 const uintptr_t variant_uidx = BitIter1(variant_include, &variant_uidx_base, &cur_bits);
2664 biallelic_variant_ct += (allele_idx_offsets[variant_uidx + 1] == allele_idx_offsets[variant_uidx] + 2);
2665 }
2666 return biallelic_variant_ct;
2667 }
2668
CountAlleles(const uintptr_t * variant_include,const uintptr_t * allele_idx_offsets,uint32_t raw_variant_ct,uint32_t variant_ct)2669 uintptr_t CountAlleles(const uintptr_t* variant_include, const uintptr_t* allele_idx_offsets, uint32_t raw_variant_ct, uint32_t variant_ct) {
2670 if (!allele_idx_offsets) {
2671 return variant_ct * 2;
2672 }
2673 if (raw_variant_ct == variant_ct) {
2674 return allele_idx_offsets[raw_variant_ct];
2675 }
2676 uintptr_t allele_ct = 0;
2677 uint32_t variant_uidx = 0;
2678 while (variant_ct) {
2679 const uint32_t variant_uidx_start = AdvTo1Bit(variant_include, variant_uidx);
2680 variant_uidx = AdvBoundedTo0Bit(variant_include, variant_uidx_start + 1, raw_variant_ct);
2681 allele_ct += allele_idx_offsets[variant_uidx] - allele_idx_offsets[variant_uidx_start];
2682 variant_ct -= variant_uidx - variant_uidx_start;
2683 }
2684 return allele_ct;
2685 }
2686
2687 /*
2688 uintptr_t GetMaxAlleleBlockSize(const uintptr_t* __restrict variant_include, const uintptr_t* __restrict allele_idx_offsets, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t read_block_size) {
2689 uintptr_t max_allele_block_size = 2 * read_block_size;
2690 if (!allele_idx_offsets) {
2691 return max_allele_block_size;
2692 }
2693 uint32_t variant_idx = 0;
2694 uint32_t variant_uidx_end = 0;
2695 while (1) {
2696 uint32_t variant_uidx = variant_uidx_end;
2697 variant_uidx_end += read_block_size;
2698 if (variant_uidx_end > raw_variant_ct) {
2699 variant_uidx_end = raw_variant_ct;
2700 read_block_size = raw_variant_ct - variant_uidx;
2701 }
2702 uint32_t cur_variant_ct = PopcountBitRange(variant_include, variant_uidx, variant_uidx_end);
2703 if (cur_variant_ct) {
2704 const uint32_t omitted_variant_ct = read_block_size - cur_variant_ct;
2705 const uintptr_t block_size_ubound = allele_idx_offsets[variant_uidx_end] - allele_idx_offsets[variant_uidx] - (2 * omitted_variant_ct);
2706 variant_idx += cur_variant_ct;
2707 if (block_size_ubound > max_allele_block_size) {
2708 // subtract at beginning of each variant_include bitblock, add at end.
2709 uintptr_t cur_allele_block_size = 0;
2710 do {
2711 const uint32_t cur_bitblock_start = AdvTo1Bit(variant_include, variant_uidx);
2712 // deliberate underflow
2713 cur_allele_block_size -= allele_idx_offsets[cur_bitblock_start];
2714 if (variant_uidx_end - cur_bitblock_start == cur_variant_ct) {
2715 cur_allele_block_size += allele_idx_offsets[variant_uidx_end];
2716 break;
2717 }
2718 variant_uidx = AdvTo0Bit(variant_include, cur_bitblock_start);
2719 cur_allele_block_size += allele_idx_offsets[variant_uidx];
2720 cur_variant_ct -= variant_uidx - cur_bitblock_start;
2721 } while (cur_variant_ct);
2722 if (cur_allele_block_size > max_allele_block_size) {
2723 max_allele_block_size = cur_allele_block_size;
2724 }
2725 }
2726 if (variant_idx == variant_ct) {
2727 return max_allele_block_size;
2728 }
2729 }
2730 }
2731 }
2732 */
2733
GetMaxAltAlleleBlockSize(const uintptr_t * __restrict variant_include,const uintptr_t * __restrict allele_idx_offsets,uint32_t raw_variant_ct,uint32_t variant_ct,uint32_t read_block_size)2734 uintptr_t GetMaxAltAlleleBlockSize(const uintptr_t* __restrict variant_include, const uintptr_t* __restrict allele_idx_offsets, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t read_block_size) {
2735 uintptr_t max_alt_allele_block_size = read_block_size;
2736 if (!allele_idx_offsets) {
2737 return max_alt_allele_block_size;
2738 }
2739 uint32_t variant_idx = 0;
2740 for (uint32_t variant_uidx_end = 0; ; ) {
2741 uint32_t variant_uidx = variant_uidx_end;
2742 variant_uidx_end += read_block_size;
2743 if (variant_uidx_end > raw_variant_ct) {
2744 variant_uidx_end = raw_variant_ct;
2745 read_block_size = raw_variant_ct - variant_uidx;
2746 }
2747 uint32_t cur_variant_ct = PopcountBitRange(variant_include, variant_uidx, variant_uidx_end);
2748 if (cur_variant_ct) {
2749 const uint32_t omitted_variant_ct = read_block_size - cur_variant_ct;
2750 const uintptr_t cur_ubound = allele_idx_offsets[variant_uidx_end] - allele_idx_offsets[variant_uidx] - omitted_variant_ct - read_block_size;
2751 variant_idx += cur_variant_ct;
2752 if (cur_ubound > max_alt_allele_block_size) {
2753 // subtract at beginning of each variant_include bitblock, add at end.
2754 // deliberate underflow
2755 uintptr_t cur_alt_allele_block_size = -S_CAST(uintptr_t, cur_variant_ct);
2756 do {
2757 const uint32_t cur_bitblock_start = AdvTo1Bit(variant_include, variant_uidx);
2758 // deliberate underflow
2759 cur_alt_allele_block_size -= allele_idx_offsets[cur_bitblock_start];
2760 if (variant_uidx_end - cur_bitblock_start == cur_variant_ct) {
2761 cur_alt_allele_block_size += allele_idx_offsets[variant_uidx_end];
2762 break;
2763 }
2764 variant_uidx = AdvTo0Bit(variant_include, cur_bitblock_start);
2765 cur_alt_allele_block_size += allele_idx_offsets[variant_uidx];
2766 cur_variant_ct -= variant_uidx - cur_bitblock_start;
2767 } while (cur_variant_ct);
2768 if (cur_alt_allele_block_size > max_alt_allele_block_size) {
2769 max_alt_allele_block_size = cur_alt_allele_block_size;
2770 }
2771 }
2772 if (variant_idx == variant_ct) {
2773 return max_alt_allele_block_size;
2774 }
2775 }
2776 }
2777 }
2778
GetMhcWordCt(uintptr_t sample_ct)2779 uintptr_t GetMhcWordCt(uintptr_t sample_ct) {
2780 const uintptr_t sample_ctl = BitCtToWordCt(sample_ct);
2781 const uintptr_t patch_01_max_word_ct = sample_ctl + DivUp(sizeof(AlleleCode) * sample_ct, kBytesPerWord);
2782 const uintptr_t patch_10_max_word_ct = sample_ctl + DivUp(2 * sizeof(AlleleCode) * sample_ct, kBytesPerWord);
2783 return RoundUpPow2(patch_01_max_word_ct, kWordsPerVec) + RoundUpPow2(patch_10_max_word_ct, kWordsPerVec);
2784 }
2785
2786 PglErr PgenMtLoadInit(const uintptr_t* variant_include, uint32_t sample_ct, uint32_t variant_ct, uintptr_t bytes_avail, uintptr_t pgr_alloc_cacheline_ct, uintptr_t thread_xalloc_cacheline_ct, uintptr_t per_variant_xalloc_byte_ct, uintptr_t per_alt_allele_xalloc_byte_ct, PgenFileInfo* pgfip, uint32_t* calc_thread_ct_ptr, uintptr_t*** genovecs_ptr, uintptr_t*** mhc_ptr, uintptr_t*** phasepresent_ptr, uintptr_t*** phaseinfo_ptr, uintptr_t*** dosage_present_ptr, Dosage*** dosage_mains_ptr, uintptr_t*** dphase_present_ptr, SDosage*** dphase_delta_ptr, uint32_t* read_block_size_ptr, uintptr_t* max_alt_allele_block_size_ptr, STD_ARRAY_REF(unsigned char*, 2) main_loadbufs, PgenReader*** pgr_pps, uint32_t** read_variant_uidx_starts_ptr) {
2787 uintptr_t cachelines_avail = bytes_avail / kCacheline;
2788 uint32_t read_block_size = kPglVblockSize;
2789 uint64_t multiread_cacheline_ct;
2790 for (; ; read_block_size /= 2) {
2791 multiread_cacheline_ct = PgfiMultireadGetCachelineReq(variant_include, pgfip, variant_ct, read_block_size);
2792 // limit each raw load buffer to 1/4 of remaining workspace
2793 // if there's an additional per-variant allocation, put it in the same bin
2794 // as the load buffers
2795 if ((multiread_cacheline_ct + (S_CAST(uint64_t, per_variant_xalloc_byte_ct) * read_block_size) / kCacheline) * 4 <= cachelines_avail) {
2796 break;
2797 }
2798 // lots of callers require read_block_size to be either raw_variant_ct or a
2799 // multiple of kBitsPerVec
2800 #ifdef __LP64__
2801 if (read_block_size <= kBitsPerVec) {
2802 return kPglRetNomem;
2803 }
2804 #else
2805 if (read_block_size <= kCacheline) {
2806 return kPglRetNomem;
2807 }
2808 #endif
2809 }
2810 #ifndef __LP64__
2811 if (multiread_cacheline_ct > (kMaxBytesPerIO / kCacheline)) {
2812 return kPglRetNomem;
2813 }
2814 #endif
2815 main_loadbufs[0] = S_CAST(unsigned char*, bigstack_alloc_raw(multiread_cacheline_ct * kCacheline));
2816 main_loadbufs[1] = S_CAST(unsigned char*, bigstack_alloc_raw(multiread_cacheline_ct * kCacheline));
2817 pgfip->block_base = main_loadbufs[0];
2818 *read_block_size_ptr = read_block_size;
2819 cachelines_avail -= 2 * (multiread_cacheline_ct + (S_CAST(uint64_t, per_variant_xalloc_byte_ct) * read_block_size) / kCacheline);
2820 if (per_alt_allele_xalloc_byte_ct) {
2821 const uintptr_t max_alt_allele_block_size = GetMaxAltAlleleBlockSize(variant_include, pgfip->allele_idx_offsets, pgfip->raw_variant_ct, variant_ct, read_block_size);
2822
2823 cachelines_avail -= DivUpU64(S_CAST(uint64_t, per_alt_allele_xalloc_byte_ct) * read_block_size, kCacheline);
2824 // we assume it's unnecessary to return this if
2825 // per_alt_allele_xalloc_byte_ct is zero
2826 *max_alt_allele_block_size_ptr = max_alt_allele_block_size;
2827 }
2828 // reduce calc_thread_ct if necessary
2829 uint32_t calc_thread_ct = *calc_thread_ct_ptr;
2830 if (calc_thread_ct > read_block_size) {
2831 calc_thread_ct = read_block_size;
2832 *calc_thread_ct_ptr = calc_thread_ct;
2833 }
2834
2835 // pgr_pps, read_variant_uidx_starts_ptr, (*pgr_pps)[tidx], pgr_alloc;
2836 // deliberately a slight overestimate
2837 const uintptr_t pgr_struct_alloc = RoundUpPow2(sizeof(PgenReader), kCacheline);
2838 uintptr_t thread_alloc_cacheline_ct = 1 + 1 + (pgr_struct_alloc / kCacheline) + pgr_alloc_cacheline_ct + thread_xalloc_cacheline_ct;
2839
2840 const uint32_t sample_ctcl2 = NypCtToCachelineCt(sample_ct);
2841 const uint32_t sample_ctcl = BitCtToCachelineCt(sample_ct);
2842
2843 // todo: multiallelic dosage
2844 const uintptr_t dosage_main_cl = DivUp(sample_ct, (kCacheline / sizeof(Dosage)));
2845 uintptr_t mhc_cl = 0;
2846 if (genovecs_ptr) {
2847 thread_alloc_cacheline_ct += 1 + sample_ctcl2;
2848 if (mhc_ptr) {
2849 mhc_cl = DivUp(GetMhcWordCt(sample_ct), kWordsPerCacheline);
2850 thread_alloc_cacheline_ct += 1 + mhc_cl;
2851 }
2852 if (phasepresent_ptr) {
2853 assert(phaseinfo_ptr);
2854 thread_alloc_cacheline_ct += 2 + sample_ctcl;
2855 }
2856 if (dosage_present_ptr) {
2857 assert(dosage_mains_ptr);
2858 thread_alloc_cacheline_ct += 2 + sample_ctcl + dosage_main_cl;
2859 if (dphase_present_ptr) {
2860 assert(dphase_delta_ptr);
2861 thread_alloc_cacheline_ct += 2 + sample_ctcl + dosage_main_cl;
2862 }
2863 }
2864 }
2865 if (thread_alloc_cacheline_ct * calc_thread_ct > cachelines_avail) {
2866 if (thread_alloc_cacheline_ct > cachelines_avail) {
2867 return kPglRetNomem;
2868 }
2869 calc_thread_ct = cachelines_avail / thread_alloc_cacheline_ct;
2870 *calc_thread_ct_ptr = calc_thread_ct;
2871 }
2872
2873 const uint32_t array_of_ptrs_alloc = RoundUpPow2(calc_thread_ct * sizeof(intptr_t), kCacheline);
2874 *pgr_pps = S_CAST(PgenReader**, bigstack_alloc_raw(array_of_ptrs_alloc));
2875 *read_variant_uidx_starts_ptr = S_CAST(uint32_t*, bigstack_alloc_raw_rd(calc_thread_ct * sizeof(int32_t)));
2876 for (uint32_t tidx = 0; tidx != calc_thread_ct; ++tidx) {
2877 (*pgr_pps)[tidx] = S_CAST(PgenReader*, bigstack_alloc_raw(pgr_struct_alloc));
2878 // PreinitPgr(g_pgr_ptrs[tidx]);
2879 unsigned char* pgr_alloc = S_CAST(unsigned char*, bigstack_alloc_raw(pgr_alloc_cacheline_ct * kCacheline));
2880
2881 // shouldn't be possible for this to fail
2882 PgrInit(nullptr, 0, pgfip, (*pgr_pps)[tidx], pgr_alloc);
2883 }
2884 if (genovecs_ptr) {
2885 *genovecs_ptr = S_CAST(uintptr_t**, bigstack_alloc_raw(array_of_ptrs_alloc));
2886 if (mhc_ptr) {
2887 *mhc_ptr = S_CAST(uintptr_t**, bigstack_alloc_raw(array_of_ptrs_alloc));
2888 }
2889 if (phasepresent_ptr) {
2890 *phasepresent_ptr = S_CAST(uintptr_t**, bigstack_alloc_raw(array_of_ptrs_alloc));
2891 *phaseinfo_ptr = S_CAST(uintptr_t**, bigstack_alloc_raw(array_of_ptrs_alloc));
2892 }
2893 if (dosage_present_ptr) {
2894 *dosage_present_ptr = S_CAST(uintptr_t**, bigstack_alloc_raw(array_of_ptrs_alloc));
2895 *dosage_mains_ptr = S_CAST(Dosage**, bigstack_alloc_raw(array_of_ptrs_alloc));
2896 if (dphase_present_ptr) {
2897 *dphase_present_ptr = S_CAST(uintptr_t**, bigstack_alloc_raw(array_of_ptrs_alloc));
2898 *dphase_delta_ptr = S_CAST(SDosage**, bigstack_alloc_raw(array_of_ptrs_alloc));
2899 }
2900 }
2901 const uintptr_t genovec_alloc = sample_ctcl2 * kCacheline;
2902 const uintptr_t bitarray_alloc = sample_ctcl * kCacheline;
2903 const uintptr_t dosage_main_alloc = dosage_main_cl * kCacheline;
2904 for (uint32_t tidx = 0; tidx != calc_thread_ct; ++tidx) {
2905 (*genovecs_ptr)[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(genovec_alloc));
2906 if (mhc_ptr) {
2907 (*mhc_ptr)[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(mhc_cl * kCacheline));
2908 }
2909 if (phasepresent_ptr) {
2910 (*phasepresent_ptr)[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(bitarray_alloc));
2911 (*phaseinfo_ptr)[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(bitarray_alloc));
2912 }
2913 if (dosage_present_ptr) {
2914 (*dosage_present_ptr)[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(bitarray_alloc));
2915 (*dosage_mains_ptr)[tidx] = S_CAST(Dosage*, bigstack_alloc_raw(dosage_main_alloc));
2916 if (dphase_present_ptr) {
2917 (*dphase_present_ptr)[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(bitarray_alloc));
2918 (*dphase_delta_ptr)[tidx] = S_CAST(SDosage*, bigstack_alloc_raw(dosage_main_alloc));
2919 }
2920 }
2921 }
2922 }
2923 return kPglRetSuccess;
2924 }
2925
MultireadNonempty(const uintptr_t * variant_include,const ThreadGroup * tgp,uint32_t raw_variant_ct,uint32_t read_block_size,PgenFileInfo * pgfip,uint32_t * read_block_idxp,PglErr * reterrp)2926 uint32_t MultireadNonempty(const uintptr_t* variant_include, const ThreadGroup* tgp, uint32_t raw_variant_ct, uint32_t read_block_size, PgenFileInfo* pgfip, uint32_t* read_block_idxp, PglErr* reterrp) {
2927 if (IsLastBlock(tgp)) {
2928 return 0;
2929 }
2930 // This condition ensures the PopcountWords() calls are valid. If it's
2931 // ever inconvenient, PopcountBitRange() can be used instead.
2932 assert((!(read_block_size % kBitsPerVec)) || (raw_variant_ct <= read_block_size));
2933
2934 const uint32_t read_block_sizel = read_block_size / kBitsPerWord;
2935 uint32_t read_block_idx = *read_block_idxp;
2936 uint32_t offset = read_block_idx * read_block_size;
2937 uint32_t cur_read_block_size = read_block_size;
2938 uint32_t cur_block_write_ct;
2939 for (; ; ++read_block_idx, offset += read_block_size) {
2940 if (offset + read_block_size >= raw_variant_ct) {
2941 cur_read_block_size = raw_variant_ct - offset;
2942 cur_block_write_ct = PopcountWords(&(variant_include[read_block_idx * read_block_sizel]), BitCtToWordCt(cur_read_block_size));
2943 assert(cur_block_write_ct); // otherwise, IsLastBlock should be set
2944 break;
2945 }
2946 cur_block_write_ct = PopcountWords(&(variant_include[read_block_idx * read_block_sizel]), read_block_sizel);
2947 if (cur_block_write_ct) {
2948 break;
2949 }
2950 }
2951 *read_block_idxp = read_block_idx;
2952 *reterrp = PgfiMultiread(variant_include, offset, offset + cur_read_block_size, cur_block_write_ct, pgfip);
2953 return cur_block_write_ct;
2954 }
2955
ExpandMhc(uint32_t sample_ct,uintptr_t * mhc,uintptr_t ** patch_01_set_ptr,AlleleCode ** patch_01_vals_ptr,uintptr_t ** patch_10_set_ptr,AlleleCode ** patch_10_vals_ptr)2956 void ExpandMhc(uint32_t sample_ct, uintptr_t* mhc, uintptr_t** patch_01_set_ptr, AlleleCode** patch_01_vals_ptr, uintptr_t** patch_10_set_ptr, AlleleCode** patch_10_vals_ptr) {
2957 const uint32_t sample_ctl = BitCtToWordCt(sample_ct);
2958 *patch_01_set_ptr = mhc;
2959 AlleleCode* patch_01_vals = R_CAST(AlleleCode*, &(mhc[sample_ctl]));
2960 *patch_01_vals_ptr = patch_01_vals;
2961 AlleleCode* patch_01_vals_end = &(patch_01_vals[sample_ct]);
2962 VecAlignUp(&patch_01_vals_end);
2963 uintptr_t* patch_10_set = R_CAST(uintptr_t*, patch_01_vals_end);
2964 *patch_10_set_ptr = patch_10_set;
2965 *patch_10_vals_ptr = R_CAST(AlleleCode*, &(patch_10_set[sample_ctl]));
2966 }
2967
WriteSampleIdsOverride(const uintptr_t * sample_include,const SampleIdInfo * siip,const char * outname,uint32_t sample_ct,SampleIdFlags override_flags)2968 PglErr WriteSampleIdsOverride(const uintptr_t* sample_include, const SampleIdInfo* siip, const char* outname, uint32_t sample_ct, SampleIdFlags override_flags) {
2969 FILE* outfile = nullptr;
2970 PglErr reterr = kPglRetSuccess;
2971 {
2972 if (unlikely(fopen_checked(outname, FOPEN_WB, &outfile))) {
2973 goto WriteSampleIdsOverride_ret_OPEN_FAIL;
2974 }
2975 const char* sample_ids = siip->sample_ids;
2976 const char* sids = siip->sids;
2977 const uintptr_t max_sample_id_blen = siip->max_sample_id_blen;
2978 const uintptr_t max_sid_blen = siip->max_sid_blen;
2979 char* write_iter = g_textbuf;
2980 char* textbuf_flush = &(write_iter[kMaxMediumLine]);
2981 if (!(override_flags & kfSampleIdNoIdHeader)) {
2982 *write_iter++ = '#';
2983 if (override_flags & kfSampleIdFidPresent) {
2984 write_iter = strcpya_k(write_iter, "FID\t");
2985 } else {
2986 // every single row starts with "0\t", so this causes only IIDs to be
2987 // reported
2988 sample_ids = &(sample_ids[2]);
2989 }
2990 write_iter = strcpya_k(write_iter, "IID");
2991 if (sids) {
2992 write_iter = strcpya_k(write_iter, "\tSID");
2993 }
2994 AppendBinaryEoln(&write_iter);
2995 } else {
2996 if (override_flags & kfSampleIdNoIdHeaderIidOnly) {
2997 // We've previously verified that all FIDs are in fact '0'.
2998 sample_ids = &(sample_ids[2]);
2999 }
3000 sids = nullptr;
3001 }
3002 uintptr_t sample_uidx_base = 0;
3003 uintptr_t cur_bits = sample_include[0];
3004 for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
3005 const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &cur_bits);
3006 write_iter = strcpya(write_iter, &(sample_ids[sample_uidx * max_sample_id_blen]));
3007 if (sids) {
3008 *write_iter++ = '\t';
3009 write_iter = strcpya(write_iter, &(sids[sample_uidx * max_sid_blen]));
3010 }
3011 AppendBinaryEoln(&write_iter);
3012 if (unlikely(fwrite_ck(textbuf_flush, outfile, &write_iter))) {
3013 goto WriteSampleIdsOverride_ret_WRITE_FAIL;
3014 }
3015 }
3016 if (unlikely(fclose_flush_null(textbuf_flush, write_iter, &outfile))) {
3017 goto WriteSampleIdsOverride_ret_WRITE_FAIL;
3018 }
3019 }
3020 while (0) {
3021 WriteSampleIdsOverride_ret_OPEN_FAIL:
3022 reterr = kPglRetOpenFail;
3023 break;
3024 WriteSampleIdsOverride_ret_WRITE_FAIL:
3025 reterr = kPglRetWriteFail;
3026 break;
3027 }
3028 fclose_cond(outfile);
3029 return reterr;
3030 }
3031
RealpathIdentical(const char * outname,const char * read_realpath,char * write_realpath_buf)3032 uint32_t RealpathIdentical(const char* outname, const char* read_realpath, char* write_realpath_buf) {
3033 #ifdef _WIN32
3034 const uint32_t fname_slen = GetFullPathName(outname, kPglFnamesize, write_realpath_buf, nullptr);
3035 return (fname_slen && (fname_slen <= kPglFnamesize) && memequal(read_realpath, write_realpath_buf, fname_slen + 1));
3036 #else
3037 return (realpath(outname, write_realpath_buf) && strequal_overread(read_realpath, write_realpath_buf));
3038 #endif
3039 }
3040
3041 // assumes rawval is in [1, 32767]
3042 static_assert(kDosageMax == 32768, "PrintHaploidNonintDosage() needs to be updated.");
PrintHaploidNonintDosage(uint32_t rawval,char * start)3043 char* PrintHaploidNonintDosage(uint32_t rawval, char* start) {
3044 // Instead of constant 5-digit precision, we print fewer digits whenever that
3045 // doesn't interfere with proper round-tripping. I.e. we search for the
3046 // shortest string in
3047 // ((n - 0.5)/32768, (n + 0.5)/32768).
3048 assert(rawval - 1 < 32767);
3049 start = strcpya_k(start, "0.");
3050
3051 // (rawval * 2) is in 65536ths
3052 // 65536 * 625 = 40960k
3053 const uint32_t range_top_40960k = rawval * 1250 + 625;
3054 // ok to check half-open interval since we never hit boundary
3055 if ((range_top_40960k % 4096) < 1250) {
3056 // when this is true, the four-decimal-place approximation is in the range
3057 // which round-trips back to our original number.
3058 const uint32_t four_decimal_places = range_top_40960k / 4096;
3059 return u32toa_trunc4(four_decimal_places, start);
3060 }
3061
3062 // we wish to print (100000 * remainder + 16384) / 32768, left-0-padded. and
3063 // may as well banker's round too.
3064 //
3065 // banker's rounding yields a different result than regular rounding for n/64
3066 // when n is congruent to 1 mod 4. 32768/64 = 512.
3067 const uint32_t five_decimal_places = ((3125 * rawval + 512) / 1024) - ((rawval % 2048) == 512);
3068 const uint32_t first_decimal_place = five_decimal_places / 10000;
3069 *start++ = '0' + first_decimal_place;
3070 const uint32_t last_four_digits = five_decimal_places - first_decimal_place * 10000;
3071 if (last_four_digits) {
3072 return u32toa_trunc4(last_four_digits, start);
3073 }
3074 return start;
3075 }
3076
PrintMultiallelicHcAsDs(uint32_t hc1,uint32_t hc2,uint32_t allele_ct,char * start)3077 char* PrintMultiallelicHcAsDs(uint32_t hc1, uint32_t hc2, uint32_t allele_ct, char* start) {
3078 if (hc1 == kMissingAlleleCode) {
3079 *start++ = '.';
3080 return start;
3081 }
3082 for (uint32_t uii = 1; uii < hc1; ++uii) {
3083 start = strcpya_k(start, "0,");
3084 }
3085 if (hc1 == hc2) {
3086 if (hc1) {
3087 start = strcpya_k(start, "2,");
3088 }
3089 for (uint32_t uii = hc1 + 1; uii != allele_ct; ++uii) {
3090 start = strcpya_k(start, "0,");
3091 }
3092 return &(start[-1]);
3093 }
3094 if (hc1) {
3095 start = strcpya_k(start, "1,");
3096 }
3097 for (uint32_t uii = hc1 + 1; uii != hc2; ++uii) {
3098 start = strcpya_k(start, "0,");
3099 }
3100 start = strcpya_k(start, "1,");
3101 for (uint32_t uii = hc2 + 1; uii != allele_ct; ++uii) {
3102 start = strcpya_k(start, "0,");
3103 }
3104 return &(start[-1]);
3105 }
3106
PrintMultiallelicHcAsHaploidDs(uint32_t hc1,uint32_t hc2,uint32_t allele_ct,char * start)3107 char* PrintMultiallelicHcAsHaploidDs(uint32_t hc1, uint32_t hc2, uint32_t allele_ct, char* start) {
3108 if (hc1 == kMissingAlleleCode) {
3109 *start++ = '.';
3110 return start;
3111 }
3112 for (uint32_t uii = 1; uii < hc1; ++uii) {
3113 start = strcpya_k(start, "0,");
3114 }
3115 if (hc1 == hc2) {
3116 if (hc1) {
3117 start = strcpya_k(start, "1,");
3118 }
3119 for (uint32_t uii = hc1 + 1; uii != allele_ct; ++uii) {
3120 start = strcpya_k(start, "0,");
3121 }
3122 return &(start[-1]);
3123 }
3124 if (hc1) {
3125 start = strcpya_k(start, "0.5,");
3126 }
3127 for (uint32_t uii = hc1 + 1; uii != hc2; ++uii) {
3128 start = strcpya_k(start, "0,");
3129 }
3130 start = strcpya_k(start, "0.5,");
3131 for (uint32_t uii = hc2 + 1; uii != allele_ct; ++uii) {
3132 start = strcpya_k(start, "0,");
3133 }
3134 return &(start[-1]);
3135 }
3136
3137 const char g_vft_names[3][18] = {"extract", "extract-intersect", "exclude"};
3138
3139 #ifdef __cplusplus
3140 } // namespace plink2
3141 #endif
3142