1 #ifndef __PLINK_COMMON_H__
2 #define __PLINK_COMMON_H__
3 
4 // This file is part of PLINK 1.90, copyright (C) 2005-2020 Shaun Purcell,
5 // Christopher Chang.
6 //
7 // This program is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // This program is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 
20 
21 // Resources needed across all plink modules.
22 
23 #define _FILE_OFFSET_BITS 64
24 
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <math.h>
29 #include <stdint.h>
30 #ifndef __STDC_FORMAT_MACROS
31 #  define __STDC_FORMAT_MACROS 1
32 #endif
33 #include <inttypes.h>
34 
35 // avoid compiler warning
36 #ifndef NDEBUG
37   #define NDEBUG
38 #endif
39 #include <assert.h>
40 
41 // Uncomment this to build this without CBLAS/CLAPACK.
42 // #define NOLAPACK
43 
44 // Uncomment this to prevent all unstable features from being accessible from
45 // the command line.
46 // #define STABLE_BUILD
47 
48 #define SPECIES_HUMAN 0
49 #define SPECIES_COW 1
50 #define SPECIES_DOG 2
51 #define SPECIES_HORSE 3
52 #define SPECIES_MOUSE 4
53 #define SPECIES_RICE 5
54 #define SPECIES_SHEEP 6
55 #define SPECIES_UNKNOWN 7
56 #define SPECIES_DEFAULT SPECIES_HUMAN
57 
58 #define PROG_NAME_STR "plink"
59 #define PROG_NAME_CAPS "PLINK"
60 
61 #ifdef _WIN32
62   // needed for MEMORYSTATUSEX
63   #ifndef _WIN64
64     #define WINVER 0x0500
65   #else
66     #define __LP64__
67   #endif
68   #ifndef WIN32_LEAN_AND_MEAN
69     #define WIN32_LEAN_AND_MEAN
70   #endif
71   #include <windows.h>
72 #else // Unix
73   #include <sys/stat.h>
74 #endif
75 
76 #ifndef HAVE_NULLPTR
77   #ifndef __cplusplus
78     #define nullptr NULL
79   #else
80     #if __cplusplus <= 199711L
81       #ifndef nullptr
82         #define nullptr NULL
83       #endif
84     #endif
85   #endif
86 #endif
87 
88 #ifdef _WIN32
89   #define fseeko fseeko64
90   #define ftello ftello64
91   #include <process.h>
92 #  undef PRId64
93 #  undef PRIu64
94 #  define PRId64 "I64d"
95 #  define PRIu64 "I64u"
96   #define pthread_t HANDLE
97   #define THREAD_RET_TYPE unsigned __stdcall
98   #define THREAD_RETURN return 0
99   #define EOLN_STR "\r\n"
100   #define FOPEN_RB "rb"
101   #define FOPEN_WB "wb"
102   #ifdef _WIN64
103     #define getc_unlocked _fgetc_nolock
104     #define putc_unlocked _fputc_nolock
105   #else
106     #define getc_unlocked getc
107     #define putc_unlocked putc
108   #endif
109   #if __cplusplus < 201103L
110     #define uint64_t unsigned long long
111     #define int64_t long long
112   #endif
113 #else
114   #include <pthread.h>
115   #define THREAD_RET_TYPE void*
116   #define THREAD_RETURN return nullptr
117   #ifdef __cplusplus
118     #ifndef PRId64
119       #define PRId64 "lld"
120     #endif
121   #endif
122   #define EOLN_STR "\n"
123   #define FOPEN_RB "r"
124   #define FOPEN_WB "w"
125   #if !defined(__APPLE__) && !defined(__FreeBSD__)
126     // argh
127     // not sure what the right threshold actually is, but this works for now
128     // (may break on gcc <3.0?  but that shouldn't matter anymore)
129     // tried defining GCC_VERSION, but that didn't always work
130     #if (__GNUC__ <= 4) && (__GNUC_MINOR__ < 8)
131       #define uint64_t unsigned long long
132       #define int64_t long long
133     #endif
134   #endif
135 #endif
136 
137 #ifdef _WIN64
138   #define __LP64__
139   #define CTZLU __builtin_ctzll
140   #define CLZLU __builtin_clzll
141 #else
142   #define CTZLU __builtin_ctzl
143   #define CLZLU __builtin_clzl
144   #ifndef __LP64__
145     // attempt to patch GCC 6 build failure
146     #if !defined(__FreeBSD__) && (__GNUC__ <= 4) && (__GNUC_MINOR__ < 8)
147       #ifndef uintptr_t
148         #define uintptr_t unsigned long
149       #endif
150       #ifndef intptr_t
151         #define intptr_t long
152       #endif
153     #endif
154   #endif
155 #endif
156 
157 #ifdef __cplusplus
158   #include <algorithm>
159 #  ifdef _WIN32
160 // Windows C++11 <algorithm> resets these values :(
161 #    undef PRIu64
162 #    undef PRId64
163 #    define PRIu64 "I64u"
164 #    define PRId64 "I64d"
165 #    undef PRIuPTR
166 #    undef PRIdPTR
167 #    ifdef __LP64__
168 #      define PRIuPTR PRIu64
169 #      define PRIdPTR PRId64
170 #    else
171 #      if __cplusplus < 201103L
172 #        define PRIuPTR "lu"
173 #        define PRIdPTR "ld"
174 #      else
175 #        define PRIuPTR "u"
176 #        define PRIdPTR "d"
177 #      endif
178 #    endif
179 #  endif
180   #define HEADER_INLINE inline
181 #else
182   #define HEADER_INLINE static inline
183 #endif
184 
185 // It would be useful to disable compilation on big-endian platforms, but I
186 // don't see a decent portable way to do this (see e.g. discussion at
187 // http://esr.ibiblio.org/?p=5095 ).
188 
189 #ifdef __LP64__
190   #ifndef __SSE2__
191     // It's obviously possible to support this by writing 64-bit non-SSE2 code
192     // shadowing each SSE2 intrinsic, but this almost certainly isn't worth the
193     // development/testing effort until regular PLINK 2.0 development is
194     // complete.  No researcher has ever asked me for this feature.
195     #error "64-bit builds currently require SSE2.  Try producing a 32-bit build instead."
196   #endif
197   #include <emmintrin.h>
198 
199   #define VECFTYPE __m128
200   #define VECITYPE __m128i
201   #define VECDTYPE __m128d
202 
203   // useful because of its bitwise complement: ~ZEROLU is a word with all 1
204   // bits, while ~0 is always 32 1 bits.
205   #define ZEROLU 0LLU
206 
207   // mainly useful for bitshifts: (ONELU << 32) works in 64-bit builds, while
208   // (1 << 32) is undefined.  also used to cast some numbers/expressions to
209   // uintptr_t (e.g. multiplying an int constant by ONELU widens it to 64 bits
210   // only in 64-bit builds; note that 1LU fails on Win64 while 1LLU doesn't do
211   // the right thing for 32-bit builds).
212   #define ONELU 1LLU
213 
214   #ifdef _WIN32 // i.e. Win64
215 
216     #ifndef PRIuPTR
217       #define PRIuPTR PRIu64
218     #endif
219     #ifndef PRIdPTR
220       #define PRIdPTR PRId64
221     #endif
222     #define PRIxPTR2 "016I64x"
223 
224   #else // not _WIN32
225 
226     #ifndef PRIuPTR
227       #define PRIuPTR "lu"
228     #endif
229     #ifndef PRIdPTR
230       #define PRIdPTR "ld"
231     #endif
232     #define PRIxPTR2 "016lx"
233 
234   #endif // Win64
235 
236   #define VEC_BYTES 16
237 
238 #else // not __LP64__
239 
240   #define ZEROLU 0LU
241   #define ONELU 1LU
242 #  if (__GNUC__ <= 4) && (__GNUC_MINOR__ < 8) && (__cplusplus < 201103L)
243 #    undef PRIuPTR
244 #    undef PRIdPTR
245 #    define PRIuPTR "lu"
246 #    define PRIdPTR "ld"
247 #  endif
248   #define PRIxPTR2 "08lx"
249 
250   // todo: update code so this still works when reduced to 4
251   #define VEC_BYTES 8
252 
253 #endif // __LP64__
254 
255 // use constexpr for these as soon as compiler support is available on all
256 // platforms
257 #define FIVEMASK ((~ZEROLU) / 3)
258 #define AAAAMASK (FIVEMASK * 2)
259 
260 #define VEC_BYTES_M1 (VEC_BYTES - 1)
261 #define VEC_BITS (VEC_BYTES * 8)
262 #define VEC_BITS_M1 (VEC_BITS - 1)
263 
264 #ifdef DYNAMIC_ZLIB
265   #include <zlib.h>
266   #if !defined(ZLIB_VERNUM) || ZLIB_VERNUM < 0x1240
267     #error "zlib version 1.2.4 or later required."
268   #endif
269 #else
270   #include "../zlib-1.2.11/zlib.h"
271 #endif
272 #include "SFMT.h"
273 
274 // 64MB of non-workspace memory guaranteed for now.
275 // Currently also serves as the maximum allele length.
276 #define NON_BIGSTACK_MIN 67108864
277 
278 #define PI 3.1415926535897932
279 #define RECIP_2_32 0.00000000023283064365386962890625
280 #define RECIP_2_53 0.00000000000000011102230246251565404236316680908203125
281 // floating point comparison-to-nonzero tolerance, currently 2^{-30}
282 #define EPSILON 0.000000000931322574615478515625
283 // less tolerant versions (2^{-35}, 2^{-44}) for some exact calculations
284 #define SMALLISH_EPSILON 0.00000000002910383045673370361328125
285 #define SMALL_EPSILON 0.00000000000005684341886080801486968994140625
286 // at least sqrt(SMALL_EPSILON)
287 #define BIG_EPSILON 0.000000476837158203125
288 // 53-bit double precision limit
289 #define DOUBLE_PREC_LIMIT 0.00000000000000011102230246251565404236316680908203125
290 #define TWO_63 9223372036854775808.0
291 #define SQRT_HALF 0.70710678118654746
292 
293 // 2^{-83} bias to give exact tests maximum ability to determine tiny p-values.
294 // (~2^{-53} is necessary to take advantage of denormalized small numbers, then
295 // allow tail sum to be up to 2^30.)
296 #define EXACT_TEST_BIAS 0.00000000000000000000000010339757656912845935892608650874535669572651386260986328125
297 
298 // occasionally used as an infinity substitute that avoids the 32-bit Windows
299 // performance penalty
300 // can import from limits.h, we don't bother to include that for now
301 #ifndef DBL_MAX
302   #define DBL_MAX 1.7976931348623157e308
303 #endif
304 
305 // not quite the same as FLT_MAX since it's a double-precision constant
306 #define FLT_MAXD 3.4028234663852886e38
307 
308 #define RET_SUCCESS 0
309 #define RET_NOMEM 1
310 #define RET_OPEN_FAIL 2
311 #define RET_INVALID_FORMAT 3
312 #define RET_CALC_NOT_YET_SUPPORTED 4
313 #define RET_INVALID_CMDLINE 5
314 #define RET_WRITE_FAIL 6
315 #define RET_READ_FAIL 7
316 #define RET_THREAD_CREATE_FAIL 8
317 #define RET_ALLELE_MISMATCH 9
318 #define RET_NULL_CALC 10
319 #define RET_ALL_SAMPLES_EXCLUDED 11
320 #define RET_ALL_MARKERS_EXCLUDED 12
321 #define RET_NETWORK 13
322 #define RET_DEGENERATE_DATA 14
323 #define LOAD_PHENO_LAST_COL 127
324 
325 // for 2.0 -> 1.9 backports
326 #define RET_MALFORMED_INPUT RET_INVALID_FORMAT
327 
328 #define MISC_AFFECTION_01 1LLU
329 #define MISC_NONFOUNDERS 2LLU
330 #define MISC_MAF_SUCC 4LLU
331 #define MISC_FREQ_COUNTS 8LLU
332 #define MISC_FREQ_CC 0x10LLU
333 #define MISC_FREQX 0x20LLU
334 #define MISC_KEEP_ALLELE_ORDER 0x40LLU
335 #define MISC_SET_HH_MISSING 0x80LLU
336 #define MISC_SET_MIXED_MT_MISSING 0x100LLU
337 #define MISC_KEEP_AUTOCONV 0x200LLU
338 #define MISC_LOAD_CLUSTER_KEEP_NA 0x400LLU
339 #define MISC_WRITE_CLUSTER_OMIT_UNASSIGNED 0x800LLU
340 #define MISC_ALLOW_EXTRA_CHROMS 0x1000LLU
341 #define MISC_MAKE_FOUNDERS_REQUIRE_2_MISSING 0x2000LLU
342 #define MISC_MAKE_FOUNDERS_FIRST 0x4000LLU
343 #define MISC_LASSO_REPORT_ZEROES 0x8000LLU
344 #define MISC_LASSO_SELECT_COVARS 0x10000LLU
345 #define MISC_DOUBLE_ID 0x20000LLU
346 #define MISC_BIALLELIC_ONLY 0x40000LLU
347 #define MISC_BIALLELIC_ONLY_STRICT 0x80000LLU
348 #define MISC_BIALLELIC_ONLY_LIST 0x100000LLU
349 #define MISC_VCF_FILTER 0x200000LLU
350 #define MISC_GPLINK 0x400000LLU
351 #define MISC_SNPS_ONLY_JUST_ACGT 0x800000LLU
352 #define MISC_IMPUTE_SEX 0x1000000LLU
353 #define MISC_OXFORD_SNPID_CHR 0x2000000LLU
354 #define MISC_EXTRACT_RANGE 0x4000000LLU
355 #define MISC_EXCLUDE_RANGE 0x8000000LLU
356 #define MISC_MERGEX 0x10000000LLU
357 #define MISC_SET_ME_MISSING 0x20000000LLU
358 #define MISC_SEXCHECK_YCOUNT 0x40000000LLU
359 #define MISC_SEXCHECK_YONLY 0x80000000LLU
360 #define MISC_FAMILY_CLUSTERS 0x100000000LLU
361 #define MISC_FILL_MISSING_A2 0x200000000LLU
362 #define MISC_HET_SMALL_SAMPLE 0x400000000LLU
363 #define MISC_FST_CC 0x800000000LLU
364 #define MISC_SPLIT_MERGE_NOFAIL 0x1000000000LLU
365 #define MISC_REAL_REF_ALLELES 0x2000000000LLU
366 #define MISC_RPLUGIN_DEBUG 0x4000000000LLU
367 #define MISC_MISSING_GZ 0x8000000000LLU
368 #define MISC_FREQ_GZ 0x10000000000LLU
369 #define MISC_HET_GZ 0x20000000000LLU
370 #define MISC_ALLOW_NO_SAMPLES 0x40000000000LLU
371 #define MISC_ALLOW_NO_VARS 0x80000000000LLU
372 #define MISC_VCF_REQUIRE_GT 0x100000000000LLU
373 
374 // assume for now that .bed must always be accompanied by both .bim and .fam
375 #define FILTER_ALL_REQ 1LLU
376 #define FILTER_BIM_REQ 2LLU
377 #define FILTER_FAM_REQ 4LLU
378 
379 // ok with --dosage + --map, but not --dosage by itself
380 #define FILTER_DOSAGEMAP 8LLU
381 
382 #define FILTER_NODOSAGE 0x10LLU
383 #define FILTER_NOCNV 0x20LLU
384 #define FILTER_EXCLUDE_MARKERNAME_SNP 0x40LLU
385 #define FILTER_BINARY_CASES 0x80LLU
386 #define FILTER_BINARY_CONTROLS 0x100LLU
387 #define FILTER_BINARY_FEMALES 0x200LLU
388 #define FILTER_BINARY_MALES 0x400LLU
389 #define FILTER_BINARY_FOUNDERS 0x800LLU
390 #define FILTER_BINARY_NONFOUNDERS 0x1000LLU
391 #define FILTER_MAKE_FOUNDERS 0x2000LLU
392 #define FILTER_PRUNE 0x4000LLU
393 #define FILTER_SNPS_ONLY 0x8000LLU
394 #define FILTER_TAIL_PHENO 0x10000LLU
395 #define FILTER_ZERO_CMS 0x20000LLU
396 
397 #define CALC_RELATIONSHIP 1LLU
398 #define CALC_IBC 2LLU
399 #define CALC_DISTANCE 4LLU
400 #define CALC_PLINK1_DISTANCE_MATRIX 8LLU
401 #define CALC_PLINK1_IBS_MATRIX 0x10LLU
402 #define CALC_GDISTANCE_MASK 0x1cLLU
403 #define CALC_GROUPDIST 0x20LLU
404 #define CALC_REGRESS_DISTANCE 0x40LLU
405 #define CALC_UNRELATED_HERITABILITY 0x80LLU
406 #define CALC_FREQ 0x100LLU
407 #define CALC_REL_CUTOFF 0x200LLU
408 #define CALC_WRITE_SNPLIST 0x400LLU
409 #define CALC_LIST_23_INDELS 0x800LLU
410 #define CALC_GENOME 0x1000LLU
411 #define CALC_REGRESS_REL 0x2000LLU
412 #define CALC_LD_PRUNE 0x4000LLU
413 #define CALC_DFAM 0x8000LLU
414 #define CALC_TUCC 0x10000LLU
415 #define CALC_MAKE_BED 0x20000LLU
416 #define CALC_RECODE 0x40000LLU
417 #define CALC_MERGE 0x80000LLU
418 #define CALC_WRITE_COVAR 0x100000LLU
419 #define CALC_WRITE_CLUSTER 0x200000LLU
420 #define CALC_MODEL 0x400000LLU
421 #define CALC_HARDY 0x800000LLU
422 #define CALC_GXE 0x1000000LLU
423 #define CALC_IBS_TEST 0x2000000LLU
424 #define CALC_CLUSTER 0x4000000LLU
425 #define CALC_HOMOZYG 0x8000000LLU
426 #define CALC_NEIGHBOR 0x10000000LLU
427 #define CALC_GLM 0x20000000LLU
428 #define CALC_MISSING_REPORT 0x40000000LLU
429 #define CALC_CMH 0x80000000LLU
430 #define CALC_HOMOG 0x100000000LLU
431 #define CALC_LASSO 0x200000000LLU
432 #define CALC_LASSO_LAMBDA 0x400000000LLU
433 #define CALC_WRITE_SET 0x800000000LLU
434 #define CALC_LD 0x1000000000LLU
435 #define CALC_EPI 0x2000000000LLU
436 #define CALC_TESTMISS 0x4000000000LLU
437 #define CALC_TESTMISHAP 0x8000000000LLU
438 #define CALC_SEXCHECK 0x10000000000LLU
439 #define CALC_CLUMP 0x20000000000LLU
440 #define CALC_PCA 0x40000000000LLU
441 #define CALC_BLOCKS 0x80000000000LLU
442 #define CALC_SCORE 0x100000000000LLU
443 #define CALC_MENDEL 0x200000000000LLU
444 #define CALC_HET 0x400000000000LLU
445 #define CALC_FLIPSCAN 0x800000000000LLU
446 #define CALC_TDT 0x1000000000000LLU
447 #define CALC_MAKE_PERM_PHENO 0x2000000000000LLU
448 #define CALC_QFAM 0x4000000000000LLU
449 #define CALC_FST 0x8000000000000LLU
450 #define CALC_SHOW_TAGS 0x10000000000000LLU
451 #define CALC_MAKE_BIM 0x20000000000000LLU
452 #define CALC_MAKE_FAM 0x40000000000000LLU
453 #define CALC_WRITE_VAR_RANGES 0x80000000000000LLU
454 #define CALC_DUPVAR 0x100000000000000LLU
455 #define CALC_RPLUGIN 0x200000000000000LLU
456 #define CALC_ONLY_BIM (CALC_WRITE_SET | CALC_WRITE_SNPLIST | CALC_WRITE_VAR_RANGES | CALC_LIST_23_INDELS | CALC_MAKE_BIM | CALC_DUPVAR)
457 #define CALC_ONLY_FAM (CALC_MAKE_PERM_PHENO | CALC_WRITE_COVAR | CALC_MAKE_FAM)
458 // only room for 6 more basic commands before we need to switch from a single
459 // uint64_t to uintptr_t*/is_set()/etc.
460 
461 // necessary to patch heterozygous haploids/female Y chromosome genotypes
462 // during loading?
463 #define XMHH_EXISTS 1
464 #define Y_FIX_NEEDED 2
465 #define NXMHH_EXISTS 4
466 
467 #define ALLELE_RECODE 1
468 #define ALLELE_RECODE_MULTICHAR 2
469 #define ALLELE_RECODE_ACGT 4
470 
471 // 0 = non-explicit error
472 #define VCF_HALF_CALL_ERROR 1
473 #define VCF_HALF_CALL_MISSING 2
474 #define VCF_HALF_CALL_HAPLOID 3
475 #define VCF_HALF_CALL_REFERENCE 4
476 
477 #define M23_MALE 1
478 #define M23_FEMALE 2
479 #define M23_FORCE_MISSING_SEX 4
480 #define M23_SEX 7
481 
482 #define MARKER_CMS_OPTIONAL 1
483 #define MARKER_CMS_FORCED 2
484 
485 #define UNSORTED_CHROM 1
486 #define UNSORTED_BP 2
487 #define UNSORTED_CM 4
488 #define UNSORTED_SPLIT_CHROM 8
489 
490 #define ALLOW_NO_SEX 1
491 #define MUST_HAVE_SEX 2
492 
493 #define LGEN_REFERENCE 1
494 #define LGEN_ALLELE_COUNT 2
495 
496 #define PHENO_ALL 1
497 #define PHENO_MERGE 2
498 
499 #define FAM_COL_1 1
500 #define FAM_COL_34 2
501 #define FAM_COL_5 4
502 #define FAM_COL_6 8
503 #define FAM_COL_13456 15
504 
505 #define COVAR_KEEP_PHENO_ON_MISSING_COV 1
506 #define COVAR_NAME 2
507 #define COVAR_NUMBER 4
508 #define COVAR_NO_CONST 8
509 #define COVAR_ALLOW_NONE 0x10
510 
511 #define DISTANCE_SQ 1
512 #define DISTANCE_SQ0 2
513 #define DISTANCE_TRI 3
514 #define DISTANCE_SHAPEMASK 3
515 #define DISTANCE_GZ 4
516 #define DISTANCE_BIN 8
517 #define DISTANCE_BIN4 0x10
518 #define DISTANCE_IBS 0x20
519 #define DISTANCE_1_MINUS_IBS 0x40
520 #define DISTANCE_ALCT 0x80
521 #define DISTANCE_TYPEMASK 0xe0
522 #define DISTANCE_FLAT_MISSING 0x100
523 #define DISTANCE_CLUSTER 0x200
524 #define DISTANCE_WTS_NOHEADER 0x400
525 
526 #define RECODE_01 1
527 #define RECODE_12 2
528 #define RECODE_TAB 4
529 #define RECODE_DELIMX 8
530 #define RECODE_23 0x10
531 #define RECODE_A 0x20
532 #define RECODE_A_TRANSPOSE 0x40
533 #define RECODE_AD 0x80
534 #define RECODE_BEAGLE 0x100
535 #define RECODE_BEAGLE_NOMAP 0x200
536 #define RECODE_BIMBAM 0x400
537 #define RECODE_BIMBAM_1CHR 0x800
538 #define RECODE_COMPOUND 0x1000
539 #define RECODE_FASTPHASE 0x2000
540 #define RECODE_FASTPHASE_1CHR 0x4000
541 #define RECODE_HV 0x8000
542 #define RECODE_HV_1CHR 0x10000
543 #define RECODE_LGEN 0x20000
544 #define RECODE_LGEN_REF 0x40000
545 #define RECODE_LIST 0x80000
546 #define RECODE_OXFORD 0x100000
547 #define RECODE_RLIST 0x200000
548 #define RECODE_STRUCTURE 0x400000
549 #define RECODE_TRANSPOSE 0x800000
550 #define RECODE_PED 0x1000000
551 #define RECODE_VCF 0x2000000
552 #define RECODE_TYPEMASK 0x3fffff0
553 #define RECODE_FID 0x4000000
554 #define RECODE_IID 0x8000000
555 #define RECODE_INCLUDE_ALT 0x10000000
556 #define RECODE_BGZ 0x20000000
557 #define RECODE_GEN_GZ 0x40000000
558 #define RECODE_OMIT_NONMALE_Y 0x80000000U
559 
560 #define GENOME_OUTPUT_GZ 1
561 #define GENOME_REL_CHECK 2
562 #define GENOME_OUTPUT_FULL 4
563 #define GENOME_IBD_UNBOUNDED 8
564 #define GENOME_NUDGE 0x10
565 // separate flag to ensure behavior is unchanged under --unbounded
566 #define GENOME_FILTER_PI_HAT 0x20
567 
568 #define WRITE_COVAR_PHENO 1
569 #define WRITE_COVAR_NO_PARENTS 2
570 #define WRITE_COVAR_NO_SEX 4
571 #define WRITE_COVAR_FEMALE_2 8
572 #define WRITE_COVAR_DUMMY 0x10
573 #define WRITE_COVAR_DUMMY_NO_ROUND 0x20
574 
575 #define MERGE_MODE_MASK 7
576 #define MERGE_EQUAL_POS 8
577 #define MERGE_BINARY 16
578 #define MERGE_LIST 32
579 
580 #define SAMPLE_SORT_NONE 1
581 #define SAMPLE_SORT_NATURAL 2
582 #define SAMPLE_SORT_ASCII 4
583 #define SAMPLE_SORT_FILE 8
584 
585 #define HWE_MIDP 1
586 #define HWE_THRESH_MIDP 2
587 #define HWE_THRESH_ALL 4
588 #define HWE_GZ 8
589 
590 #define MENDEL_FILTER 1
591 #define MENDEL_FILTER_VAR_FIRST 2
592 #define MENDEL_DUOS 4
593 #define MENDEL_MULTIGEN 8
594 #define MENDEL_SUMMARIES_ONLY 0x10
595 
596 #define DUMMY_MISSING_GENO 1
597 #define DUMMY_MISSING_PHENO 2
598 #define DUMMY_SCALAR_PHENO 4
599 #define DUMMY_ACGT 8
600 #define DUMMY_1234 0x10
601 #define DUMMY_12 0x20
602 
603 #define SIMULATE_QT 1
604 #define SIMULATE_TAGS 2
605 #define SIMULATE_HAPS 4
606 #define SIMULATE_ACGT 8
607 #define SIMULATE_1234 0x10
608 #define SIMULATE_12 0x20
609 
610 #define MODEL_ASSOC 1
611 #define MODEL_FISHER 2
612 #define MODEL_FISHER_MIDP 4
613 #define MODEL_PERM 8
614 #define MODEL_MPERM 0x10
615 #define MODEL_GENEDROP 0x20
616 #define MODEL_PERM_COUNT 0x40
617 #define MODEL_ASSOC_COUNTS 0x80
618 #define MODEL_ASSOC_FDEPR 0x100
619 #define MODEL_DMASK 0x1a6
620 #define MODEL_QT_MEANS 0x200
621 #define MODEL_PDOM 0x400
622 #define MODEL_PREC 0x800
623 #define MODEL_PGEN 0x1000
624 #define MODEL_PTREND 0x2000
625 #define MODEL_TRENDONLY 0x4000
626 #define MODEL_PMASK (MODEL_PDOM | MODEL_PREC | MODEL_PGEN | MODEL_PTREND | MODEL_TRENDONLY)
627 #define MODEL_LIN 0x8000
628 #define MODEL_QMASK (MODEL_QT_MEANS | MODEL_LIN)
629 #define MODEL_SET_TEST 0x10000
630 
631 #define GLM_LOGISTIC 1
632 #define GLM_PERM 2
633 #define GLM_MPERM 4
634 #define GLM_GENEDROP 8
635 #define GLM_PERM_COUNT 0x10
636 #define GLM_GENOTYPIC 0x20
637 #define GLM_HETHOM 0x40
638 #define GLM_DOMINANT 0x80
639 #define GLM_RECESSIVE 0x100
640 #define GLM_NO_SNP 0x200
641 #define GLM_HIDE_COVAR 0x400
642 #define GLM_SEX 0x800
643 #define GLM_NO_X_SEX 0x1000
644 #define GLM_INTERACTION 0x2000
645 #define GLM_STANDARD_BETA 0x4000
646 #define GLM_BETA 0x8000
647 #define GLM_TEST_ALL 0x10000
648 #define GLM_CONDITION_DOMINANT 0x20000
649 #define GLM_CONDITION_RECESSIVE 0x40000
650 #define GLM_SET_TEST 0x80000
651 #define GLM_NO_SNP_EXCL 0x831ea
652 #define GLM_INTERCEPT 0x100000
653 
654 #define MPERM_DUMP_BEST 1
655 #define MPERM_DUMP_ALL 2
656 
657 // (2^31 - 1000001) / 2
658 #define APERM_MAX 1073241823
659 
660 #define ADJUST_GC 2
661 #define ADJUST_LOG10 4
662 #define ADJUST_QQ 8
663 #define ADJUST_LAMBDA 16
664 
665 #define DUPVAR_REF 1
666 #define DUPVAR_IDS_ONLY 2
667 #define DUPVAR_SUPPRESS_FIRST 4
668 
669 #define CNV_MAKE_MAP 1
670 #define CNV_MAKE_MAP_LONG 2
671 #define CNV_CHECK_NO_OVERLAP 4
672 #define CNV_DEL 8
673 #define CNV_DUP 0x10
674 #define CNV_WRITE_FREQ 0x20
675 #define CNV_UNIQUE 0x40
676 #define CNV_DROP_NO_SEGMENT 0x80
677 #define CNV_SAMPLE_PERM 0x100
678 #define CNV_ENRICHMENT_TEST 0x200
679 #define CNV_TEST 0x400
680 #define CNV_TEST_FORCE_1SIDED 0x800
681 #define CNV_TEST_FORCE_2SIDED 0x1000
682 #define CNV_TEST_REGION 0x2000
683 #define CNV_TRACK 0x4000
684 #define CNV_SEGLIST 0x8000
685 #define CNV_REPORT_REGIONS 0x10000
686 #define CNV_VERBOSE_REPORT_REGIONS 0x20000
687 #define CNV_WRITE 0x40000
688 #define CNV_EXCLUDE_OFF_BY_1 0x80000
689 
690 #define CNV_INTERSECT 1
691 #define CNV_EXCLUDE 2
692 #define CNV_COUNT 4
693 
694 #define CNV_OVERLAP 1
695 #define CNV_OVERLAP_REGION 2
696 #define CNV_OVERLAP_UNION 3
697 #define CNV_DISRUPT 4
698 
699 #define CNV_FREQ_EXCLUDE_ABOVE 1
700 #define CNV_FREQ_EXCLUDE_BELOW 2
701 #define CNV_FREQ_EXCLUDE_EXACT 4
702 #define CNV_FREQ_INCLUDE_EXACT 8
703 #define CNV_FREQ_FILTER 15
704 #define CNV_FREQ_OVERLAP 16
705 #define CNV_FREQ_METHOD2 32
706 
707 #define SEGMENT_GROUP 1
708 
709 // default jackknife iterations
710 #define ITERS_DEFAULT 100000
711 #define MAX_PCS_DEFAULT 20
712 
713 #define BIGSTACK_MIN_MB 64
714 #define BIGSTACK_DEFAULT_MB 2048
715 
716 #ifdef __LP64__
717   #define BITCT 64
718 
719   // unions generally shouldn't be used for reinterpret_cast's job (memcpy is
720   // the right C-compatible way), but vectors are an exception to this rule.
721   typedef union {
722     VECFTYPE vf;
723     VECITYPE vi;
724     VECDTYPE vd;
725     uintptr_t u8[VEC_BITS / BITCT];
726     double d8[VEC_BYTES / sizeof(double)];
727     float f4[VEC_BYTES / sizeof(float)];
728     uint32_t u4[VEC_BYTES / sizeof(int32_t)];
729   } __univec;
730 #else
731   #define BITCT 32
732 #endif
733 
734 #define BITCT2 (BITCT / 2)
735 #define BYTECT (BITCT / 8)
736 #define BYTECT4 (BITCT / 32)
737 #define VEC_WORDS (VEC_BITS / BITCT)
738 #define VEC_INT32 (VEC_BYTES / 4)
739 
740 // assumed number of bytes per cache line, for alignment
741 #define CACHELINE 64
742 
743 #define CACHELINE_BIT (CACHELINE * 8)
744 #define CACHELINE_INT32 (CACHELINE / 4)
745 #define CACHELINE_INT64 (CACHELINE / 8)
746 #define CACHELINE_WORD (CACHELINE / BYTECT)
747 #define CACHELINE_DBL (CACHELINE / 8)
748 
749 // alignment must be a power of 2
round_up_pow2(uintptr_t val,uintptr_t alignment)750 HEADER_INLINE uintptr_t round_up_pow2(uintptr_t val, uintptr_t alignment) {
751   uintptr_t alignment_m1 = alignment - 1;
752   assert(!(alignment & alignment_m1));
753   return (val + alignment_m1) & (~alignment_m1);
754 }
755 
756 #define BITCT_TO_VECCT(val) (((val) + (VEC_BITS - 1)) / VEC_BITS)
757 #define BITCT_TO_WORDCT(val) (((val) + (BITCT - 1)) / BITCT)
758 #define BITCT_TO_ALIGNED_WORDCT(val) (VEC_WORDS * BITCT_TO_VECCT(val))
759 
760 #define QUATERCT_TO_VECCT(val) (((val) + ((VEC_BITS / 2) - 1)) / (VEC_BITS / 2))
761 #define QUATERCT_TO_WORDCT(val) (((val) + (BITCT2 - 1)) / BITCT2)
762 #define QUATERCT_TO_ALIGNED_WORDCT(val) (VEC_WORDS * QUATERCT_TO_VECCT(val))
763 
764 // todo: get rid of (BITCT_TO_WORDCT(x) == QUATERCT_TO_VECCT(x)) and similar
765 // assumptions, in preparation for AVX2
766 
767 #ifdef __LP64__
768 #define round_up_pow2_ull round_up_pow2
769 #else
round_up_pow2_ull(uint64_t val,uint64_t alignment)770 HEADER_INLINE uint64_t round_up_pow2_ull(uint64_t val, uint64_t alignment) {
771   uint64_t alignment_m1 = alignment - 1;
772   assert(!(alignment & alignment_m1));
773   return (val + alignment_m1) & (~alignment_m1);
774 }
775 #endif
776 
777 // 32-bit instead of word-length bitwise not here, when val can be assumed to
778 // be 32-bit.
779 // (note that the sizeof operator "returns" an uintptr_t, not a uint32_t; hence
780 // the lack of sizeof in the CACHELINE_INT32, etc. definitions.)
round_up_pow2_ui(uint32_t val,uint32_t alignment)781 HEADER_INLINE uint32_t round_up_pow2_ui(uint32_t val, uint32_t alignment) {
782   uint32_t alignment_m1 = alignment - 1;
783   assert(!(alignment & alignment_m1));
784   return (val + alignment_m1) & (~alignment_m1);
785 }
786 
787 #define MAXV(aa, bb) (((bb) > (aa))? (bb) : (aa))
788 #define MINV(aa, bb) (((aa) > (bb))? (bb) : (aa))
789 
790 #ifdef _WIN32
791 // if MAX_THREADS > 65, single WaitForMultipleObjects calls must be converted
792 // into loops
793   #define MAX_THREADS 64
794   #define MAX_THREADS_P1 65
795 #else
796 // shouldn't be larger than MODEL_BLOCKSIZE for now
797   #define MAX_THREADS 512
798   #define MAX_THREADS_P1 513
799 #endif
800 
801 // defined as a macro since type of idx can vary; might want a debug
802 // compilation mode which performs type-checking, though
803 #define EXTRACT_2BIT_GENO(ulptr, idx) (((ulptr)[(idx) / BITCT2] >> (2 * ((idx) % BITCT2))) & 3)
804 
805 // generic maximum line length.  .ped/.vcf/etc. lines can of course be longer
806 #define MAXLINELEN 131072
807 
808 // must be at least 2 * MAXLINELEN + 2 to support generic token loader.
809 #define TEXTBUF_SIZE (2 * MAXLINELEN + 256)
810 
811 // Maximum length of chromosome, variant, FID, IID, cluster, and set IDs (not
812 // including terminating null, that's what _P1 is for).  This value supports up
813 // to 8 IDs per line (maximum so far is 5, for e.g. --hom).
814 #define MAX_ID_SLEN 16000
815 
816 #define MAX_ID_BLEN (MAX_ID_SLEN + 1)
817 #define MAX_ID_SLEN_STR "16000"
818 
819 // Maximum size of "dynamically" allocated line load buffer.  (This is the
820 // limit that applies to .vcf and similar files.)  Inconvenient to go higher
821 // since fgets() takes a int32_t size argument.
822 #define MAXLINEBUFLEN 0x7fffffc0
823 
824 // Default --perm-batch-size value in most contexts.  It may actually be better
825 // to *avoid* a power of two due to the need for transpositions involving this
826 // stride; see e.g. http://danluu.com/3c-conflict/ ; try 448 instead?  This
827 // should be tested during PLINK 2.0 development.
828 #define DEFAULT_PERM_BATCH_SIZE 512
829 
830 // note that this is NOT foolproof: see e.g.
831 // http://insanecoding.blogspot.com/2007/11/pathmax-simply-isnt.html .  (This
832 // is why I haven't bothered with OS-based #ifdefs here.)  But it should be
833 // good enough in practice.
834 #define FNAMESIZE 4096
835 
836 // allow extensions like .model.trend.fisher.set.score.adjusted
837 #define MAX_POST_EXT 39
838 
839 // number of types of jackknife values to precompute (x^2, y^2, x, y, xy)
840 #define JACKKNIFE_VALS_DIST 5
841 #define JACKKNIFE_VALS_GROUPDIST 3
842 
843 #ifdef __LP64__
844   // number of snp-major .bed lines to read at once for distance calc if
845   // exponent is nonzero.
846   #define MULTIPLEX_DIST_EXP 64
847   // number of snp-major .bed lines to read at once for relationship calc
848   #define MULTIPLEX_REL 60
849 #else
850   // N.B. 32-bit version not as carefully tested or optimized, but I'll try to
851   // make sure it works properly
852   #define MULTIPLEX_DIST_EXP 28
853   #define MULTIPLEX_REL 30
854 #endif
855 
856 // used to size a few tables
857 #define EXPECTED_MISSING_FREQ 0.05
858 
859 // load markers in blocks to enable multithreading and, for quantitative
860 // phenotypes, PERMORY-style LD exploitation
861 #define MODEL_BLOCKSIZE 1024
862 #define MODEL_BLOCKKEEP 64
863 
864 // string hash table constants, currently only relevant for merge operations
865 // and annotate()
866 // (dynamic sizing used for main marker name lookup)
867 
868 // last prime before 2^19
869 // size chosen to be likely to fit in L3 cache
870 #define HASHSIZE 524287
871 #define HASHSIZE_S 524287
872 
873 #ifdef __LP64__
874 #define HASHMEM 4194304
875 #else
876 #define HASHMEM 2097152
877 #endif
878 
879 typedef struct {
880   uint32_t min;
881   uint32_t max;
882   double alpha;
883   double beta;
884   double init_interval;
885   double interval_slope;
886 } Aperm_info;
887 
888 // Generic text I/O buffer: any function which reads from/writes to a text file
889 // or the console may clobber it.  Sized to fit two MAXLINELEN-length lines
890 // plus a bit extra.
891 extern char g_textbuf[];
892 
893 extern const char g_one_char_strs[];
894 extern const char* g_missing_geno_ptr;
895 extern const char* g_output_missing_geno_ptr;
896 
cond_replace(const char * ss,const char * match_str,const char * replace_str)897 HEADER_INLINE const char* cond_replace(const char* ss, const char* match_str, const char* replace_str) {
898   return (ss != match_str)? ss : replace_str;
899 }
900 
901 uint32_t aligned_malloc(uintptr_t size, uintptr_t** aligned_pp);
902 
903 void aligned_free(uintptr_t* aligned_pp);
904 
aligned_free_cond(uintptr_t * aligned_ptr)905 HEADER_INLINE void aligned_free_cond(uintptr_t* aligned_ptr) {
906   if (aligned_ptr) {
907     aligned_free(aligned_ptr);
908   }
909 }
910 
aligned_free_null(uintptr_t ** aligned_pp)911 HEADER_INLINE void aligned_free_null(uintptr_t** aligned_pp) {
912   aligned_free(*aligned_pp);
913   *aligned_pp = nullptr;
914 }
915 
aligned_free_cond_null(uintptr_t ** aligned_pp)916 HEADER_INLINE void aligned_free_cond_null(uintptr_t** aligned_pp) {
917   if (*aligned_pp) {
918     aligned_free(*aligned_pp);
919     *aligned_pp = nullptr;
920   }
921 }
922 
923 extern uintptr_t g_failed_alloc_attempt_size;
924 
925 extern sfmt_t g_sfmt;
926 
927 // file-scope string constants don't always have the g_ prefix, but multi-file
928 // constants are always tagged.
929 extern const char g_errstr_fopen[];
930 extern const char g_cmdline_format_str[];
931 
932 extern FILE* g_logfile;
933 
934 // mostly-safe sprintf buffer.  warning: do NOT put allele codes or
935 // arbitrary-length lists in here.
936 extern char g_logbuf[];
937 
938 extern uint32_t g_debug_on;
939 extern uint32_t g_log_failed;
940 
941 // should remove this global: multithreaded functions should use a file-local
942 // thread_ct which will occasionally be smaller due to job size.
943 extern uint32_t g_thread_ct;
944 
945 typedef struct ll_str_struct {
946   struct ll_str_struct* next;
947   char ss[];
948 } Ll_str;
949 
950 typedef struct ll_ctstr_entry_struct {
951   struct ll_ctstr_entry_struct* next;
952   uint32_t ct;
953   char ss[];
954 } Ll_ctstr_entry;
955 
956 typedef struct two_col_params_struct {
957   uint32_t colx;
958   uint32_t colid;
959   uint32_t skip;
960   char skipchar;
961   char fname[];
962 } Two_col_params;
963 
964 typedef struct range_list_struct {
965   char* names;
966   unsigned char* starts_range;
967   uint32_t name_ct;
968   uint32_t name_max_len;
969 } Range_list;
970 
971 // Pushes a copy of ss (allocated via malloc) onto ll_stack.
972 uint32_t push_ll_str(const char* ss, Ll_str** ll_stack_ptr);
973 
974 // warning: do NOT include allele codes (unless they're guaranteed to be SNPs)
975 // in log strings; they can overflow the buffer.
976 void logstr(const char* ss);
977 
978 void logprint(const char* ss);
979 
980 void logerrprint(const char* ss);
981 
982 void logprintb();
983 
984 void logerrprintb();
985 
986 #define LOGPRINTF(...) sprintf(g_logbuf, __VA_ARGS__); logprintb();
987 
988 #define LOGERRPRINTF(...) sprintf(g_logbuf, __VA_ARGS__); logerrprintb();
989 
990 // input for wordwrap/LOGPRINTFWW should have no intermediate '\n's.  If
991 // suffix_len is 0, there should be a terminating \n.
992 // void wordwrap(uint32_t suffix_len, char* ss);
993 
994 void wordwrapb(uint32_t suffix_len);
995 
996 #define LOGPREPRINTFWW(...) sprintf(g_logbuf, __VA_ARGS__); wordwrapb(0);
997 
998 #define LOGPRINTFWW(...) sprintf(g_logbuf, __VA_ARGS__); wordwrapb(0); logprintb();
999 
1000 #define LOGERRPRINTFWW(...) sprintf(g_logbuf, __VA_ARGS__); wordwrapb(0); logerrprintb();
1001 
1002 // 5 = length of "done." suffix, which is commonly used
1003 #define LOGPRINTFWW5(...) sprintf(g_logbuf, __VA_ARGS__); wordwrapb(5); logprintb();
1004 
1005 #ifdef STABLE_BUILD
1006   #define UNSTABLE(val) sptr = strcpya(&(g_logbuf[9]), val); goto main_unstable_disabled
1007 #else
1008   #define UNSTABLE(val)
1009 #endif
1010 
1011 int32_t fopen_checked(const char* fname, const char* mode, FILE** target_ptr);
1012 
putc_checked(int32_t ii,FILE * outfile)1013 HEADER_INLINE int32_t putc_checked(int32_t ii, FILE* outfile) {
1014   putc_unlocked(ii, outfile);
1015   return ferror(outfile);
1016 }
1017 
fputs_checked(const char * ss,FILE * outfile)1018 HEADER_INLINE int32_t fputs_checked(const char* ss, FILE* outfile) {
1019   fputs(ss, outfile);
1020   return ferror(outfile);
1021 }
1022 
1023 // This must be used for all fwrite() calls where len could be >= 2^31, since
1024 // OS X raw fwrite() doesn't work in that case.
1025 int32_t fwrite_checked(const void* buf, size_t len, FILE* outfile);
1026 
fread_checked(char * buf,uintptr_t len,FILE * infile,uintptr_t * bytes_read_ptr)1027 HEADER_INLINE int32_t fread_checked(char* buf, uintptr_t len, FILE* infile, uintptr_t* bytes_read_ptr) {
1028   *bytes_read_ptr = fread(buf, 1, len, infile);
1029   return ferror(infile);
1030 }
1031 
fclose_cond(FILE * fptr)1032 HEADER_INLINE void fclose_cond(FILE* fptr) {
1033   if (fptr) {
1034     fclose(fptr);
1035   }
1036 }
1037 
fclose_null(FILE ** fptr_ptr)1038 HEADER_INLINE int32_t fclose_null(FILE** fptr_ptr) {
1039   int32_t ii = ferror(*fptr_ptr);
1040   int32_t jj = fclose(*fptr_ptr);
1041   *fptr_ptr = nullptr;
1042   return ii || jj;
1043 }
1044 
1045 // Also sets 128k read buffer.  Can return RET_OPEN_FAIL or RET_NOMEM.
1046 int32_t gzopen_read_checked(const char* fname, gzFile* gzf_ptr);
1047 
1048 // pigz interface should be used for writing .gz files.
1049 
gzclose_null(gzFile * gzf_ptr)1050 HEADER_INLINE int32_t gzclose_null(gzFile* gzf_ptr) {
1051   int32_t ii = gzclose(*gzf_ptr);
1052   *gzf_ptr = nullptr;
1053   return (ii != Z_OK);
1054 }
1055 
gzclose_cond(gzFile gz_infile)1056 HEADER_INLINE void gzclose_cond(gzFile gz_infile) {
1057   if (gz_infile) {
1058     gzclose(gz_infile);
1059   }
1060 }
1061 
flexwrite_checked(const void * buf,size_t len,uint32_t output_gz,FILE * outfile,gzFile gz_outfile)1062 HEADER_INLINE int32_t flexwrite_checked(const void* buf, size_t len, uint32_t output_gz, FILE* outfile, gzFile gz_outfile) {
1063   if (!output_gz) {
1064     return fwrite_checked(buf, len, outfile);
1065   } else {
1066     return (!gzwrite(gz_outfile, buf, len));
1067   }
1068 }
1069 
flexputc_checked(int32_t ii,uint32_t output_gz,FILE * outfile,gzFile gz_outfile)1070 HEADER_INLINE int32_t flexputc_checked(int32_t ii, uint32_t output_gz, FILE* outfile, gzFile gz_outfile) {
1071   if (!output_gz) {
1072     putc(ii, outfile);
1073     return ferror(outfile);
1074   } else {
1075     return (gzputc(gz_outfile, ii) == -1);
1076   }
1077 }
1078 
flexputs_checked(const char * ss,uint32_t output_gz,FILE * outfile,gzFile gz_outfile)1079 HEADER_INLINE int32_t flexputs_checked(const char* ss, uint32_t output_gz, FILE* outfile, gzFile gz_outfile) {
1080   if (!output_gz) {
1081     return fputs_checked(ss, outfile);
1082   } else {
1083     return (gzputs(gz_outfile, ss) == -1);
1084   }
1085 }
1086 
flexclose_null(uint32_t output_gz,FILE ** fptr_ptr,gzFile * gzf_ptr)1087 HEADER_INLINE int32_t flexclose_null(uint32_t output_gz, FILE** fptr_ptr, gzFile* gzf_ptr) {
1088   if (!output_gz) {
1089     return fclose_null(fptr_ptr);
1090   } else {
1091     return gzclose_null(gzf_ptr);
1092   }
1093 }
1094 
1095 // manually managed, very large double-ended stack
1096 extern unsigned char* g_bigstack_base;
1097 extern unsigned char* g_bigstack_end;
1098 
bigstack_left()1099 HEADER_INLINE uintptr_t bigstack_left() {
1100   return (((uintptr_t)g_bigstack_end) - ((uintptr_t)g_bigstack_base));
1101 }
1102 
1103 // Basic 64-byte-aligned allocation at bottom of stack.
1104 unsigned char* bigstack_alloc(uintptr_t size);
1105 
1106 
1107 // Typesafe, return-0-iff-success interfaces.  (See also bigstack_calloc_...
1108 // further below.)
bigstack_alloc_c(uintptr_t ct,char ** cp_ptr)1109 HEADER_INLINE int32_t bigstack_alloc_c(uintptr_t ct, char** cp_ptr) {
1110   *cp_ptr = (char*)bigstack_alloc(ct);
1111   return !(*cp_ptr);
1112 }
1113 
bigstack_alloc_d(uintptr_t ct,double ** dp_ptr)1114 HEADER_INLINE int32_t bigstack_alloc_d(uintptr_t ct, double** dp_ptr) {
1115   *dp_ptr = (double*)bigstack_alloc(ct * sizeof(double));
1116   return !(*dp_ptr);
1117 }
1118 
bigstack_alloc_f(uintptr_t ct,float ** fp_ptr)1119 HEADER_INLINE int32_t bigstack_alloc_f(uintptr_t ct, float** fp_ptr) {
1120   *fp_ptr = (float*)bigstack_alloc(ct * sizeof(float));
1121   return !(*fp_ptr);
1122 }
1123 
bigstack_alloc_i(uintptr_t ct,int32_t ** ip_ptr)1124 HEADER_INLINE int32_t bigstack_alloc_i(uintptr_t ct, int32_t** ip_ptr) {
1125   *ip_ptr = (int32_t*)bigstack_alloc(ct * sizeof(int32_t));
1126   return !(*ip_ptr);
1127 }
1128 
bigstack_alloc_uc(uintptr_t ct,unsigned char ** ucp_ptr)1129 HEADER_INLINE int32_t bigstack_alloc_uc(uintptr_t ct, unsigned char** ucp_ptr) {
1130   *ucp_ptr = bigstack_alloc(ct);
1131   return !(*ucp_ptr);
1132 }
1133 
bigstack_alloc_ui(uintptr_t ct,uint32_t ** uip_ptr)1134 HEADER_INLINE int32_t bigstack_alloc_ui(uintptr_t ct, uint32_t** uip_ptr) {
1135   *uip_ptr = (uint32_t*)bigstack_alloc(ct * sizeof(int32_t));
1136   return !(*uip_ptr);
1137 }
1138 
bigstack_alloc_ul(uintptr_t ct,uintptr_t ** ulp_ptr)1139 HEADER_INLINE int32_t bigstack_alloc_ul(uintptr_t ct, uintptr_t** ulp_ptr) {
1140   *ulp_ptr = (uintptr_t*)bigstack_alloc(ct * sizeof(intptr_t));
1141   return !(*ulp_ptr);
1142 }
1143 
bigstack_alloc_ll(uintptr_t ct,int64_t ** llp_ptr)1144 HEADER_INLINE int32_t bigstack_alloc_ll(uintptr_t ct, int64_t** llp_ptr) {
1145   *llp_ptr = (int64_t*)bigstack_alloc(ct * sizeof(int64_t));
1146   return !(*llp_ptr);
1147 }
1148 
bigstack_alloc_ull(uintptr_t ct,uint64_t ** ullp_ptr)1149 HEADER_INLINE int32_t bigstack_alloc_ull(uintptr_t ct, uint64_t** ullp_ptr) {
1150   *ullp_ptr = (uint64_t*)bigstack_alloc(ct * sizeof(int64_t));
1151   return !(*ullp_ptr);
1152 }
1153 
bigstack_reset(const void * new_base)1154 HEADER_INLINE void bigstack_reset(const void* new_base) {
1155   g_bigstack_base = (unsigned char*)new_base;
1156 }
1157 
bigstack_end_reset(const void * new_end)1158 HEADER_INLINE void bigstack_end_reset(const void* new_end) {
1159   g_bigstack_end = (unsigned char*)new_end;
1160 }
1161 
bigstack_double_reset(const void * new_base,const void * new_end)1162 HEADER_INLINE void bigstack_double_reset(const void* new_base, const void* new_end) {
1163   bigstack_reset(new_base);
1164   bigstack_end_reset(new_end);
1165 }
1166 
1167 void bigstack_shrink_top(const void* rebase, uintptr_t new_size);
1168 
1169 #define END_ALLOC_CHUNK 16
1170 #define END_ALLOC_CHUNK_M1 (END_ALLOC_CHUNK - 1)
1171 
bigstack_end_set(const void * unaligned_end)1172 HEADER_INLINE void bigstack_end_set(const void* unaligned_end) {
1173   g_bigstack_end = (unsigned char*)(((uintptr_t)unaligned_end) & (~(END_ALLOC_CHUNK_M1 * ONELU)));
1174 }
1175 
1176 // assumes size is divisible by END_ALLOC_CHUNK
1177 // (no value in directly calling this with a constant size parameter: the
1178 // compiler will properly optimize a bigstack_end_alloc() call)
1179 unsigned char* bigstack_end_alloc_presized(uintptr_t size);
1180 
bigstack_end_alloc(uintptr_t size)1181 HEADER_INLINE unsigned char* bigstack_end_alloc(uintptr_t size) {
1182   // multiplication by ONELU is one way to widen an int to word-size.
1183   size = round_up_pow2(size, END_ALLOC_CHUNK);
1184   return bigstack_end_alloc_presized(size);
1185 }
1186 
1187 #define bigstack_end_aligned_alloc bigstack_end_alloc
1188 
bigstack_end_alloc_c(uintptr_t ct,char ** cp_ptr)1189 HEADER_INLINE int32_t bigstack_end_alloc_c(uintptr_t ct, char** cp_ptr) {
1190   *cp_ptr = (char*)bigstack_end_alloc(ct);
1191   return !(*cp_ptr);
1192 }
1193 
bigstack_end_alloc_d(uintptr_t ct,double ** dp_ptr)1194 HEADER_INLINE int32_t bigstack_end_alloc_d(uintptr_t ct, double** dp_ptr) {
1195   *dp_ptr = (double*)bigstack_end_alloc(ct * sizeof(double));
1196   return !(*dp_ptr);
1197 }
1198 
bigstack_end_alloc_f(uintptr_t ct,float ** fp_ptr)1199 HEADER_INLINE int32_t bigstack_end_alloc_f(uintptr_t ct, float** fp_ptr) {
1200   *fp_ptr = (float*)bigstack_end_alloc(ct * sizeof(float));
1201   return !(*fp_ptr);
1202 }
1203 
bigstack_end_alloc_i(uintptr_t ct,int32_t ** ip_ptr)1204 HEADER_INLINE int32_t bigstack_end_alloc_i(uintptr_t ct, int32_t** ip_ptr) {
1205   *ip_ptr = (int32_t*)bigstack_end_alloc(ct * sizeof(int32_t));
1206   return !(*ip_ptr);
1207 }
1208 
bigstack_end_alloc_uc(uintptr_t ct,unsigned char ** ucp_ptr)1209 HEADER_INLINE int32_t bigstack_end_alloc_uc(uintptr_t ct, unsigned char** ucp_ptr) {
1210   *ucp_ptr = bigstack_end_alloc(ct);
1211   return !(*ucp_ptr);
1212 }
1213 
bigstack_end_alloc_ui(uintptr_t ct,uint32_t ** uip_ptr)1214 HEADER_INLINE int32_t bigstack_end_alloc_ui(uintptr_t ct, uint32_t** uip_ptr) {
1215   *uip_ptr = (uint32_t*)bigstack_end_alloc(ct * sizeof(int32_t));
1216   return !(*uip_ptr);
1217 }
1218 
bigstack_end_alloc_ul(uintptr_t ct,uintptr_t ** ulp_ptr)1219 HEADER_INLINE int32_t bigstack_end_alloc_ul(uintptr_t ct, uintptr_t** ulp_ptr) {
1220   *ulp_ptr = (uintptr_t*)bigstack_end_alloc(ct * sizeof(intptr_t));
1221   return !(*ulp_ptr);
1222 }
1223 
bigstack_end_alloc_ll(uintptr_t ct,int64_t ** llp_ptr)1224 HEADER_INLINE int32_t bigstack_end_alloc_ll(uintptr_t ct, int64_t** llp_ptr) {
1225   *llp_ptr = (int64_t*)bigstack_end_alloc(ct * sizeof(int64_t));
1226   return !(*llp_ptr);
1227 }
1228 
bigstack_end_alloc_ull(uintptr_t ct,uint64_t ** ullp_ptr)1229 HEADER_INLINE int32_t bigstack_end_alloc_ull(uintptr_t ct, uint64_t** ullp_ptr) {
1230   *ullp_ptr = (uint64_t*)bigstack_end_alloc(ct * sizeof(int64_t));
1231   return !(*ullp_ptr);
1232 }
1233 
bigstack_end_alloc_llstr(uintptr_t str_bytes,Ll_str ** llstrp_ptr)1234 HEADER_INLINE int32_t bigstack_end_alloc_llstr(uintptr_t str_bytes, Ll_str** llstrp_ptr) {
1235   *llstrp_ptr = (Ll_str*)bigstack_end_alloc(str_bytes + sizeof(Ll_str));
1236   return !(*llstrp_ptr);
1237 }
1238 
1239 
is_letter(unsigned char ucc)1240 HEADER_INLINE int32_t is_letter(unsigned char ucc) {
1241   return (((ucc & 192) == 64) && (((ucc - 1) & 31) < 26));
1242 }
1243 
1244 // if we need the digit value, better to use (unsigned char)cc - '0'...
is_digit(unsigned char ucc)1245 HEADER_INLINE int32_t is_digit(unsigned char ucc) {
1246   return (ucc <= '9') && (ucc >= '0');
1247 }
1248 
is_not_digit(unsigned char ucc)1249 HEADER_INLINE int32_t is_not_digit(unsigned char ucc) {
1250   return (ucc > '9') || (ucc < '0');
1251 }
1252 
is_not_nzdigit(unsigned char ucc)1253 HEADER_INLINE int32_t is_not_nzdigit(unsigned char ucc) {
1254   return (ucc > '9') || (ucc <= '0');
1255 }
1256 
1257 // may as well treat all chars < 32, except tab, as eoln...
1258 // kns = "known non-space" (where tab counts as a space)
1259 /*
1260 HEADER_INLINE int32_t is_eoln_kns(unsigned char ucc) {
1261   return (ucc < 32);
1262 }
1263 */
1264 
is_space_or_eoln(unsigned char ucc)1265 HEADER_INLINE int32_t is_space_or_eoln(unsigned char ucc) {
1266   return (ucc <= 32);
1267 }
1268 
1269 // could assert ucc is not a space/tab
1270 #define is_eoln_kns is_space_or_eoln
1271 
is_eoln_char(unsigned char ucc)1272 HEADER_INLINE int32_t is_eoln_char(unsigned char ucc) {
1273   return (ucc < 32) && (ucc != 9);
1274 }
1275 
is_eoln_or_comment_kns(unsigned char ucc)1276 HEADER_INLINE int32_t is_eoln_or_comment_kns(unsigned char ucc) {
1277   return (ucc < 32) || (ucc == '#');
1278 }
1279 
no_more_tokens_kns(const char * sptr)1280 HEADER_INLINE int32_t no_more_tokens_kns(const char* sptr) {
1281   return ((!sptr) || is_eoln_kns(*sptr));
1282 }
1283 
skip_initial_spaces(char * sptr)1284 HEADER_INLINE char* skip_initial_spaces(char* sptr) {
1285   while ((*sptr == ' ') || (*sptr == '\t')) {
1286     sptr++;
1287   }
1288   return sptr;
1289 }
1290 
1291 /*
1292 HEADER_INLINE int32_t is_space_or_eoln(unsigned char cc) {
1293   // ' ', \t, \n, \0, \r
1294 #ifdef __LP64__
1295   return (ucc <= 32) && (0x100002601LLU & (1LLU << ucc));
1296 #else
1297   return ((ucc <= 32) && ((ucc == ' ') || (0x2601LU & (ONELU << ucc))));
1298 #endif
1299 }
1300 */
1301 
1302 // Returns whether uppercased ss matches nonempty fixed_str.  Assumes fixed_str
1303 // contains nothing but letters and a null terminator.
1304 uint32_t match_upper(const char* ss, const char* fixed_str);
1305 
1306 uint32_t match_upper_counted(const char* ss, const char* fixed_str, uint32_t ct);
1307 
1308 // Reads an integer in [1, cap].  Assumes first character is nonspace.  Has the
1309 // overflow detection atoi() lacks.
1310 #ifdef __LP64__
1311 uint32_t scan_posint_capped(const char* ss, uint64_t cap, uint32_t* valp);
1312 
1313 uint32_t scan_uint_capped(const char* ss, uint64_t cap, uint32_t* valp);
1314 
1315 uint32_t scan_int_abs_bounded(const char* ss, uint64_t bound, int32_t* valp);
1316 #else // not __LP64__
1317 // Need to be more careful in 32-bit case due to overflow.
1318 // A funny-looking div_10/mod_10 interface is used since the cap will usually
1319 // be a constant, and we want the integer division/modulus to occur at compile
1320 // time.
1321 uint32_t scan_posint_capped32(const char* ss, uint32_t cap_div_10, uint32_t cap_mod_10, uint32_t* valp);
1322 
1323 uint32_t scan_uint_capped32(const char* ss, uint32_t cap_div_10, uint32_t cap_mod_10, uint32_t* valp);
1324 
1325 uint32_t scan_int_abs_bounded32(const char* ss, uint32_t bound_div_10, uint32_t bound_mod_10, int32_t* valp);
1326 
1327   #define scan_posint_capped(aa, bb, cc) scan_posint_capped32((aa), (bb) / 10, (bb) % 10, (cc))
1328 
1329   #define scan_uint_capped(aa, bb, cc) scan_uint_capped32((aa), (bb) / 10, (bb) % 10, (cc))
1330 
1331   #define scan_int_abs_bounded(aa, bb, cc) scan_int_abs_bounded32((aa), (bb) / 10, (bb) % 10, (cc))
1332 #endif
1333 
1334 // intentionally rejects -2^31 for now
scan_int32(const char * ss,int32_t * valp)1335 HEADER_INLINE uint32_t scan_int32(const char* ss, int32_t* valp) {
1336   return scan_int_abs_bounded(ss, 0x7fffffff, valp);
1337 }
1338 
1339 // default cap = 0x7ffffffe
scan_posint_defcap(const char * ss,uint32_t * valp)1340 HEADER_INLINE uint32_t scan_posint_defcap(const char* ss, uint32_t* valp) {
1341   return scan_posint_capped(ss, 0x7ffffffe, valp);
1342 }
1343 
scan_uint_defcap(const char * ss,uint32_t * valp)1344 HEADER_INLINE uint32_t scan_uint_defcap(const char* ss, uint32_t* valp) {
1345   return scan_uint_capped(ss, 0x7ffffffe, valp);
1346 }
1347 
scan_int_abs_defcap(const char * ss,int32_t * valp)1348 HEADER_INLINE uint32_t scan_int_abs_defcap(const char* ss, int32_t* valp) {
1349   return scan_int_abs_bounded(ss, 0x7ffffffe, valp);
1350 }
1351 
scan_uint_icap(const char * ss,uint32_t * valp)1352 HEADER_INLINE uint32_t scan_uint_icap(const char* ss, uint32_t* valp) {
1353   return scan_uint_capped(ss, 0x7fffffff, valp);
1354 }
1355 
1356 uint32_t scan_posintptr(const char* ss, uintptr_t* valp);
1357 
scan_double(const char * ss,double * valp)1358 HEADER_INLINE uint32_t scan_double(const char* ss, double* valp) {
1359   char* ss2;
1360   *valp = strtod(ss, &ss2);
1361   return (ss == ss2);
1362 }
1363 
scan_float(const char * ss,float * valp)1364 HEADER_INLINE uint32_t scan_float(const char* ss, float* valp) {
1365   char* ss2;
1366   *valp = strtof(ss, &ss2);
1367   return (ss == ss2);
1368 }
1369 
1370 // More restrictive parsing of command-line parameters.
scan_doublex(const char * ss,double * valp)1371 HEADER_INLINE uint32_t scan_doublex(const char* ss, double* valp) {
1372   char* ss2;
1373   *valp = strtod(ss, &ss2);
1374   return (ss == ss2) || (!is_space_or_eoln(ss2[0]));
1375 }
1376 
1377 uint32_t scan_posint_cappedx(const char* ss, uint64_t cap, uint32_t* valp);
1378 
1379 uint32_t scan_uint_cappedx(const char* ss, uint64_t cap, uint32_t* valp);
1380 
1381 uint32_t scan_int_abs_boundedx(const char* ss, uint64_t bound, int32_t* valp);
1382 
scan_int32x(const char * ss,int32_t * valp)1383 HEADER_INLINE uint32_t scan_int32x(const char* ss, int32_t* valp) {
1384   return scan_int_abs_boundedx(ss, 0x7fffffff, valp);
1385 }
1386 
scan_posint_defcapx(const char * ss,uint32_t * valp)1387 HEADER_INLINE uint32_t scan_posint_defcapx(const char* ss, uint32_t* valp) {
1388   return scan_posint_cappedx(ss, 0x7ffffffe, valp);
1389 }
1390 
scan_uint_defcapx(const char * ss,uint32_t * valp)1391 HEADER_INLINE uint32_t scan_uint_defcapx(const char* ss, uint32_t* valp) {
1392   return scan_uint_cappedx(ss, 0x7ffffffe, valp);
1393 }
1394 
1395 uint32_t scan_posintptrx(const char* ss, uintptr_t* valp);
1396 
1397 // __restrict isn't very important for newer x86 processors since loads/stores
1398 // tend to be automatically reordered, but may as well use it properly in
1399 // plink_common.
1400 uint32_t scan_two_doubles(char* ss, double* __restrict val1p, double* __restrict val2p);
1401 
1402 int32_t scan_token_ct_len(uintptr_t half_bufsize, FILE* infile, char* buf, uintptr_t* __restrict token_ct_ptr, uintptr_t* __restrict max_token_len_ptr);
1403 
1404 int32_t read_tokens(uintptr_t half_bufsize, uintptr_t token_ct, uintptr_t max_token_len, FILE* infile, char* __restrict buf, char* __restrict token_name_buf);
1405 
memseta(char * target,unsigned char val,uintptr_t ct)1406 HEADER_INLINE char* memseta(char* target, unsigned char val, uintptr_t ct) {
1407   memset(target, val, ct);
1408   return &(target[ct]);
1409 }
1410 
memcpya(char * __restrict target,const void * __restrict source,uintptr_t ct)1411 HEADER_INLINE char* memcpya(char* __restrict target, const void* __restrict source, uintptr_t ct) {
1412   memcpy(target, source, ct);
1413   return &(target[ct]);
1414 }
1415 
memcpyb(char * __restrict target,const void * __restrict source,uint32_t ct)1416 HEADER_INLINE char* memcpyb(char* __restrict target, const void* __restrict source, uint32_t ct) {
1417   // Same as memcpya, except the return value is one byte earlier.  Generally
1418   // used when source is a null-terminated string of known length and we want
1419   // to copy the null, but sometimes we need to append later.
1420   memcpy(target, source, ct);
1421   return &(target[ct - 1]);
1422 }
1423 
memcpyax(char * __restrict target,const void * __restrict source,uint32_t ct,char extra_char)1424 HEADER_INLINE char* memcpyax(char* __restrict target, const void* __restrict source, uint32_t ct, char extra_char) {
1425   memcpy(target, source, ct);
1426   target[ct] = extra_char;
1427   return &(target[ct + 1]);
1428 }
1429 
memcpyx(char * __restrict target,const void * __restrict source,uint32_t ct,char extra_char)1430 HEADER_INLINE void memcpyx(char* __restrict target, const void* __restrict source, uint32_t ct, char extra_char) {
1431   memcpy(target, source, ct);
1432   target[ct] = extra_char;
1433 }
1434 
memcpyl3(char * __restrict target,const void * __restrict source)1435 HEADER_INLINE void memcpyl3(char* __restrict target, const void* __restrict source) {
1436   // when it's safe to clobber the fourth character, this is faster
1437   *((uint32_t*)target) = *((const uint32_t*)source);
1438 }
1439 
memcpyl3a(char * __restrict target,const void * __restrict source)1440 HEADER_INLINE char* memcpyl3a(char* __restrict target, const void* __restrict source) {
1441   memcpyl3(target, source);
1442   return &(target[3]);
1443 }
1444 
1445 // note that, unlike stpcpy(), this does not copy the null terminator
strcpya(char * __restrict target,const void * __restrict source)1446 HEADER_INLINE char* strcpya(char* __restrict target, const void* __restrict source) {
1447   uintptr_t slen = strlen((char*)source);
1448   memcpy(target, source, slen);
1449   return &(target[slen]);
1450 }
1451 
strcpyax(char * __restrict target,const void * __restrict source,char extra_char)1452 HEADER_INLINE char* strcpyax(char* __restrict target, const void* __restrict source, char extra_char) {
1453   uintptr_t slen = strlen((char*)source);
1454   memcpy(target, source, slen);
1455   target[slen] = extra_char;
1456   return &(target[slen + 1]);
1457 }
1458 
append_binary_eoln(char ** target_ptr)1459 HEADER_INLINE void append_binary_eoln(char** target_ptr) {
1460 #ifdef _WIN32
1461   (*target_ptr)[0] = '\r';
1462   (*target_ptr)[1] = '\n';
1463   *target_ptr += 2;
1464 #else
1465   **target_ptr = '\n';
1466   *target_ptr += 1;
1467 #endif
1468 }
1469 
fputs_w4(const char * ss,FILE * outfile)1470 HEADER_INLINE void fputs_w4(const char* ss, FILE* outfile) {
1471   // for efficient handling of width-4 allele columns; don't want to call
1472   // strlen() since that's redundant with fputs
1473   if (!ss[1]) {
1474     fputs("   ", outfile);
1475     putc(ss[0], outfile);
1476   } else {
1477     if (!ss[2]) {
1478       putc(' ', outfile);
1479       putc(' ', outfile);
1480     } else if (!ss[3]) {
1481       putc(' ', outfile);
1482     }
1483     fputs(ss, outfile);
1484   }
1485 }
1486 
1487 int32_t gzputs_w4(gzFile gz_outfile, const char* ss);
1488 
1489 int32_t get_next_noncomment(FILE* fptr, char** lptr_ptr, uintptr_t* line_idx_ptr);
1490 
1491 int32_t get_next_noncomment_excl(const uintptr_t* __restrict marker_exclude, FILE* fptr, char** lptr_ptr, uintptr_t* __restrict line_idx_ptr, uintptr_t* __restrict marker_uidx_ptr);
1492 
1493 // assumes we are currently in a token -- UNSAFE OTHERWISE
token_endnn(char * sptr)1494 HEADER_INLINE char* token_endnn(char* sptr) {
1495   while (!is_space_or_eoln(*(++sptr)));
1496   return sptr;
1497 }
1498 
1499 void get_top_two_ui(const uint32_t* __restrict uint_arr, uintptr_t uia_size, uintptr_t* __restrict top_idx_ptr, uintptr_t* __restrict second_idx_ptr);
1500 
1501 uint32_t intlen(int32_t num);
1502 
1503 // safer than token_endnn(), since it handles length zero
1504 // "se" = stops at space or eoln character
strlen_se(const char * ss)1505 HEADER_INLINE uintptr_t strlen_se(const char* ss) {
1506   const char* ss2 = ss;
1507   while (!is_space_or_eoln(*ss2)) {
1508     ss2++;
1509   }
1510   return (uintptr_t)(ss2 - ss);
1511 }
1512 
1513 int32_t strcmp_se(const char* s_read, const char* s_const, uint32_t s_const_len);
1514 
1515 char* next_token(char* sptr);
1516 
1517 char* next_token_mult(char* sptr, uint32_t ct);
1518 
next_token_multz(char * sptr,uint32_t ct)1519 HEADER_INLINE char* next_token_multz(char* sptr, uint32_t ct) {
1520   // tried replacing this with ternary operator, but that actually seemed to
1521   // slow things down a bit under gcc 4.2.1 (tail call optimization issue?).
1522   // todo: recheck this under newer gcc/clang.
1523   if (ct) {
1524     return next_token_mult(sptr, ct);
1525   } else {
1526     return sptr;
1527   }
1528 }
1529 
1530 uint32_t count_tokens(const char* bufptr);
1531 
fw_strcpyn(uint32_t min_width,uint32_t source_len,const char * source,char * dest)1532 HEADER_INLINE char* fw_strcpyn(uint32_t min_width, uint32_t source_len, const char* source, char* dest) {
1533   // right-justified strcpy with known source length
1534   if (source_len < min_width) {
1535     memcpy(memseta(dest, 32, min_width - source_len), source, source_len);
1536     return &(dest[min_width]);
1537   } else {
1538     return memcpya(dest, source, source_len);
1539   }
1540 }
1541 
fw_strcpy(uint32_t min_width,const char * source,char * dest)1542 HEADER_INLINE char* fw_strcpy(uint32_t min_width, const char* source, char* dest) {
1543   return fw_strcpyn(min_width, strlen(source), source, dest);
1544 }
1545 
1546 uint32_t count_and_measure_multistr(const char* multistr, uintptr_t* max_slen_ptr);
1547 
1548 char* uint32toa(uint32_t uii, char* start);
1549 
1550 char* int32toa(int32_t ii, char* start);
1551 
1552 // Write exactly four digits (padding with zeroes if necessary); useful for
1553 // e.g. floating point encoders.  uii must not be >= 10^4.
1554 char* uitoa_z4(uint32_t uii, char* start);
1555 
1556 char* int64toa(int64_t llii, char* start);
1557 
1558 // Minimum field width 4 (padding with spaces on left).
1559 char* uint32toa_w4(uint32_t uii, char* start);
1560 
1561 char* uint32toa_w6(uint32_t uii, char* start);
1562 
1563 char* uint32toa_w7(uint32_t uii, char* start);
1564 
1565 char* uint32toa_w8(uint32_t uii, char* start);
1566 
1567 char* uint32toa_w10(uint32_t uii, char* start);
1568 
1569 // These limited-precision converters are usually several times as fast as
1570 // grisu2's descendants; and let's not even speak of sprintf.  (I'm guessing
1571 // that the algorithm cannot be made round-trip-safe without throwing away its
1572 // performance advantage, since we currently multiply by numbers like 1.0e256
1573 // which don't have an exact representation.  But these functions are very,
1574 // very good at what they do.)
1575 char* dtoa_e(double dxx, char* start);
1576 
1577 char* ftoa_e(float dxx, char* start);
1578 
1579 char* dtoa_f_p2(double dxx, char* start);
1580 
1581 char* dtoa_f_p3(double dxx, char* start);
1582 
1583 char* dtoa_f_w9p6(double dxx, char* start);
1584 
1585 char* dtoa_f_w7p4(double dxx, char* start);
1586 
trailing_zeroes_to_spaces(char * start)1587 HEADER_INLINE void trailing_zeroes_to_spaces(char* start) {
1588   // removes trailing zeroes
1589   start--;
1590   while (*start == '0') {
1591     *start-- = ' ';
1592   }
1593   if (*start == '.') {
1594     *start = ' ';
1595   }
1596 }
1597 
clip_trailing_zeroes(char * start)1598 HEADER_INLINE char* clip_trailing_zeroes(char* start) {
1599   char cc;
1600   do {
1601     cc = *(--start);
1602   } while (cc == '0');
1603   return &(start[(cc != '.')]);
1604 }
1605 
1606 char* dtoa_f_w9p6_spaced(double dxx, char* start);
1607 
1608 char* dtoa_f_w9p6_clipped(double dxx, char* start);
1609 
1610 char* dtoa_g(double dxx, char* start);
1611 
1612 char* ftoa_g(float dxx, char* start);
1613 
width_force(uint32_t min_width,char * startp,char * endp)1614 HEADER_INLINE char* width_force(uint32_t min_width, char* startp, char* endp) {
1615   uintptr_t diff = (endp - startp);
1616   if (diff >= min_width) {
1617     return endp;
1618   } else {
1619     diff = min_width - diff;
1620     do {
1621       --endp;
1622       endp[diff] = *endp;
1623     } while (endp > startp);
1624     memset(startp, 32, diff);
1625     return &(startp[min_width]);
1626   }
1627 }
1628 
1629 // assumes min_width >= 5.
1630 char* dtoa_g_wxp2(double dxx, uint32_t min_width, char* start);
1631 
1632 // assumes min_width >= 5.
1633 char* dtoa_g_wxp3(double dxx, uint32_t min_width, char* start);
1634 
1635 // only requires min_width to be positive; less than 5 is ok
1636 char* dtoa_g_wxp4(double dxx, uint32_t min_width, char* start);
1637 
1638 // only requires min_width to be positive; less than 8 is ok
1639 char* dtoa_g_wxp8(double dxx, uint32_t min_width, char* start);
1640 
uint32toa_x(uint32_t uii,char extra_char,char * start)1641 HEADER_INLINE char* uint32toa_x(uint32_t uii, char extra_char, char* start) {
1642   char* penult = uint32toa(uii, start);
1643   *penult = extra_char;
1644   return &(penult[1]);
1645 }
1646 
int32toa_x(int32_t ii,char extra_char,char * start)1647 HEADER_INLINE char* int32toa_x(int32_t ii, char extra_char, char* start) {
1648   char* penult = int32toa(ii, start);
1649   *penult = extra_char;
1650   return &(penult[1]);
1651 }
1652 
uint32toa_w4x(uint32_t uii,char extra_char,char * start)1653 HEADER_INLINE char* uint32toa_w4x(uint32_t uii, char extra_char, char* start) {
1654   char* penult = uint32toa_w4(uii, start);
1655   *penult = extra_char;
1656   return &(penult[1]);
1657 }
1658 
uint32toa_w6x(uint32_t uii,char extra_char,char * start)1659 HEADER_INLINE char* uint32toa_w6x(uint32_t uii, char extra_char, char* start) {
1660   char* penult = uint32toa_w6(uii, start);
1661   *penult = extra_char;
1662   return &(penult[1]);
1663 }
1664 
uint32toa_w7x(uint32_t uii,char extra_char,char * start)1665 HEADER_INLINE char* uint32toa_w7x(uint32_t uii, char extra_char, char* start) {
1666   char* penult = uint32toa_w7(uii, start);
1667   *penult = extra_char;
1668   return &(penult[1]);
1669 }
1670 
uint32toa_w8x(uint32_t uii,char extra_char,char * start)1671 HEADER_INLINE char* uint32toa_w8x(uint32_t uii, char extra_char, char* start) {
1672   char* penult = uint32toa_w8(uii, start);
1673   *penult = extra_char;
1674   return &(penult[1]);
1675 }
1676 
uint32toa_w10x(uint32_t uii,char extra_char,char * start)1677 HEADER_INLINE char* uint32toa_w10x(uint32_t uii, char extra_char, char* start) {
1678   char* penult = uint32toa_w10(uii, start);
1679   *penult = extra_char;
1680   return &(penult[1]);
1681 }
1682 
dtoa_ex(double dxx,char extra_char,char * start)1683 HEADER_INLINE char* dtoa_ex(double dxx, char extra_char, char* start) {
1684   char* penult = dtoa_e(dxx, start);
1685   *penult = extra_char;
1686   return &(penult[1]);
1687 }
1688 
ftoa_ex(float fxx,char extra_char,char * start)1689 HEADER_INLINE char* ftoa_ex(float fxx, char extra_char, char* start) {
1690   char* penult = ftoa_e(fxx, start);
1691   *penult = extra_char;
1692   return &(penult[1]);
1693 }
1694 
dtoa_f_w9p6x(double dxx,char extra_char,char * start)1695 HEADER_INLINE char* dtoa_f_w9p6x(double dxx, char extra_char, char* start) {
1696   char* penult = dtoa_f_w9p6(dxx, start);
1697   *penult = extra_char;
1698   return &(penult[1]);
1699 }
1700 
dtoa_f_w7p4x(double dxx,char extra_char,char * start)1701 HEADER_INLINE char* dtoa_f_w7p4x(double dxx, char extra_char, char* start) {
1702   char* penult = dtoa_f_w7p4(dxx, start);
1703   *penult = extra_char;
1704   return &(penult[1]);
1705 }
1706 
dtoa_gx(double dxx,char extra_char,char * start)1707 HEADER_INLINE char* dtoa_gx(double dxx, char extra_char, char* start) {
1708   char* penult = dtoa_g(dxx, start);
1709   *penult = extra_char;
1710   return &(penult[1]);
1711 }
1712 
1713 /*
1714 HEADER_INLINE char* ftoa_gx(float dxx, char extra_char, char* start) {
1715   char* penult = ftoa_g(dxx, start);
1716   *penult = extra_char;
1717   return &(penult[1]);
1718 }
1719 */
1720 
dtoa_g_wxp3x(double dxx,uint32_t min_width,char extra_char,char * start)1721 HEADER_INLINE char* dtoa_g_wxp3x(double dxx, uint32_t min_width, char extra_char, char* start) {
1722   char* penult = dtoa_g_wxp3(dxx, min_width, start);
1723   *penult = extra_char;
1724   return &(penult[1]);
1725 }
1726 
dtoa_g_wxp4x(double dxx,uint32_t min_width,char extra_char,char * start)1727 HEADER_INLINE char* dtoa_g_wxp4x(double dxx, uint32_t min_width, char extra_char, char* start) {
1728   char* penult = dtoa_g_wxp4(dxx, min_width, start);
1729   *penult = extra_char;
1730   return &(penult[1]);
1731 }
1732 
dtoa_g_wxp8x(double dxx,uint32_t min_width,char extra_char,char * start)1733 HEADER_INLINE char* dtoa_g_wxp8x(double dxx, uint32_t min_width, char extra_char, char* start) {
1734   char* penult = dtoa_g_wxp8(dxx, min_width, start);
1735   *penult = extra_char;
1736   return &(penult[1]);
1737 }
1738 
1739 char* chrom_print_human(uint32_t num, char* buf);
1740 
1741 void magic_num(uint32_t divisor, uint64_t* multp, uint32_t* __restrict pre_shiftp, uint32_t* __restrict post_shiftp, uint32_t* __restrict incrp);
1742 
tri_coord_no_diag(uintptr_t small_coord,uintptr_t big_coord)1743 HEADER_INLINE uintptr_t tri_coord_no_diag(uintptr_t small_coord, uintptr_t big_coord) {
1744   // small_coord and big_coord are 0-based indices, small_coord < big_coord
1745   return ((big_coord * (big_coord - 1)) / 2) + small_coord;
1746 }
1747 
tri_coord_no_diag_32(uint32_t small_coord,uint32_t big_coord)1748 HEADER_INLINE uint32_t tri_coord_no_diag_32(uint32_t small_coord, uint32_t big_coord) {
1749   return ((big_coord * (big_coord - 1)) / 2) + small_coord;
1750 }
1751 
1752 // let the compiler worry about the second argument's bit width here
1753 #define SET_BIT(idx, arr) ((arr)[(idx) / BITCT] |= ONELU << ((idx) % BITCT))
1754 
1755 #define SET_BIT_DBL(idx, arr) ((arr)[(idx) / BITCT2] |= ONELU << (2 * ((idx) % BITCT2)))
1756 
1757 // useful for coercing int32_t loc to unsigned
set_bit(uint32_t loc,uintptr_t * bitarr)1758 HEADER_INLINE void set_bit(uint32_t loc, uintptr_t* bitarr) {
1759   bitarr[loc / BITCT] |= (ONELU << (loc % BITCT));
1760 }
1761 
set_bit_ul(uintptr_t loc,uintptr_t * bitarr)1762 HEADER_INLINE void set_bit_ul(uintptr_t loc, uintptr_t* bitarr) {
1763   bitarr[loc / BITCT] |= (ONELU << (loc % BITCT));
1764 }
1765 
1766 // requires positive len
1767 void fill_bits(uintptr_t loc_start, uintptr_t len, uintptr_t* bitarr);
1768 
1769 // requires positive len
1770 void clear_bits(uintptr_t loc_start, uintptr_t len, uintptr_t* bitarr);
1771 
1772 #define CLEAR_BIT(idx, arr) ((arr)[(idx) / BITCT] &= ~(ONELU << ((idx) % BITCT)))
1773 
1774 #define CLEAR_BIT_DBL(idx, arr) ((arr)[(idx) / BITCT2] &= ~(ONELU << (2 * ((idx) % BITCT2))))
1775 
clear_bit(uint32_t loc,uintptr_t * bitarr)1776 HEADER_INLINE void clear_bit(uint32_t loc, uintptr_t* bitarr) {
1777   bitarr[loc / BITCT] &= ~(ONELU << (loc % BITCT));
1778 }
1779 
clear_bit_ul(uintptr_t loc,uintptr_t * bitarr)1780 HEADER_INLINE void clear_bit_ul(uintptr_t loc, uintptr_t* bitarr) {
1781   bitarr[loc / BITCT] &= ~(ONELU << (loc % BITCT));
1782 }
1783 
1784 #define IS_SET(arr, idx) (((arr)[(idx) / BITCT] >> ((idx) % BITCT)) & 1)
1785 
1786 #define IS_SET_DBL(arr, idx) (((arr)[(idx) / BITCT2] >> (2 * ((idx) % BITCT2))) & 1)
1787 
1788 // use this instead of IS_SET() for signed 32-bit integers
is_set(const uintptr_t * bitarr,uint32_t loc)1789 HEADER_INLINE uint32_t is_set(const uintptr_t* bitarr, uint32_t loc) {
1790   return (bitarr[loc / BITCT] >> (loc % BITCT)) & 1;
1791 }
1792 
is_set_ul(const uintptr_t * bitarr,uintptr_t loc)1793 HEADER_INLINE uint32_t is_set_ul(const uintptr_t* bitarr, uintptr_t loc) {
1794   return (bitarr[loc / BITCT] >> (loc % BITCT)) & 1;
1795 }
1796 
1797 #define IS_NONNULL_AND_SET(arr, idx) ((arr) && IS_SET(arr, idx))
1798 
1799 uint32_t next_unset_unsafe(const uintptr_t* bitarr, uint32_t loc);
1800 
next_unset_unsafe_ck(const uintptr_t * __restrict bitarr,uint32_t * __restrict loc_ptr)1801 HEADER_INLINE void next_unset_unsafe_ck(const uintptr_t* __restrict bitarr, uint32_t* __restrict loc_ptr) {
1802   if (IS_SET(bitarr, *loc_ptr)) {
1803     *loc_ptr = next_unset_unsafe(bitarr, *loc_ptr);
1804   }
1805 }
1806 
1807 #ifdef __LP64__
1808 uintptr_t next_unset_ul_unsafe(const uintptr_t* bitarr, uintptr_t loc);
1809 #else
next_unset_ul_unsafe(const uintptr_t * bitarr,uintptr_t loc)1810 HEADER_INLINE uintptr_t next_unset_ul_unsafe(const uintptr_t* bitarr, uintptr_t loc) {
1811   return (uintptr_t)next_unset_unsafe(bitarr, loc);
1812 }
1813 #endif
1814 
next_unset_ul_unsafe_ck(const uintptr_t * __restrict bitarr,uintptr_t * __restrict loc_ptr)1815 HEADER_INLINE void next_unset_ul_unsafe_ck(const uintptr_t* __restrict bitarr, uintptr_t* __restrict loc_ptr) {
1816   if (IS_SET(bitarr, *loc_ptr)) {
1817     *loc_ptr = next_unset_ul_unsafe(bitarr, *loc_ptr);
1818   }
1819 }
1820 
1821 uint32_t next_unset(const uintptr_t* bitarr, uint32_t loc, uint32_t ceil);
1822 
next_unset_ck(const uintptr_t * __restrict bitarr,uint32_t ceil,uint32_t * __restrict loc_ptr)1823 HEADER_INLINE void next_unset_ck(const uintptr_t* __restrict bitarr, uint32_t ceil, uint32_t* __restrict loc_ptr) {
1824   if (IS_SET(bitarr, *loc_ptr)) {
1825     *loc_ptr = next_unset(bitarr, *loc_ptr, ceil);
1826   }
1827 }
1828 
1829 #ifdef __LP64__
1830 uintptr_t next_unset_ul(const uintptr_t* bitarr, uintptr_t loc, uintptr_t ceil);
1831 #else
next_unset_ul(const uintptr_t * bitarr,uintptr_t loc,uintptr_t ceil)1832 HEADER_INLINE uintptr_t next_unset_ul(const uintptr_t* bitarr, uintptr_t loc, uintptr_t ceil) {
1833   return (uintptr_t)next_unset(bitarr, loc, ceil);
1834 }
1835 #endif
1836 
next_unset_ul_ck(const uintptr_t * __restrict bitarr,uintptr_t ceil,uintptr_t * __restrict loc_ptr)1837 HEADER_INLINE void next_unset_ul_ck(const uintptr_t* __restrict bitarr, uintptr_t ceil, uintptr_t* __restrict loc_ptr) {
1838   if (IS_SET(bitarr, *loc_ptr)) {
1839     *loc_ptr = next_unset_ul(bitarr, *loc_ptr, ceil);
1840   }
1841 }
1842 
1843 uint32_t next_set_unsafe(const uintptr_t* bitarr, uint32_t loc);
1844 
next_set_unsafe_ck(const uintptr_t * __restrict bitarr,uint32_t * __restrict loc_ptr)1845 HEADER_INLINE void next_set_unsafe_ck(const uintptr_t* __restrict bitarr, uint32_t* __restrict loc_ptr) {
1846   if (!IS_SET(bitarr, *loc_ptr)) {
1847     *loc_ptr = next_set_unsafe(bitarr, *loc_ptr);
1848   }
1849 }
1850 
1851 #ifdef __LP64__
1852 uintptr_t next_set_ul_unsafe(const uintptr_t* bitarr, uintptr_t loc);
1853 #else
next_set_ul_unsafe(const uintptr_t * bitarr,uintptr_t loc)1854 HEADER_INLINE uintptr_t next_set_ul_unsafe(const uintptr_t* bitarr, uintptr_t loc) {
1855   return (uintptr_t)next_set_unsafe(bitarr, loc);
1856 }
1857 #endif
1858 
next_set_ul_unsafe_ck(const uintptr_t * __restrict bitarr,uintptr_t * __restrict loc_ptr)1859 HEADER_INLINE void next_set_ul_unsafe_ck(const uintptr_t* __restrict bitarr, uintptr_t* __restrict loc_ptr) {
1860   if (!IS_SET(bitarr, *loc_ptr)) {
1861     *loc_ptr = next_set_ul_unsafe(bitarr, *loc_ptr);
1862   }
1863 }
1864 
1865 uint32_t next_set(const uintptr_t* bitarr, uint32_t loc, uint32_t ceil);
1866 
next_set_ck(const uintptr_t * __restrict bitarr,uint32_t ceil,uint32_t * __restrict loc_ptr)1867 HEADER_INLINE void next_set_ck(const uintptr_t* __restrict bitarr, uint32_t ceil, uint32_t* __restrict loc_ptr) {
1868   if (!IS_SET(bitarr, *loc_ptr)) {
1869     *loc_ptr = next_set(bitarr, *loc_ptr, ceil);
1870   }
1871 }
1872 
1873 #ifdef __LP64__
1874 uintptr_t next_set_ul(const uintptr_t* bitarr, uintptr_t loc, uintptr_t ceil);
1875 #else
next_set_ul(const uintptr_t * bitarr,uintptr_t loc,uintptr_t ceil)1876 HEADER_INLINE uintptr_t next_set_ul(const uintptr_t* bitarr, uintptr_t loc, uintptr_t ceil) {
1877   return (uintptr_t)next_set(bitarr, loc, ceil);
1878 }
1879 #endif
1880 
next_set_ul_ck(const uintptr_t * __restrict bitarr,uintptr_t ceil,uintptr_t * loc_ptr)1881 HEADER_INLINE void next_set_ul_ck(const uintptr_t* __restrict bitarr, uintptr_t ceil, uintptr_t* loc_ptr) {
1882   if (!IS_SET(bitarr, *loc_ptr)) {
1883     *loc_ptr = next_set_ul(bitarr, *loc_ptr, ceil);
1884   }
1885 }
1886 
1887 int32_t last_set_bit(const uintptr_t* bitarr, uint32_t word_ct);
1888 
1889 // note different interface from last_set_bit()
1890 // int32_t last_clear_bit(uintptr_t* bitarr, uint32_t ceil);
1891 
1892 // unlike the next_[un]set family, this always returns a STRICTLY earlier
1893 // position
1894 uint32_t prev_unset_unsafe(const uintptr_t* bitarr, uint32_t loc);
1895 
1896 // uint32_t prev_unset(uintptr_t* bitarr, uint32_t loc, uint32_t floor);
1897 
prev_unset_unsafe_ck(const uintptr_t * bitarr,uint32_t * loc_ptr)1898 HEADER_INLINE void prev_unset_unsafe_ck(const uintptr_t* bitarr, uint32_t* loc_ptr) {
1899   *loc_ptr -= 1;
1900   if (IS_SET(bitarr, *loc_ptr)) {
1901     *loc_ptr = prev_unset_unsafe(bitarr, *loc_ptr);
1902   }
1903 }
1904 
1905 // These functions seem to optimize better than memset(arr, 0, x) under OS X
1906 // <10.9's gcc, and they should be equivalent for later versions (looks like
1907 // memcpy/memset were redone in gcc 4.3).
fill_ulong_zero(size_t size,uintptr_t * ularr)1908 HEADER_INLINE void fill_ulong_zero(size_t size, uintptr_t* ularr) {
1909   size_t ulii;
1910   for (ulii = 0; ulii < size; ulii++) {
1911     *ularr++ = 0;
1912   }
1913 }
1914 
1915 #ifdef __LP64__
fill_ull_zero(size_t size,uint64_t * ullarr)1916 HEADER_INLINE void fill_ull_zero(size_t size, uint64_t* ullarr) {
1917   fill_ulong_zero(size, (uintptr_t*)ullarr);
1918 }
1919 
1920 // double v indicates that size is a vector count, not a word count.
fill_vvec_zero(size_t size,VECITYPE * vvec)1921 HEADER_INLINE void fill_vvec_zero(size_t size, VECITYPE* vvec) {
1922   size_t ulii;
1923   for (ulii = 0; ulii < size; ulii++) {
1924     *vvec++ = _mm_setzero_si128();
1925   }
1926 }
1927 #else
fill_ull_zero(size_t size,uint64_t * ullarr)1928 HEADER_INLINE void fill_ull_zero(size_t size, uint64_t* ullarr) {
1929   fill_ulong_zero(size * 2, (uintptr_t*)ullarr);
1930 }
1931 #endif
1932 
fill_ulong_one(size_t size,uintptr_t * ularr)1933 HEADER_INLINE void fill_ulong_one(size_t size, uintptr_t* ularr) {
1934   size_t ulii;
1935   for (ulii = 0; ulii < size; ulii++) {
1936     *ularr++ = ~ZEROLU;
1937   }
1938 }
1939 
1940 #ifdef __LP64__
fill_ull_one(size_t size,uint64_t * ullarr)1941 HEADER_INLINE void fill_ull_one(size_t size, uint64_t* ullarr) {
1942   fill_ulong_one(size, (uintptr_t*)ullarr);
1943 }
1944 #else
fill_ull_one(size_t size,uint64_t * ullarr)1945 HEADER_INLINE void fill_ull_one(size_t size, uint64_t* ullarr) {
1946   fill_ulong_one(size * 2, (uintptr_t*)ullarr);
1947 }
1948 #endif
1949 
fill_int_zero(size_t size,int32_t * iarr)1950 HEADER_INLINE void fill_int_zero(size_t size, int32_t* iarr) {
1951   size_t ulii;
1952   for (ulii = 0; ulii < size; ulii++) {
1953     *iarr++ = 0;
1954   }
1955 }
1956 
fill_int_one(size_t size,int32_t * iarr)1957 HEADER_INLINE void fill_int_one(size_t size, int32_t* iarr) {
1958   size_t ulii;
1959   for (ulii = 0; ulii < size; ulii++) {
1960     *iarr++ = -1;
1961   }
1962 }
1963 
fill_uint_zero(size_t size,uint32_t * uiarr)1964 HEADER_INLINE void fill_uint_zero(size_t size, uint32_t* uiarr) {
1965   size_t ulii;
1966   for (ulii = 0; ulii < size; ulii++) {
1967     *uiarr++ = 0;
1968   }
1969 }
1970 
fill_uint_one(size_t size,uint32_t * uiarr)1971 HEADER_INLINE void fill_uint_one(size_t size, uint32_t* uiarr) {
1972   size_t ulii;
1973   for (ulii = 0; ulii < size; ulii++) {
1974     *uiarr++ = ~0U;
1975   }
1976 }
1977 
fill_float_zero(size_t size,float * farr)1978 HEADER_INLINE void fill_float_zero(size_t size, float* farr) {
1979   size_t ulii;
1980   for (ulii = 0; ulii < size; ulii++) {
1981     *farr++ = 0.0;
1982   }
1983 }
1984 
fill_double_zero(size_t size,double * darr)1985 HEADER_INLINE void fill_double_zero(size_t size, double* darr) {
1986   size_t ulii;
1987   for (ulii = 0; ulii < size; ulii++) {
1988     *darr++ = 0.0;
1989   }
1990 }
1991 
1992 
1993 int32_t bigstack_calloc_uc(uintptr_t ct, unsigned char** ucp_ptr);
1994 
1995 int32_t bigstack_calloc_d(uintptr_t ct, double** dp_ptr);
1996 
1997 int32_t bigstack_calloc_f(uintptr_t ct, float** fp_ptr);
1998 
1999 int32_t bigstack_calloc_ui(uintptr_t ct, uint32_t** uip_ptr);
2000 
2001 int32_t bigstack_calloc_ul(uintptr_t ct, uintptr_t** ulp_ptr);
2002 
2003 int32_t bigstack_calloc_ull(uintptr_t ct, uint64_t** ullp_ptr);
2004 
bigstack_calloc_c(uintptr_t ct,char ** cp_ptr)2005 HEADER_INLINE int32_t bigstack_calloc_c(uintptr_t ct, char** cp_ptr) {
2006   return bigstack_calloc_uc(ct, (unsigned char**)cp_ptr);
2007 }
2008 
bigstack_calloc_i(uintptr_t ct,int32_t ** ip_ptr)2009 HEADER_INLINE int32_t bigstack_calloc_i(uintptr_t ct, int32_t** ip_ptr) {
2010   return bigstack_calloc_ui(ct, (uint32_t**)ip_ptr);
2011 }
2012 
bigstack_calloc_ll(uintptr_t ct,int64_t ** llp_ptr)2013 HEADER_INLINE int32_t bigstack_calloc_ll(uintptr_t ct, int64_t** llp_ptr) {
2014   return bigstack_calloc_ull(ct, (uint64_t**)llp_ptr);
2015 }
2016 
2017 int32_t bigstack_end_calloc_uc(uintptr_t ct, unsigned char** ucp_ptr);
2018 
2019 int32_t bigstack_end_calloc_d(uintptr_t ct, double** dp_ptr);
2020 
2021 int32_t bigstack_end_calloc_f(uintptr_t ct, float** fp_ptr);
2022 
2023 int32_t bigstack_end_calloc_ui(uintptr_t ct, uint32_t** uip_ptr);
2024 
2025 int32_t bigstack_end_calloc_ul(uintptr_t ct, uintptr_t** ulp_ptr);
2026 
2027 int32_t bigstack_end_calloc_ull(uintptr_t ct, uint64_t** ullp_ptr);
2028 
bigstack_end_calloc_c(uintptr_t ct,char ** cp_ptr)2029 HEADER_INLINE int32_t bigstack_end_calloc_c(uintptr_t ct, char** cp_ptr) {
2030   return bigstack_end_calloc_uc(ct, (unsigned char**)cp_ptr);
2031 }
2032 
bigstack_end_calloc_i(uintptr_t ct,int32_t ** ip_ptr)2033 HEADER_INLINE int32_t bigstack_end_calloc_i(uintptr_t ct, int32_t** ip_ptr) {
2034   return bigstack_end_calloc_ui(ct, (uint32_t**)ip_ptr);
2035 }
2036 
bigstack_end_calloc_ll(uintptr_t ct,int64_t ** llp_ptr)2037 HEADER_INLINE int32_t bigstack_end_calloc_ll(uintptr_t ct, int64_t** llp_ptr) {
2038   return bigstack_end_calloc_ull(ct, (uint64_t**)llp_ptr);
2039 }
2040 
2041 
2042 uint32_t murmurhash3_32(const void* key, uint32_t len);
2043 
hashval2(const char * idstr,uint32_t idlen)2044 HEADER_INLINE uint32_t hashval2(const char* idstr, uint32_t idlen) {
2045   return murmurhash3_32(idstr, idlen) % HASHSIZE;
2046 }
2047 
2048 uintptr_t geqprime(uintptr_t floor);
2049 
get_id_htable_size(uintptr_t item_ct)2050 HEADER_INLINE uint32_t get_id_htable_size(uintptr_t item_ct) {
2051   if (item_ct < 32761) {
2052     return 65521;
2053   } else {
2054     return geqprime(item_ct * 2 + 1);
2055   }
2056 }
2057 
2058 int32_t populate_id_htable(uintptr_t unfiltered_ct, const uintptr_t* exclude_arr, uintptr_t item_ct, const char* item_ids, uintptr_t max_id_len, uint32_t store_dups, uint32_t id_htable_size, uint32_t* id_htable);
2059 
alloc_and_populate_id_htable(uintptr_t unfiltered_ct,const uintptr_t * exclude_arr,uintptr_t item_ct,const char * item_ids,uintptr_t max_id_len,uint32_t allow_dups,uint32_t * id_htable_size_ptr,uint32_t ** id_htable_ptr)2060 HEADER_INLINE int32_t alloc_and_populate_id_htable(uintptr_t unfiltered_ct, const uintptr_t* exclude_arr, uintptr_t item_ct, const char* item_ids, uintptr_t max_id_len, uint32_t allow_dups, uint32_t* id_htable_size_ptr, uint32_t** id_htable_ptr) {
2061   uint32_t id_htable_size = get_id_htable_size(item_ct);
2062   if (bigstack_alloc_ui(id_htable_size, id_htable_ptr)) {
2063     return RET_NOMEM;
2064   }
2065   *id_htable_size_ptr = id_htable_size;
2066   return populate_id_htable(unfiltered_ct, exclude_arr, item_ct, item_ids, max_id_len, allow_dups, id_htable_size, *id_htable_ptr);
2067 }
2068 
2069 uint32_t id_htable_find(const char* id_buf, uintptr_t cur_id_len, const uint32_t* id_htable, uint32_t id_htable_size, const char* item_ids, uintptr_t max_id_len);
2070 
2071 void fill_idx_to_uidx(const uintptr_t* exclude_arr, uintptr_t unfiltered_item_ct, uintptr_t item_ct, uint32_t* idx_to_uidx);
2072 
2073 void fill_idx_to_uidx_incl(const uintptr_t* include_arr, uintptr_t unfiltered_item_ct, uintptr_t item_ct, uint32_t* idx_to_uidx);
2074 
2075 void fill_uidx_to_idx(const uintptr_t* exclude_arr, uint32_t unfiltered_item_ct, uint32_t item_ct, uint32_t* uidx_to_idx);
2076 
2077 void fill_uidx_to_idx_incl(const uintptr_t* include_arr, uint32_t unfiltered_item_ct, uint32_t item_ct, uint32_t* uidx_to_idx);
2078 
2079 void fill_midx_to_idx(const uintptr_t* exclude_arr_orig, const uintptr_t* exclude_arr, uint32_t item_ct, uint32_t* midx_to_idx);
2080 
2081 
2082 // "quaterarr" refers to a packed group of base-4 (2-bit) elements, analogous
2083 // to "bitarr".  (Based on "quaternary", not "quarter".)  "quatervec"
2084 // indicates that vector-alignment is also required.
2085 void fill_quatervec_55(uint32_t ct, uintptr_t* quatervec);
2086 
2087 // Used to unpack e.g. unfiltered sex_male to a filtered quaterarr usable as a
2088 // raw input bitmask.
2089 // Assumes output_quaterarr is sized to a multiple of 16 bytes.
2090 void quaterarr_collapse_init(const uintptr_t* __restrict unfiltered_bitarr, uint32_t unfiltered_ct, const uintptr_t* __restrict filter_bitarr, uint32_t filtered_ct, uintptr_t* __restrict output_quaterarr);
2091 
2092 void quaterarr_collapse_init_exclude(const uintptr_t* __restrict unfiltered_bitarr, uint32_t unfiltered_ct, const uintptr_t* __restrict filter_exclude_bitarr, uint32_t filtered_ct, uintptr_t* __restrict output_quaterarr);
2093 
2094 uint32_t alloc_collapsed_haploid_filters(const uintptr_t* __restrict sample_bitarr, const uintptr_t* __restrict sex_male, uint32_t unfiltered_sample_ct, uint32_t sample_ct, uint32_t hh_exists, uint32_t is_include, uintptr_t** sample_include_quatervec_ptr, uintptr_t** sample_male_include_quatervec_ptr);
2095 
free_cond(void * memptr)2096 HEADER_INLINE void free_cond(void* memptr) {
2097   if (memptr) {
2098     free(memptr);
2099   }
2100 }
2101 
realnum(double dd)2102 HEADER_INLINE uint32_t realnum(double dd) {
2103   return (dd == dd) && (dd != INFINITY) && (dd != -INFINITY);
2104 }
2105 
get_maf(double allele_freq)2106 HEADER_INLINE double get_maf(double allele_freq) {
2107   return (allele_freq <= 0.5)? allele_freq : (1.0 - allele_freq);
2108 }
2109 
filename_exists(const char * __restrict fname_append,char * fname,char * fname_end)2110 HEADER_INLINE int32_t filename_exists(const char* __restrict fname_append, char* fname, char* fname_end) {
2111 #ifdef _WIN32
2112   DWORD file_attr;
2113   strcpy(fname_end, fname_append);
2114   file_attr = GetFileAttributes(fname);
2115   return (file_attr != 0xffffffffU);
2116 #else
2117   struct stat st;
2118   strcpy(fname_end, fname_append);
2119   return (stat(fname, &st) == 0);
2120 #endif
2121 }
2122 
2123 void sample_delim_convert(uintptr_t unfiltered_sample_ct, const uintptr_t* sample_exclude, uint32_t sample_ct, uintptr_t max_sample_id_len, char oldc, char newc, char* sample_ids);
2124 
2125 void get_set_wrange_align(const uintptr_t* __restrict bitarr, uintptr_t word_ct, uintptr_t* __restrict firstw_ptr, uintptr_t* __restrict wlen_ptr);
2126 
2127 // for hash tables where maximum ID string length is not known in advance.
2128 uint32_t unklen_id_htable_find(const char* cur_id, const char* const* item_ids, const uint32_t* id_htable, uint32_t hashval, uint32_t id_htable_size);
2129 
2130 // okay, time to provide O(c log c)-time instead of O(c^2)-time initialization
2131 // (c = # of chromosomes/contigs).
2132 #define MAX_POSSIBLE_CHROM 65280
2133 
2134 // get_id_htable_size(MAX_POSSIBLE_CHROM) (use constexpr once sufficient
2135 // compiler support is available)
2136 #define CHROM_NAME_HTABLE_SIZE 130579
2137 
2138 // assumes MAX_POSSIBLE_CHROM is a multiple of 64, otherwise add round-up
2139 #define CHROM_MASK_WORDS (MAX_POSSIBLE_CHROM / BITCT)
2140 
2141 // (note that n+1, n+2, n+3, and n+4 are reserved for X/Y/XY/MT)
2142 #define MAX_CHROM_TEXTNUM 95
2143 
2144 // get_chrom_code_raw() needs to be modified if this changes
2145 #define MAX_CHROM_TEXTNUM_SLEN 2
2146 
2147 #define X_OFFSET 0
2148 #define Y_OFFSET 1
2149 #define XY_OFFSET 2
2150 #define MT_OFFSET 3
2151 #define XYMT_OFFSET_CT 4
2152 
2153 #define CHROM_X (MAX_POSSIBLE_CHROM + X_OFFSET)
2154 #define CHROM_Y (MAX_POSSIBLE_CHROM + Y_OFFSET)
2155 #define CHROM_XY (MAX_POSSIBLE_CHROM + XY_OFFSET)
2156 #define CHROM_MT (MAX_POSSIBLE_CHROM + MT_OFFSET)
2157 
2158 #ifdef __LP64__
2159   // dog requires 42 bits, and other species require less
2160   #define CHROM_MASK_INITIAL_WORDS 1
2161 #else
2162   #define CHROM_MASK_INITIAL_WORDS 2
2163 #endif
2164 
2165 typedef struct {
2166   // Main dynamic block intended to be allocated as a single aligned block of
2167   // memory on the heap freeable with vecaligned_free(), with chrom_mask at the
2168   // base.
2169 
2170   uintptr_t* chrom_mask; // which chromosomes aren't known to be absent?
2171   // This is a misnomer--it includes X and excludes MT.  Underlying concept is
2172   // "are some calls guaranteed to be homozygous (assuming >= 1 male)", which
2173   // is no longer true for MT since heteroplasmy is a thing.  (Well, the real
2174   // goal with MT is to enable dosage-based analysis, but until all pipelines
2175   // have adapted, diploid data handling loses slightly less information than
2176   // haploid.)
2177   uintptr_t* haploid_mask;
2178 
2179   // order of chromosomes in input files
2180   // currently tolerates out-of-order chromosomes, as long as all variants for
2181   // any given chromosome are together
2182   uint32_t* chrom_file_order;
2183 
2184   // if the second chromosome in the dataset is chr5, chrom_file_order[1] == 5,
2185   // the raw variant indexes for chr5 are in [chrom_fo_vidx_start[1],
2186   // chrom_fo_vidx_start[2]). and chrom_idx_to_foidx[5] == 1.
2187   uint32_t* chrom_fo_vidx_start;
2188   uint32_t* chrom_idx_to_foidx;
2189 
2190   // --allow-extra-chr support
2191   char** nonstd_names;
2192   uint32_t* nonstd_id_htable;
2193   // end main dynamic block
2194 
2195   uint32_t chrom_ct; // number of distinct chromosomes/contigs
2196   uint32_t species;
2197 
2198   int32_t xymt_codes[XYMT_OFFSET_CT]; // x, y, xy, mt
2199   uint32_t max_code;
2200 
2201   uint32_t autosome_ct;
2202 
2203   // yet more --allow-extra-chr support
2204   uint32_t zero_extra_chroms;
2205   uint32_t name_ct;
2206   Ll_str* incl_excl_name_stack;
2207   uint32_t is_include_stack;
2208   uint32_t output_encoding;
2209 } Chrom_info;
2210 
2211 extern const char* g_species_singular;
2212 extern const char* g_species_plural;
2213 
2214 int32_t init_chrom_info(Chrom_info* chrom_info_ptr);
2215 
2216 void init_species(uint32_t species_code, Chrom_info* chrom_info_ptr);
2217 
2218 void init_default_chrom_mask(Chrom_info* chrom_info_ptr);
2219 
init_chrom_info_human(Chrom_info * chrom_info_ptr)2220 HEADER_INLINE int32_t init_chrom_info_human(Chrom_info* chrom_info_ptr) {
2221   // convenience wrapper
2222   if (init_chrom_info(chrom_info_ptr)) {
2223     return RET_NOMEM;
2224   }
2225   init_species(SPECIES_HUMAN, chrom_info_ptr);
2226   init_default_chrom_mask(chrom_info_ptr);
2227   return 0;
2228 }
2229 
2230 void forget_extra_chrom_names(uint32_t reinitialize, Chrom_info* chrom_info_ptr);
2231 
2232 // in the usual case where the number of chromosomes/contigs is much less than
2233 // MAX_POSSIBLE_CHROM, this reduces chrom_info's memory consumption and
2234 // improves locality.
2235 int32_t finalize_chrom_info(Chrom_info* chrom_info_ptr);
2236 
2237 void cleanup_chrom_info(Chrom_info* chrom_info_ptr);
2238 
species_str(uintptr_t ct)2239 HEADER_INLINE const char* species_str(uintptr_t ct) {
2240   return (ct == ONELU)? g_species_singular : g_species_plural;
2241 }
2242 
2243 #define CHR_OUTPUT_PREFIX 1
2244 #define CHR_OUTPUT_M 2
2245 #define CHR_OUTPUT_MT 4
2246 #define CHR_OUTPUT_0M 8
2247 
are_all_words_zero(const uintptr_t * word_arr,uintptr_t word_ct)2248 HEADER_INLINE uint32_t are_all_words_zero(const uintptr_t* word_arr, uintptr_t word_ct) {
2249   while (word_ct--) {
2250     if (*word_arr++) {
2251       return 0;
2252     }
2253   }
2254   return 1;
2255 }
2256 
2257 char* chrom_name_write(const Chrom_info* chrom_info_ptr, uint32_t chrom_idx, char* buf);
2258 
2259 char* chrom_name_buf5w4write(const Chrom_info* chrom_info_ptr, uint32_t chrom_idx, uint32_t* chrom_name_len_ptr, char* buf5);
2260 
2261 uint32_t get_max_chrom_slen(const Chrom_info* chrom_info_ptr);
2262 
2263 uint32_t haploid_chrom_present(const Chrom_info* chrom_info_ptr);
2264 
2265 // does not require null-termination
2266 // only handles 1-99, X, Y, XY, MT, and "chr" prefix
2267 int32_t get_chrom_code_raw(const char* sptr);
2268 
2269 // now requires null-termination
2270 // now returns -1 when --allow-extra-chr may be ok, and -2 on total fail
2271 int32_t get_chrom_code(const char* chrom_name, const Chrom_info* chrom_info_ptr, uint32_t name_slen);
2272 
2273 // when the chromosome name isn't null-terminated, but we want to preserve the
2274 // character there
2275 // requires chrom_name[name_slen] to be mutable
2276 int32_t get_chrom_code_counted(const Chrom_info* chrom_info_ptr, uint32_t name_slen, char* chrom_name);
2277 
2278 // when it's okay to just replace the terminating space/tab with a \0
get_chrom_code_destructive(const Chrom_info * chrom_info_ptr,char * chrom_name)2279 HEADER_INLINE int32_t get_chrom_code_destructive(const Chrom_info* chrom_info_ptr, char* chrom_name) {
2280   char* chrom_token_end = token_endnn(chrom_name);
2281   *chrom_token_end = '\0';
2282   return get_chrom_code(chrom_name, chrom_info_ptr, (uintptr_t)(chrom_token_end - chrom_name));
2283 }
2284 
2285 uint32_t get_variant_chrom_fo_idx(const Chrom_info* chrom_info_ptr, uintptr_t variant_uidx);
2286 
get_variant_chrom(const Chrom_info * chrom_info_ptr,uintptr_t variant_uidx)2287 HEADER_INLINE uint32_t get_variant_chrom(const Chrom_info* chrom_info_ptr, uintptr_t variant_uidx) {
2288   return chrom_info_ptr->chrom_file_order[get_variant_chrom_fo_idx(chrom_info_ptr, variant_uidx)];
2289 }
2290 
2291 
2292 // these assume the chromosome is present in the dataset
get_chrom_start_vidx(const Chrom_info * chrom_info_ptr,uint32_t chrom_idx)2293 HEADER_INLINE uint32_t get_chrom_start_vidx(const Chrom_info* chrom_info_ptr, uint32_t chrom_idx) {
2294   return chrom_info_ptr->chrom_fo_vidx_start[chrom_info_ptr->chrom_idx_to_foidx[chrom_idx]];
2295 }
2296 
get_chrom_end_vidx(const Chrom_info * chrom_info_ptr,uint32_t chrom_idx)2297 HEADER_INLINE uint32_t get_chrom_end_vidx(const Chrom_info* chrom_info_ptr, uint32_t chrom_idx) {
2298   return chrom_info_ptr->chrom_fo_vidx_start[chrom_info_ptr->chrom_idx_to_foidx[chrom_idx] + 1];
2299 }
2300 
2301 // now assumes chrom_name is null-terminated
2302 int32_t try_to_add_chrom_name(const char* chrom_name, const char* file_descrip, uintptr_t line_idx, uint32_t name_slen, uint32_t allow_extra_chroms, int32_t* chrom_idx_ptr, Chrom_info* chrom_info_ptr);
2303 
get_or_add_chrom_code(const char * chrom_name,const char * file_descrip,uintptr_t line_idx,uint32_t name_slen,uint32_t allow_extra_chroms,Chrom_info * chrom_info_ptr,int32_t * chrom_idx_ptr)2304 HEADER_INLINE int32_t get_or_add_chrom_code(const char* chrom_name, const char* file_descrip, uintptr_t line_idx, uint32_t name_slen, uint32_t allow_extra_chroms, Chrom_info* chrom_info_ptr, int32_t* chrom_idx_ptr) {
2305   *chrom_idx_ptr = get_chrom_code(chrom_name, chrom_info_ptr, name_slen);
2306   if (*chrom_idx_ptr >= 0) {
2307     return 0;
2308   }
2309   return try_to_add_chrom_name(chrom_name, file_descrip, line_idx, name_slen, allow_extra_chroms, chrom_idx_ptr, chrom_info_ptr);
2310 }
2311 
get_or_add_chrom_code_destructive(const char * file_descrip,uintptr_t line_idx,uint32_t allow_extra_chroms,char * chrom_name,char * chrom_name_end,Chrom_info * chrom_info_ptr,int32_t * chrom_idx_ptr)2312 HEADER_INLINE int32_t get_or_add_chrom_code_destructive(const char* file_descrip, uintptr_t line_idx, uint32_t allow_extra_chroms, char* chrom_name, char* chrom_name_end, Chrom_info* chrom_info_ptr, int32_t* chrom_idx_ptr) {
2313   *chrom_name_end = '\0';
2314   return get_or_add_chrom_code(chrom_name, file_descrip, line_idx, (uintptr_t)(chrom_name_end - chrom_name), allow_extra_chroms, chrom_info_ptr, chrom_idx_ptr);
2315 }
2316 
2317 // newval does not need to be null-terminated
2318 // assumes *allele_ptr is not initialized
2319 // make last parameter const char** later
2320 uint32_t allele_set(const char* newval, uint32_t allele_slen, char** allele_ptr);
2321 
2322 // *allele_ptr must be initialized; frees *allele_ptr if necessary
2323 uint32_t allele_reset(const char* newval, uint32_t allele_slen, char** allele_ptr);
2324 
2325 void cleanup_allele_storage(uint32_t max_allele_slen, uintptr_t allele_storage_entry_ct, char** allele_storage);
2326 
2327 // needed by fixed --merge-equal-pos implementation, which takes more liberties
2328 // with allele_storage[]
2329 void cleanup_allele_storage2(uintptr_t allele_storage_entry_ct, char** allele_storage);
2330 
2331 // no need for this; code is simpler if we just create a copy of marker_exclude
2332 // with all non-autosomal loci removed
2333 /*
2334 HEADER_INLINE uintptr_t next_autosomal_unsafe(uintptr_t* marker_exclude, uintptr_t marker_uidx, Chrom_info* chrom_info_ptr, uint32_t* chrom_end_ptr, uint32_t* chrom_fo_idx_ptr) {
2335   // assumes we are at an autosomal marker if marker_uidx < *chrom_end_ptr
2336   next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx);
2337   if (marker_uidx < (*chrom_end_ptr)) {
2338     return marker_uidx;
2339   }
2340   uintptr_t* haploid_mask = chrom_info_ptr->haploid_mask;
2341   uint32_t chrom_idx;
2342   while (1) {
2343     do {
2344       *chrom_fo_idx_ptr += 1;
2345       *chrom_end_ptr = chrom_info_ptr->chrom_file_order_marker_idx[(*chrom_fo_idx_ptr) + 1];
2346     } while (marker_uidx >= (*chrom_end_ptr));
2347     chrom_idx = chrom_info_ptr->chrom_file_order[*chrom_fo_idx_ptr];
2348     if (!IS_SET(haploid_mask, chrom_idx)) {
2349       return marker_uidx;
2350     }
2351     marker_uidx = next_unset_ul_unsafe(marker_exclude, *chrom_end_ptr);
2352   }
2353 }
2354 */
2355 
2356 void refresh_chrom_info(const Chrom_info* chrom_info_ptr, uintptr_t marker_uidx, uint32_t* __restrict chrom_end_ptr, uint32_t* __restrict chrom_fo_idx_ptr, uint32_t* __restrict is_x_ptr, uint32_t* __restrict is_y_ptr, uint32_t* __restrict is_mt_ptr, uint32_t* __restrict is_haploid_ptr);
2357 
2358 int32_t single_chrom_start(const Chrom_info* chrom_info_ptr, const uintptr_t* marker_exclude, uint32_t unfiltered_marker_ct);
2359 
2360 double get_dmedian(const double* sorted_arr, uintptr_t len);
2361 
2362 double destructive_get_dmedian(uintptr_t len, double* unsorted_arr);
2363 
2364 int32_t strcmp_casted(const void* s1, const void* s2);
2365 
2366 int32_t strcmp_natural(const void* s1, const void* s2);
2367 
2368 int32_t strcmp_deref(const void* s1, const void* s2);
2369 
2370 int32_t strcmp_natural_deref(const void* s1, const void* s2);
2371 
2372 int32_t get_uidx_from_unsorted(const char* idstr, const uintptr_t* exclude_arr, uint32_t id_ct, const char* unsorted_ids, uintptr_t max_id_len);
2373 
2374 // sorted_ids contents not changed, but not worth the trouble of returning a
2375 // const char*
2376 char* scan_for_duplicate_ids(char* sorted_ids, uintptr_t id_ct, uintptr_t max_id_len);
2377 
2378 char* scan_for_duplicate_or_overlap_ids(char* sorted_ids, uintptr_t id_ct, uintptr_t max_id_len, const char* sorted_nonoverlap_ids, uintptr_t nonoverlap_id_ct, uintptr_t max_nonoverlap_id_len);
2379 
2380 int32_t eval_affection(const char* bufptr, double missing_phenod);
2381 
2382 uint32_t triangle_divide(int64_t cur_prod, int32_t modif);
2383 
2384 void triangle_fill(uint32_t ct, uint32_t pieces, uint32_t parallel_idx, uint32_t parallel_tot, uint32_t start, uint32_t align, uint32_t* target_arr);
2385 
2386 int32_t relationship_req(uint64_t calculation_type);
2387 
2388 int32_t distance_req(const char* read_dists_fname, uint64_t calculation_type);
2389 
2390 int32_t double_cmp(const void* aa, const void* bb);
2391 
2392 int32_t double_cmp_decr(const void* aa, const void* bb);
2393 
2394 int32_t double_cmp_deref(const void* aa, const void* bb);
2395 
2396 int32_t char_cmp_deref(const void* aa, const void* bb);
2397 
2398 int32_t intcmp(const void* aa, const void* bb);
2399 
2400 int32_t uintcmp(const void* aa, const void* bb);
2401 
2402 #ifndef __cplusplus
2403 int32_t intcmp2(const void* aa, const void* bb);
2404 #endif
2405 
2406 int32_t intcmp3_decr(const void* aa, const void* bb);
2407 
2408 #ifndef __cplusplus
2409 int32_t llcmp(const void* aa, const void* bb);
2410 #endif
2411 
2412 void qsort_ext2(char* main_arr, uintptr_t arr_length, uintptr_t item_length, int(* comparator_deref)(const void*, const void*), char* secondary_arr, uintptr_t secondary_item_len, char* proxy_arr, uintptr_t proxy_len);
2413 
2414 int32_t qsort_ext(char* main_arr, uintptr_t arr_length, uintptr_t item_length, int(* comparator_deref)(const void*, const void*), char* secondary_arr, intptr_t secondary_item_len);
2415 
2416 int32_t sort_item_ids_noalloc(uintptr_t unfiltered_ct, const uintptr_t* exclude_arr, uintptr_t item_ct, const char* __restrict item_ids, uintptr_t max_id_len, uint32_t allow_dups, uint32_t collapse_idxs, int(* comparator_deref)(const void*, const void*), char* __restrict sorted_ids, uint32_t* id_map);
2417 
2418 int32_t sort_item_ids(uintptr_t unfiltered_ct, const uintptr_t* exclude_arr, uintptr_t exclude_ct, const char* __restrict item_ids, uintptr_t max_id_len, uint32_t allow_dups, uint32_t collapse_idxs, int(* comparator_deref)(const void*, const void*), char** sorted_ids_ptr, uint32_t** id_map_ptr);
2419 
2420 uint32_t uint32arr_greater_than(const uint32_t* sorted_uint32_arr, uint32_t arr_length, uint32_t uii);
2421 
2422 uint32_t int32arr_greater_than(const int32_t* sorted_int32_arr, uint32_t arr_length, int32_t ii);
2423 
2424 uintptr_t uint64arr_greater_than(const uint64_t* sorted_uint64_arr, uintptr_t arr_length, uint64_t ullii);
2425 
2426 uintptr_t doublearr_greater_than(const double* sorted_dbl_arr, uintptr_t arr_length, double dxx);
2427 
2428 uintptr_t nonincr_doublearr_leq_stride(const double* nonincr_dbl_arr, uintptr_t arr_length, uintptr_t stride, double dxx);
2429 
2430 int32_t bsearch_str(const char* id_buf, uintptr_t cur_id_len, const char* lptr, uintptr_t max_id_len, uintptr_t end_idx);
2431 
bsearch_str_nl(const char * id_buf,const char * lptr,uintptr_t max_id_len,intptr_t end_idx)2432 HEADER_INLINE int32_t bsearch_str_nl(const char* id_buf, const char* lptr, uintptr_t max_id_len, intptr_t end_idx) {
2433   return bsearch_str(id_buf, strlen(id_buf), lptr, max_id_len, end_idx);
2434 }
2435 
2436 int32_t bsearch_str_natural(const char* id_buf, const char* lptr, uintptr_t max_id_len, uintptr_t end_idx);
2437 
2438 uintptr_t bsearch_str_lb(const char* id_buf, uintptr_t cur_id_len, const char* lptr, uintptr_t max_id_len, uintptr_t end_idx);
2439 
2440 uint32_t bsearch_read_fam_indiv(char* __restrict read_ptr, const char* __restrict lptr, uintptr_t max_id_len, uintptr_t filter_line_ct, char** read_pp_new, int32_t* retval_ptr, char* __restrict id_buf);
2441 
2442 void bsearch_fam(const char* __restrict fam_id, const char* __restrict lptr, uintptr_t max_id_len, uint32_t filter_line_ct, uint32_t* __restrict first_idx_ptr, uint32_t* __restrict last_idx_ptr, char* __restrict id_buf);
2443 
2444 // These ensure the trailing bits are zeroed out.
2445 void bitarr_invert(uintptr_t bit_ct, uintptr_t* bitarr);
2446 
2447 void bitarr_invert_copy(const uintptr_t* input_bitarr, uintptr_t bit_ct, uintptr_t* output_bitarr);
2448 
2449 
2450 // "bitvec" indicates that word count is used instead of vector count.
2451 void bitvec_and(const uintptr_t* __restrict arg_bitvec, uintptr_t word_ct, uintptr_t* __restrict main_bitvec);
2452 
2453 void bitvec_andnot(const uintptr_t* __restrict exclude_bitvec, uintptr_t word_ct, uintptr_t* __restrict main_bitvec);
2454 
2455 void bitvec_andnot_reversed_args(const uintptr_t* __restrict include_bitvec, uintptr_t word_ct, uintptr_t* __restrict main_bitvec);
2456 
2457 void bitvec_or(const uintptr_t* __restrict arg_bitvec, uintptr_t word_ct, uintptr_t* main_bitvec);
2458 
2459 void bitvec_ornot(const uintptr_t* __restrict inverted_or_bitvec, uintptr_t word_ct, uintptr_t* main_bitvec);
2460 
2461 void bitvec_xor(const uintptr_t* __restrict arg_bitvec, uintptr_t word_ct, uintptr_t* __restrict main_bitvec);
2462 
popcount2_long(uintptr_t val)2463 HEADER_INLINE uint32_t popcount2_long(uintptr_t val) {
2464 #ifdef __LP64__
2465   val = (val & 0x3333333333333333LLU) + ((val >> 2) & 0x3333333333333333LLU);
2466   return (((val + (val >> 4)) & 0x0f0f0f0f0f0f0f0fLLU) * 0x0101010101010101LLU) >> 56;
2467 #else
2468   val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
2469   return (((val + (val >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24;
2470 #endif
2471 }
2472 
popcount_long(uintptr_t val)2473 HEADER_INLINE uint32_t popcount_long(uintptr_t val) {
2474   // the simple version, good enough for all non-time-critical stuff
2475   return popcount2_long(val - ((val >> 1) & FIVEMASK));
2476 }
2477 
2478 uint32_t is_monomorphic_a2(const uintptr_t* geno_arr, uint32_t sample_ct);
2479 
2480 uint32_t is_monomorphic(const uintptr_t* geno_arr, uint32_t sample_ct);
2481 
2482 // same as is_monomorphic, except it also flags the all-heterozygote case
2483 uint32_t less_than_two_genotypes(const uintptr_t* geno_arr, uint32_t sample_ct);
2484 
2485 // uint32_t has_three_genotypes(uintptr_t* lptr, uint32_t sample_ct);
2486 
2487 uintptr_t popcount_longs(const uintptr_t* lptr, uintptr_t word_ct);
2488 
2489 #ifdef __LP64__
popcount_longs_nzbase(const uintptr_t * lptr,uintptr_t start_idx,uintptr_t end_idx)2490 HEADER_INLINE uintptr_t popcount_longs_nzbase(const uintptr_t* lptr, uintptr_t start_idx, uintptr_t end_idx) {
2491   uintptr_t prefix_ct = 0;
2492   if (start_idx & 1) {
2493     if (end_idx == start_idx) {
2494       return 0;
2495     }
2496     prefix_ct = popcount_long(lptr[start_idx++]);
2497   }
2498   return prefix_ct + popcount_longs(&(lptr[start_idx]), end_idx - start_idx);
2499 }
2500 #else
popcount_longs_nzbase(const uintptr_t * lptr,uintptr_t start_idx,uintptr_t end_idx)2501 HEADER_INLINE uintptr_t popcount_longs_nzbase(const uintptr_t* lptr, uintptr_t start_idx, uintptr_t end_idx) {
2502   return popcount_longs(&(lptr[start_idx]), end_idx - start_idx);
2503 }
2504 #endif
2505 
2506 uintptr_t popcount2_longs(const uintptr_t* lptr, uintptr_t word_ct);
2507 
2508 #define popcount01_longs popcount2_longs
2509 
2510 uintptr_t popcount_bit_idx(const uintptr_t* lptr, uintptr_t start_idx, uintptr_t end_idx);
2511 
2512 uint32_t chrom_window_max(const uint32_t* marker_pos, const uintptr_t* marker_exclude, const Chrom_info* chrom_info_ptr, uint32_t chrom_idx, uint32_t ct_max, uint32_t bp_max, uint32_t cur_window_max);
2513 
2514 uint32_t window_back(const uint32_t* __restrict marker_pos, const double* __restrict marker_cms, const uintptr_t* marker_exclude, uint32_t marker_uidx_min, uint32_t marker_uidx_start, uint32_t count_max, uint32_t bp_max, double cm_max, uint32_t* __restrict window_trail_ct_ptr);
2515 
2516 uint32_t window_forward(const uint32_t* __restrict marker_pos, const double* __restrict marker_cms, const uintptr_t* marker_exclude, uint32_t marker_uidx_start, uint32_t marker_uidx_last, uint32_t count_max, uint32_t bp_max, double cm_max, uint32_t* __restrict window_lead_ct_ptr);
2517 
2518 uintptr_t jump_forward_unset_unsafe(const uintptr_t* bitarr, uintptr_t cur_pos, uintptr_t forward_ct);
2519 
popcount_chars(const uintptr_t * lptr,uintptr_t start_idx,uintptr_t end_idx)2520 HEADER_INLINE uintptr_t popcount_chars(const uintptr_t* lptr, uintptr_t start_idx, uintptr_t end_idx) {
2521   return popcount_bit_idx(lptr, start_idx * 8, end_idx * 8);
2522 }
2523 
2524 // end_idx is in word, not bit, units
2525 uintptr_t popcount_longs_exclude(const uintptr_t* __restrict lptr, const uintptr_t* __restrict exclude_arr, uintptr_t end_idx);
2526 
2527 uintptr_t popcount_longs_intersect(const uintptr_t* __restrict lptr1, const uintptr_t* __restrict lptr2, uintptr_t word_ct);
2528 
2529 void vertical_bitct_subtract(const uintptr_t* bitarr, uint32_t item_ct, uint32_t* sum_arr);
2530 
2531 #ifdef __LP64__
2532 void count_2freq_dbl_960b(const VECITYPE* geno_vvec, const VECITYPE* geno_vvec_end, const VECITYPE* __restrict mask1vp, const VECITYPE* __restrict mask2vp, uint32_t* __restrict ct1abp, uint32_t* __restrict ct1cp, uint32_t* __restrict ct2abp, uint32_t* __restrict ct2cp);
2533 
2534 void count_3freq_1920b(const VECITYPE* geno_vvec, const VECITYPE* geno_vvec_end, const VECITYPE* __restrict maskvp, uint32_t* __restrict ctap, uint32_t* __restrict ctbp, uint32_t* __restrict ctcp);
2535 #else
2536 void count_2freq_dbl_24b(const uintptr_t* __restrict geno_vec, const uintptr_t* __restrict mask1p, const uintptr_t* __restrict mask2p, uint32_t* __restrict ct1abp, uint32_t* __restrict ct1cp, uint32_t* __restrict ct2abp, uint32_t* __restrict ct2cp);
2537 
2538 void count_3freq_48b(const uintptr_t* __restrict geno_vec, const uintptr_t* __restrict maskp, uint32_t* __restrict ctap, uint32_t* __restrict ctbp, uint32_t* __restrict ctcp);
2539 #endif
2540 
2541 void genovec_set_freq(const uintptr_t* __restrict geno_vec, const uintptr_t* __restrict include_quatervec, uintptr_t sample_ctl2, uint32_t* __restrict set_ctp, uint32_t* __restrict missing_ctp);
2542 
2543 void genovec_set_freq_x(const uintptr_t* __restrict geno_vec, const uintptr_t* __restrict include_quatervec, const uintptr_t* __restrict male_quatervec, uintptr_t sample_ctl2, uint32_t* __restrict set_ctp, uint32_t* __restrict missing_ctp);
2544 
2545 void genovec_set_freq_y(const uintptr_t* __restrict geno_vec, const uintptr_t* __restrict include_quatervec, const uintptr_t* __restrict nonmale_quatervec, uintptr_t sample_ctl2, uint32_t* __restrict set_ctp, uint32_t* __restrict missing_ctp);
2546 
2547 void genovec_3freq(const uintptr_t* __restrict geno_vec, const uintptr_t* __restrict include_quatervec, uintptr_t sample_ctl2, uint32_t* __restrict missing_ctp, uint32_t* __restrict het_ctp, uint32_t* __restrict homset_ctp);
2548 
2549 uintptr_t count_01(const uintptr_t* quatervec, uintptr_t word_ct);
2550 
zero_trailing_bits(uintptr_t unfiltered_ct,uintptr_t * bitarr)2551 HEADER_INLINE void zero_trailing_bits(uintptr_t unfiltered_ct, uintptr_t* bitarr) {
2552   uintptr_t trail_ct = unfiltered_ct & (BITCT - 1);
2553   if (trail_ct) {
2554     bitarr[unfiltered_ct / BITCT] &= (ONELU << trail_ct) - ONELU;
2555   }
2556 }
2557 
2558 void fill_all_bits(uintptr_t ct, uintptr_t* bitarr);
2559 
2560 uint32_t numeric_range_list_to_bitarr(const Range_list* range_list_ptr, uint32_t item_ct, uint32_t offset, uint32_t ignore_overflow, uintptr_t* bitarr);
2561 
2562 int32_t string_range_list_to_bitarr(char* header_line, uint32_t item_ct, uint32_t fixed_len, const Range_list* range_list_ptr, const char* __restrict sorted_ids, const uint32_t* __restrict id_map, const char* __restrict range_list_flag, const char* __restrict file_descrip, uintptr_t* bitarr, int32_t* __restrict seen_idxs);
2563 
2564 int32_t string_range_list_to_bitarr_alloc(char* header_line, uint32_t item_ct, uint32_t fixed_len, const Range_list* range_list_ptr, const char* __restrict range_list_flag, const char* __restrict file_descrip, uintptr_t** bitarr_ptr);
2565 
2566 int32_t string_range_list_to_bitarr2(const char* __restrict sorted_ids, const uint32_t* id_map, uintptr_t item_ct, uintptr_t max_id_len, const Range_list* __restrict range_list_ptr, const char* __restrict range_list_flag, uintptr_t* bitarr_excl);
2567 
count_chrom_markers(const Chrom_info * chrom_info_ptr,const uintptr_t * marker_exclude,uint32_t chrom_idx)2568 HEADER_INLINE uint32_t count_chrom_markers(const Chrom_info* chrom_info_ptr, const uintptr_t* marker_exclude, uint32_t chrom_idx) {
2569   if (!is_set(chrom_info_ptr->chrom_mask, chrom_idx)) {
2570     return 0;
2571   }
2572   const uint32_t chrom_fo_idx = chrom_info_ptr->chrom_idx_to_foidx[chrom_idx];
2573   const uint32_t min_idx = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
2574   const uint32_t max_idx = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
2575   return (max_idx - min_idx) - ((uint32_t)popcount_bit_idx(marker_exclude, min_idx, max_idx));
2576 }
2577 
2578 uint32_t count_non_autosomal_markers(const Chrom_info* chrom_info_ptr, const uintptr_t* marker_exclude, uint32_t count_x, uint32_t count_mt);
2579 
2580 int32_t conditional_allocate_non_autosomal_markers(const Chrom_info* chrom_info_ptr, uintptr_t unfiltered_marker_ct, const uintptr_t* marker_exclude_orig, uint32_t marker_ct, uint32_t count_x, uint32_t count_mt, const char* calc_descrip, uintptr_t** marker_exclude_ptr, uint32_t* newly_excluded_ct_ptr);
2581 
2582 uint32_t get_max_chrom_size(const Chrom_info* chrom_info_ptr, const uintptr_t* marker_exclude, uint32_t* last_chrom_fo_idx_ptr);
2583 
2584 void count_genders(const uintptr_t* __restrict sex_nm, const uintptr_t* __restrict sex_male, const uintptr_t* __restrict sample_exclude, uintptr_t unfiltered_sample_ct, uint32_t* __restrict male_ct_ptr, uint32_t* __restrict female_ct_ptr, uint32_t* __restrict unk_ct_ptr);
2585 
2586 void reverse_loadbuf(uintptr_t unfiltered_sample_ct, unsigned char* loadbuf);
2587 
2588 // deprecated, try to just use copy_quaterarr_nonempty_subset()
2589 void copy_quaterarr_nonempty_subset_excl(const uintptr_t* __restrict raw_quaterarr, const uintptr_t* __restrict subset_excl, uint32_t raw_quaterarr_size, uint32_t subset_size, uintptr_t* __restrict output_quaterarr);
2590 
load_raw(uintptr_t unfiltered_sample_ct4,FILE * bedfile,uintptr_t * rawbuf)2591 HEADER_INLINE uint32_t load_raw(uintptr_t unfiltered_sample_ct4, FILE* bedfile, uintptr_t* rawbuf) {
2592   // only use this if all accesses to the data involve
2593   // 1. some sort of mask, or
2594   // 2. explicit iteration from 0..(unfiltered_sample_ct-1).
2595   // otherwise improper trailing bits might cause a segfault, when we should
2596   // be ignoring them or just issuing a warning.
2597   return (fread(rawbuf, 1, unfiltered_sample_ct4, bedfile) < unfiltered_sample_ct4);
2598 }
2599 
get_final_mask(uint32_t sample_ct)2600 HEADER_INLINE uintptr_t get_final_mask(uint32_t sample_ct) {
2601   uint32_t uii = sample_ct % BITCT2;
2602   if (uii) {
2603     return (ONELU << (2 * uii)) - ONELU;
2604   } else {
2605     return ~ZEROLU;
2606   }
2607 }
2608 
load_raw2(uintptr_t unfiltered_sample_ct4,uintptr_t unfiltered_sample_ctl2m1,uintptr_t final_mask,FILE * bedfile,uintptr_t * rawbuf)2609 HEADER_INLINE uint32_t load_raw2(uintptr_t unfiltered_sample_ct4, uintptr_t unfiltered_sample_ctl2m1, uintptr_t final_mask, FILE* bedfile, uintptr_t* rawbuf) {
2610   if (fread(rawbuf, 1, unfiltered_sample_ct4, bedfile) < unfiltered_sample_ct4) {
2611     return 1;
2612   }
2613   rawbuf[unfiltered_sample_ctl2m1] &= final_mask;
2614   return 0;
2615 }
2616 
2617 uint32_t load_and_collapse(uint32_t unfiltered_sample_ct, uint32_t sample_ct, const uintptr_t* __restrict sample_exclude, uintptr_t final_mask, uint32_t do_reverse, FILE* bedfile, uintptr_t* __restrict rawbuf, uintptr_t* __restrict mainbuf);
2618 
2619 // was "collapse_copy_quaterarr_incl", but this should be better way to think
2620 // about it
2621 void copy_quaterarr_nonempty_subset(const uintptr_t* __restrict raw_quaterarr, const uintptr_t* __restrict subset_mask, uint32_t raw_quaterarr_size, uint32_t subset_size, uintptr_t* __restrict output_quaterarr);
2622 
2623 /*
2624 // in-place version of copy_quaterarr_subset (usually destroying original
2625 // data).
2626 // this doesn't seem to provide a meaningful advantage over
2627 // copy_quaterarr_subset in practice, and the latter is more versatile without
2628 // requiring much more memory.
2629 void inplace_quaterarr_proper_subset(const uintptr_t* __restrict subset_mask, uint32_t orig_quaterarr_size, uint32_t subset_size, uintptr_t* __restrict main_quaterarr);
2630 
2631 HEADER_INLINE void inplace_quaterarr_subset(const uintptr_t* __restrict subset_mask, uint32_t orig_quaterarr_size, uint32_t subset_size, uintptr_t* __restrict main_quaterarr) {
2632   if (orig_quaterarr_size == subset_size) {
2633     return;
2634   }
2635   inplace_quaterarr_proper_subset(subset_mask, orig_quaterarr_size, subset_size, main_quaterarr);
2636 }
2637 */
2638 
2639 uint32_t load_and_collapse_incl(uint32_t unfiltered_sample_ct, uint32_t sample_ct, const uintptr_t* __restrict sample_include, uintptr_t final_mask, uint32_t do_reverse, FILE* bedfile, uintptr_t* __restrict rawbuf, uintptr_t* __restrict mainbuf);
2640 
2641 // uint32_t load_and_collapse_incl_inplace(const uintptr_t* __restrict sample_include, uint32_t unfiltered_sample_ct, uint32_t sample_ct, uintptr_t final_mask, uint32_t do_reverse, FILE* bedfile, uintptr_t* __restrict mainbuf);
2642 
2643 uint32_t load_and_split(uint32_t unfiltered_sample_ct, const uintptr_t* __restrict pheno_nm, const uintptr_t* __restrict pheno_c, FILE* bedfile, uintptr_t* __restrict rawbuf, uintptr_t* __restrict casebuf, uintptr_t* __restrict ctrlbuf);
2644 
2645 void init_quaterarr_from_bitarr(const uintptr_t* __restrict bitarr, uintptr_t unfiltered_sample_ct, uintptr_t* __restrict new_quaterarr);
2646 
2647 void init_quaterarr_from_inverted_bitarr(const uintptr_t* __restrict inverted_bitarr, uintptr_t unfiltered_sample_ct, uintptr_t* __restrict new_quaterarr);
2648 
2649 void quatervec_01_init_invert(const uintptr_t* __restrict source_quatervec, uintptr_t entry_ct, uintptr_t* __restrict target_quatervec);
2650 
2651 // target_vec := source_vec ANDNOT exclude_vec
2652 // may write an extra word
2653 void bitvec_andnot_copy(const uintptr_t* __restrict source_vec, const uintptr_t* __restrict exclude_vec, uintptr_t word_ct, uintptr_t* __restrict target_vec);
2654 
2655 void apply_bitarr_mask_to_quaterarr_01(const uintptr_t* __restrict mask_bitarr, uintptr_t unfiltered_sample_ct, uintptr_t* main_quaterarr);
2656 
2657 void apply_bitarr_excl_to_quaterarr_01(const uintptr_t* __restrict excl_bitarr, uintptr_t unfiltered_sample_ct, uintptr_t* __restrict main_quaterarr);
2658 
2659 // excludes (excl_bitarr_1 & excl_bitarr_2).  (union can be excluded by calling
2660 // apply_excl_to_quaterarr_01() twice.)
2661 void apply_excl_intersect_to_quaterarr_01(const uintptr_t* __restrict excl_bitarr_1, const uintptr_t* __restrict excl_bitarr_2, uintptr_t unfiltered_sample_ct, uintptr_t* __restrict main_quaterarr);
2662 
2663 // initializes output_quatervec bits to 01 iff input_quatervec bits are 01,
2664 // everything else zeroed out
2665 void quatervec_copy_only_01(const uintptr_t* __restrict input_quatervec, uintptr_t unfiltered_sample_ct, uintptr_t* __restrict output_quatervec);
2666 
2667 void quatervec_01_invert(uintptr_t unfiltered_sample_ct, uintptr_t* main_quatervec);
2668 
2669 void vec_datamask(uintptr_t unfiltered_sample_ct, uint32_t matchval, uintptr_t* data_ptr, uintptr_t* mask_ptr, uintptr_t* result_ptr);
2670 
2671 // void vec_rotate_plink1_to_plink2(uintptr_t* lptr, uint32_t word_ct);
2672 
2673 void rotate_plink1_to_a2ct_and_copy(uintptr_t* loadbuf, uintptr_t* writebuf, uintptr_t word_ct);
2674 
2675 void extract_collapsed_missing_bitfield(uintptr_t* lptr, uintptr_t unfiltered_sample_ct, uintptr_t* sample_include_quaterarr, uintptr_t sample_ct, uintptr_t* missing_bitfield);
2676 
2677 void hh_reset(unsigned char* loadbuf, uintptr_t* sample_include_quaterarr, uintptr_t unfiltered_sample_ct);
2678 
2679 void hh_reset_y(unsigned char* loadbuf, uintptr_t* sample_include_quaterarr, uintptr_t* sample_male_include_quaterarr, uintptr_t unfiltered_sample_ct);
2680 
haploid_fix(uint32_t hh_exists,uintptr_t * sample_include_quaterarr,uintptr_t * sample_male_include_quaterarr,uintptr_t sample_ct,uint32_t is_x,uint32_t is_y,unsigned char * loadbuf)2681 HEADER_INLINE void haploid_fix(uint32_t hh_exists, uintptr_t* sample_include_quaterarr, uintptr_t* sample_male_include_quaterarr, uintptr_t sample_ct, uint32_t is_x, uint32_t is_y, unsigned char* loadbuf) {
2682   if (is_x) {
2683     if (hh_exists & XMHH_EXISTS) {
2684       hh_reset(loadbuf, sample_male_include_quaterarr, sample_ct);
2685     }
2686   } else if (is_y) {
2687     if (hh_exists & Y_FIX_NEEDED) {
2688       hh_reset_y(loadbuf, sample_include_quaterarr, sample_male_include_quaterarr, sample_ct);
2689     }
2690   } else if (hh_exists & NXMHH_EXISTS) {
2691     hh_reset(loadbuf, sample_include_quaterarr, sample_ct);
2692   }
2693 }
2694 
2695 uint32_t alloc_raw_haploid_filters(uint32_t unfiltered_sample_ct, uint32_t hh_exists, uint32_t is_include, uintptr_t* sample_bitarr, uintptr_t* sex_male, uintptr_t** sample_raw_include_quatervec_ptr, uintptr_t** sample_raw_male_quatervec_ptr);
2696 
2697 void haploid_fix_multiple(uintptr_t* marker_exclude, uintptr_t marker_uidx_start, uintptr_t marker_ct, Chrom_info* chrom_info_ptr, uint32_t hh_exists, uint32_t set_hh_missing, uint32_t set_mixed_mt_missing, uintptr_t* sample_raw_include2, uintptr_t* sample_raw_male_include2, uintptr_t unfiltered_sample_ct, uintptr_t byte_ct_per_marker, unsigned char* loadbuf);
2698 
2699 void force_missing(unsigned char* loadbuf, uintptr_t* force_missing_include2, uintptr_t unfiltered_sample_ct);
2700 
sexchar(uintptr_t * sex_nm,uintptr_t * sex_male,uintptr_t sample_uidx)2701 HEADER_INLINE char sexchar(uintptr_t* sex_nm, uintptr_t* sex_male, uintptr_t sample_uidx) {
2702   if (is_set(sex_nm, sample_uidx)) {
2703     return '2' - is_set(sex_male, sample_uidx);
2704   } else {
2705     return '0';
2706   }
2707 }
2708 
2709 int32_t open_and_size_string_list(char* fname, FILE** infile_ptr, uintptr_t* list_len_ptr, uintptr_t* max_str_len_ptr);
2710 
2711 int32_t load_string_list(FILE** infile_ptr, uintptr_t max_str_len, char* str_list);
2712 
2713 int32_t open_and_skip_first_lines(FILE** infile_ptr, char* fname, char* loadbuf, uintptr_t loadbuf_size, uint32_t lines_to_skip);
2714 
2715 int32_t load_to_first_token(FILE* infile, uintptr_t loadbuf_size, char comment_char, const char* file_descrip, char* loadbuf, char** bufptr_ptr, uintptr_t* line_idx_ptr);
2716 
2717 int32_t open_and_load_to_first_token(FILE** infile_ptr, char* fname, uintptr_t loadbuf_size, char comment_char, const char* file_descrip, char* loadbuf, char** bufptr_ptr, uintptr_t* line_idx_ptr);
2718 
2719 int32_t scan_max_strlen(char* fname, uint32_t colnum, uint32_t colnum2, uint32_t headerskip, char skipchar, uintptr_t* max_str_len_ptr, uintptr_t* max_str2_len_ptr);
2720 
2721 int32_t scan_max_fam_indiv_strlen(char* fname, uint32_t colnum, uintptr_t* max_sample_id_len_ptr);
2722 
2723 // void inplace_collapse_uint32(uint32_t* item_arr, uint32_t unfiltered_ct, uintptr_t* exclude_arr, uint32_t filtered_ct);
2724 
2725 void inplace_collapse_uint32_incl(uint32_t* item_arr, uint32_t unfiltered_ct, uintptr_t* incl_arr, uint32_t filtered_ct);
2726 
2727 char* alloc_and_init_collapsed_arr(char* item_arr, uintptr_t item_len, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t filtered_ct, uint32_t read_only);
2728 
2729 char* alloc_and_init_collapsed_arr_incl(char* item_arr, uintptr_t item_len, uintptr_t unfiltered_ct, uintptr_t* include_arr, uintptr_t filtered_ct, uint32_t read_only);
2730 
2731 void inplace_delta_collapse_arr(char* item_arr, uintptr_t item_len, uintptr_t filtered_ct_orig, uintptr_t filtered_ct_new, uintptr_t* exclude_orig, uintptr_t* exclude_new);
2732 
2733 void inplace_delta_collapse_bitfield(uintptr_t* read_ptr, uint32_t filtered_ct_new, uintptr_t* exclude_orig, uintptr_t* exclude_new);
2734 
2735 // deprecated, migrate to copy_bitarr_subset()
2736 void copy_bitarr_subset_excl(const uintptr_t* __restrict raw_bitarr, const uintptr_t* __restrict subset_excl, uint32_t raw_bitarr_size, uint32_t subset_size, uintptr_t* __restrict output_bitarr);
2737 
2738 void copy_bitarr_subset(const uintptr_t* __restrict raw_bitarr, const uintptr_t* __restrict subset_mask, uint32_t raw_bitarr_size, uint32_t subset_size, uintptr_t* __restrict output_bitarr);
2739 
2740 void uncollapse_copy_flip_include_arr(uintptr_t* collapsed_include_arr, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* output_exclude_arr);
2741 
2742 void copy_when_nonmissing(uintptr_t* loadbuf, char* source, uintptr_t elem_size, uintptr_t unfiltered_sample_ct, uintptr_t missing_ct, char* dest);
2743 
2744 uint32_t collapse_duplicate_ids(char* sorted_ids, uintptr_t id_ct, uintptr_t max_id_len, uint32_t* id_starts);
2745 
rand_unif(void)2746 HEADER_INLINE double rand_unif(void) {
2747   return (sfmt_genrand_uint32(&g_sfmt) + 0.5) * RECIP_2_32;
2748 }
2749 
2750 void range_list_init(Range_list* range_list_ptr);
2751 
2752 void free_range_list(Range_list* range_list_ptr);
2753 
2754 double normdist(double zz);
2755 
2756 double rand_normal(double* secondval_ptr);
2757 
2758 void init_sfmt64_from_sfmt32(sfmt_t* sfmt32, sfmt_t* sfmt64);
2759 
precompute_mods(uintptr_t sample_ct,uint32_t * precomputed_mods)2760 HEADER_INLINE void precompute_mods(uintptr_t sample_ct, uint32_t* precomputed_mods) {
2761   // sets precomputed_mods[n] = 2^32 mod (n-2)
2762   uintptr_t sample_idx;
2763   for (sample_idx = 2; sample_idx <= sample_ct; sample_idx++) {
2764     *precomputed_mods++ = (uint32_t)(0x100000000LLU % sample_idx);
2765   }
2766 }
2767 
2768 void generate_perm1_interleaved(uint32_t tot_ct, uint32_t set_ct, uintptr_t perm_idx, uintptr_t perm_ct, uintptr_t* perm_buf);
2769 
2770 uint32_t cubic_real_roots(double coef_a, double coef_b, double coef_c, double* solutions);
2771 
2772 void join_threads(pthread_t* threads, uint32_t ctp1);
2773 
2774 #ifdef _WIN32
2775 int32_t spawn_threads(pthread_t* threads, unsigned (__stdcall *start_routine)(void*), uintptr_t ct);
2776 #else
2777 int32_t spawn_threads(pthread_t* threads, void* (*start_routine)(void*), uintptr_t ct);
2778 #endif
2779 
2780 extern uintptr_t g_thread_spawn_ct;
2781 extern uint32_t g_is_last_thread_block;
2782 
2783 #ifdef _WIN32
2784 extern HANDLE g_thread_start_next_event[];
2785 extern HANDLE g_thread_cur_block_done_events[];
2786 
THREAD_BLOCK_FINISH(uintptr_t tidx)2787 HEADER_INLINE void THREAD_BLOCK_FINISH(uintptr_t tidx) {
2788   SetEvent(g_thread_cur_block_done_events[tidx - 1]);
2789   WaitForSingleObject(g_thread_start_next_event[tidx - 1], INFINITE);
2790 }
2791 
2792 void join_threads2(pthread_t* threads, uint32_t ctp1, uint32_t is_last_block);
2793 
2794 int32_t spawn_threads2(pthread_t* threads, unsigned (__stdcall *start_routine)(void*), uintptr_t ct, uint32_t is_last_block);
2795 #else
2796 void THREAD_BLOCK_FINISH(uintptr_t tidx);
2797 
2798 void join_threads2(pthread_t* threads, uint32_t ctp1, uint32_t is_last_block);
2799 
2800 int32_t spawn_threads2(pthread_t* threads, void* (*start_routine)(void*), uintptr_t ct, uint32_t is_last_block);
2801 #endif
2802 
2803 extern sfmt_t** g_sfmtp_arr;
2804 
2805 uint32_t bigstack_init_sfmtp(uint32_t thread_ct);
2806 
2807 #endif // __PLINK_COMMON_H__
2808