1 #ifndef __PLINK2_STRING_H__
2 #define __PLINK2_STRING_H__
3 
4 // This library is part of PLINK 2.00, copyright (C) 2005-2020 Shaun Purcell,
5 // Christopher Chang.
6 //
7 // This program is free software: you can redistribute it and/or modify it
8 // under the terms of the GNU Lesser General Public License as published by the
9 // Free Software Foundation, either version 3 of the License, or (at your
10 // option) any later version.
11 //
12 // This library is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
15 // for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public License
18 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 
20 
21 // Standalone string-printing and parsing functions which neither make
22 // permanent memory allocations nor use g_bigstack for temporary allocations.
23 
24 #include "plink2_base.h"
25 
26 #include <math.h>  // fabs(), isfinite()
27 #include <stddef.h>  // offsetof()
28 
29 #ifdef __cplusplus
30 #  include <algorithm>
31 #  if __cplusplus >= 201902L
32 #    include <execution>
33 #  endif
34 #  ifdef _WIN32
35     // Windows C++11 <algorithm> resets these values :(
36 #    undef PRIu64
37 #    undef PRId64
38 #    define PRIu64 "I64u"
39 #    define PRId64 "I64d"
40 #    undef PRIuPTR
41 #    undef PRIdPTR
42 #    ifdef __LP64__
43 #      define PRIuPTR PRIu64
44 #      define PRIdPTR PRId64
45 #    else
46 #      if __cplusplus < 201103L
47 #        define PRIuPTR "lu"
48 #        define PRIdPTR "ld"
49 #      else
50 #        define PRIuPTR "u"
51 #        define PRIdPTR "d"
52 #      endif
53 #    endif
54 #  endif
55 #endif
56 
57 #ifdef _WIN32
58 #  define EOLN_STR "\r\n"
59 #else
60 #  define EOLN_STR "\n"
61 #endif
62 
63 // generic maximum line byte length, currently also used as a default I/O
64 // buffer size.  .ped/.vcf/etc. lines can of course be longer.
65 CONSTI32(kMaxMediumLine, 131072);
66 
// DBL_MAX and FLT_MAX are normally provided by <float.h> (not <limits.h>),
// which may not have been included at this point; define fallbacks.
68 #ifndef DBL_MAX
69 #  define DBL_MAX 1.7976931348623157e308
70 #endif
71 #ifndef FLT_MAX
72 #  define FLT_MAX S_CAST(float, 3.40282347e38)
73 #endif
74 
75 // These are needed by plink2_stats.  May want to define these elsewhere, but
76 // they can't live in plink2_cmdline any more.
77 static const double kE = 2.7182818284590452;
78 static const double kPi = 3.1415926535897932;
79 static const double kSqrt2 = 1.4142135623730951;
80 static const double kRecipE = 0.36787944117144233;
81 static const double kLn2 = 0.6931471805599453;
82 static const double kRecip2m53 = 0.00000000000000011102230246251565404236316680908203125;
83 
84 // floating point comparison-to-nonzero tolerance, currently 2^{-30}
85 static const double kEpsilon = 0.000000000931322574615478515625;
// less tolerant version (2^{-44}) for some exact calculations
87 static const double kSmallEpsilon = 0.00000000000005684341886080801486968994140625;
88 
89 // 2^{-21}, must be >= sqrt(kSmallEpsilon)
90 static const double kBigEpsilon = 0.000000476837158203125;
91 
92 // 2^{-83} bias to give exact tests maximum ability to determine tiny p-values.
93 // (~2^{-53} is necessary to take advantage of denormalized small numbers, then
94 // allow tail sum to be up to 2^30.)
95 static const double kExactTestBias = 0.00000000000000000000000010339757656912845935892608650874535669572651386260986328125;
96 
97 #ifdef __cplusplus
98 #  define STD_SORT(ct, fallback_cmp, arr) std::sort(&((arr)[0]), (&((arr)[ct])))
99 #  if __cplusplus >= 201902L
100 // this should only be used for arrays of length >= variant_ct or sample_ct
101 // (sample_ct is cutting it close).
102 // macro should still be used in e.g. non-__cplusplus blocks, so that we have
103 // the option of falling back on a hand-coded parallel sort.
104 #    define STD_SORT_PAR_UNSEQ(ct, fallback_cmp, arr) std::sort(std::execution::par_unseq, &((arr)[0]), (&((arr)[ct])))
105 #  else
106 #    define STD_SORT_PAR_UNSEQ(ct, fallback_cmp, arr) std::sort(&((arr)[0]), (&((arr)[ct])))
107 #  endif
108 #else
109 #  define STD_SORT(ct, fallback_cmp, arr) qsort((arr), (ct), sizeof(*(arr)), (fallback_cmp))
110 #  define STD_SORT_PAR_UNSEQ(ct, fallback_cmp, arr) qsort((arr), (ct), sizeof(*(arr)), (fallback_cmp))
111 #endif
112 
113 #ifdef __cplusplus
114 namespace plink2 {
115 #endif
116 
117 // A bunch of library functions operating on char*s don't modify the buffer
118 // themselves, but return a char* which should inherit the constness of the
119 // input parameter.  We want them to
120 //   1. check const-correctness when this is compiled as C++
121 //   2. still be valid C99
122 // and the method of achieving this should be minimally bug-prone.
123 //
124 // Current hack:
125 //   1. First declare the const char*-accepting version, with return type of
126 //      CXXCONST_CP.  Make all return statements include a C-style cast, even
127 //      when not strictly necessary (e.g. relaying the return value of another
128 //      function of this sort), unless nullptr is being returned.
129 //   2. Then declare the char*-accepting version in an immediately following
130 //      #ifdef __cplusplus block, which is simply a wrapper that uses
131 //      const_cast twice.
132 // This is kind of ugly, so I may not use this approach in non-library source
133 // code.
134 //
135 // There are related issues with const char** and const char* const*.
136 // Unfortunately, while C99 implicitly converts char* to const char*, it does
137 // not do char** -> const char* const*, so caller-side casts are required.
138 // (And char** -> const char** implicit conversion doesn't happen in C++ either
139 // for good reason, see http://c-faq.com/ansi/constmismatch.html .)  The 'good'
140 // news is that this removes the need for duplicate C++ function prototypes,
141 // but it's generally an even worse situation than the single-indirection case;
142 // these macro names are intentionally verbose to encourage assignment to the
143 // appropriate-qualified type as soon as possible.
144 #ifdef __cplusplus
145 #  define CXXCONST_CP const char*
146 #  define CXXCONST_VOIDP const void*
147 #  define TO_CONSTCPCONSTP(char_pp) (char_pp)
148 #else
149 #  define CXXCONST_CP char*
150 #  define CXXCONST_VOIDP void*
151 #  define TO_CONSTCPCONSTP(char_pp) ((const char* const*)(char_pp))
152 #endif
153 
154 #ifdef _GNU_SOURCE
155 // There was some recent (2016) discussion on the gcc mailing list on strlen()
156 // vs. rawmemchr(., 0), where it was claimed that rawmemchr(., 0) could be
157 // compiled to something slower than &(.[strlen(.)]), rather than being at
158 // least as good.  However, this didn't happen when I tried to benchmark this,
159 // so I'll stick to the function that returns the right type (except when
160 // rawmemchr itself has to be emulated).
strnul(const char * str)161 HEADER_INLINE CXXCONST_CP strnul(const char* str) {
162   return S_CAST(CXXCONST_CP, rawmemchr(str, 0));
163 }
164 
165 #  ifdef __cplusplus
strnul(char * str)166 HEADER_INLINE char* strnul(char* str) {
167   return const_cast<char*>(strnul(const_cast<const char*>(str)));
168 }
169 #  endif
170 
171 #else  // !_GNU_SOURCE
172 
173 #  ifdef __LP64__
174 CXXCONST_VOIDP rawmemchr(const void* ss, int cc);
175 
176 HEADER_INLINE CXXCONST_CP strnul(const char* str) {
177   return S_CAST(CXXCONST_CP, rawmemchr(str, 0));
178 }
179 #  else  // !_GNU_SOURCE, !__LP64__
180 HEADER_INLINE CXXCONST_VOIDP rawmemchr(const void* ss, int cc) {
181   return S_CAST(CXXCONST_VOIDP, memchr(ss, cc, 0x80000000U - kBytesPerVec));
182 }
183 
// Returns a pointer to str's null terminator (strlen()-based in this build
// configuration).
HEADER_INLINE CXXCONST_CP strnul(const char* str) {
  const size_t slen = strlen(str);
  return S_CAST(CXXCONST_CP, str + slen);
}
187 #  endif
188 
189 #  ifdef __cplusplus
190 HEADER_INLINE void* rawmemchr(void* ss, int cc) {
191   return const_cast<void*>(rawmemchr(const_cast<const void*>(ss), cc));
192 }
193 
194 HEADER_INLINE char* strnul(char* str) {
195   return const_cast<char*>(strnul(const_cast<const char*>(str)));
196 }
197 #  endif
198 
199 #endif  // !_GNU_SOURCE
200 
201 // See also AdvToDelimOrEnd later below, which is an obvious alternative memchr
202 // interface.
203 
204 // ReadLineStream emits lines which are *not* null-terminated, but are
205 // guaranteed to have trailing '\n's.
206 CXXCONST_VOIDP rawmemchr2(const void* ss, unsigned char ucc1, unsigned char ucc2);
207 
strchrnul_n(const char * ss,unsigned char ucc1)208 HEADER_INLINE CXXCONST_CP strchrnul_n(const char* ss, unsigned char ucc1) {
209   return S_CAST(CXXCONST_CP, rawmemchr2(ss, ucc1, '\n'));
210 }
211 
212 CXXCONST_VOIDP rawmemchr3(const void* ss, unsigned char ucc1, unsigned char ucc2, unsigned char ucc3);
213 
strchrnul2(const char * ss,unsigned char ucc1,unsigned char ucc2)214 HEADER_INLINE CXXCONST_CP strchrnul2(const char* ss, unsigned char ucc1, unsigned char ucc2) {
215   return S_CAST(CXXCONST_CP, rawmemchr3(ss, ucc1, ucc2, '\0'));
216 }
217 
strchrnul2_n(const char * ss,unsigned char ucc1,unsigned char ucc2)218 HEADER_INLINE CXXCONST_CP strchrnul2_n(const char* ss, unsigned char ucc1, unsigned char ucc2) {
219   return S_CAST(CXXCONST_CP, rawmemchr3(ss, ucc1, ucc2, '\n'));
220 }
221 
222 CXXCONST_CP strchrnul3(const char* ss, unsigned char ucc1, unsigned char ucc2, unsigned char ucc3);
223 
224 #ifdef __cplusplus
rawmemchr2(void * ss,unsigned char ucc1,unsigned char ucc2)225 HEADER_INLINE void* rawmemchr2(void* ss, unsigned char ucc1, unsigned char ucc2) {
226   return const_cast<void*>(rawmemchr2(const_cast<const void*>(ss), ucc1, ucc2));
227 }
228 
strchrnul_n(char * ss,unsigned char ucc1)229 HEADER_INLINE char* strchrnul_n(char* ss, unsigned char ucc1) {
230   return const_cast<char*>(strchrnul_n(const_cast<const char*>(ss), ucc1));
231 }
232 
rawmemchr3(void * ss,unsigned char ucc1,unsigned char ucc2,unsigned char ucc3)233 HEADER_INLINE void* rawmemchr3(void* ss, unsigned char ucc1, unsigned char ucc2, unsigned char ucc3) {
234   return const_cast<void*>(rawmemchr3(const_cast<const void*>(ss), ucc1, ucc2, ucc3));
235 }
236 
strchrnul2(char * ss,unsigned char ucc1,unsigned char ucc2)237 HEADER_INLINE char* strchrnul2(char* ss, unsigned char ucc1, unsigned char ucc2) {
238   return const_cast<char*>(strchrnul2(const_cast<const char*>(ss), ucc1, ucc2));
239 }
240 
strchrnul2_n(char * ss,unsigned char ucc1,unsigned char ucc2)241 HEADER_INLINE char* strchrnul2_n(char* ss, unsigned char ucc1, unsigned char ucc2) {
242   return const_cast<char*>(strchrnul2_n(const_cast<const char*>(ss), ucc1, ucc2));
243 }
244 
strchrnul3(char * ss,unsigned char ucc1,unsigned char ucc2,unsigned char ucc3)245 HEADER_INLINE char* strchrnul3(char* ss, unsigned char ucc1, unsigned char ucc2, unsigned char ucc3) {
246   return const_cast<char*>(strchrnul3(const_cast<const char*>(ss), ucc1, ucc2, ucc3));
247 }
248 #endif
249 
250 #ifndef _GNU_SOURCE
251 #  ifdef __LP64__
strchrnul(const char * str,int needle)252 HEADER_INLINE CXXCONST_CP strchrnul(const char* str, int needle) {
253   return S_CAST(CXXCONST_CP, rawmemchr2(str, 0, needle));
254 }
255 #  else
strchrnul(const char * str,int cc)256 HEADER_INLINE CXXCONST_CP strchrnul(const char* str, int cc) {
257   const char* strchr_result = strchr(str, cc);
258   if (strchr_result) {
259     return S_CAST(CXXCONST_CP, strchr_result);
260   }
261   return S_CAST(CXXCONST_CP, strnul(str));
262 }
263 #  endif
264 
265 #  ifdef __cplusplus
strchrnul(char * ss,int needle)266 HEADER_INLINE char* strchrnul(char* ss, int needle) {
267   return const_cast<char*>(strchrnul(const_cast<const char*>(ss), needle));
268 }
269 #  endif
270 #endif
271 
272 // These return 1 at eoln.
// Advances *ss_ptr to the first byte at or after its current position which
// equals ucc1 or '\n'.  Returns 0 if the scan stopped on ucc1, and 1 if it
// stopped at end-of-line instead.
HEADER_INLINE uint32_t strchrnul_n_mov(unsigned char ucc1, const char** ss_ptr) {
  const char* ss_next = strchrnul_n(*ss_ptr, ucc1);
  *ss_ptr = ss_next;
  // Compare as unsigned char: plain char may be signed, in which case a byte
  // >= 0x80 is promoted to a negative int and never compares equal to ucc1.
  return (S_CAST(unsigned char, *ss_next) != ucc1);
}
278 
// Same as strchrnul_n_mov(), except that the scan starts one byte past the
// current *ss_ptr position.  Returns 0 if the scan stopped on ucc1, and 1 if
// it stopped at end-of-line instead.
HEADER_INLINE uint32_t incr_strchrnul_n_mov(unsigned char ucc1, const char** ss_ptr) {
  const char* ss_next = strchrnul_n(&((*ss_ptr)[1]), ucc1);
  *ss_ptr = ss_next;
  // Compare as unsigned char: plain char may be signed, in which case a byte
  // >= 0x80 is promoted to a negative int and never compares equal to ucc1.
  return (S_CAST(unsigned char, *ss_next) != ucc1);
}
284 
285 // input for WordWrap should have no intermediate '\n's.  If suffix_len is 0,
286 // there should be a terminating \n.
287 void WordWrap(uint32_t suffix_len, char* strbuf);
288 
289 uint32_t UintSlen(uint32_t num);
290 
IntSlen(int32_t num)291 HEADER_INLINE uint32_t IntSlen(int32_t num) {
292   // see abs_i32()
293   const uint32_t neg_sign_bit = -(S_CAST(uint32_t, num) >> 31);
294   return UintSlen((S_CAST(uint32_t, num) ^ neg_sign_bit) - neg_sign_bit) - neg_sign_bit;
295 }
296 
297 // memcpya(), memseta() defined in plink2_base.h
298 
// Copies ct bytes from src to dst, appends extra_char, and returns a pointer
// to the byte just past extra_char.
HEADER_INLINE char* memcpyax(void* __restrict dst, const void* __restrict src, uint32_t ct, char extra_char) {
  char* dst_chars = S_CAST(char*, dst);
  memcpy(dst_chars, src, ct);
  dst_chars[ct] = extra_char;
  return &(dst_chars[ct + 1]);
}
304 
// Copies ct bytes from src to dst, then writes extra_char right after them.
HEADER_INLINE void memcpyx(void* __restrict dst, const void* __restrict src, uint32_t ct, char extra_char) {
  char* dst_chars = S_CAST(char*, dst);
  memcpy(dst_chars, src, ct);
  dst_chars[ct] = extra_char;
}
309 
strcpya(char * __restrict dst,const void * __restrict src)310 HEADER_INLINE char* strcpya(char* __restrict dst, const void* __restrict src) {
311   const uintptr_t slen = strlen(S_CAST(const char*, src));
312   return memcpya(dst, src, slen);
313 }
314 
// Copies the null-terminated string src (without its terminator) to dst,
// appends extra_char, and returns a pointer to the byte just past extra_char.
HEADER_INLINE char* strcpyax(char* __restrict dst, const void* __restrict src, char extra_char) {
  const char* src_str = S_CAST(const char*, src);
  const uintptr_t src_slen = strlen(src_str);
  memcpy(dst, src, src_slen);
  char* write_iter = &(dst[src_slen]);
  *write_iter++ = extra_char;
  return write_iter;
}
321 
322 // MinGW support for stpcpy is a mess, so I'll use a capitalized name to route
323 // around the problem.
324 #if defined(_GNU_SOURCE) || defined(__APPLE__) || (_POSIX_C_SOURCE >= 200809L)
// Thin wrapper around the platform's stpcpy().
HEADER_INLINE char* Stpcpy(char* __restrict dst, const char* __restrict src) {
  char* dst_terminator = stpcpy(dst, src);
  return dst_terminator;
}
328 #else
// stpcpy() substitute: copies src, including its null terminator, to dst, and
// returns a pointer to the copied terminator.
HEADER_INLINE char* Stpcpy(char* __restrict dst, const char* __restrict src) {
  const uintptr_t src_blen = strlen(src) + 1;
  memcpy(dst, src, src_blen);
  return &(dst[src_blen - 1]);
}
334 #endif
335 
336 #if defined(__LP64__) && (__cplusplus >= 201103L)
MemcpyaxK(void * __restrict dst,const void * __restrict src,char extra_char)337 template <uint32_t N> char* MemcpyaxK(void* __restrict dst, const void* __restrict src, char extra_char) {
338   memcpyo_k(dst, src, N);
339   S_CAST(char*, dst)[N] = extra_char;
340   return &(S_CAST(char*, dst)[N + 1]);
341 }
342 
343 #  define memcpyax_k(dst, src, ct, extra_char) plink2::MemcpyaxK<ct>(dst, src, extra_char)
344 
StrequalK(const char * s1,const char * k_s2,uint32_t s1_slen)345 template <uint32_t N> int32_t StrequalK(const char* s1, const char* k_s2, uint32_t s1_slen) {
346   return (s1_slen == N) && memequal_k(s1, k_s2, N);
347 };
348 
// constexpr strlen(): single-return recursive form, as required for C++11
// constexpr functions.
constexpr uint32_t CompileTimeSlen(const char* k_str) {
  return (*k_str == '\0')? 0 : (CompileTimeSlen(k_str + 1) + 1);
}
352 
353 // can also use sizeof(k_s2) - 1, but that's less safe
354 #  define strequal_k(s1, k_s2, s1_slen) plink2::StrequalK<plink2::CompileTimeSlen(k_s2)>(s1, k_s2, s1_slen)
355 
356 #  define strequal_k_unsafe(s1, k_s2) memequal_k(s1, k_s2, 1 + plink2::CompileTimeSlen(k_s2))
357 
// No trailing semicolons: like the inline-function fallbacks, these must be
// usable as ordinary expressions/statements (e.g. in an unbraced if/else, or
// as a subexpression); the caller supplies the semicolon.
#  define strcpy_k(dst, src) plink2::MemcpyKImpl<plink2::CompileTimeSlen(src) + 1>::MemcpyK(dst, src)

#  define strcpya_k(dst, src) plink2::MemcpyaoK<plink2::CompileTimeSlen(src)>(dst, src)
361 #else  // !(defined(__LP64__) && (__cplusplus >= 201103L))
memcpyax_k(void * __restrict dst,const void * __restrict src,uint32_t ct,char extra_char)362 HEADER_INLINE char* memcpyax_k(void* __restrict dst, const void* __restrict src, uint32_t ct, char extra_char) {
363   return memcpyax(dst, src, ct, extra_char);
364 }
365 
strequal_k(const char * s1,const char * k_s2,uint32_t s1_slen)366 HEADER_INLINE int32_t strequal_k(const char* s1, const char* k_s2, uint32_t s1_slen) {
367   // any sane compiler should compute s2_slen at compile-time if k_s2 is a
368   // constant string
369   const uint32_t s2_slen = strlen(k_s2);
370   return (s1_slen == s2_slen) && memequal(s1, k_s2, s2_slen);
371 }
372 
373 // Can use this when it's always safe to read first (1 + strlen(k_s2)) bytes of
374 // s1.
strequal_k_unsafe(const char * s1,const char * k_s2)375 HEADER_INLINE int32_t strequal_k_unsafe(const char* s1, const char* k_s2) {
376   const uint32_t s2_blen = 1 + strlen(k_s2);
377   return memequal(s1, k_s2, s2_blen);
378 }
379 
// Fallback when the template-based version is unavailable: plain strcpy().
HEADER_INLINE void strcpy_k(char* __restrict dst, const void* __restrict src) {
  const char* src_str = S_CAST(const char*, src);
  strcpy(dst, src_str);
}
383 
strcpya_k(char * __restrict dst,const void * __restrict src)384 HEADER_INLINE char* strcpya_k(char* __restrict dst, const void* __restrict src) {
385   return strcpya(dst, src);
386 }
387 #endif
388 
389 #if defined(__cplusplus)
390 #  if __cplusplus >= 201103L
// float finiteness check for C++11 and later.  The using-directive lets the
// unqualified isfinite() call resolve to the std:: function.
HEADER_INLINE bool isfinite_f(float fxx) {
  using namespace std;
  const bool is_finite = isfinite(fxx);
  return is_finite;
}
395 #  else
396 #    ifdef isfinite
397 #      define isfinite_f isfinite
398 #    else
// Pre-C++11 fallback used when no isfinite macro is defined.
HEADER_INLINE bool isfinite_f(float fxx) {
  if (fxx != fxx) {
    // NaN is the only value that compares unequal to itself.
    return false;
  }
  return !((fxx == INFINITY) || (fxx == -INFINITY));
}
402 #    endif
403 #  endif
404 #else
405 #  define isfinite_f isfinite
406 #endif
407 
// Returns 1 iff ucc is the space character or any lower-valued byte (this
// includes tab, '\n', '\r', and the null terminator).
HEADER_INLINE int32_t IsSpaceOrEoln(unsigned char ucc) {
  return !(ucc > ' ');
}
411 
412 // Assumes it's safe to read first 1 + strlen(s_const) bytes of s_read, i.e.
413 // this is ALWAYS 'unsafe'.
414 // Differs from strequal_k_unsafe() since strings are not considered equal when
415 // s_read[strlen(s_const)] isn't a token-ender.
tokequal_k(const char * s_read,const char * s_const)416 HEADER_INLINE int32_t tokequal_k(const char* s_read, const char* s_const) {
417   const uint32_t s_const_slen = strlen(s_const);
418   return memequal(s_read, s_const, s_const_slen) && IsSpaceOrEoln(s_read[s_const_slen]);
419 }
420 
421 // s_prefix must be strictly contained.
StrStartsWith(const char * s_read,const char * s_prefix_const,uint32_t s_read_slen)422 HEADER_INLINE int32_t StrStartsWith(const char* s_read, const char* s_prefix_const, uint32_t s_read_slen) {
423   const uint32_t s_const_slen = strlen(s_prefix_const);
424   return (s_read_slen > s_const_slen) && memequal(s_read, s_prefix_const, s_const_slen);
425 }
426 
427 // permits s_read and s_prefix to be equal.
StrStartsWith0(const char * s_read,const char * s_prefix_const,uint32_t s_read_slen)428 HEADER_INLINE int32_t StrStartsWith0(const char* s_read, const char* s_prefix_const, uint32_t s_read_slen) {
429   const uint32_t s_const_slen = strlen(s_prefix_const);
430   return (s_read_slen >= s_const_slen) && memequal(s_read, s_prefix_const, s_const_slen);
431 }
432 
433 // Can use this when it's always safe to read first strlen(s_prefix_const)
434 // bytes of s_read.
StrStartsWithUnsafe(const char * s_read,const char * s_prefix_const)435 HEADER_INLINE int32_t StrStartsWithUnsafe(const char* s_read, const char* s_prefix_const) {
436   const uint32_t s_const_slen = strlen(s_prefix_const);
437   return memequal(s_read, s_prefix_const, s_const_slen);
438 }
439 
440 // s_suffix must be strictly contained.
StrEndsWith(const char * s_read,const char * s_suffix_const,uint32_t s_read_slen)441 HEADER_INLINE int32_t StrEndsWith(const char* s_read, const char* s_suffix_const, uint32_t s_read_slen) {
442   const uint32_t s_const_slen = strlen(s_suffix_const);
443   return (s_read_slen > s_const_slen) && memequal(&(s_read[s_read_slen - s_const_slen]), s_suffix_const, s_const_slen);
444 }
445 
// These are likely to be revised to take const void* parameters, and moved to
// plink2_base.
// FirstUnequal4() requires slen >= 4.
449 uintptr_t FirstUnequal4(const char* s1, const char* s2, uintptr_t slen);
450 
FirstUnequal(const char * s1,const char * s2,uintptr_t slen)451 HEADER_INLINE uintptr_t FirstUnequal(const char* s1, const char* s2, uintptr_t slen) {
452   // Returns position of first mismatch, or slen if none was found.
453   if (slen >= 4) {
454     return FirstUnequal4(s1, s2, slen);
455   }
456   for (uintptr_t pos = 0; pos != slen; ++pos) {
457     if (s1[pos] != s2[pos]) {
458       return pos;
459     }
460   }
461   return slen;
462 }
463 
464 // May read (kBytesPerWord - 1) bytes past the end of each string.
strequal_overread(const char * s1,const char * s2)465 HEADER_INLINE int32_t strequal_overread(const char* s1, const char* s2) {
466   const uintptr_t* s1_alias = R_CAST(const uintptr_t*, s1);
467   const uintptr_t* s2_alias = R_CAST(const uintptr_t*, s2);
468   for (uintptr_t widx = 0; ; ++widx) {
469     const uintptr_t w1 = s1_alias[widx];
470     const uintptr_t zcheck = DetectFirstZeroByte(w1);
471     const uintptr_t w2 = s2_alias[widx];
472     const uintptr_t xor_word = w1 ^ w2;
473     if (zcheck) {
474       // Mask out bytes past the known null.
475       const uintptr_t mask = zcheck ^ (zcheck - 1);
476       return (xor_word & mask)? 0 : 1;
477     }
478     if (xor_word) {
479       return 0;
480     }
481   }
482 }
483 
484 int32_t strcmp_overread(const char* s1, const char* s2);
485 
486 // Support for sorting arrays of strings, represented either as an array of
487 // const char*s, or a single [# of strings] x [max byte width] array of chars
488 // (suitable for old-school qsort(), referred to as a 'strbox' here).
489 // The SortStrboxIndexed functions automatically construct an array of const
490 // char*s and sort that when the max byte width is large.
491 int32_t strcmp_casted(const void* s1, const void* s2);
492 
493 int32_t strcmp_overread_casted(const void* s1, const void* s2);
494 
495 
496 int32_t strcmp_natural(const void* s1, const void* s2);
497 
498 
499 int32_t strcmp_deref(const void* s1, const void* s2);
500 
501 int32_t strcmp_overread_deref(const void* s1, const void* s2);
502 
503 int32_t strcmp_natural_deref(const void* s1, const void* s2);
504 
505 
506 int32_t strcmp_natural_uncasted(const char* s1, const char* s2);
507 
508 #ifdef __cplusplus
509 typedef struct Strbuf28UiStruct {
510   char strbuf[28];
511   uint32_t orig_idx;
512   bool operator<(const struct Strbuf28UiStruct& rhs) const {
513     return (strcmp_natural_uncasted(strbuf, rhs.strbuf) < 0);
514   }
515 } Strbuf28Ui;
516 
517 typedef struct Strbuf60UiStruct {
518   char strbuf[60];
519   uint32_t orig_idx;
520   bool operator<(const struct Strbuf60UiStruct& rhs) const {
521     return (strcmp_natural_uncasted(strbuf, rhs.strbuf) < 0);
522   }
523 } Strbuf60Ui;
524 #endif
525 
526 uintptr_t GetStrboxsortWentryBlen(uintptr_t max_str_blen);
527 
528 #ifdef __cplusplus
// Wrapper around a const char* which orders entries by strcmp() on the
// pointed-to strings.
typedef struct StrSortDerefStruct {
  const char* strptr;

  bool operator<(const struct StrSortDerefStruct& rhs) const {
    const int cmp_result = strcmp(strptr, rhs.strptr);
    return (cmp_result < 0);
  }
} StrSortDeref;
536 
strcmp_overread_lt(const char * s1,const char * s2)537 HEADER_INLINE bool strcmp_overread_lt(const char* s1, const char* s2) {
538   const uintptr_t* s1_alias = R_CAST(const uintptr_t*, s1);
539   const uintptr_t* s2_alias = R_CAST(const uintptr_t*, s2);
540   for (uintptr_t widx = 0; ; ++widx) {
541     uintptr_t w1 = s1_alias[widx];
542     const uintptr_t zcheck = DetectFirstZeroByte(w1);
543     uintptr_t w2 = s2_alias[widx];
544     if (zcheck) {
545       // Mask out bytes past the known null.
546       // Note that we can't safely include the garbage bytes past the null in
547       // the comparison even if they aren't being changed, because they may be
548       // uninitialized, and if they're also past a page boundary the OS may
549       // return different values for consecutive queries on the same address!
550       // See e.g. "CppCon 2016: Nicholas Ormrod 'The strange details of
551       // std::string at Facebook'"
552       // (https://www.youtube.com/watch?v=kPR8h4-qZdk ) starting at ~22:15.
553       const uintptr_t mask = zcheck ^ (zcheck - 1);
554       w1 &= mask;
555       w2 &= mask;
556       if (w1 == w2) {
557         return false;
558       }
559       goto strcmp_overread_lt_finish;
560     }
561     if (w1 != w2) {
562     strcmp_overread_lt_finish:
563 #  ifdef __LP64__
564       return __builtin_bswap64(w1) < __builtin_bswap64(w2);
565 #  else
566       return __builtin_bswap32(w1) < __builtin_bswap32(w2);
567 #  endif
568     }
569   }
570 }
571 
572 typedef struct StrSortDerefOverreadStruct {
573   // Must be safe to read up to (kBytesPerWord - 1) bytes past the end of these
574   // strings.  Enough of a speed advantage to be worth using whenever possible,
575   // though.
576   const char* strptr;
577 
578   bool operator<(const struct StrSortDerefOverreadStruct& rhs) const {
579     return strcmp_overread_lt(strptr, rhs.strptr);
580   }
581 } StrSortDerefOverread;
582 
583 typedef struct StrNsortDerefStruct {
584   const char* strptr;
585   bool operator<(const struct StrNsortDerefStruct& rhs) const {
586     return (strcmp_natural(strptr, rhs.strptr) < 0);
587   }
588 } StrNsortDeref;
589 
590 typedef struct StrNsortIndexedDerefStruct {
591   const char* strptr;
592   uint32_t orig_idx;
593   bool operator<(const struct StrNsortIndexedDerefStruct& rhs) const {
594     return (strcmp_natural_uncasted(strptr, rhs.strptr) < 0);
595   }
596 } StrNsortIndexedDeref;
597 
StrptrArrSort(uintptr_t ct,const char ** strptr_arr)598 HEADER_INLINE void StrptrArrSort(uintptr_t ct, const char** strptr_arr) {
599   std::sort(R_CAST(StrSortDeref*, strptr_arr), &(R_CAST(StrSortDeref*, strptr_arr)[ct]));
600 }
601 
StrptrArrSortOverread(uintptr_t ct,const char ** strptr_arr)602 HEADER_INLINE void StrptrArrSortOverread(uintptr_t ct, const char** strptr_arr) {
603   std::sort(R_CAST(StrSortDerefOverread*, strptr_arr), &(R_CAST(StrSortDerefOverread*, strptr_arr)[ct]));
604 }
605 
StrptrArrNsort(uintptr_t ct,const char ** strptr_arr)606 HEADER_INLINE void StrptrArrNsort(uintptr_t ct, const char** strptr_arr) {
607   std::sort(R_CAST(StrNsortDeref*, strptr_arr), &(R_CAST(StrNsortDeref*, strptr_arr)[ct]));
608 }
609 
610 // need to expose these for plink2_cmdline bigstack-allocating
611 // SortStrboxIndexed()'s use
612 void SortStrbox32bFinish(uintptr_t str_ct, uintptr_t max_str_blen, uint32_t use_nsort, Strbuf28Ui* filled_wkspace, char* sorted_strbox, uint32_t* id_map);
613 
614 void SortStrbox64bFinish(uintptr_t str_ct, uintptr_t max_str_blen, uint32_t use_nsort, Strbuf60Ui* filled_wkspace, char* sorted_strbox, uint32_t* id_map);
615 
616 // Must be ok to overread.
617 void SortStrboxIndexed2(uintptr_t str_ct, uintptr_t max_str_blen, uint32_t use_nsort, char* strbox, uint32_t* id_map, void* sort_wkspace);
618 #else  // !__cplusplus
StrptrArrSort(uintptr_t ct,const char ** strptr_arr)619 HEADER_INLINE void StrptrArrSort(uintptr_t ct, const char** strptr_arr) {
620   qsort(strptr_arr, ct, sizeof(intptr_t), strcmp_deref);
621 }
622 
StrptrArrSortOverread(uintptr_t ct,const char ** strptr_arr)623 HEADER_INLINE void StrptrArrSortOverread(uintptr_t ct, const char** strptr_arr) {
624   qsort(strptr_arr, ct, sizeof(intptr_t), strcmp_overread_deref);
625 }
626 
StrptrArrNsort(uintptr_t ct,const char ** strptr_arr)627 HEADER_INLINE void StrptrArrNsort(uintptr_t ct, const char** strptr_arr) {
628   qsort(strptr_arr, ct, sizeof(intptr_t), strcmp_natural_deref);
629 }
630 
631 // Must be ok to overread.
632 void SortStrboxIndexed2Fallback(uintptr_t str_ct, uintptr_t max_str_blen, uint32_t use_nsort, char* strbox, uint32_t* id_map, void* sort_wkspace);
633 
SortStrboxIndexed2(uintptr_t str_ct,uintptr_t max_str_blen,uint32_t use_nsort,char * strbox,uint32_t * id_map,void * sort_wkspace)634 HEADER_INLINE void SortStrboxIndexed2(uintptr_t str_ct, uintptr_t max_str_blen, uint32_t use_nsort, char* strbox, uint32_t* id_map, void* sort_wkspace) {
635   SortStrboxIndexed2Fallback(str_ct, max_str_blen, use_nsort, strbox, id_map, sort_wkspace);
636 }
637 #endif
638 
639 // Uses malloc instead of bigstack.
640 // Must be ok to overread strbox.
641 BoolErr SortStrboxIndexedMalloc(uintptr_t str_ct, uintptr_t max_str_blen, char* strbox, uint32_t* id_map);
642 
643 // Returns dedup'd strbox entry count.
644 uint32_t CopyAndDedupSortedStrptrsToStrbox(const char* const* sorted_strptrs, uintptr_t str_ct, uintptr_t max_str_blen, char* strbox);
645 
// String pointer plus original index; in C++, ordered by strcmp() on the
// pointed-to strings.
// Note that this can be expected to occupy 16 bytes, not 12, on 64-bit
// systems.
typedef struct StrSortIndexedDerefStruct {
  const char* strptr;
  uint32_t orig_idx;
#ifdef __cplusplus
  bool operator<(const struct StrSortIndexedDerefStruct& rhs) const {
    const int cmp_result = strcmp(strptr, rhs.strptr);
    return (cmp_result < 0);
  }
#endif
} StrSortIndexedDeref;
657 
658 typedef struct StrSortIndexedDerefOverreadStruct {
659   // must be safe to read up to (kBytesPerWord - 1) bytes past the end of these
660   // strings.
661   const char* strptr;
662 
663   uint32_t orig_idx;
664 #ifdef __cplusplus
665   bool operator<(const struct StrSortIndexedDerefOverreadStruct& rhs) const {
666     return strcmp_overread_lt(strptr, rhs.strptr);
667   }
668 #endif
669 } StrSortIndexedDerefOverread;
670 
671 void StrptrArrSortMain(uintptr_t str_ct, uint32_t overread_ok, uint32_t use_nsort, StrSortIndexedDeref* wkspace_alias);
672 
673 
// Returns 1 iff ucc is an ASCII letter ('A'..'Z' or 'a'..'z').
HEADER_INLINE int32_t IsLetter(unsigned char ucc) {
  // Setting bit 5 maps 'A'..'Z' onto 'a'..'z'; no other byte is moved into
  // that range.
  const uint32_t lowered = ucc | 0x20;
  // Unsigned wraparound makes this a single range check.
  return ((lowered - 'a') < 26);
}
677 
// Returns 1 iff ucc is in '0'..'9'.
// (If the digit value itself is needed, it's better to compute
// (unsigned char)cc - '0' directly...)
HEADER_INLINE int32_t IsDigit(unsigned char ucc) {
  // Unsigned wraparound sends everything below '0' to a huge value.
  return ((S_CAST(uint32_t, ucc) - '0') < 10);
}
682 
// Returns 1 iff ucc is outside '0'..'9'.
HEADER_INLINE int32_t IsNotDigit(unsigned char ucc) {
  return !((ucc >= '0') && (ucc <= '9'));
}
686 
// Returns 1 iff ucc is outside '1'..'9', i.e. it is not a nonzero digit.
HEADER_INLINE int32_t IsNotNzdigit(unsigned char ucc) {
  // Unsigned wraparound sends '0' and everything below it to a huge value.
  return ((S_CAST(uint32_t, ucc) - '1') >= 9);
}
690 
// kns = "known non-space" (where tab counts as a space).
// All chars < 32 other than tab may as well be treated as eoln...
// The test is identical to IsSpaceOrEoln()'s; the separate name makes intent
// clearer, and leaves room for a debug-assert that we aren't at a space/tab.
HEADER_INLINE int32_t IsEolnKns(unsigned char ucc) {
  // could assert ucc is not a space/tab?
  return !(ucc > ' ');
}
699 
// Returns 1 iff ucc is '#' or a byte below 32 (space itself is excluded).
HEADER_INLINE int32_t IsEolnOrCommentKns(unsigned char ucc) {
  if (ucc == '#') {
    return 1;
  }
  return (ucc < ' ');
}
703 
NoMoreTokensKns(const char * str)704 HEADER_INLINE int32_t NoMoreTokensKns(const char* str) {
705   return ((!str) || IsEolnKns(*str));
706 }
707 
// Skips over a leading run of cc, and returns a pointer to the first
// character that differs from it.
HEADER_INLINE CXXCONST_CP FirstNonChar(const char* str_iter, char cc) {
  for (; *str_iter == cc; ++str_iter) {
  }
  return S_CAST(CXXCONST_CP, str_iter);
}
714 
715 #ifdef __cplusplus
FirstNonChar(char * str_iter,char cc)716 HEADER_INLINE char* FirstNonChar(char* str_iter, char cc) {
717   return const_cast<char*>(FirstNonChar(const_cast<const char*>(str_iter), cc));
718 }
719 #endif
720 
// Skips over leading spaces and tabs, and returns a pointer to the first
// character that is neither.
HEADER_INLINE CXXCONST_CP FirstNonTspace(const char* str_iter) {
  while (1) {
    const char cur_char = *str_iter;
    if ((cur_char != ' ') && (cur_char != '\t')) {
      break;
    }
    ++str_iter;
  }
  return S_CAST(CXXCONST_CP, str_iter);
}
727 
728 #ifdef __cplusplus
FirstNonTspace(char * str_iter)729 HEADER_INLINE char* FirstNonTspace(char* str_iter) {
730   return const_cast<char*>(FirstNonTspace(const_cast<const char*>(str_iter)));
731 }
732 #endif
733 
FirstPostspaceBounded(const char * str_iter,const char * str_end)734 HEADER_INLINE CXXCONST_CP FirstPostspaceBounded(const char* str_iter, const char* str_end) {
735   for (; str_iter != str_end; ++str_iter) {
736     if (ctou32(*str_iter) > ' ') {
737       break;
738     }
739   }
740   return S_CAST(CXXCONST_CP, str_iter);
741 }
742 
743 #ifdef __cplusplus
FirstPostspaceBounded(char * str_iter,char * str_end)744 HEADER_INLINE char* FirstPostspaceBounded(char* str_iter, char* str_end) {
745   return const_cast<char*>(FirstPostspaceBounded(const_cast<const char*>(str_iter), const_cast<const char*>(str_end)));
746 }
747 #endif
748 
749 
750 // See also (93) on TAOCP vol 4a, pp. 153.  Todo: benchmark a
751 // FirstPrecharUnsafe() function which uses that (unsafe because it reads up to
752 // 7 characters past buffer end).
FirstPrechar(const char * str_iter,uint32_t char_code)753 HEADER_INLINE CXXCONST_CP FirstPrechar(const char* str_iter, uint32_t char_code) {
754   while (ctou32(*str_iter) >= char_code) {
755     ++str_iter;
756   }
757   return S_CAST(CXXCONST_CP, str_iter);
758 }
759 
760 // It is worth distinguishing between incremental parsing functions where it's
761 // rarely necessary to scan more than 40 characters or so, and scans which are
762 // likely to be long-range.  The former is frequently best handled with a
763 // simple loop even when AVX2 movemask is available; it may not even be worth
764 // inserting a conditional to select between the two when length is known.  The
765 // latter benefits greatly from movemask.
766 //
767 // The following standard library and plink2-library scanning functions can be
768 // trusted to use movemask:
769 //   strlen, strchr, memchr
770 //   rawmemchr, strchrnul
771 //   rawmemchr2, rawmemchr3, strnul, strchrnul_n, strchrnul2, strchrnul3,
772 //     strchrnul_n_mov, incr_strchrnul_n_mov
773 //   NextTokenMultFar
774 //   AdvToNthDelimChecked, AdvToNthDelim, AdvToDelimOrEnd, Memrchr,
775 //     LastSpaceOrEoln
776 
777 /*
778 #ifdef __LP64__
779 // Requires char_code <= 128.
780 CXXCONST_CP FirstPrecharFar(const char* str_iter, uint32_t char_code);
781 #else
782 HEADER_INLINE CXXCONST_CP FirstPrecharFar(const char* str_iter, uint32_t char_code) {
783   return FirstPrechar(str_iter, char_code);
784 }
785 #endif
786 */
787 
FirstPrespace(const char * str_iter)788 HEADER_INLINE CXXCONST_CP FirstPrespace(const char* str_iter) {
789   return S_CAST(CXXCONST_CP, FirstPrechar(str_iter, ' '));
790 }
791 
NextPrespace(const char * str_iter)792 HEADER_INLINE CXXCONST_CP NextPrespace(const char* str_iter) {
793   return S_CAST(CXXCONST_CP, FirstPrechar(&(str_iter[1]), ' '));
794 }
795 
FirstSpaceOrEoln(const char * str_iter)796 HEADER_INLINE CXXCONST_CP FirstSpaceOrEoln(const char* str_iter) {
797   return S_CAST(CXXCONST_CP, FirstPrechar(str_iter, 33));
798 }
799 
800 // assumes we are currently in a token -- UNSAFE OTHERWISE
CurTokenEnd(const char * str_iter)801 HEADER_INLINE CXXCONST_CP CurTokenEnd(const char* str_iter) {
802   // assert(ctou32(*str_iter) > 32);
803   return S_CAST(CXXCONST_CP, FirstPrechar(&(str_iter[1]), 33));
804 }
805 
806 #ifdef __cplusplus
FirstPrechar(char * str_iter,uint32_t char_code)807 HEADER_INLINE char* FirstPrechar(char* str_iter, uint32_t char_code) {
808   return const_cast<char*>(FirstPrechar(const_cast<const char*>(str_iter), char_code));
809 }
810 
811 /*
812 HEADER_INLINE char* FirstPrecharFar(char* str_iter, uint32_t char_code) {
813   return const_cast<char*>(FirstPrecharFar(const_cast<const char*>(str_iter), char_code));
814 }
815 */
816 
FirstPrespace(char * str_iter)817 HEADER_INLINE char* FirstPrespace(char* str_iter) {
818   return const_cast<char*>(FirstPrespace(const_cast<const char*>(str_iter)));
819 }
820 
NextPrespace(char * str_iter)821 HEADER_INLINE char* NextPrespace(char* str_iter) {
822   return const_cast<char*>(NextPrespace(const_cast<const char*>(str_iter)));
823 }
824 
FirstSpaceOrEoln(char * str_iter)825 HEADER_INLINE char* FirstSpaceOrEoln(char* str_iter) {
826   return const_cast<char*>(FirstSpaceOrEoln(const_cast<const char*>(str_iter)));
827 }
828 
CurTokenEnd(char * str_iter)829 HEADER_INLINE char* CurTokenEnd(char* str_iter) {
830   return const_cast<char*>(CurTokenEnd(const_cast<const char*>(str_iter)));
831 }
832 #endif
833 
CsvFieldEnd(const char * token_iter)834 HEADER_INLINE CXXCONST_CP CsvFieldEnd(const char* token_iter) {
835   unsigned char ucc = *token_iter;
836   while ((ucc >= ' ') && (ucc != ',')) {
837     ucc = *(++token_iter);
838   }
839   return S_CAST(CXXCONST_CP, token_iter);
840 }
841 
842 // length-zero tokens and non-leading spaces are permitted in the
843 // comma-delimiter case
CommaOrTspaceTokenEnd(const char * token_iter,uint32_t comma_delim)844 HEADER_INLINE CXXCONST_CP CommaOrTspaceTokenEnd(const char* token_iter, uint32_t comma_delim) {
845   if (comma_delim) {
846     return CsvFieldEnd(token_iter);
847   }
848   return S_CAST(CXXCONST_CP, CurTokenEnd(token_iter));
849 }
850 
851 #ifdef __cplusplus
CsvFieldEnd(char * token_iter)852 HEADER_INLINE char* CsvFieldEnd(char* token_iter) {
853   return const_cast<char*>(CsvFieldEnd(const_cast<const char*>(token_iter)));
854 }
855 
CommaOrTspaceTokenEnd(char * token_iter,uint32_t comma_delim)856 HEADER_INLINE char* CommaOrTspaceTokenEnd(char* token_iter, uint32_t comma_delim) {
857   return const_cast<char*>(CommaOrTspaceTokenEnd(const_cast<const char*>(token_iter), comma_delim));
858 }
859 #endif
860 
CommaOrTspaceFirstToken(const char * token_end_iter,uint32_t comma_delim)861 HEADER_INLINE CXXCONST_CP CommaOrTspaceFirstToken(const char* token_end_iter, uint32_t comma_delim) {
862   // assumes token_end_iter is non-null, returns nullptr if there are no more
863   // tokens
864   // assert(token_end_iter);
865   if (comma_delim) {
866     if ((*token_end_iter) != ',') {
867       return nullptr;
868     }
869     return S_CAST(CXXCONST_CP, FirstNonTspace(&(token_end_iter[1])));
870   }
871   const char* str = FirstNonTspace(token_end_iter);
872   return IsEolnKns(*str)? nullptr : S_CAST(CXXCONST_CP, str);
873 }
874 
875 #ifdef __cplusplus
CommaOrTspaceFirstToken(char * token_end_iter,uint32_t comma_delim)876 HEADER_INLINE char* CommaOrTspaceFirstToken(char* token_end_iter, uint32_t comma_delim) {
877   return const_cast<char*>(CommaOrTspaceFirstToken(const_cast<const char*>(token_end_iter), comma_delim));
878 }
879 #endif
880 
881 
882 // Returns whether uppercased str_iter matches nonempty fixed_str.  Assumes
883 // fixed_str contains nothing but letters and a null terminator.
884 // uint32_t match_upper(const char* str_iter, const char* fixed_str);
885 
// Counted comparison core used by MatchUpperKLen() and MatchUpperK() in this
// header, both of which pass strlen(fixed_str) as ct.  Defined out-of-line.
// NOTE(review): presumably compares the first ct characters of str, after
// uppercasing, against fixed_str -- confirm against the definition.
uint32_t MatchUpperCounted(const char* str, const char* fixed_str, uint32_t ct);
887 
MatchUpperKLen(const char * str,const char * fixed_str,uint32_t str_slen)888 HEADER_INLINE uint32_t MatchUpperKLen(const char* str, const char* fixed_str, uint32_t str_slen) {
889   const uint32_t fixed_slen = strlen(fixed_str);
890   if (str_slen != fixed_slen) {
891     return 0;
892   }
893   return MatchUpperCounted(str, fixed_str, fixed_slen);
894 }
895 
MatchUpperK(const char * str,const char * fixed_str)896 HEADER_INLINE uint32_t MatchUpperK(const char* str, const char* fixed_str) {
897   return MatchUpperCounted(str, fixed_str, strlen(fixed_str));
898 }
899 
// Defined out-of-line.
// NOTE(review): presumably a case-insensitive equality check over the first
// ct characters of str1 and str2 -- confirm against the definition.
uint32_t strcaseequal(const char* str1, const char* str2, uint32_t ct);
901 
902 /*
903 void str_toupper(char* str_iter);
904 
905 void buf_toupper(uint32_t slen, char* strbuf);
906 
907 void strcpy_toupper(char* target, const char* source);
908 
909 char* memcpya_toupper(char* __restrict target, const char* __restrict source, uint32_t slen);
910 */
911 
// Defined out-of-line.
// NOTE(review): presumably reports whether the string at str_iter consists
// solely of letters and digits; termination rule not visible here -- confirm.
uint32_t IsAlphanumeric(const char* str_iter);

// ScanPosintCapped(), ScanUintCapped(), ScanIntAbsBounded(), ScanInt32(),
// ScanPosintDefcap(), ScanUintDefcap(), ScanIntAbsDefcap(), ScanUintIcap() in
// plink2_base

// Defined out-of-line.
// NOTE(review): presumably the uintptr_t-width counterpart of the positive
// integer scanners listed above, writing the result to *valp and returning
// nonzero on failure -- confirm against the definition.
BoolErr ScanPosintptr(const char* str_iter, uintptr_t* valp);
919 
920 #ifdef __LP64__
// 64-bit build: the bounds are passed directly as 64-bit integers.  These
// "Scanmov" readers take the input pointer by address (str_iterp) and move it
// forward, rather than returning a pointer.
BoolErr ScanmovPosintCapped(uint64_t cap, const char** str_iterp, uint32_t* valp);

BoolErr ScanmovUintCapped(uint64_t cap, const char** str_iterp, uint32_t* valp);

// -2^31 < -abs_floor <= 0 <= cap < 2^31
BoolErr ScanmovIntBounded(uint64_t abs_floor, uint64_t cap, const char** str_iterp, int32_t* valp);
927 #else
// 32-bit build: each bound is supplied pre-split into (bound / 10) and
// (bound % 10); the inline wrappers that follow perform that split.
// NOTE(review): the split presumably lets the out-of-line code detect overflow
// without 64-bit arithmetic -- confirm against the definitions.
BoolErr ScanmovPosintCapped32(uint32_t cap_div_10, uint32_t cap_mod_10, const char** str_iterp, uint32_t* valp);

BoolErr ScanmovUintCapped32(uint32_t cap_div_10, uint32_t cap_mod_10, const char** str_iterp, uint32_t* valp);

BoolErr ScanmovIntBounded32(uint32_t abs_floor_div_10, uint32_t abs_floor_mod_10, uint32_t cap_div_10, uint32_t cap_mod_10, const char** str_iterp, int32_t* valp);
933 
ScanmovPosintCapped(uint32_t cap,const char ** str_iterp,uint32_t * valp)934 HEADER_INLINE BoolErr ScanmovPosintCapped(uint32_t cap, const char** str_iterp, uint32_t* valp) {
935  return ScanmovPosintCapped32(cap / 10, cap % 10, str_iterp, valp);
936 }
937 
ScanmovUintCapped(uint32_t cap,const char ** str_iterp,uint32_t * valp)938 HEADER_INLINE BoolErr ScanmovUintCapped(uint32_t cap, const char** str_iterp, uint32_t* valp) {
939  return ScanmovUintCapped32(cap / 10, cap % 10, str_iterp, valp);
940 }
941 
ScanmovIntBounded(uint32_t abs_floor,uint32_t cap,const char ** str_iterp,int32_t * valp)942 HEADER_INLINE BoolErr ScanmovIntBounded(uint32_t abs_floor, uint32_t cap, const char** str_iterp, int32_t* valp) {
943   return ScanmovIntBounded32(abs_floor / 10, abs_floor % 10, cap / 10, cap % 10, str_iterp, valp);
944 }
945 #endif
946 
ScanmovUintDefcap(const char ** str_iterp,uint32_t * valp)947 HEADER_INLINE BoolErr ScanmovUintDefcap(const char** str_iterp, uint32_t* valp) {
948   return ScanmovUintCapped(0x7ffffffe, str_iterp, valp);
949 }
950 
// This has different semantics from ScanmovPosintCapped, etc. since integer
// readers don't take much code (so it's fine to have a bunch of similar
// functions, optimized for slightly different use cases), but we only want one
// core floating point reader.
// (update, 3 Feb 2018: renamed the integer readers above to start with
// scanmov_ instead of scanadv_, to reflect the interface difference between
// returning a pointer and moving the input pointer forward.)
//
// Callers in this header (ScantokDouble()) treat a null return as a parse
// failure, and otherwise treat the returned pointer as the position
// immediately after the parsed number.  The value is written to *valp.
CXXCONST_CP ScanadvDouble(const char* str_iter, double* valp);
959 
960 // Thin wrapper that verifies the token ends with whitespace/eoln; should be
961 // used whenever comma/semicolon/etc. delimiters are not ok.
ScantokDouble(const char * str_iter,double * valp)962 HEADER_INLINE CXXCONST_CP ScantokDouble(const char* str_iter, double* valp) {
963   CXXCONST_CP parsed_end = ScanadvDouble(str_iter, valp);
964   if ((!parsed_end) || (!IsSpaceOrEoln(*parsed_end))) {
965     return nullptr;
966   }
967   return parsed_end;
968 }
969 
970 #ifdef __cplusplus
ScanadvDouble(char * str_iter,double * valp)971 HEADER_INLINE char* ScanadvDouble(char* str_iter, double* valp) {
972   return const_cast<char*>(ScanadvDouble(const_cast<const char*>(str_iter), valp));
973 }
974 
ScantokDouble(char * str_iter,double * valp)975 HEADER_INLINE char* ScantokDouble(char* str_iter, double* valp) {
976   return const_cast<char*>(ScantokDouble(const_cast<const char*>(str_iter), valp));
977 }
978 #endif
979 
980 // remove unlikely() if any caller ever tries to reparse the string as
981 // something else in error case (this is a valid ScanadvDouble() use case)
ScanFloat(const char * ss,float * valp)982 HEADER_INLINE BoolErr ScanFloat(const char* ss, float* valp) {
983   double dxx;
984   if (unlikely(!ScantokDouble(ss, &dxx))) {
985     return 1;
986   }
987   if (unlikely(fabs(dxx) > 3.4028235677973362e38)) {
988     return 1;
989   }
990   *valp = S_CAST(float, dxx);
991   return 0;
992 }
993 
// Defined out-of-line.  ScantokLn() in this header treats a null return as a
// parse failure, and otherwise treats the returned pointer as the position
// immediately after the parsed text.
// NOTE(review): presumably parses a number and stores its natural log in
// *ln_ptr -- confirm against the definition.
CXXCONST_CP ScanadvLn(const char* str_iter, double* ln_ptr);
995 
ScantokLn(const char * str_iter,double * ln_ptr)996 HEADER_INLINE CXXCONST_CP ScantokLn(const char* str_iter, double* ln_ptr) {
997   CXXCONST_CP parsed_end = ScanadvLn(str_iter, ln_ptr);
998   if ((!parsed_end) || (!IsSpaceOrEoln(*parsed_end))) {
999     return nullptr;
1000   }
1001   return parsed_end;
1002 }
1003 
1004 #ifdef __cplusplus
ScanadvLn(char * str_iter,double * ln_ptr)1005 HEADER_INLINE char* ScanadvLn(char* str_iter, double* ln_ptr) {
1006   return const_cast<char*>(ScanadvLn(const_cast<const char*>(str_iter), ln_ptr));
1007 }
1008 
ScantokLn(char * str_iter,double * ln_ptr)1009 HEADER_INLINE char* ScantokLn(char* str_iter, double* ln_ptr) {
1010   return const_cast<char*>(ScantokLn(const_cast<const char*>(str_iter), ln_ptr));
1011 }
1012 #endif
1013 
// These provide the same interface as ScanPosintCapped(), etc., but there are
// two differences in behavior to make these more suitable for parsing of
// command-line parameters:
// - The strings are initially parsed as floating-point, and then (if an
//   integer is expected) the function errors out if the result isn't an exact
//   integer.  This allows exponential notation to be used.
// - Unlike atoi()/ScanPosintCapped(), the function errors out if parsing stops
//   at non-whitespace.
// The performance cost of this behavior is relatively high: these functions
// shouldn't be used for internal file-reading loops.
// All three are defined out-of-line; the inline wrappers in this header
// (ScanInt32x(), ScanPosintDefcapx(), ScanUintDefcapx()) supply default
// bounds.
BoolErr ScanPosintCappedx(const char* str_iter, uint64_t cap, uint32_t* valp);

BoolErr ScanUintCappedx(const char* str_iter, uint64_t cap, uint32_t* valp);

// NOTE(review): presumably accepts values in [-bound, bound] -- confirm
// against the definition.
BoolErr ScanIntAbsBoundedx(const char* str_iter, int64_t bound, int32_t* valp);
1029 
ScanInt32x(const char * str,int32_t * valp)1030 HEADER_INLINE BoolErr ScanInt32x(const char* str, int32_t* valp) {
1031   return ScanIntAbsBoundedx(str, 0x7fffffff, valp);
1032 }
1033 
ScanPosintDefcapx(const char * str,uint32_t * valp)1034 HEADER_INLINE BoolErr ScanPosintDefcapx(const char* str, uint32_t* valp) {
1035   return ScanPosintCappedx(str, 0x7ffffffe, valp);
1036 }
1037 
ScanUintDefcapx(const char * str,uint32_t * valp)1038 HEADER_INLINE BoolErr ScanUintDefcapx(const char* str, uint32_t* valp) {
1039   return ScanUintCappedx(str, 0x7ffffffe, valp);
1040 }
1041 
// Defined out-of-line.
// NOTE(review): by its name and position, presumably the uintptr_t-width
// member of the command-line-oriented "x" scanner family -- confirm against
// the definition.
BoolErr ScanPosintptrx(const char* str_iter, uintptr_t* valp);
1043 
1044 
AppendBinaryEoln(char ** dst_ptr)1045 HEADER_INLINE void AppendBinaryEoln(char** dst_ptr) {
1046 #ifdef _WIN32
1047   (*dst_ptr)[0] = '\r';
1048   (*dst_ptr)[1] = '\n';
1049   *dst_ptr += 2;
1050 #else
1051   **dst_ptr = '\n';
1052   *dst_ptr += 1;
1053 #endif
1054 }
1055 
DecrAppendBinaryEoln(char ** dst_ptr)1056 HEADER_INLINE void DecrAppendBinaryEoln(char** dst_ptr) {
1057 #ifdef _WIN32
1058   (*dst_ptr)[-1] = '\r';
1059   (*dst_ptr)[0] = '\n';
1060   *dst_ptr += 1;
1061 #else
1062   (*dst_ptr)[-1] = '\n';
1063 #endif
1064 }
1065 
// Defined out-of-line.
// NOTE(review): presumably stores the indexes of the largest and
// second-largest entries of uint_arr[0..uia_size) in *top_idx_ptr and
// *second_idx_ptr; tie-breaking and the minimum supported uia_size are not
// visible here -- confirm against the definition.
void GetTopTwoUi(const uint32_t* __restrict uint_arr, uintptr_t uia_size, uintptr_t* __restrict top_idx_ptr, uintptr_t* __restrict second_idx_ptr);
1067 
1068 // safer than CurTokenEnd(), since it handles length zero
1069 // "se" = space/eoln treated as terminators
strlen_se(const char * ss)1070 HEADER_INLINE uintptr_t strlen_se(const char* ss) {
1071   const char* ss2 = ss;
1072   while (!IsSpaceOrEoln(*ss2)) {
1073     ss2++;
1074   }
1075   return ss2 - ss;
1076 }
1077 
1078 // just an alias for rawmemchr which doesn't require a subsequent static-cast.
AdvToDelim(const char * str_iter,char delim)1079 HEADER_INLINE CXXCONST_CP AdvToDelim(const char* str_iter, char delim) {
1080   return S_CAST(CXXCONST_CP, rawmemchr(str_iter, delim));
1081 }
1082 
1083 #ifdef __cplusplus
AdvToDelim(char * str_iter,char delim)1084 HEADER_INLINE char* AdvToDelim(char* str_iter, char delim) {
1085   return const_cast<char*>(AdvToDelim(const_cast<const char*>(str_iter), delim));
1086 }
1087 #endif
1088 
AdvPastDelim(const char * str_iter,char delim)1089 HEADER_INLINE CXXCONST_CP AdvPastDelim(const char* str_iter, char delim) {
1090   return &(AdvToDelim(str_iter, delim)[1]);
1091 }
1092 
1093 #ifdef __cplusplus
AdvPastDelim(char * str_iter,char delim)1094 HEADER_INLINE char* AdvPastDelim(char* str_iter, char delim) {
1095   return &(AdvToDelim(str_iter, delim)[1]);
1096 }
1097 #endif
1098 
1099 
1100 #ifdef __LP64__
1101 // This is a major VCF-parsing bottleneck, and inlining it makes a big
1102 // difference.
1103 
// Vectorized search for the ct-th occurrence of delim at or after str_iter.
// Returns a pointer to that occurrence, or nullptr when it does not lie
// strictly before str_end.
// Memory is read one whole aligned vector at a time: the first load starts at
// str_iter rounded down to a kBytesPerVec boundary, and the final load may
// extend past str_end, so those surrounding bytes must be readable.
// ct must be nonzero (remaining_delim_ct - 1 is computed below).
HEADER_INLINE CXXCONST_CP AdvToNthDelimChecked(const char* str_iter, const char* str_end, uint32_t ct, char delim) {
  const uintptr_t starting_addr = R_CAST(uintptr_t, str_iter);
  const uintptr_t ending_addr = R_CAST(uintptr_t, str_end);
  // Aligned vector containing the first byte to examine.
  VecUc* str_viter = R_CAST(VecUc*, RoundDownPow2(starting_addr, kBytesPerVec));
  const VecUc vvec_all_delim = vecuc_set1(delim);
  VecUc cur_vvec = *str_viter;
  VecUc delim_vvec = (cur_vvec == vvec_all_delim);
  // NOTE(review): presumably one bit per byte of the vector, set where that
  // byte equals delim -- confirm against vecuc_movemask().
  uint32_t delimiter_bytes = vecuc_movemask(delim_vvec);
  // Discard matches located before str_iter within the first vector.
  const uint32_t leading_byte_ct = starting_addr - R_CAST(uintptr_t, str_viter);
  const uint32_t leading_mask = UINT32_MAX << leading_byte_ct;
  delimiter_bytes &= leading_mask;
  for (uint32_t remaining_delim_ct = ct; ; ) {
    const uint32_t cur_delim_ct = PopcountVec8thUint(delimiter_bytes);
    if (cur_delim_ct >= remaining_delim_ct) {
      // The target delimiter is in this vector.  Drop the matches preceding
      // it, so that the lowest remaining set bit gives its byte offset.
      delimiter_bytes = ClearBottomSetBits(remaining_delim_ct - 1, delimiter_bytes);
      const uint32_t byte_offset_in_vec = ctzu32(delimiter_bytes);
      const uintptr_t result_addr = R_CAST(uintptr_t, str_viter) + byte_offset_in_vec;
      // Whole vectors are scanned, so the hit may be at or past str_end.
      if (result_addr >= ending_addr) {
        return nullptr;
      }
      return R_CAST(CXXCONST_CP, result_addr);
    }
    remaining_delim_ct -= cur_delim_ct;
    ++str_viter;
    // Stop before loading a vector that begins at or past str_end.
    if (R_CAST(uintptr_t, str_viter) >= ending_addr) {
      return nullptr;
    }
    cur_vvec = *str_viter;
    delim_vvec = (cur_vvec == vvec_all_delim);
    delimiter_bytes = vecuc_movemask(delim_vvec);
  }
}
1137 
// Unchecked counterpart of AdvToNthDelimChecked(): returns a pointer to the
// ct-th occurrence of delim at or after str_iter.
// There is no end-of-buffer test, so the loop only terminates once ct
// occurrences have been seen; the caller must guarantee they exist.  Memory is
// read in whole aligned vectors, starting from str_iter rounded down to a
// kBytesPerVec boundary.  ct must be nonzero.
HEADER_INLINE CXXCONST_CP AdvToNthDelim(const char* str_iter, uint32_t ct, char delim) {
  const uintptr_t starting_addr = R_CAST(uintptr_t, str_iter);
  // Aligned vector containing the first byte to examine.
  VecUc* str_viter = R_CAST(VecUc*, RoundDownPow2(starting_addr, kBytesPerVec));
  const VecUc vvec_all_delim = vecuc_set1(delim);
  VecUc cur_vvec = *str_viter;
  VecUc delim_vvec = (cur_vvec == vvec_all_delim);
  // NOTE(review): presumably one bit per byte of the vector, set where that
  // byte equals delim -- confirm against vecuc_movemask().
  uint32_t delimiter_bytes = vecuc_movemask(delim_vvec);
  // Discard matches located before str_iter within the first vector.
  const uint32_t leading_byte_ct = starting_addr - R_CAST(uintptr_t, str_viter);
  const uint32_t leading_mask = UINT32_MAX << leading_byte_ct;
  delimiter_bytes &= leading_mask;
  for (uint32_t remaining_delim_ct = ct; ; ) {
    const uint32_t cur_delim_ct = PopcountVec8thUint(delimiter_bytes);
    if (cur_delim_ct >= remaining_delim_ct) {
      // The target delimiter is in this vector.  Drop the matches preceding
      // it, so that the lowest remaining set bit gives its byte offset.
      delimiter_bytes = ClearBottomSetBits(remaining_delim_ct - 1, delimiter_bytes);
      const uint32_t byte_offset_in_vec = ctzu32(delimiter_bytes);
      const uintptr_t result_addr = R_CAST(uintptr_t, str_viter) + byte_offset_in_vec;
      return R_CAST(CXXCONST_CP, result_addr);
    }
    remaining_delim_ct -= cur_delim_ct;
    ++str_viter;
    cur_vvec = *str_viter;
    delim_vvec = (cur_vvec == vvec_all_delim);
    delimiter_bytes = vecuc_movemask(delim_vvec);
  }
}
1163 #else  // !__LP64__
AdvToNthDelimChecked(const char * str_iter,const char * str_end,uint32_t ct,char delim)1164 HEADER_INLINE CXXCONST_CP AdvToNthDelimChecked(const char* str_iter, const char* str_end, uint32_t ct, char delim) {
1165   for (uint32_t remaining_delim_ct = ct; ; ) {
1166     const char* next_delim = S_CAST(const char*, memchr(str_iter, delim, str_end - str_iter));
1167     if (!next_delim) {
1168       return nullptr;
1169     }
1170     if (!(--remaining_delim_ct)) {
1171       return S_CAST(CXXCONST_CP, next_delim);
1172     }
1173     str_iter = &(next_delim[1]);
1174   }
1175 }
1176 
AdvToNthDelim(const char * str_iter,uint32_t ct,char delim)1177 HEADER_INLINE CXXCONST_CP AdvToNthDelim(const char* str_iter, uint32_t ct, char delim) {
1178   for (uint32_t remaining_delim_ct = ct; ; ) {
1179     const char* next_delim = AdvToDelim(str_iter, delim);
1180     if (!(--remaining_delim_ct)) {
1181       return S_CAST(CXXCONST_CP, next_delim);
1182     }
1183     str_iter = &(next_delim[1]);
1184   }
1185 }
1186 #endif
1187 
1188 #ifdef __cplusplus
AdvToNthDelimChecked(char * str_iter,char * str_end,uint32_t ct,char delim)1189 HEADER_INLINE char* AdvToNthDelimChecked(char* str_iter, char* str_end, uint32_t ct, char delim) {
1190   return const_cast<char*>(AdvToNthDelimChecked(const_cast<const char*>(str_iter), const_cast<const char*>(str_end), ct, delim));
1191 }
1192 
AdvToNthDelim(char * str_iter,uint32_t ct,char delim)1193 HEADER_INLINE char* AdvToNthDelim(char* str_iter, uint32_t ct, char delim) {
1194   return const_cast<char*>(AdvToNthDelim(const_cast<const char*>(str_iter), ct, delim));
1195 }
1196 #endif
1197 
1198 // ok if str_iter is at end of current token
NextToken(const char * str_iter)1199 HEADER_INLINE CXXCONST_CP NextToken(const char* str_iter) {
1200   if (!str_iter) {
1201     return nullptr;
1202   }
1203   unsigned char ucc = *str_iter;
1204   while (ucc > ' ') {
1205     ucc = *(++str_iter);
1206   }
1207   while ((ucc == ' ') || (ucc == '\t')) {
1208     ucc = *(++str_iter);
1209   }
1210   return (ucc > 32)? S_CAST(CXXCONST_CP, str_iter) : nullptr;
1211 }
1212 
1213 #ifdef __cplusplus
NextToken(char * str_iter)1214 HEADER_INLINE char* NextToken(char* str_iter) {
1215   return const_cast<char*>(NextToken(const_cast<const char*>(str_iter)));
1216 }
1217 #endif
1218 
NextTokenMult(const char * str_iter,uint32_t ct)1219 HEADER_INLINE CXXCONST_CP NextTokenMult(const char* str_iter, uint32_t ct) {
1220   // assert(ct);
1221   if (!str_iter) {
1222     return nullptr;
1223   }
1224   unsigned char ucc = *str_iter;
1225   do {
1226     while (ucc > 32) {
1227       ucc = *(++str_iter);
1228     }
1229     while ((ucc == ' ') || (ucc == '\t')) {
1230       ucc = *(++str_iter);
1231     }
1232     if (ucc <= 32) {
1233       return nullptr;
1234     }
1235   } while (--ct);
1236   return S_CAST(CXXCONST_CP, str_iter);
1237 }
1238 
1239 #ifdef USE_AVX2
1240 // todo: determine minimum ct where this pays off.
1241 CXXCONST_CP NextTokenMultFar(const char* str_iter, uint32_t ct);
1242 #else
NextTokenMultFar(const char * str_iter,uint32_t ct)1243 HEADER_INLINE CXXCONST_CP NextTokenMultFar(const char* str_iter, uint32_t ct) {
1244   return NextTokenMult(str_iter, ct);
1245 }
1246 #endif
1247 
1248 #ifdef __cplusplus
NextTokenMult(char * str_iter,uint32_t ct)1249 HEADER_INLINE char* NextTokenMult(char* str_iter, uint32_t ct) {
1250   return const_cast<char*>(NextTokenMult(const_cast<const char*>(str_iter), ct));
1251 }
1252 
NextTokenMultFar(char * str_iter,uint32_t ct)1253 HEADER_INLINE char* NextTokenMultFar(char* str_iter, uint32_t ct) {
1254   return const_cast<char*>(NextTokenMultFar(const_cast<const char*>(str_iter), ct));
1255 }
1256 #endif
1257 
NextTokenMult0(const char * str,uint32_t ct)1258 HEADER_INLINE CXXCONST_CP NextTokenMult0(const char* str, uint32_t ct) {
1259   // tried replacing this with ternary operator, but that actually seemed to
1260   // slow things down a bit under gcc 4.2.1 (tail call optimization issue?).
1261   // todo: recheck this under newer gcc/clang.
1262   if (ct) {
1263     return S_CAST(CXXCONST_CP, NextTokenMult(str, ct));
1264   }
1265   return S_CAST(CXXCONST_CP, str);
1266 }
1267 
1268 #ifdef __cplusplus
NextTokenMult0(char * str,uint32_t ct)1269 HEADER_INLINE char* NextTokenMult0(char* str, uint32_t ct) {
1270   return const_cast<char*>(NextTokenMult0(const_cast<const char*>(str), ct));
1271 }
1272 #endif
1273 
1274 #ifdef USE_AVX2
1275 const char* TokenLexK0(const char* str_iter, const uint32_t* col_types, const uint32_t* col_skips, uint32_t relevant_col_ct, const char** token_ptrs, uint32_t* token_slens);
1276 
TokenLexK(const char * str_iter,const uint32_t * col_types,const uint32_t * col_skips,uint32_t relevant_col_ct,const char ** token_ptrs,uint32_t * token_slens)1277 HEADER_INLINE const char* TokenLexK(const char* str_iter, const uint32_t* col_types, const uint32_t* col_skips, uint32_t relevant_col_ct, const char** token_ptrs, uint32_t* token_slens) {
1278   return TokenLexK0(str_iter, col_types, col_skips, relevant_col_ct, token_ptrs, token_slens);
1279 }
1280 #else
1281 // assumes str_iter != nullptr
1282 // returns nullptr on missing token, otherwise returns pointer to end of last
1283 //   lexed token
TokenLexK(const char * str_iter,const uint32_t * col_types,const uint32_t * col_skips,uint32_t relevant_col_ct,const char ** token_ptrs,uint32_t * token_slens)1284 HEADER_INLINE const char* TokenLexK(const char* str_iter, const uint32_t* col_types, const uint32_t* col_skips, uint32_t relevant_col_ct, const char** token_ptrs, uint32_t* token_slens) {
1285   for (uint32_t relevant_col_idx = 0; relevant_col_idx != relevant_col_ct; ++relevant_col_idx) {
1286     const uint32_t cur_col_type = col_types[relevant_col_idx];
1287     str_iter = NextTokenMult(str_iter, col_skips[relevant_col_idx]);
1288     if (!str_iter) {
1289       return nullptr;
1290     }
1291     token_ptrs[cur_col_type] = str_iter;
1292     const char* token_end = CurTokenEnd(str_iter);
1293     token_slens[cur_col_type] = token_end - str_iter;
1294     str_iter = token_end;
1295   }
1296   return str_iter;
1297 }
1298 
TokenLexK0(const char * str_iter,const uint32_t * col_types,const uint32_t * col_skips,uint32_t relevant_col_ct,const char ** token_ptrs,uint32_t * token_slens)1299 HEADER_INLINE const char* TokenLexK0(const char* str_iter, const uint32_t* col_types, const uint32_t* col_skips, uint32_t relevant_col_ct, const char** token_ptrs, uint32_t* token_slens) {
1300   if (!col_skips[0]) {
1301     const uint32_t cur_col_type = col_types[0];
1302     const char* first_token_end = CurTokenEnd(str_iter);
1303     token_ptrs[cur_col_type] = str_iter;
1304     token_slens[cur_col_type] = first_token_end - str_iter;
1305     str_iter = first_token_end;
1306     ++col_types;
1307     ++col_skips;
1308     --relevant_col_ct;
1309   }
1310   return TokenLexK(str_iter, col_types, col_skips, relevant_col_ct, token_ptrs, token_slens);
1311 }
1312 #endif
1313 
TokenLex(char * str_iter,const uint32_t * col_types,const uint32_t * col_skips,uint32_t relevant_col_ct,char ** token_ptrs,uint32_t * token_slens)1314 HEADER_INLINE char* TokenLex(char* str_iter, const uint32_t* col_types, const uint32_t* col_skips, uint32_t relevant_col_ct, char** token_ptrs, uint32_t* token_slens) {
1315   return K_CAST(char*, TokenLexK(str_iter, col_types, col_skips, relevant_col_ct, K_CAST(const char**, token_ptrs), token_slens));
1316 }
1317 
TokenLex0(char * str_iter,const uint32_t * col_types,const uint32_t * col_skips,uint32_t relevant_col_ct,char ** token_ptrs,uint32_t * token_slens)1318 HEADER_INLINE char* TokenLex0(char* str_iter, const uint32_t* col_types, const uint32_t* col_skips, uint32_t relevant_col_ct, char** token_ptrs, uint32_t* token_slens) {
1319   return K_CAST(char*, TokenLexK0(str_iter, col_types, col_skips, relevant_col_ct, K_CAST(const char**, token_ptrs), token_slens));
1320 }
1321 
// ct must be positive for these functions.
// NextCsvMult() is defined out-of-line; CsvLexK() in this header treats a null
// return from it as "missing token".
// NOTE(review): presumably the comma-delimited analogue of NextTokenMult() --
// confirm against the definition.
CXXCONST_CP NextCsvMult(const char* str_iter, uint32_t ct);
1324 
CommaOrTspaceNextTokenMult(const char * str_iter,uint32_t ct,uint32_t comma_delim)1325 HEADER_INLINE CXXCONST_CP CommaOrTspaceNextTokenMult(const char* str_iter, uint32_t ct, uint32_t comma_delim) {
1326   if (!comma_delim) {
1327     return NextTokenMult(str_iter, ct);
1328   }
1329   return NextCsvMult(str_iter, ct);
1330 }
1331 
1332 #ifdef __cplusplus
NextCsvMult(char * str_iter,uint32_t ct)1333 HEADER_INLINE char* NextCsvMult(char* str_iter, uint32_t ct) {
1334   return const_cast<char*>(NextCsvMult(const_cast<const char*>(str_iter), ct));
1335 }
1336 
CommaOrTspaceNextTokenMult(char * str_iter,uint32_t ct,uint32_t comma_delim)1337 HEADER_INLINE char* CommaOrTspaceNextTokenMult(char* str_iter, uint32_t ct, uint32_t comma_delim) {
1338   return const_cast<char*>(CommaOrTspaceNextTokenMult(const_cast<const char*>(str_iter), ct, comma_delim));
1339 }
1340 #endif
1341 
1342 #ifdef USE_AVX2
// AVX2 build: defined out-of-line.  Same signature as the inline non-AVX2
// implementation in the #else branch.
const char* CsvLexK(const char* str_iter, const uint32_t* col_types, const uint32_t* col_skips, uint32_t relevant_col_ct, const char** token_ptrs, uint32_t* token_slens);
1344 #else
CsvLexK(const char * str_iter,const uint32_t * col_types,const uint32_t * col_skips,uint32_t relevant_col_ct,const char ** token_ptrs,uint32_t * token_slens)1345 HEADER_INLINE const char* CsvLexK(const char* str_iter, const uint32_t* col_types, const uint32_t* col_skips, uint32_t relevant_col_ct, const char** token_ptrs, uint32_t* token_slens) {
1346   for (uint32_t relevant_col_idx = 0; relevant_col_idx != relevant_col_ct; ++relevant_col_idx) {
1347     const uint32_t cur_col_type = col_types[relevant_col_idx];
1348     str_iter = NextCsvMult(str_iter, col_skips[relevant_col_idx]);
1349     if (!str_iter) {
1350       return nullptr;
1351     }
1352     token_ptrs[cur_col_type] = str_iter;
1353     const char* token_end = CsvFieldEnd(str_iter);
1354     token_slens[cur_col_type] = token_end - str_iter;
1355     str_iter = token_end;
1356   }
1357   return str_iter;
1358 }
1359 #endif
1360 
// Defined out-of-line.
// NOTE(review): presumably counts the whitespace-delimited tokens on the line
// starting at str_iter -- confirm against the definition.
// todo: movemask version of this
uint32_t CountTokens(const char* str_iter);

// uint32_t CommaOrSpaceCountTokens(const char* str_iter, uint32_t comma_delim);

// Defined out-of-line.
// NOTE(review): presumably returns the number of strings in multistr and
// stores the largest byte length in *max_blen_ptr -- confirm.
// empty multistr ok
uint32_t CountAndMeasureMultistr(const char* multistr, uintptr_t* max_blen_ptr);

// Lookup table defined out-of-line.
// NOTE(review): presumably maps 0..99 to two-digit ASCII pairs for the integer
// writers below -- confirm.
extern const uint16_t kDigitPair[];

// Integer-to-text writers, defined out-of-line.  Wrappers in this header
// (u32toa_x(), i32toa_x()) use the return value as the position immediately
// after the written text; no null terminator is relied upon there.
char* u32toa(uint32_t uii, char* start);

char* i32toa(int32_t ii, char* start);

// NOTE(review): presumably zero-pads to 5 digits -- confirm.
char* u32toa_z5(uint32_t uii, char* start);

char* i64toa(int64_t llii, char* start);
1378 
1379 #ifdef __LP64__
1380 // really just for printing line numbers
1381 // must be less than 2^63
wtoa(uintptr_t ulii,char * start)1382 HEADER_INLINE char* wtoa(uintptr_t ulii, char* start) {
1383   return i64toa(ulii, start);
1384 }
1385 #else
wtoa(uintptr_t ulii,char * start)1386 HEADER_INLINE char* wtoa(uintptr_t ulii, char* start) {
1387   return u32toa(ulii, start);
1388 }
1389 #endif
1390 
// Defined out-of-line.
// NOTE(review): exact truncation rule not visible in this header -- confirm
// against the definition before relying on the output width.
char* u32toa_trunc4(uint32_t uii, char* start);

// Write-buffer-sizing constants, to support replacement of dtoa_g() with
// higher-precision variants like dtoa_g_p8() or full-blown Ryu.

// Maximum string lengths, illustrated by worst-case examples:
// -0.000123456
// -1.23456e-38
CONSTI32(kMaxFloatGSlen, 12);

// e.g. -1.23456e-308
CONSTI32(kMaxDoubleGSlen, 13);

// 1.23456e-2147483647
CONSTI32(kMaxLnGSlen, 19);

// Defined out-of-line.  ftoa_g() in this header forwards to it after widening
// its float argument to double.
char* dtoa_g(double dxx, char* start);

// We try to avoid micromanaging floating point printing and just use %g
// everywhere, but occasionally we explicitly need more precision.
//
// dtoa_g_p8 provides generic 8-digit precision (instead of %g's 6-digit
// default), while print_dosage in plink2_common provides up to 3 places after
// the decimal point when dealing with dosages (which are internally
// represented as 32768ths).
// (may want to replace _p8 with _p10 for perfect int32 handling.)
char* dtoa_g_p8(double dxx, char* start);

// ln(10)
static const double kLn10 = 2.3025850929940457;
// 1 / ln(10), i.e. log10(e)
static const double kRecipLn10 = 0.43429448190325176;
// ln(2^-1022), the natural log of the smallest positive normal double
static const double kLnNormalMin = -708.3964185322641;

// Defined out-of-line.
// NOTE(review): presumably prints exp(ln_val) in %g-like form without
// overflowing or underflowing (see kMaxLnGSlen) -- confirm.
char* lntoa_g(double ln_val, char* start);
1422 
TrailingZeroesToSpaces(char * start)1423 HEADER_INLINE void TrailingZeroesToSpaces(char* start) {
1424   --start;
1425   while (*start == '0') {
1426     *start-- = ' ';
1427   }
1428   if (*start == '.') {
1429     *start = ' ';
1430   }
1431 }
1432 
ClipTrailingZeroes(char * start)1433 HEADER_INLINE char* ClipTrailingZeroes(char* start) {
1434   char cc;
1435   do {
1436     cc = *(--start);
1437   } while (cc == '0');
1438   return &(start[(cc != '.')]);
1439 }
1440 
1441 // "prob" means that the number is guaranteed to be in [0, 1].
1442 // no leading space is printed.  trailing zeroes (/decimal point) are erased
1443 //   iff there is equality to ~13 decimal places.
1444 char* dtoa_f_probp6_spaced(double dxx, char* start);
1445 
1446 char* dtoa_f_probp6_clipped(double dxx, char* start);
1447 
1448 // char* dtoa_f_p5_clipped(double dxx, char* start);
1449 
1450 // dedicated ftoa_g() discontinued
ftoa_g(float fxx,char * start)1451 HEADER_INLINE char* ftoa_g(float fxx, char* start) {
1452   return dtoa_g(S_CAST(double, fxx), start);
1453 }
1454 
u32toa_x(uint32_t uii,char extra_char,char * start)1455 HEADER_INLINE char* u32toa_x(uint32_t uii, char extra_char, char* start) {
1456   char* penult = u32toa(uii, start);
1457   *penult = extra_char;
1458   return &(penult[1]);
1459 }
1460 
i32toa_x(int32_t ii,char extra_char,char * start)1461 HEADER_INLINE char* i32toa_x(int32_t ii, char extra_char, char* start) {
1462   char* penult = i32toa(ii, start);
1463   *penult = extra_char;
1464   return &(penult[1]);
1465 }
1466 
1467 
1468 // overread must be ok.
1469 CXXCONST_CP ScanForDuplicateIds(const char* sorted_ids, uintptr_t id_ct, uintptr_t max_id_blen);
1470 
1471 #ifdef __cplusplus
ScanForDuplicateIds(char * sorted_ids,uintptr_t id_ct,uintptr_t max_id_blen)1472 HEADER_INLINE char* ScanForDuplicateIds(char* sorted_ids, uintptr_t id_ct, uintptr_t max_id_blen) {
1473   return const_cast<char*>(ScanForDuplicateIds(const_cast<const char*>(sorted_ids), id_ct, max_id_blen));
1474 }
1475 #endif
1476 
1477 // Collapses array of sorted IDs to remove duplicates, and writes pre-collapse
1478 // positions to id_starts (so e.g. duplication count of any sample ID can be
1479 // determined via subtraction) if it isn't nullptr.
1480 // Overread must be ok.
1481 // Returns id_ct of collapsed array.
1482 uint32_t CollapseDuplicateIds(uintptr_t id_ct, uintptr_t max_id_blen, char* sorted_ids, uint32_t* id_starts);
1483 
1484 
1485 // returns position of string, or -1 if not found.
1486 int32_t bsearch_str(const char* idbuf, const char* sorted_strbox, uintptr_t cur_id_slen, uintptr_t max_id_blen, uintptr_t end_idx);
1487 
1488 // requires null-terminated string
1489 int32_t bsearch_str_natural(const char* idbuf, const char* sorted_strbox, uintptr_t max_id_blen, uintptr_t end_idx);
1490 
1491 
1492 // returns number of elements in sorted_strbox[] less than idbuf.
1493 uintptr_t bsearch_str_lb(const char* idbuf, const char* sorted_strbox, uintptr_t cur_id_slen, uintptr_t max_id_blen, uintptr_t end_idx);
1494 
1495 // same result as bsearch_str_lb(), but checks against [cur_idx],
1496 // [cur_idx + 1], [cur_idx + 3], [cur_idx + 7], etc. before finishing with a
1497 // binary search, and assumes cur_id_slen <= max_id_blen and end_idx > 0.
1498 uintptr_t ExpsearchStrLb(const char* idbuf, const char* sorted_strbox, uintptr_t cur_id_slen, uintptr_t max_id_blen, uintptr_t end_idx, uintptr_t cur_idx);
1499 
1500 // Null-terminated string required.
1501 uintptr_t ExpsearchNsortStrLb(const char* idbuf, const char* nsorted_strbox, uintptr_t max_id_blen, uintptr_t end_idx, uintptr_t cur_idx);
1502 
1503 // this is frequently preferable to bsearch_str(), since it's way too easy to
1504 // forget to convert the sorted-stringbox index to the final index
1505 // sample_id_map == nullptr is permitted; in this case id will be an index into
1506 // the sorted array
SortedIdboxFind(const char * idbuf,const char * sorted_idbox,const uint32_t * id_map,uintptr_t cur_id_slen,uintptr_t max_id_blen,uintptr_t end_idx,uint32_t * id_ptr)1507 HEADER_INLINE BoolErr SortedIdboxFind(const char* idbuf, const char* sorted_idbox, const uint32_t* id_map, uintptr_t cur_id_slen, uintptr_t max_id_blen, uintptr_t end_idx, uint32_t* id_ptr) {
1508   const int32_t ii = bsearch_str(idbuf, sorted_idbox, cur_id_slen, max_id_blen, end_idx);
1509   if (ii == -1) {
1510     return 1;
1511   }
1512   *id_ptr = id_map? id_map[S_CAST(uint32_t, ii)] : S_CAST(uint32_t, ii);
1513   return 0;
1514 }
1515 
1516 #ifdef __arm__
1517 #  error "Unaligned accesses in IsNanStr()."
1518 #endif
1519 // This returns 1 on any capitalization of 'na' or 'nan', 0 otherwise.
1520 // todo: check whether there's actually any point to the uint16_t type-pun
IsNanStr(const char * ss,uint32_t slen)1521 HEADER_INLINE uint32_t IsNanStr(const char* ss, uint32_t slen) {
1522   if ((slen > 3) || (slen == 1)) {
1523     return 0;
1524   }
1525   if (!slen) {
1526     return 1;
1527   }
1528   const uint32_t first_two_chars_code = R_CAST(const uint16_t*, ss)[0];
1529   // assumes little-endian
1530   if ((first_two_chars_code & 0xdfdf) != 0x414e) {
1531     return 0;
1532   }
1533   return (slen == 2) || ((ctou32(ss[2]) & 0xdf) == 78);
1534 }
1535 
// Matches "inf"/"infinity", any capitalization, can have sign in front.
// Assumes *is_neg_ptr is zero-initialized.
// Assumes one-char overread is ok.
1539 uint32_t IsInfStr(const char* ss, uint32_t slen, uint32_t* is_neg_ptr);
1540 
1541 
AdvToDelimOrEnd(const char * str_iter,const char * str_end,char delim)1542 HEADER_INLINE CXXCONST_CP AdvToDelimOrEnd(const char* str_iter, const char* str_end, char delim) {
1543   CXXCONST_CP memchr_result = S_CAST(CXXCONST_CP, memchr(str_iter, delim, str_end - str_iter));
1544   if (memchr_result) {
1545     return memchr_result;
1546   }
1547   return S_CAST(CXXCONST_CP, str_end);
1548 }
1549 
1550 // tried memchr_likely_short(), not worth it
1551 
1552 #ifdef __cplusplus
AdvToDelimOrEnd(char * str_iter,char * str_end,char needle)1553 HEADER_INLINE char* AdvToDelimOrEnd(char* str_iter, char* str_end, char needle) {
1554   return const_cast<char*>(AdvToDelimOrEnd(const_cast<const char*>(str_iter), const_cast<const char*>(str_end), needle));
1555 }
1556 #endif
1557 
1558 // memrchr() not available on some platforms.  This implementation also
1559 // includes a tweak which trades off a bit of performance around length 35 for
1560 // substantially better performance on the longer lines often seen in e.g. VCF
1561 // files, so we don't normally use the base implementation when it's available,
1562 // hence the initial capital letter.
1563 #ifdef __LP64__
1564 CXXCONST_CP Memrchr(const char* str_start, char needle, uintptr_t slen);
1565 
1566 CXXCONST_CP LastSpaceOrEoln(const char* str_start, uintptr_t slen);
1567 #else  // !__LP64__
Memrchr(const char * str_start,char needle,uintptr_t slen)1568 HEADER_INLINE CXXCONST_CP Memrchr(const char* str_start, char needle, uintptr_t slen) {
1569 #  ifdef _GNU_SOURCE
1570   return S_CAST(CXXCONST_CP, memrchr(str_start, ctou32(needle), slen));
1571 #  else  // !_GNU_SOURCE
1572   // Could check one word at a time for not-that-small slen.
1573   for (uintptr_t pos = slen; pos; ) {
1574     if (str_start[--pos] == needle) {
1575       return S_CAST(CXXCONST_CP, &(str_start[pos]));
1576     }
1577   }
1578   return nullptr;
1579 #  endif  // !_GNU_SOURCE
1580 }
1581 
LastSpaceOrEoln(const char * str_start,uintptr_t slen)1582 HEADER_INLINE CXXCONST_CP LastSpaceOrEoln(const char* str_start, uintptr_t slen) {
1583   for (uintptr_t pos = slen; pos; ) {
1584     if (ctou32(str_start[--pos]) <= 32) {
1585       return S_CAST(CXXCONST_CP, &(str_start[pos]));
1586     }
1587   }
1588   return nullptr;
1589 }
1590 #endif  // !__LP64__
1591 
1592 #ifdef __cplusplus
Memrchr(char * str_start,char needle,uintptr_t slen)1593 HEADER_INLINE char* Memrchr(char* str_start, char needle, uintptr_t slen) {
1594   return const_cast<char*>(Memrchr(const_cast<const char*>(str_start), needle, slen));
1595 }
1596 
LastSpaceOrEoln(char * str_start,uintptr_t slen)1597 HEADER_INLINE char* LastSpaceOrEoln(char* str_start, uintptr_t slen) {
1598   return const_cast<char*>(LastSpaceOrEoln(const_cast<const char*>(str_start), slen));
1599 }
1600 #endif
1601 
1602 // void ReplaceAllInstances(char old_char, char new_char, uint32_t slen, char* dst);
1603 
1604 void TabsToSpaces(char* ss_iter);
1605 
1606 // Errors out if new_char is already present.
1607 BoolErr ReplaceCharAdvChecked(char old_char, char new_char, char** str_ptr);
1608 
1609 #ifdef __cplusplus
1610 }  // namespace plink2
1611 #endif
1612 
1613 #endif  // __PLINK2_STRING_H__
1614