1 
2 /******************************************************************************
3  *
4  *  This file is part of meryl-utility, a collection of miscellaneous code
5  *  used by Meryl, Canu and others.
6  *
7  *  This software is based on:
8  *    'Canu' v2.0              (https://github.com/marbl/canu)
9  *  which is based on:
10  *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
11  *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
12  *
13  *  Except as indicated otherwise, this is a 'United States Government Work',
14  *  and is released in the public domain.
15  *
16  *  File 'README.licenses' in the root directory of this distribution
17  *  contains full conditions and disclaimers.
18  */
19 
20 #ifndef TYPES_H
21 #define TYPES_H
22 
23 //  Do NOT enable _GLIBCXX_PARALLEL.  Performance is atrocious, and it will
24 //  not sort in-place, so memory will blow up.  More comments in AS_global.C.
25 #undef  _GLIBCXX_PARALLEL
26 
27 //  ISO C99 says that to get INT32_MAX et al, these must be defined.
28 //  (7.18.2, 7.18.4, 7.8.1)
29 #ifndef __STDC_CONSTANT_MACROS
30 #define __STDC_CONSTANT_MACROS
31 #endif
32 #ifndef __STDC_LIMIT_MACROS
33 #define __STDC_LIMIT_MACROS
34 #endif
35 #ifndef __STDC_FORMAT_MACROS
36 #define __STDC_FORMAT_MACROS
37 #endif
38 
39 //  Tell gcc (and others, maybe) about unused parameters.  This is important for gcc (especially
40 //  newer ones) that complain about unused parameters.  Thanks to ideasman42 at
41 //  http://stackoverflow.com/questions/3599160/unused-parameter-warnings-in-c-code.
//  UNUSED(x) declares the parameter under the name UNUSED_x; on compilers
//  that define __GNUC__ it additionally tags it with the 'unused' attribute.
#if defined(__GNUC__)
#  define UNUSED(x) UNUSED_ ## x __attribute__((__unused__))
#else
#  define UNUSED(x) UNUSED_ ## x
#endif
47 
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <stdint.h>
51 #include <inttypes.h>
52 #include <limits.h>
53 #include <unistd.h>
54 #include <string.h>
55 #include <ctype.h>
56 
57 #include <float.h>
58 #include <cmath>
59 
60 #include <assert.h>
61 #include <errno.h>
62 #include <time.h>
63 
64 #include <sys/types.h>
65 #include <sys/stat.h>
66 
67 #include <omp.h>
68 
69 #include <limits>
70 #include <set>
71 #include <vector>
72 
73 #if defined(_FILE_OFFSET_BITS) && (_FILE_OFFSET_BITS == 32)
74 #error I do not support 32-bit off_t.
75 #endif
76 
77 //  Make the basic int types a bit more friendly.
78 
typedef unsigned __int128  uint128;
typedef          __int128   int128;

typedef uint64_t           uint64;
typedef  int64_t            int64;

typedef uint32_t           uint32;
typedef  int32_t            int32;

typedef uint16_t           uint16;
typedef  int16_t            int16;

typedef  uint8_t           uint8;
typedef   int8_t            int8;

//  There's no way to assign a constant value to the 128-bit integers
//  directly, but with a helper function we can assign it using two 64-bit
//  integers:  'a' supplies the high 64 bits and 'b' supplies the low 64
//  bits.  This only really makes sense for the unsigned flavor, e.g.,
//  when used for bit packed quantities.
//
//  The signed flavor assembles the bit pattern as an unsigned value and
//  converts it at the end.  Doing the arithmetic directly on signed values
//  would sign-extend a negative 'b' over the high word (discarding 'a') and
//  would left-shift a negative 'a', which is undefined before C++20.  The
//  sign of the result is determined by 'a' alone.

constexpr inline uint128   build_uint128(uint64 a, uint64 b)    { return(((uint128)a << 64) | ((uint128)b)); }
constexpr inline  int128   build_int128 ( int64 a,  int64 b)    { return((int128)build_uint128((uint64)a, (uint64)b)); }
101 
102 //  Some handy constants.
103 //
104 //  numeric_limits<> on the 128-bit types is undefined, so we're forced to do
105 //  it the hard way.
106 
107 constexpr uint128   uint128zero = 0;
108 constexpr uint128   uint128one  = 1;
109 constexpr uint128   uint128min  = 0;
110 constexpr uint128   uint128max  = (uint128)(0xffffffffffffffffllu) << 64 | (uint128)(0xffffffffffffffffllu);
111 
112 constexpr  int128    int128zero = 0;
113 constexpr  int128    int128one  = 1;
114 constexpr  int128    int128min  = (uint128)(0x8000000000000000llu) << 64 | (uint128)(0x0000000000000000llu);
115 constexpr  int128    int128max  = (uint128)(0x7fffffffffffffffllu) << 64 | (uint128)(0xffffffffffffffffllu);
116 
117 constexpr uint64    uint64zero  = 0;
118 constexpr uint64    uint64one   = 1;
119 constexpr uint64    uint64min   = std::numeric_limits<uint64>::min();
120 constexpr uint64    uint64max   = std::numeric_limits<uint64>::max();
121 
122 constexpr  int64     int64zero  = 0;
123 constexpr  int64     int64one   = 1;
124 constexpr  int64     int64min   = std::numeric_limits<int64>::min();
125 constexpr  int64     int64max   = std::numeric_limits<int64>::max();
126 
127 constexpr uint32    uint32zero  = 0;
128 constexpr uint32    uint32one   = 1;
129 constexpr uint32    uint32min   = std::numeric_limits<uint32>::min();
130 constexpr uint32    uint32max   = std::numeric_limits<uint32>::max();
131 
132 constexpr  int32     int32zero  = 0;
133 constexpr  int32     int32one   = 1;
134 constexpr  int32     int32min   = std::numeric_limits<int32>::min();
135 constexpr  int32     int32max   = std::numeric_limits<int32>::max();
136 
137 constexpr uint16    uint16zero  = 0;
138 constexpr uint16    uint16one   = 1;
139 constexpr uint16    uint16min   = std::numeric_limits<uint16>::min();
140 constexpr uint16    uint16max   = std::numeric_limits<uint16>::max();
141 
142 constexpr  int16     int16zero  = 0;
143 constexpr  int16     int16one   = 1;
144 constexpr  int16     int16min   = std::numeric_limits<int16>::min();
145 constexpr  int16     int16max   = std::numeric_limits<int16>::max();
146 
147 constexpr uint8     uint8zero   = 0;
148 constexpr uint8     uint8one    = 1;
149 constexpr uint8     uint8min    = std::numeric_limits<uint8>::min();
150 constexpr uint8     uint8max    = std::numeric_limits<uint8>::max();
151 
152 constexpr  int8      int8zero   = 0;
153 constexpr  int8      int8one    = 1;
154 constexpr  int8      int8min    = std::numeric_limits<int8>::min();
155 constexpr  int8      int8max    = std::numeric_limits<int8>::max();
156 
157 //  Conversion from floating point to integer.  lrint() rounds the
158 //  floating-point argument to an integer value, using the current rounding
159 //  mode.  This mode can be set with std::fesetround().
160 
doubletoint64(double d)161 inline  int64  doubletoint64(double d)   { return(std::llrint(d)); }
doubletoint32(double d)162 inline  int32  doubletoint32(double d)   { return(std:: lrint(d)); }
163 
//  Decoding strings into numbers (and a boolean).
165 //   - The first set simply convert the string to a number and return that
166 //     number.
167 //   - The second set converts the string to a number and returns a pointer
168 //     to the letter in the string just after the number.
169 //
170 //  There probably should be a strtobool() of the second form, but I'm not
171 //  really sure what to do with the 'invalid' case that is currently treated
172 //  as 'false'.
173 
174 uint128 strtoullll(char const *nptr, char **endptr);   //  The original strtoul() et al take char**
175  int128 strtollll (char const *nptr, char **endptr);   //  as the second arg.
176 
strtouint128(char const * str)177 inline uint128 strtouint128(char const *str)  {  return((uint128)strtoullll(str, nullptr));      }
strtoint128(char const * str)178 inline  int128 strtoint128 (char const *str)  {  return( (int128)strtollll (str, nullptr));      }
strtouint64(char const * str)179 inline uint64  strtouint64 (char const *str)  {  return((uint64) strtoull  (str, nullptr, 10));  }
strtoint64(char const * str)180 inline  int64  strtoint64  (char const *str)  {  return( (int64) strtoll   (str, nullptr, 10));  }
strtouint32(char const * str)181 inline uint32  strtouint32 (char const *str)  {  return((uint32) strtoul   (str, nullptr, 10));  }
strtoint32(char const * str)182 inline  int32  strtoint32  (char const *str)  {  return( (int32) strtol    (str, nullptr, 10));  }
strtouint16(char const * str)183 inline uint16  strtouint16 (char const *str)  {  return((uint16) strtoul   (str, nullptr, 10));  }   //  WARNING: these convert to
strtoint16(char const * str)184 inline  int16  strtoint16  (char const *str)  {  return( (int16) strtol    (str, nullptr, 10));  }   //  a 32-bit integer then cast
strtouint8(char const * str)185 inline uint8   strtouint8  (char const *str)  {  return((uint8)  strtoul   (str, nullptr, 10));  }   //  to 16- or 8-bit integers.
strtoint8(char const * str)186 inline  int8   strtoint8   (char const *str)  {  return( (int8)  strtol    (str, nullptr, 10));  }
strtofloat(char const * str)187 inline  float  strtofloat  (char const *str)  {  return( (float) strtof    (str, nullptr));      }
strtodouble(char const * str)188 inline double  strtodouble (char const *str)  {  return((double) strtod    (str, nullptr));      }
189 
strtonumber(char const * str,uint128 & num)190 inline char const *strtonumber(char const *str, uint128 &num)  {  char *rem;  num = (uint128)strtoullll(str, &rem);      return(rem);  }
strtonumber(char const * str,int128 & num)191 inline char const *strtonumber(char const *str,  int128 &num)  {  char *rem;  num =  (int128)strtollll (str, &rem);      return(rem);  }
strtonumber(char const * str,uint64 & num)192 inline char const *strtonumber(char const *str, uint64  &num)  {  char *rem;  num = (uint64) strtoull  (str, &rem, 10);  return(rem);  }
strtonumber(char const * str,int64 & num)193 inline char const *strtonumber(char const *str,  int64  &num)  {  char *rem;  num =  (int64) strtoll   (str, &rem, 10);  return(rem);  }
strtonumber(char const * str,uint32 & num)194 inline char const *strtonumber(char const *str, uint32  &num)  {  char *rem;  num = (uint32) strtoul   (str, &rem, 10);  return(rem);  }
strtonumber(char const * str,int32 & num)195 inline char const *strtonumber(char const *str,  int32  &num)  {  char *rem;  num =  (int32) strtol    (str, &rem, 10);  return(rem);  }
strtonumber(char const * str,uint16 & num)196 inline char const *strtonumber(char const *str, uint16  &num)  {  char *rem;  num = (uint16) strtoul   (str, &rem, 10);  return(rem);  }
strtonumber(char const * str,int16 & num)197 inline char const *strtonumber(char const *str,  int16  &num)  {  char *rem;  num =  (int16) strtol    (str, &rem, 10);  return(rem);  }
strtonumber(char const * str,uint8 & num)198 inline char const *strtonumber(char const *str, uint8   &num)  {  char *rem;  num = (uint8)  strtoul   (str, &rem, 10);  return(rem);  }
strtonumber(char const * str,int8 & num)199 inline char const *strtonumber(char const *str,  int8   &num)  {  char *rem;  num =  (int8)  strtol    (str, &rem, 10);  return(rem);  }
strtonumber(char const * str,float & num)200 inline char const *strtonumber(char const *str,  float  &num)  {  char *rem;  num = (double) strtof    (str, &rem);      return(rem);  }
strtonumber(char const * str,double & num)201 inline char const *strtonumber(char const *str, double  &num)  {  char *rem;  num = (double) strtod    (str, &rem);      return(rem);  }
202 
//  Decode a string into a boolean.
//
//  Returns true for the single letters 'y', 'Y', 't', 'T', '1' and '+', and
//  for the words "yes" and "true" in any mix of upper and lower case.
//  Everything else, including a null pointer and the empty string, is false.
inline bool    strtobool(char const *str)  {

  if ((str == nullptr) || (str[0] == 0))     //  Nothing to decode.
    return(false);

  if (str[1] == 0) {                         //  Exactly one letter.
    switch (str[0]) {
      case 'y':
      case 'Y':
      case 't':
      case 'T':
      case '1':
      case '+':
        return(true);
      default:
        return(false);
    }
  }

  return((strcasecmp(str, "yes")  == 0) ||   //  Two or more letters.
         (strcasecmp(str, "true") == 0));
}
222 
223 //  Test if a character or string is of the desired encoding.
224 
//  True only for the NUL byte.
inline bool   isNUL(char c) {
  return(c == 0);
}

//  True for '!' through '~' inclusive.  Space is not visible.
inline bool   isVisible(char c) {
  return(!((c < '!') || ('~' < c)));
}

//  True for 'a'-'z' and 'A'-'Z'.
inline bool   isLetter(char c) {
  bool const  lower = ('a' <= c) && (c <= 'z');
  bool const  upper = ('A' <= c) && (c <= 'Z');

  return(lower || upper);
}

//  True for space, newline, tab and carriage return.
inline bool   isWhiteSpace(char c) {
  switch (c) {
    case ' ':
    case '\n':
    case '\t':
    case '\r':
      return(true);
    default:
      return(false);
  }
}

//  True for '!', '#' and the NUL byte.
inline bool   isComment(char c) {
  switch (c) {
    case '!':
    case '#':
    case '\0':
      return(true);
    default:
      return(false);
  }
}

//  True for ':' and '='.
inline bool   isDelimiter(char c) {
  switch (c) {
    case ':':
    case '=':
      return(true);
    default:
      return(false);
  }
}
237 
//  Test if a letter is a valid digit in base 2, 8, 10 or 16.  Hexadecimal
//  accepts both upper and lower case letters.

inline bool   isBinDigit(char c) {
  return((c == '0') || (c == '1'));
}

inline bool   isOctDigit(char c) {
  return((c >= '0') && (c <= '7'));
}

inline bool   isDecDigit(char c) {
  return((c >= '0') && (c <= '9'));
}

inline bool   isHexDigit(char c) {
  bool const  lower = (c >= 'a') && (c <= 'f');
  bool const  upper = (c >= 'A') && (c <= 'F');

  return(isDecDigit(c) || lower || upper);
}
244 
//  Whole-string tests; the non-inline functions are defined elsewhere.
//
//  isDecInteger() and isDecFloat() forward to isDecNumber() with 'dot' set
//  to NUL and '.' respectively.  NOTE(review): presumably 'dot' is the
//  letter accepted as a decimal point, and NUL disables it - confirm
//  against the definition of isDecNumber().
bool          isBinNumber (char const *s);
bool          isOctNumber (char const *s);
bool          isDecNumber (char const *s, char dot='.');
inline bool   isDecInteger(char const *s)   { return(isDecNumber(s, '\0')); }
inline bool   isDecFloat  (char const *s)   { return(isDecNumber(s, '.'));  }
bool          isHexNumber (char const *s);
251 
//  Disallow the usual character tests because of their goofy return values.
253 
//  Remove any macro versions of the character tests...

#undef  isalnum
#undef  isalpha
#undef  iscntrl
#undef  isdigit
#undef  isgraph
#undef  islower
#undef  isprint
#undef  ispunct
#undef  isspace
#undef  isupper
#undef  isxdigit
#undef  isnumber

//  ...and declare the 'char' overloads as deleted, so that calling one of
//  these with a char argument is a compile-time error.

inline int isalnum (char) = delete;
inline int isalpha (char) = delete;
inline int iscntrl (char) = delete;
inline int isdigit (char) = delete;
inline int isgraph (char) = delete;
inline int islower (char) = delete;
inline int isprint (char) = delete;
inline int ispunct (char) = delete;
inline int isspace (char) = delete;
inline int isupper (char) = delete;
inline int isxdigit(char) = delete;
inline int isnumber(char) = delete;
279 
280 //  Convert an ascii binary, octal, decimal or hexadecimal letter to an
281 //  integer.  No type checking is performed; you've already called
282 //  isHexNumber() et al, right?
283 //
284 //  The pieces of asciiHexToInteger() are as follows:
285 //    (d & 0xf)        //  Decodes '0'-'9' as 0-9, 'a' - 'f' as 1-6
286 //    (d >> 6)         //  Decodes digits as 0, letters as 1.
287 //   ((d >> 6) << 3)   //  Decodes digits as 0, letters as 8.
288 
asciiBinToInteger(char d)289 inline uint8  asciiBinToInteger(char d)   { return(d - '0'); }   //  Pretty trivial.
asciiOctToInteger(char d)290 inline uint8  asciiOctToInteger(char d)   { return(d - '0'); }
asciiDecToInteger(char d)291 inline uint8  asciiDecToInteger(char d)   { return(d - '0'); }
asciiHexToInteger(char d)292 inline uint8  asciiHexToInteger(char d)   { return(((uint8)d & 0xf) + ((uint8)d >> 6) + (((uint8)d >> 6) << 3)); }
293 
294 //  Convert an integer to a printable letter.  If it's not a printable
295 //  letter, returns '.'.
296 
297 inline
298 char
integerToLetter(uint32 i)299 integerToLetter(uint32 i) {
300   return(((' ' <= i) && (i <= '~')) ? i : '.');
301 }
302 
303 //  Convert a string representing a set of numbers to
304 //   - the first and last values (for form '#' or '#-#')
305 //   - a vector of the low and high values
306 //   - a set of the values
307 //
308 //  The string should be comprised of multiple comma separated ranges:
309 //   - #     a single number
310 //   - #-#   a range of numbers
311 //   - #/#   a one-out-of-N specification
312 //
313 //  The first form returns a pointer to the letter after the decoded values.
314 //
315 //  If a single number is encountered in the first or second forms, both
316 //  'bgn' and 'end' are set to that value.
317 //
318 //  If 'numberType' is a 128-bit integer, only 64-bit integers can be
319 //  converted.
320 
//  First form: decode '#' or '#-#' into 'bgn' and 'end', returning a pointer
//  to the letter after the decoded values.
template<typename numberType>
char const *
decodeRange(char const *range, numberType &bgn, numberType &end);

//  Second form: decode into vectors of the low and high values.
template<typename numberType>
void
decodeRange(char const *range, std::vector<numberType> &bgn, std::vector<numberType> &end);

//  Third form: decode into a set of the values.
template<typename numberType>
void
decodeRange(char const *range, std::set<numberType> &values);
324 
325 //  Convert an unsigned integer representing bits or bytes to
326 //  a floating point number representing GB or MB.
327 
bitsToGB(uint64 bits)328 inline double bitsToGB(uint64 bits)   { return(bits / 8 / 1024.0 / 1024.0 / 1024.0); }
bitsToMB(uint64 bits)329 inline double bitsToMB(uint64 bits)   { return(bits / 8 / 1024.0 / 1024.0);          }
330 
//  Convert an unsigned integer to a number with 3 significant digits, and
//  also return the correct SI base.
333 
334 uint64      scaledNumber(uint64 n, uint32 div=1024);   //  Return n between 0 and div,
335 char        scaledUnit  (uint64 n, uint32 div=1024);   //  and the SI unit of that
336 const char *scaledName  (uint64 n, uint32 div=1024);   //  scaling.
337 
338 //  Convert an unsigned integer to a character string in the desired base.
339 //
340 //    char *toXXX(v, str)
341 //      Expects a pre-allocated character buffer 'str' with enough space for
342 //      the output string and a NUL terminating byte.  It returns a pointer
343 //      to the NUL byte.  A 128-bit integer in:
344 //        binary      needs 129 bytes
345 //        octal       needs  44 bytes
346 //        decimal     needs  40 bytes (it's 340,282,366,920,938,463,463,374,607,431,768,211,455)
347 //        hexadecimal needs  33 bytes
348 //
//    char const *toXXX(v)
350 //      Returns a pointer to one of 32 private string buffers.  This is
351 //      thread safe, as long as you don't use it more than 32 times at once.
352 //
353 //  Both forms take an optional 'width' (in bits) to display.  The actual
354 //  width used is the minimum of this width and the number of bits in the
355 //  type.  toDec() accepts the width, but doesn't use it.
356 
357 template<typename uintType> char       *toBin(uintType value, char *out, uint32 width=128);
358 template<typename uintType> char       *toOct(uintType value, char *out, uint32 width=128);
359 template<typename uintType> char       *toDec(uintType value, char *out, uint32 width=128);
360 template<typename uintType> char       *toHex(uintType value, char *out, uint32 width=128);
361 
362 template<typename uintType> char const *toBin(uintType value, uint32 width=128);
363 template<typename uintType> char const *toOct(uintType value, uint32 width=128);
364 template<typename uintType> char const *toDec(uintType value, uint32 width=128);
365 template<typename uintType> char const *toHex(uintType value, uint32 width=128);
366 
367 //  Format specifications for printf()
368 
//  Naming convention:
//    F_xxx    a complete conversion, "%" included
//    F_xxxP   only the conversion letters, for building a custom "%...",
//    F_xxxI   a conversion whose field width comes from an argument ("%*")

//  Pointers

#define F_PTR       "0x%016p"

//  Characters

#define F_C         "%c"
#define F_CP        "c"
#define F_CI        "%*c"

//  Strings

#define F_STR       "%s"
#define F_STRP      "s"
#define F_STRI      "%*s"

//  Integers

#define F_S16       "%" PRId16
#define F_S16P      PRId16
#define F_S16I      "%*" PRId16

#define F_U16       "%" PRIu16
#define F_U16P      PRIu16
#define F_U16I      "%*" PRIu16

#define F_S32       "%" PRId32
#define F_S32P      PRId32
#define F_S32I      "%*" PRId32

#define F_U32       "%" PRIu32
#define F_U32P      PRIu32
#define F_U32I      "%*" PRIu32

#define F_S64       "%" PRId64
#define F_S64P      PRId64
#define F_S64I      "%*" PRId64

#define F_U64       "%" PRIu64
#define F_U64P      PRIu64
#define F_U64I      "%*" PRIu64

#define F_X64       "%016" PRIx64
#define F_X64P      PRIx64
#define F_X64I      "%*" PRIx64

//  Floating points

#define F_F32       "%f"
#define F_F32P      "f"
#define F_F32I      "%*f"

#define F_F64       "%lf"
#define F_F64P      "lf"
#define F_F64I      "%*lf"

//  Standard typedefs.  off_t is printed as a signed 64-bit integer.

#define F_SIZE_T    "%zu"
#define F_SIZE_TP   "zu"
#define F_SIZE_TI   "%*zu"

#define F_OFF_T     F_S64
#define F_OFF_TP    F_S64P
#define F_OFF_TI    F_S64I
409 
410 
411 #endif  //  TYPES_H
412