1 //  binutils.h
2 //  ipdb / ipup / geod
3 //
4 //  Created by Dr. Rolf Jansen on 2016-08-13.
5 //  Copyright (c) 2016 projectworld.net. All rights reserved.
6 //
7 //  Redistribution and use in source and binary forms, with or without modification,
8 //  are permitted provided that the following conditions are met:
9 //
10 //  1. Redistributions of source code must retain the above copyright notice,
11 //     this list of conditions and the following disclaimer.
12 //
13 //  2. Redistributions in binary form must reproduce the above copyright notice,
14 //     this list of conditions and the following disclaimer in the documentation
15 //     and/or other materials provided with the distribution.
16 //
17 //  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
18 //  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
19 //  AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER
20 //  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 //  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 //  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
23 //  IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
24 //  THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 
27 #define noerr 0
28 
29 typedef unsigned char uchar;
30 
31 
32 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
33 
   // Little-endian host.
   // bN_k is the array index of the k-th least significant element when an
   // N-element value is addressed as an array: b2_0/b4_0/b8_0 select the
   // least significant element, which on this branch is index 0.
   #define b2_0 0
   #define b2_1 1

   #define b4_0 0
   #define b4_1 1
   #define b4_2 2
   #define b4_3 3

   #define b8_0 0
   #define b8_1 1
   #define b8_2 2
   #define b8_3 3
   #define b8_4 4
   #define b8_5 5
   #define b8_6 6
   #define b8_7 7

   // On this branch swapIntN() really reverses the byte order; on the other
   // branch of the byte order test it is the identity. It therefore converts
   // between host order and big-endian order.
   #define swapInt16(x) _swapInt16(x)
   #define swapInt32(x) _swapInt32(x)
   #define swapInt64(x) _swapInt64(x)
54 
55    #if defined(__i386__) || defined(__x86_64__)
56 
_swapInt16(uint16_t x)57       static inline uint16_t _swapInt16(uint16_t x)
58       {
59          __asm__("rolw $8,%0" : "+q" (x));
60          return x;
61       }
62 
_swapInt32(uint32_t x)63       static inline uint32_t _swapInt32(uint32_t x)
64       {
65          __asm__("bswapl %0" : "+q" (x));
66          return x;
67       }
68 
69    #else
70 
_swapInt16(uint16_t x)71       static inline uint16_t _swapInt16(uint16_t x)
72       {
73          uint16_t z;
74          uint8_t *p = (uint8_t *)&x;
75          uint8_t *q = (uint8_t *)&z;
76          q[0] = p[1];
77          q[1] = p[0];
78          return z;
79       }
80 
_swapInt32(uint32_t x)81       static inline uint32_t _swapInt32(uint32_t x)
82       {
83          uint32_t z;
84          uint8_t *p = (uint8_t *)&x;
85          uint8_t *q = (uint8_t *)&z;
86          q[0] = p[3];
87          q[1] = p[2];
88          q[2] = p[1];
89          q[3] = p[0];
90          return z;
91       }
92 
93    #endif
94 
95    #if defined(__x86_64__)
96 
_swapInt64(uint64_t x)97       static inline uint64_t _swapInt64(uint64_t x)
98       {
99          __asm__("bswapq %0" : "+q" (x));
100          return x;
101       }
102 
103    #else
104 
_swapInt64(uint64_t x)105       static inline uint64_t _swapInt64(uint64_t x)
106       {
107          uint64_t z;
108          uint8_t *p = (uint8_t *)&x;
109          uint8_t *q = (uint8_t *)&z;
110          q[0] = p[7];
111          q[1] = p[6];
112          q[2] = p[5];
113          q[3] = p[4];
114          q[4] = p[3];
115          q[5] = p[2];
116          q[6] = p[1];
117          q[7] = p[0];
118          return z;
119       }
120 
121    #endif
122 
123 #else
124 
   // Host is not little-endian.
   // bN_k is the array index of the k-th least significant element when an
   // N-element value is addressed as an array; on this branch the least
   // significant element has the highest index.
   #define b2_0 1
   #define b2_1 0

   #define b4_0 3
   #define b4_1 2
   #define b4_2 1
   #define b4_3 0

   #define b8_0 7
   #define b8_1 6
   #define b8_2 5
   #define b8_3 4
   #define b8_4 3
   #define b8_5 2
   #define b8_6 1
   #define b8_7 0

   // No byte swapping on this branch: swapIntN() yields its argument unchanged.
   #define swapInt16(x) (x)
   #define swapInt32(x) (x)
   #define swapInt64(x) (x)
145 
146 #endif
147 
148 
149 #if defined(__x86_64__)
150 
151    #include <x86intrin.h>
152 
   // 16-byte vectors in which every byte holds the same value, for byte-wise
   // comparison against 16 bytes of text at once.
   static const __m128i nul16 = {0x0000000000000000ULL, 0x0000000000000000ULL};  // 16 bytes with nul
   static const __m128i lfd16 = {0x0A0A0A0A0A0A0A0AULL, 0x0A0A0A0A0A0A0A0AULL};  // 16 bytes with line feed
   static const __m128i col16 = {0x3A3A3A3A3A3A3A3AULL, 0x3A3A3A3A3A3A3A3AULL};  // 16 bytes with colon ':' limit
   static const __m128i vtl16 = {0x7C7C7C7C7C7C7C7CULL, 0x7C7C7C7C7C7C7C7CULL};  // 16 bytes with vertical line '|' limit
   static const __m128i blk16 = {0x2020202020202020ULL, 0x2020202020202020ULL};  // 16 bytes with inner blank limit
   static const __m128i obl16 = {0x2121212121212121ULL, 0x2121212121212121ULL};  // 16 bytes with outer blank limit
159 
160    // Drop-in replacement for strlen(), utilizing some builtin SSSE3 instructions
strvlen(const char * str)161    static inline int strvlen(const char *str)
162    {
163       if (!str || !*str)
164          return 0;
165 
166       unsigned bmask;
167       if (bmask = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((__m128i *)str), nul16)))
168          return __builtin_ctz(bmask);
169 
170       for (int len = 16 - (intptr_t)str%16;; len += 16)
171          if (bmask = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_load_si128((__m128i *)&str[len]), nul16)))
172             return len + __builtin_ctz(bmask);
173    }
174 
linelen(const char * line)175    static inline int linelen(const char *line)
176    {
177       if (!line || !*line)
178          return 0;
179 
180       unsigned bmask;
181       if (bmask = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((__m128i *)line), nul16))
182                 | (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((__m128i *)line), lfd16)))
183          return __builtin_ctz(bmask);
184 
185       for (int len = 16 - (intptr_t)line%16;; len += 16)
186          if (bmask = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_load_si128((__m128i *)&line[len]), nul16))
187                    | (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_load_si128((__m128i *)&line[len]), lfd16)))
188             return len + __builtin_ctz(bmask);
189    }
190 
taglen(const char * tag)191    static inline int taglen(const char *tag)
192    {
193       if (!tag || !*tag)
194          return 0;
195 
196       unsigned bmask;
197       if (bmask = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((__m128i *)tag), nul16))
198                 | (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((__m128i *)tag), col16)))
199          return __builtin_ctz(bmask);
200 
201       for (int len = 16 - (intptr_t)tag%16;; len += 16)
202          if (bmask = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_load_si128((__m128i *)&tag[len]), nul16))
203                    | (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_load_si128((__m128i *)&tag[len]), col16)))
204             return len + __builtin_ctz(bmask);
205    }
206 
fieldlen(const char * field)207    static inline int fieldlen(const char *field)
208    {
209       if (!field || !*field)
210          return 0;
211 
212       unsigned bmask;
213       if (bmask = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((__m128i *)field), nul16))
214                 | (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((__m128i *)field), vtl16)))
215          return __builtin_ctz(bmask);
216 
217       for (int len = 16 - (intptr_t)field%16;; len += 16)
218          if (bmask = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_load_si128((__m128i *)&field[len]), nul16))
219                    | (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_load_si128((__m128i *)&field[len]), vtl16)))
220             return len + __builtin_ctz(bmask);
221    }
222 
wordlen(const char * word)223    static inline int wordlen(const char *word)
224    {
225       if (!word || !*word)
226          return 0;
227 
228       unsigned bmask;
229       if (bmask = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(blk16, _mm_max_epu8(blk16, _mm_loadu_si128((__m128i *)word)))))
230          return __builtin_ctz(bmask);      // ^^^^^^^ unsigned comparison (a >= b) is identical to a == maxu(a, b) ^^^^^^^
231 
232       for (int len = 16 - (intptr_t)word%16;; len += 16)
233          if (bmask = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(blk16, _mm_max_epu8(blk16, _mm_load_si128((__m128i *)&word[len])))))
234             return len + __builtin_ctz(bmask);
235    }
236 
blanklen(const char * blank)237    static inline int blanklen(const char *blank)
238    {
239       if (!blank || !*blank)
240          return 0;
241 
242       unsigned bmask;
243       if (bmask = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((__m128i *)blank), nul16))
244                 | (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(obl16, _mm_min_epu8(obl16, _mm_loadu_si128((__m128i *)blank)))))
245          return __builtin_ctz(bmask);      // ^^^^^^^ unsigned comparison (a <= b) is identical to a == minu(a, b) ^^^^^^^
246 
247       for (int len = 16 - (intptr_t)blank%16;; len += 16)
248          if (bmask = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_load_si128((__m128i *)&blank[len]), nul16))
249                    | (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(obl16, _mm_min_epu8(obl16, _mm_load_si128((__m128i *)&blank[len])))))
250             return len + __builtin_ctz(bmask);
251    }
252 
253 
254    // String copying from src to dst.
255    // m: Max. capacity of dst, including the final nul.
256    //    A value of 0 would indicate that the capacity of dst matches the size of src (including nul)
257    // l: On entry, src length or 0, on exit, the length of src, maybe NULL
258    // Returns the length of the resulting string in dst.
strmlcpy(char * dst,const char * src,int m,int * l)259    static inline int strmlcpy(char *dst, const char *src, int m, int *l)
260    {
261       int k, n;
262 
263       if (l)
264       {
265          if (!*l)
266             *l = strvlen(src);
267          k = *l;
268       }
269       else
270          k = strvlen(src);
271 
272       if (!m)
273          n = k;
274       else
275          n = (k < m) ? k : m-1;
276 
277       switch (n)
278       {
279          default:
280             if ((intptr_t)dst&0xF || (intptr_t)src&0xF)
281                for (k = 0; k  < n>>4<<1; k += 2)
282                   ((uint64_t *)dst)[k] = ((uint64_t *)src)[k], ((uint64_t *)dst)[k+1] = ((uint64_t *)src)[k+1];
283             else
284                for (k = 0; k  < n>>4; k++)
285                   _mm_store_si128(&((__m128i *)dst)[k], _mm_load_si128(&((__m128i *)src)[k]));
286          case 8 ... 15:
287             if ((k = n>>4<<1) < n>>3)
288                ((uint64_t *)dst)[k] = ((uint64_t *)src)[k];
289          case 4 ... 7:
290             if ((k = n>>3<<1) < n>>2)
291                ((uint32_t *)dst)[k] = ((uint32_t *)src)[k];
292          case 2 ... 3:
293             if ((k = n>>2<<1) < n>>1)
294                ((uint16_t *)dst)[k] = ((uint16_t *)src)[k];
295          case 1:
296             if ((k = n>>1<<1) < n)
297                dst[k] = src[k];
298          case 0:
299             ;
300       }
301 
302       dst[n] = '\0';
303       return n;
304    }
305 
306 #else
307 
308    #define strvlen(s) strlen(s)
309 
linelen(const char * line)310    static inline int linelen(const char *line)
311    {
312       if (!line || !*line)
313          return 0;
314 
315       int l;
316       for (l = 0; line[l] && line[l] != '\n'; l++)
317          ;
318       return l;
319    }
320 
taglen(const char * tag)321    static inline int taglen(const char *tag)
322    {
323       if (!tag || !*tag)
324          return 0;
325 
326       int l;
327       for (l = 0; tag[l] && tag[l] != ':'; l++)
328          ;
329       return l;
330    }
331 
fieldlen(const char * field)332    static inline int fieldlen(const char *field)
333    {
334       if (!field || !*field)
335          return 0;
336 
337       int l;
338       for (l = 0; field[l] && field[l] != '|'; l++)
339          ;
340       return l;
341    }
342 
wordlen(const char * word)343    static inline int wordlen(const char *word)
344    {
345       if (!word || !*word)
346          return 0;
347 
348       int l;
349       for (l = 0; (uchar)word[l] > ' '; l++)
350          ;
351       return l;
352    }
353 
blanklen(const char * blank)354    static inline int blanklen(const char *blank)
355    {
356       if (!blank || !*blank)
357          return 0;
358 
359       int l;
360       for (l = 0; blank[l] && (uchar)blank[l] <= ' '; l++)
361          ;
362       return l;
363    }
364 
365 
366    // String copying from src to dst.
367    // m: Max. capacity of dst, including the final nul.
368    //    A value of 0 would indicate that the capacity of dst matches the size of src (including nul)
369    // l: On entry, src length or 0, on exit, the length of src, maybe NULL
370    // Returns the length of the resulting string in dst.
strmlcpy(char * dst,const char * src,int m,int * l)371    static inline int strmlcpy(char *dst, const char *src, int m, int *l)
372    {
373       int k, n;
374 
375       if (l)
376       {
377          if (!*l)
378             *l = (int)strlen(src);
379          k = *l;
380       }
381       else
382          k = (int)strlen(src);
383 
384       if (!m)
385          n = k;
386       else
387          n = (k < m) ? k : m-1;
388 
389       strlcpy(dst, src, n+1);
390       return n;
391    }
392 
393 #endif
394 
395 
// forward skip white space  !!! s MUST NOT be NULL !!!
// White space is the blank and the control characters '\t' through '\r'.
// Returns a pointer to the first character at or after s that is not white
// space; this may be the terminating nul.
static inline char *skip(char *s)
{
   while (*s == ' ' || ('\t' <= *s && *s <= '\r'))
      s++;

   return s;
}
411 
// backward skip white space  !!! s MUST NOT be NULL !!!
// White space is the blank and the control characters '\t' through '\r'.
// Starting with the character in front of s, steps backwards over white space
// and returns a pointer to the position right behind the nearest non-white-space
// character. The scan has no lower bound: the caller must make sure that such
// a character exists in front of s.
static inline char *bskip(char *s)
{
   char c;

   do
      c = *--s;
   while (c == ' ' || ('\t' <= c && c <= '\r'));

   return s + 1;
}
426 
// Trims white space from both ends of s, in place  !!! s MUST NOT be NULL !!!
// Trailing white space is cut off by writing a nul into s. The return value
// points to the first non-white-space character within s, so it is not
// necessarily s itself.
static inline char *trim(char *s)
{
   char *end = s + strvlen(s);

   // Step backwards over trailing white space, but never beyond the start of
   // s. Without this bound, an empty or all-blank string would send the scan,
   // and the nul written below, in front of the buffer.
   while (end > s && (end[-1] == ' ' || ('\t' <= end[-1] && end[-1] <= '\r')))
      end--;
   *end = '\0';

   return skip(s);
}
432 
433 
// Converts the ASCII letters 'A'..'Z' within the first n characters of s to
// lower case, in place. Stops early at the terminating nul. All other bytes
// stay untouched. Returns s; a NULL s is passed through.
static inline char *lowercase(char *s, int n)
{
   if (!s)
      return s;

   for (int i = 0; i < n && s[i] != '\0'; i++)
      if (s[i] >= 'A' && s[i] <= 'Z')
         s[i] += 0x20;

   return s;
}
447 
// Converts the ASCII letters 'a'..'z' within the first n characters of s to
// upper case, in place. Stops early at the terminating nul. All other bytes
// stay untouched. Returns s; a NULL s is passed through.
static inline char *uppercase(char *s, int n)
{
   if (!s)
      return s;

   for (int i = 0; i < n && s[i] != '\0'; i++)
      if (s[i] >= 'a' && s[i] <= 'z')
         s[i] -= 0x20;

   return s;
}
461 
462 
463 #pragma mark ••• Fencing Memory Allocation Wrappers •••
464 
465 // void pointer reference
466 #define VPR(p) (void **)&(p)
467 
// Layout of a memory block as used by the allocation wrappers declared below:
// two words of meta data, followed by the payload.
// NOTE(review): the meaning of size and check is inferred from the names only;
// confirm against the implementation of allocate()/deallocate().
typedef struct
{
   ssize_t size;           // presumably the size of the payload in bytes -- TODO confirm
   size_t  check;          // presumably a fence value for validating the block -- TODO confirm
   char    payload[16];    // start of the payload; allocationMetaSize is the offset of this member
// size_t  zerowall;       // the allocation routines allocate sizeof(size_t) extra space and set this to zero
} allocation;
475 
476 #define allocationMetaSize (offsetof(allocation, payload) - offsetof(allocation, size))
477 
478 extern ssize_t gAllocationTotal;
479 
480 void *allocate(ssize_t size, bool cleanout);
481 void *reallocate(void *p, ssize_t size, bool cleanout, bool free_on_error);
482 void deallocate(void **p, bool cleanout);
483 void deallocate_batch(unsigned cleanout, ...);
484 
485 
486 #pragma mark ••• uint128 Arithmetic •••
487 
// 128-bit unsigned integer made of two 64-bit words. It serves as uint128t
// when the native 128-bit type is not selected. quad[b2_0] is the low word and
// quad[b2_1] is the high word, regardless of the byte order of the host.
typedef struct
{
   uint64_t quad[2];
} uint128s;
492 
493 #if ((defined(__x86_64__) || defined(__arm64__))) && !defined(UInt128_Testing)
494 
495    typedef __uint128_t uint128t;
496 
497    #define u64_to_u128t(u) ((uint128t)((uint64_t)(u)))
498 
eq_u128(uint128t a,uint128t b)499    static inline bool eq_u128(uint128t a, uint128t b)
500    {
501       return a == b;
502    }
503 
lt_u128(uint128t a,uint128t b)504    static inline bool lt_u128(uint128t a, uint128t b)
505    {
506       return a < b;
507    }
508 
le_u128(uint128t a,uint128t b)509    static inline bool le_u128(uint128t a, uint128t b)
510    {
511       return a <= b;
512    }
513 
gt_u128(uint128t a,uint128t b)514    static inline bool gt_u128(uint128t a, uint128t b)
515    {
516       return a > b;
517    }
518 
ge_u128(uint128t a,uint128t b)519    static inline bool ge_u128(uint128t a, uint128t b)
520    {
521       return a >= b;
522    }
523 
shl_u128(uint128t a,uint32_t n)524    static inline uint128t shl_u128(uint128t a, uint32_t n)
525    {
526       return a << n;
527    }
528 
shr_u128(uint128t a,uint32_t n)529    static inline uint128t shr_u128(uint128t a, uint32_t n)
530    {
531       return a >> n;
532    }
533 
inc_u128(uint128t * a)534    static inline uint128t inc_u128(uint128t *a)
535    {
536       return ++(*a);
537    }
538 
dec_u128(uint128t * a)539    static inline uint128t dec_u128(uint128t *a)
540    {
541       return --(*a);
542    }
543 
add_u128(uint128t a,uint128t b)544    static inline uint128t add_u128(uint128t a, uint128t b)
545    {
546       return a + b;
547    }
548 
sub_u128(uint128t a,uint128t b)549    static inline uint128t sub_u128(uint128t a, uint128t b)
550    {
551       return a - b;
552    }
553 
mul_u128(uint128t a,uint128t b)554    static inline uint128t mul_u128(uint128t a, uint128t b)
555    {
556       return a * b;
557    }
558 
div_u128(uint128t a,uint128t b)559    static inline uint128t div_u128(uint128t a, uint128t b)
560    {
561       return a / b;
562    }
563 
rem_u128(uint128t a,uint128t b)564    static inline uint128t rem_u128(uint128t a, uint128t b)
565    {
566       return a % b;
567    }
568 
569 #else
570 
   // No native 128-bit type selected: uint128t is the two-word struct.
   typedef uint128s uint128t;

   // Widens a 64-bit value to uint128t. The initializer order depends on the
   // byte order, so that the value always lands in the low word quad[b2_0]
   // and the high word quad[b2_1] is 0.
   #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
      #define u64_to_u128t(u) ((uint128t){(uint64_t)(u), 0})
   #else
      #define u64_to_u128t(u) ((uint128t){0, (uint64_t)(u)})
   #endif
578 
eq_u128(uint128t a,uint128t b)579    static inline bool eq_u128(uint128t a, uint128t b)
580    {
581       return (a.quad[b2_1] == b.quad[b2_1] && a.quad[b2_0] == b.quad[b2_0]);
582    }
583 
lt_u128(uint128t a,uint128t b)584    static inline bool lt_u128(uint128t a, uint128t b)
585    {
586       return (a.quad[b2_1] == b.quad[b2_1])
587             ? a.quad[b2_0] < b.quad[b2_0]
588             : a.quad[b2_1] < b.quad[b2_1];
589    }
590 
le_u128(uint128t a,uint128t b)591    static inline bool le_u128(uint128t a, uint128t b)
592    {
593       if (a.quad[b2_1] == b.quad[b2_1])
594          if (a.quad[b2_0] == b.quad[b2_0])
595             return true;
596          else
597             return a.quad[b2_0] < b.quad[b2_0];
598       else
599          return a.quad[b2_1] < b.quad[b2_1];
600    }
601 
gt_u128(uint128t a,uint128t b)602    static inline bool gt_u128(uint128t a, uint128t b)
603    {
604       return (a.quad[b2_1] == b.quad[b2_1])
605             ? a.quad[b2_0] > b.quad[b2_0]
606             : a.quad[b2_1] > b.quad[b2_1];
607    }
608 
ge_u128(uint128t a,uint128t b)609    static inline bool ge_u128(uint128t a, uint128t b)
610    {
611       if (a.quad[b2_1] == b.quad[b2_1])
612          if (a.quad[b2_0] == b.quad[b2_0])
613             return true;
614          else
615             return a.quad[b2_0] > b.quad[b2_0];
616       else
617          return a.quad[b2_1] > b.quad[b2_1];
618    }
619 
shl_u128(uint128t a,uint32_t n)620    static inline uint128t shl_u128(uint128t a, uint32_t n)
621    {
622       if (n &= 0x7F)
623       {
624          if (n > 64)
625             a.quad[b2_1] = a.quad[b2_0] << (n - 64),                         a.quad[b2_0] = 0;
626          else if (n < 64)
627             a.quad[b2_1] = (a.quad[b2_1] << n) | (a.quad[b2_0] >> (64 - n)), a.quad[b2_0] = a.quad[b2_0] << n;
628          else // (n == 64)
629             a.quad[b2_1] = a.quad[b2_0],                                     a.quad[b2_0] = 0;
630       }
631       return a;
632    }
633 
shr_u128(uint128t a,uint32_t n)634    static inline uint128t shr_u128(uint128t a, uint32_t n)
635    {
636       if (n &= 0x7F)
637       {
638          if (n > 64)
639             a.quad[b2_0] = a.quad[b2_1] >> (n - 64),                         a.quad[b2_1] = 0;
640          else if (n < 64)
641             a.quad[b2_0] = (a.quad[b2_0] >> n) | (a.quad[b2_1] << (64 - n)), a.quad[b2_1] = a.quad[b2_1] >> n;
642          else // (n == 64)
643             a.quad[b2_0] = a.quad[b2_1],                                     a.quad[b2_1] = 0;
644       }
645       return a;
646    }
647 
inc_u128(uint128t * a)648    static inline uint128t inc_u128(uint128t *a)
649    {
650       if (++(a->quad[b2_0]) == 0)
651          (a->quad[b2_1])++;
652       return *a;
653    }
654 
dec_u128(uint128t * a)655    static inline uint128t dec_u128(uint128t *a)
656    {
657       if ((a->quad[b2_0])-- == 0)
658          (a->quad[b2_1])--;
659       return *a;
660    }
661 
add_u128(uint128t a,uint128t b)662    static inline uint128t add_u128(uint128t a, uint128t b)
663    {
664       uint64_t c = a.quad[b2_0];
665       a.quad[b2_0] += b.quad[b2_0];
666       a.quad[b2_1] += b.quad[b2_1] + (a.quad[b2_0] < c);
667       return a;
668    }
669 
sub_u128(uint128t a,uint128t b)670    static inline uint128t sub_u128(uint128t a, uint128t b)
671    {
672       uint64_t c = a.quad[b2_0];
673       a.quad[b2_0] -= b.quad[b2_0];
674       a.quad[b2_1] -= b.quad[b2_1] + (a.quad[b2_0] > c);
675       return a;
676    }
677 
678    uint128t mul_u128(uint128t a, uint128t b);
679 
680    void divrem_u128(uint128t a, uint128t b, uint128t *q, uint128t *r);
div_u128(uint128t a,uint128t b)681    static inline uint128t div_u128(uint128t a, uint128t b)
682    {
683       divrem_u128(a, b, &a, NULL);
684       return a;
685    }
686 
rem_u128(uint128t a,uint128t b)687    static inline uint128t rem_u128(uint128t a, uint128t b)
688    {
689       divrem_u128(a, b, NULL, &a);
690       return a;
691    }
692 
693 #endif
694