1 /*
2  * Copyright (c) 2016 Jean-Michel Merliot
3  * Copyright (c) 2021 The DPS8M Development Team
4  *
5  * All rights reserved.
6  *
7  * This software is made available under the terms of the ICU
8  * License, version 1.8.1 or later.  For more details, see the
9  * LICENSE.md file at the top-level directory of this distribution.
10  */
11 
12 #ifndef DPS8_MATH128
13 # define DPS8_MATH128
14 
15 # ifndef CPPCHECK
16 
17 #  ifdef TEST_128
18 // gcc -m32 -DTEST_128 -DNEED_128 dps8_math128.c
19 #   include <stdbool.h>
20 #   include <stdint.h>
21 #   include <inttypes.h>
22 #   include <stdio.h>
23 
24 typedef struct { uint64_t h; uint64_t l; } uint128;
25 typedef struct { int64_t h; uint64_t l; } int128;
26 
27 typedef uint128 word72;
28 typedef int128  word72s;
29 typedef uint128 word73;
30 typedef uint128 word74;
31 
32 #   define construct_128(h, l) ((uint128_t) { (h), (l) })
33 #   define construct_s128(h, l) ((int128) { (h), (l) })
34 
35 #   define MASK63          0x7FFFFFFFFFFFFFFF
36 #   define MASK64          0xFFFFFFFFFFFFFFFF
37 #   define SIGN64          ((uint64_t)1U << 63)
38 #  else
39 
40 #   include "dps8.h"
41 #  endif
42 
43 # endif /* ifndef CPPCHECK */
44 
45 # include "dps8_math128.h"
46 
47 # ifdef NEED_128
48 
iszero_128(uint128 w)49 bool iszero_128 (uint128 w)
50   {
51     if (w.h || w.l)
52       return false;
53     return true;
54   }
55 
isnonzero_128(uint128 w)56 bool isnonzero_128 (uint128 w)
57   {
58     if (w.h || w.l)
59       return true;
60     return false;
61   }
62 
iseq_128(uint128 a,uint128 b)63 bool iseq_128 (uint128 a, uint128 b)
64   {
65     return a.h == b.h && a.l == b.l;
66   }
67 
isgt_128(uint128 a,uint128 b)68 bool isgt_128 (uint128 a, uint128 b)
69   {
70     if (a.h > b.h) return true;
71     if (a.h < b.h) return false;
72     if (a.l > b.l) return true;
73     return false;
74   }
75 
islt_128(uint128 a,uint128 b)76 bool islt_128 (uint128 a, uint128 b)
77   {
78     if (a.h < b.h) return true;
79     if (a.h > b.h) return false;
80     if (a.l < b.l) return true;
81     return false;
82   }
83 
isge_128(uint128 a,uint128 b)84 bool isge_128 (uint128 a, uint128 b)
85   {
86     if (a.h > b.h) return true;
87     if (a.h < b.h) return false;
88     if (a.l >= b.l) return true;
89     return false;
90   }
91 
islt_s128(int128 a,int128 b)92 bool islt_s128 (int128 a, int128 b)
93   {
94     if (a.h < b.h) return true;
95     if (a.h > b.h) return false;
96     if (a.l < b.l) return true;
97     return false;
98   }
99 
isgt_s128(int128 a,int128 b)100 bool isgt_s128 (int128 a, int128 b)
101   {
102     if (a.h > b.h) return true;
103     if (a.h < b.h) return false;
104     if (a.l > b.l) return true;
105     return false;
106   }
107 
and_128(uint128 a,uint128 b)108 uint128 and_128 (uint128 a, uint128 b)
109   {
110     return (uint128) {a.h & b.h, a.l & b.l};
111   }
112 
and_s128(int128 a,uint128 b)113 int128 and_s128 (int128 a, uint128 b)
114   {
115     return (int128) {a.h & (int64_t)b.h, a.l & b.l};
116   }
117 
or_128(uint128 a,uint128 b)118 uint128 or_128 (uint128 a, uint128 b)
119   {
120     return (uint128) {a.h | b.h, a.l | b.l};
121   }
122 
xor_128(uint128 a,uint128 b)123 uint128 xor_128 (uint128 a, uint128 b)
124   {
125     return (uint128) {a.h ^ b.h, a.l ^ b.l};
126   }
127 
complement_128(uint128 a)128 uint128 complement_128 (uint128 a)
129   {
130     return (uint128) {~ a.h, ~ a.l};
131   }
132 
add_128(uint128 a,uint128 b)133 uint128 add_128 (uint128 a, uint128 b)
134   {
135 // To do carry detection from low to high, bust the low into 1 bit/63
136 // bit fields; add the 63 bit fields checking for carry in the "sign"
137 // bit; add the 1 bit fields plus that carry
138 
139     uint64_t al63 = a.l & MASK63;  // low 63 bits of a
140     uint64_t bl63 = b.l & MASK63;  // low 63 bits of b
141     uint64_t l63 = al63 + bl63;    // low 63 bits of a + b, with carry into bit 64
142     uint64_t c63 = l63 & SIGN64;   // the carry out of low 63 a + b
143     l63 &= MASK63;               // lose the carry bit
144 
145     unsigned int al64 = (a.l >> 63) & 1; // bit 64 of a
146     unsigned int bl64 = (b.l >> 63) & 1; // bit 64 of b
147     unsigned int cl64 = (c63 >> 63) & 1; // the carry out of bit 63
148     uint64_t l64 = al64 + bl64 + cl64; // bit 64 a + b + carry in
149     unsigned int c64 = l64 > 1 ? 1 : 0;    // carry out of bit 64
150     l64 = (l64 & 1) << 63;         // put bit 64 in the right place
151     l64 |= l63;                    // put low 63 bits in
152     uint64_t h64 = a.h + b.h + c64;    // compute the high
153     return construct_128 (h64, l64);
154   }
155 
subtract_128(uint128 a,uint128 b)156 uint128 subtract_128 (uint128 a, uint128 b)
157   {
158 //printf ("sub a %016llx %016llx\n", a.h, a.l);
159 //printf ("sub b %016llx %016llx\n", b.h, b.l);
160     bool borrow = !! (b.l > a.l);
161     uint128 res = construct_128 (a.h - b.h, a.l - b.l);
162     if (borrow)
163       res.h --;
164 //printf ("sub res %016llx %016llx\n", res.h, res.l);
165     return res;
166   }
167 
negate_128(uint128 a)168 uint128 negate_128 (uint128 a)
169   {
170     return add_128 (complement_128 (a), construct_128 (0, 1));
171   }
172 
negate_s128(int128 a)173 int128 negate_s128 (int128 a)
174   {
175     uint128 t = add_128 (complement_128 (cast_128 (a)), construct_128 (0, 1));
176     return cast_s128 (t);
177   }
178 
lshift_128(uint128 a,unsigned int n)179 uint128 lshift_128 (uint128 a, unsigned int n)
180   {
181     if (n < 64)
182       {
183         uint64_t nmask = (uint64_t) ((~(MASK64 << n)));
184 //printf ("nmask %016llx\r\n", nmask);
185         // capture the high bits of the low half
186         uint64_t keep = (a.l >> (64 - n)) & nmask;
187 //printf ("keep %016llx\r\n", keep);
188         // shift the low half
189         uint64_t l = a.l << n;
190 //printf ("l %016llx\r\n", l);
191         // shift the high half
192         uint64_t h = a.h << n;
193 //printf ("h %016llx\r\n", h);
194         // put the bits from the low into the high
195         h |= keep;
196 //printf ("h|keep %016llx\r\n", h);
197         return construct_128 (h, l);
198       }
199     uint64_t h = a.l << (n - 64);
200     return construct_128 (h, 0);
201   }
202 
lshift_s128(int128 a,unsigned int n)203 int128 lshift_s128 (int128 a, unsigned int n)
204   {
205     uint128 t = lshift_128 (cast_128 (a), n);
206     return cast_s128 (t);
207   }
208 
rshift_128(uint128 a,unsigned int n)209 uint128 rshift_128 (uint128 a, unsigned int n)
210   {
211 #  if 0
212     uint64_t sign = a.h & SIGN64;
213     if (n < 64)
214       {
215         uint64_t nmask = (uint64_t) ((~(-1 << n)));
216         // capture the low bits of the high half
217         uint64_t keep = (a.h & nmask) << (64 - n);
218         // shift the low half
219         uint64_t l = a.l >> n;
220         // shifting zeros in from on high
221         l &= (uint64_t) (-(1 << (64 - n)));
222         // put in the bits from the high half
223         l |= keep;
224         // shift the high half
225         uint64_t h = a.h >> n;
226         if (sign)
227           // extending the signbit
228           l |= nmask << (64 - n);
229         else
230           // shifting zeros from on high
231           l &= (uint64_t) (-(1 << (64 - n)));
232         return construct_128 (h, l);
233       }
234     if (n == 64)
235       {
236         if (sign)
237           return construct_128 (MASK64, a.h);
238         return construct_128 (0, a.h);
239       }
240     uint64_t nmask = (uint64_t) ((1LLU << (n - 65)) - 1);
241 printf ("nmask %016llx\n", nmask);
242     uint64_t l = a.h >> (n - 64);
243 printf ("l %016llx\n", l);
244     uint64_t h = sign ? MASK64 : 0;
245 printf ("h %016llx\n", h);
246     if (sign)
247       {
248         // extending the signbit
249         l |= nmask << (64 - n);
250       }
251     else
252       {
253         // shifting zeros from on high
254         l &= (uint64_t) (~(-1 << (64 - n)));
255       }
256 printf ("l2 %016llx\n", l);
257 #  endif
258 
259     uint64_t h = a.h;
260     uint64_t l = a.l;
261     uint64_t sign = a.h & SIGN64;
262     while (n)
263       {
264         uint64_t b = (h & 1) ? SIGN64 : 0;
265         h >>= 1;
266         h |= sign;
267         l >>= 1;
268         l &= MASK63;
269         l |= b;
270         n --;
271       }
272     return construct_128 (h, l);
273   }
274 
rshift_s128(int128 a,unsigned int n)275 int128 rshift_s128 (int128 a, unsigned int n)
276   {
277     uint128 t = rshift_128 (cast_128 (a), n);
278     return cast_s128 (t);
279   }
280 
281 
282 // See: http://www.icodeguru.com/Embedded/Hacker's-Delight/
283 
mulmn(uint32_t w[],uint32_t u[],uint32_t v[],int m,int n)284 static void mulmn (uint32_t w[], uint32_t u[],
285                    uint32_t v[], int m, int n)
286   {
287 //for (int i = m - 1; i >= 0; i --) fprintf (stderr, "%08x", u [i]);
288 //fprintf (stderr, "  ");
289 //for (int i = n - 1; i >= 0; i --) fprintf (stderr, "%08x", v [i]);
290 //fprintf (stderr, "\n");
291     uint64_t k, t;
292     int i, j;
293 
294     for (i = 0; i < m; i++)
295        w[i] = 0;
296 
297     for (j = 0; j < n; j++)
298       {
299         k = 0;
300         for (i = 0; i < m; i++)
301           {
302             t = (uint64_t) u[i] * (uint64_t) v[j] + (uint64_t) w[i + j] + k;
303 //printf ("%d %d %016llx\n",i, j, t);
304             w[i + j] = (uint32_t) t;        // (I.e., t & 0xFFFF).
305             k = t >> 32;
306           }
307         w[j + m] = (uint32_t) k;
308       }
309   }
310 
mulmns(uint32_t w[],uint32_t u[],uint32_t v[],int m,int n)311 static void mulmns (uint32_t w[], uint32_t u[],
312                     uint32_t v[], int m, int n)
313   {
314     mulmn (w, u, v, m, n);
315 
316     // Now w[] has the unsigned product. Correct by
317     // subtracting v*2**16m if u < 0, and
318     // subtracting u*2**16n if v < 0.
319 
320     if ((int32_t)u[m - 1] < 0)
321       {
322         int b = 0;                  // Initialize borrow.
323         for (int j = 0; j < n; j++)
324           {
325             int t = (int) w[j + m] - (int) v[j] - b;
326             w[j + m] = (uint32_t) t;
327             b = t >> 31;
328           }
329       }
330     if ((int32_t)v[n - 1] < 0)
331       {
332         int b = 0;
333         for (int i = 0; i < m; i++)
334           {
335             int t = (int) w[i + n] - (int) u[i] - b;
336             w[i + n] = (uint32_t) t;
337             b = t >> 31;
338           }
339       }
340     return;
341   }
342 
nlz(unsigned x)343 static int32_t nlz (unsigned x)
344   {
345     unsigned y;
346     int n;
347 
348     n = 32;
349     y = x >>16; if (y != 0) {n = n -16;x = y;}
350     y = x >> 8; if (y != 0) {n = n - 8;x = y;}
351     y = x >> 4; if (y != 0) {n = n - 4;x = y;}
352     y = x >> 2; if (y != 0) {n = n - 2;x = y;}
353     y = x >> 1; if (y != 0) return n - 2;
354     return n - (int) x;
355  }
356 
divmnu(uint16_t q[],uint16_t r[],const uint16_t u[],const uint16_t v[],int m,int n)357 int divmnu (uint16_t q[], uint16_t r[],
358             const uint16_t u[], const uint16_t v[],
359             int m, int n)
360   {
361 
362     const uint32_t b = 65536; // Number base (16 bits).
363     //uint16_t *un, *vn;        // Normalized form of u, v.
364     unsigned qhat;            // Estimated quotient digit.
365     unsigned rhat;            // A remainder.
366     unsigned p;               // Product of two digits.
367     int s, i, j, t, k;
368 
369     if (m < n || n <= 0 || v[n-1] == 0)
370       return 1;               // Return if invalid param.
371 
372     // Take care of the case of a single-digit span
373     if (n == 1)
374       {
375         k = 0;
376         for (j = m - 1; j >= 0; j--)
377           {
378             q[j] = (uint16_t) (((unsigned int) k*b + u[j])/v[0]);    // divisor here.
379             k = (int) (((unsigned int) k*b + u[j]) - q[j]*v[0]);
380           }
381         if (r != NULL) r[0] = (uint16_t) k;
382         return 0;
383       }
384 
385     // Normalize by shifting v left just enough so that
386     // its high-order bit is on, and shift u left the
387     // same amount. We may have to append a high-order
388     // digit on the dividend; we do that unconditionally.
389 
390     s = nlz (v[n-1]) - 16;      // 0 <= s <= 16.
391     //vn = (uint16_t *) alloca (2*n);
392     uint16_t vn [n];
393     for (i = n - 1; i > 0; i--)
394       vn[i] = (uint16_t) (v[i] << s) | (uint16_t) (v[i-1] >> (16-s));
395     vn[0] = (uint16_t) (v[0] << s);
396 
397     //un = (uint16_t *)alloca(2*(m + 1));
398     uint16_t un [m+1];
399     un[m] = u[m-1] >> (16-s);
400     for (i = m - 1; i > 0; i--)
401       un[i] = (uint16_t) (u[i] << s) | (uint16_t) (u[i-1] >> (16-s));
402     un[0] = (uint16_t) (u[0] << s);
403     for (j = m - n; j >= 0; j--)
404       {       // Main loop.
405         // Compute estimate qhat of q[j].
406         qhat = (un[j+n]*b + un[j+n-1])/vn[n-1];
407         rhat = (un[j+n]*b + un[j+n-1]) - qhat*vn[n-1];
408 again:
409         if (qhat >= b || qhat*vn[n-2] > b*rhat + un[j+n-2])
410           {
411             qhat = qhat - 1;
412             rhat = rhat + vn[n-1];
413             if (rhat < b) goto again;
414           }
415 
416         // Multiply and subtract.
417         k = 0;
418         for (i = 0; i < n; i++)
419           {
420             p = qhat*vn[i];
421             t = (int32_t) un[i+j] - k - (int32_t) (p & 0xFFFF);
422             un[i+j] = (uint16_t) t;
423             k = (int) (p >> 16) - (t >> 16);
424           }
425         t = un[j+n] - k;
426         un[j+n] = (uint16_t) t;
427 
428         q[j] = (uint16_t) qhat;            // Store quotient digit.
429         if (t < 0)
430           {            // If we subtracted too
431             q[j] = q[j] - 1;       // much, add back.
432             k = 0;
433             for (i = 0; i < n; i++)
434               {
435                 t = un[i+j] + vn[i] + k;
436                 un[i+j] = (uint16_t) t;
437                 k = t >> 16;
438                }
439              un[j+n] = (uint16_t) (un[j+n] + k);
440           }
441       } // End j.
442     // If the caller wants the remainder, unnormalize
443     // it and pass it back.
444     if (r != NULL)
445       {
446         for (i = 0; i < n; i++)
447           r[i] = (uint16_t) (un[i] >> s) | (uint16_t) (un[i+1] << (16-s));
448       }
449     return 0;
450   }
451 
multiply_128(uint128 a,uint128 b)452 uint128 multiply_128 (uint128 a, uint128 b)
453   {
454 //printf ("%016llx%016llx %016llx%016llx\n", a.h,a.l,b.h,b.l);
455     const int l = 4;
456     uint32_t w[l+l], u[l], v[l];
457 
458     u[3] = (uint32_t) (a.h >> 32);
459     u[2] = (uint32_t) a.h;
460     u[1] = (uint32_t) (a.l >> 32);
461     u[0] = (uint32_t) a.l;
462     v[3] = (uint32_t) (b.h >> 32);
463     v[2] = (uint32_t) b.h;
464     v[1] = (uint32_t) (b.l >> 32);
465     v[0] = (uint32_t) b.l;
466     mulmn (w, u, v, l, l);
467     return construct_128 (
468        (((uint64_t) w[3]) << 32) | w[2],
469        (((uint64_t) w[1]) << 32) | w[0]);
470   }
471 
multiply_s128(int128 a,int128 b)472 int128 multiply_s128 (int128 a, int128 b)
473   {
474 //printf ("%016llx%016llx %016llx%016llx\n", a.h,a.l,b.h,b.l);
475     const int l = 4;
476     uint32_t w[l+l], u[l], v[l];
477 
478     u[3] = (uint32_t) (a.h >> 32);
479     u[2] = (uint32_t) a.h;
480     u[1] = (uint32_t) (a.l >> 32);
481     u[0] = (uint32_t) a.l;
482     v[3] = (uint32_t) (b.h >> 32);
483     v[2] = (uint32_t) b.h;
484     v[1] = (uint32_t) (b.l >> 32);
485     v[0] = (uint32_t) b.l;
486     mulmns (w, u, v, l, l);
487     return construct_s128 (
488        (((int64_t) w[3]) << 32) | w[2],
489        (((uint64_t) w[1]) << 32) | w[0]);
490   }
491 
492 // Note: divisor is < 2^16
divide_128(uint128 a,uint128 b,uint128 * remp)493 uint128 divide_128 (uint128 a, uint128 b, uint128 * remp)
494   {
495     const int m = 8;
496     const int n = 8;
497     uint16_t q[m], u[m], v[n];
498     u[0] = (uint16_t) a.l;
499     u[1] = (uint16_t) (a.l >> 16);
500     u[2] = (uint16_t) (a.l >> 32);
501     u[3] = (uint16_t) (a.l >> 48);
502     u[4] = (uint16_t) a.h;
503     u[5] = (uint16_t) (a.h >> 16);
504     u[6] = (uint16_t) (a.h >> 32);
505     u[7] = (uint16_t) (a.h >> 48);
506 
507     v[0] = (uint16_t) b.l;
508     v[1] = (uint16_t) (b.l >> 16);
509     v[2] = (uint16_t) (b.l >> 32);
510     v[3] = (uint16_t) (b.l >> 48);
511     v[4] = (uint16_t) b.h;
512     v[5] = (uint16_t) (b.h >> 16);
513     v[6] = (uint16_t) (b.h >> 32);
514     v[7] = (uint16_t) (b.h >> 48);
515 
516     int normlen;
517     for (normlen = 8; normlen > 0; normlen --)
518       if (v [normlen - 1])
519         break;
520     uint16_t r [8] = { 8 * 0 };
521     divmnu (q, remp ? r : NULL, u, v, m, normlen);
522     if (remp)
523       {
524         * remp =  construct_128 (
525        (((uint64_t) r [7]) << 48) |
526        (((uint64_t) r [6]) << 32) |
527        (((uint64_t) r [5]) << 16) |
528        (((uint64_t) r [4]) <<  0),
529        (((uint64_t) r [3]) << 48) |
530        (((uint64_t) r [2]) << 32) |
531        (((uint64_t) r [1]) << 16) |
532        (((uint64_t) r [0]) <<  0));
533       }
534     return construct_128 (
535        (((uint64_t) q [7]) << 48) |
536        (((uint64_t) q [6]) << 32) |
537        (((uint64_t) q [5]) << 16) |
538        (((uint64_t) q [4]) <<  0),
539        (((uint64_t) q [3]) << 48) |
540        (((uint64_t) q [2]) << 32) |
541        (((uint64_t) q [1]) << 16) |
542        (((uint64_t) q [0]) <<  0));
543   }
544 
545 // Note: divisor is < 2^16
divide_128_16(uint128 a,uint16_t b,uint16_t * remp)546 uint128 divide_128_16 (uint128 a, uint16_t b, uint16_t * remp)
547   {
548     const int m = 8;
549     const int n = 1;
550     uint16_t q[m], u[m], v[n];
551     u[0] = (uint16_t) a.l;
552     u[1] = (uint16_t) (a.l >> 16);
553     u[2] = (uint16_t) (a.l >> 32);
554     u[3] = (uint16_t) (a.l >> 48);
555     u[4] = (uint16_t) a.h;
556     u[5] = (uint16_t) (a.h >> 16);
557     u[6] = (uint16_t) (a.h >> 32);
558     u[7] = (uint16_t) (a.h >> 48);
559 
560     v[0] = (uint16_t) b;
561     divmnu (q, remp, u, v, m, n);
562     return construct_128 (
563        (((uint64_t) q [7]) << 48) |
564        (((uint64_t) q [6]) << 32) |
565        (((uint64_t) q [5]) << 16) |
566        (((uint64_t) q [4]) <<  0),
567        (((uint64_t) q [3]) << 48) |
568        (((uint64_t) q [2]) << 32) |
569        (((uint64_t) q [1]) << 16) |
570        (((uint64_t) q [0]) <<  0));
571   }
572 
573 //divide_128_32 (
574 //00000000000000010000000000000000, 00010000) returned
575 //00000000000000000001000000000000, 00000000
576 //divide_128_32 (ffffffffffffffffffffffffffffffff, 00010000) returned f771ffffffffffffffffffffffffffff, 0000ffff
577 
578 // Note: divisor is < 2^32 and >= 2^16; i.e. high 16 bits *must* be none zero
divide_128_32(uint128 a,uint32_t b,uint32_t * remp)579 uint128 divide_128_32 (uint128 a, uint32_t b, uint32_t * remp)
580   {
581     const int m = 8;
582     const int n = 2;
583     uint16_t q[m], u[m], v[n], r[2];
584     u[0] = (uint16_t) a.l;
585     u[1] = (uint16_t) (a.l >> 16);
586     u[2] = (uint16_t) (a.l >> 32);
587     u[3] = (uint16_t) (a.l >> 48);
588     u[4] = (uint16_t) a.h;
589     u[5] = (uint16_t) (a.h >> 16);
590     u[6] = (uint16_t) (a.h >> 32);
591     u[7] = (uint16_t) (a.h >> 48);
592 
593     v[0] = (uint16_t) b;
594     v[1] = (uint16_t) (b >> 16);
595 
596     divmnu (q, remp ? r : NULL, u, v, m, n);
597     if (remp)
598       * remp = r [0] | (uint32_t) r[1] << 16;
599 
600     return construct_128 (
601        (((uint64_t) q [7]) << 48) |
602        (((uint64_t) q [6]) << 32) |
603        (((uint64_t) q [5]) << 16) |
604        (((uint64_t) q [4]) <<  0),
605        (((uint64_t) q [3]) << 48) |
606        (((uint64_t) q [2]) << 32) |
607        (((uint64_t) q [1]) << 16) |
608        (((uint64_t) q [0]) <<  0));
609   }
610 
611 #  ifdef TEST_128
612 
tisz(uint64_t h,uint64_t l,bool expect)613 static void tisz (uint64_t h, uint64_t l, bool expect)
614   {
615     bool r = iszero_128 (construct_128 (h, l));
616     if (r != expect)
617       fprintf (stderr, "iszero_128 (%llu, %llu) returned %u\n", h, l, r);
618     return;
619   }
620 
tand(uint64_t ah,uint64_t al,uint64_t bh,uint64_t bl,uint64_t rh,uint64_t rl)621 static void tand (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
622                   uint64_t rh, uint64_t rl)
623   {
624     uint128 a = construct_128 (ah, al);
625     uint128 b = construct_128 (bh, bl);
626     uint128 r = and_128 (a, b);
627     if (r.h != rh || r.l != rl)
628       fprintf (stderr, "and_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
629                ah, al, bh, bl, r.h, r.l);
630   }
631 
tor(uint64_t ah,uint64_t al,uint64_t bh,uint64_t bl,uint64_t rh,uint64_t rl)632 static void tor (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
633                  uint64_t rh, uint64_t rl)
634   {
635     uint128 a = construct_128 (ah, al);
636     uint128 b = construct_128 (bh, bl);
637     uint128 r = or_128 (a, b);
638     if (r.h != rh || r.l != rl)
639       fprintf (stderr, "or_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
640                ah, al, bh, bl, r.h, r.l);
641   }
642 
tcomp(uint64_t ah,uint64_t al,uint64_t rh,uint64_t rl)643 static void tcomp (uint64_t ah, uint64_t al,
644                    uint64_t rh, uint64_t rl)
645   {
646     uint128 a = construct_128 (ah, al);
647     uint128 r = complement_128 (a);
648     if (r.h != rh || r.l != rl)
649       fprintf (stderr, "complement_128 (%016llx%016llx) returned %016llx%016llx\n",
650                ah, al, r.h, r.l);
651   }
652 
tadd(uint64_t ah,uint64_t al,uint64_t bh,uint64_t bl,uint64_t rh,uint64_t rl)653 static void tadd (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
654                   uint64_t rh, uint64_t rl)
655   {
656     uint128 a = construct_128 (ah, al);
657     uint128 b = construct_128 (bh, bl);
658     uint128 r = add_128 (a, b);
659     if (r.h != rh || r.l != rl)
660       fprintf (stderr, "add_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
661                ah, al, bh, bl, r.h, r.l);
662   }
663 
tsub(uint64_t ah,uint64_t al,uint64_t bh,uint64_t bl,uint64_t rh,uint64_t rl)664 static void tsub (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
665                   uint64_t rh, uint64_t rl)
666   {
667     uint128 a = construct_128 (ah, al);
668     uint128 b = construct_128 (bh, bl);
669     uint128 r = subtract_128 (a, b);
670     if (r.h != rh || r.l != rl)
671       fprintf (stderr, "subtract_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
672                ah, al, bh, bl, r.h, r.l);
673   }
674 
tneg(uint64_t ah,uint64_t al,uint64_t rh,uint64_t rl)675 static void tneg (uint64_t ah, uint64_t al,
676                   uint64_t rh, uint64_t rl)
677   {
678     uint128 a = construct_128 (ah, al);
679     uint128 r = negate_128 (a);
680     if (r.h != rh || r.l != rl)
681       fprintf (stderr, "negate_128 (%016llx%016llx) returned %016llx%016llx\n",
682                ah, al, r.h, r.l);
683   }
684 
tgt(uint64_t ah,uint64_t al,uint64_t bh,uint64_t bl,bool expect)685 static void tgt (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
686                  bool expect)
687   {
688     uint128 a = construct_128 (ah, al);
689     uint128 b = construct_128 (bh, bl);
690     bool r = isgt_128 (a, b);
691     if (r != expect)
692       fprintf (stderr, "gt_128 (%016llx%016llx, %016llx%016llx) returned %u\n",
693                ah, al, bh, bl, r);
694   }
695 
tls(uint64_t ah,uint64_t al,unsigned int n,uint64_t rh,uint64_t rl)696 static void tls (uint64_t ah, uint64_t al, unsigned int n,
697                  uint64_t rh, uint64_t rl)
698   {
699     uint128 a = construct_128 (ah, al);
700     uint128 r = lshift_128 (a, n);
701     if (r.h != rh || r.l != rl)
702       fprintf (stderr, "lshift_128 (%016llx%016llx, %u) returned %016llx%016llx\n",
703                ah, al, n, r.h, r.l);
704   }
705 
trs(uint64_t ah,uint64_t al,unsigned int n,uint64_t rh,uint64_t rl)706 static void trs (uint64_t ah, uint64_t al, unsigned int n,
707                  uint64_t rh, uint64_t rl)
708   {
709     uint128 a = construct_128 (ah, al);
710     uint128 r = rshift_128 (a, n);
711     if (r.h != rh || r.l != rl)
712       fprintf (stderr, "rshift_128 (%016llx%016llx, %u) returned %016llx%016llx\n",
713                ah, al, n, r.h, r.l);
714   }
715 
tmul(uint64_t ah,uint64_t al,uint64_t bh,uint64_t bl,uint64_t rh,uint64_t rl)716 static void tmul (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
717                   uint64_t rh, uint64_t rl)
718   {
719     uint128 a = construct_128 (ah, al);
720     uint128 b = construct_128 (bh, bl);
721     uint128 r = multiply_128 (a, b);
722     if (r.h != rh || r.l != rl)
723       fprintf (stderr, "multiply_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
724                ah, al, bh, bl, r.h, r.l);
725   }
726 
tsmul(int64_t ah,uint64_t al,int64_t bh,uint64_t bl,int64_t rh,uint64_t rl)727 static void tsmul (int64_t ah, uint64_t al, int64_t bh, uint64_t bl,
728                   int64_t rh, uint64_t rl)
729   {
730     int128 a = construct_s128 (ah, al);
731     int128 b = construct_s128 (bh, bl);
732     int128 r = multiply_s128 (a, b);
733     if (r.h != rh || r.l != rl)
734       fprintf (stderr, "multiply_s128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
735                ah, al, bh, bl, r.h, r.l);
736   }
737 
tdiv16(uint64_t ah,uint64_t al,uint16_t b,uint64_t resh,uint64_t resl,uint16_t remainder)738 static void tdiv16 (uint64_t ah, uint64_t al, uint16_t b,
739                     uint64_t resh, uint64_t resl,
740                     uint16_t remainder)
741   {
742     uint128 a = construct_128 (ah, al);
743     uint16_t rem;
744     uint128 res = divide_128_16 (a, b, & rem);
745     if (res.h != resh || res.l != resl || rem != remainder)
746       fprintf (stderr, "divide_128_16 (%016llx%016llx, %04x) returned %016llx%016llx, %04x\n",
747                ah, al, b, res.h, res.l, rem);
748   }
749 
tdiv32(uint64_t ah,uint64_t al,uint32_t b,uint64_t resh,uint64_t resl,uint32_t remainder)750 static void tdiv32 (uint64_t ah, uint64_t al, uint32_t b,
751                   uint64_t resh, uint64_t resl,
752                   uint32_t remainder)
753   {
754     uint128 a = construct_128 (ah, al);
755     uint32_t rem;
756     uint128 res = divide_128_32 (a, b, & rem);
757     if (res.h != resh || res.l != resl || rem != remainder)
758       fprintf (stderr, "divide_128_32 (%016llx%016llx, %08x) returned %016llx%016llx, %08x\n",
759                ah, al, b, res.h, res.l, rem);
760   }
761 
main(int argc,char * argv[])762 int main (int argc, char * argv [])
763   {
764 
765     //uint128 x = construct_128 (0, 0);
766     //int128 y;
767     //y = * (int128 *) & x;
768 
769     tisz (0, 0, true);
770     tisz (1, 0, false);
771     tisz (0, 1, false);
772     tisz (1, 1, false);
773     tisz (SIGN64, 0, false);
774     tisz (0, SIGN64, false);
775     tisz (SIGN64, SIGN64, false);
776 
777     tand (0, 0,              0, 0,              0, 0);
778     tand (MASK64, MASK64,    0, 0,              0, 0);
779     tand (0, 0,              MASK64, MASK64,    0, 0);
780     tand (MASK64, MASK64,    MASK64, MASK64,    MASK64, MASK64);
781 
782     tor (0, 0,              0, 0,              0, 0);
783     tor (MASK64, MASK64,    0, 0,              MASK64, MASK64);
784     tor (0, 0,              MASK64, MASK64,    MASK64, MASK64);
785     tor (MASK64, MASK64,    MASK64, MASK64,    MASK64, MASK64);
786 
787     tcomp (MASK64,   MASK64,   0, 0);
788     tcomp (0, 0,     MASK64,   MASK64);
789     tcomp (0, 1,     MASK64,   MASK64 - 1);
790 
791     tadd (0, 0,      0, 0,             0, 0);
792     tadd (0, 0,      0, 1,             0, 1);
793     tadd (0, 1,      0, 0,             0, 1);
794     tadd (0, 1,      0, 1,             0, 2);
795     tadd (0, 1,      0, MASK64,        1, 0);
796     tadd (0, 1,      MASK64, MASK64,   0, 0);
797 
798     tsub (0, 0,             0, 0,             0, 0);
799     tsub (0, 1,             0, 1,             0, 0);
800     tsub (MASK64, MASK64,   MASK64, MASK64,   0, 0);
801     tsub (MASK64, MASK64,   0, 0,             MASK64, MASK64);
802     tsub (0, 0,             0, 1,             MASK64, MASK64);
803 
804 
805     tneg (0, 0,  0, 0);
806     tneg (0, 1,  MASK64, MASK64);
807     tneg (MASK64, MASK64, 0, 1);
808 
809     tgt (0, 0,              0, 0,              false);
810     tgt (0, 0,              0, 1,              false);
811     tgt (0, 1,              0, 0,              true);
812     tgt (MASK64, MASK64,    MASK64, MASK64,    false);
813     tgt (0, 0,              MASK64, MASK64,    false);
814     tgt (MASK64, MASK64,    0, 0,              true);
815 
816     tls (0, 0,              0,    0, 0);
817     tls (MASK64, MASK64,    0,    MASK64, MASK64);
818     tls (0, 1,            127,    SIGN64, 0);
819     tls (0, MASK64,        64,    MASK64, 0);
820     tls (0, MASK64,         1,    1, MASK64 - 1);
821     tls (0, 1,             64,    1, 0);
822     tls (0, 1,             63,    0, SIGN64);
823     tls (1, 0,             63,    SIGN64, 0);
824 
825     trs (0, 0,              0,    0, 0);
826     trs (MASK64, MASK64,    0,    MASK64, MASK64);
827     trs (SIGN64, 0,       127,    MASK64, MASK64);
828     trs (MASK64, 0,        64,    MASK64, MASK64);
829     trs (MASK64, 0,         1,    MASK64, SIGN64);
830     trs (1, 0,             64,    0, 1);
831     trs (1, 0,              1,    0, SIGN64);
832     trs (SIGN64, 0,        63,    MASK64, 0);
833     trs (SIGN64, 0,        64,    MASK64, SIGN64);
834 
835     tmul (0, 0,            0, 0,            0, 0);
836     tmul (MASK64, MASK64,  0, 0,            0, 0);
837     tmul (0, 0,            MASK64, MASK64,  0, 0);
838     tmul (0, 1,            0, 1,            0, 1);
839     tmul (0, 1,            0, 10,           0, 10);
840     tmul (0, 10,           0, 10,           0, 100);
841     tmul (0, 100,          0, 10,           0, 1000);
842     tmul (0, MASK64,       0, 2,            1, MASK64-1);
843 //printf ("%016llx\n", (uint64_t) -1ll * (uint64_t) -1ll);
844 //printf ("%016llx\n", (int64_t) -1ll * (int64_t) -1ll);
845     tmul (MASK64, MASK64, MASK64, MASK64,   0, 1);
846 
847     tsmul (0, 1, MASK64, MASK64, MASK64, MASK64);
848     tsmul (MASK64, MASK64, MASK64, MASK64, 0, 1);
849 
850 
851     tdiv16 (0, 1,           1,                0, 1,               0);
852     tdiv16 (0, 10,          2,                0, 5,               0);
853     tdiv16 (MASK64, MASK64, 16,               MASK64>>4, MASK64,  15);
854     tdiv16 (0, 3,           2,                0, 1,               1);
855 
856     tdiv32 (1, 0,           1 << 16,          0, 1ll << 48,         0);
857     tdiv32 (MASK64, MASK64, 1 << 16,          MASK64 >> 16, MASK64, 0xffff);
858     return 0;
859   }
860 #  endif
861 
862 # else
863 
864 #  if (__SIZEOF_LONG__ < 8) && ( !defined(__MINGW64__) || !defined(__MINGW32__) )
865 
866 #   include "dps8_math128.h"
867 
868 void __udivmodti3(UTItype div, UTItype dvd,UTItype *result,UTItype *remain);
869 UTItype __udivti3(UTItype div, UTItype dvd);
870 UTItype __udivti3(UTItype div, UTItype dvd);
871 
872 UTItype __umodti3(UTItype div, UTItype dvd);
873 
__udivti3(UTItype div,UTItype dvd)874 UTItype __udivti3(UTItype div, UTItype dvd)
875 {
876         UTItype result,remain;
877 
878         __udivmodti3(div,dvd,&result,&remain);
879 
880         return result;
881 }
882 
__udivmodti3(UTItype div,UTItype dvd,UTItype * result,UTItype * remain)883 void __udivmodti3(UTItype div, UTItype dvd,UTItype *result,UTItype *remain)
884 {
885         UTItype z1 = dvd;
886         UTItype z2 = (UTItype)1;
887 
888         *result = (UTItype)0;
889         *remain = div;
890 
891         if( z1 == (UTItype)0)
892                 1/0;
893 
894         while( z1 < *remain )
895         {
896                 z1 <<= 1 ;
897                 z2 <<= 1;
898         }
899 
900         do
901         {
902                 if( *remain >= z1 )
903                 {
904                         *remain -= z1;
905                         *result += z2;
906                 }
907                 z1 >>= 1;
908                 z2 >>= 1;
909         } while( z2 );
910 
911 }
912 
__divti3(TItype div,TItype dvd)913 TItype __divti3(TItype div, TItype dvd)
914 {
915         int sign=1;
916 
917         if (div < (TItype)0)
918         {
919                 sign = -1;
920                 div = -div;
921         }
922 
923         if (dvd < (TItype)0)
924         {
925                 sign = -sign;
926                 dvd = -dvd;
927         }
928 
929         if (sign > 0)
930                 return (TItype)__udivti3(div,dvd);
931         else
932                 return -((TItype)__udivti3(div,dvd));
933 }
934 
__modti3(TItype div,TItype dvd)935 TItype __modti3(TItype div, TItype dvd)
936 {
937         int sign=1;
938 
939         if (div < (TItype)0)
940         {
941                 sign = -1;
942                 div = -div;
943         }
944 
945         if (dvd < (TItype)0)
946         {
947                 sign = -sign;
948                 dvd = -dvd;
949         }
950 
951         if (sign > 0)
952                 return (TItype)__umodti3(div,dvd);
953         else
954                 return ((TItype)0-(TItype)__umodti3(div,dvd));
955 }
956 
__umodti3(UTItype div,UTItype dvd)957 UTItype __umodti3(UTItype div, UTItype dvd)
958 {
959         UTItype result,remain;
960 
961         __udivmodti3(div,dvd,&result,&remain);
962 
963         return remain;
964 }
965 
__multi3(TItype u,TItype v)966 TItype __multi3 (TItype u, TItype v)
967 {
968         TItype result = (TItype)0;
969         int sign = 1;
970 
971         if(u<0)
972         {
973                 sign = -1;
974                 u = -u;
975         }
976 
977         while (u!=(TItype)0)
978         {
979                 if( u&(TItype)1 )
980                         result += v;
981                 u>>=1;
982                 v<<=1;
983         }
984 
985         if ( sign < 0 )
986                 return -result;
987         else
988                 return result;
989 
990 }
991 #  endif
992 # endif
993 
994 #endif
995