1 /*
2 * Copyright (c) 2016 Jean-Michel Merliot
3 * Copyright (c) 2021 The DPS8M Development Team
4 *
5 * All rights reserved.
6 *
7 * This software is made available under the terms of the ICU
8 * License, version 1.8.1 or later. For more details, see the
9 * LICENSE.md file at the top-level directory of this distribution.
10 */
11
12 #ifndef DPS8_MATH128
13 # define DPS8_MATH128
14
15 # ifndef CPPCHECK
16
17 # ifdef TEST_128
18 // gcc -m32 -DTEST_128 -DNEED_128 dps8_math128.c
19 # include <stdbool.h>
20 # include <stdint.h>
21 # include <inttypes.h>
22 # include <stdio.h>
23
24 typedef struct { uint64_t h; uint64_t l; } uint128;
25 typedef struct { int64_t h; uint64_t l; } int128;
26
27 typedef uint128 word72;
28 typedef int128 word72s;
29 typedef uint128 word73;
30 typedef uint128 word74;
31
32 # define construct_128(h, l) ((uint128_t) { (h), (l) })
33 # define construct_s128(h, l) ((int128) { (h), (l) })
34
35 # define MASK63 0x7FFFFFFFFFFFFFFF
36 # define MASK64 0xFFFFFFFFFFFFFFFF
37 # define SIGN64 ((uint64_t)1U << 63)
38 # else
39
40 # include "dps8.h"
41 # endif
42
43 # endif /* ifndef CPPCHECK */
44
45 # include "dps8_math128.h"
46
47 # ifdef NEED_128
48
iszero_128(uint128 w)49 bool iszero_128 (uint128 w)
50 {
51 if (w.h || w.l)
52 return false;
53 return true;
54 }
55
isnonzero_128(uint128 w)56 bool isnonzero_128 (uint128 w)
57 {
58 if (w.h || w.l)
59 return true;
60 return false;
61 }
62
iseq_128(uint128 a,uint128 b)63 bool iseq_128 (uint128 a, uint128 b)
64 {
65 return a.h == b.h && a.l == b.l;
66 }
67
isgt_128(uint128 a,uint128 b)68 bool isgt_128 (uint128 a, uint128 b)
69 {
70 if (a.h > b.h) return true;
71 if (a.h < b.h) return false;
72 if (a.l > b.l) return true;
73 return false;
74 }
75
islt_128(uint128 a,uint128 b)76 bool islt_128 (uint128 a, uint128 b)
77 {
78 if (a.h < b.h) return true;
79 if (a.h > b.h) return false;
80 if (a.l < b.l) return true;
81 return false;
82 }
83
isge_128(uint128 a,uint128 b)84 bool isge_128 (uint128 a, uint128 b)
85 {
86 if (a.h > b.h) return true;
87 if (a.h < b.h) return false;
88 if (a.l >= b.l) return true;
89 return false;
90 }
91
islt_s128(int128 a,int128 b)92 bool islt_s128 (int128 a, int128 b)
93 {
94 if (a.h < b.h) return true;
95 if (a.h > b.h) return false;
96 if (a.l < b.l) return true;
97 return false;
98 }
99
isgt_s128(int128 a,int128 b)100 bool isgt_s128 (int128 a, int128 b)
101 {
102 if (a.h > b.h) return true;
103 if (a.h < b.h) return false;
104 if (a.l > b.l) return true;
105 return false;
106 }
107
and_128(uint128 a,uint128 b)108 uint128 and_128 (uint128 a, uint128 b)
109 {
110 return (uint128) {a.h & b.h, a.l & b.l};
111 }
112
and_s128(int128 a,uint128 b)113 int128 and_s128 (int128 a, uint128 b)
114 {
115 return (int128) {a.h & (int64_t)b.h, a.l & b.l};
116 }
117
or_128(uint128 a,uint128 b)118 uint128 or_128 (uint128 a, uint128 b)
119 {
120 return (uint128) {a.h | b.h, a.l | b.l};
121 }
122
xor_128(uint128 a,uint128 b)123 uint128 xor_128 (uint128 a, uint128 b)
124 {
125 return (uint128) {a.h ^ b.h, a.l ^ b.l};
126 }
127
complement_128(uint128 a)128 uint128 complement_128 (uint128 a)
129 {
130 return (uint128) {~ a.h, ~ a.l};
131 }
132
add_128(uint128 a,uint128 b)133 uint128 add_128 (uint128 a, uint128 b)
134 {
135 // To do carry detection from low to high, bust the low into 1 bit/63
136 // bit fields; add the 63 bit fields checking for carry in the "sign"
137 // bit; add the 1 bit fields plus that carry
138
139 uint64_t al63 = a.l & MASK63; // low 63 bits of a
140 uint64_t bl63 = b.l & MASK63; // low 63 bits of b
141 uint64_t l63 = al63 + bl63; // low 63 bits of a + b, with carry into bit 64
142 uint64_t c63 = l63 & SIGN64; // the carry out of low 63 a + b
143 l63 &= MASK63; // lose the carry bit
144
145 unsigned int al64 = (a.l >> 63) & 1; // bit 64 of a
146 unsigned int bl64 = (b.l >> 63) & 1; // bit 64 of b
147 unsigned int cl64 = (c63 >> 63) & 1; // the carry out of bit 63
148 uint64_t l64 = al64 + bl64 + cl64; // bit 64 a + b + carry in
149 unsigned int c64 = l64 > 1 ? 1 : 0; // carry out of bit 64
150 l64 = (l64 & 1) << 63; // put bit 64 in the right place
151 l64 |= l63; // put low 63 bits in
152 uint64_t h64 = a.h + b.h + c64; // compute the high
153 return construct_128 (h64, l64);
154 }
155
subtract_128(uint128 a,uint128 b)156 uint128 subtract_128 (uint128 a, uint128 b)
157 {
158 //printf ("sub a %016llx %016llx\n", a.h, a.l);
159 //printf ("sub b %016llx %016llx\n", b.h, b.l);
160 bool borrow = !! (b.l > a.l);
161 uint128 res = construct_128 (a.h - b.h, a.l - b.l);
162 if (borrow)
163 res.h --;
164 //printf ("sub res %016llx %016llx\n", res.h, res.l);
165 return res;
166 }
167
negate_128(uint128 a)168 uint128 negate_128 (uint128 a)
169 {
170 return add_128 (complement_128 (a), construct_128 (0, 1));
171 }
172
negate_s128(int128 a)173 int128 negate_s128 (int128 a)
174 {
175 uint128 t = add_128 (complement_128 (cast_128 (a)), construct_128 (0, 1));
176 return cast_s128 (t);
177 }
178
lshift_128(uint128 a,unsigned int n)179 uint128 lshift_128 (uint128 a, unsigned int n)
180 {
181 if (n < 64)
182 {
183 uint64_t nmask = (uint64_t) ((~(MASK64 << n)));
184 //printf ("nmask %016llx\r\n", nmask);
185 // capture the high bits of the low half
186 uint64_t keep = (a.l >> (64 - n)) & nmask;
187 //printf ("keep %016llx\r\n", keep);
188 // shift the low half
189 uint64_t l = a.l << n;
190 //printf ("l %016llx\r\n", l);
191 // shift the high half
192 uint64_t h = a.h << n;
193 //printf ("h %016llx\r\n", h);
194 // put the bits from the low into the high
195 h |= keep;
196 //printf ("h|keep %016llx\r\n", h);
197 return construct_128 (h, l);
198 }
199 uint64_t h = a.l << (n - 64);
200 return construct_128 (h, 0);
201 }
202
lshift_s128(int128 a,unsigned int n)203 int128 lshift_s128 (int128 a, unsigned int n)
204 {
205 uint128 t = lshift_128 (cast_128 (a), n);
206 return cast_s128 (t);
207 }
208
rshift_128(uint128 a,unsigned int n)209 uint128 rshift_128 (uint128 a, unsigned int n)
210 {
211 # if 0
212 uint64_t sign = a.h & SIGN64;
213 if (n < 64)
214 {
215 uint64_t nmask = (uint64_t) ((~(-1 << n)));
216 // capture the low bits of the high half
217 uint64_t keep = (a.h & nmask) << (64 - n);
218 // shift the low half
219 uint64_t l = a.l >> n;
220 // shifting zeros in from on high
221 l &= (uint64_t) (-(1 << (64 - n)));
222 // put in the bits from the high half
223 l |= keep;
224 // shift the high half
225 uint64_t h = a.h >> n;
226 if (sign)
227 // extending the signbit
228 l |= nmask << (64 - n);
229 else
230 // shifting zeros from on high
231 l &= (uint64_t) (-(1 << (64 - n)));
232 return construct_128 (h, l);
233 }
234 if (n == 64)
235 {
236 if (sign)
237 return construct_128 (MASK64, a.h);
238 return construct_128 (0, a.h);
239 }
240 uint64_t nmask = (uint64_t) ((1LLU << (n - 65)) - 1);
241 printf ("nmask %016llx\n", nmask);
242 uint64_t l = a.h >> (n - 64);
243 printf ("l %016llx\n", l);
244 uint64_t h = sign ? MASK64 : 0;
245 printf ("h %016llx\n", h);
246 if (sign)
247 {
248 // extending the signbit
249 l |= nmask << (64 - n);
250 }
251 else
252 {
253 // shifting zeros from on high
254 l &= (uint64_t) (~(-1 << (64 - n)));
255 }
256 printf ("l2 %016llx\n", l);
257 # endif
258
259 uint64_t h = a.h;
260 uint64_t l = a.l;
261 uint64_t sign = a.h & SIGN64;
262 while (n)
263 {
264 uint64_t b = (h & 1) ? SIGN64 : 0;
265 h >>= 1;
266 h |= sign;
267 l >>= 1;
268 l &= MASK63;
269 l |= b;
270 n --;
271 }
272 return construct_128 (h, l);
273 }
274
rshift_s128(int128 a,unsigned int n)275 int128 rshift_s128 (int128 a, unsigned int n)
276 {
277 uint128 t = rshift_128 (cast_128 (a), n);
278 return cast_s128 (t);
279 }
280
281
282 // See: http://www.icodeguru.com/Embedded/Hacker's-Delight/
283
mulmn(uint32_t w[],uint32_t u[],uint32_t v[],int m,int n)284 static void mulmn (uint32_t w[], uint32_t u[],
285 uint32_t v[], int m, int n)
286 {
287 //for (int i = m - 1; i >= 0; i --) fprintf (stderr, "%08x", u [i]);
288 //fprintf (stderr, " ");
289 //for (int i = n - 1; i >= 0; i --) fprintf (stderr, "%08x", v [i]);
290 //fprintf (stderr, "\n");
291 uint64_t k, t;
292 int i, j;
293
294 for (i = 0; i < m; i++)
295 w[i] = 0;
296
297 for (j = 0; j < n; j++)
298 {
299 k = 0;
300 for (i = 0; i < m; i++)
301 {
302 t = (uint64_t) u[i] * (uint64_t) v[j] + (uint64_t) w[i + j] + k;
303 //printf ("%d %d %016llx\n",i, j, t);
304 w[i + j] = (uint32_t) t; // (I.e., t & 0xFFFF).
305 k = t >> 32;
306 }
307 w[j + m] = (uint32_t) k;
308 }
309 }
310
mulmns(uint32_t w[],uint32_t u[],uint32_t v[],int m,int n)311 static void mulmns (uint32_t w[], uint32_t u[],
312 uint32_t v[], int m, int n)
313 {
314 mulmn (w, u, v, m, n);
315
316 // Now w[] has the unsigned product. Correct by
317 // subtracting v*2**16m if u < 0, and
318 // subtracting u*2**16n if v < 0.
319
320 if ((int32_t)u[m - 1] < 0)
321 {
322 int b = 0; // Initialize borrow.
323 for (int j = 0; j < n; j++)
324 {
325 int t = (int) w[j + m] - (int) v[j] - b;
326 w[j + m] = (uint32_t) t;
327 b = t >> 31;
328 }
329 }
330 if ((int32_t)v[n - 1] < 0)
331 {
332 int b = 0;
333 for (int i = 0; i < m; i++)
334 {
335 int t = (int) w[i + n] - (int) u[i] - b;
336 w[i + n] = (uint32_t) t;
337 b = t >> 31;
338 }
339 }
340 return;
341 }
342
nlz(unsigned x)343 static int32_t nlz (unsigned x)
344 {
345 unsigned y;
346 int n;
347
348 n = 32;
349 y = x >>16; if (y != 0) {n = n -16;x = y;}
350 y = x >> 8; if (y != 0) {n = n - 8;x = y;}
351 y = x >> 4; if (y != 0) {n = n - 4;x = y;}
352 y = x >> 2; if (y != 0) {n = n - 2;x = y;}
353 y = x >> 1; if (y != 0) return n - 2;
354 return n - (int) x;
355 }
356
divmnu(uint16_t q[],uint16_t r[],const uint16_t u[],const uint16_t v[],int m,int n)357 int divmnu (uint16_t q[], uint16_t r[],
358 const uint16_t u[], const uint16_t v[],
359 int m, int n)
360 {
361
362 const uint32_t b = 65536; // Number base (16 bits).
363 //uint16_t *un, *vn; // Normalized form of u, v.
364 unsigned qhat; // Estimated quotient digit.
365 unsigned rhat; // A remainder.
366 unsigned p; // Product of two digits.
367 int s, i, j, t, k;
368
369 if (m < n || n <= 0 || v[n-1] == 0)
370 return 1; // Return if invalid param.
371
372 // Take care of the case of a single-digit span
373 if (n == 1)
374 {
375 k = 0;
376 for (j = m - 1; j >= 0; j--)
377 {
378 q[j] = (uint16_t) (((unsigned int) k*b + u[j])/v[0]); // divisor here.
379 k = (int) (((unsigned int) k*b + u[j]) - q[j]*v[0]);
380 }
381 if (r != NULL) r[0] = (uint16_t) k;
382 return 0;
383 }
384
385 // Normalize by shifting v left just enough so that
386 // its high-order bit is on, and shift u left the
387 // same amount. We may have to append a high-order
388 // digit on the dividend; we do that unconditionally.
389
390 s = nlz (v[n-1]) - 16; // 0 <= s <= 16.
391 //vn = (uint16_t *) alloca (2*n);
392 uint16_t vn [n];
393 for (i = n - 1; i > 0; i--)
394 vn[i] = (uint16_t) (v[i] << s) | (uint16_t) (v[i-1] >> (16-s));
395 vn[0] = (uint16_t) (v[0] << s);
396
397 //un = (uint16_t *)alloca(2*(m + 1));
398 uint16_t un [m+1];
399 un[m] = u[m-1] >> (16-s);
400 for (i = m - 1; i > 0; i--)
401 un[i] = (uint16_t) (u[i] << s) | (uint16_t) (u[i-1] >> (16-s));
402 un[0] = (uint16_t) (u[0] << s);
403 for (j = m - n; j >= 0; j--)
404 { // Main loop.
405 // Compute estimate qhat of q[j].
406 qhat = (un[j+n]*b + un[j+n-1])/vn[n-1];
407 rhat = (un[j+n]*b + un[j+n-1]) - qhat*vn[n-1];
408 again:
409 if (qhat >= b || qhat*vn[n-2] > b*rhat + un[j+n-2])
410 {
411 qhat = qhat - 1;
412 rhat = rhat + vn[n-1];
413 if (rhat < b) goto again;
414 }
415
416 // Multiply and subtract.
417 k = 0;
418 for (i = 0; i < n; i++)
419 {
420 p = qhat*vn[i];
421 t = (int32_t) un[i+j] - k - (int32_t) (p & 0xFFFF);
422 un[i+j] = (uint16_t) t;
423 k = (int) (p >> 16) - (t >> 16);
424 }
425 t = un[j+n] - k;
426 un[j+n] = (uint16_t) t;
427
428 q[j] = (uint16_t) qhat; // Store quotient digit.
429 if (t < 0)
430 { // If we subtracted too
431 q[j] = q[j] - 1; // much, add back.
432 k = 0;
433 for (i = 0; i < n; i++)
434 {
435 t = un[i+j] + vn[i] + k;
436 un[i+j] = (uint16_t) t;
437 k = t >> 16;
438 }
439 un[j+n] = (uint16_t) (un[j+n] + k);
440 }
441 } // End j.
442 // If the caller wants the remainder, unnormalize
443 // it and pass it back.
444 if (r != NULL)
445 {
446 for (i = 0; i < n; i++)
447 r[i] = (uint16_t) (un[i] >> s) | (uint16_t) (un[i+1] << (16-s));
448 }
449 return 0;
450 }
451
multiply_128(uint128 a,uint128 b)452 uint128 multiply_128 (uint128 a, uint128 b)
453 {
454 //printf ("%016llx%016llx %016llx%016llx\n", a.h,a.l,b.h,b.l);
455 const int l = 4;
456 uint32_t w[l+l], u[l], v[l];
457
458 u[3] = (uint32_t) (a.h >> 32);
459 u[2] = (uint32_t) a.h;
460 u[1] = (uint32_t) (a.l >> 32);
461 u[0] = (uint32_t) a.l;
462 v[3] = (uint32_t) (b.h >> 32);
463 v[2] = (uint32_t) b.h;
464 v[1] = (uint32_t) (b.l >> 32);
465 v[0] = (uint32_t) b.l;
466 mulmn (w, u, v, l, l);
467 return construct_128 (
468 (((uint64_t) w[3]) << 32) | w[2],
469 (((uint64_t) w[1]) << 32) | w[0]);
470 }
471
multiply_s128(int128 a,int128 b)472 int128 multiply_s128 (int128 a, int128 b)
473 {
474 //printf ("%016llx%016llx %016llx%016llx\n", a.h,a.l,b.h,b.l);
475 const int l = 4;
476 uint32_t w[l+l], u[l], v[l];
477
478 u[3] = (uint32_t) (a.h >> 32);
479 u[2] = (uint32_t) a.h;
480 u[1] = (uint32_t) (a.l >> 32);
481 u[0] = (uint32_t) a.l;
482 v[3] = (uint32_t) (b.h >> 32);
483 v[2] = (uint32_t) b.h;
484 v[1] = (uint32_t) (b.l >> 32);
485 v[0] = (uint32_t) b.l;
486 mulmns (w, u, v, l, l);
487 return construct_s128 (
488 (((int64_t) w[3]) << 32) | w[2],
489 (((uint64_t) w[1]) << 32) | w[0]);
490 }
491
492 // Note: divisor is < 2^16
divide_128(uint128 a,uint128 b,uint128 * remp)493 uint128 divide_128 (uint128 a, uint128 b, uint128 * remp)
494 {
495 const int m = 8;
496 const int n = 8;
497 uint16_t q[m], u[m], v[n];
498 u[0] = (uint16_t) a.l;
499 u[1] = (uint16_t) (a.l >> 16);
500 u[2] = (uint16_t) (a.l >> 32);
501 u[3] = (uint16_t) (a.l >> 48);
502 u[4] = (uint16_t) a.h;
503 u[5] = (uint16_t) (a.h >> 16);
504 u[6] = (uint16_t) (a.h >> 32);
505 u[7] = (uint16_t) (a.h >> 48);
506
507 v[0] = (uint16_t) b.l;
508 v[1] = (uint16_t) (b.l >> 16);
509 v[2] = (uint16_t) (b.l >> 32);
510 v[3] = (uint16_t) (b.l >> 48);
511 v[4] = (uint16_t) b.h;
512 v[5] = (uint16_t) (b.h >> 16);
513 v[6] = (uint16_t) (b.h >> 32);
514 v[7] = (uint16_t) (b.h >> 48);
515
516 int normlen;
517 for (normlen = 8; normlen > 0; normlen --)
518 if (v [normlen - 1])
519 break;
520 uint16_t r [8] = { 8 * 0 };
521 divmnu (q, remp ? r : NULL, u, v, m, normlen);
522 if (remp)
523 {
524 * remp = construct_128 (
525 (((uint64_t) r [7]) << 48) |
526 (((uint64_t) r [6]) << 32) |
527 (((uint64_t) r [5]) << 16) |
528 (((uint64_t) r [4]) << 0),
529 (((uint64_t) r [3]) << 48) |
530 (((uint64_t) r [2]) << 32) |
531 (((uint64_t) r [1]) << 16) |
532 (((uint64_t) r [0]) << 0));
533 }
534 return construct_128 (
535 (((uint64_t) q [7]) << 48) |
536 (((uint64_t) q [6]) << 32) |
537 (((uint64_t) q [5]) << 16) |
538 (((uint64_t) q [4]) << 0),
539 (((uint64_t) q [3]) << 48) |
540 (((uint64_t) q [2]) << 32) |
541 (((uint64_t) q [1]) << 16) |
542 (((uint64_t) q [0]) << 0));
543 }
544
545 // Note: divisor is < 2^16
divide_128_16(uint128 a,uint16_t b,uint16_t * remp)546 uint128 divide_128_16 (uint128 a, uint16_t b, uint16_t * remp)
547 {
548 const int m = 8;
549 const int n = 1;
550 uint16_t q[m], u[m], v[n];
551 u[0] = (uint16_t) a.l;
552 u[1] = (uint16_t) (a.l >> 16);
553 u[2] = (uint16_t) (a.l >> 32);
554 u[3] = (uint16_t) (a.l >> 48);
555 u[4] = (uint16_t) a.h;
556 u[5] = (uint16_t) (a.h >> 16);
557 u[6] = (uint16_t) (a.h >> 32);
558 u[7] = (uint16_t) (a.h >> 48);
559
560 v[0] = (uint16_t) b;
561 divmnu (q, remp, u, v, m, n);
562 return construct_128 (
563 (((uint64_t) q [7]) << 48) |
564 (((uint64_t) q [6]) << 32) |
565 (((uint64_t) q [5]) << 16) |
566 (((uint64_t) q [4]) << 0),
567 (((uint64_t) q [3]) << 48) |
568 (((uint64_t) q [2]) << 32) |
569 (((uint64_t) q [1]) << 16) |
570 (((uint64_t) q [0]) << 0));
571 }
572
573 //divide_128_32 (
574 //00000000000000010000000000000000, 00010000) returned
575 //00000000000000000001000000000000, 00000000
576 //divide_128_32 (ffffffffffffffffffffffffffffffff, 00010000) returned f771ffffffffffffffffffffffffffff, 0000ffff
577
578 // Note: divisor is < 2^32 and >= 2^16; i.e. high 16 bits *must* be none zero
divide_128_32(uint128 a,uint32_t b,uint32_t * remp)579 uint128 divide_128_32 (uint128 a, uint32_t b, uint32_t * remp)
580 {
581 const int m = 8;
582 const int n = 2;
583 uint16_t q[m], u[m], v[n], r[2];
584 u[0] = (uint16_t) a.l;
585 u[1] = (uint16_t) (a.l >> 16);
586 u[2] = (uint16_t) (a.l >> 32);
587 u[3] = (uint16_t) (a.l >> 48);
588 u[4] = (uint16_t) a.h;
589 u[5] = (uint16_t) (a.h >> 16);
590 u[6] = (uint16_t) (a.h >> 32);
591 u[7] = (uint16_t) (a.h >> 48);
592
593 v[0] = (uint16_t) b;
594 v[1] = (uint16_t) (b >> 16);
595
596 divmnu (q, remp ? r : NULL, u, v, m, n);
597 if (remp)
598 * remp = r [0] | (uint32_t) r[1] << 16;
599
600 return construct_128 (
601 (((uint64_t) q [7]) << 48) |
602 (((uint64_t) q [6]) << 32) |
603 (((uint64_t) q [5]) << 16) |
604 (((uint64_t) q [4]) << 0),
605 (((uint64_t) q [3]) << 48) |
606 (((uint64_t) q [2]) << 32) |
607 (((uint64_t) q [1]) << 16) |
608 (((uint64_t) q [0]) << 0));
609 }
610
611 # ifdef TEST_128
612
tisz(uint64_t h,uint64_t l,bool expect)613 static void tisz (uint64_t h, uint64_t l, bool expect)
614 {
615 bool r = iszero_128 (construct_128 (h, l));
616 if (r != expect)
617 fprintf (stderr, "iszero_128 (%llu, %llu) returned %u\n", h, l, r);
618 return;
619 }
620
tand(uint64_t ah,uint64_t al,uint64_t bh,uint64_t bl,uint64_t rh,uint64_t rl)621 static void tand (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
622 uint64_t rh, uint64_t rl)
623 {
624 uint128 a = construct_128 (ah, al);
625 uint128 b = construct_128 (bh, bl);
626 uint128 r = and_128 (a, b);
627 if (r.h != rh || r.l != rl)
628 fprintf (stderr, "and_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
629 ah, al, bh, bl, r.h, r.l);
630 }
631
tor(uint64_t ah,uint64_t al,uint64_t bh,uint64_t bl,uint64_t rh,uint64_t rl)632 static void tor (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
633 uint64_t rh, uint64_t rl)
634 {
635 uint128 a = construct_128 (ah, al);
636 uint128 b = construct_128 (bh, bl);
637 uint128 r = or_128 (a, b);
638 if (r.h != rh || r.l != rl)
639 fprintf (stderr, "or_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
640 ah, al, bh, bl, r.h, r.l);
641 }
642
tcomp(uint64_t ah,uint64_t al,uint64_t rh,uint64_t rl)643 static void tcomp (uint64_t ah, uint64_t al,
644 uint64_t rh, uint64_t rl)
645 {
646 uint128 a = construct_128 (ah, al);
647 uint128 r = complement_128 (a);
648 if (r.h != rh || r.l != rl)
649 fprintf (stderr, "complement_128 (%016llx%016llx) returned %016llx%016llx\n",
650 ah, al, r.h, r.l);
651 }
652
tadd(uint64_t ah,uint64_t al,uint64_t bh,uint64_t bl,uint64_t rh,uint64_t rl)653 static void tadd (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
654 uint64_t rh, uint64_t rl)
655 {
656 uint128 a = construct_128 (ah, al);
657 uint128 b = construct_128 (bh, bl);
658 uint128 r = add_128 (a, b);
659 if (r.h != rh || r.l != rl)
660 fprintf (stderr, "add_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
661 ah, al, bh, bl, r.h, r.l);
662 }
663
tsub(uint64_t ah,uint64_t al,uint64_t bh,uint64_t bl,uint64_t rh,uint64_t rl)664 static void tsub (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
665 uint64_t rh, uint64_t rl)
666 {
667 uint128 a = construct_128 (ah, al);
668 uint128 b = construct_128 (bh, bl);
669 uint128 r = subtract_128 (a, b);
670 if (r.h != rh || r.l != rl)
671 fprintf (stderr, "subtract_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
672 ah, al, bh, bl, r.h, r.l);
673 }
674
tneg(uint64_t ah,uint64_t al,uint64_t rh,uint64_t rl)675 static void tneg (uint64_t ah, uint64_t al,
676 uint64_t rh, uint64_t rl)
677 {
678 uint128 a = construct_128 (ah, al);
679 uint128 r = negate_128 (a);
680 if (r.h != rh || r.l != rl)
681 fprintf (stderr, "negate_128 (%016llx%016llx) returned %016llx%016llx\n",
682 ah, al, r.h, r.l);
683 }
684
tgt(uint64_t ah,uint64_t al,uint64_t bh,uint64_t bl,bool expect)685 static void tgt (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
686 bool expect)
687 {
688 uint128 a = construct_128 (ah, al);
689 uint128 b = construct_128 (bh, bl);
690 bool r = isgt_128 (a, b);
691 if (r != expect)
692 fprintf (stderr, "gt_128 (%016llx%016llx, %016llx%016llx) returned %u\n",
693 ah, al, bh, bl, r);
694 }
695
tls(uint64_t ah,uint64_t al,unsigned int n,uint64_t rh,uint64_t rl)696 static void tls (uint64_t ah, uint64_t al, unsigned int n,
697 uint64_t rh, uint64_t rl)
698 {
699 uint128 a = construct_128 (ah, al);
700 uint128 r = lshift_128 (a, n);
701 if (r.h != rh || r.l != rl)
702 fprintf (stderr, "lshift_128 (%016llx%016llx, %u) returned %016llx%016llx\n",
703 ah, al, n, r.h, r.l);
704 }
705
trs(uint64_t ah,uint64_t al,unsigned int n,uint64_t rh,uint64_t rl)706 static void trs (uint64_t ah, uint64_t al, unsigned int n,
707 uint64_t rh, uint64_t rl)
708 {
709 uint128 a = construct_128 (ah, al);
710 uint128 r = rshift_128 (a, n);
711 if (r.h != rh || r.l != rl)
712 fprintf (stderr, "rshift_128 (%016llx%016llx, %u) returned %016llx%016llx\n",
713 ah, al, n, r.h, r.l);
714 }
715
tmul(uint64_t ah,uint64_t al,uint64_t bh,uint64_t bl,uint64_t rh,uint64_t rl)716 static void tmul (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
717 uint64_t rh, uint64_t rl)
718 {
719 uint128 a = construct_128 (ah, al);
720 uint128 b = construct_128 (bh, bl);
721 uint128 r = multiply_128 (a, b);
722 if (r.h != rh || r.l != rl)
723 fprintf (stderr, "multiply_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
724 ah, al, bh, bl, r.h, r.l);
725 }
726
tsmul(int64_t ah,uint64_t al,int64_t bh,uint64_t bl,int64_t rh,uint64_t rl)727 static void tsmul (int64_t ah, uint64_t al, int64_t bh, uint64_t bl,
728 int64_t rh, uint64_t rl)
729 {
730 int128 a = construct_s128 (ah, al);
731 int128 b = construct_s128 (bh, bl);
732 int128 r = multiply_s128 (a, b);
733 if (r.h != rh || r.l != rl)
734 fprintf (stderr, "multiply_s128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
735 ah, al, bh, bl, r.h, r.l);
736 }
737
tdiv16(uint64_t ah,uint64_t al,uint16_t b,uint64_t resh,uint64_t resl,uint16_t remainder)738 static void tdiv16 (uint64_t ah, uint64_t al, uint16_t b,
739 uint64_t resh, uint64_t resl,
740 uint16_t remainder)
741 {
742 uint128 a = construct_128 (ah, al);
743 uint16_t rem;
744 uint128 res = divide_128_16 (a, b, & rem);
745 if (res.h != resh || res.l != resl || rem != remainder)
746 fprintf (stderr, "divide_128_16 (%016llx%016llx, %04x) returned %016llx%016llx, %04x\n",
747 ah, al, b, res.h, res.l, rem);
748 }
749
tdiv32(uint64_t ah,uint64_t al,uint32_t b,uint64_t resh,uint64_t resl,uint32_t remainder)750 static void tdiv32 (uint64_t ah, uint64_t al, uint32_t b,
751 uint64_t resh, uint64_t resl,
752 uint32_t remainder)
753 {
754 uint128 a = construct_128 (ah, al);
755 uint32_t rem;
756 uint128 res = divide_128_32 (a, b, & rem);
757 if (res.h != resh || res.l != resl || rem != remainder)
758 fprintf (stderr, "divide_128_32 (%016llx%016llx, %08x) returned %016llx%016llx, %08x\n",
759 ah, al, b, res.h, res.l, rem);
760 }
761
main(int argc,char * argv[])762 int main (int argc, char * argv [])
763 {
764
765 //uint128 x = construct_128 (0, 0);
766 //int128 y;
767 //y = * (int128 *) & x;
768
769 tisz (0, 0, true);
770 tisz (1, 0, false);
771 tisz (0, 1, false);
772 tisz (1, 1, false);
773 tisz (SIGN64, 0, false);
774 tisz (0, SIGN64, false);
775 tisz (SIGN64, SIGN64, false);
776
777 tand (0, 0, 0, 0, 0, 0);
778 tand (MASK64, MASK64, 0, 0, 0, 0);
779 tand (0, 0, MASK64, MASK64, 0, 0);
780 tand (MASK64, MASK64, MASK64, MASK64, MASK64, MASK64);
781
782 tor (0, 0, 0, 0, 0, 0);
783 tor (MASK64, MASK64, 0, 0, MASK64, MASK64);
784 tor (0, 0, MASK64, MASK64, MASK64, MASK64);
785 tor (MASK64, MASK64, MASK64, MASK64, MASK64, MASK64);
786
787 tcomp (MASK64, MASK64, 0, 0);
788 tcomp (0, 0, MASK64, MASK64);
789 tcomp (0, 1, MASK64, MASK64 - 1);
790
791 tadd (0, 0, 0, 0, 0, 0);
792 tadd (0, 0, 0, 1, 0, 1);
793 tadd (0, 1, 0, 0, 0, 1);
794 tadd (0, 1, 0, 1, 0, 2);
795 tadd (0, 1, 0, MASK64, 1, 0);
796 tadd (0, 1, MASK64, MASK64, 0, 0);
797
798 tsub (0, 0, 0, 0, 0, 0);
799 tsub (0, 1, 0, 1, 0, 0);
800 tsub (MASK64, MASK64, MASK64, MASK64, 0, 0);
801 tsub (MASK64, MASK64, 0, 0, MASK64, MASK64);
802 tsub (0, 0, 0, 1, MASK64, MASK64);
803
804
805 tneg (0, 0, 0, 0);
806 tneg (0, 1, MASK64, MASK64);
807 tneg (MASK64, MASK64, 0, 1);
808
809 tgt (0, 0, 0, 0, false);
810 tgt (0, 0, 0, 1, false);
811 tgt (0, 1, 0, 0, true);
812 tgt (MASK64, MASK64, MASK64, MASK64, false);
813 tgt (0, 0, MASK64, MASK64, false);
814 tgt (MASK64, MASK64, 0, 0, true);
815
816 tls (0, 0, 0, 0, 0);
817 tls (MASK64, MASK64, 0, MASK64, MASK64);
818 tls (0, 1, 127, SIGN64, 0);
819 tls (0, MASK64, 64, MASK64, 0);
820 tls (0, MASK64, 1, 1, MASK64 - 1);
821 tls (0, 1, 64, 1, 0);
822 tls (0, 1, 63, 0, SIGN64);
823 tls (1, 0, 63, SIGN64, 0);
824
825 trs (0, 0, 0, 0, 0);
826 trs (MASK64, MASK64, 0, MASK64, MASK64);
827 trs (SIGN64, 0, 127, MASK64, MASK64);
828 trs (MASK64, 0, 64, MASK64, MASK64);
829 trs (MASK64, 0, 1, MASK64, SIGN64);
830 trs (1, 0, 64, 0, 1);
831 trs (1, 0, 1, 0, SIGN64);
832 trs (SIGN64, 0, 63, MASK64, 0);
833 trs (SIGN64, 0, 64, MASK64, SIGN64);
834
835 tmul (0, 0, 0, 0, 0, 0);
836 tmul (MASK64, MASK64, 0, 0, 0, 0);
837 tmul (0, 0, MASK64, MASK64, 0, 0);
838 tmul (0, 1, 0, 1, 0, 1);
839 tmul (0, 1, 0, 10, 0, 10);
840 tmul (0, 10, 0, 10, 0, 100);
841 tmul (0, 100, 0, 10, 0, 1000);
842 tmul (0, MASK64, 0, 2, 1, MASK64-1);
843 //printf ("%016llx\n", (uint64_t) -1ll * (uint64_t) -1ll);
844 //printf ("%016llx\n", (int64_t) -1ll * (int64_t) -1ll);
845 tmul (MASK64, MASK64, MASK64, MASK64, 0, 1);
846
847 tsmul (0, 1, MASK64, MASK64, MASK64, MASK64);
848 tsmul (MASK64, MASK64, MASK64, MASK64, 0, 1);
849
850
851 tdiv16 (0, 1, 1, 0, 1, 0);
852 tdiv16 (0, 10, 2, 0, 5, 0);
853 tdiv16 (MASK64, MASK64, 16, MASK64>>4, MASK64, 15);
854 tdiv16 (0, 3, 2, 0, 1, 1);
855
856 tdiv32 (1, 0, 1 << 16, 0, 1ll << 48, 0);
857 tdiv32 (MASK64, MASK64, 1 << 16, MASK64 >> 16, MASK64, 0xffff);
858 return 0;
859 }
860 # endif
861
862 # else
863
864 # if (__SIZEOF_LONG__ < 8) && ( !defined(__MINGW64__) || !defined(__MINGW32__) )
865
866 # include "dps8_math128.h"
867
868 void __udivmodti3(UTItype div, UTItype dvd,UTItype *result,UTItype *remain);
869 UTItype __udivti3(UTItype div, UTItype dvd);
870 UTItype __udivti3(UTItype div, UTItype dvd);
871
872 UTItype __umodti3(UTItype div, UTItype dvd);
873
__udivti3(UTItype div,UTItype dvd)874 UTItype __udivti3(UTItype div, UTItype dvd)
875 {
876 UTItype result,remain;
877
878 __udivmodti3(div,dvd,&result,&remain);
879
880 return result;
881 }
882
__udivmodti3(UTItype div,UTItype dvd,UTItype * result,UTItype * remain)883 void __udivmodti3(UTItype div, UTItype dvd,UTItype *result,UTItype *remain)
884 {
885 UTItype z1 = dvd;
886 UTItype z2 = (UTItype)1;
887
888 *result = (UTItype)0;
889 *remain = div;
890
891 if( z1 == (UTItype)0)
892 1/0;
893
894 while( z1 < *remain )
895 {
896 z1 <<= 1 ;
897 z2 <<= 1;
898 }
899
900 do
901 {
902 if( *remain >= z1 )
903 {
904 *remain -= z1;
905 *result += z2;
906 }
907 z1 >>= 1;
908 z2 >>= 1;
909 } while( z2 );
910
911 }
912
__divti3(TItype div,TItype dvd)913 TItype __divti3(TItype div, TItype dvd)
914 {
915 int sign=1;
916
917 if (div < (TItype)0)
918 {
919 sign = -1;
920 div = -div;
921 }
922
923 if (dvd < (TItype)0)
924 {
925 sign = -sign;
926 dvd = -dvd;
927 }
928
929 if (sign > 0)
930 return (TItype)__udivti3(div,dvd);
931 else
932 return -((TItype)__udivti3(div,dvd));
933 }
934
__modti3(TItype div,TItype dvd)935 TItype __modti3(TItype div, TItype dvd)
936 {
937 int sign=1;
938
939 if (div < (TItype)0)
940 {
941 sign = -1;
942 div = -div;
943 }
944
945 if (dvd < (TItype)0)
946 {
947 sign = -sign;
948 dvd = -dvd;
949 }
950
951 if (sign > 0)
952 return (TItype)__umodti3(div,dvd);
953 else
954 return ((TItype)0-(TItype)__umodti3(div,dvd));
955 }
956
__umodti3(UTItype div,UTItype dvd)957 UTItype __umodti3(UTItype div, UTItype dvd)
958 {
959 UTItype result,remain;
960
961 __udivmodti3(div,dvd,&result,&remain);
962
963 return remain;
964 }
965
__multi3(TItype u,TItype v)966 TItype __multi3 (TItype u, TItype v)
967 {
968 TItype result = (TItype)0;
969 int sign = 1;
970
971 if(u<0)
972 {
973 sign = -1;
974 u = -u;
975 }
976
977 while (u!=(TItype)0)
978 {
979 if( u&(TItype)1 )
980 result += v;
981 u>>=1;
982 v<<=1;
983 }
984
985 if ( sign < 0 )
986 return -result;
987 else
988 return result;
989
990 }
991 # endif
992 # endif
993
994 #endif
995