1 /* ------------------------------------------------------------------ */
2 /* decBasic.c -- common base code for Basic decimal types */
3 /* ------------------------------------------------------------------ */
4 /* Copyright (c) IBM Corporation, 2000, 2010. All rights reserved. */
5 /* */
6 /* This software is made available under the terms of the */
7 /* ICU License -- ICU 1.8.1 and later. */
8 /* */
9 /* The description and User's Guide ("The decNumber C Library") for */
10 /* this software is included in the package as decNumber.pdf. This */
11 /* document is also available in HTML, together with specifications, */
12 /* testcases, and Web links, on the General Decimal Arithmetic page. */
13 /* */
14 /* Please send comments, suggestions, and corrections to the author: */
15 /* mfc@uk.ibm.com */
16 /* Mike Cowlishaw, IBM Fellow */
17 /* IBM UK, PO Box 31, Birmingham Road, Warwick CV34 5JL, UK */
18 /* ------------------------------------------------------------------ */
19 /* This module comprises code that is shared between decDouble and */
20 /* decQuad (but not decSingle). The main arithmetic operations are */
21 /* here (Add, Subtract, Multiply, FMA, and Division operators). */
22 /* */
23 /* Unlike decNumber, parameterization takes place at compile time */
24 /* rather than at runtime. The parameters are set in the decDouble.c */
25 /* (etc.) files, which then include this one to produce the compiled */
26 /* code. The functions here, therefore, are code shared between */
27 /* multiple formats. */
28 /* */
29 /* This must be included after decCommon.c. */
30 /* ------------------------------------------------------------------ */
31 // Names here refer to decFloat rather than to decDouble, etc., and
32 // the functions are in strict alphabetical order.
33
34 // The compile-time flags SINGLE, DOUBLE, and QUAD are set up in
35 // decCommon.c
36 #if !defined(QUAD)
37 #error decBasic.c must be included after decCommon.c
38 #endif
39 #if SINGLE
40 #error Routines in decBasic.c are for decDouble and decQuad only
41 #endif
42
43 /* Private constants */
44 #define DIVIDE 0x80000000 // Divide operations [as flags]
45 #define REMAINDER 0x40000000 // ..
46 #define DIVIDEINT 0x20000000 // ..
47 #define REMNEAR 0x10000000 // ..
48
49 /* Private functions (local, used only by routines in this module) */
50 static decFloat *decDivide(decFloat *, const decFloat *,
51 const decFloat *, decContext *, uInt);
52 static decFloat *decCanonical(decFloat *, const decFloat *);
53 static void decFiniteMultiply(bcdnum *, uByte *, const decFloat *,
54 const decFloat *);
55 static decFloat *decInfinity(decFloat *, const decFloat *);
56 static decFloat *decInvalid(decFloat *, decContext *);
57 static decFloat *decNaNs(decFloat *, const decFloat *, const decFloat *,
58 decContext *);
59 static Int decNumCompare(const decFloat *, const decFloat *, Flag);
60 static decFloat *decToIntegral(decFloat *, const decFloat *, decContext *,
61 enum rounding, Flag);
62 static uInt decToInt32(const decFloat *, decContext *, enum rounding,
63 Flag, Flag);
64
65 /* ------------------------------------------------------------------ */
66 /* decCanonical -- copy a decFloat, making canonical */
67 /* */
68 /* result gets the canonicalized df */
69 /* df is the decFloat to copy and make canonical */
70 /* returns result */
71 /* */
72 /* This is exposed via decFloatCanonical for Double and Quad only. */
73 /* This works on specials, too; no error or exception is possible. */
74 /* ------------------------------------------------------------------ */
decCanonical(decFloat * result,const decFloat * df)75 static decFloat * decCanonical(decFloat *result, const decFloat *df) {
76 uInt encode, precode, dpd; // work
77 uInt inword, uoff, canon; // ..
78 Int n; // counter (down)
79 if (df!=result) *result=*df; // effect copy if needed
80 if (DFISSPECIAL(result)) {
81 if (DFISINF(result)) return decInfinity(result, df); // clean Infinity
82 // is a NaN
83 DFWORD(result, 0)&=~ECONNANMASK; // clear ECON except selector
84 if (DFISCCZERO(df)) return result; // coefficient continuation is 0
85 // drop through to check payload
86 }
87 // return quickly if the coefficient continuation is canonical
88 { // declare block
89 #if DOUBLE
90 uInt sourhi=DFWORD(df, 0);
91 uInt sourlo=DFWORD(df, 1);
92 if (CANONDPDOFF(sourhi, 8)
93 && CANONDPDTWO(sourhi, sourlo, 30)
94 && CANONDPDOFF(sourlo, 20)
95 && CANONDPDOFF(sourlo, 10)
96 && CANONDPDOFF(sourlo, 0)) return result;
97 #elif QUAD
98 uInt sourhi=DFWORD(df, 0);
99 uInt sourmh=DFWORD(df, 1);
100 uInt sourml=DFWORD(df, 2);
101 uInt sourlo=DFWORD(df, 3);
102 if (CANONDPDOFF(sourhi, 4)
103 && CANONDPDTWO(sourhi, sourmh, 26)
104 && CANONDPDOFF(sourmh, 16)
105 && CANONDPDOFF(sourmh, 6)
106 && CANONDPDTWO(sourmh, sourml, 28)
107 && CANONDPDOFF(sourml, 18)
108 && CANONDPDOFF(sourml, 8)
109 && CANONDPDTWO(sourml, sourlo, 30)
110 && CANONDPDOFF(sourlo, 20)
111 && CANONDPDOFF(sourlo, 10)
112 && CANONDPDOFF(sourlo, 0)) return result;
113 #endif
114 } // block
115
116 // Loop to repair a non-canonical coefficent, as needed
117 inword=DECWORDS-1; // current input word
118 uoff=0; // bit offset of declet
119 encode=DFWORD(result, inword);
120 for (n=DECLETS-1; n>=0; n--) { // count down declets of 10 bits
121 dpd=encode>>uoff;
122 uoff+=10;
123 if (uoff>32) { // crossed uInt boundary
124 inword--;
125 encode=DFWORD(result, inword);
126 uoff-=32;
127 dpd|=encode<<(10-uoff); // get pending bits
128 }
129 dpd&=0x3ff; // clear uninteresting bits
130 if (dpd<0x16e) continue; // must be canonical
131 canon=BIN2DPD[DPD2BIN[dpd]]; // determine canonical declet
132 if (canon==dpd) continue; // have canonical declet
133 // need to replace declet
134 if (uoff>=10) { // all within current word
135 encode&=~(0x3ff<<(uoff-10)); // clear the 10 bits ready for replace
136 encode|=canon<<(uoff-10); // insert the canonical form
137 DFWORD(result, inword)=encode; // .. and save
138 continue;
139 }
140 // straddled words
141 precode=DFWORD(result, inword+1); // get previous
142 precode&=0xffffffff>>(10-uoff); // clear top bits
143 DFWORD(result, inword+1)=precode|(canon<<(32-(10-uoff)));
144 encode&=0xffffffff<<uoff; // clear bottom bits
145 encode|=canon>>(10-uoff); // insert canonical
146 DFWORD(result, inword)=encode; // .. and save
147 } // n
148 return result;
149 } // decCanonical
150
151 /* ------------------------------------------------------------------ */
152 /* decDivide -- divide operations */
153 /* */
154 /* result gets the result of dividing dfl by dfr: */
155 /* dfl is the first decFloat (lhs) */
156 /* dfr is the second decFloat (rhs) */
157 /* set is the context */
158 /* op is the operation selector */
159 /* returns result */
160 /* */
161 /* op is one of DIVIDE, REMAINDER, DIVIDEINT, or REMNEAR. */
162 /* ------------------------------------------------------------------ */
163 #define DIVCOUNT 0 // 1 to instrument subtractions counter
164 #define DIVBASE ((uInt)BILLION) // the base used for divide
165 #define DIVOPLEN DECPMAX9 // operand length ('digits' base 10**9)
166 #define DIVACCLEN (DIVOPLEN*3) // accumulator length (ditto)
decDivide(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set,uInt op)167 static decFloat * decDivide(decFloat *result, const decFloat *dfl,
168 const decFloat *dfr, decContext *set, uInt op) {
169 decFloat quotient; // for remainders
170 bcdnum num; // for final conversion
171 uInt acc[DIVACCLEN]; // coefficent in base-billion ..
172 uInt div[DIVOPLEN]; // divisor in base-billion ..
173 uInt quo[DIVOPLEN+1]; // quotient in base-billion ..
174 uByte bcdacc[(DIVOPLEN+1)*9+2]; // for quotient in BCD, +1, +1
175 uInt *msua, *msud, *msuq; // -> msu of acc, div, and quo
176 Int divunits, accunits; // lengths
177 Int quodigits; // digits in quotient
178 uInt *lsua, *lsuq; // -> current acc and quo lsus
179 Int length, multiplier; // work
180 uInt carry, sign; // ..
181 uInt *ua, *ud, *uq; // ..
182 uByte *ub; // ..
183 uInt uiwork; // for macros
184 uInt divtop; // top unit of div adjusted for estimating
185 #if DIVCOUNT
186 static uInt maxcount=0; // worst-seen subtractions count
187 uInt divcount=0; // subtractions count [this divide]
188 #endif
189
190 // calculate sign
191 num.sign=(DFWORD(dfl, 0)^DFWORD(dfr, 0)) & DECFLOAT_Sign;
192
193 if (DFISSPECIAL(dfl) || DFISSPECIAL(dfr)) { // either is special?
194 // NaNs are handled as usual
195 if (DFISNAN(dfl) || DFISNAN(dfr)) return decNaNs(result, dfl, dfr, set);
196 // one or two infinities
197 if (DFISINF(dfl)) {
198 if (DFISINF(dfr)) return decInvalid(result, set); // Two infinities bad
199 if (op&(REMAINDER|REMNEAR)) return decInvalid(result, set); // as is rem
200 // Infinity/x is infinite and quiet, even if x=0
201 DFWORD(result, 0)=num.sign;
202 return decInfinity(result, result);
203 }
204 // must be x/Infinity -- remainders are lhs
205 if (op&(REMAINDER|REMNEAR)) return decCanonical(result, dfl);
206 // divides: return zero with correct sign and exponent depending
207 // on op (Etiny for divide, 0 for divideInt)
208 decFloatZero(result);
209 if (op==DIVIDEINT) DFWORD(result, 0)|=num.sign; // add sign
210 else DFWORD(result, 0)=num.sign; // zeros the exponent, too
211 return result;
212 }
213 // next, handle zero operands (x/0 and 0/x)
214 if (DFISZERO(dfr)) { // x/0
215 if (DFISZERO(dfl)) { // 0/0 is undefined
216 decFloatZero(result);
217 DFWORD(result, 0)=DECFLOAT_qNaN;
218 set->status|=DEC_Division_undefined;
219 return result;
220 }
221 if (op&(REMAINDER|REMNEAR)) return decInvalid(result, set); // bad rem
222 set->status|=DEC_Division_by_zero;
223 DFWORD(result, 0)=num.sign;
224 return decInfinity(result, result); // x/0 -> signed Infinity
225 }
226 num.exponent=GETEXPUN(dfl)-GETEXPUN(dfr); // ideal exponent
227 if (DFISZERO(dfl)) { // 0/x (x!=0)
228 // if divide, result is 0 with ideal exponent; divideInt has
229 // exponent=0, remainders give zero with lower exponent
230 if (op&DIVIDEINT) {
231 decFloatZero(result);
232 DFWORD(result, 0)|=num.sign; // add sign
233 return result;
234 }
235 if (!(op&DIVIDE)) { // a remainder
236 // exponent is the minimum of the operands
237 num.exponent=MINI(GETEXPUN(dfl), GETEXPUN(dfr));
238 // if the result is zero the sign shall be sign of dfl
239 num.sign=DFWORD(dfl, 0)&DECFLOAT_Sign;
240 }
241 bcdacc[0]=0;
242 num.msd=bcdacc; // -> 0
243 num.lsd=bcdacc; // ..
244 return decFinalize(result, &num, set); // [divide may clamp exponent]
245 } // 0/x
246 // [here, both operands are known to be finite and non-zero]
247
248 // extract the operand coefficents into 'units' which are
249 // base-billion; the lhs is high-aligned in acc and the msu of both
250 // acc and div is at the right-hand end of array (offset length-1);
251 // the quotient can need one more unit than the operands as digits
252 // in it are not necessarily aligned neatly; further, the quotient
253 // may not start accumulating until after the end of the initial
254 // operand in acc if that is small (e.g., 1) so the accumulator
255 // must have at least that number of units extra (at the ls end)
256 GETCOEFFBILL(dfl, acc+DIVACCLEN-DIVOPLEN);
257 GETCOEFFBILL(dfr, div);
258 // zero the low uInts of acc
259 acc[0]=0;
260 acc[1]=0;
261 acc[2]=0;
262 acc[3]=0;
263 #if DOUBLE
264 #if DIVOPLEN!=2
265 #error Unexpected Double DIVOPLEN
266 #endif
267 #elif QUAD
268 acc[4]=0;
269 acc[5]=0;
270 acc[6]=0;
271 acc[7]=0;
272 #if DIVOPLEN!=4
273 #error Unexpected Quad DIVOPLEN
274 #endif
275 #endif
276
277 // set msu and lsu pointers
278 msua=acc+DIVACCLEN-1; // [leading zeros removed below]
279 msuq=quo+DIVOPLEN;
280 //[loop for div will terminate because operands are non-zero]
281 for (msud=div+DIVOPLEN-1; *msud==0;) msud--;
282 // the initial least-significant unit of acc is set so acc appears
283 // to have the same length as div.
284 // This moves one position towards the least possible for each
285 // iteration
286 divunits=(Int)(msud-div+1); // precalculate
287 lsua=msua-divunits+1; // initial working lsu of acc
288 lsuq=msuq; // and of quo
289
290 // set up the estimator for the multiplier; this is the msu of div,
291 // plus two bits from the unit below (if any) rounded up by one if
292 // there are any non-zero bits or units below that [the extra two
293 // bits makes for a much better estimate when the top unit is small]
294 divtop=*msud<<2;
295 if (divunits>1) {
296 uInt *um=msud-1;
297 uInt d=*um;
298 if (d>=750000000) {divtop+=3; d-=750000000;}
299 else if (d>=500000000) {divtop+=2; d-=500000000;}
300 else if (d>=250000000) {divtop++; d-=250000000;}
301 if (d) divtop++;
302 else for (um--; um>=div; um--) if (*um) {
303 divtop++;
304 break;
305 }
306 } // >1 unit
307
308 #if DECTRACE
309 {Int i;
310 printf("----- div=");
311 for (i=divunits-1; i>=0; i--) printf("%09ld ", (LI)div[i]);
312 printf("\n");}
313 #endif
314
315 // now collect up to DECPMAX+1 digits in the quotient (this may
316 // need OPLEN+1 uInts if unaligned)
317 quodigits=0; // no digits yet
318 for (;; lsua--) { // outer loop -- each input position
319 #if DECCHECK
320 if (lsua<acc) {
321 printf("Acc underrun...\n");
322 break;
323 }
324 #endif
325 #if DECTRACE
326 printf("Outer: quodigits=%ld acc=", (LI)quodigits);
327 for (ua=msua; ua>=lsua; ua--) printf("%09ld ", (LI)*ua);
328 printf("\n");
329 #endif
330 *lsuq=0; // default unit result is 0
331 for (;;) { // inner loop -- calculate quotient unit
332 // strip leading zero units from acc (either there initially or
333 // from subtraction below); this may strip all if exactly 0
334 for (; *msua==0 && msua>=lsua;) msua--;
335 accunits=(Int)(msua-lsua+1); // [maybe 0]
336 // subtraction is only necessary and possible if there are as
337 // least as many units remaining in acc for this iteration as
338 // there are in div
339 if (accunits<divunits) {
340 if (accunits==0) msua++; // restore
341 break;
342 }
343
344 // If acc is longer than div then subtraction is definitely
345 // possible (as msu of both is non-zero), but if they are the
346 // same length a comparison is needed.
347 // If a subtraction is needed then a good estimate of the
348 // multiplier for the subtraction is also needed in order to
349 // minimise the iterations of this inner loop because the
350 // subtractions needed dominate division performance.
351 if (accunits==divunits) {
352 // compare the high divunits of acc and div:
353 // acc<div: this quotient unit is unchanged; subtraction
354 // will be possible on the next iteration
355 // acc==div: quotient gains 1, set acc=0
356 // acc>div: subtraction necessary at this position
357 for (ud=msud, ua=msua; ud>div; ud--, ua--) if (*ud!=*ua) break;
358 // [now at first mismatch or lsu]
359 if (*ud>*ua) break; // next time...
360 if (*ud==*ua) { // all compared equal
361 *lsuq+=1; // increment result
362 msua=lsua; // collapse acc units
363 *msua=0; // .. to a zero
364 break;
365 }
366
367 // subtraction necessary; estimate multiplier [see above]
368 // if both *msud and *msua are small it is cost-effective to
369 // bring in part of the following units (if any) to get a
370 // better estimate (assume some other non-zero in div)
371 #define DIVLO 1000000U
372 #define DIVHI (DIVBASE/DIVLO)
373 #if DECUSE64
374 if (divunits>1) {
375 // there cannot be a *(msud-2) for DECDOUBLE so next is
376 // an exact calculation unless DECQUAD (which needs to
377 // assume bits out there if divunits>2)
378 uLong mul=(uLong)*msua * DIVBASE + *(msua-1);
379 uLong div=(uLong)*msud * DIVBASE + *(msud-1);
380 #if QUAD
381 if (divunits>2) div++;
382 #endif
383 mul/=div;
384 multiplier=(Int)mul;
385 }
386 else multiplier=*msua/(*msud);
387 #else
388 if (divunits>1 && *msua<DIVLO && *msud<DIVLO) {
389 multiplier=(*msua*DIVHI + *(msua-1)/DIVLO)
390 /(*msud*DIVHI + *(msud-1)/DIVLO +1);
391 }
392 else multiplier=(*msua<<2)/divtop;
393 #endif
394 }
395 else { // accunits>divunits
396 // msud is one unit 'lower' than msua, so estimate differently
397 #if DECUSE64
398 uLong mul;
399 // as before, bring in extra digits if possible
400 if (divunits>1 && *msua<DIVLO && *msud<DIVLO) {
401 mul=((uLong)*msua * DIVHI * DIVBASE) + *(msua-1) * DIVHI
402 + *(msua-2)/DIVLO;
403 mul/=(*msud*DIVHI + *(msud-1)/DIVLO +1);
404 }
405 else if (divunits==1) {
406 mul=(uLong)*msua * DIVBASE + *(msua-1);
407 mul/=*msud; // no more to the right
408 }
409 else {
410 mul=(uLong)(*msua) * (uInt)(DIVBASE<<2)
411 + (*(msua-1)<<2);
412 mul/=divtop; // [divtop already allows for sticky bits]
413 }
414 multiplier=(Int)mul;
415 #else
416 multiplier=*msua * ((DIVBASE<<2)/divtop);
417 #endif
418 }
419 if (multiplier==0) multiplier=1; // marginal case
420 *lsuq+=multiplier;
421
422 #if DIVCOUNT
423 // printf("Multiplier: %ld\n", (LI)multiplier);
424 divcount++;
425 #endif
426
427 // Carry out the subtraction acc-(div*multiplier); for each
428 // unit in div, do the multiply, split to units (see
429 // decFloatMultiply for the algorithm), and subtract from acc
430 #define DIVMAGIC 2305843009U // 2**61/10**9
431 #define DIVSHIFTA 29
432 #define DIVSHIFTB 32
433 carry=0;
434 for (ud=div, ua=lsua; ud<=msud; ud++, ua++) {
435 uInt lo, hop;
436 #if DECUSE64
437 uLong sub=(uLong)multiplier*(*ud)+carry;
438 if (sub<DIVBASE) {
439 carry=0;
440 lo=(uInt)sub;
441 }
442 else {
443 hop=(uInt)(sub>>DIVSHIFTA);
444 carry=(uInt)(((uLong)hop*DIVMAGIC)>>DIVSHIFTB);
445 // the estimate is now in hi; now calculate sub-hi*10**9
446 // to get the remainder (which will be <DIVBASE))
447 lo=(uInt)sub;
448 lo-=carry*DIVBASE; // low word of result
449 if (lo>=DIVBASE) {
450 lo-=DIVBASE; // correct by +1
451 carry++;
452 }
453 }
454 #else // 32-bit
455 uInt hi;
456 // calculate multiplier*(*ud) into hi and lo
457 LONGMUL32HI(hi, *ud, multiplier); // get the high word
458 lo=multiplier*(*ud); // .. and the low
459 lo+=carry; // add the old hi
460 carry=hi+(lo<carry); // .. with any carry
461 if (carry || lo>=DIVBASE) { // split is needed
462 hop=(carry<<3)+(lo>>DIVSHIFTA); // hi:lo/2**29
463 LONGMUL32HI(carry, hop, DIVMAGIC); // only need the high word
464 // [DIVSHIFTB is 32, so carry can be used directly]
465 // the estimate is now in carry; now calculate hi:lo-est*10**9;
466 // happily the top word of the result is irrelevant because it
467 // will always be zero so this needs only one multiplication
468 lo-=(carry*DIVBASE);
469 // the correction here will be at most +1; do it
470 if (lo>=DIVBASE) {
471 lo-=DIVBASE;
472 carry++;
473 }
474 }
475 #endif
476 if (lo>*ua) { // borrow needed
477 *ua+=DIVBASE;
478 carry++;
479 }
480 *ua-=lo;
481 } // ud loop
482 if (carry) *ua-=carry; // accdigits>divdigits [cannot borrow]
483 } // inner loop
484
485 // the outer loop terminates when there is either an exact result
486 // or enough digits; first update the quotient digit count and
487 // pointer (if any significant digits)
488 #if DECTRACE
489 if (*lsuq || quodigits) printf("*lsuq=%09ld\n", (LI)*lsuq);
490 #endif
491 if (quodigits) {
492 quodigits+=9; // had leading unit earlier
493 lsuq--;
494 if (quodigits>DECPMAX+1) break; // have enough
495 }
496 else if (*lsuq) { // first quotient digits
497 const uInt *pow;
498 for (pow=DECPOWERS; *lsuq>=*pow; pow++) quodigits++;
499 lsuq--;
500 // [cannot have >DECPMAX+1 on first unit]
501 }
502
503 if (*msua!=0) continue; // not an exact result
504 // acc is zero iff used all of original units and zero down to lsua
505 // (must also continue to original lsu for correct quotient length)
506 if (lsua>acc+DIVACCLEN-DIVOPLEN) continue;
507 for (; msua>lsua && *msua==0;) msua--;
508 if (*msua==0 && msua==lsua) break;
509 } // outer loop
510
511 // all of the original operand in acc has been covered at this point
512 // quotient now has at least DECPMAX+2 digits
513 // *msua is now non-0 if inexact and sticky bits
514 // lsuq is one below the last uint of the quotient
515 lsuq++; // set -> true lsu of quo
516 if (*msua) *lsuq|=1; // apply sticky bit
517
518 // quo now holds the (unrounded) quotient in base-billion; one
519 // base-billion 'digit' per uInt.
520 #if DECTRACE
521 printf("DivQuo:");
522 for (uq=msuq; uq>=lsuq; uq--) printf(" %09ld", (LI)*uq);
523 printf("\n");
524 #endif
525
526 // Now convert to BCD for rounding and cleanup, starting from the
527 // most significant end [offset by one into bcdacc to leave room
528 // for a possible carry digit if rounding for REMNEAR is needed]
529 for (uq=msuq, ub=bcdacc+1; uq>=lsuq; uq--, ub+=9) {
530 uInt top, mid, rem; // work
531 if (*uq==0) { // no split needed
532 UBFROMUI(ub, 0); // clear 9 BCD8s
533 UBFROMUI(ub+4, 0); // ..
534 *(ub+8)=0; // ..
535 continue;
536 }
537 // *uq is non-zero -- split the base-billion digit into
538 // hi, mid, and low three-digits
539 #define divsplit9 1000000 // divisor
540 #define divsplit6 1000 // divisor
541 // The splitting is done by simple divides and remainders,
542 // assuming the compiler will optimize these [GCC does]
543 top=*uq/divsplit9;
544 rem=*uq%divsplit9;
545 mid=rem/divsplit6;
546 rem=rem%divsplit6;
547 // lay out the nine BCD digits (plus one unwanted byte)
548 UBFROMUI(ub, UBTOUI(&BIN2BCD8[top*4]));
549 UBFROMUI(ub+3, UBTOUI(&BIN2BCD8[mid*4]));
550 UBFROMUI(ub+6, UBTOUI(&BIN2BCD8[rem*4]));
551 } // BCD conversion loop
552 ub--; // -> lsu
553
554 // complete the bcdnum; quodigits is correct, so the position of
555 // the first non-zero is known
556 num.msd=bcdacc+1+(msuq-lsuq+1)*9-quodigits;
557 num.lsd=ub;
558
559 // make exponent adjustments, etc
560 if (lsua<acc+DIVACCLEN-DIVOPLEN) { // used extra digits
561 num.exponent-=(Int)((acc+DIVACCLEN-DIVOPLEN-lsua)*9);
562 // if the result was exact then there may be up to 8 extra
563 // trailing zeros in the overflowed quotient final unit
564 if (*msua==0) {
565 for (; *ub==0;) ub--; // drop zeros
566 num.exponent+=(Int)(num.lsd-ub); // and adjust exponent
567 num.lsd=ub;
568 }
569 } // adjustment needed
570
571 #if DIVCOUNT
572 if (divcount>maxcount) { // new high-water nark
573 maxcount=divcount;
574 printf("DivNewMaxCount: %ld\n", (LI)maxcount);
575 }
576 #endif
577
578 if (op&DIVIDE) return decFinalize(result, &num, set); // all done
579
580 // Is DIVIDEINT or a remainder; there is more to do -- first form
581 // the integer (this is done 'after the fact', unlike as in
582 // decNumber, so as not to tax DIVIDE)
583
584 // The first non-zero digit will be in the first 9 digits, known
585 // from quodigits and num.msd, so there is always space for DECPMAX
586 // digits
587
588 length=(Int)(num.lsd-num.msd+1);
589 //printf("Length exp: %ld %ld\n", (LI)length, (LI)num.exponent);
590
591 if (length+num.exponent>DECPMAX) { // cannot fit
592 decFloatZero(result);
593 DFWORD(result, 0)=DECFLOAT_qNaN;
594 set->status|=DEC_Division_impossible;
595 return result;
596 }
597
598 if (num.exponent>=0) { // already an int, or need pad zeros
599 for (ub=num.lsd+1; ub<=num.lsd+num.exponent; ub++) *ub=0;
600 num.lsd+=num.exponent;
601 }
602 else { // too long: round or truncate needed
603 Int drop=-num.exponent;
604 if (!(op&REMNEAR)) { // simple truncate
605 num.lsd-=drop;
606 if (num.lsd<num.msd) { // truncated all
607 num.lsd=num.msd; // make 0
608 *num.lsd=0; // .. [sign still relevant]
609 }
610 }
611 else { // round to nearest even [sigh]
612 // round-to-nearest, in-place; msd is at or to right of bcdacc+1
613 // (this is a special case of Quantize -- q.v. for commentary)
614 uByte *roundat; // -> re-round digit
615 uByte reround; // reround value
616 *(num.msd-1)=0; // in case of left carry, or make 0
617 if (drop<length) roundat=num.lsd-drop+1;
618 else if (drop==length) roundat=num.msd;
619 else roundat=num.msd-1; // [-> 0]
620 reround=*roundat;
621 for (ub=roundat+1; ub<=num.lsd; ub++) {
622 if (*ub!=0) {
623 reround=DECSTICKYTAB[reround];
624 break;
625 }
626 } // check stickies
627 if (roundat>num.msd) num.lsd=roundat-1;
628 else {
629 num.msd--; // use the 0 ..
630 num.lsd=num.msd; // .. at the new MSD place
631 }
632 if (reround!=0) { // discarding non-zero
633 uInt bump=0;
634 // rounding is DEC_ROUND_HALF_EVEN always
635 if (reround>5) bump=1; // >0.5 goes up
636 else if (reround==5) // exactly 0.5000 ..
637 bump=*(num.lsd) & 0x01; // .. up iff [new] lsd is odd
638 if (bump!=0) { // need increment
639 // increment the coefficient; this might end up with 1000...
640 ub=num.lsd;
641 for (; UBTOUI(ub-3)==0x09090909; ub-=4) UBFROMUI(ub-3, 0);
642 for (; *ub==9; ub--) *ub=0; // at most 3 more
643 *ub+=1;
644 if (ub<num.msd) num.msd--; // carried
645 } // bump needed
646 } // reround!=0
647 } // remnear
648 } // round or truncate needed
649 num.exponent=0; // all paths
650 //decShowNum(&num, "int");
651
652 if (op&DIVIDEINT) return decFinalize(result, &num, set); // all done
653
654 // Have a remainder to calculate
655 decFinalize("ient, &num, set); // lay out the integer so far
656 DFWORD("ient, 0)^=DECFLOAT_Sign; // negate it
657 sign=DFWORD(dfl, 0); // save sign of dfl
658 decFloatFMA(result, "ient, dfr, dfl, set);
659 if (!DFISZERO(result)) return result;
660 // if the result is zero the sign shall be sign of dfl
661 DFWORD("ient, 0)=sign; // construct decFloat of sign
662 return decFloatCopySign(result, result, "ient);
663 } // decDivide
664
665 /* ------------------------------------------------------------------ */
666 /* decFiniteMultiply -- multiply two finite decFloats */
667 /* */
668 /* num gets the result of multiplying dfl and dfr */
669 /* bcdacc .. with the coefficient in this array */
670 /* dfl is the first decFloat (lhs) */
671 /* dfr is the second decFloat (rhs) */
672 /* */
673 /* This effects the multiplication of two decFloats, both known to be */
674 /* finite, leaving the result in a bcdnum ready for decFinalize (for */
675 /* use in Multiply) or in a following addition (FMA). */
676 /* */
677 /* bcdacc must have space for at least DECPMAX9*18+1 bytes. */
678 /* No error is possible and no status is set. */
679 /* ------------------------------------------------------------------ */
680 // This routine has two separate implementations of the core
681 // multiplication; both using base-billion. One uses only 32-bit
682 // variables (Ints and uInts) or smaller; the other uses uLongs (for
683 // multiplication and addition only). Both implementations cover
684 // both arithmetic sizes (DOUBLE and QUAD) in order to allow timing
685 // comparisons. In any one compilation only one implementation for
686 // each size can be used, and if DECUSE64 is 0 then use of the 32-bit
687 // version is forced.
688 //
689 // Historical note: an earlier version of this code also supported the
690 // 256-bit format and has been preserved. That is somewhat trickier
691 // during lazy carry splitting because the initial quotient estimate
692 // (est) can exceed 32 bits.
693
694 #define MULTBASE ((uInt)BILLION) // the base used for multiply
695 #define MULOPLEN DECPMAX9 // operand length ('digits' base 10**9)
696 #define MULACCLEN (MULOPLEN*2) // accumulator length (ditto)
697 #define LEADZEROS (MULACCLEN*9 - DECPMAX*2) // leading zeros always
698
699 // Assertions: exponent not too large and MULACCLEN is a multiple of 4
700 #if DECEMAXD>9
701 #error Exponent may overflow when doubled for Multiply
702 #endif
703 #if MULACCLEN!=(MULACCLEN/4)*4
704 // This assumption is used below only for initialization
705 #error MULACCLEN is not a multiple of 4
706 #endif
707
decFiniteMultiply(bcdnum * num,uByte * bcdacc,const decFloat * dfl,const decFloat * dfr)708 static void decFiniteMultiply(bcdnum *num, uByte *bcdacc,
709 const decFloat *dfl, const decFloat *dfr) {
710 uInt bufl[MULOPLEN]; // left coefficient (base-billion)
711 uInt bufr[MULOPLEN]; // right coefficient (base-billion)
712 uInt *ui, *uj; // work
713 uByte *ub; // ..
714 uInt uiwork; // for macros
715
716 #if DECUSE64
717 uLong accl[MULACCLEN]; // lazy accumulator (base-billion+)
718 uLong *pl; // work -> lazy accumulator
719 uInt acc[MULACCLEN]; // coefficent in base-billion ..
720 #else
721 uInt acc[MULACCLEN*2]; // accumulator in base-billion ..
722 #endif
723 uInt *pa; // work -> accumulator
724 //printf("Base10**9: OpLen=%d MulAcclen=%d\n", OPLEN, MULACCLEN);
725
726 /* Calculate sign and exponent */
727 num->sign=(DFWORD(dfl, 0)^DFWORD(dfr, 0)) & DECFLOAT_Sign;
728 num->exponent=GETEXPUN(dfl)+GETEXPUN(dfr); // [see assertion above]
729
730 /* Extract the coefficients and prepare the accumulator */
731 // the coefficients of the operands are decoded into base-billion
732 // numbers in uInt arrays (bufl and bufr, LSD at offset 0) of the
733 // appropriate size.
734 GETCOEFFBILL(dfl, bufl);
735 GETCOEFFBILL(dfr, bufr);
736 #if DECTRACE && 0
737 printf("CoeffbL:");
738 for (ui=bufl+MULOPLEN-1; ui>=bufl; ui--) printf(" %08lx", (LI)*ui);
739 printf("\n");
740 printf("CoeffbR:");
741 for (uj=bufr+MULOPLEN-1; uj>=bufr; uj--) printf(" %08lx", (LI)*uj);
742 printf("\n");
743 #endif
744
745 // start the 64-bit/32-bit differing paths...
746 #if DECUSE64
747
748 // zero the accumulator
749 #if MULACCLEN==4
750 accl[0]=0; accl[1]=0; accl[2]=0; accl[3]=0;
751 #else // use a loop
752 // MULACCLEN is a multiple of four, asserted above
753 for (pl=accl; pl<accl+MULACCLEN; pl+=4) {
754 *pl=0; *(pl+1)=0; *(pl+2)=0; *(pl+3)=0;// [reduce overhead]
755 } // pl
756 #endif
757
758 /* Effect the multiplication */
759 // The multiplcation proceeds using MFC's lazy-carry resolution
760 // algorithm from decNumber. First, the multiplication is
761 // effected, allowing accumulation of the partial products (which
762 // are in base-billion at each column position) into 64 bits
763 // without resolving back to base=billion after each addition.
764 // These 64-bit numbers (which may contain up to 19 decimal digits)
765 // are then split using the Clark & Cowlishaw algorithm (see below).
766 // [Testing for 0 in the inner loop is not really a 'win']
767 for (ui=bufr; ui<bufr+MULOPLEN; ui++) { // over each item in rhs
768 if (*ui==0) continue; // product cannot affect result
769 pl=accl+(ui-bufr); // where to add the lhs
770 for (uj=bufl; uj<bufl+MULOPLEN; uj++, pl++) { // over each item in lhs
771 // if (*uj==0) continue; // product cannot affect result
772 *pl+=((uLong)*ui)*(*uj);
773 } // uj
774 } // ui
775
776 // The 64-bit carries must now be resolved; this means that a
777 // quotient/remainder has to be calculated for base-billion (1E+9).
778 // For this, Clark & Cowlishaw's quotient estimation approach (also
779 // used in decNumber) is needed, because 64-bit divide is generally
780 // extremely slow on 32-bit machines, and may be slower than this
781 // approach even on 64-bit machines. This algorithm splits X
782 // using:
783 //
784 // magic=2**(A+B)/1E+9; // 'magic number'
785 // hop=X/2**A; // high order part of X (by shift)
786 // est=magic*hop/2**B // quotient estimate (may be low by 1)
787 //
788 // A and B are quite constrained; hop and magic must fit in 32 bits,
789 // and 2**(A+B) must be as large as possible (which is 2**61 if
790 // magic is to fit). Further, maxX increases with the length of
791 // the operands (and hence the number of partial products
792 // accumulated); maxX is OPLEN*(10**18), which is up to 19 digits.
793 //
794 // It can be shown that when OPLEN is 2 then the maximum error in
795 // the estimated quotient is <1, but for larger maximum x the
796 // maximum error is above 1 so a correction that is >1 may be
797 // needed. Values of A and B are chosen to satisfy the constraints
798 // just mentioned while minimizing the maximum error (and hence the
799 // maximum correction), as shown in the following table:
800 //
801 // Type OPLEN A B maxX maxError maxCorrection
802 // ---------------------------------------------------------
803 // DOUBLE 2 29 32 <2*10**18 0.63 1
804 // QUAD 4 30 31 <4*10**18 1.17 2
805 //
806 // In the OPLEN==2 case there is most choice, but the value for B
807 // of 32 has a big advantage as then the calculation of the
808 // estimate requires no shifting; the compiler can extract the high
809 // word directly after multiplying magic*hop.
810 #define MULMAGIC 2305843009U // 2**61/10**9 [both cases]
811 #if DOUBLE
812 #define MULSHIFTA 29
813 #define MULSHIFTB 32
814 #elif QUAD
815 #define MULSHIFTA 30
816 #define MULSHIFTB 31
817 #else
818 #error Unexpected type
819 #endif
820
821 #if DECTRACE
822 printf("MulAccl:");
823 for (pl=accl+MULACCLEN-1; pl>=accl; pl--)
824 printf(" %08lx:%08lx", (LI)(*pl>>32), (LI)(*pl&0xffffffff));
825 printf("\n");
826 #endif
827
828 for (pl=accl, pa=acc; pl<accl+MULACCLEN; pl++, pa++) { // each column position
829 uInt lo, hop; // work
830 uInt est; // cannot exceed 4E+9
831 if (*pl>=MULTBASE) {
832 // *pl holds a binary number which needs to be split
833 hop=(uInt)(*pl>>MULSHIFTA);
834 est=(uInt)(((uLong)hop*MULMAGIC)>>MULSHIFTB);
835 // the estimate is now in est; now calculate hi:lo-est*10**9;
836 // happily the top word of the result is irrelevant because it
837 // will always be zero so this needs only one multiplication
838 lo=(uInt)(*pl-((uLong)est*MULTBASE)); // low word of result
839 // If QUAD, the correction here could be +2
840 if (lo>=MULTBASE) {
841 lo-=MULTBASE; // correct by +1
842 est++;
843 #if QUAD
844 // may need to correct by +2
845 if (lo>=MULTBASE) {
846 lo-=MULTBASE;
847 est++;
848 }
849 #endif
850 }
851 // finally place lo as the new coefficient 'digit' and add est to
852 // the next place up [this is safe because this path is never
853 // taken on the final iteration as *pl will fit]
854 *pa=lo;
855 *(pl+1)+=est;
856 } // *pl needed split
857 else { // *pl<MULTBASE
858 *pa=(uInt)*pl; // just copy across
859 }
860 } // pl loop
861
862 #else // 32-bit
863 for (pa=acc;; pa+=4) { // zero the accumulator
864 *pa=0; *(pa+1)=0; *(pa+2)=0; *(pa+3)=0; // [reduce overhead]
865 if (pa==acc+MULACCLEN*2-4) break; // multiple of 4 asserted
866 } // pa
867
868 /* Effect the multiplication */
869 // uLongs are not available (and in particular, there is no uLong
870 // divide) but it is still possible to use MFC's lazy-carry
871 // resolution algorithm from decNumber. First, the multiplication
872 // is effected, allowing accumulation of the partial products
873 // (which are in base-billion at each column position) into 64 bits
874 // [with the high-order 32 bits in each position being held at
875 // offset +ACCLEN from the low-order 32 bits in the accumulator].
876 // These 64-bit numbers (which may contain up to 19 decimal digits)
877 // are then split using the Clark & Cowlishaw algorithm (see
878 // below).
879 for (ui=bufr;; ui++) { // over each item in rhs
880 uInt hi, lo; // words of exact multiply result
881 pa=acc+(ui-bufr); // where to add the lhs
882 for (uj=bufl;; uj++, pa++) { // over each item in lhs
883 LONGMUL32HI(hi, *ui, *uj); // calculate product of digits
884 lo=(*ui)*(*uj); // ..
885 *pa+=lo; // accumulate low bits and ..
886 *(pa+MULACCLEN)+=hi+(*pa<lo); // .. high bits with any carry
887 if (uj==bufl+MULOPLEN-1) break;
888 }
889 if (ui==bufr+MULOPLEN-1) break;
890 }
891
892 // The 64-bit carries must now be resolved; this means that a
893 // quotient/remainder has to be calculated for base-billion (1E+9).
894 // For this, Clark & Cowlishaw's quotient estimation approach (also
895 // used in decNumber) is needed, because 64-bit divide is generally
896 // extremely slow on 32-bit machines. This algorithm splits X
897 // using:
898 //
899 // magic=2**(A+B)/1E+9; // 'magic number'
900 // hop=X/2**A; // high order part of X (by shift)
901 // est=magic*hop/2**B // quotient estimate (may be low by 1)
902 //
903 // A and B are quite constrained; hop and magic must fit in 32 bits,
904 // and 2**(A+B) must be as large as possible (which is 2**61 if
905 // magic is to fit). Further, maxX increases with the length of
906 // the operands (and hence the number of partial products
907 // accumulated); maxX is OPLEN*(10**18), which is up to 19 digits.
908 //
909 // It can be shown that when OPLEN is 2 then the maximum error in
910 // the estimated quotient is <1, but for larger maximum x the
911 // maximum error is above 1 so a correction that is >1 may be
912 // needed. Values of A and B are chosen to satisfy the constraints
913 // just mentioned while minimizing the maximum error (and hence the
914 // maximum correction), as shown in the following table:
915 //
916 // Type OPLEN A B maxX maxError maxCorrection
917 // ---------------------------------------------------------
918 // DOUBLE 2 29 32 <2*10**18 0.63 1
919 // QUAD 4 30 31 <4*10**18 1.17 2
920 //
921 // In the OPLEN==2 case there is most choice, but the value for B
922 // of 32 has a big advantage as then the calculation of the
923 // estimate requires no shifting; the high word is simply
924 // calculated from multiplying magic*hop.
925 #define MULMAGIC 2305843009U // 2**61/10**9 [both cases]
926 #if DOUBLE
927 #define MULSHIFTA 29
928 #define MULSHIFTB 32
929 #elif QUAD
930 #define MULSHIFTA 30
931 #define MULSHIFTB 31
932 #else
933 #error Unexpected type
934 #endif
935
936 #if DECTRACE
937 printf("MulHiLo:");
938 for (pa=acc+MULACCLEN-1; pa>=acc; pa--)
939 printf(" %08lx:%08lx", (LI)*(pa+MULACCLEN), (LI)*pa);
940 printf("\n");
941 #endif
942
943 for (pa=acc;; pa++) { // each low uInt
944 uInt hi, lo; // words of exact multiply result
945 uInt hop, estlo; // work
946 #if QUAD
947 uInt esthi; // ..
948 #endif
949
950 lo=*pa;
951 hi=*(pa+MULACCLEN); // top 32 bits
952 // hi and lo now hold a binary number which needs to be split
953
954 #if DOUBLE
955 hop=(hi<<3)+(lo>>MULSHIFTA); // hi:lo/2**29
956 LONGMUL32HI(estlo, hop, MULMAGIC);// only need the high word
957 // [MULSHIFTB is 32, so estlo can be used directly]
958 // the estimate is now in estlo; now calculate hi:lo-est*10**9;
959 // happily the top word of the result is irrelevant because it
960 // will always be zero so this needs only one multiplication
961 lo-=(estlo*MULTBASE);
962 // esthi=0; // high word is ignored below
963 // the correction here will be at most +1; do it
964 if (lo>=MULTBASE) {
965 lo-=MULTBASE;
966 estlo++;
967 }
968 #elif QUAD
969 hop=(hi<<2)+(lo>>MULSHIFTA); // hi:lo/2**30
970 LONGMUL32HI(esthi, hop, MULMAGIC);// shift will be 31 ..
971 estlo=hop*MULMAGIC; // .. so low word needed
972 estlo=(esthi<<1)+(estlo>>MULSHIFTB); // [just the top bit]
973 // esthi=0; // high word is ignored below
974 lo-=(estlo*MULTBASE); // as above
975 // the correction here could be +1 or +2
976 if (lo>=MULTBASE) {
977 lo-=MULTBASE;
978 estlo++;
979 }
980 if (lo>=MULTBASE) {
981 lo-=MULTBASE;
982 estlo++;
983 }
984 #else
985 #error Unexpected type
986 #endif
987
988 // finally place lo as the new accumulator digit and add est to
989 // the next place up; this latter add could cause a carry of 1
990 // to the high word of the next place
991 *pa=lo;
992 *(pa+1)+=estlo;
993 // esthi is always 0 for DOUBLE and QUAD so this is skipped
994 // *(pa+1+MULACCLEN)+=esthi;
995 if (*(pa+1)<estlo) *(pa+1+MULACCLEN)+=1; // carry
996 if (pa==acc+MULACCLEN-2) break; // [MULACCLEN-1 will never need split]
997 } // pa loop
998 #endif
999
1000 // At this point, whether using the 64-bit or the 32-bit paths, the
1001 // accumulator now holds the (unrounded) result in base-billion;
1002 // one base-billion 'digit' per uInt.
1003 #if DECTRACE
1004 printf("MultAcc:");
1005 for (pa=acc+MULACCLEN-1; pa>=acc; pa--) printf(" %09ld", (LI)*pa);
1006 printf("\n");
1007 #endif
1008
1009 // Now convert to BCD for rounding and cleanup, starting from the
1010 // most significant end
1011 pa=acc+MULACCLEN-1;
1012 if (*pa!=0) num->msd=bcdacc+LEADZEROS;// drop known lead zeros
1013 else { // >=1 word of leading zeros
1014 num->msd=bcdacc; // known leading zeros are gone
1015 pa--; // skip first word ..
1016 for (; *pa==0; pa--) if (pa==acc) break; // .. and any more leading 0s
1017 }
1018 for (ub=bcdacc;; pa--, ub+=9) {
1019 if (*pa!=0) { // split(s) needed
1020 uInt top, mid, rem; // work
1021 // *pa is non-zero -- split the base-billion acc digit into
1022 // hi, mid, and low three-digits
1023 #define mulsplit9 1000000 // divisor
1024 #define mulsplit6 1000 // divisor
1025 // The splitting is done by simple divides and remainders,
1026 // assuming the compiler will optimize these where useful
1027 // [GCC does]
1028 top=*pa/mulsplit9;
1029 rem=*pa%mulsplit9;
1030 mid=rem/mulsplit6;
1031 rem=rem%mulsplit6;
1032 // lay out the nine BCD digits (plus one unwanted byte)
1033 UBFROMUI(ub, UBTOUI(&BIN2BCD8[top*4]));
1034 UBFROMUI(ub+3, UBTOUI(&BIN2BCD8[mid*4]));
1035 UBFROMUI(ub+6, UBTOUI(&BIN2BCD8[rem*4]));
1036 }
1037 else { // *pa==0
1038 UBFROMUI(ub, 0); // clear 9 BCD8s
1039 UBFROMUI(ub+4, 0); // ..
1040 *(ub+8)=0; // ..
1041 }
1042 if (pa==acc) break;
1043 } // BCD conversion loop
1044
1045 num->lsd=ub+8; // complete the bcdnum ..
1046
1047 #if DECTRACE
1048 decShowNum(num, "postmult");
1049 decFloatShow(dfl, "dfl");
1050 decFloatShow(dfr, "dfr");
1051 #endif
1052 return;
1053 } // decFiniteMultiply
1054
1055 /* ------------------------------------------------------------------ */
1056 /* decFloatAbs -- absolute value, heeding NaNs, etc. */
1057 /* */
1058 /* result gets the canonicalized df with sign 0 */
1059 /* df is the decFloat to abs */
1060 /* set is the context */
1061 /* returns result */
1062 /* */
1063 /* This has the same effect as decFloatPlus unless df is negative, */
1064 /* in which case it has the same effect as decFloatMinus. The */
1065 /* effect is also the same as decFloatCopyAbs except that NaNs are */
1066 /* handled normally (the sign of a NaN is not affected, and an sNaN */
1067 /* will signal) and the result will be canonical. */
1068 /* ------------------------------------------------------------------ */
decFloatAbs(decFloat * result,const decFloat * df,decContext * set)1069 decFloat * decFloatAbs(decFloat *result, const decFloat *df,
1070 decContext *set) {
1071 if (DFISNAN(df)) return decNaNs(result, df, NULL, set);
1072 decCanonical(result, df); // copy and check
1073 DFBYTE(result, 0)&=~0x80; // zero sign bit
1074 return result;
1075 } // decFloatAbs
1076
1077 /* ------------------------------------------------------------------ */
1078 /* decFloatAdd -- add two decFloats */
1079 /* */
1080 /* result gets the result of adding dfl and dfr: */
1081 /* dfl is the first decFloat (lhs) */
1082 /* dfr is the second decFloat (rhs) */
1083 /* set is the context */
1084 /* returns result */
1085 /* */
1086 /* ------------------------------------------------------------------ */
1087 #if QUAD
1088 // Table for testing MSDs for fastpath elimination; returns the MSD of
1089 // a decDouble or decQuad (top 6 bits tested) ignoring the sign.
1090 // Infinities return -32 and NaNs return -128 so that summing the two
1091 // MSDs also allows rapid tests for the Specials (see code below).
1092 const Int DECTESTMSD[64]={
1093 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
1094 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 9, 8, 9, -32, -128,
1095 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
1096 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 9, 8, 9, -32, -128};
1097 #else
1098 // The table for testing MSDs is shared between the modules
1099 extern const Int DECTESTMSD[64];
1100 #endif
1101
decFloatAdd(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)1102 decFloat * decFloatAdd(decFloat *result,
1103 const decFloat *dfl, const decFloat *dfr,
1104 decContext *set) {
1105 bcdnum num; // for final conversion
1106 Int bexpl, bexpr; // left and right biased exponents
1107 uByte *ub, *us, *ut; // work
1108 uInt uiwork; // for macros
1109 #if QUAD
1110 uShort uswork; // ..
1111 #endif
1112
1113 uInt sourhil, sourhir; // top words from source decFloats
1114 // [valid only through end of
1115 // fastpath code -- before swap]
1116 uInt diffsign; // non-zero if signs differ
1117 uInt carry; // carry: 0 or 1 before add loop
1118 Int overlap; // coefficient overlap (if full)
1119 Int summ; // sum of the MSDs
1120 // the following buffers hold coefficients with various alignments
1121 // (see commentary and diagrams below)
1122 uByte acc[4+2+DECPMAX*3+8];
1123 uByte buf[4+2+DECPMAX*2];
1124 uByte *umsd, *ulsd; // local MSD and LSD pointers
1125
1126 #if DECLITEND
1127 #define CARRYPAT 0x01000000 // carry=1 pattern
1128 #else
1129 #define CARRYPAT 0x00000001 // carry=1 pattern
1130 #endif
1131
1132 /* Start decoding the arguments */
1133 // The initial exponents are placed into the opposite Ints to
1134 // that which might be expected; there are two sets of data to
1135 // keep track of (each decFloat and the corresponding exponent),
1136 // and this scheme means that at the swap point (after comparing
1137 // exponents) only one pair of words needs to be swapped
1138 // whichever path is taken (thereby minimising worst-case path).
1139 // The calculated exponents will be nonsense when the arguments are
1140 // Special, but are not used in that path
1141 sourhil=DFWORD(dfl, 0); // LHS top word
1142 summ=DECTESTMSD[sourhil>>26]; // get first MSD for testing
1143 bexpr=DECCOMBEXP[sourhil>>26]; // get exponent high bits (in place)
1144 bexpr+=GETECON(dfl); // .. + continuation
1145
1146 sourhir=DFWORD(dfr, 0); // RHS top word
1147 summ+=DECTESTMSD[sourhir>>26]; // sum MSDs for testing
1148 bexpl=DECCOMBEXP[sourhir>>26];
1149 bexpl+=GETECON(dfr);
1150
1151 // here bexpr has biased exponent from lhs, and vice versa
1152
1153 diffsign=(sourhil^sourhir)&DECFLOAT_Sign;
1154
1155 // now determine whether to take a fast path or the full-function
1156 // slow path. The slow path must be taken when:
1157 // -- both numbers are finite, and:
1158 // the exponents are different, or
1159 // the signs are different, or
1160 // the sum of the MSDs is >8 (hence might overflow)
1161 // specialness and the sum of the MSDs can be tested at once using
1162 // the summ value just calculated, so the test for specials is no
1163 // longer on the worst-case path (as of 3.60)
1164
1165 if (summ<=8) { // MSD+MSD is good, or there is a special
1166 if (summ<0) { // there is a special
1167 // Inf+Inf would give -64; Inf+finite is -32 or higher
1168 if (summ<-64) return decNaNs(result, dfl, dfr, set); // one or two NaNs
1169 // two infinities with different signs is invalid
1170 if (summ==-64 && diffsign) return decInvalid(result, set);
1171 if (DFISINF(dfl)) return decInfinity(result, dfl); // LHS is infinite
1172 return decInfinity(result, dfr); // RHS must be Inf
1173 }
1174 // Here when both arguments are finite; fast path is possible
1175 // (currently only for aligned and same-sign)
1176 if (bexpr==bexpl && !diffsign) {
1177 uInt tac[DECLETS+1]; // base-1000 coefficient
1178 uInt encode; // work
1179
1180 // Get one coefficient as base-1000 and add the other
1181 GETCOEFFTHOU(dfl, tac); // least-significant goes to [0]
1182 ADDCOEFFTHOU(dfr, tac);
1183 // here the sum of the MSDs (plus any carry) will be <10 due to
1184 // the fastpath test earlier
1185
1186 // construct the result; low word is the same for both formats
1187 encode =BIN2DPD[tac[0]];
1188 encode|=BIN2DPD[tac[1]]<<10;
1189 encode|=BIN2DPD[tac[2]]<<20;
1190 encode|=BIN2DPD[tac[3]]<<30;
1191 DFWORD(result, (DECBYTES/4)-1)=encode;
1192
1193 // collect next two declets (all that remains, for Double)
1194 encode =BIN2DPD[tac[3]]>>2;
1195 encode|=BIN2DPD[tac[4]]<<8;
1196
1197 #if QUAD
1198 // complete and lay out middling words
1199 encode|=BIN2DPD[tac[5]]<<18;
1200 encode|=BIN2DPD[tac[6]]<<28;
1201 DFWORD(result, 2)=encode;
1202
1203 encode =BIN2DPD[tac[6]]>>4;
1204 encode|=BIN2DPD[tac[7]]<<6;
1205 encode|=BIN2DPD[tac[8]]<<16;
1206 encode|=BIN2DPD[tac[9]]<<26;
1207 DFWORD(result, 1)=encode;
1208
1209 // and final two declets
1210 encode =BIN2DPD[tac[9]]>>6;
1211 encode|=BIN2DPD[tac[10]]<<4;
1212 #endif
1213
1214 // add exponent continuation and sign (from either argument)
1215 encode|=sourhil & (ECONMASK | DECFLOAT_Sign);
1216
1217 // create lookup index = MSD + top two bits of biased exponent <<4
1218 tac[DECLETS]|=(bexpl>>DECECONL)<<4;
1219 encode|=DECCOMBFROM[tac[DECLETS]]; // add constructed combination field
1220 DFWORD(result, 0)=encode; // complete
1221
1222 // decFloatShow(result, ">");
1223 return result;
1224 } // fast path OK
1225 // drop through to slow path
1226 } // low sum or Special(s)
1227
1228 /* Slow path required -- arguments are finite and might overflow, */
1229 /* or require alignment, or might have different signs */
1230
1231 // now swap either exponents or argument pointers
1232 if (bexpl<=bexpr) {
1233 // original left is bigger
1234 Int bexpswap=bexpl;
1235 bexpl=bexpr;
1236 bexpr=bexpswap;
1237 // printf("left bigger\n");
1238 }
1239 else {
1240 const decFloat *dfswap=dfl;
1241 dfl=dfr;
1242 dfr=dfswap;
1243 // printf("right bigger\n");
1244 }
1245 // [here dfl and bexpl refer to the datum with the larger exponent,
1246 // of if the exponents are equal then the original LHS argument]
1247
1248 // if lhs is zero then result will be the rhs (now known to have
1249 // the smaller exponent), which also may need to be tested for zero
1250 // for the weird IEEE 754 sign rules
1251 if (DFISZERO(dfl)) {
1252 decCanonical(result, dfr); // clean copy
1253 // "When the sum of two operands with opposite signs is
1254 // exactly zero, the sign of that sum shall be '+' in all
1255 // rounding modes except round toward -Infinity, in which
1256 // mode that sign shall be '-'."
1257 if (diffsign && DFISZERO(result)) {
1258 DFWORD(result, 0)&=~DECFLOAT_Sign; // assume sign 0
1259 if (set->round==DEC_ROUND_FLOOR) DFWORD(result, 0)|=DECFLOAT_Sign;
1260 }
1261 return result;
1262 } // numfl is zero
1263 // [here, LHS is non-zero; code below assumes that]
1264
1265 // Coefficients layout during the calculations to follow:
1266 //
1267 // Overlap case:
1268 // +------------------------------------------------+
1269 // acc: |0000| coeffa | tail B | |
1270 // +------------------------------------------------+
1271 // buf: |0000| pad0s | coeffb | |
1272 // +------------------------------------------------+
1273 //
1274 // Touching coefficients or gap:
1275 // +------------------------------------------------+
1276 // acc: |0000| coeffa | gap | coeffb |
1277 // +------------------------------------------------+
1278 // [buf not used or needed; gap clamped to Pmax]
1279
1280 // lay out lhs coefficient into accumulator; this starts at acc+4
1281 // for decDouble or acc+6 for decQuad so the LSD is word-
1282 // aligned; the top word gap is there only in case a carry digit
1283 // is prefixed after the add -- it does not need to be zeroed
1284 #if DOUBLE
1285 #define COFF 4 // offset into acc
1286 #elif QUAD
1287 UBFROMUS(acc+4, 0); // prefix 00
1288 #define COFF 6 // offset into acc
1289 #endif
1290
1291 GETCOEFF(dfl, acc+COFF); // decode from decFloat
1292 ulsd=acc+COFF+DECPMAX-1;
1293 umsd=acc+4; // [having this here avoids
1294
1295 #if DECTRACE
1296 {bcdnum tum;
1297 tum.msd=umsd;
1298 tum.lsd=ulsd;
1299 tum.exponent=bexpl-DECBIAS;
1300 tum.sign=DFWORD(dfl, 0) & DECFLOAT_Sign;
1301 decShowNum(&tum, "dflx");}
1302 #endif
1303
1304 // if signs differ, take ten's complement of lhs (here the
1305 // coefficient is subtracted from all-nines; the 1 is added during
1306 // the later add cycle -- zeros to the right do not matter because
1307 // the complement of zero is zero); these are fixed-length inverts
1308 // where the lsd is known to be at a 4-byte boundary (so no borrow
1309 // possible)
1310 carry=0; // assume no carry
1311 if (diffsign) {
1312 carry=CARRYPAT; // for +1 during add
1313 UBFROMUI(acc+ 4, 0x09090909-UBTOUI(acc+ 4));
1314 UBFROMUI(acc+ 8, 0x09090909-UBTOUI(acc+ 8));
1315 UBFROMUI(acc+12, 0x09090909-UBTOUI(acc+12));
1316 UBFROMUI(acc+16, 0x09090909-UBTOUI(acc+16));
1317 #if QUAD
1318 UBFROMUI(acc+20, 0x09090909-UBTOUI(acc+20));
1319 UBFROMUI(acc+24, 0x09090909-UBTOUI(acc+24));
1320 UBFROMUI(acc+28, 0x09090909-UBTOUI(acc+28));
1321 UBFROMUI(acc+32, 0x09090909-UBTOUI(acc+32));
1322 UBFROMUI(acc+36, 0x09090909-UBTOUI(acc+36));
1323 #endif
1324 } // diffsign
1325
1326 // now process the rhs coefficient; if it cannot overlap lhs then
1327 // it can be put straight into acc (with an appropriate gap, if
1328 // needed) because no actual addition will be needed (except
1329 // possibly to complete ten's complement)
1330 overlap=DECPMAX-(bexpl-bexpr);
1331 #if DECTRACE
1332 printf("exps: %ld %ld\n", (LI)(bexpl-DECBIAS), (LI)(bexpr-DECBIAS));
1333 printf("Overlap=%ld carry=%08lx\n", (LI)overlap, (LI)carry);
1334 #endif
1335
1336 if (overlap<=0) { // no overlap possible
1337 uInt gap; // local work
1338 // since a full addition is not needed, a ten's complement
1339 // calculation started above may need to be completed
1340 if (carry) {
1341 for (ub=ulsd; *ub==9; ub--) *ub=0;
1342 *ub+=1;
1343 carry=0; // taken care of
1344 }
1345 // up to DECPMAX-1 digits of the final result can extend down
1346 // below the LSD of the lhs, so if the gap is >DECPMAX then the
1347 // rhs will be simply sticky bits. In this case the gap is
1348 // clamped to DECPMAX and the exponent adjusted to suit [this is
1349 // safe because the lhs is non-zero].
1350 gap=-overlap;
1351 if (gap>DECPMAX) {
1352 bexpr+=gap-1;
1353 gap=DECPMAX;
1354 }
1355 ub=ulsd+gap+1; // where MSD will go
1356 // Fill the gap with 0s; note that there is no addition to do
1357 ut=acc+COFF+DECPMAX; // start of gap
1358 for (; ut<ub; ut+=4) UBFROMUI(ut, 0); // mind the gap
1359 if (overlap<-DECPMAX) { // gap was > DECPMAX
1360 *ub=(uByte)(!DFISZERO(dfr)); // make sticky digit
1361 }
1362 else { // need full coefficient
1363 GETCOEFF(dfr, ub); // decode from decFloat
1364 ub+=DECPMAX-1; // new LSD...
1365 }
1366 ulsd=ub; // save new LSD
1367 } // no overlap possible
1368
1369 else { // overlap>0
1370 // coefficients overlap (perhaps completely, although also
1371 // perhaps only where zeros)
1372 if (overlap==DECPMAX) { // aligned
1373 ub=buf+COFF; // where msd will go
1374 #if QUAD
1375 UBFROMUS(buf+4, 0); // clear quad's 00
1376 #endif
1377 GETCOEFF(dfr, ub); // decode from decFloat
1378 }
1379 else { // unaligned
1380 ub=buf+COFF+DECPMAX-overlap; // where MSD will go
1381 // Fill the prefix gap with 0s; 8 will cover most common
1382 // unalignments, so start with direct assignments (a loop is
1383 // then used for any remaining -- the loop (and the one in a
1384 // moment) is not then on the critical path because the number
1385 // of additions is reduced by (at least) two in this case)
1386 UBFROMUI(buf+4, 0); // [clears decQuad 00 too]
1387 UBFROMUI(buf+8, 0);
1388 if (ub>buf+12) {
1389 ut=buf+12; // start any remaining
1390 for (; ut<ub; ut+=4) UBFROMUI(ut, 0); // fill them
1391 }
1392 GETCOEFF(dfr, ub); // decode from decFloat
1393
1394 // now move tail of rhs across to main acc; again use direct
1395 // copies for 8 digits-worth
1396 UBFROMUI(acc+COFF+DECPMAX, UBTOUI(buf+COFF+DECPMAX));
1397 UBFROMUI(acc+COFF+DECPMAX+4, UBTOUI(buf+COFF+DECPMAX+4));
1398 if (buf+COFF+DECPMAX+8<ub+DECPMAX) {
1399 us=buf+COFF+DECPMAX+8; // source
1400 ut=acc+COFF+DECPMAX+8; // target
1401 for (; us<ub+DECPMAX; us+=4, ut+=4) UBFROMUI(ut, UBTOUI(us));
1402 }
1403 } // unaligned
1404
1405 ulsd=acc+(ub-buf+DECPMAX-1); // update LSD pointer
1406
1407 // Now do the add of the non-tail; this is all nicely aligned,
1408 // and is over a multiple of four digits (because for Quad two
1409 // zero digits were added on the left); words in both acc and
1410 // buf (buf especially) will often be zero
1411 // [byte-by-byte add, here, is about 15% slower total effect than
1412 // the by-fours]
1413
1414 // Now effect the add; this is harder on a little-endian
1415 // machine as the inter-digit carry cannot use the usual BCD
1416 // addition trick because the bytes are loaded in the wrong order
1417 // [this loop could be unrolled, but probably scarcely worth it]
1418
1419 ut=acc+COFF+DECPMAX-4; // target LSW (acc)
1420 us=buf+COFF+DECPMAX-4; // source LSW (buf, to add to acc)
1421
1422 #if !DECLITEND
1423 for (; ut>=acc+4; ut-=4, us-=4) { // big-endian add loop
1424 // bcd8 add
1425 carry+=UBTOUI(us); // rhs + carry
1426 if (carry==0) continue; // no-op
1427 carry+=UBTOUI(ut); // lhs
1428 // Big-endian BCD adjust (uses internal carry)
1429 carry+=0x76f6f6f6; // note top nibble not all bits
1430 // apply BCD adjust and save
1431 UBFROMUI(ut, (carry & 0x0f0f0f0f) - ((carry & 0x60606060)>>4));
1432 carry>>=31; // true carry was at far left
1433 } // add loop
1434 #else
1435 for (; ut>=acc+4; ut-=4, us-=4) { // little-endian add loop
1436 // bcd8 add
1437 carry+=UBTOUI(us); // rhs + carry
1438 if (carry==0) continue; // no-op [common if unaligned]
1439 carry+=UBTOUI(ut); // lhs
1440 // Little-endian BCD adjust; inter-digit carry must be manual
1441 // because the lsb from the array will be in the most-significant
1442 // byte of carry
1443 carry+=0x76767676; // note no inter-byte carries
1444 carry+=(carry & 0x80000000)>>15;
1445 carry+=(carry & 0x00800000)>>15;
1446 carry+=(carry & 0x00008000)>>15;
1447 carry-=(carry & 0x60606060)>>4; // BCD adjust back
1448 UBFROMUI(ut, carry & 0x0f0f0f0f); // clear debris and save
1449 // here, final carry-out bit is at 0x00000080; move it ready
1450 // for next word-add (i.e., to 0x01000000)
1451 carry=(carry & 0x00000080)<<17;
1452 } // add loop
1453 #endif
1454
1455 #if DECTRACE
1456 {bcdnum tum;
1457 printf("Add done, carry=%08lx, diffsign=%ld\n", (LI)carry, (LI)diffsign);
1458 tum.msd=umsd; // acc+4;
1459 tum.lsd=ulsd;
1460 tum.exponent=0;
1461 tum.sign=0;
1462 decShowNum(&tum, "dfadd");}
1463 #endif
1464 } // overlap possible
1465
1466 // ordering here is a little strange in order to have slowest path
1467 // first in GCC asm listing
1468 if (diffsign) { // subtraction
1469 if (!carry) { // no carry out means RHS<LHS
1470 // borrowed -- take ten's complement
1471 // sign is lhs sign
1472 num.sign=DFWORD(dfl, 0) & DECFLOAT_Sign;
1473
1474 // invert the coefficient first by fours, then add one; space
1475 // at the end of the buffer ensures the by-fours is always
1476 // safe, but lsd+1 must be cleared to prevent a borrow
1477 // if big-endian
1478 #if !DECLITEND
1479 *(ulsd+1)=0;
1480 #endif
1481 // there are always at least four coefficient words
1482 UBFROMUI(umsd, 0x09090909-UBTOUI(umsd));
1483 UBFROMUI(umsd+4, 0x09090909-UBTOUI(umsd+4));
1484 UBFROMUI(umsd+8, 0x09090909-UBTOUI(umsd+8));
1485 UBFROMUI(umsd+12, 0x09090909-UBTOUI(umsd+12));
1486 #if DOUBLE
1487 #define BNEXT 16
1488 #elif QUAD
1489 UBFROMUI(umsd+16, 0x09090909-UBTOUI(umsd+16));
1490 UBFROMUI(umsd+20, 0x09090909-UBTOUI(umsd+20));
1491 UBFROMUI(umsd+24, 0x09090909-UBTOUI(umsd+24));
1492 UBFROMUI(umsd+28, 0x09090909-UBTOUI(umsd+28));
1493 UBFROMUI(umsd+32, 0x09090909-UBTOUI(umsd+32));
1494 #define BNEXT 36
1495 #endif
1496 if (ulsd>=umsd+BNEXT) { // unaligned
1497 // eight will handle most unaligments for Double; 16 for Quad
1498 UBFROMUI(umsd+BNEXT, 0x09090909-UBTOUI(umsd+BNEXT));
1499 UBFROMUI(umsd+BNEXT+4, 0x09090909-UBTOUI(umsd+BNEXT+4));
1500 #if DOUBLE
1501 #define BNEXTY (BNEXT+8)
1502 #elif QUAD
1503 UBFROMUI(umsd+BNEXT+8, 0x09090909-UBTOUI(umsd+BNEXT+8));
1504 UBFROMUI(umsd+BNEXT+12, 0x09090909-UBTOUI(umsd+BNEXT+12));
1505 #define BNEXTY (BNEXT+16)
1506 #endif
1507 if (ulsd>=umsd+BNEXTY) { // very unaligned
1508 ut=umsd+BNEXTY; // -> continue
1509 for (;;ut+=4) {
1510 UBFROMUI(ut, 0x09090909-UBTOUI(ut)); // invert four digits
1511 if (ut>=ulsd-3) break; // all done
1512 }
1513 }
1514 }
1515 // complete the ten's complement by adding 1
1516 for (ub=ulsd; *ub==9; ub--) *ub=0;
1517 *ub+=1;
1518 } // borrowed
1519
1520 else { // carry out means RHS>=LHS
1521 num.sign=DFWORD(dfr, 0) & DECFLOAT_Sign;
1522 // all done except for the special IEEE 754 exact-zero-result
1523 // rule (see above); while testing for zero, strip leading
1524 // zeros (which will save decFinalize doing it) (this is in
1525 // diffsign path, so carry impossible and true umsd is
1526 // acc+COFF)
1527
1528 // Check the initial coefficient area using the fast macro;
1529 // this will often be all that needs to be done (as on the
1530 // worst-case path when the subtraction was aligned and
1531 // full-length)
1532 if (ISCOEFFZERO(acc+COFF)) {
1533 umsd=acc+COFF+DECPMAX-1; // so far, so zero
1534 if (ulsd>umsd) { // more to check
1535 umsd++; // to align after checked area
1536 for (; UBTOUI(umsd)==0 && umsd+3<ulsd;) umsd+=4;
1537 for (; *umsd==0 && umsd<ulsd;) umsd++;
1538 }
1539 if (*umsd==0) { // must be true zero (and diffsign)
1540 num.sign=0; // assume +
1541 if (set->round==DEC_ROUND_FLOOR) num.sign=DECFLOAT_Sign;
1542 }
1543 }
1544 // [else was not zero, might still have leading zeros]
1545 } // subtraction gave positive result
1546 } // diffsign
1547
1548 else { // same-sign addition
1549 num.sign=DFWORD(dfl, 0)&DECFLOAT_Sign;
1550 #if DOUBLE
1551 if (carry) { // only possible with decDouble
1552 *(acc+3)=1; // [Quad has leading 00]
1553 umsd=acc+3;
1554 }
1555 #endif
1556 } // same sign
1557
1558 num.msd=umsd; // set MSD ..
1559 num.lsd=ulsd; // .. and LSD
1560 num.exponent=bexpr-DECBIAS; // set exponent to smaller, unbiassed
1561
1562 #if DECTRACE
1563 decFloatShow(dfl, "dfl");
1564 decFloatShow(dfr, "dfr");
1565 decShowNum(&num, "postadd");
1566 #endif
1567 return decFinalize(result, &num, set); // round, check, and lay out
1568 } // decFloatAdd
1569
1570 /* ------------------------------------------------------------------ */
1571 /* decFloatAnd -- logical digitwise AND of two decFloats */
1572 /* */
1573 /* result gets the result of ANDing dfl and dfr */
1574 /* dfl is the first decFloat (lhs) */
1575 /* dfr is the second decFloat (rhs) */
1576 /* set is the context */
1577 /* returns result, which will be canonical with sign=0 */
1578 /* */
1579 /* The operands must be positive, finite with exponent q=0, and */
1580 /* comprise just zeros and ones; if not, Invalid operation results. */
1581 /* ------------------------------------------------------------------ */
decFloatAnd(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)1582 decFloat * decFloatAnd(decFloat *result,
1583 const decFloat *dfl, const decFloat *dfr,
1584 decContext *set) {
1585 if (!DFISUINT01(dfl) || !DFISUINT01(dfr)
1586 || !DFISCC01(dfl) || !DFISCC01(dfr)) return decInvalid(result, set);
1587 // the operands are positive finite integers (q=0) with just 0s and 1s
1588 #if DOUBLE
1589 DFWORD(result, 0)=ZEROWORD
1590 |((DFWORD(dfl, 0) & DFWORD(dfr, 0))&0x04009124);
1591 DFWORD(result, 1)=(DFWORD(dfl, 1) & DFWORD(dfr, 1))&0x49124491;
1592 #elif QUAD
1593 DFWORD(result, 0)=ZEROWORD
1594 |((DFWORD(dfl, 0) & DFWORD(dfr, 0))&0x04000912);
1595 DFWORD(result, 1)=(DFWORD(dfl, 1) & DFWORD(dfr, 1))&0x44912449;
1596 DFWORD(result, 2)=(DFWORD(dfl, 2) & DFWORD(dfr, 2))&0x12449124;
1597 DFWORD(result, 3)=(DFWORD(dfl, 3) & DFWORD(dfr, 3))&0x49124491;
1598 #endif
1599 return result;
1600 } // decFloatAnd
1601
1602 /* ------------------------------------------------------------------ */
1603 /* decFloatCanonical -- copy a decFloat, making canonical */
1604 /* */
1605 /* result gets the canonicalized df */
1606 /* df is the decFloat to copy and make canonical */
1607 /* returns result */
1608 /* */
1609 /* This works on specials, too; no error or exception is possible. */
1610 /* ------------------------------------------------------------------ */
decFloatCanonical(decFloat * result,const decFloat * df)1611 decFloat * decFloatCanonical(decFloat *result, const decFloat *df) {
1612 return decCanonical(result, df);
1613 } // decFloatCanonical
1614
1615 /* ------------------------------------------------------------------ */
1616 /* decFloatClass -- return the class of a decFloat */
1617 /* */
1618 /* df is the decFloat to test */
1619 /* returns the decClass that df falls into */
1620 /* ------------------------------------------------------------------ */
decFloatClass(const decFloat * df)1621 enum decClass decFloatClass(const decFloat *df) {
1622 Int exp; // exponent
1623 if (DFISSPECIAL(df)) {
1624 if (DFISQNAN(df)) return DEC_CLASS_QNAN;
1625 if (DFISSNAN(df)) return DEC_CLASS_SNAN;
1626 // must be an infinity
1627 if (DFISSIGNED(df)) return DEC_CLASS_NEG_INF;
1628 return DEC_CLASS_POS_INF;
1629 }
1630 if (DFISZERO(df)) { // quite common
1631 if (DFISSIGNED(df)) return DEC_CLASS_NEG_ZERO;
1632 return DEC_CLASS_POS_ZERO;
1633 }
1634 // is finite and non-zero; similar code to decFloatIsNormal, here
1635 // [this could be speeded up slightly by in-lining decFloatDigits]
1636 exp=GETEXPUN(df) // get unbiased exponent ..
1637 +decFloatDigits(df)-1; // .. and make adjusted exponent
1638 if (exp>=DECEMIN) { // is normal
1639 if (DFISSIGNED(df)) return DEC_CLASS_NEG_NORMAL;
1640 return DEC_CLASS_POS_NORMAL;
1641 }
1642 // is subnormal
1643 if (DFISSIGNED(df)) return DEC_CLASS_NEG_SUBNORMAL;
1644 return DEC_CLASS_POS_SUBNORMAL;
1645 } // decFloatClass
1646
1647 /* ------------------------------------------------------------------ */
1648 /* decFloatClassString -- return the class of a decFloat as a string */
1649 /* */
1650 /* df is the decFloat to test */
1651 /* returns a constant string describing the class df falls into */
1652 /* ------------------------------------------------------------------ */
decFloatClassString(const decFloat * df)1653 const char *decFloatClassString(const decFloat *df) {
1654 enum decClass eclass=decFloatClass(df);
1655 if (eclass==DEC_CLASS_POS_NORMAL) return DEC_ClassString_PN;
1656 if (eclass==DEC_CLASS_NEG_NORMAL) return DEC_ClassString_NN;
1657 if (eclass==DEC_CLASS_POS_ZERO) return DEC_ClassString_PZ;
1658 if (eclass==DEC_CLASS_NEG_ZERO) return DEC_ClassString_NZ;
1659 if (eclass==DEC_CLASS_POS_SUBNORMAL) return DEC_ClassString_PS;
1660 if (eclass==DEC_CLASS_NEG_SUBNORMAL) return DEC_ClassString_NS;
1661 if (eclass==DEC_CLASS_POS_INF) return DEC_ClassString_PI;
1662 if (eclass==DEC_CLASS_NEG_INF) return DEC_ClassString_NI;
1663 if (eclass==DEC_CLASS_QNAN) return DEC_ClassString_QN;
1664 if (eclass==DEC_CLASS_SNAN) return DEC_ClassString_SN;
1665 return DEC_ClassString_UN; // Unknown
1666 } // decFloatClassString
1667
1668 /* ------------------------------------------------------------------ */
1669 /* decFloatCompare -- compare two decFloats; quiet NaNs allowed */
1670 /* */
1671 /* result gets the result of comparing dfl and dfr */
1672 /* dfl is the first decFloat (lhs) */
1673 /* dfr is the second decFloat (rhs) */
1674 /* set is the context */
1675 /* returns result, which may be -1, 0, 1, or NaN (Unordered) */
1676 /* ------------------------------------------------------------------ */
decFloatCompare(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)1677 decFloat * decFloatCompare(decFloat *result,
1678 const decFloat *dfl, const decFloat *dfr,
1679 decContext *set) {
1680 Int comp; // work
1681 // NaNs are handled as usual
1682 if (DFISNAN(dfl) || DFISNAN(dfr)) return decNaNs(result, dfl, dfr, set);
1683 // numeric comparison needed
1684 comp=decNumCompare(dfl, dfr, 0);
1685 decFloatZero(result);
1686 if (comp==0) return result;
1687 DFBYTE(result, DECBYTES-1)=0x01; // LSD=1
1688 if (comp<0) DFBYTE(result, 0)|=0x80; // set sign bit
1689 return result;
1690 } // decFloatCompare
1691
1692 /* ------------------------------------------------------------------ */
1693 /* decFloatCompareSignal -- compare two decFloats; all NaNs signal */
1694 /* */
1695 /* result gets the result of comparing dfl and dfr */
1696 /* dfl is the first decFloat (lhs) */
1697 /* dfr is the second decFloat (rhs) */
1698 /* set is the context */
1699 /* returns result, which may be -1, 0, 1, or NaN (Unordered) */
1700 /* ------------------------------------------------------------------ */
decFloatCompareSignal(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)1701 decFloat * decFloatCompareSignal(decFloat *result,
1702 const decFloat *dfl, const decFloat *dfr,
1703 decContext *set) {
1704 Int comp; // work
1705 // NaNs are handled as usual, except that all NaNs signal
1706 if (DFISNAN(dfl) || DFISNAN(dfr)) {
1707 set->status|=DEC_Invalid_operation;
1708 return decNaNs(result, dfl, dfr, set);
1709 }
1710 // numeric comparison needed
1711 comp=decNumCompare(dfl, dfr, 0);
1712 decFloatZero(result);
1713 if (comp==0) return result;
1714 DFBYTE(result, DECBYTES-1)=0x01; // LSD=1
1715 if (comp<0) DFBYTE(result, 0)|=0x80; // set sign bit
1716 return result;
1717 } // decFloatCompareSignal
1718
1719 /* ------------------------------------------------------------------ */
1720 /* decFloatCompareTotal -- compare two decFloats with total ordering */
1721 /* */
1722 /* result gets the result of comparing dfl and dfr */
1723 /* dfl is the first decFloat (lhs) */
1724 /* dfr is the second decFloat (rhs) */
1725 /* returns result, which may be -1, 0, or 1 */
1726 /* ------------------------------------------------------------------ */
decFloatCompareTotal(decFloat * result,const decFloat * dfl,const decFloat * dfr)1727 decFloat * decFloatCompareTotal(decFloat *result,
1728 const decFloat *dfl, const decFloat *dfr) {
1729 Int comp; // work
1730 uInt uiwork; // for macros
1731 #if QUAD
1732 uShort uswork; // ..
1733 #endif
1734 if (DFISNAN(dfl) || DFISNAN(dfr)) {
1735 Int nanl, nanr; // work
1736 // morph NaNs to +/- 1 or 2, leave numbers as 0
1737 nanl=DFISSNAN(dfl)+DFISQNAN(dfl)*2; // quiet > signalling
1738 if (DFISSIGNED(dfl)) nanl=-nanl;
1739 nanr=DFISSNAN(dfr)+DFISQNAN(dfr)*2;
1740 if (DFISSIGNED(dfr)) nanr=-nanr;
1741 if (nanl>nanr) comp=+1;
1742 else if (nanl<nanr) comp=-1;
1743 else { // NaNs are the same type and sign .. must compare payload
1744 // buffers need +2 for QUAD
1745 uByte bufl[DECPMAX+4]; // for LHS coefficient + foot
1746 uByte bufr[DECPMAX+4]; // for RHS coefficient + foot
1747 uByte *ub, *uc; // work
1748 Int sigl; // signum of LHS
1749 sigl=(DFISSIGNED(dfl) ? -1 : +1);
1750
1751 // decode the coefficients
1752 // (shift both right two if Quad to make a multiple of four)
1753 #if QUAD
1754 UBFROMUS(bufl, 0);
1755 UBFROMUS(bufr, 0);
1756 #endif
1757 GETCOEFF(dfl, bufl+QUAD*2); // decode from decFloat
1758 GETCOEFF(dfr, bufr+QUAD*2); // ..
1759 // all multiples of four, here
1760 comp=0; // assume equal
1761 for (ub=bufl, uc=bufr; ub<bufl+DECPMAX+QUAD*2; ub+=4, uc+=4) {
1762 uInt ui=UBTOUI(ub);
1763 if (ui==UBTOUI(uc)) continue; // so far so same
1764 // about to find a winner; go by bytes in case little-endian
1765 for (;; ub++, uc++) {
1766 if (*ub==*uc) continue;
1767 if (*ub>*uc) comp=sigl; // difference found
1768 else comp=-sigl; // ..
1769 break;
1770 }
1771 }
1772 } // same NaN type and sign
1773 }
1774 else {
1775 // numeric comparison needed
1776 comp=decNumCompare(dfl, dfr, 1); // total ordering
1777 }
1778 decFloatZero(result);
1779 if (comp==0) return result;
1780 DFBYTE(result, DECBYTES-1)=0x01; // LSD=1
1781 if (comp<0) DFBYTE(result, 0)|=0x80; // set sign bit
1782 return result;
1783 } // decFloatCompareTotal
1784
1785 /* ------------------------------------------------------------------ */
1786 /* decFloatCompareTotalMag -- compare magnitudes with total ordering */
1787 /* */
1788 /* result gets the result of comparing abs(dfl) and abs(dfr) */
1789 /* dfl is the first decFloat (lhs) */
1790 /* dfr is the second decFloat (rhs) */
1791 /* returns result, which may be -1, 0, or 1 */
1792 /* ------------------------------------------------------------------ */
decFloatCompareTotalMag(decFloat * result,const decFloat * dfl,const decFloat * dfr)1793 decFloat * decFloatCompareTotalMag(decFloat *result,
1794 const decFloat *dfl, const decFloat *dfr) {
1795 decFloat a, b; // for copy if needed
1796 // copy and redirect signed operand(s)
1797 if (DFISSIGNED(dfl)) {
1798 decFloatCopyAbs(&a, dfl);
1799 dfl=&a;
1800 }
1801 if (DFISSIGNED(dfr)) {
1802 decFloatCopyAbs(&b, dfr);
1803 dfr=&b;
1804 }
1805 return decFloatCompareTotal(result, dfl, dfr);
1806 } // decFloatCompareTotalMag
1807
1808 /* ------------------------------------------------------------------ */
1809 /* decFloatCopy -- copy a decFloat as-is */
1810 /* */
1811 /* result gets the copy of dfl */
1812 /* dfl is the decFloat to copy */
1813 /* returns result */
1814 /* */
1815 /* This is a bitwise operation; no errors or exceptions are possible. */
1816 /* ------------------------------------------------------------------ */
decFloatCopy(decFloat * result,const decFloat * dfl)1817 decFloat * decFloatCopy(decFloat *result, const decFloat *dfl) {
1818 if (dfl!=result) *result=*dfl; // copy needed
1819 return result;
1820 } // decFloatCopy
1821
1822 /* ------------------------------------------------------------------ */
1823 /* decFloatCopyAbs -- copy a decFloat as-is and set sign bit to 0 */
1824 /* */
1825 /* result gets the copy of dfl with sign bit 0 */
1826 /* dfl is the decFloat to copy */
1827 /* returns result */
1828 /* */
1829 /* This is a bitwise operation; no errors or exceptions are possible. */
1830 /* ------------------------------------------------------------------ */
decFloatCopyAbs(decFloat * result,const decFloat * dfl)1831 decFloat * decFloatCopyAbs(decFloat *result, const decFloat *dfl) {
1832 if (dfl!=result) *result=*dfl; // copy needed
1833 DFBYTE(result, 0)&=~0x80; // zero sign bit
1834 return result;
1835 } // decFloatCopyAbs
1836
1837 /* ------------------------------------------------------------------ */
1838 /* decFloatCopyNegate -- copy a decFloat as-is with inverted sign bit */
1839 /* */
1840 /* result gets the copy of dfl with sign bit inverted */
1841 /* dfl is the decFloat to copy */
1842 /* returns result */
1843 /* */
1844 /* This is a bitwise operation; no errors or exceptions are possible. */
1845 /* ------------------------------------------------------------------ */
decFloatCopyNegate(decFloat * result,const decFloat * dfl)1846 decFloat * decFloatCopyNegate(decFloat *result, const decFloat *dfl) {
1847 if (dfl!=result) *result=*dfl; // copy needed
1848 DFBYTE(result, 0)^=0x80; // invert sign bit
1849 return result;
1850 } // decFloatCopyNegate
1851
1852 /* ------------------------------------------------------------------ */
1853 /* decFloatCopySign -- copy a decFloat with the sign of another */
1854 /* */
1855 /* result gets the result of copying dfl with the sign of dfr */
1856 /* dfl is the first decFloat (lhs) */
1857 /* dfr is the second decFloat (rhs) */
1858 /* returns result */
1859 /* */
1860 /* This is a bitwise operation; no errors or exceptions are possible. */
1861 /* ------------------------------------------------------------------ */
decFloatCopySign(decFloat * result,const decFloat * dfl,const decFloat * dfr)1862 decFloat * decFloatCopySign(decFloat *result,
1863 const decFloat *dfl, const decFloat *dfr) {
1864 uByte sign=(uByte)(DFBYTE(dfr, 0)&0x80); // save sign bit
1865 if (dfl!=result) *result=*dfl; // copy needed
1866 DFBYTE(result, 0)&=~0x80; // clear sign ..
1867 DFBYTE(result, 0)=(uByte)(DFBYTE(result, 0)|sign); // .. and set saved
1868 return result;
1869 } // decFloatCopySign
1870
1871 /* ------------------------------------------------------------------ */
1872 /* decFloatDigits -- return the number of digits in a decFloat */
1873 /* */
1874 /* df is the decFloat to investigate */
1875 /* returns the number of significant digits in the decFloat; a */
1876 /* zero coefficient returns 1 as does an infinity (a NaN returns */
1877 /* the number of digits in the payload) */
1878 /* ------------------------------------------------------------------ */
1879 // private macro to extract a declet according to provided formula
1880 // (form), and if it is non-zero then return the calculated digits
1881 // depending on the declet number (n), where n=0 for the most
1882 // significant declet; uses uInt dpd for work
1883 #define dpdlenchk(n, form) dpd=(form)&0x3ff; \
1884 if (dpd) return (DECPMAX-1-3*(n))-(3-DPD2BCD8[dpd*4+3])
1885 // next one is used when it is known that the declet must be
1886 // non-zero, or is the final zero declet
1887 #define dpdlendun(n, form) dpd=(form)&0x3ff; \
1888 if (dpd==0) return 1; \
1889 return (DECPMAX-1-3*(n))-(3-DPD2BCD8[dpd*4+3])
1890
decFloatDigits(const decFloat * df)1891 uInt decFloatDigits(const decFloat *df) {
1892 uInt dpd; // work
1893 uInt sourhi=DFWORD(df, 0); // top word from source decFloat
1894 #if QUAD
1895 uInt sourmh, sourml;
1896 #endif
1897 uInt sourlo;
1898
1899 if (DFISINF(df)) return 1;
1900 // A NaN effectively has an MSD of 0; otherwise if non-zero MSD
1901 // then the coefficient is full-length
1902 if (!DFISNAN(df) && DECCOMBMSD[sourhi>>26]) return DECPMAX;
1903
1904 #if DOUBLE
1905 if (sourhi&0x0003ffff) { // ends in first
1906 dpdlenchk(0, sourhi>>8);
1907 sourlo=DFWORD(df, 1);
1908 dpdlendun(1, (sourhi<<2) | (sourlo>>30));
1909 } // [cannot drop through]
1910 sourlo=DFWORD(df, 1); // sourhi not involved now
1911 if (sourlo&0xfff00000) { // in one of first two
1912 dpdlenchk(1, sourlo>>30); // very rare
1913 dpdlendun(2, sourlo>>20);
1914 } // [cannot drop through]
1915 dpdlenchk(3, sourlo>>10);
1916 dpdlendun(4, sourlo);
1917 // [cannot drop through]
1918
1919 #elif QUAD
1920 if (sourhi&0x00003fff) { // ends in first
1921 dpdlenchk(0, sourhi>>4);
1922 sourmh=DFWORD(df, 1);
1923 dpdlendun(1, ((sourhi)<<6) | (sourmh>>26));
1924 } // [cannot drop through]
1925 sourmh=DFWORD(df, 1);
1926 if (sourmh) {
1927 dpdlenchk(1, sourmh>>26);
1928 dpdlenchk(2, sourmh>>16);
1929 dpdlenchk(3, sourmh>>6);
1930 sourml=DFWORD(df, 2);
1931 dpdlendun(4, ((sourmh)<<4) | (sourml>>28));
1932 } // [cannot drop through]
1933 sourml=DFWORD(df, 2);
1934 if (sourml) {
1935 dpdlenchk(4, sourml>>28);
1936 dpdlenchk(5, sourml>>18);
1937 dpdlenchk(6, sourml>>8);
1938 sourlo=DFWORD(df, 3);
1939 dpdlendun(7, ((sourml)<<2) | (sourlo>>30));
1940 } // [cannot drop through]
1941 sourlo=DFWORD(df, 3);
1942 if (sourlo&0xfff00000) { // in one of first two
1943 dpdlenchk(7, sourlo>>30); // very rare
1944 dpdlendun(8, sourlo>>20);
1945 } // [cannot drop through]
1946 dpdlenchk(9, sourlo>>10);
1947 dpdlendun(10, sourlo);
1948 // [cannot drop through]
1949 #endif
1950 } // decFloatDigits
1951
1952 /* ------------------------------------------------------------------ */
1953 /* decFloatDivide -- divide a decFloat by another */
1954 /* */
1955 /* result gets the result of dividing dfl by dfr: */
1956 /* dfl is the first decFloat (lhs) */
1957 /* dfr is the second decFloat (rhs) */
1958 /* set is the context */
1959 /* returns result */
1960 /* */
1961 /* ------------------------------------------------------------------ */
1962 // This is just a wrapper.
decFloatDivide(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)1963 decFloat * decFloatDivide(decFloat *result,
1964 const decFloat *dfl, const decFloat *dfr,
1965 decContext *set) {
1966 return decDivide(result, dfl, dfr, set, DIVIDE);
1967 } // decFloatDivide
1968
1969 /* ------------------------------------------------------------------ */
1970 /* decFloatDivideInteger -- integer divide a decFloat by another */
1971 /* */
1972 /* result gets the result of dividing dfl by dfr: */
1973 /* dfl is the first decFloat (lhs) */
1974 /* dfr is the second decFloat (rhs) */
1975 /* set is the context */
1976 /* returns result */
1977 /* */
1978 /* ------------------------------------------------------------------ */
decFloatDivideInteger(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)1979 decFloat * decFloatDivideInteger(decFloat *result,
1980 const decFloat *dfl, const decFloat *dfr,
1981 decContext *set) {
1982 return decDivide(result, dfl, dfr, set, DIVIDEINT);
1983 } // decFloatDivideInteger
1984
1985 /* ------------------------------------------------------------------ */
1986 /* decFloatFMA -- multiply and add three decFloats, fused */
1987 /* */
1988 /* result gets the result of (dfl*dfr)+dff with a single rounding */
1989 /* dfl is the first decFloat (lhs) */
1990 /* dfr is the second decFloat (rhs) */
1991 /* dff is the final decFloat (fhs) */
1992 /* set is the context */
1993 /* returns result */
1994 /* */
1995 /* ------------------------------------------------------------------ */
decFloatFMA(decFloat * result,const decFloat * dfl,const decFloat * dfr,const decFloat * dff,decContext * set)1996 decFloat * decFloatFMA(decFloat *result, const decFloat *dfl,
1997 const decFloat *dfr, const decFloat *dff,
1998 decContext *set) {
1999
2000 // The accumulator has the bytes needed for FiniteMultiply, plus
2001 // one byte to the left in case of carry, plus DECPMAX+2 to the
2002 // right for the final addition (up to full fhs + round & sticky)
2003 #define FMALEN (ROUNDUP4(1+ (DECPMAX9*18+1) +DECPMAX+2))
2004 uByte acc[FMALEN]; // for multiplied coefficient in BCD
2005 // .. and for final result
2006 bcdnum mul; // for multiplication result
2007 bcdnum fin; // for final operand, expanded
2008 uByte coe[ROUNDUP4(DECPMAX)]; // dff coefficient in BCD
2009 bcdnum *hi, *lo; // bcdnum with higher/lower exponent
2010 uInt diffsign; // non-zero if signs differ
2011 uInt hipad; // pad digit for hi if needed
2012 Int padding; // excess exponent
2013 uInt carry; // +1 for ten's complement and during add
2014 uByte *ub, *uh, *ul; // work
2015 uInt uiwork; // for macros
2016
2017 // handle all the special values [any special operand leads to a
2018 // special result]
2019 if (DFISSPECIAL(dfl) || DFISSPECIAL(dfr) || DFISSPECIAL(dff)) {
2020 decFloat proxy; // multiplication result proxy
2021 // NaNs are handled as usual, giving priority to sNaNs
2022 if (DFISSNAN(dfl) || DFISSNAN(dfr)) return decNaNs(result, dfl, dfr, set);
2023 if (DFISSNAN(dff)) return decNaNs(result, dff, NULL, set);
2024 if (DFISNAN(dfl) || DFISNAN(dfr)) return decNaNs(result, dfl, dfr, set);
2025 if (DFISNAN(dff)) return decNaNs(result, dff, NULL, set);
2026 // One or more of the three is infinite
2027 // infinity times zero is bad
2028 decFloatZero(&proxy);
2029 if (DFISINF(dfl)) {
2030 if (DFISZERO(dfr)) return decInvalid(result, set);
2031 decInfinity(&proxy, &proxy);
2032 }
2033 else if (DFISINF(dfr)) {
2034 if (DFISZERO(dfl)) return decInvalid(result, set);
2035 decInfinity(&proxy, &proxy);
2036 }
2037 // compute sign of multiplication and place in proxy
2038 DFWORD(&proxy, 0)|=(DFWORD(dfl, 0)^DFWORD(dfr, 0))&DECFLOAT_Sign;
2039 if (!DFISINF(dff)) return decFloatCopy(result, &proxy);
2040 // dff is Infinite
2041 if (!DFISINF(&proxy)) return decInfinity(result, dff);
2042 // both sides of addition are infinite; different sign is bad
2043 if ((DFWORD(dff, 0)&DECFLOAT_Sign)!=(DFWORD(&proxy, 0)&DECFLOAT_Sign))
2044 return decInvalid(result, set);
2045 return decFloatCopy(result, &proxy);
2046 }
2047
2048 /* Here when all operands are finite */
2049
2050 // First multiply dfl*dfr
2051 decFiniteMultiply(&mul, acc+1, dfl, dfr);
2052 // The multiply is complete, exact and unbounded, and described in
2053 // mul with the coefficient held in acc[1...]
2054
2055 // now add in dff; the algorithm is essentially the same as
2056 // decFloatAdd, but the code is different because the code there
2057 // is highly optimized for adding two numbers of the same size
2058 fin.exponent=GETEXPUN(dff); // get dff exponent and sign
2059 fin.sign=DFWORD(dff, 0)&DECFLOAT_Sign;
2060 diffsign=mul.sign^fin.sign; // note if signs differ
2061 fin.msd=coe;
2062 fin.lsd=coe+DECPMAX-1;
2063 GETCOEFF(dff, coe); // extract the coefficient
2064
2065 // now set hi and lo so that hi points to whichever of mul and fin
2066 // has the higher exponent and lo points to the other [don't care,
2067 // if the same]. One coefficient will be in acc, the other in coe.
2068 if (mul.exponent>=fin.exponent) {
2069 hi=&mul;
2070 lo=&fin;
2071 }
2072 else {
2073 hi=&fin;
2074 lo=&mul;
2075 }
2076
2077 // remove leading zeros on both operands; this will save time later
2078 // and make testing for zero trivial (tests are safe because acc
2079 // and coe are rounded up to uInts)
2080 for (; UBTOUI(hi->msd)==0 && hi->msd+3<hi->lsd;) hi->msd+=4;
2081 for (; *hi->msd==0 && hi->msd<hi->lsd;) hi->msd++;
2082 for (; UBTOUI(lo->msd)==0 && lo->msd+3<lo->lsd;) lo->msd+=4;
2083 for (; *lo->msd==0 && lo->msd<lo->lsd;) lo->msd++;
2084
2085 // if hi is zero then result will be lo (which has the smaller
2086 // exponent), which also may need to be tested for zero for the
2087 // weird IEEE 754 sign rules
2088 if (*hi->msd==0) { // hi is zero
2089 // "When the sum of two operands with opposite signs is
2090 // exactly zero, the sign of that sum shall be '+' in all
2091 // rounding modes except round toward -Infinity, in which
2092 // mode that sign shall be '-'."
2093 if (diffsign) {
2094 if (*lo->msd==0) { // lo is zero
2095 lo->sign=0;
2096 if (set->round==DEC_ROUND_FLOOR) lo->sign=DECFLOAT_Sign;
2097 } // diffsign && lo=0
2098 } // diffsign
2099 return decFinalize(result, lo, set); // may need clamping
2100 } // numfl is zero
2101 // [here, both are minimal length and hi is non-zero]
2102 // (if lo is zero then padding with zeros may be needed, below)
2103
2104 // if signs differ, take the ten's complement of hi (zeros to the
2105 // right do not matter because the complement of zero is zero); the
2106 // +1 is done later, as part of the addition, inserted at the
2107 // correct digit
2108 hipad=0;
2109 carry=0;
2110 if (diffsign) {
2111 hipad=9;
2112 carry=1;
2113 // exactly the correct number of digits must be inverted
2114 for (uh=hi->msd; uh<hi->lsd-3; uh+=4) UBFROMUI(uh, 0x09090909-UBTOUI(uh));
2115 for (; uh<=hi->lsd; uh++) *uh=(uByte)(0x09-*uh);
2116 }
2117
2118 // ready to add; note that hi has no leading zeros so gap
2119 // calculation does not have to be as pessimistic as in decFloatAdd
2120 // (this is much more like the arbitrary-precision algorithm in
2121 // Rexx and decNumber)
2122
2123 // padding is the number of zeros that would need to be added to hi
2124 // for its lsd to be aligned with the lsd of lo
2125 padding=hi->exponent-lo->exponent;
2126 // printf("FMA pad %ld\n", (LI)padding);
2127
2128 // the result of the addition will be built into the accumulator,
2129 // starting from the far right; this could be either hi or lo, and
2130 // will be aligned
2131 ub=acc+FMALEN-1; // where lsd of result will go
2132 ul=lo->lsd; // lsd of rhs
2133
2134 if (padding!=0) { // unaligned
2135 // if the msd of lo is more than DECPMAX+2 digits to the right of
2136 // the original msd of hi then it can be reduced to a single
2137 // digit at the right place, as it stays clear of hi digits
2138 // [it must be DECPMAX+2 because during a subtraction the msd
2139 // could become 0 after a borrow from 1.000 to 0.9999...]
2140
2141 Int hilen=(Int)(hi->lsd-hi->msd+1); // length of hi
2142 Int lolen=(Int)(lo->lsd-lo->msd+1); // and of lo
2143
2144 if (hilen+padding-lolen > DECPMAX+2) { // can reduce lo to single
2145 // make sure it is virtually at least DECPMAX from hi->msd, at
2146 // least to right of hi->lsd (in case of destructive subtract),
2147 // and separated by at least two digits from either of those
2148 // (the tricky DOUBLE case is when hi is a 1 that will become a
2149 // 0.9999... by subtraction:
2150 // hi: 1 E+16
2151 // lo: .................1000000000000000 E-16
2152 // which for the addition pads to:
2153 // hi: 1000000000000000000 E-16
2154 // lo: .................1000000000000000 E-16
2155 Int newexp=MINI(hi->exponent, hi->exponent+hilen-DECPMAX)-3;
2156
2157 // printf("FMA reduce: %ld\n", (LI)reduce);
2158 lo->lsd=lo->msd; // to single digit [maybe 0]
2159 lo->exponent=newexp; // new lowest exponent
2160 padding=hi->exponent-lo->exponent; // recalculate
2161 ul=lo->lsd; // .. and repoint
2162 }
2163
2164 // padding is still > 0, but will fit in acc (less leading carry slot)
2165 #if DECCHECK
2166 if (padding<=0) printf("FMA low padding: %ld\n", (LI)padding);
2167 if (hilen+padding+1>FMALEN)
2168 printf("FMA excess hilen+padding: %ld+%ld \n", (LI)hilen, (LI)padding);
2169 // printf("FMA padding: %ld\n", (LI)padding);
2170 #endif
2171
2172 // padding digits can now be set in the result; one or more of
2173 // these will come from lo; others will be zeros in the gap
2174 for (; ul-3>=lo->msd && padding>3; padding-=4, ul-=4, ub-=4) {
2175 UBFROMUI(ub-3, UBTOUI(ul-3)); // [cannot overlap]
2176 }
2177 for (; ul>=lo->msd && padding>0; padding--, ul--, ub--) *ub=*ul;
2178 for (;padding>0; padding--, ub--) *ub=0; // mind the gap
2179 }
2180
2181 // addition now complete to the right of the rightmost digit of hi
2182 uh=hi->lsd;
2183
2184 // dow do the add from hi->lsd to the left
2185 // [bytewise, because either operand can run out at any time]
2186 // carry was set up depending on ten's complement above
2187 // first assume both operands have some digits
2188 for (;; ub--) {
2189 if (uh<hi->msd || ul<lo->msd) break;
2190 *ub=(uByte)(carry+(*uh--)+(*ul--));
2191 carry=0;
2192 if (*ub<10) continue;
2193 *ub-=10;
2194 carry=1;
2195 } // both loop
2196
2197 if (ul<lo->msd) { // to left of lo
2198 for (;; ub--) {
2199 if (uh<hi->msd) break;
2200 *ub=(uByte)(carry+(*uh--)); // [+0]
2201 carry=0;
2202 if (*ub<10) continue;
2203 *ub-=10;
2204 carry=1;
2205 } // hi loop
2206 }
2207 else { // to left of hi
2208 for (;; ub--) {
2209 if (ul<lo->msd) break;
2210 *ub=(uByte)(carry+hipad+(*ul--));
2211 carry=0;
2212 if (*ub<10) continue;
2213 *ub-=10;
2214 carry=1;
2215 } // lo loop
2216 }
2217
2218 // addition complete -- now handle carry, borrow, etc.
2219 // use lo to set up the num (its exponent is already correct, and
2220 // sign usually is)
2221 lo->msd=ub+1;
2222 lo->lsd=acc+FMALEN-1;
2223 // decShowNum(lo, "lo");
2224 if (!diffsign) { // same-sign addition
2225 if (carry) { // carry out
2226 *ub=1; // place the 1 ..
2227 lo->msd--; // .. and update
2228 }
2229 } // same sign
2230 else { // signs differed (subtraction)
2231 if (!carry) { // no carry out means hi<lo
2232 // borrowed -- take ten's complement of the right digits
2233 lo->sign=hi->sign; // sign is lhs sign
2234 for (ul=lo->msd; ul<lo->lsd-3; ul+=4) UBFROMUI(ul, 0x09090909-UBTOUI(ul));
2235 for (; ul<=lo->lsd; ul++) *ul=(uByte)(0x09-*ul); // [leaves ul at lsd+1]
2236 // complete the ten's complement by adding 1 [cannot overrun]
2237 for (ul--; *ul==9; ul--) *ul=0;
2238 *ul+=1;
2239 } // borrowed
2240 else { // carry out means hi>=lo
2241 // sign to use is lo->sign
2242 // all done except for the special IEEE 754 exact-zero-result
2243 // rule (see above); while testing for zero, strip leading
2244 // zeros (which will save decFinalize doing it)
2245 for (; UBTOUI(lo->msd)==0 && lo->msd+3<lo->lsd;) lo->msd+=4;
2246 for (; *lo->msd==0 && lo->msd<lo->lsd;) lo->msd++;
2247 if (*lo->msd==0) { // must be true zero (and diffsign)
2248 lo->sign=0; // assume +
2249 if (set->round==DEC_ROUND_FLOOR) lo->sign=DECFLOAT_Sign;
2250 }
2251 // [else was not zero, might still have leading zeros]
2252 } // subtraction gave positive result
2253 } // diffsign
2254
2255 #if DECCHECK
2256 // assert no left underrun
2257 if (lo->msd<acc) {
2258 printf("FMA underrun by %ld \n", (LI)(acc-lo->msd));
2259 }
2260 #endif
2261
2262 return decFinalize(result, lo, set); // round, check, and lay out
2263 } // decFloatFMA
2264
2265 /* ------------------------------------------------------------------ */
2266 /* decFloatFromInt -- initialise a decFloat from an Int */
2267 /* */
2268 /* result gets the converted Int */
2269 /* n is the Int to convert */
2270 /* returns result */
2271 /* */
2272 /* The result is Exact; no errors or exceptions are possible. */
2273 /* ------------------------------------------------------------------ */
decFloatFromInt32(decFloat * result,Int n)2274 decFloat * decFloatFromInt32(decFloat *result, Int n) {
2275 uInt u=(uInt)n; // copy as bits
2276 uInt encode; // work
2277 DFWORD(result, 0)=ZEROWORD; // always
2278 #if QUAD
2279 DFWORD(result, 1)=0;
2280 DFWORD(result, 2)=0;
2281 #endif
2282 if (n<0) { // handle -n with care
2283 // [This can be done without the test, but is then slightly slower]
2284 u=(~u)+1;
2285 DFWORD(result, 0)|=DECFLOAT_Sign;
2286 }
2287 // Since the maximum value of u now is 2**31, only the low word of
2288 // result is affected
2289 encode=BIN2DPD[u%1000];
2290 u/=1000;
2291 encode|=BIN2DPD[u%1000]<<10;
2292 u/=1000;
2293 encode|=BIN2DPD[u%1000]<<20;
2294 u/=1000; // now 0, 1, or 2
2295 encode|=u<<30;
2296 DFWORD(result, DECWORDS-1)=encode;
2297 return result;
2298 } // decFloatFromInt32
2299
2300 /* ------------------------------------------------------------------ */
2301 /* decFloatFromUInt -- initialise a decFloat from a uInt */
2302 /* */
2303 /* result gets the converted uInt */
2304 /* n is the uInt to convert */
2305 /* returns result */
2306 /* */
2307 /* The result is Exact; no errors or exceptions are possible. */
2308 /* ------------------------------------------------------------------ */
decFloatFromUInt32(decFloat * result,uInt u)2309 decFloat * decFloatFromUInt32(decFloat *result, uInt u) {
2310 uInt encode; // work
2311 DFWORD(result, 0)=ZEROWORD; // always
2312 #if QUAD
2313 DFWORD(result, 1)=0;
2314 DFWORD(result, 2)=0;
2315 #endif
2316 encode=BIN2DPD[u%1000];
2317 u/=1000;
2318 encode|=BIN2DPD[u%1000]<<10;
2319 u/=1000;
2320 encode|=BIN2DPD[u%1000]<<20;
2321 u/=1000; // now 0 -> 4
2322 encode|=u<<30;
2323 DFWORD(result, DECWORDS-1)=encode;
2324 DFWORD(result, DECWORDS-2)|=u>>2; // rarely non-zero
2325 return result;
2326 } // decFloatFromUInt32
2327
2328 /* ------------------------------------------------------------------ */
2329 /* decFloatInvert -- logical digitwise INVERT of a decFloat */
2330 /* */
2331 /* result gets the result of INVERTing df */
2332 /* df is the decFloat to invert */
2333 /* set is the context */
2334 /* returns result, which will be canonical with sign=0 */
2335 /* */
2336 /* The operand must be positive, finite with exponent q=0, and */
2337 /* comprise just zeros and ones; if not, Invalid operation results. */
2338 /* ------------------------------------------------------------------ */
decFloatInvert(decFloat * result,const decFloat * df,decContext * set)2339 decFloat * decFloatInvert(decFloat *result, const decFloat *df,
2340 decContext *set) {
2341 uInt sourhi=DFWORD(df, 0); // top word of dfs
2342
2343 if (!DFISUINT01(df) || !DFISCC01(df)) return decInvalid(result, set);
2344 // the operand is a finite integer (q=0)
2345 #if DOUBLE
2346 DFWORD(result, 0)=ZEROWORD|((~sourhi)&0x04009124);
2347 DFWORD(result, 1)=(~DFWORD(df, 1)) &0x49124491;
2348 #elif QUAD
2349 DFWORD(result, 0)=ZEROWORD|((~sourhi)&0x04000912);
2350 DFWORD(result, 1)=(~DFWORD(df, 1)) &0x44912449;
2351 DFWORD(result, 2)=(~DFWORD(df, 2)) &0x12449124;
2352 DFWORD(result, 3)=(~DFWORD(df, 3)) &0x49124491;
2353 #endif
2354 return result;
2355 } // decFloatInvert
2356
2357 /* ------------------------------------------------------------------ */
2358 /* decFloatIs -- decFloat tests (IsSigned, etc.) */
2359 /* */
2360 /* df is the decFloat to test */
2361 /* returns 0 or 1 in a uInt */
2362 /* */
2363 /* Many of these could be macros, but having them as real functions */
2364 /* is a little cleaner (and they can be referred to here by the */
2365 /* generic names) */
2366 /* ------------------------------------------------------------------ */
decFloatIsCanonical(const decFloat * df)2367 uInt decFloatIsCanonical(const decFloat *df) {
2368 if (DFISSPECIAL(df)) {
2369 if (DFISINF(df)) {
2370 if (DFWORD(df, 0)&ECONMASK) return 0; // exponent continuation
2371 if (!DFISCCZERO(df)) return 0; // coefficient continuation
2372 return 1;
2373 }
2374 // is a NaN
2375 if (DFWORD(df, 0)&ECONNANMASK) return 0; // exponent continuation
2376 if (DFISCCZERO(df)) return 1; // coefficient continuation
2377 // drop through to check payload
2378 }
2379 { // declare block
2380 #if DOUBLE
2381 uInt sourhi=DFWORD(df, 0);
2382 uInt sourlo=DFWORD(df, 1);
2383 if (CANONDPDOFF(sourhi, 8)
2384 && CANONDPDTWO(sourhi, sourlo, 30)
2385 && CANONDPDOFF(sourlo, 20)
2386 && CANONDPDOFF(sourlo, 10)
2387 && CANONDPDOFF(sourlo, 0)) return 1;
2388 #elif QUAD
2389 uInt sourhi=DFWORD(df, 0);
2390 uInt sourmh=DFWORD(df, 1);
2391 uInt sourml=DFWORD(df, 2);
2392 uInt sourlo=DFWORD(df, 3);
2393 if (CANONDPDOFF(sourhi, 4)
2394 && CANONDPDTWO(sourhi, sourmh, 26)
2395 && CANONDPDOFF(sourmh, 16)
2396 && CANONDPDOFF(sourmh, 6)
2397 && CANONDPDTWO(sourmh, sourml, 28)
2398 && CANONDPDOFF(sourml, 18)
2399 && CANONDPDOFF(sourml, 8)
2400 && CANONDPDTWO(sourml, sourlo, 30)
2401 && CANONDPDOFF(sourlo, 20)
2402 && CANONDPDOFF(sourlo, 10)
2403 && CANONDPDOFF(sourlo, 0)) return 1;
2404 #endif
2405 } // block
2406 return 0; // a declet is non-canonical
2407 }
2408
decFloatIsFinite(const decFloat * df)2409 uInt decFloatIsFinite(const decFloat *df) {
2410 return !DFISSPECIAL(df);
2411 }
decFloatIsInfinite(const decFloat * df)2412 uInt decFloatIsInfinite(const decFloat *df) {
2413 return DFISINF(df);
2414 }
decFloatIsInteger(const decFloat * df)2415 uInt decFloatIsInteger(const decFloat *df) {
2416 return DFISINT(df);
2417 }
decFloatIsLogical(const decFloat * df)2418 uInt decFloatIsLogical(const decFloat *df) {
2419 return DFISUINT01(df) & DFISCC01(df);
2420 }
decFloatIsNaN(const decFloat * df)2421 uInt decFloatIsNaN(const decFloat *df) {
2422 return DFISNAN(df);
2423 }
decFloatIsNegative(const decFloat * df)2424 uInt decFloatIsNegative(const decFloat *df) {
2425 return DFISSIGNED(df) && !DFISZERO(df) && !DFISNAN(df);
2426 }
decFloatIsNormal(const decFloat * df)2427 uInt decFloatIsNormal(const decFloat *df) {
2428 Int exp; // exponent
2429 if (DFISSPECIAL(df)) return 0;
2430 if (DFISZERO(df)) return 0;
2431 // is finite and non-zero
2432 exp=GETEXPUN(df) // get unbiased exponent ..
2433 +decFloatDigits(df)-1; // .. and make adjusted exponent
2434 return (exp>=DECEMIN); // < DECEMIN is subnormal
2435 }
decFloatIsPositive(const decFloat * df)2436 uInt decFloatIsPositive(const decFloat *df) {
2437 return !DFISSIGNED(df) && !DFISZERO(df) && !DFISNAN(df);
2438 }
decFloatIsSignaling(const decFloat * df)2439 uInt decFloatIsSignaling(const decFloat *df) {
2440 return DFISSNAN(df);
2441 }
decFloatIsSignalling(const decFloat * df)2442 uInt decFloatIsSignalling(const decFloat *df) {
2443 return DFISSNAN(df);
2444 }
decFloatIsSigned(const decFloat * df)2445 uInt decFloatIsSigned(const decFloat *df) {
2446 return DFISSIGNED(df)!=0;
2447 }
decFloatIsSubnormal(const decFloat * df)2448 uInt decFloatIsSubnormal(const decFloat *df) {
2449 if (DFISSPECIAL(df)) return 0;
2450 // is finite
2451 if (decFloatIsNormal(df)) return 0;
2452 // it is <Nmin, but could be zero
2453 if (DFISZERO(df)) return 0;
2454 return 1; // is subnormal
2455 }
decFloatIsZero(const decFloat * df)2456 uInt decFloatIsZero(const decFloat *df) {
2457 return DFISZERO(df);
2458 } // decFloatIs...
2459
2460 /* ------------------------------------------------------------------ */
2461 /* decFloatLogB -- return adjusted exponent, by 754 rules */
2462 /* */
2463 /* result gets the adjusted exponent as an integer, or a NaN etc. */
2464 /* df is the decFloat to be examined */
2465 /* set is the context */
2466 /* returns result */
2467 /* */
2468 /* Notable cases: */
2469 /* A<0 -> Use |A| */
2470 /* A=0 -> -Infinity (Division by zero) */
2471 /* A=Infinite -> +Infinity (Exact) */
2472 /* A=1 exactly -> 0 (Exact) */
2473 /* NaNs are propagated as usual */
2474 /* ------------------------------------------------------------------ */
decFloatLogB(decFloat * result,const decFloat * df,decContext * set)2475 decFloat * decFloatLogB(decFloat *result, const decFloat *df,
2476 decContext *set) {
2477 Int ae; // adjusted exponent
2478 if (DFISNAN(df)) return decNaNs(result, df, NULL, set);
2479 if (DFISINF(df)) {
2480 DFWORD(result, 0)=0; // need +ve
2481 return decInfinity(result, result); // canonical +Infinity
2482 }
2483 if (DFISZERO(df)) {
2484 set->status|=DEC_Division_by_zero; // as per 754
2485 DFWORD(result, 0)=DECFLOAT_Sign; // make negative
2486 return decInfinity(result, result); // canonical -Infinity
2487 }
2488 ae=GETEXPUN(df) // get unbiased exponent ..
2489 +decFloatDigits(df)-1; // .. and make adjusted exponent
2490 // ae has limited range (3 digits for DOUBLE and 4 for QUAD), so
2491 // it is worth using a special case of decFloatFromInt32
2492 DFWORD(result, 0)=ZEROWORD; // always
2493 if (ae<0) {
2494 DFWORD(result, 0)|=DECFLOAT_Sign; // -0 so far
2495 ae=-ae;
2496 }
2497 #if DOUBLE
2498 DFWORD(result, 1)=BIN2DPD[ae]; // a single declet
2499 #elif QUAD
2500 DFWORD(result, 1)=0;
2501 DFWORD(result, 2)=0;
2502 DFWORD(result, 3)=(ae/1000)<<10; // is <10, so need no DPD encode
2503 DFWORD(result, 3)|=BIN2DPD[ae%1000];
2504 #endif
2505 return result;
2506 } // decFloatLogB
2507
2508 /* ------------------------------------------------------------------ */
2509 /* decFloatMax -- return maxnum of two operands */
2510 /* */
2511 /* result gets the chosen decFloat */
2512 /* dfl is the first decFloat (lhs) */
2513 /* dfr is the second decFloat (rhs) */
2514 /* set is the context */
2515 /* returns result */
2516 /* */
2517 /* If just one operand is a quiet NaN it is ignored. */
2518 /* ------------------------------------------------------------------ */
decFloatMax(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)2519 decFloat * decFloatMax(decFloat *result,
2520 const decFloat *dfl, const decFloat *dfr,
2521 decContext *set) {
2522 Int comp;
2523 if (DFISNAN(dfl)) {
2524 // sNaN or both NaNs leads to normal NaN processing
2525 if (DFISNAN(dfr) || DFISSNAN(dfl)) return decNaNs(result, dfl, dfr, set);
2526 return decCanonical(result, dfr); // RHS is numeric
2527 }
2528 if (DFISNAN(dfr)) {
2529 // sNaN leads to normal NaN processing (both NaN handled above)
2530 if (DFISSNAN(dfr)) return decNaNs(result, dfl, dfr, set);
2531 return decCanonical(result, dfl); // LHS is numeric
2532 }
2533 // Both operands are numeric; numeric comparison needed -- use
2534 // total order for a well-defined choice (and +0 > -0)
2535 comp=decNumCompare(dfl, dfr, 1);
2536 if (comp>=0) return decCanonical(result, dfl);
2537 return decCanonical(result, dfr);
2538 } // decFloatMax
2539
2540 /* ------------------------------------------------------------------ */
2541 /* decFloatMaxMag -- return maxnummag of two operands */
2542 /* */
2543 /* result gets the chosen decFloat */
2544 /* dfl is the first decFloat (lhs) */
2545 /* dfr is the second decFloat (rhs) */
2546 /* set is the context */
2547 /* returns result */
2548 /* */
2549 /* Returns according to the magnitude comparisons if both numeric and */
2550 /* unequal, otherwise returns maxnum */
2551 /* ------------------------------------------------------------------ */
decFloatMaxMag(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)2552 decFloat * decFloatMaxMag(decFloat *result,
2553 const decFloat *dfl, const decFloat *dfr,
2554 decContext *set) {
2555 Int comp;
2556 decFloat absl, absr;
2557 if (DFISNAN(dfl) || DFISNAN(dfr)) return decFloatMax(result, dfl, dfr, set);
2558
2559 decFloatCopyAbs(&absl, dfl);
2560 decFloatCopyAbs(&absr, dfr);
2561 comp=decNumCompare(&absl, &absr, 0);
2562 if (comp>0) return decCanonical(result, dfl);
2563 if (comp<0) return decCanonical(result, dfr);
2564 return decFloatMax(result, dfl, dfr, set);
2565 } // decFloatMaxMag
2566
2567 /* ------------------------------------------------------------------ */
2568 /* decFloatMin -- return minnum of two operands */
2569 /* */
2570 /* result gets the chosen decFloat */
2571 /* dfl is the first decFloat (lhs) */
2572 /* dfr is the second decFloat (rhs) */
2573 /* set is the context */
2574 /* returns result */
2575 /* */
2576 /* If just one operand is a quiet NaN it is ignored. */
2577 /* ------------------------------------------------------------------ */
decFloatMin(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)2578 decFloat * decFloatMin(decFloat *result,
2579 const decFloat *dfl, const decFloat *dfr,
2580 decContext *set) {
2581 Int comp;
2582 if (DFISNAN(dfl)) {
2583 // sNaN or both NaNs leads to normal NaN processing
2584 if (DFISNAN(dfr) || DFISSNAN(dfl)) return decNaNs(result, dfl, dfr, set);
2585 return decCanonical(result, dfr); // RHS is numeric
2586 }
2587 if (DFISNAN(dfr)) {
2588 // sNaN leads to normal NaN processing (both NaN handled above)
2589 if (DFISSNAN(dfr)) return decNaNs(result, dfl, dfr, set);
2590 return decCanonical(result, dfl); // LHS is numeric
2591 }
2592 // Both operands are numeric; numeric comparison needed -- use
2593 // total order for a well-defined choice (and +0 > -0)
2594 comp=decNumCompare(dfl, dfr, 1);
2595 if (comp<=0) return decCanonical(result, dfl);
2596 return decCanonical(result, dfr);
2597 } // decFloatMin
2598
2599 /* ------------------------------------------------------------------ */
2600 /* decFloatMinMag -- return minnummag of two operands */
2601 /* */
2602 /* result gets the chosen decFloat */
2603 /* dfl is the first decFloat (lhs) */
2604 /* dfr is the second decFloat (rhs) */
2605 /* set is the context */
2606 /* returns result */
2607 /* */
2608 /* Returns according to the magnitude comparisons if both numeric and */
2609 /* unequal, otherwise returns minnum */
2610 /* ------------------------------------------------------------------ */
decFloatMinMag(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)2611 decFloat * decFloatMinMag(decFloat *result,
2612 const decFloat *dfl, const decFloat *dfr,
2613 decContext *set) {
2614 Int comp;
2615 decFloat absl, absr;
2616 if (DFISNAN(dfl) || DFISNAN(dfr)) return decFloatMin(result, dfl, dfr, set);
2617
2618 decFloatCopyAbs(&absl, dfl);
2619 decFloatCopyAbs(&absr, dfr);
2620 comp=decNumCompare(&absl, &absr, 0);
2621 if (comp<0) return decCanonical(result, dfl);
2622 if (comp>0) return decCanonical(result, dfr);
2623 return decFloatMin(result, dfl, dfr, set);
2624 } // decFloatMinMag
2625
2626 /* ------------------------------------------------------------------ */
2627 /* decFloatMinus -- negate value, heeding NaNs, etc. */
2628 /* */
2629 /* result gets the canonicalized 0-df */
2630 /* df is the decFloat to minus */
2631 /* set is the context */
2632 /* returns result */
2633 /* */
2634 /* This has the same effect as 0-df where the exponent of the zero is */
2635 /* the same as that of df (if df is finite). */
2636 /* The effect is also the same as decFloatCopyNegate except that NaNs */
2637 /* are handled normally (the sign of a NaN is not affected, and an */
2638 /* sNaN will signal), the result is canonical, and zero gets sign 0. */
2639 /* ------------------------------------------------------------------ */
decFloatMinus(decFloat * result,const decFloat * df,decContext * set)2640 decFloat * decFloatMinus(decFloat *result, const decFloat *df,
2641 decContext *set) {
2642 if (DFISNAN(df)) return decNaNs(result, df, NULL, set);
2643 decCanonical(result, df); // copy and check
2644 if (DFISZERO(df)) DFBYTE(result, 0)&=~0x80; // turn off sign bit
2645 else DFBYTE(result, 0)^=0x80; // flip sign bit
2646 return result;
2647 } // decFloatMinus
2648
2649 /* ------------------------------------------------------------------ */
2650 /* decFloatMultiply -- multiply two decFloats */
2651 /* */
2652 /* result gets the result of multiplying dfl and dfr: */
2653 /* dfl is the first decFloat (lhs) */
2654 /* dfr is the second decFloat (rhs) */
2655 /* set is the context */
2656 /* returns result */
2657 /* */
2658 /* ------------------------------------------------------------------ */
decFloatMultiply(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)2659 decFloat * decFloatMultiply(decFloat *result,
2660 const decFloat *dfl, const decFloat *dfr,
2661 decContext *set) {
2662 bcdnum num; // for final conversion
2663 uByte bcdacc[DECPMAX9*18+1]; // for coefficent in BCD
2664
2665 if (DFISSPECIAL(dfl) || DFISSPECIAL(dfr)) { // either is special?
2666 // NaNs are handled as usual
2667 if (DFISNAN(dfl) || DFISNAN(dfr)) return decNaNs(result, dfl, dfr, set);
2668 // infinity times zero is bad
2669 if (DFISINF(dfl) && DFISZERO(dfr)) return decInvalid(result, set);
2670 if (DFISINF(dfr) && DFISZERO(dfl)) return decInvalid(result, set);
2671 // both infinite; return canonical infinity with computed sign
2672 DFWORD(result, 0)=DFWORD(dfl, 0)^DFWORD(dfr, 0); // compute sign
2673 return decInfinity(result, result);
2674 }
2675
2676 /* Here when both operands are finite */
2677 decFiniteMultiply(&num, bcdacc, dfl, dfr);
2678 return decFinalize(result, &num, set); // round, check, and lay out
2679 } // decFloatMultiply
2680
2681 /* ------------------------------------------------------------------ */
2682 /* decFloatNextMinus -- next towards -Infinity */
2683 /* */
2684 /* result gets the next lesser decFloat */
2685 /* dfl is the decFloat to start with */
2686 /* set is the context */
2687 /* returns result */
2688 /* */
2689 /* This is 754 nextdown; Invalid is the only status possible (from */
2690 /* an sNaN). */
2691 /* ------------------------------------------------------------------ */
decFloatNextMinus(decFloat * result,const decFloat * dfl,decContext * set)2692 decFloat * decFloatNextMinus(decFloat *result, const decFloat *dfl,
2693 decContext *set) {
2694 decFloat delta; // tiny increment
2695 uInt savestat; // saves status
2696 enum rounding saveround; // .. and mode
2697
2698 // +Infinity is the special case
2699 if (DFISINF(dfl) && !DFISSIGNED(dfl)) {
2700 DFSETNMAX(result);
2701 return result; // [no status to set]
2702 }
2703 // other cases are effected by sutracting a tiny delta -- this
2704 // should be done in a wider format as the delta is unrepresentable
2705 // here (but can be done with normal add if the sign of zero is
2706 // treated carefully, because no Inexactitude is interesting);
2707 // rounding to -Infinity then pushes the result to next below
2708 decFloatZero(&delta); // set up tiny delta
2709 DFWORD(&delta, DECWORDS-1)=1; // coefficient=1
2710 DFWORD(&delta, 0)=DECFLOAT_Sign; // Sign=1 + biased exponent=0
2711 // set up for the directional round
2712 saveround=set->round; // save mode
2713 set->round=DEC_ROUND_FLOOR; // .. round towards -Infinity
2714 savestat=set->status; // save status
2715 decFloatAdd(result, dfl, &delta, set);
2716 // Add rules mess up the sign when going from +Ntiny to 0
2717 if (DFISZERO(result)) DFWORD(result, 0)^=DECFLOAT_Sign; // correct
2718 set->status&=DEC_Invalid_operation; // preserve only sNaN status
2719 set->status|=savestat; // restore pending flags
2720 set->round=saveround; // .. and mode
2721 return result;
2722 } // decFloatNextMinus
2723
2724 /* ------------------------------------------------------------------ */
2725 /* decFloatNextPlus -- next towards +Infinity */
2726 /* */
2727 /* result gets the next larger decFloat */
2728 /* dfl is the decFloat to start with */
2729 /* set is the context */
2730 /* returns result */
2731 /* */
2732 /* This is 754 nextup; Invalid is the only status possible (from */
2733 /* an sNaN). */
2734 /* ------------------------------------------------------------------ */
decFloatNextPlus(decFloat * result,const decFloat * dfl,decContext * set)2735 decFloat * decFloatNextPlus(decFloat *result, const decFloat *dfl,
2736 decContext *set) {
2737 uInt savestat; // saves status
2738 enum rounding saveround; // .. and mode
2739 decFloat delta; // tiny increment
2740
2741 // -Infinity is the special case
2742 if (DFISINF(dfl) && DFISSIGNED(dfl)) {
2743 DFSETNMAX(result);
2744 DFWORD(result, 0)|=DECFLOAT_Sign; // make negative
2745 return result; // [no status to set]
2746 }
2747 // other cases are effected by sutracting a tiny delta -- this
2748 // should be done in a wider format as the delta is unrepresentable
2749 // here (but can be done with normal add if the sign of zero is
2750 // treated carefully, because no Inexactitude is interesting);
2751 // rounding to +Infinity then pushes the result to next above
2752 decFloatZero(&delta); // set up tiny delta
2753 DFWORD(&delta, DECWORDS-1)=1; // coefficient=1
2754 DFWORD(&delta, 0)=0; // Sign=0 + biased exponent=0
2755 // set up for the directional round
2756 saveround=set->round; // save mode
2757 set->round=DEC_ROUND_CEILING; // .. round towards +Infinity
2758 savestat=set->status; // save status
2759 decFloatAdd(result, dfl, &delta, set);
2760 // Add rules mess up the sign when going from -Ntiny to -0
2761 if (DFISZERO(result)) DFWORD(result, 0)^=DECFLOAT_Sign; // correct
2762 set->status&=DEC_Invalid_operation; // preserve only sNaN status
2763 set->status|=savestat; // restore pending flags
2764 set->round=saveround; // .. and mode
2765 return result;
2766 } // decFloatNextPlus
2767
2768 /* ------------------------------------------------------------------ */
2769 /* decFloatNextToward -- next towards a decFloat */
2770 /* */
2771 /* result gets the next decFloat */
2772 /* dfl is the decFloat to start with */
2773 /* dfr is the decFloat to move toward */
2774 /* set is the context */
2775 /* returns result */
2776 /* */
2777 /* This is 754-1985 nextafter, as modified during revision (dropped */
2778 /* from 754-2008); status may be set unless the result is a normal */
2779 /* number. */
2780 /* ------------------------------------------------------------------ */
decFloatNextToward(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)2781 decFloat * decFloatNextToward(decFloat *result,
2782 const decFloat *dfl, const decFloat *dfr,
2783 decContext *set) {
2784 decFloat delta; // tiny increment or decrement
2785 decFloat pointone; // 1e-1
2786 uInt savestat; // saves status
2787 enum rounding saveround; // .. and mode
2788 uInt deltatop; // top word for delta
2789 Int comp; // work
2790
2791 if (DFISNAN(dfl) || DFISNAN(dfr)) return decNaNs(result, dfl, dfr, set);
2792 // Both are numeric, so Invalid no longer a possibility
2793 comp=decNumCompare(dfl, dfr, 0);
2794 if (comp==0) return decFloatCopySign(result, dfl, dfr); // equal
2795 // unequal; do NextPlus or NextMinus but with different status rules
2796
2797 if (comp<0) { // lhs<rhs, do NextPlus, see above for commentary
2798 if (DFISINF(dfl) && DFISSIGNED(dfl)) { // -Infinity special case
2799 DFSETNMAX(result);
2800 DFWORD(result, 0)|=DECFLOAT_Sign;
2801 return result;
2802 }
2803 saveround=set->round; // save mode
2804 set->round=DEC_ROUND_CEILING; // .. round towards +Infinity
2805 deltatop=0; // positive delta
2806 }
2807 else { // lhs>rhs, do NextMinus, see above for commentary
2808 if (DFISINF(dfl) && !DFISSIGNED(dfl)) { // +Infinity special case
2809 DFSETNMAX(result);
2810 return result;
2811 }
2812 saveround=set->round; // save mode
2813 set->round=DEC_ROUND_FLOOR; // .. round towards -Infinity
2814 deltatop=DECFLOAT_Sign; // negative delta
2815 }
2816 savestat=set->status; // save status
2817 // Here, Inexact is needed where appropriate (and hence Underflow,
2818 // etc.). Therefore the tiny delta which is otherwise
2819 // unrepresentable (see NextPlus and NextMinus) is constructed
2820 // using the multiplication of FMA.
2821 decFloatZero(&delta); // set up tiny delta
2822 DFWORD(&delta, DECWORDS-1)=1; // coefficient=1
2823 DFWORD(&delta, 0)=deltatop; // Sign + biased exponent=0
2824 decFloatFromString(&pointone, "1E-1", set); // set up multiplier
2825 decFloatFMA(result, &delta, &pointone, dfl, set);
2826 // [Delta is truly tiny, so no need to correct sign of zero]
2827 // use new status unless the result is normal
2828 if (decFloatIsNormal(result)) set->status=savestat; // else goes forward
2829 set->round=saveround; // restore mode
2830 return result;
2831 } // decFloatNextToward
2832
2833 /* ------------------------------------------------------------------ */
2834 /* decFloatOr -- logical digitwise OR of two decFloats */
2835 /* */
2836 /* result gets the result of ORing dfl and dfr */
2837 /* dfl is the first decFloat (lhs) */
2838 /* dfr is the second decFloat (rhs) */
2839 /* set is the context */
2840 /* returns result, which will be canonical with sign=0 */
2841 /* */
2842 /* The operands must be positive, finite with exponent q=0, and */
2843 /* comprise just zeros and ones; if not, Invalid operation results. */
2844 /* ------------------------------------------------------------------ */
decFloatOr(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)2845 decFloat * decFloatOr(decFloat *result,
2846 const decFloat *dfl, const decFloat *dfr,
2847 decContext *set) {
2848 if (!DFISUINT01(dfl) || !DFISUINT01(dfr)
2849 || !DFISCC01(dfl) || !DFISCC01(dfr)) return decInvalid(result, set);
2850 // the operands are positive finite integers (q=0) with just 0s and 1s
2851 #if DOUBLE
2852 DFWORD(result, 0)=ZEROWORD
2853 |((DFWORD(dfl, 0) | DFWORD(dfr, 0))&0x04009124);
2854 DFWORD(result, 1)=(DFWORD(dfl, 1) | DFWORD(dfr, 1))&0x49124491;
2855 #elif QUAD
2856 DFWORD(result, 0)=ZEROWORD
2857 |((DFWORD(dfl, 0) | DFWORD(dfr, 0))&0x04000912);
2858 DFWORD(result, 1)=(DFWORD(dfl, 1) | DFWORD(dfr, 1))&0x44912449;
2859 DFWORD(result, 2)=(DFWORD(dfl, 2) | DFWORD(dfr, 2))&0x12449124;
2860 DFWORD(result, 3)=(DFWORD(dfl, 3) | DFWORD(dfr, 3))&0x49124491;
2861 #endif
2862 return result;
2863 } // decFloatOr
2864
2865 /* ------------------------------------------------------------------ */
2866 /* decFloatPlus -- add value to 0, heeding NaNs, etc. */
2867 /* */
2868 /* result gets the canonicalized 0+df */
2869 /* df is the decFloat to plus */
2870 /* set is the context */
2871 /* returns result */
2872 /* */
2873 /* This has the same effect as 0+df where the exponent of the zero is */
2874 /* the same as that of df (if df is finite). */
2875 /* The effect is also the same as decFloatCopy except that NaNs */
2876 /* are handled normally (the sign of a NaN is not affected, and an */
2877 /* sNaN will signal), the result is canonical, and zero gets sign 0. */
2878 /* ------------------------------------------------------------------ */
decFloatPlus(decFloat * result,const decFloat * df,decContext * set)2879 decFloat * decFloatPlus(decFloat *result, const decFloat *df,
2880 decContext *set) {
2881 if (DFISNAN(df)) return decNaNs(result, df, NULL, set);
2882 decCanonical(result, df); // copy and check
2883 if (DFISZERO(df)) DFBYTE(result, 0)&=~0x80; // turn off sign bit
2884 return result;
2885 } // decFloatPlus
2886
2887 /* ------------------------------------------------------------------ */
2888 /* decFloatQuantize -- quantize a decFloat */
2889 /* */
2890 /* result gets the result of quantizing dfl to match dfr */
2891 /* dfl is the first decFloat (lhs) */
2892 /* dfr is the second decFloat (rhs), which sets the exponent */
2893 /* set is the context */
2894 /* returns result */
2895 /* */
2896 /* Unless there is an error or the result is infinite, the exponent */
2897 /* of result is guaranteed to be the same as that of dfr. */
2898 /* ------------------------------------------------------------------ */
decFloatQuantize(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)2899 decFloat * decFloatQuantize(decFloat *result,
2900 const decFloat *dfl, const decFloat *dfr,
2901 decContext *set) {
2902 Int explb, exprb; // left and right biased exponents
2903 uByte *ulsd; // local LSD pointer
2904 uByte *ub, *uc; // work
2905 Int drop; // ..
2906 uInt dpd; // ..
2907 uInt encode; // encoding accumulator
2908 uInt sourhil, sourhir; // top words from source decFloats
2909 uInt uiwork; // for macros
2910 #if QUAD
2911 uShort uswork; // ..
2912 #endif
2913 // the following buffer holds the coefficient for manipulation
2914 uByte buf[4+DECPMAX*3+2*QUAD]; // + space for zeros to left or right
2915 #if DECTRACE
2916 bcdnum num; // for trace displays
2917 #endif
2918
2919 /* Start decoding the arguments */
2920 sourhil=DFWORD(dfl, 0); // LHS top word
2921 explb=DECCOMBEXP[sourhil>>26]; // get exponent high bits (in place)
2922 sourhir=DFWORD(dfr, 0); // RHS top word
2923 exprb=DECCOMBEXP[sourhir>>26];
2924
2925 if (EXPISSPECIAL(explb | exprb)) { // either is special?
2926 // NaNs are handled as usual
2927 if (DFISNAN(dfl) || DFISNAN(dfr)) return decNaNs(result, dfl, dfr, set);
2928 // one infinity but not both is bad
2929 if (DFISINF(dfl)!=DFISINF(dfr)) return decInvalid(result, set);
2930 // both infinite; return canonical infinity with sign of LHS
2931 return decInfinity(result, dfl);
2932 }
2933
2934 /* Here when both arguments are finite */
2935 // complete extraction of the exponents [no need to unbias]
2936 explb+=GETECON(dfl); // + continuation
2937 exprb+=GETECON(dfr); // ..
2938
2939 // calculate the number of digits to drop from the coefficient
2940 drop=exprb-explb; // 0 if nothing to do
2941 if (drop==0) return decCanonical(result, dfl); // return canonical
2942
2943 // the coefficient is needed; lay it out into buf, offset so zeros
2944 // can be added before or after as needed -- an extra heading is
2945 // added so can safely pad Quad DECPMAX-1 zeros to the left by
2946 // fours
2947 #define BUFOFF (buf+4+DECPMAX)
2948 GETCOEFF(dfl, BUFOFF); // decode from decFloat
2949 // [now the msd is at BUFOFF and the lsd is at BUFOFF+DECPMAX-1]
2950
2951 #if DECTRACE
2952 num.msd=BUFOFF;
2953 num.lsd=BUFOFF+DECPMAX-1;
2954 num.exponent=explb-DECBIAS;
2955 num.sign=sourhil & DECFLOAT_Sign;
2956 decShowNum(&num, "dfl");
2957 #endif
2958
2959 if (drop>0) { // [most common case]
2960 // (this code is very similar to that in decFloatFinalize, but
2961 // has many differences so is duplicated here -- so any changes
2962 // may need to be made there, too)
2963 uByte *roundat; // -> re-round digit
2964 uByte reround; // reround value
2965 // printf("Rounding; drop=%ld\n", (LI)drop);
2966
2967 // there is at least one zero needed to the left, in all but one
2968 // exceptional (all-nines) case, so place four zeros now; this is
2969 // needed almost always and makes rounding all-nines by fours safe
2970 UBFROMUI(BUFOFF-4, 0);
2971
2972 // Three cases here:
2973 // 1. new LSD is in coefficient (almost always)
2974 // 2. new LSD is digit to left of coefficient (so MSD is
2975 // round-for-reround digit)
2976 // 3. new LSD is to left of case 2 (whole coefficient is sticky)
2977 // Note that leading zeros can safely be treated as useful digits
2978
2979 // [duplicate check-stickies code to save a test]
2980 // [by-digit check for stickies as runs of zeros are rare]
2981 if (drop<DECPMAX) { // NB lengths not addresses
2982 roundat=BUFOFF+DECPMAX-drop;
2983 reround=*roundat;
2984 for (ub=roundat+1; ub<BUFOFF+DECPMAX; ub++) {
2985 if (*ub!=0) { // non-zero to be discarded
2986 reround=DECSTICKYTAB[reround]; // apply sticky bit
2987 break; // [remainder don't-care]
2988 }
2989 } // check stickies
2990 ulsd=roundat-1; // set LSD
2991 }
2992 else { // edge case
2993 if (drop==DECPMAX) {
2994 roundat=BUFOFF;
2995 reround=*roundat;
2996 }
2997 else {
2998 roundat=BUFOFF-1;
2999 reround=0;
3000 }
3001 for (ub=roundat+1; ub<BUFOFF+DECPMAX; ub++) {
3002 if (*ub!=0) { // non-zero to be discarded
3003 reround=DECSTICKYTAB[reround]; // apply sticky bit
3004 break; // [remainder don't-care]
3005 }
3006 } // check stickies
3007 *BUFOFF=0; // make a coefficient of 0
3008 ulsd=BUFOFF; // .. at the MSD place
3009 }
3010
3011 if (reround!=0) { // discarding non-zero
3012 uInt bump=0;
3013 set->status|=DEC_Inexact;
3014
3015 // next decide whether to increment the coefficient
3016 if (set->round==DEC_ROUND_HALF_EVEN) { // fastpath slowest case
3017 if (reround>5) bump=1; // >0.5 goes up
3018 else if (reround==5) // exactly 0.5000 ..
3019 bump=*ulsd & 0x01; // .. up iff [new] lsd is odd
3020 } // r-h-e
3021 else switch (set->round) {
3022 case DEC_ROUND_DOWN: {
3023 // no change
3024 break;} // r-d
3025 case DEC_ROUND_HALF_DOWN: {
3026 if (reround>5) bump=1;
3027 break;} // r-h-d
3028 case DEC_ROUND_HALF_UP: {
3029 if (reround>=5) bump=1;
3030 break;} // r-h-u
3031 case DEC_ROUND_UP: {
3032 if (reround>0) bump=1;
3033 break;} // r-u
3034 case DEC_ROUND_CEILING: {
3035 // same as _UP for positive numbers, and as _DOWN for negatives
3036 if (!(sourhil&DECFLOAT_Sign) && reround>0) bump=1;
3037 break;} // r-c
3038 case DEC_ROUND_FLOOR: {
3039 // same as _UP for negative numbers, and as _DOWN for positive
3040 // [negative reround cannot occur on 0]
3041 if (sourhil&DECFLOAT_Sign && reround>0) bump=1;
3042 break;} // r-f
3043 case DEC_ROUND_05UP: {
3044 if (reround>0) { // anything out there is 'sticky'
3045 // bump iff lsd=0 or 5; this cannot carry so it could be
3046 // effected immediately with no bump -- but the code
3047 // is clearer if this is done the same way as the others
3048 if (*ulsd==0 || *ulsd==5) bump=1;
3049 }
3050 break;} // r-r
3051 default: { // e.g., DEC_ROUND_MAX
3052 set->status|=DEC_Invalid_context;
3053 #if DECCHECK
3054 printf("Unknown rounding mode: %ld\n", (LI)set->round);
3055 #endif
3056 break;}
3057 } // switch (not r-h-e)
3058 // printf("ReRound: %ld bump: %ld\n", (LI)reround, (LI)bump);
3059
3060 if (bump!=0) { // need increment
3061 // increment the coefficient; this could give 1000... (after
3062 // the all nines case)
3063 ub=ulsd;
3064 for (; UBTOUI(ub-3)==0x09090909; ub-=4) UBFROMUI(ub-3, 0);
3065 // now at most 3 digits left to non-9 (usually just the one)
3066 for (; *ub==9; ub--) *ub=0;
3067 *ub+=1;
3068 // [the all-nines case will have carried one digit to the
3069 // left of the original MSD -- just where it is needed]
3070 } // bump needed
3071 } // inexact rounding
3072
3073 // now clear zeros to the left so exactly DECPMAX digits will be
3074 // available in the coefficent -- the first word to the left was
3075 // cleared earlier for safe carry; now add any more needed
3076 if (drop>4) {
3077 UBFROMUI(BUFOFF-8, 0); // must be at least 5
3078 for (uc=BUFOFF-12; uc>ulsd-DECPMAX-3; uc-=4) UBFROMUI(uc, 0);
3079 }
3080 } // need round (drop>0)
3081
3082 else { // drop<0; padding with -drop digits is needed
3083 // This is the case where an error can occur if the padded
3084 // coefficient will not fit; checking for this can be done in the
3085 // same loop as padding for zeros if the no-hope and zero cases
3086 // are checked first
3087 if (-drop>DECPMAX-1) { // cannot fit unless 0
3088 if (!ISCOEFFZERO(BUFOFF)) return decInvalid(result, set);
3089 // a zero can have any exponent; just drop through and use it
3090 ulsd=BUFOFF+DECPMAX-1;
3091 }
3092 else { // padding will fit (but may still be too long)
3093 // final-word mask depends on endianess
3094 #if DECLITEND
3095 static const uInt dmask[]={0, 0x000000ff, 0x0000ffff, 0x00ffffff};
3096 #else
3097 static const uInt dmask[]={0, 0xff000000, 0xffff0000, 0xffffff00};
3098 #endif
3099 // note that here zeros to the right are added by fours, so in
3100 // the Quad case this could write 36 zeros if the coefficient has
3101 // fewer than three significant digits (hence the +2*QUAD for buf)
3102 for (uc=BUFOFF+DECPMAX;; uc+=4) {
3103 UBFROMUI(uc, 0);
3104 if (UBTOUI(uc-DECPMAX)!=0) { // could be bad
3105 // if all four digits should be zero, definitely bad
3106 if (uc<=BUFOFF+DECPMAX+(-drop)-4)
3107 return decInvalid(result, set);
3108 // must be a 1- to 3-digit sequence; check more carefully
3109 if ((UBTOUI(uc-DECPMAX)&dmask[(-drop)%4])!=0)
3110 return decInvalid(result, set);
3111 break; // no need for loop end test
3112 }
3113 if (uc>=BUFOFF+DECPMAX+(-drop)-4) break; // done
3114 }
3115 ulsd=BUFOFF+DECPMAX+(-drop)-1;
3116 } // pad and check leading zeros
3117 } // drop<0
3118
3119 #if DECTRACE
3120 num.msd=ulsd-DECPMAX+1;
3121 num.lsd=ulsd;
3122 num.exponent=explb-DECBIAS;
3123 num.sign=sourhil & DECFLOAT_Sign;
3124 decShowNum(&num, "res");
3125 #endif
3126
3127 /*------------------------------------------------------------------*/
3128 /* At this point the result is DECPMAX digits, ending at ulsd, so */
3129 /* fits the encoding exactly; there is no possibility of error */
3130 /*------------------------------------------------------------------*/
3131 encode=((exprb>>DECECONL)<<4) + *(ulsd-DECPMAX+1); // make index
3132 encode=DECCOMBFROM[encode]; // indexed by (0-2)*16+msd
3133 // the exponent continuation can be extracted from the original RHS
3134 encode|=sourhir & ECONMASK;
3135 encode|=sourhil&DECFLOAT_Sign; // add the sign from LHS
3136
3137 // finally encode the coefficient
3138 // private macro to encode a declet; this version can be used
3139 // because all coefficient digits exist
3140 #define getDPD3q(dpd, n) ub=ulsd-(3*(n))-2; \
3141 dpd=BCD2DPD[(*ub*256)+(*(ub+1)*16)+*(ub+2)];
3142
3143 #if DOUBLE
3144 getDPD3q(dpd, 4); encode|=dpd<<8;
3145 getDPD3q(dpd, 3); encode|=dpd>>2;
3146 DFWORD(result, 0)=encode;
3147 encode=dpd<<30;
3148 getDPD3q(dpd, 2); encode|=dpd<<20;
3149 getDPD3q(dpd, 1); encode|=dpd<<10;
3150 getDPD3q(dpd, 0); encode|=dpd;
3151 DFWORD(result, 1)=encode;
3152
3153 #elif QUAD
3154 getDPD3q(dpd,10); encode|=dpd<<4;
3155 getDPD3q(dpd, 9); encode|=dpd>>6;
3156 DFWORD(result, 0)=encode;
3157 encode=dpd<<26;
3158 getDPD3q(dpd, 8); encode|=dpd<<16;
3159 getDPD3q(dpd, 7); encode|=dpd<<6;
3160 getDPD3q(dpd, 6); encode|=dpd>>4;
3161 DFWORD(result, 1)=encode;
3162 encode=dpd<<28;
3163 getDPD3q(dpd, 5); encode|=dpd<<18;
3164 getDPD3q(dpd, 4); encode|=dpd<<8;
3165 getDPD3q(dpd, 3); encode|=dpd>>2;
3166 DFWORD(result, 2)=encode;
3167 encode=dpd<<30;
3168 getDPD3q(dpd, 2); encode|=dpd<<20;
3169 getDPD3q(dpd, 1); encode|=dpd<<10;
3170 getDPD3q(dpd, 0); encode|=dpd;
3171 DFWORD(result, 3)=encode;
3172 #endif
3173 return result;
3174 } // decFloatQuantize
3175
3176 /* ------------------------------------------------------------------ */
3177 /* decFloatReduce -- reduce finite coefficient to minimum length */
3178 /* */
3179 /* result gets the reduced decFloat */
3180 /* df is the source decFloat */
3181 /* set is the context */
3182 /* returns result, which will be canonical */
3183 /* */
3184 /* This removes all possible trailing zeros from the coefficient; */
3185 /* some may remain when the number is very close to Nmax. */
3186 /* Special values are unchanged and no status is set unless df=sNaN. */
3187 /* Reduced zero has an exponent q=0. */
3188 /* ------------------------------------------------------------------ */
decFloatReduce(decFloat * result,const decFloat * df,decContext * set)3189 decFloat * decFloatReduce(decFloat *result, const decFloat *df,
3190 decContext *set) {
3191 bcdnum num; // work
3192 uByte buf[DECPMAX], *ub; // coefficient and pointer
3193 if (df!=result) *result=*df; // copy, if needed
3194 if (DFISNAN(df)) return decNaNs(result, df, NULL, set); // sNaN
3195 // zeros and infinites propagate too
3196 if (DFISINF(df)) return decInfinity(result, df); // canonical
3197 if (DFISZERO(df)) {
3198 uInt sign=DFWORD(df, 0)&DECFLOAT_Sign;
3199 decFloatZero(result);
3200 DFWORD(result, 0)|=sign;
3201 return result; // exponent dropped, sign OK
3202 }
3203 // non-zero finite
3204 GETCOEFF(df, buf);
3205 ub=buf+DECPMAX-1; // -> lsd
3206 if (*ub) return result; // no trailing zeros
3207 for (ub--; *ub==0;) ub--; // terminates because non-zero
3208 // *ub is the first non-zero from the right
3209 num.sign=DFWORD(df, 0)&DECFLOAT_Sign; // set up number...
3210 num.exponent=GETEXPUN(df)+(Int)(buf+DECPMAX-1-ub); // adjusted exponent
3211 num.msd=buf;
3212 num.lsd=ub;
3213 return decFinalize(result, &num, set);
3214 } // decFloatReduce
3215
3216 /* ------------------------------------------------------------------ */
3217 /* decFloatRemainder -- integer divide and return remainder */
3218 /* */
3219 /* result gets the remainder of dividing dfl by dfr: */
3220 /* dfl is the first decFloat (lhs) */
3221 /* dfr is the second decFloat (rhs) */
3222 /* set is the context */
3223 /* returns result */
3224 /* */
3225 /* ------------------------------------------------------------------ */
decFloatRemainder(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)3226 decFloat * decFloatRemainder(decFloat *result,
3227 const decFloat *dfl, const decFloat *dfr,
3228 decContext *set) {
3229 return decDivide(result, dfl, dfr, set, REMAINDER);
3230 } // decFloatRemainder
3231
3232 /* ------------------------------------------------------------------ */
3233 /* decFloatRemainderNear -- integer divide to nearest and remainder */
3234 /* */
3235 /* result gets the remainder of dividing dfl by dfr: */
3236 /* dfl is the first decFloat (lhs) */
3237 /* dfr is the second decFloat (rhs) */
3238 /* set is the context */
3239 /* returns result */
3240 /* */
3241 /* This is the IEEE remainder, where the nearest integer is used. */
3242 /* ------------------------------------------------------------------ */
decFloatRemainderNear(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)3243 decFloat * decFloatRemainderNear(decFloat *result,
3244 const decFloat *dfl, const decFloat *dfr,
3245 decContext *set) {
3246 return decDivide(result, dfl, dfr, set, REMNEAR);
3247 } // decFloatRemainderNear
3248
3249 /* ------------------------------------------------------------------ */
3250 /* decFloatRotate -- rotate the coefficient of a decFloat left/right */
3251 /* */
3252 /* result gets the result of rotating dfl */
3253 /* dfl is the source decFloat to rotate */
3254 /* dfr is the count of digits to rotate, an integer (with q=0) */
3255 /* set is the context */
3256 /* returns result */
3257 /* */
3258 /* The digits of the coefficient of dfl are rotated to the left (if */
3259 /* dfr is positive) or to the right (if dfr is negative) without */
3260 /* adjusting the exponent or the sign of dfl. */
3261 /* */
3262 /* dfr must be in the range -DECPMAX through +DECPMAX. */
3263 /* NaNs are propagated as usual. An infinite dfl is unaffected (but */
3264 /* dfr must be valid). No status is set unless dfr is invalid or an */
3265 /* operand is an sNaN. The result is canonical. */
3266 /* ------------------------------------------------------------------ */
3267 #define PHALF (ROUNDUP(DECPMAX/2, 4)) // half length, rounded up
decFloatRotate(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)3268 decFloat * decFloatRotate(decFloat *result,
3269 const decFloat *dfl, const decFloat *dfr,
3270 decContext *set) {
3271 Int rotate; // dfr as an Int
3272 uByte buf[DECPMAX+PHALF]; // coefficient + half
3273 uInt digits, savestat; // work
3274 bcdnum num; // ..
3275 uByte *ub; // ..
3276
3277 if (DFISNAN(dfl)||DFISNAN(dfr)) return decNaNs(result, dfl, dfr, set);
3278 if (!DFISINT(dfr)) return decInvalid(result, set);
3279 digits=decFloatDigits(dfr); // calculate digits
3280 if (digits>2) return decInvalid(result, set); // definitely out of range
3281 rotate=DPD2BIN[DFWORD(dfr, DECWORDS-1)&0x3ff]; // is in bottom declet
3282 if (rotate>DECPMAX) return decInvalid(result, set); // too big
3283 // [from here on no error or status change is possible]
3284 if (DFISINF(dfl)) return decInfinity(result, dfl); // canonical
3285 // handle no-rotate cases
3286 if (rotate==0 || rotate==DECPMAX) return decCanonical(result, dfl);
3287 // a real rotate is needed: 0 < rotate < DECPMAX
3288 // reduce the rotation to no more than half to reduce copying later
3289 // (for QUAD in fact half + 2 digits)
3290 if (DFISSIGNED(dfr)) rotate=-rotate;
3291 if (abs(rotate)>PHALF) {
3292 if (rotate<0) rotate=DECPMAX+rotate;
3293 else rotate=rotate-DECPMAX;
3294 }
3295 // now lay out the coefficient, leaving room to the right or the
3296 // left depending on the direction of rotation
3297 ub=buf;
3298 if (rotate<0) ub+=PHALF; // rotate right, so space to left
3299 GETCOEFF(dfl, ub);
3300 // copy half the digits to left or right, and set num.msd
3301 if (rotate<0) {
3302 memcpy(buf, buf+DECPMAX, PHALF);
3303 num.msd=buf+PHALF+rotate;
3304 }
3305 else {
3306 memcpy(buf+DECPMAX, buf, PHALF);
3307 num.msd=buf+rotate;
3308 }
3309 // fill in rest of num
3310 num.lsd=num.msd+DECPMAX-1;
3311 num.sign=DFWORD(dfl, 0)&DECFLOAT_Sign;
3312 num.exponent=GETEXPUN(dfl);
3313 savestat=set->status; // record
3314 decFinalize(result, &num, set);
3315 set->status=savestat; // restore
3316 return result;
3317 } // decFloatRotate
3318
3319 /* ------------------------------------------------------------------ */
3320 /* decFloatSameQuantum -- test decFloats for same quantum */
3321 /* */
3322 /* dfl is the first decFloat (lhs) */
3323 /* dfr is the second decFloat (rhs) */
3324 /* returns 1 if the operands have the same quantum, 0 otherwise */
3325 /* */
3326 /* No error is possible and no status results. */
3327 /* ------------------------------------------------------------------ */
decFloatSameQuantum(const decFloat * dfl,const decFloat * dfr)3328 uInt decFloatSameQuantum(const decFloat *dfl, const decFloat *dfr) {
3329 if (DFISSPECIAL(dfl) || DFISSPECIAL(dfr)) {
3330 if (DFISNAN(dfl) && DFISNAN(dfr)) return 1;
3331 if (DFISINF(dfl) && DFISINF(dfr)) return 1;
3332 return 0; // any other special mixture gives false
3333 }
3334 if (GETEXP(dfl)==GETEXP(dfr)) return 1; // biased exponents match
3335 return 0;
3336 } // decFloatSameQuantum
3337
3338 /* ------------------------------------------------------------------ */
3339 /* decFloatScaleB -- multiply by a power of 10, as per 754 */
3340 /* */
3341 /* result gets the result of the operation */
3342 /* dfl is the first decFloat (lhs) */
3343 /* dfr is the second decFloat (rhs), am integer (with q=0) */
3344 /* set is the context */
3345 /* returns result */
3346 /* */
3347 /* This computes result=dfl x 10**dfr where dfr is an integer in the */
3348 /* range +/-2*(emax+pmax), typically resulting from LogB. */
3349 /* Underflow and Overflow (with Inexact) may occur. NaNs propagate */
3350 /* as usual. */
3351 /* ------------------------------------------------------------------ */
3352 #define SCALEBMAX 2*(DECEMAX+DECPMAX) // D=800, Q=12356
decFloatScaleB(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)3353 decFloat * decFloatScaleB(decFloat *result,
3354 const decFloat *dfl, const decFloat *dfr,
3355 decContext *set) {
3356 uInt digits; // work
3357 Int expr; // dfr as an Int
3358
3359 if (DFISNAN(dfl)||DFISNAN(dfr)) return decNaNs(result, dfl, dfr, set);
3360 if (!DFISINT(dfr)) return decInvalid(result, set);
3361 digits=decFloatDigits(dfr); // calculate digits
3362
3363 #if DOUBLE
3364 if (digits>3) return decInvalid(result, set); // definitely out of range
3365 expr=DPD2BIN[DFWORD(dfr, 1)&0x3ff]; // must be in bottom declet
3366 #elif QUAD
3367 if (digits>5) return decInvalid(result, set); // definitely out of range
3368 expr=DPD2BIN[DFWORD(dfr, 3)&0x3ff] // in bottom 2 declets ..
3369 +DPD2BIN[(DFWORD(dfr, 3)>>10)&0x3ff]*1000; // ..
3370 #endif
3371 if (expr>SCALEBMAX) return decInvalid(result, set); // oops
3372 // [from now on no error possible]
3373 if (DFISINF(dfl)) return decInfinity(result, dfl); // canonical
3374 if (DFISSIGNED(dfr)) expr=-expr;
3375 // dfl is finite and expr is valid
3376 *result=*dfl; // copy to target
3377 return decFloatSetExponent(result, set, GETEXPUN(result)+expr);
3378 } // decFloatScaleB
3379
3380 /* ------------------------------------------------------------------ */
3381 /* decFloatShift -- shift the coefficient of a decFloat left or right */
3382 /* */
3383 /* result gets the result of shifting dfl */
3384 /* dfl is the source decFloat to shift */
3385 /* dfr is the count of digits to shift, an integer (with q=0) */
3386 /* set is the context */
3387 /* returns result */
3388 /* */
3389 /* The digits of the coefficient of dfl are shifted to the left (if */
3390 /* dfr is positive) or to the right (if dfr is negative) without */
3391 /* adjusting the exponent or the sign of dfl. */
3392 /* */
3393 /* dfr must be in the range -DECPMAX through +DECPMAX. */
3394 /* NaNs are propagated as usual. An infinite dfl is unaffected (but */
3395 /* dfr must be valid). No status is set unless dfr is invalid or an */
3396 /* operand is an sNaN. The result is canonical. */
3397 /* ------------------------------------------------------------------ */
decFloatShift(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)3398 decFloat * decFloatShift(decFloat *result,
3399 const decFloat *dfl, const decFloat *dfr,
3400 decContext *set) {
3401 Int shift; // dfr as an Int
3402 uByte buf[DECPMAX*2]; // coefficient + padding
3403 uInt digits, savestat; // work
3404 bcdnum num; // ..
3405 uInt uiwork; // for macros
3406
3407 if (DFISNAN(dfl)||DFISNAN(dfr)) return decNaNs(result, dfl, dfr, set);
3408 if (!DFISINT(dfr)) return decInvalid(result, set);
3409 digits=decFloatDigits(dfr); // calculate digits
3410 if (digits>2) return decInvalid(result, set); // definitely out of range
3411 shift=DPD2BIN[DFWORD(dfr, DECWORDS-1)&0x3ff]; // is in bottom declet
3412 if (shift>DECPMAX) return decInvalid(result, set); // too big
3413 // [from here on no error or status change is possible]
3414
3415 if (DFISINF(dfl)) return decInfinity(result, dfl); // canonical
3416 // handle no-shift and all-shift (clear to zero) cases
3417 if (shift==0) return decCanonical(result, dfl);
3418 if (shift==DECPMAX) { // zero with sign
3419 uByte sign=(uByte)(DFBYTE(dfl, 0)&0x80); // save sign bit
3420 decFloatZero(result); // make +0
3421 DFBYTE(result, 0)=(uByte)(DFBYTE(result, 0)|sign); // and set sign
3422 // [cannot safely use CopySign]
3423 return result;
3424 }
3425 // a real shift is needed: 0 < shift < DECPMAX
3426 num.sign=DFWORD(dfl, 0)&DECFLOAT_Sign;
3427 num.exponent=GETEXPUN(dfl);
3428 num.msd=buf;
3429 GETCOEFF(dfl, buf);
3430 if (DFISSIGNED(dfr)) { // shift right
3431 // edge cases are taken care of, so this is easy
3432 num.lsd=buf+DECPMAX-shift-1;
3433 }
3434 else { // shift left -- zero padding needed to right
3435 UBFROMUI(buf+DECPMAX, 0); // 8 will handle most cases
3436 UBFROMUI(buf+DECPMAX+4, 0); // ..
3437 if (shift>8) memset(buf+DECPMAX+8, 0, 8+QUAD*18); // all other cases
3438 num.msd+=shift;
3439 num.lsd=num.msd+DECPMAX-1;
3440 }
3441 savestat=set->status; // record
3442 decFinalize(result, &num, set);
3443 set->status=savestat; // restore
3444 return result;
3445 } // decFloatShift
3446
3447 /* ------------------------------------------------------------------ */
3448 /* decFloatSubtract -- subtract a decFloat from another */
3449 /* */
3450 /* result gets the result of subtracting dfr from dfl: */
3451 /* dfl is the first decFloat (lhs) */
3452 /* dfr is the second decFloat (rhs) */
3453 /* set is the context */
3454 /* returns result */
3455 /* */
3456 /* ------------------------------------------------------------------ */
decFloatSubtract(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)3457 decFloat * decFloatSubtract(decFloat *result,
3458 const decFloat *dfl, const decFloat *dfr,
3459 decContext *set) {
3460 decFloat temp;
3461 // NaNs must propagate without sign change
3462 if (DFISNAN(dfr)) return decFloatAdd(result, dfl, dfr, set);
3463 temp=*dfr; // make a copy
3464 DFBYTE(&temp, 0)^=0x80; // flip sign
3465 return decFloatAdd(result, dfl, &temp, set); // and add to the lhs
3466 } // decFloatSubtract
3467
3468 /* ------------------------------------------------------------------ */
3469 /* decFloatToInt -- round to 32-bit binary integer (4 flavours) */
3470 /* */
3471 /* df is the decFloat to round */
3472 /* set is the context */
3473 /* round is the rounding mode to use */
3474 /* returns a uInt or an Int, rounded according to the name */
3475 /* */
3476 /* Invalid will always be signaled if df is a NaN, is Infinite, or is */
3477 /* outside the range of the target; Inexact will not be signaled for */
3478 /* simple rounding unless 'Exact' appears in the name. */
3479 /* ------------------------------------------------------------------ */
decFloatToUInt32(const decFloat * df,decContext * set,enum rounding round)3480 uInt decFloatToUInt32(const decFloat *df, decContext *set,
3481 enum rounding round) {
3482 return decToInt32(df, set, round, 0, 1);}
3483
decFloatToUInt32Exact(const decFloat * df,decContext * set,enum rounding round)3484 uInt decFloatToUInt32Exact(const decFloat *df, decContext *set,
3485 enum rounding round) {
3486 return decToInt32(df, set, round, 1, 1);}
3487
decFloatToInt32(const decFloat * df,decContext * set,enum rounding round)3488 Int decFloatToInt32(const decFloat *df, decContext *set,
3489 enum rounding round) {
3490 return (Int)decToInt32(df, set, round, 0, 0);}
3491
decFloatToInt32Exact(const decFloat * df,decContext * set,enum rounding round)3492 Int decFloatToInt32Exact(const decFloat *df, decContext *set,
3493 enum rounding round) {
3494 return (Int)decToInt32(df, set, round, 1, 0);}
3495
3496 /* ------------------------------------------------------------------ */
3497 /* decFloatToIntegral -- round to integral value (two flavours) */
3498 /* */
3499 /* result gets the result */
3500 /* df is the decFloat to round */
3501 /* set is the context */
3502 /* round is the rounding mode to use */
3503 /* returns result */
3504 /* */
3505 /* No exceptions, even Inexact, are raised except for sNaN input, or */
3506 /* if 'Exact' appears in the name. */
3507 /* ------------------------------------------------------------------ */
decFloatToIntegralValue(decFloat * result,const decFloat * df,decContext * set,enum rounding round)3508 decFloat * decFloatToIntegralValue(decFloat *result, const decFloat *df,
3509 decContext *set, enum rounding round) {
3510 return decToIntegral(result, df, set, round, 0);}
3511
decFloatToIntegralExact(decFloat * result,const decFloat * df,decContext * set)3512 decFloat * decFloatToIntegralExact(decFloat *result, const decFloat *df,
3513 decContext *set) {
3514 return decToIntegral(result, df, set, set->round, 1);}
3515
3516 /* ------------------------------------------------------------------ */
3517 /* decFloatXor -- logical digitwise XOR of two decFloats */
3518 /* */
3519 /* result gets the result of XORing dfl and dfr */
3520 /* dfl is the first decFloat (lhs) */
3521 /* dfr is the second decFloat (rhs) */
3522 /* set is the context */
3523 /* returns result, which will be canonical with sign=0 */
3524 /* */
3525 /* The operands must be positive, finite with exponent q=0, and */
3526 /* comprise just zeros and ones; if not, Invalid operation results. */
3527 /* ------------------------------------------------------------------ */
decFloatXor(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)3528 decFloat * decFloatXor(decFloat *result,
3529 const decFloat *dfl, const decFloat *dfr,
3530 decContext *set) {
3531 if (!DFISUINT01(dfl) || !DFISUINT01(dfr)
3532 || !DFISCC01(dfl) || !DFISCC01(dfr)) return decInvalid(result, set);
3533 // the operands are positive finite integers (q=0) with just 0s and 1s
3534 #if DOUBLE
3535 DFWORD(result, 0)=ZEROWORD
3536 |((DFWORD(dfl, 0) ^ DFWORD(dfr, 0))&0x04009124);
3537 DFWORD(result, 1)=(DFWORD(dfl, 1) ^ DFWORD(dfr, 1))&0x49124491;
3538 #elif QUAD
3539 DFWORD(result, 0)=ZEROWORD
3540 |((DFWORD(dfl, 0) ^ DFWORD(dfr, 0))&0x04000912);
3541 DFWORD(result, 1)=(DFWORD(dfl, 1) ^ DFWORD(dfr, 1))&0x44912449;
3542 DFWORD(result, 2)=(DFWORD(dfl, 2) ^ DFWORD(dfr, 2))&0x12449124;
3543 DFWORD(result, 3)=(DFWORD(dfl, 3) ^ DFWORD(dfr, 3))&0x49124491;
3544 #endif
3545 return result;
3546 } // decFloatXor
3547
3548 /* ------------------------------------------------------------------ */
3549 /* decInvalid -- set Invalid_operation result */
3550 /* */
3551 /* result gets a canonical NaN */
3552 /* set is the context */
3553 /* returns result */
3554 /* */
3555 /* status has Invalid_operation added */
3556 /* ------------------------------------------------------------------ */
decInvalid(decFloat * result,decContext * set)3557 static decFloat *decInvalid(decFloat *result, decContext *set) {
3558 decFloatZero(result);
3559 DFWORD(result, 0)=DECFLOAT_qNaN;
3560 set->status|=DEC_Invalid_operation;
3561 return result;
3562 } // decInvalid
3563
3564 /* ------------------------------------------------------------------ */
3565 /* decInfinity -- set canonical Infinity with sign from a decFloat */
3566 /* */
3567 /* result gets a canonical Infinity */
3568 /* df is source decFloat (only the sign is used) */
3569 /* returns result */
3570 /* */
3571 /* df may be the same as result */
3572 /* ------------------------------------------------------------------ */
decInfinity(decFloat * result,const decFloat * df)3573 static decFloat *decInfinity(decFloat *result, const decFloat *df) {
3574 uInt sign=DFWORD(df, 0); // save source signword
3575 decFloatZero(result); // clear everything
3576 DFWORD(result, 0)=DECFLOAT_Inf | (sign & DECFLOAT_Sign);
3577 return result;
3578 } // decInfinity
3579
3580 /* ------------------------------------------------------------------ */
3581 /* decNaNs -- handle NaN argument(s) */
3582 /* */
3583 /* result gets the result of handling dfl and dfr, one or both of */
3584 /* which is a NaN */
3585 /* dfl is the first decFloat (lhs) */
3586 /* dfr is the second decFloat (rhs) -- may be NULL for a single- */
3587 /* operand operation */
3588 /* set is the context */
3589 /* returns result */
3590 /* */
3591 /* Called when one or both operands is a NaN, and propagates the */
3592 /* appropriate result to res. When an sNaN is found, it is changed */
3593 /* to a qNaN and Invalid operation is set. */
3594 /* ------------------------------------------------------------------ */
decNaNs(decFloat * result,const decFloat * dfl,const decFloat * dfr,decContext * set)3595 static decFloat *decNaNs(decFloat *result,
3596 const decFloat *dfl, const decFloat *dfr,
3597 decContext *set) {
3598 // handle sNaNs first
3599 if (dfr!=NULL && DFISSNAN(dfr) && !DFISSNAN(dfl)) dfl=dfr; // use RHS
3600 if (DFISSNAN(dfl)) {
3601 decCanonical(result, dfl); // propagate canonical sNaN
3602 DFWORD(result, 0)&=~(DECFLOAT_qNaN ^ DECFLOAT_sNaN); // quiet
3603 set->status|=DEC_Invalid_operation;
3604 return result;
3605 }
3606 // one or both is a quiet NaN
3607 if (!DFISNAN(dfl)) dfl=dfr; // RHS must be NaN, use it
3608 return decCanonical(result, dfl); // propagate canonical qNaN
3609 } // decNaNs
3610
3611 /* ------------------------------------------------------------------ */
3612 /* decNumCompare -- numeric comparison of two decFloats */
3613 /* */
3614 /* dfl is the left-hand decFloat, which is not a NaN */
3615 /* dfr is the right-hand decFloat, which is not a NaN */
3616 /* tot is 1 for total order compare, 0 for simple numeric */
3617 /* returns -1, 0, or +1 for dfl<dfr, dfl=dfr, dfl>dfr */
3618 /* */
3619 /* No error is possible; status and mode are unchanged. */
3620 /* ------------------------------------------------------------------ */
decNumCompare(const decFloat * dfl,const decFloat * dfr,Flag tot)3621 static Int decNumCompare(const decFloat *dfl, const decFloat *dfr, Flag tot) {
3622 Int sigl, sigr; // LHS and RHS non-0 signums
3623 Int shift; // shift needed to align operands
3624 uByte *ub, *uc; // work
3625 uInt uiwork; // for macros
3626 // buffers +2 if Quad (36 digits), need double plus 4 for safe padding
3627 uByte bufl[DECPMAX*2+QUAD*2+4]; // for LHS coefficient + padding
3628 uByte bufr[DECPMAX*2+QUAD*2+4]; // for RHS coefficient + padding
3629
3630 sigl=1;
3631 if (DFISSIGNED(dfl)) {
3632 if (!DFISSIGNED(dfr)) { // -LHS +RHS
3633 if (DFISZERO(dfl) && DFISZERO(dfr) && !tot) return 0;
3634 return -1; // RHS wins
3635 }
3636 sigl=-1;
3637 }
3638 if (DFISSIGNED(dfr)) {
3639 if (!DFISSIGNED(dfl)) { // +LHS -RHS
3640 if (DFISZERO(dfl) && DFISZERO(dfr) && !tot) return 0;
3641 return +1; // LHS wins
3642 }
3643 }
3644
3645 // signs are the same; operand(s) could be zero
3646 sigr=-sigl; // sign to return if abs(RHS) wins
3647
3648 if (DFISINF(dfl)) {
3649 if (DFISINF(dfr)) return 0; // both infinite & same sign
3650 return sigl; // inf > n
3651 }
3652 if (DFISINF(dfr)) return sigr; // n < inf [dfl is finite]
3653
3654 // here, both are same sign and finite; calculate their offset
3655 shift=GETEXP(dfl)-GETEXP(dfr); // [0 means aligned]
3656 // [bias can be ignored -- the absolute exponent is not relevant]
3657
3658 if (DFISZERO(dfl)) {
3659 if (!DFISZERO(dfr)) return sigr; // LHS=0, RHS!=0
3660 // both are zero, return 0 if both same exponent or numeric compare
3661 if (shift==0 || !tot) return 0;
3662 if (shift>0) return sigl;
3663 return sigr; // [shift<0]
3664 }
3665 else { // LHS!=0
3666 if (DFISZERO(dfr)) return sigl; // LHS!=0, RHS=0
3667 }
3668 // both are known to be non-zero at this point
3669
3670 // if the exponents are so different that the coefficients do not
3671 // overlap (by even one digit) then a full comparison is not needed
3672 if (abs(shift)>=DECPMAX) { // no overlap
3673 // coefficients are known to be non-zero
3674 if (shift>0) return sigl;
3675 return sigr; // [shift<0]
3676 }
3677
3678 // decode the coefficients
3679 // (shift both right two if Quad to make a multiple of four)
3680 #if QUAD
3681 UBFROMUI(bufl, 0);
3682 UBFROMUI(bufr, 0);
3683 #endif
3684 GETCOEFF(dfl, bufl+QUAD*2); // decode from decFloat
3685 GETCOEFF(dfr, bufr+QUAD*2); // ..
3686 if (shift==0) { // aligned; common and easy
3687 // all multiples of four, here
3688 for (ub=bufl, uc=bufr; ub<bufl+DECPMAX+QUAD*2; ub+=4, uc+=4) {
3689 uInt ui=UBTOUI(ub);
3690 if (ui==UBTOUI(uc)) continue; // so far so same
3691 // about to find a winner; go by bytes in case little-endian
3692 for (;; ub++, uc++) {
3693 if (*ub>*uc) return sigl; // difference found
3694 if (*ub<*uc) return sigr; // ..
3695 }
3696 }
3697 } // aligned
3698 else if (shift>0) { // lhs to left
3699 ub=bufl; // RHS pointer
3700 // pad bufl so right-aligned; most shifts will fit in 8
3701 UBFROMUI(bufl+DECPMAX+QUAD*2, 0); // add eight zeros
3702 UBFROMUI(bufl+DECPMAX+QUAD*2+4, 0); // ..
3703 if (shift>8) {
3704 // more than eight; fill the rest, and also worth doing the
3705 // lead-in by fours
3706 uByte *up; // work
3707 uByte *upend=bufl+DECPMAX+QUAD*2+shift;
3708 for (up=bufl+DECPMAX+QUAD*2+8; up<upend; up+=4) UBFROMUI(up, 0);
3709 // [pads up to 36 in all for Quad]
3710 for (;; ub+=4) {
3711 if (UBTOUI(ub)!=0) return sigl;
3712 if (ub+4>bufl+shift-4) break;
3713 }
3714 }
3715 // check remaining leading digits
3716 for (; ub<bufl+shift; ub++) if (*ub!=0) return sigl;
3717 // now start the overlapped part; bufl has been padded, so the
3718 // comparison can go for the full length of bufr, which is a
3719 // multiple of 4 bytes
3720 for (uc=bufr; ; uc+=4, ub+=4) {
3721 uInt ui=UBTOUI(ub);
3722 if (ui!=UBTOUI(uc)) { // mismatch found
3723 for (;; uc++, ub++) { // check from left [little-endian?]
3724 if (*ub>*uc) return sigl; // difference found
3725 if (*ub<*uc) return sigr; // ..
3726 }
3727 } // mismatch
3728 if (uc==bufr+QUAD*2+DECPMAX-4) break; // all checked
3729 }
3730 } // shift>0
3731
3732 else { // shift<0) .. RHS is to left of LHS; mirror shift>0
3733 uc=bufr; // RHS pointer
3734 // pad bufr so right-aligned; most shifts will fit in 8
3735 UBFROMUI(bufr+DECPMAX+QUAD*2, 0); // add eight zeros
3736 UBFROMUI(bufr+DECPMAX+QUAD*2+4, 0); // ..
3737 if (shift<-8) {
3738 // more than eight; fill the rest, and also worth doing the
3739 // lead-in by fours
3740 uByte *up; // work
3741 uByte *upend=bufr+DECPMAX+QUAD*2-shift;
3742 for (up=bufr+DECPMAX+QUAD*2+8; up<upend; up+=4) UBFROMUI(up, 0);
3743 // [pads up to 36 in all for Quad]
3744 for (;; uc+=4) {
3745 if (UBTOUI(uc)!=0) return sigr;
3746 if (uc+4>bufr-shift-4) break;
3747 }
3748 }
3749 // check remaining leading digits
3750 for (; uc<bufr-shift; uc++) if (*uc!=0) return sigr;
3751 // now start the overlapped part; bufr has been padded, so the
3752 // comparison can go for the full length of bufl, which is a
3753 // multiple of 4 bytes
3754 for (ub=bufl; ; ub+=4, uc+=4) {
3755 uInt ui=UBTOUI(ub);
3756 if (ui!=UBTOUI(uc)) { // mismatch found
3757 for (;; ub++, uc++) { // check from left [little-endian?]
3758 if (*ub>*uc) return sigl; // difference found
3759 if (*ub<*uc) return sigr; // ..
3760 }
3761 } // mismatch
3762 if (ub==bufl+QUAD*2+DECPMAX-4) break; // all checked
3763 }
3764 } // shift<0
3765
3766 // Here when compare equal
3767 if (!tot) return 0; // numerically equal
3768 // total ordering .. exponent matters
3769 if (shift>0) return sigl; // total order by exponent
3770 if (shift<0) return sigr; // ..
3771 return 0;
3772 } // decNumCompare
3773
3774 /* ------------------------------------------------------------------ */
3775 /* decToInt32 -- local routine to effect ToInteger conversions */
3776 /* */
3777 /* df is the decFloat to convert */
3778 /* set is the context */
3779 /* rmode is the rounding mode to use */
3780 /* exact is 1 if Inexact should be signalled */
3781 /* unsign is 1 if the result a uInt, 0 if an Int (cast to uInt) */
3782 /* returns 32-bit result as a uInt */
3783 /* */
3784 /* Invalid is set is df is a NaN, is infinite, or is out-of-range; in */
3785 /* these cases 0 is returned. */
3786 /* ------------------------------------------------------------------ */
decToInt32(const decFloat * df,decContext * set,enum rounding rmode,Flag exact,Flag unsign)3787 static uInt decToInt32(const decFloat *df, decContext *set,
3788 enum rounding rmode, Flag exact, Flag unsign) {
3789 Int exp; // exponent
3790 uInt sourhi, sourpen, sourlo; // top word from source decFloat ..
3791 uInt hi, lo; // .. penultimate, least, etc.
3792 decFloat zero, result; // work
3793 Int i; // ..
3794
3795 /* Start decoding the argument */
3796 sourhi=DFWORD(df, 0); // top word
3797 exp=DECCOMBEXP[sourhi>>26]; // get exponent high bits (in place)
3798 if (EXPISSPECIAL(exp)) { // is special?
3799 set->status|=DEC_Invalid_operation; // signal
3800 return 0;
3801 }
3802
3803 /* Here when the argument is finite */
3804 if (GETEXPUN(df)==0) result=*df; // already a true integer
3805 else { // need to round to integer
3806 enum rounding saveround; // saver
3807 uInt savestatus; // ..
3808 saveround=set->round; // save rounding mode ..
3809 savestatus=set->status; // .. and status
3810 set->round=rmode; // set mode
3811 decFloatZero(&zero); // make 0E+0
3812 set->status=0; // clear
3813 decFloatQuantize(&result, df, &zero, set); // [this may fail]
3814 set->round=saveround; // restore rounding mode ..
3815 if (exact) set->status|=savestatus; // include Inexact
3816 else set->status=savestatus; // .. or just original status
3817 }
3818
3819 // only the last four declets of the coefficient can contain
3820 // non-zero; check for others (and also NaN or Infinity from the
3821 // Quantize) first (see DFISZERO for explanation):
3822 // decFloatShow(&result, "sofar");
3823 #if DOUBLE
3824 if ((DFWORD(&result, 0)&0x1c03ff00)!=0
3825 || (DFWORD(&result, 0)&0x60000000)==0x60000000) {
3826 #elif QUAD
3827 if ((DFWORD(&result, 2)&0xffffff00)!=0
3828 || DFWORD(&result, 1)!=0
3829 || (DFWORD(&result, 0)&0x1c003fff)!=0
3830 || (DFWORD(&result, 0)&0x60000000)==0x60000000) {
3831 #endif
3832 set->status|=DEC_Invalid_operation; // Invalid or out of range
3833 return 0;
3834 }
3835 // get last twelve digits of the coefficent into hi & ho, base
3836 // 10**9 (see GETCOEFFBILL):
3837 sourlo=DFWORD(&result, DECWORDS-1);
3838 lo=DPD2BIN0[sourlo&0x3ff]
3839 +DPD2BINK[(sourlo>>10)&0x3ff]
3840 +DPD2BINM[(sourlo>>20)&0x3ff];
3841 sourpen=DFWORD(&result, DECWORDS-2);
3842 hi=DPD2BIN0[((sourpen<<2) | (sourlo>>30))&0x3ff];
3843
3844 // according to request, check range carefully
3845 if (unsign) {
3846 if (hi>4 || (hi==4 && lo>294967295) || (hi+lo!=0 && DFISSIGNED(&result))) {
3847 set->status|=DEC_Invalid_operation; // out of range
3848 return 0;
3849 }
3850 return hi*BILLION+lo;
3851 }
3852 // signed
3853 if (hi>2 || (hi==2 && lo>147483647)) {
3854 // handle the usual edge case
3855 if (lo==147483648 && hi==2 && DFISSIGNED(&result)) return 0x80000000;
3856 set->status|=DEC_Invalid_operation; // truly out of range
3857 return 0;
3858 }
3859 i=hi*BILLION+lo;
3860 if (DFISSIGNED(&result)) i=-i;
3861 return (uInt)i;
3862 } // decToInt32
3863
3864 /* ------------------------------------------------------------------ */
3865 /* decToIntegral -- local routine to effect ToIntegral value */
3866 /* */
3867 /* result gets the result */
3868 /* df is the decFloat to round */
3869 /* set is the context */
3870 /* rmode is the rounding mode to use */
3871 /* exact is 1 if Inexact should be signalled */
3872 /* returns result */
3873 /* ------------------------------------------------------------------ */
3874 static decFloat * decToIntegral(decFloat *result, const decFloat *df,
3875 decContext *set, enum rounding rmode,
3876 Flag exact) {
3877 Int exp; // exponent
3878 uInt sourhi; // top word from source decFloat
3879 enum rounding saveround; // saver
3880 uInt savestatus; // ..
3881 decFloat zero; // work
3882
3883 /* Start decoding the argument */
3884 sourhi=DFWORD(df, 0); // top word
3885 exp=DECCOMBEXP[sourhi>>26]; // get exponent high bits (in place)
3886
3887 if (EXPISSPECIAL(exp)) { // is special?
3888 // NaNs are handled as usual
3889 if (DFISNAN(df)) return decNaNs(result, df, NULL, set);
3890 // must be infinite; return canonical infinity with sign of df
3891 return decInfinity(result, df);
3892 }
3893
3894 /* Here when the argument is finite */
3895 // complete extraction of the exponent
3896 exp+=GETECON(df)-DECBIAS; // .. + continuation and unbias
3897
3898 if (exp>=0) return decCanonical(result, df); // already integral
3899
3900 saveround=set->round; // save rounding mode ..
3901 savestatus=set->status; // .. and status
3902 set->round=rmode; // set mode
3903 decFloatZero(&zero); // make 0E+0
3904 decFloatQuantize(result, df, &zero, set); // 'integrate'; cannot fail
3905 set->round=saveround; // restore rounding mode ..
3906 if (!exact) set->status=savestatus; // .. and status, unless exact
3907 return result;
3908 } // decToIntegral
3909