1 /**
2  * \file
3  * Copyright (c) Microsoft. All rights reserved.
4  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
5  *
6  * Copyright 2015 Xamarin Inc
7  *
8  * File: decimal.c
9  *
10  * Ported from C++ to C and adjusted to Mono runtime
11  *
12  * Pending:
13  *   DoToCurrency (they look like new methods we do not have)
14  */
15 #ifndef DISABLE_DECIMAL
16 #include "config.h"
17 #include <stdint.h>
18 #include <glib.h>
19 #include <mono/utils/mono-compiler.h>
20 #include <mono/metadata/exception.h>
21 #include <mono/metadata/object-internals.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <math.h>
26 #ifdef HAVE_MEMORY_H
27 #include <memory.h>
28 #endif
29 #ifdef _MSC_VER
30 #include <intrin.h>
31 #endif
32 #include "decimal-ms.h"
33 #include "number-ms.h"
34 
35 #define min(a, b) (((a) < (b)) ? (a) : (b))
36 
37 typedef enum {
38 	MONO_DECIMAL_OK,
39 	MONO_DECIMAL_OVERFLOW,
40 	MONO_DECIMAL_INVALID_ARGUMENT,
41 	MONO_DECIMAL_DIVBYZERO,
42 	MONO_DECIMAL_ARGUMENT_OUT_OF_RANGE
43 } MonoDecimalStatus;
44 
45 #ifndef FC_GC_POLL
46 #   define FC_GC_POLL()
47 #endif
48 
49 static const uint32_t ten_to_nine    = 1000000000U;
50 static const uint32_t ten_to_ten_div_4 = 2500000000U;
51 #define POWER10_MAX     9
52 #define DECIMAL_NEG ((uint8_t)0x80)
53 #define DECMAX 28
54 #define DECIMAL_SCALE(dec)       ((dec).u.u.scale)
55 #define DECIMAL_SIGN(dec)        ((dec).u.u.sign)
56 #define DECIMAL_SIGNSCALE(dec)   ((dec).u.signscale)
57 #define DECIMAL_LO32(dec)        ((dec).v.v.Lo32)
58 #define DECIMAL_MID32(dec)       ((dec).v.v.Mid32)
59 #define DECIMAL_HI32(dec)        ((dec).Hi32)
60 #if G_BYTE_ORDER != G_LITTLE_ENDIAN
61 # define DECIMAL_LO64_GET(dec)   (((uint64_t)((dec).v.v.Mid32) << 32) | (dec).v.v.Lo32)
62 # define DECIMAL_LO64_SET(dec,value)   {(dec).v.v.Lo32 = (value); (dec).v.v.Mid32 = ((value) >> 32); }
63 #else
64 # define DECIMAL_LO64_GET(dec)    ((dec).v.Lo64)
65 # define DECIMAL_LO64_SET(dec,value)   {(dec).v.Lo64 = value; }
66 #endif
67 
68 #define DECIMAL_SETZERO(dec) {DECIMAL_LO32(dec) = 0; DECIMAL_MID32(dec) = 0; DECIMAL_HI32(dec) = 0; DECIMAL_SIGNSCALE(dec) = 0;}
69 #define COPYDEC(dest, src) {DECIMAL_SIGNSCALE(dest) = DECIMAL_SIGNSCALE(src); DECIMAL_HI32(dest) = DECIMAL_HI32(src); \
70     DECIMAL_MID32(dest) = DECIMAL_MID32(src); DECIMAL_LO32(dest) = DECIMAL_LO32(src); }
71 
72 #define DEC_SCALE_MAX   28
73 #define POWER10_MAX     9
74 
75 #define OVFL_MAX_9_HI   4
76 #define OVFL_MAX_9_MID  1266874889
77 #define OVFL_MAX_9_LO   3047500985u
78 
79 #define OVFL_MAX_5_HI   42949
80 #define OVFL_MAX_5_MID  2890341191
81 
82 #define OVFL_MAX_1_HI   429496729
83 
84 typedef union {
85 	uint64_t int64;
86 	struct {
87 #if BYTE_ORDER == G_BIG_ENDIAN
88         uint32_t Hi;
89         uint32_t Lo;
90 #else
91         uint32_t Lo;
92         uint32_t Hi;
93 #endif
94     } u;
95 } SPLIT64;
96 
97 static const SPLIT64    ten_to_eighteen = { 1000000000000000000ULL };
98 
99 const MonoDouble_double ds2to64 = { .s = { .sign = 0, .exp = MONO_DOUBLE_BIAS + 65, .mantHi = 0, .mantLo = 0 } };
100 
101 //
102 // Data tables
103 //
104 
105 static const uint32_t power10 [POWER10_MAX+1] = {
106 	1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000
107 };
108 
109 
110 static const double double_power10[] = {
111 	1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
112 	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
113 	1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29,
114 	1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
115 	1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49,
116 	1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59,
117 	1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69,
118 	1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79,
119 	1e80 };
120 
121 const SPLIT64 sdl_power10[] = { {10000000000ULL},          // 1E10
122 				{100000000000ULL},         // 1E11
123 				{1000000000000ULL},        // 1E12
124 				{10000000000000ULL},       // 1E13
125 				{100000000000000ULL} };    // 1E14
126 
127 static const uint64_t long_power10[] = {
128 	1,
129 	10ULL,
130 	100ULL,
131 	1000ULL,
132 	10000ULL,
133 	100000ULL,
134 	1000000ULL,
135 	10000000ULL,
136 	100000000ULL,
137 	1000000000ULL,
138 	10000000000ULL,
139 	100000000000ULL,
140 	1000000000000ULL,
141 	10000000000000ULL,
142 	100000000000000ULL,
143 	1000000000000000ULL,
144 	10000000000000000ULL,
145 	100000000000000000ULL,
146 	1000000000000000000ULL,
147 	10000000000000000000ULL};
148 
149 typedef struct  {
150 	uint32_t Hi, Mid, Lo;
151 } DECOVFL;
152 
153 const DECOVFL power_overflow[] = {
154 // This is a table of the largest values that can be in the upper two
155 // ULONGs of a 96-bit number that will not overflow when multiplied
156 // by a given power.  For the upper word, this is a table of
157 // 2^32 / 10^n for 1 <= n <= 9.  For the lower word, this is the
158 // remaining fraction part * 2^32.  2^32 = 4294967296.
159 //
160     { 429496729u, 2576980377u, 2576980377u }, // 10^1 remainder 0.6
161     { 42949672u,  4123168604u, 687194767u  }, // 10^2 remainder 0.16
162     { 4294967u,   1271310319u, 2645699854u }, // 10^3 remainder 0.616
163     { 429496u,    3133608139u, 694066715u  }, // 10^4 remainder 0.1616
164     { 42949u,     2890341191u, 2216890319u }, // 10^5 remainder 0.51616
165     { 4294u,      4154504685u, 2369172679u }, // 10^6 remainder 0.551616
166     { 429u,       2133437386u, 4102387834u }, // 10^7 remainder 0.9551616
167     { 42u,        4078814305u, 410238783u  }, // 10^8 remainder 0.09991616
168     { 4u,         1266874889u, 3047500985u }, // 10^9 remainder 0.709551616
169 };
170 
171 
172 #define UInt32x32To64(a, b) ((uint64_t)((uint32_t)(a)) * (uint64_t)((uint32_t)(b)))
173 #define Div64by32(num, den) ((uint32_t)((uint64_t)(num) / (uint32_t)(den)))
174 #define Mod64by32(num, den) ((uint32_t)((uint64_t)(num) % (uint32_t)(den)))
175 
176 static double
fnDblPower10(int ix)177 fnDblPower10(int ix)
178 {
179     const int maxIx = (sizeof(double_power10)/sizeof(double_power10[0]));
180     g_assert(ix >= 0);
181     if (ix < maxIx)
182         return double_power10[ix];
183     return pow(10.0, ix);
184 } // double fnDblPower10()
185 
186 
187 static inline int64_t
DivMod32by32(int32_t num,int32_t den)188 DivMod32by32(int32_t num, int32_t den)
189 {
190     SPLIT64  sdl;
191 
192     sdl.u.Lo = num / den;
193     sdl.u.Hi = num % den;
194     return sdl.int64;
195 }
196 
197 static inline int64_t
DivMod64by32(int64_t num,int32_t den)198 DivMod64by32(int64_t num, int32_t den)
199 {
200     SPLIT64  sdl;
201 
202     sdl.u.Lo = Div64by32(num, den);
203     sdl.u.Hi = Mod64by32(num, den);
204     return sdl.int64;
205 }
206 
207 static uint64_t
UInt64x64To128(SPLIT64 op1,SPLIT64 op2,uint64_t * hi)208 UInt64x64To128(SPLIT64 op1, SPLIT64 op2, uint64_t *hi)
209 {
210 	SPLIT64  tmp1;
211 	SPLIT64  tmp2;
212 	SPLIT64  tmp3;
213 
214 	tmp1.int64 = UInt32x32To64(op1.u.Lo, op2.u.Lo); // lo partial prod
215 	tmp2.int64 = UInt32x32To64(op1.u.Lo, op2.u.Hi); // mid 1 partial prod
216 	tmp1.u.Hi += tmp2.u.Lo;
217 	if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
218 		tmp2.u.Hi++;
219 	tmp3.int64 = UInt32x32To64(op1.u.Hi, op2.u.Hi) + (uint64_t)tmp2.u.Hi;
220 	tmp2.int64 = UInt32x32To64(op1.u.Hi, op2.u.Lo);
221 	tmp1.u.Hi += tmp2.u.Lo;
222 	if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
223 		tmp2.u.Hi++;
224 	tmp3.int64 += (uint64_t)tmp2.u.Hi;
225 
226 	*hi = tmp3.int64;
227 	return tmp1.int64;
228 }
229 
230 /**
231 * FullDiv64By32:
232 *
233 * Entry:
234 *   pdlNum  - Pointer to 64-bit dividend
235 *   ulDen   - 32-bit divisor
236 *
237 * Purpose:
238 *   Do full divide, yielding 64-bit result and 32-bit remainder.
239 *
240 * Exit:
241 *   Quotient overwrites dividend.
242 *   Returns remainder.
243 *
244 * Exceptions:
245 *   None.
246 */
247 // Was: FullDiv64By32
248 static uint32_t
FullDiv64By32(uint64_t * num,uint32_t den)249 FullDiv64By32 (uint64_t *num, uint32_t den)
250 {
251 	SPLIT64  tmp;
252 	SPLIT64  res;
253 
254 	tmp.int64 = *num;
255 	res.u.Hi = 0;
256 
257 	if (tmp.u.Hi >= den) {
258 		// DivMod64by32 returns quotient in Lo, remainder in Hi.
259 		//
260 		res.u.Lo = tmp.u.Hi;
261 		res.int64 = DivMod64by32(res.int64, den);
262 		tmp.u.Hi = res.u.Hi;
263 		res.u.Hi = res.u.Lo;
264 	}
265 
266 	tmp.int64 = DivMod64by32(tmp.int64, den);
267 	res.u.Lo = tmp.u.Lo;
268 	*num = res.int64;
269 	return tmp.u.Hi;
270 }
271 
272 /***
273  * SearchScale
274  *
275  * Entry:
276  *   res_hi - Top uint32_t of quotient
277  *   res_mid - Middle uint32_t of quotient
278  *   res_lo - Bottom uint32_t of quotient
279  *   scale  - Scale factor of quotient, range -DEC_SCALE_MAX to DEC_SCALE_MAX
280  *
281  * Purpose:
282  *   Determine the max power of 10, <= 9, that the quotient can be scaled
283  *   up by and still fit in 96 bits.
284  *
285  * Exit:
286  *   Returns power of 10 to scale by, -1 if overflow error.
287  *
288  ***********************************************************************/
289 
290 static int
SearchScale(uint32_t res_hi,uint32_t res_mid,uint32_t res_lo,int scale)291 SearchScale(uint32_t res_hi, uint32_t res_mid, uint32_t res_lo, int scale)
292 {
293 	int   cur_scale;
294 
295 	// Quick check to stop us from trying to scale any more.
296 	//
297 	if (res_hi > OVFL_MAX_1_HI || scale >= DEC_SCALE_MAX) {
298 		cur_scale = 0;
299 		goto HaveScale;
300 	}
301 
302 	if (scale > DEC_SCALE_MAX - 9) {
303 		// We can't scale by 10^9 without exceeding the max scale factor.
304 		// See if we can scale to the max.  If not, we'll fall into
305 		// standard search for scale factor.
306 		//
307 		cur_scale = DEC_SCALE_MAX - scale;
308 		if (res_hi < power_overflow[cur_scale - 1].Hi)
309 			goto HaveScale;
310 
311 		if (res_hi == power_overflow[cur_scale - 1].Hi) {
312 		UpperEq:
313 			if (res_mid > power_overflow[cur_scale - 1].Mid ||
314 			    (res_mid == power_overflow[cur_scale - 1].Mid && res_lo > power_overflow[cur_scale - 1].Lo)) {
315 				cur_scale--;
316 			}
317 			goto HaveScale;
318 		}
319 	} else if (res_hi < OVFL_MAX_9_HI || (res_hi == OVFL_MAX_9_HI && res_mid < OVFL_MAX_9_MID) || (res_hi == OVFL_MAX_9_HI && res_mid == OVFL_MAX_9_MID && res_lo <= OVFL_MAX_9_LO))
320 		return 9;
321 
322 	// Search for a power to scale by < 9.  Do a binary search
323 	// on power_overflow[].
324 	//
325 	cur_scale = 5;
326 	if (res_hi < OVFL_MAX_5_HI)
327 		cur_scale = 7;
328 	else if (res_hi > OVFL_MAX_5_HI)
329 		cur_scale = 3;
330 	else
331 		goto UpperEq;
332 
333 	// cur_scale is 3 or 7.
334 	//
335 	if (res_hi < power_overflow[cur_scale - 1].Hi)
336 		cur_scale++;
337 	else if (res_hi > power_overflow[cur_scale - 1].Hi)
338 		cur_scale--;
339 	else
340 		goto UpperEq;
341 
342 	// cur_scale is 2, 4, 6, or 8.
343 	//
344 	// In all cases, we already found we could not use the power one larger.
345 	// So if we can use this power, it is the biggest, and we're done.  If
346 	// we can't use this power, the one below it is correct for all cases
347 	// unless it's 10^1 -- we might have to go to 10^0 (no scaling).
348 	//
349 	if (res_hi > power_overflow[cur_scale - 1].Hi)
350 		cur_scale--;
351 
352 	if (res_hi == power_overflow[cur_scale - 1].Hi)
353 		goto UpperEq;
354 
355 HaveScale:
356 	// cur_scale = largest power of 10 we can scale by without overflow,
357 	// cur_scale < 9.  See if this is enough to make scale factor
358 	// positive if it isn't already.
359 	//
360 	if (cur_scale + scale < 0)
361 		cur_scale = -1;
362 
363 	return cur_scale;
364 }
365 
366 
367 /**
368 * Div96By32
369 *
370 * Entry:
371 *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
372 *   ulDen   - 32-bit divisor.
373 *
374 * Purpose:
375 *   Do full divide, yielding 96-bit result and 32-bit remainder.
376 *
377 * Exit:
378 *   Quotient overwrites dividend.
379 *   Returns remainder.
380 *
381 * Exceptions:
382 *   None.
383 *
384 */
385 static uint32_t
Div96By32(uint32_t * num,uint32_t den)386 Div96By32(uint32_t *num, uint32_t den)
387 {
388 	SPLIT64  tmp;
389 
390 	tmp.u.Hi = 0;
391 
392 	if (num[2] != 0)
393 		goto Div3Word;
394 
395 	if (num[1] >= den)
396 		goto Div2Word;
397 
398 	tmp.u.Hi = num[1];
399 	num[1] = 0;
400 	goto Div1Word;
401 
402 Div3Word:
403 	tmp.u.Lo = num[2];
404 	tmp.int64 = DivMod64by32(tmp.int64, den);
405 	num[2] = tmp.u.Lo;
406 Div2Word:
407 	tmp.u.Lo = num[1];
408 	tmp.int64 = DivMod64by32(tmp.int64, den);
409 	num[1] = tmp.u.Lo;
410 Div1Word:
411 	tmp.u.Lo = num[0];
412 	tmp.int64 = DivMod64by32(tmp.int64, den);
413 	num[0] = tmp.u.Lo;
414 	return tmp.u.Hi;
415 }
416 
417 /***
418  * DecFixInt
419  *
420  * Entry:
421  *   pdecRes - Pointer to Decimal result location
422  *   operand  - Pointer to Decimal operand
423  *
424  * Purpose:
425  *   Chop the value to integer.  Return remainder so Int() function
426  *   can round down if non-zero.
427  *
428  * Exit:
429  *   Returns remainder.
430  *
431  * Exceptions:
432  *   None.
433  *
434  ***********************************************************************/
435 
436 static uint32_t
DecFixInt(MonoDecimal * result,MonoDecimal * operand)437 DecFixInt(MonoDecimal * result, MonoDecimal * operand)
438 {
439 	uint32_t   num[3];
440 	uint32_t   rem;
441 	uint32_t   pwr;
442 	int     scale;
443 
444 	if (operand->u.u.scale > 0) {
445 		num[0] = operand->v.v.Lo32;
446 		num[1] = operand->v.v.Mid32;
447 		num[2] = operand->Hi32;
448 		scale = operand->u.u.scale;
449 		result->u.u.sign = operand->u.u.sign;
450 		rem = 0;
451 
452 		do {
453 			if (scale > POWER10_MAX)
454 				pwr = ten_to_nine;
455 			else
456 				pwr = power10[scale];
457 
458 			rem |= Div96By32(num, pwr);
459 			scale -= 9;
460 		}while (scale > 0);
461 
462 		result->v.v.Lo32 = num[0];
463 		result->v.v.Mid32 = num[1];
464 		result->Hi32 = num[2];
465 		result->u.u.scale = 0;
466 
467 		return rem;
468 	}
469 
470 	COPYDEC(*result, *operand);
471 	// Odd, the Microsoft code does not set result->reserved to zero on this case
472 	return 0;
473 }
474 
475 /**
476  * ScaleResult:
477  *
478  * Entry:
479  *   res - Array of uint32_ts with value, least-significant first.
480  *   hi_res  - Index of last non-zero value in res.
481  *   scale  - Scale factor for this value, range 0 - 2 * DEC_SCALE_MAX
482  *
483  * Purpose:
484  *   See if we need to scale the result to fit it in 96 bits.
485  *   Perform needed scaling.  Adjust scale factor accordingly.
486  *
487  * Exit:
488  *   res updated in place, always 3 uint32_ts.
489  *   New scale factor returned, -1 if overflow error.
490  *
491  */
492 static int
ScaleResult(uint32_t * res,int hi_res,int scale)493 ScaleResult(uint32_t *res, int hi_res, int scale)
494 {
495 	int     new_scale;
496 	int     cur;
497 	uint32_t   pwr;
498 	uint32_t   tmp;
499 	uint32_t   sticky;
500 	SPLIT64 sdlTmp;
501 
502 	// See if we need to scale the result.  The combined scale must
503 	// be <= DEC_SCALE_MAX and the upper 96 bits must be zero.
504 	//
505 	// Start by figuring a lower bound on the scaling needed to make
506 	// the upper 96 bits zero.  hi_res is the index into res[]
507 	// of the highest non-zero uint32_t.
508 	//
509 	new_scale =   hi_res * 32 - 64 - 1;
510 	if (new_scale > 0) {
511 
512 		// Find the MSB.
513 		//
514 		tmp = res[hi_res];
515 		if (!(tmp & 0xFFFF0000)) {
516 			new_scale -= 16;
517 			tmp <<= 16;
518 		}
519 		if (!(tmp & 0xFF000000)) {
520 			new_scale -= 8;
521 			tmp <<= 8;
522 		}
523 		if (!(tmp & 0xF0000000)) {
524 			new_scale -= 4;
525 			tmp <<= 4;
526 		}
527 		if (!(tmp & 0xC0000000)) {
528 			new_scale -= 2;
529 			tmp <<= 2;
530 		}
531 		if (!(tmp & 0x80000000)) {
532 			new_scale--;
533 			tmp <<= 1;
534 		}
535 
536 		// Multiply bit position by log10(2) to figure it's power of 10.
537 		// We scale the log by 256.  log(2) = .30103, * 256 = 77.  Doing this
538 		// with a multiply saves a 96-byte lookup table.  The power returned
539 		// is <= the power of the number, so we must add one power of 10
540 		// to make it's integer part zero after dividing by 256.
541 		//
542 		// Note: the result of this multiplication by an approximation of
543 		// log10(2) have been exhaustively checked to verify it gives the
544 		// correct result.  (There were only 95 to check...)
545 		//
546 		new_scale = ((new_scale * 77) >> 8) + 1;
547 
548 		// new_scale = min scale factor to make high 96 bits zero, 0 - 29.
549 		// This reduces the scale factor of the result.  If it exceeds the
550 		// current scale of the result, we'll overflow.
551 		//
552 		if (new_scale > scale)
553 			return -1;
554 	}
555 	else
556 		new_scale = 0;
557 
558 	// Make sure we scale by enough to bring the current scale factor
559 	// into valid range.
560 	//
561 	if (new_scale < scale - DEC_SCALE_MAX)
562 		new_scale = scale - DEC_SCALE_MAX;
563 
564 	if (new_scale != 0) {
565 		// Scale by the power of 10 given by new_scale.  Note that this is
566 		// NOT guaranteed to bring the number within 96 bits -- it could
567 		// be 1 power of 10 short.
568 		//
569 		scale -= new_scale;
570 		sticky = 0;
571 		sdlTmp.u.Hi = 0; // initialize remainder
572 
573 		for (;;) {
574 
575 			sticky |= sdlTmp.u.Hi; // record remainder as sticky bit
576 
577 			if (new_scale > POWER10_MAX)
578 				pwr = ten_to_nine;
579 			else
580 				pwr = power10[new_scale];
581 
582 			// Compute first quotient.
583 			// DivMod64by32 returns quotient in Lo, remainder in Hi.
584 			//
585 			sdlTmp.int64 = DivMod64by32(res[hi_res], pwr);
586 			res[hi_res] = sdlTmp.u.Lo;
587 			cur = hi_res - 1;
588 
589 			if (cur >= 0) {
590 				// If first quotient was 0, update hi_res.
591 				//
592 				if (sdlTmp.u.Lo == 0)
593 					hi_res--;
594 
595 				// Compute subsequent quotients.
596 				//
597 				do {
598 					sdlTmp.u.Lo = res[cur];
599 					sdlTmp.int64 = DivMod64by32(sdlTmp.int64, pwr);
600 					res[cur] = sdlTmp.u.Lo;
601 					cur--;
602 				} while (cur >= 0);
603 
604 			}
605 
606 			new_scale -= POWER10_MAX;
607 			if (new_scale > 0)
608 				continue; // scale some more
609 
610 			// If we scaled enough, hi_res would be 2 or less.  If not,
611 			// divide by 10 more.
612 			//
613 			if (hi_res > 2) {
614 				new_scale = 1;
615 				scale--;
616 				continue; // scale by 10
617 			}
618 
619 			// Round final result.  See if remainder >= 1/2 of divisor.
620 			// If remainder == 1/2 divisor, round up if odd or sticky bit set.
621 			//
622 			pwr >>= 1;  // power of 10 always even
623 			if ( pwr <= sdlTmp.u.Hi && (pwr < sdlTmp.u.Hi ||
624 						    ((res[0] & 1) | sticky)) ) {
625 				cur = -1;
626 				while (++res[++cur] == 0);
627 
628 				if (cur > 2) {
629 					// The rounding caused us to carry beyond 96 bits.
630 					// Scale by 10 more.
631 					//
632 					hi_res = cur;
633 					sticky = 0;  // no sticky bit
634 					sdlTmp.u.Hi = 0; // or remainder
635 					new_scale = 1;
636 					scale--;
637 					continue; // scale by 10
638 				}
639 			}
640 
641 			// We may have scaled it more than we planned.  Make sure the scale
642 			// factor hasn't gone negative, indicating overflow.
643 			//
644 			if (scale < 0)
645 				return -1;
646 
647 			return scale;
648 		} // for(;;)
649 	}
650 	return scale;
651 }
652 
653 // Decimal multiply
654 // Returns: MONO_DECIMAL_OVERFLOW or MONO_DECIMAL_OK
655 static MonoDecimalStatus
mono_decimal_multiply_result(MonoDecimal * left,MonoDecimal * right,MonoDecimal * result)656 mono_decimal_multiply_result(MonoDecimal * left, MonoDecimal * right, MonoDecimal * result)
657 {
658 	SPLIT64 tmp;
659 	SPLIT64 tmp2;
660 	SPLIT64 tmp3;
661 	int     scale;
662 	int     hi_prod;
663 	uint32_t   pwr;
664 	uint32_t   rem_lo;
665 	uint32_t   rem_hi;
666 	uint32_t   prod[6];
667 
668 	scale = left->u.u.scale + right->u.u.scale;
669 
670 	if ((left->Hi32 | left->v.v.Mid32 | right->Hi32 | right->v.v.Mid32) == 0) {
671 		// Upper 64 bits are zero.
672 		//
673 		tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
674 		if (scale > DEC_SCALE_MAX)
675 		{
676 			// Result scale is too big.  Divide result by power of 10 to reduce it.
677 			// If the amount to divide by is > 19 the result is guaranteed
678 			// less than 1/2.  [max value in 64 bits = 1.84E19]
679 			//
680 			scale -= DEC_SCALE_MAX;
681 			if (scale > 19) {
682 			ReturnZero:
683 				DECIMAL_SETZERO(*result);
684 				return MONO_DECIMAL_OK;
685 			}
686 
687 			if (scale > POWER10_MAX) {
688 				// Divide by 1E10 first, to get the power down to a 32-bit quantity.
689 				// 1E10 itself doesn't fit in 32 bits, so we'll divide by 2.5E9 now
690 				// then multiply the next divisor by 4 (which will be a max of 4E9).
691 				//
692 				rem_lo = FullDiv64By32(&tmp.int64, ten_to_ten_div_4);
693 				pwr = power10[scale - 10] << 2;
694 			} else {
695 				pwr = power10[scale];
696 				rem_lo = 0;
697 			}
698 
699 			// Power to divide by fits in 32 bits.
700 			//
701 			rem_hi = FullDiv64By32(&tmp.int64, pwr);
702 
703 			// Round result.  See if remainder >= 1/2 of divisor.
704 			// Divisor is a power of 10, so it is always even.
705 			//
706 			pwr >>= 1;
707 			if (rem_hi >= pwr && (rem_hi > pwr || (rem_lo | (tmp.u.Lo & 1))))
708 				tmp.int64++;
709 
710 			scale = DEC_SCALE_MAX;
711 		}
712 		DECIMAL_LO32(*result) = tmp.u.Lo;
713 		DECIMAL_MID32(*result) = tmp.u.Hi;
714 		DECIMAL_HI32(*result) = 0;
715 	} else {
716 		// At least one operand has bits set in the upper 64 bits.
717 		//
718 		// Compute and accumulate the 9 partial products into a
719 		// 192-bit (24-byte) result.
720 		//
721 		//                [l-h][l-m][l-l]   left high, middle, low
722 		//             x  [r-h][r-m][r-l]   right high, middle, low
723 		// ------------------------------
724 		//
725 		//                     [0-h][0-l]   l-l * r-l
726 		//                [1ah][1al]        l-l * r-m
727 		//                [1bh][1bl]        l-m * r-l
728 		//           [2ah][2al]             l-m * r-m
729 		//           [2bh][2bl]             l-l * r-h
730 		//           [2ch][2cl]             l-h * r-l
731 		//      [3ah][3al]                  l-m * r-h
732 		//      [3bh][3bl]                  l-h * r-m
733 		// [4-h][4-l]                       l-h * r-h
734 		// ------------------------------
735 		// [p-5][p-4][p-3][p-2][p-1][p-0]   prod[] array
736 		//
737 		tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
738 		prod[0] = tmp.u.Lo;
739 
740 		tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Mid32) + tmp.u.Hi;
741 
742 		tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Lo32);
743 		tmp.int64 += tmp2.int64; // this could generate carry
744 		prod[1] = tmp.u.Lo;
745 		if (tmp.int64 < tmp2.int64) // detect carry
746 			tmp2.u.Hi = 1;
747 		else
748 			tmp2.u.Hi = 0;
749 		tmp2.u.Lo = tmp.u.Hi;
750 
751 		tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Mid32) + tmp2.int64;
752 
753 		if (left->Hi32 | right->Hi32) {
754 			// Highest 32 bits is non-zero.  Calculate 5 more partial products.
755 			//
756 			tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->Hi32);
757 			tmp.int64 += tmp2.int64; // this could generate carry
758 			if (tmp.int64 < tmp2.int64) // detect carry
759 				tmp3.u.Hi = 1;
760 			else
761 				tmp3.u.Hi = 0;
762 
763 			tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Lo32);
764 			tmp.int64 += tmp2.int64; // this could generate carry
765 			prod[2] = tmp.u.Lo;
766 			if (tmp.int64 < tmp2.int64) // detect carry
767 				tmp3.u.Hi++;
768 			tmp3.u.Lo = tmp.u.Hi;
769 
770 			tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->Hi32);
771 			tmp.int64 += tmp3.int64; // this could generate carry
772 			if (tmp.int64 < tmp3.int64) // detect carry
773 				tmp3.u.Hi = 1;
774 			else
775 				tmp3.u.Hi = 0;
776 
777 			tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Mid32);
778 			tmp.int64 += tmp2.int64; // this could generate carry
779 			prod[3] = tmp.u.Lo;
780 			if (tmp.int64 < tmp2.int64) // detect carry
781 				tmp3.u.Hi++;
782 			tmp3.u.Lo = tmp.u.Hi;
783 
784 			tmp.int64 = UInt32x32To64(left->Hi32, right->Hi32) + tmp3.int64;
785 			prod[4] = tmp.u.Lo;
786 			prod[5] = tmp.u.Hi;
787 
788 			hi_prod = 5;
789 		}
790 		else {
791 			prod[2] = tmp.u.Lo;
792 			prod[3] = tmp.u.Hi;
793 			hi_prod = 3;
794 		}
795 
796 		// Check for leading zero uint32_ts on the product
797 		//
798 		while (prod[hi_prod] == 0) {
799 			hi_prod--;
800 			if (hi_prod < 0)
801 				goto ReturnZero;
802 		}
803 
804 		scale = ScaleResult(prod, hi_prod, scale);
805 		if (scale == -1)
806 			return MONO_DECIMAL_OVERFLOW;
807 
808 		result->v.v.Lo32 = prod[0];
809 		result->v.v.Mid32 = prod[1];
810 		result->Hi32 = prod[2];
811 	}
812 
813 	result->u.u.sign = right->u.u.sign ^ left->u.u.sign;
814 	result->u.u.scale = (char)scale;
815 	return MONO_DECIMAL_OK;
816 }
817 
818 // Addition and subtraction
819 static MonoDecimalStatus
DecAddSub(MonoDecimal * left,MonoDecimal * right,MonoDecimal * result,int8_t sign)820 DecAddSub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result, int8_t sign)
821 {
822 	uint32_t     num[6];
823 	uint32_t     pwr;
824 	int       scale;
825 	int       hi_prod;
826 	int       cur;
827 	SPLIT64   tmp;
828 	MonoDecimal decRes;
829 	MonoDecimal decTmp;
830 	MonoDecimal *pdecTmp;
831 
832 	sign ^= (right->u.u.sign ^ left->u.u.sign) & DECIMAL_NEG;
833 
834 	if (right->u.u.scale == left->u.u.scale) {
835 		// Scale factors are equal, no alignment necessary.
836 		//
837 		decRes.u.signscale = left->u.signscale;
838 
839 	AlignedAdd:
840 		if (sign) {
841 			// Signs differ - subtract
842 			//
843 			DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right));
844 			DECIMAL_HI32(decRes) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
845 
846 			// Propagate carry
847 			//
848 			if (DECIMAL_LO64_GET(decRes) > DECIMAL_LO64_GET(*left)) {
849 				decRes.Hi32--;
850 				if (decRes.Hi32 >= left->Hi32)
851 					goto SignFlip;
852 			} else if (decRes.Hi32 > left->Hi32) {
853 				// Got negative result.  Flip its sign.
854 				//
855 			SignFlip:
856 				DECIMAL_LO64_SET(decRes, -(uint64_t)DECIMAL_LO64_GET(decRes));
857 				decRes.Hi32 = ~decRes.Hi32;
858 				if (DECIMAL_LO64_GET(decRes) == 0)
859 					decRes.Hi32++;
860 				decRes.u.u.sign ^= DECIMAL_NEG;
861 			}
862 
863 		} else {
864 			// Signs are the same - add
865 			//
866 			DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right));
867 			decRes.Hi32 = left->Hi32 + right->Hi32;
868 
869 			// Propagate carry
870 			//
871 			if (DECIMAL_LO64_GET(decRes) < DECIMAL_LO64_GET(*left)) {
872 				decRes.Hi32++;
873 				if (decRes.Hi32 <= left->Hi32)
874 					goto AlignedScale;
875 			} else if (decRes.Hi32 < left->Hi32) {
876 			AlignedScale:
877 				// The addition carried above 96 bits.  Divide the result by 10,
878 				// dropping the scale factor.
879 				//
880 				if (decRes.u.u.scale == 0)
881 					return MONO_DECIMAL_OVERFLOW;
882 				decRes.u.u.scale--;
883 
884 				tmp.u.Lo = decRes.Hi32;
885 				tmp.u.Hi = 1;
886 				tmp.int64 = DivMod64by32(tmp.int64, 10);
887 				decRes.Hi32 = tmp.u.Lo;
888 
889 				tmp.u.Lo = decRes.v.v.Mid32;
890 				tmp.int64 = DivMod64by32(tmp.int64, 10);
891 				decRes.v.v.Mid32 = tmp.u.Lo;
892 
893 				tmp.u.Lo = decRes.v.v.Lo32;
894 				tmp.int64 = DivMod64by32(tmp.int64, 10);
895 				decRes.v.v.Lo32 = tmp.u.Lo;
896 
897 				// See if we need to round up.
898 				//
899 				if (tmp.u.Hi >= 5 && (tmp.u.Hi > 5 || (decRes.v.v.Lo32 & 1))) {
900 					DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(decRes)+1)
901 						if (DECIMAL_LO64_GET(decRes) == 0)
902 							decRes.Hi32++;
903 				}
904 			}
905 		}
906 	}
907 	else {
908 		// Scale factors are not equal.  Assume that a larger scale
909 		// factor (more decimal places) is likely to mean that number
910 		// is smaller.  Start by guessing that the right operand has
911 		// the larger scale factor.  The result will have the larger
912 		// scale factor.
913 		//
914 		decRes.u.u.scale = right->u.u.scale;  // scale factor of "smaller"
915 		decRes.u.u.sign = left->u.u.sign;    // but sign of "larger"
916 		scale = decRes.u.u.scale - left->u.u.scale;
917 
918 		if (scale < 0) {
919 			// Guessed scale factor wrong. Swap operands.
920 			//
921 			scale = -scale;
922 			decRes.u.u.scale = left->u.u.scale;
923 			decRes.u.u.sign ^= sign;
924 			pdecTmp = right;
925 			right = left;
926 			left = pdecTmp;
927 		}
928 
929 		// *left will need to be multiplied by 10^scale so
930 		// it will have the same scale as *right.  We could be
931 		// extending it to up to 192 bits of precision.
932 		//
933 		if (scale <= POWER10_MAX) {
934 			// Scaling won't make it larger than 4 uint32_ts
935 			//
936 			pwr = power10[scale];
937 			DECIMAL_LO64_SET(decTmp, UInt32x32To64(left->v.v.Lo32, pwr));
938 			tmp.int64 = UInt32x32To64(left->v.v.Mid32, pwr);
939 			tmp.int64 += decTmp.v.v.Mid32;
940 			decTmp.v.v.Mid32 = tmp.u.Lo;
941 			decTmp.Hi32 = tmp.u.Hi;
942 			tmp.int64 = UInt32x32To64(left->Hi32, pwr);
943 			tmp.int64 += decTmp.Hi32;
944 			if (tmp.u.Hi == 0) {
945 				// Result fits in 96 bits.  Use standard aligned add.
946 				//
947 				decTmp.Hi32 = tmp.u.Lo;
948 				left = &decTmp;
949 				goto AlignedAdd;
950 			}
951 			num[0] = decTmp.v.v.Lo32;
952 			num[1] = decTmp.v.v.Mid32;
953 			num[2] = tmp.u.Lo;
954 			num[3] = tmp.u.Hi;
955 			hi_prod = 3;
956 		}
957 		else {
958 			// Have to scale by a bunch.  Move the number to a buffer
959 			// where it has room to grow as it's scaled.
960 			//
961 			num[0] = left->v.v.Lo32;
962 			num[1] = left->v.v.Mid32;
963 			num[2] = left->Hi32;
964 			hi_prod = 2;
965 
966 			// Scan for zeros in the upper words.
967 			//
968 			if (num[2] == 0) {
969 				hi_prod = 1;
970 				if (num[1] == 0) {
971 					hi_prod = 0;
972 					if (num[0] == 0) {
973 						// Left arg is zero, return right.
974 						//
975 						DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*right));
976 						decRes.Hi32 = right->Hi32;
977 						decRes.u.u.sign ^= sign;
978 						goto RetDec;
979 					}
980 				}
981 			}
982 
983 			// Scaling loop, up to 10^9 at a time.  hi_prod stays updated
984 			// with index of highest non-zero uint32_t.
985 			//
986 			for (; scale > 0; scale -= POWER10_MAX) {
987 				if (scale > POWER10_MAX)
988 					pwr = ten_to_nine;
989 				else
990 					pwr = power10[scale];
991 
992 				tmp.u.Hi = 0;
993 				for (cur = 0; cur <= hi_prod; cur++) {
994 					tmp.int64 = UInt32x32To64(num[cur], pwr) + tmp.u.Hi;
995 					num[cur] = tmp.u.Lo;
996 				}
997 
998 				if (tmp.u.Hi != 0)
999 					// We're extending the result by another uint32_t.
1000 					num[++hi_prod] = tmp.u.Hi;
1001 			}
1002 		}
1003 
1004 		// Scaling complete, do the add.  Could be subtract if signs differ.
1005 		//
1006 		tmp.u.Lo = num[0];
1007 		tmp.u.Hi = num[1];
1008 
1009 		if (sign) {
1010 			// Signs differ, subtract.
1011 			//
1012 			DECIMAL_LO64_SET(decRes, tmp.int64 - DECIMAL_LO64_GET(*right));
1013 			decRes.Hi32 = num[2] - right->Hi32;
1014 
1015 			// Propagate carry
1016 			//
1017 			if (DECIMAL_LO64_GET(decRes) > tmp.int64) {
1018 				decRes.Hi32--;
1019 				if (decRes.Hi32 >= num[2])
1020 					goto LongSub;
1021 			}
1022 			else if (decRes.Hi32 > num[2]) {
1023 			LongSub:
1024 				// If num has more than 96 bits of precision, then we need to
1025 				// carry the subtraction into the higher bits.  If it doesn't,
1026 				// then we subtracted in the wrong order and have to flip the
1027 				// sign of the result.
1028 				//
1029 				if (hi_prod <= 2)
1030 					goto SignFlip;
1031 
1032 				cur = 3;
1033 				while(num[cur++]-- == 0);
1034 				if (num[hi_prod] == 0)
1035 					hi_prod--;
1036 			}
1037 		}
1038 		else {
1039 			// Signs the same, add.
1040 			//
1041 			DECIMAL_LO64_SET(decRes, tmp.int64 + DECIMAL_LO64_GET(*right));
1042 			decRes.Hi32 = num[2] + right->Hi32;
1043 
1044 			// Propagate carry
1045 			//
1046 			if (DECIMAL_LO64_GET(decRes) < tmp.int64) {
1047 				decRes.Hi32++;
1048 				if (decRes.Hi32 <= num[2])
1049 					goto LongAdd;
1050 			}
1051 			else if (decRes.Hi32 < num[2]) {
1052 			LongAdd:
1053 				// Had a carry above 96 bits.
1054 				//
1055 				cur = 3;
1056 				do {
1057 					if (hi_prod < cur) {
1058 						num[cur] = 1;
1059 						hi_prod = cur;
1060 						break;
1061 					}
1062 				}while (++num[cur++] == 0);
1063 			}
1064 		}
1065 
1066 		if (hi_prod > 2) {
1067 			num[0] = decRes.v.v.Lo32;
1068 			num[1] = decRes.v.v.Mid32;
1069 			num[2] = decRes.Hi32;
1070 			decRes.u.u.scale = ScaleResult(num, hi_prod, decRes.u.u.scale);
1071 			if (decRes.u.u.scale == (uint8_t) -1)
1072 				return MONO_DECIMAL_OVERFLOW;
1073 
1074 			decRes.v.v.Lo32 = num[0];
1075 			decRes.v.v.Mid32 = num[1];
1076 			decRes.Hi32 = num[2];
1077 		}
1078 	}
1079 
1080 RetDec:
1081 	COPYDEC(*result, decRes);
1082 	// Odd, the Microsoft code does not set result->reserved to zero on this case
1083 	return MONO_DECIMAL_OK;
1084 }
1085 
1086 // Decimal addition
1087 static MonoDecimalStatus G_GNUC_UNUSED
mono_decimal_add(MonoDecimal * left,MonoDecimal * right,MonoDecimal * result)1088 mono_decimal_add(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1089 {
1090     return DecAddSub (left, right, result, 0);
1091 }
1092 
1093 // Decimal subtraction
1094 static MonoDecimalStatus G_GNUC_UNUSED
mono_decimal_sub(MonoDecimal * left,MonoDecimal * right,MonoDecimal * result)1095 mono_decimal_sub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1096 {
1097     return DecAddSub (left, right, result, DECIMAL_NEG);
1098 }
1099 
1100 /**
1101  * IncreaseScale:
1102  *
1103  * Entry:
1104  *   num - Pointer to 96-bit number as array of uint32_ts, least-sig first
1105  *   pwr   - Scale factor to multiply by
1106  *
1107  * Purpose:
1108  *   Multiply the two numbers.  The low 96 bits of the result overwrite
1109  *   the input.  The last 32 bits of the product are the return value.
1110  *
1111  * Exit:
1112  *   Returns highest 32 bits of product.
1113  *
1114  * Exceptions:
1115  *   None.
1116  *
1117  */
1118 static uint32_t
IncreaseScale(uint32_t * num,uint32_t pwr)1119 IncreaseScale(uint32_t *num, uint32_t pwr)
1120 {
1121 	SPLIT64   sdlTmp;
1122 
1123 	sdlTmp.int64 = UInt32x32To64(num[0], pwr);
1124 	num[0] = sdlTmp.u.Lo;
1125 	sdlTmp.int64 = UInt32x32To64(num[1], pwr) + sdlTmp.u.Hi;
1126 	num[1] = sdlTmp.u.Lo;
1127 	sdlTmp.int64 = UInt32x32To64(num[2], pwr) + sdlTmp.u.Hi;
1128 	num[2] = sdlTmp.u.Lo;
1129 	return sdlTmp.u.Hi;
1130 }
1131 
1132 /**
1133  * Div96By64:
1134  *
1135  * Entry:
1136  *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
1137  *   sdlDen  - 64-bit divisor.
1138  *
1139  * Purpose:
1140  *   Do partial divide, yielding 32-bit result and 64-bit remainder.
1141  *   Divisor must be larger than upper 64 bits of dividend.
1142  *
1143  * Exit:
1144  *   Remainder overwrites lower 64-bits of dividend.
1145  *   Returns quotient.
1146  *
1147  * Exceptions:
1148  *   None.
1149  *
1150  */
1151 static uint32_t
Div96By64(uint32_t * num,SPLIT64 den)1152 Div96By64(uint32_t *num, SPLIT64 den)
1153 {
1154 	SPLIT64 quo;
1155 	SPLIT64 sdlNum;
1156 	SPLIT64 prod;
1157 
1158 	sdlNum.u.Lo = num[0];
1159 
1160 	if (num[2] >= den.u.Hi) {
1161 		// Divide would overflow.  Assume a quotient of 2^32, and set
1162 		// up remainder accordingly.  Then jump to loop which reduces
1163 		// the quotient.
1164 		//
1165 		sdlNum.u.Hi = num[1] - den.u.Lo;
1166 		quo.u.Lo = 0;
1167 		goto NegRem;
1168 	}
1169 
1170 	// Hardware divide won't overflow
1171 	//
1172 	if (num[2] == 0 && num[1] < den.u.Hi)
1173 		// Result is zero.  Entire dividend is remainder.
1174 		//
1175 		return 0;
1176 
1177 	// DivMod64by32 returns quotient in Lo, remainder in Hi.
1178 	//
1179 	quo.u.Lo = num[1];
1180 	quo.u.Hi = num[2];
1181 	quo.int64 = DivMod64by32(quo.int64, den.u.Hi);
1182 	sdlNum.u.Hi = quo.u.Hi; // remainder
1183 
1184 	// Compute full remainder, rem = dividend - (quo * divisor).
1185 	//
1186 	prod.int64 = UInt32x32To64(quo.u.Lo, den.u.Lo); // quo * lo divisor
1187 	sdlNum.int64 -= prod.int64;
1188 
1189 	if (sdlNum.int64 > ~prod.int64) {
1190 	NegRem:
1191 		// Remainder went negative.  Add divisor back in until it's positive,
1192 		// a max of 2 times.
1193 		//
1194 		do {
1195 			quo.u.Lo--;
1196 			sdlNum.int64 += den.int64;
1197 		}while (sdlNum.int64 >= den.int64);
1198 	}
1199 
1200 	num[0] = sdlNum.u.Lo;
1201 	num[1] = sdlNum.u.Hi;
1202 	return quo.u.Lo;
1203 }
1204 
1205 /***
1206 * Div128By96
1207 *
1208 * Entry:
1209 *   rgulNum - Pointer to 128-bit dividend as array of uint32_ts, least-sig first
1210 *   den - Pointer to 96-bit divisor.
1211 *
1212 * Purpose:
1213 *   Do partial divide, yielding 32-bit result and 96-bit remainder.
1214 *   Top divisor uint32_t must be larger than top dividend uint32_t.  This is
1215 *   assured in the initial call because the divisor is normalized
1216 *   and the dividend can't be.  In subsequent calls, the remainder
1217 *   is multiplied by 10^9 (max), so it can be no more than 1/4 of
1218 *   the divisor which is effectively multiplied by 2^32 (4 * 10^9).
1219 *
1220 * Exit:
1221 *   Remainder overwrites lower 96-bits of dividend.
1222 *   Returns quotient.
1223 *
1224 * Exceptions:
1225 *   None.
1226 *
1227 ***********************************************************************/
1228 
1229 static uint32_t
Div128By96(uint32_t * num,uint32_t * den)1230 Div128By96(uint32_t *num, uint32_t *den)
1231 {
1232 	SPLIT64 sdlQuo;
1233 	SPLIT64 sdlNum;
1234 	SPLIT64 sdlProd1;
1235 	SPLIT64 sdlProd2;
1236 
1237 	sdlNum.u.Lo = num[0];
1238 	sdlNum.u.Hi = num[1];
1239 
1240 	if (num[3] == 0 && num[2] < den[2]){
1241 		// Result is zero.  Entire dividend is remainder.
1242 		//
1243 		return 0;
1244 	}
1245 
1246 	// DivMod64by32 returns quotient in Lo, remainder in Hi.
1247 	//
1248 	sdlQuo.u.Lo = num[2];
1249 	sdlQuo.u.Hi = num[3];
1250 	sdlQuo.int64 = DivMod64by32(sdlQuo.int64, den[2]);
1251 
1252 	// Compute full remainder, rem = dividend - (quo * divisor).
1253 	//
1254 	sdlProd1.int64 = UInt32x32To64(sdlQuo.u.Lo, den[0]); // quo * lo divisor
1255 	sdlProd2.int64 = UInt32x32To64(sdlQuo.u.Lo, den[1]); // quo * mid divisor
1256 	sdlProd2.int64 += sdlProd1.u.Hi;
1257 	sdlProd1.u.Hi = sdlProd2.u.Lo;
1258 
1259 	sdlNum.int64 -= sdlProd1.int64;
1260 	num[2] = sdlQuo.u.Hi - sdlProd2.u.Hi; // sdlQuo.Hi is remainder
1261 
1262 	// Propagate carries
1263 	//
1264 	if (sdlNum.int64 > ~sdlProd1.int64) {
1265 		num[2]--;
1266 		if (num[2] >= ~sdlProd2.u.Hi)
1267 			goto NegRem;
1268 	} else if (num[2] > ~sdlProd2.u.Hi) {
1269 	NegRem:
1270 		// Remainder went negative.  Add divisor back in until it's positive,
1271 		// a max of 2 times.
1272 		//
1273 		sdlProd1.u.Lo = den[0];
1274 		sdlProd1.u.Hi = den[1];
1275 
1276 		for (;;) {
1277 			sdlQuo.u.Lo--;
1278 			sdlNum.int64 += sdlProd1.int64;
1279 			num[2] += den[2];
1280 
1281 			if (sdlNum.int64 < sdlProd1.int64) {
1282 				// Detected carry. Check for carry out of top
1283 				// before adding it in.
1284 				//
1285 				if (num[2]++ < den[2])
1286 					break;
1287 			}
1288 			if (num[2] < den[2])
1289 				break; // detected carry
1290 		}
1291 	}
1292 
1293 	num[0] = sdlNum.u.Lo;
1294 	num[1] = sdlNum.u.Hi;
1295 	return sdlQuo.u.Lo;
1296 }
1297 
1298 // Add a 32 bit unsigned long to an array of 3 unsigned longs representing a 96 integer
1299 // Returns FALSE if there is an overflow
1300 static gboolean
Add32To96(uint32_t * num,uint32_t value)1301 Add32To96(uint32_t *num, uint32_t value)
1302 {
1303 	num[0] += value;
1304 	if (num[0] < value) {
1305 		if (++num[1] == 0) {
1306 			if (++num[2] == 0) {
1307 				return FALSE;
1308 			}
1309 		}
1310 	}
1311 	return TRUE;
1312 }
1313 
1314 static void
OverflowUnscale(uint32_t * quo,gboolean remainder)1315 OverflowUnscale (uint32_t *quo, gboolean remainder)
1316 {
1317 	SPLIT64  sdlTmp;
1318 
1319 	// We have overflown, so load the high bit with a one.
1320 	sdlTmp.u.Hi = 1u;
1321 	sdlTmp.u.Lo = quo[2];
1322 	sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1323 	quo[2] = sdlTmp.u.Lo;
1324 	sdlTmp.u.Lo = quo[1];
1325 	sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1326 	quo[1] = sdlTmp.u.Lo;
1327 	sdlTmp.u.Lo = quo[0];
1328 	sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1329 	quo[0] = sdlTmp.u.Lo;
1330 	// The remainder is the last digit that does not fit, so we can use it to work out if we need to round up
1331 	if ((sdlTmp.u.Hi > 5) || ((sdlTmp.u.Hi == 5) && ( remainder || (quo[0] & 1)))) {
1332 		Add32To96(quo, 1u);
1333 	}
1334 }
1335 
1336 // mono_decimal_divide - Decimal divide
1337 static MonoDecimalStatus G_GNUC_UNUSED
mono_decimal_divide_result(MonoDecimal * left,MonoDecimal * right,MonoDecimal * result)1338 mono_decimal_divide_result(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1339 {
1340 	uint32_t   quo[3];
1341 	uint32_t   quoSave[3];
1342 	uint32_t   rem[4];
1343 	uint32_t   divisor[3];
1344 	uint32_t   pwr;
1345 	uint32_t   utmp;
1346 	uint32_t   utmp1;
1347 	SPLIT64 sdlTmp;
1348 	SPLIT64 sdlDivisor;
1349 	int     scale;
1350 	int     cur_scale;
1351 
1352 	scale = left->u.u.scale - right->u.u.scale;
1353 	divisor[0] = right->v.v.Lo32;
1354 	divisor[1] = right->v.v.Mid32;
1355 	divisor[2] = right->Hi32;
1356 
1357 	if (divisor[1] == 0 && divisor[2] == 0) {
1358 		// Divisor is only 32 bits.  Easy divide.
1359 		//
1360 		if (divisor[0] == 0)
1361 			return MONO_DECIMAL_DIVBYZERO;
1362 
1363 		quo[0] = left->v.v.Lo32;
1364 		quo[1] = left->v.v.Mid32;
1365 		quo[2] = left->Hi32;
1366 		rem[0] = Div96By32(quo, divisor[0]);
1367 
1368 		for (;;) {
1369 			if (rem[0] == 0) {
1370 				if (scale < 0) {
1371 					cur_scale = min(9, -scale);
1372 					goto HaveScale;
1373 				}
1374 				break;
1375 			}
1376 
1377 			// We have computed a quotient based on the natural scale
1378 			// ( <dividend scale> - <divisor scale> ).  We have a non-zero
1379 			// remainder, so now we should increase the scale if possible to
1380 			// include more quotient bits.
1381 			//
1382 			// If it doesn't cause overflow, we'll loop scaling by 10^9 and
1383 			// computing more quotient bits as long as the remainder stays
1384 			// non-zero.  If scaling by that much would cause overflow, we'll
1385 			// drop out of the loop and scale by as much as we can.
1386 			//
1387 			// Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
1388 			// = 4.294 967 296.  So the upper limit is quo[2] == 4 and
1389 			// quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
1390 			// quotient bits in quo[0] could be all 1's, then 1,266,874,888
1391 			// is the largest value in quo[1] (when quo[2] == 4) that is
1392 			// assured not to overflow.
1393 			//
1394 			cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1395 			if (cur_scale == 0) {
1396 				// No more scaling to be done, but remainder is non-zero.
1397 				// Round quotient.
1398 				//
1399 				utmp = rem[0] << 1;
1400 				if (utmp < rem[0] || (utmp >= divisor[0] &&
1401 						      (utmp > divisor[0] || (quo[0] & 1)))) {
1402 				RoundUp:
1403 					if (++quo[0] == 0)
1404 						if (++quo[1] == 0)
1405 							quo[2]++;
1406 				}
1407 				break;
1408 			}
1409 
1410 			if (cur_scale == -1)
1411 				return MONO_DECIMAL_OVERFLOW;
1412 
1413 		HaveScale:
1414 			pwr = power10[cur_scale];
1415 			scale += cur_scale;
1416 
1417 			if (IncreaseScale(quo, pwr) != 0)
1418 				return MONO_DECIMAL_OVERFLOW;
1419 
1420 			sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
1421 			rem[0] = sdlTmp.u.Hi;
1422 
1423 			quo[0] += sdlTmp.u.Lo;
1424 			if (quo[0] < sdlTmp.u.Lo) {
1425 				if (++quo[1] == 0)
1426 					quo[2]++;
1427 			}
1428 		} // for (;;)
1429 	}
1430 	else {
1431 		// Divisor has bits set in the upper 64 bits.
1432 		//
1433 		// Divisor must be fully normalized (shifted so bit 31 of the most
1434 		// significant uint32_t is 1).  Locate the MSB so we know how much to
1435 		// normalize by.  The dividend will be shifted by the same amount so
1436 		// the quotient is not changed.
1437 		//
1438 		if (divisor[2] == 0)
1439 			utmp = divisor[1];
1440 		else
1441 			utmp = divisor[2];
1442 
1443 		cur_scale = 0;
1444 		if (!(utmp & 0xFFFF0000)) {
1445 			cur_scale += 16;
1446 			utmp <<= 16;
1447 		}
1448 		if (!(utmp & 0xFF000000)) {
1449 			cur_scale += 8;
1450 			utmp <<= 8;
1451 		}
1452 		if (!(utmp & 0xF0000000)) {
1453 			cur_scale += 4;
1454 			utmp <<= 4;
1455 		}
1456 		if (!(utmp & 0xC0000000)) {
1457 			cur_scale += 2;
1458 			utmp <<= 2;
1459 		}
1460 		if (!(utmp & 0x80000000)) {
1461 			cur_scale++;
1462 			utmp <<= 1;
1463 		}
1464 
1465 		// Shift both dividend and divisor left by cur_scale.
1466 		//
1467 		sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
1468 		rem[0] = sdlTmp.u.Lo;
1469 		rem[1] = sdlTmp.u.Hi;
1470 		sdlTmp.u.Lo = left->v.v.Mid32;
1471 		sdlTmp.u.Hi = left->Hi32;
1472 		sdlTmp.int64 <<= cur_scale;
1473 		rem[2] = sdlTmp.u.Hi;
1474 		rem[3] = (left->Hi32 >> (31 - cur_scale)) >> 1;
1475 
1476 		sdlDivisor.u.Lo = divisor[0];
1477 		sdlDivisor.u.Hi = divisor[1];
1478 		sdlDivisor.int64 <<= cur_scale;
1479 
1480 		if (divisor[2] == 0) {
1481 			// Have a 64-bit divisor in sdlDivisor.  The remainder
1482 			// (currently 96 bits spread over 4 uint32_ts) will be < divisor.
1483 			//
1484 			sdlTmp.u.Lo = rem[2];
1485 			sdlTmp.u.Hi = rem[3];
1486 
1487 			quo[2] = 0;
1488 			quo[1] = Div96By64(&rem[1], sdlDivisor);
1489 			quo[0] = Div96By64(rem, sdlDivisor);
1490 
1491 			for (;;) {
1492 				if ((rem[0] | rem[1]) == 0) {
1493 					if (scale < 0) {
1494 						cur_scale = min(9, -scale);
1495 						goto HaveScale64;
1496 					}
1497 					break;
1498 				}
1499 
1500 				// Remainder is non-zero.  Scale up quotient and remainder by
1501 				// powers of 10 so we can compute more significant bits.
1502 				//
1503 				cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1504 				if (cur_scale == 0) {
1505 					// No more scaling to be done, but remainder is non-zero.
1506 					// Round quotient.
1507 					//
1508 					sdlTmp.u.Lo = rem[0];
1509 					sdlTmp.u.Hi = rem[1];
1510 					if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
1511 					    (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
1512 						goto RoundUp;
1513 					break;
1514 				}
1515 
1516 				if (cur_scale == -1)
1517 					return MONO_DECIMAL_OVERFLOW;
1518 
1519 			HaveScale64:
1520 				pwr = power10[cur_scale];
1521 				scale += cur_scale;
1522 
1523 				if (IncreaseScale(quo, pwr) != 0)
1524 					return MONO_DECIMAL_OVERFLOW;
1525 
1526 				rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
1527 				IncreaseScale(rem, pwr);
1528 				utmp = Div96By64(rem, sdlDivisor);
1529 				quo[0] += utmp;
1530 				if (quo[0] < utmp)
1531 					if (++quo[1] == 0)
1532 						quo[2]++;
1533 
1534 			} // for (;;)
1535 		}
1536 		else {
1537 			// Have a 96-bit divisor in divisor[].
1538 			//
1539 			// Start by finishing the shift left by cur_scale.
1540 			//
1541 			sdlTmp.u.Lo = divisor[1];
1542 			sdlTmp.u.Hi = divisor[2];
1543 			sdlTmp.int64 <<= cur_scale;
1544 			divisor[0] = sdlDivisor.u.Lo;
1545 			divisor[1] = sdlDivisor.u.Hi;
1546 			divisor[2] = sdlTmp.u.Hi;
1547 
1548 			// The remainder (currently 96 bits spread over 4 uint32_ts)
1549 			// will be < divisor.
1550 			//
1551 			quo[2] = 0;
1552 			quo[1] = 0;
1553 			quo[0] = Div128By96(rem, divisor);
1554 
1555 			for (;;) {
1556 				if ((rem[0] | rem[1] | rem[2]) == 0) {
1557 					if (scale < 0) {
1558 						cur_scale = min(9, -scale);
1559 						goto HaveScale96;
1560 					}
1561 					break;
1562 				}
1563 
1564 				// Remainder is non-zero.  Scale up quotient and remainder by
1565 				// powers of 10 so we can compute more significant bits.
1566 				//
1567 				cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1568 				if (cur_scale == 0) {
1569 					// No more scaling to be done, but remainder is non-zero.
1570 					// Round quotient.
1571 					//
1572 					if (rem[2] >= 0x80000000)
1573 						goto RoundUp;
1574 
1575 					utmp = rem[0] > 0x80000000;
1576 					utmp1 = rem[1] > 0x80000000;
1577 					rem[0] <<= 1;
1578 					rem[1] = (rem[1] << 1) + utmp;
1579 					rem[2] = (rem[2] << 1) + utmp1;
1580 
1581 					if ((rem[2] > divisor[2] || rem[2] == divisor[2]) &&
1582 					    ((rem[1] > divisor[1] || rem[1] == divisor[1]) &&
1583 					     ((rem[0] > divisor[0] || rem[0] == divisor[0]) &&
1584 					      (quo[0] & 1))))
1585 						goto RoundUp;
1586 					break;
1587 				}
1588 
1589 				if (cur_scale == -1)
1590 					return MONO_DECIMAL_OVERFLOW;
1591 
1592 			HaveScale96:
1593 				pwr = power10[cur_scale];
1594 				scale += cur_scale;
1595 
1596 				if (IncreaseScale(quo, pwr) != 0)
1597 					return MONO_DECIMAL_OVERFLOW;
1598 
1599 				rem[3] = IncreaseScale(rem, pwr);
1600 				utmp = Div128By96(rem, divisor);
1601 				quo[0] += utmp;
1602 				if (quo[0] < utmp)
1603 					if (++quo[1] == 0)
1604 						quo[2]++;
1605 
1606 			} // for (;;)
1607 		}
1608 	}
1609 
1610 	// No more remainder.  Try extracting any extra powers of 10 we may have
1611 	// added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
1612 	// If a division by one of these powers returns a zero remainder, then
1613 	// we keep the quotient.  If the remainder is not zero, then we restore
1614 	// the previous value.
1615 	//
1616 	// Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
1617 	// we can extract.  We use this as a quick test on whether to try a
1618 	// given power.
1619 	//
1620 	while ((quo[0] & 0xFF) == 0 && scale >= 8) {
1621 		quoSave[0] = quo[0];
1622 		quoSave[1] = quo[1];
1623 		quoSave[2] = quo[2];
1624 
1625 		if (Div96By32(quoSave, 100000000) == 0) {
1626 			quo[0] = quoSave[0];
1627 			quo[1] = quoSave[1];
1628 			quo[2] = quoSave[2];
1629 			scale -= 8;
1630 		}
1631 		else
1632 			break;
1633 	}
1634 
1635 	if ((quo[0] & 0xF) == 0 && scale >= 4) {
1636 		quoSave[0] = quo[0];
1637 		quoSave[1] = quo[1];
1638 		quoSave[2] = quo[2];
1639 
1640 		if (Div96By32(quoSave, 10000) == 0) {
1641 			quo[0] = quoSave[0];
1642 			quo[1] = quoSave[1];
1643 			quo[2] = quoSave[2];
1644 			scale -= 4;
1645 		}
1646 	}
1647 
1648 	if ((quo[0] & 3) == 0 && scale >= 2) {
1649 		quoSave[0] = quo[0];
1650 		quoSave[1] = quo[1];
1651 		quoSave[2] = quo[2];
1652 
1653 		if (Div96By32(quoSave, 100) == 0) {
1654 			quo[0] = quoSave[0];
1655 			quo[1] = quoSave[1];
1656 			quo[2] = quoSave[2];
1657 			scale -= 2;
1658 		}
1659 	}
1660 
1661 	if ((quo[0] & 1) == 0 && scale >= 1) {
1662 		quoSave[0] = quo[0];
1663 		quoSave[1] = quo[1];
1664 		quoSave[2] = quo[2];
1665 
1666 		if (Div96By32(quoSave, 10) == 0) {
1667 			quo[0] = quoSave[0];
1668 			quo[1] = quoSave[1];
1669 			quo[2] = quoSave[2];
1670 			scale -= 1;
1671 		}
1672 	}
1673 
1674 	result->Hi32 = quo[2];
1675 	result->v.v.Mid32 = quo[1];
1676 	result->v.v.Lo32 = quo[0];
1677 	result->u.u.scale = scale;
1678 	result->u.u.sign = left->u.u.sign ^ right->u.u.sign;
1679 	return MONO_DECIMAL_OK;
1680 }
1681 
1682 // mono_decimal_absolute - Decimal Absolute Value
1683 static void G_GNUC_UNUSED
mono_decimal_absolute(MonoDecimal * pdecOprd,MonoDecimal * result)1684 mono_decimal_absolute (MonoDecimal *pdecOprd, MonoDecimal *result)
1685 {
1686 	COPYDEC(*result, *pdecOprd);
1687 	result->u.u.sign &= ~DECIMAL_NEG;
1688 	// Microsoft does not set reserved here
1689 }
1690 
1691 // mono_decimal_fix - Decimal Fix (chop to integer)
1692 static void
mono_decimal_fix(MonoDecimal * pdecOprd,MonoDecimal * result)1693 mono_decimal_fix (MonoDecimal *pdecOprd, MonoDecimal *result)
1694 {
1695 	DecFixInt(result, pdecOprd);
1696 }
1697 
1698 // mono_decimal_round_to_int - Decimal Int (round down to integer)
1699 static void
mono_decimal_round_to_int(MonoDecimal * pdecOprd,MonoDecimal * result)1700 mono_decimal_round_to_int (MonoDecimal *pdecOprd, MonoDecimal *result)
1701 {
1702 	if (DecFixInt(result, pdecOprd) != 0 && (result->u.u.sign & DECIMAL_NEG)) {
1703 		// We have chopped off a non-zero amount from a negative value.  Since
1704 		// we round toward -infinity, we must increase the integer result by
1705 		// 1 to make it more negative.  This will never overflow because
1706 		// in order to have a remainder, we must have had a non-zero scale factor.
1707 		// Our scale factor is back to zero now.
1708 		//
1709 		DECIMAL_LO64_SET(*result, DECIMAL_LO64_GET(*result) + 1);
1710 		if (DECIMAL_LO64_GET(*result) == 0)
1711 			result->Hi32++;
1712 	}
1713 }
1714 
1715 // mono_decimal_negate - Decimal Negate
1716 static void G_GNUC_UNUSED
mono_decimal_negate(MonoDecimal * pdecOprd,MonoDecimal * result)1717 mono_decimal_negate (MonoDecimal *pdecOprd, MonoDecimal *result)
1718 {
1719 	COPYDEC(*result, *pdecOprd);
1720 	// Microsoft does not set result->reserved to zero on this case.
1721 	result->u.u.sign ^= DECIMAL_NEG;
1722 }
1723 
1724 //
1725 // Returns: MONO_DECIMAL_INVALID_ARGUMENT, MONO_DECIMAL_OK
1726 //
1727 static MonoDecimalStatus
mono_decimal_round_result(MonoDecimal * input,int cDecimals,MonoDecimal * result)1728 mono_decimal_round_result(MonoDecimal *input, int cDecimals, MonoDecimal *result)
1729 {
1730 	uint32_t num[3];
1731 	uint32_t rem;
1732 	uint32_t sticky;
1733 	uint32_t pwr;
1734 	int scale;
1735 
1736 	if (cDecimals < 0)
1737 		return MONO_DECIMAL_INVALID_ARGUMENT;
1738 
1739 	scale = input->u.u.scale - cDecimals;
1740 	if (scale > 0) {
1741 		num[0] = input->v.v.Lo32;
1742 		num[1] = input->v.v.Mid32;
1743 		num[2] = input->Hi32;
1744 		result->u.u.sign = input->u.u.sign;
1745 		rem = sticky = 0;
1746 
1747 		do {
1748 			sticky |= rem;
1749 			if (scale > POWER10_MAX)
1750 				pwr = ten_to_nine;
1751 			else
1752 				pwr = power10[scale];
1753 
1754 			rem = Div96By32(num, pwr);
1755 			scale -= 9;
1756 		}while (scale > 0);
1757 
1758 		// Now round.  rem has last remainder, sticky has sticky bits.
1759 		// To do IEEE rounding, we add LSB of result to sticky bits so
1760 		// either causes round up if remainder * 2 == last divisor.
1761 		//
1762 		sticky |= num[0] & 1;
1763 		rem = (rem << 1) + (sticky != 0);
1764 		if (pwr < rem &&
1765 		    ++num[0] == 0 &&
1766 		    ++num[1] == 0
1767 			)
1768 			++num[2];
1769 
1770 		result->v.v.Lo32 = num[0];
1771 		result->v.v.Mid32 = num[1];
1772 		result->Hi32 = num[2];
1773 		result->u.u.scale = cDecimals;
1774 		return MONO_DECIMAL_OK;
1775 	}
1776 
1777 	COPYDEC(*result, *input);
1778 	// Odd, the Microsoft source does not set the result->reserved to zero here.
1779 	return MONO_DECIMAL_OK;
1780 }
1781 
1782 //
1783 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1784 static MonoDecimalStatus
mono_decimal_from_float(float input_f,MonoDecimal * result)1785 mono_decimal_from_float (float input_f, MonoDecimal* result)
1786 {
1787 	int         exp;    // number of bits to left of binary point
1788 	int         power;
1789 	uint32_t       mant;
1790 	double      dbl;
1791 	SPLIT64     sdlLo;
1792 	SPLIT64     sdlHi;
1793 	int         lmax, cur;  // temps used during scale reduction
1794 	MonoSingle_float input = { .f = input_f };
1795 
1796 	// The most we can scale by is 10^28, which is just slightly more
1797 	// than 2^93.  So a float with an exponent of -94 could just
1798 	// barely reach 0.5, but smaller exponents will always round to zero.
1799 	//
1800 	if ((exp = input.s.exp - MONO_SINGLE_BIAS) < -94 ) {
1801 		DECIMAL_SETZERO(*result);
1802 		return MONO_DECIMAL_OK;
1803 	}
1804 
1805 	if (exp > 96)
1806 		return MONO_DECIMAL_OVERFLOW;
1807 
1808 	// Round the input to a 7-digit integer.  The R4 format has
1809 	// only 7 digits of precision, and we want to keep garbage digits
1810 	// out of the Decimal were making.
1811 	//
1812 	// Calculate max power of 10 input value could have by multiplying
1813 	// the exponent by log10(2).  Using scaled integer multiplcation,
1814 	// log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1815 	//
1816 	dbl = fabs(input.f);
1817 	power = 6 - ((exp * 19728) >> 16);
1818 
1819 	if (power >= 0) {
1820 		// We have less than 7 digits, scale input up.
1821 		//
1822 		if (power > DECMAX)
1823 			power = DECMAX;
1824 
1825 		dbl = dbl * double_power10[power];
1826 	} else {
1827 		if (power != -1 || dbl >= 1E7)
1828 			dbl = dbl / fnDblPower10(-power);
1829 		else
1830 			power = 0; // didn't scale it
1831 	}
1832 
1833 	g_assert (dbl < 1E7);
1834 	if (dbl < 1E6 && power < DECMAX) {
1835 		dbl *= 10;
1836 		power++;
1837 		g_assert(dbl >= 1E6);
1838 	}
1839 
1840 	// Round to integer
1841 	//
1842 	mant = (int32_t)dbl;
1843 	dbl -= (double)mant;  // difference between input & integer
1844 	if ( dbl > 0.5 || (dbl == 0.5 && (mant & 1)))
1845 		mant++;
1846 
1847 	if (mant == 0) {
1848 		DECIMAL_SETZERO(*result);
1849 		return MONO_DECIMAL_OK;
1850 	}
1851 
1852 	if (power < 0) {
1853 		// Add -power factors of 10, -power <= (29 - 7) = 22.
1854 		//
1855 		power = -power;
1856 		if (power < 10) {
1857 			sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power]);
1858 
1859 			DECIMAL_LO32(*result) = sdlLo.u.Lo;
1860 			DECIMAL_MID32(*result) = sdlLo.u.Hi;
1861 			DECIMAL_HI32(*result) = 0;
1862 		} else {
1863 			// Have a big power of 10.
1864 			//
1865 			if (power > 18) {
1866 				sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 18]);
1867 				sdlLo.int64 = UInt64x64To128(sdlLo, ten_to_eighteen, &sdlHi.int64);
1868 
1869 				if (sdlHi.u.Hi != 0)
1870 					return MONO_DECIMAL_OVERFLOW;
1871 			}
1872 			else {
1873 				sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 9]);
1874 				sdlHi.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Hi);
1875 				sdlLo.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Lo);
1876 				sdlHi.int64 += sdlLo.u.Hi;
1877 				sdlLo.u.Hi = sdlHi.u.Lo;
1878 				sdlHi.u.Lo = sdlHi.u.Hi;
1879 			}
1880 			DECIMAL_LO32(*result) = sdlLo.u.Lo;
1881 			DECIMAL_MID32(*result) = sdlLo.u.Hi;
1882 			DECIMAL_HI32(*result) = sdlHi.u.Lo;
1883 		}
1884 		DECIMAL_SCALE(*result) = 0;
1885 	} else {
1886 		// Factor out powers of 10 to reduce the scale, if possible.
1887 		// The maximum number we could factor out would be 6.  This
1888 		// comes from the fact we have a 7-digit number, and the
1889 		// MSD must be non-zero -- but the lower 6 digits could be
1890 		// zero.  Note also the scale factor is never negative, so
1891 		// we can't scale by any more than the power we used to
1892 		// get the integer.
1893 		//
1894 		// DivMod32by32 returns the quotient in Lo, the remainder in Hi.
1895 		//
1896 		lmax = min(power, 6);
1897 
1898 		// lmax is the largest power of 10 to try, lmax <= 6.
1899 		// We'll try powers 4, 2, and 1 unless they're too big.
1900 		//
1901 		for (cur = 4; cur > 0; cur >>= 1)
1902 		{
1903 			if (cur > lmax)
1904 				continue;
1905 
1906 			sdlLo.int64 = DivMod32by32(mant, (uint32_t)long_power10[cur]);
1907 
1908 			if (sdlLo.u.Hi == 0) {
1909 				mant = sdlLo.u.Lo;
1910 				power -= cur;
1911 				lmax -= cur;
1912 			}
1913 		}
1914 		DECIMAL_LO32(*result) = mant;
1915 		DECIMAL_MID32(*result) = 0;
1916 		DECIMAL_HI32(*result) = 0;
1917 		DECIMAL_SCALE(*result) = power;
1918 	}
1919 
1920 	DECIMAL_SIGN(*result) = (char)input.s.sign << 7;
1921 	return MONO_DECIMAL_OK;
1922 }
1923 
1924 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1925 static MonoDecimalStatus
mono_decimal_from_double(double input_d,MonoDecimal * result)1926 mono_decimal_from_double (double input_d, MonoDecimal *result)
1927 {
1928 	int         exp;    // number of bits to left of binary point
1929 	int         power;  // power-of-10 scale factor
1930 	SPLIT64     sdlMant;
1931 	SPLIT64     sdlLo;
1932 	double      dbl;
1933 	int         lmax, cur;  // temps used during scale reduction
1934 	uint32_t       pwr_cur;
1935 	uint32_t       quo;
1936 	MonoDouble_double input = { .d = input_d };
1937 
1938 	// The most we can scale by is 10^28, which is just slightly more
1939 	// than 2^93.  So a float with an exponent of -94 could just
1940 	// barely reach 0.5, but smaller exponents will always round to zero.
1941 	//
1942 	if ((exp = input.s.exp - MONO_DOUBLE_BIAS) < -94) {
1943 		DECIMAL_SETZERO(*result);
1944 		return MONO_DECIMAL_OK;
1945 	}
1946 
1947 	if (exp > 96)
1948 		return MONO_DECIMAL_OVERFLOW;
1949 
1950 	// Round the input to a 15-digit integer.  The R8 format has
1951 	// only 15 digits of precision, and we want to keep garbage digits
1952 	// out of the Decimal were making.
1953 	//
1954 	// Calculate max power of 10 input value could have by multiplying
1955 	// the exponent by log10(2).  Using scaled integer multiplcation,
1956 	// log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1957 	//
1958 	dbl = fabs(input.d);
1959 	power = 14 - ((exp * 19728) >> 16);
1960 
1961 	if (power >= 0) {
1962 		// We have less than 15 digits, scale input up.
1963 		//
1964 		if (power > DECMAX)
1965 			power = DECMAX;
1966 
1967 		dbl = dbl * double_power10[power];
1968 	} else {
1969 		if (power != -1 || dbl >= 1E15)
1970 			dbl = dbl / fnDblPower10(-power);
1971 		else
1972 			power = 0; // didn't scale it
1973 	}
1974 
1975 	g_assert (dbl < 1E15);
1976 	if (dbl < 1E14 && power < DECMAX) {
1977 		dbl *= 10;
1978 		power++;
1979 		g_assert(dbl >= 1E14);
1980 	}
1981 
1982 	// Round to int64
1983 	//
1984 	sdlMant.int64 = (int64_t)dbl;
1985 	dbl -= (double)(int64_t)sdlMant.int64;  // dif between input & integer
1986 	if ( dbl > 0.5 || (dbl == 0.5 && (sdlMant.u.Lo & 1)))
1987 		sdlMant.int64++;
1988 
1989 	if (sdlMant.int64 == 0) {
1990 		DECIMAL_SETZERO(*result);
1991 		return MONO_DECIMAL_OK;
1992 	}
1993 
1994 	if (power < 0) {
1995 		// Add -power factors of 10, -power <= (29 - 15) = 14.
1996 		//
1997 		power = -power;
1998 		if (power < 10) {
1999 			sdlLo.int64 = UInt32x32To64(sdlMant.u.Lo, (uint32_t)long_power10[power]);
2000 			sdlMant.int64 = UInt32x32To64(sdlMant.u.Hi, (uint32_t)long_power10[power]);
2001 			sdlMant.int64 += sdlLo.u.Hi;
2002 			sdlLo.u.Hi = sdlMant.u.Lo;
2003 			sdlMant.u.Lo = sdlMant.u.Hi;
2004 		}
2005 		else {
2006 			// Have a big power of 10.
2007 			//
2008 			g_assert(power <= 14);
2009 			sdlLo.int64 = UInt64x64To128(sdlMant, sdl_power10[power-10], &sdlMant.int64);
2010 
2011 			if (sdlMant.u.Hi != 0)
2012 				return MONO_DECIMAL_OVERFLOW;
2013 		}
2014 		DECIMAL_LO32(*result) = sdlLo.u.Lo;
2015 		DECIMAL_MID32(*result) = sdlLo.u.Hi;
2016 		DECIMAL_HI32(*result) = sdlMant.u.Lo;
2017 		DECIMAL_SCALE(*result) = 0;
2018 	}
2019 	else {
2020 		// Factor out powers of 10 to reduce the scale, if possible.
2021 		// The maximum number we could factor out would be 14.  This
2022 		// comes from the fact we have a 15-digit number, and the
2023 		// MSD must be non-zero -- but the lower 14 digits could be
2024 		// zero.  Note also the scale factor is never negative, so
2025 		// we can't scale by any more than the power we used to
2026 		// get the integer.
2027 		//
2028 		// DivMod64by32 returns the quotient in Lo, the remainder in Hi.
2029 		//
2030 		lmax = min(power, 14);
2031 
2032 		// lmax is the largest power of 10 to try, lmax <= 14.
2033 		// We'll try powers 8, 4, 2, and 1 unless they're too big.
2034 		//
2035 		for (cur = 8; cur > 0; cur >>= 1)
2036 		{
2037 			if (cur > lmax)
2038 				continue;
2039 
2040 			pwr_cur = (uint32_t)long_power10[cur];
2041 
2042 			if (sdlMant.u.Hi >= pwr_cur) {
2043 				// Overflow if we try to divide in one step.
2044 				//
2045 				sdlLo.int64 = DivMod64by32(sdlMant.u.Hi, pwr_cur);
2046 				quo = sdlLo.u.Lo;
2047 				sdlLo.u.Lo = sdlMant.u.Lo;
2048 				sdlLo.int64 = DivMod64by32(sdlLo.int64, pwr_cur);
2049 			}
2050 			else {
2051 				quo = 0;
2052 				sdlLo.int64 = DivMod64by32(sdlMant.int64, pwr_cur);
2053 			}
2054 
2055 			if (sdlLo.u.Hi == 0) {
2056 				sdlMant.u.Hi = quo;
2057 				sdlMant.u.Lo = sdlLo.u.Lo;
2058 				power -= cur;
2059 				lmax -= cur;
2060 			}
2061 		}
2062 
2063 		DECIMAL_HI32(*result) = 0;
2064 		DECIMAL_SCALE(*result) = power;
2065 		DECIMAL_LO32(*result) = sdlMant.u.Lo;
2066 		DECIMAL_MID32(*result) = sdlMant.u.Hi;
2067 	}
2068 
2069 	DECIMAL_SIGN(*result) = (char)input.s.sign << 7;
2070 	return MONO_DECIMAL_OK;
2071 }
2072 
2073 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2074 static MonoDecimalStatus
mono_decimal_to_double_result(MonoDecimal * input,double * result)2075 mono_decimal_to_double_result(MonoDecimal *input, double *result)
2076 {
2077 	SPLIT64  tmp;
2078 	double   dbl;
2079 
2080 	if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2081 		return MONO_DECIMAL_INVALID_ARGUMENT;
2082 
2083 	tmp.u.Lo = DECIMAL_LO32(*input);
2084 	tmp.u.Hi = DECIMAL_MID32(*input);
2085 
2086 	if ((int32_t)DECIMAL_MID32(*input) < 0)
2087 		dbl = (ds2to64.d + (double)(int64_t)tmp.int64 +
2088 		       (double)DECIMAL_HI32(*input) * ds2to64.d) / fnDblPower10(DECIMAL_SCALE(*input)) ;
2089 	else
2090 		dbl = ((double)(int64_t)tmp.int64 +
2091 		       (double)DECIMAL_HI32(*input) * ds2to64.d) / fnDblPower10(DECIMAL_SCALE(*input));
2092 
2093 	if (DECIMAL_SIGN(*input))
2094 		dbl = -dbl;
2095 
2096 	*result = dbl;
2097 	return MONO_DECIMAL_OK;
2098 }
2099 
2100 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2101 static MonoDecimalStatus
mono_decimal_to_float_result(MonoDecimal * input,float * result)2102 mono_decimal_to_float_result(MonoDecimal *input, float *result)
2103 {
2104 	double   dbl;
2105 
2106 	if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2107 		return MONO_DECIMAL_INVALID_ARGUMENT;
2108 
2109 	// Can't overflow; no errors possible.
2110 	//
2111 	mono_decimal_to_double_result(input, &dbl);
2112 	*result = (float)dbl;
2113 	return MONO_DECIMAL_OK;
2114 }
2115 
2116 static void
DecShiftLeft(MonoDecimal * value)2117 DecShiftLeft(MonoDecimal* value)
2118 {
2119 	unsigned int c0 = DECIMAL_LO32(*value) & 0x80000000? 1: 0;
2120     unsigned int c1 = DECIMAL_MID32(*value) & 0x80000000? 1: 0;
2121     g_assert(value != NULL);
2122 
2123     DECIMAL_LO32(*value) <<= 1;
2124     DECIMAL_MID32(*value) = DECIMAL_MID32(*value) << 1 | c0;
2125     DECIMAL_HI32(*value) = DECIMAL_HI32(*value) << 1 | c1;
2126 }
2127 
2128 static int
D32AddCarry(uint32_t * value,uint32_t i)2129 D32AddCarry(uint32_t* value, uint32_t i)
2130 {
2131     uint32_t v = *value;
2132     uint32_t sum = v + i;
2133     *value = sum;
2134     return sum < v || sum < i? 1: 0;
2135 }
2136 
2137 static void
DecAdd(MonoDecimal * value,MonoDecimal * d)2138 DecAdd(MonoDecimal *value, MonoDecimal* d)
2139 {
2140 	g_assert(value != NULL && d != NULL);
2141 
2142 	if (D32AddCarry(&DECIMAL_LO32(*value), DECIMAL_LO32(*d))) {
2143 		if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2144 			D32AddCarry(&DECIMAL_HI32(*value), 1);
2145 		}
2146 	}
2147 	if (D32AddCarry(&DECIMAL_MID32(*value), DECIMAL_MID32(*d))) {
2148 		D32AddCarry(&DECIMAL_HI32(*value), 1);
2149 	}
2150 	D32AddCarry(&DECIMAL_HI32(*value), DECIMAL_HI32(*d));
2151 }
2152 
2153 static void
DecMul10(MonoDecimal * value)2154 DecMul10(MonoDecimal* value)
2155 {
2156 	MonoDecimal d = *value;
2157 	g_assert (value != NULL);
2158 
2159 	DecShiftLeft(value);
2160 	DecShiftLeft(value);
2161 	DecAdd(value, &d);
2162 	DecShiftLeft(value);
2163 }
2164 
2165 static void
DecAddInt32(MonoDecimal * value,unsigned int i)2166 DecAddInt32(MonoDecimal* value, unsigned int i)
2167 {
2168 	g_assert(value != NULL);
2169 
2170 	if (D32AddCarry(&DECIMAL_LO32(*value), i)) {
2171 		if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2172 			D32AddCarry(&DECIMAL_HI32(*value), 1);
2173 		}
2174 	}
2175 }
2176 
2177 MonoDecimalCompareResult
mono_decimal_compare(MonoDecimal * left,MonoDecimal * right)2178 mono_decimal_compare (MonoDecimal *left, MonoDecimal *right)
2179 {
2180 	uint32_t   left_sign;
2181 	uint32_t   right_sign;
2182 	MonoDecimal result;
2183 
2184 	result.Hi32 = 0; 	// Just to shut up the compiler
2185 
2186 	// First check signs and whether either are zero.  If both are
2187 	// non-zero and of the same sign, just use subtraction to compare.
2188 	//
2189 	left_sign = left->v.v.Lo32 | left->v.v.Mid32 | left->Hi32;
2190 	right_sign = right->v.v.Lo32 | right->v.v.Mid32 | right->Hi32;
2191 	if (left_sign != 0)
2192 		left_sign = (left->u.u.sign & DECIMAL_NEG) | 1;
2193 
2194 	if (right_sign != 0)
2195 		right_sign = (right->u.u.sign & DECIMAL_NEG) | 1;
2196 
2197 	// left_sign & right_sign have values 1, 0, or 0x81 depending on if the left/right
2198 	// operand is +, 0, or -.
2199 	//
2200 	if (left_sign == right_sign) {
2201 		if (left_sign == 0)    // both are zero
2202 			return MONO_DECIMAL_CMP_EQ; // return equal
2203 
2204 		DecAddSub(left, right, &result, DECIMAL_NEG);
2205 		if (DECIMAL_LO64_GET(result) == 0 && result.Hi32 == 0)
2206 			return MONO_DECIMAL_CMP_EQ;
2207 		if (result.u.u.sign & DECIMAL_NEG)
2208 			return MONO_DECIMAL_CMP_LT;
2209 		return MONO_DECIMAL_CMP_GT;
2210 	}
2211 
2212 	//
2213 	// Signs are different.  Use signed byte comparison
2214 	//
2215 	if ((signed char)left_sign > (signed char)right_sign)
2216 		return MONO_DECIMAL_CMP_GT;
2217 	return MONO_DECIMAL_CMP_LT;
2218 }
2219 
2220 void
mono_decimal_init_single(MonoDecimal * _this,float value)2221 mono_decimal_init_single (MonoDecimal *_this, float value)
2222 {
2223 	if (mono_decimal_from_float (value, _this) == MONO_DECIMAL_OVERFLOW) {
2224 		mono_set_pending_exception (mono_get_exception_overflow ());
2225 		return;
2226 	}
2227 	_this->reserved = 0;
2228 }
2229 
2230 void
mono_decimal_init_double(MonoDecimal * _this,double value)2231 mono_decimal_init_double (MonoDecimal *_this, double value)
2232 {
2233 	if (mono_decimal_from_double (value, _this) == MONO_DECIMAL_OVERFLOW) {
2234 		mono_set_pending_exception (mono_get_exception_overflow ());
2235 		return;
2236 	}
2237 	_this->reserved = 0;
2238 }
2239 
2240 void
mono_decimal_floor(MonoDecimal * d)2241 mono_decimal_floor (MonoDecimal *d)
2242 {
2243 	MonoDecimal decRes;
2244 
2245 	mono_decimal_round_to_int(d, &decRes);
2246 
2247 	// copy decRes into d
2248 	COPYDEC(*d, decRes);
2249 	d->reserved = 0;
2250 	FC_GC_POLL ();
2251 }
2252 
2253 int32_t
mono_decimal_get_hash_code(MonoDecimal * d)2254 mono_decimal_get_hash_code (MonoDecimal *d)
2255 {
2256 	double dbl;
2257 
2258 	if (mono_decimal_to_double_result(d, &dbl) != MONO_DECIMAL_OK)
2259 		return 0;
2260 
2261 	if (dbl == 0.0) {
2262 		// Ensure 0 and -0 have the same hash code
2263 		return 0;
2264 	}
2265 	// conversion to double is lossy and produces rounding errors so we mask off the lowest 4 bits
2266 	//
2267 	// For example these two numerically equal decimals with different internal representations produce
2268 	// slightly different results when converted to double:
2269 	//
2270 	// decimal a = new decimal(new int[] { 0x76969696, 0x2fdd49fa, 0x409783ff, 0x00160000 });
2271 	//                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.176470588
2272 	// decimal b = new decimal(new int[] { 0x3f0f0f0f, 0x1e62edcc, 0x06758d33, 0x00150000 });
2273 	//                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.1764705882
2274 	//
2275 	return ((((int *)&dbl)[0]) & 0xFFFFFFF0) ^ ((int *)&dbl)[1];
2276 
2277 }
2278 
2279 void
mono_decimal_multiply(MonoDecimal * d1,MonoDecimal * d2)2280 mono_decimal_multiply (MonoDecimal *d1, MonoDecimal *d2)
2281 {
2282 	MonoDecimal decRes;
2283 
2284 	MonoDecimalStatus status = mono_decimal_multiply_result(d1, d2, &decRes);
2285 	if (status != MONO_DECIMAL_OK) {
2286 		mono_set_pending_exception (mono_get_exception_overflow ());
2287 		return;
2288 	}
2289 
2290 	COPYDEC(*d1, decRes);
2291 	d1->reserved = 0;
2292 
2293 	FC_GC_POLL ();
2294 }
2295 
2296 void
mono_decimal_round(MonoDecimal * d,int32_t decimals)2297 mono_decimal_round (MonoDecimal *d, int32_t decimals)
2298 {
2299 	MonoDecimal decRes;
2300 
2301 	// GC is only triggered for throwing, no need to protect result
2302 	if (decimals < 0 || decimals > 28) {
2303 		mono_set_pending_exception (mono_get_exception_argument_out_of_range ("d"));
2304 		return;
2305 	}
2306 
2307 	mono_decimal_round_result(d, decimals, &decRes);
2308 
2309 	// copy decRes into d
2310 	COPYDEC(*d, decRes);
2311 	d->reserved = 0;
2312 
2313 	FC_GC_POLL();
2314 }
2315 
2316 void
mono_decimal_tocurrency(MonoDecimal * decimal)2317 mono_decimal_tocurrency (MonoDecimal *decimal)
2318 {
2319 	// TODO
2320 }
2321 
2322 double
mono_decimal_to_double(MonoDecimal d)2323 mono_decimal_to_double (MonoDecimal d)
2324 {
2325 	double result = 0.0;
2326 	// Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2327 	mono_decimal_to_double_result(&d, &result);
2328 	return result;
2329 }
2330 
2331 int32_t
mono_decimal_to_int32(MonoDecimal d)2332 mono_decimal_to_int32 (MonoDecimal d)
2333 {
2334 	MonoDecimal result;
2335 
2336 	// The following can not return an error, it only returns INVALID_ARG if the decimals is < 0
2337 	mono_decimal_round_result(&d, 0, &result);
2338 
2339 	if (DECIMAL_SCALE(result) != 0) {
2340 		d = result;
2341 		mono_decimal_fix (&d, &result);
2342 	}
2343 
2344 	if (DECIMAL_HI32(result) == 0 && DECIMAL_MID32(result) == 0) {
2345 		int32_t i = DECIMAL_LO32(result);
2346 		if ((int16_t)DECIMAL_SIGNSCALE(result) >= 0) {
2347 			if (i >= 0)
2348 				return i;
2349 		} else {
2350 			i = -i;
2351 			if (i <= 0)
2352 				return i;
2353 		}
2354 	}
2355 
2356 	mono_set_pending_exception (mono_get_exception_overflow ());
2357 	return 0;
2358 }
2359 
2360 float
mono_decimal_to_float(MonoDecimal d)2361 mono_decimal_to_float (MonoDecimal d)
2362 {
2363 	float result = 0.0f;
2364 	// Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2365 	mono_decimal_to_float_result(&d, &result);
2366 	return result;
2367 }
2368 
2369 void
mono_decimal_truncate(MonoDecimal * d)2370 mono_decimal_truncate (MonoDecimal *d)
2371 {
2372 	MonoDecimal decRes;
2373 
2374 	mono_decimal_fix(d, &decRes);
2375 
2376 	// copy decRes into d
2377 	COPYDEC(*d, decRes);
2378 	d->reserved = 0;
2379 	FC_GC_POLL();
2380 }
2381 
2382 void
mono_decimal_addsub(MonoDecimal * left,MonoDecimal * right,uint8_t sign)2383 mono_decimal_addsub (MonoDecimal *left, MonoDecimal *right, uint8_t sign)
2384 {
2385 	MonoDecimal result, decTmp;
2386 	MonoDecimal *pdecTmp, *leftOriginal;
2387 	uint32_t    num[6], pwr;
2388 	int         scale, hi_prod, cur;
2389 	SPLIT64     sdlTmp;
2390 
2391 	g_assert(sign == 0 || sign == DECIMAL_NEG);
2392 
2393 	leftOriginal = left;
2394 
2395 	sign ^= (DECIMAL_SIGN(*right) ^ DECIMAL_SIGN(*left)) & DECIMAL_NEG;
2396 
2397 	if (DECIMAL_SCALE(*right) == DECIMAL_SCALE(*left)) {
2398 		// Scale factors are equal, no alignment necessary.
2399 		//
2400 		DECIMAL_SIGNSCALE(result) = DECIMAL_SIGNSCALE(*left);
2401 
2402 	AlignedAdd:
2403 		if (sign) {
2404 			// Signs differ - subtract
2405 			//
2406 			DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right)));
2407 			DECIMAL_HI32(result) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
2408 
2409 			// Propagate carry
2410 			//
2411 			if (DECIMAL_LO64_GET(result) > DECIMAL_LO64_GET(*left)) {
2412 				DECIMAL_HI32(result)--;
2413 				if (DECIMAL_HI32(result) >= DECIMAL_HI32(*left))
2414 					goto SignFlip;
2415 			} else if (DECIMAL_HI32(result) > DECIMAL_HI32(*left)) {
2416 				// Got negative result.  Flip its sign.
2417 				//
2418 			SignFlip:
2419 				DECIMAL_LO64_SET(result, -(int64_t)DECIMAL_LO64_GET(result));
2420 				DECIMAL_HI32(result) = ~DECIMAL_HI32(result);
2421 				if (DECIMAL_LO64_GET(result) == 0)
2422 					DECIMAL_HI32(result)++;
2423 				DECIMAL_SIGN(result) ^= DECIMAL_NEG;
2424 			}
2425 
2426 		} else {
2427 			// Signs are the same - add
2428 			//
2429 			DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right)));
2430 			DECIMAL_HI32(result) = DECIMAL_HI32(*left) + DECIMAL_HI32(*right);
2431 
2432 			// Propagate carry
2433 			//
2434 			if (DECIMAL_LO64_GET(result) < DECIMAL_LO64_GET(*left)) {
2435 				DECIMAL_HI32(result)++;
2436 				if (DECIMAL_HI32(result) <= DECIMAL_HI32(*left))
2437 					goto AlignedScale;
2438 			} else if (DECIMAL_HI32(result) < DECIMAL_HI32(*left)) {
2439 			AlignedScale:
2440 				// The addition carried above 96 bits.  Divide the result by 10,
2441 				// dropping the scale factor.
2442 				//
2443 				if (DECIMAL_SCALE(result) == 0) {
2444 					mono_set_pending_exception (mono_get_exception_overflow ());
2445 					return;
2446 				}
2447 				DECIMAL_SCALE(result)--;
2448 
2449 				sdlTmp.u.Lo = DECIMAL_HI32(result);
2450 				sdlTmp.u.Hi = 1;
2451 				sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2452 				DECIMAL_HI32(result) = sdlTmp.u.Lo;
2453 
2454 				sdlTmp.u.Lo = DECIMAL_MID32(result);
2455 				sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2456 				DECIMAL_MID32(result) = sdlTmp.u.Lo;
2457 
2458 				sdlTmp.u.Lo = DECIMAL_LO32(result);
2459 				sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2460 				DECIMAL_LO32(result) = sdlTmp.u.Lo;
2461 
2462 				// See if we need to round up.
2463 				//
2464 				if (sdlTmp.u.Hi >= 5 && (sdlTmp.u.Hi > 5 || (DECIMAL_LO32(result) & 1))) {
2465 					DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(result)+1);
2466 					if (DECIMAL_LO64_GET(result) == 0)
2467 						DECIMAL_HI32(result)++;
2468 				}
2469 			}
2470 		}
2471 	} else {
2472 		// Scale factors are not equal.  Assume that a larger scale
2473 		// factor (more decimal places) is likely to mean that number
2474 		// is smaller.  Start by guessing that the right operand has
2475 		// the larger scale factor.  The result will have the larger
2476 		// scale factor.
2477 		//
2478 		DECIMAL_SCALE(result) = DECIMAL_SCALE(*right);  // scale factor of "smaller"
2479 		DECIMAL_SIGN(result) = DECIMAL_SIGN(*left);    // but sign of "larger"
2480 		scale = DECIMAL_SCALE(result)- DECIMAL_SCALE(*left);
2481 
2482 		if (scale < 0) {
2483 			// Guessed scale factor wrong. Swap operands.
2484 			//
2485 			scale = -scale;
2486 			DECIMAL_SCALE(result) = DECIMAL_SCALE(*left);
2487 			DECIMAL_SIGN(result) ^= sign;
2488 			pdecTmp = right;
2489 			right = left;
2490 			left = pdecTmp;
2491 		}
2492 
2493 		// *left will need to be multiplied by 10^scale so
2494 		// it will have the same scale as *right.  We could be
2495 		// extending it to up to 192 bits of precision.
2496 		//
2497 		if (scale <= POWER10_MAX) {
2498 			// Scaling won't make it larger than 4 uint32_ts
2499 			//
2500 			pwr = power10[scale];
2501 			DECIMAL_LO64_SET(decTmp, UInt32x32To64(DECIMAL_LO32(*left), pwr));
2502 			sdlTmp.int64 = UInt32x32To64(DECIMAL_MID32(*left), pwr);
2503 			sdlTmp.int64 += DECIMAL_MID32(decTmp);
2504 			DECIMAL_MID32(decTmp) = sdlTmp.u.Lo;
2505 			DECIMAL_HI32(decTmp) = sdlTmp.u.Hi;
2506 			sdlTmp.int64 = UInt32x32To64(DECIMAL_HI32(*left), pwr);
2507 			sdlTmp.int64 += DECIMAL_HI32(decTmp);
2508 			if (sdlTmp.u.Hi == 0) {
2509 				// Result fits in 96 bits.  Use standard aligned add.
2510 				//
2511 				DECIMAL_HI32(decTmp) = sdlTmp.u.Lo;
2512 				left = &decTmp;
2513 				goto AlignedAdd;
2514 			}
2515 			num[0] = DECIMAL_LO32(decTmp);
2516 			num[1] = DECIMAL_MID32(decTmp);
2517 			num[2] = sdlTmp.u.Lo;
2518 			num[3] = sdlTmp.u.Hi;
2519 			hi_prod = 3;
2520 		} else {
2521 			// Have to scale by a bunch.  Move the number to a buffer
2522 			// where it has room to grow as it's scaled.
2523 			//
2524 			num[0] = DECIMAL_LO32(*left);
2525 			num[1] = DECIMAL_MID32(*left);
2526 			num[2] = DECIMAL_HI32(*left);
2527 			hi_prod = 2;
2528 
2529 			// Scan for zeros in the upper words.
2530 			//
2531 			if (num[2] == 0) {
2532 				hi_prod = 1;
2533 				if (num[1] == 0) {
2534 					hi_prod = 0;
2535 					if (num[0] == 0) {
2536 						// Left arg is zero, return right.
2537 						//
2538 						DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(*right));
2539 						DECIMAL_HI32(result) = DECIMAL_HI32(*right);
2540 						DECIMAL_SIGN(result) ^= sign;
2541 						goto RetDec;
2542 					}
2543 				}
2544 			}
2545 
2546 			// Scaling loop, up to 10^9 at a time.  hi_prod stays updated
2547 			// with index of highest non-zero uint32_t.
2548 			//
2549 			for (; scale > 0; scale -= POWER10_MAX) {
2550 				if (scale > POWER10_MAX)
2551 					pwr = ten_to_nine;
2552 				else
2553 					pwr = power10[scale];
2554 
2555 				sdlTmp.u.Hi = 0;
2556 				for (cur = 0; cur <= hi_prod; cur++) {
2557 					sdlTmp.int64 = UInt32x32To64(num[cur], pwr) + sdlTmp.u.Hi;
2558 					num[cur] = sdlTmp.u.Lo;
2559 				}
2560 
2561 				if (sdlTmp.u.Hi != 0)
2562 					// We're extending the result by another uint32_t.
2563 					num[++hi_prod] = sdlTmp.u.Hi;
2564 			}
2565 		}
2566 
2567 		// Scaling complete, do the add.  Could be subtract if signs differ.
2568 		//
2569 		sdlTmp.u.Lo = num[0];
2570 		sdlTmp.u.Hi = num[1];
2571 
2572 		if (sign) {
2573 			// Signs differ, subtract.
2574 			//
2575 			DECIMAL_LO64_SET(result, (sdlTmp.int64 - DECIMAL_LO64_GET(*right)));
2576 			DECIMAL_HI32(result) = num[2] - DECIMAL_HI32(*right);
2577 
2578 			// Propagate carry
2579 			//
2580 			if (DECIMAL_LO64_GET(result) > sdlTmp.int64) {
2581 				DECIMAL_HI32(result)--;
2582 				if (DECIMAL_HI32(result) >= num[2])
2583 					goto LongSub;
2584 			} else if (DECIMAL_HI32(result) > num[2]) {
2585 			LongSub:
2586 				// If num has more than 96 bits of precision, then we need to
2587 				// carry the subtraction into the higher bits.  If it doesn't,
2588 				// then we subtracted in the wrong order and have to flip the
2589 				// sign of the result.
2590 				//
2591 				if (hi_prod <= 2)
2592 					goto SignFlip;
2593 
2594 				cur = 3;
2595 				while(num[cur++]-- == 0);
2596 				if (num[hi_prod] == 0)
2597 					hi_prod--;
2598 			}
2599 		} else {
2600 			// Signs the same, add.
2601 			//
2602 			DECIMAL_LO64_SET(result, (sdlTmp.int64 + DECIMAL_LO64_GET(*right)));
2603 			DECIMAL_HI32(result) = num[2] + DECIMAL_HI32(*right);
2604 
2605 			// Propagate carry
2606 			//
2607 			if (DECIMAL_LO64_GET(result) < sdlTmp.int64) {
2608 				DECIMAL_HI32(result)++;
2609 				if (DECIMAL_HI32(result) <= num[2])
2610 					goto LongAdd;
2611 			} else if (DECIMAL_HI32(result) < num[2]) {
2612 			LongAdd:
2613 				// Had a carry above 96 bits.
2614 				//
2615 				cur = 3;
2616 				do {
2617 					if (hi_prod < cur) {
2618 						num[cur] = 1;
2619 						hi_prod = cur;
2620 						break;
2621 					}
2622 				}while (++num[cur++] == 0);
2623 			}
2624 		}
2625 
2626 		if (hi_prod > 2) {
2627 			num[0] = DECIMAL_LO32(result);
2628 			num[1] = DECIMAL_MID32(result);
2629 			num[2] = DECIMAL_HI32(result);
2630 			DECIMAL_SCALE(result) = (uint8_t)ScaleResult(num, hi_prod, DECIMAL_SCALE(result));
2631 			if (DECIMAL_SCALE(result) == (uint8_t)-1) {
2632 				mono_set_pending_exception (mono_get_exception_overflow ());
2633 				return;
2634 			}
2635 
2636 			DECIMAL_LO32(result) = num[0];
2637 			DECIMAL_MID32(result) = num[1];
2638 			DECIMAL_HI32(result) = num[2];
2639 		}
2640 	}
2641 
2642 RetDec:
2643 	left = leftOriginal;
2644 	COPYDEC(*left, result);
2645 	left->reserved = 0;
2646 }
2647 
2648 void
mono_decimal_divide(MonoDecimal * left,MonoDecimal * right)2649 mono_decimal_divide (MonoDecimal *left, MonoDecimal *right)
2650 {
2651 	uint32_t quo[3], quo_save[3],rem[4], divisor[3];
2652 	uint32_t pwr, tmp, tmp1;
2653 	SPLIT64  sdlTmp, sdlDivisor;
2654 	int      scale, cur_scale;
2655 	gboolean unscale;
2656 
2657 	scale = DECIMAL_SCALE(*left) - DECIMAL_SCALE(*right);
2658 	unscale = FALSE;
2659 	divisor[0] = DECIMAL_LO32(*right);
2660 	divisor[1] = DECIMAL_MID32(*right);
2661 	divisor[2] = DECIMAL_HI32(*right);
2662 
2663 	if (divisor[1] == 0 && divisor[2] == 0) {
2664 		// Divisor is only 32 bits.  Easy divide.
2665 		//
2666 		if (divisor[0] == 0) {
2667 			mono_set_pending_exception (mono_get_exception_divide_by_zero ());
2668 			return;
2669 		}
2670 
2671 		quo[0] = DECIMAL_LO32(*left);
2672 		quo[1] = DECIMAL_MID32(*left);
2673 		quo[2] = DECIMAL_HI32(*left);
2674 		rem[0] = Div96By32(quo, divisor[0]);
2675 
2676 		for (;;) {
2677 			if (rem[0] == 0) {
2678 				if (scale < 0) {
2679 					cur_scale = min(9, -scale);
2680 					goto HaveScale;
2681 				}
2682 				break;
2683 			}
2684 			// We need to unscale if and only if we have a non-zero remainder
2685 			unscale = TRUE;
2686 
2687 			// We have computed a quotient based on the natural scale
2688 			// ( <dividend scale> - <divisor scale> ).  We have a non-zero
2689 			// remainder, so now we should increase the scale if possible to
2690 			// include more quotient bits.
2691 			//
2692 			// If it doesn't cause overflow, we'll loop scaling by 10^9 and
2693 			// computing more quotient bits as long as the remainder stays
2694 			// non-zero.  If scaling by that much would cause overflow, we'll
2695 			// drop out of the loop and scale by as much as we can.
2696 			//
2697 			// Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
2698 			// = 4.294 967 296.  So the upper limit is quo[2] == 4 and
2699 			// quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
2700 			// quotient bits in quo[0] could be all 1's, then 1,266,874,888
2701 			// is the largest value in quo[1] (when quo[2] == 4) that is
2702 			// assured not to overflow.
2703 			//
2704 			cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2705 			if (cur_scale == 0) {
2706 				// No more scaling to be done, but remainder is non-zero.
2707 				// Round quotient.
2708 				//
2709 				tmp = rem[0] << 1;
2710 				if (tmp < rem[0] || (tmp >= divisor[0] &&
2711 							   (tmp > divisor[0] || (quo[0] & 1)))) {
2712 				RoundUp:
2713 					if (!Add32To96(quo, 1)) {
2714 						if (scale == 0) {
2715 							mono_set_pending_exception (mono_get_exception_overflow ());
2716 							return;
2717 						}
2718 						scale--;
2719 						OverflowUnscale(quo, TRUE);
2720 						break;
2721 					}
2722 				}
2723 				break;
2724 			}
2725 
2726 			if (cur_scale < 0) {
2727 				mono_set_pending_exception (mono_get_exception_overflow ());
2728 				return;
2729 			}
2730 
2731 		HaveScale:
2732 			pwr = power10[cur_scale];
2733 			scale += cur_scale;
2734 
2735 			if (IncreaseScale(quo, pwr) != 0) {
2736 				mono_set_pending_exception (mono_get_exception_overflow ());
2737 				return;
2738 			}
2739 
2740 			sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
2741 			rem[0] = sdlTmp.u.Hi;
2742 
2743 			if (!Add32To96(quo, sdlTmp.u.Lo)) {
2744 				if (scale == 0) {
2745 					mono_set_pending_exception (mono_get_exception_overflow ());
2746 					return;
2747 				}
2748 				scale--;
2749 				OverflowUnscale(quo, (rem[0] != 0));
2750 				break;
2751 			}
2752 		} // for (;;)
2753 	} else {
2754 		// Divisor has bits set in the upper 64 bits.
2755 		//
2756 		// Divisor must be fully normalized (shifted so bit 31 of the most
2757 		// significant uint32_t is 1).  Locate the MSB so we know how much to
2758 		// normalize by.  The dividend will be shifted by the same amount so
2759 		// the quotient is not changed.
2760 		//
2761 		if (divisor[2] == 0)
2762 			tmp = divisor[1];
2763 		else
2764 			tmp = divisor[2];
2765 
2766 		cur_scale = 0;
2767 		if (!(tmp & 0xFFFF0000)) {
2768 			cur_scale += 16;
2769 			tmp <<= 16;
2770 		}
2771 		if (!(tmp & 0xFF000000)) {
2772 			cur_scale += 8;
2773 			tmp <<= 8;
2774 		}
2775 		if (!(tmp & 0xF0000000)) {
2776 			cur_scale += 4;
2777 			tmp <<= 4;
2778 		}
2779 		if (!(tmp & 0xC0000000)) {
2780 			cur_scale += 2;
2781 			tmp <<= 2;
2782 		}
2783 		if (!(tmp & 0x80000000)) {
2784 			cur_scale++;
2785 			tmp <<= 1;
2786 		}
2787 
2788 		// Shift both dividend and divisor left by cur_scale.
2789 		//
2790 		sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
2791 		rem[0] = sdlTmp.u.Lo;
2792 		rem[1] = sdlTmp.u.Hi;
2793 		sdlTmp.u.Lo = DECIMAL_MID32(*left);
2794 		sdlTmp.u.Hi = DECIMAL_HI32(*left);
2795 		sdlTmp.int64 <<= cur_scale;
2796 		rem[2] = sdlTmp.u.Hi;
2797 		rem[3] = (DECIMAL_HI32(*left) >> (31 - cur_scale)) >> 1;
2798 
2799 		sdlDivisor.u.Lo = divisor[0];
2800 		sdlDivisor.u.Hi = divisor[1];
2801 		sdlDivisor.int64 <<= cur_scale;
2802 
2803 		if (divisor[2] == 0) {
2804 			// Have a 64-bit divisor in sdlDivisor.  The remainder
2805 			// (currently 96 bits spread over 4 uint32_ts) will be < divisor.
2806 			//
2807 			sdlTmp.u.Lo = rem[2];
2808 			sdlTmp.u.Hi = rem[3];
2809 
2810 			quo[2] = 0;
2811 			quo[1] = Div96By64(&rem[1], sdlDivisor);
2812 			quo[0] = Div96By64(rem, sdlDivisor);
2813 
2814 			for (;;) {
2815 				if ((rem[0] | rem[1]) == 0) {
2816 					if (scale < 0) {
2817 						cur_scale = min(9, -scale);
2818 						goto HaveScale64;
2819 					}
2820 					break;
2821 				}
2822 
2823 				// We need to unscale if and only if we have a non-zero remainder
2824 				unscale = TRUE;
2825 
2826 				// Remainder is non-zero.  Scale up quotient and remainder by
2827 				// powers of 10 so we can compute more significant bits.
2828 				//
2829 				cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2830 				if (cur_scale == 0) {
2831 					// No more scaling to be done, but remainder is non-zero.
2832 					// Round quotient.
2833 					//
2834 					sdlTmp.u.Lo = rem[0];
2835 					sdlTmp.u.Hi = rem[1];
2836 					if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
2837 					    (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
2838 						goto RoundUp;
2839 					break;
2840 				}
2841 
2842 				if (cur_scale < 0) {
2843 					mono_set_pending_exception (mono_get_exception_overflow ());
2844 					return;
2845 				}
2846 
2847 			HaveScale64:
2848 				pwr = power10[cur_scale];
2849 				scale += cur_scale;
2850 
2851 				if (IncreaseScale(quo, pwr) != 0) {
2852 					mono_set_pending_exception (mono_get_exception_overflow ());
2853 					return;
2854 				}
2855 
2856 				rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
2857 				IncreaseScale(rem, pwr);
2858 				tmp = Div96By64(rem, sdlDivisor);
2859 				if (!Add32To96(quo, tmp)) {
2860 					if (scale == 0) {
2861 						mono_set_pending_exception (mono_get_exception_overflow ());
2862 						return;
2863 					}
2864 					scale--;
2865 					OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0));
2866 					break;
2867 				}
2868 
2869 			} // for (;;)
2870 		} else {
2871 			// Have a 96-bit divisor in divisor[].
2872 			//
2873 			// Start by finishing the shift left by cur_scale.
2874 			//
2875 			sdlTmp.u.Lo = divisor[1];
2876 			sdlTmp.u.Hi = divisor[2];
2877 			sdlTmp.int64 <<= cur_scale;
2878 			divisor[0] = sdlDivisor.u.Lo;
2879 			divisor[1] = sdlDivisor.u.Hi;
2880 			divisor[2] = sdlTmp.u.Hi;
2881 
2882 			// The remainder (currently 96 bits spread over 4 uint32_ts)
2883 			// will be < divisor.
2884 			//
2885 			quo[2] = 0;
2886 			quo[1] = 0;
2887 			quo[0] = Div128By96(rem, divisor);
2888 
2889 			for (;;) {
2890 				if ((rem[0] | rem[1] | rem[2]) == 0) {
2891 					if (scale < 0) {
2892 						cur_scale = min(9, -scale);
2893 						goto HaveScale96;
2894 					}
2895 					break;
2896 				}
2897 
2898 				// We need to unscale if and only if we have a non-zero remainder
2899 				unscale = TRUE;
2900 
2901 				// Remainder is non-zero.  Scale up quotient and remainder by
2902 				// powers of 10 so we can compute more significant bits.
2903 				//
2904 				cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2905 				if (cur_scale == 0) {
2906 					// No more scaling to be done, but remainder is non-zero.
2907 					// Round quotient.
2908 					//
2909 					if (rem[2] >= 0x80000000)
2910 						goto RoundUp;
2911 
2912 					tmp = rem[0] > 0x80000000;
2913 					tmp1 = rem[1] > 0x80000000;
2914 					rem[0] <<= 1;
2915 					rem[1] = (rem[1] << 1) + tmp;
2916 					rem[2] = (rem[2] << 1) + tmp1;
2917 
2918 					if (rem[2] > divisor[2] || (rem[2] == divisor[2] && (rem[1] > divisor[1] || rem[1] == (divisor[1] && (rem[0] > divisor[0] || (rem[0] == divisor[0] && (quo[0] & 1)))))))
2919 						goto RoundUp;
2920 					break;
2921 				}
2922 
2923 				if (cur_scale < 0) {
2924 					mono_set_pending_exception (mono_get_exception_overflow ());
2925 					return;
2926 				}
2927 
2928 			HaveScale96:
2929 				pwr = power10[cur_scale];
2930 				scale += cur_scale;
2931 
2932 				if (IncreaseScale(quo, pwr) != 0) {
2933 					mono_set_pending_exception (mono_get_exception_overflow ());
2934 					return;
2935 				}
2936 
2937 				rem[3] = IncreaseScale(rem, pwr);
2938 				tmp = Div128By96(rem, divisor);
2939 				if (!Add32To96(quo, tmp)) {
2940 					if (scale == 0) {
2941 						mono_set_pending_exception (mono_get_exception_overflow ());
2942 						return;
2943 					}
2944 
2945 					scale--;
2946 					OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0 || rem[2] != 0 || rem[3] != 0));
2947 					break;
2948 				}
2949 
2950 			} // for (;;)
2951 		}
2952 	}
2953 
2954 	// We need to unscale if and only if we have a non-zero remainder
2955 	if (unscale) {
2956 		// Try extracting any extra powers of 10 we may have
2957 		// added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
2958 		// If a division by one of these powers returns a zero remainder, then
2959 		// we keep the quotient.  If the remainder is not zero, then we restore
2960 		// the previous value.
2961 		//
2962 		// Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
2963 		// we can extract.  We use this as a quick test on whether to try a
2964 		// given power.
2965 		//
2966 		while ((quo[0] & 0xFF) == 0 && scale >= 8) {
2967 			quo_save[0] = quo[0];
2968 			quo_save[1] = quo[1];
2969 			quo_save[2] = quo[2];
2970 
2971 			if (Div96By32(quo_save, 100000000) == 0) {
2972 				quo[0] = quo_save[0];
2973 				quo[1] = quo_save[1];
2974 				quo[2] = quo_save[2];
2975 				scale -= 8;
2976 			} else
2977 				break;
2978 		}
2979 
2980 		if ((quo[0] & 0xF) == 0 && scale >= 4) {
2981 			quo_save[0] = quo[0];
2982 			quo_save[1] = quo[1];
2983 			quo_save[2] = quo[2];
2984 
2985 			if (Div96By32(quo_save, 10000) == 0) {
2986 				quo[0] = quo_save[0];
2987 				quo[1] = quo_save[1];
2988 				quo[2] = quo_save[2];
2989 				scale -= 4;
2990 			}
2991 		}
2992 
2993 		if ((quo[0] & 3) == 0 && scale >= 2) {
2994 			quo_save[0] = quo[0];
2995 			quo_save[1] = quo[1];
2996 			quo_save[2] = quo[2];
2997 
2998 			if (Div96By32(quo_save, 100) == 0) {
2999 				quo[0] = quo_save[0];
3000 				quo[1] = quo_save[1];
3001 				quo[2] = quo_save[2];
3002 				scale -= 2;
3003 			}
3004 		}
3005 
3006 		if ((quo[0] & 1) == 0 && scale >= 1) {
3007 			quo_save[0] = quo[0];
3008 			quo_save[1] = quo[1];
3009 			quo_save[2] = quo[2];
3010 
3011 			if (Div96By32(quo_save, 10) == 0) {
3012 				quo[0] = quo_save[0];
3013 				quo[1] = quo_save[1];
3014 				quo[2] = quo_save[2];
3015 				scale -= 1;
3016 			}
3017 		}
3018 	}
3019 
3020 	DECIMAL_SIGN(*left) = DECIMAL_SIGN(*left) ^ DECIMAL_SIGN(*right);
3021 	DECIMAL_HI32(*left) = quo[2];
3022 	DECIMAL_MID32(*left) = quo[1];
3023 	DECIMAL_LO32(*left) = quo[0];
3024 	DECIMAL_SCALE(*left) = (uint8_t)scale;
3025 	left->reserved = 0;
3026 
3027 }
3028 
3029 #define DECIMAL_PRECISION 29
3030 
3031 int
mono_decimal_from_number(void * from,MonoDecimal * target)3032 mono_decimal_from_number (void *from, MonoDecimal *target)
3033 {
3034 	MonoNumber *number = (MonoNumber *) from;
3035 	uint16_t* p = number->digits;
3036 	MonoDecimal d;
3037 	int e = number->scale;
3038 	g_assert(number != NULL);
3039 	g_assert(target != NULL);
3040 
3041 	d.reserved = 0;
3042 	DECIMAL_SIGNSCALE(d) = 0;
3043 	DECIMAL_HI32(d) = 0;
3044 	DECIMAL_LO32(d) = 0;
3045 	DECIMAL_MID32(d) = 0;
3046 	g_assert(p != NULL);
3047 	if (!*p) {
3048 		// To avoid risking an app-compat issue with pre 4.5 (where some app was illegally using Reflection to examine the internal scale bits), we'll only force
3049 		// the scale to 0 if the scale was previously positive
3050 		if (e > 0) {
3051 			e = 0;
3052 		}
3053 	} else {
3054 		if (e > DECIMAL_PRECISION) return 0;
3055 		while ((e > 0 || (*p && e > -28)) && (DECIMAL_HI32(d) < 0x19999999 || (DECIMAL_HI32(d) == 0x19999999 && (DECIMAL_MID32(d) < 0x99999999 || (DECIMAL_MID32(d) == 0x99999999 && (DECIMAL_LO32(d) < 0x99999999 || (DECIMAL_LO32(d) == 0x99999999 && *p <= '5'))))))) {
3056 			DecMul10(&d);
3057 			if (*p)
3058 				DecAddInt32(&d, *p++ - '0');
3059 			e--;
3060 		}
3061 		if (*p++ >= '5') {
3062 			gboolean round = TRUE;
3063 			if (*(p-1) == '5' && *(p-2) % 2 == 0) { // Check if previous digit is even, only if the when we are unsure whether hows to do Banker's rounding
3064 				// For digits > 5 we will be roundinp up anyway.
3065 				int count = 20; // Look at the next 20 digits to check to round
3066 				while (*p == '0' && count != 0) {
3067 					p++;
3068 					count--;
3069 				}
3070 				if (*p == '\0' || count == 0)
3071 					round = FALSE;// Do nothing
3072 			}
3073 
3074 			if (round) {
3075 				DecAddInt32(&d, 1);
3076 				if ((DECIMAL_HI32(d) | DECIMAL_MID32(d) | DECIMAL_LO32(d)) == 0) {
3077 					DECIMAL_HI32(d) = 0x19999999;
3078 					DECIMAL_MID32(d) = 0x99999999;
3079 					DECIMAL_LO32(d) = 0x9999999A;
3080 					e++;
3081 				}
3082 			}
3083 		}
3084 	}
3085 	if (e > 0)
3086 		return 0;
3087 	if (e <= -DECIMAL_PRECISION) {
3088 		// Parsing a large scale zero can give you more precision than fits in the decimal.
3089 		// This should only happen for actual zeros or very small numbers that round to zero.
3090 		DECIMAL_SIGNSCALE(d) = 0;
3091 		DECIMAL_HI32(d) = 0;
3092 		DECIMAL_LO32(d) = 0;
3093 		DECIMAL_MID32(d) = 0;
3094 		DECIMAL_SCALE(d) = (DECIMAL_PRECISION - 1);
3095 	} else {
3096 		DECIMAL_SCALE(d) = (uint8_t)(-e);
3097 	}
3098 
3099 	DECIMAL_SIGN(d) = number->sign? DECIMAL_NEG: 0;
3100 	*target = d;
3101 	return 1;
3102 }
3103 
3104 
3105 #endif
3106