1 /* hp3000_cpu_fp.c: HP 3000 floating-point arithmetic simulator
2
3 Copyright (c) 2016-2020, J. David Bryan
4
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22 Except as contained in this notice, the name of the author shall not be used
23 in advertising or otherwise to promote the sale, use or other dealings in
24 this Software without prior written authorization from the author.
25
26 09-Oct-20 JDB Modified to return Ext_Float traps for extended operands
27 30-Sep-20 JDB Cleaned up QWORD assembly in "norm_round_pack"
28 14-Sep-20 JDB Added a guard bit to 56-bit calculations for rounding
29 "divide" now renormalizes divisors to reduce calculation
30 11-Jun-16 JDB Bit mask constants are now unsigned
31 03-Feb-16 JDB First release version
32 25-Aug-15 JDB Fixed FSUB zero subtrahend bug (from Norwin Malmberg)
33 01-Apr-15 JDB Passes the floating point tests in the CPU diagnostic (D420A1)
34 29-Mar-15 JDB Created
35
36 References:
37 - HP 3000 Series II System Microprogram Listing
38 (30000-90023, August 1976)
39 - HP 3000 Series II/III System Reference Manual
40 (30000-90020, July 1978)
41 - Machine Instruction Set Reference Manual
42 (30000-90022, June 1984)
43
44
45 This module implements multiple-precision floating-point operations to
46 support the HP 3000 CPU instruction set. It employs 64-bit integer
47 arithmetic for speed and simplicity of implementation. The host compiler
48 must support 64-bit integers.
49
50 HP 3000 computers use a proprietary floating-point format. All 3000s
51 support two-word "single-precision" floating-point arithmetic as standard
52 equipment. The original HP 3000 CX and Series I CPUs support three-word
53 "extended-precision" floating-point arithmetic when the optional HP 30011A
54 Extended Instruction Set microcode was installed. The Series II and later
55 machines replace the three-word instructions with four-word "double-
56 precision" floating-point arithmetic and include the EIS as part of the
57 standard equipment.
58
59 Floating-point numbers have this format:
60
61 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
62 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
63 | S | exponent biased by +256 | positive mantissa |
64 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
65 | positive mantissa |
66 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
67 | positive mantissa | (extended)
68 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
69 | positive mantissa | (double)
70 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
71
72 That is, the three- and four-word formats merely extended the mantissa with
73 no change to the exponent range.
74
75 The mantissa is represented in sign-magnitude format. The mantissa is always
76 positive, with an assumed "1" to the left of the MSB, and the sign bit is set
77 for negative values. The exponent is in "excess-256" format, i.e.,
78 represented as an unsigned value biased by +256, giving an unbiased range of
79 -256 to +255. The binary point is assumed to be between the leading "1" and
80 the MSB, so a zero value must be handled as a special case of all bits equal
81 to zero, which otherwise would represent the value +1.0 * 2 ** -256.
82 Normalization shifts the mantissa left and decrements the exponent until a
83 "1" bit appears in bit 9.
84
85 The use of sign-magnitude format means that floating-point negation merely
86 complements the sign bit, and floating-point comparison simply checks the
87 signs and, if they are the same, then applies an integer comparison to the
88 packed values. However, it also implies the existence of a "negative zero"
89 value, represented by all zeros except for the sign bit. This value is
90 undefined; if a negative zero is supplied as an operand to one of the
91 arithmetic routines, it is treated as positive zero. Negative zero is never
92 returned even if, e.g., it is supplied as the dividend or multiplier.
93
94 This implementation provides add, subtract, multiply, divide, float, and fix
95 operations on two-, three-, and four-word floating point operands. The
96 routines are called via a common floating-point executor ("fp_exec") by
97 supplying the operation to be performed and the operand(s) on which to act.
98 An operand contains the packed (i.e., in-memory) representation and the
99 precision of the value. The returned value includes the packed
100 representation and the precision, along with a value that indicates whether
101 or not the operation resulted in an arithmetic trap. It is the
102 responsibility of the caller to take the trap if it is indicated.
103
104 Seven user trap values are returned:
105
106 Parameter Description
107 --------- ------------------------------------------------
108 000001 Integer Overflow
109 000002 Float Overflow
110 000003 Float Underflow
111 000005 Float Zero Divide
112 000010 Extended Precision Floating Point Overflow
113 000011 Extended Precision Floating Point Underflow
114 000012 Extended Precision Floating Point Divide by Zero
115 */
116
117
118
119 #include "hp3000_defs.h"
120 #include "hp3000_cpu.h"
121 #include "hp3000_cpu_fp.h"
122
123
124
125 /* Program constants */
126
127 #define EXPONENT_BIAS 256 /* the exponent is biased by +256 */
128
129 #define MIN_EXPONENT -256 /* the smallest representable exponent */
130 #define MAX_EXPONENT +255 /* the largest representable exponent */
131
132 #define EXPONENT_MASK 0077700u /* the mask to isolate the exponent in the first word */
133 #define MANTISSA_MASK 0000077u /* the mask to isolate the mantissa in the first word */
134
135 #define EXPONENT_SHIFT 6 /* the exponent alignment shift */
136 #define MANTISSA_SHIFT 0 /* the mantissa alignment shift */
137
138 #define UNPACKED_BITS 54 /* the number of significant bits in the unpacked mantissa */
139 #define GUARD_BITS 1 /* the number of guard bits in the mantissa used for rounding */
140
141 #define IMPLIED_BIT ((t_uint64) 1uL << UNPACKED_BITS + GUARD_BITS) /* the implied MSB in the mantissa */
142 #define CARRY_BIT (IMPLIED_BIT << 1) /* the carry from the MSB in the mantissa */
143
144
145 /* Floating-point accessors */
146
147 #define MANTISSA(w) ((t_uint64) (((w) & MANTISSA_MASK) >> MANTISSA_SHIFT))
148 #define EXPONENT(w) ((int32) (((w) & EXPONENT_MASK) >> EXPONENT_SHIFT))
149
150 #define TO_EXPONENT(w) ((w) + EXPONENT_BIAS << EXPONENT_SHIFT & EXPONENT_MASK)
151
152 #define DENORMALIZED(m) ((m) > 0 && ((m) & IMPLIED_BIT) == 0)
153
154
155 /* Floating-point unpacked representation */
156
157 typedef struct {
158 t_uint64 mantissa; /* the unsigned mantissa */
159 int32 exponent; /* the unbiased exponent */
160 t_bool negative; /* TRUE if the mantissa is negative */
161 FP_OPSIZE precision; /* the precision currently expressed by the value */
162 } FPU;
163
164 static const FPU zero = { 0, 0, FALSE, fp_f }; /* an unpacked zero value */
165
166
167 /* Floating-point descriptors */
168
169 static const int32 mantissa_bits [] = { /* the number of mantissa bits, indexed by FP_OPSIZE */
170 16 - 1, /* in_s bits available - sign bit */
171 32 - 1, /* in_d bits available - sign bit */
172 22 + 1, /* fp_f bits explicit + bits implied */
173 38 + 1, /* fp_x bits explicit + bits implied */
174 54 + 1 /* fp_e bits explicit + bits implied */
175 };
176
177 static const t_uint64 mantissa_mask [] = { /* the mask to the mantissa bits, indexed by FP_OPSIZE */
178 ((t_uint64) 1uL << 16) - 1 << 0, /* in_s 16-bit mantissa */
179 ((t_uint64) 1uL << 32) - 1 << 0, /* in_d 32-bit mantissa */
180 ((t_uint64) 1uL << 22) - 1 << 32, /* fp_f 22-bit mantissa */
181 ((t_uint64) 1uL << 38) - 1 << 16, /* fp_x 38-bit mantissa */
182 ((t_uint64) 1uL << 54) - 1 << 0 /* fp_e 54-bit mantissa */
183 };
184
185
186 static const t_uint64 half_lsb [] = { /* half of the LSB for rounding, indexed by FP_OPSIZE */
187 0, /* in_s not used */
188 (t_uint64) 1uL << 54, /* in_d LSB when shifted right by exponent value */
189 (t_uint64) 1uL << 32, /* fp_f word 2 LSB */
190 (t_uint64) 1uL << 16, /* fp_x word 3 LSB */
191 (t_uint64) 1uL << 0 /* fp_e word 4 LSB */
192 };
193
194
195 /* Floating-point local utility routine declarations */
196
197 static FPU unpack (FP_OPND packed);
198 static FP_OPND norm_round_pack (FPU unpacked);
199
200 static TRAP_CLASS add (FPU *sum, FPU augend, FPU addend);
201 static TRAP_CLASS subtract (FPU *difference, FPU minuend, FPU subtrahend);
202 static TRAP_CLASS multiply (FPU *product, FPU multiplicand, FPU multiplier);
203 static TRAP_CLASS divide (FPU *quotient, FPU dividend, FPU divisor);
204
205 static TRAP_CLASS ffloat (FPU *real, FPU integer);
206 static TRAP_CLASS fix (FPU *integer, FPU real, t_bool round);
207
208
209
210 /* Floating-point global routines */
211
212
213 /* Execute a floating-point operation.
214
215 The operator specified by the "operation" parameter is applied to the
216 "left_op" and to the "right_op" (if applicable), and the result is returned.
217 The "precision" fields of the operands must be set to the representations
218 stored within before calling this routine.
219
220 On entry, the left and right (if needed) operands are unpacked, and the
221 executor for the specified operation is called. The result is normalized,
222 rounded, and packed. Any trap condition detected by the operator routine is
223 set into the packed operand, unless the normalize/round/pack routine detected
224 its own trap condition. Finally, the packed result is returned.
225 */
226
fp_exec(FP_OPR operator,FP_OPND left_op,FP_OPND right_op)227 FP_OPND fp_exec (FP_OPR operator, FP_OPND left_op, FP_OPND right_op)
228 {
229 FPU left, right, result;
230 FP_OPND result_op;
231 TRAP_CLASS trap;
232
233 left = unpack (left_op); /* unpack the left-hand operand */
234
235 if (operator <= fp_div) /* if the operator requires two operands */
236 right = unpack (right_op); /* then unpack the right-hand operation */
237
238 switch (operator) { /* dispatch the floating-point operation */
239
240 case fp_add:
241 trap = add (&result, left, right); /* add the two operands */
242 break;
243
244 case fp_sub:
245 trap = subtract (&result, left, right); /* subtract the two operands */
246 break;
247
248 case fp_mpy:
249 trap = multiply (&result, left, right); /* multiply the two operands */
250 break;
251
252 case fp_div:
253 trap = divide (&result, left, right); /* divide the two operands */
254 break;
255
256 case fp_flt:
257 trap = ffloat (&result, left); /* convert the integer operand to a floating-point number */
258 break;
259
260 case fp_fixr:
261 trap = fix (&result, left, TRUE); /* round the floating-point operand to an integer */
262 break;
263
264 case fp_fixt:
265 trap = fix (&result, left, FALSE); /* truncate the floating-point operand to an integer */
266 break;
267
268 default:
269 result = zero; /* if an unimplemented operation is requested */
270 result.precision = left.precision; /* then return a zero of the appropriate precision */
271 trap = trap_Unimplemented; /* and trap for an Unimplemented Instruction */
272 break;
273 } /* all cases are handled */
274
275 result_op = norm_round_pack (result); /* normalize, round, and pack the result */
276
277 if (result_op.trap == trap_None) /* if the pack operation succeeded */
278 result_op.trap = trap; /* then set any arithmetic trap returned by the operation */
279
280 return result_op; /* return the result */
281 }
282
283
284
285 /* Floating-point local utility routine declarations */
286
287
288 /* Unpack a packed operand.
289
290 A packed integer or floating-point value is split into separate mantissa and
291 exponent variables. The multiple words of the mantissa are concatenated into
292 a single 64-bit unsigned value, and the exponent is shifted with recovery of
293 the sign. The mantissa is then shifted left one additional position to add a
294 guard bit that will be used for rounding.
295
296 The absolute values of single and double integers are unpacked into the
297 mantissas and preshifted by 32 or 16 bits, respectively, to reduce the
298 shifting needed for normalization. The resulting value is unnormalized, but
299 the exponent is set correctly to reflect the preshift. The precisions for
300 unpacked integers are set to single-precision but are valid for extended- and
301 double-precision, as the unpacked representations are identical.
302
303 The packed floating-point representation contains an implied "1" bit
304 preceding the binary point in the mantissa, except if the floating-point
305 value is zero. The unpacked mantissa includes the implied bit. The bias is
306 removed from the exponent, producing a signed value, and the sign of the
307 mantissa is set from the sign of the packed value.
308
309 A packed zero value is represented by all words set to zero. In the unpacked
310 representation, the mantissa is zero, the exponent is the minimum value
311 (-256), and the sign is always positive (as "negative zero" is undefined).
312
313
314 Implementation notes:
315
316 1. Integers could have been copied directly to the mantissa with the
317 exponents set to the appropriate values (54 in this case). However, the
318 current implementation unpacks integers only in preparation for repacking
319 as single-precision floating-point numbers i.e., to implement the "float"
320 operator. This would require a larger number of shifts to normalize the
321 values -- as many as 54 to normalize the value 1. Preshifting reduces
322 the number of normalizing shifts needed to between 6 and 22.
323 */
324
unpack(FP_OPND packed)325 static FPU unpack (FP_OPND packed)
326 {
327 FPU unpacked;
328 uint32 word;
329
330 switch (packed.precision) { /* dispatch based on the operand precision */
331
332 case in_s: /* unpack a single integer */
333 word = packed.words [0]; /* from the first word */
334
335 if (word & D16_SIGN) { /* if the value is negative */
336 word = NEG16 (word); /* then make it positive */
337 unpacked.negative = TRUE; /* and set the mantissa sign flag */
338 }
339
340 else /* otherwise the value is positive */
341 unpacked.negative = FALSE; /* so clear the sign flag */
342
343 unpacked.mantissa = (t_uint64) word << 32 + GUARD_BITS; /* store the preshifted value as the mantissa */
344 unpacked.exponent = UNPACKED_BITS - 32; /* and set the exponent to account for the shift */
345 unpacked.precision = fp_f; /* set the precision */
346 break;
347
348
349 case in_d: /* unpack a double integer */
350 word = TO_DWORD (packed.words [0], packed.words [1]); /* from the first two words */
351
352 if (word & D32_SIGN) { /* if the value is negative */
353 word = NEG32 (word); /* then make it positive */
354 unpacked.negative = TRUE; /* and set the mantissa sign flag */
355 }
356
357 else /* otherwise the value is positive */
358 unpacked.negative = FALSE; /* so clear the sign flag */
359
360 unpacked.mantissa = (t_uint64) word << 16 + GUARD_BITS; /* store the preshifted value as the mantissa */
361 unpacked.exponent = UNPACKED_BITS - 16; /* and set the exponent to account for the shift */
362 unpacked.precision = fp_f; /* set the precision */
363 break;
364
365
366 case fp_f: /* unpack a single-precision */
367 case fp_x: /* extended-precision */
368 case fp_e: /* or double-precision floating-point number */
369 unpacked.mantissa = MANTISSA (packed.words [0]); /* starting with the first word */
370
371 for (word = 1; word <= 3; word++) { /* unpack from one to three more words */
372 unpacked.mantissa <<= D16_WIDTH; /* shift the accumulated value */
373
374 if (word < TO_COUNT (packed.precision)) /* if all words are not included yet */
375 unpacked.mantissa |= packed.words [word]; /* then merge the next word into value */
376 }
377
378 unpacked.mantissa <<= GUARD_BITS; /* add the guard bits */
379
380 unpacked.exponent = /* store the exponent */
381 EXPONENT (packed.words [0]) - EXPONENT_BIAS; /* after removing the bias */
382
383 if (unpacked.exponent == MIN_EXPONENT /* if the biased exponent and mantissa are zero */
384 && unpacked.mantissa == 0) /* then the mantissa is positive */
385 unpacked.negative = FALSE; /* regardless of the packed sign */
386
387 else { /* otherwise the value is non-zero */
388 unpacked.mantissa |= IMPLIED_BIT; /* so add back the implied "1" bit */
389 unpacked.negative = ((packed.words [0] & D16_SIGN) != 0); /* and set the sign as directed */
390 }
391
392 unpacked.precision = packed.precision; /* set the precision */
393 break;
394 } /* all cases are handled */
395
396 return unpacked; /* return the unpacked value */
397 }
398
399
400 /* Normalize, round, and pack an unpacked value.
401
402 An unpacked value is normalized, rounded, and packed into the representation
403 indicated by the operand precision. If the supplied value cannot be
404 represented, the appropriate trap indication is returned.
405
406 A single- or double-integer is packed into the first word or two words of the
407 result as a twos-complement value. If the value is too large for the result
408 precision, an Integer Overflow trap is indicated, and a zero value is
409 returned.
410
411 For a real of any precision, the mantissa is first normalized by shifting
412 right if the carry bit is set, or by shifting left until the implied bit is
413 set. The exponent is adjusted for any shifts performed. The value is then
414 rounded by adding one-half of the least-significant bit; if that causes a
415 carry, the exponent is adjusted again. Finally, the mantissa is masked to
416 the number of bits corresponding to the desired precision and packed into the
417 in-memory representation. The exponent is checked, and it exceeds the
418 permitted range, the appropriate trap indication is returned.
419
420
421 Implementation notes:
422
423 1. If a carry occurs due to rounding, the mantissa is not shifted because
424 the carry bit will be masked off during packing. Incrementing the
425 exponent in this case is sufficient.
426
427 2. Masking the mantissa is required to remove the carry and implied bits
428 before packing. Masking the value bits in excess of the specified
429 precision is not required but is desirable to avoid implying more
430 precision than actually is present.
431
432 3. The result value +/-1 x 2 ** -256 is considered an underflow, as the
433 packed representation is identical to the zero representation, i.e., an
434 all-zeros value.
435 */
436
norm_round_pack(FPU unpacked)437 static FP_OPND norm_round_pack (FPU unpacked)
438 {
439 FP_OPND packed;
440 int32 integer;
441
442 packed.precision = unpacked.precision; /* set the precision */
443
444 if (unpacked.mantissa == 0) { /* if the mantissa is zero */
445 packed.words [0] = 0; /* then set */
446 packed.words [1] = 0; /* the packed */
447 packed.words [2] = 0; /* representation to */
448 packed.words [3] = 0; /* all zeros */
449
450 packed.trap = trap_None; /* report that packing succeeded */
451 }
452
453 else if (unpacked.precision <= in_d) /* if packing a single or double integer */
454 if (unpacked.exponent >= mantissa_bits [unpacked.precision]) { /* then if the value is too large to fit */
455 packed.words [0] = 0; /* then return */
456 packed.words [1] = 0; /* a zero value */
457 packed.trap = trap_Integer_Overflow; /* and an overflow trap */
458 }
459
460 else { /* otherwise */
461 integer = (int32) /* convert the value to an integer */
462 (unpacked.mantissa >> UNPACKED_BITS + GUARD_BITS - unpacked.exponent /* by shifting right to align */
463 & mantissa_mask [unpacked.precision]); /* and masking to the precision */
464
465 if (unpacked.negative) /* if the value is negative */
466 integer = - integer; /* then negate the result */
467
468 packed.words [0] = UPPER_WORD (integer); /* split the result */
469 packed.words [1] = LOWER_WORD (integer); /* into the first two words */
470
471 packed.trap = trap_None; /* report that packing succeeded */
472 }
473
474 else { /* otherwise a real number is to be packed */
475 if (unpacked.mantissa & CARRY_BIT) { /* if a carry out of the MSB has occurred */
476 unpacked.mantissa >>= 1; /* then shift the mantissa to normalize */
477 unpacked.exponent += 1; /* and increment the exponent to compensate */
478 }
479
480 else /* otherwise */
481 while (DENORMALIZED (unpacked.mantissa)) { /* while the mantissa is not in normal form */
482 unpacked.mantissa <<= 1; /* shift the mantissa toward the implied-bit position */
483 unpacked.exponent -= 1; /* and decrement the exponent to compensate */
484 }
485
486 unpacked.mantissa += half_lsb [unpacked.precision]; /* round the mantissa by adding one-half of the LSB */
487
488 if (unpacked.mantissa & CARRY_BIT) /* if rounding caused a carry out of the MSB */
489 unpacked.exponent = unpacked.exponent + 1; /* then increment the exponent to compensate */
490
491 unpacked.mantissa >>= GUARD_BITS; /* remove the guard bits */
492
493 unpacked.mantissa &= mantissa_mask [unpacked.precision]; /* mask the mantissa to the specified precision */
494
495 packed.words [0] = HIGH_UPPER_WORD (unpacked.mantissa) /* pack the first word of the mantissa */
496 | TO_EXPONENT (unpacked.exponent) /* with the exponent */
497 | (unpacked.negative ? D16_SIGN : 0); /* and the sign bit */
498
499 packed.words [1] = LOW_UPPER_WORD (unpacked.mantissa); /* pack the second */
500 packed.words [2] = UPPER_WORD (unpacked.mantissa); /* and third */
501 packed.words [3] = LOWER_WORD (unpacked.mantissa); /* and fourth words */
502
503 if (unpacked.exponent < MIN_EXPONENT /* if the exponent is too small */
504 || unpacked.exponent == MIN_EXPONENT && unpacked.mantissa == 0) /* or the result would be all zeros */
505 if (packed.precision == fp_f) /* then if this is a standard operand */
506 packed.trap = trap_Float_Underflow; /* then report a standard underflow trap */
507 else /* otherwise */
508 packed.trap = trap_Ext_Float_Underflow; /* report an extended underflow trap */
509
510 else if (unpacked.exponent > MAX_EXPONENT) /* otherwise if the exponent is too large */
511 if (packed.precision == fp_f) /* then if this is a standard operand */
512 packed.trap = trap_Float_Overflow; /* then report a standard overflow trap */
513 else /* otherwise */
514 packed.trap = trap_Ext_Float_Overflow; /* report an extended overflow trap */
515
516 else /* otherwise */
517 packed.trap = trap_None; /* report that packing succeeded */
518 }
519
520 return packed; /* return the packed value */
521 }
522
523
524 /* Add two unpacked numbers.
525
526 The sum of the two operands is returned. If one operand is zero and the
527 other is not, the non-zero operand is returned. If both operand are zero, a
528 "defined zero" is returned in case one or both operands are "negative zeros."
529
530 Otherwise, the difference between the operand exponents is determined. If
531 the magnitude of the difference between the exponents is greater than the
532 number of significant bits, then the smaller number has been scaled to zero
533 (swamped), and so the sum is simply the larger operand. However, if the sum
534 will be significant, the smaller mantissa is shifted to align with the larger
535 mantissa, and the larger exponent is used (as, after the scaling shift, the
536 smaller value has the same exponent). Finally, if the operand signs are the
537 same, the result is the sum of the mantissas. If the signs are different,
538 then the sum is the smaller value subtracted from the larger value, and the
539 result adopts the sign of the larger value.
540
541
542 Implementation notes:
543
544 1. If the addend is zero, the microcode converts the undefined value
545 "negative zero" to the defined positive zero if it is passed as the
546 augend. This also applies to the subtraction operator, which passes a
547 negative zero if the subtrahend is zero.
548 */
549
add(FPU * sum,FPU augend,FPU addend)550 static TRAP_CLASS add (FPU *sum, FPU augend, FPU addend)
551 {
552 int32 magnitude;
553
554 if (addend.mantissa == 0) /* if the addend is zero */
555 if (augend.mantissa == 0) { /* then if the augend is also zero */
556 *sum = zero; /* then the sum is (positive) zero */
557 sum->precision = augend.precision; /* with the appropriate precision */
558 }
559
560 else /* otherwise the augend is non-zero */
561 *sum = augend; /* so the sum is just the augend */
562
563 else if (augend.mantissa == 0) /* otherwise if the augend is zero */
564 *sum = addend; /* then the sum is just the addend */
565
566 else { /* otherwise both operands are non-zero */
567 magnitude = augend.exponent - addend.exponent; /* so determine the magnitude of the difference */
568
569 if (abs (magnitude) > mantissa_bits [augend.precision]) /* if one of the operands is swamped */
570 if (magnitude > 0) /* then if the addend is smaller */
571 *sum = augend; /* then return the augend */
572 else /* otherwise */
573 *sum = addend; /* return the addend */
574
575 else { /* otherwise the addition is required */
576 sum->precision = addend.precision; /* so set the precision to that of the operands */
577
578 if (magnitude > 0) { /* if the addend is smaller */
579 addend.mantissa >>= magnitude; /* then shift right to align the addend */
580 sum->exponent = augend.exponent; /* and use the augend's exponent */
581 }
582
583 else { /* otherwise the augend is smaller or the same */
584 augend.mantissa >>= - magnitude; /* shift right to align the augend */
585 sum->exponent = addend.exponent; /* and use the addend's exponent */
586 }
587
588 if (addend.negative == augend.negative) { /* if the mantissa signs are the same */
589 sum->mantissa = addend.mantissa + augend.mantissa; /* then add the mantissas */
590 sum->negative = addend.negative; /* and use the addend sign for the sum */
591 }
592
593 else if (addend.mantissa > augend.mantissa) { /* otherwise if the addend is larger */
594 sum->mantissa = addend.mantissa - augend.mantissa; /* then subtract the augend */
595 sum->negative = addend.negative; /* and use the addend sign */
596 }
597
598 else { /* otherwise the augend is larger */
599 sum->mantissa = augend.mantissa - addend.mantissa; /* so subtract the addend */
600 sum->negative = augend.negative; /* and use the augend sign */
601 }
602 }
603 }
604
605 return trap_None; /* report that the addition succeeded */
606 }
607
608
609 /* Subtract two unpacked numbers.
610
611 The difference of the two operands is returned. Subtraction is implemented
612 by negating the subtrahend and then adding the minuend.
613
614
615 Implementation notes:
616
617 1. If the subtrahend is zero, negating produces the undefined "negative
618 zero." However, the "add" routine handles this as positive zero, so we
619 do not need to worry about this condition.
620 */
621
subtract(FPU * difference,FPU minuend,FPU subtrahend)622 static TRAP_CLASS subtract (FPU *difference, FPU minuend, FPU subtrahend)
623 {
624 subtrahend.negative = ! subtrahend.negative; /* invert the sign of the subtrahend */
625
626 add (difference, minuend, subtrahend); /* add to obtain the difference */
627
628 return trap_None; /* report that the subtraction succeeded */
629 }
630
631
632 /* Multiply two unpacked numbers.
633
634 The product of the two operands is returned. Conceptually, the
635 implementation requires a 64 x 64 = 128-bit multiply, rounded to the upper 64
636 bits. Instead of implementing the FMPY or EMPY firmware algorithm directly,
637 which uses 16 x 16 = 32-bit partial-product multiplies, it is more efficient
638 under simulation to use 32 x 32 = 64-bit multiplications by splitting the
639 operand mantissas ("a" and "b") into 32-bit high and low ("h" and "l") parts
640 and forming the cross-products:
641
642 64-bit operands
643 ah al
644 +-------+-------+
645 bh bl
646 +-------+-------+
647 _________________
648
649 al * bl [ll]
650 +-------+-------+
651 ah * bl [hl]
652 +-------+-------+
653 al * bh [lh]
654 +-------+-------+
655 ah * bh [hh]
656 +-------+-------+
657 _________________________________
658
659 64-bit product
660 +-------+-------+
661
662 If either operand is zero, a "defined zero" is returned in case one or both
663 operands are "negative zeros." Otherwise, the product exponent is set to the
664 sum of the operand exponents, and the four 64-bit cross-products are formed.
665 The lower 64 bits of the products are summed to form the carry into the upper
666 64 bits, which are summed to produce the product. The product mantissa is
667 aligned, and the product sign is set negative if the operand signs differ.
668
669 Mantissas are represented internally as fixed-point numbers with 54 data bits
670 plus one guard bit to the right of the binary point. That is, the real
671 number represented is the integer mantissa value * (2 ** -55), where the
672 right-hand term represents the delta for a change of one bit. Multiplication
673 is therefore:
674
675 (product * delta) = (multiplicand * delta) * (multiplier * delta)
676
677 The product is:
678
679 product = (multiplicand * multiplier) * (delta * delta) / delta
680
681 ...which reduces to:
682
683 product = multiplicand * multiplier * delta
684
685 Multiplying the product by (2 ** -55) is equivalent to right-shifting by 55.
686 However, using only the top 64 bits of the 128-bit product is equivalent to
687 right-shifting by 64, so the net correction is a left-shift by 9.
688
689
690 Implementation notes:
691
692 1. 32 x 32 = 64-bit multiplies use intrinsic instructions on the IA-32
693 processor family.
694 */
695
multiply(FPU * product,FPU multiplicand,FPU multiplier)696 static TRAP_CLASS multiply (FPU *product, FPU multiplicand, FPU multiplier)
697 {
698 const uint32 delta_shift = D64_WIDTH - (UNPACKED_BITS + GUARD_BITS);
699 const uint32 carry_shift = D32_WIDTH - delta_shift;
700 uint32 ah, al, bh, bl;
701 t_uint64 hh, hl, lh, ll, carry;
702
703 if (multiplicand.mantissa == 0 || multiplier.mantissa == 0) { /* if either operand is zero */
704 *product = zero; /* then the product is (positive) zero */
705 product->precision = multiplicand.precision; /* with the appropriate precision */
706 }
707
708 else { /* otherwise both operands are non-zero */
709 product->precision = multiplicand.precision; /* so set the precision to that of the operands */
710
711 product->exponent = multiplicand.exponent /* the product exponent */
712 + multiplier.exponent; /* is the sum of the operand exponents */
713
714 ah = UPPER_DWORD (multiplicand.mantissa); /* split the multiplicand */
715 al = LOWER_DWORD (multiplicand.mantissa); /* into high and low double-words */
716
717 bh = UPPER_DWORD (multiplier.mantissa); /* split the multiplier */
718 bl = LOWER_DWORD (multiplier.mantissa); /* into high and low double-words */
719
720 hh = (t_uint64) ah * (t_uint64) bh; /* form the */
721 hl = (t_uint64) ah * (t_uint64) bl; /* four cross products */
722 lh = (t_uint64) al * (t_uint64) bh; /* using 32 x 32 = 64-bit multiplies */
723 ll = (t_uint64) al * (t_uint64) bl; /* for efficiency */
724
725 carry = ((t_uint64) UPPER_DWORD (ll) /* do a 64-bit add of the upper half of "ll" */
726 + LOWER_DWORD (hl) /* to the lower halves of "hl" */
727 + LOWER_DWORD (lh)) >> carry_shift; /* and "lh" and shift to leave just the carry bits */
728
729 product->mantissa = hh + UPPER_DWORD (hl) /* add "hh" to the upper halves of "hl" and "lh" */
730 + UPPER_DWORD (lh);
731
732 product->mantissa <<= delta_shift; /* align the result to the binary point */
733 product->mantissa += carry; /* and add the carry from the discarded bits */
734
735 product->negative = /* set the product sign negative */
736 (multiplicand.negative != multiplier.negative); /* if the operand signs differ */
737 }
738
739 return trap_None; /* report that the multiplication succeeded */
740 }
741
742
743 /* Divide two unpacked numbers.
744
745 The quotient of the two operands is returned, and the remainder is discarded.
746 Conceptually, the implementation requires a 128 / 64 = 64-bit division, with
747 64 bits of zeros appended to the dividend to get the required precision.
748 However, instead of implementing the FDIV or EDIV firmware algorithm
749 directly, which uses 32 / 16 = 16-bit trial divisions, it is more efficient
750 under simulation to use 64 / 32 = 64-bit divisions with the classic
751 divide-and-correct method.
752
753 This method considers the 64-bit dividend and divisor each to consist of two
754 32-bit "digits." The 64-bit dividend "ah,al" is divided by the first 32-bit
755 digit "bh" of the 64-bit divisor "bh,bl", yielding a 64-bit trial quotient
756 and a 64-bit remainder. A correction is developed by multiplying the second
757 32-bit digit "bl" of the divisor and the trial quotient. If the remainder is
758 smaller than the correction, the trial quotient is too large, so it is
759 decremented, and the (full 64-bit) divisor is added to the remainder. The
760 correction is then subtracted from the remainder, and the process is repeated
761 using the corrected remainder as the dividend to develop the second 64-bit
762 trial quotient and second quotient digit. The first quotient digit is
763 positioned, and the two quotient digits are then added to produce the final
764 64-bit quotient. The quotient sign is set negative if the operand signs
765 differ.
766
767 Mantissas are represented internally as fixed-point numbers with 54 data bits
768 plus one guard bit to the right of the binary point. That is, the real
769 number represented is the integer mantissa value * (2 ** -55), where the
770 right-hand term represents the delta for a change of one bit. Division is
771 therefore:
772
773 (quotient * delta) = (dividend * delta) / (divisor * delta)
774
775 The quotient is:
776
777 quotient = (dividend / divisor) * (delta / (delta * delta))
778
779 ...which reduces to:
780
781 quotient = (dividend / divisor) / delta
782
783 Dividing the quotient by (2 ** -55) is equivalent to left-shifting by 55.
784 However, using only the top 64 bits of the 128-bit product is equivalent to
785 right-shifting by 64, so the net correction is a right-shift by 9.
786
787 Care must be taken to ensure that the correction product does not overflow.
788 In hardware, this is detected by examining the ALU carry bit after the
789 correction multiplication. In simulation, there is no portable way of
790 determining if the 64 x 32 multiply overflowed. However, we can take
791 advantage of the fact that the upper eight bits of the mantissa are always
792 zero (54 data bits plus one guard bit plus one implied bit = 56 significant
793 bits). By shifting the divisor left by eight bits, we ensure that the
794 quotient will be eight bits shorter, and so guarantee that the correction
795 product will not overflow. Moreover, because the divisor is now at least
796 one-half of the maximum value (because of the implied bit), it ensures that
797 the trial quotient will either be correct or off by one, so at most a single
798 correction will be needed.
799
800 The final quotient would require a left-shift of 8 to account for the
801 renormalized divisor, but from the above calculations, we also need a right
802 shift of 9 to align it. The net result needs only a right shift of one to
803 correct, which we handle by decrementing the exponent in lieu of additional
804 shifting.
805
806 See "Divide-and-Correct Methods for Multiple Precision Division" by Marvin L.
807 Stein, Communications of the ACM, August 1964 and "Multiple-Length Division
808 Revisited: a Tour of the Minefield" by Per Brinch Hansen, Software Practice
809 and Experience, June 1994 for background.
810
811
812 Implementation notes:
813
814 1. IA-32 processors do not have a 64 / 32 = 64-bit divide instruction (they
815 have a 64 / 32 = 32 instruction instead). Therefore, a run-time library
816 routine for 64 / 64 = 64 is generated. Consequently, "bh" and "bl" are
817 declared as 64-bit variables, as this produces simpler code than if they
818 were declared as 32-bit variables.
819
820 2. "bh" is guaranteed to be non-zero because the divisor mantissa is
821 normalized on entry. Therefore, no division-by-zero check is needed.
822 */
823
divide(FPU * quotient,FPU dividend,FPU divisor)824 static TRAP_CLASS divide (FPU *quotient, FPU dividend, FPU divisor)
825 {
826 const uint32 renorm_shift = D64_WIDTH - (UNPACKED_BITS + GUARD_BITS + 1);
827 t_uint64 bh, bl, q1, q2, r1, r2, c1, c2;
828
829 if (divisor.mantissa == 0) { /* if the divisor is zero */
830 *quotient = dividend; /* then return the dividend */
831
832 if (dividend.precision == fp_f) /* if this is a standard operand */
833 return trap_Float_Zero_Divide; /* then report a standard zero-divide trap */
834 else /* otherwise */
835 return trap_Ext_Float_Zero_Divide; /* report an extended zero-divide trap */
836 }
837
838 else if (dividend.mantissa == 0) { /* otherwise if the dividend is zero */
839 *quotient = zero; /* then the quotient is (positive) zero */
840 quotient->precision = dividend.precision; /* with the appropriate precision */
841 }
842
843 else { /* otherwise both operands are non-zero */
844 quotient->precision = dividend.precision; /* so set the precision to that of the operands */
845
846 quotient->exponent = dividend.exponent /* the quotient exponent is the difference of the */
847 - divisor.exponent - 1; /* operand exponents (with alignment correction) */
848
849 divisor.mantissa <<= renorm_shift; /* renormalize the divisor */
850
851 bh = (t_uint64) UPPER_DWORD (divisor.mantissa); /* split the divisor */
852 bl = (t_uint64) LOWER_DWORD (divisor.mantissa); /* into high and low halves */
853
854 q1 = dividend.mantissa / bh; /* form the first trial quotient */
855 r1 = dividend.mantissa % bh; /* and remainder */
856
857 c1 = bl * q1; /* form the first remainder correction */
858 r1 = r1 << D32_WIDTH; /* and scale the remainder to match */
859
860 if (r1 < c1) { /* if a correction is required */
861 q1 = q1 - 1; /* then the first trial quotient is too large */
862 r1 = r1 + divisor.mantissa; /* so reduce it and increase the remainder */
863 }
864
865 r1 = r1 - c1; /* correct the first remainder */
866
867 q2 = r1 / bh; /* form the second trial quotient */
868 r2 = r1 % bh; /* and remainder */
869
870 c2 = bl * q2; /* form the second remainder correction */
871 r2 = r2 << D32_WIDTH; /* and scale the remainder to match */
872
873 if (r2 < c2) { /* if a correction is required */
874 q2 = q2 - 1; /* then the second trial quotient is too large */
875 r2 = r2 + divisor.mantissa; /* so reduce it and increase the remainder */
876 }
877
878 quotient->mantissa = (q1 << D32_WIDTH) + q2; /* position and sum the quotient digits */
879
880 quotient->negative = /* set the quotient sign negative */
881 (dividend.negative != divisor.negative); /* if the operand signs differ */
882 }
883
884 return trap_None; /* report that the division succeeded */
885 }
886
887
888 /* Float an integer to a floating-point value.
889
890 The integer operand is converted to a floating-point value and returned. The
891 desired precision of the result must be set before calling.
892
893 Conversion is simply a matter of copying the integer value. When the
894 unpacked value is normalized, it will be converted to floating-point format.
895
896
897 Implementation notes:
898
899 1. The incoming integer has already been unpacked into fp_f format, so we do
900 not need to set the precision here.
901 */
902
ffloat(FPU * real,FPU integer)903 static TRAP_CLASS ffloat (FPU *real, FPU integer)
904 {
905 *real = integer; /* copy the unpacked value */
906 return trap_None; /* report that the float succeeded */
907 }
908
909
910 /* Fix an unpacked floating-point value to an integer.
911
912 A floating-point value is converted to a double-word integer. If the "round"
913 parameter is true, the value is rounded before conversion; otherwise, it is
914 truncated.
915
916 If the real value is less than 0.5, then the integer value is zero.
917 Otherwise, if rounding is requested, add 0.5 (created by shifting a "1" into
918 the position immediately to the right of the least significant bit of the
919 integer result) to the value. Finally, the result precision is set. The
920 remaining conversion occurs when the result is packed.
921
922
923 Implementation notes:
924
925 1. The FIXR/FIXT microcode gives an Integer Overflow for exponent > 30, even
926 though -2 ** 31 (143700 000000) does fit in the result.
927 */
928
fix(FPU * integer,FPU real,t_bool round)929 static TRAP_CLASS fix (FPU *integer, FPU real, t_bool round)
930 {
931 if (real.exponent < -1) /* if the real value is < 0.5 */
932 integer->mantissa = 0; /* then the integer value is 0 */
933
934 else { /* otherwise the value is convertible */
935 integer->mantissa = real.mantissa; /* so set the mantissa */
936
937 if (round && real.exponent < UNPACKED_BITS) /* if rounding is requested and won't overflow */
938 integer->mantissa += half_lsb [in_d] >> real.exponent; /* then add one-half of the LSB to the value */
939 }
940
941 integer->exponent = real.exponent; /* copy the exponent */
942 integer->negative = real.negative; /* and sign */
943 integer->precision = in_d; /* and set to pack to a double integer */
944
945 return trap_None; /* report that the fix succeeded */
946 }
947