1 /* hp3000_cpu_fp.c: HP 3000 floating-point arithmetic simulator
2
3 Copyright (c) 2016, J. David Bryan
4
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22 Except as contained in this notice, the name of the author shall not be used
23 in advertising or otherwise to promote the sale, use or other dealings in
24 this Software without prior written authorization from the author.
25
26 11-Jun-16 JDB Bit mask constants are now unsigned
27 03-Feb-16 JDB First release version
28 25-Aug-15 JDB Fixed FSUB zero subtrahend bug (from Norwin Malmberg)
29 01-Apr-15 JDB Passes the floating point tests in the CPU diagnostic (D420A1)
30 29-Mar-15 JDB Created
31
32 References:
33 - HP 3000 Series II System Microprogram Listing
34 (30000-90023, August 1976)
35 - HP 3000 Series II/III System Reference Manual
36 (30000-90020, July 1978)
37 - Machine Instruction Set Reference Manual
38 (30000-90022, June 1984)
39
40
41 This module implements multiple-precision floating-point operations to
42 support the HP 3000 CPU instruction set. It employs 64-bit integer
43 arithmetic for speed and simplicity of implementation. The host compiler
44 must support 64-bit integers.
45
46 HP 3000 computers use a proprietary floating-point format. All 3000s
47 support two-word "single-precision" floating-point arithmetic as standard
48 equipment. The original HP 3000 CX and Series I CPUs support three-word
49 "extended-precision" floating-point arithmetic when the optional HP 30011A
50 Extended Instruction Set microcode was installed. The Series II and later
51 machines replace the three-word instructions with four-word "double-
52 precision" floating-point arithmetic and include the EIS as part of the
53 standard equipment.
54
55 Floating-point numbers have this format:
56
57 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
58 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
59 | S | exponent biased by +256 | positive mantissa |
60 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
61 | positive mantissa |
62 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
63 | positive mantissa | (extended)
64 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
65 | positive mantissa | (double)
66 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
67
68 That is, the three- and four-word formats merely extended the mantissa with
69 no change to the exponent range.
70
71 The mantissa is represented in sign-magnitude format. The mantissa is always
72 positive, with an assumed "1" to the left of the MSB, and the sign bit is set
73 for negative values. The exponent is in "excess-256" format, i.e.,
74 represented as an unsigned value biased by +256, giving an unbiased range of
75 -256 to +255. The binary point is assumed to be between the leading "1" and
76 the MSB, so a zero value must be handled as a special case of all bits equal
77 to zero, which otherwise would represent the value +1.0 * 2 ** -256.
78 Normalization shifts the mantissa left and decrements the exponent until a
79 "1" bit appears in bit 9.
80
81 The use of sign-magnitude format means that floating-point negation merely
82 complements the sign bit, and floating-point comparison simply checks the
83 signs and, if they are the same, then applies an integer comparison to the
84 packed values. However, it also implies the existence of a "negative zero"
85 value, represented by all zeros except for the sign bit. This value is
86 undefined; if a negative zero is supplied as an operand to one of the
87 arithmetic routines, it is treated as positive zero. Negative zero is never
88 returned even if, e.g., it is supplied as the dividend or multiplier.
89
90 This implementation provides add, subtract, multiply, divide, float, and fix
91 operations on two-, three-, and four-word floating point operands. The
92 routines are called via a common floating-point executor ("fp_exec") by
93 supplying the operation to be performed and the operand(s) on which to act.
94 An operand contains the packed (i.e., in-memory) representation and the
95 precision of the value. The returned value includes the packed
96 representation and the precision, along with a value that indicates whether
97 or not the operation resulted in an arithmetic trap. It is the
98 responsibility of the caller to take the trap if it is indicated.
99 */
100
101
102
103 #include "hp3000_defs.h"
104 #include "hp3000_cpu.h"
105 #include "hp3000_cpu_fp.h"
106
107
108
109 /* Program constants */
110
111 #define EXPONENT_BIAS 256 /* the exponent is biased by +256 */
112
113 #define MIN_EXPONENT -256 /* the smallest representable exponent */
114 #define MAX_EXPONENT +255 /* the largest representable exponent */
115
116 #define EXPONENT_MASK 0077700u /* the mask to isolate the exponent in the first word */
117 #define MANTISSA_MASK 0000077u /* the mask to isolate the mantissa in the first word */
118
119 #define EXPONENT_SHIFT 6 /* the exponent alignment shift */
120 #define MANTISSA_SHIFT 0 /* the mantissa alignment shift */
121
122 #define UNPACKED_BITS 54 /* the number of significant bits in the unpacked mantissa */
123
124 #define IMPLIED_BIT ((t_uint64) 1uL << UNPACKED_BITS) /* the implied MSB in the mantissa */
125 #define CARRY_BIT ((t_uint64) 1uL << UNPACKED_BITS + 1) /* the carry from the MSB in the mantissa */
126
127 #define DELTA_ALIGNMENT (D64_WIDTH - UNPACKED_BITS) /* net shift to align the binary point */
128
129
130 /* Floating-point accessors */
131
132 #define MANTISSA(w) ((t_uint64) (((w) & MANTISSA_MASK) >> MANTISSA_SHIFT))
133 #define EXPONENT(w) ((int32) (((w) & EXPONENT_MASK) >> EXPONENT_SHIFT))
134
135 #define TO_EXPONENT(w) ((w) + EXPONENT_BIAS << EXPONENT_SHIFT & EXPONENT_MASK)
136
137 #define DENORMALIZED(m) (((m) & IMPLIED_BIT) == 0)
138
139
140 /* Floating-point unpacked representation */
141
142 typedef struct {
143 t_uint64 mantissa; /* the unsigned mantissa */
144 int32 exponent; /* the unbiased exponent */
145 t_bool negative; /* TRUE if the mantissa is negative */
146 FP_OPSIZE precision; /* the precision currently expressed by the value */
147 } FPU;
148
149 static const FPU zero = { 0, 0, FALSE, fp_f }; /* an unpacked zero value */
150
151
152 /* Floating-point descriptors */
153
154 static const int32 mantissa_bits [] = { /* the number of mantissa bits, indexed by FP_OPSIZE */
155 16 - 1, /* in_s bits available - sign bit */
156 32 - 1, /* in_d bits available - sign bit */
157 22 + 1, /* fp_f bits explicit + bits implied */
158 38 + 1, /* fp_x bits explicit + bits implied */
159 54 + 1 /* fp_e bits explicit + bits implied */
160 };
161
162 static const t_uint64 mantissa_mask [] = { /* the mask to the mantissa bits, indexed by FP_OPSIZE */
163 ((t_uint64) 1 << 16) - 1 << 0, /* in_s 16-bit mantissa */
164 ((t_uint64) 1 << 32) - 1 << 0, /* in_d 32-bit mantissa */
165 ((t_uint64) 1 << 22) - 1 << 32, /* fp_f 22-bit mantissa */
166 ((t_uint64) 1 << 38) - 1 << 16, /* fp_x 38-bit mantissa */
167 ((t_uint64) 1 << 54) - 1 << 0 /* fp_e 54-bit mantissa */
168 };
169
170
171 static const t_uint64 half_lsb [] = { /* half of the LSB for rounding, indexed by FP_OPSIZE */
172 0, /* in_s not used */
173 0, /* in_d not used */
174 (t_uint64) 1 << 31, /* fp_f word 2 LSB */
175 (t_uint64) 1 << 15, /* fp_x word 3 LSB */
176 (t_uint64) 1 << 0 /* fp_e word 4 LSB */
177 };
178
179
180 /* Floating-point local utility routine declarations */
181
182 static FPU unpack (FP_OPND packed);
183 static FP_OPND norm_round_pack (FPU unpacked);
184
185 static TRAP_CLASS add (FPU *sum, FPU augend, FPU addend);
186 static TRAP_CLASS subtract (FPU *difference, FPU minuend, FPU subtrahend);
187 static TRAP_CLASS multiply (FPU *product, FPU multiplicand, FPU multiplier);
188 static TRAP_CLASS divide (FPU *quotient, FPU dividend, FPU divisor);
189
190 static TRAP_CLASS ffloat (FPU *real, FPU integer);
191 static TRAP_CLASS fix (FPU *integer, FPU real, t_bool round);
192
193
194
195 /* Floating-point global routines */
196
197
198 /* Execute a floating-point operation.
199
200 The operator specified by the "operation" parameter is applied to the
201 "left_op" and to the "right_op" (if applicable), and the result is returned.
202 The "precision" fields of the operands must be set to the representations
203 stored within before calling this routine.
204
205 On entry, the left and right (if needed) operands are unpacked, and the
206 executor for the specified operation is called. The result is normalized,
207 rounded, and packed. Any trap condition detected by the operator routine is
208 set into the packed operand, unless the normalize/round/pack routine detected
209 its own trap condition. Finally, the packed result is returned.
210 */
211
fp_exec(FP_OPR operator,FP_OPND left_op,FP_OPND right_op)212 FP_OPND fp_exec (FP_OPR operator, FP_OPND left_op, FP_OPND right_op)
213 {
214 FPU left, right, result;
215 FP_OPND result_op;
216 TRAP_CLASS trap;
217
218 left = unpack (left_op); /* unpack the left-hand operand */
219
220 if (operator <= fp_div) /* if the operator requires two operands */
221 right = unpack (right_op); /* then unpack the right-hand operation */
222
223 switch (operator) { /* dispatch the floating-point operation */
224
225 case fp_add:
226 trap = add (&result, left, right); /* add the two operands */
227 break;
228
229 case fp_sub:
230 trap = subtract (&result, left, right); /* subtract the two operands */
231 break;
232
233 case fp_mpy:
234 trap = multiply (&result, left, right); /* multiply the two operands */
235 break;
236
237 case fp_div:
238 trap = divide (&result, left, right); /* divide the two operands */
239 break;
240
241 case fp_flt:
242 trap = ffloat (&result, left); /* convert the integer operand to a floating-point number */
243 break;
244
245 case fp_fixr:
246 trap = fix (&result, left, TRUE); /* round the floating-point operand to an integer */
247 break;
248
249 case fp_fixt:
250 trap = fix (&result, left, FALSE); /* truncate the floating-point operand to an integer */
251 break;
252
253 default:
254 result = zero; /* if an unimplemented operation is requested */
255 result.precision = left.precision; /* then return a zero of the appropriate precision */
256 trap = trap_Unimplemented; /* and trap for an Unimplemented Instruction */
257 break;
258 } /* all cases are handled */
259
260 result_op = norm_round_pack (result); /* normalize, round, and pack the result */
261
262 if (result_op.trap == trap_None) /* if the pack operation succeeded */
263 result_op.trap = trap; /* then set any arithmetic trap returned by the operation */
264
265 return result_op; /* return the result */
266 }
267
268
269
270 /* Floating-point local utility routine declarations */
271
272
273 /* Unpack a packed operand.
274
275 A packed integer or floating-point value is split into separate mantissa and
276 exponent variables. The multiple words of the mantissa are concatenated into
277 a single 64-bit unsigned value, and the exponent is shifted with recovery of
278 the sign.
279
280 The absolute values of single and double integers are unpacked into the
281 mantissas and preshifted by 32 or 16 bits, respectively, to reduce the
282 shifting needed for normalization. The resulting value is unnormalized, but
283 the exponent is set correctly to reflect the preshift. The precisions for
284 unpacked integers are set to single-precision but are valid for extended- and
285 double-precision, as the unpacked representations are identical.
286
287 The packed floating-point representation contains an implied "1" bit
288 preceding the binary point in the mantissa, except if the floating-point
289 value is zero. The unpacked mantissa includes the implied bit. The bias is
290 removed from the exponent, producing a signed value, and the sign of the
291 mantissa is set from the sign of the packed value.
292
293 A packed zero value is represented by all words set to zero. In the unpacked
294 representation, the mantissa is zero, the exponent is the minimum value
295 (-256), and the sign is always positive (as "negative zero" is undefined).
296
297
298 Implementation notes:
299
300 1. Integers could have been copied directly to the mantissa with the
301 exponents set to the appropriate values (54 in this case). However, the
302 current implementation unpacks integers only in preparation for repacking
303 as floating-point numbers i.e., to implement the "float" operator. This
304 would require a larger number of shifts to normalize the values -- as
305 many as 54 to normalize the value 1. Preshifting reduces the number of
306 normalizing shifts needed to between 6 and 22.
307 */
308
unpack(FP_OPND packed)309 static FPU unpack (FP_OPND packed)
310 {
311 FPU unpacked;
312 uint32 word;
313
314 switch (packed.precision) { /* dispatch based on the operand precision */
315
316 case in_s: /* unpack a single integer */
317 word = packed.words [0]; /* from the first word */
318
319 if (word & D16_SIGN) { /* if the value is negative */
320 word = NEG16 (word); /* then make it positive */
321 unpacked.negative = TRUE; /* and set the mantissa sign flag */
322 }
323
324 else /* otherwise the value is positive */
325 unpacked.negative = FALSE; /* so clear the sign flag */
326
327 unpacked.mantissa = (t_uint64) word << 32; /* store the preshifted value as the mantissa */
328 unpacked.exponent = UNPACKED_BITS - 32; /* and set the exponent to account for the shift */
329 unpacked.precision = fp_f; /* set the precision */
330 break;
331
332
333 case in_d: /* unpack a double integer */
334 word = TO_DWORD (packed.words [0], packed.words [1]); /* from the first two words */
335
336 if (word & D32_SIGN) { /* if the value is negative */
337 word = NEG32 (word); /* then make it positive */
338 unpacked.negative = TRUE; /* and set the mantissa sign flag */
339 }
340
341 else /* otherwise the value is positive */
342 unpacked.negative = FALSE; /* so clear the sign flag */
343
344 unpacked.mantissa = (t_uint64) word << 16; /* store the preshifted value as the mantissa */
345 unpacked.exponent = UNPACKED_BITS - 16; /* and set the exponent to account for the shift */
346 unpacked.precision = fp_f; /* set the precision */
347 break;
348
349
350 case fp_f: /* unpack a single-precision */
351 case fp_x: /* extended-precision */
352 case fp_e: /* or double-precision floating-point number */
353 unpacked.mantissa = MANTISSA (packed.words [0]); /* starting with the first word */
354
355 for (word = 1; word <= 3; word++) { /* unpack from one to three more words */
356 unpacked.mantissa <<= 16; /* shift the accumulated value */
357
358 if (word < TO_COUNT (packed.precision)) /* if all words are not included yet */
359 unpacked.mantissa |= packed.words [word]; /* then merge the next word into value */
360 }
361
362 unpacked.exponent = /* store the exponent */
363 EXPONENT (packed.words [0]) - EXPONENT_BIAS; /* after removing the bias */
364
365 if (unpacked.exponent == MIN_EXPONENT /* if the biased exponent and mantissa are zero */
366 && unpacked.mantissa == 0) /* then the mantissa is positive */
367 unpacked.negative = FALSE; /* regardless of the packed sign */
368
369 else { /* otherwise the value is non-zero */
370 unpacked.mantissa |= IMPLIED_BIT; /* so add back the implied "1" bit */
371 unpacked.negative = ((packed.words [0] & D16_SIGN) != 0); /* and set the sign as directed */
372 }
373
374 unpacked.precision = packed.precision; /* set the precision */
375 break;
376 } /* all cases are handled */
377
378 return unpacked; /* return the unpacked value */
379 }
380
381
382 /* Normalize, round, and pack an unpacked value.
383
384 An unpacked value is normalized, rounded, and packed into the representation
385 indicated by the operand precision. If the supplied value cannot be
386 represented, the appropriate trap indication is returned.
387
388 A single- or double-integer is packed into the first word or two words of the
389 result as a twos-complement value. If the value is too large for the result
390 precision, an Integer Overflow trap is indicated, and a zero value is
391 returned.
392
393 For a real of any precision, the mantissa is first normalized by shifting
394 right if the carry bit is set, or by shifting left until the implied bit is
395 set. The exponent is adjusted for any shifts performed. The value is then
396 rounded by adding one-half of the least-significant bit; if that causes a
397 carry, the exponent is adjusted again. Finally, the mantissa is masked to
398 the number of bits corresponding to the desired precision and packed into the
399 in-memory representation. The exponent is checked, and it exceeds the
400 permitted range, the appropriate trap indication is returned.
401
402
403 Implementation notes:
404
405 1. If a carry occurs due to rounding, the mantissa is not shifted because
406 the carry bit will be masked off during packing. Incrementing the
407 exponent in this case is sufficient.
408
409 2. Masking the mantissa is required to remove the carry and implied bits
410 before packing. Masking the value bits in excess of the specified
411 precision is not required but is desirable to avoid implying more
412 precision than actually is present.
413
414 3. The result value +/-1 x 2 ** -256 is considered an underflow, as the
415 packed representation is identical to the zero representation, i.e., an
416 all-zeros value.
417 */
418
norm_round_pack(FPU unpacked)419 static FP_OPND norm_round_pack (FPU unpacked)
420 {
421 FP_OPND packed;
422 int32 integer;
423
424 packed.precision = unpacked.precision; /* set the precision */
425
426 if (unpacked.mantissa == 0) { /* if the mantissa is zero */
427 packed.words [0] = 0; /* then set */
428 packed.words [1] = 0; /* the packed */
429 packed.words [2] = 0; /* representation to */
430 packed.words [3] = 0; /* all zeros */
431
432 packed.trap = trap_None; /* report that packing succeeded */
433 }
434
435 else if (unpacked.precision <= in_d) /* if packing a single or double integer */
436 if (unpacked.exponent >= mantissa_bits [unpacked.precision]) { /* then if the value is too large to fit */
437 packed.words [0] = 0; /* then return */
438 packed.words [1] = 0; /* a zero value */
439 packed.trap = trap_Integer_Overflow; /* and an overflow trap */
440 }
441
442 else { /* otherwise */
443 integer = (int32) /* convert the value to an integer */
444 (unpacked.mantissa >> UNPACKED_BITS - unpacked.exponent /* by shifting right to align */
445 & mantissa_mask [unpacked.precision]); /* and masking to the desired precision */
446
447 if (unpacked.negative) /* if the value is negative */
448 integer = - integer; /* then negate the result */
449
450 packed.words [0] = UPPER_WORD (integer); /* split the result */
451 packed.words [1] = LOWER_WORD (integer); /* into the first two words */
452
453 packed.trap = trap_None; /* report that packing succeeded */
454 }
455
456 else { /* otherwise a real number is to be packed */
457 if (unpacked.mantissa & CARRY_BIT) { /* if a carry out of the MSB has occurred */
458 unpacked.mantissa >>= 1; /* then shift the mantissa to normalize */
459 unpacked.exponent += 1; /* and increment the exponent to compensate */
460 }
461
462 else /* otherwise */
463 while (DENORMALIZED (unpacked.mantissa)) { /* while the mantissa is not in normal form */
464 unpacked.mantissa <<= 1; /* shift the mantissa toward the implied-bit position */
465 unpacked.exponent -= 1; /* and decrement the exponent to compensate */
466 }
467
468
469 unpacked.mantissa += half_lsb [unpacked.precision]; /* round the mantissa by adding one-half of the LSB */
470
471 if (unpacked.mantissa & CARRY_BIT) /* if rounding caused a carry out of the MSB */
472 unpacked.exponent = unpacked.exponent + 1; /* then increment the exponent to compensate */
473
474
475 unpacked.mantissa &= mantissa_mask [unpacked.precision]; /* mask the mantissa to the specified precision */
476
477 packed.words [0] = (HP_WORD) (unpacked.mantissa >> 48) & DV_MASK /* pack the first word of the mantissa */
478 | TO_EXPONENT (unpacked.exponent) /* with the exponent */
479 | (unpacked.negative ? D16_SIGN : 0); /* and the sign bit */
480
481 packed.words [1] = (HP_WORD) (unpacked.mantissa >> 32) & DV_MASK; /* pack the second */
482 packed.words [2] = (HP_WORD) (unpacked.mantissa >> 16) & DV_MASK; /* and third */
483 packed.words [3] = (HP_WORD) (unpacked.mantissa >> 0) & DV_MASK; /* and fourth words */
484
485 if (unpacked.exponent < MIN_EXPONENT /* if the exponent is too small */
486 || unpacked.exponent == MIN_EXPONENT && unpacked.mantissa == 0) /* or the result would be all zeros */
487 packed.trap = trap_Float_Underflow; /* then report an underflow trap */
488
489 else if (unpacked.exponent > MAX_EXPONENT) /* otherwise if the exponent is too large */
490 packed.trap = trap_Float_Overflow; /* then report an overflow trap */
491
492 else /* otherwise */
493 packed.trap = trap_None; /* report that packing succeeded */
494 }
495
496 return packed; /* return the packed value */
497 }
498
499
500 /* Add two unpacked numbers.
501
502 The sum of the two operands is returned. If one operand is zero and the
503 other is not, the non-zero operand is returned. If both operand are zero, a
504 "defined zero" is returned in case one or both operands are "negative zeros."
505
506 Otherwise, the difference between the operand exponents is determined. If
507 the magnitude of the difference between the exponents is greater than the
508 number of significant bits, then the smaller number has been scaled to zero
509 (swamped), and so the sum is simply the larger operand. However, if the sum
510 will be significant, the smaller mantissa is shifted to align with the larger
511 mantissa, and the larger exponent is used (as, after the scaling shift, the
512 smaller value has the same exponent). Finally, if the operand signs are the
513 same, the result is the sum of the mantissas. If the signs are different,
514 then the sum is the smaller value subtracted from the larger value, and the
515 result adopts the sign of the larger value.
516
517
518 Implementation notes:
519
520 1. If the addend is zero, the microcode converts the undefined value
521 "negative zero" to the defined positive zero if it is passed as the
522 augend. This also applies to the subtraction operator, which passes a
523 negative zero if the subtrahend is zero.
524 */
525
add(FPU * sum,FPU augend,FPU addend)526 static TRAP_CLASS add (FPU *sum, FPU augend, FPU addend)
527 {
528 int32 magnitude;
529
530 if (addend.mantissa == 0) /* if the addend is zero */
531 if (augend.mantissa == 0) { /* then if the augend is also zero */
532 *sum = zero; /* then the sum is (positive) zero */
533 sum->precision = augend.precision; /* with the appropriate precision */
534 }
535
536 else /* otherwise the augend is non-zero */
537 *sum = augend; /* so the sum is just the augend */
538
539 else if (augend.mantissa == 0) /* otherwise if the augend is zero */
540 *sum = addend; /* then the sum is just the addend */
541
542 else { /* otherwise both operands are non-zero */
543 magnitude = augend.exponent - addend.exponent; /* so determine the magnitude of the difference */
544
545 if (abs (magnitude) > mantissa_bits [augend.precision]) /* if one of the operands is swamped */
546 if (magnitude > 0) /* then if the addend is smaller */
547 *sum = augend; /* then return the augend */
548 else /* otherwise */
549 *sum = addend; /* return the addend */
550
551 else { /* otherwise the addition is required */
552 sum->precision = addend.precision; /* so set the precision to that of the operands */
553
554 if (magnitude > 0) { /* if the addend is smaller */
555 addend.mantissa >>= magnitude; /* then shift right to align the addend */
556 sum->exponent = augend.exponent; /* and use the augend's exponent */
557 }
558
559 else { /* otherwise the augend is smaller or the same */
560 augend.mantissa >>= - magnitude; /* shift right to align the augend */
561 sum->exponent = addend.exponent; /* and use the addend's exponent */
562 }
563
564 if (addend.negative == augend.negative) { /* if the mantissa signs are the same */
565 sum->mantissa = addend.mantissa + augend.mantissa; /* then add the mantissas */
566 sum->negative = addend.negative; /* and use the addend sign for the sum */
567 }
568
569 else if (addend.mantissa > augend.mantissa) { /* otherwise if the addend is larger */
570 sum->mantissa = addend.mantissa - augend.mantissa; /* then subtract the augend */
571 sum->negative = addend.negative; /* and use the addend sign */
572 }
573
574 else { /* otherwise the augend is larger */
575 sum->mantissa = augend.mantissa - addend.mantissa; /* so subtract the addend */
576 sum->negative = augend.negative; /* and use the augend sign */
577 }
578 }
579 }
580
581 return trap_None; /* report that the addition succeeded */
582 }
583
584
585 /* Subtract two unpacked numbers.
586
587 The difference of the two operands is returned. Subtraction is implemented
588 by negating the subtrahend and then adding the minuend.
589
590
591 Implementation notes:
592
593 1. If the subtrahend is zero, negating produces the undefined "negative
594 zero." However, the "add" routine handles this as positive zero, so we
595 do not need to worry about this condition.
596 */
597
subtract(FPU * difference,FPU minuend,FPU subtrahend)598 static TRAP_CLASS subtract (FPU *difference, FPU minuend, FPU subtrahend)
599 {
600 subtrahend.negative = ! subtrahend.negative; /* invert the sign of the subtrahend */
601
602 add (difference, minuend, subtrahend); /* add to obtain the difference */
603
604 return trap_None; /* report that the subtraction succeeded */
605 }
606
607
608 /* Multiply two unpacked numbers.
609
610 The product of the two operands is returned. Conceptually, the
611 implementation requires a 64 x 64 = 128-bit multiply, rounded to the upper 64
612 bits. Instead of implementing the FMPY or EMPY firmware algorithm directly,
613 which uses 16 x 16 = 32-bit partial-product multiplies, it is more efficient
614 under simulation to use 32 x 32 = 64-bit multiplications by splitting the
615 operand mantissas ("a" and "b") into 32-bit high and low ("h" and "l") parts
616 and forming the cross-products:
617
618 64-bit operands
619 ah al
620 +-------+-------+
621 bh bl
622 +-------+-------+
623 _________________
624
625 al * bl [ll]
626 +-------+-------+
627 ah * bl [hl]
628 +-------+-------+
629 al * bh [lh]
630 +-------+-------+
631 ah * bh [hh]
632 +-------+-------+
633 _________________________________
634
635 64-bit product
636 +-------+-------+
637
638 If either operand is zero, a "defined zero" is returned in case one or both
639 operands are "negative zeros." Otherwise, the product exponent is set to the
640 sum of the operand exponents, and the four 64-bit cross-products are formed.
641 The lower 64 bits of the products are summed to form the carry into the upper
642 64 bits, which are summed to produce the product. The product mantissa is
643 aligned, and the product sign is set negative if the operand signs differ.
644
645 Mantissas are represented internally as fixed-point numbers with 54 bits to
646 the right of the binary point. That is, the real number represented is the
647 integer mantissa value * (2 ** -54), where the right-hand term represents the
648 delta for a change of one bit. Multiplication is therefore:
649
650 (product * delta) = (multiplicand * delta) * (multiplier * delta)
651
652 The product is:
653
654 product = (multiplicand * multiplier) * (delta * delta) / delta
655
656 ...which reduces to:
657
658 product = multiplicand * multiplier * delta
659
660 Multiplying the product by (2 ** -54) is equivalent to right-shifting by 54.
661 However, using only the top 64 bits of the 128-bit product is equivalent to
662 right-shifting by 64, so the net correction is a left-shift by 10.
663
664
665 Implementation notes:
666
667 1. 32 x 32 = 64-bit multiplies use intrinsic instructions on the IA-32
668 processor family.
669 */
670
multiply(FPU * product,FPU multiplicand,FPU multiplier)671 static TRAP_CLASS multiply (FPU *product, FPU multiplicand, FPU multiplier)
672 {
673 uint32 ah, al, bh, bl;
674 t_uint64 hh, hl, lh, ll, carry;
675
676 if (multiplicand.mantissa == 0 || multiplier.mantissa == 0) { /* if either operand is zero */
677 *product = zero; /* then the product is (positive) zero */
678 product->precision = multiplicand.precision; /* with the appropriate precision */
679 }
680
681 else { /* otherwise both operands are non-zero */
682 product->precision = multiplicand.precision; /* so set the precision to that of the operands */
683
684 product->exponent = multiplicand.exponent /* the product exponent */
685 + multiplier.exponent; /* is the sum of the operand exponents */
686
687 ah = (uint32) (multiplicand.mantissa >> D32_WIDTH); /* split the multiplicand */
688 al = (uint32) (multiplicand.mantissa & D32_MASK); /* into high and low double-words */
689
690 bh = (uint32) (multiplier.mantissa >> D32_WIDTH); /* split the multiplier */
691 bl = (uint32) (multiplier.mantissa & D32_MASK); /* into high and low double-words */
692
693 hh = ((t_uint64) ah * bh); /* form the */
694 hl = ((t_uint64) ah * bl); /* four cross products */
695 lh = ((t_uint64) al * bh); /* using 32 x 32 = 64-bit multiplies */
696 ll = ((t_uint64) al * bl); /* for efficiency */
697
698 carry = ((ll >> D32_WIDTH) + (hl & D32_MASK) /* add the upper half of "ll" to the lower halves of "hl" */
699 + (lh & D32_MASK)) >> D32_WIDTH; /* and "lh" and shift to leave just the carry bit */
700
701 product->mantissa = hh + (hl >> D32_WIDTH) /* add "hh" to the upper halves of "hl" and "lh" */
702 + (lh >> D32_WIDTH) + carry; /* and the carry bit */
703
704 product->mantissa <<= DELTA_ALIGNMENT; /* align the result */
705
706 product->negative = /* set the product sign negative */
707 (multiplicand.negative != multiplier.negative); /* if the operand signs differ */
708 }
709
710 return trap_None; /* report that the multiplication succeeded */
711 }
712
713
714 /* Divide two unpacked numbers.
715
716 The quotient of the two operands is returned, and the remainder is discarded.
717 Conceptually, the implementation requires a 128 / 64 = 64-bit division, with
718 64 bits of zeros appended to the dividend to get the required precision.
719 However, instead of implementing the FDIV or EDIV firmware algorithm
720 directly, which uses 32 / 16 = 16-bit trial divisions, it is more efficient
721 under simulation to use 64 / 32 = 64-bit divisions with the classic
722 divide-and-correct method.
723
724 This method considers the 64-bit dividend and divisor each to consist of two
725 32-bit "digits." The 64-bit dividend "ah,al" is divided by the first 32-bit
726 digit "bh" of the 64-bit divisor "bh,bl", yielding a 64-bit trial quotient
727 and a 64-bit remainder. A correction is developed by subtracting the product
728 of the second 32-bit digit "bl" of the divisor and the trial quotient from
729 the remainder. If the remainder is negative, the trial quotient is too
730 large, so it is decremented, and the (full 64-bit) divisor is added to the
731 correction. This is repeated until the correction is non-negative,
732 indicating that the first quotient digit is correct. The process is then
733 repeated using the corrected remainder as the dividend to develop the second
734 64-bit trial quotient and second quotient digit. The first quotient digit is
735 positioned, and the two quotient digits are then added to produce the final
736 64-bit quotient. The quotient mantissa is aligned, and the quotient sign is
737 set negative if the operand signs differ.
738
739 Mantissas are represented internally as fixed-point numbers with 54 bits to
740 the right of the binary point. That is, the real number represented is the
741 integer mantissa value * (2 ** -54), where the right-hand term represents the
742 delta for a change of one bit. Division is therefore:
743
744 (quotient * delta) = (dividend * delta) / (divisor * delta)
745
746 The quotient is:
747
748 quotient = (dividend / divisor) * (delta / (delta * delta))
749
750 ...which reduces to:
751
752 quotient = (dividend / divisor) / delta
753
754 Dividing the quotient by (2 ** -54) is equivalent to left-shifting by 54.
755 However, using only the top 64 bits of the 128-bit product is equivalent to
756 right-shifting by 64, so the net correction is a right-shift by 10.
757
758 See "Divide-and-Correct Methods for Multiple Precision Division" by Marvin L.
759 Stein, Communications of the ACM, August 1964 for background.
760
761
762 Implementation notes:
763
764 1. IA-32 processors do not have a 64 / 32 = 64-bit divide instruction (they
765 have a 64 / 32 = 32 instruction instead). Therefore, a run-time library
766 routine for 64 / 64 = 64 is generated. Consequently, "bh" and "bl" are
767 declared as 64-bit variables, as this produces simpler code than if they
768 were declared as 32-bit variables.
769
770 2. "bh" is guaranteed to be non-zero because the divisor mantissa is
771 normalized on entry. Therefore, no division-by-zero check is needed.
772
773 3. The quotient alignment shift logically expresses ((q1 << 32) + q2) >> 10,
774 but it must be implemented as (q1 << 22) + (q2 >> 10) as otherwise the
775 left-shift would lose significant bits.
776 */
777
divide(FPU * quotient,FPU dividend,FPU divisor)778 static TRAP_CLASS divide (FPU *quotient, FPU dividend, FPU divisor)
779 {
780 t_uint64 bh, bl, q1, q2, r1, r2;
781 t_int64 c1, c2;
782
783 if (divisor.mantissa == 0) { /* if the divisor is zero */
784 *quotient = dividend; /* then return the dividend */
785 return trap_Float_Zero_Divide; /* and report the error */
786 }
787
788 else if (dividend.mantissa == 0) { /* otherwise if the dividend is zero */
789 *quotient = zero; /* then the quotient is (positive) zero */
790 quotient->precision = dividend.precision; /* with the appropriate precision */
791 }
792
793 else { /* otherwise both operands are non-zero */
794 quotient->precision = dividend.precision; /* so set the precision to that of the operands */
795
796 quotient->exponent = dividend.exponent /* the quotient exponent */
797 - divisor.exponent; /* is the difference of the operand exponents */
798
799 bh = divisor.mantissa >> D32_WIDTH; /* split the divisor */
800 bl = divisor.mantissa & D32_MASK; /* into high and low halves */
801
802 q1 = dividend.mantissa / bh; /* form the first trial quotient */
803 r1 = dividend.mantissa % bh; /* and remainder */
804
805 c1 = r1 - bl * q1; /* form the first corrected remainder */
806
807 while (c1 < 0) { /* while a correction is required */
808 q1 = q1 - 1; /* the first trial quotient is too large */
809 c1 = c1 + divisor.mantissa; /* so reduce it and increase the remainder */
810 }
811
812 q2 = c1 / bh; /* form the second trial quotient */
813 r2 = c1 % bh; /* and remainder */
814
815 c2 = r2 - bl * q2; /* form the second corrected remainder */
816
817 while (c2 < 0) { /* while a correction is required */
818 q2 = q2 - 1; /* the second trial quotient is too large */
819 c2 = c2 + divisor.mantissa; /* so reduce it and increase the remainder */
820 }
821
822 quotient->mantissa = (q1 << D32_WIDTH - DELTA_ALIGNMENT) /* sum the quotient digits */
823 + (q2 >> DELTA_ALIGNMENT); /* and align the result */
824
825 quotient->negative = /* set the quotient sign negative */
826 (dividend.negative != divisor.negative); /* if the operand signs differ */
827 }
828
829 return trap_None; /* report that the division succeeded */
830 }
831
832
833 /* Float an integer to a floating-point value.
834
835 The integer operand is converted to a floating-point value and returned. The
836 desired precision of the result must be set before calling.
837
838 Conversion is simply a matter of copying the integer value. When the
839 unpacked value is normalized, it will be converted to floating-point format.
840
841
842 Implementation notes:
843
844 1. The incoming integer has already been unpacked into fp_f format, so we do
845 not need to set the precision here.
846 */
847
ffloat(FPU * real,FPU integer)848 static TRAP_CLASS ffloat (FPU *real, FPU integer)
849 {
850 *real = integer; /* copy the unpacked value */
851 return trap_None; /* report that the float succeeded */
852 }
853
854
855 /* Fix an unpacked floating-point value to an integer.
856
857 A floating-point value is converted to a double-word integer. If the "round"
858 parameter is true, the value is rounded before conversion; otherwise, it is
859 truncated.
860
861 If the real value is less than 0.5, then the integer value is zero.
862 Otherwise, if rounding is requested, add 0.5 (created by shifting a "1" into
863 the position immediately to the right of the least significant bit of the
864 integer result) to the value. Finally, the result precision is set. The
865 remaining conversion occurs when the result is packed.
866
867
868 Implementation notes:
869
870 1. The FIXR/FIXT microcode gives an Integer Overflow for exponent > 30, even
871 though -2 ** 31 (143700 000000) does fit in the result.
872 */
873
fix(FPU * integer,FPU real,t_bool round)874 static TRAP_CLASS fix (FPU *integer, FPU real, t_bool round)
875 {
876 if (real.exponent < -1) /* if the real value is < 0.5 */
877 integer->mantissa = 0; /* then the integer value is 0 */
878
879 else { /* otherwise the value is convertible */
880 integer->mantissa = real.mantissa; /* so set the mantissa */
881
882 if (round && real.exponent < UNPACKED_BITS) /* if rounding is requested and the value won't overflow */
883 integer->mantissa += /* then add one-half of the LSB to the value */
884 (t_uint64) 1 << (UNPACKED_BITS - real.exponent - 1);
885 }
886
887 integer->exponent = real.exponent; /* copy the exponent */
888 integer->negative = real.negative; /* and sign */
889 integer->precision = in_d; /* and set to pack to a double integer */
890
891 return trap_None; /* report that the fix succeeded */
892 }
893