1 /* hp3000_cpu_fp.c: HP 3000 floating-point arithmetic simulator
2 
3    Copyright (c) 2016-2020, J. David Bryan
4 
5    Permission is hereby granted, free of charge, to any person obtaining a copy
6    of this software and associated documentation files (the "Software"), to deal
7    in the Software without restriction, including without limitation the rights
8    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9    copies of the Software, and to permit persons to whom the Software is
10    furnished to do so, subject to the following conditions:
11 
12    The above copyright notice and this permission notice shall be included in
13    all copies or substantial portions of the Software.
14 
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
18    AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 
22    Except as contained in this notice, the name of the author shall not be used
23    in advertising or otherwise to promote the sale, use or other dealings in
24    this Software without prior written authorization from the author.
25 
26    09-Oct-20    JDB     Modified to return Ext_Float traps for extended operands
27    30-Sep-20    JDB     Cleaned up QWORD assembly in "norm_round_pack"
28    14-Sep-20    JDB     Added a guard bit to 56-bit calculations for rounding
29                         "divide" now renormalizes divisors to reduce calculation
30    11-Jun-16    JDB     Bit mask constants are now unsigned
31    03-Feb-16    JDB     First release version
32    25-Aug-15    JDB     Fixed FSUB zero subtrahend bug (from Norwin Malmberg)
33    01-Apr-15    JDB     Passes the floating point tests in the CPU diagnostic (D420A1)
34    29-Mar-15    JDB     Created
35 
36    References:
37      - HP 3000 Series II System Microprogram Listing
38          (30000-90023, August 1976)
39      - HP 3000 Series II/III System Reference Manual
40          (30000-90020, July 1978)
41      - Machine Instruction Set Reference Manual
42          (30000-90022, June 1984)
43 
44 
45    This module implements multiple-precision floating-point operations to
46    support the HP 3000 CPU instruction set.  It employs 64-bit integer
47    arithmetic for speed and simplicity of implementation.  The host compiler
48    must support 64-bit integers.
49 
50    HP 3000 computers use a proprietary floating-point format.  All 3000s
51    support two-word "single-precision" floating-point arithmetic as standard
52    equipment.  The original HP 3000 CX and Series I CPUs support three-word
53    "extended-precision" floating-point arithmetic when the optional HP 30011A
54    Extended Instruction Set microcode was installed.  The Series II and later
55    machines replace the three-word instructions with four-word "double-
56    precision" floating-point arithmetic and include the EIS as part of the
57    standard equipment.
58 
59    Floating-point numbers have this format:
60 
61        0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
62      +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
63      | S |      exponent biased by +256      |   positive mantissa   |
64      +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
65      |                       positive mantissa                       |
66      +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
67      |                       positive mantissa                       | (extended)
68      +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
69      |                       positive mantissa                       | (double)
70      +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
71 
72    That is, the three- and four-word formats merely extended the mantissa with
73    no change to the exponent range.
74 
75    The mantissa is represented in sign-magnitude format.  The mantissa is always
76    positive, with an assumed "1" to the left of the MSB, and the sign bit is set
77    for negative values.  The exponent is in "excess-256" format, i.e.,
78    represented as an unsigned value biased by +256, giving an unbiased range of
79    -256 to +255.  The binary point is assumed to be between the leading "1" and
80    the MSB, so a zero value must be handled as a special case of all bits equal
81    to zero, which otherwise would represent the value +1.0 * 2 ** -256.
82    Normalization shifts the mantissa left and decrements the exponent until a
83    "1" bit appears in bit 9.
84 
85    The use of sign-magnitude format means that floating-point negation merely
86    complements the sign bit, and floating-point comparison simply checks the
87    signs and, if they are the same, then applies an integer comparison to the
88    packed values.  However, it also implies the existence of a "negative zero"
89    value, represented by all zeros except for the sign bit.  This value is
90    undefined; if a negative zero is supplied as an operand to one of the
91    arithmetic routines, it is treated as positive zero.  Negative zero is never
92    returned even if, e.g., it is supplied as the dividend or multiplier.
93 
94    This implementation provides add, subtract, multiply, divide, float, and fix
95    operations on two-, three-, and four-word floating point operands.  The
96    routines are called via a common floating-point executor ("fp_exec") by
97    supplying the operation to be performed and the operand(s) on which to act.
98    An operand contains the packed (i.e., in-memory) representation and the
99    precision of the value.  The returned value includes the packed
100    representation and the precision, along with a value that indicates whether
101    or not the operation resulted in an arithmetic trap.  It is the
102    responsibility of the caller to take the trap if it is indicated.
103 
104    Seven user trap values are returned:
105 
106      Parameter  Description
107      ---------  ------------------------------------------------
108       000001    Integer Overflow
109       000002    Float Overflow
110       000003    Float Underflow
111       000005    Float Zero Divide
112       000010    Extended Precision Floating Point Overflow
113       000011    Extended Precision Floating Point Underflow
114       000012    Extended Precision Floating Point Divide by Zero
115 */
116 
117 
118 
119 #include "hp3000_defs.h"
120 #include "hp3000_cpu.h"
121 #include "hp3000_cpu_fp.h"
122 
123 
124 
125 /* Program constants */
126 
127 #define EXPONENT_BIAS       256                 /* the exponent is biased by +256 */
128 
129 #define MIN_EXPONENT        -256                /* the smallest representable exponent */
130 #define MAX_EXPONENT        +255                /* the largest representable exponent */
131 
132 #define EXPONENT_MASK       0077700u            /* the mask to isolate the exponent in the first word */
133 #define MANTISSA_MASK       0000077u            /* the mask to isolate the mantissa in the first word */
134 
135 #define EXPONENT_SHIFT      6                   /* the exponent alignment shift */
136 #define MANTISSA_SHIFT      0                   /* the mantissa alignment shift */
137 
138 #define UNPACKED_BITS       54                  /* the number of significant bits in the unpacked mantissa */
139 #define GUARD_BITS          1                   /* the number of guard bits in the mantissa used for rounding */
140 
141 #define IMPLIED_BIT         ((t_uint64) 1uL << UNPACKED_BITS + GUARD_BITS)  /* the implied MSB in the mantissa */
142 #define CARRY_BIT           (IMPLIED_BIT << 1)                              /* the carry from the MSB in the mantissa */
143 
144 
145 /* Floating-point accessors */
146 
147 #define MANTISSA(w)         ((t_uint64) (((w) & MANTISSA_MASK) >> MANTISSA_SHIFT))
148 #define EXPONENT(w)         ((int32)    (((w) & EXPONENT_MASK) >> EXPONENT_SHIFT))
149 
150 #define TO_EXPONENT(w)      ((w) + EXPONENT_BIAS << EXPONENT_SHIFT & EXPONENT_MASK)
151 
152 #define DENORMALIZED(m)     ((m) > 0 && ((m) & IMPLIED_BIT) == 0)
153 
154 
155 /* Floating-point unpacked representation */
156 
157 typedef struct {
158     t_uint64   mantissa;                        /* the unsigned mantissa */
159     int32      exponent;                        /* the unbiased exponent */
160     t_bool     negative;                        /* TRUE if the mantissa is negative */
161     FP_OPSIZE  precision;                       /* the precision currently expressed by the value */
162     } FPU;
163 
164 static const FPU zero = { 0, 0, FALSE, fp_f };  /* an unpacked zero value */
165 
166 
167 /* Floating-point descriptors */
168 
169 static const int32 mantissa_bits [] = {         /* the number of mantissa bits, indexed by FP_OPSIZE */
170     16 - 1,                                     /*   in_s bits available - sign bit */
171     32 - 1,                                     /*   in_d bits available - sign bit */
172     22 + 1,                                     /*   fp_f bits explicit + bits implied */
173     38 + 1,                                     /*   fp_x bits explicit + bits implied */
174     54 + 1                                      /*   fp_e bits explicit + bits implied */
175     };
176 
177 static const t_uint64 mantissa_mask [] = {      /* the mask to the mantissa bits, indexed by FP_OPSIZE */
178     ((t_uint64) 1uL << 16) - 1 <<  0,           /*   in_s 16-bit mantissa */
179     ((t_uint64) 1uL << 32) - 1 <<  0,           /*   in_d 32-bit mantissa */
180     ((t_uint64) 1uL << 22) - 1 << 32,           /*   fp_f 22-bit mantissa */
181     ((t_uint64) 1uL << 38) - 1 << 16,           /*   fp_x 38-bit mantissa */
182     ((t_uint64) 1uL << 54) - 1 <<  0            /*   fp_e 54-bit mantissa */
183     };
184 
185 
186 static const t_uint64 half_lsb [] = {           /* half of the LSB for rounding, indexed by FP_OPSIZE */
187     0,                                          /*   in_s not used */
188     (t_uint64) 1uL << 54,                       /*   in_d LSB when shifted right by exponent value */
189     (t_uint64) 1uL << 32,                       /*   fp_f word 2 LSB */
190     (t_uint64) 1uL << 16,                       /*   fp_x word 3 LSB */
191     (t_uint64) 1uL <<  0                        /*   fp_e word 4 LSB */
192     };
193 
194 
195 /* Floating-point local utility routine declarations */
196 
197 static FPU     unpack          (FP_OPND packed);
198 static FP_OPND norm_round_pack (FPU     unpacked);
199 
200 static TRAP_CLASS add      (FPU *sum,        FPU augend,       FPU addend);
201 static TRAP_CLASS subtract (FPU *difference, FPU minuend,      FPU subtrahend);
202 static TRAP_CLASS multiply (FPU *product,    FPU multiplicand, FPU multiplier);
203 static TRAP_CLASS divide   (FPU *quotient,   FPU dividend,     FPU divisor);
204 
205 static TRAP_CLASS ffloat (FPU *real,    FPU integer);
206 static TRAP_CLASS fix    (FPU *integer, FPU real, t_bool round);
207 
208 
209 
210 /* Floating-point global routines */
211 
212 
213 /* Execute a floating-point operation.
214 
215    The operator specified by the "operation" parameter is applied to the
216    "left_op" and to the "right_op" (if applicable), and the result is returned.
217    The "precision" fields of the operands must be set to the representations
218    stored within before calling this routine.
219 
220    On entry, the left and right (if needed) operands are unpacked, and the
221    executor for the specified operation is called.  The result is normalized,
222    rounded, and packed.  Any trap condition detected by the operator routine is
223    set into the packed operand, unless the normalize/round/pack routine detected
224    its own trap condition.  Finally, the packed result is returned.
225 */
226 
fp_exec(FP_OPR operator,FP_OPND left_op,FP_OPND right_op)227 FP_OPND fp_exec (FP_OPR operator, FP_OPND left_op, FP_OPND right_op)
228 {
229 FPU        left, right, result;
230 FP_OPND    result_op;
231 TRAP_CLASS trap;
232 
233 left = unpack (left_op);                                /* unpack the left-hand operand */
234 
235 if (operator <= fp_div)                                 /* if the operator requires two operands */
236     right = unpack (right_op);                          /*   then unpack the right-hand operation */
237 
238 switch (operator) {                                     /* dispatch the floating-point operation */
239 
240     case fp_add:
241         trap = add (&result, left, right);              /* add the two operands */
242         break;
243 
244     case fp_sub:
245         trap = subtract (&result, left, right);         /* subtract the two operands */
246         break;
247 
248     case fp_mpy:
249         trap = multiply (&result, left, right);         /* multiply the two operands */
250         break;
251 
252     case fp_div:
253         trap = divide (&result, left, right);           /* divide the two operands */
254         break;
255 
256     case fp_flt:
257         trap = ffloat (&result, left);                  /* convert the integer operand to a floating-point number */
258         break;
259 
260     case fp_fixr:
261         trap = fix (&result, left, TRUE);               /* round the floating-point operand to an integer */
262         break;
263 
264     case fp_fixt:
265         trap = fix (&result, left, FALSE);              /* truncate the floating-point operand to an integer */
266         break;
267 
268     default:
269         result = zero;                                  /* if an unimplemented operation is requested */
270         result.precision = left.precision;              /*   then return a zero of the appropriate precision */
271         trap = trap_Unimplemented;                      /*     and trap for an Unimplemented Instruction */
272         break;
273     }                                                   /* all cases are handled */
274 
275 result_op = norm_round_pack (result);                   /* normalize, round, and pack the result */
276 
277 if (result_op.trap == trap_None)                        /* if the pack operation succeeded */
278     result_op.trap = trap;                              /*   then set any arithmetic trap returned by the operation */
279 
280 return result_op;                                       /* return the result */
281 }
282 
283 
284 
285 /* Floating-point local utility routine declarations */
286 
287 
288 /* Unpack a packed operand.
289 
290    A packed integer or floating-point value is split into separate mantissa and
291    exponent variables.  The multiple words of the mantissa are concatenated into
292    a single 64-bit unsigned value, and the exponent is shifted with recovery of
293    the sign.  The mantissa is then shifted left one additional position to add a
294    guard bit that will be used for rounding.
295 
296    The absolute values of single and double integers are unpacked into the
297    mantissas and preshifted by 32 or 16 bits, respectively, to reduce the
298    shifting needed for normalization.  The resulting value is unnormalized, but
299    the exponent is set correctly to reflect the preshift.  The precisions for
300    unpacked integers are set to single-precision but are valid for extended- and
301    double-precision, as the unpacked representations are identical.
302 
303    The packed floating-point representation contains an implied "1" bit
304    preceding the binary point in the mantissa, except if the floating-point
305    value is zero.  The unpacked mantissa includes the implied bit.  The bias is
306    removed from the exponent, producing a signed value, and the sign of the
307    mantissa is set from the sign of the packed value.
308 
309    A packed zero value is represented by all words set to zero.  In the unpacked
310    representation, the mantissa is zero, the exponent is the minimum value
311    (-256), and the sign is always positive (as "negative zero" is undefined).
312 
313 
314    Implementation notes:
315 
316     1. Integers could have been copied directly to the mantissa with the
317        exponents set to the appropriate values (54 in this case).  However, the
318        current implementation unpacks integers only in preparation for repacking
319        as single-precision floating-point numbers i.e., to implement the "float"
320        operator.  This would require a larger number of shifts to normalize the
321        values -- as many as 54 to normalize the value 1.  Preshifting reduces
322        the number of normalizing shifts needed to between 6 and 22.
323 */
324 
unpack(FP_OPND packed)325 static FPU unpack (FP_OPND packed)
326 {
327 FPU    unpacked;
328 uint32 word;
329 
330 switch (packed.precision) {                             /* dispatch based on the operand precision */
331 
332     case in_s:                                          /* unpack a single integer */
333         word = packed.words [0];                        /*   from the first word */
334 
335         if (word & D16_SIGN) {                          /* if the value is negative */
336             word = NEG16 (word);                        /*   then make it positive */
337             unpacked.negative = TRUE;                   /*     and set the mantissa sign flag */
338             }
339 
340         else                                            /* otherwise the value is positive */
341             unpacked.negative = FALSE;                  /*   so clear the sign flag */
342 
343         unpacked.mantissa = (t_uint64) word << 32 + GUARD_BITS; /* store the preshifted value as the mantissa */
344         unpacked.exponent = UNPACKED_BITS - 32;                 /*   and set the exponent to account for the shift */
345         unpacked.precision = fp_f;                              /* set the precision */
346         break;
347 
348 
349     case in_d:                                                  /* unpack a double integer */
350         word = TO_DWORD (packed.words [0], packed.words [1]);   /*   from the first two words */
351 
352         if (word & D32_SIGN) {                          /* if the value is negative */
353             word = NEG32 (word);                        /*   then make it positive */
354             unpacked.negative = TRUE;                   /*     and set the mantissa sign flag */
355             }
356 
357         else                                            /* otherwise the value is positive */
358             unpacked.negative = FALSE;                  /*   so clear the sign flag */
359 
360         unpacked.mantissa = (t_uint64) word << 16 + GUARD_BITS; /* store the preshifted value as the mantissa */
361         unpacked.exponent = UNPACKED_BITS - 16;                 /*   and set the exponent to account for the shift */
362         unpacked.precision = fp_f;                              /* set the precision */
363         break;
364 
365 
366     case fp_f:                                              /* unpack a single-precision */
367     case fp_x:                                              /*   extended-precision */
368     case fp_e:                                              /*     or double-precision floating-point number */
369         unpacked.mantissa = MANTISSA (packed.words [0]);    /*       starting with the first word */
370 
371         for (word = 1; word <= 3; word++) {                 /* unpack from one to three more words */
372             unpacked.mantissa <<= D16_WIDTH;                /* shift the accumulated value */
373 
374             if (word < TO_COUNT (packed.precision))         /* if all words are not included yet */
375                 unpacked.mantissa |= packed.words [word];   /*   then merge the next word into value */
376             }
377 
378         unpacked.mantissa <<= GUARD_BITS;                   /* add the guard bits */
379 
380         unpacked.exponent =                                 /* store the exponent */
381            EXPONENT (packed.words [0]) - EXPONENT_BIAS;     /*   after removing the bias */
382 
383         if (unpacked.exponent == MIN_EXPONENT               /* if the biased exponent and mantissa are zero */
384           && unpacked.mantissa == 0)                        /*   then the mantissa is positive */
385             unpacked.negative = FALSE;                      /*     regardless of the packed sign */
386 
387         else {                                                          /* otherwise the value is non-zero */
388             unpacked.mantissa |= IMPLIED_BIT;                           /*   so add back the implied "1" bit */
389             unpacked.negative = ((packed.words [0] & D16_SIGN) != 0);   /*     and set the sign as directed */
390             }
391 
392         unpacked.precision = packed.precision;          /* set the precision */
393         break;
394     }                                                   /* all cases are handled */
395 
396 return unpacked;                                        /* return the unpacked value */
397 }
398 
399 
400 /* Normalize, round, and pack an unpacked value.
401 
402    An unpacked value is normalized, rounded, and packed into the representation
403    indicated by the operand precision.  If the supplied value cannot be
404    represented, the appropriate trap indication is returned.
405 
406    A single- or double-integer is packed into the first word or two words of the
407    result as a twos-complement value.  If the value is too large for the result
408    precision, an Integer Overflow trap is indicated, and a zero value is
409    returned.
410 
411    For a real of any precision, the mantissa is first normalized by shifting
412    right if the carry bit is set, or by shifting left until the implied bit is
413    set.  The exponent is adjusted for any shifts performed.  The value is then
414    rounded by adding one-half of the least-significant bit; if that causes a
415    carry, the exponent is adjusted again.  Finally, the mantissa is masked to
416    the number of bits corresponding to the desired precision and packed into the
417    in-memory representation.  The exponent is checked, and it exceeds the
418    permitted range, the appropriate trap indication is returned.
419 
420 
421    Implementation notes:
422 
423     1. If a carry occurs due to rounding, the mantissa is not shifted because
424        the carry bit will be masked off during packing.  Incrementing the
425        exponent in this case is sufficient.
426 
427     2. Masking the mantissa is required to remove the carry and implied bits
428        before packing.  Masking the value bits in excess of the specified
429        precision is not required but is desirable to avoid implying more
430        precision than actually is present.
431 
432     3. The result value +/-1 x 2 ** -256 is considered an underflow, as the
433        packed representation is identical to the zero representation, i.e., an
434        all-zeros value.
435 */
436 
norm_round_pack(FPU unpacked)437 static FP_OPND norm_round_pack (FPU unpacked)
438 {
439 FP_OPND packed;
440 int32   integer;
441 
442 packed.precision = unpacked.precision;                  /* set the precision */
443 
444 if (unpacked.mantissa == 0) {                           /* if the mantissa is zero */
445     packed.words [0] = 0;                               /*   then set */
446     packed.words [1] = 0;                               /*     the packed */
447     packed.words [2] = 0;                               /*       representation to */
448     packed.words [3] = 0;                               /*         all zeros */
449 
450     packed.trap = trap_None;                            /* report that packing succeeded */
451     }
452 
453 else if (unpacked.precision <= in_d)                                /* if packing a single or double integer */
454     if (unpacked.exponent >= mantissa_bits [unpacked.precision]) {  /*   then if the value is too large to fit */
455         packed.words [0] = 0;                                       /*     then return */
456         packed.words [1] = 0;                                       /*       a zero value */
457         packed.trap = trap_Integer_Overflow;                        /*         and an overflow trap */
458         }
459 
460     else {                                                                      /* otherwise */
461         integer = (int32)                                                       /*   convert the value to an integer */
462           (unpacked.mantissa >> UNPACKED_BITS + GUARD_BITS - unpacked.exponent  /*     by shifting right to align */
463           & mantissa_mask [unpacked.precision]);                                /*       and masking to the precision */
464 
465         if (unpacked.negative)                          /* if the value is negative */
466             integer = - integer;                        /*   then negate the result */
467 
468         packed.words [0] = UPPER_WORD (integer);        /* split the result */
469         packed.words [1] = LOWER_WORD (integer);        /*   into the first two words */
470 
471         packed.trap = trap_None;                        /* report that packing succeeded */
472         }
473 
474 else {                                                  /* otherwise a real number is to be packed */
475     if (unpacked.mantissa & CARRY_BIT) {                /* if a carry out of the MSB has occurred */
476         unpacked.mantissa >>= 1;                        /*   then shift the mantissa to normalize */
477         unpacked.exponent +=  1;                        /*     and increment the exponent to compensate */
478         }
479 
480     else                                                /* otherwise */
481         while (DENORMALIZED (unpacked.mantissa)) {      /*   while the mantissa is not in normal form */
482             unpacked.mantissa <<= 1;                    /*     shift the mantissa toward the implied-bit position */
483             unpacked.exponent -=  1;                    /*       and decrement the exponent to compensate */
484             }
485 
486     unpacked.mantissa += half_lsb [unpacked.precision]; /* round the mantissa by adding one-half of the LSB */
487 
488     if (unpacked.mantissa & CARRY_BIT)                  /* if rounding caused a carry out of the MSB */
489         unpacked.exponent = unpacked.exponent + 1;      /*   then increment the exponent to compensate */
490 
491     unpacked.mantissa >>= GUARD_BITS;                   /* remove the guard bits */
492 
493     unpacked.mantissa &= mantissa_mask [unpacked.precision];    /* mask the mantissa to the specified precision */
494 
495     packed.words [0] = HIGH_UPPER_WORD (unpacked.mantissa)      /* pack the first word of the mantissa */
496                        | TO_EXPONENT (unpacked.exponent)        /*   with the exponent */
497                        | (unpacked.negative ? D16_SIGN : 0);    /*     and the sign bit */
498 
499     packed.words [1] = LOW_UPPER_WORD (unpacked.mantissa);      /* pack the second */
500     packed.words [2] = UPPER_WORD (unpacked.mantissa);          /*   and third */
501     packed.words [3] = LOWER_WORD (unpacked.mantissa);          /*     and fourth words */
502 
503     if (unpacked.exponent < MIN_EXPONENT                                /* if the exponent is too small */
504       || unpacked.exponent == MIN_EXPONENT && unpacked.mantissa == 0)   /*   or the result would be all zeros */
505         if (packed.precision == fp_f)                                   /*     then if this is a standard operand */
506             packed.trap = trap_Float_Underflow;                         /*       then report a standard underflow trap */
507         else                                                            /*     otherwise */
508             packed.trap = trap_Ext_Float_Underflow;                     /*       report an extended underflow trap */
509 
510     else if (unpacked.exponent > MAX_EXPONENT)                          /* otherwise if the exponent is too large */
511         if (packed.precision == fp_f)                                   /*   then if this is a standard operand */
512             packed.trap = trap_Float_Overflow;                          /*     then report a standard overflow trap */
513         else                                                            /*   otherwise */
514             packed.trap = trap_Ext_Float_Overflow;                      /*     report an extended overflow trap */
515 
516     else                                                                /* otherwise */
517         packed.trap = trap_None;                                        /*   report that packing succeeded */
518     }
519 
520 return packed;                                          /* return the packed value */
521 }
522 
523 
524 /* Add two unpacked numbers.
525 
526    The sum of the two operands is returned.  If one operand is zero and the
527    other is not, the non-zero operand is returned.  If both operand are zero, a
528    "defined zero" is returned in case one or both operands are "negative zeros."
529 
530    Otherwise, the difference between the operand exponents is determined.  If
531    the magnitude of the difference between the exponents is greater than the
532    number of significant bits, then the smaller number has been scaled to zero
533    (swamped), and so the sum is simply the larger operand.  However, if the sum
534    will be significant, the smaller mantissa is shifted to align with the larger
535    mantissa, and the larger exponent is used (as, after the scaling shift, the
536    smaller value has the same exponent).  Finally, if the operand signs are the
537    same, the result is the sum of the mantissas.  If the signs are different,
538    then the sum is the smaller value subtracted from the larger value, and the
539    result adopts the sign of the larger value.
540 
541 
542    Implementation notes:
543 
544     1. If the addend is zero, the microcode converts the undefined value
545        "negative zero" to the defined positive zero if it is passed as the
546        augend.  This also applies to the subtraction operator, which passes a
547        negative zero if the subtrahend is zero.
548 */
549 
add(FPU * sum,FPU augend,FPU addend)550 static TRAP_CLASS add (FPU *sum, FPU augend, FPU addend)
551 {
552 int32 magnitude;
553 
554 if (addend.mantissa == 0)                              /* if the addend is zero */
555     if (augend.mantissa == 0) {                        /*   then if the augend is also zero */
556         *sum = zero;                                   /*     then the sum is (positive) zero */
557         sum->precision = augend.precision;             /*       with the appropriate precision */
558         }
559 
560     else                                                /*   otherwise the augend is non-zero */
561         *sum = augend;                                  /*     so the sum is just the augend */
562 
563 else if (augend.mantissa == 0)                          /* otherwise if the augend is zero */
564     *sum = addend;                                      /*   then the sum is just the addend */
565 
566 else {                                                  /* otherwise both operands are non-zero */
567     magnitude = augend.exponent - addend.exponent;      /*   so determine the magnitude of the difference */
568 
569     if (abs (magnitude) > mantissa_bits [augend.precision]) /* if one of the operands is swamped */
570         if (magnitude > 0)                                  /*   then if the addend is smaller */
571             *sum = augend;                                  /*     then return the augend */
572         else                                                /*   otherwise */
573             *sum = addend;                                  /*     return the addend */
574 
575     else {                                              /* otherwise the addition is required */
576         sum->precision = addend.precision;              /*   so set the precision to that of the operands */
577 
578         if (magnitude > 0) {                            /* if the addend is smaller */
579             addend.mantissa >>= magnitude;              /*   then shift right to align the addend */
580             sum->exponent = augend.exponent;            /*     and use the augend's exponent */
581             }
582 
583         else {                                          /* otherwise the augend is smaller or the same */
584             augend.mantissa >>= - magnitude;            /*   shift right to align the augend */
585             sum->exponent = addend.exponent;            /*     and use the addend's exponent */
586             }
587 
588         if (addend.negative == augend.negative) {               /* if the mantissa signs are the same */
589             sum->mantissa = addend.mantissa + augend.mantissa;  /*   then add the mantissas */
590             sum->negative = addend.negative;                    /*     and use the addend sign for the sum */
591             }
592 
593         else if (addend.mantissa > augend.mantissa) {           /* otherwise if the addend is larger */
594             sum->mantissa = addend.mantissa - augend.mantissa;  /*   then subtract the augend */
595             sum->negative = addend.negative;                    /*     and use the addend sign */
596             }
597 
598         else {                                                  /* otherwise the augend is larger */
599             sum->mantissa = augend.mantissa - addend.mantissa;  /*   so subtract the addend */
600             sum->negative = augend.negative;                    /*     and use the augend sign */
601             }
602         }
603     }
604 
605 return trap_None;                                       /* report that the addition succeeded */
606 }
607 
608 
609 /* Subtract two unpacked numbers.
610 
611    The difference of the two operands is returned.  Subtraction is implemented
612    by negating the subtrahend and then adding the minuend.
613 
614 
615    Implementation notes:
616 
617     1. If the subtrahend is zero, negating produces the undefined "negative
618        zero."  However, the "add" routine handles this as positive zero, so we
619        do not need to worry about this condition.
620 */
621 
subtract(FPU * difference,FPU minuend,FPU subtrahend)622 static TRAP_CLASS subtract (FPU *difference, FPU minuend, FPU subtrahend)
623 {
624 subtrahend.negative = ! subtrahend.negative;            /* invert the sign of the subtrahend */
625 
626 add (difference, minuend, subtrahend);                  /* add to obtain the difference */
627 
628 return trap_None;                                       /* report that the subtraction succeeded */
629 }
630 
631 
632 /* Multiply two unpacked numbers.
633 
634    The product of the two operands is returned.  Conceptually, the
635    implementation requires a 64 x 64 = 128-bit multiply, rounded to the upper 64
636    bits.  Instead of implementing the FMPY or EMPY firmware algorithm directly,
637    which uses 16 x 16 = 32-bit partial-product multiplies, it is more efficient
638    under simulation to use 32 x 32 = 64-bit multiplications by splitting the
639    operand mantissas ("a" and "b") into 32-bit high and low ("h" and "l") parts
640    and forming the cross-products:
641 
642                       64-bit operands
643                          ah      al
644                      +-------+-------+
645                          bh      bl
646                      +-------+-------+
647                      _________________
648 
649                          al  *   bl      [ll]
650                      +-------+-------+
651                  ah  *   bl              [hl]
652              +-------+-------+
653                  al  *   bh              [lh]
654              +-------+-------+
655          ah  *   bh                      [hh]
656      +-------+-------+
657      _________________________________
658 
659        64-bit product
660      +-------+-------+
661 
662    If either operand is zero, a "defined zero" is returned in case one or both
663    operands are "negative zeros."  Otherwise, the product exponent is set to the
664    sum of the operand exponents, and the four 64-bit cross-products are formed.
665    The lower 64 bits of the products are summed to form the carry into the upper
666    64 bits, which are summed to produce the product.  The product mantissa is
667    aligned, and the product sign is set negative if the operand signs differ.
668 
669    Mantissas are represented internally as fixed-point numbers with 54 data bits
670    plus one guard bit to the right of the binary point.  That is, the real
671    number represented is the integer mantissa value * (2 ** -55), where the
672    right-hand term represents the delta for a change of one bit.  Multiplication
673    is therefore:
674 
675      (product * delta) = (multiplicand * delta) * (multiplier * delta)
676 
677    The product is:
678 
679      product = (multiplicand * multiplier) * (delta * delta) / delta
680 
681    ...which reduces to:
682 
683      product = multiplicand * multiplier * delta
684 
685    Multiplying the product by (2 ** -55) is equivalent to right-shifting by 55.
686    However, using only the top 64 bits of the 128-bit product is equivalent to
687    right-shifting by 64, so the net correction is a left-shift by 9.
688 
689 
690    Implementation notes:
691 
692     1. 32 x 32 = 64-bit multiplies use intrinsic instructions on the IA-32
693        processor family.
694 */
695 
multiply(FPU * product,FPU multiplicand,FPU multiplier)696 static TRAP_CLASS multiply (FPU *product, FPU multiplicand, FPU multiplier)
697 {
698 const uint32 delta_shift = D64_WIDTH - (UNPACKED_BITS + GUARD_BITS);
699 const uint32 carry_shift = D32_WIDTH - delta_shift;
700 uint32       ah, al, bh, bl;
701 t_uint64     hh, hl, lh, ll, carry;
702 
703 if (multiplicand.mantissa == 0 || multiplier.mantissa == 0) {   /* if either operand is zero */
704     *product = zero;                                            /*   then the product is (positive) zero */
705     product->precision = multiplicand.precision;                /*     with the appropriate precision */
706     }
707 
708 else {                                                  /* otherwise both operands are non-zero */
709     product->precision = multiplicand.precision;        /*   so set the precision to that of the operands */
710 
711     product->exponent = multiplicand.exponent           /* the product exponent */
712                           + multiplier.exponent;        /*   is the sum of the operand exponents */
713 
714     ah = UPPER_DWORD (multiplicand.mantissa);           /* split the multiplicand */
715     al = LOWER_DWORD (multiplicand.mantissa);           /*   into high and low double-words */
716 
717     bh = UPPER_DWORD (multiplier.mantissa);             /* split the multiplier */
718     bl = LOWER_DWORD (multiplier.mantissa);             /*   into high and low double-words */
719 
720     hh = (t_uint64) ah * (t_uint64) bh;                 /* form the */
721     hl = (t_uint64) ah * (t_uint64) bl;                 /*   four cross products */
722     lh = (t_uint64) al * (t_uint64) bh;                 /*     using 32 x 32 = 64-bit multiplies */
723     ll = (t_uint64) al * (t_uint64) bl;                 /*       for efficiency */
724 
725     carry = ((t_uint64) UPPER_DWORD (ll)                    /* do a 64-bit add of the upper half of "ll" */
726                       + LOWER_DWORD (hl)                    /*   to the lower halves of "hl" */
727                       + LOWER_DWORD (lh)) >> carry_shift;   /*     and "lh" and shift to leave just the carry bits */
728 
729     product->mantissa = hh + UPPER_DWORD (hl)           /* add "hh" to the upper halves of "hl" and "lh" */
730                            + UPPER_DWORD (lh);
731 
732     product->mantissa <<= delta_shift;                  /* align the result to the binary point */
733     product->mantissa += carry;                         /*   and add the carry from the discarded bits */
734 
735     product->negative =                                 /* set the product sign negative */
736        (multiplicand.negative != multiplier.negative);  /*   if the operand signs differ */
737     }
738 
739 return trap_None;                                       /* report that the multiplication succeeded */
740 }
741 
742 
743 /* Divide two unpacked numbers.
744 
745    The quotient of the two operands is returned, and the remainder is discarded.
746    Conceptually, the implementation requires a 128 / 64 = 64-bit division, with
747    64 bits of zeros appended to the dividend to get the required precision.
748    However, instead of implementing the FDIV or EDIV firmware algorithm
749    directly, which uses 32 / 16 = 16-bit trial divisions, it is more efficient
750    under simulation to use 64 / 32 = 64-bit divisions with the classic
751    divide-and-correct method.
752 
753    This method considers the 64-bit dividend and divisor each to consist of two
754    32-bit "digits."  The 64-bit dividend "ah,al" is divided by the first 32-bit
755    digit "bh" of the 64-bit divisor "bh,bl", yielding a 64-bit trial quotient
756    and a 64-bit remainder.  A correction is developed by multiplying the second
757    32-bit digit "bl" of the divisor and the trial quotient.  If the remainder is
758    smaller than the correction, the trial quotient is too large, so it is
759    decremented, and the (full 64-bit) divisor is added to the remainder.  The
760    correction is then subtracted from the remainder, and the process is repeated
761    using the corrected remainder as the dividend to develop the second 64-bit
762    trial quotient and second quotient digit.  The first quotient digit is
763    positioned, and the two quotient digits are then added to produce the final
764    64-bit quotient.  The quotient sign is set negative if the operand signs
765    differ.
766 
767    Mantissas are represented internally as fixed-point numbers with 54 data bits
768    plus one guard bit to the right of the binary point.  That is, the real
769    number represented is the integer mantissa value * (2 ** -55), where the
770    right-hand term represents the delta for a change of one bit.  Division is
771    therefore:
772 
773      (quotient * delta) = (dividend * delta) / (divisor * delta)
774 
775    The quotient is:
776 
777      quotient = (dividend / divisor) * (delta / (delta * delta))
778 
779    ...which reduces to:
780 
781      quotient = (dividend / divisor) / delta
782 
783    Dividing the quotient by (2 ** -55) is equivalent to left-shifting by 55.
784    However, using only the top 64 bits of the 128-bit product is equivalent to
785    right-shifting by 64, so the net correction is a right-shift by 9.
786 
787    Care must be taken to ensure that the correction product does not overflow.
788    In hardware, this is detected by examining the ALU carry bit after the
789    correction multiplication.  In simulation, there is no portable way of
790    determining if the 64 x 32 multiply overflowed.  However, we can take
791    advantage of the fact that the upper eight bits of the mantissa are always
792    zero (54 data bits plus one guard bit plus one implied bit = 56 significant
793    bits).  By shifting the divisor left by eight bits, we ensure that the
794    quotient will be eight bits shorter, and so guarantee that the correction
795    product will not overflow.  Moreover, because the divisor is now at least
796    one-half of the maximum value (because of the implied bit), it ensures that
797    the trial quotient will either be correct or off by one, so at most a single
798    correction will be needed.
799 
800    The final quotient would require a left-shift of 8 to account for the
801    renormalized divisor, but from the above calculations, we also need a right
802    shift of 9 to align it.  The net result needs only a right shift of one to
803    correct, which we handle by decrementing the exponent in lieu of additional
804    shifting.
805 
806    See "Divide-and-Correct Methods for Multiple Precision Division" by Marvin L.
807    Stein, Communications of the ACM, August 1964 and "Multiple-Length Division
808    Revisited: a Tour of the Minefield" by Per Brinch Hansen, Software Practice
809    and Experience, June 1994 for background.
810 
811 
812    Implementation notes:
813 
814     1. IA-32 processors do not have a 64 / 32 = 64-bit divide instruction (they
815        have a 64 / 32 = 32 instruction instead).  Therefore, a run-time library
816        routine for 64 / 64 = 64 is generated.  Consequently, "bh" and "bl" are
817        declared as 64-bit variables, as this produces simpler code than if they
818        were declared as 32-bit variables.
819 
820     2. "bh" is guaranteed to be non-zero because the divisor mantissa is
821        normalized on entry.  Therefore, no division-by-zero check is needed.
822 */
823 
divide(FPU * quotient,FPU dividend,FPU divisor)824 static TRAP_CLASS divide (FPU *quotient, FPU dividend, FPU divisor)
825 {
826 const uint32 renorm_shift = D64_WIDTH - (UNPACKED_BITS + GUARD_BITS + 1);
827 t_uint64     bh, bl, q1, q2, r1, r2, c1, c2;
828 
829 if (divisor.mantissa == 0) {                            /* if the divisor is zero */
830     *quotient = dividend;                               /*   then return the dividend */
831 
832     if (dividend.precision == fp_f)                     /* if this is a standard operand */
833         return trap_Float_Zero_Divide;                  /*   then report a standard zero-divide trap */
834     else                                                /* otherwise */
835         return trap_Ext_Float_Zero_Divide;              /*   report an extended zero-divide trap */
836     }
837 
838 else if (dividend.mantissa == 0) {                      /* otherwise if the dividend is zero */
839     *quotient = zero;                                   /*   then the quotient is (positive) zero */
840     quotient->precision = dividend.precision;           /*     with the appropriate precision */
841     }
842 
843 else {                                                  /* otherwise both operands are non-zero */
844     quotient->precision = dividend.precision;           /*   so set the precision to that of the operands */
845 
846     quotient->exponent = dividend.exponent              /* the quotient exponent is the difference of the */
847                           - divisor.exponent - 1;       /*   operand exponents (with alignment correction) */
848 
849     divisor.mantissa <<= renorm_shift;                  /* renormalize the divisor */
850 
851     bh = (t_uint64) UPPER_DWORD (divisor.mantissa);     /* split the divisor */
852     bl = (t_uint64) LOWER_DWORD (divisor.mantissa);     /*   into high and low halves */
853 
854     q1 = dividend.mantissa / bh;                        /* form the first trial quotient */
855     r1 = dividend.mantissa % bh;                        /*   and remainder */
856 
857     c1 = bl * q1;                                       /* form the first remainder correction */
858     r1 = r1 << D32_WIDTH;                               /*   and scale the remainder to match */
859 
860     if (r1 < c1) {                                      /* if a correction is required */
861         q1 = q1 - 1;                                    /*   then the first trial quotient is too large */
862         r1 = r1 + divisor.mantissa;                     /*     so reduce it and increase the remainder */
863         }
864 
865     r1 = r1 - c1;                                       /* correct the first remainder */
866 
867     q2 = r1 / bh;                                       /* form the second trial quotient */
868     r2 = r1 % bh;                                       /*   and remainder */
869 
870     c2 = bl * q2;                                       /* form the second remainder correction */
871     r2 = r2 << D32_WIDTH;                               /*   and scale the remainder to match */
872 
873     if (r2 < c2) {                                      /* if a correction is required */
874         q2 = q2 - 1;                                    /*   then the second trial quotient is too large */
875         r2 = r2 + divisor.mantissa;                     /*     so reduce it and increase the remainder */
876         }
877 
878     quotient->mantissa = (q1 << D32_WIDTH) + q2;        /* position and sum the quotient digits */
879 
880     quotient->negative =                                /* set the quotient sign negative */
881        (dividend.negative != divisor.negative);         /*   if the operand signs differ */
882     }
883 
884 return trap_None;                                       /* report that the division succeeded */
885 }
886 
887 
888 /* Float an integer to a floating-point value.
889 
890    The integer operand is converted to a floating-point value and returned.  The
891    desired precision of the result must be set before calling.
892 
893    Conversion is simply a matter of copying the integer value.  When the
894    unpacked value is normalized, it will be converted to floating-point format.
895 
896 
897    Implementation notes:
898 
899     1. The incoming integer has already been unpacked into fp_f format, so we do
900        not need to set the precision here.
901 */
902 
ffloat(FPU * real,FPU integer)903 static TRAP_CLASS ffloat (FPU *real, FPU integer)
904 {
905 *real = integer;                                        /* copy the unpacked value */
906 return trap_None;                                       /* report that the float succeeded */
907 }
908 
909 
910 /* Fix an unpacked floating-point value to an integer.
911 
912    A floating-point value is converted to a double-word integer.  If the "round"
913    parameter is true, the value is rounded before conversion; otherwise, it is
914    truncated.
915 
916    If the real value is less than 0.5, then the integer value is zero.
917    Otherwise, if rounding is requested, add 0.5 (created by shifting a "1" into
918    the position immediately to the right of the least significant bit of the
919    integer result) to the value.  Finally, the result precision is set.  The
920    remaining conversion occurs when the result is packed.
921 
922 
923    Implementation notes:
924 
925     1. The FIXR/FIXT microcode gives an Integer Overflow for exponent > 30, even
926        though -2 ** 31 (143700 000000) does fit in the result.
927 */
928 
fix(FPU * integer,FPU real,t_bool round)929 static TRAP_CLASS fix (FPU *integer, FPU real, t_bool round)
930 {
931 if (real.exponent < -1)                                 /* if the real value is < 0.5 */
932     integer->mantissa = 0;                              /*   then the integer value is 0 */
933 
934 else {                                                  /* otherwise the value is convertible */
935     integer->mantissa = real.mantissa;                  /*   so set the mantissa */
936 
937     if (round && real.exponent < UNPACKED_BITS)                 /* if rounding is requested and won't overflow */
938         integer->mantissa += half_lsb [in_d] >> real.exponent;  /*   then add one-half of the LSB to the value */
939     }
940 
941 integer->exponent = real.exponent;                      /* copy the exponent */
942 integer->negative = real.negative;                      /*   and sign */
943 integer->precision = in_d;                              /*     and set to pack to a double integer */
944 
945 return trap_None;                                       /* report that the fix succeeded */
946 }
947