1 /* hp3000_cpu_fp.c: HP 3000 floating-point arithmetic simulator
2 
3    Copyright (c) 2016, J. David Bryan
4 
5    Permission is hereby granted, free of charge, to any person obtaining a copy
6    of this software and associated documentation files (the "Software"), to deal
7    in the Software without restriction, including without limitation the rights
8    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9    copies of the Software, and to permit persons to whom the Software is
10    furnished to do so, subject to the following conditions:
11 
12    The above copyright notice and this permission notice shall be included in
13    all copies or substantial portions of the Software.
14 
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
18    AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 
22    Except as contained in this notice, the name of the author shall not be used
23    in advertising or otherwise to promote the sale, use or other dealings in
24    this Software without prior written authorization from the author.
25 
26    11-Jun-16    JDB     Bit mask constants are now unsigned
27    03-Feb-16    JDB     First release version
28    25-Aug-15    JDB     Fixed FSUB zero subtrahend bug (from Norwin Malmberg)
29    01-Apr-15    JDB     Passes the floating point tests in the CPU diagnostic (D420A1)
30    29-Mar-15    JDB     Created
31 
32    References:
33      - HP 3000 Series II System Microprogram Listing
34          (30000-90023, August 1976)
35      - HP 3000 Series II/III System Reference Manual
36          (30000-90020, July 1978)
37      - Machine Instruction Set Reference Manual
38          (30000-90022, June 1984)
39 
40 
41    This module implements multiple-precision floating-point operations to
42    support the HP 3000 CPU instruction set.  It employs 64-bit integer
43    arithmetic for speed and simplicity of implementation.  The host compiler
44    must support 64-bit integers.
45 
46    HP 3000 computers use a proprietary floating-point format.  All 3000s
47    support two-word "single-precision" floating-point arithmetic as standard
48    equipment.  The original HP 3000 CX and Series I CPUs support three-word
49    "extended-precision" floating-point arithmetic when the optional HP 30011A
50    Extended Instruction Set microcode was installed.  The Series II and later
51    machines replace the three-word instructions with four-word "double-
52    precision" floating-point arithmetic and include the EIS as part of the
53    standard equipment.
54 
55    Floating-point numbers have this format:
56 
57        0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
58      +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
59      | S |      exponent biased by +256      |   positive mantissa   |
60      +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
61      |                       positive mantissa                       |
62      +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
63      |                       positive mantissa                       | (extended)
64      +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
65      |                       positive mantissa                       | (double)
66      +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
67 
68    That is, the three- and four-word formats merely extended the mantissa with
69    no change to the exponent range.
70 
71    The mantissa is represented in sign-magnitude format.  The mantissa is always
72    positive, with an assumed "1" to the left of the MSB, and the sign bit is set
73    for negative values.  The exponent is in "excess-256" format, i.e.,
74    represented as an unsigned value biased by +256, giving an unbiased range of
75    -256 to +255.  The binary point is assumed to be between the leading "1" and
76    the MSB, so a zero value must be handled as a special case of all bits equal
77    to zero, which otherwise would represent the value +1.0 * 2 ** -256.
78    Normalization shifts the mantissa left and decrements the exponent until a
79    "1" bit appears in bit 9.
80 
81    The use of sign-magnitude format means that floating-point negation merely
82    complements the sign bit, and floating-point comparison simply checks the
83    signs and, if they are the same, then applies an integer comparison to the
84    packed values.  However, it also implies the existence of a "negative zero"
85    value, represented by all zeros except for the sign bit.  This value is
86    undefined; if a negative zero is supplied as an operand to one of the
87    arithmetic routines, it is treated as positive zero.  Negative zero is never
88    returned even if, e.g., it is supplied as the dividend or multiplier.
89 
90    This implementation provides add, subtract, multiply, divide, float, and fix
91    operations on two-, three-, and four-word floating point operands.  The
92    routines are called via a common floating-point executor ("fp_exec") by
93    supplying the operation to be performed and the operand(s) on which to act.
94    An operand contains the packed (i.e., in-memory) representation and the
95    precision of the value.  The returned value includes the packed
96    representation and the precision, along with a value that indicates whether
97    or not the operation resulted in an arithmetic trap.  It is the
98    responsibility of the caller to take the trap if it is indicated.
99 */
100 
101 
102 
103 #include "hp3000_defs.h"
104 #include "hp3000_cpu.h"
105 #include "hp3000_cpu_fp.h"
106 
107 
108 
109 /* Program constants */
110 
111 #define EXPONENT_BIAS       256                 /* the exponent is biased by +256 */
112 
113 #define MIN_EXPONENT        -256                /* the smallest representable exponent */
114 #define MAX_EXPONENT        +255                /* the largest representable exponent */
115 
116 #define EXPONENT_MASK       0077700u            /* the mask to isolate the exponent in the first word */
117 #define MANTISSA_MASK       0000077u            /* the mask to isolate the mantissa in the first word */
118 
119 #define EXPONENT_SHIFT      6                   /* the exponent alignment shift */
120 #define MANTISSA_SHIFT      0                   /* the mantissa alignment shift */
121 
122 #define UNPACKED_BITS       54                  /* the number of significant bits in the unpacked mantissa */
123 
124 #define IMPLIED_BIT         ((t_uint64) 1uL << UNPACKED_BITS)       /* the implied MSB in the mantissa */
125 #define CARRY_BIT           ((t_uint64) 1uL << UNPACKED_BITS + 1)   /* the carry from the MSB in the mantissa */
126 
127 #define DELTA_ALIGNMENT     (D64_WIDTH - UNPACKED_BITS)             /* net shift to align the binary point */
128 
129 
130 /* Floating-point accessors */
131 
132 #define MANTISSA(w)         ((t_uint64) (((w) & MANTISSA_MASK) >> MANTISSA_SHIFT))
133 #define EXPONENT(w)         ((int32)    (((w) & EXPONENT_MASK) >> EXPONENT_SHIFT))
134 
135 #define TO_EXPONENT(w)      ((w) + EXPONENT_BIAS << EXPONENT_SHIFT & EXPONENT_MASK)
136 
137 #define DENORMALIZED(m)     (((m) & IMPLIED_BIT) == 0)
138 
139 
140 /* Floating-point unpacked representation */
141 
142 typedef struct {
143     t_uint64   mantissa;                        /* the unsigned mantissa */
144     int32      exponent;                        /* the unbiased exponent */
145     t_bool     negative;                        /* TRUE if the mantissa is negative */
146     FP_OPSIZE  precision;                       /* the precision currently expressed by the value */
147     } FPU;
148 
149 static const FPU zero = { 0, 0, FALSE, fp_f };  /* an unpacked zero value */
150 
151 
152 /* Floating-point descriptors */
153 
154 static const int32 mantissa_bits [] = {         /* the number of mantissa bits, indexed by FP_OPSIZE */
155     16 - 1,                                     /*   in_s bits available - sign bit */
156     32 - 1,                                     /*   in_d bits available - sign bit */
157     22 + 1,                                     /*   fp_f bits explicit + bits implied */
158     38 + 1,                                     /*   fp_x bits explicit + bits implied */
159     54 + 1                                      /*   fp_e bits explicit + bits implied */
160     };
161 
162 static const t_uint64 mantissa_mask [] = {      /* the mask to the mantissa bits, indexed by FP_OPSIZE */
163     ((t_uint64) 1 << 16) - 1 <<  0,             /*   in_s 16-bit mantissa */
164     ((t_uint64) 1 << 32) - 1 <<  0,             /*   in_d 32-bit mantissa */
165     ((t_uint64) 1 << 22) - 1 << 32,             /*   fp_f 22-bit mantissa */
166     ((t_uint64) 1 << 38) - 1 << 16,             /*   fp_x 38-bit mantissa */
167     ((t_uint64) 1 << 54) - 1 <<  0              /*   fp_e 54-bit mantissa */
168     };
169 
170 
171 static const t_uint64 half_lsb [] = {           /* half of the LSB for rounding, indexed by FP_OPSIZE */
172     0,                                          /*   in_s not used */
173     0,                                          /*   in_d not used */
174     (t_uint64) 1 << 31,                         /*   fp_f word 2 LSB */
175     (t_uint64) 1 << 15,                         /*   fp_x word 3 LSB */
176     (t_uint64) 1 <<  0                          /*   fp_e word 4 LSB */
177     };
178 
179 
180 /* Floating-point local utility routine declarations */
181 
182 static FPU     unpack          (FP_OPND packed);
183 static FP_OPND norm_round_pack (FPU     unpacked);
184 
185 static TRAP_CLASS add      (FPU *sum,        FPU augend,       FPU addend);
186 static TRAP_CLASS subtract (FPU *difference, FPU minuend,      FPU subtrahend);
187 static TRAP_CLASS multiply (FPU *product,    FPU multiplicand, FPU multiplier);
188 static TRAP_CLASS divide   (FPU *quotient,   FPU dividend,     FPU divisor);
189 
190 static TRAP_CLASS ffloat (FPU *real,    FPU integer);
191 static TRAP_CLASS fix    (FPU *integer, FPU real, t_bool round);
192 
193 
194 
195 /* Floating-point global routines */
196 
197 
198 /* Execute a floating-point operation.
199 
200    The operator specified by the "operation" parameter is applied to the
201    "left_op" and to the "right_op" (if applicable), and the result is returned.
202    The "precision" fields of the operands must be set to the representations
203    stored within before calling this routine.
204 
205    On entry, the left and right (if needed) operands are unpacked, and the
206    executor for the specified operation is called.  The result is normalized,
207    rounded, and packed.  Any trap condition detected by the operator routine is
208    set into the packed operand, unless the normalize/round/pack routine detected
209    its own trap condition.  Finally, the packed result is returned.
210 */
211 
fp_exec(FP_OPR operator,FP_OPND left_op,FP_OPND right_op)212 FP_OPND fp_exec (FP_OPR operator, FP_OPND left_op, FP_OPND right_op)
213 {
214 FPU        left, right, result;
215 FP_OPND    result_op;
216 TRAP_CLASS trap;
217 
218 left = unpack (left_op);                                /* unpack the left-hand operand */
219 
220 if (operator <= fp_div)                                 /* if the operator requires two operands */
221     right = unpack (right_op);                          /*   then unpack the right-hand operation */
222 
223 switch (operator) {                                     /* dispatch the floating-point operation */
224 
225     case fp_add:
226         trap = add (&result, left, right);              /* add the two operands */
227         break;
228 
229     case fp_sub:
230         trap = subtract (&result, left, right);         /* subtract the two operands */
231         break;
232 
233     case fp_mpy:
234         trap = multiply (&result, left, right);         /* multiply the two operands */
235         break;
236 
237     case fp_div:
238         trap = divide (&result, left, right);           /* divide the two operands */
239         break;
240 
241     case fp_flt:
242         trap = ffloat (&result, left);                  /* convert the integer operand to a floating-point number */
243         break;
244 
245     case fp_fixr:
246         trap = fix (&result, left, TRUE);               /* round the floating-point operand to an integer */
247         break;
248 
249     case fp_fixt:
250         trap = fix (&result, left, FALSE);              /* truncate the floating-point operand to an integer */
251         break;
252 
253     default:
254         result = zero;                                  /* if an unimplemented operation is requested */
255         result.precision = left.precision;              /*   then return a zero of the appropriate precision */
256         trap = trap_Unimplemented;                      /*     and trap for an Unimplemented Instruction */
257         break;
258     }                                                   /* all cases are handled */
259 
260 result_op = norm_round_pack (result);                   /* normalize, round, and pack the result */
261 
262 if (result_op.trap == trap_None)                        /* if the pack operation succeeded */
263     result_op.trap = trap;                              /*   then set any arithmetic trap returned by the operation */
264 
265 return result_op;                                       /* return the result */
266 }
267 
268 
269 
270 /* Floating-point local utility routine declarations */
271 
272 
273 /* Unpack a packed operand.
274 
275    A packed integer or floating-point value is split into separate mantissa and
276    exponent variables.  The multiple words of the mantissa are concatenated into
277    a single 64-bit unsigned value, and the exponent is shifted with recovery of
278    the sign.
279 
280    The absolute values of single and double integers are unpacked into the
281    mantissas and preshifted by 32 or 16 bits, respectively, to reduce the
282    shifting needed for normalization.  The resulting value is unnormalized, but
283    the exponent is set correctly to reflect the preshift.  The precisions for
284    unpacked integers are set to single-precision but are valid for extended- and
285    double-precision, as the unpacked representations are identical.
286 
287    The packed floating-point representation contains an implied "1" bit
288    preceding the binary point in the mantissa, except if the floating-point
289    value is zero.  The unpacked mantissa includes the implied bit.  The bias is
290    removed from the exponent, producing a signed value, and the sign of the
291    mantissa is set from the sign of the packed value.
292 
293    A packed zero value is represented by all words set to zero.  In the unpacked
294    representation, the mantissa is zero, the exponent is the minimum value
295    (-256), and the sign is always positive (as "negative zero" is undefined).
296 
297 
298    Implementation notes:
299 
300     1. Integers could have been copied directly to the mantissa with the
301        exponents set to the appropriate values (54 in this case).  However, the
302        current implementation unpacks integers only in preparation for repacking
303        as floating-point numbers i.e., to implement the "float" operator.  This
304        would require a larger number of shifts to normalize the values -- as
305        many as 54 to normalize the value 1.  Preshifting reduces the number of
306        normalizing shifts needed to between 6 and 22.
307 */
308 
unpack(FP_OPND packed)309 static FPU unpack (FP_OPND packed)
310 {
311 FPU    unpacked;
312 uint32 word;
313 
314 switch (packed.precision) {                             /* dispatch based on the operand precision */
315 
316     case in_s:                                          /* unpack a single integer */
317         word = packed.words [0];                        /*   from the first word */
318 
319         if (word & D16_SIGN) {                          /* if the value is negative */
320             word = NEG16 (word);                        /*   then make it positive */
321             unpacked.negative = TRUE;                   /*     and set the mantissa sign flag */
322             }
323 
324         else                                            /* otherwise the value is positive */
325             unpacked.negative = FALSE;                  /*   so clear the sign flag */
326 
327         unpacked.mantissa = (t_uint64) word << 32;      /* store the preshifted value as the mantissa */
328         unpacked.exponent = UNPACKED_BITS - 32;         /*   and set the exponent to account for the shift */
329         unpacked.precision = fp_f;                      /* set the precision */
330         break;
331 
332 
333     case in_d:                                                  /* unpack a double integer */
334         word = TO_DWORD (packed.words [0], packed.words [1]);   /*   from the first two words */
335 
336         if (word & D32_SIGN) {                          /* if the value is negative */
337             word = NEG32 (word);                        /*   then make it positive */
338             unpacked.negative = TRUE;                   /*     and set the mantissa sign flag */
339             }
340 
341         else                                            /* otherwise the value is positive */
342             unpacked.negative = FALSE;                  /*   so clear the sign flag */
343 
344         unpacked.mantissa = (t_uint64) word << 16;      /* store the preshifted value as the mantissa */
345         unpacked.exponent = UNPACKED_BITS - 16;         /*   and set the exponent to account for the shift */
346         unpacked.precision = fp_f;                      /* set the precision */
347         break;
348 
349 
350     case fp_f:                                              /* unpack a single-precision */
351     case fp_x:                                              /*   extended-precision */
352     case fp_e:                                              /*     or double-precision floating-point number */
353         unpacked.mantissa = MANTISSA (packed.words [0]);    /*       starting with the first word */
354 
355         for (word = 1; word <= 3; word++) {                 /* unpack from one to three more words */
356             unpacked.mantissa <<= 16;                       /* shift the accumulated value */
357 
358             if (word < TO_COUNT (packed.precision))         /* if all words are not included yet */
359                 unpacked.mantissa |= packed.words [word];   /*   then merge the next word into value */
360             }
361 
362         unpacked.exponent =                                 /* store the exponent */
363            EXPONENT (packed.words [0]) - EXPONENT_BIAS;     /*   after removing the bias */
364 
365         if (unpacked.exponent == MIN_EXPONENT               /* if the biased exponent and mantissa are zero */
366           && unpacked.mantissa == 0)                        /*   then the mantissa is positive */
367             unpacked.negative = FALSE;                      /*     regardless of the packed sign */
368 
369         else {                                                          /* otherwise the value is non-zero */
370             unpacked.mantissa |= IMPLIED_BIT;                           /*   so add back the implied "1" bit */
371             unpacked.negative = ((packed.words [0] & D16_SIGN) != 0);   /*     and set the sign as directed */
372             }
373 
374         unpacked.precision = packed.precision;          /* set the precision */
375         break;
376     }                                                   /* all cases are handled */
377 
378 return unpacked;                                        /* return the unpacked value */
379 }
380 
381 
382 /* Normalize, round, and pack an unpacked value.
383 
384    An unpacked value is normalized, rounded, and packed into the representation
385    indicated by the operand precision.  If the supplied value cannot be
386    represented, the appropriate trap indication is returned.
387 
388    A single- or double-integer is packed into the first word or two words of the
389    result as a twos-complement value.  If the value is too large for the result
390    precision, an Integer Overflow trap is indicated, and a zero value is
391    returned.
392 
393    For a real of any precision, the mantissa is first normalized by shifting
394    right if the carry bit is set, or by shifting left until the implied bit is
395    set.  The exponent is adjusted for any shifts performed.  The value is then
396    rounded by adding one-half of the least-significant bit; if that causes a
397    carry, the exponent is adjusted again.  Finally, the mantissa is masked to
398    the number of bits corresponding to the desired precision and packed into the
399    in-memory representation.  The exponent is checked, and it exceeds the
400    permitted range, the appropriate trap indication is returned.
401 
402 
403    Implementation notes:
404 
405     1. If a carry occurs due to rounding, the mantissa is not shifted because
406        the carry bit will be masked off during packing.  Incrementing the
407        exponent in this case is sufficient.
408 
409     2. Masking the mantissa is required to remove the carry and implied bits
410        before packing.  Masking the value bits in excess of the specified
411        precision is not required but is desirable to avoid implying more
412        precision than actually is present.
413 
414     3. The result value +/-1 x 2 ** -256 is considered an underflow, as the
415        packed representation is identical to the zero representation, i.e., an
416        all-zeros value.
417 */
418 
norm_round_pack(FPU unpacked)419 static FP_OPND norm_round_pack (FPU unpacked)
420 {
421 FP_OPND packed;
422 int32   integer;
423 
424 packed.precision = unpacked.precision;                  /* set the precision */
425 
426 if (unpacked.mantissa == 0) {                           /* if the mantissa is zero */
427     packed.words [0] = 0;                               /*   then set */
428     packed.words [1] = 0;                               /*     the packed */
429     packed.words [2] = 0;                               /*       representation to */
430     packed.words [3] = 0;                               /*         all zeros */
431 
432     packed.trap = trap_None;                            /* report that packing succeeded */
433     }
434 
435 else if (unpacked.precision <= in_d)                                /* if packing a single or double integer */
436     if (unpacked.exponent >= mantissa_bits [unpacked.precision]) {  /*   then if the value is too large to fit */
437         packed.words [0] = 0;                                       /*     then return */
438         packed.words [1] = 0;                                       /*       a zero value */
439         packed.trap = trap_Integer_Overflow;                        /*         and an overflow trap */
440         }
441 
442     else {                                                          /* otherwise */
443         integer = (int32)                                           /*   convert the value to an integer */
444            (unpacked.mantissa >> UNPACKED_BITS - unpacked.exponent  /*     by shifting right to align */
445            & mantissa_mask [unpacked.precision]);                   /*       and masking to the desired precision */
446 
447         if (unpacked.negative)                          /* if the value is negative */
448             integer = - integer;                        /*   then negate the result */
449 
450         packed.words [0] = UPPER_WORD (integer);        /* split the result */
451         packed.words [1] = LOWER_WORD (integer);        /*   into the first two words */
452 
453         packed.trap = trap_None;                        /* report that packing succeeded */
454         }
455 
456 else {                                                  /* otherwise a real number is to be packed */
457     if (unpacked.mantissa & CARRY_BIT) {                /* if a carry out of the MSB has occurred */
458         unpacked.mantissa >>= 1;                        /*   then shift the mantissa to normalize */
459         unpacked.exponent +=  1;                        /*     and increment the exponent to compensate */
460         }
461 
462     else                                                /* otherwise */
463         while (DENORMALIZED (unpacked.mantissa)) {      /*   while the mantissa is not in normal form */
464             unpacked.mantissa <<= 1;                    /*     shift the mantissa toward the implied-bit position */
465             unpacked.exponent -=  1;                    /*       and decrement the exponent to compensate */
466             }
467 
468 
469     unpacked.mantissa += half_lsb [unpacked.precision]; /* round the mantissa by adding one-half of the LSB */
470 
471     if (unpacked.mantissa & CARRY_BIT)                  /* if rounding caused a carry out of the MSB */
472         unpacked.exponent = unpacked.exponent + 1;      /*   then increment the exponent to compensate */
473 
474 
475     unpacked.mantissa &= mantissa_mask [unpacked.precision];    /* mask the mantissa to the specified precision */
476 
477     packed.words [0] = (HP_WORD) (unpacked.mantissa >> 48) & DV_MASK    /* pack the first word of the mantissa */
478                        | TO_EXPONENT (unpacked.exponent)                /*   with the exponent */
479                        | (unpacked.negative ? D16_SIGN : 0);            /*     and the sign bit */
480 
481     packed.words [1] = (HP_WORD) (unpacked.mantissa >> 32) & DV_MASK;   /* pack the second */
482     packed.words [2] = (HP_WORD) (unpacked.mantissa >> 16) & DV_MASK;   /*   and third */
483     packed.words [3] = (HP_WORD) (unpacked.mantissa >>  0) & DV_MASK;   /*     and fourth words */
484 
485     if (unpacked.exponent < MIN_EXPONENT                                /* if the exponent is too small */
486       || unpacked.exponent == MIN_EXPONENT && unpacked.mantissa == 0)   /*   or the result would be all zeros */
487         packed.trap = trap_Float_Underflow;                             /*     then report an underflow trap */
488 
489     else if (unpacked.exponent > MAX_EXPONENT)                          /* otherwise if the exponent is too large */
490         packed.trap = trap_Float_Overflow;                              /*   then report an overflow trap */
491 
492     else                                                                /* otherwise */
493         packed.trap = trap_None;                                        /*   report that packing succeeded */
494     }
495 
496 return packed;                                          /* return the packed value */
497 }
498 
499 
500 /* Add two unpacked numbers.
501 
502    The sum of the two operands is returned.  If one operand is zero and the
503    other is not, the non-zero operand is returned.  If both operand are zero, a
504    "defined zero" is returned in case one or both operands are "negative zeros."
505 
506    Otherwise, the difference between the operand exponents is determined.  If
507    the magnitude of the difference between the exponents is greater than the
508    number of significant bits, then the smaller number has been scaled to zero
509    (swamped), and so the sum is simply the larger operand.  However, if the sum
510    will be significant, the smaller mantissa is shifted to align with the larger
511    mantissa, and the larger exponent is used (as, after the scaling shift, the
512    smaller value has the same exponent).  Finally, if the operand signs are the
513    same, the result is the sum of the mantissas.  If the signs are different,
514    then the sum is the smaller value subtracted from the larger value, and the
515    result adopts the sign of the larger value.
516 
517 
518    Implementation notes:
519 
520     1. If the addend is zero, the microcode converts the undefined value
521        "negative zero" to the defined positive zero if it is passed as the
522        augend.  This also applies to the subtraction operator, which passes a
523        negative zero if the subtrahend is zero.
524 */
525 
add(FPU * sum,FPU augend,FPU addend)526 static TRAP_CLASS add (FPU *sum, FPU augend, FPU addend)
527 {
528 int32 magnitude;
529 
530 if (addend.mantissa == 0)                              /* if the addend is zero */
531     if (augend.mantissa == 0) {                        /*   then if the augend is also zero */
532         *sum = zero;                                   /*     then the sum is (positive) zero */
533         sum->precision = augend.precision;             /*       with the appropriate precision */
534         }
535 
536     else                                                /*   otherwise the augend is non-zero */
537         *sum = augend;                                  /*     so the sum is just the augend */
538 
539 else if (augend.mantissa == 0)                          /* otherwise if the augend is zero */
540     *sum = addend;                                      /*   then the sum is just the addend */
541 
542 else {                                                  /* otherwise both operands are non-zero */
543     magnitude = augend.exponent - addend.exponent;      /*   so determine the magnitude of the difference */
544 
545     if (abs (magnitude) > mantissa_bits [augend.precision]) /* if one of the operands is swamped */
546         if (magnitude > 0)                                  /*   then if the addend is smaller */
547             *sum = augend;                                  /*     then return the augend */
548         else                                                /*   otherwise */
549             *sum = addend;                                  /*     return the addend */
550 
551     else {                                              /* otherwise the addition is required */
552         sum->precision = addend.precision;              /*   so set the precision to that of the operands */
553 
554         if (magnitude > 0) {                            /* if the addend is smaller */
555             addend.mantissa >>= magnitude;              /*   then shift right to align the addend */
556             sum->exponent = augend.exponent;            /*     and use the augend's exponent */
557             }
558 
559         else {                                          /* otherwise the augend is smaller or the same */
560             augend.mantissa >>= - magnitude;            /*   shift right to align the augend */
561             sum->exponent = addend.exponent;            /*     and use the addend's exponent */
562             }
563 
564         if (addend.negative == augend.negative) {               /* if the mantissa signs are the same */
565             sum->mantissa = addend.mantissa + augend.mantissa;  /*   then add the mantissas */
566             sum->negative = addend.negative;                    /*     and use the addend sign for the sum */
567             }
568 
569         else if (addend.mantissa > augend.mantissa) {           /* otherwise if the addend is larger */
570             sum->mantissa = addend.mantissa - augend.mantissa;  /*   then subtract the augend */
571             sum->negative = addend.negative;                    /*     and use the addend sign */
572             }
573 
574         else {                                                  /* otherwise the augend is larger */
575             sum->mantissa = augend.mantissa - addend.mantissa;  /*   so subtract the addend */
576             sum->negative = augend.negative;                    /*     and use the augend sign */
577             }
578         }
579     }
580 
581 return trap_None;                                       /* report that the addition succeeded */
582 }
583 
584 
585 /* Subtract two unpacked numbers.
586 
587    The difference of the two operands is returned.  Subtraction is implemented
588    by negating the subtrahend and then adding the minuend.
589 
590 
591    Implementation notes:
592 
593     1. If the subtrahend is zero, negating produces the undefined "negative
594        zero."  However, the "add" routine handles this as positive zero, so we
595        do not need to worry about this condition.
596 */
597 
subtract(FPU * difference,FPU minuend,FPU subtrahend)598 static TRAP_CLASS subtract (FPU *difference, FPU minuend, FPU subtrahend)
599 {
600 subtrahend.negative = ! subtrahend.negative;            /* invert the sign of the subtrahend */
601 
602 add (difference, minuend, subtrahend);                  /* add to obtain the difference */
603 
604 return trap_None;                                       /* report that the subtraction succeeded */
605 }
606 
607 
608 /* Multiply two unpacked numbers.
609 
610    The product of the two operands is returned.  Conceptually, the
611    implementation requires a 64 x 64 = 128-bit multiply, rounded to the upper 64
612    bits.  Instead of implementing the FMPY or EMPY firmware algorithm directly,
613    which uses 16 x 16 = 32-bit partial-product multiplies, it is more efficient
614    under simulation to use 32 x 32 = 64-bit multiplications by splitting the
615    operand mantissas ("a" and "b") into 32-bit high and low ("h" and "l") parts
616    and forming the cross-products:
617 
618                       64-bit operands
619                          ah      al
620                      +-------+-------+
621                          bh      bl
622                      +-------+-------+
623                      _________________
624 
625                          al  *   bl      [ll]
626                      +-------+-------+
627                  ah  *   bl              [hl]
628              +-------+-------+
629                  al  *   bh              [lh]
630              +-------+-------+
631          ah  *   bh                      [hh]
632      +-------+-------+
633      _________________________________
634 
635        64-bit product
636      +-------+-------+
637 
638    If either operand is zero, a "defined zero" is returned in case one or both
639    operands are "negative zeros."  Otherwise, the product exponent is set to the
640    sum of the operand exponents, and the four 64-bit cross-products are formed.
641    The lower 64 bits of the products are summed to form the carry into the upper
642    64 bits, which are summed to produce the product.  The product mantissa is
643    aligned, and the product sign is set negative if the operand signs differ.
644 
645    Mantissas are represented internally as fixed-point numbers with 54 bits to
646    the right of the binary point.  That is, the real number represented is the
647    integer mantissa value * (2 ** -54), where the right-hand term represents the
648    delta for a change of one bit.  Multiplication is therefore:
649 
650      (product * delta) = (multiplicand * delta) * (multiplier * delta)
651 
652    The product is:
653 
654      product = (multiplicand * multiplier) * (delta * delta) / delta
655 
656    ...which reduces to:
657 
658      product = multiplicand * multiplier * delta
659 
660    Multiplying the product by (2 ** -54) is equivalent to right-shifting by 54.
661    However, using only the top 64 bits of the 128-bit product is equivalent to
662    right-shifting by 64, so the net correction is a left-shift by 10.
663 
664 
665    Implementation notes:
666 
667     1. 32 x 32 = 64-bit multiplies use intrinsic instructions on the IA-32
668        processor family.
669 */
670 
multiply(FPU * product,FPU multiplicand,FPU multiplier)671 static TRAP_CLASS multiply (FPU *product, FPU multiplicand, FPU multiplier)
672 {
673 uint32   ah, al, bh, bl;
674 t_uint64 hh, hl, lh, ll, carry;
675 
676 if (multiplicand.mantissa == 0 || multiplier.mantissa == 0) {   /* if either operand is zero */
677     *product = zero;                                            /*   then the product is (positive) zero */
678     product->precision = multiplicand.precision;                /*     with the appropriate precision */
679     }
680 
681 else {                                                  /* otherwise both operands are non-zero */
682     product->precision = multiplicand.precision;        /*   so set the precision to that of the operands */
683 
684     product->exponent = multiplicand.exponent           /* the product exponent */
685                           + multiplier.exponent;        /*   is the sum of the operand exponents */
686 
687     ah = (uint32) (multiplicand.mantissa >> D32_WIDTH); /* split the multiplicand */
688     al = (uint32) (multiplicand.mantissa &  D32_MASK);  /*   into high and low double-words */
689 
690     bh = (uint32) (multiplier.mantissa   >> D32_WIDTH); /* split the multiplier */
691     bl = (uint32) (multiplier.mantissa   &  D32_MASK);  /*   into high and low double-words */
692 
693     hh = ((t_uint64) ah * bh);                          /* form the */
694     hl = ((t_uint64) ah * bl);                          /*   four cross products */
695     lh = ((t_uint64) al * bh);                          /*     using 32 x 32 = 64-bit multiplies */
696     ll = ((t_uint64) al * bl);                          /*       for efficiency */
697 
698     carry = ((ll >> D32_WIDTH) + (hl & D32_MASK)        /* add the upper half of "ll" to the lower halves of "hl" */
699               + (lh & D32_MASK)) >> D32_WIDTH;          /*   and "lh" and shift to leave just the carry bit */
700 
701     product->mantissa = hh + (hl >> D32_WIDTH)          /* add "hh" to the upper halves of "hl" and "lh" */
702                           + (lh >> D32_WIDTH) + carry;  /*   and the carry bit */
703 
704     product->mantissa <<= DELTA_ALIGNMENT;              /* align the result */
705 
706     product->negative =                                 /* set the product sign negative */
707        (multiplicand.negative != multiplier.negative);  /*   if the operand signs differ */
708     }
709 
710 return trap_None;                                       /* report that the multiplication succeeded */
711 }
712 
713 
714 /* Divide two unpacked numbers.
715 
716    The quotient of the two operands is returned, and the remainder is discarded.
717    Conceptually, the implementation requires a 128 / 64 = 64-bit division, with
718    64 bits of zeros appended to the dividend to get the required precision.
719    However, instead of implementing the FDIV or EDIV firmware algorithm
720    directly, which uses 32 / 16 = 16-bit trial divisions, it is more efficient
721    under simulation to use 64 / 32 = 64-bit divisions with the classic
722    divide-and-correct method.
723 
724    This method considers the 64-bit dividend and divisor each to consist of two
725    32-bit "digits."  The 64-bit dividend "ah,al" is divided by the first 32-bit
726    digit "bh" of the 64-bit divisor "bh,bl", yielding a 64-bit trial quotient
727    and a 64-bit remainder.  A correction is developed by subtracting the product
728    of the second 32-bit digit "bl" of the divisor and the trial quotient from
729    the remainder.  If the remainder is negative, the trial quotient is too
730    large, so it is decremented, and the (full 64-bit) divisor is added to the
731    correction.  This is repeated until the correction is non-negative,
732    indicating that the first quotient digit is correct.  The process is then
733    repeated using the corrected remainder as the dividend to develop the second
734    64-bit trial quotient and second quotient digit.  The first quotient digit is
735    positioned, and the two quotient digits are then added to produce the final
736    64-bit quotient.  The quotient mantissa is aligned, and the quotient sign is
737    set negative if the operand signs differ.
738 
739    Mantissas are represented internally as fixed-point numbers with 54 bits to
740    the right of the binary point.  That is, the real number represented is the
741    integer mantissa value * (2 ** -54), where the right-hand term represents the
742    delta for a change of one bit.  Division is therefore:
743 
744      (quotient * delta) = (dividend * delta) / (divisor * delta)
745 
746    The quotient is:
747 
748      quotient = (dividend / divisor) * (delta / (delta * delta))
749 
750    ...which reduces to:
751 
752      quotient = (dividend / divisor) / delta
753 
754    Dividing the quotient by (2 ** -54) is equivalent to left-shifting by 54.
755    However, using only the top 64 bits of the 128-bit product is equivalent to
756    right-shifting by 64, so the net correction is a right-shift by 10.
757 
758    See "Divide-and-Correct Methods for Multiple Precision Division" by Marvin L.
759    Stein, Communications of the ACM, August 1964 for background.
760 
761 
762    Implementation notes:
763 
764     1. IA-32 processors do not have a 64 / 32 = 64-bit divide instruction (they
765        have a 64 / 32 = 32 instruction instead).  Therefore, a run-time library
766        routine for 64 / 64 = 64 is generated.  Consequently, "bh" and "bl" are
767        declared as 64-bit variables, as this produces simpler code than if they
768        were declared as 32-bit variables.
769 
770     2. "bh" is guaranteed to be non-zero because the divisor mantissa is
771        normalized on entry.  Therefore, no division-by-zero check is needed.
772 
773     3. The quotient alignment shift logically expresses ((q1 << 32) + q2) >> 10,
774        but it must be implemented as (q1 << 22) + (q2 >> 10) as otherwise the
775        left-shift would lose significant bits.
776 */
777 
divide(FPU * quotient,FPU dividend,FPU divisor)778 static TRAP_CLASS divide (FPU *quotient, FPU dividend, FPU divisor)
779 {
780 t_uint64 bh, bl, q1, q2, r1, r2;
781 t_int64  c1, c2;
782 
783 if (divisor.mantissa == 0) {                            /* if the divisor is zero */
784     *quotient = dividend;                               /*   then return the dividend */
785     return trap_Float_Zero_Divide;                      /*     and report the error */
786     }
787 
788 else if (dividend.mantissa == 0) {                      /* otherwise if the dividend is zero */
789     *quotient = zero;                                   /*   then the quotient is (positive) zero */
790     quotient->precision = dividend.precision;           /*     with the appropriate precision */
791     }
792 
793 else {                                                  /* otherwise both operands are non-zero */
794     quotient->precision = dividend.precision;           /*   so set the precision to that of the operands */
795 
796     quotient->exponent = dividend.exponent              /* the quotient exponent */
797                           - divisor.exponent;           /*   is the difference of the operand exponents */
798 
799     bh = divisor.mantissa >> D32_WIDTH;                 /* split the divisor */
800     bl = divisor.mantissa &  D32_MASK;                  /*   into high and low halves */
801 
802     q1 = dividend.mantissa / bh;                        /* form the first trial quotient */
803     r1 = dividend.mantissa % bh;                        /*   and remainder */
804 
805     c1 = r1 - bl * q1;                                  /* form the first corrected remainder */
806 
807     while (c1 < 0) {                                    /* while a correction is required */
808         q1 = q1 - 1;                                    /*   the first trial quotient is too large */
809         c1 = c1 + divisor.mantissa;                     /*     so reduce it and increase the remainder */
810         }
811 
812     q2 = c1 / bh;                                       /* form the second trial quotient */
813     r2 = c1 % bh;                                       /*   and remainder */
814 
815     c2 = r2 - bl * q2;                                  /* form the second corrected remainder */
816 
817     while (c2 < 0) {                                    /* while a correction is required */
818         q2 = q2 - 1;                                    /*   the second trial quotient is too large */
819         c2 = c2 + divisor.mantissa;                     /*     so reduce it and increase the remainder */
820         }
821 
822     quotient->mantissa = (q1 << D32_WIDTH - DELTA_ALIGNMENT)    /* sum the quotient digits */
823                            + (q2 >> DELTA_ALIGNMENT);           /*   and align the result */
824 
825     quotient->negative =                                /* set the quotient sign negative */
826        (dividend.negative != divisor.negative);         /*   if the operand signs differ */
827     }
828 
829 return trap_None;                                       /* report that the division succeeded */
830 }
831 
832 
833 /* Float an integer to a floating-point value.
834 
835    The integer operand is converted to a floating-point value and returned.  The
836    desired precision of the result must be set before calling.
837 
838    Conversion is simply a matter of copying the integer value.  When the
839    unpacked value is normalized, it will be converted to floating-point format.
840 
841 
842    Implementation notes:
843 
844     1. The incoming integer has already been unpacked into fp_f format, so we do
845        not need to set the precision here.
846 */
847 
ffloat(FPU * real,FPU integer)848 static TRAP_CLASS ffloat (FPU *real, FPU integer)
849 {
850 *real = integer;                                        /* copy the unpacked value */
851 return trap_None;                                       /* report that the float succeeded */
852 }
853 
854 
855 /* Fix an unpacked floating-point value to an integer.
856 
857    A floating-point value is converted to a double-word integer.  If the "round"
858    parameter is true, the value is rounded before conversion; otherwise, it is
859    truncated.
860 
861    If the real value is less than 0.5, then the integer value is zero.
862    Otherwise, if rounding is requested, add 0.5 (created by shifting a "1" into
863    the position immediately to the right of the least significant bit of the
864    integer result) to the value.  Finally, the result precision is set.  The
865    remaining conversion occurs when the result is packed.
866 
867 
868    Implementation notes:
869 
870     1. The FIXR/FIXT microcode gives an Integer Overflow for exponent > 30, even
871        though -2 ** 31 (143700 000000) does fit in the result.
872 */
873 
fix(FPU * integer,FPU real,t_bool round)874 static TRAP_CLASS fix (FPU *integer, FPU real, t_bool round)
875 {
876 if (real.exponent < -1)                                 /* if the real value is < 0.5 */
877     integer->mantissa = 0;                              /*   then the integer value is 0 */
878 
879 else {                                                  /* otherwise the value is convertible */
880     integer->mantissa = real.mantissa;                  /*   so set the mantissa */
881 
882     if (round && real.exponent < UNPACKED_BITS)         /* if rounding is requested and the value won't overflow */
883         integer->mantissa +=                            /*   then add one-half of the LSB to the value */
884            (t_uint64) 1 << (UNPACKED_BITS - real.exponent - 1);
885     }
886 
887 integer->exponent = real.exponent;                      /* copy the exponent */
888 integer->negative = real.negative;                      /*   and sign */
889 integer->precision = in_d;                              /*     and set to pack to a double integer */
890 
891 return trap_None;                                       /* report that the fix succeeded */
892 }
893