1 /* hp2100_fp1.c: HP 1000 multiple-precision floating point routines
2
3 Copyright (c) 2005-2012, J. David Bryan
4
5 Permission is hereby granted, free of charge, to any person obtaining a
6 copy of this software and associated documentation files (the "Software"),
7 to deal in the Software without restriction, including without limitation
8 the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 and/or sell copies of the Software, and to permit persons to whom the
10 Software is furnished to do so, subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22 Except as contained in this notice, the name of the author shall not be used
23 in advertising or otherwise to promote the sale, use or other dealings in
24 this Software without prior written authorization from the author.
25
26 06-Feb-12 JDB Added missing precision on constant "one" in fp_trun
27 21-Jun-11 JDB Completed the comments for divide; no code changes
28 08-Jun-08 JDB Quieted bogus gcc warning in fp_exec
29 10-May-08 JDB Fixed uninitialized return in fp_accum when setting
30 19-Mar-08 JDB Reworked "complement" to avoid inlining bug in gcc-4.x
31 01-Dec-06 JDB Reworked into generalized multiple-precision ops for FPP
32 12-Oct-06 JDB Altered x_trun for F-Series FFP compatibility
33 Added F-Series ..TCM FFP helpers
34
35 Primary references:
36 - HP 1000 M/E/F-Series Computers Engineering and Reference Documentation
37 (92851-90001, Mar-1981)
38 - HP 1000 M/E/F-Series Computers Technical Reference Handbook
39 (5955-0282, Mar-1980)
40 - DOS/RTE Relocatable Library Reference Manual
41 (24998-90001, Oct-1981)
42
43
44 This module implements multiple-precision floating-point operations to
45 support the 1000 F-Series hardware Floating Point Processor. It employs
46 64-bit integer arithmetic for speed and simplicity of implementation. The
47 host compiler must support 64-bit integers, and the HAVE_INT64 symbol must be
48 defined during compilation. If this symbol is not defined, then FPP support
49 is not available.
50
51 HP 2100/1000 computers used a proprietary floating-point format. The 2100
52 had optional firmware that provided two-word floating-point add, subtract,
53 multiply, and divide, as well as single-integer fix and float. The 1000-M/E
54 provided the same two-word firmware operations as standard equipment.
55 Three-word extended-precision instructions for the 2100 and 1000-M/E were
56 provided by the optional Fast FORTRAN Processor firmware.
57
58 The 1000-F substituted a hardware floating point processor for the firmware
59 in previous machines. In addition to the two- and three-word formats, the
60 F-Series introduced a four-word double-precision format. A five-word format
61 that provided extra range in the exponent by unpacking it from the mantissa
62 was also provided, although this capability was not documented in the user
63 manual. In addition, the FPP improved the accuracy of floating-point
64 calculations, as the firmware versions sacrificed a few bits of precision to
65 gain speed. Consequently, operations on the F-Series may return results that
66 differ slightly from the same operations on the M/E-Series or the 2100.
67
68 F-Series units after date code 1920 also provided two-word double-integer
69 instructions in firmware, as well as double-integer fix and float operations.
70
71 The original 32-bit floating-point format is as follows:
72
73 15 14 0
74 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
75 |MS| mantissa high | : M
76 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
77 | mantissa low | exponent |XS| : M + 1
78 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
79 15 8 7 1 0
80
81 Both 23-bit mantissa and 7-bit exponent are in twos-complement form. The
82 exponent sign bit has been rotated into the LSB of the second word.
83
84 The extended-precision floating-point format is a 48-bit extension of the
85 32-bit format used for single precision. A packed extended-precision value
86 consists of a 39-bit mantissa and a 7-bit exponent. The format is as
87 follows:
88
89 15 14 0
90 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
91 |MS| mantissa high | : M
92 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
93 | mantissa middle | : M + 1
94 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
95 | mantissa low | exponent |XS| : M + 2
96 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
97 15 8 7 1 0
98
99 The double-precision floating-point format is similar to the 48-bit
100 extended-precision format, although with a 55-bit mantissa:
101
102 15 14 0
103 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
104 |MS| mantissa high | : M
105 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
106 | mantissa middle high | : M + 1
107 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
108 | mantissa middle low | : M + 2
109 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
110 | mantissa low | exponent |XS| : M + 3
111 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
112 15 8 7 1 0
113
114 The FPP also supports a special five-word expanded-exponent format:
115
116 15 14 0
117 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
118 |MS| mantissa high | : M
119 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
120 | mantissa middle high | : M + 1
121 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
122 | mantissa middle low | : M + 2
123 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
124 | mantissa low | : M + 3
125 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
126 | exponent |XS| : M + 4
127 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
128 15 8 7 1 0
129
130 The exponent is a full 16-bit twos-complement value, but the allowed range is
131 only 10 bits, i.e., -512 to +511.
132
133 In a normalized value, the sign and MSB of the mantissa differ. Zero is
134 represented by all words = 0.
135
136 Internally, unpacked floating-point values are contained in a structure
137 having a signed 64-bit mantissa and a signed 32-bit exponent. Mantissas are
138 left-justified with the unused bits masked to zero. Exponents are
139 right-justified. The precision is indicated by the value of a structure
140 field.
141
142 HP terminology for the three-word floating-point format is confused. Some
143 documents refer to it as "double precision," while others use "extended
144 precision." The instruction mnemonics begin with "X" (e.g., .XADD),
145 suggesting the extended-precision term.
146
147 HP apparently intended that the four-word double-precision format would be
148 called "triple-precision," as the instruction mnemonics begin with "T" (e.g.,
149 ".TADD" for the four-word add instruction). The source files for the
150 software simulations of these instructions for the M/E-Series also explicitly
151 refer to "triple precision math." However, the engineering documentation and
152 the F-Series reference manual both use the double-precision term.
153
154 This module adopts the single/extended/double terminology and uses the
155 initial letters of the instructions (F/X/T) to indicate the precision used.
156
157 The FPP hardware consisted of two circuit boards that interfaced to the main
158 CPU via the Microprogrammable Processor Port (MPP) that had been introduced
159 with the 1000 E-Series. One board contained argument registers and ALUs,
160 split into separate mantissa and exponent parts. The other contained a state
161 machine sequencer. FPP results were copied automatically to the argument
162 registers in addition to being available over the MPP, so that chained
163 operations could be executed from these "accumulators" without reloading.
164
165 The FPP operated independently of the CPU. An opcode, specifying one of the
166 six operations (add, subtract, multiply, divide, fix, or float) was sent to
167 the FPP, and a start command was given. Operands of appropriate precision
168 were then supplied to the FPP. Once the operands were received, the FPP
169 would execute and set a flag when the operation was complete. The result
170 would then be retrieved from the FPP. The floating-point instruction
171 firmware in the CPU initiated the desired FPP operation and handled operand
172 reads from and result writes to main memory.
173
174 Under simulation, "fp_exec" provides the six arithmetic operations analogous
175 to FPP execution. The remainder of the functions are helpers that were
176 provided by firmware in the 1000-F but that can reuse code needed to simulate
177 the FPP hardware. As with the hardware, "fp_exec" retains the last result
178 in an internal accumulator that may be referenced in subsequent operations.
179
180 NOTE: this module also provides the floating-point support for the firmware
181 single-precision 1000-M/E base set and extended-precision FFP instructions.
182 Because the firmware and hardware implementations returned slightly different
183 results, particularly with respect to round-off, conditional checks are
184 implemented in the arithmetic routines. In some cases, entirely different
185 algorithms are used to ensure fidelity with the real machines. Functionally,
186 this means that the 2100/1000-M/E and 1000-F floating-point diagnostics are
187 not interchangeable, and failures are to be expected if a diagnostic is run
188 on the wrong machine.
189 */
190
191 #include "hp2100_defs.h"
192 #include "hp2100_cpu.h"
193 #include "hp2100_cpu1.h"
194 #include "hp2100_fp1.h"
195
196
197 #if defined (HAVE_INT64) /* we need int64 support */
198
199 /* Field widths. */
200
201 #define IN_W_SIGN 1
202 #define IN_W_SMAGN 15
203 #define IN_W_DMAGN 31
204
205 #define FP_W_MSIGN 1
206 #define FP_W_FMANT 23
207 #define FP_W_XMANT 39
208 #define FP_W_TMANT 55
209 #define FP_W_EMANT 55
210 #define FP_W_EXPANDEXP 9
211 #define FP_W_EXP 7
212 #define FP_W_ESIGN 1
213
214 /* Starting bit numbers. */
215
216 #define IN_V_SIGN (64 - IN_W_SIGN)
217 #define IN_V_SNUM (64 - IN_W_SIGN - IN_W_SMAGN)
218 #define IN_V_DNUM (64 - IN_W_SIGN - IN_W_DMAGN)
219
220 #define FP_V_FNUM (64 - FP_W_MSIGN - FP_W_FMANT - FP_W_EXP - FP_W_ESIGN)
221 #define FP_V_XNUM (64 - FP_W_MSIGN - FP_W_XMANT - FP_W_EXP - FP_W_ESIGN)
222 #define FP_V_TNUM (64 - FP_W_MSIGN - FP_W_TMANT - FP_W_EXP - FP_W_ESIGN)
223 #define FP_V_ENUM (64 - FP_W_MSIGN - FP_W_EMANT - FP_W_EXP - FP_W_ESIGN)
224
225 #define FP_V_MSIGN (64 - FP_W_MSIGN)
226 #define FP_V_FMANT (64 - FP_W_MSIGN - FP_W_FMANT)
227 #define FP_V_XMANT (64 - FP_W_MSIGN - FP_W_XMANT)
228 #define FP_V_TMANT (64 - FP_W_MSIGN - FP_W_TMANT)
229 #define FP_V_EMANT (64 - FP_W_MSIGN - FP_W_EMANT)
230 #define FP_V_EXP 1
231 #define FP_V_ESIGN 0
232
233 /* Right-aligned field masks. */
234
235 #define IN_M_SIGN (((t_uint64) 1 << IN_W_SIGN) - 1)
236 #define IN_M_SMAGN (((t_uint64) 1 << IN_W_SMAGN) - 1)
237 #define IN_M_DMAGN (((t_uint64) 1 << IN_W_DMAGN) - 1)
238
239 #define FP_M_MSIGN (((t_uint64) 1 << FP_W_MSIGN) - 1)
240 #define FP_M_FMANT (((t_uint64) 1 << FP_W_FMANT) - 1)
241 #define FP_M_XMANT (((t_uint64) 1 << FP_W_XMANT) - 1)
242 #define FP_M_TMANT (((t_uint64) 1 << FP_W_TMANT) - 1)
243 #define FP_M_EMANT (((t_uint64) 1 << FP_W_EMANT) - 1)
244
245 #define FP_M_EXPANDEXP ((1 << FP_W_EXPANDEXP) - 1)
246 #define FP_M_EXP ((1 << FP_W_EXP) - 1)
247 #define FP_M_ESIGN ((1 << FP_W_ESIGN) - 1)
248
249 /* In-place field masks. */
250
251 #define IN_SIGN (IN_M_SIGN << IN_V_SIGN)
252 #define IN_SMAGN (IN_M_SMAGN << IN_V_SNUM)
253 #define IN_DMAGN (IN_M_DMAGN << IN_V_DNUM)
254
255 #define FP_MSIGN (FP_M_MSIGN << FP_V_MSIGN)
256 #define FP_FMANT (FP_M_FMANT << FP_V_FMANT)
257 #define FP_XMANT (FP_M_XMANT << FP_V_XMANT)
258 #define FP_TMANT (FP_M_TMANT << FP_V_TMANT)
259 #define FP_EMANT (FP_M_EMANT << FP_V_EMANT)
260 #define FP_EXP (FP_M_EXP << FP_V_EXP)
261 #define FP_ESIGN (FP_M_ESIGN << FP_V_ESIGN)
262
263 /* In-place record masks. */
264
265 #define IN_SSMAGN (IN_SIGN | IN_SMAGN)
266 #define IN_SDMAGN (IN_SIGN | IN_DMAGN)
267
268 #define FP_SFMANT (FP_MSIGN | FP_FMANT)
269 #define FP_SXMANT (FP_MSIGN | FP_XMANT)
270 #define FP_STMANT (FP_MSIGN | FP_TMANT)
271 #define FP_SEMANT (FP_MSIGN | FP_EMANT)
272 #define FP_SEXP (FP_ESIGN | FP_EXP)
273
274 /* Minima and maxima. */
275
276 #define FP_ONEHALF ((t_int64) 1 << (FP_V_MSIGN - 1)) /* mantissa = 0.5 */
277 #define FP_MAXPMANT ((t_int64) FP_EMANT) /* maximum pos mantissa */
278 #define FP_MAXNMANT ((t_int64) FP_MSIGN) /* maximum neg mantissa */
279 #define FP_MAXPEXP (FP_M_EXPANDEXP) /* maximum pos expanded exponent */
280 #define FP_MAXNEXP (-(FP_MAXPEXP + 1)) /* maximum neg expanded exponent */
281
282 /* Floating-point helpers. */
283
284 #define DENORM(x) ((((x) ^ (x) << 1) & FP_MSIGN) == 0)
285
286 #define TO_EXP(e) (int8) ((e >> FP_V_EXP & FP_M_EXP) | \
287 (e & FP_M_ESIGN ? ~FP_M_EXP : 0))
288
289 /* Property constants. */
290
291 static const t_int64 p_half_lsb[6] = { ((t_int64) 1 << IN_V_SNUM) - 1, /* different than FP! */
292 ((t_int64) 1 << IN_V_DNUM) - 1, /* different than FP! */
293 (t_int64) 1 << (FP_V_FMANT - 1),
294 (t_int64) 1 << (FP_V_XMANT - 1),
295 (t_int64) 1 << (FP_V_TMANT - 1),
296 (t_int64) 1 << (FP_V_EMANT - 1) };
297
298 static const t_int64 n_half_lsb[6] = { 0,
299 0,
300 ((t_int64) 1 << (FP_V_FMANT - 1)) - 1,
301 ((t_int64) 1 << (FP_V_XMANT - 1)) - 1,
302 ((t_int64) 1 << (FP_V_TMANT - 1)) - 1,
303 ((t_int64) 1 << (FP_V_EMANT - 1)) - 1 };
304
305 static const uint32 op_start[6] = { IN_V_SNUM,
306 IN_V_DNUM,
307 FP_V_FMANT,
308 FP_V_XMANT,
309 FP_V_TMANT,
310 FP_V_EMANT };
311
312 static const t_int64 mant_mask[6] = { IN_SSMAGN,
313 IN_SDMAGN,
314 FP_SFMANT,
315 FP_SXMANT,
316 FP_STMANT,
317 FP_SEMANT };
318
319 static const uint32 op_bits[6] = { IN_W_SMAGN,
320 IN_W_DMAGN,
321 FP_W_FMANT + FP_W_MSIGN,
322 FP_W_XMANT + FP_W_MSIGN,
323 FP_W_TMANT + FP_W_MSIGN,
324 FP_W_EMANT + FP_W_MSIGN };
325
326 static const t_int64 op_mask[6] = { ~(t_int64) 0 << IN_V_SNUM,
327 ~(t_int64) 0 << IN_V_DNUM,
328 ~(t_int64) 0 << FP_V_FNUM,
329 ~(t_int64) 0 << FP_V_XNUM,
330 ~(t_int64) 0 << FP_V_TNUM,
331 ~(t_int64) 0 << FP_V_ENUM };
332
333 static const uint32 int_p_max[2] = { IN_M_SMAGN,
334 IN_M_DMAGN };
335
336
337 /* Internal unpacked floating-point representation. */
338
339 typedef struct {
340 t_int64 mantissa;
341 int32 exponent;
342 OPSIZE precision;
343 } FPU;
344
345
346
347 /* Low-level helper routines. */
348
349
350 /* Arithmetic shift right for mantissa only.
351
352 Returns TRUE if any one-bits are shifted out (for F-series only).
353 */
354
asr(FPU * operand,int32 shift)355 static t_bool asr (FPU *operand, int32 shift)
356 {
357 t_uint64 mask;
358 t_bool bits_lost;
359
360 if (UNIT_CPU_MODEL == UNIT_1000_F) { /* F-series? */
361 mask = ((t_uint64) 1 << shift) - 1; /* mask for lost bits */
362 bits_lost = ((operand->mantissa & mask) != 0); /* flag if any lost */
363 }
364 else
365 bits_lost = FALSE;
366
367 operand->mantissa = operand->mantissa >> shift; /* mantissa is int, so ASR */
368 return bits_lost;
369 }
370
371
372 /* Logical shift right for mantissa and exponent.
373
374 Shifts mantissa and corrects exponent for mantissa overflow.
375 Returns TRUE if any one-bits are shifted out (for F-series only).
376 */
377
lsrx(FPU * operand,int32 shift)378 static t_bool lsrx (FPU *operand, int32 shift)
379 {
380 t_uint64 mask;
381 t_bool bits_lost;
382
383 if (UNIT_CPU_MODEL == UNIT_1000_F) { /* F-series? */
384 mask = ((t_uint64) 1 << shift) - 1; /* mask for lost bits */
385 bits_lost = ((operand->mantissa & mask) != 0); /* flag if any lost */
386 }
387 else
388 bits_lost = FALSE;
389
390 operand->mantissa = (t_uint64) operand->mantissa >> shift; /* uint, so LSR */
391 operand->exponent = operand->exponent + shift; /* correct exponent */
392 return bits_lost;
393 }
394
395
396 /* Unpack an operand into a long integer.
397
398 Returns a left-aligned integer or mantissa. Does not mask to precision; this
399 should be done subsequently if desired.
400 */
401
unpack_int(OP packed,OPSIZE precision)402 static t_int64 unpack_int (OP packed, OPSIZE precision)
403 {
404 uint32 i;
405 t_uint64 unpacked = 0;
406
407 if (precision == in_s)
408 unpacked = (t_uint64) packed.word << 48; /* unpack single integer */
409
410 else if (precision == in_d)
411 unpacked = (t_uint64) packed.dword << 32; /* unpack double integer */
412
413 else {
414 if (precision == fp_e) /* five word operand? */
415 precision = fp_t; /* only four mantissa words */
416
417 for (i = 0; i < 4; i++) /* unpack fp 2 to 4 words */
418 if (i < TO_COUNT (precision))
419 unpacked = unpacked << 16 | packed.fpk[i];
420 else
421 unpacked = unpacked << 16;
422 }
423
424 return (t_int64) unpacked;
425 }
426
427
428 /* Unpack a packed operand.
429
430 The packed value is split into separate mantissa and exponent variables. The
431 multiple words of the mantissa are concatenated into a single 64-bit signed
432 value, and the exponent is shifted with recovery of the sign.
433 */
434
unpack(OP packed,OPSIZE precision)435 static FPU unpack (OP packed, OPSIZE precision)
436 {
437 FPU unpacked;
438
439 unpacked.precision = precision; /* set value's precision */
440
441 unpacked.mantissa = /* unpack and mask mantissa */
442 unpack_int (packed, precision) & mant_mask[precision];
443
444 switch (precision) {
445
446 case fp_f:
447 case fp_x:
448 case fp_t:
449 unpacked.exponent = /* unpack exponent from correct word */
450 TO_EXP (packed.fpk[(uint32) precision - 1]);
451 break;
452
453 case fp_e:
454 unpacked.exponent = /* unpack expanded exponent */
455 (int16) (packed.fpk[4] >> FP_V_EXP | /* rotate sign into place */
456 (packed.fpk[4] & 1 ? SIGN : 0));
457 break;
458
459 case fp_a: /* no action for value in accum */
460 case in_s: /* integers don't use exponent */
461 case in_d: /* integers don't use exponent */
462 default:
463 unpacked.exponent = 0;
464 break;
465 }
466
467 return unpacked;
468 }
469
470
471 /* Pack a long integer into an operand. */
472
pack_int(t_int64 unpacked,OPSIZE precision)473 static OP pack_int (t_int64 unpacked, OPSIZE precision)
474 {
475 int32 i;
476 OP packed;
477
478 if (precision == in_s)
479 packed.word = (uint16) (unpacked >> 48) & DMASK; /* pack single integer */
480
481 else if (precision == in_d)
482 packed.dword = (uint32) (unpacked >> 32) & DMASK32; /* pack double integer */
483
484 else {
485 if (precision == fp_e) /* five word operand? */
486 precision = fp_t; /* only four mantissa words */
487
488 for (i = 3; i >= 0; i--) { /* pack fp 2 to 4 words */
489 packed.fpk[i] = (uint16) unpacked & DMASK;
490 unpacked = unpacked >> 16;
491 }
492 }
493
494 return packed;
495 }
496
497
498 /* Pack an unpacked floating-point number.
499
500 The 64-bit mantissa is split into the appropriate number of 16-bit words.
501 The exponent is rotated to incorporate the sign bit and merged into the
502 appropriate word.
503 */
504
pack(FPU unpacked)505 static OP pack (FPU unpacked)
506 {
507 OP packed;
508 uint8 exp;
509
510 packed = pack_int (unpacked.mantissa, unpacked.precision); /* pack mantissa */
511
512 exp = ((uint8) unpacked.exponent << FP_V_EXP) | /* rotate exponent */
513 ((unpacked.exponent < 0) << FP_V_ESIGN);
514
515 switch (unpacked.precision) { /* merge exponent into correct word */
516
517 case in_s: /* no action for integers */
518 case in_d:
519 break;
520
521 case fp_f: /* merge into last word */
522 case fp_x:
523 case fp_t:
524 packed.fpk[(uint32) unpacked.precision - 1] =
525 (packed.fpk[(uint32) unpacked.precision - 1] & ~FP_SEXP) | exp;
526 break;
527
528 case fp_e: /* place in separate word */
529 packed.fpk[4] = ((uint16) unpacked.exponent << FP_V_EXP) |
530 ((unpacked.exponent < 0) << FP_V_ESIGN);
531 break;
532
533 case fp_a: /* no action for value in accum */
534 break;
535 }
536
537 return packed;
538 }
539
540
541 /* Normalize an unpacked floating-point number.
542
543 Floating-point numbers are in normal form if the sign bit and the MSB of the
544 mantissa differ. Unnormalized numbers are shifted as needed with appropriate
545 exponent modification.
546 */
547
normalize(FPU * unpacked)548 static void normalize (FPU *unpacked)
549 {
550
551 if (unpacked->mantissa) /* non-zero? */
552 while (DENORM (unpacked->mantissa)) { /* normal form? */
553 unpacked->exponent = unpacked->exponent - 1; /* no, so left shift */
554 unpacked->mantissa = unpacked->mantissa << 1;
555 }
556 else
557 unpacked->exponent = 0; /* clean for zero */
558 return;
559 }
560
561
562 /* Round an unpacked floating-point number and check for overflow.
563
564 An unpacked floating-point number is rounded by adding one-half of the LSB
565 value, maintaining symmetry around zero. If rounding resulted in a mantissa
566 overflow, the result logically is shifted to the right with an appropriate
567 exponent modification. Finally, the result is checked for exponent underflow
568 or overflow, and the appropriate approximation (zero or infinity) is
569 returned.
570
571 Rounding in hardware involves a special mantissa extension register that
572 holds three "guard" bits and one "sticky" bit. These represent the value of
573 bits right-shifted out the mantissa register. Under simulation, we track
574 such right-shifts and utilize the lower eight bits of the 64-bit mantissa
575 value to simulate the extension register.
576
577 Overflow depends on whether the FPP expanded-exponent form is being used
578 (this expands the exponent range by two bits). If overflow is detected, the
579 value representing infinity is dependent on whether the operation is on
580 behalf of the Fast FORTRAN Processor. The F-Series FPP returns positive
581 infinity on both positive and negative overflow for all precisions. The 2100
582 and M/E-Series FFPs return negative infinity on negative overflow of
583 extended-precision values. Single-precision overflows on these machines
584 always return positive infinity.
585
586 The number to be rounded must be normalized upon entry.
587 */
588
roundovf(FPU * unpacked,t_bool expand)589 static uint32 roundovf (FPU *unpacked, t_bool expand)
590 {
591 uint32 overflow;
592 t_bool sign;
593
594 sign = (unpacked->mantissa < 0); /* save mantissa sign */
595
596 if (sign) /* round and mask the number */
597 unpacked->mantissa =
598 (unpacked->mantissa + n_half_lsb[unpacked->precision]) &
599 mant_mask[unpacked->precision];
600 else
601 unpacked->mantissa =
602 (unpacked->mantissa + p_half_lsb[unpacked->precision]) &
603 mant_mask[unpacked->precision];
604
605 if (sign != (unpacked->mantissa < 0)) /* mantissa overflow? */
606 lsrx (unpacked, 1); /* correct by shifting */
607 else
608 normalize (unpacked); /* renorm may be needed */
609
610 if (unpacked->mantissa == 0) { /* result zero? */
611 unpacked->mantissa = 0; /* return zero */
612 unpacked->exponent = 0;
613 overflow = 0; /* with overflow clear */
614 }
615 else if (unpacked->exponent < /* result underflow? */
616 (FP_MAXNEXP >> (expand ? 0 : 2))) {
617 unpacked->mantissa = 0; /* return zero */
618 unpacked->exponent = 0;
619 overflow = 1; /* and set overflow */
620 }
621 else if (unpacked->exponent > /* result overflow? */
622 (FP_MAXPEXP >> (expand ? 0 : 2))) {
623 if (sign && /* negative value? */
624 (unpacked->precision == fp_x) && /* extended precision? */
625 (UNIT_CPU_MODEL != UNIT_1000_F)) { /* not F-series? */
626 unpacked->mantissa = FP_MAXNMANT; /* return negative infinity */
627 unpacked->exponent = FP_MAXPEXP & FP_M_EXP;
628 }
629 else {
630 unpacked->mantissa = FP_MAXPMANT; /* return positive infinity */
631 unpacked->exponent = FP_MAXPEXP & FP_M_EXP;
632 }
633 overflow = 1; /* and set overflow */
634 }
635 else
636 overflow = 0; /* value is in range */
637
638 return overflow;
639 }
640
641
642 /* Normalize, round, and pack an unpacked floating-point number. */
643
nrpack(OP * packed,FPU unpacked,t_bool expand)644 static uint32 nrpack (OP *packed, FPU unpacked, t_bool expand)
645 {
646 uint32 overflow;
647
648 normalize (&unpacked); /* normalize for rounding */
649 overflow = roundovf (&unpacked, expand); /* round and check for overflow */
650 *packed = pack (unpacked); /* pack result */
651
652 return overflow;
653 }
654
655
656
657 /* Low-level arithmetic routines. */
658
659
660 /* Complement an unpacked number. */
661
complement(FPU * result)662 static void complement (FPU *result)
663 {
664 if (result->mantissa == FP_MAXNMANT) { /* maximum negative? */
665 result->mantissa = FP_ONEHALF; /* complement of -1.0 * 2 ^ n */
666 result->exponent = result->exponent + 1; /* is 0.5 * 2 ^ (n + 1) */
667 }
668 else
669 result->mantissa = -result->mantissa; /* negate mantissa */
670 return;
671 }
672
673
674 /* Add two unpacked numbers.
675
676 The mantissas are first aligned if necessary by scaling the smaller of the
677 two operands. If the magnitude of the difference between the exponents is
678 greater than the number of significant bits, then the smaller number has been
679 scaled to zero (swamped), and so the sum is simply the larger operand.
680 Otherwise, the sum is computed and checked for overflow, which has occurred
681 if the signs of the operands are the same but differ from that of the result.
682 Scaling and renormalization is performed if overflow occurred.
683 */
684
add(FPU * sum,FPU augend,FPU addend)685 static void add (FPU *sum, FPU augend, FPU addend)
686 {
687 int32 magn;
688 t_bool bits_lost;
689
690 if (augend.mantissa == 0)
691 *sum = addend; /* X + 0 = X */
692
693 else if (addend.mantissa == 0)
694 *sum = augend; /* 0 + X = X */
695
696 else {
697 magn = augend.exponent - addend.exponent; /* difference exponents */
698
699 if (magn > 0) { /* addend smaller? */
700 *sum = augend; /* preset augend */
701 bits_lost = asr (&addend, magn); /* align addend */
702 }
703 else { /* augend smaller? */
704 *sum = addend; /* preset addend */
705 magn = -magn; /* make difference positive */
706 bits_lost = asr (&augend, magn); /* align augend */
707 }
708
709 if (magn <= (int32) op_bits[augend.precision]) { /* value swamped? */
710 sum->mantissa = /* no, add mantissas */
711 addend.mantissa + augend.mantissa;
712
713 if (((addend.mantissa < 0) == (augend.mantissa < 0)) && /* mantissa overflow? */
714 ((addend.mantissa < 0) != (sum->mantissa < 0))) {
715 bits_lost = bits_lost | lsrx (sum, 1); /* restore value */
716 sum->mantissa = /* restore sign */
717 sum-> mantissa | (addend.mantissa & FP_MSIGN);
718 }
719
720 if (bits_lost) /* any bits lost? */
721 sum->mantissa = sum->mantissa | 1; /* include one for rounding */
722 }
723 }
724 return;
725 }
726
727
728 /* Multiply two unpacked numbers.
729
730 The single-precision firmware (FMP) operates differently from the firmware
731 extended-precision (.XMPY) and the hardware multiplies of any precision.
732 Firmware implementations use the MPY micro-order to form 16-bit x 16-bit =
733 32-bit partial products and sum them to form the result. The hardware uses a
734 series of shifts and adds. This means that firmware FMP and hardware FMP
735 return slightly different values, as may be seen by attempting to run the
736 firmware FMP diagnostic on the FPP.
737
738 The FMP microcode calls a signed multiply routine to calculate three partial
739 products (all but LSB * LSB). Because the LSBs are unsigned, i.e., all bits
740 significant, the two MSB * LSB products are calculated using LSB/2. The
741 unsigned right-shift ensures a positive LSB with no significant bits lost,
742 because the lower eight bits are unused (they held the vacated exponent). In
743 order to sum the partial products, the LSB of the result of MSB * MSB is also
744 right-shifted before addition. Note, though, that this loses a significant
745 bit. After summation, the result is left-shifted to correct for the original
746 right shifts.
747
748 The .XMPY microcode negates both operands as necessary to produce positive
749 values and then forms six of the nine 16-bit x 16-bit = 32-bit unsigned
750 multiplications required for a full 96-bit product. Given a 48-bit
751 multiplicand "a1a2a3" and a 48-bit multiplier "b1b2b3", the firmware performs
752 these calculations to develop a 48-bit product:
753
754 a1 a2 a3
755 +-------+-------+-------+
756 b1 b2 b3
757 +-------+-------+-------+
758 _________________________
759
760 a1 * b3 [p1]
761 +-------+-------+
762 a2 * b2 [p2]
763 +-------+-------+
764 a1 * b2 [p3]
765 +-------+-------+
766 a3 * b1 [p4]
767 +-------+-------+
768 a2 * b1 [p5]
769 +-------+-------+
770 a1 * b1 [p6]
771 +-------+-------+
772 _________________________________
773
774 product
775 +-------+-------+-------+
776
777 The least-significant words of partial products [p1], [p2], and [p4] are used
778 only to develop a carry bit into the 48-bit sum. The product is complemented
779 as necessary to restore the sign.
780
781 The basic FPP hardware algorithm scans the multiplier and adds a shifted copy
782 of the multiplicand whenever a one-bit is detected. To avoid successive adds
783 when a string of ones is encountered (because adds are more expensive than
784 shifts), the hardware instead adds the multiplicand shifted by N + 1 + P and
785 subtracts the multiplicand shifted by P to obtain the equivalent value with a
786 maximum of two operations.
787
788 Instead of implementing either the .XMPY firmware algorithm or the hardware
789 shift-and-add algorithm directly, it is more efficient under simulation to
790 use 32 x 32 = 64-bit multiplications, thereby reducing the number required
791 from six to four (64-bit "c1c2" x 64-bit "d1d2"):
792
793 ah al
794 +-------+-------+
795 bh bl
796 +-------+-------+
797 _________________
798
799 al * bl [ll]
800 +-------+-------+
801 ah * bl [hl]
802 +-------+-------+
803 al * bh [lh]
804 +-------+-------+
805 ah * bh [hh]
806 +-------+-------+
807 _________________________________
808
809 product
810 +-------+-------+
811
812 However, the FMP algorithm is implemented directly from the microcode to
813 preserve the fidelity of the simulation, i.e., to lose the same amount
814 of precision.
815 */
816
multiply(FPU * product,FPU multiplicand,FPU multiplier)817 static void multiply (FPU *product, FPU multiplicand, FPU multiplier)
818 {
819 uint32 ah, al, bh, bl, sign = 0;
820 t_uint64 hh, hl, lh, ll, carry;
821 int16 ch, cl, dh, dl;
822 t_bool firmware;
823
824 product->precision = multiplicand.precision; /* set precision */
825
826 if ((multiplicand.mantissa == 0) || /* 0 * X = 0 */
827 (multiplier.mantissa == 0)) /* X * 0 = 0 */
828 product->mantissa = product->exponent = 0;
829
830 else {
831 firmware = (UNIT_CPU_MODEL != UNIT_1000_F); /* set firmware flag */
832
833 if (!firmware || (product->precision != fp_f)) { /* hardware? */
834 if (multiplicand.mantissa < 0) { /* negative? */
835 complement (&multiplicand); /* complement operand */
836 sign = ~sign; /* track sign */
837 }
838 if (multiplier.mantissa < 0) { /* negative? */
839 complement (&multiplier); /* complement operand */
840 sign = ~sign; /* track sign */
841 }
842 }
843
844 product->exponent = /* compute exponent */
845 multiplicand.exponent + multiplier.exponent + 1;
846
847 ah = (uint32) (multiplicand.mantissa >> 32); /* split multiplicand */
848 al = (uint32) (multiplicand.mantissa & DMASK32); /* into high and low parts */
849 bh = (uint32) (multiplier.mantissa >> 32); /* split multiplier */
850 bl = (uint32) (multiplier.mantissa & DMASK32); /* into high and low parts */
851
852 if (firmware && (product->precision == fp_f)) { /* single-precision firmware? */
853 ch = (int16) (ah >> 16) & DMASK; /* split 32-bit multiplicand */
854 cl = (int16) (ah & 0xfffe); /* into high and low parts */
855 dh = (int16) (bh >> 16) & DMASK; /* split 32-bit multiplier */
856 dl = (int16) (bh & 0xfffe); /* into high and low parts */
857
858 hh = (t_uint64) (((int32) ch * dh) & ~1); /* form cross products */
859 hl = (t_uint64) (((t_int64) ch * (t_int64) (uint16) dl +
860 (t_int64) dh * (t_int64) (uint16) cl) &
861 0xfffffffffffe0000);
862
863 product->mantissa = (t_uint64) (((t_int64) hh << 32) + /* sum partials */
864 ((t_int64) hl << 16));
865 }
866
867 else {
868 hh = ((t_uint64) ah * bh); /* form four cross products */
869 hl = ((t_uint64) ah * bl); /* using 32 x 32 = */
870 lh = ((t_uint64) al * bh); /* 64-bit multiplies */
871 ll = ((t_uint64) al * bl);
872
873 carry = ((ll >> 32) + (uint32) hl + (uint32) lh) >> 32; /* form carry */
874
875 product->mantissa = hh + (hl >> 32) + (lh >> 32) + carry; /* sum partials */
876
877 if (sign) /* negate if required */
878 complement (product);
879 }
880 }
881 return;
882 }
883
884
885 /* Divide two unpacked numbers.
886
887 As with multiply, the single-precision firmware (FDV) operates differently
888 from the firmware extended-precision (.XDIV) and the hardware divisions of
889 any precision. Firmware implementations use the DIV micro-order to form
890 32-bit / 16-bit = 16-bit quotients and 16-bit remainders. These are used in
891 a "divide and correct" algorithm, wherein the quotient is estimated and then
892 corrected by comparing the dividend to the product of the quotient and the
893 divisor. The hardware uses a series of shifts and subtracts. This means
894 that firmware FDV and hardware FDV once again return slightly different
895 values.
896
897 Under simulation, the classic divide-and-correct method is employed, using
898 64-bit / 32-bit = 32-bit divisions. This method considers the 64-bit
899 dividend and divisor each to consist of two 32-bit "digits." The 64-bit
900 dividend "a1a2a3a4" is divided by the first 32-bit digit "b1b2" of the 64-bit
901 divisor "b1b2b3b4", yielding a 32-bit trial quotient digit and a 32-bit
902 remainder digit. A correction is developed by subtracting the product of the
903 second 32-bit digit "b3b4" of the divisor and the trial quotient digit from
904 the remainder (we take advantage of the eight bits vacated by the exponent
905 during unpacking to ensure that this product will not overflow into the sign
906 bit). If the remainder is negative, the trial quotient is too large, so it
907 is decremented, and the (full 64-bit) divisor is added to the correction.
908 This is repeated until the correction is non-negative, indicating that the
909 first quotient digit is correct. The process is then repeated using the
910 remainder as the dividend to develop the second 32-bit digit of the quotient.
911 The two digits are then concatenated for produce the final 64-bit value.
912
913 (See, "Divide-and-Correct Methods for Multiple Precision Division" by Marvin
914 L. Stein, Communications of the ACM, August 1964 for background.)
915
916 The microcoded single-precision division avoids overflows by right-shifting
917 some values, which leads to a loss of precision in the LSBs. We duplicate
918 the firmware algorithm here to preserve the fidelity of the simulation.
919 */
920
divide(FPU * quotient,FPU dividend,FPU divisor)921 static void divide (FPU *quotient, FPU dividend, FPU divisor)
922 {
923 uint32 sign = 0;
924 t_int64 bh, bl, r1, r0, p1, p0;
925 t_uint64 q, q1, q0;
926 t_bool firmware;
927 int32 ah, div, cp;
928 int16 dh, dl, pq1, pq2, cq;
929
930 quotient->precision = dividend.precision; /* set precision */
931
932 if (divisor.mantissa == 0) { /* division by zero? */
933 if (dividend.mantissa < 0)
934 quotient->mantissa = FP_MSIGN; /* return minus infinity */
935 else
936 quotient->mantissa = ~FP_MSIGN; /* or plus infinity */
937 quotient->exponent = FP_MAXPEXP + 1;
938 }
939
940 else if (dividend.mantissa == 0) /* dividend zero? */
941 quotient->mantissa = quotient->exponent = 0; /* yes; result is zero */
942
943 else {
944 firmware = (UNIT_CPU_MODEL != UNIT_1000_F); /* set firmware flag */
945
946 if (!firmware || (quotient->precision != fp_f)) { /* hardware or FFP? */
947 if (dividend.mantissa < 0) { /* negative? */
948 complement (÷nd); /* complement operand */
949 sign = ~sign; /* track sign */
950 }
951 if (divisor.mantissa < 0) { /* negative? */
952 complement (&divisor); /* complement operand */
953 sign = ~sign; /* track sign */
954 }
955 }
956
957 quotient->exponent = /* division subtracts exponents */
958 dividend.exponent - divisor.exponent;
959
960 bh = divisor.mantissa >> 32; /* split divisor */
961 bl = divisor.mantissa & DMASK32; /* into high and low parts */
962
963 if (firmware && (quotient->precision == fp_f)) { /* single-precision firmware? */
964 quotient->exponent = quotient->exponent + 1; /* fix exponent */
965
966 ah = (int32) (dividend.mantissa >> 32); /* split dividend */
967 dh = (int16) (bh >> 16); /* split divisor again */
968 dl = (int16) bh;
969
970 div = ah >> 2; /* ASR 2 to prevent overflow */
971
972 pq1 = div / dh; /* form first partial quotient */
973 div = ((div % dh) & ~1) << 15; /* ASR 1, move rem to upper */
974 pq2 = div / dh; /* form second partial quotient */
975
976 div = (uint16) dl << 13; /* move divisor LSB to upper, LSR 3 */
977 cq = div / dh; /* form correction quotient */
978 cp = -cq * pq1; /* and correction product */
979
980 cp = (((cp >> 14) & ~3) + (int32) pq2) << 1; /* add corr prod and 2nd partial quo */
981 quotient->mantissa = /* add 1st partial quo and align */
982 (t_uint64) (((int32) pq1 << 16) + cp) << 32;
983 }
984
985 else { /* hardware or FFP */
986 q1 = (t_uint64) (dividend.mantissa / bh); /* form 1st trial quotient */
987 r1 = dividend.mantissa % bh; /* and remainder */
988 p1 = (r1 << 24) - (bl >> 8) * q1; /* calculate correction */
989
990 while (p1 < 0) { /* correction needed? */
991 q1 = q1 - 1; /* trial quotient too large */
992 p1 = p1 + (divisor.mantissa >> 8); /* increase remainder */
993 }
994
995 q0 = (t_uint64) ((p1 << 8) / bh); /* form 2nd trial quotient */
996 r0 = (p1 << 8) % bh; /* and remainder */
997 p0 = (r0 << 24) - (bl >> 8) * q0; /* calculate correction */
998
999 while (p0 < 0) { /* correction needed? */
1000 q0 = q0 - 1; /* trial quotient too large */
1001 p0 = p0 + (divisor.mantissa >> 8); /* increase remainder */
1002 }
1003
1004 q = (q1 << 32) + q0; /* sum quotient digits */
1005
1006 if (q1 & 0xffffffff00000000) { /* did we lose MSB? */
1007 q = (q >> 1) | 0x8000000000000000; /* shift right and replace bit */
1008 quotient->exponent = quotient->exponent + 1;/* bump exponent for shift */
1009 }
1010
1011 if (q & 0x8000000000000000) /* lose normalization? */
1012 q = q >> 1; /* correct */
1013
1014 quotient->mantissa = (t_int64) q;
1015 }
1016
1017 if (sign)
1018 complement (quotient); /* negate if required */
1019 }
1020 return;
1021 }
1022
1023
1024 /* Fix an unpacked number.
1025
1026 A floating-point value is converted to an integer. The desired precision of
1027 the result (single or double integer) must be set before calling.
1028
1029 Values less than 0.5 (i.e., with negative exponents) underflow to zero. If
1030 the value exceeds the specified integer range, the maximum integer value is
1031 returned and overflow is set. Otherwise, the floating-point value is
1032 right-shifted to zero the exponent. The result is then rounded.
1033 */
1034
fix(FPU * result,FPU operand)1035 static uint32 fix (FPU *result, FPU operand)
1036 {
1037 uint32 overflow;
1038 t_bool bits_lost;
1039
1040 if (operand.exponent < 0) { /* value < 0.5? */
1041 result->mantissa = 0; /* result rounds to zero */
1042 overflow = 0; /* clear for underflow */
1043 }
1044
1045 else if (operand.exponent > /* value > integer size? */
1046 (int32) op_bits[result->precision]) {
1047 result->mantissa = /* return max int value */
1048 (t_uint64) int_p_max[result->precision] <<
1049 op_start[result->precision];
1050 overflow = 1; /* and set overflow */
1051 }
1052
1053 else { /* value in range */
1054 bits_lost = asr (&operand, /* shift to zero exponent */
1055 op_bits[result->precision] - operand.exponent);
1056
1057 if (operand.mantissa < 0) { /* value negative? */
1058 if (bits_lost) /* bits lost? */
1059 operand.mantissa = operand.mantissa | 1; /* include one for rounding */
1060
1061 operand.mantissa = operand.mantissa + /* round result */
1062 p_half_lsb[result->precision];
1063 }
1064
1065 result->mantissa = operand.mantissa & /* mask to precision */
1066 op_mask[result->precision];
1067 overflow = 0;
1068 }
1069
1070 result->exponent = 0; /* tidy up for integer value */
1071 return overflow;
1072 }
1073
1074
1075 /* Float an integer to an unpacked number.
1076
1077 An integer is converted to a floating-point value. The desired precision of
1078 the result must be set before calling.
1079
1080 Conversion is simply a matter of copying the integer value, setting an
1081 exponent that reflects the right-aligned position of the bits, and
1082 normalizing.
1083 */
1084
ffloat(FPU * result,FPU operand)1085 static void ffloat (FPU *result, FPU operand)
1086 {
1087 result->mantissa = operand.mantissa; /* set value */
1088 result->exponent = op_bits[operand.precision]; /* set exponent */
1089 normalize (result); /* normalize */
1090 return;
1091 }
1092
1093
1094
1095 /* High-level floating-point routines. */
1096
1097
1098 /* Determine operand precisions.
1099
1100 The precisions of the operands and result are determined by decoding an
1101 operation opcode and returned to the caller. Pass NULL for both of the
1102 operands if only the result precision is wanted. Pass NULL for the result if
1103 only the operand precisions are wanted.
1104
1105 Implementation note:
1106
1107 1. gcc-4.3.0 complains at -O3 that operand_l/r may not be initialized.
1108 Because of the mask, the switch statement covers all cases, but gcc
1109 doesn't realize this. The "default" case is redundant but eliminates the
1110 warning.
1111 */
1112
fp_prec(uint16 opcode,OPSIZE * operand_l,OPSIZE * operand_r,OPSIZE * result)1113 void fp_prec (uint16 opcode, OPSIZE *operand_l, OPSIZE *operand_r, OPSIZE *result)
1114 {
1115 OPSIZE fp_size, int_size;
1116
1117 fp_size = (OPSIZE) ((opcode & 0003) + 2); /* fp_f, fp_x, fp_t, fp_e */
1118 int_size = (OPSIZE) ((opcode & 0004) >> 2); /* in_s, in_d */
1119
1120 if (operand_l && operand_r) { /* want operand precisions? */
1121 switch (opcode & 0120) { /* mask out opcode bit 5 */
1122 case 0000: /* add/mpy */
1123 case 0020: /* sub/div */
1124 *operand_l = fp_size; /* assume first op is fp */
1125
1126 if (opcode & 0004) /* operand internal? */
1127 *operand_r = fp_a; /* second op is accum */
1128 else
1129 *operand_r = fp_size; /* second op is fp */
1130 break;
1131
1132 case 0100: /* fix/accum as integer */
1133 *operand_l = fp_size; /* first op is fp */
1134 *operand_r = fp_a; /* second op is always null */
1135 break;
1136
1137 case 0120: /* flt/accum as float */
1138 default: /* keeps compiler quiet for uninit warning */
1139 *operand_l = int_size; /* first op is integer */
1140 *operand_r = fp_a; /* second op is always null */
1141 break;
1142 }
1143
1144 if (opcode & 0010) /* operand internal? */
1145 *operand_l = fp_a; /* first op is accum */
1146 }
1147
1148 if (result) { /* want result precision? */
1149 if ((opcode & 0120) == 0100) /* fix? */
1150 *result = int_size; /* result is integer */
1151 else /* all others */
1152 *result = fp_size; /* result is fp */
1153 }
1154 return;
1155 }
1156
1157
1158 /* Floating Point Processor executor.
1159
1160 The executor simulates the MPP interface between the CPU and the FPP. The
1161 operation to be performed is specified by the supplied opcode, which conforms
1162 to the FPP hardware interface, as follows:
1163
1164 Bits Value Action
1165 ---- ----- ----------------------------------------------
1166 7 0 Exponent range is standard (+/-127)
1167 1 Exponent range is expanded (+/-511)
1168
1169 6-4 000 Add
1170 001 Subtract
1171 010 Multiply
1172 011 Divide
1173 100 Fix
1174 101 Float
1175 110 (diagnostic)
1176 111 (diagnostic)
1177
1178 3 0 Left operand is supplied
1179 1 Left operand in accumulator
1180
1181 2 0 Right operand is supplied (ADD/SUB/MPY/DIV)
1182 Single integer operation (FIX/FLT)
1183 1 Right operand in accumulator (ADD/SUB/MPY/DIV)
1184 Double integer operation (FIX/FLT)
1185
1186 1-0 00 2-word operation
1187 01 3-word operation
1188 10 4-word operation
1189 11 5-word operation
1190
1191 If the opcode specifies that the left (or right) operand is in the
1192 accumulator, then the value supplied for that parameter is not used. All
1193 results are automatically left in the accumulator. If the result is not
1194 needed externally, then NULL may be passed for the result parameter.
1195
1196 To support accumulator set/get operations under simulation, the opcode is
1197 expanded to include a special mode, indicated by bit 15 = 1. In this mode,
1198 if the result parameter is NULL, then the accumulator is set from the value
1199 passed as operand_l. If the result parameter is not null, then the
1200 accumulator value is returned as the result, and operand_l is ignored. The
1201 precision of the operation is performed as specified by the OPSIZE value
1202 passed in bits 2-0 of the opcode.
1203
1204 The function returns 1 if the operation overflows and 0 if not.
1205 */
1206
fp_exec(uint16 opcode,OP * result,OP operand_l,OP operand_r)1207 uint32 fp_exec (uint16 opcode, OP *result, OP operand_l, OP operand_r)
1208 {
1209 static FPU accumulator;
1210 FPU uoperand_l, uoperand_r;
1211 OPSIZE op_l_prec, op_r_prec, rslt_prec;
1212 uint32 overflow;
1213
1214 if (opcode & SIGN) { /* accumulator mode? */
1215 rslt_prec = (OPSIZE) (opcode & 0017); /* get operation precision */
1216
1217 if (result) { /* get accumulator? */
1218 op_l_prec = accumulator.precision; /* save accum prec temp */
1219 accumulator.precision = rslt_prec; /* set desired precision */
1220 *result = pack (accumulator); /* pack accumulator */
1221 accumulator.precision = op_l_prec; /* restore correct prec */
1222 }
1223 else /* set accumulator */
1224 accumulator = unpack (operand_l, rslt_prec); /* unpack from operand */
1225
1226 return 0; /* no overflow from accum ops */
1227 }
1228
1229 fp_prec (opcode, &op_l_prec, &op_r_prec, &rslt_prec); /* calc precs from opcode */
1230
1231 if (op_l_prec == fp_a) /* left operand in accum? */
1232 uoperand_l = accumulator; /* copy it */
1233 else /* operand supplied */
1234 uoperand_l = unpack (operand_l, op_l_prec); /* unpack from parameter */
1235
1236 if (op_r_prec == fp_a) /* right operand in accum? */
1237 uoperand_r = accumulator; /* copy it */
1238 else /* operand supplied */
1239 uoperand_r = unpack (operand_r, op_r_prec); /* unpack from parameter */
1240
1241
1242 switch (opcode & 0160) { /* dispatch operation */
1243
1244 case 0000: /* add */
1245 add (&accumulator, uoperand_l, uoperand_r);
1246 break;
1247
1248 case 0020: /* subtract */
1249 complement (&uoperand_r);
1250 add (&accumulator, uoperand_l, uoperand_r);
1251 break;
1252
1253 case 0040: /* multiply */
1254 multiply (&accumulator, uoperand_l, uoperand_r);
1255 break;
1256
1257 case 0060: /* divide */
1258 divide (&accumulator, uoperand_l, uoperand_r);
1259 break;
1260
1261 case 0100: /* fix */
1262 accumulator.precision = rslt_prec;
1263 overflow = fix (&accumulator, uoperand_l);
1264
1265 if (result) /* result wanted? */
1266 *result = pack_int (accumulator.mantissa, /* pack integer */
1267 rslt_prec);
1268 return overflow;
1269
1270 case 0120: /* float */
1271 accumulator.precision = rslt_prec;
1272 ffloat (&accumulator, uoperand_l);
1273
1274 if (result) /* result wanted? */
1275 *result = pack (accumulator); /* pack FP (FLT does not round) */
1276 return 0;
1277
1278 case 0140: /* (diagnostic) */
1279 case 0160: /* (diagnostic) */
1280 return 0;
1281 }
1282
1283 if (UNIT_CPU_MODEL != UNIT_1000_F) /* firmware implementation? */
1284 accumulator.mantissa = accumulator.mantissa & /* mask to precision */
1285 op_mask[accumulator.precision];
1286
1287 normalize (&accumulator); /* normalize */
1288 overflow = roundovf (&accumulator, opcode & 0200); /* round and check for overflow */
1289
1290 if (result) /* result wanted? */
1291 *result = pack (accumulator); /* pack result */
1292
1293 return overflow;
1294 }
1295
1296
1297 /* Set or get accumulator at desired precision.
1298
1299 This function provides access to the FPP accumulator. In hardware, the
1300 accumulator may be read at a given precision by sending the FPP an opcode
1301 encoded with the desired precision and then reading words from the FPP
1302 /without/ initiating the operation, i.e., without starting the processor.
1303
1304 Under simulation, pass this function a NULL operand and the desired
1305 precision to read the accumulator. Pass a pointer to an operand and the
1306 desired precision to set the accumulator; the return value in this case is
1307 not defined.
1308 */
1309
fp_accum(const OP * operand,OPSIZE precision)1310 OP fp_accum (const OP *operand, OPSIZE precision)
1311 {
1312 OP result = NOP;
1313 uint16 opcode = (uint16) precision | SIGN; /* add special mode bit */
1314
1315 if (operand)
1316 fp_exec (opcode, NULL, *operand, NOP); /* set accum */
1317 else
1318 fp_exec (opcode, &result, NOP, NOP); /* get accum */
1319 return result;
1320 }
1321
1322
1323 /* Pack an unpacked floating-point number.
1324
1325 An unpacked mantissa is passed as a "packed" number with an unused exponent.
1326 The mantissa and separately-passed exponent are packed into the in-memory
1327 floating-point format. Note that all bits are significant in the mantissa
1328 (no masking is done).
1329 */
1330
fp_pack(OP * result,OP mantissa,int32 exponent,OPSIZE precision)1331 uint32 fp_pack (OP *result, OP mantissa, int32 exponent, OPSIZE precision)
1332 {
1333 FPU unpacked;
1334
1335 unpacked.mantissa = unpack_int (mantissa, precision); /* unpack mantissa */
1336 unpacked.exponent = exponent; /* set exponent */
1337 unpacked.precision = precision; /* set precision */
1338 *result = pack (unpacked); /* pack them */
1339 return 0;
1340 }
1341
1342
1343 /* Normalize, round, and pack an unpacked floating-point number.
1344
1345 An unpacked mantissa is passed as a "packed" number with an unused exponent.
1346 The mantissa and separately-passed exponent are normalized, rounded, and
1347 packed into the in-memory floating-point format. Note that all bits are
1348 significant in the mantissa (no masking is done).
1349 */
1350
fp_nrpack(OP * result,OP mantissa,int32 exponent,OPSIZE precision)1351 uint32 fp_nrpack (OP *result, OP mantissa, int32 exponent, OPSIZE precision)
1352 {
1353 FPU unpacked;
1354
1355 unpacked.mantissa = unpack_int (mantissa, precision); /* unpack mantissa */
1356 unpacked.exponent = exponent; /* set exponent */
1357 unpacked.precision = precision; /* set precision */
1358 return nrpack (result, unpacked, FALSE); /* norm/rnd/pack them */
1359 }
1360
1361
1362 /* Unpack a packed floating-point number.
1363
1364 A floating-point number, packed into the in-memory format, is unpacked into
1365 separate mantissa and exponent values. The unpacked mantissa is returned in
1366 a "packed" structure with an exponent of zero. Mantissa or exponent may be
1367 null if that part isn't wanted.
1368 */
1369
fp_unpack(OP * mantissa,int32 * exponent,OP packed,OPSIZE precision)1370 uint32 fp_unpack (OP *mantissa, int32 *exponent, OP packed, OPSIZE precision)
1371
1372 {
1373 FPU unpacked;
1374
1375 unpacked = unpack (packed, precision); /* unpack mantissa and exponent */
1376
1377 if (exponent) /* exponent wanted? */
1378 *exponent = unpacked.exponent; /* return exponent */
1379
1380 if (mantissa) /* mantissa wanted? */
1381 *mantissa = pack_int (unpacked.mantissa, fp_t); /* return full-size mantissa */
1382 return 0;
1383 }
1384
1385
1386 /* Complement an unpacked mantissa.
1387
1388 An unpacked mantissa is passed as a "packed" number with a zero exponent.
1389 The exponent increment, i.e., either zero or one, depending on whether a
1390 renormalization was required, is returned. Note that all bits are
1391 significant in the mantissa.
1392 */
1393
fp_ucom(OP * mantissa,OPSIZE precision)1394 uint32 fp_ucom (OP *mantissa, OPSIZE precision)
1395 {
1396 FPU unpacked;
1397
1398 unpacked.mantissa = unpack_int (*mantissa, precision); /* unpack mantissa */
1399 unpacked.exponent = 0; /* clear undefined exponent */
1400 unpacked.precision = precision; /* set precision */
1401 complement (&unpacked); /* negate it */
1402 *mantissa = pack_int (unpacked.mantissa, precision); /* replace mantissa */
1403 return (uint32) unpacked.exponent; /* return exponent increment */
1404 }
1405
1406
1407 /* Complement a floating-point number. */
1408
fp_pcom(OP * packed,OPSIZE precision)1409 uint32 fp_pcom (OP *packed, OPSIZE precision)
1410 {
1411 FPU unpacked;
1412
1413 unpacked = unpack (*packed, precision); /* unpack the number */
1414 complement (&unpacked); /* negate it */
1415 return nrpack (packed, unpacked, FALSE); /* and norm/rnd/pack */
1416 }
1417
1418
1419 /* Truncate a floating-point number. */
1420
fp_trun(OP * result,OP source,OPSIZE precision)1421 uint32 fp_trun (OP *result, OP source, OPSIZE precision)
1422 {
1423 t_bool bits_lost;
1424 FPU unpacked;
1425 FPU one = { FP_ONEHALF, 1, fp_t }; /* 0.5 * 2 ** 1 = 1.0 */
1426 OP zero = { { 0, 0, 0, 0, 0 } }; /* 0.0 */
1427 t_uint64 mask = mant_mask[precision] & ~FP_MSIGN;
1428
1429 unpacked = unpack (source, precision);
1430 if (unpacked.exponent < 0) /* number < 0.5? */
1431 *result = zero; /* return 0 */
1432 else if (unpacked.exponent >= (int32) op_bits[precision]) /* no fractional bits? */
1433 *result = source; /* already integer */
1434 else {
1435 mask = (mask >> unpacked.exponent) & mask; /* mask fractional bits */
1436 bits_lost = ((unpacked.mantissa & mask) != 0); /* flag if bits lost */
1437 unpacked.mantissa = unpacked.mantissa & ~mask; /* mask off fraction */
1438 if ((unpacked.mantissa < 0) && bits_lost) /* negative? */
1439 add (&unpacked, unpacked, one); /* truncate toward zero */
1440 nrpack (result, unpacked, FALSE); /* (overflow cannot occur) */
1441 }
1442 return 0; /* clear overflow on return */
1443 }
1444
1445
1446 /* Convert a floating-point number from one precision to another. */
1447
fp_cvt(OP * result,OPSIZE source_precision,OPSIZE dest_precision)1448 uint32 fp_cvt (OP *result, OPSIZE source_precision, OPSIZE dest_precision)
1449 {
1450 FPU unpacked;
1451
1452 unpacked = unpack (*result, source_precision);
1453 unpacked.precision = dest_precision;
1454 return nrpack (result, unpacked, FALSE); /* norm/rnd/pack */
1455 }
1456
1457
1458 #endif /* end of int64 support */
1459