xref: /netbsd/sys/arch/hppa/spmath/dfmpy.c (revision c4a72b64)
1 /*	$NetBSD: dfmpy.c,v 1.1 2002/06/05 01:04:24 fredette Exp $	*/
2 
3 /*	$OpenBSD: dfmpy.c,v 1.4 2001/03/29 03:58:17 mickey Exp $	*/
4 
5 /*
6  * Copyright 1996 1995 by Open Software Foundation, Inc.
7  *              All Rights Reserved
8  *
9  * Permission to use, copy, modify, and distribute this software and
10  * its documentation for any purpose and without fee is hereby granted,
11  * provided that the above copyright notice appears in all copies and
12  * that both the copyright notice and this permission notice appear in
13  * supporting documentation.
14  *
15  * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
16  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
17  * FOR A PARTICULAR PURPOSE.
18  *
19  * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
20  * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
21  * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
22  * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
23  * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
24  *
25  */
26 /*
27  * pmk1.1
28  */
29 /*
30  * (c) Copyright 1986 HEWLETT-PACKARD COMPANY
31  *
32  * To anyone who acknowledges that this file is provided "AS IS"
33  * without any express or implied warranty:
34  *     permission to use, copy, modify, and distribute this file
35  * for any purpose is hereby granted without fee, provided that
36  * the above copyright notice and this notice appears in all
37  * copies, and that the name of Hewlett-Packard Company not be
38  * used in advertising or publicity pertaining to distribution
39  * of the software without specific, written prior permission.
40  * Hewlett-Packard Company makes no representations about the
41  * suitability of this software for any purpose.
42  */
43 
44 #include "../spmath/float.h"
45 #include "../spmath/dbl_float.h"
46 
47 /*
48  *  Double Precision Floating-point Multiply
49  */
50 
51 int
52 dbl_fmpy(srcptr1,srcptr2,dstptr,status)
53 
54 dbl_floating_point *srcptr1, *srcptr2, *dstptr;
55 unsigned int *status;
56 {
57 	register unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2;
58 	register unsigned int opnd3p1, opnd3p2, resultp1, resultp2;
59 	register int dest_exponent, count;
60 	register int inexact = FALSE, guardbit = FALSE, stickybit = FALSE;
61 	int is_tiny;
62 
63 	Dbl_copyfromptr(srcptr1,opnd1p1,opnd1p2);
64 	Dbl_copyfromptr(srcptr2,opnd2p1,opnd2p2);
65 
66 	/*
67 	 * set sign bit of result
68 	 */
69 	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
70 		Dbl_setnegativezerop1(resultp1);
71 	else Dbl_setzerop1(resultp1);
72 	/*
73 	 * check first operand for NaN's or infinity
74 	 */
75 	if (Dbl_isinfinity_exponent(opnd1p1)) {
76 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
77 			if (Dbl_isnotnan(opnd2p1,opnd2p2)) {
78 				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
79 					/*
80 					 * invalid since operands are infinity
81 					 * and zero
82 					 */
83 					if (Is_invalidtrap_enabled())
84 						return(INVALIDEXCEPTION);
85 					Set_invalidflag();
86 					Dbl_makequietnan(resultp1,resultp2);
87 					Dbl_copytoptr(resultp1,resultp2,dstptr);
88 					return(NOEXCEPTION);
89 				}
90 				/*
91 				 * return infinity
92 				 */
93 				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
94 				Dbl_copytoptr(resultp1,resultp2,dstptr);
95 				return(NOEXCEPTION);
96 			}
97 		}
98 		else {
99 			/*
100 			 * is NaN; signaling or quiet?
101 			 */
102 			if (Dbl_isone_signaling(opnd1p1)) {
103 				/* trap if INVALIDTRAP enabled */
104 				if (Is_invalidtrap_enabled())
105 					return(INVALIDEXCEPTION);
106 				/* make NaN quiet */
107 				Set_invalidflag();
108 				Dbl_set_quiet(opnd1p1);
109 			}
110 			/*
111 			 * is second operand a signaling NaN?
112 			 */
113 			else if (Dbl_is_signalingnan(opnd2p1)) {
114 				/* trap if INVALIDTRAP enabled */
115 				if (Is_invalidtrap_enabled())
116 					return(INVALIDEXCEPTION);
117 				/* make NaN quiet */
118 				Set_invalidflag();
119 				Dbl_set_quiet(opnd2p1);
120 				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
121 				return(NOEXCEPTION);
122 			}
123 			/*
124 			 * return quiet NaN
125 			 */
126 			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
127 			return(NOEXCEPTION);
128 		}
129 	}
130 	/*
131 	 * check second operand for NaN's or infinity
132 	 */
133 	if (Dbl_isinfinity_exponent(opnd2p1)) {
134 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
135 			if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
136 				/* invalid since operands are zero & infinity */
137 				if (Is_invalidtrap_enabled())
138 					return(INVALIDEXCEPTION);
139 				Set_invalidflag();
140 				Dbl_makequietnan(opnd2p1,opnd2p2);
141 				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
142 				return(NOEXCEPTION);
143 			}
144 			/*
145 			 * return infinity
146 			 */
147 			Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
148 			Dbl_copytoptr(resultp1,resultp2,dstptr);
149 			return(NOEXCEPTION);
150 		}
151 		/*
152 		 * is NaN; signaling or quiet?
153 		 */
154 		if (Dbl_isone_signaling(opnd2p1)) {
155 			/* trap if INVALIDTRAP enabled */
156 			if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);
157 			/* make NaN quiet */
158 			Set_invalidflag();
159 			Dbl_set_quiet(opnd2p1);
160 		}
161 		/*
162 		 * return quiet NaN
163 		 */
164 		Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
165 		return(NOEXCEPTION);
166 	}
167 	/*
168 	 * Generate exponent
169 	 */
170 	dest_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) -DBL_BIAS;
171 
172 	/*
173 	 * Generate mantissa
174 	 */
175 	if (Dbl_isnotzero_exponent(opnd1p1)) {
176 		/* set hidden bit */
177 		Dbl_clear_signexponent_set_hidden(opnd1p1);
178 	}
179 	else {
180 		/* check for zero */
181 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
182 			Dbl_setzero_exponentmantissa(resultp1,resultp2);
183 			Dbl_copytoptr(resultp1,resultp2,dstptr);
184 			return(NOEXCEPTION);
185 		}
186 		/* is denormalized, adjust exponent */
187 		Dbl_clear_signexponent(opnd1p1);
188 		Dbl_leftshiftby1(opnd1p1,opnd1p2);
189 		Dbl_normalize(opnd1p1,opnd1p2,dest_exponent);
190 	}
191 	/* opnd2 needs to have hidden bit set with msb in hidden bit */
192 	if (Dbl_isnotzero_exponent(opnd2p1)) {
193 		Dbl_clear_signexponent_set_hidden(opnd2p1);
194 	}
195 	else {
196 		/* check for zero */
197 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
198 			Dbl_setzero_exponentmantissa(resultp1,resultp2);
199 			Dbl_copytoptr(resultp1,resultp2,dstptr);
200 			return(NOEXCEPTION);
201 		}
202 		/* is denormalized; want to normalize */
203 		Dbl_clear_signexponent(opnd2p1);
204 		Dbl_leftshiftby1(opnd2p1,opnd2p2);
205 		Dbl_normalize(opnd2p1,opnd2p2,dest_exponent);
206 	}
207 
208 	/* Multiply two source mantissas together */
209 
210 	/* make room for guard bits */
211 	Dbl_leftshiftby7(opnd2p1,opnd2p2);
212 	Dbl_setzero(opnd3p1,opnd3p2);
213 	/*
214 	 * Four bits at a time are inspected in each loop, and a
215 	 * simple shift and add multiply algorithm is used.
216 	 */
217 	for (count=1;count<=DBL_P;count+=4) {
218 		stickybit |= Dlow4p2(opnd3p2);
219 		Dbl_rightshiftby4(opnd3p1,opnd3p2);
220 		if (Dbit28p2(opnd1p2)) {
221 			/* Twoword_add should be an ADDC followed by an ADD. */
222 			Twoword_add(opnd3p1, opnd3p2, opnd2p1<<3 | opnd2p2>>29,
223 				    opnd2p2<<3);
224 		}
225 		if (Dbit29p2(opnd1p2)) {
226 			Twoword_add(opnd3p1, opnd3p2, opnd2p1<<2 | opnd2p2>>30,
227 				    opnd2p2<<2);
228 		}
229 		if (Dbit30p2(opnd1p2)) {
230 			Twoword_add(opnd3p1, opnd3p2, opnd2p1<<1 | opnd2p2>>31,
231 				    opnd2p2<<1);
232 		}
233 		if (Dbit31p2(opnd1p2)) {
234 			Twoword_add(opnd3p1, opnd3p2, opnd2p1, opnd2p2);
235 		}
236 		Dbl_rightshiftby4(opnd1p1,opnd1p2);
237 	}
238 	if (Dbit3p1(opnd3p1)==0) {
239 		Dbl_leftshiftby1(opnd3p1,opnd3p2);
240 	}
241 	else {
242 		/* result mantissa >= 2. */
243 		dest_exponent++;
244 	}
245 	/* check for denormalized result */
246 	while (Dbit3p1(opnd3p1)==0) {
247 		Dbl_leftshiftby1(opnd3p1,opnd3p2);
248 		dest_exponent--;
249 	}
250 	/*
251 	 * check for guard, sticky and inexact bits
252 	 */
253 	stickybit |= Dallp2(opnd3p2) << 25;
254 	guardbit = (Dallp2(opnd3p2) << 24) >> 31;
255 	inexact = guardbit | stickybit;
256 
257 	/* align result mantissa */
258 	Dbl_rightshiftby8(opnd3p1,opnd3p2);
259 
260 	/*
261 	 * round result
262 	 */
263 	if (inexact && (dest_exponent>0 || Is_underflowtrap_enabled())) {
264 		Dbl_clear_signexponent(opnd3p1);
265 		switch (Rounding_mode()) {
266 			case ROUNDPLUS:
267 				if (Dbl_iszero_sign(resultp1))
268 					Dbl_increment(opnd3p1,opnd3p2);
269 				break;
270 			case ROUNDMINUS:
271 				if (Dbl_isone_sign(resultp1))
272 					Dbl_increment(opnd3p1,opnd3p2);
273 				break;
274 			case ROUNDNEAREST:
275 				if (guardbit &&
276 				    (stickybit || Dbl_isone_lowmantissap2(opnd3p2)))
277 					Dbl_increment(opnd3p1,opnd3p2);
278 				break;
279 		}
280 		if (Dbl_isone_hidden(opnd3p1)) dest_exponent++;
281 	}
282 	Dbl_set_mantissa(resultp1,resultp2,opnd3p1,opnd3p2);
283 
284 	/*
285 	 * Test for overflow
286 	 */
287 	if (dest_exponent >= DBL_INFINITY_EXPONENT) {
288 		/* trap if OVERFLOWTRAP enabled */
289 		if (Is_overflowtrap_enabled()) {
290 			/*
291 			 * Adjust bias of result
292 			 */
293 			Dbl_setwrapped_exponent(resultp1,dest_exponent,ovfl);
294 			Dbl_copytoptr(resultp1,resultp2,dstptr);
295 			if (inexact) {
296 			    if (Is_inexacttrap_enabled())
297 				return (OVERFLOWEXCEPTION | INEXACTEXCEPTION);
298 			    else
299 				Set_inexactflag();
300 			}
301 			return (OVERFLOWEXCEPTION);
302 		}
303 		inexact = TRUE;
304 		Set_overflowflag();
305 		/* set result to infinity or largest number */
306 		Dbl_setoverflow(resultp1,resultp2);
307 	}
308 	/*
309 	 * Test for underflow
310 	 */
311 	else if (dest_exponent <= 0) {
312 		/* trap if UNDERFLOWTRAP enabled */
313 		if (Is_underflowtrap_enabled()) {
314 			/*
315 			 * Adjust bias of result
316 			 */
317 			Dbl_setwrapped_exponent(resultp1,dest_exponent,unfl);
318 			Dbl_copytoptr(resultp1,resultp2,dstptr);
319 			if (inexact) {
320 			    if (Is_inexacttrap_enabled())
321 				return (UNDERFLOWEXCEPTION | INEXACTEXCEPTION);
322 			    else
323 				Set_inexactflag();
324 			}
325 			return (UNDERFLOWEXCEPTION);
326 		}
327 
328 		/* Determine if should set underflow flag */
329 		is_tiny = TRUE;
330 		if (dest_exponent == 0 && inexact) {
331 			switch (Rounding_mode()) {
332 			case ROUNDPLUS:
333 				if (Dbl_iszero_sign(resultp1)) {
334 					Dbl_increment(opnd3p1,opnd3p2);
335 					if (Dbl_isone_hiddenoverflow(opnd3p1))
336 						is_tiny = FALSE;
337 					Dbl_decrement(opnd3p1,opnd3p2);
338 				}
339 				break;
340 			case ROUNDMINUS:
341 				if (Dbl_isone_sign(resultp1)) {
342 					Dbl_increment(opnd3p1,opnd3p2);
343 					if (Dbl_isone_hiddenoverflow(opnd3p1))
344 						is_tiny = FALSE;
345 					Dbl_decrement(opnd3p1,opnd3p2);
346 				}
347 				break;
348 			case ROUNDNEAREST:
349 				if (guardbit && (stickybit ||
350 				    Dbl_isone_lowmantissap2(opnd3p2))) {
351 					Dbl_increment(opnd3p1,opnd3p2);
352 					if (Dbl_isone_hiddenoverflow(opnd3p1))
353 					is_tiny = FALSE;
354 					Dbl_decrement(opnd3p1,opnd3p2);
355 				}
356 				break;
357 			}
358 		}
359 
360 		/*
361 		 * denormalize result or set to signed zero
362 		 */
363 		stickybit = inexact;
364 		Dbl_denormalize(opnd3p1,opnd3p2,dest_exponent,guardbit,
365 		 stickybit,inexact);
366 
367 		/* return zero or smallest number */
368 		if (inexact) {
369 			switch (Rounding_mode()) {
370 			case ROUNDPLUS:
371 				if (Dbl_iszero_sign(resultp1)) {
372 					Dbl_increment(opnd3p1,opnd3p2);
373 				}
374 				break;
375 			case ROUNDMINUS:
376 				if (Dbl_isone_sign(resultp1)) {
377 					Dbl_increment(opnd3p1,opnd3p2);
378 				}
379 				break;
380 			case ROUNDNEAREST:
381 				if (guardbit && (stickybit ||
382 				    Dbl_isone_lowmantissap2(opnd3p2))) {
383 					Dbl_increment(opnd3p1,opnd3p2);
384 				}
385 				break;
386 			}
387 			if (is_tiny) Set_underflowflag();
388 		}
389 		Dbl_set_exponentmantissa(resultp1,resultp2,opnd3p1,opnd3p2);
390 	}
391 	else Dbl_set_exponent(resultp1,dest_exponent);
392 	/* check for inexact */
393 	Dbl_copytoptr(resultp1,resultp2,dstptr);
394 	if (inexact) {
395 		if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION);
396 		else Set_inexactflag();
397 	}
398 	return(NOEXCEPTION);
399 }
400