xref: /netbsd/sys/arch/m68k/060sp/dist/pfpsp.s (revision 6550d01e)
1#
2# $NetBSD: pfpsp.s,v 1.4 2005/12/11 12:17:52 christos Exp $
3#
4
5#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
7# M68000 Hi-Performance Microprocessor Division
8# M68060 Software Package Production Release
9#
10# M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
11# All rights reserved.
12#
13# THE SOFTWARE is provided on an "AS IS" basis and without warranty.
14# To the maximum extent permitted by applicable law,
15# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
16# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
17# FOR A PARTICULAR PURPOSE and any warranty against infringement with
18# regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
19# and any accompanying written materials.
20#
21# To the maximum extent permitted by applicable law,
22# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
23# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
24# BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
25# ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
26#
27# Motorola assumes no responsibility for the maintenance and support
28# of the SOFTWARE.
29#
30# You are hereby granted a copyright license to use, modify, and distribute the
31# SOFTWARE so long as this entire notice is retained without alteration
32# in any modified and/or redistributed versions, and that such modified
33# versions are clearly identified as such.
34# No licenses are granted by implication, estoppel or otherwise under any
35# patents or trademarks of Motorola, Inc.
36#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
37
38#
39# freal.s:
40#	This file is appended to the top of the 060FPSP package
41# and contains the entry points into the package. The user, in
42# effect, branches to one of the branch table entries located
43# after _060FPSP_TABLE.
44#	Also, subroutine stubs exist in this file (_fpsp_done for
45# example) that are referenced by the FPSP package itself in order
46# to call a given routine. The stub routine actually performs the
47# callout. The FPSP code does a "bsr" to the stub routine. This
48# extra layer of hierarchy adds a slight performance penalty but
49# it makes the FPSP code easier to read and more mainatinable.
50#
51
# Byte offsets of each "callout" slot within the OS-supplied dispatch
# table located at _060FPSP_TABLE-0x80. Each slot holds a longword
# offset (relative to the table base) to the OS routine; the stub
# routines below fetch the slot and jump through it.
52set	_off_bsun,	0x00
53set	_off_snan,	0x04
54set	_off_operr,	0x08
55set	_off_ovfl,	0x0c
56set	_off_unfl,	0x10
57set	_off_dz,	0x14
58set	_off_inex,	0x18
59set	_off_fline,	0x1c
60set	_off_fpu_dis,	0x20
61set	_off_trap,	0x24
62set	_off_trace,	0x28
63set	_off_access,	0x2c
64set	_off_done,	0x30
65
# Memory read/write callouts supplied by the OS (instruction/data,
# byte/word/long variants).
66set	_off_imr,	0x40
67set	_off_dmr,	0x44
68set	_off_dmw,	0x48
69set	_off_irw,	0x4c
70set	_off_irl,	0x50
71set	_off_drb,	0x54
72set	_off_drw,	0x58
73set	_off_drl,	0x5c
74set	_off_dwb,	0x60
75set	_off_dww,	0x64
76set	_off_dwl,	0x68
77
78_060FPSP_TABLE:
79
80###############################################################
81
82# Here's the table of ENTRY POINTS for those linking the package.
# Each entry occupies 8 bytes: a 6-byte bra.l to the handler followed
# by a 2-byte pad word, so entry N is at _060FPSP_TABLE + 8*N.
# The trailing "space 56" pads the entry-point area; the OS-supplied
# callout dispatch table sits at _060FPSP_TABLE-0x80 (see the _off_*
# offsets above).
83	bra.l		_fpsp_snan
84	short		0x0000
85	bra.l		_fpsp_operr
86	short		0x0000
87	bra.l		_fpsp_ovfl
88	short		0x0000
89	bra.l		_fpsp_unfl
90	short		0x0000
91	bra.l		_fpsp_dz
92	short		0x0000
93	bra.l		_fpsp_inex
94	short		0x0000
95	bra.l		_fpsp_fline
96	short		0x0000
97	bra.l		_fpsp_unsupp
98	short		0x0000
99	bra.l		_fpsp_effadd
100	short		0x0000
101
102	space 		56
103
104###############################################################
# Exception "callout" stubs. Each stub performs a register-transparent
# jump through the OS-supplied dispatch table at _060FPSP_TABLE-0x80:
#   (1) save d0;
#   (2) fetch the callout's table-relative offset from the slot at
#       _060FPSP_TABLE-0x80+_off_*;
#   (3) pea pushes the callout's absolute address (table base + offset);
#   (4) reload d0 from its saved slot (now at 4(%sp));
#   (5) rtd &0x4 pops the pushed address into the PC and discards the
#       saved-d0 longword -- net effect: jmp through the table with all
#       registers preserved.
105	global		_fpsp_done
106_fpsp_done:
107	mov.l		%d0,-(%sp)
108	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0
109	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
110	mov.l		0x4(%sp),%d0
111	rtd		&0x4
112
113	global		_real_ovfl
114_real_ovfl:
115	mov.l		%d0,-(%sp)
116	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
117	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
118	mov.l		0x4(%sp),%d0
119	rtd		&0x4
120
121	global		_real_unfl
122_real_unfl:
123	mov.l		%d0,-(%sp)
124	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
125	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
126	mov.l		0x4(%sp),%d0
127	rtd		&0x4
128
129	global		_real_inex
130_real_inex:
131	mov.l		%d0,-(%sp)
132	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
133	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
134	mov.l		0x4(%sp),%d0
135	rtd		&0x4
136
137	global		_real_bsun
138_real_bsun:
139	mov.l		%d0,-(%sp)
140	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
141	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
142	mov.l		0x4(%sp),%d0
143	rtd		&0x4
144
145	global		_real_operr
146_real_operr:
147	mov.l		%d0,-(%sp)
148	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
149	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
150	mov.l		0x4(%sp),%d0
151	rtd		&0x4
152
153	global		_real_snan
154_real_snan:
155	mov.l		%d0,-(%sp)
156	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
157	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
158	mov.l		0x4(%sp),%d0
159	rtd		&0x4
160
161	global		_real_dz
162_real_dz:
163	mov.l		%d0,-(%sp)
164	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
165	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
166	mov.l		0x4(%sp),%d0
167	rtd		&0x4
168
169	global		_real_fline
170_real_fline:
171	mov.l		%d0,-(%sp)
172	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
173	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
174	mov.l		0x4(%sp),%d0
175	rtd		&0x4
176
177	global		_real_fpu_disabled
178_real_fpu_disabled:
179	mov.l		%d0,-(%sp)
180	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
181	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
182	mov.l		0x4(%sp),%d0
183	rtd		&0x4
184
185	global		_real_trap
186_real_trap:
187	mov.l		%d0,-(%sp)
188	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
189	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
190	mov.l		0x4(%sp),%d0
191	rtd		&0x4
192
193	global		_real_trace
194_real_trace:
195	mov.l		%d0,-(%sp)
196	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
197	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
198	mov.l		0x4(%sp),%d0
199	rtd		&0x4
200
201	global		_real_access
202_real_access:
203	mov.l		%d0,-(%sp)
204	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0
205	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
206	mov.l		0x4(%sp),%d0
207	rtd		&0x4
208
209#######################################
210
# Memory-access callout stubs. Same pea/rtd trampoline mechanism as the
# exception stubs above: save d0, fetch the table-relative offset from
# the dispatch table at _060FPSP_TABLE-0x80, push the resulting absolute
# address, restore d0, and rtd into the OS-supplied routine with all
# registers preserved.
211	global		_imem_read
212_imem_read:
213	mov.l		%d0,-(%sp)
214	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
215	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
216	mov.l		0x4(%sp),%d0
217	rtd		&0x4
218
219	global		_dmem_read
220_dmem_read:
221	mov.l		%d0,-(%sp)
222	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
223	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
224	mov.l		0x4(%sp),%d0
225	rtd		&0x4
226
227	global		_dmem_write
228_dmem_write:
229	mov.l		%d0,-(%sp)
230	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
231	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
232	mov.l		0x4(%sp),%d0
233	rtd		&0x4
234
235	global		_imem_read_word
236_imem_read_word:
237	mov.l		%d0,-(%sp)
238	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
239	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
240	mov.l		0x4(%sp),%d0
241	rtd		&0x4
242
243	global		_imem_read_long
244_imem_read_long:
245	mov.l		%d0,-(%sp)
246	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
247	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
248	mov.l		0x4(%sp),%d0
249	rtd		&0x4
250
251	global		_dmem_read_byte
252_dmem_read_byte:
253	mov.l		%d0,-(%sp)
254	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
255	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
256	mov.l		0x4(%sp),%d0
257	rtd		&0x4
258
259	global		_dmem_read_word
260_dmem_read_word:
261	mov.l		%d0,-(%sp)
262	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
263	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
264	mov.l		0x4(%sp),%d0
265	rtd		&0x4
266
267	global		_dmem_read_long
268_dmem_read_long:
269	mov.l		%d0,-(%sp)
270	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
271	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
272	mov.l		0x4(%sp),%d0
273	rtd		&0x4
274
275	global		_dmem_write_byte
276_dmem_write_byte:
277	mov.l		%d0,-(%sp)
278	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
279	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
280	mov.l		0x4(%sp),%d0
281	rtd		&0x4
282
283	global		_dmem_write_word
284_dmem_write_word:
285	mov.l		%d0,-(%sp)
286	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
287	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
288	mov.l		0x4(%sp),%d0
289	rtd		&0x4
290
291	global		_dmem_write_long
292_dmem_write_long:
293	mov.l		%d0,-(%sp)
294	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
295	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
296	mov.l		0x4(%sp),%d0
297	rtd		&0x4
298
299#
300# This file contains a set of define statements for constants
301# in order to promote readability within the corecode itself.
302#
303
# Stack-frame layout. All EXC_*/FP_*/USER_*/L_SCR* offsets below are
# relative to %a6 after the handlers execute "link.w %a6,&-LOCAL_SIZE"
# (see e.g. _fpsp_ovfl).
304set LOCAL_SIZE,		192			# stack frame size(bytes)
305set LV,			-LOCAL_SIZE		# stack offset
306
307set EXC_SR,		0x4			# stack status register
308set EXC_PC,		0x6			# stack pc
309set EXC_VOFF,		0xa			# stacked vector offset
310set EXC_EA,		0xc			# stacked <ea>
311
312set EXC_FP,		0x0			# frame pointer
313
314set EXC_AREGS,		-68			# offset of all address regs
315set EXC_DREGS,		-100			# offset of all data regs
316set EXC_FPREGS,	-36			# offset of all fp regs
317
318set EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
319set OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7
320set EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
321set EXC_A5,		EXC_AREGS+(5*4)
322set EXC_A4,		EXC_AREGS+(4*4)
323set EXC_A3,		EXC_AREGS+(3*4)
324set EXC_A2,		EXC_AREGS+(2*4)
325set EXC_A1,		EXC_AREGS+(1*4)
326set EXC_A0,		EXC_AREGS+(0*4)
327set EXC_D7,		EXC_DREGS+(7*4)
328set EXC_D6,		EXC_DREGS+(6*4)
329set EXC_D5,		EXC_DREGS+(5*4)
330set EXC_D4,		EXC_DREGS+(4*4)
331set EXC_D3,		EXC_DREGS+(3*4)
332set EXC_D2,		EXC_DREGS+(2*4)
333set EXC_D1,		EXC_DREGS+(1*4)
334set EXC_D0,		EXC_DREGS+(0*4)
335
336set EXC_FP0, 		EXC_FPREGS+(0*12)	# offset of saved fp0
337set EXC_FP1, 		EXC_FPREGS+(1*12)	# offset of saved fp1
338set EXC_FP2, 		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)
339
340set FP_SCR1, 		LV+80			# fp scratch 1
341set FP_SCR1_EX, 	FP_SCR1+0
342set FP_SCR1_SGN,	FP_SCR1+2
343set FP_SCR1_HI, 	FP_SCR1+4
344set FP_SCR1_LO, 	FP_SCR1+8
345
346set FP_SCR0, 		LV+68			# fp scratch 0
347set FP_SCR0_EX, 	FP_SCR0+0
348set FP_SCR0_SGN,	FP_SCR0+2
349set FP_SCR0_HI, 	FP_SCR0+4
350set FP_SCR0_LO, 	FP_SCR0+8
351
352set FP_DST, 		LV+56			# fp destination operand
353set FP_DST_EX, 		FP_DST+0
354set FP_DST_SGN,		FP_DST+2
355set FP_DST_HI, 		FP_DST+4
356set FP_DST_LO, 		FP_DST+8
357
358set FP_SRC, 		LV+44			# fp source operand
359set FP_SRC_EX, 		FP_SRC+0
360set FP_SRC_SGN,		FP_SRC+2
361set FP_SRC_HI, 		FP_SRC+4
362set FP_SRC_LO, 		FP_SRC+8
363
364set USER_FPIAR,		LV+40			# FP instr address register
365
366set USER_FPSR,		LV+36			# FP status register
367set FPSR_CC,		USER_FPSR+0		# FPSR condition codes
368set FPSR_QBYTE,		USER_FPSR+1		# FPSR quotient byte
369set FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
370set FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte
371
372set USER_FPCR,		LV+32			# FP control register
373set FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
374set FPCR_MODE,		USER_FPCR+3		# FPCR rounding mode control
375
376set L_SCR3,		LV+28			# integer scratch 3
377set L_SCR2,		LV+24			# integer scratch 2
378set L_SCR1,		LV+20			# integer scratch 1
379
380set STORE_FLG,		LV+19			# flag: operand store (ie. not fcmp/ftst)
381
382set EXC_TEMP2,		LV+24			# temporary space
383set EXC_TEMP,		LV+16			# temporary space
384
385set DTAG,		LV+15			# destination operand type
386set STAG, 		LV+14			# source operand type
387
388set SPCOND_FLG,		LV+10			# flag: special case (see below)
389
390set EXC_CC,		LV+8			# saved condition codes
391set EXC_EXTWPTR,	LV+4			# saved current PC (active)
# NOTE: EXC_EXTWORD and EXC_CMDREG alias the same slot (LV+2).
392set EXC_EXTWORD,	LV+2			# saved extension word
393set EXC_CMDREG,		LV+2			# saved extension word
394set EXC_OPWORD,		LV+0			# saved operation word
395
396################################
397
398# Helpful macros
399
# Field offsets within an extended-precision value stored in memory
# (exponent word, internal sign word, hi/lo mantissa longwords, and
# the guard/round/sticky longword where present).
400set FTEMP,		0			# offsets within an
401set FTEMP_EX, 		0			# extended precision
402set FTEMP_SGN,		2			# value saved in memory.
403set FTEMP_HI, 		4
404set FTEMP_LO, 		8
405set FTEMP_GRS,		12
406
407set LOCAL,		0			# offsets within an
408set LOCAL_EX, 		0			# extended precision
409set LOCAL_SGN,		2			# value saved in memory.
410set LOCAL_HI, 		4
411set LOCAL_LO, 		8
412set LOCAL_GRS,		12
413
414set DST,		0			# offsets within an
415set DST_EX,		0			# extended precision
416set DST_HI,		4			# value saved in memory.
417set DST_LO,		8
418
419set SRC,		0			# offsets within an
420set SRC_EX,		0			# extended precision
421set SRC_HI,		4			# value saved in memory.
422set SRC_LO,		8
423
# In-range (biased) exponent limits and biases per precision.
424set SGL_LO,		0x3f81			# min sgl prec exponent
425set SGL_HI,		0x407e			# max sgl prec exponent
426set DBL_LO,		0x3c01			# min dbl prec exponent
427set DBL_HI,		0x43fe			# max dbl prec exponent
428set EXT_LO,		0x0			# min ext prec exponent
429set EXT_HI,		0x7ffe			# max ext prec exponent
430
431set EXT_BIAS,		0x3fff			# extended precision bias
432set SGL_BIAS,		0x007f			# single precision bias
433set DBL_BIAS,		0x03ff			# double precision bias
434
# Operand type tags stored in STAG/DTAG by set_tag_x()/unnorm_fix().
435set NORM,		0x00			# operand type for STAG/DTAG
436set ZERO,		0x01			# operand type for STAG/DTAG
437set INF,		0x02			# operand type for STAG/DTAG
438set QNAN,		0x03			# operand type for STAG/DTAG
439set DENORM,		0x04			# operand type for STAG/DTAG
440set SNAN,		0x05			# operand type for STAG/DTAG
441set UNNORM,		0x06			# operand type for STAG/DTAG
442
443##################
444# FPSR/FPCR bits #
445##################
# Bit positions within the individual FPSR bytes (condition-code byte,
# quotient byte, exception byte, accrued-exception byte) and the
# corresponding longword/byte masks used with USER_FPSR.
446set neg_bit,		0x3			# negative result
447set z_bit,		0x2			# zero result
448set inf_bit,		0x1			# infinite result
449set nan_bit,		0x0			# NAN result
450
451set q_sn_bit,		0x7			# sign bit of quotient byte
452
453set bsun_bit,		7			# branch on unordered
454set snan_bit,		6			# signalling NAN
455set operr_bit,		5			# operand error
456set ovfl_bit,		4			# overflow
457set unfl_bit,		3			# underflow
458set dz_bit,		2			# divide by zero
459set inex2_bit,		1			# inexact result 2
460set inex1_bit,		0			# inexact result 1
461
462set aiop_bit,		7			# accrued inexact operation bit
463set aovfl_bit,		6			# accrued overflow bit
464set aunfl_bit,		5			# accrued underflow bit
465set adz_bit,		4			# accrued dz bit
466set ainex_bit,		3			# accrued inexact bit
467
468#############################
469# FPSR individual bit masks #
470#############################
471set neg_mask,		0x08000000		# negative bit mask (lw)
472set inf_mask,		0x02000000		# infinity bit mask (lw)
473set z_mask,		0x04000000		# zero bit mask (lw)
474set nan_mask,		0x01000000		# nan bit mask (lw)
475
476set neg_bmask,		0x08			# negative bit mask (byte)
477set inf_bmask,		0x02			# infinity bit mask (byte)
478set z_bmask,		0x04			# zero bit mask (byte)
479set nan_bmask,		0x01			# nan bit mask (byte)
480
481set bsun_mask,		0x00008000		# bsun exception mask
482set snan_mask,		0x00004000		# snan exception mask
483set operr_mask,		0x00002000		# operr exception mask
484set ovfl_mask,		0x00001000		# overflow exception mask
485set unfl_mask,		0x00000800		# underflow exception mask
486set dz_mask,		0x00000400		# dz exception mask
487set inex2_mask,		0x00000200		# inex2 exception mask
488set inex1_mask,		0x00000100		# inex1 exception mask
489
490set aiop_mask,		0x00000080		# accrued illegal operation
491set aovfl_mask,		0x00000040		# accrued overflow
492set aunfl_mask,		0x00000020		# accrued underflow
493set adz_mask,		0x00000010		# accrued divide by zero
494set ainex_mask,		0x00000008		# accrued inexact
495
496######################################
497# FPSR combinations used in the FPSP #
498######################################
499set dzinf_mask,		inf_mask+dz_mask+adz_mask
500set opnan_mask,		nan_mask+operr_mask+aiop_mask
501set nzi_mask,		0x01ffffff 		#clears N, Z, and I
502set unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
503set unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask
504set ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
505set inx1a_mask,		inex1_mask+ainex_mask
506set inx2a_mask,		inex2_mask+ainex_mask
507set snaniop_mask, 	nan_mask+snan_mask+aiop_mask
508set snaniop2_mask,	snan_mask+aiop_mask
509set naniop_mask,	nan_mask+aiop_mask
510set neginf_mask,	neg_mask+inf_mask
511set infaiop_mask, 	inf_mask+aiop_mask
512set negz_mask,		neg_mask+z_mask
513set opaop_mask,		operr_mask+aiop_mask
514set unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask
515set ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask
516
517#########
518# misc. #
519#########
520set rnd_stky_bit,	29			# stky bit pos in longword
521
522set sign_bit,		0x7			# sign bit
523set signan_bit,		0x6			# signalling nan bit
524
525set sgl_thresh,		0x3f81			# minimum sgl exponent
526set dbl_thresh,		0x3c01			# minimum dbl exponent
527
# Rounding precision field values (FPCR_MODE upper nibble).
528set x_mode,		0x0			# extended precision
529set s_mode,		0x4			# single precision
530set d_mode,		0x8			# double precision
531
# Rounding mode field values (FPCR_MODE lower bits).
532set rn_mode,		0x0			# round-to-nearest
533set rz_mode,		0x1			# round-to-zero
534set rm_mode,		0x2			# round-to-minus-infinity
535set rp_mode,		0x3			# round-to-plus-infinity
536
537set mantissalen,	64			# length of mantissa in bits
538
539set BYTE,		1			# len(byte) == 1 byte
540set WORD, 		2			# len(word) == 2 bytes
541set LONG, 		4			# len(longword) == 4 bytes
542
# Exception vector offsets (vector number * 4) used in EXC_VOFF.
543set BSUN_VEC,		0xc0			# bsun    vector offset
544set INEX_VEC,		0xc4			# inexact vector offset
545set DZ_VEC,		0xc8			# dz      vector offset
546set UNFL_VEC,		0xcc			# unfl    vector offset
547set OPERR_VEC,		0xd0			# operr   vector offset
548set OVFL_VEC,		0xd4			# ovfl    vector offset
549set SNAN_VEC,		0xd8			# snan    vector offset
550
###########################
# SPecial CONDition FLaGs #
###########################
# Values stored in SPCOND_FLG (flag form and bit-number form).
554set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
555set fbsun_flg,		0x02			# flag bit: bsun exception
556set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
557set mda7_flg,		0x08			# flag bit: -(a7) <ea>
558set fmovm_flg,		0x40			# flag bit: fmovm instruction
559set immed_flg,		0x80			# flag bit: &<data> <ea>
560
561set ftrapcc_bit,	0x0
562set fbsun_bit,		0x1
563set mia7_bit,		0x2
564set mda7_bit,		0x3
565set immed_bit,		0x7
566
##################################
# TRANSCENDENTAL "LAST-OP" FLAGS #
##################################
570set FMUL_OP,		0x0			# fmul instr performed last
571set FDIV_OP,		0x1			# fdiv performed last
572set FADD_OP,		0x2			# fadd performed last
573set FMOV_OP,		0x3			# fmov performed last
574
#############
# CONSTANTS #
#############
# Extended-precision constants stored as raw longwords.
578T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
579T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL
580
581PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
582PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
583
584TWOBYPI:
585	long		0x3FE45F30,0x6DC9C883
586
587#########################################################################
588# XDEF ****************************************************************	#
589#	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
590#									#
591#	This handler should be the first code executed upon taking the	#
592#	FP Overflow exception in an operating system.			#
593#									#
594# XREF ****************************************************************	#
595#	_imem_read_long() - read instruction longword			#
596#	fix_skewed_ops() - adjust src operand in fsave frame		#
597#	set_tag_x() - determine optype of src/dst operands		#
598#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
599#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
600#	load_fpn2() - load dst operand from FP regfile			#
601#	fout() - emulate an opclass 3 instruction			#
602#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
603#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
604#	_real_ovfl() - "callout" for Overflow exception enabled code	#
605#	_real_inex() - "callout" for Inexact exception enabled code	#
606#	_real_trace() - "callout" for Trace exception code		#
607#									#
608# INPUT ***************************************************************	#
609#	- The system stack contains the FP Ovfl exception stack frame	#
610#	- The fsave frame contains the source operand			#
611# 									#
612# OUTPUT **************************************************************	#
613#	Overflow Exception enabled:					#
614#	- The system stack is unchanged					#
615#	- The fsave frame contains the adjusted src op for opclass 0,2	#
616#	Overflow Exception disabled:					#
617#	- The system stack is unchanged					#
618#	- The "exception present" flag in the fsave frame is cleared	#
619#									#
620# ALGORITHM ***********************************************************	#
621#	On the 060, if an FP overflow is present as the result of any	#
622# instruction, the 060 will take an overflow exception whether the 	#
623# exception is enabled or disabled in the FPCR. For the disabled case, 	#
624# This handler emulates the instruction to determine what the correct	#
625# default result should be for the operation. This default result is	#
626# then stored in either the FP regfile, data regfile, or memory. 	#
627# Finally, the handler exits through the "callout" _fpsp_done() 	#
628# denoting that no exceptional conditions exist within the machine.	#
629# 	If the exception is enabled, then this handler must create the	#
630# exceptional operand and place it in the fsave state frame, and store	#
631# the default result (only if the instruction is opclass 3). For 	#
632# exceptions enabled, this handler must exit through the "callout" 	#
633# _real_ovfl() so that the operating system enabled overflow handler	#
634# can handle this case.							#
635#	Two other conditions exist. First, if overflow was disabled 	#
636# but the inexact exception was enabled, this handler must exit 	#
637# through the "callout" _real_inex() regardless of whether the result	#
638# was inexact.								#
639#	Also, in the case of an opclass three instruction where 	#
640# overflow was disabled and the trace exception was enabled, this	#
641# handler must exit through the "callout" _real_trace().		#
642#									#
643#########################################################################
644
# FP Overflow exception handler -- see the XDEF block above for the
# full contract. Register use: a6 = frame pointer after link.w;
# d0/d1/a0/a1 are saved in EXC_DREGS and restored before every exit.
645	global		_fpsp_ovfl
646_fpsp_ovfl:
647
648#$#	sub.l		&24,%sp			# make room for src/dst
649
650	link.w		%a6,&-LOCAL_SIZE	# init stack frame
651
652	fsave		FP_SRC(%a6)		# grab the "busy" frame
653
654 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
655	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
656 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
657
658# the FPIAR holds the "current PC" of the faulting instruction
659	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
660	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
661	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
662	bsr.l		_imem_read_long		# fetch the instruction words
663	mov.l		%d0,EXC_OPWORD(%a6)
664
665##############################################################################
666
667	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
668	bne.w		fovfl_out
669
670
671	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
672	bsr.l		fix_skewed_ops		# fix src op
673
674# since, I believe, only NORMs and DENORMs can come through here,
675# maybe we can avoid the subroutine call.
676	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
677	bsr.l		set_tag_x		# tag the operand type
678	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
679
680# bit five of the fp extension word separates the monadic and dyadic operations
681# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
682# will never take this exception.
683	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
684	beq.b		fovfl_extract		# monadic
685
686	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
687	bsr.l		load_fpn2		# load dst into FP_DST
688
689	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
690	bsr.l		set_tag_x		# tag the operand type
691	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
692	bne.b		fovfl_op2_done		# no
693	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
694fovfl_op2_done:
695	mov.b		%d0,DTAG(%a6)		# save dst optype tag
696
697fovfl_extract:
698
699#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
700#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
701#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
702#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
703#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
704#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
705
706	clr.l		%d0
707	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
708
709	mov.b		1+EXC_CMDREG(%a6),%d1
710	andi.w		&0x007f,%d1		# extract extension
711
712	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
713
714	fmov.l		&0x0,%fpcr		# zero current control regs
715	fmov.l		&0x0,%fpsr
716
717	lea		FP_SRC(%a6),%a0
718	lea		FP_DST(%a6),%a1
719
# maybe we can make these entry points ONLY the OVFL entry points of each routine.
# double indirection: fetch the routine's offset from tbl_unsupp (indexed
# by the extension word), then jsr to tbl_unsupp + offset.
721	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
722	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
723
724# the operation has been emulated. the result is in fp0.
725# the EXOP, if an exception occurred, is in fp1.
726# we must save the default result regardless of whether
727# traps are enabled or disabled.
728	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
729	bsr.l		store_fpreg
730
731# the exceptional possibilities we have left ourselves with are ONLY overflow
732# and inexact. and, the inexact is such that overflow occurred and was disabled
733# but inexact was enabled.
734	btst		&ovfl_bit,FPCR_ENABLE(%a6)
735	bne.b		fovfl_ovfl_on
736
737	btst		&inex2_bit,FPCR_ENABLE(%a6)
738	bne.b		fovfl_inex_on
739
740	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
741	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
742	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
743
744	unlk		%a6
745#$#	add.l		&24,%sp
746	bra.l		_fpsp_done
747
748# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
749# in fp1. now, simply jump to _real_ovfl()!
750fovfl_ovfl_on:
751	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
752
753	mov.w		&0xe005,2+FP_SRC(%a6) 	# save exc status
754
755	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
756	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
757	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
758
759	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
760
761	unlk		%a6
762
763	bra.l		_real_ovfl
764
765# overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
766# we must jump to real_inex().
767fovfl_inex_on:
768
769	fmovm.x		&0x40,FP_SRC(%a6) 	# save EXOP (fp1) to stack
770
771	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
772	mov.w		&0xe001,2+FP_SRC(%a6) 	# save exc status
773
774	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
775	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
776	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
777
778	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
779
780	unlk		%a6
781
782	bra.l		_real_inex
783
784########################################################################
785fovfl_out:
786
787
788#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
789#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
790#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
791
792# the src operand is definitely a NORM(!), so tag it as such
793	mov.b		&NORM,STAG(%a6)		# set src optype tag
794
795	clr.l		%d0
796	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
797
798	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
799
800	fmov.l		&0x0,%fpcr		# zero current control regs
801	fmov.l		&0x0,%fpsr
802
803	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
804
805	bsr.l		fout
806
807	btst		&ovfl_bit,FPCR_ENABLE(%a6)
808	bne.w		fovfl_ovfl_on
809
810	btst		&inex2_bit,FPCR_ENABLE(%a6)
811	bne.w		fovfl_inex_on
812
813	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
814	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
815	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
816
817	unlk		%a6
818#$#	add.l		&24,%sp
819
820	btst		&0x7,(%sp)		# is trace on?
821	beq.l		_fpsp_done		# no
822
823	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
824	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
825	bra.l		_real_trace
826
827#########################################################################
828# XDEF ****************************************************************	#
829#	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
830#									#
831#	This handler should be the first code executed upon taking the	#
832#	FP Underflow exception in an operating system.			#
833#									#
834# XREF ****************************************************************	#
835#	_imem_read_long() - read instruction longword			#
836#	fix_skewed_ops() - adjust src operand in fsave frame		#
837#	set_tag_x() - determine optype of src/dst operands		#
838#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
839#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
840#	load_fpn2() - load dst operand from FP regfile			#
841#	fout() - emulate an opclass 3 instruction			#
842#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
843#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
844#	_real_unfl() - "callout" for Underflow exception enabled code	#
845#	_real_inex() - "callout" for Inexact exception enabled code	#
846#	_real_trace() - "callout" for Trace exception code		#
847#									#
848# INPUT ***************************************************************	#
849#	- The system stack contains the FP Unfl exception stack frame	#
850#	- The fsave frame contains the source operand			#
851# 									#
852# OUTPUT **************************************************************	#
853#	Underflow Exception enabled:					#
854#	- The system stack is unchanged					#
855#	- The fsave frame contains the adjusted src op for opclass 0,2	#
856#	Underflow Exception disabled:					#
857#	- The system stack is unchanged					#
858#	- The "exception present" flag in the fsave frame is cleared	#
859#									#
860# ALGORITHM ***********************************************************	#
861#	On the 060, if an FP underflow is present as the result of any	#
862# instruction, the 060 will take an underflow exception whether the 	#
863# exception is enabled or disabled in the FPCR. For the disabled case, 	#
864# This handler emulates the instruction to determine what the correct	#
865# default result should be for the operation. This default result is	#
866# then stored in either the FP regfile, data regfile, or memory. 	#
867# Finally, the handler exits through the "callout" _fpsp_done() 	#
868# denoting that no exceptional conditions exist within the machine.	#
869# 	If the exception is enabled, then this handler must create the	#
870# exceptional operand and place it in the fsave state frame, and store	#
871# the default result (only if the instruction is opclass 3). For 	#
872# exceptions enabled, this handler must exit through the "callout" 	#
873# _real_unfl() so that the operating system enabled underflow handler	#
874# can handle this case.							#
875#	Two other conditions exist. First, if underflow was disabled 	#
876# but the inexact exception was enabled and the result was inexact, 	#
877# this handler must exit through the "callout" _real_inex().		#
879#	Also, in the case of an opclass three instruction where 	#
880# underflow was disabled and the trace exception was enabled, this	#
881# handler must exit through the "callout" _real_trace().		#
882#									#
883#########################################################################
884
885	global		_fpsp_unfl
886_fpsp_unfl:
887
888#$#	sub.l		&24,%sp			# make room for src/dst
889
890	link.w		%a6,&-LOCAL_SIZE	# init stack frame
891
892	fsave		FP_SRC(%a6)		# grab the "busy" frame
893
894 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
895	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
896 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
897
898# the FPIAR holds the "current PC" of the faulting instruction
899	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
900	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
901	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
902	bsr.l		_imem_read_long		# fetch the instruction words
903	mov.l		%d0,EXC_OPWORD(%a6)
904
905##############################################################################
906
907	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
908	bne.w		funfl_out
909
910
911	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
912	bsr.l		fix_skewed_ops		# fix src op
913
914	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
915	bsr.l		set_tag_x		# tag the operand type
916	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
917
918# bit five of the fp ext word separates the monadic and dyadic operations
919# that can pass through fpsp_unfl(). remember that fcmp, and ftst
920# will never take this exception.
921	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
922	beq.b		funfl_extract		# monadic
923
924# now, what's left that's not dyadic is fsincos. we can distinguish it
925# from all dyadics by the '0110xxx pattern
926	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
927	bne.b		funfl_extract		# yes
928
929	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
930	bsr.l		load_fpn2		# load dst into FP_DST
931
932	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
933	bsr.l		set_tag_x		# tag the operand type
934	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
935	bne.b		funfl_op2_done		# no
936	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
937funfl_op2_done:
938	mov.b		%d0,DTAG(%a6)		# save dst optype tag
939
940funfl_extract:
941
942#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
943#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
944#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
945#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
946#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
947#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
948
949	clr.l		%d0
950	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
951
952	mov.b		1+EXC_CMDREG(%a6),%d1
953	andi.w		&0x007f,%d1		# extract extension
954
955	andi.l		&0x00ff01ff,USER_FPSR(%a6)
956
957	fmov.l		&0x0,%fpcr		# zero current control regs
958	fmov.l		&0x0,%fpsr
959
960	lea		FP_SRC(%a6),%a0
961	lea		FP_DST(%a6),%a1
962
963# maybe we can make these entry points ONLY the OVFL entry points of each routine.
964	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
965	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
966
967	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
968	bsr.l		store_fpreg
969
970# The `060 FPU multiplier hardware is such that if the result of a
971# multiply operation is the smallest possible normalized number
972# (0x00000000_80000000_00000000), then the machine will take an
973# underflow exception. Since this is incorrect, we need to check
974# if our emulation, after re-doing the operation, decided that
975# no underflow was called for. We do these checks only in
976# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
977# special case will simply exit gracefully with the correct result.
978
979# the exceptional possibilities we have left ourselves with are ONLY overflow
980# and inexact. and, the inexact is such that overflow occurred and was disabled
981# but inexact was enabled.
982	btst		&unfl_bit,FPCR_ENABLE(%a6)
983	bne.b		funfl_unfl_on
984
985funfl_chkinex:
986	btst		&inex2_bit,FPCR_ENABLE(%a6)
987	bne.b		funfl_inex_on
988
989funfl_exit:
990	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
991	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
992	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
993
994	unlk		%a6
995#$#	add.l		&24,%sp
996	bra.l		_fpsp_done
997
998# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
999# in fp1 (don't forget to save fp0). what to do now?
1000# well, we simply have to get to go to _real_unfl()!
1001funfl_unfl_on:
1002
1003# The `060 FPU multiplier hardware is such that if the result of a
1004# multiply operation is the smallest possible normalized number
1005# (0x00000000_80000000_00000000), then the machine will take an
1006# underflow exception. Since this is incorrect, we check here to see
1007# if our emulation, after re-doing the operation, decided that
1008# no underflow was called for.
1009	btst		&unfl_bit,FPSR_EXCEPT(%a6)
1010	beq.w		funfl_chkinex
1011
1012funfl_unfl_on2:
1013	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
1014
1015	mov.w		&0xe003,2+FP_SRC(%a6) 	# save exc status
1016
1017	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1018	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1019	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1020
1021	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1022
1023	unlk		%a6
1024
1025	bra.l		_real_unfl
1026
1027# undeflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
1028# we must jump to real_inex().
1029funfl_inex_on:
1030
1031# The `060 FPU multiplier hardware is such that if the result of a
1032# multiply operation is the smallest possible normalized number
1033# (0x00000000_80000000_00000000), then the machine will take an
1034# underflow exception.
1035# But, whether bogus or not, if inexact is enabled AND it occurred,
1036# then we have to branch to real_inex.
1037
1038	btst		&inex2_bit,FPSR_EXCEPT(%a6)
1039	beq.w		funfl_exit
1040
1041funfl_inex_on2:
1042
1043	fmovm.x		&0x40,FP_SRC(%a6) 	# save EXOP to stack
1044
1045	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
1046	mov.w		&0xe001,2+FP_SRC(%a6) 	# save exc status
1047
1048	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1049	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1050	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1051
1052	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1053
1054	unlk		%a6
1055
1056	bra.l		_real_inex
1057
1058#######################################################################
1059funfl_out:
1060
1061
1062#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1063#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1064#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1065
1066# the src operand is definitely a NORM(!), so tag it as such
1067	mov.b		&NORM,STAG(%a6)		# set src optype tag
1068
1069	clr.l		%d0
1070	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
1071
1072	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accured field
1073
1074	fmov.l		&0x0,%fpcr		# zero current control regs
1075	fmov.l		&0x0,%fpsr
1076
1077	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1078
1079	bsr.l		fout
1080
1081	btst		&unfl_bit,FPCR_ENABLE(%a6)
1082	bne.w		funfl_unfl_on2
1083
1084	btst		&inex2_bit,FPCR_ENABLE(%a6)
1085	bne.w		funfl_inex_on2
1086
1087	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1088	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1089	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1090
1091	unlk		%a6
1092#$#	add.l		&24,%sp
1093
1094	btst		&0x7,(%sp)		# is trace on?
1095	beq.l		_fpsp_done		# no
1096
1097	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
1098	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
1099	bra.l		_real_trace
1100
1101#########################################################################
1102# XDEF ****************************************************************	#
1103#	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
1104#		        Data Type" exception.				#
1105#									#
1106#	This handler should be the first code executed upon taking the	#
1107#	FP Unimplemented Data Type exception in an operating system.	#
1108#									#
1109# XREF ****************************************************************	#
1110#	_imem_read_{word,long}() - read instruction word/longword	#
1111#	fix_skewed_ops() - adjust src operand in fsave frame		#
1112#	set_tag_x() - determine optype of src/dst operands		#
1113#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
1114#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
1115#	load_fpn2() - load dst operand from FP regfile			#
1116#	load_fpn1() - load src operand from FP regfile			#
1117#	fout() - emulate an opclass 3 instruction			#
1118#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
1119#	_real_inex() - "callout" to operating system inexact handler	#
1120#	_fpsp_done() - "callout" for exit; work all done		#
1121#	_real_trace() - "callout" for Trace enabled exception		#
1122#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
1123#	_real_snan() - "callout" for SNAN exception			#
1124#	_real_operr() - "callout" for OPERR exception			#
1125#	_real_ovfl() - "callout" for OVFL exception			#
1126#	_real_unfl() - "callout" for UNFL exception			#
1127#	get_packed() - fetch packed operand from memory			#
1128#									#
1129# INPUT ***************************************************************	#
1130#	- The system stack contains the "Unimp Data Type" stk frame	#
1131#	- The fsave frame contains the ssrc op (for UNNORM/DENORM)	#
1132# 									#
1133# OUTPUT **************************************************************	#
1134#	If Inexact exception (opclass 3):				#
1135#	- The system stack is changed to an Inexact exception stk frame	#
1136#	If SNAN exception (opclass 3):					#
1137#	- The system stack is changed to an SNAN exception stk frame	#
1138#	If OPERR exception (opclass 3):					#
1139#	- The system stack is changed to an OPERR exception stk frame	#
1140#	If OVFL exception (opclass 3):					#
1141#	- The system stack is changed to an OVFL exception stk frame	#
1142#	If UNFL exception (opclass 3):					#
1143#	- The system stack is changed to an UNFL exception stack frame	#
1144#	If Trace exception enabled:					#
1145#	- The system stack is changed to a Trace exception stack frame	#
1146#	Else: (normal case)						#
1147#	- Correct result has been stored as appropriate			#
1148#									#
1149# ALGORITHM ***********************************************************	#
1150#	Two main instruction types can enter here: (1) DENORM or UNNORM	#
1151# unimplemented data types. These can be either opclass 0,2 or 3 	#
1152# instructions, and (2) PACKED unimplemented data format instructions	#
1153# also of opclasses 0,2, or 3.						#
1154#	For UNNORM/DENORM opclass 0 and 2, the handler fetches the src	#
1155# operand from the fsave state frame and the dst operand (if dyadic)	#
1156# from the FP register file. The instruction is then emulated by 	#
1157# choosing an emulation routine from a table of routines indexed by	#
1158# instruction type. Once the instruction has been emulated and result	#
1159# saved, then we check to see if any enabled exceptions resulted from	#
1160# instruction emulation. If none, then we exit through the "callout"	#
1161# _fpsp_done(). If there is an enabled FP exception, then we insert	#
1162# this exception into the FPU in the fsave state frame and then exit	#
1163# through _fpsp_done().							#
1164#	PACKED opclass 0 and 2 is similar in how the instruction is	#
1165# emulated and exceptions handled. The differences occur in how the	#
1166# handler loads the packed op (by calling get_packed() routine) and	#
1167# by the fact that a Trace exception could be pending for PACKED ops.	#
1168# If a Trace exception is pending, then the current exception stack	#
1169# frame is changed to a Trace exception stack frame and an exit is	#
1170# made through _real_trace().						#
1171#	For UNNORM/DENORM opclass 3, the actual move out to memory is	#
1172# performed by calling the routine fout(). If no exception should occur	#
1173# as the result of emulation, then an exit either occurs through	#
1174# _fpsp_done() or through _real_trace() if a Trace exception is pending	#
1175# (a Trace stack frame must be created here, too). If an FP exception	#
1176# should occur, then we must create an exception stack frame of that	#
1177# type and jump to either _real_snan(), _real_operr(), _real_inex(),	#
1178# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 	#
1179# emulation is performed in a similar manner.				#
1180#									#
1181#########################################################################
1182
1183#
1184# (1) DENORM and UNNORM (unimplemented) data types:
1185#
1186#				post-instruction
1187#				*****************
1188#				*      EA	*
1189#	 pre-instruction	*		*
1190# 	*****************	*****************
1191#	* 0x0 *  0x0dc  *	* 0x3 *  0x0dc  *
1192#	*****************	*****************
1193#	*     Next	*	*     Next	*
1194#	*      PC	*	*      PC	*
1195#	*****************	*****************
1196#	*      SR	*	*      SR	*
1197#	*****************	*****************
1198#
1199# (2) PACKED format (unsupported) opclasses two and three:
1200#	*****************
1201#	*      EA	*
1202#	*		*
1203#	*****************
1204#	* 0x2 *  0x0dc	*
1205#	*****************
1206#	*     Next	*
1207#	*      PC	*
1208#	*****************
1209#	*      SR	*
1210#	*****************
1211#
1212	global		_fpsp_unsupp
1213_fpsp_unsupp:
1214
1215	link.w		%a6,&-LOCAL_SIZE	# init stack frame
1216
1217	fsave		FP_SRC(%a6)		# save fp state
1218
1219 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
1220	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1221 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
1222
1223	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1224	bne.b		fu_s
1225fu_u:
1226	mov.l		%usp,%a0		# fetch user stack pointer
1227	mov.l		%a0,EXC_A7(%a6)		# save on stack
1228	bra.b		fu_cont
1229# if the exception is an opclass zero or two unimplemented data type
1230# exception, then the a7' calculated here is wrong since it doesn't
1231# stack an ea. however, we don't need an a7' for this case anyways.
1232fu_s:
1233	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
1234	mov.l		%a0,EXC_A7(%a6)		# save on stack
1235
1236fu_cont:
1237
1238# the FPIAR holds the "current PC" of the faulting instruction
1239# the FPIAR should be set correctly for ALL exceptions passing through
1240# this point.
1241	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1242	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
1243	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
1244	bsr.l		_imem_read_long		# fetch the instruction words
1245	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
1246
1247############################
1248
1249	clr.b		SPCOND_FLG(%a6)		# clear special condition flag
1250
1251# Separate opclass three (fpn-to-mem) ops since they have a different
1252# stack frame and protocol.
1253	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
1254	bne.w		fu_out			# yes
1255
1256# Separate packed opclass two instructions.
1257	bfextu		EXC_CMDREG(%a6){&0:&6},%d0
1258	cmpi.b		%d0,&0x13
1259	beq.w		fu_in_pack
1260
1261
1262# I'm not sure at this point what FPSR bits are valid for this instruction.
1263# so, since the emulation routines re-create them anyways, zero exception field
1264	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero exception field
1265
1266	fmov.l		&0x0,%fpcr		# zero current control regs
1267	fmov.l		&0x0,%fpsr
1268
1269# Opclass two w/ memory-to-fpn operation will have an incorrect extended
1270# precision format if the src format was single or double and the
1271# source data type was an INF, NAN, DENORM, or UNNORM
1272	lea		FP_SRC(%a6),%a0		# pass ptr to input
1273	bsr.l		fix_skewed_ops
1274
1275# we don't know whether the src operand or the dst operand (or both) is the
1276# UNNORM or DENORM. call the function that tags the operand type. if the
1277# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1278	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
1279	bsr.l		set_tag_x		# tag the operand type
1280	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1281	bne.b		fu_op2			# no
1282	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1283
1284fu_op2:
1285	mov.b		%d0,STAG(%a6)		# save src optype tag
1286
1287	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1288
1289# bit five of the fp extension word separates the monadic and dyadic operations
1290# at this point
1291	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
1292	beq.b		fu_extract		# monadic
1293	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
1294	beq.b		fu_extract		# yes, so it's monadic, too
1295
1296	bsr.l		load_fpn2		# load dst into FP_DST
1297
1298	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
1299	bsr.l		set_tag_x		# tag the operand type
1300	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1301	bne.b		fu_op2_done		# no
1302	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1303fu_op2_done:
1304	mov.b		%d0,DTAG(%a6)		# save dst optype tag
1305
1306fu_extract:
1307	clr.l		%d0
1308	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1309
1310	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1311
1312	lea		FP_SRC(%a6),%a0
1313	lea		FP_DST(%a6),%a1
1314
1315	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1316	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
1317
1318#
1319# Exceptions in order of precedence:
1320# 	BSUN	: none
1321#	SNAN	: all dyadic ops
1322#	OPERR	: fsqrt(-NORM)
1323#	OVFL	: all except ftst,fcmp
1324#	UNFL	: all except ftst,fcmp
1325#	DZ	: fdiv
1326# 	INEX2	: all except ftst,fcmp
1327#	INEX1	: none (packed doesn't go through here)
1328#
1329
1330# we determine the highest priority exception(if any) set by the
1331# emulation routine that has also been enabled by the user.
1332	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions set
1333	bne.b		fu_in_ena		# some are enabled
1334
1335fu_in_cont:
1336# fcmp and ftst do not store any result.
1337	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
1338	andi.b		&0x38,%d0		# extract bits 3-5
1339	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
1340	beq.b		fu_in_exit		# yes
1341
1342	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1343	bsr.l		store_fpreg		# store the result
1344
1345fu_in_exit:
1346
1347	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1348	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1349	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1350
1351	unlk		%a6
1352
1353	bra.l		_fpsp_done
1354
1355fu_in_ena:
1356	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1357	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1358	bne.b		fu_in_exc		# there is at least one set
1359
1360#
1361# No exceptions occurred that were also enabled. Now:
1362#
1363#   	if (OVFL && ovfl_disabled && inexact_enabled) {
1364#	    branch to _real_inex() (even if the result was exact!);
1365#     	} else {
1366#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
1367#	    return;
1368#     	}
1369#
1370	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1371	beq.b		fu_in_cont		# no
1372
1373fu_in_ovflchk:
1374	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1375	beq.b		fu_in_cont		# no
1376	bra.w		fu_in_exc_ovfl		# go insert overflow frame
1377
1378#
1379# An exception occurred and that exception was enabled:
1380#
1381#	shift enabled exception field into lo byte of d0;
1382#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1383#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1384#		/*
1385#		 * this is the case where we must call _real_inex() now or else
1386#		 * there will be no other way to pass it the exceptional operand
1387#		 */
1388#		call _real_inex();
1389#	} else {
1390#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1391#	}
1392#
1393fu_in_exc:
1394	subi.l		&24,%d0			# fix offset to be 0-8
1395	cmpi.b		%d0,&0x6		# is exception INEX? (6)
1396	bne.b		fu_in_exc_exit		# no
1397
1398# the enabled exception was inexact
1399	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1400	bne.w		fu_in_exc_unfl		# yes
1401	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1402	bne.w		fu_in_exc_ovfl		# yes
1403
1404# here, we insert the correct fsave status value into the fsave frame for the
1405# corresponding exception. the operand in the fsave frame should be the original
1406# src operand.
1407fu_in_exc_exit:
1408	mov.l		%d0,-(%sp)		# save d0
1409	bsr.l		funimp_skew		# skew sgl or dbl inputs
1410	mov.l		(%sp)+,%d0		# restore d0
1411
1412	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1413
1414	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1415	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1416	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1417
1418	frestore	FP_SRC(%a6)		# restore src op
1419
1420	unlk		%a6
1421
1422	bra.l		_fpsp_done
1423
1424tbl_except:
1425	short		0xe000,0xe006,0xe004,0xe005
1426	short		0xe003,0xe002,0xe001,0xe001
1427
1428fu_in_exc_unfl:
1429	mov.w		&0x4,%d0
1430	bra.b		fu_in_exc_exit
1431fu_in_exc_ovfl:
1432	mov.w		&0x03,%d0
1433	bra.b		fu_in_exc_exit
1434
1435# If the input operand to this operation was opclass two and a single
1436# or double precision denorm, inf, or nan, the operand needs to be
1437# "corrected" in order to have the proper equivalent extended precision
1438# number.
1439	global		fix_skewed_ops
1440fix_skewed_ops:
1441	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1442	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
1443	beq.b		fso_sgl			# yes
1444	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
1445	beq.b		fso_dbl			# yes
1446	rts					# no
1447
1448fso_sgl:
1449	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1450	andi.w		&0x7fff,%d0		# strip sign
1451	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
1452	beq.b		fso_sgl_dnrm_zero	# yes
1453	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
1454	beq.b		fso_infnan		# yes
1455	rts					# no
1456
1457fso_sgl_dnrm_zero:
1458	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1459	beq.b		fso_zero		# it's a skewed zero
1460fso_sgl_dnrm:
1461# here, we count on norm not to alter a0...
1462	bsr.l		norm			# normalize mantissa
1463	neg.w		%d0			# -shft amt
1464	addi.w		&0x3f81,%d0		# adjust new exponent
1465	andi.w		&0x8000,LOCAL_EX(%a0) 	# clear old exponent
1466	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1467	rts
1468
1469fso_zero:
1470	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
1471	rts
1472
1473fso_infnan:
1474	andi.b		&0x7f,LOCAL_HI(%a0) 	# clear j-bit
1475	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
1476	rts
1477
1478fso_dbl:
1479	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1480	andi.w		&0x7fff,%d0		# strip sign
1481	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
1482	beq.b		fso_dbl_dnrm_zero	# yes
1483	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
1484	beq.b		fso_infnan		# yes
1485	rts					# no
1486
1487fso_dbl_dnrm_zero:
1488	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1489	bne.b		fso_dbl_dnrm		# it's a skewed denorm
1490	tst.l		LOCAL_LO(%a0)		# is it a zero?
1491	beq.b		fso_zero		# yes
1492fso_dbl_dnrm:
1493# here, we count on norm not to alter a0...
1494	bsr.l		norm			# normalize mantissa
1495	neg.w		%d0			# -shft amt
1496	addi.w		&0x3c01,%d0		# adjust new exponent
1497	andi.w		&0x8000,LOCAL_EX(%a0) 	# clear old exponent
1498	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1499	rts
1500
1501#################################################################
1502
1503# fmove out took an unimplemented data type exception.
1504# the src operand is in FP_SRC. Call _fout() to write out the result and
1505# to determine which exceptions, if any, to take.
1506fu_out:
1507
1508# Separate packed move outs from the UNNORM and DENORM move outs.
1509	bfextu		EXC_CMDREG(%a6){&3:&3},%d0
1510	cmpi.b		%d0,&0x3
1511	beq.w		fu_out_pack
1512	cmpi.b		%d0,&0x7
1513	beq.w		fu_out_pack
1514
1515
1516# I'm not sure at this point what FPSR bits are valid for this instruction.
1517# so, since the emulation routines re-create them anyways, zero exception field.
1518# fmove out doesn't affect ccodes.
1519	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
1520
1521	fmov.l		&0x0,%fpcr		# zero current control regs
1522	fmov.l		&0x0,%fpsr
1523
1524# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1525# call here. just figure out what it is...
1526	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
1527	andi.w		&0x7fff,%d0		# strip sign
1528	beq.b		fu_out_denorm		# it's a DENORM
1529
1530	lea		FP_SRC(%a6),%a0
1531	bsr.l		unnorm_fix		# yes; fix it
1532
1533	mov.b		%d0,STAG(%a6)
1534
1535	bra.b		fu_out_cont
1536fu_out_denorm:
1537	mov.b		&DENORM,STAG(%a6)
1538fu_out_cont:
1539
1540	clr.l		%d0
1541	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1542
1543	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1544
1545	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
1546	bsr.l		fout			# call fmove out routine
1547
1548# Exceptions in order of precedence:
1549# 	BSUN	: none
1550#	SNAN	: none
1551#	OPERR	: fmove.{b,w,l} out of large UNNORM
1552#	OVFL	: fmove.{s,d}
1553#	UNFL	: fmove.{s,d,x}
1554#	DZ	: none
1555# 	INEX2	: all
1556#	INEX1	: none (packed doesn't travel through here)
1557
1558# determine the highest priority exception(if any) set by the
1559# emulation routine that has also been enabled by the user.
1560	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1561	bne.w		fu_out_ena		# some are enabled
1562
1563fu_out_done:
1564
1565	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed
1566
1567# on extended precision opclass three instructions using pre-decrement or
1568# post-increment addressing mode, the address register is not updated. is the
1569# address register was the stack pointer used from user mode, then let's update
1570# it here. if it was used from supervisor mode, then we have to handle this
1571# as a special case.
1572	btst		&0x5,EXC_SR(%a6)
1573	bne.b		fu_out_done_s
1574
1575	mov.l		EXC_A7(%a6),%a0		# restore a7
1576	mov.l		%a0,%usp
1577
1578fu_out_done_cont:
1579	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1580	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1581	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1582
1583	unlk		%a6
1584
1585	btst		&0x7,(%sp)		# is trace on?
1586	bne.b		fu_out_trace		# yes
1587
1588	bra.l		_fpsp_done
1589
1590# is the ea mode pre-decrement of the stack pointer from supervisor mode?
1591# ("fmov.x fpm,-(a7)") if so,
1592fu_out_done_s:
1593	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
1594	bne.b		fu_out_done_cont
1595
1596# the extended precision result is still in fp0. but, we need to save it
1597# somewhere on the stack until we can copy it to its final resting place.
1598# here, we're counting on the top of the stack to be the old place-holders
1599# for fp0/fp1 which have already been restored. that way, we can write
1600# over those destinations with the shifted stack frame.
1601	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1602
1603	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1604	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1605	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1606
1607	mov.l		(%a6),%a6		# restore frame pointer
1608
1609	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1610	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1611
1612# now, copy the result to the proper place on the stack
1613	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1614	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1615	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1616
1617	add.l		&LOCAL_SIZE-0x8,%sp
1618
1619	btst		&0x7,(%sp)
1620	bne.b		fu_out_trace
1621
1622	bra.l		_fpsp_done
1623
1624fu_out_ena:
1625	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1626	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1627	bne.b		fu_out_exc		# there is at least one set
1628
1629# no exceptions were set.
1630# if a disabled overflow occurred and inexact was enabled but the result
1631# was exact, then a branch to _real_inex() is made.
1632	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1633	beq.w		fu_out_done		# no
1634
1635fu_out_ovflchk:
1636	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1637	beq.w		fu_out_done		# no
1638	bra.w		fu_inex			# yes
1639
1640#
1641# The fp move out that took the "Unimplemented Data Type" exception was
1642# being traced. Since the stack frames are similar, get the "current" PC
1643# from FPIAR and put it in the trace stack frame then jump to _real_trace().
1644#
1645#		  UNSUPP FRAME		   TRACE FRAME
1646# 		*****************	*****************
1647#		*      EA	*	*    Current	*
1648#		*		*	*      PC	*
1649#		*****************	*****************
1650#		* 0x3 *  0x0dc	*	* 0x2 *  0x024	*
1651#		*****************	*****************
1652#		*     Next	*	*     Next	*
1653#		*      PC	*	*      PC	*
1654#		*****************	*****************
1655#		*      SR	*	*      SR	*
1656#		*****************	*****************
1657#
1658fu_out_trace:
1659	mov.w		&0x2024,0x6(%sp)
1660	fmov.l		%fpiar,0x8(%sp)
1661	bra.l		_real_trace
1662
1663# an exception occurred and that exception was enabled.
1664fu_out_exc:
1665	subi.l		&24,%d0			# fix offset to be 0-8
1666
1667# we don't mess with the existing fsave frame. just re-insert it and
1668# jump to the "_real_{}()" handler...
1669	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0
1670	jmp		(tbl_fu_out.b,%pc,%d0.w*1)
1671
1672	swbeg		&0x8
1673tbl_fu_out:
1674	short		tbl_fu_out	- tbl_fu_out	# BSUN can't happen
1675	short		tbl_fu_out 	- tbl_fu_out	# SNAN can't happen
1676	short		fu_operr	- tbl_fu_out	# OPERR
1677	short		fu_ovfl 	- tbl_fu_out	# OVFL
1678	short		fu_unfl 	- tbl_fu_out	# UNFL
1679	short		tbl_fu_out	- tbl_fu_out	# DZ can't happen
1680	short		fu_inex 	- tbl_fu_out	# INEX2
1681	short		tbl_fu_out	- tbl_fu_out	# INEX1 won't make it here
1682
1683# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
1684# frestore it.
1685fu_snan:
1686	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1687	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1688	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1689
1690	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
1691	mov.w		&0xe006,2+FP_SRC(%a6)
1692
1693	frestore	FP_SRC(%a6)
1694
1695	unlk		%a6
1696
1697
1698	bra.l		_real_snan
1699
1700fu_operr:
1701	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1702	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1703	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1704
1705	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
1706	mov.w		&0xe004,2+FP_SRC(%a6)
1707
1708	frestore	FP_SRC(%a6)
1709
1710	unlk		%a6
1711
1712
1713	bra.l		_real_operr
1714
1715fu_ovfl:
1716	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1717
1718	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1719	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1720	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1721
1722	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
1723	mov.w		&0xe005,2+FP_SRC(%a6)
1724
1725	frestore	FP_SRC(%a6)		# restore EXOP
1726
1727	unlk		%a6
1728
1729	bra.l		_real_ovfl
1730
# underflow can happen for extended precision. extended precision opclass
# three instruction exceptions don't update the stack pointer. so, if the
# exception occurred from user mode, then simply update a7 and exit normally.
# if the exception occurred from supervisor mode, check whether the <ea>
# mode was -(a7); if so, the exception stack frame must be shifted "down"
# so the default result can be stored where the frame currently sits.
fu_unfl:
	mov.l		EXC_A6(%a6),(%a6)	# restore a6

	btst		&0x5,EXC_SR(%a6)	# supervisor bit set in saved SR?
	bne.w		fu_unfl_s		# yes; supervisor-mode path

	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
	mov.l		%a0,%usp		# to or not...

fu_unfl_cont:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
	mov.w		&0xe003,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6

	bra.l		_real_unfl

# supervisor mode: special handling only needed for the -(a7) <ea> mode.
fu_unfl_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
	bne.b		fu_unfl_cont

# the extended precision result is still in fp0. but, we need to save it
# somewhere on the stack until we can copy it to its final resting place
# (where the exc frame is currently). make sure it's not at the top of the
# frame or it will get overwritten when the exc stack frame is shifted "down".
	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
	mov.w		&0xe003,2+FP_DST(%a6)	# set fsave status

	frestore	FP_DST(%a6)		# restore EXOP

	mov.l		(%a6),%a6		# restore frame pointer

# shift the SR/PC/EA of the exception frame "down" by 0xc bytes.
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp	# release locals (frame was shifted)

	bra.l		_real_unfl
1794
# fmove in and out enter here.
# inexact result: stuff an inexact fsave frame into the FPU and exit
# through the user-supplied _real_inex() handler.
fu_inex:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6


	bra.l		_real_inex
1812
1813#########################################################################
1814#########################################################################
1815fu_in_pack:
1816
1817
1818# I'm not sure at this point what FPSR bits are valid for this instruction.
1819# so, since the emulation routines re-create them anyways, zero exception field
1820	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero exception field
1821
1822	fmov.l		&0x0,%fpcr		# zero current control regs
1823	fmov.l		&0x0,%fpsr
1824
1825	bsr.l		get_packed		# fetch packed src operand
1826
1827	lea		FP_SRC(%a6),%a0		# pass ptr to src
1828	bsr.l		set_tag_x		# set src optype tag
1829
1830	mov.b		%d0,STAG(%a6)		# save src optype tag
1831
1832	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1833
1834# bit five of the fp extension word separates the monadic and dyadic operations
1835# at this point
1836	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
1837	beq.b		fu_extract_p		# monadic
1838	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
1839	beq.b		fu_extract_p		# yes, so it's monadic, too
1840
1841	bsr.l		load_fpn2		# load dst into FP_DST
1842
1843	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
1844	bsr.l		set_tag_x		# tag the operand type
1845	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1846	bne.b		fu_op2_done_p		# no
1847	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1848fu_op2_done_p:
1849	mov.b		%d0,DTAG(%a6)		# save dst optype tag
1850
1851fu_extract_p:
1852	clr.l		%d0
1853	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1854
1855	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1856
1857	lea		FP_SRC(%a6),%a0
1858	lea		FP_DST(%a6),%a1
1859
1860	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1861	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
1862
1863#
1864# Exceptions in order of precedence:
1865# 	BSUN	: none
1866#	SNAN	: all dyadic ops
1867#	OPERR	: fsqrt(-NORM)
1868#	OVFL	: all except ftst,fcmp
1869#	UNFL	: all except ftst,fcmp
1870#	DZ	: fdiv
1871# 	INEX2	: all except ftst,fcmp
1872#	INEX1	: all
1873#
1874
1875# we determine the highest priority exception(if any) set by the
1876# emulation routine that has also been enabled by the user.
1877	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1878	bne.w		fu_in_ena_p		# some are enabled
1879
1880fu_in_cont_p:
1881# fcmp and ftst do not store any result.
1882	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
1883	andi.b		&0x38,%d0		# extract bits 3-5
1884	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
1885	beq.b		fu_in_exit_p		# yes
1886
1887	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1888	bsr.l		store_fpreg		# store the result
1889
1890fu_in_exit_p:
1891
1892	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
1893	bne.w		fu_in_exit_s_p		# supervisor
1894
1895	mov.l		EXC_A7(%a6),%a0		# update user a7
1896	mov.l		%a0,%usp
1897
1898fu_in_exit_cont_p:
1899	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1900	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1901	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1902
1903	unlk		%a6			# unravel stack frame
1904
1905	btst		&0x7,(%sp)		# is trace on?
1906	bne.w		fu_trace_p		# yes
1907
1908	bra.l		_fpsp_done		# exit to os
1909
1910# the exception occurred in supervisor mode. check to see if the
1911# addressing mode was (a7)+. if so, we'll need to shift the
1912# stack frame "up".
1913fu_in_exit_s_p:
1914	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1915	beq.b		fu_in_exit_cont_p	# no
1916
1917	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1918	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1919	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1920
1921	unlk		%a6			# unravel stack frame
1922
1923# shift the stack frame "up". we don't really care about the <ea> field.
1924	mov.l		0x4(%sp),0x10(%sp)
1925	mov.l		0x0(%sp),0xc(%sp)
1926	add.l		&0xc,%sp
1927
1928	btst		&0x7,(%sp)		# is trace on?
1929	bne.w		fu_trace_p		# yes
1930
1931	bra.l		_fpsp_done		# exit to os
1932
# some exceptions are enabled in the FPCR; see if any of those also occurred.
fu_in_ena_p:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		fu_in_exc_p		# at least one was set

#
# No exceptions occurred that were also enabled. Now:
#
#   	if (OVFL && ovfl_disabled && inexact_enabled) {
#	    branch to _real_inex() (even if the result was exact!);
#     	} else {
#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
#	    return;
#     	}
#
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.w		fu_in_cont_p		# no

fu_in_ovflchk_p:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.w		fu_in_cont_p		# no
	bra.w		fu_in_exc_ovfl_p	# do _real_inex() now
1955
1956#
1957# An exception occurred and that exception was enabled:
1958#
1959#	shift enabled exception field into lo byte of d0;
1960#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1961#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1962#		/*
1963#		 * this is the case where we must call _real_inex() now or else
1964#		 * there will be no other way to pass it the exceptional operand
1965#		 */
1966#		call _real_inex();
1967#	} else {
1968#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1969#	}
1970#
1971fu_in_exc_p:
1972	subi.l		&24,%d0			# fix offset to be 0-8
1973	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
1974	blt.b		fu_in_exc_exit_p	# no
1975
1976# the enabled exception was inexact
1977	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1978	bne.w		fu_in_exc_unfl_p	# yes
1979	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1980	bne.w		fu_in_exc_ovfl_p	# yes
1981
1982# here, we insert the correct fsave status value into the fsave frame for the
1983# corresponding exception. the operand in the fsave frame should be the original
1984# src operand.
1985# as a reminder for future predicted pain and agony, we are passing in fsave the
1986# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1987# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1988fu_in_exc_exit_p:
1989	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
1990	bne.w		fu_in_exc_exit_s_p	# supervisor
1991
1992	mov.l		EXC_A7(%a6),%a0		# update user a7
1993	mov.l		%a0,%usp
1994
1995fu_in_exc_exit_cont_p:
1996	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
1997
1998	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1999	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2000	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2001
2002	frestore	FP_SRC(%a6)		# restore src op
2003
2004	unlk		%a6
2005
2006	btst		&0x7,(%sp)		# is trace enabled?
2007	bne.w		fu_trace_p		# yes
2008
2009	bra.l		_fpsp_done
2010
2011tbl_except_p:
2012	short		0xe000,0xe006,0xe004,0xe005
2013	short		0xe003,0xe002,0xe001,0xe001
2014
2015fu_in_exc_ovfl_p:
2016	mov.w		&0x3,%d0
2017	bra.w		fu_in_exc_exit_p
2018
2019fu_in_exc_unfl_p:
2020	mov.w		&0x4,%d0
2021	bra.w		fu_in_exc_exit_p
2022
2023fu_in_exc_exit_s_p:
2024	btst		&mia7_bit,SPCOND_FLG(%a6)
2025	beq.b		fu_in_exc_exit_cont_p
2026
2027	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2028
2029	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2030	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2031	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2032
2033	frestore	FP_SRC(%a6)		# restore src op
2034
2035	unlk		%a6			# unravel stack frame
2036
2037# shift stack frame "up". who cares about <ea> field.
2038	mov.l		0x4(%sp),0x10(%sp)
2039	mov.l		0x0(%sp),0xc(%sp)
2040	add.l		&0xc,%sp
2041
2042	btst		&0x7,(%sp)		# is trace on?
2043	bne.b		fu_trace_p		# yes
2044
2045	bra.l		_fpsp_done		# exit to os
2046
2047#
2048# The opclass two PACKED instruction that took an "Unimplemented Data Type"
2049# exception was being traced. Make the "current" PC the FPIAR and put it in the
2050# trace stack frame then jump to _real_trace().
2051#
2052#		  UNSUPP FRAME		   TRACE FRAME
2053#		*****************	*****************
2054#		*      EA	*	*    Current	*
2055#		*		*	*      PC	*
2056#		*****************	*****************
2057#		* 0x2 *	0x0dc	* 	* 0x2 *  0x024	*
2058#		*****************	*****************
2059#		*     Next	*	*     Next	*
2060#		*      PC	*      	*      PC	*
2061#		*****************	*****************
2062#		*      SR	*	*      SR	*
2063#		*****************	*****************
2064fu_trace_p:
2065	mov.w		&0x2024,0x6(%sp)
2066	fmov.l		%fpiar,0x8(%sp)
2067
2068	bra.l		_real_trace
2069
2070#########################################################
2071#########################################################
2072fu_out_pack:
2073
2074
2075# I'm not sure at this point what FPSR bits are valid for this instruction.
2076# so, since the emulation routines re-create them anyways, zero exception field.
2077# fmove out doesn't affect ccodes.
2078	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
2079
2080	fmov.l		&0x0,%fpcr		# zero current control regs
2081	fmov.l		&0x0,%fpsr
2082
2083	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
2084	bsr.l		load_fpn1
2085
2086# unlike other opclass 3, unimplemented data type exceptions, packed must be
2087# able to detect all operand types.
2088	lea		FP_SRC(%a6),%a0
2089	bsr.l		set_tag_x		# tag the operand type
2090	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2091	bne.b		fu_op2_p		# no
2092	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
2093
2094fu_op2_p:
2095	mov.b		%d0,STAG(%a6)		# save src optype tag
2096
2097	clr.l		%d0
2098	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
2099
2100	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
2101
2102	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
2103	bsr.l		fout			# call fmove out routine
2104
2105# Exceptions in order of precedence:
2106# 	BSUN	: no
2107#	SNAN	: yes
2108#	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2109#	OVFL	: no
2110#	UNFL	: no
2111#	DZ	: no
2112# 	INEX2	: yes
2113#	INEX1	: no
2114
2115# determine the highest priority exception(if any) set by the
2116# emulation routine that has also been enabled by the user.
2117	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
2118	bne.w		fu_out_ena_p		# some are enabled
2119
# no enabled exception occurred: restore state and exit.
fu_out_exit_p:
	mov.l		EXC_A6(%a6),(%a6)	# restore a6

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		fu_out_exit_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_out_exit_cont_p:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

# the exception occurred in supervisor mode. check to see if the
# addressing mode was -(a7). if so, we'll need to shift the
# stack frame "down".
fu_out_exit_s_p:
	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
	beq.b		fu_out_exit_cont_p	# no

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.l		(%a6),%a6		# restore frame pointer

# shift SR and PC of the exception frame "down" by 0xc bytes.
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp	# release locals (frame was shifted)

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os
2168
# some exceptions are enabled; see if any of those actually occurred.
fu_out_ena_p:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	beq.w		fu_out_exit_p		# none occurred; normal exit

	mov.l		EXC_A6(%a6),(%a6)	# restore a6

# an exception occurred and that exception was enabled.
# the only exception possible on packed move out are INEX, OPERR, and SNAN.
fu_out_exc_p:
	cmpi.b		%d0,&0x1a		# dispatch on bfffo offset
	bgt.w		fu_inex_p2		# > 0x1a: inexact
	beq.w		fu_operr_p		# = 0x1a: operand error

# < 0x1a: signalling NAN
fu_snan_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		fu_snan_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_snan

fu_snan_s_p:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was <ea> mode -(a7)?
	bne.w		fu_snan			# no; common snan handling

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
	mov.w		&0xe006,2+FP_SRC(%a6) 	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_snan
2222
# operand error on a packed fmove out.
fu_operr_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_operr_p_s		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_operr

fu_operr_p_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was <ea> mode -(a7)?
	bne.w		fu_operr		# no; common operr handling

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
	mov.w		&0xe004,2+FP_SRC(%a6) 	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_operr
2262
# inexact on a packed fmove out.
fu_inex_p2:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_inex_s_p2		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_inex

fu_inex_s_p2:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was <ea> mode -(a7)?
	bne.w		fu_inex			# no; common inexact handling

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6) 	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6) 	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_inex
2302
2303#########################################################################
2304
2305#
2306# if we're stuffing a source operand back into an fsave frame then we
2307# have to make sure that for single or double source operands that the
2308# format stuffed is as weird as the hardware usually makes it.
2309#
2310	global		funimp_skew
2311funimp_skew:
2312	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2313	cmpi.b		%d0,&0x1		# was src sgl?
2314	beq.b		funimp_skew_sgl		# yes
2315	cmpi.b		%d0,&0x5		# was src dbl?
2316	beq.b		funimp_skew_dbl		# yes
2317	rts
2318
2319funimp_skew_sgl:
2320	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2321	andi.w		&0x7fff,%d0		# strip sign
2322	beq.b		funimp_skew_sgl_not
2323	cmpi.w		%d0,&0x3f80
2324	bgt.b		funimp_skew_sgl_not
2325	neg.w		%d0			# make exponent negative
2326	addi.w		&0x3f81,%d0		# find amt to shift
2327	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
2328	lsr.l		%d0,%d1			# shift it
2329	bset		&31,%d1			# set j-bit
2330	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
2331	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
2332	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
2333funimp_skew_sgl_not:
2334	rts
2335
2336funimp_skew_dbl:
2337	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2338	andi.w		&0x7fff,%d0		# strip sign
2339	beq.b		funimp_skew_dbl_not
2340	cmpi.w		%d0,&0x3c00
2341	bgt.b		funimp_skew_dbl_not
2342
2343	tst.b		FP_SRC_EX(%a6)		# make "internal format"
2344	smi.b		0x2+FP_SRC(%a6)
2345	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
2346	clr.l		%d0			# clear g,r,s
2347	lea		FP_SRC(%a6),%a0		# pass ptr to src op
2348	mov.w		&0x3c01,%d1		# pass denorm threshold
2349	bsr.l		dnrm_lp			# denorm it
2350	mov.w		&0x3c00,%d0		# new exponent
2351	tst.b		0x2+FP_SRC(%a6)		# is sign set?
2352	beq.b		fss_dbl_denorm_done	# no
2353	bset		&15,%d0			# set sign
2354fss_dbl_denorm_done:
2355	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
2356	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
2357funimp_skew_dbl_not:
2358	rts
2359
2360#########################################################################
2361	global		_mem_write2
2362_mem_write2:
2363	btst		&0x5,EXC_SR(%a6)
2364	beq.l		_dmem_write
2365	mov.l		0x0(%a0),FP_DST_EX(%a6)
2366	mov.l		0x4(%a0),FP_DST_HI(%a6)
2367	mov.l		0x8(%a0),FP_DST_LO(%a6)
2368	clr.l		%d1
2369	rts
2370
2371#########################################################################
2372# XDEF ****************************************************************	#
2373#	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
2374#		     	effective address" exception.			#
2375#									#
2376#	This handler should be the first code executed upon taking the	#
2377#	FP Unimplemented Effective Address exception in an operating	#
2378#	system.								#
2379#									#
2380# XREF ****************************************************************	#
2381#	_imem_read_long() - read instruction longword			#
2382#	fix_skewed_ops() - adjust src operand in fsave frame		#
2383#	set_tag_x() - determine optype of src/dst operands		#
2384#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
2385#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
2386#	load_fpn2() - load dst operand from FP regfile			#
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
2388#	decbin() - convert packed data to FP binary data		#
2389#	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
2390#	_real_access() - "callout" for access error exception		#
2391#	_mem_read() - read extended immediate operand from memory	#
2392#	_fpsp_done() - "callout" for exit; work all done		#
2393#	_real_trace() - "callout" for Trace enabled exception		#
2394#	fmovm_dynamic() - emulate dynamic fmovm instruction		#
2395#	fmovm_ctrl() - emulate fmovm control instruction		#
2396#									#
2397# INPUT ***************************************************************	#
2398#	- The system stack contains the "Unimplemented <ea>" stk frame	#
2399# 									#
2400# OUTPUT **************************************************************	#
2401#	If access error:						#
2402#	- The system stack is changed to an access error stack frame	#
2403#	If FPU disabled:						#
2404#	- The system stack is changed to an FPU disabled stack frame	#
2405#	If Trace exception enabled:					#
2406#	- The system stack is changed to a Trace exception stack frame	#
2407#	Else: (normal case)						#
2408#	- None (correct result has been stored as appropriate)		#
2409#									#
2410# ALGORITHM ***********************************************************	#
2411#	This exception handles 3 types of operations:			#
2412# (1) FP Instructions using extended precision or packed immediate	#
2413#     addressing mode.							#
2414# (2) The "fmovm.x" instruction w/ dynamic register specification.	#
2415# (3) The "fmovm.l" instruction w/ 2 or 3 control registers.		#
2416#									#
2417#	For immediate data operations, the data is read in w/ a		#
2418# _mem_read() "callout", converted to FP binary (if packed), and used	#
2419# as the source operand to the instruction specified by the instruction	#
# word. If no FP exception should be reported as a result of the 	#
2421# emulation, then the result is stored to the destination register and	#
2422# the handler exits through _fpsp_done(). If an enabled exc has been	#
2423# signalled as a result of emulation, then an fsave state frame		#
2424# corresponding to the FP exception type must be entered into the 060	#
2425# FPU before exiting. In either the enabled or disabled cases, we 	#
2426# must also check if a Trace exception is pending, in which case, we	#
2427# must create a Trace exception stack frame from the current exception	#
2428# stack frame. If no Trace is pending, we simply exit through		#
2429# _fpsp_done().								#
2430#	For "fmovm.x", call the routine fmovm_dynamic() which will 	#
2431# decode and emulate the instruction. No FP exceptions can be pending	#
2432# as a result of this operation emulation. A Trace exception can be	#
2433# pending, though, which means the current stack frame must be changed	#
2434# to a Trace stack frame and an exit made through _real_trace().	#
2435# For the case of "fmovm.x Dn,-(a7)", where the offending instruction	#
2436# was executed from supervisor mode, this handler must store the FP	#
2437# register file values to the system stack by itself since		#
2438# fmovm_dynamic() can't handle this. A normal exit is made through	#
# _fpsp_done().								#
2440#	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.	#
2441# Again, a Trace exception may be pending and an exit made through	#
2442# _real_trace(). Else, a normal exit is made through _fpsp_done().	#
2443#									#
2444#	Before any of the above is attempted, it must be checked to	#
2445# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken	#
2446# before the "FPU disabled" exception, but the "FPU disabled" exception	#
2447# has higher priority, we check the disabled bit in the PCR. If set,	#
2448# then we must create an 8 word "FPU disabled" exception stack frame	#
2449# from the current 4 word exception stack frame. This includes 		#
2450# reproducing the effective address of the instruction to put on the 	#
2451# new stack frame.							#
2452#									#
2453# 	In the process of all emulation work, if a _mem_read()		#
2454# "callout" returns a failing result indicating an access error, then	#
2455# we must create an access error stack frame from the current stack	#
2456# frame. This information includes a faulting address and a fault-	#
2457# status-longword. These are created within this handler.		#
2458#									#
2459#########################################################################
2460
	global		_fpsp_effadd
_fpsp_effadd:

# This exception type takes priority over the "Line F Emulator"
# exception. Therefore, the FPU could be disabled when entering here.
# So, we must check to see if it's disabled and handle that case separately.
	mov.l		%d0,-(%sp)		# save d0
	movc		%pcr,%d0		# load proc cr
	btst		&0x1,%d0		# is FPU disabled?
	bne.w		iea_disabled		# yes
	mov.l		(%sp)+,%d0		# restore d0

	link		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# PC of instruction that took the exception is the PC in the frame
	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

#########################################################################

	tst.w		%d0			# is operation fmovem?
	bmi.w		iea_fmovm		# yes
2491
2492#
2493# here, we will have:
2494# 	fabs	fdabs	fsabs		facos		fmod
2495#	fadd	fdadd	fsadd		fasin		frem
2496# 	fcmp				fatan		fscale
2497#	fdiv	fddiv	fsdiv		fatanh		fsin
2498#	fint				fcos		fsincos
2499#	fintrz				fcosh		fsinh
2500#	fmove	fdmove	fsmove		fetox		ftan
2501# 	fmul	fdmul	fsmul		fetoxm1		ftanh
2502#	fneg	fdneg	fsneg		fgetexp		ftentox
2503#	fsgldiv				fgetman		ftwotox
2504# 	fsglmul				flog10
2505# 	fsqrt				flog2
2506#	fsub	fdsub	fssub		flogn
2507#	ftst				flognp1
2508# which can all use f<op>.{x,p}
2509# so, now it's immediate data extended precision AND PACKED FORMAT!
2510#
2511iea_op:
2512	andi.l		&0x00ff00ff,USER_FPSR(%a6)
2513
2514	btst		&0xa,%d0		# is src fmt x or p?
2515	bne.b		iea_op_pack		# packed
2516
2517
2518	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
2519	lea		FP_SRC(%a6),%a1		# pass: ptr to super addr
2520	mov.l		&0xc,%d0		# pass: 12 bytes
2521	bsr.l		_imem_read		# read extended immediate
2522
2523	tst.l		%d1			# did ifetch fail?
2524	bne.w		iea_iacc		# yes
2525
2526	bra.b		iea_op_setsrc
2527
# packed immediate source operand: read it, classify it, and convert
# non-special values from packed decimal to extended precision.
iea_op_pack:

	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_imem_read		# read packed operand

	tst.l		%d1			# did ifetch fail?
	bne.w		iea_iacc		# yes

# The packed operand is an INF or a NAN if the exponent field is all ones.
	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
	cmpi.w		%d0,&0x7fff		# INF or NAN?
	beq.b		iea_op_setsrc		# operand is an INF or NAN

# The packed operand is a zero if the mantissa is all zero, else it's
# a normal packed op.
	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
	andi.b		&0x0f,%d0		# clear all but last nybble
	bne.b		iea_op_gp_not_spec	# not a zero
	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b		iea_op_gp_not_spec	# not a zero
	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
	beq.b		iea_op_setsrc		# operand is a ZERO
iea_op_gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# convert to extended
	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
2556
# source operand is in FP_SRC: tag it, then fetch/tag the destination
# operand if the operation is dyadic.
iea_op_setsrc:
	addi.l		&0xc,EXC_EXTWPTR(%a6)	# update extension word pointer

# FP_SRC now holds the src operand.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# could be ANYTHING!!!
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		iea_op_getdst		# no
	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
	mov.b		%d0,STAG(%a6)		# set new optype tag
iea_op_getdst:
	clr.b		STORE_FLG(%a6)		# clear "store result" boolean

	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		iea_op_extract		# monadic
	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation fsincos,ftst,fcmp?
	bne.b		iea_op_spec		# yes

iea_op_loaddst:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
	bsr.l		load_fpn2		# load dst operand

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,DTAG(%a6)		# could be ANYTHING!!!
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		iea_op_extract		# no
	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
	mov.b		%d0,DTAG(%a6)		# set new optype tag
	bra.b		iea_op_extract

# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
iea_op_spec:
	btst		&0x3,1+EXC_CMDREG(%a6)	# is operation fsincos?
	beq.b		iea_op_extract		# yes
# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
# store a result. then, only fcmp will branch back and pick up a dst operand.
	st		STORE_FLG(%a6)		# don't store a final result
	btst		&0x1,1+EXC_CMDREG(%a6)	# is operation fcmp?
	beq.b		iea_op_loaddst		# yes
2598
# dispatch to the emulation routine selected by the instruction's
# extension field via tbl_unsupp.
iea_op_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass: rnd mode,prec

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	lea		FP_DST(%a6),%a1		# pass: ptr to dst op

	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)	# emulate the operation
2614
2615#
2616# Exceptions in order of precedence:
2617#	BSUN	: none
2618#	SNAN	: all operations
2619#	OPERR	: all reg-reg or mem-reg operations that can normally operr
2620#	OVFL	: same as OPERR
2621#	UNFL	: same as OPERR
2622#	DZ	: same as OPERR
2623#	INEX2	: same as OPERR
2624#	INEX1	: all packed immediate operations
2625#
2626
2627# we determine the highest priority exception(if any) set by the
2628# emulation routine that has also been enabled by the user.
2629	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
2630	bne.b		iea_op_ena		# some are enabled
2631
2632# now, we save the result, unless, of course, the operation was ftst or fcmp.
2633# these don't save results.
2634iea_op_save:
2635	tst.b		STORE_FLG(%a6)		# does this op store a result?
2636	bne.b		iea_op_exit1		# exit with no frestore
2637
2638iea_op_store:
2639	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2640	bsr.l		store_fpreg		# store the result
2641
2642iea_op_exit1:
2643	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2644	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2645
2646	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2647	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2648	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2649
2650	unlk		%a6			# unravel the frame
2651
2652	btst		&0x7,(%sp)		# is trace on?
2653	bne.w		iea_op_trace		# yes
2654
2655	bra.l		_fpsp_done		# exit to os
2656
iea_op_ena:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled and set
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		iea_op_exc		# at least one was set

# no exception occurred. now, did a disabled, exact overflow occur with inexact
# enabled? if so, then we have to stuff an overflow frame into the FPU.
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
	beq.b		iea_op_save

iea_op_ovfl:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
	beq.b		iea_op_store		# no
	bra.b		iea_op_exc_ovfl		# yes

# an enabled exception occurred. we have to insert the exception type back into
# the machine.
iea_op_exc:
	subi.l		&24,%d0			# fix offset to be 0-8
	cmpi.b		%d0,&0x6		# is exception INEX?
	bne.b		iea_op_exc_force	# no

# the enabled exception was inexact. so, if it occurs with an overflow
# or underflow that was disabled, then we have to force an overflow or
# underflow frame.
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
	bne.b		iea_op_exc_ovfl		# yes
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
	bne.b		iea_op_exc_unfl		# yes

# write the chosen exception's fsave status word into the saved frame so
# that the frestore in iea_op_exit2 makes the FPU re-raise it.
iea_op_exc_force:
	mov.w		(tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
	bra.b		iea_op_exit2		# exit with frestore

# fsave status words indexed by the bfffo result above, i.e. by FPSR
# exception byte bit position: BSUN,SNAN,OPERR,OVFL,UNFL,DZ,INEX2,INEX1
tbl_iea_except:
	short		0xe002, 0xe006, 0xe004, 0xe005
	short		0xe003, 0xe002, 0xe001, 0xe001

iea_op_exc_ovfl:
	mov.w		&0xe005,2+FP_SRC(%a6)	# force an overflow frame
	bra.b		iea_op_exit2

iea_op_exc_unfl:
	mov.w		&0xe003,2+FP_SRC(%a6)	# force an underflow frame

# exceptional exit: reload the (possibly modified) frame into the FPU
# with frestore before returning to the os.
iea_op_exit2:
	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore 	FP_SRC(%a6)		# restore exceptional state

	unlk		%a6			# unravel the frame

	btst		&0x7,(%sp)		# is trace on?
	bne.b		iea_op_trace		# yes

	bra.l		_fpsp_done		# exit to os
2718
2719#
2720# The opclass two instruction that took an "Unimplemented Effective Address"
2721# exception was being traced. Make the "current" PC the FPIAR and put it in
2722# the trace stack frame then jump to _real_trace().
2723#
2724#		 UNIMP EA FRAME		   TRACE FRAME
2725#		*****************	*****************
2726#		* 0x0 *  0x0f0	*	*    Current	*
2727#		*****************	*      PC	*
2728#		*    Current	*	*****************
2729#		*      PC	*	* 0x2 *  0x024	*
2730#		*****************	*****************
2731#		*      SR	*	*     Next	*
2732#		*****************	*      PC	*
2733#					*****************
2734#					*      SR	*
2735#					*****************
2736iea_op_trace:
2737	mov.l		(%sp),-(%sp)		# shift stack frame "down"
2738	mov.w		0x8(%sp),0x4(%sp)
2739	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
2740	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
2741
2742	bra.l		_real_trace
2743
2744#########################################################################
2745iea_fmovm:
2746	btst		&14,%d0			# ctrl or data reg
2747	beq.w		iea_fmovm_ctrl
2748
2749iea_fmovm_data:
2750
2751	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
2752	bne.b		iea_fmovm_data_s
2753
2754iea_fmovm_data_u:
2755	mov.l		%usp,%a0
2756	mov.l		%a0,EXC_A7(%a6)		# store current a7
2757	bsr.l		fmovm_dynamic		# do dynamic fmovm
2758	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
2759	mov.l		%a0,%usp		# update usp
2760	bra.w		iea_fmovm_exit
2761
2762iea_fmovm_data_s:
2763	clr.b		SPCOND_FLG(%a6)
2764	lea		0x2+EXC_VOFF(%a6),%a0
2765	mov.l		%a0,EXC_A7(%a6)
2766	bsr.l		fmovm_dynamic		# do dynamic fmovm
2767
2768	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
2769	beq.w		iea_fmovm_data_predec
2770	cmpi.b		SPCOND_FLG(%a6),&mia7_flg
2771	bne.w		iea_fmovm_exit
2772
2773# right now, d0 = the size.
2774# the data has been fetched from the supervisor stack, but we have not
2775# incremented the stack pointer by the appropriate number of bytes.
2776# do it here.
2777iea_fmovm_data_postinc:
2778	btst		&0x7,EXC_SR(%a6)
2779	bne.b		iea_fmovm_data_pi_trace
2780
2781	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
2782	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
2783	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)
2784
2785	lea		(EXC_SR,%a6,%d0),%a0
2786	mov.l		%a0,EXC_SR(%a6)
2787
2788	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2789	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2790 	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
2791
2792	unlk		%a6
2793	mov.l		(%sp)+,%sp
2794	bra.l		_fpsp_done
2795
2796iea_fmovm_data_pi_trace:
2797	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2798	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
2799	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
2800	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
2801
2802	lea		(EXC_SR-0x4,%a6,%d0),%a0
2803	mov.l		%a0,EXC_SR(%a6)
2804
2805	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2806	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2807 	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
2808
2809	unlk		%a6
2810	mov.l		(%sp)+,%sp
2811	bra.l		_real_trace
2812
# right now, d1 = size and d0 = the strg.
# -(a7) from supervisor mode: the register images must be written where
# the exception frame currently sits, so first rebuild the frame "size"
# bytes lower, then store the selected FP registers above it.
iea_fmovm_data_predec:
	mov.b		%d1,EXC_VOFF(%a6)	# store strg
	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1

# save copies on the local stack; they are reloaded at fm_end after the
# register images have been written out.
	mov.l		(%a6),-(%sp)		# make a copy of a6
	mov.l		%d0,-(%sp)		# save d0
	mov.l		%d1,-(%sp)		# save d1
	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC

	clr.l		%d0
	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
	neg.l		%d0			# get negative of size

	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
	beq.b		iea_fmovm_data_p2

# trace on: build a 6-word trace frame (fmt 0x2, voff 0x024) shifted
# down by "size" bytes.
	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)
	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)

	pea		(%a6,%d0)		# create final sp
	bra.b		iea_fmovm_data_p3

# trace off: build a 4-word frame (fmt 0x0, voff 0x0f0) shifted down.
iea_fmovm_data_p2:
	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
	mov.l		(%sp)+,(EXC_PC,%a6,%d0)
	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)

	pea		(0x4,%a6,%d0)		# create final sp

# scan the register select mask (strg) msb first; each set bit stores
# one extended-precision register (12 bytes) above the new frame and
# advances the destination offset in d0 by 0xc.
iea_fmovm_data_p3:
	clr.l		%d1
	mov.b		EXC_VOFF(%a6),%d1	# fetch strg

	tst.b		%d1
	bpl.b		fm_1
	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_1:
	lsl.b		&0x1,%d1
	bpl.b		fm_2
	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_2:
	lsl.b		&0x1,%d1
	bpl.b		fm_3
	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_3:
	lsl.b		&0x1,%d1
	bpl.b		fm_4
	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_4:
	lsl.b		&0x1,%d1
	bpl.b		fm_5
	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_5:
	lsl.b		&0x1,%d1
	bpl.b		fm_6
	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_6:
	lsl.b		&0x1,%d1
	bpl.b		fm_7
	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_7:
	lsl.b		&0x1,%d1
	bpl.b		fm_end
	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)
# reload d1/d0/a6 from the copies pushed above, then pop the final sp
# (the pea'd value) to switch to the rebuilt frame.
fm_end:
	mov.l		0x4(%sp),%d1
	mov.l		0x8(%sp),%d0
	mov.l		0xc(%sp),%a6
	mov.l		(%sp)+,%sp

	btst		&0x7,(%sp)		# is trace enabled?
	beq.l		_fpsp_done
	bra.l		_real_trace
2900
2901#########################################################################
2902iea_fmovm_ctrl:
2903
2904	bsr.l		fmovm_ctrl		# load ctrl regs
2905
2906iea_fmovm_exit:
2907	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2908	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2909	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2910
2911	btst		&0x7,EXC_SR(%a6)	# is trace on?
2912	bne.b		iea_fmovm_trace		# yes
2913
2914	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2915
2916	unlk		%a6			# unravel the frame
2917
2918	bra.l		_fpsp_done		# exit to os
2919
2920#
2921# The control reg instruction that took an "Unimplemented Effective Address"
2922# exception was being traced. The "Current PC" for the trace frame is the
2923# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2924# After fixing the stack frame, jump to _real_trace().
2925#
2926#		 UNIMP EA FRAME		   TRACE FRAME
2927#		*****************	*****************
2928#		* 0x0 *  0x0f0	*	*    Current	*
2929#		*****************	*      PC	*
2930#		*    Current	*	*****************
2931#		*      PC	*	* 0x2 *  0x024	*
2932#		*****************	*****************
2933#		*      SR	*	*     Next	*
2934#		*****************	*      PC	*
2935#					*****************
2936#					*      SR	*
2937#					*****************
2938# this ain't a pretty solution, but it works:
2939# -restore a6 (not with unlk)
2940# -shift stack frame down over where old a6 used to be
2941# -add LOCAL_SIZE to stack pointer
2942iea_fmovm_trace:
2943	mov.l		(%a6),%a6		# restore frame pointer
2944	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
2945	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
2946	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
2947	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2948	add.l		&LOCAL_SIZE,%sp		# clear stack frame
2949
2950	bra.l		_real_trace
2951
2952#########################################################################
2953# The FPU is disabled and so we should really have taken the "Line
2954# F Emulator" exception. So, here we create an 8-word stack frame
2955# from our 4-word stack frame. This means we must calculate the length
2956# of the faulting instruction to get the "next PC". This is trivial for
2957# immediate operands but requires some extra work for fmovm dynamic
2958# which can use most addressing modes.
2959iea_disabled:
2960	mov.l		(%sp)+,%d0		# restore d0
2961
2962	link		%a6,&-LOCAL_SIZE	# init stack frame
2963
2964	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
2965
2966# PC of instruction that took the exception is the PC in the frame
2967	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
2968	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
2969	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
2970	bsr.l		_imem_read_long		# fetch the instruction words
2971	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
2972
2973	tst.w		%d0			# is instr fmovm?
2974	bmi.b		iea_dis_fmovm		# yes
2975# instruction is using an extended precision immediate operand. therefore,
2976# the total instruction length is 16 bytes.
2977iea_dis_immed:
2978	mov.l		&0x10,%d0		# 16 bytes of instruction
2979	bra.b		iea_dis_cont
2980iea_dis_fmovm:
2981	btst		&0xe,%d0		# is instr fmovm ctrl
2982	bne.b		iea_dis_fmovm_data	# no
2983# the instruction is a fmovm.l with 2 or 3 registers.
2984	bfextu		%d0{&19:&3},%d1
2985	mov.l		&0xc,%d0
2986	cmpi.b		%d1,&0x7		# move all regs?
2987	bne.b		iea_dis_cont
2988	addq.l		&0x4,%d0
2989	bra.b		iea_dis_cont
2990# the instruction is an fmovm.x dynamic which can use many addressing
2991# modes and thus can have several different total instruction lengths.
2992# call fmovm_calc_ea which will go through the ea calc process and,
2993# as a by-product, will tell us how long the instruction is.
2994iea_dis_fmovm_data:
2995	clr.l		%d0
2996	bsr.l		fmovm_calc_ea
2997	mov.l		EXC_EXTWPTR(%a6),%d0
2998	sub.l		EXC_PC(%a6),%d0
2999iea_dis_cont:
3000	mov.w		%d0,EXC_VOFF(%a6)	# store stack shift value
3001
3002	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3003
3004	unlk		%a6
3005
3006# here, we actually create the 8-word frame from the 4-word frame,
3007# with the "next PC" as additional info.
3008# the <ea> field is let as undefined.
3009	subq.l		&0x8,%sp		# make room for new stack
3010	mov.l		%d0,-(%sp)		# save d0
3011	mov.w		0xc(%sp),0x4(%sp)	# move SR
3012	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
3013	clr.l		%d0
3014	mov.w		0x12(%sp),%d0
3015	mov.l		0x6(%sp),0x10(%sp)	# move Current PC
3016	add.l		%d0,0x6(%sp)		# make Next PC
3017	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
3018	mov.l		(%sp)+,%d0		# restore d0
3019
3020	bra.l		_real_fpu_disabled
3021
3022##########
3023
3024iea_iacc:
3025	movc		%pcr,%d0
3026	btst		&0x1,%d0
3027	bne.b		iea_iacc_cont
3028	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3029	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
3030iea_iacc_cont:
3031	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3032
3033	unlk		%a6
3034
3035	subq.w		&0x8,%sp		# make stack frame bigger
3036	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
3037	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
3038	mov.w		&0x4008,0x6(%sp)	# store voff
3039	mov.l		0x2(%sp),0x8(%sp)	# store ea
3040	mov.l		&0x09428001,0xc(%sp)	# store fslw
3041
3042iea_acc_done:
3043	btst		&0x5,(%sp)		# user or supervisor mode?
3044	beq.b		iea_acc_done2		# user
3045	bset		&0x2,0xd(%sp)		# set supervisor TM bit
3046
3047iea_acc_done2:
3048	bra.l		_real_access
3049
3050iea_dacc:
3051	lea		-LOCAL_SIZE(%a6),%sp
3052
3053	movc		%pcr,%d1
3054	btst		&0x1,%d1
3055	bne.b		iea_dacc_cont
3056	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
3057	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3058iea_dacc_cont:
3059	mov.l		(%a6),%a6
3060
3061	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
3062	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
3063	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp)
3064	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)
3065	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)
3066	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)
3067
3068	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3069	add.w		&LOCAL_SIZE-0x4,%sp
3070
3071	bra.b		iea_acc_done
3072
3073#########################################################################
3074# XDEF ****************************************************************	#
3075#	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
3076#									#
3077#	This handler should be the first code executed upon taking the	#
3078# 	FP Operand Error exception in an operating system.		#
3079#									#
3080# XREF ****************************************************************	#
3081#	_imem_read_long() - read instruction longword			#
3082#	fix_skewed_ops() - adjust src operand in fsave frame		#
3083#	_real_operr() - "callout" to operating system operr handler	#
3084#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3085#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3086#	facc_out_{b,w,l}() - store to memory took access error (opcl 3)	#
3087#									#
3088# INPUT ***************************************************************	#
3089#	- The system stack contains the FP Operr exception frame	#
3090#	- The fsave frame contains the source operand			#
3091# 									#
3092# OUTPUT **************************************************************	#
3093#	No access error:						#
3094#	- The system stack is unchanged					#
3095#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3096#									#
3097# ALGORITHM ***********************************************************	#
3098#	In a system where the FP Operr exception is enabled, the goal	#
3099# is to get to the handler specified at _real_operr(). But, on the 060,	#
3100# for opclass zero and two instruction taking this exception, the 	#
3101# input operand in the fsave frame may be incorrect for some cases	#
3102# and needs to be corrected. This handler calls fix_skewed_ops() to	#
3103# do just this and then exits through _real_operr().			#
3104#	For opclass 3 instructions, the 060 doesn't store the default	#
3105# operr result out to memory or data register file as it should.	#
3106# This code must emulate the move out before finally exiting through	#
3107# _real_inex(). The move out, if to memory, is performed using 		#
3108# _mem_write() "callout" routines that may return a failing result.	#
3109# In this special case, the handler must exit through facc_out() 	#
3110# which creates an access error stack frame from the current operr	#
3111# stack frame.								#
3112#									#
3113#########################################################################
3114
3115	global		_fpsp_operr
3116_fpsp_operr:
3117
3118	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3119
3120	fsave		FP_SRC(%a6)		# grab the "busy" frame
3121
3122 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3123	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3124 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3125
3126# the FPIAR holds the "current PC" of the faulting instruction
3127	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3128
3129	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3130	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3131	bsr.l		_imem_read_long		# fetch the instruction words
3132	mov.l		%d0,EXC_OPWORD(%a6)
3133
3134##############################################################################
3135
3136	btst		&13,%d0			# is instr an fmove out?
3137	bne.b		foperr_out		# fmove out
3138
3139
3140# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3141# this would be the case for opclass two operations with a source infinity or
3142# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3143# cause an operr so we don't need to check for them here.
3144	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3145	bsr.l		fix_skewed_ops		# fix src op
3146
3147foperr_exit:
3148	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3149	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3150	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3151
3152	frestore	FP_SRC(%a6)
3153
3154	unlk		%a6
3155	bra.l		_real_operr
3156
3157########################################################################
3158
3159#
3160# the hardware does not save the default result to memory on enabled
3161# operand error exceptions. we do this here before passing control to
3162# the user operand error handler.
3163#
3164# byte, word, and long destination format operations can pass
3165# through here. we simply need to test the sign of the src
3166# operand and save the appropriate minimum or maximum integer value
3167# to the effective address as pointed to by the stacked effective address.
3168#
3169# although packed opclass three operations can take operand error
3170# exceptions, they won't pass through here since they are caught
3171# first by the unsupported data format exception handler. that handler
3172# sends them directly to _real_operr() if necessary.
3173#
3174foperr_out:
3175
3176	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
3177	andi.w		&0x7fff,%d1
3178	cmpi.w		%d1,&0x7fff
3179	bne.b		foperr_out_not_qnan
3180# the operand is either an infinity or a QNAN.
3181	tst.l		FP_SRC_LO(%a6)
3182	bne.b		foperr_out_qnan
3183	mov.l		FP_SRC_HI(%a6),%d1
3184	andi.l		&0x7fffffff,%d1
3185	beq.b		foperr_out_not_qnan
3186foperr_out_qnan:
3187	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6)
3188	bra.b		foperr_out_jmp
3189
3190foperr_out_not_qnan:
3191	mov.l		&0x7fffffff,%d1
3192	tst.b		FP_SRC_EX(%a6)
3193	bpl.b		foperr_out_not_qnan2
3194	addq.l		&0x1,%d1
3195foperr_out_not_qnan2:
3196	mov.l		%d1,L_SCR1(%a6)
3197
3198foperr_out_jmp:
3199	bfextu		%d0{&19:&3},%d0		# extract dst format field
3200	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3201	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0
3202	jmp		(tbl_operr.b,%pc,%a0)
3203
3204tbl_operr:
3205	short		foperr_out_l - tbl_operr # long word integer
3206	short		tbl_operr    - tbl_operr # sgl prec shouldn't happen
3207	short		tbl_operr    - tbl_operr # ext prec shouldn't happen
3208	short		foperr_exit  - tbl_operr # packed won't enter here
3209	short		foperr_out_w - tbl_operr # word integer
3210	short		tbl_operr    - tbl_operr # dbl prec shouldn't happen
3211	short		foperr_out_b - tbl_operr # byte integer
3212	short		tbl_operr    - tbl_operr # packed won't enter here
3213
3214foperr_out_b:
3215	mov.b		L_SCR1(%a6),%d0		# load positive default result
3216	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3217	ble.b		foperr_out_b_save_dn	# yes
3218	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3219	bsr.l		_dmem_write_byte	# write the default result
3220
3221	tst.l		%d1			# did dstore fail?
3222	bne.l		facc_out_b		# yes
3223
3224	bra.w		foperr_exit
3225foperr_out_b_save_dn:
3226	andi.w		&0x0007,%d1
3227	bsr.l		store_dreg_b		# store result to regfile
3228	bra.w		foperr_exit
3229
3230foperr_out_w:
3231	mov.w		L_SCR1(%a6),%d0		# load positive default result
3232	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3233	ble.b		foperr_out_w_save_dn	# yes
3234	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3235	bsr.l		_dmem_write_word	# write the default result
3236
3237	tst.l		%d1			# did dstore fail?
3238	bne.l		facc_out_w		# yes
3239
3240	bra.w		foperr_exit
3241foperr_out_w_save_dn:
3242	andi.w		&0x0007,%d1
3243	bsr.l		store_dreg_w		# store result to regfile
3244	bra.w		foperr_exit
3245
3246foperr_out_l:
3247	mov.l		L_SCR1(%a6),%d0		# load positive default result
3248	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3249	ble.b		foperr_out_l_save_dn	# yes
3250	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3251	bsr.l		_dmem_write_long	# write the default result
3252
3253	tst.l		%d1			# did dstore fail?
3254	bne.l		facc_out_l		# yes
3255
3256	bra.w		foperr_exit
3257foperr_out_l_save_dn:
3258	andi.w		&0x0007,%d1
3259	bsr.l		store_dreg_l		# store result to regfile
3260	bra.w		foperr_exit
3261
3262#########################################################################
3263# XDEF ****************************************************************	#
3264#	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
3265#									#
3266#	This handler should be the first code executed upon taking the	#
3267# 	FP Signalling NAN exception in an operating system.		#
3268#									#
3269# XREF ****************************************************************	#
3270#	_imem_read_long() - read instruction longword			#
3271#	fix_skewed_ops() - adjust src operand in fsave frame		#
3272#	_real_snan() - "callout" to operating system SNAN handler	#
3273#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3274#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3275#	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
3276#	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
3277#									#
3278# INPUT ***************************************************************	#
3279#	- The system stack contains the FP SNAN exception frame		#
3280#	- The fsave frame contains the source operand			#
3281# 									#
3282# OUTPUT **************************************************************	#
3283#	No access error:						#
3284#	- The system stack is unchanged					#
3285#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3286#									#
3287# ALGORITHM ***********************************************************	#
3288#	In a system where the FP SNAN exception is enabled, the goal	#
3289# is to get to the handler specified at _real_snan(). But, on the 060,	#
3290# for opclass zero and two instructions taking this exception, the 	#
3291# input operand in the fsave frame may be incorrect for some cases	#
3292# and needs to be corrected. This handler calls fix_skewed_ops() to	#
3293# do just this and then exits through _real_snan().			#
3294#	For opclass 3 instructions, the 060 doesn't store the default	#
3295# SNAN result out to memory or data register file as it should.		#
3296# This code must emulate the move out before finally exiting through	#
3297# _real_snan(). The move out, if to memory, is performed using 		#
3298# _mem_write() "callout" routines that may return a failing result.	#
3299# In this special case, the handler must exit through facc_out() 	#
3300# which creates an access error stack frame from the current SNAN	#
3301# stack frame.								#
3302#	For the case of an extended precision opclass 3 instruction,	#
3303# if the effective addressing mode was -() or ()+, then the address	#
3304# register must get updated by calling _calc_ea_fout(). If the <ea>	#
3305# was -(a7) from supervisor mode, then the exception frame currently	#
3306# on the system stack must be carefully moved "down" to make room	#
3307# for the operand being moved.						#
3308#									#
3309#########################################################################
3310
3311	global		_fpsp_snan
3312_fpsp_snan:
3313
3314	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3315
3316	fsave		FP_SRC(%a6)		# grab the "busy" frame
3317
3318 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3319	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3320 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3321
3322# the FPIAR holds the "current PC" of the faulting instruction
3323	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3324
3325	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3326	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3327	bsr.l		_imem_read_long		# fetch the instruction words
3328	mov.l		%d0,EXC_OPWORD(%a6)
3329
3330##############################################################################
3331
3332	btst		&13,%d0			# is instr an fmove out?
3333	bne.w		fsnan_out		# fmove out
3334
3335
3336# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3337# this would be the case for opclass two operations with a source infinity or
3338# denorm operand in the sgl or dbl format. NANs also become skewed and must be
3339# fixed here.
3340	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3341	bsr.l		fix_skewed_ops		# fix src op
3342
3343fsnan_exit:
3344	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3345	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3346	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3347
3348	frestore	FP_SRC(%a6)
3349
3350	unlk		%a6
3351	bra.l		_real_snan
3352
3353########################################################################
3354
3355#
3356# the hardware does not save the default result to memory on enabled
3357# snan exceptions. we do this here before passing control to
3358# the user snan handler.
3359#
3360# byte, word, long, and packed destination format operations can pass
3361# through here. since packed format operations already were handled by
3362# fpsp_unsupp(), then we need to do nothing else for them here.
3363# for byte, word, and long, we simply need to test the sign of the src
3364# operand and save the appropriate minimum or maximum integer value
3365# to the effective address as pointed to by the stacked effective address.
3366#
3367fsnan_out:
3368
3369	bfextu		%d0{&19:&3},%d0		# extract dst format field
3370	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3371	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0
3372	jmp		(tbl_snan.b,%pc,%a0)
3373
3374tbl_snan:
3375	short		fsnan_out_l - tbl_snan # long word integer
3376	short		fsnan_out_s - tbl_snan # sgl prec shouldn't happen
3377	short		fsnan_out_x - tbl_snan # ext prec shouldn't happen
3378	short		tbl_snan    - tbl_snan # packed needs no help
3379	short		fsnan_out_w - tbl_snan # word integer
3380	short		fsnan_out_d - tbl_snan # dbl prec shouldn't happen
3381	short		fsnan_out_b - tbl_snan # byte integer
3382	short		tbl_snan    - tbl_snan # packed needs no help
3383
3384fsnan_out_b:
3385	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
3386	bset		&6,%d0			# set SNAN bit
3387	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3388	ble.b		fsnan_out_b_dn		# yes
3389	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3390	bsr.l		_dmem_write_byte	# write the default result
3391
3392	tst.l		%d1			# did dstore fail?
3393	bne.l		facc_out_b		# yes
3394
3395	bra.w		fsnan_exit
3396fsnan_out_b_dn:
3397	andi.w		&0x0007,%d1
3398	bsr.l		store_dreg_b		# store result to regfile
3399	bra.w		fsnan_exit
3400
3401fsnan_out_w:
3402	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
3403	bset		&14,%d0			# set SNAN bit
3404	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3405	ble.b		fsnan_out_w_dn		# yes
3406	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3407	bsr.l		_dmem_write_word	# write the default result
3408
3409	tst.l		%d1			# did dstore fail?
3410	bne.l		facc_out_w		# yes
3411
3412	bra.w		fsnan_exit
3413fsnan_out_w_dn:
3414	andi.w		&0x0007,%d1
3415	bsr.l		store_dreg_w		# store result to regfile
3416	bra.w		fsnan_exit
3417
3418fsnan_out_l:
3419	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
3420	bset		&30,%d0			# set SNAN bit
3421	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3422	ble.b		fsnan_out_l_dn		# yes
3423	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3424	bsr.l		_dmem_write_long	# write the default result
3425
3426	tst.l		%d1			# did dstore fail?
3427	bne.l		facc_out_l		# yes
3428
3429	bra.w		fsnan_exit
3430fsnan_out_l_dn:
3431	andi.w		&0x0007,%d1
3432	bsr.l		store_dreg_l		# store result to regfile
3433	bra.w		fsnan_exit
3434
# single precision: build a sgl QNAN with the source's sign and the top
# of the source mantissa, then store it to memory or a data register.
fsnan_out_s:
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_d_dn		# yes (label name aside, this is the sgl data-reg case)
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
# same sgl SNAN construction, but the destination is a data register;
# d1 (the <ea> mode/reg byte) must be preserved around the mantissa use.
fsnan_out_d_dn:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		%d1,-(%sp)		# save <ea> mode,reg
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		(%sp)+,%d1		# recover <ea> mode,reg
	andi.w		&0x0007,%d1
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit

# double precision: assemble the 8-byte dbl QNAN in FP_SCR0 by packing
# the extended mantissa down by 11 bits, then write it out with
# _dmem_write (memory destination only).
fsnan_out_d:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
	mov.l		&11,%d0			# load shift amt
	lsr.l		%d0,%d1
	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	andi.l		&0x000007ff,%d1
	ror.l		%d0,%d1
	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
	lsr.l		%d0,%d1
	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	movq.l		&0x8,%d0		# pass: size of 8 bytes
	bsr.l		_dmem_write		# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	bra.w		fsnan_exit
3489
3490# for extended precision, if the addressing mode is pre-decrement or
3491# post-increment, then the address register did not get updated.
3492# in addition, for pre-decrement, the stacked <ea> is incorrect.
3493fsnan_out_x:
3494	clr.b		SPCOND_FLG(%a6)		# clear special case flag
3495
3496	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
3497	clr.w		2+FP_SCR0(%a6)
3498	mov.l		FP_SRC_HI(%a6),%d0
3499	bset		&30,%d0
3500	mov.l		%d0,FP_SCR0_HI(%a6)
3501	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3502
3503	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
3504	bne.b		fsnan_out_x_s		# yes
3505
3506	mov.l		%usp,%a0		# fetch user stack pointer
3507	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
3508	mov.l		(%a6),EXC_A6(%a6)
3509
3510	bsr.l		_calc_ea_fout		# find the correct ea,update An
3511	mov.l		%a0,%a1
3512	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
3513
3514	mov.l		EXC_A7(%a6),%a0
3515	mov.l		%a0,%usp		# restore user stack pointer
3516	mov.l		EXC_A6(%a6),(%a6)
3517
3518fsnan_out_x_save:
3519	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
3520	movq.l		&0xc,%d0		# pass: size of extended
3521	bsr.l		_dmem_write		# write the default result
3522
3523	tst.l		%d1			# did dstore fail?
3524	bne.l		facc_out_x		# yes
3525
3526	bra.w		fsnan_exit
3527
3528fsnan_out_x_s:
3529	mov.l		(%a6),EXC_A6(%a6)
3530
3531	bsr.l		_calc_ea_fout		# find the correct ea,update An
3532	mov.l		%a0,%a1
3533	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
3534
3535	mov.l		EXC_A6(%a6),(%a6)
3536
3537	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3538	bne.b		fsnan_out_x_save	# no
3539
3540# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3541	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3542	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3543	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3544
3545	frestore	FP_SRC(%a6)
3546
3547	mov.l		EXC_A6(%a6),%a6		# restore frame pointer
3548
3549	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3550	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3551	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3552
3553	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3554	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3555	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3556
3557	add.l		&LOCAL_SIZE-0x8,%sp
3558
3559	bra.l		_real_snan
3560
3561#########################################################################
3562# XDEF ****************************************************************	#
3563#	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
3564#									#
3565#	This handler should be the first code executed upon taking the	#
3566# 	FP Inexact exception in an operating system.			#
3567#									#
3568# XREF ****************************************************************	#
3569#	_imem_read_long() - read instruction longword			#
3570#	fix_skewed_ops() - adjust src operand in fsave frame		#
3571#	set_tag_x() - determine optype of src/dst operands		#
3572#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
3573#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
3574#	load_fpn2() - load dst operand from FP regfile			#
3575#	smovcr() - emulate an "fmovcr" instruction			#
3576#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - address of emulation routine table for opclass 0,2	#
3578#	_real_inex() - "callout" to operating system inexact handler	#
3579#									#
3580# INPUT ***************************************************************	#
3581#	- The system stack contains the FP Inexact exception frame	#
3582#	- The fsave frame contains the source operand			#
3583# 									#
3584# OUTPUT **************************************************************	#
3585#	- The system stack is unchanged					#
3586#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3587#									#
3588# ALGORITHM ***********************************************************	#
3589#	In a system where the FP Inexact exception is enabled, the goal	#
3590# is to get to the handler specified at _real_inex(). But, on the 060,	#
# for opclass zero and two instructions taking this exception, the 	#
3592# hardware doesn't store the correct result to the destination FP	#
3593# register as did the '040 and '881/2. This handler must emulate the 	#
3594# instruction in order to get this value and then store it to the 	#
3595# correct register before calling _real_inex().				#
3596#	For opclass 3 instructions, the 060 doesn't store the default	#
3597# inexact result out to memory or data register file as it should.	#
3598# This code must emulate the move out by calling fout() before finally	#
3599# exiting through _real_inex().						#
3600#									#
3601#########################################################################
3602
	global		_fpsp_inex
_fpsp_inex:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

# opclass 3 (fmove out) is handled separately from opclass 0/2 emulation
	btst		&13,%d0			# is instr an fmove out?
	bne.w		finex_out		# fmove out


# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
# longword integer directly into the upper longword of the mantissa along
# w/ an exponent value of 0x401e. we convert this to extended precision here.
	bfextu		%d0{&19:&3},%d0		# fetch instr size
	bne.b		finex_cont		# instr size is not long
	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
	bne.b		finex_cont		# no
	fmov.l		&0x0,%fpcr		# clear rnd mode for conversion
	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
	mov.w		&0xe001,0x2+FP_SRC(%a6)	# rewrite frame word (presumably the busy-frame tag -- TODO confirm)
3639
finex_cont:
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# Here, we zero the ccode and exception byte field since we're going to
# emulate the whole instruction. Notice, though, that we don't kill the
# INEX1 bit. This is because a packed op has long since been converted
# to extended before arriving here. Therefore, we need to retain the
# INEX1 bit from when the operand was first converted.
	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
	cmpi.b		%d1,&0x17		# is op an fmovecr?
	beq.w		finex_fmovcr		# yes

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bits four and five of the fp extension word separate the monadic and dyadic
# operations that can pass through fpsp_inex(). remember that fcmp and ftst
# will never take this exception, but fsincos will.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		finex_extract		# monadic

	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
	bne.b		finex_extract		# yes

# dyadic: fetch and tag the destination operand, too
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		finex_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
finex_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

finex_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	lea		FP_DST(%a6),%a1		# pass: ptr to dst op

# dispatch through tbl_unsupp to the emulation routine for this opcode
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# the operation has been emulated. the result is in fp0.
finex_save:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract dst regfile index
	bsr.l		store_fpreg		# store fp0 to dst fp register

finex_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_inex

finex_fmovcr:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.l		&0x0000007f,%d1		# pass rom offset
	bsr.l		smovcr			# emulate the fmovecr
	bra.b		finex_save
3717
3718########################################################################
3719
3720#
3721# the hardware does not save the default result to memory on enabled
3722# inexact exceptions. we do this here before passing control to
3723# the user inexact handler.
3724#
3725# byte, word, and long destination format operations can pass
3726# through here. so can double and single precision.
3727# although packed opclass three operations can take inexact
3728# exceptions, they won't pass through here since they are caught
3729# first by the unsupported data format exception handler. that handler
3730# sends them directly to _real_inex() if necessary.
3731#
# finex_out: opclass 3 -- emulate the move-out the hardware skipped,
# then exit through _real_inex() via finex_exit.
finex_out:

	mov.b		&NORM,STAG(%a6)		# src is a NORM

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode

	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout			# store the default result

	bra.b		finex_exit
3746
3747#########################################################################
3748# XDEF ****************************************************************	#
3749#	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
3750#									#
3751#	This handler should be the first code executed upon taking	#
3752#	the FP DZ exception in an operating system.			#
3753#									#
3754# XREF ****************************************************************	#
3755#	_imem_read_long() - read instruction longword from memory	#
3756#	fix_skewed_ops() - adjust fsave operand				#
3757#	_real_dz() - "callout" exit point from FP DZ handler		#
3758#									#
3759# INPUT ***************************************************************	#
3760#	- The system stack contains the FP DZ exception stack.		#
3761#	- The fsave frame contains the source operand.			#
3762# 									#
3763# OUTPUT **************************************************************	#
3764#	- The system stack contains the FP DZ exception stack.		#
3765#	- The fsave frame contains the adjusted source operand.		#
3766#									#
3767# ALGORITHM ***********************************************************	#
3768#	In a system where the DZ exception is enabled, the goal is to	#
3769# get to the handler specified at _real_dz(). But, on the 060, when the	#
3770# exception is taken, the input operand in the fsave state frame may	#
3771# be incorrect for some cases and need to be adjusted. So, this package	#
3772# adjusts the operand using fix_skewed_ops() and then branches to	#
3773# _real_dz(). 								#
3774#									#
3775#########################################################################
3776
	global		_fpsp_dz
_fpsp_dz:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################


# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source zero
# in the sgl or dbl format.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

fdz_exit:
# restore the user's context and exit through the OS DZ handler
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_dz
3814
3815#########################################################################
3816# XDEF ****************************************************************	#
3817#	_fpsp_fline(): 060FPSP entry point for "Line F emulator"	#
3818#		       exception when the "reduced" version of the 	#
3819#		       FPSP is implemented that does not emulate	#
3820#		       FP unimplemented instructions.			#
3821#									#
3822#	This handler should be the first code executed upon taking a	#
3823#	"Line F Emulator" exception in an operating system integrating	#
3824#	the reduced version of 060FPSP.					#
3825#									#
3826# XREF ****************************************************************	#
3827#	_real_fpu_disabled() - Handle "FPU disabled" exceptions		#
3828#	_real_fline() - Handle all other cases (treated equally)	#
3829#									#
3830# INPUT ***************************************************************	#
3831#	- The system stack contains a "Line F Emulator" exception	#
3832#	  stack frame.							#
3833# 									#
3834# OUTPUT **************************************************************	#
3835#	- The system stack is unchanged.				#
3836#									#
3837# ALGORITHM ***********************************************************	#
3838# 	When a "Line F Emulator" exception occurs in a system where	#
3839# "FPU Unimplemented" instructions will not be emulated, the exception	#
# can occur because the FPU is disabled or the instruction is to be	#
# classified as "Line F". This module determines which case exists and	#
3842# calls the appropriate "callout".					#
3843#									#
3844#########################################################################
3845
	global		_fpsp_fline
_fpsp_fline:

# check to see if the FPU is disabled. if so, jump to the OS entry
# point for that condition.
# 0x6(%sp) is the format/vector-offset word of the stacked exception
# frame; 0x402c appears to be the format-$4 "FPU disabled" frame --
# NOTE(review): confirm against the 68060 UM stack frame table.
	cmpi.w		0x6(%sp),&0x402c
	beq.l		_real_fpu_disabled

	bra.l		_real_fline
3855
3856#########################################################################
3857# XDEF ****************************************************************	#
3858#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
3859#									#
3860# XREF ****************************************************************	#
3861#	inc_areg() - increment an address register			#
3862#	dec_areg() - decrement an address register			#
3863#									#
3864# INPUT ***************************************************************	#
3865#	d0 = number of bytes to adjust <ea> by				#
3866# 									#
3867# OUTPUT **************************************************************	#
3868#	None								#
3869#									#
3870# ALGORITHM ***********************************************************	#
3871# "Dummy" CALCulate Effective Address:					#
3872# 	The stacked <ea> for FP unimplemented instructions and opclass	#
3873#	two packed instructions is correct with the exception of...	#
3874#									#
3875#	1) -(An)   : The register is not updated regardless of size.	#
3876#		     Also, for extended precision and packed, the 	#
3877#		     stacked <ea> value is 8 bytes too big		#
3878#	2) (An)+   : The register is not updated.			#
3879#	3) #<data> : The upper longword of the immediate operand is 	#
#		     stacked. b,w,l and s sizes are completely stacked. 	#
3881#		     d,x, and p are not.				#
3882#									#
3883#########################################################################
3884
	global		_dcalc_ea
_dcalc_ea:
	mov.l		%d0, %a0		# move # bytes to %a0

	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch opcode word
	mov.l		%d0, %d1		# make a copy

	andi.w		&0x38, %d0		# extract mode field
	andi.l		&0x7, %d1		# extract reg  field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		dcea_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.b		dcea_pd			# yes

	or.w		%d1,%d0			# concat mode,reg
	cmpi.b		%d0,&0x3c		# is mode #<data>?

	beq.b		dcea_imm		# yes

# all other addressing modes: the stacked <ea> is already correct
	mov.l		EXC_EA(%a6),%a0		# return <ea>
	rts

# need to set immediate data flag here since we'll need to do
# an imem_read to fetch this later.
dcea_imm:
	mov.b		&immed_flg,SPCOND_FLG(%a6)
	lea		([USER_FPIAR,%a6],0x4),%a0 # return <ea> = instr addr + 0x4
	rts

# here, the <ea> is stacked correctly. however, we must update the
# address register...
dcea_pi:
	mov.l		%a0,%d0			# pass amt to inc by
	bsr.l		inc_areg		# inc addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# the <ea> is stacked correctly for all but extended and packed which
# the <ea>s are 8 bytes too large.
# it would make no sense to have a pre-decrement to a7 in supervisor
# mode so we don't even worry about this tricky case here : )
dcea_pd:
	mov.l		%a0,%d0			# pass amt to dec by
	bsr.l		dec_areg		# dec addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct

	cmpi.b		%d0,&0xc		# is opsize ext or packed?
	beq.b		dcea_pd2		# yes
	rts
dcea_pd2:
	sub.l		&0x8,%a0		# correct <ea>
	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
	rts
3941	rts
3942
3943#########################################################################
3944# XDEF ****************************************************************	#
3945# 	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
3946#			 and packed data opclass 3 operations.		#
3947#									#
3948# XREF ****************************************************************	#
3949#	None								#
3950#									#
3951# INPUT ***************************************************************	#
3952#	None								#
3953# 									#
3954# OUTPUT **************************************************************	#
3955#	a0 = return correct effective address				#
3956#									#
3957# ALGORITHM ***********************************************************	#
3958#	For opclass 3 extended and packed data operations, the <ea>	#
3959# stacked for the exception is incorrect for -(an) and (an)+ addressing	#
3960# modes. Also, while we're at it, the index register itself must get 	#
3961# updated.								#
3962# 	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
3963# and return that value as the correct <ea> and store that value in An.	#
3964# For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
3965#									#
3966#########################################################################
3967
# This calc_ea is currently used to retrieve the correct <ea>
# for fmove outs of type extended and packed.
	global		_calc_ea_fout
_calc_ea_fout:
	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch opcode word
	mov.l		%d0,%d1			# make a copy

	andi.w		&0x38,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg  field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		ceaf_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.w		ceaf_pd			# yes

# all other addressing modes need no correction
	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# (An)+ : extended and packed fmove out
#	: stacked <ea> is correct
#	: "An" not updated
# dispatch on the register number to bump the proper An by 0xc bytes.
ceaf_pi:
	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1 # fetch jump offset
	mov.l		EXC_EA(%a6),%a0		# return stacked <ea>
	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pi:
	short		ceaf_pi0 - tbl_ceaf_pi
	short		ceaf_pi1 - tbl_ceaf_pi
	short		ceaf_pi2 - tbl_ceaf_pi
	short		ceaf_pi3 - tbl_ceaf_pi
	short		ceaf_pi4 - tbl_ceaf_pi
	short		ceaf_pi5 - tbl_ceaf_pi
	short		ceaf_pi6 - tbl_ceaf_pi
	short		ceaf_pi7 - tbl_ceaf_pi

# a0/a1 live in the register save area (EXC_DREGS holds d0-d1/a0-a1,
# so saved a0 is at +0x8 and saved a1 at +0xc); a2-a5 are still live
# in their registers; a6/a7 live in the stack frame.
ceaf_pi0:
	addi.l		&0xc,EXC_DREGS+0x8(%a6)	# saved a0 += 0xc
	rts
ceaf_pi1:
	addi.l		&0xc,EXC_DREGS+0xc(%a6)	# saved a1 += 0xc
	rts
ceaf_pi2:
	add.l		&0xc,%a2
	rts
ceaf_pi3:
	add.l		&0xc,%a3
	rts
ceaf_pi4:
	add.l		&0xc,%a4
	rts
ceaf_pi5:
	add.l		&0xc,%a5
	rts
ceaf_pi6:
	addi.l		&0xc,EXC_A6(%a6)	# saved a6 += 0xc
	rts
ceaf_pi7:
# (a7)+ : flag the special case for the caller before updating a7
	mov.b		&mia7_flg,SPCOND_FLG(%a6)
	addi.l		&0xc,EXC_A7(%a6)	# saved a7 += 0xc
	rts

# -(An) : extended and packed fmove out
#	: stacked <ea> = actual <ea> + 8
#	: "An" not updated
# correct the stacked <ea> by -8, then copy it back into An.
ceaf_pd:
	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1 # fetch jump offset
	mov.l		EXC_EA(%a6),%a0
	sub.l		&0x8,%a0		# a0 = correct <ea>
	sub.l		&0x8,EXC_EA(%a6)	# fix the stacked <ea>, too
	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pd:
	short		ceaf_pd0 - tbl_ceaf_pd
	short		ceaf_pd1 - tbl_ceaf_pd
	short		ceaf_pd2 - tbl_ceaf_pd
	short		ceaf_pd3 - tbl_ceaf_pd
	short		ceaf_pd4 - tbl_ceaf_pd
	short		ceaf_pd5 - tbl_ceaf_pd
	short		ceaf_pd6 - tbl_ceaf_pd
	short		ceaf_pd7 - tbl_ceaf_pd

ceaf_pd0:
	mov.l		%a0,EXC_DREGS+0x8(%a6)	# update saved a0
	rts
ceaf_pd1:
	mov.l		%a0,EXC_DREGS+0xc(%a6)	# update saved a1
	rts
ceaf_pd2:
	mov.l		%a0,%a2
	rts
ceaf_pd3:
	mov.l		%a0,%a3
	rts
ceaf_pd4:
	mov.l		%a0,%a4
	rts
ceaf_pd5:
	mov.l		%a0,%a5
	rts
ceaf_pd6:
	mov.l		%a0,EXC_A6(%a6)		# update saved a6
	rts
ceaf_pd7:
# -(a7) : flag the special case for the caller
	mov.l		%a0,EXC_A7(%a6)		# update saved a7
	mov.b		&mda7_flg,SPCOND_FLG(%a6)
	rts
4077	rts
4078
4079#
4080# This table holds the offsets of the emulation routines for each individual
4081# math operation relative to the address of this table. Included are
4082# routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
# this table is for the version of the 060FPSP without transcendentals.
4084# The location within the table is determined by the extension bits of the
4085# operation longword.
4086#
4087
# NOTE: entries of the form "tbl_unsupp - tbl_unsupp" hold an offset of
# zero, i.e. they are placeholders for operations (the transcendentals)
# that this non-transcendental build does not emulate. The dispatcher in
# finex_extract indexes this table by the 7-bit instruction extension.
	swbeg		&109
tbl_unsupp:
	long		fin	 	- tbl_unsupp	# 00: fmove
	long		fint	 	- tbl_unsupp	# 01: fint
	long		tbl_unsupp 	- tbl_unsupp	# 02: fsinh
	long		fintrz	 	- tbl_unsupp	# 03: fintrz
	long		fsqrt	 	- tbl_unsupp	# 04: fsqrt
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp	# 06: flognp1
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp	# 08: fetoxm1
	long		tbl_unsupp	- tbl_unsupp	# 09: ftanh
	long		tbl_unsupp	- tbl_unsupp	# 0a: fatan
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp	# 0c: fasin
	long		tbl_unsupp	- tbl_unsupp	# 0d: fatanh
	long		tbl_unsupp	- tbl_unsupp	# 0e: fsin
	long		tbl_unsupp	- tbl_unsupp	# 0f: ftan
	long		tbl_unsupp	- tbl_unsupp	# 10: fetox
	long		tbl_unsupp	- tbl_unsupp	# 11: ftwotox
	long		tbl_unsupp	- tbl_unsupp	# 12: ftentox
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp	# 14: flogn
	long		tbl_unsupp	- tbl_unsupp	# 15: flog10
	long		tbl_unsupp	- tbl_unsupp	# 16: flog2
	long		tbl_unsupp	- tbl_unsupp
	long		fabs		- tbl_unsupp 	# 18: fabs
	long		tbl_unsupp	- tbl_unsupp	# 19: fcosh
	long		fneg		- tbl_unsupp 	# 1a: fneg
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp	# 1c: facos
	long		tbl_unsupp	- tbl_unsupp	# 1d: fcos
	long		tbl_unsupp	- tbl_unsupp	# 1e: fgetexp
	long		tbl_unsupp	- tbl_unsupp	# 1f: fgetman
	long		fdiv		- tbl_unsupp 	# 20: fdiv
	long		tbl_unsupp	- tbl_unsupp	# 21: fmod
	long		fadd		- tbl_unsupp 	# 22: fadd
	long		fmul		- tbl_unsupp 	# 23: fmul
	long		fsgldiv		- tbl_unsupp 	# 24: fsgldiv
	long		tbl_unsupp	- tbl_unsupp	# 25: frem
	long		tbl_unsupp	- tbl_unsupp	# 26: fscale
	long		fsglmul		- tbl_unsupp 	# 27: fsglmul
	long		fsub		- tbl_unsupp 	# 28: fsub
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp	# 30: fsincos
	long		tbl_unsupp	- tbl_unsupp	# 31: fsincos
	long		tbl_unsupp	- tbl_unsupp	# 32: fsincos
	long		tbl_unsupp	- tbl_unsupp	# 33: fsincos
	long		tbl_unsupp	- tbl_unsupp	# 34: fsincos
	long		tbl_unsupp	- tbl_unsupp	# 35: fsincos
	long		tbl_unsupp	- tbl_unsupp	# 36: fsincos
	long		tbl_unsupp	- tbl_unsupp	# 37: fsincos
	long		fcmp		- tbl_unsupp 	# 38: fcmp
	long		tbl_unsupp	- tbl_unsupp
	long		ftst		- tbl_unsupp 	# 3a: ftst
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		fsin		- tbl_unsupp 	# 40: fsmove
	long		fssqrt		- tbl_unsupp 	# 41: fssqrt
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		fdin		- tbl_unsupp	# 44: fdmove
	long		fdsqrt		- tbl_unsupp 	# 45: fdsqrt
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		fsabs		- tbl_unsupp 	# 58: fsabs
	long		tbl_unsupp	- tbl_unsupp
	long		fsneg		- tbl_unsupp 	# 5a: fsneg
	long		tbl_unsupp	- tbl_unsupp
	long		fdabs		- tbl_unsupp	# 5c: fdabs
	long		tbl_unsupp	- tbl_unsupp
	long		fdneg		- tbl_unsupp 	# 5e: fdneg
	long		tbl_unsupp	- tbl_unsupp
	long		fsdiv		- tbl_unsupp	# 60: fsdiv
	long		tbl_unsupp	- tbl_unsupp
	long		fsadd		- tbl_unsupp	# 62: fsadd
	long		fsmul		- tbl_unsupp	# 63: fsmul
	long		fddiv		- tbl_unsupp 	# 64: fddiv
	long		tbl_unsupp	- tbl_unsupp
	long		fdadd		- tbl_unsupp	# 66: fdadd
	long		fdmul		- tbl_unsupp 	# 67: fdmul
	long		fssub		- tbl_unsupp	# 68: fssub
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		fdsub		- tbl_unsupp 	# 6c: fdsub
4199
4200#################################################
4201# Add this here so non-fp modules can compile.
4202# (smovcr is called from fpsp_inex.)
4203	global		smovcr
4204smovcr:
4205	bra.b		smovcr
4206
4207#########################################################################
4208# XDEF ****************************************************************	#
4209#	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
4210#									#
4211# XREF ****************************************************************	#
4212#	fetch_dreg() - fetch data register				#
4213#	{i,d,}mem_read() - fetch data from memory			#
4214#	_mem_write() - write data to memory				#
4215#	iea_iacc() - instruction memory access error occurred		#
4216#	iea_dacc() - data memory access error occurred			#
4217#	restore() - restore An index regs if access error occurred	#
4218#									#
4219# INPUT ***************************************************************	#
4220#	None								#
4221# 									#
4222# OUTPUT **************************************************************	#
4223#	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
4224#		d0 = size of dump					#
4225#		d1 = Dn							#
4226#	Else if instruction access error,				#
4227#		d0 = FSLW						#
4228#	Else if data access error,					#
4229#		d0 = FSLW						#
4230#		a0 = address of fault					#
4231#	Else								#
4232#		none.							#
4233#									#
4234# ALGORITHM ***********************************************************	#
4235#	The effective address must be calculated since this is entered	#
4236# from an "Unimplemented Effective Address" exception handler. So, we	#
4237# have our own fcalc_ea() routine here. If an access error is flagged	#
4238# by a _{i,d,}mem_read() call, we must exit through the special		#
4239# handler.								#
4240#	The data register is determined and its value loaded to get the	#
4241# string of FP registers affected. This value is used as an index into	#
4242# a lookup table such that we can determine the number of bytes		#
4243# involved. 								#
4244#	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
4245# to read in all FP values. Again, _mem_read() may fail and require a	#
4246# special exit. 							#
4247#	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used	#
4248# to write all FP values. _mem_write() may also fail.			#
4249# 	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
4250# then we return the size of the dump and the string to the caller	#
4251# so that the move can occur outside of this routine. This special	#
4252# case is required so that moves to the system stack are handled	#
4253# correctly.								#
4254#									#
4255# DYNAMIC:								#
4256# 	fmovm.x	dn, <ea>						#
4257# 	fmovm.x	<ea>, dn						#
4258#									#
4259#	      <WORD 1>		      <WORD2>				#
4260#	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
4261#					  				#
4262#	& = (0): predecrement addressing mode				#
4263#	    (1): postincrement or control addressing mode		#
4264#	@ = (0): move listed regs from memory to the FPU		#
4265#	    (1): move listed regs from the FPU to memory		#
4266#	$$$    : index of data register holding reg select mask		#
4267#									#
4268# NOTES:								#
4269#	If the data register holds a zero, then the			#
4270#	instruction is a nop.						#
4271#									#
4272#########################################################################
4273
4274	global		fmovm_dynamic
4275fmovm_dynamic:
4276
4277# extract the data register in which the bit string resides...
4278	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch extword
4279	andi.w		&0x70,%d1		# extract reg bits
4280	lsr.b		&0x4,%d1		# shift into lo bits
4281
4282# fetch the bit string into d0...
4283	bsr.l		fetch_dreg		# fetch reg string
4284
4285	andi.l		&0x000000ff,%d0		# keep only lo byte
4286
4287	mov.l		%d0,-(%sp)		# save strg
4288	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0
4289	mov.l		%d0,-(%sp)		# save size
4290	bsr.l		fmovm_calc_ea		# calculate <ea>
4291	mov.l		(%sp)+,%d0		# restore size
4292	mov.l		(%sp)+,%d1		# restore strg
4293
4294# if the bit string is a zero, then the operation is a no-op
4295# but, make sure that we've calculated ea and advanced the opword pointer
4296	beq.w		fmovm_data_done
4297
4298# separate move ins from move outs...
4299	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
4300	beq.w		fmovm_data_in		# it's a move out
4301
4302#############
4303# MOVE OUT: #
4304#############
# move-out (register-to-memory) handler. on entry:
#	d0 = # of bytes to transfer (from tbl_fmovm_size)
#	d1 = register select bit string
#	a0 = calculated <ea>
fmovm_data_out:
	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
	bne.w		fmovm_out_ctrl		# control

############################
fmovm_out_predec:
# for predecrement mode, the bit string is the opposite of both control
# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
# here, we convert it to be just like the others...
	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	beq.b		fmovm_out_ctrl		# user

fmovm_out_predec_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
	bne.b		fmovm_out_ctrl

# the operation was unfortunately an: fmovm.x dn,-(sp)
# called from supervisor mode.
# we're also passing "size" and "strg" back to the calling routine
	rts

############################
# dump the selected registers into a temporary buffer carved out of the
# supervisor stack, then copy that buffer to the user <ea> with a single
# _dmem_write() call.
#
# fp0/fp1 images are copied from the exception frame (EXC_FP0/EXC_FP1)
# where they were saved at package entry; fp2-fp7 are still live in the
# FPU and are dumped directly with fmovm.x.
#
# the select string is walked msb-first: test the top bit, then shift
# left by one so the next register's bit lands in the sign position.
fmovm_out_ctrl:
	mov.l		%a0,%a1			# move <ea> to a1

	sub.l		%d0,%sp			# subtract size of dump
	lea		(%sp),%a0

	tst.b		%d1			# should FP0 be moved?
	bpl.b		fmovm_out_ctrl_fp1	# no

	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes
	mov.l		0x4+EXC_FP0(%a6),(%a0)+
	mov.l		0x8+EXC_FP0(%a6),(%a0)+

fmovm_out_ctrl_fp1:
	lsl.b		&0x1,%d1		# should FP1 be moved?
	bpl.b		fmovm_out_ctrl_fp2	# no

	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes
	mov.l		0x4+EXC_FP1(%a6),(%a0)+
	mov.l		0x8+EXC_FP1(%a6),(%a0)+

fmovm_out_ctrl_fp2:
	lsl.b		&0x1,%d1		# should FP2 be moved?
	bpl.b		fmovm_out_ctrl_fp3	# no

	fmovm.x		&0x20,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp3:
	lsl.b		&0x1,%d1		# should FP3 be moved?
	bpl.b		fmovm_out_ctrl_fp4	# no

	fmovm.x		&0x10,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp4:
	lsl.b		&0x1,%d1		# should FP4 be moved?
	bpl.b		fmovm_out_ctrl_fp5	# no

	fmovm.x		&0x08,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp5:
	lsl.b		&0x1,%d1		# should FP5 be moved?
	bpl.b		fmovm_out_ctrl_fp6	# no

	fmovm.x		&0x04,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp6:
	lsl.b		&0x1,%d1		# should FP6 be moved?
	bpl.b		fmovm_out_ctrl_fp7	# no

	fmovm.x		&0x02,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp7:
	lsl.b		&0x1,%d1		# should FP7 be moved?
	bpl.b		fmovm_out_ctrl_done	# no

	fmovm.x		&0x01,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_done:
	mov.l		%a1,L_SCR1(%a6)		# save <ea> for the error path (fmovm_err)

	lea		(%sp),%a0		# pass: supervisor src
	mov.l		%d0,-(%sp)		# save size
	bsr.l		_dmem_write		# copy data to user mem

	mov.l		(%sp)+,%d0
	add.l		%d0,%sp			# clear fpreg data from stack

	tst.l		%d1			# did dstore err?
	bne.w		fmovm_out_err		# yes

	rts
4406
4407############
4408# MOVE IN: #
4409############
# move-in (memory-to-register) handler. on entry:
#	d0 = # of bytes to transfer (from tbl_fmovm_size)
#	d1 = register select bit string
#	a0 = calculated <ea>
#
# the data is first copied from user memory to a buffer carved out of
# the supervisor stack with one _dmem_read() call, then distributed to
# the registers. fp0/fp1 images go into the exception frame
# (EXC_FP0/EXC_FP1); fp2-fp7 are loaded directly into the FPU.
fmovm_data_in:
	mov.l		%a0,L_SCR1(%a6)		# save <ea> for the error path (fmovm_err)

	sub.l		%d0,%sp			# make room for fpregs
	lea		(%sp),%a1

	mov.l		%d1,-(%sp)		# save bit string for later
	mov.l		%d0,-(%sp)		# save # of bytes

	bsr.l		_dmem_read		# copy data from user mem

	mov.l		(%sp)+,%d0		# retrieve # of bytes

	tst.l		%d1			# did dfetch fail?
	bne.w		fmovm_in_err		# yes

	mov.l		(%sp)+,%d1		# load bit string

	lea		(%sp),%a0		# addr of stack

# walk the select string msb-first: test the top bit, then shift left
# so the next register's bit lands in the sign position.
	tst.b		%d1			# should FP0 be moved?
	bpl.b		fmovm_data_in_fp1	# no

	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes
	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
	mov.l		(%a0)+,0x8+EXC_FP0(%a6)

fmovm_data_in_fp1:
	lsl.b		&0x1,%d1		# should FP1 be moved?
	bpl.b		fmovm_data_in_fp2	# no

	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes
	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
	mov.l		(%a0)+,0x8+EXC_FP1(%a6)

fmovm_data_in_fp2:
	lsl.b		&0x1,%d1		# should FP2 be moved?
	bpl.b		fmovm_data_in_fp3	# no

	fmovm.x		(%a0)+,&0x20		# yes

fmovm_data_in_fp3:
	lsl.b		&0x1,%d1		# should FP3 be moved?
	bpl.b		fmovm_data_in_fp4	# no

	fmovm.x		(%a0)+,&0x10		# yes

fmovm_data_in_fp4:
	lsl.b		&0x1,%d1		# should FP4 be moved?
	bpl.b		fmovm_data_in_fp5	# no

	fmovm.x		(%a0)+,&0x08		# yes

fmovm_data_in_fp5:
	lsl.b		&0x1,%d1		# should FP5 be moved?
	bpl.b		fmovm_data_in_fp6	# no

	fmovm.x		(%a0)+,&0x04		# yes

fmovm_data_in_fp6:
	lsl.b		&0x1,%d1		# should FP6 be moved?
	bpl.b		fmovm_data_in_fp7	# no

	fmovm.x		(%a0)+,&0x02		# yes

fmovm_data_in_fp7:
	lsl.b		&0x1,%d1		# should FP7 be moved?
	bpl.b		fmovm_data_in_done	# no

	fmovm.x		(%a0)+,&0x01		# yes

fmovm_data_in_done:
	add.l		%d0,%sp			# remove fpregs from stack
	rts
4484
4485#####################################
4486
# the register select string was zero: nothing to move. the <ea> has
# already been calculated and the opword pointer advanced, so this is
# a no-op return.
fmovm_data_done:
	rts
4489
4490##############################################################################
4491
4492#
4493# table indexed by the operation's bit string that gives the number
4494# of bytes that will be moved.
4495#
4496# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
4497#
tbl_fmovm_size:
# 256 entries, indexed by the 8-bit register select string; each set
# bit in the index contributes 0x0c (12 bytes, one extended-precision
# register image) to the entry value.
	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
4531
4532#
4533# table to convert a pre-decrement bit string into a post-increment
4534# or control bit string.
4535# ex: 	0x00	==>	0x00
4536#	0x01	==>	0x80
4537#	0x02	==>	0x40
4538#		.
4539#		.
4540#	0xfd	==>	0xbf
4541#	0xfe	==>	0x7f
4542#	0xff	==>	0xff
4543#
tbl_fmovm_convert:
# 256-entry bit-mirror table: entry[i] = i with its 8 bits reversed
# (bit0 <-> bit7, bit1 <-> bit6, ...). used to turn a predecrement
# select string into control/postincrement ordering.
	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
4577
4578	global		fmovm_calc_ea
4579###############################################
4580# _fmovm_calc_ea: calculate effective address #
4581###############################################
# calculate the effective address for fmovm.
# in:	d0 = # of bytes to be moved; it is parked in a0 and serves as
#	     the increment/decrement amount for the (An)+ / -(An) modes.
# out:	a0 = <ea>
# the {MODE,REG} pair is decoded from EXC_OPWORD and dispatched through
# a pc-relative offset table (tbl_fea_mode) below.
fmovm_calc_ea:
	mov.l		%d0,%a0			# move # bytes to a0

# currently, MODE and REG are taken from the EXC_OPWORD. this could be
# easily changed if they were inputs passed in registers.
	mov.w		EXC_OPWORD(%a6),%d0	# fetch opcode word
	mov.w		%d0,%d1			# make a copy

	andi.w		&0x3f,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg  field

# jump to the corresponding function for each {MODE,REG} pair.
	mov.w		(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
	jmp		(tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
4596
	swbeg		&64
# 64-entry table of 16-bit offsets relative to tbl_fea_mode, indexed by
# the combined {MODE,REG} fields of the opword (8 modes * 8 regs).
# entries equal to zero (tbl_fea_mode - tbl_fea_mode) are {MODE,REG}
# pairs that are not dispatched through this table.
tbl_fea_mode:
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode

	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode

# mode 2: address register indirect (An)
	short		faddr_ind_a0	- 	tbl_fea_mode
	short		faddr_ind_a1	- 	tbl_fea_mode
	short		faddr_ind_a2	- 	tbl_fea_mode
	short		faddr_ind_a3 	- 	tbl_fea_mode
	short		faddr_ind_a4 	- 	tbl_fea_mode
	short		faddr_ind_a5 	- 	tbl_fea_mode
	short		faddr_ind_a6 	- 	tbl_fea_mode
	short		faddr_ind_a7 	- 	tbl_fea_mode

# mode 3: postincrement (An)+
	short		faddr_ind_p_a0	- 	tbl_fea_mode
	short		faddr_ind_p_a1 	- 	tbl_fea_mode
	short		faddr_ind_p_a2 	- 	tbl_fea_mode
	short		faddr_ind_p_a3 	- 	tbl_fea_mode
	short		faddr_ind_p_a4 	- 	tbl_fea_mode
	short		faddr_ind_p_a5 	- 	tbl_fea_mode
	short		faddr_ind_p_a6 	- 	tbl_fea_mode
	short		faddr_ind_p_a7 	- 	tbl_fea_mode

# mode 4: predecrement -(An)
	short		faddr_ind_m_a0 	- 	tbl_fea_mode
	short		faddr_ind_m_a1 	- 	tbl_fea_mode
	short		faddr_ind_m_a2 	- 	tbl_fea_mode
	short		faddr_ind_m_a3 	- 	tbl_fea_mode
	short		faddr_ind_m_a4 	- 	tbl_fea_mode
	short		faddr_ind_m_a5 	- 	tbl_fea_mode
	short		faddr_ind_m_a6 	- 	tbl_fea_mode
	short		faddr_ind_m_a7 	- 	tbl_fea_mode

# mode 5: displacement (d16,An)
	short		faddr_ind_disp_a0	- 	tbl_fea_mode
	short		faddr_ind_disp_a1 	- 	tbl_fea_mode
	short		faddr_ind_disp_a2 	- 	tbl_fea_mode
	short		faddr_ind_disp_a3 	- 	tbl_fea_mode
	short		faddr_ind_disp_a4 	- 	tbl_fea_mode
	short		faddr_ind_disp_a5 	- 	tbl_fea_mode
	short		faddr_ind_disp_a6 	- 	tbl_fea_mode
	short		faddr_ind_disp_a7	-	tbl_fea_mode

# mode 6: indexed/memory-indirect (one handler decodes the ext word)
	short		faddr_ind_ext 	- 	tbl_fea_mode
	short		faddr_ind_ext 	- 	tbl_fea_mode
	short		faddr_ind_ext 	- 	tbl_fea_mode
	short		faddr_ind_ext 	- 	tbl_fea_mode
	short		faddr_ind_ext 	- 	tbl_fea_mode
	short		faddr_ind_ext 	- 	tbl_fea_mode
	short		faddr_ind_ext 	- 	tbl_fea_mode
	short		faddr_ind_ext 	- 	tbl_fea_mode

# mode 7: absolute short/long, pc-relative; remaining entries unused
	short		fabs_short	- 	tbl_fea_mode
	short		fabs_long	- 	tbl_fea_mode
	short		fpc_ind		- 	tbl_fea_mode
	short		fpc_ind_ext	- 	tbl_fea_mode
	short		tbl_fea_mode	- 	tbl_fea_mode
	short		tbl_fea_mode	- 	tbl_fea_mode
	short		tbl_fea_mode	- 	tbl_fea_mode
	short		tbl_fea_mode	- 	tbl_fea_mode
4670
4671###################################
4672# Address register indirect: (An) #
4673###################################
# return the current value of An in a0.
# a0/a1 were saved into the exception frame at entry and are read back
# from EXC_DREGS; a2-a5 are still live in their registers; (%a6) holds
# the caller's a6 saved by the frame link; a7 comes from EXC_A7.
faddr_ind_a0:
	mov.l		EXC_DREGS+0x8(%a6),%a0	# Get current a0
	rts

faddr_ind_a1:
	mov.l		EXC_DREGS+0xc(%a6),%a0	# Get current a1
	rts

faddr_ind_a2:
	mov.l		%a2,%a0			# Get current a2
	rts

faddr_ind_a3:
	mov.l		%a3,%a0			# Get current a3
	rts

faddr_ind_a4:
	mov.l		%a4,%a0			# Get current a4
	rts

faddr_ind_a5:
	mov.l		%a5,%a0			# Get current a5
	rts

faddr_ind_a6:
	mov.l		(%a6),%a0		# Get current a6
	rts

faddr_ind_a7:
	mov.l		EXC_A7(%a6),%a0		# Get current a7
	rts
4705
4706#####################################################
4707# Address register indirect w/ postincrement: (An)+ #
4708#####################################################
# return the current An in a0 and write back An + size, where "size"
# is the byte count passed in a0 by fmovm_calc_ea. register images are
# read/written in the same locations as the plain (An) handlers above.
faddr_ind_p_a0:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a1:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a2:
	mov.l		%a2,%d0			# Get current a2
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a2			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a3:
	mov.l		%a3,%d0			# Get current a3
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a3			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a4:
	mov.l		%a4,%d0			# Get current a4
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a4			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a5:
	mov.l		%a5,%d0			# Get current a5
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a5			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a6:
	mov.l		(%a6),%d0		# Get current a6
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,(%a6)		# Save incr value
	mov.l		%d0,%a0
	rts

# (sp)+ needs special post-processing later, so flag it for the caller.
faddr_ind_p_a7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_A7(%a6)		# Save incr value
	mov.l		%d0,%a0
	rts
4774
4775####################################################
4776# Address register indirect w/ predecrement: -(An) #
4777####################################################
# write back An - size and return the decremented address in a0, where
# "size" is the byte count passed in a0 by fmovm_calc_ea.
faddr_ind_m_a0:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a1:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a2:
	mov.l		%a2,%d0			# Get current a2
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a2			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a3:
	mov.l		%a3,%d0			# Get current a3
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a3			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a4:
	mov.l		%a4,%d0			# Get current a4
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a4			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a5:
	mov.l		%a5,%d0			# Get current a5
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a5			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a6:
	mov.l		(%a6),%d0		# Get current a6
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,(%a6)		# Save decr value
	mov.l		%d0,%a0
	rts

# -(sp) needs special post-processing later, so flag it for the caller
# (fmovm_out_predec_s tests for this flag).
faddr_ind_m_a7:
	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_A7(%a6)		# Save decr value
	mov.l		%d0,%a0
	rts
4835
4836########################################################
4837# Address register indirect w/ displacement: (d16, An) #
4838########################################################
# fetch the 16-bit displacement extension word with _imem_read_word(),
# sign extend it (the mov.w to an address register sign extends), and
# add the current An. an ifetch failure exits through iea_iacc.
faddr_ind_disp_a0:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
	rts

faddr_ind_disp_a1:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
	rts

faddr_ind_disp_a2:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a2,%a0			# a2 + d16
	rts

faddr_ind_disp_a3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a3,%a0			# a3 + d16
	rts

faddr_ind_disp_a4:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a4,%a0			# a4 + d16
	rts

faddr_ind_disp_a5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a5,%a0			# a5 + d16
	rts

faddr_ind_disp_a6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		(%a6),%a0		# a6 + d16
	rts

faddr_ind_disp_a7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_A7(%a6),%a0		# a7 + d16
	rts
4942
4943########################################################################
4944# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
4945#    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
4946# Memory indirect postindexed: ([bd, An], Xn, od)		       #
4947# Memory indirect preindexed: ([bd, An, Xn], od)		       #
4948########################################################################
# on entry: d1 = REG field (base address register number).
# fetch the extension word; a full-format word (bit 8 set) goes to
# fcalc_mem_ind, otherwise decode the brief-format word here:
#   bits 15-12 = index register, bit 11 = index size (W/L),
#   bits 10-9  = scale, bits 7-0 = 8-bit displacement.
faddr_ind_ext:
	addq.l		&0x8,%d1		# regno 8-15 selects an areg in fetch_dreg
	bsr.l		fetch_dreg		# fetch base areg
	mov.l		%d0,-(%sp)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch extword in d0

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		(%sp)+,%a0		# a0 = base address

	btst		&0x8,%d0		# full extension word?
	bne.w		fcalc_mem_ind		# yes: memory indirect decode

	mov.l		%d0,L_SCR1(%a6)		# hold opword

	mov.l		%d0,%d1
	rol.w		&0x4,%d1		# rotate bits 15-12 into the low nibble
	andi.w		&0xf,%d1		# extract index regno

# count on fetch_dreg() not to alter a0...
	bsr.l		fetch_dreg		# fetch index

	mov.l		%d2,-(%sp)		# save d2
	mov.l		L_SCR1(%a6),%d2		# fetch opword

	btst		&0xb,%d2		# is it word or long?
	bne.b		faii8_long
	ext.l		%d0			# sign extend word index
faii8_long:
	mov.l		%d2,%d1
	rol.w		&0x7,%d1		# rotate bits 10-9 into the low bits
	andi.l		&0x3,%d1		# extract scale value

	lsl.l		%d1,%d0			# shift index by scale

	extb.l		%d2			# sign extend displacement
	add.l		%d2,%d0			# index + disp
	add.l		%d0,%a0			# An + (index + disp)

	mov.l		(%sp)+,%d2		# restore old d2
	rts
4994
4995###########################
4996# Absolute short: (XXX).W #
4997###########################
# fetch the 16-bit absolute address extension word; the mov.w to an
# address register sign extends it to the full 32-bit <ea>.
fabs_short:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch short address

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# return <ea> in a0
	rts
5008
5009##########################
5010# Absolute long: (XXX).L #
5011##########################
# fetch the 32-bit absolute address from the two extension words and
# return it directly as the <ea>.
fabs_long:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch long address

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,%a0			# return <ea> in a0
	rts
5022
5023#######################################################
5024# Program counter indirect w/ displacement: (d16, PC) #
5025#######################################################
# fetch the 16-bit displacement and add it to the address of the
# extension word itself (the (d16,PC) base).
fpc_ind:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch word displacement

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16

# _imem_read_word() increased the extwptr by 2. need to adjust here.
	subq.l		&0x2,%a0		# adjust <ea>
	rts
5041
5042##########################################################
5043# PC indirect w/ index(8-bit displacement): (d8, PC, An) #
5044# "     "     w/   "  (base displacement): (bd, PC, An)  #
5045# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
5046# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
5047##########################################################
# same decode as faddr_ind_ext but with the PC (the address of this
# extension word) as the base. full-format words (bit 8 set) go to
# fcalc_mem_ind; brief-format words are decoded inline:
#   bits 15-12 = index register, bit 11 = index size (W/L),
#   bits 10-9  = scale, bits 7-0 = 8-bit displacement.
fpc_ind_ext:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch ext word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
	subq.l		&0x2,%a0		# adjust base

	btst		&0x8,%d0		# is disp only 8 bits?
	bne.w		fcalc_mem_ind		# calc memory indirect

	mov.l		%d0,L_SCR1(%a6)		# store opword

	mov.l		%d0,%d1			# make extword copy
	rol.w		&0x4,%d1		# rotate reg num into place
	andi.w		&0xf,%d1		# extract register number

# count on fetch_dreg() not to alter a0...
	bsr.l		fetch_dreg		# fetch index

	mov.l		%d2,-(%sp)		# save d2
	mov.l		L_SCR1(%a6),%d2		# fetch opword

	btst		&0xb,%d2		# is index word or long?
	bne.b		fpii8_long		# long
	ext.l		%d0			# sign extend word index
fpii8_long:
	mov.l		%d2,%d1
	rol.w		&0x7,%d1		# rotate scale value into place
	andi.l		&0x3,%d1		# extract scale value

	lsl.l		%d1,%d0			# shift index by scale

	extb.l		%d2			# sign extend displacement
	add.l		%d2,%d0			# disp + index
	add.l		%d0,%a0			# An + (index + disp)

	mov.l		(%sp)+,%d2		# restore temp register
	rts
5090
5091# d2 = index
5092# d3 = base
5093# d4 = od
5094# d5 = extword
# full-format extension word / memory-indirect decode.
# on entry: d0 = full-format extension word, a0 = base address.
# register roles while decoding (d2-d5 saved/restored on the stack):
#   d2 = scaled index, d3 = base (+bd), d4 = outer displacement,
#   d5 = extension word.
fcalc_mem_ind:
	btst		&0x6,%d0		# is the index suppressed?
	beq.b		fcalc_index

	movm.l		&0x3c00,-(%sp)		# save d2-d5

	mov.l		%d0,%d5			# put extword in d5
	mov.l		%a0,%d3			# put base in d3

	clr.l		%d2			# yes, so index = 0
	bra.b		fbase_supp_ck

# index:
fcalc_index:
	mov.l		%d0,L_SCR1(%a6)		# save d0 (opword)
	bfextu		%d0{&16:&4},%d1		# fetch dreg index (extword bits 15-12)
	bsr.l		fetch_dreg

	movm.l		&0x3c00,-(%sp)		# save d2-d5
	mov.l		%d0,%d2			# put index in d2
	mov.l		L_SCR1(%a6),%d5
	mov.l		%a0,%d3

	btst		&0xb,%d5		# is index word or long?
	bne.b		fno_ext
	ext.l		%d2

fno_ext:
	bfextu		%d5{&21:&2},%d0		# scale field (extword bits 10-9)
	lsl.l		%d0,%d2

# base address (passed as parameter in d3):
# we clear the value here if it should actually be suppressed.
fbase_supp_ck:
	btst		&0x7,%d5		# is the base register suppressed (BS)?
	beq.b		fno_base_sup
	clr.l		%d3

# base displacement:
fno_base_sup:
	bfextu		%d5{&26:&2},%d0		# get bd size (1 = null, 2 = word, 3 = long)
#	beq.l		fmovm_error		# if (size == 0) it's reserved

	cmpi.b	 	%d0,&0x2
	blt.b		fno_bd
	beq.b		fget_word_bd

# long bd: fetch the 32-bit base displacement
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	bra.b		fchk_ind

fget_word_bd:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend bd

fchk_ind:
	add.l		%d0,%d3			# base += bd

# outer displacement:
fno_bd:
	bfextu		%d5{&30:&2},%d0		# I/IS low bits: 0 = no memory indirect
	beq.w		faii_bd

	cmpi.b	 	%d0,&0x2
	blt.b		fnull_od
	beq.b		fword_od

# long od: fetch the 32-bit outer displacement
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	bra.b 		fadd_them

fword_od:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend od
	bra.b		fadd_them

fnull_od:
	clr.l		%d0

fadd_them:
	mov.l		%d0,%d4

	btst		&0x2,%d5		# pre or post indexing?
	beq.b		fpre_indexed

# postindexed: <ea> = [base + bd] + index + od
	mov.l		%d3,%a0
	bsr.l		_dmem_read_long

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d2,%d0			# <ea> += index
	add.l		%d4,%d0			# <ea> += od
	bra.b		fdone_ea

# preindexed: <ea> = [base + bd + index] + od
fpre_indexed:
	add.l		%d2,%d3			# preindexing
	mov.l		%d3,%a0
	bsr.l		_dmem_read_long

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d4,%d0			# ea += od
	bra.b		fdone_ea

# no memory indirection: <ea> = base + bd + index
faii_bd:
	add.l		%d2,%d3			# ea = (base + bd) + index
	mov.l		%d3,%d0
fdone_ea:
	mov.l		%d0,%a0			# return <ea> in a0

	movm.l		(%sp)+,&0x003c		# restore d2-d5
	rts
5231
5232#########################################################
# error exits. each one restores saved state, loads the faulting
# address into a0 and a status word into d0, then branches to the
# common access-error handlers.
fcea_err:
	mov.l		%d3,%a0			# pass the indirect fetch address

	movm.l		(%sp)+,&0x003c		# restore d2-d5
	mov.w		&0x0101,%d0		# status for iea_dacc (NOTE(review): encoding defined there)
	bra.l		iea_dacc

fcea_iacc:
	movm.l		(%sp)+,&0x003c		# restore d2-d5
	bra.l		iea_iacc

# _dmem_write() failed during a move-out
fmovm_out_err:
	bsr.l		restore
	mov.w		&0x00e1,%d0		# status for iea_dacc (NOTE(review): encoding defined there)
	bra.b		fmovm_err

# _dmem_read() failed during a move-in
fmovm_in_err:
	bsr.l		restore
	mov.w		&0x0161,%d0		# status for iea_dacc (NOTE(review): encoding defined there)

fmovm_err:
	mov.l		L_SCR1(%a6),%a0		# faulting <ea> saved by the data movers
	bra.l		iea_dacc
5256
5257#########################################################################
5258# XDEF ****************************************************************	#
5259# 	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
5260#									#
5261# XREF ****************************************************************	#
5262#	_imem_read_long() - read longword from memory			#
5263#	iea_iacc() - _imem_read_long() failed; error recovery		#
5264#									#
5265# INPUT ***************************************************************	#
5266#	None								#
5267# 									#
5268# OUTPUT **************************************************************	#
5269#	If _imem_read_long() doesn't fail:				#
5270#		USER_FPCR(a6)  = new FPCR value				#
5271#		USER_FPSR(a6)  = new FPSR value				#
5272#		USER_FPIAR(a6) = new FPIAR value			#
5273#									#
5274# ALGORITHM ***********************************************************	#
5275# 	Decode the instruction type by looking at the extension word 	#
5276# in order to see how many control registers to fetch from memory.	#
5277# Fetch them using _imem_read_long(). If this fetch fails, exit through	#
5278# the special access error exit handler iea_iacc().			#
5279#									#
5280# Instruction word decoding:						#
5281#									#
5282# 	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
5283#									#
5284#		WORD1			WORD2				#
5285#	1111 0010 00 111100	100$ $$00 0000 0000			#
5286#									#
5287#	$$$ (100): FPCR							#
5288#	    (010): FPSR							#
5289#	    (001): FPIAR						#
5290#	    (000): FPIAR						#
5291#									#
5292#########################################################################
5293
5294	global		fmovm_ctrl
# see the XDEF header above: decode the extension word's register
# select bits and read one longword per selected control register
# (FPCR, FPSR, FPIAR, always in that order) from the instruction
# stream, storing the new values into the USER_FP* save slots.
# any failed ifetch exits through iea_iacc.
fmovm_ctrl:
	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
	beq.w		fctrl_in_7		# yes
	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
	beq.w		fctrl_in_6		# yes
	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
	beq.b		fctrl_in_5		# yes

# fmovem.l #<data>, fpsr/fpiar
# (all remaining select values fall through to here)
fctrl_in_3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts

# fmovem.l #<data>, fpcr/fpiar
fctrl_in_5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts

# fmovem.l #<data>, fpcr/fpsr
fctrl_in_6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
	rts

# fmovem.l #<data>, fpcr/fpsr/fpiar
fctrl_in_7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to mem
	rts
5391
5392##########################################################################
5393
5394#########################################################################
5395# XDEF ****************************************************************	#
5396#	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
5397#			  OVFL/UNFL exceptions will result		#
5398#									#
5399# XREF ****************************************************************	#
5400#	norm() - normalize mantissa after adjusting exponent		#
5401#									#
5402# INPUT ***************************************************************	#
5403#	FP_SRC(a6) = fp op1(src)					#
5404#	FP_DST(a6) = fp op2(dst)					#
5405# 									#
5406# OUTPUT **************************************************************	#
5407#	FP_SRC(a6) = fp op1 scaled(src)					#
5408#	FP_DST(a6) = fp op2 scaled(dst)					#
5409#	d0         = scale amount					#
5410#									#
5411# ALGORITHM ***********************************************************	#
5412# 	If the DST exponent is > the SRC exponent, set the DST exponent	#
5413# equal to 0x3fff and scale the SRC exponent by the value that the	#
5414# DST exponent was scaled by. If the SRC exponent is greater or equal,	#
5415# do the opposite. Return this scale factor in d0.			#
5416#	If the two exponents differ by > the number of mantissa bits	#
5417# plus two, then set the smallest exponent to a very small value as a	#
5418# quick shortcut.							#
5419#									#
5420#########################################################################
5421
	global		addsub_scaler2
addsub_scaler2:
# copy both operands into the scratch areas: FP_SCR0 holds the src,
# FP_SCR1 holds the dst.
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	mov.w		DST_EX(%a1),%d1
	mov.w		%d0,FP_SCR0_EX(%a6)
	mov.w		%d1,FP_SCR1_EX(%a6)

	andi.w		&0x7fff,%d0		# strip sign; keep biased exp
	andi.w		&0x7fff,%d1
	mov.w		%d0,L_SCR1(%a6)		# store src exponent
	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent

	cmp.w		%d0, %d1		# is src exp >= dst exp?
	bge.l		src_exp_ge2

# dst exp is >  src exp; scale dst to exp = 0x3fff
dst_exp_gt2:
	bsr.l		scale_to_zero_dst
	mov.l		%d0,-(%sp)		# save scale factor

	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
	bne.b		cmpexp12

# src is a DENORM: normalize it and replace the stored src exponent
# with the (negative) exponent implied by the normalization shift count.
	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,L_SCR1(%a6)		# insert new exp

cmpexp12:
	mov.w		2+L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
	bge.b		quick_scale12

	mov.w		L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale src exponent by scale factor
	mov.w		FP_SCR0_EX(%a6),%d1
	and.w		&0x8000,%d1		# keep only the sign bit
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

# exponents differ by at least len(mantissa)+2: the smaller (src) operand
# cannot affect the sum, so just force its exponent to the tiny value 1.
quick_scale12:
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

# src exp is >= dst exp; scale src to exp = 0x3fff
src_exp_ge2:
	bsr.l		scale_to_zero_src
	mov.l		%d0,-(%sp)		# save scale factor

	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
	bne.b		cmpexp22
# dst is a DENORM: normalize it and replace the stored dst exponent
# with the (negative) exponent implied by the normalization shift count.
	lea		FP_SCR1(%a6),%a0
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,2+L_SCR1(%a6)	# insert new exp

cmpexp22:
	mov.w		L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
	bge.b		quick_scale22

	mov.w		2+L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor
	mov.w		FP_SCR1_EX(%a6),%d1
	andi.w		&0x8000,%d1		# keep only the sign bit
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

# exponents differ by at least len(mantissa)+2: the smaller (dst) operand
# cannot affect the sum, so just force its exponent to the tiny value 1.
quick_scale22:
	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts
5513
5514##########################################################################
5515
5516#########################################################################
5517# XDEF ****************************************************************	#
5518#	scale_to_zero_src(): scale the exponent of extended precision	#
5519#			     value at FP_SCR0(a6).			#
5520#									#
5521# XREF ****************************************************************	#
5522#	norm() - normalize the mantissa if the operand was a DENORM	#
5523#									#
5524# INPUT ***************************************************************	#
5525#	FP_SCR0(a6) = extended precision operand to be scaled		#
5526# 									#
5527# OUTPUT **************************************************************	#
5528#	FP_SCR0(a6) = scaled extended precision operand			#
5529#	d0	    = scale value					#
5530#									#
5531# ALGORITHM ***********************************************************	#
5532# 	Set the exponent of the input operand to 0x3fff. Save the value	#
5533# of the difference between the original and new exponent. Then, 	#
5534# normalize the operand if it was a DENORM. Add this normalization	#
5535# value to the previous value. Return the result.			#
5536#									#
5537#########################################################################
5538
	global		scale_to_zero_src
scale_to_zero_src:
	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w		%d1,%d0			# make a copy

	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,%d0		# extract operand's sgn
	or.w		&0x3fff,%d0		# insert new exponent (bias 0x3fff == unbiased 0)

	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent

	cmpi.b		STAG(%a6),&DENORM	# is the src operand a DENORM?
	beq.b		stzs_denorm		# yes; normalize the DENORM first

stzs_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)

	rts

stzs_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm
	neg.l		%d0			# new exponent = -(shft val)
	mov.l		%d0,%d1			# use normalized exp for scale calc
	bra.b		stzs_norm		# finish scaling
5566
5567###
5568
5569#########################################################################
5570# XDEF ****************************************************************	#
5571#	scale_sqrt(): scale the input operand exponent so a subsequent	#
5572#		      fsqrt operation won't take an exception.		#
5573#									#
5574# XREF ****************************************************************	#
5575#	norm() - normalize the mantissa if the operand was a DENORM	#
5576#									#
5577# INPUT ***************************************************************	#
5578#	FP_SCR0(a6) = extended precision operand to be scaled		#
5579# 									#
5580# OUTPUT **************************************************************	#
5581#	FP_SCR0(a6) = scaled extended precision operand			#
5582#	d0	    = scale value					#
5583#									#
5584# ALGORITHM ***********************************************************	#
5585#	If the input operand is a DENORM, normalize it.			#
5586# 	If the exponent of the input operand is even, set the exponent	#
5587# to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the 	#
# exponent of the input operand is odd, set the exponent to 0x3fff and	#
5589# return a scale factor of "(exp-0x3fff)/2". 				#
5590#									#
5591#########################################################################
5592
	global		scale_sqrt
scale_sqrt:
	cmpi.b		STAG(%a6),&DENORM	# is the operand a DENORM?
	beq.b		ss_denorm		# yes; normalize the DENORM first

	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only operand's sgn

	btst		&0x0,%d1		# is exp even or odd?
	beq.b		ss_norm_even

# odd exponent: scale to 0x3fff so that (exp - 0x3fff) is even and the
# scale factor divides exactly by 2 (sqrt halves the exponent).
	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent

	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

# even exponent: scale to 0x3ffe so that (exp - 0x3ffe) is even.
ss_norm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent

	mov.l		&0x3ffe,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm; d0 = shift count

	btst		&0x0,%d0		# is exp even or odd?
	beq.b		ss_denorm_even

	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent

	add.l		&0x3fff,%d0
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_denorm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent

	add.l		&0x3ffe,%d0
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts
5640
5641###
5642
5643#########################################################################
5644# XDEF ****************************************************************	#
5645#	scale_to_zero_dst(): scale the exponent of extended precision	#
5646#			     value at FP_SCR1(a6).			#
5647#									#
5648# XREF ****************************************************************	#
5649#	norm() - normalize the mantissa if the operand was a DENORM	#
5650#									#
5651# INPUT ***************************************************************	#
5652#	FP_SCR1(a6) = extended precision operand to be scaled		#
5653# 									#
5654# OUTPUT **************************************************************	#
5655#	FP_SCR1(a6) = scaled extended precision operand			#
5656#	d0	    = scale value					#
5657#									#
5658# ALGORITHM ***********************************************************	#
5659# 	Set the exponent of the input operand to 0x3fff. Save the value	#
5660# of the difference between the original and new exponent. Then, 	#
5661# normalize the operand if it was a DENORM. Add this normalization	#
5662# value to the previous value. Return the result.			#
5663#									#
5664#########################################################################
5665
	global		scale_to_zero_dst
scale_to_zero_dst:
	mov.w		FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w		%d1,%d0			# make a copy

	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,%d0		# extract operand's sgn
	or.w		&0x3fff,%d0		# insert new exponent (bias 0x3fff == unbiased 0)

	mov.w		%d0,FP_SCR1_EX(%a6)	# insert biased exponent

	cmpi.b		DTAG(%a6),&DENORM	# is the dst operand a DENORM?
	beq.b		stzd_denorm		# yes; normalize the DENORM first

stzd_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	rts

stzd_denorm:
	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
	bsr.l		norm			# normalize denorm
	neg.l		%d0			# new exponent = -(shft val)
	mov.l		%d0,%d1			# use normalized exp for scale calc
	bra.b		stzd_norm		# finish scaling
5692
5693##########################################################################
5694
5695#########################################################################
5696# XDEF ****************************************************************	#
5697#	res_qnan(): return default result w/ QNAN operand for dyadic	#
5698#	res_snan(): return default result w/ SNAN operand for dyadic	#
5699#	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
5700#	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
5701#									#
5702# XREF ****************************************************************	#
5703#	None								#
5704#									#
5705# INPUT ***************************************************************	#
5706#	FP_SRC(a6) = pointer to extended precision src operand		#
5707#	FP_DST(a6) = pointer to extended precision dst operand		#
5708# 									#
5709# OUTPUT **************************************************************	#
5710#	fp0 = default result						#
5711#									#
5712# ALGORITHM ***********************************************************	#
5713# 	If either operand (but not both operands) of an operation is a	#
5714# nonsignalling NAN, then that NAN is returned as the result. If both	#
5715# operands are nonsignalling NANs, then the destination operand 	#
5716# nonsignalling NAN is returned as the result.				#
5717# 	If either operand to an operation is a signalling NAN (SNAN),	#
5718# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
5719# enable bit is set in the FPCR, then the trap is taken and the 	#
5720# destination is not modified. If the SNAN trap enable bit is not set,	#
5721# then the SNAN is converted to a nonsignalling NAN (by setting the 	#
5722# SNAN bit in the operand to one), and the operation continues as 	#
5723# described in the preceding paragraph, for nonsignalling NANs.		#
5724#	Make sure the appropriate FPSR bits are set before exiting.	#
5725#									#
5726#########################################################################
5727
	global		res_qnan
	global		res_snan
res_qnan:
res_snan:
# dyadic entry: dst NAN takes precedence over src NAN per the 881/882
# default-result convention described in the header above.
	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
	beq.b		dst_snan2
	cmp.b		DTAG(%a6), &QNAN	# is the dst a  QNAN?
	beq.b		dst_qnan2
src_nan:
	cmp.b		STAG(%a6), &QNAN
	beq.b		src_qnan2
	global		res_snan_1op
res_snan_1op:
src_snan2:
# convert the src SNAN to a QNAN (set msb of the fraction) and record
# SNAN/AIOP/NAN in the user FPSR.
	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0
	bra.b		nan_comp
	global		res_qnan_1op
res_qnan_1op:
src_qnan2:
	or.l		&nan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0
	bra.b		nan_comp
dst_snan2:
# convert the dst SNAN to a QNAN and record SNAN/AIOP/NAN.
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit
	lea		FP_DST(%a6), %a0
	bra.b		nan_comp
dst_qnan2:
# dst QNAN is the result; if the src was an SNAN, still report SNAN/AIOP.
	lea		FP_DST(%a6), %a0
	cmp.b		STAG(%a6), &SNAN
	bne		nan_done
	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6)
nan_done:
	or.l		&nan_mask, USER_FPSR(%a6)
nan_comp:
	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
	beq.b		nan_not_neg
	or.l		&neg_mask, USER_FPSR(%a6)
nan_not_neg:
	fmovm.x		(%a0), &0x80		# return the NAN in fp0
	rts
5771
5772#########################################################################
5773# XDEF ****************************************************************	#
5774# 	res_operr(): return default result during operand error		#
5775#									#
5776# XREF ****************************************************************	#
5777#	None								#
5778#									#
5779# INPUT ***************************************************************	#
5780#	None								#
5781# 									#
5782# OUTPUT **************************************************************	#
5783#	fp0 = default operand error result				#
5784#									#
5785# ALGORITHM ***********************************************************	#
5786#	An nonsignalling NAN is returned as the default result when	#
5787# an operand error occurs for the following cases:			#
5788#									#
5789# 	Multiply: (Infinity x Zero)					#
5790# 	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
5791#									#
5792#########################################################################
5793
	global		res_operr
res_operr:
# set NAN/OPERR/AIOP in the user FPSR and return the canonical
# nonsignalling NAN (all-ones fraction) in fp0.
	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
	fmovm.x		nan_return(%pc), &0x80
	rts

# extended-precision default NAN: exp = 0x7fff, fraction = all ones
nan_return:
	long		0x7fff0000, 0xffffffff, 0xffffffff
5802
5803#########################################################################
5804# XDEF ****************************************************************	#
5805# 	_denorm(): denormalize an intermediate result			#
5806#									#
5807# XREF ****************************************************************	#
5808#	None								#
5809#									#
5810# INPUT *************************************************************** #
5811#	a0 = points to the operand to be denormalized			#
5812#		(in the internal extended format)			#
5813#		 							#
5814#	d0 = rounding precision						#
5815#									#
5816# OUTPUT **************************************************************	#
5817#	a0 = pointer to the denormalized result				#
5818#		(in the internal extended format)			#
5819#									#
5820#	d0 = guard,round,sticky						#
5821#									#
5822# ALGORITHM ***********************************************************	#
5823# 	According to the exponent underflow threshold for the given	#
# precision, shift the mantissa bits to the right in order to raise the	#
5825# exponent of the operand to the threshold value. While shifting the 	#
5826# mantissa bits right, maintain the value of the guard, round, and 	#
5827# sticky bits.								#
5828# other notes:								#
5829#	(1) _denorm() is called by the underflow routines		#
5830#	(2) _denorm() does NOT affect the status register		#
5831#									#
5832#########################################################################
5833
5834#
5835# table of exponent threshold values for each precision
5836#
5837tbl_thresh:
5838	short		0x0
5839	short		sgl_thresh
5840	short		dbl_thresh
5841
5842	global		_denorm
5843_denorm:
5844#
5845# Load the exponent threshold for the precision selected and check
5846# to see if (threshold - exponent) is > 65 in which case we can
5847# simply calculate the sticky bit and zero the mantissa. otherwise
5848# we have to call the denormalization routine.
5849#
5850	lsr.b		&0x2, %d0		# shift prec to lo bits
5851	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
5852	mov.w		%d1, %d0		# copy d1 into d0
5853	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
5854	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
5855	bpl.b		denorm_set_stky		# yes; just calc sticky
5856
5857	clr.l		%d0			# clear g,r,s
5858	btst		&inex2_bit, FPSR_EXCEPT(%a6) # yes; was INEX2 set?
5859	beq.b		denorm_call		# no; don't change anything
5860	bset		&29, %d0		# yes; set sticky bit
5861
5862denorm_call:
5863	bsr.l		dnrm_lp			# denormalize the number
5864	rts
5865
5866#
5867# all bit would have been shifted off during the denorm so simply
5868# calculate if the sticky should be set and clear the entire mantissa.
5869#
5870denorm_set_stky:
5871	mov.l		&0x20000000, %d0	# set sticky bit in return value
5872	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
5873	clr.l		FTEMP_HI(%a0)		# set d1 = 0 (ms mantissa)
5874	clr.l		FTEMP_LO(%a0)		# set d2 = 0 (ms mantissa)
5875	rts
5876
5877#									#
# dnrm_lp(): denormalize exponent/mantissa to specified threshold	#
5879#									#
5880# INPUT:								#
5881#	%a0	   : points to the operand to be denormalized		#
5882#	%d0{31:29} : initial guard,round,sticky				#
5883#	%d1{15:0}  : denormalization threshold				#
5884# OUTPUT:								#
5885#	%a0	   : points to the denormalized operand		 	#
5886#	%d0{31:29} : final guard,round,sticky				#
5887#									#
5888
# *** Local Equates *** #
set	GRS,		L_SCR2			# g,r,s temp storage
set	FTEMP_LO2,	L_SCR1			# FTEMP_LO copy

	global		dnrm_lp
dnrm_lp:

#
# make a copy of FTEMP_LO and place the g,r,s bits directly after it
# in memory so as to make the bitfield extraction for denormalization easier.
# (GRS == L_SCR2 immediately follows FTEMP_LO2 == L_SCR1 in the frame.)
#
	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
	mov.l		%d0, GRS(%a6)		# place g,r,s after it

#
# check to see how much less than the underflow threshold the operand
# exponent is.
#
	mov.l		%d1, %d0		# copy the denorm threshold
	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
	ble.b		dnrm_no_lp		# d1 <= 0; exp already at/above threshold
	cmpi.w		%d1, &0x20		# is ( 0 <= d1 < 32) ?
	blt.b		case_1			# yes
	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
	blt.b		case_2			# yes
	bra.w		case_3			# (d1 >= 64)

#
# No normalization necessary
#
dnrm_no_lp:
	mov.l		GRS(%a6), %d0 		# restore original g,r,s
	rts
5922
5923#
5924# case (0<d1<32)
5925#
5926# %d0 = denorm threshold
5927# %d1 = "n" = amt to shift
5928#
5929#	---------------------------------------------------------
5930#	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
5931#	---------------------------------------------------------
5932#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5933#	\	   \		      \			 \
5934#	 \	    \		       \		  \
5935#	  \	     \			\		   \
5936#	   \	      \			 \		    \
5937#	    \	       \		  \		     \
5938#	     \		\		   \		      \
5939#	      \		 \		    \		       \
5940#	       \	  \		     \			\
5941#	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
5942#	---------------------------------------------------------
5943#	|0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs		|
5944#	---------------------------------------------------------
5945#
case_1:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	mov.l		&32, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

# for shift amounts >= 29 the incoming g,r,s bits overlap the bits being
# shifted out of FTEMP_LO; or the old g,r,s into the low byte of the
# FTEMP_LO copy so they are folded into the new sticky calculation.
	cmpi.w		%d1, &29		# is shft amt >= 29
	blt.b		case1_extract		# no; no fix needed
	mov.b		GRS(%a6), %d2
	or.b		%d2, 3+FTEMP_LO2(%a6)

case1_extract:
	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S

	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO

	bftst		%d0{&2:&30}		# were bits shifted off?
	beq.b		case1_sticky_clear	# no; go finish
	bset		&rnd_stky_bit, %d0	# yes; set sticky bit

case1_sticky_clear:
	and.l		&0xe0000000, %d0	# clear all but G,R,S
	mov.l		(%sp)+, %d2		# restore temp register
	rts
5974
5975#
5976# case (32<=d1<64)
5977#
5978# %d0 = denorm threshold
5979# %d1 = "n" = amt to shift
5980#
5981#	---------------------------------------------------------
5982#	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
5983#	---------------------------------------------------------
5984#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5985#	\	   \		      \
5986#	 \	    \		       \
5987#	  \	     \			-------------------
5988#	   \	      --------------------		   \
5989#	    -------------------	  	  \		    \
5990#	     		       \	   \		     \
5991#	      		 	\     	    \		      \
5992#	       		  	 \	     \		       \
5993#	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
5994#	---------------------------------------------------------
5995#	|0...............0|0....0| NEW_LO     |grs		|
5996#	---------------------------------------------------------
5997#
case_2:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	subi.w		&0x20, %d1		# %d1 now between 0 and 32
	mov.l		&0x20, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
# the number of bits to check for the sticky detect.
# it only plays a role in shift amounts of 61-63.
	mov.b		GRS(%a6), %d2
	or.b		%d2, 3+FTEMP_LO2(%a6)

	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S

	bftst		%d1{&2:&30}		# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit
	bftst		FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit

	mov.l		%d1, %d0		# move new G,R,S to %d0
	bra.b		case2_end

case2_set_sticky:
	mov.l		%d1, %d0		# move new G,R,S to %d0
	bset		&rnd_stky_bit, %d0	# set sticky bit

case2_end:
	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
	and.l		&0xe0000000, %d0	# clear all but G,R,S

	mov.l		(%sp)+,%d2		# restore temp register
	rts
6034
6035#
6036# case (d1>=64)
6037#
6038# %d0 = denorm threshold
6039# %d1 = amt to shift
6040#
case_3:
	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold

	cmpi.w		%d1, &65		# is shift amt > 65?
	blt.b		case3_64		# no; it's == 64
	beq.b		case3_65		# no; it's == 65

#
# case (d1>65)
#
# Shift value is > 65 and out of range. All bits are shifted off.
# Return a zero mantissa with the sticky bit set
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	mov.l		&0x20000000, %d0	# set sticky bit
	rts
6058
6059#
6060# case (d1 == 64)
6061#
6062#	---------------------------------------------------------
6063#	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
6064#	---------------------------------------------------------
6065#	<-------(32)------>
6066#	\	   	   \
6067#	 \	    	    \
6068#	  \	     	     \
6069#	   \	      	      ------------------------------
6070#	    -------------------------------		    \
6071#	     		       		   \		     \
6072#	      		 	     	    \		      \
6073#	       		  	 	     \		       \
6074#					      <-------(32)------>
6075#	---------------------------------------------------------
6076#	|0...............0|0................0|grs		|
6077#	---------------------------------------------------------
6078#
case3_64:
# shift == 64: old FTEMP_HI{31:30} become the new G,R; everything
# below contributes to sticky.
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	mov.l		%d0, %d1		# make a copy
	and.l		&0xc0000000, %d0	# extract G,R
	and.l		&0x3fffffff, %d1	# extract other bits

	bra.b		case3_complete
6086
6087#
6088# case (d1 == 65)
6089#
6090#	---------------------------------------------------------
6091#	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
6092#	---------------------------------------------------------
6093#	<-------(32)------>
6094#	\	   	   \
6095#	 \	    	    \
6096#	  \	     	     \
6097#	   \	      	      ------------------------------
6098#	    --------------------------------		    \
6099#	     		       		    \		     \
6100#	      		 	     	     \		      \
6101#	       		  	 	      \		       \
6102#					       <-------(31)----->
6103#	---------------------------------------------------------
6104#	|0...............0|0................0|0rs		|
6105#	---------------------------------------------------------
6106#
case3_65:
# shift == 65: old FTEMP_HI{31} becomes the new R (G is zero);
# everything below contributes to sticky.
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	and.l		&0x80000000, %d0	# extract R bit
	lsr.l		&0x1, %d0		# shift high bit into R bit
	and.l		&0x7fffffff, %d1	# extract other bits
# NOTE(review): unlike case3_64, %d1 was not reloaded from FTEMP_HI(%a0)
# here; it still holds the shift amount (65), so this "and" leaves a
# nonzero value and the sticky bit is always set on this path. Verify
# whether %d1 should first be loaded from FTEMP_HI(%a0).
6112
case3_complete:
# last operation done was an "and" of the bits shifted off so the condition
# codes are already set so branch accordingly.
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.b		GRS(%a6)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky

#
# no bits were shifted off so don't set the sticky bit.
# the guard, round, and sticky bits in %d0 stay as computed above
# and the entire mantissa is cleared to zero.
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts
6130
6131#
6132# some bits were shifted off so set the sticky bit.
6133# the entire mantissa is zero.
6134#
6135case3_set_sticky:
6136	bset		&rnd_stky_bit,%d0	# set new sticky bit
6137	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
6138	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
6139	rts
6140
6141#########################################################################
6142# XDEF ****************************************************************	#
6143#	_round(): round result according to precision/mode		#
6144#									#
6145# XREF ****************************************************************	#
6146#	None								#
6147#									#
6148# INPUT ***************************************************************	#
6149#	a0	  = ptr to input operand in internal extended format 	#
6150#	d1(hi)    = contains rounding precision:			#
6151#			ext = $0000xxxx					#
6152#			sgl = $0004xxxx					#
6153#			dbl = $0008xxxx					#
6154#	d1(lo)	  = contains rounding mode:				#
6155#			RN  = $xxxx0000					#
6156#			RZ  = $xxxx0001					#
6157#			RM  = $xxxx0002					#
6158#			RP  = $xxxx0003					#
6159#	d0{31:29} = contains the g,r,s bits (extended)			#
6160#									#
6161# OUTPUT **************************************************************	#
6162#	a0 = pointer to rounded result					#
6163#									#
6164# ALGORITHM ***********************************************************	#
6165#	On return the value pointed to by a0 is correctly rounded,	#
6166#	a0 is preserved and the g-r-s bits in d0 are cleared.		#
6167#	The result is not typed - the tag field is invalid.  The	#
6168#	result is still in the internal extended format.		#
6169#									#
6170#	The INEX bit of USER_FPSR will be set if the rounded result was	#
6171#	inexact (i.e. if any of the g-r-s bits were set).		#
6172#									#
6173#########################################################################
6174
	global		_round
_round:
#
# ext_grs() looks at the rounding precision and sets the appropriate
# G,R,S bits.
# If (G,R,S == 0) then result is exact and round is done, else set
# the inex flag in status reg and continue.
#
	bsr.l		ext_grs			# extract G,R,S

	tst.l		%d0			# are G,R,S zero?
	beq.w		truncate		# yes; round is complete

	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex

#
# Use rounding mode as an index into a jump table for these modes.
# All of the following assumes grs != 0.
# (d1.w holds the rounding mode: RN=0, RZ=1, RM=2, RP=3.)
#
	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler

tbl_mode:
	short		rnd_near - tbl_mode
	short		truncate - tbl_mode	# RZ always truncates
	short		rnd_mnus - tbl_mode
	short		rnd_plus - tbl_mode
6202
6203#################################################################
6204#	ROUND PLUS INFINITY					#
6205#								#
6206#	If sign of fp number = 0 (positive), then add 1 to l.	#
6207#################################################################
rnd_plus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bmi.w		truncate		# if negative then truncate

	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext
6219
6220#################################################################
6221#	ROUND MINUS INFINITY					#
6222#								#
6223#	If sign of fp number = 1 (negative), then add 1 to l.	#
6224#################################################################
rnd_mnus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bpl.w		truncate		# if positive then truncate

	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext
6236
6237#################################################################
6238#	ROUND NEAREST						#
6239#								#
6240#	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
6241#	Note that this will round to even in case of a tie.	#
6242#################################################################
rnd_near:
	asl.l		&0x1, %d0		# shift g-bit into the carry bit
	bcc.w		truncate		# g = 0; result already nearest, truncate

	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext
6253
# *** LOCAL EQUATES ***
set	ad_1_sgl,	0x00000100	# constant to add 1 to l-bit in sgl prec
set	ad_1_dbl,	0x00000800	# constant to add 1 to l-bit in dbl prec

#########################
#	ADD SINGLE	#
#########################
add_sgl:
	add.l		&ad_1_sgl, FTEMP_HI(%a0)
	bcc.b		scc_clr			# no mantissa overflow
	roxr.w		FTEMP_HI(%a0)		# shift v-bit back in
	roxr.w		FTEMP_HI+2(%a0)		# shift v-bit back in
	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
scc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		sgl_done
	and.w		&0xfe00, FTEMP_HI+2(%a0) # clear the l-bit (round to even)
sgl_done:
	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
	clr.l		FTEMP_LO(%a0)		# lo(mantissa) unused in sgl prec
	rts
6275
#########################
#	ADD EXTENDED	#
#########################
add_ext:
	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
	bcc.b		xcc_clr			# test for carry out
	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
	bcc.b		xcc_clr
	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
xcc_clr:
	tst.l		%d0			# test rs = 0
	bne.b		add_ext_done
	and.b		&0xfe,FTEMP_LO+3(%a0)	# clear the l bit (round to even)
add_ext_done:
	rts
6295
#########################
#	ADD DOUBLE	#
#########################
add_dbl:
	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
	bcc.b		dcc_clr			# no carry
	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
	bcc.b		dcc_clr			# no carry

	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
dcc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		dbl_done
	and.w		&0xf000, FTEMP_LO+2(%a0) # clear the l-bit (round to even)

dbl_done:
	and.l		&0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
	rts
6318
###########################
# Truncate all other bits #
###########################
# Shared tail: chop mantissa bits beyond the selected rounding
# precision (ext needs no chopping).
truncate:
	swap		%d1			# select rnd prec

	cmpi.b		%d1, &s_mode		# is prec sgl?
	beq.w		sgl_done		# yes
	bgt.b		dbl_done		# no; it's dbl
	rts					# no; it's ext
6329
6330
#
# ext_grs(): extract guard, round and sticky bits according to
#	     rounding precision.
#
# INPUT
#	d0	   = extended precision g,r,s (in d0{31:29})
#	d1 	   = {PREC,ROUND}
# OUTPUT
#	d0{31:29}  = guard, round, sticky
#
# The ext_grs extracts the guard/round/sticky bits according to the
# selected rounding precision. It is called by the round subroutine
# only.  All registers except d0 are kept intact. d0 becomes an
# updated guard,round,sticky in d0{31:29}
#
# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
#	 prior to usage, and needs to restore d1 to original. this
#	 routine is tightly tied to the round routine and not meant to
#	 uphold standard subroutine calling practices.
#

ext_grs:
	swap		%d1			# have d1.w point to round precision
	tst.b		%d1			# is rnd prec = extended?
	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl

#
# %d0 actually already holds g,r,s since _round() had it before calling
# this function. so, as long as we don't disturb it, we are "returning" it.
#
ext_grs_ext:
	swap		%d1			# yes; return to correct positions
	rts

ext_grs_not_ext:
	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}

	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
	bne.b		ext_grs_dbl		# no; go handle dbl

#
# sgl:
#	96		64	  40	32		0
#	-----------------------------------------------------
#	| EXP	|XXXXXXX|	  |xx	|		|grs|
#	-----------------------------------------------------
#			<--(24)--->nn\			   /
#				   ee ---------------------
#				   ww		|
#						v
#				   gr	   new sticky
#
ext_grs_sgl:
	bfextu		FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
	mov.l		&30, %d2		# of the sgl prec. limits
	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
	mov.l		FTEMP_HI(%a0), %d2	# get word 2 for s-bit test
	and.l		&0x0000003f, %d2	# s bit is the or of all other
	bne.b		ext_grs_st_stky		# bits to the right of g-r
	tst.l		FTEMP_LO(%a0)		# test lower mantissa
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	tst.l		%d0			# test original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# if words 3 and 4 are clr, exit

#
# dbl:
#	96		64	  	32	 11	0
#	-----------------------------------------------------
#	| EXP	|XXXXXXX|	  	|	 |xx	|grs|
#	-----------------------------------------------------
#						  nn\	    /
#						  ee -------
#						  ww	|
#							v
#						  gr	new sticky
#
ext_grs_dbl:
	bfextu		FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
	mov.l		&30, %d2		# of the dbl prec. limits
	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa  for s-bit test
	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
	bne.b		ext_grs_st_stky		# other bits to the right of g-r
	tst.l		%d0			# test word original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# if clear, exit

ext_grs_st_stky:
	bset		&rnd_stky_bit, %d3	# set sticky bit
ext_grs_end_sd:
	mov.l		%d3, %d0		# return grs to d0

	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}

	swap		%d1			# restore d1 to original
	rts
6428
#########################################################################
# norm(): normalize the mantissa of an extended precision input. the	#
#	  input operand should not be normalized already.		#
#									#
# XDEF ****************************************************************	#
#	norm()								#
#									#
# XREF **************************************************************** #
#	none								#
#									#
# INPUT *************************************************************** #
#	a0 = pointer fp extended precision operand to normalize		#
#									#
# OUTPUT ************************************************************** #
# 	d0 = number of bit positions the mantissa was shifted		#
#	a0 = the input operand's mantissa is normalized; the exponent	#
#	     is unchanged.						#
#									#
#########################################################################
	global		norm
norm:
	mov.l		%d2, -(%sp)		# create some temp regs
	mov.l		%d3, -(%sp)

	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)

	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
	beq.b		norm_lo			# hi(man) is all zeroes!

# some bit in hi(man) is set: shift both words left by d2 and pull the
# displaced lo(man) bits up into hi(man).
norm_hi:
	lsl.l		%d2, %d0		# left shift hi(man)
	bfextu		%d1{&0:%d2}, %d3	# extract lo bits

	or.l		%d3, %d0		# create hi(man)
	lsl.l		%d2, %d1		# create lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

# hi(man) was zero: lo(man) becomes the new hi(man); total shift is 32
# plus the shift needed within lo(man).
norm_lo:
	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
	lsl.l		%d2, %d1		# shift lo(man)
	add.l		&32, %d2		# add 32 to shft amount

	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts
6490
#########################################################################
# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
#		- returns corresponding optype tag			#
#									#
# XDEF ****************************************************************	#
#	unnorm_fix()							#
#									#
# XREF **************************************************************** #
#	norm() - normalize the mantissa					#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to unnormalized extended precision number		#
#									#
# OUTPUT ************************************************************** #
#	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
#	a0 = input operand has been converted to a norm, denorm, or	#
#	     zero; both the exponent and mantissa are changed.		#
#									#
#########################################################################

	global		unnorm_fix
unnorm_fix:
	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
	bne.b		unnorm_shift		# hi(man) is not all zeroes

#
# hi(man) is all zeroes so see if any bits in lo(man) are set
#
unnorm_chk_lo:
	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
	beq.w		unnorm_zero		# yes

	add.w		&32, %d0		# no; fix shift distance

#
# d0 = # shifts needed for complete normalization
#
unnorm_shift:
	clr.l		%d1			# clear top word
	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1		# strip off sgn

	cmp.w		%d0, %d1		# will denorm push exp < 0?
	bgt.b		unnorm_nrm_zero		# yes; denorm only until exp = 0

#
# exponent would not go < 0. therefore, number stays normalized
#
	sub.w		%d0, %d1		# shift exponent value
	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
	and.w		&0x8000, %d0		# save old sign
	or.w		%d0, %d1		# {sgn,new exp}
	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent

	bsr.l		norm			# normalize UNNORM

	mov.b		&NORM, %d0		# return new optype tag
	rts

#
# exponent would go < 0, so only denormalize until exp = 0
#
unnorm_nrm_zero:
	cmp.b		%d1, &32		# is exp <= 32?
	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent

	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# extract new lo(man)
	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0 (keep sign)

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# shift amount > 32: only mantissa bits set are in lo(man)
#
unnorm_nrm_zero_lrg:
	sub.w		&32, %d1		# adjust shft amt by 32

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# left shift lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) = 0

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0 (keep sign)

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# whole mantissa is zero so this UNNORM is actually a zero
#
unnorm_zero:
	and.w		&0x8000, FTEMP_EX(%a0) 	# force exponent to zero

	mov.b		&ZERO, %d0		# fix optype tag
	rts
6594
#########################################################################
# XDEF ****************************************************************	#
# 	set_tag_x(): return the optype of the input ext fp number	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision operand			#
# 									#
# OUTPUT **************************************************************	#
#	d0 = value of type tag						#
# 		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
#									#
# ALGORITHM ***********************************************************	#
#	Simply test the exponent, j-bit, and mantissa values to 	#
# determine the type of operand.					#
#	If it's an unnormalized zero, alter the operand and force it	#
# to be a normal zero.							#
#									#
#########################################################################

	global		set_tag_x
set_tag_x:
	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
	andi.w		&0x7fff, %d0		# strip off sign
	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
	beq.b		inf_or_nan_x
not_inf_or_nan_x:
	btst		&0x7,FTEMP_HI(%a0)	# test j-bit (explicit integer bit)
	beq.b		not_norm_x		# j-bit clear; not a NORM
is_norm_x:
	mov.b		&NORM, %d0
	rts
not_norm_x:
	tst.w		%d0			# is exponent = 0?
	bne.b		is_unnorm_x		# non-zero exp w/ clear j-bit: UNNORM
not_unnorm_x:
	tst.l		FTEMP_HI(%a0)		# any mantissa bits set?
	bne.b		is_denorm_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_denorm_x
is_zero_x:
	mov.b		&ZERO, %d0
	rts
is_denorm_x:
	mov.b		&DENORM, %d0
	rts
# must distinguish now "Unnormalized zeroes" which we
# must convert to zero.
is_unnorm_x:
	tst.l		FTEMP_HI(%a0)
	bne.b		is_unnorm_reg_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_unnorm_reg_x
# it's an "unnormalized zero". let's convert it to an actual zero...
	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent (keep sign)
	mov.b		&ZERO, %d0
	rts
is_unnorm_reg_x:
	mov.b		&UNNORM, %d0
	rts
inf_or_nan_x:
	tst.l		FTEMP_LO(%a0)		# any lo mantissa bits -> NAN
	bne.b		is_nan_x
	mov.l		FTEMP_HI(%a0), %d0
	and.l		&0x7fffffff, %d0	# msb is a don't care!
	bne.b		is_nan_x
is_inf_x:
	mov.b		&INF, %d0
	rts
is_nan_x:
	btst		&0x6, FTEMP_HI(%a0)	# quiet bit set?
	beq.b		is_snan_x
	mov.b		&QNAN, %d0
	rts
is_snan_x:
	mov.b		&SNAN, %d0
	rts
6674
#########################################################################
# XDEF ****************************************************************	#
# 	set_tag_d(): return the optype of the input dbl fp number	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = points to double precision operand				#
# 									#
# OUTPUT **************************************************************	#
#	d0 = value of type tag						#
# 		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
#									#
# ALGORITHM ***********************************************************	#
#	Simply test the exponent, j-bit, and mantissa values to 	#
# determine the type of operand.					#
#									#
#########################################################################

	global		set_tag_d
set_tag_d:
	mov.l		FTEMP(%a0), %d0		# fetch hi longword
	mov.l		%d0, %d1		# keep a copy for mantissa tests

	andi.l		&0x7ff00000, %d0	# mask biased exponent field
	beq.b		zero_or_denorm_d	# exp = 0

	cmpi.l		%d0, &0x7ff00000	# exp = max?
	beq.b		inf_or_nan_d

is_norm_d:
	mov.b		&NORM, %d0
	rts
zero_or_denorm_d:
	and.l		&0x000fffff, %d1	# any hi mantissa bits set?
	bne		is_denorm_d
	tst.l		4+FTEMP(%a0)		# any lo mantissa bits set?
	bne		is_denorm_d
is_zero_d:
	mov.b		&ZERO, %d0
	rts
is_denorm_d:
	mov.b		&DENORM, %d0
	rts
inf_or_nan_d:
	and.l		&0x000fffff, %d1	# any hi mantissa bits set?
	bne		is_nan_d
	tst.l		4+FTEMP(%a0)		# any lo mantissa bits set?
	bne		is_nan_d
is_inf_d:
	mov.b		&INF, %d0
	rts
is_nan_d:
	btst		&19, %d1		# quiet bit set?
	bne		is_qnan_d
is_snan_d:
	mov.b		&SNAN, %d0
	rts
is_qnan_d:
	mov.b		&QNAN, %d0
	rts
6737
#########################################################################
# XDEF ****************************************************************	#
# 	set_tag_s(): return the optype of the input sgl fp number	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to single precision operand			#
# 									#
# OUTPUT **************************************************************	#
#	d0 = value of type tag						#
# 		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
#									#
# ALGORITHM ***********************************************************	#
#	Simply test the exponent, j-bit, and mantissa values to 	#
# determine the type of operand.					#
#									#
#########################################################################

	global		set_tag_s
set_tag_s:
	mov.l		FTEMP(%a0), %d0		# fetch sgl operand
	mov.l		%d0, %d1		# keep a copy for mantissa tests

	andi.l		&0x7f800000, %d0	# mask biased exponent field
	beq.b		zero_or_denorm_s	# exp = 0

	cmpi.l		%d0, &0x7f800000	# exp = max?
	beq.b		inf_or_nan_s

is_norm_s:
	mov.b		&NORM, %d0
	rts
zero_or_denorm_s:
	and.l		&0x007fffff, %d1	# any mantissa bits set?
	bne		is_denorm_s
is_zero_s:
	mov.b		&ZERO, %d0
	rts
is_denorm_s:
	mov.b		&DENORM, %d0
	rts
inf_or_nan_s:
	and.l		&0x007fffff, %d1	# any mantissa bits set?
	bne		is_nan_s
is_inf_s:
	mov.b		&INF, %d0
	rts
is_nan_s:
	btst		&22, %d1		# quiet bit set?
	bne		is_qnan_s
is_snan_s:
	mov.b		&SNAN, %d0
	rts
is_qnan_s:
	mov.b		&QNAN, %d0
	rts
6796
#########################################################################
# XDEF ****************************************************************	#
# 	unf_res(): routine to produce default underflow result of a 	#
#	 	   scaled extended precision number; this is used by 	#
#		   fadd/fdiv/fmul/etc. emulation routines.		#
# 	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
#		    single round prec and extended prec mode.		#
#									#
# XREF ****************************************************************	#
#	_denorm() - denormalize according to scale factor		#
# 	_round() - round denormalized number according to rnd prec	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision operand			#
#	d0 = scale factor						#
#	d1 = rounding precision/mode					#
#									#
# OUTPUT **************************************************************	#
#	a0 = pointer to default underflow result in extended precision	#
#	d0.b = result FPSR_cc which caller may or may not want to save	#
#									#
# ALGORITHM ***********************************************************	#
# 	Convert the input operand to "internal format" which means the	#
# exponent is extended to 16 bits and the sign is stored in the unused	#
# portion of the extended precision operand. Denormalize the number	#
# according to the scale factor passed in d0. Then, round the 		#
# denormalized result.							#
# 	Set the FPSR_exc bits as appropriate but return the cc bits in	#
# d0 in case the caller doesn't want to save them (as is the case for	#
# fmove out).								#
# 	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
# precision and the rounding mode to single.				#
#									#
#########################################################################
	global		unf_res
unf_res:
	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack

	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format
	sne		FTEMP_SGN(%a0)

	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1
	sub.w		%d0, %d1		# apply scale factor
	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0, -(%sp)		# save operand ptr during calls

	mov.l		0x4(%sp),%d0		# pass rnd prec.
	andi.w		&0x00c0,%d0		# isolate prec field
	lsr.w		&0x4,%d0
	bsr.l		_denorm			# denorm result

	mov.l		(%sp),%a0
	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
	andi.w		&0xc0,%d1		# extract rnd prec
	lsr.w		&0x4,%d1
	swap		%d1			# prec in hi word of d1
	mov.w		0x6(%sp),%d1
	andi.w		&0x30,%d1		# extract rnd mode
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+, %a0

# result is now rounded properly. convert back to normal format
	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res_chkifzero	# no; result is positive
	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res_cont		# no
#	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit, %d0		# yes; set zero ccode bit

unf_res_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res_end		# no
	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res_end:
	add.l		&0x4, %sp		# clear stack
	rts
6893
# unf_res() for fsglmul() and fsgldiv(): denorm in extended precision,
# then round with single precision and the caller's rounding mode.
	global		unf_res4
unf_res4:
	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack

	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format
	sne		FTEMP_SGN(%a0)

	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
	and.w		&0x7fff,%d1
	sub.w		%d0,%d1			# apply scale factor
	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0,-(%sp)		# save operand ptr during calls

	clr.l		%d0			# force rnd prec = ext
	bsr.l		_denorm			# denorm result

	mov.l		(%sp),%a0
	mov.w		&s_mode,%d1		# force rnd prec = sgl
	swap		%d1			# prec in hi word of d1
	mov.w		0x6(%sp),%d1		# load rnd mode
	andi.w		&0x30,%d1		# extract rnd mode
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+,%a0

# result is now rounded properly. convert back to normal format
	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res4_chkifzero	# no; result is positive
	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res4_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res4_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res4_cont		# no
#	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit,%d0		# yes; set zero ccode bit

unf_res4_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res4_end		# no
	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res4_end:
	add.l		&0x4,%sp		# clear stack
	rts
6953
#########################################################################
# XDEF ****************************************************************	#
#	ovf_res(): routine to produce the default overflow result of	#
#		   an overflowing number.				#
#	ovf_res2(): same as above but the rnd mode/prec are passed	#
#		    differently.					#
#									#
# XREF ****************************************************************	#
#	none								#
#									#
# INPUT ***************************************************************	#
#	d1.b 	= '-1' => (-); '0' => (+)				#
#   ovf_res():								#
#	d0 	= rnd mode/prec						#
#   ovf_res2():								#
#	hi(d0) 	= rnd prec						#
#	lo(d0)	= rnd mode						#
#									#
# OUTPUT **************************************************************	#
#	a0   	= points to extended precision result			#
#	d0.b 	= condition code bits					#
#									#
# ALGORITHM ***********************************************************	#
#	The default overflow result can be determined by the sign of	#
# the result and the rounding mode/prec in effect. These bits are	#
# concatenated together to create an index into the default result 	#
# table. A pointer to the correct result is returned in a0. The		#
# resulting condition codes are returned in d0 in case the caller 	#
# doesn't want FPSR_cc altered (as is the case for fmove out).		#
#									#
#########################################################################

	global		ovf_res
ovf_res:
	andi.w		&0x10,%d1		# keep result sign
	lsr.b		&0x4,%d0		# shift prec/mode
	or.b		%d0,%d1			# concat the two
	mov.w		%d1,%d0			# make a copy
	lsl.b		&0x1,%d1		# multiply d1 by 2
	bra.b		ovf_res_load

	global		ovf_res2
ovf_res2:
	and.w		&0x10, %d1		# keep result sign
	or.b		%d0, %d1		# insert rnd mode
	swap		%d0
	or.b		%d0, %d1		# insert rnd prec
	mov.w		%d1, %d0		# make a copy
	lsl.b		&0x1, %d1		# shift left by 1

#
# use the rounding mode, precision, and result sign as an index into the
# two tables below to fetch the default result and the result ccodes.
# d0 indexes the byte table; d1 (doubled) indexes the 16-byte results.
#
ovf_res_load:
	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr

	rts

# condition-code table: rows = {+ext,+sgl,+dbl,unused,-ext,-sgl,-dbl},
# cols = rnd mode {RN,RZ,RM,RP}; 0x2 = INF bit, 0x8 = N bit.
tbl_ovfl_cc:
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x0, 0x0, 0x0, 0x0
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8

tbl_ovfl_result:
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
7058
#########################################################################
# XDEF ****************************************************************	#
# 	fout(): move from fp register to memory or data register	#
#									#
# XREF ****************************************************************	#
#	_round() - needed to create EXOP for sgl/dbl precision		#
#	norm() - needed to create EXOP for extended precision		#
#	ovf_res() - create default overflow result for sgl/dbl precision#
#	unf_res() - create default underflow result for sgl/dbl prec.	#
#	dst_dbl() - create rounded dbl precision result.		#
#	dst_sgl() - create rounded sgl precision result.		#
#	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
#	bindec() - convert FP binary number to packed number.		#
#	_mem_write() - write data to memory.				#
#	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
#	_dmem_write_{byte,word,long}() - write data to memory.		#
#	store_dreg_{b,w,l}() - store data to data register file.	#
#	facc_out_{b,w,l,d,x}() - data access error occurred.		#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = round prec,mode						#
# 									#
# OUTPUT **************************************************************	#
#	fp0 : intermediate underflow or overflow result if		#
#	      OVFL/UNFL occurred for a sgl or dbl operand		#
#									#
# ALGORITHM ***********************************************************	#
#	This routine is accessed by many handlers that need to do an	#
# opclass three move of an operand out to memory.			#
#	Decode an fmove out (opclass 3) instruction to determine if	#
# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
# register or memory. The algorithm uses a standard "fmove" to create	#
# the rounded result. Also, since exceptions are disabled, this also	#
# creates the correct OPERR default result if appropriate.		#
#	For sgl or dbl precision, overflow or underflow can occur. If	#
# either occurs and is enabled, the EXOP.				#
#	For extended precision, the stacked <ea> must be fixed along	#
# w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
# the source is a denorm and if underflow is enabled, an EXOP must be	#
# created.								#
# 	For packed, the k-factor must be fetched from the instruction	#
# word or a data register. The <ea> must be fixed as w/ extended 	#
# precision. Then, bindec() is called to create the appropriate 	#
# packed result.							#
#	If at any time an access error is flagged by one of the move-	#
# to-memory routines, then a special exit must be made so that the	#
# access error can be handled properly.					#
#									#
#########################################################################

	global		fout
fout:
	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine

	swbeg		&0x8
tbl_fout:
	short		fout_long	-	tbl_fout
	short		fout_sgl	-	tbl_fout
	short		fout_ext	-	tbl_fout
	short		fout_pack	-	tbl_fout
	short		fout_word	-	tbl_fout
	short		fout_dbl	-	tbl_fout
	short		fout_byte	-	tbl_fout
	short		fout_pack	-	tbl_fout
7126
#################################################################
# fmove.b out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
fout_byte:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_byte_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value into fp0

fout_byte_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec,mode

	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_byte_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_byte	# write byte

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	rts

fout_byte_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_b
	rts

# DENORM source: substitute the smallest-magnitude sgl with the same
# sign; the fmove.b above then produces the correct default result.
fout_byte_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_byte_norm
7172
7173#################################################################
7174# fmove.w out ###################################################
7175#################################################################
7176
# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
#
# fout_word(): emulate "fmove.w <fp0>,<ea>".
# Identical structure to fout_byte but stores a word.
# In: a0 = ptr to ext-prec src, d0 = rnd prec/mode, a6 = exc frame.
# Out: rounded word stored to memory <ea> or to Dn; exc/accrued bits
#      OR'd into USER_FPSR; store failure exits through facc_out_w.
fout_word:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_word_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value

fout_word_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec:mode

	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_word_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_word	# write word

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	rts

# destination is a data register: store low word of result into Dn.
fout_word_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_w
	rts

# DENORM source: substitute smallest single with DENORM's sign so the
# fmov.w raises the correct status bits.
fout_word_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_word_norm
7218
7219#################################################################
7220# fmove.l out ###################################################
7221#################################################################
7222
# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
#
# fout_long(): emulate "fmove.l <fp0>,<ea>".
# Identical structure to fout_byte/fout_word but stores a longword.
# In: a0 = ptr to ext-prec src, d0 = rnd prec/mode, a6 = exc frame.
# Out: rounded long stored to memory <ea> or to Dn; exc/accrued bits
#      OR'd into USER_FPSR; store failure exits through facc_out_l.
fout_long:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_long_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value

fout_long_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec:mode

	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

fout_long_write:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_long_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	rts

# destination is a data register: store result longword into Dn.
fout_long_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l
	rts

# DENORM source: substitute smallest single with DENORM's sign so the
# fmov.l raises the correct status bits.
fout_long_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_long_norm
7265
7266#################################################################
7267# fmove.x out ###################################################
7268#################################################################
7269
# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
# The DENORM causes an Underflow exception.
#
# fout_ext(): emulate "fmove.x <fp0>,<ea>" (12-byte extended store).
# In: a0 = ptr to ext-prec src, a6 = exc frame.
# Out: 12-byte value written to memory; result also left in fp0.
#      DENORM source additionally sets UNFL and, if UNFL/INEX is enabled,
#      returns the EXOP in fp1. Store failure exits through fout_ext_err.
fout_ext:

# we copy the extended precision result to FP_SCR0 so that the reserved
# 16-bit field gets zeroed. we do this since we promise not to disturb
# what's at SRC(a0).
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	fmovm.x		SRC(%a0),&0x80		# return result

	bsr.l		_calc_ea_fout		# fix stacked <ea>

	mov.l		%a0,%a1			# pass: dst addr
	lea		FP_SCR0(%a6),%a0	# pass: src addr
	mov.l		&0xc,%d0		# pass: opsize is 12 bytes

# we must not yet write the extended precision data to the stack
# in the pre-decrement case from supervisor mode or else we'll corrupt
# the stack frame. so, leave it in FP_SRC for now and deal with it later...
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_ext_a7

	bsr.l		_dmem_write		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_ext_denorm		# no
	rts

# the number is a DENORM. must set the underflow exception bit
fout_ext_denorm:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit

	mov.b		FPCR_ENABLE(%a6),%d0
	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
	bne.b		fout_ext_exc		# yes
	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_ext_a7:
	bsr.l		_mem_write2		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_ext_denorm		# no
	rts

# UNFL or INEX enabled: build the exceptional operand (EXOP) by
# normalizing the DENORM mantissa and inserting the shifted exponent.
fout_ext_exc:
	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the mantissa
	neg.w		%d0			# new exp = -(shft amt)
	andi.w		&0x7fff,%d0
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

# memory store failed: repair the stacked a6 then take the access-error
# exit for an extended-precision store.
fout_ext_err:
	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
	bra.l		facc_out_x
7340
7341#########################################################################
7342# fmove.s out ###########################################################
7343#########################################################################
#
# fout_sgl(): emulate "fmove.s <fp0>,<ea>".
# In: a0 = ptr to ext-prec src, d0 = rnd prec/mode, a6 = exc frame.
# Out: single-precision result stored to memory <ea> or to Dn; possible
#      inex2/ainex bits OR'd into USER_FPSR. Overflow/underflow cases
#      branch to the dedicated handlers below.
#
fout_sgl:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w		SRC_EX(%a0),%d0		# extract exponent
	andi.w		&0x7fff,%d0		# strip sign

	cmpi.w		%d0,&SGL_HI		# will operand overflow?
	bgt.w		fout_sgl_ovfl		# yes; go handle OVFL
	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
	cmpi.w		%d0,&SGL_LO		# will operand underflow?
	blt.w		fout_sgl_unfl		# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.s"
# Unnormalized inputs can come through this point.
#
fout_sgl_exg:
	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmov.s		%fp0,%d0		# store does convert and round

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.w		%d1,2+USER_FPSR(%a6) 	# set possible inex2/ainex

fout_sgl_exg_write:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	rts

# destination is a data register: store single result into Dn.
fout_sgl_exg_write_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l
	rts
7399
7400#
7401# here, we know that the operand would UNFL if moved out to single prec,
7402# so, denorm and round and then use generic store single routine to
7403# write the value to memory.
7404#
7405fout_sgl_unfl:
7406	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7407
7408	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7409	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7410	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7411	mov.l		%a0,-(%sp)
7412
7413	clr.l		%d0			# pass: S.F. = 0
7414
7415	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
7416	bne.b		fout_sgl_unfl_cont	# let DENORMs fall through
7417
7418	lea		FP_SCR0(%a6),%a0
7419	bsr.l		norm			# normalize the DENORM
7420
7421fout_sgl_unfl_cont:
7422	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
7423	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
7424	bsr.l		unf_res			# calc default underflow result
7425
7426	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
7427	bsr.l		dst_sgl			# convert to single prec
7428
7429	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7430	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7431	beq.b		fout_sgl_unfl_dn	# must save to integer regfile
7432
7433	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7434	bsr.l		_dmem_write_long	# write long
7435
7436	tst.l		%d1			# did dstore fail?
7437	bne.l		facc_out_l		# yes
7438
7439	bra.b		fout_sgl_unfl_chkexc
7440
7441fout_sgl_unfl_dn:
7442	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7443	andi.w		&0x7,%d1
7444	bsr.l		store_dreg_l
7445
7446fout_sgl_unfl_chkexc:
7447	mov.b		FPCR_ENABLE(%a6),%d1
7448	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
7449	bne.w		fout_sd_exc_unfl	# yes
7450	addq.l		&0x4,%sp
7451	rts
7452
7453#
7454# it's definitely an overflow so call ovf_res to get the correct answer
7455#
7456fout_sgl_ovfl:
7457	tst.b		3+SRC_HI(%a0)		# is result inexact?
7458	bne.b		fout_sgl_ovfl_inex2
7459	tst.l		SRC_LO(%a0)		# is result inexact?
7460	bne.b		fout_sgl_ovfl_inex2
7461	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7462	bra.b		fout_sgl_ovfl_cont
7463fout_sgl_ovfl_inex2:
7464	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7465
7466fout_sgl_ovfl_cont:
7467	mov.l		%a0,-(%sp)
7468
7469# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
7470# overflow result. DON'T save the returned ccodes from ovf_res() since
7471# fmove out doesn't alter them.
7472	tst.b		SRC_EX(%a0)		# is operand negative?
7473	smi		%d1			# set if so
7474	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
7475	bsr.l		ovf_res			# calc OVFL result
7476	fmovm.x		(%a0),&0x80		# load default overflow result
7477	fmov.s		%fp0,%d0		# store to single
7478
7479	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7480	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7481	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile
7482
7483	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7484	bsr.l		_dmem_write_long	# write long
7485
7486	tst.l		%d1			# did dstore fail?
7487	bne.l		facc_out_l		# yes
7488
7489	bra.b		fout_sgl_ovfl_chkexc
7490
7491fout_sgl_ovfl_dn:
7492	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7493	andi.w		&0x7,%d1
7494	bsr.l		store_dreg_l
7495
7496fout_sgl_ovfl_chkexc:
7497	mov.b		FPCR_ENABLE(%a6),%d1
7498	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
7499	bne.w		fout_sd_exc_ovfl	# yes
7500	addq.l		&0x4,%sp
7501	rts
7502
7503#
7504# move out MAY overflow:
7505# (1) force the exp to 0x3fff
7506# (2) do a move w/ appropriate rnd mode
7507# (3) if exp still equals zero, then insert original exponent
7508#	for the correct result.
7509#     if exp now equals one, then it overflowed so call ovf_res.
7510#
7511fout_sgl_may_ovfl:
7512	mov.w		SRC_EX(%a0),%d1		# fetch current sign
7513	andi.w		&0x8000,%d1		# keep it,clear exp
7514	ori.w		&0x3fff,%d1		# insert exp = 0
7515	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
7516	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7517	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7518
7519	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
7520
7521	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
7522	fmov.l		&0x0,%fpcr		# clear FPCR
7523
7524	fabs.x		%fp0			# need absolute value
7525	fcmp.b		%fp0,&0x2		# did exponent increase?
7526	fblt.w		fout_sgl_exg		# no; go finish NORM
7527	bra.w		fout_sgl_ovfl		# yes; go handle overflow
7528
7529################
7530
# fout_sd_exc_unfl() / fout_sd_exc_ovfl(): build the exceptional operand
# (EXOP) for enabled UNFL/INEX on an fmove.s/fmove.d out.
# In: saved a0 (ptr to original ext-prec src) on top of the system stack;
#     L_SCR3 holds the rnd prec,mode word.
# Out: EXOP returned in fp1. The UNFL variant first re-normalizes a
#      DENORM source and inserts the negated shift count as the exponent.
fout_sd_exc_unfl:
	mov.l		(%sp)+,%a0

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
	bne.b		fout_sd_exc_cont	# no

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm
	neg.l		%d0			# new exp = -(shift amount)
	andi.w		&0x7fff,%d0
	bfins		%d0,FP_SCR0_EX(%a6){&1:&15} # insert, preserving sign bit
	bra.b		fout_sd_exc_cont

fout_sd_exc:
fout_sd_exc_ovfl:
	mov.l		(%sp)+,%a0		# restore a0

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

# common tail: round FP_SCR0 to the precision/mode saved in L_SCR3 using
# the software _round() routine, then return it as the EXOP in fp1.
fout_sd_exc_cont:
	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit
	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM

# build the d1 argument for _round(): precision bits in the upper word,
# rounding mode in the lower word, both extracted from L_SCR3.
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x0c,%d1
	swap		%d1
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x03,%d1
	clr.l		%d0			# pass: zero g,r,s
	bsr.l		_round			# round the DENORM

	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
	beq.b		fout_sd_exc_done	# no
	bset		&0x7,FP_SCR0_EX(%a6)	# yes

fout_sd_exc_done:
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts
7578
7579#################################################################
7580# fmove.d out ###################################################
7581#################################################################
# fout_dbl(): emulate "fmove.d <fp0>,<ea>".
# Same structure as fout_sgl but for double precision; the 8-byte result
# is assembled in L_SCR1/L_SCR2 and written with _dmem_write.
# In: a0 = ptr to ext-prec src, d0 = rnd prec/mode, a6 = exc frame.
fout_dbl:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w		SRC_EX(%a0),%d0		# extract exponent
	andi.w		&0x7fff,%d0		# strip sign

	cmpi.w		%d0,&DBL_HI		# will operand overflow?
	bgt.w		fout_dbl_ovfl		# yes; go handle OVFL
	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL
	cmpi.w		%d0,&DBL_LO		# will operand underflow?
	blt.w		fout_dbl_unfl		# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.d"
# Unnormalized inputs can come through this point.
#
fout_dbl_exg:
	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d0		# save FPSR

	or.w		%d0,2+USER_FPSR(%a6) 	# set possible inex2/ainex

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	rts					# no; so we're finished
7628
7629#
7630# here, we know that the operand would UNFL if moved out to double prec,
7631# so, denorm and round and then use generic store double routine to
7632# write the value to memory.
7633#
7634fout_dbl_unfl:
7635	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7636
7637	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7638	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7639	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7640	mov.l		%a0,-(%sp)
7641
7642	clr.l		%d0			# pass: S.F. = 0
7643
7644	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
7645	bne.b		fout_dbl_unfl_cont	# let DENORMs fall through
7646
7647	lea		FP_SCR0(%a6),%a0
7648	bsr.l		norm			# normalize the DENORM
7649
7650fout_dbl_unfl_cont:
7651	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
7652	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
7653	bsr.l		unf_res			# calc default underflow result
7654
7655	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
7656	bsr.l		dst_dbl			# convert to single prec
7657	mov.l		%d0,L_SCR1(%a6)
7658	mov.l		%d1,L_SCR2(%a6)
7659
7660	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
7661	lea		L_SCR1(%a6),%a0		# pass: src addr
7662	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
7663	bsr.l		_dmem_write		# store dbl fop to memory
7664
7665	tst.l		%d1			# did dstore fail?
7666	bne.l		facc_out_d		# yes
7667
7668	mov.b		FPCR_ENABLE(%a6),%d1
7669	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
7670	bne.w		fout_sd_exc_unfl	# yes
7671	addq.l		&0x4,%sp
7672	rts
7673
7674#
7675# it's definitely an overflow so call ovf_res to get the correct answer
7676#
7677fout_dbl_ovfl:
7678	mov.w		2+SRC_LO(%a0),%d0
7679	andi.w		&0x7ff,%d0
7680	bne.b		fout_dbl_ovfl_inex2
7681
7682	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7683	bra.b		fout_dbl_ovfl_cont
7684fout_dbl_ovfl_inex2:
7685	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7686
7687fout_dbl_ovfl_cont:
7688	mov.l		%a0,-(%sp)
7689
7690# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
7691# overflow result. DON'T save the returned ccodes from ovf_res() since
7692# fmove out doesn't alter them.
7693	tst.b		SRC_EX(%a0)		# is operand negative?
7694	smi		%d1			# set if so
7695	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
7696	bsr.l		ovf_res			# calc OVFL result
7697	fmovm.x		(%a0),&0x80		# load default overflow result
7698	fmov.d		%fp0,L_SCR1(%a6)	# store to double
7699
7700	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
7701	lea		L_SCR1(%a6),%a0		# pass: src addr
7702	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
7703	bsr.l		_dmem_write		# store dbl fop to memory
7704
7705	tst.l		%d1			# did dstore fail?
7706	bne.l		facc_out_d		# yes
7707
7708	mov.b		FPCR_ENABLE(%a6),%d1
7709	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
7710	bne.w		fout_sd_exc_ovfl	# yes
7711	addq.l		&0x4,%sp
7712	rts
7713
7714#
7715# move out MAY overflow:
7716# (1) force the exp to 0x3fff
7717# (2) do a move w/ appropriate rnd mode
7718# (3) if exp still equals zero, then insert original exponent
7719#	for the correct result.
7720#     if exp now equals one, then it overflowed so call ovf_res.
7721#
7722fout_dbl_may_ovfl:
7723	mov.w		SRC_EX(%a0),%d1		# fetch current sign
7724	andi.w		&0x8000,%d1		# keep it,clear exp
7725	ori.w		&0x3fff,%d1		# insert exp = 0
7726	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
7727	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7728	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7729
7730	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
7731
7732	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
7733	fmov.l		&0x0,%fpcr		# clear FPCR
7734
7735	fabs.x		%fp0			# need absolute value
7736	fcmp.b		%fp0,&0x2		# did exponent increase?
7737	fblt.w		fout_dbl_exg		# no; go finish NORM
7738	bra.w		fout_dbl_ovfl		# yes; go handle overflow
7739
7740#########################################################################
7741# XDEF ****************************************************************	#
7742# 	dst_dbl(): create double precision value from extended prec.	#
7743#									#
7744# XREF ****************************************************************	#
7745#	None								#
7746#									#
7747# INPUT ***************************************************************	#
7748#	a0 = pointer to source operand in extended precision		#
7749# 									#
7750# OUTPUT **************************************************************	#
7751#	d0 = hi(double precision result)				#
7752#	d1 = lo(double precision result)				#
7753#									#
7754# ALGORITHM ***********************************************************	#
7755#									#
7756#  Changes extended precision to double precision.			#
7757#  Note: no attempt is made to round the extended value to double.	#
7758#	dbl_sign = ext_sign						#
#	dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias)		#
7760#	get rid of ext integer bit					#
#	dbl_mant = ext_mant{62:11}					#
7762#									#
7763#	    	---------------   ---------------    ---------------	#
7764#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
7765#	    	---------------   ---------------    ---------------	#
7766#	   	 95	    64    63 62	      32      31     11	  0	#
7767#				     |			     |		#
7768#				     |			     |		#
7769#				     |			     |		#
7770#		 	             v   		     v		#
7771#	    		      ---------------   ---------------		#
7772#  double   ->  	      |s|exp| mant  |   |  mant       |		#
7773#	    		      ---------------   ---------------		#
7774#	   	 	      63     51   32   31	       0	#
7775#									#
7776#########################################################################
7777
# dst_dbl(): repack an extended-precision value as a double (no rounding;
# the caller has already rounded). In: a0 = ptr to ext-prec operand.
# Out: d0 = hi lword, d1 = lo lword of the double. Clobbers L_SCR1/L_SCR2.
dst_dbl:
	clr.l		%d0			# clear d0
	mov.w		FTEMP_EX(%a0),%d0	# get exponent
	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
	addi.w		&DBL_BIAS,%d0		# add double precision bias
	tst.b		FTEMP_HI(%a0)		# is number a denorm?
	bmi.b		dst_get_dupper		# no
	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
dst_get_dupper:
	swap		%d0			# d0 now in upper word
	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
	tst.b		FTEMP_EX(%a0)		# test sign
	bpl.b		dst_get_dman		# if positive, go process mantissa
	bset		&0x1f,%d0		# if negative, set sign
dst_get_dman:
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms
	or.l		%d1,%d0			# put these bits in ms word of double
	mov.l		%d0,L_SCR1(%a6)		# put the new exp back on the stack
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	mov.l		&21,%d0			# load shift count
	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory
	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
	bfextu		%d1{&0:&21},%d0		# get ls 21 bits of double
	mov.l		L_SCR2(%a6),%d1
	or.l		%d0,%d1			# put them in double result
	mov.l		L_SCR1(%a6),%d0
	rts
7807
7808#########################################################################
7809# XDEF ****************************************************************	#
7810# 	dst_sgl(): create single precision value from extended prec	#
7811#									#
7812# XREF ****************************************************************	#
7813#									#
7814# INPUT ***************************************************************	#
7815#	a0 = pointer to source operand in extended precision		#
7816# 									#
7817# OUTPUT **************************************************************	#
7818#	d0 = single precision result					#
7819#									#
7820# ALGORITHM ***********************************************************	#
7821#									#
7822# Changes extended precision to single precision.			#
7823#	sgl_sign = ext_sign						#
7824#	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
7825#	get rid of ext integer bit					#
#	sgl_mant = ext_mant{62:40}					#
7827#									#
7828#	    	---------------   ---------------    ---------------	#
7829#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
7830#	    	---------------   ---------------    ---------------	#
7831#	   	 95	    64    63 62	   40 32      31     12	  0	#
7832#				     |	   |				#
7833#				     |	   |				#
7834#				     |	   |				#
7835#		 	             v     v				#
7836#	    		      ---------------				#
7837#  single   ->  	      |s|exp| mant  |				#
7838#	    		      ---------------				#
7839#	   	 	      31     22     0				#
7840#									#
7841#########################################################################
7842
# dst_sgl(): repack an extended-precision value as a single (no rounding;
# the caller has already rounded). In: a0 = ptr to ext-prec operand.
# Out: d0 = single-precision result.
dst_sgl:
	clr.l		%d0
	mov.w		FTEMP_EX(%a0),%d0	# get exponent
	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
	addi.w		&SGL_BIAS,%d0		# add single precision bias
	tst.b		FTEMP_HI(%a0)		# is number a denorm?
	bmi.b		dst_get_supper		# no
	subq.w		&0x1,%d0		# yes; denorm bias = SGL_BIAS - 1
dst_get_supper:
	swap		%d0			# put exp in upper word of d0
	lsl.l		&0x7,%d0		# shift it into single exp bits
	tst.b		FTEMP_EX(%a0)		# test sign
	bpl.b		dst_get_sman		# if positive, continue
	bset		&0x1f,%d0		# if negative, put in sign first
dst_get_sman:
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	andi.l		&0x7fffff00,%d1		# get upper 23 bits of ms
	lsr.l		&0x8,%d1		# and put them flush right
	or.l		%d1,%d0			# put these bits in ms word of single
	rts
7863
7864##############################################################################
# fout_pack(): emulate "fmove.p <fp0>,<ea>{#k/Dn}" (packed-decimal store).
# In: a6 = exc frame; source operand in FP_SRC. Fetches the k-factor
# (static from the command word, or dynamic from Dn), converts the
# extended value to packed BCD via bindec(), then writes 12 bytes.
# Non-NORM inputs (INF/NAN/ZERO) are stored with a cleared reserved
# field; SNANs additionally set SNAN/AIOP and the snan bit.
fout_pack:
	bsr.l		_calc_ea_fout		# fetch the <ea>
	mov.l		%a0,-(%sp)		# save dst addr for the write

	mov.b		STAG(%a6),%d0		# fetch input type
	bne.w		fout_pack_not_norm	# input is not NORM

fout_pack_norm:
	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
	beq.b		fout_pack_s		# static

# dynamic k-factor: fetch it from the data register named in the cmd word
fout_pack_d:
	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
	lsr.b		&0x4,%d1
	andi.w		&0x7,%d1

	bsr.l		fetch_dreg		# fetch Dn w/ k-factor

	bra.b		fout_pack_type
fout_pack_s:
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field

fout_pack_type:
	bfexts		%d0{&25:&7},%d0		# extract k-factor
	mov.l	%d0,-(%sp)			# save k-factor across bindec

	lea		FP_SRC(%a6),%a0		# pass: ptr to input

# bindec is currently scrambling FP_SRC for denorm inputs.
# we'll have to change this, but for now, tough luck!!!
	bsr.l		bindec			# convert xprec to packed

#	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields

	mov.l	(%sp)+,%d0			# restore k-factor

# if any mantissa digit is non-zero, the packed result stands as-is
	tst.b		3+FP_SCR0_EX(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_HI(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_LO(%a6)
	bne.b		fout_pack_set

# add the extra condition that only if the k-factor was zero, too, should
# we zero the exponent
	tst.l		%d0
	bne.b		fout_pack_set
# "mantissa" is all zero which means that the answer is zero. but, the '040
# algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
# if the mantissa is zero, I will zero the exponent, too.
# the question now is whether the exponents sign bit is allowed to be non-zero
# for a zero, also...
	andi.w		&0xf000,FP_SCR0(%a6)

fout_pack_set:

	lea		FP_SCR0(%a6),%a0	# pass: src addr

fout_pack_write:
	mov.l		(%sp)+,%a1		# pass: dst addr
	mov.l		&0xc,%d0		# pass: opsize is 12 bytes

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_pack_a7

	bsr.l		_dmem_write		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_pack_a7:
	bsr.l		_mem_write2		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	rts

# input was not a NORM: DENORMs re-enter the normal path; INF/NAN/ZERO
# are written out directly from FP_SRC with the reserved word cleared.
fout_pack_not_norm:
	cmpi.b		%d0,&DENORM		# is it a DENORM?
	beq.w		fout_pack_norm		# yes
	lea		FP_SRC(%a6),%a0
	clr.w		2+FP_SRC_EX(%a6)
	cmpi.b		%d0,&SNAN		# is it an SNAN?
	beq.b		fout_pack_snan		# yes
	bra.b		fout_pack_write		# no

fout_pack_snan:
	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
	bra.b		fout_pack_write
7961
7962#########################################################################
7963# XDEF ****************************************************************	#
7964# 	fmul(): emulates the fmul instruction				#
7965#	fsmul(): emulates the fsmul instruction				#
7966#	fdmul(): emulates the fdmul instruction				#
7967#									#
7968# XREF ****************************************************************	#
7969#	scale_to_zero_src() - scale src exponent to zero		#
7970#	scale_to_zero_dst() - scale dst exponent to zero		#
7971#	unf_res() - return default underflow result			#
7972#	ovf_res() - return default overflow result			#
7973# 	res_qnan() - return QNAN result					#
7974# 	res_snan() - return SNAN result					#
7975#									#
7976# INPUT ***************************************************************	#
7977#	a0 = pointer to extended precision source operand		#
7978#	a1 = pointer to extended precision destination operand		#
7979#	d0  rnd prec,mode						#
7980#									#
7981# OUTPUT **************************************************************	#
7982#	fp0 = result							#
7983#	fp1 = EXOP (if exception occurred)				#
7984#									#
7985# ALGORITHM ***********************************************************	#
7986#	Handle NANs, infinities, and zeroes as special cases. Divide	#
7987# norms/denorms into ext/sgl/dbl precision.				#
7988#	For norms/denorms, scale the exponents such that a multiply	#
7989# instruction won't cause an exception. Use the regular fmul to		#
7990# compute a result. Check if the regular operands would have taken	#
7991# an exception. If so, return the default overflow/underflow result	#
7992# and return the EXOP if exceptions are enabled. Else, scale the 	#
7993# result operand to the proper exponent.				#
7994#									#
7995#########################################################################
7996
	align 		0x10
# scale-factor thresholds used by fmul: after both operands are scaled
# to a zero exponent, the combined scale factor is compared against the
# per-precision max/min exponent distance to predict overflow/underflow.
tbl_fmul_ovfl:
	long		0x3fff - 0x7ffe		# ext_max
	long		0x3fff - 0x407e		# sgl_max
	long		0x3fff - 0x43fe		# dbl_max
tbl_fmul_unfl:
	long		0x3fff + 0x0001		# ext_unfl
	long		0x3fff - 0x3f80		# sgl_unfl
	long		0x3fff - 0x3c00		# dbl_unfl
8006
	global		fsmul
# fsmul(): emulate fsmul — force single-precision rounding into d0's
# precision field, then share the common fmul path.
fsmul:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fmul
8012
	global		fdmul
# fdmul(): emulate fdmul — force double-precision rounding into d0's
# precision field, then fall through into fmul.
fdmul:
	andi.b		&0x30,%d0
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
8017
8018	global		fmul
8019fmul:
8020	mov.l		%d0,L_SCR3(%a6)		# store rnd info
8021
8022	clr.w		%d1
8023	mov.b		DTAG(%a6),%d1
8024	lsl.b		&0x3,%d1
8025	or.b		STAG(%a6),%d1		# combine src tags
8026	bne.w		fmul_not_norm		# optimize on non-norm input
8027
8028fmul_norm:
8029	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
8030	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
8031	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
8032
8033	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8034	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8035	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8036
8037	bsr.l		scale_to_zero_src	# scale src exponent
8038	mov.l		%d0,-(%sp)		# save scale factor 1
8039
8040	bsr.l		scale_to_zero_dst	# scale dst exponent
8041
8042	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2
8043
8044	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
8045	lsr.b		&0x6,%d1		# shift to lo bits
8046	mov.l		(%sp)+,%d0		# load S.F.
8047	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
8048	beq.w		fmul_may_ovfl		# result may rnd to overflow
8049	blt.w		fmul_ovfl		# result will overflow
8050
8051	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
8052	beq.w		fmul_may_unfl		# result may rnd to no unfl
8053	bgt.w		fmul_unfl		# result will underflow
8054
8055#
8056# NORMAL:
8057# - the result of the multiply operation will neither overflow nor underflow.
8058# - do the multiply to the proper precision and rounding mode.
8059# - scale the result exponent using the scale factor. if both operands were
8060# normalized then we really don't need to go through this scaling. but for now,
8061# this will do.
8062#
8063fmul_normal:
8064	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8065
8066	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR to user rnd prec,mode
8067	fmov.l		&0x0,%fpsr		# clear FPSR
8068
8069	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8070
8071	fmov.l		%fpsr,%d1		# save status
8072	fmov.l		&0x0,%fpcr		# clear FPCR
8073
8074	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8075
8076# also entered from fmul_may_ovfl/fmul_may_unfl when no exception occurred;
8077# d0 = scale factor (0x3fff - true result exponent bias shift)
8078fmul_normal_exit:
8079	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8080	mov.l		%d2,-(%sp)		# save d2
8081	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8082	mov.l		%d1,%d2			# make a copy
8083	andi.l		&0x7fff,%d1		# strip sign
8084	andi.w		&0x8000,%d2		# keep old sign
8085	sub.l		%d0,%d1			# subtract scale factor (restores true exponent)
8086	or.w		%d2,%d1			# concat old sign,new exp
8087	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8088	mov.l		(%sp)+,%d2		# restore d2
8089	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
8090	rts
8089
8090#
8091# OVERFLOW:
8092# - the result of the multiply operation is an overflow.
8093# - do the multiply to the proper precision and rounding mode in order to
8094# set the inexact bits.
8095# - calculate the default result and return it in fp0.
8096# - if overflow or inexact is enabled, we need a multiply result rounded to
8097# extended precision. if the original operation was extended, then we have this
8098# result. if the original operation was single or double, we have to do another
8099# multiply using extended precision and the correct rounding mode. the result
8100# of this operation then has its exponent scaled by -0x6000 to create the
8101# exceptional operand.
8102#
8103fmul_ovfl:
8104	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8105
8106	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR to user rnd prec,mode
8107	fmov.l		&0x0,%fpsr		# clear FPSR
8108
8109	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8110
8111	fmov.l		%fpsr,%d1		# save status
8112	fmov.l		&0x0,%fpcr		# clear FPCR
8113
8114	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8115
8116# save setting this until now because this is where fmul_may_ovfl may jump in
8117fmul_ovfl_tst:
8118	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8119
8120	mov.b		FPCR_ENABLE(%a6),%d1
8121	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
8122	bne.b		fmul_ovfl_ena		# yes
8123
8124# neither is enabled; calculate and return the default overflow result
8125fmul_ovfl_dis:
8126	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
8127	sne		%d1			# set sign param accordingly
8128	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
8129	bsr.l		ovf_res			# calculate default result
8130	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
8131	fmovm.x		(%a0),&0x80		# return default result in fp0
8132	rts
8133
8134#
8135# OVFL is enabled; Create EXOP:
8136# - if precision is extended, then we have the EXOP. simply bias the exponent
8137# with an extra -0x6000. if the precision is single or double, we need to
8138# calculate a result rounded to extended precision.
8139#
8140fmul_ovfl_ena:
8141	mov.l		L_SCR3(%a6),%d1
8142	andi.b		&0xc0,%d1		# test the rnd prec
8143	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl
8144
8145fmul_ovfl_ena_cont:
8146	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
8147
8148	mov.l		%d2,-(%sp)		# save d2
8149	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8150	mov.w		%d1,%d2			# make a copy
8151	andi.l		&0x7fff,%d1		# strip sign
8152	sub.l		%d0,%d1			# subtract scale factor (restores true exponent)
8153	subi.l		&0x6000,%d1		# subtract EXOP bias
8154	andi.w		&0x7fff,%d1		# clear sign bit
8155	andi.w		&0x8000,%d2		# keep old sign
8156	or.w		%d2,%d1			# concat old sign,new exp
8157	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8158	mov.l		(%sp)+,%d2		# restore d2
8159	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8160	bra.b		fmul_ovfl_dis
8161
8162# sgl/dbl prec: redo the multiply in extended precision, user's rnd mode only
8163fmul_ovfl_ena_sd:
8164	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8165
8166	mov.l		L_SCR3(%a6),%d1
8167	andi.b		&0x30,%d1		# keep rnd mode only
8168	fmov.l		%d1,%fpcr		# set FPCR
8169
8170	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8171
8172	fmov.l		&0x0,%fpcr		# clear FPCR
8173	bra.b		fmul_ovfl_ena_cont
8173
8174#
8175# may OVERFLOW:
8176# - the result of the multiply operation MAY overflow.
8177# - do the multiply to the proper precision and rounding mode in order to
8178# set the inexact bits.
8179# - calculate the default result and return it in fp0.
8180#
8181fmul_may_ovfl:
8182	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8183
8184	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR to user rnd prec,mode
8185	fmov.l		&0x0,%fpsr		# clear FPSR
8186
8187	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8188
8189	fmov.l		%fpsr,%d1		# save status
8190	fmov.l		&0x0,%fpcr		# clear FPCR
8191
8192	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8193
8194	fabs.x		%fp0,%fp1		# make a copy of result
8195	fcmp.b		%fp1,&0x2		# is |result| >= 2.0?
8196	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred
8197
8198# no, it didn't overflow; we have correct result
8199	bra.w		fmul_normal_exit
8200
8201#
8202# UNDERFLOW:
8203# - the result of the multiply operation is an underflow.
8204# - do the multiply to the proper precision and rounding mode in order to
8205# set the inexact bits.
8206# - calculate the default result and return it in fp0.
8207# - if underflow or inexact is enabled, we need a multiply result rounded to
8208# extended precision. if the original operation was extended, then we have this
8209# result. if the original operation was single or double, we have to do another
8210# multiply using extended precision and the correct rounding mode. the result
8211# of this operation then has its exponent scaled by +0x6000 to create the
8212# exceptional operand.
8213#
8214fmul_unfl:
8215	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8216
8217# use only extended precision, round to zero. then, let
8218# the unf_res() routine figure out all the rest; this way
8219# we will get the correct answer.
8220	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8221
8222	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR: ext prec, round-to-zero
8223	fmov.l		&0x0,%fpsr		# clear FPSR
8224
8225	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8226
8227	fmov.l		%fpsr,%d1		# save status
8228	fmov.l		&0x0,%fpcr		# clear FPCR
8229
8230	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8231
8232	mov.b		FPCR_ENABLE(%a6),%d1
8233	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
8234	bne.b		fmul_unfl_ena		# yes
8235
8236# neither is enabled; let unf_res() build the default denorm/zero result
8237fmul_unfl_dis:
8238	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8239
8240	lea		FP_SCR0(%a6),%a0	# pass: result addr
8241	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
8242	bsr.l		unf_res			# calculate default result
8243	or.b		%d0,FPSR_CC(%a6)	# unf_res2 may have set 'Z'
8244	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
8245	rts
8245
8246#
8247# UNFL is enabled; create the EXOP (result rounded to extended precision,
8248# exponent biased by +0x6000) and return it in fp1.
8249#
8249fmul_unfl_ena:
8250	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
8251
8252	mov.l		L_SCR3(%a6),%d1
8253	andi.b		&0xc0,%d1		# is precision extended?
8254	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl
8255
8256# if the rnd mode is anything but RZ, then we have to re-do the above
8257# multiplication because we used RZ for all.
8258	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR to user rnd prec,mode
8259
8260fmul_unfl_ena_cont:
8261	fmov.l		&0x0,%fpsr		# clear FPSR
8262
8263	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
8264
8265	fmov.l		&0x0,%fpcr		# clear FPCR
8266
8267	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
8268	mov.l		%d2,-(%sp)		# save d2
8269	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8270	mov.l		%d1,%d2			# make a copy
8271	andi.l		&0x7fff,%d1		# strip sign
8272	andi.w		&0x8000,%d2		# keep old sign
8273	sub.l		%d0,%d1			# subtract scale factor (restores true exponent)
8274	addi.l		&0x6000,%d1		# add EXOP bias
8275	andi.w		&0x7fff,%d1		# clear sign position
8276	or.w		%d2,%d1			# concat old sign,new exp
8277	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8278	mov.l		(%sp)+,%d2		# restore d2
8279	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8280	bra.w		fmul_unfl_dis
8281
8282# sgl/dbl prec: redo in extended precision with user's rnd mode only
8283fmul_unfl_ena_sd:
8284	mov.l		L_SCR3(%a6),%d1
8285	andi.b		&0x30,%d1		# use only rnd mode
8286	fmov.l		%d1,%fpcr		# set FPCR
8287
8288	bra.b		fmul_unfl_ena_cont
8288
8289# MAY UNDERFLOW:
8290# -use the correct rounding mode and precision. this code favors operations
8291# that do not underflow.
8292fmul_may_unfl:
8293	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8294
8295	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR to user rnd prec,mode
8296	fmov.l		&0x0,%fpsr		# clear FPSR
8297
8298	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8299
8300	fmov.l		%fpsr,%d1		# save status
8301	fmov.l		&0x0,%fpcr		# clear FPCR
8302
8303	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8304
8305	fabs.x		%fp0,%fp1		# make a copy of result
8306	fcmp.b		%fp1,&0x2		# compare |result| against 2.0
8307	fbgt.w		fmul_normal_exit	# |result| > 2; no underflow occurred
8308	fblt.w		fmul_unfl		# |result| < 2; underflow occurred
8309
8310#
8311# we still don't know if underflow occurred. result is ~ equal to 2. but,
8312# we don't know if the result was an underflow that rounded up to a 2 or
8313# a normalized number that rounded down to a 2. so, redo the entire operation
8314# using RZ as the rounding mode to see what the pre-rounded result is.
8315# this case should be relatively rare.
8316#
8317	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand into fp1
8318
8319	mov.l		L_SCR3(%a6),%d1
8320	andi.b		&0xc0,%d1		# keep rnd prec
8321	ori.b		&rz_mode*0x10,%d1	# insert RZ
8322
8323	fmov.l		%d1,%fpcr		# set FPCR
8324	fmov.l		&0x0,%fpsr		# clear FPSR
8325
8326	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
8327
8328	fmov.l		&0x0,%fpcr		# clear FPCR
8329	fabs.x		%fp1			# make absolute value
8330	fcmp.b		%fp1,&0x2		# is |result| < 2.0?
8331	fbge.w		fmul_normal_exit	# no; no underflow occurred
8332	bra.w		fmul_unfl		# yes, underflow occurred
8333
8334################################################################################
8335
8336#
8337# Multiply: inputs are not both normalized; what are they?
8338#
8339fmul_not_norm:
8340	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1
8341	jmp		(tbl_fmul_op.b,%pc,%d1.w)
8342
8343	swbeg		&48
8344tbl_fmul_op:
8345	short		fmul_norm	- tbl_fmul_op # NORM x NORM
8346	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
8347	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
8348	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
8349	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
8350	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
8351	short		tbl_fmul_op	- tbl_fmul_op #
8352	short		tbl_fmul_op	- tbl_fmul_op #
8353
8354	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
8355	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
8356	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
8357	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
8358	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
8359	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
8360	short		tbl_fmul_op	- tbl_fmul_op #
8361	short		tbl_fmul_op	- tbl_fmul_op #
8362
8363	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
8364	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
8365	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
8366	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
8367	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
8368	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
8369	short		tbl_fmul_op	- tbl_fmul_op #
8370	short		tbl_fmul_op	- tbl_fmul_op #
8371
8372	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
8373	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
8374	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
8375	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
8376	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
8377	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
8378	short		tbl_fmul_op	- tbl_fmul_op #
8379	short		tbl_fmul_op	- tbl_fmul_op #
8380
8381	short		fmul_norm	- tbl_fmul_op # NORM x NORM
8382	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
8383	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
8384	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
8385	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
8386	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
8387	short		tbl_fmul_op	- tbl_fmul_op #
8388	short		tbl_fmul_op	- tbl_fmul_op #
8389
8390	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
8391	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
8392	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
8393	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
8394	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
8395	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
8396	short		tbl_fmul_op	- tbl_fmul_op #
8397	short		tbl_fmul_op	- tbl_fmul_op #
8398
8399fmul_res_operr:
8400	bra.l		res_operr
8401fmul_res_snan:
8402	bra.l		res_snan
8403fmul_res_qnan:
8404	bra.l		res_qnan
8405
8406#
8407# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
8408#
8409	global		fmul_zero		# global for fsglmul
8410fmul_zero:
8411	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
8412	mov.b		DST_EX(%a1),%d1
8413	eor.b		%d0,%d1
8414	bpl.b		fmul_zero_p		# result ZERO is pos.
8415fmul_zero_n:
8416	fmov.s		&0x80000000,%fp0	# load -ZERO
8417	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
8418	rts
8419fmul_zero_p:
8420	fmov.s		&0x00000000,%fp0	# load +ZERO
8421	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
8422	rts
8423
8424#
8425# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
8426#
8427# Note: The j-bit for an infinity is a don't-care. However, to be
8428# strictly compatible w/ the 68881/882, we make sure to return an
8429# INF w/ the j-bit set if the input INF j-bit was set. Destination
8430# INFs take priority.
8431#
8432	global		fmul_inf_dst		# global for fsglmul
8433fmul_inf_dst:
8434	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
8435	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
8436	mov.b		DST_EX(%a1),%d1
8437	eor.b		%d0,%d1
8438	bpl.b		fmul_inf_dst_p		# result INF is pos.
8439fmul_inf_dst_n:
8440	fabs.x		%fp0			# clear result sign
8441	fneg.x		%fp0			# set result sign
8442	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
8443	rts
8444fmul_inf_dst_p:
8445	fabs.x		%fp0			# clear result sign
8446	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
8447	rts
8448
8449	global		fmul_inf_src		# global for fsglmul
8450fmul_inf_src:
8451	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
8452	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
8453	mov.b		DST_EX(%a1),%d1
8454	eor.b		%d0,%d1
8455	bpl.b		fmul_inf_dst_p		# result INF is pos.
8456	bra.b		fmul_inf_dst_n
8457
8458#########################################################################
8459# XDEF ****************************************************************	#
8460#	fin(): emulates the fmove instruction				#
8461#	fsin(): emulates the fsmove instruction				#
8462#	fdin(): emulates the fdmove instruction				#
8463#									#
8464# XREF ****************************************************************	#
8465#	norm() - normalize mantissa for EXOP on denorm			#
8466#	scale_to_zero_src() - scale src exponent to zero		#
8467#	ovf_res() - return default overflow result			#
8468# 	unf_res() - return default underflow result			#
8469#	res_qnan_1op() - return QNAN result				#
8470#	res_snan_1op() - return SNAN result				#
8471#									#
8472# INPUT ***************************************************************	#
8473#	a0 = pointer to extended precision source operand		#
8474#	d0 = round prec/mode						#
8475# 									#
8476# OUTPUT **************************************************************	#
8477#	fp0 = result							#
8478#	fp1 = EXOP (if exception occurred)				#
8479#									#
8480# ALGORITHM ***********************************************************	#
8481# 	Handle NANs, infinities, and zeroes as special cases. Divide	#
8482# norms into extended, single, and double precision.			#
8483# 	Norms can be emulated w/ a regular fmove instruction. For	#
8484# sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
8485# if the result would have overflowed/underflowed. If so, use unf_res()	#
8486# or ovf_res() to return the default result. Also return EXOP if	#
8487# exception is enabled. If no exception, return the default result.	#
8488#	Unnorms don't pass through here.				#
8489#									#
8490#########################################################################
8491
8492	global		fsin
8493fsin:
8494	andi.b		&0x30,%d0		# clear rnd prec
8495	ori.b		&s_mode*0x10,%d0	# insert sgl precision
8496	bra.b		fin
8497
8498	global		fdin
8499fdin:
8500	andi.b		&0x30,%d0		# clear rnd prec
8501	ori.b		&d_mode*0x10,%d0	# insert dbl precision
8502						# fall through into fin below
8503	global		fin
8504fin:
8505	mov.l		%d0,L_SCR3(%a6)		# store rnd info
8506
8507	mov.b		STAG(%a6),%d1		# fetch src optype tag
8508	bne.w		fin_not_norm		# optimize on non-norm input
8509
8510#
8511# FP MOVE IN: NORMs and DENORMs ONLY!
8512#
8513fin_norm:
8514	andi.b		&0xc0,%d0		# is precision extended?
8515	bne.w		fin_not_ext		# no, so go handle dbl or sgl
8516
8517#
8518# precision selected is extended. so...we cannot get an underflow
8519# or overflow because of rounding to the correct precision. so...
8520# skip the scaling and unscaling...
8521#
8522	tst.b		SRC_EX(%a0)		# is the operand negative?
8523	bpl.b		fin_norm_done		# no
8524	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
8525fin_norm_done:
8526	fmovm.x		SRC(%a0),&0x80		# return result in fp0
8527	rts
8528
8529#
8530# for an extended precision DENORM, the UNFL exception bit is set
8531# the accrued bit is NOT set in this instance(no inexactness!)
8532#
8533fin_denorm:
8534	andi.b		&0xc0,%d0		# is precision extended?
8535	bne.w		fin_not_ext		# no, so go handle dbl or sgl
8536
8537	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8538	tst.b		SRC_EX(%a0)		# is the operand negative?
8539	bpl.b		fin_denorm_done		# no
8540	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
8541fin_denorm_done:
8542	fmovm.x		SRC(%a0),&0x80		# return result in fp0
8543	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
8544	bne.b		fin_denorm_unfl_ena	# yes
8545	rts
8546
8547#
8548# the input is an extended DENORM and underflow is enabled in the FPCR.
8549# normalize the mantissa and add the bias of 0x6000 to the resulting negative
8550# exponent and insert back into the operand.
8551#
8552fin_denorm_unfl_ena:
8553	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8554	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8555	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8556	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
8557	bsr.l		norm			# normalize result
8558	neg.w		%d0			# new exponent = -(shft val)
8559	addi.w		&0x6000,%d0		# add new bias to exponent
8560	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
8561	andi.w		&0x8000,%d1		# keep old sign
8562	andi.w		&0x7fff,%d0		# clear sign position
8563	or.w		%d1,%d0			# concat new exo,old sign
8564	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
8565	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8566	rts
8567
8568#
8569# operand is to be rounded to single or double precision
8570#
8571fin_not_ext:
8572	cmpi.b		%d0,&s_mode*0x10 	# separate sgl/dbl prec
8573	bne.b		fin_dbl
8574
8575#
8576# operand is to be rounded to single precision
8577#
8578fin_sgl:
8579	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8580	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8581	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8582	bsr.l		scale_to_zero_src	# calculate scale factor
8583
8584	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
8585	bge.w		fin_sd_unfl		# yes; go handle underflow
8586	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
8587	beq.w		fin_sd_may_ovfl		# maybe; go check
8588	blt.w		fin_sd_ovfl		# yes; go handle overflow
8589
8590#
8591# operand will NOT overflow or underflow when moved into the fp reg file
8592#
8593fin_sd_normal:
8594	fmov.l		&0x0,%fpsr		# clear FPSR
8595	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8596
8597	fmov.x		FP_SCR0(%a6),%fp0	# perform move
8598
8599	fmov.l		%fpsr,%d1		# save FPSR
8600	fmov.l		&0x0,%fpcr		# clear FPCR
8601
8602	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8603
8604fin_sd_normal_exit:
8605	mov.l		%d2,-(%sp)		# save d2
8606	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8607	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8608	mov.w		%d1,%d2			# make a copy
8609	andi.l		&0x7fff,%d1		# strip sign
8610	sub.l		%d0,%d1			# add scale factor
8611	andi.w		&0x8000,%d2		# keep old sign
8612	or.w		%d1,%d2			# concat old sign,new exponent
8613	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
8614	mov.l		(%sp)+,%d2		# restore d2
8615	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
8616	rts
8617
8618#
8619# operand is to be rounded to double precision
8620#
8621fin_dbl:
8622	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8623	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8624	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8625	bsr.l		scale_to_zero_src	# calculate scale factor
8626
8627	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
8628	bge.w		fin_sd_unfl		# yes; go handle underflow
8629	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
8630	beq.w		fin_sd_may_ovfl		# maybe; go check
8631	blt.w		fin_sd_ovfl		# yes; go handle overflow
8632	bra.w		fin_sd_normal		# no; ho handle normalized op
8633
8634#
8635# operand WILL underflow when moved in to the fp register file
8636#
8637fin_sd_unfl:
8638	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8639
8640	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
8641	bpl.b		fin_sd_unfl_tst
8642	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
8643
8644# if underflow or inexact is enabled, then go calculate the EXOP first.
8645fin_sd_unfl_tst:
8646	mov.b		FPCR_ENABLE(%a6),%d1
8647	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
8648	bne.b		fin_sd_unfl_ena		# yes
8649
8650fin_sd_unfl_dis:
8651	lea		FP_SCR0(%a6),%a0	# pass: result addr
8652	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
8653	bsr.l		unf_res			# calculate default result
8654	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
8655	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
8656	rts
8657
8658#
8659# operand will underflow AND underflow or inexact is enabled.
8660# therefore, we must return the result rounded to extended precision.
8661#
8662fin_sd_unfl_ena:
8663	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
8664	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
8665	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
8666
8667	mov.l		%d2,-(%sp)		# save d2
8668	mov.w		%d1,%d2			# make a copy
8669	andi.l		&0x7fff,%d1		# strip sign
8670	sub.l		%d0,%d1			# subtract scale factor
8671	andi.w		&0x8000,%d2		# extract old sign
8672	addi.l		&0x6000,%d1		# add new bias
8673	andi.w		&0x7fff,%d1
8674	or.w		%d1,%d2			# concat old sign,new exp
8675	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
8676	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
8677	mov.l		(%sp)+,%d2		# restore d2
8678	bra.b		fin_sd_unfl_dis
8679
8680#
8681# operand WILL overflow.
8682#
8683fin_sd_ovfl:
8684	fmov.l		&0x0,%fpsr		# clear FPSR
8685	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8686
8687	fmov.x		FP_SCR0(%a6),%fp0	# perform move
8688
8689	fmov.l		&0x0,%fpcr		# clear FPCR
8690	fmov.l		%fpsr,%d1		# save FPSR
8691
8692	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8693
8694fin_sd_ovfl_tst:
8695	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8696
8697	mov.b		FPCR_ENABLE(%a6),%d1
8698	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
8699	bne.b		fin_sd_ovfl_ena		# yes
8700
8701#
8702# OVFL is not enabled; therefore, we must create the default result by
8703# calling ovf_res().
8704#
8705fin_sd_ovfl_dis:
8706	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
8707	sne		%d1			# set sign param accordingly
8708	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
8709	bsr.l		ovf_res			# calculate default result
8710	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
8711	fmovm.x		(%a0),&0x80		# return default result in fp0
8712	rts
8713
8714#
8715# OVFL is enabled.
8716# the INEX2 bit has already been updated by the round to the correct precision.
8717# now, round to extended(and don't alter the FPSR).
8718#
8719fin_sd_ovfl_ena:
8720	mov.l		%d2,-(%sp)		# save d2
8721	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8722	mov.l		%d1,%d2			# make a copy
8723	andi.l		&0x7fff,%d1		# strip sign
8724	andi.w		&0x8000,%d2		# keep old sign
8725	sub.l		%d0,%d1			# add scale factor
8726	sub.l		&0x6000,%d1		# subtract bias
8727	andi.w		&0x7fff,%d1
8728	or.w		%d2,%d1
8729	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8730	mov.l		(%sp)+,%d2		# restore d2
8731	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8732	bra.b		fin_sd_ovfl_dis
8733
8734#
8735# the move in MAY overflow. so...
8736#
8737fin_sd_may_ovfl:
8738	fmov.l		&0x0,%fpsr		# clear FPSR
8739	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8740
8741	fmov.x		FP_SCR0(%a6),%fp0	# perform the move
8742
8743	fmov.l		%fpsr,%d1		# save status
8744	fmov.l		&0x0,%fpcr		# clear FPCR
8745
8746	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8747
8748	fabs.x		%fp0,%fp1		# make a copy of result
8749	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
8750	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred
8751
8752# no, it didn't overflow; we have correct result
8753	bra.w		fin_sd_normal_exit
8754
8755##########################################################################
8756
8757#
8758# operand is not a NORM: check its optype and branch accordingly
8759#
8760fin_not_norm:
8761	cmpi.b		%d1,&DENORM		# weed out DENORM
8762	beq.w		fin_denorm
8763	cmpi.b		%d1,&SNAN		# weed out SNANs
8764	beq.l		res_snan_1op
8765	cmpi.b		%d1,&QNAN		# weed out QNANs
8766	beq.l		res_qnan_1op
8767
8768#
8769# do the fmove in; at this point, only possible ops are ZERO and INF.
8770# use fmov to determine ccodes.
8771# prec:mode should be zero at this point but it won't affect answer anyways.
8772#
8773	fmov.x		SRC(%a0),%fp0		# do fmove in
8774	fmov.l		%fpsr,%d0		# no exceptions possible
8775	rol.l		&0x8,%d0		# put ccodes in lo byte
8776	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
8777	rts
8778
8779#########################################################################
8780# XDEF ****************************************************************	#
8781# 	fdiv(): emulates the fdiv instruction				#
8782#	fsdiv(): emulates the fsdiv instruction				#
8783#	fddiv(): emulates the fddiv instruction				#
8784#									#
8785# XREF ****************************************************************	#
8786#	scale_to_zero_src() - scale src exponent to zero		#
8787#	scale_to_zero_dst() - scale dst exponent to zero		#
8788#	unf_res() - return default underflow result			#
8789#	ovf_res() - return default overflow result			#
8790# 	res_qnan() - return QNAN result					#
8791# 	res_snan() - return SNAN result					#
8792#									#
8793# INPUT ***************************************************************	#
8794#	a0 = pointer to extended precision source operand		#
8795#	a1 = pointer to extended precision destination operand		#
8796#	d0  rnd prec,mode						#
8797#									#
8798# OUTPUT **************************************************************	#
8799#	fp0 = result							#
8800#	fp1 = EXOP (if exception occurred)				#
8801#									#
8802# ALGORITHM ***********************************************************	#
8803#	Handle NANs, infinities, and zeroes as special cases. Divide	#
8804# norms/denorms into ext/sgl/dbl precision.				#
8805#	For norms/denorms, scale the exponents such that a divide	#
8806# instruction won't cause an exception. Use the regular fdiv to		#
8807# compute a result. Check if the regular operands would have taken	#
8808# an exception. If so, return the default overflow/underflow result	#
8809# and return the EXOP if exceptions are enabled. Else, scale the 	#
8810# result operand to the proper exponent.				#
8811#									#
8812#########################################################################
8813
8814	align		0x10
8815# scale-factor thresholds, indexed by rnd precision (ext,sgl,dbl)
8815tbl_fdiv_unfl:
8816	long		0x3fff - 0x0000		# ext_unfl
8817	long		0x3fff - 0x3f81		# sgl_unfl
8818	long		0x3fff - 0x3c01		# dbl_unfl
8819
8820tbl_fdiv_ovfl:
8821	long		0x3fff - 0x7ffe		# ext overflow exponent
8822	long		0x3fff - 0x407e		# sgl overflow exponent
8823	long		0x3fff - 0x43fe		# dbl overflow exponent
8824
8825	global		fsdiv
8826fsdiv:
8827	andi.b		&0x30,%d0		# clear rnd prec
8828	ori.b		&s_mode*0x10,%d0	# insert sgl prec
8829	bra.b		fdiv
8830
8831	global		fddiv
8832fddiv:
8833	andi.b		&0x30,%d0		# clear rnd prec
8834	ori.b		&d_mode*0x10,%d0	# insert dbl prec
8835						# fall through into fdiv below
8836	global		fdiv
8837fdiv:
8838	mov.l		%d0,L_SCR3(%a6)		# store rnd info
8839
8840	clr.w		%d1
8841	mov.b		DTAG(%a6),%d1
8842	lsl.b		&0x3,%d1
8843	or.b		STAG(%a6),%d1		# combine src tags
8844
8845	bne.w		fdiv_not_norm		# optimize on non-norm input
8846
8847#
8848# DIVIDE: NORMs and DENORMs ONLY!
8849#
8850fdiv_norm:
8851	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
8852	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
8853	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
8854
8855	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8856	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8857	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8858
8859	bsr.l		scale_to_zero_src	# scale src exponent
8860	mov.l		%d0,-(%sp)		# save scale factor 1
8861
8862	bsr.l		scale_to_zero_dst	# scale dst exponent
8863
8864	neg.l		(%sp)			# SCALE FACTOR = scale2 - scale1
8865	add.l		%d0,(%sp)
8866
8867	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
8868	lsr.b		&0x6,%d1		# shift to lo bits
8869	mov.l		(%sp)+,%d0		# load S.F.
8870	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
8871	ble.w		fdiv_may_ovfl		# result may overflow; go check
8872
8873	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
8874	beq.w		fdiv_may_unfl		# maybe
8875	bgt.w		fdiv_unfl		# yes; go handle underflow
8876
8877fdiv_normal:
8878	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8879
8880	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR to user rnd prec,mode
8881	fmov.l		&0x0,%fpsr		# clear FPSR
8882
8883	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide
8884
8885	fmov.l		%fpsr,%d1		# save FPSR
8886	fmov.l		&0x0,%fpcr		# clear FPCR
8887
8888	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8889
8890# also entered from fdiv_no_ovfl and fdiv_may_unfl when no exception occurred
8890fdiv_normal_exit:
8891	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
8892	mov.l		%d2,-(%sp)		# store d2
8893	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8894	mov.l		%d1,%d2			# make a copy
8895	andi.l		&0x7fff,%d1		# strip sign
8896	andi.w		&0x8000,%d2		# keep old sign
8897	sub.l		%d0,%d1			# subtract scale factor (restores true exponent)
8898	or.w		%d2,%d1			# concat old sign,new exp
8899	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8900	mov.l		(%sp)+,%d2		# restore d2
8901	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
8902	rts
8903
8904# true overflow exponents, indexed by rnd precision (ext,sgl,dbl)
8904tbl_fdiv_ovfl2:
8905	long		0x7fff
8906	long		0x407f
8907	long		0x43ff
8908
8909fdiv_no_ovfl:
8910	mov.l		(%sp)+,%d0		# restore scale factor
8911	bra.b		fdiv_normal_exit
8912
8913fdiv_may_ovfl:
8914	mov.l		%d0,-(%sp)		# save scale factor
8915
8916	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8917
8918	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8919	fmov.l		&0x0,%fpsr		# clear FPSR
8920
8921	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
8922
8923	fmov.l		%fpsr,%d0
8924	fmov.l		&0x0,%fpcr
8925
8926	or.l		%d0,USER_FPSR(%a6)	# save INEX,N
8927
8928	fmovm.x		&0x01,-(%sp)		# save result to stack
8929	mov.w		(%sp),%d0		# fetch new exponent
8930	add.l		&0xc,%sp		# clear result from stack
8931	andi.l		&0x7fff,%d0		# strip sign
8932	sub.l		(%sp),%d0		# apply scale factor to get true exp
8933	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) # true exp >= ovfl exp?
8934	blt.b		fdiv_no_ovfl		# no; no overflow occurred
8935	mov.l		(%sp)+,%d0		# yes; reload scale factor
8936
8937fdiv_ovfl_tst:
8938	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8939
8940	mov.b		FPCR_ENABLE(%a6),%d1
8941	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
8942	bne.b		fdiv_ovfl_ena		# yes
8943
8944# neither is enabled; calculate and return the default overflow result
8944fdiv_ovfl_dis:
8945	btst		&neg_bit,FPSR_CC(%a6) 	# is result negative?
8946	sne		%d1			# set sign param accordingly
8947	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
8948	bsr.l		ovf_res			# calculate default result
8949	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
8950	fmovm.x		(%a0),&0x80		# return default result in fp0
8951	rts
8952
8953fdiv_ovfl_ena:
8954	mov.l		L_SCR3(%a6),%d1
8955	andi.b		&0xc0,%d1		# is precision extended?
8956	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl
8957
8958fdiv_ovfl_ena_cont:
8959	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
8960
8961	mov.l		%d2,-(%sp)		# save d2
8962	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8963	mov.w		%d1,%d2			# make a copy
8964	andi.l		&0x7fff,%d1		# strip sign
8965	sub.l		%d0,%d1			# subtract scale factor (restores true exponent)
8966	subi.l		&0x6000,%d1		# subtract EXOP bias
8967	andi.w		&0x7fff,%d1		# clear sign bit
8968	andi.w		&0x8000,%d2		# keep old sign
8969	or.w		%d2,%d1			# concat old sign,new exp
8970	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8971	mov.l		(%sp)+,%d2		# restore d2
8972	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8973	bra.b		fdiv_ovfl_dis
8974
8975# sgl/dbl prec: redo the divide in extended precision, user's rnd mode only
8975fdiv_ovfl_ena_sd:
8976	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8977
8978	mov.l		L_SCR3(%a6),%d1
8979	andi.b		&0x30,%d1		# keep rnd mode
8980	fmov.l		%d1,%fpcr		# set FPCR
8981
8982	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
8983
8984	fmov.l		&0x0,%fpcr		# clear FPCR
8985	bra.b		fdiv_ovfl_ena_cont
8986
8987# UNDERFLOW: do the divide in extended precision, round-to-zero, then let
8988# unf_res() compute the correctly-rounded default result.
8987fdiv_unfl:
8988	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8989
8990	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8991
8992	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR: ext prec, round-to-zero
8993	fmov.l		&0x0,%fpsr		# clear FPSR
8994
8995	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
8996
8997	fmov.l		%fpsr,%d1		# save status
8998	fmov.l		&0x0,%fpcr		# clear FPCR
8999
9000	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9001
9002	mov.b		FPCR_ENABLE(%a6),%d1
9003	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
9004	bne.b		fdiv_unfl_ena		# yes
9005
9006fdiv_unfl_dis:
9007	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
9008
9009	lea		FP_SCR0(%a6),%a0	# pass: result addr
9010	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
9011	bsr.l		unf_res			# calculate default result
9012	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
9013	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9014	rts
9015
9016#
9017# UNFL is enabled.
9018#
# build the EXOP: redo the divide into fp1, then re-bias the resulting
# exponent by +0x6000 and return the EXOP in fp1; the default result is
# still produced by falling back to fdiv_unfl_dis.
9019fdiv_unfl_ena:
9020	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
9021
9022	mov.l		L_SCR3(%a6),%d1
9023	andi.b		&0xc0,%d1		# is precision extended?
9024	bne.b		fdiv_unfl_ena_sd	# no, sgl or dbl
9025
9026	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9027
9028fdiv_unfl_ena_cont:
9029	fmov.l		&0x0,%fpsr		# clear FPSR
9030
9031	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
9032
9033	fmov.l		&0x0,%fpcr		# clear FPCR
9034
9035	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
9036	mov.l		%d2,-(%sp)		# save d2
9037	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
9038	mov.l		%d1,%d2			# make a copy
9039	andi.l		&0x7fff,%d1		# strip sign
9040	andi.w		&0x8000,%d2		# keep old sign
9041	sub.l		%d0,%d1			# subtract scale factor
9042	addi.l		&0x6000,%d1		# add bias
9043	andi.w		&0x7fff,%d1
9044	or.w		%d2,%d1			# concat old sign,new exp
9045	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exp
9046	mov.l		(%sp)+,%d2		# restore d2
9047	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9048	bra.w		fdiv_unfl_dis
9049
# precision was sgl or dbl: use only the user's rounding MODE so the
# EXOP stays in extended precision.
9050fdiv_unfl_ena_sd:
9051	mov.l		L_SCR3(%a6),%d1
9052	andi.b		&0x30,%d1		# use only rnd mode
9053	fmov.l		%d1,%fpcr		# set FPCR
9054
9055	bra.b		fdiv_unfl_ena_cont
9056
9057#
9058# the divide operation MAY underflow:
9059#
# perform the divide with the user's control settings, then decide from
# the magnitude of the result whether an underflow actually occurred.
9060fdiv_may_unfl:
9061	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
9062
9063	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9064	fmov.l		&0x0,%fpsr		# clear FPSR
9065
9066	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
9067
9068	fmov.l		%fpsr,%d1		# save status
9069	fmov.l		&0x0,%fpcr		# clear FPCR
9070
9071	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9072
9073	fabs.x		%fp0,%fp1		# make a copy of result
9074	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
9075	fbgt.w		fdiv_normal_exit	# no; no underflow occurred
9076	fblt.w		fdiv_unfl		# yes; underflow occurred
9077
9078#
9079# we still don't know if underflow occurred. result is ~ equal to 1. but,
9080# we don't know if the result was an underflow that rounded up to a 1
9081# or a normalized number that rounded down to a 1. so, redo the entire
9082# operation using RZ as the rounding mode to see what the pre-rounded
9083# result is. this case should be relatively rare.
9084#
9085	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
9086
9087	mov.l		L_SCR3(%a6),%d1
9088	andi.b		&0xc0,%d1		# keep rnd prec
9089	ori.b		&rz_mode*0x10,%d1	# insert RZ
9090
9091	fmov.l		%d1,%fpcr		# set FPCR
9092	fmov.l		&0x0,%fpsr		# clear FPSR
9093
9094	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
9095
9096	fmov.l		&0x0,%fpcr		# clear FPCR
9097	fabs.x		%fp1			# make absolute value
9098	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
9099	fbge.w		fdiv_normal_exit	# no; no underflow occurred
9100	bra.w		fdiv_unfl		# yes; underflow occurred
9101
9102############################################################################
9103
9104#
9105# Divide: inputs are not both normalized; what are they?
9106#
# dispatch on the operand-type pair: d1 indexes the 16-bit offset table
# below (entries ordered as the per-entry comments show, dst / src);
# the fetched offset is added to the table base and jumped through.
9107fdiv_not_norm:
9108	mov.w		(tbl_fdiv_op.b,%pc,%d1.w*2),%d1
9109	jmp		(tbl_fdiv_op.b,%pc,%d1.w*1)
9110
9111	swbeg		&48
9112tbl_fdiv_op:
9113	short		fdiv_norm	- tbl_fdiv_op # NORM / NORM
9114	short		fdiv_inf_load	- tbl_fdiv_op # NORM / ZERO
9115	short		fdiv_zero_load	- tbl_fdiv_op # NORM / INF
9116	short		fdiv_res_qnan	- tbl_fdiv_op # NORM / QNAN
9117	short		fdiv_norm	- tbl_fdiv_op # NORM / DENORM
9118	short		fdiv_res_snan	- tbl_fdiv_op # NORM / SNAN
9119	short		tbl_fdiv_op	- tbl_fdiv_op #
9120	short		tbl_fdiv_op	- tbl_fdiv_op #
9121
9122	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / NORM
9123	short		fdiv_res_operr	- tbl_fdiv_op # ZERO / ZERO
9124	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / INF
9125	short		fdiv_res_qnan	- tbl_fdiv_op # ZERO / QNAN
9126	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / DENORM
9127	short		fdiv_res_snan	- tbl_fdiv_op # ZERO / SNAN
9128	short		tbl_fdiv_op	- tbl_fdiv_op #
9129	short		tbl_fdiv_op	- tbl_fdiv_op #
9130
9131	short		fdiv_inf_dst	- tbl_fdiv_op # INF / NORM
9132	short		fdiv_inf_dst	- tbl_fdiv_op # INF / ZERO
9133	short		fdiv_res_operr	- tbl_fdiv_op # INF / INF
9134	short		fdiv_res_qnan	- tbl_fdiv_op # INF / QNAN
9135	short		fdiv_inf_dst	- tbl_fdiv_op # INF / DENORM
9136	short		fdiv_res_snan	- tbl_fdiv_op # INF / SNAN
9137	short		tbl_fdiv_op	- tbl_fdiv_op #
9138	short		tbl_fdiv_op	- tbl_fdiv_op #
9139
9140	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / NORM
9141	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / ZERO
9142	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / INF
9143	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / QNAN
9144	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / DENORM
9145	short		fdiv_res_snan	- tbl_fdiv_op # QNAN / SNAN
9146	short		tbl_fdiv_op	- tbl_fdiv_op #
9147	short		tbl_fdiv_op	- tbl_fdiv_op #
9148
9149	short		fdiv_norm	- tbl_fdiv_op # DENORM / NORM
9150	short		fdiv_inf_load	- tbl_fdiv_op # DENORM / ZERO
9151	short		fdiv_zero_load	- tbl_fdiv_op # DENORM / INF
9152	short		fdiv_res_qnan	- tbl_fdiv_op # DENORM / QNAN
9153	short		fdiv_norm	- tbl_fdiv_op # DENORM / DENORM
9154	short		fdiv_res_snan	- tbl_fdiv_op # DENORM / SNAN
9155	short		tbl_fdiv_op	- tbl_fdiv_op #
9156	short		tbl_fdiv_op	- tbl_fdiv_op #
9157
9158	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / NORM
9159	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / ZERO
9160	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / INF
9161	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / QNAN
9162	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / DENORM
9163	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / SNAN
9164	short		tbl_fdiv_op	- tbl_fdiv_op #
9165	short		tbl_fdiv_op	- tbl_fdiv_op #
9166
# NAN and operand-error cases are handled by the shared res_* routines.
9167fdiv_res_qnan:
9168	bra.l		res_qnan
9169fdiv_res_snan:
9170	bra.l		res_snan
9171fdiv_res_operr:
9172	bra.l		res_operr
9173
# the result is a ZERO whose sign is the exclusive-OR of the input signs.
9174	global		fdiv_zero_load		# global for fsgldiv
9175fdiv_zero_load:
9176	mov.b		SRC_EX(%a0),%d0		# result sign is exclusive
9177	mov.b		DST_EX(%a1),%d1		# or of input signs.
9178	eor.b		%d0,%d1
9179	bpl.b		fdiv_zero_load_p	# result is positive
9180	fmov.s		&0x80000000,%fp0	# load a -ZERO
9181	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
9182	rts
9183fdiv_zero_load_p:
9184	fmov.s		&0x00000000,%fp0	# load a +ZERO
9185	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
9186	rts
9187
9188#
9189# The destination was In Range and the source was a ZERO. The result,
9190# therefore, is an INF w/ the proper sign.
9191# So, determine the sign and return a new INF (w/ the j-bit cleared).
9192#
	global		fdiv_inf_load		# global for fsgldiv
9194fdiv_inf_load:
9195	ori.w		&dz_mask+adz_mask,2+USER_FPSR(%a6) # divide-by-zero; set DZ/ADZ
9196	mov.b		SRC_EX(%a0),%d0		# load both signs
9197	mov.b		DST_EX(%a1),%d1
9198	eor.b		%d0,%d1			# result sign = XOR of input signs
9199	bpl.b		fdiv_inf_load_p		# result is positive
9200	fmov.s		&0xff800000,%fp0	# make result -INF
9201	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
9202	rts
9203fdiv_inf_load_p:
9204	fmov.s		&0x7f800000,%fp0	# make result +INF
9205	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
9206	rts
9207
9208#
9209# The destination was an INF w/ an In Range or ZERO source, the result is
9210# an INF w/ the proper sign.
9211# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
9212# dst INF is set, then the j-bit of the result INF is also set).
9213#
	global		fdiv_inf_dst		# global for fsgldiv
9215fdiv_inf_dst:
9216	mov.b		DST_EX(%a1),%d0		# load both signs
9217	mov.b		SRC_EX(%a0),%d1
9218	eor.b		%d0,%d1			# result sign = XOR of input signs
9219	bpl.b		fdiv_inf_dst_p		# result is positive
9220
9221	fmovm.x		DST(%a1),&0x80		# return result in fp0
9222	fabs.x		%fp0			# clear sign bit
9223	fneg.x		%fp0			# set sign bit
9224	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
9225	rts
9226
9227fdiv_inf_dst_p:
9228	fmovm.x		DST(%a1),&0x80		# return result in fp0
9229	fabs.x		%fp0			# return positive INF
9230	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
9231	rts
9232
9233#########################################################################
9234# XDEF ****************************************************************	#
9235#	fneg(): emulates the fneg instruction				#
9236#	fsneg(): emulates the fsneg instruction				#
9237#	fdneg(): emulates the fdneg instruction				#
9238#									#
9239# XREF ****************************************************************	#
9240# 	norm() - normalize a denorm to provide EXOP			#
9241#	scale_to_zero_src() - scale sgl/dbl source exponent		#
9242#	ovf_res() - return default overflow result			#
9243#	unf_res() - return default underflow result			#
9244# 	res_qnan_1op() - return QNAN result				#
9245#	res_snan_1op() - return SNAN result				#
9246#									#
9247# INPUT ***************************************************************	#
9248#	a0 = pointer to extended precision source operand		#
9249#	d0 = rnd prec,mode						#
9250#									#
9251# OUTPUT **************************************************************	#
9252#	fp0 = result							#
9253#	fp1 = EXOP (if exception occurred)				#
9254#									#
9255# ALGORITHM ***********************************************************	#
9256#	Handle NANs, zeroes, and infinities as special cases. Separate	#
9257# norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
9258# emulated by simply setting sign bit. Sgl/dbl operands must be scaled	#
9259# and an actual fneg performed to see if overflow/underflow would have	#
9260# occurred. If so, return default underflow/overflow result. Else,	#
9261# scale the result exponent and return result. FPSR gets set based on	#
9262# the result value.							#
9263#									#
9264#########################################################################
9265
	global		fsneg
9267fsneg:
9268	andi.b		&0x30,%d0		# clear rnd prec
9269	ori.b		&s_mode*0x10,%d0	# insert sgl precision
9270	bra.b		fneg
9271
9272	global		fdneg
9273fdneg:
9274	andi.b		&0x30,%d0		# clear rnd prec
9275	ori.b		&d_mode*0x10,%d0	# insert dbl prec
# fall through into fneg
9276
9277	global		fneg
9278fneg:
9279	mov.l		%d0,L_SCR3(%a6)		# store rnd info
9280	mov.b		STAG(%a6),%d1		# fetch source operand tag
9281	bne.w		fneg_not_norm		# optimize on non-norm input
9282
9283#
9284# NEGATE SIGN : norms and denorms ONLY!
9285#
9286fneg_norm:
9287	andi.b		&0xc0,%d0		# is precision extended?
9288	bne.w		fneg_not_ext		# no; go handle sgl or dbl
9289
9290#
9291# precision selected is extended. so...we can not get an underflow
9292# or overflow because of rounding to the correct precision. so...
9293# skip the scaling and unscaling...
9294#
9295	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9296	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9297	mov.w		SRC_EX(%a0),%d0
9298	eori.w		&0x8000,%d0		# negate sign
9299	bpl.b		fneg_norm_load		# sign is positive
9300	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9301fneg_norm_load:
9302	mov.w		%d0,FP_SCR0_EX(%a6)
9303	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
9304	rts
9305
9306#
9307# for an extended precision DENORM, the UNFL exception bit is set
9308# the accrued bit is NOT set in this instance(no inexactness!)
9309#
9310fneg_denorm:
9311	andi.b		&0xc0,%d0		# is precision extended?
9312	bne.b		fneg_not_ext		# no; go handle sgl or dbl
9313
9314	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9315
9316	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9317	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9318	mov.w		SRC_EX(%a0),%d0
9319	eori.w		&0x8000,%d0		# negate sign
9320	bpl.b		fneg_denorm_done	# no
9321	mov.b		&neg_bmask,FPSR_CC(%a6)	# yes, set 'N' ccode bit
9322fneg_denorm_done:
9323	mov.w		%d0,FP_SCR0_EX(%a6)
9324	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9325
9326	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9327	bne.b		fneg_ext_unfl_ena	# yes
9328	rts
9329
9330#
9331# the input is an extended DENORM and underflow is enabled in the FPCR.
9332# normalize the mantissa and add the bias of 0x6000 to the resulting negative
9333# exponent and insert back into the operand.
9334#
9335fneg_ext_unfl_ena:
9336	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
9337	bsr.l		norm			# normalize result
9338	neg.w		%d0			# new exponent = -(shft val)
9339	addi.w		&0x6000,%d0		# add new bias to exponent
9340	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
9341	andi.w		&0x8000,%d1	 	# keep old sign
9342	andi.w		&0x7fff,%d0		# clear sign position
9343	or.w		%d1,%d0			# concat old sign, new exponent
9344	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
9345	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9346	rts
9347
9348#
9349# operand is either single or double
9350#
9351fneg_not_ext:
9352	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
9353	bne.b		fneg_dbl
9354
9355#
9356# operand is to be rounded to single precision
9357#
9358fneg_sgl:
9359	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
9360	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9361	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9362	bsr.l		scale_to_zero_src	# calculate scale factor
9363
# compare scale factor (d0) against the sgl exponent limits to pick a path
9364	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
9365	bge.w		fneg_sd_unfl		# yes; go handle underflow
9366	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
9367	beq.w		fneg_sd_may_ovfl	# maybe; go check
9368	blt.w		fneg_sd_ovfl		# yes; go handle overflow
9369
9370#
9371# operand will NOT overflow or underflow when moved in to the fp reg file
9372#
9373fneg_sd_normal:
9374	fmov.l		&0x0,%fpsr		# clear FPSR
9375	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9376
9377	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9378
9379	fmov.l		%fpsr,%d1		# save FPSR
9380	fmov.l		&0x0,%fpcr		# clear FPCR
9381
9382	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9383
# un-scale the result exponent and return the result in fp0
9384fneg_sd_normal_exit:
9385	mov.l		%d2,-(%sp)		# save d2
9386	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
9387	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
9388	mov.w		%d1,%d2			# make a copy
9389	andi.l		&0x7fff,%d1		# strip sign
9390	sub.l		%d0,%d1			# subtract scale factor
9391	andi.w		&0x8000,%d2		# keep old sign
9392	or.w		%d1,%d2			# concat old sign,new exp
9393	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
9394	mov.l		(%sp)+,%d2		# restore d2
9395	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
9396	rts
9397
9398#
9399# operand is to be rounded to double precision
9400#
9401fneg_dbl:
9402	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
9403	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9404	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9405	bsr.l		scale_to_zero_src	# calculate scale factor
9406
# compare scale factor (d0) against the dbl exponent limits to pick a path
9407	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
9408	bge.b		fneg_sd_unfl		# yes; go handle underflow
9409	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
9410	beq.w		fneg_sd_may_ovfl	# maybe; go check
9411	blt.w		fneg_sd_ovfl		# yes; go handle overflow
9412	bra.w		fneg_sd_normal		# no; go handle normalized op
9413
9414#
9415# operand WILL underflow when moved in to the fp register file
9416#
9417fneg_sd_unfl:
9418	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9419
9420	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign
9421	bpl.b		fneg_sd_unfl_tst
9422	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
9423
9424# if underflow or inexact is enabled, go calculate EXOP first.
9425fneg_sd_unfl_tst:
9426	mov.b		FPCR_ENABLE(%a6),%d1
9427	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
9428	bne.b		fneg_sd_unfl_ena	# yes
9429
9430fneg_sd_unfl_dis:
9431	lea		FP_SCR0(%a6),%a0	# pass: result addr
9432	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
9433	bsr.l		unf_res			# calculate default result
9434	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
9435	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9436	rts
9437
9438#
9439# operand will underflow AND underflow is enabled.
9440# therefore, we must return the result rounded to extended precision.
9441#
9442fneg_sd_unfl_ena:
9443	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
9444	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
9445	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
9446
9447	mov.l		%d2,-(%sp)		# save d2
9448	mov.l		%d1,%d2			# make a copy
9449	andi.l		&0x7fff,%d1		# strip sign
9450	andi.w		&0x8000,%d2		# keep old sign
9451	sub.l		%d0,%d1			# subtract scale factor
9452	addi.l		&0x6000,%d1		# add new bias
9453	andi.w		&0x7fff,%d1
9454	or.w		%d2,%d1			# concat new sign,new exp
9455	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
9456	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
9457	mov.l		(%sp)+,%d2		# restore d2
9458	bra.b		fneg_sd_unfl_dis
9459
9460#
9461# operand WILL overflow.
9462#
9463fneg_sd_ovfl:
9464	fmov.l		&0x0,%fpsr		# clear FPSR
9465	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9466
9467	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9468
9469	fmov.l		&0x0,%fpcr		# clear FPCR
9470	fmov.l		%fpsr,%d1		# save FPSR
9471
9472	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9473
9474fneg_sd_ovfl_tst:
9475	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
9476
9477	mov.b		FPCR_ENABLE(%a6),%d1
9478	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
9479	bne.b		fneg_sd_ovfl_ena	# yes
9480
9481#
9482# OVFL is not enabled; therefore, we must create the default result by
9483# calling ovf_res().
9484#
9485fneg_sd_ovfl_dis:
9486	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
9487	sne		%d1			# set sign param accordingly
9488	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
9489	bsr.l		ovf_res			# calculate default result
9490	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
9491	fmovm.x		(%a0),&0x80		# return default result in fp0
9492	rts
9493
9494#
9495# OVFL is enabled.
9496# the INEX2 bit has already been updated by the round to the correct precision.
9497# now, round to extended(and don't alter the FPSR).
9498#
9499fneg_sd_ovfl_ena:
9500	mov.l		%d2,-(%sp)		# save d2
9501	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
9502	mov.l		%d1,%d2			# make a copy
9503	andi.l		&0x7fff,%d1		# strip sign
9504	andi.w		&0x8000,%d2		# keep old sign
9505	sub.l		%d0,%d1			# subtract scale factor
9506	subi.l		&0x6000,%d1		# subtract bias
9507	andi.w		&0x7fff,%d1
9508	or.w		%d2,%d1			# concat sign,exp
9509	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
9510	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9511	mov.l		(%sp)+,%d2		# restore d2
9512	bra.b		fneg_sd_ovfl_dis
9513
9514#
9515# the move in MAY overflow. so...
9516#
9517fneg_sd_may_ovfl:
9518	fmov.l		&0x0,%fpsr		# clear FPSR
9519	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9520
9521	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9522
9523	fmov.l		%fpsr,%d1		# save status
9524	fmov.l		&0x0,%fpcr		# clear FPCR
9525
9526	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9527
9528	fabs.x		%fp0,%fp1		# make a copy of result
9529	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
9530	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred
9531
9532# no, it didn't overflow; we have correct result
9533	bra.w		fneg_sd_normal_exit
9534
9535##########################################################################
9536
9537#
9538# input is not normalized; what is it?
9539#
9540fneg_not_norm:
9541	cmpi.b		%d1,&DENORM		# weed out DENORM
9542	beq.w		fneg_denorm
9543	cmpi.b		%d1,&SNAN		# weed out SNAN
9544	beq.l		res_snan_1op
9545	cmpi.b		%d1,&QNAN		# weed out QNAN
9546	beq.l		res_qnan_1op
9547
9548#
9549# do the fneg; at this point, only possible ops are ZERO and INF.
9550# use fneg to determine ccodes.
9551# prec:mode should be zero at this point but it won't affect answer anyways.
9552#
9553	fneg.x		SRC_EX(%a0),%fp0	# do fneg
9554	fmov.l		%fpsr,%d0
9555	rol.l		&0x8,%d0		# put ccodes in lo byte
9556	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
9557	rts
9558
9559#########################################################################
9560# XDEF ****************************************************************	#
9561# 	ftst(): emulates the ftst instruction				#
9562#									#
9563# XREF ****************************************************************	#
9564# 	res{s,q}nan_1op() - set NAN result for monadic instruction	#
9565#									#
9566# INPUT ***************************************************************	#
9567# 	a0 = pointer to extended precision source operand		#
9568#									#
9569# OUTPUT **************************************************************	#
9570#	none								#
9571#									#
9572# ALGORITHM ***********************************************************	#
9573# 	Check the source operand tag (STAG) and set the FPSR according	#
9574# to the operand type and sign.						#
9575#									#
9576#########################################################################
9577
	global		ftst
9579ftst:
9580	mov.b		STAG(%a6),%d1		# fetch source operand tag
9581	bne.b		ftst_not_norm		# optimize on non-norm input
9582
9583#
9584# Norm: set only the 'N' condition code (from the operand sign).
9585#
9586ftst_norm:
9587	tst.b		SRC_EX(%a0)		# is operand negative?
9588	bmi.b		ftst_norm_m		# yes
9589	rts
9590ftst_norm_m:
9591	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9592	rts
9593
9594#
9595# input is not normalized; what is it?
9596#
9597ftst_not_norm:
9598	cmpi.b		%d1,&ZERO		# weed out ZERO
9599	beq.b		ftst_zero
9600	cmpi.b		%d1,&INF		# weed out INF
9601	beq.b		ftst_inf
9602	cmpi.b		%d1,&SNAN		# weed out SNAN
9603	beq.l		res_snan_1op
9604	cmpi.b		%d1,&QNAN		# weed out QNAN
9605	beq.l		res_qnan_1op
9606
9607#
9608# Denorm: same as Norm; set only the 'N' condition code.
9609#
9610ftst_denorm:
9611	tst.b		SRC_EX(%a0)		# is operand negative?
9612	bmi.b		ftst_denorm_m		# yes
9613	rts
9614ftst_denorm_m:
9615	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9616	rts
9617
9618#
9619# Infinity:
9620#
9621ftst_inf:
9622	tst.b		SRC_EX(%a0)		# is operand negative?
9623	bmi.b		ftst_inf_m		# yes
9624ftst_inf_p:
9625	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9626	rts
9627ftst_inf_m:
9628	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
9629	rts
9630
9631#
9632# Zero: set 'Z' (and 'N' if negative) in the condition codes.
9633#
9634ftst_zero:
9635	tst.b		SRC_EX(%a0)		# is operand negative?
9636	bmi.b		ftst_zero_m		# yes
9637ftst_zero_p:
9638	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
9639	rts
9640ftst_zero_m:
9641	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
9642	rts
9643
9644#########################################################################
9645# XDEF ****************************************************************	#
9646#	fint(): emulates the fint instruction				#
9647#									#
9648# XREF ****************************************************************	#
9649#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
9650#									#
9651# INPUT ***************************************************************	#
9652#	a0 = pointer to extended precision source operand		#
9653#	d0 = round precision/mode					#
9654#									#
9655# OUTPUT **************************************************************	#
9656#	fp0 = result							#
9657#									#
9658# ALGORITHM ***********************************************************	#
9659# 	Separate according to operand type. Unnorms don't pass through 	#
9660# here. For norms, load the rounding mode/prec, execute a "fint", then 	#
9661# store the resulting FPSR bits.					#
9662# 	For denorms, force the j-bit to a one and do the same as for	#
9663# norms. Denorms are so low that the answer will either be a zero or a 	#
9664# one.									#
9665# 	For zeroes/infs/NANs, return the same while setting the FPSR	#
9666# as appropriate.							#
9667#									#
9668#########################################################################
9669
	global		fint
9671fint:
9672	mov.b		STAG(%a6),%d1		# fetch source operand tag
9673	bne.b		fint_not_norm		# optimize on non-norm input
9674
9675#
9676# Norm: execute the fint in extended precision with the user's rounding
9677# mode, then accumulate the resulting status bits into USER_FPSR.
9678#
9679fint_norm:
9680	andi.b		&0x30,%d0		# set prec = ext
9681
9682	fmov.l		%d0,%fpcr		# set FPCR
9683	fmov.l		&0x0,%fpsr		# clear FPSR
9684
9685	fint.x 		SRC(%a0),%fp0		# execute fint
9686
9687	fmov.l		&0x0,%fpcr		# clear FPCR
9688	fmov.l		%fpsr,%d0		# save FPSR
9689	or.l		%d0,USER_FPSR(%a6)	# set exception bits
9690
9691	rts
9692
9693#
9694# input is not normalized; what is it?
9695#
9696fint_not_norm:
9697	cmpi.b		%d1,&ZERO		# weed out ZERO
9698	beq.b		fint_zero
9699	cmpi.b		%d1,&INF		# weed out INF
9700	beq.b		fint_inf
9701	cmpi.b		%d1,&DENORM		# weed out DENORM
9702	beq.b		fint_denorm
9703	cmpi.b		%d1,&SNAN		# weed out SNAN
9704	beq.l		res_snan_1op
9705	bra.l		res_qnan_1op		# weed out QNAN
9705
9706#
9707# Denorm:
9708#
9709# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
9710# also, the INEX2 and AINEX exception bits will be set.
9711# so, we could either set these manually or force the DENORM
9712# to a very small NORM and ship it to the NORM routine.
9713# I do the latter.
9714#
9715fint_denorm:
9716	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9717	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
9718	lea		FP_SCR0(%a6),%a0	# pass: ptr to the forged NORM
9719	bra.b		fint_norm
9720
9721#
9722# Zero: return the same signed zero and set Z (and N if negative).
9723#
9724fint_zero:
9725	tst.b		SRC_EX(%a0)		# is ZERO negative?
9726	bmi.b		fint_zero_m		# yes
9727fint_zero_p:
9728	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
9729	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
9730	rts
9731fint_zero_m:
9732	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
9733	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9734	rts
9735
9736#
9737# Infinity: return the same INF and set I (and N if negative).
9738#
9739fint_inf:
9740	fmovm.x		SRC(%a0),&0x80		# return result in fp0
9741	tst.b		SRC_EX(%a0)		# is INF negative?
9742	bmi.b		fint_inf_m		# yes
9743fint_inf_p:
9744	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9745	rts
9746fint_inf_m:
9747	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9748	rts
9749
9750#########################################################################
9751# XDEF ****************************************************************	#
9752#	fintrz(): emulates the fintrz instruction			#
9753#									#
9754# XREF ****************************************************************	#
9755#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
9756#									#
9757# INPUT ***************************************************************	#
9758#	a0 = pointer to extended precision source operand		#
9759#	d0 = round precision/mode					#
9760#									#
9761# OUTPUT **************************************************************	#
9762# 	fp0 = result							#
9763#									#
9764# ALGORITHM ***********************************************************	#
9765#	Separate according to operand type. Unnorms don't pass through	#
9766# here. For norms, load the rounding mode/prec, execute a "fintrz", 	#
9767# then store the resulting FPSR bits.					#
9768# 	For denorms, force the j-bit to a one and do the same as for	#
9769# norms. Denorms are so low that the answer will either be a zero or a	#
9770# one.									#
9771# 	For zeroes/infs/NANs, return the same while setting the FPSR	#
9772# as appropriate.							#
9773#									#
9774#########################################################################
9775
	global		fintrz
9777fintrz:
9778	mov.b		STAG(%a6),%d1		# fetch source operand tag
9779	bne.b		fintrz_not_norm		# optimize on non-norm input
9780
9781#
9782# Norm: fintrz always truncates, so no FPCR rounding setup is needed;
9783# just accumulate the resulting status bits into USER_FPSR.
9784#
9785fintrz_norm:
9786	fmov.l		&0x0,%fpsr		# clear FPSR
9787
9788	fintrz.x	SRC(%a0),%fp0		# execute fintrz
9789
9790	fmov.l		%fpsr,%d0		# save FPSR
9791	or.l		%d0,USER_FPSR(%a6)	# set exception bits
9792
9793	rts
9794
9795#
9796# input is not normalized; what is it?
9797#
9798fintrz_not_norm:
9799	cmpi.b		%d1,&ZERO		# weed out ZERO
9800	beq.b		fintrz_zero
9801	cmpi.b		%d1,&INF		# weed out INF
9802	beq.b		fintrz_inf
9803	cmpi.b		%d1,&DENORM		# weed out DENORM
9804	beq.b		fintrz_denorm
9805	cmpi.b		%d1,&SNAN		# weed out SNAN
9806	beq.l		res_snan_1op
9807	bra.l		res_qnan_1op		# weed out QNAN
9807
9808#
9809# Denorm:
9810#
9811# for DENORMs, the result will be (+/-)ZERO.
9812# also, the INEX2 and AINEX exception bits will be set.
9813# so, we could either set these manually or force the DENORM
9814# to a very small NORM and ship it to the NORM routine.
9815# I do the latter.
9816#
9817fintrz_denorm:
9818	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9819	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
9820	lea		FP_SCR0(%a6),%a0	# pass: ptr to the forged NORM
9821	bra.b		fintrz_norm
9822
9823#
9824# Zero: return the same signed zero and set Z (and N if negative).
9825#
9826fintrz_zero:
9827	tst.b		SRC_EX(%a0)		# is ZERO negative?
9828	bmi.b		fintrz_zero_m		# yes
9829fintrz_zero_p:
9830	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
9831	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
9832	rts
9833fintrz_zero_m:
9834	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
9835	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9836	rts
9837
9838#
9839# Infinity: return the same INF and set I (and N if negative).
9840#
9841fintrz_inf:
9842	fmovm.x		SRC(%a0),&0x80		# return result in fp0
9843	tst.b		SRC_EX(%a0)		# is INF negative?
9844	bmi.b		fintrz_inf_m		# yes
9845fintrz_inf_p:
9846	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9847	rts
9848fintrz_inf_m:
9849	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9850	rts
9851
9852#########################################################################
9853# XDEF ****************************************************************	#
9854#	fabs():  emulates the fabs instruction				#
9855#	fsabs(): emulates the fsabs instruction				#
9856#	fdabs(): emulates the fdabs instruction				#
9857#									#
9858# XREF **************************************************************** #
9859#	norm() - normalize denorm mantissa to provide EXOP		#
9860#	scale_to_zero_src() - make exponent. = 0; get scale factor	#
9861#	unf_res() - calculate underflow result				#
9862#	ovf_res() - calculate overflow result				#
9863#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
9864#									#
9865# INPUT *************************************************************** #
9866#	a0 = pointer to extended precision source operand		#
9867#	d0 = rnd precision/mode						#
9868#									#
9869# OUTPUT ************************************************************** #
9870#	fp0 = result							#
9871#	fp1 = EXOP (if exception occurred)				#
9872#									#
9873# ALGORITHM ***********************************************************	#
9874#	Handle NANs, infinities, and zeroes as special cases. Divide	#
9875# norms into extended, single, and double precision. 			#
9876# 	Simply clear sign for extended precision norm. Ext prec denorm	#
9877# gets an EXOP created for it since it's an underflow.			#
9878#	Double and single precision can overflow and underflow. First,	#
9879# scale the operand such that the exponent is zero. Perform an "fabs"	#
9880# using the correct rnd mode/prec. Check to see if the original 	#
9881# exponent would take an exception. If so, use unf_res() or ovf_res()	#
9882# to calculate the default result. Also, create the EXOP for the	#
9883# exceptional case. If no exception should occur, insert the correct 	#
9884# result exponent and return.						#
9885# 	Unnorms don't pass through here.				#
9886#									#
9887#########################################################################
9888
	global		fsabs
9890fsabs:
9891	andi.b		&0x30,%d0		# clear rnd prec
9892	ori.b		&s_mode*0x10,%d0	# insert sgl precision
9893	bra.b		fabs
9894
9895	global		fdabs
9896fdabs:
9897	andi.b		&0x30,%d0		# clear rnd prec
9898	ori.b		&d_mode*0x10,%d0	# insert dbl precision
# fall through into fabs
9899
9900	global		fabs
9901fabs:
9902	mov.l		%d0,L_SCR3(%a6)		# store rnd info
9903	mov.b		STAG(%a6),%d1		# fetch source operand tag
9904	bne.w		fabs_not_norm		# optimize on non-norm input
9905
9906#
9907# ABSOLUTE VALUE: norms and denorms ONLY!
9908#
9909fabs_norm:
9910	andi.b		&0xc0,%d0		# is precision extended?
9911	bne.b		fabs_not_ext		# no; go handle sgl or dbl
9912
9913#
9914# precision selected is extended. so...we can not get an underflow
9915# or overflow because of rounding to the correct precision. so...
9916# skip the scaling and unscaling...
9917#
9918	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9919	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9920	mov.w		SRC_EX(%a0),%d1
9921	bclr		&15,%d1			# force absolute value
9922	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
9923	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
9924	rts
9925
9926#
9927# for an extended precision DENORM, the UNFL exception bit is set
9928# the accrued bit is NOT set in this instance(no inexactness!)
9929#
9930fabs_denorm:
9931	andi.b		&0xc0,%d0		# is precision extended?
9932	bne.b		fabs_not_ext		# no
9933
9934	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9935
9936	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9937	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9938	mov.w		SRC_EX(%a0),%d0
9939	bclr		&15,%d0			# clear sign
9940	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent
9941
9942	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9943
9944	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9945	bne.b		fabs_ext_unfl_ena	# yes; also build the EXOP
9946	rts
9947
9948#
9949# the input is an extended DENORM and underflow is enabled in the FPCR.
9950# normalize the mantissa and add the bias of 0x6000 to the resulting negative
9951# exponent and insert back into the operand.
9952#
9953fabs_ext_unfl_ena:
9954	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
9955	bsr.l		norm			# normalize result
9956	neg.w		%d0			# new exponent = -(shft val)
9957	addi.w		&0x6000,%d0		# add new bias to exponent
9958	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
9959	andi.w		&0x8000,%d1		# keep old sign
9960	andi.w		&0x7fff,%d0		# clear sign position
9961	or.w		%d1,%d0			# concat old sign, new exponent
9962	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
9963	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9964	rts
9965
9966#
9967# operand is either single or double
9968#
9969fabs_not_ext:
9970	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
9971	bne.b		fabs_dbl
9972
9973#
9974# operand is to be rounded to single precision
9975#
9976fabs_sgl:
9977	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
9978	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9979	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9980	bsr.l		scale_to_zero_src	# calculate scale factor
9981
9982	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
9983	bge.w		fabs_sd_unfl		# yes; go handle underflow
9984	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
9985	beq.w		fabs_sd_may_ovfl	# maybe; go check
9986	blt.w		fabs_sd_ovfl		# yes; go handle overflow
9987
9988#
9989# operand will NOT overflow or underflow when moved in to the fp reg file
9990#
9991fabs_sd_normal:
9992	fmov.l		&0x0,%fpsr		# clear FPSR
9993	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9994
9995	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
9996
9997	fmov.l		%fpsr,%d1		# save FPSR
9998	fmov.l		&0x0,%fpcr		# clear FPCR
9999
10000	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10001
10002fabs_sd_normal_exit:
10003	mov.l		%d2,-(%sp)		# save d2
10004	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
10005	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
10006	mov.l		%d1,%d2			# make a copy
10007	andi.l		&0x7fff,%d1		# strip sign
10008	sub.l		%d0,%d1			# add scale factor
10009	andi.w		&0x8000,%d2		# keep old sign
10010	or.w		%d1,%d2			# concat old sign,new exp
10011	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
10012	mov.l		(%sp)+,%d2		# restore d2
10013	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
10014	rts
10015
10016#
10017# operand is to be rounded to double precision
10018#
10019fabs_dbl:
10020	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10021	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
10022	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10023	bsr.l		scale_to_zero_src	# calculate scale factor
10024
10025	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
10026	bge.b		fabs_sd_unfl		# yes; go handle underflow
10027	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
10028	beq.w		fabs_sd_may_ovfl	# maybe; go check
10029	blt.w		fabs_sd_ovfl		# yes; go handle overflow
10030	bra.w		fabs_sd_normal		# no; ho handle normalized op
10031
10032#
10033# operand WILL underflow when moved in to the fp register file
10034#
10035fabs_sd_unfl:
10036	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10037
10038	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value
10039
10040# if underflow or inexact is enabled, go calculate EXOP first.
10041	mov.b		FPCR_ENABLE(%a6),%d1
10042	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
10043	bne.b		fabs_sd_unfl_ena	# yes
10044
10045fabs_sd_unfl_dis:
10046	lea		FP_SCR0(%a6),%a0	# pass: result addr
10047	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
10048	bsr.l		unf_res			# calculate default result
10049	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
10050	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
10051	rts
10052
10053#
10054# operand will underflow AND underflow is enabled.
10055# therefore, we must return the result rounded to extended precision.
10056#
10057fabs_sd_unfl_ena:
10058	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
10059	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
10060	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
10061
10062	mov.l		%d2,-(%sp)		# save d2
10063	mov.l		%d1,%d2			# make a copy
10064	andi.l		&0x7fff,%d1		# strip sign
10065	andi.w		&0x8000,%d2		# keep old sign
10066	sub.l		%d0,%d1			# subtract scale factor
10067	addi.l		&0x6000,%d1		# add new bias
10068	andi.w		&0x7fff,%d1
10069	or.w		%d2,%d1			# concat new sign,new exp
10070	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
10071	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
10072	mov.l		(%sp)+,%d2		# restore d2
10073	bra.b		fabs_sd_unfl_dis
10074
10075#
10076# operand WILL overflow.
10077#
10078fabs_sd_ovfl:
10079	fmov.l		&0x0,%fpsr		# clear FPSR
10080	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10081
10082	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
10083
10084	fmov.l		&0x0,%fpcr		# clear FPCR
10085	fmov.l		%fpsr,%d1		# save FPSR
10086
10087	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10088
10089fabs_sd_ovfl_tst:
10090	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
10091
10092	mov.b		FPCR_ENABLE(%a6),%d1
10093	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
10094	bne.b		fabs_sd_ovfl_ena	# yes
10095
10096#
10097# OVFL is not enabled; therefore, we must create the default result by
10098# calling ovf_res().
10099#
10100fabs_sd_ovfl_dis:
10101	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
10102	sne		%d1			# set sign param accordingly
10103	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
10104	bsr.l		ovf_res			# calculate default result
10105	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
10106	fmovm.x		(%a0),&0x80		# return default result in fp0
10107	rts
10108
10109#
10110# OVFL is enabled.
10111# the INEX2 bit has already been updated by the round to the correct precision.
10112# now, round to extended(and don't alter the FPSR).
10113#
10114fabs_sd_ovfl_ena:
10115	mov.l		%d2,-(%sp)		# save d2
10116	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10117	mov.l		%d1,%d2			# make a copy
10118	andi.l		&0x7fff,%d1		# strip sign
10119	andi.w		&0x8000,%d2		# keep old sign
10120	sub.l		%d0,%d1			# add scale factor
10121	subi.l		&0x6000,%d1		# subtract bias
10122	andi.w		&0x7fff,%d1
10123	or.w		%d2,%d1			# concat sign,exp
10124	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10125	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10126	mov.l		(%sp)+,%d2		# restore d2
10127	bra.b		fabs_sd_ovfl_dis
10128
10129#
10130# the move in MAY underflow. so...
10131#
10132fabs_sd_may_ovfl:
10133	fmov.l		&0x0,%fpsr		# clear FPSR
10134	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10135
10136	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
10137
10138	fmov.l		%fpsr,%d1		# save status
10139	fmov.l		&0x0,%fpcr		# clear FPCR
10140
10141	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10142
10143	fabs.x		%fp0,%fp1		# make a copy of result
10144	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
10145	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred
10146
10147# no, it didn't overflow; we have correct result
10148	bra.w		fabs_sd_normal_exit
10149
10150##########################################################################
10151
10152#
10153# input is not normalized; what is it?
10154#
10155fabs_not_norm:
10156	cmpi.b		%d1,&DENORM		# weed out DENORM
10157	beq.w		fabs_denorm
10158	cmpi.b		%d1,&SNAN		# weed out SNAN
10159	beq.l		res_snan_1op
10160	cmpi.b		%d1,&QNAN		# weed out QNAN
10161	beq.l		res_qnan_1op
10162
10163	fabs.x		SRC(%a0),%fp0		# force absolute value
10164
10165	cmpi.b		%d1,&INF		# weed out INF
10166	beq.b		fabs_inf
10167fabs_zero:
10168	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
10169	rts
10170fabs_inf:
10171	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
10172	rts
10173
10174#########################################################################
10175# XDEF ****************************************************************	#
10176# 	fcmp(): fp compare op routine					#
10177#									#
10178# XREF ****************************************************************	#
10179# 	res_qnan() - return QNAN result					#
10180#	res_snan() - return SNAN result					#
10181#									#
10182# INPUT ***************************************************************	#
10183#	a0 = pointer to extended precision source operand		#
10184#	a1 = pointer to extended precision destination operand		#
10185#	d0 = round prec/mode						#
10186#									#
10187# OUTPUT ************************************************************** #
10188#	None								#
10189#									#
10190# ALGORITHM ***********************************************************	#
10191# 	Handle NANs and denorms as special cases. For everything else,	#
10192# just use the actual fcmp instruction to produce the correct condition	#
10193# codes.								#
10194#									#
10195#########################################################################
10196
	global		fcmp
fcmp:
	# build a 6-bit dispatch index: d1 = (DTAG << 3) | STAG.
	# zero means both operands are NORMs; take the fast path.
	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1
	bne.b		fcmp_not_norm		# optimize on non-norm input

#
# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
#
fcmp_norm:
	fmovm.x		DST(%a1),&0x80		# load dst op

	fcmp.x 		%fp0,SRC(%a0)		# do compare

	fmov.l		%fpsr,%d0		# save FPSR
	rol.l		&0x8,%d0		# extract ccode bits
	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)

	rts

#
# fcmp: inputs are not both normalized; what are they?
# dispatch through the (DTAG,STAG)-indexed offset table below.
#
fcmp_not_norm:
	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fcmp_op:
	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
	short		fcmp_nrm_dnrm 	- tbl_fcmp_op # NORM - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
	short		fcmp_norm	- tbl_fcmp_op # INF - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
fcmp_res_qnan:
	bsr.l		res_qnan
	andi.b		&0xf7,FPSR_CC(%a6)	# clear the 'N' ccode bit
	rts
fcmp_res_snan:
	bsr.l		res_snan
	andi.b		&0xf7,FPSR_CC(%a6)	# clear the 'N' ccode bit
	rts
10292
10293#
10294# DENORMs are a little more difficult.
10295# If you have a 2 DENORMs, then you can just force the j-bit to a one
10296# and use the fcmp_norm routine.
10297# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
10298# and use the fcmp_norm routine.
10299# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
10300# But with a DENORM and a NORM of the same sign, the neg bit is set if the
10301# (1) signs are (+) and the DENORM is the dst or
10302# (2) signs are (-) and the DENORM is the src
10303#
10304
10305fcmp_dnrm_s:
10306	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10307	mov.l		SRC_HI(%a0),%d0
10308	bset		&31,%d0			# DENORM src; make into small norm
10309	mov.l		%d0,FP_SCR0_HI(%a6)
10310	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10311	lea		FP_SCR0(%a6),%a0
10312	bra.w		fcmp_norm
10313
10314fcmp_dnrm_d:
10315	mov.l		DST_EX(%a1),FP_SCR0_EX(%a6)
10316	mov.l		DST_HI(%a1),%d0
10317	bset		&31,%d0			# DENORM src; make into small norm
10318	mov.l		%d0,FP_SCR0_HI(%a6)
10319	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
10320	lea		FP_SCR0(%a6),%a1
10321	bra.w		fcmp_norm
10322
10323fcmp_dnrm_sd:
10324	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
10325	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10326	mov.l		DST_HI(%a1),%d0
10327	bset		&31,%d0			# DENORM dst; make into small norm
10328	mov.l		%d0,FP_SCR1_HI(%a6)
10329	mov.l		SRC_HI(%a0),%d0
10330	bset		&31,%d0			# DENORM dst; make into small norm
10331	mov.l		%d0,FP_SCR0_HI(%a6)
10332	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
10333	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10334	lea		FP_SCR1(%a6),%a1
10335	lea		FP_SCR0(%a6),%a0
10336	bra.w		fcmp_norm
10337
10338fcmp_nrm_dnrm:
10339	mov.b		SRC_EX(%a0),%d0		# determine if like signs
10340	mov.b		DST_EX(%a1),%d1
10341	eor.b		%d0,%d1
10342	bmi.w		fcmp_dnrm_s
10343
10344# signs are the same, so must determine the answer ourselves.
10345	tst.b		%d0			# is src op negative?
10346	bmi.b		fcmp_nrm_dnrm_m		# yes
10347	rts
10348fcmp_nrm_dnrm_m:
10349	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
10350	rts
10351
10352fcmp_dnrm_nrm:
10353	mov.b		SRC_EX(%a0),%d0		# determine if like signs
10354	mov.b		DST_EX(%a1),%d1
10355	eor.b		%d0,%d1
10356	bmi.w		fcmp_dnrm_d
10357
10358# signs are the same, so must determine the answer ourselves.
10359	tst.b		%d0			# is src op negative?
10360	bpl.b		fcmp_dnrm_nrm_m		# no
10361	rts
10362fcmp_dnrm_nrm_m:
10363	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
10364	rts
10365
10366#########################################################################
10367# XDEF ****************************************************************	#
10368# 	fsglmul(): emulates the fsglmul instruction			#
10369#									#
10370# XREF ****************************************************************	#
10371#	scale_to_zero_src() - scale src exponent to zero		#
10372#	scale_to_zero_dst() - scale dst exponent to zero		#
10373#	unf_res4() - return default underflow result for sglop		#
10374#	ovf_res() - return default overflow result			#
10375# 	res_qnan() - return QNAN result					#
10376# 	res_snan() - return SNAN result					#
10377#									#
10378# INPUT ***************************************************************	#
10379#	a0 = pointer to extended precision source operand		#
10380#	a1 = pointer to extended precision destination operand		#
10381#	d0  rnd prec,mode						#
10382#									#
10383# OUTPUT **************************************************************	#
10384#	fp0 = result							#
10385#	fp1 = EXOP (if exception occurred)				#
10386#									#
10387# ALGORITHM ***********************************************************	#
10388#	Handle NANs, infinities, and zeroes as special cases. Divide	#
10389# norms/denorms into ext/sgl/dbl precision.				#
10390#	For norms/denorms, scale the exponents such that a multiply	#
10391# instruction won't cause an exception. Use the regular fsglmul to	#
10392# compute a result. Check if the regular operands would have taken	#
10393# an exception. If so, return the default overflow/underflow result	#
10394# and return the EXOP if exceptions are enabled. Else, scale the 	#
10395# result operand to the proper exponent.				#
10396#									#
10397#########################################################################
10398
	global		fsglmul
fsglmul:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	# build dispatch index: d1 = (DTAG << 3) | STAG; zero iff both NORMs
	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1

	bne.w		fsglmul_not_norm	# optimize on non-norm input

fsglmul_norm:
	# copy both operands to the stack frame so they can be scaled in place
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale src exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2

	cmpi.l		%d0,&0x3fff-0x7ffe 	# would result ovfl?
	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
	blt.w		fsglmul_ovfl		# result will overflow

	cmpi.l		%d0,&0x3fff+0x0001 	# would result unfl?
	beq.w		fsglmul_may_unfl	# result may rnd up and avoid unfl
	bgt.w		fsglmul_unfl		# result will underflow

fsglmul_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# common exit: undo the scaling by rebuilding the true exponent
fsglmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# remove scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
10460
# result WILL overflow: do the multiply anyway to collect INEX2/N status
fsglmul_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsglmul_ovfl_tst:

# save setting this until now because this is where fsglmul_may_ovfl may jump in
	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsglmul_ovfl_ena	# yes

# OVFL disabled: return the default overflow result from ovf_res()
fsglmul_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# force prec = ext
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

# OVFL (or INEX) enabled: also build the bias-adjusted EXOP in fp1
fsglmul_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# remove scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear sign position
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsglmul_ovfl_dis

# result MAY overflow: multiply, then check the magnitude of the result
fsglmul_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fsglmul_normal_exit
10529
# result WILL underflow: redo the multiply with round-to-zero so the
# pre-rounded value is available for unf_res4()
fsglmul_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR (RZ, ext prec)
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsglmul_unfl_ena	# yes

fsglmul_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.  redo the multiply in the user's rounding mode and
# build the bias-adjusted EXOP in fp1, then fall back to the default path.
#
fsglmul_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# remove scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# clear sign position
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fsglmul_unfl_dis

# result MAY underflow: multiply, then compare |result| against 2.0
fsglmul_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
	fbgt.w		fsglmul_normal_exit	# no; no underflow occurred
	fblt.w		fsglmul_unfl		# yes; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
	bra.w		fsglmul_unfl		# yes, underflow occurred
10628
10629##############################################################################
10630
10631#
10632# Single Precision Multiply: inputs are not both normalized; what are they?
10633#
10634fsglmul_not_norm:
10635	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
10636	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)
10637
10638	swbeg		&48
10639tbl_fsglmul_op:
10640	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
10641	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
10642	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
10643	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
10644	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
10645	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
10646	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10647	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10648
10649	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
10650	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
10651	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
10652	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
10653	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
10654	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
10655	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10656	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10657
10658	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
10659	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
10660	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
10661	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
10662	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
10663	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
10664	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10665	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10666
10667	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
10668	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
10669	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
10670	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
10671	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
10672	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
10673	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10674	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10675
10676	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
10677	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
10678	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
10679	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
10680	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
10681	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
10682	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10683	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10684
10685	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
10686	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
10687	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
10688	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
10689	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
10690	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
10691	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10692	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10693
10694fsglmul_res_operr:
10695	bra.l		res_operr
10696fsglmul_res_snan:
10697	bra.l		res_snan
10698fsglmul_res_qnan:
10699	bra.l		res_qnan
10700fsglmul_zero:
10701	bra.l		fmul_zero
10702fsglmul_inf_src:
10703	bra.l		fmul_inf_src
10704fsglmul_inf_dst:
10705	bra.l		fmul_inf_dst
10706
10707#########################################################################
10708# XDEF ****************************************************************	#
10709# 	fsgldiv(): emulates the fsgldiv instruction			#
10710#									#
10711# XREF ****************************************************************	#
10712#	scale_to_zero_src() - scale src exponent to zero		#
10713#	scale_to_zero_dst() - scale dst exponent to zero		#
10714#	unf_res4() - return default underflow result for sglop		#
10715#	ovf_res() - return default overflow result			#
10716# 	res_qnan() - return QNAN result					#
10717# 	res_snan() - return SNAN result					#
10718#									#
10719# INPUT ***************************************************************	#
10720#	a0 = pointer to extended precision source operand		#
10721#	a1 = pointer to extended precision destination operand		#
10722#	d0  rnd prec,mode						#
10723#									#
10724# OUTPUT **************************************************************	#
10725#	fp0 = result							#
10726#	fp1 = EXOP (if exception occurred)				#
10727#									#
10728# ALGORITHM ***********************************************************	#
10729#	Handle NANs, infinities, and zeroes as special cases. Divide	#
10730# norms/denorms into ext/sgl/dbl precision.				#
10731#	For norms/denorms, scale the exponents such that a divide	#
10732# instruction won't cause an exception. Use the regular fsgldiv to	#
10733# compute a result. Check if the regular operands would have taken	#
10734# an exception. If so, return the default overflow/underflow result	#
10735# and return the EXOP if exceptions are enabled. Else, scale the 	#
10736# result operand to the proper exponent.				#
10737#									#
10738#########################################################################
10739
	global		fsgldiv
fsgldiv:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	# build dispatch index: d1 = (DTAG << 3) | STAG; zero iff both NORMs
	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine src tags

	bne.w		fsgldiv_not_norm	# optimize on non-norm input

#
# DIVIDE: NORMs and DENORMs ONLY!
#
fsgldiv_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# calculate scale factor 1
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# calculate scale factor 2

	neg.l		(%sp)			# S.F. = scale1 - scale2
	add.l		%d0,(%sp)

	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
	lsr.b		&0x6,%d1
	mov.l		(%sp)+,%d0		# d0 = scale factor
	cmpi.l		%d0,&0x3fff-0x7ffe	# could result overflow?
	ble.w		fsgldiv_may_ovfl

	cmpi.l		%d0,&0x3fff-0x0000 	# will result underflow?
	beq.w		fsgldiv_may_unfl	# maybe
	bgt.w		fsgldiv_unfl		# yes; go handle underflow

fsgldiv_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# common exit: undo the scaling by rebuilding the true exponent
fsgldiv_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# remove scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
10807
# result may overflow: do the divide, then check the unscaled exponent
fsgldiv_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

	fmovm.x		&0x01,-(%sp)		# save result to stack
	mov.w		(%sp),%d1		# fetch new exponent
	add.l		&0xc,%sp		# clear result from stack
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# remove scale factor
	cmp.l		%d1,&0x7fff		# did divide overflow?
	blt.b		fsgldiv_normal_exit	# no; finish normally

fsgldiv_ovfl_tst:
	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsgldiv_ovfl_ena	# yes

# OVFL disabled: return the default overflow result from ovf_res()
fsgldiv_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6) 	# is result negative
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# kill precision
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

# OVFL (or INEX) enabled: also build the bias-adjusted EXOP in fp1
fsgldiv_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# remove scale factor
	subi.l		&0x6000,%d1		# subtract new bias
	andi.w		&0x7fff,%d1		# clear ms bit
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_ovfl_dis
10862
#
# The divide underflowed: redo it with round-to-zero so the unrounded
# magnitude is preserved, then dispatch on whether UNFL/INEX is enabled.
#
10863fsgldiv_unfl:
10864	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

10866	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

10868	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR: force RZ
10869	fmov.l		&0x0,%fpsr		# clear FPSR

10871	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

10873	fmov.l		%fpsr,%d1		# save status
10874	fmov.l		&0x0,%fpcr		# clear FPCR

10876	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

10878	mov.b		FPCR_ENABLE(%a6),%d1
10879	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
10880	bne.b		fsgldiv_unfl_ena	# yes

# UNFL disabled: let unf_res4 produce the default (denormalized/zero) result.
10882fsgldiv_unfl_dis:
10883	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

10885	lea		FP_SCR0(%a6),%a0	# pass: result addr
10886	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
10887	bsr.l		unf_res4		# calculate default result
10888	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
10889	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
10890	rts

#
# UNFL is enabled: redo the divide with the user's rounding control into
# fp1, re-bias the exponent up by 0x6000 to form the EXOP, then take the
# disabled path for fp0's default result.
#
10895fsgldiv_unfl_ena:
10896	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

10898	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10899	fmov.l		&0x0,%fpsr		# clear FPSR

10901	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

10903	fmov.l		&0x0,%fpcr		# clear FPCR

10905	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
10906	mov.l		%d2,-(%sp)		# save d2
10907	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10908	mov.l		%d1,%d2			# make a copy
10909	andi.l		&0x7fff,%d1		# strip sign
10910	andi.w		&0x8000,%d2		# keep old sign
10911	sub.l		%d0,%d1			# add scale factor
10912	addi.l		&0x6000,%d1		# add bias
10913	andi.w		&0x7fff,%d1		# clear top bit
10914	or.w		%d2,%d1			# concat old sign, new exp
10915	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10916	mov.l		(%sp)+,%d2		# restore d2
10917	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10918	bra.b		fsgldiv_unfl_dis
10919
#
# the divide operation MAY underflow:
# perform it, then compare |result| against 1.0 to decide. Results
# strictly above 1.0 cannot have underflowed; below 1.0 they did.
#
10923fsgldiv_may_unfl:
10924	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

10926	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10927	fmov.l		&0x0,%fpsr		# clear FPSR

10929	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

10931	fmov.l		%fpsr,%d1		# save status
10932	fmov.l		&0x0,%fpcr		# clear FPCR

10934	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

10936	fabs.x		%fp0,%fp1		# make a copy of result
10937	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
10938	fbgt.w		fsgldiv_normal_exit	# no; no underflow occurred
10939	fblt.w		fsgldiv_unfl		# yes; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 1. but,
# we don't know if the result was an underflow that rounded up to a 1
# or a normalized number that rounded down to a 1. so, redo the entire
# operation using RZ as the rounding mode to see what the pre-rounded
# result is. this case should be relatively rare.
#
10948	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1

10950	clr.l		%d1			# clear scratch register
10951	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode

10953	fmov.l		%d1,%fpcr		# set FPCR
10954	fmov.l		&0x0,%fpsr		# clear FPSR

10956	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

10958	fmov.l		&0x0,%fpcr		# clear FPCR
10959	fabs.x		%fp1			# make absolute value
10960	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
10961	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
10962	bra.w		fsgldiv_unfl		# yes; underflow occurred
10963
10964############################################################################
10965
#
# Divide: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG; each table row is the dst type, each column the
# src type. Unused slots point back at the table base (never taken).
#
10969fsgldiv_not_norm:
10970	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
10971	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)

10973	swbeg		&48
10974tbl_fsgldiv_op:
10975	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
10976	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
10977	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
10978	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
10979	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
10980	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
10981	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10982	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

10984	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
10985	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
10986	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
10987	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
10988	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
10989	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
10990	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10991	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

10993	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
10994	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
10995	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
10996	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
10997	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
10998	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
10999	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11000	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

11002	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
11003	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
11004	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
11005	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
11006	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
11007	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
11008	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11009	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

11011	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
11012	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
11013	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
11014	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
11015	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
11016	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
11017	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11018	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

11020	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
11021	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
11022	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
11023	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
11024	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
11025	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
11026	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11027	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

# Trampolines: special-operand handling is shared with the fdiv routines.
11029fsgldiv_res_qnan:
11030	bra.l		res_qnan
11031fsgldiv_res_snan:
11032	bra.l		res_snan
11033fsgldiv_res_operr:
11034	bra.l		res_operr
11035fsgldiv_inf_load:
11036	bra.l		fdiv_inf_load
11037fsgldiv_zero_load:
11038	bra.l		fdiv_zero_load
11039fsgldiv_inf_dst:
11040	bra.l		fdiv_inf_dst
11041
11042#########################################################################
11043# XDEF ****************************************************************	#
11044#	fadd(): emulates the fadd instruction				#
11045#	fsadd(): emulates the fsadd instruction				#
11046#	fdadd(): emulates the fdadd instruction				#
11047#									#
11048# XREF ****************************************************************	#
11049# 	addsub_scaler2() - scale the operands so they won't take exc	#
11050#	ovf_res() - return default overflow result			#
11051#	unf_res() - return default underflow result			#
11052#	res_qnan() - set QNAN result					#
11053# 	res_snan() - set SNAN result					#
11054#	res_operr() - set OPERR result					#
11055#	scale_to_zero_src() - set src operand exponent equal to zero	#
11056#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
11057#									#
11058# INPUT ***************************************************************	#
11059#	a0 = pointer to extended precision source operand		#
11060# 	a1 = pointer to extended precision destination operand		#
11061#									#
11062# OUTPUT **************************************************************	#
11063#	fp0 = result							#
11064#	fp1 = EXOP (if exception occurred)				#
11065#									#
11066# ALGORITHM ***********************************************************	#
11067# 	Handle NANs, infinities, and zeroes as special cases. Divide	#
11068# norms into extended, single, and double precision.			#
11069#	Do addition after scaling exponents such that exception won't	#
11070# occur. Then, check result exponent to see if exception would have	#
11071# occurred. If so, return default result and maybe EXOP. Else, insert	#
11072# the correct result exponent and return. Set FPSR bits as appropriate.	#
11073#									#
11074#########################################################################
11075
# fsadd: single-precision rounded add — force sgl precision into the
# rnd-control byte (d0) and branch into the common fadd path.
11076	global		fsadd
11077fsadd:
11078	andi.b		&0x30,%d0		# clear rnd prec
11079	ori.b		&s_mode*0x10,%d0	# insert sgl prec
11080	bra.b		fadd
11081
# fdadd: double-precision rounded add — force dbl precision into the
# rnd-control byte (d0) and fall through into fadd below.
11082	global		fdadd
11083fdadd:
11084	andi.b		&0x30,%d0		# clear rnd prec
11085	ori.b		&d_mode*0x10,%d0	# insert dbl prec
11086
# fadd: common add path. In: a0/a1 -> extended src/dst operands,
# d0 = rnd prec/mode byte, STAG/DTAG = operand type tags.
# Out: fp0 = result (fp1 = EXOP on enabled exception, via sub-paths).
11087	global		fadd
11088fadd:
11089	mov.l		%d0,L_SCR3(%a6)		# store rnd info

11091	clr.w		%d1
11092	mov.b		DTAG(%a6),%d1
11093	lsl.b		&0x3,%d1
11094	or.b		STAG(%a6),%d1		# combine src tags

11096	bne.w		fadd_not_norm		# optimize on non-norm input

#
# ADD: norms and denorms
#
11101fadd_norm:
11102	bsr.l		addsub_scaler2		# scale exponents

11104fadd_zero_entry:
11105	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

11107	fmov.l		&0x0,%fpsr		# clear FPSR
11108	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

11110	fadd.x		FP_SCR0(%a6),%fp0	# execute add

11112	fmov.l		&0x0,%fpcr		# clear FPCR
11113	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z

11115	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits

11117	fbeq.w		fadd_zero_exit		# if result is zero, end now

11119	mov.l		%d2,-(%sp)		# save d2

11121	fmovm.x		&0x01,-(%sp)		# save result to stack

# d1 = rnd precision index (0=ext,1=sgl,2=dbl) used to pick the
# overflow/underflow threshold from the tables below.
11123	mov.w		2+L_SCR3(%a6),%d1
11124	lsr.b		&0x6,%d1

11126	mov.w		(%sp),%d2		# fetch new sign, exp
11127	andi.l		&0x7fff,%d2		# strip sign
11128	sub.l		%d0,%d2			# add scale factor

11130	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11131	bge.b		fadd_ovfl		# yes

11133	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
11134	blt.w		fadd_unfl		# yes
11135	beq.w		fadd_may_unfl		# maybe; go find out

# In range: re-insert the true exponent and return the result.
11137fadd_normal:
11138	mov.w		(%sp),%d1
11139	andi.w		&0x8000,%d1		# keep sign
11140	or.w		%d2,%d1			# concat sign,new exp
11141	mov.w		%d1,(%sp)		# insert new exponent

11143	fmovm.x		(%sp)+,&0x80		# return result in fp0

11145	mov.l		(%sp)+,%d2		# restore d2
11146	rts
11147
# Zero result: fp0 already holds the correctly-signed zero from the fadd.
11148fadd_zero_exit:
#	fmov.s		&0x00000000,%fp0	# return zero in fp0
11150	rts

# Biased-exponent thresholds, indexed by rnd precision (ext/sgl/dbl).
11152tbl_fadd_ovfl:
11153	long		0x7fff			# ext ovfl
11154	long		0x407f			# sgl ovfl
11155	long		0x43ff			# dbl ovfl

11157tbl_fadd_unfl:
11158	long	        0x0000			# ext unfl
11159	long		0x3f81			# sgl unfl
11160	long		0x3c01			# dbl unfl
11161
# The add overflowed. Stack still holds d2 and the 12-byte scaled result.
11162fadd_ovfl:
11163	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

11165	mov.b		FPCR_ENABLE(%a6),%d1
11166	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
11167	bne.b		fadd_ovfl_ena		# yes

11169	add.l		&0xc,%sp		# discard scaled result
# OVFL disabled: return the default overflow result for the sign/rnd mode.
11170fadd_ovfl_dis:
11171	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
11172	sne		%d1			# set sign param accordingly
11173	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
11174	bsr.l		ovf_res			# calculate default result
11175	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
11176	fmovm.x		(%a0),&0x80		# return default result in fp0
11177	mov.l		(%sp)+,%d2		# restore d2
11178	rts

# OVFL/INEX enabled: build the EXOP (exponent re-biased down by 0x6000).
11180fadd_ovfl_ena:
11181	mov.b		L_SCR3(%a6),%d1
11182	andi.b		&0xc0,%d1		# is precision extended?
11183	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl

11185fadd_ovfl_ena_cont:
11186	mov.w		(%sp),%d1
11187	andi.w		&0x8000,%d1		# keep sign
11188	subi.l		&0x6000,%d2		# add extra bias
11189	andi.w		&0x7fff,%d2
11190	or.w		%d2,%d1			# concat sign,new exp
11191	mov.w		%d1,(%sp)		# insert new exponent

11193	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
11194	bra.b		fadd_ovfl_dis

# sgl/dbl precision: redo the add rounded to extended (rnd mode only)
# so the EXOP mantissa carries full precision.
11196fadd_ovfl_ena_sd:
11197	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

11199	mov.l		L_SCR3(%a6),%d1
11200	andi.b		&0x30,%d1		# keep rnd mode
11201	fmov.l		%d1,%fpcr		# set FPCR

11203	fadd.x		FP_SCR0(%a6),%fp0	# execute add

11205	fmov.l		&0x0,%fpcr		# clear FPCR

11207	add.l		&0xc,%sp		# replace old result on stack
11208	fmovm.x		&0x01,-(%sp)
11209	bra.b		fadd_ovfl_ena_cont
11210
# The add underflowed: redo it with round-to-zero to preserve the
# unrounded magnitude, then dispatch on UNFL/INEX enable bits.
11211fadd_unfl:
11212	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

11214	add.l		&0xc,%sp		# discard scaled result

11216	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

11218	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR: force RZ
11219	fmov.l		&0x0,%fpsr		# clear FPSR

11221	fadd.x		FP_SCR0(%a6),%fp0	# execute add

11223	fmov.l		&0x0,%fpcr		# clear FPCR
11224	fmov.l		%fpsr,%d1		# save status

11226	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

11228	mov.b		FPCR_ENABLE(%a6),%d1
11229	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
11230	bne.b		fadd_unfl_ena		# yes

# UNFL disabled: let unf_res produce the default denormalized/zero result.
11232fadd_unfl_dis:
11233	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

11235	lea		FP_SCR0(%a6),%a0	# pass: result addr
11236	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
11237	bsr.l		unf_res			# calculate default result
11238	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
11239	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
11240	mov.l		(%sp)+,%d2		# restore d2
11241	rts

# UNFL/INEX enabled: redo the add into fp1 and build the EXOP
# (exponent re-biased up by 0x6000), returned in fp1.
11243fadd_unfl_ena:
11244	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

11246	mov.l		L_SCR3(%a6),%d1
11247	andi.b		&0xc0,%d1		# is precision extended?
11248	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl

11250	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

11252fadd_unfl_ena_cont:
11253	fmov.l		&0x0,%fpsr		# clear FPSR

11255	fadd.x		FP_SCR0(%a6),%fp1	# execute add

11257	fmov.l		&0x0,%fpcr		# clear FPCR

11259	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
11260	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11261	mov.l		%d1,%d2			# make a copy
11262	andi.l		&0x7fff,%d1		# strip sign
11263	andi.w		&0x8000,%d2		# keep old sign
11264	sub.l		%d0,%d1			# add scale factor
11265	addi.l		&0x6000,%d1		# add new bias
11266	andi.w		&0x7fff,%d1		# clear top bit
11267	or.w		%d2,%d1			# concat sign,new exp
11268	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11269	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
11270	bra.w		fadd_unfl_dis

# sgl/dbl precision: use rnd mode only (extended precision) for the EXOP.
11272fadd_unfl_ena_sd:
11273	mov.l		L_SCR3(%a6),%d1
11274	andi.b		&0x30,%d1		# use only rnd mode
11275	fmov.l		%d1,%fpcr		# set FPCR

11277	bra.b		fadd_unfl_ena_cont
11278
#
# result is equal to the smallest normalized number in the selected precision
# if the precision is extended, this result could not have come from an
# underflow that rounded up.
#
11284fadd_may_unfl:
11285	mov.l		L_SCR3(%a6),%d1
11286	andi.b		&0xc0,%d1		# extended precision?
11287	beq.w		fadd_normal		# yes; no underflow occurred

11289	mov.l		0x4(%sp),%d1		# extract hi(man)
11290	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
11291	bne.w		fadd_normal		# no; no underflow occurred

11293	tst.l		0x8(%sp)		# is lo(man) = 0x0?
11294	bne.w		fadd_normal		# no; no underflow occurred

11296	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11297	beq.w		fadd_normal		# no; no underflow occurred

#
# ok, so now the result has a exponent equal to the smallest normalized
# exponent for the selected precision. also, the mantissa is equal to
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
# g,r,s.
# now, we must determine whether the pre-rounded result was an underflow
# rounded "up" or a normalized number rounded "down".
# so, we do this by re-executing the add using RZ as the rounding mode and
# seeing if the new result is smaller or equal to the current result.
#
11309	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

11311	mov.l		L_SCR3(%a6),%d1
11312	andi.b		&0xc0,%d1		# keep rnd prec
11313	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
11314	fmov.l		%d1,%fpcr		# set FPCR
11315	fmov.l		&0x0,%fpsr		# clear FPSR

11317	fadd.x		FP_SCR0(%a6),%fp1	# execute add

11319	fmov.l		&0x0,%fpcr		# clear FPCR

11321	fabs.x		%fp0			# compare absolute values
11322	fabs.x		%fp1
11323	fcmp.x		%fp0,%fp1		# is first result > second?

11325	fbgt.w		fadd_unfl		# yes; it's an underflow
11326	bra.w		fadd_normal		# no; it's not an underflow
11327
11328##########################################################################
11329
#
# Add: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG; rows index the dst type, columns the src type.
#
11333fadd_not_norm:
11334	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1
11335	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)

11337	swbeg		&48
11338tbl_fadd_op:
11339	short		fadd_norm	- tbl_fadd_op # NORM + NORM
11340	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
11341	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
11342	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
11343	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
11344	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
11345	short		tbl_fadd_op	- tbl_fadd_op #
11346	short		tbl_fadd_op	- tbl_fadd_op #

11348	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
11349	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
11350	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
11351	short		fadd_res_qnan	- tbl_fadd_op # ZERO + QNAN
11352	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
11353	short		fadd_res_snan	- tbl_fadd_op # ZERO + SNAN
11354	short		tbl_fadd_op	- tbl_fadd_op #
11355	short		tbl_fadd_op	- tbl_fadd_op #

11357	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
11358	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
11359	short		fadd_inf_2	- tbl_fadd_op # INF + INF
11360	short		fadd_res_qnan	- tbl_fadd_op # INF + QNAN
11361	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
11362	short		fadd_res_snan	- tbl_fadd_op # INF + SNAN
11363	short		tbl_fadd_op	- tbl_fadd_op #
11364	short		tbl_fadd_op	- tbl_fadd_op #

11366	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
11367	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
11368	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
11369	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
11370	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
11371	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
11372	short		tbl_fadd_op	- tbl_fadd_op #
11373	short		tbl_fadd_op	- tbl_fadd_op #

11375	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
11376	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
11377	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
11378	short		fadd_res_qnan	- tbl_fadd_op # DENORM + QNAN
11379	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
11380	short		fadd_res_snan	- tbl_fadd_op # DENORM + SNAN
11381	short		tbl_fadd_op	- tbl_fadd_op #
11382	short		tbl_fadd_op	- tbl_fadd_op #

11384	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
11385	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
11386	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
11387	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
11388	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
11389	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
11390	short		tbl_fadd_op	- tbl_fadd_op #
11391	short		tbl_fadd_op	- tbl_fadd_op #

# Trampolines to the shared NAN result builders.
11393fadd_res_qnan:
11394	bra.l		res_qnan
11395fadd_res_snan:
11396	bra.l		res_snan
11397
#
# both operands are ZEROes
#
11401fadd_zero_2:
11402	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
11403	mov.b		DST_EX(%a1),%d1
11404	eor.b		%d0,%d1
11405	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)

# the signs are the same. so determine whether they are positive or negative
# and return the appropriately signed zero.
11409	tst.b		%d0			# are ZEROes positive or negative?
11410	bmi.b		fadd_zero_rm		# negative
11411	fmov.s		&0x00000000,%fp0	# return +ZERO
11412	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11413	rts

#
# the ZEROes have opposite signs:
# - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
# - -ZERO is returned in the case of RM.
#
11420fadd_zero_2_chk_rm:
11421	mov.b		3+L_SCR3(%a6),%d1
11422	andi.b		&0x30,%d1		# extract rnd mode
11423	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
11424	beq.b		fadd_zero_rm		# yes
11425	fmov.s		&0x00000000,%fp0	# return +ZERO
11426	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11427	rts

11429fadd_zero_rm:
11430	fmov.s		&0x80000000,%fp0	# return -ZERO
11431	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
11432	rts
11433
#
# one operand is a ZERO and the other is a DENORM or NORM. scale
# the DENORM or NORM, substitute a true zero for the ZERO operand,
# and jump to the regular fadd routine.
#
11438fadd_zero_dst:
11439	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
11440	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
11441	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
11442	bsr.l		scale_to_zero_src	# scale the operand
11443	clr.w		FP_SCR1_EX(%a6)		# dst := +0.0
11444	clr.l		FP_SCR1_HI(%a6)
11445	clr.l		FP_SCR1_LO(%a6)
11446	bra.w		fadd_zero_entry		# go execute fadd

11448fadd_zero_src:
11449	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
11450	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
11451	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
11452	bsr.l		scale_to_zero_dst	# scale the operand
11453	clr.w		FP_SCR0_EX(%a6)		# src := +0.0
11454	clr.l		FP_SCR0_HI(%a6)
11455	clr.l		FP_SCR0_LO(%a6)
11456	bra.w		fadd_zero_entry		# go execute fadd
11457
#
# both operands are INFs. an OPERR will result if the INFs have
# different signs. else, an INF of the same sign is returned
#
11462fadd_inf_2:
11463	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
11464	mov.b		DST_EX(%a1),%d1
11465	eor.b		%d1,%d0
11466	bmi.l		res_operr		# weed out (-INF)+(+INF)

# ok, so it's not an OPERR. but, we do have to remember to return the
# src INF since that's where the 881/882 gets the j-bit from...

#
# operands are INF (src) and one of {ZERO, INF, DENORM, NORM}:
# return the src INF with INF (and NEG if negative) condition codes.
#
11474fadd_inf_src:
11475	fmovm.x		SRC(%a0),&0x80		# return src INF
11476	tst.b		SRC_EX(%a0)		# is INF positive?
11477	bpl.b		fadd_inf_done		# yes; we're done
11478	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11479	rts

#
# operands are INF (dst) and one of {ZERO, INF, DENORM, NORM}:
# return the dst INF with INF (and NEG if negative) condition codes.
#
11484fadd_inf_dst:
11485	fmovm.x		DST(%a1),&0x80		# return dst INF
11486	tst.b		DST_EX(%a1)		# is INF positive?
11487	bpl.b		fadd_inf_done		# yes; we're done
11488	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11489	rts

11491fadd_inf_done:
11492	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
11493	rts
11494
11495#########################################################################
11496# XDEF ****************************************************************	#
11497#	fsub(): emulates the fsub instruction				#
11498#	fssub(): emulates the fssub instruction				#
11499#	fdsub(): emulates the fdsub instruction				#
11500#									#
11501# XREF ****************************************************************	#
11502# 	addsub_scaler2() - scale the operands so they won't take exc	#
11503#	ovf_res() - return default overflow result			#
11504#	unf_res() - return default underflow result			#
11505#	res_qnan() - set QNAN result					#
11506# 	res_snan() - set SNAN result					#
11507#	res_operr() - set OPERR result					#
11508#	scale_to_zero_src() - set src operand exponent equal to zero	#
11509#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
11510#									#
11511# INPUT ***************************************************************	#
11512#	a0 = pointer to extended precision source operand		#
11513# 	a1 = pointer to extended precision destination operand		#
11514#									#
11515# OUTPUT **************************************************************	#
11516#	fp0 = result							#
11517#	fp1 = EXOP (if exception occurred)				#
11518#									#
11519# ALGORITHM ***********************************************************	#
11520# 	Handle NANs, infinities, and zeroes as special cases. Divide	#
11521# norms into extended, single, and double precision.			#
11522#	Do subtraction after scaling exponents such that exception won't#
11523# occur. Then, check result exponent to see if exception would have	#
11524# occurred. If so, return default result and maybe EXOP. Else, insert	#
11525# the correct result exponent and return. Set FPSR bits as appropriate.	#
11526#									#
11527#########################################################################
11528
# fssub: single-precision rounded subtract — force sgl precision into the
# rnd-control byte (d0) and branch into the common fsub path.
11529	global		fssub
11530fssub:
11531	andi.b		&0x30,%d0		# clear rnd prec
11532	ori.b		&s_mode*0x10,%d0	# insert sgl prec
11533	bra.b		fsub
11534
# fdsub: double-precision rounded subtract — force dbl precision into the
# rnd-control byte (d0) and fall through into fsub below.
11535	global		fdsub
11536fdsub:
11537	andi.b		&0x30,%d0		# clear rnd prec
11538	ori.b		&d_mode*0x10,%d0	# insert dbl prec
11539
# fsub: common subtract path (mirrors fadd). In: a0/a1 -> extended src/dst
# operands, d0 = rnd prec/mode byte, STAG/DTAG = operand type tags.
# Out: fp0 = result (fp1 = EXOP on enabled exception, via sub-paths).
11540	global		fsub
11541fsub:
11542	mov.l		%d0,L_SCR3(%a6)		# store rnd info

11544	clr.w		%d1
11545	mov.b		DTAG(%a6),%d1
11546	lsl.b		&0x3,%d1
11547	or.b		STAG(%a6),%d1		# combine src tags

11549	bne.w		fsub_not_norm		# optimize on non-norm input

#
# SUB: norms and denorms
#
11554fsub_norm:
11555	bsr.l		addsub_scaler2		# scale exponents

11557fsub_zero_entry:
11558	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

11560	fmov.l		&0x0,%fpsr		# clear FPSR
11561	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

11563	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

11565	fmov.l		&0x0,%fpcr		# clear FPCR
11566	fmov.l		%fpsr,%d1		# fetch INEX2, N, Z

11568	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits

11570	fbeq.w		fsub_zero_exit		# if result zero, end now

11572	mov.l		%d2,-(%sp)		# save d2

11574	fmovm.x		&0x01,-(%sp)		# save result to stack

# d1 = rnd precision index (0=ext,1=sgl,2=dbl) used to pick the
# overflow/underflow threshold from the tables below.
11576	mov.w		2+L_SCR3(%a6),%d1
11577	lsr.b		&0x6,%d1

11579	mov.w		(%sp),%d2		# fetch new exponent
11580	andi.l		&0x7fff,%d2		# strip sign
11581	sub.l		%d0,%d2			# add scale factor

11583	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11584	bge.b		fsub_ovfl		# yes

11586	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
11587	blt.w		fsub_unfl		# yes
11588	beq.w		fsub_may_unfl		# maybe; go find out

# In range: re-insert the true exponent and return the result.
11590fsub_normal:
11591	mov.w		(%sp),%d1
11592	andi.w		&0x8000,%d1		# keep sign
11593	or.w		%d2,%d1			# concat sign,new exp
11594	mov.w		%d1,(%sp)		# insert new exponent

11596	fmovm.x		(%sp)+,&0x80		# return result in fp0

11598	mov.l		(%sp)+,%d2		# restore d2
11599	rts

# Zero result: fp0 already holds the correctly-signed zero from the fsub.
11601fsub_zero_exit:
#	fmov.s		&0x00000000,%fp0	# return zero in fp0
11603	rts

# Biased-exponent thresholds, indexed by rnd precision (ext/sgl/dbl).
11605tbl_fsub_ovfl:
11606	long		0x7fff			# ext ovfl
11607	long		0x407f			# sgl ovfl
11608	long		0x43ff			# dbl ovfl

11610tbl_fsub_unfl:
11611	long	        0x0000			# ext unfl
11612	long		0x3f81			# sgl unfl
11613	long		0x3c01			# dbl unfl
11614
# The subtract overflowed. Stack still holds d2 and the 12-byte scaled result.
11615fsub_ovfl:
11616	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

11618	mov.b		FPCR_ENABLE(%a6),%d1
11619	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
11620	bne.b		fsub_ovfl_ena		# yes

11622	add.l		&0xc,%sp		# discard scaled result
# OVFL disabled: return the default overflow result for the sign/rnd mode.
11623fsub_ovfl_dis:
11624	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
11625	sne		%d1			# set sign param accordingly
11626	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
11627	bsr.l		ovf_res			# calculate default result
11628	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
11629	fmovm.x		(%a0),&0x80		# return default result in fp0
11630	mov.l		(%sp)+,%d2		# restore d2
11631	rts

# OVFL/INEX enabled: build the EXOP (exponent re-biased down by 0x6000).
11633fsub_ovfl_ena:
11634	mov.b		L_SCR3(%a6),%d1
11635	andi.b		&0xc0,%d1		# is precision extended?
11636	bne.b		fsub_ovfl_ena_sd	# no

11638fsub_ovfl_ena_cont:
11639	mov.w		(%sp),%d1		# fetch {sgn,exp}
11640	andi.w		&0x8000,%d1		# keep sign
11641	subi.l		&0x6000,%d2		# subtract new bias
11642	andi.w		&0x7fff,%d2		# clear top bit
11643	or.w		%d2,%d1			# concat sign,exp
11644	mov.w		%d1,(%sp)		# insert new exponent

11646	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
11647	bra.b		fsub_ovfl_dis

# sgl/dbl precision: redo the subtract rounded to extended (rnd mode only)
# so the EXOP mantissa carries full precision.
11649fsub_ovfl_ena_sd:
11650	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

11652	mov.l		L_SCR3(%a6),%d1
11653	andi.b		&0x30,%d1		# keep rnd mode only
11654	fmov.l		%d1,%fpcr		# set FPCR

11656	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

11658	fmov.l		&0x0,%fpcr		# clear FPCR

11660	add.l		&0xc,%sp		# replace old result on stack
11661	fmovm.x		&0x01,-(%sp)
11662	bra.b		fsub_ovfl_ena_cont
11663
# The subtract underflowed: redo it with round-to-zero to preserve the
# unrounded magnitude, then dispatch on UNFL/INEX enable bits.
11664fsub_unfl:
11665	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

11667	add.l		&0xc,%sp		# discard scaled result

11669	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

11671	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR: force RZ
11672	fmov.l		&0x0,%fpsr		# clear FPSR

11674	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

11676	fmov.l		&0x0,%fpcr		# clear FPCR
11677	fmov.l		%fpsr,%d1		# save status

11679	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

11681	mov.b		FPCR_ENABLE(%a6),%d1
11682	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
11683	bne.b		fsub_unfl_ena		# yes

# UNFL disabled: let unf_res produce the default denormalized/zero result.
11685fsub_unfl_dis:
11686	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

11688	lea		FP_SCR0(%a6),%a0	# pass: result addr
11689	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
11690	bsr.l		unf_res			# calculate default result
11691	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
11692	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
11693	mov.l		(%sp)+,%d2		# restore d2
11694	rts

# UNFL/INEX enabled: redo the subtract into fp1 and build the EXOP
# (exponent re-biased up by 0x6000), returned in fp1.
11696fsub_unfl_ena:
11697	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

11699	mov.l		L_SCR3(%a6),%d1
11700	andi.b		&0xc0,%d1		# is precision extended?
11701	bne.b		fsub_unfl_ena_sd	# no

11703	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

11705fsub_unfl_ena_cont:
11706	fmov.l		&0x0,%fpsr		# clear FPSR

11708	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract

11710	fmov.l		&0x0,%fpcr		# clear FPCR

11712	fmovm.x		&0x40,FP_SCR0(%a6)	# store result to stack
11713	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11714	mov.l		%d1,%d2			# make a copy
11715	andi.l		&0x7fff,%d1		# strip sign
11716	andi.w		&0x8000,%d2		# keep old sign
11717	sub.l		%d0,%d1			# add scale factor
11718	addi.l		&0x6000,%d1		# add new bias
11719	andi.w		&0x7fff,%d1		# clear top bit
11720	or.w		%d2,%d1			# concat sgn,exp
11721	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11722	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
11723	bra.w		fsub_unfl_dis

# sgl/dbl precision: use rnd mode only (extended precision) for the EXOP.
11725fsub_unfl_ena_sd:
11726	mov.l		L_SCR3(%a6),%d1
11727	andi.b		&0x30,%d1		# use only rnd mode
11728	fmov.l		%d1,%fpcr		# set FPCR

11730	bra.b		fsub_unfl_ena_cont
11731
#
# result is equal to the smallest normalized number in the selected precision
# if the precision is extended, this result could not have come from an
# underflow that rounded up.
#
11737fsub_may_unfl:
11738	mov.l		L_SCR3(%a6),%d1
11739	andi.b		&0xc0,%d1		# fetch rnd prec
11740	beq.w		fsub_normal		# yes; no underflow occurred

11742	mov.l		0x4(%sp),%d1		# extract hi(man)
11743	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
11744	bne.w		fsub_normal		# no; no underflow occurred

11746	tst.l		0x8(%sp)		# is lo(man) = 0x0?
11747	bne.w		fsub_normal		# no; no underflow occurred

11749	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11750	beq.w		fsub_normal		# no; no underflow occurred

#
# ok, so now the result has a exponent equal to the smallest normalized
# exponent for the selected precision. also, the mantissa is equal to
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
# g,r,s.
# now, we must determine whether the pre-rounded result was an underflow
# rounded "up" or a normalized number rounded "down".
# so, we do this by re-executing the subtract using RZ as the rounding mode
# and seeing if the new result is smaller or equal to the current result.
#
11762	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

11764	mov.l		L_SCR3(%a6),%d1
11765	andi.b		&0xc0,%d1		# keep rnd prec
11766	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
11767	fmov.l		%d1,%fpcr		# set FPCR
11768	fmov.l		&0x0,%fpsr		# clear FPSR

11770	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract

11772	fmov.l		&0x0,%fpcr		# clear FPCR

11774	fabs.x		%fp0			# compare absolute values
11775	fabs.x		%fp1
11776	fcmp.x		%fp0,%fp1		# is first result > second?

11778	fbgt.w		fsub_unfl		# yes; it's an underflow
11779	bra.w		fsub_normal		# no; it's not an underflow
11780
11781##########################################################################
11782
11783#
11784# Sub: inputs are not both normalized; what are they?
11785#
11786fsub_not_norm:
11787	mov.w		(tbl_fsub_op.b,%pc,%d1.w*2),%d1
11788	jmp		(tbl_fsub_op.b,%pc,%d1.w*1)
11789
11790	swbeg		&48
11791tbl_fsub_op:
11792	short		fsub_norm	- tbl_fsub_op # NORM - NORM
11793	short		fsub_zero_src	- tbl_fsub_op # NORM - ZERO
11794	short		fsub_inf_src	- tbl_fsub_op # NORM - INF
11795	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
11796	short		fsub_norm	- tbl_fsub_op # NORM - DENORM
11797	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
11798	short		tbl_fsub_op	- tbl_fsub_op #
11799	short		tbl_fsub_op	- tbl_fsub_op #
11800
11801	short		fsub_zero_dst	- tbl_fsub_op # ZERO - NORM
11802	short		fsub_zero_2	- tbl_fsub_op # ZERO - ZERO
11803	short		fsub_inf_src	- tbl_fsub_op # ZERO - INF
11804	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
11805	short		fsub_zero_dst	- tbl_fsub_op # ZERO - DENORM
11806	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
11807	short		tbl_fsub_op	- tbl_fsub_op #
11808	short		tbl_fsub_op	- tbl_fsub_op #
11809
11810	short		fsub_inf_dst	- tbl_fsub_op # INF - NORM
11811	short		fsub_inf_dst	- tbl_fsub_op # INF - ZERO
11812	short		fsub_inf_2	- tbl_fsub_op # INF - INF
11813	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
11814	short		fsub_inf_dst	- tbl_fsub_op # INF - DENORM
11815	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
11816	short		tbl_fsub_op	- tbl_fsub_op #
11817	short		tbl_fsub_op	- tbl_fsub_op #
11818
11819	short		fsub_res_qnan	- tbl_fsub_op # QNAN - NORM
11820	short		fsub_res_qnan	- tbl_fsub_op # QNAN - ZERO
11821	short		fsub_res_qnan	- tbl_fsub_op # QNAN - INF
11822	short		fsub_res_qnan	- tbl_fsub_op # QNAN - QNAN
11823	short		fsub_res_qnan	- tbl_fsub_op # QNAN - DENORM
11824	short		fsub_res_snan	- tbl_fsub_op # QNAN - SNAN
11825	short		tbl_fsub_op	- tbl_fsub_op #
11826	short		tbl_fsub_op	- tbl_fsub_op #
11827
11828	short		fsub_norm	- tbl_fsub_op # DENORM - NORM
11829	short		fsub_zero_src	- tbl_fsub_op # DENORM - ZERO
11830	short		fsub_inf_src	- tbl_fsub_op # DENORM - INF
11831	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
11832	short		fsub_norm	- tbl_fsub_op # DENORM - DENORM
11833	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
11834	short		tbl_fsub_op	- tbl_fsub_op #
11835	short		tbl_fsub_op	- tbl_fsub_op #
11836
11837	short		fsub_res_snan	- tbl_fsub_op # SNAN - NORM
11838	short		fsub_res_snan	- tbl_fsub_op # SNAN - ZERO
11839	short		fsub_res_snan	- tbl_fsub_op # SNAN - INF
11840	short		fsub_res_snan	- tbl_fsub_op # SNAN - QNAN
11841	short		fsub_res_snan	- tbl_fsub_op # SNAN - DENORM
11842	short		fsub_res_snan	- tbl_fsub_op # SNAN - SNAN
11843	short		tbl_fsub_op	- tbl_fsub_op #
11844	short		tbl_fsub_op	- tbl_fsub_op #
11845
11846fsub_res_qnan:
11847	bra.l		res_qnan
11848fsub_res_snan:
11849	bra.l		res_snan
11850
11851#
11852# both operands are ZEROes
11853#
11854fsub_zero_2:
11855	mov.b		SRC_EX(%a0),%d0
11856	mov.b		DST_EX(%a1),%d1
11857	eor.b		%d1,%d0
11858	bpl.b		fsub_zero_2_chk_rm
11859
11860# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
11861	tst.b		%d0			# is dst negative?
11862	bmi.b		fsub_zero_2_rm		# yes
11863	fmov.s		&0x00000000,%fp0	# no; return +ZERO
11864	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11865	rts
11866
11867#
11868# the ZEROes have the same signs:
11869# - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
11870# - -ZERO is returned in the case of RM.
11871#
11872fsub_zero_2_chk_rm:
11873	mov.b		3+L_SCR3(%a6),%d1
11874	andi.b		&0x30,%d1		# extract rnd mode
11875	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode = RM?
11876	beq.b		fsub_zero_2_rm		# yes
11877	fmov.s		&0x00000000,%fp0	# no; return +ZERO
11878	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11879	rts
11880
11881fsub_zero_2_rm:
11882	fmov.s		&0x80000000,%fp0	# return -ZERO
11883	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/NEG
11884	rts
11885
11886#
11887# one operand is a ZERO and the other is a DENORM or a NORM.
11888# scale the DENORM or NORM and jump to the regular fsub routine.
11889#
11890fsub_zero_dst:
11891	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
11892	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
11893	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
11894	bsr.l		scale_to_zero_src	# scale the operand
11895	clr.w		FP_SCR1_EX(%a6)
11896	clr.l		FP_SCR1_HI(%a6)
11897	clr.l		FP_SCR1_LO(%a6)
11898	bra.w		fsub_zero_entry		# go execute fsub
11899
11900fsub_zero_src:
11901	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
11902	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
11903	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
11904	bsr.l		scale_to_zero_dst	# scale the operand
11905	clr.w		FP_SCR0_EX(%a6)
11906	clr.l		FP_SCR0_HI(%a6)
11907	clr.l		FP_SCR0_LO(%a6)
11908	bra.w		fsub_zero_entry		# go execute fsub
11909
11910#
11911# both operands are INFs. an OPERR will result if the INFs have the
11912# same signs. else,
11913#
11914fsub_inf_2:
11915	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
11916	mov.b		DST_EX(%a1),%d1
11917	eor.b		%d1,%d0
11918	bpl.l		res_operr		# weed out (-INF)+(+INF)
11919
11920# ok, so it's not an OPERR. but we do have to remember to return
11921# the src INF since that's where the 881/882 gets the j-bit.
11922
11923fsub_inf_src:
11924	fmovm.x		SRC(%a0),&0x80		# return src INF
11925	fneg.x		%fp0			# invert sign
11926	fbge.w		fsub_inf_done		# sign is now positive
11927	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11928	rts
11929
11930fsub_inf_dst:
11931	fmovm.x		DST(%a1),&0x80		# return dst INF
11932	tst.b		DST_EX(%a1)		# is INF negative?
11933	bpl.b		fsub_inf_done		# no
11934	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11935	rts
11936
11937fsub_inf_done:
11938	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
11939	rts
11940
11941#########################################################################
11942# XDEF ****************************************************************	#
11943# 	fsqrt(): emulates the fsqrt instruction				#
11944#	fssqrt(): emulates the fssqrt instruction			#
11945#	fdsqrt(): emulates the fdsqrt instruction			#
11946#									#
11947# XREF ****************************************************************	#
11948#	scale_sqrt() - scale the source operand				#
11949#	unf_res() - return default underflow result			#
11950#	ovf_res() - return default overflow result			#
11951# 	res_qnan_1op() - return QNAN result				#
11952# 	res_snan_1op() - return SNAN result				#
11953#									#
11954# INPUT ***************************************************************	#
11955#	a0 = pointer to extended precision source operand		#
11956#	d0  rnd prec,mode						#
11957#									#
11958# OUTPUT **************************************************************	#
11959#	fp0 = result							#
11960#	fp1 = EXOP (if exception occurred)				#
11961#									#
11962# ALGORITHM ***********************************************************	#
11963#	Handle NANs, infinities, and zeroes as special cases. Divide	#
11964# norms/denorms into ext/sgl/dbl precision.				#
11965#	For norms/denorms, scale the exponents such that a sqrt		#
11966# instruction won't cause an exception. Use the regular fsqrt to	#
11967# compute a result. Check if the regular operands would have taken	#
11968# an exception. If so, return the default overflow/underflow result	#
11969# and return the EXOP if exceptions are enabled. Else, scale the 	#
11970# result operand to the proper exponent.				#
11971#									#
11972#########################################################################
11973
	global		fssqrt
fssqrt:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fsqrt

	global		fdsqrt
fdsqrt:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision

# fall through to fsqrt w/ the dbl prec field inserted

	global		fsqrt
fsqrt:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	clr.w		%d1
	mov.b		STAG(%a6),%d1		# fetch src operand type tag
	bne.w		fsqrt_not_norm		# optimize on non-norm input

#
# SQUARE ROOT: norms and denorms ONLY!
#
fsqrt_norm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.l		res_operr		# yes; sqrt of -NORM is OPERR

	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl

# precision is extended: the sqrt can't overflow or underflow, so
# execute it directly on the source operand
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsqrt.x		(%a0),%fp0		# execute square root

	fmov.l		%fpsr,%d1
	or.l		%d1,USER_FPSR(%a6)	# set N,INEX

	rts
12011
#
# DENORM source: weed out the negative (OPERR) case, then scale the
# operand into FP_SCR0 so a regular fsqrt can be performed.
#
fsqrt_denorm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.l		res_operr		# yes; sqrt of -DENORM is OPERR

	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_sqrt		# calculate scale factor

	bra.w		fsqrt_sd_normal
12026
12027#
12028# operand is either single or double
12029#
12030fsqrt_not_ext:
12031	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
12032	bne.w		fsqrt_dbl
12033
12034#
12035# operand is to be rounded to single precision
12036#
12037fsqrt_sgl:
12038	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12039	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12040	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12041
12042	bsr.l		scale_sqrt		# calculate scale factor
12043
12044	cmpi.l		%d0,&0x3fff-0x3f81	# will move in underflow?
12045	beq.w		fsqrt_sd_may_unfl
12046	bgt.w		fsqrt_sd_unfl		# yes; go handle underflow
12047	cmpi.l		%d0,&0x3fff-0x407f	# will move in overflow?
12048	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
12049	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
12050
12051#
12052# operand will NOT overflow or underflow when moved in to the fp reg file
12053#
12054fsqrt_sd_normal:
12055	fmov.l		&0x0,%fpsr		# clear FPSR
12056	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12057
12058	fsqrt.x		FP_SCR0(%a6),%fp0	# perform absolute
12059
12060	fmov.l		%fpsr,%d1		# save FPSR
12061	fmov.l		&0x0,%fpcr		# clear FPCR
12062
12063	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12064
12065fsqrt_sd_normal_exit:
12066	mov.l		%d2,-(%sp)		# save d2
12067	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12068	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
12069	mov.l		%d1,%d2			# make a copy
12070	andi.l		&0x7fff,%d1		# strip sign
12071	sub.l		%d0,%d1			# add scale factor
12072	andi.w		&0x8000,%d2		# keep old sign
12073	or.w		%d1,%d2			# concat old sign,new exp
12074	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
12075	mov.l		(%sp)+,%d2		# restore d2
12076	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12077	rts
12078
12079#
12080# operand is to be rounded to double precision
12081#
12082fsqrt_dbl:
12083	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12084	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12085	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12086
12087	bsr.l		scale_sqrt		# calculate scale factor
12088
12089	cmpi.l		%d0,&0x3fff-0x3c01	# will move in underflow?
12090	beq.w		fsqrt_sd_may_unfl
12091	bgt.b		fsqrt_sd_unfl		# yes; go handle underflow
12092	cmpi.l		%d0,&0x3fff-0x43ff	# will move in overflow?
12093	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
12094	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
12095	bra.w		fsqrt_sd_normal		# no; ho handle normalized op
12096
12097# we're on the line here and the distinguising characteristic is whether
12098# the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number
12099# elsewise fall through to underflow.
12100fsqrt_sd_may_unfl:
12101	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
12102	bne.w		fsqrt_sd_normal		# yes, so no underflow
12103
12104#
12105# operand WILL underflow when moved in to the fp register file
12106#
12107fsqrt_sd_unfl:
12108	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12109
12110	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
12111	fmov.l		&0x0,%fpsr		# clear FPSR
12112
12113	fsqrt.x 	FP_SCR0(%a6),%fp0	# execute square root
12114
12115	fmov.l		%fpsr,%d1		# save status
12116	fmov.l		&0x0,%fpcr		# clear FPCR
12117
12118	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12119
12120# if underflow or inexact is enabled, go calculate EXOP first.
12121	mov.b		FPCR_ENABLE(%a6),%d1
12122	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
12123	bne.b		fsqrt_sd_unfl_ena	# yes
12124
12125fsqrt_sd_unfl_dis:
12126	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12127
12128	lea		FP_SCR0(%a6),%a0	# pass: result addr
12129	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
12130	bsr.l		unf_res			# calculate default result
12131	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
12132	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12133	rts
12134
12135#
12136# operand will underflow AND underflow is enabled.
12137# therefore, we must return the result rounded to extended precision.
12138#
12139fsqrt_sd_unfl_ena:
12140	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12141	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12142	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
12143
12144	mov.l		%d2,-(%sp)		# save d2
12145	mov.l		%d1,%d2			# make a copy
12146	andi.l		&0x7fff,%d1		# strip sign
12147	andi.w		&0x8000,%d2		# keep old sign
12148	sub.l		%d0,%d1			# subtract scale factor
12149	addi.l		&0x6000,%d1		# add new bias
12150	andi.w		&0x7fff,%d1
12151	or.w		%d2,%d1			# concat new sign,new exp
12152	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
12153	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
12154	mov.l		(%sp)+,%d2		# restore d2
12155	bra.b		fsqrt_sd_unfl_dis
12156
12157#
12158# operand WILL overflow.
12159#
12160fsqrt_sd_ovfl:
12161	fmov.l		&0x0,%fpsr		# clear FPSR
12162	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12163
12164	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
12165
12166	fmov.l		&0x0,%fpcr		# clear FPCR
12167	fmov.l		%fpsr,%d1		# save FPSR
12168
12169	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12170
12171fsqrt_sd_ovfl_tst:
12172	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12173
12174	mov.b		FPCR_ENABLE(%a6),%d1
12175	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
12176	bne.b		fsqrt_sd_ovfl_ena	# yes
12177
12178#
12179# OVFL is not enabled; therefore, we must create the default result by
12180# calling ovf_res().
12181#
12182fsqrt_sd_ovfl_dis:
12183	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
12184	sne		%d1			# set sign param accordingly
12185	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
12186	bsr.l		ovf_res			# calculate default result
12187	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
12188	fmovm.x		(%a0),&0x80		# return default result in fp0
12189	rts
12190
12191#
12192# OVFL is enabled.
12193# the INEX2 bit has already been updated by the round to the correct precision.
12194# now, round to extended(and don't alter the FPSR).
12195#
12196fsqrt_sd_ovfl_ena:
12197	mov.l		%d2,-(%sp)		# save d2
12198	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
12199	mov.l		%d1,%d2			# make a copy
12200	andi.l		&0x7fff,%d1		# strip sign
12201	andi.w		&0x8000,%d2		# keep old sign
12202	sub.l		%d0,%d1			# add scale factor
12203	subi.l		&0x6000,%d1		# subtract bias
12204	andi.w		&0x7fff,%d1
12205	or.w		%d2,%d1			# concat sign,exp
12206	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
12207	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12208	mov.l		(%sp)+,%d2		# restore d2
12209	bra.b		fsqrt_sd_ovfl_dis
12210
12211#
12212# the move in MAY underflow. so...
12213#
12214fsqrt_sd_may_ovfl:
12215	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
12216	bne.w		fsqrt_sd_ovfl		# yes, so overflow
12217
12218	fmov.l		&0x0,%fpsr		# clear FPSR
12219	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12220
12221	fsqrt.x		FP_SCR0(%a6),%fp0	# perform absolute
12222
12223	fmov.l		%fpsr,%d1		# save status
12224	fmov.l		&0x0,%fpcr		# clear FPCR
12225
12226	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12227
12228	fmov.x		%fp0,%fp1		# make a copy of result
12229	fcmp.b		%fp1,&0x1		# is |result| >= 1.b?
12230	fbge.w		fsqrt_sd_ovfl_tst	# yes; overflow has occurred
12231
12232# no, it didn't overflow; we have correct result
12233	bra.w		fsqrt_sd_normal_exit
12234
12235##########################################################################
12236
12237#
12238# input is not normalized; what is it?
12239#
12240fsqrt_not_norm:
12241	cmpi.b		%d1,&DENORM		# weed out DENORM
12242	beq.w		fsqrt_denorm
12243	cmpi.b		%d1,&ZERO		# weed out ZERO
12244	beq.b		fsqrt_zero
12245	cmpi.b		%d1,&INF		# weed out INF
12246	beq.b		fsqrt_inf
12247	cmpi.b		%d1,&SNAN		# weed out SNAN
12248	beq.l		res_snan_1op
12249	bra.l		res_qnan_1op
12250
12251#
12252# 	fsqrt(+0) = +0
12253# 	fsqrt(-0) = -0
12254#	fsqrt(+INF) = +INF
12255# 	fsqrt(-INF) = OPERR
12256#
12257fsqrt_zero:
12258	tst.b		SRC_EX(%a0)		# is ZERO positive or negative?
12259	bmi.b		fsqrt_zero_m		# negative
12260fsqrt_zero_p:
12261	fmov.s		&0x00000000,%fp0	# return +ZERO
12262	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
12263	rts
12264fsqrt_zero_m:
12265	fmov.s		&0x80000000,%fp0	# return -ZERO
12266	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
12267	rts
12268
12269fsqrt_inf:
12270	tst.b		SRC_EX(%a0)		# is INF positive or negative?
12271	bmi.l		res_operr		# negative
12272fsqrt_inf_p:
12273	fmovm.x		SRC(%a0),&0x80		# return +INF in fp0
12274	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
12275	rts
12276
12277#########################################################################
12278# XDEF ****************************************************************	#
12279#	fetch_dreg(): fetch register according to index in d1		#
12280#									#
12281# XREF ****************************************************************	#
12282#	None								#
12283#									#
12284# INPUT ***************************************************************	#
12285#	d1 = index of register to fetch from				#
12286# 									#
12287# OUTPUT **************************************************************	#
12288#	d0 = value of register fetched					#
12289#									#
12290# ALGORITHM ***********************************************************	#
12291#	According to the index value in d1 which can range from zero 	#
12292# to fifteen, load the corresponding register file value (where 	#
12293# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
12294# stack. The rest should still be in their original places.		#
12295#									#
12296#########################################################################
12297
# this routine leaves d1 intact for subsequent store_dreg calls.
	global		fetch_dreg
fetch_dreg:
	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0	# fetch table offset
	jmp		(tbl_fdreg.b,%pc,%d0.w*1)	# jump to fetch handler

# one entry per register index: 0-7 = d0-d7; 8-15 = a0-a7
tbl_fdreg:
	short		fdreg0 - tbl_fdreg
	short		fdreg1 - tbl_fdreg
	short		fdreg2 - tbl_fdreg
	short		fdreg3 - tbl_fdreg
	short		fdreg4 - tbl_fdreg
	short		fdreg5 - tbl_fdreg
	short		fdreg6 - tbl_fdreg
	short		fdreg7 - tbl_fdreg
	short		fdreg8 - tbl_fdreg
	short		fdreg9 - tbl_fdreg
	short		fdrega - tbl_fdreg
	short		fdregb - tbl_fdreg
	short		fdregc - tbl_fdreg
	short		fdregd - tbl_fdreg
	short		fdrege - tbl_fdreg
	short		fdregf - tbl_fdreg

# d0/d1 images live in the stack frame at EXC_DREGS+0x0/+0x4;
# d2-d7 are still in their registers
fdreg0:
	mov.l		EXC_DREGS+0x0(%a6),%d0
	rts
fdreg1:
	mov.l		EXC_DREGS+0x4(%a6),%d0
	rts
fdreg2:
	mov.l		%d2,%d0
	rts
fdreg3:
	mov.l		%d3,%d0
	rts
fdreg4:
	mov.l		%d4,%d0
	rts
fdreg5:
	mov.l		%d5,%d0
	rts
fdreg6:
	mov.l		%d6,%d0
	rts
fdreg7:
	mov.l		%d7,%d0
	rts
# a0/a1 images live in the stack frame at EXC_DREGS+0x8/+0xc;
# a2-a5 are still in their registers
fdreg8:
	mov.l		EXC_DREGS+0x8(%a6),%d0
	rts
fdreg9:
	mov.l		EXC_DREGS+0xc(%a6),%d0
	rts
fdrega:
	mov.l		%a2,%d0
	rts
fdregb:
	mov.l		%a3,%d0
	rts
fdregc:
	mov.l		%a4,%d0
	rts
fdregd:
	mov.l		%a5,%d0
	rts
# a6 image was saved at the frame base
fdrege:
	mov.l		(%a6),%d0
	rts
# a7 image is kept in EXC_A7
fdregf:
	mov.l		EXC_A7(%a6),%d0
	rts
12370
12371#########################################################################
12372# XDEF ****************************************************************	#
12373#	store_dreg_l(): store longword to data register specified by d1	#
12374#									#
12375# XREF ****************************************************************	#
12376#	None								#
12377#									#
12378# INPUT ***************************************************************	#
12379#	d0 = longowrd value to store					#
12380#	d1 = index of register to fetch from				#
12381# 									#
12382# OUTPUT **************************************************************	#
12383#	(data register is updated)					#
12384#									#
12385# ALGORITHM ***********************************************************	#
12386#	According to the index value in d1, store the longword value	#
12387# in d0 to the corresponding data register. D0/D1 are on the stack	#
12388# while the rest are in their initial places.				#
12389#									#
12390#########################################################################
12391
	global		store_dreg_l
store_dreg_l:
	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1	# fetch table offset
	jmp		(tbl_sdregl.b,%pc,%d1.w*1)	# jump to store handler

# one entry per data register d0-d7
tbl_sdregl:
	short		sdregl0 - tbl_sdregl
	short		sdregl1 - tbl_sdregl
	short		sdregl2 - tbl_sdregl
	short		sdregl3 - tbl_sdregl
	short		sdregl4 - tbl_sdregl
	short		sdregl5 - tbl_sdregl
	short		sdregl6 - tbl_sdregl
	short		sdregl7 - tbl_sdregl

# d0/d1 images live in the stack frame; d2-d7 are written directly
sdregl0:
	mov.l		%d0,EXC_DREGS+0x0(%a6)
	rts
sdregl1:
	mov.l		%d0,EXC_DREGS+0x4(%a6)
	rts
sdregl2:
	mov.l		%d0,%d2
	rts
sdregl3:
	mov.l		%d0,%d3
	rts
sdregl4:
	mov.l		%d0,%d4
	rts
sdregl5:
	mov.l		%d0,%d5
	rts
sdregl6:
	mov.l		%d0,%d6
	rts
sdregl7:
	mov.l		%d0,%d7
	rts
12431
12432#########################################################################
12433# XDEF ****************************************************************	#
12434#	store_dreg_w(): store word to data register specified by d1	#
12435#									#
12436# XREF ****************************************************************	#
12437#	None								#
12438#									#
12439# INPUT ***************************************************************	#
12440#	d0 = word value to store					#
12441#	d1 = index of register to fetch from				#
12442# 									#
12443# OUTPUT **************************************************************	#
12444#	(data register is updated)					#
12445#									#
12446# ALGORITHM ***********************************************************	#
12447#	According to the index value in d1, store the word value	#
12448# in d0 to the corresponding data register. D0/D1 are on the stack	#
12449# while the rest are in their initial places.				#
12450#									#
12451#########################################################################
12452
	global		store_dreg_w
store_dreg_w:
	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1	# fetch table offset
	jmp		(tbl_sdregw.b,%pc,%d1.w*1)	# jump to store handler

# one entry per data register d0-d7
tbl_sdregw:
	short		sdregw0 - tbl_sdregw
	short		sdregw1 - tbl_sdregw
	short		sdregw2 - tbl_sdregw
	short		sdregw3 - tbl_sdregw
	short		sdregw4 - tbl_sdregw
	short		sdregw5 - tbl_sdregw
	short		sdregw6 - tbl_sdregw
	short		sdregw7 - tbl_sdregw

# d0/d1 images live in the stack frame; the "2+" offset targets the low
# word of the saved longword. d2-d7 are written directly.
sdregw0:
	mov.w		%d0,2+EXC_DREGS+0x0(%a6)
	rts
sdregw1:
	mov.w		%d0,2+EXC_DREGS+0x4(%a6)
	rts
sdregw2:
	mov.w		%d0,%d2
	rts
sdregw3:
	mov.w		%d0,%d3
	rts
sdregw4:
	mov.w		%d0,%d4
	rts
sdregw5:
	mov.w		%d0,%d5
	rts
sdregw6:
	mov.w		%d0,%d6
	rts
sdregw7:
	mov.w		%d0,%d7
	rts
12492
12493#########################################################################
12494# XDEF ****************************************************************	#
12495#	store_dreg_b(): store byte to data register specified by d1	#
12496#									#
12497# XREF ****************************************************************	#
12498#	None								#
12499#									#
12500# INPUT ***************************************************************	#
12501#	d0 = byte value to store					#
12502#	d1 = index of register to fetch from				#
12503# 									#
12504# OUTPUT **************************************************************	#
12505#	(data register is updated)					#
12506#									#
12507# ALGORITHM ***********************************************************	#
12508#	According to the index value in d1, store the byte value	#
12509# in d0 to the corresponding data register. D0/D1 are on the stack	#
12510# while the rest are in their initial places.				#
12511#									#
12512#########################################################################
12513
	global		store_dreg_b
store_dreg_b:
	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1	# fetch table offset
	jmp		(tbl_sdregb.b,%pc,%d1.w*1)	# jump to store handler

# one entry per data register d0-d7
tbl_sdregb:
	short		sdregb0 - tbl_sdregb
	short		sdregb1 - tbl_sdregb
	short		sdregb2 - tbl_sdregb
	short		sdregb3 - tbl_sdregb
	short		sdregb4 - tbl_sdregb
	short		sdregb5 - tbl_sdregb
	short		sdregb6 - tbl_sdregb
	short		sdregb7 - tbl_sdregb

# d0/d1 images live in the stack frame; the "3+" offset targets the low
# byte of the saved longword. d2-d7 are written directly.
sdregb0:
	mov.b		%d0,3+EXC_DREGS+0x0(%a6)
	rts
sdregb1:
	mov.b		%d0,3+EXC_DREGS+0x4(%a6)
	rts
sdregb2:
	mov.b		%d0,%d2
	rts
sdregb3:
	mov.b		%d0,%d3
	rts
sdregb4:
	mov.b		%d0,%d4
	rts
sdregb5:
	mov.b		%d0,%d5
	rts
sdregb6:
	mov.b		%d0,%d6
	rts
sdregb7:
	mov.b		%d0,%d7
	rts
12553
12554#########################################################################
12555# XDEF ****************************************************************	#
12556#	inc_areg(): increment an address register by the value in d0	#
12557#									#
12558# XREF ****************************************************************	#
12559#	None								#
12560#									#
12561# INPUT ***************************************************************	#
12562#	d0 = amount to increment by					#
12563#	d1 = index of address register to increment			#
12564# 									#
12565# OUTPUT **************************************************************	#
12566#	(address register is updated)					#
12567#									#
12568# ALGORITHM ***********************************************************	#
12569# 	Typically used for an instruction w/ a post-increment <ea>, 	#
12570# this routine adds the increment value in d0 to the address register	#
12571# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
12572# in their original places.						#
12573# 	For a7, if the increment amount is one, then we have to 	#
12574# increment by two. For any a7 update, set the mia7_flag so that if	#
12575# an access error exception occurs later in emulation, this address	#
12576# register update can be undone.					#
12577#									#
12578#########################################################################
12579
	global		inc_areg
inc_areg:
	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1	# fetch table offset
	jmp		(tbl_iareg.b,%pc,%d1.w*1)	# jump to inc handler

# one entry per address register a0-a7
tbl_iareg:
	short		iareg0 - tbl_iareg
	short		iareg1 - tbl_iareg
	short		iareg2 - tbl_iareg
	short		iareg3 - tbl_iareg
	short		iareg4 - tbl_iareg
	short		iareg5 - tbl_iareg
	short		iareg6 - tbl_iareg
	short		iareg7 - tbl_iareg

# a0/a1 images live in the stack frame at EXC_DREGS+0x8/+0xc;
# a2-a5 are updated directly
iareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)
	rts
iareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)
	rts
iareg2:	add.l		%d0,%a2
	rts
iareg3:	add.l		%d0,%a3
	rts
iareg4:	add.l		%d0,%a4
	rts
iareg5:	add.l		%d0,%a5
	rts
# a6 image was saved at the frame base
iareg6:	add.l		%d0,(%a6)
	rts
# a7: flag the update (so it can be undone on a later access error) and
# bump a byte-sized increment up to two to keep the stack pointer even
iareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6)
	cmpi.b		%d0,&0x1		# is the increment 1?
	beq.b		iareg7b			# yes; increment by 2 instead
	add.l		%d0,EXC_A7(%a6)
	rts
iareg7b:
	addq.l		&0x2,EXC_A7(%a6)
	rts
12617
12618#########################################################################
12619# XDEF ****************************************************************	#
12620#	dec_areg(): decrement an address register by the value in d0	#
12621#									#
12622# XREF ****************************************************************	#
12623#	None								#
12624#									#
12625# INPUT ***************************************************************	#
12626#	d0 = amount to decrement by					#
12627#	d1 = index of address register to decrement			#
12628# 									#
12629# OUTPUT **************************************************************	#
12630#	(address register is updated)					#
12631#									#
12632# ALGORITHM ***********************************************************	#
12633# 	Typically used for an instruction w/ a pre-decrement <ea>, 	#
# this routine subtracts the decrement value in d0 from the address	#
# register specified by d1. A0/A1/A6/A7 reside on the stack. The rest	#
# reside in their original places.					#
12637# 	For a7, if the decrement amount is one, then we have to 	#
12638# decrement by two. For any a7 update, set the mda7_flag so that if	#
12639# an access error exception occurs later in emulation, this address	#
12640# register update can be undone.					#
12641#									#
12642#########################################################################
12643
	global		dec_areg
dec_areg:
	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1	# fetch table offset
	jmp		(tbl_dareg.b,%pc,%d1.w*1)	# jump to dec handler

# one entry per address register a0-a7
tbl_dareg:
	short		dareg0 - tbl_dareg
	short		dareg1 - tbl_dareg
	short		dareg2 - tbl_dareg
	short		dareg3 - tbl_dareg
	short		dareg4 - tbl_dareg
	short		dareg5 - tbl_dareg
	short		dareg6 - tbl_dareg
	short		dareg7 - tbl_dareg

# a0/a1 images live in the stack frame at EXC_DREGS+0x8/+0xc;
# a2-a5 are updated directly
dareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)
	rts
dareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)
	rts
dareg2:	sub.l		%d0,%a2
	rts
dareg3:	sub.l		%d0,%a3
	rts
dareg4:	sub.l		%d0,%a4
	rts
dareg5:	sub.l		%d0,%a5
	rts
# a6 image was saved at the frame base
dareg6:	sub.l		%d0,(%a6)
	rts
# a7: flag the update (so it can be undone on a later access error) and
# bump a byte-sized decrement up to two to keep the stack pointer even
dareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6)
	cmpi.b		%d0,&0x1		# is the decrement 1?
	beq.b		dareg7b			# yes; decrement by 2 instead
	sub.l		%d0,EXC_A7(%a6)
	rts
dareg7b:
	subq.l		&0x2,EXC_A7(%a6)
	rts
12681
12682##############################################################################
12683
12684#########################################################################
12685# XDEF ****************************************************************	#
12686#	load_fpn1(): load FP register value into FP_SRC(a6).		#
12687#									#
12688# XREF ****************************************************************	#
12689#	None								#
12690#									#
12691# INPUT ***************************************************************	#
12692#	d0 = index of FP register to load				#
12693# 									#
12694# OUTPUT **************************************************************	#
12695#	FP_SRC(a6) = value loaded from FP register file			#
12696#									#
12697# ALGORITHM ***********************************************************	#
12698#	Using the index in d0, load FP_SRC(a6) with a number from the 	#
12699# FP register file.							#
12700#									#
12701#########################################################################
12702
	global 		load_fpn1
load_fpn1:
	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0 # fetch 16-bit handler offset
	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1) # jump to the handler for FPn

# table of 16-bit offsets (relative to tbl_load_fpn1) to the handlers
tbl_load_fpn1:
	short		load_fpn1_0 - tbl_load_fpn1
	short		load_fpn1_1 - tbl_load_fpn1
	short		load_fpn1_2 - tbl_load_fpn1
	short		load_fpn1_3 - tbl_load_fpn1
	short		load_fpn1_4 - tbl_load_fpn1
	short		load_fpn1_5 - tbl_load_fpn1
	short		load_fpn1_6 - tbl_load_fpn1
	short		load_fpn1_7 - tbl_load_fpn1

# fp0/fp1 images live in the exception frame (EXC_FP0/EXC_FP1); copy them
# with integer moves so no FP exception can be raised on a DENORM/SNAN.
# Each handler also returns a0 = &FP_SRC(a6).
load_fpn1_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)
	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0	# pass back ptr to the copy
	rts
load_fpn1_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)
	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
# fp2-fp7 are still live in the register file; fmovm.x never takes an
# FP exception, so it is safe even for DENORM/SNAN operands.
load_fpn1_2:
	fmovm.x		&0x20, FP_SRC(%a6)	# store fp2
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_3:
	fmovm.x		&0x10, FP_SRC(%a6)	# store fp3
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_4:
	fmovm.x		&0x08, FP_SRC(%a6)	# store fp4
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_5:
	fmovm.x		&0x04, FP_SRC(%a6)	# store fp5
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_6:
	fmovm.x		&0x02, FP_SRC(%a6)	# store fp6
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_7:
	fmovm.x		&0x01, FP_SRC(%a6)	# store fp7
	lea		FP_SRC(%a6), %a0
	rts
12754
12755#############################################################################
12756
12757#########################################################################
12758# XDEF ****************************************************************	#
12759#	load_fpn2(): load FP register value into FP_DST(a6).		#
12760#									#
12761# XREF ****************************************************************	#
12762#	None								#
12763#									#
12764# INPUT ***************************************************************	#
12765#	d0 = index of FP register to load				#
12766# 									#
12767# OUTPUT **************************************************************	#
12768#	FP_DST(a6) = value loaded from FP register file			#
12769#									#
12770# ALGORITHM ***********************************************************	#
12771#	Using the index in d0, load FP_DST(a6) with a number from the 	#
12772# FP register file.							#
12773#									#
12774#########################################################################
12775
	global		load_fpn2
load_fpn2:
	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0 # fetch 16-bit handler offset
	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1) # jump to the handler for FPn

# table of 16-bit offsets (relative to tbl_load_fpn2) to the handlers
tbl_load_fpn2:
	short		load_fpn2_0 - tbl_load_fpn2
	short		load_fpn2_1 - tbl_load_fpn2
	short		load_fpn2_2 - tbl_load_fpn2
	short		load_fpn2_3 - tbl_load_fpn2
	short		load_fpn2_4 - tbl_load_fpn2
	short		load_fpn2_5 - tbl_load_fpn2
	short		load_fpn2_6 - tbl_load_fpn2
	short		load_fpn2_7 - tbl_load_fpn2

# fp0/fp1 images live in the exception frame (EXC_FP0/EXC_FP1); copy them
# with integer moves so no FP exception can be raised on a DENORM/SNAN.
# Each handler also returns a0 = &FP_DST(a6).
load_fpn2_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)
	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0	# pass back ptr to the copy
	rts
load_fpn2_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)
	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
# fp2-fp7 are still live in the register file; fmovm.x never takes an
# FP exception, so it is safe even for DENORM/SNAN operands.
load_fpn2_2:
	fmovm.x		&0x20, FP_DST(%a6)	# store fp2
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_3:
	fmovm.x		&0x10, FP_DST(%a6)	# store fp3
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_4:
	fmovm.x		&0x08, FP_DST(%a6)	# store fp4
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_5:
	fmovm.x		&0x04, FP_DST(%a6)	# store fp5
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_6:
	fmovm.x		&0x02, FP_DST(%a6)	# store fp6
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_7:
	fmovm.x		&0x01, FP_DST(%a6)	# store fp7
	lea		FP_DST(%a6), %a0
	rts
12827
12828#############################################################################
12829
12830#########################################################################
12831# XDEF ****************************************************************	#
12832# 	store_fpreg(): store an fp value to the fpreg designated d0.	#
12833#									#
12834# XREF ****************************************************************	#
12835#	None								#
12836#									#
12837# INPUT ***************************************************************	#
12838#	fp0 = extended precision value to store				#
12839#	d0  = index of floating-point register				#
12840# 									#
12841# OUTPUT **************************************************************	#
12842#	None								#
12843#									#
12844# ALGORITHM ***********************************************************	#
12845#	Store the value in fp0 to the FP register designated by the	#
12846# value in d0. The FP number can be DENORM or SNAN so we have to be	#
12847# careful that we don't take an exception here.				#
12848#									#
12849#########################################################################
12850
	global		store_fpreg
store_fpreg:
	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0 # fetch 16-bit handler offset
	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1) # jump to the handler for FPn

# table of 16-bit offsets (relative to tbl_store_fpreg) to the handlers
tbl_store_fpreg:
	short		store_fpreg_0 - tbl_store_fpreg
	short		store_fpreg_1 - tbl_store_fpreg
	short		store_fpreg_2 - tbl_store_fpreg
	short		store_fpreg_3 - tbl_store_fpreg
	short		store_fpreg_4 - tbl_store_fpreg
	short		store_fpreg_5 - tbl_store_fpreg
	short		store_fpreg_6 - tbl_store_fpreg
	short		store_fpreg_7 - tbl_store_fpreg

# fp0/fp1 are restored from their exception frame images on exit, so just
# update the images; fmovm.x never raises an FP exception (safe for
# DENORM/SNAN).
store_fpreg_0:
	fmovm.x		&0x80, EXC_FP0(%a6)
	rts
store_fpreg_1:
	fmovm.x		&0x80, EXC_FP1(%a6)
	rts
# fp2-fp7: bounce fp0 through the stack into the target register with
# fmovm only (no ftrap-able fmove). Note the fmovm register mask is
# bit-reversed in predecrement mode, so &0x01 with -(%sp) selects fp0.
store_fpreg_2:
	fmovm.x 	&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x20		# pop it into fp2
	rts
store_fpreg_3:
	fmovm.x 	&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x10		# pop it into fp3
	rts
store_fpreg_4:
	fmovm.x 	&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x08		# pop it into fp4
	rts
store_fpreg_5:
	fmovm.x 	&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x04		# pop it into fp5
	rts
store_fpreg_6:
	fmovm.x 	&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x02		# pop it into fp6
	rts
store_fpreg_7:
	fmovm.x 	&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x01		# pop it into fp7
	rts
12896
12897#########################################################################
12898# XDEF ****************************************************************	#
12899#	get_packed(): fetch a packed operand from memory and then	#
12900#		      convert it to a floating-point binary number.	#
12901#									#
12902# XREF ****************************************************************	#
12903#	_dcalc_ea() - calculate the correct <ea>			#
12904#	_mem_read() - fetch the packed operand from memory		#
12905#	facc_in_x() - the fetch failed so jump to special exit code	#
12906#	decbin()    - convert packed to binary extended precision	#
12907#									#
12908# INPUT ***************************************************************	#
12909#	None								#
12910# 									#
12911# OUTPUT **************************************************************	#
12912#	If no failure on _mem_read():					#
12913# 	FP_SRC(a6) = packed operand now as a binary FP number		#
12914#									#
12915# ALGORITHM ***********************************************************	#
#	Get the correct <ea> which is the value on the exception stack 	#
12917# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
12918# Then, fetch the operand from memory. If the fetch fails, exit		#
12919# through facc_in_x().							#
12920#	If the packed operand is a ZERO,NAN, or INF, convert it to	#
12921# its binary representation here. Else, call decbin() which will 	#
12922# convert the packed value to an extended precision binary value.	#
12923#									#
12924#########################################################################
12925
12926# the stacked <ea> for packed is correct except for -(An).
12927# the base reg must be updated for both -(An) and (An)+.
global		get_packed
get_packed:
	mov.l		&0xc,%d0		# packed is 12 bytes
	bsr.l		_dcalc_ea		# fetch <ea>; correct An

	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_dmem_read		# read packed operand

	tst.l		%d1			# did dfetch fail?
	bne.l		facc_in_x		# yes; special access-error exit

# The packed operand is an INF or a NAN if the exponent field is all ones.
	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp (bits 1-15 of word 1)
	cmpi.w		%d0,&0x7fff		# INF or NAN?
	bne.b		gp_try_zero		# no
	rts					# operand is an INF or NAN

# The packed operand is a zero if the mantissa is all zero, else it's
# a normal packed op.
gp_try_zero:
	mov.b		3+FP_SRC(%a6),%d0	# get byte 4 (holds the integer digit)
	andi.b		&0x0f,%d0		# clear all but last nybble
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
	bne.b		gp_not_spec		# not a zero
	rts					# operand is a ZERO
gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# convert to extended (result in fp0)
	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
	rts
12962
12963#########################################################################
12964# decbin(): Converts normalized packed bcd value pointed to by register	#
12965#	    a0 to extended-precision value in fp0.			#
12966#									#
12967# INPUT ***************************************************************	#
12968#	a0 = pointer to normalized packed bcd value			#
12969#									#
12970# OUTPUT **************************************************************	#
12971#	fp0 = exact fp representation of the packed bcd value.		#
12972#									#
12973# ALGORITHM ***********************************************************	#
12974#	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
12975#	and NaN operands are dispatched without entering this routine)	#
12976#	value in 68881/882 format at location (a0).			#
12977#									#
12978#	A1. Convert the bcd exponent to binary by successive adds and 	#
12979#	muls. Set the sign according to SE. Subtract 16 to compensate	#
12980#	for the mantissa which is to be interpreted as 17 integer	#
12981#	digits, rather than 1 integer and 16 fraction digits.		#
12982#	Note: this operation can never overflow.			#
12983#									#
12984#	A2. Convert the bcd mantissa to binary by successive		#
12985#	adds and muls in FP0. Set the sign according to SM.		#
12986#	The mantissa digits will be converted with the decimal point	#
12987#	assumed following the least-significant digit.			#
12988#	Note: this operation can never overflow.			#
12989#									#
12990#	A3. Count the number of leading/trailing zeros in the		#
12991#	bcd string.  If SE is positive, count the leading zeros;	#
12992#	if negative, count the trailing zeros.  Set the adjusted	#
12993#	exponent equal to the exponent from A1 and the zero count	#
12994#	added if SM = 1 and subtracted if SM = 0.  Scale the		#
12995#	mantissa the equivalent of forcing in the bcd value:		#
12996#									#
12997#	SM = 0	a non-zero digit in the integer position		#
12998#	SM = 1	a non-zero digit in Mant0, lsd of the fraction		#
12999#									#
13000#	this will insure that any value, regardless of its		#
13001#	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
13002#	consistently.							#
13003#									#
13004#	A4. Calculate the factor 10^exp in FP1 using a table of		#
13005#	10^(2^n) values.  To reduce the error in forming factors	#
13006#	greater than 10^27, a directed rounding scheme is used with	#
13007#	tables rounded to RN, RM, and RP, according to the table	#
13008#	in the comments of the pwrten section.				#
13009#									#
13010#	A5. Form the final binary number by scaling the mantissa by	#
13011#	the exponent factor.  This is done by multiplying the		#
13012#	mantissa in FP0 by the factor in FP1 if the adjusted		#
13013#	exponent sign is positive, and dividing FP0 by FP1 if		#
13014#	it is negative.							#
13015#									#
13016#	Clean up and return. Check if the final mul or div was inexact.	#
13017#	If so, set INEX1 in USER_FPSR.					#
13018#									#
13019#########################################################################
13020
13021#
13022#	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
13023#	to nearest, minus, and plus, respectively.  The tables include
13024#	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
13025#	is required until the power is greater than 27, however, all
13026#	tables include the first 5 for ease of indexing.
13027#
# RTABLE maps the 4-bit index {FPCR rounding mode, SM, SE} (built in
# pwrten) to the rounding mode used while computing the 10^exp scaling
# factor.  Entry values are FPCR rounding-mode encodings: 0 = RN,
# 2 = RM, 3 = RP.  Rows are user mode RN, RZ, RM, RP in that order
# (see the table in the pwrten comments below).
RTABLE:
	byte		0,0,0,0
	byte		2,3,2,3
	byte		2,3,3,2
	byte		3,2,2,3

	set		FNIBS,7			# digits per mantissa lword minus 1 (dbf count)
	set		FSTRT,0			# bit offset of first mantissa digit in an lword

	set		ESTRT,4			# bit offset of first exponent digit in word 1
	set		EDIGITS,2		# exponent digit count minus 1 (dbf count)
13039
global		decbin
decbin:
	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
	mov.l		0x8(%a0),FP_SCR0_LO(%a6)

	lea		FP_SCR0(%a6),%a0	# a0 -> working copy from here on

	movm.l		&0x3c00,-(%sp)		# save d2-d5
	fmovm.x		&0x1,-(%sp)		# save fp1
#
# Calculate exponent:
#  1. Copy bcd value in memory for use as a working copy.
#  2. Calculate absolute value of exponent in d1 by mul and add.
#  3. Correct for exponent sign.
#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
#     (i.e., all digits assumed left of the decimal point.)
#
# Register usage:
#
#  calc_e:
#	(*)  d0: temp digit storage
#	(*)  d1: accumulator for binary exponent
#	(*)  d2: digit count
#	(*)  d3: offset pointer
#	( )  d4: first word of bcd
#	( )  a0: pointer to working bcd value
#	( )  a6: pointer to original bcd value
#	(*)  FP_SCR0: working copy of original bcd value
#	(*)  (sp): absolute binary exponent, pushed at e_save
#
calc_e:
	mov.l		&EDIGITS,%d2		# # of nibbles (digits) in fraction part
	mov.l		&ESTRT,%d3		# counter to pick up digits
	mov.l		(%a0),%d4		# get first word of bcd
	clr.l		%d1			# zero d1 for accumulator
e_gd:
	mulu.l		&0xa,%d1		# mul partial product by one digit place
	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
	add.l		%d0,%d1			# d1 = d1 + d0
	addq.b		&4,%d3			# advance d3 to the next digit
	dbf.w		%d2,e_gd		# if we have used all 3 digits, exit loop
	btst		&30,%d4			# get SE
	beq.b		e_pos			# don't negate if pos
	neg.l		%d1			# negate before subtracting
e_pos:
	sub.l		&16,%d1			# sub to compensate for shift of mant
	bge.b		e_save			# if still pos, do not neg
	neg.l		%d1			# now negative, make pos and set SE
	or.l		&0x40000000,%d4		# set SE in d4,
	or.l		&0x40000000,(%a0)	# and in working bcd
e_save:
	mov.l		%d1,-(%sp)		# save exp on stack
13093#
13094#
13095# Calculate mantissa:
13096#  1. Calculate absolute value of mantissa in fp0 by mul and add.
13097#  2. Correct for mantissa sign.
13098#     (i.e., all digits assumed left of the decimal point.)
13099#
13100# Register usage:
13101#
13102#  calc_m:
13103#	(*)  d0: temp digit storage
13104#	(*)  d1: lword counter
13105#	(*)  d2: digit count
13106#	(*)  d3: offset pointer
13107#	( )  d4: words 2 and 3 of bcd
13108#	( )  a0: pointer to working bcd value
13109#	( )  a6: pointer to original bcd value
13110#	(*) fp0: mantissa accumulator
13111#	( )  FP_SCR1: working copy of original bcd value
13112#	( )  L_SCR1: copy of original exponent word
13113#
13114calc_m:
13115	mov.l		&1,%d1			# word counter, init to 1
13116	fmov.s		&0x00000000,%fp0	# accumulator
13117#
13118#
13119#  Since the packed number has a long word between the first & second parts,
13120#  get the integer digit then skip down & get the rest of the
13121#  mantissa.  We will unroll the loop once.
13122#
13123	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word
13124	fadd.b		%d0,%fp0		# add digit to sum in fp0
13125#
13126#
13127#  Get the rest of the mantissa.
13128#
13129loadlw:
13130	mov.l		(%a0,%d1.L*4),%d4	# load mantissa lonqword into d4
13131	mov.l		&FSTRT,%d3		# counter to pick up digits
13132	mov.l		&FNIBS,%d2		# reset number of digits per a0 ptr
13133md2b:
13134	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
13135	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
13136	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
13137#
13138#
13139#  If all the digits (8) in that long word have been converted (d2=0),
13140#  then inc d1 (=2) to point to the next long word and reset d3 to 0
13141#  to initialize the digit offset, and set d2 to 7 for the digit count;
13142#  else continue with this long word.
13143#
13144	addq.b		&4,%d3			# advance d3 to the next digit
13145	dbf.w		%d2,md2b		# check for last digit in this lw
13146nextlw:
13147	addq.l		&1,%d1			# inc lw pointer in mantissa
13148	cmp.l		%d1,&2			# test for last lw
13149	ble.b		loadlw			# if not, get last one
13150#
13151#  Check the sign of the mant and make the value in fp0 the same sign.
13152#
13153m_sign:
13154	btst		&31,(%a0)		# test sign of the mantissa
13155	beq.b		ap_st_z			# if clear, go to append/strip zeros
13156	fneg.x		%fp0			# if set, negate fp0
13157#
13158# Append/strip zeros:
13159#
13160#  For adjusted exponents which have an absolute value greater than 27*,
13161#  this routine calculates the amount needed to normalize the mantissa
13162#  for the adjusted exponent.  That number is subtracted from the exp
13163#  if the exp was positive, and added if it was negative.  The purpose
13164#  of this is to reduce the value of the exponent and the possibility
13165#  of error in calculation of pwrten.
13166#
13167#  1. Branch on the sign of the adjusted exponent.
13168#  2p.(positive exp)
13169#   2. Check M16 and the digits in lwords 2 and 3 in decending order.
13170#   3. Add one for each zero encountered until a non-zero digit.
13171#   4. Subtract the count from the exp.
13172#   5. Check if the exp has crossed zero in #3 above; make the exp abs
13173#	   and set SE.
13174#	6. Multiply the mantissa by 10**count.
13175#  2n.(negative exp)
13176#   2. Check the digits in lwords 3 and 2 in decending order.
13177#   3. Add one for each zero encountered until a non-zero digit.
13178#   4. Add the count to the exp.
13179#   5. Check if the exp has crossed zero in #3 above; clear SE.
13180#   6. Divide the mantissa by 10**count.
13181#
13182#  *Why 27?  If the adjusted exponent is within -28 < expA < 28, than
13183#   any adjustment due to append/strip zeros will drive the resultane
13184#   exponent towards zero.  Since all pwrten constants with a power
13185#   of 27 or less are exact, there is no need to use this routine to
13186#   attempt to lessen the resultant exponent.
13187#
13188# Register usage:
13189#
13190#  ap_st_z:
13191#	(*)  d0: temp digit storage
13192#	(*)  d1: zero count
13193#	(*)  d2: digit count
13194#	(*)  d3: offset pointer
13195#	( )  d4: first word of bcd
13196#	(*)  d5: lword counter
13197#	( )  a0: pointer to working bcd value
13198#	( )  FP_SCR1: working copy of original bcd value
13199#	( )  L_SCR1: copy of original exponent word
13200#
13201#
13202# First check the absolute value of the exponent to see if this
13203# routine is necessary.  If so, then check the sign of the exponent
13204# and do append (+) or strip (-) zeros accordingly.
13205# This section handles a positive adjusted exponent.
13206#
13207ap_st_z:
13208	mov.l		(%sp),%d1		# load expA for range test
13209	cmp.l		%d1,&27			# test is with 27
13210	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
13211	btst		&30,(%a0)		# check sign of exp
13212	bne.b		ap_st_n			# if neg, go to neg side
13213	clr.l		%d1			# zero count reg
13214	mov.l		(%a0),%d4		# load lword 1 to d4
13215	bfextu		%d4{&28:&4},%d0		# get M16 in d0
13216	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
13217	addq.l		&1,%d1			# inc zero count
13218	mov.l		&1,%d5			# init lword counter
13219	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
13220	bne.b		ap_p_cl			# if lw 2 is zero, skip it
13221	addq.l		&8,%d1			# and inc count by 8
13222	addq.l		&1,%d5			# inc lword counter
13223	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
13224ap_p_cl:
13225	clr.l		%d3			# init offset reg
13226	mov.l		&7,%d2			# init digit counter
13227ap_p_gd:
13228	bfextu		%d4{%d3:&4},%d0		# get digit
13229	bne.b		ap_p_fx			# if non-zero, go to fix exp
13230	addq.l		&4,%d3			# point to next digit
13231	addq.l		&1,%d1			# inc digit counter
13232	dbf.w		%d2,ap_p_gd		# get next digit
13233ap_p_fx:
13234	mov.l		%d1,%d0			# copy counter to d2
13235	mov.l		(%sp),%d1		# get adjusted exp from memory
13236	sub.l		%d0,%d1			# subtract count from exp
13237	bge.b		ap_p_fm			# if still pos, go to pwrten
13238	neg.l		%d1			# now its neg; get abs
13239	mov.l		(%a0),%d4		# load lword 1 to d4
13240	or.l		&0x40000000,%d4		# and set SE in d4
13241	or.l		&0x40000000,(%a0)	# and in memory
13242#
13243# Calculate the mantissa multiplier to compensate for the striping of
13244# zeros from the mantissa.
13245#
13246ap_p_fm:
13247	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
13248	clr.l		%d3			# init table index
13249	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
13250	mov.l		&3,%d2			# init d2 to count bits in counter
13251ap_p_el:
13252	asr.l		&1,%d0			# shift lsb into carry
13253	bcc.b		ap_p_en			# if 1, mul fp1 by pwrten factor
13254	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
13255ap_p_en:
13256	add.l		&12,%d3			# inc d3 to next rtable entry
13257	tst.l		%d0			# check if d0 is zero
13258	bne.b		ap_p_el			# if not, get next bit
13259	fmul.x		%fp1,%fp0		# mul mantissa by 10**(no_bits_shifted)
13260	bra.b		pwrten			# go calc pwrten
13261#
13262# This section handles a negative adjusted exponent.
13263#
13264ap_st_n:
13265	clr.l		%d1			# clr counter
13266	mov.l		&2,%d5			# set up d5 to point to lword 3
13267	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
13268	bne.b		ap_n_cl			# if not zero, check digits
13269	sub.l		&1,%d5			# dec d5 to point to lword 2
13270	addq.l		&8,%d1			# inc counter by 8
13271	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
13272ap_n_cl:
13273	mov.l		&28,%d3			# point to last digit
13274	mov.l		&7,%d2			# init digit counter
13275ap_n_gd:
13276	bfextu		%d4{%d3:&4},%d0		# get digit
13277	bne.b		ap_n_fx			# if non-zero, go to exp fix
13278	subq.l		&4,%d3			# point to previous digit
13279	addq.l		&1,%d1			# inc digit counter
13280	dbf.w		%d2,ap_n_gd		# get next digit
13281ap_n_fx:
13282	mov.l		%d1,%d0			# copy counter to d0
13283	mov.l		(%sp),%d1		# get adjusted exp from memory
13284	sub.l		%d0,%d1			# subtract count from exp
13285	bgt.b		ap_n_fm			# if still pos, go fix mantissa
13286	neg.l		%d1			# take abs of exp and clr SE
13287	mov.l		(%a0),%d4		# load lword 1 to d4
13288	and.l		&0xbfffffff,%d4		# and clr SE in d4
13289	and.l		&0xbfffffff,(%a0)	# and in memory
13290#
13291# Calculate the mantissa multiplier to compensate for the appending of
13292# zeros to the mantissa.
13293#
13294ap_n_fm:
13295	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
13296	clr.l		%d3			# init table index
13297	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
13298	mov.l		&3,%d2			# init d2 to count bits in counter
13299ap_n_el:
13300	asr.l		&1,%d0			# shift lsb into carry
13301	bcc.b		ap_n_en			# if 1, mul fp1 by pwrten factor
13302	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
13303ap_n_en:
13304	add.l		&12,%d3			# inc d3 to next rtable entry
13305	tst.l		%d0			# check if d0 is zero
13306	bne.b		ap_n_el			# if not, get next bit
13307	fdiv.x		%fp1,%fp0		# div mantissa by 10**(no_bits_shifted)
13308#
13309#
13310# Calculate power-of-ten factor from adjusted and shifted exponent.
13311#
13312# Register usage:
13313#
13314#  pwrten:
13315#	(*)  d0: temp
13316#	( )  d1: exponent
13317#	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
13318#	(*)  d3: FPCR work copy
13319#	( )  d4: first word of bcd
13320#	(*)  a1: RTABLE pointer
13321#  calc_p:
13322#	(*)  d0: temp
13323#	( )  d1: exponent
13324#	(*)  d3: PWRTxx table index
13325#	( )  a0: pointer to working copy of bcd
13326#	(*)  a1: PWRTxx pointer
13327#	(*) fp1: power-of-ten accumulator
13328#
13329# Pwrten calculates the exponent factor in the selected rounding mode
13330# according to the following table:
13331#
13332#	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
13333#
13334#	ANY	  ANY	RN	RN
13335#
13336#	 +	   +	RP	RP
13337#	 -	   +	RP	RM
13338#	 +	   -	RP	RM
13339#	 -	   -	RP	RP
13340#
13341#	 +	   +	RM	RM
13342#	 -	   +	RM	RP
13343#	 +	   -	RM	RP
13344#	 -	   -	RM	RM
13345#
13346#	 +	   +	RZ	RM
13347#	 -	   +	RZ	RM
13348#	 +	   -	RZ	RP
13349#	 -	   -	RZ	RP
13350#
13351#
13352pwrten:
13353	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
13354	bfextu		%d3{&26:&2},%d2		# isolate rounding mode bits
13355	mov.l		(%a0),%d4		# reload 1st bcd word to d4
13356	asl.l		&2,%d2			# format d2 to be
13357	bfextu		%d4{&0:&2},%d0		# {FPCR[6],FPCR[5],SM,SE}
13358	add.l		%d0,%d2			# in d2 as index into RTABLE
13359	lea.l		RTABLE(%pc),%a1		# load rtable base
13360	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
13361	clr.l		%d3			# clear d3 to force no exc and extended
13362	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR
13363	fmov.l		%d3,%fpcr		# write new FPCR
13364	asr.l		&1,%d0			# write correct PTENxx table
13365	bcc.b		not_rp			# to a1
13366	lea.l		PTENRP(%pc),%a1		# it is RP
13367	bra.b		calc_p			# go to init section
13368not_rp:
13369	asr.l		&1,%d0			# keep checking
13370	bcc.b		not_rm
13371	lea.l		PTENRM(%pc),%a1		# it is RM
13372	bra.b		calc_p			# go to init section
13373not_rm:
13374	lea.l		PTENRN(%pc),%a1		# it is RN
13375calc_p:
13376	mov.l		%d1,%d0			# copy exp to d0;use d0
13377	bpl.b		no_neg			# if exp is negative,
13378	neg.l		%d0			# invert it
13379	or.l		&0x40000000,(%a0)	# and set SE bit
13380no_neg:
13381	clr.l		%d3			# table index
13382	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
13383e_loop:
13384	asr.l		&1,%d0			# shift next bit into carry
13385	bcc.b		e_next			# if zero, skip the mul
13386	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
13387e_next:
13388	add.l		&12,%d3			# inc d3 to next rtable entry
13389	tst.l		%d0			# check if d0 is zero
13390	bne.b		e_loop			# not zero, continue shifting
13391#
13392#
13393#  Check the sign of the adjusted exp and make the value in fp0 the
13394#  same sign. If the exp was pos then multiply fp1*fp0;
13395#  else divide fp0/fp1.
13396#
13397# Register Usage:
13398#  norm:
13399#	( )  a0: pointer to working bcd value
13400#	(*) fp0: mantissa accumulator
13401#	( ) fp1: scaling factor - 10**(abs(exp))
13402#
13403pnorm:
13404	btst		&30,(%a0)		# test the sign of the exponent
13405	beq.b		mul			# if clear, go to multiply
13406div:
13407	fdiv.x		%fp1,%fp0		# exp is negative, so divide mant by exp
13408	bra.b		end_dec
13409mul:
13410	fmul.x		%fp1,%fp0		# exp is positive, so multiply by exp
13411#
13412#
13413# Clean up and return with result in fp0.
13414#
13415# If the final mul/div in decbin incurred an inex exception,
13416# it will be inex2, but will be reported as inex1 by get_op.
13417#
13418end_dec:
13419	fmov.l		%fpsr,%d0		# get status register
13420	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
13421	beq.b		no_exc			# skip this if no exc
13422	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
13423no_exc:
13424	add.l		&0x4,%sp		# clear 1 lw param
13425	fmovm.x		(%sp)+,&0x40		# restore fp1
13426	movm.l		(%sp)+,&0x3c		# restore d2-d5
13427	fmov.l		&0x0,%fpcr
13428	fmov.l		&0x0,%fpsr
13429	rts
13430
13431#########################################################################
13432# bindec(): Converts an input in extended precision format to bcd format#
13433#									#
13434# INPUT ***************************************************************	#
13435#	a0 = pointer to the input extended precision value in memory.	#
13436#	     the input may be either normalized, unnormalized, or 	#
13437#	     denormalized.						#
13438#	d0 = contains the k-factor sign-extended to 32-bits. 		#
13439#									#
13440# OUTPUT **************************************************************	#
13441#	FP_SCR0(a6) = bcd format result on the stack.			#
13442#									#
13443# ALGORITHM ***********************************************************	#
13444#									#
13445#	A1.	Set RM and size ext;  Set SIGMA = sign of input.  	#
13446#		The k-factor is saved for use in d7. Clear the		#
13447#		BINDEC_FLG for separating normalized/denormalized	#
13448#		input.  If input is unnormalized or denormalized,	#
13449#		normalize it.						#
13450#									#
13451#	A2.	Set X = abs(input).					#
13452#									#
13453#	A3.	Compute ILOG.						#
13454#		ILOG is the log base 10 of the input value.  It is	#
13455#		approximated by adding e + 0.f when the original 	#
13456#		value is viewed as 2^^e * 1.f in extended precision.  	#
13457#		This value is stored in d6.				#
13458#									#
13459#	A4.	Clr INEX bit.						#
13460#		The operation in A3 above may have set INEX2.  		#
13461#									#
13462#	A5.	Set ICTR = 0;						#
13463#		ICTR is a flag used in A13.  It must be set before the 	#
13464#		loop entry A6.						#
13465#									#
13466#	A6.	Calculate LEN.						#
13467#		LEN is the number of digits to be displayed.  The	#
13468#		k-factor can dictate either the total number of digits,	#
13469#		if it is a positive number, or the number of digits	#
13470#		after the decimal point which are to be included as	#
13471#		significant.  See the 68882 manual for examples.	#
13472#		If LEN is computed to be greater than 17, set OPERR in	#
13473#		USER_FPSR.  LEN is stored in d4.			#
13474#									#
13475#	A7.	Calculate SCALE.					#
13476#		SCALE is equal to 10^ISCALE, where ISCALE is the number	#
13477#		of decimal places needed to insure LEN integer digits	#
13478#		in the output before conversion to bcd. LAMBDA is the	#
13479#		sign of ISCALE, used in A9. Fp1 contains		#
13480#		10^^(abs(ISCALE)) using a rounding mode which is a	#
13481#		function of the original rounding mode and the signs	#
13482#		of ISCALE and X.  A table is given in the code.		#
13483#									#
13484#	A8.	Clr INEX; Force RZ.					#
13485#		The operation in A3 above may have set INEX2.  		#
13486#		RZ mode is forced for the scaling operation to insure	#
13487#		only one rounding error.  The grs bits are collected in #
13488#		the INEX flag for use in A10.				#
13489#									#
13490#	A9.	Scale X -> Y.						#
13491#		The mantissa is scaled to the desired number of		#
13492#		significant digits.  The excess digits are collected	#
13493#		in INEX2.						#
13494#									#
13495#	A10.	Or in INEX.						#
13496#		If INEX is set, round error occurred.  This is		#
13497#		compensated for by 'or-ing' in the INEX2 flag to	#
13498#		the lsb of Y.						#
13499#									#
13500#	A11.	Restore original FPCR; set size ext.			#
13501#		Perform FINT operation in the user's rounding mode.	#
13502#		Keep the size to extended.				#
13503#									#
13504#	A12.	Calculate YINT = FINT(Y) according to user's rounding	#
13505#		mode.  The FPSP routine sintd0 is used.  The output	#
13506#		is in fp0.						#
13507#									#
13508#	A13.	Check for LEN digits.					#
13509#		If the int operation results in more than LEN digits,	#
13510#		or less than LEN -1 digits, adjust ILOG and repeat from	#
13511#		A6.  This test occurs only on the first pass.  If the	#
13512#		result is exactly 10^LEN, decrement ILOG and divide	#
13513#		the mantissa by 10.					#
13514#									#
13515#	A14.	Convert the mantissa to bcd.				#
13516#		The binstr routine is used to convert the LEN digit 	#
13517#		mantissa to bcd in memory.  The input to binstr is	#
13518#		to be a fraction; i.e. (mantissa)/10^LEN and adjusted	#
13519#		such that the decimal point is to the left of bit 63.	#
13520#		The bcd digits are stored in the correct position in 	#
13521#		the final string area in memory.			#
13522#									#
13523#	A15.	Convert the exponent to bcd.				#
13524#		As in A14 above, the exp is converted to bcd and the	#
13525#		digits are stored in the final string.			#
13526#		Test the length of the final exponent string.  If the	#
13527#		length is 4, set operr.					#
13528#									#
13529#	A16.	Write sign bits to final string.			#
13530#									#
13531#########################################################################
13532
13533set	BINDEC_FLG,	EXC_TEMP	# DENORM flag: set non-zero in A1 when the input is denormalized; tested in A3/A9/A13/A15
13534
13535# Constants in extended precision
# PLOG2   = log10(2); multiplied with the log2-based estimate in A3 (pos_res path).
# PLOG2UP1 = log10(2) with the last mantissa lword one ulp higher; used on the
# negative-result path of A3 so that ILOG errs in the safe direction.
13536PLOG2:
13537	long		0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
13538PLOG2UP1:
13539	long		0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
13540
13541# Constants in single precision
# FONE = 1.0, FTWO = 2.0, FTEN = 10.0.
# F4933 = 4933.0 — forced exponent magnitude for denormalized zero results in
# A15 (matches the forced ILOG = -4933 for denorms in A3).
13542FONE:
13543	long		0x3F800000,0x00000000,0x00000000,0x00000000
13544FTWO:
13545	long		0x40000000,0x00000000,0x00000000,0x00000000
13546FTEN:
13547	long		0x41200000,0x00000000,0x00000000,0x00000000
13548F4933:
13549	long		0x459A2800,0x00000000,0x00000000,0x00000000
13550
# Rounding-mode remap table for A7, indexed by {FPCR rmode, LAMBDA, sign(X)}
# packed into bits 3:0 of d1; entries are FPCR[6:5] codes (0=RN, 2=RM, 3=RP).
# See the truth table in the A7 comment block.
13551RBDTBL:
13552	byte		0,0,0,0
13553	byte		3,3,2,2
13554	byte		3,2,2,3
13555	byte		2,3,3,2
13556
13557#	Implementation Notes:
13558#
13559#	The registers are used as follows:
13560#
13561#		d0: scratch; LEN input to binstr
13562#		d1: scratch
13563#		d2: upper 32-bits of mantissa for binstr
13564#		d3: scratch;lower 32-bits of mantissa for binstr
13565#		d4: LEN
13566#      		d5: LAMBDA/ICTR
13567#		d6: ILOG
13568#		d7: k-factor
13569#		a0: ptr for original operand/final result
13570#		a1: scratch pointer
13571#		a2: pointer to FP_X; abs(original value) in ext
13572#		fp0: scratch
13573#		fp1: scratch
13574#		fp2: scratch
13575#		F_SCR1:
13576#		F_SCR2:
13577#		L_SCR1:
13578#		L_SCR2:
13579
13580	global		bindec
13581bindec:
13582	movm.l		&0x3f20,-(%sp)	#  {%d2-%d7/%a2}
13583	fmovm.x		&0x7,-(%sp)	#  {%fp0-%fp2}
13584
13585# A1. Set RM and size ext. Set SIGMA = sign input;
13586#     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
13587#     separating  normalized/denormalized input.  If the input
13588#     is a denormalized number, set the BINDEC_FLG memory word
13589#     to signal denorm.  If the input is unnormalized, normalize
13590#     the input and test for denormalized result.
13591#
13592	fmov.l		&rm_mode*0x10,%fpcr	# set RM and ext
13593	mov.l		(%a0),L_SCR2(%a6)	# save exponent for sign check
13594	mov.l		%d0,%d7		# move k-factor to d7
13595
13596	clr.b		BINDEC_FLG(%a6)	# clr norm/denorm flag
13597	cmpi.b		STAG(%a6),&DENORM # is input a DENORM?
13598	bne.w		A2_str		# no; input is a NORM
13599
13600#
13601# Normalize the denorm
13602#
13603un_de_norm:
13604	mov.w		(%a0),%d0	# fetch sign/exponent word
13605	and.w		&0x7fff,%d0	# strip sign of normalized exp
13606	mov.l		4(%a0),%d1	# fetch hi lword of mantissa
13607	mov.l		8(%a0),%d2	# fetch lo lword of mantissa
13608norm_loop:
13609	sub.w		&1,%d0		# decrement exponent for each shift
13610	lsl.l		&1,%d2		# shift 64-bit mantissa d1:d2 left...
13611	roxl.l		&1,%d1		# ...one bit through X
13612	tst.l		%d1		# set ccodes on mantissa hi lword
13613	bge.b		norm_loop	# loop until msb (bit 31 of d1) is set
13614#
13615# Test if the normalized input is denormalized
13616#
13617	tst.w		%d0
13618	bgt.b		pos_exp		# if greater than zero, it is a norm
13619	st		BINDEC_FLG(%a6)	# set flag for denorm
13620pos_exp:
13621	and.w		&0x7fff,%d0	# strip sign of normalized exp
13622	mov.w		%d0,(%a0)	# write back normalized exponent
13623	mov.l		%d1,4(%a0)	# write back normalized mantissa hi
13624	mov.l		%d2,8(%a0)	# write back normalized mantissa lo
13625
13626# A2. Set X = abs(input).
13627#
13628A2_str:
13629	mov.l		(%a0),FP_SCR1(%a6)	# move input to work space
13630	mov.l		4(%a0),FP_SCR1+4(%a6)	# move input to work space
13631	mov.l		8(%a0),FP_SCR1+8(%a6)	# move input to work space
13632	and.l		&0x7fffffff,FP_SCR1(%a6)	# create abs(X)
13633
13634# A3. Compute ILOG.
13635#     ILOG is the log base 10 of the input value.  It is approx-
13636#     imated by adding e + 0.f when the original value is viewed
13637#     as 2^^e * 1.f in extended precision.  This value is stored
13638#     in d6.
13639#
13640# Register usage:
13641#	Input/Output
13642#	d0: k-factor/exponent
13643#	d2: x/x
13644#	d3: x/x
13645#	d4: x/x
13646#	d5: x/x
13647#	d6: x/ILOG
13648#	d7: k-factor/Unchanged
13649#	a0: ptr for original operand/final result
13650#	a1: x/x
13651#	a2: x/x
13652#	fp0: x/float(ILOG)
13653#	fp1: x/x
13654#	fp2: x/x
13655#	F_SCR1:x/x
13656#	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
13657#	L_SCR1:x/x
13658#	L_SCR2:first word of X packed/Unchanged
13659
13660	tst.b		BINDEC_FLG(%a6)	# check for denorm
13661	beq.b		A3_cont		# if clr, continue with norm
13662	mov.l		&-4933,%d6	# force ILOG = -4933
13663	bra.b		A4_str
13664A3_cont:
13665	mov.w		FP_SCR1(%a6),%d0	# move exp to d0
13666	mov.w		&0x3fff,FP_SCR1(%a6)	# replace exponent with 0x3fff
13667	fmov.x		FP_SCR1(%a6),%fp0	# now fp0 has 1.f
13668	sub.w		&0x3fff,%d0	# strip off bias
13669	fadd.w		%d0,%fp0	# add in exp
13670	fsub.s		FONE(%pc),%fp0	# subtract off 1.0
13671	fbge.w		pos_res		# if pos, branch
13672	fmul.x		PLOG2UP1(%pc),%fp0	# if neg, mul by LOG2UP1
13673	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
13674	bra.b		A4_str		# go move out ILOG
13675pos_res:
13676	fmul.x		PLOG2(%pc),%fp0	# if pos, mul by LOG2
13677	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
13678
13679
13680# A4. Clr INEX bit.
13681#     The operation in A3 above may have set INEX2.
13682
13683A4_str:
13684	fmov.l		&0,%fpsr	# zero all of fpsr - nothing needed
13685
13686
13687# A5. Set ICTR = 0;
13688#     ICTR is a flag used in A13.  It must be set before the
13689#     loop entry A6. The lower word of d5 is used for ICTR.
13690
13691	clr.w		%d5		# clear ICTR
13692
13693# A6. Calculate LEN.
13694#     LEN is the number of digits to be displayed.  The k-factor
13695#     can dictate either the total number of digits, if it is
13696#     a positive number, or the number of digits after the
13697#     original decimal point which are to be included as
13698#     significant.  See the 68882 manual for examples.
13699#     If LEN is computed to be greater than 17, set OPERR in
13700#     USER_FPSR.  LEN is stored in d4.
13701#
13702# Register usage:
13703#	Input/Output
13704#	d0: exponent/Unchanged
13705#	d2: x/x/scratch
13706#	d3: x/x
13707#	d4: exc picture/LEN
13708#	d5: ICTR/Unchanged
13709#	d6: ILOG/Unchanged
13710#	d7: k-factor/Unchanged
13711#	a0: ptr for original operand/final result
13712#	a1: x/x
13713#	a2: x/x
13714#	fp0: float(ILOG)/Unchanged
13715#	fp1: x/x
13716#	fp2: x/x
13717#	F_SCR1:x/x
13718#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
13719#	L_SCR1:x/x
13720#	L_SCR2:first word of X packed/Unchanged
13721
13722A6_str:
13723	tst.l		%d7		# branch on sign of k
13724	ble.b		k_neg		# if k <= 0, LEN = ILOG + 1 - k
13725	mov.l		%d7,%d4		# if k > 0, LEN = k
13726	bra.b		len_ck		# skip to LEN check
13727k_neg:
13728	mov.l		%d6,%d4		# first load ILOG to d4
13729	sub.l		%d7,%d4		# subtract off k
13730	addq.l		&1,%d4		# add in the 1
13731len_ck:
13732	tst.l		%d4		# LEN check: branch on sign of LEN
13733	ble.b		LEN_ng		# if neg, set LEN = 1
13734	cmp.l		%d4,&17		# test if LEN > 17
13735	ble.b		A7_str		# if not, forget it
13736	mov.l		&17,%d4		# set max LEN = 17
13737	tst.l		%d7		# if negative, never set OPERR
13738	ble.b		A7_str		# if positive, continue
13739	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
13740	bra.b		A7_str		# finished here
13741LEN_ng:
13742	mov.l		&1,%d4		# min LEN is 1
13743
13744
13745# A7. Calculate SCALE.
13746#     SCALE is equal to 10^ISCALE, where ISCALE is the number
13747#     of decimal places needed to insure LEN integer digits
13748#     in the output before conversion to bcd. LAMBDA is the sign
13749#     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
13750#     the rounding mode as given in the following table (see
13751#     Coonen, p. 7.23 as ref.; however, the SCALE variable is
13752#     of opposite sign in bindec.sa from Coonen).
13753#
13754#	Initial					USE
13755#	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
13756#	----------------------------------------------
13757#	 RN	00	   0	   0		00/0	RN
13758#	 RN	00	   0	   1		00/0	RN
13759#	 RN	00	   1	   0		00/0	RN
13760#	 RN	00	   1	   1		00/0	RN
13761#	 RZ	01	   0	   0		11/3	RP
13762#	 RZ	01	   0	   1		11/3	RP
13763#	 RZ	01	   1	   0		10/2	RM
13764#	 RZ	01	   1	   1		10/2	RM
13765#	 RM	10	   0	   0		11/3	RP
13766#	 RM	10	   0	   1		10/2	RM
13767#	 RM	10	   1	   0		10/2	RM
13768#	 RM	10	   1	   1		11/3	RP
13769#	 RP	11	   0	   0		10/2	RM
13770#	 RP	11	   0	   1		11/3	RP
13771#	 RP	11	   1	   0		11/3	RP
13772#	 RP	11	   1	   1		10/2	RM
13773#
13774# Register usage:
13775#	Input/Output
13776#	d0: exponent/scratch - final is 0
13777#	d2: x/0 or 24 for A9
13778#	d3: x/scratch - offset ptr into PTENRM array
13779#	d4: LEN/Unchanged
13780#	d5: 0/ICTR:LAMBDA
13781#	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
13782#	d7: k-factor/Unchanged
13783#	a0: ptr for original operand/final result
13784#	a1: x/ptr to PTENRM array
13785#	a2: x/x
13786#	fp0: float(ILOG)/Unchanged
13787#	fp1: x/10^ISCALE
13788#	fp2: x/x
13789#	F_SCR1:x/x
13790#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
13791#	L_SCR1:x/x
13792#	L_SCR2:first word of X packed/Unchanged
13793
13794A7_str:
13795	tst.l		%d7		# test sign of k
13796	bgt.b		k_pos		# if pos and > 0, skip this
13797	cmp.l		%d7,%d6		# test k - ILOG
13798	blt.b		k_pos		# if ILOG >= k, skip this
13799	mov.l		%d7,%d6		# if ((k<0) & (ILOG < k)) ILOG = k
13800k_pos:
13801	mov.l		%d6,%d0		# calc ILOG + 1 - LEN in d0
13802	addq.l		&1,%d0		# add the 1
13803	sub.l		%d4,%d0		# sub off LEN
13804	swap		%d5		# use upper word of d5 for LAMBDA
13805	clr.w		%d5		# set it zero initially
13806	clr.w		%d2		# set up d2 for very small case
13807	tst.l		%d0		# test sign of ISCALE
13808	bge.b		iscale		# if pos, skip next inst
13809	addq.w		&1,%d5		# if neg, set LAMBDA true
13810	cmp.l		%d0,&0xffffecd4	# test iscale <= -4908
13811	bgt.b		no_inf		# if false, skip rest
13812	add.l		&24,%d0		# add in 24 to iscale
13813	mov.l		&24,%d2		# put 24 in d2 for A9
13814no_inf:
13815	neg.l		%d0		# and take abs of ISCALE
13816iscale:
13817	fmov.s		FONE(%pc),%fp1	# init fp1 to 1
13818	bfextu		USER_FPCR(%a6){&26:&2},%d1	# get initial rmode bits
13819	lsl.w		&1,%d1		# put them in bits 2:1
13820	add.w		%d5,%d1		# add in LAMBDA
13821	lsl.w		&1,%d1		# put them in bits 3:1
13822	tst.l		L_SCR2(%a6)	# test sign of original x
13823	bge.b		x_pos		# if pos, don't set bit 0
13824	addq.l		&1,%d1		# if neg, set bit 0
13825x_pos:
13826	lea.l		RBDTBL(%pc),%a2	# load rbdtbl base
13827	mov.b		(%a2,%d1),%d3	# load d3 with new rmode
13828	lsl.l		&4,%d3		# put bits in proper position
13829	fmov.l		%d3,%fpcr	# load bits into fpu
13830	lsr.l		&4,%d3		# put bits in proper position
13831	tst.b		%d3		# decode new rmode for pten table
13832	bne.b		not_rn		# if zero, it is RN
13833	lea.l		PTENRN(%pc),%a1	# load a1 with RN table base
13834	bra.b		rmode		# exit decode
13835not_rn:
13836	lsr.b		&1,%d3		# get lsb in carry
13837	bcc.b		not_rp2		# if carry clear, it is RM
13838	lea.l		PTENRP(%pc),%a1	# load a1 with RP table base
13839	bra.b		rmode		# exit decode
13840not_rp2:
13841	lea.l		PTENRM(%pc),%a1	# load a1 with RM table base
13842rmode:
13843	clr.l		%d3		# clr table index
#
# Compute fp1 = 10^|ISCALE| by binary decomposition of d0: each set bit n
# of |ISCALE| selects the 10^(2^n) entry (12 bytes apart) of the table in a1.
#
13844e_loop2:
13845	lsr.l		&1,%d0		# shift next bit into carry
13846	bcc.b		e_next2		# if zero, skip the mul
13847	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
13848e_next2:
13849	add.l		&12,%d3		# inc d3 to next pwrten table entry
13850	tst.l		%d0		# test if ISCALE is zero
13851	bne.b		e_loop2		# if not, loop
13852
13853# A8. Clr INEX; Force RZ.
13854#     The operation in A3 above may have set INEX2.
13855#     RZ mode is forced for the scaling operation to insure
13856#     only one rounding error.  The grs bits are collected in
13857#     the INEX flag for use in A10.
13858#
13859# Register usage:
13860#	Input/Output
13861
13862	fmov.l		&0,%fpsr	# clr INEX
13863	fmov.l		&rz_mode*0x10,%fpcr	# set RZ rounding mode
13864
13865# A9. Scale X -> Y.
13866#     The mantissa is scaled to the desired number of significant
13867#     digits.  The excess digits are collected in INEX2. If mul,
13868#     Check d2 for excess 10 exponential value.  If not zero,
13869#     the iscale value would have caused the pwrten calculation
13870#     to overflow.  Only a negative iscale can cause this, so
13871#     multiply by 10^(d2), which is now only allowed to be 24,
13872#     with a multiply by 10^8 and 10^16, which is exact since
13873#     10^24 is exact.  If the input was denormalized, we must
13874#     create a busy stack frame with the mul command and the
13875#     two operands, and allow the fpu to complete the multiply.
13876#
13877# Register usage:
13878#	Input/Output
13879#	d0: FPCR with RZ mode/Unchanged
13880#	d2: 0 or 24/unchanged
13881#	d3: x/x
13882#	d4: LEN/Unchanged
13883#	d5: ICTR:LAMBDA
13884#	d6: ILOG/Unchanged
13885#	d7: k-factor/Unchanged
13886#	a0: ptr for original operand/final result
13887#	a1: ptr to PTENRM array/Unchanged
13888#	a2: x/x
13889#	fp0: float(ILOG)/X adjusted for SCALE (Y)
13890#	fp1: 10^ISCALE/Unchanged
13891#	fp2: x/x
13892#	F_SCR1:x/x
13893#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
13894#	L_SCR1:x/x
13895#	L_SCR2:first word of X packed/Unchanged

13897A9_str:
13898	fmov.x		(%a0),%fp0	# load X from memory
13899	fabs.x		%fp0		# use abs(X)
13900	tst.w		%d5		# LAMBDA is in lower word of d5
13901	bne.b		sc_mul		# if neg (LAMBDA = 1), scale by mul
13902	fdiv.x		%fp1,%fp0	# calculate X / SCALE -> Y to fp0
13903	bra.w		A10_st		# branch to A10

13905sc_mul:
13906	tst.b		BINDEC_FLG(%a6)	# check for denorm
13907	beq.w		A9_norm		# if norm, continue with mul

13909# for DENORM, we must calculate:
13910#	fp0 = input_op * 10^ISCALE * 10^24
13911# since the input operand is a DENORM, we can't multiply it directly.
13912# so, we do the multiplication of the exponents and mantissas separately.
13913# in this way, we avoid underflow on intermediate stages of the
13914# multiplication and guarantee a result without exception.
13915	fmovm.x		&0x2,-(%sp)	# save 10^ISCALE to stack
13916
13917	mov.w		(%sp),%d3	# grab exponent
13918	andi.w		&0x7fff,%d3	# clear sign
13919	ori.w		&0x8000,(%a0)	# make DENORM exp negative
13920	add.w		(%a0),%d3	# add DENORM exp to 10^ISCALE exp
13921	subi.w		&0x3fff,%d3	# subtract BIAS
13922	add.w		36(%a1),%d3	# add exponent of 10^8 table entry
13923	subi.w		&0x3fff,%d3	# subtract BIAS
13924	add.w		48(%a1),%d3	# add exponent of 10^16 table entry
13925	subi.w		&0x3fff,%d3	# subtract BIAS
13926
13927	bmi.w		sc_mul_err	# if result is a DENORM, punt!!!
13928
13929	andi.w		&0x8000,(%sp)	# keep sign
13930	or.w		%d3,(%sp)	# insert new exponent
13931	andi.w		&0x7fff,(%a0)	# clear sign bit on DENORM again
13932	mov.l		0x8(%a0),-(%sp) # put input op mantissa on stk
13933	mov.l		0x4(%a0),-(%sp)
13934	mov.l		&0x3fff0000,-(%sp) # force exp to zero
13935	fmovm.x		(%sp)+,&0x80	# load normalized DENORM into fp0
13936	fmul.x		(%sp)+,%fp0	# mul by 10^ISCALE mantissa w/ new exp
13937
13938#	fmul.x	36(%a1),%fp0	# multiply fp0 by 10^8
13939#	fmul.x	48(%a1),%fp0	# multiply fp0 by 10^16
13940	mov.l		36+8(%a1),-(%sp) # get 10^8 mantissa
13941	mov.l		36+4(%a1),-(%sp)
13942	mov.l		&0x3fff0000,-(%sp) # force exp to zero
13943	mov.l		48+8(%a1),-(%sp) # get 10^16 mantissa
13944	mov.l		48+4(%a1),-(%sp)
13945	mov.l		&0x3fff0000,-(%sp)# force exp to zero
13946	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^8
13947	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^16
13948	bra.b		A10_st

# NOTE(review): deliberate infinite loop on the "scaled result would still be
# a DENORM" case — this path appears intended to be unreachable; confirm.
13950sc_mul_err:
13951	bra.b		sc_mul_err	# hang here

13953A9_norm:
13954	tst.w		%d2		# test for small exp case
13955	beq.b		A9_con		# if zero, continue as normal
13956	fmul.x		36(%a1),%fp0	# multiply fp0 by 10^8
13957	fmul.x		48(%a1),%fp0	# multiply fp0 by 10^16
13958A9_con:
13959	fmul.x		%fp1,%fp0	# calculate X * SCALE -> Y to fp0
13961# A10. Or in INEX.
13962#      If INEX is set, round error occurred.  This is compensated
13963#      for by 'or-ing' in the INEX2 flag to the lsb of Y.
13964#
13965# Register usage:
13966#	Input/Output
13967#	d0: FPCR with RZ mode/FPSR with INEX2 isolated
13968#	d2: x/x
13969#	d3: x/x
13970#	d4: LEN/Unchanged
13971#	d5: ICTR:LAMBDA
13972#	d6: ILOG/Unchanged
13973#	d7: k-factor/Unchanged
13974#	a0: ptr for original operand/final result
13975#	a1: ptr to PTENxx array/Unchanged
13976#	a2: x/ptr to FP_SCR1(a6)
13977#	fp0: Y/Y with lsb adjusted
13978#	fp1: 10^ISCALE/Unchanged
13979#	fp2: x/x
13980
13981A10_st:
13982	fmov.l		%fpsr,%d0	# get FPSR
13983	fmov.x		%fp0,FP_SCR1(%a6)	# move Y to memory
13984	lea.l		FP_SCR1(%a6),%a2	# load a2 with ptr to FP_SCR1
13985	btst		&9,%d0		# check if INEX2 set
13986	beq.b		A11_st		# if clear, skip rest
13987	or.l		&1,8(%a2)	# or in 1 to lsb of mantissa
13988	fmov.x		FP_SCR1(%a6),%fp0	# write adjusted Y back to fpu
13989
13990
13991# A11. Restore original FPCR; set size ext.
13992#      Perform FINT operation in the user's rounding mode.  Keep
13993#      the size to extended.  The sintdo entry point in the sint
13994#      routine expects the FPCR value to be in USER_FPCR for
13995#      mode and precision.  The original FPCR is saved in L_SCR1.
13996
13997A11_st:
13998	mov.l		USER_FPCR(%a6),L_SCR1(%a6)	# save it for later
13999	and.l		&0x00000030,USER_FPCR(%a6)	# set size to ext,
14000#					;block exceptions
14001
14002
14003# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
14004#      The FPSP routine sintd0 is used.  The output is in fp0.
14005#
14006# Register usage:
14007#	Input/Output
14008#	d0: FPSR with AINEX cleared/FPCR with size set to ext
14009#	d2: x/x/scratch
14010#	d3: x/x
14011#	d4: LEN/Unchanged
14012#	d5: ICTR:LAMBDA/Unchanged
14013#	d6: ILOG/Unchanged
14014#	d7: k-factor/Unchanged
14015#	a0: ptr for original operand/src ptr for sintdo
14016#	a1: ptr to PTENxx array/Unchanged
14017#	a2: ptr to FP_SCR1(a6)/Unchanged
14018#	a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
14019#	fp0: Y/YINT
14020#	fp1: 10^ISCALE/Unchanged
14021#	fp2: x/x
14022#	F_SCR1:x/x
14023#	F_SCR2:Y adjusted for inex/Y with original exponent
14024#	L_SCR1:x/original USER_FPCR
14025#	L_SCR2:first word of X packed/Unchanged
14026
14027A12_st:
14028	movm.l	&0xc0c0,-(%sp)	# save regs used by sintd0	 {%d0-%d1/%a0-%a1}
14029	mov.l	L_SCR1(%a6),-(%sp)
14030	mov.l	L_SCR2(%a6),-(%sp)
14031
14032	lea.l		FP_SCR1(%a6),%a0	# a0 is ptr to FP_SCR1(a6)
14033	fmov.x		%fp0,(%a0)	# move Y to memory at FP_SCR1(a6)
14034	tst.l		L_SCR2(%a6)	# test sign of original operand
14035	bge.b		do_fint12		# if pos, use Y
14036	or.l		&0x80000000,(%a0)	# if neg, use -Y
14037do_fint12:
14038	mov.l	USER_FPSR(%a6),-(%sp)
14039#	bsr	sintdo		# sint routine returns int in fp0
14040
14041	fmov.l	USER_FPCR(%a6),%fpcr
14042	fmov.l	&0x0,%fpsr			# clear the AEXC bits!!!
14043##	mov.l		USER_FPCR(%a6),%d0	# ext prec/keep rnd mode
14044##	andi.l		&0x00000030,%d0
14045##	fmov.l		%d0,%fpcr
14046	fint.x		FP_SCR1(%a6),%fp0	# do fint()
14047	fmov.l	%fpsr,%d0
14048	or.w	%d0,FPSR_EXCEPT(%a6)
14049##	fmov.l		&0x0,%fpcr
14050##	fmov.l		%fpsr,%d0		# don't keep ccodes
14051##	or.w		%d0,FPSR_EXCEPT(%a6)
14052
14053	mov.b	(%sp),USER_FPSR(%a6)
14054	add.l	&4,%sp
14055
14056	mov.l	(%sp)+,L_SCR2(%a6)
14057	mov.l	(%sp)+,L_SCR1(%a6)
14058	movm.l	(%sp)+,&0x303	# restore regs used by sint	 {%d0-%d1/%a0-%a1}
14059
14060	mov.l	L_SCR2(%a6),FP_SCR1(%a6)	# restore original exponent
14061	mov.l	L_SCR1(%a6),USER_FPCR(%a6)	# restore user's FPCR
14062
14063# A13. Check for LEN digits.
14064#      If the int operation results in more than LEN digits,
14065#      or less than LEN -1 digits, adjust ILOG and repeat from
14066#      A6.  This test occurs only on the first pass.  If the
14067#      result is exactly 10^LEN, decrement ILOG and divide
14068#      the mantissa by 10.  The calculation of 10^LEN cannot
14069#      be inexact, since all powers of ten up to 10^27 are exact
14070#      in extended precision, so the use of a previous power-of-ten
14071#      table will introduce no error.
14072#
14073#
14074# Register usage:
14075#	Input/Output
14076#	d0: FPCR with size set to ext/scratch final = 0
14077#	d2: x/x
14078#	d3: x/scratch final = x
14079#	d4: LEN/LEN adjusted
14080#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14081#	d6: ILOG/ILOG adjusted
14082#	d7: k-factor/Unchanged
14083#	a0: pointer into memory for packed bcd string formation
14084#	a1: ptr to PTENxx array/Unchanged
14085#	a2: ptr to FP_SCR1(a6)/Unchanged
14086#	fp0: int portion of Y/abs(YINT) adjusted
14087#	fp1: 10^ISCALE/Unchanged
14088#	fp2: x/10^LEN
14089#	F_SCR1:x/x
14090#	F_SCR2:Y with original exponent/Unchanged
14091#	L_SCR1:original USER_FPCR/Unchanged
14092#	L_SCR2:first word of X packed/Unchanged

14094A13_st:
14095	swap		%d5		# put ICTR in lower word of d5
14096	tst.w		%d5		# check if ICTR = 0
14097	bne		not_zr		# if non-zero, go to second test
14098#
14099# Compute 10^(LEN-1)
14100#
14101	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
14102	mov.l		%d4,%d0		# put LEN in d0
14103	subq.l		&1,%d0		# d0 = LEN -1
14104	clr.l		%d3		# clr table index
14105l_loop:
14106	lsr.l		&1,%d0		# shift next bit into carry
14107	bcc.b		l_next		# if zero, skip the mul
14108	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
14109l_next:
14110	add.l		&12,%d3		# inc d3 to next pwrten table entry
14111	tst.l		%d0		# test if LEN is zero
14112	bne.b		l_loop		# if not, loop
14113#
14114# 10^(LEN-1) is computed for this test and A14.  If the input was
14115# denormalized, check only the case in which YINT > 10^LEN.
14116#
14117	tst.b		BINDEC_FLG(%a6)	# check if input was norm
14118	beq.b		A13_con		# if norm, continue with checking
14119	fabs.x		%fp0		# take abs of YINT
14120	bra		test_2
14121#
14122# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
14123#
14124A13_con:
14125	fabs.x		%fp0		# take abs of YINT
14126	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^(LEN-1)
14127	fbge.w		test_2		# if greater, do next test
14128	subq.l		&1,%d6		# subtract 1 from ILOG
14129	mov.w		&1,%d5		# set ICTR
14130	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
14131	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
14132	bra.w		A6_str		# return to A6 and recompute YINT
14133test_2:
14134	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
14135	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^LEN
14136	fblt.w		A14_st		# if less, all is ok, go to A14
14137	fbgt.w		fix_ex		# if greater, fix and redo
14138	fdiv.s		FTEN(%pc),%fp0	# if equal, divide by 10
14139	addq.l		&1,%d6		# and inc ILOG
14140	bra.b		A14_st		# and continue elsewhere
14141fix_ex:
14142	addq.l		&1,%d6		# increment ILOG by 1
14143	mov.w		&1,%d5		# set ICTR
14144	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
14145	bra.w		A6_str		# return to A6 and recompute YINT
14146#
14147# Since ICTR <> 0, we have already been through one adjustment,
14148# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
14149# 10^LEN is again computed using whatever table is in a1 since the
14150# value calculated cannot be inexact.
14151#
14152not_zr:
14153	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
14154	mov.l		%d4,%d0		# put LEN in d0
14155	clr.l		%d3		# clr table index
14156z_loop:
14157	lsr.l		&1,%d0		# shift next bit into carry
14158	bcc.b		z_next		# if zero, skip the mul
14159	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
14160z_next:
14161	add.l		&12,%d3		# inc d3 to next pwrten table entry
14162	tst.l		%d0		# test if LEN is zero
14163	bne.b		z_loop		# if not, loop
14164	fabs.x		%fp0		# get abs(YINT)
14165	fcmp.x		%fp0,%fp2	# check if abs(YINT) = 10^LEN
14166	fbneq.w		A14_st		# if not, skip this
14167	fdiv.s		FTEN(%pc),%fp0	# divide abs(YINT) by 10
14168	addq.l		&1,%d6		# and inc ILOG by 1
14169	addq.l		&1,%d4		# and inc LEN
14170	fmul.s		FTEN(%pc),%fp2	# if LEN++, then get 10^^LEN
14171
14172# A14. Convert the mantissa to bcd.
14173#      The binstr routine is used to convert the LEN digit
14174#      mantissa to bcd in memory.  The input to binstr is
14175#      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
14176#      such that the decimal point is to the left of bit 63.
14177#      The bcd digits are stored in the correct position in
14178#      the final string area in memory.
14179#
14180#
14181# Register usage:
14182#	Input/Output
14183#	d0: x/LEN call to binstr - final is 0
14184#	d1: x/0
14185#	d2: x/ms 32-bits of mant of abs(YINT)
14186#	d3: x/ls 32-bits of mant of abs(YINT)
14187#	d4: LEN/Unchanged
14188#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14189#	d6: ILOG
14190#	d7: k-factor/Unchanged
14191#	a0: pointer into memory for packed bcd string formation
14192#	    /ptr to first mantissa byte in result string
14193#	a1: ptr to PTENxx array/Unchanged
14194#	a2: ptr to FP_SCR1(a6)/Unchanged
14195#	fp0: int portion of Y/abs(YINT) adjusted
14196#	fp1: 10^ISCALE/Unchanged
14197#	fp2: 10^LEN/Unchanged
14198#	F_SCR1:x/Work area for final result
14199#	F_SCR2:Y with original exponent/Unchanged
14200#	L_SCR1:original USER_FPCR/Unchanged
14201#	L_SCR2:first word of X packed/Unchanged

14203A14_st:
14204	fmov.l		&rz_mode*0x10,%fpcr	# force rz for conversion
14205	fdiv.x		%fp2,%fp0	# divide abs(YINT) by 10^LEN
14206	lea.l		FP_SCR0(%a6),%a0	# a0 = work area for final bcd string
14207	fmov.x		%fp0,(%a0)	# move abs(YINT)/10^LEN to memory
14208	mov.l		4(%a0),%d2	# move 2nd word of FP_RES to d2
14209	mov.l		8(%a0),%d3	# move 3rd word of FP_RES to d3
14210	clr.l		4(%a0)		# zero word 2 of FP_RES
14211	clr.l		8(%a0)		# zero word 3 of FP_RES
14212	mov.l		(%a0),%d0	# move exponent to d0
14213	swap		%d0		# put exponent in lower word
14214	beq.b		no_sft		# if zero, don't shift
14215	sub.l		&0x3ffd,%d0	# sub bias less 2 to make fract
14216	tst.l		%d0		# check if > 1
14217	bgt.b		no_sft		# if so, don't shift
14218	neg.l		%d0		# make exp positive
14219m_loop:
14220	lsr.l		&1,%d2		# shift d2:d3 right, add 0s
14221	roxr.l		&1,%d3		# the number of places
14222	dbf.w		%d0,m_loop	# given in d0
14223no_sft:
14224	tst.l		%d2		# check for mantissa of zero
14225	bne.b		no_zr		# if not, go on
14226	tst.l		%d3		# continue zero check
14227	beq.b		zer_m		# if zero, go directly to binstr
14228no_zr:
14229	clr.l		%d1		# put zero in d1 for addx
14230	add.l		&0x00000080,%d3	# inc at bit 7
14231	addx.l		%d1,%d2		# continue inc
14232	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
14233zer_m:
14234	mov.l		%d4,%d0		# put LEN in d0 for binstr call
14235	addq.l		&3,%a0		# a0 points to M16 byte in result
14236	bsr		binstr		# call binstr to convert mant
14237
14238
14239# A15. Convert the exponent to bcd.
14240#      As in A14 above, the exp is converted to bcd and the
14241#      digits are stored in the final string.
14242#
14243#      Digits are stored in L_SCR1(a6) on return from BINDEC as:
14244#
14245#  	 32               16 15                0
14246#	-----------------------------------------
14247#  	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
14248#	-----------------------------------------
14249#
14250# And are moved into their proper places in FP_SCR0.  If digit e4
14251# is non-zero, OPERR is signaled.  In all cases, all 4 digits are
14252# written as specified in the 881/882 manual for packed decimal.
14253#
14254# Register usage:
14255#	Input/Output
14256#	d0: x/LEN call to binstr - final is 0
14257#	d1: x/scratch (0);shift count for final exponent packing
14258#	d2: x/ms 32-bits of exp fraction/scratch
14259#	d3: x/ls 32-bits of exp fraction
14260#	d4: LEN/Unchanged
14261#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14262#	d6: ILOG
14263#	d7: k-factor/Unchanged
14264#	a0: ptr to result string/ptr to L_SCR1(a6)
14265#	a1: ptr to PTENxx array/Unchanged
14266#	a2: ptr to FP_SCR1(a6)/Unchanged
14267#	fp0: abs(YINT) adjusted/float(ILOG)
14268#	fp1: 10^ISCALE/Unchanged
14269#	fp2: 10^LEN/Unchanged
14270#	F_SCR1:Work area for final result/BCD result
14271#	F_SCR2:Y with original exponent/ILOG/10^4
14272#	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
14273#	L_SCR2:first word of X packed/Unchanged

14275A15_st:
14276	tst.b		BINDEC_FLG(%a6)	# check for denorm
14277	beq.b		not_denorm	# if norm, take the normal/zero path
14278	ftest.x		%fp0		# test for zero
14279	fbeq.w		den_zero	# if zero, use k-factor or 4933
14280	fmov.l		%d6,%fp0	# float ILOG
14281	fabs.x		%fp0		# get abs of ILOG
14282	bra.b		convrt
14283den_zero:
14284	tst.l		%d7		# check sign of the k-factor
14285	blt.b		use_ilog	# if negative, use ILOG
14286	fmov.s		F4933(%pc),%fp0	# force exponent to 4933
14287	bra.b		convrt		# do it
14288use_ilog:
14289	fmov.l		%d6,%fp0	# float ILOG
14290	fabs.x		%fp0		# get abs of ILOG
14291	bra.b		convrt
14292not_denorm:
14293	ftest.x		%fp0		# test for zero
14294	fbneq.w		not_zero	# if zero, force exponent
14295	fmov.s		FONE(%pc),%fp0	# force exponent to 1
14296	bra.b		convrt		# do it
14297not_zero:
14298	fmov.l		%d6,%fp0	# float ILOG
14299	fabs.x		%fp0		# get abs of ILOG
14300convrt:
14301	fdiv.x		24(%a1),%fp0	# compute ILOG/10^4
14302	fmov.x		%fp0,FP_SCR1(%a6)	# store fp0 in memory
14303	mov.l		4(%a2),%d2	# move word 2 to d2
14304	mov.l		8(%a2),%d3	# move word 3 to d3
14305	mov.w		(%a2),%d0	# move exp to d0
14306	beq.b		x_loop_fin	# if zero, skip the shift
14307	sub.w		&0x3ffd,%d0	# subtract off bias
14308	neg.w		%d0		# make exp positive
14309x_loop:
14310	lsr.l		&1,%d2		# shift d2:d3 right
14311	roxr.l		&1,%d3		# the number of places
14312	dbf.w		%d0,x_loop	# given in d0
14313x_loop_fin:
14314	clr.l		%d1		# put zero in d1 for addx
14315	add.l		&0x00000080,%d3	# inc at bit 7
14316	addx.l		%d1,%d2		# continue inc
14317	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
14318	mov.l		&4,%d0		# put 4 in d0 for binstr call
14319	lea.l		L_SCR1(%a6),%a0	# a0 is ptr to L_SCR1 for exp digits
14320	bsr		binstr		# call binstr to convert exp
14321	mov.l		L_SCR1(%a6),%d0	# load L_SCR1 lword to d0
14322	mov.l		&12,%d1		# use d1 for shift count
14323	lsr.l		%d1,%d0		# shift d0 right by 12
14324	bfins		%d0,FP_SCR0(%a6){&4:&12}	# put e3:e2:e1 in FP_SCR0
14325	lsr.l		%d1,%d0		# shift d0 right by 12
14326	bfins		%d0,FP_SCR0(%a6){&16:&4}	# put e4 in FP_SCR0
14327	tst.b		%d0		# check if e4 is zero
14328	beq.b		A16_st		# if zero, skip rest
14329	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
14330
14331
14332# A16. Write sign bits to final string.
14333#	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
14334#
14335# Register usage:
14336#	Input/Output
14337#	d0: x/scratch - final is x
14338#	d2: x/x
14339#	d3: x/x
14340#	d4: LEN/Unchanged
14341#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14342#	d6: ILOG/ILOG adjusted
14343#	d7: k-factor/Unchanged
14344#	a0: ptr to L_SCR1(a6)/Unchanged
14345#	a1: ptr to PTENxx array/Unchanged
14346#	a2: ptr to FP_SCR1(a6)/Unchanged
14347#	fp0: float(ILOG)/Unchanged
14348#	fp1: 10^ISCALE/Unchanged
14349#	fp2: 10^LEN/Unchanged
14350#	F_SCR1:BCD result with correct signs
14351#	F_SCR2:ILOG/10^4
14352#	L_SCR1:Exponent digits on return from binstr
14353#	L_SCR2:first word of X packed/Unchanged

14355A16_st:
14356	clr.l		%d0		# clr d0 for collection of signs
14357	and.b		&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
14358	tst.l		L_SCR2(%a6)	# check sign of original mantissa
14359	bge.b		mant_p		# if pos, don't set SM
14360	mov.l		&2,%d0		# move 2 in to d0 for SM (bit 1)
14361mant_p:
14362	tst.l		%d6		# check sign of ILOG
14363	bge.b		wr_sgn		# if pos, don't set SE
14364	addq.l		&1,%d0		# set bit 0 in d0 for SE
14365wr_sgn:
14366	bfins		%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0

14368# Clean up and restore all registers used.

14370	fmov.l		&0,%fpsr	# clear possible inex2/ainex bits
14371	fmovm.x		(%sp)+,&0xe0	#  {%fp0-%fp2}
14372	movm.l		(%sp)+,&0x4fc	#  {%d2-%d7/%a2}
14373	rts
14374
14375	global		PTENRN
# 10^(2^n), n = 0..12, in extended precision (12 bytes per entry),
# with the last mantissa lword rounded to nearest where inexact.
14376PTENRN:
14377	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
14378	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
14379	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
14380	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
14381	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
14382	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
14383	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
14384	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
14385	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
14386	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
14387	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
14388	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
14389	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
14390
	global		PTENRP
# Powers of ten, 10^(2^n), rounded toward plus infinity.  Where an
# entry's value is inexact, the last mantissa longword is never smaller
# than the corresponding PTENRN entry (e.g. 10^64 ends ...A6D6 here vs
# ...A6D5 in PTENRN).
PTENRP:
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
14406
	global		PTENRM
# Powers of ten, 10^(2^n), rounded toward minus infinity.  Where an
# entry's value is inexact, the last mantissa longword is never larger
# than the corresponding PTENRN entry (e.g. 10^32 ends ...B59D here vs
# ...B59E in PTENRN).
PTENRM:
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096
14422
14423#########################################################################
14424# binstr(): Converts a 64-bit binary integer to bcd.			#
14425#									#
14426# INPUT *************************************************************** #
14427#	d2:d3 = 64-bit binary integer					#
14428#	d0    = desired length (LEN)					#
14429#	a0    = pointer to start in memory for bcd characters		#
14430#          	(This pointer must point to byte 4 of the first		#
14431#          	 lword of the packed decimal memory string.)		#
14432#									#
14433# OUTPUT ************************************************************** #
14434#	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
14435#									#
14436# ALGORITHM ***********************************************************	#
14437#	The 64-bit binary is assumed to have a decimal point before	#
14438#	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
14439#	shift and a mul by 8 shift.  The bits shifted out of the	#
14440#	msb form a decimal digit.  This process is iterated until	#
14441#	LEN digits are formed.						#
14442#									#
14443# A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the	#
14444#     digit formed will be assumed the least significant.  This is	#
14445#     to force the first byte formed to have a 0 in the upper 4 bits.	#
14446#									#
14447# A2. Beginning of the loop:						#
14448#     Copy the fraction in d2:d3 to d4:d5.				#
14449#									#
14450# A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
14451#     extracts and shifts.  The three msbs from d2 will go into d1.	#
14452#									#
14453# A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb	#
14454#     will be collected by the carry.					#
14455#									#
14456# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
14457#     into d2:d3.  D1 will contain the bcd digit formed.		#
14458#									#
14459# A6. Test d7.  If zero, the digit formed is the ms digit.  If non-	#
14460#     zero, it is the ls digit.  Put the digit in its place in the	#
14461#     upper word of d0.  If it is the ls digit, write the word		#
14462#     from d0 to memory.						#
14463#									#
14464# A7. Decrement d6 (LEN counter) and repeat the loop until zero.	#
14465#									#
14466#########################################################################
14467
14468#	Implementation Notes:
14469#
14470#	The registers are used as follows:
14471#
14472#		d0: LEN counter
14473#		d1: temp used to form the digit
14474#		d2: upper 32-bits of fraction for mul by 8
14475#		d3: lower 32-bits of fraction for mul by 8
14476#		d4: upper 32-bits of fraction for mul by 2
14477#		d5: lower 32-bits of fraction for mul by 2
14478#		d6: temp for bit-field extracts
14479#		d7: byte digit formation word;digit count {0,1}
14480#		a0: pointer into memory for packed bcd string formation
14481#
14482
	global		binstr
binstr:
	movm.l		&0xff00,-(%sp)	#  {%d0-%d7}

#
# A1: Init d7
#
	mov.l		&1,%d7		# init d7 for second digit
	subq.l		&1,%d0		# for dbf d0 would have LEN+1 passes
#
# A2. Copy d2:d3 to d4:d5.  Start loop.
#
loop:
	mov.l		%d2,%d4		# copy the fraction before muls
	mov.l		%d3,%d5		# to d4:d5
#
# A3. Multiply d2:d3 by 8; extract msbs into d1.
# (x*10 is computed as x*8 + x*2; the bits that shift out of bit 63
#  across both products sum to the next decimal digit.)
#
	bfextu		%d2{&0:&3},%d1	# copy 3 msbs of d2 into d1
	asl.l		&3,%d2		# shift d2 left by 3 places
	bfextu		%d3{&0:&3},%d6	# copy 3 msbs of d3 into d6
	asl.l		&3,%d3		# shift d3 left by 3 places
	or.l		%d6,%d2		# or in msbs from d3 into d2
#
# A4. Multiply d4:d5 by 2; add carry out to d1.
#
	asl.l		&1,%d5		# mul d5 by 2
	roxl.l		&1,%d4		# mul d4 by 2 (X = bit shifted out)
	swap		%d6		# put 0 in d6 lower word
	addx.w		%d6,%d1		# add in extend from mul by 2
#
# A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
# (The nops must stay between the addx chain — documented errata fix.)
#
	add.l		%d5,%d3		# add lower 32 bits
	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.l		%d4,%d2		# add with extend upper 32 bits
	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.w		%d6,%d1		# add in extend from add to d1
	swap		%d6		# with d6 = 0; put 0 in upper word
#
# A6. Test d7 and branch.  Two digits are packed per byte: the first
# digit of a pair is held in d7's other word until the second arrives.
#
	tst.w		%d7		# if zero, store digit & to loop
	beq.b		first_d		# if non-zero, form byte & write
sec_d:
	swap		%d7		# bring first digit to word d7b
	asl.w		&4,%d7		# first digit in upper 4 bits d7b
	add.w		%d1,%d7		# add in ls digit to d7b
	mov.b		%d7,(%a0)+	# store d7b byte in memory
	swap		%d7		# put LEN counter in word d7a
	clr.w		%d7		# set d7a to signal no digits done
	dbf.w		%d0,loop	# do loop some more!
	bra.b		end_bstr	# finished, so exit
first_d:
	swap		%d7		# put digit word in d7b
	mov.w		%d1,%d7		# put new digit in d7b
	swap		%d7		# put LEN counter in word d7a
	addq.w		&1,%d7		# set d7a to signal first digit done
	dbf.w		%d0,loop	# do loop some more!
	swap		%d7		# put last digit in string
	lsl.w		&4,%d7		# move it to upper 4 bits
	mov.b		%d7,(%a0)+	# store it in memory string
#
# Clean up and return with result in fp0.
#
end_bstr:
	movm.l		(%sp)+,&0xff	#  {%d0-%d7}
	rts
14551
14552#########################################################################
14553# XDEF ****************************************************************	#
14554#	facc_in_b(): dmem_read_byte failed				#
14555#	facc_in_w(): dmem_read_word failed				#
14556#	facc_in_l(): dmem_read_long failed				#
14557#	facc_in_d(): dmem_read of dbl prec failed			#
14558#	facc_in_x(): dmem_read of ext prec failed			#
14559#									#
14560#	facc_out_b(): dmem_write_byte failed				#
14561#	facc_out_w(): dmem_write_word failed				#
14562#	facc_out_l(): dmem_write_long failed				#
14563#	facc_out_d(): dmem_write of dbl prec failed			#
14564#	facc_out_x(): dmem_write of ext prec failed			#
14565#									#
14566# XREF ****************************************************************	#
14567#	_real_access() - exit through access error handler		#
14568#									#
14569# INPUT ***************************************************************	#
14570#	None								#
14571# 									#
14572# OUTPUT **************************************************************	#
14573#	None								#
14574#									#
14575# ALGORITHM ***********************************************************	#
14576# 	Flow jumps here when an FP data fetch call gets an error 	#
14577# result. This means the operating system wants an access error frame	#
14578# made out of the current exception stack frame. 			#
14579#	So, we first call restore() which makes sure that any updated	#
14580# -(an)+ register gets returned to its pre-exception value and then	#
# we change the stack to an access error stack frame.			#
14582#									#
14583#########################################################################
14584
# Entry points for a failed dmem_read of each operand size.  Each stub
# loads the access size (bytes) into d0, calls restore() to undo any
# -(An)/(An)+ update, records the size-specific FSLW in EXC_VOFF, and
# joins the common frame-building code at facc_finish.
facc_in_b:
	movq.l		&0x1,%d0			# one byte
	bsr.w		restore				# fix An

	mov.w		&0x0121,EXC_VOFF(%a6)		# set FSLW
	bra.w		facc_finish

facc_in_w:
	movq.l		&0x2,%d0			# two bytes
	bsr.w		restore				# fix An

	mov.w		&0x0141,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_in_l:
	movq.l		&0x4,%d0			# four bytes
	bsr.w		restore				# fix An

	mov.w		&0x0101,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_in_d:
	movq.l		&0x8,%d0			# eight bytes
	bsr.w		restore				# fix An

	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_in_x:
	movq.l		&0xc,%d0			# twelve bytes
	bsr.w		restore				# fix An

	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW (same as dbl)
	bra.b		facc_finish
14619
14620################################################################
14621
# Entry points for a failed dmem_write of each operand size; same
# structure as the facc_in_* stubs above but with write-direction FSLWs.
facc_out_b:
	movq.l		&0x1,%d0			# one byte
	bsr.w		restore				# restore An

	mov.w		&0x00a1,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_out_w:
	movq.l		&0x2,%d0			# two bytes
	bsr.w		restore				# restore An

	mov.w		&0x00c1,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_out_l:
	movq.l		&0x4,%d0			# four bytes
	bsr.w		restore				# restore An

	mov.w		&0x0081,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_out_d:
	movq.l		&0x8,%d0			# eight bytes
	bsr.w		restore				# restore An

	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish
14649
# Failed dmem_write of an extended-precision operand.  Uses movq.l like
# every other facc_in_*/facc_out_* stub (0xc fits in moveq's signed
# 8-bit immediate; the original mov.l here was the lone inconsistency).
facc_out_x:
	movq.l		&0xc,%d0			# twelve bytes
	bsr.w		restore				# restore An

	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
	# no branch: falls through to facc_finish below
14655
# here's where we actually create the access error frame from the
# current exception stack frame.  EXC_VOFF(a6) already holds the FSLW
# written by the facc_in_*/facc_out_* stub that branched here.
facc_finish:
	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

# rewrite the stacked frame in place: push a copy of SR/hi(PC), slide
# lo(PC) and EA down one slot, then build the FSLW longword (low word
# taken from the value the stub stored in the voff slot) and finally
# store voff 0x4008 — presumably format 4, access error vector — TODO
# confirm against the 68060 UM frame layout.
	mov.l		(%sp),-(%sp)		# store SR, hi(PC)
	mov.l		0x8(%sp),0x4(%sp)	# store lo(PC)
	mov.l		0xc(%sp),0x8(%sp)	# store EA
	mov.l		&0x00000001,0xc(%sp)	# store FSLW
	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size)
	mov.w		&0x4008,0x6(%sp)	# store voff

	btst		&0x5,(%sp)		# supervisor or user mode?
	beq.b		facc_out2		# user
	bset		&0x2,0xd(%sp)		# set supervisor TM bit

facc_out2:
	bra.l		_real_access
14680
14681##################################################################
14682
# if the effective addressing mode was predecrement or postincrement,
# the emulation has already changed its value to the correct post-
# instruction value. but since we're exiting to the access error
# handler, then AN must be returned to its pre-instruction value.
# we do that here.
#
# In:  d0 = access size in bytes (the amount An was adjusted by)
# Out: the affected An (stacked or live) restored; d1 clobbered
restore:
	mov.b		EXC_OPWORD+0x1(%a6),%d1	# low byte of opword = mode/reg
	andi.b		&0x38,%d1		# extract opmode
	cmpi.b		%d1,&0x18		# postinc?
	beq.w		rest_inc
	cmpi.b		%d1,&0x20		# predec?
	beq.w		rest_dec
	rts					# other modes: nothing to undo
14696
# undo an (An)+ update: subtract the access size (d0) from the register.
# a0/a1 and a6 live in the stacked exception state; a2-a5 are still live.
rest_inc:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.w		&0x0007,%d1		# fetch An

# pc-relative jump table of word offsets, indexed by register number
	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1
	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)

tbl_rest_inc:
	short		ri_a0 - tbl_rest_inc
	short		ri_a1 - tbl_rest_inc
	short		ri_a2 - tbl_rest_inc
	short		ri_a3 - tbl_rest_inc
	short		ri_a4 - tbl_rest_inc
	short		ri_a5 - tbl_rest_inc
	short		ri_a6 - tbl_rest_inc
	short		ri_a7 - tbl_rest_inc

ri_a0:
	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
	rts
ri_a1:
	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
	rts
ri_a2:
	sub.l		%d0,%a2			# fix a2
	rts
ri_a3:
	sub.l		%d0,%a3			# fix a3
	rts
ri_a4:
	sub.l		%d0,%a4			# fix a4
	rts
ri_a5:
	sub.l		%d0,%a5			# fix a5
	rts
ri_a6:
	sub.l		%d0,(%a6)		# fix stacked a6
	rts
# if it's a fmove out instruction, we don't have to fix a7
# because we hadn't changed it yet. if it's an opclass two
# instruction (data moved in) and the exception was in supervisor
# mode, then a7 also wasn't updated. if it was user mode, then
# restore the correct a7 which is in the USP currently.
ri_a7:
	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
	bne.b		ri_a7_done		# out

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		ri_a7_done		# supervisor
	movc		%usp,%a0		# restore USP
	sub.l		%d0,%a0
	movc		%a0,%usp
ri_a7_done:
	rts
14751
# need to invert adjustment value if the <ea> was predec:
# -(An) subtracted the size, so negate d0 and reuse the rest_inc
# path, which subtracts d0 (now negative) from the register.
rest_dec:
	neg.l		%d0
	bra.b		rest_inc
14756