xref: /netbsd/sys/arch/m68k/fpsp/bindec.sa (revision 6550d01e)
1*	$NetBSD: bindec.sa,v 1.5 2001/12/09 01:43:13 briggs Exp $
2
3*	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
4*	M68000 Hi-Performance Microprocessor Division
5*	M68040 Software Package
6*
7*	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
8*	All rights reserved.
9*
10*	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
11*	To the maximum extent permitted by applicable law,
12*	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
13*	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
14*	PARTICULAR PURPOSE and any warranty against infringement with
15*	regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
16*	and any accompanying written materials.
17*
18*	To the maximum extent permitted by applicable law,
19*	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
20*	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
21*	PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
22*	OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
23*	SOFTWARE.  Motorola assumes no responsibility for the maintenance
24*	and support of the SOFTWARE.
25*
26*	You are hereby granted a copyright license to use, modify, and
27*	distribute the SOFTWARE so long as this entire notice is retained
28*	without alteration in any modified and/or redistributed versions,
29*	and that such modified versions are clearly identified as such.
30*	No licenses are granted by implication, estoppel or otherwise
31*	under any patents or trademarks of Motorola, Inc.
32
33*
34*	bindec.sa 3.4 1/3/91
35*
36*	bindec
37*
38*	Description:
39*		Converts an input in extended precision format
40*		to bcd format.
41*
42*	Input:
43*		a0 points to the input extended precision value
44*		value in memory; d0 contains the k-factor sign-extended
45*		to 32-bits.  The input may be either normalized,
46*		unnormalized, or denormalized.
47*
48*	Output:	result in the FP_SCR1 space on the stack.
49*
50*	Saves and Modifies: D2-D7,A2,FP2
51*
52*	Algorithm:
53*
54*	A1.	Set RM and size ext;  Set SIGMA = sign of input.
55*		The k-factor is saved for use in d7. Clear the
56*		BINDEC_FLG for separating normalized/denormalized
57*		input.  If input is unnormalized or denormalized,
58*		normalize it.
59*
60*	A2.	Set X = abs(input).
61*
62*	A3.	Compute ILOG.
63*		ILOG is the log base 10 of the input value.  It is
64*		approximated by adding e + 0.f when the original
65*		value is viewed as 2^^e * 1.f in extended precision.
66*		This value is stored in d6.
67*
68*	A4.	Clr INEX bit.
69*		The operation in A3 above may have set INEX2.
70*
71*	A5.	Set ICTR = 0;
72*		ICTR is a flag used in A13.  It must be set before the
73*		loop entry A6.
74*
75*	A6.	Calculate LEN.
76*		LEN is the number of digits to be displayed.  The
77*		k-factor can dictate either the total number of digits,
78*		if it is a positive number, or the number of digits
79*		after the decimal point which are to be included as
80*		significant.  See the 68882 manual for examples.
81*		If LEN is computed to be greater than 17, set OPERR in
82*		USER_FPSR.  LEN is stored in d4.
83*
84*	A7.	Calculate SCALE.
85*		SCALE is equal to 10^ISCALE, where ISCALE is the number
86*		of decimal places needed to insure LEN integer digits
87*		in the output before conversion to bcd. LAMBDA is the
88*		sign of ISCALE, used in A9. Fp1 contains
89*		10^^(abs(ISCALE)) using a rounding mode which is a
90*		function of the original rounding mode and the signs
91*		of ISCALE and X.  A table is given in the code.
92*
93*	A8.	Clr INEX; Force RZ.
94*		The operation in A3 above may have set INEX2.
95*		RZ mode is forced for the scaling operation to insure
96*		only one rounding error.  The grs bits are collected in
97*		the INEX flag for use in A10.
98*
99*	A9.	Scale X -> Y.
100*		The mantissa is scaled to the desired number of
101*		significant digits.  The excess digits are collected
102*		in INEX2.
103*
104*	A10.	Or in INEX.
105*		If INEX is set, round error occurred.  This is
106*		compensated for by 'or-ing' in the INEX2 flag to
107*		the lsb of Y.
108*
109*	A11.	Restore original FPCR; set size ext.
110*		Perform FINT operation in the user's rounding mode.
111*		Keep the size to extended.
112*
113*	A12.	Calculate YINT = FINT(Y) according to user's rounding
114*		mode.  The FPSP routine sintd0 is used.  The output
115*		is in fp0.
116*
117*	A13.	Check for LEN digits.
118*		If the int operation results in more than LEN digits,
119*		or less than LEN -1 digits, adjust ILOG and repeat from
120*		A6.  This test occurs only on the first pass.  If the
121*		result is exactly 10^LEN, decrement ILOG and divide
122*		the mantissa by 10.
123*
124*	A14.	Convert the mantissa to bcd.
125*		The binstr routine is used to convert the LEN digit
126*		mantissa to bcd in memory.  The input to binstr is
127*		to be a fraction; i.e. (mantissa)/10^LEN and adjusted
128*		such that the decimal point is to the left of bit 63.
129*		The bcd digits are stored in the correct position in
130*		the final string area in memory.
131*
132*	A15.	Convert the exponent to bcd.
133*		As in A14 above, the exp is converted to bcd and the
134*		digits are stored in the final string.
135*		Test the length of the final exponent string.  If the
136*		length is 4, set operr.
137*
138*	A16.	Write sign bits to final string.
139*
140*	Implementation Notes:
141*
142*	The registers are used as follows:
143*
144*		d0: scratch; LEN input to binstr
145*		d1: scratch
146*		d2: upper 32-bits of mantissa for binstr
147*		d3: scratch;lower 32-bits of mantissa for binstr
148*		d4: LEN
149*      		d5: LAMBDA/ICTR
150*		d6: ILOG
151*		d7: k-factor
152*		a0: ptr for original operand/final result
153*		a1: scratch pointer
154*		a2: pointer to FP_X; abs(original value) in ext
155*		fp0: scratch
156*		fp1: scratch
157*		fp2: scratch
158*		F_SCR1:
159*		F_SCR2:
160*		L_SCR1:
161*		L_SCR2:
162*
163
164BINDEC    IDNT    2,1 Motorola 040 Floating Point Software Package
165
166	include	fpsp.h
167
168	section	8
169
170* Constants in extended precision
171LOG2 	dc.l	$3FFD0000,$9A209A84,$FBCFF798,$00000000
172LOG2UP1	dc.l	$3FFD0000,$9A209A84,$FBCFF799,$00000000
173
174* Constants in single precision
175FONE 	dc.l	$3F800000,$00000000,$00000000,$00000000
176FTWO	dc.l	$40000000,$00000000,$00000000,$00000000
177FTEN 	dc.l	$41200000,$00000000,$00000000,$00000000
178F4933	dc.l	$459A2800,$00000000,$00000000,$00000000
179
180RBDTBL 	dc.b	0,0,0,0
181	dc.b	3,3,2,2
182	dc.b	3,2,2,3
183	dc.b	2,3,3,2
184
185	xref	binstr
186	xref	sintdo
187	xref	ptenrn,ptenrm,ptenrp
188
189	xdef	bindec
190	xdef	sc_mul
191bindec:
192	movem.l	d2-d7/a2,-(a7)
193	fmovem.x fp0-fp2,-(a7)
194
195* A1. Set RM and size ext. Set SIGMA = sign input;
196*     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
197*     separating  normalized/denormalized input.  If the input
198*     is a denormalized number, set the BINDEC_FLG memory word
199*     to signal denorm.  If the input is unnormalized, normalize
200*     the input and test for denormalized result.
201*
202	fmove.l	#rm_mode,FPCR	;set RM and ext
203	move.l	(a0),L_SCR2(a6)	;save exponent for sign check
204	move.l	d0,d7		;move k-factor to d7
205	clr.b	BINDEC_FLG(a6)	;clr norm/denorm flag
206	move.w	STAG(a6),d0	;get stag
207	andi.w	#$e000,d0	;isolate stag bits
208	beq	A2_str		;if zero, input is norm
209*
210* Normalize the denorm
211*
212un_de_norm:
213	move.w	(a0),d0
214	andi.w	#$7fff,d0	;strip sign of normalized exp
215	move.l	4(a0),d1
216	move.l	8(a0),d2
217norm_loop:
218	sub.w	#1,d0
219	add.l	d2,d2
220	addx.l	d1,d1
221	tst.l	d1
222	bge.b	norm_loop
223*
224* Test if the normalized input is denormalized
225*
226	tst.w	d0
227	bgt.b	pos_exp		;if greater than zero, it is a norm
228	st	BINDEC_FLG(a6)	;set flag for denorm
229pos_exp:
230	andi.w	#$7fff,d0	;strip sign of normalized exp
231	move.w	d0,(a0)
232	move.l	d1,4(a0)
233	move.l	d2,8(a0)
234
235* A2. Set X = abs(input).
236*
237A2_str:
238	move.l	(a0),FP_SCR2(a6) ; move input to work space
239	move.l	4(a0),FP_SCR2+4(a6) ; move input to work space
240	move.l	8(a0),FP_SCR2+8(a6) ; move input to work space
241	andi.l	#$7fffffff,FP_SCR2(a6) ;create abs(X)
242
243* A3. Compute ILOG.
244*     ILOG is the log base 10 of the input value.  It is approx-
245*     imated by adding e + 0.f when the original value is viewed
246*     as 2^^e * 1.f in extended precision.  This value is stored
247*     in d6.
248*
249* Register usage:
250*	Input/Output
251*	d0: k-factor/exponent
252*	d2: x/x
253*	d3: x/x
254*	d4: x/x
255*	d5: x/x
256*	d6: x/ILOG
257*	d7: k-factor/Unchanged
258*	a0: ptr for original operand/final result
259*	a1: x/x
260*	a2: x/x
261*	fp0: x/float(ILOG)
262*	fp1: x/x
263*	fp2: x/x
264*	F_SCR1:x/x
265*	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
266*	L_SCR1:x/x
267*	L_SCR2:first word of X packed/Unchanged
268
269	tst.b	BINDEC_FLG(a6)	;check for denorm
270	beq.b	A3_cont		;if clr, continue with norm
271	move.l	#-4933,d6	;force ILOG = -4933
272	bra.b	A4_str
273A3_cont:
274	move.w	FP_SCR2(a6),d0	;move exp to d0
275	move.w	#$3fff,FP_SCR2(a6) ;replace exponent with 0x3fff
276	fmove.x	FP_SCR2(a6),fp0	;now fp0 has 1.f
277	sub.w	#$3fff,d0	;strip off bias
278	fadd.w	d0,fp0		;add in exp
279	fsub.s	FONE,fp0	;subtract off 1.0
280	fbge.w	pos_res		;if pos, branch
281	fmul.x	LOG2UP1,fp0	;if neg, mul by LOG2UP1
282	fmove.l	fp0,d6		;put ILOG in d6 as a lword
283	bra.b	A4_str		;go move out ILOG
284pos_res:
285	fmul.x	LOG2,fp0	;if pos, mul by LOG2
286	fmove.l	fp0,d6		;put ILOG in d6 as a lword
287
288
289* A4. Clr INEX bit.
290*     The operation in A3 above may have set INEX2.
291
292A4_str:
293	fmove.l	#0,FPSR		;zero all of fpsr - nothing needed
294
295
296* A5. Set ICTR = 0;
297*     ICTR is a flag used in A13.  It must be set before the
298*     loop entry A6. The lower word of d5 is used for ICTR.
299
300	clr.w	d5		;clear ICTR
301
302
303* A6. Calculate LEN.
304*     LEN is the number of digits to be displayed.  The k-factor
305*     can dictate either the total number of digits, if it is
306*     a positive number, or the number of digits after the
307*     original decimal point which are to be included as
308*     significant.  See the 68882 manual for examples.
309*     If LEN is computed to be greater than 17, set OPERR in
310*     USER_FPSR.  LEN is stored in d4.
311*
312* Register usage:
313*	Input/Output
314*	d0: exponent/Unchanged
315*	d2: x/x/scratch
316*	d3: x/x
317*	d4: exc picture/LEN
318*	d5: ICTR/Unchanged
319*	d6: ILOG/Unchanged
320*	d7: k-factor/Unchanged
321*	a0: ptr for original operand/final result
322*	a1: x/x
323*	a2: x/x
324*	fp0: float(ILOG)/Unchanged
325*	fp1: x/x
326*	fp2: x/x
327*	F_SCR1:x/x
328*	F_SCR2:Abs(X) with $3fff exponent/Unchanged
329*	L_SCR1:x/x
330*	L_SCR2:first word of X packed/Unchanged
331
332A6_str:
333	tst.l	d7		;branch on sign of k
334	ble.b	k_neg		;if k <= 0, LEN = ILOG + 1 - k
335	move.l	d7,d4		;if k > 0, LEN = k
336	bra.b	len_ck		;skip to LEN check
337k_neg:
338	move.l	d6,d4		;first load ILOG to d4
339	sub.l	d7,d4		;subtract off k
340	addq.l	#1,d4		;add in the 1
341len_ck:
342	tst.l	d4		;LEN check: branch on sign of LEN
343	ble.b	LEN_ng		;if neg, set LEN = 1
344	cmp.l	#17,d4		;test if LEN > 17
345	ble.b	A7_str		;if not, forget it
346	move.l	#17,d4		;set max LEN = 17
347	tst.l	d7		;if negative, never set OPERR
348	ble.b	A7_str		;if positive, continue
349	or.l	#opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
350	bra.b	A7_str		;finished here
351LEN_ng:
352	moveq.l	#1,d4		;min LEN is 1
353
354
355* A7. Calculate SCALE.
356*     SCALE is equal to 10^ISCALE, where ISCALE is the number
357*     of decimal places needed to insure LEN integer digits
358*     in the output before conversion to bcd. LAMBDA is the sign
359*     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
360*     the rounding mode as given in the following table (see
361*     Coonen, p. 7.23 as ref.; however, the SCALE variable is
362*     of opposite sign in bindec.sa from Coonen).
363*
364*	Initial					USE
365*	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
366*	----------------------------------------------
367*	 RN	00	   0	   0		00/0	RN
368*	 RN	00	   0	   1		00/0	RN
369*	 RN	00	   1	   0		00/0	RN
370*	 RN	00	   1	   1		00/0	RN
371*	 RZ	01	   0	   0		11/3	RP
372*	 RZ	01	   0	   1		11/3	RP
373*	 RZ	01	   1	   0		10/2	RM
374*	 RZ	01	   1	   1		10/2	RM
375*	 RM	10	   0	   0		11/3	RP
376*	 RM	10	   0	   1		10/2	RM
377*	 RM	10	   1	   0		10/2	RM
378*	 RM	10	   1	   1		11/3	RP
379*	 RP	11	   0	   0		10/2	RM
380*	 RP	11	   0	   1		11/3	RP
381*	 RP	11	   1	   0		11/3	RP
382*	 RP	11	   1	   1		10/2	RM
383*
384* Register usage:
385*	Input/Output
386*	d0: exponent/scratch - final is 0
387*	d2: x/0 or 24 for A9
388*	d3: x/scratch - offset ptr into PTENRM array
389*	d4: LEN/Unchanged
390*	d5: 0/ICTR:LAMBDA
391*	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
392*	d7: k-factor/Unchanged
393*	a0: ptr for original operand/final result
394*	a1: x/ptr to PTENRM array
395*	a2: x/x
396*	fp0: float(ILOG)/Unchanged
397*	fp1: x/10^ISCALE
398*	fp2: x/x
399*	F_SCR1:x/x
400*	F_SCR2:Abs(X) with $3fff exponent/Unchanged
401*	L_SCR1:x/x
402*	L_SCR2:first word of X packed/Unchanged
403
404A7_str:
405	tst.l	d7		;test sign of k
406	bgt.b	k_pos		;if pos and > 0, skip this
407	cmp.l	d6,d7		;test k - ILOG
408	blt.b	k_pos		;if ILOG >= k, skip this
409	move.l	d7,d6		;if ((k<0) & (ILOG < k)) ILOG = k
410k_pos:
411	move.l	d6,d0		;calc ILOG + 1 - LEN in d0
412	addq.l	#1,d0		;add the 1
413	sub.l	d4,d0		;sub off LEN
414	swap	d5		;use upper word of d5 for LAMBDA
415	clr.w	d5		;set it zero initially
416	clr.w	d2		;set up d2 for very small case
417	tst.l	d0		;test sign of ISCALE
418	bge.b	iscale		;if pos, skip next inst
419	addq.w	#1,d5		;if neg, set LAMBDA true
420	cmp.l	#$ffffecd4,d0	;test iscale <= -4908
421	bgt.b	no_inf		;if false, skip rest
422	addi.l	#24,d0		;add in 24 to iscale
423	move.l	#24,d2		;put 24 in d2 for A9
424no_inf:
425	neg.l	d0		;and take abs of ISCALE
426iscale:
427	fmove.s	FONE,fp1	;init fp1 to 1
428	bfextu	USER_FPCR(a6){26:2},d1 ;get initial rmode bits
429	add.w	d1,d1		;put them in bits 2:1
430	add.w	d5,d1		;add in LAMBDA
431	add.w	d1,d1		;put them in bits 3:1
432	tst.l	L_SCR2(a6)	;test sign of original x
433	bge.b	x_pos		;if pos, don't set bit 0
434	addq.l	#1,d1		;if neg, set bit 0
435x_pos:
436	lea.l	RBDTBL,a2	;load rbdtbl base
437	move.b	(a2,d1),d3	;load d3 with new rmode
438	lsl.l	#4,d3		;put bits in proper position
439	fmove.l	d3,fpcr		;load bits into fpu
440	lsr.l	#4,d3		;put bits in proper position
441	tst.b	d3		;decode new rmode for pten table
442	bne.b	not_rn		;if zero, it is RN
443	lea.l	PTENRN,a1	;load a1 with RN table base
444	bra.b	rmode		;exit decode
445not_rn:
446	lsr.b	#1,d3		;get lsb in carry
447	bcc.b	not_rp		;if carry clear, it is RM
448	lea.l	PTENRP,a1	;load a1 with RP table base
449	bra.b	rmode		;exit decode
450not_rp:
451	lea.l	PTENRM,a1	;load a1 with RM table base
452rmode:
453	clr.l	d3		;clr table index
454e_loop:
455	lsr.l	#1,d0		;shift next bit into carry
456	bcc.b	e_next		;if zero, skip the mul
457	fmul.x	(a1,d3),fp1	;mul by 10**(d3_bit_no)
458e_next:
459	add.l	#12,d3		;inc d3 to next pwrten table entry
460	tst.l	d0		;test if ISCALE is zero
461	bne.b	e_loop		;if not, loop
462
463
464* A8. Clr INEX; Force RZ.
465*     The operation in A3 above may have set INEX2.
466*     RZ mode is forced for the scaling operation to insure
467*     only one rounding error.  The grs bits are collected in
468*     the INEX flag for use in A10.
469*
470* Register usage:
471*	Input/Output
472
473	fmove.l	#0,FPSR		;clr INEX
474	fmove.l	#rz_mode,FPCR	;set RZ rounding mode
475
476
477* A9. Scale X -> Y.
478*     The mantissa is scaled to the desired number of significant
479*     digits.  The excess digits are collected in INEX2. If mul,
480*     Check d2 for excess 10 exponential value.  If not zero,
481*     the iscale value would have caused the pwrten calculation
482*     to overflow.  Only a negative iscale can cause this, so
483*     multiply by 10^(d2), which is now only allowed to be 24,
484*     with a multiply by 10^8 and 10^16, which is exact since
485*     10^24 is exact.  If the input was denormalized, we must
486*     create a busy stack frame with the mul command and the
487*     two operands, and allow the fpu to complete the multiply.
488*
489* Register usage:
490*	Input/Output
491*	d0: FPCR with RZ mode/Unchanged
492*	d2: 0 or 24/unchanged
493*	d3: x/x
494*	d4: LEN/Unchanged
495*	d5: ICTR:LAMBDA
496*	d6: ILOG/Unchanged
497*	d7: k-factor/Unchanged
498*	a0: ptr for original operand/final result
499*	a1: ptr to PTENRM array/Unchanged
500*	a2: x/x
501*	fp0: float(ILOG)/X adjusted for SCALE (Y)
502*	fp1: 10^ISCALE/Unchanged
503*	fp2: x/x
504*	F_SCR1:x/x
505*	F_SCR2:Abs(X) with $3fff exponent/Unchanged
506*	L_SCR1:x/x
507*	L_SCR2:first word of X packed/Unchanged
508
509A9_str:
510	fmove.x	(a0),fp0	;load X from memory
511	fabs.x	fp0		;use abs(X)
512	tst.w	d5		;LAMBDA is in lower word of d5
513	bne.b	short_sc_mul	;if neg (LAMBDA = 1), scale by mul
514	fdiv.x	fp1,fp0		;calculate X / SCALE -> Y to fp0
515	bra.b	A10_st		;branch to A10
516
517sc_mul:
518short_sc_mul:
519	tst.b	BINDEC_FLG(a6)	;check for denorm
520	beq.b	A9_norm		;if norm, continue with mul
521	fmovem.x fp1,-(a7)	;load ETEMP with 10^ISCALE
522	move.l	8(a0),-(a7)	;load FPTEMP with input arg
523	move.l	4(a0),-(a7)
524	move.l	(a0),-(a7)
525	move.l	#18,d3		;load count for busy stack
526A9_loop:
527	clr.l	-(a7)		;clear lword on stack
528	dbf.w	d3,A9_loop
529	move.b	VER_TMP(a6),(a7) ;write current version number
530	move.b	#BUSY_SIZE-4,1(a7) ;write current busy size
531	move.b	#$10,$44(a7)	;set fcefpte[15] bit
532	move.w	#$0023,$40(a7)	;load cmdreg1b with mul command
533	move.b	#$fe,$8(a7)	;load all 1s to cu savepc
534	frestore (a7)+		;restore frame to fpu for completion
535	fmul.x	36(a1),fp0	;multiply fp0 by 10^8
536	fmul.x	48(a1),fp0	;multiply fp0 by 10^16
537	bra.b	A10_st
538A9_norm:
539	tst.w	d2		;test for small exp case
540	beq.b	A9_con		;if zero, continue as normal
541	fmul.x	36(a1),fp0	;multiply fp0 by 10^8
542	fmul.x	48(a1),fp0	;multiply fp0 by 10^16
543A9_con:
544	fmul.x	fp1,fp0		;calculate X * SCALE -> Y to fp0
545
546
547* A10. Or in INEX.
548*      If INEX is set, round error occurred.  This is compensated
549*      for by 'or-ing' in the INEX2 flag to the lsb of Y.
550*
551* Register usage:
552*	Input/Output
553*	d0: FPCR with RZ mode/FPSR with INEX2 isolated
554*	d2: x/x
555*	d3: x/x
556*	d4: LEN/Unchanged
557*	d5: ICTR:LAMBDA
558*	d6: ILOG/Unchanged
559*	d7: k-factor/Unchanged
560*	a0: ptr for original operand/final result
561*	a1: ptr to PTENxx array/Unchanged
562*	a2: x/ptr to FP_SCR2(a6)
563*	fp0: Y/Y with lsb adjusted
564*	fp1: 10^ISCALE/Unchanged
565*	fp2: x/x
566
567A10_st:
568	fmove.l	FPSR,d0		;get FPSR
569	fmove.x	fp0,FP_SCR2(a6)	;move Y to memory
570	lea.l	FP_SCR2(a6),a2	;load a2 with ptr to FP_SCR2
571	btst.l	#9,d0		;check if INEX2 set
572	beq.b	A11_st		;if clear, skip rest
573	ori.l	#1,8(a2)	;or in 1 to lsb of mantissa
574	fmove.x	FP_SCR2(a6),fp0	;write adjusted Y back to fpu
575
576
577* A11. Restore original FPCR; set size ext.
578*      Perform FINT operation in the user's rounding mode.  Keep
579*      the size to extended.  The sintdo entry point in the sint
580*      routine expects the FPCR value to be in USER_FPCR for
581*      mode and precision.  The original FPCR is saved in L_SCR1.
582
583A11_st:
584	move.l	USER_FPCR(a6),L_SCR1(a6) ;save it for later
585	andi.l	#$00000030,USER_FPCR(a6) ;set size to ext,
586*					;block exceptions
587
588
589* A12. Calculate YINT = FINT(Y) according to user's rounding mode.
590*      The FPSP routine sintd0 is used.  The output is in fp0.
591*
592* Register usage:
593*	Input/Output
594*	d0: FPSR with AINEX cleared/FPCR with size set to ext
595*	d2: x/x/scratch
596*	d3: x/x
597*	d4: LEN/Unchanged
598*	d5: ICTR:LAMBDA/Unchanged
599*	d6: ILOG/Unchanged
600*	d7: k-factor/Unchanged
601*	a0: ptr for original operand/src ptr for sintdo
602*	a1: ptr to PTENxx array/Unchanged
603*	a2: ptr to FP_SCR2(a6)/Unchanged
604*	a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
605*	fp0: Y/YINT
606*	fp1: 10^ISCALE/Unchanged
607*	fp2: x/x
608*	F_SCR1:x/x
609*	F_SCR2:Y adjusted for inex/Y with original exponent
610*	L_SCR1:x/original USER_FPCR
611*	L_SCR2:first word of X packed/Unchanged
612
613A12_st:
614	movem.l	d0-d1/a0-a1,-(a7)	;save regs used by sintd0
615	move.l	L_SCR1(a6),-(a7)
616	move.l	L_SCR2(a6),-(a7)
617	lea.l	FP_SCR2(a6),a0		;a0 is ptr to F_SCR2(a6)
618	fmove.x	fp0,(a0)		;move Y to memory at FP_SCR2(a6)
619	tst.l	L_SCR2(a6)		;test sign of original operand
620	bge.b	do_fint			;if pos, use Y
621	or.l	#$80000000,(a0)		;if neg, use -Y
622do_fint:
623	move.l	USER_FPSR(a6),-(a7)
624	bsr	sintdo			;sint routine returns int in fp0
625	move.b	(a7),USER_FPSR(a6)
626	add.l	#4,a7
627	move.l	(a7)+,L_SCR2(a6)
628	move.l	(a7)+,L_SCR1(a6)
629	movem.l	(a7)+,d0-d1/a0-a1	;restore regs used by sint
630	move.l	L_SCR2(a6),FP_SCR2(a6)	;restore original exponent
631	move.l	L_SCR1(a6),USER_FPCR(a6) ;restore user's FPCR
632
633
634* A13. Check for LEN digits.
635*      If the int operation results in more than LEN digits,
636*      or less than LEN -1 digits, adjust ILOG and repeat from
637*      A6.  This test occurs only on the first pass.  If the
638*      result is exactly 10^LEN, decrement ILOG and divide
639*      the mantissa by 10.  The calculation of 10^LEN cannot
640*      be inexact, since all powers of ten upto 10^27 are exact
641*      in extended precision, so the use of a previous power-of-ten
642*      table will introduce no error.
643*
644*
645* Register usage:
646*	Input/Output
647*	d0: FPCR with size set to ext/scratch final = 0
648*	d2: x/x
649*	d3: x/scratch final = x
650*	d4: LEN/LEN adjusted
651*	d5: ICTR:LAMBDA/LAMBDA:ICTR
652*	d6: ILOG/ILOG adjusted
653*	d7: k-factor/Unchanged
654*	a0: pointer into memory for packed bcd string formation
655*	a1: ptr to PTENxx array/Unchanged
656*	a2: ptr to FP_SCR2(a6)/Unchanged
657*	fp0: int portion of Y/abs(YINT) adjusted
658*	fp1: 10^ISCALE/Unchanged
659*	fp2: x/10^LEN
660*	F_SCR1:x/x
661*	F_SCR2:Y with original exponent/Unchanged
662*	L_SCR1:original USER_FPCR/Unchanged
663*	L_SCR2:first word of X packed/Unchanged
664
665A13_st:
666	swap	d5		;put ICTR in lower word of d5
667	tst.w	d5		;check if ICTR = 0
668	bne	not_zr		;if non-zero, go to second test
669*
670* Compute 10^(LEN-1)
671*
672	fmove.s	FONE,fp2	;init fp2 to 1.0
673	move.l	d4,d0		;put LEN in d0
674	subq.l	#1,d0		;d0 = LEN -1
675	clr.l	d3		;clr table index
676l_loop:
677	lsr.l	#1,d0		;shift next bit into carry
678	bcc.b	l_next		;if zero, skip the mul
679	fmul.x	(a1,d3),fp2	;mul by 10**(d3_bit_no)
680l_next:
681	add.l	#12,d3		;inc d3 to next pwrten table entry
682	tst.l	d0		;test if LEN is zero
683	bne.b	l_loop		;if not, loop
684*
685* 10^LEN-1 is computed for this test and A14.  If the input was
686* denormalized, check only the case in which YINT > 10^LEN.
687*
688	tst.b	BINDEC_FLG(a6)	;check if input was norm
689	beq.b	A13_con		;if norm, continue with checking
690	fabs.x	fp0		;take abs of YINT
691	bra	test_2
692*
693* Compare abs(YINT) to 10^(LEN-1) and 10^LEN
694*
695A13_con:
696	fabs.x	fp0		;take abs of YINT
697	fcmp.x	fp2,fp0		;compare abs(YINT) with 10^(LEN-1)
698	fbge.w	test_2		;if greater, do next test
699	subq.l	#1,d6		;subtract 1 from ILOG
700	move.w	#1,d5		;set ICTR
701	fmove.l	#rm_mode,FPCR	;set rmode to RM
702	fmul.s	FTEN,fp2	;compute 10^LEN
703	bra.w	A6_str		;return to A6 and recompute YINT
704test_2:
705	fmul.s	FTEN,fp2	;compute 10^LEN
706	fcmp.x	fp2,fp0		;compare abs(YINT) with 10^LEN
707	fblt.w	A14_st		;if less, all is ok, go to A14
708	fbgt.w	fix_ex		;if greater, fix and redo
709	fdiv.s	FTEN,fp0	;if equal, divide by 10
710	addq.l	#1,d6		; and inc ILOG
711	bra.b	A14_st		; and continue elsewhere
712fix_ex:
713	addq.l	#1,d6		;increment ILOG by 1
714	move.w	#1,d5		;set ICTR
715	fmove.l	#rm_mode,FPCR	;set rmode to RM
716	bra.w	A6_str		;return to A6 and recompute YINT
717*
718* Since ICTR <> 0, we have already been through one adjustment,
719* and shouldn't have another; this is to check if abs(YINT) = 10^LEN
720* 10^LEN is again computed using whatever table is in a1 since the
721* value calculated cannot be inexact.
722*
723not_zr:
724	fmove.s	FONE,fp2	;init fp2 to 1.0
725	move.l	d4,d0		;put LEN in d0
726	clr.l	d3		;clr table index
727z_loop:
728	lsr.l	#1,d0		;shift next bit into carry
729	bcc.b	z_next		;if zero, skip the mul
730	fmul.x	(a1,d3),fp2	;mul by 10**(d3_bit_no)
731z_next:
732	add.l	#12,d3		;inc d3 to next pwrten table entry
733	tst.l	d0		;test if LEN is zero
734	bne.b	z_loop		;if not, loop
735	fabs.x	fp0		;get abs(YINT)
736	fcmp.x	fp2,fp0		;check if abs(YINT) = 10^LEN
737	fbne.w	A14_st		;if not, skip this
738	fdiv.s	FTEN,fp0	;divide abs(YINT) by 10
739	addq.l	#1,d6		;and inc ILOG by 1
740	addq.l	#1,d4		; and inc LEN
741	fmul.s	FTEN,fp2	; if LEN++, the get 10^^LEN
742
743
744* A14. Convert the mantissa to bcd.
745*      The binstr routine is used to convert the LEN digit
746*      mantissa to bcd in memory.  The input to binstr is
747*      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
748*      such that the decimal point is to the left of bit 63.
749*      The bcd digits are stored in the correct position in
750*      the final string area in memory.
751*
752*
753* Register usage:
754*	Input/Output
755*	d0: x/LEN call to binstr - final is 0
756*	d1: x/0
757*	d2: x/ms 32-bits of mant of abs(YINT)
758*	d3: x/ls 32-bits of mant of abs(YINT)
759*	d4: LEN/Unchanged
760*	d5: ICTR:LAMBDA/LAMBDA:ICTR
761*	d6: ILOG
762*	d7: k-factor/Unchanged
763*	a0: pointer into memory for packed bcd string formation
764*	    /ptr to first mantissa byte in result string
765*	a1: ptr to PTENxx array/Unchanged
766*	a2: ptr to FP_SCR2(a6)/Unchanged
767*	fp0: int portion of Y/abs(YINT) adjusted
768*	fp1: 10^ISCALE/Unchanged
769*	fp2: 10^LEN/Unchanged
770*	F_SCR1:x/Work area for final result
771*	F_SCR2:Y with original exponent/Unchanged
772*	L_SCR1:original USER_FPCR/Unchanged
773*	L_SCR2:first word of X packed/Unchanged
774
775A14_st:
776	fmove.l	#rz_mode,FPCR	;force rz for conversion
777	fdiv.x	fp2,fp0		;divide abs(YINT) by 10^LEN
778	lea.l	FP_SCR1(a6),a0
779	fmove.x	fp0,(a0)	;move abs(YINT)/10^LEN to memory
780	move.l	4(a0),d2	;move 2nd word of FP_RES to d2
781	move.l	8(a0),d3	;move 3rd word of FP_RES to d3
782	clr.l	4(a0)		;zero word 2 of FP_RES
783	clr.l	8(a0)		;zero word 3 of FP_RES
784	move.l	(a0),d0		;move exponent to d0
785	swap	d0		;put exponent in lower word
786	beq.b	no_sft		;if zero, don't shift
787	subi.l	#$3ffd,d0	;sub bias less 2 to make fract
788	tst.l	d0		;check if > 1
789	bgt.b	no_sft		;if so, don't shift
790	neg.l	d0		;make exp positive
791m_loop:
792	lsr.l	#1,d2		;shift d2:d3 right, add 0s
793	roxr.l	#1,d3		;the number of places
794	dbf.w	d0,m_loop	;given in d0
795no_sft:
796	tst.l	d2		;check for mantissa of zero
797	bne.b	no_zr		;if not, go on
798	tst.l	d3		;continue zero check
799	beq.b	zer_m		;if zero, go directly to binstr
800no_zr:
801	clr.l	d1		;put zero in d1 for addx
802	addi.l	#$00000080,d3	;inc at bit 7
803	addx.l	d1,d2		;continue inc
804	andi.l	#$ffffff80,d3	;strip off lsb not used by 882
805zer_m:
806	move.l	d4,d0		;put LEN in d0 for binstr call
807	addq.l	#3,a0		;a0 points to M16 byte in result
808	bsr	binstr		;call binstr to convert mant
809
810
811* A15. Convert the exponent to bcd.
812*      As in A14 above, the exp is converted to bcd and the
813*      digits are stored in the final string.
814*
815*      Digits are stored in L_SCR1(a6) on return from BINDEC as:
816*
817*  	 32               16 15                0
818*	-----------------------------------------
819*  	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
820*	-----------------------------------------
821*
822* And are moved into their proper places in FP_SCR1.  If digit e4
823* is non-zero, OPERR is signaled.  In all cases, all 4 digits are
824* written as specified in the 881/882 manual for packed decimal.
825*
826* Register usage:
827*	Input/Output
828*	d0: x/LEN call to binstr - final is 0
829*	d1: x/scratch (0);shift count for final exponent packing
830*	d2: x/ms 32-bits of exp fraction/scratch
831*	d3: x/ls 32-bits of exp fraction
832*	d4: LEN/Unchanged
833*	d5: ICTR:LAMBDA/LAMBDA:ICTR
834*	d6: ILOG
835*	d7: k-factor/Unchanged
836*	a0: ptr to result string/ptr to L_SCR1(a6)
837*	a1: ptr to PTENxx array/Unchanged
838*	a2: ptr to FP_SCR2(a6)/Unchanged
839*	fp0: abs(YINT) adjusted/float(ILOG)
840*	fp1: 10^ISCALE/Unchanged
841*	fp2: 10^LEN/Unchanged
842*	F_SCR1:Work area for final result/BCD result
843*	F_SCR2:Y with original exponent/ILOG/10^4
844*	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
845*	L_SCR2:first word of X packed/Unchanged
846
847A15_st:
848	tst.b	BINDEC_FLG(a6)	;check for denorm
849	beq.b	not_denorm
850	ftst.x	fp0		;test for zero
851	fbeq.w	den_zero	;if zero, use k-factor or 4933
852	fmove.l	d6,fp0		;float ILOG
853	fabs.x	fp0		;get abs of ILOG
854	bra.b	convrt
855den_zero:
856	tst.l	d7		;check sign of the k-factor
857	blt.b	use_ilog	;if negative, use ILOG
858	fmove.s	F4933,fp0	;force exponent to 4933
859	bra.b	convrt		;do it
860use_ilog:
861	fmove.l	d6,fp0		;float ILOG
862	fabs.x	fp0		;get abs of ILOG
863	bra.b	convrt
864not_denorm:
865	ftst.x	fp0		;test for zero
866	fbne.w	not_zero	;if zero, force exponent
867	fmove.s	FONE,fp0	;force exponent to 1
868	bra.b	convrt		;do it
869not_zero:
870	fmove.l	d6,fp0		;float ILOG
871	fabs.x	fp0		;get abs of ILOG
872convrt:
873	fdiv.x	24(a1),fp0	;compute ILOG/10^4
874	fmove.x	fp0,FP_SCR2(a6)	;store fp0 in memory
875	move.l	4(a2),d2	;move word 2 to d2
876	move.l	8(a2),d3	;move word 3 to d3
877	move.w	(a2),d0		;move exp to d0
878	beq.b	x_loop_fin	;if zero, skip the shift
879	subi.w	#$3ffd,d0	;subtract off bias
880	neg.w	d0		;make exp positive
881x_loop:
882	lsr.l	#1,d2		;shift d2:d3 right
883	roxr.l	#1,d3		;the number of places
884	dbf.w	d0,x_loop	;given in d0
885x_loop_fin:
886	clr.l	d1		;put zero in d1 for addx
887	addi.l	#$00000080,d3	;inc at bit 6
888	addx.l	d1,d2		;continue inc
889	andi.l	#$ffffff80,d3	;strip off lsb not used by 882
890	move.l	#4,d0		;put 4 in d0 for binstr call
891	lea.l	L_SCR1(a6),a0	;a0 is ptr to L_SCR1 for exp digits
892	bsr	binstr		;call binstr to convert exp
893	move.l	L_SCR1(a6),d0	;load L_SCR1 lword to d0
894	move.l	#12,d1		;use d1 for shift count
895	lsr.l	d1,d0		;shift d0 right by 12
896	bfins	d0,FP_SCR1(a6){4:12} ;put e3:e2:e1 in FP_SCR1
897	lsr.l	d1,d0		;shift d0 right by 12
898	bfins	d0,FP_SCR1(a6){16:4} ;put e4 in FP_SCR1
899	tst.b	d0		;check if e4 is zero
900	beq.b	A16_st		;if zero, skip rest
901	or.l	#opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
902
903
904* A16. Write sign bits to final string.
905*	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
906*
907* Register usage:
908*	Input/Output
909*	d0: x/scratch - final is x
910*	d2: x/x
911*	d3: x/x
912*	d4: LEN/Unchanged
913*	d5: ICTR:LAMBDA/LAMBDA:ICTR
914*	d6: ILOG/ILOG adjusted
915*	d7: k-factor/Unchanged
916*	a0: ptr to L_SCR1(a6)/Unchanged
917*	a1: ptr to PTENxx array/Unchanged
918*	a2: ptr to FP_SCR2(a6)/Unchanged
919*	fp0: float(ILOG)/Unchanged
920*	fp1: 10^ISCALE/Unchanged
921*	fp2: 10^LEN/Unchanged
922*	F_SCR1:BCD result with correct signs
923*	F_SCR2:ILOG/10^4
924*	L_SCR1:Exponent digits on return from binstr
925*	L_SCR2:first word of X packed/Unchanged
926
927A16_st:
928	clr.l	d0		;clr d0 for collection of signs
929	andi.b	#$0f,FP_SCR1(a6) ;clear first nibble of FP_SCR1
930	tst.l	L_SCR2(a6)	;check sign of original mantissa
931	bge.b	mant_p		;if pos, don't set SM
932	moveq.l	#2,d0		;move 2 in to d0 for SM
933mant_p:
934	tst.l	d6		;check sign of ILOG
935	bge.b	wr_sgn		;if pos, don't set SE
936	addq.l	#1,d0		;set bit 0 in d0 for SE
937wr_sgn:
938	bfins	d0,FP_SCR1(a6){0:2} ;insert SM and SE into FP_SCR1
939
940* Clean up and restore all registers used.
941
942	fmove.l	#0,FPSR		;clear possible inex2/ainex bits
943	fmovem.x (a7)+,fp0-fp2
944	movem.l	(a7)+,d2-d7/a2
945	rts
946
947	end
948