xref: /netbsd/sys/arch/hppa/hppa/fpu.c (revision c4a72b64)
1 /*	$NetBSD: fpu.c,v 1.1 2002/06/05 01:04:20 fredette Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Matthew Fredette.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*
40  * FPU handling for NetBSD/hppa.
41  */
42 
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.1 2002/06/05 01:04:20 fredette Exp $");
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/proc.h>
49 #include <sys/signalvar.h>
50 #include <sys/user.h>
51 
52 #include <machine/cpu.h>
53 #include <machine/cpufunc.h>
54 #include <machine/frame.h>
55 #include <machine/reg.h>
56 
57 #include <hppa/hppa/machdep.h>
58 
59 #include "../spmath/float.h"
60 #include "../spmath/fpudispatch.h"
61 
/*
 * Some macros representing opcodes.
 *
 * OPCODE_NOP is the instruction word stored over the locore.S
 * patch points when a hardware FPU is present, and OPCODE_COPR_0_0
 * is the copr,0,0 instruction used to read the FPU version.
 */
#define OPCODE_NOP	0x08000240
#define OPCODE_COPR_0_0	0x30000000

/*
 * Some macros representing fields in load/store opcodes.
 *
 * In HP bit notation (bit 0 is the most significant bit),
 * OPCODE_CMPLT_S is bit 18 and OPCODE_CMPLT_M is bit 26.
 * For indexed loads/stores these are the "scale index" and
 * "modify base" completer bits.  For short-displacement
 * loads/stores, bit 26 again requests base modification and
 * bit 18 selects when it happens: clear means modify after
 * (,MA), set means modify before (,MB).
 *
 * OPCODE_CMPLT is the mask covering both completer bits.
 * OPCODE_DOUBLE is bit 4, OPCODE_STORE is bit 22, and
 * OPCODE_INDEXED is bit 19, which distinguishes the indexed
 * forms from the short-displacement forms.
 */
#define	OPCODE_CMPLT_S	0x00002000
#define	OPCODE_CMPLT_M	0x00000020
#define	OPCODE_CMPLT_SM	(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define	OPCODE_CMPLT_MA	OPCODE_CMPLT_M
#define	OPCODE_CMPLT_MB	(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define	OPCODE_CMPLT	(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define	OPCODE_DOUBLE	0x08000000
#define	OPCODE_STORE	0x00000200
#define OPCODE_INDEXED	0x00001000
76 
77 /* This is nonzero iff we're using a hardware FPU. */
78 int fpu_present;
79 
80 /* If we have any FPU, this is its version. */
81 u_int fpu_version;
82 
83 /* The number of times we have had to switch the FPU context. */
84 u_int fpu_csw;
85 
86 /* The U-space physical address of the proc in the FPU, or zero. */
87 paddr_t fpu_cur_uspace;
88 
89 /* In locore.S, this swaps states in and out of the FPU. */
90 void hppa_fpu_swap __P((struct user *, struct user *));
91 
#ifdef FPEMUL
/*
 * Given a trapframe and a general register number, the
 * FRAME_REG macro returns a pointer to that general
 * register.  The _frame_reg_positions array is a lookup
 * table, since the general registers aren't in order
 * in a trapframe.  Each entry is the distance, in
 * trapframe members, from tf_t1 to the member that
 * holds the register.
 *
 * General register zero has no slot in the trapframe,
 * so FRAME_REG returns a pointer to the caller-supplied
 * r0 object instead; the -1 table entry for r0 is a
 * placeholder that FRAME_REG never reads.
 *
 * NB: this more or less assumes that all members of
 * struct trapframe are u_ints.
 */
#define FRAME_REG(f, reg, r0)	\
	((reg) == 0 ? &(r0) : (&(f)->tf_t1 + _frame_reg_positions[(reg)]))
#define _FRAME_POSITION(f)	\
	((&((struct trapframe *) 0)->f) - (&((struct trapframe *) 0)->tf_t1))
const int _frame_reg_positions[32] = {
	-1,				/* r0 */
	_FRAME_POSITION(tf_r1),
	_FRAME_POSITION(tf_rp),		/* r2 */
	_FRAME_POSITION(tf_r3),
	_FRAME_POSITION(tf_r4),
	_FRAME_POSITION(tf_r5),
	_FRAME_POSITION(tf_r6),
	_FRAME_POSITION(tf_r7),
	_FRAME_POSITION(tf_r8),
	_FRAME_POSITION(tf_r9),
	_FRAME_POSITION(tf_r10),
	_FRAME_POSITION(tf_r11),
	_FRAME_POSITION(tf_r12),
	_FRAME_POSITION(tf_r13),
	_FRAME_POSITION(tf_r14),
	_FRAME_POSITION(tf_r15),
	_FRAME_POSITION(tf_r16),
	_FRAME_POSITION(tf_r17),
	_FRAME_POSITION(tf_r18),
	_FRAME_POSITION(tf_t4),		/* r19 */
	_FRAME_POSITION(tf_t3),		/* r20 */
	_FRAME_POSITION(tf_t2),		/* r21 */
	_FRAME_POSITION(tf_t1),		/* r22 */
	_FRAME_POSITION(tf_arg3),	/* r23 */
	_FRAME_POSITION(tf_arg2),	/* r24 */
	_FRAME_POSITION(tf_arg1),	/* r25 */
	_FRAME_POSITION(tf_arg0),	/* r26 */
	_FRAME_POSITION(tf_dp),		/* r27 */
	_FRAME_POSITION(tf_ret0),	/* r28 */
	_FRAME_POSITION(tf_ret1),	/* r29 */
	_FRAME_POSITION(tf_sp),		/* r30 */
	_FRAME_POSITION(tf_r31),
};
#endif /* FPEMUL */
142 
143 /*
144  * Bootstraps the FPU.
145  */
146 void
147 hppa_fpu_bootstrap(u_int ccr_enable)
148 {
149 	u_int32_t junk[2];
150 	u_int32_t version[2];
151 	extern u_int hppa_fpu_nop0;
152 	extern u_int hppa_fpu_nop1;
153 
154 	/* See if we have a present and functioning hardware FPU. */
155 	fpu_present = (ccr_enable & HPPA_FPUS) == HPPA_FPUS;
156 
157 	/* Initialize the FPU and get its version. */
158 	if (fpu_present) {
159 
160 		/*
161 		 * To somewhat optimize the emulation
162 		 * assist trap handling and context
163 		 * switching (to save them from having
164 	 	 * to always load and check fpu_present),
165 		 * there are two instructions in locore.S
166 		 * that are replaced with nops when
167 		 * there is a hardware FPU.
168 	 	 */
169 		hppa_fpu_nop0 = OPCODE_NOP;
170 		hppa_fpu_nop1 = OPCODE_NOP;
171 		fcacheall();
172 
173 		/*
174 		 * We track what process has the FPU,
175 		 * and how many times we have to swap
176 		 * in and out.
177 		 */
178 
179 		/*
180 		 * The PA-RISC 1.1 Architecture manual is
181 		 * pretty clear that the copr,0,0 must be
182 		 * wrapped in double word stores of fr0,
183 		 * otherwise its operation is undefined.
184 		 */
185 		__asm __volatile(
186 			"	ldo	%0, %%r22	\n"
187 			"	fstds	%%fr0, 0(%%r22)	\n"
188 			"	ldo	%1, %%r22	\n"
189 			"	copr,0,0		\n"
190 			"	fstds	%%fr0, 0(%%r22)	\n"
191 			: "=m" (junk), "=m" (version) : : "r22");
192 
193 		/*
194 		 * Now mark that no process has the FPU,
195 		 * and disable it, so the first time it
196 		 * gets used the process' state gets
197 		 * swapped in.
198 		 */
199 		fpu_csw = 0;
200 		fpu_cur_uspace = 0;
201 		mtctl(ccr_enable & (CCR_MASK ^ HPPA_FPUS), CR_CCR);
202 	}
203 #ifdef FPEMUL
204 	else
205 		/*
206 		 * XXX This is a hack - to avoid
207 		 * having to set up the emulator so
208 		 * it can work for one instruction for
209 		 * proc0, we dispatch the copr,0,0 opcode
210 		 * into the emulator directly.
211 		 */
212 		decode_0c(OPCODE_COPR_0_0, 0, 0, version);
213 #endif /* FPEMUL */
214 	fpu_version = version[0];
215 }
216 
217 /*
218  * If the given process has its state in the FPU,
219  * flush that state out into the process' PCB.
220  */
221 void
222 hppa_fpu_flush(struct proc *p)
223 {
224 	struct trapframe *tf = p->p_md.md_regs;
225 
226 	/*
227  	* If we have a hardware FPU, and this process'
228  	* state is currently in it, swap it out.
229  	*/
230 	if (fpu_present &&
231 	    fpu_cur_uspace != NULL &&
232 	    fpu_cur_uspace == tf->tf_cr30)
233 		hppa_fpu_swap(p->p_addr, NULL);
234 }
235 
236 #ifdef FPEMUL
237 
238 /*
239  * This emulates a coprocessor load/store instruction.
240  */
241 static int hppa_fpu_ls __P((struct trapframe *, struct proc *));
242 static int
243 hppa_fpu_ls(struct trapframe *frame, struct proc *p)
244 {
245 	u_int inst, inst_b, inst_x, inst_s, inst_t;
246 	int log2size;
247 	u_int *base;
248 	u_int offset, index, im5;
249 	void *fpreg;
250 	u_int r0 = 0;
251 
252 	/*
253 	 * Get the instruction that we're emulating,
254 	 * and break it down.  Using HP bit notation,
255 	 * b is a five-bit field starting at bit 10,
256 	 * x is a five-bit field starting at bit 15,
257 	 * s is a two-bit field starting at bit 17,
258 	 * and t is a two-bit field starting at bit 31.
259 	 */
260 	inst = frame->tf_iir;
261 	__asm __volatile(
262 		"	extru %4, 10, 5, %1	\n"
263 		"	extru %4, 15, 5, %2	\n"
264 		"	extru %4, 17, 2, %3	\n"
265 		"	extru %4, 31, 5, %4	\n"
266 		: "=r" (inst_b), "=r" (inst_x), "=r" (inst_s), "=r" (inst_t)
267 		: "r" (inst));
268 
269 	/*
270 	 * The space must be the user's space, else we
271 	 * segfault.
272 	 */
273 	if (inst_s != p->p_addr->u_pcb.pcb_space)
274 		return (EFAULT);
275 
276 	/* See whether or not this is a doubleword load/store. */
277 	log2size = (inst & OPCODE_DOUBLE) ? 3 : 2;
278 
279 	/* Get the floating point register. */
280 	fpreg = ((caddr_t) p->p_addr->u_pcb.pcb_fpregs) + (inst_t << log2size);
281 
282 	/* Get the base register. */
283 	base = FRAME_REG(frame, inst_b, r0);
284 
285 	/* Dispatch on whether or not this is an indexed load/store. */
286 	if (inst & OPCODE_INDEXED) {
287 
288 		/* Get the index register value. */
289 		index = *FRAME_REG(frame, inst_x, r0);
290 
291 		/* Dispatch on the completer. */
292 		switch (inst & OPCODE_CMPLT) {
293 		case OPCODE_CMPLT_S:
294 			offset = *base + (index << log2size);
295 			break;
296 		case OPCODE_CMPLT_M:
297 			offset = *base;
298 			*base = *base + index;
299 			break;
300 		case OPCODE_CMPLT_SM:
301 			offset = *base;
302 			*base = *base + (index << log2size);
303 			break;
304 		default:
305 			offset = *base + index;
306 			break;
307 		}
308 	} else {
309 
310 		/* Do a low_sign_ext(x, 5). */
311 		im5 = inst_x >> 1;
312 		if (inst_x & 1)
313 			im5 |= 0xfffffff0;
314 
315 		/* Dispatch on the completer. */
316 		switch (inst & OPCODE_CMPLT) {
317 		case OPCODE_CMPLT_MB:
318 			offset = *base + im5;
319 			*base = *base + im5;
320 			break;
321 		case OPCODE_CMPLT_MA:
322 			offset = *base;
323 			*base = *base + im5;
324 			break;
325 		default:
326 			offset = *base + im5;
327 			break;
328 		}
329 	}
330 
331 	/*
332 	 * The offset we calculated must be the same as the
333 	 * offset in the IOR.
334 	 */
335 	KASSERT(offset == frame->tf_ior);
336 
337 	/* Perform the load or store. */
338 	return (inst & OPCODE_STORE) ?
339 		copyout(fpreg, (void *) offset, 1 << log2size) :
340 		copyin((const void *) offset, fpreg, 1 << log2size);
341 }
342 
343 /*
344  * This is called to emulate an instruction.
345  */
346 void
347 hppa_fpu_emulate(struct trapframe *frame, struct proc *p)
348 {
349 	u_int inst, opcode, class, sub;
350 	u_int *fpregs;
351 	int exception;
352 
353 	/*
354 	 * If the process' state is in any hardware FPU,
355 	 * flush it out - we need to operate on it.
356 	 */
357 	hppa_fpu_flush(p);
358 
359 	/*
360 	 * Get the instruction that we're emulating,
361 	 * and break it down.  Using HP bit notation,
362 	 * the class is a two-bit field starting at
363 	 * bit 22, the opcode is a 6-bit field starting
364 	 * at bit 5, and sub for a class 1 instruction
365 	 * is a two bit field starting at bit 16, else
366 	 * it is a three bit field starting at bit 18.
367 	 */
368 	inst = frame->tf_iir;
369 	__asm __volatile(
370 		"	extru %3, 22, 2, %1	\n"
371 		"	extru %3, 5, 6, %0	\n"
372 		"	extru %3, 18, 3, %2	\n"
373 		"	comib,<> 1, %1, 0	\n"
374 		"	extru %3, 16, 2, %2	\n"
375 		: "=r" (opcode), "=r" (class), "=r" (sub)
376 		: "r" (inst));
377 
378 	/* Get this process' FPU registers. */
379 	fpregs = (u_int *) p->p_addr->u_pcb.pcb_fpregs;
380 
381 	/* Dispatch on the opcode. */
382 	switch (opcode) {
383 	case 0x09:
384 	case 0x0b:
385 		if (hppa_fpu_ls(frame, p) != 0)
386 			trapsignal(p, SIGSEGV, frame->tf_iioq_head);
387 		return;
388 	case 0x0c:
389 		exception = decode_0c(inst, class, sub, fpregs);
390 		break;
391 	case 0x0e:
392 		exception = decode_0e(inst, class, sub, fpregs);
393 		break;
394 	case 0x06:
395 		exception = decode_06(inst, fpregs);
396 		break;
397 	case 0x26:
398 		exception = decode_26(inst, fpregs);
399 		break;
400 	default:
401 		exception = UNIMPLEMENTEDEXCEPTION;
402 		break;
403         }
404 
405 	if (exception)
406 		trapsignal(p, (exception & UNIMPLEMENTEDEXCEPTION) ?
407 			SIGILL : SIGFPE, frame->tf_iioq_head);
408 }
409 
410 #endif /* FPEMUL */
411