xref: /netbsd/sys/arch/hppa/hppa/fpu.c (revision 6550d01e)
1 /*	$NetBSD: fpu.c,v 1.23 2011/01/23 09:44:59 skrll Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Matthew Fredette.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * FPU handling for NetBSD/hppa.
34  */
35 
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.23 2011/01/23 09:44:59 skrll Exp $");
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/proc.h>
42 #include <sys/signalvar.h>
43 
44 #include <uvm/uvm_extern.h>
45 
46 #include <machine/cpufunc.h>
47 #include <machine/frame.h>
48 #include <machine/reg.h>
49 #include <machine/pcb.h>
50 #include <machine/pmap.h>
51 
52 #include <hppa/hppa/machdep.h>
53 
54 #include "../spmath/float.h"
55 #include "../spmath/fpudispatch.h"
56 
/* Some macros representing opcodes. */
#define OPCODE_NOP	0x08000240
#define OPCODE_COPR_0_0	0x30000000

/*
 * Some macros representing fields in load/store opcodes.
 *
 * OPCODE_CMPLT_S and OPCODE_CMPLT_M are the two completer bits;
 * OPCODE_CMPLT is the mask covering both.
 *
 * For indexed forms the combinations are: none, ",s" (scaled
 * index), ",m" (modify base) and ",sm" (both).
 *
 * For short-displacement forms the OPCODE_CMPLT_S bit position
 * is the "a" bit: with the modify bit set, a == 0 selects ",ma"
 * (modify after) and a == 1 selects ",mb" (modify before).
 *
 * NOTE(review): per the PA-RISC opcode tables the OPCODE_INDEXED
 * bit appears to be *set* in the short-displacement encodings and
 * clear in the indexed ones, despite the macro's name - confirm
 * against the architecture manual.
 */
#define	OPCODE_CMPLT_S	0x00002000
#define	OPCODE_CMPLT_M	0x00000020
#define	OPCODE_CMPLT_SM	(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define	OPCODE_CMPLT_MA	OPCODE_CMPLT_M
#define	OPCODE_CMPLT_MB	(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define	OPCODE_CMPLT	(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define	OPCODE_DOUBLE	0x08000000
#define	OPCODE_STORE	0x00000200
#define OPCODE_INDEXED	0x00001000
71 
/*
 * This is nonzero iff we're using a hardware FPU.
 * Set by hppa_fpu_bootstrap() from its ccr_enable argument.
 */
int fpu_present;

/*
 * If we have any FPU, this is its version.
 * Filled in by hppa_fpu_bootstrap().
 */
u_int fpu_version;

/*
 * The number of times we have had to switch the FPU context.
 * Reset to zero by hppa_fpu_bootstrap(); it is not otherwise
 * modified in this file.
 */
u_int fpu_csw;

/*
 * In locore.S, this swaps states in and out of the FPU.
 * hppa_fpu_swapout() is what hppa_fpu_flush() uses to save an
 * LWP's state into its PCB.
 */
void hppa_fpu_swapout(struct pcb *);
void hppa_fpu_swap(struct fpreg *, struct fpreg *);
84 
#ifdef FPEMUL
/*
 * Given a trapframe and a general register number, the
 * FRAME_REG macro returns a pointer to that general
 * register.  The _frame_reg_positions array is a lookup
 * table, since the general registers aren't in order
 * in a trapframe.
 *
 * Register 0 has no slot in the trapframe, so the caller
 * supplies an lvalue (r0) whose address is returned instead;
 * the table is never consulted for reg == 0.
 *
 * NB: this more or less assumes that all members of
 * struct trapframe are u_ints.
 */
#define FRAME_REG(f, reg, r0)	\
	((reg) == 0 ? (&r0) : ((&(f)->tf_t1) + _frame_reg_positions[reg]))
/*
 * The position of trapframe member f relative to tf_t1,
 * measured in units of the member type (pointer difference),
 * not in bytes.
 */
#define _FRAME_POSITION(f)	\
	((&((struct trapframe *) 0)->f) - (&((struct trapframe *) 0)->tf_t1))
/* Indexed by general register number. */
const int _frame_reg_positions[32] = {
	-1,				/* r0 (placeholder, unused by FRAME_REG) */
	_FRAME_POSITION(tf_r1),
	_FRAME_POSITION(tf_rp),		/* r2 */
	_FRAME_POSITION(tf_r3),
	_FRAME_POSITION(tf_r4),
	_FRAME_POSITION(tf_r5),
	_FRAME_POSITION(tf_r6),
	_FRAME_POSITION(tf_r7),
	_FRAME_POSITION(tf_r8),
	_FRAME_POSITION(tf_r9),
	_FRAME_POSITION(tf_r10),
	_FRAME_POSITION(tf_r11),
	_FRAME_POSITION(tf_r12),
	_FRAME_POSITION(tf_r13),
	_FRAME_POSITION(tf_r14),
	_FRAME_POSITION(tf_r15),
	_FRAME_POSITION(tf_r16),
	_FRAME_POSITION(tf_r17),
	_FRAME_POSITION(tf_r18),
	_FRAME_POSITION(tf_t4),		/* r19 */
	_FRAME_POSITION(tf_t3),		/* r20 */
	_FRAME_POSITION(tf_t2),		/* r21 */
	_FRAME_POSITION(tf_t1),		/* r22 */
	_FRAME_POSITION(tf_arg3),	/* r23 */
	_FRAME_POSITION(tf_arg2),	/* r24 */
	_FRAME_POSITION(tf_arg1),	/* r25 */
	_FRAME_POSITION(tf_arg0),	/* r26 */
	_FRAME_POSITION(tf_dp),		/* r27 */
	_FRAME_POSITION(tf_ret0),	/* r28 */
	_FRAME_POSITION(tf_ret1),	/* r29 */
	_FRAME_POSITION(tf_sp),		/* r30 */
	_FRAME_POSITION(tf_r31),
};
#endif /* FPEMUL */
135 
136 /*
137  * Bootstraps the FPU.
138  */
139 void
140 hppa_fpu_bootstrap(u_int ccr_enable)
141 {
142 	uint32_t junk[2];
143 	uint32_t vers[2];
144 	extern u_int hppa_fpu_nop0;
145 	extern u_int hppa_fpu_nop1;
146 
147 	/* See if we have a present and functioning hardware FPU. */
148 	fpu_present = (ccr_enable & HPPA_FPUS) == HPPA_FPUS;
149 
150 	/* Initialize the FPU and get its version. */
151 	if (fpu_present) {
152 
153 		/*
154 		 * To somewhat optimize the emulation
155 		 * assist trap handling and context
156 		 * switching (to save them from having
157 	 	 * to always load and check fpu_present),
158 		 * there are two instructions in locore.S
159 		 * that are replaced with nops when
160 		 * there is a hardware FPU.
161 	 	 */
162 		hppa_fpu_nop0 = OPCODE_NOP;
163 		hppa_fpu_nop1 = OPCODE_NOP;
164 		fcacheall();
165 
166 		/*
167 		 * We track what process has the FPU,
168 		 * and how many times we have to swap
169 		 * in and out.
170 		 */
171 
172 		/*
173 		 * The PA-RISC 1.1 Architecture manual is
174 		 * pretty clear that the copr,0,0 must be
175 		 * wrapped in double word stores of fr0,
176 		 * otherwise its operation is undefined.
177 		 */
178 		__asm volatile(
179 			"	ldo	%0, %%r22	\n"
180 			"	fstds	%%fr0, 0(%%r22)	\n"
181 			"	ldo	%1, %%r22	\n"
182 			"	copr,0,0		\n"
183 			"	fstds	%%fr0, 0(%%r22)	\n"
184 			: "=m" (junk), "=m" (vers) : : "r22");
185 
186 		/*
187 		 * Now mark that no process has the FPU,
188 		 * and disable it, so the first time it
189 		 * gets used the process' state gets
190 		 * swapped in.
191 		 */
192 		fpu_csw = 0;
193 		curcpu()->ci_fpu_state = 0;
194 		mtctl(ccr_enable & (CCR_MASK ^ HPPA_FPUS), CR_CCR);
195 	}
196 #ifdef FPEMUL
197 	else
198 		/*
199 		 * XXX This is a hack - to avoid
200 		 * having to set up the emulator so
201 		 * it can work for one instruction for
202 		 * proc0, we dispatch the copr,0,0 opcode
203 		 * into the emulator directly.
204 		 */
205 		decode_0c(OPCODE_COPR_0_0, 0, 0, vers);
206 #endif /* FPEMUL */
207 	fpu_version = vers[0];
208 }
209 
210 /*
211  * If the given LWP has its state in the FPU,
212  * flush that state out into the LWP's PCB.
213  */
214 void
215 hppa_fpu_flush(struct lwp *l)
216 {
217 	struct trapframe *tf = l->l_md.md_regs;
218 	struct pcb *pcb = lwp_getpcb(l);
219 	struct cpu_info *ci = curcpu();
220 
221 	if (!fpu_present)
222 		return;
223 
224 	/*
225 	 * If we have a hardware FPU, and this process'
226 	 * state is currently in it, swap it out.
227 	 */
228 
229 	if (ci->ci_fpu_state == 0 ||
230 	    ci->ci_fpu_state != tf->tf_cr30) {
231 		return;
232 	}
233 
234 	hppa_fpu_swapout(pcb);
235 	ci->ci_fpu_state = 0;
236 }
237 
238 #ifdef FPEMUL
239 
240 /*
241  * This emulates a coprocessor load/store instruction.
242  */
243 static int hppa_fpu_ls(struct trapframe *, struct lwp *);
244 static int
245 hppa_fpu_ls(struct trapframe *frame, struct lwp *l)
246 {
247 	struct pcb *pcb = lwp_getpcb(l);
248 	u_int inst, inst_b, inst_x, inst_s, inst_t;
249 	int log2size;
250 	u_int *base;
251 	u_int offset, index, im5;
252 	void *fpreg;
253 	u_int r0 = 0;
254 	int error;
255 
256 	/*
257 	 * Get the instruction that we're emulating,
258 	 * and break it down.  Using HP bit notation,
259 	 * b is a five-bit field starting at bit 10,
260 	 * x is a five-bit field starting at bit 15,
261 	 * s is a two-bit field starting at bit 17,
262 	 * and t is a five-bit field starting at bit 31.
263 	 */
264 	inst = frame->tf_iir;
265 	__asm volatile(
266 		"	extru %4, 10, 5, %1	\n"
267 		"	extru %4, 15, 5, %2	\n"
268 		"	extru %4, 17, 2, %3	\n"
269 		"	extru %4, 31, 5, %4	\n"
270 		: "=r" (inst_b), "=r" (inst_x), "=r" (inst_s), "=r" (inst_t)
271 		: "r" (inst));
272 
273 	/*
274 	 * The space must be the user's space, else we
275 	 * segfault.
276 	 */
277 	if (inst_s != pcb->pcb_space)
278 		return EFAULT;
279 
280 	/* See whether or not this is a doubleword load/store. */
281 	log2size = (inst & OPCODE_DOUBLE) ? 3 : 2;
282 
283 	/* Get the floating point register. */
284 	fpreg = ((char *)pcb->pcb_fpregs) + (inst_t << log2size);
285 
286 	/* Get the base register. */
287 	base = FRAME_REG(frame, inst_b, r0);
288 
289 	/* Dispatch on whether or not this is an indexed load/store. */
290 	if (inst & OPCODE_INDEXED) {
291 
292 		/* Get the index register value. */
293 		index = *FRAME_REG(frame, inst_x, r0);
294 
295 		/* Dispatch on the completer. */
296 		switch (inst & OPCODE_CMPLT) {
297 		case OPCODE_CMPLT_S:
298 			offset = *base + (index << log2size);
299 			break;
300 		case OPCODE_CMPLT_M:
301 			offset = *base;
302 			*base = *base + index;
303 			break;
304 		case OPCODE_CMPLT_SM:
305 			offset = *base;
306 			*base = *base + (index << log2size);
307 			break;
308 		default:
309 			offset = *base + index;
310 			break;
311 		}
312 	} else {
313 
314 		/* Do a low_sign_ext(x, 5). */
315 		im5 = inst_x >> 1;
316 		if (inst_x & 1)
317 			im5 |= 0xfffffff0;
318 
319 		/* Dispatch on the completer. */
320 		switch (inst & OPCODE_CMPLT) {
321 		case OPCODE_CMPLT_MB:
322 			offset = *base + im5;
323 			*base = *base + im5;
324 			break;
325 		case OPCODE_CMPLT_MA:
326 			offset = *base;
327 			*base = *base + im5;
328 			break;
329 		default:
330 			offset = *base + im5;
331 			break;
332 		}
333 	}
334 
335 	/*
336 	 * The offset we calculated must be the same as the
337 	 * offset in the IOR.
338 	 */
339 	KASSERT(offset == frame->tf_ior);
340 
341 	/* Perform the load or store. */
342 	error = (inst & OPCODE_STORE) ?
343 		copyout(fpreg, (void *) offset, 1 << log2size) :
344 		copyin((const void *) offset, fpreg, 1 << log2size);
345 	return error;
346 }
347 
348 /*
349  * This is called to emulate an instruction.
350  */
351 void
352 hppa_fpu_emulate(struct trapframe *frame, struct lwp *l, u_int inst)
353 {
354 	struct pcb *pcb = lwp_getpcb(l);
355 	u_int opcode, class, sub;
356 	u_int *fpregs;
357 	int exception;
358 	ksiginfo_t ksi;
359 
360 	/*
361 	 * If the process' state is in any hardware FPU,
362 	 * flush it out - we need to operate on it.
363 	 */
364 	hppa_fpu_flush(l);
365 
366 	/*
367 	 * Get the instruction that we're emulating,
368 	 * and break it down.  Using HP bit notation,
369 	 * the class is a two-bit field starting at
370 	 * bit 22, the opcode is a 6-bit field starting
371 	 * at bit 5, and sub for a class 1 instruction
372 	 * is a two bit field starting at bit 16, else
373 	 * it is a three bit field starting at bit 18.
374 	 */
375 #if 0
376 	__asm volatile(
377 		"	extru %3, 22, 2, %1	\n"
378 		"	extru %3, 5, 6, %0	\n"
379 		"	extru %3, 18, 3, %2	\n"
380 		"	comib,<> 1, %1, 0	\n"
381 		"	extru %3, 16, 2, %2	\n"
382 		: "=r" (opcode), "=r" (class), "=r" (sub)
383 		: "r" (inst));
384 #else
385 	opcode = (inst >> (31 - 5)) & 0x3f;
386 	class = (inst >> (31 - 22)) & 0x3;
387 	if (class == 1) {
388 		sub = (inst >> (31 - 16)) & 3;
389 	} else {
390 		sub = (inst >> (31 - 18)) & 7;
391 	}
392 #endif
393 
394 	/* Get this LWP's FPU registers. */
395 	fpregs = (u_int *)pcb->pcb_fpregs;
396 
397 	/* Dispatch on the opcode. */
398 	switch (opcode) {
399 	case 0x09:
400 	case 0x0b:
401 		if (hppa_fpu_ls(frame, l) != 0) {
402 			KSI_INIT_TRAP(&ksi);
403 			ksi.ksi_signo = SIGSEGV;
404 			ksi.ksi_code = SEGV_MAPERR;
405 			ksi.ksi_trap = T_DTLBMISS;
406 			ksi.ksi_addr = (void *)frame->tf_iioq_head;
407 			trapsignal(l, &ksi);
408 		}
409 		return;
410 	case 0x0c:
411 		exception = decode_0c(inst, class, sub, fpregs);
412 		break;
413 	case 0x0e:
414 		exception = decode_0e(inst, class, sub, fpregs);
415 		break;
416 	case 0x06:
417 		exception = decode_06(inst, fpregs);
418 		break;
419 	case 0x26:
420 		exception = decode_26(inst, fpregs);
421 		break;
422 	default:
423 		exception = UNIMPLEMENTEDEXCEPTION;
424 		break;
425         }
426 
427 	if (exception) {
428 		KSI_INIT_TRAP(&ksi);
429 		if (exception & UNIMPLEMENTEDEXCEPTION) {
430 			ksi.ksi_signo = SIGILL;
431 			ksi.ksi_code = ILL_COPROC;
432 		} else {
433 			ksi.ksi_signo = SIGFPE;
434 			if (exception & INVALIDEXCEPTION) {
435 				ksi.ksi_code = FPE_FLTINV;
436 			} else if (exception & DIVISIONBYZEROEXCEPTION) {
437 				ksi.ksi_code = FPE_FLTDIV;
438 			} else if (exception & OVERFLOWEXCEPTION) {
439 				ksi.ksi_code = FPE_FLTOVF;
440 			} else if (exception & UNDERFLOWEXCEPTION) {
441 				ksi.ksi_code = FPE_FLTUND;
442 			} else if (exception & INEXACTEXCEPTION) {
443 				ksi.ksi_code = FPE_FLTRES;
444 			}
445 		}
446 		ksi.ksi_trap = T_EMULATION;
447 		ksi.ksi_addr = (void *)frame->tf_iioq_head;
448 		trapsignal(l, &ksi);
449 	}
450 }
451 
452 #endif /* FPEMUL */
453