xref: /openbsd/sys/arch/mips64/mips64/fp_emulate.c (revision 16e66f46)
1 /*	$OpenBSD: fp_emulate.c,v 1.25 2023/01/11 03:19:52 visa Exp $	*/
2 
3 /*
4  * Copyright (c) 2010 Miodrag Vallat.
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 /*
20  * Floating Point completion/emulation code (MI softfloat code control engine).
21  *
22  * Supports all MIPS IV COP1 and COP1X floating-point instructions.
23  *
24  * Floating-point load and store instructions, as well as branch instructions,
25  * are only handled if the kernel is compiled with option FPUEMUL.
26  */
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/signalvar.h>
32 
33 #include <machine/cpu.h>
34 #include <mips64/mips_cpu.h>
35 #include <machine/fpu.h>
36 #include <machine/frame.h>
37 #include <machine/ieee.h>
38 #include <machine/ieeefp.h>
39 #include <machine/mips_opcode.h>
40 #include <machine/regnum.h>
41 
42 #include <lib/libkern/softfloat.h>
43 #if defined(DEBUG) && defined(DDB)
44 #include <machine/db_machdep.h>
45 #endif
46 
47 int	fpu_emulate(struct proc *, struct trapframe *, uint32_t,
48 	    union sigval *);
49 int	fpu_emulate_cop1(struct proc *, struct trapframe *, uint32_t);
50 int	fpu_emulate_cop1x(struct proc *, struct trapframe *, uint32_t);
51 uint64_t
52 	fpu_load(struct proc *, struct trapframe *, uint, uint);
53 void	fpu_store(struct proc *, struct trapframe *, uint, uint, uint64_t);
54 #ifdef FPUEMUL
55 int	nofpu_emulate_cop1(struct proc *, struct trapframe *, uint32_t,
56 	    union sigval *);
57 int	nofpu_emulate_cop1x(struct proc *, struct trapframe *, uint32_t,
58 	    union sigval *);
59 int	nofpu_emulate_loadstore(struct proc *, struct trapframe *, uint32_t,
60 	    union sigval *);
61 int	nofpu_emulate_movci(struct trapframe *, uint32_t);
62 #endif
63 
64 typedef	int (fpu_fn3)(struct proc *, struct trapframe *, uint, uint, uint,
65 	    uint);
66 typedef	int (fpu_fn4)(struct proc *, struct trapframe *, uint, uint, uint,
67 	    uint, uint);
68 fpu_fn3	fpu_abs;
69 fpu_fn3	fpu_add;
70 int	fpu_c(struct proc *, struct trapframe *, uint, uint, uint, uint, uint);
71 fpu_fn3	fpu_ceil_l;
72 fpu_fn3	fpu_ceil_w;
73 fpu_fn3	fpu_cvt_d;
74 fpu_fn3	fpu_cvt_l;
75 fpu_fn3	fpu_cvt_s;
76 fpu_fn3	fpu_cvt_w;
77 fpu_fn3	fpu_div;
78 fpu_fn3	fpu_floor_l;
79 fpu_fn3	fpu_floor_w;
80 int	fpu_int_l(struct proc *, struct trapframe *, uint, uint, uint, uint,
81 	    uint);
82 int	fpu_int_w(struct proc *, struct trapframe *, uint, uint, uint, uint,
83 	    uint);
84 fpu_fn4	fpu_madd;
85 fpu_fn4	fpu_msub;
86 fpu_fn3	fpu_mov;
87 fpu_fn3	fpu_movcf;
88 fpu_fn3	fpu_movn;
89 fpu_fn3	fpu_movz;
90 fpu_fn3	fpu_mul;
91 fpu_fn3	fpu_neg;
92 fpu_fn4	fpu_nmadd;
93 fpu_fn4	fpu_nmsub;
94 fpu_fn3	fpu_recip;
95 fpu_fn3	fpu_round_l;
96 fpu_fn3	fpu_round_w;
97 fpu_fn3	fpu_rsqrt;
98 fpu_fn3	fpu_sqrt;
99 fpu_fn3	fpu_sub;
100 fpu_fn3	fpu_trunc_l;
101 fpu_fn3	fpu_trunc_w;
102 
103 /*
104  * Encoding of operand format within opcodes `fmt' and `fmt3' fields.
105  */
106 #define	FMT_S	0x00
107 #define	FMT_D	0x01
108 #define	FMT_W	0x04
109 #define	FMT_L	0x05
110 
/*
 * Inlines from softfloat-specialize.h which are not made public.  They
 * operate on the raw bit pattern of a single-precision value and are
 * used by fpu_abs and fpu_c.
 *
 * The argument is parenthesized at every use so that an expression can be
 * passed safely.  Note that float32_is_signaling_nan evaluates its
 * argument twice; do not pass an expression with side effects.
 */
#define	float32_is_nan(a) \
	(0xff000000 < ((a) << 1))
#define	float32_is_signaling_nan(a) \
	(((((a) >> 22) & 0x1ff) == 0x1fe) && ((a) & 0x003fffff))

/*
 * Precomputed results of intXX_to_floatXX(1)
 *
 * The whole expansion is parenthesized so that the cast cannot combine
 * with operators surrounding the macro invocation.
 */
#define	ONE_F32	((float32)(SNG_EXP_BIAS << SNG_FRACBITS))
#define	ONE_F64	((float64)((uint64_t)DBL_EXP_BIAS << DBL_FRACBITS))
125 
/*
 * Read coprocessor 1 control register $31 and return its value.
 */
static inline uint32_t
getfsr(void)
{
	uint32_t status;

	__asm__ volatile (
	"	.set	push\n"
	"	.set	hardfloat\n"
	"	cfc1	%0, $31\n"	/* stall until FPU done */
	"	cfc1	%0, $31\n"	/* now get status */
	"	.set	pop\n"
	: "=r" (status));

	return status;
}
140 
/*
 * Write `fsr' to coprocessor 1 control register $31.
 */
static inline void
setfsr(uint32_t fsr)
{
	__asm__ volatile (
	"	.set	push\n"
	"	.set	hardfloat\n"
	"	ctc1	%0, $31\n"
	"	.set	pop\n"
	: /* no outputs */
	: "r" (fsr));
}
151 
152 /*
153  * Handle a floating-point exception.
154  */
155 void
MipsFPTrap(struct trapframe * tf)156 MipsFPTrap(struct trapframe *tf)
157 {
158 	struct cpu_info *ci = curcpu();
159 	struct proc *p = ci->ci_curproc;
160 	union sigval sv;
161 	vaddr_t pc;
162 	register_t sr;
163 	uint32_t fsr, excbits;
164 	uint32_t branch = 0;
165 	uint32_t insn;
166 	InstFmt inst;
167 	int sig = 0;
168 	int fault_type = SI_NOINFO;
169 	int update_pcb = 0;
170 	int emulate = 0;
171 	int skip_insn = 1;
172 
173 	KDASSERT(tf == p->p_md.md_regs);
174 
175 	pc = (vaddr_t)tf->pc;
176 	if (tf->cause & CR_BR_DELAY)
177 		pc += 4;
178 
179 	if (CPU_HAS_FPU(ci)) {
180 		/*
181 		 * Enable FPU, and read its status register.
182 		 */
183 
184 		sr = getsr();
185 		setsr(sr | SR_COP_1_BIT);
186 		fsr = getfsr();
187 
188 		/*
189 		 * If this is not an unimplemented operation, but a genuine
190 		 * FPU exception, signal the process.
191 		 */
192 
193 		if ((fsr & FPCSR_C_E) == 0) {
194 			sig = SIGFPE;
195 			goto deliver;
196 		}
197 	} else {
198 #ifdef CPU_OCTEON
199 		/*
200 		 * SR_FR_32 is hardwired to zero on Octeon; make sure it is
201 		 * set in the emulation view of the FPU state.
202 		 */
203 		tf->sr |= SR_FR_32;
204 #endif
205 		fsr = tf->fsr;
206 	}
207 
208 	/*
209 	 * Get the faulting instruction.  This should not fail, and
210 	 * if it does, it's probably not your lucky day.
211 	 */
212 
213 	if (copyinsn(p, pc, &insn) != 0) {
214 		sig = SIGBUS;
215 		fault_type = BUS_OBJERR;
216 		sv.sival_ptr = (void *)pc;
217 		goto deliver;
218 	}
219 	inst = *(InstFmt *)&insn;
220 
221 	if (tf->cause & CR_BR_DELAY) {
222 		if (copyinsn(p, tf->pc, &branch) != 0) {
223 			sig = SIGBUS;
224 			fault_type = BUS_OBJERR;
225 			sv.sival_ptr = (void *)tf->pc;
226 			goto deliver;
227 		}
228 	}
229 
230 	/*
231 	 * Emulate the instruction.
232 	 */
233 
234 #ifdef DEBUG
235 #ifdef DDB
236 	printf("%s: unimplemented FPU completion, fsr 0x%08x\n0x%lx: ",
237 	    p->p_p->ps_comm, fsr, pc);
238 	dbmd_print_insn(insn, pc, printf);
239 #else
240 	printf("%s: unimplemented FPU completion, insn 0x%08x fsr 0x%08x\n",
241 	    p->p_p->ps_comm, insn, fsr);
242 #endif
243 #endif
244 
245 	switch (inst.FRType.op) {
246 	default:
247 		/*
248 		 * Not a FPU instruction.
249 		 */
250 		break;
251 #ifdef FPUEMUL
252 	case OP_SPECIAL:
253 		switch (inst.FRType.func) {
254 		default:
255 			/*
256 			 * Not a FPU instruction.
257 			 */
258 			break;
259 		case OP_MOVCI:
260 			/*
261 			 * This instruction should not require emulation,
262 			 * unless there is no FPU.
263 			 */
264 			if (!CPU_HAS_FPU(ci))
265 				emulate = 1;
266 			break;
267 		}
268 		break;
269 	case OP_LDC1:
270 	case OP_LWC1:
271 	case OP_SDC1:
272 	case OP_SWC1:
273 		/*
274 		 * These instructions should not require emulation,
275 		 * unless there is no FPU.
276 		 */
277 		if (!CPU_HAS_FPU(ci))
278 			emulate = 1;
279 		break;
280 #endif
281 	case OP_COP1:
282 		switch (inst.RType.rs) {
283 		case OP_BC:
284 			skip_insn = 0;
285 			/* FALLTHROUGH */
286 		case OP_MF:
287 		case OP_DMF:
288 		case OP_CF:
289 		case OP_MT:
290 		case OP_DMT:
291 		case OP_CT:
292 			/*
293 			 * These instructions should not require emulation,
294 			 * unless there is no FPU.
295 			 */
296 			if (!CPU_HAS_FPU(ci))
297 				emulate = 1;
298 			break;
299 		default:
300 			emulate = 1;
301 			break;
302 		}
303 		break;
304 	case OP_COP1X:
305 		switch (inst.FQType.op4) {
306 		default:
307 			switch (inst.FRType.func) {
308 #ifdef FPUEMUL
309 			case OP_LDXC1:
310 			case OP_LWXC1:
311 			case OP_SDXC1:
312 			case OP_SWXC1:
313 			case OP_PREFX:
314 				/*
315 				 * These instructions should not require
316 				 * emulation, unless there is no FPU.
317 				 */
318 				if (!CPU_HAS_FPU(ci))
319 					emulate = 1;
320 				break;
321 #endif
322 			default:
323 				/*
324 				 * Not a valid instruction.
325 				 */
326 				break;
327 			}
328 			break;
329 		case OP_MADD:
330 		case OP_MSUB:
331 		case OP_NMADD:
332 		case OP_NMSUB:
333 			emulate = 1;
334 			break;
335 		}
336 		break;
337 	}
338 
339 	if (emulate) {
340 		if (CPU_HAS_FPU(ci)) {
341 			KASSERT(p == ci->ci_fpuproc);
342 			save_fpu();
343 		}
344 
345 		update_pcb = 1;
346 
347 		sig = fpu_emulate(p, tf, insn, &sv);
348 		/* reload fsr, possibly modified by softfloat code */
349 		fsr = tf->fsr;
350 		if (sig == 0) {
351 			/* raise SIGFPE if necessary */
352 			excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
353 			excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
354 			if (excbits != 0)
355 				sig = SIGFPE;
356 		}
357 	} else {
358 		sig = SIGILL;
359 		fault_type = ILL_ILLOPC;
360 	}
361 
362 deliver:
363 	switch (sig) {
364 	case SIGFPE:
365 		excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
366 		excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
367 		if (excbits & FP_X_INV)
368 			fault_type = FPE_FLTINV;
369 		else if (excbits & FP_X_DZ)
370 			fault_type = FPE_INTDIV;
371 		else if (excbits & FP_X_OFL)
372 			fault_type = FPE_FLTUND;
373 		else if (excbits & FP_X_UFL)
374 			fault_type = FPE_FLTOVF;
375 		else /* if (excbits & FP_X_IMP) */
376 			fault_type = FPE_FLTRES;
377 
378 		break;
379 #ifdef FPUEMUL
380 	case SIGBUS:
381 		if (fault_type == SI_NOINFO)
382 			fault_type = BUS_ADRALN;
383 		break;
384 	case SIGSEGV:
385 		if (fault_type == SI_NOINFO)
386 			fault_type = SEGV_MAPERR;
387 		break;
388 #endif
389 	}
390 
391 	/*
392 	 * Skip the instruction, unless we are delivering SIGILL.
393 	 */
394 	if (CPU_HAS_FPU(ci) || skip_insn) {
395 		if (sig != SIGILL) {
396 			if (tf->cause & CR_BR_DELAY) {
397 				/*
398 				 * Note that it doesn't matter, at this point,
399 				 * that we pass the updated FSR value, as it is
400 				 * only used to decide whether to branch or not
401 				 * if the faulting instruction was BC1[FT].
402 				 */
403 				tf->pc = MipsEmulateBranch(tf, tf->pc, fsr,
404 				    branch);
405 			} else
406 				tf->pc += 4;
407 		}
408 	}
409 
410 	/*
411 	 * Update the FPU status register.
412 	 * We need to make sure that this will not cause an exception
413 	 * in kernel mode.
414 	 */
415 
416 	/* propagate raised exceptions to the sticky bits */
417 	fsr &= ~FPCSR_C_E;
418 	excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
419 	fsr |= excbits << FPCSR_F_SHIFT;
420 	/* clear all exception sources */
421 	fsr &= ~FPCSR_C_MASK;
422 	if (update_pcb)
423 		tf->fsr = fsr;
424 
425 	if (CPU_HAS_FPU(ci)) {
426 		setfsr(fsr);
427 		/* disable fpu before returning to trap() */
428 		setsr(sr);
429 	}
430 
431 	if (sig != 0) {
432 		if (sig != SIGBUS && sig != SIGSEGV)
433 			sv.sival_ptr = (void *)pc;
434 		trapsignal(p, sig, 0, fault_type, sv);
435 	}
436 }
437 
438 /*
439  * Emulate an FPU instruction.  The FPU register set has been saved in the
440  * current PCB, and is pointed to by the trap frame.
441  */
442 int
fpu_emulate(struct proc * p,struct trapframe * tf,uint32_t insn,union sigval * sv)443 fpu_emulate(struct proc *p, struct trapframe *tf, uint32_t insn,
444     union sigval *sv)
445 {
446 	InstFmt inst;
447 
448 	tf->zero = 0;	/* not written by trap code */
449 
450 	inst = *(InstFmt *)&insn;
451 
452 	if (CPU_HAS_FPU(p->p_cpu)) {
453 		switch (inst.FRType.op) {
454 		default:
455 			break;
456 		case OP_COP1:
457 			return fpu_emulate_cop1(p, tf, insn);
458 		case OP_COP1X:
459 			return fpu_emulate_cop1x(p, tf, insn);
460 		}
461 
462 		return SIGILL;
463 	}
464 
465 #ifdef FPUEMUL
466 	switch (inst.FRType.op) {
467 	default:
468 		break;
469 	case OP_SPECIAL:
470 		return nofpu_emulate_movci(tf, insn);
471 	case OP_LDC1:
472 	case OP_LWC1:
473 	case OP_SDC1:
474 	case OP_SWC1:
475 		return nofpu_emulate_loadstore(p, tf, insn, sv);
476 	case OP_COP1:
477 		switch (inst.RType.rs) {
478 		case OP_MF:
479 		case OP_DMF:
480 		case OP_CF:
481 		case OP_MT:
482 		case OP_DMT:
483 		case OP_CT:
484 		case OP_BC:
485 			return nofpu_emulate_cop1(p, tf, insn, sv);
486 		default:
487 			return fpu_emulate_cop1(p, tf, insn);
488 		}
489 		break;
490 	case OP_COP1X:
491 		switch (inst.FQType.op4) {
492 		default:
493 			switch (inst.FRType.func) {
494 			case OP_LDXC1:
495 			case OP_LWXC1:
496 			case OP_SDXC1:
497 			case OP_SWXC1:
498 			case OP_PREFX:
499 				return nofpu_emulate_cop1x(p, tf, insn, sv);
500 			default:
501 				break;
502 			}
503 			break;
504 		case OP_MADD:
505 		case OP_MSUB:
506 		case OP_NMADD:
507 		case OP_NMSUB:
508 			return fpu_emulate_cop1x(p, tf, insn);
509 		}
510 	}
511 #endif
512 
513 	return SIGILL;
514 }
515 
516 /*
517  * Emulate a COP1 FPU instruction.
518  */
519 int
fpu_emulate_cop1(struct proc * p,struct trapframe * tf,uint32_t insn)520 fpu_emulate_cop1(struct proc *p, struct trapframe *tf, uint32_t insn)
521 {
522 	InstFmt inst;
523 	uint ft, fs, fd;
524 	fpu_fn3 *fpu_op;
525 	static fpu_fn3 *const fpu_ops1[1 << 6] = {
526 		fpu_add,		/* 0x00 */
527 		fpu_sub,
528 		fpu_mul,
529 		fpu_div,
530 		fpu_sqrt,
531 		fpu_abs,
532 		fpu_mov,
533 		fpu_neg,
534 		fpu_round_l,		/* 0x08 */
535 		fpu_trunc_l,
536 		fpu_ceil_l,
537 		fpu_floor_l,
538 		fpu_round_w,
539 		fpu_trunc_w,
540 		fpu_ceil_w,
541 		fpu_floor_w,
542 		NULL,			/* 0x10 */
543 		fpu_movcf,
544 		fpu_movz,
545 		fpu_movn,
546 		NULL,
547 		fpu_recip,
548 		fpu_rsqrt,
549 		NULL,
550 		NULL,			/* 0x18 */
551 		NULL,
552 		NULL,
553 		NULL,
554 		NULL,
555 		NULL,
556 		NULL,
557 		NULL,
558 		fpu_cvt_s,		/* 0x20 */
559 		fpu_cvt_d,
560 		NULL,
561 		NULL,
562 		fpu_cvt_w,
563 		fpu_cvt_l,
564 		NULL,
565 		NULL,
566 		NULL,			/* 0x28 */
567 		NULL,
568 		NULL,
569 		NULL,
570 		NULL,
571 		NULL,
572 		NULL,
573 		NULL,
574 		(fpu_fn3 *)fpu_c,	/* 0x30 */
575 		(fpu_fn3 *)fpu_c,
576 		(fpu_fn3 *)fpu_c,
577 		(fpu_fn3 *)fpu_c,
578 		(fpu_fn3 *)fpu_c,
579 		(fpu_fn3 *)fpu_c,
580 		(fpu_fn3 *)fpu_c,
581 		(fpu_fn3 *)fpu_c,
582 		(fpu_fn3 *)fpu_c,	/* 0x38 */
583 		(fpu_fn3 *)fpu_c,
584 		(fpu_fn3 *)fpu_c,
585 		(fpu_fn3 *)fpu_c,
586 		(fpu_fn3 *)fpu_c,
587 		(fpu_fn3 *)fpu_c,
588 		(fpu_fn3 *)fpu_c,
589 		(fpu_fn3 *)fpu_c
590 	};
591 
592 	inst = *(InstFmt *)&insn;
593 
594 	/*
595 	 * Check for valid function code.
596 	 */
597 
598 	fpu_op = fpu_ops1[inst.FRType.func];
599 	if (fpu_op == NULL)
600 		return SIGILL;
601 
602 	/*
603 	 * Check for valid format.  FRType assumes bit 25 is always set,
604 	 * so we need to check for it explicitly.
605 	 */
606 
607 	if ((insn & (1 << 25)) == 0)
608 		return SIGILL;
609 	switch (inst.FRType.fmt) {
610 	default:
611 		return SIGILL;
612 	case FMT_S:
613 	case FMT_D:
614 	case FMT_W:
615 	case FMT_L:
616 		break;
617 	}
618 
619 	/*
620 	 * Check for valid register values. Only even-numbered registers
621 	 * can be used if the FR bit is clear in coprocessor 0 status
622 	 * register.
623 	 *
624 	 * Note that c.cond does not specify a register number in the fd
625 	 * field, but the fd field must have zero in its low two bits, so
626 	 * the test will not reject valid c.cond instructions.
627 	 */
628 
629 	ft = inst.FRType.ft;
630 	fs = inst.FRType.fs;
631 	fd = inst.FRType.fd;
632 	if ((tf->sr & SR_FR_32) == 0) {
633 		if ((ft | fs | fd) & 1)
634 			return SIGILL;
635 	}
636 
637 	/*
638 	 * Finally dispatch to the proper routine.
639 	 */
640 
641 	if (fpu_op == (fpu_fn3 *)&fpu_c)
642 		return
643 		    fpu_c(p, tf, inst.FRType.fmt, ft, fs, fd, inst.FRType.func);
644 	else
645 		return (*fpu_op)(p, tf, inst.FRType.fmt, ft, fs, fd);
646 }
647 
648 /*
649  * Emulate a COP1X FPU instruction.
650  */
651 int
fpu_emulate_cop1x(struct proc * p,struct trapframe * tf,uint32_t insn)652 fpu_emulate_cop1x(struct proc *p, struct trapframe *tf, uint32_t insn)
653 {
654 	InstFmt inst;
655 	uint fr, ft, fs, fd;
656 	fpu_fn4 *fpu_op;
657 	static fpu_fn4 *const fpu_ops1x[1 << 3] = {
658 		NULL,
659 		NULL,
660 		NULL,
661 		NULL,
662 		fpu_madd,
663 		fpu_msub,
664 		fpu_nmadd,
665 		fpu_nmsub
666 	};
667 
668 	inst = *(InstFmt *)&insn;
669 
670 	/*
671 	 * Check for valid function code.
672 	 */
673 
674 	fpu_op = fpu_ops1x[inst.FQType.op4];
675 	if (fpu_op == NULL)
676 		return SIGILL;
677 
678 	/*
679 	 * Check for valid format.
680 	 */
681 
682 	switch (inst.FQType.fmt3) {
683 	default:
684 		return SIGILL;
685 	case FMT_S:
686 	case FMT_D:
687 	case FMT_W:
688 	case FMT_L:
689 		break;
690 	}
691 
692 	/*
693 	 * Check for valid register values. Only even-numbered registers
694 	 * can be used if the FR bit is clear in coprocessor 0 status
695 	 * register.
696 	 */
697 
698 	fr = inst.FQType.fr;
699 	ft = inst.FQType.ft;
700 	fs = inst.FQType.fs;
701 	fd = inst.FQType.fd;
702 	if ((tf->sr & SR_FR_32) == 0) {
703 		if ((fr | ft | fs | fd) & 1)
704 			return SIGILL;
705 	}
706 
707 	/*
708 	 * Finally dispatch to the proper routine.
709 	 */
710 
711 	return (*fpu_op)(p, tf, inst.FRType.fmt, fr, ft, fs, fd);
712 }
713 
714 /*
715  * Load a floating-point argument according to the specified format.
716  */
717 uint64_t
fpu_load(struct proc * p,struct trapframe * tf,uint fmt,uint regno)718 fpu_load(struct proc *p, struct trapframe *tf, uint fmt, uint regno)
719 {
720 	uint64_t tmp, tmp2;
721 
722 	tmp = ((uint64_t *)p->p_md.md_regs)[FPBASE + regno];
723 	if (tf->sr & SR_FR_32) {
724 		switch (fmt) {
725 		case FMT_D:
726 		case FMT_L:
727 			break;
728 		case FMT_S:
729 		case FMT_W:
730 			tmp &= 0xffffffff;
731 			break;
732 		}
733 	} else {
734 		tmp &= 0xffffffff;
735 		switch (fmt) {
736 		case FMT_D:
737 		case FMT_L:
738 			/* caller has enforced regno is even */
739 			tmp2 =
740 			    ((uint64_t *)p->p_md.md_regs)[FPBASE + regno + 1];
741 			tmp |= tmp2 << 32;
742 			break;
743 		case FMT_S:
744 		case FMT_W:
745 			break;
746 		}
747 	}
748 
749 	return tmp;
750 }
751 
752 /*
753  * Store a floating-point result according to the specified format.
754  */
755 void
fpu_store(struct proc * p,struct trapframe * tf,uint fmt,uint regno,uint64_t rslt)756 fpu_store(struct proc *p, struct trapframe *tf, uint fmt, uint regno,
757     uint64_t rslt)
758 {
759 	if (tf->sr & SR_FR_32) {
760 		((uint64_t *)p->p_md.md_regs)[FPBASE + regno] = rslt;
761 	} else {
762 		/* caller has enforced regno is even */
763 		((uint64_t *)p->p_md.md_regs)[FPBASE + regno] =
764 		    rslt & 0xffffffff;
765 		((uint64_t *)p->p_md.md_regs)[FPBASE + regno + 1] =
766 		    (rslt >> 32) & 0xffffffff;
767 	}
768 }
769 
770 /*
771  * Integer conversion
772  */
773 
774 int
fpu_int_l(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd,uint rm)775 fpu_int_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
776     uint fd, uint rm)
777 {
778 	uint64_t raw;
779 	uint32_t oldrm;
780 
781 	if (ft != 0)
782 		return SIGILL;
783 	if (fmt != FMT_S && fmt != FMT_D)
784 		return SIGILL;
785 
786 	raw = fpu_load(p, tf, fmt, fs);
787 
788 	/* round towards required mode */
789 	oldrm = tf->fsr & FPCSR_RM_MASK;
790 	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
791 	if (fmt == FMT_S)
792 		raw = float32_to_int64((float32)raw);
793 	else
794 		raw = float64_to_int64((float64)raw);
795 	/* restore rounding mode */
796 	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | oldrm;
797 
798 	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
799 		fpu_store(p, tf, fmt, fd, raw);
800 
801 	return 0;
802 }
803 
804 int
fpu_int_w(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd,uint rm)805 fpu_int_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
806     uint fd, uint rm)
807 {
808 	uint64_t raw;
809 	uint32_t oldrm;
810 
811 	if (ft != 0)
812 		return SIGILL;
813 	if (fmt != FMT_S && fmt != FMT_D)
814 		return SIGILL;
815 
816 	raw = fpu_load(p, tf, fmt, fs);
817 
818 	/* round towards required mode */
819 	oldrm = tf->fsr & FPCSR_RM_MASK;
820 	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
821 	if (fmt == FMT_S)
822 		raw = float32_to_int32((float32)raw);
823 	else
824 		raw = float64_to_int32((float64)raw);
825 	/* restore rounding mode */
826 	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | oldrm;
827 
828 	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
829 		fpu_store(p, tf, fmt, fd, raw);
830 
831 	return 0;
832 }
833 
834 /*
835  * FPU Instruction emulation
836  */
837 
838 int
fpu_abs(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)839 fpu_abs(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
840     uint fd)
841 {
842 	uint64_t raw;
843 
844 	if (ft != 0)
845 		return SIGILL;
846 	if (fmt != FMT_S && fmt != FMT_D)
847 		return SIGILL;
848 
849 	raw = fpu_load(p, tf, fmt, fs);
850 	/* clear sign bit unless NaN */
851 	if (fmt == FMT_S) {
852 		float32 f32 = (float32)raw;
853 		if (float32_is_nan(f32)) {
854 			float_set_invalid();
855 		} else {
856 			f32 &= ~(1L << 31);
857 			raw = (uint64_t)f32;
858 		}
859 	} else {
860 		float64 f64 = (float64)raw;
861 		if (float64_is_nan(f64)) {
862 			float_set_invalid();
863 		} else {
864 			f64 &= ~(1L << 63);
865 			raw = (uint64_t)f64;
866 		}
867 	}
868 	fpu_store(p, tf, fmt, fd, raw);
869 
870 	return 0;
871 }
872 
873 int
fpu_add(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)874 fpu_add(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
875     uint fd)
876 {
877 	uint64_t raw1, raw2, rslt;
878 
879 	if (fmt != FMT_S && fmt != FMT_D)
880 		return SIGILL;
881 
882 	raw1 = fpu_load(p, tf, fmt, fs);
883 	raw2 = fpu_load(p, tf, fmt, ft);
884 	if (fmt == FMT_S) {
885 		float32 f32 = float32_add((float32)raw1, (float32)raw2);
886 		rslt = (uint64_t)f32;
887 	} else {
888 		float64 f64 = float64_add((float64)raw1, (float64)raw2);
889 		rslt = (uint64_t)f64;
890 	}
891 	fpu_store(p, tf, fmt, fd, rslt);
892 
893 	return 0;
894 }
895 
896 int
fpu_c(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd,uint op)897 fpu_c(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
898     uint fd, uint op)
899 {
900 	uint64_t raw1, raw2;
901 	uint cc, lt, eq, uo;
902 
903 	if ((fd & 0x03) != 0)
904 		return SIGILL;
905 	if (fmt != FMT_S && fmt != FMT_D)
906 		return SIGILL;
907 
908 	lt = eq = uo = 0;
909 	cc = fd >> 2;
910 
911 	raw1 = fpu_load(p, tf, fmt, fs);
912 	raw2 = fpu_load(p, tf, fmt, ft);
913 
914 	if (fmt == FMT_S) {
915 		float32 f32a = (float32)raw1;
916 		float32 f32b = (float32)raw2;
917 		if (float32_is_nan(f32a)) {
918 			uo = 1 << 0;
919 			if (float32_is_signaling_nan(f32a))
920 				op |= 0x08;	/* force invalid exception */
921 		}
922 		if (float32_is_nan(f32b)) {
923 			uo = 1 << 0;
924 			if (float32_is_signaling_nan(f32b))
925 				op |= 0x08;	/* force invalid exception */
926 		}
927 		if (uo == 0) {
928 			if (float32_eq(f32a, f32b))
929 				eq = 1 << 1;
930 			else if (float32_lt(f32a, f32b))
931 				lt = 1 << 2;
932 		}
933 	} else {
934 		float64 f64a = (float64)raw1;
935 		float64 f64b = (float64)raw2;
936 		if (float64_is_nan(f64a)) {
937 			uo = 1 << 0;
938 			if (float64_is_signaling_nan(f64a))
939 				op |= 0x08;	/* force invalid exception */
940 		}
941 		if (float64_is_nan(f64b)) {
942 			uo = 1 << 0;
943 			if (float64_is_signaling_nan(f64b))
944 				op |= 0x08;	/* force invalid exception */
945 		}
946 		if (uo == 0) {
947 			if (float64_eq(f64a, f64b))
948 				eq = 1 << 1;
949 			else if (float64_lt(f64a, f64b))
950 				lt = 1 << 2;
951 		}
952 	}
953 
954 	if (uo && (op & 0x08)) {
955 		float_set_invalid();
956 		if (tf->fsr & FPCSR_E_V) {
957 			/* comparison result intentionally not written */
958 			goto skip;
959 		}
960 	}
961 
962 	if ((uo | eq | lt) & op)
963 		tf->fsr |= FPCSR_CONDVAL(cc);
964 	else
965 		tf->fsr &= ~FPCSR_CONDVAL(cc);
966 skip:
967 
968 	return 0;
969 }
970 
971 int
fpu_ceil_l(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)972 fpu_ceil_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
973     uint fd)
974 {
975 	/* round towards positive infinity */
976 	return fpu_int_l(p, tf, fmt, ft, fs, fd, FP_RP);
977 }
978 
979 int
fpu_ceil_w(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)980 fpu_ceil_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
981     uint fd)
982 {
983 	/* round towards positive infinity */
984 	return fpu_int_w(p, tf, fmt, ft, fs, fd, FP_RP);
985 }
986 
987 int
fpu_cvt_d(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)988 fpu_cvt_d(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
989     uint fd)
990 {
991 	uint64_t raw;
992 
993 	if (ft != 0)
994 		return SIGILL;
995 	if (fmt == FMT_D)
996 		return SIGILL;
997 
998 	raw = fpu_load(p, tf, fmt, fs);
999 	switch (fmt) {
1000 	case FMT_L:
1001 		raw = int64_to_float64((int64_t)raw);
1002 		break;
1003 	case FMT_S:
1004 		raw = float32_to_float64((float32)raw);
1005 		break;
1006 	case FMT_W:
1007 		raw = int32_to_float64((int32_t)raw);
1008 		break;
1009 	}
1010 	fpu_store(p, tf, fmt, fd, raw);
1011 
1012 	return 0;
1013 }
1014 
1015 int
fpu_cvt_l(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1016 fpu_cvt_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1017     uint fd)
1018 {
1019 	uint64_t raw;
1020 	uint32_t rm;
1021 
1022 	if (ft != 0)
1023 		return SIGILL;
1024 	if (fmt != FMT_S && fmt != FMT_D)
1025 		return SIGILL;
1026 
1027 	rm = tf->fsr & FPCSR_RM_MASK;
1028 	raw = fpu_load(p, tf, fmt, fs);
1029 	if (fmt == FMT_D) {
1030 		if (rm == FP_RZ)
1031 			raw = float64_to_int64_round_to_zero((float64)raw);
1032 		else
1033 			raw = float64_to_int64((float64)raw);
1034 	} else {
1035 		if (rm == FP_RZ)
1036 			raw = float32_to_int64_round_to_zero((float32)raw);
1037 		else
1038 			raw = float32_to_int64((float32)raw);
1039 	}
1040 	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
1041 		fpu_store(p, tf, fmt, fd, raw);
1042 
1043 	return 0;
1044 }
1045 
1046 int
fpu_cvt_s(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1047 fpu_cvt_s(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1048     uint fd)
1049 {
1050 	uint64_t raw;
1051 
1052 	if (ft != 0)
1053 		return SIGILL;
1054 	if (fmt == FMT_S)
1055 		return SIGILL;
1056 
1057 	raw = fpu_load(p, tf, fmt, fs);
1058 	switch (fmt) {
1059 	case FMT_D:
1060 		raw = float64_to_float32((float64)raw);
1061 		break;
1062 	case FMT_L:
1063 		raw = int64_to_float32((int64_t)raw);
1064 		break;
1065 	case FMT_W:
1066 		raw = int32_to_float32((int32_t)raw);
1067 		break;
1068 	}
1069 	fpu_store(p, tf, fmt, fd, raw);
1070 
1071 	return 0;
1072 }
1073 
1074 int
fpu_cvt_w(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1075 fpu_cvt_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1076     uint fd)
1077 {
1078 	uint64_t raw;
1079 	uint32_t rm;
1080 
1081 	if (ft != 0)
1082 		return SIGILL;
1083 	if (fmt != FMT_S && fmt != FMT_D)
1084 		return SIGILL;
1085 
1086 	rm = tf->fsr & FPCSR_RM_MASK;
1087 	raw = fpu_load(p, tf, fmt, fs);
1088 	if (fmt == FMT_D) {
1089 		if (rm == FP_RZ)
1090 			raw = float64_to_int32_round_to_zero((float64)raw);
1091 		else
1092 			raw = float64_to_int32((float64)raw);
1093 	} else {
1094 		if (rm == FP_RZ)
1095 			raw = float32_to_int32_round_to_zero((float32)raw);
1096 		else
1097 			raw = float32_to_int32((float32)raw);
1098 	}
1099 	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
1100 		fpu_store(p, tf, fmt, fd, raw);
1101 
1102 	return 0;
1103 }
1104 
1105 int
fpu_div(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1106 fpu_div(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1107     uint fd)
1108 {
1109 	uint64_t raw1, raw2, rslt;
1110 
1111 	if (fmt != FMT_S && fmt != FMT_D)
1112 		return SIGILL;
1113 
1114 	raw1 = fpu_load(p, tf, fmt, fs);
1115 	raw2 = fpu_load(p, tf, fmt, ft);
1116 	if (fmt == FMT_S) {
1117 		float32 f32 = float32_div((float32)raw1, (float32)raw2);
1118 		rslt = (uint64_t)f32;
1119 	} else {
1120 		float64 f64 = float64_div((float64)raw1, (float64)raw2);
1121 		rslt = (uint64_t)f64;
1122 	}
1123 	fpu_store(p, tf, fmt, fd, rslt);
1124 
1125 	return 0;
1126 }
1127 
1128 int
fpu_floor_l(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1129 fpu_floor_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1130     uint fd)
1131 {
1132 	/* round towards negative infinity */
1133 	return fpu_int_l(p, tf, fmt, ft, fs, fd, FP_RM);
1134 }
1135 
1136 int
fpu_floor_w(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1137 fpu_floor_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1138     uint fd)
1139 {
1140 	/* round towards negative infinity */
1141 	return fpu_int_w(p, tf, fmt, ft, fs, fd, FP_RM);
1142 }
1143 
1144 int
fpu_madd(struct proc * p,struct trapframe * tf,uint fmt,uint fr,uint ft,uint fs,uint fd)1145 fpu_madd(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
1146     uint fs, uint fd)
1147 {
1148 	uint64_t raw1, raw2, raw3, rslt;
1149 
1150 	if (fmt != FMT_S && fmt != FMT_D)
1151 		return SIGILL;
1152 
1153 	raw1 = fpu_load(p, tf, fmt, fs);
1154 	raw2 = fpu_load(p, tf, fmt, ft);
1155 	raw3 = fpu_load(p, tf, fmt, fr);
1156 	if (fmt == FMT_S) {
1157 		float32 f32 = float32_add(
1158 		    float32_mul((float32)raw1, (float32)raw2),
1159 		    (float32)raw3);
1160 		rslt = (uint64_t)f32;
1161 	} else {
1162 		float64 f64 = float64_add(
1163 		    float64_mul((float64)raw1, (float64)raw2),
1164 		    (float64)raw3);
1165 		rslt = (uint64_t)f64;
1166 	}
1167 	fpu_store(p, tf, fmt, fd, rslt);
1168 
1169 	return 0;
1170 }
1171 
1172 int
fpu_mov(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1173 fpu_mov(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1174     uint fd)
1175 {
1176 	uint64_t raw;
1177 
1178 	if (ft != 0)
1179 		return SIGILL;
1180 	if (fmt != FMT_S && fmt != FMT_D)
1181 		return SIGILL;
1182 
1183 	raw = fpu_load(p, tf, fmt, fs);
1184 	fpu_store(p, tf, fmt, fd, raw);
1185 
1186 	return 0;
1187 }
1188 
1189 int
fpu_movcf(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1190 fpu_movcf(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1191     uint fd)
1192 {
1193 	uint64_t raw;
1194 	uint cc, istf;
1195 	int condition;
1196 
1197 	if ((ft & 0x02) != 0)
1198 		return SIGILL;
1199 	cc = ft >> 2;
1200 	if (fmt != FMT_S && fmt != FMT_D)
1201 		return SIGILL;
1202 
1203 	condition = tf->fsr & FPCSR_CONDVAL(cc);
1204 	istf = ft & COPz_BC_TF_MASK;
1205 	if ((!condition && !istf) /*movf*/ || (condition && istf) /*movt*/) {
1206 		raw = fpu_load(p, tf, fmt, fs);
1207 		fpu_store(p, tf, fmt, fd, raw);
1208 	}
1209 
1210 	return 0;
1211 }
1212 
1213 int
fpu_movn(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1214 fpu_movn(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1215     uint fd)
1216 {
1217 	register_t *regs = (register_t *)tf;
1218 	uint64_t raw;
1219 
1220 	if (fmt != FMT_S && fmt != FMT_D)
1221 		return SIGILL;
1222 
1223 	if (ft != ZERO && regs[ft] != 0) {
1224 		raw = fpu_load(p, tf, fmt, fs);
1225 		fpu_store(p, tf, fmt, fd, raw);
1226 	}
1227 
1228 	return 0;
1229 }
1230 
1231 int
fpu_movz(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1232 fpu_movz(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1233     uint fd)
1234 {
1235 	register_t *regs = (register_t *)tf;
1236 	uint64_t raw;
1237 
1238 	if (fmt != FMT_S && fmt != FMT_D)
1239 		return SIGILL;
1240 
1241 	if (ft == ZERO || regs[ft] == 0) {
1242 		raw = fpu_load(p, tf, fmt, fs);
1243 		fpu_store(p, tf, fmt, fd, raw);
1244 	}
1245 
1246 	return 0;
1247 }
1248 
1249 int
fpu_msub(struct proc * p,struct trapframe * tf,uint fmt,uint fr,uint ft,uint fs,uint fd)1250 fpu_msub(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
1251     uint fs, uint fd)
1252 {
1253 	uint64_t raw1, raw2, raw3, rslt;
1254 
1255 	if (fmt != FMT_S && fmt != FMT_D)
1256 		return SIGILL;
1257 
1258 	raw1 = fpu_load(p, tf, fmt, fs);
1259 	raw2 = fpu_load(p, tf, fmt, ft);
1260 	raw3 = fpu_load(p, tf, fmt, fr);
1261 	if (fmt == FMT_S) {
1262 		float32 f32 = float32_sub(
1263 		    float32_mul((float32)raw1, (float32)raw2),
1264 		    (float32)raw3);
1265 		rslt = (uint64_t)f32;
1266 	} else {
1267 		float64 f64 = float64_sub(
1268 		    float64_mul((float64)raw1, (float64)raw2),
1269 		    (float64)raw3);
1270 		rslt = (uint64_t)f64;
1271 	}
1272 	fpu_store(p, tf, fmt, fd, rslt);
1273 
1274 	return 0;
1275 }
1276 
1277 int
fpu_mul(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1278 fpu_mul(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1279     uint fd)
1280 {
1281 	uint64_t raw1, raw2, rslt;
1282 
1283 	if (fmt != FMT_S && fmt != FMT_D)
1284 		return SIGILL;
1285 
1286 	raw1 = fpu_load(p, tf, fmt, fs);
1287 	raw2 = fpu_load(p, tf, fmt, ft);
1288 	if (fmt == FMT_S) {
1289 		float32 f32 = float32_mul((float32)raw1, (float32)raw2);
1290 		rslt = (uint64_t)f32;
1291 	} else {
1292 		float64 f64 = float64_mul((float64)raw1, (float64)raw2);
1293 		rslt = (uint64_t)f64;
1294 	}
1295 	fpu_store(p, tf, fmt, fd, rslt);
1296 
1297 	return 0;
1298 }
1299 
1300 int
fpu_neg(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1301 fpu_neg(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1302     uint fd)
1303 {
1304 	uint64_t raw;
1305 
1306 	if (ft != 0)
1307 		return SIGILL;
1308 	if (fmt != FMT_S && fmt != FMT_D)
1309 		return SIGILL;
1310 
1311 	raw = fpu_load(p, tf, fmt, fs);
1312 	/* flip sign bit unless NaN */
1313 	if (fmt == FMT_S) {
1314 		float32 f32 = (float32)raw;
1315 		if (float32_is_nan(f32)) {
1316 			float_set_invalid();
1317 		} else {
1318 			f32 ^= 1L << 31;
1319 			raw = (uint64_t)f32;
1320 		}
1321 	} else {
1322 		float64 f64 = (float64)raw;
1323 		if (float64_is_nan(f64)) {
1324 			float_set_invalid();
1325 		} else {
1326 			f64 ^= 1L << 63;
1327 			raw = (uint64_t)f64;
1328 		}
1329 	}
1330 	fpu_store(p, tf, fmt, fd, raw);
1331 
1332 	return 0;
1333 }
1334 
1335 int
fpu_nmadd(struct proc * p,struct trapframe * tf,uint fmt,uint fr,uint ft,uint fs,uint fd)1336 fpu_nmadd(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
1337     uint fs, uint fd)
1338 {
1339 	uint64_t raw1, raw2, raw3, rslt;
1340 
1341 	if (fmt != FMT_S && fmt != FMT_D)
1342 		return SIGILL;
1343 
1344 	raw1 = fpu_load(p, tf, fmt, fs);
1345 	raw2 = fpu_load(p, tf, fmt, ft);
1346 	raw3 = fpu_load(p, tf, fmt, fr);
1347 	if (fmt == FMT_S) {
1348 		float32 f32 = float32_add(
1349 		    float32_mul((float32)raw1, (float32)raw2),
1350 		    (float32)raw3);
1351 		if (float32_is_nan(f32))
1352 			float_set_invalid();
1353 		else
1354 			f32 ^= 1L << 31;
1355 		rslt = (uint64_t)f32;
1356 	} else {
1357 		float64 f64 = float64_add(
1358 		    float64_mul((float64)raw1, (float64)raw2),
1359 		    (float64)raw3);
1360 		if (float64_is_nan(f64))
1361 			float_set_invalid();
1362 		else
1363 			f64 ^= 1L << 63;
1364 		rslt = (uint64_t)f64;
1365 	}
1366 	fpu_store(p, tf, fmt, fd, rslt);
1367 
1368 	return 0;
1369 }
1370 
1371 int
fpu_nmsub(struct proc * p,struct trapframe * tf,uint fmt,uint fr,uint ft,uint fs,uint fd)1372 fpu_nmsub(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
1373     uint fs, uint fd)
1374 {
1375 	uint64_t raw1, raw2, raw3, rslt;
1376 
1377 	if (fmt != FMT_S && fmt != FMT_D)
1378 		return SIGILL;
1379 
1380 	raw1 = fpu_load(p, tf, fmt, fs);
1381 	raw2 = fpu_load(p, tf, fmt, ft);
1382 	raw3 = fpu_load(p, tf, fmt, fr);
1383 	if (fmt == FMT_S) {
1384 		float32 f32 = float32_sub(
1385 		    float32_mul((float32)raw1, (float32)raw2),
1386 		    (float32)raw3);
1387 		if (float32_is_nan(f32))
1388 			float_set_invalid();
1389 		else
1390 			f32 ^= 1L << 31;
1391 		rslt = (uint64_t)f32;
1392 	} else {
1393 		float64 f64 = float64_sub(
1394 		    float64_mul((float64)raw1, (float64)raw2),
1395 		    (float64)raw3);
1396 		if (float64_is_nan(f64))
1397 			float_set_invalid();
1398 		else
1399 			f64 ^= 1L << 63;
1400 		rslt = (uint64_t)f64;
1401 	}
1402 	fpu_store(p, tf, fmt, fd, rslt);
1403 
1404 	return 0;
1405 }
1406 
1407 int
fpu_recip(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1408 fpu_recip(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1409     uint fd)
1410 {
1411 	uint64_t raw;
1412 
1413 	if (ft != 0)
1414 		return SIGILL;
1415 	if (fmt != FMT_S && fmt != FMT_D)
1416 		return SIGILL;
1417 
1418 	raw = fpu_load(p, tf, fmt, fs);
1419 	if (fmt == FMT_S) {
1420 		float32 f32 = float32_div(ONE_F32, (float32)raw);
1421 		raw = (uint64_t)f32;
1422 	} else {
1423 		float64 f64 = float64_div(ONE_F64, (float64)raw);
1424 		raw = (uint64_t)f64;
1425 	}
1426 	fpu_store(p, tf, fmt, fd, raw);
1427 
1428 	return 0;
1429 }
1430 
1431 int
fpu_round_l(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1432 fpu_round_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1433     uint fd)
1434 {
1435 	/* round towards nearest */
1436 	return fpu_int_l(p, tf, fmt, ft, fs, fd, FP_RN);
1437 }
1438 
1439 int
fpu_round_w(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1440 fpu_round_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1441     uint fd)
1442 {
1443 	/* round towards nearest */
1444 	return fpu_int_w(p, tf, fmt, ft, fs, fd, FP_RN);
1445 }
1446 
1447 int
fpu_rsqrt(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1448 fpu_rsqrt(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1449     uint fd)
1450 {
1451 	uint64_t raw;
1452 
1453 	if (ft != 0)
1454 		return SIGILL;
1455 	if (fmt != FMT_S && fmt != FMT_D)
1456 		return SIGILL;
1457 
1458 	raw = fpu_load(p, tf, fmt, fs);
1459 	if (fmt == FMT_S) {
1460 		float32 f32 = float32_sqrt((float32)raw);
1461 		if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=
1462 		    (FPCSR_C_V | FPCSR_E_V))
1463 			f32 = float32_div(ONE_F32, f32);
1464 		raw = (uint64_t)f32;
1465 	} else {
1466 		float64 f64 = float64_sqrt((float64)raw);
1467 		if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=
1468 		    (FPCSR_C_V | FPCSR_E_V))
1469 			f64 = float64_div(ONE_F64, f64);
1470 		raw = (uint64_t)f64;
1471 	}
1472 	fpu_store(p, tf, fmt, fd, raw);
1473 
1474 	return 0;
1475 }
1476 
1477 int
fpu_sqrt(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1478 fpu_sqrt(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1479     uint fd)
1480 {
1481 	uint64_t raw;
1482 
1483 	if (ft != 0)
1484 		return SIGILL;
1485 	if (fmt != FMT_S && fmt != FMT_D)
1486 		return SIGILL;
1487 
1488 	raw = fpu_load(p, tf, fmt, fs);
1489 	if (fmt == FMT_S) {
1490 		float32 f32 = float32_sqrt((float32)raw);
1491 		raw = (uint64_t)f32;
1492 	} else {
1493 		float64 f64 = float64_sqrt((float64)raw);
1494 		raw = (uint64_t)f64;
1495 	}
1496 	fpu_store(p, tf, fmt, fd, raw);
1497 
1498 	return 0;
1499 }
1500 
1501 int
fpu_sub(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1502 fpu_sub(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1503     uint fd)
1504 {
1505 	uint64_t raw1, raw2, rslt;
1506 
1507 	if (fmt != FMT_S && fmt != FMT_D)
1508 		return SIGILL;
1509 
1510 	raw1 = fpu_load(p, tf, fmt, fs);
1511 	raw2 = fpu_load(p, tf, fmt, ft);
1512 	if (fmt == FMT_S) {
1513 		float32 f32 = float32_sub((float32)raw1, (float32)raw2);
1514 		rslt = (uint64_t)f32;
1515 	} else {
1516 		float64 f64 = float64_sub((float64)raw1, (float64)raw2);
1517 		rslt = (uint64_t)f64;
1518 	}
1519 	fpu_store(p, tf, fmt, fd, rslt);
1520 
1521 	return 0;
1522 }
1523 
1524 int
fpu_trunc_l(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1525 fpu_trunc_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1526     uint fd)
1527 {
1528 	/* round towards zero */
1529 	return fpu_int_l(p, tf, fmt, ft, fs, fd, FP_RZ);
1530 }
1531 
1532 int
fpu_trunc_w(struct proc * p,struct trapframe * tf,uint fmt,uint ft,uint fs,uint fd)1533 fpu_trunc_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1534     uint fd)
1535 {
1536 	/* round towards zero */
1537 	return fpu_int_w(p, tf, fmt, ft, fs, fd, FP_RZ);
1538 }
1539 
1540 #ifdef FPUEMUL
1541 
1542 /*
1543  * Emulate a COP1 non-FPU instruction.
1544  */
1545 int
nofpu_emulate_cop1(struct proc * p,struct trapframe * tf,uint32_t insn,union sigval * sv)1546 nofpu_emulate_cop1(struct proc *p, struct trapframe *tf, uint32_t insn,
1547     union sigval *sv)
1548 {
1549 	register_t *regs = (register_t *)tf;
1550 	InstFmt inst;
1551 	int32_t cval;
1552 
1553 	inst = *(InstFmt *)&insn;
1554 
1555 	switch (inst.RType.rs) {
1556 	case OP_MF:
1557 		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1558 			return SIGILL;
1559 		if (inst.FRType.ft != ZERO)
1560 			regs[inst.FRType.ft] = (int32_t)
1561 			    ((uint64_t *)p->p_md.md_regs)
1562 			      [FPBASE + inst.FRType.fs];
1563 		break;
1564 	case OP_DMF:
1565 		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1566 			return SIGILL;
1567 		if ((tf->sr & SR_FR_32) != 0 || (inst.FRType.fs & 1) == 0) {
1568 			if (inst.FRType.ft != ZERO)
1569 				regs[inst.FRType.ft] =
1570 				    fpu_load(p, tf, FMT_L, inst.FRType.fs);
1571 		}
1572 		break;
1573 	case OP_CF:
1574 		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1575 			return SIGILL;
1576 		if (inst.FRType.ft != ZERO) {
1577 			switch (inst.FRType.fs) {
1578 			case 0:	/* FPC_ID */
1579 				cval = MIPS_SOFT << 8;
1580 				break;
1581 			case 31: /* FPC_CSR */
1582 				cval = (int32_t)tf->fsr;
1583 				break;
1584 			default:
1585 				cval = 0;
1586 				break;
1587 			}
1588 			regs[inst.FRType.ft] = (int64_t)cval;
1589 		}
1590 		break;
1591 	case OP_MT:
1592 		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1593 			return SIGILL;
1594 		((uint64_t *)p->p_md.md_regs)[FPBASE + inst.FRType.fs] =
1595 		    (int32_t)regs[inst.FRType.ft];
1596 		break;
1597 	case OP_DMT:
1598 		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1599 			return SIGILL;
1600 		if ((tf->sr & SR_FR_32) != 0 || (inst.FRType.fs & 1) == 0) {
1601 			fpu_store(p, tf, FMT_L, inst.FRType.fs,
1602 			    regs[inst.FRType.ft]);
1603 		}
1604 		break;
1605 	case OP_CT:
1606 		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1607 			return SIGILL;
1608 		cval = (int32_t)regs[inst.FRType.ft];
1609 		switch (inst.FRType.fs) {
1610 		case 31: /* FPC_CSR */
1611 			cval &= ~FPCSR_C_E;
1612 			tf->fsr = cval;
1613 			break;
1614 		case 0:	/* FPC_ID */
1615 		default:
1616 			break;
1617 		}
1618 		break;
1619 	case OP_BC:
1620 	   {
1621 		uint cc, nd, istf;
1622 		int condition;
1623 		vaddr_t dest;
1624 		uint32_t dinsn;
1625 
1626 		cc = (inst.RType.rt & COPz_BC_CC_MASK) >> COPz_BC_CC_SHIFT;
1627 		nd = inst.RType.rt & COPz_BCL_TF_MASK;
1628 		istf = inst.RType.rt & COPz_BC_TF_MASK;
1629 		condition = tf->fsr & FPCSR_CONDVAL(cc);
1630 		if ((!condition && !istf) /*bc1f*/ ||
1631 		    (condition && istf) /*bc1t*/) {
1632 			/*
1633 			 * Branch taken: if the delay slot is not a nop,
1634 			 * copy the delay slot instruction to the dedicated
1635 			 * relocation page, in order to be able to have the
1636 			 * cpu process it and give control back to the
1637 			 * kernel, for us to redirect to the branch
1638 			 * destination.
1639 			 */
1640 			/* inline MipsEmulateBranch(tf, tf->pc, tf->fsr, insn)*/
1641 			dest = tf->pc + 4 + ((short)inst.IType.imm << 2);
1642 			if (copyinsn(p, tf->pc + 4, &dinsn) != 0) {
1643 				sv->sival_ptr = (void *)(tf->pc + 4);
1644 				return SIGSEGV;
1645 			}
1646 			if (dinsn == 0x00000000 /* nop */ ||
1647 			    dinsn == 0x00000040 /* ssnop */) {
1648 				tf->pc = dest;
1649 			} else {
1650 				if (fpe_branch_emulate(curproc, tf, dinsn,
1651 				    dest) != 0)
1652 					return SIGILL;
1653 			}
1654 		} else {
1655 			/*
1656 			 * Branch not taken: skip the instruction, and
1657 			 * skip the delay slot if it was a `branch likely'
1658 			 * instruction.
1659 			 */
1660 			tf->pc += 4;
1661 			if (nd)
1662 				tf->pc += 4;
1663 		}
1664 	    }
1665 		break;
1666 	}
1667 
1668 	return 0;
1669 }
1670 
1671 /*
1672  * Emulate a COP1X non-FPU instruction.
1673  */
1674 int
nofpu_emulate_cop1x(struct proc * p,struct trapframe * tf,uint32_t insn,union sigval * sv)1675 nofpu_emulate_cop1x(struct proc *p, struct trapframe *tf, uint32_t insn,
1676     union sigval *sv)
1677 {
1678 	register_t *regs = (register_t *)tf;
1679 	InstFmt inst;
1680 	vaddr_t va;
1681 	uint64_t ddata;
1682 	uint32_t wdata;
1683 
1684 	inst = *(InstFmt *)&insn;
1685 	switch (inst.FRType.func) {
1686 	case OP_LDXC1:
1687 		if (inst.FQType.fs != 0)
1688 			return SIGILL;
1689 		va = (vaddr_t)regs[inst.FQType.fr] +
1690 		    (vaddr_t)regs[inst.FQType.ft];
1691 		if ((va & 0x07) != 0) {
1692 			sv->sival_ptr = (void *)va;
1693 			return SIGBUS;
1694 		}
1695 		if (copyin((const void *)va, &ddata, sizeof ddata) != 0) {
1696 			sv->sival_ptr = (void *)va;
1697 			return SIGSEGV;
1698 		}
1699 		if ((tf->sr & SR_FR_32) != 0 || (inst.FQType.fd & 1) == 0)
1700 			fpu_store(p, tf, FMT_L, inst.FQType.fd, ddata);
1701 		break;
1702 	case OP_LWXC1:
1703 		if (inst.FQType.fs != 0)
1704 			return SIGILL;
1705 		va = (vaddr_t)regs[inst.FQType.fr] +
1706 		    (vaddr_t)regs[inst.FQType.ft];
1707 		if ((va & 0x03) != 0) {
1708 			sv->sival_ptr = (void *)va;
1709 			return SIGBUS;
1710 		}
1711 		if (copyin((const void *)va, &wdata, sizeof wdata) != 0) {
1712 			sv->sival_ptr = (void *)va;
1713 			return SIGSEGV;
1714 		}
1715 		((uint64_t *)p->p_md.md_regs)[FPBASE + inst.FQType.fd] = wdata;
1716 		break;
1717 	case OP_SDXC1:
1718 		if (inst.FQType.fd != 0)
1719 			return SIGILL;
1720 		va = (vaddr_t)regs[inst.FQType.fr] +
1721 		    (vaddr_t)regs[inst.FQType.ft];
1722 		if ((va & 0x07) != 0) {
1723 			sv->sival_ptr = (void *)va;
1724 			return SIGBUS;
1725 		}
1726 		if ((tf->sr & SR_FR_32) != 0 || (inst.FQType.fs & 1) == 0)
1727 			ddata = fpu_load(p, tf, FMT_L, inst.FQType.fs);
1728 		else {
1729 			/* undefined behaviour, don't expose stack content */
1730 			ddata = 0;
1731 		}
1732 		if (copyout(&ddata, (void *)va, sizeof ddata) != 0) {
1733 			sv->sival_ptr = (void *)va;
1734 			return SIGSEGV;
1735 		}
1736 		break;
1737 	case OP_SWXC1:
1738 		if (inst.FQType.fd != 0)
1739 			return SIGILL;
1740 		va = (vaddr_t)regs[inst.FQType.fr] +
1741 		    (vaddr_t)regs[inst.FQType.ft];
1742 		if ((va & 0x03) != 0) {
1743 			sv->sival_ptr = (void *)va;
1744 			return SIGBUS;
1745 		}
1746 		wdata = ((uint64_t *)p->p_md.md_regs)[FPBASE + inst.FQType.fs];
1747 		if (copyout(&wdata, (void *)va, sizeof wdata) != 0) {
1748 			sv->sival_ptr = (void *)va;
1749 			return SIGSEGV;
1750 		}
1751 		break;
1752 	case OP_PREFX:
1753 		/* nothing to do */
1754 		break;
1755 	}
1756 
1757 	return 0;
1758 }
1759 
1760 /*
1761  * Emulate a load/store instruction on FPU registers.
1762  */
1763 int
nofpu_emulate_loadstore(struct proc * p,struct trapframe * tf,uint32_t insn,union sigval * sv)1764 nofpu_emulate_loadstore(struct proc *p, struct trapframe *tf, uint32_t insn,
1765     union sigval *sv)
1766 {
1767 	register_t *regs = (register_t *)tf;
1768 	InstFmt inst;
1769 	vaddr_t va;
1770 	uint64_t ddata;
1771 	uint32_t wdata;
1772 
1773 	inst = *(InstFmt *)&insn;
1774 	switch (inst.IType.op) {
1775 	case OP_LDC1:
1776 		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1777 		if ((va & 0x07) != 0) {
1778 			sv->sival_ptr = (void *)va;
1779 			return SIGBUS;
1780 		}
1781 		if (copyin((const void *)va, &ddata, sizeof ddata) != 0) {
1782 			sv->sival_ptr = (void *)va;
1783 			return SIGSEGV;
1784 		}
1785 		if ((tf->sr & SR_FR_32) != 0 || (inst.IType.rt & 1) == 0)
1786 			fpu_store(p, tf, FMT_L, inst.IType.rt, ddata);
1787 		break;
1788 	case OP_LWC1:
1789 		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1790 		if ((va & 0x03) != 0) {
1791 			sv->sival_ptr = (void *)va;
1792 			return SIGBUS;
1793 		}
1794 		if (copyin((const void *)va, &wdata, sizeof wdata) != 0) {
1795 			sv->sival_ptr = (void *)va;
1796 			return SIGSEGV;
1797 		}
1798 		((uint64_t *)p->p_md.md_regs)[FPBASE + inst.IType.rt] = wdata;
1799 		break;
1800 	case OP_SDC1:
1801 		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1802 		if ((va & 0x07) != 0) {
1803 			sv->sival_ptr = (void *)va;
1804 			return SIGBUS;
1805 		}
1806 		if ((tf->sr & SR_FR_32) != 0 || (inst.IType.rt & 1) == 0)
1807 			ddata = fpu_load(p, tf, FMT_L, inst.IType.rt);
1808 		else {
1809 			/* undefined behaviour, don't expose stack content */
1810 			ddata = 0;
1811 		}
1812 		if (copyout(&ddata, (void *)va, sizeof ddata) != 0) {
1813 			sv->sival_ptr = (void *)va;
1814 			return SIGSEGV;
1815 		}
1816 		break;
1817 	case OP_SWC1:
1818 		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1819 		if ((va & 0x03) != 0) {
1820 			sv->sival_ptr = (void *)va;
1821 			return SIGBUS;
1822 		}
1823 		wdata = ((uint64_t *)p->p_md.md_regs)[FPBASE + inst.IType.rt];
1824 		if (copyout(&wdata, (void *)va, sizeof wdata) != 0) {
1825 			sv->sival_ptr = (void *)va;
1826 			return SIGSEGV;
1827 		}
1828 		break;
1829 	}
1830 
1831 	return 0;
1832 }
1833 
1834 /*
1835  * Emulate MOVF and MOVT.
1836  */
1837 int
nofpu_emulate_movci(struct trapframe * tf,uint32_t insn)1838 nofpu_emulate_movci(struct trapframe *tf, uint32_t insn)
1839 {
1840 	register_t *regs = (register_t *)tf;
1841 	InstFmt inst;
1842 	uint cc, istf;
1843 	int condition;
1844 
1845 	inst = *(InstFmt *)&insn;
1846 	if ((inst.RType.rt & 0x02) != 0 || inst.RType.shamt != 0)
1847 		return SIGILL;
1848 
1849 	cc = inst.RType.rt >> 2;
1850 	istf = inst.RType.rt & COPz_BC_TF_MASK;
1851 	condition = tf->fsr & FPCSR_CONDVAL(cc);
1852 	if ((!condition && !istf) /*movf*/ || (condition && istf) /*movt*/) {
1853 		if (inst.RType.rd != ZERO)
1854 			regs[inst.RType.rd] = regs[inst.RType.rs];
1855 	}
1856 
1857 	return 0;
1858 }
1859 
1860 #endif	/* FPUEMUL */
1861