1 /* $NetBSD: fpu.c,v 1.23 2011/01/23 09:44:59 skrll Exp $ */ 2 3 /* 4 * Copyright (c) 2002 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Matthew Fredette. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * FPU handling for NetBSD/hppa. 34 */ 35 36 #include <sys/cdefs.h> 37 __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.23 2011/01/23 09:44:59 skrll Exp $"); 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/proc.h> 42 #include <sys/signalvar.h> 43 44 #include <uvm/uvm_extern.h> 45 46 #include <machine/cpufunc.h> 47 #include <machine/frame.h> 48 #include <machine/reg.h> 49 #include <machine/pcb.h> 50 #include <machine/pmap.h> 51 52 #include <hppa/hppa/machdep.h> 53 54 #include "../spmath/float.h" 55 #include "../spmath/fpudispatch.h" 56 57 /* Some macros representing opcodes. */ 58 #define OPCODE_NOP 0x08000240 59 #define OPCODE_COPR_0_0 0x30000000 60 61 /* Some macros representing fields in load/store opcodes. */ 62 #define OPCODE_CMPLT_S 0x00002000 63 #define OPCODE_CMPLT_M 0x00000020 64 #define OPCODE_CMPLT_SM (OPCODE_CMPLT_S | OPCODE_CMPLT_M) 65 #define OPCODE_CMPLT_MB OPCODE_CMPLT_M 66 #define OPCODE_CMPLT_MA (OPCODE_CMPLT_S | OPCODE_CMPLT_M) 67 #define OPCODE_CMPLT (OPCODE_CMPLT_S | OPCODE_CMPLT_M) 68 #define OPCODE_DOUBLE 0x08000000 69 #define OPCODE_STORE 0x00000200 70 #define OPCODE_INDEXED 0x00001000 71 72 /* This is nonzero iff we're using a hardware FPU. */ 73 int fpu_present; 74 75 /* If we have any FPU, this is its version. */ 76 u_int fpu_version; 77 78 /* The number of times we have had to switch the FPU context. */ 79 u_int fpu_csw; 80 81 /* In locore.S, this swaps states in and out of the FPU. */ 82 void hppa_fpu_swapout(struct pcb *); 83 void hppa_fpu_swap(struct fpreg *, struct fpreg *); 84 85 #ifdef FPEMUL 86 /* 87 * Given a trapframe and a general register number, the 88 * FRAME_REG macro returns a pointer to that general 89 * register. The _frame_reg_positions array is a lookup 90 * table, since the general registers aren't in order 91 * in a trapframe. 92 * 93 * NB: this more or less assumes that all members of 94 * struct trapframe are u_ints. 95 */ 96 #define FRAME_REG(f, reg, r0) \ 97 ((reg) == 0 ? (&r0) : ((&(f)->tf_t1) + _frame_reg_positions[reg])) 98 #define _FRAME_POSITION(f) \ 99 ((&((struct trapframe *) 0)->f) - (&((struct trapframe *) 0)->tf_t1)) 100 const int _frame_reg_positions[32] = { 101 -1, /* r0 */ 102 _FRAME_POSITION(tf_r1), 103 _FRAME_POSITION(tf_rp), /* r2 */ 104 _FRAME_POSITION(tf_r3), 105 _FRAME_POSITION(tf_r4), 106 _FRAME_POSITION(tf_r5), 107 _FRAME_POSITION(tf_r6), 108 _FRAME_POSITION(tf_r7), 109 _FRAME_POSITION(tf_r8), 110 _FRAME_POSITION(tf_r9), 111 _FRAME_POSITION(tf_r10), 112 _FRAME_POSITION(tf_r11), 113 _FRAME_POSITION(tf_r12), 114 _FRAME_POSITION(tf_r13), 115 _FRAME_POSITION(tf_r14), 116 _FRAME_POSITION(tf_r15), 117 _FRAME_POSITION(tf_r16), 118 _FRAME_POSITION(tf_r17), 119 _FRAME_POSITION(tf_r18), 120 _FRAME_POSITION(tf_t4), /* r19 */ 121 _FRAME_POSITION(tf_t3), /* r20 */ 122 _FRAME_POSITION(tf_t2), /* r21 */ 123 _FRAME_POSITION(tf_t1), /* r22 */ 124 _FRAME_POSITION(tf_arg3), /* r23 */ 125 _FRAME_POSITION(tf_arg2), /* r24 */ 126 _FRAME_POSITION(tf_arg1), /* r25 */ 127 _FRAME_POSITION(tf_arg0), /* r26 */ 128 _FRAME_POSITION(tf_dp), /* r27 */ 129 _FRAME_POSITION(tf_ret0), /* r28 */ 130 _FRAME_POSITION(tf_ret1), /* r29 */ 131 _FRAME_POSITION(tf_sp), /* r30 */ 132 _FRAME_POSITION(tf_r31), 133 }; 134 #endif /* FPEMUL */ 135 136 /* 137 * Bootstraps the FPU. 138 */ 139 void 140 hppa_fpu_bootstrap(u_int ccr_enable) 141 { 142 uint32_t junk[2]; 143 uint32_t vers[2]; 144 extern u_int hppa_fpu_nop0; 145 extern u_int hppa_fpu_nop1; 146 147 /* See if we have a present and functioning hardware FPU. */ 148 fpu_present = (ccr_enable & HPPA_FPUS) == HPPA_FPUS; 149 150 /* Initialize the FPU and get its version. */ 151 if (fpu_present) { 152 153 /* 154 * To somewhat optimize the emulation 155 * assist trap handling and context 156 * switching (to save them from having 157 * to always load and check fpu_present), 158 * there are two instructions in locore.S 159 * that are replaced with nops when 160 * there is a hardware FPU. 161 */ 162 hppa_fpu_nop0 = OPCODE_NOP; 163 hppa_fpu_nop1 = OPCODE_NOP; 164 fcacheall(); 165 166 /* 167 * We track what process has the FPU, 168 * and how many times we have to swap 169 * in and out. 170 */ 171 172 /* 173 * The PA-RISC 1.1 Architecture manual is 174 * pretty clear that the copr,0,0 must be 175 * wrapped in double word stores of fr0, 176 * otherwise its operation is undefined. 177 */ 178 __asm volatile( 179 " ldo %0, %%r22 \n" 180 " fstds %%fr0, 0(%%r22) \n" 181 " ldo %1, %%r22 \n" 182 " copr,0,0 \n" 183 " fstds %%fr0, 0(%%r22) \n" 184 : "=m" (junk), "=m" (vers) : : "r22"); 185 186 /* 187 * Now mark that no process has the FPU, 188 * and disable it, so the first time it 189 * gets used the process' state gets 190 * swapped in. 191 */ 192 fpu_csw = 0; 193 curcpu()->ci_fpu_state = 0; 194 mtctl(ccr_enable & (CCR_MASK ^ HPPA_FPUS), CR_CCR); 195 } 196 #ifdef FPEMUL 197 else 198 /* 199 * XXX This is a hack - to avoid 200 * having to set up the emulator so 201 * it can work for one instruction for 202 * proc0, we dispatch the copr,0,0 opcode 203 * into the emulator directly. 204 */ 205 decode_0c(OPCODE_COPR_0_0, 0, 0, vers); 206 #endif /* FPEMUL */ 207 fpu_version = vers[0]; 208 } 209 210 /* 211 * If the given LWP has its state in the FPU, 212 * flush that state out into the LWP's PCB. 213 */ 214 void 215 hppa_fpu_flush(struct lwp *l) 216 { 217 struct trapframe *tf = l->l_md.md_regs; 218 struct pcb *pcb = lwp_getpcb(l); 219 struct cpu_info *ci = curcpu(); 220 221 if (!fpu_present) 222 return; 223 224 /* 225 * If we have a hardware FPU, and this process' 226 * state is currently in it, swap it out. 227 */ 228 229 if (ci->ci_fpu_state == 0 || 230 ci->ci_fpu_state != tf->tf_cr30) { 231 return; 232 } 233 234 hppa_fpu_swapout(pcb); 235 ci->ci_fpu_state = 0; 236 } 237 238 #ifdef FPEMUL 239 240 /* 241 * This emulates a coprocessor load/store instruction. 242 */ 243 static int hppa_fpu_ls(struct trapframe *, struct lwp *); 244 static int 245 hppa_fpu_ls(struct trapframe *frame, struct lwp *l) 246 { 247 struct pcb *pcb = lwp_getpcb(l); 248 u_int inst, inst_b, inst_x, inst_s, inst_t; 249 int log2size; 250 u_int *base; 251 u_int offset, index, im5; 252 void *fpreg; 253 u_int r0 = 0; 254 int error; 255 256 /* 257 * Get the instruction that we're emulating, 258 * and break it down. Using HP bit notation, 259 * b is a five-bit field starting at bit 10, 260 * x is a five-bit field starting at bit 15, 261 * s is a two-bit field starting at bit 17, 262 * and t is a five-bit field starting at bit 31. 263 */ 264 inst = frame->tf_iir; 265 __asm volatile( 266 " extru %4, 10, 5, %1 \n" 267 " extru %4, 15, 5, %2 \n" 268 " extru %4, 17, 2, %3 \n" 269 " extru %4, 31, 5, %4 \n" 270 : "=r" (inst_b), "=r" (inst_x), "=r" (inst_s), "=r" (inst_t) 271 : "r" (inst)); 272 273 /* 274 * The space must be the user's space, else we 275 * segfault. 276 */ 277 if (inst_s != pcb->pcb_space) 278 return EFAULT; 279 280 /* See whether or not this is a doubleword load/store. */ 281 log2size = (inst & OPCODE_DOUBLE) ? 3 : 2; 282 283 /* Get the floating point register. */ 284 fpreg = ((char *)pcb->pcb_fpregs) + (inst_t << log2size); 285 286 /* Get the base register. */ 287 base = FRAME_REG(frame, inst_b, r0); 288 289 /* Dispatch on whether or not this is an indexed load/store. */ 290 if (inst & OPCODE_INDEXED) { 291 292 /* Get the index register value. */ 293 index = *FRAME_REG(frame, inst_x, r0); 294 295 /* Dispatch on the completer. */ 296 switch (inst & OPCODE_CMPLT) { 297 case OPCODE_CMPLT_S: 298 offset = *base + (index << log2size); 299 break; 300 case OPCODE_CMPLT_M: 301 offset = *base; 302 *base = *base + index; 303 break; 304 case OPCODE_CMPLT_SM: 305 offset = *base; 306 *base = *base + (index << log2size); 307 break; 308 default: 309 offset = *base + index; 310 break; 311 } 312 } else { 313 314 /* Do a low_sign_ext(x, 5). */ 315 im5 = inst_x >> 1; 316 if (inst_x & 1) 317 im5 |= 0xfffffff0; 318 319 /* Dispatch on the completer. */ 320 switch (inst & OPCODE_CMPLT) { 321 case OPCODE_CMPLT_MB: 322 offset = *base + im5; 323 *base = *base + im5; 324 break; 325 case OPCODE_CMPLT_MA: 326 offset = *base; 327 *base = *base + im5; 328 break; 329 default: 330 offset = *base + im5; 331 break; 332 } 333 } 334 335 /* 336 * The offset we calculated must be the same as the 337 * offset in the IOR. 338 */ 339 KASSERT(offset == frame->tf_ior); 340 341 /* Perform the load or store. */ 342 error = (inst & OPCODE_STORE) ? 343 copyout(fpreg, (void *) offset, 1 << log2size) : 344 copyin((const void *) offset, fpreg, 1 << log2size); 345 return error; 346 } 347 348 /* 349 * This is called to emulate an instruction. 350 */ 351 void 352 hppa_fpu_emulate(struct trapframe *frame, struct lwp *l, u_int inst) 353 { 354 struct pcb *pcb = lwp_getpcb(l); 355 u_int opcode, class, sub; 356 u_int *fpregs; 357 int exception; 358 ksiginfo_t ksi; 359 360 /* 361 * If the process' state is in any hardware FPU, 362 * flush it out - we need to operate on it. 363 */ 364 hppa_fpu_flush(l); 365 366 /* 367 * Get the instruction that we're emulating, 368 * and break it down. Using HP bit notation, 369 * the class is a two-bit field starting at 370 * bit 22, the opcode is a 6-bit field starting 371 * at bit 5, and sub for a class 1 instruction 372 * is a two bit field starting at bit 16, else 373 * it is a three bit field starting at bit 18. 374 */ 375 #if 0 376 __asm volatile( 377 " extru %3, 22, 2, %1 \n" 378 " extru %3, 5, 6, %0 \n" 379 " extru %3, 18, 3, %2 \n" 380 " comib,<> 1, %1, 0 \n" 381 " extru %3, 16, 2, %2 \n" 382 : "=r" (opcode), "=r" (class), "=r" (sub) 383 : "r" (inst)); 384 #else 385 opcode = (inst >> (31 - 5)) & 0x3f; 386 class = (inst >> (31 - 22)) & 0x3; 387 if (class == 1) { 388 sub = (inst >> (31 - 16)) & 3; 389 } else { 390 sub = (inst >> (31 - 18)) & 7; 391 } 392 #endif 393 394 /* Get this LWP's FPU registers. */ 395 fpregs = (u_int *)pcb->pcb_fpregs; 396 397 /* Dispatch on the opcode. */ 398 switch (opcode) { 399 case 0x09: 400 case 0x0b: 401 if (hppa_fpu_ls(frame, l) != 0) { 402 KSI_INIT_TRAP(&ksi); 403 ksi.ksi_signo = SIGSEGV; 404 ksi.ksi_code = SEGV_MAPERR; 405 ksi.ksi_trap = T_DTLBMISS; 406 ksi.ksi_addr = (void *)frame->tf_iioq_head; 407 trapsignal(l, &ksi); 408 } 409 return; 410 case 0x0c: 411 exception = decode_0c(inst, class, sub, fpregs); 412 break; 413 case 0x0e: 414 exception = decode_0e(inst, class, sub, fpregs); 415 break; 416 case 0x06: 417 exception = decode_06(inst, fpregs); 418 break; 419 case 0x26: 420 exception = decode_26(inst, fpregs); 421 break; 422 default: 423 exception = UNIMPLEMENTEDEXCEPTION; 424 break; 425 } 426 427 if (exception) { 428 KSI_INIT_TRAP(&ksi); 429 if (exception & UNIMPLEMENTEDEXCEPTION) { 430 ksi.ksi_signo = SIGILL; 431 ksi.ksi_code = ILL_COPROC; 432 } else { 433 ksi.ksi_signo = SIGFPE; 434 if (exception & INVALIDEXCEPTION) { 435 ksi.ksi_code = FPE_FLTINV; 436 } else if (exception & DIVISIONBYZEROEXCEPTION) { 437 ksi.ksi_code = FPE_FLTDIV; 438 } else if (exception & OVERFLOWEXCEPTION) { 439 ksi.ksi_code = FPE_FLTOVF; 440 } else if (exception & UNDERFLOWEXCEPTION) { 441 ksi.ksi_code = FPE_FLTUND; 442 } else if (exception & INEXACTEXCEPTION) { 443 ksi.ksi_code = FPE_FLTRES; 444 } 445 } 446 ksi.ksi_trap = T_EMULATION; 447 ksi.ksi_addr = (void *)frame->tf_iioq_head; 448 trapsignal(l, &ksi); 449 } 450 } 451 452 #endif /* FPEMUL */ 453