xref: /netbsd/sys/arch/powerpc/powerpc/trap.c (revision 6550d01e)
1 /*	$NetBSD: trap.c,v 1.138 2011/01/18 01:02:55 matt Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996 Wolfgang Solfrank.
5  * Copyright (C) 1995, 1996 TooLs GmbH.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by TooLs GmbH.
19  * 4. The name of TooLs GmbH may not be used to endorse or promote products
20  *    derived from this software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
28  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
31  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.138 2011/01/18 01:02:55 matt Exp $");
36 
37 #include "opt_altivec.h"
38 #include "opt_ddb.h"
39 #include "opt_multiprocessor.h"
40 
41 #include <sys/param.h>
42 
43 #include <sys/proc.h>
44 #include <sys/ras.h>
45 #include <sys/reboot.h>
46 #include <sys/sa.h>
47 #include <sys/savar.h>
48 #include <sys/systm.h>
49 #include <sys/kauth.h>
50 #include <sys/kmem.h>
51 
52 #include <uvm/uvm_extern.h>
53 
54 #include <dev/cons.h>
55 
56 #include <machine/cpu.h>
57 #include <machine/db_machdep.h>
58 #include <machine/fpu.h>
59 #include <machine/frame.h>
60 #include <machine/pcb.h>
61 #include <machine/pmap.h>
62 #include <machine/psl.h>
63 #include <machine/trap.h>
64 #include <powerpc/altivec.h>
65 #include <powerpc/userret.h>
66 #include <powerpc/instr.h>
67 
68 #include <powerpc/spr.h>
69 #include <powerpc/oea/spr.h>
70 
/* Local helpers: instruction emulation and unaligned-access fixup. */
static int emulated_opcode(struct lwp *, struct trapframe *);
static int fix_unaligned(struct lwp *, struct trapframe *);
/* Map/unmap one segment of user space into a kernel-addressable window. */
static inline vaddr_t setusr(vaddr_t, size_t *);
static inline void unsetusr(void);

/* Compare-and-swap worker, defined externally; runs on the mapped window. */
extern int do_ucas_32(volatile int32_t *, int32_t, int32_t, int32_t *);
int ucas_32(volatile int32_t *, int32_t, int32_t, int32_t *);

void trap(struct trapframe *);	/* Called from locore / trap_subr */
/* Why are these not defined in a header? */
int badaddr(void *, size_t);
int badaddr_read(void *, size_t, int *);
83 
/*
 * trap --
 *	Common trap/exception dispatcher, entered from locore / trap_subr
 *	with a trapframe describing the interrupted context.
 *
 *	Traps taken from user mode (PSL_PR set in SRR1) have EXC_USER
 *	or'ed into the dispatch type and finish through userret().
 *	Kernel-mode faults either recover through the pcb_onfault fault
 *	buffer (see setfault()), are resolved by a pmap spill or
 *	uvm_fault(), or fall through to panic.
 */
void
trap(struct trapframe *tf)
{
	struct cpu_info * const ci = curcpu();
	struct lwp * const l = curlwp;
	struct proc * const p = l->l_proc;
	struct pcb * const pcb = curpcb;
	struct vm_map *map;
	ksiginfo_t ksi;
	const bool usertrap = (tf->tf_srr1 & PSL_PR);
	int type = tf->tf_exc;
	int ftype, rv;

	ci->ci_ev_traps.ev_count++;

	KASSERTMSG(!usertrap || tf == trapframe(l),
	    ("trap: tf=%p is invalid: trapframe(%p)=%p", tf, l, trapframe(l)));

	if (usertrap) {
		type |= EXC_USER;
#ifdef DIAGNOSTIC
		if (l == NULL || p == NULL)
			panic("trap: user trap %d with lwp = %p, proc = %p",
			    type, l, p);
#endif
		LWP_CACHE_CREDS(l, p);
	}

	ci->ci_data.cpu_ntrap++;

	switch (type) {
	case EXC_RUNMODETRC|EXC_USER:
		/* FALLTHROUGH */
	case EXC_TRC|EXC_USER:
		/*
		 * Single-step/trace trap: clear the trace bit and post
		 * SIGTRAP unless the PC lies inside a registered
		 * restartable atomic sequence.
		 */
		tf->tf_srr1 &= ~PSL_SE;
		if (p->p_raslist == NULL ||
		    ras_lookup(p, (void *)tf->tf_srr0) == (void *) -1) {
			KSI_INIT_TRAP(&ksi);
			ksi.ksi_signo = SIGTRAP;
			ksi.ksi_trap = EXC_TRC;
			ksi.ksi_addr = (void *)tf->tf_srr0;
			ksi.ksi_code = TRAP_TRACE;
			(*p->p_emul->e_trapsignal)(l, &ksi);
		}
		break;
	case EXC_DSI: {
		/* Kernel-mode data storage interrupt (data page fault). */
		struct faultbuf * const fb = pcb->pcb_onfault;
		vaddr_t va = tf->tf_dar;

		ci->ci_ev_kdsi.ev_count++;

		/*
		 * Only query UVM if no interrupts are active.
		 */
		if (ci->ci_idepth < 0) {
			if ((va >> ADDR_SR_SHFT) == pcb->pcb_kmapsr) {
				/*
				 * Fault in the segment temporarily mapped
				 * by setusr(): translate the faulting
				 * address back to the real user VA and
				 * fault against the user's map.
				 */
				va &= ADDR_PIDX | ADDR_POFF;
				va |= pcb->pcb_umapsr << ADDR_SR_SHFT;
				map = &p->p_vmspace->vm_map;
#ifdef PPC_OEA64
				if ((tf->tf_dsisr & DSISR_NOTFOUND) &&
				    vm_map_pmap(map)->pm_ste_evictions > 0 &&
				    pmap_ste_spill(vm_map_pmap(map),
					    trunc_page(va), false)) {
					return;
				}
#endif

				/*
				 * A missing (not protection-violating)
				 * translation may just be an evicted PTE;
				 * respill it without calling UVM.
				 */
				if ((tf->tf_dsisr & DSISR_NOTFOUND) &&
				    vm_map_pmap(map)->pm_evictions > 0 &&
				    pmap_pte_spill(vm_map_pmap(map),
					    trunc_page(va), false)) {
					return;
				}
				if ((l->l_flag & LW_SA)
				    && (~l->l_pflag & LP_SA_NOBLOCK)) {
					l->l_savp->savp_faultaddr = va;
					l->l_pflag |= LP_SA_PAGEFAULT;
				}
#if defined(DIAGNOSTIC) && !defined(PPC_OEA64) && !defined (PPC_IBM4XX)
			} else if ((va >> ADDR_SR_SHFT) == USER_SR) {
				printf("trap: kernel %s DSI trap @ %#lx by %#lx"
				    " (DSISR %#x): USER_SR unset\n",
				    (tf->tf_dsisr & DSISR_STORE)
					? "write" : "read",
				    va, tf->tf_srr0, tf->tf_dsisr);
				goto brain_damage2;
#endif
			} else {
				map = kernel_map;
			}

			if (tf->tf_dsisr & DSISR_STORE)
				ftype = VM_PROT_WRITE;
			else
				ftype = VM_PROT_READ;

			/*
			 * Clear the fault handler around uvm_fault() so a
			 * recursive fault panics instead of recovering,
			 * then restore it for the recovery path below.
			 */
			pcb->pcb_onfault = NULL;
			rv = uvm_fault(map, trunc_page(va), ftype);
			pcb->pcb_onfault = fb;

			if (map != kernel_map) {
				/*
				 * Record any stack growth...
				 */
				if (rv == 0)
					uvm_grow(p, trunc_page(va));
				l->l_pflag &= ~LP_SA_PAGEFAULT;
			}
			if (rv == 0)
				return;
			if (rv == EACCES)
				rv = EFAULT;
		} else {
			/*
			 * Note that this implies that access to the USER
			 * segment is not allowed in interrupt context.
			 */
			rv = EFAULT;
		}
		if (fb != NULL) {
			/*
			 * Recover through the fault buffer: resume at the
			 * PC recorded by setfault() with r3 = error code.
			 */
			tf->tf_srr0 = fb->fb_pc;
			tf->tf_cr = fb->fb_cr;
			tf->tf_fixreg[1] = fb->fb_sp;
			tf->tf_fixreg[2] = fb->fb_r2;
			tf->tf_fixreg[3] = rv;
			memcpy(&tf->tf_fixreg[13], fb->fb_fixreg,
			    sizeof(fb->fb_fixreg));
			return;
		}
		printf("trap: kernel %s DSI trap @ %#lx by %#lx (DSISR %#x, err"
		    "=%d), lr %#lx\n", (tf->tf_dsisr & DSISR_STORE) ? "write" : "read",
		    va, tf->tf_srr0, tf->tf_dsisr, rv, tf->tf_lr);
		goto brain_damage2;
	}
	case EXC_DSI|EXC_USER:
		/* User-mode data page fault. */
		ci->ci_ev_udsi.ev_count++;
		if (tf->tf_dsisr & DSISR_STORE)
			ftype = VM_PROT_WRITE;
		else
			ftype = VM_PROT_READ;

		/*
		 * Try to spill an evicted pte into the page table
		 * if this wasn't a protection fault and the pmap
		 * has some evicted pte's.
		 */
		map = &p->p_vmspace->vm_map;
#ifdef PPC_OEA64
		if ((tf->tf_dsisr & DSISR_NOTFOUND) &&
		    vm_map_pmap(map)->pm_ste_evictions > 0 &&
		    pmap_ste_spill(vm_map_pmap(map), trunc_page(tf->tf_dar),
				   false)) {
			break;
		}
#endif

		if ((tf->tf_dsisr & DSISR_NOTFOUND) &&
		    vm_map_pmap(map)->pm_evictions > 0 &&
		    pmap_pte_spill(vm_map_pmap(map), trunc_page(tf->tf_dar),
				   false)) {
			break;
		}

		if (l->l_flag & LW_SA) {
			l->l_savp->savp_faultaddr = (vaddr_t)tf->tf_dar;
			l->l_pflag |= LP_SA_PAGEFAULT;
		}
		KASSERT(pcb->pcb_onfault == NULL);
		rv = uvm_fault(map, trunc_page(tf->tf_dar), ftype);
		if (rv == 0) {
			/*
			 * Record any stack growth...
			 */
			uvm_grow(p, trunc_page(tf->tf_dar));
			l->l_pflag &= ~LP_SA_PAGEFAULT;
			break;
		}
		ci->ci_ev_udsi_fatal.ev_count++;
		if (cpu_printfataltraps) {
			printf("trap: pid %d.%d (%s): user %s DSI trap @ %#lx "
			    "by %#lx (DSISR %#x, err=%d)\n",
			    p->p_pid, l->l_lid, p->p_comm,
			    (tf->tf_dsisr & DSISR_STORE) ? "write" : "read",
			    tf->tf_dar, tf->tf_srr0, tf->tf_dsisr, rv);
		}
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGSEGV;
		ksi.ksi_trap = EXC_DSI;
		ksi.ksi_addr = (void *)tf->tf_dar;
		ksi.ksi_code =
		    (tf->tf_dsisr & DSISR_PROTECT ? SEGV_ACCERR : SEGV_MAPERR);
		if (rv == ENOMEM) {
			printf("UVM: pid %d.%d (%s), uid %d killed: "
			       "out of swap\n",
			       p->p_pid, l->l_lid, p->p_comm,
			       l->l_cred ?
			       kauth_cred_geteuid(l->l_cred) : -1);
			ksi.ksi_signo = SIGKILL;
		}
		(*p->p_emul->e_trapsignal)(l, &ksi);
		l->l_pflag &= ~LP_SA_PAGEFAULT;
		break;

	case EXC_ISI:
		/* Instruction fetch fault in kernel mode: always fatal. */
		ci->ci_ev_kisi.ev_count++;

		printf("trap: kernel ISI by %#lx (SRR1 %#lx), lr: %#lx\n",
		    tf->tf_srr0, tf->tf_srr1, tf->tf_lr);
		goto brain_damage2;

	case EXC_ISI|EXC_USER:
		/* User-mode instruction page fault. */
		ci->ci_ev_isi.ev_count++;

		/*
		 * Try to spill an evicted pte into the page table
		 * if this wasn't a protection fault and the pmap
		 * has some evicted pte's.
		 */
		map = &p->p_vmspace->vm_map;
#ifdef PPC_OEA64
		if (vm_map_pmap(map)->pm_ste_evictions > 0 &&
		    pmap_ste_spill(vm_map_pmap(map), trunc_page(tf->tf_srr0),
				   true)) {
			break;
		}
#endif

		if (vm_map_pmap(map)->pm_evictions > 0 &&
		    pmap_pte_spill(vm_map_pmap(map), trunc_page(tf->tf_srr0),
				   true)) {
			break;
		}

		if (l->l_flag & LW_SA) {
			l->l_savp->savp_faultaddr = (vaddr_t)tf->tf_srr0;
			l->l_pflag |= LP_SA_PAGEFAULT;
		}
		ftype = VM_PROT_EXECUTE;
		KASSERT(pcb->pcb_onfault == NULL);
		rv = uvm_fault(map, trunc_page(tf->tf_srr0), ftype);
		if (rv == 0) {
			l->l_pflag &= ~LP_SA_PAGEFAULT;
			break;
		}
		ci->ci_ev_isi_fatal.ev_count++;
		if (cpu_printfataltraps) {
			printf("trap: pid %d.%d (%s): user ISI trap @ %#lx "
			    "(SRR1=%#lx)\n", p->p_pid, l->l_lid, p->p_comm,
			    tf->tf_srr0, tf->tf_srr1);
		}
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGSEGV;
		ksi.ksi_trap = EXC_ISI;
		ksi.ksi_addr = (void *)tf->tf_srr0;
		ksi.ksi_code = (rv == EACCES ? SEGV_ACCERR : SEGV_MAPERR);
		(*p->p_emul->e_trapsignal)(l, &ksi);
		l->l_pflag &= ~LP_SA_PAGEFAULT;
		break;

	case EXC_FPU|EXC_USER:
		/* FP unavailable: give the lwp the FPU and retry. */
		ci->ci_ev_fpu.ev_count++;
		fpu_enable();
		break;

	case EXC_AST|EXC_USER:
		/* Asynchronous software trap: profiling and preemption. */
		ci->ci_astpending = 0;		/* we are about to do it */
		//ci->ci_data.cpu_nast++;
		if (l->l_pflag & LP_OWEUPC) {
			l->l_pflag &= ~LP_OWEUPC;
			ADDUPROF(l);
		}
		/* Check whether we are being preempted. */
		if (ci->ci_want_resched)
			preempt();
		break;

	case EXC_ALI|EXC_USER:
		/* Alignment fault: try to emulate, else SIGBUS. */
		ci->ci_ev_ali.ev_count++;
		if (fix_unaligned(l, tf) != 0) {
			ci->ci_ev_ali_fatal.ev_count++;
			if (cpu_printfataltraps) {
				printf("trap: pid %d.%d (%s): user ALI trap @ "
				    "%#lx by %#lx (DSISR %#x)\n",
				    p->p_pid, l->l_lid, p->p_comm,
				    tf->tf_dar, tf->tf_srr0, tf->tf_dsisr);
			}
			KSI_INIT_TRAP(&ksi);
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_trap = EXC_ALI;
			ksi.ksi_addr = (void *)tf->tf_dar;
			ksi.ksi_code = BUS_ADRALN;
			(*p->p_emul->e_trapsignal)(l, &ksi);
		} else
			tf->tf_srr0 += 4;	/* skip the emulated insn */
		break;

	case EXC_PERF|EXC_USER:
		/* Not really, but needed due to how trap_subr.S works */
	case EXC_VEC|EXC_USER:
		/* AltiVec unavailable: enable it, or SIGILL if no VEC. */
		ci->ci_ev_vec.ev_count++;
#ifdef ALTIVEC
		vec_enable();
		break;
#else
		if (cpu_printfataltraps) {
			printf("trap: pid %d.%d (%s): user VEC trap @ %#lx "
			    "(SRR1=%#lx)\n",
			    p->p_pid, l->l_lid, p->p_comm,
			    tf->tf_srr0, tf->tf_srr1);
		}
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGILL;
		ksi.ksi_trap = EXC_PGM;
		ksi.ksi_addr = (void *)tf->tf_srr0;
		ksi.ksi_code = ILL_ILLOPC;
		(*p->p_emul->e_trapsignal)(l, &ksi);
		break;
#endif
	case EXC_MCHK|EXC_USER:
		/* Machine check taken from user mode: SIGBUS. */
		ci->ci_ev_umchk.ev_count++;
		if (cpu_printfataltraps) {
			printf("trap: pid %d (%s): user MCHK trap @ %#lx "
			    "(SRR1=%#lx)\n",
			    p->p_pid, p->p_comm, tf->tf_srr0, tf->tf_srr1);
		}
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_trap = EXC_MCHK;
		ksi.ksi_addr = (void *)tf->tf_srr0;
		ksi.ksi_code = BUS_OBJERR;
		(*p->p_emul->e_trapsignal)(l, &ksi);
		break;

	case EXC_PGM|EXC_USER:
		/* Program exception: trap insn, FP exception, or illegal. */
		ci->ci_ev_pgm.ev_count++;
		if (tf->tf_srr1 & 0x00020000) {	/* Bit 14 is set if trap */
			if (p->p_raslist == NULL ||
			    ras_lookup(p, (void *)tf->tf_srr0) == (void *) -1) {
				KSI_INIT_TRAP(&ksi);
				ksi.ksi_signo = SIGTRAP;
				ksi.ksi_trap = EXC_PGM;
				ksi.ksi_addr = (void *)tf->tf_srr0;
				ksi.ksi_code = TRAP_BRKPT;
				(*p->p_emul->e_trapsignal)(l, &ksi);
			} else {
				/* skip the trap instruction */
				tf->tf_srr0 += 4;
			}
		} else {
			KSI_INIT_TRAP(&ksi);
			ksi.ksi_signo = SIGILL;
			ksi.ksi_trap = EXC_PGM;
			ksi.ksi_addr = (void *)tf->tf_srr0;
			if (tf->tf_srr1 & 0x100000) {
				/* SRR1 bit 11: FP enabled exception */
				ksi.ksi_signo = SIGFPE;
				ksi.ksi_code = fpu_get_fault_code();
			} else if (tf->tf_srr1 & 0x40000) {
				/* SRR1 bit 13: privileged insn — emulate? */
				if (emulated_opcode(l, tf)) {
					tf->tf_srr0 += 4;
					break;
				}
				ksi.ksi_code = ILL_PRVOPC;
			} else
				ksi.ksi_code = ILL_ILLOPC;
			if (cpu_printfataltraps)
				printf("trap: pid %d.%d (%s): user PGM trap @"
				    " %#lx (SRR1=%#lx)\n", p->p_pid, l->l_lid,
				    p->p_comm, tf->tf_srr0, tf->tf_srr1);
			(*p->p_emul->e_trapsignal)(l, &ksi);
		}
		break;

	case EXC_MCHK: {
		/* Kernel machine check: recoverable only via pcb_onfault. */
		struct faultbuf *fb;

		if ((fb = pcb->pcb_onfault) != NULL) {
			tf->tf_srr0 = fb->fb_pc;
			tf->tf_fixreg[1] = fb->fb_sp;
			tf->tf_fixreg[2] = fb->fb_r2;
			tf->tf_fixreg[3] = EFAULT;
			tf->tf_cr = fb->fb_cr;
			memcpy(&tf->tf_fixreg[13], fb->fb_fixreg,
			    sizeof(fb->fb_fixreg));
			return;
		}
		printf("trap: pid %d.%d (%s): kernel MCHK trap @"
		    " %#lx (SRR1=%#lx)\n", p->p_pid, l->l_lid,
		    p->p_comm, tf->tf_srr0, tf->tf_srr1);
		goto brain_damage2;
	}
	case EXC_ALI:
		/* Kernel alignment fault: fatal (no emulation attempted). */
		printf("trap: pid %d.%d (%s): kernel ALI trap @ %#lx by %#lx "
		    "(DSISR %#x)\n", p->p_pid, l->l_lid, p->p_comm,
		    tf->tf_dar, tf->tf_srr0, tf->tf_dsisr);
		goto brain_damage2;
	case EXC_PGM:
		/* Kernel program exception: fatal. */
		printf("trap: pid %d.%d (%s): kernel PGM trap @"
		    " %#lx (SRR1=%#lx)\n", p->p_pid, l->l_lid,
		    p->p_comm, tf->tf_srr0, tf->tf_srr1);
		goto brain_damage2;

	default:
		printf("trap type %x at %lx\n", type, tf->tf_srr0);
brain_damage2:
/* NOTE(review): "DDBX" (not DDB, despite opt_ddb.h above) — the debugger
 * hook appears deliberately disabled; confirm before "fixing". */
#ifdef DDBX
		if (kdb_trap(type, tf))
			return;
#endif
#ifdef TRAP_PANICWAIT
		printf("Press a key to panic.\n");
		cnpollc(1);
		cngetc();
		cnpollc(0);
#endif
		panic("trap");
	}
	userret(l, tf);
}
503 
#ifdef _LP64
/*
 * setusr --
 *	Make the user segment containing uva addressable from the kernel.
 *	Returns a kernel-usable alias of uva; *len_p is set to the number
 *	of bytes from uva to the end of its segment (the longest run a
 *	caller may touch before calling setusr() again).
 */
static inline vaddr_t
setusr(vaddr_t uva, size_t *len_p)
{
	*len_p = SEGMENT_LENGTH - (uva & ~SEGMENT_MASK);
	return pmap_setusr(uva) + (uva & ~SEGMENT_MASK);
}
/* Tear down the mapping established by setusr(). */
static void
unsetusr(void)
{
	pmap_unsetusr();
}
#else
/*
 * setusr --
 *	Map the user segment containing uva at the fixed kernel segment
 *	register USER_SR, recording the mapping in the pcb so that a DSI
 *	taken on it can be translated back to the real user address (see
 *	the EXC_DSI case in trap()).  Returns the USER_SR-relative alias
 *	of uva; *len_p gets the number of bytes to the end of the segment.
 */
static inline vaddr_t
setusr(vaddr_t uva, size_t *len_p)
{
	struct pcb *pcb = curpcb;
	vaddr_t p;
	KASSERT(pcb != NULL);
	KASSERT(pcb->pcb_kmapsr == 0);	/* no mapping may be outstanding */
	pcb->pcb_kmapsr = USER_SR;
	pcb->pcb_umapsr = uva >> ADDR_SR_SHFT;
	*len_p = SEGMENT_LENGTH - (uva & ~SEGMENT_MASK);
	p = (USER_SR << ADDR_SR_SHFT) + (uva & ~SEGMENT_MASK);
	/* Load the user's segment register value into USER_SR. */
	__asm volatile ("isync; mtsr %0,%1; isync"
	    ::	"n"(USER_SR), "r"(pcb->pcb_pm->pm_sr[pcb->pcb_umapsr]));
	return p;
}

/* Point USER_SR back at the canonical empty segment and clear the record. */
static void
unsetusr(void)
{
	curpcb->pcb_kmapsr = 0;
	__asm volatile ("isync; mtsr %0,%1; isync"
	    ::	"n"(USER_SR), "r"(EMPTY_SEGMENT));
}
#endif
541 
/*
 * copyin --
 *	Copy len bytes from user address udaddr to kernel buffer kaddr,
 *	one user segment at a time through the setusr() window.
 *	Returns 0 on success or the error delivered through the fault
 *	buffer (EFAULT) if a user access faults.
 */
int
copyin(const void *udaddr, void *kaddr, size_t len)
{
	vaddr_t uva = (vaddr_t) udaddr;
	char *kp = kaddr;
	struct faultbuf env;
	int rv;

	/* Arm the on-fault handler; a fault resumes here with rv != 0. */
	if ((rv = setfault(&env)) != 0) {
		unsetusr();
		goto out;
	}

	while (len > 0) {
		size_t seglen;
		vaddr_t p = setusr(uva, &seglen);
		if (seglen > len)
			seglen = len;
		memcpy(kp, (const char *) p, seglen);
		uva += seglen;
		kp += seglen;
		len -= seglen;
		unsetusr();
	}

  out:
	/* Disarm the fault handler. */
	curpcb->pcb_onfault = 0;
	return rv;
}
571 
/*
 * copyout --
 *	Copy len bytes from kernel buffer kaddr to user address udaddr,
 *	one user segment at a time through the setusr() window.
 *	Returns 0 on success or the error delivered through the fault
 *	buffer (EFAULT) if a user access faults.
 */
int
copyout(const void *kaddr, void *udaddr, size_t len)
{
	const char *kp = kaddr;
	vaddr_t uva = (vaddr_t) udaddr;
	struct faultbuf env;
	int rv;

	/* Arm the on-fault handler; a fault resumes here with rv != 0. */
	if ((rv = setfault(&env)) != 0) {
		unsetusr();
		goto out;
	}

	while (len > 0) {
		size_t seglen;
		vaddr_t p = setusr(uva, &seglen);
		if (seglen > len)
			seglen = len;
		memcpy((char *)p, kp, seglen);
		uva += seglen;
		kp += seglen;
		len -= seglen;
		unsetusr();
	}

  out:
	/* Disarm the fault handler. */
	curpcb->pcb_onfault = 0;
	return rv;
}
601 
602 /*
603  * kcopy(const void *src, void *dst, size_t len);
604  *
605  * Copy len bytes from src to dst, aborting if we encounter a fatal
606  * page fault.
607  *
608  * kcopy() _must_ save and restore the old fault handler since it is
609  * called by uiomove(), which may be in the path of servicing a non-fatal
610  * page fault.
611  */
int
kcopy(const void *src, void *dst, size_t len)
{
	struct faultbuf env, *oldfault;
	int rv;

	/* Preserve any fault handler already armed (see comment above). */
	oldfault = curpcb->pcb_onfault;

	if ((rv = setfault(&env)) == 0)
		memcpy(dst, src, len);

	/* Restore the previous handler rather than clearing it. */
	curpcb->pcb_onfault = oldfault;
	return rv;
}
626 
/*
 * ucas_32 --
 *	Compare-and-swap a 32-bit word in user space via do_ucas_32(),
 *	with the target segment mapped through the setusr() window and
 *	faults caught by the fault buffer.  Returns 0 on success or
 *	EFAULT for a misaligned or faulting user address; the word's
 *	previous value is reported through *ret (see do_ucas_32).
 */
int
ucas_32(volatile int32_t *uptr, int32_t old, int32_t new, int32_t *ret)
{
	vaddr_t uva = (vaddr_t)uptr;
	vaddr_t p;
	struct faultbuf env;
	size_t seglen;
	int rv;

	/* Reject addresses that are not 4-byte aligned. */
	if (uva & 3) {
		return EFAULT;
	}
	/* Arm the on-fault handler; a fault resumes here with rv != 0. */
	if ((rv = setfault(&env)) != 0) {
		unsetusr();
		goto out;
	}
	p = setusr(uva, &seglen);
	KASSERT(seglen >= sizeof(*uptr));	/* word cannot span segments */
	do_ucas_32((void *)p, old, new, ret);
	unsetusr();

out:
	curpcb->pcb_onfault = 0;
	return rv;
}
/* Pointer- and int-sized CAS are the same operation on 32-bit powerpc. */
__strong_alias(ucas_ptr,ucas_32);
__strong_alias(ucas_int,ucas_32);
654 
655 int
656 badaddr(void *addr, size_t size)
657 {
658 	return badaddr_read(addr, size, NULL);
659 }
660 
/*
 * badaddr_read --
 *	Probe addr with a read of the given size (1, 2 or 4 bytes),
 *	catching any resulting fault/machine check via the fault buffer.
 *	Returns 0 if the read succeeded (value stored through rptr when
 *	non-NULL), 1 if the access faulted.
 */
int
badaddr_read(void *addr, size_t size, int *rptr)
{
	struct faultbuf env;
	int x;

	/* Get rid of any stale machine checks that have been waiting.  */
	__asm volatile ("sync; isync");

	if (setfault(&env)) {
		/* Arrived here via the fault handler: the probe faulted. */
		curpcb->pcb_onfault = 0;
		__asm volatile ("sync");
		return 1;
	}

	__asm volatile ("sync");

	switch (size) {
	case 1:
		x = *(volatile int8_t *)addr;
		break;
	case 2:
		x = *(volatile int16_t *)addr;
		break;
	case 4:
		x = *(volatile int32_t *)addr;
		break;
	default:
		panic("badaddr: invalid size (%lu)", (u_long) size);
	}

	/* Make sure we took the machine check, if we caused one. */
	__asm volatile ("sync; isync");

	curpcb->pcb_onfault = 0;
	__asm volatile ("sync");	/* To be sure. */

	/* Use the value to avoid reorder. */
	if (rptr)
		*rptr = x;

	return 0;
}
704 
705 /*
706  * For now, this only deals with the particular unaligned access case
707  * that gcc tends to generate.  Eventually it should handle all of the
708  * possibilities that can happen on a 32-bit PowerPC in big-endian mode.
709  */
710 
/*
 * fix_unaligned --
 *	Attempt to emulate the access that raised an ALI (alignment)
 *	trap, dispatching on the opcode indicator encoded in DSISR.
 *	Returns 0 if the access was handled (caller then skips the
 *	faulting instruction), -1 if the lwp should get a SIGBUS.
 */
static int
fix_unaligned(struct lwp *l, struct trapframe *tf)
{
	int indicator = EXC_ALI_OPCODE_INDICATOR(tf->tf_dsisr);

	switch (indicator) {
	case EXC_ALI_DCBZ:
		{
			/*
			 * The DCBZ (Data Cache Block Zero) instruction
			 * gives an alignment fault if used on non-cacheable
			 * memory.  We handle the fault mainly for the
			 * case when we are running with the cache disabled
			 * for debugging.
			 */
			static char zeroes[MAXCACHELINESIZE];
			int error;
			/* Zero the whole cache line containing tf_dar. */
			error = copyout(zeroes,
			    (void *)(tf->tf_dar & -curcpu()->ci_ci.dcache_line_size),
			    curcpu()->ci_ci.dcache_line_size);
			if (error)
				return -1;
			return 0;
		}

	case EXC_ALI_LFD:
	case EXC_ALI_STFD:
		{
			/* Misaligned FP load/store of a 64-bit double. */
			struct pcb * const pcb = lwp_getpcb(l);
			const int reg = EXC_ALI_RST(tf->tf_dsisr);
			double * const fpreg = &pcb->pcb_fpu.fpreg[reg];

			/*
			 * Juggle the FPU to ensure that we've initialized
			 * the FPRs, and that their current state is in
			 * the PCB.
			 */

			if ((l->l_md.md_flags & MDLWP_USEDFPU) == 0) {
				memset(&pcb->pcb_fpu, 0, sizeof(pcb->pcb_fpu));
				l->l_md.md_flags |= MDLWP_USEDFPU;
			}
			if (indicator == EXC_ALI_LFD) {
				/* Load: copy user memory into the saved FPR. */
				fpu_save_lwp(l, FPU_SAVE_AND_RELEASE);
				if (copyin((void *)tf->tf_dar, fpreg,
				    sizeof(double)) != 0)
					return -1;
			} else {
				/* Store: copy the saved FPR to user memory. */
				fpu_save_lwp(l, FPU_SAVE);
				if (copyout(fpreg, (void *)tf->tf_dar,
				    sizeof(double)) != 0)
					return -1;
			}
			fpu_enable();
			return 0;
		}
		break;
	}

	/* Anything else is not emulated. */
	return -1;
}
772 
773 int
774 emulated_opcode(struct lwp *l, struct trapframe *tf)
775 {
776 	uint32_t opcode;
777 	if (copyin((void *)tf->tf_srr0, &opcode, sizeof(opcode)) != 0)
778 		return 0;
779 
780 	if (OPC_MFSPR_P(opcode, SPR_PVR)) {
781 		__asm ("mfpvr %0" : "=r"(tf->tf_fixreg[OPC_MFSPR_REG(opcode)]));
782 		return 1;
783 	}
784 
785 	if (OPC_MFMSR_P(opcode)) {
786 		struct pcb * const pcb = lwp_getpcb(l);
787 		register_t msr = tf->tf_srr1 & PSL_USERSRR1;
788 
789 		if (l->l_md.md_flags & MDLWP_USEDFPU)
790 			msr |= PSL_FP;
791 		msr |= (pcb->pcb_flags & (PCB_FE0|PCB_FE1));
792 #ifdef ALTIVEC
793 		if (l->l_md.md_flags & MDLWP_USEDVEC)
794 			msr |= PSL_VEC;
795 #endif
796 		tf->tf_fixreg[OPC_MFMSR_REG(opcode)] = msr;
797 		return 1;
798 	}
799 
800 #define	OPC_MTMSR_CODE		0x7c0000a8
801 #define	OPC_MTMSR_MASK		0xfc1fffff
802 #define	OPC_MTMSR		OPC_MTMSR_CODE
803 #define	OPC_MTMSR_REG(o)	(((o) >> 21) & 0x1f)
804 #define	OPC_MTMSR_P(o)		(((o) & OPC_MTMSR_MASK) == OPC_MTMSR_CODE)
805 
806 	if (OPC_MTMSR_P(opcode)) {
807 		struct pcb * const pcb = lwp_getpcb(l);
808 		register_t msr = tf->tf_fixreg[OPC_MTMSR_REG(opcode)];
809 
810 		/*
811 		 * Don't let the user muck with bits he's not allowed to.
812 		 */
813 		if (!PSL_USEROK_P(msr))
814 			return 0;
815 		/*
816 		 * For now, only update the FP exception mode.
817 		 */
818 		pcb->pcb_flags &= ~(PSL_FE0|PSL_FE1);
819 		pcb->pcb_flags |= msr & (PSL_FE0|PSL_FE1);
820 		/*
821 		 * If we think we have the FPU, update SRR1 too.  If we're
822 		 * wrong userret() will take care of it.
823 		 */
824 		if (tf->tf_srr1 & PSL_FP) {
825 			tf->tf_srr1 &= ~(PSL_FE0|PSL_FE1);
826 			tf->tf_srr1 |= msr & (PSL_FE0|PSL_FE1);
827 		}
828 		return 1;
829 	}
830 
831 	return 0;
832 }
833 
/*
 * copyinstr --
 *	Copy a NUL-terminated string of at most len bytes from user space
 *	to kernel space.  On return *done (if non-NULL) holds the number
 *	of bytes copied, including the terminating NUL when one was
 *	found.  Returns 0 on success, the fault-buffer error (EFAULT) on
 *	a bad user address, or ENAMETOOLONG if no NUL appeared in len
 *	bytes.
 */
int
copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *done)
{
	vaddr_t uva = (vaddr_t) udaddr;
	char *kp = kaddr;
	struct faultbuf env;
	int rv;

	/* Arm the on-fault handler; a fault resumes here with rv != 0. */
	if ((rv = setfault(&env)) != 0) {
		unsetusr();
		goto out2;
	}

	while (len > 0) {
		size_t seglen;
		vaddr_t p = setusr(uva, &seglen);
		if (seglen > len)
			seglen = len;
		len -= seglen;
		uva += seglen;
		/* Copy byte-by-byte so we can stop at the NUL. */
		for (; seglen-- > 0; p++) {
			if ((*kp++ = *(char *)p) == 0) {
				unsetusr();
				goto out;
			}
		}
		unsetusr();
	}
	rv = ENAMETOOLONG;

 out:
	if (done != NULL)
		*done = kp - (char *) kaddr;
 out2:
	curpcb->pcb_onfault = 0;
	return rv;
}
871 
872 
/*
 * copyoutstr --
 *	Copy a NUL-terminated string of at most len bytes from kernel
 *	space to user space.  On return *done (if non-NULL) holds the
 *	number of bytes copied, including the terminating NUL when one
 *	was found.  Returns 0 on success, the fault-buffer error
 *	(EFAULT) on a bad user address, or ENAMETOOLONG if no NUL
 *	appeared in len bytes.
 */
int
copyoutstr(const void *kaddr, void *udaddr, size_t len, size_t *done)
{
	const char *kp = kaddr;
	vaddr_t uva = (vaddr_t) udaddr;
	struct faultbuf env;
	int rv;

	/* Arm the on-fault handler; a fault resumes here with rv != 0. */
	if ((rv = setfault(&env)) != 0) {
		unsetusr();
		goto out2;
	}

	while (len > 0) {
		size_t seglen;
		vaddr_t p = setusr(uva, &seglen);
		if (seglen > len)
			seglen = len;
		len -= seglen;
		uva += seglen;
		/* Copy byte-by-byte so we can stop at the NUL. */
		for (; seglen-- > 0; p++) {
			if ((*(char *)p = *kp++) == 0) {
				unsetusr();
				goto out;
			}
		}
		unsetusr();
	}
	rv = ENAMETOOLONG;

 out:
	if (done != NULL)
		*done = kp - (const char*)kaddr;
 out2:
	curpcb->pcb_onfault = 0;
	return rv;
}
910 
911 /*
912  * Start a new LWP
913  */
914 void
915 startlwp(void *arg)
916 {
917 	ucontext_t *uc = arg;
918 	lwp_t *l = curlwp;
919 	struct trapframe *tf = trapframe(l);
920 	int error;
921 
922 	error = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags);
923 	KASSERT(error == 0);
924 
925 	kmem_free(uc, sizeof(ucontext_t));
926 	userret(l, tf);
927 }
928 
/*
 * Return to user mode after a scheduler-activations upcall, releasing
 * the last hold on the kernel lock first.
 */
void
upcallret(struct lwp *l)
{
	struct trapframe * const tf = trapframe(l);

	KERNEL_UNLOCK_LAST(l);
	userret(l, tf);
}
937