xref: /freebsd/sys/amd64/amd64/vm_machdep.c (revision 39beb93c)
1 /*-
2  * Copyright (c) 1982, 1986 The Regents of the University of California.
3  * Copyright (c) 1989, 1990 William Jolitz
4  * Copyright (c) 1994 John Dyson
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * the Systems Programming Group of the University of Utah Computer
9  * Science Department, and William Jolitz.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  *
39  *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
40  *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
41  */
42 
43 #include <sys/cdefs.h>
44 __FBSDID("$FreeBSD$");
45 
46 #include "opt_isa.h"
47 #include "opt_cpu.h"
48 #include "opt_compat.h"
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/bio.h>
53 #include <sys/buf.h>
54 #include <sys/kernel.h>
55 #include <sys/ktr.h>
56 #include <sys/lock.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/mutex.h>
60 #include <sys/pioctl.h>
61 #include <sys/proc.h>
62 #include <sys/sf_buf.h>
63 #include <sys/smp.h>
64 #include <sys/sysctl.h>
65 #include <sys/unistd.h>
66 #include <sys/vnode.h>
67 #include <sys/vmmeter.h>
68 
69 #include <machine/cpu.h>
70 #include <machine/md_var.h>
71 #include <machine/pcb.h>
72 #include <machine/specialreg.h>
73 
74 #include <vm/vm.h>
75 #include <vm/vm_extern.h>
76 #include <vm/vm_kern.h>
77 #include <vm/vm_page.h>
78 #include <vm/vm_map.h>
79 #include <vm/vm_param.h>
80 
81 #include <amd64/isa/isa.h>
82 
83 #ifdef COMPAT_IA32
84 
85 extern struct sysentvec ia32_freebsd_sysvec;
86 
87 #endif
88 
89 static void	cpu_reset_real(void);
90 #ifdef SMP
91 static void	cpu_reset_proxy(void);
92 static u_int	cpu_reset_proxyid;
93 static volatile u_int	cpu_reset_proxy_active;
94 #endif
95 
96 /*
97  * Finish a fork operation, with process p2 nearly set up.
98  * Copy and update the pcb, and set up the stack so that the child is
99  * ready to run and return to user mode.
100  */
101 void
102 cpu_fork(td1, p2, td2, flags)
103 	register struct thread *td1;
104 	register struct proc *p2;
105 	struct thread *td2;
106 	int flags;
107 {
108 	register struct proc *p1;
109 	struct pcb *pcb2;
110 	struct mdproc *mdp2;
111 
112 	p1 = td1->td_proc;
113 	if ((flags & RFPROC) == 0)
114 		return;
115 
116 	/* Ensure that p1's pcb is up to date. */
117 	fpuexit(td1);
118 
119 	/* Point the pcb to the top of the stack */
120 	pcb2 = (struct pcb *)(td2->td_kstack +
121 	    td2->td_kstack_pages * PAGE_SIZE) - 1;
122 	td2->td_pcb = pcb2;
123 
124 	/* Copy p1's pcb */
125 	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
126 
127 	/* Point mdproc at p2's machine-dependent area and copy over p1's contents */
128 	mdp2 = &p2->p_md;
129 	bcopy(&p1->p_md, mdp2, sizeof(*mdp2));
130 
131 	/*
132 	 * Create a new fresh stack for the new process.
133 	 * Copy the trap frame for the return to user mode as if from a
134 	 * syscall.  This copies most of the user mode register values.
135 	 */
136 	td2->td_frame = (struct trapframe *)td2->td_pcb - 1;
137 	bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
138 
139 	td2->td_frame->tf_rax = 0;		/* Child returns zero */
140 	td2->td_frame->tf_rflags &= ~PSL_C;	/* success */
141 	td2->td_frame->tf_rdx = 1;
142 
143 	/*
144 	 * If the parent process has the trap bit set (i.e. a debugger had
145 	 * single stepped the process to the system call), we need to clear
146 	 * the trap flag from the new frame unless the debugger had set PF_FORK
147 	 * on the parent.  Otherwise, the child will receive a (likely
148 	 * unexpected) SIGTRAP when it executes the first instruction after
149 	 * returning  to userland.
150 	 * returning to userland.
151 	if ((p1->p_pfsflags & PF_FORK) == 0)
152 		td2->td_frame->tf_rflags &= ~PSL_T;
153 
154 	/*
155 	 * Set registers for trampoline to user mode.  Leave space for the
156 	 * return address on stack.  These are the kernel mode register values.
157 	 */
158 	pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pml4);
159 	pcb2->pcb_r12 = (register_t)fork_return;	/* fork_trampoline argument */
160 	pcb2->pcb_rbp = 0;
161 	pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *);
162 	pcb2->pcb_rbx = (register_t)td2;		/* fork_trampoline argument */
163 	pcb2->pcb_rip = (register_t)fork_trampoline;
164 	/*-
165 	 * pcb2->pcb_dr*:	cloned above.
166 	 * pcb2->pcb_savefpu:	cloned above.
167 	 * pcb2->pcb_flags:	cloned above.
168 	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
169 	 * pcb2->pcb_[fg]sbase:	cloned above
170 	 */
171 
172 	/* Setup to release spin count in fork_exit(). */
173 	td2->td_md.md_spinlock_count = 1;
174 	td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
175 
176 	/*
177 	 * Now, cpu_switch() can schedule the new process.
178 	 * pcb_rsp is loaded pointing to the cpu_switch() stack frame
179 	 * containing the return address when exiting cpu_switch.
180 	 * This will normally be to fork_trampoline(), which will have
181 	 * %rbx loaded with the new thread pointer.  fork_trampoline()
182 	 * will set up a stack to call fork_return(td, frame); to complete
183 	 * the return to user mode; see the stack layout sketch below.
184 	 */
185 }
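
/*
 * Illustrative sketch only (not from the original source): the child's
 * kernel stack as laid out by cpu_fork() above (cpu_thread_alloc() below
 * produces the same td_pcb/td_frame placement).  Addresses grow downward:
 *
 *	td_kstack + td_kstack_pages * PAGE_SIZE    (top of stack)
 *	    struct pcb            <- td_pcb  (copied from the parent)
 *	    struct trapframe      <- td_frame (copied from the parent)
 *	    return address slot   <- pcb_rsp (space for the return address
 *	                             used when cpu_switch() exits, normally
 *	                             fork_trampoline(); see comment above)
 *	    ... remaining kernel stack ...
 */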
186 
187 /*
188  * Intercept the return address from a freshly forked process that has NOT
189  * been scheduled yet.
190  *
191  * This is needed to make kernel threads stay in kernel mode.
192  */
193 void
194 cpu_set_fork_handler(td, func, arg)
195 	struct thread *td;
196 	void (*func)(void *);
197 	void *arg;
198 {
199 	/*
200 	 * Note that the trap frame follows the args, so the function
201 	 * is really called like this:  func(arg, frame);
202 	 */
203 	td->td_pcb->pcb_r12 = (long) func;	/* function */
204 	td->td_pcb->pcb_rbx = (long) arg;	/* first arg */
205 }
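
/*
 * Illustrative use only (names are hypothetical): a caller that has just
 * forked a kernel-only thread might do
 *
 *	cpu_set_fork_handler(newtd, my_kthread_main, softc);
 *
 * so that fork_trampoline() ends up calling my_kthread_main(softc, frame)
 * instead of heading back to user mode; the kproc/kthread creation code
 * uses this hook in essentially that way.
 */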
206 
207 void
208 cpu_exit(struct thread *td)
209 {
210 }
211 
212 void
213 cpu_thread_exit(struct thread *td)
214 {
215 
216 	if (td == PCPU_GET(fpcurthread))
217 		fpudrop();
218 
219 	/* Disable any hardware breakpoints. */
220 	if (td->td_pcb->pcb_flags & PCB_DBREGS) {
221 		reset_dbregs();
222 		td->td_pcb->pcb_flags &= ~PCB_DBREGS;
223 	}
224 }
225 
226 void
227 cpu_thread_clean(struct thread *td)
228 {
229 }
230 
231 void
232 cpu_thread_swapin(struct thread *td)
233 {
234 }
235 
236 void
237 cpu_thread_swapout(struct thread *td)
238 {
239 }
240 
241 void
242 cpu_thread_alloc(struct thread *td)
243 {
244 
245 	td->td_pcb = (struct pcb *)(td->td_kstack +
246 	    td->td_kstack_pages * PAGE_SIZE) - 1;
247 	td->td_frame = (struct trapframe *)td->td_pcb - 1;
248 }
249 
250 void
251 cpu_thread_free(struct thread *td)
252 {
253 }
254 
255 /*
256  * Initialize machine state (pcb and trap frame) for a new thread about to
257  * upcall. Put enough state in the new thread's PCB to get it to go back to
258  * userret(), where we can intercept it again to set the return (upcall)
259  * address and stack, along with those from upcalls that are from other
260  * sources, such as those generated in thread_userret() itself.
261  */
262 void
263 cpu_set_upcall(struct thread *td, struct thread *td0)
264 {
265 	struct pcb *pcb2;
266 
267 	/* The pcb sits at the top of the stack, as set up by cpu_thread_alloc(). */
268 	pcb2 = td->td_pcb;
269 
270 	/*
271 	 * Copy the upcall pcb.  This loads kernel regs.
272 	 * Those not loaded individually below get their default
273 	 * values here.
274 	 */
275 	bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
276 	pcb2->pcb_flags &= ~PCB_FPUINITDONE; /* start with a clean FPU state */
277 
278 	/*
279 	 * Create a new fresh stack for the new thread.
280 	 */
281 	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
282 
283 	/* If the current thread has the trap bit set (i.e. a debugger had
284 	 * single stepped the process to the system call), we need to clear
285 	 * the trap flag from the new frame. Otherwise, the new thread will
286 	 * receive a (likely unexpected) SIGTRAP when it executes the first
287 	 * instruction after returning to userland.
288 	 */
289 	td->td_frame->tf_rflags &= ~PSL_T;
290 
291 	/*
292 	 * Set registers for trampoline to user mode.  Leave space for the
293 	 * return address on stack.  These are the kernel mode register values.
294 	 */
295 	pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pml4);
296 	pcb2->pcb_r12 = (register_t)fork_return;	    /* trampoline arg */
297 	pcb2->pcb_rbp = 0;
298 	pcb2->pcb_rsp = (register_t)td->td_frame - sizeof(void *);	/* space for return address */
299 	pcb2->pcb_rbx = (register_t)td;			    /* trampoline arg */
300 	pcb2->pcb_rip = (register_t)fork_trampoline;
301 	/*
302 	 * If we didn't copy the pcb, we'd need to set the following registers:
303 	 * pcb2->pcb_dr*:	cloned above.
304 	 * pcb2->pcb_savefpu:	cloned above.
305 	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
306 	 * pcb2->pcb_[fg]sbase: cloned above
307 	 */
308 
309 	/* Setup to release spin count in fork_exit(). */
310 	td->td_md.md_spinlock_count = 1;
311 	td->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
312 }
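
/*
 * Note: as in cpu_fork(), the new thread will be started through
 * cpu_switch() -> fork_trampoline() -> fork_return().  The trap frame
 * copied from td0 is only placeholder user state; the real upcall entry
 * point and stack are set later, in cpu_set_upcall_kse() below.
 */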
313 
314 /*
315  * Set the machine state for performing an upcall that has to
316  * be done in thread_userret() so that those upcalls generated
317  * in thread_userret() itself can be done as well.
318  */
319 void
320 cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
321 	stack_t *stack)
322 {
323 
324 	/*
325 	 * Do any extra cleaning that needs to be done.
326 	 * The thread may have optional components
327 	 * that are not present in a fresh thread.
328 	 * This may be a recycled thread so make it look
329 	 * as though it's newly allocated.
330 	 */
331 	cpu_thread_clean(td);
332 
333 #ifdef COMPAT_IA32
334 	if (td->td_proc->p_sysent == &ia32_freebsd_sysvec) {
335 		/*
336 	 	 * Set the trap frame to point at the beginning of the uts
337 		 * function.
338 		 */
339 		td->td_frame->tf_rbp = 0;
340 		td->td_frame->tf_rsp =
341 		   (((uintptr_t)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4;
342 		td->td_frame->tf_rip = (uintptr_t)entry;
343 
344 		/*
345 		 * Pass the address of the mailbox for this kse to the uts
346 		 * function as a parameter on the stack.
347 		 */
348 		suword32((void *)(td->td_frame->tf_rsp + sizeof(int32_t)),
349 		    (uint32_t)(uintptr_t)arg);
350 
351 		return;
352 	}
353 #endif
354 
355 	/*
356 	 * Set the trap frame to point at the beginning of the uts
357 	 * function.
358 	 */
359 	td->td_frame->tf_rbp = 0;
360 	td->td_frame->tf_rsp =
361 	    ((register_t)stack->ss_sp + stack->ss_size) & ~0x0f;
362 	td->td_frame->tf_rsp -= 8;
363 	td->td_frame->tf_rip = (register_t)entry;
364 
365 	/*
366 	 * Pass the address of the mailbox for this kse to the uts
367 	 * function as a parameter on the stack.
368 	 */
369 	td->td_frame->tf_rdi = (register_t)arg;
370 }
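
/*
 * Note on the conventions used above: for native 64-bit processes the
 * upcall argument travels in %rdi (the first integer argument register in
 * the SysV AMD64 ABI), and the stack pointer is aligned to 16 bytes and
 * then lowered by 8 so that, at the entry point, the stack looks as if a
 * call instruction had just pushed a return address.  For COMPAT_IA32
 * processes the argument is instead stored on the stack, 4 bytes above the
 * new %esp, matching the 32-bit calling convention.
 */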
371 
372 int
373 cpu_set_user_tls(struct thread *td, void *tls_base)
374 {
375 
376 	if ((u_int64_t)tls_base >= VM_MAXUSER_ADDRESS)
377 		return (EINVAL);
378 
379 #ifdef COMPAT_IA32
380 	if (td->td_proc->p_sysent == &ia32_freebsd_sysvec) {
381 		if (td == curthread) {
382 			critical_enter();
383 			td->td_pcb->pcb_gsbase = (register_t)tls_base;
384 			wrmsr(MSR_KGSBASE, td->td_pcb->pcb_gsbase);
385 			critical_exit();
386 		} else {
387 			td->td_pcb->pcb_gsbase = (register_t)tls_base;
388 		}
389 		return (0);
390 	}
391 #endif
392 	if (td == curthread) {
393 		critical_enter();
394 		td->td_pcb->pcb_fsbase = (register_t)tls_base;
395 		wrmsr(MSR_FSBASE, td->td_pcb->pcb_fsbase);
396 		critical_exit();
397 	} else {
398 		td->td_pcb->pcb_fsbase = (register_t)tls_base;
399 	}
400 	return (0);
401 }
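
/*
 * Note: on amd64 the user TLS base lives in the %fs segment base for
 * native processes and in the %gs base for 32-bit (COMPAT_IA32) processes,
 * which is why the COMPAT_IA32 branch above writes MSR_KGSBASE (the value
 * that becomes the user %gs base after the swapgs on return to user mode)
 * while the native branch writes MSR_FSBASE.  The critical section keeps
 * the pcb field and the MSR write together with respect to context
 * switches.  This function is typically reached from the thr_new(2)
 * thread-creation path.
 */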
402 
403 #ifdef SMP
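
/*
 * Resetting from an AP: cpu_reset() below stops the other CPUs, records
 * its own CPU id in cpu_reset_proxyid, points cpustop_restartfunc at
 * cpu_reset_proxy() and restarts the BSP.  The BSP then runs
 * cpu_reset_proxy(): it announces itself by setting cpu_reset_proxy_active
 * to 1, waits for the initiating CPU to acknowledge by setting it to 2,
 * stops that CPU and finally performs the reset via cpu_reset_real().
 */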
404 static void
405 cpu_reset_proxy()
406 {
407 
408 	cpu_reset_proxy_active = 1;
409 	while (cpu_reset_proxy_active == 1)
410 		;	/* Wait for other cpu to see that we've started */
411 	stop_cpus((1<<cpu_reset_proxyid));
412 	printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
413 	DELAY(1000000);
414 	cpu_reset_real();
415 }
416 #endif
417 
418 void
419 cpu_reset()
420 {
421 #ifdef SMP
422 	u_int cnt, map;
423 
424 	if (smp_active) {
425 		map = PCPU_GET(other_cpus) & ~stopped_cpus;
426 		if (map != 0) {
427 			printf("cpu_reset: Stopping other CPUs\n");
428 			stop_cpus(map);
429 		}
430 
431 		if (PCPU_GET(cpuid) != 0) {
432 			cpu_reset_proxyid = PCPU_GET(cpuid);
433 			cpustop_restartfunc = cpu_reset_proxy;
434 			cpu_reset_proxy_active = 0;
435 			printf("cpu_reset: Restarting BSP\n");
436 
437 			/* Restart CPU #0. */
438 			atomic_store_rel_int(&started_cpus, 1 << 0);
439 
440 			cnt = 0;
441 			while (cpu_reset_proxy_active == 0 && cnt < 10000000)
442 				cnt++;	/* Wait for BSP to announce restart */
443 			if (cpu_reset_proxy_active == 0)
444 				printf("cpu_reset: Failed to restart BSP\n");
445 			enable_intr();
446 			cpu_reset_proxy_active = 2;
447 
448 			while (1);
449 			/* NOTREACHED */
450 		}
451 
452 		DELAY(1000000);
453 	}
454 #endif
455 	cpu_reset_real();
456 	/* NOTREACHED */
457 }
458 
459 static void
460 cpu_reset_real()
461 {
462 	struct region_descriptor null_idt;
463 	int b;
464 
465 	disable_intr();
466 
467 	/*
468 	 * Attempt to do a CPU reset via the keyboard controller.  We do
469 	 * not turn off GateA20 here, as any machine that fails to do the
470 	 * reset would then end up in no man's land.
471 	 */
472 	outb(IO_KBD + 4, 0xFE);
473 	DELAY(500000);	/* wait 0.5 sec to see if that did it */
474 
475 	/*
476 	 * Attempt to force a reset via the Reset Control register at
477 	 * I/O port 0xcf9.  Bit 2 forces a system reset when it
478 	 * transitions from 0 to 1.  Bit 1 selects the type of reset
479 	 * to attempt: 0 selects a "soft" reset, and 1 selects a
480 	 * "hard" reset.  We try a "hard" reset.  The first write sets
481 	 * bit 1 to select a "hard" reset and clears bit 2.  The
482 	 * second write forces a 0 -> 1 transition in bit 2 to trigger
483 	 * a reset.
484 	 */
485 	outb(0xcf9, 0x2);
486 	outb(0xcf9, 0x6);
487 	DELAY(500000);  /* wait 0.5 sec to see if that did it */
488 
489 	/*
490 	 * Attempt to force a reset via the Fast A20 and Init register
491 	 * at I/O port 0x92.  Bit 1 serves as an alternate A20 gate.
492 	 * Bit 0 asserts INIT# when set to 1.  We are careful to only
493 	 * preserve bit 1 while setting bit 0.  We also must clear bit
494 	 * 0 before setting it if it isn't already clear.
495 	 */
496 	b = inb(0x92);
497 	if (b != 0xff) {
498 		if ((b & 0x1) != 0)
499 			outb(0x92, b & 0xfe);
500 		outb(0x92, b | 0x1);
501 		DELAY(500000);  /* wait 0.5 sec to see if that did it */
502 	}
503 
504 	printf("No known reset method worked, attempting CPU shutdown\n");
505 	DELAY(1000000);	/* wait 1 sec for printf to complete */
506 
507 	/* Wipe the IDT. */
508 	null_idt.rd_limit = 0;
509 	null_idt.rd_base = 0;
510 	lidt(&null_idt);
511 
512 	/* "good night, sweet prince .... <THUNK!>" */
513 	breakpoint();
514 
515 	/* NOTREACHED */
516 	while(1);
517 }
518 
519 /*
520  * Allocate an sf_buf for the given vm_page.  On this machine, however, there
521  * is no sf_buf object.  Instead, an opaque pointer to the given vm_page is
522  * returned.
523  */
524 struct sf_buf *
525 sf_buf_alloc(struct vm_page *m, int pri)
526 {
527 
528 	return ((struct sf_buf *)m);
529 }
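
/*
 * Note (explanatory sketch, not code from this file): because amd64 maps
 * all physical memory through the direct map, no temporary kernel mapping
 * is needed to reach the page's contents; the machine-dependent
 * sf_buf_kva() and sf_buf_page() macros can simply translate the page
 * pointer, e.g. via PHYS_TO_DMAP(VM_PAGE_TO_PHYS(...)).
 */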
530 
531 /*
532  * Free the sf_buf.  In fact, do nothing because there are no resources
533  * associated with the sf_buf.
534  */
535 void
536 sf_buf_free(struct sf_buf *sf)
537 {
538 }
539 
540 /*
541  * Software interrupt handler for queued VM system processing.
542  */
543 void
544 swi_vm(void *dummy)
545 {
546 	if (busdma_swi_pending != 0)
547 		busdma_swi();
548 }
549 
550 /*
551  * Tell whether this address is in some physical memory region.
552  * Currently used by the kernel coredump code in order to avoid
553  * dumping the ``ISA memory hole'' which could cause indefinite hangs,
554  * or other unpredictable behaviour.
555  */
556 
557 int
558 is_physical_memory(vm_paddr_t addr)
559 {
560 
561 #ifdef DEV_ISA
562 	/* The ISA ``memory hole''. */
563 	if (addr >= 0xa0000 && addr < 0x100000)
564 		return 0;
565 #endif
566 
567 	/*
568 	 * stuff other tests for known memory-mapped devices (PCI?)
569 	 * here
570 	 */
571 
572 	return 1;
573 }
574