/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)sys_machdep.c	5.5 (Berkeley) 1/19/91
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_capsicum.h"
#include "opt_kstack_pages.h"
#include "opt_ktrace.h"		/* added: KTRACE is used below */

#include <sys/param.h>
#include <sys/capability.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/ktrace.h>		/* added: for KTRPOINT()/ktrcapfail() */
#include <sys/smp.h>
#include <sys/sysproto.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
#include <machine/proc.h>
#include <machine/sysarch.h>

#include <security/audit/audit.h>

#ifdef XEN
#include <machine/xen/xenfunc.h>

void i386_reset_ldt(struct proc_ldt *pldt);

void
i386_reset_ldt(struct proc_ldt *pldt)
{
	xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len);
}
#else
#define	i386_reset_ldt(x)
#endif

#include <vm/vm_kern.h>		/* for kernel_map */

#define	MAX_LD			8192
#define	LD_PER_PAGE		512
#define	NEW_MAX_LD(num)		((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1))
#define	SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
#define	NULL_LDT_BASE		((caddr_t)NULL)

#ifdef SMP
static void set_user_ldt_rv(struct vmspace *vmsp);
#endif
static int i386_set_ldt_data(struct thread *, int start, int num,
    union descriptor *descs);
static int i386_ldt_grow(struct thread *td, int len);

#ifndef _SYS_SYSPROTO_H_
struct sysarch_args {
	int op;
	char *parms;
};
#endif
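
/*
 * Illustrative sketch (annotation, not part of the original file): userland
 * reaches the handler below through the sysarch(2) system call, passing an
 * operation code and a pointer to an op-specific argument structure, e.g.:
 *
 *	union descriptor desc;
 *	struct i386_ldt_args la = { .start = 0, .num = 1, .descs = &desc };
 *	if (sysarch(I386_GET_LDT, &la) == -1)
 *		err(1, "sysarch");
 *
 * Whether a program uses raw sysarch(2) or a libc convenience wrapper such
 * as i386_get_ldt(3) is a userland detail; the sketch is for orientation
 * only.
 */
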
int
sysarch(td, uap)
	struct thread *td;
	register struct sysarch_args *uap;
{
	int error;
	union descriptor *lp;
	union {
		struct i386_ldt_args largs;
		struct i386_ioperm_args iargs;
	} kargs;
	uint32_t base;
	struct segment_descriptor sd, *sdp;

	AUDIT_ARG_CMD(uap->op);

#ifdef CAPABILITY_MODE
	/*
	 * When adding new operations, add a new case statement here to
	 * explicitly indicate whether or not the operation is safe to
	 * perform in capability mode.
	 */
	if (IN_CAPABILITY_MODE(td)) {
		switch (uap->op) {
		case I386_GET_LDT:
		case I386_SET_LDT:
		case I386_GET_IOPERM:
		case I386_GET_FSBASE:
		case I386_SET_FSBASE:
		case I386_GET_GSBASE:
		case I386_SET_GSBASE:
			break;

		case I386_SET_IOPERM:
		default:
#ifdef KTRACE
			if (KTRPOINT(td, KTR_CAPFAIL))
				ktrcapfail(CAPFAIL_SYSCALL, 0, 0);
#endif
			return (ECAPMODE);
		}
	}
#endif

	switch (uap->op) {
	case I386_GET_IOPERM:
	case I386_SET_IOPERM:
		if ((error = copyin(uap->parms, &kargs.iargs,
		    sizeof(struct i386_ioperm_args))) != 0)
			return (error);
		break;
	case I386_GET_LDT:
	case I386_SET_LDT:
		if ((error = copyin(uap->parms, &kargs.largs,
		    sizeof(struct i386_ldt_args))) != 0)
			return (error);
		if (kargs.largs.num > MAX_LD || kargs.largs.num <= 0)
			return (EINVAL);
		break;
	default:
		break;
	}

	switch (uap->op) {
	case I386_GET_LDT:
		error = i386_get_ldt(td, &kargs.largs);
		break;
	case I386_SET_LDT:
		if (kargs.largs.descs != NULL) {
			lp = (union descriptor *)kmem_alloc(kernel_map,
			    kargs.largs.num * sizeof(union descriptor));
			if (lp == NULL) {
				error = ENOMEM;
				break;
			}
			error = copyin(kargs.largs.descs, lp,
			    kargs.largs.num * sizeof(union descriptor));
			if (error == 0)
				error = i386_set_ldt(td, &kargs.largs, lp);
			kmem_free(kernel_map, (vm_offset_t)lp,
			    kargs.largs.num * sizeof(union descriptor));
		} else {
			error = i386_set_ldt(td, &kargs.largs, NULL);
		}
		break;
	case I386_GET_IOPERM:
		error = i386_get_ioperm(td, &kargs.iargs);
		if (error == 0)
			error = copyout(&kargs.iargs, uap->parms,
			    sizeof(struct i386_ioperm_args));
		break;
	case I386_SET_IOPERM:
		error = i386_set_ioperm(td, &kargs.iargs);
		break;
	case I386_VM86:
		error = vm86_sysarch(td, uap->parms);
		break;
	case I386_GET_FSBASE:
		sdp = &td->td_pcb->pcb_fsd;
		base = sdp->sd_hibase << 24 | sdp->sd_lobase;
		error = copyout(&base, uap->parms, sizeof(base));
		break;
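		/*
		 * Worked example (annotation, not original code): a flat
		 * 32-bit segment base is split across the descriptor as
		 * 24 low bits in sd_lobase and 8 high bits in sd_hibase.
		 * For base 0xbfbfe000, sd_lobase = 0xbfe000 and
		 * sd_hibase = 0xbf, so the reconstruction above recovers
		 * (0xbf << 24) | 0xbfe000 == 0xbfbfe000.  The SET cases
		 * below perform the inverse split.
		 */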
	case I386_SET_FSBASE:
		error = copyin(uap->parms, &base, sizeof(base));
		if (!error) {
			/*
			 * Construct a descriptor and store it in the pcb for
			 * the next context switch.  Also store it in the gdt
			 * so that the load of tf_fs into %fs will activate it
			 * at return to userland.
			 */
			sd.sd_lobase = base & 0xffffff;
			sd.sd_hibase = (base >> 24) & 0xff;
#ifdef XEN
			/* need to do nosegneg like Linux */
			sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
#else
			sd.sd_lolimit = 0xffff;	/* 4GB limit, wraps around */
#endif
			sd.sd_hilimit = 0xf;
			sd.sd_type = SDT_MEMRWA;
			sd.sd_dpl = SEL_UPL;
			sd.sd_p = 1;
			sd.sd_xx = 0;
			sd.sd_def32 = 1;
			sd.sd_gran = 1;
			critical_enter();
			td->td_pcb->pcb_fsd = sd;
#ifdef XEN
			HYPERVISOR_update_descriptor(
			    vtomach(&PCPU_GET(fsgs_gdt)[0]),
			    *(uint64_t *)&sd);
#else
			PCPU_GET(fsgs_gdt)[0] = sd;
#endif
			critical_exit();
			td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
		}
		break;
	case I386_GET_GSBASE:
		sdp = &td->td_pcb->pcb_gsd;
		base = sdp->sd_hibase << 24 | sdp->sd_lobase;
		error = copyout(&base, uap->parms, sizeof(base));
		break;
	case I386_SET_GSBASE:
		error = copyin(uap->parms, &base, sizeof(base));
		if (!error) {
			/*
			 * Construct a descriptor and store it in the pcb for
			 * the next context switch.  Also store it in the gdt
			 * because we have to do a load_gs() right now.
			 */
			sd.sd_lobase = base & 0xffffff;
			sd.sd_hibase = (base >> 24) & 0xff;

#ifdef XEN
			/* need to do nosegneg like Linux */
			sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
#else
			sd.sd_lolimit = 0xffff;	/* 4GB limit, wraps around */
#endif
			sd.sd_hilimit = 0xf;
			sd.sd_type = SDT_MEMRWA;
			sd.sd_dpl = SEL_UPL;
			sd.sd_p = 1;
			sd.sd_xx = 0;
			sd.sd_def32 = 1;
			sd.sd_gran = 1;
			critical_enter();
			td->td_pcb->pcb_gsd = sd;
#ifdef XEN
			HYPERVISOR_update_descriptor(
			    vtomach(&PCPU_GET(fsgs_gdt)[1]),
			    *(uint64_t *)&sd);
#else
			PCPU_GET(fsgs_gdt)[1] = sd;
#endif
			critical_exit();
			load_gs(GSEL(GUGS_SEL, SEL_UPL));
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}
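
/*
 * Annotation (not original code): i386_extend_pcb() below hangs a private
 * TSS with an I/O permission bitmap off the pcb.  Assuming IOPAGES == 2
 * and PAGE_SIZE == 4096 (the usual i386 values), the ctob(IOPAGES + 1)
 * allocation is laid out roughly as:
 *
 *	ext + 0x0000	struct pcb_ext (including ext_tss)
 *	ext + 0x0fd0	vm86 interrupt redirection map (32 bytes)
 *	ext + 0x0ff0	I/O permission bitmap (8 KB, one bit per port)
 *			... followed by 16 pad bytes filled with 0xff,
 *			satisfying the TSS terminator requirement.
 */
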
int
i386_extend_pcb(struct thread *td)
{
	int i, offset;
	u_long *addr;
	struct pcb_ext *ext;
	struct soft_segment_descriptor ssd = {
		0,			/* segment base address (overwritten) */
		ctob(IOPAGES + 1) - 1,	/* length */
		SDT_SYS386TSS,		/* segment type */
		0,			/* priority level */
		1,			/* descriptor present */
		0, 0,
		0,			/* default 32 size */
		0			/* granularity */
	};

	ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1));
	if (ext == 0)
		return (ENOMEM);
	bzero(ext, sizeof(struct pcb_ext));
	/* -16 is so we can convert a trapframe into vm86trapframe in place */
	ext->ext_tss.tss_esp0 = td->td_kstack + ctob(KSTACK_PAGES) -
	    sizeof(struct pcb) - 16;
	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	/*
	 * The last byte of the i/o map must be followed by an 0xff byte.
	 * We arbitrarily allocate 16 bytes here, to keep the starting
	 * address on a doubleword boundary.
	 */
	offset = PAGE_SIZE - 16;
	ext->ext_tss.tss_ioopt =
	    (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16;
	ext->ext_iomap = (caddr_t)ext + offset;
	ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32;

	addr = (u_long *)ext->ext_vm86.vm86_intmap;
	for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++)
		*addr++ = ~0;

	ssd.ssd_base = (unsigned)&ext->ext_tss;
	ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext);
	ssdtosd(&ssd, &ext->ext_tssd);

	KASSERT(td == curthread, ("giving TSS to !curthread"));
	KASSERT(td->td_pcb->pcb_ext == 0, ("already have a TSS!"));

	/* Switch to the new TSS. */
	critical_enter();
	td->td_pcb->pcb_ext = ext;
	PCPU_SET(private_tss, 1);
	*PCPU_GET(tss_gdt) = ext->ext_tssd;
	ltr(GSEL(GPROC0_SEL, SEL_KPL));
	critical_exit();

	return (0);
}

int
i386_set_ioperm(td, uap)
	struct thread *td;
	struct i386_ioperm_args *uap;
{
	int i, error;
	char *iomap;

	if ((error = priv_check(td, PRIV_IO)) != 0)
		return (error);
	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
		return (error);
	/*
	 * XXX
	 * While this is restricted to root, we should probably figure out
	 * whether any other driver is using this i/o address, so as not to
	 * cause confusion.  This probably requires a global 'usage registry'.
	 */

	if (td->td_pcb->pcb_ext == 0)
		if ((error = i386_extend_pcb(td)) != 0)
			return (error);
	iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;

	if (uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);

	for (i = uap->start; i < uap->start + uap->length; i++) {
		if (uap->enable)
			iomap[i >> 3] &= ~(1 << (i & 7));
		else
			iomap[i >> 3] |= (1 << (i & 7));
	}
	return (error);
}

int
i386_get_ioperm(td, uap)
	struct thread *td;
	struct i386_ioperm_args *uap;
{
	int i, state;
	char *iomap;

	if (uap->start >= IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);

	if (td->td_pcb->pcb_ext == 0) {
		uap->length = 0;
		goto done;
	}

	iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;

	i = uap->start;
	state = (iomap[i >> 3] >> (i & 7)) & 1;
	uap->enable = !state;
	uap->length = 1;

	for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
		if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
			break;
		uap->length++;
	}

done:
	return (0);
}
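
/*
 * Annotation (not original code): in the TSS I/O bitmap a *clear* bit
 * grants userland access to the corresponding port, so i386_set_ioperm()
 * above clears bits to enable access and sets bits to disable it.  For
 * example, granting the eight ports of a 16550 UART at 0x3f8-0x3ff clears
 * bits 0x3f8 through 0x3ff, i.e. all eight bits of iomap[0x7f].
 */
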
/*
 * Update the GDT entry pointing to the LDT to point to the LDT of the
 * current process.  Manage dt_lock holding/unholding autonomously.
 */
void
set_user_ldt(struct mdproc *mdp)
{
	struct proc_ldt *pldt;
	int dtlocked;

	dtlocked = 0;
	if (!mtx_owned(&dt_lock)) {
		mtx_lock_spin(&dt_lock);
		dtlocked = 1;
	}

	pldt = mdp->md_ldt;
#ifdef XEN
	i386_reset_ldt(pldt);
	PCPU_SET(currentldt, (int)pldt);
#else
#ifdef SMP
	gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd;
#else
	gdt[GUSERLDT_SEL].sd = pldt->ldt_sd;
#endif
	lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
	PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
#endif /* XEN */
	if (dtlocked)
		mtx_unlock_spin(&dt_lock);
}

#ifdef SMP
static void
set_user_ldt_rv(struct vmspace *vmsp)
{
	struct thread *td;

	td = curthread;
	if (vmsp != td->td_proc->p_vmspace)
		return;

	set_user_ldt(&td->td_proc->p_md);
}
#endif

#ifdef XEN

/*
 * dt_lock must be held.  Returns with dt_lock held.
 */
struct proc_ldt *
user_ldt_alloc(struct mdproc *mdp, int len)
{
	struct proc_ldt *pldt, *new_ldt;

	mtx_assert(&dt_lock, MA_OWNED);
	mtx_unlock_spin(&dt_lock);
	new_ldt = malloc(sizeof(struct proc_ldt),
	    M_SUBPROC, M_WAITOK);

	new_ldt->ldt_len = len = NEW_MAX_LD(len);
	new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
	    round_page(len * sizeof(union descriptor)));
	if (new_ldt->ldt_base == NULL) {
		free(new_ldt, M_SUBPROC);
		mtx_lock_spin(&dt_lock);
		return (NULL);
	}
	new_ldt->ldt_refcnt = 1;
	new_ldt->ldt_active = 0;

	mtx_lock_spin(&dt_lock);
	if ((pldt = mdp->md_ldt)) {
		if (len > pldt->ldt_len)
			len = pldt->ldt_len;
		bcopy(pldt->ldt_base, new_ldt->ldt_base,
		    len * sizeof(union descriptor));
	} else {
		bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE);
	}
	mtx_unlock_spin(&dt_lock);	/* XXX kill once pmap locking fixed. */
	pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base,
	    new_ldt->ldt_len * sizeof(union descriptor));
	mtx_lock_spin(&dt_lock);	/* XXX kill once pmap locking fixed. */
	return (new_ldt);
}
#else
/*
 * dt_lock must be held.  Returns with dt_lock held.
 */
struct proc_ldt *
user_ldt_alloc(struct mdproc *mdp, int len)
{
	struct proc_ldt *pldt, *new_ldt;

	mtx_assert(&dt_lock, MA_OWNED);
	mtx_unlock_spin(&dt_lock);
	new_ldt = malloc(sizeof(struct proc_ldt),
	    M_SUBPROC, M_WAITOK);

	new_ldt->ldt_len = len = NEW_MAX_LD(len);
	new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
	    len * sizeof(union descriptor));
	if (new_ldt->ldt_base == NULL) {
		free(new_ldt, M_SUBPROC);
		mtx_lock_spin(&dt_lock);
		return (NULL);
	}
	new_ldt->ldt_refcnt = 1;
	new_ldt->ldt_active = 0;

	mtx_lock_spin(&dt_lock);
	gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base;
	gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1;
	ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd);

	if ((pldt = mdp->md_ldt) != NULL) {
		if (len > pldt->ldt_len)
			len = pldt->ldt_len;
		bcopy(pldt->ldt_base, new_ldt->ldt_base,
		    len * sizeof(union descriptor));
	} else
		bcopy(ldt, new_ldt->ldt_base, sizeof(ldt));

	return (new_ldt);
}
#endif /* !XEN */
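
/*
 * Annotation (not original code): NEW_MAX_LD() rounds an entry count up
 * to the LD_PER_PAGE (512) granularity used above; with 8-byte
 * descriptors each step is exactly one 4 KB page.  For example,
 * user_ldt_alloc(mdp, 5) sizes the LDT at 512 entries (4 KB) and a
 * request for 513 entries yields 1024 (8 KB).  Note the macro bumps an
 * already-aligned count to the next step: NEW_MAX_LD(512) == 1024.
 */
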
/*
 * Must be called with dt_lock held.  Returns with dt_lock released.
 */
void
user_ldt_free(struct thread *td)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt;

	mtx_assert(&dt_lock, MA_OWNED);
	if ((pldt = mdp->md_ldt) == NULL) {
		mtx_unlock_spin(&dt_lock);
		return;
	}

	if (td == curthread) {
#ifdef XEN
		i386_reset_ldt(&default_proc_ldt);
		PCPU_SET(currentldt, (int)&default_proc_ldt);
#else
		lldt(_default_ldt);
		PCPU_SET(currentldt, _default_ldt);
#endif
	}

	mdp->md_ldt = NULL;
	user_ldt_deref(pldt);
}

void
user_ldt_deref(struct proc_ldt *pldt)
{

	mtx_assert(&dt_lock, MA_OWNED);
	if (--pldt->ldt_refcnt == 0) {
		mtx_unlock_spin(&dt_lock);
		kmem_free(kernel_map, (vm_offset_t)pldt->ldt_base,
		    pldt->ldt_len * sizeof(union descriptor));
		free(pldt, M_SUBPROC);
	} else
		mtx_unlock_spin(&dt_lock);
}

/*
 * Note for the authors of compat layers (linux, etc): copyout() in
 * the function below is not a problem since it presents data in
 * arch-specific format (i.e. i386-specific in this case), not in
 * the OS-specific one.
 */
int
i386_get_ldt(td, uap)
	struct thread *td;
	struct i386_ldt_args *uap;
{
	int error = 0;
	struct proc_ldt *pldt;
	int nldt, num;
	union descriptor *lp;

#ifdef DEBUG
	printf("i386_get_ldt: start=%d num=%d descs=%p\n",
	    uap->start, uap->num, (void *)uap->descs);
#endif

	mtx_lock_spin(&dt_lock);
	if ((pldt = td->td_proc->p_md.md_ldt) != NULL) {
		nldt = pldt->ldt_len;
		lp = &((union descriptor *)(pldt->ldt_base))[uap->start];
		mtx_unlock_spin(&dt_lock);
		num = min(uap->num, nldt);
	} else {
		mtx_unlock_spin(&dt_lock);
		nldt = sizeof(ldt)/sizeof(ldt[0]);
		num = min(uap->num, nldt);
		lp = &ldt[uap->start];
	}

	if ((uap->start > (unsigned int)nldt) ||
	    ((unsigned int)num > (unsigned int)nldt) ||
	    ((unsigned int)(uap->start + num) > (unsigned int)nldt))
		return (EINVAL);

	error = copyout(lp, uap->descs, num * sizeof(union descriptor));
	if (!error)
		td->td_retval[0] = num;

	return (error);
}
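
/*
 * Illustrative sketch (annotation, not part of the original file): a
 * process can dump its first few LDT slots through the path above with,
 * e.g.:
 *
 *	union descriptor descs[4];
 *	struct i386_ldt_args la = { .start = 0, .num = 4, .descs = descs };
 *	if (sysarch(I386_GET_LDT, &la) == -1)
 *		err(1, "I386_GET_LDT");
 *
 * On success the syscall return value (td_retval[0] above) is the number
 * of descriptors actually copied out, which may be less than requested.
 */
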
int
i386_set_ldt(td, uap, descs)
	struct thread *td;
	struct i386_ldt_args *uap;
	union descriptor *descs;
{
	int error = 0, i;
	int largest_ld;
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt;
	union descriptor *dp;

#ifdef DEBUG
	printf("i386_set_ldt: start=%d num=%d descs=%p\n",
	    uap->start, uap->num, (void *)uap->descs);
#endif

	if (descs == NULL) {
		/* Free descriptors */
		if (uap->start == 0 && uap->num == 0) {
			/*
			 * Treat this as a special case, so userland needn't
			 * know magic number NLDT.
			 */
			uap->start = NLDT;
			uap->num = MAX_LD - NLDT;
		}
		if (uap->num == 0)
			return (EINVAL);
		mtx_lock_spin(&dt_lock);
		if ((pldt = mdp->md_ldt) == NULL ||
		    uap->start >= pldt->ldt_len) {
			mtx_unlock_spin(&dt_lock);
			return (0);
		}
		largest_ld = uap->start + uap->num;
		if (largest_ld > pldt->ldt_len)
			largest_ld = pldt->ldt_len;
		i = largest_ld - uap->start;
		bzero(&((union descriptor *)(pldt->ldt_base))[uap->start],
		    sizeof(union descriptor) * i);
		mtx_unlock_spin(&dt_lock);
		return (0);
	}

	if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) {
		/* verify range of descriptors to modify */
		largest_ld = uap->start + uap->num;
		if (uap->start >= MAX_LD || largest_ld > MAX_LD) {
			return (EINVAL);
		}
	}

	/* Check descriptors for access violations */
	for (i = 0; i < uap->num; i++) {
		dp = &descs[i];

		switch (dp->sd.sd_type) {
		case SDT_SYSNULL:	/* system null */
			dp->sd.sd_p = 0;
			break;
		case SDT_SYS286TSS:	/* system 286 TSS available */
		case SDT_SYSLDT:	/* system local descriptor table */
		case SDT_SYS286BSY:	/* system 286 TSS busy */
		case SDT_SYSTASKGT:	/* system task gate */
		case SDT_SYS286IGT:	/* system 286 interrupt gate */
		case SDT_SYS286TGT:	/* system 286 trap gate */
		case SDT_SYSNULL2:	/* undefined by Intel */
		case SDT_SYS386TSS:	/* system 386 TSS available */
		case SDT_SYSNULL3:	/* undefined by Intel */
		case SDT_SYS386BSY:	/* system 386 TSS busy */
		case SDT_SYSNULL4:	/* undefined by Intel */
		case SDT_SYS386IGT:	/* system 386 interrupt gate */
		case SDT_SYS386TGT:	/* system 386 trap gate */
		case SDT_SYS286CGT:	/* system 286 call gate */
		case SDT_SYS386CGT:	/* system 386 call gate */
			/*
			 * I can't think of any reason to allow a user proc
			 * to create a segment of these types.  They are
			 * for OS use only.
			 */
			return (EACCES);
			/*NOTREACHED*/

		/* memory segment types */
		case SDT_MEMEC:		/* memory execute only conforming */
		case SDT_MEMEAC:	/* memory execute only accessed conforming */
		case SDT_MEMERC:	/* memory execute read conforming */
		case SDT_MEMERAC:	/* memory execute read accessed conforming */
			/* Must be "present" if executable and conforming. */
			if (dp->sd.sd_p == 0)
				return (EACCES);
			break;
		case SDT_MEMRO:		/* memory read only */
		case SDT_MEMROA:	/* memory read only accessed */
		case SDT_MEMRW:		/* memory read write */
		case SDT_MEMRWA:	/* memory read write accessed */
		case SDT_MEMROD:	/* memory read only expand dwn limit */
		case SDT_MEMRODA:	/* memory read only expand dwn lim accessed */
		case SDT_MEMRWD:	/* memory read write expand dwn limit */
		case SDT_MEMRWDA:	/* memory read write expand dwn lim accessed */
		case SDT_MEME:		/* memory execute only */
		case SDT_MEMEA:		/* memory execute only accessed */
		case SDT_MEMER:		/* memory execute read */
		case SDT_MEMERA:	/* memory execute read accessed */
			break;
		default:
			return (EINVAL);
			/*NOTREACHED*/
		}

		/* Only user (ring-3) descriptors may be present. */
		if ((dp->sd.sd_p != 0) && (dp->sd.sd_dpl != SEL_UPL))
			return (EACCES);
	}
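
	/*
	 * Annotation (not original code): a descriptor that passes the
	 * checks above looks like a flat 4 GB ring-3 data segment, e.g.:
	 *
	 *	union descriptor desc;
	 *	bzero(&desc, sizeof(desc));
	 *	desc.sd.sd_lolimit = 0xffff;
	 *	desc.sd.sd_hilimit = 0xf;
	 *	desc.sd.sd_type = SDT_MEMRWA;
	 *	desc.sd.sd_dpl = SEL_UPL;
	 *	desc.sd.sd_p = 1;
	 *	desc.sd.sd_def32 = 1;
	 *	desc.sd.sd_gran = 1;
	 *
	 * Anything marked present with sd_dpl != SEL_UPL was rejected just
	 * above with EACCES.
	 */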

	if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) {
		/* Allocate a free slot */
		mtx_lock_spin(&dt_lock);
		if ((pldt = mdp->md_ldt) == NULL) {
			if ((error = i386_ldt_grow(td, NLDT + 1))) {
				mtx_unlock_spin(&dt_lock);
				return (error);
			}
			pldt = mdp->md_ldt;
		}
again:
		/*
		 * start scanning a bit up to leave room for NVidia and
		 * Wine, which still use the "Blat" method of allocation.
		 */
		dp = &((union descriptor *)(pldt->ldt_base))[NLDT];
		for (i = NLDT; i < pldt->ldt_len; ++i) {
			if (dp->sd.sd_type == SDT_SYSNULL)
				break;
			dp++;
		}
		if (i >= pldt->ldt_len) {
			if ((error = i386_ldt_grow(td, pldt->ldt_len+1))) {
				mtx_unlock_spin(&dt_lock);
				return (error);
			}
			goto again;
		}
		uap->start = i;
		error = i386_set_ldt_data(td, i, 1, descs);
		mtx_unlock_spin(&dt_lock);
	} else {
		largest_ld = uap->start + uap->num;
		mtx_lock_spin(&dt_lock);
		if (!(error = i386_ldt_grow(td, largest_ld))) {
			error = i386_set_ldt_data(td, uap->start, uap->num,
			    descs);
		}
		mtx_unlock_spin(&dt_lock);
	}
	if (error == 0)
		td->td_retval[0] = uap->start;
	return (error);
}

#ifdef XEN
static int
i386_set_ldt_data(struct thread *td, int start, int num,
    union descriptor *descs)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt = mdp->md_ldt;

	mtx_assert(&dt_lock, MA_OWNED);

	while (num) {
		xen_update_descriptor(
		    &((union descriptor *)(pldt->ldt_base))[start],
		    descs);
		num--;
		start++;
		descs++;
	}
	return (0);
}
#else
static int
i386_set_ldt_data(struct thread *td, int start, int num,
    union descriptor *descs)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt = mdp->md_ldt;

	mtx_assert(&dt_lock, MA_OWNED);

	/* Fill in range */
	bcopy(descs,
	    &((union descriptor *)(pldt->ldt_base))[start],
	    num * sizeof(union descriptor));
	return (0);
}
#endif /* !XEN */
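
/*
 * Illustrative sketch (annotation, not part of the original file): the
 * usual consumer of the LDT_AUTO_ALLOC path above is thread-local-storage
 * setup, where userland asks the kernel to pick a free slot, e.g.:
 *
 *	struct i386_ldt_args la = {
 *		.start = LDT_AUTO_ALLOC, .num = 1, .descs = &desc
 *	};
 *	sysarch(I386_SET_LDT, &la);	(returns the chosen slot)
 *
 * The slot index comes back as the syscall return value via td_retval[0],
 * set at the end of i386_set_ldt() above.
 */
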
static int
i386_ldt_grow(struct thread *td, int len)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *new_ldt, *pldt;
	caddr_t old_ldt_base = NULL_LDT_BASE;
	int old_ldt_len = 0;

	mtx_assert(&dt_lock, MA_OWNED);

	if (len > MAX_LD)
		return (ENOMEM);
	if (len < NLDT + 1)
		len = NLDT + 1;

	/* Allocate a user ldt. */
	if ((pldt = mdp->md_ldt) == NULL || len > pldt->ldt_len) {
		new_ldt = user_ldt_alloc(mdp, len);
		if (new_ldt == NULL)
			return (ENOMEM);
		pldt = mdp->md_ldt;

		if (pldt != NULL) {
			if (new_ldt->ldt_len <= pldt->ldt_len) {
				/*
				 * We just lost the race for allocation, so
				 * free the new object and return.
				 */
				mtx_unlock_spin(&dt_lock);
				kmem_free(kernel_map,
				    (vm_offset_t)new_ldt->ldt_base,
				    new_ldt->ldt_len * sizeof(union descriptor));
				free(new_ldt, M_SUBPROC);
				mtx_lock_spin(&dt_lock);
				return (0);
			}

			/*
			 * We have to substitute the current LDT entry for
			 * curproc with the new one since its size grew.
			 */
			old_ldt_base = pldt->ldt_base;
			old_ldt_len = pldt->ldt_len;
			pldt->ldt_sd = new_ldt->ldt_sd;
			pldt->ldt_base = new_ldt->ldt_base;
			pldt->ldt_len = new_ldt->ldt_len;
		} else
			mdp->md_ldt = pldt = new_ldt;
#ifdef SMP
		/*
		 * Signal other cpus to reload ldt.  We need to unlock dt_lock
		 * here because other CPUs will contend for it, since their
		 * curthreads do not hold the lock and would block when trying
		 * to acquire it.
		 */
		mtx_unlock_spin(&dt_lock);
		smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv,
		    NULL, td->td_proc->p_vmspace);
#else
		set_user_ldt(&td->td_proc->p_md);
		mtx_unlock_spin(&dt_lock);
#endif
		if (old_ldt_base != NULL_LDT_BASE) {
			kmem_free(kernel_map, (vm_offset_t)old_ldt_base,
			    old_ldt_len * sizeof(union descriptor));
			free(new_ldt, M_SUBPROC);
		}
		mtx_lock_spin(&dt_lock);
	}
	return (0);
}