/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)sys_machdep.c	5.5 (Berkeley) 1/19/91
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_capsicum.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysproto.h>
#include <sys/uio.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>		/* for kernel_map */
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <machine/frame.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/sysarch.h>
#include <machine/tss.h>
#include <machine/vmparam.h>

#include <security/audit/audit.h>

static void user_ldt_deref(struct proc_ldt *pldt);
static void user_ldt_derefl(struct proc_ldt *pldt);

/* Upper bound applied to the max_ldt_segment tunable. */
#define	MAX_LD		8192

/*
 * Number of descriptors in each per-process LDT.  Every LDT allocated in
 * this file is sized as max_ldt_segment descriptors, and all range checks
 * on user-supplied descriptor indices are made against this value.
 */
int max_ldt_segment = 512;
SYSCTL_INT(_machdep, OID_AUTO, max_ldt_segment, CTLFLAG_RDTUN,
    &max_ldt_segment, 0,
    "Maximum number of allowed LDT segments in the single address space");

/*
 * SYSINIT hook: clamp max_ldt_segment into the range [1, MAX_LD].
 */
static void
max_ldt_segment_init(void *arg __unused)
{

	if (max_ldt_segment <= 0)
		max_ldt_segment = 1;
	if (max_ldt_segment > MAX_LD)
		max_ldt_segment = MAX_LD;
}
SYSINIT(maxldt, SI_SUB_VM_CONF, SI_ORDER_ANY, max_ldt_segment_init, NULL);

/* Fallback definition of the syscall arguments if sysproto.h lacks it. */
#ifndef _SYS_SYSPROTO_H_
struct sysarch_args {
	int op;
	char *parms;
};
#endif

/*
 * Handle the I386_GET_LDT and I386_SET_LDT sysarch operations.
 *
 * uap_space selects where the struct i386_ldt_args lives: for
 * UIO_USERSPACE it is fetched with copyin() from uap->parms, otherwise
 * uap->parms is used directly as a pointer to the structure.
 *
 * For I386_SET_LDT with a non-NULL descs pointer the descriptors are
 * copied into a temporary M_TEMP buffer, which is handed to
 * amd64_set_ldt() and freed afterwards.  A request for more than
 * max_ldt_segment descriptors is rejected with EINVAL before anything
 * is allocated.  A NULL descs pointer is passed through to
 * amd64_set_ldt() as a NULL descriptor array.
 *
 * Returns 0 or an errno value.
 *
 * NOTE(review): the switch has no default case, so an op other than the
 * two above returns 0 without doing anything -- confirm all callers only
 * pass the LDT ops.
 */
int
sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space)
{
	struct i386_ldt_args *largs, la;
	struct user_segment_descriptor *lp;
	int error = 0;

	/*
	 * XXXKIB check that the BSM generation code knows to encode
	 * the op argument.
	 */
	AUDIT_ARG_CMD(uap->op);
	if (uap_space == UIO_USERSPACE) {
		error = copyin(uap->parms, &la, sizeof(struct i386_ldt_args));
		if (error != 0)
			return (error);
		largs = &la;
	} else
		largs = (struct i386_ldt_args *)uap->parms;

	switch (uap->op) {
	case I386_GET_LDT:
		error = amd64_get_ldt(td, largs);
		break;
	case I386_SET_LDT:
		/* Bound num before it is used to size the allocation below. */
		if (largs->descs != NULL && largs->num > max_ldt_segment)
			return (EINVAL);
		set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
		if (largs->descs != NULL) {
			lp = malloc(largs->num * sizeof(struct
			    user_segment_descriptor), M_TEMP, M_WAITOK);
			error = copyin(largs->descs, lp, largs->num *
			    sizeof(struct user_segment_descriptor));
			if (error == 0)
				error = amd64_set_ldt(td, largs, lp);
			free(lp, M_TEMP);
		} else {
			error = amd64_set_ldt(td, largs, NULL);
		}
		break;
	}
	return (error);
}

/*
 * Write a 32-bit base address into the descriptor that the per-CPU
 * gs32p pointer refers to.  Nothing is done unless td is the current
 * thread.  The low 24 bits of base go to sd_lobase and bits 24..31 to
 * sd_hibase; the descriptor is looked up and updated inside a critical
 * section.
 */
void
update_gdt_gsbase(struct thread *td, uint32_t base)
{
	struct user_segment_descriptor *sd;

	if (td != curthread)
		return;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	critical_enter();
	sd = PCPU_GET(gs32p);
	sd->sd_lobase = base & 0xffffff;
	sd->sd_hibase = (base >> 24) & 0xff;
	critical_exit();
}

/*
 * Write a 32-bit base address into the descriptor that the per-CPU
 * fs32p pointer refers to.  Same structure as update_gdt_gsbase():
 * a no-op unless td is the current thread, and the update itself is
 * done inside a critical section.
 */
void
update_gdt_fsbase(struct thread *td, uint32_t base)
{
	struct user_segment_descriptor *sd;

	if (td != curthread)
		return;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	critical_enter();
	sd = PCPU_GET(fs32p);
	sd->sd_lobase = base & 0xffffff;
	sd->sd_hibase = (base >> 24) & 0xff;
	critical_exit();
}

/*
 * sysarch() system call handler.
 *
 * Dispatches on uap->op.  The two LDT operations are forwarded to
 * sysarch_ldt().  All other operations are processed in two passes over
 * uap->op: the first switch copies in the argument structure (converting
 * the I386_* argument layouts into the AMD64_* ones for the XFPUSTATE and
 * PKRU operations), and the second switch performs the operation.  An op
 * that the second switch does not know yields EINVAL.
 *
 * When the kernel is built with CAPABILITY_MODE and the thread is in
 * capability mode, only the operations listed in the first case group
 * below are allowed; I386_SET_IOPERM and any unlisted op fail with
 * ECAPMODE.
 *
 * Returns 0 or an errno value.
 */
int
sysarch(struct thread *td, struct sysarch_args *uap)
{
	struct pcb *pcb;
	struct vm_map *map;
	uint32_t i386base;
	uint64_t a64base;
	struct i386_ioperm_args iargs;
	struct i386_get_xfpustate i386xfpu;
	struct i386_set_pkru i386pkru;
	struct amd64_get_xfpustate a64xfpu;
	struct amd64_set_pkru a64pkru;
	int error;

#ifdef CAPABILITY_MODE
	/*
	 * When adding new operations, add a new case statement here to
	 * explicitly indicate whether or not the operation is safe to
	 * perform in capability mode.
	 */
	if (IN_CAPABILITY_MODE(td)) {
		switch (uap->op) {
		case I386_GET_LDT:
		case I386_SET_LDT:
		case I386_GET_IOPERM:
		case I386_GET_FSBASE:
		case I386_SET_FSBASE:
		case I386_GET_GSBASE:
		case I386_SET_GSBASE:
		case I386_GET_XFPUSTATE:
		case I386_SET_PKRU:
		case I386_CLEAR_PKRU:
		case AMD64_GET_FSBASE:
		case AMD64_SET_FSBASE:
		case AMD64_GET_GSBASE:
		case AMD64_SET_GSBASE:
		case AMD64_GET_XFPUSTATE:
		case AMD64_SET_PKRU:
		case AMD64_CLEAR_PKRU:
			break;

		case I386_SET_IOPERM:
		default:
#ifdef KTRACE
			if (KTRPOINT(td, KTR_CAPFAIL))
				ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL);
#endif
			return (ECAPMODE);
		}
	}
#endif

	/* LDT ops copy in their own arguments and do their own auditing. */
	if (uap->op == I386_GET_LDT || uap->op == I386_SET_LDT)
		return (sysarch_ldt(td, uap, UIO_USERSPACE));

	error = 0;
	pcb = td->td_pcb;

	/*
	 * XXXKIB check that the BSM generation code knows to encode
	 * the op argument.
	 */
	AUDIT_ARG_CMD(uap->op);

	/*
	 * Pass 1: fetch the argument structure for ops that take one.
	 * The I386_* variants are widened into a64xfpu / a64pkru so that
	 * pass 2 can share one implementation with the AMD64_* variants.
	 */
	switch (uap->op) {
	case I386_GET_IOPERM:
	case I386_SET_IOPERM:
		if ((error = copyin(uap->parms, &iargs,
		    sizeof(struct i386_ioperm_args))) != 0)
			return (error);
		break;
	case I386_GET_XFPUSTATE:
		if ((error = copyin(uap->parms, &i386xfpu,
		    sizeof(struct i386_get_xfpustate))) != 0)
			return (error);
		a64xfpu.addr = (void *)(uintptr_t)i386xfpu.addr;
		a64xfpu.len = i386xfpu.len;
		break;
	case I386_SET_PKRU:
	case I386_CLEAR_PKRU:
		if ((error = copyin(uap->parms, &i386pkru,
		    sizeof(struct i386_set_pkru))) != 0)
			return (error);
		a64pkru.addr = (void *)(uintptr_t)i386pkru.addr;
		a64pkru.len = i386pkru.len;
		a64pkru.keyidx = i386pkru.keyidx;
		a64pkru.flags = i386pkru.flags;
		break;
	case AMD64_GET_XFPUSTATE:
		if ((error = copyin(uap->parms, &a64xfpu,
		    sizeof(struct amd64_get_xfpustate))) != 0)
			return (error);
		break;
	case AMD64_SET_PKRU:
	case AMD64_CLEAR_PKRU:
		if ((error = copyin(uap->parms, &a64pkru,
		    sizeof(struct amd64_set_pkru))) != 0)
			return (error);
		break;
	default:
		break;
	}

	/* Pass 2: perform the operation. */
	switch (uap->op) {
	case I386_GET_IOPERM:
		error = amd64_get_ioperm(td, &iargs);
		if (error == 0)
			error = copyout(&iargs, uap->parms,
			    sizeof(struct i386_ioperm_args));
		break;
	case I386_SET_IOPERM:
		error = amd64_set_ioperm(td, &iargs);
		break;
	case I386_GET_FSBASE:
		update_pcb_bases(pcb);
		/* The 64-bit pcb value is narrowed to 32 bits here. */
		i386base = pcb->pcb_fsbase;
		error = copyout(&i386base, uap->parms, sizeof(i386base));
		break;
	case I386_SET_FSBASE:
		error = copyin(uap->parms, &i386base, sizeof(i386base));
		if (!error) {
			set_pcb_flags(pcb, PCB_FULL_IRET);
			pcb->pcb_fsbase = i386base;
			td->td_frame->tf_fs = _ufssel;
			update_gdt_fsbase(td, i386base);
		}
		break;
	case I386_GET_GSBASE:
		update_pcb_bases(pcb);
		/* The 64-bit pcb value is narrowed to 32 bits here. */
		i386base = pcb->pcb_gsbase;
		error = copyout(&i386base, uap->parms, sizeof(i386base));
		break;
	case I386_SET_GSBASE:
		error = copyin(uap->parms, &i386base, sizeof(i386base));
		if (!error) {
			set_pcb_flags(pcb, PCB_FULL_IRET);
			pcb->pcb_gsbase = i386base;
			td->td_frame->tf_gs = _ugssel;
			update_gdt_gsbase(td, i386base);
		}
		break;
	case AMD64_GET_FSBASE:
		update_pcb_bases(pcb);
		error = copyout(&pcb->pcb_fsbase, uap->parms,
		    sizeof(pcb->pcb_fsbase));
		break;

	case AMD64_SET_FSBASE:
		error = copyin(uap->parms, &a64base, sizeof(a64base));
		if (!error) {
			/* Bases at or above VM_MAXUSER_ADDRESS are refused. */
			if (a64base < VM_MAXUSER_ADDRESS) {
				set_pcb_flags(pcb, PCB_FULL_IRET);
				pcb->pcb_fsbase = a64base;
				td->td_frame->tf_fs = _ufssel;
			} else
				error = EINVAL;
		}
		break;

	case AMD64_GET_GSBASE:
		update_pcb_bases(pcb);
		error = copyout(&pcb->pcb_gsbase, uap->parms,
		    sizeof(pcb->pcb_gsbase));
		break;

	case AMD64_SET_GSBASE:
		error = copyin(uap->parms, &a64base, sizeof(a64base));
		if (!error) {
			/* Bases at or above VM_MAXUSER_ADDRESS are refused. */
			if (a64base < VM_MAXUSER_ADDRESS) {
				set_pcb_flags(pcb, PCB_FULL_IRET);
				pcb->pcb_gsbase = a64base;
				td->td_frame->tf_gs = _ugssel;
			} else
				error = EINVAL;
		}
		break;

	case I386_GET_XFPUSTATE:
	case AMD64_GET_XFPUSTATE:
		/*
		 * The data copied out starts immediately after the
		 * struct savefpu at the head of the user save area, so
		 * the request may not exceed what follows that struct.
		 */
		if (a64xfpu.len > cpu_max_ext_state_size -
		    sizeof(struct savefpu))
			return (EINVAL);
		fpugetregs(td);
		error = copyout((char *)(get_pcb_user_save_td(td) + 1),
		    a64xfpu.addr, a64xfpu.len);
		break;

	case I386_SET_PKRU:
	case AMD64_SET_PKRU:
		/*
		 * Read-lock the map to synchronize with parallel
		 * pmap_vmspace_copy() on fork.
		 */
		map = &td->td_proc->p_vmspace->vm_map;
		vm_map_lock_read(map);
		error = pmap_pkru_set(PCPU_GET(curpmap),
		    (vm_offset_t)a64pkru.addr, (vm_offset_t)a64pkru.addr +
		    a64pkru.len, a64pkru.keyidx, a64pkru.flags);
		vm_map_unlock_read(map);
		break;

	case I386_CLEAR_PKRU:
	case AMD64_CLEAR_PKRU:
		/* Clearing takes no key index or flags; both must be 0. */
		if (a64pkru.flags != 0 || a64pkru.keyidx != 0) {
			error = EINVAL;
			break;
		}
		/* Same map read lock as taken for the SET_PKRU case. */
		map = &td->td_proc->p_vmspace->vm_map;
		vm_map_lock_read(map);
		error = pmap_pkru_clear(PCPU_GET(curpmap),
		    (vm_offset_t)a64pkru.addr,
		    (vm_offset_t)a64pkru.addr + a64pkru.len);
		vm_map_unlock_read(map);
		break;

	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * Enable or disable a range of bits in the thread's I/O permission
 * bitmap.
 *
 * The caller must pass priv_check(PRIV_IO) and securelevel_gt(.., 0);
 * the error from either check is returned unchanged.  The range
 * [start, start + length) must lie within IOPAGES * PAGE_SIZE * NBBY
 * bits, otherwise EINVAL is returned.
 *
 * If the thread has no private TSS yet (pcb_tssp == NULL), one is
 * allocated with room for the bitmap directly after the struct amd64tss,
 * the bitmap is filled with 0xff, the per-CPU common TSS is copied in,
 * and the CPU is switched over to the new TSS.
 *
 * For each bit in the range, uap->enable != 0 clears the bit and
 * uap->enable == 0 sets it.
 */
int
amd64_set_ioperm(struct thread *td, struct i386_ioperm_args *uap)
{
	char *iomap;
	struct amd64tss *tssp;
	struct system_segment_descriptor *tss_sd;
	struct pcb *pcb;
	u_int i;
	int error;

	if ((error = priv_check(td, PRIV_IO)) != 0)
		return (error);
	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
		return (error);
	/*
	 * The first test is presumably a wraparound check on
	 * start + length (the field types are declared elsewhere);
	 * the second bounds the range by the bitmap size in bits.
	 */
	if (uap->start > uap->start + uap->length ||
	    uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);

	/*
	 * XXX
	 * While this is restricted to root, we should probably figure out
	 * whether any other driver is using this i/o address, so as not to
	 * cause confusion.  This probably requires a global 'usage registry'.
	 */
	pcb = td->td_pcb;
	if (pcb->pcb_tssp == NULL) {
		tssp = kmem_malloc(ctob(IOPAGES + 1), M_WAITOK);
		pmap_pti_add_kva((vm_offset_t)tssp, (vm_offset_t)tssp +
		    ctob(IOPAGES + 1), false);
		/* The bitmap starts right after the TSS structure. */
		iomap = (char *)&tssp[1];
		memset(iomap, 0xff, IOPERM_BITMAP_SIZE);
		/*
		 * The per-CPU TSS is copied and the per-CPU TSS
		 * descriptor rewritten inside one critical section.
		 */
		critical_enter();
		/* Takes care of tss_rsp0. */
		memcpy(tssp, PCPU_PTR(common_tss), sizeof(struct amd64tss));
		tssp->tss_iobase = sizeof(*tssp);
		pcb->pcb_tssp = tssp;
		tss_sd = PCPU_GET(tss);
		tss_sd->sd_lobase = (u_long)tssp & 0xffffff;
		tss_sd->sd_hibase = ((u_long)tssp >> 24) & 0xfffffffffful;
		tss_sd->sd_type = SDT_SYSTSS;
		ltr(GSEL(GPROC0_SEL, SEL_KPL));
		PCPU_SET(tssp, tssp);
		critical_exit();
	} else
		iomap = (char *)&pcb->pcb_tssp[1];
	/* One bit per index: byte i >> 3, bit i & 7. */
	for (i = uap->start; i < uap->start + uap->length; i++) {
		if (uap->enable)
			iomap[i >> 3] &= ~(1 << (i & 7));
		else
			iomap[i >> 3] |= (1 << (i & 7));
	}
	return (error);
}

/*
 * Report the I/O permission state starting at bit uap->start.
 *
 * On return uap->enable is the inverse of the bitmap bit at start and
 * uap->length is the number of consecutive bits, beginning at start,
 * that share that state (scanning stops at the end of the bitmap).  If
 * the thread has no private TSS, uap->length is set to 0 and uap->enable
 * is left untouched.
 *
 * Returns EINVAL if start is beyond the bitmap, 0 otherwise.
 */
int
amd64_get_ioperm(struct thread *td, struct i386_ioperm_args *uap)
{
	int i, state;
	char *iomap;

	if (uap->start >= IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);
	if (td->td_pcb->pcb_tssp == NULL) {
		uap->length = 0;
		goto done;
	}

	iomap = (char *)&td->td_pcb->pcb_tssp[1];

	i = uap->start;
	state = (iomap[i >> 3] >> (i & 7)) & 1;
	uap->enable = !state;
	uap->length = 1;

	/* Extend the run while the following bits match the first one. */
	for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
		if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
			break;
		uap->length++;
	}

done:
	return (0);
}

/*
 * Update the GDT entry pointing to the LDT to point to the LDT of the
 * current process.
 */
static void
set_user_ldt(struct mdproc *mdp)
{

	*PCPU_GET(ldt) = mdp->md_ldt_sd;
	lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
}

/*
 * smp_rendezvous() action used by user_ldt_alloc().  arg is the process
 * whose LDT was just replaced.  The LDT is reloaded on this CPU only if
 * the process of the thread currently running here has the same md_ldt
 * pointer as that process.
 */
static void
set_user_ldt_rv(void *arg)
{
	struct proc *orig, *target;
	struct proc_ldt *ldt;

	orig = arg;
	target = curthread->td_proc;

	/*
	 * Acquire load; user_ldt_alloc() issues a release fence between
	 * writing md_ldt_sd and storing md_ldt.
	 */
	ldt = (void *)atomic_load_acq_ptr((uintptr_t *)&orig->p_md.md_ldt);
	if (target->p_md.md_ldt != ldt)
		return;

	set_user_ldt(&target->p_md);
}

/*
 * Return the LDT of process p, allocating one if needed.
 *
 * Must be called with dt_lock held (asserted) and returns with it held.
 * If the process already has an LDT and force is zero, the existing LDT
 * is returned immediately.  Otherwise dt_lock is dropped around the
 * M_WAITOK allocations and re-taken, so md_ldt is examined again
 * afterwards: if an LDT has appeared in the meantime and force is zero,
 * the fresh allocation is discarded in favour of it.
 *
 * With force non-zero and an existing LDT, the old contents are copied
 * into the new table and the reference on the old one is dropped.
 *
 * The new LDT is max_ldt_segment descriptors long, zero-filled, and
 * starts with a reference count of 1.
 */
struct proc_ldt *
user_ldt_alloc(struct proc *p, int force)
{
	struct proc_ldt *pldt, *new_ldt;
	struct mdproc *mdp;
	struct soft_segment_descriptor sldt;
	vm_offset_t sva;
	vm_size_t sz;

	mtx_assert(&dt_lock, MA_OWNED);
	mdp = &p->p_md;
	if (!force && mdp->md_ldt != NULL)
		return (mdp->md_ldt);
	/* Drop dt_lock across the M_WAITOK allocations. */
	mtx_unlock(&dt_lock);
	new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK);
	sz = max_ldt_segment * sizeof(struct user_segment_descriptor);
	new_ldt->ldt_base = kmem_malloc(sz, M_WAITOK | M_ZERO);
	sva = (uintptr_t)new_ldt->ldt_base;
	pmap_pti_add_kva(sva, sva + sz, false);
	new_ldt->ldt_refcnt = 1;
	/* Build the system descriptor that describes the new table. */
	sldt.ssd_base = sva;
	sldt.ssd_limit = sz - 1;
	sldt.ssd_type = SDT_SYSLDT;
	sldt.ssd_dpl = SEL_KPL;
	sldt.ssd_p = 1;
	sldt.ssd_long = 0;
	sldt.ssd_def32 = 0;
	sldt.ssd_gran = 0;
	mtx_lock(&dt_lock);
	/* Re-check: md_ldt may have been set while the lock was dropped. */
	pldt = mdp->md_ldt;
	if (pldt != NULL && !force) {
		pmap_pti_remove_kva(sva, sva + sz);
		kmem_free(new_ldt->ldt_base, sz);
		free(new_ldt, M_SUBPROC);
		return (pldt);
	}

	if (pldt != NULL) {
		bcopy(pldt->ldt_base, new_ldt->ldt_base, max_ldt_segment *
		    sizeof(struct user_segment_descriptor));
		user_ldt_derefl(pldt);
	}
	/*
	 * Publish: the descriptor is written first, then a release
	 * fence, then the md_ldt pointer.  set_user_ldt_rv() reads
	 * md_ldt with an acquire load.
	 */
	critical_enter();
	ssdtosyssd(&sldt, &p->p_md.md_ldt_sd);
	atomic_thread_fence_rel();
	mdp->md_ldt = new_ldt;
	critical_exit();
	/* Run set_user_ldt_rv() via smp_rendezvous(), passing p as its argument. */
	smp_rendezvous(NULL, set_user_ldt_rv, NULL, p);

	return (mdp->md_ldt);
}

/*
 * Detach the LDT from the process of td and drop the reference to it.
 *
 * Takes dt_lock itself; the lock is released either directly (no LDT
 * present) or by user_ldt_deref().  If td is the current thread, the
 * null selector is loaded into LDTR.
 */
void
user_ldt_free(struct thread *td)
{
	struct proc *p = td->td_proc;
	struct mdproc *mdp = &p->p_md;
	struct proc_ldt *pldt;

	mtx_lock(&dt_lock);
	if ((pldt = mdp->md_ldt) == NULL) {
		mtx_unlock(&dt_lock);
		return;
	}

	/* Clear the pointer first, fence, then wipe the descriptor. */
	critical_enter();
	mdp->md_ldt = NULL;
	atomic_thread_fence_rel();
	bzero(&mdp->md_ldt_sd, sizeof(mdp->md_ldt_sd));
	if (td == curthread)
		lldt(GSEL(GNULL_SEL, SEL_KPL));
	critical_exit();
	/* Drops the reference and releases dt_lock. */
	user_ldt_deref(pldt);
}

/*
 * Drop one reference on pldt.  When the count reaches zero the
 * descriptor table (max_ldt_segment entries) and the proc_ldt itself
 * are freed.  Does not touch dt_lock; in this file it is called only
 * from paths that hold it.
 */
static void
user_ldt_derefl(struct proc_ldt *pldt)
{
	vm_offset_t sva;
	vm_size_t sz;

	if (--pldt->ldt_refcnt == 0) {
		sva = (vm_offset_t)pldt->ldt_base;
		sz = max_ldt_segment * sizeof(struct user_segment_descriptor);
		pmap_pti_remove_kva(sva, sva + sz);
		kmem_free(pldt->ldt_base, sz);
		free(pldt, M_SUBPROC);
	}
}

/*
 * Drop one reference on pldt and release dt_lock.  The caller must hold
 * dt_lock on entry (asserted).
 */
static void
user_ldt_deref(struct proc_ldt *pldt)
{

	mtx_assert(&dt_lock, MA_OWNED);
	user_ldt_derefl(pldt);
	mtx_unlock(&dt_lock);
}

/*
 * Note for the authors of compat layers (linux, etc): copyout() in
 * the function below is not a problem since it presents data in
 * arch-specific format (i.e. i386-specific in this case), not in
 * the OS-specific one.
 */
/*
 * Copy out up to uap->num descriptors of the process LDT, starting at
 * index uap->start, to uap->descs.
 *
 * The count is clipped to the end of the table.  The descriptors are
 * first snapshotted into a temporary buffer under dt_lock and then
 * copied out after the lock is released.  On success td->td_retval[0]
 * holds the number of descriptors returned; it is 0 when the process has
 * no LDT, start is out of range, or num is 0.
 *
 * Each descriptor is read as a single uint64_t, which assumes that
 * sizeof(struct user_segment_descriptor) equals sizeof(uint64_t).
 */
int
amd64_get_ldt(struct thread *td, struct i386_ldt_args *uap)
{
	struct proc_ldt *pldt;
	struct user_segment_descriptor *lp;
	uint64_t *data;
	u_int i, num;
	int error;

#ifdef	DEBUG
	printf("amd64_get_ldt: start=%u num=%u descs=%p\n",
	    uap->start, uap->num, (void *)uap->descs);
#endif

	/*
	 * NOTE(review): md_ldt is sampled here, before dt_lock is taken
	 * below -- confirm the table cannot be replaced in between.
	 */
	pldt = td->td_proc->p_md.md_ldt;
	if (pldt == NULL || uap->start >= max_ldt_segment || uap->num == 0) {
		td->td_retval[0] = 0;
		return (0);
	}
	num = min(uap->num, max_ldt_segment - uap->start);
	lp = &((struct user_segment_descriptor *)(pldt->ldt_base))[uap->start];
	data = malloc(num * sizeof(struct user_segment_descriptor), M_TEMP,
	    M_WAITOK);
	mtx_lock(&dt_lock);
	for (i = 0; i < num; i++)
		data[i] = ((volatile uint64_t *)lp)[i];
	mtx_unlock(&dt_lock);
	error = copyout(data, uap->descs, num *
	    sizeof(struct user_segment_descriptor));
	free(data, M_TEMP);
	if (error == 0)
		td->td_retval[0] = num;
	return (error);
}

/*
 * Install or clear descriptors in the process LDT.
 *
 * descs is a kernel-resident array of uap->num descriptors, or NULL.
 * Three modes are distinguished:
 *
 *  - descs == NULL: zero the entries [start, start + num), clipped to
 *    the end of the table.  start == 0 together with num == 0 means the
 *    whole table; num == 0 with any other start is EINVAL.  Returns 0
 *    without doing anything if the process has no LDT or start is out
 *    of range.
 *
 *  - start == LDT_AUTO_ALLOC and num == 1: pick the first entry at
 *    index 16 or above whose type is SDT_SYSNULL, store the descriptor
 *    there and report the chosen index in uap->start.  ENOSPC if no
 *    such entry exists.
 *
 *  - otherwise: store the descriptors at [start, start + num), which
 *    must lie entirely inside the table (EINVAL if not).
 *
 * Before anything is stored every descriptor is validated: system
 * descriptor types are refused with EACCES (SDT_SYSNULL is accepted but
 * has its present bit cleared), conforming code segments must be marked
 * present, unknown types give EINVAL, and any present descriptor must
 * have DPL equal to SEL_UPL.  Note that validation may modify descs[].
 *
 * On success td->td_retval[0] is set to uap->start.
 */
int
amd64_set_ldt(struct thread *td, struct i386_ldt_args *uap,
    struct user_segment_descriptor *descs)
{
	struct mdproc *mdp;
	struct proc_ldt *pldt;
	struct user_segment_descriptor *dp;
	struct proc *p;
	u_int largest_ld, i;
	int error;

#ifdef	DEBUG
	printf("amd64_set_ldt: start=%u num=%u descs=%p\n",
	    uap->start, uap->num, (void *)uap->descs);
#endif
	mdp = &td->td_proc->p_md;
	error = 0;

	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	p = td->td_proc;
	if (descs == NULL) {
		/* Free descriptors */
		if (uap->start == 0 && uap->num == 0)
			uap->num = max_ldt_segment;
		if (uap->num == 0)
			return (EINVAL);
		if ((pldt = mdp->md_ldt) == NULL ||
		    uap->start >= max_ldt_segment)
			return (0);
		largest_ld = uap->start + uap->num;
		if (largest_ld > max_ldt_segment)
			largest_ld = max_ldt_segment;
		/* start + num wrapped around. */
		if (largest_ld < uap->start)
			return (EINVAL);
		mtx_lock(&dt_lock);
		for (i = uap->start; i < largest_ld; i++)
			((volatile uint64_t *)(pldt->ldt_base))[i] = 0;
		mtx_unlock(&dt_lock);
		return (0);
	}

	if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) {
		/* verify range of descriptors to modify */
		largest_ld = uap->start + uap->num;
		if (uap->start >= max_ldt_segment ||
		    largest_ld > max_ldt_segment ||
		    largest_ld < uap->start)
			return (EINVAL);
	}

	/* Check descriptors for access violations */
	for (i = 0; i < uap->num; i++) {
		dp = &descs[i];

		switch (dp->sd_type) {
		case SDT_SYSNULL:	/* system null */
			dp->sd_p = 0;
			break;
		case SDT_SYS286TSS:
		case SDT_SYSLDT:
		case SDT_SYS286BSY:
		case SDT_SYS286CGT:
		case SDT_SYSTASKGT:
		case SDT_SYS286IGT:
		case SDT_SYS286TGT:
		case SDT_SYSNULL2:
		case SDT_SYSTSS:
		case SDT_SYSNULL3:
		case SDT_SYSBSY:
		case SDT_SYSCGT:
		case SDT_SYSNULL4:
		case SDT_SYSIGT:
		case SDT_SYSTGT:
			return (EACCES);

		/* memory segment types */
		case SDT_MEMEC:   /* memory execute only conforming */
		case SDT_MEMEAC:  /* memory execute only accessed conforming */
		case SDT_MEMERC:  /* memory execute read conforming */
		case SDT_MEMERAC: /* memory execute read accessed conforming */
			/* Must be "present" if executable and conforming. */
			if (dp->sd_p == 0)
				return (EACCES);
			break;
		case SDT_MEMRO:   /* memory read only */
		case SDT_MEMROA:  /* memory read only accessed */
		case SDT_MEMRW:   /* memory read write */
		case SDT_MEMRWA:  /* memory read write accessed */
		case SDT_MEMROD:  /* memory read only expand dwn limit */
		case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
		case SDT_MEMRWD:  /* memory read write expand dwn limit */
		case SDT_MEMRWDA: /* memory read write expand dwn lim accessed */
		case SDT_MEME:    /* memory execute only */
		case SDT_MEMEA:   /* memory execute only accessed */
		case SDT_MEMER:   /* memory execute read */
		case SDT_MEMERA:  /* memory execute read accessed */
			break;
		default:
			return (EINVAL);
		}

		/* Only user (ring-3) descriptors may be present. */
		if ((dp->sd_p != 0) && (dp->sd_dpl != SEL_UPL))
			return (EACCES);
	}

	if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) {
		/* Allocate a free slot */
		mtx_lock(&dt_lock);
		pldt = user_ldt_alloc(p, 0);
		if (pldt == NULL) {
			mtx_unlock(&dt_lock);
			return (ENOMEM);
		}

		/*
		 * start scanning a bit up to leave room for NVidia and
		 * Wine, which still use the "Blat" method of allocation.
		 */
		i = 16;
		dp = &((struct user_segment_descriptor *)(pldt->ldt_base))[i];
		for (; i < max_ldt_segment; ++i, ++dp) {
			if (dp->sd_type == SDT_SYSNULL)
				break;
		}
		if (i >= max_ldt_segment) {
			mtx_unlock(&dt_lock);
			return (ENOSPC);
		}
		/* Report the chosen slot back through uap->start. */
		uap->start = i;
		error = amd64_set_ldt_data(td, i, 1, descs);
		mtx_unlock(&dt_lock);
	} else {
		largest_ld = uap->start + uap->num;
		if (largest_ld > max_ldt_segment)
			return (EINVAL);
		mtx_lock(&dt_lock);
		if (user_ldt_alloc(p, 0) != NULL) {
			error = amd64_set_ldt_data(td, uap->start, uap->num,
			    descs);
		}
		mtx_unlock(&dt_lock);
	}
	if (error == 0)
		td->td_retval[0] = uap->start;
	return (error);
}

/*
 * Store num descriptors from descs into the process LDT starting at
 * index start.
 *
 * The caller must hold dt_lock (asserted).  The process LDT pointer is
 * dereferenced without a NULL check and no range checking is performed
 * here.  Each descriptor is written as a single volatile uint64_t,
 * which assumes a descriptor is exactly 64 bits wide.
 *
 * Always returns 0.
 */
int
amd64_set_ldt_data(struct thread *td, int start, int num,
    struct user_segment_descriptor *descs)
{
	struct mdproc *mdp;
	struct proc_ldt *pldt;
	volatile uint64_t *dst, *src;
	int i;

	mtx_assert(&dt_lock, MA_OWNED);

	mdp = &td->td_proc->p_md;
	pldt = mdp->md_ldt;
	dst = (volatile uint64_t *)(pldt->ldt_base);
	src = (volatile uint64_t *)descs;
	for (i = 0; i < num; i++)
		dst[start + i] = src[i];
	return (0);
}