1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2003 Peter Wemm. 5 * Copyright (c) 1990 The Regents of the University of California. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_capsicum.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/capsicum.h> 43 #include <sys/kernel.h> 44 #include <sys/lock.h> 45 #include <sys/malloc.h> 46 #include <sys/mutex.h> 47 #include <sys/pcpu.h> 48 #include <sys/priv.h> 49 #include <sys/proc.h> 50 #include <sys/smp.h> 51 #include <sys/sysproto.h> 52 #include <sys/uio.h> 53 54 #include <vm/vm.h> 55 #include <vm/pmap.h> 56 #include <vm/vm_kern.h> /* for kernel_map */ 57 #include <vm/vm_map.h> 58 #include <vm/vm_extern.h> 59 60 #include <machine/frame.h> 61 #include <machine/md_var.h> 62 #include <machine/pcb.h> 63 #include <machine/specialreg.h> 64 #include <machine/sysarch.h> 65 #include <machine/tss.h> 66 #include <machine/vmparam.h> 67 68 #include <security/audit/audit.h> 69 70 static void user_ldt_deref(struct proc_ldt *pldt); 71 static void user_ldt_derefl(struct proc_ldt *pldt); 72 73 #define MAX_LD 8192 74 75 int max_ldt_segment = 512; 76 SYSCTL_INT(_machdep, OID_AUTO, max_ldt_segment, CTLFLAG_RDTUN, 77 &max_ldt_segment, 0, 78 "Maximum number of allowed LDT segments in the single address space"); 79 80 static void 81 max_ldt_segment_init(void *arg __unused) 82 { 83 84 if (max_ldt_segment <= 0) 85 max_ldt_segment = 1; 86 if (max_ldt_segment > MAX_LD) 87 max_ldt_segment = MAX_LD; 88 } 89 SYSINIT(maxldt, SI_SUB_VM_CONF, SI_ORDER_ANY, max_ldt_segment_init, NULL); 90 91 #ifndef _SYS_SYSPROTO_H_ 92 struct sysarch_args { 93 int op; 94 char *parms; 95 }; 96 #endif 97 98 int 99 sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space) 100 { 101 struct i386_ldt_args *largs, la; 102 struct user_segment_descriptor *lp; 103 int error = 0; 104 105 /* 106 * XXXKIB check that the BSM generation code knows to encode 107 * the op argument. 108 */ 109 AUDIT_ARG_CMD(uap->op); 110 if (uap_space == UIO_USERSPACE) { 111 error = copyin(uap->parms, &la, sizeof(struct i386_ldt_args)); 112 if (error != 0) 113 return (error); 114 largs = &la; 115 } else 116 largs = (struct i386_ldt_args *)uap->parms; 117 118 switch (uap->op) { 119 case I386_GET_LDT: 120 error = amd64_get_ldt(td, largs); 121 break; 122 case I386_SET_LDT: 123 if (largs->descs != NULL && largs->num > max_ldt_segment) 124 return (EINVAL); 125 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 126 if (largs->descs != NULL) { 127 lp = malloc(largs->num * sizeof(struct 128 user_segment_descriptor), M_TEMP, M_WAITOK); 129 error = copyin(largs->descs, lp, largs->num * 130 sizeof(struct user_segment_descriptor)); 131 if (error == 0) 132 error = amd64_set_ldt(td, largs, lp); 133 free(lp, M_TEMP); 134 } else { 135 error = amd64_set_ldt(td, largs, NULL); 136 } 137 break; 138 } 139 return (error); 140 } 141 142 void 143 update_gdt_gsbase(struct thread *td, uint32_t base) 144 { 145 struct user_segment_descriptor *sd; 146 147 if (td != curthread) 148 return; 149 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 150 critical_enter(); 151 sd = PCPU_GET(gs32p); 152 sd->sd_lobase = base & 0xffffff; 153 sd->sd_hibase = (base >> 24) & 0xff; 154 critical_exit(); 155 } 156 157 void 158 update_gdt_fsbase(struct thread *td, uint32_t base) 159 { 160 struct user_segment_descriptor *sd; 161 162 if (td != curthread) 163 return; 164 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 165 critical_enter(); 166 sd = PCPU_GET(fs32p); 167 sd->sd_lobase = base & 0xffffff; 168 sd->sd_hibase = (base >> 24) & 0xff; 169 critical_exit(); 170 } 171 172 int 173 sysarch(struct thread *td, struct sysarch_args *uap) 174 { 175 struct pcb *pcb; 176 struct vm_map *map; 177 uint32_t i386base; 178 uint64_t a64base; 179 struct i386_ioperm_args iargs; 180 struct i386_get_xfpustate i386xfpu; 181 struct i386_set_pkru i386pkru; 182 struct amd64_get_xfpustate a64xfpu; 183 struct amd64_set_pkru a64pkru; 184 int error; 185 186 #ifdef CAPABILITY_MODE 187 /* 188 * When adding new operations, add a new case statement here to 189 * explicitly indicate whether or not the operation is safe to 190 * perform in capability mode. 191 */ 192 if (IN_CAPABILITY_MODE(td)) { 193 switch (uap->op) { 194 case I386_GET_LDT: 195 case I386_SET_LDT: 196 case I386_GET_IOPERM: 197 case I386_GET_FSBASE: 198 case I386_SET_FSBASE: 199 case I386_GET_GSBASE: 200 case I386_SET_GSBASE: 201 case I386_GET_XFPUSTATE: 202 case I386_SET_PKRU: 203 case I386_CLEAR_PKRU: 204 case AMD64_GET_FSBASE: 205 case AMD64_SET_FSBASE: 206 case AMD64_GET_GSBASE: 207 case AMD64_SET_GSBASE: 208 case AMD64_GET_XFPUSTATE: 209 case AMD64_SET_PKRU: 210 case AMD64_CLEAR_PKRU: 211 break; 212 213 case I386_SET_IOPERM: 214 default: 215 #ifdef KTRACE 216 if (KTRPOINT(td, KTR_CAPFAIL)) 217 ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL); 218 #endif 219 return (ECAPMODE); 220 } 221 } 222 #endif 223 224 if (uap->op == I386_GET_LDT || uap->op == I386_SET_LDT) 225 return (sysarch_ldt(td, uap, UIO_USERSPACE)); 226 227 error = 0; 228 pcb = td->td_pcb; 229 230 /* 231 * XXXKIB check that the BSM generation code knows to encode 232 * the op argument. 233 */ 234 AUDIT_ARG_CMD(uap->op); 235 switch (uap->op) { 236 case I386_GET_IOPERM: 237 case I386_SET_IOPERM: 238 if ((error = copyin(uap->parms, &iargs, 239 sizeof(struct i386_ioperm_args))) != 0) 240 return (error); 241 break; 242 case I386_GET_XFPUSTATE: 243 if ((error = copyin(uap->parms, &i386xfpu, 244 sizeof(struct i386_get_xfpustate))) != 0) 245 return (error); 246 a64xfpu.addr = (void *)(uintptr_t)i386xfpu.addr; 247 a64xfpu.len = i386xfpu.len; 248 break; 249 case I386_SET_PKRU: 250 case I386_CLEAR_PKRU: 251 if ((error = copyin(uap->parms, &i386pkru, 252 sizeof(struct i386_set_pkru))) != 0) 253 return (error); 254 a64pkru.addr = (void *)(uintptr_t)i386pkru.addr; 255 a64pkru.len = i386pkru.len; 256 a64pkru.keyidx = i386pkru.keyidx; 257 a64pkru.flags = i386pkru.flags; 258 break; 259 case AMD64_GET_XFPUSTATE: 260 if ((error = copyin(uap->parms, &a64xfpu, 261 sizeof(struct amd64_get_xfpustate))) != 0) 262 return (error); 263 break; 264 case AMD64_SET_PKRU: 265 case AMD64_CLEAR_PKRU: 266 if ((error = copyin(uap->parms, &a64pkru, 267 sizeof(struct amd64_set_pkru))) != 0) 268 return (error); 269 break; 270 default: 271 break; 272 } 273 274 switch (uap->op) { 275 case I386_GET_IOPERM: 276 error = amd64_get_ioperm(td, &iargs); 277 if (error == 0) 278 error = copyout(&iargs, uap->parms, 279 sizeof(struct i386_ioperm_args)); 280 break; 281 case I386_SET_IOPERM: 282 error = amd64_set_ioperm(td, &iargs); 283 break; 284 case I386_GET_FSBASE: 285 update_pcb_bases(pcb); 286 i386base = pcb->pcb_fsbase; 287 error = copyout(&i386base, uap->parms, sizeof(i386base)); 288 break; 289 case I386_SET_FSBASE: 290 error = copyin(uap->parms, &i386base, sizeof(i386base)); 291 if (!error) { 292 set_pcb_flags(pcb, PCB_FULL_IRET); 293 pcb->pcb_fsbase = i386base; 294 td->td_frame->tf_fs = _ufssel; 295 update_gdt_fsbase(td, i386base); 296 } 297 break; 298 case I386_GET_GSBASE: 299 update_pcb_bases(pcb); 300 i386base = pcb->pcb_gsbase; 301 error = copyout(&i386base, uap->parms, sizeof(i386base)); 302 break; 303 case I386_SET_GSBASE: 304 error = copyin(uap->parms, &i386base, sizeof(i386base)); 305 if (!error) { 306 set_pcb_flags(pcb, PCB_FULL_IRET); 307 pcb->pcb_gsbase = i386base; 308 td->td_frame->tf_gs = _ugssel; 309 update_gdt_gsbase(td, i386base); 310 } 311 break; 312 case AMD64_GET_FSBASE: 313 update_pcb_bases(pcb); 314 error = copyout(&pcb->pcb_fsbase, uap->parms, 315 sizeof(pcb->pcb_fsbase)); 316 break; 317 318 case AMD64_SET_FSBASE: 319 error = copyin(uap->parms, &a64base, sizeof(a64base)); 320 if (!error) { 321 if (a64base < VM_MAXUSER_ADDRESS) { 322 set_pcb_flags(pcb, PCB_FULL_IRET); 323 pcb->pcb_fsbase = a64base; 324 td->td_frame->tf_fs = _ufssel; 325 } else 326 error = EINVAL; 327 } 328 break; 329 330 case AMD64_GET_GSBASE: 331 update_pcb_bases(pcb); 332 error = copyout(&pcb->pcb_gsbase, uap->parms, 333 sizeof(pcb->pcb_gsbase)); 334 break; 335 336 case AMD64_SET_GSBASE: 337 error = copyin(uap->parms, &a64base, sizeof(a64base)); 338 if (!error) { 339 if (a64base < VM_MAXUSER_ADDRESS) { 340 set_pcb_flags(pcb, PCB_FULL_IRET); 341 pcb->pcb_gsbase = a64base; 342 td->td_frame->tf_gs = _ugssel; 343 } else 344 error = EINVAL; 345 } 346 break; 347 348 case I386_GET_XFPUSTATE: 349 case AMD64_GET_XFPUSTATE: 350 if (a64xfpu.len > cpu_max_ext_state_size - 351 sizeof(struct savefpu)) 352 return (EINVAL); 353 fpugetregs(td); 354 error = copyout((char *)(get_pcb_user_save_td(td) + 1), 355 a64xfpu.addr, a64xfpu.len); 356 break; 357 358 case I386_SET_PKRU: 359 case AMD64_SET_PKRU: 360 /* 361 * Read-lock the map to synchronize with parallel 362 * pmap_vmspace_copy() on fork. 363 */ 364 map = &td->td_proc->p_vmspace->vm_map; 365 vm_map_lock_read(map); 366 error = pmap_pkru_set(PCPU_GET(curpmap), 367 (vm_offset_t)a64pkru.addr, (vm_offset_t)a64pkru.addr + 368 a64pkru.len, a64pkru.keyidx, a64pkru.flags); 369 vm_map_unlock_read(map); 370 break; 371 372 case I386_CLEAR_PKRU: 373 case AMD64_CLEAR_PKRU: 374 if (a64pkru.flags != 0 || a64pkru.keyidx != 0) { 375 error = EINVAL; 376 break; 377 } 378 map = &td->td_proc->p_vmspace->vm_map; 379 vm_map_lock_read(map); 380 error = pmap_pkru_clear(PCPU_GET(curpmap), 381 (vm_offset_t)a64pkru.addr, 382 (vm_offset_t)a64pkru.addr + a64pkru.len); 383 vm_map_unlock(map); 384 break; 385 386 default: 387 error = EINVAL; 388 break; 389 } 390 return (error); 391 } 392 393 int 394 amd64_set_ioperm(td, uap) 395 struct thread *td; 396 struct i386_ioperm_args *uap; 397 { 398 char *iomap; 399 struct amd64tss *tssp; 400 struct system_segment_descriptor *tss_sd; 401 struct pcb *pcb; 402 u_int i; 403 int error; 404 405 if ((error = priv_check(td, PRIV_IO)) != 0) 406 return (error); 407 if ((error = securelevel_gt(td->td_ucred, 0)) != 0) 408 return (error); 409 if (uap->start > uap->start + uap->length || 410 uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY) 411 return (EINVAL); 412 413 /* 414 * XXX 415 * While this is restricted to root, we should probably figure out 416 * whether any other driver is using this i/o address, as so not to 417 * cause confusion. This probably requires a global 'usage registry'. 418 */ 419 pcb = td->td_pcb; 420 if (pcb->pcb_tssp == NULL) { 421 tssp = (struct amd64tss *)kmem_malloc(ctob(IOPAGES + 1), 422 M_WAITOK); 423 pmap_pti_add_kva((vm_offset_t)tssp, (vm_offset_t)tssp + 424 ctob(IOPAGES + 1), false); 425 iomap = (char *)&tssp[1]; 426 memset(iomap, 0xff, IOPERM_BITMAP_SIZE); 427 critical_enter(); 428 /* Takes care of tss_rsp0. */ 429 memcpy(tssp, &common_tss[PCPU_GET(cpuid)], 430 sizeof(struct amd64tss)); 431 tssp->tss_iobase = sizeof(*tssp); 432 pcb->pcb_tssp = tssp; 433 tss_sd = PCPU_GET(tss); 434 tss_sd->sd_lobase = (u_long)tssp & 0xffffff; 435 tss_sd->sd_hibase = ((u_long)tssp >> 24) & 0xfffffffffful; 436 tss_sd->sd_type = SDT_SYSTSS; 437 ltr(GSEL(GPROC0_SEL, SEL_KPL)); 438 PCPU_SET(tssp, tssp); 439 critical_exit(); 440 } else 441 iomap = (char *)&pcb->pcb_tssp[1]; 442 for (i = uap->start; i < uap->start + uap->length; i++) { 443 if (uap->enable) 444 iomap[i >> 3] &= ~(1 << (i & 7)); 445 else 446 iomap[i >> 3] |= (1 << (i & 7)); 447 } 448 return (error); 449 } 450 451 int 452 amd64_get_ioperm(td, uap) 453 struct thread *td; 454 struct i386_ioperm_args *uap; 455 { 456 int i, state; 457 char *iomap; 458 459 if (uap->start >= IOPAGES * PAGE_SIZE * NBBY) 460 return (EINVAL); 461 if (td->td_pcb->pcb_tssp == NULL) { 462 uap->length = 0; 463 goto done; 464 } 465 466 iomap = (char *)&td->td_pcb->pcb_tssp[1]; 467 468 i = uap->start; 469 state = (iomap[i >> 3] >> (i & 7)) & 1; 470 uap->enable = !state; 471 uap->length = 1; 472 473 for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) { 474 if (state != ((iomap[i >> 3] >> (i & 7)) & 1)) 475 break; 476 uap->length++; 477 } 478 479 done: 480 return (0); 481 } 482 483 /* 484 * Update the GDT entry pointing to the LDT to point to the LDT of the 485 * current process. 486 */ 487 static void 488 set_user_ldt(struct mdproc *mdp) 489 { 490 491 *PCPU_GET(ldt) = mdp->md_ldt_sd; 492 lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); 493 } 494 495 static void 496 set_user_ldt_rv(struct vmspace *vmsp) 497 { 498 struct thread *td; 499 500 td = curthread; 501 if (vmsp != td->td_proc->p_vmspace) 502 return; 503 504 set_user_ldt(&td->td_proc->p_md); 505 } 506 507 struct proc_ldt * 508 user_ldt_alloc(struct proc *p, int force) 509 { 510 struct proc_ldt *pldt, *new_ldt; 511 struct mdproc *mdp; 512 struct soft_segment_descriptor sldt; 513 vm_offset_t sva; 514 vm_size_t sz; 515 516 mtx_assert(&dt_lock, MA_OWNED); 517 mdp = &p->p_md; 518 if (!force && mdp->md_ldt != NULL) 519 return (mdp->md_ldt); 520 mtx_unlock(&dt_lock); 521 new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK); 522 sz = max_ldt_segment * sizeof(struct user_segment_descriptor); 523 sva = kmem_malloc(sz, M_WAITOK | M_ZERO); 524 new_ldt->ldt_base = (caddr_t)sva; 525 pmap_pti_add_kva(sva, sva + sz, false); 526 new_ldt->ldt_refcnt = 1; 527 sldt.ssd_base = sva; 528 sldt.ssd_limit = sz - 1; 529 sldt.ssd_type = SDT_SYSLDT; 530 sldt.ssd_dpl = SEL_KPL; 531 sldt.ssd_p = 1; 532 sldt.ssd_long = 0; 533 sldt.ssd_def32 = 0; 534 sldt.ssd_gran = 0; 535 mtx_lock(&dt_lock); 536 pldt = mdp->md_ldt; 537 if (pldt != NULL && !force) { 538 pmap_pti_remove_kva(sva, sva + sz); 539 kmem_free(sva, sz); 540 free(new_ldt, M_SUBPROC); 541 return (pldt); 542 } 543 544 if (pldt != NULL) { 545 bcopy(pldt->ldt_base, new_ldt->ldt_base, max_ldt_segment * 546 sizeof(struct user_segment_descriptor)); 547 user_ldt_derefl(pldt); 548 } 549 critical_enter(); 550 ssdtosyssd(&sldt, &p->p_md.md_ldt_sd); 551 atomic_thread_fence_rel(); 552 mdp->md_ldt = new_ldt; 553 critical_exit(); 554 smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv, NULL, 555 p->p_vmspace); 556 557 return (mdp->md_ldt); 558 } 559 560 void 561 user_ldt_free(struct thread *td) 562 { 563 struct proc *p = td->td_proc; 564 struct mdproc *mdp = &p->p_md; 565 struct proc_ldt *pldt; 566 567 mtx_lock(&dt_lock); 568 if ((pldt = mdp->md_ldt) == NULL) { 569 mtx_unlock(&dt_lock); 570 return; 571 } 572 573 critical_enter(); 574 mdp->md_ldt = NULL; 575 atomic_thread_fence_rel(); 576 bzero(&mdp->md_ldt_sd, sizeof(mdp->md_ldt_sd)); 577 if (td == curthread) 578 lldt(GSEL(GNULL_SEL, SEL_KPL)); 579 critical_exit(); 580 user_ldt_deref(pldt); 581 } 582 583 static void 584 user_ldt_derefl(struct proc_ldt *pldt) 585 { 586 vm_offset_t sva; 587 vm_size_t sz; 588 589 if (--pldt->ldt_refcnt == 0) { 590 sva = (vm_offset_t)pldt->ldt_base; 591 sz = max_ldt_segment * sizeof(struct user_segment_descriptor); 592 pmap_pti_remove_kva(sva, sva + sz); 593 kmem_free(sva, sz); 594 free(pldt, M_SUBPROC); 595 } 596 } 597 598 static void 599 user_ldt_deref(struct proc_ldt *pldt) 600 { 601 602 mtx_assert(&dt_lock, MA_OWNED); 603 user_ldt_derefl(pldt); 604 mtx_unlock(&dt_lock); 605 } 606 607 /* 608 * Note for the authors of compat layers (linux, etc): copyout() in 609 * the function below is not a problem since it presents data in 610 * arch-specific format (i.e. i386-specific in this case), not in 611 * the OS-specific one. 612 */ 613 int 614 amd64_get_ldt(struct thread *td, struct i386_ldt_args *uap) 615 { 616 struct proc_ldt *pldt; 617 struct user_segment_descriptor *lp; 618 uint64_t *data; 619 u_int i, num; 620 int error; 621 622 #ifdef DEBUG 623 printf("amd64_get_ldt: start=%u num=%u descs=%p\n", 624 uap->start, uap->num, (void *)uap->descs); 625 #endif 626 627 pldt = td->td_proc->p_md.md_ldt; 628 if (pldt == NULL || uap->start >= max_ldt_segment || uap->num == 0) { 629 td->td_retval[0] = 0; 630 return (0); 631 } 632 num = min(uap->num, max_ldt_segment - uap->start); 633 lp = &((struct user_segment_descriptor *)(pldt->ldt_base))[uap->start]; 634 data = malloc(num * sizeof(struct user_segment_descriptor), M_TEMP, 635 M_WAITOK); 636 mtx_lock(&dt_lock); 637 for (i = 0; i < num; i++) 638 data[i] = ((volatile uint64_t *)lp)[i]; 639 mtx_unlock(&dt_lock); 640 error = copyout(data, uap->descs, num * 641 sizeof(struct user_segment_descriptor)); 642 free(data, M_TEMP); 643 if (error == 0) 644 td->td_retval[0] = num; 645 return (error); 646 } 647 648 int 649 amd64_set_ldt(struct thread *td, struct i386_ldt_args *uap, 650 struct user_segment_descriptor *descs) 651 { 652 struct mdproc *mdp; 653 struct proc_ldt *pldt; 654 struct user_segment_descriptor *dp; 655 struct proc *p; 656 u_int largest_ld, i; 657 int error; 658 659 #ifdef DEBUG 660 printf("amd64_set_ldt: start=%u num=%u descs=%p\n", 661 uap->start, uap->num, (void *)uap->descs); 662 #endif 663 mdp = &td->td_proc->p_md; 664 error = 0; 665 666 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 667 p = td->td_proc; 668 if (descs == NULL) { 669 /* Free descriptors */ 670 if (uap->start == 0 && uap->num == 0) 671 uap->num = max_ldt_segment; 672 if (uap->num == 0) 673 return (EINVAL); 674 if ((pldt = mdp->md_ldt) == NULL || 675 uap->start >= max_ldt_segment) 676 return (0); 677 largest_ld = uap->start + uap->num; 678 if (largest_ld > max_ldt_segment) 679 largest_ld = max_ldt_segment; 680 if (largest_ld < uap->start) 681 return (EINVAL); 682 mtx_lock(&dt_lock); 683 for (i = uap->start; i < largest_ld; i++) 684 ((volatile uint64_t *)(pldt->ldt_base))[i] = 0; 685 mtx_unlock(&dt_lock); 686 return (0); 687 } 688 689 if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) { 690 /* verify range of descriptors to modify */ 691 largest_ld = uap->start + uap->num; 692 if (uap->start >= max_ldt_segment || 693 largest_ld > max_ldt_segment || 694 largest_ld < uap->start) 695 return (EINVAL); 696 } 697 698 /* Check descriptors for access violations */ 699 for (i = 0; i < uap->num; i++) { 700 dp = &descs[i]; 701 702 switch (dp->sd_type) { 703 case SDT_SYSNULL: /* system null */ 704 dp->sd_p = 0; 705 break; 706 case SDT_SYS286TSS: 707 case SDT_SYSLDT: 708 case SDT_SYS286BSY: 709 case SDT_SYS286CGT: 710 case SDT_SYSTASKGT: 711 case SDT_SYS286IGT: 712 case SDT_SYS286TGT: 713 case SDT_SYSNULL2: 714 case SDT_SYSTSS: 715 case SDT_SYSNULL3: 716 case SDT_SYSBSY: 717 case SDT_SYSCGT: 718 case SDT_SYSNULL4: 719 case SDT_SYSIGT: 720 case SDT_SYSTGT: 721 return (EACCES); 722 723 /* memory segment types */ 724 case SDT_MEMEC: /* memory execute only conforming */ 725 case SDT_MEMEAC: /* memory execute only accessed conforming */ 726 case SDT_MEMERC: /* memory execute read conforming */ 727 case SDT_MEMERAC: /* memory execute read accessed conforming */ 728 /* Must be "present" if executable and conforming. */ 729 if (dp->sd_p == 0) 730 return (EACCES); 731 break; 732 case SDT_MEMRO: /* memory read only */ 733 case SDT_MEMROA: /* memory read only accessed */ 734 case SDT_MEMRW: /* memory read write */ 735 case SDT_MEMRWA: /* memory read write accessed */ 736 case SDT_MEMROD: /* memory read only expand dwn limit */ 737 case SDT_MEMRODA: /* memory read only expand dwn lim accessed */ 738 case SDT_MEMRWD: /* memory read write expand dwn limit */ 739 case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */ 740 case SDT_MEME: /* memory execute only */ 741 case SDT_MEMEA: /* memory execute only accessed */ 742 case SDT_MEMER: /* memory execute read */ 743 case SDT_MEMERA: /* memory execute read accessed */ 744 break; 745 default: 746 return(EINVAL); 747 } 748 749 /* Only user (ring-3) descriptors may be present. */ 750 if ((dp->sd_p != 0) && (dp->sd_dpl != SEL_UPL)) 751 return (EACCES); 752 } 753 754 if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) { 755 /* Allocate a free slot */ 756 mtx_lock(&dt_lock); 757 pldt = user_ldt_alloc(p, 0); 758 if (pldt == NULL) { 759 mtx_unlock(&dt_lock); 760 return (ENOMEM); 761 } 762 763 /* 764 * start scanning a bit up to leave room for NVidia and 765 * Wine, which still user the "Blat" method of allocation. 766 */ 767 i = 16; 768 dp = &((struct user_segment_descriptor *)(pldt->ldt_base))[i]; 769 for (; i < max_ldt_segment; ++i, ++dp) { 770 if (dp->sd_type == SDT_SYSNULL) 771 break; 772 } 773 if (i >= max_ldt_segment) { 774 mtx_unlock(&dt_lock); 775 return (ENOSPC); 776 } 777 uap->start = i; 778 error = amd64_set_ldt_data(td, i, 1, descs); 779 mtx_unlock(&dt_lock); 780 } else { 781 largest_ld = uap->start + uap->num; 782 if (largest_ld > max_ldt_segment) 783 return (EINVAL); 784 mtx_lock(&dt_lock); 785 if (user_ldt_alloc(p, 0) != NULL) { 786 error = amd64_set_ldt_data(td, uap->start, uap->num, 787 descs); 788 } 789 mtx_unlock(&dt_lock); 790 } 791 if (error == 0) 792 td->td_retval[0] = uap->start; 793 return (error); 794 } 795 796 int 797 amd64_set_ldt_data(struct thread *td, int start, int num, 798 struct user_segment_descriptor *descs) 799 { 800 struct mdproc *mdp; 801 struct proc_ldt *pldt; 802 volatile uint64_t *dst, *src; 803 int i; 804 805 mtx_assert(&dt_lock, MA_OWNED); 806 807 mdp = &td->td_proc->p_md; 808 pldt = mdp->md_ldt; 809 dst = (volatile uint64_t *)(pldt->ldt_base); 810 src = (volatile uint64_t *)descs; 811 for (i = 0; i < num; i++) 812 dst[start + i] = src[i]; 813 return (0); 814 } 815