xref: /freebsd/sys/amd64/sgx/sgx.c (revision 9768746b)
1 /*-
2  * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
3  * All rights reserved.
4  *
5  * This software was developed by BAE Systems, the University of Cambridge
6  * Computer Laboratory, and Memorial University under DARPA/AFRL contract
7  * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
8  * (TC) research program.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * Design overview.
34  *
35  * The driver provides a character device whose mmap(2) and ioctl(2) calls
36  * allow a user to manage isolated compartments ("enclaves") in user VA space.
37  *
38  * The driver's duties are EPC page management, enclave management and user
39  * data validation.
40  *
41  * This driver requires Intel SGX support from hardware.
42  *
43  * /dev/isgx:
44  *    .mmap:
45  *        sgx_mmap_single() allocates VM object with following pager
46  *        operations:
47  *              a) sgx_pg_ctor():
48  *                  VM object constructor does nothing
49  *              b) sgx_pg_dtor():
50  *                  VM object destructor destroys the SGX enclave associated
51  *                  with the object: it frees all the EPC pages allocated for
52  *                  enclave and removes the enclave.
53  *              c) sgx_pg_fault():
54  *                  VM object fault handler maps nothing: it always returns
54  *                  VM_PAGER_FAIL
55  *
56  *    .ioctl:
57  *        sgx_ioctl():
58  *               a) SGX_IOC_ENCLAVE_CREATE
59  *                   Adds Enclave SECS page: initial step of enclave creation.
60  *               b) SGX_IOC_ENCLAVE_ADD_PAGE
61  *                   Adds TCS, REG pages to the enclave.
62  *               c) SGX_IOC_ENCLAVE_INIT
63  *                   Finalizes enclave creation.
64  *
65  * Enclave lifecycle:
66  *          .-- ECREATE  -- Add SECS page
67  *   Kernel |   EADD     -- Add TCS, REG pages
68  *    space |   EEXTEND  -- Measure the page (take unique hash)
69  *    ENCLS |   EPA      -- Allocate version array page
70  *          '-- EINIT    -- Finalize enclave creation
71  *   User   .-- EENTER   -- Go to entry point of enclave
72  *    space |   EEXIT    -- Exit back to main application
73  *    ENCLU '-- ERESUME  -- Resume enclave execution (e.g. after exception)
74  *
75  * Enclave lifecycle from driver point of view:
76  *  1) User calls mmap() on /dev/isgx: we allocate a VM object
77  *  2) User calls ioctl SGX_IOC_ENCLAVE_CREATE: we look for the VM object
78  *     associated with user process created on step 1, create SECS physical
79  *     page and store it in enclave's VM object queue by special index
80  *     SGX_SECS_VM_OBJECT_INDEX.
81  *  3) User calls ioctl SGX_IOC_ENCLAVE_ADD_PAGE: we look for enclave created
82  *     on step 2, create TCS or REG physical page and map it to specified by
83  *     user address of enclave VM object.
84  *  4) User finalizes enclave creation with ioctl SGX_IOC_ENCLAVE_INIT call.
85  *  5) User can freely enter to and exit from enclave using ENCLU instructions
86  *     from userspace: the driver does nothing here.
87  *  6) User calls munmap(2) (or the process owning the enclave dies):
88  *     we destroy the enclave associated with the object.
89  *
90  * EPC page types and their indexes in VM object queue:
91  *   - PT_SECS index is special and equals SGX_SECS_VM_OBJECT_INDEX (-1);
92  *   - PT_TCS and PT_REG indexes are specified by user in addr field of ioctl
93  *     request data and determined as follows:
94  *       pidx = OFF_TO_IDX(addp->addr - vmh->base);
95  *   - PT_VA index is special, created for PT_REG, PT_TCS and PT_SECS pages
96  *     and determined by formula:
97  *       va_page_idx = - SGX_VA_PAGES_OFFS - (page_idx / SGX_VA_PAGE_SLOTS);
98  *     PT_VA page can hold versions of up to 512 pages, and slot for each
99  *     page in PT_VA page is determined as follows:
100  *       va_slot_idx = page_idx % SGX_VA_PAGE_SLOTS;
101  *   - PT_TRIM is unused.
102  *
103  * Locking:
104  *    The SGX ENCLS instructions have limitations on concurrency:
105  *    some of them can't be executed at the same time on different CPUs.
106  *    We use sc->mtx_encls lock around them to prevent concurrent execution.
107  *    sc->mtx lock is used to manage list of created enclaves and the state of
108  *    SGX driver.
109  *
110  * Eviction of EPC pages:
111  *    Eviction support is not implemented in this driver, however the driver
112  *    manages VA (version array) pages: it allocates a VA slot for each EPC
113  *    page. This will be required for eviction support in future.
114  *    VA pages and slots are currently unused.
115  *
116  * Intel® 64 and IA-32 Architectures Software Developer's Manual
117  * https://software.intel.com/en-us/articles/intel-sdm
118  */
119 
120 #include <sys/cdefs.h>
121 __FBSDID("$FreeBSD$");
122 
123 #include <sys/param.h>
124 #include <sys/systm.h>
125 #include <sys/ioccom.h>
126 #include <sys/malloc.h>
127 #include <sys/kernel.h>
128 #include <sys/lock.h>
129 #include <sys/mutex.h>
130 #include <sys/rwlock.h>
131 #include <sys/conf.h>
132 #include <sys/module.h>
133 #include <sys/proc.h>
134 #include <sys/vmem.h>
135 #include <sys/vmmeter.h>
136 
137 #include <vm/vm.h>
138 #include <vm/vm_param.h>
139 #include <vm/vm_extern.h>
140 #include <vm/vm_kern.h>
141 #include <vm/vm_page.h>
142 #include <vm/vm_map.h>
143 #include <vm/vm_object.h>
144 #include <vm/vm_pager.h>
145 #include <vm/vm_phys.h>
146 #include <vm/vm_radix.h>
147 #include <vm/pmap.h>
148 
149 #include <machine/md_var.h>
150 #include <machine/specialreg.h>
151 #include <machine/cpufunc.h>
152 #include <machine/sgx.h>
153 #include <machine/sgxreg.h>
154 
155 #include <amd64/sgx/sgxvar.h>
156 
/*
 * Debug tracing switch.  SGX_DEBUG is defined and immediately undefined,
 * so dprintf() expands to nothing; delete the #undef line to make
 * dprintf() forward its arguments to printf().
 */
#define	SGX_DEBUG
#undef	SGX_DEBUG

#ifndef	SGX_DEBUG
#define	dprintf(fmt, ...)
#else
#define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
#endif
165 
166 static struct cdev_pager_ops sgx_pg_ops;
167 struct sgx_softc sgx_sc;
168 
169 static int
170 sgx_get_epc_page(struct sgx_softc *sc, struct epc_page **epc)
171 {
172 	vmem_addr_t addr;
173 	int i;
174 
175 	if (vmem_alloc(sc->vmem_epc, PAGE_SIZE, M_FIRSTFIT | M_NOWAIT,
176 	    &addr) == 0) {
177 		i = (addr - sc->epc_base) / PAGE_SIZE;
178 		*epc = &sc->epc_pages[i];
179 		return (0);
180 	}
181 
182 	return (ENOMEM);
183 }
184 
185 static void
186 sgx_put_epc_page(struct sgx_softc *sc, struct epc_page *epc)
187 {
188 	vmem_addr_t addr;
189 
190 	if (epc == NULL)
191 		return;
192 
193 	addr = (epc->index * PAGE_SIZE) + sc->epc_base;
194 	vmem_free(sc->vmem_epc, addr, PAGE_SIZE);
195 }
196 
197 static int
198 sgx_va_slot_init_by_index(struct sgx_softc *sc, vm_object_t object,
199     uint64_t idx)
200 {
201 	struct epc_page *epc;
202 	vm_page_t page;
203 	vm_page_t p;
204 	int ret;
205 
206 	VM_OBJECT_ASSERT_WLOCKED(object);
207 
208 	p = vm_page_lookup(object, idx);
209 	if (p == NULL) {
210 		ret = sgx_get_epc_page(sc, &epc);
211 		if (ret) {
212 			dprintf("%s: No free EPC pages available.\n",
213 			    __func__);
214 			return (ret);
215 		}
216 
217 		mtx_lock(&sc->mtx_encls);
218 		sgx_epa((void *)epc->base);
219 		mtx_unlock(&sc->mtx_encls);
220 
221 		page = PHYS_TO_VM_PAGE(epc->phys);
222 
223 		page->valid = VM_PAGE_BITS_ALL;
224 		vm_page_insert(page, object, idx);
225 	}
226 
227 	return (0);
228 }
229 
230 static int
231 sgx_va_slot_init(struct sgx_softc *sc,
232     struct sgx_enclave *enclave,
233     uint64_t addr)
234 {
235 	vm_pindex_t pidx;
236 	uint64_t va_page_idx;
237 	uint64_t idx;
238 	vm_object_t object;
239 	int ret;
240 
241 	object = enclave->object;
242 
243 	VM_OBJECT_ASSERT_WLOCKED(object);
244 
245 	pidx = OFF_TO_IDX(addr);
246 
247 	va_page_idx = pidx / SGX_VA_PAGE_SLOTS;
248 	idx = - SGX_VA_PAGES_OFFS - va_page_idx;
249 
250 	ret = sgx_va_slot_init_by_index(sc, object, idx);
251 
252 	return (ret);
253 }
254 
255 static int
256 sgx_mem_find(struct sgx_softc *sc, uint64_t addr,
257     vm_map_entry_t *entry0, vm_object_t *object0)
258 {
259 	vm_map_t map;
260 	vm_map_entry_t entry;
261 	vm_object_t object;
262 
263 	map = &curproc->p_vmspace->vm_map;
264 
265 	vm_map_lock_read(map);
266 	if (!vm_map_lookup_entry(map, addr, &entry)) {
267 		vm_map_unlock_read(map);
268 		dprintf("%s: Can't find enclave.\n", __func__);
269 		return (EINVAL);
270 	}
271 
272 	object = entry->object.vm_object;
273 	if (object == NULL || object->handle == NULL) {
274 		vm_map_unlock_read(map);
275 		return (EINVAL);
276 	}
277 
278 	if (object->type != OBJT_MGTDEVICE ||
279 	    object->un_pager.devp.ops != &sgx_pg_ops) {
280 		vm_map_unlock_read(map);
281 		return (EINVAL);
282 	}
283 
284 	vm_object_reference(object);
285 
286 	*object0 = object;
287 	*entry0 = entry;
288 	vm_map_unlock_read(map);
289 
290 	return (0);
291 }
292 
293 static int
294 sgx_enclave_find(struct sgx_softc *sc, uint64_t addr,
295     struct sgx_enclave **encl)
296 {
297 	struct sgx_vm_handle *vmh;
298 	struct sgx_enclave *enclave;
299 	vm_map_entry_t entry;
300 	vm_object_t object;
301 	int ret;
302 
303 	ret = sgx_mem_find(sc, addr, &entry, &object);
304 	if (ret)
305 		return (ret);
306 
307 	vmh = object->handle;
308 	if (vmh == NULL) {
309 		vm_object_deallocate(object);
310 		return (EINVAL);
311 	}
312 
313 	enclave = vmh->enclave;
314 	if (enclave == NULL || enclave->object == NULL) {
315 		vm_object_deallocate(object);
316 		return (EINVAL);
317 	}
318 
319 	*encl = enclave;
320 
321 	return (0);
322 }
323 
324 static int
325 sgx_enclave_alloc(struct sgx_softc *sc, struct secs *secs,
326     struct sgx_enclave **enclave0)
327 {
328 	struct sgx_enclave *enclave;
329 
330 	enclave = malloc(sizeof(struct sgx_enclave),
331 	    M_SGX, M_WAITOK | M_ZERO);
332 
333 	enclave->base = secs->base;
334 	enclave->size = secs->size;
335 
336 	*enclave0 = enclave;
337 
338 	return (0);
339 }
340 
341 static void
342 sgx_epc_page_remove(struct sgx_softc *sc,
343     struct epc_page *epc)
344 {
345 
346 	mtx_lock(&sc->mtx_encls);
347 	sgx_eremove((void *)epc->base);
348 	mtx_unlock(&sc->mtx_encls);
349 }
350 
351 static void
352 sgx_page_remove(struct sgx_softc *sc, vm_page_t p)
353 {
354 	struct epc_page *epc;
355 	vm_paddr_t pa;
356 	uint64_t offs;
357 
358 	(void)vm_page_remove(p);
359 
360 	dprintf("%s: p->pidx %ld\n", __func__, p->pindex);
361 
362 	pa = VM_PAGE_TO_PHYS(p);
363 	epc = &sc->epc_pages[0];
364 	offs = (pa - epc->phys) / PAGE_SIZE;
365 	epc = &sc->epc_pages[offs];
366 
367 	sgx_epc_page_remove(sc, epc);
368 	sgx_put_epc_page(sc, epc);
369 }
370 
371 static void
372 sgx_enclave_remove(struct sgx_softc *sc,
373     struct sgx_enclave *enclave)
374 {
375 	vm_object_t object;
376 	vm_page_t p, p_secs, p_next;
377 
378 	mtx_lock(&sc->mtx);
379 	TAILQ_REMOVE(&sc->enclaves, enclave, next);
380 	mtx_unlock(&sc->mtx);
381 
382 	object = enclave->object;
383 
384 	VM_OBJECT_WLOCK(object);
385 
386 	/*
387 	 * First remove all the pages except SECS,
388 	 * then remove SECS page.
389 	 */
390 restart:
391 	TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
392 		if (p->pindex == SGX_SECS_VM_OBJECT_INDEX)
393 			continue;
394 		if (vm_page_busy_acquire(p, VM_ALLOC_WAITFAIL) == 0)
395 			goto restart;
396 		sgx_page_remove(sc, p);
397 	}
398 	p_secs = vm_page_grab(object, SGX_SECS_VM_OBJECT_INDEX,
399 	    VM_ALLOC_NOCREAT);
400 	/* Now remove SECS page */
401 	if (p_secs != NULL)
402 		sgx_page_remove(sc, p_secs);
403 
404 	KASSERT(TAILQ_EMPTY(&object->memq) == 1, ("not empty"));
405 	KASSERT(object->resident_page_count == 0, ("count"));
406 
407 	VM_OBJECT_WUNLOCK(object);
408 }
409 
410 static int
411 sgx_measure_page(struct sgx_softc *sc, struct epc_page *secs,
412     struct epc_page *epc, uint16_t mrmask)
413 {
414 	int i, j;
415 	int ret;
416 
417 	mtx_lock(&sc->mtx_encls);
418 
419 	for (i = 0, j = 1; i < PAGE_SIZE; i += 0x100, j <<= 1) {
420 		if (!(j & mrmask))
421 			continue;
422 
423 		ret = sgx_eextend((void *)secs->base,
424 		    (void *)(epc->base + i));
425 		if (ret == SGX_EFAULT) {
426 			mtx_unlock(&sc->mtx_encls);
427 			return (ret);
428 		}
429 	}
430 
431 	mtx_unlock(&sc->mtx_encls);
432 
433 	return (0);
434 }
435 
436 static int
437 sgx_secs_validate(struct sgx_softc *sc, struct secs *secs)
438 {
439 	struct secs_attr *attr;
440 	int i;
441 
442 	if (secs->size == 0)
443 		return (EINVAL);
444 
445 	/* BASEADDR must be naturally aligned on an SECS.SIZE boundary. */
446 	if (secs->base & (secs->size - 1))
447 		return (EINVAL);
448 
449 	/* SECS.SIZE must be at least 2 pages. */
450 	if (secs->size < 2 * PAGE_SIZE)
451 		return (EINVAL);
452 
453 	if ((secs->size & (secs->size - 1)) != 0)
454 		return (EINVAL);
455 
456 	attr = &secs->attributes;
457 
458 	if (attr->reserved1 != 0 ||
459 	    attr->reserved2 != 0 ||
460 	    attr->reserved3 != 0)
461 		return (EINVAL);
462 
463 	for (i = 0; i < SECS_ATTR_RSV4_SIZE; i++)
464 		if (attr->reserved4[i])
465 			return (EINVAL);
466 
467 	/*
468 	 * Intel® Software Guard Extensions Programming Reference
469 	 * 6.7.2 Relevant Fields in Various Data Structures
470 	 * 6.7.2.1 SECS.ATTRIBUTES.XFRM
471 	 * XFRM[1:0] must be set to 0x3.
472 	 */
473 	if ((attr->xfrm & 0x3) != 0x3)
474 		return (EINVAL);
475 
476 	if (!attr->mode64bit)
477 		return (EINVAL);
478 
479 	if (secs->size > sc->enclave_size_max)
480 		return (EINVAL);
481 
482 	for (i = 0; i < SECS_RSV1_SIZE; i++)
483 		if (secs->reserved1[i])
484 			return (EINVAL);
485 
486 	for (i = 0; i < SECS_RSV2_SIZE; i++)
487 		if (secs->reserved2[i])
488 			return (EINVAL);
489 
490 	for (i = 0; i < SECS_RSV3_SIZE; i++)
491 		if (secs->reserved3[i])
492 			return (EINVAL);
493 
494 	for (i = 0; i < SECS_RSV4_SIZE; i++)
495 		if (secs->reserved4[i])
496 			return (EINVAL);
497 
498 	return (0);
499 }
500 
501 static int
502 sgx_tcs_validate(struct tcs *tcs)
503 {
504 	int i;
505 
506 	if ((tcs->flags) ||
507 	    (tcs->ossa & (PAGE_SIZE - 1)) ||
508 	    (tcs->ofsbasgx & (PAGE_SIZE - 1)) ||
509 	    (tcs->ogsbasgx & (PAGE_SIZE - 1)) ||
510 	    ((tcs->fslimit & 0xfff) != 0xfff) ||
511 	    ((tcs->gslimit & 0xfff) != 0xfff))
512 		return (EINVAL);
513 
514 	for (i = 0; i < nitems(tcs->reserved3); i++)
515 		if (tcs->reserved3[i])
516 			return (EINVAL);
517 
518 	return (0);
519 }
520 
521 static void
522 sgx_tcs_dump(struct sgx_softc *sc, struct tcs *t)
523 {
524 
525 	dprintf("t->flags %lx\n", t->flags);
526 	dprintf("t->ossa %lx\n", t->ossa);
527 	dprintf("t->cssa %x\n", t->cssa);
528 	dprintf("t->nssa %x\n", t->nssa);
529 	dprintf("t->oentry %lx\n", t->oentry);
530 	dprintf("t->ofsbasgx %lx\n", t->ofsbasgx);
531 	dprintf("t->ogsbasgx %lx\n", t->ogsbasgx);
532 	dprintf("t->fslimit %x\n", t->fslimit);
533 	dprintf("t->gslimit %x\n", t->gslimit);
534 }
535 
536 static int
537 sgx_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
538     vm_ooffset_t foff, struct ucred *cred, u_short *color)
539 {
540 	struct sgx_vm_handle *vmh;
541 
542 	vmh = handle;
543 	if (vmh == NULL) {
544 		dprintf("%s: vmh not found.\n", __func__);
545 		return (0);
546 	}
547 
548 	dprintf("%s: vmh->base %lx foff 0x%lx size 0x%lx\n",
549 	    __func__, vmh->base, foff, size);
550 
551 	return (0);
552 }
553 
554 static void
555 sgx_pg_dtor(void *handle)
556 {
557 	struct sgx_vm_handle *vmh;
558 	struct sgx_softc *sc;
559 
560 	vmh = handle;
561 	if (vmh == NULL) {
562 		dprintf("%s: vmh not found.\n", __func__);
563 		return;
564 	}
565 
566 	sc = vmh->sc;
567 	if (sc == NULL) {
568 		dprintf("%s: sc is NULL\n", __func__);
569 		return;
570 	}
571 
572 	if (vmh->enclave == NULL) {
573 		dprintf("%s: Enclave not found.\n", __func__);
574 		return;
575 	}
576 
577 	sgx_enclave_remove(sc, vmh->enclave);
578 
579 	free(vmh->enclave, M_SGX);
580 	free(vmh, M_SGX);
581 }
582 
583 static int
584 sgx_pg_fault(vm_object_t object, vm_ooffset_t offset,
585     int prot, vm_page_t *mres)
586 {
587 
588 	/*
589 	 * The purpose of this trivial handler is to handle the race
590 	 * when user tries to access mmaped region before or during
591 	 * enclave creation ioctl calls.
592 	 */
593 
594 	dprintf("%s: offset 0x%lx\n", __func__, offset);
595 
596 	return (VM_PAGER_FAIL);
597 }
598 
599 static struct cdev_pager_ops sgx_pg_ops = {
600 	.cdev_pg_ctor = sgx_pg_ctor,
601 	.cdev_pg_dtor = sgx_pg_dtor,
602 	.cdev_pg_fault = sgx_pg_fault,
603 };
604 
605 static void
606 sgx_insert_epc_page_by_index(vm_page_t page, vm_object_t object,
607     vm_pindex_t pidx)
608 {
609 
610 	VM_OBJECT_ASSERT_WLOCKED(object);
611 
612 	page->valid = VM_PAGE_BITS_ALL;
613 	vm_page_insert(page, object, pidx);
614 }
615 
616 static void
617 sgx_insert_epc_page(struct sgx_enclave *enclave,
618     struct epc_page *epc, uint64_t addr)
619 {
620 	vm_pindex_t pidx;
621 	vm_page_t page;
622 
623 	VM_OBJECT_ASSERT_WLOCKED(enclave->object);
624 
625 	pidx = OFF_TO_IDX(addr);
626 	page = PHYS_TO_VM_PAGE(epc->phys);
627 
628 	sgx_insert_epc_page_by_index(page, enclave->object, pidx);
629 }
630 
631 static int
632 sgx_ioctl_create(struct sgx_softc *sc, struct sgx_enclave_create *param)
633 {
634 	struct sgx_vm_handle *vmh;
635 	vm_map_entry_t entry;
636 	vm_page_t p;
637 	struct page_info pginfo;
638 	struct secinfo secinfo;
639 	struct sgx_enclave *enclave;
640 	struct epc_page *epc;
641 	struct secs *secs;
642 	vm_object_t object;
643 	vm_page_t page;
644 	int ret;
645 
646 	epc = NULL;
647 	secs = NULL;
648 	enclave = NULL;
649 	object = NULL;
650 
651 	/* SGX Enclave Control Structure (SECS) */
652 	secs = malloc(PAGE_SIZE, M_SGX, M_WAITOK | M_ZERO);
653 	ret = copyin((void *)param->src, secs, sizeof(struct secs));
654 	if (ret) {
655 		dprintf("%s: Can't copy SECS.\n", __func__);
656 		goto error;
657 	}
658 
659 	ret = sgx_secs_validate(sc, secs);
660 	if (ret) {
661 		dprintf("%s: SECS validation failed.\n", __func__);
662 		goto error;
663 	}
664 
665 	ret = sgx_mem_find(sc, secs->base, &entry, &object);
666 	if (ret) {
667 		dprintf("%s: Can't find vm_map.\n", __func__);
668 		goto error;
669 	}
670 
671 	vmh = object->handle;
672 	if (!vmh) {
673 		dprintf("%s: Can't find vmh.\n", __func__);
674 		ret = ENXIO;
675 		goto error;
676 	}
677 
678 	dprintf("%s: entry start %lx offset %lx\n",
679 	    __func__, entry->start, entry->offset);
680 	vmh->base = (entry->start - entry->offset);
681 
682 	ret = sgx_enclave_alloc(sc, secs, &enclave);
683 	if (ret) {
684 		dprintf("%s: Can't alloc enclave.\n", __func__);
685 		goto error;
686 	}
687 	enclave->object = object;
688 	enclave->vmh = vmh;
689 
690 	memset(&secinfo, 0, sizeof(struct secinfo));
691 	memset(&pginfo, 0, sizeof(struct page_info));
692 	pginfo.linaddr = 0;
693 	pginfo.srcpge = (uint64_t)secs;
694 	pginfo.secinfo = &secinfo;
695 	pginfo.secs = 0;
696 
697 	ret = sgx_get_epc_page(sc, &epc);
698 	if (ret) {
699 		dprintf("%s: Failed to get free epc page.\n", __func__);
700 		goto error;
701 	}
702 	enclave->secs_epc_page = epc;
703 
704 	VM_OBJECT_WLOCK(object);
705 	p = vm_page_lookup(object, SGX_SECS_VM_OBJECT_INDEX);
706 	if (p) {
707 		VM_OBJECT_WUNLOCK(object);
708 		/* SECS page already added. */
709 		ret = ENXIO;
710 		goto error;
711 	}
712 
713 	ret = sgx_va_slot_init_by_index(sc, object,
714 	    - SGX_VA_PAGES_OFFS - SGX_SECS_VM_OBJECT_INDEX);
715 	if (ret) {
716 		VM_OBJECT_WUNLOCK(object);
717 		dprintf("%s: Can't init va slot.\n", __func__);
718 		goto error;
719 	}
720 
721 	mtx_lock(&sc->mtx);
722 	if ((sc->state & SGX_STATE_RUNNING) == 0) {
723 		mtx_unlock(&sc->mtx);
724 		/* Remove VA page that was just created for SECS page. */
725 		p = vm_page_grab(enclave->object,
726 		    - SGX_VA_PAGES_OFFS - SGX_SECS_VM_OBJECT_INDEX,
727 		    VM_ALLOC_NOCREAT);
728 		sgx_page_remove(sc, p);
729 		VM_OBJECT_WUNLOCK(object);
730 		goto error;
731 	}
732 	mtx_lock(&sc->mtx_encls);
733 	ret = sgx_ecreate(&pginfo, (void *)epc->base);
734 	mtx_unlock(&sc->mtx_encls);
735 	if (ret == SGX_EFAULT) {
736 		dprintf("%s: gp fault\n", __func__);
737 		mtx_unlock(&sc->mtx);
738 		/* Remove VA page that was just created for SECS page. */
739 		p = vm_page_grab(enclave->object,
740 		    - SGX_VA_PAGES_OFFS - SGX_SECS_VM_OBJECT_INDEX,
741 		    VM_ALLOC_NOCREAT);
742 		sgx_page_remove(sc, p);
743 		VM_OBJECT_WUNLOCK(object);
744 		goto error;
745 	}
746 
747 	TAILQ_INSERT_TAIL(&sc->enclaves, enclave, next);
748 	mtx_unlock(&sc->mtx);
749 
750 	vmh->enclave = enclave;
751 
752 	page = PHYS_TO_VM_PAGE(epc->phys);
753 	sgx_insert_epc_page_by_index(page, enclave->object,
754 	    SGX_SECS_VM_OBJECT_INDEX);
755 
756 	VM_OBJECT_WUNLOCK(object);
757 
758 	/* Release the reference. */
759 	vm_object_deallocate(object);
760 
761 	free(secs, M_SGX);
762 
763 	return (0);
764 
765 error:
766 	free(secs, M_SGX);
767 	sgx_put_epc_page(sc, epc);
768 	free(enclave, M_SGX);
769 	vm_object_deallocate(object);
770 
771 	return (ret);
772 }
773 
774 static int
775 sgx_ioctl_add_page(struct sgx_softc *sc,
776     struct sgx_enclave_add_page *addp)
777 {
778 	struct epc_page *secs_epc_page;
779 	struct sgx_enclave *enclave;
780 	struct sgx_vm_handle *vmh;
781 	struct epc_page *epc;
782 	struct page_info pginfo;
783 	struct secinfo secinfo;
784 	vm_object_t object;
785 	void *tmp_vaddr;
786 	uint64_t page_type;
787 	struct tcs *t;
788 	uint64_t addr;
789 	uint64_t pidx;
790 	vm_page_t p;
791 	int ret;
792 
793 	tmp_vaddr = NULL;
794 	epc = NULL;
795 	object = NULL;
796 
797 	/* Find and get reference to VM object. */
798 	ret = sgx_enclave_find(sc, addp->addr, &enclave);
799 	if (ret) {
800 		dprintf("%s: Failed to find enclave.\n", __func__);
801 		goto error;
802 	}
803 
804 	object = enclave->object;
805 	KASSERT(object != NULL, ("vm object is NULL\n"));
806 	vmh = object->handle;
807 
808 	ret = sgx_get_epc_page(sc, &epc);
809 	if (ret) {
810 		dprintf("%s: Failed to get free epc page.\n", __func__);
811 		goto error;
812 	}
813 
814 	memset(&secinfo, 0, sizeof(struct secinfo));
815 	ret = copyin((void *)addp->secinfo, &secinfo,
816 	    sizeof(struct secinfo));
817 	if (ret) {
818 		dprintf("%s: Failed to copy secinfo.\n", __func__);
819 		goto error;
820 	}
821 
822 	tmp_vaddr = malloc(PAGE_SIZE, M_SGX, M_WAITOK | M_ZERO);
823 	ret = copyin((void *)addp->src, tmp_vaddr, PAGE_SIZE);
824 	if (ret) {
825 		dprintf("%s: Failed to copy page.\n", __func__);
826 		goto error;
827 	}
828 
829 	page_type = (secinfo.flags & SECINFO_FLAGS_PT_M) >>
830 	    SECINFO_FLAGS_PT_S;
831 	if (page_type != SGX_PT_TCS && page_type != SGX_PT_REG) {
832 		dprintf("%s: page can't be added.\n", __func__);
833 		goto error;
834 	}
835 	if (page_type == SGX_PT_TCS) {
836 		t = (struct tcs *)tmp_vaddr;
837 		ret = sgx_tcs_validate(t);
838 		if (ret) {
839 			dprintf("%s: TCS page validation failed.\n",
840 			    __func__);
841 			goto error;
842 		}
843 		sgx_tcs_dump(sc, t);
844 	}
845 
846 	addr = (addp->addr - vmh->base);
847 	pidx = OFF_TO_IDX(addr);
848 
849 	VM_OBJECT_WLOCK(object);
850 	p = vm_page_lookup(object, pidx);
851 	if (p) {
852 		VM_OBJECT_WUNLOCK(object);
853 		/* Page already added. */
854 		ret = ENXIO;
855 		goto error;
856 	}
857 
858 	ret = sgx_va_slot_init(sc, enclave, addr);
859 	if (ret) {
860 		VM_OBJECT_WUNLOCK(object);
861 		dprintf("%s: Can't init va slot.\n", __func__);
862 		goto error;
863 	}
864 
865 	secs_epc_page = enclave->secs_epc_page;
866 	memset(&pginfo, 0, sizeof(struct page_info));
867 	pginfo.linaddr = (uint64_t)addp->addr;
868 	pginfo.srcpge = (uint64_t)tmp_vaddr;
869 	pginfo.secinfo = &secinfo;
870 	pginfo.secs = (uint64_t)secs_epc_page->base;
871 
872 	mtx_lock(&sc->mtx_encls);
873 	ret = sgx_eadd(&pginfo, (void *)epc->base);
874 	if (ret == SGX_EFAULT) {
875 		dprintf("%s: gp fault on eadd\n", __func__);
876 		mtx_unlock(&sc->mtx_encls);
877 		VM_OBJECT_WUNLOCK(object);
878 		goto error;
879 	}
880 	mtx_unlock(&sc->mtx_encls);
881 
882 	ret = sgx_measure_page(sc, enclave->secs_epc_page, epc, addp->mrmask);
883 	if (ret == SGX_EFAULT) {
884 		dprintf("%s: gp fault on eextend\n", __func__);
885 		sgx_epc_page_remove(sc, epc);
886 		VM_OBJECT_WUNLOCK(object);
887 		goto error;
888 	}
889 
890 	sgx_insert_epc_page(enclave, epc, addr);
891 
892 	VM_OBJECT_WUNLOCK(object);
893 
894 	/* Release the reference. */
895 	vm_object_deallocate(object);
896 
897 	free(tmp_vaddr, M_SGX);
898 
899 	return (0);
900 
901 error:
902 	free(tmp_vaddr, M_SGX);
903 	sgx_put_epc_page(sc, epc);
904 	vm_object_deallocate(object);
905 
906 	return (ret);
907 }
908 
909 static int
910 sgx_ioctl_init(struct sgx_softc *sc, struct sgx_enclave_init *initp)
911 {
912 	struct epc_page *secs_epc_page;
913 	struct sgx_enclave *enclave;
914 	struct thread *td;
915 	void *tmp_vaddr;
916 	void *einittoken;
917 	void *sigstruct;
918 	vm_object_t object;
919 	int retry;
920 	int ret;
921 
922 	td = curthread;
923 	tmp_vaddr = NULL;
924 	object = NULL;
925 
926 	dprintf("%s: addr %lx, sigstruct %lx, einittoken %lx\n",
927 	    __func__, initp->addr, initp->sigstruct, initp->einittoken);
928 
929 	/* Find and get reference to VM object. */
930 	ret = sgx_enclave_find(sc, initp->addr, &enclave);
931 	if (ret) {
932 		dprintf("%s: Failed to find enclave.\n", __func__);
933 		goto error;
934 	}
935 
936 	object = enclave->object;
937 
938 	tmp_vaddr = malloc(PAGE_SIZE, M_SGX, M_WAITOK | M_ZERO);
939 	sigstruct = tmp_vaddr;
940 	einittoken = (void *)((uint64_t)sigstruct + PAGE_SIZE / 2);
941 
942 	ret = copyin((void *)initp->sigstruct, sigstruct,
943 	    SGX_SIGSTRUCT_SIZE);
944 	if (ret) {
945 		dprintf("%s: Failed to copy SIGSTRUCT page.\n", __func__);
946 		goto error;
947 	}
948 
949 	ret = copyin((void *)initp->einittoken, einittoken,
950 	    SGX_EINITTOKEN_SIZE);
951 	if (ret) {
952 		dprintf("%s: Failed to copy EINITTOKEN page.\n", __func__);
953 		goto error;
954 	}
955 
956 	secs_epc_page = enclave->secs_epc_page;
957 	retry = 16;
958 	do {
959 		mtx_lock(&sc->mtx_encls);
960 		ret = sgx_einit(sigstruct, (void *)secs_epc_page->base,
961 		    einittoken);
962 		mtx_unlock(&sc->mtx_encls);
963 		dprintf("%s: sgx_einit returned %d\n", __func__, ret);
964 	} while (ret == SGX_UNMASKED_EVENT && retry--);
965 
966 	if (ret) {
967 		dprintf("%s: Failed init enclave: %d\n", __func__, ret);
968 		td->td_retval[0] = ret;
969 		ret = 0;
970 	}
971 
972 error:
973 	free(tmp_vaddr, M_SGX);
974 
975 	/* Release the reference. */
976 	vm_object_deallocate(object);
977 
978 	return (ret);
979 }
980 
981 static int
982 sgx_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
983     struct thread *td)
984 {
985 	struct sgx_enclave_add_page *addp;
986 	struct sgx_enclave_create *param;
987 	struct sgx_enclave_init *initp;
988 	struct sgx_softc *sc;
989 	int ret;
990 	int len;
991 
992 	sc = &sgx_sc;
993 
994 	len = IOCPARM_LEN(cmd);
995 
996 	dprintf("%s: cmd %lx, addr %lx, len %d\n",
997 	    __func__, cmd, (uint64_t)addr, len);
998 
999 	if (len > SGX_IOCTL_MAX_DATA_LEN)
1000 		return (EINVAL);
1001 
1002 	switch (cmd) {
1003 	case SGX_IOC_ENCLAVE_CREATE:
1004 		param = (struct sgx_enclave_create *)addr;
1005 		ret = sgx_ioctl_create(sc, param);
1006 		break;
1007 	case SGX_IOC_ENCLAVE_ADD_PAGE:
1008 		addp = (struct sgx_enclave_add_page *)addr;
1009 		ret = sgx_ioctl_add_page(sc, addp);
1010 		break;
1011 	case SGX_IOC_ENCLAVE_INIT:
1012 		initp = (struct sgx_enclave_init *)addr;
1013 		ret = sgx_ioctl_init(sc, initp);
1014 		break;
1015 	default:
1016 		return (EINVAL);
1017 	}
1018 
1019 	return (ret);
1020 }
1021 
1022 static int
1023 sgx_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
1024     vm_size_t mapsize, struct vm_object **objp, int nprot)
1025 {
1026 	struct sgx_vm_handle *vmh;
1027 	struct sgx_softc *sc;
1028 
1029 	sc = &sgx_sc;
1030 
1031 	dprintf("%s: mapsize 0x%lx, offset %lx\n",
1032 	    __func__, mapsize, *offset);
1033 
1034 	vmh = malloc(sizeof(struct sgx_vm_handle),
1035 	    M_SGX, M_WAITOK | M_ZERO);
1036 	vmh->sc = sc;
1037 	vmh->size = mapsize;
1038 	vmh->mem = cdev_pager_allocate(vmh, OBJT_MGTDEVICE, &sgx_pg_ops,
1039 	    mapsize, nprot, *offset, NULL);
1040 	if (vmh->mem == NULL) {
1041 		free(vmh, M_SGX);
1042 		return (ENOMEM);
1043 	}
1044 
1045 	VM_OBJECT_WLOCK(vmh->mem);
1046 	vm_object_set_flag(vmh->mem, OBJ_PG_DTOR);
1047 	VM_OBJECT_WUNLOCK(vmh->mem);
1048 
1049 	*objp = vmh->mem;
1050 
1051 	return (0);
1052 }
1053 
1054 static struct cdevsw sgx_cdevsw = {
1055 	.d_version =		D_VERSION,
1056 	.d_ioctl =		sgx_ioctl,
1057 	.d_mmap_single =	sgx_mmap_single,
1058 	.d_name =		"Intel SGX",
1059 };
1060 
1061 static int
1062 sgx_get_epc_area(struct sgx_softc *sc)
1063 {
1064 	vm_offset_t epc_base_vaddr;
1065 	u_int cp[4];
1066 	int error;
1067 	int i;
1068 
1069 	cpuid_count(SGX_CPUID, 0x2, cp);
1070 
1071 	sc->epc_base = ((uint64_t)(cp[1] & 0xfffff) << 32) +
1072 	    (cp[0] & 0xfffff000);
1073 	sc->epc_size = ((uint64_t)(cp[3] & 0xfffff) << 32) +
1074 	    (cp[2] & 0xfffff000);
1075 	sc->npages = sc->epc_size / SGX_PAGE_SIZE;
1076 
1077 	if (sc->epc_size == 0 || sc->epc_base == 0) {
1078 		printf("%s: Incorrect EPC data: EPC base %lx, size %lu\n",
1079 		    __func__, sc->epc_base, sc->epc_size);
1080 		return (EINVAL);
1081 	}
1082 
1083 	if (cp[3] & 0xffff)
1084 		sc->enclave_size_max = (1 << ((cp[3] >> 8) & 0xff));
1085 	else
1086 		sc->enclave_size_max = SGX_ENCL_SIZE_MAX_DEF;
1087 
1088 	epc_base_vaddr = (vm_offset_t)pmap_mapdev_attr(sc->epc_base,
1089 	    sc->epc_size, VM_MEMATTR_DEFAULT);
1090 
1091 	sc->epc_pages = malloc(sizeof(struct epc_page) * sc->npages,
1092 	    M_DEVBUF, M_WAITOK | M_ZERO);
1093 
1094 	for (i = 0; i < sc->npages; i++) {
1095 		sc->epc_pages[i].base = epc_base_vaddr + SGX_PAGE_SIZE * i;
1096 		sc->epc_pages[i].phys = sc->epc_base + SGX_PAGE_SIZE * i;
1097 		sc->epc_pages[i].index = i;
1098 	}
1099 
1100 	sc->vmem_epc = vmem_create("SGX EPC", sc->epc_base, sc->epc_size,
1101 	    PAGE_SIZE, PAGE_SIZE, M_FIRSTFIT | M_WAITOK);
1102 	if (sc->vmem_epc == NULL) {
1103 		printf("%s: Can't create vmem arena.\n", __func__);
1104 		free(sc->epc_pages, M_SGX);
1105 		return (EINVAL);
1106 	}
1107 
1108 	error = vm_phys_fictitious_reg_range(sc->epc_base,
1109 	    sc->epc_base + sc->epc_size, VM_MEMATTR_DEFAULT);
1110 	if (error) {
1111 		printf("%s: Can't register fictitious space.\n", __func__);
1112 		free(sc->epc_pages, M_SGX);
1113 		return (EINVAL);
1114 	}
1115 
1116 	return (0);
1117 }
1118 
1119 static void
1120 sgx_put_epc_area(struct sgx_softc *sc)
1121 {
1122 
1123 	vm_phys_fictitious_unreg_range(sc->epc_base,
1124 	    sc->epc_base + sc->epc_size);
1125 
1126 	free(sc->epc_pages, M_SGX);
1127 }
1128 
1129 static int
1130 sgx_load(void)
1131 {
1132 	struct sgx_softc *sc;
1133 	int error;
1134 
1135 	sc = &sgx_sc;
1136 
1137 	if ((cpu_stdext_feature & CPUID_STDEXT_SGX) == 0)
1138 		return (ENXIO);
1139 
1140 	error = sgx_get_epc_area(sc);
1141 	if (error) {
1142 		printf("%s: Failed to get Processor Reserved Memory area.\n",
1143 		    __func__);
1144 		return (ENXIO);
1145 	}
1146 
1147 	mtx_init(&sc->mtx_encls, "SGX ENCLS", NULL, MTX_DEF);
1148 	mtx_init(&sc->mtx, "SGX driver", NULL, MTX_DEF);
1149 
1150 	TAILQ_INIT(&sc->enclaves);
1151 
1152 	sc->sgx_cdev = make_dev(&sgx_cdevsw, 0, UID_ROOT, GID_WHEEL,
1153 	    0600, "isgx");
1154 
1155 	sc->state |= SGX_STATE_RUNNING;
1156 
1157 	printf("SGX initialized: EPC base 0x%lx size %ld (%d pages)\n",
1158 	    sc->epc_base, sc->epc_size, sc->npages);
1159 
1160 	return (0);
1161 }
1162 
1163 static int
1164 sgx_unload(void)
1165 {
1166 	struct sgx_softc *sc;
1167 
1168 	sc = &sgx_sc;
1169 
1170 	if ((sc->state & SGX_STATE_RUNNING) == 0)
1171 		return (0);
1172 
1173 	mtx_lock(&sc->mtx);
1174 	if (!TAILQ_EMPTY(&sc->enclaves)) {
1175 		mtx_unlock(&sc->mtx);
1176 		return (EBUSY);
1177 	}
1178 	sc->state &= ~SGX_STATE_RUNNING;
1179 	mtx_unlock(&sc->mtx);
1180 
1181 	destroy_dev(sc->sgx_cdev);
1182 
1183 	vmem_destroy(sc->vmem_epc);
1184 	sgx_put_epc_area(sc);
1185 
1186 	mtx_destroy(&sc->mtx_encls);
1187 	mtx_destroy(&sc->mtx);
1188 
1189 	return (0);
1190 }
1191 
1192 static int
1193 sgx_handler(module_t mod, int what, void *arg)
1194 {
1195 	int error;
1196 
1197 	switch (what) {
1198 	case MOD_LOAD:
1199 		error = sgx_load();
1200 		break;
1201 	case MOD_UNLOAD:
1202 		error = sgx_unload();
1203 		break;
1204 	default:
1205 		error = 0;
1206 		break;
1207 	}
1208 
1209 	return (error);
1210 }
1211 
1212 static moduledata_t sgx_kmod = {
1213 	"sgx",
1214 	sgx_handler,
1215 	NULL
1216 };
1217 
1218 DECLARE_MODULE(sgx, sgx_kmod, SI_SUB_LAST, SI_ORDER_ANY);
1219 MODULE_VERSION(sgx, 1);
1220