xref: /freebsd/sys/dev/nvdimm/nvdimm_spa.c (revision 8d2a55ca)
/*-
 * Copyright (c) 2017, 2018 The FreeBSD Foundation
 * All rights reserved.
 * Copyright (c) 2018, 2019 Intel Corporation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
306db7f8e5SKonstantin Belousov 
316db7f8e5SKonstantin Belousov #include <sys/cdefs.h>
326db7f8e5SKonstantin Belousov __FBSDID("$FreeBSD$");
336db7f8e5SKonstantin Belousov 
346db7f8e5SKonstantin Belousov #include "opt_acpi.h"
356db7f8e5SKonstantin Belousov #include "opt_ddb.h"
366db7f8e5SKonstantin Belousov 
376db7f8e5SKonstantin Belousov #include <sys/param.h>
386db7f8e5SKonstantin Belousov #include <sys/systm.h>
396db7f8e5SKonstantin Belousov #include <sys/bio.h>
406db7f8e5SKonstantin Belousov #include <sys/bus.h>
416db7f8e5SKonstantin Belousov #include <sys/conf.h>
426db7f8e5SKonstantin Belousov #include <sys/devicestat.h>
436db7f8e5SKonstantin Belousov #include <sys/disk.h>
446db7f8e5SKonstantin Belousov #include <sys/efi.h>
456db7f8e5SKonstantin Belousov #include <sys/kernel.h>
466db7f8e5SKonstantin Belousov #include <sys/kthread.h>
476db7f8e5SKonstantin Belousov #include <sys/limits.h>
486db7f8e5SKonstantin Belousov #include <sys/lock.h>
496db7f8e5SKonstantin Belousov #include <sys/malloc.h>
506db7f8e5SKonstantin Belousov #include <sys/module.h>
516db7f8e5SKonstantin Belousov #include <sys/rwlock.h>
526db7f8e5SKonstantin Belousov #include <sys/sglist.h>
536db7f8e5SKonstantin Belousov #include <sys/uio.h>
546db7f8e5SKonstantin Belousov #include <sys/uuid.h>
556db7f8e5SKonstantin Belousov #include <geom/geom.h>
566db7f8e5SKonstantin Belousov #include <geom/geom_int.h>
576db7f8e5SKonstantin Belousov #include <machine/vmparam.h>
586db7f8e5SKonstantin Belousov #include <vm/vm.h>
596db7f8e5SKonstantin Belousov #include <vm/vm_object.h>
606db7f8e5SKonstantin Belousov #include <vm/vm_page.h>
616db7f8e5SKonstantin Belousov #include <vm/vm_pager.h>
626db7f8e5SKonstantin Belousov #include <contrib/dev/acpica/include/acpi.h>
636db7f8e5SKonstantin Belousov #include <contrib/dev/acpica/include/accommon.h>
646db7f8e5SKonstantin Belousov #include <contrib/dev/acpica/include/acuuid.h>
656db7f8e5SKonstantin Belousov #include <dev/acpica/acpivar.h>
666db7f8e5SKonstantin Belousov #include <dev/nvdimm/nvdimm_var.h>
676db7f8e5SKonstantin Belousov 
/*
 * Brace initializers for the struct uuid member (u_id) of each
 * nvdimm_SPA_uuid_list[] entry, one macro per SPA range type.
 */
#define	UUID_INITIALIZER_VOLATILE_MEMORY				\
	{ 0x7305944f, 0xfdda, 0x44e3, 0xb1, 0x6c,			\
	    { 0x3f, 0x22, 0xd2, 0x52, 0xe5, 0xd0 } }
#define	UUID_INITIALIZER_PERSISTENT_MEMORY				\
	{ 0x66f0d379, 0xb4f3, 0x4074, 0xac, 0x43,			\
	    { 0x0d, 0x33, 0x18, 0xb7, 0x8c, 0xdb } }
#define	UUID_INITIALIZER_CONTROL_REGION					\
	{ 0x92f701f6, 0x13b4, 0x405d, 0x91, 0x0b,			\
	    { 0x29, 0x93, 0x67, 0xe8, 0x23, 0x4c } }
#define	UUID_INITIALIZER_DATA_REGION					\
	{ 0x91af0530, 0x5d86, 0x470e, 0xa6, 0xb0,			\
	    { 0x0a, 0x2d, 0xb9, 0x40, 0x82, 0x49 } }
#define	UUID_INITIALIZER_VOLATILE_VIRTUAL_DISK				\
	{ 0x77ab535a, 0x45fc, 0x624b, 0x55, 0x60,			\
	    { 0xf7, 0xb2, 0x81, 0xd1, 0xf9, 0x6e } }
#define	UUID_INITIALIZER_VOLATILE_VIRTUAL_CD				\
	{ 0x3d5abd30, 0x4175, 0x87ce, 0x6d, 0x64,			\
	    { 0xd2, 0xad, 0xe5, 0x23, 0xc4, 0xbb } }
#define	UUID_INITIALIZER_PERSISTENT_VIRTUAL_DISK			\
	{ 0x5cea02c9, 0x4d07, 0x69d3, 0x26, 0x9f,			\
	    { 0x44, 0x96, 0xfb, 0xe0, 0x96, 0xf9 } }
#define	UUID_INITIALIZER_PERSISTENT_VIRTUAL_CD				\
	{ 0x08018188, 0x42cd, 0xbb48, 0x10, 0x0f,			\
	    { 0x53, 0x87, 0xd5, 0x3d, 0xed, 0x3d } }

846465f315SKonstantin Belousov 
856db7f8e5SKonstantin Belousov static struct nvdimm_SPA_uuid_list_elm {
866db7f8e5SKonstantin Belousov 	const char		*u_name;
876db7f8e5SKonstantin Belousov 	struct uuid		u_id;
886db7f8e5SKonstantin Belousov 	const bool		u_usr_acc;
896db7f8e5SKonstantin Belousov } nvdimm_SPA_uuid_list[] = {
906db7f8e5SKonstantin Belousov 	[SPA_TYPE_VOLATILE_MEMORY] = {
916db7f8e5SKonstantin Belousov 		.u_name =	"VOLA MEM ",
926465f315SKonstantin Belousov 		.u_id =		UUID_INITIALIZER_VOLATILE_MEMORY,
936db7f8e5SKonstantin Belousov 		.u_usr_acc =	true,
946db7f8e5SKonstantin Belousov 	},
956db7f8e5SKonstantin Belousov 	[SPA_TYPE_PERSISTENT_MEMORY] = {
966db7f8e5SKonstantin Belousov 		.u_name =	"PERS MEM",
976465f315SKonstantin Belousov 		.u_id =		UUID_INITIALIZER_PERSISTENT_MEMORY,
986db7f8e5SKonstantin Belousov 		.u_usr_acc =	true,
996db7f8e5SKonstantin Belousov 	},
1006db7f8e5SKonstantin Belousov 	[SPA_TYPE_CONTROL_REGION] = {
1016db7f8e5SKonstantin Belousov 		.u_name =	"CTRL RG ",
1026465f315SKonstantin Belousov 		.u_id =		UUID_INITIALIZER_CONTROL_REGION,
1036db7f8e5SKonstantin Belousov 		.u_usr_acc =	false,
1046db7f8e5SKonstantin Belousov 	},
1056db7f8e5SKonstantin Belousov 	[SPA_TYPE_DATA_REGION] = {
1066db7f8e5SKonstantin Belousov 		.u_name =	"DATA RG ",
1076465f315SKonstantin Belousov 		.u_id =		UUID_INITIALIZER_DATA_REGION,
1086db7f8e5SKonstantin Belousov 		.u_usr_acc =	true,
1096db7f8e5SKonstantin Belousov 	},
1106db7f8e5SKonstantin Belousov 	[SPA_TYPE_VOLATILE_VIRTUAL_DISK] = {
1116db7f8e5SKonstantin Belousov 		.u_name =	"VIRT DSK",
1126465f315SKonstantin Belousov 		.u_id =		UUID_INITIALIZER_VOLATILE_VIRTUAL_DISK,
1136db7f8e5SKonstantin Belousov 		.u_usr_acc =	true,
1146db7f8e5SKonstantin Belousov 	},
1156db7f8e5SKonstantin Belousov 	[SPA_TYPE_VOLATILE_VIRTUAL_CD] = {
1166db7f8e5SKonstantin Belousov 		.u_name =	"VIRT CD ",
1176465f315SKonstantin Belousov 		.u_id =		UUID_INITIALIZER_VOLATILE_VIRTUAL_CD,
1186db7f8e5SKonstantin Belousov 		.u_usr_acc =	true,
1196db7f8e5SKonstantin Belousov 	},
1206db7f8e5SKonstantin Belousov 	[SPA_TYPE_PERSISTENT_VIRTUAL_DISK] = {
1216db7f8e5SKonstantin Belousov 		.u_name =	"PV DSK  ",
1226465f315SKonstantin Belousov 		.u_id =		UUID_INITIALIZER_PERSISTENT_VIRTUAL_DISK,
1236db7f8e5SKonstantin Belousov 		.u_usr_acc =	true,
1246db7f8e5SKonstantin Belousov 	},
1256db7f8e5SKonstantin Belousov 	[SPA_TYPE_PERSISTENT_VIRTUAL_CD] = {
1266db7f8e5SKonstantin Belousov 		.u_name =	"PV CD   ",
1276465f315SKonstantin Belousov 		.u_id =		UUID_INITIALIZER_PERSISTENT_VIRTUAL_CD,
1286db7f8e5SKonstantin Belousov 		.u_usr_acc =	true,
1296db7f8e5SKonstantin Belousov 	},
1306db7f8e5SKonstantin Belousov };
1316db7f8e5SKonstantin Belousov 
1326465f315SKonstantin Belousov enum SPA_mapping_type
1336465f315SKonstantin Belousov nvdimm_spa_type_from_uuid(struct uuid *uuid)
1346465f315SKonstantin Belousov {
1356465f315SKonstantin Belousov 	int j;
1366465f315SKonstantin Belousov 
1376465f315SKonstantin Belousov 	for (j = 0; j < nitems(nvdimm_SPA_uuid_list); j++) {
1386465f315SKonstantin Belousov 		if (uuidcmp(uuid, &nvdimm_SPA_uuid_list[j].u_id) != 0)
1396465f315SKonstantin Belousov 			continue;
1406465f315SKonstantin Belousov 		return (j);
1416465f315SKonstantin Belousov 	}
1426465f315SKonstantin Belousov 	return (SPA_TYPE_UNKNOWN);
1436465f315SKonstantin Belousov }
1446465f315SKonstantin Belousov 
1456db7f8e5SKonstantin Belousov static vm_memattr_t
146228e377dSBen Widawsky nvdimm_spa_memattr(struct nvdimm_spa_dev *dev)
1476db7f8e5SKonstantin Belousov {
1486db7f8e5SKonstantin Belousov 	vm_memattr_t mode;
1496db7f8e5SKonstantin Belousov 
150228e377dSBen Widawsky 	if ((dev->spa_efi_mem_flags & EFI_MD_ATTR_WB) != 0)
1516db7f8e5SKonstantin Belousov 		mode = VM_MEMATTR_WRITE_BACK;
152228e377dSBen Widawsky 	else if ((dev->spa_efi_mem_flags & EFI_MD_ATTR_WT) != 0)
1536db7f8e5SKonstantin Belousov 		mode = VM_MEMATTR_WRITE_THROUGH;
154228e377dSBen Widawsky 	else if ((dev->spa_efi_mem_flags & EFI_MD_ATTR_WC) != 0)
1556db7f8e5SKonstantin Belousov 		mode = VM_MEMATTR_WRITE_COMBINING;
156228e377dSBen Widawsky 	else if ((dev->spa_efi_mem_flags & EFI_MD_ATTR_WP) != 0)
1576db7f8e5SKonstantin Belousov 		mode = VM_MEMATTR_WRITE_PROTECTED;
158228e377dSBen Widawsky 	else if ((dev->spa_efi_mem_flags & EFI_MD_ATTR_UC) != 0)
1596db7f8e5SKonstantin Belousov 		mode = VM_MEMATTR_UNCACHEABLE;
1606db7f8e5SKonstantin Belousov 	else {
1616db7f8e5SKonstantin Belousov 		if (bootverbose)
162228e377dSBen Widawsky 			printf("SPA mapping attr %#lx unsupported\n",
163228e377dSBen Widawsky 			    dev->spa_efi_mem_flags);
1646db7f8e5SKonstantin Belousov 		mode = VM_MEMATTR_UNCACHEABLE;
1656db7f8e5SKonstantin Belousov 	}
1666db7f8e5SKonstantin Belousov 	return (mode);
1676db7f8e5SKonstantin Belousov }
1686db7f8e5SKonstantin Belousov 
1696db7f8e5SKonstantin Belousov static int
170228e377dSBen Widawsky nvdimm_spa_uio(struct nvdimm_spa_dev *dev, struct uio *uio)
1716db7f8e5SKonstantin Belousov {
1726db7f8e5SKonstantin Belousov 	struct vm_page m, *ma;
1736db7f8e5SKonstantin Belousov 	off_t off;
1746db7f8e5SKonstantin Belousov 	vm_memattr_t mattr;
1756db7f8e5SKonstantin Belousov 	int error, n;
1766db7f8e5SKonstantin Belousov 
17790a38351SKonstantin Belousov 	error = 0;
178228e377dSBen Widawsky 	if (dev->spa_kva == NULL) {
179228e377dSBen Widawsky 		mattr = nvdimm_spa_memattr(dev);
1808d2a55caSTycho Nightingale 		bzero(&m, sizeof(m));
1816db7f8e5SKonstantin Belousov 		vm_page_initfake(&m, 0, mattr);
1826db7f8e5SKonstantin Belousov 		ma = &m;
1836db7f8e5SKonstantin Belousov 		while (uio->uio_resid > 0) {
184228e377dSBen Widawsky 			if (uio->uio_offset >= dev->spa_len)
1856db7f8e5SKonstantin Belousov 				break;
186228e377dSBen Widawsky 			off = dev->spa_phys_base + uio->uio_offset;
1876db7f8e5SKonstantin Belousov 			vm_page_updatefake(&m, trunc_page(off), mattr);
1886db7f8e5SKonstantin Belousov 			n = PAGE_SIZE;
1896db7f8e5SKonstantin Belousov 			if (n > uio->uio_resid)
1906db7f8e5SKonstantin Belousov 				n = uio->uio_resid;
1916db7f8e5SKonstantin Belousov 			error = uiomove_fromphys(&ma, off & PAGE_MASK, n, uio);
1926db7f8e5SKonstantin Belousov 			if (error != 0)
1936db7f8e5SKonstantin Belousov 				break;
1946db7f8e5SKonstantin Belousov 		}
1956db7f8e5SKonstantin Belousov 	} else {
1966db7f8e5SKonstantin Belousov 		while (uio->uio_resid > 0) {
197228e377dSBen Widawsky 			if (uio->uio_offset >= dev->spa_len)
1986db7f8e5SKonstantin Belousov 				break;
1996db7f8e5SKonstantin Belousov 			n = INT_MAX;
2006db7f8e5SKonstantin Belousov 			if (n > uio->uio_resid)
2016db7f8e5SKonstantin Belousov 				n = uio->uio_resid;
202228e377dSBen Widawsky 			if (uio->uio_offset + n > dev->spa_len)
203228e377dSBen Widawsky 				n = dev->spa_len - uio->uio_offset;
204228e377dSBen Widawsky 			error = uiomove((char *)dev->spa_kva + uio->uio_offset,
2056db7f8e5SKonstantin Belousov 			    n, uio);
2066db7f8e5SKonstantin Belousov 			if (error != 0)
2076db7f8e5SKonstantin Belousov 				break;
2086db7f8e5SKonstantin Belousov 		}
2096db7f8e5SKonstantin Belousov 	}
2106db7f8e5SKonstantin Belousov 	return (error);
2116db7f8e5SKonstantin Belousov }
2126db7f8e5SKonstantin Belousov 
2136db7f8e5SKonstantin Belousov static int
2146db7f8e5SKonstantin Belousov nvdimm_spa_rw(struct cdev *dev, struct uio *uio, int ioflag)
2156db7f8e5SKonstantin Belousov {
2166db7f8e5SKonstantin Belousov 
2176db7f8e5SKonstantin Belousov 	return (nvdimm_spa_uio(dev->si_drv1, uio));
2186db7f8e5SKonstantin Belousov }
2196db7f8e5SKonstantin Belousov 
2206db7f8e5SKonstantin Belousov static int
221228e377dSBen Widawsky nvdimm_spa_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
2226db7f8e5SKonstantin Belousov     struct thread *td)
2236db7f8e5SKonstantin Belousov {
224228e377dSBen Widawsky 	struct nvdimm_spa_dev *dev;
2256db7f8e5SKonstantin Belousov 	int error;
2266db7f8e5SKonstantin Belousov 
227228e377dSBen Widawsky 	dev = cdev->si_drv1;
2286db7f8e5SKonstantin Belousov 	error = 0;
2296db7f8e5SKonstantin Belousov 	switch (cmd) {
2306db7f8e5SKonstantin Belousov 	case DIOCGSECTORSIZE:
2316db7f8e5SKonstantin Belousov 		*(u_int *)data = DEV_BSIZE;
2326db7f8e5SKonstantin Belousov 		break;
2336db7f8e5SKonstantin Belousov 	case DIOCGMEDIASIZE:
234228e377dSBen Widawsky 		*(off_t *)data = dev->spa_len;
2356db7f8e5SKonstantin Belousov 		break;
2366db7f8e5SKonstantin Belousov 	default:
2376db7f8e5SKonstantin Belousov 		error = ENOTTY;
2386db7f8e5SKonstantin Belousov 		break;
2396db7f8e5SKonstantin Belousov 	}
2406db7f8e5SKonstantin Belousov 	return (error);
2416db7f8e5SKonstantin Belousov }
2426db7f8e5SKonstantin Belousov 
2436db7f8e5SKonstantin Belousov static int
244228e377dSBen Widawsky nvdimm_spa_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
2456db7f8e5SKonstantin Belousov     vm_object_t *objp, int nprot)
2466db7f8e5SKonstantin Belousov {
247228e377dSBen Widawsky 	struct nvdimm_spa_dev *dev;
2486db7f8e5SKonstantin Belousov 
249228e377dSBen Widawsky 	dev = cdev->si_drv1;
250228e377dSBen Widawsky 	if (dev->spa_obj == NULL)
2516db7f8e5SKonstantin Belousov 		return (ENXIO);
252228e377dSBen Widawsky 	if (*offset >= dev->spa_len || *offset + size < *offset ||
253228e377dSBen Widawsky 	    *offset + size > dev->spa_len)
2546db7f8e5SKonstantin Belousov 		return (EINVAL);
255228e377dSBen Widawsky 	vm_object_reference(dev->spa_obj);
256228e377dSBen Widawsky 	*objp = dev->spa_obj;
2576db7f8e5SKonstantin Belousov 	return (0);
2586db7f8e5SKonstantin Belousov }
2596db7f8e5SKonstantin Belousov 
2606db7f8e5SKonstantin Belousov static struct cdevsw spa_cdevsw = {
2616db7f8e5SKonstantin Belousov 	.d_version =	D_VERSION,
2626db7f8e5SKonstantin Belousov 	.d_flags =	D_DISK,
2636db7f8e5SKonstantin Belousov 	.d_name =	"nvdimm_spa",
2646db7f8e5SKonstantin Belousov 	.d_read =	nvdimm_spa_rw,
2656db7f8e5SKonstantin Belousov 	.d_write =	nvdimm_spa_rw,
2666db7f8e5SKonstantin Belousov 	.d_ioctl =	nvdimm_spa_ioctl,
2676db7f8e5SKonstantin Belousov 	.d_mmap_single = nvdimm_spa_mmap_single,
2686db7f8e5SKonstantin Belousov };
2696db7f8e5SKonstantin Belousov 
2706db7f8e5SKonstantin Belousov static void
271228e377dSBen Widawsky nvdimm_spa_g_all_unmapped(struct nvdimm_spa_dev *dev, struct bio *bp, int rw)
2726db7f8e5SKonstantin Belousov {
2736db7f8e5SKonstantin Belousov 	struct vm_page maa[bp->bio_ma_n];
2746db7f8e5SKonstantin Belousov 	vm_page_t ma[bp->bio_ma_n];
2756db7f8e5SKonstantin Belousov 	vm_memattr_t mattr;
2766db7f8e5SKonstantin Belousov 	int i;
2776db7f8e5SKonstantin Belousov 
278228e377dSBen Widawsky 	mattr = nvdimm_spa_memattr(dev);
2796db7f8e5SKonstantin Belousov 	for (i = 0; i < nitems(ma); i++) {
2808d2a55caSTycho Nightingale 		bzero(&maa[i], sizeof(maa[i]));
281228e377dSBen Widawsky 		vm_page_initfake(&maa[i], dev->spa_phys_base +
2826db7f8e5SKonstantin Belousov 		    trunc_page(bp->bio_offset) + PAGE_SIZE * i, mattr);
2836db7f8e5SKonstantin Belousov 		ma[i] = &maa[i];
2846db7f8e5SKonstantin Belousov 	}
2856db7f8e5SKonstantin Belousov 	if (rw == BIO_READ)
2866db7f8e5SKonstantin Belousov 		pmap_copy_pages(ma, bp->bio_offset & PAGE_MASK, bp->bio_ma,
2876db7f8e5SKonstantin Belousov 		    bp->bio_ma_offset, bp->bio_length);
2886db7f8e5SKonstantin Belousov 	else
2896db7f8e5SKonstantin Belousov 		pmap_copy_pages(bp->bio_ma, bp->bio_ma_offset, ma,
2906db7f8e5SKonstantin Belousov 		    bp->bio_offset & PAGE_MASK, bp->bio_length);
2916db7f8e5SKonstantin Belousov }
2926db7f8e5SKonstantin Belousov 
2936db7f8e5SKonstantin Belousov static void
2946db7f8e5SKonstantin Belousov nvdimm_spa_g_thread(void *arg)
2956db7f8e5SKonstantin Belousov {
296228e377dSBen Widawsky 	struct g_spa *sc;
2976db7f8e5SKonstantin Belousov 	struct bio *bp;
2986db7f8e5SKonstantin Belousov 	struct uio auio;
2996db7f8e5SKonstantin Belousov 	struct iovec aiovec;
3006db7f8e5SKonstantin Belousov 	int error;
3016db7f8e5SKonstantin Belousov 
302228e377dSBen Widawsky 	sc = arg;
3036db7f8e5SKonstantin Belousov 	for (;;) {
304228e377dSBen Widawsky 		mtx_lock(&sc->spa_g_mtx);
3056db7f8e5SKonstantin Belousov 		for (;;) {
306228e377dSBen Widawsky 			bp = bioq_takefirst(&sc->spa_g_queue);
3076db7f8e5SKonstantin Belousov 			if (bp != NULL)
3086db7f8e5SKonstantin Belousov 				break;
309228e377dSBen Widawsky 			msleep(&sc->spa_g_queue, &sc->spa_g_mtx, PRIBIO,
3106db7f8e5SKonstantin Belousov 			    "spa_g", 0);
311228e377dSBen Widawsky 			if (!sc->spa_g_proc_run) {
312228e377dSBen Widawsky 				sc->spa_g_proc_exiting = true;
313228e377dSBen Widawsky 				wakeup(&sc->spa_g_queue);
314228e377dSBen Widawsky 				mtx_unlock(&sc->spa_g_mtx);
3156db7f8e5SKonstantin Belousov 				kproc_exit(0);
3166db7f8e5SKonstantin Belousov 			}
3176db7f8e5SKonstantin Belousov 			continue;
3186db7f8e5SKonstantin Belousov 		}
319228e377dSBen Widawsky 		mtx_unlock(&sc->spa_g_mtx);
3206db7f8e5SKonstantin Belousov 		if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
3216db7f8e5SKonstantin Belousov 		    bp->bio_cmd != BIO_FLUSH) {
3226db7f8e5SKonstantin Belousov 			error = EOPNOTSUPP;
3236db7f8e5SKonstantin Belousov 			goto completed;
3246db7f8e5SKonstantin Belousov 		}
3256db7f8e5SKonstantin Belousov 
3266db7f8e5SKonstantin Belousov 		error = 0;
3276db7f8e5SKonstantin Belousov 		if (bp->bio_cmd == BIO_FLUSH) {
328228e377dSBen Widawsky 			if (sc->dev->spa_kva != NULL) {
329228e377dSBen Widawsky 				pmap_large_map_wb(sc->dev->spa_kva,
330228e377dSBen Widawsky 				    sc->dev->spa_len);
3316db7f8e5SKonstantin Belousov 			} else {
3326db7f8e5SKonstantin Belousov 				pmap_flush_cache_phys_range(
333228e377dSBen Widawsky 				    (vm_paddr_t)sc->dev->spa_phys_base,
334228e377dSBen Widawsky 				    (vm_paddr_t)sc->dev->spa_phys_base +
335228e377dSBen Widawsky 				    sc->dev->spa_len,
336228e377dSBen Widawsky 				    nvdimm_spa_memattr(sc->dev));
3376db7f8e5SKonstantin Belousov 			}
3386db7f8e5SKonstantin Belousov 			/*
3396db7f8e5SKonstantin Belousov 			 * XXX flush IMC
3406db7f8e5SKonstantin Belousov 			 */
3416db7f8e5SKonstantin Belousov 			goto completed;
3426db7f8e5SKonstantin Belousov 		}
3436db7f8e5SKonstantin Belousov 
3446db7f8e5SKonstantin Belousov 		if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
345228e377dSBen Widawsky 			if (sc->dev->spa_kva != NULL) {
346228e377dSBen Widawsky 				aiovec.iov_base = (char *)sc->dev->spa_kva +
3476db7f8e5SKonstantin Belousov 				    bp->bio_offset;
3486db7f8e5SKonstantin Belousov 				aiovec.iov_len = bp->bio_length;
3496db7f8e5SKonstantin Belousov 				auio.uio_iov = &aiovec;
3506db7f8e5SKonstantin Belousov 				auio.uio_iovcnt = 1;
3516db7f8e5SKonstantin Belousov 				auio.uio_resid = bp->bio_length;
3526db7f8e5SKonstantin Belousov 				auio.uio_offset = bp->bio_offset;
3536db7f8e5SKonstantin Belousov 				auio.uio_segflg = UIO_SYSSPACE;
3546db7f8e5SKonstantin Belousov 				auio.uio_rw = bp->bio_cmd == BIO_READ ?
3556db7f8e5SKonstantin Belousov 				    UIO_WRITE : UIO_READ;
3566db7f8e5SKonstantin Belousov 				auio.uio_td = curthread;
3576db7f8e5SKonstantin Belousov 				error = uiomove_fromphys(bp->bio_ma,
3586db7f8e5SKonstantin Belousov 				    bp->bio_ma_offset, bp->bio_length, &auio);
359cbbdd283SKonstantin Belousov 				bp->bio_resid = auio.uio_resid;
3606db7f8e5SKonstantin Belousov 			} else {
361228e377dSBen Widawsky 				nvdimm_spa_g_all_unmapped(sc->dev, bp,
362228e377dSBen Widawsky 				    bp->bio_cmd);
363cbbdd283SKonstantin Belousov 				bp->bio_resid = bp->bio_length;
3646db7f8e5SKonstantin Belousov 				error = 0;
3656db7f8e5SKonstantin Belousov 			}
3666db7f8e5SKonstantin Belousov 		} else {
3676db7f8e5SKonstantin Belousov 			aiovec.iov_base = bp->bio_data;
3686db7f8e5SKonstantin Belousov 			aiovec.iov_len = bp->bio_length;
3696db7f8e5SKonstantin Belousov 			auio.uio_iov = &aiovec;
3706db7f8e5SKonstantin Belousov 			auio.uio_iovcnt = 1;
3716db7f8e5SKonstantin Belousov 			auio.uio_resid = bp->bio_length;
3726db7f8e5SKonstantin Belousov 			auio.uio_offset = bp->bio_offset;
3736db7f8e5SKonstantin Belousov 			auio.uio_segflg = UIO_SYSSPACE;
3746db7f8e5SKonstantin Belousov 			auio.uio_rw = bp->bio_cmd == BIO_READ ? UIO_READ :
3756db7f8e5SKonstantin Belousov 			    UIO_WRITE;
3766db7f8e5SKonstantin Belousov 			auio.uio_td = curthread;
377228e377dSBen Widawsky 			error = nvdimm_spa_uio(sc->dev, &auio);
378cbbdd283SKonstantin Belousov 			bp->bio_resid = auio.uio_resid;
3796db7f8e5SKonstantin Belousov 		}
380cbbdd283SKonstantin Belousov 		bp->bio_bcount = bp->bio_length;
381228e377dSBen Widawsky 		devstat_end_transaction_bio(sc->spa_g_devstat, bp);
3826db7f8e5SKonstantin Belousov completed:
3836db7f8e5SKonstantin Belousov 		bp->bio_completed = bp->bio_length;
3846db7f8e5SKonstantin Belousov 		g_io_deliver(bp, error);
3856db7f8e5SKonstantin Belousov 	}
3866db7f8e5SKonstantin Belousov }
3876db7f8e5SKonstantin Belousov 
3886db7f8e5SKonstantin Belousov static void
3896db7f8e5SKonstantin Belousov nvdimm_spa_g_start(struct bio *bp)
3906db7f8e5SKonstantin Belousov {
391228e377dSBen Widawsky 	struct g_spa *sc;
3926db7f8e5SKonstantin Belousov 
393228e377dSBen Widawsky 	sc = bp->bio_to->geom->softc;
3946db7f8e5SKonstantin Belousov 	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
395228e377dSBen Widawsky 		mtx_lock(&sc->spa_g_stat_mtx);
396228e377dSBen Widawsky 		devstat_start_transaction_bio(sc->spa_g_devstat, bp);
397228e377dSBen Widawsky 		mtx_unlock(&sc->spa_g_stat_mtx);
3986db7f8e5SKonstantin Belousov 	}
399228e377dSBen Widawsky 	mtx_lock(&sc->spa_g_mtx);
400228e377dSBen Widawsky 	bioq_disksort(&sc->spa_g_queue, bp);
401228e377dSBen Widawsky 	wakeup(&sc->spa_g_queue);
402228e377dSBen Widawsky 	mtx_unlock(&sc->spa_g_mtx);
4036db7f8e5SKonstantin Belousov }
4046db7f8e5SKonstantin Belousov 
/*
 * GEOM access method.  Every change of the read/write/exclusive counts
 * is accepted unconditionally; none of the arguments is examined.
 */
static int
nvdimm_spa_g_access(struct g_provider *pp, int r, int w, int e)
{
	const int granted = 0;

	return (granted);
}

4116db7f8e5SKonstantin Belousov 
412228e377dSBen Widawsky static struct g_geom * nvdimm_spa_g_create(struct nvdimm_spa_dev *dev,
413228e377dSBen Widawsky     const char *name);
414228e377dSBen Widawsky static g_ctl_destroy_geom_t nvdimm_spa_g_destroy_geom;
415228e377dSBen Widawsky 
4166db7f8e5SKonstantin Belousov struct g_class nvdimm_spa_g_class = {
4176db7f8e5SKonstantin Belousov 	.name =		"SPA",
4186db7f8e5SKonstantin Belousov 	.version =	G_VERSION,
4196db7f8e5SKonstantin Belousov 	.start =	nvdimm_spa_g_start,
4206db7f8e5SKonstantin Belousov 	.access =	nvdimm_spa_g_access,
421228e377dSBen Widawsky 	.destroy_geom =	nvdimm_spa_g_destroy_geom,
4226db7f8e5SKonstantin Belousov };
4236db7f8e5SKonstantin Belousov DECLARE_GEOM_CLASS(nvdimm_spa_g_class, g_spa);
4246db7f8e5SKonstantin Belousov 
4257dcbca8dSKonstantin Belousov int
4267dcbca8dSKonstantin Belousov nvdimm_spa_init(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr,
4277dcbca8dSKonstantin Belousov     enum SPA_mapping_type spa_type)
4286db7f8e5SKonstantin Belousov {
429228e377dSBen Widawsky 	char *name;
430228e377dSBen Widawsky 	int error;
4316db7f8e5SKonstantin Belousov 
4326db7f8e5SKonstantin Belousov 	spa->spa_type = spa_type;
4336db7f8e5SKonstantin Belousov 	spa->spa_nfit_idx = nfitaddr->RangeIndex;
434228e377dSBen Widawsky 	spa->dev.spa_domain =
435228e377dSBen Widawsky 	    ((nfitaddr->Flags & ACPI_NFIT_PROXIMITY_VALID) != 0) ?
436228e377dSBen Widawsky 	    nfitaddr->ProximityDomain : -1;
437228e377dSBen Widawsky 	spa->dev.spa_phys_base = nfitaddr->Address;
438228e377dSBen Widawsky 	spa->dev.spa_len = nfitaddr->Length;
439228e377dSBen Widawsky 	spa->dev.spa_efi_mem_flags = nfitaddr->MemoryMapping;
4406db7f8e5SKonstantin Belousov 	if (bootverbose) {
4416db7f8e5SKonstantin Belousov 		printf("NVDIMM SPA%d base %#016jx len %#016jx %s fl %#jx\n",
4426db7f8e5SKonstantin Belousov 		    spa->spa_nfit_idx,
443228e377dSBen Widawsky 		    (uintmax_t)spa->dev.spa_phys_base,
444228e377dSBen Widawsky 		    (uintmax_t)spa->dev.spa_len,
4456db7f8e5SKonstantin Belousov 		    nvdimm_SPA_uuid_list[spa_type].u_name,
446228e377dSBen Widawsky 		    spa->dev.spa_efi_mem_flags);
4476db7f8e5SKonstantin Belousov 	}
4486db7f8e5SKonstantin Belousov 	if (!nvdimm_SPA_uuid_list[spa_type].u_usr_acc)
4496db7f8e5SKonstantin Belousov 		return (0);
4506db7f8e5SKonstantin Belousov 
451228e377dSBen Widawsky 	asprintf(&name, M_NVDIMM, "spa%d", spa->spa_nfit_idx);
452228e377dSBen Widawsky 	error = nvdimm_spa_dev_init(&spa->dev, name);
453228e377dSBen Widawsky 	free(name, M_NVDIMM);
454228e377dSBen Widawsky 	return (error);
455228e377dSBen Widawsky }
456228e377dSBen Widawsky 
457228e377dSBen Widawsky int
458228e377dSBen Widawsky nvdimm_spa_dev_init(struct nvdimm_spa_dev *dev, const char *name)
459228e377dSBen Widawsky {
460228e377dSBen Widawsky 	struct make_dev_args mda;
461228e377dSBen Widawsky 	struct sglist *spa_sg;
462228e377dSBen Widawsky 	char *devname;
463228e377dSBen Widawsky 	int error, error1;
464228e377dSBen Widawsky 
465228e377dSBen Widawsky 	error1 = pmap_large_map(dev->spa_phys_base, dev->spa_len,
466228e377dSBen Widawsky 	    &dev->spa_kva, nvdimm_spa_memattr(dev));
4676db7f8e5SKonstantin Belousov 	if (error1 != 0) {
468228e377dSBen Widawsky 		printf("NVDIMM %s cannot map into KVA, error %d\n", name,
469228e377dSBen Widawsky 		    error1);
470228e377dSBen Widawsky 		dev->spa_kva = NULL;
4716db7f8e5SKonstantin Belousov 	}
4726db7f8e5SKonstantin Belousov 
4736db7f8e5SKonstantin Belousov 	spa_sg = sglist_alloc(1, M_WAITOK);
474228e377dSBen Widawsky 	error = sglist_append_phys(spa_sg, dev->spa_phys_base,
475228e377dSBen Widawsky 	    dev->spa_len);
4766db7f8e5SKonstantin Belousov 	if (error == 0) {
477228e377dSBen Widawsky 		dev->spa_obj = vm_pager_allocate(OBJT_SG, spa_sg, dev->spa_len,
4786db7f8e5SKonstantin Belousov 		    VM_PROT_ALL, 0, NULL);
479228e377dSBen Widawsky 		if (dev->spa_obj == NULL) {
480228e377dSBen Widawsky 			printf("NVDIMM %s failed to alloc vm object", name);
4816db7f8e5SKonstantin Belousov 			sglist_free(spa_sg);
4826db7f8e5SKonstantin Belousov 		}
4836db7f8e5SKonstantin Belousov 	} else {
484228e377dSBen Widawsky 		printf("NVDIMM %s failed to init sglist, error %d", name,
485228e377dSBen Widawsky 		    error);
4866db7f8e5SKonstantin Belousov 		sglist_free(spa_sg);
4876db7f8e5SKonstantin Belousov 	}
4886db7f8e5SKonstantin Belousov 
4896db7f8e5SKonstantin Belousov 	make_dev_args_init(&mda);
4906db7f8e5SKonstantin Belousov 	mda.mda_flags = MAKEDEV_WAITOK | MAKEDEV_CHECKNAME;
4916db7f8e5SKonstantin Belousov 	mda.mda_devsw = &spa_cdevsw;
4926db7f8e5SKonstantin Belousov 	mda.mda_cr = NULL;
4936db7f8e5SKonstantin Belousov 	mda.mda_uid = UID_ROOT;
4946db7f8e5SKonstantin Belousov 	mda.mda_gid = GID_OPERATOR;
4956db7f8e5SKonstantin Belousov 	mda.mda_mode = 0660;
496228e377dSBen Widawsky 	mda.mda_si_drv1 = dev;
497228e377dSBen Widawsky 	asprintf(&devname, M_NVDIMM, "nvdimm_%s", name);
498228e377dSBen Widawsky 	error = make_dev_s(&mda, &dev->spa_dev, "%s", devname);
499228e377dSBen Widawsky 	free(devname, M_NVDIMM);
5006db7f8e5SKonstantin Belousov 	if (error != 0) {
501228e377dSBen Widawsky 		printf("NVDIMM %s cannot create devfs node, error %d\n", name,
502228e377dSBen Widawsky 		    error);
5036db7f8e5SKonstantin Belousov 		if (error1 == 0)
5046db7f8e5SKonstantin Belousov 			error1 = error;
5056db7f8e5SKonstantin Belousov 	}
506228e377dSBen Widawsky 	dev->spa_g = nvdimm_spa_g_create(dev, name);
507228e377dSBen Widawsky 	if (dev->spa_g == NULL && error1 == 0)
508228e377dSBen Widawsky 		error1 = ENXIO;
509228e377dSBen Widawsky 	return (error1);
510228e377dSBen Widawsky }
5116db7f8e5SKonstantin Belousov 
512228e377dSBen Widawsky static struct g_geom *
513228e377dSBen Widawsky nvdimm_spa_g_create(struct nvdimm_spa_dev *dev, const char *name)
514228e377dSBen Widawsky {
515228e377dSBen Widawsky 	struct g_geom *gp;
516228e377dSBen Widawsky 	struct g_spa *sc;
517228e377dSBen Widawsky 	int error;
518228e377dSBen Widawsky 
519228e377dSBen Widawsky 	gp = NULL;
520228e377dSBen Widawsky 	sc = malloc(sizeof(struct g_spa), M_NVDIMM, M_WAITOK | M_ZERO);
521228e377dSBen Widawsky 	sc->dev = dev;
522228e377dSBen Widawsky 	bioq_init(&sc->spa_g_queue);
523228e377dSBen Widawsky 	mtx_init(&sc->spa_g_mtx, "spag", NULL, MTX_DEF);
524228e377dSBen Widawsky 	mtx_init(&sc->spa_g_stat_mtx, "spagst", NULL, MTX_DEF);
525228e377dSBen Widawsky 	sc->spa_g_proc_run = true;
526228e377dSBen Widawsky 	sc->spa_g_proc_exiting = false;
527228e377dSBen Widawsky 	error = kproc_create(nvdimm_spa_g_thread, sc, &sc->spa_g_proc, 0, 0,
528228e377dSBen Widawsky 	    "g_spa");
5296db7f8e5SKonstantin Belousov 	if (error != 0) {
530228e377dSBen Widawsky 		mtx_destroy(&sc->spa_g_mtx);
531228e377dSBen Widawsky 		mtx_destroy(&sc->spa_g_stat_mtx);
532228e377dSBen Widawsky 		free(sc, M_NVDIMM);
533228e377dSBen Widawsky 		printf("NVDIMM %s cannot create geom worker, error %d\n", name,
534228e377dSBen Widawsky 		    error);
5356db7f8e5SKonstantin Belousov 	} else {
5367dcbca8dSKonstantin Belousov 		g_topology_lock();
537228e377dSBen Widawsky 		gp = g_new_geomf(&nvdimm_spa_g_class, "%s", name);
538228e377dSBen Widawsky 		gp->softc = sc;
539228e377dSBen Widawsky 		sc->spa_p = g_new_providerf(gp, "%s", name);
540228e377dSBen Widawsky 		sc->spa_p->mediasize = dev->spa_len;
541228e377dSBen Widawsky 		sc->spa_p->sectorsize = DEV_BSIZE;
542228e377dSBen Widawsky 		sc->spa_p->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE |
5436db7f8e5SKonstantin Belousov 		    G_PF_ACCEPT_UNMAPPED;
544228e377dSBen Widawsky 		g_error_provider(sc->spa_p, 0);
545228e377dSBen Widawsky 		sc->spa_g_devstat = devstat_new_entry("spa", -1, DEV_BSIZE,
546228e377dSBen Widawsky 		    DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT,
5476db7f8e5SKonstantin Belousov 		    DEVSTAT_PRIORITY_MAX);
5487dcbca8dSKonstantin Belousov 		g_topology_unlock();
5496db7f8e5SKonstantin Belousov 	}
550228e377dSBen Widawsky 	return (gp);
5516db7f8e5SKonstantin Belousov }
5526db7f8e5SKonstantin Belousov 
5537dcbca8dSKonstantin Belousov void
5547dcbca8dSKonstantin Belousov nvdimm_spa_fini(struct SPA_mapping *spa)
5556db7f8e5SKonstantin Belousov {
5566db7f8e5SKonstantin Belousov 
557228e377dSBen Widawsky 	nvdimm_spa_dev_fini(&spa->dev);
558228e377dSBen Widawsky }
559228e377dSBen Widawsky 
560228e377dSBen Widawsky void
561228e377dSBen Widawsky nvdimm_spa_dev_fini(struct nvdimm_spa_dev *dev)
562228e377dSBen Widawsky {
563228e377dSBen Widawsky 
564228e377dSBen Widawsky 	if (dev->spa_g != NULL) {
5656db7f8e5SKonstantin Belousov 		g_topology_lock();
566228e377dSBen Widawsky 		nvdimm_spa_g_destroy_geom(NULL, dev->spa_g->class, dev->spa_g);
5676db7f8e5SKonstantin Belousov 		g_topology_unlock();
5686db7f8e5SKonstantin Belousov 	}
569228e377dSBen Widawsky 	if (dev->spa_dev != NULL) {
570228e377dSBen Widawsky 		destroy_dev(dev->spa_dev);
571228e377dSBen Widawsky 		dev->spa_dev = NULL;
5726db7f8e5SKonstantin Belousov 	}
573228e377dSBen Widawsky 	vm_object_deallocate(dev->spa_obj);
574228e377dSBen Widawsky 	if (dev->spa_kva != NULL) {
575228e377dSBen Widawsky 		pmap_large_unmap(dev->spa_kva, dev->spa_len);
576228e377dSBen Widawsky 		dev->spa_kva = NULL;
5776db7f8e5SKonstantin Belousov 	}
5786db7f8e5SKonstantin Belousov }
579228e377dSBen Widawsky 
580228e377dSBen Widawsky static int
581228e377dSBen Widawsky nvdimm_spa_g_destroy_geom(struct gctl_req *req, struct g_class *cp,
582228e377dSBen Widawsky     struct g_geom *gp)
583228e377dSBen Widawsky {
584228e377dSBen Widawsky 	struct g_spa *sc;
585228e377dSBen Widawsky 
586228e377dSBen Widawsky 	sc = gp->softc;
587228e377dSBen Widawsky 	mtx_lock(&sc->spa_g_mtx);
588228e377dSBen Widawsky 	sc->spa_g_proc_run = false;
589228e377dSBen Widawsky 	wakeup(&sc->spa_g_queue);
590228e377dSBen Widawsky 	while (!sc->spa_g_proc_exiting)
591228e377dSBen Widawsky 		msleep(&sc->spa_g_queue, &sc->spa_g_mtx, PRIBIO, "spa_e", 0);
592228e377dSBen Widawsky 	mtx_unlock(&sc->spa_g_mtx);
593228e377dSBen Widawsky 	g_topology_assert();
594228e377dSBen Widawsky 	g_wither_geom(gp, ENXIO);
595228e377dSBen Widawsky 	sc->spa_p = NULL;
596228e377dSBen Widawsky 	if (sc->spa_g_devstat != NULL) {
597228e377dSBen Widawsky 		devstat_remove_entry(sc->spa_g_devstat);
598228e377dSBen Widawsky 		sc->spa_g_devstat = NULL;
599228e377dSBen Widawsky 	}
600228e377dSBen Widawsky 	mtx_destroy(&sc->spa_g_mtx);
601228e377dSBen Widawsky 	mtx_destroy(&sc->spa_g_stat_mtx);
602228e377dSBen Widawsky 	free(sc, M_NVDIMM);
603228e377dSBen Widawsky 	return (0);
6046db7f8e5SKonstantin Belousov }
605