xref: /illumos-gate/usr/src/uts/i86xpv/vm/seg_mf.c (revision 7eea693d)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21843e1988Sjohnlev 
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26843e1988Sjohnlev 
/*
 * Machine frame segment driver.  This segment driver allows dom0 processes to
 * map pages of other domains or Xen (e.g. during save/restore).  ioctl()s on
 * the privcmd driver provide the MFN values backing each mapping, and we map
 * them into the process's address space at that time, i.e. while the ioctl()
 * is being serviced.  Demand-faulting is not supported by this driver due to
 * the requirements upon some of the ioctl()s.
 */
34843e1988Sjohnlev 
35843e1988Sjohnlev 
36843e1988Sjohnlev #include <sys/types.h>
37843e1988Sjohnlev #include <sys/systm.h>
38843e1988Sjohnlev #include <sys/vmsystm.h>
39843e1988Sjohnlev #include <sys/mman.h>
40843e1988Sjohnlev #include <sys/errno.h>
41843e1988Sjohnlev #include <sys/kmem.h>
42843e1988Sjohnlev #include <sys/cmn_err.h>
43843e1988Sjohnlev #include <sys/vnode.h>
44843e1988Sjohnlev #include <sys/conf.h>
45843e1988Sjohnlev #include <sys/debug.h>
46843e1988Sjohnlev #include <sys/lgrp.h>
47843e1988Sjohnlev #include <sys/hypervisor.h>
48843e1988Sjohnlev 
49843e1988Sjohnlev #include <vm/page.h>
50843e1988Sjohnlev #include <vm/hat.h>
51843e1988Sjohnlev #include <vm/as.h>
52843e1988Sjohnlev #include <vm/seg.h>
53843e1988Sjohnlev 
54843e1988Sjohnlev #include <vm/hat_pte.h>
55*7eea693dSMark Johnson #include <vm/hat_i86.h>
56843e1988Sjohnlev #include <vm/seg_mf.h>
57843e1988Sjohnlev 
58843e1988Sjohnlev #include <sys/fs/snode.h>
59843e1988Sjohnlev 
/* Map a vnode to the common vnode (s_commonvp) of its snode. */
#define	VTOCVP(vp)	(VTOS(vp)->s_commonvp)
61843e1988Sjohnlev 
62*7eea693dSMark Johnson typedef struct segmf_mfn_s {
63*7eea693dSMark Johnson 	mfn_t		m_mfn;
64*7eea693dSMark Johnson } segmf_mfn_t;
65*7eea693dSMark Johnson 
66*7eea693dSMark Johnson /* g_flags */
67*7eea693dSMark Johnson #define	SEGMF_GFLAGS_WR		0x1
68*7eea693dSMark Johnson #define	SEGMF_GFLAGS_MAPPED	0x2
69*7eea693dSMark Johnson typedef struct segmf_gref_s {
70*7eea693dSMark Johnson 	uint64_t	g_ptep;
71*7eea693dSMark Johnson 	grant_ref_t	g_gref;
72*7eea693dSMark Johnson 	uint32_t	g_flags;
73*7eea693dSMark Johnson 	grant_handle_t	g_handle;
74*7eea693dSMark Johnson } segmf_gref_t;
75*7eea693dSMark Johnson 
76*7eea693dSMark Johnson typedef union segmf_mu_u {
77*7eea693dSMark Johnson 	segmf_mfn_t	m;
78*7eea693dSMark Johnson 	segmf_gref_t	g;
79*7eea693dSMark Johnson } segmf_mu_t;
80*7eea693dSMark Johnson 
81*7eea693dSMark Johnson typedef enum {
82*7eea693dSMark Johnson 	SEGMF_MAP_EMPTY = 0,
83*7eea693dSMark Johnson 	SEGMF_MAP_MFN,
84*7eea693dSMark Johnson 	SEGMF_MAP_GREF
85*7eea693dSMark Johnson } segmf_map_type_t;
86*7eea693dSMark Johnson 
87*7eea693dSMark Johnson typedef struct segmf_map_s {
88*7eea693dSMark Johnson 	segmf_map_type_t	t_type;
89*7eea693dSMark Johnson 	segmf_mu_t		u;
90*7eea693dSMark Johnson } segmf_map_t;
91843e1988Sjohnlev 
92843e1988Sjohnlev struct segmf_data {
93843e1988Sjohnlev 	kmutex_t	lock;
94843e1988Sjohnlev 	struct vnode	*vp;
95843e1988Sjohnlev 	uchar_t		prot;
96843e1988Sjohnlev 	uchar_t		maxprot;
97843e1988Sjohnlev 	size_t		softlockcnt;
98843e1988Sjohnlev 	domid_t		domid;
99*7eea693dSMark Johnson 	segmf_map_t	*map;
100843e1988Sjohnlev };
101843e1988Sjohnlev 
102843e1988Sjohnlev static struct seg_ops segmf_ops;
103843e1988Sjohnlev 
104*7eea693dSMark Johnson static int segmf_fault_gref_range(struct seg *seg, caddr_t addr, size_t len);
105*7eea693dSMark Johnson 
106843e1988Sjohnlev static struct segmf_data *
107843e1988Sjohnlev segmf_data_zalloc(struct seg *seg)
108843e1988Sjohnlev {
109843e1988Sjohnlev 	struct segmf_data *data = kmem_zalloc(sizeof (*data), KM_SLEEP);
110843e1988Sjohnlev 
111843e1988Sjohnlev 	mutex_init(&data->lock, "segmf.lock", MUTEX_DEFAULT, NULL);
112843e1988Sjohnlev 	seg->s_ops = &segmf_ops;
113843e1988Sjohnlev 	seg->s_data = data;
114843e1988Sjohnlev 	return (data);
115843e1988Sjohnlev }
116843e1988Sjohnlev 
117843e1988Sjohnlev int
118843e1988Sjohnlev segmf_create(struct seg *seg, void *args)
119843e1988Sjohnlev {
120843e1988Sjohnlev 	struct segmf_crargs *a = args;
121843e1988Sjohnlev 	struct segmf_data *data;
122843e1988Sjohnlev 	struct as *as = seg->s_as;
123843e1988Sjohnlev 	pgcnt_t i, npages = seg_pages(seg);
124843e1988Sjohnlev 	int error;
125843e1988Sjohnlev 
126843e1988Sjohnlev 	hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
127843e1988Sjohnlev 
128843e1988Sjohnlev 	data = segmf_data_zalloc(seg);
129843e1988Sjohnlev 	data->vp = specfind(a->dev, VCHR);
130843e1988Sjohnlev 	data->prot = a->prot;
131843e1988Sjohnlev 	data->maxprot = a->maxprot;
132843e1988Sjohnlev 
133*7eea693dSMark Johnson 	data->map = kmem_alloc(npages * sizeof (segmf_map_t), KM_SLEEP);
134*7eea693dSMark Johnson 	for (i = 0; i < npages; i++) {
135*7eea693dSMark Johnson 		data->map[i].t_type = SEGMF_MAP_EMPTY;
136*7eea693dSMark Johnson 	}
137843e1988Sjohnlev 
138843e1988Sjohnlev 	error = VOP_ADDMAP(VTOCVP(data->vp), 0, as, seg->s_base, seg->s_size,
139da6c28aaSamw 	    data->prot, data->maxprot, MAP_SHARED, CRED(), NULL);
140843e1988Sjohnlev 
141843e1988Sjohnlev 	if (error != 0)
142843e1988Sjohnlev 		hat_unload(as->a_hat,
143843e1988Sjohnlev 		    seg->s_base, seg->s_size, HAT_UNLOAD_UNMAP);
144843e1988Sjohnlev 	return (error);
145843e1988Sjohnlev }
146843e1988Sjohnlev 
147843e1988Sjohnlev /*
148843e1988Sjohnlev  * Duplicate a seg and return new segment in newseg.
149843e1988Sjohnlev  */
150843e1988Sjohnlev static int
151843e1988Sjohnlev segmf_dup(struct seg *seg, struct seg *newseg)
152843e1988Sjohnlev {
153843e1988Sjohnlev 	struct segmf_data *data = seg->s_data;
154843e1988Sjohnlev 	struct segmf_data *ndata;
155843e1988Sjohnlev 	pgcnt_t npages = seg_pages(newseg);
156*7eea693dSMark Johnson 	size_t sz;
157843e1988Sjohnlev 
158843e1988Sjohnlev 	ndata = segmf_data_zalloc(newseg);
159843e1988Sjohnlev 
160843e1988Sjohnlev 	VN_HOLD(data->vp);
161843e1988Sjohnlev 	ndata->vp = data->vp;
162843e1988Sjohnlev 	ndata->prot = data->prot;
163843e1988Sjohnlev 	ndata->maxprot = data->maxprot;
164843e1988Sjohnlev 	ndata->domid = data->domid;
165843e1988Sjohnlev 
166*7eea693dSMark Johnson 	sz = npages * sizeof (segmf_map_t);
167*7eea693dSMark Johnson 	ndata->map = kmem_alloc(sz, KM_SLEEP);
168*7eea693dSMark Johnson 	bcopy(data->map, ndata->map, sz);
169843e1988Sjohnlev 
170843e1988Sjohnlev 	return (VOP_ADDMAP(VTOCVP(ndata->vp), 0, newseg->s_as,
171843e1988Sjohnlev 	    newseg->s_base, newseg->s_size, ndata->prot, ndata->maxprot,
172da6c28aaSamw 	    MAP_SHARED, CRED(), NULL));
173843e1988Sjohnlev }
174843e1988Sjohnlev 
175843e1988Sjohnlev /*
176843e1988Sjohnlev  * We only support unmapping the whole segment, and we automatically unlock
177843e1988Sjohnlev  * what we previously soft-locked.
178843e1988Sjohnlev  */
179843e1988Sjohnlev static int
180843e1988Sjohnlev segmf_unmap(struct seg *seg, caddr_t addr, size_t len)
181843e1988Sjohnlev {
182843e1988Sjohnlev 	struct segmf_data *data = seg->s_data;
183843e1988Sjohnlev 	offset_t off;
184843e1988Sjohnlev 
185843e1988Sjohnlev 	if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
186843e1988Sjohnlev 	    (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
187843e1988Sjohnlev 		panic("segmf_unmap");
188843e1988Sjohnlev 
189843e1988Sjohnlev 	if (addr != seg->s_base || len != seg->s_size)
190843e1988Sjohnlev 		return (ENOTSUP);
191843e1988Sjohnlev 
192843e1988Sjohnlev 	hat_unload(seg->s_as->a_hat, addr, len,
193843e1988Sjohnlev 	    HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);
194843e1988Sjohnlev 
195843e1988Sjohnlev 	off = (offset_t)seg_page(seg, addr);
196843e1988Sjohnlev 
197843e1988Sjohnlev 	ASSERT(data->vp != NULL);
198843e1988Sjohnlev 
199843e1988Sjohnlev 	(void) VOP_DELMAP(VTOCVP(data->vp), off, seg->s_as, addr, len,
200da6c28aaSamw 	    data->prot, data->maxprot, MAP_SHARED, CRED(), NULL);
201843e1988Sjohnlev 
202843e1988Sjohnlev 	seg_free(seg);
203843e1988Sjohnlev 	return (0);
204843e1988Sjohnlev }
205843e1988Sjohnlev 
206843e1988Sjohnlev static void
207843e1988Sjohnlev segmf_free(struct seg *seg)
208843e1988Sjohnlev {
209843e1988Sjohnlev 	struct segmf_data *data = seg->s_data;
210843e1988Sjohnlev 	pgcnt_t npages = seg_pages(seg);
211843e1988Sjohnlev 
212*7eea693dSMark Johnson 	kmem_free(data->map, npages * sizeof (segmf_map_t));
213843e1988Sjohnlev 	VN_RELE(data->vp);
214843e1988Sjohnlev 	mutex_destroy(&data->lock);
215843e1988Sjohnlev 	kmem_free(data, sizeof (*data));
216843e1988Sjohnlev }
217843e1988Sjohnlev 
218843e1988Sjohnlev static int segmf_faultpage_debug = 0;
219843e1988Sjohnlev /*ARGSUSED*/
220843e1988Sjohnlev static int
221843e1988Sjohnlev segmf_faultpage(struct hat *hat, struct seg *seg, caddr_t addr,
222843e1988Sjohnlev     enum fault_type type, uint_t prot)
223843e1988Sjohnlev {
224843e1988Sjohnlev 	struct segmf_data *data = seg->s_data;
225843e1988Sjohnlev 	uint_t hat_flags = HAT_LOAD_NOCONSIST;
226843e1988Sjohnlev 	mfn_t mfn;
227843e1988Sjohnlev 	x86pte_t pte;
228*7eea693dSMark Johnson 	segmf_map_t *map;
229*7eea693dSMark Johnson 	uint_t idx;
230843e1988Sjohnlev 
231843e1988Sjohnlev 
232*7eea693dSMark Johnson 	idx = seg_page(seg, addr);
233*7eea693dSMark Johnson 	map = &data->map[idx];
234*7eea693dSMark Johnson 	ASSERT(map->t_type == SEGMF_MAP_MFN);
235*7eea693dSMark Johnson 
236*7eea693dSMark Johnson 	mfn = map->u.m.m_mfn;
237843e1988Sjohnlev 
238843e1988Sjohnlev 	if (type == F_SOFTLOCK) {
239843e1988Sjohnlev 		mutex_enter(&freemem_lock);
240843e1988Sjohnlev 		data->softlockcnt++;
241843e1988Sjohnlev 		mutex_exit(&freemem_lock);
242843e1988Sjohnlev 		hat_flags |= HAT_LOAD_LOCK;
243843e1988Sjohnlev 	} else
244843e1988Sjohnlev 		hat_flags |= HAT_LOAD;
245843e1988Sjohnlev 
246843e1988Sjohnlev 	if (segmf_faultpage_debug > 0) {
247843e1988Sjohnlev 		uprintf("segmf_faultpage: addr %p domid %x mfn %lx prot %x\n",
248843e1988Sjohnlev 		    (void *)addr, data->domid, mfn, prot);
249843e1988Sjohnlev 		segmf_faultpage_debug--;
250843e1988Sjohnlev 	}
251843e1988Sjohnlev 
252843e1988Sjohnlev 	/*
253843e1988Sjohnlev 	 * Ask the HAT to load a throwaway mapping to page zero, then
254843e1988Sjohnlev 	 * overwrite it with our foreign domain mapping. It gets removed
255843e1988Sjohnlev 	 * later via hat_unload()
256843e1988Sjohnlev 	 */
257843e1988Sjohnlev 	hat_devload(hat, addr, MMU_PAGESIZE, (pfn_t)0,
258843e1988Sjohnlev 	    PROT_READ | HAT_UNORDERED_OK, hat_flags);
259843e1988Sjohnlev 
260843e1988Sjohnlev 	pte = mmu_ptob((x86pte_t)mfn) | PT_VALID | PT_USER | PT_FOREIGN;
261843e1988Sjohnlev 	if (prot & PROT_WRITE)
262843e1988Sjohnlev 		pte |= PT_WRITABLE;
263843e1988Sjohnlev 
264843e1988Sjohnlev 	if (HYPERVISOR_update_va_mapping_otherdomain((uintptr_t)addr, pte,
265843e1988Sjohnlev 	    UVMF_INVLPG | UVMF_ALL, data->domid) != 0) {
266843e1988Sjohnlev 		hat_flags = HAT_UNLOAD_UNMAP;
267843e1988Sjohnlev 
268843e1988Sjohnlev 		if (type == F_SOFTLOCK) {
269843e1988Sjohnlev 			hat_flags |= HAT_UNLOAD_UNLOCK;
270843e1988Sjohnlev 			mutex_enter(&freemem_lock);
271843e1988Sjohnlev 			data->softlockcnt--;
272843e1988Sjohnlev 			mutex_exit(&freemem_lock);
273843e1988Sjohnlev 		}
274843e1988Sjohnlev 
275843e1988Sjohnlev 		hat_unload(hat, addr, MMU_PAGESIZE, hat_flags);
276843e1988Sjohnlev 		return (FC_MAKE_ERR(EFAULT));
277843e1988Sjohnlev 	}
278843e1988Sjohnlev 
279843e1988Sjohnlev 	return (0);
280843e1988Sjohnlev }
281843e1988Sjohnlev 
282843e1988Sjohnlev static int
283843e1988Sjohnlev seg_rw_to_prot(enum seg_rw rw)
284843e1988Sjohnlev {
285843e1988Sjohnlev 	switch (rw) {
286843e1988Sjohnlev 	case S_READ:
287843e1988Sjohnlev 		return (PROT_READ);
288843e1988Sjohnlev 	case S_WRITE:
289843e1988Sjohnlev 		return (PROT_WRITE);
290843e1988Sjohnlev 	case S_EXEC:
291843e1988Sjohnlev 		return (PROT_EXEC);
292843e1988Sjohnlev 	case S_OTHER:
293843e1988Sjohnlev 	default:
294843e1988Sjohnlev 		break;
295843e1988Sjohnlev 	}
296843e1988Sjohnlev 	return (PROT_READ | PROT_WRITE | PROT_EXEC);
297843e1988Sjohnlev }
298843e1988Sjohnlev 
299843e1988Sjohnlev static void
300843e1988Sjohnlev segmf_softunlock(struct hat *hat, struct seg *seg, caddr_t addr, size_t len)
301843e1988Sjohnlev {
302843e1988Sjohnlev 	struct segmf_data *data = seg->s_data;
303843e1988Sjohnlev 
304843e1988Sjohnlev 	hat_unlock(hat, addr, len);
305843e1988Sjohnlev 
306843e1988Sjohnlev 	mutex_enter(&freemem_lock);
307843e1988Sjohnlev 	ASSERT(data->softlockcnt >= btopr(len));
308843e1988Sjohnlev 	data->softlockcnt -= btopr(len);
309843e1988Sjohnlev 	mutex_exit(&freemem_lock);
310843e1988Sjohnlev 
311843e1988Sjohnlev 	if (data->softlockcnt == 0) {
312843e1988Sjohnlev 		struct as *as = seg->s_as;
313843e1988Sjohnlev 
314843e1988Sjohnlev 		if (AS_ISUNMAPWAIT(as)) {
315843e1988Sjohnlev 			mutex_enter(&as->a_contents);
316843e1988Sjohnlev 			if (AS_ISUNMAPWAIT(as)) {
317843e1988Sjohnlev 				AS_CLRUNMAPWAIT(as);
318843e1988Sjohnlev 				cv_broadcast(&as->a_cv);
319843e1988Sjohnlev 			}
320843e1988Sjohnlev 			mutex_exit(&as->a_contents);
321843e1988Sjohnlev 		}
322843e1988Sjohnlev 	}
323843e1988Sjohnlev }
324843e1988Sjohnlev 
325843e1988Sjohnlev static int
326843e1988Sjohnlev segmf_fault_range(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
327843e1988Sjohnlev     enum fault_type type, enum seg_rw rw)
328843e1988Sjohnlev {
329843e1988Sjohnlev 	struct segmf_data *data = seg->s_data;
330843e1988Sjohnlev 	int error = 0;
331843e1988Sjohnlev 	caddr_t a;
332843e1988Sjohnlev 
333843e1988Sjohnlev 	if ((data->prot & seg_rw_to_prot(rw)) == 0)
334843e1988Sjohnlev 		return (FC_PROT);
335843e1988Sjohnlev 
336843e1988Sjohnlev 	/* loop over the address range handling each fault */
337843e1988Sjohnlev 
338843e1988Sjohnlev 	for (a = addr; a < addr + len; a += PAGESIZE) {
339843e1988Sjohnlev 		error = segmf_faultpage(hat, seg, a, type, data->prot);
340843e1988Sjohnlev 		if (error != 0)
341843e1988Sjohnlev 			break;
342843e1988Sjohnlev 	}
343843e1988Sjohnlev 
344843e1988Sjohnlev 	if (error != 0 && type == F_SOFTLOCK) {
345843e1988Sjohnlev 		size_t done = (size_t)(a - addr);
346843e1988Sjohnlev 
347843e1988Sjohnlev 		/*
348843e1988Sjohnlev 		 * Undo what's been done so far.
349843e1988Sjohnlev 		 */
350843e1988Sjohnlev 		if (done > 0)
351843e1988Sjohnlev 			segmf_softunlock(hat, seg, addr, done);
352843e1988Sjohnlev 	}
353843e1988Sjohnlev 
354843e1988Sjohnlev 	return (error);
355843e1988Sjohnlev }
356843e1988Sjohnlev 
357843e1988Sjohnlev /*
358843e1988Sjohnlev  * We never demand-fault for seg_mf.
359843e1988Sjohnlev  */
360843e1988Sjohnlev /*ARGSUSED*/
361843e1988Sjohnlev static int
362843e1988Sjohnlev segmf_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
363843e1988Sjohnlev     enum fault_type type, enum seg_rw rw)
364843e1988Sjohnlev {
365843e1988Sjohnlev 	return (FC_MAKE_ERR(EFAULT));
366843e1988Sjohnlev }
367843e1988Sjohnlev 
368843e1988Sjohnlev /*ARGSUSED*/
369843e1988Sjohnlev static int
370843e1988Sjohnlev segmf_faulta(struct seg *seg, caddr_t addr)
371843e1988Sjohnlev {
372843e1988Sjohnlev 	return (0);
373843e1988Sjohnlev }
374843e1988Sjohnlev 
375843e1988Sjohnlev /*ARGSUSED*/
376843e1988Sjohnlev static int
377843e1988Sjohnlev segmf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
378843e1988Sjohnlev {
379843e1988Sjohnlev 	return (EINVAL);
380843e1988Sjohnlev }
381843e1988Sjohnlev 
382843e1988Sjohnlev /*ARGSUSED*/
383843e1988Sjohnlev static int
384843e1988Sjohnlev segmf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
385843e1988Sjohnlev {
386843e1988Sjohnlev 	return (EINVAL);
387843e1988Sjohnlev }
388843e1988Sjohnlev 
389843e1988Sjohnlev /*ARGSUSED*/
390843e1988Sjohnlev static int
391843e1988Sjohnlev segmf_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
392843e1988Sjohnlev {
393843e1988Sjohnlev 	return (-1);
394843e1988Sjohnlev }
395843e1988Sjohnlev 
396843e1988Sjohnlev /*ARGSUSED*/
397843e1988Sjohnlev static int
398843e1988Sjohnlev segmf_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
399843e1988Sjohnlev {
400843e1988Sjohnlev 	return (0);
401843e1988Sjohnlev }
402843e1988Sjohnlev 
403843e1988Sjohnlev /*
404843e1988Sjohnlev  * XXPV	Hmm.  Should we say that mf mapping are "in core?"
405843e1988Sjohnlev  */
406843e1988Sjohnlev 
407843e1988Sjohnlev /*ARGSUSED*/
408843e1988Sjohnlev static size_t
409843e1988Sjohnlev segmf_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
410843e1988Sjohnlev {
411843e1988Sjohnlev 	size_t v;
412843e1988Sjohnlev 
413843e1988Sjohnlev 	for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len;
414843e1988Sjohnlev 	    len -= PAGESIZE, v += PAGESIZE)
415843e1988Sjohnlev 		*vec++ = 1;
416843e1988Sjohnlev 	return (v);
417843e1988Sjohnlev }
418843e1988Sjohnlev 
419843e1988Sjohnlev /*ARGSUSED*/
420843e1988Sjohnlev static int
421843e1988Sjohnlev segmf_lockop(struct seg *seg, caddr_t addr,
422843e1988Sjohnlev     size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
423843e1988Sjohnlev {
424843e1988Sjohnlev 	return (0);
425843e1988Sjohnlev }
426843e1988Sjohnlev 
427843e1988Sjohnlev static int
428843e1988Sjohnlev segmf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
429843e1988Sjohnlev {
430843e1988Sjohnlev 	struct segmf_data *data = seg->s_data;
431843e1988Sjohnlev 	pgcnt_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
432843e1988Sjohnlev 
433843e1988Sjohnlev 	if (pgno != 0) {
434843e1988Sjohnlev 		do
435843e1988Sjohnlev 			protv[--pgno] = data->prot;
436843e1988Sjohnlev 		while (pgno != 0)
437843e1988Sjohnlev 			;
438843e1988Sjohnlev 	}
439843e1988Sjohnlev 	return (0);
440843e1988Sjohnlev }
441843e1988Sjohnlev 
442843e1988Sjohnlev static u_offset_t
443843e1988Sjohnlev segmf_getoffset(struct seg *seg, caddr_t addr)
444843e1988Sjohnlev {
445843e1988Sjohnlev 	return (addr - seg->s_base);
446843e1988Sjohnlev }
447843e1988Sjohnlev 
448843e1988Sjohnlev /*ARGSUSED*/
449843e1988Sjohnlev static int
450843e1988Sjohnlev segmf_gettype(struct seg *seg, caddr_t addr)
451843e1988Sjohnlev {
452843e1988Sjohnlev 	return (MAP_SHARED);
453843e1988Sjohnlev }
454843e1988Sjohnlev 
455843e1988Sjohnlev /*ARGSUSED1*/
456843e1988Sjohnlev static int
457843e1988Sjohnlev segmf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
458843e1988Sjohnlev {
459843e1988Sjohnlev 	struct segmf_data *data = seg->s_data;
460843e1988Sjohnlev 
461843e1988Sjohnlev 	*vpp = VTOCVP(data->vp);
462843e1988Sjohnlev 	return (0);
463843e1988Sjohnlev }
464843e1988Sjohnlev 
465843e1988Sjohnlev /*ARGSUSED*/
466843e1988Sjohnlev static int
467843e1988Sjohnlev segmf_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
468843e1988Sjohnlev {
469843e1988Sjohnlev 	return (0);
470843e1988Sjohnlev }
471843e1988Sjohnlev 
/*
 * dump entry of the ops vector: intentionally empty.
 */
/*ARGSUSED*/
static void
segmf_dump(struct seg *seg)
{
}
476843e1988Sjohnlev 
477843e1988Sjohnlev /*ARGSUSED*/
478843e1988Sjohnlev static int
479843e1988Sjohnlev segmf_pagelock(struct seg *seg, caddr_t addr, size_t len,
480843e1988Sjohnlev     struct page ***ppp, enum lock_type type, enum seg_rw rw)
481843e1988Sjohnlev {
482843e1988Sjohnlev 	return (ENOTSUP);
483843e1988Sjohnlev }
484843e1988Sjohnlev 
485843e1988Sjohnlev /*ARGSUSED*/
486843e1988Sjohnlev static int
487843e1988Sjohnlev segmf_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
488843e1988Sjohnlev {
489843e1988Sjohnlev 	return (ENOTSUP);
490843e1988Sjohnlev }
491843e1988Sjohnlev 
492843e1988Sjohnlev static int
493843e1988Sjohnlev segmf_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
494843e1988Sjohnlev {
495843e1988Sjohnlev 	struct segmf_data *data = seg->s_data;
496843e1988Sjohnlev 
497843e1988Sjohnlev 	memid->val[0] = (uintptr_t)VTOCVP(data->vp);
498843e1988Sjohnlev 	memid->val[1] = (uintptr_t)seg_page(seg, addr);
499843e1988Sjohnlev 	return (0);
500843e1988Sjohnlev }
501843e1988Sjohnlev 
502843e1988Sjohnlev /*ARGSUSED*/
503843e1988Sjohnlev static lgrp_mem_policy_info_t *
504843e1988Sjohnlev segmf_getpolicy(struct seg *seg, caddr_t addr)
505843e1988Sjohnlev {
506843e1988Sjohnlev 	return (NULL);
507843e1988Sjohnlev }
508843e1988Sjohnlev 
509843e1988Sjohnlev /*ARGSUSED*/
510843e1988Sjohnlev static int
511843e1988Sjohnlev segmf_capable(struct seg *seg, segcapability_t capability)
512843e1988Sjohnlev {
513843e1988Sjohnlev 	return (0);
514843e1988Sjohnlev }
515843e1988Sjohnlev 
516843e1988Sjohnlev /*
517843e1988Sjohnlev  * Add a set of contiguous foreign MFNs to the segment. soft-locking them.  The
518843e1988Sjohnlev  * pre-faulting is necessary due to live migration; in particular we must
519843e1988Sjohnlev  * return an error in response to IOCTL_PRIVCMD_MMAPBATCH rather than faulting
520843e1988Sjohnlev  * later on a bad MFN.  Whilst this isn't necessary for the other MMAP
521843e1988Sjohnlev  * ioctl()s, we lock them too, as they should be transitory.
522843e1988Sjohnlev  */
523843e1988Sjohnlev int
524843e1988Sjohnlev segmf_add_mfns(struct seg *seg, caddr_t addr, mfn_t mfn,
525843e1988Sjohnlev     pgcnt_t pgcnt, domid_t domid)
526843e1988Sjohnlev {
527843e1988Sjohnlev 	struct segmf_data *data = seg->s_data;
528*7eea693dSMark Johnson 	pgcnt_t base;
529843e1988Sjohnlev 	faultcode_t fc;
530843e1988Sjohnlev 	pgcnt_t i;
531843e1988Sjohnlev 	int error = 0;
532843e1988Sjohnlev 
533843e1988Sjohnlev 	if (seg->s_ops != &segmf_ops)
534843e1988Sjohnlev 		return (EINVAL);
535843e1988Sjohnlev 
536843e1988Sjohnlev 	/*
537843e1988Sjohnlev 	 * Don't mess with dom0.
538843e1988Sjohnlev 	 *
539843e1988Sjohnlev 	 * Only allow the domid to be set once for the segment.
540843e1988Sjohnlev 	 * After that attempts to add mappings to this segment for
541843e1988Sjohnlev 	 * other domains explicitly fails.
542843e1988Sjohnlev 	 */
543843e1988Sjohnlev 
544843e1988Sjohnlev 	if (domid == 0 || domid == DOMID_SELF)
545843e1988Sjohnlev 		return (EACCES);
546843e1988Sjohnlev 
547843e1988Sjohnlev 	mutex_enter(&data->lock);
548843e1988Sjohnlev 
549843e1988Sjohnlev 	if (data->domid == 0)
550843e1988Sjohnlev 		data->domid = domid;
551843e1988Sjohnlev 
552843e1988Sjohnlev 	if (data->domid != domid) {
553843e1988Sjohnlev 		error = EINVAL;
554843e1988Sjohnlev 		goto out;
555843e1988Sjohnlev 	}
556843e1988Sjohnlev 
557843e1988Sjohnlev 	base = seg_page(seg, addr);
558843e1988Sjohnlev 
559*7eea693dSMark Johnson 	for (i = 0; i < pgcnt; i++) {
560*7eea693dSMark Johnson 		data->map[base + i].t_type = SEGMF_MAP_MFN;
561*7eea693dSMark Johnson 		data->map[base + i].u.m.m_mfn = mfn++;
562*7eea693dSMark Johnson 	}
563843e1988Sjohnlev 
564843e1988Sjohnlev 	fc = segmf_fault_range(seg->s_as->a_hat, seg, addr,
565843e1988Sjohnlev 	    pgcnt * MMU_PAGESIZE, F_SOFTLOCK, S_OTHER);
566843e1988Sjohnlev 
567843e1988Sjohnlev 	if (fc != 0) {
568843e1988Sjohnlev 		error = fc_decode(fc);
569*7eea693dSMark Johnson 		for (i = 0; i < pgcnt; i++) {
570*7eea693dSMark Johnson 			data->map[base + i].t_type = SEGMF_MAP_EMPTY;
571*7eea693dSMark Johnson 		}
572843e1988Sjohnlev 	}
573843e1988Sjohnlev 
574843e1988Sjohnlev out:
575843e1988Sjohnlev 	mutex_exit(&data->lock);
576843e1988Sjohnlev 	return (error);
577843e1988Sjohnlev }
578843e1988Sjohnlev 
579*7eea693dSMark Johnson int
580*7eea693dSMark Johnson segmf_add_grefs(struct seg *seg, caddr_t addr, uint_t flags,
581*7eea693dSMark Johnson     grant_ref_t *grefs, uint_t cnt, domid_t domid)
582*7eea693dSMark Johnson {
583*7eea693dSMark Johnson 	struct segmf_data *data;
584*7eea693dSMark Johnson 	segmf_map_t *map;
585*7eea693dSMark Johnson 	faultcode_t fc;
586*7eea693dSMark Johnson 	uint_t idx;
587*7eea693dSMark Johnson 	uint_t i;
588*7eea693dSMark Johnson 	int e;
589*7eea693dSMark Johnson 
590*7eea693dSMark Johnson 	if (seg->s_ops != &segmf_ops)
591*7eea693dSMark Johnson 		return (EINVAL);
592*7eea693dSMark Johnson 
593*7eea693dSMark Johnson 	/*
594*7eea693dSMark Johnson 	 * Don't mess with dom0.
595*7eea693dSMark Johnson 	 *
596*7eea693dSMark Johnson 	 * Only allow the domid to be set once for the segment.
597*7eea693dSMark Johnson 	 * After that attempts to add mappings to this segment for
598*7eea693dSMark Johnson 	 * other domains explicitly fails.
599*7eea693dSMark Johnson 	 */
600*7eea693dSMark Johnson 
601*7eea693dSMark Johnson 	if (domid == 0 || domid == DOMID_SELF)
602*7eea693dSMark Johnson 		return (EACCES);
603*7eea693dSMark Johnson 
604*7eea693dSMark Johnson 	data = seg->s_data;
605*7eea693dSMark Johnson 	idx = seg_page(seg, addr);
606*7eea693dSMark Johnson 	map = &data->map[idx];
607*7eea693dSMark Johnson 	e = 0;
608*7eea693dSMark Johnson 
609*7eea693dSMark Johnson 	mutex_enter(&data->lock);
610*7eea693dSMark Johnson 
611*7eea693dSMark Johnson 	if (data->domid == 0)
612*7eea693dSMark Johnson 		data->domid = domid;
613*7eea693dSMark Johnson 
614*7eea693dSMark Johnson 	if (data->domid != domid) {
615*7eea693dSMark Johnson 		e = EINVAL;
616*7eea693dSMark Johnson 		goto out;
617*7eea693dSMark Johnson 	}
618*7eea693dSMark Johnson 
619*7eea693dSMark Johnson 	/* store away the grefs passed in then fault in the pages */
620*7eea693dSMark Johnson 	for (i = 0; i < cnt; i++) {
621*7eea693dSMark Johnson 		map[i].t_type = SEGMF_MAP_GREF;
622*7eea693dSMark Johnson 		map[i].u.g.g_gref = grefs[i];
623*7eea693dSMark Johnson 		map[i].u.g.g_handle = 0;
624*7eea693dSMark Johnson 		map[i].u.g.g_flags = 0;
625*7eea693dSMark Johnson 		if (flags & SEGMF_GREF_WR) {
626*7eea693dSMark Johnson 			map[i].u.g.g_flags |= SEGMF_GFLAGS_WR;
627*7eea693dSMark Johnson 		}
628*7eea693dSMark Johnson 	}
629*7eea693dSMark Johnson 	fc = segmf_fault_gref_range(seg, addr, cnt);
630*7eea693dSMark Johnson 	if (fc != 0) {
631*7eea693dSMark Johnson 		e = fc_decode(fc);
632*7eea693dSMark Johnson 		for (i = 0; i < cnt; i++) {
633*7eea693dSMark Johnson 			data->map[i].t_type = SEGMF_MAP_EMPTY;
634*7eea693dSMark Johnson 		}
635*7eea693dSMark Johnson 	}
636*7eea693dSMark Johnson 
637*7eea693dSMark Johnson out:
638*7eea693dSMark Johnson 	mutex_exit(&data->lock);
639*7eea693dSMark Johnson 	return (e);
640*7eea693dSMark Johnson }
641*7eea693dSMark Johnson 
642*7eea693dSMark Johnson int
643*7eea693dSMark Johnson segmf_release_grefs(struct seg *seg, caddr_t addr, uint_t cnt)
644*7eea693dSMark Johnson {
645*7eea693dSMark Johnson 	gnttab_unmap_grant_ref_t mapop[SEGMF_MAX_GREFS];
646*7eea693dSMark Johnson 	struct segmf_data *data;
647*7eea693dSMark Johnson 	segmf_map_t *map;
648*7eea693dSMark Johnson 	uint_t idx;
649*7eea693dSMark Johnson 	long e;
650*7eea693dSMark Johnson 	int i;
651*7eea693dSMark Johnson 	int n;
652*7eea693dSMark Johnson 
653*7eea693dSMark Johnson 
654*7eea693dSMark Johnson 	if (cnt > SEGMF_MAX_GREFS) {
655*7eea693dSMark Johnson 		return (-1);
656*7eea693dSMark Johnson 	}
657*7eea693dSMark Johnson 
658*7eea693dSMark Johnson 	idx = seg_page(seg, addr);
659*7eea693dSMark Johnson 	data = seg->s_data;
660*7eea693dSMark Johnson 	map = &data->map[idx];
661*7eea693dSMark Johnson 
662*7eea693dSMark Johnson 	bzero(mapop, sizeof (gnttab_unmap_grant_ref_t) * cnt);
663*7eea693dSMark Johnson 
664*7eea693dSMark Johnson 	/*
665*7eea693dSMark Johnson 	 * for each entry which isn't empty and is currently mapped,
666*7eea693dSMark Johnson 	 * set it up for an unmap then mark them empty.
667*7eea693dSMark Johnson 	 */
668*7eea693dSMark Johnson 	n = 0;
669*7eea693dSMark Johnson 	for (i = 0; i < cnt; i++) {
670*7eea693dSMark Johnson 		ASSERT(map[i].t_type != SEGMF_MAP_MFN);
671*7eea693dSMark Johnson 		if ((map[i].t_type == SEGMF_MAP_GREF) &&
672*7eea693dSMark Johnson 		    (map[i].u.g.g_flags & SEGMF_GFLAGS_MAPPED)) {
673*7eea693dSMark Johnson 			mapop[n].handle = map[i].u.g.g_handle;
674*7eea693dSMark Johnson 			mapop[n].host_addr = map[i].u.g.g_ptep;
675*7eea693dSMark Johnson 			mapop[n].dev_bus_addr = 0;
676*7eea693dSMark Johnson 			n++;
677*7eea693dSMark Johnson 		}
678*7eea693dSMark Johnson 		map[i].t_type = SEGMF_MAP_EMPTY;
679*7eea693dSMark Johnson 	}
680*7eea693dSMark Johnson 
681*7eea693dSMark Johnson 	/* if there's nothing to unmap, just return */
682*7eea693dSMark Johnson 	if (n == 0) {
683*7eea693dSMark Johnson 		return (0);
684*7eea693dSMark Johnson 	}
685*7eea693dSMark Johnson 
686*7eea693dSMark Johnson 	e = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &mapop, n);
687*7eea693dSMark Johnson 	if (e != 0) {
688*7eea693dSMark Johnson 		return (-1);
689*7eea693dSMark Johnson 	}
690*7eea693dSMark Johnson 
691*7eea693dSMark Johnson 	return (0);
692*7eea693dSMark Johnson }
693*7eea693dSMark Johnson 
694*7eea693dSMark Johnson 
695*7eea693dSMark Johnson void
696*7eea693dSMark Johnson segmf_add_gref_pte(struct seg *seg, caddr_t addr, uint64_t pte_ma)
697*7eea693dSMark Johnson {
698*7eea693dSMark Johnson 	struct segmf_data *data;
699*7eea693dSMark Johnson 	uint_t idx;
700*7eea693dSMark Johnson 
701*7eea693dSMark Johnson 	idx = seg_page(seg, addr);
702*7eea693dSMark Johnson 	data = seg->s_data;
703*7eea693dSMark Johnson 
704*7eea693dSMark Johnson 	data->map[idx].u.g.g_ptep = pte_ma;
705*7eea693dSMark Johnson }
706*7eea693dSMark Johnson 
707*7eea693dSMark Johnson 
708*7eea693dSMark Johnson static int
709*7eea693dSMark Johnson segmf_fault_gref_range(struct seg *seg, caddr_t addr, size_t cnt)
710*7eea693dSMark Johnson {
711*7eea693dSMark Johnson 	gnttab_map_grant_ref_t mapop[SEGMF_MAX_GREFS];
712*7eea693dSMark Johnson 	struct segmf_data *data;
713*7eea693dSMark Johnson 	segmf_map_t *map;
714*7eea693dSMark Johnson 	uint_t idx;
715*7eea693dSMark Johnson 	int e;
716*7eea693dSMark Johnson 	int i;
717*7eea693dSMark Johnson 
718*7eea693dSMark Johnson 
719*7eea693dSMark Johnson 	if (cnt > SEGMF_MAX_GREFS) {
720*7eea693dSMark Johnson 		return (-1);
721*7eea693dSMark Johnson 	}
722*7eea693dSMark Johnson 
723*7eea693dSMark Johnson 	data = seg->s_data;
724*7eea693dSMark Johnson 	idx = seg_page(seg, addr);
725*7eea693dSMark Johnson 	map = &data->map[idx];
726*7eea693dSMark Johnson 
727*7eea693dSMark Johnson 	bzero(mapop, sizeof (gnttab_map_grant_ref_t) * cnt);
728*7eea693dSMark Johnson 
729*7eea693dSMark Johnson 	ASSERT(map->t_type == SEGMF_MAP_GREF);
730*7eea693dSMark Johnson 
731*7eea693dSMark Johnson 	/*
732*7eea693dSMark Johnson 	 * map in each page passed in into the user apps AS. We do this by
733*7eea693dSMark Johnson 	 * passing the MA of the actual pte of the mapping to the hypervisor.
734*7eea693dSMark Johnson 	 */
735*7eea693dSMark Johnson 	for (i = 0; i < cnt; i++) {
736*7eea693dSMark Johnson 		mapop[i].host_addr = map[i].u.g.g_ptep;
737*7eea693dSMark Johnson 		mapop[i].dom = data->domid;
738*7eea693dSMark Johnson 		mapop[i].ref = map[i].u.g.g_gref;
739*7eea693dSMark Johnson 		mapop[i].flags = GNTMAP_host_map | GNTMAP_application_map |
740*7eea693dSMark Johnson 		    GNTMAP_contains_pte;
741*7eea693dSMark Johnson 		if (!(map[i].u.g.g_flags & SEGMF_GFLAGS_WR)) {
742*7eea693dSMark Johnson 			mapop[i].flags |= GNTMAP_readonly;
743*7eea693dSMark Johnson 		}
744*7eea693dSMark Johnson 	}
745*7eea693dSMark Johnson 	e = xen_map_gref(GNTTABOP_map_grant_ref, mapop, cnt, B_TRUE);
746*7eea693dSMark Johnson 	if ((e != 0) || (mapop[0].status != GNTST_okay)) {
747*7eea693dSMark Johnson 		return (FC_MAKE_ERR(EFAULT));
748*7eea693dSMark Johnson 	}
749*7eea693dSMark Johnson 
750*7eea693dSMark Johnson 	/* save handle for segmf_release_grefs() and mark it as mapped */
751*7eea693dSMark Johnson 	for (i = 0; i < cnt; i++) {
752*7eea693dSMark Johnson 		ASSERT(mapop[i].status == GNTST_okay);
753*7eea693dSMark Johnson 		map[i].u.g.g_handle = mapop[i].handle;
754*7eea693dSMark Johnson 		map[i].u.g.g_flags |= SEGMF_GFLAGS_MAPPED;
755*7eea693dSMark Johnson 	}
756*7eea693dSMark Johnson 
757*7eea693dSMark Johnson 	return (0);
758*7eea693dSMark Johnson }
759*7eea693dSMark Johnson 
760843e1988Sjohnlev static struct seg_ops segmf_ops = {
761843e1988Sjohnlev 	segmf_dup,
762843e1988Sjohnlev 	segmf_unmap,
763843e1988Sjohnlev 	segmf_free,
764843e1988Sjohnlev 	segmf_fault,
765843e1988Sjohnlev 	segmf_faulta,
766843e1988Sjohnlev 	segmf_setprot,
767843e1988Sjohnlev 	segmf_checkprot,
768843e1988Sjohnlev 	(int (*)())segmf_kluster,
769843e1988Sjohnlev 	(size_t (*)(struct seg *))NULL,	/* swapout */
770843e1988Sjohnlev 	segmf_sync,
771843e1988Sjohnlev 	segmf_incore,
772843e1988Sjohnlev 	segmf_lockop,
773843e1988Sjohnlev 	segmf_getprot,
774843e1988Sjohnlev 	segmf_getoffset,
775843e1988Sjohnlev 	segmf_gettype,
776843e1988Sjohnlev 	segmf_getvp,
777843e1988Sjohnlev 	segmf_advise,
778843e1988Sjohnlev 	segmf_dump,
779843e1988Sjohnlev 	segmf_pagelock,
780843e1988Sjohnlev 	segmf_setpagesize,
781843e1988Sjohnlev 	segmf_getmemid,
782843e1988Sjohnlev 	segmf_getpolicy,
783843e1988Sjohnlev 	segmf_capable
784843e1988Sjohnlev };
785