xref: /illumos-gate/usr/src/uts/i86xpv/vm/seg_mf.c (revision 843e1988)
1*843e1988Sjohnlev /*
2*843e1988Sjohnlev  * CDDL HEADER START
3*843e1988Sjohnlev  *
4*843e1988Sjohnlev  * The contents of this file are subject to the terms of the
5*843e1988Sjohnlev  * Common Development and Distribution License (the "License").
6*843e1988Sjohnlev  * You may not use this file except in compliance with the License.
7*843e1988Sjohnlev  *
8*843e1988Sjohnlev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*843e1988Sjohnlev  * or http://www.opensolaris.org/os/licensing.
10*843e1988Sjohnlev  * See the License for the specific language governing permissions
11*843e1988Sjohnlev  * and limitations under the License.
12*843e1988Sjohnlev  *
13*843e1988Sjohnlev  * When distributing Covered Code, include this CDDL HEADER in each
14*843e1988Sjohnlev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*843e1988Sjohnlev  * If applicable, add the following below this CDDL HEADER, with the
16*843e1988Sjohnlev  * fields enclosed by brackets "[]" replaced with your own identifying
17*843e1988Sjohnlev  * information: Portions Copyright [yyyy] [name of copyright owner]
18*843e1988Sjohnlev  *
19*843e1988Sjohnlev  * CDDL HEADER END
20*843e1988Sjohnlev  */
21*843e1988Sjohnlev 
22*843e1988Sjohnlev /*
23*843e1988Sjohnlev  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24*843e1988Sjohnlev  * Use is subject to license terms.
25*843e1988Sjohnlev  */
26*843e1988Sjohnlev 
27*843e1988Sjohnlev /*
28*843e1988Sjohnlev  * Machine frame segment driver.  This segment driver allows dom0 processes to
29*843e1988Sjohnlev  * map pages of other domains or Xen (e.g. during save/restore).  ioctl()s on
30*843e1988Sjohnlev  * the privcmd driver provide the MFN values backing each mapping, and we map
31*843e1988Sjohnlev  * them into the process's address space at this time.  Demand-faulting is not
32*843e1988Sjohnlev  * supported by this driver due to the requirements upon some of the ioctl()s.
33*843e1988Sjohnlev  */
34*843e1988Sjohnlev 
35*843e1988Sjohnlev #pragma ident	"%Z%%M%	%I%	%E% SMI"
36*843e1988Sjohnlev 
37*843e1988Sjohnlev #include <sys/types.h>
38*843e1988Sjohnlev #include <sys/systm.h>
39*843e1988Sjohnlev #include <sys/vmsystm.h>
40*843e1988Sjohnlev #include <sys/mman.h>
41*843e1988Sjohnlev #include <sys/errno.h>
42*843e1988Sjohnlev #include <sys/kmem.h>
43*843e1988Sjohnlev #include <sys/cmn_err.h>
44*843e1988Sjohnlev #include <sys/vnode.h>
45*843e1988Sjohnlev #include <sys/conf.h>
46*843e1988Sjohnlev #include <sys/debug.h>
47*843e1988Sjohnlev #include <sys/lgrp.h>
48*843e1988Sjohnlev #include <sys/hypervisor.h>
49*843e1988Sjohnlev 
50*843e1988Sjohnlev #include <vm/page.h>
51*843e1988Sjohnlev #include <vm/hat.h>
52*843e1988Sjohnlev #include <vm/as.h>
53*843e1988Sjohnlev #include <vm/seg.h>
54*843e1988Sjohnlev 
55*843e1988Sjohnlev #include <vm/hat_pte.h>
56*843e1988Sjohnlev #include <vm/seg_mf.h>
57*843e1988Sjohnlev 
58*843e1988Sjohnlev #include <sys/fs/snode.h>
59*843e1988Sjohnlev 
60*843e1988Sjohnlev #define	VTOCVP(vp)	(VTOS(vp)->s_commonvp)
61*843e1988Sjohnlev 
62*843e1988Sjohnlev #define	mfatob(n)	((n) * sizeof (mfn_t))
63*843e1988Sjohnlev 
/*
 * Per-segment private data, hung off seg->s_data by segmf_data_zalloc().
 */
struct segmf_data {
	kmutex_t	lock;		/* protects domid/mfns updates */
	struct vnode	*vp;		/* backing privcmd device vnode */
	uchar_t		prot;		/* current protections */
	uchar_t		maxprot;	/* maximum allowed protections */
	size_t		softlockcnt;	/* pages soft-locked (freemem_lock) */
	domid_t		domid;		/* foreign domain; set once, 0 = unset */
	mfn_t		*mfns;		/* one MFN per page, MFN_INVALID if none */
};
73*843e1988Sjohnlev 
74*843e1988Sjohnlev static struct seg_ops segmf_ops;
75*843e1988Sjohnlev 
76*843e1988Sjohnlev static struct segmf_data *
77*843e1988Sjohnlev segmf_data_zalloc(struct seg *seg)
78*843e1988Sjohnlev {
79*843e1988Sjohnlev 	struct segmf_data *data = kmem_zalloc(sizeof (*data), KM_SLEEP);
80*843e1988Sjohnlev 
81*843e1988Sjohnlev 	mutex_init(&data->lock, "segmf.lock", MUTEX_DEFAULT, NULL);
82*843e1988Sjohnlev 	seg->s_ops = &segmf_ops;
83*843e1988Sjohnlev 	seg->s_data = data;
84*843e1988Sjohnlev 	return (data);
85*843e1988Sjohnlev }
86*843e1988Sjohnlev 
/*
 * Segment creation entry point, called via as_map() with the address
 * space write-locked.  Establish empty HAT mappings for the range,
 * allocate the per-page MFN table (every slot starts as MFN_INVALID
 * until segmf_add_mfns() fills it in), and register the mapping with
 * the backing device vnode.  Returns 0 or an errno.
 */
int
segmf_create(struct seg *seg, void *args)
{
	struct segmf_crargs *a = args;
	struct segmf_data *data;
	struct as *as = seg->s_as;
	pgcnt_t i, npages = seg_pages(seg);
	int error;

	hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);

	data = segmf_data_zalloc(seg);
	data->vp = specfind(a->dev, VCHR);	/* returns a held vnode */
	data->prot = a->prot;
	data->maxprot = a->maxprot;

	/* No MFNs are known yet; they arrive later via segmf_add_mfns(). */
	data->mfns = kmem_alloc(mfatob(npages), KM_SLEEP);
	for (i = 0; i < npages; i++)
		data->mfns[i] = MFN_INVALID;

	error = VOP_ADDMAP(VTOCVP(data->vp), 0, as, seg->s_base, seg->s_size,
	    data->prot, data->maxprot, MAP_SHARED, CRED());

	/*
	 * On failure just tear down the HAT mappings; the private data
	 * is reclaimed when the caller destroys the segment.
	 */
	if (error != 0)
		hat_unload(as->a_hat,
		    seg->s_base, seg->s_size, HAT_UNLOAD_UNMAP);
	return (error);
}
115*843e1988Sjohnlev 
116*843e1988Sjohnlev /*
117*843e1988Sjohnlev  * Duplicate a seg and return new segment in newseg.
118*843e1988Sjohnlev  */
119*843e1988Sjohnlev static int
120*843e1988Sjohnlev segmf_dup(struct seg *seg, struct seg *newseg)
121*843e1988Sjohnlev {
122*843e1988Sjohnlev 	struct segmf_data *data = seg->s_data;
123*843e1988Sjohnlev 	struct segmf_data *ndata;
124*843e1988Sjohnlev 	pgcnt_t npages = seg_pages(newseg);
125*843e1988Sjohnlev 
126*843e1988Sjohnlev 	ndata = segmf_data_zalloc(newseg);
127*843e1988Sjohnlev 
128*843e1988Sjohnlev 	VN_HOLD(data->vp);
129*843e1988Sjohnlev 	ndata->vp = data->vp;
130*843e1988Sjohnlev 	ndata->prot = data->prot;
131*843e1988Sjohnlev 	ndata->maxprot = data->maxprot;
132*843e1988Sjohnlev 	ndata->domid = data->domid;
133*843e1988Sjohnlev 
134*843e1988Sjohnlev 	ndata->mfns = kmem_alloc(mfatob(npages), KM_SLEEP);
135*843e1988Sjohnlev 	bcopy(data->mfns, ndata->mfns, mfatob(npages));
136*843e1988Sjohnlev 
137*843e1988Sjohnlev 	return (VOP_ADDMAP(VTOCVP(ndata->vp), 0, newseg->s_as,
138*843e1988Sjohnlev 	    newseg->s_base, newseg->s_size, ndata->prot, ndata->maxprot,
139*843e1988Sjohnlev 	    MAP_SHARED, CRED()));
140*843e1988Sjohnlev }
141*843e1988Sjohnlev 
/*
 * We only support unmapping the whole segment, and we automatically unlock
 * what we previously soft-locked.
 */
static int
segmf_unmap(struct seg *seg, caddr_t addr, size_t len)
{
	struct segmf_data *data = seg->s_data;
	offset_t off;

	/* The range must be page-aligned and lie inside the segment. */
	if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
	    (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
		panic("segmf_unmap");

	/* Partial unmaps are not supported. */
	if (addr != seg->s_base || len != seg->s_size)
		return (ENOTSUP);

	/* Drop soft locks along with the mappings themselves. */
	hat_unload(seg->s_as->a_hat, addr, len,
	    HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);

	off = (offset_t)seg_page(seg, addr);

	ASSERT(data->vp != NULL);

	(void) VOP_DELMAP(VTOCVP(data->vp), off, seg->s_as, addr, len,
	    data->prot, data->maxprot, MAP_SHARED, CRED());

	seg_free(seg);
	return (0);
}
172*843e1988Sjohnlev 
173*843e1988Sjohnlev static void
174*843e1988Sjohnlev segmf_free(struct seg *seg)
175*843e1988Sjohnlev {
176*843e1988Sjohnlev 	struct segmf_data *data = seg->s_data;
177*843e1988Sjohnlev 	pgcnt_t npages = seg_pages(seg);
178*843e1988Sjohnlev 
179*843e1988Sjohnlev 	kmem_free(data->mfns, mfatob(npages));
180*843e1988Sjohnlev 	VN_RELE(data->vp);
181*843e1988Sjohnlev 	mutex_destroy(&data->lock);
182*843e1988Sjohnlev 	kmem_free(data, sizeof (*data));
183*843e1988Sjohnlev }
184*843e1988Sjohnlev 
static int segmf_faultpage_debug = 0;	/* tunable: remaining debug prints */

/*
 * Establish the mapping for a single page at "addr", whose backing MFN
 * must already be recorded in the segment's MFN table.  For F_SOFTLOCK
 * faults the mapping is locked and the segment's soft-lock count is
 * bumped.  Returns 0 on success or an FC_* fault code.
 */
/*ARGSUSED*/
static int
segmf_faultpage(struct hat *hat, struct seg *seg, caddr_t addr,
    enum fault_type type, uint_t prot)
{
	struct segmf_data *data = seg->s_data;
	uint_t hat_flags = HAT_LOAD_NOCONSIST;
	mfn_t mfn;
	x86pte_t pte;

	mfn = data->mfns[seg_page(seg, addr)];

	ASSERT(mfn != MFN_INVALID);

	if (type == F_SOFTLOCK) {
		/* softlockcnt is protected by freemem_lock */
		mutex_enter(&freemem_lock);
		data->softlockcnt++;
		mutex_exit(&freemem_lock);
		hat_flags |= HAT_LOAD_LOCK;
	} else
		hat_flags |= HAT_LOAD;

	if (segmf_faultpage_debug > 0) {
		uprintf("segmf_faultpage: addr %p domid %x mfn %lx prot %x\n",
		    (void *)addr, data->domid, mfn, prot);
		segmf_faultpage_debug--;
	}

	/*
	 * Ask the HAT to load a throwaway mapping to page zero, then
	 * overwrite it with our foreign domain mapping. It gets removed
	 * later via hat_unload()
	 */
	hat_devload(hat, addr, MMU_PAGESIZE, (pfn_t)0,
	    PROT_READ | HAT_UNORDERED_OK, hat_flags);

	/* Build the foreign PTE by hand and ask Xen to install it. */
	pte = mmu_ptob((x86pte_t)mfn) | PT_VALID | PT_USER | PT_FOREIGN;
	if (prot & PROT_WRITE)
		pte |= PT_WRITABLE;

	if (HYPERVISOR_update_va_mapping_otherdomain((uintptr_t)addr, pte,
	    UVMF_INVLPG | UVMF_ALL, data->domid) != 0) {
		/*
		 * The hypervisor rejected the mapping (e.g. a bad MFN);
		 * undo the placeholder mapping and any soft lock taken
		 * above before reporting the fault.
		 */
		hat_flags = HAT_UNLOAD_UNMAP;

		if (type == F_SOFTLOCK) {
			hat_flags |= HAT_UNLOAD_UNLOCK;
			mutex_enter(&freemem_lock);
			data->softlockcnt--;
			mutex_exit(&freemem_lock);
		}

		hat_unload(hat, addr, MMU_PAGESIZE, hat_flags);
		return (FC_MAKE_ERR(EFAULT));
	}

	return (0);
}
244*843e1988Sjohnlev 
245*843e1988Sjohnlev static int
246*843e1988Sjohnlev seg_rw_to_prot(enum seg_rw rw)
247*843e1988Sjohnlev {
248*843e1988Sjohnlev 	switch (rw) {
249*843e1988Sjohnlev 	case S_READ:
250*843e1988Sjohnlev 		return (PROT_READ);
251*843e1988Sjohnlev 	case S_WRITE:
252*843e1988Sjohnlev 		return (PROT_WRITE);
253*843e1988Sjohnlev 	case S_EXEC:
254*843e1988Sjohnlev 		return (PROT_EXEC);
255*843e1988Sjohnlev 	case S_OTHER:
256*843e1988Sjohnlev 	default:
257*843e1988Sjohnlev 		break;
258*843e1988Sjohnlev 	}
259*843e1988Sjohnlev 	return (PROT_READ | PROT_WRITE | PROT_EXEC);
260*843e1988Sjohnlev }
261*843e1988Sjohnlev 
/*
 * Drop the soft locks covering [addr, addr + len) and decrement the
 * segment's soft-lock count.  When the count drains to zero, wake up
 * anyone blocked in as_unmap() waiting for the locks to go away.
 */
static void
segmf_softunlock(struct hat *hat, struct seg *seg, caddr_t addr, size_t len)
{
	struct segmf_data *data = seg->s_data;

	hat_unlock(hat, addr, len);

	/* softlockcnt is protected by freemem_lock */
	mutex_enter(&freemem_lock);
	ASSERT(data->softlockcnt >= btopr(len));
	data->softlockcnt -= btopr(len);
	mutex_exit(&freemem_lock);

	if (data->softlockcnt == 0) {
		struct as *as = seg->s_as;

		/*
		 * Check-lock-recheck: only take a_contents if a waiter
		 * might exist, then re-test under the lock before
		 * clearing the flag and broadcasting.
		 */
		if (AS_ISUNMAPWAIT(as)) {
			mutex_enter(&as->a_contents);
			if (AS_ISUNMAPWAIT(as)) {
				AS_CLRUNMAPWAIT(as);
				cv_broadcast(&as->a_cv);
			}
			mutex_exit(&as->a_contents);
		}
	}
}
287*843e1988Sjohnlev 
/*
 * Fault in (and, for F_SOFTLOCK, lock) every page of the given range,
 * after verifying the access is permitted by the segment protections.
 * If a page fails part-way through an F_SOFTLOCK request, the pages
 * already locked are unlocked again before the fault code is returned.
 */
static int
segmf_fault_range(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw rw)
{
	struct segmf_data *data = seg->s_data;
	int error = 0;
	caddr_t a;

	if ((data->prot & seg_rw_to_prot(rw)) == 0)
		return (FC_PROT);

	/* loop over the address range handling each fault */

	for (a = addr; a < addr + len; a += PAGESIZE) {
		error = segmf_faultpage(hat, seg, a, type, data->prot);
		if (error != 0)
			break;
	}

	if (error != 0 && type == F_SOFTLOCK) {
		/* "a" points at the page that failed; undo [addr, a). */
		size_t done = (size_t)(a - addr);

		/*
		 * Undo what's been done so far.
		 */
		if (done > 0)
			segmf_softunlock(hat, seg, addr, done);
	}

	return (error);
}
319*843e1988Sjohnlev 
/*
 * We never demand-fault for seg_mf.
 */
/*ARGSUSED*/
static int
segmf_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw rw)
{
	/*
	 * All mappings are established up front by segmf_add_mfns();
	 * any fault reaching here has no valid backing and must fail.
	 */
	return (FC_MAKE_ERR(EFAULT));
}
330*843e1988Sjohnlev 
/*
 * Fault-ahead is a no-op: mappings are pre-established, so there is
 * nothing to fault in asynchronously.
 */
/*ARGSUSED*/
static int
segmf_faulta(struct seg *seg, caddr_t addr)
{
	return (0);
}
337*843e1988Sjohnlev 
/*
 * Changing protections on a seg_mf segment is not supported.
 */
/*ARGSUSED*/
static int
segmf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	return (EINVAL);
}
344*843e1988Sjohnlev 
/*
 * Protection checks are not supported either; always report failure.
 */
/*ARGSUSED*/
static int
segmf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	return (EINVAL);
}
351*843e1988Sjohnlev 
/*
 * Klustering (faulting in adjacent pages together) is never worthwhile
 * here; -1 tells the caller not to kluster.
 */
/*ARGSUSED*/
static int
segmf_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (-1);
}
358*843e1988Sjohnlev 
/*
 * Nothing to synchronize to backing store; always succeed.
 */
/*ARGSUSED*/
static int
segmf_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
	return (0);
}
365*843e1988Sjohnlev 
366*843e1988Sjohnlev /*
367*843e1988Sjohnlev  * XXPV	Hmm.  Should we say that mf mapping are "in core?"
368*843e1988Sjohnlev  */
369*843e1988Sjohnlev 
370*843e1988Sjohnlev /*ARGSUSED*/
371*843e1988Sjohnlev static size_t
372*843e1988Sjohnlev segmf_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
373*843e1988Sjohnlev {
374*843e1988Sjohnlev 	size_t v;
375*843e1988Sjohnlev 
376*843e1988Sjohnlev 	for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len;
377*843e1988Sjohnlev 	    len -= PAGESIZE, v += PAGESIZE)
378*843e1988Sjohnlev 		*vec++ = 1;
379*843e1988Sjohnlev 	return (v);
380*843e1988Sjohnlev }
381*843e1988Sjohnlev 
/*
 * Lock operations are no-ops for this driver; report success.
 */
/*ARGSUSED*/
static int
segmf_lockop(struct seg *seg, caddr_t addr,
    size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
{
	return (0);
}
389*843e1988Sjohnlev 
390*843e1988Sjohnlev static int
391*843e1988Sjohnlev segmf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
392*843e1988Sjohnlev {
393*843e1988Sjohnlev 	struct segmf_data *data = seg->s_data;
394*843e1988Sjohnlev 	pgcnt_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
395*843e1988Sjohnlev 
396*843e1988Sjohnlev 	if (pgno != 0) {
397*843e1988Sjohnlev 		do
398*843e1988Sjohnlev 			protv[--pgno] = data->prot;
399*843e1988Sjohnlev 		while (pgno != 0)
400*843e1988Sjohnlev 			;
401*843e1988Sjohnlev 	}
402*843e1988Sjohnlev 	return (0);
403*843e1988Sjohnlev }
404*843e1988Sjohnlev 
/*
 * The "offset" of an address is simply its byte offset from the start
 * of the segment.
 */
static u_offset_t
segmf_getoffset(struct seg *seg, caddr_t addr)
{
	return (addr - seg->s_base);
}
410*843e1988Sjohnlev 
/*
 * All seg_mf mappings are shared (see the MAP_SHARED in segmf_create).
 */
/*ARGSUSED*/
static int
segmf_gettype(struct seg *seg, caddr_t addr)
{
	return (MAP_SHARED);
}
417*843e1988Sjohnlev 
/*
 * Return the vnode backing this segment: the common vnode of the
 * underlying character device.
 */
/*ARGSUSED1*/
static int
segmf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct segmf_data *data = seg->s_data;

	*vpp = VTOCVP(data->vp);
	return (0);
}
427*843e1988Sjohnlev 
/*
 * Ignore madvise()-style hints; there is nothing useful to do.
 */
/*ARGSUSED*/
static int
segmf_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
	return (0);
}
434*843e1988Sjohnlev 
/*
 * Nothing from these foreign mappings goes into a crash dump.
 */
/*ARGSUSED*/
static void
segmf_dump(struct seg *seg)
{}
439*843e1988Sjohnlev 
/*
 * Page-list locking (e.g. for direct I/O) is not supported.
 */
/*ARGSUSED*/
static int
segmf_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}
447*843e1988Sjohnlev 
/*
 * Large pages are not supported for these mappings.
 */
/*ARGSUSED*/
static int
segmf_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
	return (ENOTSUP);
}
454*843e1988Sjohnlev 
/*
 * Build a memory ID for the page at "addr" from the backing common
 * vnode and the page's index within the segment.
 */
static int
segmf_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
{
	struct segmf_data *data = seg->s_data;

	memid->val[0] = (uintptr_t)VTOCVP(data->vp);
	memid->val[1] = (uintptr_t)seg_page(seg, addr);
	return (0);
}
464*843e1988Sjohnlev 
/*
 * No lgroup memory-placement policy applies to foreign mappings.
 */
/*ARGSUSED*/
static lgrp_mem_policy_info_t *
segmf_getpolicy(struct seg *seg, caddr_t addr)
{
	return (NULL);
}
471*843e1988Sjohnlev 
/*
 * No optional segment capabilities are supported.
 */
/*ARGSUSED*/
static int
segmf_capable(struct seg *seg, segcapability_t capability)
{
	return (0);
}
478*843e1988Sjohnlev 
479*843e1988Sjohnlev /*
480*843e1988Sjohnlev  * Add a set of contiguous foreign MFNs to the segment. soft-locking them.  The
481*843e1988Sjohnlev  * pre-faulting is necessary due to live migration; in particular we must
482*843e1988Sjohnlev  * return an error in response to IOCTL_PRIVCMD_MMAPBATCH rather than faulting
483*843e1988Sjohnlev  * later on a bad MFN.  Whilst this isn't necessary for the other MMAP
484*843e1988Sjohnlev  * ioctl()s, we lock them too, as they should be transitory.
485*843e1988Sjohnlev  */
486*843e1988Sjohnlev int
487*843e1988Sjohnlev segmf_add_mfns(struct seg *seg, caddr_t addr, mfn_t mfn,
488*843e1988Sjohnlev     pgcnt_t pgcnt, domid_t domid)
489*843e1988Sjohnlev {
490*843e1988Sjohnlev 	struct segmf_data *data = seg->s_data;
491*843e1988Sjohnlev 	pgcnt_t base = seg_page(seg, addr);
492*843e1988Sjohnlev 	faultcode_t fc;
493*843e1988Sjohnlev 	pgcnt_t i;
494*843e1988Sjohnlev 	int error = 0;
495*843e1988Sjohnlev 
496*843e1988Sjohnlev 	if (seg->s_ops != &segmf_ops)
497*843e1988Sjohnlev 		return (EINVAL);
498*843e1988Sjohnlev 
499*843e1988Sjohnlev 	/*
500*843e1988Sjohnlev 	 * Don't mess with dom0.
501*843e1988Sjohnlev 	 *
502*843e1988Sjohnlev 	 * Only allow the domid to be set once for the segment.
503*843e1988Sjohnlev 	 * After that attempts to add mappings to this segment for
504*843e1988Sjohnlev 	 * other domains explicitly fails.
505*843e1988Sjohnlev 	 */
506*843e1988Sjohnlev 
507*843e1988Sjohnlev 	if (domid == 0 || domid == DOMID_SELF)
508*843e1988Sjohnlev 		return (EACCES);
509*843e1988Sjohnlev 
510*843e1988Sjohnlev 	mutex_enter(&data->lock);
511*843e1988Sjohnlev 
512*843e1988Sjohnlev 	if (data->domid == 0)
513*843e1988Sjohnlev 		data->domid = domid;
514*843e1988Sjohnlev 
515*843e1988Sjohnlev 	if (data->domid != domid) {
516*843e1988Sjohnlev 		error = EINVAL;
517*843e1988Sjohnlev 		goto out;
518*843e1988Sjohnlev 	}
519*843e1988Sjohnlev 
520*843e1988Sjohnlev 	base = seg_page(seg, addr);
521*843e1988Sjohnlev 
522*843e1988Sjohnlev 	for (i = 0; i < pgcnt; i++)
523*843e1988Sjohnlev 		data->mfns[base + i] = mfn++;
524*843e1988Sjohnlev 
525*843e1988Sjohnlev 	fc = segmf_fault_range(seg->s_as->a_hat, seg, addr,
526*843e1988Sjohnlev 	    pgcnt * MMU_PAGESIZE, F_SOFTLOCK, S_OTHER);
527*843e1988Sjohnlev 
528*843e1988Sjohnlev 	if (fc != 0) {
529*843e1988Sjohnlev 		error = fc_decode(fc);
530*843e1988Sjohnlev 		for (i = 0; i < pgcnt; i++)
531*843e1988Sjohnlev 			data->mfns[base + i] = MFN_INVALID;
532*843e1988Sjohnlev 	}
533*843e1988Sjohnlev 
534*843e1988Sjohnlev out:
535*843e1988Sjohnlev 	mutex_exit(&data->lock);
536*843e1988Sjohnlev 	return (error);
537*843e1988Sjohnlev }
538*843e1988Sjohnlev 
/*
 * segmf segment operations vector; entries are positional, in
 * struct seg_ops declaration order.
 */
static struct seg_ops segmf_ops = {
	segmf_dup,
	segmf_unmap,
	segmf_free,
	segmf_fault,
	segmf_faulta,
	segmf_setprot,
	segmf_checkprot,
	(int (*)())segmf_kluster,
	(size_t (*)(struct seg *))NULL,	/* swapout */
	segmf_sync,
	segmf_incore,
	segmf_lockop,
	segmf_getprot,
	segmf_getoffset,
	segmf_gettype,
	segmf_getvp,
	segmf_advise,
	segmf_dump,
	segmf_pagelock,
	segmf_setpagesize,
	segmf_getmemid,
	segmf_getpolicy,
	segmf_capable
};
564