1843e1988Sjohnlev /* 2843e1988Sjohnlev * CDDL HEADER START 3843e1988Sjohnlev * 4843e1988Sjohnlev * The contents of this file are subject to the terms of the 5843e1988Sjohnlev * Common Development and Distribution License (the "License"). 6843e1988Sjohnlev * You may not use this file except in compliance with the License. 7843e1988Sjohnlev * 8843e1988Sjohnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9843e1988Sjohnlev * or http://www.opensolaris.org/os/licensing. 10843e1988Sjohnlev * See the License for the specific language governing permissions 11843e1988Sjohnlev * and limitations under the License. 12843e1988Sjohnlev * 13843e1988Sjohnlev * When distributing Covered Code, include this CDDL HEADER in each 14843e1988Sjohnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15843e1988Sjohnlev * If applicable, add the following below this CDDL HEADER, with the 16843e1988Sjohnlev * fields enclosed by brackets "[]" replaced with your own identifying 17843e1988Sjohnlev * information: Portions Copyright [yyyy] [name of copyright owner] 18843e1988Sjohnlev * 19843e1988Sjohnlev * CDDL HEADER END 20843e1988Sjohnlev */ 21843e1988Sjohnlev 22843e1988Sjohnlev /* 237eea693dSMark Johnson * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24843e1988Sjohnlev * Use is subject to license terms. 25843e1988Sjohnlev */ 26843e1988Sjohnlev 27843e1988Sjohnlev /* 28843e1988Sjohnlev * Machine frame segment driver. This segment driver allows dom0 processes to 29843e1988Sjohnlev * map pages of other domains or Xen (e.g. during save/restore). ioctl()s on 30843e1988Sjohnlev * the privcmd driver provide the MFN values backing each mapping, and we map 31843e1988Sjohnlev * them into the process's address space at this time. Demand-faulting is not 32843e1988Sjohnlev * supported by this driver due to the requirements upon some of the ioctl()s. 33843e1988Sjohnlev */ 34843e1988Sjohnlev 35843e1988Sjohnlev 36843e1988Sjohnlev #include <sys/types.h> 37843e1988Sjohnlev #include <sys/systm.h> 38843e1988Sjohnlev #include <sys/vmsystm.h> 39843e1988Sjohnlev #include <sys/mman.h> 40843e1988Sjohnlev #include <sys/errno.h> 41843e1988Sjohnlev #include <sys/kmem.h> 42843e1988Sjohnlev #include <sys/cmn_err.h> 43843e1988Sjohnlev #include <sys/vnode.h> 44843e1988Sjohnlev #include <sys/conf.h> 45843e1988Sjohnlev #include <sys/debug.h> 46843e1988Sjohnlev #include <sys/lgrp.h> 47843e1988Sjohnlev #include <sys/hypervisor.h> 48843e1988Sjohnlev 49843e1988Sjohnlev #include <vm/page.h> 50843e1988Sjohnlev #include <vm/hat.h> 51843e1988Sjohnlev #include <vm/as.h> 52843e1988Sjohnlev #include <vm/seg.h> 53843e1988Sjohnlev 54843e1988Sjohnlev #include <vm/hat_pte.h> 557eea693dSMark Johnson #include <vm/hat_i86.h> 56843e1988Sjohnlev #include <vm/seg_mf.h> 57843e1988Sjohnlev 58843e1988Sjohnlev #include <sys/fs/snode.h> 59843e1988Sjohnlev 60843e1988Sjohnlev #define VTOCVP(vp) (VTOS(vp)->s_commonvp) 61843e1988Sjohnlev 627eea693dSMark Johnson typedef struct segmf_mfn_s { 637eea693dSMark Johnson mfn_t m_mfn; 647eea693dSMark Johnson } segmf_mfn_t; 657eea693dSMark Johnson 667eea693dSMark Johnson /* g_flags */ 677eea693dSMark Johnson #define SEGMF_GFLAGS_WR 0x1 687eea693dSMark Johnson #define SEGMF_GFLAGS_MAPPED 0x2 697eea693dSMark Johnson typedef struct segmf_gref_s { 707eea693dSMark Johnson uint64_t g_ptep; 717eea693dSMark Johnson grant_ref_t g_gref; 727eea693dSMark Johnson uint32_t g_flags; 737eea693dSMark Johnson grant_handle_t g_handle; 747eea693dSMark Johnson } segmf_gref_t; 757eea693dSMark Johnson 767eea693dSMark Johnson typedef union segmf_mu_u { 777eea693dSMark Johnson segmf_mfn_t m; 787eea693dSMark Johnson segmf_gref_t g; 797eea693dSMark Johnson } segmf_mu_t; 807eea693dSMark Johnson 817eea693dSMark Johnson typedef enum { 827eea693dSMark Johnson SEGMF_MAP_EMPTY = 0, 837eea693dSMark Johnson SEGMF_MAP_MFN, 847eea693dSMark Johnson SEGMF_MAP_GREF 857eea693dSMark Johnson } segmf_map_type_t; 867eea693dSMark Johnson 877eea693dSMark Johnson typedef struct segmf_map_s { 887eea693dSMark Johnson segmf_map_type_t t_type; 897eea693dSMark Johnson segmf_mu_t u; 907eea693dSMark Johnson } segmf_map_t; 91843e1988Sjohnlev 92843e1988Sjohnlev struct segmf_data { 93843e1988Sjohnlev kmutex_t lock; 94843e1988Sjohnlev struct vnode *vp; 95843e1988Sjohnlev uchar_t prot; 96843e1988Sjohnlev uchar_t maxprot; 97843e1988Sjohnlev size_t softlockcnt; 98843e1988Sjohnlev domid_t domid; 997eea693dSMark Johnson segmf_map_t *map; 100843e1988Sjohnlev }; 101843e1988Sjohnlev 102843e1988Sjohnlev static struct seg_ops segmf_ops; 103843e1988Sjohnlev 1047eea693dSMark Johnson static int segmf_fault_gref_range(struct seg *seg, caddr_t addr, size_t len); 1057eea693dSMark Johnson 106843e1988Sjohnlev static struct segmf_data * 107843e1988Sjohnlev segmf_data_zalloc(struct seg *seg) 108843e1988Sjohnlev { 109843e1988Sjohnlev struct segmf_data *data = kmem_zalloc(sizeof (*data), KM_SLEEP); 110843e1988Sjohnlev 111843e1988Sjohnlev mutex_init(&data->lock, "segmf.lock", MUTEX_DEFAULT, NULL); 112843e1988Sjohnlev seg->s_ops = &segmf_ops; 113843e1988Sjohnlev seg->s_data = data; 114843e1988Sjohnlev return (data); 115843e1988Sjohnlev } 116843e1988Sjohnlev 117843e1988Sjohnlev int 118843e1988Sjohnlev segmf_create(struct seg *seg, void *args) 119843e1988Sjohnlev { 120843e1988Sjohnlev struct segmf_crargs *a = args; 121843e1988Sjohnlev struct segmf_data *data; 122843e1988Sjohnlev struct as *as = seg->s_as; 123843e1988Sjohnlev pgcnt_t i, npages = seg_pages(seg); 124843e1988Sjohnlev int error; 125843e1988Sjohnlev 126843e1988Sjohnlev hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP); 127843e1988Sjohnlev 128843e1988Sjohnlev data = segmf_data_zalloc(seg); 129843e1988Sjohnlev data->vp = specfind(a->dev, VCHR); 130843e1988Sjohnlev data->prot = a->prot; 131843e1988Sjohnlev data->maxprot = a->maxprot; 132843e1988Sjohnlev 1337eea693dSMark Johnson data->map = kmem_alloc(npages * sizeof (segmf_map_t), KM_SLEEP); 1347eea693dSMark Johnson for (i = 0; i < npages; i++) { 1357eea693dSMark Johnson data->map[i].t_type = SEGMF_MAP_EMPTY; 1367eea693dSMark Johnson } 137843e1988Sjohnlev 138843e1988Sjohnlev error = VOP_ADDMAP(VTOCVP(data->vp), 0, as, seg->s_base, seg->s_size, 139da6c28aaSamw data->prot, data->maxprot, MAP_SHARED, CRED(), NULL); 140843e1988Sjohnlev 141843e1988Sjohnlev if (error != 0) 142843e1988Sjohnlev hat_unload(as->a_hat, 143843e1988Sjohnlev seg->s_base, seg->s_size, HAT_UNLOAD_UNMAP); 144843e1988Sjohnlev return (error); 145843e1988Sjohnlev } 146843e1988Sjohnlev 147843e1988Sjohnlev /* 148843e1988Sjohnlev * Duplicate a seg and return new segment in newseg. 149843e1988Sjohnlev */ 150843e1988Sjohnlev static int 151843e1988Sjohnlev segmf_dup(struct seg *seg, struct seg *newseg) 152843e1988Sjohnlev { 153843e1988Sjohnlev struct segmf_data *data = seg->s_data; 154843e1988Sjohnlev struct segmf_data *ndata; 155843e1988Sjohnlev pgcnt_t npages = seg_pages(newseg); 1567eea693dSMark Johnson size_t sz; 157843e1988Sjohnlev 158843e1988Sjohnlev ndata = segmf_data_zalloc(newseg); 159843e1988Sjohnlev 160843e1988Sjohnlev VN_HOLD(data->vp); 161843e1988Sjohnlev ndata->vp = data->vp; 162843e1988Sjohnlev ndata->prot = data->prot; 163843e1988Sjohnlev ndata->maxprot = data->maxprot; 164843e1988Sjohnlev ndata->domid = data->domid; 165843e1988Sjohnlev 1667eea693dSMark Johnson sz = npages * sizeof (segmf_map_t); 1677eea693dSMark Johnson ndata->map = kmem_alloc(sz, KM_SLEEP); 1687eea693dSMark Johnson bcopy(data->map, ndata->map, sz); 169843e1988Sjohnlev 170843e1988Sjohnlev return (VOP_ADDMAP(VTOCVP(ndata->vp), 0, newseg->s_as, 171843e1988Sjohnlev newseg->s_base, newseg->s_size, ndata->prot, ndata->maxprot, 172da6c28aaSamw MAP_SHARED, CRED(), NULL)); 173843e1988Sjohnlev } 174843e1988Sjohnlev 175843e1988Sjohnlev /* 176843e1988Sjohnlev * We only support unmapping the whole segment, and we automatically unlock 177843e1988Sjohnlev * what we previously soft-locked. 178843e1988Sjohnlev */ 179843e1988Sjohnlev static int 180843e1988Sjohnlev segmf_unmap(struct seg *seg, caddr_t addr, size_t len) 181843e1988Sjohnlev { 182843e1988Sjohnlev struct segmf_data *data = seg->s_data; 183843e1988Sjohnlev offset_t off; 184843e1988Sjohnlev 185843e1988Sjohnlev if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size || 186843e1988Sjohnlev (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET)) 187843e1988Sjohnlev panic("segmf_unmap"); 188843e1988Sjohnlev 189843e1988Sjohnlev if (addr != seg->s_base || len != seg->s_size) 190843e1988Sjohnlev return (ENOTSUP); 191843e1988Sjohnlev 192843e1988Sjohnlev hat_unload(seg->s_as->a_hat, addr, len, 193843e1988Sjohnlev HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK); 194843e1988Sjohnlev 195843e1988Sjohnlev off = (offset_t)seg_page(seg, addr); 196843e1988Sjohnlev 197843e1988Sjohnlev ASSERT(data->vp != NULL); 198843e1988Sjohnlev 199843e1988Sjohnlev (void) VOP_DELMAP(VTOCVP(data->vp), off, seg->s_as, addr, len, 200da6c28aaSamw data->prot, data->maxprot, MAP_SHARED, CRED(), NULL); 201843e1988Sjohnlev 202843e1988Sjohnlev seg_free(seg); 203843e1988Sjohnlev return (0); 204843e1988Sjohnlev } 205843e1988Sjohnlev 206843e1988Sjohnlev static void 207843e1988Sjohnlev segmf_free(struct seg *seg) 208843e1988Sjohnlev { 209843e1988Sjohnlev struct segmf_data *data = seg->s_data; 210843e1988Sjohnlev pgcnt_t npages = seg_pages(seg); 211843e1988Sjohnlev 2127eea693dSMark Johnson kmem_free(data->map, npages * sizeof (segmf_map_t)); 213843e1988Sjohnlev VN_RELE(data->vp); 214843e1988Sjohnlev mutex_destroy(&data->lock); 215843e1988Sjohnlev kmem_free(data, sizeof (*data)); 216843e1988Sjohnlev } 217843e1988Sjohnlev 218843e1988Sjohnlev static int segmf_faultpage_debug = 0; 219843e1988Sjohnlev /*ARGSUSED*/ 220843e1988Sjohnlev static int 221843e1988Sjohnlev segmf_faultpage(struct hat *hat, struct seg *seg, caddr_t addr, 222843e1988Sjohnlev enum fault_type type, uint_t prot) 223843e1988Sjohnlev { 224843e1988Sjohnlev struct segmf_data *data = seg->s_data; 225843e1988Sjohnlev uint_t hat_flags = HAT_LOAD_NOCONSIST; 226843e1988Sjohnlev mfn_t mfn; 227843e1988Sjohnlev x86pte_t pte; 2287eea693dSMark Johnson segmf_map_t *map; 2297eea693dSMark Johnson uint_t idx; 230843e1988Sjohnlev 231843e1988Sjohnlev 2327eea693dSMark Johnson idx = seg_page(seg, addr); 2337eea693dSMark Johnson map = &data->map[idx]; 2347eea693dSMark Johnson ASSERT(map->t_type == SEGMF_MAP_MFN); 2357eea693dSMark Johnson 2367eea693dSMark Johnson mfn = map->u.m.m_mfn; 237843e1988Sjohnlev 238843e1988Sjohnlev if (type == F_SOFTLOCK) { 239843e1988Sjohnlev mutex_enter(&freemem_lock); 240843e1988Sjohnlev data->softlockcnt++; 241843e1988Sjohnlev mutex_exit(&freemem_lock); 242843e1988Sjohnlev hat_flags |= HAT_LOAD_LOCK; 243843e1988Sjohnlev } else 244843e1988Sjohnlev hat_flags |= HAT_LOAD; 245843e1988Sjohnlev 246843e1988Sjohnlev if (segmf_faultpage_debug > 0) { 247843e1988Sjohnlev uprintf("segmf_faultpage: addr %p domid %x mfn %lx prot %x\n", 248843e1988Sjohnlev (void *)addr, data->domid, mfn, prot); 249843e1988Sjohnlev segmf_faultpage_debug--; 250843e1988Sjohnlev } 251843e1988Sjohnlev 252843e1988Sjohnlev /* 253843e1988Sjohnlev * Ask the HAT to load a throwaway mapping to page zero, then 254843e1988Sjohnlev * overwrite it with our foreign domain mapping. It gets removed 255843e1988Sjohnlev * later via hat_unload() 256843e1988Sjohnlev */ 257843e1988Sjohnlev hat_devload(hat, addr, MMU_PAGESIZE, (pfn_t)0, 258843e1988Sjohnlev PROT_READ | HAT_UNORDERED_OK, hat_flags); 259843e1988Sjohnlev 260843e1988Sjohnlev pte = mmu_ptob((x86pte_t)mfn) | PT_VALID | PT_USER | PT_FOREIGN; 261843e1988Sjohnlev if (prot & PROT_WRITE) 262843e1988Sjohnlev pte |= PT_WRITABLE; 263843e1988Sjohnlev 264843e1988Sjohnlev if (HYPERVISOR_update_va_mapping_otherdomain((uintptr_t)addr, pte, 265843e1988Sjohnlev UVMF_INVLPG | UVMF_ALL, data->domid) != 0) { 266843e1988Sjohnlev hat_flags = HAT_UNLOAD_UNMAP; 267843e1988Sjohnlev 268843e1988Sjohnlev if (type == F_SOFTLOCK) { 269843e1988Sjohnlev hat_flags |= HAT_UNLOAD_UNLOCK; 270843e1988Sjohnlev mutex_enter(&freemem_lock); 271843e1988Sjohnlev data->softlockcnt--; 272843e1988Sjohnlev mutex_exit(&freemem_lock); 273843e1988Sjohnlev } 274843e1988Sjohnlev 275843e1988Sjohnlev hat_unload(hat, addr, MMU_PAGESIZE, hat_flags); 276843e1988Sjohnlev return (FC_MAKE_ERR(EFAULT)); 277843e1988Sjohnlev } 278843e1988Sjohnlev 279843e1988Sjohnlev return (0); 280843e1988Sjohnlev } 281843e1988Sjohnlev 282843e1988Sjohnlev static int 283843e1988Sjohnlev seg_rw_to_prot(enum seg_rw rw) 284843e1988Sjohnlev { 285843e1988Sjohnlev switch (rw) { 286843e1988Sjohnlev case S_READ: 287843e1988Sjohnlev return (PROT_READ); 288843e1988Sjohnlev case S_WRITE: 289843e1988Sjohnlev return (PROT_WRITE); 290843e1988Sjohnlev case S_EXEC: 291843e1988Sjohnlev return (PROT_EXEC); 292843e1988Sjohnlev case S_OTHER: 293843e1988Sjohnlev default: 294843e1988Sjohnlev break; 295843e1988Sjohnlev } 296843e1988Sjohnlev return (PROT_READ | PROT_WRITE | PROT_EXEC); 297843e1988Sjohnlev } 298843e1988Sjohnlev 299843e1988Sjohnlev static void 300843e1988Sjohnlev segmf_softunlock(struct hat *hat, struct seg *seg, caddr_t addr, size_t len) 301843e1988Sjohnlev { 302843e1988Sjohnlev struct segmf_data *data = seg->s_data; 303843e1988Sjohnlev 304843e1988Sjohnlev hat_unlock(hat, addr, len); 305843e1988Sjohnlev 306843e1988Sjohnlev mutex_enter(&freemem_lock); 307843e1988Sjohnlev ASSERT(data->softlockcnt >= btopr(len)); 308843e1988Sjohnlev data->softlockcnt -= btopr(len); 309843e1988Sjohnlev mutex_exit(&freemem_lock); 310843e1988Sjohnlev 311843e1988Sjohnlev if (data->softlockcnt == 0) { 312843e1988Sjohnlev struct as *as = seg->s_as; 313843e1988Sjohnlev 314843e1988Sjohnlev if (AS_ISUNMAPWAIT(as)) { 315843e1988Sjohnlev mutex_enter(&as->a_contents); 316843e1988Sjohnlev if (AS_ISUNMAPWAIT(as)) { 317843e1988Sjohnlev AS_CLRUNMAPWAIT(as); 318843e1988Sjohnlev cv_broadcast(&as->a_cv); 319843e1988Sjohnlev } 320843e1988Sjohnlev mutex_exit(&as->a_contents); 321843e1988Sjohnlev } 322843e1988Sjohnlev } 323843e1988Sjohnlev } 324843e1988Sjohnlev 325843e1988Sjohnlev static int 326843e1988Sjohnlev segmf_fault_range(struct hat *hat, struct seg *seg, caddr_t addr, size_t len, 327843e1988Sjohnlev enum fault_type type, enum seg_rw rw) 328843e1988Sjohnlev { 329843e1988Sjohnlev struct segmf_data *data = seg->s_data; 330843e1988Sjohnlev int error = 0; 331843e1988Sjohnlev caddr_t a; 332843e1988Sjohnlev 333843e1988Sjohnlev if ((data->prot & seg_rw_to_prot(rw)) == 0) 334843e1988Sjohnlev return (FC_PROT); 335843e1988Sjohnlev 336843e1988Sjohnlev /* loop over the address range handling each fault */ 337843e1988Sjohnlev 338843e1988Sjohnlev for (a = addr; a < addr + len; a += PAGESIZE) { 339843e1988Sjohnlev error = segmf_faultpage(hat, seg, a, type, data->prot); 340843e1988Sjohnlev if (error != 0) 341843e1988Sjohnlev break; 342843e1988Sjohnlev } 343843e1988Sjohnlev 344843e1988Sjohnlev if (error != 0 && type == F_SOFTLOCK) { 345843e1988Sjohnlev size_t done = (size_t)(a - addr); 346843e1988Sjohnlev 347843e1988Sjohnlev /* 348843e1988Sjohnlev * Undo what's been done so far. 349843e1988Sjohnlev */ 350843e1988Sjohnlev if (done > 0) 351843e1988Sjohnlev segmf_softunlock(hat, seg, addr, done); 352843e1988Sjohnlev } 353843e1988Sjohnlev 354843e1988Sjohnlev return (error); 355843e1988Sjohnlev } 356843e1988Sjohnlev 357843e1988Sjohnlev /* 358843e1988Sjohnlev * We never demand-fault for seg_mf. 359843e1988Sjohnlev */ 360843e1988Sjohnlev /*ARGSUSED*/ 361843e1988Sjohnlev static int 362843e1988Sjohnlev segmf_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len, 363843e1988Sjohnlev enum fault_type type, enum seg_rw rw) 364843e1988Sjohnlev { 365843e1988Sjohnlev return (FC_MAKE_ERR(EFAULT)); 366843e1988Sjohnlev } 367843e1988Sjohnlev 368843e1988Sjohnlev /*ARGSUSED*/ 369843e1988Sjohnlev static int 370843e1988Sjohnlev segmf_faulta(struct seg *seg, caddr_t addr) 371843e1988Sjohnlev { 372843e1988Sjohnlev return (0); 373843e1988Sjohnlev } 374843e1988Sjohnlev 375843e1988Sjohnlev /*ARGSUSED*/ 376843e1988Sjohnlev static int 377843e1988Sjohnlev segmf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 378843e1988Sjohnlev { 379843e1988Sjohnlev return (EINVAL); 380843e1988Sjohnlev } 381843e1988Sjohnlev 382843e1988Sjohnlev /*ARGSUSED*/ 383843e1988Sjohnlev static int 384843e1988Sjohnlev segmf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 385843e1988Sjohnlev { 386843e1988Sjohnlev return (EINVAL); 387843e1988Sjohnlev } 388843e1988Sjohnlev 389843e1988Sjohnlev /*ARGSUSED*/ 390843e1988Sjohnlev static int 391843e1988Sjohnlev segmf_kluster(struct seg *seg, caddr_t addr, ssize_t delta) 392843e1988Sjohnlev { 393843e1988Sjohnlev return (-1); 394843e1988Sjohnlev } 395843e1988Sjohnlev 396843e1988Sjohnlev /*ARGSUSED*/ 397843e1988Sjohnlev static int 398843e1988Sjohnlev segmf_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags) 399843e1988Sjohnlev { 400843e1988Sjohnlev return (0); 401843e1988Sjohnlev } 402843e1988Sjohnlev 403843e1988Sjohnlev /* 404843e1988Sjohnlev * XXPV Hmm. Should we say that mf mapping are "in core?" 405843e1988Sjohnlev */ 406843e1988Sjohnlev 407843e1988Sjohnlev /*ARGSUSED*/ 408843e1988Sjohnlev static size_t 409843e1988Sjohnlev segmf_incore(struct seg *seg, caddr_t addr, size_t len, char *vec) 410843e1988Sjohnlev { 411843e1988Sjohnlev size_t v; 412843e1988Sjohnlev 413843e1988Sjohnlev for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len; 414843e1988Sjohnlev len -= PAGESIZE, v += PAGESIZE) 415843e1988Sjohnlev *vec++ = 1; 416843e1988Sjohnlev return (v); 417843e1988Sjohnlev } 418843e1988Sjohnlev 419843e1988Sjohnlev /*ARGSUSED*/ 420843e1988Sjohnlev static int 421843e1988Sjohnlev segmf_lockop(struct seg *seg, caddr_t addr, 422843e1988Sjohnlev size_t len, int attr, int op, ulong_t *lockmap, size_t pos) 423843e1988Sjohnlev { 424843e1988Sjohnlev return (0); 425843e1988Sjohnlev } 426843e1988Sjohnlev 427843e1988Sjohnlev static int 428843e1988Sjohnlev segmf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) 429843e1988Sjohnlev { 430843e1988Sjohnlev struct segmf_data *data = seg->s_data; 431843e1988Sjohnlev pgcnt_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1; 432843e1988Sjohnlev 433843e1988Sjohnlev if (pgno != 0) { 434843e1988Sjohnlev do 435843e1988Sjohnlev protv[--pgno] = data->prot; 436843e1988Sjohnlev while (pgno != 0) 437843e1988Sjohnlev ; 438843e1988Sjohnlev } 439843e1988Sjohnlev return (0); 440843e1988Sjohnlev } 441843e1988Sjohnlev 442843e1988Sjohnlev static u_offset_t 443843e1988Sjohnlev segmf_getoffset(struct seg *seg, caddr_t addr) 444843e1988Sjohnlev { 445843e1988Sjohnlev return (addr - seg->s_base); 446843e1988Sjohnlev } 447843e1988Sjohnlev 448843e1988Sjohnlev /*ARGSUSED*/ 449843e1988Sjohnlev static int 450843e1988Sjohnlev segmf_gettype(struct seg *seg, caddr_t addr) 451843e1988Sjohnlev { 452843e1988Sjohnlev return (MAP_SHARED); 453843e1988Sjohnlev } 454843e1988Sjohnlev 455843e1988Sjohnlev /*ARGSUSED1*/ 456843e1988Sjohnlev static int 457843e1988Sjohnlev segmf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp) 458843e1988Sjohnlev { 459843e1988Sjohnlev struct segmf_data *data = seg->s_data; 460843e1988Sjohnlev 461843e1988Sjohnlev *vpp = VTOCVP(data->vp); 462843e1988Sjohnlev return (0); 463843e1988Sjohnlev } 464843e1988Sjohnlev 465843e1988Sjohnlev /*ARGSUSED*/ 466843e1988Sjohnlev static int 467843e1988Sjohnlev segmf_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav) 468843e1988Sjohnlev { 469843e1988Sjohnlev return (0); 470843e1988Sjohnlev } 471843e1988Sjohnlev 472843e1988Sjohnlev /*ARGSUSED*/ 473843e1988Sjohnlev static void 474843e1988Sjohnlev segmf_dump(struct seg *seg) 475843e1988Sjohnlev {} 476843e1988Sjohnlev 477843e1988Sjohnlev /*ARGSUSED*/ 478843e1988Sjohnlev static int 479843e1988Sjohnlev segmf_pagelock(struct seg *seg, caddr_t addr, size_t len, 480843e1988Sjohnlev struct page ***ppp, enum lock_type type, enum seg_rw rw) 481843e1988Sjohnlev { 482843e1988Sjohnlev return (ENOTSUP); 483843e1988Sjohnlev } 484843e1988Sjohnlev 485843e1988Sjohnlev /*ARGSUSED*/ 486843e1988Sjohnlev static int 487843e1988Sjohnlev segmf_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc) 488843e1988Sjohnlev { 489843e1988Sjohnlev return (ENOTSUP); 490843e1988Sjohnlev } 491843e1988Sjohnlev 492843e1988Sjohnlev static int 493843e1988Sjohnlev segmf_getmemid(struct seg *seg, caddr_t addr, memid_t *memid) 494843e1988Sjohnlev { 495843e1988Sjohnlev struct segmf_data *data = seg->s_data; 496843e1988Sjohnlev 497843e1988Sjohnlev memid->val[0] = (uintptr_t)VTOCVP(data->vp); 498843e1988Sjohnlev memid->val[1] = (uintptr_t)seg_page(seg, addr); 499843e1988Sjohnlev return (0); 500843e1988Sjohnlev } 501843e1988Sjohnlev 502843e1988Sjohnlev /*ARGSUSED*/ 503843e1988Sjohnlev static lgrp_mem_policy_info_t * 504843e1988Sjohnlev segmf_getpolicy(struct seg *seg, caddr_t addr) 505843e1988Sjohnlev { 506843e1988Sjohnlev return (NULL); 507843e1988Sjohnlev } 508843e1988Sjohnlev 509843e1988Sjohnlev /*ARGSUSED*/ 510843e1988Sjohnlev static int 511843e1988Sjohnlev segmf_capable(struct seg *seg, segcapability_t capability) 512843e1988Sjohnlev { 513843e1988Sjohnlev return (0); 514843e1988Sjohnlev } 515843e1988Sjohnlev 516843e1988Sjohnlev /* 517843e1988Sjohnlev * Add a set of contiguous foreign MFNs to the segment. soft-locking them. The 518843e1988Sjohnlev * pre-faulting is necessary due to live migration; in particular we must 519843e1988Sjohnlev * return an error in response to IOCTL_PRIVCMD_MMAPBATCH rather than faulting 520843e1988Sjohnlev * later on a bad MFN. Whilst this isn't necessary for the other MMAP 521843e1988Sjohnlev * ioctl()s, we lock them too, as they should be transitory. 522843e1988Sjohnlev */ 523843e1988Sjohnlev int 524843e1988Sjohnlev segmf_add_mfns(struct seg *seg, caddr_t addr, mfn_t mfn, 525843e1988Sjohnlev pgcnt_t pgcnt, domid_t domid) 526843e1988Sjohnlev { 527843e1988Sjohnlev struct segmf_data *data = seg->s_data; 5287eea693dSMark Johnson pgcnt_t base; 529843e1988Sjohnlev faultcode_t fc; 530843e1988Sjohnlev pgcnt_t i; 531843e1988Sjohnlev int error = 0; 532843e1988Sjohnlev 533843e1988Sjohnlev if (seg->s_ops != &segmf_ops) 534843e1988Sjohnlev return (EINVAL); 535843e1988Sjohnlev 536843e1988Sjohnlev /* 537843e1988Sjohnlev * Don't mess with dom0. 538843e1988Sjohnlev * 539843e1988Sjohnlev * Only allow the domid to be set once for the segment. 540843e1988Sjohnlev * After that attempts to add mappings to this segment for 541843e1988Sjohnlev * other domains explicitly fails. 542843e1988Sjohnlev */ 543843e1988Sjohnlev 544843e1988Sjohnlev if (domid == 0 || domid == DOMID_SELF) 545843e1988Sjohnlev return (EACCES); 546843e1988Sjohnlev 547843e1988Sjohnlev mutex_enter(&data->lock); 548843e1988Sjohnlev 549843e1988Sjohnlev if (data->domid == 0) 550843e1988Sjohnlev data->domid = domid; 551843e1988Sjohnlev 552843e1988Sjohnlev if (data->domid != domid) { 553843e1988Sjohnlev error = EINVAL; 554843e1988Sjohnlev goto out; 555843e1988Sjohnlev } 556843e1988Sjohnlev 557843e1988Sjohnlev base = seg_page(seg, addr); 558843e1988Sjohnlev 5597eea693dSMark Johnson for (i = 0; i < pgcnt; i++) { 5607eea693dSMark Johnson data->map[base + i].t_type = SEGMF_MAP_MFN; 5617eea693dSMark Johnson data->map[base + i].u.m.m_mfn = mfn++; 5627eea693dSMark Johnson } 563843e1988Sjohnlev 564843e1988Sjohnlev fc = segmf_fault_range(seg->s_as->a_hat, seg, addr, 565843e1988Sjohnlev pgcnt * MMU_PAGESIZE, F_SOFTLOCK, S_OTHER); 566843e1988Sjohnlev 567843e1988Sjohnlev if (fc != 0) { 568843e1988Sjohnlev error = fc_decode(fc); 5697eea693dSMark Johnson for (i = 0; i < pgcnt; i++) { 5707eea693dSMark Johnson data->map[base + i].t_type = SEGMF_MAP_EMPTY; 5717eea693dSMark Johnson } 572843e1988Sjohnlev } 573843e1988Sjohnlev 574843e1988Sjohnlev out: 575843e1988Sjohnlev mutex_exit(&data->lock); 576843e1988Sjohnlev return (error); 577843e1988Sjohnlev } 578843e1988Sjohnlev 5797eea693dSMark Johnson int 5807eea693dSMark Johnson segmf_add_grefs(struct seg *seg, caddr_t addr, uint_t flags, 5817eea693dSMark Johnson grant_ref_t *grefs, uint_t cnt, domid_t domid) 5827eea693dSMark Johnson { 5837eea693dSMark Johnson struct segmf_data *data; 5847eea693dSMark Johnson segmf_map_t *map; 5857eea693dSMark Johnson faultcode_t fc; 5867eea693dSMark Johnson uint_t idx; 5877eea693dSMark Johnson uint_t i; 5887eea693dSMark Johnson int e; 5897eea693dSMark Johnson 5907eea693dSMark Johnson if (seg->s_ops != &segmf_ops) 5917eea693dSMark Johnson return (EINVAL); 5927eea693dSMark Johnson 5937eea693dSMark Johnson /* 5947eea693dSMark Johnson * Don't mess with dom0. 5957eea693dSMark Johnson * 5967eea693dSMark Johnson * Only allow the domid to be set once for the segment. 5977eea693dSMark Johnson * After that attempts to add mappings to this segment for 5987eea693dSMark Johnson * other domains explicitly fails. 5997eea693dSMark Johnson */ 6007eea693dSMark Johnson 6017eea693dSMark Johnson if (domid == 0 || domid == DOMID_SELF) 6027eea693dSMark Johnson return (EACCES); 6037eea693dSMark Johnson 6047eea693dSMark Johnson data = seg->s_data; 6057eea693dSMark Johnson idx = seg_page(seg, addr); 6067eea693dSMark Johnson map = &data->map[idx]; 6077eea693dSMark Johnson e = 0; 6087eea693dSMark Johnson 6097eea693dSMark Johnson mutex_enter(&data->lock); 6107eea693dSMark Johnson 6117eea693dSMark Johnson if (data->domid == 0) 6127eea693dSMark Johnson data->domid = domid; 6137eea693dSMark Johnson 6147eea693dSMark Johnson if (data->domid != domid) { 6157eea693dSMark Johnson e = EINVAL; 6167eea693dSMark Johnson goto out; 6177eea693dSMark Johnson } 6187eea693dSMark Johnson 6197eea693dSMark Johnson /* store away the grefs passed in then fault in the pages */ 6207eea693dSMark Johnson for (i = 0; i < cnt; i++) { 6217eea693dSMark Johnson map[i].t_type = SEGMF_MAP_GREF; 6227eea693dSMark Johnson map[i].u.g.g_gref = grefs[i]; 6237eea693dSMark Johnson map[i].u.g.g_handle = 0; 6247eea693dSMark Johnson map[i].u.g.g_flags = 0; 6257eea693dSMark Johnson if (flags & SEGMF_GREF_WR) { 6267eea693dSMark Johnson map[i].u.g.g_flags |= SEGMF_GFLAGS_WR; 6277eea693dSMark Johnson } 6287eea693dSMark Johnson } 6297eea693dSMark Johnson fc = segmf_fault_gref_range(seg, addr, cnt); 6307eea693dSMark Johnson if (fc != 0) { 6317eea693dSMark Johnson e = fc_decode(fc); 6327eea693dSMark Johnson for (i = 0; i < cnt; i++) { 6337eea693dSMark Johnson data->map[i].t_type = SEGMF_MAP_EMPTY; 6347eea693dSMark Johnson } 6357eea693dSMark Johnson } 6367eea693dSMark Johnson 6377eea693dSMark Johnson out: 6387eea693dSMark Johnson mutex_exit(&data->lock); 6397eea693dSMark Johnson return (e); 6407eea693dSMark Johnson } 6417eea693dSMark Johnson 6427eea693dSMark Johnson int 6437eea693dSMark Johnson segmf_release_grefs(struct seg *seg, caddr_t addr, uint_t cnt) 6447eea693dSMark Johnson { 6457eea693dSMark Johnson gnttab_unmap_grant_ref_t mapop[SEGMF_MAX_GREFS]; 6467eea693dSMark Johnson struct segmf_data *data; 6477eea693dSMark Johnson segmf_map_t *map; 6487eea693dSMark Johnson uint_t idx; 6497eea693dSMark Johnson long e; 6507eea693dSMark Johnson int i; 6517eea693dSMark Johnson int n; 6527eea693dSMark Johnson 6537eea693dSMark Johnson 6547eea693dSMark Johnson if (cnt > SEGMF_MAX_GREFS) { 6557eea693dSMark Johnson return (-1); 6567eea693dSMark Johnson } 6577eea693dSMark Johnson 6587eea693dSMark Johnson idx = seg_page(seg, addr); 6597eea693dSMark Johnson data = seg->s_data; 6607eea693dSMark Johnson map = &data->map[idx]; 6617eea693dSMark Johnson 6627eea693dSMark Johnson bzero(mapop, sizeof (gnttab_unmap_grant_ref_t) * cnt); 6637eea693dSMark Johnson 6647eea693dSMark Johnson /* 6657eea693dSMark Johnson * for each entry which isn't empty and is currently mapped, 6667eea693dSMark Johnson * set it up for an unmap then mark them empty. 6677eea693dSMark Johnson */ 6687eea693dSMark Johnson n = 0; 6697eea693dSMark Johnson for (i = 0; i < cnt; i++) { 6707eea693dSMark Johnson ASSERT(map[i].t_type != SEGMF_MAP_MFN); 6717eea693dSMark Johnson if ((map[i].t_type == SEGMF_MAP_GREF) && 6727eea693dSMark Johnson (map[i].u.g.g_flags & SEGMF_GFLAGS_MAPPED)) { 6737eea693dSMark Johnson mapop[n].handle = map[i].u.g.g_handle; 6747eea693dSMark Johnson mapop[n].host_addr = map[i].u.g.g_ptep; 6757eea693dSMark Johnson mapop[n].dev_bus_addr = 0; 6767eea693dSMark Johnson n++; 6777eea693dSMark Johnson } 6787eea693dSMark Johnson map[i].t_type = SEGMF_MAP_EMPTY; 6797eea693dSMark Johnson } 6807eea693dSMark Johnson 6817eea693dSMark Johnson /* if there's nothing to unmap, just return */ 6827eea693dSMark Johnson if (n == 0) { 6837eea693dSMark Johnson return (0); 6847eea693dSMark Johnson } 6857eea693dSMark Johnson 6867eea693dSMark Johnson e = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &mapop, n); 6877eea693dSMark Johnson if (e != 0) { 6887eea693dSMark Johnson return (-1); 6897eea693dSMark Johnson } 6907eea693dSMark Johnson 6917eea693dSMark Johnson return (0); 6927eea693dSMark Johnson } 6937eea693dSMark Johnson 6947eea693dSMark Johnson 6957eea693dSMark Johnson void 6967eea693dSMark Johnson segmf_add_gref_pte(struct seg *seg, caddr_t addr, uint64_t pte_ma) 6977eea693dSMark Johnson { 6987eea693dSMark Johnson struct segmf_data *data; 6997eea693dSMark Johnson uint_t idx; 7007eea693dSMark Johnson 7017eea693dSMark Johnson idx = seg_page(seg, addr); 7027eea693dSMark Johnson data = seg->s_data; 7037eea693dSMark Johnson 7047eea693dSMark Johnson data->map[idx].u.g.g_ptep = pte_ma; 7057eea693dSMark Johnson } 7067eea693dSMark Johnson 7077eea693dSMark Johnson 7087eea693dSMark Johnson static int 7097eea693dSMark Johnson segmf_fault_gref_range(struct seg *seg, caddr_t addr, size_t cnt) 7107eea693dSMark Johnson { 7117eea693dSMark Johnson gnttab_map_grant_ref_t mapop[SEGMF_MAX_GREFS]; 7127eea693dSMark Johnson struct segmf_data *data; 7137eea693dSMark Johnson segmf_map_t *map; 7147eea693dSMark Johnson uint_t idx; 7157eea693dSMark Johnson int e; 7167eea693dSMark Johnson int i; 7177eea693dSMark Johnson 7187eea693dSMark Johnson 7197eea693dSMark Johnson if (cnt > SEGMF_MAX_GREFS) { 7207eea693dSMark Johnson return (-1); 7217eea693dSMark Johnson } 7227eea693dSMark Johnson 7237eea693dSMark Johnson data = seg->s_data; 7247eea693dSMark Johnson idx = seg_page(seg, addr); 7257eea693dSMark Johnson map = &data->map[idx]; 7267eea693dSMark Johnson 7277eea693dSMark Johnson bzero(mapop, sizeof (gnttab_map_grant_ref_t) * cnt); 7287eea693dSMark Johnson 7297eea693dSMark Johnson ASSERT(map->t_type == SEGMF_MAP_GREF); 7307eea693dSMark Johnson 7317eea693dSMark Johnson /* 7327eea693dSMark Johnson * map in each page passed in into the user apps AS. We do this by 7337eea693dSMark Johnson * passing the MA of the actual pte of the mapping to the hypervisor. 7347eea693dSMark Johnson */ 7357eea693dSMark Johnson for (i = 0; i < cnt; i++) { 7367eea693dSMark Johnson mapop[i].host_addr = map[i].u.g.g_ptep; 7377eea693dSMark Johnson mapop[i].dom = data->domid; 7387eea693dSMark Johnson mapop[i].ref = map[i].u.g.g_gref; 7397eea693dSMark Johnson mapop[i].flags = GNTMAP_host_map | GNTMAP_application_map | 7407eea693dSMark Johnson GNTMAP_contains_pte; 7417eea693dSMark Johnson if (!(map[i].u.g.g_flags & SEGMF_GFLAGS_WR)) { 7427eea693dSMark Johnson mapop[i].flags |= GNTMAP_readonly; 7437eea693dSMark Johnson } 7447eea693dSMark Johnson } 7457eea693dSMark Johnson e = xen_map_gref(GNTTABOP_map_grant_ref, mapop, cnt, B_TRUE); 7467eea693dSMark Johnson if ((e != 0) || (mapop[0].status != GNTST_okay)) { 7477eea693dSMark Johnson return (FC_MAKE_ERR(EFAULT)); 7487eea693dSMark Johnson } 7497eea693dSMark Johnson 7507eea693dSMark Johnson /* save handle for segmf_release_grefs() and mark it as mapped */ 7517eea693dSMark Johnson for (i = 0; i < cnt; i++) { 7527eea693dSMark Johnson ASSERT(mapop[i].status == GNTST_okay); 7537eea693dSMark Johnson map[i].u.g.g_handle = mapop[i].handle; 7547eea693dSMark Johnson map[i].u.g.g_flags |= SEGMF_GFLAGS_MAPPED; 7557eea693dSMark Johnson } 7567eea693dSMark Johnson 7577eea693dSMark Johnson return (0); 7587eea693dSMark Johnson } 7597eea693dSMark Johnson 760843e1988Sjohnlev static struct seg_ops segmf_ops = { 761843e1988Sjohnlev segmf_dup, 762843e1988Sjohnlev segmf_unmap, 763843e1988Sjohnlev segmf_free, 764843e1988Sjohnlev segmf_fault, 765843e1988Sjohnlev segmf_faulta, 766843e1988Sjohnlev segmf_setprot, 767843e1988Sjohnlev segmf_checkprot, 768843e1988Sjohnlev (int (*)())segmf_kluster, 769843e1988Sjohnlev (size_t (*)(struct seg *))NULL, /* swapout */ 770843e1988Sjohnlev segmf_sync, 771843e1988Sjohnlev segmf_incore, 772843e1988Sjohnlev segmf_lockop, 773843e1988Sjohnlev segmf_getprot, 774843e1988Sjohnlev segmf_getoffset, 775843e1988Sjohnlev segmf_gettype, 776843e1988Sjohnlev segmf_getvp, 777843e1988Sjohnlev segmf_advise, 778843e1988Sjohnlev segmf_dump, 779843e1988Sjohnlev segmf_pagelock, 780843e1988Sjohnlev segmf_setpagesize, 781843e1988Sjohnlev segmf_getmemid, 782843e1988Sjohnlev segmf_getpolicy, 783*9d12795fSRobert Mustacchi segmf_capable, 784*9d12795fSRobert Mustacchi seg_inherit_notsup 785843e1988Sjohnlev }; 786