1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright (c) 2014 by Delphix. All rights reserved. 29 * Copyright 2017 Nexenta Systems, Inc. 30 */ 31 32 #ifndef _SYS_XDF_H 33 #define _SYS_XDF_H 34 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/cmlb.h> 38 #include <sys/dkio.h> 39 40 #include <sys/gnttab.h> 41 #include <xen/sys/xendev.h> 42 43 #ifdef __cplusplus 44 extern "C" { 45 #endif 46 47 48 /* 49 * VBDs have standard 512 byte blocks 50 * A single blkif_request can transfer up to 11 pages of data, 1 page/segment 51 */ 52 #define XB_BSIZE DEV_BSIZE 53 #define XB_BMASK (XB_BSIZE - 1) 54 #define XB_BSHIFT 9 55 #define XB_DTOB(bn, vdp) ((bn) * (vdp)->xdf_xdev_secsize) 56 57 #define XB_MAX_SEGLEN (8 * XB_BSIZE) 58 #define XB_SEGOFFSET (XB_MAX_SEGLEN - 1) 59 #define XB_MAX_XFER (XB_MAX_SEGLEN * BLKIF_MAX_SEGMENTS_PER_REQUEST) 60 #define XB_MAXPHYS (XB_MAX_XFER * BLKIF_RING_SIZE) 61 62 /* Number of sectors per segement */ 63 #define XB_NUM_SECTORS_PER_SEG (PAGESIZE / XB_BSIZE) 64 /* sectors are number 0 through XB_NUM_SECTORS_PER_SEG - 1 */ 65 #define XB_LAST_SECTOR_IN_SEG (XB_NUM_SECTORS_PER_SEG - 1) 66 67 68 /* 69 * Slice for absolute disk transaction. 70 * 71 * Hack Alert. XB_SLICE_NONE is a magic value that can be written into the 72 * b_private field of buf structures passed to xdf_strategy(). When present 73 * it indicates that the I/O is using an absolute offset. (ie, the I/O is 74 * not bound to any one partition.) This magic value is currently used by 75 * the pv_cmdk driver. This hack is shamelessly stolen from the sun4v vdc 76 * driver, another virtual disk device driver. (Although in the case of 77 * vdc the hack is less egregious since it is self contained within the 78 * vdc driver, where as here it is used as an interface between the pv_cmdk 79 * driver and the xdf driver.) 80 */ 81 #define XB_SLICE_NONE 0xFF 82 83 /* 84 * blkif status 85 */ 86 typedef enum xdf_state { 87 /* 88 * initial state 89 */ 90 XD_UNKNOWN = 0, 91 /* 92 * ring and evtchn alloced, xenbus state changed to 93 * XenbusStateInitialised, wait for backend to connect 94 */ 95 XD_INIT = 1, 96 /* 97 * backend and frontend xenbus state has changed to 98 * XenbusStateConnected. IO is now allowed, but we are not still 99 * fully initialized. 100 */ 101 XD_CONNECTED = 2, 102 /* 103 * We're fully initialized and allowing regular IO. 104 */ 105 XD_READY = 3, 106 /* 107 * vbd interface close request received from backend, no more I/O 108 * requestis allowed to be put into ring buffer, while interrupt handler 109 * is allowed to run to finish any outstanding I/O request, disconnect 110 * process is kicked off by changing xenbus state to XenbusStateClosed 111 */ 112 XD_CLOSING = 4, 113 /* 114 * disconnection process finished, both backend and frontend's 115 * xenbus state has been changed to XenbusStateClosed, can be detached 116 */ 117 XD_CLOSED = 5, 118 /* 119 * We're either being suspended or resuming from a suspend. If we're 120 * in the process of suspending, we block all new IO, but but allow 121 * existing IO to drain. 122 */ 123 XD_SUSPEND = 6 124 } xdf_state_t; 125 126 /* 127 * 16 partitions + fdisk 128 */ 129 #define XDF_PSHIFT 6 130 #define XDF_PMASK ((1 << XDF_PSHIFT) - 1) 131 #define XDF_PEXT (1 << XDF_PSHIFT) 132 #define XDF_MINOR(i, m) (((i) << XDF_PSHIFT) | (m)) 133 #define XDF_INST(m) ((m) >> XDF_PSHIFT) 134 #define XDF_PART(m) ((m) & XDF_PMASK) 135 136 /* 137 * one blkif_request_t will have one corresponding ge_slot_t 138 * where we save those grant table refs used in this blkif_request_t 139 * 140 * the id of this ge_slot_t will also be put into 'id' field in 141 * each blkif_request_t when sent out to the ring buffer. 142 */ 143 typedef struct ge_slot { 144 list_node_t gs_vreq_link; 145 struct v_req *gs_vreq; 146 domid_t gs_oeid; 147 int gs_isread; 148 grant_ref_t gs_ghead; 149 int gs_ngrefs; 150 grant_ref_t gs_ge[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 151 } ge_slot_t; 152 153 /* 154 * vbd I/O request 155 * 156 * An instance of this structure is bound to each buf passed to 157 * the driver's strategy by setting the pointer into bp->av_back. 158 * The id of this vreq will also be put into 'id' field in each 159 * blkif_request_t when sent out to the ring buffer for one DMA 160 * window of this buf. 161 * 162 * Vreq mainly contains DMA information for this buf. In one vreq/buf, 163 * there could be more than one DMA window, each of which will be 164 * mapped to one blkif_request_t/ge_slot_t. Ge_slot_t contains all grant 165 * table entry information for this buf. The ge_slot_t for current DMA 166 * window is pointed to by v_gs in vreq. 167 * 168 * So, grant table entries will only be alloc'ed when the DMA window is 169 * about to be transferred via blkif_request_t to the ring buffer. And 170 * they will be freed right after the blkif_response_t is seen. By this 171 * means, we can make use of grant table entries more efficiently. 172 */ 173 typedef struct v_req { 174 list_node_t v_link; 175 list_t v_gs; 176 int v_status; 177 buf_t *v_buf; 178 uint_t v_ndmacs; 179 uint_t v_dmaw; 180 uint_t v_ndmaws; 181 uint_t v_nslots; 182 uint64_t v_blkno; 183 ddi_dma_handle_t v_memdmahdl; 184 ddi_acc_handle_t v_align; 185 ddi_dma_handle_t v_dmahdl; 186 ddi_dma_cookie_t v_dmac; 187 caddr_t v_abuf; 188 uint8_t v_flush_diskcache; 189 boolean_t v_runq; 190 } v_req_t; 191 192 /* 193 * Status set and checked in vreq->v_status by vreq_setup() 194 * 195 * These flags will help us to continue the vreq setup work from last failure 196 * point, instead of starting from scratch after each failure. 197 */ 198 #define VREQ_INIT 0x0 199 #define VREQ_INIT_DONE 0x1 200 #define VREQ_DMAHDL_ALLOCED 0x2 201 #define VREQ_MEMDMAHDL_ALLOCED 0x3 202 #define VREQ_DMAMEM_ALLOCED 0x4 203 #define VREQ_DMABUF_BOUND 0x5 204 #define VREQ_GS_ALLOCED 0x6 205 #define VREQ_DMAWIN_DONE 0x7 206 207 /* 208 * virtual block device per-instance softstate 209 */ 210 typedef struct xdf { 211 dev_info_t *xdf_dip; 212 char *xdf_addr; 213 ddi_iblock_cookie_t xdf_ibc; /* mutex iblock cookie */ 214 domid_t xdf_peer; /* otherend's dom ID */ 215 xendev_ring_t *xdf_xb_ring; /* I/O ring buffer */ 216 ddi_acc_handle_t xdf_xb_ring_hdl; /* access handler for ring buffer */ 217 list_t xdf_vreq_act; /* active vreq list */ 218 buf_t *xdf_f_act; /* active buf list head */ 219 buf_t *xdf_l_act; /* active buf list tail */ 220 buf_t *xdf_i_act; /* active buf list index */ 221 xdf_state_t xdf_state; /* status of this virtual disk */ 222 boolean_t xdf_suspending; 223 ulong_t xdf_vd_open[OTYPCNT]; 224 ulong_t xdf_vd_lyropen[XDF_PEXT]; 225 ulong_t xdf_connect_req; 226 kthread_t *xdf_connect_thread; 227 ulong_t xdf_vd_exclopen; 228 kmutex_t xdf_iostat_lk; /* muxes lock for the iostat ptr */ 229 kmutex_t xdf_dev_lk; /* mutex lock for I/O path */ 230 kmutex_t xdf_cb_lk; /* mutex lock for event handling path */ 231 kcondvar_t xdf_dev_cv; /* cv used in I/O path */ 232 uint_t xdf_dinfo; /* disk info from backend xenstore */ 233 diskaddr_t xdf_xdev_nblocks; /* total size in block */ 234 uint_t xdf_xdev_secsize; /* disk blksize from backend */ 235 cmlb_geom_t xdf_pgeom; 236 boolean_t xdf_pgeom_set; 237 boolean_t xdf_pgeom_fixed; 238 kstat_t *xdf_xdev_iostat; 239 cmlb_handle_t xdf_vd_lbl; 240 ddi_softintr_t xdf_softintr_id; 241 timeout_id_t xdf_timeout_id; 242 struct gnttab_free_callback xdf_gnt_callback; 243 boolean_t xdf_feature_barrier; 244 boolean_t xdf_flush_supported; 245 boolean_t xdf_media_req_supported; 246 boolean_t xdf_wce; 247 boolean_t xdf_cmlb_reattach; 248 char *xdf_flush_mem; 249 char *xdf_cache_flush_block; 250 int xdf_evtchn; 251 enum dkio_state xdf_mstate; 252 kcondvar_t xdf_mstate_cv; 253 kcondvar_t xdf_hp_status_cv; 254 struct buf *xdf_ready_bp; 255 ddi_taskq_t *xdf_ready_tq; 256 kthread_t *xdf_ready_tq_thread; 257 struct buf *xdf_ready_tq_bp; 258 ddi_devid_t xdf_tgt_devid; 259 #ifdef DEBUG 260 int xdf_dmacallback_num; 261 kthread_t *xdf_oe_change_thread; 262 #endif 263 } xdf_t; 264 265 /* 266 * VBD I/O requests must be aligned on a 512-byte boundary and specify 267 * a transfer size which is a mutiple of 512-bytes 268 */ 269 #define ALIGNED_XFER(bp) \ 270 ((((uintptr_t)((bp)->b_un.b_addr) & XB_BMASK) == 0) && \ 271 (((bp)->b_bcount & XB_BMASK) == 0)) 272 273 #define U_INVAL(u) (((u)->uio_loffset & (offset_t)(XB_BMASK)) || \ 274 ((u)->uio_iov->iov_len & (offset_t)(XB_BMASK))) 275 276 /* wrap pa_to_ma() for xdf to run in dom0 */ 277 #define PATOMA(addr) (DOMAIN_IS_INITDOMAIN(xen_info) ? addr : pa_to_ma(addr)) 278 279 #define XD_IS_RO(vbd) VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_READONLY) 280 #define XD_IS_CD(vbd) VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_CDROM) 281 #define XD_IS_RM(vbd) VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_REMOVABLE) 282 #define IS_READ(bp) VOID2BOOLEAN((bp)->b_flags & B_READ) 283 #define IS_ERROR(bp) VOID2BOOLEAN((bp)->b_flags & B_ERROR) 284 285 #define XDF_UPDATE_IO_STAT(vdp, bp) \ 286 { \ 287 kstat_io_t *kip = KSTAT_IO_PTR((vdp)->xdf_xdev_iostat); \ 288 size_t n_done = (bp)->b_bcount - (bp)->b_resid; \ 289 if ((bp)->b_flags & B_READ) { \ 290 kip->reads++; \ 291 kip->nread += n_done; \ 292 } else { \ 293 kip->writes++; \ 294 kip->nwritten += n_done; \ 295 } \ 296 } 297 298 #ifdef DEBUG 299 #define DPRINTF(flag, args) {if (xdf_debug & (flag)) prom_printf args; } 300 #define SETDMACBON(vbd) {(vbd)->xdf_dmacallback_num++; } 301 #define SETDMACBOFF(vbd) {(vbd)->xdf_dmacallback_num--; } 302 #define ISDMACBON(vbd) ((vbd)->xdf_dmacallback_num > 0) 303 #else 304 #define DPRINTF(flag, args) 305 #define SETDMACBON(vbd) 306 #define SETDMACBOFF(vbd) 307 #define ISDMACBON(vbd) 308 #endif /* DEBUG */ 309 310 #define DDI_DBG 0x1 311 #define DMA_DBG 0x2 312 #define INTR_DBG 0x8 313 #define IO_DBG 0x10 314 #define IOCTL_DBG 0x20 315 #define SUSRES_DBG 0x40 316 #define LBL_DBG 0x80 317 318 #ifdef XPV_HVM_DRIVER 319 extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 320 extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, 321 void *); 322 extern void xdfmin(struct buf *bp); 323 extern dev_info_t *xdf_hvm_hold(const char *); 324 extern boolean_t xdf_hvm_connect(dev_info_t *); 325 extern int xdf_hvm_setpgeom(dev_info_t *, cmlb_geom_t *); 326 extern boolean_t xdf_is_cd(dev_info_t *); 327 extern boolean_t xdf_is_rm(dev_info_t *); 328 extern boolean_t xdf_media_req_supported(dev_info_t *); 329 #endif /* XPV_HVM_DRIVER */ 330 331 #ifdef __cplusplus 332 } 333 #endif 334 335 #endif /* _SYS_XDF_H */ 336