1 /* VNode Disk driver, by D.C. van Moolenbroek <david@minix3.org> */ 2 3 #include <minix/drivers.h> 4 #include <minix/blockdriver.h> 5 #include <minix/drvlib.h> 6 #include <sys/ioctl.h> 7 #include <sys/mman.h> 8 #include <sys/stat.h> 9 #include <fcntl.h> 10 #include <assert.h> 11 12 #define VND_BUF_SIZE 65536 13 14 static struct { 15 int fd; /* file descriptor for the underlying file */ 16 int openct; /* number of times the device is open */ 17 int exiting; /* exit after the last close? */ 18 int rdonly; /* is the device set up read-only? */ 19 dev_t dev; /* device on which the file resides */ 20 ino_t ino; /* inode number of the file */ 21 struct device part[DEV_PER_DRIVE]; /* partition bases and sizes */ 22 struct device subpart[SUB_PER_DRIVE]; /* same for subpartitions */ 23 struct part_geom geom; /* geometry information */ 24 char *buf; /* intermediate I/O transfer buffer */ 25 } state; 26 27 static unsigned int instance; 28 29 static int vnd_open(devminor_t, int); 30 static int vnd_close(devminor_t); 31 static int vnd_transfer(devminor_t, int, u64_t, endpoint_t, iovec_t *, 32 unsigned int, int); 33 static int vnd_ioctl(devminor_t, unsigned long, endpoint_t, cp_grant_id_t, 34 endpoint_t); 35 static struct device *vnd_part(devminor_t); 36 static void vnd_geometry(devminor_t, struct part_geom *); 37 38 static struct blockdriver vnd_dtab = { 39 .bdr_type = BLOCKDRIVER_TYPE_DISK, 40 .bdr_open = vnd_open, 41 .bdr_close = vnd_close, 42 .bdr_transfer = vnd_transfer, 43 .bdr_ioctl = vnd_ioctl, 44 .bdr_part = vnd_part, 45 .bdr_geometry = vnd_geometry 46 }; 47 48 /* 49 * Parse partition tables. 50 */ 51 static void 52 vnd_partition(void) 53 { 54 memset(state.part, 0, sizeof(state.part)); 55 memset(state.subpart, 0, sizeof(state.subpart)); 56 57 state.part[0].dv_size = state.geom.size; 58 59 partition(&vnd_dtab, 0, P_PRIMARY, FALSE /*atapi*/); 60 } 61 62 /* 63 * Open a device. 64 */ 65 static int 66 vnd_open(devminor_t minor, int access) 67 { 68 /* No sub/partition devices are available before initialization. */ 69 if (state.fd == -1 && minor != 0) 70 return ENXIO; 71 else if (state.fd != -1 && vnd_part(minor) == NULL) 72 return ENXIO; 73 74 /* 75 * If the device either is not configured or configured as read-only, 76 * block open calls that request write permission. This is what user- 77 * land expects, although it does mean that vnconfig(8) has to open the 78 * device as read-only in order to (un)configure it. 79 */ 80 if (access & BDEV_W_BIT) { 81 if (state.fd == -1) 82 return ENXIO; 83 if (state.rdonly) 84 return EACCES; 85 } 86 87 /* 88 * Userland expects that if the device is opened after having been 89 * fully closed, partition tables are (re)parsed. Since we already 90 * parse partition tables upon initialization, we could skip this for 91 * the first open, but that would introduce more state. 92 */ 93 if (state.fd != -1 && state.openct == 0) { 94 vnd_partition(); 95 96 /* Make sure our target device didn't just disappear. */ 97 if (vnd_part(minor) == NULL) 98 return ENXIO; 99 } 100 101 state.openct++; 102 103 return OK; 104 } 105 106 /* 107 * Close a device. 108 */ 109 static int 110 vnd_close(devminor_t UNUSED(minor)) 111 { 112 if (state.openct == 0) { 113 printf("VND%u: closing already-closed device\n", instance); 114 return EINVAL; 115 } 116 117 state.openct--; 118 119 if (state.exiting) 120 blockdriver_terminate(); 121 122 return OK; 123 } 124 125 /* 126 * Copy a number of bytes from or to the caller, to or from the intermediate 127 * buffer. If the given endpoint is SELF, a local memory copy must be made. 128 */ 129 static int 130 vnd_copy(iovec_s_t *iov, size_t iov_off, size_t bytes, endpoint_t endpt, 131 int do_write) 132 { 133 struct vscp_vec vvec[SCPVEC_NR], *vvp; 134 size_t off, chunk; 135 int count; 136 char *ptr; 137 138 assert(bytes > 0 && bytes <= VND_BUF_SIZE); 139 140 vvp = vvec; 141 count = 0; 142 143 for (off = 0; off < bytes; off += chunk) { 144 chunk = MIN(bytes - off, iov->iov_size - iov_off); 145 146 if (endpt == SELF) { 147 ptr = (char *) iov->iov_grant + iov_off; 148 149 if (do_write) 150 memcpy(&state.buf[off], ptr, chunk); 151 else 152 memcpy(ptr, &state.buf[off], chunk); 153 } else { 154 assert(count < SCPVEC_NR); /* SCPVEC_NR >= NR_IOREQS */ 155 156 vvp->v_from = do_write ? endpt : SELF; 157 vvp->v_to = do_write ? SELF : endpt; 158 vvp->v_bytes = chunk; 159 vvp->v_gid = iov->iov_grant; 160 vvp->v_offset = iov_off; 161 vvp->v_addr = (vir_bytes) &state.buf[off]; 162 163 vvp++; 164 count++; 165 } 166 167 iov_off += chunk; 168 if (iov_off == iov->iov_size) { 169 iov++; 170 iov_off = 0; 171 } 172 } 173 174 if (endpt != SELF) 175 return sys_vsafecopy(vvec, count); 176 else 177 return OK; 178 } 179 180 /* 181 * Advance the given I/O vector, and the offset into its first element, by the 182 * given number of bytes. 183 */ 184 static iovec_s_t * 185 vnd_advance(iovec_s_t *iov, size_t *iov_offp, size_t bytes) 186 { 187 size_t iov_off; 188 189 assert(bytes > 0 && bytes <= VND_BUF_SIZE); 190 191 iov_off = *iov_offp; 192 193 while (bytes > 0) { 194 if (bytes >= iov->iov_size - iov_off) { 195 bytes -= iov->iov_size - iov_off; 196 iov++; 197 iov_off = 0; 198 } else { 199 iov_off += bytes; 200 bytes = 0; 201 } 202 } 203 204 *iov_offp = iov_off; 205 return iov; 206 } 207 208 /* 209 * Perform data transfer on the selected device. 210 */ 211 static int 212 vnd_transfer(devminor_t minor, int do_write, u64_t position, 213 endpoint_t endpt, iovec_t *iovt, unsigned int nr_req, int flags) 214 { 215 struct device *dv; 216 iovec_s_t *iov; 217 size_t off, chunk, bytes, iov_off; 218 ssize_t r; 219 unsigned int i; 220 221 iov = (iovec_s_t *) iovt; 222 223 if (state.fd == -1 || (dv = vnd_part(minor)) == NULL) 224 return ENXIO; 225 226 /* Prevent write operations on devices opened as write-only. */ 227 if (do_write && state.rdonly) 228 return EACCES; 229 230 /* Determine the total number of bytes to transfer. */ 231 if (position >= dv->dv_size) 232 return 0; 233 234 bytes = 0; 235 236 for (i = 0; i < nr_req; i++) { 237 if (iov[i].iov_size == 0 || iov[i].iov_size > LONG_MAX) 238 return EINVAL; 239 bytes += iov[i].iov_size; 240 if (bytes > LONG_MAX) 241 return EINVAL; 242 } 243 244 if (bytes > dv->dv_size - position) 245 bytes = dv->dv_size - position; 246 247 position += dv->dv_base; 248 249 /* Perform the actual transfer, in chunks if necessary. */ 250 iov_off = 0; 251 252 for (off = 0; off < bytes; off += chunk) { 253 chunk = MIN(bytes - off, VND_BUF_SIZE); 254 255 assert((unsigned int) (iov - (iovec_s_t *) iovt) < nr_req); 256 257 /* For reads, read in the data for the chunk; possibly less. */ 258 if (!do_write) { 259 chunk = r = pread(state.fd, state.buf, chunk, 260 position); 261 262 if (r < 0) { 263 printf("VND%u: pread failed (%d)\n", instance, 264 -errno); 265 return -errno; 266 } 267 if (r == 0) 268 break; 269 } 270 271 /* Copy the data for this chunk from or to the caller. */ 272 if ((r = vnd_copy(iov, iov_off, chunk, endpt, do_write)) < 0) { 273 printf("VND%u: data copy failed (%d)\n", instance, r); 274 return r; 275 } 276 277 /* For writes, write the data to the file; possibly less. */ 278 if (do_write) { 279 chunk = r = pwrite(state.fd, state.buf, chunk, 280 position); 281 282 if (r <= 0) { 283 if (r < 0) 284 r = -errno; 285 printf("VND%u: pwrite failed (%d)\n", instance, 286 r); 287 return (r < 0) ? r : EIO; 288 } 289 } 290 291 /* Move ahead on the I/O vector and the file position. */ 292 iov = vnd_advance(iov, &iov_off, chunk); 293 294 position += chunk; 295 } 296 297 /* If force-write is requested, flush the underlying file to disk. */ 298 if (do_write && (flags & BDEV_FORCEWRITE)) 299 fsync(state.fd); 300 301 /* Return the number of bytes transferred. */ 302 return off; 303 } 304 305 /* 306 * Initialize the size and geometry for the device and any partitions. If the 307 * user provided a geometry, this will be used; otherwise, a geometry will be 308 * computed. 309 */ 310 static int 311 vnd_layout(u64_t size, struct vnd_ioctl *vnd) 312 { 313 u64_t sectors; 314 315 state.geom.base = 0ULL; 316 317 if (vnd->vnd_flags & VNDIOF_HASGEOM) { 318 /* 319 * The geometry determines the accessible part of the file. 320 * The resulting size must not exceed the file size. 321 */ 322 state.geom.cylinders = vnd->vnd_geom.vng_ncylinders; 323 state.geom.heads = vnd->vnd_geom.vng_ntracks; 324 state.geom.sectors = vnd->vnd_geom.vng_nsectors; 325 326 state.geom.size = (u64_t) state.geom.cylinders * 327 state.geom.heads * state.geom.sectors * 328 vnd->vnd_geom.vng_secsize; 329 if (state.geom.size == 0 || state.geom.size > size) 330 return EINVAL; 331 } else { 332 sectors = size / SECTOR_SIZE; 333 state.geom.size = sectors * SECTOR_SIZE; 334 335 if (sectors >= 32 * 64) { 336 state.geom.cylinders = sectors / (32 * 64); 337 state.geom.heads = 64; 338 state.geom.sectors = 32; 339 } else { 340 state.geom.cylinders = sectors; 341 state.geom.heads = 1; 342 state.geom.sectors = 1; 343 } 344 } 345 346 /* 347 * Parse partition tables immediately, so that (sub)partitions can be 348 * opened right away. The first open will perform the same procedure, 349 * but that is only necessary to match userland expectations. 350 */ 351 vnd_partition(); 352 353 return OK; 354 } 355 356 /* 357 * Process I/O control requests. 358 */ 359 static int 360 vnd_ioctl(devminor_t UNUSED(minor), unsigned long request, endpoint_t endpt, 361 cp_grant_id_t grant, endpoint_t user_endpt) 362 { 363 struct vnd_ioctl vnd; 364 struct vnd_user vnu; 365 struct stat st; 366 int r; 367 368 switch (request) { 369 case VNDIOCSET: 370 /* 371 * The VND must not be busy. Note that the caller has the 372 * device open to perform the IOCTL request. 373 */ 374 if (state.fd != -1 || state.openct != 1) 375 return EBUSY; 376 377 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &vnd, 378 sizeof(vnd))) != OK) 379 return r; 380 381 /* 382 * Issue a special VFS backcall that copies a file descriptor 383 * to the current process, from the user process ultimately 384 * making the IOCTL call. The result is either a newly 385 * allocated file descriptor or an error. 386 */ 387 if ((r = copyfd(user_endpt, vnd.vnd_fildes, COPYFD_FROM)) < 0) 388 return r; 389 390 state.fd = r; 391 392 /* The target file must be regular. */ 393 if (fstat(state.fd, &st) == -1) { 394 printf("VND%u: fstat failed (%d)\n", instance, -errno); 395 r = -errno; 396 } 397 if (r == OK && !S_ISREG(st.st_mode)) 398 r = EINVAL; 399 400 /* 401 * Allocate memory for an intermediate I/O transfer buffer. In 402 * order to save on memory in the common case, the buffer is 403 * only allocated when the vnd is in use. We use mmap instead 404 * of malloc to allow the memory to be actually freed later. 405 */ 406 if (r == OK) { 407 state.buf = mmap(NULL, VND_BUF_SIZE, PROT_READ | 408 PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); 409 if (state.buf == MAP_FAILED) 410 r = ENOMEM; 411 } 412 413 if (r != OK) { 414 close(state.fd); 415 state.fd = -1; 416 return r; 417 } 418 419 /* Set various device state fields. */ 420 state.dev = st.st_dev; 421 state.ino = st.st_ino; 422 state.rdonly = !!(vnd.vnd_flags & VNDIOF_READONLY); 423 424 r = vnd_layout(st.st_size, &vnd); 425 426 /* Upon success, return the device size to userland. */ 427 if (r == OK) { 428 vnd.vnd_size = state.geom.size; 429 430 r = sys_safecopyto(endpt, grant, 0, (vir_bytes) &vnd, 431 sizeof(vnd)); 432 } 433 434 if (r != OK) { 435 munmap(state.buf, VND_BUF_SIZE); 436 close(state.fd); 437 state.fd = -1; 438 } 439 440 return r; 441 442 case VNDIOCCLR: 443 /* The VND can only be cleared if it has been configured. */ 444 if (state.fd == -1) 445 return ENXIO; 446 447 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &vnd, 448 sizeof(vnd))) != OK) 449 return r; 450 451 /* The caller has the device open to do the IOCTL request. */ 452 if (!(vnd.vnd_flags & VNDIOF_FORCE) && state.openct != 1) 453 return EBUSY; 454 455 /* 456 * Close the associated file descriptor immediately, but do not 457 * allow reuse until the device has been closed by the other 458 * users. 459 */ 460 munmap(state.buf, VND_BUF_SIZE); 461 close(state.fd); 462 state.fd = -1; 463 464 return OK; 465 466 case VNDIOCGET: 467 /* 468 * We need not copy in the given structure. It would contain 469 * the requested unit number, but each driver instance provides 470 * only one unit anyway. 471 */ 472 473 memset(&vnu, 0, sizeof(vnu)); 474 475 vnu.vnu_unit = instance; 476 477 /* Leave these fields zeroed if the device is not in use. */ 478 if (state.fd != -1) { 479 vnu.vnu_dev = state.dev; 480 vnu.vnu_ino = state.ino; 481 } 482 483 return sys_safecopyto(endpt, grant, 0, (vir_bytes) &vnu, 484 sizeof(vnu)); 485 486 case DIOCOPENCT: 487 return sys_safecopyto(endpt, grant, 0, 488 (vir_bytes) &state.openct, sizeof(state.openct)); 489 490 case DIOCFLUSH: 491 if (state.fd == -1) 492 return ENXIO; 493 494 fsync(state.fd); 495 496 return OK; 497 } 498 499 return ENOTTY; 500 } 501 502 /* 503 * Return a pointer to the partition structure for the given minor device. 504 */ 505 static struct device * 506 vnd_part(devminor_t minor) 507 { 508 if (minor >= 0 && minor < DEV_PER_DRIVE) 509 return &state.part[minor]; 510 else if ((unsigned int) (minor -= MINOR_d0p0s0) < SUB_PER_DRIVE) 511 return &state.subpart[minor]; 512 else 513 return NULL; 514 } 515 516 /* 517 * Return geometry information. 518 */ 519 static void 520 vnd_geometry(devminor_t UNUSED(minor), struct part_geom *part) 521 { 522 part->cylinders = state.geom.cylinders; 523 part->heads = state.geom.heads; 524 part->sectors = state.geom.sectors; 525 } 526 527 /* 528 * Initialize the device. 529 */ 530 static int 531 vnd_init(int UNUSED(type), sef_init_info_t *UNUSED(info)) 532 { 533 long v; 534 535 /* 536 * No support for crash recovery. The driver would have no way to 537 * reacquire the file descriptor for the target file. 538 */ 539 540 /* 541 * The instance number is used for two purposes: reporting errors, and 542 * returning the proper unit number to userland in VNDIOCGET calls. 543 */ 544 v = 0; 545 (void) env_parse("instance", "d", 0, &v, 0, 255); 546 instance = (unsigned int) v; 547 548 state.openct = 0; 549 state.exiting = FALSE; 550 state.fd = -1; 551 552 return OK; 553 } 554 555 /* 556 * Process an incoming signal. 557 */ 558 static void 559 vnd_signal(int signo) 560 { 561 562 /* In case of a termination signal, initiate driver shutdown. */ 563 if (signo != SIGTERM) 564 return; 565 566 state.exiting = TRUE; 567 568 /* Keep running until the device has been fully closed. */ 569 if (state.openct == 0) 570 blockdriver_terminate(); 571 } 572 573 /* 574 * Set callbacks and initialize the System Event Framework (SEF). 575 */ 576 static void 577 vnd_startup(void) 578 { 579 580 /* Register init and signal callbacks. */ 581 sef_setcb_init_fresh(vnd_init); 582 sef_setcb_signal_handler(vnd_signal); 583 584 /* Let SEF perform startup. */ 585 sef_startup(); 586 } 587 588 /* 589 * Driver task. 590 */ 591 int 592 main(int argc, char **argv) 593 { 594 595 /* Initialize the driver. */ 596 env_setargs(argc, argv); 597 vnd_startup(); 598 599 /* Process requests until shutdown. */ 600 blockdriver_task(&vnd_dtab); 601 602 return 0; 603 } 604