xref: /minix/minix/drivers/storage/vnd/vnd.c (revision 83133719)
/* VNode Disk driver, by D.C. van Moolenbroek <david@minix3.org> */

#include <minix/drivers.h>
#include <minix/blockdriver.h>
#include <minix/drvlib.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <assert.h>

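/*
 * Size of the intermediate I/O transfer buffer, in bytes.  Transfer requests
 * are processed in chunks of at most this size.
 */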
#define VND_BUF_SIZE	65536

static struct {
	int fd;			/* file descriptor for the underlying file */
	int openct;		/* number of times the device is open */
	int exiting;		/* exit after the last close? */
	int rdonly;		/* is the device set up read-only? */
	dev_t dev;		/* device on which the file resides */
	ino_t ino;		/* inode number of the file */
	struct device part[DEV_PER_DRIVE];	/* partition bases and sizes */
	struct device subpart[SUB_PER_DRIVE];	/* same for subpartitions */
	struct part_geom geom;	/* geometry information */
	char *buf;		/* intermediate I/O transfer buffer */
} state;

static unsigned int instance;

static int vnd_open(devminor_t, int);
static int vnd_close(devminor_t);
static int vnd_transfer(devminor_t, int, u64_t, endpoint_t, iovec_t *,
	unsigned int, int);
static int vnd_ioctl(devminor_t, unsigned long, endpoint_t, cp_grant_id_t,
	endpoint_t);
static struct device *vnd_part(devminor_t);
static void vnd_geometry(devminor_t, struct part_geom *);

static struct blockdriver vnd_dtab = {
	.bdr_type	= BLOCKDRIVER_TYPE_DISK,
	.bdr_open	= vnd_open,
	.bdr_close	= vnd_close,
	.bdr_transfer	= vnd_transfer,
	.bdr_ioctl	= vnd_ioctl,
	.bdr_part	= vnd_part,
	.bdr_geometry	= vnd_geometry
};

/*
 * Parse partition tables.
 */
static void
vnd_partition(void)
{
	memset(state.part, 0, sizeof(state.part));
	memset(state.subpart, 0, sizeof(state.subpart));

	state.part[0].dv_size = state.geom.size;

	partition(&vnd_dtab, 0, P_PRIMARY, FALSE /*atapi*/);
}

/*
 * Open a device.
 */
static int
vnd_open(devminor_t minor, int access)
{
	/* No sub/partition devices are available before initialization. */
	if (state.fd == -1 && minor != 0)
		return ENXIO;
	else if (state.fd != -1 && vnd_part(minor) == NULL)
		return ENXIO;

	/*
	 * If the device is either not configured or configured as read-only,
	 * block open calls that request write permission.  This is what user-
	 * land expects, although it does mean that vnconfig(8) has to open the
	 * device as read-only in order to (un)configure it.
	 */
	if (access & BDEV_W_BIT) {
		if (state.fd == -1)
			return ENXIO;
		if (state.rdonly)
			return EACCES;
	}

	/*
	 * Userland expects that if the device is opened after having been
	 * fully closed, partition tables are (re)parsed.  Since we already
	 * parse partition tables upon initialization, we could skip this for
	 * the first open, but that would introduce more state.
	 */
	if (state.fd != -1 && state.openct == 0) {
		vnd_partition();

		/* Make sure our target device didn't just disappear. */
		if (vnd_part(minor) == NULL)
			return ENXIO;
	}

	state.openct++;

	return OK;
}

/*
 * Close a device.
 */
static int
vnd_close(devminor_t UNUSED(minor))
{
	if (state.openct == 0) {
		printf("VND%u: closing already-closed device\n", instance);
		return EINVAL;
	}

	state.openct--;

	if (state.exiting)
		blockdriver_terminate();

	return OK;
}

/*
 * Copy a number of bytes from or to the caller, to or from the intermediate
 * buffer.  If the given endpoint is SELF, a local memory copy must be made.
 */
static int
vnd_copy(iovec_s_t *iov, size_t iov_off, size_t bytes, endpoint_t endpt,
	int do_write)
{
	struct vscp_vec vvec[SCPVEC_NR], *vvp;
	size_t off, chunk;
	int count;
	char *ptr;

	assert(bytes > 0 && bytes <= VND_BUF_SIZE);

	vvp = vvec;
	count = 0;

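	/*
	 * Walk through the data in chunks.  For a remote endpoint, gather the
	 * chunks into a vector so that a single sys_vsafecopy() call suffices.
	 */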
	for (off = 0; off < bytes; off += chunk) {
		chunk = MIN(bytes - off, iov->iov_size - iov_off);

		if (endpt == SELF) {
			ptr = (char *) iov->iov_grant + iov_off;

			if (do_write)
				memcpy(&state.buf[off], ptr, chunk);
			else
				memcpy(ptr, &state.buf[off], chunk);
		} else {
			assert(count < SCPVEC_NR); /* SCPVEC_NR >= NR_IOREQS */

			vvp->v_from = do_write ? endpt : SELF;
			vvp->v_to = do_write ? SELF : endpt;
			vvp->v_bytes = chunk;
			vvp->v_gid = iov->iov_grant;
			vvp->v_offset = iov_off;
			vvp->v_addr = (vir_bytes) &state.buf[off];

			vvp++;
			count++;
		}

		iov_off += chunk;
		if (iov_off == iov->iov_size) {
			iov++;
			iov_off = 0;
		}
	}

	if (endpt != SELF)
		return sys_vsafecopy(vvec, count);
	else
		return OK;
}

/*
 * Advance the given I/O vector, and the offset into its first element, by the
 * given number of bytes.
 */
static iovec_s_t *
vnd_advance(iovec_s_t *iov, size_t *iov_offp, size_t bytes)
{
	size_t iov_off;

	assert(bytes > 0 && bytes <= VND_BUF_SIZE);

	iov_off = *iov_offp;

	while (bytes > 0) {
		if (bytes >= iov->iov_size - iov_off) {
			bytes -= iov->iov_size - iov_off;
			iov++;
			iov_off = 0;
		} else {
			iov_off += bytes;
			bytes = 0;
		}
	}

	*iov_offp = iov_off;
	return iov;
}

/*
 * Perform data transfer on the selected device.
 */
static int
vnd_transfer(devminor_t minor, int do_write, u64_t position,
	endpoint_t endpt, iovec_t *iovt, unsigned int nr_req, int flags)
{
	struct device *dv;
	iovec_s_t *iov;
	size_t off, chunk, bytes, iov_off;
	ssize_t r;
	unsigned int i;

	iov = (iovec_s_t *) iovt;

	if (state.fd == -1 || (dv = vnd_part(minor)) == NULL)
		return ENXIO;

	/* Prevent write operations on devices configured as read-only. */
	if (do_write && state.rdonly)
		return EACCES;

	/* Determine the total number of bytes to transfer. */
	if (position >= dv->dv_size)
		return 0;

	bytes = 0;

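	/*
	 * Each element must be nonempty and no larger than LONG_MAX, and the
	 * running total is capped at LONG_MAX to prevent overflow.
	 */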
	for (i = 0; i < nr_req; i++) {
		if (iov[i].iov_size == 0 || iov[i].iov_size > LONG_MAX)
			return EINVAL;
		bytes += iov[i].iov_size;
		if (bytes > LONG_MAX)
			return EINVAL;
	}

	if (bytes > dv->dv_size - position)
		bytes = dv->dv_size - position;

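	/* Translate the partition-relative position into a file offset. */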
	position += dv->dv_base;

	/* Perform the actual transfer, in chunks if necessary. */
	iov_off = 0;

	for (off = 0; off < bytes; off += chunk) {
		chunk = MIN(bytes - off, VND_BUF_SIZE);

		assert((unsigned int) (iov - (iovec_s_t *) iovt) < nr_req);

		/* For reads, read in the data for the chunk; possibly less. */
		if (!do_write) {
			chunk = r = pread(state.fd, state.buf, chunk,
			    position);

			if (r < 0) {
				printf("VND%u: pread failed (%d)\n", instance,
				    -errno);
				return -errno;
			}
			if (r == 0)
				break;
		}

		/* Copy the data for this chunk from or to the caller. */
		if ((r = vnd_copy(iov, iov_off, chunk, endpt, do_write)) < 0) {
			printf("VND%u: data copy failed (%d)\n", instance, r);
			return r;
		}

		/* For writes, write the data to the file; possibly less. */
		if (do_write) {
			chunk = r = pwrite(state.fd, state.buf, chunk,
			    position);

			if (r <= 0) {
				if (r < 0)
					r = -errno;
				printf("VND%u: pwrite failed (%d)\n", instance,
				    r);
				return (r < 0) ? r : EIO;
			}
		}

		/* Move ahead on the I/O vector and the file position. */
		iov = vnd_advance(iov, &iov_off, chunk);

		position += chunk;
	}

	/* If force-write is requested, flush the underlying file to disk. */
	if (do_write && (flags & BDEV_FORCEWRITE))
		fsync(state.fd);

	/* Return the number of bytes transferred. */
	return off;
}

/*
 * Initialize the size and geometry for the device and any partitions.  If the
 * user provided a geometry, this will be used; otherwise, a geometry will be
 * computed.
 */
static int
vnd_layout(u64_t size, struct vnd_ioctl *vnd)
{
	u64_t sectors;

	state.geom.base = 0ULL;

	if (vnd->vnd_flags & VNDIOF_HASGEOM) {
		/*
		 * The geometry determines the accessible part of the file.
		 * The resulting size must not exceed the file size.
		 */
		state.geom.cylinders = vnd->vnd_geom.vng_ncylinders;
		state.geom.heads = vnd->vnd_geom.vng_ntracks;
		state.geom.sectors = vnd->vnd_geom.vng_nsectors;

		state.geom.size = (u64_t) state.geom.cylinders *
		    state.geom.heads * state.geom.sectors *
		    vnd->vnd_geom.vng_secsize;
		if (state.geom.size == 0 || state.geom.size > size)
			return EINVAL;
	} else {
		sectors = size / SECTOR_SIZE;
		state.geom.size = sectors * SECTOR_SIZE;

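		/*
		 * Default to 64 heads and 32 sectors per track.  For example,
		 * a 1 GiB file has 2097152 512-byte sectors, which yields a
		 * geometry of 1024 cylinders, 64 heads, and 32 sectors.
		 */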
		if (sectors >= 32 * 64) {
			state.geom.cylinders = sectors / (32 * 64);
			state.geom.heads = 64;
			state.geom.sectors = 32;
		} else {
			state.geom.cylinders = sectors;
			state.geom.heads = 1;
			state.geom.sectors = 1;
		}
	}

	/*
	 * Parse partition tables immediately, so that (sub)partitions can be
	 * opened right away.  The first open will perform the same procedure,
	 * but that is only necessary to match userland expectations.
	 */
	vnd_partition();

	return OK;
}

/*
 * Process I/O control requests.
 */
static int
vnd_ioctl(devminor_t UNUSED(minor), unsigned long request, endpoint_t endpt,
	cp_grant_id_t grant, endpoint_t user_endpt)
{
	struct vnd_ioctl vnd;
	struct vnd_user vnu;
	struct stat st;
	int r;

	switch (request) {
	case VNDIOCSET:
		/*
		 * The VND must not be busy.  Note that the caller has the
		 * device open to perform the IOCTL request.
		 */
		if (state.fd != -1 || state.openct != 1)
			return EBUSY;

		if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &vnd,
		    sizeof(vnd))) != OK)
			return r;

		/*
		 * Issue a special VFS backcall that copies a file descriptor
		 * to the current process, from the user process ultimately
		 * making the IOCTL call.  The result is either a newly
		 * allocated file descriptor or an error.
		 */
		if ((r = copyfd(user_endpt, vnd.vnd_fildes, COPYFD_FROM)) < 0)
			return r;

		state.fd = r;
		r = OK;

		/* The target file must be regular. */
		if (fstat(state.fd, &st) == -1) {
			printf("VND%u: fstat failed (%d)\n", instance, -errno);
			r = -errno;
		}
		if (r == OK && !S_ISREG(st.st_mode))
			r = EINVAL;

		/*
		 * Allocate memory for an intermediate I/O transfer buffer.  In
		 * order to save on memory in the common case, the buffer is
		 * only allocated when the vnd is in use.  We use mmap instead
		 * of malloc to allow the memory to be actually freed later.
		 */
		if (r == OK) {
			state.buf = mmap(NULL, VND_BUF_SIZE, PROT_READ |
			    PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
			if (state.buf == MAP_FAILED)
				r = ENOMEM;
		}

		if (r != OK) {
			close(state.fd);
			state.fd = -1;
			return r;
		}

		/* Set various device state fields. */
		state.dev = st.st_dev;
		state.ino = st.st_ino;
		state.rdonly = !!(vnd.vnd_flags & VNDIOF_READONLY);

		r = vnd_layout(st.st_size, &vnd);

		/* Upon success, return the device size to userland. */
		if (r == OK) {
			vnd.vnd_size = state.geom.size;

			r = sys_safecopyto(endpt, grant, 0, (vir_bytes) &vnd,
			    sizeof(vnd));
		}

		if (r != OK) {
			munmap(state.buf, VND_BUF_SIZE);
			close(state.fd);
			state.fd = -1;
		}

		return r;

	case VNDIOCCLR:
		/* The VND can only be cleared if it has been configured. */
		if (state.fd == -1)
			return ENXIO;

		if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &vnd,
		    sizeof(vnd))) != OK)
			return r;

		/* The caller has the device open to do the IOCTL request. */
		if (!(vnd.vnd_flags & VNDIOF_FORCE) && state.openct != 1)
			return EBUSY;

		/*
		 * Close the associated file descriptor immediately, but do not
		 * allow reuse until the device has been closed by the other
		 * users.
		 */
		munmap(state.buf, VND_BUF_SIZE);
		close(state.fd);
		state.fd = -1;

		return OK;

	case VNDIOCGET:
		/*
		 * We need not copy in the given structure.  It would contain
		 * the requested unit number, but each driver instance provides
		 * only one unit anyway.
		 */

		memset(&vnu, 0, sizeof(vnu));

		vnu.vnu_unit = instance;

		/* Leave these fields zeroed if the device is not in use. */
		if (state.fd != -1) {
			vnu.vnu_dev = state.dev;
			vnu.vnu_ino = state.ino;
		}

		return sys_safecopyto(endpt, grant, 0, (vir_bytes) &vnu,
		    sizeof(vnu));

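	/* Return the current open count of the device. */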
	case DIOCOPENCT:
		return sys_safecopyto(endpt, grant, 0,
		    (vir_bytes) &state.openct, sizeof(state.openct));

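	/* Flush any pending writes on the underlying file to disk. */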
	case DIOCFLUSH:
		if (state.fd == -1)
			return ENXIO;

		fsync(state.fd);

		return OK;
	}

	return ENOTTY;
}

/*
 * Return a pointer to the partition structure for the given minor device.
 */
static struct device *
vnd_part(devminor_t minor)
{
	if (minor >= 0 && minor < DEV_PER_DRIVE)
		return &state.part[minor];
	else if ((unsigned int) (minor -= MINOR_d0p0s0) < SUB_PER_DRIVE)
		return &state.subpart[minor];
	else
		return NULL;
}

/*
 * Return geometry information.
 */
static void
vnd_geometry(devminor_t UNUSED(minor), struct part_geom *part)
{
	part->cylinders = state.geom.cylinders;
	part->heads = state.geom.heads;
	part->sectors = state.geom.sectors;
}

/*
 * Initialize the device.
 */
static int
vnd_init(int UNUSED(type), sef_init_info_t *UNUSED(info))
{
	long v;

	/*
	 * No support for crash recovery.  The driver would have no way to
	 * reacquire the file descriptor for the target file.
	 */

	/*
	 * The instance number is used for two purposes: reporting errors, and
	 * returning the proper unit number to userland in VNDIOCGET calls.
	 */
	v = 0;
	(void) env_parse("instance", "d", 0, &v, 0, 255);
	instance = (unsigned int) v;

	state.openct = 0;
	state.exiting = FALSE;
	state.fd = -1;

	return OK;
}

/*
 * Process an incoming signal.
 */
static void
vnd_signal(int signo)
{

	/* In case of a termination signal, initiate driver shutdown. */
	if (signo != SIGTERM)
		return;

	state.exiting = TRUE;

	/* Keep running until the device has been fully closed. */
	if (state.openct == 0)
		blockdriver_terminate();
}

/*
 * Set callbacks and initialize the System Event Framework (SEF).
 */
static void
vnd_startup(void)
{

	/* Register init and signal callbacks. */
	sef_setcb_init_fresh(vnd_init);
	sef_setcb_signal_handler(vnd_signal);

	/* Let SEF perform startup. */
	sef_startup();
}

/*
 * Driver task.
 */
int
main(int argc, char **argv)
{

	/* Initialize the driver. */
	env_setargs(argc, argv);
	vnd_startup();

	/* Process requests until shutdown. */
	blockdriver_task(&vnd_dtab);

	return 0;
}

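/*
 * Illustrative sketch (not part of the driver): roughly how a userland tool
 * such as vnconfig(8) might drive the ioctl interface above.  The device path
 * and image name are hypothetical; includes and error handling are omitted.
 *
 *	struct vnd_ioctl vnd;
 *	int fd = open("/dev/vnd0", O_RDONLY);	// read-only; see vnd_open()
 *
 *	memset(&vnd, 0, sizeof(vnd));
 *	vnd.vnd_fildes = open("disk.img", O_RDWR);	// the backing file
 *	vnd.vnd_flags = 0;			// or VNDIOF_READONLY, etc.
 *	if (ioctl(fd, VNDIOCSET, &vnd) == 0)	// configure the device
 *		printf("size: %llu bytes\n",
 *		    (unsigned long long) vnd.vnd_size);
 *	...
 *	vnd.vnd_flags = 0;			// or VNDIOF_FORCE
 *	(void) ioctl(fd, VNDIOCCLR, &vnd);	// unconfigure again
 */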