xref: /dragonfly/sys/kern/subr_diskiocom.c (revision fbc9049b)
1 /*
2  * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/proc.h>
38 #include <sys/sysctl.h>
39 #include <sys/buf.h>
40 #include <sys/conf.h>
41 #include <sys/disklabel.h>
42 #include <sys/disklabel32.h>
43 #include <sys/disklabel64.h>
44 #include <sys/diskslice.h>
45 #include <sys/diskmbr.h>
46 #include <sys/disk.h>
47 #include <sys/malloc.h>
48 #include <sys/device.h>
49 #include <sys/devfs.h>
50 #include <sys/thread.h>
51 #include <sys/queue.h>
52 #include <sys/lock.h>
53 #include <sys/stat.h>
54 #include <sys/uuid.h>
55 #include <sys/dmsg.h>
56 
57 #include <sys/buf2.h>
58 #include <sys/mplock2.h>
59 #include <sys/msgport2.h>
60 #include <sys/thread2.h>
61 
/*
 * Per-transaction open tracking, hung off msg->state->any.any by
 * disk_blk_open().
 */
struct dios_open {
	int	openrd;		/* opens accumulated with DMSG_BLKOPEN_RD */
	int	openwr;		/* opens accumulated with DMSG_BLKOPEN_WR */
};
66 
/*
 * Per-transaction I/O tracking for read/write/flush/freeblks, hung off
 * msg->state->any.any.
 */
struct dios_io {
	int	count;		/* BIOs issued and not yet completed */
	int	eof;		/* set once DMSGF_DELETE has been seen; the
				 * last completion sets DMSGF_DELETE on its
				 * reply (see diskiodone()) */
	kdmsg_data_t data;	/* aux data detached from the message on the
				 * optimized write path */
};
72 
/* Malloc type for per-disk kdmsg iocom allocations. */
static MALLOC_DEFINE(M_DMSG_DISK, "dmsg_disk", "disk dmsg");

/*
 * Number of BIOs issued on behalf of remote dmsg requests and still in
 * flight; exported as debug.blk_active.
 */
static int blk_active;
SYSCTL_INT(_debug, OID_AUTO, blk_active, CTLFLAG_RW, &blk_active, 0,
           "Number of active iocom IOs");

static int disk_iocom_reconnect(struct disk *dp, struct file *fp);
static int disk_rcvdmsg(kdmsg_msg_t *msg);

/* Handlers for the block-protocol transactions, dispatched by disk_rcvdmsg(). */
static void disk_blk_open(struct disk *dp, kdmsg_msg_t *msg);
static void disk_blk_read(struct disk *dp, kdmsg_msg_t *msg);
static void disk_blk_write(struct disk *dp, kdmsg_msg_t *msg);
static void disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg);
static void disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg);
/* BIO completion callback; generates the dmsg reply for the above. */
static void diskiodone(struct bio *bio);
88 
89 void
90 disk_iocom_init(struct disk *dp)
91 {
92 	kdmsg_iocom_init(&dp->d_iocom, dp,
93 			 KDMSG_IOCOMF_AUTOCONN |
94 			 KDMSG_IOCOMF_AUTORXSPAN |
95 			 KDMSG_IOCOMF_AUTOTXSPAN,
96 			 M_DMSG_DISK, disk_rcvdmsg);
97 }
98 
/*
 * Called when disk parameters change; intentionally a no-op at the
 * moment (updated media info is not re-advertised over the iocom).
 */
void
disk_iocom_update(struct disk *dp)
{
}
103 
/*
 * Tear down the iocom state set up by disk_iocom_init().
 */
void
disk_iocom_uninit(struct disk *dp)
{
	kdmsg_iocom_uninit(&dp->d_iocom);
}
109 
110 int
111 disk_iocom_ioctl(struct disk *dp, int cmd, void *data)
112 {
113 	struct file *fp;
114 	struct disk_ioc_recluster *recl;
115 	int error;
116 
117 	switch(cmd) {
118 	case DIOCRECLUSTER:
119 		recl = data;
120 		fp = holdfp(curproc->p_fd, recl->fd, -1);
121 		if (fp) {
122 			error = disk_iocom_reconnect(dp, fp);
123 		} else {
124 			error = EINVAL;
125 		}
126 		break;
127 	default:
128 		error = EOPNOTSUPP;
129 		break;
130 	}
131 	return error;
132 }
133 
134 static
135 int
136 disk_iocom_reconnect(struct disk *dp, struct file *fp)
137 {
138 	char devname[64];
139 
140 	ksnprintf(devname, sizeof(devname), "%s%d",
141 		  dev_dname(dp->d_rawdev), dkunit(dp->d_rawdev));
142 
143 	kdmsg_iocom_reconnect(&dp->d_iocom, fp, devname);
144 
145 	dp->d_iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
146 	dp->d_iocom.auto_lnk_conn.peer_type = DMSG_PEER_BLOCK;
147 	dp->d_iocom.auto_lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK;
148 	dp->d_iocom.auto_lnk_conn.peer_mask = (uint64_t)-1;
149 #if 0
150 	if (dp->d_info.d_serialno) {
151 		ksnprintf(dp->d_iocom.auto_lnk_conn.peer_label,
152 			  sizeof(dp->d_iocom.auto_lnk_conn.peer_label),
153 			  "%s/%s", hostname, dp->d_info.d_serialno);
154 	} else {
155 		ksnprintf(dp->d_iocom.auto_lnk_conn.peer_label,
156 			  sizeof(dp->d_iocom.auto_lnk_conn.peer_label),
157 			  "%s/%s", hostname, devname);
158 	}
159 #endif
160 	ksnprintf(dp->d_iocom.auto_lnk_conn.peer_label,
161 		  sizeof(dp->d_iocom.auto_lnk_conn.peer_label),
162 		  "%s/%s", hostname, devname);
163 
164 	dp->d_iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1;
165 	dp->d_iocom.auto_lnk_span.peer_type = DMSG_PEER_BLOCK;
166 	dp->d_iocom.auto_lnk_span.media.block.bytes =
167 						dp->d_info.d_media_size;
168 	dp->d_iocom.auto_lnk_span.media.block.blksize =
169 						dp->d_info.d_media_blksize;
170 	ksnprintf(dp->d_iocom.auto_lnk_span.peer_label,
171 		  sizeof(dp->d_iocom.auto_lnk_span.peer_label),
172 		  "%s", dp->d_iocom.auto_lnk_conn.peer_label);
173 	if (dp->d_info.d_serialno) {
174 		ksnprintf(dp->d_iocom.auto_lnk_span.pfs_label,
175 			  sizeof(dp->d_iocom.auto_lnk_span.pfs_label),
176 			  "%s", dp->d_info.d_serialno);
177 	} else {
178 		/*
179 		 * If no serial number is available generate a dummy serial
180 		 * number from the host and device name and pray.  This will
181 		 * allow e.g. /dev/vn* to look meaningful on a remote machine.
182 		 */
183 		ksnprintf(dp->d_iocom.auto_lnk_span.pfs_label,
184 			  sizeof(dp->d_iocom.auto_lnk_span.pfs_label),
185 			  "%s.%s", hostname, devname);
186 	}
187 
188 	kdmsg_iocom_autoinitiate(&dp->d_iocom, NULL);
189 
190 	return (0);
191 }
192 
/*
 * Receive callback for the disk's iocom.  Handles debug messages
 * directly, requires everything else to be inside a transaction, and
 * dispatches block-protocol commands to the disk_blk_*() handlers.
 *
 * Always returns 0.
 */
static int
disk_rcvdmsg(kdmsg_msg_t *msg)
{
	struct disk *dp = msg->state->iocom->handle;

	/*
	 * Handle debug messages (these might not be in transactions)
	 */
	switch(msg->any.head.cmd & DMSGF_CMDSWMASK) {
	case DMSG_DBG_SHELL:
		/*
		 * Execute shell command (not supported atm)
		 */
		kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
		return(0);
	case DMSG_DBG_SHELL | DMSGF_REPLY:
		if (msg->aux_data) {
			/* force NUL termination before printing */
			msg->aux_data[msg->aux_size - 1] = 0;
			kprintf("diskiocom: DEBUGMSG: %s\n", msg->aux_data);
		}
		return(0);
	}

	/*
	 * All remaining messages must be in a transaction.
	 *
	 * NOTE!  We currently don't care if the transaction is just
	 *	  the span transaction (for disk probes) or if it is the
	 *	  BLK_OPEN transaction.
	 *
	 * NOTE!  We are switching on the first message's command.  The
	 *	  actual message command within the transaction may be
	 *	  different (if streaming within a transaction).
	 */
	if (msg->state == &msg->state->iocom->state0) {
		kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
		return(0);
	}

	switch(msg->state->rxcmd & DMSGF_CMDSWMASK) {
	case DMSG_BLK_OPEN:
		disk_blk_open(dp, msg);
		break;
	case DMSG_BLK_READ:
		/*
		 * not reached normally but leave in for completeness
		 */
		disk_blk_read(dp, msg);
		break;
	case DMSG_BLK_WRITE:
		disk_blk_write(dp, msg);
		break;
	case DMSG_BLK_FLUSH:
		disk_blk_flush(dp, msg);
		break;
	case DMSG_BLK_FREEBLKS:
		disk_blk_freeblks(dp, msg);
		break;
	default:
		/*
		 * Unknown transaction command: reply (terminating) if the
		 * half is being deleted, otherwise send an interim result.
		 */
		if ((msg->any.head.cmd & DMSGF_REPLY) == 0) {
			if (msg->any.head.cmd & DMSGF_DELETE)
				kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
			else
				kdmsg_msg_result(msg, DMSG_ERR_NOSUPP);
		}
		break;
	}
	return (0);
}
262 
/*
 * DMSG_BLK_OPEN handler.  Opens the raw device with the requested
 * modes and tracks accumulated read/write opens in a dios_open
 * structure hung off the transaction state.  When the transaction is
 * deleted all accumulated opens are closed and the structure is freed.
 */
static
void
disk_blk_open(struct disk *dp, kdmsg_msg_t *msg)
{
	struct dios_open *openst;
	int error = DMSG_ERR_NOSUPP;
	int fflags;

	openst = msg->state->any.any;
	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_OPEN) {
		if (openst == NULL) {
			openst = kmalloc(sizeof(*openst), M_DEVBUF,
						M_WAITOK | M_ZERO);
			msg->state->any.any = openst;
		}
		/* translate DMSG open modes to kernel FREAD/FWRITE flags */
		fflags = 0;
		if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
			fflags = FREAD;
		if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
			fflags |= FWRITE;
		error = dev_dopen(dp->d_rawdev, fflags, S_IFCHR, proc0.p_ucred, NULL);
		if (error) {
			error = DMSG_ERR_IO;
		} else {
			if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
				++openst->openrd;
			if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
				++openst->openwr;
		}
	}
#if 0
	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_CLOSE &&
	    openst) {
		fflags = 0;
		if ((msg->any.blk_open.modes & DMSG_BLKOPEN_RD) &&
		    openst->openrd) {
			fflags = FREAD;
		}
		if ((msg->any.blk_open.modes & DMSG_BLKOPEN_WR) &&
		    openst->openwr) {
			fflags |= FWRITE;
		}
		error = dev_dclose(dp->d_rawdev, fflags, S_IFCHR, NULL);
		if (error) {
			error = DMSG_ERR_IO;
		} else {
			if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
				--openst->openrd;
			if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
				--openst->openwr;
		}
	}
#endif
	if (msg->any.head.cmd & DMSGF_DELETE) {
		if (openst) {
			/*
			 * Transaction ended: undo every accumulated open,
			 * pairing read+write closes first, then draining
			 * whichever count remains.
			 */
			while (openst->openrd && openst->openwr) {
				--openst->openrd;
				--openst->openwr;
				dev_dclose(dp->d_rawdev, FREAD|FWRITE, S_IFCHR, NULL);
			}
			while (openst->openrd) {
				--openst->openrd;
				dev_dclose(dp->d_rawdev, FREAD, S_IFCHR, NULL);
			}
			while (openst->openwr) {
				--openst->openwr;
				dev_dclose(dp->d_rawdev, FWRITE, S_IFCHR, NULL);
			}
			kfree(openst, M_DEVBUF);
			msg->state->any.any = NULL;
		}
		kdmsg_msg_reply(msg, error);
	} else {
		kdmsg_msg_result(msg, error);
	}
}
339 
/*
 * DMSG_BLK_READ handler.  Validates the requested size, allocates a
 * buffer, and issues an asynchronous read via dev_dstrategy().  The
 * reply is generated by diskiodone() on completion; only parameter
 * errors are replied to here (reterr path).
 */
static
void
disk_blk_read(struct disk *dp, kdmsg_msg_t *msg)
{
	struct dios_io *iost;
	struct buf *bp;
	struct bio *bio;
	int error = DMSG_ERR_NOSUPP;
	int reterr = 1;

	/*
	 * Only DMSG_BLK_READ commands imply read ops.
	 */
	iost = msg->state->any.any;
	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_READ) {
		/* request must be between DEV_BSIZE and MAXPHYS inclusive */
		if (msg->any.blk_read.bytes < DEV_BSIZE ||
		    msg->any.blk_read.bytes > MAXPHYS) {
			error = DMSG_ERR_PARAM;
			goto done;
		}
		if (iost == NULL) {
			iost = kmalloc(sizeof(*iost), M_DEVBUF,
				       M_WAITOK | M_ZERO);
			msg->state->any.any = iost;
		}
		reterr = 0;
		bp = geteblk(msg->any.blk_read.bytes);
		bio = &bp->b_bio1;
		bp->b_cmd = BUF_CMD_READ;
		bp->b_bcount = msg->any.blk_read.bytes;
		bp->b_resid = bp->b_bcount;
		bio->bio_offset = msg->any.blk_read.offset;
		bio->bio_caller_info1.ptr = msg->state;
		bio->bio_done = diskiodone;

		/* kdmsg_state_hold(msg->state); */
		atomic_add_int(&blk_active, 1);
		atomic_add_int(&iost->count, 1);
		/* DELETE on the command half: last completion will reply */
		if (msg->any.head.cmd & DMSGF_DELETE)
			iost->eof = 1;
		BUF_KERNPROC(bp);
		dev_dstrategy(dp->d_rawdev, bio);
	}
done:
	if (reterr) {
		if (msg->any.head.cmd & DMSGF_DELETE) {
			/* only free tracking state if no BIOs are in flight */
			if (iost && iost->count == 0) {
				kfree(iost, M_DEVBUF);
				msg->state->any.any = NULL;
			}
			kdmsg_msg_reply(msg, error);
		} else {
			kdmsg_msg_result(msg, error);
		}
	}
}
396 
/*
 * DMSG_BLK_WRITE handler.  Validates the requested size and issues an
 * asynchronous write via dev_dstrategy().  Short aux data implies
 * trailing zeros.  When the message carries a full-sized payload the
 * aux data buffer is used directly (detached from the message) instead
 * of being copied.  The reply is generated by diskiodone().
 */
static
void
disk_blk_write(struct disk *dp, kdmsg_msg_t *msg)
{
	struct dios_io *iost;
	struct buf *bp;
	struct bio *bio;
	int error = DMSG_ERR_NOSUPP;
	int reterr = 1;

	/*
	 * Only DMSG_BLK_WRITE commands imply write ops.
	 */
	iost = msg->state->any.any;
	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_WRITE) {
		if (msg->any.blk_write.bytes < DEV_BSIZE ||
		    msg->any.blk_write.bytes > MAXPHYS) {
			error = DMSG_ERR_PARAM;
			goto done;
		}
		if (iost == NULL) {
			iost = kmalloc(sizeof(*iost), M_DEVBUF,
				       M_WAITOK | M_ZERO);
			msg->state->any.any = iost;
		}

		/*
		 * Issue WRITE.  Short data implies zeros.  Try to optimize
		 * the buffer cache buffer for the case where we can just
		 * use the message's data pointer.
		 */
		reterr = 0;
		if (msg->aux_size >= msg->any.blk_write.bytes)
			bp = getpbuf(NULL);
		else
			bp = geteblk(msg->any.blk_write.bytes);
		bio = &bp->b_bio1;
		bp->b_cmd = BUF_CMD_WRITE;
		bp->b_bcount = msg->any.blk_write.bytes;
		bp->b_resid = bp->b_bcount;
		if (msg->aux_size >= msg->any.blk_write.bytes) {
			/* zero-copy: point at the aux data and take ownership */
			bp->b_data = msg->aux_data;
			kdmsg_detach_aux_data(msg, &iost->data);
		} else {
			/* copy partial payload and zero-fill the remainder */
			bcopy(msg->aux_data, bp->b_data, msg->aux_size);
			bzero(bp->b_data + msg->aux_size,
			      msg->any.blk_write.bytes - msg->aux_size);
			bzero(&iost->data, sizeof(iost->data));
		}
		bio->bio_offset = msg->any.blk_write.offset;
		bio->bio_caller_info1.ptr = msg->state;
		bio->bio_done = diskiodone;

		/* kdmsg_state_hold(msg->state); */
		atomic_add_int(&blk_active, 1);
		atomic_add_int(&iost->count, 1);
		/* DELETE on the command half: last completion will reply */
		if (msg->any.head.cmd & DMSGF_DELETE)
			iost->eof = 1;
		BUF_KERNPROC(bp);
		dev_dstrategy(dp->d_rawdev, bio);
	}
done:
	if (reterr) {
		if (msg->any.head.cmd & DMSGF_DELETE) {
			/* only free tracking state if no BIOs are in flight */
			if (iost && iost->count == 0) {
				kfree(iost, M_DEVBUF);
				msg->state->any.any = NULL;
			}
			kdmsg_msg_reply(msg, error);
		} else {
			kdmsg_msg_result(msg, error);
		}
	}
}
471 
/*
 * DMSG_BLK_FLUSH handler.  Issues an asynchronous flush via
 * dev_dstrategy(); the reply is generated by diskiodone().
 */
static
void
disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg)
{
	struct dios_io *iost;
	struct buf *bp;
	struct bio *bio;
	int error = DMSG_ERR_NOSUPP;
	int reterr = 1;

	/*
	 * Only DMSG_BLK_FLUSH commands imply flush ops.
	 */
	iost = msg->state->any.any;
	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FLUSH) {
		if (iost == NULL) {
			iost = kmalloc(sizeof(*iost), M_DEVBUF,
				       M_WAITOK | M_ZERO);
			msg->state->any.any = iost;
		}
		reterr = 0;
		bp = getpbuf(NULL);
		bio = &bp->b_bio1;
		bp->b_cmd = BUF_CMD_FLUSH;
		bp->b_bcount = msg->any.blk_flush.bytes;
		bp->b_resid = 0;
		bio->bio_offset = msg->any.blk_flush.offset;
		bio->bio_caller_info1.ptr = msg->state;
		bio->bio_done = diskiodone;

		/* kdmsg_state_hold(msg->state); */
		atomic_add_int(&blk_active, 1);
		atomic_add_int(&iost->count, 1);
		/* DELETE on the command half: last completion will reply */
		if (msg->any.head.cmd & DMSGF_DELETE)
			iost->eof = 1;
		BUF_KERNPROC(bp);
		dev_dstrategy(dp->d_rawdev, bio);
	}
	if (reterr) {
		if (msg->any.head.cmd & DMSGF_DELETE) {
			/* only free tracking state if no BIOs are in flight */
			if (iost && iost->count == 0) {
				kfree(iost, M_DEVBUF);
				msg->state->any.any = NULL;
			}
			kdmsg_msg_reply(msg, error);
		} else {
			kdmsg_msg_result(msg, error);
		}
	}
}
522 
/*
 * DMSG_BLK_FREEBLKS handler.  Issues an asynchronous free-blocks
 * (TRIM-style) operation via dev_dstrategy(); the reply is generated
 * by diskiodone().
 */
static
void
disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg)
{
	struct dios_io *iost;
	struct buf *bp;
	struct bio *bio;
	int error = DMSG_ERR_NOSUPP;
	int reterr = 1;

	/*
	 * Only DMSG_BLK_FREEBLKS commands imply free-blocks ops.
	 */
	iost = msg->state->any.any;
	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FREEBLKS) {
		if (iost == NULL) {
			iost = kmalloc(sizeof(*iost), M_DEVBUF,
				       M_WAITOK | M_ZERO);
			msg->state->any.any = iost;
		}
		reterr = 0;
		bp = getpbuf(NULL);
		bio = &bp->b_bio1;
		bp->b_cmd = BUF_CMD_FREEBLKS;
		bp->b_bcount = msg->any.blk_freeblks.bytes;
		bp->b_resid = 0;
		bio->bio_offset = msg->any.blk_freeblks.offset;
		bio->bio_caller_info1.ptr = msg->state;
		bio->bio_done = diskiodone;

		/* kdmsg_state_hold(msg->state); */
		atomic_add_int(&blk_active, 1);
		atomic_add_int(&iost->count, 1);
		/* DELETE on the command half: last completion will reply */
		if (msg->any.head.cmd & DMSGF_DELETE)
			iost->eof = 1;
		BUF_KERNPROC(bp);
		dev_dstrategy(dp->d_rawdev, bio);
	}
	if (reterr) {
		if (msg->any.head.cmd & DMSGF_DELETE) {
			/* only free tracking state if no BIOs are in flight */
			if (iost && iost->count == 0) {
				kfree(iost, M_DEVBUF);
				msg->state->any.any = NULL;
			}
			kdmsg_msg_reply(msg, error);
		} else {
			kdmsg_msg_result(msg, error);
		}
	}
}
573 
/*
 * BIO completion callback for the disk_blk_*() handlers.  Builds and
 * writes the dmsg reply (carrying the data for reads), decrements the
 * in-flight counters, and sets DMSGF_DELETE on the reply for the last
 * completion once the command half has ended (iost->eof).  Finally
 * releases the buffer.
 */
static
void
diskiodone(struct bio *bio)
{
	struct buf *bp = bio->bio_buf;
	kdmsg_state_t *state = bio->bio_caller_info1.ptr;
	kdmsg_msg_t *rmsg;
	struct dios_io *iost = state->any.any;
	int error;
	int resid = 0;
	int bytes;
	uint32_t cmd;
	void *data;

	cmd = DMSG_LNK_ERROR;
	data = NULL;
	bytes = 0;

	switch(bp->b_cmd) {
	case BUF_CMD_READ:
		/*
		 * NOTE(review): the comment further below claims reads have
		 * already converted cmd to BLK_ERROR, but cmd is set to
		 * DMSG_LNK_ERROR here and only converted when resid != 0 —
		 * confirm which is intended.
		 */
		cmd = DMSG_LNK_ERROR;
		data = bp->b_data;
		bytes = bp->b_bcount;
		/* fall through */
	case BUF_CMD_WRITE:
		if (bp->b_flags & B_ERROR) {
			error = bp->b_error;
		} else {
			error = 0;
			resid = bp->b_resid;
		}
		/* release any aux data detached on the zero-copy write path */
		kdmsg_free_aux_data(&iost->data);
		break;
	case BUF_CMD_FLUSH:
	case BUF_CMD_FREEBLKS:
		if (bp->b_flags & B_ERROR)
			error = bp->b_error;
		else
			error = 0;
		break;
	default:
		panic("diskiodone: Unknown bio cmd = %d\n",
		      bio->bio_buf->b_cmd);
		error = 0;	/* avoid compiler warning */
		break;		/* NOT REACHED */
	}

	/*
	 * Convert error to DMSG_ERR_* code.
	 */
	if (error)
		error = DMSG_ERR_IO;

	/*
	 * Convert LNK_ERROR or BLK_ERROR if non-zero resid.  READS will
	 * have already converted cmd to BLK_ERROR and set up data to return.
	 */
	if (resid && cmd == DMSG_LNK_ERROR)
		cmd = DMSG_BLK_ERROR;
	/* XXX txcmd is delayed so this won't work for streaming */
	if ((state->txcmd & DMSGF_CREATE) == 0)	/* assume serialized */
		cmd |= DMSGF_CREATE;
	if (iost->eof) {
		/* last in-flight BIO after EOF terminates the transaction */
		if (atomic_fetchadd_int(&iost->count, -1) == 1)
			cmd |= DMSGF_DELETE;
	} else {
		atomic_add_int(&iost->count, -1);
	}
	atomic_add_int(&blk_active, -1);
	cmd |= DMSGF_REPLY;

	/*
	 * Allocate a basic or extended reply.  Be careful not to populate
	 * extended header fields unless we allocated an extended reply.
	 */
	rmsg = kdmsg_msg_alloc(state, cmd, NULL, 0);
	if (data) {
		rmsg->aux_data = kmalloc(bytes, state->iocom->mmsg, M_INTWAIT);
		rmsg->aux_size = bytes;
		rmsg->flags |= KDMSG_FLAG_AUXALLOC;
		bcopy(data, rmsg->aux_data, bytes);
	}
	rmsg->any.blk_error.head.error = error;
	if ((cmd & DMSGF_BASECMDMASK) == DMSG_BLK_ERROR)
		rmsg->any.blk_error.resid = resid;
	bio->bio_caller_info1.ptr = NULL;
	/* kdmsg_state_drop(state); */
	kdmsg_msg_write(rmsg);
	/* pbufs (write/flush/freeblks fast path) vs. eblk buffers (reads) */
	if (bp->b_flags & B_PAGING) {
		relpbuf(bio->bio_buf, NULL);
	} else {
		bp->b_flags |= B_INVAL | B_AGE;
		brelse(bp);
	}
}
668 }
669