xref: /dragonfly/sys/kern/subr_diskiocom.c (revision cb740add)
1 /*
2  * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/proc.h>
38 #include <sys/sysctl.h>
39 #include <sys/buf.h>
40 #include <sys/conf.h>
41 #include <sys/disklabel.h>
42 #include <sys/disklabel32.h>
43 #include <sys/disklabel64.h>
44 #include <sys/diskslice.h>
45 #include <sys/diskmbr.h>
46 #include <sys/disk.h>
47 #include <sys/malloc.h>
48 #include <sys/device.h>
49 #include <sys/devfs.h>
50 #include <sys/thread.h>
51 #include <sys/queue.h>
52 #include <sys/lock.h>
53 #include <sys/stat.h>
54 #include <sys/uuid.h>
55 #include <sys/dmsg.h>
56 
57 #include <sys/buf2.h>
58 #include <sys/msgport2.h>
59 #include <sys/thread2.h>
60 
/*
 * Per-transaction open state for DMSG_BLK_OPEN.  Tracks how many read
 * and write opens have been issued against the raw device so the DELETE
 * path in disk_blk_open() can issue matching dev_dclose() calls.
 */
struct dios_open {
	int	openrd;		/* outstanding FREAD opens */
	int	openwr;		/* outstanding FWRITE opens */
};
65 
/*
 * Per-transaction I/O state for BLK_READ/WRITE/FLUSH/FREEBLKS.  Shared
 * between the issuing disk_blk_*() function and diskiodone().
 */
struct dios_io {
	int	count;		/* BIOs in flight for this transaction */
	int	eof;		/* set when DMSGF_DELETE seen; last completion replies w/DELETE */
	kdmsg_data_t data;	/* detached aux data for zero-copy writes */
};
71 
/* malloc tag handed to the kdmsg layer for this iocom's allocations */
static MALLOC_DEFINE(M_DMSG_DISK, "dmsg_disk", "disk dmsg");

/* number of block I/Os currently in flight via the iocom interface */
static int blk_active;
SYSCTL_INT(_debug, OID_AUTO, blk_active, CTLFLAG_RW, &blk_active, 0,
           "Number of active iocom IOs");

static int disk_iocom_reconnect(struct disk *dp, struct file *fp);
static int disk_rcvdmsg(kdmsg_msg_t *msg);

static void disk_blk_open(struct disk *dp, kdmsg_msg_t *msg);
static void disk_blk_read(struct disk *dp, kdmsg_msg_t *msg);
static void disk_blk_write(struct disk *dp, kdmsg_msg_t *msg);
static void disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg);
static void disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg);
static void diskiodone(struct bio *bio);
87 
88 void
89 disk_iocom_init(struct disk *dp)
90 {
91 	kdmsg_iocom_init(&dp->d_iocom, dp,
92 			 KDMSG_IOCOMF_AUTOCONN |
93 			 KDMSG_IOCOMF_AUTORXSPAN |
94 			 KDMSG_IOCOMF_AUTOTXSPAN,
95 			 M_DMSG_DISK, disk_rcvdmsg);
96 }
97 
/*
 * Called when disk parameters change; currently a no-op placeholder
 * (the advertised span media parameters are only refreshed on the next
 * disk_iocom_reconnect()).
 */
void
disk_iocom_update(struct disk *dp)
{
}
102 
/*
 * Tear down the disk's kdmsg iocom on disk destruction.
 */
void
disk_iocom_uninit(struct disk *dp)
{
	kdmsg_iocom_uninit(&dp->d_iocom);
}
108 
109 int
110 disk_iocom_ioctl(struct disk *dp, int cmd, void *data)
111 {
112 	struct file *fp;
113 	struct disk_ioc_recluster *recl;
114 	int error;
115 
116 	switch(cmd) {
117 	case DIOCRECLUSTER:
118 		recl = data;
119 		fp = holdfp(curproc->p_fd, recl->fd, -1);
120 		if (fp) {
121 			error = disk_iocom_reconnect(dp, fp);
122 		} else {
123 			error = EINVAL;
124 		}
125 		break;
126 	default:
127 		error = EOPNOTSUPP;
128 		break;
129 	}
130 	return error;
131 }
132 
133 static
134 int
135 disk_iocom_reconnect(struct disk *dp, struct file *fp)
136 {
137 	char devname[64];
138 
139 	ksnprintf(devname, sizeof(devname), "%s%d",
140 		  dev_dname(dp->d_rawdev), dkunit(dp->d_rawdev));
141 
142 	kdmsg_iocom_reconnect(&dp->d_iocom, fp, devname);
143 
144 	dp->d_iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
145 	dp->d_iocom.auto_lnk_conn.peer_type = DMSG_PEER_BLOCK;
146 	dp->d_iocom.auto_lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK;
147 	dp->d_iocom.auto_lnk_conn.peer_mask = (uint64_t)-1;
148 #if 0
149 	if (dp->d_info.d_serialno) {
150 		ksnprintf(dp->d_iocom.auto_lnk_conn.peer_label,
151 			  sizeof(dp->d_iocom.auto_lnk_conn.peer_label),
152 			  "%s/%s", hostname, dp->d_info.d_serialno);
153 	} else {
154 		ksnprintf(dp->d_iocom.auto_lnk_conn.peer_label,
155 			  sizeof(dp->d_iocom.auto_lnk_conn.peer_label),
156 			  "%s/%s", hostname, devname);
157 	}
158 #endif
159 	ksnprintf(dp->d_iocom.auto_lnk_conn.peer_label,
160 		  sizeof(dp->d_iocom.auto_lnk_conn.peer_label),
161 		  "%s/%s", hostname, devname);
162 
163 	dp->d_iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1;
164 	dp->d_iocom.auto_lnk_span.peer_type = DMSG_PEER_BLOCK;
165 	dp->d_iocom.auto_lnk_span.media.block.bytes =
166 						dp->d_info.d_media_size;
167 	dp->d_iocom.auto_lnk_span.media.block.blksize =
168 						dp->d_info.d_media_blksize;
169 	ksnprintf(dp->d_iocom.auto_lnk_span.peer_label,
170 		  sizeof(dp->d_iocom.auto_lnk_span.peer_label),
171 		  "%s", dp->d_iocom.auto_lnk_conn.peer_label);
172 	if (dp->d_info.d_serialno) {
173 		ksnprintf(dp->d_iocom.auto_lnk_span.pfs_label,
174 			  sizeof(dp->d_iocom.auto_lnk_span.pfs_label),
175 			  "%s", dp->d_info.d_serialno);
176 	} else {
177 		/*
178 		 * If no serial number is available generate a dummy serial
179 		 * number from the host and device name and pray.  This will
180 		 * allow e.g. /dev/vn* to look meaningful on a remote machine.
181 		 */
182 		ksnprintf(dp->d_iocom.auto_lnk_span.pfs_label,
183 			  sizeof(dp->d_iocom.auto_lnk_span.pfs_label),
184 			  "%s.%s", hostname, devname);
185 	}
186 
187 	kdmsg_iocom_autoinitiate(&dp->d_iocom, NULL);
188 
189 	return (0);
190 }
191 
/*
 * Receive callback for all dmsg traffic on the disk's iocom.
 *
 * Debug messages are handled first because they may arrive outside of
 * any transaction.  Everything else must be inside a transaction; bare
 * state0 messages are rejected with DMSG_ERR_NOSUPP.  Dispatch is on
 * the transaction's opening command (state->rxcmd), not on the current
 * message's command, so streamed messages within a BLK_* transaction
 * reach the right handler.
 *
 * Always returns 0.
 */
static int
disk_rcvdmsg(kdmsg_msg_t *msg)
{
	struct disk *dp = msg->state->iocom->handle;

	/*
	 * Handle debug messages (these might not be in transactions)
	 */
	switch(msg->any.head.cmd & DMSGF_CMDSWMASK) {
	case DMSG_DBG_SHELL:
		/*
		 * Execute shell command (not supported atm)
		 */
		kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
		return(0);
	case DMSG_DBG_SHELL | DMSGF_REPLY:
		/* debug output from the remote; NUL-terminate and log */
		if (msg->aux_data) {
			msg->aux_data[msg->aux_size - 1] = 0;
			kprintf("diskiocom: DEBUGMSG: %s\n", msg->aux_data);
		}
		return(0);
	}

	/*
	 * All remaining messages must be in a transaction.
	 *
	 * NOTE!  We currently don't care if the transaction is just
	 *	  the span transaction (for disk probes) or if it is the
	 *	  BLK_OPEN transaction.
	 *
	 * NOTE!  We are switching on the first message's command.  The
	 *	  actual message command within the transaction may be
	 *	  different (if streaming within a transaction).
	 */
	if (msg->state == &msg->state->iocom->state0) {
		kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
		return(0);
	}

	switch(msg->state->rxcmd & DMSGF_CMDSWMASK) {
	case DMSG_BLK_OPEN:
		disk_blk_open(dp, msg);
		break;
	case DMSG_BLK_READ:
		/*
		 * not reached normally but leave in for completeness
		 */
		disk_blk_read(dp, msg);
		break;
	case DMSG_BLK_WRITE:
		disk_blk_write(dp, msg);
		break;
	case DMSG_BLK_FLUSH:
		disk_blk_flush(dp, msg);
		break;
	case DMSG_BLK_FREEBLKS:
		disk_blk_freeblks(dp, msg);
		break;
	default:
		/*
		 * Unsupported transaction command.  Reply (terminating the
		 * transaction) if the remote is deleting its side, otherwise
		 * send an interim result.
		 */
		if ((msg->any.head.cmd & DMSGF_REPLY) == 0) {
			if (msg->any.head.cmd & DMSGF_DELETE)
				kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
			else
				kdmsg_msg_result(msg, DMSG_ERR_NOSUPP);
		}
		break;
	}
	return (0);
}
261 
/*
 * Handle a DMSG_BLK_OPEN transaction message.
 *
 * On BLK_OPEN: lazily allocate the dios_open state hung off the
 * transaction, open the raw device with the requested read/write modes,
 * and bump the per-mode open counts on success.  When the remote
 * deletes its side of the transaction (DMSGF_DELETE) all accumulated
 * opens are closed out and the state is freed.
 */
static
void
disk_blk_open(struct disk *dp, kdmsg_msg_t *msg)
{
	struct dios_open *openst;
	int error = DMSG_ERR_NOSUPP;
	int fflags;

	openst = msg->state->any.any;
	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_OPEN) {
		if (openst == NULL) {
			openst = kmalloc(sizeof(*openst), M_DEVBUF,
						M_WAITOK | M_ZERO);
			msg->state->any.any = openst;
		}
		/* translate DMSG open modes to kernel FREAD/FWRITE flags */
		fflags = 0;
		if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
			fflags = FREAD;
		if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
			fflags |= FWRITE;
		error = dev_dopen(dp->d_rawdev, fflags, S_IFCHR, proc0.p_ucred, NULL);
		if (error) {
			error = DMSG_ERR_IO;
		} else {
			if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
				++openst->openrd;
			if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
				++openst->openwr;
		}
	}
#if 0
	/* explicit BLK_CLOSE handling - currently disabled */
	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_CLOSE &&
	    openst) {
		fflags = 0;
		if ((msg->any.blk_open.modes & DMSG_BLKOPEN_RD) &&
		    openst->openrd) {
			fflags = FREAD;
		}
		if ((msg->any.blk_open.modes & DMSG_BLKOPEN_WR) &&
		    openst->openwr) {
			fflags |= FWRITE;
		}
		error = dev_dclose(dp->d_rawdev, fflags, S_IFCHR, NULL);
		if (error) {
			error = DMSG_ERR_IO;
		} else {
			if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
				--openst->openrd;
			if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
				--openst->openwr;
		}
	}
#endif
	if (msg->any.head.cmd & DMSGF_DELETE) {
		/*
		 * Transaction is being torn down: close out every open
		 * still tracked (paired rd+wr opens first, then leftover
		 * single-mode opens) and free the state.
		 */
		if (openst) {
			while (openst->openrd && openst->openwr) {
				--openst->openrd;
				--openst->openwr;
				dev_dclose(dp->d_rawdev, FREAD|FWRITE, S_IFCHR, NULL);
			}
			while (openst->openrd) {
				--openst->openrd;
				dev_dclose(dp->d_rawdev, FREAD, S_IFCHR, NULL);
			}
			while (openst->openwr) {
				--openst->openwr;
				dev_dclose(dp->d_rawdev, FWRITE, S_IFCHR, NULL);
			}
			kfree(openst, M_DEVBUF);
			msg->state->any.any = NULL;
		}
		kdmsg_msg_reply(msg, error);
	} else {
		kdmsg_msg_result(msg, error);
	}
}
338 
/*
 * Handle a DMSG_BLK_READ transaction message.
 *
 * Validates the request size, lazily allocates per-transaction dios_io
 * state, and dispatches an async BUF_CMD_READ bio to the raw device.
 * The reply is generated by diskiodone() on completion; this function
 * only replies directly (reterr path) on parameter/protocol errors.
 */
static
void
disk_blk_read(struct disk *dp, kdmsg_msg_t *msg)
{
	struct dios_io *iost;
	struct buf *bp;
	struct bio *bio;
	int error = DMSG_ERR_NOSUPP;
	int reterr = 1;

	/*
	 * Only DMSG_BLK_READ commands imply read ops.
	 */
	iost = msg->state->any.any;
	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_READ) {
		/* reject sizes outside [DEV_BSIZE, MAXPHYS] */
		if (msg->any.blk_read.bytes < DEV_BSIZE ||
		    msg->any.blk_read.bytes > MAXPHYS) {
			error = DMSG_ERR_PARAM;
			goto done;
		}
		if (iost == NULL) {
			iost = kmalloc(sizeof(*iost), M_DEVBUF,
				       M_WAITOK | M_ZERO);
			msg->state->any.any = iost;
		}
		reterr = 0;
		bp = geteblk(msg->any.blk_read.bytes);
		bio = &bp->b_bio1;
		bp->b_cmd = BUF_CMD_READ;
		bp->b_bcount = msg->any.blk_read.bytes;
		bp->b_resid = bp->b_bcount;
		bio->bio_offset = msg->any.blk_read.offset;
		bio->bio_caller_info1.ptr = msg->state;	/* handed to diskiodone() */
		bio->bio_done = diskiodone;

		/* kdmsg_state_hold(msg->state); */
		atomic_add_int(&blk_active, 1);
		atomic_add_int(&iost->count, 1);
		/* remote deleted its side: last completion replies w/DELETE */
		if (msg->any.head.cmd & DMSGF_DELETE)
			iost->eof = 1;
		BUF_KERNPROC(bp);
		dev_dstrategy(dp->d_rawdev, bio);
	}
done:
	if (reterr) {
		if (msg->any.head.cmd & DMSGF_DELETE) {
			/* free state only if no I/O is still in flight */
			if (iost && iost->count == 0) {
				kfree(iost, M_DEVBUF);
				msg->state->any.any = NULL;
			}
			kdmsg_msg_reply(msg, error);
		} else {
			kdmsg_msg_result(msg, error);
		}
	}
}
395 
/*
 * Handle a DMSG_BLK_WRITE transaction message.
 *
 * Validates the request size, lazily allocates per-transaction dios_io
 * state, and dispatches an async BUF_CMD_WRITE bio.  When the message's
 * aux data covers the full write, the data pointer is used directly
 * (zero-copy, via getpbuf + kdmsg_detach_aux_data); otherwise a buffer
 * is allocated and the short data is zero-extended.  diskiodone()
 * generates the reply on completion; direct replies happen only on the
 * error (reterr) path.
 */
static
void
disk_blk_write(struct disk *dp, kdmsg_msg_t *msg)
{
	struct dios_io *iost;
	struct buf *bp;
	struct bio *bio;
	int error = DMSG_ERR_NOSUPP;
	int reterr = 1;

	/*
	 * Only DMSG_BLK_WRITE commands imply write ops.
	 */
	iost = msg->state->any.any;
	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_WRITE) {
		/* reject sizes outside [DEV_BSIZE, MAXPHYS] */
		if (msg->any.blk_write.bytes < DEV_BSIZE ||
		    msg->any.blk_write.bytes > MAXPHYS) {
			error = DMSG_ERR_PARAM;
			goto done;
		}
		if (iost == NULL) {
			iost = kmalloc(sizeof(*iost), M_DEVBUF,
				       M_WAITOK | M_ZERO);
			msg->state->any.any = iost;
		}

		/*
		 * Issue WRITE.  Short data implies zeros.  Try to optimize
		 * the buffer cache buffer for the case where we can just
		 * use the message's data pointer.
		 */
		reterr = 0;
		if (msg->aux_size >= msg->any.blk_write.bytes)
			bp = getpbuf(NULL);
		else
			bp = geteblk(msg->any.blk_write.bytes);
		bio = &bp->b_bio1;
		bp->b_cmd = BUF_CMD_WRITE;
		bp->b_bcount = msg->any.blk_write.bytes;
		bp->b_resid = bp->b_bcount;
		if (msg->aux_size >= msg->any.blk_write.bytes) {
			/* zero-copy: point the buf at the message data and
			 * take ownership of it (freed in diskiodone()) */
			bp->b_data = msg->aux_data;
			kdmsg_detach_aux_data(msg, &iost->data);
		} else {
			/* short data: copy and zero-extend into our buffer */
			bcopy(msg->aux_data, bp->b_data, msg->aux_size);
			bzero(bp->b_data + msg->aux_size,
			      msg->any.blk_write.bytes - msg->aux_size);
			bzero(&iost->data, sizeof(iost->data));
		}
		bio->bio_offset = msg->any.blk_write.offset;
		bio->bio_caller_info1.ptr = msg->state;	/* handed to diskiodone() */
		bio->bio_done = diskiodone;

		/* kdmsg_state_hold(msg->state); */
		atomic_add_int(&blk_active, 1);
		atomic_add_int(&iost->count, 1);
		/* remote deleted its side: last completion replies w/DELETE */
		if (msg->any.head.cmd & DMSGF_DELETE)
			iost->eof = 1;
		BUF_KERNPROC(bp);
		dev_dstrategy(dp->d_rawdev, bio);
	}
done:
	if (reterr) {
		if (msg->any.head.cmd & DMSGF_DELETE) {
			/* free state only if no I/O is still in flight */
			if (iost && iost->count == 0) {
				kfree(iost, M_DEVBUF);
				msg->state->any.any = NULL;
			}
			kdmsg_msg_reply(msg, error);
		} else {
			kdmsg_msg_result(msg, error);
		}
	}
}
470 
/*
 * Handle a DMSG_BLK_FLUSH transaction message.
 *
 * Dispatches an async BUF_CMD_FLUSH bio covering the requested
 * offset/bytes range.  diskiodone() generates the reply on completion;
 * direct replies happen only on the error (reterr) path.
 */
static
void
disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg)
{
	struct dios_io *iost;
	struct buf *bp;
	struct bio *bio;
	int error = DMSG_ERR_NOSUPP;
	int reterr = 1;

	/*
	 * Only DMSG_BLK_FLUSH commands imply flush ops.
	 */
	iost = msg->state->any.any;
	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FLUSH) {
		if (iost == NULL) {
			iost = kmalloc(sizeof(*iost), M_DEVBUF,
				       M_WAITOK | M_ZERO);
			msg->state->any.any = iost;
		}
		reterr = 0;
		bp = getpbuf(NULL);	/* no data payload, pbuf suffices */
		bio = &bp->b_bio1;
		bp->b_cmd = BUF_CMD_FLUSH;
		bp->b_bcount = msg->any.blk_flush.bytes;
		bp->b_resid = 0;
		bio->bio_offset = msg->any.blk_flush.offset;
		bio->bio_caller_info1.ptr = msg->state;	/* handed to diskiodone() */
		bio->bio_done = diskiodone;

		/* kdmsg_state_hold(msg->state); */
		atomic_add_int(&blk_active, 1);
		atomic_add_int(&iost->count, 1);
		/* remote deleted its side: last completion replies w/DELETE */
		if (msg->any.head.cmd & DMSGF_DELETE)
			iost->eof = 1;
		BUF_KERNPROC(bp);
		dev_dstrategy(dp->d_rawdev, bio);
	}
	if (reterr) {
		if (msg->any.head.cmd & DMSGF_DELETE) {
			/* free state only if no I/O is still in flight */
			if (iost && iost->count == 0) {
				kfree(iost, M_DEVBUF);
				msg->state->any.any = NULL;
			}
			kdmsg_msg_reply(msg, error);
		} else {
			kdmsg_msg_result(msg, error);
		}
	}
}
521 
/*
 * Handle a DMSG_BLK_FREEBLKS transaction message.
 *
 * Dispatches an async BUF_CMD_FREEBLKS bio (block deallocation / TRIM
 * style request) covering the requested offset/bytes range.
 * diskiodone() generates the reply on completion; direct replies happen
 * only on the error (reterr) path.
 */
static
void
disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg)
{
	struct dios_io *iost;
	struct buf *bp;
	struct bio *bio;
	int error = DMSG_ERR_NOSUPP;
	int reterr = 1;

	/*
	 * Only DMSG_BLK_FREEBLKS commands imply freeblks ops.
	 */
	iost = msg->state->any.any;
	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FREEBLKS) {
		if (iost == NULL) {
			iost = kmalloc(sizeof(*iost), M_DEVBUF,
				       M_WAITOK | M_ZERO);
			msg->state->any.any = iost;
		}
		reterr = 0;
		bp = getpbuf(NULL);	/* no data payload, pbuf suffices */
		bio = &bp->b_bio1;
		bp->b_cmd = BUF_CMD_FREEBLKS;
		bp->b_bcount = msg->any.blk_freeblks.bytes;
		bp->b_resid = 0;
		bio->bio_offset = msg->any.blk_freeblks.offset;
		bio->bio_caller_info1.ptr = msg->state;	/* handed to diskiodone() */
		bio->bio_done = diskiodone;

		/* kdmsg_state_hold(msg->state); */
		atomic_add_int(&blk_active, 1);
		atomic_add_int(&iost->count, 1);
		/* remote deleted its side: last completion replies w/DELETE */
		if (msg->any.head.cmd & DMSGF_DELETE)
			iost->eof = 1;
		BUF_KERNPROC(bp);
		dev_dstrategy(dp->d_rawdev, bio);
	}
	if (reterr) {
		if (msg->any.head.cmd & DMSGF_DELETE) {
			/* free state only if no I/O is still in flight */
			if (iost && iost->count == 0) {
				kfree(iost, M_DEVBUF);
				msg->state->any.any = NULL;
			}
			kdmsg_msg_reply(msg, error);
		} else {
			kdmsg_msg_result(msg, error);
		}
	}
}
572 
/*
 * BIO completion callback for all disk_blk_*() initiated I/O.
 *
 * Builds and sends the DMSG reply for the transaction: READs attach the
 * buffer data as aux data; non-zero residuals promote the reply to an
 * extended BLK_ERROR so the resid field can be reported.  The last
 * completion of a transaction whose remote side was deleted (iost->eof)
 * carries DMSGF_DELETE.  Finally the buffer is released (relpbuf for
 * pbufs, brelse otherwise).
 */
static
void
diskiodone(struct bio *bio)
{
	struct buf *bp = bio->bio_buf;
	kdmsg_state_t *state = bio->bio_caller_info1.ptr;
	kdmsg_msg_t *rmsg;
	struct dios_io *iost = state->any.any;
	int error;
	int resid = 0;
	int bytes;
	uint32_t cmd;
	void *data;

	cmd = DMSG_LNK_ERROR;
	data = NULL;
	bytes = 0;

	switch(bp->b_cmd) {
	case BUF_CMD_READ:
		/* return the read data; cmd may be promoted to BLK_ERROR
		 * below if there is a residual */
		cmd = DMSG_LNK_ERROR;
		data = bp->b_data;
		bytes = bp->b_bcount;
		/* fall through */
	case BUF_CMD_WRITE:
		if (bp->b_flags & B_ERROR) {
			error = bp->b_error;
		} else {
			error = 0;
			resid = bp->b_resid;
		}
		/* release any zero-copy write data detached from the msg */
		kdmsg_free_aux_data(&iost->data);
		break;
	case BUF_CMD_FLUSH:
	case BUF_CMD_FREEBLKS:
		if (bp->b_flags & B_ERROR)
			error = bp->b_error;
		else
			error = 0;
		break;
	default:
		panic("diskiodone: Unknown bio cmd = %d\n",
		      bio->bio_buf->b_cmd);
		error = 0;	/* avoid compiler warning */
		break;		/* NOT REACHED */
	}

	/*
	 * Convert error to DMSG_ERR_* code.
	 */
	if (error)
		error = DMSG_ERR_IO;

	/*
	 * Promote the basic LNK_ERROR reply to an extended BLK_ERROR reply
	 * when a residual must be reported (only BLK_ERROR carries resid).
	 * NOTE(review): an older comment claimed READs pre-set BLK_ERROR,
	 * but the switch above sets DMSG_LNK_ERROR for reads — verify
	 * intended reply type for partial reads.
	 */
	if (resid && cmd == DMSG_LNK_ERROR)
		cmd = DMSG_BLK_ERROR;
	/* XXX txcmd is delayed so this won't work for streaming */
	if ((state->txcmd & DMSGF_CREATE) == 0)	/* assume serialized */
		cmd |= DMSGF_CREATE;
	if (iost->eof) {
		/* last in-flight I/O of a deleted transaction: terminate */
		if (atomic_fetchadd_int(&iost->count, -1) == 1)
			cmd |= DMSGF_DELETE;
	} else {
		atomic_add_int(&iost->count, -1);
	}
	atomic_add_int(&blk_active, -1);
	cmd |= DMSGF_REPLY;

	/*
	 * Allocate a basic or extended reply.  Be careful not to populate
	 * extended header fields unless we allocated an extended reply.
	 */
	rmsg = kdmsg_msg_alloc(state, cmd, NULL, 0);
	if (data) {
		rmsg->aux_data = kmalloc(bytes, state->iocom->mmsg, M_INTWAIT);
		rmsg->aux_size = bytes;
		rmsg->flags |= KDMSG_FLAG_AUXALLOC;
		bcopy(data, rmsg->aux_data, bytes);
	}
	rmsg->any.blk_error.head.error = error;
	if ((cmd & DMSGF_BASECMDMASK) == DMSG_BLK_ERROR)
		rmsg->any.blk_error.resid = resid;
	bio->bio_caller_info1.ptr = NULL;
	/* kdmsg_state_drop(state); */
	kdmsg_msg_write(rmsg);
	/* pbufs (flush/freeblks/zero-copy write) vs buffer-cache bufs */
	if (bp->b_flags & B_PAGING) {
		relpbuf(bio->bio_buf, NULL);
	} else {
		bp->b_flags |= B_INVAL | B_AGE;
		brelse(bp);
	}
}
668