xref: /illumos-gate/usr/src/uts/common/os/streamio.c (revision 06e1a714)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
22 /*	  All Rights Reserved  	*/
23 
24 
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/types.h>
33 #include <sys/sysmacros.h>
34 #include <sys/param.h>
35 #include <sys/errno.h>
36 #include <sys/signal.h>
37 #include <sys/stat.h>
38 #include <sys/proc.h>
39 #include <sys/cred.h>
40 #include <sys/user.h>
41 #include <sys/vnode.h>
42 #include <sys/file.h>
43 #include <sys/stream.h>
44 #include <sys/strsubr.h>
45 #include <sys/stropts.h>
46 #include <sys/tihdr.h>
47 #include <sys/var.h>
48 #include <sys/poll.h>
49 #include <sys/termio.h>
50 #include <sys/ttold.h>
51 #include <sys/systm.h>
52 #include <sys/uio.h>
53 #include <sys/cmn_err.h>
54 #include <sys/sad.h>
55 #include <sys/priocntl.h>
56 #include <sys/jioctl.h>
57 #include <sys/procset.h>
58 #include <sys/session.h>
59 #include <sys/kmem.h>
60 #include <sys/filio.h>
61 #include <sys/vtrace.h>
62 #include <sys/debug.h>
63 #include <sys/strredir.h>
64 #include <sys/fs/fifonode.h>
65 #include <sys/fs/snode.h>
66 #include <sys/strlog.h>
67 #include <sys/strsun.h>
68 #include <sys/project.h>
69 #include <sys/kbio.h>
70 #include <sys/msio.h>
71 #include <sys/tty.h>
72 #include <sys/ptyvar.h>
73 #include <sys/vuid_event.h>
74 #include <sys/modctl.h>
75 #include <sys/sunddi.h>
76 #include <sys/sunldi_impl.h>
77 #include <sys/autoconf.h>
78 #include <sys/policy.h>
79 
80 
/*
 * i_straccess() is a call-site front end for straccess().  It preserves a
 * very old streams performance enhancement (every caller performs the first
 * check straccess() would do before calling it) while keeping the callers
 * readable.  The result is 0, without calling straccess(), when
 * stp->sd_sidp is NULL or when the stream's vnode is a VFIFO; otherwise it
 * is whatever straccess(x, y) returns.
 *
 * NOTE: the expansion refers to a variable named `stp', which must be in
 * scope wherever the macro is used.
 */
#define	i_straccess(x, y)						\
	((stp->sd_sidp == NULL || stp->sd_vnode->v_type == VFIFO) ?	\
	    0 : straccess((x), (y)))
92 
/*
 * mblk_pull_len: threshold for pulling up messages made of small mblks.
 *
 * When a streams message is built from many short mblks, the per-mblk
 * copyout overhead degrades performance.  To cut that overhead, messages
 * that are likely to consist of many small mblks are pulled up into one
 * contiguous chunk of memory.
 *
 * Examining every mblk would itself be too costly, so a quick heuristic is
 * used instead: if the first mblk of the message is shorter than
 * mblk_pull_len, the rest of the message is assumed to be short as well.
 * Performance tests showed that anything more elaborate slowed down the
 * main code path.
 */
#define	MBLK_PULL_LEN 64
uint32_t mblk_pull_len = MBLK_PULL_LEN;
113 
114 /*
115  * The sgttyb_handling flag controls the handling of the old BSD
116  * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
117  *
118  * 0 - Emit no warnings at all and retain old, broken behavior.
119  * 1 - Emit no warnings and silently handle new semantics.
120  * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
121  *     (once per system invocation).  Handle with new semantics.
122  * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
123  *     made (so that offenders drop core and are easy to debug).
124  *
125  * The "new semantics" are that TIOCGETP returns B38400 for
126  * sg_[io]speed if the corresponding value is over B38400, and that
127  * TIOCSET[PN] accept B38400 in these cases to mean "retain current
128  * bit rate."
129  */
130 int sgttyb_handling = 1;
131 static boolean_t sgttyb_complaint;
132 
133 /* don't push drcompat module by default on Style-2 streams */
134 static int push_drcompat = 0;
135 
136 /*
137  * id value used to distinguish between different ioctl messages
138  */
139 static uint32_t ioc_id;
140 
141 static void putback(struct stdata *, queue_t *, mblk_t *, int);
142 static void strcleanall(struct vnode *);
143 static int strwsrv(queue_t *);
144 
145 /*
146  * qinit and module_info structures for stream head read and write queues
147  */
148 struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
149 struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
150 struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
151 struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
152 struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
153     FIFOLOWAT };
154 struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
155 struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
156 struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };
157 
158 extern kmutex_t	strresources;	/* protects global resources */
159 extern kmutex_t muxifier;	/* single-threads multiplexor creation */
160 kmutex_t sad_lock;		/* protects sad drivers autopush */
161 
162 static boolean_t msghasdata(mblk_t *bp);
163 #define	msgnodata(bp) (!msghasdata(bp))
164 
165 /*
166  * Stream head locking notes:
167  *	There are four monitors associated with the stream head:
168  *	1. v_stream monitor: in stropen() and strclose() v_lock
169  *		is held while the association of vnode and stream
170  *		head is established or tested for.
171  *	2. open/close/push/pop monitor: sd_lock is held while each
172  *		thread bids for exclusive access to this monitor
173  *		for opening or closing a stream.  In addition, this
174  *		monitor is entered during pushes and pops.  This
175  *		guarantees that during plumbing operations there
176  *		is only one thread trying to change the plumbing.
177  *		Any other threads present in the stream are only
178  *		using the plumbing.
179  *	3. read/write monitor: in the case of read, a thread holds
180  *		sd_lock while trying to get data from the stream
181  *		head queue.  if there is none to fulfill a read
182  *		request, it sets RSLEEP and calls cv_wait_sig() down
183  *		in strwaitq() to await the arrival of new data.
184  *		when new data arrives in strrput(), sd_lock is acquired
185  *		before testing for RSLEEP and calling cv_broadcast().
186  *		the behavior of strwrite(), strwsrv(), and WSLEEP
187  *		mirror this.
188  *	4. ioctl monitor: sd_lock is gotten to ensure that only one
189  *		thread is doing an ioctl at a time.
190  */
191 
192 static int
193 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name,
194     int anchor, cred_t *crp)
195 {
196 	int error;
197 	fmodsw_impl_t *fp;
198 
199 	if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) {
200 		error = (stp->sd_flag & STRHUP) ? ENXIO : EIO;
201 		return (error);
202 	}
203 	if (stp->sd_pushcnt >= nstrpush) {
204 		return (EINVAL);
205 	}
206 
207 	if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) {
208 		stp->sd_flag |= STREOPENFAIL;
209 		return (EINVAL);
210 	}
211 
212 	/*
213 	 * push new module and call its open routine via qattach
214 	 */
215 	if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0)
216 		return (error);
217 
218 	/*
219 	 * Check to see if caller wants a STREAMS anchor
220 	 * put at this place in the stream, and add if so.
221 	 */
222 	mutex_enter(&stp->sd_lock);
223 	if (anchor == stp->sd_pushcnt)
224 		stp->sd_anchor = stp->sd_pushcnt;
225 	mutex_exit(&stp->sd_lock);
226 
227 	return (0);
228 }
229 
230 /*
231  * Open a stream device.
 *
 * vp	- vnode being opened.
 * devp	- device number handed to qreopen()/qattach(); sd_strtab and
 *	  dummydev are taken from *devp only after qattach() has run (see
 *	  the comments on them below).
 * flag	- open flags.  With FNDELAY or FNONBLOCK set, the re-open path
 *	  fails with EAGAIN instead of waiting on sd_monitor.
 * crp	- credentials passed on to qreopen(), qattach() and push_mod().
 *
 * Two cases are selected by vp->v_stream, tested under vp->v_lock:
 *
 * - A stream head already exists (re-open): wait until neither the stream
 *   nor its mate has STWOPEN, STRCLOSE or STRPLUMB set, mark the stream
 *   STWOPEN, then call qreopen() on each queue below the stream head,
 *   stopping at the first failure.
 * - No stream head exists: allocate a queue pair and a stream head, attach
 *   them to vp, open the driver through qattach() and push any autopush
 *   modules found for the device.  For a VFIFO vnode only the stream head
 *   is set up.
 *
 * Returns 0 or an errno value: EAGAIN, EINTR or EIO from the re-open
 * checks, or whatever qreopen(), qattach() or push_mod() returned.  When an
 * autopush fails, the new stream stays attached to vp with STREOPENFAIL
 * set and the error is returned.
232  */
233 int
234 stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp)
235 {
236 	struct stdata *stp;
237 	queue_t *qp;
238 	int s;
239 	dev_t dummydev;
240 	struct autopush *ap;
241 	int error = 0;
242 	ssize_t	rmin, rmax;
243 	int cloneopen;
244 	queue_t *brq;
245 	major_t major;
246 
247 #ifdef C2_AUDIT
248 	if (audit_active)
249 		audit_stropen(vp, devp, flag, crp);
250 #endif
251 
252 	/*
253 	 * If the stream already exists, wait for any open in progress
254 	 * to complete, then call the open function of each module and
255 	 * driver in the stream.  Otherwise create the stream.
256 	 */
257 	TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp);
258 retry:
259 	mutex_enter(&vp->v_lock);
260 	if ((stp = vp->v_stream) != NULL) {
261 
262 		/*
263 		 * Waiting for stream to be created to device
264 		 * due to another open.
265 		 */
266 	    mutex_exit(&vp->v_lock);
267 
268 	    if (STRMATED(stp)) {
269 		struct stdata *strmatep = stp->sd_mate;
270 
		/*
		 * STRLOCKMATES() is expected to leave both stp->sd_lock
		 * and strmatep->sd_lock held (the paths below release each
		 * of them).  Every path to ckreturn must arrive holding
		 * only stp->sd_lock, because ckreturn drops exactly that
		 * lock.
		 */
271 		STRLOCKMATES(stp);
272 		if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
273 			if (flag & (FNDELAY|FNONBLOCK)) {
274 				error = EAGAIN;
275 				mutex_exit(&strmatep->sd_lock);
276 				goto ckreturn;
277 			}
			/* Wait on the mate's monitor with only its lock. */
278 			mutex_exit(&stp->sd_lock);
279 			if (!cv_wait_sig(&strmatep->sd_monitor,
280 			    &strmatep->sd_lock)) {
				/*
				 * Interrupted: swap the mate's lock for our
				 * own so that ckreturn can release it.
				 */
281 				error = EINTR;
282 				mutex_exit(&strmatep->sd_lock);
283 				mutex_enter(&stp->sd_lock);
284 				goto ckreturn;
285 			}
286 			mutex_exit(&strmatep->sd_lock);
287 			goto retry;
288 		}
289 		if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
290 			if (flag & (FNDELAY|FNONBLOCK)) {
291 				error = EAGAIN;
292 				mutex_exit(&strmatep->sd_lock);
293 				goto ckreturn;
294 			}
295 			mutex_exit(&strmatep->sd_lock);
296 			if (!cv_wait_sig(&stp->sd_monitor, &stp->sd_lock)) {
297 				error = EINTR;
298 				goto ckreturn;
299 			}
300 			mutex_exit(&stp->sd_lock);
301 			goto retry;
302 		}
303 
304 		if (stp->sd_flag & (STRDERR|STWRERR)) {
305 			error = EIO;
306 			mutex_exit(&strmatep->sd_lock);
307 			goto ckreturn;
308 		}
309 
310 		stp->sd_flag |= STWOPEN;
311 		STRUNLOCKMATES(stp);
312 	    } else {
313 		mutex_enter(&stp->sd_lock);
314 		if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
315 			if (flag & (FNDELAY|FNONBLOCK)) {
316 				error = EAGAIN;
317 				goto ckreturn;
318 			}
319 			if (!cv_wait_sig(&stp->sd_monitor, &stp->sd_lock)) {
320 				error = EINTR;
321 				goto ckreturn;
322 			}
			/* Start over from the vp->v_stream check. */
323 			mutex_exit(&stp->sd_lock);
324 			goto retry;  /* could be clone! */
325 		}
326 
327 		if (stp->sd_flag & (STRDERR|STWRERR)) {
328 			error = EIO;
329 			goto ckreturn;
330 		}
331 
332 		stp->sd_flag |= STWOPEN;
333 		mutex_exit(&stp->sd_lock);
334 	    }
335 
336 		/*
337 		 * Open all modules and devices down stream to notify
338 		 * that another user is streaming.  For modules, set the
339 		 * last argument to MODOPEN and do not pass any open flags.
340 		 * Ignore dummydev since this is not the first open.
341 		 */
342 	    claimstr(stp->sd_wrq);
343 	    qp = stp->sd_wrq;
344 	    while (_SAMESTR(qp)) {
345 		qp = qp->q_next;
346 		if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0)
347 			break;
348 	    }
349 	    releasestr(stp->sd_wrq);
	    /*
	     * The hangup, open-in-progress and error state is cleared
	     * whether or not one of the qreopen() calls failed.
	     */
350 	    mutex_enter(&stp->sd_lock);
351 	    stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR);
352 	    stp->sd_rerror = 0;
353 	    stp->sd_werror = 0;
	    /* Common re-open exit: entered with stp->sd_lock held. */
354 ckreturn:
355 	    cv_broadcast(&stp->sd_monitor);
356 	    mutex_exit(&stp->sd_lock);
357 	    return (error);
358 	}
359 
360 	/*
361 	 * This vnode isn't streaming.  SPECFS already
362 	 * checked for multiple vnodes pointing to the
363 	 * same stream, so create a stream to the driver.
364 	 */
365 	qp = allocq();
366 	stp = shalloc(qp);
367 
368 	/*
369 	 * Initialize stream head.  shalloc() has given us
370 	 * exclusive access, and we have the vnode locked;
371 	 * we can do whatever we want with stp.
372 	 */
373 	stp->sd_flag = STWOPEN;
374 	stp->sd_siglist = NULL;
375 	stp->sd_pollist.ph_list = NULL;
376 	stp->sd_sigflags = 0;
377 	stp->sd_mark = NULL;
378 	stp->sd_closetime = STRTIMOUT;
379 	stp->sd_sidp = NULL;
380 	stp->sd_pgidp = NULL;
381 	stp->sd_vnode = vp;
382 	stp->sd_rerror = 0;
383 	stp->sd_werror = 0;
384 	stp->sd_wroff = 0;
385 	stp->sd_tail = 0;
386 	stp->sd_iocblk = NULL;
387 	stp->sd_pushcnt = 0;
388 	stp->sd_qn_minpsz = 0;
389 	stp->sd_qn_maxpsz = INFPSZ - 1;	/* used to check for initialization */
390 	stp->sd_maxblk = INFPSZ;
391 	qp->q_ptr = _WR(qp)->q_ptr = stp;
392 	STREAM(qp) = STREAM(_WR(qp)) = stp;
	/*
	 * Publish the stream head.  It already carries STWOPEN, so another
	 * stropen() that finds it through vp->v_stream takes the re-open
	 * path above and waits (or fails with EAGAIN).
	 */
393 	vp->v_stream = stp;
394 	mutex_exit(&vp->v_lock);
395 	if (vp->v_type == VFIFO) {
396 		stp->sd_flag |= OLDNDELAY;
397 		/*
398 		 * This means, both for pipes and fifos
399 		 * strwrite will send SIGPIPE if the other
400 		 * end is closed. For putmsg it depends
401 		 * on whether it is a XPG4_2 application
402 		 * or not
403 		 */
404 		stp->sd_wput_opt = SW_SIGPIPE;
405 
406 		/* setq might sleep in kmem_alloc - avoid holding locks. */
407 		setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE,
408 		    SQ_CI|SQ_CO, B_FALSE);
409 
410 		set_qend(qp);
411 		stp->sd_strtab = fifo_getinfo();
412 		_WR(qp)->q_nfsrv = _WR(qp);
413 		qp->q_nfsrv = qp;
414 		/*
415 		 * Wake up others that are waiting for stream to be created.
416 		 */
417 		mutex_enter(&stp->sd_lock);
418 		/*
419 		 * nothing has been pushed on the stream yet, so
420 		 * optimized stream head packetsizes are just that
421 		 * of the read queue
422 		 */
423 		stp->sd_qn_minpsz = qp->q_minpsz;
424 		stp->sd_qn_maxpsz = qp->q_maxpsz;
425 		stp->sd_flag &= ~STWOPEN;
		/*
		 * Skips the driver qattach() and the autopush processing;
		 * fifo_opendone is entered with stp->sd_lock held.
		 */
426 		goto fifo_opendone;
427 	}
428 	/* setq might sleep in kmem_alloc - avoid holding locks. */
429 	setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE);
430 
431 	set_qend(qp);
432 
433 	/*
434 	 * Open driver and create stream to it (via qattach).
435 	 */
436 	cloneopen = (getmajor(*devp) == clone_major);
437 	if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) {
		/*
		 * Driver open failed: undo the v_stream association made
		 * above, wake anybody waiting on sd_monitor, then free the
		 * queue pair and the stream head.
		 */
438 		mutex_enter(&vp->v_lock);
439 		vp->v_stream = NULL;
440 		mutex_exit(&vp->v_lock);
441 		mutex_enter(&stp->sd_lock);
442 		cv_broadcast(&stp->sd_monitor);
443 		mutex_exit(&stp->sd_lock);
444 		freeq(_RD(qp));
445 		shfree(stp);
446 		return (error);
447 	}
448 	/*
449 	 * Set sd_strtab after open in order to handle clonable drivers
450 	 */
451 	stp->sd_strtab = STREAMSTAB(getmajor(*devp));
452 
453 	/*
454 	 * Historical note: dummydev used to be set prior to the initial
455 	 * open (via qattach above), which made the value seen
456 	 * inconsistent between an I_PUSH and an autopush of a module.
457 	 */
458 	dummydev = *devp;
459 
460 	/*
461 	 * For clone open of old style (Q not associated) network driver,
462 	 * push DRMODNAME module to handle DL_ATTACH/DL_DETACH
463 	 */
464 	brq = _RD(_WR(qp)->q_next);
465 	major = getmajor(*devp);
466 	if (push_drcompat && cloneopen && NETWORK_DRV(major) &&
467 	    ((brq->q_flag & _QASSOCIATED) == 0)) {
		/* A failed push here is only warned about; open goes on. */
468 		if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp) != 0)
469 			cmn_err(CE_WARN, "cannot push " DRMODNAME
470 			    " streams module");
471 	}
472 
473 	/*
474 	 * check for autopush
475 	 */
476 	mutex_enter(&sad_lock);
477 	ap = strphash(getemajor(*devp));
	/*
	 * NOTE: these macros are defined in the middle of the function and
	 * are not #undef'd within it, so they remain defined for the rest
	 * of the file.
	 */
478 #define	DEVT(ap)	makedevice(ap->ap_major, ap->ap_minor)
479 #define	DEVLT(ap)	makedevice(ap->ap_major, ap->ap_lastminor)
480 
	/*
	 * Walk the list returned by strphash() looking for an entry for
	 * this major that applies to every minor (SAP_ALL), to exactly this
	 * minor (SAP_ONE) or to a minor range containing it (SAP_RANGE).
	 */
481 	while (ap) {
482 		if (ap->ap_major == (getemajor(*devp))) {
483 			if (ap->ap_type == SAP_ALL)
484 				break;
485 			else if ((ap->ap_type == SAP_ONE) &&
486 			    (getminor(DEVT(ap)) == getminor(*devp)))
487 				break;
488 			else if (ap->ap_type == SAP_RANGE &&
489 			    getminor(*devp) >= getminor(DEVT(ap)) &&
490 			    getminor(*devp) <= getminor(DEVLT(ap)))
491 				break;
492 		}
493 		ap = ap->ap_nextp;
494 	}
495 	if (ap == NULL) {
496 		mutex_exit(&sad_lock);
497 		goto opendone;
498 	}
	/*
	 * ap_cnt is raised under sad_lock before the lock is dropped for
	 * the pushes and lowered again afterwards; the entry is freed once
	 * the count reaches zero.  NOTE(review): presumably this keeps the
	 * entry alive while sad_lock is not held -- confirm against the sad
	 * driver.
	 */
499 	ap->ap_cnt++;
500 	mutex_exit(&sad_lock);
501 	for (s = 0; s < ap->ap_npush; s++) {
502 		error = push_mod(qp, &dummydev, stp, ap->ap_list[s],
503 		    ap->ap_anchor, crp);
504 		if (error != 0)
505 			break;
506 	}
507 	mutex_enter(&sad_lock);
508 	if (--(ap->ap_cnt) <= 0)
509 		ap_free(ap);
510 	mutex_exit(&sad_lock);
511 
512 	/*
513 	 * let specfs know that open failed part way through
514 	 */
515 
516 	if (error) {
517 		mutex_enter(&stp->sd_lock);
518 		stp->sd_flag |= STREOPENFAIL;
519 		mutex_exit(&stp->sd_lock);
520 	}
521 
522 opendone:
523 
524 	/*
525 	 * Wake up others that are waiting for stream to be created.
526 	 */
527 	mutex_enter(&stp->sd_lock);
528 	stp->sd_flag &= ~STWOPEN;
529 
530 	/*
531 	 * As a performance concern we are caching the values of
532 	 * q_minpsz and q_maxpsz of the module below the stream
533 	 * head in the stream head.
534 	 */
535 	mutex_enter(QLOCK(stp->sd_wrq->q_next));
536 	rmin = stp->sd_wrq->q_next->q_minpsz;
537 	rmax = stp->sd_wrq->q_next->q_maxpsz;
538 	mutex_exit(QLOCK(stp->sd_wrq->q_next));
539 
540 	/* do this processing here as a performance concern */
	/* A non-zero strmsgsz caps the cached maximum packet size. */
541 	if (strmsgsz != 0) {
542 		if (rmax == INFPSZ)
543 			rmax = strmsgsz;
544 		else
545 			rmax = MIN(strmsgsz, rmax);
546 	}
547 
548 	mutex_enter(QLOCK(stp->sd_wrq));
549 	stp->sd_qn_minpsz = rmin;
550 	stp->sd_qn_maxpsz = rmax;
551 	mutex_exit(QLOCK(stp->sd_wrq));
552 
	/* Entered with stp->sd_lock held, from above or the VFIFO path. */
553 fifo_opendone:
554 	cv_broadcast(&stp->sd_monitor);
555 	mutex_exit(&stp->sd_lock);
556 	return (error);
557 }
558 
559 static int strsink(queue_t *, mblk_t *);
560 static struct qinit deadrend = {
561 	strsink, NULL, NULL, NULL, NULL, &strm_info, NULL
562 };
563 static struct qinit deadwend = {
564 	NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL
565 };
566 
567 /*
568  * Close a stream.
569  * This is called from closef() on the last close of an open stream.
570  * Strclean() will already have removed the siglist and pollist
571  * information, so all that remains is to remove all multiplexor links
572  * for the stream, pop all the modules (and the driver), and free the
573  * stream structure.
 *
 * vp	- vnode whose stream is being closed; vp->v_stream must be set.
 * flag	- file flags.  With FNDELAY or FNONBLOCK set, the wait for the
 *	  queue below the stream head to drain is skipped.
 * crp	- credentials passed to munlinkall() and qdetach().
 *
 * Always returns 0.
574  */
575 
576 int
577 strclose(struct vnode *vp, int flag, cred_t *crp)
578 {
579 	struct stdata *stp;
580 	queue_t *qp;
581 	int rval;
	/* Cleared when the stream head must outlive this close (live pipe). */
582 	int freestp = 1;
583 	queue_t *rmq;
584 
585 #ifdef C2_AUDIT
586 	if (audit_active)
587 		audit_strclose(vp, flag, crp);
588 #endif
589 
590 	TRACE_1(TR_FAC_STREAMS_FR,
591 		TR_STRCLOSE, "strclose:%p", vp);
592 	ASSERT(vp->v_stream);
593 
594 	stp = vp->v_stream;
595 	ASSERT(!(stp->sd_flag & STPLEX));
	/* qp is the stream head write queue for the rest of the function. */
596 	qp = stp->sd_wrq;
597 
598 	/*
599 	 * Needed so that strpoll will return non-zero for this fd.
600 	 * Note that with POLLNOERR STRHUP does still cause POLLHUP.
601 	 */
602 	mutex_enter(&stp->sd_lock);
603 	stp->sd_flag |= STRHUP;
604 	mutex_exit(&stp->sd_lock);
605 
606 	/*
607 	 * If the registered process or process group did not have an
608 	 * open instance of this stream then strclean would not be
609 	 * called. Thus at the time of closing all remaining siglist entries
610 	 * are removed.
611 	 */
612 	if (stp->sd_siglist != NULL)
613 		strcleanall(vp);
614 
615 	ASSERT(stp->sd_siglist == NULL);
616 	ASSERT(stp->sd_sigflags == 0);
617 
618 	if (STRMATED(stp)) {
619 		struct stdata *strmatep = stp->sd_mate;
620 		int waited = 1;
621 
		/*
		 * Repeat until one full pass finds neither end with an
		 * open, close or plumbing operation in progress.  Each
		 * cv_wait() is done holding a single sd_lock; afterwards
		 * that lock is dropped and both are taken again through
		 * STRLOCKMATES() before the flags are re-examined.
		 */
622 		STRLOCKMATES(stp);
623 		while (waited) {
624 			waited = 0;
625 			while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
626 				mutex_exit(&strmatep->sd_lock);
627 				cv_wait(&stp->sd_monitor, &stp->sd_lock);
628 				mutex_exit(&stp->sd_lock);
629 				STRLOCKMATES(stp);
630 				waited = 1;
631 			}
632 			while (strmatep->sd_flag &
633 			    (STWOPEN|STRCLOSE|STRPLUMB)) {
634 				mutex_exit(&stp->sd_lock);
635 				cv_wait(&strmatep->sd_monitor,
636 				    &strmatep->sd_lock);
637 				mutex_exit(&strmatep->sd_lock);
638 				STRLOCKMATES(stp);
639 				waited = 1;
640 			}
641 		}
642 		stp->sd_flag |= STRCLOSE;
643 		STRUNLOCKMATES(stp);
644 	} else {
645 		mutex_enter(&stp->sd_lock);
646 		stp->sd_flag |= STRCLOSE;
647 		mutex_exit(&stp->sd_lock);
648 	}
649 
650 	ASSERT(qp->q_first == NULL);	/* No more delayed write */
651 
652 	/* Check if an I_LINK was ever done on this stream */
653 	if (stp->sd_flag & STRHASLINKS) {
654 		(void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval);
655 	}
656 
	/*
	 * Detach whatever sits directly below the stream head, one queue
	 * pair per iteration, for as long as _SAMESTR(qp) holds.  Per the
	 * header comment this pops all the modules and then the driver.
	 */
657 	while (_SAMESTR(qp)) {
658 		/*
659 		 * Holding sd_lock prevents q_next from changing in
660 		 * this stream.
661 		 */
662 		mutex_enter(&stp->sd_lock);
663 		if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) {
664 
665 			/*
666 			 * sleep until awakened by strwsrv() or timeout
667 			 */
668 			for (;;) {
				/* Done once the queue below holds no mblks. */
669 				mutex_enter(QLOCK(qp->q_next));
670 				if (!(qp->q_next->q_mblkcnt)) {
671 					mutex_exit(QLOCK(qp->q_next));
672 					break;
673 				}
674 				stp->sd_flag |= WSLEEP;
675 
676 				/* ensure strwsrv gets enabled */
677 				qp->q_next->q_flag |= QWANTW;
678 				mutex_exit(QLOCK(qp->q_next));
679 				/* get out if we timed out or recv'd a signal */
680 				if (str_cv_wait(&qp->q_wait, &stp->sd_lock,
681 				    stp->sd_closetime, 0) <= 0) {
682 					break;
683 				}
684 			}
685 			stp->sd_flag &= ~WSLEEP;
686 		}
687 		mutex_exit(&stp->sd_lock);
688 
689 		rmq = qp->q_next;
690 		if (rmq->q_flag & QISDRV) {
691 			ASSERT(!_SAMESTR(rmq));
692 			wait_sq_svc(_RD(qp)->q_syncq);
693 		}
694 
695 		qdetach(_RD(rmq), 1, flag, crp, B_FALSE);
696 	}
697 
698 	/*
699 	 * Since we call pollwakeup in close() now, the poll list should
700 	 * be empty in most cases. The only exception is the layered devices
701 	 * (e.g. the console drivers with redirection modules pushed on top
702 	 * of it).  We have to do this after calling qdetach() because
703 	 * the redirection module won't have torn down the console
704 	 * redirection until after qdetach() has been invoked.
705 	 */
706 	if (stp->sd_pollist.ph_list != NULL) {
707 		pollwakeup(&stp->sd_pollist, POLLERR);
708 		pollhead_clean(&stp->sd_pollist);
709 	}
710 	ASSERT(stp->sd_pollist.ph_list == NULL);
711 	ASSERT(stp->sd_sidp == NULL);
712 	ASSERT(stp->sd_pgidp == NULL);
713 
714 	/* Prevent qenable from re-enabling the stream head queue */
715 	disable_svc(_RD(qp));
716 
717 	/*
718 	 * Wait until service procedure of each queue is
719 	 * run, if QINSERVICE is set.
720 	 */
721 	wait_svc(_RD(qp));
722 
723 	/*
724 	 * Now, flush both queues.
725 	 */
726 	flushq(_RD(qp), FLUSHALL);
727 	flushq(qp, FLUSHALL);
728 
729 	/*
730 	 * If the write queue of the stream head is pointing to a
731 	 * read queue, we have a twisted stream.  If the read queue
732 	 * is alive, convert the stream head queues into a dead end.
733 	 * If the read queue is dead, free the dead pair.
734 	 */
735 	if (qp->q_next && !_SAMESTR(qp)) {
736 		if (qp->q_next->q_qinfo == &deadrend) {	/* half-closed pipe */
			/*
			 * The other end was already turned into a dead end:
			 * free its stream head and queue pair as well as
			 * our own queue pair.
			 */
737 			flushq(qp->q_next, FLUSHALL); /* ensure no message */
738 			shfree(qp->q_next->q_stream);
739 			freeq(qp->q_next);
740 			freeq(_RD(qp));
741 		} else if (qp->q_next == _RD(qp)) {	/* fifo */
742 			freeq(_RD(qp));
743 		} else {				/* pipe */
			/*
			 * The other end is still alive: keep stp and its
			 * queues, but switch them to the dead-end qinits.
			 */
744 			freestp = 0;
745 			/*
746 			 * The q_info pointers are never accessed when
747 			 * SQLOCK is held.
748 			 */
749 			ASSERT(qp->q_syncq == _RD(qp)->q_syncq);
750 			mutex_enter(SQLOCK(qp->q_syncq));
751 			qp->q_qinfo = &deadwend;
752 			_RD(qp)->q_qinfo = &deadrend;
753 			mutex_exit(SQLOCK(qp->q_syncq));
754 		}
755 	} else {
756 		freeq(_RD(qp)); /* free stream head queue pair */
757 	}
758 
	/*
	 * Break the vnode/stream association under v_lock.  An sd_iocblk
	 * of (mblk_t *)-1 is a marker rather than a real message, so it is
	 * not passed to freemsg().
	 */
759 	mutex_enter(&vp->v_lock);
760 	if (stp->sd_iocblk) {
761 		if (stp->sd_iocblk != (mblk_t *)-1) {
762 			freemsg(stp->sd_iocblk);
763 		}
764 		stp->sd_iocblk = NULL;
765 	}
766 	stp->sd_vnode = NULL;
767 	vp->v_stream = NULL;
768 	mutex_exit(&vp->v_lock);
	/* Close is over: clear STRCLOSE and wake waiters on sd_monitor. */
769 	mutex_enter(&stp->sd_lock);
770 	stp->sd_flag &= ~STRCLOSE;
771 	cv_broadcast(&stp->sd_monitor);
772 	mutex_exit(&stp->sd_lock);
773 
774 	if (freestp)
775 		shfree(stp);
776 	return (0);
777 }
778 
779 static int
780 strsink(queue_t *q, mblk_t *bp)
781 {
782 	struct copyresp *resp;
783 
784 	switch (bp->b_datap->db_type) {
785 	case M_FLUSH:
786 		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
787 			*bp->b_rptr &= ~FLUSHR;
788 			bp->b_flag |= MSGNOLOOP;
789 			/*
790 			 * Protect against the driver passing up
791 			 * messages after it has done a qprocsoff.
792 			 */
793 			if (_OTHERQ(q)->q_next == NULL)
794 				freemsg(bp);
795 			else
796 				qreply(q, bp);
797 		} else {
798 			freemsg(bp);
799 		}
800 		break;
801 
802 	case M_COPYIN:
803 	case M_COPYOUT:
804 		if (bp->b_cont) {
805 			freemsg(bp->b_cont);
806 			bp->b_cont = NULL;
807 		}
808 		bp->b_datap->db_type = M_IOCDATA;
809 		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
810 		resp = (struct copyresp *)bp->b_rptr;
811 		resp->cp_rval = (caddr_t)1;	/* failure */
812 		/*
813 		 * Protect against the driver passing up
814 		 * messages after it has done a qprocsoff.
815 		 */
816 		if (_OTHERQ(q)->q_next == NULL)
817 			freemsg(bp);
818 		else
819 			qreply(q, bp);
820 		break;
821 
822 	case M_IOCTL:
823 		if (bp->b_cont) {
824 			freemsg(bp->b_cont);
825 			bp->b_cont = NULL;
826 		}
827 		bp->b_datap->db_type = M_IOCNAK;
828 		/*
829 		 * Protect against the driver passing up
830 		 * messages after it has done a qprocsoff.
831 		 */
832 		if (_OTHERQ(q)->q_next == NULL)
833 			freemsg(bp);
834 		else
835 			qreply(q, bp);
836 		break;
837 
838 	default:
839 		freemsg(bp);
840 		break;
841 	}
842 
843 	return (0);
844 }
845 
846 /*
847  * Clean up after a process when it closes a stream.  This is called
848  * from closef for all closes, whereas strclose is called only for the
849  * last close on a stream.  The siglist is scanned for entries for the
850  * current process, and these are removed.
851  */
852 void
853 strclean(struct vnode *vp)
854 {
855 	strsig_t *ssp, *pssp, *tssp;
856 	stdata_t *stp;
857 	int update = 0;
858 
859 	TRACE_1(TR_FAC_STREAMS_FR,
860 		TR_STRCLEAN, "strclean:%p", vp);
861 	stp = vp->v_stream;
862 	pssp = NULL;
863 	mutex_enter(&stp->sd_lock);
864 	ssp = stp->sd_siglist;
865 	while (ssp) {
866 		if (ssp->ss_pidp == curproc->p_pidp) {
867 			tssp = ssp->ss_next;
868 			if (pssp)
869 				pssp->ss_next = tssp;
870 			else
871 				stp->sd_siglist = tssp;
872 			mutex_enter(&pidlock);
873 			PID_RELE(ssp->ss_pidp);
874 			mutex_exit(&pidlock);
875 			kmem_free(ssp, sizeof (strsig_t));
876 			update = 1;
877 			ssp = tssp;
878 		} else {
879 			pssp = ssp;
880 			ssp = ssp->ss_next;
881 		}
882 	}
883 	if (update) {
884 		stp->sd_sigflags = 0;
885 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
886 			stp->sd_sigflags |= ssp->ss_events;
887 	}
888 	mutex_exit(&stp->sd_lock);
889 }
890 
891 /*
892  * Used on the last close to remove any remaining items on the siglist.
893  * These could be present on the siglist due to I_ESETSIG calls that
894  * use process groups or processed that do not have an open file descriptor
895  * for this stream (Such entries would not be removed by strclean).
896  */
897 static void
898 strcleanall(struct vnode *vp)
899 {
900 	strsig_t *ssp, *nssp;
901 	stdata_t *stp;
902 
903 	stp = vp->v_stream;
904 	mutex_enter(&stp->sd_lock);
905 	ssp = stp->sd_siglist;
906 	stp->sd_siglist = NULL;
907 	while (ssp) {
908 		nssp = ssp->ss_next;
909 		mutex_enter(&pidlock);
910 		PID_RELE(ssp->ss_pidp);
911 		mutex_exit(&pidlock);
912 		kmem_free(ssp, sizeof (strsig_t));
913 		ssp = nssp;
914 	}
915 	stp->sd_sigflags = 0;
916 	mutex_exit(&stp->sd_lock);
917 }
918 
919 /*
920  * Retrieve the next message from the logical stream head read queue
921  * using either rwnext (if sync stream) or getq_noenab.
922  * It is the callers responsibility to call qbackenable after
923  * it is finished with the message. The caller should not call
924  * qbackenable until after any putback calls to avoid spurious backenabling.
925  */
926 mblk_t *
927 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
928     int *errorp)
929 {
930 	mblk_t *bp;
931 	int error;
932 
933 	ASSERT(MUTEX_HELD(&stp->sd_lock));
934 	/* Holding sd_lock prevents the read queue from changing  */
935 
936 	if (uiop != NULL && stp->sd_struiordq != NULL &&
937 	    q->q_first == NULL &&
938 	    (!first || (stp->sd_wakeq & RSLEEP))) {
939 		/*
940 		 * Stream supports rwnext() for the read side.
941 		 * If this is the first time we're called by e.g. strread
942 		 * only do the downcall if there is a deferred wakeup
943 		 * (registered in sd_wakeq).
944 		 */
945 		struiod_t uiod;
946 
947 		if (first)
948 			stp->sd_wakeq &= ~RSLEEP;
949 
950 		(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
951 			sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
952 		uiod.d_mp = 0;
953 		/*
954 		 * Mark that a thread is in rwnext on the read side
955 		 * to prevent strrput from nacking ioctls immediately.
956 		 * When the last concurrent rwnext returns
957 		 * the ioctls are nack'ed.
958 		 */
959 		ASSERT(MUTEX_HELD(&stp->sd_lock));
960 		stp->sd_struiodnak++;
961 		/*
962 		 * Note: rwnext will drop sd_lock.
963 		 */
964 		error = rwnext(q, &uiod);
965 		ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
966 		mutex_enter(&stp->sd_lock);
967 		stp->sd_struiodnak--;
968 		while (stp->sd_struiodnak == 0 &&
969 		    ((bp = stp->sd_struionak) != NULL)) {
970 			stp->sd_struionak = bp->b_next;
971 			bp->b_next = NULL;
972 			bp->b_datap->db_type = M_IOCNAK;
973 			/*
974 			 * Protect against the driver passing up
975 			 * messages after it has done a qprocsoff.
976 			 */
977 			if (_OTHERQ(q)->q_next == NULL)
978 				freemsg(bp);
979 			else {
980 				mutex_exit(&stp->sd_lock);
981 				qreply(q, bp);
982 				mutex_enter(&stp->sd_lock);
983 			}
984 		}
985 		ASSERT(MUTEX_HELD(&stp->sd_lock));
986 		if (error == 0 || error == EWOULDBLOCK) {
987 			if ((bp = uiod.d_mp) != NULL) {
988 				*errorp = 0;
989 				ASSERT(MUTEX_HELD(&stp->sd_lock));
990 				return (bp);
991 			}
992 			error = 0;
993 		} else if (error == EINVAL) {
994 			/*
995 			 * The stream plumbing must have
996 			 * changed while we were away, so
997 			 * just turn off rwnext()s.
998 			 */
999 			error = 0;
1000 		} else if (error == EBUSY) {
1001 			/*
1002 			 * The module might have data in transit using putnext
1003 			 * Fall back on waiting + getq.
1004 			 */
1005 			error = 0;
1006 		} else {
1007 			*errorp = error;
1008 			ASSERT(MUTEX_HELD(&stp->sd_lock));
1009 			return (NULL);
1010 		}
1011 		/*
1012 		 * Try a getq in case a rwnext() generated mblk
1013 		 * has bubbled up via strrput().
1014 		 */
1015 	}
1016 	*errorp = 0;
1017 	ASSERT(MUTEX_HELD(&stp->sd_lock));
1018 	return (getq_noenab(q));
1019 }
1020 
1021 /*
1022  * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'.
1023  * If the message does not fit in the uio the remainder of it is returned;
1024  * otherwise NULL is returned.  Any embedded zero-length mblk_t's are
1025  * consumed, even if uio_resid reaches zero.  On error, `*errorp' is set to
1026  * the error code, the message is consumed, and NULL is returned.
1027  */
1028 static mblk_t *
1029 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp)
1030 {
1031 	int error;
1032 	ptrdiff_t n;
1033 	mblk_t *nbp;
1034 
1035 	ASSERT(bp->b_wptr >= bp->b_rptr);
1036 
1037 	do {
1038 		if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) {
1039 			ASSERT(n > 0);
1040 
1041 			error = uiomove(bp->b_rptr, n, UIO_READ, uiop);
1042 			if (error != 0) {
1043 				freemsg(bp);
1044 				*errorp = error;
1045 				return (NULL);
1046 			}
1047 		}
1048 
1049 		bp->b_rptr += n;
1050 		while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) {
1051 			nbp = bp;
1052 			bp = bp->b_cont;
1053 			freeb(nbp);
1054 		}
1055 	} while (bp != NULL && uiop->uio_resid > 0);
1056 
1057 	*errorp = 0;
1058 	return (bp);
1059 }
1060 
1061 /*
1062  * Read a stream according to the mode flags in sd_flag:
1063  *
1064  * (default mode)		- Byte stream, msg boundaries are ignored
1065  * RD_MSGDIS (msg discard)	- Read on msg boundaries and throw away
1066  *				any data remaining in msg
1067  * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back
1068  *				any remaining data on head of read queue
1069  *
1070  * Consume readable messages on the front of the queue until
1071  * ttolwp(curthread)->lwp_count
1072  * is satisfied, the readable messages are exhausted, or a message
1073  * boundary is reached in a message mode.  If no data was read and
1074  * the stream was not opened with the NDELAY flag, block until data arrives.
1075  * Otherwise return the data read and update the count.
1076  *
1077  * In default mode a 0 length message signifies end-of-file and terminates
1078  * a read in progress.  The 0 length message is removed from the queue
1079  * only if it is the only message read (no data is read).
1080  *
1081  * An attempt to read an M_PROTO or M_PCPROTO message results in an
1082  * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set.
1083  * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data.
1084  * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message
1085  * are unlinked from any M_DATA blocks in the message, the protos are
1086  * thrown away, and the data is read.
 *
 * `vp' is the vnode whose v_stream is the stream head to read from, `uiop'
 * describes the caller's buffer, and `crp' is only passed to the trace
 * points.  The return value is 0 or an errno value (for example EBADMSG
 * when an unreadable message is found before any data has been read).
 * sd_lock is taken on entry and has been dropped again by the time the
 * function returns.
1087  */
1088 /* ARGSUSED */
1089 int
1090 strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
1091 {
1092 	struct stdata *stp;
1093 	mblk_t *bp, *nbp;
1094 	queue_t *q;
1095 	int error = 0;
1096 	uint_t old_sd_flag;
1097 	int first;
1098 	char rflg;
1099 	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
1100 #define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
1101 	short delim;
1102 	unsigned char pri = 0;
1103 	char waitflag;
1104 	unsigned char type;
1105 
1106 	TRACE_1(TR_FAC_STREAMS_FR,
1107 		TR_STRREAD_ENTER, "strread:%p", vp);
1108 	ASSERT(vp->v_stream);
1109 	stp = vp->v_stream;
1110 
1111 	mutex_enter(&stp->sd_lock);
1112 
1113 	if ((error = i_straccess(stp, JCREAD)) != 0) {
1114 		mutex_exit(&stp->sd_lock);
1115 		return (error);
1116 	}
1117 
1118 	if (stp->sd_flag & (STRDERR|STPLEX)) {
1119 		error = strgeterr(stp, STRDERR|STPLEX, 0);
1120 		if (error != 0) {
1121 			mutex_exit(&stp->sd_lock);
1122 			return (error);
1123 		}
1124 	}
1125 
1126 	/*
1127 	 * Loop terminates when uiop->uio_resid == 0.
1128 	 */
	/* rflg becomes 1 once this call has started copying data out. */
1129 	rflg = 0;
1130 	waitflag = READWAIT;
1131 	q = _RD(stp->sd_wrq);
1132 	for (;;) {
1133 		ASSERT(MUTEX_HELD(&stp->sd_lock));
1134 		old_sd_flag = stp->sd_flag;
1135 		mark = 0;
1136 		delim = 0;
1137 		first = 1;
1138 		while ((bp = strget(stp, q, uiop, first, &error)) == NULL) {
1139 			int done = 0;
1140 
1141 			ASSERT(MUTEX_HELD(&stp->sd_lock));
1142 
1143 			if (error != 0)
1144 				goto oops;
1145 
1146 			if (stp->sd_flag & (STRHUP|STREOF)) {
1147 				goto oops;
1148 			}
1149 			if (rflg && !(stp->sd_flag & STRDELIM)) {
1150 				goto oops;
1151 			}
1152 			/*
1153 			 * If a read(fd,buf,0) has been done, there is no
1154 			 * need to sleep. We always have zero bytes to
1155 			 * return.
1156 			 */
1157 			if (uiop->uio_resid == 0) {
1158 				goto oops;
1159 			}
1160 
1161 			qbackenable(q, 0);
1162 
1163 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT,
1164 				"strread calls strwaitq:%p, %p, %p",
1165 				vp, uiop, crp);
1166 			if ((error = strwaitq(stp, waitflag, uiop->uio_resid,
1167 			    uiop->uio_fmode, -1, &done)) != 0 || done) {
1168 				TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE,
1169 					"strread error or done:%p, %p, %p",
1170 					vp, uiop, crp);
				/*
				 * With FNDELAY on the file and OLDNDELAY on
				 * the stream, EAGAIN is reported as success
				 * (a read that returns 0).
				 */
1171 				if ((uiop->uio_fmode & FNDELAY) &&
1172 				    (stp->sd_flag & OLDNDELAY) &&
1173 				    (error == EAGAIN))
1174 					error = 0;
1175 				goto oops;
1176 			}
1177 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
1178 				"strread awakes:%p, %p, %p", vp, uiop, crp);
1179 			if ((error = i_straccess(stp, JCREAD)) != 0) {
1180 				goto oops;
1181 			}
1182 			first = 0;
1183 		}
1184 		ASSERT(MUTEX_HELD(&stp->sd_lock));
1185 		ASSERT(bp);
1186 		pri = bp->b_band;
1187 		/*
1188 		 * Extract any mark information. If the message is not
1189 		 * completely consumed this information will be put in the mblk
1190 		 * that is putback.
1191 		 * If MSGMARKNEXT is set and the message is completely consumed
1192 		 * the STRATMARK flag will be set below. Likewise, if
1193 		 * MSGNOTMARKNEXT is set and the message is
1194 		 * completely consumed STRNOTATMARK will be set.
1195 		 *
1196 		 * For some unknown reason strread only breaks the read at the
1197 		 * last mark.
1198 		 */
1199 		mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
1200 		ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
1201 			(MSGMARKNEXT|MSGNOTMARKNEXT));
1202 		if (mark != 0 && bp == stp->sd_mark) {
1203 			if (rflg) {
1204 				putback(stp, q, bp, pri);
1205 				goto oops;
1206 			}
1207 			mark |= _LASTMARK;
1208 			stp->sd_mark = NULL;
1209 		}
1210 		if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM))
1211 			delim = 1;
		/*
		 * bp is no longer on the queue; release sd_lock before
		 * running service procedures and copying data out.
		 */
1212 		mutex_exit(&stp->sd_lock);
1213 
1214 		if (STREAM_NEEDSERVICE(stp))
1215 			stream_runservice(stp);
1216 
1217 		type = bp->b_datap->db_type;
1218 
1219 		switch (type) {
1220 
1221 		case M_DATA:
1222 ismdata:
1223 			if (msgnodata(bp)) {
1224 				if (mark || delim) {
1225 					freemsg(bp);
1226 				} else if (rflg) {
1227 
1228 					/*
1229 					 * If already read data put zero
1230 					 * length message back on queue else
1231 					 * free msg and return 0.
1232 					 */
1233 					bp->b_band = pri;
1234 					mutex_enter(&stp->sd_lock);
1235 					putback(stp, q, bp, pri);
1236 					mutex_exit(&stp->sd_lock);
1237 				} else {
1238 					freemsg(bp);
1239 				}
1240 				error =  0;
1241 				goto oops1;
1242 			}
1243 
			/*
			 * NOTE(review): NOINTR presumably keeps later
			 * strwaitq() sleeps from being interrupted now that
			 * data is being consumed -- confirm against strwaitq.
			 */
1244 			rflg = 1;
1245 			waitflag |= NOINTR;
1246 			bp = struiocopyout(bp, uiop, &error);
1247 			if (error != 0)
1248 				goto oops1;
1249 
1250 			mutex_enter(&stp->sd_lock);
1251 			if (bp) {
1252 				/*
1253 				 * Have remaining data in message.
1254 				 * Free msg if in discard mode.
1255 				 */
1256 				if (stp->sd_read_opt & RD_MSGDIS) {
1257 					freemsg(bp);
1258 				} else {
1259 					bp->b_band = pri;
1260 					if ((mark & _LASTMARK) &&
1261 					    (stp->sd_mark == NULL))
1262 						stp->sd_mark = bp;
1263 					bp->b_flag |= mark & ~_LASTMARK;
1264 					if (delim)
1265 						bp->b_flag |= MSGDELIM;
1266 					if (msgnodata(bp))
1267 						freemsg(bp);
1268 					else
1269 						putback(stp, q, bp, pri);
1270 				}
1271 			} else {
1272 				/*
1273 				 * Consumed the complete message.
1274 				 * Move the MSG*MARKNEXT information
1275 				 * to the stream head just in case
1276 				 * the read queue becomes empty.
1277 				 *
1278 				 * If the stream head was at the mark
1279 				 * (STRATMARK) before we dropped sd_lock above
1280 				 * and some data was consumed then we have
1281 				 * moved past the mark thus STRATMARK is
1282 				 * cleared. However, if a message arrived in
1283 				 * strrput during the copyout above causing
1284 				 * STRATMARK to be set we can not clear that
1285 				 * flag.
1286 				 */
1287 				if (mark &
1288 				    (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
1289 					if (mark & MSGMARKNEXT) {
1290 						stp->sd_flag &= ~STRNOTATMARK;
1291 						stp->sd_flag |= STRATMARK;
1292 					} else if (mark & MSGNOTMARKNEXT) {
1293 						stp->sd_flag &= ~STRATMARK;
1294 						stp->sd_flag |= STRNOTATMARK;
1295 					} else {
1296 						stp->sd_flag &=
1297 						    ~(STRATMARK|STRNOTATMARK);
1298 					}
1299 				} else if (rflg && (old_sd_flag & STRATMARK)) {
1300 					stp->sd_flag &= ~STRATMARK;
1301 				}
1302 			}
1303 
1304 			/*
1305 			 * Check for signal messages at the front of the read
1306 			 * queue and generate the signal(s) if appropriate.
1307 			 * The only signal that can be on queue is M_SIG at
1308 			 * this point.
1309 			 */
1310 			while ((((bp = q->q_first)) != NULL) &&
1311 				(bp->b_datap->db_type == M_SIG)) {
1312 				bp = getq_noenab(q);
1313 				/*
1314 				 * sd_lock is held so the content of the
1315 				 * read queue can not change.
1316 				 */
1317 				ASSERT(bp != NULL &&
1318 					bp->b_datap->db_type == M_SIG);
1319 				strsignal_nolock(stp, *bp->b_rptr,
1320 					(int32_t)bp->b_band);
1321 				mutex_exit(&stp->sd_lock);
1322 				freemsg(bp);
1323 				if (STREAM_NEEDSERVICE(stp))
1324 					stream_runservice(stp);
1325 				mutex_enter(&stp->sd_lock);
1326 			}
1327 
			/*
			 * Finish the read when the buffer is full, the last
			 * mark or a delimiter was hit, or a message mode
			 * limits the read to one message.
			 */
1328 			if ((uiop->uio_resid == 0) || (mark & _LASTMARK) ||
1329 			    delim ||
1330 			    (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) {
1331 				goto oops;
1332 			}
1333 			continue;
1334 
1335 		case M_SIG:
1336 			strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band);
1337 			freemsg(bp);
1338 			mutex_enter(&stp->sd_lock);
1339 			continue;
1340 
1341 		case M_PROTO:
1342 		case M_PCPROTO:
1343 			/*
1344 			 * Only data messages are readable.
1345 			 * Any others generate an error, unless
1346 			 * RD_PROTDIS or RD_PROTDAT is set.
1347 			 */
1348 			if (stp->sd_read_opt & RD_PROTDAT) {
				/*
				 * NOTE(review): this loop follows b_next,
				 * while the RD_PROTDIS branch below walks the
				 * message with unlinkb(); confirm that b_next
				 * (rather than b_cont) is intended here.
				 */
1349 				for (nbp = bp; nbp; nbp = nbp->b_next) {
1350 				    if ((nbp->b_datap->db_type == M_PROTO) ||
1351 					(nbp->b_datap->db_type == M_PCPROTO))
1352 					nbp->b_datap->db_type = M_DATA;
1353 				    else
1354 					break;
1355 				}
1356 				/*
1357 				 * clear stream head hi pri flag based on
1358 				 * first message
1359 				 */
1360 				if (type == M_PCPROTO) {
1361 					mutex_enter(&stp->sd_lock);
1362 					stp->sd_flag &= ~STRPRI;
1363 					mutex_exit(&stp->sd_lock);
1364 				}
1365 				goto ismdata;
1366 			} else if (stp->sd_read_opt & RD_PROTDIS) {
1367 				/*
1368 				 * discard non-data messages
1369 				 */
1370 				while (bp &&
1371 				    ((bp->b_datap->db_type == M_PROTO) ||
1372 				    (bp->b_datap->db_type == M_PCPROTO))) {
1373 					nbp = unlinkb(bp);
1374 					freeb(bp);
1375 					bp = nbp;
1376 				}
1377 				/*
1378 				 * clear stream head hi pri flag based on
1379 				 * first message
1380 				 */
1381 				if (type == M_PCPROTO) {
1382 					mutex_enter(&stp->sd_lock);
1383 					stp->sd_flag &= ~STRPRI;
1384 					mutex_exit(&stp->sd_lock);
1385 				}
1386 				if (bp) {
1387 					bp->b_band = pri;
1388 					goto ismdata;
1389 				} else {
1390 					break;
1391 				}
1392 			}
1393 			/* FALLTHRU */
1394 		case M_PASSFP:
1395 			if ((bp->b_datap->db_type == M_PASSFP) &&
1396 			    (stp->sd_read_opt & RD_PROTDIS)) {
1397 				freemsg(bp);
1398 				break;
1399 			}
			/*
			 * Unreadable message: leave it on the queue.  It is
			 * only an error if nothing has been read yet.
			 */
1400 			mutex_enter(&stp->sd_lock);
1401 			putback(stp, q, bp, pri);
1402 			mutex_exit(&stp->sd_lock);
1403 			if (rflg == 0)
1404 				error = EBADMSG;
1405 			goto oops1;
1406 
1407 		default:
1408 			/*
1409 			 * Garbage on stream head read queue.
1410 			 */
1411 			cmn_err(CE_WARN, "bad %x found at stream head\n",
1412 				bp->b_datap->db_type);
1413 			freemsg(bp);
1414 			goto oops1;
1415 		}
		/* Cases that `break' re-take sd_lock for the next pass. */
1416 		mutex_enter(&stp->sd_lock);
1417 	}
	/* oops: exit with sd_lock held.  oops1: exit with sd_lock dropped. */
1418 oops:
1419 	mutex_exit(&stp->sd_lock);
1420 oops1:
1421 	qbackenable(q, pri);
1422 	return (error);
1423 #undef	_LASTMARK
1424 }
1425 
1426 /*
1427  * Default processing of M_PROTO/M_PCPROTO messages.
1428  * Determine which wakeups and signals are needed.
1429  * This can be replaced by a user-specified procedure for kernel users
1430  * of STREAMS.
1431  */
1432 /* ARGSUSED */
1433 mblk_t *
1434 strrput_proto(vnode_t *vp, mblk_t *mp,
1435     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1436     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1437 {
1438 	*wakeups = RSLEEP;
1439 	*allmsgsigs = 0;
1440 
1441 	switch (mp->b_datap->db_type) {
1442 	case M_PROTO:
1443 		if (mp->b_band == 0) {
1444 			*firstmsgsigs = S_INPUT | S_RDNORM;
1445 			*pollwakeups = POLLIN | POLLRDNORM;
1446 		} else {
1447 			*firstmsgsigs = S_INPUT | S_RDBAND;
1448 			*pollwakeups = POLLIN | POLLRDBAND;
1449 		}
1450 		break;
1451 	case M_PCPROTO:
1452 		*firstmsgsigs = S_HIPRI;
1453 		*pollwakeups = POLLPRI;
1454 		break;
1455 	}
1456 	return (mp);
1457 }
1458 
1459 /*
1460  * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and
1461  * M_PASSFP messages.
1462  * Determine which wakeups and signals are needed.
1463  * This can be replaced by a user-specified procedure for kernel users
1464  * of STREAMS.
1465  */
1466 /* ARGSUSED */
1467 mblk_t *
1468 strrput_misc(vnode_t *vp, mblk_t *mp,
1469     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1470     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1471 {
1472 	*wakeups = 0;
1473 	*firstmsgsigs = 0;
1474 	*allmsgsigs = 0;
1475 	*pollwakeups = 0;
1476 	return (mp);
1477 }
1478 
1479 /*
1480  * Stream read put procedure.  Called from downstream driver/module
1481  * with messages for the stream head.  Data, protocol, and in-stream
1482  * signal messages are placed on the queue, others are handled directly.
 *
 * `q' is the stream head read queue (its q_ptr is the struct stdata) and
 * `bp' is the arriving message, which must not be part of a b_next chain.
 * Every return path returns 0.  sd_lock is acquired internally and is not
 * held on return.
1483  */
1484 int
1485 strrput(queue_t *q, mblk_t *bp)
1486 {
1487 	struct stdata	*stp;
1488 	ulong_t		rput_opt;
1489 	strwakeup_t	wakeups;
1490 	strsigset_t	firstmsgsigs;	/* Signals if first message on queue */
1491 	strsigset_t	allmsgsigs;	/* Signals for all messages */
1492 	strsigset_t	signals;	/* Signals events to generate */
1493 	strpollset_t	pollwakeups;
1494 	mblk_t		*nextbp;
1495 	uchar_t		band = 0;
1496 	int		hipri_sig;
1497 
1498 	stp = (struct stdata *)q->q_ptr;
1499 	/*
1500 	 * Use rput_opt for optimized access to the SR_ flags except
1501 	 * SR_POLLIN. That flag has to be checked under sd_lock since it
1502 	 * is modified by strpoll().
1503 	 */
1504 	rput_opt = stp->sd_rput_opt;
1505 
1506 	ASSERT(qclaimed(q));
1507 	TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER,
1508 		"strrput called with message type:q %p bp %p", q, bp);
1509 
1510 	/*
1511 	 * Perform initial processing and pass to the parameterized functions.
	 * Every case that breaks out of this switch does so with sd_lock
	 * held.
1512 	 */
1513 	ASSERT(bp->b_next == NULL);
1514 
1515 	switch (bp->b_datap->db_type) {
1516 	case M_DATA:
1517 		/*
1518 		 * sockfs is the only consumer of STREOF and when it is set,
1519 		 * it implies that the receiver is not interested in receiving
1520 		 * any more data, hence the mblk is freed to prevent unnecessary
1521 		 * message queueing at the stream head.
		 *
		 * NOTE(review): the test below compares the whole of sd_flag
		 * with STREOF instead of testing the STREOF bit; confirm
		 * that this is intended.
1522 		 */
1523 		if (stp->sd_flag == STREOF) {
1524 			freemsg(bp);
1525 			return (0);
1526 		}
1527 		if ((rput_opt & SR_IGN_ZEROLEN) &&
1528 		    bp->b_rptr == bp->b_wptr && msgnodata(bp)) {
1529 			/*
1530 			 * Ignore zero-length M_DATA messages. These might be
1531 			 * generated by some transports.
1532 			 * The zero-length M_DATA messages, even if they
1533 			 * are ignored, should effect the atmark tracking and
1534 			 * should wake up a thread sleeping in strwaitmark.
1535 			 */
1536 			mutex_enter(&stp->sd_lock);
1537 			if (bp->b_flag & MSGMARKNEXT) {
1538 				/*
1539 				 * Record the position of the mark either
1540 				 * in q_last or in STRATMARK.
1541 				 */
1542 				if (q->q_last != NULL) {
1543 					q->q_last->b_flag &= ~MSGNOTMARKNEXT;
1544 					q->q_last->b_flag |= MSGMARKNEXT;
1545 				} else {
1546 					stp->sd_flag &= ~STRNOTATMARK;
1547 					stp->sd_flag |= STRATMARK;
1548 				}
1549 			} else if (bp->b_flag & MSGNOTMARKNEXT) {
1550 				/*
1551 				 * Record that this is not the position of
1552 				 * the mark either in q_last or in
1553 				 * STRNOTATMARK.
1554 				 */
1555 				if (q->q_last != NULL) {
1556 					q->q_last->b_flag &= ~MSGMARKNEXT;
1557 					q->q_last->b_flag |= MSGNOTMARKNEXT;
1558 				} else {
1559 					stp->sd_flag &= ~STRATMARK;
1560 					stp->sd_flag |= STRNOTATMARK;
1561 				}
1562 			}
1563 			if (stp->sd_flag & RSLEEP) {
1564 				stp->sd_flag &= ~RSLEEP;
1565 				cv_broadcast(&q->q_wait);
1566 			}
1567 			mutex_exit(&stp->sd_lock);
1568 			freemsg(bp);
1569 			return (0);
1570 		}
1571 		wakeups = RSLEEP;
1572 		if (bp->b_band == 0) {
1573 			firstmsgsigs = S_INPUT | S_RDNORM;
1574 			pollwakeups = POLLIN | POLLRDNORM;
1575 		} else {
1576 			firstmsgsigs = S_INPUT | S_RDBAND;
1577 			pollwakeups = POLLIN | POLLRDBAND;
1578 		}
1579 		if (rput_opt & SR_SIGALLDATA)
1580 			allmsgsigs = firstmsgsigs;
1581 		else
1582 			allmsgsigs = 0;
1583 
1584 		mutex_enter(&stp->sd_lock);
1585 		if ((rput_opt & SR_CONSOL_DATA) &&
1586 		    (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) {
1587 			/*
1588 			 * Consolidate on M_DATA message onto an M_DATA,
1589 			 * M_PROTO, or M_PCPROTO by merging it with q_last.
1590 			 * The consolidation does not take place if
1591 			 * the old message is marked with either of the
1592 			 * marks or the delim flag or if the new
1593 			 * message is marked with MSGMARK. The MSGMARK
1594 			 * check is needed to handle the odd semantics of
1595 			 * MSGMARK where essentially the whole message
1596 			 * is to be treated as marked.
1597 			 * Carry any MSGMARKNEXT  and MSGNOTMARKNEXT from the
1598 			 * new message to the front of the b_cont chain.
1599 			 */
1600 			mblk_t *lbp;
1601 
1602 			lbp = q->q_last;
1603 			if (lbp != NULL &&
1604 			    (lbp->b_datap->db_type == M_DATA ||
1605 			    lbp->b_datap->db_type == M_PROTO ||
1606 			    lbp->b_datap->db_type == M_PCPROTO) &&
1607 			    !(lbp->b_flag & (MSGDELIM|MSGMARK|
1608 			    MSGMARKNEXT))) {
				/*
				 * Take the old tail off the queue; the merged
				 * message is queued again at one_more below.
				 */
1609 				rmvq_noenab(q, lbp);
1610 				/*
1611 				 * The first message in the b_cont list
1612 				 * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
1613 				 * We need to handle the case where we
1614 				 * are appending
1615 				 *
1616 				 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
1617 				 * 2) a MSGMARKNEXT to a plain message.
1618 				 * 3) a MSGNOTMARKNEXT to a plain message
1619 				 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
1620 				 *    message.
1621 				 *
1622 				 * Thus we never append a MSGMARKNEXT or
1623 				 * MSGNOTMARKNEXT to a MSGMARKNEXT message.
1624 				 */
1625 				if (bp->b_flag & MSGMARKNEXT) {
1626 					lbp->b_flag |= MSGMARKNEXT;
1627 					lbp->b_flag &= ~MSGNOTMARKNEXT;
1628 					bp->b_flag &= ~MSGMARKNEXT;
1629 				} else if (bp->b_flag & MSGNOTMARKNEXT) {
1630 					lbp->b_flag |= MSGNOTMARKNEXT;
1631 					bp->b_flag &= ~MSGNOTMARKNEXT;
1632 				}
1633 
1634 				linkb(lbp, bp);
1635 				bp = lbp;
1636 				/*
1637 				 * The new message logically isn't the first
1638 				 * even though the q_first check below thinks
1639 				 * it is. Clear the firstmsgsigs to make it
1640 				 * not appear to be first.
1641 				 */
1642 				firstmsgsigs = 0;
1643 			}
1644 		}
1645 		break;
1646 
1647 	case M_PASSFP:
1648 		wakeups = RSLEEP;
1649 		allmsgsigs = 0;
1650 		if (bp->b_band == 0) {
1651 			firstmsgsigs = S_INPUT | S_RDNORM;
1652 			pollwakeups = POLLIN | POLLRDNORM;
1653 		} else {
1654 			firstmsgsigs = S_INPUT | S_RDBAND;
1655 			pollwakeups = POLLIN | POLLRDBAND;
1656 		}
1657 		mutex_enter(&stp->sd_lock);
1658 		break;
1659 
1660 	case M_PROTO:
1661 	case M_PCPROTO:
1662 		ASSERT(stp->sd_rprotofunc != NULL);
1663 		bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp,
1664 			&wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
		/*
		 * ALLSIG and ALLPOLL are also used by the asserts in the
		 * default case below, which is where they are #undef'd.
		 */
1665 #define	ALLSIG	(S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\
1666 		S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)
1667 #define	ALLPOLL	(POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\
1668 		POLLWRBAND)
1669 
1670 		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
1671 		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
1672 		ASSERT((allmsgsigs & ~ALLSIG) == 0);
1673 		ASSERT((pollwakeups & ~ALLPOLL) == 0);
1674 
1675 		mutex_enter(&stp->sd_lock);
1676 		break;
1677 
1678 	default:
1679 		ASSERT(stp->sd_rmiscfunc != NULL);
1680 		bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp,
1681 			&wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
1682 		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
1683 		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
1684 		ASSERT((allmsgsigs & ~ALLSIG) == 0);
1685 		ASSERT((pollwakeups & ~ALLPOLL) == 0);
1686 #undef	ALLSIG
1687 #undef	ALLPOLL
1688 		mutex_enter(&stp->sd_lock);
1689 		break;
1690 	}
1691 	ASSERT(MUTEX_HELD(&stp->sd_lock));
1692 
1693 	/* By default generate superset of signals */
1694 	signals = (firstmsgsigs | allmsgsigs);
1695 
1696 	/*
1697 	 * The  proto and misc functions can return multiple messages
1698 	 * as a b_next chain. Such messages are processed separately.
	 * Each pass through one_more handles one message with sd_lock held;
	 * a NULL bp still performs the wakeups, poll wakeups and signals.
1699 	 */
1700 one_more:
1701 	hipri_sig = 0;
1702 	if (bp == NULL) {
1703 		nextbp = NULL;
1704 	} else {
1705 		nextbp = bp->b_next;
1706 		bp->b_next = NULL;
1707 
1708 		switch (bp->b_datap->db_type) {
1709 		case M_PCPROTO:
1710 			/*
1711 			 * Only one priority protocol message is allowed at the
1712 			 * stream head at a time.
1713 			 */
1714 			if (stp->sd_flag & STRPRI) {
1715 				TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR,
1716 				    "M_PCPROTO already at head");
1717 				freemsg(bp);
1718 				mutex_exit(&stp->sd_lock);
1719 				goto done;
1720 			}
1721 			stp->sd_flag |= STRPRI;
1722 			hipri_sig = 1;
1723 			/* FALLTHRU */
1724 		case M_DATA:
1725 		case M_PROTO:
1726 		case M_PASSFP:
1727 			band = bp->b_band;
1728 			/*
1729 			 * Marking doesn't work well when messages
1730 			 * are marked in more than one band.  We only
1731 			 * remember the last message received, even if
1732 			 * it is placed on the queue ahead of other
1733 			 * marked messages.
1734 			 */
1735 			if (bp->b_flag & MSGMARK)
1736 				stp->sd_mark = bp;
1737 			(void) putq(q, bp);
1738 
1739 			/*
1740 			 * If message is a PCPROTO message, always use
1741 			 * firstmsgsigs to determine if a signal should be
1742 			 * sent as strrput is the only place to send
1743 			 * signals for PCPROTO. Other messages are based on
1744 			 * the STRGETINPROG flag. The flag determines if
1745 			 * strrput or (k)strgetmsg will be responsible for
1746 			 * sending the signals, in the firstmsgsigs case.
1747 			 */
1748 			if ((hipri_sig == 1) ||
1749 			    (((stp->sd_flag & STRGETINPROG) == 0) &&
1750 			    (q->q_first == bp)))
1751 				signals = (firstmsgsigs | allmsgsigs);
1752 			else
1753 				signals = allmsgsigs;
1754 			break;
1755 
1756 		default:
			/* strrput_nondata() is called without sd_lock held. */
1757 			mutex_exit(&stp->sd_lock);
1758 			(void) strrput_nondata(q, bp);
1759 			mutex_enter(&stp->sd_lock);
1760 			break;
1761 		}
1762 	}
1763 	ASSERT(MUTEX_HELD(&stp->sd_lock));
1764 	/*
1765 	 * Wake sleeping read/getmsg and cancel deferred wakeup
1766 	 */
1767 	if (wakeups & RSLEEP)
1768 		stp->sd_wakeq &= ~RSLEEP;
1769 
	/* Only wake sleepers that have actually flagged themselves. */
1770 	wakeups &= stp->sd_flag;
1771 	if (wakeups & RSLEEP) {
1772 		stp->sd_flag &= ~RSLEEP;
1773 		cv_broadcast(&q->q_wait);
1774 	}
1775 	if (wakeups & WSLEEP) {
1776 		stp->sd_flag &= ~WSLEEP;
1777 		cv_broadcast(&_WR(q)->q_wait);
1778 	}
1779 
1780 	if (pollwakeups != 0) {
1781 		if (pollwakeups == (POLLIN | POLLRDNORM)) {
1782 			/*
1783 			 * Can't use rput_opt since it was not
1784 			 * read when sd_lock was held and SR_POLLIN is changed
1785 			 * by strpoll() under sd_lock.
1786 			 */
1787 			if (!(stp->sd_rput_opt & SR_POLLIN))
1788 				goto no_pollwake;
1789 			stp->sd_rput_opt &= ~SR_POLLIN;
1790 		}
1791 		mutex_exit(&stp->sd_lock);
1792 		pollwakeup(&stp->sd_pollist, pollwakeups);
1793 		mutex_enter(&stp->sd_lock);
1794 	}
1795 no_pollwake:
1796 
1797 	/*
1798 	 * strsendsig can handle multiple signals with a
1799 	 * single call.
1800 	 */
1801 	if (stp->sd_sigflags & signals)
1802 		strsendsig(stp->sd_siglist, signals, band, 0);
1803 	mutex_exit(&stp->sd_lock);
1804 
1805 
	/* done is reached with sd_lock already released. */
1806 done:
1807 	if (nextbp == NULL)
1808 		return (0);
1809 
1810 	/*
1811 	 * Any signals were handled the first time.
1812 	 * Wakeups and pollwakeups are redone to avoid any race
1813 	 * conditions - all the messages are not queued until the
1814 	 * last message has been processed by strrput.
1815 	 */
1816 	bp = nextbp;
1817 	signals = firstmsgsigs = allmsgsigs = 0;
1818 	mutex_enter(&stp->sd_lock);
1819 	goto one_more;
1820 }
1821 
1822 static void
1823 log_dupioc(queue_t *rq, mblk_t *bp)
1824 {
1825 	queue_t *wq, *qp;
1826 	char *modnames, *mnp, *dname;
1827 	size_t maxmodstr;
1828 	boolean_t islast;
1829 
1830 	/*
1831 	 * Allocate a buffer large enough to hold the names of nstrpush modules
1832 	 * and one driver, with spaces between and NUL terminator.  If we can't
1833 	 * get memory, then we'll just log the driver name.
1834 	 */
1835 	maxmodstr = nstrpush * (FMNAMESZ + 1);
1836 	mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP);
1837 
1838 	/* march down write side to print log message down to the driver */
1839 	wq = WR(rq);
1840 
1841 	/* make sure q_next doesn't shift around while we're grabbing data */
1842 	claimstr(wq);
1843 	qp = wq->q_next;
1844 	do {
1845 		if ((dname = qp->q_qinfo->qi_minfo->mi_idname) == NULL)
1846 			dname = "?";
1847 		islast = !SAMESTR(qp) || qp->q_next == NULL;
1848 		if (modnames == NULL) {
1849 			/*
1850 			 * If we don't have memory, then get the driver name in
1851 			 * the log where we can see it.  Note that memory
1852 			 * pressure is a possible cause of these sorts of bugs.
1853 			 */
1854 			if (islast) {
1855 				modnames = dname;
1856 				maxmodstr = 0;
1857 			}
1858 		} else {
1859 			mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname);
1860 			if (!islast)
1861 				*mnp++ = ' ';
1862 		}
1863 		qp = qp->q_next;
1864 	} while (!islast);
1865 	releasestr(wq);
1866 	/* Cannot happen unless stream head is corrupt. */
1867 	ASSERT(modnames != NULL);
1868 	(void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1,
1869 	    SL_CONSOLE|SL_TRACE|SL_ERROR,
1870 	    "Warning: stream %p received duplicate %X M_IOC%s; module list: %s",
1871 	    rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd,
1872 	    (DB_TYPE(bp) == M_IOCACK ? "ACK" : "NAK"), modnames);
1873 	if (maxmodstr != 0)
1874 		kmem_free(modnames, maxmodstr);
1875 }
1876 
1877 int
1878 strrput_nondata(queue_t *q, mblk_t *bp)
1879 {
1880 	struct stdata *stp;
1881 	struct iocblk *iocbp;
1882 	struct stroptions *sop;
1883 	struct copyreq *reqp;
1884 	struct copyresp *resp;
1885 	unsigned char bpri;
1886 	unsigned char  flushed_already = 0;
1887 
1888 	stp = (struct stdata *)q->q_ptr;
1889 
1890 	ASSERT(!(stp->sd_flag & STPLEX));
1891 	ASSERT(qclaimed(q));
1892 
1893 	switch (bp->b_datap->db_type) {
1894 	case M_ERROR:
1895 		/*
1896 		 * An error has occurred downstream, the errno is in the first
1897 		 * bytes of the message.
1898 		 */
1899 		if ((bp->b_wptr - bp->b_rptr) == 2) {	/* New flavor */
1900 			unsigned char rw = 0;
1901 
1902 			mutex_enter(&stp->sd_lock);
1903 			if (*bp->b_rptr != NOERROR) {	/* read error */
1904 				if (*bp->b_rptr != 0) {
1905 					if (stp->sd_flag & STRDERR)
1906 						flushed_already |= FLUSHR;
1907 					stp->sd_flag |= STRDERR;
1908 					rw |= FLUSHR;
1909 				} else {
1910 					stp->sd_flag &= ~STRDERR;
1911 				}
1912 				stp->sd_rerror = *bp->b_rptr;
1913 			}
1914 			bp->b_rptr++;
1915 			if (*bp->b_rptr != NOERROR) {	/* write error */
1916 				if (*bp->b_rptr != 0) {
1917 					if (stp->sd_flag & STWRERR)
1918 						flushed_already |= FLUSHW;
1919 					stp->sd_flag |= STWRERR;
1920 					rw |= FLUSHW;
1921 				} else {
1922 					stp->sd_flag &= ~STWRERR;
1923 				}
1924 				stp->sd_werror = *bp->b_rptr;
1925 			}
1926 			if (rw) {
1927 				TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE,
1928 					"strrput cv_broadcast:q %p, bp %p",
1929 					q, bp);
1930 				cv_broadcast(&q->q_wait); /* readers */
1931 				cv_broadcast(&_WR(q)->q_wait); /* writers */
1932 				cv_broadcast(&stp->sd_monitor); /* ioctllers */
1933 
1934 				mutex_exit(&stp->sd_lock);
1935 				pollwakeup(&stp->sd_pollist, POLLERR);
1936 				mutex_enter(&stp->sd_lock);
1937 
1938 				if (stp->sd_sigflags & S_ERROR)
1939 					strsendsig(stp->sd_siglist, S_ERROR, 0,
1940 					    ((rw & FLUSHR) ? stp->sd_rerror :
1941 					    stp->sd_werror));
1942 				mutex_exit(&stp->sd_lock);
1943 				/*
1944 				 * Send the M_FLUSH only
1945 				 * for the first M_ERROR
1946 				 * message on the stream
1947 				 */
1948 				if (flushed_already == rw) {
1949 					freemsg(bp);
1950 					return (0);
1951 				}
1952 
1953 				bp->b_datap->db_type = M_FLUSH;
1954 				*bp->b_rptr = rw;
1955 				bp->b_wptr = bp->b_rptr + 1;
1956 				/*
1957 				 * Protect against the driver
1958 				 * passing up messages after
1959 				 * it has done a qprocsoff
1960 				 */
1961 				if (_OTHERQ(q)->q_next == NULL)
1962 					freemsg(bp);
1963 				else
1964 					qreply(q, bp);
1965 				return (0);
1966 			} else
1967 				mutex_exit(&stp->sd_lock);
1968 		} else if (*bp->b_rptr != 0) {		/* Old flavor */
1969 				if (stp->sd_flag & (STRDERR|STWRERR))
1970 					flushed_already = FLUSHRW;
1971 				mutex_enter(&stp->sd_lock);
1972 				stp->sd_flag |= (STRDERR|STWRERR);
1973 				stp->sd_rerror = *bp->b_rptr;
1974 				stp->sd_werror = *bp->b_rptr;
1975 				TRACE_2(TR_FAC_STREAMS_FR,
1976 					TR_STRRPUT_WAKE2,
1977 					"strrput wakeup #2:q %p, bp %p", q, bp);
1978 				cv_broadcast(&q->q_wait); /* the readers */
1979 				cv_broadcast(&_WR(q)->q_wait); /* the writers */
1980 				cv_broadcast(&stp->sd_monitor); /* ioctllers */
1981 
1982 				mutex_exit(&stp->sd_lock);
1983 				pollwakeup(&stp->sd_pollist, POLLERR);
1984 				mutex_enter(&stp->sd_lock);
1985 
1986 				if (stp->sd_sigflags & S_ERROR)
1987 					strsendsig(stp->sd_siglist, S_ERROR, 0,
1988 					    (stp->sd_werror ? stp->sd_werror :
1989 					    stp->sd_rerror));
1990 				mutex_exit(&stp->sd_lock);
1991 
1992 				/*
1993 				 * Send the M_FLUSH only
1994 				 * for the first M_ERROR
1995 				 * message on the stream
1996 				 */
1997 				if (flushed_already != FLUSHRW) {
1998 					bp->b_datap->db_type = M_FLUSH;
1999 					*bp->b_rptr = FLUSHRW;
2000 					/*
2001 					 * Protect against the driver passing up
2002 					 * messages after it has done a
2003 					 * qprocsoff.
2004 					 */
2005 				if (_OTHERQ(q)->q_next == NULL)
2006 					freemsg(bp);
2007 				else
2008 					qreply(q, bp);
2009 				return (0);
2010 				}
2011 		}
2012 		freemsg(bp);
2013 		return (0);
2014 
2015 	case M_HANGUP:
2016 
2017 		freemsg(bp);
2018 		mutex_enter(&stp->sd_lock);
2019 		stp->sd_werror = ENXIO;
2020 		stp->sd_flag |= STRHUP;
2021 		stp->sd_flag &= ~(WSLEEP|RSLEEP);
2022 
2023 		/*
2024 		 * send signal if controlling tty
2025 		 */
2026 
2027 		if (stp->sd_sidp) {
2028 			prsignal(stp->sd_sidp, SIGHUP);
2029 			if (stp->sd_sidp != stp->sd_pgidp)
2030 				pgsignal(stp->sd_pgidp, SIGTSTP);
2031 		}
2032 
2033 		/*
2034 		 * wake up read, write, and exception pollers and
2035 		 * reset wakeup mechanism.
2036 		 */
2037 		cv_broadcast(&q->q_wait);	/* the readers */
2038 		cv_broadcast(&_WR(q)->q_wait);	/* the writers */
2039 		cv_broadcast(&stp->sd_monitor);	/* the ioctllers */
2040 		strhup(stp);
2041 		mutex_exit(&stp->sd_lock);
2042 		return (0);
2043 
2044 	case M_UNHANGUP:
2045 		freemsg(bp);
2046 		mutex_enter(&stp->sd_lock);
2047 		stp->sd_werror = 0;
2048 		stp->sd_flag &= ~STRHUP;
2049 		mutex_exit(&stp->sd_lock);
2050 		return (0);
2051 
2052 	case M_SIG:
2053 		/*
2054 		 * Someone downstream wants to post a signal.  The
2055 		 * signal to post is contained in the first byte of the
2056 		 * message.  If the message would go on the front of
2057 		 * the queue, send a signal to the process group
2058 		 * (if not SIGPOLL) or to the siglist processes
2059 		 * (SIGPOLL).  If something is already on the queue,
2060 		 * OR if we are delivering a delayed suspend (*sigh*
2061 		 * another "tty" hack) and there's no one sleeping already,
2062 		 * just enqueue the message.
2063 		 */
2064 		mutex_enter(&stp->sd_lock);
2065 		if (q->q_first || (*bp->b_rptr == SIGTSTP &&
2066 		    !(stp->sd_flag & RSLEEP))) {
2067 			(void) putq(q, bp);
2068 			mutex_exit(&stp->sd_lock);
2069 			return (0);
2070 		}
2071 		mutex_exit(&stp->sd_lock);
2072 		/* FALLTHRU */
2073 
2074 	case M_PCSIG:
2075 		/*
2076 		 * Don't enqueue, just post the signal.
2077 		 */
2078 		strsignal(stp, *bp->b_rptr, 0L);
2079 		freemsg(bp);
2080 		return (0);
2081 
2082 	case M_FLUSH:
2083 		/*
2084 		 * Flush queues.  The indication of which queues to flush
2085 		 * is in the first byte of the message.  If the read queue
2086 		 * is specified, then flush it.  If FLUSHBAND is set, just
2087 		 * flush the band specified by the second byte of the message.
2088 		 *
2089 		 * If a module has issued a M_SETOPT to not flush hi
2090 		 * priority messages off of the stream head, then pass this
2091 		 * flag into the flushq code to preserve such messages.
2092 		 */
2093 
2094 		if (*bp->b_rptr & FLUSHR) {
2095 			mutex_enter(&stp->sd_lock);
2096 			if (*bp->b_rptr & FLUSHBAND) {
2097 				ASSERT((bp->b_wptr - bp->b_rptr) >= 2);
2098 				flushband(q, *(bp->b_rptr + 1), FLUSHALL);
2099 			} else
2100 				flushq_common(q, FLUSHALL,
2101 				    stp->sd_read_opt & RFLUSHPCPROT);
2102 			if ((q->q_first == NULL) ||
2103 			    (q->q_first->b_datap->db_type < QPCTL))
2104 				stp->sd_flag &= ~STRPRI;
2105 			else {
2106 				ASSERT(stp->sd_flag & STRPRI);
2107 			}
2108 			mutex_exit(&stp->sd_lock);
2109 		}
2110 		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
2111 			*bp->b_rptr &= ~FLUSHR;
2112 			bp->b_flag |= MSGNOLOOP;
2113 			/*
2114 			 * Protect against the driver passing up
2115 			 * messages after it has done a qprocsoff.
2116 			 */
2117 			if (_OTHERQ(q)->q_next == NULL)
2118 				freemsg(bp);
2119 			else
2120 				qreply(q, bp);
2121 			return (0);
2122 		}
2123 		freemsg(bp);
2124 		return (0);
2125 
2126 	case M_IOCACK:
2127 	case M_IOCNAK:
2128 		iocbp = (struct iocblk *)bp->b_rptr;
2129 		/*
2130 		 * If not waiting for ACK or NAK then just free msg.
2131 		 * If incorrect id sequence number then just free msg.
2132 		 * If already have ACK or NAK for user then this is a
2133 		 *    duplicate, display a warning and free the msg.
2134 		 */
2135 		mutex_enter(&stp->sd_lock);
2136 		if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2137 		    (stp->sd_iocid != iocbp->ioc_id)) {
2138 			/*
2139 			 * If the ACK/NAK is a dup, display a message
2140 			 * Dup is when sd_iocid == ioc_id, and
2141 			 * sd_iocblk == <valid ptr> or -1 (the former
2142 			 * is when an ioctl has been put on the stream
2143 			 * head, but has not yet been consumed, the
2144 			 * later is when it has been consumed).
2145 			 */
2146 			if ((stp->sd_iocid == iocbp->ioc_id) &&
2147 			    (stp->sd_iocblk != NULL)) {
2148 				log_dupioc(q, bp);
2149 			}
2150 			freemsg(bp);
2151 			mutex_exit(&stp->sd_lock);
2152 			return (0);
2153 		}
2154 
2155 		/*
2156 		 * Assign ACK or NAK to user and wake up.
2157 		 */
2158 		stp->sd_iocblk = bp;
2159 		cv_broadcast(&stp->sd_monitor);
2160 		mutex_exit(&stp->sd_lock);
2161 		return (0);
2162 
2163 	case M_COPYIN:
2164 	case M_COPYOUT:
2165 		reqp = (struct copyreq *)bp->b_rptr;
2166 
2167 		/*
2168 		 * If not waiting for ACK or NAK then just fail request.
2169 		 * If already have ACK, NAK, or copy request, then just
2170 		 * fail request.
2171 		 * If incorrect id sequence number then just fail request.
2172 		 */
2173 		mutex_enter(&stp->sd_lock);
2174 		if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2175 		    (stp->sd_iocid != reqp->cq_id)) {
2176 			if (bp->b_cont) {
2177 				freemsg(bp->b_cont);
2178 				bp->b_cont = NULL;
2179 			}
2180 			bp->b_datap->db_type = M_IOCDATA;
2181 			bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
2182 			resp = (struct copyresp *)bp->b_rptr;
2183 			resp->cp_rval = (caddr_t)1;	/* failure */
2184 			mutex_exit(&stp->sd_lock);
2185 			putnext(stp->sd_wrq, bp);
2186 			return (0);
2187 		}
2188 
2189 		/*
2190 		 * Assign copy request to user and wake up.
2191 		 */
2192 		stp->sd_iocblk = bp;
2193 		cv_broadcast(&stp->sd_monitor);
2194 		mutex_exit(&stp->sd_lock);
2195 		return (0);
2196 
2197 	case M_SETOPTS:
2198 		/*
2199 		 * Set stream head options (read option, write offset,
2200 		 * min/max packet size, and/or high/low water marks for
2201 		 * the read side only).
2202 		 */
2203 
2204 		bpri = 0;
2205 		sop = (struct stroptions *)bp->b_rptr;
2206 		mutex_enter(&stp->sd_lock);
2207 		if (sop->so_flags & SO_READOPT) {
2208 			switch (sop->so_readopt & RMODEMASK) {
2209 			case RNORM:
2210 				stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
2211 				break;
2212 
2213 			case RMSGD:
2214 				stp->sd_read_opt =
2215 				    ((stp->sd_read_opt & ~RD_MSGNODIS) |
2216 				    RD_MSGDIS);
2217 				break;
2218 
2219 			case RMSGN:
2220 				stp->sd_read_opt =
2221 				    ((stp->sd_read_opt & ~RD_MSGDIS) |
2222 				    RD_MSGNODIS);
2223 				break;
2224 			}
2225 			switch (sop->so_readopt & RPROTMASK) {
2226 			case RPROTNORM:
2227 				stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
2228 				break;
2229 
2230 			case RPROTDAT:
2231 				stp->sd_read_opt =
2232 				    ((stp->sd_read_opt & ~RD_PROTDIS) |
2233 				    RD_PROTDAT);
2234 				break;
2235 
2236 			case RPROTDIS:
2237 				stp->sd_read_opt =
2238 				    ((stp->sd_read_opt & ~RD_PROTDAT) |
2239 				    RD_PROTDIS);
2240 				break;
2241 			}
2242 			switch (sop->so_readopt & RFLUSHMASK) {
2243 			case RFLUSHPCPROT:
2244 				/*
2245 				 * This sets the stream head to NOT flush
2246 				 * M_PCPROTO messages.
2247 				 */
2248 				stp->sd_read_opt |= RFLUSHPCPROT;
2249 				break;
2250 			}
2251 		}
2252 		if (sop->so_flags & SO_ERROPT) {
2253 			switch (sop->so_erropt & RERRMASK) {
2254 			case RERRNORM:
2255 				stp->sd_flag &= ~STRDERRNONPERSIST;
2256 				break;
2257 			case RERRNONPERSIST:
2258 				stp->sd_flag |= STRDERRNONPERSIST;
2259 				break;
2260 			}
2261 			switch (sop->so_erropt & WERRMASK) {
2262 			case WERRNORM:
2263 				stp->sd_flag &= ~STWRERRNONPERSIST;
2264 				break;
2265 			case WERRNONPERSIST:
2266 				stp->sd_flag |= STWRERRNONPERSIST;
2267 				break;
2268 			}
2269 		}
2270 		if (sop->so_flags & SO_COPYOPT) {
2271 			if (sop->so_copyopt & ZCVMSAFE) {
2272 				stp->sd_copyflag |= STZCVMSAFE;
2273 				stp->sd_copyflag &= ~STZCVMUNSAFE;
2274 			} else if (sop->so_copyopt & ZCVMUNSAFE) {
2275 				stp->sd_copyflag |= STZCVMUNSAFE;
2276 				stp->sd_copyflag &= ~STZCVMSAFE;
2277 			}
2278 
2279 			if (sop->so_copyopt & COPYCACHED) {
2280 				stp->sd_copyflag |= STRCOPYCACHED;
2281 			}
2282 		}
2283 		if (sop->so_flags & SO_WROFF)
2284 			stp->sd_wroff = sop->so_wroff;
2285 		if (sop->so_flags & SO_TAIL)
2286 			stp->sd_tail = sop->so_tail;
2287 		if (sop->so_flags & SO_MINPSZ)
2288 			q->q_minpsz = sop->so_minpsz;
2289 		if (sop->so_flags & SO_MAXPSZ)
2290 			q->q_maxpsz = sop->so_maxpsz;
2291 		if (sop->so_flags & SO_MAXBLK)
2292 			stp->sd_maxblk = sop->so_maxblk;
2293 		if (sop->so_flags & SO_HIWAT) {
2294 		    if (sop->so_flags & SO_BAND) {
2295 			if (strqset(q, QHIWAT, sop->so_band, sop->so_hiwat))
2296 				cmn_err(CE_WARN,
2297 				    "strrput: could not allocate qband\n");
2298 			else
2299 				bpri = sop->so_band;
2300 		    } else {
2301 			q->q_hiwat = sop->so_hiwat;
2302 		    }
2303 		}
2304 		if (sop->so_flags & SO_LOWAT) {
2305 		    if (sop->so_flags & SO_BAND) {
2306 			if (strqset(q, QLOWAT, sop->so_band, sop->so_lowat))
2307 				cmn_err(CE_WARN,
2308 				    "strrput: could not allocate qband\n");
2309 			else
2310 				bpri = sop->so_band;
2311 		    } else {
2312 			q->q_lowat = sop->so_lowat;
2313 		    }
2314 		}
2315 		if (sop->so_flags & SO_MREADON)
2316 			stp->sd_flag |= SNDMREAD;
2317 		if (sop->so_flags & SO_MREADOFF)
2318 			stp->sd_flag &= ~SNDMREAD;
2319 		if (sop->so_flags & SO_NDELON)
2320 			stp->sd_flag |= OLDNDELAY;
2321 		if (sop->so_flags & SO_NDELOFF)
2322 			stp->sd_flag &= ~OLDNDELAY;
2323 		if (sop->so_flags & SO_ISTTY)
2324 			stp->sd_flag |= STRISTTY;
2325 		if (sop->so_flags & SO_ISNTTY)
2326 			stp->sd_flag &= ~STRISTTY;
2327 		if (sop->so_flags & SO_TOSTOP)
2328 			stp->sd_flag |= STRTOSTOP;
2329 		if (sop->so_flags & SO_TONSTOP)
2330 			stp->sd_flag &= ~STRTOSTOP;
2331 		if (sop->so_flags & SO_DELIM)
2332 			stp->sd_flag |= STRDELIM;
2333 		if (sop->so_flags & SO_NODELIM)
2334 			stp->sd_flag &= ~STRDELIM;
2335 
2336 		mutex_exit(&stp->sd_lock);
2337 		freemsg(bp);
2338 
2339 		/* Check backenable in case the water marks changed */
2340 		qbackenable(q, bpri);
2341 		return (0);
2342 
2343 	/*
2344 	 * The following set of cases deal with situations where two stream
2345 	 * heads are connected to each other (twisted streams).  These messages
2346 	 * have no meaning at the stream head.
2347 	 */
2348 	case M_BREAK:
2349 	case M_CTL:
2350 	case M_DELAY:
2351 	case M_START:
2352 	case M_STOP:
2353 	case M_IOCDATA:
2354 	case M_STARTI:
2355 	case M_STOPI:
2356 		freemsg(bp);
2357 		return (0);
2358 
2359 	case M_IOCTL:
2360 		/*
2361 		 * Always NAK this condition
2362 		 * (makes no sense)
2363 		 * If there is one or more threads in the read side
2364 		 * rwnext we have to defer the nacking until that thread
2365 		 * returns (in strget).
2366 		 */
2367 		mutex_enter(&stp->sd_lock);
2368 		if (stp->sd_struiodnak != 0) {
2369 			/*
2370 			 * Defer NAK to the streamhead. Queue at the end
2371 			 * the list.
2372 			 */
2373 			mblk_t *mp = stp->sd_struionak;
2374 
2375 			while (mp && mp->b_next)
2376 				mp = mp->b_next;
2377 			if (mp)
2378 				mp->b_next = bp;
2379 			else
2380 				stp->sd_struionak = bp;
2381 			bp->b_next = NULL;
2382 			mutex_exit(&stp->sd_lock);
2383 			return (0);
2384 		}
2385 		mutex_exit(&stp->sd_lock);
2386 
2387 		bp->b_datap->db_type = M_IOCNAK;
2388 		/*
2389 		 * Protect against the driver passing up
2390 		 * messages after it has done a qprocsoff.
2391 		 */
2392 		if (_OTHERQ(q)->q_next == NULL)
2393 			freemsg(bp);
2394 		else
2395 			qreply(q, bp);
2396 		return (0);
2397 
2398 	default:
2399 #ifdef DEBUG
2400 		cmn_err(CE_WARN,
2401 			"bad message type %x received at stream head\n",
2402 			bp->b_datap->db_type);
2403 #endif
2404 		freemsg(bp);
2405 		return (0);
2406 	}
2407 
2408 	/* NOTREACHED */
2409 }
2410 
2411 /*
2412  * Check if the stream pointed to by `stp' can be written to, and return an
2413  * error code if not.  If `eiohup' is set, then return EIO if STRHUP is set.
2414  * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream,
2415  * then always return EPIPE and send a SIGPIPE to the invoking thread.
2416  */
2417 static int
2418 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok)
2419 {
2420 	int error;
2421 
2422 	ASSERT(MUTEX_HELD(&stp->sd_lock));
2423 
2424 	/*
2425 	 * For modem support, POSIX states that on writes, EIO should
2426 	 * be returned if the stream has been hung up.
2427 	 */
2428 	if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP)
2429 		error = EIO;
2430 	else
2431 		error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0);
2432 
2433 	if (error != 0) {
2434 		if (!(stp->sd_flag & STPLEX) &&
2435 		    (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) {
2436 			tsignal(curthread, SIGPIPE);
2437 			error = EPIPE;
2438 		}
2439 	}
2440 
2441 	return (error);
2442 }
2443 
2444 /*
2445  * Copyin and send data down a stream.
2446  * The caller will allocate and copyin any control part that precedes the
2447  * message and pass than in as mctl.
2448  *
2449  * Caller should *not* hold sd_lock.
2450  * When EWOULDBLOCK is returned the caller has to redo the canputnext
2451  * under sd_lock in order to avoid missing a backenabling wakeup.
2452  *
2453  * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA.
2454  *
2455  * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages.
2456  * For sync streams we can only ignore flow control by reverting to using
2457  * putnext.
2458  *
2459  * If sd_maxblk is less than *iosize this routine might return without
2460  * transferring all of *iosize. In all cases, on return *iosize will contain
2461  * the amount of data that was transferred.
2462  */
2463 static int
2464 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
2465     int b_flag, int pri, int flags)
2466 {
2467 	struiod_t uiod;
2468 	mblk_t *mp;
2469 	queue_t *wqp = stp->sd_wrq;
2470 	int error = 0;
2471 	ssize_t count = *iosize;
2472 	cred_t *cr;
2473 
2474 	ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
2475 
2476 	if (uiop != NULL && count >= 0)
2477 		flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0;
2478 
2479 	if (!(flags & STRUIO_POSTPONE)) {
2480 		/*
2481 		 * Use regular canputnext, strmakedata, putnext sequence.
2482 		 */
2483 		if (pri == 0) {
2484 			if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2485 				freemsg(mctl);
2486 				return (EWOULDBLOCK);
2487 			}
2488 		} else {
2489 			if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) {
2490 				freemsg(mctl);
2491 				return (EWOULDBLOCK);
2492 			}
2493 		}
2494 
2495 		if ((error = strmakedata(iosize, uiop, stp, flags,
2496 					&mp)) != 0) {
2497 			freemsg(mctl);
2498 			/*
2499 			 * need to change return code to ENOMEM
2500 			 * so that this is not confused with
2501 			 * flow control, EAGAIN.
2502 			 */
2503 
2504 			if (error == EAGAIN)
2505 				return (ENOMEM);
2506 			else
2507 				return (error);
2508 		}
2509 		if (mctl != NULL) {
2510 			if (mctl->b_cont == NULL)
2511 				mctl->b_cont = mp;
2512 			else if (mp != NULL)
2513 				linkb(mctl, mp);
2514 			mp = mctl;
2515 			/*
2516 			 * Note that for interrupt thread, the CRED() is
2517 			 * NULL. Don't bother with the pid either.
2518 			 */
2519 			if ((cr = CRED()) != NULL) {
2520 				mblk_setcred(mp, cr);
2521 				DB_CPID(mp) = curproc->p_pid;
2522 			}
2523 		} else if (mp == NULL)
2524 			return (0);
2525 
2526 		mp->b_flag |= b_flag;
2527 		mp->b_band = (uchar_t)pri;
2528 
2529 		if (flags & MSG_IGNFLOW) {
2530 			/*
2531 			 * XXX Hack: Don't get stuck running service
2532 			 * procedures. This is needed for sockfs when
2533 			 * sending the unbind message out of the rput
2534 			 * procedure - we don't want a put procedure
2535 			 * to run service procedures.
2536 			 */
2537 			putnext(wqp, mp);
2538 		} else {
2539 			stream_willservice(stp);
2540 			putnext(wqp, mp);
2541 			stream_runservice(stp);
2542 		}
2543 		return (0);
2544 	}
2545 	/*
2546 	 * Stream supports rwnext() for the write side.
2547 	 */
2548 	if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) {
2549 		freemsg(mctl);
2550 		/*
2551 		 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled".
2552 		 */
2553 		return (error == EAGAIN ? ENOMEM : error);
2554 	}
2555 	if (mctl != NULL) {
2556 		if (mctl->b_cont == NULL)
2557 			mctl->b_cont = mp;
2558 		else if (mp != NULL)
2559 			linkb(mctl, mp);
2560 		mp = mctl;
2561 		/*
2562 		 * Note that for interrupt thread, the CRED() is
2563 		 * NULL.  Don't bother with the pid either.
2564 		 */
2565 		if ((cr = CRED()) != NULL) {
2566 			mblk_setcred(mp, cr);
2567 			DB_CPID(mp) = curproc->p_pid;
2568 		}
2569 	} else if (mp == NULL) {
2570 		return (0);
2571 	}
2572 
2573 	mp->b_flag |= b_flag;
2574 	mp->b_band = (uchar_t)pri;
2575 
2576 	(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
2577 		sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
2578 	uiod.d_uio.uio_offset = 0;
2579 	uiod.d_mp = mp;
2580 	error = rwnext(wqp, &uiod);
2581 	if (! uiod.d_mp) {
2582 		uioskip(uiop, *iosize);
2583 		return (error);
2584 	}
2585 	ASSERT(mp == uiod.d_mp);
2586 	if (error == EINVAL) {
2587 		/*
2588 		 * The stream plumbing must have changed while
2589 		 * we were away, so just turn off rwnext()s.
2590 		 */
2591 		error = 0;
2592 	} else if (error == EBUSY || error == EWOULDBLOCK) {
2593 		/*
2594 		 * Couldn't enter a perimeter or took a page fault,
2595 		 * so fall-back to putnext().
2596 		 */
2597 		error = 0;
2598 	} else {
2599 		freemsg(mp);
2600 		return (error);
2601 	}
2602 	/* Have to check canput before consuming data from the uio */
2603 	if (pri == 0) {
2604 		if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2605 			freemsg(mp);
2606 			return (EWOULDBLOCK);
2607 		}
2608 	} else {
2609 		if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) {
2610 			freemsg(mp);
2611 			return (EWOULDBLOCK);
2612 		}
2613 	}
2614 	ASSERT(mp == uiod.d_mp);
2615 	/* Copyin data from the uio */
2616 	if ((error = struioget(wqp, mp, &uiod, 0)) != 0) {
2617 		freemsg(mp);
2618 		return (error);
2619 	}
2620 	uioskip(uiop, *iosize);
2621 	if (flags & MSG_IGNFLOW) {
2622 		/*
2623 		 * XXX Hack: Don't get stuck running service procedures.
2624 		 * This is needed for sockfs when sending the unbind message
2625 		 * out of the rput procedure - we don't want a put procedure
2626 		 * to run service procedures.
2627 		 */
2628 		putnext(wqp, mp);
2629 	} else {
2630 		stream_willservice(stp);
2631 		putnext(wqp, mp);
2632 		stream_runservice(stp);
2633 	}
2634 	return (0);
2635 }
2636 
2637 /*
2638  * Write attempts to break the write request into messages conforming
2639  * with the minimum and maximum packet sizes set downstream.
2640  *
2641  * Write will not block if downstream queue is full and
2642  * O_NDELAY is set, otherwise it will block waiting for the queue to get room.
2643  *
2644  * A write of zero bytes gets packaged into a zero length message and sent
2645  * downstream like any other message.
2646  *
2647  * If buffers of the requested sizes are not available, the write will
2648  * sleep until the buffers become available.
2649  *
2650  * Write (if specified) will supply a write offset in a message if it
2651  * makes sense. This can be specified by downstream modules as part of
2652  * a M_SETOPTS message.  Write will not supply the write offset if it
2653  * cannot supply any data in a buffer.  In other words, write will never
2654  * send down an empty packet due to a write offset.
2655  */
2656 /* ARGSUSED2 */
2657 int
2658 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp)
2659 {
2660 	return (strwrite_common(vp, uiop, crp, 0));
2661 }
2662 
2663 /* ARGSUSED2 */
2664 int
2665 strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag)
2666 {
2667 	struct stdata *stp;
2668 	struct queue *wqp;
2669 	ssize_t rmin, rmax;
2670 	ssize_t iosize;
2671 	int waitflag;
2672 	int tempmode;
2673 	int error = 0;
2674 	int b_flag;
2675 
2676 	ASSERT(vp->v_stream);
2677 	stp = vp->v_stream;
2678 
2679 	mutex_enter(&stp->sd_lock);
2680 
2681 	if ((error = i_straccess(stp, JCWRITE)) != 0) {
2682 		mutex_exit(&stp->sd_lock);
2683 		return (error);
2684 	}
2685 
2686 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
2687 		error = strwriteable(stp, B_TRUE, B_TRUE);
2688 		if (error != 0) {
2689 			mutex_exit(&stp->sd_lock);
2690 			return (error);
2691 		}
2692 	}
2693 
2694 	mutex_exit(&stp->sd_lock);
2695 
2696 	wqp = stp->sd_wrq;
2697 
2698 	/* get these values from them cached in the stream head */
2699 	rmin = stp->sd_qn_minpsz;
2700 	rmax = stp->sd_qn_maxpsz;
2701 
2702 	/*
2703 	 * Check the min/max packet size constraints.  If min packet size
2704 	 * is non-zero, the write cannot be split into multiple messages
2705 	 * and still guarantee the size constraints.
2706 	 */
2707 	TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp);
2708 
2709 	ASSERT((rmax >= 0) || (rmax == INFPSZ));
2710 	if (rmax == 0) {
2711 		return (0);
2712 	}
2713 	if (rmin > 0) {
2714 		if (uiop->uio_resid < rmin) {
2715 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2716 				"strwrite out:q %p out %d error %d",
2717 				wqp, 0, ERANGE);
2718 			return (ERANGE);
2719 		}
2720 		if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) {
2721 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2722 				"strwrite out:q %p out %d error %d",
2723 				wqp, 1, ERANGE);
2724 			return (ERANGE);
2725 		}
2726 	}
2727 
2728 	/*
2729 	 * Do until count satisfied or error.
2730 	 */
2731 	waitflag = WRITEWAIT | wflag;
2732 	if (stp->sd_flag & OLDNDELAY)
2733 		tempmode = uiop->uio_fmode & ~FNDELAY;
2734 	else
2735 		tempmode = uiop->uio_fmode;
2736 
2737 	if (rmax == INFPSZ)
2738 		rmax = uiop->uio_resid;
2739 
2740 	/*
2741 	 * Note that tempmode does not get used in strput/strmakedata
2742 	 * but only in strwaitq. The other routines use uio_fmode
2743 	 * unmodified.
2744 	 */
2745 
2746 	/* LINTED: constant in conditional context */
2747 	while (1) {	/* breaks when uio_resid reaches zero */
2748 		/*
2749 		 * Determine the size of the next message to be
2750 		 * packaged.  May have to break write into several
2751 		 * messages based on max packet size.
2752 		 */
2753 		iosize = MIN(uiop->uio_resid, rmax);
2754 
2755 		/*
2756 		 * Put block downstream when flow control allows it.
2757 		 */
2758 		if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize))
2759 			b_flag = MSGDELIM;
2760 		else
2761 			b_flag = 0;
2762 
2763 		for (;;) {
2764 			int done = 0;
2765 
2766 			error = strput(stp, NULL, uiop, &iosize, b_flag,
2767 				0, 0);
2768 			if (error == 0)
2769 				break;
2770 			if (error != EWOULDBLOCK)
2771 				goto out;
2772 
2773 			mutex_enter(&stp->sd_lock);
2774 			/*
2775 			 * Check for a missed wakeup.
2776 			 * Needed since strput did not hold sd_lock across
2777 			 * the canputnext.
2778 			 */
2779 			if (canputnext(wqp)) {
2780 				/* Try again */
2781 				mutex_exit(&stp->sd_lock);
2782 				continue;
2783 			}
2784 			TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT,
2785 				"strwrite wait:q %p wait", wqp);
2786 			if ((error = strwaitq(stp, waitflag, (ssize_t)0,
2787 			    tempmode, -1, &done)) != 0 || done) {
2788 				mutex_exit(&stp->sd_lock);
2789 				if ((vp->v_type == VFIFO) &&
2790 				    (uiop->uio_fmode & FNDELAY) &&
2791 				    (error == EAGAIN))
2792 					error = 0;
2793 				goto out;
2794 			}
2795 			TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE,
2796 				"strwrite wake:q %p awakes", wqp);
2797 			if ((error = i_straccess(stp, JCWRITE)) != 0) {
2798 				mutex_exit(&stp->sd_lock);
2799 				goto out;
2800 			}
2801 			mutex_exit(&stp->sd_lock);
2802 		}
2803 		waitflag |= NOINTR;
2804 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID,
2805 			"strwrite resid:q %p uiop %p", wqp, uiop);
2806 		if (uiop->uio_resid) {
2807 			/* Recheck for errors - needed for sockets */
2808 			if ((stp->sd_wput_opt & SW_RECHECK_ERR) &&
2809 			    (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
2810 				mutex_enter(&stp->sd_lock);
2811 				error = strwriteable(stp, B_FALSE, B_TRUE);
2812 				mutex_exit(&stp->sd_lock);
2813 				if (error != 0)
2814 					return (error);
2815 			}
2816 			continue;
2817 		}
2818 		break;
2819 	}
2820 out:
2821 	/*
2822 	 * For historical reasons, applications expect EAGAIN when a data
2823 	 * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN.
2824 	 */
2825 	if (error == ENOMEM)
2826 		error = EAGAIN;
2827 	TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2828 		"strwrite out:q %p out %d error %d", wqp, 2, error);
2829 	return (error);
2830 }
2831 
2832 /*
2833  * Stream head write service routine.
2834  * Its job is to wake up any sleeping writers when a queue
2835  * downstream needs data (part of the flow control in putq and getq).
2836  * It also must wake anyone sleeping on a poll().
2837  * For stream head right below mux module, it must also invoke put procedure
2838  * of next downstream module.
2839  */
2840 int
2841 strwsrv(queue_t *q)
2842 {
2843 	struct stdata *stp;
2844 	queue_t *tq;
2845 	qband_t *qbp;
2846 	int i;
2847 	qband_t *myqbp;
2848 	int isevent;
2849 	unsigned char	qbf[NBAND];	/* band flushing backenable flags */
2850 
2851 	TRACE_1(TR_FAC_STREAMS_FR,
2852 		TR_STRWSRV, "strwsrv:q %p", q);
2853 	stp = (struct stdata *)q->q_ptr;
2854 	ASSERT(qclaimed(q));
2855 	mutex_enter(&stp->sd_lock);
2856 	ASSERT(!(stp->sd_flag & STPLEX));
2857 
2858 	if (stp->sd_flag & WSLEEP) {
2859 		stp->sd_flag &= ~WSLEEP;
2860 		cv_broadcast(&q->q_wait);
2861 	}
2862 	mutex_exit(&stp->sd_lock);
2863 
2864 	/* The other end of a stream pipe went away. */
2865 	if ((tq = q->q_next) == NULL) {
2866 		return (0);
2867 	}
2868 
2869 	/* Find the next module forward that has a service procedure */
2870 	claimstr(q);
2871 	tq = q->q_nfsrv;
2872 	ASSERT(tq != NULL);
2873 
2874 	if ((q->q_flag & QBACK)) {
2875 		if ((tq->q_flag & QFULL)) {
2876 			mutex_enter(QLOCK(tq));
2877 			if (!(tq->q_flag & QFULL)) {
2878 				mutex_exit(QLOCK(tq));
2879 				goto wakeup;
2880 			}
2881 			/*
2882 			 * The queue must have become full again. Set QWANTW
2883 			 * again so strwsrv will be back enabled when
2884 			 * the queue becomes non-full next time.
2885 			 */
2886 			tq->q_flag |= QWANTW;
2887 			mutex_exit(QLOCK(tq));
2888 		} else {
2889 		wakeup:
2890 			pollwakeup(&stp->sd_pollist, POLLWRNORM);
2891 			mutex_enter(&stp->sd_lock);
2892 			if (stp->sd_sigflags & S_WRNORM)
2893 				strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
2894 			mutex_exit(&stp->sd_lock);
2895 		}
2896 	}
2897 
2898 	isevent = 0;
2899 	i = 1;
2900 	bzero((caddr_t)qbf, NBAND);
2901 	mutex_enter(QLOCK(tq));
2902 	if ((myqbp = q->q_bandp) != NULL)
2903 		for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) {
2904 			ASSERT(myqbp);
2905 			if ((myqbp->qb_flag & QB_BACK)) {
2906 				if (qbp->qb_flag & QB_FULL) {
2907 					/*
2908 					 * The band must have become full again.
2909 					 * Set QB_WANTW again so strwsrv will
2910 					 * be back enabled when the band becomes
2911 					 * non-full next time.
2912 					 */
2913 					qbp->qb_flag |= QB_WANTW;
2914 				} else {
2915 					isevent = 1;
2916 					qbf[i] = 1;
2917 				}
2918 			}
2919 			myqbp = myqbp->qb_next;
2920 			i++;
2921 		}
2922 	mutex_exit(QLOCK(tq));
2923 
2924 	if (isevent) {
2925 	    for (i = tq->q_nband; i; i--) {
2926 		if (qbf[i]) {
2927 			pollwakeup(&stp->sd_pollist, POLLWRBAND);
2928 			mutex_enter(&stp->sd_lock);
2929 			if (stp->sd_sigflags & S_WRBAND)
2930 				strsendsig(stp->sd_siglist, S_WRBAND,
2931 					(uchar_t)i, 0);
2932 			mutex_exit(&stp->sd_lock);
2933 		}
2934 	    }
2935 	}
2936 
2937 	releasestr(q);
2938 	return (0);
2939 }
2940 
2941 /*
2942  * Special case of strcopyin/strcopyout for copying
2943  * struct strioctl that can deal with both data
2944  * models.
2945  */
2946 
2947 #ifdef	_LP64
2948 
2949 static int
2950 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
2951 {
2952 	struct	strioctl32 strioc32;
2953 	struct	strioctl *striocp;
2954 
2955 	if (copyflag & U_TO_K) {
2956 		ASSERT((copyflag & K_TO_K) == 0);
2957 
2958 		if ((flag & FMODELS) == DATAMODEL_ILP32) {
2959 			if (copyin(from, &strioc32, sizeof (strioc32)))
2960 				return (EFAULT);
2961 
2962 			striocp = (struct strioctl *)to;
2963 			striocp->ic_cmd	= strioc32.ic_cmd;
2964 			striocp->ic_timout = strioc32.ic_timout;
2965 			striocp->ic_len	= strioc32.ic_len;
2966 			striocp->ic_dp	= (char *)(uintptr_t)strioc32.ic_dp;
2967 
2968 		} else { /* NATIVE data model */
2969 			if (copyin(from, to, sizeof (struct strioctl))) {
2970 				return (EFAULT);
2971 			} else {
2972 				return (0);
2973 			}
2974 		}
2975 	} else {
2976 		ASSERT(copyflag & K_TO_K);
2977 		bcopy(from, to, sizeof (struct strioctl));
2978 	}
2979 	return (0);
2980 }
2981 
2982 static int
2983 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
2984 {
2985 	struct	strioctl32 strioc32;
2986 	struct	strioctl *striocp;
2987 
2988 	if (copyflag & U_TO_K) {
2989 		ASSERT((copyflag & K_TO_K) == 0);
2990 
2991 		if ((flag & FMODELS) == DATAMODEL_ILP32) {
2992 			striocp = (struct strioctl *)from;
2993 			strioc32.ic_cmd	= striocp->ic_cmd;
2994 			strioc32.ic_timout = striocp->ic_timout;
2995 			strioc32.ic_len	= striocp->ic_len;
2996 			strioc32.ic_dp	= (caddr32_t)(uintptr_t)striocp->ic_dp;
2997 			ASSERT((char *)(uintptr_t)strioc32.ic_dp ==
2998 			    striocp->ic_dp);
2999 
3000 			if (copyout(&strioc32, to, sizeof (strioc32)))
3001 				return (EFAULT);
3002 
3003 		} else { /* NATIVE data model */
3004 			if (copyout(from, to, sizeof (struct strioctl))) {
3005 				return (EFAULT);
3006 			} else {
3007 				return (0);
3008 			}
3009 		}
3010 	} else {
3011 		ASSERT(copyflag & K_TO_K);
3012 		bcopy(from, to, sizeof (struct strioctl));
3013 	}
3014 	return (0);
3015 }
3016 
3017 #else	/* ! _LP64 */
3018 
3019 /* ARGSUSED2 */
3020 static int
3021 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
3022 {
3023 	return (strcopyin(from, to, sizeof (struct strioctl), copyflag));
3024 }
3025 
3026 /* ARGSUSED2 */
3027 static int
3028 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3029 {
3030 	return (strcopyout(from, to, sizeof (struct strioctl), copyflag));
3031 }
3032 
3033 #endif	/* _LP64 */
3034 
3035 /*
3036  * Determine type of job control semantics expected by user.  The
3037  * possibilities are:
3038  *	JCREAD	- Behaves like read() on fd; send SIGTTIN
3039  *	JCWRITE	- Behaves like write() on fd; send SIGTTOU if TOSTOP set
3040  *	JCSETP	- Sets a value in the stream; send SIGTTOU, ignore TOSTOP
3041  *	JCGETP	- Gets a value in the stream; no signals.
3042  * See straccess in strsubr.c for usage of these values.
3043  *
3044  * This routine also returns -1 for I_STR as a special case; the
3045  * caller must call again with the real ioctl number for
3046  * classification.
3047  */
3048 static int
3049 job_control_type(int cmd)
3050 {
3051 	switch (cmd) {
3052 	case I_STR:
3053 		return (-1);
3054 
3055 	case I_RECVFD:
3056 	case I_E_RECVFD:
3057 		return (JCREAD);
3058 
3059 	case I_FDINSERT:
3060 	case I_SENDFD:
3061 		return (JCWRITE);
3062 
3063 	case TCSETA:
3064 	case TCSETAW:
3065 	case TCSETAF:
3066 	case TCSBRK:
3067 	case TCXONC:
3068 	case TCFLSH:
3069 	case TCDSET:	/* Obsolete */
3070 	case TIOCSWINSZ:
3071 	case TCSETS:
3072 	case TCSETSW:
3073 	case TCSETSF:
3074 	case TIOCSETD:
3075 	case TIOCHPCL:
3076 	case TIOCSETP:
3077 	case TIOCSETN:
3078 	case TIOCEXCL:
3079 	case TIOCNXCL:
3080 	case TIOCFLUSH:
3081 	case TIOCSETC:
3082 	case TIOCLBIS:
3083 	case TIOCLBIC:
3084 	case TIOCLSET:
3085 	case TIOCSBRK:
3086 	case TIOCCBRK:
3087 	case TIOCSDTR:
3088 	case TIOCCDTR:
3089 	case TIOCSLTC:
3090 	case TIOCSTOP:
3091 	case TIOCSTART:
3092 	case TIOCSTI:
3093 	case TIOCSPGRP:
3094 	case TIOCMSET:
3095 	case TIOCMBIS:
3096 	case TIOCMBIC:
3097 	case TIOCREMOTE:
3098 	case TIOCSIGNAL:
3099 	case LDSETT:
3100 	case LDSMAP:	/* Obsolete */
3101 	case DIOCSETP:
3102 	case I_FLUSH:
3103 	case I_SRDOPT:
3104 	case I_SETSIG:
3105 	case I_SWROPT:
3106 	case I_FLUSHBAND:
3107 	case I_SETCLTIME:
3108 	case I_SERROPT:
3109 	case I_ESETSIG:
3110 	case FIONBIO:
3111 	case FIOASYNC:
3112 	case FIOSETOWN:
3113 	case JBOOT:	/* Obsolete */
3114 	case JTERM:	/* Obsolete */
3115 	case JTIMOM:	/* Obsolete */
3116 	case JZOMBOOT:	/* Obsolete */
3117 	case JAGENT:	/* Obsolete */
3118 	case JTRUN:	/* Obsolete */
3119 	case JXTPROTO:	/* Obsolete */
3120 	case TIOCSETLD:
3121 		return (JCSETP);
3122 	}
3123 
3124 	return (JCGETP);
3125 }
3126 
3127 /*
3128  * ioctl for streams
3129  */
3130 int
3131 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
3132     cred_t *crp, int *rvalp)
3133 {
3134 	struct stdata *stp;
3135 	struct strioctl strioc;
3136 	struct uio uio;
3137 	struct iovec iov;
3138 	int access;
3139 	mblk_t *mp;
3140 	int error = 0;
3141 	int done = 0;
3142 	ssize_t	rmin, rmax;
3143 	queue_t *wrq;
3144 	queue_t *rdq;
3145 	boolean_t kioctl = B_FALSE;
3146 
3147 	if (flag & FKIOCTL) {
3148 		copyflag = K_TO_K;
3149 		kioctl = B_TRUE;
3150 	}
3151 	ASSERT(vp->v_stream);
3152 	ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
3153 	stp = vp->v_stream;
3154 
3155 	TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER,
3156 		"strioctl:stp %p cmd %X arg %lX", stp, cmd, arg);
3157 
3158 #ifdef C2_AUDIT
3159 	if (audit_active)
3160 		audit_strioctl(vp, cmd, arg, flag, copyflag, crp, rvalp);
3161 #endif
3162 
3163 	/*
3164 	 * If the copy is kernel to kernel, make sure that the FNATIVE
3165 	 * flag is set.  After this it would be a serious error to have
3166 	 * no model flag.
3167 	 */
3168 	if (copyflag == K_TO_K)
3169 		flag = (flag & ~FMODELS) | FNATIVE;
3170 
3171 	ASSERT((flag & FMODELS) != 0);
3172 
3173 	wrq = stp->sd_wrq;
3174 	rdq = _RD(wrq);
3175 
3176 	access = job_control_type(cmd);
3177 
3178 	/* We should never see these here, should be handled by iwscn */
3179 	if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR)
3180 		return (EINVAL);
3181 
3182 	mutex_enter(&stp->sd_lock);
3183 	if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) {
3184 		mutex_exit(&stp->sd_lock);
3185 		return (error);
3186 	}
3187 	mutex_exit(&stp->sd_lock);
3188 
3189 	/*
3190 	 * Check for sgttyb-related ioctls first, and complain as
3191 	 * necessary.
3192 	 */
3193 	switch (cmd) {
3194 	case TIOCGETP:
3195 	case TIOCSETP:
3196 	case TIOCSETN:
3197 		if (sgttyb_handling >= 2 && !sgttyb_complaint) {
3198 			sgttyb_complaint = B_TRUE;
3199 			cmn_err(CE_NOTE,
3200 			    "application used obsolete TIOC[GS]ET");
3201 		}
3202 		if (sgttyb_handling >= 3) {
3203 			tsignal(curthread, SIGSYS);
3204 			return (EIO);
3205 		}
3206 		break;
3207 	}
3208 
3209 	mutex_enter(&stp->sd_lock);
3210 
3211 	switch (cmd) {
3212 	case I_RECVFD:
3213 	case I_E_RECVFD:
3214 	case I_PEEK:
3215 	case I_NREAD:
3216 	case FIONREAD:
3217 	case FIORDCHK:
3218 	case I_ATMARK:
3219 	case FIONBIO:
3220 	case FIOASYNC:
3221 		if (stp->sd_flag & (STRDERR|STPLEX)) {
3222 			error = strgeterr(stp, STRDERR|STPLEX, 0);
3223 			if (error != 0) {
3224 				mutex_exit(&stp->sd_lock);
3225 				return (error);
3226 			}
3227 		}
3228 		break;
3229 
3230 	default:
3231 		if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) {
3232 			error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0);
3233 			if (error != 0) {
3234 				mutex_exit(&stp->sd_lock);
3235 				return (error);
3236 			}
3237 		}
3238 	}
3239 
3240 	mutex_exit(&stp->sd_lock);
3241 
3242 	switch (cmd) {
3243 	default:
3244 		/*
3245 		 * The stream head has hardcoded knowledge of a
3246 		 * miscellaneous collection of terminal-, keyboard- and
3247 		 * mouse-related ioctls, enumerated below.  This hardcoded
3248 		 * knowledge allows the stream head to automatically
3249 		 * convert transparent ioctl requests made by userland
3250 		 * programs into I_STR ioctls which many old STREAMS
3251 		 * modules and drivers require.
3252 		 *
3253 		 * No new ioctls should ever be added to this list.
3254 		 * Instead, the STREAMS module or driver should be written
3255 		 * to either handle transparent ioctls or require any
3256 		 * userland programs to use I_STR ioctls (by returning
3257 		 * EINVAL to any transparent ioctl requests).
3258 		 *
3259 		 * More importantly, removing ioctls from this list should
3260 		 * be done with the utmost care, since our STREAMS modules
3261 		 * and drivers *count* on the stream head performing this
3262 		 * conversion, and thus may panic while processing
3263 		 * a transparent ioctl request for one of these ioctls (keep
3264 		 * in mind that third party modules and drivers may have
3265 		 * similar problems).
3266 		 */
3267 		if (((cmd & IOCTYPE) == LDIOC) ||
3268 		    ((cmd & IOCTYPE) == tIOC) ||
3269 		    ((cmd & IOCTYPE) == TIOC) ||
3270 		    ((cmd & IOCTYPE) == KIOC) ||
3271 		    ((cmd & IOCTYPE) == MSIOC) ||
3272 		    ((cmd & IOCTYPE) == VUIOC)) {
3273 			/*
3274 			 * The ioctl is a tty ioctl - set up strioc buffer
3275 			 * and call strdoioctl() to do the work.
3276 			 */
3277 			if (stp->sd_flag & STRHUP)
3278 				return (ENXIO);
3279 			strioc.ic_cmd = cmd;
3280 			strioc.ic_timout = INFTIM;
3281 
3282 			switch (cmd) {
3283 
3284 			case TCXONC:
3285 			case TCSBRK:
3286 			case TCFLSH:
3287 			case TCDSET:
3288 				{
3289 				int native_arg = (int)arg;
3290 				strioc.ic_len = sizeof (int);
3291 				strioc.ic_dp = (char *)&native_arg;
3292 				return (strdoioctl(stp, &strioc, flag,
3293 				    K_TO_K, crp, rvalp));
3294 				}
3295 
3296 			case TCSETA:
3297 			case TCSETAW:
3298 			case TCSETAF:
3299 				strioc.ic_len = sizeof (struct termio);
3300 				strioc.ic_dp = (char *)arg;
3301 				return (strdoioctl(stp, &strioc, flag,
3302 					copyflag, crp, rvalp));
3303 
3304 			case TCSETS:
3305 			case TCSETSW:
3306 			case TCSETSF:
3307 				strioc.ic_len = sizeof (struct termios);
3308 				strioc.ic_dp = (char *)arg;
3309 				return (strdoioctl(stp, &strioc, flag,
3310 					copyflag, crp, rvalp));
3311 
3312 			case LDSETT:
3313 				strioc.ic_len = sizeof (struct termcb);
3314 				strioc.ic_dp = (char *)arg;
3315 				return (strdoioctl(stp, &strioc, flag,
3316 					copyflag, crp, rvalp));
3317 
3318 			case TIOCSETP:
3319 				strioc.ic_len = sizeof (struct sgttyb);
3320 				strioc.ic_dp = (char *)arg;
3321 				return (strdoioctl(stp, &strioc, flag,
3322 					copyflag, crp, rvalp));
3323 
3324 			case TIOCSTI:
3325 				if ((flag & FREAD) == 0 &&
3326 				    secpolicy_sti(crp) != 0) {
3327 					return (EPERM);
3328 				}
3329 				mutex_enter(&stp->sd_lock);
3330 				mutex_enter(&curproc->p_splock);
3331 				if (stp->sd_sidp != curproc->p_sessp->s_sidp &&
3332 				    secpolicy_sti(crp) != 0) {
3333 					mutex_exit(&curproc->p_splock);
3334 					mutex_exit(&stp->sd_lock);
3335 					return (EACCES);
3336 				}
3337 				mutex_exit(&curproc->p_splock);
3338 				mutex_exit(&stp->sd_lock);
3339 
3340 				strioc.ic_len = sizeof (char);
3341 				strioc.ic_dp = (char *)arg;
3342 				return (strdoioctl(stp, &strioc, flag,
3343 					copyflag, crp, rvalp));
3344 
3345 			case TIOCSWINSZ:
3346 				strioc.ic_len = sizeof (struct winsize);
3347 				strioc.ic_dp = (char *)arg;
3348 				return (strdoioctl(stp, &strioc, flag,
3349 					copyflag, crp, rvalp));
3350 
3351 			case TIOCSSIZE:
3352 				strioc.ic_len = sizeof (struct ttysize);
3353 				strioc.ic_dp = (char *)arg;
3354 				return (strdoioctl(stp, &strioc, flag,
3355 					copyflag, crp, rvalp));
3356 
3357 			case TIOCSSOFTCAR:
3358 			case KIOCTRANS:
3359 			case KIOCTRANSABLE:
3360 			case KIOCCMD:
3361 			case KIOCSDIRECT:
3362 			case KIOCSCOMPAT:
3363 			case KIOCSKABORTEN:
3364 			case KIOCSRPTDELAY:
3365 			case KIOCSRPTRATE:
3366 			case VUIDSFORMAT:
3367 			case TIOCSPPS:
3368 				strioc.ic_len = sizeof (int);
3369 				strioc.ic_dp = (char *)arg;
3370 				return (strdoioctl(stp, &strioc, flag,
3371 					copyflag, crp, rvalp));
3372 
3373 			case KIOCSETKEY:
3374 			case KIOCGETKEY:
3375 				strioc.ic_len = sizeof (struct kiockey);
3376 				strioc.ic_dp = (char *)arg;
3377 				return (strdoioctl(stp, &strioc, flag,
3378 					copyflag, crp, rvalp));
3379 
3380 			case KIOCSKEY:
3381 			case KIOCGKEY:
3382 				strioc.ic_len = sizeof (struct kiockeymap);
3383 				strioc.ic_dp = (char *)arg;
3384 				return (strdoioctl(stp, &strioc, flag,
3385 					copyflag, crp, rvalp));
3386 
3387 			case KIOCSLED:
3388 				/* arg is a pointer to char */
3389 				strioc.ic_len = sizeof (char);
3390 				strioc.ic_dp = (char *)arg;
3391 				return (strdoioctl(stp, &strioc, flag,
3392 					copyflag, crp, rvalp));
3393 
3394 			case MSIOSETPARMS:
3395 				strioc.ic_len = sizeof (Ms_parms);
3396 				strioc.ic_dp = (char *)arg;
3397 				return (strdoioctl(stp, &strioc, flag,
3398 					copyflag, crp, rvalp));
3399 
3400 			case VUIDSADDR:
3401 			case VUIDGADDR:
3402 				strioc.ic_len = sizeof (struct vuid_addr_probe);
3403 				strioc.ic_dp = (char *)arg;
3404 				return (strdoioctl(stp, &strioc, flag,
3405 					copyflag, crp, rvalp));
3406 
3407 			/*
3408 			 * These M_IOCTL's don't require any data to be sent
3409 			 * downstream, and the driver will allocate and link
3410 			 * on its own mblk_t upon M_IOCACK -- thus we set
3411 			 * ic_len to zero and set ic_dp to arg so we know
3412 			 * where to copyout to later.
3413 			 */
3414 			case TIOCGSOFTCAR:
3415 			case TIOCGWINSZ:
3416 			case TIOCGSIZE:
3417 			case KIOCGTRANS:
3418 			case KIOCGTRANSABLE:
3419 			case KIOCTYPE:
3420 			case KIOCGDIRECT:
3421 			case KIOCGCOMPAT:
3422 			case KIOCLAYOUT:
3423 			case KIOCGLED:
3424 			case MSIOGETPARMS:
3425 			case MSIOBUTTONS:
3426 			case VUIDGFORMAT:
3427 			case TIOCGPPS:
3428 			case TIOCGPPSEV:
3429 			case TCGETA:
3430 			case TCGETS:
3431 			case LDGETT:
3432 			case TIOCGETP:
3433 			case KIOCGRPTDELAY:
3434 			case KIOCGRPTRATE:
3435 				strioc.ic_len = 0;
3436 				strioc.ic_dp = (char *)arg;
3437 				return (strdoioctl(stp, &strioc, flag,
3438 					copyflag, crp, rvalp));
3439 			}
3440 		}
3441 
3442 		/*
3443 		 * Unknown cmd - send it down as a transparent ioctl.
3444 		 */
3445 		strioc.ic_cmd = cmd;
3446 		strioc.ic_timout = INFTIM;
3447 		strioc.ic_len = TRANSPARENT;
3448 		strioc.ic_dp = (char *)&arg;
3449 
3450 		return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp));
3451 
3452 	case I_STR:
3453 		/*
3454 		 * Stream ioctl.  Read in an strioctl buffer from the user
3455 		 * along with any data specified and send it downstream.
3456 		 * Strdoioctl will allow only one ioctl message at
3457 		 * a time, and waits for the acknowledgement.
3458 		 */
3459 
3460 		if (stp->sd_flag & STRHUP)
3461 			return (ENXIO);
3462 
3463 		error = strcopyin_strioctl((void *)arg, &strioc, flag,
3464 		    copyflag);
3465 		if (error != 0)
3466 			return (error);
3467 
3468 		if ((strioc.ic_len < 0) || (strioc.ic_timout < -1))
3469 			return (EINVAL);
3470 
3471 		access = job_control_type(strioc.ic_cmd);
3472 		mutex_enter(&stp->sd_lock);
3473 		if ((access != -1) &&
3474 		    ((error = i_straccess(stp, access)) != 0)) {
3475 			mutex_exit(&stp->sd_lock);
3476 			return (error);
3477 		}
3478 		mutex_exit(&stp->sd_lock);
3479 
3480 		/*
3481 		 * The I_STR facility provides a trap door for malicious
3482 		 * code to send down bogus streamio(7I) ioctl commands to
3483 		 * unsuspecting STREAMS modules and drivers which expect to
3484 		 * only get these messages from the stream head.
3485 		 * Explicitly prohibit any streamio ioctls which can be
3486 		 * passed downstream by the stream head.  Note that we do
3487 		 * not block all streamio ioctls because the ioctl
3488 		 * numberspace is not well managed and thus it's possible
3489 		 * that a module or driver's ioctl numbers may accidentally
3490 		 * collide with them.
3491 		 */
3492 		switch (strioc.ic_cmd) {
3493 		case I_LINK:
3494 		case I_PLINK:
3495 		case I_UNLINK:
3496 		case I_PUNLINK:
3497 		case _I_GETPEERCRED:
3498 		case _I_PLINK_LH:
3499 			return (EINVAL);
3500 		}
3501 
3502 		error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp);
3503 		if (error == 0) {
3504 			error = strcopyout_strioctl(&strioc, (void *)arg,
3505 			    flag, copyflag);
3506 		}
3507 		return (error);
3508 
3509 	case I_NREAD:
3510 		/*
3511 		 * Return number of bytes of data in first message
3512 		 * in queue in "arg" and return the number of messages
3513 		 * in queue in return value.
3514 		 */
3515 	    {
3516 		size_t	size;
3517 		int	retval;
3518 		int	count = 0;
3519 
3520 		mutex_enter(QLOCK(rdq));
3521 
3522 		size = msgdsize(rdq->q_first);
3523 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3524 			count++;
3525 
3526 		mutex_exit(QLOCK(rdq));
3527 		if (stp->sd_struiordq) {
3528 			infod_t infod;
3529 
3530 			infod.d_cmd = INFOD_COUNT;
3531 			infod.d_count = 0;
3532 			if (count == 0) {
3533 				infod.d_cmd |= INFOD_FIRSTBYTES;
3534 				infod.d_bytes = 0;
3535 			}
3536 			infod.d_res = 0;
3537 			(void) infonext(rdq, &infod);
3538 			count += infod.d_count;
3539 			if (infod.d_res & INFOD_FIRSTBYTES)
3540 				size = infod.d_bytes;
3541 		}
3542 
3543 		/*
3544 		 * Drop down from size_t to the "int" required by the
3545 		 * interface.  Cap at INT_MAX.
3546 		 */
3547 		retval = MIN(size, INT_MAX);
3548 		error = strcopyout(&retval, (void *)arg, sizeof (retval),
3549 		    copyflag);
3550 		if (!error)
3551 			*rvalp = count;
3552 		return (error);
3553 	    }
3554 
3555 	case FIONREAD:
3556 		/*
3557 		 * Return number of bytes of data in all data messages
3558 		 * in queue in "arg".
3559 		 */
3560 	    {
3561 		size_t	size = 0;
3562 		int	retval;
3563 
3564 		mutex_enter(QLOCK(rdq));
3565 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3566 			size += msgdsize(mp);
3567 		mutex_exit(QLOCK(rdq));
3568 
3569 		if (stp->sd_struiordq) {
3570 			infod_t infod;
3571 
3572 			infod.d_cmd = INFOD_BYTES;
3573 			infod.d_res = 0;
3574 			infod.d_bytes = 0;
3575 			(void) infonext(rdq, &infod);
3576 			size += infod.d_bytes;
3577 		}
3578 
3579 		/*
3580 		 * Drop down from size_t to the "int" required by the
3581 		 * interface.  Cap at INT_MAX.
3582 		 */
3583 		retval = MIN(size, INT_MAX);
3584 		error = strcopyout(&retval, (void *)arg, sizeof (retval),
3585 		    copyflag);
3586 
3587 		*rvalp = 0;
3588 		return (error);
3589 	    }
3590 	case FIORDCHK:
3591 		/*
3592 		 * FIORDCHK does not use arg value (like FIONREAD),
3593 		 * instead a count is returned. I_NREAD value may
3594 		 * not be accurate but safe. The real thing to do is
3595 		 * to add the msgdsizes of all data  messages until
3596 		 * a non-data message.
3597 		 */
3598 	    {
3599 		size_t size = 0;
3600 
3601 		mutex_enter(QLOCK(rdq));
3602 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3603 			size += msgdsize(mp);
3604 		mutex_exit(QLOCK(rdq));
3605 
3606 		if (stp->sd_struiordq) {
3607 			infod_t infod;
3608 
3609 			infod.d_cmd = INFOD_BYTES;
3610 			infod.d_res = 0;
3611 			infod.d_bytes = 0;
3612 			(void) infonext(rdq, &infod);
3613 			size += infod.d_bytes;
3614 		}
3615 
3616 		/*
3617 		 * Since ioctl returns an int, and memory sizes under
3618 		 * LP64 may not fit, we return INT_MAX if the count was
3619 		 * actually greater.
3620 		 */
3621 		*rvalp = MIN(size, INT_MAX);
3622 		return (0);
3623 	    }
3624 
3625 	case I_FIND:
3626 		/*
3627 		 * Get module name.
3628 		 */
3629 	    {
3630 		char mname[FMNAMESZ + 1];
3631 		queue_t *q;
3632 
3633 		error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3634 		    mname, FMNAMESZ + 1, NULL);
3635 		if (error)
3636 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3637 
3638 		/*
3639 		 * Return EINVAL if we're handed a bogus module name.
3640 		 */
3641 		if (fmodsw_find(mname, FMODSW_LOAD) == NULL) {
3642 			TRACE_0(TR_FAC_STREAMS_FR,
3643 				TR_I_CANT_FIND, "couldn't I_FIND");
3644 			return (EINVAL);
3645 		}
3646 
3647 		*rvalp = 0;
3648 
3649 		/* Look downstream to see if module is there. */
3650 		claimstr(stp->sd_wrq);
3651 		for (q = stp->sd_wrq->q_next; q; q = q->q_next) {
3652 			if (q->q_flag&QREADR) {
3653 				q = NULL;
3654 				break;
3655 			}
3656 			if (strcmp(mname, q->q_qinfo->qi_minfo->mi_idname) == 0)
3657 				break;
3658 		}
3659 		releasestr(stp->sd_wrq);
3660 
3661 		*rvalp = (q ? 1 : 0);
3662 		return (error);
3663 	    }
3664 
3665 	case I_PUSH:
3666 	case __I_PUSH_NOCTTY:
3667 		/*
3668 		 * Push a module.
3669 		 * For the case __I_PUSH_NOCTTY push a module but
3670 		 * do not allocate controlling tty. See bugid 4025044
3671 		 */
3672 
3673 	    {
3674 		char mname[FMNAMESZ + 1];
3675 		fmodsw_impl_t *fp;
3676 		dev_t dummydev;
3677 
3678 		if (stp->sd_flag & STRHUP)
3679 			return (ENXIO);
3680 
3681 		/*
3682 		 * Get module name and look up in fmodsw.
3683 		 */
3684 		error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3685 		    mname, FMNAMESZ + 1, NULL);
3686 		if (error)
3687 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3688 
3689 		if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) ==
3690 		    NULL)
3691 			return (EINVAL);
3692 
3693 		TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH,
3694 		    "I_PUSH:fp %p stp %p", fp, stp);
3695 
3696 		if (error = strstartplumb(stp, flag, cmd)) {
3697 			fmodsw_rele(fp);
3698 			return (error);
3699 		}
3700 
3701 		/*
3702 		 * See if any more modules can be pushed on this stream.
3703 		 * Note that this check must be done after strstartplumb()
3704 		 * since otherwise multiple threads issuing I_PUSHes on
3705 		 * the same stream will be able to exceed nstrpush.
3706 		 */
3707 		mutex_enter(&stp->sd_lock);
3708 		if (stp->sd_pushcnt >= nstrpush) {
3709 			fmodsw_rele(fp);
3710 			strendplumb(stp);
3711 			mutex_exit(&stp->sd_lock);
3712 			return (EINVAL);
3713 		}
3714 		mutex_exit(&stp->sd_lock);
3715 
3716 		/*
3717 		 * Push new module and call its open routine
3718 		 * via qattach().  Modules don't change device
3719 		 * numbers, so just ignore dummydev here.
3720 		 */
3721 		dummydev = vp->v_rdev;
3722 		if ((error = qattach(rdq, &dummydev, 0, crp, fp,
3723 		    B_FALSE)) == 0) {
3724 			if (vp->v_type == VCHR && /* sorry, no pipes allowed */
3725 			    (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) {
3726 				/*
3727 				 * try to allocate it as a controlling terminal
3728 				 */
3729 				(void) strctty(stp);
3730 			}
3731 		}
3732 
3733 		mutex_enter(&stp->sd_lock);
3734 
3735 		/*
3736 		 * As a performance concern we are caching the values of
3737 		 * q_minpsz and q_maxpsz of the module below the stream
3738 		 * head in the stream head.
3739 		 */
3740 		mutex_enter(QLOCK(stp->sd_wrq->q_next));
3741 		rmin = stp->sd_wrq->q_next->q_minpsz;
3742 		rmax = stp->sd_wrq->q_next->q_maxpsz;
3743 		mutex_exit(QLOCK(stp->sd_wrq->q_next));
3744 
3745 		/* Do this processing here as a performance concern */
3746 		if (strmsgsz != 0) {
3747 			if (rmax == INFPSZ)
3748 				rmax = strmsgsz;
3749 			else  {
3750 				if (vp->v_type == VFIFO)
3751 					rmax = MIN(PIPE_BUF, rmax);
3752 				else	rmax = MIN(strmsgsz, rmax);
3753 			}
3754 		}
3755 
3756 		mutex_enter(QLOCK(wrq));
3757 		stp->sd_qn_minpsz = rmin;
3758 		stp->sd_qn_maxpsz = rmax;
3759 		mutex_exit(QLOCK(wrq));
3760 
3761 		strendplumb(stp);
3762 		mutex_exit(&stp->sd_lock);
3763 		return (error);
3764 	    }
3765 
3766 	case I_POP:
3767 	    {
3768 		queue_t	*q;
3769 
3770 		if (stp->sd_flag & STRHUP)
3771 			return (ENXIO);
3772 		if (!wrq->q_next)	/* for broken pipes */
3773 			return (EINVAL);
3774 
3775 		if (error = strstartplumb(stp, flag, cmd))
3776 			return (error);
3777 
3778 		/*
3779 		 * If there is an anchor on this stream and popping
3780 		 * the current module would attempt to pop through the
3781 		 * anchor, then disallow the pop unless we have sufficient
3782 		 * privileges; take the cheapest (non-locking) check
3783 		 * first.
3784 		 */
3785 		if (secpolicy_net_config(crp, B_TRUE) != 0) {
3786 			mutex_enter(&stp->sd_lock);
3787 			/*
3788 			 * Anchors only apply if there's at least one
3789 			 * module on the stream (sd_pushcnt > 0).
3790 			 */
3791 			if (stp->sd_pushcnt > 0 &&
3792 			    stp->sd_pushcnt == stp->sd_anchor &&
3793 			    stp->sd_vnode->v_type != VFIFO) {
3794 				strendplumb(stp);
3795 				mutex_exit(&stp->sd_lock);
3796 				/* Audit and report error */
3797 				return (secpolicy_net_config(crp, B_FALSE));
3798 			}
3799 			mutex_exit(&stp->sd_lock);
3800 		}
3801 
3802 		q = wrq->q_next;
3803 		TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP,
3804 			"I_POP:%p from %p", q, stp);
3805 		if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) {
3806 			error = EINVAL;
3807 		} else {
3808 			qdetach(_RD(q), 1, flag, crp, B_FALSE);
3809 			error = 0;
3810 		}
3811 		mutex_enter(&stp->sd_lock);
3812 
3813 		/*
3814 		 * As a performance concern we are caching the values of
3815 		 * q_minpsz and q_maxpsz of the module below the stream
3816 		 * head in the stream head.
3817 		 */
3818 		mutex_enter(QLOCK(wrq->q_next));
3819 		rmin = wrq->q_next->q_minpsz;
3820 		rmax = wrq->q_next->q_maxpsz;
3821 		mutex_exit(QLOCK(wrq->q_next));
3822 
3823 		/* Do this processing here as a performance concern */
3824 		if (strmsgsz != 0) {
3825 			if (rmax == INFPSZ)
3826 				rmax = strmsgsz;
3827 			else  {
3828 				if (vp->v_type == VFIFO)
3829 					rmax = MIN(PIPE_BUF, rmax);
3830 				else	rmax = MIN(strmsgsz, rmax);
3831 			}
3832 		}
3833 
3834 		mutex_enter(QLOCK(wrq));
3835 		stp->sd_qn_minpsz = rmin;
3836 		stp->sd_qn_maxpsz = rmax;
3837 		mutex_exit(QLOCK(wrq));
3838 
3839 		/* If we popped through the anchor, then reset the anchor. */
3840 		if (stp->sd_pushcnt < stp->sd_anchor)
3841 			stp->sd_anchor = 0;
3842 
3843 		strendplumb(stp);
3844 		mutex_exit(&stp->sd_lock);
3845 		return (error);
3846 	    }
3847 
3848 	case _I_MUXID2FD:
3849 	{
3850 		/*
3851 		 * Create a fd for a I_PLINK'ed lower stream with a given
3852 		 * muxid.  With the fd, application can send down ioctls,
3853 		 * like I_LIST, to the previously I_PLINK'ed stream.  Note
3854 		 * that after getting the fd, the application has to do an
3855 		 * I_PUNLINK on the muxid before it can do any operation
3856 		 * on the lower stream.  This is required by spec1170.
3857 		 *
3858 		 * The fd used to do this ioctl should point to the same
3859 		 * controlling device used to do the I_PLINK.  If it uses
3860 		 * a different stream or an invalid muxid, I_MUXID2FD will
3861 		 * fail.  The error code is set to EINVAL.
3862 		 *
3863 		 * The intended use of this interface is the following.
3864 		 * An application I_PLINK'ed a stream and exits.  The fd
3865 		 * to the lower stream is gone.  Another application
3866 		 * wants to get a fd to the lower stream, it uses I_MUXID2FD.
3867 		 */
3868 		int muxid = (int)arg;
3869 		int fd;
3870 		linkinfo_t *linkp;
3871 		struct file *fp;
3872 
3873 		/*
3874 		 * Do not allow the wildcard muxid.  This ioctl is not
3875 		 * intended to find arbitrary link.
3876 		 */
3877 		if (muxid == 0) {
3878 			return (EINVAL);
3879 		}
3880 
3881 		mutex_enter(&muxifier);
3882 		linkp = findlinks(vp->v_stream, muxid, LINKPERSIST);
3883 		if (linkp == NULL) {
3884 			mutex_exit(&muxifier);
3885 			return (EINVAL);
3886 		}
3887 
3888 		if ((fd = ufalloc(0)) == -1) {
3889 			mutex_exit(&muxifier);
3890 			return (EMFILE);
3891 		}
3892 		fp = linkp->li_fpdown;
3893 		mutex_enter(&fp->f_tlock);
3894 		fp->f_count++;
3895 		mutex_exit(&fp->f_tlock);
3896 		mutex_exit(&muxifier);
3897 		setf(fd, fp);
3898 		*rvalp = fd;
3899 		return (0);
3900 	}
3901 
3902 	case _I_INSERT:
3903 	{
3904 		/*
3905 		 * To insert a module to a given position in a stream.
3906 		 * In the first release, only allow privileged user
3907 		 * to use this ioctl.
3908 		 *
3909 		 * Note that we do not plan to support this ioctl
3910 		 * on pipes in the first release.  We want to learn more
3911 		 * about the implications of these ioctls before extending
3912 		 * their support.  And we do not think these features are
3913 		 * valuable for pipes.
3914 		 *
3915 		 * Neither do we support O/C hot stream.  Note that only
3916 		 * the upper streams of TCP/IP stack are O/C hot streams.
3917 		 * The lower IP stream is not.
3918 		 * When there is a O/C cold barrier, we only allow inserts
3919 		 * above the barrier.
3920 		 */
3921 		STRUCT_DECL(strmodconf, strmodinsert);
3922 		char mod_name[FMNAMESZ + 1];
3923 		fmodsw_impl_t *fp;
3924 		dev_t dummydev;
3925 		queue_t *tmp_wrq;
3926 		int pos;
3927 		boolean_t is_insert;
3928 
3929 		STRUCT_INIT(strmodinsert, flag);
3930 		if (stp->sd_flag & STRHUP)
3931 			return (ENXIO);
3932 		if (STRMATED(stp))
3933 			return (EINVAL);
3934 		if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
3935 			return (error);
3936 
3937 		error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert),
3938 		    STRUCT_SIZE(strmodinsert), copyflag);
3939 		if (error)
3940 			return (error);
3941 
3942 		/*
3943 		 * Get module name and look up in fmodsw.
3944 		 */
3945 		error = (copyflag & U_TO_K ? copyinstr :
3946 		    copystr)(STRUCT_FGETP(strmodinsert, mod_name),
3947 		    mod_name, FMNAMESZ + 1, NULL);
3948 		if (error)
3949 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3950 
3951 		if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) ==
3952 		    NULL)
3953 			return (EINVAL);
3954 
3955 		if (error = strstartplumb(stp, flag, cmd)) {
3956 			fmodsw_rele(fp);
3957 			return (error);
3958 		}
3959 
3960 		/*
3961 		 * Is this _I_INSERT just like an I_PUSH?  We need to know
3962 		 * this because we do some optimizations if this is a
3963 		 * module being pushed.
3964 		 */
3965 		pos = STRUCT_FGET(strmodinsert, pos);
3966 		is_insert = (pos != 0);
3967 
3968 		/*
3969 		 * Make sure pos is valid.  Even though it is not an I_PUSH,
3970 		 * we impose the same limit on the number of modules in a
3971 		 * stream.
3972 		 */
3973 		mutex_enter(&stp->sd_lock);
3974 		if (stp->sd_pushcnt >= nstrpush || pos < 0 ||
3975 		    pos > stp->sd_pushcnt) {
3976 			fmodsw_rele(fp);
3977 			strendplumb(stp);
3978 			mutex_exit(&stp->sd_lock);
3979 			return (EINVAL);
3980 		}
3981 		mutex_exit(&stp->sd_lock);
3982 
3983 		/*
3984 		 * First find the correct position where this module is to
3985 		 * be inserted.  We don't need to call claimstr()
3986 		 * as the stream should not be changing at this point.
3987 		 *
3988 		 * Insert new module and call its open routine
3989 		 * via qattach().  Modules don't change device
3990 		 * numbers, so just ignore dummydev here.
3991 		 */
3992 		for (tmp_wrq = stp->sd_wrq; pos > 0;
3993 		    tmp_wrq = tmp_wrq->q_next, pos--) {
3994 			ASSERT(SAMESTR(tmp_wrq));
3995 		}
3996 		dummydev = vp->v_rdev;
3997 		if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp,
3998 		    fp, is_insert)) != 0) {
3999 			mutex_enter(&stp->sd_lock);
4000 			strendplumb(stp);
4001 			mutex_exit(&stp->sd_lock);
4002 			return (error);
4003 		}
4004 
4005 		mutex_enter(&stp->sd_lock);
4006 
4007 		/*
4008 		 * As a performance concern we are caching the values of
4009 		 * q_minpsz and q_maxpsz of the module below the stream
4010 		 * head in the stream head.
4011 		 */
4012 		if (!is_insert) {
4013 			mutex_enter(QLOCK(stp->sd_wrq->q_next));
4014 			rmin = stp->sd_wrq->q_next->q_minpsz;
4015 			rmax = stp->sd_wrq->q_next->q_maxpsz;
4016 			mutex_exit(QLOCK(stp->sd_wrq->q_next));
4017 
4018 			/* Do this processing here as a performance concern */
4019 			if (strmsgsz != 0) {
4020 				if (rmax == INFPSZ) {
4021 					rmax = strmsgsz;
4022 				} else  {
4023 					rmax = MIN(strmsgsz, rmax);
4024 				}
4025 			}
4026 
4027 			mutex_enter(QLOCK(wrq));
4028 			stp->sd_qn_minpsz = rmin;
4029 			stp->sd_qn_maxpsz = rmax;
4030 			mutex_exit(QLOCK(wrq));
4031 		}
4032 
4033 		/*
4034 		 * Need to update the anchor value if this module is
4035 		 * inserted below the anchor point.
4036 		 */
4037 		if (stp->sd_anchor != 0) {
4038 			pos = STRUCT_FGET(strmodinsert, pos);
4039 			if (pos >= (stp->sd_pushcnt - stp->sd_anchor))
4040 				stp->sd_anchor++;
4041 		}
4042 
4043 		strendplumb(stp);
4044 		mutex_exit(&stp->sd_lock);
4045 		return (0);
4046 	}
4047 
4048 	case _I_REMOVE:
4049 	{
4050 		/*
4051 		 * To remove a module with a given name in a stream.  The
4052 		 * caller of this ioctl needs to provide both the name and
4053 		 * the position of the module to be removed.  This eliminates
4054 		 * the ambiguity of removal if a module is inserted/pushed
4055 		 * multiple times in a stream.  In the first release, only
4056 		 * allow privileged user to use this ioctl.
4057 		 *
4058 		 * Note that we do not plan to support this ioctl
4059 		 * on pipes in the first release.  We want to learn more
4060 		 * about the implications of these ioctls before extending
4061 		 * their support.  And we do not think these features are
4062 		 * valuable for pipes.
4063 		 *
4064 		 * Neither do we support O/C hot stream.  Note that only
4065 		 * the upper streams of TCP/IP stack are O/C hot streams.
4066 		 * The lower IP stream is not.
4067 		 * When there is a O/C cold barrier we do not allow removal
4068 		 * below the barrier.
4069 		 *
4070 		 * Also note that _I_REMOVE cannot be used to remove a
4071 		 * driver or the stream head.
4072 		 */
4073 		STRUCT_DECL(strmodconf, strmodremove);
4074 		queue_t	*q;
4075 		int pos;
4076 		char mod_name[FMNAMESZ + 1];
4077 		boolean_t is_remove;
4078 
4079 		STRUCT_INIT(strmodremove, flag);
4080 		if (stp->sd_flag & STRHUP)
4081 			return (ENXIO);
4082 		if (STRMATED(stp))
4083 			return (EINVAL);
4084 		if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
4085 			return (error);
4086 
4087 		error = strcopyin((void *)arg, STRUCT_BUF(strmodremove),
4088 		    STRUCT_SIZE(strmodremove), copyflag);
4089 		if (error)
4090 			return (error);
4091 
4092 		error = (copyflag & U_TO_K ? copyinstr :
4093 		    copystr)(STRUCT_FGETP(strmodremove, mod_name),
4094 		    mod_name, FMNAMESZ + 1, NULL);
4095 		if (error)
4096 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
4097 
4098 		if ((error = strstartplumb(stp, flag, cmd)) != 0)
4099 			return (error);
4100 
4101 		/*
4102 		 * Match the name of given module to the name of module at
4103 		 * the given position.
4104 		 */
4105 		pos = STRUCT_FGET(strmodremove, pos);
4106 
4107 		is_remove = (pos != 0);
4108 		for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0;
4109 		    q = q->q_next, pos--)
4110 			;
4111 		if (pos > 0 || ! SAMESTR(q) ||
4112 		    strncmp(q->q_qinfo->qi_minfo->mi_idname, mod_name,
4113 		    strlen(q->q_qinfo->qi_minfo->mi_idname)) != 0) {
4114 			mutex_enter(&stp->sd_lock);
4115 			strendplumb(stp);
4116 			mutex_exit(&stp->sd_lock);
4117 			return (EINVAL);
4118 		}
4119 
4120 		ASSERT(!(q->q_flag & QREADR));
4121 		qdetach(_RD(q), 1, flag, crp, is_remove);
4122 
4123 		mutex_enter(&stp->sd_lock);
4124 
4125 		/*
4126 		 * As a performance concern we are caching the values of
4127 		 * q_minpsz and q_maxpsz of the module below the stream
4128 		 * head in the stream head.
4129 		 */
4130 		if (!is_remove) {
4131 			mutex_enter(QLOCK(wrq->q_next));
4132 			rmin = wrq->q_next->q_minpsz;
4133 			rmax = wrq->q_next->q_maxpsz;
4134 			mutex_exit(QLOCK(wrq->q_next));
4135 
4136 			/* Do this processing here as a performance concern */
4137 			if (strmsgsz != 0) {
4138 				if (rmax == INFPSZ)
4139 					rmax = strmsgsz;
4140 				else  {
4141 					if (vp->v_type == VFIFO)
4142 						rmax = MIN(PIPE_BUF, rmax);
4143 					else	rmax = MIN(strmsgsz, rmax);
4144 				}
4145 			}
4146 
4147 			mutex_enter(QLOCK(wrq));
4148 			stp->sd_qn_minpsz = rmin;
4149 			stp->sd_qn_maxpsz = rmax;
4150 			mutex_exit(QLOCK(wrq));
4151 		}
4152 
4153 		/*
4154 		 * Need to update the anchor value if this module is removed
4155 		 * at or below the anchor point.  If the removed module is at
4156 		 * the anchor point, remove the anchor for this stream if
4157 		 * there is no module above the anchor point.  Otherwise, if
4158 		 * the removed module is below the anchor point, decrement the
4159 		 * anchor point by 1.
4160 		 */
4161 		if (stp->sd_anchor != 0) {
4162 			pos = STRUCT_FGET(strmodremove, pos);
4163 			if (pos == 0)
4164 				stp->sd_anchor = 0;
4165 			else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1))
4166 				stp->sd_anchor--;
4167 		}
4168 
4169 		strendplumb(stp);
4170 		mutex_exit(&stp->sd_lock);
4171 		return (0);
4172 	}
4173 
4174 	case I_ANCHOR:
4175 		/*
4176 		 * Set the anchor position on the stream to reside at
4177 		 * the top module (in other words, the top module
4178 		 * cannot be popped).  Anchors with a FIFO make no
4179 		 * obvious sense, so they're not allowed.
4180 		 */
4181 		mutex_enter(&stp->sd_lock);
4182 
4183 		if (stp->sd_vnode->v_type == VFIFO) {
4184 			mutex_exit(&stp->sd_lock);
4185 			return (EINVAL);
4186 		}
4187 
4188 		stp->sd_anchor = stp->sd_pushcnt;
4189 
4190 		mutex_exit(&stp->sd_lock);
4191 		return (0);
4192 
4193 	case I_LOOK:
4194 		/*
4195 		 * Get name of first module downstream.
4196 		 * If no module, return an error.
4197 		 */
4198 	    {
4199 		claimstr(wrq);
4200 		if (_SAMESTR(wrq) && wrq->q_next->q_next) {
4201 			char *name = wrq->q_next->q_qinfo->qi_minfo->mi_idname;
4202 			error = strcopyout(name, (void *)arg, strlen(name) + 1,
4203 			    copyflag);
4204 			releasestr(wrq);
4205 			return (error);
4206 		}
4207 		releasestr(wrq);
4208 		return (EINVAL);
4209 	    }
4210 
4211 	case I_LINK:
4212 	case I_PLINK:
4213 		/*
4214 		 * Link a multiplexor.
4215 		 */
4216 		return (mlink(vp, cmd, (int)arg, crp, rvalp, 0));
4217 
4218 	case _I_PLINK_LH:
4219 		/*
4220 		 * Link a multiplexor: Call must originate from kernel.
4221 		 */
4222 		if (kioctl)
4223 			return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp));
4224 
4225 		return (EINVAL);
4226 	case I_UNLINK:
4227 	case I_PUNLINK:
4228 		/*
4229 		 * Unlink a multiplexor.
4230 		 * If arg is -1, unlink all links for which this is the
4231 		 * controlling stream.  Otherwise, arg is an index number
4232 		 * for a link to be removed.
4233 		 */
4234 	    {
4235 		struct linkinfo *linkp;
4236 		int native_arg = (int)arg;
4237 		int type;
4238 
4239 		TRACE_1(TR_FAC_STREAMS_FR,
4240 			TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp);
4241 		if (vp->v_type == VFIFO) {
4242 			return (EINVAL);
4243 		}
4244 		if (cmd == I_UNLINK)
4245 			type = LINKNORMAL;
4246 		else	/* I_PUNLINK */
4247 			type = LINKPERSIST;
4248 		if (native_arg == 0) {
4249 			return (EINVAL);
4250 		}
4251 		if (native_arg == MUXID_ALL)
4252 			error = munlinkall(stp, type, crp, rvalp);
4253 		else {
4254 			mutex_enter(&muxifier);
4255 			if (!(linkp = findlinks(stp, (int)arg, type))) {
4256 				/* invalid user supplied index number */
4257 				mutex_exit(&muxifier);
4258 				return (EINVAL);
4259 			}
4260 			/* munlink drops the muxifier lock */
4261 			error = munlink(stp, linkp, type, crp, rvalp);
4262 		}
4263 		return (error);
4264 	    }
4265 
4266 	case I_FLUSH:
4267 		/*
4268 		 * send a flush message downstream
4269 		 * flush message can indicate
4270 		 * FLUSHR - flush read queue
4271 		 * FLUSHW - flush write queue
4272 		 * FLUSHRW - flush read/write queue
4273 		 */
4274 		if (stp->sd_flag & STRHUP)
4275 			return (ENXIO);
4276 		if (arg & ~FLUSHRW)
4277 			return (EINVAL);
4278 
4279 		for (;;) {
4280 			if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) {
4281 				break;
4282 			}
4283 			if (error = strwaitbuf(1, BPRI_HI)) {
4284 				return (error);
4285 			}
4286 		}
4287 
4288 		/*
4289 		 * Send down an unsupported ioctl and wait for the nack
4290 		 * in order to allow the M_FLUSH to propagate back
4291 		 * up to the stream head.
4292 		 * Replaces if (qready()) runqueues();
4293 		 */
4294 		strioc.ic_cmd = -1;	/* The unsupported ioctl */
4295 		strioc.ic_timout = 0;
4296 		strioc.ic_len = 0;
4297 		strioc.ic_dp = NULL;
4298 		(void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4299 		*rvalp = 0;
4300 		return (0);
4301 
4302 	case I_FLUSHBAND:
4303 	    {
4304 		struct bandinfo binfo;
4305 
4306 		error = strcopyin((void *)arg, &binfo, sizeof (binfo),
4307 		    copyflag);
4308 		if (error)
4309 			return (error);
4310 		if (stp->sd_flag & STRHUP)
4311 			return (ENXIO);
4312 		if (binfo.bi_flag & ~FLUSHRW)
4313 			return (EINVAL);
4314 		while (!(mp = allocb(2, BPRI_HI))) {
4315 			if (error = strwaitbuf(2, BPRI_HI))
4316 				return (error);
4317 		}
4318 		mp->b_datap->db_type = M_FLUSH;
4319 		*mp->b_wptr++ = binfo.bi_flag | FLUSHBAND;
4320 		*mp->b_wptr++ = binfo.bi_pri;
4321 		putnext(stp->sd_wrq, mp);
4322 		/*
4323 		 * Send down an unsupported ioctl and wait for the nack
4324 		 * in order to allow the M_FLUSH to propagate back
4325 		 * up to the stream head.
4326 		 * Replaces if (qready()) runqueues();
4327 		 */
4328 		strioc.ic_cmd = -1;	/* The unsupported ioctl */
4329 		strioc.ic_timout = 0;
4330 		strioc.ic_len = 0;
4331 		strioc.ic_dp = NULL;
4332 		(void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4333 		*rvalp = 0;
4334 		return (0);
4335 	    }
4336 
4337 	case I_SRDOPT:
4338 		/*
4339 		 * Set read options
4340 		 *
4341 		 * RNORM - default stream mode
4342 		 * RMSGN - message no discard
4343 		 * RMSGD - message discard
4344 		 * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs
4345 		 * RPROTDAT - convert M_[PC]PROTOs to M_DATAs
4346 		 * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs
4347 		 */
4348 		if (arg & ~(RMODEMASK | RPROTMASK))
4349 			return (EINVAL);
4350 
4351 		if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN))
4352 			return (EINVAL);
4353 
4354 		mutex_enter(&stp->sd_lock);
4355 		switch (arg & RMODEMASK) {
4356 		case RNORM:
4357 			stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
4358 			break;
4359 		case RMSGD:
4360 			stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) |
4361 			    RD_MSGDIS;
4362 			break;
4363 		case RMSGN:
4364 			stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) |
4365 			    RD_MSGNODIS;
4366 			break;
4367 		}
4368 
4369 		switch (arg & RPROTMASK) {
4370 		case RPROTNORM:
4371 			stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
4372 			break;
4373 
4374 		case RPROTDAT:
4375 			stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) |
4376 			    RD_PROTDAT);
4377 			break;
4378 
4379 		case RPROTDIS:
4380 			stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) |
4381 			    RD_PROTDIS);
4382 			break;
4383 		}
4384 		mutex_exit(&stp->sd_lock);
4385 		return (0);
4386 
4387 	case I_GRDOPT:
4388 		/*
4389 		 * Get read option and return the value
4390 		 * to spot pointed to by arg
4391 		 */
4392 	    {
4393 		int rdopt;
4394 
4395 		rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD :
4396 		    ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM));
4397 		rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT :
4398 		    ((stp->sd_read_opt & RD_PROTDIS) ? RPROTDIS : RPROTNORM));
4399 
4400 		return (strcopyout(&rdopt, (void *)arg, sizeof (int),
4401 		    copyflag));
4402 	    }
4403 
4404 	case I_SERROPT:
4405 		/*
4406 		 * Set error options
4407 		 *
4408 		 * RERRNORM - persistent read errors
4409 		 * RERRNONPERSIST - non-persistent read errors
4410 		 * WERRNORM - persistent write errors
4411 		 * WERRNONPERSIST - non-persistent write errors
4412 		 */
4413 		if (arg & ~(RERRMASK | WERRMASK))
4414 			return (EINVAL);
4415 
4416 		mutex_enter(&stp->sd_lock);
4417 		switch (arg & RERRMASK) {
4418 		case RERRNORM:
4419 			stp->sd_flag &= ~STRDERRNONPERSIST;
4420 			break;
4421 		case RERRNONPERSIST:
4422 			stp->sd_flag |= STRDERRNONPERSIST;
4423 			break;
4424 		}
4425 		switch (arg & WERRMASK) {
4426 		case WERRNORM:
4427 			stp->sd_flag &= ~STWRERRNONPERSIST;
4428 			break;
4429 		case WERRNONPERSIST:
4430 			stp->sd_flag |= STWRERRNONPERSIST;
4431 			break;
4432 		}
4433 		mutex_exit(&stp->sd_lock);
4434 		return (0);
4435 
4436 	case I_GERROPT:
4437 		/*
4438 		 * Get error option and return the value
4439 		 * to spot pointed to by arg
4440 		 */
4441 	    {
4442 		int erropt = 0;
4443 
4444 		erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? RERRNONPERSIST :
4445 			RERRNORM;
4446 		erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST :
4447 			WERRNORM;
4448 		return (strcopyout(&erropt, (void *)arg, sizeof (int),
4449 		    copyflag));
4450 	    }
4451 
4452 	case I_SETSIG:
4453 		/*
4454 		 * Register the calling proc to receive the SIGPOLL
4455 		 * signal based on the events given in arg.  If
4456 		 * arg is zero, remove the proc from register list.
4457 		 */
4458 	    {
4459 		strsig_t *ssp, *pssp;
4460 		struct pid *pidp;
4461 
4462 		pssp = NULL;
4463 		pidp = curproc->p_pidp;
4464 		/*
4465 		 * Hold sd_lock to prevent traversal of sd_siglist while
4466 		 * it is modified.
4467 		 */
4468 		mutex_enter(&stp->sd_lock);
4469 		for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp);
4470 			pssp = ssp, ssp = ssp->ss_next)
4471 			;
4472 
4473 		if (arg) {
4474 			if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4475 			    S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4476 				mutex_exit(&stp->sd_lock);
4477 				return (EINVAL);
4478 			}
4479 			if ((arg & S_BANDURG) && !(arg & S_RDBAND)) {
4480 				mutex_exit(&stp->sd_lock);
4481 				return (EINVAL);
4482 			}
4483 
4484 			/*
4485 			 * If proc not already registered, add it
4486 			 * to list.
4487 			 */
4488 			if (!ssp) {
4489 				ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4490 				ssp->ss_pidp = pidp;
4491 				ssp->ss_pid = pidp->pid_id;
4492 				ssp->ss_next = NULL;
4493 				if (pssp)
4494 					pssp->ss_next = ssp;
4495 				else
4496 					stp->sd_siglist = ssp;
4497 				mutex_enter(&pidlock);
4498 				PID_HOLD(pidp);
4499 				mutex_exit(&pidlock);
4500 			}
4501 
4502 			/*
4503 			 * Set events.
4504 			 */
4505 			ssp->ss_events = (int)arg;
4506 		} else {
4507 			/*
4508 			 * Remove proc from register list.
4509 			 */
4510 			if (ssp) {
4511 				mutex_enter(&pidlock);
4512 				PID_RELE(pidp);
4513 				mutex_exit(&pidlock);
4514 				if (pssp)
4515 					pssp->ss_next = ssp->ss_next;
4516 				else
4517 					stp->sd_siglist = ssp->ss_next;
4518 				kmem_free(ssp, sizeof (strsig_t));
4519 			} else {
4520 				mutex_exit(&stp->sd_lock);
4521 				return (EINVAL);
4522 			}
4523 		}
4524 
4525 		/*
4526 		 * Recalculate OR of sig events.
4527 		 */
4528 		stp->sd_sigflags = 0;
4529 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4530 			stp->sd_sigflags |= ssp->ss_events;
4531 		mutex_exit(&stp->sd_lock);
4532 		return (0);
4533 	    }
4534 
4535 	case I_GETSIG:
4536 		/*
4537 		 * Return (in arg) the current registration of events
4538 		 * for which the calling proc is to be signaled.
4539 		 */
4540 	    {
4541 		struct strsig *ssp;
4542 		struct pid  *pidp;
4543 
4544 		pidp = curproc->p_pidp;
4545 		mutex_enter(&stp->sd_lock);
4546 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4547 			if (ssp->ss_pidp == pidp) {
4548 				error = strcopyout(&ssp->ss_events, (void *)arg,
4549 				    sizeof (int), copyflag);
4550 				mutex_exit(&stp->sd_lock);
4551 				return (error);
4552 			}
4553 		mutex_exit(&stp->sd_lock);
4554 		return (EINVAL);
4555 	    }
4556 
4557 	case I_ESETSIG:
4558 		/*
4559 		 * Register the ss_pid to receive the SIGPOLL
4560 		 * signal based on the events in ss_events arg.  If
4561 		 * ss_events is zero, remove the proc from register list.
4562 		 */
4563 	{
4564 		struct strsig *ssp, *pssp;
4565 		struct proc *proc;
4566 		struct pid  *pidp;
4567 		pid_t pid;
4568 		struct strsigset ss;
4569 
4570 		error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4571 		if (error)
4572 			return (error);
4573 
4574 		pid = ss.ss_pid;
4575 
4576 		if (ss.ss_events != 0) {
4577 			/*
4578 			 * Permissions check by sending signal 0.
4579 			 * Note that when kill fails it does a set_errno
4580 			 * causing the system call to fail.
4581 			 */
4582 			error = kill(pid, 0);
4583 			if (error) {
4584 				return (error);
4585 			}
4586 		}
4587 		mutex_enter(&pidlock);
4588 		if (pid == 0)
4589 			proc = curproc;
4590 		else if (pid < 0)
4591 			proc = pgfind(-pid);
4592 		else
4593 			proc = prfind(pid);
4594 		if (proc == NULL) {
4595 			mutex_exit(&pidlock);
4596 			return (ESRCH);
4597 		}
4598 		if (pid < 0)
4599 			pidp = proc->p_pgidp;
4600 		else
4601 			pidp = proc->p_pidp;
4602 		ASSERT(pidp);
4603 		/*
4604 		 * Get a hold on the pid structure while referencing it.
4605 		 * There is a separate PID_HOLD should it be inserted
4606 		 * in the list below.
4607 		 */
4608 		PID_HOLD(pidp);
4609 		mutex_exit(&pidlock);
4610 
4611 		pssp = NULL;
4612 		/*
4613 		 * Hold sd_lock to prevent traversal of sd_siglist while
4614 		 * it is modified.
4615 		 */
4616 		mutex_enter(&stp->sd_lock);
4617 		for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid);
4618 				pssp = ssp, ssp = ssp->ss_next)
4619 			;
4620 
4621 		if (ss.ss_events) {
4622 			if (ss.ss_events &
4623 			    ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4624 			    S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4625 				mutex_exit(&stp->sd_lock);
4626 				mutex_enter(&pidlock);
4627 				PID_RELE(pidp);
4628 				mutex_exit(&pidlock);
4629 				return (EINVAL);
4630 			}
4631 			if ((ss.ss_events & S_BANDURG) &&
4632 			    !(ss.ss_events & S_RDBAND)) {
4633 				mutex_exit(&stp->sd_lock);
4634 				mutex_enter(&pidlock);
4635 				PID_RELE(pidp);
4636 				mutex_exit(&pidlock);
4637 				return (EINVAL);
4638 			}
4639 
4640 			/*
4641 			 * If proc not already registered, add it
4642 			 * to list.
4643 			 */
4644 			if (!ssp) {
4645 				ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4646 				ssp->ss_pidp = pidp;
4647 				ssp->ss_pid = pid;
4648 				ssp->ss_next = NULL;
4649 				if (pssp)
4650 					pssp->ss_next = ssp;
4651 				else
4652 					stp->sd_siglist = ssp;
4653 				mutex_enter(&pidlock);
4654 				PID_HOLD(pidp);
4655 				mutex_exit(&pidlock);
4656 			}
4657 
4658 			/*
4659 			 * Set events.
4660 			 */
4661 			ssp->ss_events = ss.ss_events;
4662 		} else {
4663 			/*
4664 			 * Remove proc from register list.
4665 			 */
4666 			if (ssp) {
4667 				mutex_enter(&pidlock);
4668 				PID_RELE(pidp);
4669 				mutex_exit(&pidlock);
4670 				if (pssp)
4671 					pssp->ss_next = ssp->ss_next;
4672 				else
4673 					stp->sd_siglist = ssp->ss_next;
4674 				kmem_free(ssp, sizeof (strsig_t));
4675 			} else {
4676 				mutex_exit(&stp->sd_lock);
4677 				mutex_enter(&pidlock);
4678 				PID_RELE(pidp);
4679 				mutex_exit(&pidlock);
4680 				return (EINVAL);
4681 			}
4682 		}
4683 
4684 		/*
4685 		 * Recalculate OR of sig events.
4686 		 */
4687 		stp->sd_sigflags = 0;
4688 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4689 			stp->sd_sigflags |= ssp->ss_events;
4690 		mutex_exit(&stp->sd_lock);
4691 		mutex_enter(&pidlock);
4692 		PID_RELE(pidp);
4693 		mutex_exit(&pidlock);
4694 		return (0);
4695 	    }
4696 
4697 	case I_EGETSIG:
4698 		/*
4699 		 * Return (in arg) the current registration of events
4700 		 * for which the process (or group) in ss_pid is to be signaled.
4701 		 */
4702 	    {
4703 		struct strsig *ssp;
4704 		struct proc *proc;
4705 		pid_t pid;
4706 		struct pid  *pidp;
4707 		struct strsigset ss;
4708 
4709 		error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4710 		if (error)
4711 			return (error);
4712 
4713 		pid = ss.ss_pid;
4714 		mutex_enter(&pidlock);
4715 		if (pid == 0)
4716 			proc = curproc;
4717 		else if (pid < 0)
4718 			proc = pgfind(-pid);
4719 		else
4720 			proc = prfind(pid);
4721 		if (proc == NULL) {
4722 			mutex_exit(&pidlock);
4723 			return (ESRCH);
4724 		}
4725 		if (pid < 0)
4726 			pidp = proc->p_pgidp;
4727 		else
4728 			pidp = proc->p_pidp;
4729 
4730 		/* Prevent the pidp from being reassigned */
4731 		PID_HOLD(pidp);
4732 		mutex_exit(&pidlock);
4733 
4734 		mutex_enter(&stp->sd_lock);
4735 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4736 			if (ssp->ss_pid == pid) {
4737 				ss.ss_pid = ssp->ss_pid;
4738 				ss.ss_events = ssp->ss_events;
4739 				error = strcopyout(&ss, (void *)arg,
4740 				    sizeof (struct strsigset), copyflag);
4741 				mutex_exit(&stp->sd_lock);
4742 				mutex_enter(&pidlock);
4743 				PID_RELE(pidp);
4744 				mutex_exit(&pidlock);
4745 				return (error);
4746 			}
4747 		mutex_exit(&stp->sd_lock);
4748 		mutex_enter(&pidlock);
4749 		PID_RELE(pidp);
4750 		mutex_exit(&pidlock);
4751 		return (EINVAL);
4752 	    }
4753 
4754 	case I_PEEK:
4755 	    {
4756 		STRUCT_DECL(strpeek, strpeek);
4757 		size_t n;
4758 		mblk_t *fmp, *tmp_mp = NULL;
4759 
4760 		STRUCT_INIT(strpeek, flag);
4761 
4762 		error = strcopyin((void *)arg, STRUCT_BUF(strpeek),
4763 		    STRUCT_SIZE(strpeek), copyflag);
4764 		if (error)
4765 			return (error);
4766 
4767 		mutex_enter(QLOCK(rdq));
4768 		/*
4769 		 * Skip the invalid messages
4770 		 */
4771 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
4772 			if (mp->b_datap->db_type != M_SIG)
4773 				break;
4774 
4775 		/*
4776 		 * If user has requested to peek at a high priority message
4777 		 * and first message is not, return 0
4778 		 */
4779 		if (mp != NULL) {
4780 			if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) &&
4781 			    queclass(mp) == QNORM) {
4782 				*rvalp = 0;
4783 				mutex_exit(QLOCK(rdq));
4784 				return (0);
4785 			}
4786 		} else if (stp->sd_struiordq == NULL ||
4787 		    (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) {
4788 			/*
4789 			 * No mblks to look at at the streamhead and
4790 			 * 1). This isn't a synch stream or
4791 			 * 2). This is a synch stream but caller wants high
4792 			 *	priority messages which is not supported by
4793 			 *	the synch stream. (it only supports QNORM)
4794 			 */
4795 			*rvalp = 0;
4796 			mutex_exit(QLOCK(rdq));
4797 			return (0);
4798 		}
4799 
4800 		fmp = mp;
4801 
4802 		if (mp && mp->b_datap->db_type == M_PASSFP) {
4803 			mutex_exit(QLOCK(rdq));
4804 			return (EBADMSG);
4805 		}
4806 
4807 		ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO ||
4808 		    mp->b_datap->db_type == M_PROTO ||
4809 		    mp->b_datap->db_type == M_DATA);
4810 
4811 		if (mp && mp->b_datap->db_type == M_PCPROTO) {
4812 			STRUCT_FSET(strpeek, flags, RS_HIPRI);
4813 		} else {
4814 			STRUCT_FSET(strpeek, flags, 0);
4815 		}
4816 
4817 
4818 		if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) {
4819 			mutex_exit(QLOCK(rdq));
4820 			return (ENOSR);
4821 		}
4822 		mutex_exit(QLOCK(rdq));
4823 
4824 		/*
4825 		 * set mp = tmp_mp, so that I_PEEK processing can continue.
4826 		 * tmp_mp is used to free the dup'd message.
4827 		 */
4828 		mp = tmp_mp;
4829 
4830 		uio.uio_fmode = 0;
4831 		uio.uio_extflg = UIO_COPY_CACHED;
4832 		uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
4833 		    UIO_SYSSPACE;
4834 		uio.uio_limit = 0;
4835 		/*
4836 		 * First process PROTO blocks, if any.
4837 		 * If user doesn't want to get ctl info by setting maxlen <= 0,
4838 		 * then set len to -1/0 and skip control blocks part.
4839 		 */
4840 		if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0)
4841 			STRUCT_FSET(strpeek, ctlbuf.len, -1);
4842 		else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0)
4843 			STRUCT_FSET(strpeek, ctlbuf.len, 0);
4844 		else {
4845 			int	ctl_part = 0;
4846 
4847 			iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf);
4848 			iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen);
4849 			uio.uio_iov = &iov;
4850 			uio.uio_resid = iov.iov_len;
4851 			uio.uio_loffset = 0;
4852 			uio.uio_iovcnt = 1;
4853 			while (mp && mp->b_datap->db_type != M_DATA &&
4854 			    uio.uio_resid >= 0) {
4855 				ASSERT(STRUCT_FGET(strpeek, flags) == 0 ?
4856 				    mp->b_datap->db_type == M_PROTO :
4857 				    mp->b_datap->db_type == M_PCPROTO);
4858 
4859 				if ((n = MIN(uio.uio_resid,
4860 				    mp->b_wptr - mp->b_rptr)) != 0 &&
4861 				    (error = uiomove((char *)mp->b_rptr, n,
4862 				    UIO_READ, &uio)) != 0) {
4863 					freemsg(tmp_mp);
4864 					return (error);
4865 				}
4866 				ctl_part = 1;
4867 				mp = mp->b_cont;
4868 			}
4869 			/* No ctl message */
4870 			if (ctl_part == 0)
4871 				STRUCT_FSET(strpeek, ctlbuf.len, -1);
4872 			else
4873 				STRUCT_FSET(strpeek, ctlbuf.len,
4874 				    STRUCT_FGET(strpeek, ctlbuf.maxlen) -
4875 				    uio.uio_resid);
4876 		}
4877 
4878 		/*
4879 		 * Now process DATA blocks, if any.
4880 		 * If user doesn't want to get data info by setting maxlen <= 0,
4881 		 * then set len to -1/0 and skip data blocks part.
4882 		 */
4883 		if (STRUCT_FGET(strpeek, databuf.maxlen) < 0)
4884 			STRUCT_FSET(strpeek, databuf.len, -1);
4885 		else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0)
4886 			STRUCT_FSET(strpeek, databuf.len, 0);
4887 		else {
4888 			int	data_part = 0;
4889 
4890 			iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf);
4891 			iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen);
4892 			uio.uio_iov = &iov;
4893 			uio.uio_resid = iov.iov_len;
4894 			uio.uio_loffset = 0;
4895 			uio.uio_iovcnt = 1;
4896 			while (mp && uio.uio_resid) {
4897 				if (mp->b_datap->db_type == M_DATA) {
4898 					if ((n = MIN(uio.uio_resid,
4899 					    mp->b_wptr - mp->b_rptr)) != 0 &&
4900 					    (error = uiomove((char *)mp->b_rptr,
4901 						n, UIO_READ, &uio)) != 0) {
4902 						freemsg(tmp_mp);
4903 						return (error);
4904 					}
4905 					data_part = 1;
4906 				}
4907 				ASSERT(data_part == 0 ||
4908 				    mp->b_datap->db_type == M_DATA);
4909 				mp = mp->b_cont;
4910 			}
4911 			/* No data message */
4912 			if (data_part == 0)
4913 				STRUCT_FSET(strpeek, databuf.len, -1);
4914 			else
4915 				STRUCT_FSET(strpeek, databuf.len,
4916 				    STRUCT_FGET(strpeek, databuf.maxlen) -
4917 				    uio.uio_resid);
4918 		}
4919 		freemsg(tmp_mp);
4920 
4921 		/*
4922 		 * It is a synch stream and user wants to get
4923 		 * data (maxlen > 0).
4924 		 * uio setup is done by the code that processes DATA
4925 		 * blocks above.
4926 		 */
4927 		if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) {
4928 			infod_t infod;
4929 
4930 			infod.d_cmd = INFOD_COPYOUT;
4931 			infod.d_res = 0;
4932 			infod.d_uiop = &uio;
4933 			error = infonext(rdq, &infod);
4934 			if (error == EINVAL || error == EBUSY)
4935 				error = 0;
4936 			if (error)
4937 				return (error);
4938 			STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek,
4939 			    databuf.maxlen) - uio.uio_resid);
4940 			if (STRUCT_FGET(strpeek, databuf.len) == 0) {
4941 				/*
4942 				 * No data found by the infonext().
4943 				 */
4944 				STRUCT_FSET(strpeek, databuf.len, -1);
4945 			}
4946 		}
4947 		error = strcopyout(STRUCT_BUF(strpeek), (void *)arg,
4948 		    STRUCT_SIZE(strpeek), copyflag);
4949 		if (error) {
4950 			return (error);
4951 		}
4952 		/*
4953 		 * If there is no message retrieved, set return code to 0
4954 		 * otherwise, set it to 1.
4955 		 */
4956 		if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 &&
4957 		    STRUCT_FGET(strpeek, databuf.len) == -1)
4958 			*rvalp = 0;
4959 		else
4960 			*rvalp = 1;
4961 		return (0);
4962 	    }
4963 
4964 	case I_FDINSERT:
4965 	    {
4966 		STRUCT_DECL(strfdinsert, strfdinsert);
4967 		struct file *resftp;
4968 		struct stdata *resstp;
4969 		t_uscalar_t	ival;
4970 		ssize_t msgsize;
4971 		struct strbuf mctl;
4972 
4973 		STRUCT_INIT(strfdinsert, flag);
4974 		if (stp->sd_flag & STRHUP)
4975 			return (ENXIO);
4976 		/*
4977 		 * STRDERR, STWRERR and STPLEX tested above.
4978 		 */
4979 		error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert),
4980 		    STRUCT_SIZE(strfdinsert), copyflag);
4981 		if (error)
4982 			return (error);
4983 
4984 		if (STRUCT_FGET(strfdinsert, offset) < 0 ||
4985 		    (STRUCT_FGET(strfdinsert, offset) %
4986 		    sizeof (t_uscalar_t)) != 0)
4987 			return (EINVAL);
4988 		if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) {
4989 			if ((resstp = resftp->f_vnode->v_stream) == NULL) {
4990 				releasef(STRUCT_FGET(strfdinsert, fildes));
4991 				return (EINVAL);
4992 			}
4993 		} else
4994 			return (EINVAL);
4995 
4996 		mutex_enter(&resstp->sd_lock);
4997 		if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) {
4998 			error = strgeterr(resstp,
4999 					STRDERR|STWRERR|STRHUP|STPLEX, 0);
5000 			if (error != 0) {
5001 				mutex_exit(&resstp->sd_lock);
5002 				releasef(STRUCT_FGET(strfdinsert, fildes));
5003 				return (error);
5004 			}
5005 		}
5006 		mutex_exit(&resstp->sd_lock);
5007 
5008 #ifdef	_ILP32
5009 		{
5010 			queue_t	*q;
5011 			queue_t	*mate = NULL;
5012 
5013 			/* get read queue of stream terminus */
5014 			claimstr(resstp->sd_wrq);
5015 			for (q = resstp->sd_wrq->q_next; q->q_next != NULL;
5016 			    q = q->q_next)
5017 				if (!STRMATED(resstp) && STREAM(q) != resstp &&
5018 				    mate == NULL) {
5019 					ASSERT(q->q_qinfo->qi_srvp);
5020 					ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp);
5021 					claimstr(q);
5022 					mate = q;
5023 				}
5024 			q = _RD(q);
5025 			if (mate)
5026 				releasestr(mate);
5027 			releasestr(resstp->sd_wrq);
5028 			ival = (t_uscalar_t)q;
5029 		}
5030 #else
5031 		ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev);
5032 #endif	/* _ILP32 */
5033 
5034 		if (STRUCT_FGET(strfdinsert, ctlbuf.len) <
5035 		    STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) {
5036 			releasef(STRUCT_FGET(strfdinsert, fildes));
5037 			return (EINVAL);
5038 		}
5039 
5040 		/*
5041 		 * Check for legal flag value.
5042 		 */
5043 		if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) {
5044 			releasef(STRUCT_FGET(strfdinsert, fildes));
5045 			return (EINVAL);
5046 		}
5047 
5048 		/* get these values from those cached in the stream head */
5049 		mutex_enter(QLOCK(stp->sd_wrq));
5050 		rmin = stp->sd_qn_minpsz;
5051 		rmax = stp->sd_qn_maxpsz;
5052 		mutex_exit(QLOCK(stp->sd_wrq));
5053 
5054 		/*
5055 		 * Make sure ctl and data sizes together fall within
5056 		 * the limits of the max and min receive packet sizes
5057 		 * and do not exceed system limit.  A negative data
5058 		 * length means that no data part is to be sent.
5059 		 */
5060 		ASSERT((rmax >= 0) || (rmax == INFPSZ));
5061 		if (rmax == 0) {
5062 			releasef(STRUCT_FGET(strfdinsert, fildes));
5063 			return (ERANGE);
5064 		}
5065 		if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0)
5066 			msgsize = 0;
5067 		if ((msgsize < rmin) ||
5068 		    ((msgsize > rmax) && (rmax != INFPSZ)) ||
5069 		    (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) {
5070 			releasef(STRUCT_FGET(strfdinsert, fildes));
5071 			return (ERANGE);
5072 		}
5073 
5074 		mutex_enter(&stp->sd_lock);
5075 		while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) &&
5076 		    !canputnext(stp->sd_wrq)) {
5077 			if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0,
5078 			    flag, -1, &done)) != 0 || done) {
5079 				mutex_exit(&stp->sd_lock);
5080 				releasef(STRUCT_FGET(strfdinsert, fildes));
5081 				return (error);
5082 			}
5083 			if ((error = i_straccess(stp, access)) != 0) {
5084 				mutex_exit(&stp->sd_lock);
5085 				releasef(
5086 				    STRUCT_FGET(strfdinsert, fildes));
5087 				return (error);
5088 			}
5089 		}
5090 		mutex_exit(&stp->sd_lock);
5091 
5092 		/*
5093 		 * Copy strfdinsert.ctlbuf into native form of
5094 		 * ctlbuf to pass down into strmakemsg().
5095 		 */
5096 		mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen);
5097 		mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len);
5098 		mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf);
5099 
5100 		iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf);
5101 		iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len);
5102 		uio.uio_iov = &iov;
5103 		uio.uio_iovcnt = 1;
5104 		uio.uio_loffset = 0;
5105 		uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
5106 		    UIO_SYSSPACE;
5107 		uio.uio_fmode = 0;
5108 		uio.uio_extflg = UIO_COPY_CACHED;
5109 		uio.uio_resid = iov.iov_len;
5110 		if ((error = strmakemsg(&mctl,
5111 		    &msgsize, &uio, stp,
5112 		    STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) {
5113 			STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5114 			releasef(STRUCT_FGET(strfdinsert, fildes));
5115 			return (error);
5116 		}
5117 
5118 		STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5119 
5120 		/*
5121 		 * Place the possibly reencoded queue pointer 'offset' bytes
5122 		 * from the start of the control portion of the message.
5123 		 */
5124 		*((t_uscalar_t *)(mp->b_rptr +
5125 		    STRUCT_FGET(strfdinsert, offset))) = ival;
5126 
5127 		/*
5128 		 * Put message downstream.
5129 		 */
5130 		stream_willservice(stp);
5131 		putnext(stp->sd_wrq, mp);
5132 		stream_runservice(stp);
5133 		releasef(STRUCT_FGET(strfdinsert, fildes));
5134 		return (error);
5135 	    }
5136 
5137 	case I_SENDFD:
5138 	    {
5139 		struct file *fp;
5140 
5141 		if ((fp = getf((int)arg)) == NULL)
5142 			return (EBADF);
5143 		error = do_sendfp(stp, fp, crp);
5144 #ifdef C2_AUDIT
5145 		if (audit_active) {
5146 			audit_fdsend((int)arg, fp, error);
5147 		}
5148 #endif
5149 		releasef((int)arg);
5150 		return (error);
5151 	    }
5152 
5153 	case I_RECVFD:
5154 	case I_E_RECVFD:
5155 	    {
5156 		struct k_strrecvfd *srf;
5157 		int i, fd;
5158 
5159 		mutex_enter(&stp->sd_lock);
5160 		while (!(mp = getq(rdq))) {
5161 			if (stp->sd_flag & (STRHUP|STREOF)) {
5162 				mutex_exit(&stp->sd_lock);
5163 				return (ENXIO);
5164 			}
5165 			if ((error = strwaitq(stp, GETWAIT, (ssize_t)0,
5166 			    flag, -1, &done)) != 0 || done) {
5167 				mutex_exit(&stp->sd_lock);
5168 				return (error);
5169 			}
5170 			if ((error = i_straccess(stp, access)) != 0) {
5171 				mutex_exit(&stp->sd_lock);
5172 				return (error);
5173 			}
5174 		}
5175 		if (mp->b_datap->db_type != M_PASSFP) {
5176 			putback(stp, rdq, mp, mp->b_band);
5177 			mutex_exit(&stp->sd_lock);
5178 			return (EBADMSG);
5179 		}
5180 		mutex_exit(&stp->sd_lock);
5181 
5182 		srf = (struct k_strrecvfd *)mp->b_rptr;
5183 		if ((fd = ufalloc(0)) == -1) {
5184 			mutex_enter(&stp->sd_lock);
5185 			putback(stp, rdq, mp, mp->b_band);
5186 			mutex_exit(&stp->sd_lock);
5187 			return (EMFILE);
5188 		}
5189 		if (cmd == I_RECVFD) {
5190 			struct o_strrecvfd	ostrfd;
5191 
5192 			/* check to see if uid/gid values are too large. */
5193 
5194 			if (srf->uid > (o_uid_t)USHRT_MAX ||
5195 			    srf->gid > (o_gid_t)USHRT_MAX) {
5196 				mutex_enter(&stp->sd_lock);
5197 				putback(stp, rdq, mp, mp->b_band);
5198 				mutex_exit(&stp->sd_lock);
5199 				setf(fd, NULL);	/* release fd entry */
5200 				return (EOVERFLOW);
5201 			}
5202 
5203 			ostrfd.fd = fd;
5204 			ostrfd.uid = (o_uid_t)srf->uid;
5205 			ostrfd.gid = (o_gid_t)srf->gid;
5206 
5207 			/* Null the filler bits */
5208 			for (i = 0; i < 8; i++)
5209 				ostrfd.fill[i] = 0;
5210 
5211 			error = strcopyout(&ostrfd, (void *)arg,
5212 			    sizeof (struct o_strrecvfd), copyflag);
5213 		} else {		/* I_E_RECVFD */
5214 			struct strrecvfd	strfd;
5215 
5216 			strfd.fd = fd;
5217 			strfd.uid = srf->uid;
5218 			strfd.gid = srf->gid;
5219 
5220 			/* null the filler bits */
5221 			for (i = 0; i < 8; i++)
5222 				strfd.fill[i] = 0;
5223 
5224 			error = strcopyout(&strfd, (void *)arg,
5225 			    sizeof (struct strrecvfd), copyflag);
5226 		}
5227 
5228 		if (error) {
5229 			setf(fd, NULL);	/* release fd entry */
5230 			mutex_enter(&stp->sd_lock);
5231 			putback(stp, rdq, mp, mp->b_band);
5232 			mutex_exit(&stp->sd_lock);
5233 			return (error);
5234 		}
5235 #ifdef C2_AUDIT
5236 		if (audit_active) {
5237 			audit_fdrecv(fd, srf->fp);
5238 		}
5239 #endif
5240 
5241 		/*
5242 		 * Always increment f_count since the freemsg() below will
5243 		 * always call free_passfp() which performs a closef().
5244 		 */
5245 		mutex_enter(&srf->fp->f_tlock);
5246 		srf->fp->f_count++;
5247 		mutex_exit(&srf->fp->f_tlock);
5248 		setf(fd, srf->fp);
5249 		freemsg(mp);
5250 		return (0);
5251 	    }
5252 
5253 	case I_SWROPT:
5254 		/*
5255 		 * Set/clear the write options. arg is a bit
5256 		 * mask with any of the following bits set...
5257 		 * 	SNDZERO - send zero length message
5258 		 *	SNDPIPE - send sigpipe to process if
5259 		 *		sd_werror is set and process is
5260 		 *		doing a write or putmsg.
5261 		 * The new stream head write options should reflect
5262 		 * what is in arg.
5263 		 */
5264 		if (arg & ~(SNDZERO|SNDPIPE))
5265 			return (EINVAL);
5266 
5267 		mutex_enter(&stp->sd_lock);
5268 		stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO);
5269 		if (arg & SNDZERO)
5270 			stp->sd_wput_opt |= SW_SNDZERO;
5271 		if (arg & SNDPIPE)
5272 			stp->sd_wput_opt |= SW_SIGPIPE;
5273 		mutex_exit(&stp->sd_lock);
5274 		return (0);
5275 
5276 	case I_GWROPT:
5277 	    {
5278 		int wropt = 0;
5279 
5280 		if (stp->sd_wput_opt & SW_SNDZERO)
5281 			wropt |= SNDZERO;
5282 		if (stp->sd_wput_opt & SW_SIGPIPE)
5283 			wropt |= SNDPIPE;
5284 		return (strcopyout(&wropt, (void *)arg, sizeof (wropt),
5285 		    copyflag));
5286 	    }
5287 
5288 	case I_LIST:
5289 		/*
5290 		 * Returns all the modules found on this stream,
5291 		 * upto the driver. If argument is NULL, return the
5292 		 * number of modules (including driver). If argument
5293 		 * is not NULL, copy the names into the structure
5294 		 * provided.
5295 		 */
5296 
5297 	    {
5298 		queue_t *q;
5299 		int num_modules, space_allocated;
5300 		STRUCT_DECL(str_list, strlist);
5301 		struct str_mlist *mlist_ptr;
5302 
5303 		if (arg == NULL) { /* Return number of modules plus driver */
5304 			q = stp->sd_wrq;
5305 			if (stp->sd_vnode->v_type == VFIFO) {
5306 				*rvalp = stp->sd_pushcnt;
5307 			} else {
5308 				*rvalp = stp->sd_pushcnt + 1;
5309 			}
5310 		} else {
5311 			STRUCT_INIT(strlist, flag);
5312 
5313 			error = strcopyin((void *)arg, STRUCT_BUF(strlist),
5314 			    STRUCT_SIZE(strlist), copyflag);
5315 			if (error)
5316 				return (error);
5317 
5318 			space_allocated = STRUCT_FGET(strlist, sl_nmods);
5319 			if ((space_allocated) <= 0)
5320 				return (EINVAL);
5321 			claimstr(stp->sd_wrq);
5322 			q = stp->sd_wrq;
5323 			num_modules = 0;
5324 			while (_SAMESTR(q) && (space_allocated != 0)) {
5325 				char *name =
5326 				    q->q_next->q_qinfo->qi_minfo->mi_idname;
5327 
5328 				mlist_ptr = STRUCT_FGETP(strlist, sl_modlist);
5329 
5330 				error = strcopyout(name, mlist_ptr,
5331 				    strlen(name) + 1, copyflag);
5332 
5333 				if (error) {
5334 					releasestr(stp->sd_wrq);
5335 					return (error);
5336 				}
5337 				q = q->q_next;
5338 				space_allocated--;
5339 				num_modules++;
5340 				mlist_ptr =
5341 				    (struct str_mlist *)((uintptr_t)mlist_ptr +
5342 				    sizeof (struct str_mlist));
5343 				STRUCT_FSETP(strlist, sl_modlist, mlist_ptr);
5344 			}
5345 			releasestr(stp->sd_wrq);
5346 			error = strcopyout(&num_modules, (void *)arg,
5347 			    sizeof (int), copyflag);
5348 		}
5349 		return (error);
5350 	    }
5351 
5352 	case I_CKBAND:
5353 	    {
5354 		queue_t *q;
5355 		qband_t *qbp;
5356 
5357 		if ((arg < 0) || (arg >= NBAND))
5358 			return (EINVAL);
5359 		q = _RD(stp->sd_wrq);
5360 		mutex_enter(QLOCK(q));
5361 		if (arg > (int)q->q_nband) {
5362 			*rvalp = 0;
5363 		} else {
5364 			if (arg == 0) {
5365 				if (q->q_first)
5366 					*rvalp = 1;
5367 				else
5368 					*rvalp = 0;
5369 			} else {
5370 				qbp = q->q_bandp;
5371 				while (--arg > 0)
5372 					qbp = qbp->qb_next;
5373 				if (qbp->qb_first)
5374 					*rvalp = 1;
5375 				else
5376 					*rvalp = 0;
5377 			}
5378 		}
5379 		mutex_exit(QLOCK(q));
5380 		return (0);
5381 	    }
5382 
5383 	case I_GETBAND:
5384 	    {
5385 		int intpri;
5386 		queue_t *q;
5387 
5388 		q = _RD(stp->sd_wrq);
5389 		mutex_enter(QLOCK(q));
5390 		mp = q->q_first;
5391 		if (!mp) {
5392 			mutex_exit(QLOCK(q));
5393 			return (ENODATA);
5394 		}
5395 		intpri = (int)mp->b_band;
5396 		error = strcopyout(&intpri, (void *)arg, sizeof (int),
5397 		    copyflag);
5398 		mutex_exit(QLOCK(q));
5399 		return (error);
5400 	    }
5401 
5402 	case I_ATMARK:
5403 	    {
5404 		queue_t *q;
5405 
5406 		if (arg & ~(ANYMARK|LASTMARK))
5407 			return (EINVAL);
5408 		q = _RD(stp->sd_wrq);
5409 		mutex_enter(&stp->sd_lock);
5410 		if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) {
5411 			*rvalp = 1;
5412 		} else {
5413 			mutex_enter(QLOCK(q));
5414 			mp = q->q_first;
5415 
5416 			if (mp == NULL)
5417 				*rvalp = 0;
5418 			else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK))
5419 				*rvalp = 1;
5420 			else if ((arg == LASTMARK) && (mp == stp->sd_mark))
5421 				*rvalp = 1;
5422 			else
5423 				*rvalp = 0;
5424 			mutex_exit(QLOCK(q));
5425 		}
5426 		mutex_exit(&stp->sd_lock);
5427 		return (0);
5428 	    }
5429 
5430 	case I_CANPUT:
5431 	    {
5432 		char band;
5433 
5434 		if ((arg < 0) || (arg >= NBAND))
5435 			return (EINVAL);
5436 		band = (char)arg;
5437 		*rvalp = bcanputnext(stp->sd_wrq, band);
5438 		return (0);
5439 	    }
5440 
5441 	case I_SETCLTIME:
5442 	    {
5443 		int closetime;
5444 
5445 		error = strcopyin((void *)arg, &closetime, sizeof (int),
5446 		    copyflag);
5447 		if (error)
5448 			return (error);
5449 		if (closetime < 0)
5450 			return (EINVAL);
5451 
5452 		stp->sd_closetime = closetime;
5453 		return (0);
5454 	    }
5455 
5456 	case I_GETCLTIME:
5457 	    {
5458 		int closetime;
5459 
5460 		closetime = stp->sd_closetime;
5461 		return (strcopyout(&closetime, (void *)arg, sizeof (int),
5462 		    copyflag));
5463 	    }
5464 
5465 	case TIOCGSID:
5466 	{
5467 		pid_t sid;
5468 
5469 		mutex_enter(&stp->sd_lock);
5470 		if (stp->sd_sidp == NULL) {
5471 			mutex_exit(&stp->sd_lock);
5472 			return (ENOTTY);
5473 		}
5474 		sid = stp->sd_sidp->pid_id;
5475 		mutex_exit(&stp->sd_lock);
5476 		return (strcopyout(&sid, (void *)arg, sizeof (pid_t),
5477 		    copyflag));
5478 	}
5479 
5480 	case TIOCSPGRP:
5481 	{
5482 		pid_t pgrp;
5483 		proc_t *q;
5484 		pid_t	sid, fg_pgid, bg_pgid;
5485 
5486 		if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t),
5487 		    copyflag))
5488 			return (error);
5489 		mutex_enter(&stp->sd_lock);
5490 		mutex_enter(&pidlock);
5491 		if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) {
5492 			mutex_exit(&pidlock);
5493 			mutex_exit(&stp->sd_lock);
5494 			return (ENOTTY);
5495 		}
5496 		if (pgrp == stp->sd_pgidp->pid_id) {
5497 			mutex_exit(&pidlock);
5498 			mutex_exit(&stp->sd_lock);
5499 			return (0);
5500 		}
5501 		if (pgrp <= 0 || pgrp >= maxpid) {
5502 			mutex_exit(&pidlock);
5503 			mutex_exit(&stp->sd_lock);
5504 			return (EINVAL);
5505 		}
5506 		if ((q = pgfind(pgrp)) == NULL ||
5507 		    q->p_sessp != ttoproc(curthread)->p_sessp) {
5508 			mutex_exit(&pidlock);
5509 			mutex_exit(&stp->sd_lock);
5510 			return (EPERM);
5511 		}
5512 		sid = stp->sd_sidp->pid_id;
5513 		fg_pgid = q->p_pgrp;
5514 		bg_pgid = stp->sd_pgidp->pid_id;
5515 		CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid);
5516 		PID_RELE(stp->sd_pgidp);
5517 		ctty_clear_sighuped();
5518 		stp->sd_pgidp = q->p_pgidp;
5519 		PID_HOLD(stp->sd_pgidp);
5520 		mutex_exit(&pidlock);
5521 		mutex_exit(&stp->sd_lock);
5522 		return (0);
5523 	}
5524 
5525 	case TIOCGPGRP:
5526 	{
5527 		pid_t pgrp;
5528 
5529 		mutex_enter(&stp->sd_lock);
5530 		if (stp->sd_sidp == NULL) {
5531 			mutex_exit(&stp->sd_lock);
5532 			return (ENOTTY);
5533 		}
5534 		pgrp = stp->sd_pgidp->pid_id;
5535 		mutex_exit(&stp->sd_lock);
5536 		return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t),
5537 		    copyflag));
5538 	}
5539 
5540 	case TIOCSCTTY:
5541 	{
5542 		return (strctty(stp));
5543 	}
5544 
5545 	case TIOCNOTTY:
5546 	{
5547 		/* freectty() always assumes curproc. */
5548 		if (freectty(B_FALSE) != 0)
5549 			return (0);
5550 		return (ENOTTY);
5551 	}
5552 
5553 	case FIONBIO:
5554 	case FIOASYNC:
5555 		return (0);	/* handled by the upper layer */
5556 	}
5557 }
5558 
5559 /*
5560  * Custom free routine used for M_PASSFP messages.
5561  */
5562 static void
5563 free_passfp(struct k_strrecvfd *srf)
5564 {
5565 	(void) closef(srf->fp);
5566 	kmem_free(srf, sizeof (struct k_strrecvfd) + sizeof (frtn_t));
5567 }
5568 
5569 /* ARGSUSED */
5570 int
5571 do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr)
5572 {
5573 	queue_t *qp, *nextqp;
5574 	struct k_strrecvfd *srf;
5575 	mblk_t *mp;
5576 	frtn_t *frtnp;
5577 	size_t bufsize;
5578 	queue_t	*mate = NULL;
5579 	syncq_t	*sq = NULL;
5580 	int retval = 0;
5581 
5582 	if (stp->sd_flag & STRHUP)
5583 		return (ENXIO);
5584 
5585 	claimstr(stp->sd_wrq);
5586 
5587 	/* Fastpath, we have a pipe, and we are already mated, use it. */
5588 	if (STRMATED(stp)) {
5589 		qp = _RD(stp->sd_mate->sd_wrq);
5590 		claimstr(qp);
5591 		mate = qp;
5592 	} else { /* Not already mated. */
5593 
5594 		/*
5595 		 * Walk the stream to the end of this one.
5596 		 * assumes that the claimstr() will prevent
5597 		 * plumbing between the stream head and the
5598 		 * driver from changing
5599 		 */
5600 		qp = stp->sd_wrq;
5601 
5602 		/*
5603 		 * Loop until we reach the end of this stream.
5604 		 * On completion, qp points to the write queue
5605 		 * at the end of the stream, or the read queue
5606 		 * at the stream head if this is a fifo.
5607 		 */
5608 		while (((qp = qp->q_next) != NULL) && _SAMESTR(qp))
5609 			;
5610 
5611 		/*
5612 		 * Just in case we get a q_next which is NULL, but
5613 		 * not at the end of the stream.  This is actually
5614 		 * broken, so we set an assert to catch it in
5615 		 * debug, and set an error and return if not debug.
5616 		 */
5617 		ASSERT(qp);
5618 		if (qp == NULL) {
5619 			releasestr(stp->sd_wrq);
5620 			return (EINVAL);
5621 		}
5622 
5623 		/*
5624 		 * Enter the syncq for the driver, so (hopefully)
5625 		 * the queue values will not change on us.
5626 		 * XXXX - This will only prevent the race IFF only
5627 		 *   the write side modifies the q_next member, and
5628 		 *   the put procedure is protected by at least
5629 		 *   MT_PERQ.
5630 		 */
5631 		if ((sq = qp->q_syncq) != NULL)
5632 			entersq(sq, SQ_PUT);
5633 
5634 		/* Now get the q_next value from this qp. */
5635 		nextqp = qp->q_next;
5636 
5637 		/*
5638 		 * If nextqp exists and the other stream is different
5639 		 * from this one claim the stream, set the mate, and
5640 		 * get the read queue at the stream head of the other
5641 		 * stream.  Assumes that nextqp was at least valid when
5642 		 * we got it.  Hopefully the entersq of the driver
5643 		 * will prevent it from changing on us.
5644 		 */
5645 		if ((nextqp != NULL) && (STREAM(nextqp) != stp)) {
5646 			ASSERT(qp->q_qinfo->qi_srvp);
5647 			ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp);
5648 			ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp);
5649 			claimstr(nextqp);
5650 
5651 			/* Make sure we still have a q_next */
5652 			if (nextqp != qp->q_next) {
5653 				releasestr(stp->sd_wrq);
5654 				releasestr(nextqp);
5655 				return (EINVAL);
5656 			}
5657 
5658 			qp = _RD(STREAM(nextqp)->sd_wrq);
5659 			mate = qp;
5660 		}
5661 		/* If we entered the synq above, leave it. */
5662 		if (sq != NULL)
5663 			leavesq(sq, SQ_PUT);
5664 	} /*  STRMATED(STP)  */
5665 
5666 	/* XXX prevents substitution of the ops vector */
5667 	if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) {
5668 		retval = EINVAL;
5669 		goto out;
5670 	}
5671 
5672 	if (qp->q_flag & QFULL) {
5673 		retval = EAGAIN;
5674 		goto out;
5675 	}
5676 
5677 	/*
5678 	 * Since M_PASSFP messages include a file descriptor, we use
5679 	 * esballoc() and specify a custom free routine (free_passfp()) that
5680 	 * will close the descriptor as part of freeing the message.  For
5681 	 * convenience, we stash the frtn_t right after the data block.
5682 	 */
5683 	bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t);
5684 	srf = kmem_alloc(bufsize, KM_NOSLEEP);
5685 	if (srf == NULL) {
5686 		retval = EAGAIN;
5687 		goto out;
5688 	}
5689 
5690 	frtnp = (frtn_t *)(srf + 1);
5691 	frtnp->free_arg = (caddr_t)srf;
5692 	frtnp->free_func = free_passfp;
5693 
5694 	mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp);
5695 	if (mp == NULL) {
5696 		kmem_free(srf, bufsize);
5697 		retval = EAGAIN;
5698 		goto out;
5699 	}
5700 	mp->b_wptr += sizeof (struct k_strrecvfd);
5701 	mp->b_datap->db_type = M_PASSFP;
5702 
5703 	srf->fp = fp;
5704 	srf->uid = crgetuid(curthread->t_cred);
5705 	srf->gid = crgetgid(curthread->t_cred);
5706 	mutex_enter(&fp->f_tlock);
5707 	fp->f_count++;
5708 	mutex_exit(&fp->f_tlock);
5709 
5710 	put(qp, mp);
5711 out:
5712 	releasestr(stp->sd_wrq);
5713 	if (mate)
5714 		releasestr(mate);
5715 	return (retval);
5716 }
5717 
5718 /*
5719  * Send an ioctl message downstream and wait for acknowledgement.
5720  * flags may be set to either U_TO_K or K_TO_K and a combination
5721  * of STR_NOERROR or STR_NOSIG
5722  * STR_NOSIG: Signals are essentially ignored or held and have
5723  *	no effect for the duration of the call.
5724  * STR_NOERROR: Ignores stream head read, write and hup errors.
5725  *	Additionally, if an existing ioctl times out, it is assumed
5726  *	lost and and this ioctl will continue as if the previous ioctl had
5727  *	finished.  ETIME may be returned if this ioctl times out (i.e.
5728  *	ic_timout is not INFTIM).  Non-stream head errors may be returned if
5729  *	the ioc_error indicates that the driver/module had problems,
5730  *	an EFAULT was found when accessing user data, a lack of
5731  * 	resources, etc.
5732  */
5733 int
5734 strdoioctl(
5735 	struct stdata *stp,
5736 	struct strioctl *strioc,
5737 	int fflags,		/* file flags with model info */
5738 	int flag,
5739 	cred_t *crp,
5740 	int *rvalp)
5741 {
5742 	mblk_t *bp;
5743 	struct iocblk *iocbp;
5744 	struct copyreq *reqp;
5745 	struct copyresp *resp;
5746 	int id;
5747 	int transparent = 0;
5748 	int error = 0;
5749 	int len = 0;
5750 	caddr_t taddr;
5751 	int copyflag = (flag & (U_TO_K | K_TO_K));
5752 	int sigflag = (flag & STR_NOSIG);
5753 	int errs;
5754 	uint_t waitflags;
5755 
5756 	ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
5757 	ASSERT((fflags & FMODELS) != 0);
5758 
5759 	TRACE_2(TR_FAC_STREAMS_FR,
5760 		TR_STRDOIOCTL,
5761 		"strdoioctl:stp %p strioc %p", stp, strioc);
5762 	if (strioc->ic_len == TRANSPARENT) {	/* send arg in M_DATA block */
5763 		transparent = 1;
5764 		strioc->ic_len = sizeof (intptr_t);
5765 	}
5766 
5767 	if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz))
5768 		return (EINVAL);
5769 
5770 	if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error,
5771 	    crp)) == NULL)
5772 			return (error);
5773 
5774 	bzero(bp->b_wptr, sizeof (union ioctypes));
5775 
5776 	iocbp = (struct iocblk *)bp->b_wptr;
5777 	iocbp->ioc_count = strioc->ic_len;
5778 	iocbp->ioc_cmd = strioc->ic_cmd;
5779 	iocbp->ioc_flag = (fflags & FMODELS);
5780 
5781 	crhold(crp);
5782 	iocbp->ioc_cr = crp;
5783 	DB_TYPE(bp) = M_IOCTL;
5784 	DB_CPID(bp) = curproc->p_pid;
5785 	bp->b_wptr += sizeof (struct iocblk);
5786 
5787 	if (flag & STR_NOERROR)
5788 		errs = STPLEX;
5789 	else
5790 		errs = STRHUP|STRDERR|STWRERR|STPLEX;
5791 
5792 	/*
5793 	 * If there is data to copy into ioctl block, do so.
5794 	 */
5795 	if (iocbp->ioc_count > 0) {
5796 		if (transparent)
5797 			/*
5798 			 * Note: STR_NOERROR does not have an effect
5799 			 * in putiocd()
5800 			 */
5801 			id = K_TO_K | sigflag;
5802 		else
5803 			id = flag;
5804 		if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) {
5805 			freemsg(bp);
5806 			crfree(crp);
5807 			return (error);
5808 		}
5809 
5810 		/*
5811 		 * We could have slept copying in user pages.
5812 		 * Recheck the stream head state (the other end
5813 		 * of a pipe could have gone away).
5814 		 */
5815 		if (stp->sd_flag & errs) {
5816 			mutex_enter(&stp->sd_lock);
5817 			error = strgeterr(stp, errs, 0);
5818 			mutex_exit(&stp->sd_lock);
5819 			if (error != 0) {
5820 				freemsg(bp);
5821 				crfree(crp);
5822 				return (error);
5823 			}
5824 		}
5825 	}
5826 	if (transparent)
5827 		iocbp->ioc_count = TRANSPARENT;
5828 
5829 	/*
5830 	 * Block for up to STRTIMOUT milliseconds if there is an outstanding
5831 	 * ioctl for this stream already running.  All processes
5832 	 * sleeping here will be awakened as a result of an ACK
5833 	 * or NAK being received for the outstanding ioctl, or
5834 	 * as a result of the timer expiring on the outstanding
5835 	 * ioctl (a failure), or as a result of any waiting
5836 	 * process's timer expiring (also a failure).
5837 	 */
5838 
5839 	error = 0;
5840 	mutex_enter(&stp->sd_lock);
5841 	while (stp->sd_flag & (IOCWAIT | IOCWAITNE)) {
5842 		clock_t cv_rval;
5843 
5844 		TRACE_0(TR_FAC_STREAMS_FR,
5845 			TR_STRDOIOCTL_WAIT,
5846 			"strdoioctl sleeps - IOCWAIT");
5847 		cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock,
5848 		    STRTIMOUT, sigflag);
5849 		if (cv_rval <= 0) {
5850 			if (cv_rval == 0) {
5851 				error = EINTR;
5852 			} else {
5853 				if (flag & STR_NOERROR) {
5854 					/*
5855 					 * Terminating current ioctl in
5856 					 * progress -- assume it got lost and
5857 					 * wake up the other thread so that the
5858 					 * operation completes.
5859 					 */
5860 					if (!(stp->sd_flag & IOCWAITNE)) {
5861 						stp->sd_flag |= IOCWAITNE;
5862 						cv_broadcast(&stp->sd_monitor);
5863 					}
5864 					/*
5865 					 * Otherwise, there's a running
5866 					 * STR_NOERROR -- we have no choice
5867 					 * here but to wait forever (or until
5868 					 * interrupted).
5869 					 */
5870 				} else {
5871 					/*
5872 					 * pending ioctl has caused
5873 					 * us to time out
5874 					 */
5875 					error = ETIME;
5876 				}
5877 			}
5878 		} else if ((stp->sd_flag & errs)) {
5879 			error = strgeterr(stp, errs, 0);
5880 		}
5881 		if (error) {
5882 			mutex_exit(&stp->sd_lock);
5883 			freemsg(bp);
5884 			crfree(crp);
5885 			return (error);
5886 		}
5887 	}
5888 
5889 	/*
5890 	 * Have control of ioctl mechanism.
5891 	 * Send down ioctl packet and wait for response.
5892 	 */
5893 	if (stp->sd_iocblk != (mblk_t *)-1) {
5894 		freemsg(stp->sd_iocblk);
5895 	}
5896 	stp->sd_iocblk = NULL;
5897 
5898 	/*
5899 	 * If this is marked with 'noerror' (internal; mostly
5900 	 * I_{P,}{UN,}LINK), then make sure nobody else is able to get
5901 	 * in here by setting IOCWAITNE.
5902 	 */
5903 	waitflags = IOCWAIT;
5904 	if (flag & STR_NOERROR)
5905 		waitflags |= IOCWAITNE;
5906 
5907 	stp->sd_flag |= waitflags;
5908 
5909 	/*
5910 	 * Assign sequence number.
5911 	 */
5912 	iocbp->ioc_id = stp->sd_iocid = getiocseqno();
5913 
5914 	mutex_exit(&stp->sd_lock);
5915 
5916 	TRACE_1(TR_FAC_STREAMS_FR,
5917 		TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp);
5918 	stream_willservice(stp);
5919 	putnext(stp->sd_wrq, bp);
5920 	stream_runservice(stp);
5921 
5922 	/*
5923 	 * Timed wait for acknowledgment.  The wait time is limited by the
5924 	 * timeout value, which must be a positive integer (number of
5925 	 * milliseconds) to wait, or 0 (use default value of STRTIMOUT
5926 	 * milliseconds), or -1 (wait forever).  This will be awakened
5927 	 * either by an ACK/NAK message arriving, the timer expiring, or
5928 	 * the timer expiring on another ioctl waiting for control of the
5929 	 * mechanism.
5930 	 */
5931 waitioc:
5932 	mutex_enter(&stp->sd_lock);
5933 
5934 
5935 	/*
5936 	 * If the reply has already arrived, don't sleep.  If awakened from
5937 	 * the sleep, fail only if the reply has not arrived by then.
5938 	 * Otherwise, process the reply.
5939 	 */
5940 	while (!stp->sd_iocblk) {
5941 		clock_t cv_rval;
5942 
5943 		if (stp->sd_flag & errs) {
5944 			error = strgeterr(stp, errs, 0);
5945 			if (error != 0) {
5946 				stp->sd_flag &= ~waitflags;
5947 				cv_broadcast(&stp->sd_iocmonitor);
5948 				mutex_exit(&stp->sd_lock);
5949 				crfree(crp);
5950 				return (error);
5951 			}
5952 		}
5953 
5954 		TRACE_0(TR_FAC_STREAMS_FR,
5955 			TR_STRDOIOCTL_WAIT2,
5956 			"strdoioctl sleeps awaiting reply");
5957 		ASSERT(error == 0);
5958 
5959 		cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock,
5960 		    (strioc->ic_timout ?
5961 		    strioc->ic_timout * 1000 : STRTIMOUT), sigflag);
5962 
5963 		/*
5964 		 * There are four possible cases here: interrupt, timeout,
5965 		 * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a
5966 		 * valid M_IOCTL reply).
5967 		 *
5968 		 * If we've been awakened by a STR_NOERROR ioctl on some other
5969 		 * thread, then sd_iocblk will still be NULL, and IOCWAITNE
5970 		 * will be set.  Pretend as if we just timed out.  Note that
5971 		 * this other thread waited at least STRTIMOUT before trying to
5972 		 * awaken our thread, so this is indistinguishable (even for
5973 		 * INFTIM) from the case where we failed with ETIME waiting on
5974 		 * IOCWAIT in the prior loop.
5975 		 */
5976 		if (cv_rval > 0 && !(flag & STR_NOERROR) &&
5977 		    stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) {
5978 			cv_rval = -1;
5979 		}
5980 
5981 		/*
5982 		 * note: STR_NOERROR does not protect
5983 		 * us here.. use ic_timout < 0
5984 		 */
5985 		if (cv_rval <= 0) {
5986 			if (cv_rval == 0) {
5987 				error = EINTR;
5988 			} else {
5989 				error =  ETIME;
5990 			}
5991 			/*
5992 			 * A message could have come in after we were scheduled
5993 			 * but before we were actually run.
5994 			 */
5995 			bp = stp->sd_iocblk;
5996 			stp->sd_iocblk = NULL;
5997 			if (bp != NULL) {
5998 				if ((bp->b_datap->db_type == M_COPYIN) ||
5999 				    (bp->b_datap->db_type == M_COPYOUT)) {
6000 					mutex_exit(&stp->sd_lock);
6001 					if (bp->b_cont) {
6002 						freemsg(bp->b_cont);
6003 						bp->b_cont = NULL;
6004 					}
6005 					bp->b_datap->db_type = M_IOCDATA;
6006 					bp->b_wptr = bp->b_rptr +
6007 						sizeof (struct copyresp);
6008 					resp = (struct copyresp *)bp->b_rptr;
6009 					resp->cp_rval =
6010 					    (caddr_t)1; /* failure */
6011 					stream_willservice(stp);
6012 					putnext(stp->sd_wrq, bp);
6013 					stream_runservice(stp);
6014 					mutex_enter(&stp->sd_lock);
6015 				} else {
6016 					freemsg(bp);
6017 				}
6018 			}
6019 			stp->sd_flag &= ~waitflags;
6020 			cv_broadcast(&stp->sd_iocmonitor);
6021 			mutex_exit(&stp->sd_lock);
6022 			crfree(crp);
6023 			return (error);
6024 		}
6025 	}
6026 	bp = stp->sd_iocblk;
6027 	/*
6028 	 * Note: it is strictly impossible to get here with sd_iocblk set to
6029 	 * -1.  This is because the initial loop above doesn't allow any new
6030 	 * ioctls into the fray until all others have passed this point.
6031 	 */
6032 	ASSERT(bp != NULL && bp != (mblk_t *)-1);
6033 	TRACE_1(TR_FAC_STREAMS_FR,
6034 		TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp);
6035 	if ((bp->b_datap->db_type == M_IOCACK) ||
6036 	    (bp->b_datap->db_type == M_IOCNAK)) {
6037 		/* for detection of duplicate ioctl replies */
6038 		stp->sd_iocblk = (mblk_t *)-1;
6039 		stp->sd_flag &= ~waitflags;
6040 		cv_broadcast(&stp->sd_iocmonitor);
6041 		mutex_exit(&stp->sd_lock);
6042 	} else {
6043 		/*
6044 		 * flags not cleared here because we're still doing
6045 		 * copy in/out for ioctl.
6046 		 */
6047 		stp->sd_iocblk = NULL;
6048 		mutex_exit(&stp->sd_lock);
6049 	}
6050 
6051 
6052 	/*
6053 	 * Have received acknowledgment.
6054 	 */
6055 
6056 	switch (bp->b_datap->db_type) {
6057 	case M_IOCACK:
6058 		/*
6059 		 * Positive ack.
6060 		 */
6061 		iocbp = (struct iocblk *)bp->b_rptr;
6062 
6063 		/*
6064 		 * Set error if indicated.
6065 		 */
6066 		if (iocbp->ioc_error) {
6067 			error = iocbp->ioc_error;
6068 			break;
6069 		}
6070 
6071 		/*
6072 		 * Set return value.
6073 		 */
6074 		*rvalp = iocbp->ioc_rval;
6075 
6076 		/*
6077 		 * Data may have been returned in ACK message (ioc_count > 0).
6078 		 * If so, copy it out to the user's buffer.
6079 		 */
6080 		if (iocbp->ioc_count && !transparent) {
6081 			if (error = getiocd(bp, strioc->ic_dp, copyflag))
6082 				break;
6083 		}
6084 		if (!transparent) {
6085 			if (len)	/* an M_COPYOUT was used with I_STR */
6086 				strioc->ic_len = len;
6087 			else
6088 				strioc->ic_len = (int)iocbp->ioc_count;
6089 		}
6090 		break;
6091 
6092 	case M_IOCNAK:
6093 		/*
6094 		 * Negative ack.
6095 		 *
6096 		 * The only thing to do is set error as specified
6097 		 * in neg ack packet.
6098 		 */
6099 		iocbp = (struct iocblk *)bp->b_rptr;
6100 
6101 		error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL);
6102 		break;
6103 
6104 	case M_COPYIN:
6105 		/*
6106 		 * Driver or module has requested user ioctl data.
6107 		 */
6108 		reqp = (struct copyreq *)bp->b_rptr;
6109 
6110 		/*
6111 		 * M_COPYIN should *never* have a message attached, though
6112 		 * it's harmless if it does -- thus, panic on a DEBUG
6113 		 * kernel and just free it on a non-DEBUG build.
6114 		 */
6115 		ASSERT(bp->b_cont == NULL);
6116 		if (bp->b_cont != NULL) {
6117 			freemsg(bp->b_cont);
6118 			bp->b_cont = NULL;
6119 		}
6120 
6121 		error = putiocd(bp, reqp->cq_addr, flag, crp);
6122 		if (error && bp->b_cont) {
6123 			freemsg(bp->b_cont);
6124 			bp->b_cont = NULL;
6125 		}
6126 
6127 		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
6128 		bp->b_datap->db_type = M_IOCDATA;
6129 
6130 		mblk_setcred(bp, crp);
6131 		DB_CPID(bp) = curproc->p_pid;
6132 		resp = (struct copyresp *)bp->b_rptr;
6133 		resp->cp_rval = (caddr_t)(uintptr_t)error;
6134 		resp->cp_flag = (fflags & FMODELS);
6135 
6136 		stream_willservice(stp);
6137 		putnext(stp->sd_wrq, bp);
6138 		stream_runservice(stp);
6139 
6140 		if (error) {
6141 			mutex_enter(&stp->sd_lock);
6142 			stp->sd_flag &= ~waitflags;
6143 			cv_broadcast(&stp->sd_iocmonitor);
6144 			mutex_exit(&stp->sd_lock);
6145 			crfree(crp);
6146 			return (error);
6147 		}
6148 
6149 		goto waitioc;
6150 
6151 	case M_COPYOUT:
6152 		/*
6153 		 * Driver or module has ioctl data for a user.
6154 		 */
6155 		reqp = (struct copyreq *)bp->b_rptr;
6156 		ASSERT(bp->b_cont != NULL);
6157 
6158 		/*
6159 		 * Always (transparent or non-transparent )
6160 		 * use the address specified in the request
6161 		 */
6162 		taddr = reqp->cq_addr;
6163 		if (!transparent)
6164 			len = (int)reqp->cq_size;
6165 
6166 		/* copyout data to the provided address */
6167 		error = getiocd(bp, taddr, copyflag);
6168 
6169 		freemsg(bp->b_cont);
6170 		bp->b_cont = NULL;
6171 
6172 		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
6173 		bp->b_datap->db_type = M_IOCDATA;
6174 
6175 		mblk_setcred(bp, crp);
6176 		DB_CPID(bp) = curproc->p_pid;
6177 		resp = (struct copyresp *)bp->b_rptr;
6178 		resp->cp_rval = (caddr_t)(uintptr_t)error;
6179 		resp->cp_flag = (fflags & FMODELS);
6180 
6181 		stream_willservice(stp);
6182 		putnext(stp->sd_wrq, bp);
6183 		stream_runservice(stp);
6184 
6185 		if (error) {
6186 			mutex_enter(&stp->sd_lock);
6187 			stp->sd_flag &= ~waitflags;
6188 			cv_broadcast(&stp->sd_iocmonitor);
6189 			mutex_exit(&stp->sd_lock);
6190 			crfree(crp);
6191 			return (error);
6192 		}
6193 		goto waitioc;
6194 
6195 	default:
6196 		ASSERT(0);
6197 		mutex_enter(&stp->sd_lock);
6198 		stp->sd_flag &= ~waitflags;
6199 		cv_broadcast(&stp->sd_iocmonitor);
6200 		mutex_exit(&stp->sd_lock);
6201 		break;
6202 	}
6203 
6204 	freemsg(bp);
6205 	crfree(crp);
6206 	return (error);
6207 }
6208 
6209 /*
6210  * For the SunOS keyboard driver.
6211  * Return the next available "ioctl" sequence number.
6212  * Exported, so that streams modules can send "ioctl" messages
6213  * downstream from their open routine.
6214  */
6215 int
6216 getiocseqno(void)
6217 {
6218 	int	i;
6219 
6220 	mutex_enter(&strresources);
6221 	i = ++ioc_id;
6222 	mutex_exit(&strresources);
6223 	return (i);
6224 }
6225 
6226 /*
6227  * Get the next message from the read queue.  If the message is
6228  * priority, STRPRI will have been set by strrput().  This flag
6229  * should be reset only when the entire message at the front of the
 * queue has been consumed.
6231  *
6232  * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
6233  */
6234 int
6235 strgetmsg(
6236 	struct vnode *vp,
6237 	struct strbuf *mctl,
6238 	struct strbuf *mdata,
6239 	unsigned char *prip,
6240 	int *flagsp,
6241 	int fmode,
6242 	rval_t *rvp)
6243 {
6244 	struct stdata *stp;
6245 	mblk_t *bp, *nbp;
6246 	mblk_t *savemp = NULL;
6247 	mblk_t *savemptail = NULL;
6248 	uint_t old_sd_flag;
6249 	int flg;
6250 	int more = 0;
6251 	int error = 0;
6252 	char first = 1;
6253 	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
6254 #define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
6255 	unsigned char pri = 0;
6256 	queue_t *q;
6257 	int	pr = 0;			/* Partial read successful */
6258 	struct uio uios;
6259 	struct uio *uiop = &uios;
6260 	struct iovec iovs;
6261 	unsigned char type;
6262 
6263 	TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER,
6264 		"strgetmsg:%p", vp);
6265 
6266 	ASSERT(vp->v_stream);
6267 	stp = vp->v_stream;
6268 	rvp->r_val1 = 0;
6269 
6270 	mutex_enter(&stp->sd_lock);
6271 
6272 	if ((error = i_straccess(stp, JCREAD)) != 0) {
6273 		mutex_exit(&stp->sd_lock);
6274 		return (error);
6275 	}
6276 
6277 	if (stp->sd_flag & (STRDERR|STPLEX)) {
6278 		error = strgeterr(stp, STRDERR|STPLEX, 0);
6279 		if (error != 0) {
6280 			mutex_exit(&stp->sd_lock);
6281 			return (error);
6282 		}
6283 	}
6284 	mutex_exit(&stp->sd_lock);
6285 
6286 	switch (*flagsp) {
6287 	case MSG_HIPRI:
6288 		if (*prip != 0)
6289 			return (EINVAL);
6290 		break;
6291 
6292 	case MSG_ANY:
6293 	case MSG_BAND:
6294 		break;
6295 
6296 	default:
6297 		return (EINVAL);
6298 	}
6299 	/*
6300 	 * Setup uio and iov for data part
6301 	 */
6302 	iovs.iov_base = mdata->buf;
6303 	iovs.iov_len = mdata->maxlen;
6304 	uios.uio_iov = &iovs;
6305 	uios.uio_iovcnt = 1;
6306 	uios.uio_loffset = 0;
6307 	uios.uio_segflg = UIO_USERSPACE;
6308 	uios.uio_fmode = 0;
6309 	uios.uio_extflg = UIO_COPY_CACHED;
6310 	uios.uio_resid = mdata->maxlen;
6311 	uios.uio_offset = 0;
6312 
6313 	q = _RD(stp->sd_wrq);
6314 	mutex_enter(&stp->sd_lock);
6315 	old_sd_flag = stp->sd_flag;
6316 	mark = 0;
6317 	for (;;) {
6318 		int done = 0;
6319 		mblk_t *q_first = q->q_first;
6320 
6321 		/*
6322 		 * Get the next message of appropriate priority
6323 		 * from the stream head.  If the caller is interested
6324 		 * in band or hipri messages, then they should already
6325 		 * be enqueued at the stream head.  On the other hand
6326 		 * if the caller wants normal (band 0) messages, they
6327 		 * might be deferred in a synchronous stream and they
6328 		 * will need to be pulled up.
6329 		 *
6330 		 * After we have dequeued a message, we might find that
6331 		 * it was a deferred M_SIG that was enqueued at the
6332 		 * stream head.  It must now be posted as part of the
6333 		 * read by calling strsignal_nolock().
6334 		 *
6335 		 * Also note that strrput does not enqueue an M_PCSIG,
6336 		 * and there cannot be more than one hipri message,
6337 		 * so there was no need to have the M_PCSIG case.
6338 		 *
6339 		 * At some time it might be nice to try and wrap the
6340 		 * functionality of kstrgetmsg() and strgetmsg() into
6341 		 * a common routine so to reduce the amount of replicated
6342 		 * code (since they are extremely similar).
6343 		 */
6344 		if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) {
6345 			/* Asking for normal, band0 data */
6346 			bp = strget(stp, q, uiop, first, &error);
6347 			ASSERT(MUTEX_HELD(&stp->sd_lock));
6348 			if (bp != NULL) {
6349 				if (bp->b_datap->db_type == M_SIG) {
6350 					strsignal_nolock(stp, *bp->b_rptr,
6351 					    (int32_t)bp->b_band);
6352 					continue;
6353 				} else {
6354 					break;
6355 				}
6356 			}
6357 			if (error != 0) {
6358 				goto getmout;
6359 			}
6360 
6361 		/*
6362 		 * We can't depend on the value of STRPRI here because
6363 		 * the stream head may be in transit. Therefore, we
6364 		 * must look at the type of the first message to
6365 		 * determine if a high priority messages is waiting
6366 		 */
6367 		} else if ((*flagsp & MSG_HIPRI) && q_first != NULL &&
6368 			    q_first->b_datap->db_type >= QPCTL &&
6369 			    (bp = getq_noenab(q)) != NULL) {
6370 			/* Asked for HIPRI and got one */
6371 			ASSERT(bp->b_datap->db_type >= QPCTL);
6372 			break;
6373 		} else if ((*flagsp & MSG_BAND) && q_first != NULL &&
6374 			    ((q_first->b_band >= *prip) ||
6375 			    q_first->b_datap->db_type >= QPCTL) &&
6376 			    (bp = getq_noenab(q)) != NULL) {
6377 			/*
6378 			 * Asked for at least band "prip" and got either at
6379 			 * least that band or a hipri message.
6380 			 */
6381 			ASSERT(bp->b_band >= *prip ||
6382 				bp->b_datap->db_type >= QPCTL);
6383 			if (bp->b_datap->db_type == M_SIG) {
6384 				strsignal_nolock(stp, *bp->b_rptr,
6385 				    (int32_t)bp->b_band);
6386 				continue;
6387 			} else {
6388 				break;
6389 			}
6390 		}
6391 
6392 		/* No data. Time to sleep? */
6393 		qbackenable(q, 0);
6394 
6395 		/*
6396 		 * If STRHUP or STREOF, return 0 length control and data.
6397 		 * If resid is 0, then a read(fd,buf,0) was done. Do not
6398 		 * sleep to satisfy this request because by default we have
6399 		 * zero bytes to return.
6400 		 */
6401 		if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 &&
6402 		    mdata->maxlen == 0)) {
6403 			mctl->len = mdata->len = 0;
6404 			*flagsp = 0;
6405 			mutex_exit(&stp->sd_lock);
6406 			return (0);
6407 		}
6408 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT,
6409 			"strgetmsg calls strwaitq:%p, %p",
6410 			vp, uiop);
6411 		if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1,
6412 		    &done)) != 0) || done) {
6413 			TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE,
6414 				"strgetmsg error or done:%p, %p",
6415 				vp, uiop);
6416 			mutex_exit(&stp->sd_lock);
6417 			return (error);
6418 		}
6419 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE,
6420 			"strgetmsg awakes:%p, %p", vp, uiop);
6421 		if ((error = i_straccess(stp, JCREAD)) != 0) {
6422 			mutex_exit(&stp->sd_lock);
6423 			return (error);
6424 		}
6425 		first = 0;
6426 	}
6427 	ASSERT(bp != NULL);
6428 	/*
6429 	 * Extract any mark information. If the message is not completely
6430 	 * consumed this information will be put in the mblk
6431 	 * that is putback.
6432 	 * If MSGMARKNEXT is set and the message is completely consumed
6433 	 * the STRATMARK flag will be set below. Likewise, if
6434 	 * MSGNOTMARKNEXT is set and the message is
6435 	 * completely consumed STRNOTATMARK will be set.
6436 	 */
6437 	mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
6438 	ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
6439 		(MSGMARKNEXT|MSGNOTMARKNEXT));
6440 	if (mark != 0 && bp == stp->sd_mark) {
6441 		mark |= _LASTMARK;
6442 		stp->sd_mark = NULL;
6443 	}
6444 	/*
6445 	 * keep track of the original message type and priority
6446 	 */
6447 	pri = bp->b_band;
6448 	type = bp->b_datap->db_type;
6449 	if (type == M_PASSFP) {
6450 		if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
6451 			stp->sd_mark = bp;
6452 		bp->b_flag |= mark & ~_LASTMARK;
6453 		putback(stp, q, bp, pri);
6454 		qbackenable(q, pri);
6455 		mutex_exit(&stp->sd_lock);
6456 		return (EBADMSG);
6457 	}
6458 	ASSERT(type != M_SIG);
6459 
6460 	/*
6461 	 * Set this flag so strrput will not generate signals. Need to
6462 	 * make sure this flag is cleared before leaving this routine
6463 	 * else signals will stop being sent.
6464 	 */
6465 	stp->sd_flag |= STRGETINPROG;
6466 	mutex_exit(&stp->sd_lock);
6467 
6468 	if (STREAM_NEEDSERVICE(stp))
6469 		stream_runservice(stp);
6470 
6471 	/*
6472 	 * Set HIPRI flag if message is priority.
6473 	 */
6474 	if (type >= QPCTL)
6475 		flg = MSG_HIPRI;
6476 	else
6477 		flg = MSG_BAND;
6478 
6479 	/*
6480 	 * First process PROTO or PCPROTO blocks, if any.
6481 	 */
6482 	if (mctl->maxlen >= 0 && type != M_DATA) {
6483 		size_t	n, bcnt;
6484 		char	*ubuf;
6485 
6486 		bcnt = mctl->maxlen;
6487 		ubuf = mctl->buf;
6488 		while (bp != NULL && bp->b_datap->db_type != M_DATA) {
6489 			if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 &&
6490 			    copyout(bp->b_rptr, ubuf, n)) {
6491 				error = EFAULT;
6492 				mutex_enter(&stp->sd_lock);
6493 				/*
6494 				 * clear stream head pri flag based on
6495 				 * first message type
6496 				 */
6497 				if (type >= QPCTL) {
6498 					ASSERT(type == M_PCPROTO);
6499 					stp->sd_flag &= ~STRPRI;
6500 				}
6501 				more = 0;
6502 				freemsg(bp);
6503 				goto getmout;
6504 			}
6505 			ubuf += n;
6506 			bp->b_rptr += n;
6507 			if (bp->b_rptr >= bp->b_wptr) {
6508 				nbp = bp;
6509 				bp = bp->b_cont;
6510 				freeb(nbp);
6511 			}
6512 			ASSERT(n <= bcnt);
6513 			bcnt -= n;
6514 			if (bcnt == 0)
6515 				break;
6516 		}
6517 		mctl->len = mctl->maxlen - bcnt;
6518 	} else
6519 		mctl->len = -1;
6520 
6521 	if (bp && bp->b_datap->db_type != M_DATA) {
6522 		/*
6523 		 * More PROTO blocks in msg.
6524 		 */
6525 		more |= MORECTL;
6526 		savemp = bp;
6527 		while (bp && bp->b_datap->db_type != M_DATA) {
6528 			savemptail = bp;
6529 			bp = bp->b_cont;
6530 		}
6531 		savemptail->b_cont = NULL;
6532 	}
6533 
6534 	/*
6535 	 * Now process DATA blocks, if any.
6536 	 */
6537 	if (mdata->maxlen >= 0 && bp) {
6538 		/*
6539 		 * struiocopyout will consume a potential zero-length
6540 		 * M_DATA even if uio_resid is zero.
6541 		 */
6542 		size_t oldresid = uiop->uio_resid;
6543 
6544 		bp = struiocopyout(bp, uiop, &error);
6545 		if (error != 0) {
6546 			mutex_enter(&stp->sd_lock);
6547 			/*
6548 			 * clear stream head hi pri flag based on
6549 			 * first message
6550 			 */
6551 			if (type >= QPCTL) {
6552 				ASSERT(type == M_PCPROTO);
6553 				stp->sd_flag &= ~STRPRI;
6554 			}
6555 			more = 0;
6556 			freemsg(savemp);
6557 			goto getmout;
6558 		}
6559 		/*
6560 		 * (pr == 1) indicates a partial read.
6561 		 */
6562 		if (oldresid > uiop->uio_resid)
6563 			pr = 1;
6564 		mdata->len = mdata->maxlen - uiop->uio_resid;
6565 	} else
6566 		mdata->len = -1;
6567 
6568 	if (bp) {			/* more data blocks in msg */
6569 		more |= MOREDATA;
6570 		if (savemp)
6571 			savemptail->b_cont = bp;
6572 		else
6573 			savemp = bp;
6574 	}
6575 
6576 	mutex_enter(&stp->sd_lock);
6577 	if (savemp) {
6578 		if (pr && (savemp->b_datap->db_type == M_DATA) &&
6579 		    msgnodata(savemp)) {
6580 			/*
6581 			 * Avoid queuing a zero-length tail part of
6582 			 * a message. pr=1 indicates that we read some of
6583 			 * the message.
6584 			 */
6585 			freemsg(savemp);
6586 			more &= ~MOREDATA;
6587 			/*
6588 			 * clear stream head hi pri flag based on
6589 			 * first message
6590 			 */
6591 			if (type >= QPCTL) {
6592 				ASSERT(type == M_PCPROTO);
6593 				stp->sd_flag &= ~STRPRI;
6594 			}
6595 		} else {
6596 			savemp->b_band = pri;
6597 			/*
6598 			 * If the first message was HIPRI and the one we're
6599 			 * putting back isn't, then clear STRPRI, otherwise
6600 			 * set STRPRI again.  Note that we must set STRPRI
6601 			 * again since the flush logic in strrput_nondata()
6602 			 * may have cleared it while we had sd_lock dropped.
6603 			 */
6604 			if (type >= QPCTL) {
6605 				ASSERT(type == M_PCPROTO);
6606 				if (queclass(savemp) < QPCTL)
6607 					stp->sd_flag &= ~STRPRI;
6608 				else
6609 					stp->sd_flag |= STRPRI;
6610 			} else if (queclass(savemp) >= QPCTL) {
6611 				/*
6612 				 * The first message was not a HIPRI message,
6613 				 * but the one we are about to putback is.
6614 				 * For simplicitly, we do not allow for HIPRI
6615 				 * messages to be embedded in the message
6616 				 * body, so just force it to same type as
6617 				 * first message.
6618 				 */
6619 				ASSERT(type == M_DATA || type == M_PROTO);
6620 				ASSERT(savemp->b_datap->db_type == M_PCPROTO);
6621 				savemp->b_datap->db_type = type;
6622 			}
6623 			if (mark != 0) {
6624 				savemp->b_flag |= mark & ~_LASTMARK;
6625 				if ((mark & _LASTMARK) &&
6626 				    (stp->sd_mark == NULL)) {
6627 					/*
6628 					 * If another marked message arrived
6629 					 * while sd_lock was not held sd_mark
6630 					 * would be non-NULL.
6631 					 */
6632 					stp->sd_mark = savemp;
6633 				}
6634 			}
6635 			putback(stp, q, savemp, pri);
6636 		}
6637 	} else {
6638 		/*
6639 		 * The complete message was consumed.
6640 		 *
6641 		 * If another M_PCPROTO arrived while sd_lock was not held
6642 		 * it would have been discarded since STRPRI was still set.
6643 		 *
6644 		 * Move the MSG*MARKNEXT information
6645 		 * to the stream head just in case
6646 		 * the read queue becomes empty.
6647 		 * clear stream head hi pri flag based on
6648 		 * first message
6649 		 *
6650 		 * If the stream head was at the mark
6651 		 * (STRATMARK) before we dropped sd_lock above
6652 		 * and some data was consumed then we have
6653 		 * moved past the mark thus STRATMARK is
6654 		 * cleared. However, if a message arrived in
6655 		 * strrput during the copyout above causing
6656 		 * STRATMARK to be set we can not clear that
6657 		 * flag.
6658 		 */
6659 		if (type >= QPCTL) {
6660 			ASSERT(type == M_PCPROTO);
6661 			stp->sd_flag &= ~STRPRI;
6662 		}
6663 		if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
6664 			if (mark & MSGMARKNEXT) {
6665 				stp->sd_flag &= ~STRNOTATMARK;
6666 				stp->sd_flag |= STRATMARK;
6667 			} else if (mark & MSGNOTMARKNEXT) {
6668 				stp->sd_flag &= ~STRATMARK;
6669 				stp->sd_flag |= STRNOTATMARK;
6670 			} else {
6671 				stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
6672 			}
6673 		} else if (pr && (old_sd_flag & STRATMARK)) {
6674 			stp->sd_flag &= ~STRATMARK;
6675 		}
6676 	}
6677 
6678 	*flagsp = flg;
6679 	*prip = pri;
6680 
6681 	/*
6682 	 * Getmsg cleanup processing - if the state of the queue has changed
6683 	 * some signals may need to be sent and/or poll awakened.
6684 	 */
6685 getmout:
6686 	qbackenable(q, pri);
6687 
6688 	/*
6689 	 * We dropped the stream head lock above. Send all M_SIG messages
6690 	 * before processing stream head for SIGPOLL messages.
6691 	 */
6692 	ASSERT(MUTEX_HELD(&stp->sd_lock));
6693 	while ((bp = q->q_first) != NULL &&
6694 	    (bp->b_datap->db_type == M_SIG)) {
6695 		/*
6696 		 * sd_lock is held so the content of the read queue can not
6697 		 * change.
6698 		 */
6699 		bp = getq(q);
6700 		ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
6701 
6702 		strsignal_nolock(stp, *bp->b_rptr, (int32_t)bp->b_band);
6703 		mutex_exit(&stp->sd_lock);
6704 		freemsg(bp);
6705 		if (STREAM_NEEDSERVICE(stp))
6706 			stream_runservice(stp);
6707 		mutex_enter(&stp->sd_lock);
6708 	}
6709 
6710 	/*
6711 	 * stream head cannot change while we make the determination
6712 	 * whether or not to send a signal. Drop the flag to allow strrput
6713 	 * to send firstmsgsigs again.
6714 	 */
6715 	stp->sd_flag &= ~STRGETINPROG;
6716 
6717 	/*
6718 	 * If the type of message at the front of the queue changed
6719 	 * due to the receive the appropriate signals and pollwakeup events
6720 	 * are generated. The type of changes are:
6721 	 *	Processed a hipri message, q_first is not hipri.
6722 	 *	Processed a band X message, and q_first is band Y.
6723 	 * The generated signals and pollwakeups are identical to what
6724 	 * strrput() generates should the message that is now on q_first
6725 	 * arrive to an empty read queue.
6726 	 *
6727 	 * Note: only strrput will send a signal for a hipri message.
6728 	 */
6729 	if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
6730 		strsigset_t signals = 0;
6731 		strpollset_t pollwakeups = 0;
6732 
6733 		if (flg & MSG_HIPRI) {
6734 			/*
6735 			 * Removed a hipri message. Regular data at
6736 			 * the front of  the queue.
6737 			 */
6738 			if (bp->b_band == 0) {
6739 				signals = S_INPUT | S_RDNORM;
6740 				pollwakeups = POLLIN | POLLRDNORM;
6741 			} else {
6742 				signals = S_INPUT | S_RDBAND;
6743 				pollwakeups = POLLIN | POLLRDBAND;
6744 			}
6745 		} else if (pri != bp->b_band) {
6746 			/*
6747 			 * The band is different for the new q_first.
6748 			 */
6749 			if (bp->b_band == 0) {
6750 				signals = S_RDNORM;
6751 				pollwakeups = POLLIN | POLLRDNORM;
6752 			} else {
6753 				signals = S_RDBAND;
6754 				pollwakeups = POLLIN | POLLRDBAND;
6755 			}
6756 		}
6757 
6758 		if (pollwakeups != 0) {
6759 			if (pollwakeups == (POLLIN | POLLRDNORM)) {
6760 				if (!(stp->sd_rput_opt & SR_POLLIN))
6761 					goto no_pollwake;
6762 				stp->sd_rput_opt &= ~SR_POLLIN;
6763 			}
6764 			mutex_exit(&stp->sd_lock);
6765 			pollwakeup(&stp->sd_pollist, pollwakeups);
6766 			mutex_enter(&stp->sd_lock);
6767 		}
6768 no_pollwake:
6769 
6770 		if (stp->sd_sigflags & signals)
6771 			strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
6772 	}
6773 	mutex_exit(&stp->sd_lock);
6774 
6775 	rvp->r_val1 = more;
6776 	return (error);
6777 #undef	_LASTMARK
6778 }
6779 
6780 /*
6781  * Get the next message from the read queue.  If the message is
6782  * priority, STRPRI will have been set by strrput().  This flag
6783  * should be reset only when the entire message at the front of the
6784  * queue as been consumed.
6785  *
6786  * If uiop is NULL all data is returned in mctlp.
6787  * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed
6788  * not enabled.
6789  * The timeout parameter is in milliseconds; -1 for infinity.
6790  * This routine handles the consolidation private flags:
6791  *	MSG_IGNERROR	Ignore any stream head error except STPLEX.
6792  *	MSG_DELAYERROR	Defer the error check until the queue is empty.
6793  *	MSG_HOLDSIG	Hold signals while waiting for data.
6794  *	MSG_IPEEK	Only peek at messages.
6795  *	MSG_DISCARDTAIL	Discard the tail M_DATA part of the message
6796  *			that doesn't fit.
6797  *	MSG_NOMARK	If the message is marked leave it on the queue.
6798  *
6799  * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
6800  */
6801 int
6802 kstrgetmsg(
6803 	struct vnode *vp,
6804 	mblk_t **mctlp,
6805 	struct uio *uiop,
6806 	unsigned char *prip,
6807 	int *flagsp,
6808 	clock_t timout,
6809 	rval_t *rvp)
6810 {
6811 	struct stdata *stp;
6812 	mblk_t *bp, *nbp;
6813 	mblk_t *savemp = NULL;
6814 	mblk_t *savemptail = NULL;
6815 	int flags;
6816 	uint_t old_sd_flag;
6817 	int flg;
6818 	int more = 0;
6819 	int error = 0;
6820 	char first = 1;
6821 	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
6822 #define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
6823 	unsigned char pri = 0;
6824 	queue_t *q;
6825 	int	pr = 0;			/* Partial read successful */
6826 	unsigned char type;
6827 
6828 	TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER,
6829 		"kstrgetmsg:%p", vp);
6830 
6831 	ASSERT(vp->v_stream);
6832 	stp = vp->v_stream;
6833 	rvp->r_val1 = 0;
6834 
6835 	mutex_enter(&stp->sd_lock);
6836 
6837 	if ((error = i_straccess(stp, JCREAD)) != 0) {
6838 		mutex_exit(&stp->sd_lock);
6839 		return (error);
6840 	}
6841 
6842 	flags = *flagsp;
6843 	if (stp->sd_flag & (STRDERR|STPLEX)) {
6844 		if ((stp->sd_flag & STPLEX) ||
6845 		    (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) {
6846 			error = strgeterr(stp, STRDERR|STPLEX,
6847 					(flags & MSG_IPEEK));
6848 			if (error != 0) {
6849 				mutex_exit(&stp->sd_lock);
6850 				return (error);
6851 			}
6852 		}
6853 	}
6854 	mutex_exit(&stp->sd_lock);
6855 
6856 	switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) {
6857 	case MSG_HIPRI:
6858 		if (*prip != 0)
6859 			return (EINVAL);
6860 		break;
6861 
6862 	case MSG_ANY:
6863 	case MSG_BAND:
6864 		break;
6865 
6866 	default:
6867 		return (EINVAL);
6868 	}
6869 
6870 retry:
6871 	q = _RD(stp->sd_wrq);
6872 	mutex_enter(&stp->sd_lock);
6873 	old_sd_flag = stp->sd_flag;
6874 	mark = 0;
6875 	for (;;) {
6876 		int done = 0;
6877 		int waitflag;
6878 		int fmode;
6879 		mblk_t *q_first = q->q_first;
6880 
6881 		/*
6882 		 * This section of the code operates just like the code
6883 		 * in strgetmsg().  There is a comment there about what
6884 		 * is going on here.
6885 		 */
6886 		if (!(flags & (MSG_HIPRI|MSG_BAND))) {
6887 			/* Asking for normal, band0 data */
6888 			bp = strget(stp, q, uiop, first, &error);
6889 			ASSERT(MUTEX_HELD(&stp->sd_lock));
6890 			if (bp != NULL) {
6891 				if (bp->b_datap->db_type == M_SIG) {
6892 					strsignal_nolock(stp, *bp->b_rptr,
6893 					    (int32_t)bp->b_band);
6894 					continue;
6895 				} else {
6896 					break;
6897 				}
6898 			}
6899 			if (error != 0) {
6900 				goto getmout;
6901 			}
6902 		/*
6903 		 * We can't depend on the value of STRPRI here because
6904 		 * the stream head may be in transit. Therefore, we
6905 		 * must look at the type of the first message to
6906 		 * determine if a high priority messages is waiting
6907 		 */
6908 		} else if ((flags & MSG_HIPRI) && q_first != NULL &&
6909 			    q_first->b_datap->db_type >= QPCTL &&
6910 			    (bp = getq_noenab(q)) != NULL) {
6911 			ASSERT(bp->b_datap->db_type >= QPCTL);
6912 			break;
6913 		} else if ((flags & MSG_BAND) && q_first != NULL &&
6914 			    ((q_first->b_band >= *prip) ||
6915 			    q_first->b_datap->db_type >= QPCTL) &&
6916 			    (bp = getq_noenab(q)) != NULL) {
6917 			/*
6918 			 * Asked for at least band "prip" and got either at
6919 			 * least that band or a hipri message.
6920 			 */
6921 			ASSERT(bp->b_band >= *prip ||
6922 				bp->b_datap->db_type >= QPCTL);
6923 			if (bp->b_datap->db_type == M_SIG) {
6924 				strsignal_nolock(stp, *bp->b_rptr,
6925 				    (int32_t)bp->b_band);
6926 				continue;
6927 			} else {
6928 				break;
6929 			}
6930 		}
6931 
6932 		/* No data. Time to sleep? */
6933 		qbackenable(q, 0);
6934 
6935 		/*
6936 		 * Delayed error notification?
6937 		 */
6938 		if ((stp->sd_flag & (STRDERR|STPLEX)) &&
6939 		    (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) {
6940 			error = strgeterr(stp, STRDERR|STPLEX,
6941 					(flags & MSG_IPEEK));
6942 			if (error != 0) {
6943 				mutex_exit(&stp->sd_lock);
6944 				return (error);
6945 			}
6946 		}
6947 
6948 		/*
6949 		 * If STRHUP or STREOF, return 0 length control and data.
6950 		 * If a read(fd,buf,0) has been done, do not sleep, just
6951 		 * return.
6952 		 *
6953 		 * If mctlp == NULL and uiop == NULL, then the code will
6954 		 * do the strwaitq. This is an understood way of saying
6955 		 * sleep "polling" until a message is received.
6956 		 */
6957 		if ((stp->sd_flag & (STRHUP|STREOF)) ||
6958 		    (uiop != NULL && uiop->uio_resid == 0)) {
6959 			if (mctlp != NULL)
6960 				*mctlp = NULL;
6961 			*flagsp = 0;
6962 			mutex_exit(&stp->sd_lock);
6963 			return (0);
6964 		}
6965 
6966 		waitflag = GETWAIT;
6967 		if (flags &
6968 		    (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) {
6969 			if (flags & MSG_HOLDSIG)
6970 				waitflag |= STR_NOSIG;
6971 			if (flags & MSG_IGNERROR)
6972 				waitflag |= STR_NOERROR;
6973 			if (flags & MSG_IPEEK)
6974 				waitflag |= STR_PEEK;
6975 			if (flags & MSG_DELAYERROR)
6976 				waitflag |= STR_DELAYERR;
6977 		}
6978 		if (uiop != NULL)
6979 			fmode = uiop->uio_fmode;
6980 		else
6981 			fmode = 0;
6982 
6983 		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT,
6984 			"kstrgetmsg calls strwaitq:%p, %p",
6985 			vp, uiop);
6986 		if (((error = strwaitq(stp, waitflag, (ssize_t)0,
6987 		    fmode, timout, &done)) != 0) || done) {
6988 			TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE,
6989 				"kstrgetmsg error or done:%p, %p",
6990 				vp, uiop);
6991 			mutex_exit(&stp->sd_lock);
6992 			return (error);
6993 		}
6994 		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE,
6995 			"kstrgetmsg awakes:%p, %p", vp, uiop);
6996 		if ((error = i_straccess(stp, JCREAD)) != 0) {
6997 			mutex_exit(&stp->sd_lock);
6998 			return (error);
6999 		}
7000 		first = 0;
7001 	}
7002 	ASSERT(bp != NULL);
7003 	/*
7004 	 * Extract any mark information. If the message is not completely
7005 	 * consumed this information will be put in the mblk
7006 	 * that is putback.
7007 	 * If MSGMARKNEXT is set and the message is completely consumed
7008 	 * the STRATMARK flag will be set below. Likewise, if
7009 	 * MSGNOTMARKNEXT is set and the message is
7010 	 * completely consumed STRNOTATMARK will be set.
7011 	 */
7012 	mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
7013 	ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
7014 		(MSGMARKNEXT|MSGNOTMARKNEXT));
7015 	pri = bp->b_band;
7016 	if (mark != 0) {
7017 		/*
7018 		 * If the caller doesn't want the mark return.
7019 		 * Used to implement MSG_WAITALL in sockets.
7020 		 */
7021 		if (flags & MSG_NOMARK) {
7022 			putback(stp, q, bp, pri);
7023 			qbackenable(q, pri);
7024 			mutex_exit(&stp->sd_lock);
7025 			return (EWOULDBLOCK);
7026 		}
7027 		if (bp == stp->sd_mark) {
7028 			mark |= _LASTMARK;
7029 			stp->sd_mark = NULL;
7030 		}
7031 	}
7032 
7033 	/*
7034 	 * keep track of the first message type
7035 	 */
7036 	type = bp->b_datap->db_type;
7037 
7038 	if (bp->b_datap->db_type == M_PASSFP) {
7039 		if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7040 			stp->sd_mark = bp;
7041 		bp->b_flag |= mark & ~_LASTMARK;
7042 		putback(stp, q, bp, pri);
7043 		qbackenable(q, pri);
7044 		mutex_exit(&stp->sd_lock);
7045 		return (EBADMSG);
7046 	}
7047 	ASSERT(type != M_SIG);
7048 
7049 	if (flags & MSG_IPEEK) {
7050 		/*
7051 		 * Clear any struioflag - we do the uiomove over again
7052 		 * when peeking since it simplifies the code.
7053 		 *
7054 		 * Dup the message and put the original back on the queue.
7055 		 * If dupmsg() fails, try again with copymsg() to see if
7056 		 * there is indeed a shortage of memory.  dupmsg() may fail
7057 		 * if db_ref in any of the messages reaches its limit.
7058 		 */
7059 		if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) {
7060 			/*
7061 			 * Restore the state of the stream head since we
7062 			 * need to drop sd_lock (strwaitbuf is sleeping).
7063 			 */
7064 			size_t size = msgdsize(bp);
7065 
7066 			if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7067 				stp->sd_mark = bp;
7068 			bp->b_flag |= mark & ~_LASTMARK;
7069 			putback(stp, q, bp, pri);
7070 			mutex_exit(&stp->sd_lock);
7071 			error = strwaitbuf(size, BPRI_HI);
7072 			if (error) {
7073 				/*
7074 				 * There is no net change to the queue thus
7075 				 * no need to qbackenable.
7076 				 */
7077 				return (error);
7078 			}
7079 			goto retry;
7080 		}
7081 
7082 		if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7083 			stp->sd_mark = bp;
7084 		bp->b_flag |= mark & ~_LASTMARK;
7085 		putback(stp, q, bp, pri);
7086 		bp = nbp;
7087 	}
7088 
7089 	/*
7090 	 * Set this flag so strrput will not generate signals. Need to
7091 	 * make sure this flag is cleared before leaving this routine
7092 	 * else signals will stop being sent.
7093 	 */
7094 	stp->sd_flag |= STRGETINPROG;
7095 	mutex_exit(&stp->sd_lock);
7096 
7097 	if ((stp->sd_rputdatafunc != NULL) && (DB_TYPE(bp) == M_DATA) &&
7098 	    (!(DB_FLAGS(bp) & DBLK_COOKED))) {
7099 
7100 		bp = (stp->sd_rputdatafunc)(
7101 		    stp->sd_vnode, bp, NULL,
7102 		    NULL, NULL, NULL);
7103 
7104 		if (bp == NULL)
7105 			goto retry;
7106 
7107 		DB_FLAGS(bp) |= DBLK_COOKED;
7108 	}
7109 
7110 	if (STREAM_NEEDSERVICE(stp))
7111 		stream_runservice(stp);
7112 
7113 	/*
7114 	 * Set HIPRI flag if message is priority.
7115 	 */
7116 	if (type >= QPCTL)
7117 		flg = MSG_HIPRI;
7118 	else
7119 		flg = MSG_BAND;
7120 
7121 	/*
7122 	 * First process PROTO or PCPROTO blocks, if any.
7123 	 */
7124 	if (mctlp != NULL && type != M_DATA) {
7125 		mblk_t *nbp;
7126 
7127 		*mctlp = bp;
7128 		while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA)
7129 			bp = bp->b_cont;
7130 		nbp = bp->b_cont;
7131 		bp->b_cont = NULL;
7132 		bp = nbp;
7133 	}
7134 
7135 	if (bp && bp->b_datap->db_type != M_DATA) {
7136 		/*
7137 		 * More PROTO blocks in msg. Will only happen if mctlp is NULL.
7138 		 */
7139 		more |= MORECTL;
7140 		savemp = bp;
7141 		while (bp && bp->b_datap->db_type != M_DATA) {
7142 			savemptail = bp;
7143 			bp = bp->b_cont;
7144 		}
7145 		savemptail->b_cont = NULL;
7146 	}
7147 
7148 	/*
7149 	 * Now process DATA blocks, if any.
7150 	 */
7151 	if (uiop == NULL) {
7152 		/* Append data to tail of mctlp */
7153 		if (mctlp != NULL) {
7154 			mblk_t **mpp = mctlp;
7155 
7156 			while (*mpp != NULL)
7157 				mpp = &((*mpp)->b_cont);
7158 			*mpp = bp;
7159 			bp = NULL;
7160 		}
7161 	} else if (uiop->uio_resid >= 0 && bp) {
7162 		size_t oldresid = uiop->uio_resid;
7163 
7164 		/*
7165 		 * If a streams message is likely to consist
7166 		 * of many small mblks, it is pulled up into
7167 		 * one continuous chunk of memory.
7168 		 * see longer comment at top of page
7169 		 * by mblk_pull_len declaration.
7170 		 */
7171 
7172 		if (MBLKL(bp) < mblk_pull_len) {
7173 			(void) pullupmsg(bp, -1);
7174 		}
7175 
7176 		bp = struiocopyout(bp, uiop, &error);
7177 		if (error != 0) {
7178 			if (mctlp != NULL) {
7179 				freemsg(*mctlp);
7180 				*mctlp = NULL;
7181 			} else
7182 				freemsg(savemp);
7183 			mutex_enter(&stp->sd_lock);
7184 			/*
7185 			 * clear stream head hi pri flag based on
7186 			 * first message
7187 			 */
7188 			if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7189 				ASSERT(type == M_PCPROTO);
7190 				stp->sd_flag &= ~STRPRI;
7191 			}
7192 			more = 0;
7193 			goto getmout;
7194 		}
7195 		/*
7196 		 * (pr == 1) indicates a partial read.
7197 		 */
7198 		if (oldresid > uiop->uio_resid)
7199 			pr = 1;
7200 	}
7201 
7202 	if (bp) {			/* more data blocks in msg */
7203 		more |= MOREDATA;
7204 		if (savemp)
7205 			savemptail->b_cont = bp;
7206 		else
7207 			savemp = bp;
7208 	}
7209 
7210 	mutex_enter(&stp->sd_lock);
7211 	if (savemp) {
7212 		if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) {
7213 			/*
7214 			 * When MSG_DISCARDTAIL is set or
7215 			 * when peeking discard any tail. When peeking this
7216 			 * is the tail of the dup that was copied out - the
7217 			 * message has already been putback on the queue.
7218 			 * Return MOREDATA to the caller even though the data
7219 			 * is discarded. This is used by sockets (to
7220 			 * set MSG_TRUNC).
7221 			 */
7222 			freemsg(savemp);
7223 			if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7224 				ASSERT(type == M_PCPROTO);
7225 				stp->sd_flag &= ~STRPRI;
7226 			}
7227 		} else if (pr && (savemp->b_datap->db_type == M_DATA) &&
7228 			    msgnodata(savemp)) {
7229 			/*
7230 			 * Avoid queuing a zero-length tail part of
7231 			 * a message. pr=1 indicates that we read some of
7232 			 * the message.
7233 			 */
7234 			freemsg(savemp);
7235 			more &= ~MOREDATA;
7236 			if (type >= QPCTL) {
7237 				ASSERT(type == M_PCPROTO);
7238 				stp->sd_flag &= ~STRPRI;
7239 			}
7240 		} else {
7241 			savemp->b_band = pri;
7242 			/*
7243 			 * If the first message was HIPRI and the one we're
7244 			 * putting back isn't, then clear STRPRI, otherwise
7245 			 * set STRPRI again.  Note that we must set STRPRI
7246 			 * again since the flush logic in strrput_nondata()
7247 			 * may have cleared it while we had sd_lock dropped.
7248 			 */
7249 			if (type >= QPCTL) {
7250 				ASSERT(type == M_PCPROTO);
7251 				if (queclass(savemp) < QPCTL)
7252 					stp->sd_flag &= ~STRPRI;
7253 				else
7254 					stp->sd_flag |= STRPRI;
7255 			} else if (queclass(savemp) >= QPCTL) {
7256 				/*
7257 				 * The first message was not a HIPRI message,
7258 				 * but the one we are about to putback is.
7259 				 * For simplicitly, we do not allow for HIPRI
7260 				 * messages to be embedded in the message
7261 				 * body, so just force it to same type as
7262 				 * first message.
7263 				 */
7264 				ASSERT(type == M_DATA || type == M_PROTO);
7265 				ASSERT(savemp->b_datap->db_type == M_PCPROTO);
7266 				savemp->b_datap->db_type = type;
7267 			}
7268 			if (mark != 0) {
7269 				if ((mark & _LASTMARK) &&
7270 				    (stp->sd_mark == NULL)) {
7271 					/*
7272 					 * If another marked message arrived
7273 					 * while sd_lock was not held sd_mark
7274 					 * would be non-NULL.
7275 					 */
7276 					stp->sd_mark = savemp;
7277 				}
7278 				savemp->b_flag |= mark & ~_LASTMARK;
7279 			}
7280 			putback(stp, q, savemp, pri);
7281 		}
7282 	} else if (!(flags & MSG_IPEEK)) {
7283 		/*
7284 		 * The complete message was consumed.
7285 		 *
7286 		 * If another M_PCPROTO arrived while sd_lock was not held
7287 		 * it would have been discarded since STRPRI was still set.
7288 		 *
7289 		 * Move the MSG*MARKNEXT information
7290 		 * to the stream head just in case
7291 		 * the read queue becomes empty.
7292 		 * clear stream head hi pri flag based on
7293 		 * first message
7294 		 *
7295 		 * If the stream head was at the mark
7296 		 * (STRATMARK) before we dropped sd_lock above
7297 		 * and some data was consumed then we have
7298 		 * moved past the mark thus STRATMARK is
7299 		 * cleared. However, if a message arrived in
7300 		 * strrput during the copyout above causing
7301 		 * STRATMARK to be set we can not clear that
7302 		 * flag.
7303 		 * XXX A "perimeter" would help by single-threading strrput,
7304 		 * strread, strgetmsg and kstrgetmsg.
7305 		 */
7306 		if (type >= QPCTL) {
7307 			ASSERT(type == M_PCPROTO);
7308 			stp->sd_flag &= ~STRPRI;
7309 		}
7310 		if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
7311 			if (mark & MSGMARKNEXT) {
7312 				stp->sd_flag &= ~STRNOTATMARK;
7313 				stp->sd_flag |= STRATMARK;
7314 			} else if (mark & MSGNOTMARKNEXT) {
7315 				stp->sd_flag &= ~STRATMARK;
7316 				stp->sd_flag |= STRNOTATMARK;
7317 			} else {
7318 				stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
7319 			}
7320 		} else if (pr && (old_sd_flag & STRATMARK)) {
7321 			stp->sd_flag &= ~STRATMARK;
7322 		}
7323 	}
7324 
7325 	*flagsp = flg;
7326 	*prip = pri;
7327 
7328 	/*
7329 	 * Getmsg cleanup processing - if the state of the queue has changed
7330 	 * some signals may need to be sent and/or poll awakened.
7331 	 */
7332 getmout:
7333 	qbackenable(q, pri);
7334 
7335 	/*
7336 	 * We dropped the stream head lock above. Send all M_SIG messages
7337 	 * before processing stream head for SIGPOLL messages.
7338 	 */
7339 	ASSERT(MUTEX_HELD(&stp->sd_lock));
7340 	while ((bp = q->q_first) != NULL &&
7341 	    (bp->b_datap->db_type == M_SIG)) {
7342 		/*
7343 		 * sd_lock is held so the content of the read queue can not
7344 		 * change.
7345 		 */
7346 		bp = getq(q);
7347 		ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
7348 
7349 		strsignal_nolock(stp, *bp->b_rptr, (int32_t)bp->b_band);
7350 		mutex_exit(&stp->sd_lock);
7351 		freemsg(bp);
7352 		if (STREAM_NEEDSERVICE(stp))
7353 			stream_runservice(stp);
7354 		mutex_enter(&stp->sd_lock);
7355 	}
7356 
7357 	/*
7358 	 * stream head cannot change while we make the determination
7359 	 * whether or not to send a signal. Drop the flag to allow strrput
7360 	 * to send firstmsgsigs again.
7361 	 */
7362 	stp->sd_flag &= ~STRGETINPROG;
7363 
7364 	/*
7365 	 * If the type of message at the front of the queue changed
7366 	 * due to the receive the appropriate signals and pollwakeup events
7367 	 * are generated. The type of changes are:
7368 	 *	Processed a hipri message, q_first is not hipri.
7369 	 *	Processed a band X message, and q_first is band Y.
7370 	 * The generated signals and pollwakeups are identical to what
7371 	 * strrput() generates should the message that is now on q_first
7372 	 * arrive to an empty read queue.
7373 	 *
7374 	 * Note: only strrput will send a signal for a hipri message.
7375 	 */
7376 	if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
7377 		strsigset_t signals = 0;
7378 		strpollset_t pollwakeups = 0;
7379 
7380 		if (flg & MSG_HIPRI) {
7381 			/*
7382 			 * Removed a hipri message. Regular data at
7383 			 * the front of  the queue.
7384 			 */
7385 			if (bp->b_band == 0) {
7386 				signals = S_INPUT | S_RDNORM;
7387 				pollwakeups = POLLIN | POLLRDNORM;
7388 			} else {
7389 				signals = S_INPUT | S_RDBAND;
7390 				pollwakeups = POLLIN | POLLRDBAND;
7391 			}
7392 		} else if (pri != bp->b_band) {
7393 			/*
7394 			 * The band is different for the new q_first.
7395 			 */
7396 			if (bp->b_band == 0) {
7397 				signals = S_RDNORM;
7398 				pollwakeups = POLLIN | POLLRDNORM;
7399 			} else {
7400 				signals = S_RDBAND;
7401 				pollwakeups = POLLIN | POLLRDBAND;
7402 			}
7403 		}
7404 
7405 		if (pollwakeups != 0) {
7406 			if (pollwakeups == (POLLIN | POLLRDNORM)) {
7407 				if (!(stp->sd_rput_opt & SR_POLLIN))
7408 					goto no_pollwake;
7409 				stp->sd_rput_opt &= ~SR_POLLIN;
7410 			}
7411 			mutex_exit(&stp->sd_lock);
7412 			pollwakeup(&stp->sd_pollist, pollwakeups);
7413 			mutex_enter(&stp->sd_lock);
7414 		}
7415 no_pollwake:
7416 
7417 		if (stp->sd_sigflags & signals)
7418 			strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
7419 	}
7420 	mutex_exit(&stp->sd_lock);
7421 
7422 	rvp->r_val1 = more;
7423 	return (error);
7424 #undef	_LASTMARK
7425 }
7426 
7427 /*
7428  * Put a message downstream.
7429  *
7430  * NOTE: strputmsg and kstrputmsg have much of the logic in common.
7431  */
7432 int
7433 strputmsg(
7434 	struct vnode *vp,
7435 	struct strbuf *mctl,
7436 	struct strbuf *mdata,
7437 	unsigned char pri,
7438 	int flag,
7439 	int fmode)
7440 {
7441 	struct stdata *stp;
7442 	queue_t *wqp;
7443 	mblk_t *mp;
7444 	ssize_t msgsize;
7445 	ssize_t rmin, rmax;
7446 	int error;
7447 	struct uio uios;
7448 	struct uio *uiop = &uios;
7449 	struct iovec iovs;
7450 	int xpg4 = 0;
7451 
7452 	ASSERT(vp->v_stream);
7453 	stp = vp->v_stream;
7454 	wqp = stp->sd_wrq;
7455 
7456 	/*
7457 	 * If it is an XPG4 application, we need to send
7458 	 * SIGPIPE below
7459 	 */
7460 
7461 	xpg4 = (flag & MSG_XPG4) ? 1 : 0;
7462 	flag &= ~MSG_XPG4;
7463 
7464 #ifdef C2_AUDIT
7465 	if (audit_active)
7466 		audit_strputmsg(vp, mctl, mdata, pri, flag, fmode);
7467 #endif
7468 
7469 	mutex_enter(&stp->sd_lock);
7470 
7471 	if ((error = i_straccess(stp, JCWRITE)) != 0) {
7472 		mutex_exit(&stp->sd_lock);
7473 		return (error);
7474 	}
7475 
7476 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
7477 		error = strwriteable(stp, B_FALSE, xpg4);
7478 		if (error != 0) {
7479 			mutex_exit(&stp->sd_lock);
7480 			return (error);
7481 		}
7482 	}
7483 
7484 	mutex_exit(&stp->sd_lock);
7485 
7486 	/*
7487 	 * Check for legal flag value.
7488 	 */
7489 	switch (flag) {
7490 	case MSG_HIPRI:
7491 		if ((mctl->len < 0) || (pri != 0))
7492 			return (EINVAL);
7493 		break;
7494 	case MSG_BAND:
7495 		break;
7496 
7497 	default:
7498 		return (EINVAL);
7499 	}
7500 
7501 	TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_IN,
7502 		"strputmsg in:stp %p", stp);
7503 
7504 	/* get these values from those cached in the stream head */
7505 	rmin = stp->sd_qn_minpsz;
7506 	rmax = stp->sd_qn_maxpsz;
7507 
7508 	/*
7509 	 * Make sure ctl and data sizes together fall within the
7510 	 * limits of the max and min receive packet sizes and do
7511 	 * not exceed system limit.
7512 	 */
7513 	ASSERT((rmax >= 0) || (rmax == INFPSZ));
7514 	if (rmax == 0) {
7515 		return (ERANGE);
7516 	}
7517 	/*
7518 	 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
7519 	 * Needed to prevent partial failures in the strmakedata loop.
7520 	 */
7521 	if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
7522 		rmax = stp->sd_maxblk;
7523 
7524 	if ((msgsize = mdata->len) < 0) {
7525 		msgsize = 0;
7526 		rmin = 0;	/* no range check for NULL data part */
7527 	}
7528 	if ((msgsize < rmin) ||
7529 	    ((msgsize > rmax) && (rmax != INFPSZ)) ||
7530 	    (mctl->len > strctlsz)) {
7531 		return (ERANGE);
7532 	}
7533 
7534 	/*
7535 	 * Setup uio and iov for data part
7536 	 */
7537 	iovs.iov_base = mdata->buf;
7538 	iovs.iov_len = msgsize;
7539 	uios.uio_iov = &iovs;
7540 	uios.uio_iovcnt = 1;
7541 	uios.uio_loffset = 0;
7542 	uios.uio_segflg = UIO_USERSPACE;
7543 	uios.uio_fmode = fmode;
7544 	uios.uio_extflg = UIO_COPY_DEFAULT;
7545 	uios.uio_resid = msgsize;
7546 	uios.uio_offset = 0;
7547 
7548 	/* Ignore flow control in strput for HIPRI */
7549 	if (flag & MSG_HIPRI)
7550 		flag |= MSG_IGNFLOW;
7551 
7552 	for (;;) {
7553 		int done = 0;
7554 
7555 		/*
7556 		 * strput will always free the ctl mblk - even when strput
7557 		 * fails.
7558 		 */
7559 		if ((error = strmakectl(mctl, flag, fmode, &mp)) != 0) {
7560 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7561 				"strputmsg out:stp %p out %d error %d",
7562 				stp, 1, error);
7563 			return (error);
7564 		}
7565 		/*
7566 		 * Verify that the whole message can be transferred by
7567 		 * strput.
7568 		 */
7569 		ASSERT(stp->sd_maxblk == INFPSZ ||
7570 			stp->sd_maxblk >= mdata->len);
7571 
7572 		msgsize = mdata->len;
7573 		error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
7574 		mdata->len = msgsize;
7575 
7576 		if (error == 0)
7577 			break;
7578 
7579 		if (error != EWOULDBLOCK)
7580 			goto out;
7581 
7582 		mutex_enter(&stp->sd_lock);
7583 		/*
7584 		 * Check for a missed wakeup.
7585 		 * Needed since strput did not hold sd_lock across
7586 		 * the canputnext.
7587 		 */
7588 		if (bcanputnext(wqp, pri)) {
7589 			/* Try again */
7590 			mutex_exit(&stp->sd_lock);
7591 			continue;
7592 		}
7593 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAIT,
7594 			"strputmsg wait:stp %p waits pri %d", stp, pri);
7595 		if (((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, fmode, -1,
7596 		    &done)) != 0) || done) {
7597 			mutex_exit(&stp->sd_lock);
7598 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7599 				"strputmsg out:q %p out %d error %d",
7600 				stp, 0, error);
7601 			return (error);
7602 		}
7603 		TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE,
7604 			"strputmsg wake:stp %p wakes", stp);
7605 		if ((error = i_straccess(stp, JCWRITE)) != 0) {
7606 			mutex_exit(&stp->sd_lock);
7607 			return (error);
7608 		}
7609 		mutex_exit(&stp->sd_lock);
7610 	}
7611 out:
7612 	/*
7613 	 * For historic reasons, applications expect EAGAIN
7614 	 * when data mblk could not be allocated. so change
7615 	 * ENOMEM back to EAGAIN
7616 	 */
7617 	if (error == ENOMEM)
7618 		error = EAGAIN;
7619 	TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7620 		"strputmsg out:stp %p out %d error %d", stp, 2, error);
7621 	return (error);
7622 }
7623 
7624 /*
7625  * Put a message downstream.
7626  * Can send only an M_PROTO/M_PCPROTO by passing in a NULL uiop.
7627  * The fmode flag (NDELAY, NONBLOCK) is the or of the flags in the uio
7628  * and the fmode parameter.
7629  *
7630  * This routine handles the consolidation private flags:
7631  *	MSG_IGNERROR	Ignore any stream head error except STPLEX.
7632  *	MSG_HOLDSIG	Hold signals while waiting for data.
7633  *	MSG_IGNFLOW	Don't check streams flow control.
7634  *
7635  * NOTE: strputmsg and kstrputmsg have much of the logic in common.
7636  */
7637 int
7638 kstrputmsg(
7639 	struct vnode *vp,
7640 	mblk_t *mctl,
7641 	struct uio *uiop,
7642 	ssize_t msgsize,
7643 	unsigned char pri,
7644 	int flag,
7645 	int fmode)
7646 {
7647 	struct stdata *stp;
7648 	queue_t *wqp;
7649 	ssize_t rmin, rmax;
7650 	int error;
7651 
7652 	ASSERT(vp->v_stream);
7653 	stp = vp->v_stream;
7654 	wqp = stp->sd_wrq;
7655 #ifdef C2_AUDIT
7656 	if (audit_active)
7657 		audit_strputmsg(vp, NULL, NULL, pri, flag, fmode);
7658 #endif
7659 	if (mctl == NULL)
7660 		return (EINVAL);
7661 
7662 	mutex_enter(&stp->sd_lock);
7663 
7664 	if ((error = i_straccess(stp, JCWRITE)) != 0) {
7665 		mutex_exit(&stp->sd_lock);
7666 		freemsg(mctl);
7667 		return (error);
7668 	}
7669 
7670 	if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) {
7671 		if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
7672 			error = strwriteable(stp, B_FALSE, B_TRUE);
7673 			if (error != 0) {
7674 				mutex_exit(&stp->sd_lock);
7675 				freemsg(mctl);
7676 				return (error);
7677 			}
7678 		}
7679 	}
7680 
7681 	mutex_exit(&stp->sd_lock);
7682 
7683 	/*
7684 	 * Check for legal flag value.
7685 	 */
7686 	switch (flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) {
7687 	case MSG_HIPRI:
7688 		if (pri != 0) {
7689 			freemsg(mctl);
7690 			return (EINVAL);
7691 		}
7692 		break;
7693 	case MSG_BAND:
7694 		break;
7695 	default:
7696 		freemsg(mctl);
7697 		return (EINVAL);
7698 	}
7699 
7700 	TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_IN,
7701 		"kstrputmsg in:stp %p", stp);
7702 
7703 	/* get these values from those cached in the stream head */
7704 	rmin = stp->sd_qn_minpsz;
7705 	rmax = stp->sd_qn_maxpsz;
7706 
7707 	/*
7708 	 * Make sure ctl and data sizes together fall within the
7709 	 * limits of the max and min receive packet sizes and do
7710 	 * not exceed system limit.
7711 	 */
7712 	ASSERT((rmax >= 0) || (rmax == INFPSZ));
7713 	if (rmax == 0) {
7714 		freemsg(mctl);
7715 		return (ERANGE);
7716 	}
7717 	/*
7718 	 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
7719 	 * Needed to prevent partial failures in the strmakedata loop.
7720 	 */
7721 	if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
7722 		rmax = stp->sd_maxblk;
7723 
7724 	if (uiop == NULL) {
7725 		msgsize = -1;
7726 		rmin = -1;	/* no range check for NULL data part */
7727 	} else {
7728 		/* Use uio flags as well as the fmode parameter flags */
7729 		fmode |= uiop->uio_fmode;
7730 
7731 		if ((msgsize < rmin) ||
7732 		    ((msgsize > rmax) && (rmax != INFPSZ))) {
7733 			freemsg(mctl);
7734 			return (ERANGE);
7735 		}
7736 	}
7737 
7738 	/* Ignore flow control in strput for HIPRI */
7739 	if (flag & MSG_HIPRI)
7740 		flag |= MSG_IGNFLOW;
7741 
7742 	for (;;) {
7743 		int done = 0;
7744 		int waitflag;
7745 		mblk_t *mp;
7746 
7747 		/*
7748 		 * strput will always free the ctl mblk - even when strput
7749 		 * fails. If MSG_IGNFLOW is set then any error returned
7750 		 * will cause us to break the loop, so we don't need a copy
7751 		 * of the message. If MSG_IGNFLOW is not set, then we can
7752 		 * get hit by flow control and be forced to try again. In
7753 		 * this case we need to have a copy of the message. We
7754 		 * do this using copymsg since the message may get modified
7755 		 * by something below us.
7756 		 *
7757 		 * We've observed that many TPI providers do not check db_ref
7758 		 * on the control messages but blindly reuse them for the
7759 		 * T_OK_ACK/T_ERROR_ACK. Thus using copymsg is more
7760 		 * friendly to such providers than using dupmsg. Also, note
7761 		 * that sockfs uses MSG_IGNFLOW for all TPI control messages.
7762 		 * Only data messages are subject to flow control, hence
7763 		 * subject to this copymsg.
7764 		 */
7765 		if (flag & MSG_IGNFLOW) {
7766 			mp = mctl;
7767 			mctl = NULL;
7768 		} else {
7769 			do {
7770 				/*
7771 				 * If a message has a free pointer, the message
7772 				 * must be dupmsg to maintain this pointer.
7773 				 * Code using this facility must be sure
7774 				 * that modules below will not change the
7775 				 * contents of the dblk without checking db_ref
7776 				 * first. If db_ref is > 1, then the module
7777 				 * needs to do a copymsg first. Otherwise,
7778 				 * the contents of the dblk may become
7779 				 * inconsistent because the freesmg/freeb below
7780 				 * may end up calling atomic_add_32_nv.
7781 				 * The atomic_add_32_nv in freeb (accessing
7782 				 * all of db_ref, db_type, db_flags, and
7783 				 * db_struioflag) does not prevent other threads
7784 				 * from concurrently trying to modify e.g.
7785 				 * db_type.
7786 				 */
7787 				if (mctl->b_datap->db_frtnp != NULL)
7788 					mp = dupmsg(mctl);
7789 				else
7790 					mp = copymsg(mctl);
7791 
7792 				if (mp != NULL)
7793 					break;
7794 
7795 				error = strwaitbuf(msgdsize(mctl), BPRI_MED);
7796 				if (error) {
7797 					freemsg(mctl);
7798 					return (error);
7799 				}
7800 			} while (mp == NULL);
7801 		}
7802 		/*
7803 		 * Verify that all of msgsize can be transferred by
7804 		 * strput.
7805 		 */
7806 		ASSERT(stp->sd_maxblk == INFPSZ || stp->sd_maxblk >= msgsize);
7807 		error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
7808 		if (error == 0)
7809 			break;
7810 
7811 		if (error != EWOULDBLOCK)
7812 			goto out;
7813 
7814 		/*
7815 		 * IF MSG_IGNFLOW is set we should have broken out of loop
7816 		 * above.
7817 		 */
7818 		ASSERT(!(flag & MSG_IGNFLOW));
7819 		mutex_enter(&stp->sd_lock);
7820 		/*
7821 		 * Check for a missed wakeup.
7822 		 * Needed since strput did not hold sd_lock across
7823 		 * the canputnext.
7824 		 */
7825 		if (bcanputnext(wqp, pri)) {
7826 			/* Try again */
7827 			mutex_exit(&stp->sd_lock);
7828 			continue;
7829 		}
7830 		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAIT,
7831 			"kstrputmsg wait:stp %p waits pri %d", stp, pri);
7832 
7833 		waitflag = WRITEWAIT;
7834 		if (flag & (MSG_HOLDSIG|MSG_IGNERROR)) {
7835 			if (flag & MSG_HOLDSIG)
7836 				waitflag |= STR_NOSIG;
7837 			if (flag & MSG_IGNERROR)
7838 				waitflag |= STR_NOERROR;
7839 		}
7840 		if (((error = strwaitq(stp, waitflag,
7841 		    (ssize_t)0, fmode, -1, &done)) != 0) || done) {
7842 			mutex_exit(&stp->sd_lock);
7843 			TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
7844 				"kstrputmsg out:stp %p out %d error %d",
7845 				stp, 0, error);
7846 			freemsg(mctl);
7847 			return (error);
7848 		}
7849 		TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE,
7850 			"kstrputmsg wake:stp %p wakes", stp);
7851 		if ((error = i_straccess(stp, JCWRITE)) != 0) {
7852 			mutex_exit(&stp->sd_lock);
7853 			freemsg(mctl);
7854 			return (error);
7855 		}
7856 		mutex_exit(&stp->sd_lock);
7857 	}
7858 out:
7859 	freemsg(mctl);
7860 	/*
7861 	 * For historic reasons, applications expect EAGAIN
7862 	 * when data mblk could not be allocated. so change
7863 	 * ENOMEM back to EAGAIN
7864 	 */
7865 	if (error == ENOMEM)
7866 		error = EAGAIN;
7867 	TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
7868 		"kstrputmsg out:stp %p out %d error %d", stp, 2, error);
7869 	return (error);
7870 }
7871 
7872 /*
7873  * Determines whether the necessary conditions are set on a stream
7874  * for it to be readable, writeable, or have exceptions.
7875  *
7876  * strpoll handles the consolidation private events:
7877  *	POLLNOERR	Do not return POLLERR even if there are stream
7878  *			head errors.
7879  *			Used by sockfs.
7880  *	POLLRDDATA	Do not return POLLIN unless at least one message on
7881  *			the queue contains one or more M_DATA mblks. Thus
7882  *			when this flag is set a queue with only
7883  *			M_PROTO/M_PCPROTO mblks does not return POLLIN.
7884  *			Used by sockfs to ignore T_EXDATA_IND messages.
7885  *
7886  * Note: POLLRDDATA assumes that synch streams only return messages with
7887  * an M_DATA attached (i.e. not messages consisting of only
7888  * an M_PROTO/M_PCPROTO part).
7889  */
7890 int
7891 strpoll(
7892 	struct stdata *stp,
7893 	short events_arg,
7894 	int anyyet,
7895 	short *reventsp,
7896 	struct pollhead **phpp)
7897 {
7898 	int events = (ushort_t)events_arg;
7899 	int retevents = 0;
7900 	mblk_t *mp;
7901 	qband_t *qbp;
7902 	long sd_flags = stp->sd_flag;
7903 	int headlocked = 0;
7904 
7905 	/*
7906 	 * For performance, a single 'if' tests for most possible edge
7907 	 * conditions in one shot
7908 	 */
7909 	if (sd_flags & (STPLEX | STRDERR | STWRERR)) {
7910 		if (sd_flags & STPLEX) {
7911 			*reventsp = POLLNVAL;
7912 			return (EINVAL);
7913 		}
7914 		if (((events & (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) &&
7915 		    (sd_flags & STRDERR)) ||
7916 		    ((events & (POLLOUT | POLLWRNORM | POLLWRBAND)) &&
7917 		    (sd_flags & STWRERR))) {
7918 			if (!(events & POLLNOERR)) {
7919 				*reventsp = POLLERR;
7920 				return (0);
7921 			}
7922 		}
7923 	}
7924 	if (sd_flags & STRHUP) {
7925 		retevents |= POLLHUP;
7926 	} else if (events & (POLLWRNORM | POLLWRBAND)) {
7927 		queue_t *tq;
7928 		queue_t	*qp = stp->sd_wrq;
7929 
7930 		claimstr(qp);
7931 		/* Find next module forward that has a service procedure */
7932 		tq = qp->q_next->q_nfsrv;
7933 		ASSERT(tq != NULL);
7934 
7935 		polllock(&stp->sd_pollist, QLOCK(tq));
7936 		if (events & POLLWRNORM) {
7937 			queue_t *sqp;
7938 
7939 			if (tq->q_flag & QFULL)
7940 				/* ensure backq svc procedure runs */
7941 				tq->q_flag |= QWANTW;
7942 			else if ((sqp = stp->sd_struiowrq) != NULL) {
7943 				/* Check sync stream barrier write q */
7944 				mutex_exit(QLOCK(tq));
7945 				polllock(&stp->sd_pollist, QLOCK(sqp));
7946 				if (sqp->q_flag & QFULL)
7947 					/* ensure pollwakeup() is done */
7948 					sqp->q_flag |= QWANTWSYNC;
7949 				else
7950 					retevents |= POLLOUT;
7951 				/* More write events to process ??? */
7952 				if (! (events & POLLWRBAND)) {
7953 					mutex_exit(QLOCK(sqp));
7954 					releasestr(qp);
7955 					goto chkrd;
7956 				}
7957 				mutex_exit(QLOCK(sqp));
7958 				polllock(&stp->sd_pollist, QLOCK(tq));
7959 			} else
7960 				retevents |= POLLOUT;
7961 		}
7962 		if (events & POLLWRBAND) {
7963 			qbp = tq->q_bandp;
7964 			if (qbp) {
7965 				while (qbp) {
7966 					if (qbp->qb_flag & QB_FULL)
7967 						qbp->qb_flag |= QB_WANTW;
7968 					else
7969 						retevents |= POLLWRBAND;
7970 					qbp = qbp->qb_next;
7971 				}
7972 			} else {
7973 				retevents |= POLLWRBAND;
7974 			}
7975 		}
7976 		mutex_exit(QLOCK(tq));
7977 		releasestr(qp);
7978 	}
7979 chkrd:
7980 	if (sd_flags & STRPRI) {
7981 		retevents |= (events & POLLPRI);
7982 	} else if (events & (POLLRDNORM | POLLRDBAND | POLLIN)) {
7983 		queue_t	*qp = _RD(stp->sd_wrq);
7984 		int normevents = (events & (POLLIN | POLLRDNORM));
7985 
7986 		/*
7987 		 * Note: Need to do polllock() here since ps_lock may be
7988 		 * held. See bug 4191544.
7989 		 */
7990 		polllock(&stp->sd_pollist, &stp->sd_lock);
7991 		headlocked = 1;
7992 		mp = qp->q_first;
7993 		while (mp) {
7994 			/*
7995 			 * For POLLRDDATA we scan b_cont and b_next until we
7996 			 * find an M_DATA.
7997 			 */
7998 			if ((events & POLLRDDATA) &&
7999 			    mp->b_datap->db_type != M_DATA) {
8000 				mblk_t *nmp = mp->b_cont;
8001 
8002 				while (nmp != NULL &&
8003 				    nmp->b_datap->db_type != M_DATA)
8004 					nmp = nmp->b_cont;
8005 				if (nmp == NULL) {
8006 					mp = mp->b_next;
8007 					continue;
8008 				}
8009 			}
8010 			if (mp->b_band == 0)
8011 				retevents |= normevents;
8012 			else
8013 				retevents |= (events & (POLLIN | POLLRDBAND));
8014 			break;
8015 		}
8016 		if (! (retevents & normevents) &&
8017 		    (stp->sd_wakeq & RSLEEP)) {
8018 			/*
8019 			 * Sync stream barrier read queue has data.
8020 			 */
8021 			retevents |= normevents;
8022 		}
8023 		/* Treat eof as normal data */
8024 		if (sd_flags & STREOF)
8025 			retevents |= normevents;
8026 	}
8027 
8028 	*reventsp = (short)retevents;
8029 	if (retevents) {
8030 		if (headlocked)
8031 			mutex_exit(&stp->sd_lock);
8032 		return (0);
8033 	}
8034 
8035 	/*
8036 	 * If poll() has not found any events yet, set up event cell
8037 	 * to wake up the poll if a requested event occurs on this
8038 	 * stream.  Check for collisions with outstanding poll requests.
8039 	 */
8040 	if (!anyyet) {
8041 		*phpp = &stp->sd_pollist;
8042 		if (headlocked == 0) {
8043 			polllock(&stp->sd_pollist, &stp->sd_lock);
8044 			headlocked = 1;
8045 		}
8046 		stp->sd_rput_opt |= SR_POLLIN;
8047 	}
8048 	if (headlocked)
8049 		mutex_exit(&stp->sd_lock);
8050 	return (0);
8051 }
8052 
8053 /*
8054  * The purpose of putback() is to assure sleeping polls/reads
8055  * are awakened when there are no new messages arriving at the,
8056  * stream head, and a message is placed back on the read queue.
8057  *
8058  * sd_lock must be held when messages are placed back on stream
8059  * head.  (getq() holds sd_lock when it removes messages from
8060  * the queue)
8061  */
8062 
8063 static void
8064 putback(struct stdata *stp, queue_t *q, mblk_t *bp, int band)
8065 {
8066 	ASSERT(MUTEX_HELD(&stp->sd_lock));
8067 	(void) putbq(q, bp);
8068 	/*
8069 	 * A message may have come in when the sd_lock was dropped in the
8070 	 * calling routine. If this is the case and STR*ATMARK info was
8071 	 * received, need to move that from the stream head to the q_last
8072 	 * so that SIOCATMARK can return the proper value.
8073 	 */
8074 	if (stp->sd_flag & (STRATMARK | STRNOTATMARK)) {
8075 		unsigned short *flagp = &q->q_last->b_flag;
8076 		uint_t b_flag = (uint_t)*flagp;
8077 
8078 		if (stp->sd_flag & STRATMARK) {
8079 			b_flag &= ~MSGNOTMARKNEXT;
8080 			b_flag |= MSGMARKNEXT;
8081 			stp->sd_flag &= ~STRATMARK;
8082 		} else {
8083 			b_flag &= ~MSGMARKNEXT;
8084 			b_flag |= MSGNOTMARKNEXT;
8085 			stp->sd_flag &= ~STRNOTATMARK;
8086 		}
8087 		*flagp = (unsigned short) b_flag;
8088 	}
8089 
8090 #ifdef	DEBUG
8091 	/*
8092 	 * Make sure that the flags are not messed up.
8093 	 */
8094 	{
8095 		mblk_t *mp;
8096 		mp = q->q_last;
8097 		while (mp != NULL) {
8098 			ASSERT((mp->b_flag & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
8099 			    (MSGMARKNEXT|MSGNOTMARKNEXT));
8100 			mp = mp->b_cont;
8101 		}
8102 	}
8103 #endif
8104 	if (q->q_first == bp) {
8105 		short pollevents;
8106 
8107 		if (stp->sd_flag & RSLEEP) {
8108 			stp->sd_flag &= ~RSLEEP;
8109 			cv_broadcast(&q->q_wait);
8110 		}
8111 		if (stp->sd_flag & STRPRI) {
8112 			pollevents = POLLPRI;
8113 		} else {
8114 			if (band == 0) {
8115 				if (!(stp->sd_rput_opt & SR_POLLIN))
8116 					return;
8117 				stp->sd_rput_opt &= ~SR_POLLIN;
8118 				pollevents = POLLIN | POLLRDNORM;
8119 			} else {
8120 				pollevents = POLLIN | POLLRDBAND;
8121 			}
8122 		}
8123 		mutex_exit(&stp->sd_lock);
8124 		pollwakeup(&stp->sd_pollist, pollevents);
8125 		mutex_enter(&stp->sd_lock);
8126 	}
8127 }
8128 
8129 /*
8130  * Return the held vnode attached to the stream head of a
8131  * given queue
8132  * It is the responsibility of the calling routine to ensure
8133  * that the queue does not go away (e.g. pop).
8134  */
8135 vnode_t *
8136 strq2vp(queue_t *qp)
8137 {
8138 	vnode_t *vp;
8139 	vp = STREAM(qp)->sd_vnode;
8140 	ASSERT(vp != NULL);
8141 	VN_HOLD(vp);
8142 	return (vp);
8143 }
8144 
8145 /*
8146  * return the stream head write queue for the given vp
8147  * It is the responsibility of the calling routine to ensure
8148  * that the stream or vnode do not close.
8149  */
8150 queue_t *
8151 strvp2wq(vnode_t *vp)
8152 {
8153 	ASSERT(vp->v_stream != NULL);
8154 	return (vp->v_stream->sd_wrq);
8155 }
8156 
8157 /*
8158  * pollwakeup stream head
8159  * It is the responsibility of the calling routine to ensure
8160  * that the stream or vnode do not close.
8161  */
8162 void
8163 strpollwakeup(vnode_t *vp, short event)
8164 {
8165 	ASSERT(vp->v_stream);
8166 	pollwakeup(&vp->v_stream->sd_pollist, event);
8167 }
8168 
8169 /*
8170  * Mate the stream heads of two vnodes together. If the two vnodes are the
8171  * same, we just make the write-side point at the read-side -- otherwise,
8172  * we do a full mate.  Only works on vnodes associated with streams that are
8173  * still being built and thus have only a stream head.
8174  */
8175 void
8176 strmate(vnode_t *vp1, vnode_t *vp2)
8177 {
8178 	queue_t *wrq1 = strvp2wq(vp1);
8179 	queue_t *wrq2 = strvp2wq(vp2);
8180 
8181 	/*
8182 	 * Verify that there are no modules on the stream yet.  We also
8183 	 * rely on the stream head always having a service procedure to
8184 	 * avoid tweaking q_nfsrv.
8185 	 */
8186 	ASSERT(wrq1->q_next == NULL && wrq2->q_next == NULL);
8187 	ASSERT(wrq1->q_qinfo->qi_srvp != NULL);
8188 	ASSERT(wrq2->q_qinfo->qi_srvp != NULL);
8189 
8190 	/*
8191 	 * If the queues are the same, just twist; otherwise do a full mate.
8192 	 */
8193 	if (wrq1 == wrq2) {
8194 		wrq1->q_next = _RD(wrq1);
8195 	} else {
8196 		wrq1->q_next = _RD(wrq2);
8197 		wrq2->q_next = _RD(wrq1);
8198 		STREAM(wrq1)->sd_mate = STREAM(wrq2);
8199 		STREAM(wrq1)->sd_flag |= STRMATE;
8200 		STREAM(wrq2)->sd_mate = STREAM(wrq1);
8201 		STREAM(wrq2)->sd_flag |= STRMATE;
8202 	}
8203 }
8204 
8205 /*
8206  * XXX will go away when console is correctly fixed.
8207  * Clean up the console PIDS, from previous I_SETSIG,
8208  * called only for cnopen which never calls strclean().
8209  */
8210 void
8211 str_cn_clean(struct vnode *vp)
8212 {
8213 	strsig_t *ssp, *pssp, *tssp;
8214 	struct stdata *stp;
8215 	struct pid  *pidp;
8216 	int update = 0;
8217 
8218 	ASSERT(vp->v_stream);
8219 	stp = vp->v_stream;
8220 	pssp = NULL;
8221 	mutex_enter(&stp->sd_lock);
8222 	ssp = stp->sd_siglist;
8223 	while (ssp) {
8224 		mutex_enter(&pidlock);
8225 		pidp = ssp->ss_pidp;
8226 		/*
8227 		 * Get rid of PID if the proc is gone.
8228 		 */
8229 		if (pidp->pid_prinactive) {
8230 			tssp = ssp->ss_next;
8231 			if (pssp)
8232 				pssp->ss_next = tssp;
8233 			else
8234 				stp->sd_siglist = tssp;
8235 			ASSERT(pidp->pid_ref <= 1);
8236 			PID_RELE(ssp->ss_pidp);
8237 			mutex_exit(&pidlock);
8238 			kmem_free(ssp, sizeof (strsig_t));
8239 			update = 1;
8240 			ssp = tssp;
8241 			continue;
8242 		} else
8243 			mutex_exit(&pidlock);
8244 		pssp = ssp;
8245 		ssp = ssp->ss_next;
8246 	}
8247 	if (update) {
8248 		stp->sd_sigflags = 0;
8249 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
8250 			stp->sd_sigflags |= ssp->ss_events;
8251 	}
8252 	mutex_exit(&stp->sd_lock);
8253 }
8254 
8255 /*
8256  * Return B_TRUE if there is data in the message, B_FALSE otherwise.
8257  */
8258 static boolean_t
8259 msghasdata(mblk_t *bp)
8260 {
8261 	for (; bp; bp = bp->b_cont)
8262 		if (bp->b_datap->db_type == M_DATA) {
8263 			ASSERT(bp->b_wptr >= bp->b_rptr);
8264 			if (bp->b_wptr > bp->b_rptr)
8265 				return (B_TRUE);
8266 		}
8267 	return (B_FALSE);
8268 }
8269