xref: /illumos-gate/usr/src/uts/common/io/tl.c (revision 23a1ccea)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Multithreaded STREAMS Local Transport Provider.
28  *
29  * OVERVIEW
30  * ========
31  *
32  * This driver provides TLI as well as socket semantics.  It provides
33  * connectionless, connection oriented, and connection oriented with orderly
34  * release transports for TLI and sockets. Each transport type has separate name
35  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
36  * this removes any name space conflicts when binding to socket style transport
37  * addresses.
38  *
39  * NOTE: There is one exception: Socket ticots and ticotsord transports share
40  * the same namespace. In fact, sockets always use ticotsord type transport.
41  *
42  * The driver mode is specified during open() by the minor number used for
43  * open.
44  *
45  *  The sockets in addition have the following semantic differences:
46  *  No support for passing up credentials (TL_SET[U]CRED).
47  *
48  *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
49  *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
50  *	T_OPTDATA_IND.
51  *
52  *	The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
53  *	a T_CONN_RES is received from the acceptor. This means that a socket
54  *	connect will complete before the peer has called accept.
55  *
56  *
57  * MULTITHREADING
58  * ==============
59  *
60  * The driver does not use STREAMS protection mechanisms. Instead it uses a
61  * generic "serializer" abstraction. Most of the operations are executed behind
62  * the serializer and are, essentially single-threaded. All functions executed
63  * behind the same serializer are strictly serialized. So if one thread calls
64  * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
65  * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
66  * was called first) the actual sequence will be foo(mp1, arg1);
67  * bar(mp2, arg1) or bar(mp2, arg1); foo(mp1, arg1); But foo() and bar() will
68  * never run at the same time.
69  *
70  * Connectionless transport use a single serializer per transport type (one for
71  * TLI and one for sockets). Connection-oriented transports use finer-grained
72  * serializers.
73  *
74  * All COTS-type endpoints start their life with private serializers. During
75  * connection request processing the endpoint serializer is switched to the
76  * listener's serializer and the rest of T_CONN_REQ processing is done on the
77  * listener serializer. During T_CONN_RES processing the eager serializer is
78  * switched from listener to acceptor serializer and after that point all
79  * processing for eager and acceptor happens on this serializer. To avoid races
80  * with endpoint closes while its serializer may be changing closes are blocked
81  * while serializers are manipulated.
82  *
83  * References accounting
84  * ---------------------
85  *
86  * Endpoints are reference counted and freed when the last reference is
87  * dropped. Functions within the serializer may access an endpoint state even
88  * after an endpoint closed. The te_closing being set on the endpoint indicates
89  * that the endpoint entered its close routine.
90  *
91  * One reference is held for each opened endpoint instance. The reference
92  * counter is incremented when the endpoint is linked to another endpoint and
93  * decremented when the link disappears. It is also incremented when the
94  * endpoint is found by the hash table lookup. This increment is atomic with the
95  * lookup itself and happens while the hash table read lock is held.
96  *
97  * Close synchronization
98  * ---------------------
99  *
100  * During close the endpoint is marked as closing using te_closing flag. It is
101  * usually enough to check for te_closing flag since all other state changes
102  * happen after this flag is set and the close entered serializer. Immediately
103  * after setting te_closing flag tl_close() enters serializer and waits until
104  * the callback finishes. This allows all functions called within serializer to
105  * simply check te_closing without any locks.
106  *
107  * Serializer management.
108  * ---------------------
109  *
110  * For COTS transports serializers are created when the endpoint is constructed
111  * and destroyed when the endpoint is destructed. CLTS transports use global
112  * serializers - one for sockets and one for TLI.
113  *
114  * COTS serializers have separate reference counts to deal with several
115  * endpoints sharing the same serializer. There is a subtle problem related to
116  * the serializer destruction. The serializer should never be destroyed by any
117  * function executed inside serializer. This means that close has to wait till
118  * all serializer activity for this endpoint is finished before it can drop the
119  * last reference on the endpoint (which may as well free the serializer).  This
120  * is only relevant for COTS transports which manage serializers
121  * dynamically. For CLTS transports close may complete without waiting for all
122  * serializer activity to finish since serializer is only destroyed at driver
123  * detach time.
124  *
125  * COTS endpoints keep track of the number of outstanding requests on the
126  * serializer for the endpoint. The code handling accept() avoids changing
127  * client serializer if it has any pending messages on the serializer and
128  * instead moves acceptor to listener's serializer.
129  *
130  *
131  * Use of hash tables
132  * ------------------
133  *
134  * The driver uses modhash hash table implementation. Each transport uses two
135  * hash tables - one for finding endpoints by acceptor ID and another one for
136  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
137  * pair of hash tables since sockets only use TICOTSORD.
138  *
139  * All hash tables lookups increment a reference count for returned endpoints,
140  * so we may safely check the endpoint state even when the endpoint is removed
141  * from the hash by another thread immediately after it is found.
142  *
143  *
144  * CLOSE processing
145  * ================
146  *
147  * The driver enters serializer twice on close(). The close sequence is the
148  * following:
149  *
150  * 1) Wait until closing is safe (te_closewait becomes zero)
151  *	This step is needed to prevent close during serializer switches. In most
152  *	cases (close happening after connection establishment) te_closewait is
153  *	zero.
154  * 1) Set te_closing.
155  * 2) Call tl_close_ser() within serializer and wait for it to complete.
156  *
157  *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
158  *	It also needs to clear write-side q_next pointers - this should be done
159  *	before qprocsoff().
160  *
161  *    This synchronous serializer entry during close is needed to ensure that
162  *    the queue is valid everywhere inside the serializer.
163  *
164  *    Note that in many cases close will execute tl_close_ser() synchronously,
165  *    so it will not wait at all.
166  *
167  * 3) Calls qprocsoff().
168  * 4) Calls tl_close_finish_ser() within the serializer and waits for it to
169  *	complete (for COTS transports). For CLTS transport there is no wait.
170  *
171  *	tl_close_finish_ser() Finishes the close process and wakes up waiting
172  *	close if there is any.
173  *
174  *    Note that in most cases close will enter tl_close_finish_ser()
175  *    synchronously and will not wait at all.
176  *
177  *
178  * Flow Control
179  * ============
180  *
181  * The driver implements both read and write side service routines. No one calls
182  * putq() on the read queue. The read side service routine tl_rsrv() is called
183  * when the read side stream is back-enabled. It enters serializer synchronously
184  * (waits till serializer processing is complete). Within serializer it
185  * back-enables all endpoints blocked by the queue for connection-less
186  * transports and enables write side service processing for the peer for
187  * connection-oriented transports.
188  *
189  * Read and write side service routines use special mblk_sized space in the
190  * endpoint structure to enter perimeter.
191  *
192  * Write-side flow control
193  * -----------------------
194  *
195  * Write side flow control is a bit tricky. The driver needs to deal with two
196  * message queues - the explicit STREAMS message queue maintained by
197  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
198  * queues should be synchronized to preserve message ordering and should
199  * maintain a single order determined by the order in which messages enter
200  * tl_wput(). In order to maintain the ordering between these two queues the
201  * STREAMS queue is only manipulated within the serializer, so the ordering is
202  * provided by the serializer.
203  *
204  * Functions called from the tl_wsrv() sometimes may call putbq(). To
205  * immediately stop any further processing of the STREAMS message queues the
206  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
207  * side service processing stops when the flag is set.
208  *
209  * The tl_wsrv() function enters serializer synchronously and waits for it to
210  * complete. The serializer call-back tl_wsrv_ser() either drains all messages
211  * on the STREAMS queue or terminates when it notices the te_nowsrv flag
212  * set. Note that the maximum amount of messages processed by tl_wput_ser() is
213  * always bounded by the amount of messages on the STREAMS queue at the time
214  * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
215  * queue from another serialized entry which can't happen in parallel. This
216  * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
217  * of it draining forever while writer places new messages on the STREAMS
218  * queue).
219  *
220  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
221  *
222  *
223  * Unix Domain Sockets
224  * ===================
225  *
226  * The driver knows the structure of Unix Domain sockets addresses and treats
227  * them differently from generic TLI addresses. For sockets implicit binds are
228  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
229  * instead of using address length of zero. Explicit binds specify
230  * SOU_MAGIC_EXPLICIT as magic.
231  *
232  * For implicit binds we always use minor number as soua_vp part of the address
233  * and avoid any hash table lookups. This saves two hash tables lookups per
234  * anonymous bind.
235  *
236  * For explicit address we hash the vnode pointer instead of hashing the
237  * full-scale address+zone+length. Hashing by pointer is more efficient than
238  * hashing by the full address.
239  *
240  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
241  * tep structure, so it should be never freed.
242  *
243  * Also for sockets the driver always uses minor number as acceptor id.
244  *
245  * TPI VIOLATIONS
246  * --------------
247  *
248  * This driver violates TPI in several respects for Unix Domain Sockets:
249  *
250  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
251  *	is requested and the endpoint is already in use. There is no point in
252  *	generating an unused address since this address will be rejected by
253  *	sockfs anyway. For implicit binds it always generates a new address
254  *	(sets soua_vp to its minor number).
255  *
256  * 2) It always uses minor number as acceptor ID and never uses queue
257  *	pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
258  *	message and they do not use the queue pointer.
259  *
260  * 3) For Listener sockets the usual sequence is to issue bind() with zero backlog
261  *	followed by listen(). The listen() should be issued with non-zero
262  *	backlog, so sotpi_listen() issues unbind request followed by bind
263  *	request to the same address but with a non-zero qlen value. Both
264  *	tl_bind() and tl_unbind() require write lock on the hash table to
265  *	insert/remove the address. The driver does not remove the address from
266  *	the hash for endpoints that are bound to the explicit address and have
267  *	backlog of zero. During T_BIND_REQ processing if the address requested
268  *	is equal to the address the endpoint already has it updates the backlog
269  *	without reinserting the address in the hash table. This optimization
270  *	avoids two hash table updates for each listener created. It also
271  *	avoids the problem of a "stolen" address when another listener may use
272  *	the same address between the unbind and bind and suddenly listen() fails
273  *	because address is in use even though the bind() succeeded.
274  *
275  *
276  * CONNECTIONLESS TRANSPORTS
277  * =========================
278  *
279  * Connectionless transports all share the same serializer (one for TLI and one
280  * for Sockets). Functions executing behind serializer can check or modify state
281  * of any endpoint.
282  *
283  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
284  * te_lastep field. The next time X talks to some address A it checks whether A
285  * is the same as Y's address and if it is there is no need to lookup Y. If the
286  * address is different or the state of Y is not appropriate (e.g. closed or not
287  * idle) X does a lookup using tl_find_peer() and caches the new address.
288  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
289  * on the endpoint found.
290  *
291  * During close of endpoint Y it doesn't try to remove itself from other
292  * endpoints caches. They will detect that Y is gone and will search the peer
293  * endpoint again.
294  *
295  * Flow Control Handling.
296  * ----------------------
297  *
298  * Each connectionless endpoint keeps a list of endpoints which are
299  * flow-controlled by its queue. It also keeps a pointer to the queue which
300  * flow-controls itself.  Whenever flow control releases for endpoint X it
301  * enables all queues from the list. During close it also back-enables everyone
302  * in the list. If X is flow-controlled when it is closing it removes itself from
303  * the peer's list.
304  *
305  * DATA STRUCTURES
306  * ===============
307  *
308  * Each endpoint is represented by the tl_endpt_t structure which keeps all the
309  * endpoint state. For connection-oriented transports it keeps a list
310  * of pending connections (tl_icon_t). For connectionless transports it keeps a
311  * list of endpoints flow controlled by this one.
312  *
313  * Each transport type is represented by a per-transport data structure
314  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
315  * endpoint address hash tables for each transport. It also contains pointer to
316  * transport serializer for connectionless transports.
317  *
318  * Each endpoint keeps a link to its transport structure, so the code can find
319  * all per-transport information quickly.
320  */
321 
322 #include	<sys/types.h>
323 #include	<sys/inttypes.h>
324 #include	<sys/stream.h>
325 #include	<sys/stropts.h>
326 #define	_SUN_TPI_VERSION 2
327 #include	<sys/tihdr.h>
328 #include	<sys/strlog.h>
329 #include	<sys/debug.h>
330 #include	<sys/cred.h>
331 #include	<sys/errno.h>
332 #include	<sys/kmem.h>
333 #include	<sys/id_space.h>
334 #include	<sys/modhash.h>
335 #include	<sys/mkdev.h>
336 #include	<sys/tl.h>
337 #include	<sys/stat.h>
338 #include	<sys/conf.h>
339 #include	<sys/modctl.h>
340 #include	<sys/strsun.h>
341 #include	<sys/socket.h>
342 #include	<sys/socketvar.h>
343 #include	<sys/sysmacros.h>
344 #include	<sys/xti_xtiopt.h>
345 #include	<sys/ddi.h>
346 #include	<sys/sunddi.h>
347 #include	<sys/zone.h>
348 #include	<inet/common.h>	/* typedef int (*pfi_t)() for inet/optcom.h */
349 #include	<inet/optcom.h>
350 #include	<sys/strsubr.h>
351 #include	<sys/ucred.h>
352 #include	<sys/suntpi.h>
353 #include	<sys/list.h>
354 #include	<sys/serializer.h>
355 
356 /*
357  * TBD List
358  * 14. Eliminate state changes through table
359  * 16. AF_UNIX socket options
360  * 17. connect() for ticlts
361  * 18. support for "netstat" to show AF_UNIX plus TLI local
362  *	transport connections
363  * 21. sanity check to flushing on sending M_ERROR
364  */
365 
366 /*
367  * CONSTANT DECLARATIONS
368  * --------------------
369  */
370 
371 /*
372  * Local declarations
373  */
374 #define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]	/* [event][state] */
375 
376 #define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
377 #define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
378 #define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgz" is unlimited (0) */
379 /*
380  * Hash tables size (initial value of tl_hash_size).
381  */
382 #define	TL_HASH_SIZE 311
383 
384 /*
385  * Definitions for module_info (used to initialize tl_minfo).
386  */
387 #define		TL_ID		(104)		/* module ID number */
388 #define		TL_NAME		"tl"		/* module name */
389 #define		TL_MINPSZ	(0)		/* min packet size */
390 #define		TL_MAXPSZ	INFPSZ 		/* max packet size ZZZ */
391 #define		TL_HIWAT	(16*1024)	/* hi water mark */
392 #define		TL_LOWAT	(256)		/* lo water mark */
393 /*
394  * Definition of minor numbers/modes for new transport provider modes.
395  * We view the socket use as a separate mode to get a separate name space.
396  */
397 #define		TL_TICOTS	0	/* connection oriented transport */
398 #define		TL_TICOTSORD 	1	/* COTS w/ orderly release */
399 #define		TL_TICLTS 	2	/* connectionless transport */
400 #define		TL_UNUSED	3	/* "undefined" slot in tl_transports */
401 #define		TL_SOCKET	4	/* Socket */
402 #define		TL_SOCK_COTS	(TL_SOCKET|TL_TICOTS)	/* == 4 */
403 #define		TL_SOCK_COTSORD	(TL_SOCKET|TL_TICOTSORD)	/* == 5 */
404 #define		TL_SOCK_CLTS	(TL_SOCKET|TL_TICLTS)	/* == 6 */
405 
406 #define		TL_MINOR_MASK	0x7	/* covers every mode value above */
407 #define		TL_MINOR_START	(TL_TICLTS + 1)	/* == 3; presumably first minor in tl_minors - confirm */
408 
409 /*
410  * LOCAL MACROS
411  */
412 #define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))	/* round p up to a t_scalar_t multiple */
413 
414 /*
415  * EXTERNAL VARIABLE DECLARATIONS
416  * -----------------------------
417  */
418 /*
419  * TPI state table defined in the OS space.c; read through NEXTSTATE().
420  */
421 extern	char	ti_statetbl[TE_NOEVENTS][TS_NOSTATES];	/* [event][state] */
422 
423 /*
424  * STREAMS DRIVER ENTRY POINTS PROTOTYPES
425  */
426 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);	/* qi_qopen */
427 static int tl_close(queue_t *, int, cred_t *);	/* qi_qclose */
428 static void tl_wput(queue_t *, mblk_t *);	/* write-side qi_putp */
429 static void tl_wsrv(queue_t *);	/* write-side qi_srvp */
430 static void tl_rsrv(queue_t *);	/* read-side qi_srvp */
431 
432 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);	/* passed to DDI_DEFINE_STREAM_OPS */
433 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);	/* passed to DDI_DEFINE_STREAM_OPS */
434 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);	/* uses tl_dip */
435 
436 
437 /*
438  * GLOBAL DATA STRUCTURES AND VARIABLES
439  * -----------------------------------
440  */
441 
442 /*
443  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
444  * For now, we only manage the SO_RECVUCRED option but we also have
445  * harmless dummy options to make things work with some common code we access.
446  */
447 opdes_t	tl_opt_arr[] = {	/* NOTE(review): field notes follow opdes_t in <inet/optcom.h> - confirm */
448 	/* The SO_TYPE is needed for the hack below */
449 	{
450 		SO_TYPE,		/* option name */
451 		SOL_SOCKET,		/* option level */
452 		OA_R,			/* presumably access, non-privileged */
453 		OA_R,			/* presumably access, privileged */
454 		OP_NP,			/* presumably required privilege */
455 		0,			/* presumably property flags */
456 		sizeof (t_scalar_t),	/* option data size */
457 		0			/* presumably default value */
458 	},
459 	{
460 		SO_RECVUCRED,		/* option name */
461 		SOL_SOCKET,		/* option level */
462 		OA_RW,			/* presumably access, non-privileged */
463 		OA_RW,			/* presumably access, privileged */
464 		OP_NP,			/* presumably required privilege */
465 		0,			/* presumably property flags */
466 		sizeof (int),		/* option data size */
467 		0			/* presumably default value */
468 	}
469 };
470 
471 /*
472  * Table of all supported levels
473  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
474  * any supported options so we need this info separately.
475  *
476  * This is needed only for topmost tpi providers.
477  */
478 optlevel_t	tl_valid_levels_arr[] = {
479 	XTI_GENERIC,	/* valid level, but no entry in tl_opt_arr */
480 	SOL_SOCKET,	/* level of every entry in tl_opt_arr */
481 	TL_PROT_LEVEL	/* valid level, but no entry in tl_opt_arr */
482 };
483 
484 #define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr)	/* number of levels above */
485 /*
486  * Current upper bound on the amount of space needed to return all options:
487  * 4 bytes of data and one opthdr per option, 64 bytes of slack and the ack
488  * header.  Options with larger data need this bound adjusted by hand.
489  */
490 #define	TL_MAX_OPT_BUF_LEN						\
491 		((A_CNT(tl_opt_arr) << 2) +				\
492 		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +		\
493 		64 + sizeof (struct T_optmgmt_ack))
494 
495 #define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)	/* number of option descriptors */
496 
497 /*
498  *	Transport address: a zone scope plus an address buffer and its length.
499  */
500 typedef struct tl_addr {
501 	zoneid_t	ta_zoneid;		/* Zone scope of address */
502 	t_scalar_t	ta_alen;		/* length of ta_abuf */
503 	void		*ta_abuf;		/* the addr itself */
504 } tl_addr_t;
505 
506 /*
507  * Refcounted version of serializer (several endpoints may share one).
508  */
509 typedef struct tl_serializer {
510 	uint_t		ts_refcnt;	/* reference count */
511 	serializer_t	*ts_serializer;	/* the wrapped serializer */
512 } tl_serializer_t;
513 
514 /*
515  * Each transport type has a separate state.
516  * Per-transport state; instances live in tl_transports[].
517  */
518 typedef struct tl_transport_state {
519 	char		*tr_name;	/* transport name, e.g. "ticots" */
520 	minor_t		tr_minor;	/* mode value (TL_TICOTS, ...) */
521 	uint32_t	tr_defaddr;	/* starts at TL_DFADDR */
522 	mod_hash_t	*tr_ai_hash;	/* endpoints by acceptor ID */
523 	mod_hash_t	*tr_addr_hash;	/* endpoints by address */
524 	tl_serializer_t	*tr_serializer;	/* used by connectionless transports */
525 } tl_transport_state_t;
526 
527 #define	TL_DFADDR 0x1000	/* initial tr_defaddr of every transport */
528 /* Per-mode table; entry i has tr_minor == i. Hashes and serializer start NULL. */
529 static tl_transport_state_t tl_transports[] = {
530 	{ "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
531 	{ "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
532 	{ "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
533 	{ "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
534 	{ "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
535 	{ "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
536 	{ "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
537 };
538 
539 #define	TL_MAXTRANSPORT A_CNT(tl_transports)	/* number of table entries */
540 
541 struct tl_endpt;	/* defined below; forward-declared for the typedefs */
542 typedef struct tl_endpt tl_endpt_t;
543 
544 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);	/* handler shape: (message, endpoint) */
545 
546 /*
547  * Data structure used to represent pending connects.
548  * Records enough information so that the connecting peer can close
549  * before the connection gets accepted. Kept on an endpoint's te_iconp list.
550  */
551 typedef struct tl_icon {
552 	list_node_t	ti_node;	/* list linkage */
553 	struct tl_endpt *ti_tep;	/* NULL if peer has already closed */
554 	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
555 	t_scalar_t	ti_seqno;	/* Sequence number */
556 } tl_icon_t;
557 
558 typedef struct so_ux_addr soux_addr_t;	/* Unix domain socket address */
559 #define	TL_SOUX_ADDRLEN sizeof (soux_addr_t)	/* size of a socket address */
560 
561 /*
562  * Maximum number of unaccepted connection indications allowed per listener.
563  */
564 #define	TL_MAXQLEN	4096
565 int tl_maxqlen = TL_MAXQLEN;	/* non-static; presumably a tunable - confirm */
566 
567 /*
568  *	transport endpoint structure; several members are reached via #define aliases
569  */
570 struct tl_endpt {
571 	queue_t		*te_rq;		/* stream read queue */
572 	queue_t		*te_wq;		/* stream write queue */
573 	uint32_t	te_refcnt;	/* references; freed when last is dropped */
574 	int32_t 	te_state;	/* TPI state of endpoint */
575 	minor_t		te_minor;	/* minor number */
576 #define	te_seqno	te_minor	/* seqno is an alias of the minor */
577 	uint_t		te_flag;	/* flag field */
578 	boolean_t	te_nowsrv;	/* set by TL_PUTBQ; stops wsrv work */
579 	tl_serializer_t	*te_ser;	/* Serializer to use */
580 #define	te_serializer	te_ser->ts_serializer
581 
582 	soux_addr_t	te_uxaddr;	/* Socket address */
583 #define	te_magic	te_uxaddr.soua_magic
584 #define	te_vp		te_uxaddr.soua_vp
585 	tl_addr_t	te_ap;		/* addr bound to this endpt */
586 #define	te_zoneid te_ap.ta_zoneid
587 #define	te_alen	te_ap.ta_alen
588 #define	te_abuf	te_ap.ta_abuf
589 
590 	tl_transport_state_t *te_transport;	/* per-transport state */
591 #define	te_addrhash	te_transport->tr_addr_hash
592 #define	te_aihash	te_transport->tr_ai_hash
593 #define	te_defaddr	te_transport->tr_defaddr
594 	cred_t		*te_credp;	/* endpoint user credentials */
595 	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */
596 
597 	/*
598 	 * State specific for connection-oriented and connectionless transports.
599 	 */
600 	union {
601 		/* Connection-oriented state. */
602 		struct {
603 			t_uscalar_t _te_nicon;	/* count of conn requests */
604 			t_uscalar_t _te_qlen;	/* max conn requests */
605 			tl_endpt_t  *_te_oconp;	/* conn request pending */
606 			tl_endpt_t  *_te_conp;	/* connected endpt */
607 #ifndef _ILP32
608 			void	    *_te_pad;	/* presumably aligns the two list_t's - confirm */
609 #endif
610 			list_t	_te_iconp;	/* list of conn ind. pending */
611 		} _te_cots_state;
612 		/* Connection-less state. */
613 		struct {
614 			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
615 			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
616 			list_node_t _te_flows;	/* lists of connections */
617 			list_t  _te_flowlist;	/* Who flowcontrols on me */
618 		} _te_clts_state;
619 	} _te_transport_state;
620 #define	te_nicon	_te_transport_state._te_cots_state._te_nicon
621 #define	te_qlen		_te_transport_state._te_cots_state._te_qlen
622 #define	te_oconp	_te_transport_state._te_cots_state._te_oconp
623 #define	te_conp		_te_transport_state._te_cots_state._te_conp
624 #define	te_iconp	_te_transport_state._te_cots_state._te_iconp
625 #define	te_lastep	_te_transport_state._te_clts_state._te_lastep
626 #define	te_flowq	_te_transport_state._te_clts_state._te_flowq
627 #define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
628 #define	te_flows	_te_transport_state._te_clts_state._te_flows
629 
630 	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
631 	timeout_id_t	te_timoutid;	/* outstanding timeout id */
632 	pid_t		te_cpid;	/* cached pid of endpoint */
633 	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
634 	/*
635 	 * Pieces of the endpoint state needed for closing.
636 	 */
637 	kmutex_t	te_closelock;	/* presumably guards close state - confirm */
638 	kcondvar_t	te_closecv;	/* presumably signals waiting close - confirm */
639 	uint8_t		te_closing;	/* The endpoint started closing */
640 	uint8_t		te_closewait;	/* Wait in close until zero */
641 	mblk_t		te_closemp;	/* for entering serializer on close */
642 	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
643 	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
644 	kmutex_t	te_srv_lock;	/* presumably guards *_active below - confirm */
645 	kcondvar_t	te_srv_cv;	/* presumably paired with te_srv_lock - confirm */
646 	uint8_t		te_rsrv_active;	/* Running in tl_rsrv()	*/
647 	uint8_t		te_wsrv_active;	/* Running in tl_wsrv()	*/
648 	/*
649 	 * Pieces of the endpoint state needed for serializer transitions.
650 	 */
651 	kmutex_t	te_ser_lock;	/* Protects the count below */
652 	uint_t		te_ser_count;	/* Number of messages on serializer */
653 };
654 
655 /*
656  * Flag values for te_flag. Lower 4 bits specify the transport used.
657  * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
658  * they make it easier to identify the endpoint.
659  */
660 #define	TL_LISTENER	0x00010	/* the listener endpoint */
661 #define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
662 #define	TL_EAGER	0x00040	/* connecting endpoint */
663 #define	TL_ACCEPTED	0x00080	/* accepted connection */
664 #define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
665 #define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
666 #define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
667 #define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
668 #define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
669 /*
670  * Boolean checks for the endpoint type; each tests a mode bit in te_flag.
671  */
672 #define		IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
673 #define		IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)	/* !IS_CLTS */
674 #define		IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
675 #define		IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)
676 
677 /*
678  * Certain operations are always used together. These macros reduce the chance
679  * of missing a part of a combination. All expand to bare { } blocks.
680  */
681 #define	TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }	/* x is evaluated twice */
682 #define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }	/* NULL-safe form */
683 /* TL_PUTBQ: put mp back on the write queue and stop write-side service. */
684 #define	TL_PUTBQ(x, mp) {		\
685 	ASSERT(!((x)->te_flag & TL_CLOSE_SER));	\
686 	(x)->te_nowsrv = B_TRUE;	\
687 	(void) putbq((x)->te_wq, mp);	\
688 }
689 /* Both macros below clear te_nowsrv before touching the write queue. */
690 #define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
691 #define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
692 
693 /*
694  * STREAMS driver glue data structures.
695  */
696 static	struct	module_info	tl_minfo = {	/* shared by both qinit's */
697 	TL_ID,			/* mi_idnum */
698 	TL_NAME,		/* mi_idname */
699 	TL_MINPSZ,		/* mi_minpsz */
700 	TL_MAXPSZ,		/* mi_maxpsz */
701 	TL_HIWAT,		/* mi_hiwat */
702 	TL_LOWAT		/* mi_lowat */
703 };
704 
705 static	struct	qinit	tl_rinit = {	/* read side */
706 	NULL,			/* qi_putp - no read-side put procedure */
707 	(int (*)())tl_rsrv,	/* qi_srvp - cast, tl_rsrv returns void */
708 	tl_open,		/* qi_qopen */
709 	tl_close,		/* qi_qclose */
710 	NULL,			/* qi_qadmin */
711 	&tl_minfo,		/* qi_minfo */
712 	NULL			/* qi_mstat */
713 };
714 
715 static	struct	qinit	tl_winit = {	/* write side */
716 	(int (*)())tl_wput,	/* qi_putp - cast, tl_wput returns void */
717 	(int (*)())tl_wsrv,	/* qi_srvp - cast, tl_wsrv returns void */
718 	NULL,			/* qi_qopen - open/close are in tl_rinit */
719 	NULL,			/* qi_qclose */
720 	NULL,			/* qi_qadmin */
721 	&tl_minfo,		/* qi_minfo */
722 	NULL			/* qi_mstat */
723 };
724 
725 static	struct streamtab	tlinfo = {	/* passed to DDI_DEFINE_STREAM_OPS */
726 	&tl_rinit,		/* st_rdinit */
727 	&tl_winit,		/* st_wrinit */
728 	NULL,			/* st_muxrinit */
729 	NULL			/* st_muxwrinit */
730 };
731 
732 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
733     nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
734 
735 static struct modldrv modldrv = {
736 	&mod_driverops,		/* Type of module -- pseudo driver here */
737 	"TPI Local Transport (tl)",	/* descriptive string */
738 	&tl_devops,		/* driver ops */
739 };
740 
741 /*
742  * Module linkage information for the kernel.
743  */
744 static struct modlinkage modlinkage = {
745 	MODREV_1,	/* presumably the linkage revision - confirm */
746 	&modldrv,	/* the driver linkage defined above */
747 	NULL		/* presumably terminates the list - confirm */
748 };
749 
750 /*
751  * Templates for response to info request; fields marked "fill at run time"
752  * are zero here. Check sanity of unlimited connect data etc.
753  */
754 
755 #define		TL_CLTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
756 #define		TL_COTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
757 
758 static struct T_info_ack tl_cots_info_ack =
759 	{
760 		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
761 		T_INFINITE,	/* TSDU_size */
762 		T_INFINITE,	/* ETSDU_size */
763 		T_INFINITE,	/* CDATA_size */
764 		T_INFINITE,	/* DDATA_size */
765 		T_INFINITE,	/* ADDR_size */
766 		T_INFINITE,	/* OPT_size */
767 		0,		/* TIDU_size - fill at run time */
768 		T_COTS,		/* SERV_type */
769 		-1,		/* CURRENT_state - presumably set per endpoint */
770 		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
771 	};
772 
773 static struct T_info_ack tl_clts_info_ack =
774 	{
775 		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
776 		0,		/* TSDU_size - fill at run time */
777 		-2,		/* ETSDU_size -2 => not supported */
778 		-2,		/* CDATA_size -2 => not supported */
779 		-2,		/* DDATA_size  -2 => not supported */
780 		-1,		/* ADDR_size -1 => unlimited */
781 		-1,		/* OPT_size -1 => unlimited */
782 		0,		/* TIDU_size - fill at run time */
783 		T_CLTS,		/* SERV_type */
784 		-1,		/* CURRENT_state - presumably set per endpoint */
785 		TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
786 	};
787 
788 /*
789  * private copy of devinfo pointer used in tl_info
790  */
791 static dev_info_t *tl_dip;
792 
793 /*
794  * Endpoints cache (presumably of tl_endpt_t objects - confirm).
795  */
796 static kmem_cache_t *tl_cache;
797 /*
798  * Minor number space.
799  */
800 static id_space_t *tl_minors;
801 
802 /*
803  * Default Data Unit size (see TL_TIDUSZ for the unlimited-strmsgsz case).
804  */
805 static t_scalar_t tl_tidusz;
806 
807 /*
808  * Size of hash tables.
809  */
810 static size_t tl_hash_size = TL_HASH_SIZE;
811 
812 /*
813  * Debug and test variable ONLY. Turn off T_CONN_IND queueing
814  * for sockets.
815  */
816 static int tl_disable_early_connect = 0;
817 static int tl_client_closing_when_accepting;	/* NOTE(review): use not visible here; looks like a debug counter - confirm */
818 
819 static int tl_serializer_noswitch;	/* NOTE(review): presumably disables serializer switching - confirm */
820 
821 /*
822  * LOCAL FUNCTION PROTOTYPES
823  * -------------------------
824  */
825 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
826 static void tl_do_proto(mblk_t *, tl_endpt_t *);
827 static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
828 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
829 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
830 	t_scalar_t);
831 static void tl_bind(mblk_t *, tl_endpt_t *);
832 static void tl_bind_ser(mblk_t *, tl_endpt_t *);
833 static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
834 static void tl_unbind(mblk_t *, tl_endpt_t *);
835 static void tl_optmgmt(queue_t *, mblk_t *);
836 static void tl_conn_req(queue_t *, mblk_t *);
837 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
838 static void tl_conn_res(mblk_t *, tl_endpt_t *);
839 static void tl_discon_req(mblk_t *, tl_endpt_t *);
840 static void tl_capability_req(mblk_t *, tl_endpt_t *);
841 static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
842 static void tl_info_req(mblk_t *, tl_endpt_t *);
843 static void tl_addr_req(mblk_t *, tl_endpt_t *);
844 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
845 static void tl_data(mblk_t  *, tl_endpt_t *);
846 static void tl_exdata(mblk_t *, tl_endpt_t *);
847 static void tl_ordrel(mblk_t *, tl_endpt_t *);
848 static void tl_unitdata(mblk_t *, tl_endpt_t *);
849 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
850 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
851 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
852 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
853 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
854 static void tl_cl_backenable(tl_endpt_t *);
855 static void tl_co_unconnect(tl_endpt_t *);
856 static mblk_t *tl_resizemp(mblk_t *, ssize_t);
857 static void tl_discon_ind(tl_endpt_t *, uint32_t);
858 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
859 static mblk_t *tl_ordrel_ind_alloc(void);
860 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
861 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
862 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
863 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
864 static void tl_icon_freemsgs(mblk_t **);
865 static void tl_merror(queue_t *, mblk_t *, int);
866 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
867 static int tl_default_opt(queue_t *, int, int, uchar_t *);
868 static int tl_get_opt(queue_t *, int, int, uchar_t *);
869 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
870     uchar_t *, void *, cred_t *);
871 static void tl_memrecover(queue_t *, mblk_t *, size_t);
872 static void tl_freetip(tl_endpt_t *, tl_icon_t *);
873 static void tl_free(tl_endpt_t *);
874 static int  tl_constructor(void *, void *, int);
875 static void tl_destructor(void *, void *);
876 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
877 static tl_serializer_t *tl_serializer_alloc(int);
878 static void tl_serializer_refhold(tl_serializer_t *);
879 static void tl_serializer_refrele(tl_serializer_t *);
880 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
881 static void tl_serializer_exit(tl_endpt_t *);
882 static boolean_t tl_noclose(tl_endpt_t *);
883 static void tl_closeok(tl_endpt_t *);
884 static void tl_refhold(tl_endpt_t *);
885 static void tl_refrele(tl_endpt_t *);
886 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
887 static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
888 static void tl_close_ser(mblk_t *, tl_endpt_t *);
889 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
890 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
891 static void tl_proto_ser(mblk_t *, tl_endpt_t *);
892 static void tl_putq_ser(mblk_t *, tl_endpt_t *);
893 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
894 static void tl_wput_ser(mblk_t *, tl_endpt_t *);
895 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
896 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
897 static void tl_addr_unbind(tl_endpt_t *);
898 
899 /*
900  * Intialize option database object for TL
901  */
902 
903 optdb_obj_t tl_opt_obj = {
904 	tl_default_opt,		/* TL default value function pointer */
905 	tl_get_opt,		/* TL get function pointer */
906 	tl_set_opt,		/* TL set function pointer */
907 	TL_OPT_ARR_CNT,		/* TL option database count of entries */
908 	tl_opt_arr,		/* TL option database */
909 	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
910 	tl_valid_levels_arr	/* TL valid level array */
911 };
912 
913 /*
914  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
915  * ---------------------------------------
916  */
917 
918 /*
919  * Loadable module routines
920  */
921 int
922 _init(void)
923 {
924 	return (mod_install(&modlinkage));
925 }
926 
927 int
928 _fini(void)
929 {
930 	return (mod_remove(&modlinkage));
931 }
932 
933 int
934 _info(struct modinfo *modinfop)
935 {
936 	return (mod_info(&modlinkage, modinfop));
937 }
938 
939 /*
940  * Driver Entry Points and Other routines
941  */
942 static int
943 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
944 {
945 	int i;
946 	char name[32];
947 
948 	/*
949 	 * Resume from a checkpoint state.
950 	 */
951 	if (cmd == DDI_RESUME)
952 		return (DDI_SUCCESS);
953 
954 	if (cmd != DDI_ATTACH)
955 		return (DDI_FAILURE);
956 
957 	/*
958 	 * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
959 	 * streams message sizes can be unlimited. We use a defined constant
960 	 * instead.
961 	 */
962 	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
963 
964 	/*
965 	 * Create subdevices for each transport.
966 	 */
967 	for (i = 0; i < TL_UNUSED; i++) {
968 		if (ddi_create_minor_node(devi,
969 		    tl_transports[i].tr_name,
970 		    S_IFCHR, tl_transports[i].tr_minor,
971 		    DDI_PSEUDO, NULL) == DDI_FAILURE) {
972 			ddi_remove_minor_node(devi, NULL);
973 			return (DDI_FAILURE);
974 		}
975 	}
976 
977 	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
978 	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
979 
980 	if (tl_cache == NULL) {
981 		ddi_remove_minor_node(devi, NULL);
982 		return (DDI_FAILURE);
983 	}
984 
985 	tl_minors = id_space_create("tl_minor_space",
986 	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
987 
988 	/*
989 	 * Create ID space for minor numbers
990 	 */
991 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
992 		tl_transport_state_t *t = &tl_transports[i];
993 
994 		if (i == TL_UNUSED)
995 			continue;
996 
997 		/* Socket COTSORD shares namespace with COTS */
998 		if (i == TL_SOCK_COTSORD) {
999 			t->tr_ai_hash =
1000 			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
1001 			ASSERT(t->tr_ai_hash != NULL);
1002 			t->tr_addr_hash =
1003 			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
1004 			ASSERT(t->tr_addr_hash != NULL);
1005 			continue;
1006 		}
1007 
1008 		/*
1009 		 * Create hash tables.
1010 		 */
1011 		(void) snprintf(name, sizeof (name), "%s_ai_hash",
1012 		    t->tr_name);
1013 #ifdef _ILP32
1014 		if (i & TL_SOCKET)
1015 			t->tr_ai_hash =
1016 			    mod_hash_create_idhash(name, tl_hash_size - 1,
1017 			    mod_hash_null_valdtor);
1018 		else
1019 			t->tr_ai_hash =
1020 			    mod_hash_create_ptrhash(name, tl_hash_size,
1021 			    mod_hash_null_valdtor, sizeof (queue_t));
1022 #else
1023 		t->tr_ai_hash =
1024 		    mod_hash_create_idhash(name, tl_hash_size - 1,
1025 		    mod_hash_null_valdtor);
1026 #endif /* _ILP32 */
1027 
1028 		if (i & TL_SOCKET) {
1029 			(void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1030 			    t->tr_name);
1031 			t->tr_addr_hash = mod_hash_create_ptrhash(name,
1032 			    tl_hash_size, mod_hash_null_valdtor,
1033 			    sizeof (uintptr_t));
1034 		} else {
1035 			(void) snprintf(name, sizeof (name), "%s_addr_hash",
1036 			    t->tr_name);
1037 			t->tr_addr_hash = mod_hash_create_extended(name,
1038 			    tl_hash_size, mod_hash_null_keydtor,
1039 			    mod_hash_null_valdtor,
1040 			    tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1041 		}
1042 
1043 		/* Create serializer for connectionless transports. */
1044 		if (i & TL_TICLTS)
1045 			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1046 	}
1047 
1048 	tl_dip = devi;
1049 
1050 	return (DDI_SUCCESS);
1051 }
1052 
1053 static int
1054 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1055 {
1056 	int i;
1057 
1058 	if (cmd == DDI_SUSPEND)
1059 		return (DDI_SUCCESS);
1060 
1061 	if (cmd != DDI_DETACH)
1062 		return (DDI_FAILURE);
1063 
1064 	/*
1065 	 * Destroy arenas and hash tables.
1066 	 */
1067 	for (i = 0; i < TL_MAXTRANSPORT; i++) {
1068 		tl_transport_state_t *t = &tl_transports[i];
1069 
1070 		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1071 			continue;
1072 
1073 		EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
1074 		if (t->tr_serializer != NULL) {
1075 			tl_serializer_refrele(t->tr_serializer);
1076 			t->tr_serializer = NULL;
1077 		}
1078 
1079 #ifdef _ILP32
1080 		if (i & TL_SOCKET)
1081 			mod_hash_destroy_idhash(t->tr_ai_hash);
1082 		else
1083 			mod_hash_destroy_ptrhash(t->tr_ai_hash);
1084 #else
1085 		mod_hash_destroy_idhash(t->tr_ai_hash);
1086 #endif /* _ILP32 */
1087 		t->tr_ai_hash = NULL;
1088 		if (i & TL_SOCKET)
1089 			mod_hash_destroy_ptrhash(t->tr_addr_hash);
1090 		else
1091 			mod_hash_destroy_hash(t->tr_addr_hash);
1092 		t->tr_addr_hash = NULL;
1093 	}
1094 
1095 	kmem_cache_destroy(tl_cache);
1096 	tl_cache = NULL;
1097 	id_space_destroy(tl_minors);
1098 	tl_minors = NULL;
1099 	ddi_remove_minor_node(devi, NULL);
1100 	return (DDI_SUCCESS);
1101 }
1102 
1103 /* ARGSUSED */
1104 static int
1105 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1106 {
1107 
1108 	int retcode = DDI_FAILURE;
1109 
1110 	switch (infocmd) {
1111 
1112 	case DDI_INFO_DEVT2DEVINFO:
1113 		if (tl_dip != NULL) {
1114 			*result = (void *)tl_dip;
1115 			retcode = DDI_SUCCESS;
1116 		}
1117 		break;
1118 
1119 	case DDI_INFO_DEVT2INSTANCE:
1120 		*result = (void *)0;
1121 		retcode = DDI_SUCCESS;
1122 		break;
1123 
1124 	default:
1125 		break;
1126 	}
1127 	return (retcode);
1128 }
1129 
1130 /*
1131  * Endpoint reference management.
1132  */
1133 static void
1134 tl_refhold(tl_endpt_t *tep)
1135 {
1136 	atomic_add_32(&tep->te_refcnt, 1);
1137 }
1138 
1139 static void
1140 tl_refrele(tl_endpt_t *tep)
1141 {
1142 	ASSERT(tep->te_refcnt != 0);
1143 
1144 	if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0)
1145 		tl_free(tep);
1146 }
1147 
1148 /*ARGSUSED*/
1149 static int
1150 tl_constructor(void *buf, void *cdrarg, int kmflags)
1151 {
1152 	tl_endpt_t *tep = buf;
1153 
1154 	bzero(tep, sizeof (tl_endpt_t));
1155 	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1156 	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1157 	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1158 	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1159 	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1160 
1161 	return (0);
1162 }
1163 
1164 /*ARGSUSED*/
1165 static void
1166 tl_destructor(void *buf, void *cdrarg)
1167 {
1168 	tl_endpt_t *tep = buf;
1169 
1170 	mutex_destroy(&tep->te_closelock);
1171 	cv_destroy(&tep->te_closecv);
1172 	mutex_destroy(&tep->te_srv_lock);
1173 	cv_destroy(&tep->te_srv_cv);
1174 	mutex_destroy(&tep->te_ser_lock);
1175 }
1176 
1177 static void
1178 tl_free(tl_endpt_t *tep)
1179 {
1180 	ASSERT(tep->te_refcnt == 0);
1181 	ASSERT(tep->te_transport != NULL);
1182 	ASSERT(tep->te_rq == NULL);
1183 	ASSERT(tep->te_wq == NULL);
1184 	ASSERT(tep->te_ser != NULL);
1185 	ASSERT(tep->te_ser_count == 0);
1186 	ASSERT(! (tep->te_flag & TL_ADDRHASHED));
1187 
1188 	if (IS_SOCKET(tep)) {
1189 		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
1190 		ASSERT(tep->te_abuf == &tep->te_uxaddr);
1191 		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
1192 		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
1193 	} else if (tep->te_abuf != NULL) {
1194 		kmem_free(tep->te_abuf, tep->te_alen);
1195 		tep->te_alen = -1; /* uninitialized */
1196 		tep->te_abuf = NULL;
1197 	} else {
1198 		ASSERT(tep->te_alen == -1);
1199 	}
1200 
1201 	id_free(tl_minors, tep->te_minor);
1202 	ASSERT(tep->te_credp == NULL);
1203 
1204 	if (tep->te_hash_hndl != NULL)
1205 		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);
1206 
1207 	if (IS_COTS(tep)) {
1208 		TL_REMOVE_PEER(tep->te_conp);
1209 		TL_REMOVE_PEER(tep->te_oconp);
1210 		tl_serializer_refrele(tep->te_ser);
1211 		tep->te_ser = NULL;
1212 		ASSERT(tep->te_nicon == 0);
1213 		ASSERT(list_head(&tep->te_iconp) == NULL);
1214 	} else {
1215 		ASSERT(tep->te_lastep == NULL);
1216 		ASSERT(list_head(&tep->te_flowlist) == NULL);
1217 		ASSERT(tep->te_flowq == NULL);
1218 	}
1219 
1220 	ASSERT(tep->te_bufcid == 0);
1221 	ASSERT(tep->te_timoutid == 0);
1222 	bzero(&tep->te_ap, sizeof (tep->te_ap));
1223 	tep->te_acceptor_id = 0;
1224 
1225 	ASSERT(tep->te_closewait == 0);
1226 	ASSERT(!tep->te_rsrv_active);
1227 	ASSERT(!tep->te_wsrv_active);
1228 	tep->te_closing = 0;
1229 	tep->te_nowsrv = B_FALSE;
1230 	tep->te_flag = 0;
1231 
1232 	kmem_cache_free(tl_cache, tep);
1233 }
1234 
1235 /*
1236  * Allocate/free reference-counted wrappers for serializers.
1237  */
1238 static tl_serializer_t *
1239 tl_serializer_alloc(int flags)
1240 {
1241 	tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1242 	serializer_t *ser;
1243 
1244 	if (s == NULL)
1245 		return (NULL);
1246 
1247 	ser = serializer_create(flags);
1248 
1249 	if (ser == NULL) {
1250 		kmem_free(s, sizeof (tl_serializer_t));
1251 		return (NULL);
1252 	}
1253 
1254 	s->ts_refcnt = 1;
1255 	s->ts_serializer = ser;
1256 	return (s);
1257 }
1258 
1259 static void
1260 tl_serializer_refhold(tl_serializer_t *s)
1261 {
1262 	atomic_add_32(&s->ts_refcnt, 1);
1263 }
1264 
1265 static void
1266 tl_serializer_refrele(tl_serializer_t *s)
1267 {
1268 	if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) {
1269 		serializer_destroy(s->ts_serializer);
1270 		kmem_free(s, sizeof (tl_serializer_t));
1271 	}
1272 }
1273 
1274 /*
1275  * Post a request on the endpoint serializer. For COTS transports keep track of
1276  * the number of pending requests.
1277  */
1278 static void
1279 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1280 {
1281 	if (IS_COTS(tep)) {
1282 		mutex_enter(&tep->te_ser_lock);
1283 		tep->te_ser_count++;
1284 		mutex_exit(&tep->te_ser_lock);
1285 	}
1286 	serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1287 }
1288 
1289 /*
1290  * Complete processing the request on the serializer. Decrement the counter for
1291  * pending requests for COTS transports.
1292  */
1293 static void
1294 tl_serializer_exit(tl_endpt_t *tep)
1295 {
1296 	if (IS_COTS(tep)) {
1297 		mutex_enter(&tep->te_ser_lock);
1298 		ASSERT(tep->te_ser_count != 0);
1299 		tep->te_ser_count--;
1300 		mutex_exit(&tep->te_ser_lock);
1301 	}
1302 }
1303 
1304 /*
1305  * Hash management functions.
1306  */
1307 
1308 /*
1309  * Return TRUE if two addresses are equal, false otherwise.
1310  */
1311 static boolean_t
1312 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1313 {
1314 	return ((ap1->ta_alen > 0) &&
1315 	    (ap1->ta_alen == ap2->ta_alen) &&
1316 	    (ap1->ta_zoneid == ap2->ta_zoneid) &&
1317 	    (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1318 }
1319 
1320 /*
1321  * This function is called whenever an endpoint is found in the hash table.
1322  */
1323 /* ARGSUSED0 */
1324 static void
1325 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1326 {
1327 	tl_refhold((tl_endpt_t *)val);
1328 }
1329 
1330 /*
1331  * Address hash function.
1332  */
1333 /* ARGSUSED */
1334 static uint_t
1335 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1336 {
1337 	tl_addr_t *ap = (tl_addr_t *)key;
1338 	size_t	len = ap->ta_alen;
1339 	uchar_t *p = ap->ta_abuf;
1340 	uint_t i, g;
1341 
1342 	ASSERT((len > 0) && (p != NULL));
1343 
1344 	for (i = ap->ta_zoneid; len -- != 0; p++) {
1345 		i = (i << 4) + (*p);
1346 		if ((g = (i & 0xf0000000U)) != 0) {
1347 			i ^= (g >> 24);
1348 			i ^= g;
1349 		}
1350 	}
1351 	return (i);
1352 }
1353 
1354 /*
1355  * This function is used by hash lookups. It compares two generic addresses.
1356  */
1357 static int
1358 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1359 {
1360 #ifdef 	DEBUG
1361 	tl_addr_t *ap1 = (tl_addr_t *)key1;
1362 	tl_addr_t *ap2 = (tl_addr_t *)key2;
1363 
1364 	ASSERT(key1 != NULL);
1365 	ASSERT(key2 != NULL);
1366 
1367 	ASSERT(ap1->ta_abuf != NULL);
1368 	ASSERT(ap2->ta_abuf != NULL);
1369 	ASSERT(ap1->ta_alen > 0);
1370 	ASSERT(ap2->ta_alen > 0);
1371 #endif
1372 
1373 	return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1374 }
1375 
1376 /*
1377  * Prevent endpoint from closing if possible.
1378  * Return B_TRUE on success, B_FALSE on failure.
1379  */
1380 static boolean_t
1381 tl_noclose(tl_endpt_t *tep)
1382 {
1383 	boolean_t rc = B_FALSE;
1384 
1385 	mutex_enter(&tep->te_closelock);
1386 	if (! tep->te_closing) {
1387 		ASSERT(tep->te_closewait == 0);
1388 		tep->te_closewait++;
1389 		rc = B_TRUE;
1390 	}
1391 	mutex_exit(&tep->te_closelock);
1392 	return (rc);
1393 }
1394 
1395 /*
1396  * Allow endpoint to close if needed.
1397  */
1398 static void
1399 tl_closeok(tl_endpt_t *tep)
1400 {
1401 	ASSERT(tep->te_closewait > 0);
1402 	mutex_enter(&tep->te_closelock);
1403 	ASSERT(tep->te_closewait == 1);
1404 	tep->te_closewait--;
1405 	cv_signal(&tep->te_closecv);
1406 	mutex_exit(&tep->te_closelock);
1407 }
1408 
1409 /*
1410  * STREAMS open entry point.
1411  */
1412 /* ARGSUSED */
1413 static int
1414 tl_open(queue_t	*rq, dev_t *devp, int oflag, int sflag,	cred_t	*credp)
1415 {
1416 	tl_endpt_t *tep;
1417 	minor_t	    minor = getminor(*devp);
1418 
1419 	/*
1420 	 * Driver is called directly. Both CLONEOPEN and MODOPEN
1421 	 * are illegal
1422 	 */
1423 	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
1424 		return (ENXIO);
1425 
1426 	if (rq->q_ptr != NULL)
1427 		return (0);
1428 
1429 	/* Minor number should specify the mode used for the driver. */
1430 	if ((minor >= TL_UNUSED))
1431 		return (ENXIO);
1432 
1433 	if (oflag & SO_SOCKSTR) {
1434 		minor |= TL_SOCKET;
1435 	}
1436 
1437 	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
1438 	tep->te_refcnt = 1;
1439 	tep->te_cpid = curproc->p_pid;
1440 	rq->q_ptr = WR(rq)->q_ptr = tep;
1441 	tep->te_state = TS_UNBND;
1442 	tep->te_credp = credp;
1443 	crhold(credp);
1444 	tep->te_zoneid = getzoneid();
1445 
1446 	tep->te_flag = minor & TL_MINOR_MASK;
1447 	tep->te_transport = &tl_transports[minor];
1448 
1449 	/* Allocate a unique minor number for this instance. */
1450 	tep->te_minor = (minor_t)id_alloc(tl_minors);
1451 
1452 	/* Reserve hash handle for bind(). */
1453 	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);
1454 
1455 	/* Transport-specific initialization */
1456 	if (IS_COTS(tep)) {
1457 		/* Use private serializer */
1458 		tep->te_ser = tl_serializer_alloc(KM_SLEEP);
1459 
1460 		/* Create list for pending connections */
1461 		list_create(&tep->te_iconp, sizeof (tl_icon_t),
1462 		    offsetof(tl_icon_t, ti_node));
1463 		tep->te_qlen = 0;
1464 		tep->te_nicon = 0;
1465 		tep->te_oconp = NULL;
1466 		tep->te_conp = NULL;
1467 	} else {
1468 		/* Use shared serializer */
1469 		tep->te_ser = tep->te_transport->tr_serializer;
1470 		bzero(&tep->te_flows, sizeof (list_node_t));
1471 		/* Create list for flow control */
1472 		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
1473 		    offsetof(tl_endpt_t, te_flows));
1474 		tep->te_flowq = NULL;
1475 		tep->te_lastep = NULL;
1476 
1477 	}
1478 
1479 	/* Initialize endpoint address */
1480 	if (IS_SOCKET(tep)) {
1481 		/* Socket-specific address handling. */
1482 		tep->te_alen = TL_SOUX_ADDRLEN;
1483 		tep->te_abuf = &tep->te_uxaddr;
1484 		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
1485 		tep->te_magic = SOU_MAGIC_IMPLICIT;
1486 	} else {
1487 		tep->te_alen = -1;
1488 		tep->te_abuf = NULL;
1489 	}
1490 
1491 	/* clone the driver */
1492 	*devp = makedevice(getmajor(*devp), tep->te_minor);
1493 
1494 	tep->te_rq = rq;
1495 	tep->te_wq = WR(rq);
1496 
1497 #ifdef	_ILP32
1498 	if (IS_SOCKET(tep))
1499 		tep->te_acceptor_id = tep->te_minor;
1500 	else
1501 		tep->te_acceptor_id = (t_uscalar_t)rq;
1502 #else
1503 	tep->te_acceptor_id = tep->te_minor;
1504 #endif	/* _ILP32 */
1505 
1506 
1507 	qprocson(rq);
1508 
1509 	/*
1510 	 * Insert acceptor ID in the hash. The AI hash always sleeps on
1511 	 * insertion so insertion can't fail.
1512 	 */
1513 	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
1514 	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1515 	    (mod_hash_val_t)tep);
1516 
1517 	return (0);
1518 }
1519 
1520 /* ARGSUSED1 */
1521 static int
1522 tl_close(queue_t *rq, int flag,	cred_t *credp)
1523 {
1524 	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
1525 	tl_endpt_t *elp = NULL;
1526 	queue_t *wq = tep->te_wq;
1527 	int rc;
1528 
1529 	ASSERT(wq == WR(rq));
1530 
1531 	/*
1532 	 * Remove the endpoint from acceptor hash.
1533 	 */
1534 	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
1535 	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1536 	    (mod_hash_val_t *)&elp);
1537 	ASSERT(rc == 0 && tep == elp);
1538 	if ((rc != 0) || (tep != elp)) {
1539 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
1540 		    SL_TRACE|SL_ERROR,
1541 		    "tl_close:inconsistency in AI hash"));
1542 	}
1543 
1544 	/*
1545 	 * Wait till close is safe, then mark endpoint as closing.
1546 	 */
1547 	mutex_enter(&tep->te_closelock);
1548 	while (tep->te_closewait)
1549 		cv_wait(&tep->te_closecv, &tep->te_closelock);
1550 	tep->te_closing = B_TRUE;
1551 	/*
1552 	 * Will wait for the serializer part of the close to finish, so set
1553 	 * te_closewait now.
1554 	 */
1555 	tep->te_closewait = 1;
1556 	tep->te_nowsrv = B_FALSE;
1557 	mutex_exit(&tep->te_closelock);
1558 
1559 	/*
1560 	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
1561 	 * It is safe because close will wait for tl_close_ser to finish.
1562 	 */
1563 	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
1564 
1565 	/*
1566 	 * Wait for the first phase of close to complete before qprocsoff().
1567 	 */
1568 	mutex_enter(&tep->te_closelock);
1569 	while (tep->te_closewait)
1570 		cv_wait(&tep->te_closecv, &tep->te_closelock);
1571 	mutex_exit(&tep->te_closelock);
1572 
1573 	qprocsoff(rq);
1574 
1575 	if (tep->te_bufcid) {
1576 		qunbufcall(rq, tep->te_bufcid);
1577 		tep->te_bufcid = 0;
1578 	}
1579 	if (tep->te_timoutid) {
1580 		(void) quntimeout(rq, tep->te_timoutid);
1581 		tep->te_timoutid = 0;
1582 	}
1583 
1584 	/*
1585 	 * Finish close behind serializer.
1586 	 *
1587 	 * For a CLTS endpoint increase a refcount and continue close processing
1588 	 * with serializer protection. This processing may happen asynchronously
1589 	 * with the completion of tl_close().
1590 	 *
1591 	 * Fot a COTS endpoint wait before destroying tep since the serializer
1592 	 * may go away together with tep and we need to destroy serializer
1593 	 * outside of serializer context.
1594 	 */
1595 	ASSERT(tep->te_closewait == 0);
1596 	if (IS_COTS(tep))
1597 		tep->te_closewait = 1;
1598 	else
1599 		tl_refhold(tep);
1600 
1601 	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
1602 
1603 	/*
1604 	 * For connection-oriented transports wait for all serializer activity
1605 	 * to settle down.
1606 	 */
1607 	if (IS_COTS(tep)) {
1608 		mutex_enter(&tep->te_closelock);
1609 		while (tep->te_closewait)
1610 			cv_wait(&tep->te_closecv, &tep->te_closelock);
1611 		mutex_exit(&tep->te_closelock);
1612 	}
1613 
1614 	crfree(tep->te_credp);
1615 	tep->te_credp = NULL;
1616 	tep->te_wq = NULL;
1617 	tl_refrele(tep);
1618 	/*
1619 	 * tep is likely to be destroyed now, so can't reference it any more.
1620 	 */
1621 
1622 	rq->q_ptr = wq->q_ptr = NULL;
1623 	return (0);
1624 }
1625 
1626 /*
1627  * First phase of close processing done behind the serializer.
1628  *
1629  * Do not drop the reference in the end - tl_close() wants this reference to
1630  * stay.
1631  */
1632 /* ARGSUSED0 */
1633 static void
1634 tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
1635 {
1636 	ASSERT(tep->te_closing);
1637 	ASSERT(tep->te_closewait == 1);
1638 	ASSERT(!(tep->te_flag & TL_CLOSE_SER));
1639 
1640 	tep->te_flag |= TL_CLOSE_SER;
1641 
1642 	/*
1643 	 * Drain out all messages on queue except for TL_TICOTS where the
1644 	 * abortive release semantics permit discarding of data on close
1645 	 */
1646 	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
1647 		tl_wsrv_ser(NULL, tep);
1648 	}
1649 
1650 	/* Remove address from hash table. */
1651 	tl_addr_unbind(tep);
1652 	/*
1653 	 * qprocsoff() gets confused when q->q_next is not NULL on the write
1654 	 * queue of the driver, so clear these before qprocsoff() is called.
1655 	 * Also clear q_next for the peer since this queue is going away.
1656 	 */
1657 	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
1658 		tl_endpt_t *peer_tep = tep->te_conp;
1659 
1660 		tep->te_wq->q_next = NULL;
1661 		if ((peer_tep != NULL) && !peer_tep->te_closing)
1662 			peer_tep->te_wq->q_next = NULL;
1663 	}
1664 
1665 	tep->te_rq = NULL;
1666 
1667 	/* wake up tl_close() */
1668 	tl_closeok(tep);
1669 	tl_serializer_exit(tep);
1670 }
1671 
1672 /*
1673  * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1674  * the reference for CLTS.
1675  *
1676  * Called from serializer. Should drop reference count for CLTS only.
1677  */
1678 /* ARGSUSED0 */
1679 static void
1680 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1681 {
1682 	ASSERT(tep->te_closing);
1683 	IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
1684 	IMPLY(IS_COTS(tep), tep->te_closewait == 1);
1685 
1686 	tep->te_state = -1;	/* Uninitialized */
1687 	if (IS_COTS(tep)) {
1688 		tl_co_unconnect(tep);
1689 	} else {
1690 		/* Connectionless specific cleanup */
1691 		TL_REMOVE_PEER(tep->te_lastep);
1692 		/*
1693 		 * Backenable anybody that is flow controlled waiting for
1694 		 * this endpoint.
1695 		 */
1696 		tl_cl_backenable(tep);
1697 		if (tep->te_flowq != NULL) {
1698 			list_remove(&(tep->te_flowq->te_flowlist), tep);
1699 			tep->te_flowq = NULL;
1700 		}
1701 	}
1702 
1703 	tl_serializer_exit(tep);
1704 	if (IS_COTS(tep))
1705 		tl_closeok(tep);
1706 	else
1707 		tl_refrele(tep);
1708 }
1709 
1710 /*
1711  * STREAMS write-side put procedure.
1712  * Enter serializer for most of the processing.
1713  *
1714  * The T_CONN_REQ is processed outside of serializer.
1715  */
1716 static void
1717 tl_wput(queue_t *wq, mblk_t *mp)
1718 {
1719 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
1720 	ssize_t			msz = MBLKL(mp);
1721 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
1722 	tlproc_t		*tl_proc = NULL;
1723 
1724 	switch (DB_TYPE(mp)) {
1725 	case M_DATA:
1726 		/* Only valid for connection-oriented transports */
1727 		if (IS_CLTS(tep)) {
1728 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1729 			    SL_TRACE|SL_ERROR,
1730 			    "tl_wput:M_DATA invalid for ticlts driver"));
1731 			tl_merror(wq, mp, EPROTO);
1732 			return;
1733 		}
1734 		tl_proc = tl_wput_data_ser;
1735 		break;
1736 
1737 	case M_IOCTL:
1738 		switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1739 		case TL_IOC_CREDOPT:
1740 			/* FALLTHROUGH */
1741 		case TL_IOC_UCREDOPT:
1742 			/*
1743 			 * Serialize endpoint state change.
1744 			 */
1745 			tl_proc = tl_do_ioctl_ser;
1746 			break;
1747 
1748 		default:
1749 			miocnak(wq, mp, 0, EINVAL);
1750 			return;
1751 		}
1752 		break;
1753 
1754 	case M_FLUSH:
1755 		/*
1756 		 * do canonical M_FLUSH processing
1757 		 */
1758 		if (*mp->b_rptr & FLUSHW) {
1759 			flushq(wq, FLUSHALL);
1760 			*mp->b_rptr &= ~FLUSHW;
1761 		}
1762 		if (*mp->b_rptr & FLUSHR) {
1763 			flushq(RD(wq), FLUSHALL);
1764 			qreply(wq, mp);
1765 		} else {
1766 			freemsg(mp);
1767 		}
1768 		return;
1769 
1770 	case M_PROTO:
1771 		if (msz < sizeof (prim->type)) {
1772 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1773 			    SL_TRACE|SL_ERROR,
1774 			    "tl_wput:M_PROTO data too short"));
1775 			tl_merror(wq, mp, EPROTO);
1776 			return;
1777 		}
1778 		switch (prim->type) {
1779 		case T_OPTMGMT_REQ:
1780 		case T_SVR4_OPTMGMT_REQ:
1781 			/*
1782 			 * Process TPI option management requests immediately
1783 			 * in put procedure regardless of in-order processing
1784 			 * of already queued messages.
1785 			 * (Note: This driver supports AF_UNIX socket
1786 			 * implementation.  Unless we implement this processing,
1787 			 * setsockopt() on socket endpoint will block on flow
1788 			 * controlled endpoints which it should not. That is
1789 			 * required for successful execution of VSU socket tests
1790 			 * and is consistent with BSD socket behavior).
1791 			 */
1792 			tl_optmgmt(wq, mp);
1793 			return;
1794 		case O_T_BIND_REQ:
1795 		case T_BIND_REQ:
1796 			tl_proc = tl_bind_ser;
1797 			break;
1798 		case T_CONN_REQ:
1799 			if (IS_CLTS(tep)) {
1800 				tl_merror(wq, mp, EPROTO);
1801 				return;
1802 			}
1803 			tl_conn_req(wq, mp);
1804 			return;
1805 		case T_DATA_REQ:
1806 		case T_OPTDATA_REQ:
1807 		case T_EXDATA_REQ:
1808 		case T_ORDREL_REQ:
1809 			tl_proc = tl_putq_ser;
1810 			break;
1811 		case T_UNITDATA_REQ:
1812 			if (IS_COTS(tep) ||
1813 			    (msz < sizeof (struct T_unitdata_req))) {
1814 				tl_merror(wq, mp, EPROTO);
1815 				return;
1816 			}
1817 			if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1818 				tl_proc = tl_unitdata_ser;
1819 			} else {
1820 				tl_proc = tl_putq_ser;
1821 			}
1822 			break;
1823 		default:
1824 			/*
1825 			 * process in service procedure if message already
1826 			 * queued (maintain in-order processing)
1827 			 */
1828 			if (wq->q_first != NULL) {
1829 				tl_proc = tl_putq_ser;
1830 			} else {
1831 				tl_proc = tl_wput_ser;
1832 			}
1833 			break;
1834 		}
1835 		break;
1836 
1837 	case M_PCPROTO:
1838 		/*
1839 		 * Check that the message has enough data to figure out TPI
1840 		 * primitive.
1841 		 */
1842 		if (msz < sizeof (prim->type)) {
1843 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1844 			    SL_TRACE|SL_ERROR,
1845 			    "tl_wput:M_PCROTO data too short"));
1846 			tl_merror(wq, mp, EPROTO);
1847 			return;
1848 		}
1849 		switch (prim->type) {
1850 		case T_CAPABILITY_REQ:
1851 			tl_capability_req(mp, tep);
1852 			return;
1853 		case T_INFO_REQ:
1854 			tl_proc = tl_info_req_ser;
1855 			break;
1856 		default:
1857 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1858 			    SL_TRACE|SL_ERROR,
1859 			    "tl_wput:unknown TPI msg primitive"));
1860 			tl_merror(wq, mp, EPROTO);
1861 			return;
1862 		}
1863 		break;
1864 	default:
1865 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1866 		    "tl_wput:default:unexpected Streams message"));
1867 		freemsg(mp);
1868 		return;
1869 	}
1870 
1871 	/*
1872 	 * Continue processing via serializer.
1873 	 */
1874 	ASSERT(tl_proc != NULL);
1875 	tl_refhold(tep);
1876 	tl_serializer_enter(tep, tl_proc, mp);
1877 }
1878 
1879 /*
1880  * Place message on the queue while preserving order.
1881  */
1882 static void
1883 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1884 {
1885 	if (tep->te_closing) {
1886 		tl_wput_ser(mp, tep);
1887 	} else {
1888 		TL_PUTQ(tep, mp);
1889 		tl_serializer_exit(tep);
1890 		tl_refrele(tep);
1891 	}
1892 
1893 }
1894 
1895 static void
1896 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1897 {
1898 	ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1899 
1900 	switch (DB_TYPE(mp)) {
1901 	case M_DATA:
1902 		tl_data(mp, tep);
1903 		break;
1904 	case M_PROTO:
1905 		tl_do_proto(mp, tep);
1906 		break;
1907 	default:
1908 		freemsg(mp);
1909 		break;
1910 	}
1911 }
1912 
/*
 * Write side put procedure called from serializer.
 *
 * Processes mp through tl_wput_common_ser(), then leaves the serializer and
 * drops one reference on tep.
 * NOTE(review): the reference released here is presumably the one taken by
 * tl_refhold() right before tl_serializer_enter() in tl_wput() - confirm for
 * any other caller that schedules this routine.
 */
static void
tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
{
	tl_wput_common_ser(mp, tep);
	tl_serializer_exit(tep);
	tl_refrele(tep);
}
1923 
/*
 * M_DATA processing. Called from serializer.
 *
 * The data block is either passed straight to the connected peer's read
 * side (fast path), placed on this endpoint's write queue, or freed.  In all
 * cases the serializer is exited and one reference on tep is released before
 * returning.
 */
static void
tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
{
	tl_endpt_t	*peer_tep = tep->te_conp;
	queue_t		*peer_rq;

	ASSERT(DB_TYPE(mp) == M_DATA);
	ASSERT(IS_COTS(tep));

	/* If there is a connected peer it must share our serializer. */
	IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);

	/*
	 * fastpath for data. Ignore flow control if tep is closing.
	 *
	 * The fast path is taken only when a peer exists and is not closing,
	 * both ends are in TS_DATA_XFER or TS_WREQ_ORDREL, nothing is already
	 * waiting on our write queue (so ordering is kept), the peer still has
	 * a read queue, and either the peer's stream can accept data or we are
	 * closing.  The tests rely on short-circuit evaluation: peer_rq is
	 * assigned inside the condition and canputnext() is only called once
	 * every earlier test has passed.
	 */
	if ((peer_tep != NULL) &&
	    !peer_tep->te_closing &&
	    ((tep->te_state == TS_DATA_XFER) ||
	    (tep->te_state == TS_WREQ_ORDREL)) &&
	    (tep->te_wq != NULL) &&
	    (tep->te_wq->q_first == NULL) &&
	    ((peer_tep->te_state == TS_DATA_XFER) ||
	    (peer_tep->te_state == TS_WREQ_ORDREL))	&&
	    ((peer_rq = peer_tep->te_rq) != NULL) &&
	    (canputnext(peer_rq) || tep->te_closing)) {
		putnext(peer_rq, mp);
	} else if (tep->te_closing) {
		/*
		 * It is possible that by the time we got here tep started to
		 * close. If the write queue is not empty, and the state is
		 * TS_DATA_XFER the data should be delivered in order, so we
		 * call putq() instead of freeing the data.
		 */
		if ((tep->te_wq != NULL) &&
		    ((tep->te_state == TS_DATA_XFER) ||
		    (tep->te_state == TS_WREQ_ORDREL))) {
			TL_PUTQ(tep, mp);
		} else {
			freemsg(mp);
		}
	} else {
		/* Not closing and fast path unavailable: queue the data. */
		TL_PUTQ(tep, mp);
	}

	tl_serializer_exit(tep);
	tl_refrele(tep);
}
1973 
/*
 * Write side service routine.
 *
 * All actual processing happens within serializer which is entered
 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
 * messages that need processing may have arrived, so tl_wsrv repeats until
 * queue is empty or te_nowsrv is set.
 *
 * Each pass marks the service routine active (te_wsrv_active, protected by
 * te_srv_lock), schedules tl_wsrv_ser() on the serializer using the
 * endpoint's own te_wsrvmp block, and then sleeps on te_srv_cv until
 * tl_wsrv_ser() clears te_wsrv_active.
 */
static void
tl_wsrv(queue_t *wq)
{
	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;

	while ((wq->q_first != NULL) && !tep->te_nowsrv) {
		mutex_enter(&tep->te_srv_lock);
		ASSERT(tep->te_wsrv_active == B_FALSE);
		tep->te_wsrv_active = B_TRUE;
		mutex_exit(&tep->te_srv_lock);

		tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);

		/*
		 * Wait for serializer job to complete.
		 */
		mutex_enter(&tep->te_srv_lock);
		while (tep->te_wsrv_active) {
			cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
		}
		/*
		 * NOTE(review): te_srv_cv is also waited on by tl_rsrv(), so
		 * this signal presumably passes on a wakeup that may have been
		 * intended for that waiter - confirm.
		 */
		cv_signal(&tep->te_srv_cv);
		mutex_exit(&tep->te_srv_lock);
	}
}
2006 
/*
 * Serialized write side processing of the STREAMS queue.
 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
 * is NULL.
 *
 * Drains the write queue through tl_wput_common_ser() until the queue is
 * empty or te_nowsrv is set.  ser_mp is used only to tell the two callers
 * apart; its contents are never examined.
 */
static void
tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
{
	mblk_t *mp;
	queue_t *wq = tep->te_wq;

	ASSERT(wq != NULL);
	/* te_nowsrv is re-checked before every getq() so draining can stop. */
	while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
		tl_wput_common_ser(mp, tep);
	}

	/*
	 * Wakeup service routine unless called from close.
	 * If ser_mp is specified, the caller is tl_wsrv().
	 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
	 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
	 * be no matching tl_serializer_exit() in this case.
	 * Also, there is no need to wakeup anyone since tl_close_ser() is not
	 * waiting on te_srv_cv.
	 */
	if (ser_mp != NULL) {
		/*
		 * We are called from tl_wsrv.
		 * Clear the active flag under te_srv_lock and signal the cv
		 * that tl_wsrv() is sleeping on.
		 */
		mutex_enter(&tep->te_srv_lock);
		ASSERT(tep->te_wsrv_active);
		tep->te_wsrv_active = B_FALSE;
		cv_signal(&tep->te_srv_cv);
		mutex_exit(&tep->te_srv_lock);
		tl_serializer_exit(tep);
	}
}
2044 
/*
 * Called when the stream is backenabled. Enter serializer and qenable everyone
 * flow controlled by tep.
 *
 * NOTE: The service routine should enter serializer synchronously. Otherwise it
 * is possible that two instances of tl_rsrv will be running reusing the same
 * rsrv mblk.
 *
 * The work itself is done by tl_rsrv_ser(); this routine schedules it with
 * the endpoint's te_rsrvmp block and sleeps on te_srv_cv until tl_rsrv_ser()
 * clears te_rsrv_active.
 */
static void
tl_rsrv(queue_t *rq)
{
	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;

	/* Nothing is expected to be queued on the read side. */
	ASSERT(rq->q_first == NULL);
	ASSERT(tep->te_rsrv_active == 0);

	/*
	 * NOTE(review): te_rsrv_active is set here without holding
	 * te_srv_lock, whereas tl_wsrv() sets te_wsrv_active under that lock
	 * and tl_rsrv_ser() clears te_rsrv_active under it - confirm that the
	 * unlocked store is intentional.
	 */
	tep->te_rsrv_active = B_TRUE;
	tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
	/*
	 * Wait for serializer job to complete.
	 */
	mutex_enter(&tep->te_srv_lock);
	while (tep->te_rsrv_active) {
		cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
	}
	/*
	 * NOTE(review): te_srv_cv is shared with tl_wsrv(); this signal
	 * presumably forwards a wakeup to the other waiter - confirm.
	 */
	cv_signal(&tep->te_srv_cv);
	mutex_exit(&tep->te_srv_lock);
}
2073 
/*
 * Serialized part of tl_rsrv(): re-enable whoever was flow controlled by tep.
 *
 * A connectionless endpoint in TS_IDLE calls tl_cl_backenable().  A
 * connection oriented endpoint with a peer that is not closing, and whose own
 * state is TS_DATA_XFER, TS_WIND_ORDREL or TS_WREQ_ORDREL, qenables that
 * peer.  In any other situation nothing is enabled.
 *
 * mp is unused; tl_rsrv() passes the endpoint's te_rsrvmp block.  Before
 * returning, te_rsrv_active is cleared and te_srv_cv signalled so tl_rsrv()
 * can continue, and the serializer is exited.
 */
/* ARGSUSED */
static void
tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
{
	tl_endpt_t *peer_tep;

	if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
		tl_cl_backenable(tep);
	} else if (
	    IS_COTS(tep) &&
	    ((peer_tep = tep->te_conp) != NULL) &&
	    !peer_tep->te_closing &&
	    ((tep->te_state == TS_DATA_XFER) ||
	    (tep->te_state == TS_WIND_ORDREL)||
	    (tep->te_state == TS_WREQ_ORDREL))) {
		/* peer_tep was assigned inside the condition above. */
		TL_QENABLE(peer_tep);
	}

	/*
	 * Wakeup read side service routine.
	 */
	mutex_enter(&tep->te_srv_lock);
	ASSERT(tep->te_rsrv_active);
	tep->te_rsrv_active = B_FALSE;
	cv_signal(&tep->te_srv_cv);
	mutex_exit(&tep->te_srv_lock);
	tl_serializer_exit(tep);
}
2102 
2103 /*
2104  * process M_PROTO messages. Always called from serializer.
2105  */
2106 static void
2107 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2108 {
2109 	ssize_t			msz = MBLKL(mp);
2110 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
2111 
2112 	/* Message size was validated by tl_wput(). */
2113 	ASSERT(msz >= sizeof (prim->type));
2114 
2115 	switch (prim->type) {
2116 	case T_UNBIND_REQ:
2117 		tl_unbind(mp, tep);
2118 		break;
2119 
2120 	case T_ADDR_REQ:
2121 		tl_addr_req(mp, tep);
2122 		break;
2123 
2124 	case O_T_CONN_RES:
2125 	case T_CONN_RES:
2126 		if (IS_CLTS(tep)) {
2127 			tl_merror(tep->te_wq, mp, EPROTO);
2128 			break;
2129 		}
2130 		tl_conn_res(mp, tep);
2131 		break;
2132 
2133 	case T_DISCON_REQ:
2134 		if (IS_CLTS(tep)) {
2135 			tl_merror(tep->te_wq, mp, EPROTO);
2136 			break;
2137 		}
2138 		tl_discon_req(mp, tep);
2139 		break;
2140 
2141 	case T_DATA_REQ:
2142 		if (IS_CLTS(tep)) {
2143 			tl_merror(tep->te_wq, mp, EPROTO);
2144 			break;
2145 		}
2146 		tl_data(mp, tep);
2147 		break;
2148 
2149 	case T_OPTDATA_REQ:
2150 		if (IS_CLTS(tep)) {
2151 			tl_merror(tep->te_wq, mp, EPROTO);
2152 			break;
2153 		}
2154 		tl_data(mp, tep);
2155 		break;
2156 
2157 	case T_EXDATA_REQ:
2158 		if (IS_CLTS(tep)) {
2159 			tl_merror(tep->te_wq, mp, EPROTO);
2160 			break;
2161 		}
2162 		tl_exdata(mp, tep);
2163 		break;
2164 
2165 	case T_ORDREL_REQ:
2166 		if (! IS_COTSORD(tep)) {
2167 			tl_merror(tep->te_wq, mp, EPROTO);
2168 			break;
2169 		}
2170 		tl_ordrel(mp, tep);
2171 		break;
2172 
2173 	case T_UNITDATA_REQ:
2174 		if (IS_COTS(tep)) {
2175 			tl_merror(tep->te_wq, mp, EPROTO);
2176 			break;
2177 		}
2178 		tl_unitdata(mp, tep);
2179 		break;
2180 
2181 	default:
2182 		tl_merror(tep->te_wq, mp, EPROTO);
2183 		break;
2184 	}
2185 }
2186 
2187 /*
2188  * Process ioctl from serializer.
2189  * This is a wrapper around tl_do_ioctl().
2190  */
2191 static void
2192 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2193 {
2194 	if (! tep->te_closing)
2195 		tl_do_ioctl(mp, tep);
2196 	else
2197 		freemsg(mp);
2198 
2199 	tl_serializer_exit(tep);
2200 	tl_refrele(tep);
2201 }
2202 
2203 static void
2204 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2205 {
2206 	struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2207 	int cmd = iocbp->ioc_cmd;
2208 	queue_t *wq = tep->te_wq;
2209 	int error;
2210 	int thisopt, otheropt;
2211 
2212 	ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2213 
2214 	switch (cmd) {
2215 	case TL_IOC_CREDOPT:
2216 		if (cmd == TL_IOC_CREDOPT) {
2217 			thisopt = TL_SETCRED;
2218 			otheropt = TL_SETUCRED;
2219 		} else {
2220 			/* FALLTHROUGH */
2221 	case TL_IOC_UCREDOPT:
2222 			thisopt = TL_SETUCRED;
2223 			otheropt = TL_SETCRED;
2224 		}
2225 		/*
2226 		 * The credentials passing does not apply to sockets.
2227 		 * Only one of the cred options can be set at a given time.
2228 		 */
2229 		if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2230 			miocnak(wq, mp, 0, EINVAL);
2231 			return;
2232 		}
2233 
2234 		/*
2235 		 * Turn on generation of credential options for
2236 		 * T_conn_req, T_conn_con, T_unidata_ind.
2237 		 */
2238 		error = miocpullup(mp, sizeof (uint32_t));
2239 		if (error != 0) {
2240 			miocnak(wq, mp, 0, error);
2241 			return;
2242 		}
2243 		if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2244 			miocnak(wq, mp, 0, EINVAL);
2245 			return;
2246 		}
2247 
2248 		if (*(uint32_t *)mp->b_cont->b_rptr)
2249 			tep->te_flag |= thisopt;
2250 		else
2251 			tep->te_flag &= ~thisopt;
2252 
2253 		miocack(wq, mp, 0, 0);
2254 		break;
2255 
2256 	default:
2257 		/* Should not be here */
2258 		miocnak(wq, mp, 0, EINVAL);
2259 		break;
2260 	}
2261 }
2262 
2263 
2264 /*
2265  * send T_ERROR_ACK
2266  * Note: assumes enough memory or caller passed big enough mp
2267  *	- no recovery from allocb failures
2268  */
2269 
2270 static void
2271 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2272     t_scalar_t unix_err, t_scalar_t type)
2273 {
2274 	struct T_error_ack *err_ack;
2275 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2276 	    M_PCPROTO, T_ERROR_ACK);
2277 
2278 	if (ackmp == NULL) {
2279 		(void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2280 		    "tl_error_ack:out of mblk memory"));
2281 		tl_merror(wq, NULL, ENOSR);
2282 		return;
2283 	}
2284 	err_ack = (struct T_error_ack *)ackmp->b_rptr;
2285 	err_ack->ERROR_prim = type;
2286 	err_ack->TLI_error = tli_err;
2287 	err_ack->UNIX_error = unix_err;
2288 
2289 	/*
2290 	 * send error ack message
2291 	 */
2292 	qreply(wq, ackmp);
2293 }
2294 
2295 
2296 
2297 /*
2298  * send T_OK_ACK
2299  * Note: assumes enough memory or caller passed big enough mp
2300  *	- no recovery from allocb failures
2301  */
2302 static void
2303 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2304 {
2305 	struct T_ok_ack *ok_ack;
2306 	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2307 	    M_PCPROTO, T_OK_ACK);
2308 
2309 	if (ackmp == NULL) {
2310 		tl_merror(wq, NULL, ENOMEM);
2311 		return;
2312 	}
2313 
2314 	ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2315 	ok_ack->CORRECT_prim = type;
2316 
2317 	(void) qreply(wq, ackmp);
2318 }
2319 
2320 /*
2321  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2322  * This is a wrapper around tl_bind().
2323  */
2324 static void
2325 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2326 {
2327 	if (! tep->te_closing)
2328 		tl_bind(mp, tep);
2329 	else
2330 		freemsg(mp);
2331 
2332 	tl_serializer_exit(tep);
2333 	tl_refrele(tep);
2334 }
2335 
2336 /*
2337  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2338  * Assumes that the endpoint is in the unbound.
2339  */
2340 static void
2341 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2342 {
2343 	queue_t			*wq = tep->te_wq;
2344 	struct T_bind_ack	*b_ack;
2345 	struct T_bind_req	*bind = (struct T_bind_req *)mp->b_rptr;
2346 	mblk_t			*ackmp, *bamp;
2347 	soux_addr_t		ux_addr;
2348 	t_uscalar_t		qlen = 0;
2349 	t_scalar_t		alen, aoff;
2350 	tl_addr_t		addr_req;
2351 	void			*addr_startp;
2352 	ssize_t			msz = MBLKL(mp), basize;
2353 	t_scalar_t		tli_err = 0, unix_err = 0;
2354 	t_scalar_t		save_prim_type = bind->PRIM_type;
2355 	t_scalar_t		save_state = tep->te_state;
2356 
2357 	if (tep->te_state != TS_UNBND) {
2358 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2359 		    SL_TRACE|SL_ERROR,
2360 		    "tl_wput:bind_request:out of state, state=%d",
2361 		    tep->te_state));
2362 		tli_err = TOUTSTATE;
2363 		goto error;
2364 	}
2365 
2366 	if (msz < sizeof (struct T_bind_req)) {
2367 		tli_err = TSYSERR; unix_err = EINVAL;
2368 		goto error;
2369 	}
2370 
2371 	tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2372 
2373 	ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2374 	    (bind->PRIM_type == T_BIND_REQ));
2375 
2376 	alen = bind->ADDR_length;
2377 	aoff = bind->ADDR_offset;
2378 
2379 	/* negotiate max conn req pending */
2380 	if (IS_COTS(tep)) {
2381 		qlen = bind->CONIND_number;
2382 		if (qlen > tl_maxqlen)
2383 			qlen = tl_maxqlen;
2384 	}
2385 
2386 	/*
2387 	 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2388 	 * and bound again.
2389 	 */
2390 	if ((tep->te_hash_hndl == NULL) &&
2391 	    ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2392 	    mod_hash_reserve_nosleep(tep->te_addrhash,
2393 	    &tep->te_hash_hndl) != 0) {
2394 		tli_err = TSYSERR; unix_err = ENOSR;
2395 		goto error;
2396 	}
2397 
2398 	/*
2399 	 * Verify address correctness.
2400 	 */
2401 	if (IS_SOCKET(tep)) {
2402 		ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2403 
2404 		if ((alen != TL_SOUX_ADDRLEN) ||
2405 		    (aoff < 0) ||
2406 		    (aoff + alen > msz)) {
2407 			(void) (STRLOG(TL_ID, tep->te_minor,
2408 			    1, SL_TRACE|SL_ERROR,
2409 			    "tl_bind: invalid socket addr"));
2410 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2411 			tli_err = TSYSERR; unix_err = EINVAL;
2412 			goto error;
2413 		}
2414 		/* Copy address from message to local buffer. */
2415 		bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2416 		/*
2417 		 * Check that we got correct address from sockets
2418 		 */
2419 		if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2420 		    (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2421 			(void) (STRLOG(TL_ID, tep->te_minor,
2422 			    1, SL_TRACE|SL_ERROR,
2423 			    "tl_bind: invalid socket magic"));
2424 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2425 			tli_err = TSYSERR; unix_err = EINVAL;
2426 			goto error;
2427 		}
2428 		if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2429 		    (ux_addr.soua_vp != NULL)) {
2430 			(void) (STRLOG(TL_ID, tep->te_minor,
2431 			    1, SL_TRACE|SL_ERROR,
2432 			    "tl_bind: implicit addr non-empty"));
2433 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2434 			tli_err = TSYSERR; unix_err = EINVAL;
2435 			goto error;
2436 		}
2437 		if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2438 		    (ux_addr.soua_vp == NULL)) {
2439 			(void) (STRLOG(TL_ID, tep->te_minor,
2440 			    1, SL_TRACE|SL_ERROR,
2441 			    "tl_bind: explicit addr empty"));
2442 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2443 			tli_err = TSYSERR; unix_err = EINVAL;
2444 			goto error;
2445 		}
2446 	} else {
2447 		if ((alen > 0) && ((aoff < 0) ||
2448 		    ((ssize_t)(aoff + alen) > msz) ||
2449 		    ((aoff + alen) < 0))) {
2450 			(void) (STRLOG(TL_ID, tep->te_minor,
2451 			    1, SL_TRACE|SL_ERROR,
2452 			    "tl_bind: invalid message"));
2453 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2454 			tli_err = TSYSERR; unix_err = EINVAL;
2455 			goto error;
2456 		}
2457 		if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2458 			(void) (STRLOG(TL_ID, tep->te_minor,
2459 			    1, SL_TRACE|SL_ERROR,
2460 			    "tl_bind: bad addr in  message"));
2461 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2462 			tli_err = TBADADDR;
2463 			goto error;
2464 		}
2465 #ifdef DEBUG
2466 		/*
2467 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2468 		 * if (! assertion)
2469 		 *	log warning;
2470 		 */
2471 		if (! ((alen == 0 && aoff == 0) ||
2472 			(aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2473 			(void) (STRLOG(TL_ID, tep->te_minor,
2474 				    3, SL_TRACE|SL_ERROR,
2475 				    "tl_bind: addr overlaps TPI message"));
2476 		}
2477 #endif
2478 	}
2479 
2480 	/*
2481 	 * Bind the address provided or allocate one if requested.
2482 	 * Allow rebinds with a new qlen value.
2483 	 */
2484 	if (IS_SOCKET(tep)) {
2485 		/*
2486 		 * For anonymous requests the te_ap is already set up properly
2487 		 * so use minor number as an address.
2488 		 * For explicit requests need to check whether the address is
2489 		 * already in use.
2490 		 */
2491 		if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2492 			int rc;
2493 
2494 			if (tep->te_flag & TL_ADDRHASHED) {
2495 				ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2496 				if (tep->te_vp == ux_addr.soua_vp)
2497 					goto skip_addr_bind;
2498 				else /* Rebind to a new address. */
2499 					tl_addr_unbind(tep);
2500 			}
2501 			/*
2502 			 * Insert address in the hash if it is not already
2503 			 * there.  Since we use preallocated handle, the insert
2504 			 * can fail only if the key is already present.
2505 			 */
2506 			rc = mod_hash_insert_reserve(tep->te_addrhash,
2507 			    (mod_hash_key_t)ux_addr.soua_vp,
2508 			    (mod_hash_val_t)tep, tep->te_hash_hndl);
2509 
2510 			if (rc != 0) {
2511 				ASSERT(rc == MH_ERR_DUPLICATE);
2512 				/*
2513 				 * Violate O_T_BIND_REQ semantics and fail with
2514 				 * TADDRBUSY - sockets will not use any address
2515 				 * other than supplied one for explicit binds.
2516 				 */
2517 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2518 				    SL_TRACE|SL_ERROR,
2519 				    "tl_bind:requested addr %p is busy",
2520 				    ux_addr.soua_vp));
2521 				tli_err = TADDRBUSY; unix_err = 0;
2522 				goto error;
2523 			}
2524 			tep->te_uxaddr = ux_addr;
2525 			tep->te_flag |= TL_ADDRHASHED;
2526 			tep->te_hash_hndl = NULL;
2527 		}
2528 	} else if (alen == 0) {
2529 		/*
2530 		 * assign any free address
2531 		 */
2532 		if (! tl_get_any_addr(tep, NULL)) {
2533 			(void) (STRLOG(TL_ID, tep->te_minor,
2534 			    1, SL_TRACE|SL_ERROR,
2535 			    "tl_bind:failed to get buffer for any "
2536 			    "address"));
2537 			tli_err = TSYSERR; unix_err = ENOSR;
2538 			goto error;
2539 		}
2540 	} else {
2541 		addr_req.ta_alen = alen;
2542 		addr_req.ta_abuf = (mp->b_rptr + aoff);
2543 		addr_req.ta_zoneid = tep->te_zoneid;
2544 
2545 		tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2546 		if (tep->te_abuf == NULL) {
2547 			tli_err = TSYSERR; unix_err = ENOSR;
2548 			goto error;
2549 		}
2550 		bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2551 		tep->te_alen = alen;
2552 
2553 		if (mod_hash_insert_reserve(tep->te_addrhash,
2554 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2555 		    tep->te_hash_hndl) != 0) {
2556 			if (save_prim_type == T_BIND_REQ) {
2557 				/*
2558 				 * The bind semantics for this primitive
2559 				 * require a failure if the exact address
2560 				 * requested is busy
2561 				 */
2562 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2563 				    SL_TRACE|SL_ERROR,
2564 				    "tl_bind:requested addr is busy"));
2565 				tli_err = TADDRBUSY; unix_err = 0;
2566 				goto error;
2567 			}
2568 
2569 			/*
2570 			 * O_T_BIND_REQ semantics say if address if requested
2571 			 * address is busy, bind to any available free address
2572 			 */
2573 			if (! tl_get_any_addr(tep, &addr_req)) {
2574 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2575 				    SL_TRACE|SL_ERROR,
2576 				    "tl_bind:unable to get any addr buf"));
2577 				tli_err = TSYSERR; unix_err = ENOMEM;
2578 				goto error;
2579 			}
2580 		} else {
2581 			tep->te_flag |= TL_ADDRHASHED;
2582 			tep->te_hash_hndl = NULL;
2583 		}
2584 	}
2585 
2586 	ASSERT(tep->te_alen >= 0);
2587 
2588 skip_addr_bind:
2589 	/*
2590 	 * prepare T_BIND_ACK TPI message
2591 	 */
2592 	basize = sizeof (struct T_bind_ack) + tep->te_alen;
2593 	bamp = reallocb(mp, basize, 0);
2594 	if (bamp == NULL) {
2595 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2596 		    "tl_wput:tl_bind: allocb failed"));
2597 		/*
2598 		 * roll back state changes
2599 		 */
2600 		tl_addr_unbind(tep);
2601 		tep->te_state = TS_UNBND;
2602 		tl_memrecover(wq, mp, basize);
2603 		return;
2604 	}
2605 
2606 	DB_TYPE(bamp) = M_PCPROTO;
2607 	bamp->b_wptr = bamp->b_rptr + basize;
2608 	b_ack = (struct T_bind_ack *)bamp->b_rptr;
2609 	b_ack->PRIM_type = T_BIND_ACK;
2610 	b_ack->CONIND_number = qlen;
2611 	b_ack->ADDR_length = tep->te_alen;
2612 	b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2613 	addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2614 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2615 
2616 	if (IS_COTS(tep)) {
2617 		tep->te_qlen = qlen;
2618 		if (qlen > 0)
2619 			tep->te_flag |= TL_LISTENER;
2620 	}
2621 
2622 	tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2623 	/*
2624 	 * send T_BIND_ACK message
2625 	 */
2626 	(void) qreply(wq, bamp);
2627 	return;
2628 
2629 error:
2630 	ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2631 	if (ackmp == NULL) {
2632 		/*
2633 		 * roll back state changes
2634 		 */
2635 		tep->te_state = save_state;
2636 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2637 		return;
2638 	}
2639 	tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2640 	tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2641 }
2642 
2643 /*
2644  * Process T_UNBIND_REQ.
2645  * Called from serializer.
2646  */
2647 static void
2648 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2649 {
2650 	queue_t *wq;
2651 	mblk_t *ackmp;
2652 
2653 	if (tep->te_closing) {
2654 		freemsg(mp);
2655 		return;
2656 	}
2657 
2658 	wq = tep->te_wq;
2659 
2660 	/*
2661 	 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2662 	 * ==> allocate for T_ERROR_ACK (known max)
2663 	 */
2664 	if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2665 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2666 		return;
2667 	}
2668 	/*
2669 	 * memory resources committed
2670 	 * Note: no message validation. T_UNBIND_REQ message is
2671 	 * same size as PRIM_type field so already verified earlier.
2672 	 */
2673 
2674 	/*
2675 	 * validate state
2676 	 */
2677 	if (tep->te_state != TS_IDLE) {
2678 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2679 		    SL_TRACE|SL_ERROR,
2680 		    "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2681 		    tep->te_state));
2682 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2683 		return;
2684 	}
2685 	tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2686 
2687 	/*
2688 	 * TPI says on T_UNBIND_REQ:
2689 	 *    send up a M_FLUSH to flush both
2690 	 *    read and write queues
2691 	 */
2692 	(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2693 
2694 	if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2695 	    tep->te_magic != SOU_MAGIC_EXPLICIT) {
2696 
2697 		/*
2698 		 * Sockets use bind with qlen==0 followed by bind() to
2699 		 * the same address with qlen > 0 for listeners.
2700 		 * We allow rebind with a new qlen value.
2701 		 */
2702 		tl_addr_unbind(tep);
2703 	}
2704 
2705 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2706 	/*
2707 	 * send  T_OK_ACK
2708 	 */
2709 	tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2710 }
2711 
2712 
2713 /*
2714  * Option management code from drv/ip is used here
2715  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2716  *	database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2717  *	However, that is what we want as that option is 'unorthodox'
2718  *	and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2719  *	and not in T_SVR4_OPTMGMT_REQ/ACK
2720  * Note2: use of optcom_req means this routine is an exception to
2721  *	 recovery from allocb() failures.
2722  */
2723 
2724 static void
2725 tl_optmgmt(queue_t *wq, mblk_t *mp)
2726 {
2727 	tl_endpt_t *tep;
2728 	mblk_t *ackmp;
2729 	union T_primitives *prim;
2730 	cred_t *cr;
2731 
2732 	tep = (tl_endpt_t *)wq->q_ptr;
2733 	prim = (union T_primitives *)mp->b_rptr;
2734 
2735 	/*
2736 	 * All Solaris components should pass a db_credp
2737 	 * for this TPI message, hence we ASSERT.
2738 	 * But in case there is some other M_PROTO that looks
2739 	 * like a TPI message sent by some other kernel
2740 	 * component, we check and return an error.
2741 	 */
2742 	cr = msg_getcred(mp, NULL);
2743 	ASSERT(cr != NULL);
2744 	if (cr == NULL) {
2745 		tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2746 		return;
2747 	}
2748 
2749 	/*  all states OK for AF_UNIX options ? */
2750 	if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2751 	    prim->type == T_SVR4_OPTMGMT_REQ) {
2752 		/*
2753 		 * Broken TLI semantics that options can only be managed
2754 		 * in TS_IDLE state. Needed for Sparc ABI test suite that
2755 		 * tests this TLI (mis)feature using this device driver.
2756 		 */
2757 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2758 		    SL_TRACE|SL_ERROR,
2759 		    "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2760 		    tep->te_state));
2761 		/*
2762 		 * preallocate memory for T_ERROR_ACK
2763 		 */
2764 		ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2765 		if (! ackmp) {
2766 			tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2767 			return;
2768 		}
2769 
2770 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2771 		freemsg(mp);
2772 		return;
2773 	}
2774 
2775 	/*
2776 	 * call common option management routine from drv/ip
2777 	 */
2778 	if (prim->type == T_SVR4_OPTMGMT_REQ) {
2779 		svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2780 	} else {
2781 		ASSERT(prim->type == T_OPTMGMT_REQ);
2782 		tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2783 	}
2784 }
2785 
2786 /*
2787  * Handle T_conn_req - the driver part of accept().
2788  * If TL_SET[U]CRED generate the credentials options.
2789  * If this is a socket pass through options unmodified.
2790  * For sockets generate the T_CONN_CON here instead of
2791  * waiting for the T_CONN_RES.
2792  */
2793 static void
2794 tl_conn_req(queue_t *wq, mblk_t *mp)
2795 {
2796 	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
2797 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_rptr;
2798 	ssize_t			msz = MBLKL(mp);
2799 	t_scalar_t		alen, aoff, olen, ooff,	err = 0;
2800 	tl_endpt_t		*peer_tep = NULL;
2801 	mblk_t			*ackmp;
2802 	mblk_t			*dimp;
2803 	struct T_discon_ind	*di;
2804 	soux_addr_t		ux_addr;
2805 	tl_addr_t		dst;
2806 
2807 	ASSERT(IS_COTS(tep));
2808 
2809 	if (tep->te_closing) {
2810 		freemsg(mp);
2811 		return;
2812 	}
2813 
2814 	/*
2815 	 * preallocate memory for:
2816 	 * 1. max of T_ERROR_ACK and T_OK_ACK
2817 	 *	==> known max T_ERROR_ACK
2818 	 * 2. max of T_DISCON_IND and T_CONN_IND
2819 	 */
2820 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2821 	if (! ackmp) {
2822 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2823 		return;
2824 	}
2825 	/*
2826 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
2827 	 * will be committed for T_DISCON_IND/T_CONN_IND later
2828 	 */
2829 
2830 	if (tep->te_state != TS_IDLE) {
2831 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2832 		    SL_TRACE|SL_ERROR,
2833 		    "tl_wput:T_CONN_REQ:out of state, state=%d",
2834 		    tep->te_state));
2835 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2836 		freemsg(mp);
2837 		return;
2838 	}
2839 
2840 	/*
2841 	 * validate the message
2842 	 * Note: dereference fields in struct inside message only
2843 	 * after validating the message length.
2844 	 */
2845 	if (msz < sizeof (struct T_conn_req)) {
2846 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2847 		    "tl_conn_req:invalid message length"));
2848 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2849 		freemsg(mp);
2850 		return;
2851 	}
2852 	alen = creq->DEST_length;
2853 	aoff = creq->DEST_offset;
2854 	olen = creq->OPT_length;
2855 	ooff = creq->OPT_offset;
2856 	if (olen == 0)
2857 		ooff = 0;
2858 
2859 	if (IS_SOCKET(tep)) {
2860 		if ((alen != TL_SOUX_ADDRLEN) ||
2861 		    (aoff < 0) ||
2862 		    (aoff + alen > msz) ||
2863 		    (alen > msz - sizeof (struct T_conn_req))) {
2864 			(void) (STRLOG(TL_ID, tep->te_minor,
2865 				    1, SL_TRACE|SL_ERROR,
2866 				    "tl_conn_req: invalid socket addr"));
2867 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2868 			freemsg(mp);
2869 			return;
2870 		}
2871 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2872 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2873 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2874 			(void) (STRLOG(TL_ID, tep->te_minor,
2875 			    1, SL_TRACE|SL_ERROR,
2876 			    "tl_conn_req: invalid socket magic"));
2877 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2878 			freemsg(mp);
2879 			return;
2880 		}
2881 	} else {
2882 		if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2883 		    (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2884 		    ooff + olen < 0)) ||
2885 		    olen < 0 || ooff < 0) {
2886 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2887 			    SL_TRACE|SL_ERROR,
2888 			    "tl_conn_req:invalid message"));
2889 			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2890 			freemsg(mp);
2891 			return;
2892 		}
2893 
2894 		if (alen <= 0 || aoff < 0 ||
2895 		    (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2896 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2897 				    SL_TRACE|SL_ERROR,
2898 				    "tl_conn_req:bad addr in message, "
2899 				    "alen=%d, msz=%ld",
2900 				    alen, msz));
2901 			tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2902 			freemsg(mp);
2903 			return;
2904 		}
2905 #ifdef DEBUG
2906 		/*
2907 		 * Mild form of ASSERT()ion to detect broken TPI apps.
2908 		 * if (! assertion)
2909 		 *	log warning;
2910 		 */
2911 		if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2912 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
2913 			    SL_TRACE|SL_ERROR,
2914 			    "tl_conn_req: addr overlaps TPI message"));
2915 		}
2916 #endif
2917 		if (olen) {
2918 			/*
2919 			 * no opts in connect req
2920 			 * supported in this provider except for sockets.
2921 			 */
2922 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
2923 			    SL_TRACE|SL_ERROR,
2924 			    "tl_conn_req:options not supported "
2925 			    "in message"));
2926 			tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2927 			freemsg(mp);
2928 			return;
2929 		}
2930 	}
2931 
2932 	/*
2933 	 * Prevent tep from closing on us.
2934 	 */
2935 	if (! tl_noclose(tep)) {
2936 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2937 		    "tl_conn_req:endpoint is closing"));
2938 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2939 		freemsg(mp);
2940 		return;
2941 	}
2942 
2943 	tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2944 	/*
2945 	 * get endpoint to connect to
2946 	 * check that peer with DEST addr is bound to addr
2947 	 * and has CONIND_number > 0
2948 	 */
2949 	dst.ta_alen = alen;
2950 	dst.ta_abuf = mp->b_rptr + aoff;
2951 	dst.ta_zoneid = tep->te_zoneid;
2952 
2953 	/*
2954 	 * Verify if remote addr is in use
2955 	 */
2956 	peer_tep = (IS_SOCKET(tep) ?
2957 	    tl_sock_find_peer(tep, &ux_addr) :
2958 	    tl_find_peer(tep, &dst));
2959 
2960 	if (peer_tep == NULL) {
2961 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2962 		    "tl_conn_req:no one at connect address"));
2963 		err = ECONNREFUSED;
2964 	} else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
2965 		/*
2966 		 * validate that number of incoming connection is
2967 		 * not to capacity on destination endpoint
2968 		 */
2969 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2970 		    "tl_conn_req: qlen overflow connection refused"));
2971 			err = ECONNREFUSED;
2972 	}
2973 
2974 	/*
2975 	 * Send T_DISCON_IND in case of error
2976 	 */
2977 	if (err != 0) {
2978 		if (peer_tep != NULL)
2979 			tl_refrele(peer_tep);
2980 		/* We are still expected to send T_OK_ACK */
2981 		tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2982 		tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2983 		tl_closeok(tep);
2984 		dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2985 		    M_PROTO, T_DISCON_IND);
2986 		if (dimp == NULL) {
2987 			tl_merror(wq, NULL, ENOSR);
2988 			return;
2989 		}
2990 		di = (struct T_discon_ind *)dimp->b_rptr;
2991 		di->DISCON_reason = err;
2992 		di->SEQ_number = BADSEQNUM;
2993 
2994 		tep->te_state = TS_IDLE;
2995 		/*
2996 		 * send T_DISCON_IND message
2997 		 */
2998 		putnext(tep->te_rq, dimp);
2999 		return;
3000 	}
3001 
3002 	ASSERT(IS_COTS(peer_tep));
3003 
3004 	/*
3005 	 * Found the listener. At this point processing will continue on
3006 	 * listener serializer. Close of the endpoint should be blocked while we
3007 	 * switch serializers.
3008 	 */
3009 	tl_serializer_refhold(peer_tep->te_ser);
3010 	tl_serializer_refrele(tep->te_ser);
3011 	tep->te_ser = peer_tep->te_ser;
3012 	ASSERT(tep->te_oconp == NULL);
3013 	tep->te_oconp = peer_tep;
3014 
3015 	/*
3016 	 * It is safe to close now. Close may continue on listener serializer.
3017 	 */
3018 	tl_closeok(tep);
3019 
3020 	/*
3021 	 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3022 	 * data, so we link mp to ackmp.
3023 	 */
3024 	ackmp->b_cont = mp;
3025 	mp = ackmp;
3026 
3027 	tl_refhold(tep);
3028 	tl_serializer_enter(tep, tl_conn_req_ser, mp);
3029 }
3030 
3031 /*
3032  * Finish T_CONN_REQ processing on listener serializer.
 *
 * Arguments:
 *	mp  - preallocated ack mblk with the original T_CONN_REQ message
 *	      linked behind it through b_cont (set up by tl_conn_req()).
 *	tep - connecting endpoint; tep->te_oconp is the listener it found.
 *
 * The function sends T_OK_ACK to the connecting endpoint, queues a
 * tl_icon_t on the listener and sends the listener a T_CONN_IND. For
 * sockets, unless tl_disable_early_connect is set, a T_CONN_CON copied from
 * the request is also sent to the connecting endpoint right away.
 *
 * Every path leaves the serializer with tl_serializer_exit(tep). All
 * failure paths additionally call TL_UNCONNECT(tep->te_oconp) and
 * tl_refrele(tep); the success path keeps the reference (see the comment at
 * the end of the function). Allocation failures that happen before the
 * T_OK_ACK reset te_state to TS_IDLE and pass the request to
 * tl_memrecover().
3033  */
3034 static void
3035 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3036 {
3037 	queue_t		*wq;
3038 	tl_endpt_t	*peer_tep = tep->te_oconp;
3039 	mblk_t		*confmp, *cimp, *indmp;
3040 	void		*opts = NULL;
3041 	mblk_t		*ackmp = mp;
3042 	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3043 	struct T_conn_ind	*ci;
3044 	tl_icon_t	*tip;
3045 	void		*addr_startp;
3046 	t_scalar_t	olen = creq->OPT_length;
3047 	t_scalar_t	ooff = creq->OPT_offset;
3048 	size_t 		ci_msz;
3049 	size_t		size;
3050 	cred_t		*cr = NULL;
3051 	pid_t		cpid;
3052 
	/*
	 * The connecting endpoint is already closing: drop the whole chain
	 * (ack block and request) without sending any ack.
	 */
3053 	if (tep->te_closing) {
3054 		TL_UNCONNECT(tep->te_oconp);
3055 		tl_serializer_exit(tep);
3056 		tl_refrele(tep);
3057 		freemsg(mp);
3058 		return;
3059 	}
3060 
3061 	wq = tep->te_wq;
3062 	tep->te_flag |= TL_EAGER;
3063 
3064 	/*
3065 	 * Extract preallocated ackmp from mp: from here on ackmp is the bare
	 * ack block and mp is the original T_CONN_REQ message.
3066 	 */
3067 	mp = mp->b_cont;
3068 	ackmp->b_cont = NULL;
3069 
	/*
	 * With no options, force ooff to 0. Below, ooff != 0 is the test for
	 * "there are options in the request that must be copied".
	 */
3070 	if (olen == 0)
3071 		ooff = 0;
3072 
	/*
	 * The listener must not be closing and must be in TS_IDLE or
	 * TS_WRES_CIND. Otherwise the request message itself is handed to
	 * tl_error_ack() and the preallocated ack block is released unused.
	 */
3073 	if (peer_tep->te_closing ||
3074 	    !((peer_tep->te_state == TS_IDLE) ||
3075 	    (peer_tep->te_state == TS_WRES_CIND))) {
3076 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3077 		    "tl_conn_req:peer in bad state (%d)",
3078 		    peer_tep->te_state));
3079 		TL_UNCONNECT(tep->te_oconp);
3080 		tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3081 		freemsg(ackmp);
3082 		tl_serializer_exit(tep);
3083 		tl_refrele(tep);
3084 		return;
3085 	}
3086 
3087 	/*
3088 	 * preallocate now for T_DISCON_IND or T_CONN_IND
3089 	 */
3090 	/*
3091 	 * calculate length of T_CONN_IND message
	 *
	 * If the listener asked for credentials (TL_SETCRED or TL_SETUCRED),
	 * any options from the request are ignored (ooff = 0) and olen becomes
	 * the size of the single credential option that will be generated.
3092 	 */
3093 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3094 		cr = msg_getcred(mp, &cpid);
3095 		ASSERT(cr != NULL);
3096 		if (peer_tep->te_flag & TL_SETCRED) {
3097 			ooff = 0;
3098 			olen = (t_scalar_t) sizeof (struct opthdr) +
3099 			    OPTLEN(sizeof (tl_credopt_t));
3100 			/* 1 option only */
3101 		} else {
3102 			ooff = 0;
3103 			olen = (t_scalar_t)sizeof (struct opthdr) +
3104 			    OPTLEN(ucredminsize(cr));
3105 			/* 1 option only */
3106 		}
3107 	}
	/*
	 * T_CONN_IND = header + source address, aligned, followed by olen
	 * bytes of options. size is large enough for either a T_CONN_IND or
	 * a T_DISCON_IND.
	 */
3108 	ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3109 	ci_msz = T_ALIGN(ci_msz) + olen;
3110 	size = max(ci_msz, sizeof (struct T_discon_ind));
3111 
3112 	/*
3113 	 * Save options from mp - we'll need them for T_CONN_IND.
	 * opts is a temporary kmem buffer of olen bytes; every later exit
	 * path frees it when it is non-NULL.
3114 	 */
3115 	if (ooff != 0) {
3116 		opts = kmem_alloc(olen, KM_NOSLEEP);
3117 		if (opts == NULL) {
3118 			/*
3119 			 * roll back state changes
3120 			 */
3121 			tep->te_state = TS_IDLE;
3122 			tl_memrecover(wq, mp, size);
3123 			freemsg(ackmp);
3124 			TL_UNCONNECT(tep->te_oconp);
3125 			tl_serializer_exit(tep);
3126 			tl_refrele(tep);
3127 			return;
3128 		}
3129 		/* Copy options to a temp buffer */
3130 		bcopy(mp->b_rptr + ooff, opts, olen);
3131 	}
3132 
3133 	if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3134 		/*
3135 		 * Generate a T_CONN_CON that has the identical address
3136 		 * (and options) as the T_CONN_REQ.
3137 		 * NOTE: assumes that the T_conn_req and T_conn_con structures
3138 		 * are isomorphic.
3139 		 */
3140 		confmp = copyb(mp);
3141 		if (! confmp) {
3142 			/*
3143 			 * roll back state changes
3144 			 */
3145 			tep->te_state = TS_IDLE;
3146 			tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3147 			freemsg(ackmp);
3148 			if (opts != NULL)
3149 				kmem_free(opts, olen);
3150 			TL_UNCONNECT(tep->te_oconp);
3151 			tl_serializer_exit(tep);
3152 			tl_refrele(tep);
3153 			return;
3154 		}
3155 		((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3156 		    T_CONN_CON;
3157 	} else {
		/* No early T_CONN_CON; freemsg(confmp) below is still safe. */
3158 		confmp = NULL;
3159 	}
	/*
	 * Get a block of at least size bytes for the indication. On success
	 * the request is referenced through indmp from here on; on failure mp
	 * is still the request and is handed to tl_memrecover().
	 */
3160 	if ((indmp = reallocb(mp, size, 0)) == NULL) {
3161 		/*
3162 		 * roll back state changes
3163 		 */
3164 		tep->te_state = TS_IDLE;
3165 		tl_memrecover(wq, mp, size);
3166 		freemsg(ackmp);
3167 		if (opts != NULL)
3168 			kmem_free(opts, olen);
3169 		freemsg(confmp);
3170 		TL_UNCONNECT(tep->te_oconp);
3171 		tl_serializer_exit(tep);
3172 		tl_refrele(tep);
3173 		return;
3174 	}
3175 
	/*
	 * Allocate the pending-connection cell that will be linked on the
	 * listener's te_iconp list.
	 */
3176 	tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3177 	if (tip == NULL) {
3178 		/*
3179 		 * roll back state changes
3180 		 */
3181 		tep->te_state = TS_IDLE;
3182 		tl_memrecover(wq, indmp, sizeof (*tip));
3183 		freemsg(ackmp);
3184 		if (opts != NULL)
3185 			kmem_free(opts, olen);
3186 		freemsg(confmp);
3187 		TL_UNCONNECT(tep->te_oconp);
3188 		tl_serializer_exit(tep);
3189 		tl_refrele(tep);
3190 		return;
3191 	}
	/* Redundant after kmem_zalloc(), kept as an explicit statement. */
3192 	tip->ti_mp = NULL;
3193 
3194 	/*
3195 	 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3196 	 * and tl_icon_t cell.
3197 	 */
3198 
3199 	/*
3200 	 * ack validity of request. If an early T_CONN_CON (confmp) exists and
	 * the listener has a credential, attach the listener's credential and
	 * pid to that T_CONN_CON.
	 *
	 * NOTE(review): peer_tep was already dereferenced above, so the
	 * peer_tep != NULL test here is redundant.
3201 	 */
3202 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3203 
3204 	if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3205 	    confmp != NULL) {
3206 		mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3207 	}
3208 
3209 	tl_ok_ack(wq, ackmp, T_CONN_REQ);
3210 
3211 	/*
3212 	 * prepare message to send T_CONN_IND
3213 	 */
3214 	/*
3215 	 * allocate the message - original data blocks retained
3216 	 * in the returned mblk
	 *
	 * The T_OK_ACK has already been sent at this point, so a failure here
	 * is reported with tl_merror() instead of rolling the state back.
3217 	 */
3218 	cimp = tl_resizemp(indmp, size);
3219 	if (! cimp) {
3220 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3221 		    "tl_conn_req:con_ind:allocb failure"));
3222 		tl_merror(wq, indmp, ENOMEM);
3223 		TL_UNCONNECT(tep->te_oconp);
3224 		tl_serializer_exit(tep);
3225 		tl_refrele(tep);
3226 		if (opts != NULL)
3227 			kmem_free(opts, olen);
3228 		freemsg(confmp);
3229 		ASSERT(tip->ti_mp == NULL);
3230 		kmem_free(tip, sizeof (*tip));
3231 		return;
3232 	}
3233 
	/*
	 * Fill in the T_CONN_IND: the source address is the connecting
	 * endpoint's bound address and the sequence number is its te_seqno.
	 */
3234 	DB_TYPE(cimp) = M_PROTO;
3235 	ci = (struct T_conn_ind *)cimp->b_rptr;
3236 	ci->PRIM_type  = T_CONN_IND;
3237 	ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3238 	ci->SRC_length = tep->te_alen;
3239 	ci->SEQ_number = tep->te_seqno;
3240 
3241 	addr_startp = cimp->b_rptr + ci->SRC_offset;
3242 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
	/*
	 * Options: a generated credential option if the listener asked for
	 * one, else the options saved from the request, else none.
	 */
3243 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3244 
3245 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3246 		    ci->SRC_length);
3247 		ci->OPT_length = olen; /* because only 1 option */
3248 		tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3249 		    cr, cpid,
3250 		    peer_tep->te_flag, peer_tep->te_credp);
3251 	} else if (ooff != 0) {
3252 		/* Copy option from T_CONN_REQ */
3253 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3254 		    ci->SRC_length);
3255 		ci->OPT_length = olen;
3256 		ASSERT(opts != NULL);
3257 		bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3258 	} else {
3259 		ci->OPT_offset = 0;
3260 		ci->OPT_length = 0;
3261 	}
3262 	if (opts != NULL)
3263 		kmem_free(opts, olen);
3264 
3265 	/*
3266 	 * register connection request with server peer
3267 	 * append to list of incoming connections
3268 	 * increment references for both peer_tep and tep: peer_tep is placed on
3269 	 * te_oconp and tep is placed on listeners queue.
3270 	 */
3271 	tip->ti_tep = tep;
3272 	tip->ti_seqno = tep->te_seqno;
3273 	list_insert_tail(&peer_tep->te_iconp, tip);
3274 	peer_tep->te_nicon++;
3275 
3276 	peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3277 	/*
3278 	 * send the T_CONN_IND message
3279 	 */
3280 	putnext(peer_tep->te_rq, cimp);
3281 
3282 	/*
3283 	 * Send a T_CONN_CON message for sockets.
3284 	 * Disable the queues until we have reached the correct state!
	 * (tl_conn_res() and tl_discon_req() call enableok() on this write
	 * queue later.)
3285 	 */
3286 	if (confmp != NULL) {
3287 		tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3288 		noenable(wq);
3289 		putnext(tep->te_rq, confmp);
3290 	}
3291 	/*
3292 	 * Now we need to increment tep reference because tep is referenced by
3293 	 * server list of pending connections. We also need to decrement
3294 	 * reference before exiting serializer. Two operations void each other
3295 	 * so we don't modify reference at all.
3296 	 */
3297 	ASSERT(tep->te_refcnt >= 2);
3298 	ASSERT(peer_tep->te_refcnt >= 2);
3299 	tl_serializer_exit(tep);
3300 }
3301 
3302 
3303 
3304 /*
3305  * Handle T_conn_res on listener stream. Called on listener serializer.
3306  * For sockets with early connect enabled tl_conn_req_ser has already
3307  * generated the T_CONN_CON; otherwise it is built and sent here.
3308  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3309  * Switch eager serializer to acceptor's. When that is not possible the
 * acceptor is moved to the eager's serializer (the listener's) instead.
3310  *
3311  * If TL_SET[U]CRED generate the credentials options.
3312  *
 * Arguments:
 *	mp  - the T_CONN_RES or O_T_CONN_RES message.
 *	tep - the listening endpoint the message arrived on.
 *
 * Outline:
 *	- preallocate the ack, validate listener state, message length,
 *	  options (none are accepted) and SEQ_number;
 *	- look up the acceptor by ACCEPTOR_id and block its close;
 *	- find the pending connection (tl_icon_t) and, if still present, the
 *	  client endpoint;
 *	- send T_OK_ACK, link client and acceptor through te_conp, choose a
 *	  common serializer and free the tl_icon_t;
 *	- send T_CONN_CON to the client unless it was already sent.
 *
 * The acceptor lookup returns the acceptor with an extra reference. Paths
 * that do not end up linking acceptor and client drop it with
 * tl_refrele(acc_ep); the path that links them keeps it.
3313  */
3314 static void
3315 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3316 {
3317 	queue_t			*wq;
3318 	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
3319 	ssize_t			msz = MBLKL(mp);
3320 	t_scalar_t		olen, ooff, err = 0;
3321 	t_scalar_t		prim = cres->PRIM_type;
3322 	uchar_t			*addr_startp;
3323 	tl_endpt_t 		*acc_ep = NULL, *cl_ep = NULL;
3324 	tl_icon_t		*tip;
3325 	size_t			size;
3326 	mblk_t			*ackmp, *respmp;
3327 	mblk_t			*dimp, *ccmp = NULL;
3328 	struct T_discon_ind	*di;
3329 	struct T_conn_con	*cc;
3330 	boolean_t		client_noclose_set = B_FALSE;
3331 	boolean_t		switch_client_serializer = B_TRUE;
3332 
3333 	ASSERT(IS_COTS(tep));
3334 
	/* Listener is closing: drop the message without any ack. */
3335 	if (tep->te_closing) {
3336 		freemsg(mp);
3337 		return;
3338 	}
3339 
3340 	wq = tep->te_wq;
3341 
3342 	/*
3343 	 * preallocate memory for:
3344 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3345 	 *	==> known max T_ERROR_ACK
3346 	 * 2. max of T_DISCON_IND and T_CONN_CON
3347 	 */
3348 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3349 	if (! ackmp) {
3350 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3351 		return;
3352 	}
3353 	/*
3354 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3355 	 * will be committed for T_DISCON_IND/T_CONN_CON later
3356 	 */
3357 
3358 
3359 	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3360 
3361 	/*
3362 	 * validate state
3363 	 */
3364 	if (tep->te_state != TS_WRES_CIND) {
3365 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3366 		    SL_TRACE|SL_ERROR,
3367 		    "tl_wput:T_CONN_RES:out of state, state=%d",
3368 		    tep->te_state));
3369 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3370 		freemsg(mp);
3371 		return;
3372 	}
3373 
3374 	/*
3375 	 * validate the message
3376 	 * Note: dereference fields in struct inside message only
3377 	 * after validating the message length.
3378 	 */
3379 	if (msz < sizeof (struct T_conn_res)) {
3380 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3381 		    "tl_conn_res:invalid message length"));
3382 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3383 		freemsg(mp);
3384 		return;
3385 	}
3386 	olen = cres->OPT_length;
3387 	ooff = cres->OPT_offset;
3388 	if (((olen > 0) && ((ooff + olen) > msz))) {
3389 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3390 		    "tl_conn_res:invalid message"));
3391 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3392 		freemsg(mp);
3393 		return;
3394 	}
3395 	if (olen) {
3396 		/*
3397 		 * no opts in connect res
3398 		 * supported in this provider
3399 		 */
3400 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3401 		    "tl_conn_res:options not supported in message"));
3402 		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3403 		freemsg(mp);
3404 		return;
3405 	}
3406 
	/*
	 * The message is well formed: commit the TE_CONN_RES transition.
	 * Every error path below moves the state on with TE_ERROR_ACK before
	 * sending the T_ERROR_ACK.
	 */
3407 	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3408 	ASSERT(tep->te_state == TS_WACK_CRES);
3409 
	/*
	 * Reject sequence numbers that are below TL_MINOR_START but not
	 * below BADSEQNUM.
	 */
3410 	if (cres->SEQ_number < TL_MINOR_START &&
3411 	    cres->SEQ_number >= BADSEQNUM) {
3412 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3413 		    "tl_conn_res:remote endpoint sequence number bad"));
3414 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3415 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3416 		freemsg(mp);
3417 		return;
3418 	}
3419 
3420 	/*
3421 	 * find accepting endpoint. Will have extra reference if found.
3422 	 */
3423 	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3424 	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3425 	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3426 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3427 		    "tl_conn_res:bad accepting endpoint"));
3428 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3429 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3430 		freemsg(mp);
3431 		return;
3432 	}
3433 
3434 	/*
3435 	 * Prevent acceptor from closing. Each later return path undoes this
	 * with tl_closeok(acc_ep).
3436 	 */
3437 	if (! tl_noclose(acc_ep)) {
3438 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3439 		    "tl_conn_res:bad accepting endpoint"));
3440 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3441 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3442 		tl_refrele(acc_ep);
3443 		freemsg(mp);
3444 		return;
3445 	}
3446 
3447 	acc_ep->te_flag |= TL_ACCEPTOR;
3448 
3449 	/*
3450 	 * validate that accepting endpoint, if different from listening
3451 	 * has address bound => state is TS_IDLE
3452 	 * TROUBLE in XPG4 !!?
3453 	 */
3454 	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3455 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3456 		    "tl_conn_res:accepting endpoint has no address bound,"
3457 		    "state=%d", acc_ep->te_state));
3458 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3459 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3460 		freemsg(mp);
3461 		tl_closeok(acc_ep);
3462 		tl_refrele(acc_ep);
3463 		return;
3464 	}
3465 
3466 	/*
3467 	 * validate if accepting endpt same as listening, then
3468 	 * no other incoming connection should be on the queue
3469 	 */
3470 
3471 	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3472 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3473 		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3474 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3475 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3476 		freemsg(mp);
3477 		tl_closeok(acc_ep);
3478 		tl_refrele(acc_ep);
3479 		return;
3480 	}
3481 
3482 	/*
3483 	 * Look up the pending-connection entry for this sequence number on
3484 	 * the listener's list of incoming connections. The entry is removed
3485 	 * later, once the response has been processed; a missing entry means
3486 	 * the sequence number does not name a pending connection.
3487 	 */
3488 	tip = tl_icon_find(tep, cres->SEQ_number);
3489 	if (tip == NULL) {
3490 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3491 		    "tl_conn_res:no client in listener list"));
3492 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3493 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3494 		freemsg(mp);
3495 		tl_closeok(acc_ep);
3496 		tl_refrele(acc_ep);
3497 		return;
3498 	}
3499 
3500 	/*
3501 	 * If ti_tep is NULL the client has already closed. In this case
3502 	 * the code below will avoid any action on the client side
3503 	 * but complete the server and acceptor state transitions.
3504 	 */
3505 	ASSERT(tip->ti_tep == NULL ||
3506 	    tip->ti_tep->te_seqno == cres->SEQ_number);
3507 	cl_ep = tip->ti_tep;
3508 
3509 	/*
3510 	 * If the client is present it is switched from listener's to acceptor's
3511 	 * serializer. We should block client closes while serializers are
3512 	 * being switched.
3513 	 *
3514 	 * It is possible that the client is present but is currently being
3515 	 * closed. There are two possible cases:
3516 	 *
3517 	 * 1) The client has already entered tl_close_finish_ser() and sent
3518 	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
3519 	 *    still need to send all messages from tip->ti_mp to the acceptor).
3520 	 *
3521 	 * 2) The client started the close but has not entered
3522 	 *    tl_close_finish_ser() yet. In this case, the client is already
3523 	 *    proceeding asynchronously on the listener's serializer, so we're
3524 	 *    forced to change the acceptor to use the listener's serializer to
3525 	 *    ensure that any operations on the acceptor are serialized with
3526 	 *    respect to the close that's in-progress.
3527 	 */
3528 	if (cl_ep != NULL) {
3529 		if (tl_noclose(cl_ep)) {
			/* Remember to call tl_closeok(cl_ep) on the way out. */
3530 			client_noclose_set = B_TRUE;
3531 		} else {
3532 			/*
3533 			 * Client is closing. If it has sent the
3534 			 * T_ORDREL_IND, we can simply ignore it - otherwise,
3535 			 * we have to let the client continue until it is
3536 			 * sent.
3537 			 *
3538 			 * If we do continue using the client, acceptor will
3539 			 * switch to client's serializer which is used by client
3540 			 * for its close.
			 *
			 * The client is ignored (cl_ep = NULL) when it is not
			 * a socket, when early connect is disabled, or when
			 * its te_state is -1.
3541 			 */
3542 			tl_client_closing_when_accepting++;
3543 			switch_client_serializer = B_FALSE;
3544 			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3545 			    cl_ep->te_state == -1)
3546 				cl_ep = NULL;
3547 		}
3548 	}
3549 
3550 	if (cl_ep != NULL) {
3551 		/*
3552 		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3553 		 * (latter for sockets only)
		 *
		 * NOTE(review): as written the test can only be true for
		 * sockets, because IS_SOCKET(cl_ep) is ANDed into it; a
		 * non-socket client in any other state is not rejected
		 * here. Confirm whether that matches the intent stated
		 * above.
3554 		 */
3555 		if (cl_ep->te_state != TS_WCON_CREQ &&
3556 		    (cl_ep->te_state != TS_DATA_XFER &&
3557 		    IS_SOCKET(cl_ep))) {
3558 			err = ECONNREFUSED;
3559 			/*
3560 			 * T_DISCON_IND sent later after committing memory
3561 			 * and acking validity of request
3562 			 */
3563 			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3564 			    "tl_conn_res:peer in bad state"));
3565 		}
3566 
3567 		/*
3568 		 * preallocate now for T_DISCON_IND or T_CONN_CON
3569 		 * ack validity of request (T_OK_ACK) after memory committed
3570 		 */
3571 
3572 		if (err)
3573 			size = sizeof (struct T_discon_ind);
3574 		else {
3575 			/*
3576 			 * calculate length of T_CONN_CON message
			 * (header + acceptor address, aligned, plus one
			 * credential option if the client asked for it)
3577 			 */
3578 			olen = 0;
3579 			if (cl_ep->te_flag & TL_SETCRED) {
3580 				olen = (t_scalar_t)sizeof (struct opthdr) +
3581 				    OPTLEN(sizeof (tl_credopt_t));
3582 			} else if (cl_ep->te_flag & TL_SETUCRED) {
3583 				olen = (t_scalar_t)sizeof (struct opthdr) +
3584 				    OPTLEN(ucredminsize(acc_ep->te_credp));
3585 			}
3586 			size = T_ALIGN(sizeof (struct T_conn_con) +
3587 			    acc_ep->te_alen) + olen;
3588 		}
3589 		if ((respmp = reallocb(mp, size, 0)) == NULL) {
3590 			/*
3591 			 * roll back state changes
3592 			 */
3593 			tep->te_state = TS_WRES_CIND;
3594 			tl_memrecover(wq, mp, size);
3595 			freemsg(ackmp);
3596 			if (client_noclose_set)
3597 				tl_closeok(cl_ep);
3598 			tl_closeok(acc_ep);
3599 			tl_refrele(acc_ep);
3600 			return;
3601 		}
		/* From here on the message is referenced through respmp only. */
3602 		mp = NULL;
3603 	}
3604 
3605 	/*
3606 	 * Now ack validity of request. The ok-ack event depends on whether
	 * this is the only pending connection and, if so, whether the
	 * listener accepts on itself.
3607 	 */
3608 	if (tep->te_nicon == 1) {
3609 		if (tep == acc_ep)
3610 			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3611 		else
3612 			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3613 	} else
3614 		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3615 
3616 	/*
3617 	 * send T_DISCON_IND now if client state validation failed earlier
	 * (err is only ever set while cl_ep != NULL, so respmp and size are
	 * valid here). The indication goes up the acceptor's read queue.
3618 	 */
3619 	if (err) {
3620 		tl_ok_ack(wq, ackmp, prim);
3621 		/*
3622 		 * flush the queues - why always ?
3623 		 */
3624 		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3625 
3626 		dimp = tl_resizemp(respmp, size);
3627 		if (! dimp) {
3628 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3629 			    SL_TRACE|SL_ERROR,
3630 			    "tl_conn_res:con_ind:allocb failure"));
3631 			tl_merror(wq, respmp, ENOMEM);
3632 			tl_closeok(acc_ep);
3633 			if (client_noclose_set)
3634 				tl_closeok(cl_ep);
3635 			tl_refrele(acc_ep);
3636 			return;
3637 		}
3638 		if (dimp->b_cont) {
3639 			/* no user data in provider generated discon ind */
3640 			freemsg(dimp->b_cont);
3641 			dimp->b_cont = NULL;
3642 		}
3643 
3644 		DB_TYPE(dimp) = M_PROTO;
3645 		di = (struct T_discon_ind *)dimp->b_rptr;
3646 		di->PRIM_type  = T_DISCON_IND;
3647 		di->DISCON_reason = err;
3648 		di->SEQ_number = BADSEQNUM;
3649 
3650 		tep->te_state = TS_IDLE;
3651 		/*
3652 		 * send T_DISCON_IND message
3653 		 */
3654 		putnext(acc_ep->te_rq, dimp);
3655 		if (client_noclose_set)
3656 			tl_closeok(cl_ep);
3657 		tl_closeok(acc_ep);
3658 		tl_refrele(acc_ep);
3659 		return;
3660 	}
3661 
3662 	/*
3663 	 * now start connecting the accepting endpoint
3664 	 */
3665 	if (tep != acc_ep)
3666 		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3667 
3668 	if (cl_ep == NULL) {
3669 		/*
3670 		 * The client has already closed. Send up any queued messages
3671 		 * and change the state accordingly.
		 * mp is still the original message on this path (reallocb
		 * was not called), so it is freed here.
3672 		 */
3673 		tl_ok_ack(wq, ackmp, prim);
3674 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3675 
3676 		/*
3677 		 * remove endpoint from incoming connection
3678 		 * delete client from list of incoming connections
3679 		 */
3680 		tl_freetip(tep, tip);
3681 		freemsg(mp);
3682 		tl_closeok(acc_ep);
3683 		tl_refrele(acc_ep);
3684 		return;
3685 	} else if (tip->ti_mp != NULL) {
3686 		/*
3687 		 * The client could have queued a T_DISCON_IND which needs
3688 		 * to be sent up.
3689 		 * Note that t_discon_req can not operate the same as
3690 		 * t_data_req since it is not possible for it to putbq
3691 		 * the message and return -1 due to the use of qwriter.
3692 		 */
3693 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3694 	}
3695 
3696 	/*
3697 	 * prepare connect confirm T_CONN_CON message
3698 	 */
3699 
3700 	/*
3701 	 * allocate the message - original data blocks
3702 	 * retained in the returned mblk
	 *
	 * A T_CONN_CON is only built for non-socket clients or when early
	 * connect is disabled; otherwise respmp is not needed and is freed.
3703 	 */
3704 	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3705 		ccmp = tl_resizemp(respmp, size);
3706 		if (ccmp == NULL) {
3707 			tl_ok_ack(wq, ackmp, prim);
3708 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3709 			    SL_TRACE|SL_ERROR,
3710 			    "tl_conn_res:conn_con:allocb failure"));
3711 			tl_merror(wq, respmp, ENOMEM);
3712 			tl_closeok(acc_ep);
3713 			if (client_noclose_set)
3714 				tl_closeok(cl_ep);
3715 			tl_refrele(acc_ep);
3716 			return;
3717 		}
3718 
		/*
		 * The responding address is the acceptor's bound address;
		 * a credential option follows it if the client asked for one.
		 */
3719 		DB_TYPE(ccmp) = M_PROTO;
3720 		cc = (struct T_conn_con *)ccmp->b_rptr;
3721 		cc->PRIM_type  = T_CONN_CON;
3722 		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3723 		cc->RES_length = acc_ep->te_alen;
3724 		addr_startp = ccmp->b_rptr + cc->RES_offset;
3725 		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3726 		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3727 			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3728 			    cc->RES_length);
3729 			cc->OPT_length = olen;
3730 			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3731 			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3732 			    cl_ep->te_credp);
3733 		} else {
3734 			cc->OPT_offset = 0;
3735 			cc->OPT_length = 0;
3736 		}
3737 		/*
3738 		 * Forward the credential in the packet so it can be picked up
3739 		 * at the higher layers for more complete credential processing
3740 		 */
3741 		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
3742 	} else {
3743 		freemsg(respmp);
3744 		respmp = NULL;
3745 	}
3746 
3747 	/*
3748 	 * make connection linking
3749 	 * accepting and client endpoints
3750 	 * No need to increment references:
3751 	 *	on client: it should already have one from tip->ti_tep linkage.
3752 	 *	on acceptor it should already have one from the table lookup.
3753 	 *
3754 	 * At this point both client and acceptor can't close. Set client
3755 	 * serializer to acceptor's.
3756 	 */
3757 	ASSERT(cl_ep->te_refcnt >= 2);
3758 	ASSERT(acc_ep->te_refcnt >= 2);
3759 	ASSERT(cl_ep->te_conp == NULL);
3760 	ASSERT(acc_ep->te_conp == NULL);
3761 	cl_ep->te_conp = acc_ep;
3762 	acc_ep->te_conp = cl_ep;
3763 	ASSERT(cl_ep->te_ser == tep->te_ser);
	/*
	 * The client is only moved when its te_ser_count is zero, checked
	 * under te_ser_lock. NOTE(review): te_ser_count presumably counts
	 * work the client still has on its current serializer - confirm
	 * against the serializer code.
	 */
3764 	if (switch_client_serializer) {
3765 		mutex_enter(&cl_ep->te_ser_lock);
3766 		if (cl_ep->te_ser_count > 0) {
3767 			switch_client_serializer = B_FALSE;
3768 			tl_serializer_noswitch++;
3769 		} else {
3770 			/*
3771 			 * Move client to the acceptor's serializer.
3772 			 */
3773 			tl_serializer_refhold(acc_ep->te_ser);
3774 			tl_serializer_refrele(cl_ep->te_ser);
3775 			cl_ep->te_ser = acc_ep->te_ser;
3776 		}
3777 		mutex_exit(&cl_ep->te_ser_lock);
3778 	}
3779 	if (!switch_client_serializer) {
3780 		/*
3781 		 * It is not possible to switch client to use acceptor's.
3782 		 * Move acceptor to client's serializer (which is the same as
3783 		 * listener's).
3784 		 */
3785 		tl_serializer_refhold(cl_ep->te_ser);
3786 		tl_serializer_refrele(acc_ep->te_ser);
3787 		acc_ep->te_ser = cl_ep->te_ser;
3788 	}
3789 
3790 	TL_REMOVE_PEER(cl_ep->te_oconp);
3791 	TL_REMOVE_PEER(acc_ep->te_oconp);
3792 
3793 	/*
3794 	 * remove endpoint from incoming connection
3795 	 * delete client from list of incoming connections
	 * (ti_tep is cleared first; the client is now reached through
	 * acc_ep->te_conp instead)
3796 	 */
3797 	tip->ti_tep = NULL;
3798 	tl_freetip(tep, tip);
3799 	tl_ok_ack(wq, ackmp, prim);
3800 
3801 	/*
3802 	 * data blocks already linked in reallocb()
3803 	 */
3804 
3805 	/*
3806 	 * link queues so that I_SENDFD will work
3807 	 */
3808 	if (! IS_SOCKET(tep)) {
3809 		acc_ep->te_wq->q_next = cl_ep->te_rq;
3810 		cl_ep->te_wq->q_next = acc_ep->te_rq;
3811 	}
3812 
3813 	/*
3814 	 * send T_CONN_CON up on client side unless it was already
3815 	 * done (for a socket). In cases any data or ordrel req has been
3816 	 * queued make sure that the service procedure runs.
	 *
	 * NOTE(review): this condition is the complement of the one used
	 * above to build ccmp, so ccmp is expected to be NULL in the first
	 * branch and non-NULL in the second. That relies on
	 * tl_disable_early_connect not changing between the two tests -
	 * confirm.
3817 	 */
3818 	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3819 		enableok(cl_ep->te_wq);
3820 		TL_QENABLE(cl_ep);
3821 		if (ccmp != NULL)
3822 			freemsg(ccmp);
3823 	} else {
3824 		/*
3825 		 * change client state on TE_CONN_CON event
3826 		 */
3827 		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3828 		putnext(cl_ep->te_rq, ccmp);
3829 	}
3830 
3831 	/* Mark both endpoints as accepted */
3832 	cl_ep->te_flag |= TL_ACCEPTED;
3833 	acc_ep->te_flag |= TL_ACCEPTED;
3834 
3835 	/*
3836 	 * Allow client and acceptor to close.
3837 	 */
3838 	tl_closeok(acc_ep);
3839 	if (client_noclose_set)
3840 		tl_closeok(cl_ep);
3841 }
3842 
3843 
3844 
3845 
3846 static void
3847 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3848 {
3849 	queue_t			*wq;
3850 	struct T_discon_req	*dr;
3851 	ssize_t			msz;
3852 	tl_endpt_t		*peer_tep = tep->te_conp;
3853 	tl_endpt_t		*srv_tep = tep->te_oconp;
3854 	tl_icon_t		*tip;
3855 	size_t			size;
3856 	mblk_t			*ackmp, *dimp, *respmp;
3857 	struct T_discon_ind	*di;
3858 	t_scalar_t		save_state, new_state;
3859 
3860 	if (tep->te_closing) {
3861 		freemsg(mp);
3862 		return;
3863 	}
3864 
3865 	if ((peer_tep != NULL) && peer_tep->te_closing) {
3866 		TL_UNCONNECT(tep->te_conp);
3867 		peer_tep = NULL;
3868 	}
3869 	if ((srv_tep != NULL) && srv_tep->te_closing) {
3870 		TL_UNCONNECT(tep->te_oconp);
3871 		srv_tep = NULL;
3872 	}
3873 
3874 	wq = tep->te_wq;
3875 
3876 	/*
3877 	 * preallocate memory for:
3878 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3879 	 *	==> known max T_ERROR_ACK
3880 	 * 2. for  T_DISCON_IND
3881 	 */
3882 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3883 	if (! ackmp) {
3884 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3885 		return;
3886 	}
3887 	/*
3888 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3889 	 * will be committed for T_DISCON_IND  later
3890 	 */
3891 
3892 	dr = (struct T_discon_req *)mp->b_rptr;
3893 	msz = MBLKL(mp);
3894 
3895 	/*
3896 	 * validate the state
3897 	 */
3898 	save_state = new_state = tep->te_state;
3899 	if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3900 	    ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3901 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3902 		    SL_TRACE|SL_ERROR,
3903 		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
3904 		    tep->te_state));
3905 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3906 		freemsg(mp);
3907 		return;
3908 	}
3909 	/*
3910 	 * Defer committing the state change until it is determined if
3911 	 * the message will be queued with the tl_icon or not.
3912 	 */
3913 	new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3914 
3915 	/* validate the message */
3916 	if (msz < sizeof (struct T_discon_req)) {
3917 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3918 		    "tl_discon_req:invalid message"));
3919 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3920 		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3921 		freemsg(mp);
3922 		return;
3923 	}
3924 
3925 	/*
3926 	 * if server, then validate that client exists
3927 	 * by connection sequence number etc.
3928 	 */
3929 	if (tep->te_nicon > 0) { /* server */
3930 
3931 		/*
3932 		 * search server list for disconnect client
3933 		 */
3934 		tip = tl_icon_find(tep, dr->SEQ_number);
3935 		if (tip == NULL) {
3936 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
3937 			    SL_TRACE|SL_ERROR,
3938 			    "tl_discon_req:no disconnect endpoint"));
3939 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3940 			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3941 			freemsg(mp);
3942 			return;
3943 		}
3944 		/*
3945 		 * If ti_tep is NULL the client has already closed. In this case
3946 		 * the code below will avoid any action on the client side.
3947 		 */
3948 
3949 		IMPLY(tip->ti_tep != NULL,
3950 		    tip->ti_tep->te_seqno == dr->SEQ_number);
3951 		peer_tep = tip->ti_tep;
3952 	}
3953 
3954 	/*
3955 	 * preallocate now for T_DISCON_IND
3956 	 * ack validity of request (T_OK_ACK) after memory committed
3957 	 */
3958 	size = sizeof (struct T_discon_ind);
3959 	if ((respmp = reallocb(mp, size, 0)) == NULL) {
3960 		tl_memrecover(wq, mp, size);
3961 		freemsg(ackmp);
3962 		return;
3963 	}
3964 
3965 	/*
3966 	 * prepare message to ack validity of request
3967 	 */
3968 	if (tep->te_nicon == 0)
3969 		new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3970 	else
3971 		if (tep->te_nicon == 1)
3972 			new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3973 		else
3974 			new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3975 
3976 	/*
3977 	 * Flushing queues according to TPI. Using the old state.
3978 	 */
3979 	if ((tep->te_nicon <= 1) &&
3980 	    ((save_state == TS_DATA_XFER) ||
3981 	    (save_state == TS_WIND_ORDREL) ||
3982 	    (save_state == TS_WREQ_ORDREL)))
3983 		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3984 
3985 	/* send T_OK_ACK up  */
3986 	tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3987 
3988 	/*
3989 	 * now do disconnect business
3990 	 */
3991 	if (tep->te_nicon > 0) { /* listener */
3992 		if (peer_tep != NULL && !peer_tep->te_closing) {
3993 			/*
3994 			 * disconnect incoming connect request pending to tep
3995 			 */
3996 			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
3997 				(void) (STRLOG(TL_ID, tep->te_minor, 2,
3998 				    SL_TRACE|SL_ERROR,
3999 				    "tl_discon_req: reallocb failed"));
4000 				tep->te_state = new_state;
4001 				tl_merror(wq, respmp, ENOMEM);
4002 				return;
4003 			}
4004 			di = (struct T_discon_ind *)dimp->b_rptr;
4005 			di->SEQ_number = BADSEQNUM;
4006 			save_state = peer_tep->te_state;
4007 			peer_tep->te_state = TS_IDLE;
4008 
4009 			TL_REMOVE_PEER(peer_tep->te_oconp);
4010 			enableok(peer_tep->te_wq);
4011 			TL_QENABLE(peer_tep);
4012 		} else {
4013 			freemsg(respmp);
4014 			dimp = NULL;
4015 		}
4016 
4017 		/*
4018 		 * remove endpoint from incoming connection list
4019 		 * - remove disconnect client from list on server
4020 		 */
4021 		tl_freetip(tep, tip);
4022 	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4023 		/*
4024 		 * disconnect an outgoing request pending from tep
4025 		 */
4026 
4027 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4028 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4029 			    SL_TRACE|SL_ERROR,
4030 			    "tl_discon_req: reallocb failed"));
4031 			tep->te_state = new_state;
4032 			tl_merror(wq, respmp, ENOMEM);
4033 			return;
4034 		}
4035 		di = (struct T_discon_ind *)dimp->b_rptr;
4036 		DB_TYPE(dimp) = M_PROTO;
4037 		di->PRIM_type  = T_DISCON_IND;
4038 		di->DISCON_reason = ECONNRESET;
4039 		di->SEQ_number = tep->te_seqno;
4040 
4041 		/*
4042 		 * If this is a socket the T_DISCON_IND is queued with
4043 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4044 		 * from the list of pending connections.
4045 		 * Note that when te_oconp is set the peer better have
4046 		 * a t_connind_t for the client.
4047 		 */
4048 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4049 			/*
4050 			 * No need to check that
4051 			 * ti_tep == NULL since the T_DISCON_IND
4052 			 * takes precedence over other queued
4053 			 * messages.
4054 			 */
4055 			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4056 			peer_tep = NULL;
4057 			dimp = NULL;
4058 			/*
4059 			 * Can't clear te_oconp since tl_co_unconnect needs
4060 			 * it as a hint not to free the tep.
4061 			 * Keep the state unchanged since tl_conn_res inspects
4062 			 * it.
4063 			 */
4064 			new_state = tep->te_state;
4065 		} else {
4066 			/* Found - delete it */
4067 			tip = tl_icon_find(peer_tep, tep->te_seqno);
4068 			if (tip != NULL) {
4069 				ASSERT(tep == tip->ti_tep);
4070 				save_state = peer_tep->te_state;
4071 				if (peer_tep->te_nicon == 1)
4072 					peer_tep->te_state =
4073 					    NEXTSTATE(TE_DISCON_IND2,
4074 					    peer_tep->te_state);
4075 				else
4076 					peer_tep->te_state =
4077 					    NEXTSTATE(TE_DISCON_IND3,
4078 					    peer_tep->te_state);
4079 				tl_freetip(peer_tep, tip);
4080 			}
4081 			ASSERT(tep->te_oconp != NULL);
4082 			TL_UNCONNECT(tep->te_oconp);
4083 		}
4084 	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4085 		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4086 			(void) (STRLOG(TL_ID, tep->te_minor, 2,
4087 			    SL_TRACE|SL_ERROR,
4088 			    "tl_discon_req: reallocb failed"));
4089 			tep->te_state = new_state;
4090 			tl_merror(wq, respmp, ENOMEM);
4091 			return;
4092 		}
4093 		di = (struct T_discon_ind *)dimp->b_rptr;
4094 		di->SEQ_number = BADSEQNUM;
4095 
4096 		save_state = peer_tep->te_state;
4097 		peer_tep->te_state = TS_IDLE;
4098 	} else {
4099 		/* Not connected */
4100 		tep->te_state = new_state;
4101 		freemsg(respmp);
4102 		return;
4103 	}
4104 
4105 	/* Commit state changes */
4106 	tep->te_state = new_state;
4107 
4108 	if (peer_tep == NULL) {
4109 		ASSERT(dimp == NULL);
4110 		goto done;
4111 	}
4112 	/*
4113 	 * Flush queues on peer before sending up
4114 	 * T_DISCON_IND according to TPI
4115 	 */
4116 
4117 	if ((save_state == TS_DATA_XFER) ||
4118 	    (save_state == TS_WIND_ORDREL) ||
4119 	    (save_state == TS_WREQ_ORDREL))
4120 		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4121 
4122 	DB_TYPE(dimp) = M_PROTO;
4123 	di->PRIM_type  = T_DISCON_IND;
4124 	di->DISCON_reason = ECONNRESET;
4125 
4126 	/*
4127 	 * data blocks already linked into dimp by reallocb()
4128 	 */
4129 	/*
4130 	 * send indication message to peer user module
4131 	 */
4132 	ASSERT(dimp != NULL);
4133 	putnext(peer_tep->te_rq, dimp);
4134 done:
4135 	if (tep->te_conp) {	/* disconnect pointers if connected */
4136 		ASSERT(! peer_tep->te_closing);
4137 
4138 		/*
4139 		 * Messages may be queued on peer's write queue
4140 		 * waiting to be processed by its write service
4141 		 * procedure. Before the pointer to the peer transport
4142 		 * structure is set to NULL, qenable the peer's write
4143 		 * queue so that the queued up messages are processed.
4144 		 */
4145 		if ((save_state == TS_DATA_XFER) ||
4146 		    (save_state == TS_WIND_ORDREL) ||
4147 		    (save_state == TS_WREQ_ORDREL))
4148 			TL_QENABLE(peer_tep);
4149 		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4150 		TL_UNCONNECT(peer_tep->te_conp);
4151 		if (! IS_SOCKET(tep)) {
4152 			/*
4153 			 * unlink the streams
4154 			 */
4155 			tep->te_wq->q_next = NULL;
4156 			peer_tep->te_wq->q_next = NULL;
4157 		}
4158 		TL_UNCONNECT(tep->te_conp);
4159 	}
4160 }
4161 
4162 
4163 static void
4164 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4165 {
4166 	queue_t			*wq;
4167 	size_t			ack_sz;
4168 	mblk_t			*ackmp;
4169 	struct T_addr_ack	*taa;
4170 
4171 	if (tep->te_closing) {
4172 		freemsg(mp);
4173 		return;
4174 	}
4175 
4176 	wq = tep->te_wq;
4177 
4178 	/*
4179 	 * Note: T_ADDR_REQ message has only PRIM_type field
4180 	 * so it is already validated earlier.
4181 	 */
4182 
4183 	if (IS_CLTS(tep) ||
4184 	    (tep->te_state > TS_WREQ_ORDREL) ||
4185 	    (tep->te_state < TS_DATA_XFER)) {
4186 		/*
4187 		 * Either connectionless or connection oriented but not
4188 		 * in connected data transfer state or half-closed states.
4189 		 */
4190 		ack_sz = sizeof (struct T_addr_ack);
4191 		if (tep->te_state >= TS_IDLE)
4192 			/* is bound */
4193 			ack_sz += tep->te_alen;
4194 		ackmp = reallocb(mp, ack_sz, 0);
4195 		if (ackmp == NULL) {
4196 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4197 			    SL_TRACE|SL_ERROR,
4198 			    "tl_addr_req: reallocb failed"));
4199 			tl_memrecover(wq, mp, ack_sz);
4200 			return;
4201 		}
4202 
4203 		taa = (struct T_addr_ack *)ackmp->b_rptr;
4204 
4205 		bzero(taa, sizeof (struct T_addr_ack));
4206 
4207 		taa->PRIM_type = T_ADDR_ACK;
4208 		ackmp->b_datap->db_type = M_PCPROTO;
4209 		ackmp->b_wptr = (uchar_t *)&taa[1];
4210 
4211 		if (tep->te_state >= TS_IDLE) {
4212 			/* endpoint is bound */
4213 			taa->LOCADDR_length = tep->te_alen;
4214 			taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4215 
4216 			bcopy(tep->te_abuf, ackmp->b_wptr,
4217 			    tep->te_alen);
4218 			ackmp->b_wptr += tep->te_alen;
4219 			ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4220 		}
4221 
4222 		(void) qreply(wq, ackmp);
4223 	} else {
4224 		ASSERT(tep->te_state == TS_DATA_XFER ||
4225 		    tep->te_state == TS_WIND_ORDREL ||
4226 		    tep->te_state == TS_WREQ_ORDREL);
4227 		/* connection oriented in data transfer */
4228 		tl_connected_cots_addr_req(mp, tep);
4229 	}
4230 }
4231 
4232 
4233 static void
4234 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4235 {
4236 	tl_endpt_t		*peer_tep;
4237 	size_t			ack_sz;
4238 	mblk_t			*ackmp;
4239 	struct T_addr_ack	*taa;
4240 	uchar_t			*addr_startp;
4241 
4242 	if (tep->te_closing) {
4243 		freemsg(mp);
4244 		return;
4245 	}
4246 
4247 	ASSERT(tep->te_state >= TS_IDLE);
4248 
4249 	ack_sz = sizeof (struct T_addr_ack);
4250 	ack_sz += T_ALIGN(tep->te_alen);
4251 	peer_tep = tep->te_conp;
4252 	ack_sz += peer_tep->te_alen;
4253 
4254 	ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4255 	if (ackmp == NULL) {
4256 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4257 		    "tl_connected_cots_addr_req: reallocb failed"));
4258 		tl_memrecover(tep->te_wq, mp, ack_sz);
4259 		return;
4260 	}
4261 
4262 	taa = (struct T_addr_ack *)ackmp->b_rptr;
4263 
4264 	/* endpoint is bound */
4265 	taa->LOCADDR_length = tep->te_alen;
4266 	taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4267 
4268 	addr_startp = (uchar_t *)&taa[1];
4269 
4270 	bcopy(tep->te_abuf, addr_startp,
4271 	    tep->te_alen);
4272 
4273 	taa->REMADDR_length = peer_tep->te_alen;
4274 	taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4275 	    taa->LOCADDR_length);
4276 	addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4277 	bcopy(peer_tep->te_abuf, addr_startp,
4278 	    peer_tep->te_alen);
4279 	ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4280 	    taa->REMADDR_offset + peer_tep->te_alen;
4281 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4282 
4283 	putnext(tep->te_rq, ackmp);
4284 }
4285 
4286 static void
4287 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4288 {
4289 	if (IS_CLTS(tep)) {
4290 		*ia = tl_clts_info_ack;
4291 		ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4292 	} else {
4293 		*ia = tl_cots_info_ack;
4294 		if (IS_COTSORD(tep))
4295 			ia->SERV_type = T_COTS_ORD;
4296 	}
4297 	ia->TIDU_size = tl_tidusz;
4298 	ia->CURRENT_state = tep->te_state;
4299 }
4300 
4301 /*
4302  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4303  * tl_wput.
4304  */
4305 static void
4306 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4307 {
4308 	mblk_t			*ackmp;
4309 	t_uscalar_t		cap_bits1;
4310 	struct T_capability_ack	*tcap;
4311 
4312 	if (tep->te_closing) {
4313 		freemsg(mp);
4314 		return;
4315 	}
4316 
4317 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4318 
4319 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4320 	    M_PCPROTO, T_CAPABILITY_ACK);
4321 	if (ackmp == NULL) {
4322 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4323 		    "tl_capability_req: reallocb failed"));
4324 		tl_memrecover(tep->te_wq, mp,
4325 		    sizeof (struct T_capability_ack));
4326 		return;
4327 	}
4328 
4329 	tcap = (struct T_capability_ack *)ackmp->b_rptr;
4330 	tcap->CAP_bits1 = 0;
4331 
4332 	if (cap_bits1 & TC1_INFO) {
4333 		tl_copy_info(&tcap->INFO_ack, tep);
4334 		tcap->CAP_bits1 |= TC1_INFO;
4335 	}
4336 
4337 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
4338 		tcap->ACCEPTOR_id = tep->te_acceptor_id;
4339 		tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4340 	}
4341 
4342 	putnext(tep->te_rq, ackmp);
4343 }
4344 
4345 static void
4346 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4347 {
4348 	if (! tep->te_closing)
4349 		tl_info_req(mp, tep);
4350 	else
4351 		freemsg(mp);
4352 
4353 	tl_serializer_exit(tep);
4354 	tl_refrele(tep);
4355 }
4356 
4357 static void
4358 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4359 {
4360 	mblk_t *ackmp;
4361 
4362 	ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4363 	    M_PCPROTO, T_INFO_ACK);
4364 	if (ackmp == NULL) {
4365 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4366 		    "tl_info_req: reallocb failed"));
4367 		tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4368 		return;
4369 	}
4370 
4371 	/*
4372 	 * fill in T_INFO_ACK contents
4373 	 */
4374 	tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4375 
4376 	/*
4377 	 * send ack message
4378 	 */
4379 	putnext(tep->te_rq, ackmp);
4380 }
4381 
4382 /*
4383  * Handle M_DATA, T_data_req and T_optdata_req.
4384  * If this is a socket pass through T_optdata_req options unmodified.
4385  */
4386 static void
4387 tl_data(mblk_t *mp, tl_endpt_t *tep)
4388 {
4389 	queue_t			*wq = tep->te_wq;
4390 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4391 	ssize_t			msz = MBLKL(mp);
4392 	tl_endpt_t		*peer_tep;
4393 	queue_t			*peer_rq;
4394 	boolean_t		closing = tep->te_closing;
4395 
4396 	if (IS_CLTS(tep)) {
4397 		(void) (STRLOG(TL_ID, tep->te_minor, 2,
4398 		    SL_TRACE|SL_ERROR,
4399 		    "tl_wput:clts:unattached M_DATA"));
4400 		if (!closing) {
4401 			tl_merror(wq, mp, EPROTO);
4402 		} else {
4403 			freemsg(mp);
4404 		}
4405 		return;
4406 	}
4407 
4408 	/*
4409 	 * If the endpoint is closing it should still forward any data to the
4410 	 * peer (if it has one). If it is not allowed to forward it can just
4411 	 * free the message.
4412 	 */
4413 	if (closing &&
4414 	    (tep->te_state != TS_DATA_XFER) &&
4415 	    (tep->te_state != TS_WREQ_ORDREL)) {
4416 		freemsg(mp);
4417 		return;
4418 	}
4419 
4420 	if (DB_TYPE(mp) == M_PROTO) {
4421 		if (prim->type == T_DATA_REQ &&
4422 		    msz < sizeof (struct T_data_req)) {
4423 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4424 				SL_TRACE|SL_ERROR,
4425 				"tl_data:T_DATA_REQ:invalid message"));
4426 			if (!closing) {
4427 				tl_merror(wq, mp, EPROTO);
4428 			} else {
4429 				freemsg(mp);
4430 			}
4431 			return;
4432 		} else if (prim->type == T_OPTDATA_REQ &&
4433 		    (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4434 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4435 			    SL_TRACE|SL_ERROR,
4436 			    "tl_data:T_OPTDATA_REQ:invalid message"));
4437 			if (!closing) {
4438 				tl_merror(wq, mp, EPROTO);
4439 			} else {
4440 				freemsg(mp);
4441 			}
4442 			return;
4443 		}
4444 	}
4445 
4446 	/*
4447 	 * connection oriented provider
4448 	 */
4449 	switch (tep->te_state) {
4450 	case TS_IDLE:
4451 		/*
4452 		 * Other end not here - do nothing.
4453 		 */
4454 		freemsg(mp);
4455 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4456 		    "tl_data:cots with endpoint idle"));
4457 		return;
4458 
4459 	case TS_DATA_XFER:
4460 		/* valid states */
4461 		if (tep->te_conp != NULL)
4462 			break;
4463 
4464 		if (tep->te_oconp == NULL) {
4465 			if (!closing) {
4466 				tl_merror(wq, mp, EPROTO);
4467 			} else {
4468 				freemsg(mp);
4469 			}
4470 			return;
4471 		}
4472 		/*
4473 		 * For a socket the T_CONN_CON is sent early thus
4474 		 * the peer might not yet have accepted the connection.
4475 		 * If we are closing queue the packet with the T_CONN_IND.
4476 		 * Otherwise defer processing the packet until the peer
4477 		 * accepts the connection.
4478 		 * Note that the queue is noenabled when we go into this
4479 		 * state.
4480 		 */
4481 		if (!closing) {
4482 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4483 			    SL_TRACE|SL_ERROR,
4484 			    "tl_data: ocon"));
4485 			TL_PUTBQ(tep, mp);
4486 			return;
4487 		}
4488 		if (DB_TYPE(mp) == M_PROTO) {
4489 			if (msz < sizeof (t_scalar_t)) {
4490 				freemsg(mp);
4491 				return;
4492 			}
4493 			/* reuse message block - just change REQ to IND */
4494 			if (prim->type == T_DATA_REQ)
4495 				prim->type = T_DATA_IND;
4496 			else
4497 				prim->type = T_OPTDATA_IND;
4498 		}
4499 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4500 		return;
4501 
4502 	case TS_WREQ_ORDREL:
4503 		if (tep->te_conp == NULL) {
4504 			/*
4505 			 * Other end closed - generate discon_ind
4506 			 * with reason 0 to cause an EPIPE but no
4507 			 * read side error on AF_UNIX sockets.
4508 			 */
4509 			freemsg(mp);
4510 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4511 			    SL_TRACE|SL_ERROR,
4512 			    "tl_data: WREQ_ORDREL and no peer"));
4513 			tl_discon_ind(tep, 0);
4514 			return;
4515 		}
4516 		break;
4517 
4518 	default:
4519 		/* invalid state for event TE_DATA_REQ */
4520 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4521 		    "tl_data:cots:out of state"));
4522 		tl_merror(wq, mp, EPROTO);
4523 		return;
4524 	}
4525 	/*
4526 	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4527 	 * (State stays same on this event)
4528 	 */
4529 
4530 	/*
4531 	 * get connected endpoint
4532 	 */
4533 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4534 		freemsg(mp);
4535 		/* Peer closed */
4536 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4537 		    "tl_data: peer gone"));
4538 		return;
4539 	}
4540 
4541 	ASSERT(tep->te_serializer == peer_tep->te_serializer);
4542 	peer_rq = peer_tep->te_rq;
4543 
4544 	/*
4545 	 * Put it back if flow controlled
4546 	 * Note: Messages already on queue when we are closing is bounded
4547 	 * so we can ignore flow control.
4548 	 */
4549 	if (!canputnext(peer_rq) && !closing) {
4550 		TL_PUTBQ(tep, mp);
4551 		return;
4552 	}
4553 
4554 	/*
4555 	 * validate peer state
4556 	 */
4557 	switch (peer_tep->te_state) {
4558 	case TS_DATA_XFER:
4559 	case TS_WIND_ORDREL:
4560 		/* valid states */
4561 		break;
4562 	default:
4563 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4564 		    "tl_data:rx side:invalid state"));
4565 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4566 		return;
4567 	}
4568 	if (DB_TYPE(mp) == M_PROTO) {
4569 		/* reuse message block - just change REQ to IND */
4570 		if (prim->type == T_DATA_REQ)
4571 			prim->type = T_DATA_IND;
4572 		else
4573 			prim->type = T_OPTDATA_IND;
4574 	}
4575 	/*
4576 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4577 	 * (peer state stays same on this event)
4578 	 */
4579 	/*
4580 	 * send data to connected peer
4581 	 */
4582 	putnext(peer_rq, mp);
4583 }
4584 
4585 
4586 
4587 static void
4588 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4589 {
4590 	queue_t			*wq = tep->te_wq;
4591 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4592 	ssize_t			msz = MBLKL(mp);
4593 	tl_endpt_t		*peer_tep;
4594 	queue_t			*peer_rq;
4595 	boolean_t		closing = tep->te_closing;
4596 
4597 	if (msz < sizeof (struct T_exdata_req)) {
4598 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4599 		    "tl_exdata:invalid message"));
4600 		if (!closing) {
4601 			tl_merror(wq, mp, EPROTO);
4602 		} else {
4603 			freemsg(mp);
4604 		}
4605 		return;
4606 	}
4607 
4608 	/*
4609 	 * If the endpoint is closing it should still forward any data to the
4610 	 * peer (if it has one). If it is not allowed to forward it can just
4611 	 * free the message.
4612 	 */
4613 	if (closing &&
4614 	    (tep->te_state != TS_DATA_XFER) &&
4615 	    (tep->te_state != TS_WREQ_ORDREL)) {
4616 		freemsg(mp);
4617 		return;
4618 	}
4619 
4620 	/*
4621 	 * validate state
4622 	 */
4623 	switch (tep->te_state) {
4624 	case TS_IDLE:
4625 		/*
4626 		 * Other end not here - do nothing.
4627 		 */
4628 		freemsg(mp);
4629 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4630 		    "tl_exdata:cots with endpoint idle"));
4631 		return;
4632 
4633 	case TS_DATA_XFER:
4634 		/* valid states */
4635 		if (tep->te_conp != NULL)
4636 			break;
4637 
4638 		if (tep->te_oconp == NULL) {
4639 			if (!closing) {
4640 				tl_merror(wq, mp, EPROTO);
4641 			} else {
4642 				freemsg(mp);
4643 			}
4644 			return;
4645 		}
4646 		/*
4647 		 * For a socket the T_CONN_CON is sent early thus
4648 		 * the peer might not yet have accepted the connection.
4649 		 * If we are closing queue the packet with the T_CONN_IND.
4650 		 * Otherwise defer processing the packet until the peer
4651 		 * accepts the connection.
4652 		 * Note that the queue is noenabled when we go into this
4653 		 * state.
4654 		 */
4655 		if (!closing) {
4656 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4657 			    SL_TRACE|SL_ERROR,
4658 			    "tl_exdata: ocon"));
4659 			TL_PUTBQ(tep, mp);
4660 			return;
4661 		}
4662 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4663 		    "tl_exdata: closing socket ocon"));
4664 		prim->type = T_EXDATA_IND;
4665 		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4666 		return;
4667 
4668 	case TS_WREQ_ORDREL:
4669 		if (tep->te_conp == NULL) {
4670 			/*
4671 			 * Other end closed - generate discon_ind
4672 			 * with reason 0 to cause an EPIPE but no
4673 			 * read side error on AF_UNIX sockets.
4674 			 */
4675 			freemsg(mp);
4676 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
4677 			    SL_TRACE|SL_ERROR,
4678 			    "tl_exdata: WREQ_ORDREL and no peer"));
4679 			tl_discon_ind(tep, 0);
4680 			return;
4681 		}
4682 		break;
4683 
4684 	default:
4685 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4686 		    SL_TRACE|SL_ERROR,
4687 		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4688 		    tep->te_state));
4689 		tl_merror(wq, mp, EPROTO);
4690 		return;
4691 	}
4692 	/*
4693 	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4694 	 * (state stays same on this event)
4695 	 */
4696 
4697 	/*
4698 	 * get connected endpoint
4699 	 */
4700 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4701 		freemsg(mp);
4702 		/* Peer closed */
4703 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4704 		    "tl_exdata: peer gone"));
4705 		return;
4706 	}
4707 
4708 	peer_rq = peer_tep->te_rq;
4709 
4710 	/*
4711 	 * Put it back if flow controlled
4712 	 * Note: Messages already on queue when we are closing is bounded
4713 	 * so we can ignore flow control.
4714 	 */
4715 	if (!canputnext(peer_rq) && !closing) {
4716 		TL_PUTBQ(tep, mp);
4717 		return;
4718 	}
4719 
4720 	/*
4721 	 * validate state on peer
4722 	 */
4723 	switch (peer_tep->te_state) {
4724 	case TS_DATA_XFER:
4725 	case TS_WIND_ORDREL:
4726 		/* valid states */
4727 		break;
4728 	default:
4729 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4730 		    "tl_exdata:rx side:invalid state"));
4731 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4732 		return;
4733 	}
4734 	/*
4735 	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4736 	 * (peer state stays same on this event)
4737 	 */
4738 	/*
4739 	 * reuse message block
4740 	 */
4741 	prim->type = T_EXDATA_IND;
4742 
4743 	/*
4744 	 * send data to connected peer
4745 	 */
4746 	putnext(peer_rq, mp);
4747 }
4748 
4749 
4750 
4751 static void
4752 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4753 {
4754 	queue_t			*wq =  tep->te_wq;
4755 	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
4756 	ssize_t			msz = MBLKL(mp);
4757 	tl_endpt_t		*peer_tep;
4758 	queue_t			*peer_rq;
4759 	boolean_t		closing = tep->te_closing;
4760 
4761 	if (msz < sizeof (struct T_ordrel_req)) {
4762 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4763 		    "tl_ordrel:invalid message"));
4764 		if (!closing) {
4765 			tl_merror(wq, mp, EPROTO);
4766 		} else {
4767 			freemsg(mp);
4768 		}
4769 		return;
4770 	}
4771 
4772 	/*
4773 	 * validate state
4774 	 */
4775 	switch (tep->te_state) {
4776 	case TS_DATA_XFER:
4777 	case TS_WREQ_ORDREL:
4778 		/* valid states */
4779 		if (tep->te_conp != NULL)
4780 			break;
4781 
4782 		if (tep->te_oconp == NULL)
4783 			break;
4784 
4785 		/*
4786 		 * For a socket the T_CONN_CON is sent early thus
4787 		 * the peer might not yet have accepted the connection.
4788 		 * If we are closing queue the packet with the T_CONN_IND.
4789 		 * Otherwise defer processing the packet until the peer
4790 		 * accepts the connection.
4791 		 * Note that the queue is noenabled when we go into this
4792 		 * state.
4793 		 */
4794 		if (!closing) {
4795 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
4796 			    SL_TRACE|SL_ERROR,
4797 			    "tl_ordlrel: ocon"));
4798 			TL_PUTBQ(tep, mp);
4799 			return;
4800 		}
4801 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4802 		    "tl_ordlrel: closing socket ocon"));
4803 		prim->type = T_ORDREL_IND;
4804 		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4805 		return;
4806 
4807 	default:
4808 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4809 		    SL_TRACE|SL_ERROR,
4810 		    "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4811 		    tep->te_state));
4812 		if (!closing) {
4813 			tl_merror(wq, mp, EPROTO);
4814 		} else {
4815 			freemsg(mp);
4816 		}
4817 		return;
4818 	}
4819 	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4820 
4821 	/*
4822 	 * get connected endpoint
4823 	 */
4824 	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4825 		/* Peer closed */
4826 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4827 		    "tl_ordrel: peer gone"));
4828 		freemsg(mp);
4829 		return;
4830 	}
4831 
4832 	peer_rq = peer_tep->te_rq;
4833 
4834 	/*
4835 	 * Put it back if flow controlled except when we are closing.
4836 	 * Note: Messages already on queue when we are closing is bounded
4837 	 * so we can ignore flow control.
4838 	 */
4839 	if (! canputnext(peer_rq) && !closing) {
4840 		TL_PUTBQ(tep, mp);
4841 		return;
4842 	}
4843 
4844 	/*
4845 	 * validate state on peer
4846 	 */
4847 	switch (peer_tep->te_state) {
4848 	case TS_DATA_XFER:
4849 	case TS_WIND_ORDREL:
4850 		/* valid states */
4851 		break;
4852 	default:
4853 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4854 		    "tl_ordrel:rx side:invalid state"));
4855 		tl_merror(peer_tep->te_wq, mp, EPROTO);
4856 		return;
4857 	}
4858 	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4859 
4860 	/*
4861 	 * reuse message block
4862 	 */
4863 	prim->type = T_ORDREL_IND;
4864 	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4865 	    "tl_ordrel: send ordrel_ind"));
4866 
4867 	/*
4868 	 * send data to connected peer
4869 	 */
4870 	putnext(peer_rq, mp);
4871 }
4872 
4873 
4874 /*
4875  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4876  */
4877 static void
4878 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4879 {
4880 	size_t			err_sz;
4881 	tl_endpt_t		*tep;
4882 	struct T_unitdata_req	*udreq;
4883 	mblk_t			*err_mp;
4884 	t_scalar_t		alen;
4885 	t_scalar_t		olen;
4886 	struct T_uderror_ind	*uderr;
4887 	uchar_t			*addr_startp;
4888 
4889 	err_sz = sizeof (struct T_uderror_ind);
4890 	tep = (tl_endpt_t *)wq->q_ptr;
4891 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4892 	alen = udreq->DEST_length;
4893 	olen = udreq->OPT_length;
4894 
4895 	if (alen > 0)
4896 		err_sz = T_ALIGN(err_sz + alen);
4897 	if (olen > 0)
4898 		err_sz += olen;
4899 
4900 	err_mp = allocb(err_sz, BPRI_MED);
4901 	if (! err_mp) {
4902 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4903 		    "tl_uderr:allocb failure"));
4904 		/*
4905 		 * Note: no rollback of state needed as it does
4906 		 * not change in connectionless transport
4907 		 */
4908 		tl_memrecover(wq, mp, err_sz);
4909 		return;
4910 	}
4911 
4912 	DB_TYPE(err_mp) = M_PROTO;
4913 	err_mp->b_wptr = err_mp->b_rptr + err_sz;
4914 	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4915 	uderr->PRIM_type = T_UDERROR_IND;
4916 	uderr->ERROR_type = err;
4917 	uderr->DEST_length = alen;
4918 	uderr->OPT_length = olen;
4919 	if (alen <= 0) {
4920 		uderr->DEST_offset = 0;
4921 	} else {
4922 		uderr->DEST_offset =
4923 		    (t_scalar_t)sizeof (struct T_uderror_ind);
4924 		addr_startp  = mp->b_rptr + udreq->DEST_offset;
4925 		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4926 		    (size_t)alen);
4927 	}
4928 	if (olen <= 0) {
4929 		uderr->OPT_offset = 0;
4930 	} else {
4931 		uderr->OPT_offset =
4932 		    (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4933 		    uderr->DEST_length);
4934 		addr_startp  = mp->b_rptr + udreq->OPT_offset;
4935 		bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4936 		    (size_t)olen);
4937 	}
4938 	freemsg(mp);
4939 
4940 	/*
4941 	 * send indication message
4942 	 */
4943 	tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4944 
4945 	qreply(wq, err_mp);
4946 }
4947 
4948 static void
4949 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4950 {
4951 	queue_t *wq = tep->te_wq;
4952 
4953 	if (!tep->te_closing && (wq->q_first != NULL)) {
4954 		TL_PUTQ(tep, mp);
4955 	} else if (tep->te_rq != NULL)
4956 		tl_unitdata(mp, tep);
4957 	else
4958 		freemsg(mp);
4959 
4960 	tl_serializer_exit(tep);
4961 	tl_refrele(tep);
4962 }
4963 
4964 /*
4965  * Handle T_unitdata_req.
4966  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
4967  * If this is a socket pass through options unmodified.
4968  */
4969 static void
4970 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4971 {
4972 	queue_t			*wq = tep->te_wq;
4973 	soux_addr_t		ux_addr;
4974 	tl_addr_t		destaddr;
4975 	uchar_t			*addr_startp;
4976 	tl_endpt_t		*peer_tep;
4977 	struct T_unitdata_ind	*udind;
4978 	struct T_unitdata_req	*udreq;
4979 	ssize_t			msz, ui_sz;
4980 	t_scalar_t		alen, aoff, olen, ooff;
4981 	t_scalar_t		oldolen = 0;
4982 	cred_t			*cr = NULL;
4983 	pid_t			cpid;
4984 
4985 	udreq = (struct T_unitdata_req *)mp->b_rptr;
4986 	msz = MBLKL(mp);
4987 
4988 	/*
4989 	 * validate the state
4990 	 */
4991 	if (tep->te_state != TS_IDLE) {
4992 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
4993 		    SL_TRACE|SL_ERROR,
4994 		    "tl_wput:T_CONN_REQ:out of state"));
4995 		tl_merror(wq, mp, EPROTO);
4996 		return;
4997 	}
4998 	/*
4999 	 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
5000 	 * (state does not change on this event)
5001 	 */
5002 
5003 	/*
5004 	 * validate the message
5005 	 * Note: dereference fields in struct inside message only
5006 	 * after validating the message length.
5007 	 */
5008 	if (msz < sizeof (struct T_unitdata_req)) {
5009 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5010 		    "tl_unitdata:invalid message length"));
5011 		tl_merror(wq, mp, EINVAL);
5012 		return;
5013 	}
5014 	alen = udreq->DEST_length;
5015 	aoff = udreq->DEST_offset;
5016 	oldolen = olen = udreq->OPT_length;
5017 	ooff = udreq->OPT_offset;
5018 	if (olen == 0)
5019 		ooff = 0;
5020 
5021 	if (IS_SOCKET(tep)) {
5022 		if ((alen != TL_SOUX_ADDRLEN) ||
5023 		    (aoff < 0) ||
5024 		    (aoff + alen > msz) ||
5025 		    (olen < 0) || (ooff < 0) ||
5026 		    ((olen > 0) && ((ooff + olen) > msz))) {
5027 			(void) (STRLOG(TL_ID, tep->te_minor,
5028 			    1, SL_TRACE|SL_ERROR,
5029 			    "tl_unitdata_req: invalid socket addr "
5030 			    "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5031 			    (int)msz, alen, aoff, olen, ooff));
5032 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5033 			return;
5034 		}
5035 		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5036 
5037 		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5038 		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5039 			(void) (STRLOG(TL_ID, tep->te_minor,
5040 			    1, SL_TRACE|SL_ERROR,
5041 			    "tl_conn_req: invalid socket magic"));
5042 			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5043 			return;
5044 		}
5045 	} else {
5046 		if ((alen < 0) ||
5047 		    (aoff < 0) ||
5048 		    ((alen > 0) && ((aoff + alen) > msz)) ||
5049 		    ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5050 		    ((aoff + alen) < 0) ||
5051 		    ((olen > 0) && ((ooff + olen) > msz)) ||
5052 		    (olen < 0) ||
5053 		    (ooff < 0) ||
5054 		    ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5055 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5056 				    SL_TRACE|SL_ERROR,
5057 				    "tl_unitdata:invalid unit data message"));
5058 			tl_merror(wq, mp, EINVAL);
5059 			return;
5060 		}
5061 	}
5062 
5063 	/* Options not supported unless it's a socket */
5064 	if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5065 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5066 		    "tl_unitdata:option use(unsupported) or zero len addr"));
5067 		tl_uderr(wq, mp, EPROTO);
5068 		return;
5069 	}
5070 #ifdef DEBUG
5071 	/*
5072 	 * Mild form of ASSERT()ion to detect broken TPI apps.
5073 	 * if (! assertion)
5074 	 *	log warning;
5075 	 */
5076 	if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5077 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5078 		    "tl_unitdata:addr overlaps TPI message"));
5079 	}
5080 #endif
5081 	/*
5082 	 * get destination endpoint
5083 	 */
5084 	destaddr.ta_alen = alen;
5085 	destaddr.ta_abuf = mp->b_rptr + aoff;
5086 	destaddr.ta_zoneid = tep->te_zoneid;
5087 
5088 	/*
5089 	 * Check whether the destination is the same that was used previously
5090 	 * and the destination endpoint is in the right state. If something is
5091 	 * wrong, find destination again and cache it.
5092 	 */
5093 	peer_tep = tep->te_lastep;
5094 
5095 	if ((peer_tep == NULL) || peer_tep->te_closing ||
5096 	    (peer_tep->te_state != TS_IDLE) ||
5097 	    !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5098 		/*
5099 		 * Not the same as cached destination , need to find the right
5100 		 * destination.
5101 		 */
5102 		peer_tep = (IS_SOCKET(tep) ?
5103 		    tl_sock_find_peer(tep, &ux_addr) :
5104 		    tl_find_peer(tep, &destaddr));
5105 
5106 		if (peer_tep == NULL) {
5107 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5108 			    SL_TRACE|SL_ERROR,
5109 			    "tl_unitdata:no one at destination address"));
5110 			tl_uderr(wq, mp, ECONNRESET);
5111 			return;
5112 		}
5113 
5114 		/*
5115 		 * Cache the new peer.
5116 		 */
5117 		if (tep->te_lastep != NULL)
5118 			tl_refrele(tep->te_lastep);
5119 
5120 		tep->te_lastep = peer_tep;
5121 	}
5122 
5123 	if (peer_tep->te_state != TS_IDLE) {
5124 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5125 		    "tl_unitdata:provider in invalid state"));
5126 		tl_uderr(wq, mp, EPROTO);
5127 		return;
5128 	}
5129 
5130 	ASSERT(peer_tep->te_rq != NULL);
5131 
5132 	/*
5133 	 * Put it back if flow controlled except when we are closing.
5134 	 * Note: Messages already on queue when we are closing is bounded
5135 	 * so we can ignore flow control.
5136 	 */
5137 	if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5138 		/* record what we are flow controlled on */
5139 		if (tep->te_flowq != NULL) {
5140 			list_remove(&tep->te_flowq->te_flowlist, tep);
5141 		}
5142 		list_insert_head(&peer_tep->te_flowlist, tep);
5143 		tep->te_flowq = peer_tep;
5144 		TL_PUTBQ(tep, mp);
5145 		return;
5146 	}
5147 	/*
5148 	 * prepare indication message
5149 	 */
5150 
5151 	/*
5152 	 * calculate length of message
5153 	 */
5154 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5155 		cr = msg_getcred(mp, &cpid);
5156 		ASSERT(cr != NULL);
5157 
5158 		if (peer_tep->te_flag & TL_SETCRED) {
5159 			ASSERT(olen == 0);
5160 			olen = (t_scalar_t)sizeof (struct opthdr) +
5161 			    OPTLEN(sizeof (tl_credopt_t));
5162 						/* 1 option only */
5163 		} else if (peer_tep->te_flag & TL_SETUCRED) {
5164 			ASSERT(olen == 0);
5165 			olen = (t_scalar_t)sizeof (struct opthdr) +
5166 			    OPTLEN(ucredminsize(cr));
5167 						/* 1 option only */
5168 		} else {
5169 			/* Possibly more than one option */
5170 			olen += (t_scalar_t)sizeof (struct T_opthdr) +
5171 			    OPTLEN(ucredminsize(cr));
5172 		}
5173 	}
5174 
5175 	ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) +
5176 	    olen;
5177 	/*
5178 	 * If the unitdata_ind fits and we are not adding options
5179 	 * reuse the udreq mblk.
5180 	 */
5181 	if (msz >= ui_sz && alen >= tep->te_alen &&
5182 	    !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5183 		/*
5184 		 * Reuse the original mblk. Leave options in place.
5185 		 */
5186 		udind =  (struct T_unitdata_ind *)mp->b_rptr;
5187 		udind->PRIM_type = T_UNITDATA_IND;
5188 		udind->SRC_length = tep->te_alen;
5189 		addr_startp = mp->b_rptr + udind->SRC_offset;
5190 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5191 	} else {
5192 		/* Allocate a new T_unidata_ind message */
5193 		mblk_t *ui_mp;
5194 
5195 		ui_mp = allocb(ui_sz, BPRI_MED);
5196 		if (! ui_mp) {
5197 			(void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5198 			    "tl_unitdata:allocb failure:message queued"));
5199 			tl_memrecover(wq, mp, ui_sz);
5200 			return;
5201 		}
5202 
5203 		/*
5204 		 * fill in T_UNITDATA_IND contents
5205 		 */
5206 		DB_TYPE(ui_mp) = M_PROTO;
5207 		ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5208 		udind =  (struct T_unitdata_ind *)ui_mp->b_rptr;
5209 		udind->PRIM_type = T_UNITDATA_IND;
5210 		udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5211 		udind->SRC_length = tep->te_alen;
5212 		addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5213 		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5214 		udind->OPT_offset =
5215 		    (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5216 		udind->OPT_length = olen;
5217 		if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5218 
5219 			if (oldolen != 0) {
5220 				bcopy((void *)((uintptr_t)udreq + ooff),
5221 				    (void *)((uintptr_t)udind +
5222 				    udind->OPT_offset),
5223 				    oldolen);
5224 			}
5225 			ASSERT(cr != NULL);
5226 
5227 			tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5228 			    oldolen, cr, cpid,
5229 			    peer_tep->te_flag, peer_tep->te_credp);
5230 		} else {
5231 			bcopy((void *)((uintptr_t)udreq + ooff),
5232 			    (void *)((uintptr_t)udind + udind->OPT_offset),
5233 			    olen);
5234 		}
5235 
5236 		/*
5237 		 * relink data blocks from mp to ui_mp
5238 		 */
5239 		ui_mp->b_cont = mp->b_cont;
5240 		freeb(mp);
5241 		mp = ui_mp;
5242 	}
5243 	/*
5244 	 * send indication message
5245 	 */
5246 	peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5247 	putnext(peer_tep->te_rq, mp);
5248 }
5249 
5250 
5251 
5252 /*
5253  * Check if a given addr is in use.
5254  * Endpoint ptr returned or NULL if not found.
5255  * The name space is separate for each mode. This implies that
5256  * sockets get their own name space.
5257  */
5258 static tl_endpt_t *
5259 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5260 {
5261 	tl_endpt_t *peer_tep = NULL;
5262 	int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5263 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5264 
5265 	ASSERT(! IS_SOCKET(tep));
5266 
5267 	ASSERT(ap != NULL && ap->ta_alen > 0);
5268 	ASSERT(ap->ta_zoneid == tep->te_zoneid);
5269 	ASSERT(ap->ta_abuf != NULL);
5270 	EQUIV(rc == 0, peer_tep != NULL);
5271 	IMPLY(rc == 0,
5272 	    (tep->te_zoneid == peer_tep->te_zoneid) &&
5273 	    (tep->te_transport == peer_tep->te_transport));
5274 
5275 	if ((rc == 0) && (peer_tep->te_closing)) {
5276 		tl_refrele(peer_tep);
5277 		peer_tep = NULL;
5278 	}
5279 
5280 	return (peer_tep);
5281 }
5282 
5283 /*
5284  * Find peer for a socket based on unix domain address.
5285  * For implicit addresses our peer can be found by minor number in ai hash. For
5286  * explicit binds we look vnode address at addr_hash.
5287  */
5288 static tl_endpt_t *
5289 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5290 {
5291 	tl_endpt_t *peer_tep = NULL;
5292 	mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5293 	    tep->te_aihash : tep->te_addrhash;
5294 	int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5295 	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
5296 
5297 	ASSERT(IS_SOCKET(tep));
5298 	EQUIV(rc == 0, peer_tep != NULL);
5299 	IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5300 
5301 	if (peer_tep != NULL) {
5302 		/* Don't attempt to use closing peer. */
5303 		if (peer_tep->te_closing)
5304 			goto errout;
5305 
5306 		/*
5307 		 * Cross-zone unix sockets are permitted, but for Trusted
5308 		 * Extensions only, the "server" for these must be in the
5309 		 * global zone.
5310 		 */
5311 		if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5312 		    is_system_labeled() &&
5313 		    (peer_tep->te_zoneid != GLOBAL_ZONEID))
5314 			goto errout;
5315 	}
5316 
5317 	return (peer_tep);
5318 
5319 errout:
5320 	tl_refrele(peer_tep);
5321 	return (NULL);
5322 }
5323 
5324 /*
5325  * Generate a free addr and return it in struct pointed by ap
5326  * but allocating space for address buffer.
5327  * The generated address will be at least 4 bytes long and, if req->ta_alen
5328  * exceeds 4 bytes, be req->ta_alen bytes long.
5329  *
5330  * If address is found it will be inserted in the hash.
5331  *
5332  * If req->ta_alen is larger than the default alen (4 bytes) the last
5333  * alen-4 bytes will always be the same as in req.
5334  *
5335  * Return 0 for failure.
5336  * Return non-zero for success.
5337  */
5338 static boolean_t
5339 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5340 {
5341 	t_scalar_t	alen;
5342 	uint32_t	loopcnt;	/* Limit loop to 2^32 */
5343 
5344 	ASSERT(tep->te_hash_hndl != NULL);
5345 	ASSERT(! IS_SOCKET(tep));
5346 
5347 	if (tep->te_hash_hndl == NULL)
5348 		return (B_FALSE);
5349 
5350 	/*
5351 	 * check if default addr is in use
5352 	 * if it is - bump it and try again
5353 	 */
5354 	if (req == NULL) {
5355 		alen = sizeof (uint32_t);
5356 	} else {
5357 		alen = max(req->ta_alen, sizeof (uint32_t));
5358 		ASSERT(tep->te_zoneid == req->ta_zoneid);
5359 	}
5360 
5361 	if (tep->te_alen < alen) {
5362 		void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5363 
5364 		/*
5365 		 * Not enough space in tep->ta_ap to hold the address,
5366 		 * allocate a bigger space.
5367 		 */
5368 		if (abuf == NULL)
5369 			return (B_FALSE);
5370 
5371 		if (tep->te_alen > 0)
5372 			kmem_free(tep->te_abuf, tep->te_alen);
5373 
5374 		tep->te_alen = alen;
5375 		tep->te_abuf = abuf;
5376 	}
5377 
5378 	/* Copy in the address in req */
5379 	if (req != NULL) {
5380 		ASSERT(alen >= req->ta_alen);
5381 		bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5382 	}
5383 
5384 	/*
5385 	 * First try minor number then try default addresses.
5386 	 */
5387 	bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5388 
5389 	for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5390 		if (mod_hash_insert_reserve(tep->te_addrhash,
5391 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5392 		    tep->te_hash_hndl) == 0) {
5393 			/*
5394 			 * found free address
5395 			 */
5396 			tep->te_flag |= TL_ADDRHASHED;
5397 			tep->te_hash_hndl = NULL;
5398 
5399 			return (B_TRUE); /* successful return */
5400 		}
5401 		/*
5402 		 * Use default address.
5403 		 */
5404 		bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5405 		atomic_add_32(&tep->te_defaddr, 1);
5406 	}
5407 
5408 	/*
5409 	 * Failed to find anything.
5410 	 */
5411 	(void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5412 	    "tl_get_any_addr:looped 2^32 times"));
5413 	return (B_FALSE);
5414 }
5415 
5416 /*
5417  * reallocb + set r/w ptrs to reflect size.
5418  */
5419 static mblk_t *
5420 tl_resizemp(mblk_t *mp, ssize_t new_size)
5421 {
5422 	if ((mp = reallocb(mp, new_size, 0)) == NULL)
5423 		return (NULL);
5424 
5425 	mp->b_rptr = DB_BASE(mp);
5426 	mp->b_wptr = mp->b_rptr + new_size;
5427 	return (mp);
5428 }
5429 
5430 static void
5431 tl_cl_backenable(tl_endpt_t *tep)
5432 {
5433 	list_t *l = &tep->te_flowlist;
5434 	tl_endpt_t *elp;
5435 
5436 	ASSERT(IS_CLTS(tep));
5437 
5438 	for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5439 		ASSERT(tep->te_ser == elp->te_ser);
5440 		ASSERT(elp->te_flowq == tep);
5441 		if (! elp->te_closing)
5442 			TL_QENABLE(elp);
5443 		elp->te_flowq = NULL;
5444 		list_remove(l, elp);
5445 	}
5446 }
5447 
5448 /*
5449  * Unconnect endpoints.
5450  */
5451 static void
5452 tl_co_unconnect(tl_endpt_t *tep)
5453 {
5454 	tl_endpt_t	*peer_tep = tep->te_conp;
5455 	tl_endpt_t	*srv_tep = tep->te_oconp;
5456 	list_t		*l;
5457 	tl_icon_t  	*tip;
5458 	tl_endpt_t	*cl_tep;
5459 	mblk_t		*d_mp;
5460 
5461 	ASSERT(IS_COTS(tep));
5462 	/*
5463 	 * If our peer is closing, don't use it.
5464 	 */
5465 	if ((peer_tep != NULL) && peer_tep->te_closing) {
5466 		TL_UNCONNECT(tep->te_conp);
5467 		peer_tep = NULL;
5468 	}
5469 	if ((srv_tep != NULL) && srv_tep->te_closing) {
5470 		TL_UNCONNECT(tep->te_oconp);
5471 		srv_tep = NULL;
5472 	}
5473 
5474 	if (tep->te_nicon > 0) {
5475 		l = &tep->te_iconp;
5476 		/*
5477 		 * If incoming requests pending, change state
5478 		 * of clients on disconnect ind event and send
5479 		 * discon_ind pdu to modules above them
5480 		 * for server: all clients get disconnect
5481 		 */
5482 
5483 		while (tep->te_nicon > 0) {
5484 			tip    = list_head(l);
5485 			cl_tep = tip->ti_tep;
5486 
5487 			if (cl_tep == NULL) {
5488 				tl_freetip(tep, tip);
5489 				continue;
5490 			}
5491 
5492 			if (cl_tep->te_oconp != NULL) {
5493 				ASSERT(cl_tep != cl_tep->te_oconp);
5494 				TL_UNCONNECT(cl_tep->te_oconp);
5495 			}
5496 
5497 			if (cl_tep->te_closing) {
5498 				tl_freetip(tep, tip);
5499 				continue;
5500 			}
5501 
5502 			enableok(cl_tep->te_wq);
5503 			TL_QENABLE(cl_tep);
5504 			d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5505 			if (d_mp != NULL) {
5506 				cl_tep->te_state = TS_IDLE;
5507 				putnext(cl_tep->te_rq, d_mp);
5508 			} else {
5509 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5510 				    SL_TRACE|SL_ERROR,
5511 				    "tl_co_unconnect:icmng: "
5512 				    "allocb failure"));
5513 			}
5514 			tl_freetip(tep, tip);
5515 		}
5516 	} else if (srv_tep != NULL) {
5517 		/*
5518 		 * If outgoing request pending, change state
5519 		 * of server on discon ind event
5520 		 */
5521 
5522 		if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5523 		    IS_COTSORD(srv_tep) &&
5524 		    !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5525 			/*
5526 			 * Queue ordrel_ind for server to be picked up
5527 			 * when the connection is accepted.
5528 			 */
5529 			d_mp = tl_ordrel_ind_alloc();
5530 		} else {
5531 			/*
5532 			 * send discon_ind to server
5533 			 */
5534 			d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5535 		}
5536 		if (d_mp == NULL) {
5537 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5538 			    SL_TRACE|SL_ERROR,
5539 			    "tl_co_unconnect:outgoing:allocb failure"));
5540 			TL_UNCONNECT(tep->te_oconp);
5541 			goto discon_peer;
5542 		}
5543 
5544 		/*
5545 		 * If this is a socket the T_DISCON_IND is queued with
5546 		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5547 		 * from the list of pending connections.
5548 		 * Note that when te_oconp is set the peer better have
5549 		 * a t_connind_t for the client.
5550 		 */
5551 		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5552 			/*
5553 			 * Queue the disconnection message.
5554 			 */
5555 			tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5556 		} else {
5557 			tip = tl_icon_find(srv_tep, tep->te_seqno);
5558 			if (tip == NULL) {
5559 				freemsg(d_mp);
5560 			} else {
5561 				ASSERT(tep == tip->ti_tep);
5562 				ASSERT(tep->te_ser == srv_tep->te_ser);
5563 				/*
5564 				 * Delete tip from the server list.
5565 				 */
5566 				if (srv_tep->te_nicon == 1) {
5567 					srv_tep->te_state =
5568 					    NEXTSTATE(TE_DISCON_IND2,
5569 					    srv_tep->te_state);
5570 				} else {
5571 					srv_tep->te_state =
5572 					    NEXTSTATE(TE_DISCON_IND3,
5573 					    srv_tep->te_state);
5574 				}
5575 				ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5576 				    T_DISCON_IND);
5577 				putnext(srv_tep->te_rq, d_mp);
5578 				tl_freetip(srv_tep, tip);
5579 			}
5580 			TL_UNCONNECT(tep->te_oconp);
5581 			srv_tep = NULL;
5582 		}
5583 	} else if (peer_tep != NULL) {
5584 		/*
5585 		 * unconnect existing connection
5586 		 * If connected, change state of peer on
5587 		 * discon ind event and send discon ind pdu
5588 		 * to module above it
5589 		 */
5590 
5591 		ASSERT(tep->te_ser == peer_tep->te_ser);
5592 		if (IS_COTSORD(peer_tep) &&
5593 		    (peer_tep->te_state == TS_WIND_ORDREL ||
5594 		    peer_tep->te_state == TS_DATA_XFER)) {
5595 			/*
5596 			 * send ordrel ind
5597 			 */
5598 			(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5599 			"tl_co_unconnect:connected: ordrel_ind state %d->%d",
5600 			    peer_tep->te_state,
5601 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5602 			d_mp = tl_ordrel_ind_alloc();
5603 			if (! d_mp) {
5604 				(void) (STRLOG(TL_ID, tep->te_minor, 3,
5605 				    SL_TRACE|SL_ERROR,
5606 				    "tl_co_unconnect:connected:"
5607 				    "allocb failure"));
5608 				/*
5609 				 * Continue with cleaning up peer as
5610 				 * this side may go away with the close
5611 				 */
5612 				TL_QENABLE(peer_tep);
5613 				goto discon_peer;
5614 			}
5615 			peer_tep->te_state =
5616 			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5617 
5618 			putnext(peer_tep->te_rq, d_mp);
5619 			/*
5620 			 * Handle flow control case.  This will generate
5621 			 * a t_discon_ind message with reason 0 if there
5622 			 * is data queued on the write side.
5623 			 */
5624 			TL_QENABLE(peer_tep);
5625 		} else if (IS_COTSORD(peer_tep) &&
5626 		    peer_tep->te_state == TS_WREQ_ORDREL) {
5627 			/*
5628 			 * Sent an ordrel_ind. We send a discon with
5629 			 * with error 0 to inform that the peer is gone.
5630 			 */
5631 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5632 			    SL_TRACE|SL_ERROR,
5633 			    "tl_co_unconnect: discon in state %d",
5634 			    tep->te_state));
5635 			tl_discon_ind(peer_tep, 0);
5636 		} else {
5637 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
5638 			    SL_TRACE|SL_ERROR,
5639 			    "tl_co_unconnect: state %d", tep->te_state));
5640 			tl_discon_ind(peer_tep, ECONNRESET);
5641 		}
5642 
5643 discon_peer:
5644 		/*
5645 		 * Disconnect cross-pointers only for close
5646 		 */
5647 		if (tep->te_closing) {
5648 			peer_tep = tep->te_conp;
5649 			TL_REMOVE_PEER(peer_tep->te_conp);
5650 			TL_REMOVE_PEER(tep->te_conp);
5651 		}
5652 	}
5653 }
5654 
5655 /*
5656  * Note: The following routine does not recover from allocb()
5657  * failures
5658  * The reason should be from the <sys/errno.h> space.
5659  */
5660 static void
5661 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5662 {
5663 	mblk_t *d_mp;
5664 
5665 	if (tep->te_closing)
5666 		return;
5667 
5668 	/*
5669 	 * flush the queues.
5670 	 */
5671 	flushq(tep->te_rq, FLUSHDATA);
5672 	(void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5673 
5674 	/*
5675 	 * send discon ind
5676 	 */
5677 	d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5678 	if (! d_mp) {
5679 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5680 		    "tl_discon_ind:allocb failure"));
5681 		return;
5682 	}
5683 	tep->te_state = TS_IDLE;
5684 	putnext(tep->te_rq, d_mp);
5685 }
5686 
5687 /*
5688  * Note: The following routine does not recover from allocb()
5689  * failures
5690  * The reason should be from the <sys/errno.h> space.
5691  */
5692 static mblk_t *
5693 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5694 {
5695 	mblk_t *mp;
5696 	struct T_discon_ind *tdi;
5697 
5698 	if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5699 		DB_TYPE(mp) = M_PROTO;
5700 		mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5701 		tdi = (struct T_discon_ind *)mp->b_rptr;
5702 		tdi->PRIM_type = T_DISCON_IND;
5703 		tdi->DISCON_reason = reason;
5704 		tdi->SEQ_number = seqnum;
5705 	}
5706 	return (mp);
5707 }
5708 
5709 
5710 /*
5711  * Note: The following routine does not recover from allocb()
5712  * failures
5713  */
5714 static mblk_t *
5715 tl_ordrel_ind_alloc(void)
5716 {
5717 	mblk_t *mp;
5718 	struct T_ordrel_ind *toi;
5719 
5720 	if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5721 		DB_TYPE(mp) = M_PROTO;
5722 		mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5723 		toi = (struct T_ordrel_ind *)mp->b_rptr;
5724 		toi->PRIM_type = T_ORDREL_IND;
5725 	}
5726 	return (mp);
5727 }
5728 
5729 
5730 /*
5731  * Lookup the seqno in the list of queued connections.
5732  */
5733 static tl_icon_t *
5734 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5735 {
5736 	list_t *l = &tep->te_iconp;
5737 	tl_icon_t *tip = list_head(l);
5738 
5739 	ASSERT(seqno != 0);
5740 
5741 	for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5742 		;
5743 
5744 	return (tip);
5745 }
5746 
5747 /*
5748  * Queue data for a given T_CONN_IND while verifying that redundant
5749  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5750  * Used when the originator of the connection closes.
5751  */
5752 static void
5753 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5754 {
5755 	tl_icon_t		*tip;
5756 	mblk_t			**mpp, *mp;
5757 	int			prim, nprim;
5758 
5759 	if (nmp->b_datap->db_type == M_PROTO)
5760 		nprim = ((union T_primitives *)nmp->b_rptr)->type;
5761 	else
5762 		nprim = -1;	/* M_DATA */
5763 
5764 	tip = tl_icon_find(tep, seqno);
5765 	if (tip == NULL) {
5766 		freemsg(nmp);
5767 		return;
5768 	}
5769 
5770 	ASSERT(tip->ti_seqno != 0);
5771 	mpp = &tip->ti_mp;
5772 	while (*mpp != NULL) {
5773 		mp = *mpp;
5774 
5775 		if (mp->b_datap->db_type == M_PROTO)
5776 			prim = ((union T_primitives *)mp->b_rptr)->type;
5777 		else
5778 			prim = -1;	/* M_DATA */
5779 
5780 		/*
5781 		 * Allow nothing after a T_DISCON_IND
5782 		 */
5783 		if (prim == T_DISCON_IND) {
5784 			freemsg(nmp);
5785 			return;
5786 		}
5787 		/*
5788 		 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5789 		 */
5790 		if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5791 			freemsg(nmp);
5792 			return;
5793 		}
5794 		mpp = &(mp->b_next);
5795 	}
5796 	*mpp = nmp;
5797 }
5798 
5799 /*
5800  * Verify if a certain TPI primitive exists on the connind queue.
5801  * Use prim -1 for M_DATA.
5802  * Return non-zero if found.
5803  */
5804 static boolean_t
5805 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5806 {
5807 	tl_icon_t *tip = tl_icon_find(tep, seqno);
5808 	boolean_t found = B_FALSE;
5809 
5810 	if (tip != NULL) {
5811 		mblk_t *mp;
5812 		for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5813 			found = (DB_TYPE(mp) == M_PROTO &&
5814 			    ((union T_primitives *)mp->b_rptr)->type == prim);
5815 		}
5816 	}
5817 	return (found);
5818 }
5819 
5820 /*
5821  * Send the b_next mblk chain that has accumulated before the connection
5822  * was accepted. Perform the necessary state transitions.
5823  */
5824 static void
5825 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5826 {
5827 	mblk_t			*mp;
5828 	union T_primitives	*primp;
5829 
5830 	if (tep->te_closing) {
5831 		tl_icon_freemsgs(mpp);
5832 		return;
5833 	}
5834 
5835 	ASSERT(tep->te_state == TS_DATA_XFER);
5836 	ASSERT(tep->te_rq->q_first == NULL);
5837 
5838 	while ((mp = *mpp) != NULL) {
5839 		*mpp = mp->b_next;
5840 		mp->b_next = NULL;
5841 
5842 		ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5843 		switch (DB_TYPE(mp)) {
5844 		default:
5845 			freemsg(mp);
5846 			break;
5847 		case M_DATA:
5848 			putnext(tep->te_rq, mp);
5849 			break;
5850 		case M_PROTO:
5851 			primp = (union T_primitives *)mp->b_rptr;
5852 			switch (primp->type) {
5853 			case T_UNITDATA_IND:
5854 			case T_DATA_IND:
5855 			case T_OPTDATA_IND:
5856 			case T_EXDATA_IND:
5857 				putnext(tep->te_rq, mp);
5858 				break;
5859 			case T_ORDREL_IND:
5860 				tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5861 				    tep->te_state);
5862 				putnext(tep->te_rq, mp);
5863 				break;
5864 			case T_DISCON_IND:
5865 				tep->te_state = TS_IDLE;
5866 				putnext(tep->te_rq, mp);
5867 				break;
5868 			default:
5869 #ifdef DEBUG
5870 				cmn_err(CE_PANIC,
5871 				    "tl_icon_sendmsgs: unknown primitive");
5872 #endif /* DEBUG */
5873 				freemsg(mp);
5874 				break;
5875 			}
5876 			break;
5877 		}
5878 	}
5879 }
5880 
5881 /*
5882  * Free the b_next mblk chain that has accumulated before the connection
5883  * was accepted.
5884  */
5885 static void
5886 tl_icon_freemsgs(mblk_t **mpp)
5887 {
5888 	mblk_t *mp;
5889 
5890 	while ((mp = *mpp) != NULL) {
5891 		*mpp = mp->b_next;
5892 		mp->b_next = NULL;
5893 		freemsg(mp);
5894 	}
5895 }
5896 
5897 /*
5898  * Send M_ERROR
5899  * Note: assumes caller ensured enough space in mp or enough
5900  *	memory available. Does not attempt recovery from allocb()
5901  *	failures
5902  */
5903 
5904 static void
5905 tl_merror(queue_t *wq, mblk_t *mp, int error)
5906 {
5907 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5908 
5909 	if (tep->te_closing) {
5910 		freemsg(mp);
5911 		return;
5912 	}
5913 
5914 	(void) (STRLOG(TL_ID, tep->te_minor, 1,
5915 	    SL_TRACE|SL_ERROR,
5916 	    "tl_merror: tep=%p, err=%d", (void *)tep, error));
5917 
5918 	/*
5919 	 * flush all messages on queue. we are shutting
5920 	 * the stream down on fatal error
5921 	 */
5922 	flushq(wq, FLUSHALL);
5923 	if (IS_COTS(tep)) {
5924 		/* connection oriented - unconnect endpoints */
5925 		tl_co_unconnect(tep);
5926 	}
5927 	if (mp->b_cont) {
5928 		freemsg(mp->b_cont);
5929 		mp->b_cont = NULL;
5930 	}
5931 
5932 	if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
5933 		freemsg(mp);
5934 		mp = allocb(1, BPRI_HI);
5935 		if (!mp) {
5936 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
5937 			    SL_TRACE|SL_ERROR,
5938 			    "tl_merror:M_PROTO: out of memory"));
5939 			return;
5940 		}
5941 	}
5942 	if (mp) {
5943 		DB_TYPE(mp) = M_ERROR;
5944 		mp->b_rptr = DB_BASE(mp);
5945 		*mp->b_rptr = (char)error;
5946 		mp->b_wptr = mp->b_rptr + sizeof (char);
5947 		qreply(wq, mp);
5948 	} else {
5949 		(void) putnextctl1(tep->te_rq, M_ERROR, error);
5950 	}
5951 }
5952 
5953 static void
5954 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
5955 {
5956 	ASSERT(cr != NULL);
5957 
5958 	if (flag & TL_SETCRED) {
5959 		struct opthdr *opt = (struct opthdr *)buf;
5960 		tl_credopt_t *tlcred;
5961 
5962 		opt->level = TL_PROT_LEVEL;
5963 		opt->name = TL_OPT_PEER_CRED;
5964 		opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
5965 
5966 		tlcred = (tl_credopt_t *)(opt + 1);
5967 		tlcred->tc_uid = crgetuid(cr);
5968 		tlcred->tc_gid = crgetgid(cr);
5969 		tlcred->tc_ruid = crgetruid(cr);
5970 		tlcred->tc_rgid = crgetrgid(cr);
5971 		tlcred->tc_suid = crgetsuid(cr);
5972 		tlcred->tc_sgid = crgetsgid(cr);
5973 		tlcred->tc_ngroups = crgetngroups(cr);
5974 	} else if (flag & TL_SETUCRED) {
5975 		struct opthdr *opt = (struct opthdr *)buf;
5976 
5977 		opt->level = TL_PROT_LEVEL;
5978 		opt->name = TL_OPT_PEER_UCRED;
5979 		opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
5980 
5981 		(void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
5982 	} else {
5983 		struct T_opthdr *topt = (struct T_opthdr *)buf;
5984 		ASSERT(flag & TL_SOCKUCRED);
5985 
5986 		topt->level = SOL_SOCKET;
5987 		topt->name = SCM_UCRED;
5988 		topt->len = ucredminsize(cr) + sizeof (*topt);
5989 		topt->status = 0;
5990 		(void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
5991 	}
5992 }
5993 
5994 /* ARGSUSED */
5995 static int
5996 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
5997 {
5998 	/* no default value processed in protocol specific code currently */
5999 	return (-1);
6000 }
6001 
6002 /* ARGSUSED */
6003 static int
6004 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6005 {
6006 	int len;
6007 	tl_endpt_t *tep;
6008 	int *valp;
6009 
6010 	tep = (tl_endpt_t *)wq->q_ptr;
6011 
6012 	len = 0;
6013 
6014 	/*
6015 	 * Assumes: option level and name sanity check done elsewhere
6016 	 */
6017 
6018 	switch (level) {
6019 	case SOL_SOCKET:
6020 		if (! IS_SOCKET(tep))
6021 			break;
6022 		switch (name) {
6023 		case SO_RECVUCRED:
6024 			len = sizeof (int);
6025 			valp = (int *)ptr;
6026 			*valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6027 			break;
6028 		default:
6029 			break;
6030 		}
6031 		break;
6032 	case TL_PROT_LEVEL:
6033 		switch (name) {
6034 		case TL_OPT_PEER_CRED:
6035 		case TL_OPT_PEER_UCRED:
6036 			/*
6037 			 * option not supposed to retrieved directly
6038 			 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6039 			 * when some internal flags set by other options
6040 			 * Direct retrieval always designed to fail(ignored)
6041 			 * for this option.
6042 			 */
6043 			break;
6044 		}
6045 	}
6046 	return (len);
6047 }
6048 
6049 /* ARGSUSED */
6050 static int
6051 tl_set_opt(
6052 	queue_t		*wq,
6053 	uint_t		mgmt_flags,
6054 	int		level,
6055 	int		name,
6056 	uint_t		inlen,
6057 	uchar_t		*invalp,
6058 	uint_t		*outlenp,
6059 	uchar_t		*outvalp,
6060 	void		*thisdg_attrs,
6061 	cred_t		*cr)
6062 {
6063 	int error;
6064 	tl_endpt_t *tep;
6065 
6066 	tep = (tl_endpt_t *)wq->q_ptr;
6067 
6068 	error = 0;		/* NOERROR */
6069 
6070 	/*
6071 	 * Assumes: option level and name sanity checks done elsewhere
6072 	 */
6073 
6074 	switch (level) {
6075 	case SOL_SOCKET:
6076 		if (! IS_SOCKET(tep)) {
6077 			error = EINVAL;
6078 			break;
6079 		}
6080 		/*
6081 		 * TBD: fill in other AF_UNIX socket options and then stop
6082 		 * returning error.
6083 		 */
6084 		switch (name) {
6085 		case SO_RECVUCRED:
6086 			/*
6087 			 * We only support this for datagram sockets;
6088 			 * getpeerucred handles the connection oriented
6089 			 * transports.
6090 			 */
6091 			if (! IS_CLTS(tep)) {
6092 				error = EINVAL;
6093 				break;
6094 			}
6095 			if (*(int *)invalp == 0)
6096 				tep->te_flag &= ~TL_SOCKUCRED;
6097 			else
6098 				tep->te_flag |= TL_SOCKUCRED;
6099 			break;
6100 		default:
6101 			error = EINVAL;
6102 			break;
6103 		}
6104 		break;
6105 	case TL_PROT_LEVEL:
6106 		switch (name) {
6107 		case TL_OPT_PEER_CRED:
6108 		case TL_OPT_PEER_UCRED:
6109 			/*
6110 			 * option not supposed to be set directly
6111 			 * Its value in initialized for each endpoint at
6112 			 * driver open time.
6113 			 * Direct setting always designed to fail for this
6114 			 * option.
6115 			 */
6116 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
6117 			    SL_TRACE|SL_ERROR,
6118 			    "tl_set_opt: option is not supported"));
6119 			error = EPROTO;
6120 			break;
6121 		}
6122 	}
6123 	return (error);
6124 }
6125 
6126 
6127 static void
6128 tl_timer(void *arg)
6129 {
6130 	queue_t *wq = arg;
6131 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6132 
6133 	ASSERT(tep);
6134 
6135 	tep->te_timoutid = 0;
6136 
6137 	enableok(wq);
6138 	/*
6139 	 * Note: can call wsrv directly here and save context switch
6140 	 * Consider change when qtimeout (not timeout) is active
6141 	 */
6142 	qenable(wq);
6143 }
6144 
6145 static void
6146 tl_buffer(void *arg)
6147 {
6148 	queue_t *wq = arg;
6149 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6150 
6151 	ASSERT(tep);
6152 
6153 	tep->te_bufcid = 0;
6154 	tep->te_nowsrv = B_FALSE;
6155 
6156 	enableok(wq);
6157 	/*
6158 	 *  Note: can call wsrv directly here and save context switch
6159 	 * Consider change when qbufcall (not bufcall) is active
6160 	 */
6161 	qenable(wq);
6162 }
6163 
6164 static void
6165 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6166 {
6167 	tl_endpt_t *tep;
6168 
6169 	tep = (tl_endpt_t *)wq->q_ptr;
6170 
6171 	if (tep->te_closing) {
6172 		freemsg(mp);
6173 		return;
6174 	}
6175 	noenable(wq);
6176 
6177 	(void) insq(wq, wq->q_first, mp);
6178 
6179 	if (tep->te_bufcid || tep->te_timoutid) {
6180 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6181 		    "tl_memrecover:recover %p pending", (void *)wq));
6182 		return;
6183 	}
6184 
6185 	if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6186 		tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6187 		    drv_usectohz(TL_BUFWAIT));
6188 	}
6189 }
6190 
6191 static void
6192 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6193 {
6194 	ASSERT(tip->ti_seqno != 0);
6195 
6196 	if (tip->ti_mp != NULL) {
6197 		tl_icon_freemsgs(&tip->ti_mp);
6198 		tip->ti_mp = NULL;
6199 	}
6200 	if (tip->ti_tep != NULL) {
6201 		tl_refrele(tip->ti_tep);
6202 		tip->ti_tep = NULL;
6203 	}
6204 	list_remove(&tep->te_iconp, tip);
6205 	kmem_free(tip, sizeof (tl_icon_t));
6206 	tep->te_nicon--;
6207 }
6208 
6209 /*
6210  * Remove address from address hash.
6211  */
6212 static void
6213 tl_addr_unbind(tl_endpt_t *tep)
6214 {
6215 	tl_endpt_t *elp;
6216 
6217 	if (tep->te_flag & TL_ADDRHASHED) {
6218 		if (IS_SOCKET(tep)) {
6219 			(void) mod_hash_remove(tep->te_addrhash,
6220 			    (mod_hash_key_t)tep->te_vp,
6221 			    (mod_hash_val_t *)&elp);
6222 			tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6223 			tep->te_magic = SOU_MAGIC_IMPLICIT;
6224 		} else {
6225 			(void) mod_hash_remove(tep->te_addrhash,
6226 			    (mod_hash_key_t)&tep->te_ap,
6227 			    (mod_hash_val_t *)&elp);
6228 			(void) kmem_free(tep->te_abuf, tep->te_alen);
6229 			tep->te_alen = -1;
6230 			tep->te_abuf = NULL;
6231 		}
6232 		tep->te_flag &= ~TL_ADDRHASHED;
6233 	}
6234 }
6235