1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2018, Joyent, Inc.
28 */
29
30 #include <sys/types.h>
31 #include <sys/conf.h>
32 #include <sys/modctl.h>
33 #include <sys/stat.h>
34 #include <sys/stream.h>
35 #include <sys/strsun.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/priv_names.h>
39 #include <inet/common.h>
40
41 #define _SUN_TPI_VERSION 2
42 #include <sys/tihdr.h>
43 #include <sys/timod.h>
44 #include <sys/tiuser.h>
45 #include <sys/suntpi.h>
46 #include <inet/common.h>
47 #include <inet/ip.h>
48 #include <inet/mi.h>
49 #include <inet/proto_set.h>
50 #include <sys/ib/clients/rds/rds.h>
51 #include <sys/policy.h>
52 #include <inet/ipclassifier.h>
53 #include <sys/ib/clients/rds/rds_kstat.h>
54 #include "sys/random.h"
55 #include <sys/ib/clients/rds/rds_transport.h>
56 #include <sys/ib/ibtl/ibti.h>
57
58
/* Driver identity and the parameters handed to the DDI/STREAMS framework. */
#define	RDS_NAME		"rds"
#define	RDS_STRTAB		rdsinfo
#define	RDS_DEVDESC		"RDS STREAMS driver"
#define	RDS_DEVMINOR		0
/*
 * Parenthesized so that the flag set expands as one value no matter which
 * operators surround the macro at the point of use.
 */
#define	RDS_DEVMTFLAGS		(D_MP | D_SYNCSTR)
#define	RDS_DEFAULT_PRIV_MODE	0666

/* Inclusive bounds of the port numbers a socket may be bound to. */
#define	rds_smallest_port	1
#define	rds_largest_port	65535

/* Default values for the receive/transmit queue watermarks. */
#define	RDS_RECV_HIWATER	(56 * 1024)
#define	RDS_RECV_LOWATER	128
#define	RDS_XMIT_HIWATER	(56 * 1024)
#define	RDS_XMIT_LOWATER	1024

/* Prefix that short-circuits whatever expression follows it to 0. */
#define	RDS_DPRINTF2	0 &&
/* Tag placed at the front of this file's dprint() messages. */
#define	LABEL	"RDS"
76
/*
 * Port pair: source port first, destination port second.
 */
struct rdsahdr_s {
	in_port_t	uha_src_port;	/* Source port */
	in_port_t	uha_dst_port;	/* Destination port */
};
typedef struct rdsahdr_s rdsha_t;

/* NOTE(review): presumably the byte size of rdsha_t -- confirm with users */
#define	RDSH_SIZE	4
83
84 int rds_recv_hiwat = RDS_RECV_HIWATER;
85 int rds_recv_lowat = RDS_RECV_LOWATER;
86 int rds_xmit_hiwat = RDS_XMIT_HIWATER;
87 int rds_xmit_lowat = RDS_XMIT_LOWATER;
88
89 int rdsdebug;
90
91 static dev_info_t *rds_dev_info;
92
93 /* Hint not protected by any lock */
94 static in_port_t rds_next_port_to_try;
95
96 ldi_ident_t rds_li;
97 static int loopmax = rds_largest_port - rds_smallest_port + 1;
98
99 /* global configuration variables */
100 uint_t UserBufferSize;
101 uint_t rds_rx_pkts_pending_hwm;
102
103 extern void rds_ioctl(queue_t *, mblk_t *);
104 extern void rds_ioctl_copyin_done(queue_t *q, mblk_t *mp);
105
106 int rds_open_transport_driver();
107 int rds_close_transport_driver();
108
109 #define RDS_CURRENT_PORT_QUOTA() \
110 (rds_rx_pkts_pending_hwm/RDS_GET_NPORT())
111
112 krwlock_t rds_transport_lock;
113 ldi_handle_t rds_transport_handle = NULL;
114 rds_transport_ops_t *rds_transport_ops = NULL;
115
116 static int
rds_attach(dev_info_t * devi,ddi_attach_cmd_t cmd)117 rds_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
118 {
119 int ret;
120
121 if (cmd != DDI_ATTACH)
122 return (DDI_FAILURE);
123
124 rds_dev_info = devi;
125
126 ret = ddi_create_minor_node(devi, RDS_NAME, S_IFCHR,
127 RDS_DEVMINOR, DDI_PSEUDO, 0);
128 if (ret != DDI_SUCCESS) {
129 return (ret);
130 }
131
132 return (DDI_SUCCESS);
133 }
134
135 static int
rds_detach(dev_info_t * devi,ddi_detach_cmd_t cmd)136 rds_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
137 {
138 if (cmd != DDI_DETACH)
139 return (DDI_FAILURE);
140
141 ASSERT(devi == rds_dev_info);
142
143 ddi_remove_minor_node(devi, NULL);
144
145 return (DDI_SUCCESS);
146 }
147
148 /* ARGSUSED */
149 static int
rds_info(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** result)150 rds_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
151 {
152 int error = DDI_FAILURE;
153
154 switch (cmd) {
155 case DDI_INFO_DEVT2DEVINFO:
156 if (rds_dev_info != NULL) {
157 *result = (void *)rds_dev_info;
158 error = DDI_SUCCESS;
159 }
160 break;
161
162 case DDI_INFO_DEVT2INSTANCE:
163 *result = NULL;
164 error = DDI_SUCCESS;
165 break;
166
167 default:
168 break;
169 }
170
171 return (error);
172 }
173
174
175 /*ARGSUSED*/
176 static int
rds_open(queue_t * q,dev_t * devp,int flag,int sflag,cred_t * credp)177 rds_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
178 {
179 rds_t *rds;
180 int ret;
181
182 if (is_system_labeled()) {
183 /*
184 * RDS socket is not supported on labeled systems
185 */
186 return (ESOCKTNOSUPPORT);
187 }
188
189 /* Open the transport driver if IB HW is present */
190 rw_enter(&rds_transport_lock, RW_READER);
191 if (rds_transport_handle == NULL) {
192 rw_exit(&rds_transport_lock);
193 ret = rds_open_transport_driver();
194 rw_enter(&rds_transport_lock, RW_READER);
195
196 if (ret != 0) {
197 /* Transport driver failed to load */
198 rw_exit(&rds_transport_lock);
199 return (ret);
200 }
201 }
202 rw_exit(&rds_transport_lock);
203
204 if (sflag == MODOPEN) {
205 return (EINVAL);
206 }
207
208 /* Reopen not supported */
209 if (q->q_ptr != NULL) {
210 dprint(2, ("%s: Reopen is not supported: %p", LABEL, q->q_ptr));
211 return (0);
212 }
213
214 rds = rds_create(q, credp);
215 if (rds == NULL) {
216 dprint(2, ("%s: rds_create failed", LABEL));
217 return (0);
218 }
219
220 q->q_ptr = WR(q)->q_ptr = rds;
221 rds->rds_state = TS_UNBND;
222 rds->rds_family = AF_INET_OFFLOAD;
223
224 q->q_hiwat = rds_recv_hiwat;
225 q->q_lowat = rds_recv_lowat;
226
227 qprocson(q);
228
229 WR(q)->q_hiwat = rds_xmit_hiwat;
230 WR(q)->q_lowat = rds_xmit_lowat;
231
232 /* Set the Stream head watermarks */
233 (void) proto_set_rx_hiwat(q, NULL, rds_recv_hiwat);
234 (void) proto_set_rx_lowat(q, NULL, rds_recv_lowat);
235
236 return (0);
237 }
238
239 /* ARGSUSED */
240 static int
rds_close(queue_t * q,int flags __unused,cred_t * credp __unused)241 rds_close(queue_t *q, int flags __unused, cred_t *credp __unused)
242 {
243 rds_t *rdsp = (rds_t *)q->q_ptr;
244
245 qprocsoff(q);
246
247 /*
248 * NPORT should be decremented only if this socket was previously
249 * bound to an RDS port.
250 */
251 if (rdsp->rds_state >= TS_IDLE) {
252 RDS_DECR_NPORT();
253 RDS_SET_PORT_QUOTA(RDS_CURRENT_PORT_QUOTA());
254 rds_transport_ops->
255 rds_transport_resume_port(ntohs(rdsp->rds_port));
256 }
257
258 /* close the transport driver if this is the last socket */
259 if (RDS_GET_NPORT() == 1) {
260 (void) rds_close_transport_driver();
261 }
262
263 /*
264 * We set the flags without holding a lock as this is
265 * just a hint for the fanout lookup to skip this rds.
266 * We dont free the struct until it's out of the hash and
267 * the ref count goes down.
268 */
269 rdsp->rds_flags |= RDS_CLOSING;
270 rds_bind_hash_remove(rdsp, B_FALSE);
271 mutex_enter(&rdsp->rds_lock);
272 ASSERT(rdsp->rds_refcnt > 0);
273 if (rdsp->rds_refcnt != 1) {
274 cv_wait(&rdsp->rds_refcv, &rdsp->rds_lock);
275 }
276 mutex_exit(&rdsp->rds_lock);
277 RDS_DEC_REF_CNT(rdsp);
278 RD(q)->q_ptr = NULL;
279 WR(q)->q_ptr = NULL;
280 return (0);
281 }
282
283 /*
284 * Add a new message to the socket
285 */
286 int
rds_deliver_new_msg(mblk_t * mp,ipaddr_t local_addr,ipaddr_t rem_addr,in_port_t local_port,in_port_t rem_port,zoneid_t zoneid)287 rds_deliver_new_msg(mblk_t *mp, ipaddr_t local_addr, ipaddr_t rem_addr,
288 in_port_t local_port, in_port_t rem_port, zoneid_t zoneid)
289 {
290 rds_t *rds;
291 struct T_unitdata_ind *tudi;
292 int udi_size; /* Size of T_unitdata_ind */
293 mblk_t *mp1;
294 sin_t *sin;
295 int error = 0;
296
297 local_port = htons(local_port);
298 rem_port = htons(rem_port);
299
300 ASSERT(mp->b_datap->db_type == M_DATA);
301 rds = rds_fanout(local_addr, rem_addr, local_port, rem_port, zoneid);
302 if (rds == NULL) {
303 dprint(2, ("%s: rds_fanout failed: (0x%x 0x%x %d %d)", LABEL,
304 local_addr, rem_addr, ntohs(local_port), ntohs(rem_port)));
305 freemsg(mp);
306 return (error);
307 }
308
309 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
310
311 /* Allocate a message block for the T_UNITDATA_IND structure. */
312 mp1 = allocb(udi_size, BPRI_MED);
313 if (mp1 == NULL) {
314 dprint(2, ("%s: allocb failed", LABEL));
315 freemsg(mp);
316 return (ENOMEM);
317 }
318
319 mp1->b_cont = mp;
320 mp = mp1;
321 mp->b_datap->db_type = M_PROTO;
322 tudi = (struct T_unitdata_ind *)(uintptr_t)mp->b_rptr;
323 mp->b_wptr = (uchar_t *)tudi + udi_size;
324 tudi->PRIM_type = T_UNITDATA_IND;
325 tudi->SRC_length = sizeof (sin_t);
326 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
327 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
328 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
329 tudi->OPT_length = udi_size;
330 sin = (sin_t *)&tudi[1];
331 sin->sin_addr.s_addr = rem_addr;
332 sin->sin_port = ntohs(rem_port);
333 sin->sin_family = rds->rds_family;
334 *(uint32_t *)(uintptr_t)&sin->sin_zero[0] = 0;
335 *(uint32_t *)(uintptr_t)&sin->sin_zero[4] = 0;
336
337 putnext(rds->rds_ulpd, mp);
338
339 /* check port quota */
340 if (RDS_GET_RXPKTS_PEND() > rds_rx_pkts_pending_hwm) {
341 ulong_t current_port_quota = RDS_GET_PORT_QUOTA();
342 if (rds->rds_port_quota > current_port_quota) {
343 /* this may result in stalling the port */
344 rds->rds_port_quota = current_port_quota;
345 (void) proto_set_rx_hiwat(rds->rds_ulpd, NULL,
346 rds->rds_port_quota * UserBufferSize);
347 RDS_INCR_PORT_QUOTA_ADJUSTED();
348 }
349 }
350
351 /*
352 * canputnext() check is done after putnext as the protocol does
353 * not allow dropping any received packet.
354 */
355 if (!canputnext(rds->rds_ulpd)) {
356 error = ENOSPC;
357 }
358
359 RDS_DEC_REF_CNT(rds);
360 return (error);
361 }
362
363
364 /* Default structure copied into T_INFO_ACK messages */
365 static struct T_info_ack rds_g_t_info_ack_ipv4 = {
366 T_INFO_ACK,
367 65535, /* TSDU_size. Excl. headers */
368 T_INVALID, /* ETSU_size. rds does not support expedited data. */
369 T_INVALID, /* CDATA_size. rds does not support connect data. */
370 T_INVALID, /* DDATA_size. rds does not support disconnect data. */
371 sizeof (sin_t), /* ADDR_size. */
372 0, /* OPT_size - not initialized here */
373 65535, /* TIDU_size. Excl. headers */
374 T_CLTS, /* SERV_type. rds supports connection-less. */
375 TS_UNBND, /* CURRENT_state. This is set from rds_state. */
376 (XPG4_1|SENDZERO) /* PROVIDER_flag */
377 };
378
379 static in_port_t
rds_update_next_port(in_port_t port)380 rds_update_next_port(in_port_t port)
381 {
382 (void) random_get_pseudo_bytes((uint8_t *)&port, sizeof (in_port_t));
383 if (port < rds_smallest_port)
384 port = rds_smallest_port;
385 return (port);
386 }
387
388 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
389 static void
rds_err_ack(queue_t * q,mblk_t * mp,t_scalar_t t_error,int sys_error)390 rds_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
391 {
392 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
393 qreply(q, mp);
394 }
395
396 static void
rds_capability_req(queue_t * q,mblk_t * mp)397 rds_capability_req(queue_t *q, mblk_t *mp)
398 {
399 t_uscalar_t cap_bits1;
400 struct T_capability_ack *tcap;
401
402 cap_bits1 =
403 ((struct T_capability_req *)(uintptr_t)mp->b_rptr)->CAP_bits1;
404
405 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
406 mp->b_datap->db_type, T_CAPABILITY_ACK);
407 if (mp == NULL)
408 return;
409 tcap = (struct T_capability_ack *)(uintptr_t)mp->b_rptr;
410 tcap->CAP_bits1 = 0;
411
412 if (cap_bits1 & TC1_INFO) {
413 tcap->CAP_bits1 |= TC1_INFO;
414 *(&tcap->INFO_ack) = rds_g_t_info_ack_ipv4;
415 }
416
417 qreply(q, mp);
418 }
419
420 static void
rds_info_req(queue_t * q,mblk_t * omp)421 rds_info_req(queue_t *q, mblk_t *omp)
422 {
423 rds_t *rds = (rds_t *)q->q_ptr;
424 struct T_info_ack *tap;
425 mblk_t *mp;
426
427 /* Create a T_INFO_ACK message. */
428 mp = tpi_ack_alloc(omp, sizeof (struct T_info_ack), M_PCPROTO,
429 T_INFO_ACK);
430 if (mp == NULL)
431 return;
432 tap = (struct T_info_ack *)(uintptr_t)mp->b_rptr;
433 *tap = rds_g_t_info_ack_ipv4;
434 tap->CURRENT_state = rds->rds_state;
435 tap->OPT_size = 128;
436 qreply(q, mp);
437 }
438
439 /*
440 * NO locking protection here as sockfs will only send down
441 * one bind operation at a time.
442 */
443 static void
rds_bind(queue_t * q,mblk_t * mp)444 rds_bind(queue_t *q, mblk_t *mp)
445 {
446 sin_t *sin;
447 rds_t *rds;
448 struct T_bind_req *tbr;
449 in_port_t port; /* Host byte order */
450 in_port_t requested_port; /* Host byte order */
451 struct T_bind_ack *tba;
452 int count;
453 rds_bf_t *rdsbf;
454 in_port_t lport; /* Network byte order */
455
456 rds = (rds_t *)q->q_ptr;
457 if (((uintptr_t)mp->b_wptr - (uintptr_t)mp->b_rptr) < sizeof (*tbr)) {
458 rds_err_ack(q, mp, TPROTO, 0);
459 return;
460 }
461
462 /*
463 * We don't allow multiple binds
464 */
465 if (rds->rds_state != TS_UNBND) {
466 rds_err_ack(q, mp, TOUTSTATE, 0);
467 return;
468 }
469
470 tbr = (struct T_bind_req *)(uintptr_t)mp->b_rptr;
471 switch (tbr->ADDR_length) {
472 case sizeof (sin_t): /* Complete IPv4 address */
473 sin = (sin_t *)(uintptr_t)mi_offset_param(mp, tbr->ADDR_offset,
474 sizeof (sin_t));
475 if (sin == NULL || !OK_32PTR((char *)sin)) {
476 rds_err_ack(q, mp, TSYSERR, EINVAL);
477 return;
478 }
479 if (rds->rds_family != AF_INET_OFFLOAD ||
480 sin->sin_family != AF_INET_OFFLOAD) {
481 rds_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
482 return;
483 }
484 if (sin->sin_addr.s_addr == INADDR_ANY) {
485 rds_err_ack(q, mp, TBADADDR, 0);
486 return;
487 }
488
489 /*
490 * verify that the address is hosted on IB
491 * only exception is the loopback address.
492 */
493 if ((sin->sin_addr.s_addr != INADDR_LOOPBACK) &&
494 !rds_verify_bind_address(sin->sin_addr.s_addr)) {
495 rds_err_ack(q, mp, TBADADDR, 0);
496 return;
497 }
498
499 port = ntohs(sin->sin_port);
500 break;
501 default: /* Invalid request */
502 rds_err_ack(q, mp, TBADADDR, 0);
503 return;
504 }
505
506 requested_port = port;
507
508 /*
509 * TPI only sends down T_BIND_REQ for AF_INET and AF_INET6
510 * since RDS socket is of type AF_INET_OFFLOAD a O_T_BIND_REQ
511 * will be sent down. Treat O_T_BIND_REQ as T_BIND_REQ
512 */
513
514 if (requested_port == 0) {
515 /*
516 * If the application passed in zero for the port number, it
517 * doesn't care which port number we bind to. Get one in the
518 * valid range.
519 */
520 port = rds_update_next_port(rds_next_port_to_try);
521 }
522
523 ASSERT(port != 0);
524 count = 0;
525 for (;;) {
526 rds_t *rds1;
527 ASSERT(sin->sin_addr.s_addr != INADDR_ANY);
528 /*
529 * Walk through the list of rds streams bound to
530 * requested port with the same IP address.
531 */
532 lport = htons(port);
533 rdsbf = &rds_bind_fanout[RDS_BIND_HASH(lport)];
534 mutex_enter(&rdsbf->rds_bf_lock);
535 for (rds1 = rdsbf->rds_bf_rds; rds1 != NULL;
536 rds1 = rds1->rds_bind_hash) {
537 if (lport != rds1->rds_port ||
538 rds1->rds_src != sin->sin_addr.s_addr ||
539 rds1->rds_zoneid != rds->rds_zoneid)
540
541 continue;
542 break;
543 }
544
545 if (rds1 == NULL) {
546 /*
547 * No other stream has this IP address
548 * and port number. We can use it.
549 */
550 break;
551 }
552 mutex_exit(&rdsbf->rds_bf_lock);
553 if (requested_port != 0) {
554 /*
555 * We get here only when requested port
556 * is bound (and only first of the for()
557 * loop iteration).
558 *
559 * The semantics of this bind request
560 * require it to fail so we return from
561 * the routine (and exit the loop).
562 *
563 */
564 rds_err_ack(q, mp, TADDRBUSY, 0);
565 return;
566 }
567
568 port = rds_update_next_port(port + 1);
569
570 if (++count >= loopmax) {
571 /*
572 * We've tried every possible port number and
573 * there are none available, so send an error
574 * to the user.
575 */
576 rds_err_ack(q, mp, TNOADDR, 0);
577 return;
578 }
579 }
580
581 /*
582 * Copy the source address into our rds structure.
583 */
584 rds->rds_src = sin->sin_addr.s_addr;
585 rds->rds_port = lport;
586
587 /*
588 * reset the next port if we choose the port
589 */
590 if (requested_port == 0) {
591 rds_next_port_to_try = port + 1;
592 }
593
594 rds->rds_state = TS_IDLE;
595 rds_bind_hash_insert(rdsbf, rds);
596 mutex_exit(&rdsbf->rds_bf_lock);
597
598 /* Reset the message type in preparation for shipping it back. */
599 mp->b_datap->db_type = M_PCPROTO;
600 tba = (struct T_bind_ack *)(uintptr_t)mp->b_rptr;
601 tba->PRIM_type = T_BIND_ACK;
602
603 /* Increment the number of ports and set the port quota */
604 RDS_INCR_NPORT();
605 rds->rds_port_quota = RDS_CURRENT_PORT_QUOTA();
606 RDS_SET_PORT_QUOTA(rds->rds_port_quota);
607 (void) proto_set_rx_hiwat(RD(q), NULL,
608 rds->rds_port_quota * UserBufferSize);
609
610 qreply(q, mp);
611 }
612
613 static void
rds_wput_other(queue_t * q,mblk_t * mp)614 rds_wput_other(queue_t *q, mblk_t *mp)
615 {
616 uchar_t *rptr = mp->b_rptr;
617 struct datab *db;
618 cred_t *cr;
619
620 db = mp->b_datap;
621 switch (db->db_type) {
622 case M_DATA:
623 /* Not connected */
624 freemsg(mp);
625 return;
626 case M_PROTO:
627 case M_PCPROTO:
628 if ((uintptr_t)mp->b_wptr - (uintptr_t)rptr <
629 sizeof (t_scalar_t)) {
630 freemsg(mp);
631 return;
632 }
633 switch (((union T_primitives *)(uintptr_t)rptr)->type) {
634 case T_CAPABILITY_REQ:
635 rds_capability_req(q, mp);
636 return;
637
638 case T_INFO_REQ:
639 rds_info_req(q, mp);
640 return;
641 case O_T_BIND_REQ:
642 case T_BIND_REQ:
643 rds_bind(q, mp);
644 return;
645 case T_SVR4_OPTMGMT_REQ:
646 case T_OPTMGMT_REQ:
647 /*
648 * All Solaris components should pass a db_credp
649 * for this TPI message, hence we ASSERT.
650 * But in case there is some other M_PROTO that looks
651 * like a TPI message sent by some other kernel
652 * component, we check and return an error.
653 */
654 cr = msg_getcred(mp, NULL);
655 ASSERT(cr != NULL);
656 if (cr == NULL) {
657 rds_err_ack(q, mp, TSYSERR, EINVAL);
658 return;
659 }
660 if (((union T_primitives *)(uintptr_t)rptr)->type ==
661 T_SVR4_OPTMGMT_REQ) {
662 svr4_optcom_req(q, mp, cr, &rds_opt_obj);
663 } else {
664 tpi_optcom_req(q, mp, cr, &rds_opt_obj);
665 }
666 return;
667 case T_CONN_REQ:
668 /*
669 * We should not receive T_CONN_REQ as sockfs only
670 * sends down T_CONN_REQ if family == AF_INET/AF_INET6
671 * and type == SOCK_DGRAM/SOCK_RAW. For all others
672 * it simply calls soisconnected. see sotpi_connect()
673 * for details.
674 */
675 /* FALLTHRU */
676 default:
677 cmn_err(CE_PANIC, "type %d \n",
678 ((union T_primitives *)(uintptr_t)rptr)->type);
679 }
680 break;
681 case M_FLUSH:
682 if (*rptr & FLUSHW)
683 flushq(q, FLUSHDATA);
684 break;
685 case M_IOCTL:
686 rds_ioctl(q, mp);
687 break;
688 case M_IOCDATA:
689 /* IOCTL continuation following copyin or copyout. */
690 if (mi_copy_state(q, mp, NULL) == -1) {
691 /*
692 * The copy operation failed. mi_copy_state already
693 * cleaned up, so we're out of here.
694 */
695 return;
696 }
697 /*
698 * If we just completed a copy in, continue processing
699 * in rds_ioctl_copyin_done. If it was a copy out, we call
700 * mi_copyout again. If there is nothing more to copy out,
701 * it will complete the IOCTL.
702 */
703
704 if (MI_COPY_DIRECTION(mp) == MI_COPY_IN)
705 rds_ioctl_copyin_done(q, mp);
706 else
707 mi_copyout(q, mp);
708 return;
709
710 default:
711 cmn_err(CE_PANIC, "types %d \n", db->db_type);
712 }
713 }
714
715 static int
rds_wput(queue_t * q,mblk_t * mp)716 rds_wput(queue_t *q, mblk_t *mp)
717 {
718 struct datab *db;
719 uchar_t *rptr = mp->b_rptr;
720
721 db = mp->b_datap;
722 switch (db->db_type) {
723 case M_PROTO:
724 case M_PCPROTO:
725 ASSERT(((uintptr_t)mp->b_wptr - (uintptr_t)rptr) <=
726 (uintptr_t)INT_MAX);
727 if ((uintptr_t)mp->b_wptr - (uintptr_t)rptr >=
728 sizeof (struct T_unitdata_req)) {
729 if (((union T_primitives *)(uintptr_t)rptr)->type
730 == T_UNITDATA_REQ) {
731 /*
732 * We should never come here for T_UNITDATA_REQ
733 */
734 cmn_err(CE_PANIC, "rds_wput T_UNITDATA_REQ \n");
735 }
736 }
737 /* FALLTHRU */
738 default:
739 rds_wput_other(q, mp);
740 return (0);
741 }
742 }
743
744 static int
rds_wput_data(queue_t * q,mblk_t * mp,uio_t * uiop)745 rds_wput_data(queue_t *q, mblk_t *mp, uio_t *uiop)
746 {
747 uchar_t *rptr = mp->b_rptr;
748 rds_t *rds;
749 mblk_t *mp1;
750 sin_t *sin;
751 ipaddr_t dst;
752 uint16_t port;
753 int ret = 0;
754
755 #define tudr ((struct T_unitdata_req *)(uintptr_t)rptr)
756
757 rds = (rds_t *)q->q_ptr;
758 /* Handle UNITDATA_REQ messages here */
759 if (rds->rds_state == TS_UNBND) {
760 /* If a port has not been bound to the stream, fail. */
761 dprint(2, ("%s: socket is not bound to a port", LABEL));
762 freemsg(mp);
763 return (EPROTO);
764 }
765
766 mp1 = mp->b_cont;
767 mp->b_cont = NULL;
768 if (mp1 == NULL) {
769 dprint(2, ("%s: No message to send", LABEL));
770 freemsg(mp);
771 return (EPROTO);
772 }
773
774 /*
775 * No options allowed
776 */
777 if (tudr->OPT_length != 0) {
778 ret = EINVAL;
779 goto done;
780 }
781
782 ASSERT(mp1->b_datap->db_ref == 1);
783
784 if ((rptr + tudr->DEST_offset + tudr->DEST_length) >
785 mp->b_wptr) {
786 ret = EDESTADDRREQ;
787 goto done;
788 }
789
790 sin = (sin_t *)(uintptr_t)&rptr[tudr->DEST_offset];
791 if (!OK_32PTR((char *)sin) || tudr->DEST_length !=
792 sizeof (sin_t) || sin->sin_family != AF_INET_OFFLOAD) {
793 ret = EDESTADDRREQ;
794 goto done;
795 }
796 /* Extract port and ipaddr */
797 port = sin->sin_port;
798 dst = sin->sin_addr.s_addr;
799
800 if (port == 0 || dst == INADDR_ANY) {
801 ret = EDESTADDRREQ;
802 goto done;
803 }
804
805 ASSERT(rds_transport_ops != NULL);
806 ret = rds_transport_ops->rds_transport_sendmsg(uiop, rds->rds_src, dst,
807 ntohs(rds->rds_port), ntohs(port), rds->rds_zoneid);
808 if (ret != 0) {
809 if ((ret != ENOBUFS) && (ret != ENOMEM)) {
810 /* ENOMEM is actually EWOULDBLOCK */
811 dprint(2, ("%s: rds_sendmsg returned %d", LABEL, ret));
812 goto done;
813 }
814 }
815 done:
816 freemsg(mp1);
817 freemsg(mp);
818 return (ret);
819 }
820
821 /*
822 * Make sure we dont return EINVAL and EWOULDBLOCK as it has
823 * special meanings for the synchronous streams (rwnext()).
824 * We should return ENOMEM which is changed to EWOULDBLOCK by kstrputmsg()
825 */
826 static int
rds_wrw(queue_t * q,struiod_t * dp)827 rds_wrw(queue_t *q, struiod_t *dp)
828 {
829 mblk_t *mp = dp->d_mp;
830 int error = 0;
831 struct datab *db;
832 uchar_t *rptr;
833
834 db = mp->b_datap;
835 rptr = mp->b_rptr;
836 switch (db->db_type) {
837 case M_PROTO:
838 case M_PCPROTO:
839 ASSERT(((uintptr_t)mp->b_wptr - (uintptr_t)rptr) <=
840 (uintptr_t)INT_MAX);
841 if ((uintptr_t)mp->b_wptr - (uintptr_t)rptr >=
842 sizeof (struct T_unitdata_req)) {
843 /* Detect valid T_UNITDATA_REQ here */
844 if (((union T_primitives *)(uintptr_t)rptr)->type
845 == T_UNITDATA_REQ)
846 break;
847 }
848 /* FALLTHRU */
849 default:
850
851 if (isuioq(q) && (error = struioget(q, mp, dp, 0))) {
852 /*
853 * Uio error of some sort, so just return the error.
854 */
855 goto done;
856 }
857 dp->d_mp = 0;
858 rds_wput_other(q, mp);
859 return (0);
860 }
861
862 dp->d_mp = 0;
863 error = rds_wput_data(q, mp, &dp->d_uio);
864 done:
865 if (error == EWOULDBLOCK || error == EINVAL)
866 error = EIO;
867
868 return (error);
869 }
870
871 static int
rds_rsrv(queue_t * q)872 rds_rsrv(queue_t *q)
873 {
874 rds_t *rds = (rds_t *)q->q_ptr;
875 ulong_t current_port_quota;
876
877 /* update the port quota to the current level */
878 current_port_quota = RDS_GET_PORT_QUOTA();
879 if (rds->rds_port_quota != current_port_quota) {
880 rds->rds_port_quota = current_port_quota;
881 (void) proto_set_rx_hiwat(q, NULL,
882 rds->rds_port_quota * UserBufferSize);
883 }
884
885 /* No more messages in the q, unstall the socket */
886 rds_transport_ops->rds_transport_resume_port(ntohs(rds->rds_port));
887 return (0);
888 }
889
890 int
rds_close_transport_driver()891 rds_close_transport_driver()
892 {
893 ASSERT(rds_transport_ops != NULL);
894
895 rw_enter(&rds_transport_lock, RW_WRITER);
896 if (rds_transport_handle != NULL) {
897 rds_transport_ops->rds_transport_close_ib();
898 (void) ldi_close(rds_transport_handle, FNDELAY, kcred);
899 rds_transport_handle = NULL;
900 }
901 rw_exit(&rds_transport_lock);
902
903 return (0);
904 }
905
906
907 int
rds_open_transport_driver()908 rds_open_transport_driver()
909 {
910 int ret = 0;
911
912 rw_enter(&rds_transport_lock, RW_WRITER);
913 if (rds_transport_handle != NULL) {
914 /*
915 * Someone beat us to it.
916 */
917 goto done;
918 }
919
920 if (ibt_hw_is_present() == 0) {
921 ret = ENODEV;
922 goto done;
923 }
924
925 if (rds_li == NULL) {
926 ret = EPROTONOSUPPORT;
927 goto done;
928 }
929
930 ret = ldi_open_by_name("/devices/ib/rdsib@0:rdsib",
931 FREAD | FWRITE, kcred, &rds_transport_handle, rds_li);
932 if (ret != 0) {
933 ret = EPROTONOSUPPORT;
934 rds_transport_handle = NULL;
935 goto done;
936 }
937
938 ret = rds_transport_ops->rds_transport_open_ib();
939 if (ret != 0) {
940 (void) ldi_close(rds_transport_handle, FNDELAY, kcred);
941 rds_transport_handle = NULL;
942 }
943 done:
944 rw_exit(&rds_transport_lock);
945 return (ret);
946 }
947
948 static struct module_info info = {
949 0, "rds", 1, INFPSZ, 65536, 1024
950 };
951
952 static struct qinit rinit = {
953 NULL, rds_rsrv, rds_open, rds_close, NULL, &info
954 };
955
956 static struct qinit winit = {
957 rds_wput, NULL, rds_open, rds_close, NULL, &info,
958 NULL, rds_wrw, NULL, STRUIOT_STANDARD
959 };
960
961 struct streamtab rdsinfo = {
962 &rinit, &winit, NULL, NULL
963 };
964
965 DDI_DEFINE_STREAM_OPS(rds_devops, nulldev, nulldev, rds_attach, rds_detach,
966 nulldev, rds_info, RDS_DEVMTFLAGS, &RDS_STRTAB, ddi_quiesce_not_supported);
967
968 /*
969 * Module linkage information for the kernel.
970 */
971 static struct modldrv modldrv = {
972 &mod_driverops,
973 RDS_DEVDESC,
974 &rds_devops
975 };
976
977 static struct modlinkage modlinkage = {
978 MODREV_1,
979 &modldrv,
980 NULL
981 };
982
983 int
_init(void)984 _init(void)
985 {
986 int ret;
987
988 rds_init();
989
990 ret = mod_install(&modlinkage);
991 if (ret != 0)
992 goto done;
993 ret = ldi_ident_from_mod(&modlinkage, &rds_li);
994 if (ret != 0)
995 rds_li = NULL;
996 done:
997 return (ret);
998 }
999
1000 int
_fini(void)1001 _fini(void)
1002 {
1003 int ret;
1004
1005 ret = mod_remove(&modlinkage);
1006 if (ret != 0) {
1007 return (ret);
1008 }
1009
1010 rds_fini();
1011
1012 ldi_ident_release(rds_li);
1013 return (0);
1014 }
1015
1016 int
_info(struct modinfo * modinfop)1017 _info(struct modinfo *modinfop)
1018 {
1019 return (mod_info(&modlinkage, modinfop));
1020 }
1021