1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
#include <sys/sysmacros.h>
#include <sys/byteorder.h>
#include <sys/ib/clients/rds/rds.h>
#include <sys/ib/clients/rds/rds_kstat.h>

#include <inet/ipclassifier.h>
31 
32 struct rds_kstat_s rds_kstat = {
33 	{"rds_nports",			KSTAT_DATA_ULONG},
34 	{"rds_nsessions",		KSTAT_DATA_ULONG},
35 	{"rds_tx_bytes",		KSTAT_DATA_ULONG},
36 	{"rds_tx_pkts",			KSTAT_DATA_ULONG},
37 	{"rds_tx_errors",		KSTAT_DATA_ULONG},
38 	{"rds_rx_bytes",		KSTAT_DATA_ULONG},
39 	{"rds_rx_pkts",			KSTAT_DATA_ULONG},
40 	{"rds_rx_pkts_pending",		KSTAT_DATA_ULONG},
41 	{"rds_rx_errors",		KSTAT_DATA_ULONG},
42 	{"rds_tx_acks",			KSTAT_DATA_ULONG},
43 	{"rds_post_recv_buf_called",	KSTAT_DATA_ULONG},
44 	{"rds_stalls_triggered",	KSTAT_DATA_ULONG},
45 	{"rds_stalls_sent",		KSTAT_DATA_ULONG},
46 	{"rds_unstalls_triggered",	KSTAT_DATA_ULONG},
47 	{"rds_unstalls_sent",		KSTAT_DATA_ULONG},
48 	{"rds_stalls_recvd",		KSTAT_DATA_ULONG},
49 	{"rds_unstalls_recvd",		KSTAT_DATA_ULONG},
50 	{"rds_stalls_ignored",		KSTAT_DATA_ULONG},
51 	{"rds_enobufs",			KSTAT_DATA_ULONG},
52 	{"rds_ewouldblocks",		KSTAT_DATA_ULONG},
53 	{"rds_failovers",		KSTAT_DATA_ULONG},
54 	{"rds_port_quota",		KSTAT_DATA_ULONG},
55 	{"rds_port_quota_adjusted",	KSTAT_DATA_ULONG},
56 };
57 
58 kstat_t *rds_kstatsp;
59 static kmutex_t rds_kstat_mutex;
60 
61 
62 struct	kmem_cache	*rds_alloc_cache;
63 
64 uint_t	rds_bind_fanout_size = RDS_BIND_FANOUT_SIZE;
65 rds_bf_t *rds_bind_fanout;
66 
67 void
68 rds_increment_kstat(kstat_named_t *ksnp, boolean_t lock, uint_t num)
69 {
70 	if (lock)
71 		mutex_enter(&rds_kstat_mutex);
72 	ksnp->value.ul += num;
73 	if (lock)
74 		mutex_exit(&rds_kstat_mutex);
75 }
76 
77 void
78 rds_decrement_kstat(kstat_named_t *ksnp, boolean_t lock, uint_t num)
79 {
80 	if (lock)
81 		mutex_enter(&rds_kstat_mutex);
82 	ksnp->value.ul -= num;
83 	if (lock)
84 		mutex_exit(&rds_kstat_mutex);
85 }
86 
87 void
88 rds_set_kstat(kstat_named_t *ksnp, boolean_t lock, ulong_t num)
89 {
90 	if (lock)
91 		mutex_enter(&rds_kstat_mutex);
92 	ksnp->value.ul = num;
93 	if (lock)
94 		mutex_exit(&rds_kstat_mutex);
95 }
96 
97 ulong_t
98 rds_get_kstat(kstat_named_t *ksnp, boolean_t lock)
99 {
100 	ulong_t	value;
101 
102 	if (lock)
103 		mutex_enter(&rds_kstat_mutex);
104 	value = ksnp->value.ul;
105 	if (lock)
106 		mutex_exit(&rds_kstat_mutex);
107 
108 	return (value);
109 }
110 
111 
112 void
113 rds_fini()
114 {
115 	int	i;
116 
117 	for (i = 0; i < rds_bind_fanout_size; i++) {
118 		mutex_destroy(&rds_bind_fanout[i].rds_bf_lock);
119 	}
120 	kmem_free(rds_bind_fanout, rds_bind_fanout_size * sizeof (rds_bf_t));
121 
122 	kmem_cache_destroy(rds_alloc_cache);
123 	kstat_delete(rds_kstatsp);
124 }
125 
126 
127 void
128 rds_init()
129 {
130 	rds_alloc_cache = kmem_cache_create("rds_alloc_cache",
131 	    sizeof (rds_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
132 	rds_hash_init();
133 	/*
134 	 * kstats
135 	 */
136 	rds_kstatsp = kstat_create("rds", 0,
137 	    "rds_kstat", "misc", KSTAT_TYPE_NAMED,
138 	    sizeof (rds_kstat) / sizeof (kstat_named_t),
139 	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
140 	if (rds_kstatsp != NULL) {
141 		rds_kstatsp->ks_lock = &rds_kstat_mutex;
142 		rds_kstatsp->ks_data = (void *)&rds_kstat;
143 		kstat_install(rds_kstatsp);
144 	}
145 }
146 
147 #define	UINT_32_BITS 31
148 void
149 rds_hash_init()
150 {
151 	int i;
152 
153 	if (!ISP2(rds_bind_fanout_size)) {
154 		/* Not a power of two. Round up to nearest power of two */
155 		for (i = 0; i < UINT_32_BITS; i++) {
156 			if (rds_bind_fanout_size < (1 << i))
157 				break;
158 		}
159 		rds_bind_fanout_size = 1 << i;
160 	}
161 	rds_bind_fanout = kmem_zalloc(rds_bind_fanout_size *
162 	    sizeof (rds_bf_t), KM_SLEEP);
163 	for (i = 0; i < rds_bind_fanout_size; i++) {
164 		mutex_init(&rds_bind_fanout[i].rds_bf_lock, NULL, MUTEX_DEFAULT,
165 		    NULL);
166 	}
167 }
168 
169 void
170 rds_free(rds_t *rds)
171 {
172 	ASSERT(rds->rds_refcnt == 0);
173 	ASSERT(MUTEX_HELD(&rds->rds_lock));
174 	crfree(rds->rds_cred);
175 	kmem_cache_free(rds_alloc_cache, rds);
176 }
177 
178 rds_t *
179 rds_create(void *rds_ulpd, cred_t *credp)
180 {
181 	rds_t	*rds;
182 
183 	/* User must supply a credential. */
184 	if (credp == NULL)
185 		return (NULL);
186 	rds = kmem_cache_alloc(rds_alloc_cache, KM_SLEEP);
187 	if (rds == NULL) {
188 		return (NULL);
189 	}
190 
191 	bzero(rds, sizeof (rds_t));
192 	mutex_init(&rds->rds_lock, NULL, MUTEX_DEFAULT, NULL);
193 	cv_init(&rds->rds_refcv, NULL, CV_DEFAULT, NULL);
194 	rds->rds_cred = credp;
195 	rds->rds_ulpd = rds_ulpd;
196 	rds->rds_zoneid = getzoneid();
197 	crhold(credp);
198 	rds->rds_refcnt++;
199 	return (rds);
200 }
201 
202 
203 /*
204  * Hash list removal routine for rds_t structures.
205  */
206 void
207 rds_bind_hash_remove(rds_t *rds, boolean_t caller_holds_lock)
208 {
209 	rds_t   *rdsnext;
210 	kmutex_t *lockp;
211 
212 	if (rds->rds_ptpbhn == NULL)
213 		return;
214 
215 	/*
216 	 * Extract the lock pointer in case there are concurrent
217 	 * hash_remove's for this instance.
218 	 */
219 	ASSERT(rds->rds_port != 0);
220 	if (!caller_holds_lock) {
221 		lockp = &rds_bind_fanout[RDS_BIND_HASH(rds->rds_port)].
222 		    rds_bf_lock;
223 		ASSERT(lockp != NULL);
224 		mutex_enter(lockp);
225 	}
226 
227 	if (rds->rds_ptpbhn != NULL) {
228 		rdsnext = rds->rds_bind_hash;
229 		if (rdsnext != NULL) {
230 			rdsnext->rds_ptpbhn = rds->rds_ptpbhn;
231 			rds->rds_bind_hash = NULL;
232 		}
233 		*rds->rds_ptpbhn = rdsnext;
234 		rds->rds_ptpbhn = NULL;
235 	}
236 
237 	RDS_DEC_REF_CNT(rds);
238 
239 	if (!caller_holds_lock) {
240 		mutex_exit(lockp);
241 	}
242 }
243 
244 void
245 rds_bind_hash_insert(rds_bf_t *rdsbf, rds_t *rds)
246 {
247 	rds_t   **rdsp;
248 	rds_t   *rdsnext;
249 
250 	ASSERT(MUTEX_HELD(&rdsbf->rds_bf_lock));
251 	if (rds->rds_ptpbhn != NULL) {
252 		rds_bind_hash_remove(rds, B_TRUE);
253 	}
254 
255 	rdsp = &rdsbf->rds_bf_rds;
256 	rdsnext = rdsp[0];
257 
258 	if (rdsnext != NULL) {
259 		rdsnext->rds_ptpbhn = &rds->rds_bind_hash;
260 	}
261 	rds->rds_bind_hash = rdsnext;
262 	rds->rds_ptpbhn = rdsp;
263 	rdsp[0] = rds;
264 	RDS_INCR_REF_CNT(rds);
265 
266 }
267 
268 /*
269  * Everything is in network byte order
270  */
271 /* ARGSUSED */
272 rds_t *
273 rds_fanout(ipaddr_t local_addr, ipaddr_t rem_addr,
274     in_port_t local_port, in_port_t rem_port, zoneid_t zoneid)
275 {
276 	rds_t	*rds;
277 	rds_bf_t *rdsbf;
278 
279 	rdsbf = &rds_bind_fanout[RDS_BIND_HASH(local_port)];
280 	mutex_enter(&rdsbf->rds_bf_lock);
281 	rds = rdsbf->rds_bf_rds;
282 	while (rds != NULL) {
283 		if (!(rds->rds_flags & RDS_CLOSING)) {
284 			if ((RDS_MATCH(rds, local_port, local_addr)) &&
285 			    ((local_addr != INADDR_LOOPBACK) ||
286 			    (rds->rds_zoneid == zoneid))) {
287 				RDS_INCR_REF_CNT(rds);
288 				break;
289 			}
290 		}
291 		rds = rds->rds_bind_hash;
292 	}
293 	mutex_exit(&rdsbf->rds_bf_lock);
294 	return (rds);
295 }
296 
297 boolean_t
298 rds_islocal(ipaddr_t addr)
299 {
300 	ip_stack_t *ipst;
301 
302 	ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip;
303 	ASSERT(ipst != NULL);
304 	if (ip_laddr_verify_v4(addr, ALL_ZONES, ipst, B_FALSE) == IPVL_BAD) {
305 		netstack_rele(ipst->ips_netstack);
306 		return (B_FALSE);
307 	}
308 	netstack_rele(ipst->ips_netstack);
309 	return (B_TRUE);
310 }
311