xref: /illumos-gate/usr/src/uts/common/inet/ip/ip6_asp.c (revision d362b749)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/socket.h>
30 #include <sys/ksynch.h>
31 #include <sys/kmem.h>
32 #include <sys/errno.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/cmn_err.h>
36 #include <sys/strsun.h>
37 #include <sys/zone.h>
38 #include <netinet/in.h>
39 #include <inet/common.h>
40 #include <inet/ip.h>
41 #include <inet/ip6.h>
42 #include <inet/ip6_asp.h>
43 #include <inet/ip_ire.h>
44 #include <inet/ipclassifier.h>
45 
46 #define	IN6ADDR_MASK128_INIT \
47 	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }
48 #define	IN6ADDR_MASK96_INIT	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0 }
49 #ifdef _BIG_ENDIAN
50 #define	IN6ADDR_MASK16_INIT	{ 0xffff0000U, 0, 0, 0 }
51 #else
52 #define	IN6ADDR_MASK16_INIT	{ 0x0000ffffU, 0, 0, 0 }
53 #endif
54 
55 
56 /*
57  * This table is ordered such that longest prefix matches are hit first
58  * (longer prefix lengths first).  The last entry must be the "default"
59  * entry (::0/0).
60  */
61 static ip6_asp_t default_ip6_asp_table[] = {
62 	{ IN6ADDR_LOOPBACK_INIT,	IN6ADDR_MASK128_INIT,
63 	    "Loopback", 50 },
64 	{ IN6ADDR_ANY_INIT,		IN6ADDR_MASK96_INIT,
65 	    "IPv4_Compatible", 20 },
66 #ifdef _BIG_ENDIAN
67 	{ { 0, 0, 0x0000ffffU, 0 },	IN6ADDR_MASK96_INIT,
68 	    "IPv4", 10 },
69 	{ { 0x20020000U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
70 	    "6to4", 30 },
71 #else
72 	{ { 0, 0, 0xffff0000U, 0 },	IN6ADDR_MASK96_INIT,
73 	    "IPv4", 10 },
74 	{ { 0x00000220U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
75 	    "6to4", 30 },
76 #endif
77 	{ IN6ADDR_ANY_INIT,		IN6ADDR_ANY_INIT,
78 	    "Default", 40 }
79 };
80 
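/*
 * The IPv6 Default Address Selection policy table.
 * Until someone up above reconfigures the policy table, use the global
 * default.  The only way to alter the table is through the
 * SIOCSIP6ADDRPOLICY ioctl, which is exclusive in ip.  Lookups are
 * bracketed by ip6_asp_can_lookup() and ip6_asp_table_refrele(); a
 * replacement is deferred while such references are outstanding, and
 * new lookups are refused while a replacement is in progress.
 * ips_ip6_asp_lock protects the reference count, the update-in-progress
 * flag and the lists of deferred work rather than the table contents.
 */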
87 static void ip6_asp_copy(ip6_asp_t *, ip6_asp_t *, uint_t);
88 static void ip6_asp_check_for_updates(ip_stack_t *);
89 
90 void
91 ip6_asp_init(ip_stack_t *ipst)
92 {
93 	/* Initialize the table lock */
94 	mutex_init(&ipst->ips_ip6_asp_lock, NULL, MUTEX_DEFAULT, NULL);
95 
96 	ipst->ips_ip6_asp_table = default_ip6_asp_table;
97 
98 	ipst->ips_ip6_asp_table_count =
99 	    sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
100 }
101 
102 void
103 ip6_asp_free(ip_stack_t *ipst)
104 {
105 	if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
106 		kmem_free(ipst->ips_ip6_asp_table,
107 		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
108 		ipst->ips_ip6_asp_table = NULL;
109 	}
110 	mutex_destroy(&ipst->ips_ip6_asp_lock);
111 }
112 
113 /*
114  * Return false if the table is being updated. Else, increment the ref
115  * count and return true.
116  */
117 boolean_t
118 ip6_asp_can_lookup(ip_stack_t *ipst)
119 {
120 	mutex_enter(&ipst->ips_ip6_asp_lock);
121 	if (ipst->ips_ip6_asp_uip) {
122 		mutex_exit(&ipst->ips_ip6_asp_lock);
123 		return (B_FALSE);
124 	}
125 	IP6_ASP_TABLE_REFHOLD(ipst);
126 	mutex_exit(&ipst->ips_ip6_asp_lock);
127 	return (B_TRUE);
128 
129 }
130 
131 void
132 ip6_asp_pending_op(queue_t *q, mblk_t *mp, aspfunc_t func)
133 {
134 	conn_t	*connp = Q_TO_CONN(q);
135 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
136 
137 	ASSERT((mp->b_prev == NULL) && (mp->b_queue == NULL) &&
138 	    (mp->b_next == NULL));
139 	mp->b_queue = (void *)q;
140 	mp->b_prev = (void *)func;
141 	mp->b_next = NULL;
142 
143 	mutex_enter(&ipst->ips_ip6_asp_lock);
144 	if (ipst->ips_ip6_asp_pending_ops == NULL) {
145 		ASSERT(ipst->ips_ip6_asp_pending_ops_tail == NULL);
146 		ipst->ips_ip6_asp_pending_ops =
147 		    ipst->ips_ip6_asp_pending_ops_tail = mp;
148 	} else {
149 		ipst->ips_ip6_asp_pending_ops_tail->b_next = mp;
150 		ipst->ips_ip6_asp_pending_ops_tail = mp;
151 	}
152 	mutex_exit(&ipst->ips_ip6_asp_lock);
153 }
154 
155 static void
156 ip6_asp_complete_op(ip_stack_t *ipst)
157 {
158 	mblk_t		*mp;
159 	queue_t		*q;
160 	aspfunc_t	func;
161 
162 	mutex_enter(&ipst->ips_ip6_asp_lock);
163 	while (ipst->ips_ip6_asp_pending_ops != NULL) {
164 		mp = ipst->ips_ip6_asp_pending_ops;
165 		ipst->ips_ip6_asp_pending_ops = mp->b_next;
166 		mp->b_next = NULL;
167 		if (ipst->ips_ip6_asp_pending_ops == NULL)
168 			ipst->ips_ip6_asp_pending_ops_tail = NULL;
169 		mutex_exit(&ipst->ips_ip6_asp_lock);
170 
171 		q = (queue_t *)mp->b_queue;
172 		func = (aspfunc_t)mp->b_prev;
173 
174 		mp->b_prev = NULL;
175 		mp->b_queue = NULL;
176 
177 
178 		(*func)(NULL, q, mp, NULL);
179 		mutex_enter(&ipst->ips_ip6_asp_lock);
180 	}
181 	mutex_exit(&ipst->ips_ip6_asp_lock);
182 }
183 
184 /*
185  * Decrement reference count. When it gets to 0, we check for (pending)
186  * saved update to the table, if any.
187  */
188 void
189 ip6_asp_table_refrele(ip_stack_t *ipst)
190 {
191 	IP6_ASP_TABLE_REFRELE(ipst);
192 }
193 
194 /*
195  * This function is guaranteed never to return a NULL pointer.  It
196  * will always return information from one of the entries in the
197  * asp_table (which will never be empty).  If a pointer is passed
198  * in for the precedence, the precedence value will be set; a
199  * pointer to the label will be returned by the function.
200  *
201  * Since the table is only anticipated to have five or six entries
202  * total, the lookup algorithm hasn't been optimized to anything
203  * better than O(n).
204  */
205 char *
206 ip6_asp_lookup(const in6_addr_t *addr, uint32_t *precedence, ip_stack_t *ipst)
207 {
208 	ip6_asp_t *aspp;
209 	ip6_asp_t *match = NULL;
210 	ip6_asp_t *default_policy;
211 
212 	aspp = ipst->ips_ip6_asp_table;
213 	/* The default entry must always be the last one */
214 	default_policy = aspp + ipst->ips_ip6_asp_table_count - 1;
215 
216 	while (match == NULL) {
217 		if (aspp == default_policy) {
218 			match = aspp;
219 		} else {
220 			if (V6_MASK_EQ(*addr, aspp->ip6_asp_mask,
221 			    aspp->ip6_asp_prefix))
222 				match = aspp;
223 			else
224 				aspp++;
225 		}
226 	}
227 
228 	if (precedence != NULL)
229 		*precedence = match->ip6_asp_precedence;
230 	return (match->ip6_asp_label);
231 }
232 
233 /*
234  * If we had deferred updating the table because of outstanding references,
235  * do it now. Note, we don't do error checking on the queued IOCTL mblk, since
236  * ip_sioctl_ip6addrpolicy() has already done it for us.
237  */
238 void
239 ip6_asp_check_for_updates(ip_stack_t *ipst)
240 {
241 	ip6_asp_t *table;
242 	size_t	table_size;
243 	mblk_t	*data_mp, *mp;
244 	struct iocblk *iocp;
245 
246 	mutex_enter(&ipst->ips_ip6_asp_lock);
247 	if (ipst->ips_ip6_asp_pending_update == NULL ||
248 	    ipst->ips_ip6_asp_refcnt > 0) {
249 		mutex_exit(&ipst->ips_ip6_asp_lock);
250 		return;
251 	}
252 
253 	mp = ipst->ips_ip6_asp_pending_update;
254 	ipst->ips_ip6_asp_pending_update = NULL;
255 	ASSERT(mp->b_prev != NULL);
256 
257 	ipst->ips_ip6_asp_uip = B_TRUE;
258 
259 	iocp = (struct iocblk *)mp->b_rptr;
260 	data_mp = mp->b_cont;
261 	if (data_mp == NULL) {
262 		table = NULL;
263 		table_size = iocp->ioc_count;
264 	} else {
265 		table = (ip6_asp_t *)data_mp->b_rptr;
266 		table_size = iocp->ioc_count;
267 	}
268 
269 	ip6_asp_replace(mp, table, table_size, B_TRUE, ipst,
270 	    iocp->ioc_flag & IOC_MODELS);
271 }
272 
273 /*
274  * ip6_asp_replace replaces the contents of the IPv6 address selection
275  * policy table with those specified in new_table.  If new_table is NULL,
276  * this indicates that the caller wishes ip to use the default policy
277  * table.  The caller is responsible for making sure that there are exactly
278  * new_count policy entries in new_table.
279  */
280 /*ARGSUSED5*/
281 void
282 ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size,
283     boolean_t locked, ip_stack_t *ipst, model_t datamodel)
284 {
285 	int			ret_val = 0;
286 	ip6_asp_t		*tmp_table;
287 	uint_t			count;
288 	queue_t			*q;
289 	struct iocblk		*iocp;
290 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
291 	size_t ip6_asp_size = SIZEOF_STRUCT(ip6_asp, datamodel);
292 #else
293 	const size_t ip6_asp_size = sizeof (ip6_asp_t);
294 #endif
295 
296 	if (new_size % ip6_asp_size != 0) {
297 		ip1dbg(("ip6_asp_replace: invalid table size\n"));
298 		ret_val = EINVAL;
299 		if (locked)
300 			goto unlock_end;
301 		goto replace_end;
302 	} else {
303 		count = new_size / ip6_asp_size;
304 	}
305 
306 
307 	if (!locked)
308 		mutex_enter(&ipst->ips_ip6_asp_lock);
309 	/*
310 	 * Check if we are in the process of creating any IRE using the
311 	 * current information. If so, wait till that is done.
312 	 */
313 	if (!locked && ipst->ips_ip6_asp_refcnt > 0) {
314 		/* Save this request for later processing */
315 		if (ipst->ips_ip6_asp_pending_update == NULL) {
316 			ipst->ips_ip6_asp_pending_update = mp;
317 		} else {
318 			/* Let's not queue multiple requests for now */
319 			ip1dbg(("ip6_asp_replace: discarding request\n"));
320 			mutex_exit(&ipst->ips_ip6_asp_lock);
321 			ret_val =  EAGAIN;
322 			goto replace_end;
323 		}
324 		mutex_exit(&ipst->ips_ip6_asp_lock);
325 		return;
326 	}
327 
328 	/* Prevent lookups till the table have been updated */
329 	if (!locked)
330 		ipst->ips_ip6_asp_uip = B_TRUE;
331 
332 	ASSERT(ipst->ips_ip6_asp_refcnt == 0);
333 
334 	if (new_table == NULL) {
335 		/*
336 		 * This is a special case.  The user wants to revert
337 		 * back to using the default table.
338 		 */
339 		if (ipst->ips_ip6_asp_table == default_ip6_asp_table)
340 			goto unlock_end;
341 
342 		kmem_free(ipst->ips_ip6_asp_table,
343 		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
344 		ipst->ips_ip6_asp_table = default_ip6_asp_table;
345 		ipst->ips_ip6_asp_table_count =
346 		    sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
347 		goto unlock_end;
348 	}
349 
350 	if (count == 0) {
351 		ret_val = EINVAL;
352 		ip1dbg(("ip6_asp_replace: empty table\n"));
353 		goto unlock_end;
354 	}
355 
356 	if ((tmp_table = kmem_alloc(count * sizeof (ip6_asp_t), KM_NOSLEEP)) ==
357 	    NULL) {
358 		ret_val = ENOMEM;
359 		goto unlock_end;
360 	}
361 
362 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
363 
364 	/*
365 	 * If 'new_table' -actually- originates from a 32-bit process
366 	 * then the nicely aligned ip6_asp_label array will be
367 	 * subtlely misaligned on this kernel, because the structure
368 	 * is 8 byte aligned in the kernel, but only 4 byte aligned in
369 	 * userland.  Fix it up here.
370 	 *
371 	 * XX64	See the notes in ip_sioctl_ip6addrpolicy.  Perhaps we could
372 	 *	do the datamodel transformation (below) there instead of here?
373 	 */
374 	if (datamodel == IOC_ILP32) {
375 		ip6_asp_t *dst;
376 		ip6_asp32_t *src;
377 		int i;
378 
379 		if ((dst = kmem_zalloc(count * sizeof (*dst),
380 		    KM_NOSLEEP)) == NULL) {
381 			kmem_free(tmp_table, count * sizeof (ip6_asp_t));
382 			ret_val = ENOMEM;
383 			goto unlock_end;
384 		}
385 
386 		/*
387 		 * Copy each element of the table from ip6_asp32_t
388 		 * format into ip6_asp_t format.  Fortunately, since
389 		 * we're just dealing with a trailing structure pad,
390 		 * we can do this straightforwardly with a flurry of
391 		 * bcopying.
392 		 */
393 		src = (void *)new_table;
394 		for (i = 0; i < count; i++)
395 			bcopy(src + i, dst + i, sizeof (*src));
396 
397 		ip6_asp_copy(dst, tmp_table, count);
398 		kmem_free(dst, count * sizeof (*dst));
399 	} else
400 #endif
401 		ip6_asp_copy(new_table, tmp_table, count);
402 
403 	/* Make sure the last entry is the default entry */
404 	if (!IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_prefix) ||
405 	    !IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_mask)) {
406 		ret_val = EINVAL;
407 		kmem_free(tmp_table, count * sizeof (ip6_asp_t));
408 		ip1dbg(("ip6_asp_replace: bad table: no default entry\n"));
409 		goto unlock_end;
410 	}
411 	if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
412 		kmem_free(ipst->ips_ip6_asp_table,
413 		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
414 	}
415 	ipst->ips_ip6_asp_table = tmp_table;
416 	ipst->ips_ip6_asp_table_count = count;
417 
418 	/*
419 	 * The user has changed the address selection policy table.  IPv6
420 	 * source address selection for existing IRE_CACHE and
421 	 * RTF_DYNAMIC entries used the old table, so we need to
422 	 * clear the cache.
423 	 */
424 	ire_walk_v6(ire_delete_cache_v6, NULL, ALL_ZONES, ipst);
425 
426 unlock_end:
427 	ipst->ips_ip6_asp_uip = B_FALSE;
428 	mutex_exit(&ipst->ips_ip6_asp_lock);
429 
430 replace_end:
431 	/* Reply to the ioctl */
432 	q = (queue_t *)mp->b_prev;
433 	mp->b_prev = NULL;
434 	if (q == NULL) {
435 		freemsg(mp);
436 		goto check_binds;
437 	}
438 	iocp = (struct iocblk *)mp->b_rptr;
439 	iocp->ioc_error = ret_val;
440 	iocp->ioc_count = 0;
441 	DB_TYPE(mp) = (iocp->ioc_error == 0) ? M_IOCACK : M_IOCNAK;
442 	qreply(q, mp);
443 check_binds:
444 	ip6_asp_complete_op(ipst);
445 }
446 
447 /*
448  * Copies the contents of src_table to dst_table, and sorts the
449  * entries in decending order of prefix lengths.  It assumes that both
450  * tables are appropriately sized to contain count entries.
451  */
452 static void
453 ip6_asp_copy(ip6_asp_t *src_table, ip6_asp_t *dst_table, uint_t count)
454 {
455 	ip6_asp_t *src_ptr, *src_limit, *dst_ptr, *dst_limit, *dp;
456 
457 	dst_table[0] = src_table[0];
458 	if (count == 1)
459 		return;
460 
461 	/*
462 	 * Sort the entries in descending order of prefix lengths.
463 	 *
464 	 * Note: this should be a small table.  In 99% of cases, we
465 	 * expect the table to have 5 entries.  In the remaining 1%
466 	 * of cases, we expect the table to have one or two more
467 	 * entries.  It would be very rare for the table to have
468 	 * double-digit entries.
469 	 */
470 	src_limit = src_table + count;
471 	dst_limit = dst_table + 1;
472 	for (src_ptr = src_table + 1; src_ptr != src_limit;
473 	    src_ptr++, dst_limit++) {
474 		for (dst_ptr = dst_table; dst_ptr < dst_limit; dst_ptr++) {
475 			if (ip_mask_to_plen_v6(&src_ptr->ip6_asp_mask) >
476 			    ip_mask_to_plen_v6(&dst_ptr->ip6_asp_mask)) {
477 				/*
478 				 * Make room to insert the source entry
479 				 * before dst_ptr by shifting entries to
480 				 * the right.
481 				 */
482 				for (dp = dst_limit - 1; dp >= dst_ptr; dp--)
483 					*(dp + 1) = *dp;
484 				break;
485 			}
486 		}
487 		*dst_ptr = *src_ptr;
488 	}
489 }
490 
491 /*
492  * This function copies as many entries from ip6_asp_table as will fit
493  * into dtable.  The dtable_size parameter is the size of dtable
494  * in bytes.  This function returns the number of entries in
495  * ip6_asp_table, even if it's not able to fit all of the entries into
496  * dtable.
497  */
498 int
499 ip6_asp_get(ip6_asp_t *dtable, size_t dtable_size, ip_stack_t *ipst)
500 {
501 	uint_t dtable_count;
502 
503 	if (dtable != NULL) {
504 		if (dtable_size < sizeof (ip6_asp_t))
505 			return (-1);
506 
507 		dtable_count = dtable_size / sizeof (ip6_asp_t);
508 		bcopy(ipst->ips_ip6_asp_table, dtable,
509 		    MIN(ipst->ips_ip6_asp_table_count, dtable_count) *
510 		    sizeof (ip6_asp_t));
511 	}
512 
513 	return (ipst->ips_ip6_asp_table_count);
514 }
515 
516 /*
517  * Compare two labels.  Return B_TRUE if they are equal, B_FALSE
518  * otherwise.
519  */
520 boolean_t
521 ip6_asp_labelcmp(const char *label1, const char *label2)
522 {
523 	int64_t *llptr1, *llptr2;
524 
525 	/*
526 	 * The common case, the two labels are actually the same string
527 	 * from the policy table.
528 	 */
529 	if (label1 == label2)
530 		return (B_TRUE);
531 
532 	/*
533 	 * Since we know the labels are at most 16 bytes long, compare
534 	 * the two strings as two 8-byte long integers.  The ip6_asp_t
535 	 * structure guarantees that the labels are 8 byte alligned.
536 	 */
537 	llptr1 = (int64_t *)label1;
538 	llptr2 = (int64_t *)label2;
539 	if (llptr1[0] == llptr2[0] && llptr1[1] == llptr2[1])
540 		return (B_TRUE);
541 	return (B_FALSE);
542 }
543