xref: /illumos-gate/usr/src/uts/common/inet/ip/ip6_asp.c (revision 03831d35)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/socket.h>
31 #include <sys/ksynch.h>
32 #include <sys/kmem.h>
33 #include <sys/errno.h>
34 #include <sys/systm.h>
35 #include <sys/sysmacros.h>
36 #include <sys/cmn_err.h>
37 #include <sys/strsun.h>
38 #include <sys/zone.h>
39 #include <netinet/in.h>
40 #include <inet/common.h>
41 #include <inet/ip.h>
42 #include <inet/ip6.h>
43 #include <inet/ip6_asp.h>
44 #include <inet/ip_ire.h>
45 
/*
 * Netmask initializers, written as four 32-bit words.  The /128 and /96
 * masks consist only of all-ones or all-zero words and so look the same
 * on every byte order.  The /16 mask must set the first two bytes in
 * memory, so the value of its first word depends on the host byte order.
 */
#define	IN6ADDR_MASK128_INIT \
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }
#define	IN6ADDR_MASK96_INIT \
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0 }

#ifdef _BIG_ENDIAN
#define	IN6ADDR_MASK16_WORD0	0xffff0000U
#else
#define	IN6ADDR_MASK16_WORD0	0x0000ffffU
#endif
#define	IN6ADDR_MASK16_INIT	{ IN6ADDR_MASK16_WORD0, 0, 0, 0 }
54 
55 
56 /*
57  * This table is ordered such that longest prefix matches are hit first
58  * (longer prefix lengths first).  The last entry must be the "default"
59  * entry (::0/0).
60  */
61 static ip6_asp_t default_ip6_asp_table[] = {
62 	{ IN6ADDR_LOOPBACK_INIT,	IN6ADDR_MASK128_INIT,
63 	    "Loopback", 50 },
64 	{ IN6ADDR_ANY_INIT,		IN6ADDR_MASK96_INIT,
65 	    "IPv4_Compatible", 20 },
66 #ifdef _BIG_ENDIAN
67 	{ { 0, 0, 0x0000ffffU, 0 },	IN6ADDR_MASK96_INIT,
68 	    "IPv4", 10 },
69 	{ { 0x20020000U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
70 	    "6to4", 30 },
71 #else
72 	{ { 0, 0, 0xffff0000U, 0 },	IN6ADDR_MASK96_INIT,
73 	    "IPv4", 10 },
74 	{ { 0x00000220U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
75 	    "6to4", 30 },
76 #endif
77 	{ IN6ADDR_ANY_INIT,		IN6ADDR_ANY_INIT,
78 	    "Default", 40 }
79 };
80 
81 /* pending binds */
82 static mblk_t *ip6_asp_pending_ops = NULL, *ip6_asp_pending_ops_tail = NULL;
83 
84 /* Synchronize updates with table usage */
85 static mblk_t *ip6_asp_pending_update = NULL;	/* pending table updates */
86 
87 static boolean_t ip6_asp_uip = B_FALSE;		/* table update in progress */
88 static kmutex_t	ip6_asp_lock;			/* protect all the above */
89 static uint32_t	ip6_asp_refcnt = 0;		/* outstanding references */
90 
91 /*
92  * The IPv6 Default Address Selection policy table.
93  * Until someone up above reconfigures the policy table, use the global
94  * default.  The table needs no lock since the only way to alter it is
95  * through the SIOCSIP6ADDRPOLICY which is exclusive in ip.
96  */
97 static ip6_asp_t *ip6_asp_table = default_ip6_asp_table;
98 /* The number of policy entries in the table */
99 static uint_t ip6_asp_table_count =
100     sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
101 
102 static void ip6_asp_copy(ip6_asp_t *, ip6_asp_t *, uint_t);
103 static void ip6_asp_check_for_updates();
104 
/*
 * Set up the state this file needs before the policy table can be used.
 * The only thing that requires run-time initialization is ip6_asp_lock;
 * the table pointer, counters and lists are statically initialized.
 * ip6_asp_free() is the counterpart that destroys the lock.
 */
void
ip6_asp_init(void)
{
	/* Initialize the table lock */
	mutex_init(&ip6_asp_lock, NULL, MUTEX_DEFAULT, NULL);
}
111 
112 void
113 ip6_asp_free(void)
114 {
115 	if (ip6_asp_table != default_ip6_asp_table) {
116 		kmem_free(ip6_asp_table,
117 		    ip6_asp_table_count * sizeof (ip6_asp_t));
118 	}
119 	mutex_destroy(&ip6_asp_lock);
120 }
121 
122 /*
123  * Return false if the table is being updated. Else, increment the ref
124  * count and return true.
125  */
126 boolean_t
127 ip6_asp_can_lookup()
128 {
129 	mutex_enter(&ip6_asp_lock);
130 	if (ip6_asp_uip) {
131 		mutex_exit(&ip6_asp_lock);
132 		return (B_FALSE);
133 	}
134 	IP6_ASP_TABLE_REFHOLD();
135 	mutex_exit(&ip6_asp_lock);
136 	return (B_TRUE);
137 
138 }
139 
140 void
141 ip6_asp_pending_op(queue_t *q, mblk_t *mp, aspfunc_t func)
142 {
143 
144 	ASSERT((mp->b_prev == NULL) && (mp->b_queue == NULL) &&
145 	    (mp->b_next == NULL));
146 	mp->b_queue = (void *)q;
147 	mp->b_prev = (void *)func;
148 	mp->b_next = NULL;
149 
150 	mutex_enter(&ip6_asp_lock);
151 	if (ip6_asp_pending_ops == NULL) {
152 		ASSERT(ip6_asp_pending_ops_tail == NULL);
153 		ip6_asp_pending_ops = ip6_asp_pending_ops_tail = mp;
154 	} else {
155 		ip6_asp_pending_ops_tail->b_next = mp;
156 		ip6_asp_pending_ops_tail = mp;
157 	}
158 	mutex_exit(&ip6_asp_lock);
159 }
160 
161 static void
162 ip6_asp_complete_op()
163 {
164 	mblk_t		*mp;
165 	queue_t		*q;
166 	aspfunc_t	func;
167 
168 	mutex_enter(&ip6_asp_lock);
169 	while (ip6_asp_pending_ops != NULL) {
170 		mp = ip6_asp_pending_ops;
171 		ip6_asp_pending_ops = mp->b_next;
172 		mp->b_next = NULL;
173 		if (ip6_asp_pending_ops == NULL)
174 			ip6_asp_pending_ops_tail = NULL;
175 		mutex_exit(&ip6_asp_lock);
176 
177 		q = (queue_t *)mp->b_queue;
178 		func = (aspfunc_t)mp->b_prev;
179 
180 		mp->b_prev = NULL;
181 		mp->b_queue = NULL;
182 
183 
184 		(*func)(NULL, q, mp, NULL);
185 		mutex_enter(&ip6_asp_lock);
186 	}
187 	mutex_exit(&ip6_asp_lock);
188 }
189 
/*
 * Release a reference on the policy table.
 *
 * This is a function wrapper around the IP6_ASP_TABLE_REFRELE() macro,
 * which is defined outside this file.  The macro is expected to
 * decrement the reference count and, when it gets to 0, check for a
 * (pending) saved update to the table, if any.
 */
void
ip6_asp_table_refrele()
{
	IP6_ASP_TABLE_REFRELE();
}
199 
200 /*
201  * This function is guaranteed never to return a NULL pointer.  It
202  * will always return information from one of the entries in the
203  * asp_table (which will never be empty).  If a pointer is passed
204  * in for the precedence, the precedence value will be set; a
205  * pointer to the label will be returned by the function.
206  *
207  * Since the table is only anticipated to have five or six entries
208  * total, the lookup algorithm hasn't been optimized to anything
209  * better than O(n).
210  */
211 char *
212 ip6_asp_lookup(const in6_addr_t *addr, uint32_t *precedence)
213 {
214 	ip6_asp_t *aspp;
215 	ip6_asp_t *match = NULL;
216 	ip6_asp_t *default_policy;
217 
218 	aspp = ip6_asp_table;
219 	/* The default entry must always be the last one */
220 	default_policy = aspp + ip6_asp_table_count - 1;
221 
222 	while (match == NULL) {
223 		if (aspp == default_policy) {
224 			match = aspp;
225 		} else {
226 			if (V6_MASK_EQ(*addr, aspp->ip6_asp_mask,
227 			    aspp->ip6_asp_prefix))
228 				match = aspp;
229 			else
230 				aspp++;
231 		}
232 	}
233 
234 	if (precedence != NULL)
235 		*precedence = match->ip6_asp_precedence;
236 	return (match->ip6_asp_label);
237 }
238 
239 /*
240  * If we had deferred updating the table because of outstanding references,
241  * do it now. Note, we don't do error checking on the queued IOCTL mblk, since
242  * ip_sioctl_ip6addrpolicy() has already done it for us.
243  */
244 void
245 ip6_asp_check_for_updates()
246 {
247 	ip6_asp_t *table;
248 	size_t	table_size;
249 	mblk_t	*data_mp, *mp;
250 	struct iocblk *iocp;
251 
252 	mutex_enter(&ip6_asp_lock);
253 	if (ip6_asp_pending_update == NULL || ip6_asp_refcnt > 0) {
254 		mutex_exit(&ip6_asp_lock);
255 		return;
256 	}
257 
258 	mp = ip6_asp_pending_update;
259 	ip6_asp_pending_update = NULL;
260 	ASSERT(mp->b_prev != NULL);
261 
262 	ip6_asp_uip = B_TRUE;
263 
264 	iocp = (struct iocblk *)mp->b_rptr;
265 	data_mp = mp->b_cont;
266 	if (data_mp == NULL) {
267 		table = NULL;
268 		table_size = iocp->ioc_count;
269 	} else {
270 		table = (ip6_asp_t *)data_mp->b_rptr;
271 		table_size = iocp->ioc_count;
272 	}
273 
274 	ip6_asp_replace(mp, table, table_size, B_TRUE,
275 	    iocp->ioc_flag & IOC_MODELS);
276 }
277 
278 /*
279  * ip6_asp_replace replaces the contents of the IPv6 address selection
280  * policy table with those specified in new_table.  If new_table is NULL,
281  * this indicates that the caller wishes ip to use the default policy
282  * table.  The caller is responsible for making sure that there are exactly
283  * new_count policy entries in new_table.
284  */
285 /*ARGSUSED4*/
286 void
287 ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size,
288     boolean_t locked, model_t datamodel)
289 {
290 	int			ret_val = 0;
291 	ip6_asp_t		*tmp_table;
292 	uint_t			count;
293 	queue_t			*q;
294 	struct iocblk		*iocp;
295 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
296 	size_t ip6_asp_size = SIZEOF_STRUCT(ip6_asp, datamodel);
297 #else
298 	const size_t ip6_asp_size = sizeof (ip6_asp_t);
299 #endif
300 
301 	if (new_size % ip6_asp_size != 0) {
302 		ip1dbg(("ip6_asp_replace: invalid table size\n"));
303 		ret_val = EINVAL;
304 		if (locked)
305 			goto unlock_end;
306 		goto replace_end;
307 	} else {
308 		count = new_size / ip6_asp_size;
309 	}
310 
311 
312 	if (!locked)
313 		mutex_enter(&ip6_asp_lock);
314 	/*
315 	 * Check if we are in the process of creating any IRE using the
316 	 * current information. If so, wait till that is done.
317 	 */
318 	if (!locked && ip6_asp_refcnt > 0) {
319 		/* Save this request for later processing */
320 		if (ip6_asp_pending_update == NULL) {
321 			ip6_asp_pending_update = mp;
322 		} else {
323 			/* Let's not queue multiple requests for now */
324 			ip1dbg(("ip6_asp_replace: discarding request\n"));
325 			mutex_exit(&ip6_asp_lock);
326 			ret_val =  EAGAIN;
327 			goto replace_end;
328 		}
329 		mutex_exit(&ip6_asp_lock);
330 		return;
331 	}
332 
333 	/* Prevent lookups till the table have been updated */
334 	if (!locked)
335 		ip6_asp_uip = B_TRUE;
336 
337 	ASSERT(ip6_asp_refcnt == 0);
338 
339 	if (new_table == NULL) {
340 		/*
341 		 * This is a special case.  The user wants to revert
342 		 * back to using the default table.
343 		 */
344 		if (ip6_asp_table == default_ip6_asp_table)
345 			goto unlock_end;
346 
347 		kmem_free(ip6_asp_table,
348 		    ip6_asp_table_count * sizeof (ip6_asp_t));
349 		ip6_asp_table = default_ip6_asp_table;
350 		ip6_asp_table_count =
351 		    sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
352 		goto unlock_end;
353 	}
354 
355 	if (count == 0) {
356 		ret_val = EINVAL;
357 		ip1dbg(("ip6_asp_replace: empty table\n"));
358 		goto unlock_end;
359 	}
360 
361 	if ((tmp_table = kmem_alloc(count * sizeof (ip6_asp_t), KM_NOSLEEP)) ==
362 	    NULL) {
363 		ret_val = ENOMEM;
364 		goto unlock_end;
365 	}
366 
367 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
368 
369 	/*
370 	 * If 'new_table' -actually- originates from a 32-bit process
371 	 * then the nicely aligned ip6_asp_label array will be
372 	 * subtlely misaligned on this kernel, because the structure
373 	 * is 8 byte aligned in the kernel, but only 4 byte aligned in
374 	 * userland.  Fix it up here.
375 	 *
376 	 * XX64	See the notes in ip_sioctl_ip6addrpolicy.  Perhaps we could
377 	 *	do the datamodel transformation (below) there instead of here?
378 	 */
379 	if (datamodel == IOC_ILP32) {
380 		ip6_asp_t *dst;
381 		ip6_asp32_t *src;
382 		int i;
383 
384 		if ((dst = kmem_zalloc(count * sizeof (*dst),
385 		    KM_NOSLEEP)) == NULL) {
386 			kmem_free(tmp_table, count * sizeof (ip6_asp_t));
387 			ret_val = ENOMEM;
388 			goto unlock_end;
389 		}
390 
391 		/*
392 		 * Copy each element of the table from ip6_asp32_t
393 		 * format into ip6_asp_t format.  Fortunately, since
394 		 * we're just dealing with a trailing structure pad,
395 		 * we can do this straightforwardly with a flurry of
396 		 * bcopying.
397 		 */
398 		src = (void *)new_table;
399 		for (i = 0; i < count; i++)
400 			bcopy(src + i, dst + i, sizeof (*src));
401 
402 		ip6_asp_copy(dst, tmp_table, count);
403 		kmem_free(dst, count * sizeof (*dst));
404 	} else
405 #endif
406 		ip6_asp_copy(new_table, tmp_table, count);
407 
408 	/* Make sure the last entry is the default entry */
409 	if (!IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_prefix) ||
410 	    !IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_mask)) {
411 		ret_val = EINVAL;
412 		kmem_free(tmp_table, count * sizeof (ip6_asp_t));
413 		ip1dbg(("ip6_asp_replace: bad table: no default entry\n"));
414 		goto unlock_end;
415 	}
416 	if (ip6_asp_table != default_ip6_asp_table) {
417 		kmem_free(ip6_asp_table,
418 		    ip6_asp_table_count * sizeof (ip6_asp_t));
419 	}
420 	ip6_asp_table = tmp_table;
421 	ip6_asp_table_count = count;
422 
423 	/*
424 	 * The user has changed the address selection policy table.  IPv6
425 	 * source address selection for existing IRE_CACHE and
426 	 * IRE_HOST_REDIRECT entries used the old table, so we need to
427 	 * clear the cache.
428 	 */
429 	ire_walk_v6(ire_delete_cache_v6, NULL, ALL_ZONES);
430 
431 unlock_end:
432 	ip6_asp_uip = B_FALSE;
433 	mutex_exit(&ip6_asp_lock);
434 
435 replace_end:
436 	/* Reply to the ioctl */
437 	q = (queue_t *)mp->b_prev;
438 	mp->b_prev = NULL;
439 	if (q == NULL) {
440 		freemsg(mp);
441 		goto check_binds;
442 	}
443 	iocp = (struct iocblk *)mp->b_rptr;
444 	iocp->ioc_error = ret_val;
445 	iocp->ioc_count = 0;
446 	DB_TYPE(mp) = (iocp->ioc_error == 0) ? M_IOCACK : M_IOCNAK;
447 	qreply(q, mp);
448 check_binds:
449 	ip6_asp_complete_op();
450 }
451 
452 /*
453  * Copies the contents of src_table to dst_table, and sorts the
454  * entries in decending order of prefix lengths.  It assumes that both
455  * tables are appropriately sized to contain count entries.
456  */
457 static void
458 ip6_asp_copy(ip6_asp_t *src_table, ip6_asp_t *dst_table, uint_t count)
459 {
460 	ip6_asp_t *src_ptr, *src_limit, *dst_ptr, *dst_limit, *dp;
461 
462 	dst_table[0] = src_table[0];
463 	if (count == 1)
464 		return;
465 
466 	/*
467 	 * Sort the entries in descending order of prefix lengths.
468 	 *
469 	 * Note: this should be a small table.  In 99% of cases, we
470 	 * expect the table to have 5 entries.  In the remaining 1%
471 	 * of cases, we expect the table to have one or two more
472 	 * entries.  It would be very rare for the table to have
473 	 * double-digit entries.
474 	 */
475 	src_limit = src_table + count;
476 	dst_limit = dst_table + 1;
477 	for (src_ptr = src_table + 1; src_ptr != src_limit;
478 	    src_ptr++, dst_limit++) {
479 		for (dst_ptr = dst_table; dst_ptr < dst_limit; dst_ptr++) {
480 			if (ip_mask_to_plen_v6(&src_ptr->ip6_asp_mask) >
481 			    ip_mask_to_plen_v6(&dst_ptr->ip6_asp_mask)) {
482 				/*
483 				 * Make room to insert the source entry
484 				 * before dst_ptr by shifting entries to
485 				 * the right.
486 				 */
487 				for (dp = dst_limit - 1; dp >= dst_ptr; dp--)
488 					*(dp + 1) = *dp;
489 				break;
490 			}
491 		}
492 		*dst_ptr = *src_ptr;
493 	}
494 }
495 
496 /*
497  * This function copies as many entries from ip6_asp_table as will fit
498  * into dtable.  The dtable_size parameter is the size of dtable
499  * in bytes.  This function returns the number of entries in
500  * ip6_asp_table, even if it's not able to fit all of the entries into
501  * dtable.
502  */
503 int
504 ip6_asp_get(ip6_asp_t *dtable, size_t dtable_size)
505 {
506 	uint_t dtable_count;
507 
508 	if (dtable != NULL) {
509 		if (dtable_size < sizeof (ip6_asp_t))
510 			return (-1);
511 
512 		dtable_count = dtable_size / sizeof (ip6_asp_t);
513 		bcopy(ip6_asp_table, dtable,
514 		    MIN(ip6_asp_table_count, dtable_count) *
515 		    sizeof (ip6_asp_t));
516 	}
517 
518 	return (ip6_asp_table_count);
519 }
520 
521 /*
522  * Compare two labels.  Return B_TRUE if they are equal, B_FALSE
523  * otherwise.
524  */
525 boolean_t
526 ip6_asp_labelcmp(const char *label1, const char *label2)
527 {
528 	int64_t *llptr1, *llptr2;
529 
530 	/*
531 	 * The common case, the two labels are actually the same string
532 	 * from the policy table.
533 	 */
534 	if (label1 == label2)
535 		return (B_TRUE);
536 
537 	/*
538 	 * Since we know the labels are at most 16 bytes long, compare
539 	 * the two strings as two 8-byte long integers.  The ip6_asp_t
540 	 * structure guarantees that the labels are 8 byte alligned.
541 	 */
542 	llptr1 = (int64_t *)label1;
543 	llptr2 = (int64_t *)label2;
544 	if (llptr1[0] == llptr2[0] && llptr1[1] == llptr2[1])
545 		return (B_TRUE);
546 	return (B_FALSE);
547 }
548