1 /* $FreeBSD: src/sys/netinet/ip_encap.c,v 1.1.2.5 2003/01/23 21:06:45 sam Exp $ */ 2 /* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 /* 33 * My grandfather said that there's a devil inside tunnelling technology... 34 * 35 * We have surprisingly many protocols that want packets with IP protocol 36 * #4 or #41. Here's a list of protocols that want protocol #41: 37 * RFC1933 configured tunnel 38 * RFC1933 automatic tunnel 39 * RFC2401 IPsec tunnel 40 * RFC2473 IPv6 generic packet tunnelling 41 * RFC2529 6over4 tunnel 42 * mobile-ip6 (uses RFC2473) 43 * RFC3056 6to4 tunnel 44 * isatap tunnel 45 * Here's a list of protocol that want protocol #4: 46 * RFC1853 IPv4-in-IPv4 tunnelling 47 * RFC2003 IPv4 encapsulation within IPv4 48 * RFC2344 reverse tunnelling for mobile-ip4 49 * RFC2401 IPsec tunnel 50 * Well, what can I say. They impose different en/decapsulation mechanism 51 * from each other, so they need separate protocol handler. The only one 52 * we can easily determine by protocol # is IPsec, which always has 53 * AH/ESP/IPComp header right after outer IP header. 54 * 55 * So, clearly good old protosw does not work for protocol #4 and #41. 56 * The code will let you match protocol via src/dst address pair. 57 */ 58 59 #include "opt_inet.h" 60 #include "opt_inet6.h" 61 62 #include <sys/param.h> 63 #include <sys/systm.h> 64 #include <sys/socket.h> 65 #include <sys/sockio.h> 66 #include <sys/mbuf.h> 67 #include <sys/errno.h> 68 #include <sys/protosw.h> 69 #include <sys/queue.h> 70 71 #include <net/if.h> 72 #include <net/route.h> 73 74 #include <netinet/in.h> 75 #include <netinet/in_systm.h> 76 #include <netinet/ip.h> 77 #include <netinet/ip_var.h> 78 #include <netinet/ip_encap.h> 79 80 #ifdef INET6 81 #include <netinet/ip6.h> 82 #include <netinet6/ip6_var.h> 83 #include <netinet6/ip6protosw.h> 84 #endif 85 86 #include <machine/stdarg.h> 87 88 #include <net/net_osdep.h> 89 90 #include <sys/kernel.h> 91 #include <sys/malloc.h> 92 #include <sys/thread2.h> 93 MALLOC_DEFINE(M_IPENCAP, "IP Encapsulation", "IP Encapsulation"); 94 95 static void encap_add (struct encaptab *); 96 static int mask_match (const struct encaptab *, const struct sockaddr *, 97 const struct sockaddr *); 98 static void encap_fillarg (struct mbuf *, const struct encaptab *); 99 100 #ifndef LIST_HEAD_INITIALIZER 101 /* rely upon BSS initialization */ 102 LIST_HEAD(, encaptab) encaptab; 103 #else 104 LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab); 105 #endif 106 107 int (*ipip_input)(struct mbuf **, int *, int); /* hook for mrouting */ 108 109 void 110 encap_init(void) 111 { 112 static int initialized = 0; 113 114 if (initialized) 115 return; 116 initialized++; 117 #if 0 118 /* 119 * we cannot use LIST_INIT() here, since drivers may want to call 120 * encap_attach(), on driver attach. encap_init() will be called 121 * on AF_INET{,6} initialization, which happens after driver 122 * initialization - using LIST_INIT() here can nuke encap_attach() 123 * from drivers. 124 */ 125 LIST_INIT(&encaptab); 126 #endif 127 } 128 129 #ifdef INET 130 int 131 encap4_input(struct mbuf **mp, int *offp, int proto) 132 { 133 struct mbuf *m = *mp; 134 int off = *offp; 135 struct ip *ip; 136 struct sockaddr_in s, d; 137 const struct protosw *psw; 138 struct encaptab *ep, *match; 139 int prio, matchprio; 140 141 ip = mtod(m, struct ip *); 142 *mp = NULL; 143 144 bzero(&s, sizeof s); 145 s.sin_family = AF_INET; 146 s.sin_len = sizeof(struct sockaddr_in); 147 s.sin_addr = ip->ip_src; 148 bzero(&d, sizeof d); 149 d.sin_family = AF_INET; 150 d.sin_len = sizeof(struct sockaddr_in); 151 d.sin_addr = ip->ip_dst; 152 153 match = NULL; 154 matchprio = 0; 155 for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { 156 if (ep->af != AF_INET) 157 continue; 158 if (ep->proto >= 0 && ep->proto != proto) 159 continue; 160 if (ep->func) 161 prio = (*ep->func)(m, off, proto, ep->arg); 162 else { 163 /* 164 * it's inbound traffic, we need to match in reverse 165 * order 166 */ 167 prio = mask_match(ep, 168 (struct sockaddr *)&d, 169 (struct sockaddr *)&s); 170 } 171 172 /* 173 * We prioritize the matches by using bit length of the 174 * matches. mask_match() and user-supplied matching function 175 * should return the bit length of the matches (for example, 176 * if both src/dst are matched for IPv4, 64 should be returned). 177 * 0 or negative return value means "it did not match". 178 * 179 * The question is, since we have two "mask" portion, we 180 * cannot really define total order between entries. 181 * For example, which of these should be preferred? 182 * mask_match() returns 48 (32 + 16) for both of them. 183 * src=3ffe::/16, dst=3ffe:501::/32 184 * src=3ffe:501::/32, dst=3ffe::/16 185 * 186 * We need to loop through all the possible candidates 187 * to get the best match - the search takes O(n) for 188 * n attachments (i.e. interfaces). 189 */ 190 if (prio <= 0) 191 continue; 192 if (prio > matchprio) { 193 matchprio = prio; 194 match = ep; 195 } 196 } 197 198 if (match) { 199 /* found a match, "match" has the best one */ 200 psw = match->psw; 201 if (psw && psw->pr_input) { 202 encap_fillarg(m, match); 203 *mp = m; 204 (*psw->pr_input)(mp, offp, proto); 205 } else { 206 m_freem(m); 207 } 208 return(IPPROTO_DONE); 209 } 210 211 /* for backward compatibility */ 212 if (proto == IPPROTO_IPV4 && ipip_input) { 213 *mp = m; 214 ipip_input(mp, offp, proto); 215 return(IPPROTO_DONE); 216 } 217 218 /* last resort: inject to raw socket */ 219 *mp = m; 220 rip_input(mp, offp, proto); 221 return(IPPROTO_DONE); 222 } 223 #endif 224 225 #ifdef INET6 226 int 227 encap6_input(struct mbuf **mp, int *offp, int proto) 228 { 229 struct mbuf *m = *mp; 230 struct ip6_hdr *ip6; 231 struct sockaddr_in6 s, d; 232 const struct protosw *psw; 233 struct encaptab *ep, *match; 234 int prio, matchprio; 235 236 ip6 = mtod(m, struct ip6_hdr *); 237 238 bzero(&s, sizeof s); 239 s.sin6_family = AF_INET6; 240 s.sin6_len = sizeof(struct sockaddr_in6); 241 s.sin6_addr = ip6->ip6_src; 242 bzero(&d, sizeof d); 243 d.sin6_family = AF_INET6; 244 d.sin6_len = sizeof(struct sockaddr_in6); 245 d.sin6_addr = ip6->ip6_dst; 246 247 match = NULL; 248 matchprio = 0; 249 for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { 250 if (ep->af != AF_INET6) 251 continue; 252 if (ep->proto >= 0 && ep->proto != proto) 253 continue; 254 if (ep->func) 255 prio = (*ep->func)(m, *offp, proto, ep->arg); 256 else { 257 /* 258 * it's inbound traffic, we need to match in reverse 259 * order 260 */ 261 prio = mask_match(ep, (struct sockaddr *)&d, 262 (struct sockaddr *)&s); 263 } 264 265 /* see encap4_input() for issues here */ 266 if (prio <= 0) 267 continue; 268 if (prio > matchprio) { 269 matchprio = prio; 270 match = ep; 271 } 272 } 273 274 if (match) { 275 /* found a match */ 276 psw = match->psw; 277 if (psw && psw->pr_input) { 278 encap_fillarg(m, match); 279 return (*psw->pr_input)(mp, offp, proto); 280 } else { 281 m_freem(m); 282 return IPPROTO_DONE; 283 } 284 } 285 286 /* last resort: inject to raw socket */ 287 return rip6_input(mp, offp, proto); 288 } 289 #endif 290 291 static void 292 encap_add(struct encaptab *ep) 293 { 294 295 LIST_INSERT_HEAD(&encaptab, ep, chain); 296 } 297 298 /* 299 * sp (src ptr) is always my side, and dp (dst ptr) is always remote side. 300 * length of mask (sm and dm) is assumed to be same as sp/dp. 301 * Return value will be necessary as input (cookie) for encap_detach(). 302 */ 303 const struct encaptab * 304 encap_attach(int af, int proto, const struct sockaddr *sp, 305 const struct sockaddr *sm, const struct sockaddr *dp, 306 const struct sockaddr *dm, const struct protosw *psw, void *arg) 307 { 308 struct encaptab *ep; 309 310 crit_enter(); 311 /* sanity check on args */ 312 if (sp->sa_len > sizeof ep->src || dp->sa_len > sizeof ep->dst) 313 goto fail; 314 if (sp->sa_len != dp->sa_len) 315 goto fail; 316 if (af != sp->sa_family || af != dp->sa_family) 317 goto fail; 318 319 /* check if anyone have already attached with exactly same config */ 320 for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { 321 if (ep->af != af) 322 continue; 323 if (ep->proto != proto) 324 continue; 325 if (ep->src.ss_len != sp->sa_len || 326 bcmp(&ep->src, sp, sp->sa_len) != 0 || 327 bcmp(&ep->srcmask, sm, sp->sa_len) != 0) 328 continue; 329 if (ep->dst.ss_len != dp->sa_len || 330 bcmp(&ep->dst, dp, dp->sa_len) != 0 || 331 bcmp(&ep->dstmask, dm, dp->sa_len) != 0) 332 continue; 333 334 goto fail; 335 } 336 337 ep = kmalloc(sizeof *ep, M_IPENCAP, M_INTWAIT | M_ZERO | M_NULLOK); 338 if (ep == NULL) 339 goto fail; 340 341 ep->af = af; 342 ep->proto = proto; 343 bcopy(sp, &ep->src, sp->sa_len); 344 bcopy(sm, &ep->srcmask, sp->sa_len); 345 bcopy(dp, &ep->dst, dp->sa_len); 346 bcopy(dm, &ep->dstmask, dp->sa_len); 347 ep->psw = psw; 348 ep->arg = arg; 349 350 encap_add(ep); 351 352 crit_exit(); 353 return ep; 354 355 fail: 356 crit_exit(); 357 return NULL; 358 } 359 360 const struct encaptab * 361 encap_attach_func(int af, int proto, 362 int (*func)(const struct mbuf *, int, int, void *), 363 const struct protosw *psw, void *arg) 364 { 365 struct encaptab *ep; 366 367 crit_enter(); 368 /* sanity check on args */ 369 if (!func) 370 goto fail; 371 372 ep = kmalloc(sizeof *ep, M_IPENCAP, M_INTWAIT | M_ZERO | M_NULLOK); 373 if (ep == NULL) 374 goto fail; 375 376 ep->af = af; 377 ep->proto = proto; 378 ep->func = func; 379 ep->psw = psw; 380 ep->arg = arg; 381 382 encap_add(ep); 383 384 crit_exit(); 385 return ep; 386 387 fail: 388 crit_exit(); 389 return NULL; 390 } 391 392 int 393 encap_detach(const struct encaptab *cookie) 394 { 395 const struct encaptab *ep = cookie; 396 struct encaptab *p; 397 398 for (p = LIST_FIRST(&encaptab); p; p = LIST_NEXT(p, chain)) { 399 if (p == ep) { 400 LIST_REMOVE(p, chain); 401 kfree(p, M_IPENCAP); /*XXX*/ 402 return 0; 403 } 404 } 405 406 return EINVAL; 407 } 408 409 static int 410 mask_match(const struct encaptab *ep, const struct sockaddr *sp, 411 const struct sockaddr *dp) 412 { 413 struct sockaddr_storage s; 414 struct sockaddr_storage d; 415 int i; 416 const u_int8_t *p, *q; 417 u_int8_t *r; 418 int matchlen; 419 420 if (sp->sa_len > sizeof s || dp->sa_len > sizeof d) 421 return 0; 422 if (sp->sa_family != ep->af || dp->sa_family != ep->af) 423 return 0; 424 if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len) 425 return 0; 426 427 matchlen = 0; 428 429 p = (const u_int8_t *)sp; 430 q = (const u_int8_t *)&ep->srcmask; 431 r = (u_int8_t *)&s; 432 for (i = 0 ; i < sp->sa_len; i++) { 433 r[i] = p[i] & q[i]; 434 /* XXX estimate */ 435 matchlen += (q[i] ? 8 : 0); 436 } 437 438 p = (const u_int8_t *)dp; 439 q = (const u_int8_t *)&ep->dstmask; 440 r = (u_int8_t *)&d; 441 for (i = 0 ; i < dp->sa_len; i++) { 442 r[i] = p[i] & q[i]; 443 /* XXX rough estimate */ 444 matchlen += (q[i] ? 8 : 0); 445 } 446 447 /* need to overwrite len/family portion as we don't compare them */ 448 s.ss_len = sp->sa_len; 449 s.ss_family = sp->sa_family; 450 d.ss_len = dp->sa_len; 451 d.ss_family = dp->sa_family; 452 453 if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 && 454 bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) { 455 return matchlen; 456 } else 457 return 0; 458 } 459 460 static void 461 encap_fillarg(struct mbuf *m, const struct encaptab *ep) 462 { 463 struct m_tag *tag; 464 465 tag = m_tag_get(PACKET_TAG_ENCAP, sizeof(void *), M_NOWAIT); 466 if (tag != NULL) { 467 *(void **)m_tag_data(tag) = ep->arg; 468 m_tag_prepend(m, tag); 469 } 470 } 471 472 void * 473 encap_getarg(struct mbuf *m) 474 { 475 void *p = NULL; 476 struct m_tag *tag; 477 478 tag = m_tag_find(m, PACKET_TAG_ENCAP, NULL); 479 if (tag != NULL) { 480 p = *(void **)m_tag_data(tag); 481 m_tag_delete(m, tag); 482 } 483 return p; 484 } 485