1 /* $FreeBSD: src/sys/netinet/ip_encap.c,v 1.1.2.5 2003/01/23 21:06:45 sam Exp $ */ 2 /* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 /* 33 * My grandfather said that there's a devil inside tunnelling technology... 34 * 35 * We have surprisingly many protocols that want packets with IP protocol 36 * #4 or #41. Here's a list of protocols that want protocol #41: 37 * RFC1933 configured tunnel 38 * RFC1933 automatic tunnel 39 * RFC2401 IPsec tunnel 40 * RFC2473 IPv6 generic packet tunnelling 41 * RFC2529 6over4 tunnel 42 * mobile-ip6 (uses RFC2473) 43 * RFC3056 6to4 tunnel 44 * isatap tunnel 45 * Here's a list of protocol that want protocol #4: 46 * RFC1853 IPv4-in-IPv4 tunnelling 47 * RFC2003 IPv4 encapsulation within IPv4 48 * RFC2344 reverse tunnelling for mobile-ip4 49 * RFC2401 IPsec tunnel 50 * Well, what can I say. They impose different en/decapsulation mechanism 51 * from each other, so they need separate protocol handler. The only one 52 * we can easily determine by protocol # is IPsec, which always has 53 * AH/ESP/IPComp header right after outer IP header. 54 * 55 * So, clearly good old protosw does not work for protocol #4 and #41. 56 * The code will let you match protocol via src/dst address pair. 57 */ 58 /* XXX is M_NETADDR correct? */ 59 60 #include "opt_inet.h" 61 #include "opt_inet6.h" 62 63 #include <sys/param.h> 64 #include <sys/systm.h> 65 #include <sys/socket.h> 66 #include <sys/sockio.h> 67 #include <sys/mbuf.h> 68 #include <sys/errno.h> 69 #include <sys/protosw.h> 70 #include <sys/queue.h> 71 72 #include <net/if.h> 73 #include <net/route.h> 74 75 #include <netinet/in.h> 76 #include <netinet/in_systm.h> 77 #include <netinet/ip.h> 78 #include <netinet/ip_var.h> 79 #include <netinet/ip_encap.h> 80 81 #ifdef INET6 82 #include <netinet/ip6.h> 83 #include <netinet6/ip6_var.h> 84 #include <netinet6/ip6protosw.h> 85 #endif 86 87 #include <machine/stdarg.h> 88 89 #include <net/net_osdep.h> 90 91 #include <sys/kernel.h> 92 #include <sys/malloc.h> 93 #include <sys/thread2.h> 94 MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 95 96 static void encap_add (struct encaptab *); 97 static int mask_match (const struct encaptab *, const struct sockaddr *, 98 const struct sockaddr *); 99 static void encap_fillarg (struct mbuf *, const struct encaptab *); 100 101 #ifndef LIST_HEAD_INITIALIZER 102 /* rely upon BSS initialization */ 103 LIST_HEAD(, encaptab) encaptab; 104 #else 105 LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab); 106 #endif 107 108 int (*ipip_input)(struct mbuf **, int *, int); /* hook for mrouting */ 109 110 void 111 encap_init(void) 112 { 113 static int initialized = 0; 114 115 if (initialized) 116 return; 117 initialized++; 118 #if 0 119 /* 120 * we cannot use LIST_INIT() here, since drivers may want to call 121 * encap_attach(), on driver attach. encap_init() will be called 122 * on AF_INET{,6} initialization, which happens after driver 123 * initialization - using LIST_INIT() here can nuke encap_attach() 124 * from drivers. 125 */ 126 LIST_INIT(&encaptab); 127 #endif 128 } 129 130 #ifdef INET 131 int 132 encap4_input(struct mbuf **mp, int *offp, int proto) 133 { 134 struct mbuf *m = *mp; 135 int off = *offp; 136 struct ip *ip; 137 struct sockaddr_in s, d; 138 const struct protosw *psw; 139 struct encaptab *ep, *match; 140 int prio, matchprio; 141 142 ip = mtod(m, struct ip *); 143 *mp = NULL; 144 145 bzero(&s, sizeof s); 146 s.sin_family = AF_INET; 147 s.sin_len = sizeof(struct sockaddr_in); 148 s.sin_addr = ip->ip_src; 149 bzero(&d, sizeof d); 150 d.sin_family = AF_INET; 151 d.sin_len = sizeof(struct sockaddr_in); 152 d.sin_addr = ip->ip_dst; 153 154 match = NULL; 155 matchprio = 0; 156 for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { 157 if (ep->af != AF_INET) 158 continue; 159 if (ep->proto >= 0 && ep->proto != proto) 160 continue; 161 if (ep->func) 162 prio = (*ep->func)(m, off, proto, ep->arg); 163 else { 164 /* 165 * it's inbound traffic, we need to match in reverse 166 * order 167 */ 168 prio = mask_match(ep, 169 (struct sockaddr *)&d, 170 (struct sockaddr *)&s); 171 } 172 173 /* 174 * We prioritize the matches by using bit length of the 175 * matches. mask_match() and user-supplied matching function 176 * should return the bit length of the matches (for example, 177 * if both src/dst are matched for IPv4, 64 should be returned). 178 * 0 or negative return value means "it did not match". 179 * 180 * The question is, since we have two "mask" portion, we 181 * cannot really define total order between entries. 182 * For example, which of these should be preferred? 183 * mask_match() returns 48 (32 + 16) for both of them. 184 * src=3ffe::/16, dst=3ffe:501::/32 185 * src=3ffe:501::/32, dst=3ffe::/16 186 * 187 * We need to loop through all the possible candidates 188 * to get the best match - the search takes O(n) for 189 * n attachments (i.e. interfaces). 190 */ 191 if (prio <= 0) 192 continue; 193 if (prio > matchprio) { 194 matchprio = prio; 195 match = ep; 196 } 197 } 198 199 if (match) { 200 /* found a match, "match" has the best one */ 201 psw = match->psw; 202 if (psw && psw->pr_input) { 203 encap_fillarg(m, match); 204 *mp = m; 205 (*psw->pr_input)(mp, offp, proto); 206 } else { 207 m_freem(m); 208 } 209 return(IPPROTO_DONE); 210 } 211 212 /* for backward compatibility */ 213 if (proto == IPPROTO_IPV4 && ipip_input) { 214 *mp = m; 215 ipip_input(mp, offp, proto); 216 return(IPPROTO_DONE); 217 } 218 219 /* last resort: inject to raw socket */ 220 *mp = m; 221 rip_input(mp, offp, proto); 222 return(IPPROTO_DONE); 223 } 224 #endif 225 226 #ifdef INET6 227 int 228 encap6_input(struct mbuf **mp, int *offp, int proto) 229 { 230 struct mbuf *m = *mp; 231 struct ip6_hdr *ip6; 232 struct sockaddr_in6 s, d; 233 const struct protosw *psw; 234 struct encaptab *ep, *match; 235 int prio, matchprio; 236 237 ip6 = mtod(m, struct ip6_hdr *); 238 239 bzero(&s, sizeof s); 240 s.sin6_family = AF_INET6; 241 s.sin6_len = sizeof(struct sockaddr_in6); 242 s.sin6_addr = ip6->ip6_src; 243 bzero(&d, sizeof d); 244 d.sin6_family = AF_INET6; 245 d.sin6_len = sizeof(struct sockaddr_in6); 246 d.sin6_addr = ip6->ip6_dst; 247 248 match = NULL; 249 matchprio = 0; 250 for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { 251 if (ep->af != AF_INET6) 252 continue; 253 if (ep->proto >= 0 && ep->proto != proto) 254 continue; 255 if (ep->func) 256 prio = (*ep->func)(m, *offp, proto, ep->arg); 257 else { 258 /* 259 * it's inbound traffic, we need to match in reverse 260 * order 261 */ 262 prio = mask_match(ep, (struct sockaddr *)&d, 263 (struct sockaddr *)&s); 264 } 265 266 /* see encap4_input() for issues here */ 267 if (prio <= 0) 268 continue; 269 if (prio > matchprio) { 270 matchprio = prio; 271 match = ep; 272 } 273 } 274 275 if (match) { 276 /* found a match */ 277 psw = match->psw; 278 if (psw && psw->pr_input) { 279 encap_fillarg(m, match); 280 return (*psw->pr_input)(mp, offp, proto); 281 } else { 282 m_freem(m); 283 return IPPROTO_DONE; 284 } 285 } 286 287 /* last resort: inject to raw socket */ 288 return rip6_input(mp, offp, proto); 289 } 290 #endif 291 292 static void 293 encap_add(struct encaptab *ep) 294 { 295 296 LIST_INSERT_HEAD(&encaptab, ep, chain); 297 } 298 299 /* 300 * sp (src ptr) is always my side, and dp (dst ptr) is always remote side. 301 * length of mask (sm and dm) is assumed to be same as sp/dp. 302 * Return value will be necessary as input (cookie) for encap_detach(). 303 */ 304 const struct encaptab * 305 encap_attach(int af, int proto, const struct sockaddr *sp, 306 const struct sockaddr *sm, const struct sockaddr *dp, 307 const struct sockaddr *dm, const struct protosw *psw, void *arg) 308 { 309 struct encaptab *ep; 310 311 crit_enter(); 312 /* sanity check on args */ 313 if (sp->sa_len > sizeof ep->src || dp->sa_len > sizeof ep->dst) 314 goto fail; 315 if (sp->sa_len != dp->sa_len) 316 goto fail; 317 if (af != sp->sa_family || af != dp->sa_family) 318 goto fail; 319 320 /* check if anyone have already attached with exactly same config */ 321 for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { 322 if (ep->af != af) 323 continue; 324 if (ep->proto != proto) 325 continue; 326 if (ep->src.ss_len != sp->sa_len || 327 bcmp(&ep->src, sp, sp->sa_len) != 0 || 328 bcmp(&ep->srcmask, sm, sp->sa_len) != 0) 329 continue; 330 if (ep->dst.ss_len != dp->sa_len || 331 bcmp(&ep->dst, dp, dp->sa_len) != 0 || 332 bcmp(&ep->dstmask, dm, dp->sa_len) != 0) 333 continue; 334 335 goto fail; 336 } 337 338 ep = kmalloc(sizeof *ep, M_NETADDR, M_INTWAIT | M_ZERO | M_NULLOK); 339 if (ep == NULL) 340 goto fail; 341 342 ep->af = af; 343 ep->proto = proto; 344 bcopy(sp, &ep->src, sp->sa_len); 345 bcopy(sm, &ep->srcmask, sp->sa_len); 346 bcopy(dp, &ep->dst, dp->sa_len); 347 bcopy(dm, &ep->dstmask, dp->sa_len); 348 ep->psw = psw; 349 ep->arg = arg; 350 351 encap_add(ep); 352 353 crit_exit(); 354 return ep; 355 356 fail: 357 crit_exit(); 358 return NULL; 359 } 360 361 const struct encaptab * 362 encap_attach_func(int af, int proto, 363 int (*func)(const struct mbuf *, int, int, void *), 364 const struct protosw *psw, void *arg) 365 { 366 struct encaptab *ep; 367 368 crit_enter(); 369 /* sanity check on args */ 370 if (!func) 371 goto fail; 372 373 ep = kmalloc(sizeof *ep, M_NETADDR, M_INTWAIT | M_ZERO | M_NULLOK); 374 if (ep == NULL) 375 goto fail; 376 377 ep->af = af; 378 ep->proto = proto; 379 ep->func = func; 380 ep->psw = psw; 381 ep->arg = arg; 382 383 encap_add(ep); 384 385 crit_exit(); 386 return ep; 387 388 fail: 389 crit_exit(); 390 return NULL; 391 } 392 393 int 394 encap_detach(const struct encaptab *cookie) 395 { 396 const struct encaptab *ep = cookie; 397 struct encaptab *p; 398 399 for (p = LIST_FIRST(&encaptab); p; p = LIST_NEXT(p, chain)) { 400 if (p == ep) { 401 LIST_REMOVE(p, chain); 402 kfree(p, M_NETADDR); /*XXX*/ 403 return 0; 404 } 405 } 406 407 return EINVAL; 408 } 409 410 static int 411 mask_match(const struct encaptab *ep, const struct sockaddr *sp, 412 const struct sockaddr *dp) 413 { 414 struct sockaddr_storage s; 415 struct sockaddr_storage d; 416 int i; 417 const u_int8_t *p, *q; 418 u_int8_t *r; 419 int matchlen; 420 421 if (sp->sa_len > sizeof s || dp->sa_len > sizeof d) 422 return 0; 423 if (sp->sa_family != ep->af || dp->sa_family != ep->af) 424 return 0; 425 if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len) 426 return 0; 427 428 matchlen = 0; 429 430 p = (const u_int8_t *)sp; 431 q = (const u_int8_t *)&ep->srcmask; 432 r = (u_int8_t *)&s; 433 for (i = 0 ; i < sp->sa_len; i++) { 434 r[i] = p[i] & q[i]; 435 /* XXX estimate */ 436 matchlen += (q[i] ? 8 : 0); 437 } 438 439 p = (const u_int8_t *)dp; 440 q = (const u_int8_t *)&ep->dstmask; 441 r = (u_int8_t *)&d; 442 for (i = 0 ; i < dp->sa_len; i++) { 443 r[i] = p[i] & q[i]; 444 /* XXX rough estimate */ 445 matchlen += (q[i] ? 8 : 0); 446 } 447 448 /* need to overwrite len/family portion as we don't compare them */ 449 s.ss_len = sp->sa_len; 450 s.ss_family = sp->sa_family; 451 d.ss_len = dp->sa_len; 452 d.ss_family = dp->sa_family; 453 454 if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 && 455 bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) { 456 return matchlen; 457 } else 458 return 0; 459 } 460 461 static void 462 encap_fillarg(struct mbuf *m, const struct encaptab *ep) 463 { 464 struct m_tag *tag; 465 466 tag = m_tag_get(PACKET_TAG_ENCAP, sizeof(void *), M_NOWAIT); 467 if (tag != NULL) { 468 *(void **)m_tag_data(tag) = ep->arg; 469 m_tag_prepend(m, tag); 470 } 471 } 472 473 void * 474 encap_getarg(struct mbuf *m) 475 { 476 void *p = NULL; 477 struct m_tag *tag; 478 479 tag = m_tag_find(m, PACKET_TAG_ENCAP, NULL); 480 if (tag != NULL) { 481 p = *(void **)m_tag_data(tag); 482 m_tag_delete(m, tag); 483 } 484 return p; 485 } 486