1 /* $OpenBSD: raw_ip.c,v 1.48 2009/11/03 10:59:04 claudio Exp $ */ 2 /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/protosw.h> 76 #include <sys/socketvar.h> 77 78 #include <net/if.h> 79 #include <net/route.h> 80 #include <net/pfvar.h> 81 82 #include <netinet/in.h> 83 #include <netinet/in_systm.h> 84 #include <netinet/ip.h> 85 #include <netinet/ip_mroute.h> 86 #include <netinet/ip_var.h> 87 #include <netinet/in_pcb.h> 88 #include <netinet/in_var.h> 89 #include <netinet/ip_icmp.h> 90 91 #include "pf.h" 92 93 struct inpcbtable rawcbtable; 94 95 /* 96 * Nominal space allocated to a raw ip socket. 97 */ 98 #define RIPSNDQ 8192 99 #define RIPRCVQ 8192 100 101 /* 102 * Raw interface to IP protocol. 103 */ 104 105 /* 106 * Initialize raw connection block q. 107 */ 108 void 109 rip_init() 110 { 111 112 in_pcbinit(&rawcbtable, 1); 113 } 114 115 struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; 116 117 /* 118 * Setup generic address and protocol structures 119 * for raw_input routine, then pass them along with 120 * mbuf chain. 121 */ 122 void 123 rip_input(struct mbuf *m, ...) 124 { 125 struct ip *ip = mtod(m, struct ip *); 126 struct inpcb *inp, *last = NULL; 127 struct mbuf *opts = NULL; 128 129 ripsrc.sin_addr = ip->ip_src; 130 CIRCLEQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { 131 #ifdef INET6 132 if (inp->inp_flags & INP_IPV6) 133 continue; 134 #endif 135 if (inp->inp_rdomain != rtable_l2(m->m_pkthdr.rdomain)) 136 continue; 137 138 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p) 139 continue; 140 #if NPF > 0 141 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 142 struct pf_divert *divert; 143 144 /* XXX rdomain support */ 145 if ((divert = pf_find_divert(m)) == NULL) 146 continue; 147 if (inp->inp_laddr.s_addr != divert->addr.ipv4.s_addr) 148 continue; 149 } else 150 #endif 151 if (inp->inp_laddr.s_addr && 152 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 153 continue; 154 if (inp->inp_faddr.s_addr && 155 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 156 continue; 157 if (last) { 158 struct mbuf *n; 159 160 if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { 161 if (last->inp_flags & INP_CONTROLOPTS || 162 last->inp_socket->so_options & SO_TIMESTAMP) 163 ip_savecontrol(last, &opts, ip, n); 164 if (sbappendaddr(&last->inp_socket->so_rcv, 165 sintosa(&ripsrc), n, opts) == 0) { 166 /* should notify about lost packet */ 167 m_freem(n); 168 if (opts) 169 m_freem(opts); 170 } else 171 sorwakeup(last->inp_socket); 172 opts = NULL; 173 } 174 } 175 last = inp; 176 } 177 if (last) { 178 if (last->inp_flags & INP_CONTROLOPTS || 179 last->inp_socket->so_options & SO_TIMESTAMP) 180 ip_savecontrol(last, &opts, ip, m); 181 if (sbappendaddr(&last->inp_socket->so_rcv, sintosa(&ripsrc), m, 182 opts) == 0) { 183 m_freem(m); 184 if (opts) 185 m_freem(opts); 186 } else 187 sorwakeup(last->inp_socket); 188 } else { 189 if (ip->ip_p != IPPROTO_ICMP) 190 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0); 191 else 192 m_freem(m); 193 ipstat.ips_noproto++; 194 ipstat.ips_delivered--; 195 } 196 } 197 198 /* 199 * Generate IP header and pass packet to ip_output. 200 * Tack on options user may have setup with control call. 201 */ 202 int 203 rip_output(struct mbuf *m, ...) 204 { 205 struct socket *so; 206 u_long dst; 207 struct ip *ip; 208 struct inpcb *inp; 209 int flags; 210 va_list ap; 211 212 va_start(ap, m); 213 so = va_arg(ap, struct socket *); 214 dst = va_arg(ap, u_long); 215 va_end(ap); 216 217 inp = sotoinpcb(so); 218 flags = (so->so_options & (SO_DONTROUTE|SO_JUMBO)) | IP_ALLOWBROADCAST; 219 220 /* 221 * If the user handed us a complete IP packet, use it. 222 * Otherwise, allocate an mbuf for a header and fill it in. 223 */ 224 if ((inp->inp_flags & INP_HDRINCL) == 0) { 225 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 226 m_freem(m); 227 return (EMSGSIZE); 228 } 229 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 230 if (!m) 231 return (ENOBUFS); 232 ip = mtod(m, struct ip *); 233 ip->ip_tos = inp->inp_ip.ip_tos; 234 ip->ip_off = htons(0); 235 ip->ip_p = inp->inp_ip.ip_p; 236 ip->ip_len = htons(m->m_pkthdr.len); 237 ip->ip_src = inp->inp_laddr; 238 ip->ip_dst.s_addr = dst; 239 ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL; 240 } else { 241 if (m->m_pkthdr.len > IP_MAXPACKET) { 242 m_freem(m); 243 return (EMSGSIZE); 244 } 245 if (m->m_pkthdr.len < sizeof(struct ip)) { 246 m_freem(m); 247 return (EINVAL); 248 } 249 ip = mtod(m, struct ip *); 250 /* 251 * don't allow both user specified and setsockopt options, 252 * and don't allow packet length sizes that will crash 253 */ 254 if ((ip->ip_hl != (sizeof (*ip) >> 2) && inp->inp_options) || 255 ntohs(ip->ip_len) > m->m_pkthdr.len || 256 ntohs(ip->ip_len) < ip->ip_hl << 2) { 257 m_freem(m); 258 return (EINVAL); 259 } 260 if (ip->ip_id == 0) { 261 ip->ip_id = htons(ip_randomid()); 262 } 263 /* XXX prevent ip_output from overwriting header fields */ 264 flags |= IP_RAWOUTPUT; 265 ipstat.ips_rawout++; 266 } 267 #ifdef INET6 268 /* 269 * A thought: Even though raw IP shouldn't be able to set IPv6 270 * multicast options, if it does, the last parameter to 271 * ip_output should be guarded against v6/v4 problems. 272 */ 273 #endif 274 /* force routing domain */ 275 m->m_pkthdr.rdomain = inp->inp_rdomain; 276 277 return (ip_output(m, inp->inp_options, &inp->inp_route, flags, 278 inp->inp_moptions, inp)); 279 } 280 281 /* 282 * Raw IP socket option processing. 283 */ 284 int 285 rip_ctloutput(int op, struct socket *so, int level, int optname, 286 struct mbuf **m) 287 { 288 struct inpcb *inp = sotoinpcb(so); 289 int error; 290 291 if (level != IPPROTO_IP) { 292 if (op == PRCO_SETOPT && *m) 293 (void) m_free(*m); 294 return (EINVAL); 295 } 296 297 switch (optname) { 298 299 case IP_HDRINCL: 300 error = 0; 301 if (op == PRCO_SETOPT) { 302 if (*m == 0 || (*m)->m_len < sizeof (int)) 303 error = EINVAL; 304 else if (*mtod(*m, int *)) 305 inp->inp_flags |= INP_HDRINCL; 306 else 307 inp->inp_flags &= ~INP_HDRINCL; 308 if (*m) 309 (void)m_free(*m); 310 } else { 311 *m = m_get(M_WAIT, M_SOOPTS); 312 (*m)->m_len = sizeof(int); 313 *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL; 314 } 315 return (error); 316 317 case MRT_INIT: 318 case MRT_DONE: 319 case MRT_ADD_VIF: 320 case MRT_DEL_VIF: 321 case MRT_ADD_MFC: 322 case MRT_DEL_MFC: 323 case MRT_VERSION: 324 case MRT_ASSERT: 325 case MRT_API_SUPPORT: 326 case MRT_API_CONFIG: 327 case MRT_ADD_BW_UPCALL: 328 case MRT_DEL_BW_UPCALL: 329 #ifdef MROUTING 330 switch (op) { 331 case PRCO_SETOPT: 332 error = ip_mrouter_set(so, optname, m); 333 break; 334 case PRCO_GETOPT: 335 error = ip_mrouter_get(so, optname, m); 336 break; 337 default: 338 error = EINVAL; 339 break; 340 } 341 return (error); 342 #else 343 if (op == PRCO_SETOPT && *m) 344 m_free(*m); 345 return (EOPNOTSUPP); 346 #endif 347 } 348 return (ip_ctloutput(op, so, level, optname, m)); 349 } 350 351 u_long rip_sendspace = RIPSNDQ; 352 u_long rip_recvspace = RIPRCVQ; 353 354 /*ARGSUSED*/ 355 int 356 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 357 struct mbuf *control, struct proc *p) 358 { 359 int error = 0; 360 struct inpcb *inp = sotoinpcb(so); 361 #ifdef MROUTING 362 extern struct socket *ip_mrouter; 363 #endif 364 if (req == PRU_CONTROL) 365 return (in_control(so, (u_long)m, (caddr_t)nam, 366 (struct ifnet *)control)); 367 368 if (inp == NULL && req != PRU_ATTACH) { 369 error = EINVAL; 370 goto release; 371 } 372 373 switch (req) { 374 375 case PRU_ATTACH: 376 if (inp) 377 panic("rip_attach"); 378 if ((so->so_state & SS_PRIV) == 0) { 379 error = EACCES; 380 break; 381 } 382 if ((error = soreserve(so, rip_sendspace, rip_recvspace)) || 383 (error = in_pcballoc(so, &rawcbtable))) 384 break; 385 inp = (struct inpcb *)so->so_pcb; 386 inp->inp_ip.ip_p = (long)nam; 387 break; 388 389 case PRU_DISCONNECT: 390 if ((so->so_state & SS_ISCONNECTED) == 0) { 391 error = ENOTCONN; 392 break; 393 } 394 /* FALLTHROUGH */ 395 case PRU_ABORT: 396 soisdisconnected(so); 397 /* FALLTHROUGH */ 398 case PRU_DETACH: 399 if (inp == 0) 400 panic("rip_detach"); 401 #ifdef MROUTING 402 if (so == ip_mrouter) 403 ip_mrouter_done(); 404 #endif 405 in_pcbdetach(inp); 406 break; 407 408 case PRU_BIND: 409 { 410 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 411 412 if (nam->m_len != sizeof(*addr)) { 413 error = EINVAL; 414 break; 415 } 416 if ((TAILQ_EMPTY(&ifnet)) || 417 ((addr->sin_family != AF_INET) && 418 (addr->sin_family != AF_IMPLINK)) || 419 (addr->sin_addr.s_addr && 420 (!(so->so_options & SO_BINDANY) && 421 in_iawithaddr(addr->sin_addr, NULL, inp->inp_rdomain) == 422 0))) { 423 error = EADDRNOTAVAIL; 424 break; 425 } 426 inp->inp_laddr = addr->sin_addr; 427 break; 428 } 429 case PRU_CONNECT: 430 { 431 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 432 433 if (nam->m_len != sizeof(*addr)) { 434 error = EINVAL; 435 break; 436 } 437 if (TAILQ_EMPTY(&ifnet)) { 438 error = EADDRNOTAVAIL; 439 break; 440 } 441 if ((addr->sin_family != AF_INET) && 442 (addr->sin_family != AF_IMPLINK)) { 443 error = EAFNOSUPPORT; 444 break; 445 } 446 inp->inp_faddr = addr->sin_addr; 447 soisconnected(so); 448 break; 449 } 450 451 case PRU_CONNECT2: 452 error = EOPNOTSUPP; 453 break; 454 455 /* 456 * Mark the connection as being incapable of further input. 457 */ 458 case PRU_SHUTDOWN: 459 socantsendmore(so); 460 break; 461 462 /* 463 * Ship a packet out. The appropriate raw output 464 * routine handles any massaging necessary. 465 */ 466 case PRU_SEND: 467 { 468 u_int32_t dst; 469 470 if (so->so_state & SS_ISCONNECTED) { 471 if (nam) { 472 error = EISCONN; 473 break; 474 } 475 dst = inp->inp_faddr.s_addr; 476 } else { 477 if (nam == NULL) { 478 error = ENOTCONN; 479 break; 480 } 481 dst = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr; 482 } 483 #ifdef IPSEC 484 /* XXX Find an IPsec TDB */ 485 #endif 486 error = rip_output(m, so, dst); 487 m = NULL; 488 break; 489 } 490 491 case PRU_SENSE: 492 /* 493 * stat: don't bother with a blocksize. 494 */ 495 return (0); 496 497 /* 498 * Not supported. 499 */ 500 case PRU_RCVOOB: 501 case PRU_RCVD: 502 case PRU_LISTEN: 503 case PRU_ACCEPT: 504 case PRU_SENDOOB: 505 error = EOPNOTSUPP; 506 break; 507 508 case PRU_SOCKADDR: 509 in_setsockaddr(inp, nam); 510 break; 511 512 case PRU_PEERADDR: 513 in_setpeeraddr(inp, nam); 514 break; 515 516 default: 517 panic("rip_usrreq"); 518 } 519 release: 520 if (m != NULL) 521 m_freem(m); 522 return (error); 523 } 524