xref: /openbsd/sys/netinet/raw_ip.c (revision 7b36286a)
1 /*	$OpenBSD: raw_ip.c,v 1.45 2008/06/14 02:17:27 jsing Exp $	*/
2 /*	$NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  * 	This product includes software developed by the University of
46  * 	California, Berkeley and its contributors.
47  * 	This product includes software developed at the Information
48  * 	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/socket.h>
75 #include <sys/protosw.h>
76 #include <sys/socketvar.h>
77 
78 #include <net/if.h>
79 #include <net/route.h>
80 #include <net/pfvar.h>
81 
82 #include <netinet/in.h>
83 #include <netinet/in_systm.h>
84 #include <netinet/ip.h>
85 #include <netinet/ip_mroute.h>
86 #include <netinet/ip_var.h>
87 #include <netinet/in_pcb.h>
88 #include <netinet/in_var.h>
89 #include <netinet/ip_icmp.h>
90 
91 #include "pf.h"
92 
93 struct inpcbtable rawcbtable;
94 
95 /*
96  * Nominal space allocated to a raw ip socket.
97  */
98 #define	RIPSNDQ		8192
99 #define	RIPRCVQ		8192
100 
101 /*
102  * Raw interface to IP protocol.
103  */
104 
105 /*
106  * Initialize raw connection block q.
107  */
108 void
109 rip_init()
110 {
111 
112 	in_pcbinit(&rawcbtable, 1);
113 }
114 
115 struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
116 
117 /*
118  * Setup generic address and protocol structures
119  * for raw_input routine, then pass them along with
120  * mbuf chain.
121  */
122 void
123 rip_input(struct mbuf *m, ...)
124 {
125 	struct ip *ip = mtod(m, struct ip *);
126 	struct inpcb *inp, *last = NULL;
127 	struct mbuf *opts = NULL;
128 
129 	ripsrc.sin_addr = ip->ip_src;
130 	CIRCLEQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
131 #ifdef INET6
132 		if (inp->inp_flags & INP_IPV6)
133 			continue;
134 #endif
135 		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p)
136 			continue;
137 #if NPF
138 		if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
139 			struct pf_divert *divert;
140 
141 			if ((divert = pf_find_divert(m)) == NULL)
142 				continue;
143 			if (inp->inp_laddr.s_addr != divert->addr.ipv4.s_addr)
144 				continue;
145 		} else
146 #endif
147 		if (inp->inp_laddr.s_addr &&
148 		    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
149 			continue;
150 		if (inp->inp_faddr.s_addr &&
151 		    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
152 			continue;
153 		if (last) {
154 			struct mbuf *n;
155 
156 			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
157 				if (last->inp_flags & INP_CONTROLOPTS ||
158 				    last->inp_socket->so_options & SO_TIMESTAMP)
159 					ip_savecontrol(last, &opts, ip, n);
160 				if (sbappendaddr(&last->inp_socket->so_rcv,
161 				    sintosa(&ripsrc), n, opts) == 0) {
162 					/* should notify about lost packet */
163 					m_freem(n);
164 					if (opts)
165 						m_freem(opts);
166 				} else
167 					sorwakeup(last->inp_socket);
168 				opts = NULL;
169 			}
170 		}
171 		last = inp;
172 	}
173 	if (last) {
174 		if (last->inp_flags & INP_CONTROLOPTS ||
175 		    last->inp_socket->so_options & SO_TIMESTAMP)
176 			ip_savecontrol(last, &opts, ip, m);
177 		if (sbappendaddr(&last->inp_socket->so_rcv, sintosa(&ripsrc), m,
178 		    opts) == 0) {
179 			m_freem(m);
180 			if (opts)
181 				m_freem(opts);
182 		} else
183 			sorwakeup(last->inp_socket);
184 	} else {
185 		if (ip->ip_p != IPPROTO_ICMP)
186 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
187 		else
188 			m_freem(m);
189 		ipstat.ips_noproto++;
190 		ipstat.ips_delivered--;
191 	}
192 }
193 
194 /*
195  * Generate IP header and pass packet to ip_output.
196  * Tack on options user may have setup with control call.
197  */
198 int
199 rip_output(struct mbuf *m, ...)
200 {
201 	struct socket *so;
202 	u_long dst;
203 	struct ip *ip;
204 	struct inpcb *inp;
205 	int flags;
206 	va_list ap;
207 
208 	va_start(ap, m);
209 	so = va_arg(ap, struct socket *);
210 	dst = va_arg(ap, u_long);
211 	va_end(ap);
212 
213 	inp = sotoinpcb(so);
214 	flags = (so->so_options & (SO_DONTROUTE|SO_JUMBO)) | IP_ALLOWBROADCAST;
215 
216 	/*
217 	 * If the user handed us a complete IP packet, use it.
218 	 * Otherwise, allocate an mbuf for a header and fill it in.
219 	 */
220 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
221 		if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
222 			m_freem(m);
223 			return (EMSGSIZE);
224 		}
225 		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
226 		if (!m)
227 			return (ENOBUFS);
228 		ip = mtod(m, struct ip *);
229 		ip->ip_tos = inp->inp_ip.ip_tos;
230 		ip->ip_off = htons(0);
231 		ip->ip_p = inp->inp_ip.ip_p;
232 		ip->ip_len = htons(m->m_pkthdr.len);
233 		ip->ip_src = inp->inp_laddr;
234 		ip->ip_dst.s_addr = dst;
235 		ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL;
236 	} else {
237 		if (m->m_pkthdr.len > IP_MAXPACKET) {
238 			m_freem(m);
239 			return (EMSGSIZE);
240 		}
241 		if (m->m_pkthdr.len < sizeof(struct ip)) {
242 			m_freem(m);
243 			return (EINVAL);
244 		}
245 		ip = mtod(m, struct ip *);
246 		/*
247 		 * don't allow both user specified and setsockopt options,
248 		 * and don't allow packet length sizes that will crash
249 		 */
250 		if ((ip->ip_hl != (sizeof (*ip) >> 2) && inp->inp_options) ||
251 		    ntohs(ip->ip_len) > m->m_pkthdr.len ||
252 		    ntohs(ip->ip_len) < ip->ip_hl << 2) {
253 			m_freem(m);
254 			return (EINVAL);
255 		}
256 		if (ip->ip_id == 0) {
257 			ip->ip_id = htons(ip_randomid());
258 		}
259 		/* XXX prevent ip_output from overwriting header fields */
260 		flags |= IP_RAWOUTPUT;
261 		ipstat.ips_rawout++;
262 	}
263 #ifdef INET6
264 	/*
265 	 * A thought:  Even though raw IP shouldn't be able to set IPv6
266 	 *             multicast options, if it does, the last parameter to
267 	 *             ip_output should be guarded against v6/v4 problems.
268 	 */
269 #endif
270 	return (ip_output(m, inp->inp_options, &inp->inp_route, flags,
271 	    inp->inp_moptions, inp));
272 }
273 
274 /*
275  * Raw IP socket option processing.
276  */
277 int
278 rip_ctloutput(int op, struct socket *so, int level, int optname,
279     struct mbuf **m)
280 {
281 	struct inpcb *inp = sotoinpcb(so);
282 	int error;
283 
284 	if (level != IPPROTO_IP) {
285 		if (op == PRCO_SETOPT && *m)
286 			(void) m_free(*m);
287 		return (EINVAL);
288 	}
289 
290 	switch (optname) {
291 
292 	case IP_HDRINCL:
293 		error = 0;
294 		if (op == PRCO_SETOPT) {
295 			if (*m == 0 || (*m)->m_len < sizeof (int))
296 				error = EINVAL;
297 			else if (*mtod(*m, int *))
298 				inp->inp_flags |= INP_HDRINCL;
299 			else
300 				inp->inp_flags &= ~INP_HDRINCL;
301 			if (*m)
302 				(void)m_free(*m);
303 		} else {
304 			*m = m_get(M_WAIT, M_SOOPTS);
305 			(*m)->m_len = sizeof(int);
306 			*mtod(*m, int *) = inp->inp_flags & INP_HDRINCL;
307 		}
308 		return (error);
309 
310 	case MRT_INIT:
311 	case MRT_DONE:
312 	case MRT_ADD_VIF:
313 	case MRT_DEL_VIF:
314 	case MRT_ADD_MFC:
315 	case MRT_DEL_MFC:
316 	case MRT_VERSION:
317 	case MRT_ASSERT:
318 	case MRT_API_SUPPORT:
319 	case MRT_API_CONFIG:
320 	case MRT_ADD_BW_UPCALL:
321 	case MRT_DEL_BW_UPCALL:
322 #ifdef MROUTING
323 		switch (op) {
324 		case PRCO_SETOPT:
325 			error = ip_mrouter_set(so, optname, m);
326 			break;
327 		case PRCO_GETOPT:
328 			error = ip_mrouter_get(so, optname, m);
329 			break;
330 		default:
331 			error = EINVAL;
332 			break;
333 		}
334 		return (error);
335 #else
336 		if (op == PRCO_SETOPT && *m)
337 			m_free(*m);
338 		return (EOPNOTSUPP);
339 #endif
340 	}
341 	return (ip_ctloutput(op, so, level, optname, m));
342 }
343 
344 u_long	rip_sendspace = RIPSNDQ;
345 u_long	rip_recvspace = RIPRCVQ;
346 
347 /*ARGSUSED*/
348 int
349 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
350     struct mbuf *control, struct proc *p)
351 {
352 	int error = 0;
353 	struct inpcb *inp = sotoinpcb(so);
354 #ifdef MROUTING
355 	extern struct socket *ip_mrouter;
356 #endif
357 	if (req == PRU_CONTROL)
358 		return (in_control(so, (u_long)m, (caddr_t)nam,
359 		    (struct ifnet *)control));
360 
361 	if (inp == NULL && req != PRU_ATTACH) {
362 		error = EINVAL;
363 		goto release;
364 	}
365 
366 	switch (req) {
367 
368 	case PRU_ATTACH:
369 		if (inp)
370 			panic("rip_attach");
371 		if ((so->so_state & SS_PRIV) == 0) {
372 			error = EACCES;
373 			break;
374 		}
375 		if ((error = soreserve(so, rip_sendspace, rip_recvspace)) ||
376 		    (error = in_pcballoc(so, &rawcbtable)))
377 			break;
378 		inp = (struct inpcb *)so->so_pcb;
379 		inp->inp_ip.ip_p = (long)nam;
380 		break;
381 
382 	case PRU_DISCONNECT:
383 		if ((so->so_state & SS_ISCONNECTED) == 0) {
384 			error = ENOTCONN;
385 			break;
386 		}
387 		/* FALLTHROUGH */
388 	case PRU_ABORT:
389 		soisdisconnected(so);
390 		/* FALLTHROUGH */
391 	case PRU_DETACH:
392 		if (inp == 0)
393 			panic("rip_detach");
394 #ifdef MROUTING
395 		if (so == ip_mrouter)
396 			ip_mrouter_done();
397 #endif
398 		in_pcbdetach(inp);
399 		break;
400 
401 	case PRU_BIND:
402 	    {
403 		struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
404 
405 		if (nam->m_len != sizeof(*addr)) {
406 			error = EINVAL;
407 			break;
408 		}
409 		if ((TAILQ_EMPTY(&ifnet)) ||
410 		    ((addr->sin_family != AF_INET) &&
411 		     (addr->sin_family != AF_IMPLINK)) ||
412 		    (addr->sin_addr.s_addr &&
413 		     (!(so->so_options & SO_BINDANY) &&
414 		     in_iawithaddr(addr->sin_addr, NULL) == 0))) {
415 			error = EADDRNOTAVAIL;
416 			break;
417 		}
418 		inp->inp_laddr = addr->sin_addr;
419 		break;
420 	    }
421 	case PRU_CONNECT:
422 	    {
423 		struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
424 
425 		if (nam->m_len != sizeof(*addr)) {
426 			error = EINVAL;
427 			break;
428 		}
429 		if (TAILQ_EMPTY(&ifnet)) {
430 			error = EADDRNOTAVAIL;
431 			break;
432 		}
433 		if ((addr->sin_family != AF_INET) &&
434 		     (addr->sin_family != AF_IMPLINK)) {
435 			error = EAFNOSUPPORT;
436 			break;
437 		}
438 		inp->inp_faddr = addr->sin_addr;
439 		soisconnected(so);
440 		break;
441 	    }
442 
443 	case PRU_CONNECT2:
444 		error = EOPNOTSUPP;
445 		break;
446 
447 	/*
448 	 * Mark the connection as being incapable of further input.
449 	 */
450 	case PRU_SHUTDOWN:
451 		socantsendmore(so);
452 		break;
453 
454 	/*
455 	 * Ship a packet out.  The appropriate raw output
456 	 * routine handles any massaging necessary.
457 	 */
458 	case PRU_SEND:
459 	    {
460 		u_int32_t dst;
461 
462 		if (so->so_state & SS_ISCONNECTED) {
463 			if (nam) {
464 				error = EISCONN;
465 				break;
466 			}
467 			dst = inp->inp_faddr.s_addr;
468 		} else {
469 			if (nam == NULL) {
470 				error = ENOTCONN;
471 				break;
472 			}
473 			dst = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
474 		}
475 #ifdef IPSEC
476 		/* XXX Find an IPsec TDB */
477 #endif
478 		error = rip_output(m, so, dst);
479 		m = NULL;
480 		break;
481 	    }
482 
483 	case PRU_SENSE:
484 		/*
485 		 * stat: don't bother with a blocksize.
486 		 */
487 		return (0);
488 
489 	/*
490 	 * Not supported.
491 	 */
492 	case PRU_RCVOOB:
493 	case PRU_RCVD:
494 	case PRU_LISTEN:
495 	case PRU_ACCEPT:
496 	case PRU_SENDOOB:
497 		error = EOPNOTSUPP;
498 		break;
499 
500 	case PRU_SOCKADDR:
501 		in_setsockaddr(inp, nam);
502 		break;
503 
504 	case PRU_PEERADDR:
505 		in_setpeeraddr(inp, nam);
506 		break;
507 
508 	default:
509 		panic("rip_usrreq");
510 	}
511 release:
512 	if (m != NULL)
513 		m_freem(m);
514 	return (error);
515 }
516