xref: /netbsd/sys/netinet/raw_ip.c (revision bf9ec67e)
1 /*	$NetBSD: raw_ip.c,v 1.60 2001/12/21 02:51:47 itojun Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1982, 1986, 1988, 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
65  */
66 
67 #include <sys/cdefs.h>
68 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.60 2001/12/21 02:51:47 itojun Exp $");
69 
70 #include "opt_ipsec.h"
71 #include "opt_mrouting.h"
72 
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/socket.h>
77 #include <sys/protosw.h>
78 #include <sys/socketvar.h>
79 #include <sys/errno.h>
80 #include <sys/systm.h>
81 #include <sys/proc.h>
82 
83 #include <net/if.h>
84 #include <net/route.h>
85 
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/ip.h>
89 #include <netinet/ip_var.h>
90 #include <netinet/ip_mroute.h>
91 #include <netinet/ip_icmp.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/in_var.h>
94 
95 #include <machine/stdarg.h>
96 
97 #ifdef IPSEC
98 #include <netinet6/ipsec.h>
99 #endif /*IPSEC*/
100 
101 struct inpcbtable rawcbtable;
102 
103 int	 rip_pcbnotify __P((struct inpcbtable *, struct in_addr,
104     struct in_addr, int, int, void (*) __P((struct inpcb *, int))));
105 int	 rip_bind __P((struct inpcb *, struct mbuf *));
106 int	 rip_connect __P((struct inpcb *, struct mbuf *));
107 void	 rip_disconnect __P((struct inpcb *));
108 
/*
 * Default send and receive buffer reservations (in bytes) for a raw IP
 * socket; used as the initial values of rip_sendspace/rip_recvspace.
 */
#define	RIPSNDQ		(8 * 1024)
#define	RIPRCVQ		(8 * 1024)
114 
115 /*
116  * Raw interface to IP protocol.
117  */
118 
119 /*
120  * Initialize raw connection block q.
121  */
122 void
123 rip_init()
124 {
125 
126 	in_pcbinit(&rawcbtable, 1, 1);
127 }
128 
129 static struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
130 
131 /*
132  * Setup generic address and protocol structures
133  * for raw_input routine, then pass them along with
134  * mbuf chain.
135  */
136 void
137 #if __STDC__
138 rip_input(struct mbuf *m, ...)
139 #else
140 rip_input(m, va_alist)
141 	struct mbuf *m;
142 	va_dcl
143 #endif
144 {
145 	int off, proto;
146 	struct ip *ip = mtod(m, struct ip *);
147 	struct inpcb *inp;
148 	struct inpcb *last = 0;
149 	struct mbuf *opts = 0;
150 	struct sockaddr_in ripsrc;
151 	va_list ap;
152 
153 	va_start(ap, m);
154 	off = va_arg(ap, int);
155 	proto = va_arg(ap, int);
156 	va_end(ap);
157 
158 	ripsrc.sin_family = AF_INET;
159 	ripsrc.sin_len = sizeof(struct sockaddr_in);
160 	ripsrc.sin_addr = ip->ip_src;
161 	ripsrc.sin_port = 0;
162 	bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero));
163 
164 	/*
165 	 * XXX Compatibility: programs using raw IP expect ip_len
166 	 * XXX to have the header length subtracted.
167 	 */
168 	ip->ip_len -= ip->ip_hl << 2;
169 
170 	CIRCLEQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
171 		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
172 			continue;
173 		if (!in_nullhost(inp->inp_laddr) &&
174 		    !in_hosteq(inp->inp_laddr, ip->ip_dst))
175 			continue;
176 		if (!in_nullhost(inp->inp_faddr) &&
177 		    !in_hosteq(inp->inp_faddr, ip->ip_src))
178 			continue;
179 		if (last) {
180 			struct mbuf *n;
181 
182 #ifdef IPSEC
183 			/* check AH/ESP integrity. */
184 			if (ipsec4_in_reject_so(m, last->inp_socket)) {
185 				ipsecstat.in_polvio++;
186 				/* do not inject data to pcb */
187 			} else
188 #endif /*IPSEC*/
189 			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
190 				if (last->inp_flags & INP_CONTROLOPTS ||
191 				    last->inp_socket->so_options & SO_TIMESTAMP)
192 					ip_savecontrol(last, &opts, ip, n);
193 				if (sbappendaddr(&last->inp_socket->so_rcv,
194 				    sintosa(&ripsrc), n, opts) == 0) {
195 					/* should notify about lost packet */
196 					m_freem(n);
197 					if (opts)
198 						m_freem(opts);
199 				} else
200 					sorwakeup(last->inp_socket);
201 				opts = NULL;
202 			}
203 		}
204 		last = inp;
205 	}
206 #ifdef IPSEC
207 	/* check AH/ESP integrity. */
208 	if (last && ipsec4_in_reject_so(m, last->inp_socket)) {
209 		m_freem(m);
210 		ipsecstat.in_polvio++;
211 		ipstat.ips_delivered--;
212 		/* do not inject data to pcb */
213 	} else
214 #endif /*IPSEC*/
215 	if (last) {
216 		if (last->inp_flags & INP_CONTROLOPTS ||
217 		    last->inp_socket->so_options & SO_TIMESTAMP)
218 			ip_savecontrol(last, &opts, ip, m);
219 		if (sbappendaddr(&last->inp_socket->so_rcv,
220 		    sintosa(&ripsrc), m, opts) == 0) {
221 			m_freem(m);
222 			if (opts)
223 				m_freem(opts);
224 		} else
225 			sorwakeup(last->inp_socket);
226 	} else {
227 		if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
228 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
229 			    0, 0);
230 			ipstat.ips_noproto++;
231 			ipstat.ips_delivered--;
232 		} else
233 			m_freem(m);
234 	}
235 	return;
236 }
237 
238 int
239 rip_pcbnotify(table, faddr, laddr, proto, errno, notify)
240 	struct inpcbtable *table;
241 	struct in_addr faddr, laddr;
242 	int proto;
243 	int errno;
244 	void (*notify) __P((struct inpcb *, int));
245 {
246 	struct inpcb *inp, *ninp;
247 	int nmatch;
248 
249 	nmatch = 0;
250 	for (inp = CIRCLEQ_FIRST(&table->inpt_queue);
251 	    inp != (struct inpcb *)&table->inpt_queue;
252 	    inp = ninp) {
253 		ninp = inp->inp_queue.cqe_next;
254 		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
255 			continue;
256 		if (in_hosteq(inp->inp_faddr, faddr) &&
257 		    in_hosteq(inp->inp_laddr, laddr)) {
258 			(*notify)(inp, errno);
259 			nmatch++;
260 		}
261 	}
262 
263 	return nmatch;
264 }
265 
266 void *
267 rip_ctlinput(cmd, sa, v)
268 	int cmd;
269 	struct sockaddr *sa;
270 	void *v;
271 {
272 	struct ip *ip = v;
273 	void (*notify) __P((struct inpcb *, int)) = in_rtchange;
274 	int errno;
275 
276 	if (sa->sa_family != AF_INET ||
277 	    sa->sa_len != sizeof(struct sockaddr_in))
278 		return NULL;
279 	if ((unsigned)cmd >= PRC_NCMDS)
280 		return NULL;
281 	errno = inetctlerrmap[cmd];
282 	if (PRC_IS_REDIRECT(cmd))
283 		notify = in_rtchange, ip = 0;
284 	else if (cmd == PRC_HOSTDEAD)
285 		ip = 0;
286 	else if (errno == 0)
287 		return NULL;
288 	if (ip) {
289 		rip_pcbnotify(&rawcbtable, satosin(sa)->sin_addr,
290 		    ip->ip_src, ip->ip_p, errno, notify);
291 
292 		/* XXX mapped address case */
293 	} else
294 		in_pcbnotifyall(&rawcbtable, satosin(sa)->sin_addr, errno,
295 		    notify);
296 	return NULL;
297 }
298 
299 /*
300  * Generate IP header and pass packet to ip_output.
301  * Tack on options user may have setup with control call.
302  */
303 int
304 #if __STDC__
305 rip_output(struct mbuf *m, ...)
306 #else
307 rip_output(m, va_alist)
308 	struct mbuf *m;
309 	va_dcl
310 #endif
311 {
312 	struct inpcb *inp;
313 	struct ip *ip;
314 	struct mbuf *opts;
315 	int flags;
316 	va_list ap;
317 
318 	va_start(ap, m);
319 	inp = va_arg(ap, struct inpcb *);
320 	va_end(ap);
321 
322 	flags =
323 	    (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST
324 	    | IP_RETURNMTU;
325 
326 	/*
327 	 * If the user handed us a complete IP packet, use it.
328 	 * Otherwise, allocate an mbuf for a header and fill it in.
329 	 */
330 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
331 		if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
332 			m_freem(m);
333 			return (EMSGSIZE);
334 		}
335 		M_PREPEND(m, sizeof(struct ip), M_WAIT);
336 		ip = mtod(m, struct ip *);
337 		ip->ip_tos = 0;
338 		ip->ip_off = 0;
339 		ip->ip_p = inp->inp_ip.ip_p;
340 		ip->ip_len = m->m_pkthdr.len;
341 		ip->ip_src = inp->inp_laddr;
342 		ip->ip_dst = inp->inp_faddr;
343 		ip->ip_ttl = MAXTTL;
344 		opts = inp->inp_options;
345 	} else {
346 		if (m->m_pkthdr.len > IP_MAXPACKET) {
347 			m_freem(m);
348 			return (EMSGSIZE);
349 		}
350 		ip = mtod(m, struct ip *);
351 		if (m->m_pkthdr.len != ip->ip_len) {
352 			m_freem(m);
353 			return (EINVAL);
354 		}
355 		if (ip->ip_id == 0)
356 			ip->ip_id = htons(ip_id++);
357 		opts = NULL;
358 		/* XXX prevent ip_output from overwriting header fields */
359 		flags |= IP_RAWOUTPUT;
360 		ipstat.ips_rawout++;
361 	}
362 #ifdef IPSEC
363 	if (ipsec_setsocket(m, inp->inp_socket) != 0) {
364 		m_freem(m);
365 		return ENOBUFS;
366 	}
367 #endif /*IPSEC*/
368 	return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions, &inp->inp_errormtu));
369 }
370 
371 /*
372  * Raw IP socket option processing.
373  */
374 int
375 rip_ctloutput(op, so, level, optname, m)
376 	int op;
377 	struct socket *so;
378 	int level, optname;
379 	struct mbuf **m;
380 {
381 	struct inpcb *inp = sotoinpcb(so);
382 	int error = 0;
383 
384 	if (level != IPPROTO_IP) {
385 		error = ENOPROTOOPT;
386 		if (op == PRCO_SETOPT && *m != 0)
387 			(void) m_free(*m);
388 	} else switch (op) {
389 
390 	case PRCO_SETOPT:
391 		switch (optname) {
392 		case IP_HDRINCL:
393 			if (*m == 0 || (*m)->m_len < sizeof (int))
394 				error = EINVAL;
395 			else {
396 				if (*mtod(*m, int *))
397 					inp->inp_flags |= INP_HDRINCL;
398 				else
399 					inp->inp_flags &= ~INP_HDRINCL;
400 			}
401 			if (*m != 0)
402 				(void) m_free(*m);
403 			break;
404 
405 #ifdef MROUTING
406 		case MRT_INIT:
407 		case MRT_DONE:
408 		case MRT_ADD_VIF:
409 		case MRT_DEL_VIF:
410 		case MRT_ADD_MFC:
411 		case MRT_DEL_MFC:
412 		case MRT_ASSERT:
413 			error = ip_mrouter_set(so, optname, m);
414 			break;
415 #endif
416 
417 		default:
418 			error = ip_ctloutput(op, so, level, optname, m);
419 			break;
420 		}
421 		break;
422 
423 	case PRCO_GETOPT:
424 		switch (optname) {
425 		case IP_HDRINCL:
426 			*m = m_get(M_WAIT, M_SOOPTS);
427 			(*m)->m_len = sizeof (int);
428 			*mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0;
429 			break;
430 
431 #ifdef MROUTING
432 		case MRT_VERSION:
433 		case MRT_ASSERT:
434 			error = ip_mrouter_get(so, optname, m);
435 			break;
436 #endif
437 
438 		default:
439 			error = ip_ctloutput(op, so, level, optname, m);
440 			break;
441 		}
442 		break;
443 	}
444 	return (error);
445 }
446 
447 int
448 rip_bind(inp, nam)
449 	struct inpcb *inp;
450 	struct mbuf *nam;
451 {
452 	struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
453 
454 	if (nam->m_len != sizeof(*addr))
455 		return (EINVAL);
456 	if (TAILQ_FIRST(&ifnet) == 0)
457 		return (EADDRNOTAVAIL);
458 	if (addr->sin_family != AF_INET &&
459 	    addr->sin_family != AF_IMPLINK)
460 		return (EAFNOSUPPORT);
461 	if (!in_nullhost(addr->sin_addr) &&
462 	    ifa_ifwithaddr(sintosa(addr)) == 0)
463 		return (EADDRNOTAVAIL);
464 	inp->inp_laddr = addr->sin_addr;
465 	return (0);
466 }
467 
468 int
469 rip_connect(inp, nam)
470 	struct inpcb *inp;
471 	struct mbuf *nam;
472 {
473 	struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
474 
475 	if (nam->m_len != sizeof(*addr))
476 		return (EINVAL);
477 	if (TAILQ_FIRST(&ifnet) == 0)
478 		return (EADDRNOTAVAIL);
479 	if (addr->sin_family != AF_INET &&
480 	    addr->sin_family != AF_IMPLINK)
481 		return (EAFNOSUPPORT);
482 	inp->inp_faddr = addr->sin_addr;
483 	return (0);
484 }
485 
486 void
487 rip_disconnect(inp)
488 	struct inpcb *inp;
489 {
490 
491 	inp->inp_faddr = zeroin_addr;
492 }
493 
494 u_long	rip_sendspace = RIPSNDQ;
495 u_long	rip_recvspace = RIPRCVQ;
496 
497 /*ARGSUSED*/
498 int
499 rip_usrreq(so, req, m, nam, control, p)
500 	struct socket *so;
501 	int req;
502 	struct mbuf *m, *nam, *control;
503 	struct proc *p;
504 {
505 	struct inpcb *inp;
506 	int s;
507 	int error = 0;
508 #ifdef MROUTING
509 	extern struct socket *ip_mrouter;
510 #endif
511 
512 	if (req == PRU_CONTROL)
513 		return (in_control(so, (long)m, (caddr_t)nam,
514 		    (struct ifnet *)control, p));
515 
516 	if (req == PRU_PURGEIF) {
517 		in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control);
518 		in_purgeif((struct ifnet *)control);
519 		in_pcbpurgeif(&rawcbtable, (struct ifnet *)control);
520 		return (0);
521 	}
522 
523 	s = splsoftnet();
524 	inp = sotoinpcb(so);
525 #ifdef DIAGNOSTIC
526 	if (req != PRU_SEND && req != PRU_SENDOOB && control)
527 		panic("rip_usrreq: unexpected control mbuf");
528 #endif
529 	if (inp == 0 && req != PRU_ATTACH) {
530 		error = EINVAL;
531 		goto release;
532 	}
533 
534 	switch (req) {
535 
536 	case PRU_ATTACH:
537 		if (inp != 0) {
538 			error = EISCONN;
539 			break;
540 		}
541 		if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) {
542 			error = EACCES;
543 			break;
544 		}
545 		if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
546 			error = soreserve(so, rip_sendspace, rip_recvspace);
547 			if (error)
548 				break;
549 		}
550 		error = in_pcballoc(so, &rawcbtable);
551 		if (error)
552 			break;
553 		inp = sotoinpcb(so);
554 		inp->inp_ip.ip_p = (long)nam;
555 		break;
556 
557 	case PRU_DETACH:
558 #ifdef MROUTING
559 		if (so == ip_mrouter)
560 			ip_mrouter_done();
561 #endif
562 		in_pcbdetach(inp);
563 		break;
564 
565 	case PRU_BIND:
566 		error = rip_bind(inp, nam);
567 		break;
568 
569 	case PRU_LISTEN:
570 		error = EOPNOTSUPP;
571 		break;
572 
573 	case PRU_CONNECT:
574 		error = rip_connect(inp, nam);
575 		if (error)
576 			break;
577 		soisconnected(so);
578 		break;
579 
580 	case PRU_CONNECT2:
581 		error = EOPNOTSUPP;
582 		break;
583 
584 	case PRU_DISCONNECT:
585 		soisdisconnected(so);
586 		rip_disconnect(inp);
587 		break;
588 
589 	/*
590 	 * Mark the connection as being incapable of further input.
591 	 */
592 	case PRU_SHUTDOWN:
593 		socantsendmore(so);
594 		break;
595 
596 	case PRU_RCVD:
597 		error = EOPNOTSUPP;
598 		break;
599 
600 	/*
601 	 * Ship a packet out.  The appropriate raw output
602 	 * routine handles any massaging necessary.
603 	 */
604 	case PRU_SEND:
605 		if (control && control->m_len) {
606 			m_freem(control);
607 			m_freem(m);
608 			error = EINVAL;
609 			break;
610 		}
611 	{
612 		if (nam) {
613 			if ((so->so_state & SS_ISCONNECTED) != 0) {
614 				error = EISCONN;
615 				goto die;
616 			}
617 			error = rip_connect(inp, nam);
618 			if (error) {
619 			die:
620 				m_freem(m);
621 				break;
622 			}
623 		} else {
624 			if ((so->so_state & SS_ISCONNECTED) == 0) {
625 				error = ENOTCONN;
626 				goto die;
627 			}
628 		}
629 		error = rip_output(m, inp);
630 		if (nam)
631 			rip_disconnect(inp);
632 	}
633 		break;
634 
635 	case PRU_SENSE:
636 		/*
637 		 * stat: don't bother with a blocksize.
638 		 */
639 		splx(s);
640 		return (0);
641 
642 	case PRU_RCVOOB:
643 		error = EOPNOTSUPP;
644 		break;
645 
646 	case PRU_SENDOOB:
647 		m_freem(control);
648 		m_freem(m);
649 		error = EOPNOTSUPP;
650 		break;
651 
652 	case PRU_SOCKADDR:
653 		in_setsockaddr(inp, nam);
654 		break;
655 
656 	case PRU_PEERADDR:
657 		in_setpeeraddr(inp, nam);
658 		break;
659 
660 	default:
661 		panic("rip_usrreq");
662 	}
663 
664 release:
665 	splx(s);
666 	return (error);
667 }
668