xref: /original-bsd/sys/netinet/ip_input.c (revision 333da485)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)ip_input.c	8.2 (Berkeley) 01/04/94
8  */
9 
10 #include <sys/param.h>
11 #include <sys/systm.h>
12 #include <sys/malloc.h>
13 #include <sys/mbuf.h>
14 #include <sys/domain.h>
15 #include <sys/protosw.h>
16 #include <sys/socket.h>
17 #include <sys/errno.h>
18 #include <sys/time.h>
19 #include <sys/kernel.h>
20 
21 #include <net/if.h>
22 #include <net/route.h>
23 
24 #include <netinet/in.h>
25 #include <netinet/in_systm.h>
26 #include <netinet/ip.h>
27 #include <netinet/in_pcb.h>
28 #include <netinet/in_var.h>
29 #include <netinet/ip_var.h>
30 #include <netinet/ip_icmp.h>
31 
32 #ifndef	IPFORWARDING
33 #ifdef GATEWAY
34 #define	IPFORWARDING	1	/* forward IP packets not for us */
35 #else /* GATEWAY */
36 #define	IPFORWARDING	0	/* don't forward IP packets not for us */
37 #endif /* GATEWAY */
38 #endif /* IPFORWARDING */
39 #ifndef	IPSENDREDIRECTS
40 #define	IPSENDREDIRECTS	1
41 #endif
42 int	ipforwarding = IPFORWARDING;
43 int	ipsendredirects = IPSENDREDIRECTS;
44 int	ip_defttl = IPDEFTTL;
45 #ifdef DIAGNOSTIC
46 int	ipprintfs = 0;
47 #endif
48 
49 extern	struct domain inetdomain;
50 extern	struct protosw inetsw[];
51 u_char	ip_protox[IPPROTO_MAX];
52 int	ipqmaxlen = IFQ_MAXLEN;
53 struct	in_ifaddr *in_ifaddr;			/* first inet address */
54 struct	ifqueue ipintrq;
55 
56 /*
57  * We need to save the IP options in case a protocol wants to respond
58  * to an incoming packet over the same route if the packet got here
59  * using IP source routing.  This allows connection establishment and
60  * maintenance when the remote end is on a network that is not known
61  * to us.
62  */
63 int	ip_nhops = 0;
64 static	struct ip_srcrt {
65 	struct	in_addr dst;			/* final destination */
66 	char	nop;				/* one NOP to align */
67 	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
68 	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
69 } ip_srcrt;
70 
71 #ifdef GATEWAY
72 extern	int if_index;
73 u_long	*ip_ifmatrix;
74 #endif
75 
76 static void save_rte __P((u_char *, struct in_addr));
77 /*
78  * IP initialization: fill in IP protocol switch table.
79  * All protocols not implemented in kernel go to raw IP protocol handler.
80  */
81 void
82 ip_init()
83 {
84 	register struct protosw *pr;
85 	register int i;
86 
87 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
88 	if (pr == 0)
89 		panic("ip_init");
90 	for (i = 0; i < IPPROTO_MAX; i++)
91 		ip_protox[i] = pr - inetsw;
92 	for (pr = inetdomain.dom_protosw;
93 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
94 		if (pr->pr_domain->dom_family == PF_INET &&
95 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
96 			ip_protox[pr->pr_protocol] = pr - inetsw;
97 	ipq.next = ipq.prev = &ipq;
98 	ip_id = time.tv_sec & 0xffff;
99 	ipintrq.ifq_maxlen = ipqmaxlen;
100 #ifdef GATEWAY
101 	i = (if_index + 1) * (if_index + 1) * sizeof (u_long);
102 	ip_ifmatrix = (u_long *) malloc(i, M_RTABLE, M_WAITOK);
103 	bzero((char *)ip_ifmatrix, i);
104 #endif
105 }
106 
107 struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
108 struct	route ipforward_rt;
109 
110 /*
111  * Ip input routine.  Checksum and byte swap header.  If fragmented
112  * try to reassemble.  Process options.  Pass to next level.
113  */
114 void
115 ipintr()
116 {
117 	register struct ip *ip;
118 	register struct mbuf *m;
119 	register struct ipq *fp;
120 	register struct in_ifaddr *ia;
121 	int hlen, s;
122 
123 next:
124 	/*
125 	 * Get next datagram off input queue and get IP header
126 	 * in first mbuf.
127 	 */
128 	s = splimp();
129 	IF_DEQUEUE(&ipintrq, m);
130 	splx(s);
131 	if (m == 0)
132 		return;
133 #ifdef	DIAGNOSTIC
134 	if ((m->m_flags & M_PKTHDR) == 0)
135 		panic("ipintr no HDR");
136 #endif
137 	/*
138 	 * If no IP addresses have been set yet but the interfaces
139 	 * are receiving, can't do anything with incoming packets yet.
140 	 */
141 	if (in_ifaddr == NULL)
142 		goto bad;
143 	ipstat.ips_total++;
144 	if (m->m_len < sizeof (struct ip) &&
145 	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
146 		ipstat.ips_toosmall++;
147 		goto next;
148 	}
149 	ip = mtod(m, struct ip *);
150 	if (ip->ip_v != IPVERSION) {
151 		ipstat.ips_badvers++;
152 		goto bad;
153 	}
154 	hlen = ip->ip_hl << 2;
155 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
156 		ipstat.ips_badhlen++;
157 		goto bad;
158 	}
159 	if (hlen > m->m_len) {
160 		if ((m = m_pullup(m, hlen)) == 0) {
161 			ipstat.ips_badhlen++;
162 			goto next;
163 		}
164 		ip = mtod(m, struct ip *);
165 	}
166 	if (ip->ip_sum = in_cksum(m, hlen)) {
167 		ipstat.ips_badsum++;
168 		goto bad;
169 	}
170 
171 	/*
172 	 * Convert fields to host representation.
173 	 */
174 	NTOHS(ip->ip_len);
175 	if (ip->ip_len < hlen) {
176 		ipstat.ips_badlen++;
177 		goto bad;
178 	}
179 	NTOHS(ip->ip_id);
180 	NTOHS(ip->ip_off);
181 
182 	/*
183 	 * Check that the amount of data in the buffers
184 	 * is as at least much as the IP header would have us expect.
185 	 * Trim mbufs if longer than we expect.
186 	 * Drop packet if shorter than we expect.
187 	 */
188 	if (m->m_pkthdr.len < ip->ip_len) {
189 		ipstat.ips_tooshort++;
190 		goto bad;
191 	}
192 	if (m->m_pkthdr.len > ip->ip_len) {
193 		if (m->m_len == m->m_pkthdr.len) {
194 			m->m_len = ip->ip_len;
195 			m->m_pkthdr.len = ip->ip_len;
196 		} else
197 			m_adj(m, ip->ip_len - m->m_pkthdr.len);
198 	}
199 
200 	/*
201 	 * Process options and, if not destined for us,
202 	 * ship it on.  ip_dooptions returns 1 when an
203 	 * error was detected (causing an icmp message
204 	 * to be sent and the original packet to be freed).
205 	 */
206 	ip_nhops = 0;		/* for source routed packets */
207 	if (hlen > sizeof (struct ip) && ip_dooptions(m))
208 		goto next;
209 
210 	/*
211 	 * Check our list of addresses, to see if the packet is for us.
212 	 */
213 	for (ia = in_ifaddr; ia; ia = ia->ia_next) {
214 #define	satosin(sa)	((struct sockaddr_in *)(sa))
215 
216 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
217 			goto ours;
218 		if (
219 #ifdef	DIRECTED_BROADCAST
220 		    ia->ia_ifp == m->m_pkthdr.rcvif &&
221 #endif
222 		    (ia->ia_ifp->if_flags & IFF_BROADCAST)) {
223 			u_long t;
224 
225 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
226 			    ip->ip_dst.s_addr)
227 				goto ours;
228 			if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr)
229 				goto ours;
230 			/*
231 			 * Look for all-0's host part (old broadcast addr),
232 			 * either for subnet or net.
233 			 */
234 			t = ntohl(ip->ip_dst.s_addr);
235 			if (t == ia->ia_subnet)
236 				goto ours;
237 			if (t == ia->ia_net)
238 				goto ours;
239 		}
240 	}
241 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
242 		struct in_multi *inm;
243 #ifdef MROUTING
244 		extern struct socket *ip_mrouter;
245 
246 		if (ip_mrouter) {
247 			/*
248 			 * If we are acting as a multicast router, all
249 			 * incoming multicast packets are passed to the
250 			 * kernel-level multicast forwarding function.
251 			 * The packet is returned (relatively) intact; if
252 			 * ip_mforward() returns a non-zero value, the packet
253 			 * must be discarded, else it may be accepted below.
254 			 *
255 			 * (The IP ident field is put in the same byte order
256 			 * as expected when ip_mforward() is called from
257 			 * ip_output().)
258 			 */
259 			ip->ip_id = htons(ip->ip_id);
260 			if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) {
261 				ipstat.ips_cantforward++;
262 				m_freem(m);
263 				goto next;
264 			}
265 			ip->ip_id = ntohs(ip->ip_id);
266 
267 			/*
268 			 * The process-level routing demon needs to receive
269 			 * all multicast IGMP packets, whether or not this
270 			 * host belongs to their destination groups.
271 			 */
272 			if (ip->ip_p == IPPROTO_IGMP)
273 				goto ours;
274 			ipstat.ips_forward++;
275 		}
276 #endif
277 		/*
278 		 * See if we belong to the destination multicast group on the
279 		 * arrival interface.
280 		 */
281 		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
282 		if (inm == NULL) {
283 			ipstat.ips_cantforward++;
284 			m_freem(m);
285 			goto next;
286 		}
287 		goto ours;
288 	}
289 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
290 		goto ours;
291 	if (ip->ip_dst.s_addr == INADDR_ANY)
292 		goto ours;
293 
294 	/*
295 	 * Not for us; forward if possible and desirable.
296 	 */
297 	if (ipforwarding == 0) {
298 		ipstat.ips_cantforward++;
299 		m_freem(m);
300 	} else
301 		ip_forward(m, 0);
302 	goto next;
303 
304 ours:
305 	/*
306 	 * If offset or IP_MF are set, must reassemble.
307 	 * Otherwise, nothing need be done.
308 	 * (We could look in the reassembly queue to see
309 	 * if the packet was previously fragmented,
310 	 * but it's not worth the time; just let them time out.)
311 	 */
312 	if (ip->ip_off &~ IP_DF) {
313 		if (m->m_flags & M_EXT) {		/* XXX */
314 			if ((m = m_pullup(m, sizeof (struct ip))) == 0) {
315 				ipstat.ips_toosmall++;
316 				goto next;
317 			}
318 			ip = mtod(m, struct ip *);
319 		}
320 		/*
321 		 * Look for queue of fragments
322 		 * of this datagram.
323 		 */
324 		for (fp = ipq.next; fp != &ipq; fp = fp->next)
325 			if (ip->ip_id == fp->ipq_id &&
326 			    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
327 			    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
328 			    ip->ip_p == fp->ipq_p)
329 				goto found;
330 		fp = 0;
331 found:
332 
333 		/*
334 		 * Adjust ip_len to not reflect header,
335 		 * set ip_mff if more fragments are expected,
336 		 * convert offset of this to bytes.
337 		 */
338 		ip->ip_len -= hlen;
339 		((struct ipasfrag *)ip)->ipf_mff &= ~1;
340 		if (ip->ip_off & IP_MF)
341 			((struct ipasfrag *)ip)->ipf_mff |= 1;
342 		ip->ip_off <<= 3;
343 
344 		/*
345 		 * If datagram marked as having more fragments
346 		 * or if this is not the first fragment,
347 		 * attempt reassembly; if it succeeds, proceed.
348 		 */
349 		if (((struct ipasfrag *)ip)->ipf_mff & 1 || ip->ip_off) {
350 			ipstat.ips_fragments++;
351 			ip = ip_reass((struct ipasfrag *)ip, fp);
352 			if (ip == 0)
353 				goto next;
354 			ipstat.ips_reassembled++;
355 			m = dtom(ip);
356 		} else
357 			if (fp)
358 				ip_freef(fp);
359 	} else
360 		ip->ip_len -= hlen;
361 
362 	/*
363 	 * Switch out to protocol's input routine.
364 	 */
365 	ipstat.ips_delivered++;
366 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
367 	goto next;
368 bad:
369 	m_freem(m);
370 	goto next;
371 }
372 
373 /*
374  * Take incoming datagram fragment and try to
375  * reassemble it into whole datagram.  If a chain for
376  * reassembly of this datagram already exists, then it
377  * is given as fp; otherwise have to make a chain.
378  */
379 struct ip *
380 ip_reass(ip, fp)
381 	register struct ipasfrag *ip;
382 	register struct ipq *fp;
383 {
384 	register struct mbuf *m = dtom(ip);
385 	register struct ipasfrag *q;
386 	struct mbuf *t;
387 	int hlen = ip->ip_hl << 2;
388 	int i, next;
389 
390 	/*
391 	 * Presence of header sizes in mbufs
392 	 * would confuse code below.
393 	 */
394 	m->m_data += hlen;
395 	m->m_len -= hlen;
396 
397 	/*
398 	 * If first fragment to arrive, create a reassembly queue.
399 	 */
400 	if (fp == 0) {
401 		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
402 			goto dropfrag;
403 		fp = mtod(t, struct ipq *);
404 		insque(fp, &ipq);
405 		fp->ipq_ttl = IPFRAGTTL;
406 		fp->ipq_p = ip->ip_p;
407 		fp->ipq_id = ip->ip_id;
408 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
409 		fp->ipq_src = ((struct ip *)ip)->ip_src;
410 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
411 		q = (struct ipasfrag *)fp;
412 		goto insert;
413 	}
414 
415 	/*
416 	 * Find a segment which begins after this one does.
417 	 */
418 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
419 		if (q->ip_off > ip->ip_off)
420 			break;
421 
422 	/*
423 	 * If there is a preceding segment, it may provide some of
424 	 * our data already.  If so, drop the data from the incoming
425 	 * segment.  If it provides all of our data, drop us.
426 	 */
427 	if (q->ipf_prev != (struct ipasfrag *)fp) {
428 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
429 		if (i > 0) {
430 			if (i >= ip->ip_len)
431 				goto dropfrag;
432 			m_adj(dtom(ip), i);
433 			ip->ip_off += i;
434 			ip->ip_len -= i;
435 		}
436 	}
437 
438 	/*
439 	 * While we overlap succeeding segments trim them or,
440 	 * if they are completely covered, dequeue them.
441 	 */
442 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
443 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
444 		if (i < q->ip_len) {
445 			q->ip_len -= i;
446 			q->ip_off += i;
447 			m_adj(dtom(q), i);
448 			break;
449 		}
450 		q = q->ipf_next;
451 		m_freem(dtom(q->ipf_prev));
452 		ip_deq(q->ipf_prev);
453 	}
454 
455 insert:
456 	/*
457 	 * Stick new segment in its place;
458 	 * check for complete reassembly.
459 	 */
460 	ip_enq(ip, q->ipf_prev);
461 	next = 0;
462 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
463 		if (q->ip_off != next)
464 			return (0);
465 		next += q->ip_len;
466 	}
467 	if (q->ipf_prev->ipf_mff & 1)
468 		return (0);
469 
470 	/*
471 	 * Reassembly is complete; concatenate fragments.
472 	 */
473 	q = fp->ipq_next;
474 	m = dtom(q);
475 	t = m->m_next;
476 	m->m_next = 0;
477 	m_cat(m, t);
478 	q = q->ipf_next;
479 	while (q != (struct ipasfrag *)fp) {
480 		t = dtom(q);
481 		q = q->ipf_next;
482 		m_cat(m, t);
483 	}
484 
485 	/*
486 	 * Create header for new ip packet by
487 	 * modifying header of first packet;
488 	 * dequeue and discard fragment reassembly header.
489 	 * Make header visible.
490 	 */
491 	ip = fp->ipq_next;
492 	ip->ip_len = next;
493 	ip->ipf_mff &= ~1;
494 	((struct ip *)ip)->ip_src = fp->ipq_src;
495 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
496 	remque(fp);
497 	(void) m_free(dtom(fp));
498 	m = dtom(ip);
499 	m->m_len += (ip->ip_hl << 2);
500 	m->m_data -= (ip->ip_hl << 2);
501 	/* some debugging cruft by sklower, below, will go away soon */
502 	if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
503 		register int plen = 0;
504 		for (t = m; m; m = m->m_next)
505 			plen += m->m_len;
506 		t->m_pkthdr.len = plen;
507 	}
508 	return ((struct ip *)ip);
509 
510 dropfrag:
511 	ipstat.ips_fragdropped++;
512 	m_freem(m);
513 	return (0);
514 }
515 
516 /*
517  * Free a fragment reassembly header and all
518  * associated datagrams.
519  */
520 void
521 ip_freef(fp)
522 	struct ipq *fp;
523 {
524 	register struct ipasfrag *q, *p;
525 
526 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) {
527 		p = q->ipf_next;
528 		ip_deq(q);
529 		m_freem(dtom(q));
530 	}
531 	remque(fp);
532 	(void) m_free(dtom(fp));
533 }
534 
535 /*
536  * Put an ip fragment on a reassembly chain.
537  * Like insque, but pointers in middle of structure.
538  */
539 void
540 ip_enq(p, prev)
541 	register struct ipasfrag *p, *prev;
542 {
543 
544 	p->ipf_prev = prev;
545 	p->ipf_next = prev->ipf_next;
546 	prev->ipf_next->ipf_prev = p;
547 	prev->ipf_next = p;
548 }
549 
550 /*
551  * To ip_enq as remque is to insque.
552  */
553 void
554 ip_deq(p)
555 	register struct ipasfrag *p;
556 {
557 
558 	p->ipf_prev->ipf_next = p->ipf_next;
559 	p->ipf_next->ipf_prev = p->ipf_prev;
560 }
561 
562 /*
563  * IP timer processing;
564  * if a timer expires on a reassembly
565  * queue, discard it.
566  */
567 void
568 ip_slowtimo()
569 {
570 	register struct ipq *fp;
571 	int s = splnet();
572 
573 	fp = ipq.next;
574 	if (fp == 0) {
575 		splx(s);
576 		return;
577 	}
578 	while (fp != &ipq) {
579 		--fp->ipq_ttl;
580 		fp = fp->next;
581 		if (fp->prev->ipq_ttl == 0) {
582 			ipstat.ips_fragtimeout++;
583 			ip_freef(fp->prev);
584 		}
585 	}
586 	splx(s);
587 }
588 
589 /*
590  * Drain off all datagram fragments.
591  */
592 void
593 ip_drain()
594 {
595 
596 	while (ipq.next != &ipq) {
597 		ipstat.ips_fragdropped++;
598 		ip_freef(ipq.next);
599 	}
600 }
601 
602 /*
603  * Do option processing on a datagram,
604  * possibly discarding it if bad options are encountered,
605  * or forwarding it if source-routed.
606  * Returns 1 if packet has been forwarded/freed,
607  * 0 if the packet should be processed further.
608  */
609 int
610 ip_dooptions(m)
611 	struct mbuf *m;
612 {
613 	register struct ip *ip = mtod(m, struct ip *);
614 	register u_char *cp;
615 	register struct ip_timestamp *ipt;
616 	register struct in_ifaddr *ia;
617 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
618 	struct in_addr *sin, dst;
619 	n_time ntime;
620 
621 	dst = ip->ip_dst;
622 	cp = (u_char *)(ip + 1);
623 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
624 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
625 		opt = cp[IPOPT_OPTVAL];
626 		if (opt == IPOPT_EOL)
627 			break;
628 		if (opt == IPOPT_NOP)
629 			optlen = 1;
630 		else {
631 			optlen = cp[IPOPT_OLEN];
632 			if (optlen <= 0 || optlen > cnt) {
633 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
634 				goto bad;
635 			}
636 		}
637 		switch (opt) {
638 
639 		default:
640 			break;
641 
642 		/*
643 		 * Source routing with record.
644 		 * Find interface with current destination address.
645 		 * If none on this machine then drop if strictly routed,
646 		 * or do nothing if loosely routed.
647 		 * Record interface address and bring up next address
648 		 * component.  If strictly routed make sure next
649 		 * address is on directly accessible net.
650 		 */
651 		case IPOPT_LSRR:
652 		case IPOPT_SSRR:
653 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
654 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
655 				goto bad;
656 			}
657 			ipaddr.sin_addr = ip->ip_dst;
658 			ia = (struct in_ifaddr *)
659 				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
660 			if (ia == 0) {
661 				if (opt == IPOPT_SSRR) {
662 					type = ICMP_UNREACH;
663 					code = ICMP_UNREACH_SRCFAIL;
664 					goto bad;
665 				}
666 				/*
667 				 * Loose routing, and not at next destination
668 				 * yet; nothing to do except forward.
669 				 */
670 				break;
671 			}
672 			off--;			/* 0 origin */
673 			if (off > optlen - sizeof(struct in_addr)) {
674 				/*
675 				 * End of source route.  Should be for us.
676 				 */
677 				save_rte(cp, ip->ip_src);
678 				break;
679 			}
680 			/*
681 			 * locate outgoing interface
682 			 */
683 			bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr,
684 			    sizeof(ipaddr.sin_addr));
685 			if (opt == IPOPT_SSRR) {
686 #define	INA	struct in_ifaddr *
687 #define	SA	struct sockaddr *
688 			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
689 				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
690 			} else
691 				ia = ip_rtaddr(ipaddr.sin_addr);
692 			if (ia == 0) {
693 				type = ICMP_UNREACH;
694 				code = ICMP_UNREACH_SRCFAIL;
695 				goto bad;
696 			}
697 			ip->ip_dst = ipaddr.sin_addr;
698 			bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
699 			    (caddr_t)(cp + off), sizeof(struct in_addr));
700 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
701 			/*
702 			 * Let ip_intr's mcast routing check handle mcast pkts
703 			 */
704 			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
705 			break;
706 
707 		case IPOPT_RR:
708 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
709 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
710 				goto bad;
711 			}
712 			/*
713 			 * If no space remains, ignore.
714 			 */
715 			off--;			/* 0 origin */
716 			if (off > optlen - sizeof(struct in_addr))
717 				break;
718 			bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr,
719 			    sizeof(ipaddr.sin_addr));
720 			/*
721 			 * locate outgoing interface; if we're the destination,
722 			 * use the incoming interface (should be same).
723 			 */
724 			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
725 			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
726 				type = ICMP_UNREACH;
727 				code = ICMP_UNREACH_HOST;
728 				goto bad;
729 			}
730 			bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
731 			    (caddr_t)(cp + off), sizeof(struct in_addr));
732 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
733 			break;
734 
735 		case IPOPT_TS:
736 			code = cp - (u_char *)ip;
737 			ipt = (struct ip_timestamp *)cp;
738 			if (ipt->ipt_len < 5)
739 				goto bad;
740 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
741 				if (++ipt->ipt_oflw == 0)
742 					goto bad;
743 				break;
744 			}
745 			sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
746 			switch (ipt->ipt_flg) {
747 
748 			case IPOPT_TS_TSONLY:
749 				break;
750 
751 			case IPOPT_TS_TSANDADDR:
752 				if (ipt->ipt_ptr + sizeof(n_time) +
753 				    sizeof(struct in_addr) > ipt->ipt_len)
754 					goto bad;
755 				ipaddr.sin_addr = dst;
756 				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
757 							    m->m_pkthdr.rcvif);
758 				if (ia == 0)
759 					continue;
760 				bcopy((caddr_t)&IA_SIN(ia)->sin_addr,
761 				    (caddr_t)sin, sizeof(struct in_addr));
762 				ipt->ipt_ptr += sizeof(struct in_addr);
763 				break;
764 
765 			case IPOPT_TS_PRESPEC:
766 				if (ipt->ipt_ptr + sizeof(n_time) +
767 				    sizeof(struct in_addr) > ipt->ipt_len)
768 					goto bad;
769 				bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr,
770 				    sizeof(struct in_addr));
771 				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
772 					continue;
773 				ipt->ipt_ptr += sizeof(struct in_addr);
774 				break;
775 
776 			default:
777 				goto bad;
778 			}
779 			ntime = iptime();
780 			bcopy((caddr_t)&ntime, (caddr_t)cp + ipt->ipt_ptr - 1,
781 			    sizeof(n_time));
782 			ipt->ipt_ptr += sizeof(n_time);
783 		}
784 	}
785 	if (forward) {
786 		ip_forward(m, 1);
787 		return (1);
788 	}
789 	return (0);
790 bad:
791 	ip->ip_len -= ip->ip_hl << 2;   /* XXX icmp_error adds in hdr length */
792 	icmp_error(m, type, code, 0, 0);
793 	ipstat.ips_badoptions++;
794 	return (1);
795 }
796 
797 /*
798  * Given address of next destination (final or next hop),
799  * return internet address info of interface to be used to get there.
800  */
801 struct in_ifaddr *
802 ip_rtaddr(dst)
803 	 struct in_addr dst;
804 {
805 	register struct sockaddr_in *sin;
806 
807 	sin = (struct sockaddr_in *) &ipforward_rt.ro_dst;
808 
809 	if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
810 		if (ipforward_rt.ro_rt) {
811 			RTFREE(ipforward_rt.ro_rt);
812 			ipforward_rt.ro_rt = 0;
813 		}
814 		sin->sin_family = AF_INET;
815 		sin->sin_len = sizeof(*sin);
816 		sin->sin_addr = dst;
817 
818 		rtalloc(&ipforward_rt);
819 	}
820 	if (ipforward_rt.ro_rt == 0)
821 		return ((struct in_ifaddr *)0);
822 	return ((struct in_ifaddr *) ipforward_rt.ro_rt->rt_ifa);
823 }
824 
825 /*
826  * Save incoming source route for use in replies,
827  * to be picked up later by ip_srcroute if the receiver is interested.
828  */
829 void
830 save_rte(option, dst)
831 	u_char *option;
832 	struct in_addr dst;
833 {
834 	unsigned olen;
835 
836 	olen = option[IPOPT_OLEN];
837 #ifdef DIAGNOSTIC
838 	if (ipprintfs)
839 		printf("save_rte: olen %d\n", olen);
840 #endif
841 	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
842 		return;
843 	bcopy((caddr_t)option, (caddr_t)ip_srcrt.srcopt, olen);
844 	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
845 	ip_srcrt.dst = dst;
846 }
847 
848 /*
849  * Retrieve incoming source route for use in replies,
850  * in the same form used by setsockopt.
851  * The first hop is placed before the options, will be removed later.
852  */
853 struct mbuf *
854 ip_srcroute()
855 {
856 	register struct in_addr *p, *q;
857 	register struct mbuf *m;
858 
859 	if (ip_nhops == 0)
860 		return ((struct mbuf *)0);
861 	m = m_get(M_DONTWAIT, MT_SOOPTS);
862 	if (m == 0)
863 		return ((struct mbuf *)0);
864 
865 #define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
866 
867 	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
868 	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
869 	    OPTSIZ;
870 #ifdef DIAGNOSTIC
871 	if (ipprintfs)
872 		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
873 #endif
874 
875 	/*
876 	 * First save first hop for return route
877 	 */
878 	p = &ip_srcrt.route[ip_nhops - 1];
879 	*(mtod(m, struct in_addr *)) = *p--;
880 #ifdef DIAGNOSTIC
881 	if (ipprintfs)
882 		printf(" hops %lx", ntohl(mtod(m, struct in_addr *)->s_addr));
883 #endif
884 
885 	/*
886 	 * Copy option fields and padding (nop) to mbuf.
887 	 */
888 	ip_srcrt.nop = IPOPT_NOP;
889 	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
890 	bcopy((caddr_t)&ip_srcrt.nop,
891 	    mtod(m, caddr_t) + sizeof(struct in_addr), OPTSIZ);
892 	q = (struct in_addr *)(mtod(m, caddr_t) +
893 	    sizeof(struct in_addr) + OPTSIZ);
894 #undef OPTSIZ
895 	/*
896 	 * Record return path as an IP source route,
897 	 * reversing the path (pointers are now aligned).
898 	 */
899 	while (p >= ip_srcrt.route) {
900 #ifdef DIAGNOSTIC
901 		if (ipprintfs)
902 			printf(" %lx", ntohl(q->s_addr));
903 #endif
904 		*q++ = *p--;
905 	}
906 	/*
907 	 * Last hop goes to final destination.
908 	 */
909 	*q = ip_srcrt.dst;
910 #ifdef DIAGNOSTIC
911 	if (ipprintfs)
912 		printf(" %lx\n", ntohl(q->s_addr));
913 #endif
914 	return (m);
915 }
916 
917 /*
918  * Strip out IP options, at higher
919  * level protocol in the kernel.
920  * Second argument is buffer to which options
921  * will be moved, and return value is their length.
922  * XXX should be deleted; last arg currently ignored.
923  */
924 void
925 ip_stripoptions(m, mopt)
926 	register struct mbuf *m;
927 	struct mbuf *mopt;
928 {
929 	register int i;
930 	struct ip *ip = mtod(m, struct ip *);
931 	register caddr_t opts;
932 	int olen;
933 
934 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
935 	opts = (caddr_t)(ip + 1);
936 	i = m->m_len - (sizeof (struct ip) + olen);
937 	bcopy(opts  + olen, opts, (unsigned)i);
938 	m->m_len -= olen;
939 	if (m->m_flags & M_PKTHDR)
940 		m->m_pkthdr.len -= olen;
941 	ip->ip_hl = sizeof(struct ip) >> 2;
942 }
943 
944 u_char inetctlerrmap[PRC_NCMDS] = {
945 	0,		0,		0,		0,
946 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
947 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
948 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
949 	0,		0,		0,		0,
950 	ENOPROTOOPT
951 };
952 
953 /*
954  * Forward a packet.  If some error occurs return the sender
955  * an icmp packet.  Note we can't always generate a meaningful
956  * icmp message because icmp doesn't have a large enough repertoire
957  * of codes and types.
958  *
959  * If not forwarding, just drop the packet.  This could be confusing
960  * if ipforwarding was zero but some routing protocol was advancing
961  * us as a gateway to somewhere.  However, we must let the routing
962  * protocol deal with that.
963  *
964  * The srcrt parameter indicates whether the packet is being forwarded
965  * via a source route.
966  */
967 void
968 ip_forward(m, srcrt)
969 	struct mbuf *m;
970 	int srcrt;
971 {
972 	register struct ip *ip = mtod(m, struct ip *);
973 	register struct sockaddr_in *sin;
974 	register struct rtentry *rt;
975 	int error, type = 0, code;
976 	struct mbuf *mcopy;
977 	n_long dest;
978 	struct ifnet *destifp;
979 
980 	dest = 0;
981 #ifdef DIAGNOSTIC
982 	if (ipprintfs)
983 		printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
984 			ip->ip_dst, ip->ip_ttl);
985 #endif
986 	if (m->m_flags & M_BCAST || in_canforward(ip->ip_dst) == 0) {
987 		ipstat.ips_cantforward++;
988 		m_freem(m);
989 		return;
990 	}
991 	HTONS(ip->ip_id);
992 	if (ip->ip_ttl <= IPTTLDEC) {
993 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
994 		return;
995 	}
996 	ip->ip_ttl -= IPTTLDEC;
997 
998 	sin = (struct sockaddr_in *)&ipforward_rt.ro_dst;
999 	if ((rt = ipforward_rt.ro_rt) == 0 ||
1000 	    ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
1001 		if (ipforward_rt.ro_rt) {
1002 			RTFREE(ipforward_rt.ro_rt);
1003 			ipforward_rt.ro_rt = 0;
1004 		}
1005 		sin->sin_family = AF_INET;
1006 		sin->sin_len = sizeof(*sin);
1007 		sin->sin_addr = ip->ip_dst;
1008 
1009 		rtalloc(&ipforward_rt);
1010 		if (ipforward_rt.ro_rt == 0) {
1011 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1012 			return;
1013 		}
1014 		rt = ipforward_rt.ro_rt;
1015 	}
1016 
1017 	/*
1018 	 * Save at most 64 bytes of the packet in case
1019 	 * we need to generate an ICMP message to the src.
1020 	 */
1021 	mcopy = m_copy(m, 0, imin((int)ip->ip_len, 64));
1022 
1023 #ifdef GATEWAY
1024 	ip_ifmatrix[rt->rt_ifp->if_index +
1025 	     if_index * m->m_pkthdr.rcvif->if_index]++;
1026 #endif
1027 	/*
1028 	 * If forwarding packet using same interface that it came in on,
1029 	 * perhaps should send a redirect to sender to shortcut a hop.
1030 	 * Only send redirect if source is sending directly to us,
1031 	 * and if packet was not source routed (or has any options).
1032 	 * Also, don't send redirect if forwarding using a default route
1033 	 * or a route modified by a redirect.
1034 	 */
1035 #define	satosin(sa)	((struct sockaddr_in *)(sa))
1036 	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1037 	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1038 	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
1039 	    ipsendredirects && !srcrt) {
1040 #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1041 		u_long src = ntohl(ip->ip_src.s_addr);
1042 
1043 		if (RTA(rt) &&
1044 		    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1045 		    if (rt->rt_flags & RTF_GATEWAY)
1046 			dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1047 		    else
1048 			dest = ip->ip_dst.s_addr;
1049 		    /* Router requirements says to only send host redirects */
1050 		    type = ICMP_REDIRECT;
1051 		    code = ICMP_REDIRECT_HOST;
1052 #ifdef DIAGNOSTIC
1053 		    if (ipprintfs)
1054 		        printf("redirect (%d) to %lx\n", code, (u_long)dest);
1055 #endif
1056 		}
1057 	}
1058 
1059 	error = ip_output(m, (struct mbuf *)0, &ipforward_rt, IP_FORWARDING
1060 #ifdef DIRECTED_BROADCAST
1061 			    | IP_ALLOWBROADCAST
1062 #endif
1063 						, 0);
1064 	if (error)
1065 		ipstat.ips_cantforward++;
1066 	else {
1067 		ipstat.ips_forward++;
1068 		if (type)
1069 			ipstat.ips_redirectsent++;
1070 		else {
1071 			if (mcopy)
1072 				m_freem(mcopy);
1073 			return;
1074 		}
1075 	}
1076 	if (mcopy == NULL)
1077 		return;
1078 	destifp = NULL;
1079 
1080 	switch (error) {
1081 
1082 	case 0:				/* forwarded, but need redirect */
1083 		/* type, code set above */
1084 		break;
1085 
1086 	case ENETUNREACH:		/* shouldn't happen, checked above */
1087 	case EHOSTUNREACH:
1088 	case ENETDOWN:
1089 	case EHOSTDOWN:
1090 	default:
1091 		type = ICMP_UNREACH;
1092 		code = ICMP_UNREACH_HOST;
1093 		break;
1094 
1095 	case EMSGSIZE:
1096 		type = ICMP_UNREACH;
1097 		code = ICMP_UNREACH_NEEDFRAG;
1098 		if (ipforward_rt.ro_rt)
1099 			destifp = ipforward_rt.ro_rt->rt_ifp;
1100 		ipstat.ips_cantfrag++;
1101 		break;
1102 
1103 	case ENOBUFS:
1104 		type = ICMP_SOURCEQUENCH;
1105 		code = 0;
1106 		break;
1107 	}
1108 	icmp_error(mcopy, type, code, dest, destifp);
1109 }
1110 
1111 int
1112 ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
1113 	int *name;
1114 	u_int namelen;
1115 	void *oldp;
1116 	size_t *oldlenp;
1117 	void *newp;
1118 	size_t newlen;
1119 {
1120 	/* All sysctl names at this level are terminal. */
1121 	if (namelen != 1)
1122 		return (ENOTDIR);
1123 
1124 	switch (name[0]) {
1125 	case IPCTL_FORWARDING:
1126 		return (sysctl_int(oldp, oldlenp, newp, newlen, &ipforwarding));
1127 	case IPCTL_SENDREDIRECTS:
1128 		return (sysctl_int(oldp, oldlenp, newp, newlen,
1129 			&ipsendredirects));
1130 	case IPCTL_DEFTTL:
1131 		return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_defttl));
1132 #ifdef notyet
1133 	case IPCTL_DEFMTU:
1134 		return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_mtu));
1135 #endif
1136 	default:
1137 		return (EOPNOTSUPP);
1138 	}
1139 	/* NOTREACHED */
1140 }
1141