xref: /original-bsd/sys/netinet/ip_input.c (revision 0a83ae40)
1 /*
2  * Copyright (c) 1982, 1986 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that this notice is preserved and that due credit is given
7  * to the University of California at Berkeley. The name of the University
8  * may not be used to endorse or promote products derived from this
9  * software without specific prior written permission. This software
10  * is provided ``as is'' without express or implied warranty.
11  *
12  *	@(#)ip_input.c	7.8 (Berkeley) 12/07/87
13  */
14 
15 #include "param.h"
16 #include "systm.h"
17 #include "mbuf.h"
18 #include "domain.h"
19 #include "protosw.h"
20 #include "socket.h"
21 #include "errno.h"
22 #include "time.h"
23 #include "kernel.h"
24 
25 #include "../net/if.h"
26 #include "../net/route.h"
27 
28 #include "in.h"
29 #include "in_pcb.h"
30 #include "in_systm.h"
31 #include "in_var.h"
32 #include "ip.h"
33 #include "ip_var.h"
34 #include "ip_icmp.h"
35 #include "tcp.h"
36 
37 u_char	ip_protox[IPPROTO_MAX];
38 int	ipqmaxlen = IFQ_MAXLEN;
39 struct	in_ifaddr *in_ifaddr;			/* first inet address */
40 
41 /*
42  * We need to save the IP options in case a protocol wants to respond
43  * to an incoming packet over the same route if the packet got here
44  * using IP source routing.  This allows connection establishment and
45  * maintenance when the remote end is on a network that is not known
46  * to us.
47  */
48 int	ip_nhops = 0;
49 static	struct ip_srcrt {
50 	char	nop;				/* one NOP to align */
51 	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
52 	struct	in_addr route[MAX_IPOPTLEN];
53 } ip_srcrt;
54 
55 /*
56  * IP initialization: fill in IP protocol switch table.
57  * All protocols not implemented in kernel go to raw IP protocol handler.
58  */
59 ip_init()
60 {
61 	register struct protosw *pr;
62 	register int i;
63 
64 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
65 	if (pr == 0)
66 		panic("ip_init");
67 	for (i = 0; i < IPPROTO_MAX; i++)
68 		ip_protox[i] = pr - inetsw;
69 	for (pr = inetdomain.dom_protosw;
70 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
71 		if (pr->pr_domain->dom_family == PF_INET &&
72 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
73 			ip_protox[pr->pr_protocol] = pr - inetsw;
74 	ipq.next = ipq.prev = &ipq;
75 	ip_id = time.tv_sec & 0xffff;
76 	ipintrq.ifq_maxlen = ipqmaxlen;
77 }
78 
79 u_char	ipcksum = 1;
80 struct	ip *ip_reass();
81 struct	sockaddr_in ipaddr = { AF_INET };
82 struct	route ipforward_rt;
83 
84 /*
85  * Ip input routine.  Checksum and byte swap header.  If fragmented
86  * try to reassamble.  If complete and fragment queue exists, discard.
87  * Process options.  Pass to next level.
88  */
89 ipintr()
90 {
91 	register struct ip *ip;
92 	register struct mbuf *m;
93 	struct mbuf *m0;
94 	register int i;
95 	register struct ipq *fp;
96 	register struct in_ifaddr *ia;
97 	struct ifnet *ifp;
98 	int hlen, s;
99 
100 next:
101 	/*
102 	 * Get next datagram off input queue and get IP header
103 	 * in first mbuf.
104 	 */
105 	s = splimp();
106 	IF_DEQUEUEIF(&ipintrq, m, ifp);
107 	splx(s);
108 	if (m == 0)
109 		return;
110 	/*
111 	 * If no IP addresses have been set yet but the interfaces
112 	 * are receiving, can't do anything with incoming packets yet.
113 	 */
114 	if (in_ifaddr == NULL)
115 		goto bad;
116 	ipstat.ips_total++;
117 	if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
118 	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
119 		ipstat.ips_toosmall++;
120 		goto next;
121 	}
122 	ip = mtod(m, struct ip *);
123 	hlen = ip->ip_hl << 2;
124 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
125 		ipstat.ips_badhlen++;
126 		goto bad;
127 	}
128 	if (hlen > m->m_len) {
129 		if ((m = m_pullup(m, hlen)) == 0) {
130 			ipstat.ips_badhlen++;
131 			goto next;
132 		}
133 		ip = mtod(m, struct ip *);
134 	}
135 	if (ipcksum)
136 		if (ip->ip_sum = in_cksum(m, hlen)) {
137 			ipstat.ips_badsum++;
138 			goto bad;
139 		}
140 
141 	/*
142 	 * Convert fields to host representation.
143 	 */
144 	ip->ip_len = ntohs((u_short)ip->ip_len);
145 	if (ip->ip_len < hlen) {
146 		ipstat.ips_badlen++;
147 		goto bad;
148 	}
149 	ip->ip_id = ntohs(ip->ip_id);
150 	ip->ip_off = ntohs((u_short)ip->ip_off);
151 
152 	/*
153 	 * Check that the amount of data in the buffers
154 	 * is as at least much as the IP header would have us expect.
155 	 * Trim mbufs if longer than we expect.
156 	 * Drop packet if shorter than we expect.
157 	 */
158 	i = -(u_short)ip->ip_len;
159 	m0 = m;
160 	for (;;) {
161 		i += m->m_len;
162 		if (m->m_next == 0)
163 			break;
164 		m = m->m_next;
165 	}
166 	if (i != 0) {
167 		if (i < 0) {
168 			ipstat.ips_tooshort++;
169 			m = m0;
170 			goto bad;
171 		}
172 		if (i <= m->m_len)
173 			m->m_len -= i;
174 		else
175 			m_adj(m0, -i);
176 	}
177 	m = m0;
178 
179 	/*
180 	 * Process options and, if not destined for us,
181 	 * ship it on.  ip_dooptions returns 1 when an
182 	 * error was detected (causing an icmp message
183 	 * to be sent and the original packet to be freed).
184 	 */
185 	ip_nhops = 0;		/* for source routed packets */
186 	if (hlen > sizeof (struct ip) && ip_dooptions(ip, ifp))
187 		goto next;
188 
189 	/*
190 	 * Check our list of addresses, to see if the packet is for us.
191 	 */
192 	for (ia = in_ifaddr; ia; ia = ia->ia_next) {
193 #define	satosin(sa)	((struct sockaddr_in *)(sa))
194 
195 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
196 			goto ours;
197 		if (
198 #ifdef	DIRECTED_BROADCAST
199 		    ia->ia_ifp == ifp &&
200 #endif
201 		    (ia->ia_ifp->if_flags & IFF_BROADCAST)) {
202 			u_long t;
203 
204 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
205 			    ip->ip_dst.s_addr)
206 				goto ours;
207 			if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr)
208 				goto ours;
209 			/*
210 			 * Look for all-0's host part (old broadcast addr),
211 			 * either for subnet or net.
212 			 */
213 			t = ntohl(ip->ip_dst.s_addr);
214 			if (t == ia->ia_subnet)
215 				goto ours;
216 			if (t == ia->ia_net)
217 				goto ours;
218 		}
219 	}
220 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
221 		goto ours;
222 	if (ip->ip_dst.s_addr == INADDR_ANY)
223 		goto ours;
224 
225 	/*
226 	 * Not for us; forward if possible and desirable.
227 	 */
228 	ip_forward(ip, ifp);
229 	goto next;
230 
231 ours:
232 	/*
233 	 * Look for queue of fragments
234 	 * of this datagram.
235 	 */
236 	for (fp = ipq.next; fp != &ipq; fp = fp->next)
237 		if (ip->ip_id == fp->ipq_id &&
238 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
239 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
240 		    ip->ip_p == fp->ipq_p)
241 			goto found;
242 	fp = 0;
243 found:
244 
245 	/*
246 	 * Adjust ip_len to not reflect header,
247 	 * set ip_mff if more fragments are expected,
248 	 * convert offset of this to bytes.
249 	 */
250 	ip->ip_len -= hlen;
251 	((struct ipasfrag *)ip)->ipf_mff = 0;
252 	if (ip->ip_off & IP_MF)
253 		((struct ipasfrag *)ip)->ipf_mff = 1;
254 	ip->ip_off <<= 3;
255 
256 	/*
257 	 * If datagram marked as having more fragments
258 	 * or if this is not the first fragment,
259 	 * attempt reassembly; if it succeeds, proceed.
260 	 */
261 	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
262 		ipstat.ips_fragments++;
263 		ip = ip_reass((struct ipasfrag *)ip, fp);
264 		if (ip == 0)
265 			goto next;
266 		m = dtom(ip);
267 	} else
268 		if (fp)
269 			ip_freef(fp);
270 
271 	/*
272 	 * Switch out to protocol's input routine.
273 	 */
274 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, ifp);
275 	goto next;
276 bad:
277 	m_freem(m);
278 	goto next;
279 }
280 
281 /*
282  * Take incoming datagram fragment and try to
283  * reassemble it into whole datagram.  If a chain for
284  * reassembly of this datagram already exists, then it
285  * is given as fp; otherwise have to make a chain.
286  */
287 struct ip *
288 ip_reass(ip, fp)
289 	register struct ipasfrag *ip;
290 	register struct ipq *fp;
291 {
292 	register struct mbuf *m = dtom(ip);
293 	register struct ipasfrag *q;
294 	struct mbuf *t;
295 	int hlen = ip->ip_hl << 2;
296 	int i, next;
297 
298 	/*
299 	 * Presence of header sizes in mbufs
300 	 * would confuse code below.
301 	 */
302 	m->m_off += hlen;
303 	m->m_len -= hlen;
304 
305 	/*
306 	 * If first fragment to arrive, create a reassembly queue.
307 	 */
308 	if (fp == 0) {
309 		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
310 			goto dropfrag;
311 		fp = mtod(t, struct ipq *);
312 		insque(fp, &ipq);
313 		fp->ipq_ttl = IPFRAGTTL;
314 		fp->ipq_p = ip->ip_p;
315 		fp->ipq_id = ip->ip_id;
316 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
317 		fp->ipq_src = ((struct ip *)ip)->ip_src;
318 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
319 		q = (struct ipasfrag *)fp;
320 		goto insert;
321 	}
322 
323 	/*
324 	 * Find a segment which begins after this one does.
325 	 */
326 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
327 		if (q->ip_off > ip->ip_off)
328 			break;
329 
330 	/*
331 	 * If there is a preceding segment, it may provide some of
332 	 * our data already.  If so, drop the data from the incoming
333 	 * segment.  If it provides all of our data, drop us.
334 	 */
335 	if (q->ipf_prev != (struct ipasfrag *)fp) {
336 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
337 		if (i > 0) {
338 			if (i >= ip->ip_len)
339 				goto dropfrag;
340 			m_adj(dtom(ip), i);
341 			ip->ip_off += i;
342 			ip->ip_len -= i;
343 		}
344 	}
345 
346 	/*
347 	 * While we overlap succeeding segments trim them or,
348 	 * if they are completely covered, dequeue them.
349 	 */
350 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
351 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
352 		if (i < q->ip_len) {
353 			q->ip_len -= i;
354 			q->ip_off += i;
355 			m_adj(dtom(q), i);
356 			break;
357 		}
358 		q = q->ipf_next;
359 		m_freem(dtom(q->ipf_prev));
360 		ip_deq(q->ipf_prev);
361 	}
362 
363 insert:
364 	/*
365 	 * Stick new segment in its place;
366 	 * check for complete reassembly.
367 	 */
368 	ip_enq(ip, q->ipf_prev);
369 	next = 0;
370 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
371 		if (q->ip_off != next)
372 			return (0);
373 		next += q->ip_len;
374 	}
375 	if (q->ipf_prev->ipf_mff)
376 		return (0);
377 
378 	/*
379 	 * Reassembly is complete; concatenate fragments.
380 	 */
381 	q = fp->ipq_next;
382 	m = dtom(q);
383 	t = m->m_next;
384 	m->m_next = 0;
385 	m_cat(m, t);
386 	q = q->ipf_next;
387 	while (q != (struct ipasfrag *)fp) {
388 		t = dtom(q);
389 		q = q->ipf_next;
390 		m_cat(m, t);
391 	}
392 
393 	/*
394 	 * Create header for new ip packet by
395 	 * modifying header of first packet;
396 	 * dequeue and discard fragment reassembly header.
397 	 * Make header visible.
398 	 */
399 	ip = fp->ipq_next;
400 	ip->ip_len = next;
401 	((struct ip *)ip)->ip_src = fp->ipq_src;
402 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
403 	remque(fp);
404 	(void) m_free(dtom(fp));
405 	m = dtom(ip);
406 	m->m_len += (ip->ip_hl << 2);
407 	m->m_off -= (ip->ip_hl << 2);
408 	return ((struct ip *)ip);
409 
410 dropfrag:
411 	ipstat.ips_fragdropped++;
412 	m_freem(m);
413 	return (0);
414 }
415 
416 /*
417  * Free a fragment reassembly header and all
418  * associated datagrams.
419  */
420 ip_freef(fp)
421 	struct ipq *fp;
422 {
423 	register struct ipasfrag *q, *p;
424 
425 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) {
426 		p = q->ipf_next;
427 		ip_deq(q);
428 		m_freem(dtom(q));
429 	}
430 	remque(fp);
431 	(void) m_free(dtom(fp));
432 }
433 
434 /*
435  * Put an ip fragment on a reassembly chain.
436  * Like insque, but pointers in middle of structure.
437  */
438 ip_enq(p, prev)
439 	register struct ipasfrag *p, *prev;
440 {
441 
442 	p->ipf_prev = prev;
443 	p->ipf_next = prev->ipf_next;
444 	prev->ipf_next->ipf_prev = p;
445 	prev->ipf_next = p;
446 }
447 
448 /*
449  * To ip_enq as remque is to insque.
450  */
451 ip_deq(p)
452 	register struct ipasfrag *p;
453 {
454 
455 	p->ipf_prev->ipf_next = p->ipf_next;
456 	p->ipf_next->ipf_prev = p->ipf_prev;
457 }
458 
459 /*
460  * IP timer processing;
461  * if a timer expires on a reassembly
462  * queue, discard it.
463  */
464 ip_slowtimo()
465 {
466 	register struct ipq *fp;
467 	int s = splnet();
468 
469 	fp = ipq.next;
470 	if (fp == 0) {
471 		splx(s);
472 		return;
473 	}
474 	while (fp != &ipq) {
475 		--fp->ipq_ttl;
476 		fp = fp->next;
477 		if (fp->prev->ipq_ttl == 0) {
478 			ipstat.ips_fragtimeout++;
479 			ip_freef(fp->prev);
480 		}
481 	}
482 	splx(s);
483 }
484 
485 /*
486  * Drain off all datagram fragments.
487  */
488 ip_drain()
489 {
490 
491 	while (ipq.next != &ipq) {
492 		ipstat.ips_fragdropped++;
493 		ip_freef(ipq.next);
494 	}
495 }
496 
497 extern struct in_ifaddr *ifptoia();
498 struct in_ifaddr *ip_rtaddr();
499 
500 /*
501  * Do option processing on a datagram,
502  * possibly discarding it if bad options
503  * are encountered.
504  */
505 ip_dooptions(ip, ifp)
506 	register struct ip *ip;
507 	struct ifnet *ifp;
508 {
509 	register u_char *cp;
510 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB;
511 	register struct ip_timestamp *ipt;
512 	register struct in_ifaddr *ia;
513 	struct in_addr *sin;
514 	n_time ntime;
515 
516 	cp = (u_char *)(ip + 1);
517 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
518 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
519 		opt = cp[IPOPT_OPTVAL];
520 		if (opt == IPOPT_EOL)
521 			break;
522 		if (opt == IPOPT_NOP)
523 			optlen = 1;
524 		else {
525 			optlen = cp[IPOPT_OLEN];
526 			if (optlen <= 0 || optlen > cnt) {
527 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
528 				goto bad;
529 			}
530 		}
531 		switch (opt) {
532 
533 		default:
534 			break;
535 
536 		/*
537 		 * Source routing with record.
538 		 * Find interface with current destination address.
539 		 * If none on this machine then drop if strictly routed,
540 		 * or do nothing if loosely routed.
541 		 * Record interface address and bring up next address
542 		 * component.  If strictly routed make sure next
543 		 * address on directly accessible net.
544 		 */
545 		case IPOPT_LSRR:
546 		case IPOPT_SSRR:
547 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
548 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
549 				goto bad;
550 			}
551 			ipaddr.sin_addr = ip->ip_dst;
552 			ia = (struct in_ifaddr *)
553 				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
554 			if (ia == 0) {
555 				if (opt == IPOPT_SSRR) {
556 					type = ICMP_UNREACH;
557 					code = ICMP_UNREACH_SRCFAIL;
558 					goto bad;
559 				}
560 				/*
561 				 * Loose routing, and not at next destination
562 				 * yet; nothing to do except forward.
563 				 */
564 				break;
565 			}
566 			off--;			/* 0 origin */
567 			if (off > optlen - sizeof(struct in_addr)) {
568 				/*
569 				 * End of source route.  Should be for us.
570 				 */
571 				save_rte(cp, ip->ip_src);
572 				break;
573 			}
574 			/*
575 			 * locate outgoing interface
576 			 */
577 			bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr,
578 			    sizeof(ipaddr.sin_addr));
579 			if ((opt == IPOPT_SSRR &&
580 			    in_iaonnetof(in_netof(ipaddr.sin_addr)) == 0) ||
581 			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
582 				type = ICMP_UNREACH;
583 				code = ICMP_UNREACH_SRCFAIL;
584 				goto bad;
585 			}
586 			ip->ip_dst = ipaddr.sin_addr;
587 			bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
588 			    (caddr_t)(cp + off), sizeof(struct in_addr));
589 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
590 			break;
591 
592 		case IPOPT_RR:
593 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
594 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
595 				goto bad;
596 			}
597 			/*
598 			 * If no space remains, ignore.
599 			 */
600 			off--;			/* 0 origin */
601 			if (off > optlen - sizeof(struct in_addr))
602 				break;
603 			bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr,
604 			    sizeof(ipaddr.sin_addr));
605 			/*
606 			 * locate outgoing interface
607 			 */
608 			if ((ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
609 				type = ICMP_UNREACH;
610 				code = ICMP_UNREACH_HOST;
611 				goto bad;
612 			}
613 			bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
614 			    (caddr_t)(cp + off), sizeof(struct in_addr));
615 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
616 			break;
617 
618 		case IPOPT_TS:
619 			code = cp - (u_char *)ip;
620 			ipt = (struct ip_timestamp *)cp;
621 			if (ipt->ipt_len < 5)
622 				goto bad;
623 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
624 				if (++ipt->ipt_oflw == 0)
625 					goto bad;
626 				break;
627 			}
628 			sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
629 			switch (ipt->ipt_flg) {
630 
631 			case IPOPT_TS_TSONLY:
632 				break;
633 
634 			case IPOPT_TS_TSANDADDR:
635 				if (ipt->ipt_ptr + sizeof(n_time) +
636 				    sizeof(struct in_addr) > ipt->ipt_len)
637 					goto bad;
638 				ia = ifptoia(ifp);
639 				bcopy((caddr_t)&IA_SIN(ia)->sin_addr,
640 				    (caddr_t)sin, sizeof(struct in_addr));
641 				ipt->ipt_ptr += sizeof(struct in_addr);
642 				break;
643 
644 			case IPOPT_TS_PRESPEC:
645 				if (ipt->ipt_ptr + sizeof(n_time) +
646 				    sizeof(struct in_addr) > ipt->ipt_len)
647 					goto bad;
648 				bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr,
649 				    sizeof(struct in_addr));
650 				if (ifa_ifwithaddr((struct sockaddr *)&ipaddr) == 0)
651 					continue;
652 				ipt->ipt_ptr += sizeof(struct in_addr);
653 				break;
654 
655 			default:
656 				goto bad;
657 			}
658 			ntime = iptime();
659 			bcopy((caddr_t)&ntime, (caddr_t)cp + ipt->ipt_ptr - 1,
660 			    sizeof(n_time));
661 			ipt->ipt_ptr += sizeof(n_time);
662 		}
663 	}
664 	return (0);
665 bad:
666 	icmp_error(ip, type, code, ifp);
667 	return (1);
668 }
669 
670 /*
671  * Given address of next destination (final or next hop),
672  * return internet address info of interface to be used to get there.
673  */
674 struct in_ifaddr *
675 ip_rtaddr(dst)
676 	 struct in_addr dst;
677 {
678 	register struct sockaddr_in *sin;
679 	register struct in_ifaddr *ia;
680 
681 	sin = (struct sockaddr_in *) &ipforward_rt.ro_dst;
682 
683 	if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
684 		if (ipforward_rt.ro_rt) {
685 			RTFREE(ipforward_rt.ro_rt);
686 			ipforward_rt.ro_rt = 0;
687 		}
688 		sin->sin_family = AF_INET;
689 		sin->sin_addr = dst;
690 
691 		rtalloc(&ipforward_rt);
692 	}
693 	if (ipforward_rt.ro_rt == 0)
694 		return ((struct in_ifaddr *)0);
695 	/*
696 	 * Find address associated with outgoing interface.
697 	 */
698 	for (ia = in_ifaddr; ia; ia = ia->ia_next)
699 		if (ia->ia_ifp == ipforward_rt.ro_rt->rt_ifp)
700 			break;
701 	return (ia);
702 }
703 
704 /*
705  * Save incoming source route for use in replies,
706  * to be picked up later by ip_srcroute if the receiver is interested.
707  */
708 save_rte(option, dst)
709 	u_char *option;
710 	struct in_addr dst;
711 {
712 	unsigned olen;
713 	extern ipprintfs;
714 
715 	olen = option[IPOPT_OLEN];
716 	if (olen > sizeof(ip_srcrt) - 1) {
717 		if (ipprintfs)
718 			printf("save_rte: olen %d\n", olen);
719 		return;
720 	}
721 	bcopy((caddr_t)option, (caddr_t)ip_srcrt.srcopt, olen);
722 	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
723 	ip_srcrt.route[ip_nhops++] = dst;
724 }
725 
726 /*
727  * Retrieve incoming source route for use in replies,
728  * in the same form used by setsockopt.
729  * The first hop is placed before the options, will be removed later.
730  */
731 struct mbuf *
732 ip_srcroute()
733 {
734 	register struct in_addr *p, *q;
735 	register struct mbuf *m;
736 
737 	if (ip_nhops == 0)
738 		return ((struct mbuf *)0);
739 	m = m_get(M_DONTWAIT, MT_SOOPTS);
740 	if (m == 0)
741 		return ((struct mbuf *)0);
742 	m->m_len = ip_nhops * sizeof(struct in_addr) + IPOPT_OFFSET + 1 + 1;
743 
744 	/*
745 	 * First save first hop for return route
746 	 */
747 	p = &ip_srcrt.route[ip_nhops - 1];
748 	*(mtod(m, struct in_addr *)) = *p--;
749 
750 	/*
751 	 * Copy option fields and padding (nop) to mbuf.
752 	 */
753 	ip_srcrt.nop = IPOPT_NOP;
754 	bcopy((caddr_t)&ip_srcrt, mtod(m, caddr_t) + sizeof(struct in_addr),
755 	    IPOPT_OFFSET + 1 + 1);
756 	q = (struct in_addr *)(mtod(m, caddr_t) +
757 	    sizeof(struct in_addr) + IPOPT_OFFSET + 1 + 1);
758 	/*
759 	 * Record return path as an IP source route,
760 	 * reversing the path (pointers are now aligned).
761 	 */
762 	while (p >= ip_srcrt.route)
763 		*q++ = *p--;
764 	return (m);
765 }
766 
767 /*
768  * Strip out IP options, at higher
769  * level protocol in the kernel.
770  * Second argument is buffer to which options
771  * will be moved, and return value is their length.
772  */
773 ip_stripoptions(ip, mopt)
774 	struct ip *ip;
775 	struct mbuf *mopt;
776 {
777 	register int i;
778 	register struct mbuf *m;
779 	register caddr_t opts;
780 	int olen;
781 
782 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
783 	m = dtom(ip);
784 	opts = (caddr_t)(ip + 1);
785 	if (mopt) {
786 		mopt->m_len = olen;
787 		mopt->m_off = MMINOFF;
788 		bcopy(opts, mtod(mopt, caddr_t), (unsigned)olen);
789 	}
790 	i = m->m_len - (sizeof (struct ip) + olen);
791 	bcopy(opts  + olen, opts, (unsigned)i);
792 	m->m_len -= olen;
793 	ip->ip_hl = sizeof(struct ip) >> 2;
794 }
795 
/*
 * Error number for each protocol control command, indexed by
 * command number; 0 means the command maps to no error.
 * Entries beyond the last one listed are implicitly 0.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		/*  0 */
	0,		/*  1 */
	0,		/*  2 */
	0,		/*  3 */
	0,		/*  4 */
	0,		/*  5 */
	EHOSTDOWN,	/*  6 */
	EHOSTUNREACH,	/*  7 */
	ENETUNREACH,	/*  8 */
	EHOSTUNREACH,	/*  9 */
	ECONNREFUSED,	/* 10 */
	ECONNREFUSED,	/* 11 */
	EMSGSIZE,	/* 12 */
	EHOSTUNREACH,	/* 13 */
	0,		/* 14 */
	0,		/* 15 */
	0,		/* 16 */
	0,		/* 17 */
	0,		/* 18 */
	0,		/* 19 */
	ENOPROTOOPT	/* 20 */
};
804 
805 #ifndef	IPFORWARDING
806 #define	IPFORWARDING	1
807 #endif
808 #ifndef	IPSENDREDIRECTS
809 #define	IPSENDREDIRECTS	1
810 #endif
811 int	ipprintfs = 0;
812 int	ipforwarding = IPFORWARDING;
813 extern	int in_interfaces;
814 int	ipsendredirects = IPSENDREDIRECTS;
815 
816 /*
817  * Forward a packet.  If some error occurs return the sender
818  * an icmp packet.  Note we can't always generate a meaningful
819  * icmp message because icmp doesn't have a large enough repertoire
820  * of codes and types.
821  *
822  * If not forwarding (possibly because we have only a single external
823  * network), just drop the packet.  This could be confusing if ipforwarding
824  * was zero but some routing protocol was advancing us as a gateway
825  * to somewhere.  However, we must let the routing protocol deal with that.
826  */
827 ip_forward(ip, ifp)
828 	register struct ip *ip;
829 	struct ifnet *ifp;
830 {
831 	register int error, type = 0, code;
832 	register struct sockaddr_in *sin;
833 	struct mbuf *mcopy;
834 	struct in_addr dest;
835 
836 	dest.s_addr = 0;
837 	if (ipprintfs)
838 		printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
839 			ip->ip_dst, ip->ip_ttl);
840 	ip->ip_id = htons(ip->ip_id);
841 	if (ipforwarding == 0 || in_interfaces <= 1) {
842 		ipstat.ips_cantforward++;
843 #ifdef GATEWAY
844 		type = ICMP_UNREACH, code = ICMP_UNREACH_NET;
845 		goto sendicmp;
846 #else
847 		m_freem(dtom(ip));
848 		return;
849 #endif
850 	}
851 	if (in_canforward(ip->ip_dst) == 0) {
852 		m_freem(dtom(ip));
853 		return;
854 	}
855 	if (ip->ip_ttl <= IPTTLDEC) {
856 		type = ICMP_TIMXCEED, code = ICMP_TIMXCEED_INTRANS;
857 		goto sendicmp;
858 	}
859 	ip->ip_ttl -= IPTTLDEC;
860 
861 	/*
862 	 * Save at most 64 bytes of the packet in case
863 	 * we need to generate an ICMP message to the src.
864 	 */
865 	mcopy = m_copy(dtom(ip), 0, imin((int)ip->ip_len, 64));
866 
867 	sin = (struct sockaddr_in *)&ipforward_rt.ro_dst;
868 	if (ipforward_rt.ro_rt == 0 ||
869 	    ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
870 		if (ipforward_rt.ro_rt) {
871 			RTFREE(ipforward_rt.ro_rt);
872 			ipforward_rt.ro_rt = 0;
873 		}
874 		sin->sin_family = AF_INET;
875 		sin->sin_addr = ip->ip_dst;
876 
877 		rtalloc(&ipforward_rt);
878 	}
879 	/*
880 	 * If forwarding packet using same interface that it came in on,
881 	 * perhaps should send a redirect to sender to shortcut a hop.
882 	 * Only send redirect if source is sending directly to us,
883 	 * and if packet was not source routed (or has any options).
884 	 * Also, don't send redirect if forwarding using a default route
885 	 * or a route modfied by a redirect.
886 	 */
887 #define	satosin(sa)	((struct sockaddr_in *)(sa))
888 	if (ipforward_rt.ro_rt && ipforward_rt.ro_rt->rt_ifp == ifp &&
889 	    (ipforward_rt.ro_rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
890 	    satosin(&ipforward_rt.ro_rt->rt_dst)->sin_addr.s_addr != 0 &&
891 	    ipsendredirects && ip->ip_hl == (sizeof(struct ip) >> 2)) {
892 		struct in_ifaddr *ia;
893 		u_long src = ntohl(ip->ip_src.s_addr);
894 		u_long dst = ntohl(ip->ip_dst.s_addr);
895 
896 		if ((ia = ifptoia(ifp)) &&
897 		   (src & ia->ia_subnetmask) == ia->ia_subnet) {
898 		    if (ipforward_rt.ro_rt->rt_flags & RTF_GATEWAY)
899 			dest = satosin(&ipforward_rt.ro_rt->rt_gateway)->sin_addr;
900 		    else
901 			dest = ip->ip_dst;
902 		    /*
903 		     * If the destination is reached by a route to host,
904 		     * is on a subnet of a local net, or is directly
905 		     * on the attached net (!), use host redirect.
906 		     * (We may be the correct first hop for other subnets.)
907 		     */
908 		    type = ICMP_REDIRECT;
909 		    code = ICMP_REDIRECT_NET;
910 		    if ((ipforward_rt.ro_rt->rt_flags & RTF_HOST) ||
911 		       (ipforward_rt.ro_rt->rt_flags & RTF_GATEWAY) == 0)
912 			code = ICMP_REDIRECT_HOST;
913 		    else for (ia = in_ifaddr; ia = ia->ia_next; )
914 			if ((dst & ia->ia_netmask) == ia->ia_net) {
915 			    if (ia->ia_subnetmask != ia->ia_netmask)
916 				    code = ICMP_REDIRECT_HOST;
917 			    break;
918 			}
919 		    if (ipprintfs)
920 		        printf("redirect (%d) to %x\n", code, dest);
921 		}
922 	}
923 
924 	error = ip_output(dtom(ip), (struct mbuf *)0, &ipforward_rt,
925 		IP_FORWARDING);
926 	if (error)
927 		ipstat.ips_cantforward++;
928 	else if (type)
929 		ipstat.ips_redirectsent++;
930 	else {
931 		if (mcopy)
932 			m_freem(mcopy);
933 		ipstat.ips_forward++;
934 		return;
935 	}
936 	if (mcopy == NULL)
937 		return;
938 	ip = mtod(mcopy, struct ip *);
939 	type = ICMP_UNREACH;
940 	switch (error) {
941 
942 	case 0:				/* forwarded, but need redirect */
943 		type = ICMP_REDIRECT;
944 		/* code set above */
945 		break;
946 
947 	case ENETUNREACH:
948 	case ENETDOWN:
949 		if (in_localaddr(ip->ip_dst))
950 			code = ICMP_UNREACH_HOST;
951 		else
952 			code = ICMP_UNREACH_NET;
953 		break;
954 
955 	case EMSGSIZE:
956 		code = ICMP_UNREACH_NEEDFRAG;
957 		break;
958 
959 	case EPERM:
960 		code = ICMP_UNREACH_PORT;
961 		break;
962 
963 	case ENOBUFS:
964 		type = ICMP_SOURCEQUENCH;
965 		break;
966 
967 	case EHOSTDOWN:
968 	case EHOSTUNREACH:
969 		code = ICMP_UNREACH_HOST;
970 		break;
971 	}
972 sendicmp:
973 	icmp_error(ip, type, code, ifp, dest);
974 }
975