xref: /original-bsd/sys/netinet/ip_input.c (revision f0fd5f8a)
1 /*	ip_input.c	1.59	82/12/14	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/mbuf.h"
6 #include "../h/domain.h"
7 #include "../h/protosw.h"
8 #include "../h/socket.h"
9 #include <errno.h>
10 #include <time.h>
11 #include "../h/kernel.h"
12 
13 #include "../net/if.h"
14 #include "../net/route.h"
15 #include "../netinet/in.h"
16 #include "../netinet/in_pcb.h"
17 #include "../netinet/in_systm.h"
18 #include "../netinet/ip.h"
19 #include "../netinet/ip_var.h"
20 #include "../netinet/ip_icmp.h"
21 #include "../netinet/tcp.h"
22 
23 u_char	ip_protox[IPPROTO_MAX];
24 int	ipqmaxlen = IFQ_MAXLEN;
25 struct	ifnet *ifinet;			/* first inet interface */
26 
27 /*
28  * IP initialization: fill in IP protocol switch table.
29  * All protocols not implemented in kernel go to raw IP protocol handler.
30  */
31 ip_init()
32 {
33 	register struct protosw *pr;
34 	register int i;
35 
36 	pr = pffindproto(PF_INET, IPPROTO_RAW);
37 	if (pr == 0)
38 		panic("ip_init");
39 	for (i = 0; i < IPPROTO_MAX; i++)
40 		ip_protox[i] = pr - inetsw;
41 	for (pr = inetdomain.dom_protosw;
42 	    pr <= inetdomain.dom_protoswNPROTOSW; pr++)
43 		if (pr->pr_family == PF_INET &&
44 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
45 			ip_protox[pr->pr_protocol] = pr - inetsw;
46 	ipq.next = ipq.prev = &ipq;
47 	ip_id = time.tv_sec & 0xffff;
48 	ipintrq.ifq_maxlen = ipqmaxlen;
49 	ifinet = if_ifwithaf(AF_INET);
50 }
51 
52 u_char	ipcksum = 1;
53 struct	ip *ip_reass();
54 struct	sockaddr_in ipaddr = { AF_INET };
55 
56 /*
57  * Ip input routine.  Checksum and byte swap header.  If fragmented
58  * try to reassamble.  If complete and fragment queue exists, discard.
59  * Process options.  Pass to next level.
60  */
61 ipintr()
62 {
63 	register struct ip *ip;
64 	register struct mbuf *m;
65 	struct mbuf *m0;
66 	register int i;
67 	register struct ipq *fp;
68 	int hlen, s;
69 
70 next:
71 	/*
72 	 * Get next datagram off input queue and get IP header
73 	 * in first mbuf.
74 	 */
75 	s = splimp();
76 	IF_DEQUEUE(&ipintrq, m);
77 	splx(s);
78 	if (m == 0)
79 		return;
80 	if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
81 	    (m = m_pullup(m, sizeof (struct ip))) == 0)
82 		return;
83 	ip = mtod(m, struct ip *);
84 	if ((hlen = ip->ip_hl << 2) > m->m_len) {
85 		if ((m = m_pullup(m, hlen)) == 0)
86 			return;
87 		ip = mtod(m, struct ip *);
88 	}
89 	if (ipcksum)
90 		if (ip->ip_sum = in_cksum(m, hlen)) {
91 			printf("ip_sum %x\n", ip->ip_sum);	/* XXX */
92 			ipstat.ips_badsum++;
93 			goto bad;
94 		}
95 
96 	/*
97 	 * Convert fields to host representation.
98 	 */
99 	ip->ip_len = ntohs((u_short)ip->ip_len);
100 	ip->ip_id = ntohs(ip->ip_id);
101 	ip->ip_off = ntohs((u_short)ip->ip_off);
102 
103 	/*
104 	 * Check that the amount of data in the buffers
105 	 * is as at least much as the IP header would have us expect.
106 	 * Trim mbufs if longer than we expect.
107 	 * Drop packet if shorter than we expect.
108 	 */
109 	i = -ip->ip_len;
110 	m0 = m;
111 	for (;;) {
112 		i += m->m_len;
113 		if (m->m_next == 0)
114 			break;
115 		m = m->m_next;
116 	}
117 	if (i != 0) {
118 		if (i < 0) {
119 			ipstat.ips_tooshort++;
120 			goto bad;
121 		}
122 		if (i <= m->m_len)
123 			m->m_len -= i;
124 		else
125 			m_adj(m0, -i);
126 	}
127 	m = m0;
128 
129 	/*
130 	 * Process options and, if not destined for us,
131 	 * ship it on.  ip_dooptions returns 1 when an
132 	 * error was detected (causing an icmp message
133 	 * to be sent).
134 	 */
135 	if (hlen > sizeof (struct ip) && ip_dooptions(ip))
136 		goto next;
137 
138 	/*
139 	 * Fast check on the first internet
140 	 * interface in the list.
141 	 */
142 	if (ifinet) {
143 		struct sockaddr_in *sin;
144 
145 		sin = (struct sockaddr_in *)&ifinet->if_addr;
146 		if (sin->sin_addr.s_addr == ip->ip_dst.s_addr)
147 			goto ours;
148 		sin = (struct sockaddr_in *)&ifinet->if_broadaddr;
149 		if ((ifinet->if_flags & IFF_BROADCAST) &&
150 		    sin->sin_addr.s_addr == ip->ip_dst.s_addr)
151 			goto ours;
152 	}
153 	ipaddr.sin_addr = ip->ip_dst;
154 	if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) {
155 		ip_forward(ip);
156 		goto next;
157 	}
158 
159 ours:
160 	/*
161 	 * Look for queue of fragments
162 	 * of this datagram.
163 	 */
164 	for (fp = ipq.next; fp != &ipq; fp = fp->next)
165 		if (ip->ip_id == fp->ipq_id &&
166 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
167 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
168 		    ip->ip_p == fp->ipq_p)
169 			goto found;
170 	fp = 0;
171 found:
172 
173 	/*
174 	 * Adjust ip_len to not reflect header,
175 	 * set ip_mff if more fragments are expected,
176 	 * convert offset of this to bytes.
177 	 */
178 	ip->ip_len -= hlen;
179 	((struct ipasfrag *)ip)->ipf_mff = 0;
180 	if (ip->ip_off & IP_MF)
181 		((struct ipasfrag *)ip)->ipf_mff = 1;
182 	ip->ip_off <<= 3;
183 
184 	/*
185 	 * If datagram marked as having more fragments
186 	 * or if this is not the first fragment,
187 	 * attempt reassembly; if it succeeds, proceed.
188 	 */
189 	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
190 		ip = ip_reass((struct ipasfrag *)ip, fp);
191 		if (ip == 0)
192 			goto next;
193 		hlen = ip->ip_hl << 2;
194 		m = dtom(ip);
195 	} else
196 		if (fp)
197 			(void) ip_freef(fp);
198 
199 	/*
200 	 * Switch out to protocol's input routine.
201 	 */
202 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m);
203 	goto next;
204 bad:
205 	m_freem(m);
206 	goto next;
207 }
208 
209 /*
210  * Take incoming datagram fragment and try to
211  * reassemble it into whole datagram.  If a chain for
212  * reassembly of this datagram already exists, then it
213  * is given as fp; otherwise have to make a chain.
214  */
215 struct ip *
216 ip_reass(ip, fp)
217 	register struct ipasfrag *ip;
218 	register struct ipq *fp;
219 {
220 	register struct mbuf *m = dtom(ip);
221 	register struct ipasfrag *q;
222 	struct mbuf *t;
223 	int hlen = ip->ip_hl << 2;
224 	int i, next;
225 
226 	/*
227 	 * Presence of header sizes in mbufs
228 	 * would confuse code below.
229 	 */
230 	m->m_off += hlen;
231 	m->m_len -= hlen;
232 
233 	/*
234 	 * If first fragment to arrive, create a reassembly queue.
235 	 */
236 	if (fp == 0) {
237 		if ((t = m_get(M_WAIT, MT_FTABLE)) == NULL)
238 			goto dropfrag;
239 		fp = mtod(t, struct ipq *);
240 		insque(fp, &ipq);
241 		fp->ipq_ttl = IPFRAGTTL;
242 		fp->ipq_p = ip->ip_p;
243 		fp->ipq_id = ip->ip_id;
244 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
245 		fp->ipq_src = ((struct ip *)ip)->ip_src;
246 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
247 		q = (struct ipasfrag *)fp;
248 		goto insert;
249 	}
250 
251 	/*
252 	 * Find a segment which begins after this one does.
253 	 */
254 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
255 		if (q->ip_off > ip->ip_off)
256 			break;
257 
258 	/*
259 	 * If there is a preceding segment, it may provide some of
260 	 * our data already.  If so, drop the data from the incoming
261 	 * segment.  If it provides all of our data, drop us.
262 	 */
263 	if (q->ipf_prev != (struct ipasfrag *)fp) {
264 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
265 		if (i > 0) {
266 			if (i >= ip->ip_len)
267 				goto dropfrag;
268 			m_adj(dtom(ip), i);
269 			ip->ip_off += i;
270 			ip->ip_len -= i;
271 		}
272 	}
273 
274 	/*
275 	 * While we overlap succeeding segments trim them or,
276 	 * if they are completely covered, dequeue them.
277 	 */
278 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
279 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
280 		if (i < q->ip_len) {
281 			q->ip_len -= i;
282 			q->ip_off += i;
283 			m_adj(dtom(q), i);
284 			break;
285 		}
286 		q = q->ipf_next;
287 		m_freem(dtom(q->ipf_prev));
288 		ip_deq(q->ipf_prev);
289 	}
290 
291 insert:
292 	/*
293 	 * Stick new segment in its place;
294 	 * check for complete reassembly.
295 	 */
296 	ip_enq(ip, q->ipf_prev);
297 	next = 0;
298 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
299 		if (q->ip_off != next)
300 			return (0);
301 		next += q->ip_len;
302 	}
303 	if (q->ipf_prev->ipf_mff)
304 		return (0);
305 
306 	/*
307 	 * Reassembly is complete; concatenate fragments.
308 	 */
309 	q = fp->ipq_next;
310 	m = dtom(q);
311 	t = m->m_next;
312 	m->m_next = 0;
313 	m_cat(m, t);
314 	q = q->ipf_next;
315 	while (q != (struct ipasfrag *)fp) {
316 		t = dtom(q);
317 		q = q->ipf_next;
318 		m_cat(m, t);
319 	}
320 
321 	/*
322 	 * Create header for new ip packet by
323 	 * modifying header of first packet;
324 	 * dequeue and discard fragment reassembly header.
325 	 * Make header visible.
326 	 */
327 	ip = fp->ipq_next;
328 	ip->ip_len = next;
329 	((struct ip *)ip)->ip_src = fp->ipq_src;
330 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
331 	remque(fp);
332 	(void) m_free(dtom(fp));
333 	m = dtom(ip);
334 	m->m_len += sizeof (struct ipasfrag);
335 	m->m_off -= sizeof (struct ipasfrag);
336 	return ((struct ip *)ip);
337 
338 dropfrag:
339 	m_freem(m);
340 	return (0);
341 }
342 
343 /*
344  * Free a fragment reassembly header and all
345  * associated datagrams.
346  */
347 struct ipq *
348 ip_freef(fp)
349 	struct ipq *fp;
350 {
351 	register struct ipasfrag *q;
352 	struct mbuf *m;
353 
354 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
355 		m_freem(dtom(q));
356 	m = dtom(fp);
357 	fp = fp->next;
358 	remque(fp->prev);
359 	(void) m_free(m);
360 	return (fp);
361 }
362 
363 /*
364  * Put an ip fragment on a reassembly chain.
365  * Like insque, but pointers in middle of structure.
366  */
367 ip_enq(p, prev)
368 	register struct ipasfrag *p, *prev;
369 {
370 
371 	p->ipf_prev = prev;
372 	p->ipf_next = prev->ipf_next;
373 	prev->ipf_next->ipf_prev = p;
374 	prev->ipf_next = p;
375 }
376 
377 /*
378  * To ip_enq as remque is to insque.
379  */
380 ip_deq(p)
381 	register struct ipasfrag *p;
382 {
383 
384 	p->ipf_prev->ipf_next = p->ipf_next;
385 	p->ipf_next->ipf_prev = p->ipf_prev;
386 }
387 
388 /*
389  * IP timer processing;
390  * if a timer expires on a reassembly
391  * queue, discard it.
392  */
393 ip_slowtimo()
394 {
395 	register struct ipq *fp;
396 	int s = splnet();
397 
398 	fp = ipq.next;
399 	if (fp == 0) {
400 		splx(s);
401 		return;
402 	}
403 	while (fp != &ipq)
404 		if (--fp->ipq_ttl == 0)
405 			fp = ip_freef(fp);
406 		else
407 			fp = fp->next;
408 	splx(s);
409 }
410 
411 /*
412  * Drain off all datagram fragments.
413  */
414 ip_drain()
415 {
416 
417 	while (ipq.next != &ipq)
418 		(void) ip_freef(ipq.next);
419 }
420 
421 /*
422  * Do option processing on a datagram,
423  * possibly discarding it if bad options
424  * are encountered.
425  */
426 ip_dooptions(ip)
427 	struct ip *ip;
428 {
429 	register u_char *cp;
430 	int opt, optlen, cnt, code, type;
431 	struct in_addr *sin;
432 	register struct ip_timestamp *ipt;
433 	register struct ifnet *ifp;
434 	struct in_addr t;
435 
436 	cp = (u_char *)(ip + 1);
437 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
438 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
439 		opt = cp[0];
440 		if (opt == IPOPT_EOL)
441 			break;
442 		if (opt == IPOPT_NOP)
443 			optlen = 1;
444 		else
445 			optlen = cp[1];
446 		switch (opt) {
447 
448 		default:
449 			break;
450 
451 		/*
452 		 * Source routing with record.
453 		 * Find interface with current destination address.
454 		 * If none on this machine then drop if strictly routed,
455 		 * or do nothing if loosely routed.
456 		 * Record interface address and bring up next address
457 		 * component.  If strictly routed make sure next
458 		 * address on directly accessible net.
459 		 */
460 		case IPOPT_LSRR:
461 		case IPOPT_SSRR:
462 			if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
463 				break;
464 			sin = (struct in_addr *)(cp + cp[2]);
465 			ipaddr.sin_addr = *sin;
466 			ifp = if_ifwithaddr((struct sockaddr *)&ipaddr);
467 			type = ICMP_UNREACH, code = ICMP_UNREACH_SRCFAIL;
468 			if (ifp == 0) {
469 				if (opt == IPOPT_SSRR)
470 					goto bad;
471 				break;
472 			}
473 			t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
474 			cp[2] += 4;
475 			if (cp[2] > optlen - (sizeof (long) - 1))
476 				break;
477 			ip->ip_dst = sin[1];
478 			if (opt == IPOPT_SSRR &&
479 			    if_ifonnetof(in_netof(ip->ip_dst)) == 0)
480 				goto bad;
481 			break;
482 
483 		case IPOPT_TS:
484 			code = cp - (u_char *)ip;
485 			type = ICMP_PARAMPROB;
486 			ipt = (struct ip_timestamp *)cp;
487 			if (ipt->ipt_len < 5)
488 				goto bad;
489 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
490 				if (++ipt->ipt_oflw == 0)
491 					goto bad;
492 				break;
493 			}
494 			sin = (struct in_addr *)(cp+cp[2]);
495 			switch (ipt->ipt_flg) {
496 
497 			case IPOPT_TS_TSONLY:
498 				break;
499 
500 			case IPOPT_TS_TSANDADDR:
501 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
502 					goto bad;
503 				if (ifinet == 0)
504 					goto bad;	/* ??? */
505 				*sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr;
506 				break;
507 
508 			case IPOPT_TS_PRESPEC:
509 				ipaddr.sin_addr = *sin;
510 				if (!if_ifwithaddr((struct sockaddr *)&ipaddr))
511 					continue;
512 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
513 					goto bad;
514 				ipt->ipt_ptr += 4;
515 				break;
516 
517 			default:
518 				goto bad;
519 			}
520 			*(n_time *)sin = iptime();
521 			ipt->ipt_ptr += 4;
522 		}
523 	}
524 	return (0);
525 bad:
526 	icmp_error(ip, type, code);
527 	return (1);
528 }
529 
530 /*
531  * Strip out IP options, at higher
532  * level protocol in the kernel.
533  * Second argument is buffer to which options
534  * will be moved, and return value is their length.
535  */
536 ip_stripoptions(ip, mopt)
537 	struct ip *ip;
538 	struct mbuf *mopt;
539 {
540 	register int i;
541 	register struct mbuf *m;
542 	int olen;
543 
544 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
545 	m = dtom(ip);
546 	ip++;
547 	if (mopt) {
548 		mopt->m_len = olen;
549 		mopt->m_off = MMINOFF;
550 		bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen);
551 	}
552 	i = m->m_len - (sizeof (struct ip) + olen);
553 	bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
554 	m->m_len -= olen;
555 }
556 
557 u_char inetctlerrmap[] = {
558 	ECONNABORTED,	ECONNABORTED,	0,		0,
559 	0,		0,
560 	EHOSTDOWN,	EHOSTUNREACH,	ENETUNREACH,	EHOSTUNREACH,
561 	ECONNREFUSED,	ECONNREFUSED,	EMSGSIZE,	0,
562 	0,		0,		0,		0
563 };
564 
565 ip_ctlinput(cmd, arg)
566 	int cmd;
567 	caddr_t arg;
568 {
569 	struct in_addr *in;
570 	int tcp_abort(), udp_abort();
571 	extern struct inpcb tcb, udb;
572 
573 	if (cmd < 0 || cmd > PRC_NCMDS)
574 		return;
575 	if (inetctlerrmap[cmd] == 0)
576 		return;		/* XXX */
577 	if (cmd == PRC_IFDOWN)
578 		in = &((struct sockaddr_in *)arg)->sin_addr;
579 	else if (cmd == PRC_HOSTDEAD || cmd == PRC_HOSTUNREACH)
580 		in = (struct in_addr *)arg;
581 	else
582 		in = &((struct icmp *)arg)->icmp_ip.ip_dst;
583 /* THIS IS VERY QUESTIONABLE, SHOULD HIT ALL PROTOCOLS */
584 	in_pcbnotify(&tcb, in, (int)inetctlerrmap[cmd], tcp_abort);
585 	in_pcbnotify(&udb, in, (int)inetctlerrmap[cmd], udp_abort);
586 }
587 
/* Nonzero makes ip_forward() print source, destination and ttl. */
int ipprintfs = 0;

/* Zero makes ip_forward() refuse to forward and answer with an icmp error. */
int ipforwarding = 1;
590 /*
591  * Forward a packet.  If some error occurs return the sender
592  * and icmp packet.  Note we can't always generate a meaningful
593  * icmp message because icmp doesn't have a large enough repetoire
594  * of codes and types.
595  */
596 ip_forward(ip)
597 	register struct ip *ip;
598 {
599 	register int error, type, code;
600 	struct mbuf *mopt, *mcopy;
601 
602 	if (ipprintfs)
603 		printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
604 			ip->ip_dst, ip->ip_ttl);
605 	if (ipforwarding == 0) {
606 		/* can't tell difference between net and host */
607 		type = ICMP_UNREACH, code = ICMP_UNREACH_NET;
608 		goto sendicmp;
609 	}
610 	if (ip->ip_ttl < IPTTLDEC) {
611 		type = ICMP_TIMXCEED, code = ICMP_TIMXCEED_INTRANS;
612 		goto sendicmp;
613 	}
614 	ip->ip_ttl -= IPTTLDEC;
615 	mopt = m_get(M_DONTWAIT, MT_DATA);
616 	if (mopt == 0) {
617 		m_freem(dtom(ip));
618 		return;
619 	}
620 
621 	/*
622 	 * Save at most 64 bytes of the packet in case
623 	 * we need to generate an ICMP message to the src.
624 	 */
625 	mcopy = m_copy(dtom(ip), 0, imin(ip->ip_len, 64));
626 	ip_stripoptions(ip, mopt);
627 
628 	/* last 0 here means no directed broadcast */
629 	if ((error = ip_output(dtom(ip), mopt, (struct route *)0, 0)) == 0) {
630 		if (mcopy)
631 			m_freem(mcopy);
632 		return;
633 	}
634 	ip = mtod(mcopy, struct ip *);
635 	type = ICMP_UNREACH, code = 0;		/* need ``undefined'' */
636 	switch (error) {
637 
638 	case ENETUNREACH:
639 	case ENETDOWN:
640 		code = ICMP_UNREACH_NET;
641 		break;
642 
643 	case EMSGSIZE:
644 		code = ICMP_UNREACH_NEEDFRAG;
645 		break;
646 
647 	case EPERM:
648 		code = ICMP_UNREACH_PORT;
649 		break;
650 
651 	case ENOBUFS:
652 		type = ICMP_SOURCEQUENCH;
653 		break;
654 
655 	case EHOSTDOWN:
656 	case EHOSTUNREACH:
657 		code = ICMP_UNREACH_HOST;
658 		break;
659 	}
660 sendicmp:
661 	icmp_error(ip, type, code);
662 }
663