xref: /original-bsd/sys/netinet/ip_input.c (revision 145b48a8)
1 /*	ip_input.c	1.35	82/03/28	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/clock.h"
6 #include "../h/mbuf.h"
7 #include "../h/protosw.h"
8 #include "../h/socket.h"
9 #include "../net/in.h"
10 #include "../net/in_systm.h"
11 #include "../net/if.h"
12 #include "../net/ip.h"			/* belongs before in.h */
13 #include "../net/ip_var.h"
14 #include "../net/ip_icmp.h"
15 #include "../net/tcp.h"
16 #include "../net/route.h"
17 
18 #define	IPTTLDEC	5		/* doesn't belong here */
19 
20 u_char	ip_protox[IPPROTO_MAX];
21 int	ipqmaxlen = IFQ_MAXLEN;
22 struct	ifnet *ifinet;			/* first inet interface */
23 
24 /*
25  * IP initialization: fill in IP protocol switch table.
26  * All protocols not implemented in kernel go to raw IP protocol handler.
27  */
28 ip_init()
29 {
30 	register struct protosw *pr;
31 	register int i;
32 
33 COUNT(IP_INIT);
34 	pr = pffindproto(PF_INET, IPPROTO_RAW);
35 	if (pr == 0)
36 		panic("ip_init");
37 	for (i = 0; i < IPPROTO_MAX; i++)
38 		ip_protox[i] = pr - protosw;
39 	for (pr = protosw; pr <= protoswLAST; pr++)
40 		if (pr->pr_family == PF_INET &&
41 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
42 			ip_protox[pr->pr_protocol] = pr - protosw;
43 	ipq.next = ipq.prev = &ipq;
44 	ip_id = time & 0xffff;
45 	ipintrq.ifq_maxlen = ipqmaxlen;
46 	ifinet = if_ifwithaf(AF_INET);
47 }
48 
49 u_char	ipcksum = 1;
50 struct	ip *ip_reass();
51 int	ipforwarding = 0;
52 struct	sockaddr_in ipaddr = { AF_INET };
53 
54 /*
55  * Ip input routine.  Checksum and byte swap header.  If fragmented
56  * try to reassamble.  If complete and fragment queue exists, discard.
57  * Process options.  Pass to next level.
58  */
59 ipintr()
60 {
61 	register struct ip *ip;
62 	register struct mbuf *m;
63 	struct mbuf *m0, *mopt;
64 	register int i;
65 	register struct ipq *fp;
66 	int hlen, s;
67 
68 COUNT(IPINTR);
69 next:
70 	/*
71 	 * Get next datagram off input queue and get IP header
72 	 * in first mbuf.
73 	 */
74 	s = splimp();
75 	IF_DEQUEUE(&ipintrq, m);
76 	splx(s);
77 	if (m == 0)
78 		return;
79 	if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
80 	    (m = m_pullup(m, sizeof (struct ip))) == 0)
81 		return;
82 	ip = mtod(m, struct ip *);
83 	if ((hlen = ip->ip_hl << 2) > m->m_len) {
84 		if ((m = m_pullup(m, hlen)) == 0)
85 			return;
86 		ip = mtod(m, struct ip *);
87 	}
88 	if (ipcksum)
89 		if (ip->ip_sum = in_cksum(m, hlen)) {
90 			printf("ip_sum %x\n", ip->ip_sum);	/* XXX */
91 			ipstat.ips_badsum++;
92 			goto bad;
93 		}
94 
95 #if vax
96 	/*
97 	 * Convert fields to host representation.
98 	 */
99 	ip->ip_len = ntohs((u_short)ip->ip_len);
100 	ip->ip_id = ntohs(ip->ip_id);
101 	ip->ip_off = ntohs((u_short)ip->ip_off);
102 #endif
103 
104 	/*
105 	 * Check that the amount of data in the buffers
106 	 * is as at least much as the IP header would have us expect.
107 	 * Trim mbufs if longer than we expect.
108 	 * Drop packet if shorter than we expect.
109 	 */
110 	i = 0;
111 	m0 = m;
112 	for (; m != NULL; m = m->m_next) {
113 		if (m->m_free) panic("ipinput already free");
114 		i += m->m_len;
115 	}
116 	m = m0;
117 	if (i != ip->ip_len) {
118 		if (i < ip->ip_len) {
119 			ipstat.ips_tooshort++;
120 			goto bad;
121 		}
122 		m_adj(m, ip->ip_len - i);
123 	}
124 
125 	/*
126 	 * Process options and, if not destined for us,
127 	 * ship it on.
128 	 */
129 	if (hlen > sizeof (struct ip))
130 		ip_dooptions(ip);
131 
132 	/*
133 	 * Fast check on the first interface in the list.
134 	 */
135 	if (ifinet) {
136 		struct sockaddr_in *sin;
137 
138 		sin = (struct sockaddr_in *)&ifinet->if_addr;
139 		if (sin->sin_addr.s_addr == ip->ip_dst.s_addr)
140 			goto ours;
141 	}
142 	ipaddr.sin_addr = ip->ip_dst;
143 	if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) {
144 		register struct rtentry *rt;
145 
146 printf("forward: dst %x ttl %x\n", ip->ip_dst, ip->ip_ttl);
147 		if (ipforwarding == 0)
148 			goto bad;
149 		if (ip->ip_ttl < IPTTLDEC) {
150 			icmp_error(ip, ICMP_TIMXCEED, 0);
151 			goto next;
152 		}
153 		ip->ip_ttl -= IPTTLDEC;
154 		mopt = m_get(M_DONTWAIT);
155 		if (mopt == 0)
156 			goto bad;
157 		ip_stripoptions(ip, mopt);
158 
159 		/*
160 		 * Check the routing table in case we should
161 		 * munge the src address before it gets passed on.
162 		 */
163 		ipaddr.sin_addr = ip->ip_src;
164 		rt = reroute(&ipaddr);
165 		if (rt && (rt->rt_flags & RTF_MUNGE)) {
166 			struct sockaddr_in *sin;
167 
168 			sin = (struct sockaddr_in *)&rt->rt_dst;
169 			ip->ip_src = sin->sin_addr;
170 		}
171 		/* 0 here means no directed broadcast */
172 		(void) ip_output(m0, mopt, 0, 0);
173 		goto next;
174 	}
175 
176 ours:
177 	/*
178 	 * Look for queue of fragments
179 	 * of this datagram.
180 	 */
181 	for (fp = ipq.next; fp != &ipq; fp = fp->next)
182 		if (ip->ip_id == fp->ipq_id &&
183 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
184 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
185 		    ip->ip_p == fp->ipq_p)
186 			goto found;
187 	fp = 0;
188 found:
189 
190 	/*
191 	 * Adjust ip_len to not reflect header,
192 	 * set ip_mff if more fragments are expected,
193 	 * convert offset of this to bytes.
194 	 */
195 	ip->ip_len -= hlen;
196 	((struct ipasfrag *)ip)->ipf_mff = 0;
197 	if (ip->ip_off & IP_MF)
198 		((struct ipasfrag *)ip)->ipf_mff = 1;
199 	ip->ip_off <<= 3;
200 
201 	/*
202 	 * If datagram marked as having more fragments
203 	 * or if this is not the first fragment,
204 	 * attempt reassembly; if it succeeds, proceed.
205 	 */
206 	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
207 		ip = ip_reass((struct ipasfrag *)ip, fp);
208 		if (ip == 0)
209 			goto next;
210 		hlen = ip->ip_hl << 2;
211 		m = dtom(ip);
212 	} else
213 		if (fp)
214 			(void) ip_freef(fp);
215 
216 	/*
217 	 * Switch out to protocol's input routine.
218 	 */
219 	(*protosw[ip_protox[ip->ip_p]].pr_input)(m);
220 	goto next;
221 bad:
222 	m_freem(m);
223 	goto next;
224 }
225 
226 /*
227  * Take incoming datagram fragment and try to
228  * reassemble it into whole datagram.  If a chain for
229  * reassembly of this datagram already exists, then it
230  * is given as fp; otherwise have to make a chain.
231  */
232 struct ip *
233 ip_reass(ip, fp)
234 	register struct ipasfrag *ip;
235 	register struct ipq *fp;
236 {
237 	register struct mbuf *m = dtom(ip);
238 	register struct ipasfrag *q;
239 	struct mbuf *t;
240 	int hlen = ip->ip_hl << 2;
241 	int i, next;
242 COUNT(IP_REASS);
243 
244 	/*
245 	 * Presence of header sizes in mbufs
246 	 * would confuse code below.
247 	 */
248 	m->m_off += hlen;
249 	m->m_len -= hlen;
250 
251 	/*
252 	 * If first fragment to arrive, create a reassembly queue.
253 	 */
254 	if (fp == 0) {
255 		if ((t = m_get(M_WAIT)) == NULL)
256 			goto dropfrag;
257 		t->m_off = MMINOFF;
258 		fp = mtod(t, struct ipq *);
259 		insque(fp, &ipq);
260 		fp->ipq_ttl = IPFRAGTTL;
261 		fp->ipq_p = ip->ip_p;
262 		fp->ipq_id = ip->ip_id;
263 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
264 		fp->ipq_src = ((struct ip *)ip)->ip_src;
265 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
266 		q = (struct ipasfrag *)fp;
267 		goto insert;
268 	}
269 
270 	/*
271 	 * Find a segment which begins after this one does.
272 	 */
273 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
274 		if (q->ip_off > ip->ip_off)
275 			break;
276 
277 	/*
278 	 * If there is a preceding segment, it may provide some of
279 	 * our data already.  If so, drop the data from the incoming
280 	 * segment.  If it provides all of our data, drop us.
281 	 */
282 	if (q->ipf_prev != (struct ipasfrag *)fp) {
283 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
284 		if (i > 0) {
285 			if (i >= ip->ip_len)
286 				goto dropfrag;
287 			m_adj(dtom(ip), i);
288 			ip->ip_off += i;
289 			ip->ip_len -= i;
290 		}
291 	}
292 
293 	/*
294 	 * While we overlap succeeding segments trim them or,
295 	 * if they are completely covered, dequeue them.
296 	 */
297 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
298 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
299 		if (i < q->ip_len) {
300 			q->ip_len -= i;
301 			q->ip_off += i;
302 			m_adj(dtom(q), i);
303 			break;
304 		}
305 		q = q->ipf_next;
306 		m_freem(dtom(q->ipf_prev));
307 		ip_deq(q->ipf_prev);
308 	}
309 
310 insert:
311 	/*
312 	 * Stick new segment in its place;
313 	 * check for complete reassembly.
314 	 */
315 	ip_enq(ip, q->ipf_prev);
316 	next = 0;
317 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
318 		if (q->ip_off != next)
319 			return (0);
320 		next += q->ip_len;
321 	}
322 	if (q->ipf_prev->ipf_mff)
323 		return (0);
324 
325 	/*
326 	 * Reassembly is complete; concatenate fragments.
327 	 */
328 	q = fp->ipq_next;
329 	m = dtom(q);
330 	t = m->m_next;
331 	m->m_next = 0;
332 	m_cat(m, t);
333 	q = q->ipf_next;
334 	while (q != (struct ipasfrag *)fp) {
335 		t = dtom(q);
336 		q = q->ipf_next;
337 		m_cat(m, t);
338 	}
339 
340 	/*
341 	 * Create header for new ip packet by
342 	 * modifying header of first packet;
343 	 * dequeue and discard fragment reassembly header.
344 	 * Make header visible.
345 	 */
346 	ip = fp->ipq_next;
347 	ip->ip_len = next;
348 	((struct ip *)ip)->ip_src = fp->ipq_src;
349 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
350 	remque(fp);
351 	(void) m_free(dtom(fp));
352 	m = dtom(ip);
353 	m->m_len += sizeof (struct ipasfrag);
354 	m->m_off -= sizeof (struct ipasfrag);
355 	return ((struct ip *)ip);
356 
357 dropfrag:
358 	m_freem(m);
359 	return (0);
360 }
361 
362 /*
363  * Free a fragment reassembly header and all
364  * associated datagrams.
365  */
366 struct ipq *
367 ip_freef(fp)
368 	struct ipq *fp;
369 {
370 	register struct ipasfrag *q;
371 	struct mbuf *m;
372 COUNT(IP_FREEF);
373 
374 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
375 		m_freem(dtom(q));
376 	m = dtom(fp);
377 	fp = fp->next;
378 	remque(fp->prev);
379 	(void) m_free(m);
380 	return (fp);
381 }
382 
383 /*
384  * Put an ip fragment on a reassembly chain.
385  * Like insque, but pointers in middle of structure.
386  */
387 ip_enq(p, prev)
388 	register struct ipasfrag *p, *prev;
389 {
390 
391 COUNT(IP_ENQ);
392 	p->ipf_prev = prev;
393 	p->ipf_next = prev->ipf_next;
394 	prev->ipf_next->ipf_prev = p;
395 	prev->ipf_next = p;
396 }
397 
398 /*
399  * To ip_enq as remque is to insque.
400  */
401 ip_deq(p)
402 	register struct ipasfrag *p;
403 {
404 
405 COUNT(IP_DEQ);
406 	p->ipf_prev->ipf_next = p->ipf_next;
407 	p->ipf_next->ipf_prev = p->ipf_prev;
408 }
409 
410 /*
411  * IP timer processing;
412  * if a timer expires on a reassembly
413  * queue, discard it.
414  */
415 ip_slowtimo()
416 {
417 	register struct ipq *fp;
418 	int s = splnet();
419 
420 COUNT(IP_SLOWTIMO);
421 	fp = ipq.next;
422 	if (fp == 0) {
423 		splx(s);
424 		return;
425 	}
426 	while (fp != &ipq)
427 		if (--fp->ipq_ttl == 0)
428 			fp = ip_freef(fp);
429 		else
430 			fp = fp->next;
431 	splx(s);
432 }
433 
434 /*
435  * Drain off all datagram fragments.
436  */
437 ip_drain()
438 {
439 
440 COUNT(IP_DRAIN);
441 	while (ipq.next != &ipq)
442 		(void) ip_freef(ipq.next);
443 }
444 
445 /*
446  * Do option processing on a datagram,
447  * possibly discarding it if bad options
448  * are encountered.
449  */
450 ip_dooptions(ip)
451 	struct ip *ip;
452 {
453 	register u_char *cp;
454 	int opt, optlen, cnt;
455 	struct in_addr *sin;
456 	register struct ip_timestamp *ipt;
457 	register struct ifnet *ifp;
458 	struct in_addr t;
459 
460 COUNT(IP_DOOPTIONS);
461 	cp = (u_char *)(ip + 1);
462 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
463 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
464 		opt = cp[0];
465 		if (opt == IPOPT_EOL)
466 			break;
467 		if (opt == IPOPT_NOP)
468 			optlen = 1;
469 		else
470 			optlen = cp[1];
471 		switch (opt) {
472 
473 		default:
474 			break;
475 
476 		/*
477 		 * Source routing with record.
478 		 * Find interface with current destination address.
479 		 * If none on this machine then drop if strictly routed,
480 		 * or do nothing if loosely routed.
481 		 * Record interface address and bring up next address
482 		 * component.  If strictly routed make sure next
483 		 * address on directly accessible net.
484 		 */
485 		case IPOPT_LSRR:
486 			if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
487 				break;
488 			sin = (struct in_addr *)(cp + cp[2]);
489 			ipaddr.sin_addr = *sin;
490 			ifp = if_ifwithaddr((struct sockaddr *)&ipaddr);
491 			if (ifp == 0) {
492 				if (opt == IPOPT_SSRR)
493 					goto bad;
494 				break;
495 			}
496 			t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
497 			cp[2] += 4;
498 			if (cp[2] > optlen - (sizeof (long) - 1))
499 				break;
500 			ip->ip_dst = sin[1];
501 			if (opt == IPOPT_SSRR &&
502 			    if_ifonnetof(ip->ip_dst.s_net) == 0)
503 				goto bad;
504 			break;
505 
506 		case IPOPT_TS:
507 			ipt = (struct ip_timestamp *)cp;
508 			if (ipt->ipt_len < 5)
509 				goto bad;
510 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
511 				if (++ipt->ipt_oflw == 0)
512 					goto bad;
513 				break;
514 			}
515 			sin = (struct in_addr *)(cp+cp[2]);
516 			switch (ipt->ipt_flg) {
517 
518 			case IPOPT_TS_TSONLY:
519 				break;
520 
521 			case IPOPT_TS_TSANDADDR:
522 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
523 					goto bad;
524 				if (ifinet == 0)
525 					goto bad;	/* ??? */
526 				*sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr;
527 				break;
528 
529 			case IPOPT_TS_PRESPEC:
530 				ipaddr.sin_addr = *sin;
531 				if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0)
532 					continue;
533 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
534 					goto bad;
535 				ipt->ipt_ptr += 4;
536 				break;
537 
538 			default:
539 				goto bad;
540 			}
541 			*(n_time *)sin = iptime();
542 			ipt->ipt_ptr += 4;
543 		}
544 	}
545 	return;
546 bad:
547 	/* SHOULD FORCE ICMP MESSAGE */
548 	return;
549 }
550 
551 /*
552  * Strip out IP options, at higher
553  * level protocol in the kernel.
554  * Second argument is buffer to which options
555  * will be moved, and return value is their length.
556  */
557 ip_stripoptions(ip, mopt)
558 	struct ip *ip;
559 	struct mbuf *mopt;
560 {
561 	register int i;
562 	register struct mbuf *m;
563 	int olen;
564 COUNT(IP_STRIPOPTIONS);
565 
566 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
567 	m = dtom(ip);
568 	ip++;
569 	if (mopt) {
570 		mopt->m_len = olen;
571 		mopt->m_off = MMINOFF;
572 		bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen);
573 	}
574 	i = m->m_len - (sizeof (struct ip) + olen);
575 	bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
576 	m->m_len -= olen;
577 }
578