xref: /original-bsd/sys/netinet/ip_input.c (revision 9c59a687)
1 /*	ip_input.c	1.36	82/03/29	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/clock.h"
6 #include "../h/mbuf.h"
7 #include "../h/protosw.h"
8 #include "../h/socket.h"
9 #include "../net/in.h"
10 #include "../net/in_systm.h"
11 #include "../net/if.h"
12 #include "../net/ip.h"			/* belongs before in.h */
13 #include "../net/ip_var.h"
14 #include "../net/ip_icmp.h"
15 #include "../net/tcp.h"
16 #include "../net/route.h"
17 
18 #define	IPTTLDEC	5		/* doesn't belong here */
19 
20 u_char	ip_protox[IPPROTO_MAX];
21 int	ipqmaxlen = IFQ_MAXLEN;
22 struct	ifnet *ifinet;			/* first inet interface */
23 
24 /*
25  * IP initialization: fill in IP protocol switch table.
26  * All protocols not implemented in kernel go to raw IP protocol handler.
27  */
28 ip_init()
29 {
30 	register struct protosw *pr;
31 	register int i;
32 
33 COUNT(IP_INIT);
34 	pr = pffindproto(PF_INET, IPPROTO_RAW);
35 	if (pr == 0)
36 		panic("ip_init");
37 	for (i = 0; i < IPPROTO_MAX; i++)
38 		ip_protox[i] = pr - protosw;
39 	for (pr = protosw; pr <= protoswLAST; pr++)
40 		if (pr->pr_family == PF_INET &&
41 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
42 			ip_protox[pr->pr_protocol] = pr - protosw;
43 	ipq.next = ipq.prev = &ipq;
44 	ip_id = time & 0xffff;
45 	ipintrq.ifq_maxlen = ipqmaxlen;
46 	ifinet = if_ifwithaf(AF_INET);
47 }
48 
49 u_char	ipcksum = 1;
50 struct	ip *ip_reass();
51 int	ipforwarding = 0;
52 struct	sockaddr_in ipaddr = { AF_INET };
53 
54 /*
55  * Ip input routine.  Checksum and byte swap header.  If fragmented
56  * try to reassamble.  If complete and fragment queue exists, discard.
57  * Process options.  Pass to next level.
58  */
59 ipintr()
60 {
61 	register struct ip *ip;
62 	register struct mbuf *m;
63 	struct mbuf *m0, *mopt;
64 	register int i;
65 	register struct ipq *fp;
66 	int hlen, s;
67 
68 COUNT(IPINTR);
69 next:
70 	/*
71 	 * Get next datagram off input queue and get IP header
72 	 * in first mbuf.
73 	 */
74 	s = splimp();
75 	IF_DEQUEUE(&ipintrq, m);
76 	splx(s);
77 	if (m == 0)
78 		return;
79 	if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
80 	    (m = m_pullup(m, sizeof (struct ip))) == 0)
81 		return;
82 	ip = mtod(m, struct ip *);
83 	if ((hlen = ip->ip_hl << 2) > m->m_len) {
84 		if ((m = m_pullup(m, hlen)) == 0)
85 			return;
86 		ip = mtod(m, struct ip *);
87 	}
88 	if (ipcksum)
89 		if (ip->ip_sum = in_cksum(m, hlen)) {
90 			printf("ip_sum %x\n", ip->ip_sum);	/* XXX */
91 			ipstat.ips_badsum++;
92 			goto bad;
93 		}
94 
95 #if vax
96 	/*
97 	 * Convert fields to host representation.
98 	 */
99 	ip->ip_len = ntohs((u_short)ip->ip_len);
100 	ip->ip_id = ntohs(ip->ip_id);
101 	ip->ip_off = ntohs((u_short)ip->ip_off);
102 #endif
103 
104 	/*
105 	 * Check that the amount of data in the buffers
106 	 * is as at least much as the IP header would have us expect.
107 	 * Trim mbufs if longer than we expect.
108 	 * Drop packet if shorter than we expect.
109 	 */
110 	i = 0;
111 	m0 = m;
112 	for (; m != NULL; m = m->m_next) {
113 		if (m->m_free) panic("ipinput already free");
114 		i += m->m_len;
115 	}
116 	m = m0;
117 	if (i != ip->ip_len) {
118 		if (i < ip->ip_len) {
119 			ipstat.ips_tooshort++;
120 			goto bad;
121 		}
122 		m_adj(m, ip->ip_len - i);
123 	}
124 
125 	/*
126 	 * Process options and, if not destined for us,
127 	 * ship it on.
128 	 */
129 	if (hlen > sizeof (struct ip))
130 		ip_dooptions(ip);
131 
132 	/*
133 	 * Fast check on the first internet
134 	 * interface in the list.
135 	 */
136 	if (ifinet) {
137 		struct sockaddr_in *sin;
138 
139 		sin = (struct sockaddr_in *)&ifinet->if_addr;
140 		if (sin->sin_addr.s_addr == ip->ip_dst.s_addr)
141 			goto ours;
142 		if ((ifinet->if_flags & IFF_BROADCAST) &&
143 		    sin->sin_addr.s_addr == ip->ip_dst.s_addr)
144 			goto ours;
145 	}
146 	ipaddr.sin_addr = ip->ip_dst;
147 	if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) {
148 		register struct rtentry *rt;
149 
150 printf("forward: dst %x ttl %x\n", ip->ip_dst, ip->ip_ttl);
151 		if (ipforwarding == 0)
152 			goto bad;
153 		if (ip->ip_ttl < IPTTLDEC) {
154 			icmp_error(ip, ICMP_TIMXCEED, 0);
155 			goto next;
156 		}
157 		ip->ip_ttl -= IPTTLDEC;
158 		mopt = m_get(M_DONTWAIT);
159 		if (mopt == 0)
160 			goto bad;
161 		ip_stripoptions(ip, mopt);
162 
163 		/* last 0 here means no directed broadcast */
164 		(void) ip_output(m0, mopt, 0, 0);
165 		goto next;
166 	}
167 
168 ours:
169 	/*
170 	 * Look for queue of fragments
171 	 * of this datagram.
172 	 */
173 	for (fp = ipq.next; fp != &ipq; fp = fp->next)
174 		if (ip->ip_id == fp->ipq_id &&
175 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
176 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
177 		    ip->ip_p == fp->ipq_p)
178 			goto found;
179 	fp = 0;
180 found:
181 
182 	/*
183 	 * Adjust ip_len to not reflect header,
184 	 * set ip_mff if more fragments are expected,
185 	 * convert offset of this to bytes.
186 	 */
187 	ip->ip_len -= hlen;
188 	((struct ipasfrag *)ip)->ipf_mff = 0;
189 	if (ip->ip_off & IP_MF)
190 		((struct ipasfrag *)ip)->ipf_mff = 1;
191 	ip->ip_off <<= 3;
192 
193 	/*
194 	 * If datagram marked as having more fragments
195 	 * or if this is not the first fragment,
196 	 * attempt reassembly; if it succeeds, proceed.
197 	 */
198 	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
199 		ip = ip_reass((struct ipasfrag *)ip, fp);
200 		if (ip == 0)
201 			goto next;
202 		hlen = ip->ip_hl << 2;
203 		m = dtom(ip);
204 	} else
205 		if (fp)
206 			(void) ip_freef(fp);
207 
208 	/*
209 	 * Switch out to protocol's input routine.
210 	 */
211 	(*protosw[ip_protox[ip->ip_p]].pr_input)(m);
212 	goto next;
213 bad:
214 	m_freem(m);
215 	goto next;
216 }
217 
218 /*
219  * Take incoming datagram fragment and try to
220  * reassemble it into whole datagram.  If a chain for
221  * reassembly of this datagram already exists, then it
222  * is given as fp; otherwise have to make a chain.
223  */
224 struct ip *
225 ip_reass(ip, fp)
226 	register struct ipasfrag *ip;
227 	register struct ipq *fp;
228 {
229 	register struct mbuf *m = dtom(ip);
230 	register struct ipasfrag *q;
231 	struct mbuf *t;
232 	int hlen = ip->ip_hl << 2;
233 	int i, next;
234 COUNT(IP_REASS);
235 
236 	/*
237 	 * Presence of header sizes in mbufs
238 	 * would confuse code below.
239 	 */
240 	m->m_off += hlen;
241 	m->m_len -= hlen;
242 
243 	/*
244 	 * If first fragment to arrive, create a reassembly queue.
245 	 */
246 	if (fp == 0) {
247 		if ((t = m_get(M_WAIT)) == NULL)
248 			goto dropfrag;
249 		t->m_off = MMINOFF;
250 		fp = mtod(t, struct ipq *);
251 		insque(fp, &ipq);
252 		fp->ipq_ttl = IPFRAGTTL;
253 		fp->ipq_p = ip->ip_p;
254 		fp->ipq_id = ip->ip_id;
255 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
256 		fp->ipq_src = ((struct ip *)ip)->ip_src;
257 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
258 		q = (struct ipasfrag *)fp;
259 		goto insert;
260 	}
261 
262 	/*
263 	 * Find a segment which begins after this one does.
264 	 */
265 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
266 		if (q->ip_off > ip->ip_off)
267 			break;
268 
269 	/*
270 	 * If there is a preceding segment, it may provide some of
271 	 * our data already.  If so, drop the data from the incoming
272 	 * segment.  If it provides all of our data, drop us.
273 	 */
274 	if (q->ipf_prev != (struct ipasfrag *)fp) {
275 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
276 		if (i > 0) {
277 			if (i >= ip->ip_len)
278 				goto dropfrag;
279 			m_adj(dtom(ip), i);
280 			ip->ip_off += i;
281 			ip->ip_len -= i;
282 		}
283 	}
284 
285 	/*
286 	 * While we overlap succeeding segments trim them or,
287 	 * if they are completely covered, dequeue them.
288 	 */
289 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
290 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
291 		if (i < q->ip_len) {
292 			q->ip_len -= i;
293 			q->ip_off += i;
294 			m_adj(dtom(q), i);
295 			break;
296 		}
297 		q = q->ipf_next;
298 		m_freem(dtom(q->ipf_prev));
299 		ip_deq(q->ipf_prev);
300 	}
301 
302 insert:
303 	/*
304 	 * Stick new segment in its place;
305 	 * check for complete reassembly.
306 	 */
307 	ip_enq(ip, q->ipf_prev);
308 	next = 0;
309 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
310 		if (q->ip_off != next)
311 			return (0);
312 		next += q->ip_len;
313 	}
314 	if (q->ipf_prev->ipf_mff)
315 		return (0);
316 
317 	/*
318 	 * Reassembly is complete; concatenate fragments.
319 	 */
320 	q = fp->ipq_next;
321 	m = dtom(q);
322 	t = m->m_next;
323 	m->m_next = 0;
324 	m_cat(m, t);
325 	q = q->ipf_next;
326 	while (q != (struct ipasfrag *)fp) {
327 		t = dtom(q);
328 		q = q->ipf_next;
329 		m_cat(m, t);
330 	}
331 
332 	/*
333 	 * Create header for new ip packet by
334 	 * modifying header of first packet;
335 	 * dequeue and discard fragment reassembly header.
336 	 * Make header visible.
337 	 */
338 	ip = fp->ipq_next;
339 	ip->ip_len = next;
340 	((struct ip *)ip)->ip_src = fp->ipq_src;
341 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
342 	remque(fp);
343 	(void) m_free(dtom(fp));
344 	m = dtom(ip);
345 	m->m_len += sizeof (struct ipasfrag);
346 	m->m_off -= sizeof (struct ipasfrag);
347 	return ((struct ip *)ip);
348 
349 dropfrag:
350 	m_freem(m);
351 	return (0);
352 }
353 
354 /*
355  * Free a fragment reassembly header and all
356  * associated datagrams.
357  */
358 struct ipq *
359 ip_freef(fp)
360 	struct ipq *fp;
361 {
362 	register struct ipasfrag *q;
363 	struct mbuf *m;
364 COUNT(IP_FREEF);
365 
366 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
367 		m_freem(dtom(q));
368 	m = dtom(fp);
369 	fp = fp->next;
370 	remque(fp->prev);
371 	(void) m_free(m);
372 	return (fp);
373 }
374 
375 /*
376  * Put an ip fragment on a reassembly chain.
377  * Like insque, but pointers in middle of structure.
378  */
379 ip_enq(p, prev)
380 	register struct ipasfrag *p, *prev;
381 {
382 
383 COUNT(IP_ENQ);
384 	p->ipf_prev = prev;
385 	p->ipf_next = prev->ipf_next;
386 	prev->ipf_next->ipf_prev = p;
387 	prev->ipf_next = p;
388 }
389 
390 /*
391  * To ip_enq as remque is to insque.
392  */
393 ip_deq(p)
394 	register struct ipasfrag *p;
395 {
396 
397 COUNT(IP_DEQ);
398 	p->ipf_prev->ipf_next = p->ipf_next;
399 	p->ipf_next->ipf_prev = p->ipf_prev;
400 }
401 
402 /*
403  * IP timer processing;
404  * if a timer expires on a reassembly
405  * queue, discard it.
406  */
407 ip_slowtimo()
408 {
409 	register struct ipq *fp;
410 	int s = splnet();
411 
412 COUNT(IP_SLOWTIMO);
413 	fp = ipq.next;
414 	if (fp == 0) {
415 		splx(s);
416 		return;
417 	}
418 	while (fp != &ipq)
419 		if (--fp->ipq_ttl == 0)
420 			fp = ip_freef(fp);
421 		else
422 			fp = fp->next;
423 	splx(s);
424 }
425 
426 /*
427  * Drain off all datagram fragments.
428  */
429 ip_drain()
430 {
431 
432 COUNT(IP_DRAIN);
433 	while (ipq.next != &ipq)
434 		(void) ip_freef(ipq.next);
435 }
436 
437 /*
438  * Do option processing on a datagram,
439  * possibly discarding it if bad options
440  * are encountered.
441  */
442 ip_dooptions(ip)
443 	struct ip *ip;
444 {
445 	register u_char *cp;
446 	int opt, optlen, cnt;
447 	struct in_addr *sin;
448 	register struct ip_timestamp *ipt;
449 	register struct ifnet *ifp;
450 	struct in_addr t;
451 
452 COUNT(IP_DOOPTIONS);
453 	cp = (u_char *)(ip + 1);
454 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
455 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
456 		opt = cp[0];
457 		if (opt == IPOPT_EOL)
458 			break;
459 		if (opt == IPOPT_NOP)
460 			optlen = 1;
461 		else
462 			optlen = cp[1];
463 		switch (opt) {
464 
465 		default:
466 			break;
467 
468 		/*
469 		 * Source routing with record.
470 		 * Find interface with current destination address.
471 		 * If none on this machine then drop if strictly routed,
472 		 * or do nothing if loosely routed.
473 		 * Record interface address and bring up next address
474 		 * component.  If strictly routed make sure next
475 		 * address on directly accessible net.
476 		 */
477 		case IPOPT_LSRR:
478 			if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
479 				break;
480 			sin = (struct in_addr *)(cp + cp[2]);
481 			ipaddr.sin_addr = *sin;
482 			ifp = if_ifwithaddr((struct sockaddr *)&ipaddr);
483 			if (ifp == 0) {
484 				if (opt == IPOPT_SSRR)
485 					goto bad;
486 				break;
487 			}
488 			t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
489 			cp[2] += 4;
490 			if (cp[2] > optlen - (sizeof (long) - 1))
491 				break;
492 			ip->ip_dst = sin[1];
493 			if (opt == IPOPT_SSRR &&
494 			    if_ifonnetof(ip->ip_dst.s_net) == 0)
495 				goto bad;
496 			break;
497 
498 		case IPOPT_TS:
499 			ipt = (struct ip_timestamp *)cp;
500 			if (ipt->ipt_len < 5)
501 				goto bad;
502 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
503 				if (++ipt->ipt_oflw == 0)
504 					goto bad;
505 				break;
506 			}
507 			sin = (struct in_addr *)(cp+cp[2]);
508 			switch (ipt->ipt_flg) {
509 
510 			case IPOPT_TS_TSONLY:
511 				break;
512 
513 			case IPOPT_TS_TSANDADDR:
514 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
515 					goto bad;
516 				if (ifinet == 0)
517 					goto bad;	/* ??? */
518 				*sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr;
519 				break;
520 
521 			case IPOPT_TS_PRESPEC:
522 				ipaddr.sin_addr = *sin;
523 				if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0)
524 					continue;
525 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
526 					goto bad;
527 				ipt->ipt_ptr += 4;
528 				break;
529 
530 			default:
531 				goto bad;
532 			}
533 			*(n_time *)sin = iptime();
534 			ipt->ipt_ptr += 4;
535 		}
536 	}
537 	return;
538 bad:
539 	/* SHOULD FORCE ICMP MESSAGE */
540 	return;
541 }
542 
543 /*
544  * Strip out IP options, at higher
545  * level protocol in the kernel.
546  * Second argument is buffer to which options
547  * will be moved, and return value is their length.
548  */
549 ip_stripoptions(ip, mopt)
550 	struct ip *ip;
551 	struct mbuf *mopt;
552 {
553 	register int i;
554 	register struct mbuf *m;
555 	int olen;
556 COUNT(IP_STRIPOPTIONS);
557 
558 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
559 	m = dtom(ip);
560 	ip++;
561 	if (mopt) {
562 		mopt->m_len = olen;
563 		mopt->m_off = MMINOFF;
564 		bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen);
565 	}
566 	i = m->m_len - (sizeof (struct ip) + olen);
567 	bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
568 	m->m_len -= olen;
569 }
570