xref: /original-bsd/sys/netinet/ip_input.c (revision b3b53e97)
1 /*	ip_input.c	1.38	82/03/31	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/clock.h"
6 #include "../h/mbuf.h"
7 #include "../h/protosw.h"
8 #include "../h/socket.h"
9 #include "../net/in.h"
10 #include "../net/in_systm.h"
11 #include "../net/if.h"
12 #include "../net/ip.h"			/* belongs before in.h */
13 #include "../net/ip_var.h"
14 #include "../net/ip_icmp.h"
15 #include "../net/tcp.h"
16 
u_char	ip_protox[IPPROTO_MAX];		/* IP protocol number -> index into protosw[]; filled in by ip_init() */
int	ipqmaxlen = IFQ_MAXLEN;		/* copied into ipintrq.ifq_maxlen by ip_init() */
struct	ifnet *ifinet;			/* first inet interface */
20 
21 /*
22  * IP initialization: fill in IP protocol switch table.
23  * All protocols not implemented in kernel go to raw IP protocol handler.
24  */
25 ip_init()
26 {
27 	register struct protosw *pr;
28 	register int i;
29 
30 COUNT(IP_INIT);
31 	pr = pffindproto(PF_INET, IPPROTO_RAW);
32 	if (pr == 0)
33 		panic("ip_init");
34 	for (i = 0; i < IPPROTO_MAX; i++)
35 		ip_protox[i] = pr - protosw;
36 	for (pr = protosw; pr <= protoswLAST; pr++)
37 		if (pr->pr_family == PF_INET &&
38 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
39 			ip_protox[pr->pr_protocol] = pr - protosw;
40 	ipq.next = ipq.prev = &ipq;
41 	ip_id = time & 0xffff;
42 	ipintrq.ifq_maxlen = ipqmaxlen;
43 	ifinet = if_ifwithaf(AF_INET);
44 }
45 
u_char	ipcksum = 1;			/* nonzero: ipintr() verifies the IP header checksum */
struct	ip *ip_reass();
int	ipforwarding = 1;		/* zero: ipintr() drops datagrams not addressed to a local interface */
int	ipprintfs = 0;			/* nonzero: ipintr() printf's each datagram it considers forwarding */
struct	sockaddr_in ipaddr = { AF_INET };	/* scratch address handed to if_ifwithaddr() */
51 
52 /*
53  * Ip input routine.  Checksum and byte swap header.  If fragmented
54  * try to reassamble.  If complete and fragment queue exists, discard.
55  * Process options.  Pass to next level.
56  */
57 ipintr()
58 {
59 	register struct ip *ip;
60 	register struct mbuf *m;
61 	struct mbuf *m0, *mopt;
62 	register int i;
63 	register struct ipq *fp;
64 	int hlen, s;
65 
66 COUNT(IPINTR);
67 next:
68 	/*
69 	 * Get next datagram off input queue and get IP header
70 	 * in first mbuf.
71 	 */
72 	s = splimp();
73 	IF_DEQUEUE(&ipintrq, m);
74 	splx(s);
75 	if (m == 0)
76 		return;
77 	if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
78 	    (m = m_pullup(m, sizeof (struct ip))) == 0)
79 		return;
80 	ip = mtod(m, struct ip *);
81 	if ((hlen = ip->ip_hl << 2) > m->m_len) {
82 		if ((m = m_pullup(m, hlen)) == 0)
83 			return;
84 		ip = mtod(m, struct ip *);
85 	}
86 	if (ipcksum)
87 		if (ip->ip_sum = in_cksum(m, hlen)) {
88 			printf("ip_sum %x\n", ip->ip_sum);	/* XXX */
89 			ipstat.ips_badsum++;
90 			goto bad;
91 		}
92 
93 #if vax
94 	/*
95 	 * Convert fields to host representation.
96 	 */
97 	ip->ip_len = ntohs((u_short)ip->ip_len);
98 	ip->ip_id = ntohs(ip->ip_id);
99 	ip->ip_off = ntohs((u_short)ip->ip_off);
100 #endif
101 
102 	/*
103 	 * Check that the amount of data in the buffers
104 	 * is as at least much as the IP header would have us expect.
105 	 * Trim mbufs if longer than we expect.
106 	 * Drop packet if shorter than we expect.
107 	 */
108 	i = 0;
109 	m0 = m;
110 	for (; m != NULL; m = m->m_next) {
111 		if (m->m_free) panic("ipinput already free");
112 		i += m->m_len;
113 	}
114 	m = m0;
115 	if (i != ip->ip_len) {
116 		if (i < ip->ip_len) {
117 			ipstat.ips_tooshort++;
118 			goto bad;
119 		}
120 		m_adj(m, ip->ip_len - i);
121 	}
122 
123 	/*
124 	 * Process options and, if not destined for us,
125 	 * ship it on.
126 	 */
127 	if (hlen > sizeof (struct ip))
128 		ip_dooptions(ip);
129 
130 	/*
131 	 * Fast check on the first internet
132 	 * interface in the list.
133 	 */
134 	if (ifinet) {
135 		struct sockaddr_in *sin;
136 
137 		sin = (struct sockaddr_in *)&ifinet->if_addr;
138 		if (sin->sin_addr.s_addr == ip->ip_dst.s_addr)
139 			goto ours;
140 		if ((ifinet->if_flags & IFF_BROADCAST) &&
141 		    sin->sin_addr.s_addr == ip->ip_dst.s_addr)
142 			goto ours;
143 	}
144 	ipaddr.sin_addr = ip->ip_dst;
145 	if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) {
146 		if (ipprintfs)
147 			printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
148 				ip->ip_dst, ip->ip_ttl);
149 		if (ipforwarding == 0)
150 			goto bad;
151 		if (ip->ip_ttl < IPTTLDEC) {
152 			icmp_error(ip, ICMP_TIMXCEED, 0);
153 			goto next;
154 		}
155 		ip->ip_ttl -= IPTTLDEC;
156 		mopt = m_get(M_DONTWAIT);
157 		if (mopt == 0)
158 			goto bad;
159 		ip_stripoptions(ip, mopt);
160 
161 		/* last 0 here means no directed broadcast */
162 		(void) ip_output(m0, mopt, 0, 0);
163 		goto next;
164 	}
165 
166 ours:
167 	/*
168 	 * Look for queue of fragments
169 	 * of this datagram.
170 	 */
171 	for (fp = ipq.next; fp != &ipq; fp = fp->next)
172 		if (ip->ip_id == fp->ipq_id &&
173 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
174 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
175 		    ip->ip_p == fp->ipq_p)
176 			goto found;
177 	fp = 0;
178 found:
179 
180 	/*
181 	 * Adjust ip_len to not reflect header,
182 	 * set ip_mff if more fragments are expected,
183 	 * convert offset of this to bytes.
184 	 */
185 	ip->ip_len -= hlen;
186 	((struct ipasfrag *)ip)->ipf_mff = 0;
187 	if (ip->ip_off & IP_MF)
188 		((struct ipasfrag *)ip)->ipf_mff = 1;
189 	ip->ip_off <<= 3;
190 
191 	/*
192 	 * If datagram marked as having more fragments
193 	 * or if this is not the first fragment,
194 	 * attempt reassembly; if it succeeds, proceed.
195 	 */
196 	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
197 		ip = ip_reass((struct ipasfrag *)ip, fp);
198 		if (ip == 0)
199 			goto next;
200 		hlen = ip->ip_hl << 2;
201 		m = dtom(ip);
202 	} else
203 		if (fp)
204 			(void) ip_freef(fp);
205 
206 	/*
207 	 * Switch out to protocol's input routine.
208 	 */
209 	(*protosw[ip_protox[ip->ip_p]].pr_input)(m);
210 	goto next;
211 bad:
212 	m_freem(m);
213 	goto next;
214 }
215 
216 /*
217  * Take incoming datagram fragment and try to
218  * reassemble it into whole datagram.  If a chain for
219  * reassembly of this datagram already exists, then it
220  * is given as fp; otherwise have to make a chain.
221  */
222 struct ip *
223 ip_reass(ip, fp)
224 	register struct ipasfrag *ip;
225 	register struct ipq *fp;
226 {
227 	register struct mbuf *m = dtom(ip);
228 	register struct ipasfrag *q;
229 	struct mbuf *t;
230 	int hlen = ip->ip_hl << 2;
231 	int i, next;
232 COUNT(IP_REASS);
233 
234 	/*
235 	 * Presence of header sizes in mbufs
236 	 * would confuse code below.
237 	 */
238 	m->m_off += hlen;
239 	m->m_len -= hlen;
240 
241 	/*
242 	 * If first fragment to arrive, create a reassembly queue.
243 	 */
244 	if (fp == 0) {
245 		if ((t = m_get(M_WAIT)) == NULL)
246 			goto dropfrag;
247 		t->m_off = MMINOFF;
248 		fp = mtod(t, struct ipq *);
249 		insque(fp, &ipq);
250 		fp->ipq_ttl = IPFRAGTTL;
251 		fp->ipq_p = ip->ip_p;
252 		fp->ipq_id = ip->ip_id;
253 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
254 		fp->ipq_src = ((struct ip *)ip)->ip_src;
255 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
256 		q = (struct ipasfrag *)fp;
257 		goto insert;
258 	}
259 
260 	/*
261 	 * Find a segment which begins after this one does.
262 	 */
263 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
264 		if (q->ip_off > ip->ip_off)
265 			break;
266 
267 	/*
268 	 * If there is a preceding segment, it may provide some of
269 	 * our data already.  If so, drop the data from the incoming
270 	 * segment.  If it provides all of our data, drop us.
271 	 */
272 	if (q->ipf_prev != (struct ipasfrag *)fp) {
273 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
274 		if (i > 0) {
275 			if (i >= ip->ip_len)
276 				goto dropfrag;
277 			m_adj(dtom(ip), i);
278 			ip->ip_off += i;
279 			ip->ip_len -= i;
280 		}
281 	}
282 
283 	/*
284 	 * While we overlap succeeding segments trim them or,
285 	 * if they are completely covered, dequeue them.
286 	 */
287 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
288 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
289 		if (i < q->ip_len) {
290 			q->ip_len -= i;
291 			q->ip_off += i;
292 			m_adj(dtom(q), i);
293 			break;
294 		}
295 		q = q->ipf_next;
296 		m_freem(dtom(q->ipf_prev));
297 		ip_deq(q->ipf_prev);
298 	}
299 
300 insert:
301 	/*
302 	 * Stick new segment in its place;
303 	 * check for complete reassembly.
304 	 */
305 	ip_enq(ip, q->ipf_prev);
306 	next = 0;
307 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
308 		if (q->ip_off != next)
309 			return (0);
310 		next += q->ip_len;
311 	}
312 	if (q->ipf_prev->ipf_mff)
313 		return (0);
314 
315 	/*
316 	 * Reassembly is complete; concatenate fragments.
317 	 */
318 	q = fp->ipq_next;
319 	m = dtom(q);
320 	t = m->m_next;
321 	m->m_next = 0;
322 	m_cat(m, t);
323 	q = q->ipf_next;
324 	while (q != (struct ipasfrag *)fp) {
325 		t = dtom(q);
326 		q = q->ipf_next;
327 		m_cat(m, t);
328 	}
329 
330 	/*
331 	 * Create header for new ip packet by
332 	 * modifying header of first packet;
333 	 * dequeue and discard fragment reassembly header.
334 	 * Make header visible.
335 	 */
336 	ip = fp->ipq_next;
337 	ip->ip_len = next;
338 	((struct ip *)ip)->ip_src = fp->ipq_src;
339 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
340 	remque(fp);
341 	(void) m_free(dtom(fp));
342 	m = dtom(ip);
343 	m->m_len += sizeof (struct ipasfrag);
344 	m->m_off -= sizeof (struct ipasfrag);
345 	return ((struct ip *)ip);
346 
347 dropfrag:
348 	m_freem(m);
349 	return (0);
350 }
351 
352 /*
353  * Free a fragment reassembly header and all
354  * associated datagrams.
355  */
356 struct ipq *
357 ip_freef(fp)
358 	struct ipq *fp;
359 {
360 	register struct ipasfrag *q;
361 	struct mbuf *m;
362 COUNT(IP_FREEF);
363 
364 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
365 		m_freem(dtom(q));
366 	m = dtom(fp);
367 	fp = fp->next;
368 	remque(fp->prev);
369 	(void) m_free(m);
370 	return (fp);
371 }
372 
373 /*
374  * Put an ip fragment on a reassembly chain.
375  * Like insque, but pointers in middle of structure.
376  */
377 ip_enq(p, prev)
378 	register struct ipasfrag *p, *prev;
379 {
380 
381 COUNT(IP_ENQ);
382 	p->ipf_prev = prev;
383 	p->ipf_next = prev->ipf_next;
384 	prev->ipf_next->ipf_prev = p;
385 	prev->ipf_next = p;
386 }
387 
388 /*
389  * To ip_enq as remque is to insque.
390  */
391 ip_deq(p)
392 	register struct ipasfrag *p;
393 {
394 
395 COUNT(IP_DEQ);
396 	p->ipf_prev->ipf_next = p->ipf_next;
397 	p->ipf_next->ipf_prev = p->ipf_prev;
398 }
399 
400 /*
401  * IP timer processing;
402  * if a timer expires on a reassembly
403  * queue, discard it.
404  */
405 ip_slowtimo()
406 {
407 	register struct ipq *fp;
408 	int s = splnet();
409 
410 COUNT(IP_SLOWTIMO);
411 	fp = ipq.next;
412 	if (fp == 0) {
413 		splx(s);
414 		return;
415 	}
416 	while (fp != &ipq)
417 		if (--fp->ipq_ttl == 0)
418 			fp = ip_freef(fp);
419 		else
420 			fp = fp->next;
421 	splx(s);
422 }
423 
424 /*
425  * Drain off all datagram fragments.
426  */
427 ip_drain()
428 {
429 
430 COUNT(IP_DRAIN);
431 	while (ipq.next != &ipq)
432 		(void) ip_freef(ipq.next);
433 }
434 
435 /*
436  * Do option processing on a datagram,
437  * possibly discarding it if bad options
438  * are encountered.
439  */
440 ip_dooptions(ip)
441 	struct ip *ip;
442 {
443 	register u_char *cp;
444 	int opt, optlen, cnt;
445 	struct in_addr *sin;
446 	register struct ip_timestamp *ipt;
447 	register struct ifnet *ifp;
448 	struct in_addr t;
449 
450 COUNT(IP_DOOPTIONS);
451 	cp = (u_char *)(ip + 1);
452 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
453 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
454 		opt = cp[0];
455 		if (opt == IPOPT_EOL)
456 			break;
457 		if (opt == IPOPT_NOP)
458 			optlen = 1;
459 		else
460 			optlen = cp[1];
461 		switch (opt) {
462 
463 		default:
464 			break;
465 
466 		/*
467 		 * Source routing with record.
468 		 * Find interface with current destination address.
469 		 * If none on this machine then drop if strictly routed,
470 		 * or do nothing if loosely routed.
471 		 * Record interface address and bring up next address
472 		 * component.  If strictly routed make sure next
473 		 * address on directly accessible net.
474 		 */
475 		case IPOPT_LSRR:
476 			if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
477 				break;
478 			sin = (struct in_addr *)(cp + cp[2]);
479 			ipaddr.sin_addr = *sin;
480 			ifp = if_ifwithaddr((struct sockaddr *)&ipaddr);
481 			if (ifp == 0) {
482 				if (opt == IPOPT_SSRR)
483 					goto bad;
484 				break;
485 			}
486 			t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
487 			cp[2] += 4;
488 			if (cp[2] > optlen - (sizeof (long) - 1))
489 				break;
490 			ip->ip_dst = sin[1];
491 			if (opt == IPOPT_SSRR &&
492 			    if_ifonnetof(ip->ip_dst.s_net) == 0)
493 				goto bad;
494 			break;
495 
496 		case IPOPT_TS:
497 			ipt = (struct ip_timestamp *)cp;
498 			if (ipt->ipt_len < 5)
499 				goto bad;
500 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
501 				if (++ipt->ipt_oflw == 0)
502 					goto bad;
503 				break;
504 			}
505 			sin = (struct in_addr *)(cp+cp[2]);
506 			switch (ipt->ipt_flg) {
507 
508 			case IPOPT_TS_TSONLY:
509 				break;
510 
511 			case IPOPT_TS_TSANDADDR:
512 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
513 					goto bad;
514 				if (ifinet == 0)
515 					goto bad;	/* ??? */
516 				*sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr;
517 				break;
518 
519 			case IPOPT_TS_PRESPEC:
520 				ipaddr.sin_addr = *sin;
521 				if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0)
522 					continue;
523 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
524 					goto bad;
525 				ipt->ipt_ptr += 4;
526 				break;
527 
528 			default:
529 				goto bad;
530 			}
531 			*(n_time *)sin = iptime();
532 			ipt->ipt_ptr += 4;
533 		}
534 	}
535 	return;
536 bad:
537 	/* SHOULD FORCE ICMP MESSAGE */
538 	return;
539 }
540 
541 /*
542  * Strip out IP options, at higher
543  * level protocol in the kernel.
544  * Second argument is buffer to which options
545  * will be moved, and return value is their length.
546  */
547 ip_stripoptions(ip, mopt)
548 	struct ip *ip;
549 	struct mbuf *mopt;
550 {
551 	register int i;
552 	register struct mbuf *m;
553 	int olen;
554 COUNT(IP_STRIPOPTIONS);
555 
556 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
557 	m = dtom(ip);
558 	ip++;
559 	if (mopt) {
560 		mopt->m_len = olen;
561 		mopt->m_off = MMINOFF;
562 		bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen);
563 	}
564 	i = m->m_len - (sizeof (struct ip) + olen);
565 	bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
566 	m->m_len -= olen;
567 }
568