xref: /original-bsd/sys/netinet/ip_input.c (revision 8208c1e2)
1 /*	ip_input.c	1.34	82/03/23	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/clock.h"
6 #include "../h/mbuf.h"
7 #include "../h/protosw.h"
8 #include "../h/socket.h"
9 #include "../net/in.h"
10 #include "../net/in_systm.h"
11 #include "../net/if.h"
12 #include "../net/ip.h"			/* belongs before in.h */
13 #include "../net/ip_var.h"
14 #include "../net/ip_icmp.h"
15 #include "../net/tcp.h"
16 
17 u_char	ip_protox[IPPROTO_MAX];
18 int	ipqmaxlen = IFQ_MAXLEN;
19 
20 /*
21  * IP initialization: fill in IP protocol switch table.
22  * All protocols not implemented in kernel go to raw IP protocol handler.
23  */
24 ip_init()
25 {
26 	register struct protosw *pr;
27 	register int i;
28 
29 COUNT(IP_INIT);
30 	pr = pffindproto(PF_INET, IPPROTO_RAW);
31 	if (pr == 0)
32 		panic("ip_init");
33 	for (i = 0; i < IPPROTO_MAX; i++)
34 		ip_protox[i] = pr - protosw;
35 	for (pr = protosw; pr <= protoswLAST; pr++)
36 		if (pr->pr_family == PF_INET &&
37 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
38 			ip_protox[pr->pr_protocol] = pr - protosw;
39 	ipq.next = ipq.prev = &ipq;
40 	ip_id = time & 0xffff;
41 	ipintrq.ifq_maxlen = ipqmaxlen;
42 }
43 
44 u_char	ipcksum = 1;
45 struct	ip *ip_reass();
46 
47 /*
48  * Ip input routine.  Checksum and byte swap header.  If fragmented
49  * try to reassamble.  If complete and fragment queue exists, discard.
50  * Process options.  Pass to next level.
51  */
52 ipintr()
53 {
54 	register struct ip *ip;
55 	register struct mbuf *m;
56 	struct mbuf *m0, *mopt;
57 	register int i;
58 	register struct ipq *fp;
59 	int hlen, s;
60 
61 COUNT(IPINTR);
62 next:
63 	/*
64 	 * Get next datagram off input queue and get IP header
65 	 * in first mbuf.
66 	 */
67 	s = splimp();
68 	IF_DEQUEUE(&ipintrq, m);
69 	splx(s);
70 	if (m == 0)
71 		return;
72 	if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
73 	    (m = m_pullup(m, sizeof (struct ip))) == 0)
74 		return;
75 	ip = mtod(m, struct ip *);
76 	if ((hlen = ip->ip_hl << 2) > m->m_len) {
77 		if ((m = m_pullup(m, hlen)) == 0)
78 			return;
79 		ip = mtod(m, struct ip *);
80 	}
81 	if (ipcksum)
82 		if (ip->ip_sum = in_cksum(m, hlen)) {
83 			printf("ip_sum %x\n", ip->ip_sum);	/* XXX */
84 			ipstat.ips_badsum++;
85 			goto bad;
86 		}
87 
88 #if vax
89 	/*
90 	 * Convert fields to host representation.
91 	 */
92 	ip->ip_len = ntohs((u_short)ip->ip_len);
93 	ip->ip_id = ntohs(ip->ip_id);
94 	ip->ip_off = ntohs((u_short)ip->ip_off);
95 #endif
96 
97 	/*
98 	 * Check that the amount of data in the buffers
99 	 * is as at least much as the IP header would have us expect.
100 	 * Trim mbufs if longer than we expect.
101 	 * Drop packet if shorter than we expect.
102 	 */
103 	i = 0;
104 	m0 = m;
105 	for (; m != NULL; m = m->m_next) {
106 		if (m->m_free) panic("ipinput already free");
107 		i += m->m_len;
108 	}
109 	m = m0;
110 	if (i != ip->ip_len) {
111 		if (i < ip->ip_len) {
112 			ipstat.ips_tooshort++;
113 			goto bad;
114 		}
115 		m_adj(m, ip->ip_len - i);
116 	}
117 
118 	/*
119 	 * Process options and, if not destined for us,
120 	 * ship it on.
121 	 */
122 	if (hlen > sizeof (struct ip))
123 		ip_dooptions(ip);
124 	if (ifnet && ip->ip_dst.s_addr != ifnet->if_addr.s_addr &&
125 	    if_ifwithaddr(ip->ip_dst) == 0) {
126 
127 		goto bad;
128 #ifdef notdef
129 		printf("ip->ip_dst %x ip->ip_ttl %x\n",
130 		    ip->ip_dst, ip->ip_ttl);
131 		if (--ip->ip_ttl == 0) {
132 			icmp_error(ip, ICMP_TIMXCEED, 0);
133 			goto next;
134 		}
135 		mopt = m_get(M_DONTWAIT);
136 		if (mopt == 0)
137 			goto bad;
138 		ip_stripoptions(ip, mopt);
139 		/* 0 here means no directed broadcast */
140 		(void) ip_output(m0, mopt, 0);
141 		goto next;
142 #endif
143 	}
144 
145 	/*
146 	 * Look for queue of fragments
147 	 * of this datagram.
148 	 */
149 	for (fp = ipq.next; fp != &ipq; fp = fp->next)
150 		if (ip->ip_id == fp->ipq_id &&
151 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
152 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
153 		    ip->ip_p == fp->ipq_p)
154 			goto found;
155 	fp = 0;
156 found:
157 
158 	/*
159 	 * Adjust ip_len to not reflect header,
160 	 * set ip_mff if more fragments are expected,
161 	 * convert offset of this to bytes.
162 	 */
163 	ip->ip_len -= hlen;
164 	((struct ipasfrag *)ip)->ipf_mff = 0;
165 	if (ip->ip_off & IP_MF)
166 		((struct ipasfrag *)ip)->ipf_mff = 1;
167 	ip->ip_off <<= 3;
168 
169 	/*
170 	 * If datagram marked as having more fragments
171 	 * or if this is not the first fragment,
172 	 * attempt reassembly; if it succeeds, proceed.
173 	 */
174 	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
175 		ip = ip_reass((struct ipasfrag *)ip, fp);
176 		if (ip == 0)
177 			goto next;
178 		hlen = ip->ip_hl << 2;
179 		m = dtom(ip);
180 	} else
181 		if (fp)
182 			(void) ip_freef(fp);
183 
184 	/*
185 	 * Switch out to protocol's input routine.
186 	 */
187 	(*protosw[ip_protox[ip->ip_p]].pr_input)(m);
188 	goto next;
189 bad:
190 	m_freem(m);
191 	goto next;
192 }
193 
194 /*
195  * Take incoming datagram fragment and try to
196  * reassemble it into whole datagram.  If a chain for
197  * reassembly of this datagram already exists, then it
198  * is given as fp; otherwise have to make a chain.
199  */
200 struct ip *
201 ip_reass(ip, fp)
202 	register struct ipasfrag *ip;
203 	register struct ipq *fp;
204 {
205 	register struct mbuf *m = dtom(ip);
206 	register struct ipasfrag *q;
207 	struct mbuf *t;
208 	int hlen = ip->ip_hl << 2;
209 	int i, next;
210 COUNT(IP_REASS);
211 
212 	/*
213 	 * Presence of header sizes in mbufs
214 	 * would confuse code below.
215 	 */
216 	m->m_off += hlen;
217 	m->m_len -= hlen;
218 
219 	/*
220 	 * If first fragment to arrive, create a reassembly queue.
221 	 */
222 	if (fp == 0) {
223 		if ((t = m_get(M_WAIT)) == NULL)
224 			goto dropfrag;
225 		t->m_off = MMINOFF;
226 		fp = mtod(t, struct ipq *);
227 		insque(fp, &ipq);
228 		fp->ipq_ttl = IPFRAGTTL;
229 		fp->ipq_p = ip->ip_p;
230 		fp->ipq_id = ip->ip_id;
231 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
232 		fp->ipq_src = ((struct ip *)ip)->ip_src;
233 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
234 		q = (struct ipasfrag *)fp;
235 		goto insert;
236 	}
237 
238 	/*
239 	 * Find a segment which begins after this one does.
240 	 */
241 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
242 		if (q->ip_off > ip->ip_off)
243 			break;
244 
245 	/*
246 	 * If there is a preceding segment, it may provide some of
247 	 * our data already.  If so, drop the data from the incoming
248 	 * segment.  If it provides all of our data, drop us.
249 	 */
250 	if (q->ipf_prev != (struct ipasfrag *)fp) {
251 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
252 		if (i > 0) {
253 			if (i >= ip->ip_len)
254 				goto dropfrag;
255 			m_adj(dtom(ip), i);
256 			ip->ip_off += i;
257 			ip->ip_len -= i;
258 		}
259 	}
260 
261 	/*
262 	 * While we overlap succeeding segments trim them or,
263 	 * if they are completely covered, dequeue them.
264 	 */
265 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
266 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
267 		if (i < q->ip_len) {
268 			q->ip_len -= i;
269 			q->ip_off += i;
270 			m_adj(dtom(q), i);
271 			break;
272 		}
273 		q = q->ipf_next;
274 		m_freem(dtom(q->ipf_prev));
275 		ip_deq(q->ipf_prev);
276 	}
277 
278 insert:
279 	/*
280 	 * Stick new segment in its place;
281 	 * check for complete reassembly.
282 	 */
283 	ip_enq(ip, q->ipf_prev);
284 	next = 0;
285 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
286 		if (q->ip_off != next)
287 			return (0);
288 		next += q->ip_len;
289 	}
290 	if (q->ipf_prev->ipf_mff)
291 		return (0);
292 
293 	/*
294 	 * Reassembly is complete; concatenate fragments.
295 	 */
296 	q = fp->ipq_next;
297 	m = dtom(q);
298 	t = m->m_next;
299 	m->m_next = 0;
300 	m_cat(m, t);
301 	q = q->ipf_next;
302 	while (q != (struct ipasfrag *)fp) {
303 		t = dtom(q);
304 		q = q->ipf_next;
305 		m_cat(m, t);
306 	}
307 
308 	/*
309 	 * Create header for new ip packet by
310 	 * modifying header of first packet;
311 	 * dequeue and discard fragment reassembly header.
312 	 * Make header visible.
313 	 */
314 	ip = fp->ipq_next;
315 	ip->ip_len = next;
316 	((struct ip *)ip)->ip_src = fp->ipq_src;
317 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
318 	remque(fp);
319 	(void) m_free(dtom(fp));
320 	m = dtom(ip);
321 	m->m_len += sizeof (struct ipasfrag);
322 	m->m_off -= sizeof (struct ipasfrag);
323 	return ((struct ip *)ip);
324 
325 dropfrag:
326 	m_freem(m);
327 	return (0);
328 }
329 
330 /*
331  * Free a fragment reassembly header and all
332  * associated datagrams.
333  */
334 struct ipq *
335 ip_freef(fp)
336 	struct ipq *fp;
337 {
338 	register struct ipasfrag *q;
339 	struct mbuf *m;
340 COUNT(IP_FREEF);
341 
342 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
343 		m_freem(dtom(q));
344 	m = dtom(fp);
345 	fp = fp->next;
346 	remque(fp->prev);
347 	(void) m_free(m);
348 	return (fp);
349 }
350 
351 /*
352  * Put an ip fragment on a reassembly chain.
353  * Like insque, but pointers in middle of structure.
354  */
355 ip_enq(p, prev)
356 	register struct ipasfrag *p, *prev;
357 {
358 
359 COUNT(IP_ENQ);
360 	p->ipf_prev = prev;
361 	p->ipf_next = prev->ipf_next;
362 	prev->ipf_next->ipf_prev = p;
363 	prev->ipf_next = p;
364 }
365 
366 /*
367  * To ip_enq as remque is to insque.
368  */
369 ip_deq(p)
370 	register struct ipasfrag *p;
371 {
372 
373 COUNT(IP_DEQ);
374 	p->ipf_prev->ipf_next = p->ipf_next;
375 	p->ipf_next->ipf_prev = p->ipf_prev;
376 }
377 
378 /*
379  * IP timer processing;
380  * if a timer expires on a reassembly
381  * queue, discard it.
382  */
383 ip_slowtimo()
384 {
385 	register struct ipq *fp;
386 	int s = splnet();
387 
388 COUNT(IP_SLOWTIMO);
389 	fp = ipq.next;
390 	if (fp == 0) {
391 		splx(s);
392 		return;
393 	}
394 	while (fp != &ipq)
395 		if (--fp->ipq_ttl == 0)
396 			fp = ip_freef(fp);
397 		else
398 			fp = fp->next;
399 	splx(s);
400 }
401 
402 /*
403  * Drain off all datagram fragments.
404  */
405 ip_drain()
406 {
407 
408 COUNT(IP_DRAIN);
409 	while (ipq.next != &ipq)
410 		(void) ip_freef(ipq.next);
411 }
412 
413 /*
414  * Do option processing on a datagram,
415  * possibly discarding it if bad options
416  * are encountered.
417  */
418 ip_dooptions(ip)
419 	struct ip *ip;
420 {
421 	register u_char *cp;
422 	int opt, optlen, cnt;
423 	struct in_addr *sin;
424 	register struct ip_timestamp *ipt;
425 	register struct ifnet *ifp;
426 	struct in_addr t;
427 
428 COUNT(IP_DOOPTIONS);
429 	cp = (u_char *)(ip + 1);
430 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
431 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
432 		opt = cp[0];
433 		if (opt == IPOPT_EOL)
434 			break;
435 		if (opt == IPOPT_NOP)
436 			optlen = 1;
437 		else
438 			optlen = cp[1];
439 		switch (opt) {
440 
441 		default:
442 			break;
443 
444 		/*
445 		 * Source routing with record.
446 		 * Find interface with current destination address.
447 		 * If none on this machine then drop if strictly routed,
448 		 * or do nothing if loosely routed.
449 		 * Record interface address and bring up next address
450 		 * component.  If strictly routed make sure next
451 		 * address on directly accessible net.
452 		 */
453 		case IPOPT_LSRR:
454 			if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
455 				break;
456 			sin = (struct in_addr *)(cp + cp[2]);
457 			ifp = if_ifwithaddr(*sin);
458 			if (ifp == 0) {
459 				if (opt == IPOPT_SSRR)
460 					goto bad;
461 				break;
462 			}
463 			t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
464 			cp[2] += 4;
465 			if (cp[2] > optlen - (sizeof (long) - 1))
466 				break;
467 			ip->ip_dst = sin[1];
468 			if (opt == IPOPT_SSRR && if_ifonnetof(ip->ip_dst)==0)
469 				goto bad;
470 			break;
471 
472 		case IPOPT_TS:
473 			ipt = (struct ip_timestamp *)cp;
474 			if (ipt->ipt_len < 5)
475 				goto bad;
476 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
477 				if (++ipt->ipt_oflw == 0)
478 					goto bad;
479 				break;
480 			}
481 			sin = (struct in_addr *)(cp+cp[2]);
482 			switch (ipt->ipt_flg) {
483 
484 			case IPOPT_TS_TSONLY:
485 				break;
486 
487 			case IPOPT_TS_TSANDADDR:
488 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
489 					goto bad;
490 				/* stamp with ``first'' interface address */
491 				*sin++ = ifnet->if_addr;
492 				break;
493 
494 			case IPOPT_TS_PRESPEC:
495 				if (if_ifwithaddr(*sin) == 0)
496 					continue;
497 				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
498 					goto bad;
499 				ipt->ipt_ptr += 4;
500 				break;
501 
502 			default:
503 				goto bad;
504 			}
505 			*(n_time *)sin = iptime();
506 			ipt->ipt_ptr += 4;
507 		}
508 	}
509 	return;
510 bad:
511 	/* SHOULD FORCE ICMP MESSAGE */
512 	return;
513 }
514 
515 /*
516  * Strip out IP options, at higher
517  * level protocol in the kernel.
518  * Second argument is buffer to which options
519  * will be moved, and return value is their length.
520  */
521 ip_stripoptions(ip, mopt)
522 	struct ip *ip;
523 	struct mbuf *mopt;
524 {
525 	register int i;
526 	register struct mbuf *m;
527 	int olen;
528 COUNT(IP_STRIPOPTIONS);
529 
530 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
531 	m = dtom(ip);
532 	ip++;
533 	if (mopt) {
534 		mopt->m_len = olen;
535 		mopt->m_off = MMINOFF;
536 		bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen);
537 	}
538 	i = m->m_len - (sizeof (struct ip) + olen);
539 	bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
540 	m->m_len -= olen;
541 }
542