xref: /386bsd/usr/src/kernel/inet/ip_output.c (revision a2142627)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	$Id: ip_output.c,v 1.1 94/10/20 10:53:34 root Exp $
34  */
35 
36 #include "sys/param.h"
37 #include "malloc.h"
38 #include "mbuf.h"
39 #include "sys/errno.h"
40 #include "protosw.h"
41 #include "sys/file.h"
42 #include "socketvar.h"
43 #include "prototypes.h"
44 
45 #include "if.h"
46 #include "route.h"
47 
48 #include "in.h"
49 #include "in_systm.h"
50 #include "ip.h"
51 #include "in_pcb.h"
52 #include "in_var.h"
53 #include "ip_var.h"
54 
55 struct mbuf *ip_insertoptions();
56 
57 /*
58  * IP output.  The packet in mbuf chain m contains a skeletal IP
59  * header (with len, off, ttl, proto, tos, src, dst).
60  * The mbuf chain containing the packet will be freed.
61  * The mbuf opt, if present, will not be freed.
62  */
63 ip_output(m0, opt, ro, flags)
64 	struct mbuf *m0;
65 	struct mbuf *opt;
66 	struct route *ro;
67 	int flags;
68 {
69 	register struct ip *ip, *mhip;
70 	register struct ifnet *ifp;
71 	register struct mbuf *m = m0;
72 	register int hlen = sizeof (struct ip);
73 	int len, off, error = 0;
74 	struct route iproute;
75 	struct sockaddr_in *dst;
76 	struct in_ifaddr *ia;
77 
78 #ifdef	DIAGNOSTIC
79 	if ((m->m_flags & M_PKTHDR) == 0)
80 		panic("ip_output no HDR");
81 #endif
82 	if (opt) {
83 		m = ip_insertoptions(m, opt, &len);
84 		hlen = len;
85 	}
86 	ip = mtod(m, struct ip *);
87 	/*
88 	 * Fill in IP header.
89 	 */
90 	if ((flags & IP_FORWARDING) == 0) {
91 		ip->ip_v = IPVERSION;
92 		ip->ip_off &= IP_DF;
93 		ip->ip_id = htons(ip_id++);
94 		ip->ip_hl = hlen >> 2;
95 	} else {
96 		hlen = ip->ip_hl << 2;
97 		ipstat.ips_localout++;
98 	}
99 	/*
100 	 * Route packet.
101 	 */
102 	if (ro == 0) {
103 		ro = &iproute;
104 		(void) memset((caddr_t)ro, 0, sizeof (*ro));
105 	}
106 	dst = (struct sockaddr_in *)&ro->ro_dst;
107 	/*
108 	 * If there is a cached route,
109 	 * check that it is to the same destination
110 	 * and is still up.  If not, free it and try again.
111 	 */
112 	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
113 	   dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
114 		RTFREE_(ro->ro_rt);
115 		ro->ro_rt = (struct rtentry *)0;
116 	}
117 	if (ro->ro_rt == 0) {
118 		dst->sin_family = AF_INET;
119 		dst->sin_len = sizeof(*dst);
120 		dst->sin_addr = ip->ip_dst;
121 	}
122 	/*
123 	 * If routing to interface only,
124 	 * short circuit routing lookup.
125 	 */
126 	if (flags & IP_ROUTETOIF) {
127 
128 		ia = (struct in_ifaddr *)ifa_ifwithdstaddr((struct sockaddr *)dst);
129 		if (ia == 0)
130 			ia = in_iaonnetof(in_netof(ip->ip_dst));
131 		if (ia == 0) {
132 			error = ENETUNREACH;
133 			goto bad;
134 		}
135 		ifp = ia->ia_ifp;
136 	} else {
137 		if (ro->ro_rt == 0)
138 			RTALLOC(ro);
139 		if (ro->ro_rt == 0) {
140 			error = EHOSTUNREACH;
141 			goto bad;
142 		}
143 		ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
144 		ifp = ro->ro_rt->rt_ifp;
145 		ro->ro_rt->rt_use++;
146 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
147 			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
148 	}
149 #ifndef notdef
150 	/*
151 	 * If source address not specified yet, use address
152 	 * of outgoing interface.
153 	 */
154 	if (ip->ip_src.s_addr == INADDR_ANY)
155 		ip->ip_src = IA_SIN(ia)->sin_addr;
156 #endif
157 	/*
158 	 * Look for broadcast address and
159 	 * and verify user is allowed to send
160 	 * such a packet.
161 	 */
162 	if (in_broadcast(dst->sin_addr)) {
163 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
164 			error = EADDRNOTAVAIL;
165 			goto bad;
166 		}
167 		if ((flags & IP_ALLOWBROADCAST) == 0) {
168 			error = EACCES;
169 			goto bad;
170 		}
171 		/* don't allow broadcast messages to be fragmented */
172 		if ((u_short)ip->ip_len > ifp->if_mtu) {
173 			error = EMSGSIZE;
174 			goto bad;
175 		}
176 		m->m_flags |= M_BCAST;
177 	}
178 
179 	/*
180 	 * If small enough for interface, can just send directly.
181 	 */
182 	if ((u_short)ip->ip_len <= ifp->if_mtu) {
183 		ip->ip_len = htons((u_short)ip->ip_len);
184 		ip->ip_off = htons((u_short)ip->ip_off);
185 		ip->ip_sum = 0;
186 #ifdef was
187 		ip->ip_sum = in_cksum(m, hlen);
188 #else
189 		if (hlen == 20)
190 			ip->ip_sum = in_cksumiphdr((void *)ip);
191 		else
192 			ip->ip_sum = in_cksum(m, hlen);
193 #endif
194 		error = (*ifp->if_output)(ifp, m,
195 				(struct sockaddr *)dst, ro->ro_rt);
196 		goto done;
197 	}
198 	ipstat.ips_fragmented++;
199 	/*
200 	 * Too large for interface; fragment if possible.
201 	 * Must be able to put at least 8 bytes per fragment.
202 	 */
203 	if (ip->ip_off & IP_DF) {
204 		error = EMSGSIZE;
205 		goto bad;
206 	}
207 	len = (ifp->if_mtu - hlen) &~ 7;
208 	if (len < 8) {
209 		error = EMSGSIZE;
210 		goto bad;
211 	}
212 
213     {
214 	int mhlen, firstlen = len;
215 	struct mbuf **mnext = &m->m_nextpkt;
216 
217 	/*
218 	 * Loop through length of segment after first fragment,
219 	 * make new header and copy data of each part and link onto chain.
220 	 */
221 	m0 = m;
222 	mhlen = sizeof (struct ip);
223 	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
224 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
225 		if (m == 0) {
226 			error = ENOBUFS;
227 			goto sendorfree;
228 		}
229 		m->m_data += max_linkhdr;
230 		mhip = mtod(m, struct ip *);
231 		*mhip = *ip;
232 		if (hlen > sizeof (struct ip)) {
233 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
234 			mhip->ip_hl = mhlen >> 2;
235 		}
236 		m->m_len = mhlen;
237 		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
238 		if (ip->ip_off & IP_MF)
239 			mhip->ip_off |= IP_MF;
240 		if (off + len >= (u_short)ip->ip_len)
241 			len = (u_short)ip->ip_len - off;
242 		else
243 			mhip->ip_off |= IP_MF;
244 		mhip->ip_len = htons((u_short)(len + mhlen));
245 		m->m_next = m_copy(m0, off, len);
246 		if (m->m_next == 0) {
247 			error = ENOBUFS;	/* ??? */
248 			goto sendorfree;
249 		}
250 		m->m_pkthdr.len = mhlen + len;
251 		m->m_pkthdr.rcvif = (struct ifnet *)0;
252 		mhip->ip_off = htons((u_short)mhip->ip_off);
253 		mhip->ip_sum = 0;
254 #ifdef was
255 		mhip->ip_sum = in_cksum(m, mhlen);
256 #else
257 		if (mhlen == 20)
258 			mhip->ip_sum = in_cksumiphdr((void *)mhip);
259 		else
260 			mhip->ip_sum = in_cksum(m, mhlen);
261 #endif
262 		*mnext = m;
263 		mnext = &m->m_nextpkt;
264 		ipstat.ips_ofragments++;
265 	}
266 	/*
267 	 * Update first fragment by trimming what's been copied out
268 	 * and updating header, then send each fragment (in order).
269 	 */
270 	m = m0;
271 	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
272 	m->m_pkthdr.len = hlen + firstlen;
273 	ip->ip_len = htons((u_short)m->m_pkthdr.len);
274 	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
275 	ip->ip_sum = 0;
276 #ifdef	was
277 	ip->ip_sum = in_cksum(m, hlen);
278 #else
279 	if (hlen == 20)
280 		ip->ip_sum = in_cksumiphdr((void *)ip);
281 	else
282 		ip->ip_sum = in_cksum(m, hlen);
283 #endif
284 sendorfree:
285 	for (m = m0; m; m = m0) {
286 		m0 = m->m_nextpkt;
287 		m->m_nextpkt = 0;
288 		if (error == 0)
289 			error = (*ifp->if_output)(ifp, m,
290 			    (struct sockaddr *)dst, ro->ro_rt);
291 		else
292 			m_freem(m);
293 	}
294     }
295 done:
296 	if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt)
297 		RTFREE_(ro->ro_rt);
298 	return (error);
299 bad:
300 	m_freem(m0);
301 	goto done;
302 }
303 
304 /*
305  * Insert IP options into preformed packet.
306  * Adjust IP destination as required for IP source routing,
307  * as indicated by a non-zero in_addr at the start of the options.
308  */
309 struct mbuf *
310 ip_insertoptions(m, opt, phlen)
311 	register struct mbuf *m;
312 	struct mbuf *opt;
313 	int *phlen;
314 {
315 	register struct ipoption *p = mtod(opt, struct ipoption *);
316 	struct mbuf *n;
317 	register struct ip *ip = mtod(m, struct ip *);
318 	unsigned optlen;
319 
320 	optlen = opt->m_len - sizeof(p->ipopt_dst);
321 	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
322 		return (m);		/* XXX should fail */
323 	if (p->ipopt_dst.s_addr)
324 		ip->ip_dst = p->ipopt_dst;
325 	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
326 		MGETHDR(n, M_DONTWAIT, MT_HEADER);
327 		if (n == 0)
328 			return (m);
329 		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
330 		m->m_len -= sizeof(struct ip);
331 		m->m_data += sizeof(struct ip);
332 		n->m_next = m;
333 		m = n;
334 		m->m_len = optlen + sizeof(struct ip);
335 		m->m_data += max_linkhdr;
336 		(void) memcpy(mtod(m, caddr_t), (caddr_t)ip, sizeof(struct ip));
337 	} else {
338 		m->m_data -= optlen;
339 		m->m_len += optlen;
340 		m->m_pkthdr.len += optlen;
341 		(void) memmove(mtod(m, caddr_t), (caddr_t)ip, sizeof(struct ip));
342 	}
343 	ip = mtod(m, struct ip *);
344 	(void) memcpy((caddr_t)(ip + 1), (caddr_t)p->ipopt_list, (unsigned)optlen);
345 	*phlen = sizeof(struct ip) + optlen;
346 	ip->ip_len += optlen;
347 	return (m);
348 }
349 
/*
 * Copy options from ip to jp,
 * omitting those not copied during fragmentation.
 *
 * The options following header ip are scanned; those whose type has
 * the "copied" flag set are appended after header jp, and the result
 * is padded with IPOPT_EOL to a multiple of 4 bytes.
 * Returns the padded length of the options written after jp.
 *
 * Bogus lengths should have been caught by ip_dooptions, but a
 * malformed option (length byte missing or smaller than the two
 * byte type/length prefix) ends the scan instead of stalling it.
 */
int
ip_optcopy(ip, jp)
	struct ip *ip, *jp;
{
	register unsigned char *cp, *dp;
	int opt, optlen, cnt;

	cp = (unsigned char *)(ip + 1);
	dp = (unsigned char *)(jp + 1);
	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[0];
		if (opt == IPOPT_EOL)
			break;
		if (opt == IPOPT_NOP)
			optlen = 1;
		else {
			/* length byte must lie inside the header */
			if (cnt < IPOPT_OLEN + 1)
				break;
			optlen = cp[IPOPT_OLEN];
			/* a length below 2 would never advance cp */
			if (optlen < IPOPT_OLEN + 1)
				break;
		}
		/* never copy past the end of the source header */
		if (optlen > cnt)
			optlen = cnt;
		if (IPOPT_COPIED(opt)) {
			(void) memcpy(dp, cp, (unsigned)optlen);
			dp += optlen;
		}
	}
	for (optlen = dp - (unsigned char *)(jp+1); optlen & 0x3; optlen++)
		*dp++ = IPOPT_EOL;
	return (optlen);
}
383 
384 /*
385  * IP socket option processing.
386  */
387 ip_ctloutput(op, so, level, optname, mp)
388 	int op;
389 	struct socket *so;
390 	int level, optname;
391 	struct mbuf **mp;
392 {
393 	register struct inpcb *inp = sotoinpcb(so);
394 	register struct mbuf *m = *mp;
395 	register int optval;
396 	int error = 0;
397 
398 	if (level != IPPROTO_IP)
399 		error = EINVAL;
400 	else switch (op) {
401 
402 	case PRCO_SETOPT:
403 		switch (optname) {
404 		case IP_OPTIONS:
405 #ifdef notyet
406 		case IP_RETOPTS:
407 			return (ip_pcbopts(optname, &inp->inp_options, m));
408 #else
409 			return (ip_pcbopts(&inp->inp_options, m));
410 #endif
411 
412 		case IP_TOS:
413 		case IP_TTL:
414 		case IP_RECVOPTS:
415 		case IP_RECVRETOPTS:
416 		case IP_RECVDSTADDR:
417 			if (m->m_len != sizeof(int))
418 				error = EINVAL;
419 			else {
420 				optval = *mtod(m, int *);
421 				switch (optname) {
422 
423 				case IP_TOS:
424 					inp->inp_ip.ip_tos = optval;
425 					break;
426 
427 				case IP_TTL:
428 					inp->inp_ip.ip_ttl = optval;
429 					break;
430 #define	OPTSET(bit) \
431 	if (optval) \
432 		inp->inp_flags |= bit; \
433 	else \
434 		inp->inp_flags &= ~bit;
435 
436 				case IP_RECVOPTS:
437 					OPTSET(INP_RECVOPTS);
438 					break;
439 
440 				case IP_RECVRETOPTS:
441 					OPTSET(INP_RECVRETOPTS);
442 					break;
443 
444 				case IP_RECVDSTADDR:
445 					OPTSET(INP_RECVDSTADDR);
446 					break;
447 				}
448 			}
449 			break;
450 #undef OPTSET
451 
452 		default:
453 			error = EINVAL;
454 			break;
455 		}
456 		if (m)
457 			(void)m_free(m);
458 		break;
459 
460 	case PRCO_GETOPT:
461 		switch (optname) {
462 		case IP_OPTIONS:
463 		case IP_RETOPTS:
464 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
465 			if (inp->inp_options) {
466 				m->m_len = inp->inp_options->m_len;
467 				(void) memcpy(mtod(m, caddr_t),
468 				    mtod(inp->inp_options, caddr_t),
469 				    (unsigned)m->m_len);
470 			} else
471 				m->m_len = 0;
472 			break;
473 
474 		case IP_TOS:
475 		case IP_TTL:
476 		case IP_RECVOPTS:
477 		case IP_RECVRETOPTS:
478 		case IP_RECVDSTADDR:
479 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
480 			m->m_len = sizeof(int);
481 			switch (optname) {
482 
483 			case IP_TOS:
484 				optval = inp->inp_ip.ip_tos;
485 				break;
486 
487 			case IP_TTL:
488 				optval = inp->inp_ip.ip_ttl;
489 				break;
490 
491 #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
492 
493 			case IP_RECVOPTS:
494 				optval = OPTBIT(INP_RECVOPTS);
495 				break;
496 
497 			case IP_RECVRETOPTS:
498 				optval = OPTBIT(INP_RECVRETOPTS);
499 				break;
500 
501 			case IP_RECVDSTADDR:
502 				optval = OPTBIT(INP_RECVDSTADDR);
503 				break;
504 			}
505 			*mtod(m, int *) = optval;
506 			break;
507 
508 		default:
509 			error = EINVAL;
510 			break;
511 		}
512 		break;
513 	}
514 	return (error);
515 }
516 
517 /*
518  * Set up IP options in pcb for insertion in output packets.
519  * Store in mbuf with pointer in pcbopt, adding pseudo-option
520  * with destination address if source routed.
521  */
522 #ifdef notyet
523 ip_pcbopts(optname, pcbopt, m)
524 	int optname;
525 #else
526 ip_pcbopts(pcbopt, m)
527 #endif
528 	struct mbuf **pcbopt;
529 	register struct mbuf *m;
530 {
531 	register cnt, optlen;
532 	register u_char *cp;
533 	u_char opt;
534 
535 	/* turn off any old options */
536 	if (*pcbopt)
537 		(void)m_free(*pcbopt);
538 	*pcbopt = 0;
539 	if (m == (struct mbuf *)0 || m->m_len == 0) {
540 		/*
541 		 * Only turning off any previous options.
542 		 */
543 		if (m)
544 			(void)m_free(m);
545 		return (0);
546 	}
547 
548 #ifndef	vax
549 	if (m->m_len % sizeof(long))
550 		goto bad;
551 #endif
552 	/*
553 	 * IP first-hop destination address will be stored before
554 	 * actual options; move other options back
555 	 * and clear it when none present.
556 	 */
557 	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
558 		goto bad;
559 	cnt = m->m_len;
560 	m->m_len += sizeof(struct in_addr);
561 	cp = mtod(m, u_char *) + sizeof(struct in_addr);
562 	(void) memmove((caddr_t)cp, mtod(m, caddr_t), (unsigned)cnt);
563 	(void) memset(mtod(m, caddr_t), 0, sizeof(struct in_addr));
564 
565 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
566 		opt = cp[IPOPT_OPTVAL];
567 		if (opt == IPOPT_EOL)
568 			break;
569 		if (opt == IPOPT_NOP)
570 			optlen = 1;
571 		else {
572 			optlen = cp[IPOPT_OLEN];
573 			if (optlen <= IPOPT_OLEN || optlen > cnt)
574 				goto bad;
575 		}
576 		switch (opt) {
577 
578 		default:
579 			break;
580 
581 		case IPOPT_LSRR:
582 		case IPOPT_SSRR:
583 			/*
584 			 * user process specifies route as:
585 			 *	->A->B->C->D
586 			 * D must be our final destination (but we can't
587 			 * check that since we may not have connected yet).
588 			 * A is first hop destination, which doesn't appear in
589 			 * actual IP option, but is stored before the options.
590 			 */
591 			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
592 				goto bad;
593 			m->m_len -= sizeof(struct in_addr);
594 			cnt -= sizeof(struct in_addr);
595 			optlen -= sizeof(struct in_addr);
596 			cp[IPOPT_OLEN] = optlen;
597 			/*
598 			 * Move first hop before start of options.
599 			 */
600 			(void) memcpy(mtod(m, caddr_t), (caddr_t)&cp[IPOPT_OFFSET+1],
601 			    sizeof(struct in_addr));
602 			/*
603 			 * Then copy rest of options back
604 			 * to close up the deleted entry.
605 			 */
606 			(void) memmove((caddr_t)&cp[IPOPT_OFFSET+1],
607 			    (caddr_t)(&cp[IPOPT_OFFSET+1] +
608 			    sizeof(struct in_addr)),
609 			    (unsigned)cnt + sizeof(struct in_addr));
610 			break;
611 		}
612 	}
613 	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
614 		goto bad;
615 	*pcbopt = m;
616 	return (0);
617 
618 bad:
619 	(void)m_free(m);
620 	return (EINVAL);
621 }
622