1 #ifdef	RCSIDENT
2 static char rcsident[] = "$Header: ip_output.c,v 1.28 85/07/31 09:32:09 walsh Exp $";
3 #endif
4 
5 #include "../h/param.h"
6 #include "../h/dir.h"
7 #include "../h/user.h"
8 #include "../h/mbuf.h"
9 #include "../h/socket.h"
10 #include "../h/socketvar.h"
11 #include "../h/protosw.h"
12 #include "../h/domain.h"
13 #include "../h/ioctl.h"
14 #include "../h/syslog.h"
15 
16 #include "../net/if.h"
17 #include "../net/route.h"
18 
19 #include "../bbnnet/in.h"
20 #include "../bbnnet/net.h"
21 #include "../bbnnet/in_pcb.h"
22 #include "../bbnnet/in_var.h"
23 #include "../bbnnet/ip.h"
24 #include "../bbnnet/icmp.h"
25 
26 /*
27  * If you're going to a specific host or via a gateway, the routing
28  * entry gateway field holds the best way to get there.  Otherwise,
29  * the routing entry tells you how to get onto that net -- it has
30  * the net address portion of our local host:
31  *
32  * On bbn-labs-b:
33  *
34  *		rt_dst		rt_gateway	flags
35  * il0  => 	0x00000b80	0x2010b80	UP
36  * imp0 =>	0x00000008	0x2000708	UP
37  * loop =>	0x0000007f	0x100007f	UP
38  *
39  * So you can see that the rt_gateway is our local address, and the
40  * rt_dst may be the net number of the media.  If it's a route
41  * to a net, the other guy is on this net and you want to route the
42  * packet to him anyway.
43  *
44  * gateway	0               0x1000b80       UP, RTF_GATEWAY
45  */
46 
/*
 * IF_SEND(ifp, mp, rt, retval)
 *
 * Hand the packet in mbuf chain mp to the output routine of interface ifp
 * and leave the resulting error code in retval.
 *
 *  - Interface not IFF_UP: the chain is freed and retval is ENETUNREACH.
 *  - Route flagged RTF_GATEWAY or RTF_HOST: the next hop handed to the
 *    driver is the route's rt_gateway.
 *  - Otherwise (route to a net): the next hop handed to the driver is
 *    the packet's own ip_dst.
 *
 * The body is wrapped in do { } while (0) so that "IF_SEND(...);" is a
 * single statement and can safely be the body of an if that has an else.
 * Arguments are evaluated more than once; pass plain variables only.
 * tmproute is static: its family is set once and its address is
 * overwritten on every use of the net-route branch.
 */
#define IF_SEND(ifp, mp, rt, retval) \
do { \
    static struct sockaddr_in tmproute = {AF_INET}; \
 \
    if (! ((ifp)->if_flags & IFF_UP)) { \
	/* goes with PRC_IFDOWN in in.c */ \
	m_freem(mp); \
	(retval) = ENETUNREACH; \
    } else if ((rt)->rt_flags & (RTF_GATEWAY|RTF_HOST)) { \
	(retval) = (*(ifp)->if_output)((ifp), (mp), &(rt)->rt_gateway); \
    } else { \
	tmproute.sin_addr.s_addr = \
	    (mtod((mp), struct ip *))->ip_dst.s_addr; \
	(retval) = (*(ifp)->if_output)((ifp), (mp), \
				       (struct sockaddr *) &tmproute); \
    } \
} while (0)
61 
/*
 * Function form of the IF_SEND macro: send mbuf chain mp out interface
 * ifp using route rt, and return the error code that IF_SEND produces.
 */
if_send(ifp, mp, rt)
    register struct ifnet *ifp;
    register struct mbuf *mp;
    register struct rtentry *rt;
{
    int error;

    IF_SEND (ifp, mp, rt, error);

    return (error);
}
72 
73 
74 /*
75  * Find a route to this destination.  Given the source and destination
76  * addresses, it returns a local net address
77  * to send to (either the address of the destination itself or a gateway).
78  * Taken mostly from rtalloc;  expanded to route according to
79  * both ends of the connection.
80  */
81 
82 
83 struct rtentry *ip_route(src, dst)
84 struct in_addr *src;
85 struct in_addr *dst;
86 {
87     register struct rtentry *rt;
88     register struct mbuf *m;
89     register unsigned hash;
90     net_t snet, dnet;
91     int doinghost;
92     struct rtentry *rtmin;
93     struct mbuf **table;
94     static struct in_addr wildcard;
95 
96     /* get network parts of src and dest addresses */
97 
98     snet = iptonet(*src);
99     dnet = iptonet(*dst);
100 
101     rtmin = NULL;
102     hash = HOSTHASH(dst->s_addr);
103     table = rthost;
104     doinghost = TRUE;
105 again :
106     for (m = table[hash % RTHASHSIZ]; m; m = m->m_next)
107     {
108 	rt = mtod(m, struct rtentry *);
109 	if (rt->rt_hash != hash)
110 	    continue;
111 	if (! (rt->rt_flags & RTF_UP))
112 	    continue;
113 	if (! (rt->rt_ifp->if_flags & IFF_UP))
114 	    continue;
115 	if (rt->rt_dst.sa_family != AF_INET)
116 	    continue;
117 
118 	/* packets go out an interface with our local IP address */
119 	if (iptonet(((struct sockaddr_in *)&(rt->rt_gateway))->sin_addr) != snet)
120 	    continue;
121 
122 	/* does this route get us there? */
123 	if (doinghost)
124 	{
125 	    if (((struct sockaddr_in *)&(rt->rt_dst))->sin_addr.s_addr !=
126 		dst->s_addr)
127 		continue;
128 	}
129 	else
130 	{
131 	    /*
132 	     * iptonet == 0 => smart gateway (route to anywhere)
133 	     * iptonet != 0 => gateway to another net (route to net)
134 	     */
135 	    if (iptonet(((struct sockaddr_in *)&(rt->rt_dst))->sin_addr) != dnet)
136 		continue;
137 	}
138 
139 	/* and try to share load across gateways */
140 	if (rtmin == NULL)
141 	    rtmin = rt;
142 	else if (rt->rt_use < rtmin->rt_use)
143 	    rtmin = rt;
144     }
145 
146     if (rtmin == NULL)
147     {
148 	if (doinghost)
149 	{
150 	    doinghost = FALSE;
151 	    hash = NETHASH(*dst), table = rtnet;
152 	    goto again;
153 	}
154 	/*
155 	 * Check for wildcard gateway, by convention network 0.
156 	 */
157 	if (dst != &wildcard)
158 	{
159 	    hash = 0;
160 	    dst = &wildcard;
161 	    dnet = 0;
162 	    goto again;
163 	}
164 	rtstat.rts_unreach++;
165 	return(NULL);
166     }
167 
168     rtmin->rt_refcnt++;
169     if (dst == &wildcard)
170 	rtstat.rts_wildcard++;
171     return(rtmin);
172 }
173 
174 
175 /*
176  * Ip_send is called from the higher protocol layer (TCP/RDP/UDP) and is passed
177  * an mbuf chain containing a packet to send to the local network.  The first
178  * mbuf contains the protocol header and an IP header which is partially
179  * filled in.  After determining a route (outgoing interface + first hop) for
180  * the packet, it is fragmented (if necessary) and sent to the local net
181  * through the local net send routine.
182  *
183  * For non-raw output, caller should have stuffed:
184  *	ip protocol type, type of service, source addr, destin addr
185  *
186  * ip_tos is left to caller so that people using raw sockets can do whatever
187  * they please.  (They don't have an inpcb in which to store such info.)
188  *
189  * The asis argument is TRUE for raw output and the gateway (packet forwarding)
190  * code.  It indicates that the IP header is fully constructed.
191  *
192  * Errors at the IP layer and below occur synchronously, and can be reported
193  * back via subroutine return values.  Higher level protocols should remember
194  * that if they do things asynchronous to a system call (ie., packet
195  * retransmission) that they should post error back to user via advise_user()
196  * so that user gets error next time he rendezvous with the kernel.
197  */
198 ip_send(inp, mp, len, asis)
199 struct inpcb *inp;
200 register struct mbuf *mp;
201 register int len;
202 int asis;
203 {
204     register struct ip *p;
205     register struct ifnet *ifp;
206     register struct rtentry *rt;
207     register int hlen;
208     int free_route = FALSE;
209     int retval;
210 
211     p = mtod(mp, struct ip *);	/* -> ip header */
212     /*
213      * Find route for datagram if one has not been assigned.
214      */
215     if ((rt = inp->inp_route.ro_rt) == NULL)
216     {
217 	if ((rt = ip_route(&p->ip_src, &p->ip_dst)) == NULL)
218 	{
219 	    if (asis || (p->ip_src.s_addr == INADDR_ANY))
220 	    {
221 		/*
222 		 * asis: forwarding a packet not sourced by us
223 		 *      eg., by raw interface and user level repeater process
224 		 * INADDR_ANY: sending icmp packet for which
225 		 *      we're trying to avoid routing twice.
226 		 */
227 		struct route tmproute;
228 		struct sockaddr_in *sin;
229 
230 		bzero ((caddr_t) &tmproute, sizeof(tmproute));
231 		sin = (struct sockaddr_in *) &tmproute.ro_dst;
232 		sin->sin_family = AF_INET;
233 		sin->sin_addr.s_addr = p->ip_dst.s_addr;
234 		rtalloc (&tmproute);
235 		rt = tmproute.ro_rt;
236 
237 		if (rt && (p->ip_src.s_addr == INADDR_ANY))
238 		    p->ip_src = IA_INADDR(in_iafromif(rt->rt_ifp));
239 	    }
240 
241 	    if (rt == NULL)
242 	    {
243 		m_freem(mp);
244 		return(ENETUNREACH);
245 	    }
246 	}
247 	free_route = TRUE;
248     }
249     ifp = rt->rt_ifp;
250 
251     /*
252      * Copy ip source route to header.  Know asis must be FALSE, if do.
253      */
254     if (inp->inp_optlen > 0)
255     {
256 	char	*q;
257 
258 	if (mp->m_off - inp->inp_optlen >= MMINOFF)
259 	{
260 	    struct in_addr *ipa;
261 
262 	    mp->m_off -= inp->inp_optlen;
263 	    mp->m_len += inp->inp_optlen;
264 	    q = (char *) p;
265 	    p = (struct ip *) (q - inp->inp_optlen);
266 	    bcopy(q, (caddr_t)p, sizeof(struct ip));
267 	    bcopy(inp->inp_options, (caddr_t)(p+1), (unsigned)inp->inp_optlen);
268 	    /*
269 	     * And replate eventual destination with first hop.
270 	     * Eventual destination is in source route just
271 	     * copied in.
272 	     */
273 	    ipa = (struct in_addr *) (&inp->inp_options[0]);
274 	    p->ip_dst = ipa[inp->inp_optlen/sizeof(struct in_addr)];
275 	}
276 	else
277 	    log(LOG_INFO, "ip_send: optlen %d inpcb 0x%x\n",
278 		(int)inp->inp_optlen, inp);
279     }
280 
281     /*
282      * fill in ip header fields
283      */
284     if (asis)
285     {
286 	/*
287 	 * RAW OUTPUT.  Must get len, hlen, off from packet header.
288 	 * Byte swap is ugly (since we must swap back below), but
289 	 * necessary in case we must fragment.
290 	 */
291 	hlen = p->ip_hl << IP_HLSHIFT;
292 	len = ntohs(p->ip_len);
293 	p->ip_off = ntohs(p->ip_off);
294     }
295     else
296     {
297 	static u_short next_ip_id; /* some day RDP may want to force for rxmit */
298 
299 	hlen = sizeof(struct ip) + inp->inp_optlen;
300 	len += hlen;
301 	p->ip_v = IPVERSION;
302 	p->ip_hl = hlen >> IP_HLSHIFT;
303 	p->ip_off = 0;
304 	p->ip_ttl = MAXTTL; /* ### should come from route */
305 	p->ip_id = htons(next_ip_id++);
306     }
307 
308     /*
309      * let ip_frag do the send if needed, otherwise do it directly.
310      */
311 
312     /* for testing IP reassembly code */
313 #ifdef FORCE_FRAG
314 #define MTU(ifp) (((ifp)->if_mtu >> FORCE_FRAG) & (~3))
315 #else
316 #define MTU(ifp) (ifp)->if_mtu
317 #endif
318 
319     if (len > MTU(ifp))
320     {
321 	p->ip_len = len;
322 	retval = ip_frag(p, ifp, rt, hlen);
323     }
324     else
325     {
326 	/*
327 	 * complete header, byte swap, and send to local net
328 	 */
329 	p->ip_len = htons((u_short)len);
330 	p->ip_off = htons(p->ip_off);
331 	/*
332 	 * No reason not to have kernel checksum, even for raw packets.
333 	 */
334 	p->ip_sum = 0;
335 	p->ip_sum = in_cksum(dtom(p), hlen);
336 	IF_SEND (ifp, mp, rt, retval);
337     }
338 
339     rt->rt_use ++;	/* Yet another IP packet sent away */
340 
341     if (free_route)
342     {
343 	struct socket *so;
344 
345 	if ((so = inp->inp_socket) &&
346 	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
347 	    /*
348 	     * Found a new route after old one pinged out.
349 	     */
350 	    inp->inp_route.ro_rt = rt;
351 	else
352 	    rtfree(rt);
353     }
354 
355     return(retval);
356 }
357 
358 /*
359  * Ip_frag is called with a packet with a completed ip header
360  * (except for checksum).  It fragments the packet, inserts the
361  * IP checksum, and calls the appropriate local net output routine
362  * to send it to the net.
363  *
364  * Previously, when there was only one kind of mbuf, it tried to
365  * reduce space requirements by recycling the chain to be fragmented.
366  * Preserving this approach is overly complicated, and should mbufs
367  * change again, cause problems.  Therefore, have switched to copying
368  * the chain to be fragmented.
369  */
370 ip_frag(p, ifp, rt, hlen)
371 register struct ip *p;
372 struct ifnet *ifp;
373 struct rtentry *rt;
374 register int hlen;
375 {
376     register struct mbuf *m;	/* original chunk */
377     register struct mbuf *mhdr;	/* fragment */
378     register struct ip *fip;	/* the fragment IP header */
379     int off;	/* offset into entire IP datagram */
380     int here;	/* offset into this chunk of it */
381     register int len;	/* length of data in this chunk */
382     int flags;	/* of this chunk to fragment */
383     int max;	/* max data length in a fragment */
384     int fdlen;	/* actual fragment data length */
385     int error;
386 
387     m = dtom(p);
388 
389     if (p->ip_off & ip_df)
390     {	/* can't fragment */
391 	m_freem(m);
392 	return(EMSGSIZE);
393     }
394     max = MTU(ifp) - hlen;	/* max data length in frag */
395     len = p->ip_len - hlen;	/* data length */
396 
397     /*
398      * this only needs to be this complicated if we are handed
399      * an already-fragmented packet
400      */
401     flags	= p->ip_off&(ip_mf|ip_df);	/* save old flags */
402     p->ip_off &= ~flags;	/* take them out of ip_off */
403     off	= p->ip_off << IP_OFFSHIFT;	/* fragment offset */
404     here	= hlen;
405     error	= 0;
406 
407     while (len > 0)
408     {
409 	/*
410 	 * Allocate mbuf for fragment IP header
411 	 */
412 	mhdr = m_get(M_DONTWAIT, MT_HEADER);
413 	if (mhdr == NULL)
414 	{
415 	    error = ENOBUFS;
416 	    break;
417 	}
418 	/*
419 	 * get copy of data for fragment
420 	 */
421 	if (len < max)
422 	    fdlen = len;
423 	else
424 	    fdlen = max & (~7); /* 7 == 2^IP_OFFSHIFT -1 */
425 	mhdr->m_next = m_copy(m, here, fdlen);
426 	if (mhdr->m_next == NULL)
427 	{
428 	    m_free(mhdr);
429 	    error = ENOBUFS;
430 	    break;
431 	}
432 	/*
433 	 * build the header for this fragment and ship it off.
434 	 */
435 	mhdr->m_len = hlen;
436 	mhdr->m_off = MMAXOFF - hlen;
437 	fip = mtod(mhdr, struct ip *);
438 	bcopy((caddr_t)p, (caddr_t)fip, (unsigned)hlen);
439 	fip->ip_off = off >> IP_OFFSHIFT;
440 	if (fdlen >= len)
441 	    /* it's the last fragment */
442 	    fip->ip_off |= flags;
443 	else
444 	    fip->ip_off |= ip_mf;
445 	fip->ip_off = htons((u_short)fip->ip_off);
446 	fip->ip_len = htons((u_short)fdlen + hlen);
447 	fip->ip_sum = 0;
448 	fip->ip_sum = in_cksum(mhdr, hlen);
449 	if (error = if_send (ifp, mhdr, rt))
450 	    break;
451 
452 	/*
453 	 * and get ready for next pass through the loop
454 	 */
455 	len	-= fdlen;
456 	off	+= fdlen;
457 	here	+= fdlen;
458     }
459 
460     m_freem(m);
461     return (error);
462 }
463 
464 /*
465  * Current connection should use a new path.
466  */
467 struct rtentry *ip_reroute(inp)
468 register struct inpcb *inp;
469 {
470     register struct route *ro = &inp->inp_route;
471 
472     rtfree(ro->ro_rt);
473     return(ro->ro_rt = ip_route(&inp->inp_laddr, &inp->inp_faddr));
474 }
475 
476 /*
477  * A gateway has gone down.  Change route used by all connections currently
478  * using it.
479  */
480 ip_gdown(addr)
481 u_long	addr;
482 {
483     register struct protosw *psw;
484 
485     for(psw=inetdomain.dom_protosw; psw < inetdomain.dom_protoswNPROTOSW; psw++)
486 	if (psw->pr_type != SOCK_RAW)
487 		if (psw->pr_ctlinput)
488 			(*(psw->pr_ctlinput)) (PRC_GWDOWN, addr);
489 }
490 
491 /*
492  * Called from protocol ctlinput routine.  This way, IP/ICMP don't need to know
493  * about protocol's head of inpcbs... for all the protocols.
494  */
495 in_gdown (head, addr)
496 register struct inpcb *head;
497 u_long addr;
498 {
499     register struct inpcb   *inp;
500     register struct rtentry *rt;
501 
502     if (head == NULL)
503 	return;
504 
505     for(inp = head->inp_next; inp != head; inp = inp->inp_next)
506     {
507 	if (rt = inp->inp_route.ro_rt)
508 	{
509 	    if (rt->rt_flags & RTF_GATEWAY)
510 	    {
511 		if (((struct sockaddr_in *) &rt->rt_gateway)->sin_addr.s_addr == addr)
512 		{
513 			/*
514 			 * Don't remove route permanently, since want to catch
515 			 * the gateway when it reboots:
516 			 *      -- rtrequest (SIOCDELRT, rt) --
517 			 *
518 			 * make sure rtfree() not remove route mbuf
519 			 * incrementing reference count here, and decrementing
520 			 * when timeout on reinstatement goes off.  Cannot call
521 			 * rtfree with zero reference count when have not done
522 			 * SIOCDELRT.
523 			 */
524 			if (rt->rt_flags & RTF_UP)
525 			{
526 			    rt->rt_flags &= ~RTF_UP;
527 			    rt->rt_flags |= RTF_REINSTATE;
528 			    rt->irt_gdown = RT_REINSTATE;
529 			    rt->rt_refcnt ++;
530 			}
531 
532 			if (!ip_reroute(inp))
533 			    advise_user(inp->inp_socket, ENETUNREACH);
534 
535 		}
536 	    }
537 	}
538     }
539 }
540