1 #ifdef RCSIDENT 2 static char rcsident[] = "$Header: ip_output.c,v 1.28 85/07/31 09:32:09 walsh Exp $"; 3 #endif 4 5 #include "../h/param.h" 6 #include "../h/dir.h" 7 #include "../h/user.h" 8 #include "../h/mbuf.h" 9 #include "../h/socket.h" 10 #include "../h/socketvar.h" 11 #include "../h/protosw.h" 12 #include "../h/domain.h" 13 #include "../h/ioctl.h" 14 #include "../h/syslog.h" 15 16 #include "../net/if.h" 17 #include "../net/route.h" 18 19 #include "../bbnnet/in.h" 20 #include "../bbnnet/net.h" 21 #include "../bbnnet/in_pcb.h" 22 #include "../bbnnet/in_var.h" 23 #include "../bbnnet/ip.h" 24 #include "../bbnnet/icmp.h" 25 26 /* 27 * If you're going to a specific host or via a gateway, the routing 28 * entry gateway field holds the best way to get there. Otherwise, 29 * the routing entry tells you how to get onto that net -- it has 30 * the net address portion of our local host: 31 * 32 * On bbn-labs-b: 33 * 34 * rt_dst rt_gateway flags 35 * il0 => 0x00000b80 0x2010b80 UP 36 * imp0 => 0x00000008 0x2000708 UP 37 * loop => 0x0000007f 0x100007f UP 38 * 39 * So you can see that the rt_gateway is our local address, and the 40 * rt_dst may be the net number of the media. If it's a route 41 * to a net, the other guy is on this net and you want to route the 42 * packet to him anyway. 43 * 44 * gateway 0 0x1000b80 UP, RTF_GATEWAY 45 */ 46 47 #define IF_SEND(ifp, mp, rt, retval) \ 48 {\ 49 static struct sockaddr_in tmproute = {AF_INET}; \ 50 \ 51 if (! ((ifp)->if_flags & IFF_UP)){ \ 52 /* goes with PRC_IFDOWN in in.c */ \ 53 m_freem(mp); \ 54 retval = ENETUNREACH; \ 55 } else if ((rt)->rt_flags & (RTF_GATEWAY|RTF_HOST)) \ 56 retval = (*(ifp)->if_output)(ifp, mp, &(rt)->rt_gateway); \ 57 else { \ 58 tmproute.sin_addr.s_addr = (mtod(mp, struct ip *))->ip_dst.s_addr; \ 59 retval = (*(ifp)->if_output)(ifp, mp, (struct sockaddr *) &tmproute); \ 60 }} 61 62 if_send(ifp, mp, rt) 63 register struct ifnet *ifp; 64 register struct mbuf *mp; 65 register struct rtentry *rt; 66 { 67 int retval; 68 69 IF_SEND (ifp, mp, rt, retval); 70 return (retval); 71 } 72 73 74 /* 75 * Find a route to this destination. Given the source and destination 76 * addresses, it returns a local net address 77 * to send to (either the address of the destination itself or a gateway). 78 * Taken mostly from rtalloc; expanded to route according to 79 * both ends of the connection. 80 */ 81 82 83 struct rtentry *ip_route(src, dst) 84 struct in_addr *src; 85 struct in_addr *dst; 86 { 87 register struct rtentry *rt; 88 register struct mbuf *m; 89 register unsigned hash; 90 net_t snet, dnet; 91 int doinghost; 92 struct rtentry *rtmin; 93 struct mbuf **table; 94 static struct in_addr wildcard; 95 96 /* get network parts of src and dest addresses */ 97 98 snet = iptonet(*src); 99 dnet = iptonet(*dst); 100 101 rtmin = NULL; 102 hash = HOSTHASH(dst->s_addr); 103 table = rthost; 104 doinghost = TRUE; 105 again : 106 for (m = table[hash % RTHASHSIZ]; m; m = m->m_next) 107 { 108 rt = mtod(m, struct rtentry *); 109 if (rt->rt_hash != hash) 110 continue; 111 if (! (rt->rt_flags & RTF_UP)) 112 continue; 113 if (! (rt->rt_ifp->if_flags & IFF_UP)) 114 continue; 115 if (rt->rt_dst.sa_family != AF_INET) 116 continue; 117 118 /* packets go out an interface with our local IP address */ 119 if (iptonet(((struct sockaddr_in *)&(rt->rt_gateway))->sin_addr) != snet) 120 continue; 121 122 /* does this route get us there? */ 123 if (doinghost) 124 { 125 if (((struct sockaddr_in *)&(rt->rt_dst))->sin_addr.s_addr != 126 dst->s_addr) 127 continue; 128 } 129 else 130 { 131 /* 132 * iptonet == 0 => smart gateway (route to anywhere) 133 * iptonet != 0 => gateway to another net (route to net) 134 */ 135 if (iptonet(((struct sockaddr_in *)&(rt->rt_dst))->sin_addr) != dnet) 136 continue; 137 } 138 139 /* and try to share load across gateways */ 140 if (rtmin == NULL) 141 rtmin = rt; 142 else if (rt->rt_use < rtmin->rt_use) 143 rtmin = rt; 144 } 145 146 if (rtmin == NULL) 147 { 148 if (doinghost) 149 { 150 doinghost = FALSE; 151 hash = NETHASH(*dst), table = rtnet; 152 goto again; 153 } 154 /* 155 * Check for wildcard gateway, by convention network 0. 156 */ 157 if (dst != &wildcard) 158 { 159 hash = 0; 160 dst = &wildcard; 161 dnet = 0; 162 goto again; 163 } 164 rtstat.rts_unreach++; 165 return(NULL); 166 } 167 168 rtmin->rt_refcnt++; 169 if (dst == &wildcard) 170 rtstat.rts_wildcard++; 171 return(rtmin); 172 } 173 174 175 /* 176 * Ip_send is called from the higher protocol layer (TCP/RDP/UDP) and is passed 177 * an mbuf chain containing a packet to send to the local network. The first 178 * mbuf contains the protocol header and an IP header which is partially 179 * filled in. After determining a route (outgoing interface + first hop) for 180 * the packet, it is fragmented (if necessary) and sent to the local net 181 * through the local net send routine. 182 * 183 * For non-raw output, caller should have stuffed: 184 * ip protocol type, type of service, source addr, destin addr 185 * 186 * ip_tos is left to caller so that people using raw sockets can do whatever 187 * they please. (They don't have an inpcb in which to store such info.) 188 * 189 * The asis argument is TRUE for raw output and the gateway (packet forwarding) 190 * code. It indicates that the IP header is fully constructed. 191 * 192 * Errors at the IP layer and below occur synchronously, and can be reported 193 * back via subroutine return values. Higher level protocols should remember 194 * that if they do things asynchronous to a system call (ie., packet 195 * retransmission) that they should post error back to user via advise_user() 196 * so that user gets error next time he rendezvous with the kernel. 197 */ 198 ip_send(inp, mp, len, asis) 199 struct inpcb *inp; 200 register struct mbuf *mp; 201 register int len; 202 int asis; 203 { 204 register struct ip *p; 205 register struct ifnet *ifp; 206 register struct rtentry *rt; 207 register int hlen; 208 int free_route = FALSE; 209 int retval; 210 211 p = mtod(mp, struct ip *); /* -> ip header */ 212 /* 213 * Find route for datagram if one has not been assigned. 214 */ 215 if ((rt = inp->inp_route.ro_rt) == NULL) 216 { 217 if ((rt = ip_route(&p->ip_src, &p->ip_dst)) == NULL) 218 { 219 if (asis || (p->ip_src.s_addr == INADDR_ANY)) 220 { 221 /* 222 * asis: forwarding a packet not sourced by us 223 * eg., by raw interface and user level repeater process 224 * INADDR_ANY: sending icmp packet for which 225 * we're trying to avoid routing twice. 226 */ 227 struct route tmproute; 228 struct sockaddr_in *sin; 229 230 bzero ((caddr_t) &tmproute, sizeof(tmproute)); 231 sin = (struct sockaddr_in *) &tmproute.ro_dst; 232 sin->sin_family = AF_INET; 233 sin->sin_addr.s_addr = p->ip_dst.s_addr; 234 rtalloc (&tmproute); 235 rt = tmproute.ro_rt; 236 237 if (rt && (p->ip_src.s_addr == INADDR_ANY)) 238 p->ip_src = IA_INADDR(in_iafromif(rt->rt_ifp)); 239 } 240 241 if (rt == NULL) 242 { 243 m_freem(mp); 244 return(ENETUNREACH); 245 } 246 } 247 free_route = TRUE; 248 } 249 ifp = rt->rt_ifp; 250 251 /* 252 * Copy ip source route to header. Know asis must be FALSE, if do. 253 */ 254 if (inp->inp_optlen > 0) 255 { 256 char *q; 257 258 if (mp->m_off - inp->inp_optlen >= MMINOFF) 259 { 260 struct in_addr *ipa; 261 262 mp->m_off -= inp->inp_optlen; 263 mp->m_len += inp->inp_optlen; 264 q = (char *) p; 265 p = (struct ip *) (q - inp->inp_optlen); 266 bcopy(q, (caddr_t)p, sizeof(struct ip)); 267 bcopy(inp->inp_options, (caddr_t)(p+1), (unsigned)inp->inp_optlen); 268 /* 269 * And replate eventual destination with first hop. 270 * Eventual destination is in source route just 271 * copied in. 272 */ 273 ipa = (struct in_addr *) (&inp->inp_options[0]); 274 p->ip_dst = ipa[inp->inp_optlen/sizeof(struct in_addr)]; 275 } 276 else 277 log(LOG_INFO, "ip_send: optlen %d inpcb 0x%x\n", 278 (int)inp->inp_optlen, inp); 279 } 280 281 /* 282 * fill in ip header fields 283 */ 284 if (asis) 285 { 286 /* 287 * RAW OUTPUT. Must get len, hlen, off from packet header. 288 * Byte swap is ugly (since we must swap back below), but 289 * necessary in case we must fragment. 290 */ 291 hlen = p->ip_hl << IP_HLSHIFT; 292 len = ntohs(p->ip_len); 293 p->ip_off = ntohs(p->ip_off); 294 } 295 else 296 { 297 static u_short next_ip_id; /* some day RDP may want to force for rxmit */ 298 299 hlen = sizeof(struct ip) + inp->inp_optlen; 300 len += hlen; 301 p->ip_v = IPVERSION; 302 p->ip_hl = hlen >> IP_HLSHIFT; 303 p->ip_off = 0; 304 p->ip_ttl = MAXTTL; /* ### should come from route */ 305 p->ip_id = htons(next_ip_id++); 306 } 307 308 /* 309 * let ip_frag do the send if needed, otherwise do it directly. 310 */ 311 312 /* for testing IP reassembly code */ 313 #ifdef FORCE_FRAG 314 #define MTU(ifp) (((ifp)->if_mtu >> FORCE_FRAG) & (~3)) 315 #else 316 #define MTU(ifp) (ifp)->if_mtu 317 #endif 318 319 if (len > MTU(ifp)) 320 { 321 p->ip_len = len; 322 retval = ip_frag(p, ifp, rt, hlen); 323 } 324 else 325 { 326 /* 327 * complete header, byte swap, and send to local net 328 */ 329 p->ip_len = htons((u_short)len); 330 p->ip_off = htons(p->ip_off); 331 /* 332 * No reason not to have kernel checksum, even for raw packets. 333 */ 334 p->ip_sum = 0; 335 p->ip_sum = in_cksum(dtom(p), hlen); 336 IF_SEND (ifp, mp, rt, retval); 337 } 338 339 rt->rt_use ++; /* Yet another IP packet sent away */ 340 341 if (free_route) 342 { 343 struct socket *so; 344 345 if ((so = inp->inp_socket) && 346 (so->so_proto->pr_flags & PR_CONNREQUIRED)) 347 /* 348 * Found a new route after old one pinged out. 349 */ 350 inp->inp_route.ro_rt = rt; 351 else 352 rtfree(rt); 353 } 354 355 return(retval); 356 } 357 358 /* 359 * Ip_frag is called with a packet with a completed ip header 360 * (except for checksum). It fragments the packet, inserts the 361 * IP checksum, and calls the appropriate local net output routine 362 * to send it to the net. 363 * 364 * Previously, when there was only one kind of mbuf, it tried to 365 * reduce space requirements by recycling the chain to be fragmented. 366 * Preserving this approach is overly complicated, and should mbufs 367 * change again, cause problems. Therefore, have switched to copying 368 * the chain to be fragmented. 369 */ 370 ip_frag(p, ifp, rt, hlen) 371 register struct ip *p; 372 struct ifnet *ifp; 373 struct rtentry *rt; 374 register int hlen; 375 { 376 register struct mbuf *m; /* original chunk */ 377 register struct mbuf *mhdr; /* fragment */ 378 register struct ip *fip; /* the fragment IP header */ 379 int off; /* offset into entire IP datagram */ 380 int here; /* offset into this chunk of it */ 381 register int len; /* length of data in this chunk */ 382 int flags; /* of this chunk to fragment */ 383 int max; /* max data length in a fragment */ 384 int fdlen; /* actual fragment data length */ 385 int error; 386 387 m = dtom(p); 388 389 if (p->ip_off & ip_df) 390 { /* can't fragment */ 391 m_freem(m); 392 return(EMSGSIZE); 393 } 394 max = MTU(ifp) - hlen; /* max data length in frag */ 395 len = p->ip_len - hlen; /* data length */ 396 397 /* 398 * this only needs to be this complicated if we are handed 399 * an already-fragmented packet 400 */ 401 flags = p->ip_off&(ip_mf|ip_df); /* save old flags */ 402 p->ip_off &= ~flags; /* take them out of ip_off */ 403 off = p->ip_off << IP_OFFSHIFT; /* fragment offset */ 404 here = hlen; 405 error = 0; 406 407 while (len > 0) 408 { 409 /* 410 * Allocate mbuf for fragment IP header 411 */ 412 mhdr = m_get(M_DONTWAIT, MT_HEADER); 413 if (mhdr == NULL) 414 { 415 error = ENOBUFS; 416 break; 417 } 418 /* 419 * get copy of data for fragment 420 */ 421 if (len < max) 422 fdlen = len; 423 else 424 fdlen = max & (~7); /* 7 == 2^IP_OFFSHIFT -1 */ 425 mhdr->m_next = m_copy(m, here, fdlen); 426 if (mhdr->m_next == NULL) 427 { 428 m_free(mhdr); 429 error = ENOBUFS; 430 break; 431 } 432 /* 433 * build the header for this fragment and ship it off. 434 */ 435 mhdr->m_len = hlen; 436 mhdr->m_off = MMAXOFF - hlen; 437 fip = mtod(mhdr, struct ip *); 438 bcopy((caddr_t)p, (caddr_t)fip, (unsigned)hlen); 439 fip->ip_off = off >> IP_OFFSHIFT; 440 if (fdlen >= len) 441 /* it's the last fragment */ 442 fip->ip_off |= flags; 443 else 444 fip->ip_off |= ip_mf; 445 fip->ip_off = htons((u_short)fip->ip_off); 446 fip->ip_len = htons((u_short)fdlen + hlen); 447 fip->ip_sum = 0; 448 fip->ip_sum = in_cksum(mhdr, hlen); 449 if (error = if_send (ifp, mhdr, rt)) 450 break; 451 452 /* 453 * and get ready for next pass through the loop 454 */ 455 len -= fdlen; 456 off += fdlen; 457 here += fdlen; 458 } 459 460 m_freem(m); 461 return (error); 462 } 463 464 /* 465 * Current connection should use a new path. 466 */ 467 struct rtentry *ip_reroute(inp) 468 register struct inpcb *inp; 469 { 470 register struct route *ro = &inp->inp_route; 471 472 rtfree(ro->ro_rt); 473 return(ro->ro_rt = ip_route(&inp->inp_laddr, &inp->inp_faddr)); 474 } 475 476 /* 477 * A gateway has gone down. Change route used by all connections currently 478 * using it. 479 */ 480 ip_gdown(addr) 481 u_long addr; 482 { 483 register struct protosw *psw; 484 485 for(psw=inetdomain.dom_protosw; psw < inetdomain.dom_protoswNPROTOSW; psw++) 486 if (psw->pr_type != SOCK_RAW) 487 if (psw->pr_ctlinput) 488 (*(psw->pr_ctlinput)) (PRC_GWDOWN, addr); 489 } 490 491 /* 492 * Called from protocol ctlinput routine. This way, IP/ICMP don't need to know 493 * about protocol's head of inpcbs... for all the protocols. 494 */ 495 in_gdown (head, addr) 496 register struct inpcb *head; 497 u_long addr; 498 { 499 register struct inpcb *inp; 500 register struct rtentry *rt; 501 502 if (head == NULL) 503 return; 504 505 for(inp = head->inp_next; inp != head; inp = inp->inp_next) 506 { 507 if (rt = inp->inp_route.ro_rt) 508 { 509 if (rt->rt_flags & RTF_GATEWAY) 510 { 511 if (((struct sockaddr_in *) &rt->rt_gateway)->sin_addr.s_addr == addr) 512 { 513 /* 514 * Don't remove route permanently, since want to catch 515 * the gateway when it reboots: 516 * -- rtrequest (SIOCDELRT, rt) -- 517 * 518 * make sure rtfree() not remove route mbuf 519 * incrementing reference count here, and decrementing 520 * when timeout on reinstatement goes off. Cannot call 521 * rtfree with zero reference count when have not done 522 * SIOCDELRT. 523 */ 524 if (rt->rt_flags & RTF_UP) 525 { 526 rt->rt_flags &= ~RTF_UP; 527 rt->rt_flags |= RTF_REINSTATE; 528 rt->irt_gdown = RT_REINSTATE; 529 rt->rt_refcnt ++; 530 } 531 532 if (!ip_reroute(inp)) 533 advise_user(inp->inp_socket, ENETUNREACH); 534 535 } 536 } 537 } 538 } 539 } 540