1 /* ip_input.c 1.59 82/12/14 */ 2 3 #include "../h/param.h" 4 #include "../h/systm.h" 5 #include "../h/mbuf.h" 6 #include "../h/domain.h" 7 #include "../h/protosw.h" 8 #include "../h/socket.h" 9 #include <errno.h> 10 #include <time.h> 11 #include "../h/kernel.h" 12 13 #include "../net/if.h" 14 #include "../net/route.h" 15 #include "../netinet/in.h" 16 #include "../netinet/in_pcb.h" 17 #include "../netinet/in_systm.h" 18 #include "../netinet/ip.h" 19 #include "../netinet/ip_var.h" 20 #include "../netinet/ip_icmp.h" 21 #include "../netinet/tcp.h" 22 23 u_char ip_protox[IPPROTO_MAX]; 24 int ipqmaxlen = IFQ_MAXLEN; 25 struct ifnet *ifinet; /* first inet interface */ 26 27 /* 28 * IP initialization: fill in IP protocol switch table. 29 * All protocols not implemented in kernel go to raw IP protocol handler. 30 */ 31 ip_init() 32 { 33 register struct protosw *pr; 34 register int i; 35 36 pr = pffindproto(PF_INET, IPPROTO_RAW); 37 if (pr == 0) 38 panic("ip_init"); 39 for (i = 0; i < IPPROTO_MAX; i++) 40 ip_protox[i] = pr - inetsw; 41 for (pr = inetdomain.dom_protosw; 42 pr <= inetdomain.dom_protoswNPROTOSW; pr++) 43 if (pr->pr_family == PF_INET && 44 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) 45 ip_protox[pr->pr_protocol] = pr - inetsw; 46 ipq.next = ipq.prev = &ipq; 47 ip_id = time.tv_sec & 0xffff; 48 ipintrq.ifq_maxlen = ipqmaxlen; 49 ifinet = if_ifwithaf(AF_INET); 50 } 51 52 u_char ipcksum = 1; 53 struct ip *ip_reass(); 54 struct sockaddr_in ipaddr = { AF_INET }; 55 56 /* 57 * Ip input routine. Checksum and byte swap header. If fragmented 58 * try to reassamble. If complete and fragment queue exists, discard. 59 * Process options. Pass to next level. 60 */ 61 ipintr() 62 { 63 register struct ip *ip; 64 register struct mbuf *m; 65 struct mbuf *m0; 66 register int i; 67 register struct ipq *fp; 68 int hlen, s; 69 70 next: 71 /* 72 * Get next datagram off input queue and get IP header 73 * in first mbuf. 74 */ 75 s = splimp(); 76 IF_DEQUEUE(&ipintrq, m); 77 splx(s); 78 if (m == 0) 79 return; 80 if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) && 81 (m = m_pullup(m, sizeof (struct ip))) == 0) 82 return; 83 ip = mtod(m, struct ip *); 84 if ((hlen = ip->ip_hl << 2) > m->m_len) { 85 if ((m = m_pullup(m, hlen)) == 0) 86 return; 87 ip = mtod(m, struct ip *); 88 } 89 if (ipcksum) 90 if (ip->ip_sum = in_cksum(m, hlen)) { 91 printf("ip_sum %x\n", ip->ip_sum); /* XXX */ 92 ipstat.ips_badsum++; 93 goto bad; 94 } 95 96 /* 97 * Convert fields to host representation. 98 */ 99 ip->ip_len = ntohs((u_short)ip->ip_len); 100 ip->ip_id = ntohs(ip->ip_id); 101 ip->ip_off = ntohs((u_short)ip->ip_off); 102 103 /* 104 * Check that the amount of data in the buffers 105 * is as at least much as the IP header would have us expect. 106 * Trim mbufs if longer than we expect. 107 * Drop packet if shorter than we expect. 108 */ 109 i = -ip->ip_len; 110 m0 = m; 111 for (;;) { 112 i += m->m_len; 113 if (m->m_next == 0) 114 break; 115 m = m->m_next; 116 } 117 if (i != 0) { 118 if (i < 0) { 119 ipstat.ips_tooshort++; 120 goto bad; 121 } 122 if (i <= m->m_len) 123 m->m_len -= i; 124 else 125 m_adj(m0, -i); 126 } 127 m = m0; 128 129 /* 130 * Process options and, if not destined for us, 131 * ship it on. ip_dooptions returns 1 when an 132 * error was detected (causing an icmp message 133 * to be sent). 134 */ 135 if (hlen > sizeof (struct ip) && ip_dooptions(ip)) 136 goto next; 137 138 /* 139 * Fast check on the first internet 140 * interface in the list. 141 */ 142 if (ifinet) { 143 struct sockaddr_in *sin; 144 145 sin = (struct sockaddr_in *)&ifinet->if_addr; 146 if (sin->sin_addr.s_addr == ip->ip_dst.s_addr) 147 goto ours; 148 sin = (struct sockaddr_in *)&ifinet->if_broadaddr; 149 if ((ifinet->if_flags & IFF_BROADCAST) && 150 sin->sin_addr.s_addr == ip->ip_dst.s_addr) 151 goto ours; 152 } 153 ipaddr.sin_addr = ip->ip_dst; 154 if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) { 155 ip_forward(ip); 156 goto next; 157 } 158 159 ours: 160 /* 161 * Look for queue of fragments 162 * of this datagram. 163 */ 164 for (fp = ipq.next; fp != &ipq; fp = fp->next) 165 if (ip->ip_id == fp->ipq_id && 166 ip->ip_src.s_addr == fp->ipq_src.s_addr && 167 ip->ip_dst.s_addr == fp->ipq_dst.s_addr && 168 ip->ip_p == fp->ipq_p) 169 goto found; 170 fp = 0; 171 found: 172 173 /* 174 * Adjust ip_len to not reflect header, 175 * set ip_mff if more fragments are expected, 176 * convert offset of this to bytes. 177 */ 178 ip->ip_len -= hlen; 179 ((struct ipasfrag *)ip)->ipf_mff = 0; 180 if (ip->ip_off & IP_MF) 181 ((struct ipasfrag *)ip)->ipf_mff = 1; 182 ip->ip_off <<= 3; 183 184 /* 185 * If datagram marked as having more fragments 186 * or if this is not the first fragment, 187 * attempt reassembly; if it succeeds, proceed. 188 */ 189 if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) { 190 ip = ip_reass((struct ipasfrag *)ip, fp); 191 if (ip == 0) 192 goto next; 193 hlen = ip->ip_hl << 2; 194 m = dtom(ip); 195 } else 196 if (fp) 197 (void) ip_freef(fp); 198 199 /* 200 * Switch out to protocol's input routine. 201 */ 202 (*inetsw[ip_protox[ip->ip_p]].pr_input)(m); 203 goto next; 204 bad: 205 m_freem(m); 206 goto next; 207 } 208 209 /* 210 * Take incoming datagram fragment and try to 211 * reassemble it into whole datagram. If a chain for 212 * reassembly of this datagram already exists, then it 213 * is given as fp; otherwise have to make a chain. 214 */ 215 struct ip * 216 ip_reass(ip, fp) 217 register struct ipasfrag *ip; 218 register struct ipq *fp; 219 { 220 register struct mbuf *m = dtom(ip); 221 register struct ipasfrag *q; 222 struct mbuf *t; 223 int hlen = ip->ip_hl << 2; 224 int i, next; 225 226 /* 227 * Presence of header sizes in mbufs 228 * would confuse code below. 229 */ 230 m->m_off += hlen; 231 m->m_len -= hlen; 232 233 /* 234 * If first fragment to arrive, create a reassembly queue. 235 */ 236 if (fp == 0) { 237 if ((t = m_get(M_WAIT, MT_FTABLE)) == NULL) 238 goto dropfrag; 239 fp = mtod(t, struct ipq *); 240 insque(fp, &ipq); 241 fp->ipq_ttl = IPFRAGTTL; 242 fp->ipq_p = ip->ip_p; 243 fp->ipq_id = ip->ip_id; 244 fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp; 245 fp->ipq_src = ((struct ip *)ip)->ip_src; 246 fp->ipq_dst = ((struct ip *)ip)->ip_dst; 247 q = (struct ipasfrag *)fp; 248 goto insert; 249 } 250 251 /* 252 * Find a segment which begins after this one does. 253 */ 254 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) 255 if (q->ip_off > ip->ip_off) 256 break; 257 258 /* 259 * If there is a preceding segment, it may provide some of 260 * our data already. If so, drop the data from the incoming 261 * segment. If it provides all of our data, drop us. 262 */ 263 if (q->ipf_prev != (struct ipasfrag *)fp) { 264 i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off; 265 if (i > 0) { 266 if (i >= ip->ip_len) 267 goto dropfrag; 268 m_adj(dtom(ip), i); 269 ip->ip_off += i; 270 ip->ip_len -= i; 271 } 272 } 273 274 /* 275 * While we overlap succeeding segments trim them or, 276 * if they are completely covered, dequeue them. 277 */ 278 while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) { 279 i = (ip->ip_off + ip->ip_len) - q->ip_off; 280 if (i < q->ip_len) { 281 q->ip_len -= i; 282 q->ip_off += i; 283 m_adj(dtom(q), i); 284 break; 285 } 286 q = q->ipf_next; 287 m_freem(dtom(q->ipf_prev)); 288 ip_deq(q->ipf_prev); 289 } 290 291 insert: 292 /* 293 * Stick new segment in its place; 294 * check for complete reassembly. 295 */ 296 ip_enq(ip, q->ipf_prev); 297 next = 0; 298 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) { 299 if (q->ip_off != next) 300 return (0); 301 next += q->ip_len; 302 } 303 if (q->ipf_prev->ipf_mff) 304 return (0); 305 306 /* 307 * Reassembly is complete; concatenate fragments. 308 */ 309 q = fp->ipq_next; 310 m = dtom(q); 311 t = m->m_next; 312 m->m_next = 0; 313 m_cat(m, t); 314 q = q->ipf_next; 315 while (q != (struct ipasfrag *)fp) { 316 t = dtom(q); 317 q = q->ipf_next; 318 m_cat(m, t); 319 } 320 321 /* 322 * Create header for new ip packet by 323 * modifying header of first packet; 324 * dequeue and discard fragment reassembly header. 325 * Make header visible. 326 */ 327 ip = fp->ipq_next; 328 ip->ip_len = next; 329 ((struct ip *)ip)->ip_src = fp->ipq_src; 330 ((struct ip *)ip)->ip_dst = fp->ipq_dst; 331 remque(fp); 332 (void) m_free(dtom(fp)); 333 m = dtom(ip); 334 m->m_len += sizeof (struct ipasfrag); 335 m->m_off -= sizeof (struct ipasfrag); 336 return ((struct ip *)ip); 337 338 dropfrag: 339 m_freem(m); 340 return (0); 341 } 342 343 /* 344 * Free a fragment reassembly header and all 345 * associated datagrams. 346 */ 347 struct ipq * 348 ip_freef(fp) 349 struct ipq *fp; 350 { 351 register struct ipasfrag *q; 352 struct mbuf *m; 353 354 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) 355 m_freem(dtom(q)); 356 m = dtom(fp); 357 fp = fp->next; 358 remque(fp->prev); 359 (void) m_free(m); 360 return (fp); 361 } 362 363 /* 364 * Put an ip fragment on a reassembly chain. 365 * Like insque, but pointers in middle of structure. 366 */ 367 ip_enq(p, prev) 368 register struct ipasfrag *p, *prev; 369 { 370 371 p->ipf_prev = prev; 372 p->ipf_next = prev->ipf_next; 373 prev->ipf_next->ipf_prev = p; 374 prev->ipf_next = p; 375 } 376 377 /* 378 * To ip_enq as remque is to insque. 379 */ 380 ip_deq(p) 381 register struct ipasfrag *p; 382 { 383 384 p->ipf_prev->ipf_next = p->ipf_next; 385 p->ipf_next->ipf_prev = p->ipf_prev; 386 } 387 388 /* 389 * IP timer processing; 390 * if a timer expires on a reassembly 391 * queue, discard it. 392 */ 393 ip_slowtimo() 394 { 395 register struct ipq *fp; 396 int s = splnet(); 397 398 fp = ipq.next; 399 if (fp == 0) { 400 splx(s); 401 return; 402 } 403 while (fp != &ipq) 404 if (--fp->ipq_ttl == 0) 405 fp = ip_freef(fp); 406 else 407 fp = fp->next; 408 splx(s); 409 } 410 411 /* 412 * Drain off all datagram fragments. 413 */ 414 ip_drain() 415 { 416 417 while (ipq.next != &ipq) 418 (void) ip_freef(ipq.next); 419 } 420 421 /* 422 * Do option processing on a datagram, 423 * possibly discarding it if bad options 424 * are encountered. 425 */ 426 ip_dooptions(ip) 427 struct ip *ip; 428 { 429 register u_char *cp; 430 int opt, optlen, cnt, code, type; 431 struct in_addr *sin; 432 register struct ip_timestamp *ipt; 433 register struct ifnet *ifp; 434 struct in_addr t; 435 436 cp = (u_char *)(ip + 1); 437 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 438 for (; cnt > 0; cnt -= optlen, cp += optlen) { 439 opt = cp[0]; 440 if (opt == IPOPT_EOL) 441 break; 442 if (opt == IPOPT_NOP) 443 optlen = 1; 444 else 445 optlen = cp[1]; 446 switch (opt) { 447 448 default: 449 break; 450 451 /* 452 * Source routing with record. 453 * Find interface with current destination address. 454 * If none on this machine then drop if strictly routed, 455 * or do nothing if loosely routed. 456 * Record interface address and bring up next address 457 * component. If strictly routed make sure next 458 * address on directly accessible net. 459 */ 460 case IPOPT_LSRR: 461 case IPOPT_SSRR: 462 if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1)) 463 break; 464 sin = (struct in_addr *)(cp + cp[2]); 465 ipaddr.sin_addr = *sin; 466 ifp = if_ifwithaddr((struct sockaddr *)&ipaddr); 467 type = ICMP_UNREACH, code = ICMP_UNREACH_SRCFAIL; 468 if (ifp == 0) { 469 if (opt == IPOPT_SSRR) 470 goto bad; 471 break; 472 } 473 t = ip->ip_dst; ip->ip_dst = *sin; *sin = t; 474 cp[2] += 4; 475 if (cp[2] > optlen - (sizeof (long) - 1)) 476 break; 477 ip->ip_dst = sin[1]; 478 if (opt == IPOPT_SSRR && 479 if_ifonnetof(in_netof(ip->ip_dst)) == 0) 480 goto bad; 481 break; 482 483 case IPOPT_TS: 484 code = cp - (u_char *)ip; 485 type = ICMP_PARAMPROB; 486 ipt = (struct ip_timestamp *)cp; 487 if (ipt->ipt_len < 5) 488 goto bad; 489 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) { 490 if (++ipt->ipt_oflw == 0) 491 goto bad; 492 break; 493 } 494 sin = (struct in_addr *)(cp+cp[2]); 495 switch (ipt->ipt_flg) { 496 497 case IPOPT_TS_TSONLY: 498 break; 499 500 case IPOPT_TS_TSANDADDR: 501 if (ipt->ipt_ptr + 8 > ipt->ipt_len) 502 goto bad; 503 if (ifinet == 0) 504 goto bad; /* ??? */ 505 *sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr; 506 break; 507 508 case IPOPT_TS_PRESPEC: 509 ipaddr.sin_addr = *sin; 510 if (!if_ifwithaddr((struct sockaddr *)&ipaddr)) 511 continue; 512 if (ipt->ipt_ptr + 8 > ipt->ipt_len) 513 goto bad; 514 ipt->ipt_ptr += 4; 515 break; 516 517 default: 518 goto bad; 519 } 520 *(n_time *)sin = iptime(); 521 ipt->ipt_ptr += 4; 522 } 523 } 524 return (0); 525 bad: 526 icmp_error(ip, type, code); 527 return (1); 528 } 529 530 /* 531 * Strip out IP options, at higher 532 * level protocol in the kernel. 533 * Second argument is buffer to which options 534 * will be moved, and return value is their length. 535 */ 536 ip_stripoptions(ip, mopt) 537 struct ip *ip; 538 struct mbuf *mopt; 539 { 540 register int i; 541 register struct mbuf *m; 542 int olen; 543 544 olen = (ip->ip_hl<<2) - sizeof (struct ip); 545 m = dtom(ip); 546 ip++; 547 if (mopt) { 548 mopt->m_len = olen; 549 mopt->m_off = MMINOFF; 550 bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen); 551 } 552 i = m->m_len - (sizeof (struct ip) + olen); 553 bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i); 554 m->m_len -= olen; 555 } 556 557 u_char inetctlerrmap[] = { 558 ECONNABORTED, ECONNABORTED, 0, 0, 559 0, 0, 560 EHOSTDOWN, EHOSTUNREACH, ENETUNREACH, EHOSTUNREACH, 561 ECONNREFUSED, ECONNREFUSED, EMSGSIZE, 0, 562 0, 0, 0, 0 563 }; 564 565 ip_ctlinput(cmd, arg) 566 int cmd; 567 caddr_t arg; 568 { 569 struct in_addr *in; 570 int tcp_abort(), udp_abort(); 571 extern struct inpcb tcb, udb; 572 573 if (cmd < 0 || cmd > PRC_NCMDS) 574 return; 575 if (inetctlerrmap[cmd] == 0) 576 return; /* XXX */ 577 if (cmd == PRC_IFDOWN) 578 in = &((struct sockaddr_in *)arg)->sin_addr; 579 else if (cmd == PRC_HOSTDEAD || cmd == PRC_HOSTUNREACH) 580 in = (struct in_addr *)arg; 581 else 582 in = &((struct icmp *)arg)->icmp_ip.ip_dst; 583 /* THIS IS VERY QUESTIONABLE, SHOULD HIT ALL PROTOCOLS */ 584 in_pcbnotify(&tcb, in, (int)inetctlerrmap[cmd], tcp_abort); 585 in_pcbnotify(&udb, in, (int)inetctlerrmap[cmd], udp_abort); 586 } 587 588 int ipprintfs = 0; 589 int ipforwarding = 1; 590 /* 591 * Forward a packet. If some error occurs return the sender 592 * and icmp packet. Note we can't always generate a meaningful 593 * icmp message because icmp doesn't have a large enough repetoire 594 * of codes and types. 595 */ 596 ip_forward(ip) 597 register struct ip *ip; 598 { 599 register int error, type, code; 600 struct mbuf *mopt, *mcopy; 601 602 if (ipprintfs) 603 printf("forward: src %x dst %x ttl %x\n", ip->ip_src, 604 ip->ip_dst, ip->ip_ttl); 605 if (ipforwarding == 0) { 606 /* can't tell difference between net and host */ 607 type = ICMP_UNREACH, code = ICMP_UNREACH_NET; 608 goto sendicmp; 609 } 610 if (ip->ip_ttl < IPTTLDEC) { 611 type = ICMP_TIMXCEED, code = ICMP_TIMXCEED_INTRANS; 612 goto sendicmp; 613 } 614 ip->ip_ttl -= IPTTLDEC; 615 mopt = m_get(M_DONTWAIT, MT_DATA); 616 if (mopt == 0) { 617 m_freem(dtom(ip)); 618 return; 619 } 620 621 /* 622 * Save at most 64 bytes of the packet in case 623 * we need to generate an ICMP message to the src. 624 */ 625 mcopy = m_copy(dtom(ip), 0, imin(ip->ip_len, 64)); 626 ip_stripoptions(ip, mopt); 627 628 /* last 0 here means no directed broadcast */ 629 if ((error = ip_output(dtom(ip), mopt, (struct route *)0, 0)) == 0) { 630 if (mcopy) 631 m_freem(mcopy); 632 return; 633 } 634 ip = mtod(mcopy, struct ip *); 635 type = ICMP_UNREACH, code = 0; /* need ``undefined'' */ 636 switch (error) { 637 638 case ENETUNREACH: 639 case ENETDOWN: 640 code = ICMP_UNREACH_NET; 641 break; 642 643 case EMSGSIZE: 644 code = ICMP_UNREACH_NEEDFRAG; 645 break; 646 647 case EPERM: 648 code = ICMP_UNREACH_PORT; 649 break; 650 651 case ENOBUFS: 652 type = ICMP_SOURCEQUENCH; 653 break; 654 655 case EHOSTDOWN: 656 case EHOSTUNREACH: 657 code = ICMP_UNREACH_HOST; 658 break; 659 } 660 sendicmp: 661 icmp_error(ip, type, code); 662 } 663