1 /* 2 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * $DragonFly: src/sys/netinet/ip_demux.c,v 1.45 2008/11/11 10:46:58 sephe Exp $ 34 */ 35 36 #include "opt_inet.h" 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/kernel.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/thread.h> 44 #include <sys/sysctl.h> 45 #include <sys/globaldata.h> 46 47 #include <net/if.h> 48 #include <net/netisr.h> 49 50 #include <netinet/in_systm.h> 51 #include <netinet/in.h> 52 #include <netinet/in_var.h> 53 #include <netinet/in_pcb.h> 54 #include <netinet/ip.h> 55 #include <netinet/ip_var.h> 56 #include <netinet/tcp.h> 57 #include <netinet/tcpip.h> 58 #include <netinet/tcp_var.h> 59 #include <netinet/udp.h> 60 #include <netinet/udp_var.h> 61 62 extern struct thread netisr_cpu[]; 63 extern int udp_mpsafe_thread; 64 65 static struct thread tcp_thread[MAXCPU]; 66 static struct thread udp_thread[MAXCPU]; 67 68 static __inline int 69 INP_MPORT_HASH(in_addr_t faddr, in_addr_t laddr, 70 in_port_t fport, in_port_t lport) 71 { 72 /* 73 * Use low order bytes. 74 */ 75 76 #if (BYTE_ORDER == LITTLE_ENDIAN) 77 KASSERT(ncpus2 < 256, ("need different hash function")); /* XXX JH */ 78 return (((faddr >> 24) ^ (fport >> 8) ^ (laddr >> 24) ^ (lport >> 8)) & 79 ncpus2_mask); 80 #else 81 return ((faddr ^ fport ^ laddr ^ lport) & ncpus2_mask); 82 #endif 83 } 84 85 /* 86 * If the packet is a valid IP datagram, upon returning of this function 87 * following things are promised: 88 * 89 * o IP header (including any possible IP options) is in one mbuf (m_len). 90 * o IP header length is not less than the minimum (sizeof(struct ip)). 91 * o IP total length is not less than IP header length. 92 * o IP datagram resides completely in the mbuf chain, 93 * i.e. pkthdr.len >= IP total length. 94 * 95 * If the packet is a UDP datagram, 96 * o IP header (including any possible IP options) and UDP header are in 97 * one mbuf (m_len). 98 * o IP total length is not less than (IP header length + UDP header length). 99 * 100 * If the packet is a TCP segment, 101 * o IP header (including any possible IP options) and TCP header (including 102 * any possible TCP options) are in one mbuf (m_len). 103 * o TCP header length is not less than the minimum (sizeof(struct tcphdr)). 104 * o IP total length is not less than (IP header length + TCP header length). 105 */ 106 boolean_t 107 ip_lengthcheck(struct mbuf **mp) 108 { 109 struct mbuf *m = *mp; 110 struct ip *ip; 111 int iphlen, iplen; 112 struct tcphdr *th; 113 int thoff; /* TCP data offset */ 114 115 /* The packet must be at least the size of an IP header. */ 116 if (m->m_pkthdr.len < sizeof(struct ip)) { 117 ipstat.ips_tooshort++; 118 goto fail; 119 } 120 121 /* The fixed IP header must reside completely in the first mbuf. */ 122 if (m->m_len < sizeof(struct ip)) { 123 m = m_pullup(m, sizeof(struct ip)); 124 if (m == NULL) { 125 ipstat.ips_toosmall++; 126 goto fail; 127 } 128 } 129 130 ip = mtod(m, struct ip *); 131 132 /* Bound check the packet's stated IP header length. */ 133 iphlen = ip->ip_hl << 2; 134 if (iphlen < sizeof(struct ip)) { /* minimum header length */ 135 ipstat.ips_badhlen++; 136 goto fail; 137 } 138 139 /* The full IP header must reside completely in the one mbuf. */ 140 if (m->m_len < iphlen) { 141 m = m_pullup(m, iphlen); 142 if (m == NULL) { 143 ipstat.ips_badhlen++; 144 goto fail; 145 } 146 ip = mtod(m, struct ip *); 147 } 148 149 iplen = ntohs(ip->ip_len); 150 151 /* 152 * Check that the amount of data in the buffers is as 153 * at least much as the IP header would have us expect. 154 */ 155 if (m->m_pkthdr.len < iplen) { 156 ipstat.ips_tooshort++; 157 goto fail; 158 } 159 160 /* 161 * Fragments other than the first fragment don't have much 162 * length information. 163 */ 164 if (ntohs(ip->ip_off) & IP_OFFMASK) 165 goto ipcheckonly; 166 167 /* 168 * The TCP/IP or UDP/IP header must be entirely contained within 169 * the first fragment of a packet. Packet filters will break if they 170 * aren't. 171 * 172 * Since the packet will be trimmed to ip_len we must also make sure 173 * the potentially trimmed down length is still sufficient to hold 174 * the header(s). 175 */ 176 switch (ip->ip_p) { 177 case IPPROTO_TCP: 178 if (iplen < iphlen + sizeof(struct tcphdr)) { 179 ++tcpstat.tcps_rcvshort; 180 goto fail; 181 } 182 if (m->m_len < iphlen + sizeof(struct tcphdr)) { 183 m = m_pullup(m, iphlen + sizeof(struct tcphdr)); 184 if (m == NULL) { 185 tcpstat.tcps_rcvshort++; 186 goto fail; 187 } 188 ip = mtod(m, struct ip *); 189 } 190 th = (struct tcphdr *)((caddr_t)ip + iphlen); 191 thoff = th->th_off << 2; 192 if (thoff < sizeof(struct tcphdr) || 193 thoff + iphlen > ntohs(ip->ip_len)) { 194 tcpstat.tcps_rcvbadoff++; 195 goto fail; 196 } 197 if (m->m_len < iphlen + thoff) { 198 m = m_pullup(m, iphlen + thoff); 199 if (m == NULL) { 200 tcpstat.tcps_rcvshort++; 201 goto fail; 202 } 203 } 204 break; 205 case IPPROTO_UDP: 206 if (iplen < iphlen + sizeof(struct udphdr)) { 207 ++udpstat.udps_hdrops; 208 goto fail; 209 } 210 if (m->m_len < iphlen + sizeof(struct udphdr)) { 211 m = m_pullup(m, iphlen + sizeof(struct udphdr)); 212 if (m == NULL) { 213 udpstat.udps_hdrops++; 214 goto fail; 215 } 216 } 217 break; 218 default: 219 ipcheckonly: 220 if (iplen < iphlen) { 221 ++ipstat.ips_badlen; 222 goto fail; 223 } 224 break; 225 } 226 227 *mp = m; 228 return TRUE; 229 230 fail: 231 if (m != NULL) 232 m_freem(m); 233 *mp = NULL; 234 return FALSE; 235 } 236 237 /* 238 * Map a packet to a protocol processing thread and return the thread's port. 239 * If an error occurs, the passed mbuf will be freed, *mptr will be set 240 * to NULL, and NULL will be returned. If no error occurs, the passed mbuf 241 * may be modified and a port pointer will be returned. 242 */ 243 lwkt_port_t 244 ip_mport(struct mbuf **mptr, int dir) 245 { 246 struct ip *ip; 247 int iphlen; 248 struct tcphdr *th; 249 struct udphdr *uh; 250 struct mbuf *m; 251 int thoff; /* TCP data offset */ 252 lwkt_port_t port; 253 int cpu; 254 255 if (!ip_lengthcheck(mptr)) 256 return (NULL); 257 258 m = *mptr; 259 ip = mtod(m, struct ip *); 260 iphlen = ip->ip_hl << 2; 261 262 /* 263 * XXX generic packet handling defrag on CPU 0 for now. 264 */ 265 if (ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) 266 return (&netisr_cpu[0].td_msgport); 267 268 switch (ip->ip_p) { 269 case IPPROTO_TCP: 270 th = (struct tcphdr *)((caddr_t)ip + iphlen); 271 thoff = th->th_off << 2; 272 cpu = INP_MPORT_HASH(ip->ip_src.s_addr, ip->ip_dst.s_addr, 273 th->th_sport, th->th_dport); 274 port = &tcp_thread[cpu].td_msgport; 275 break; 276 case IPPROTO_UDP: 277 uh = (struct udphdr *)((caddr_t)ip + iphlen); 278 279 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 280 (dir == IP_MPORT_IN && 281 in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))) { 282 cpu = 0; 283 } else { 284 cpu = INP_MPORT_HASH(ip->ip_src.s_addr, 285 ip->ip_dst.s_addr, uh->uh_sport, uh->uh_dport); 286 } 287 port = &udp_thread[cpu].td_msgport; 288 break; 289 default: 290 port = &netisr_cpu[0].td_msgport; 291 break; 292 } 293 294 return (port); 295 } 296 297 lwkt_port_t 298 ip_mport_in(struct mbuf **mptr) 299 { 300 return ip_mport(mptr, IP_MPORT_IN); 301 } 302 303 /* 304 * Map a TCP socket to a protocol processing thread. 305 */ 306 lwkt_port_t 307 tcp_soport(struct socket *so, struct sockaddr *nam __unused, 308 struct mbuf **dummy __unused, int req) 309 { 310 struct inpcb *inp; 311 312 /* The following processing all take place on Protocol Thread 0. */ 313 if (req == PRU_BIND || req == PRU_CONNECT || req == PRU_ATTACH || 314 req == PRU_LISTEN) 315 return (&tcp_thread[0].td_msgport); 316 317 inp = so->so_pcb; 318 if (!inp) /* connection reset by peer */ 319 return (&tcp_thread[0].td_msgport); 320 321 /* 322 * Already bound and connected or listening. For TCP connections, 323 * the (faddr, fport, laddr, lport) association cannot change now. 324 * 325 * Note: T/TCP code needs some reorganization to fit into 326 * this model. XXX JH 327 * 328 * Rely on type-stable memory and check in protocol handler 329 * to fix race condition here w/ deallocation of inp. XXX JH 330 */ 331 return (&tcp_thread[INP_MPORT_HASH(inp->inp_faddr.s_addr, 332 inp->inp_laddr.s_addr, inp->inp_fport, inp->inp_lport)].td_msgport); 333 } 334 335 /* 336 * Used to route icmp messages to the proper protocol thread for ctlinput 337 * operation. 338 */ 339 lwkt_port_t 340 tcp_ctlport(int cmd, struct sockaddr *sa, void *vip) 341 { 342 struct ip *ip = vip; 343 struct tcphdr *th; 344 struct in_addr faddr; 345 int cpu; 346 347 faddr = ((struct sockaddr_in *)sa)->sin_addr; 348 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 349 return(NULL); 350 if (ip == NULL || PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { 351 /* 352 * Message will be forwarded to all TCP protocol threads 353 * in following way: 354 * 355 * netisr0 (the msgport we return here) 356 * | 357 * | 358 * | domsg <----------------------------+ 359 * | | 360 * | | replymsg 361 * | | 362 * V forwardmsg forwardmsg | 363 * tcp0 ------------> tcp1 ------------> tcpN 364 */ 365 return cpu0_ctlport(cmd, sa, vip); 366 } else { 367 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 368 cpu = tcp_addrcpu(faddr.s_addr, th->th_dport, 369 ip->ip_src.s_addr, th->th_sport); 370 } 371 return(&tcp_thread[cpu].td_msgport); 372 } 373 374 lwkt_port_t 375 tcp_addrport(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) 376 { 377 return (&tcp_thread[tcp_addrcpu(faddr, fport, 378 laddr, lport)].td_msgport); 379 } 380 381 /* 382 * Map a UDP socket to a protocol processing thread. 383 */ 384 lwkt_port_t 385 udp_soport(struct socket *so, struct sockaddr *nam __unused, 386 struct mbuf **dummy __unused, int req) 387 { 388 struct inpcb *inp; 389 390 /* 391 * The following processing all take place on Protocol Thread 0: 392 * bind() 393 * attach() has a null socket parameter 394 * Fast and slow timeouts pass in null socket parameter 395 */ 396 if (req == PRU_BIND || so == NULL) 397 return (&udp_thread[0].td_msgport); 398 399 inp = so->so_pcb; 400 401 if (IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) 402 return (&udp_thread[0].td_msgport); 403 404 /* 405 * Rely on type-stable memory and check in protocol handler 406 * to fix race condition here w/ deallocation of inp. XXX JH 407 */ 408 409 return (&udp_thread[INP_MPORT_HASH(inp->inp_faddr.s_addr, 410 inp->inp_laddr.s_addr, inp->inp_fport, inp->inp_lport)].td_msgport); 411 } 412 413 /* 414 * Used to route icmp messages to the proper protocol thread for ctlinput 415 * operation. 416 */ 417 lwkt_port_t 418 udp_ctlport(int cmd, struct sockaddr *sa, void *vip) 419 { 420 struct ip *ip = vip; 421 struct udphdr *uh; 422 struct in_addr faddr; 423 int cpu; 424 425 faddr = ((struct sockaddr_in *)sa)->sin_addr; 426 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 427 return(NULL); 428 if (PRC_IS_REDIRECT(cmd)) { 429 /* 430 * See the comment in tcp_ctlport; the only difference 431 * is that message is forwarded to UDP protocol theads. 432 */ 433 return cpu0_ctlport(cmd, sa, vip); 434 } else if (ip == NULL || cmd == PRC_HOSTDEAD) { 435 /* 436 * XXX 437 * Once UDP inpcbs are CPU localized, we should do 438 * the same forwarding as PRC_IS_REDIRECT(cmd) 439 */ 440 cpu = 0; 441 } else { 442 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 443 444 cpu = INP_MPORT_HASH(faddr.s_addr, ip->ip_src.s_addr, 445 uh->uh_dport, uh->uh_sport); 446 } 447 return (&udp_thread[cpu].td_msgport); 448 } 449 450 /* 451 * Map a network address to a processor. 452 */ 453 int 454 tcp_addrcpu(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) 455 { 456 return (INP_MPORT_HASH(faddr, laddr, fport, lport)); 457 } 458 459 int 460 udp_addrcpu(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) 461 { 462 if (IN_MULTICAST(ntohl(laddr))) 463 return (0); 464 else 465 return (INP_MPORT_HASH(faddr, laddr, fport, lport)); 466 } 467 468 /* 469 * Return LWKT port for cpu. 470 */ 471 lwkt_port_t 472 tcp_cport(int cpu) 473 { 474 return (&tcp_thread[cpu].td_msgport); 475 } 476 477 lwkt_port_t 478 udp_cport(int cpu) 479 { 480 return (&udp_thread[cpu].td_msgport); 481 } 482 483 void 484 tcp_thread_init(void) 485 { 486 int cpu; 487 488 for (cpu = 0; cpu < ncpus2; cpu++) { 489 lwkt_create(tcpmsg_service_loop, NULL, NULL, 490 &tcp_thread[cpu], TDF_NETWORK | TDF_MPSAFE, cpu, 491 "tcp_thread %d", cpu); 492 netmsg_service_port_init(&tcp_thread[cpu].td_msgport); 493 } 494 } 495 496 void 497 udp_thread_init(void) 498 { 499 int cpu; 500 501 for (cpu = 0; cpu < ncpus2; cpu++) { 502 lwkt_create(netmsg_service_loop, &udp_mpsafe_thread, NULL, 503 &udp_thread[cpu], TDF_NETWORK | TDF_MPSAFE, cpu, 504 "udp_thread %d", cpu); 505 netmsg_service_port_init(&udp_thread[cpu].td_msgport); 506 } 507 } 508