1 /* 2 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * $DragonFly: src/sys/netinet/ip_demux.c,v 1.39 2008/03/29 04:45:47 sephe Exp $ 34 */ 35 36 #include "opt_inet.h" 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/kernel.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/thread.h> 44 #include <sys/sysctl.h> 45 #include <sys/globaldata.h> 46 47 #include <net/if.h> 48 #include <net/netisr.h> 49 50 #include <netinet/in_systm.h> 51 #include <netinet/in.h> 52 #include <netinet/in_var.h> 53 #include <netinet/in_pcb.h> 54 #include <netinet/ip.h> 55 #include <netinet/ip_var.h> 56 #include <netinet/tcp.h> 57 #include <netinet/tcpip.h> 58 #include <netinet/tcp_var.h> 59 #include <netinet/udp.h> 60 #include <netinet/udp_var.h> 61 62 extern struct thread netisr_cpu[]; 63 64 static struct thread tcp_thread[MAXCPU]; 65 static struct thread udp_thread[MAXCPU]; 66 67 static int udp_mpsafe_thread = 0; 68 TUNABLE_INT("net.inet.udp.mpsafe_thread", &udp_mpsafe_thread); 69 70 static __inline int 71 INP_MPORT_HASH(in_addr_t faddr, in_addr_t laddr, 72 in_port_t fport, in_port_t lport) 73 { 74 /* 75 * Use low order bytes. 76 */ 77 78 #if (BYTE_ORDER == LITTLE_ENDIAN) 79 KASSERT(ncpus2 < 256, ("need different hash function")); /* XXX JH */ 80 return (((faddr >> 24) ^ (fport >> 8) ^ (laddr >> 24) ^ (lport >> 8)) & 81 ncpus2_mask); 82 #else 83 return ((faddr ^ fport ^ laddr ^ lport) & ncpus2_mask); 84 #endif 85 } 86 87 boolean_t 88 ip_lengthcheck(struct mbuf **mp) 89 { 90 struct mbuf *m = *mp; 91 struct ip *ip; 92 int iphlen, iplen; 93 struct tcphdr *th; 94 int thoff; /* TCP data offset */ 95 96 /* The packet must be at least the size of an IP header. */ 97 if (m->m_pkthdr.len < sizeof(struct ip)) { 98 ipstat.ips_tooshort++; 99 goto fail; 100 } 101 102 /* The fixed IP header must reside completely in the first mbuf. */ 103 if (m->m_len < sizeof(struct ip)) { 104 m = m_pullup(m, sizeof(struct ip)); 105 if (m == NULL) { 106 ipstat.ips_toosmall++; 107 goto fail; 108 } 109 } 110 111 ip = mtod(m, struct ip *); 112 113 /* Bound check the packet's stated IP header length. */ 114 iphlen = ip->ip_hl << 2; 115 if (iphlen < sizeof(struct ip)) { /* minimum header length */ 116 ipstat.ips_badhlen++; 117 goto fail; 118 } 119 120 /* The full IP header must reside completely in the one mbuf. */ 121 if (m->m_len < iphlen) { 122 m = m_pullup(m, iphlen); 123 if (m == NULL) { 124 ipstat.ips_badhlen++; 125 goto fail; 126 } 127 ip = mtod(m, struct ip *); 128 } 129 130 iplen = ntohs(ip->ip_len); 131 132 /* 133 * Fragments other than the first fragment don't have much 134 * length information. 135 */ 136 if (ntohs(ip->ip_off) & IP_OFFMASK) 137 goto ipcheckonly; 138 139 /* 140 * The TCP/IP or UDP/IP header must be entirely contained within 141 * the first fragment of a packet. Packet filters will break if they 142 * aren't. 143 * 144 * Since the packet will be trimmed to ip_len we must also make sure 145 * the potentially trimmed down length is still sufficient to hold 146 * the header(s). 147 */ 148 switch (ip->ip_p) { 149 case IPPROTO_TCP: 150 if (iplen < iphlen + sizeof(struct tcphdr)) { 151 ++tcpstat.tcps_rcvshort; 152 goto fail; 153 } 154 if (m->m_len < iphlen + sizeof(struct tcphdr)) { 155 m = m_pullup(m, iphlen + sizeof(struct tcphdr)); 156 if (m == NULL) { 157 tcpstat.tcps_rcvshort++; 158 goto fail; 159 } 160 ip = mtod(m, struct ip *); 161 } 162 th = (struct tcphdr *)((caddr_t)ip + iphlen); 163 thoff = th->th_off << 2; 164 if (thoff < sizeof(struct tcphdr) || 165 thoff + iphlen > ntohs(ip->ip_len)) { 166 tcpstat.tcps_rcvbadoff++; 167 goto fail; 168 } 169 if (m->m_len < iphlen + thoff) { 170 m = m_pullup(m, iphlen + thoff); 171 if (m == NULL) { 172 tcpstat.tcps_rcvshort++; 173 goto fail; 174 } 175 } 176 break; 177 case IPPROTO_UDP: 178 if (iplen < iphlen + sizeof(struct udphdr)) { 179 ++udpstat.udps_hdrops; 180 goto fail; 181 } 182 if (m->m_len < iphlen + sizeof(struct udphdr)) { 183 m = m_pullup(m, iphlen + sizeof(struct udphdr)); 184 if (m == NULL) { 185 udpstat.udps_hdrops++; 186 goto fail; 187 } 188 } 189 break; 190 default: 191 ipcheckonly: 192 if (iplen < iphlen) { 193 ++ipstat.ips_badlen; 194 goto fail; 195 } 196 break; 197 } 198 199 *mp = m; 200 return TRUE; 201 202 fail: 203 if (m != NULL) 204 m_freem(m); 205 *mp = NULL; 206 return FALSE; 207 } 208 209 /* 210 * Map a packet to a protocol processing thread and return the thread's port. 211 * If an error occurs, the passed mbuf will be freed, *mptr will be set 212 * to NULL, and NULL will be returned. If no error occurs, the passed mbuf 213 * may be modified and a port pointer will be returned. 214 */ 215 lwkt_port_t 216 ip_mport(struct mbuf **mptr, int dir) 217 { 218 struct ip *ip; 219 int iphlen; 220 struct tcphdr *th; 221 struct udphdr *uh; 222 struct mbuf *m; 223 int thoff; /* TCP data offset */ 224 lwkt_port_t port; 225 int cpu; 226 227 if (!ip_lengthcheck(mptr)) 228 return (NULL); 229 230 m = *mptr; 231 ip = mtod(m, struct ip *); 232 iphlen = ip->ip_hl << 2; 233 234 /* 235 * XXX generic packet handling defrag on CPU 0 for now. 236 */ 237 if (ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) 238 return (&netisr_cpu[0].td_msgport); 239 240 switch (ip->ip_p) { 241 case IPPROTO_TCP: 242 th = (struct tcphdr *)((caddr_t)ip + iphlen); 243 thoff = th->th_off << 2; 244 cpu = INP_MPORT_HASH(ip->ip_src.s_addr, ip->ip_dst.s_addr, 245 th->th_sport, th->th_dport); 246 port = &tcp_thread[cpu].td_msgport; 247 break; 248 case IPPROTO_UDP: 249 uh = (struct udphdr *)((caddr_t)ip + iphlen); 250 251 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 252 (dir == IP_MPORT_IN && 253 in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))) { 254 cpu = 0; 255 } else { 256 cpu = INP_MPORT_HASH(ip->ip_src.s_addr, 257 ip->ip_dst.s_addr, uh->uh_sport, uh->uh_dport); 258 } 259 port = &udp_thread[cpu].td_msgport; 260 break; 261 default: 262 port = &netisr_cpu[0].td_msgport; 263 break; 264 } 265 266 return (port); 267 } 268 269 lwkt_port_t 270 ip_mport_in(struct mbuf **mptr) 271 { 272 return ip_mport(mptr, IP_MPORT_IN); 273 } 274 275 /* 276 * Map a TCP socket to a protocol processing thread. 277 */ 278 lwkt_port_t 279 tcp_soport(struct socket *so, struct sockaddr *nam __unused, 280 struct mbuf **dummy __unused, int req) 281 { 282 struct inpcb *inp; 283 284 /* The following processing all take place on Protocol Thread 0. */ 285 if (req == PRU_BIND || req == PRU_CONNECT || req == PRU_ATTACH || 286 req == PRU_LISTEN) 287 return (&tcp_thread[0].td_msgport); 288 289 inp = so->so_pcb; 290 if (!inp) /* connection reset by peer */ 291 return (&tcp_thread[0].td_msgport); 292 293 /* 294 * Already bound and connected or listening. For TCP connections, 295 * the (faddr, fport, laddr, lport) association cannot change now. 296 * 297 * Note: T/TCP code needs some reorganization to fit into 298 * this model. XXX JH 299 * 300 * Rely on type-stable memory and check in protocol handler 301 * to fix race condition here w/ deallocation of inp. XXX JH 302 */ 303 return (&tcp_thread[INP_MPORT_HASH(inp->inp_faddr.s_addr, 304 inp->inp_laddr.s_addr, inp->inp_fport, inp->inp_lport)].td_msgport); 305 } 306 307 lwkt_port_t 308 tcp_addrport(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) 309 { 310 return (&tcp_thread[tcp_addrcpu(faddr, fport, 311 laddr, lport)].td_msgport); 312 } 313 314 /* 315 * Map a UDP socket to a protocol processing thread. 316 */ 317 lwkt_port_t 318 udp_soport(struct socket *so, struct sockaddr *nam __unused, 319 struct mbuf **dummy __unused, int req) 320 { 321 struct inpcb *inp; 322 323 /* 324 * The following processing all take place on Protocol Thread 0: 325 * bind() 326 * attach() has a null socket parameter 327 * Fast and slow timeouts pass in null socket parameter 328 */ 329 if (req == PRU_BIND || so == NULL) 330 return (&udp_thread[0].td_msgport); 331 332 inp = so->so_pcb; 333 334 if (IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) 335 return (&udp_thread[0].td_msgport); 336 337 /* 338 * Rely on type-stable memory and check in protocol handler 339 * to fix race condition here w/ deallocation of inp. XXX JH 340 */ 341 342 return (&udp_thread[INP_MPORT_HASH(inp->inp_faddr.s_addr, 343 inp->inp_laddr.s_addr, inp->inp_fport, inp->inp_lport)].td_msgport); 344 } 345 346 /* 347 * Map a network address to a processor. 348 */ 349 int 350 tcp_addrcpu(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) 351 { 352 return (INP_MPORT_HASH(faddr, laddr, fport, lport)); 353 } 354 355 int 356 udp_addrcpu(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) 357 { 358 if (IN_MULTICAST(ntohl(laddr))) 359 return (0); 360 else 361 return (INP_MPORT_HASH(faddr, laddr, fport, lport)); 362 } 363 364 /* 365 * Return LWKT port for cpu. 366 */ 367 lwkt_port_t 368 tcp_cport(int cpu) 369 { 370 return (&tcp_thread[cpu].td_msgport); 371 } 372 373 void 374 tcp_thread_init(void) 375 { 376 int cpu; 377 378 for (cpu = 0; cpu < ncpus2; cpu++) { 379 lwkt_create(tcpmsg_service_loop, NULL, NULL, 380 &tcp_thread[cpu], 0, cpu, "tcp_thread %d", cpu); 381 netmsg_service_port_init(&tcp_thread[cpu].td_msgport); 382 } 383 } 384 385 void 386 udp_thread_init(void) 387 { 388 int cpu; 389 390 for (cpu = 0; cpu < ncpus2; cpu++) { 391 lwkt_create(udp_mpsafe_thread ? 392 netmsg_service_loop_mpsafe : netmsg_service_loop, 393 NULL, NULL, &udp_thread[cpu], 0, cpu, 394 "udp_thread %d", cpu); 395 netmsg_service_port_init(&udp_thread[cpu].td_msgport); 396 } 397 } 398