1 /* 2 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * $DragonFly: src/sys/netinet/ip_demux.c,v 1.26 2004/08/03 00:04:13 dillon Exp $ 34 */ 35 36 /* 37 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. 38 * 39 * License terms: all terms for the DragonFly license above plus the following: 40 * 41 * 4. All advertising materials mentioning features or use of this software 42 * must display the following acknowledgement: 43 * 44 * This product includes software developed by Jeffrey M. Hsu 45 * for the DragonFly Project. 46 * 47 * This requirement may be waived with permission from Jeffrey Hsu. 48 * This requirement will sunset and may be removed on July 8 2005, 49 * after which the standard DragonFly license (as shown above) will 50 * apply. 51 */ 52 53 #include "opt_inet.h" 54 55 #include <sys/param.h> 56 #include <sys/systm.h> 57 #include <sys/kernel.h> 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/thread.h> 61 #include <sys/sysctl.h> 62 #include <sys/globaldata.h> 63 64 #include <net/if.h> 65 #include <net/netisr.h> 66 67 #include <netinet/in_systm.h> 68 #include <netinet/in.h> 69 #include <netinet/in_var.h> 70 #include <netinet/in_pcb.h> 71 #include <netinet/ip.h> 72 #include <netinet/ip_var.h> 73 #include <netinet/tcp.h> 74 #include <netinet/tcpip.h> 75 #include <netinet/tcp_var.h> 76 #include <netinet/udp.h> 77 #include <netinet/udp_var.h> 78 79 extern struct thread netisr_cpu[]; 80 81 static struct thread tcp_thread[MAXCPU]; 82 static struct thread udp_thread[MAXCPU]; 83 84 static __inline int 85 INP_MPORT_HASH(in_addr_t faddr, in_addr_t laddr, 86 in_port_t fport, in_port_t lport) 87 { 88 /* 89 * Use low order bytes. 90 */ 91 92 #if (BYTE_ORDER == LITTLE_ENDIAN) 93 KASSERT(ncpus2 < 256, ("need different hash function")); /* XXX JH */ 94 return (((faddr >> 24) ^ (fport >> 8) ^ (laddr >> 24) ^ (lport >> 8)) & 95 ncpus2_mask); 96 #else 97 return ((faddr ^ fport ^ laddr ^ lport) & ncpus2_mask); 98 #endif 99 } 100 101 /* 102 * Map a packet to a protocol processing thread and return the thread's port. 103 * If an error occurs, the passed mbuf will be freed, *mptr will be set 104 * to NULL, and NULL will be returned. If no error occurs, the passed mbuf 105 * may be modified and a port pointer will be returned. 106 */ 107 lwkt_port_t 108 ip_mport(struct mbuf **mptr) 109 { 110 struct ip *ip; 111 int iphlen; 112 struct tcphdr *th; 113 struct udphdr *uh; 114 struct mbuf *m = *mptr; 115 int thoff; /* TCP data offset */ 116 lwkt_port_t port; 117 int cpu; 118 119 /* 120 * The packet must be at least the size of an IP header 121 */ 122 if (m->m_pkthdr.len < sizeof(struct ip)) { 123 ipstat.ips_tooshort++; 124 m_freem(m); 125 *mptr = NULL; 126 return (NULL); 127 } 128 129 /* 130 * The first mbuf must entirely contain the IP header 131 */ 132 if (m->m_len < sizeof(struct ip) && 133 (m = m_pullup(m, sizeof(struct ip))) == NULL) { 134 ipstat.ips_toosmall++; 135 *mptr = NULL; 136 return (NULL); 137 } 138 ip = mtod(m, struct ip *); 139 140 /* 141 * Extract the actual IP header length and do a bounds check. The 142 * first mbuf must entirely contain the extended IP header. 143 */ 144 iphlen = ip->ip_hl << 2; 145 if (iphlen < sizeof(struct ip)) { /* minimum header length */ 146 ipstat.ips_badhlen++; 147 m_freem(m); 148 return (NULL); 149 } 150 if (m->m_len < iphlen) { 151 m = m_pullup(m, iphlen); 152 if (m == NULL) { 153 ipstat.ips_badhlen++; 154 *mptr = NULL; 155 return (NULL); 156 } 157 ip = mtod(m, struct ip *); 158 } 159 160 /* 161 * The TCP/IP or UDP/IP header must be entirely contained within 162 * the first fragment of a packet. Packet filters will break if they 163 * aren't. 164 */ 165 if ((ntohs(ip->ip_off) & IP_OFFMASK) == 0) { 166 switch (ip->ip_p) { 167 case IPPROTO_TCP: 168 if (m->m_len < iphlen + sizeof(struct tcphdr)) { 169 m = m_pullup(m, iphlen + sizeof(struct tcphdr)); 170 if (m == NULL) { 171 tcpstat.tcps_rcvshort++; 172 *mptr = NULL; 173 return (NULL); 174 } 175 ip = mtod(m, struct ip *); 176 } 177 break; 178 case IPPROTO_UDP: 179 if (m->m_len < iphlen + sizeof(struct udphdr)) { 180 m = m_pullup(m, iphlen + sizeof(struct udphdr)); 181 if (m == NULL) { 182 udpstat.udps_hdrops++; 183 *mptr = NULL; 184 return (NULL); 185 } 186 ip = mtod(m, struct ip *); 187 } 188 break; 189 default: 190 break; 191 } 192 } 193 194 /* 195 * XXX generic packet handling defrag on CPU 0 for now. 196 */ 197 if (ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) { 198 *mptr = m; 199 return (&netisr_cpu[0].td_msgport); 200 } 201 202 switch (ip->ip_p) { 203 case IPPROTO_TCP: 204 th = (struct tcphdr *)((caddr_t)ip + iphlen); 205 thoff = th->th_off << 2; 206 if (thoff < sizeof(struct tcphdr) || 207 thoff > ntohs(ip->ip_len)) { 208 tcpstat.tcps_rcvbadoff++; 209 m_freem(m); 210 *mptr = NULL; 211 return (NULL); 212 } 213 if (m->m_len < iphlen + thoff) { 214 m = m_pullup(m, iphlen + thoff); 215 if (m == NULL) { 216 tcpstat.tcps_rcvshort++; 217 *mptr = NULL; 218 return (NULL); 219 } 220 ip = mtod(m, struct ip *); 221 th = (struct tcphdr *)((caddr_t)ip + iphlen); 222 } 223 224 cpu = INP_MPORT_HASH(ip->ip_src.s_addr, ip->ip_dst.s_addr, 225 th->th_sport, th->th_dport); 226 port = &tcp_thread[cpu].td_msgport; 227 break; 228 case IPPROTO_UDP: 229 uh = (struct udphdr *)((caddr_t)ip + iphlen); 230 231 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 232 in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { 233 cpu = 0; 234 } else { 235 cpu = INP_MPORT_HASH(ip->ip_src.s_addr, 236 ip->ip_dst.s_addr, uh->uh_sport, uh->uh_dport); 237 } 238 port = &udp_thread[cpu].td_msgport; 239 break; 240 default: 241 port = &netisr_cpu[0].td_msgport; 242 break; 243 } 244 KKASSERT(port->mp_putport != NULL); 245 *mptr = m; 246 return (port); 247 } 248 249 /* 250 * Map a TCP socket to a protocol processing thread. 251 */ 252 lwkt_port_t 253 tcp_soport(struct socket *so, struct sockaddr *nam, int req) 254 { 255 struct inpcb *inp; 256 257 /* The following processing all take place on Protocol Thread 0. */ 258 if (req == PRU_BIND || req == PRU_CONNECT || req == PRU_ATTACH || 259 req == PRU_LISTEN) 260 return (&tcp_thread[0].td_msgport); 261 262 inp = sotoinpcb(so); 263 if (!inp) /* connection reset by peer */ 264 return (&tcp_thread[0].td_msgport); 265 266 /* 267 * Already bound and connected or listening. For TCP connections, 268 * the (faddr, fport, laddr, lport) association cannot change now. 269 * 270 * Note: T/TCP code needs some reorganization to fit into 271 * this model. XXX JH 272 * 273 * Rely on type-stable memory and check in protocol handler 274 * to fix race condition here w/ deallocation of inp. XXX JH 275 */ 276 return (&tcp_thread[INP_MPORT_HASH(inp->inp_faddr.s_addr, 277 inp->inp_laddr.s_addr, inp->inp_fport, inp->inp_lport)].td_msgport); 278 } 279 280 lwkt_port_t 281 tcp_addrport(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) 282 { 283 return (&tcp_thread[tcp_addrcpu(faddr, fport, 284 laddr, lport)].td_msgport); 285 } 286 287 /* 288 * Map a UDP socket to a protocol processing thread. 289 */ 290 lwkt_port_t 291 udp_soport(struct socket *so, struct sockaddr *nam, int req) 292 { 293 struct inpcb *inp; 294 295 /* 296 * The following processing all take place on Protocol Thread 0: 297 * only bind() and connect() have a non-null nam parameter 298 * attach() has a null socket parameter 299 * Fast and slow timeouts pass in two NULLs 300 */ 301 if (nam != NULL || so == NULL) 302 return (&udp_thread[0].td_msgport); 303 304 inp = sotoinpcb(so); 305 306 if (IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) 307 return (&udp_thread[0].td_msgport); 308 309 /* 310 * Rely on type-stable memory and check in protocol handler 311 * to fix race condition here w/ deallocation of inp. XXX JH 312 */ 313 314 return (&udp_thread[INP_MPORT_HASH(inp->inp_faddr.s_addr, 315 inp->inp_laddr.s_addr, inp->inp_fport, inp->inp_lport)].td_msgport); 316 } 317 318 /* 319 * Map a network address to a processor. 320 */ 321 int 322 tcp_addrcpu(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) 323 { 324 return (INP_MPORT_HASH(faddr, laddr, fport, lport)); 325 } 326 327 int 328 udp_addrcpu(in_addr_t faddr, in_port_t fport, in_addr_t laddr, in_port_t lport) 329 { 330 if (IN_MULTICAST(ntohl(laddr))) 331 return (0); 332 else 333 return (INP_MPORT_HASH(faddr, laddr, fport, lport)); 334 } 335 336 /* 337 * Return LWKT port for cpu. 338 */ 339 lwkt_port_t 340 tcp_cport(int cpu) 341 { 342 return (&tcp_thread[cpu].td_msgport); 343 } 344 345 void 346 tcp_thread_init(void) 347 { 348 int cpu; 349 350 for (cpu = 0; cpu < ncpus2; cpu++) { 351 lwkt_create(tcpmsg_service_loop, NULL, NULL, 352 &tcp_thread[cpu], 0, cpu, "tcp_thread %d", cpu); 353 tcp_thread[cpu].td_msgport.mp_putport = netmsg_put_port; 354 } 355 } 356 357 void 358 udp_thread_init(void) 359 { 360 int cpu; 361 362 for (cpu = 0; cpu < ncpus2; cpu++) { 363 lwkt_create(netmsg_service_loop, NULL, NULL, 364 &udp_thread[cpu], 0, cpu, "udp_thread %d", cpu); 365 udp_thread[cpu].td_msgport.mp_putport = netmsg_put_port; 366 } 367 } 368