1 /* 2 * Copyright (c) 2003, 2004 Matthew Dillon. All rights reserved. 3 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. 4 * Copyright (c) 2003 Jonathan Lemon. All rights reserved. 5 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 6 * 7 * This code is derived from software contributed to The DragonFly Project 8 * by Jonathan Lemon, Jeffrey M. Hsu, and Matthew Dillon. 9 * 10 * Jonathan Lemon gave Jeffrey Hsu permission to combine his copyright 11 * into this one around July 8 2004. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of The DragonFly Project nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific, prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 28 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 29 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 30 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 31 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 32 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 33 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 34 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 35 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * $DragonFly: src/sys/net/netisr.c,v 1.26 2006/05/20 06:32:37 dillon Exp $ 39 */ 40 41 /* 42 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. 43 * 44 * License terms: all terms for the DragonFly license above plus the following: 45 * 46 * 4. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * 49 * This product includes software developed by Jeffrey M. Hsu 50 * for the DragonFly Project. 51 * 52 * This requirement may be waived with permission from Jeffrey Hsu. 53 * This requirement will sunset and may be removed on July 8 2005, 54 * after which the standard DragonFly license (as shown above) will 55 * apply. 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/msgport.h>
#include <sys/proc.h>
#include <sys/interrupt.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <machine/cpufunc.h>
#include <machine/ipl.h>

#include <sys/thread2.h>
#include <sys/msgport2.h>

static int netmsg_sync_func(struct netmsg *msg);

/*
 * Registration record for a port using the netmsg API.  Kept on
 * netreglist so netmsg_service_sync() can rendezvous with every
 * registered service port.
 */
struct netmsg_port_registration {
	TAILQ_ENTRY(netmsg_port_registration) npr_entry;
	lwkt_port_t npr_port;
};

static struct netisr netisrs[NETISR_MAX];
static TAILQ_HEAD(,netmsg_port_registration) netreglist;

/* Per-CPU thread to handle any protocol. */
struct thread netisr_cpu[MAXCPU];
lwkt_port netisr_afree_rport;	/* replies auto-free the message */
lwkt_port netisr_adone_rport;	/* replies simply mark the message done */
lwkt_port netisr_apanic_rport;	/* replying at all is a bug -> panic */
lwkt_port netisr_sync_port;	/* executes messages synchronously */

/*
 * netisr_afree_rport replymsg function, only used to handle async
 * messages which the sender has abandoned to their fate.
 */
static void
netisr_autofree_reply(lwkt_port_t port, lwkt_msg_t msg)
{
	free(msg, M_LWKTMSG);
}

/*
 * netisr_apanic_rport replymsg function.  Messages targeting this reply
 * port must never be replied to; a reply here indicates a bug in the
 * message's consumer, so panic loudly rather than corrupt state.
 */
static void
netisr_autopanic_reply(lwkt_port_t port, lwkt_msg_t msg)
{
	panic("unreplyable msg %p was replied!", msg);
}

/*
 * We must construct a custom putport function (which runs in the context
 * of the message originator)
 *
 * Our custom putport must check for self-referential messages, which can
 * occur when the so_upcall routine is called (e.g. nfs).  Self referential
 * messages are executed synchronously.  However, we must panic if the message
 * is not marked DONE on completion because the self-referential case cannot
 * block without deadlocking.
 *
 * note: ms_target_port does not need to be set when returning a synchronous
 * error code.
121 */ 122 static int 123 netmsg_put_port(lwkt_port_t port, lwkt_msg_t lmsg) 124 { 125 int error; 126 127 if ((lmsg->ms_flags & MSGF_ASYNC) == 0 && port->mp_td == curthread) { 128 error = lmsg->ms_cmd.cm_func(lmsg); 129 if (error == EASYNC && (lmsg->ms_flags & MSGF_DONE) == 0) 130 panic("netmsg_put_port: self-referential deadlock on netport"); 131 return(error); 132 } else { 133 return(lwkt_default_putport(port, lmsg)); 134 } 135 } 136 137 /* 138 * UNIX DOMAIN sockets still have to run their uipc functions synchronously, 139 * because they depend on the user proc context for a number of things 140 * (like creds) which we have not yet incorporated into the message structure. 141 * 142 * However, we maintain or message/port abstraction. Having a special 143 * synchronous port which runs the commands synchronously gives us the 144 * ability to serialize operations in one place later on when we start 145 * removing the BGL. 146 * 147 * We clear MSGF_DONE prior to executing the message in order to close 148 * any potential replymsg races with the flags field. If a synchronous 149 * result code is returned we set MSGF_DONE again. MSGF_DONE's flag state 150 * must be correct or the caller will be confused. 151 */ 152 static int 153 netmsg_sync_putport(lwkt_port_t port, lwkt_msg_t lmsg) 154 { 155 int error; 156 157 lmsg->ms_flags &= ~MSGF_DONE; 158 lmsg->ms_target_port = port; /* required for abort */ 159 error = lmsg->ms_cmd.cm_func(lmsg); 160 if (error == EASYNC) 161 error = lwkt_waitmsg(lmsg); 162 else 163 lmsg->ms_flags |= MSGF_DONE; 164 return(error); 165 } 166 167 static void 168 netmsg_sync_abortport(lwkt_port_t port, lwkt_msg_t lmsg) 169 { 170 lmsg->ms_abort_port = lmsg->ms_reply_port; 171 lmsg->ms_flags |= MSGF_ABORTED; 172 lmsg->ms_abort.cm_func(lmsg); 173 } 174 175 static void 176 netisr_init(void) 177 { 178 int i; 179 180 TAILQ_INIT(&netreglist); 181 182 /* 183 * Create default per-cpu threads for generic protocol handling. 
184 */ 185 for (i = 0; i < ncpus; ++i) { 186 lwkt_create(netmsg_service_loop, NULL, NULL, &netisr_cpu[i], 0, i, 187 "netisr_cpu %d", i); 188 netmsg_service_port_init(&netisr_cpu[i].td_msgport); 189 } 190 191 /* 192 * The netisr_afree_rport is a special reply port which automatically 193 * frees the replied message. The netisr_adone_rport simply marks 194 * the message as being done. The netisr_apanic_rport panics if 195 * the message is replied to. 196 */ 197 lwkt_initport(&netisr_afree_rport, NULL); 198 netisr_afree_rport.mp_replyport = netisr_autofree_reply; 199 lwkt_initport_null_rport(&netisr_adone_rport, NULL); 200 lwkt_initport(&netisr_apanic_rport, NULL); 201 netisr_apanic_rport.mp_replyport = netisr_autopanic_reply; 202 203 /* 204 * The netisr_syncport is a special port which executes the message 205 * synchronously and waits for it if EASYNC is returned. 206 */ 207 lwkt_initport(&netisr_sync_port, NULL); 208 netisr_sync_port.mp_putport = netmsg_sync_putport; 209 netisr_sync_port.mp_abortport = netmsg_sync_abortport; 210 } 211 212 SYSINIT(netisr, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST, netisr_init, NULL); 213 214 /* 215 * Finish initializing the message port for a netmsg service. This also 216 * registers the port for synchronous cleanup operations such as when an 217 * ifnet is being destroyed. There is no deregistration API yet. 218 */ 219 void 220 netmsg_service_port_init(lwkt_port_t port) 221 { 222 struct netmsg_port_registration *reg; 223 224 /* 225 * Override the putport function. Our custom function checks for 226 * self-references and executes such commands synchronously. 227 */ 228 port->mp_putport = netmsg_put_port; 229 230 /* 231 * Keep track of ports using the netmsg API so we can synchronize 232 * certain operations (such as freeing an ifnet structure) across all 233 * consumers. 
234 */ 235 reg = malloc(sizeof(*reg), M_TEMP, M_WAITOK|M_ZERO); 236 reg->npr_port = port; 237 TAILQ_INSERT_TAIL(&netreglist, reg, npr_entry); 238 } 239 240 /* 241 * This function synchronizes the caller with all netmsg services. For 242 * example, if an interface is being removed we must make sure that all 243 * packets related to that interface complete processing before the structure 244 * can actually be freed. This sort of synchronization is an alternative to 245 * ref-counting the netif, removing the ref counting overhead in favor of 246 * placing additional overhead in the netif freeing sequence (where it is 247 * inconsequential). 248 */ 249 void 250 netmsg_service_sync(void) 251 { 252 struct netmsg_port_registration *reg; 253 struct netmsg smsg; 254 255 lwkt_initmsg(&smsg.nm_lmsg, &curthread->td_msgport, 0, 256 lwkt_cmd_func((void *)netmsg_sync_func), lwkt_cmd_op_none); 257 258 TAILQ_FOREACH(reg, &netreglist, npr_entry) { 259 lwkt_domsg(reg->npr_port, &smsg.nm_lmsg); 260 } 261 } 262 263 /* 264 * The netmsg function simply replies the message. API semantics require 265 * EASYNC to be returned if the netmsg function disposes of the message. 266 */ 267 static int 268 netmsg_sync_func(struct netmsg *msg) 269 { 270 lwkt_replymsg(&msg->nm_lmsg, 0); 271 return(EASYNC); 272 } 273 274 /* 275 * Generic netmsg service loop. Some protocols may roll their own but all 276 * must do the basic command dispatch function call done here. 277 */ 278 void 279 netmsg_service_loop(void *arg) 280 { 281 struct netmsg *msg; 282 283 while ((msg = lwkt_waitport(&curthread->td_msgport, NULL))) { 284 msg->nm_lmsg.ms_cmd.cm_func(&msg->nm_lmsg); 285 } 286 } 287 288 /* 289 * Call the netisr directly. 290 * Queueing may be done in the msg port layer at its discretion. 291 */ 292 void 293 netisr_dispatch(int num, struct mbuf *m) 294 { 295 /* just queue it for now XXX JH */ 296 netisr_queue(num, m); 297 } 298 299 /* 300 * Same as netisr_dispatch(), but always queue. 
301 * This is either used in places where we are not confident that 302 * direct dispatch is possible, or where queueing is required. 303 */ 304 int 305 netisr_queue(int num, struct mbuf *m) 306 { 307 struct netisr *ni; 308 struct netmsg_packet *pmsg; 309 lwkt_port_t port; 310 311 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 312 ("netisr_queue: bad isr %d", num)); 313 314 ni = &netisrs[num]; 315 if (ni->ni_handler == NULL) { 316 printf("netisr_queue: unregistered isr %d\n", num); 317 return (EIO); 318 } 319 320 if ((port = ni->ni_mport(&m)) == NULL) 321 return (EIO); 322 323 pmsg = &m->m_hdr.mh_netmsg; 324 325 lwkt_initmsg(&pmsg->nm_lmsg, &netisr_apanic_rport, 0, 326 lwkt_cmd_func((void *)ni->ni_handler), lwkt_cmd_op_none); 327 pmsg->nm_packet = m; 328 pmsg->nm_lmsg.u.ms_result = num; 329 lwkt_sendmsg(port, &pmsg->nm_lmsg); 330 return (0); 331 } 332 333 void 334 netisr_register(int num, lwkt_portfn_t mportfn, netisr_fn_t handler) 335 { 336 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 337 ("netisr_register: bad isr %d", num)); 338 lwkt_initmsg(&netisrs[num].ni_netmsg.nm_lmsg, &netisr_adone_rport, 0, 339 lwkt_cmd_op_none, lwkt_cmd_op_none); 340 netisrs[num].ni_mport = mportfn; 341 netisrs[num].ni_handler = handler; 342 } 343 344 int 345 netisr_unregister(int num) 346 { 347 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 348 ("unregister_netisr: bad isr number: %d\n", num)); 349 350 /* XXX JH */ 351 return (0); 352 } 353 354 /* 355 * Return message port for default handler thread on CPU 0. 
356 */ 357 lwkt_port_t 358 cpu0_portfn(struct mbuf **mptr) 359 { 360 return (&netisr_cpu[0].td_msgport); 361 } 362 363 lwkt_port_t 364 cpu_portfn(int cpu) 365 { 366 return (&netisr_cpu[cpu].td_msgport); 367 } 368 369 /* ARGSUSED */ 370 lwkt_port_t 371 cpu0_soport(struct socket *so __unused, struct sockaddr *nam __unused, 372 int req __unused) 373 { 374 return (&netisr_cpu[0].td_msgport); 375 } 376 377 lwkt_port_t 378 sync_soport(struct socket *so __unused, struct sockaddr *nam __unused, 379 int req __unused) 380 { 381 return (&netisr_sync_port); 382 } 383 384 /* 385 * schednetisr() is used to call the netisr handler from the appropriate 386 * netisr thread for polling and other purposes. 387 * 388 * This function may be called from a hard interrupt or IPI and must be 389 * MP SAFE and non-blocking. We use a fixed per-cpu message instead of 390 * trying to allocate one. We must get ourselves onto the target cpu 391 * to safely check the MSGF_DONE bit on the message but since the message 392 * will be sent to that cpu anyway this does not add any extra work beyond 393 * what lwkt_sendmsg() would have already had to do to schedule the target 394 * thread. 
395 */ 396 static void 397 schednetisr_remote(void *data) 398 { 399 int num = (int)data; 400 struct netisr *ni = &netisrs[num]; 401 lwkt_port_t port = &netisr_cpu[0].td_msgport; 402 struct netmsg *pmsg; 403 404 pmsg = &netisrs[num].ni_netmsg; 405 crit_enter(); 406 if (pmsg->nm_lmsg.ms_flags & MSGF_DONE) { 407 lwkt_initmsg(&pmsg->nm_lmsg, &netisr_adone_rport, 0, 408 lwkt_cmd_func((void *)ni->ni_handler), lwkt_cmd_op_none); 409 pmsg->nm_lmsg.u.ms_result = num; 410 lwkt_sendmsg(port, &pmsg->nm_lmsg); 411 } 412 crit_exit(); 413 } 414 415 void 416 schednetisr(int num) 417 { 418 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 419 ("schednetisr: bad isr %d", num)); 420 #ifdef SMP 421 if (mycpu->gd_cpuid != 0) 422 lwkt_send_ipiq(globaldata_find(0), schednetisr_remote, (void *)num); 423 else 424 schednetisr_remote((void *)num); 425 #else 426 schednetisr_remote((void *)num); 427 #endif 428 } 429 430