/*
 * Copyright (c) 2003, 2004 Matthew Dillon.  All rights reserved.
 * Copyright (c) 2003, 2004 Jeffrey M. Hsu.  All rights reserved.
 * Copyright (c) 2003 Jonathan Lemon.  All rights reserved.
 * Copyright (c) 2003, 2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jonathan Lemon, Jeffrey M. Hsu, and Matthew Dillon.
 *
 * Jonathan Lemon gave Jeffrey Hsu permission to combine his copyright
 * into this one around July 8 2004.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/net/netisr.c,v 1.30 2007/03/04 18:51:59 swildner Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/msgport.h>
#include <sys/proc.h>
#include <sys/interrupt.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <machine/cpufunc.h>

#include <sys/thread2.h>
#include <sys/msgport2.h>

static int netmsg_sync_func(struct netmsg *msg);

/*
 * One registration record per message port participating in the netmsg
 * API; the list is walked by netmsg_service_sync() to synchronize with
 * every consumer.
 */
struct netmsg_port_registration {
        TAILQ_ENTRY(netmsg_port_registration) npr_entry;
        lwkt_port_t npr_port;
};

/* Table of protocol handlers, indexed by netisr number (0..NETISR_MAX-1). */
static struct netisr netisrs[NETISR_MAX];

/* List of all ports registered via netmsg_service_port_init(). */
static TAILQ_HEAD(,netmsg_port_registration) netreglist;

/* Per-CPU thread to handle any protocol. */
struct thread netisr_cpu[MAXCPU];

/*
 * Special reply ports, configured in netisr_init():
 * afree  - frees the replied message (for abandoned async messages)
 * adone  - simply marks the message done
 * apanic - panics if the message is ever replied
 * sync   - executes messages synchronously in the caller's context
 */
lwkt_port netisr_afree_rport;
lwkt_port netisr_adone_rport;
lwkt_port netisr_apanic_rport;
lwkt_port netisr_sync_port;

/*
 * netisr_afree_rport replymsg function, only used to handle async
 * messages which the sender has abandoned to their fate.
 */
static void
netisr_autofree_reply(lwkt_port_t port, lwkt_msg_t msg)
{
        kfree(msg, M_LWKTMSG);
}

/*
 * netisr_apanic_rport replymsg function: messages sent with this reply
 * port must never be replied to, so any reply is a programming error.
 */
static void
netisr_autopanic_reply(lwkt_port_t port, lwkt_msg_t msg)
{
        panic("unreplyable msg %p was replied!", msg);
}

/*
 * We must construct a custom putport function (which runs in the context
 * of the message originator)
 *
 * Our custom putport must check for self-referential messages, which can
 * occur when the so_upcall routine is called (e.g. nfs).  Self referential
 * messages are executed synchronously.  However, we must panic if the message
 * is not marked DONE on completion because the self-referential case cannot
 * block without deadlocking.
 *
 * note: ms_target_port does not need to be set when returning a synchronous
 * error code.
 */
static int
netmsg_put_port(lwkt_port_t port, lwkt_msg_t lmsg)
{
        int error;

        /*
         * Self-referential case: a synchronous message whose target port is
         * owned by the current thread is executed directly instead of being
         * queued (queueing it would deadlock against ourselves).
         */
        if ((lmsg->ms_flags & MSGF_ASYNC) == 0 && port->mp_td == curthread) {
                error = lmsg->ms_cmd.cm_func(lmsg);
                if (error == EASYNC && (lmsg->ms_flags & MSGF_DONE) == 0)
                        panic("netmsg_put_port: self-referential deadlock on netport");
                return(error);
        } else {
                return(lwkt_default_putport(port, lmsg));
        }
}

/*
 * UNIX DOMAIN sockets still have to run their uipc functions synchronously,
 * because they depend on the user proc context for a number of things
 * (like creds) which we have not yet incorporated into the message structure.
 *
 * However, we maintain our message/port abstraction.  Having a special
 * synchronous port which runs the commands synchronously gives us the
 * ability to serialize operations in one place later on when we start
 * removing the BGL.
 *
 * We clear MSGF_DONE prior to executing the message in order to close
 * any potential replymsg races with the flags field.  If a synchronous
 * result code is returned we set MSGF_DONE again.  MSGF_DONE's flag state
 * must be correct or the caller will be confused.
 */
static int
netmsg_sync_putport(lwkt_port_t port, lwkt_msg_t lmsg)
{
        int error;

        lmsg->ms_flags &= ~MSGF_DONE;
        lmsg->ms_target_port = port;    /* required for abort */
        error = lmsg->ms_cmd.cm_func(lmsg);
        if (error == EASYNC)
                error = lwkt_waitmsg(lmsg);
        else
                lmsg->ms_flags |= MSGF_DONE;
        return(error);
}

/*
 * Abort handler for the synchronous port: redirect the abort back to the
 * reply port and invoke the message's abort function.
 */
static void
netmsg_sync_abortport(lwkt_port_t port, lwkt_msg_t lmsg)
{
        lmsg->ms_abort_port = lmsg->ms_reply_port;
        lmsg->ms_flags |= MSGF_ABORTED;
        lmsg->ms_abort.cm_func(lmsg);
}

/*
 * Bootstrap the netisr subsystem: spin up the per-cpu service threads and
 * configure the special-purpose reply/sync ports declared above.
 */
static void
netisr_init(void)
{
        int i;

        TAILQ_INIT(&netreglist);

        /*
         * Create default per-cpu threads for generic protocol handling.
         */
        for (i = 0; i < ncpus; ++i) {
                lwkt_create(netmsg_service_loop, NULL, NULL, &netisr_cpu[i], 0, i,
                            "netisr_cpu %d", i);
                netmsg_service_port_init(&netisr_cpu[i].td_msgport);
        }

        /*
         * The netisr_afree_rport is a special reply port which automatically
         * frees the replied message.  The netisr_adone_rport simply marks
         * the message as being done.  The netisr_apanic_rport panics if
         * the message is replied to.
         */
        lwkt_initport(&netisr_afree_rport, NULL);
        netisr_afree_rport.mp_replyport = netisr_autofree_reply;
        lwkt_initport_null_rport(&netisr_adone_rport, NULL);
        lwkt_initport(&netisr_apanic_rport, NULL);
        netisr_apanic_rport.mp_replyport = netisr_autopanic_reply;

        /*
         * The netisr_syncport is a special port which executes the message
         * synchronously and waits for it if EASYNC is returned.
         */
        lwkt_initport(&netisr_sync_port, NULL);
        netisr_sync_port.mp_putport = netmsg_sync_putport;
        netisr_sync_port.mp_abortport = netmsg_sync_abortport;
}

SYSINIT(netisr, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST, netisr_init, NULL);

/*
 * Finish initializing the message port for a netmsg service.  This also
 * registers the port for synchronous cleanup operations such as when an
 * ifnet is being destroyed.  There is no deregistration API yet.
 */
void
netmsg_service_port_init(lwkt_port_t port)
{
        struct netmsg_port_registration *reg;

        /*
         * Override the putport function.  Our custom function checks for
         * self-references and executes such commands synchronously.
         */
        port->mp_putport = netmsg_put_port;

        /*
         * Keep track of ports using the netmsg API so we can synchronize
         * certain operations (such as freeing an ifnet structure) across all
         * consumers.
         */
        reg = kmalloc(sizeof(*reg), M_TEMP, M_WAITOK|M_ZERO);
        reg->npr_port = port;
        TAILQ_INSERT_TAIL(&netreglist, reg, npr_entry);
}

/*
 * This function synchronizes the caller with all netmsg services.  For
 * example, if an interface is being removed we must make sure that all
 * packets related to that interface complete processing before the structure
 * can actually be freed.  This sort of synchronization is an alternative to
 * ref-counting the netif, removing the ref counting overhead in favor of
 * placing additional overhead in the netif freeing sequence (where it is
 * inconsequential).
 */
void
netmsg_service_sync(void)
{
        struct netmsg_port_registration *reg;
        struct netmsg smsg;

        /* On-stack message; lwkt_domsg() blocks until each port replies. */
        lwkt_initmsg(&smsg.nm_lmsg, &curthread->td_msgport, 0,
                     lwkt_cmd_func((void *)netmsg_sync_func), lwkt_cmd_op_none);

        TAILQ_FOREACH(reg, &netreglist, npr_entry) {
                lwkt_domsg(reg->npr_port, &smsg.nm_lmsg);
        }
}

/*
 * The netmsg function simply replies the message.  API semantics require
 * EASYNC to be returned if the netmsg function disposes of the message.
 */
static int
netmsg_sync_func(struct netmsg *msg)
{
        lwkt_replymsg(&msg->nm_lmsg, 0);
        return(EASYNC);
}

/*
 * Generic netmsg service loop.  Some protocols may roll their own but all
 * must do the basic command dispatch function call done here.
 */
void
netmsg_service_loop(void *arg)
{
        struct netmsg *msg;

        /* Block waiting for messages and dispatch each command function. */
        while ((msg = lwkt_waitport(&curthread->td_msgport, NULL))) {
                msg->nm_lmsg.ms_cmd.cm_func(&msg->nm_lmsg);
        }
}

/*
 * Call the netisr directly.
 * Queueing may be done in the msg port layer at its discretion.
 */
void
netisr_dispatch(int num, struct mbuf *m)
{
        /* just queue it for now XXX JH */
        netisr_queue(num, m);
}

/*
 * Same as netisr_dispatch(), but always queue.
 * This is either used in places where we are not confident that
 * direct dispatch is possible, or where queueing is required.
285 */ 286 int 287 netisr_queue(int num, struct mbuf *m) 288 { 289 struct netisr *ni; 290 struct netmsg_packet *pmsg; 291 lwkt_port_t port; 292 293 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 294 ("netisr_queue: bad isr %d", num)); 295 296 ni = &netisrs[num]; 297 if (ni->ni_handler == NULL) { 298 kprintf("netisr_queue: unregistered isr %d\n", num); 299 return (EIO); 300 } 301 302 if ((port = ni->ni_mport(&m)) == NULL) 303 return (EIO); 304 305 pmsg = &m->m_hdr.mh_netmsg; 306 307 lwkt_initmsg(&pmsg->nm_lmsg, &netisr_apanic_rport, 0, 308 lwkt_cmd_func((void *)ni->ni_handler), lwkt_cmd_op_none); 309 pmsg->nm_packet = m; 310 pmsg->nm_lmsg.u.ms_result = num; 311 lwkt_sendmsg(port, &pmsg->nm_lmsg); 312 return (0); 313 } 314 315 void 316 netisr_register(int num, lwkt_portfn_t mportfn, netisr_fn_t handler) 317 { 318 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 319 ("netisr_register: bad isr %d", num)); 320 lwkt_initmsg(&netisrs[num].ni_netmsg.nm_lmsg, &netisr_adone_rport, 0, 321 lwkt_cmd_op_none, lwkt_cmd_op_none); 322 netisrs[num].ni_mport = mportfn; 323 netisrs[num].ni_handler = handler; 324 } 325 326 int 327 netisr_unregister(int num) 328 { 329 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 330 ("unregister_netisr: bad isr number: %d\n", num)); 331 332 /* XXX JH */ 333 return (0); 334 } 335 336 /* 337 * Return message port for default handler thread on CPU 0. 
 */
lwkt_port_t
cpu0_portfn(struct mbuf **mptr)
{
        return (&netisr_cpu[0].td_msgport);
}

/* Return the message port of the netisr service thread for the given cpu. */
lwkt_port_t
cpu_portfn(int cpu)
{
        return (&netisr_cpu[cpu].td_msgport);
}

/*
 * Socket port function that always selects the CPU 0 netisr thread;
 * all arguments are ignored.
 */
/* ARGSUSED */
lwkt_port_t
cpu0_soport(struct socket *so __unused, struct sockaddr *nam __unused,
            int req __unused)
{
        return (&netisr_cpu[0].td_msgport);
}

/*
 * Socket port function that selects the synchronous execution port;
 * all arguments are ignored.
 */
lwkt_port_t
sync_soport(struct socket *so __unused, struct sockaddr *nam __unused,
            int req __unused)
{
        return (&netisr_sync_port);
}

/*
 * schednetisr() is used to call the netisr handler from the appropriate
 * netisr thread for polling and other purposes.
 *
 * This function may be called from a hard interrupt or IPI and must be
 * MP SAFE and non-blocking.  We use a fixed per-cpu message instead of
 * trying to allocate one.  We must get ourselves onto the target cpu
 * to safely check the MSGF_DONE bit on the message but since the message
 * will be sent to that cpu anyway this does not add any extra work beyond
 * what lwkt_sendmsg() would have already had to do to schedule the target
 * thread.
377 */ 378 static void 379 schednetisr_remote(void *data) 380 { 381 int num = (int)data; 382 struct netisr *ni = &netisrs[num]; 383 lwkt_port_t port = &netisr_cpu[0].td_msgport; 384 struct netmsg *pmsg; 385 386 pmsg = &netisrs[num].ni_netmsg; 387 crit_enter(); 388 if (pmsg->nm_lmsg.ms_flags & MSGF_DONE) { 389 lwkt_initmsg(&pmsg->nm_lmsg, &netisr_adone_rport, 0, 390 lwkt_cmd_func((void *)ni->ni_handler), lwkt_cmd_op_none); 391 pmsg->nm_lmsg.u.ms_result = num; 392 lwkt_sendmsg(port, &pmsg->nm_lmsg); 393 } 394 crit_exit(); 395 } 396 397 void 398 schednetisr(int num) 399 { 400 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 401 ("schednetisr: bad isr %d", num)); 402 #ifdef SMP 403 if (mycpu->gd_cpuid != 0) 404 lwkt_send_ipiq(globaldata_find(0), schednetisr_remote, (void *)num); 405 else 406 schednetisr_remote((void *)num); 407 #else 408 schednetisr_remote((void *)num); 409 #endif 410 } 411 412