1 /* 2 * Copyright (C) 1993-2001, 2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 7 */ 8 9 #if !defined(lint) 10 static const char sccsid[] = "@(#)ip_fil_solaris.c 1.7 07/22/06 (C) 1993-2000 Darren Reed"; 11 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $"; 12 #endif 13 14 #include <sys/types.h> 15 #include <sys/errno.h> 16 #include <sys/param.h> 17 #include <sys/cpuvar.h> 18 #include <sys/open.h> 19 #include <sys/ioctl.h> 20 #include <sys/filio.h> 21 #include <sys/systm.h> 22 #include <sys/strsubr.h> 23 #include <sys/cred.h> 24 #include <sys/ddi.h> 25 #include <sys/sunddi.h> 26 #include <sys/ksynch.h> 27 #include <sys/kmem.h> 28 #include <sys/mkdev.h> 29 #include <sys/protosw.h> 30 #include <sys/socket.h> 31 #include <sys/dditypes.h> 32 #include <sys/cmn_err.h> 33 #include <sys/zone.h> 34 #include <net/if.h> 35 #include <net/af.h> 36 #include <net/route.h> 37 #include <netinet/in.h> 38 #include <netinet/in_systm.h> 39 #include <netinet/ip.h> 40 #include <netinet/ip_var.h> 41 #include <netinet/tcp.h> 42 #include <netinet/udp.h> 43 #include <netinet/tcpip.h> 44 #include <netinet/ip_icmp.h> 45 #include "netinet/ip_compat.h" 46 #ifdef USE_INET6 47 # include <netinet/icmp6.h> 48 #endif 49 #include "netinet/ip_fil.h" 50 #include "netinet/ip_nat.h" 51 #include "netinet/ip_frag.h" 52 #include "netinet/ip_state.h" 53 #include "netinet/ip_auth.h" 54 #include "netinet/ip_proxy.h" 55 #include "netinet/ipf_stack.h" 56 #ifdef IPFILTER_LOOKUP 57 # include "netinet/ip_lookup.h" 58 #endif 59 #include <inet/ip_ire.h> 60 61 #include <sys/md5.h> 62 #include <sys/neti.h> 63 64 static int frzerostats __P((caddr_t, ipf_stack_t *)); 65 static int fr_setipfloopback __P((int, ipf_stack_t *)); 66 static int fr_enableipf __P((ipf_stack_t *, int)); 67 static int fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp)); 68 static int ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *)); 69 static int ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *)); 70 static int ipf_hook __P((hook_data_t, int, int, void *)); 71 static int ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *)); 72 static int ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *)); 73 static int ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t, 74 void *)); 75 static int ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *)); 76 static int ipf_hook4 __P((hook_data_t, int, int, void *)); 77 static int ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *)); 78 static int ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *)); 79 static int ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t, 80 void *)); 81 static int ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t, 82 void *)); 83 static int ipf_hook6 __P((hook_data_t, int, int, void *)); 84 extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 85 extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *)); 86 87 #if SOLARIS2 < 10 88 #if SOLARIS2 >= 7 89 u_int *ip_ttl_ptr = NULL; 90 u_int *ip_mtudisc = NULL; 91 # if SOLARIS2 >= 8 92 int *ip_forwarding = NULL; 93 u_int *ip6_forwarding = NULL; 94 # else 95 u_int *ip_forwarding = NULL; 96 # endif 97 #else 98 u_long *ip_ttl_ptr = NULL; 99 u_long *ip_mtudisc = NULL; 100 u_long *ip_forwarding = NULL; 101 #endif 102 #endif 103 104 105 /* ------------------------------------------------------------------------ */ 106 /* Function: ipldetach */ 107 /* Returns: int - 0 == success, else error. */ 108 /* Parameters: Nil */ 109 /* */ 110 /* This function is responsible for undoing anything that might have been */ 111 /* done in a call to iplattach(). It must be able to clean up from a call */ 112 /* to iplattach() that did not succeed. Why might that happen? Someone */ 113 /* configures a table to be so large that we cannot allocate enough memory */ 114 /* for it. */ 115 /* ------------------------------------------------------------------------ */ 116 int ipldetach(ifs) 117 ipf_stack_t *ifs; 118 { 119 120 ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0); 121 122 #if SOLARIS2 < 10 123 124 if (ifs->ifs_fr_control_forwarding & 2) { 125 if (ip_forwarding != NULL) 126 *ip_forwarding = 0; 127 #if SOLARIS2 >= 8 128 if (ip6_forwarding != NULL) 129 *ip6_forwarding = 0; 130 #endif 131 } 132 #endif 133 134 /* 135 * This lock needs to be dropped around the net_hook_unregister calls 136 * because we can deadlock here with: 137 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs 138 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running) 139 */ 140 RWLOCK_EXIT(&ifs->ifs_ipf_global); 141 142 #define UNDO_HOOK(_f, _b, _e, _h) \ 143 do { \ 144 if (ifs->_f != NULL) { \ 145 if (ifs->_b) { \ 146 int tmp = net_hook_unregister(ifs->_f, \ 147 _e, ifs->_h); \ 148 ifs->_b = (tmp != 0 && tmp != ENXIO); \ 149 if (!ifs->_b && ifs->_h != NULL) { \ 150 hook_free(ifs->_h); \ 151 ifs->_h = NULL; \ 152 } \ 153 } else if (ifs->_h != NULL) { \ 154 hook_free(ifs->_h); \ 155 ifs->_h = NULL; \ 156 } \ 157 } \ 158 _NOTE(CONSTCOND) \ 159 } while (0) 160 161 /* 162 * Remove IPv6 Hooks 163 */ 164 if (ifs->ifs_ipf_ipv6 != NULL) { 165 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in, 166 NH_PHYSICAL_IN, ifs_ipfhook6_in); 167 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out, 168 NH_PHYSICAL_OUT, ifs_ipfhook6_out); 169 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events, 170 NH_NIC_EVENTS, ifs_ipfhook6_nicevents); 171 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in, 172 NH_LOOPBACK_IN, ifs_ipfhook6_loop_in); 173 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out, 174 NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out); 175 176 if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0) 177 goto detach_failed; 178 ifs->ifs_ipf_ipv6 = NULL; 179 } 180 181 /* 182 * Remove IPv4 Hooks 183 */ 184 if (ifs->ifs_ipf_ipv4 != NULL) { 185 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in, 186 NH_PHYSICAL_IN, ifs_ipfhook4_in); 187 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out, 188 NH_PHYSICAL_OUT, ifs_ipfhook4_out); 189 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events, 190 NH_NIC_EVENTS, ifs_ipfhook4_nicevents); 191 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in, 192 NH_LOOPBACK_IN, ifs_ipfhook4_loop_in); 193 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out, 194 NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out); 195 196 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0) 197 goto detach_failed; 198 ifs->ifs_ipf_ipv4 = NULL; 199 } 200 201 #undef UNDO_HOOK 202 203 #ifdef IPFDEBUG 204 cmn_err(CE_CONT, "ipldetach()\n"); 205 #endif 206 207 WRITE_ENTER(&ifs->ifs_ipf_global); 208 fr_deinitialise(ifs); 209 210 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs); 211 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs); 212 213 if (ifs->ifs_ipf_locks_done == 1) { 214 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock); 215 MUTEX_DESTROY(&ifs->ifs_ipf_rw); 216 RW_DESTROY(&ifs->ifs_ipf_tokens); 217 RW_DESTROY(&ifs->ifs_ipf_ipidfrag); 218 ifs->ifs_ipf_locks_done = 0; 219 } 220 221 if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out || 222 ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in || 223 ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events || 224 ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out || 225 ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out) 226 return -1; 227 228 return 0; 229 230 detach_failed: 231 WRITE_ENTER(&ifs->ifs_ipf_global); 232 return -1; 233 } 234 235 int iplattach(ifs) 236 ipf_stack_t *ifs; 237 { 238 #if SOLARIS2 < 10 239 int i; 240 #endif 241 netid_t id = ifs->ifs_netid; 242 243 #ifdef IPFDEBUG 244 cmn_err(CE_CONT, "iplattach()\n"); 245 #endif 246 247 ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0); 248 ifs->ifs_fr_flags = IPF_LOGGING; 249 #ifdef _KERNEL 250 ifs->ifs_fr_update_ipid = 0; 251 #else 252 ifs->ifs_fr_update_ipid = 1; 253 #endif 254 ifs->ifs_fr_minttl = 4; 255 ifs->ifs_fr_icmpminfragmtu = 68; 256 #if defined(IPFILTER_DEFAULT_BLOCK) 257 ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH; 258 #else 259 ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH; 260 #endif 261 262 bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache)); 263 MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex"); 264 MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex"); 265 RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock"); 266 RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock"); 267 ifs->ifs_ipf_locks_done = 1; 268 269 if (fr_initialise(ifs) < 0) 270 return -1; 271 272 HOOK_INIT(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4, 273 "ipfilter_hook4_nicevents", ifs); 274 HOOK_INIT(ifs->ifs_ipfhook4_in, ipf_hook4_in, 275 "ipfilter_hook4_in", ifs); 276 HOOK_INIT(ifs->ifs_ipfhook4_out, ipf_hook4_out, 277 "ipfilter_hook4_out", ifs); 278 HOOK_INIT(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in, 279 "ipfilter_hook4_loop_in", ifs); 280 HOOK_INIT(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out, 281 "ipfilter_hook4_loop_out", ifs); 282 283 /* 284 * If we hold this lock over all of the net_hook_register calls, we 285 * can cause a deadlock to occur with the following lock ordering: 286 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs 287 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path) 288 */ 289 RWLOCK_EXIT(&ifs->ifs_ipf_global); 290 291 /* 292 * Add IPv4 hooks 293 */ 294 ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET); 295 if (ifs->ifs_ipf_ipv4 == NULL) 296 goto hookup_failed; 297 298 ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4, 299 NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0); 300 if (!ifs->ifs_hook4_nic_events) 301 goto hookup_failed; 302 303 ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4, 304 NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0); 305 if (!ifs->ifs_hook4_physical_in) 306 goto hookup_failed; 307 308 ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4, 309 NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0); 310 if (!ifs->ifs_hook4_physical_out) 311 goto hookup_failed; 312 313 if (ifs->ifs_ipf_loopback) { 314 ifs->ifs_hook4_loopback_in = (net_hook_register( 315 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN, 316 ifs->ifs_ipfhook4_loop_in) == 0); 317 if (!ifs->ifs_hook4_loopback_in) 318 goto hookup_failed; 319 320 ifs->ifs_hook4_loopback_out = (net_hook_register( 321 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT, 322 ifs->ifs_ipfhook4_loop_out) == 0); 323 if (!ifs->ifs_hook4_loopback_out) 324 goto hookup_failed; 325 } 326 /* 327 * Add IPv6 hooks 328 */ 329 ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6); 330 if (ifs->ifs_ipf_ipv6 == NULL) 331 goto hookup_failed; 332 333 HOOK_INIT(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6, 334 "ipfilter_hook6_nicevents", ifs); 335 HOOK_INIT(ifs->ifs_ipfhook6_in, ipf_hook6_in, 336 "ipfilter_hook6_in", ifs); 337 HOOK_INIT(ifs->ifs_ipfhook6_out, ipf_hook6_out, 338 "ipfilter_hook6_out", ifs); 339 HOOK_INIT(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in, 340 "ipfilter_hook6_loop_in", ifs); 341 HOOK_INIT(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out, 342 "ipfilter_hook6_loop_out", ifs); 343 344 ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6, 345 NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0); 346 if (!ifs->ifs_hook6_nic_events) 347 goto hookup_failed; 348 349 ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6, 350 NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0); 351 if (!ifs->ifs_hook6_physical_in) 352 goto hookup_failed; 353 354 ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6, 355 NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0); 356 if (!ifs->ifs_hook6_physical_out) 357 goto hookup_failed; 358 359 if (ifs->ifs_ipf_loopback) { 360 ifs->ifs_hook6_loopback_in = (net_hook_register( 361 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN, 362 ifs->ifs_ipfhook6_loop_in) == 0); 363 if (!ifs->ifs_hook6_loopback_in) 364 goto hookup_failed; 365 366 ifs->ifs_hook6_loopback_out = (net_hook_register( 367 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT, 368 ifs->ifs_ipfhook6_loop_out) == 0); 369 if (!ifs->ifs_hook6_loopback_out) 370 goto hookup_failed; 371 } 372 373 /* 374 * Reacquire ipf_global, now it is safe. 375 */ 376 WRITE_ENTER(&ifs->ifs_ipf_global); 377 378 /* Do not use private interface ip_params_arr[] in Solaris 10 */ 379 #if SOLARIS2 < 10 380 381 #if SOLARIS2 >= 8 382 ip_forwarding = &ip_g_forward; 383 #endif 384 /* 385 * XXX - There is no terminator for this array, so it is not possible 386 * to tell if what we are looking for is missing and go off the end 387 * of the array. 388 */ 389 390 #if SOLARIS2 <= 8 391 for (i = 0; ; i++) { 392 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) { 393 ip_ttl_ptr = &ip_param_arr[i].ip_param_value; 394 } else if (!strcmp(ip_param_arr[i].ip_param_name, 395 "ip_path_mtu_discovery")) { 396 ip_mtudisc = &ip_param_arr[i].ip_param_value; 397 } 398 #if SOLARIS2 < 8 399 else if (!strcmp(ip_param_arr[i].ip_param_name, 400 "ip_forwarding")) { 401 ip_forwarding = &ip_param_arr[i].ip_param_value; 402 } 403 #else 404 else if (!strcmp(ip_param_arr[i].ip_param_name, 405 "ip6_forwarding")) { 406 ip6_forwarding = &ip_param_arr[i].ip_param_value; 407 } 408 #endif 409 410 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL && 411 #if SOLARIS2 >= 8 412 ip6_forwarding != NULL && 413 #endif 414 ip_forwarding != NULL) 415 break; 416 } 417 #endif 418 419 if (ifs->ifs_fr_control_forwarding & 1) { 420 if (ip_forwarding != NULL) 421 *ip_forwarding = 1; 422 #if SOLARIS2 >= 8 423 if (ip6_forwarding != NULL) 424 *ip6_forwarding = 1; 425 #endif 426 } 427 428 #endif 429 430 return 0; 431 hookup_failed: 432 WRITE_ENTER(&ifs->ifs_ipf_global); 433 return -1; 434 } 435 436 static int fr_setipfloopback(set, ifs) 437 int set; 438 ipf_stack_t *ifs; 439 { 440 if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL) 441 return EFAULT; 442 443 if (set && !ifs->ifs_ipf_loopback) { 444 ifs->ifs_ipf_loopback = 1; 445 446 ifs->ifs_hook4_loopback_in = (net_hook_register( 447 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN, 448 ifs->ifs_ipfhook4_loop_in) == 0); 449 if (!ifs->ifs_hook4_loopback_in) 450 return EINVAL; 451 452 ifs->ifs_hook4_loopback_out = (net_hook_register( 453 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT, 454 ifs->ifs_ipfhook4_loop_out) == 0); 455 if (!ifs->ifs_hook4_loopback_out) 456 return EINVAL; 457 458 ifs->ifs_hook6_loopback_in = (net_hook_register( 459 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN, 460 ifs->ifs_ipfhook6_loop_in) == 0); 461 if (!ifs->ifs_hook6_loopback_in) 462 return EINVAL; 463 464 ifs->ifs_hook6_loopback_out = (net_hook_register( 465 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT, 466 ifs->ifs_ipfhook6_loop_out) == 0); 467 if (!ifs->ifs_hook6_loopback_out) 468 return EINVAL; 469 470 } else if (!set && ifs->ifs_ipf_loopback) { 471 ifs->ifs_ipf_loopback = 0; 472 473 ifs->ifs_hook4_loopback_in = 474 (net_hook_unregister(ifs->ifs_ipf_ipv4, 475 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0); 476 if (ifs->ifs_hook4_loopback_in) 477 return EBUSY; 478 479 ifs->ifs_hook4_loopback_out = 480 (net_hook_unregister(ifs->ifs_ipf_ipv4, 481 NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0); 482 if (ifs->ifs_hook4_loopback_out) 483 return EBUSY; 484 485 ifs->ifs_hook6_loopback_in = 486 (net_hook_unregister(ifs->ifs_ipf_ipv6, 487 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0); 488 if (ifs->ifs_hook6_loopback_in) 489 return EBUSY; 490 491 ifs->ifs_hook6_loopback_out = 492 (net_hook_unregister(ifs->ifs_ipf_ipv6, 493 NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0); 494 if (ifs->ifs_hook6_loopback_out) 495 return EBUSY; 496 } 497 return 0; 498 } 499 500 501 /* 502 * Filter ioctl interface. 503 */ 504 /*ARGSUSED*/ 505 int iplioctl(dev, cmd, data, mode, cp, rp) 506 dev_t dev; 507 int cmd; 508 #if SOLARIS2 >= 7 509 intptr_t data; 510 #else 511 int *data; 512 #endif 513 int mode; 514 cred_t *cp; 515 int *rp; 516 { 517 int error = 0, tmp; 518 friostat_t fio; 519 minor_t unit; 520 u_int enable; 521 ipf_stack_t *ifs; 522 523 #ifdef IPFDEBUG 524 cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n", 525 dev, cmd, data, mode, cp, rp); 526 #endif 527 unit = getminor(dev); 528 if (IPL_LOGMAX < unit) 529 return ENXIO; 530 531 /* 532 * As we're calling ipf_find_stack in user space, from a given zone 533 * to find the stack pointer for this zone, there is no need to have 534 * a hold/refence count here. 535 */ 536 ifs = ipf_find_stack(crgetzoneid(cp)); 537 ASSERT(ifs != NULL); 538 539 if (ifs->ifs_fr_running <= 0) { 540 if (unit != IPL_LOGIPF) { 541 return EIO; 542 } 543 if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET && 544 cmd != SIOCIPFSET && cmd != SIOCFRENB && 545 cmd != SIOCGETFS && cmd != SIOCGETFF) { 546 return EIO; 547 } 548 } 549 550 READ_ENTER(&ifs->ifs_ipf_global); 551 if (ifs->ifs_fr_enable_active != 0) { 552 RWLOCK_EXIT(&ifs->ifs_ipf_global); 553 return EBUSY; 554 } 555 556 error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp), 557 curproc, ifs); 558 if (error != -1) { 559 RWLOCK_EXIT(&ifs->ifs_ipf_global); 560 return error; 561 } 562 error = 0; 563 564 switch (cmd) 565 { 566 case SIOCFRENB : 567 if (!(mode & FWRITE)) 568 error = EPERM; 569 else { 570 error = COPYIN((caddr_t)data, (caddr_t)&enable, 571 sizeof(enable)); 572 if (error != 0) { 573 error = EFAULT; 574 break; 575 } 576 577 RWLOCK_EXIT(&ifs->ifs_ipf_global); 578 WRITE_ENTER(&ifs->ifs_ipf_global); 579 580 /* 581 * We must recheck fr_enable_active here, since we've 582 * dropped ifs_ipf_global from R in order to get it 583 * exclusively. 584 */ 585 if (ifs->ifs_fr_enable_active == 0) { 586 ifs->ifs_fr_enable_active = 1; 587 error = fr_enableipf(ifs, enable); 588 ifs->ifs_fr_enable_active = 0; 589 } 590 } 591 break; 592 case SIOCIPFSET : 593 if (!(mode & FWRITE)) { 594 error = EPERM; 595 break; 596 } 597 /* FALLTHRU */ 598 case SIOCIPFGETNEXT : 599 case SIOCIPFGET : 600 error = fr_ipftune(cmd, (void *)data, ifs); 601 break; 602 case SIOCSETFF : 603 if (!(mode & FWRITE)) 604 error = EPERM; 605 else { 606 error = COPYIN((caddr_t)data, 607 (caddr_t)&ifs->ifs_fr_flags, 608 sizeof(ifs->ifs_fr_flags)); 609 if (error != 0) 610 error = EFAULT; 611 } 612 break; 613 case SIOCIPFLP : 614 error = COPYIN((caddr_t)data, (caddr_t)&tmp, 615 sizeof(tmp)); 616 if (error != 0) 617 error = EFAULT; 618 else 619 error = fr_setipfloopback(tmp, ifs); 620 break; 621 case SIOCGETFF : 622 error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data, 623 sizeof(ifs->ifs_fr_flags)); 624 if (error != 0) 625 error = EFAULT; 626 break; 627 case SIOCFUNCL : 628 error = fr_resolvefunc((void *)data); 629 break; 630 case SIOCINAFR : 631 case SIOCRMAFR : 632 case SIOCADAFR : 633 case SIOCZRLST : 634 if (!(mode & FWRITE)) 635 error = EPERM; 636 else 637 error = frrequest(unit, cmd, (caddr_t)data, 638 ifs->ifs_fr_active, 1, ifs); 639 break; 640 case SIOCINIFR : 641 case SIOCRMIFR : 642 case SIOCADIFR : 643 if (!(mode & FWRITE)) 644 error = EPERM; 645 else 646 error = frrequest(unit, cmd, (caddr_t)data, 647 1 - ifs->ifs_fr_active, 1, ifs); 648 break; 649 case SIOCSWAPA : 650 if (!(mode & FWRITE)) 651 error = EPERM; 652 else { 653 WRITE_ENTER(&ifs->ifs_ipf_mutex); 654 bzero((char *)ifs->ifs_frcache, 655 sizeof (ifs->ifs_frcache)); 656 error = COPYOUT((caddr_t)&ifs->ifs_fr_active, 657 (caddr_t)data, 658 sizeof(ifs->ifs_fr_active)); 659 if (error != 0) 660 error = EFAULT; 661 else 662 ifs->ifs_fr_active = 1 - ifs->ifs_fr_active; 663 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 664 } 665 break; 666 case SIOCGETFS : 667 fr_getstat(&fio, ifs); 668 error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT); 669 break; 670 case SIOCFRZST : 671 if (!(mode & FWRITE)) 672 error = EPERM; 673 else 674 error = fr_zerostats((caddr_t)data, ifs); 675 break; 676 case SIOCIPFFL : 677 if (!(mode & FWRITE)) 678 error = EPERM; 679 else { 680 error = COPYIN((caddr_t)data, (caddr_t)&tmp, 681 sizeof(tmp)); 682 if (!error) { 683 tmp = frflush(unit, 4, tmp, ifs); 684 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, 685 sizeof(tmp)); 686 if (error != 0) 687 error = EFAULT; 688 } else 689 error = EFAULT; 690 } 691 break; 692 #ifdef USE_INET6 693 case SIOCIPFL6 : 694 if (!(mode & FWRITE)) 695 error = EPERM; 696 else { 697 error = COPYIN((caddr_t)data, (caddr_t)&tmp, 698 sizeof(tmp)); 699 if (!error) { 700 tmp = frflush(unit, 6, tmp, ifs); 701 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, 702 sizeof(tmp)); 703 if (error != 0) 704 error = EFAULT; 705 } else 706 error = EFAULT; 707 } 708 break; 709 #endif 710 case SIOCSTLCK : 711 error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp)); 712 if (error == 0) { 713 ifs->ifs_fr_state_lock = tmp; 714 ifs->ifs_fr_nat_lock = tmp; 715 ifs->ifs_fr_frag_lock = tmp; 716 ifs->ifs_fr_auth_lock = tmp; 717 } else 718 error = EFAULT; 719 break; 720 #ifdef IPFILTER_LOG 721 case SIOCIPFFB : 722 if (!(mode & FWRITE)) 723 error = EPERM; 724 else { 725 tmp = ipflog_clear(unit, ifs); 726 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, 727 sizeof(tmp)); 728 if (error) 729 error = EFAULT; 730 } 731 break; 732 #endif /* IPFILTER_LOG */ 733 case SIOCFRSYN : 734 if (!(mode & FWRITE)) 735 error = EPERM; 736 else { 737 RWLOCK_EXIT(&ifs->ifs_ipf_global); 738 WRITE_ENTER(&ifs->ifs_ipf_global); 739 740 frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); 741 fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); 742 fr_nataddrsync(0, NULL, NULL, ifs); 743 fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs); 744 error = 0; 745 } 746 break; 747 case SIOCGFRST : 748 error = fr_outobj((void *)data, fr_fragstats(ifs), 749 IPFOBJ_FRAGSTAT); 750 break; 751 case FIONREAD : 752 #ifdef IPFILTER_LOG 753 tmp = (int)ifs->ifs_iplused[IPL_LOGIPF]; 754 755 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp)); 756 if (error != 0) 757 error = EFAULT; 758 #endif 759 break; 760 case SIOCIPFITER : 761 error = ipf_frruleiter((caddr_t)data, crgetuid(cp), 762 curproc, ifs); 763 break; 764 765 case SIOCGENITER : 766 error = ipf_genericiter((caddr_t)data, crgetuid(cp), 767 curproc, ifs); 768 break; 769 770 case SIOCIPFDELTOK : 771 error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp)); 772 if (error != 0) { 773 error = EFAULT; 774 } else { 775 error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs); 776 } 777 break; 778 779 default : 780 #ifdef IPFDEBUG 781 cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p", 782 cmd, (void *)data); 783 #endif 784 error = EINVAL; 785 break; 786 } 787 RWLOCK_EXIT(&ifs->ifs_ipf_global); 788 return error; 789 } 790 791 792 static int fr_enableipf(ifs, enable) 793 ipf_stack_t *ifs; 794 int enable; 795 { 796 int error; 797 798 if (!enable) { 799 error = ipldetach(ifs); 800 if (error == 0) 801 ifs->ifs_fr_running = -1; 802 return error; 803 } 804 805 if (ifs->ifs_fr_running > 0) 806 return 0; 807 808 error = iplattach(ifs); 809 if (error == 0) { 810 if (ifs->ifs_fr_timer_id == NULL) { 811 int hz = drv_usectohz(500000); 812 813 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, 814 (void *)ifs, 815 hz); 816 } 817 ifs->ifs_fr_running = 1; 818 } else { 819 (void) ipldetach(ifs); 820 } 821 return error; 822 } 823 824 825 phy_if_t get_unit(name, v, ifs) 826 char *name; 827 int v; 828 ipf_stack_t *ifs; 829 { 830 net_handle_t nif; 831 832 if (v == 4) 833 nif = ifs->ifs_ipf_ipv4; 834 else if (v == 6) 835 nif = ifs->ifs_ipf_ipv6; 836 else 837 return 0; 838 839 return (net_phylookup(nif, name)); 840 } 841 842 /* 843 * routines below for saving IP headers to buffer 844 */ 845 /*ARGSUSED*/ 846 int iplopen(devp, flags, otype, cred) 847 dev_t *devp; 848 int flags, otype; 849 cred_t *cred; 850 { 851 minor_t min = getminor(*devp); 852 853 #ifdef IPFDEBUG 854 cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred); 855 #endif 856 if (!(otype & OTYP_CHR)) 857 return ENXIO; 858 859 min = (IPL_LOGMAX < min) ? ENXIO : 0; 860 return min; 861 } 862 863 864 /*ARGSUSED*/ 865 int iplclose(dev, flags, otype, cred) 866 dev_t dev; 867 int flags, otype; 868 cred_t *cred; 869 { 870 minor_t min = getminor(dev); 871 872 #ifdef IPFDEBUG 873 cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred); 874 #endif 875 876 min = (IPL_LOGMAX < min) ? ENXIO : 0; 877 return min; 878 } 879 880 #ifdef IPFILTER_LOG 881 /* 882 * iplread/ipllog 883 * both of these must operate with at least splnet() lest they be 884 * called during packet processing and cause an inconsistancy to appear in 885 * the filter lists. 886 */ 887 /*ARGSUSED*/ 888 int iplread(dev, uio, cp) 889 dev_t dev; 890 register struct uio *uio; 891 cred_t *cp; 892 { 893 ipf_stack_t *ifs; 894 int ret; 895 896 /* 897 * As we're calling ipf_find_stack in user space, from a given zone 898 * to find the stack pointer for this zone, there is no need to have 899 * a hold/refence count here. 900 */ 901 ifs = ipf_find_stack(crgetzoneid(cp)); 902 ASSERT(ifs != NULL); 903 904 # ifdef IPFDEBUG 905 cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp); 906 # endif 907 908 if (ifs->ifs_fr_running < 1) { 909 return EIO; 910 } 911 912 # ifdef IPFILTER_SYNC 913 if (getminor(dev) == IPL_LOGSYNC) { 914 return ipfsync_read(uio); 915 } 916 # endif 917 918 ret = ipflog_read(getminor(dev), uio, ifs); 919 return ret; 920 } 921 #endif /* IPFILTER_LOG */ 922 923 924 /* 925 * iplread/ipllog 926 * both of these must operate with at least splnet() lest they be 927 * called during packet processing and cause an inconsistancy to appear in 928 * the filter lists. 929 */ 930 int iplwrite(dev, uio, cp) 931 dev_t dev; 932 register struct uio *uio; 933 cred_t *cp; 934 { 935 ipf_stack_t *ifs; 936 937 /* 938 * As we're calling ipf_find_stack in user space, from a given zone 939 * to find the stack pointer for this zone, there is no need to have 940 * a hold/refence count here. 941 */ 942 ifs = ipf_find_stack(crgetzoneid(cp)); 943 ASSERT(ifs != NULL); 944 945 #ifdef IPFDEBUG 946 cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp); 947 #endif 948 949 if (ifs->ifs_fr_running < 1) { 950 return EIO; 951 } 952 953 #ifdef IPFILTER_SYNC 954 if (getminor(dev) == IPL_LOGSYNC) 955 return ipfsync_write(uio); 956 #endif /* IPFILTER_SYNC */ 957 dev = dev; /* LINT */ 958 uio = uio; /* LINT */ 959 cp = cp; /* LINT */ 960 return ENXIO; 961 } 962 963 964 /* 965 * fr_send_reset - this could conceivably be a call to tcp_respond(), but that 966 * requires a large amount of setting up and isn't any more efficient. 967 */ 968 int fr_send_reset(fin) 969 fr_info_t *fin; 970 { 971 tcphdr_t *tcp, *tcp2; 972 int tlen, hlen; 973 mblk_t *m; 974 #ifdef USE_INET6 975 ip6_t *ip6; 976 #endif 977 ip_t *ip; 978 979 tcp = fin->fin_dp; 980 if (tcp->th_flags & TH_RST) 981 return -1; 982 983 #ifndef IPFILTER_CKSUM 984 if (fr_checkl4sum(fin) == -1) 985 return -1; 986 #endif 987 988 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0; 989 #ifdef USE_INET6 990 if (fin->fin_v == 6) 991 hlen = sizeof(ip6_t); 992 else 993 #endif 994 hlen = sizeof(ip_t); 995 hlen += sizeof(*tcp2); 996 if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL) 997 return -1; 998 999 m->b_rptr += 64; 1000 MTYPE(m) = M_DATA; 1001 m->b_wptr = m->b_rptr + hlen; 1002 ip = (ip_t *)m->b_rptr; 1003 bzero((char *)ip, hlen); 1004 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2)); 1005 tcp2->th_dport = tcp->th_sport; 1006 tcp2->th_sport = tcp->th_dport; 1007 if (tcp->th_flags & TH_ACK) { 1008 tcp2->th_seq = tcp->th_ack; 1009 tcp2->th_flags = TH_RST; 1010 } else { 1011 tcp2->th_ack = ntohl(tcp->th_seq); 1012 tcp2->th_ack += tlen; 1013 tcp2->th_ack = htonl(tcp2->th_ack); 1014 tcp2->th_flags = TH_RST|TH_ACK; 1015 } 1016 tcp2->th_off = sizeof(struct tcphdr) >> 2; 1017 1018 ip->ip_v = fin->fin_v; 1019 #ifdef USE_INET6 1020 if (fin->fin_v == 6) { 1021 ip6 = (ip6_t *)m->b_rptr; 1022 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; 1023 ip6->ip6_src = fin->fin_dst6.in6; 1024 ip6->ip6_dst = fin->fin_src6.in6; 1025 ip6->ip6_plen = htons(sizeof(*tcp)); 1026 ip6->ip6_nxt = IPPROTO_TCP; 1027 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2); 1028 } else 1029 #endif 1030 { 1031 ip->ip_src.s_addr = fin->fin_daddr; 1032 ip->ip_dst.s_addr = fin->fin_saddr; 1033 ip->ip_id = fr_nextipid(fin); 1034 ip->ip_hl = sizeof(*ip) >> 2; 1035 ip->ip_p = IPPROTO_TCP; 1036 ip->ip_len = sizeof(*ip) + sizeof(*tcp); 1037 ip->ip_tos = fin->fin_ip->ip_tos; 1038 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2); 1039 } 1040 return fr_send_ip(fin, m, &m); 1041 } 1042 1043 /* 1044 * Function: fr_send_ip 1045 * Returns: 0: success 1046 * -1: failed 1047 * Parameters: 1048 * fin: packet information 1049 * m: the message block where ip head starts 1050 * 1051 * Send a new packet through the IP stack. 1052 * 1053 * For IPv4 packets, ip_len must be in host byte order, and ip_v, 1054 * ip_ttl, ip_off, and ip_sum are ignored (filled in by this 1055 * function). 1056 * 1057 * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled 1058 * in by this function. 1059 * 1060 * All other portions of the packet must be in on-the-wire format. 1061 */ 1062 /*ARGSUSED*/ 1063 static int fr_send_ip(fin, m, mpp) 1064 fr_info_t *fin; 1065 mblk_t *m, **mpp; 1066 { 1067 qpktinfo_t qpi, *qpip; 1068 fr_info_t fnew; 1069 ip_t *ip; 1070 int i, hlen; 1071 ipf_stack_t *ifs = fin->fin_ifs; 1072 1073 ip = (ip_t *)m->b_rptr; 1074 bzero((char *)&fnew, sizeof(fnew)); 1075 1076 #ifdef USE_INET6 1077 if (fin->fin_v == 6) { 1078 ip6_t *ip6; 1079 1080 ip6 = (ip6_t *)ip; 1081 ip6->ip6_vfc = 0x60; 1082 ip6->ip6_hlim = 127; 1083 fnew.fin_v = 6; 1084 hlen = sizeof(*ip6); 1085 fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen; 1086 } else 1087 #endif 1088 { 1089 fnew.fin_v = 4; 1090 #if SOLARIS2 >= 10 1091 ip->ip_ttl = 255; 1092 if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1) 1093 ip->ip_off = htons(IP_DF); 1094 #else 1095 if (ip_ttl_ptr != NULL) 1096 ip->ip_ttl = (u_char)(*ip_ttl_ptr); 1097 else 1098 ip->ip_ttl = 63; 1099 if (ip_mtudisc != NULL) 1100 ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0); 1101 else 1102 ip->ip_off = htons(IP_DF); 1103 #endif 1104 /* 1105 * The dance with byte order and ip_len/ip_off is because in 1106 * fr_fastroute, it expects them to be in host byte order but 1107 * ipf_cksum expects them to be in network byte order. 1108 */ 1109 ip->ip_len = htons(ip->ip_len); 1110 ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip)); 1111 ip->ip_len = ntohs(ip->ip_len); 1112 ip->ip_off = ntohs(ip->ip_off); 1113 hlen = sizeof(*ip); 1114 fnew.fin_plen = ip->ip_len; 1115 } 1116 1117 qpip = fin->fin_qpi; 1118 qpi.qpi_off = 0; 1119 qpi.qpi_ill = qpip->qpi_ill; 1120 qpi.qpi_m = m; 1121 qpi.qpi_data = ip; 1122 fnew.fin_qpi = &qpi; 1123 fnew.fin_ifp = fin->fin_ifp; 1124 fnew.fin_flx = FI_NOCKSUM; 1125 fnew.fin_m = m; 1126 fnew.fin_qfm = m; 1127 fnew.fin_ip = ip; 1128 fnew.fin_mp = mpp; 1129 fnew.fin_hlen = hlen; 1130 fnew.fin_dp = (char *)ip + hlen; 1131 fnew.fin_ifs = fin->fin_ifs; 1132 (void) fr_makefrip(hlen, ip, &fnew); 1133 1134 i = fr_fastroute(m, mpp, &fnew, NULL); 1135 return i; 1136 } 1137 1138 1139 int fr_send_icmp_err(type, fin, dst) 1140 int type; 1141 fr_info_t *fin; 1142 int dst; 1143 { 1144 struct in_addr dst4; 1145 struct icmp *icmp; 1146 qpktinfo_t *qpi; 1147 int hlen, code; 1148 phy_if_t phy; 1149 u_short sz; 1150 #ifdef USE_INET6 1151 mblk_t *mb; 1152 #endif 1153 mblk_t *m; 1154 #ifdef USE_INET6 1155 ip6_t *ip6; 1156 #endif 1157 ip_t *ip; 1158 ipf_stack_t *ifs = fin->fin_ifs; 1159 1160 if ((type < 0) || (type > ICMP_MAXTYPE)) 1161 return -1; 1162 1163 code = fin->fin_icode; 1164 #ifdef USE_INET6 1165 if ((code < 0) || (code >= ICMP_MAX_UNREACH)) 1166 return -1; 1167 #endif 1168 1169 #ifndef IPFILTER_CKSUM 1170 if (fr_checkl4sum(fin) == -1) 1171 return -1; 1172 #endif 1173 1174 qpi = fin->fin_qpi; 1175 1176 #ifdef USE_INET6 1177 mb = fin->fin_qfm; 1178 1179 if (fin->fin_v == 6) { 1180 sz = sizeof(ip6_t); 1181 sz += MIN(mb->b_wptr - mb->b_rptr, 512); 1182 hlen = sizeof(ip6_t); 1183 type = icmptoicmp6types[type]; 1184 if (type == ICMP6_DST_UNREACH) 1185 code = icmptoicmp6unreach[code]; 1186 } else 1187 #endif 1188 { 1189 if ((fin->fin_p == IPPROTO_ICMP) && 1190 !(fin->fin_flx & FI_SHORT)) 1191 switch (ntohs(fin->fin_data[0]) >> 8) 1192 { 1193 case ICMP_ECHO : 1194 case ICMP_TSTAMP : 1195 case ICMP_IREQ : 1196 case ICMP_MASKREQ : 1197 break; 1198 default : 1199 return 0; 1200 } 1201 1202 sz = sizeof(ip_t) * 2; 1203 sz += 8; /* 64 bits of data */ 1204 hlen = sizeof(ip_t); 1205 } 1206 1207 sz += offsetof(struct icmp, icmp_ip); 1208 if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL) 1209 return -1; 1210 MTYPE(m) = M_DATA; 1211 m->b_rptr += 64; 1212 m->b_wptr = m->b_rptr + sz; 1213 bzero((char *)m->b_rptr, (size_t)sz); 1214 ip = (ip_t *)m->b_rptr; 1215 ip->ip_v = fin->fin_v; 1216 icmp = (struct icmp *)(m->b_rptr + hlen); 1217 icmp->icmp_type = type & 0xff; 1218 icmp->icmp_code = code & 0xff; 1219 phy = (phy_if_t)qpi->qpi_ill; 1220 if (type == ICMP_UNREACH && (phy != 0) && 1221 fin->fin_icode == ICMP_UNREACH_NEEDFRAG) 1222 icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 ); 1223 1224 #ifdef USE_INET6 1225 if (fin->fin_v == 6) { 1226 struct in6_addr dst6; 1227 int csz; 1228 1229 if (dst == 0) { 1230 ipf_stack_t *ifs = fin->fin_ifs; 1231 1232 if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy, 1233 (void *)&dst6, NULL, ifs) == -1) { 1234 FREE_MB_T(m); 1235 return -1; 1236 } 1237 } else 1238 dst6 = fin->fin_dst6.in6; 1239 1240 csz = sz; 1241 sz -= sizeof(ip6_t); 1242 ip6 = (ip6_t *)m->b_rptr; 1243 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; 1244 ip6->ip6_plen = htons((u_short)sz); 1245 ip6->ip6_nxt = IPPROTO_ICMPV6; 1246 ip6->ip6_src = dst6; 1247 ip6->ip6_dst = fin->fin_src6.in6; 1248 sz -= offsetof(struct icmp, icmp_ip); 1249 bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz); 1250 icmp->icmp_cksum = csz - sizeof(ip6_t); 1251 } else 1252 #endif 1253 { 1254 ip->ip_hl = sizeof(*ip) >> 2; 1255 ip->ip_p = IPPROTO_ICMP; 1256 ip->ip_id = fin->fin_ip->ip_id; 1257 ip->ip_tos = fin->fin_ip->ip_tos; 1258 ip->ip_len = (u_short)sz; 1259 if (dst == 0) { 1260 ipf_stack_t *ifs = fin->fin_ifs; 1261 1262 if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy, 1263 (void *)&dst4, NULL, ifs) == -1) { 1264 FREE_MB_T(m); 1265 return -1; 1266 } 1267 } else { 1268 dst4 = fin->fin_dst; 1269 } 1270 ip->ip_src = dst4; 1271 ip->ip_dst = fin->fin_src; 1272 bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip, 1273 sizeof(*fin->fin_ip)); 1274 bcopy((char *)fin->fin_ip + fin->fin_hlen, 1275 (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8); 1276 icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len); 1277 icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off); 1278 icmp->icmp_cksum = ipf_cksum((u_short *)icmp, 1279 sz - sizeof(ip_t)); 1280 } 1281 1282 /* 1283 * Need to exit out of these so we don't recursively call rw_enter 1284 * from fr_qout. 1285 */ 1286 return fr_send_ip(fin, m, &m); 1287 } 1288 1289 #include <sys/time.h> 1290 #include <sys/varargs.h> 1291 1292 #ifndef _KERNEL 1293 #include <stdio.h> 1294 #endif 1295 1296 /* 1297 * Return the first IP Address associated with an interface 1298 * For IPv6, we walk through the list of logical interfaces and return 1299 * the address of the first one that isn't a link-local interface. 1300 * We can't assume that it is :1 because another link-local address 1301 * may have been assigned there. 1302 */ 1303 /*ARGSUSED*/ 1304 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs) 1305 int v, atype; 1306 void *ifptr; 1307 struct in_addr *inp, *inpmask; 1308 ipf_stack_t *ifs; 1309 { 1310 struct sockaddr_in6 v6addr[2]; 1311 struct sockaddr_in v4addr[2]; 1312 net_ifaddr_t type[2]; 1313 net_handle_t net_data; 1314 phy_if_t phyif; 1315 void *array; 1316 1317 switch (v) 1318 { 1319 case 4: 1320 net_data = ifs->ifs_ipf_ipv4; 1321 array = v4addr; 1322 break; 1323 case 6: 1324 net_data = ifs->ifs_ipf_ipv6; 1325 array = v6addr; 1326 break; 1327 default: 1328 net_data = NULL; 1329 break; 1330 } 1331 1332 if (net_data == NULL) 1333 return -1; 1334 1335 phyif = (phy_if_t)ifptr; 1336 1337 switch (atype) 1338 { 1339 case FRI_PEERADDR : 1340 type[0] = NA_PEER; 1341 break; 1342 1343 case FRI_BROADCAST : 1344 type[0] = NA_BROADCAST; 1345 break; 1346 1347 default : 1348 type[0] = NA_ADDRESS; 1349 break; 1350 } 1351 1352 type[1] = NA_NETMASK; 1353 1354 if (v == 6) { 1355 lif_if_t idx = 0; 1356 1357 do { 1358 idx = net_lifgetnext(net_data, phyif, idx); 1359 if (net_getlifaddr(net_data, phyif, idx, 2, type, 1360 array) < 0) 1361 return -1; 1362 if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) && 1363 !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr)) 1364 break; 1365 } while (idx != 0); 1366 1367 if (idx == 0) 1368 return -1; 1369 1370 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1], 1371 inp, inpmask); 1372 } 1373 1374 if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0) 1375 return -1; 1376 1377 return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask); 1378 } 1379 1380 1381 u_32_t fr_newisn(fin) 1382 fr_info_t *fin; 1383 { 1384 static int iss_seq_off = 0; 1385 u_char hash[16]; 1386 u_32_t newiss; 1387 MD5_CTX ctx; 1388 ipf_stack_t *ifs = fin->fin_ifs; 1389 1390 /* 1391 * Compute the base value of the ISS. It is a hash 1392 * of (saddr, sport, daddr, dport, secret). 1393 */ 1394 MD5Init(&ctx); 1395 1396 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src, 1397 sizeof(fin->fin_fi.fi_src)); 1398 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst, 1399 sizeof(fin->fin_fi.fi_dst)); 1400 MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat)); 1401 1402 MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret)); 1403 1404 MD5Final(hash, &ctx); 1405 1406 bcopy(hash, &newiss, sizeof(newiss)); 1407 1408 /* 1409 * Now increment our "timer", and add it in to 1410 * the computed value. 1411 * 1412 * XXX Use `addin'? 1413 * XXX TCP_ISSINCR too large to use? 1414 */ 1415 iss_seq_off += 0x00010000; 1416 newiss += iss_seq_off; 1417 return newiss; 1418 } 1419 1420 1421 /* ------------------------------------------------------------------------ */ 1422 /* Function: fr_nextipid */ 1423 /* Returns: int - 0 == success, -1 == error (packet should be droppped) */ 1424 /* Parameters: fin(I) - pointer to packet information */ 1425 /* */ 1426 /* Returns the next IPv4 ID to use for this packet. */ 1427 /* ------------------------------------------------------------------------ */ 1428 u_short fr_nextipid(fin) 1429 fr_info_t *fin; 1430 { 1431 static u_short ipid = 0; 1432 u_short id; 1433 ipf_stack_t *ifs = fin->fin_ifs; 1434 1435 MUTEX_ENTER(&ifs->ifs_ipf_rw); 1436 if (fin->fin_pktnum != 0) { 1437 id = fin->fin_pktnum & 0xffff; 1438 } else { 1439 id = ipid++; 1440 } 1441 MUTEX_EXIT(&ifs->ifs_ipf_rw); 1442 1443 return id; 1444 } 1445 1446 1447 #ifndef IPFILTER_CKSUM 1448 /* ARGSUSED */ 1449 #endif 1450 INLINE void fr_checkv4sum(fin) 1451 fr_info_t *fin; 1452 { 1453 #ifdef IPFILTER_CKSUM 1454 if (fr_checkl4sum(fin) == -1) 1455 fin->fin_flx |= FI_BAD; 1456 #endif 1457 } 1458 1459 1460 #ifdef USE_INET6 1461 # ifndef IPFILTER_CKSUM 1462 /* ARGSUSED */ 1463 # endif 1464 INLINE void fr_checkv6sum(fin) 1465 fr_info_t *fin; 1466 { 1467 # ifdef IPFILTER_CKSUM 1468 if (fr_checkl4sum(fin) == -1) 1469 fin->fin_flx |= FI_BAD; 1470 # endif 1471 } 1472 #endif /* USE_INET6 */ 1473 1474 1475 #if (SOLARIS2 < 7) 1476 void fr_slowtimer() 1477 #else 1478 /*ARGSUSED*/ 1479 void fr_slowtimer __P((void *arg)) 1480 #endif 1481 { 1482 ipf_stack_t *ifs = arg; 1483 1484 READ_ENTER(&ifs->ifs_ipf_global); 1485 if (ifs->ifs_fr_running != 1) { 1486 ifs->ifs_fr_timer_id = NULL; 1487 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1488 return; 1489 } 1490 ipf_expiretokens(ifs); 1491 fr_fragexpire(ifs); 1492 fr_timeoutstate(ifs); 1493 fr_natexpire(ifs); 1494 fr_authexpire(ifs); 1495 ifs->ifs_fr_ticks++; 1496 if (ifs->ifs_fr_running == 1) 1497 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg, 1498 drv_usectohz(500000)); 1499 else 1500 ifs->ifs_fr_timer_id = NULL; 1501 RWLOCK_EXIT(&ifs->ifs_ipf_global); 1502 } 1503 1504 1505 /* ------------------------------------------------------------------------ */ 1506 /* Function: fr_pullup */ 1507 /* Returns: NULL == pullup failed, else pointer to protocol header */ 1508 /* Parameters: m(I) - pointer to buffer where data packet starts */ 1509 /* fin(I) - pointer to packet information */ 1510 /* len(I) - number of bytes to pullup */ 1511 /* */ 1512 /* Attempt to move at least len bytes (from the start of the buffer) into a */ 1513 /* single buffer for ease of access. Operating system native functions are */ 1514 /* used to manage buffers - if necessary. If the entire packet ends up in */ 1515 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has */ 1516 /* not been called. Both fin_ip and fin_dp are updated before exiting _IF_ */ 1517 /* and ONLY if the pullup succeeds. */ 1518 /* */ 1519 /* We assume that 'min' is a pointer to a buffer that is part of the chain */ 1520 /* of buffers that starts at *fin->fin_mp. */ 1521 /* ------------------------------------------------------------------------ */ 1522 void *fr_pullup(min, fin, len) 1523 mb_t *min; 1524 fr_info_t *fin; 1525 int len; 1526 { 1527 qpktinfo_t *qpi = fin->fin_qpi; 1528 int out = fin->fin_out, dpoff, ipoff; 1529 mb_t *m = min, *m1, *m2; 1530 char *ip; 1531 uint32_t start, stuff, end, value, flags; 1532 ipf_stack_t *ifs = fin->fin_ifs; 1533 1534 if (m == NULL) 1535 return NULL; 1536 1537 ip = (char *)fin->fin_ip; 1538 if ((fin->fin_flx & FI_COALESCE) != 0) 1539 return ip; 1540 1541 ipoff = fin->fin_ipoff; 1542 if (fin->fin_dp != NULL) 1543 dpoff = (char *)fin->fin_dp - (char *)ip; 1544 else 1545 dpoff = 0; 1546 1547 if (M_LEN(m) < len + ipoff) { 1548 1549 /* 1550 * pfil_precheck ensures the IP header is on a 32bit 1551 * aligned address so simply fail if that isn't currently 1552 * the case (should never happen). 1553 */ 1554 int inc = 0; 1555 1556 if (ipoff > 0) { 1557 if ((ipoff & 3) != 0) { 1558 inc = 4 - (ipoff & 3); 1559 if (m->b_rptr - inc >= m->b_datap->db_base) 1560 m->b_rptr -= inc; 1561 else 1562 inc = 0; 1563 } 1564 } 1565 1566 /* 1567 * XXX This is here as a work around for a bug with DEBUG 1568 * XXX Solaris kernels. The problem is b_prev is used by IP 1569 * XXX code as a way to stash the phyint_index for a packet, 1570 * XXX this doesn't get reset by IP but freeb does an ASSERT() 1571 * XXX for both of these to be NULL. See 6442390. 1572 */ 1573 m1 = m; 1574 m2 = m->b_prev; 1575 1576 do { 1577 m1->b_next = NULL; 1578 m1->b_prev = NULL; 1579 m1 = m1->b_cont; 1580 } while (m1); 1581 1582 /* 1583 * Need to preserve checksum information by copying them 1584 * to newmp which heads the pulluped message. 1585 */ 1586 hcksum_retrieve(m, NULL, NULL, &start, &stuff, &end, 1587 &value, &flags); 1588 1589 if (pullupmsg(m, len + ipoff + inc) == 0) { 1590 ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]); 1591 FREE_MB_T(*fin->fin_mp); 1592 *fin->fin_mp = NULL; 1593 fin->fin_m = NULL; 1594 fin->fin_ip = NULL; 1595 fin->fin_dp = NULL; 1596 qpi->qpi_data = NULL; 1597 return NULL; 1598 } 1599 1600 (void) hcksum_assoc(m, NULL, NULL, start, stuff, end, 1601 value, flags, 0); 1602 1603 m->b_prev = m2; 1604 m->b_rptr += inc; 1605 fin->fin_m = m; 1606 ip = MTOD(m, char *) + ipoff; 1607 qpi->qpi_data = ip; 1608 } 1609 1610 ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]); 1611 fin->fin_ip = (ip_t *)ip; 1612 if (fin->fin_dp != NULL) 1613 fin->fin_dp = (char *)fin->fin_ip + dpoff; 1614 1615 if (len == fin->fin_plen) 1616 fin->fin_flx |= FI_COALESCE; 1617 return ip; 1618 } 1619 1620 1621 /* 1622 * Function: fr_verifysrc 1623 * Returns: int (really boolean) 1624 * Parameters: fin - packet information 1625 * 1626 * Check whether the packet has a valid source address for the interface on 1627 * which the packet arrived, implementing the "fr_chksrc" feature. 1628 * Returns true iff the packet's source address is valid. 1629 */ 1630 int fr_verifysrc(fin) 1631 fr_info_t *fin; 1632 { 1633 net_handle_t net_data_p; 1634 phy_if_t phy_ifdata_routeto; 1635 struct sockaddr sin; 1636 ipf_stack_t *ifs = fin->fin_ifs; 1637 1638 if (fin->fin_v == 4) { 1639 net_data_p = ifs->ifs_ipf_ipv4; 1640 } else if (fin->fin_v == 6) { 1641 net_data_p = ifs->ifs_ipf_ipv6; 1642 } else { 1643 return (0); 1644 } 1645 1646 /* Get the index corresponding to the if name */ 1647 sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6; 1648 bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr)); 1649 phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL); 1650 1651 return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0); 1652 } 1653 1654 1655 /* 1656 * Function: fr_fastroute 1657 * Returns: 0: success; 1658 * -1: failed 1659 * Parameters: 1660 * mb: the message block where ip head starts 1661 * mpp: the pointer to the pointer of the orignal 1662 * packet message 1663 * fin: packet information 1664 * fdp: destination interface information 1665 * if it is NULL, no interface information provided. 1666 * 1667 * This function is for fastroute/to/dup-to rules. It calls 1668 * pfil_make_lay2_packet to search route, make lay-2 header 1669 * ,and identify output queue for the IP packet. 1670 * The destination address depends on the following conditions: 1671 * 1: for fastroute rule, fdp is passed in as NULL, so the 1672 * destination address is the IP Packet's destination address 1673 * 2: for to/dup-to rule, if an ip address is specified after 1674 * the interface name, this address is the as destination 1675 * address. Otherwise IP Packet's destination address is used 1676 */ 1677 int fr_fastroute(mb, mpp, fin, fdp) 1678 mblk_t *mb, **mpp; 1679 fr_info_t *fin; 1680 frdest_t *fdp; 1681 { 1682 net_handle_t net_data_p; 1683 net_inject_t *inj; 1684 mblk_t *mp = NULL; 1685 frentry_t *fr = fin->fin_fr; 1686 qpktinfo_t *qpi; 1687 ip_t *ip; 1688 1689 struct sockaddr_in *sin; 1690 struct sockaddr_in6 *sin6; 1691 struct sockaddr *sinp; 1692 ipf_stack_t *ifs = fin->fin_ifs; 1693 #ifndef sparc 1694 u_short __iplen, __ipoff; 1695 #endif 1696 1697 if (fin->fin_v == 4) { 1698 net_data_p = ifs->ifs_ipf_ipv4; 1699 } else if (fin->fin_v == 6) { 1700 net_data_p = ifs->ifs_ipf_ipv6; 1701 } else { 1702 return (-1); 1703 } 1704 1705 inj = net_inject_alloc(NETINFO_VERSION); 1706 if (inj == NULL) 1707 return -1; 1708 1709 ip = fin->fin_ip; 1710 qpi = fin->fin_qpi; 1711 1712 /* 1713 * If this is a duplicate mblk then we want ip to point at that 1714 * data, not the original, if and only if it is already pointing at 1715 * the current mblk data. 1716 * 1717 * Otherwise, if it's not a duplicate, and we're not already pointing 1718 * at the current mblk data, then we want to ensure that the data 1719 * points at ip. 1720 */ 1721 1722 if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) { 1723 ip = (ip_t *)mb->b_rptr; 1724 } else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) { 1725 qpi->qpi_m->b_rptr = (uchar_t *)ip; 1726 qpi->qpi_off = 0; 1727 } 1728 1729 /* 1730 * If there is another M_PROTO, we don't want it 1731 */ 1732 if (*mpp != mb) { 1733 mp = unlinkb(*mpp); 1734 freeb(*mpp); 1735 *mpp = mp; 1736 } 1737 1738 sinp = (struct sockaddr *)&inj->ni_addr; 1739 sin = (struct sockaddr_in *)sinp; 1740 sin6 = (struct sockaddr_in6 *)sinp; 1741 bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr)); 1742 inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6; 1743 inj->ni_packet = mb; 1744 1745 /* 1746 * In case we're here due to "to <if>" being used with 1747 * "keep state", check that we're going in the correct 1748 * direction. 1749 */ 1750 if (fdp != NULL) { 1751 if ((fr != NULL) && (fdp->fd_ifp != NULL) && 1752 (fin->fin_rev != 0) && (fdp == &fr->fr_tif)) 1753 goto bad_fastroute; 1754 inj->ni_physical = (phy_if_t)fdp->fd_ifp; 1755 if (fin->fin_v == 4) { 1756 sin->sin_addr = fdp->fd_ip; 1757 } else { 1758 sin6->sin6_addr = fdp->fd_ip6.in6; 1759 } 1760 } else { 1761 if (fin->fin_v == 4) { 1762 sin->sin_addr = ip->ip_dst; 1763 } else { 1764 sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst; 1765 } 1766 inj->ni_physical = net_routeto(net_data_p, sinp, NULL); 1767 } 1768 1769 /* 1770 * Clear the hardware checksum flags from packets that we are doing 1771 * input processing on as leaving them set will cause the outgoing 1772 * NIC (if it supports hardware checksum) to calculate them anew, 1773 * using the old (correct) checksums as the pseudo value to start 1774 * from. 1775 */ 1776 if (fin->fin_out == 0) { 1777 DB_CKSUMFLAGS(mb) = 0; 1778 } 1779 1780 *mpp = mb; 1781 1782 if (fin->fin_out == 0) { 1783 void *saveifp; 1784 u_32_t pass; 1785 1786 saveifp = fin->fin_ifp; 1787 fin->fin_ifp = (void *)inj->ni_physical; 1788 fin->fin_flx &= ~FI_STATE; 1789 fin->fin_out = 1; 1790 (void) fr_acctpkt(fin, &pass); 1791 fin->fin_fr = NULL; 1792 if (!fr || !(fr->fr_flags & FR_RETMASK)) 1793 (void) fr_checkstate(fin, &pass); 1794 if (fr_checknatout(fin, NULL) == -1) 1795 goto bad_fastroute; 1796 fin->fin_out = 0; 1797 fin->fin_ifp = saveifp; 1798 } 1799 #ifndef sparc 1800 if (fin->fin_v == 4) { 1801 __iplen = (u_short)ip->ip_len, 1802 __ipoff = (u_short)ip->ip_off; 1803 1804 ip->ip_len = htons(__iplen); 1805 ip->ip_off = htons(__ipoff); 1806 } 1807 #endif 1808 1809 if (net_data_p) { 1810 if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) { 1811 net_inject_free(inj); 1812 return (-1); 1813 } 1814 } 1815 1816 ifs->ifs_fr_frouteok[0]++; 1817 net_inject_free(inj); 1818 return 0; 1819 bad_fastroute: 1820 net_inject_free(inj); 1821 freemsg(mb); 1822 ifs->ifs_fr_frouteok[1]++; 1823 return -1; 1824 } 1825 1826 1827 /* ------------------------------------------------------------------------ */ 1828 /* Function: ipf_hook4_out */ 1829 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 1830 /* Parameters: event(I) - pointer to event */ 1831 /* info(I) - pointer to hook information for firewalling */ 1832 /* */ 1833 /* Calling ipf_hook. */ 1834 /* ------------------------------------------------------------------------ */ 1835 /*ARGSUSED*/ 1836 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg) 1837 { 1838 return ipf_hook(info, 1, 0, arg); 1839 } 1840 /*ARGSUSED*/ 1841 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg) 1842 { 1843 return ipf_hook6(info, 1, 0, arg); 1844 } 1845 1846 /* ------------------------------------------------------------------------ */ 1847 /* Function: ipf_hook4_in */ 1848 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 1849 /* Parameters: event(I) - pointer to event */ 1850 /* info(I) - pointer to hook information for firewalling */ 1851 /* */ 1852 /* Calling ipf_hook. */ 1853 /* ------------------------------------------------------------------------ */ 1854 /*ARGSUSED*/ 1855 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg) 1856 { 1857 return ipf_hook(info, 0, 0, arg); 1858 } 1859 /*ARGSUSED*/ 1860 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg) 1861 { 1862 return ipf_hook6(info, 0, 0, arg); 1863 } 1864 1865 1866 /* ------------------------------------------------------------------------ */ 1867 /* Function: ipf_hook4_loop_out */ 1868 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 1869 /* Parameters: event(I) - pointer to event */ 1870 /* info(I) - pointer to hook information for firewalling */ 1871 /* */ 1872 /* Calling ipf_hook. */ 1873 /* ------------------------------------------------------------------------ */ 1874 /*ARGSUSED*/ 1875 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg) 1876 { 1877 return ipf_hook(info, 1, FI_NOCKSUM, arg); 1878 } 1879 /*ARGSUSED*/ 1880 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg) 1881 { 1882 return ipf_hook6(info, 1, FI_NOCKSUM, arg); 1883 } 1884 1885 /* ------------------------------------------------------------------------ */ 1886 /* Function: ipf_hook4_loop_in */ 1887 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 1888 /* Parameters: event(I) - pointer to event */ 1889 /* info(I) - pointer to hook information for firewalling */ 1890 /* */ 1891 /* Calling ipf_hook. */ 1892 /* ------------------------------------------------------------------------ */ 1893 /*ARGSUSED*/ 1894 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg) 1895 { 1896 return ipf_hook(info, 0, FI_NOCKSUM, arg); 1897 } 1898 /*ARGSUSED*/ 1899 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg) 1900 { 1901 return ipf_hook6(info, 0, FI_NOCKSUM, arg); 1902 } 1903 1904 /* ------------------------------------------------------------------------ */ 1905 /* Function: ipf_hook */ 1906 /* Returns: int - 0 == packet ok, else problem, free packet if not done */ 1907 /* Parameters: info(I) - pointer to hook information for firewalling */ 1908 /* out(I) - whether packet is going in or out */ 1909 /* loopback(I) - whether packet is a loopback packet or not */ 1910 /* */ 1911 /* Stepping stone function between the IP mainline and IPFilter. Extracts */ 1912 /* parameters out of the info structure and forms them up to be useful for */ 1913 /* calling ipfilter. */ 1914 /* ------------------------------------------------------------------------ */ 1915 int ipf_hook(hook_data_t info, int out, int loopback, void *arg) 1916 { 1917 hook_pkt_event_t *fw; 1918 ipf_stack_t *ifs; 1919 qpktinfo_t qpi; 1920 int rval, hlen; 1921 u_short swap; 1922 phy_if_t phy; 1923 ip_t *ip; 1924 1925 ifs = arg; 1926 fw = (hook_pkt_event_t *)info; 1927 1928 ASSERT(fw != NULL); 1929 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp; 1930 1931 ip = fw->hpe_hdr; 1932 swap = ntohs(ip->ip_len); 1933 ip->ip_len = swap; 1934 swap = ntohs(ip->ip_off); 1935 ip->ip_off = swap; 1936 hlen = IPH_HDR_LENGTH(ip); 1937 1938 qpi.qpi_m = fw->hpe_mb; 1939 qpi.qpi_data = fw->hpe_hdr; 1940 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr; 1941 qpi.qpi_ill = (void *)phy; 1942 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST); 1943 if (qpi.qpi_flags) 1944 qpi.qpi_flags |= FI_MBCAST; 1945 qpi.qpi_flags |= loopback; 1946 1947 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out, 1948 &qpi, fw->hpe_mp, ifs); 1949 1950 /* For fastroute cases, fr_check returns 0 with mp set to NULL */ 1951 if (rval == 0 && *(fw->hpe_mp) == NULL) 1952 rval = 1; 1953 1954 /* Notify IP the packet mblk_t and IP header pointers. */ 1955 fw->hpe_mb = qpi.qpi_m; 1956 fw->hpe_hdr = qpi.qpi_data; 1957 if (rval == 0) { 1958 ip = qpi.qpi_data; 1959 swap = ntohs(ip->ip_len); 1960 ip->ip_len = swap; 1961 swap = ntohs(ip->ip_off); 1962 ip->ip_off = swap; 1963 } 1964 return rval; 1965 1966 } 1967 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg) 1968 { 1969 hook_pkt_event_t *fw; 1970 int rval, hlen; 1971 qpktinfo_t qpi; 1972 phy_if_t phy; 1973 1974 fw = (hook_pkt_event_t *)info; 1975 1976 ASSERT(fw != NULL); 1977 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp; 1978 1979 hlen = sizeof (ip6_t); 1980 1981 qpi.qpi_m = fw->hpe_mb; 1982 qpi.qpi_data = fw->hpe_hdr; 1983 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr; 1984 qpi.qpi_ill = (void *)phy; 1985 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST); 1986 if (qpi.qpi_flags) 1987 qpi.qpi_flags |= FI_MBCAST; 1988 qpi.qpi_flags |= loopback; 1989 1990 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out, 1991 &qpi, fw->hpe_mp, arg); 1992 1993 /* For fastroute cases, fr_check returns 0 with mp set to NULL */ 1994 if (rval == 0 && *(fw->hpe_mp) == NULL) 1995 rval = 1; 1996 1997 /* Notify IP the packet mblk_t and IP header pointers. */ 1998 fw->hpe_mb = qpi.qpi_m; 1999 fw->hpe_hdr = qpi.qpi_data; 2000 return rval; 2001 2002 } 2003 2004 2005 /* ------------------------------------------------------------------------ */ 2006 /* Function: ipf_nic_event_v4 */ 2007 /* Returns: int - 0 == no problems encountered */ 2008 /* Parameters: event(I) - pointer to event */ 2009 /* info(I) - pointer to information about a NIC event */ 2010 /* */ 2011 /* Function to receive asynchronous NIC events from IP */ 2012 /* ------------------------------------------------------------------------ */ 2013 /*ARGSUSED*/ 2014 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg) 2015 { 2016 struct sockaddr_in *sin; 2017 hook_nic_event_t *hn; 2018 ipf_stack_t *ifs = arg; 2019 void *new_ifp = NULL; 2020 2021 if (ifs->ifs_fr_running <= 0) 2022 return (0); 2023 2024 hn = (hook_nic_event_t *)info; 2025 2026 switch (hn->hne_event) 2027 { 2028 case NE_PLUMB : 2029 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data, 2030 ifs); 2031 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, 2032 hn->hne_data, ifs); 2033 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, 2034 hn->hne_data, ifs); 2035 break; 2036 2037 case NE_UNPLUMB : 2038 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs); 2039 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, 2040 ifs); 2041 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs); 2042 break; 2043 2044 case NE_ADDRESS_CHANGE : 2045 /* 2046 * We only respond to events for logical interface 0 because 2047 * IPFilter only uses the first address given to a network 2048 * interface. We check for hne_lif==1 because the netinfo 2049 * code maps adds 1 to the lif number so that it can return 2050 * 0 to indicate "no more lifs" when walking them. 2051 */ 2052 if (hn->hne_lif == 1) { 2053 frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL, 2054 ifs); 2055 sin = hn->hne_data; 2056 fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr, 2057 ifs); 2058 } 2059 break; 2060 2061 #if SOLARIS2 >= 10 2062 case NE_IFINDEX_CHANGE : 2063 WRITE_ENTER(&ifs->ifs_ipf_mutex); 2064 2065 if (hn->hne_data != NULL) { 2066 /* 2067 * The netinfo passes interface index as int (hne_data should be 2068 * handled as a pointer to int), which is always 32bit. We need to 2069 * convert it to void pointer here, since interfaces are 2070 * represented as pointers to void in IPF. The pointers are 64 bits 2071 * long on 64bit platforms. Doing something like 2072 * (void *)((int) x) 2073 * will throw warning: 2074 * "cast to pointer from integer of different size" 2075 * during 64bit compilation. 2076 * 2077 * The line below uses (size_t) to typecast int to 2078 * size_t, which might be 64bit/32bit (depending 2079 * on architecture). Once we have proper 64bit/32bit 2080 * type (size_t), we can safely convert it to void pointer. 2081 */ 2082 new_ifp = (void *)(size_t)*((int *)hn->hne_data); 2083 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2084 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2085 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2086 } 2087 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 2088 break; 2089 #endif 2090 2091 default : 2092 break; 2093 } 2094 2095 return 0; 2096 } 2097 2098 2099 /* ------------------------------------------------------------------------ */ 2100 /* Function: ipf_nic_event_v6 */ 2101 /* Returns: int - 0 == no problems encountered */ 2102 /* Parameters: event(I) - pointer to event */ 2103 /* info(I) - pointer to information about a NIC event */ 2104 /* */ 2105 /* Function to receive asynchronous NIC events from IP */ 2106 /* ------------------------------------------------------------------------ */ 2107 /*ARGSUSED*/ 2108 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg) 2109 { 2110 struct sockaddr_in6 *sin6; 2111 hook_nic_event_t *hn; 2112 ipf_stack_t *ifs = arg; 2113 void *new_ifp = NULL; 2114 2115 if (ifs->ifs_fr_running <= 0) 2116 return (0); 2117 2118 hn = (hook_nic_event_t *)info; 2119 2120 switch (hn->hne_event) 2121 { 2122 case NE_PLUMB : 2123 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2124 hn->hne_data, ifs); 2125 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2126 hn->hne_data, ifs); 2127 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic, 2128 hn->hne_data, ifs); 2129 break; 2130 2131 case NE_UNPLUMB : 2132 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs); 2133 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, 2134 ifs); 2135 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs); 2136 break; 2137 2138 case NE_ADDRESS_CHANGE : 2139 if (hn->hne_lif == 1) { 2140 sin6 = hn->hne_data; 2141 fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr, 2142 ifs); 2143 } 2144 break; 2145 2146 #if SOLARIS2 >= 10 2147 case NE_IFINDEX_CHANGE : 2148 WRITE_ENTER(&ifs->ifs_ipf_mutex); 2149 if (hn->hne_data != NULL) { 2150 /* 2151 * The netinfo passes interface index as int (hne_data should be 2152 * handled as a pointer to int), which is always 32bit. We need to 2153 * convert it to void pointer here, since interfaces are 2154 * represented as pointers to void in IPF. The pointers are 64 bits 2155 * long on 64bit platforms. Doing something like 2156 * (void *)((int) x) 2157 * will throw warning: 2158 * "cast to pointer from integer of different size" 2159 * during 64bit compilation. 2160 * 2161 * The line below uses (size_t) to typecast int to 2162 * size_t, which might be 64bit/32bit (depending 2163 * on architecture). Once we have proper 64bit/32bit 2164 * type (size_t), we can safely convert it to void pointer. 2165 */ 2166 new_ifp = (void *)(size_t)*((int *)hn->hne_data); 2167 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2168 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2169 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs); 2170 } 2171 RWLOCK_EXIT(&ifs->ifs_ipf_mutex); 2172 break; 2173 #endif 2174 2175 default : 2176 break; 2177 } 2178 2179 return 0; 2180 } 2181 2182 /* 2183 * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6() 2184 * are needed in Solaris kernel only. We don't need them in 2185 * ipftest to pretend the ICMP/RST packet was sent as a response. 2186 */ 2187 #if defined(_KERNEL) && (SOLARIS2 >= 10) 2188 /* ------------------------------------------------------------------------ */ 2189 /* Function: fr_make_rst */ 2190 /* Returns: int - 0 on success, -1 on failure */ 2191 /* Parameters: fin(I) - pointer to packet information */ 2192 /* */ 2193 /* We must alter the original mblks passed to IPF from IP stack via */ 2194 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations. */ 2195 /* IPF can basicaly do only these things with mblk representing the packet: */ 2196 /* leave it as it is (pass the packet) */ 2197 /* */ 2198 /* discard it (block the packet) */ 2199 /* */ 2200 /* alter it (i.e. NAT) */ 2201 /* */ 2202 /* As you can see IPF can not simply discard the mblk and supply a new one */ 2203 /* instead to IP stack via FW_HOOKS. */ 2204 /* */ 2205 /* The return-rst action for packets coming via NIC is handled as follows: */ 2206 /* mblk with packet is discarded */ 2207 /* */ 2208 /* new mblk with RST response is constructed and injected to network */ 2209 /* */ 2210 /* IPF can't inject packets to loopback interface, this is just another */ 2211 /* limitation we have to deal with here. The only option to send RST */ 2212 /* response to offending TCP packet coming via loopback is to alter it. */ 2213 /* */ 2214 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on */ 2215 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to */ 2216 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers. */ 2217 /* ------------------------------------------------------------------------ */ 2218 int fr_make_rst(fin) 2219 fr_info_t *fin; 2220 { 2221 uint16_t tmp_port; 2222 int rv = -1; 2223 uint32_t old_ack; 2224 tcphdr_t *tcp = NULL; 2225 struct in_addr tmp_src; 2226 #ifdef USE_INET6 2227 struct in6_addr tmp_src6; 2228 #endif 2229 2230 ASSERT(fin->fin_p == IPPROTO_TCP); 2231 2232 /* 2233 * We do not need to adjust chksum, since it is not being checked by 2234 * Solaris IP stack for loopback clients. 2235 */ 2236 if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) && 2237 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) { 2238 2239 if (tcp->th_flags & (TH_SYN | TH_FIN)) { 2240 /* Swap IPv4 addresses. */ 2241 tmp_src = fin->fin_ip->ip_src; 2242 fin->fin_ip->ip_src = fin->fin_ip->ip_dst; 2243 fin->fin_ip->ip_dst = tmp_src; 2244 2245 rv = 0; 2246 } 2247 else 2248 tcp = NULL; 2249 } 2250 #ifdef USE_INET6 2251 else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) && 2252 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) { 2253 /* 2254 * We are relying on fact the next header is TCP, which is true 2255 * for regular TCP packets coming in over loopback. 2256 */ 2257 if (tcp->th_flags & (TH_SYN | TH_FIN)) { 2258 /* Swap IPv6 addresses. */ 2259 tmp_src6 = fin->fin_ip6->ip6_src; 2260 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst; 2261 fin->fin_ip6->ip6_dst = tmp_src6; 2262 2263 rv = 0; 2264 } 2265 else 2266 tcp = NULL; 2267 } 2268 #endif 2269 2270 if (tcp != NULL) { 2271 /* 2272 * Adjust TCP header: 2273 * swap ports, 2274 * set flags, 2275 * set correct ACK number 2276 */ 2277 tmp_port = tcp->th_sport; 2278 tcp->th_sport = tcp->th_dport; 2279 tcp->th_dport = tmp_port; 2280 old_ack = tcp->th_ack; 2281 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1); 2282 tcp->th_seq = old_ack; 2283 tcp->th_flags = TH_RST | TH_ACK; 2284 } 2285 2286 return (rv); 2287 } 2288 2289 /* ------------------------------------------------------------------------ */ 2290 /* Function: fr_make_icmp_v4 */ 2291 /* Returns: int - 0 on success, -1 on failure */ 2292 /* Parameters: fin(I) - pointer to packet information */ 2293 /* */ 2294 /* Please read comment at fr_make_icmp() wrapper function to get an idea */ 2295 /* what is going to happen here and why. Once you read the comment there, */ 2296 /* continue here with next paragraph. */ 2297 /* */ 2298 /* To turn IPv4 packet into ICMPv4 response packet, these things must */ 2299 /* happen here: */ 2300 /* (1) Original mblk is copied (duplicated). */ 2301 /* */ 2302 /* (2) ICMP header is created. */ 2303 /* */ 2304 /* (3) Link ICMP header with copy of original mblk, we have ICMPv4 */ 2305 /* data ready then. */ 2306 /* */ 2307 /* (4) Swap IP addresses in original mblk and adjust IP header data. */ 2308 /* */ 2309 /* (5) The mblk containing original packet is trimmed to contain IP */ 2310 /* header only and ICMP chksum is computed. */ 2311 /* */ 2312 /* (6) The ICMP header we have from (3) is linked to original mblk, */ 2313 /* which now contains new IP header. If original packet was spread */ 2314 /* over several mblks, only the first mblk is kept. */ 2315 /* ------------------------------------------------------------------------ */ 2316 static int fr_make_icmp_v4(fin) 2317 fr_info_t *fin; 2318 { 2319 struct in_addr tmp_src; 2320 tcphdr_t *tcp; 2321 struct icmp *icmp; 2322 mblk_t *mblk_icmp; 2323 mblk_t *mblk_ip; 2324 size_t icmp_pld_len; /* octets to append to ICMP header */ 2325 size_t orig_iphdr_len; /* length of IP header only */ 2326 uint32_t sum; 2327 uint16_t *buf; 2328 int len; 2329 2330 2331 if (fin->fin_v != 4) 2332 return (-1); 2333 2334 /* 2335 * If we are dealing with TCP, then packet must be SYN/FIN to be routed 2336 * by IP stack. If it is not SYN/FIN, then we must drop it silently. 2337 */ 2338 tcp = (tcphdr_t *) fin->fin_dp; 2339 2340 if ((fin->fin_p == IPPROTO_TCP) && 2341 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0))) 2342 return (-1); 2343 2344 /* 2345 * Step (1) 2346 * 2347 * Make copy of original mblk. 2348 * 2349 * We want to copy as much data as necessary, not less, not more. The 2350 * ICMPv4 payload length for unreachable messages is: 2351 * original IP header + 8 bytes of L4 (if there are any). 2352 * 2353 * We determine if there are at least 8 bytes of L4 data following IP 2354 * header first. 2355 */ 2356 icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ? 2357 ICMPERR_ICMPHLEN : fin->fin_dlen; 2358 /* 2359 * Since we don't want to copy more data than necessary, we must trim 2360 * the original mblk here. The right way (STREAMish) would be to use 2361 * adjmsg() to trim it. However we would have to calculate the length 2362 * argument for adjmsg() from pointers we already have here. 2363 * 2364 * Since we have pointers and offsets, it's faster and easier for 2365 * us to just adjust pointers by hand instead of using adjmsg(). 2366 */ 2367 fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp; 2368 fin->fin_m->b_wptr += icmp_pld_len; 2369 icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip; 2370 2371 /* 2372 * Also we don't want to copy any L2 stuff, which might precede IP 2373 * header, so we have have to set b_rptr to point to the start of IP 2374 * header. 2375 */ 2376 fin->fin_m->b_rptr += fin->fin_ipoff; 2377 if ((mblk_ip = copyb(fin->fin_m)) == NULL) 2378 return (-1); 2379 fin->fin_m->b_rptr -= fin->fin_ipoff; 2380 2381 /* 2382 * Step (2) 2383 * 2384 * Create an ICMP header, which will be appened to original mblk later. 2385 * ICMP header is just another mblk. 2386 */ 2387 mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI); 2388 if (mblk_icmp == NULL) { 2389 FREE_MB_T(mblk_ip); 2390 return (-1); 2391 } 2392 2393 MTYPE(mblk_icmp) = M_DATA; 2394 icmp = (struct icmp *) mblk_icmp->b_wptr; 2395 icmp->icmp_type = ICMP_UNREACH; 2396 icmp->icmp_code = fin->fin_icode & 0xFF; 2397 icmp->icmp_void = 0; 2398 icmp->icmp_cksum = 0; 2399 mblk_icmp->b_wptr += ICMPERR_ICMPHLEN; 2400 2401 /* 2402 * Step (3) 2403 * 2404 * Complete ICMP packet - link ICMP header with L4 data from original 2405 * IP packet. 2406 */ 2407 linkb(mblk_icmp, mblk_ip); 2408 2409 /* 2410 * Step (4) 2411 * 2412 * Swap IP addresses and change IP header fields accordingly in 2413 * original IP packet. 2414 * 2415 * There is a rule option return-icmp as a dest for physical 2416 * interfaces. This option becomes useless for loopback, since IPF box 2417 * uses same address as a loopback destination. We ignore the option 2418 * here, the ICMP packet will always look like as it would have been 2419 * sent from the original destination host. 2420 */ 2421 tmp_src = fin->fin_ip->ip_src; 2422 fin->fin_ip->ip_src = fin->fin_ip->ip_dst; 2423 fin->fin_ip->ip_dst = tmp_src; 2424 fin->fin_ip->ip_p = IPPROTO_ICMP; 2425 fin->fin_ip->ip_sum = 0; 2426 2427 /* 2428 * Step (5) 2429 * 2430 * We trim the orignal mblk to hold IP header only. 2431 */ 2432 fin->fin_m->b_wptr = fin->fin_dp; 2433 orig_iphdr_len = fin->fin_m->b_wptr - 2434 (fin->fin_m->b_rptr + fin->fin_ipoff); 2435 fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN + 2436 orig_iphdr_len); 2437 2438 /* 2439 * ICMP chksum calculation. The data we are calculating chksum for are 2440 * spread over two mblks, therefore we have to use two for loops. 2441 * 2442 * First for loop computes chksum part for ICMP header. 2443 */ 2444 buf = (uint16_t *) icmp; 2445 len = ICMPERR_ICMPHLEN; 2446 for (sum = 0; len > 1; len -= 2) 2447 sum += *buf++; 2448 2449 /* 2450 * Here we add chksum part for ICMP payload. 2451 */ 2452 len = icmp_pld_len; 2453 buf = (uint16_t *) mblk_ip->b_rptr; 2454 for (; len > 1; len -= 2) 2455 sum += *buf++; 2456 2457 /* 2458 * Chksum is done. 2459 */ 2460 sum = (sum >> 16) + (sum & 0xffff); 2461 sum += (sum >> 16); 2462 icmp->icmp_cksum = ~sum; 2463 2464 /* 2465 * Step (6) 2466 * 2467 * Release all packet mblks, except the first one. 2468 */ 2469 if (fin->fin_m->b_cont != NULL) { 2470 FREE_MB_T(fin->fin_m->b_cont); 2471 } 2472 2473 /* 2474 * Append ICMP payload to first mblk, which already contains new IP 2475 * header. 2476 */ 2477 linkb(fin->fin_m, mblk_icmp); 2478 2479 return (0); 2480 } 2481 2482 #ifdef USE_INET6 2483 /* ------------------------------------------------------------------------ */ 2484 /* Function: fr_make_icmp_v6 */ 2485 /* Returns: int - 0 on success, -1 on failure */ 2486 /* Parameters: fin(I) - pointer to packet information */ 2487 /* */ 2488 /* Please read comment at fr_make_icmp() wrapper function to get an idea */ 2489 /* what and why is going to happen here. Once you read the comment there, */ 2490 /* continue here with next paragraph. */ 2491 /* */ 2492 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response. */ 2493 /* The algorithm is fairly simple: */ 2494 /* 1) We need to get copy of complete mblk. */ 2495 /* */ 2496 /* 2) New ICMPv6 header is created. */ 2497 /* */ 2498 /* 3) The copy of original mblk with packet is linked to ICMPv6 */ 2499 /* header. */ 2500 /* */ 2501 /* 4) The checksum must be adjusted. */ 2502 /* */ 2503 /* 5) IP addresses in original mblk are swapped and IP header data */ 2504 /* are adjusted (protocol number). */ 2505 /* */ 2506 /* 6) Original mblk is trimmed to hold IPv6 header only, then it is */ 2507 /* linked with the ICMPv6 data we got from (3). */ 2508 /* ------------------------------------------------------------------------ */ 2509 static int fr_make_icmp_v6(fin) 2510 fr_info_t *fin; 2511 { 2512 struct icmp6_hdr *icmp6; 2513 tcphdr_t *tcp; 2514 struct in6_addr tmp_src6; 2515 size_t icmp_pld_len; 2516 mblk_t *mblk_ip, *mblk_icmp; 2517 2518 if (fin->fin_v != 6) 2519 return (-1); 2520 2521 /* 2522 * If we are dealing with TCP, then packet must SYN/FIN to be routed by 2523 * IP stack. If it is not SYN/FIN, then we must drop it silently. 2524 */ 2525 tcp = (tcphdr_t *) fin->fin_dp; 2526 2527 if ((fin->fin_p == IPPROTO_TCP) && 2528 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0))) 2529 return (-1); 2530 2531 /* 2532 * Step (1) 2533 * 2534 * We need to copy complete packet in case of IPv6, no trimming is 2535 * needed (except the L2 headers). 2536 */ 2537 icmp_pld_len = M_LEN(fin->fin_m); 2538 fin->fin_m->b_rptr += fin->fin_ipoff; 2539 if ((mblk_ip = copyb(fin->fin_m)) == NULL) 2540 return (-1); 2541 fin->fin_m->b_rptr -= fin->fin_ipoff; 2542 2543 /* 2544 * Step (2) 2545 * 2546 * Allocate and create ICMP header. 2547 */ 2548 mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr), 2549 BPRI_HI); 2550 2551 if (mblk_icmp == NULL) 2552 return (-1); 2553 2554 MTYPE(mblk_icmp) = M_DATA; 2555 icmp6 = (struct icmp6_hdr *) mblk_icmp->b_wptr; 2556 icmp6->icmp6_type = ICMP6_DST_UNREACH; 2557 icmp6->icmp6_code = fin->fin_icode & 0xFF; 2558 icmp6->icmp6_data32[0] = 0; 2559 mblk_icmp->b_wptr += sizeof (struct icmp6_hdr); 2560 2561 /* 2562 * Step (3) 2563 * 2564 * Link the copy of IP packet to ICMP header. 2565 */ 2566 linkb(mblk_icmp, mblk_ip); 2567 2568 /* 2569 * Step (4) 2570 * 2571 * Calculate chksum - this is much more easier task than in case of 2572 * IPv4 - ICMPv6 chksum only covers IP addresses, and payload length. 2573 * We are making compensation just for change of packet length. 2574 */ 2575 icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr); 2576 2577 /* 2578 * Step (5) 2579 * 2580 * Swap IP addresses. 2581 */ 2582 tmp_src6 = fin->fin_ip6->ip6_src; 2583 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst; 2584 fin->fin_ip6->ip6_dst = tmp_src6; 2585 2586 /* 2587 * and adjust IP header data. 2588 */ 2589 fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6; 2590 fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr)); 2591 2592 /* 2593 * Step (6) 2594 * 2595 * We must release all linked mblks from original packet and keep only 2596 * the first mblk with IP header to link ICMP data. 2597 */ 2598 fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t); 2599 2600 if (fin->fin_m->b_cont != NULL) { 2601 FREE_MB_T(fin->fin_m->b_cont); 2602 } 2603 2604 /* 2605 * Append ICMP payload to IP header. 2606 */ 2607 linkb(fin->fin_m, mblk_icmp); 2608 2609 return (0); 2610 } 2611 #endif /* USE_INET6 */ 2612 2613 /* ------------------------------------------------------------------------ */ 2614 /* Function: fr_make_icmp */ 2615 /* Returns: int - 0 on success, -1 on failure */ 2616 /* Parameters: fin(I) - pointer to packet information */ 2617 /* */ 2618 /* We must alter the original mblks passed to IPF from IP stack via */ 2619 /* FW_HOOKS. The reasons why we must alter packet are discussed within */ 2620 /* comment at fr_make_rst() function. */ 2621 /* */ 2622 /* The fr_make_icmp() function acts as a wrapper, which passes the code */ 2623 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on */ 2624 /* protocol version. However there are some details, which are common to */ 2625 /* both IP versions. The details are going to be explained here. */ 2626 /* */ 2627 /* The packet looks as follows: */ 2628 /* xxx | IP hdr | IP payload ... | */ 2629 /* ^ ^ ^ ^ */ 2630 /* | | | | */ 2631 /* | | | fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */ 2632 /* | | | */ 2633 /* | | `- fin_m->fin_dp (in case of IPv4 points to L4 header) */ 2634 /* | | */ 2635 /* | `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case */ 2636 /* | of loopback) */ 2637 /* | */ 2638 /* `- fin_m->b_rptr - points to L2 header in case of physical NIC */ 2639 /* */ 2640 /* All relevant IP headers are pulled up into the first mblk. It happened */ 2641 /* well in advance before the matching rule was found (the rule, which took */ 2642 /* us here, to fr_make_icmp() function). */ 2643 /* */ 2644 /* Both functions will turn packet passed in fin->fin_m mblk into a new */ 2645 /* packet. New packet will be represented as chain of mblks. */ 2646 /* orig mblk |- b_cont ---. */ 2647 /* ^ `-> ICMP hdr |- b_cont--. */ 2648 /* | ^ `-> duped orig mblk */ 2649 /* | | ^ */ 2650 /* `- The original mblk | | */ 2651 /* will be trimmed to | | */ 2652 /* to contain IP header | | */ 2653 /* only | | */ 2654 /* | | */ 2655 /* `- This is newly | */ 2656 /* allocated mblk to | */ 2657 /* hold ICMPv6 data. | */ 2658 /* | */ 2659 /* | */ 2660 /* | */ 2661 /* This is the copy of original mblk, it will contain -' */ 2662 /* orignal IP packet in case of ICMPv6. In case of */ 2663 /* ICMPv4 it will contain up to 8 bytes of IP payload */ 2664 /* (TCP/UDP/L4) data from original packet. */ 2665 /* ------------------------------------------------------------------------ */ 2666 int fr_make_icmp(fin) 2667 fr_info_t *fin; 2668 { 2669 int rv; 2670 2671 if (fin->fin_v == 4) 2672 rv = fr_make_icmp_v4(fin); 2673 #ifdef USE_INET6 2674 else if (fin->fin_v == 6) 2675 rv = fr_make_icmp_v6(fin); 2676 #endif 2677 else 2678 rv = -1; 2679 2680 return (rv); 2681 } 2682 2683 /* ------------------------------------------------------------------------ */ 2684 /* Function: fr_buf_sum */ 2685 /* Returns: unsigned int - sum of buffer buf */ 2686 /* Parameters: buf - pointer to buf we want to sum up */ 2687 /* len - length of buffer buf */ 2688 /* */ 2689 /* Sums buffer buf. The result is used for chksum calculation. The buf */ 2690 /* argument must be aligned. */ 2691 /* ------------------------------------------------------------------------ */ 2692 static uint32_t fr_buf_sum(buf, len) 2693 const void *buf; 2694 unsigned int len; 2695 { 2696 uint32_t sum = 0; 2697 uint16_t *b = (uint16_t *)buf; 2698 2699 while (len > 1) { 2700 sum += *b++; 2701 len -= 2; 2702 } 2703 2704 if (len == 1) 2705 sum += htons((*(unsigned char *)b) << 8); 2706 2707 return (sum); 2708 } 2709 2710 /* ------------------------------------------------------------------------ */ 2711 /* Function: fr_calc_chksum */ 2712 /* Returns: void */ 2713 /* Parameters: fin - pointer to fr_info_t instance with packet data */ 2714 /* pkt - pointer to duplicated packet */ 2715 /* */ 2716 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP */ 2717 /* versions. */ 2718 /* ------------------------------------------------------------------------ */ 2719 void fr_calc_chksum(fin, pkt) 2720 fr_info_t *fin; 2721 mb_t *pkt; 2722 { 2723 struct pseudo_hdr { 2724 union { 2725 struct in_addr in4; 2726 #ifdef USE_INET6 2727 struct in6_addr in6; 2728 #endif 2729 } src_addr; 2730 union { 2731 struct in_addr in4; 2732 #ifdef USE_INET6 2733 struct in6_addr in6; 2734 #endif 2735 } dst_addr; 2736 char zero; 2737 char proto; 2738 uint16_t len; 2739 } phdr; 2740 uint32_t sum, ip_sum; 2741 void *buf; 2742 uint16_t *l4_csum_p; 2743 tcphdr_t *tcp; 2744 udphdr_t *udp; 2745 icmphdr_t *icmp; 2746 #ifdef USE_INET6 2747 struct icmp6_hdr *icmp6; 2748 #endif 2749 ip_t *ip; 2750 unsigned int len; 2751 int pld_len; 2752 2753 /* 2754 * We need to pullup the packet to the single continuous buffer to avoid 2755 * potential misaligment of b_rptr member in mblk chain. 2756 */ 2757 if (pullupmsg(pkt, -1) == 0) { 2758 cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum" 2759 " will not be computed by IPF"); 2760 return; 2761 } 2762 2763 /* 2764 * It is guaranteed IP header starts right at b_rptr, because we are 2765 * working with a copy of the original packet. 2766 * 2767 * Compute pseudo header chksum for TCP and UDP. 2768 */ 2769 if ((fin->fin_p == IPPROTO_UDP) || 2770 (fin->fin_p == IPPROTO_TCP)) { 2771 bzero(&phdr, sizeof (phdr)); 2772 #ifdef USE_INET6 2773 if (fin->fin_v == 6) { 2774 phdr.src_addr.in6 = fin->fin_srcip6; 2775 phdr.dst_addr.in6 = fin->fin_dstip6; 2776 } else { 2777 phdr.src_addr.in4 = fin->fin_src; 2778 phdr.dst_addr.in4 = fin->fin_dst; 2779 } 2780 #else 2781 phdr.src_addr.in4 = fin->fin_src; 2782 phdr.dst_addr.in4 = fin->fin_dst; 2783 #endif 2784 phdr.zero = (char) 0; 2785 phdr.proto = fin->fin_p; 2786 phdr.len = htons((uint16_t)fin->fin_dlen); 2787 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr)); 2788 } else { 2789 sum = 0; 2790 } 2791 2792 /* 2793 * Set pointer to the L4 chksum field in the packet, set buf pointer to 2794 * the L4 header start. 2795 */ 2796 switch (fin->fin_p) { 2797 case IPPROTO_UDP: 2798 udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen); 2799 l4_csum_p = &udp->uh_sum; 2800 buf = udp; 2801 break; 2802 case IPPROTO_TCP: 2803 tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen); 2804 l4_csum_p = &tcp->th_sum; 2805 buf = tcp; 2806 break; 2807 case IPPROTO_ICMP: 2808 icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen); 2809 l4_csum_p = &icmp->icmp_cksum; 2810 buf = icmp; 2811 break; 2812 #ifdef USE_INET6 2813 case IPPROTO_ICMPV6: 2814 icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen); 2815 l4_csum_p = &icmp6->icmp6_cksum; 2816 buf = icmp6; 2817 break; 2818 #endif 2819 default: 2820 l4_csum_p = NULL; 2821 } 2822 2823 /* 2824 * Compute L4 chksum if needed. 2825 */ 2826 if (l4_csum_p != NULL) { 2827 *l4_csum_p = (uint16_t)0; 2828 pld_len = fin->fin_dlen; 2829 len = pkt->b_wptr - (unsigned char *)buf; 2830 ASSERT(len == pld_len); 2831 /* 2832 * Add payload sum to pseudoheader sum. 2833 */ 2834 sum += fr_buf_sum(buf, len); 2835 while (sum >> 16) 2836 sum = (sum & 0xFFFF) + (sum >> 16); 2837 2838 *l4_csum_p = ~((uint16_t)sum); 2839 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p); 2840 } 2841 2842 /* 2843 * The IP header chksum is needed just for IPv4. 2844 */ 2845 if (fin->fin_v == 4) { 2846 /* 2847 * Compute IPv4 header chksum. 2848 */ 2849 ip = (ip_t *)pkt->b_rptr; 2850 ip->ip_sum = (uint16_t)0; 2851 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen); 2852 while (ip_sum >> 16) 2853 ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16); 2854 2855 ip->ip_sum = ~((uint16_t)ip_sum); 2856 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum); 2857 } 2858 2859 return; 2860 } 2861 2862 #endif /* _KERNEL && SOLARIS2 >= 10 */ 2863