/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2017 Joyent, Inc.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/errno.h>
#include <sys/strlog.h>
#include <sys/tihdr.h>
#include <sys/socket.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/kmem.h>
#include <sys/zone.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/atomic.h>
#include <sys/strsun.h>
#include <sys/random.h>
#include <netinet/in.h>
#include <net/if.h>
#include <netinet/ip6.h>
#include <net/pfkeyv2.h>
#include <net/pfpolicy.h>

#include <inet/common.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <inet/ip6.h>
#include <inet/ip_if.h>
#include <inet/ip_ndp.h>
#include <inet/sadb.h>
#include <inet/ipsec_info.h>
#include <inet/ipsec_impl.h>
#include <inet/ipsecesp.h>
#include <inet/ipdrop.h>
#include <inet/tcp.h>
#include <sys/kstat.h>
#include <sys/policy.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <inet/udp_impl.h>
#include <sys/taskq.h>
#include <sys/note.h>

#include <sys/tsol/tnet.h>

/*
 * Table of ND variables supported by ipsecesp.  These are loaded into
 * ipsecesp_g_nd in ipsecesp_init_nd.
 * All of these are alterable, within the min/max values given, at run time.
 */
static ipsecespparam_t lcl_param_arr[] = {
    /* min	max		value	name */
    { 0,	3,	0,	"ipsecesp_debug"},
    { 125,	32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"},
    { 1,	10,	1,	"ipsecesp_reap_delay"},
    { 1,	SADB_MAX_REPLAY, 64,	"ipsecesp_replay_size"},
    { 1,	300,	15,	"ipsecesp_acquire_timeout"},
    { 1,	1800,	90,	"ipsecesp_larval_timeout"},
    /* Default lifetime values for ACQUIRE messages. */
    { 0,	0xffffffffU,	0,	"ipsecesp_default_soft_bytes"},
    { 0,	0xffffffffU,	0,	"ipsecesp_default_hard_bytes"},
    { 0,	0xffffffffU,	24000,	"ipsecesp_default_soft_addtime"},
    { 0,	0xffffffffU,	28800,	"ipsecesp_default_hard_addtime"},
    { 0,	0xffffffffU,	0,	"ipsecesp_default_soft_usetime"},
    { 0,	0xffffffffU,	0,	"ipsecesp_default_hard_usetime"},
    { 0,	1,	0,	"ipsecesp_log_unknown_spi"},
    { 0,	2,	1,	"ipsecesp_padding_check"},
    { 0,	600,	20,	"ipsecesp_nat_keepalive_interval"},
};
/* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */

/* Debug-level printf macros gated on the per-stack ipsecesp_debug tunable. */
#define	esp0dbg(a)	printf a
/* NOTE: != 0 instead of > 0 so lint doesn't complain. */
#define	esp1dbg(espstack, a)	if (espstack->ipsecesp_debug != 0) printf a
#define	esp2dbg(espstack, a)	if (espstack->ipsecesp_debug > 1) printf a
#define	esp3dbg(espstack, a)	if (espstack->ipsecesp_debug > 2) printf a

static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *);
static int ipsecesp_close(queue_t *, int, cred_t *);
static int ipsecesp_rput(queue_t *, mblk_t *);
static int ipsecesp_wput(queue_t *, mblk_t *);
static void *ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns);
static void ipsecesp_stack_fini(netstackid_t stackid, void *arg);

static void esp_prepare_udp(netstack_t *, mblk_t *, ipha_t *);
static void esp_outbound_finish(mblk_t *, ip_xmit_attr_t *);
static void esp_inbound_restart(mblk_t *, ip_recv_attr_t *);

static boolean_t esp_register_out(uint32_t, uint32_t, uint_t,
    ipsecesp_stack_t *, cred_t *);
static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t,
    kstat_named_t **, ipsecesp_stack_t *);
static mblk_t *esp_submit_req_inbound(mblk_t *, ip_recv_attr_t *,
    ipsa_t *, uint_t);
static mblk_t *esp_submit_req_outbound(mblk_t *, ip_xmit_attr_t *,
    ipsa_t *, uchar_t *, uint_t);

/* Settable in /etc/system */
uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE;

static struct
module_info info = {
    5137, "ipsecesp", 0, INFPSZ, 65536, 1024
};

static struct qinit rinit = {
    ipsecesp_rput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
    NULL
};

static struct qinit winit = {
    ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
    NULL
};

struct streamtab ipsecespinfo = {
    &rinit, &winit, NULL, NULL
};

static taskq_t *esp_taskq;

/*
 * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now.
 *
 * Question:	Do I need this, given that all instance's esps->esps_wq point
 *		to IP?
 *
 * Answer:	Yes, because I need to know which queue is BOUND to
 *		IPPROTO_ESP
 */

static int esp_kstat_update(kstat_t *, int);

/*
 * Create and install the per-netstack "esp_stat" named kstat.
 * Returns B_FALSE if the kstat could not be created.
 */
static boolean_t
esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid)
{
    espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat",
        "net", KSTAT_TYPE_NAMED,
        sizeof (esp_kstats_t) / sizeof (kstat_named_t), 0, stackid);

    if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL)
        return (B_FALSE);

    espstack->esp_kstats = espstack->esp_ksp->ks_data;

    espstack->esp_ksp->ks_update = esp_kstat_update;
    /* Stash the stack id so the update callback can find its netstack. */
    espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid;

#define	K64 KSTAT_DATA_UINT64
#define	KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64)

    KI(num_aalgs);
    KI(num_ealgs);
    KI(good_auth);
    KI(bad_auth);
    KI(bad_padding);
    KI(replay_failures);
    KI(replay_early_failures);
    KI(keysock_in);
    KI(out_requests);
    KI(acquire_requests);
    KI(bytes_expired);
    KI(out_discards);
    KI(crypto_sync);
    KI(crypto_async);
    KI(crypto_failures);
    KI(bad_decrypt);
    KI(sa_port_renumbers);

#undef KI
#undef K64

    kstat_install(espstack->esp_ksp);

    return (B_TRUE);
}

/*
 * kstat update callback.  Read-only; refreshes the authentication and
 * encryption algorithm counts from the stack's IPsec state under
 * ipsec_alg_lock.  The other counters are bumped in-place elsewhere.
 */
static int
esp_kstat_update(kstat_t *kp, int rw)
{
    esp_kstats_t *ekp;
    netstackid_t stackid;
    netstack_t *ns;
    ipsec_stack_t *ipss;

    if ((kp == NULL) || (kp->ks_data == NULL))
        return (EIO);

    if (rw == KSTAT_WRITE)
        return (EACCES);

    stackid = (zoneid_t)(uintptr_t)kp->ks_private;
    ns = netstack_find_by_stackid(stackid);
    if (ns == NULL)
        return (-1);
    ipss = ns->netstack_ipsec;
    if (ipss == NULL) {
        netstack_rele(ns);
        return (-1);
    }
    ekp = (esp_kstats_t *)kp->ks_data;

    rw_enter(&ipss->ipsec_alg_lock, RW_READER);
    ekp->esp_stat_num_aalgs.value.ui64 =
        ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
    ekp->esp_stat_num_ealgs.value.ui64 =
        ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
    rw_exit(&ipss->ipsec_alg_lock);

    netstack_rele(ns);
    return (0);
}

#ifdef DEBUG
/*
 * Debug routine, useful to see pre-encryption data.
 * Hex-dumps every mblk in the chain, 32 bytes per output line.
 */
static char *
dump_msg(mblk_t *mp)
{
    char tmp_str[3], tmp_line[256];

    while (mp != NULL) {
        unsigned char *ptr;

        printf("mblk address 0x%p, length %ld, db_ref %d "
            "type %d, base 0x%p, lim 0x%p\n",
            (void *) mp, (long)(mp->b_wptr - mp->b_rptr),
            mp->b_datap->db_ref, mp->b_datap->db_type,
            (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim);
        ptr = mp->b_rptr;

        tmp_line[0] = '\0';
        while (ptr < mp->b_wptr) {
            uint_t diff;

            diff = (ptr - mp->b_rptr);
            /* Flush a full 32-byte line of hex output. */
            if (!(diff & 0x1f)) {
                if (strlen(tmp_line) > 0) {
                    printf("bytes: %s\n", tmp_line);
                    tmp_line[0] = '\0';
                }
            }
            /* Group the dump into 4-byte words. */
            if (!(diff & 0x3))
                (void) strcat(tmp_line, " ");
            (void) sprintf(tmp_str, "%02x", *ptr);
            (void) strcat(tmp_line, tmp_str);
            ptr++;
        }
        if (strlen(tmp_line) > 0)
            printf("bytes: %s\n", tmp_line);

        mp = mp->b_cont;
    }

    return ("\n");
}

#else /* DEBUG */
static char *
dump_msg(mblk_t *mp)
{
    printf("Find value of mp %p.\n", mp);
    return ("\n");
}
#endif /* DEBUG */

/*
 * Don't have to lock age_interval, as only one thread will access it at
 * a time, because I control the one function that does with timeout().
 */
static void
esp_ager(void *arg)
{
    ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;
    netstack_t *ns = espstack->ipsecesp_netstack;
    hrtime_t begin = gethrtime();

    /* Age both the IPv4 and IPv6 SADBs. */
    sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q,
        espstack->ipsecesp_reap_delay, ns);
    sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q,
        espstack->ipsecesp_reap_delay, ns);

    /* Reschedule ourselves; sadb_retimeout() adapts the interval. */
    espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q,
        esp_ager, espstack,
        &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max,
        info.mi_idnum);
}

/*
 * Get an ESP NDD parameter.
 */
/* ARGSUSED */
static int
ipsecesp_param_get(
    queue_t *q,
    mblk_t *mp,
    caddr_t cp,
    cred_t *cr)
{
    ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp;
    uint_t value;
    ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr;

    mutex_enter(&espstack->ipsecesp_param_lock);
    value = ipsecesppa->ipsecesp_param_value;
    mutex_exit(&espstack->ipsecesp_param_lock);

    (void) mi_mpprintf(mp, "%u", value);
    return (0);
}

/*
 * This routine sets an NDD variable in a ipsecespparam_t structure.
 */
/* ARGSUSED */
static int
ipsecesp_param_set(
    queue_t *q,
    mblk_t *mp,
    char *value,
    caddr_t cp,
    cred_t *cr)
{
    ulong_t new_value;
    ipsecespparam_t *ipsecesppa = (ipsecespparam_t *)cp;
    ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr;

    /*
     * Fail the request if the new value does not lie within the
     * required bounds.
     */
    if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
        new_value < ipsecesppa->ipsecesp_param_min ||
        new_value > ipsecesppa->ipsecesp_param_max) {
        return (EINVAL);
    }

    /* Set the new value */
    mutex_enter(&espstack->ipsecesp_param_lock);
    ipsecesppa->ipsecesp_param_value = new_value;
    mutex_exit(&espstack->ipsecesp_param_lock);
    return (0);
}

/*
 * Using lifetime NDD variables, fill in an extended combination's
 * lifetime information.
 */
void
ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns)
{
    ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;

    ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes;
    ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes;
    ecomb->sadb_x_ecomb_soft_addtime =
        espstack->ipsecesp_default_soft_addtime;
    ecomb->sadb_x_ecomb_hard_addtime =
        espstack->ipsecesp_default_hard_addtime;
    ecomb->sadb_x_ecomb_soft_usetime =
        espstack->ipsecesp_default_soft_usetime;
    ecomb->sadb_x_ecomb_hard_usetime =
        espstack->ipsecesp_default_hard_usetime;
}

/*
 * Initialize things for ESP at module load time.
 */
boolean_t
ipsecesp_ddi_init(void)
{
    esp_taskq = taskq_create("esp_taskq", 1, minclsyspri,
        IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0);

    /*
     * We want to be informed each time a stack is created or
     * destroyed in the kernel, so we can maintain the
     * set of ipsecesp_stack_t's.
     */
    netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL,
        ipsecesp_stack_fini);

    return (B_TRUE);
}

/*
 * Walk through the param array specified registering each element with the
 * named dispatch handler.
 */
static boolean_t
ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt)
{
    for (; cnt-- > 0; espp++) {
        if (espp->ipsecesp_param_name != NULL &&
            espp->ipsecesp_param_name[0]) {
            if (!nd_load(ndp,
                espp->ipsecesp_param_name,
                ipsecesp_param_get, ipsecesp_param_set,
                (caddr_t)espp)) {
                /* On failure, free everything registered so far. */
                nd_free(ndp);
                return (B_FALSE);
            }
        }
    }
    return (B_TRUE);
}

/*
 * Initialize things for ESP for each stack instance
 */
static void *
ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns)
{
    ipsecesp_stack_t *espstack;
    ipsecespparam_t *espp;

    espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack),
        KM_SLEEP);
    espstack->ipsecesp_netstack = ns;

    /* Each stack gets its own writable copy of the tunable table. */
    espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
    espstack->ipsecesp_params = espp;
    bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr));

    (void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp,
        A_CNT(lcl_param_arr));

    (void) esp_kstat_init(espstack, stackid);

    espstack->esp_sadb.s_acquire_timeout =
        &espstack->ipsecesp_acquire_timeout;
    sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size,
        espstack->ipsecesp_netstack);

    mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0);

    ip_drop_register(&espstack->esp_dropper, "IPsec ESP");
    return (espstack);
}

/*
 * Destroy things for ESP at module unload time.
 */
void
ipsecesp_ddi_destroy(void)
{
    netstack_unregister(NS_IPSECESP);
    taskq_destroy(esp_taskq);
}

/*
 * Destroy things for ESP for one stack instance
 */
static void
ipsecesp_stack_fini(netstackid_t stackid, void *arg)
{
    ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;

    /* Cancel the pending ager timeout, if any. */
    if (espstack->esp_pfkey_q != NULL) {
        (void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event);
    }
    espstack->esp_sadb.s_acquire_timeout = NULL;
    sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack);
    ip_drop_unregister(&espstack->esp_dropper);
    mutex_destroy(&espstack->ipsecesp_param_lock);
    nd_free(&espstack->ipsecesp_g_nd);

    kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr));
    espstack->ipsecesp_params = NULL;
    kstat_delete_netstack(espstack->esp_ksp, stackid);
    espstack->esp_ksp = NULL;
    espstack->esp_kstats = NULL;
    kmem_free(espstack, sizeof (*espstack));
}

/*
 * ESP module open routine, which is here for keysock plumbing.
 * Keysock is pushed over {AH,ESP} which is an artifact from the Bad Old
 * Days of export control, and fears that ESP would not be allowed
 * to be shipped at all by default.  Eventually, keysock should
 * either access AH and ESP via modstubs or krtld dependencies, or
 * perhaps be folded in with AH and ESP into a single IPsec/netsec
 * module ("netsec" if PF_KEY provides more than AH/ESP keying tables).
 */
/* ARGSUSED */
static int
ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
    netstack_t *ns;
    ipsecesp_stack_t *espstack;

    if (secpolicy_ip_config(credp, B_FALSE) != 0)
        return (EPERM);

    if (q->q_ptr != NULL)
        return (0);	/* Re-open of an already open instance.
 */

    if (sflag != MODOPEN)
        return (EINVAL);

    ns = netstack_find_by_cred(credp);
    ASSERT(ns != NULL);
    espstack = ns->netstack_ipsecesp;
    ASSERT(espstack != NULL);

    q->q_ptr = espstack;
    WR(q)->q_ptr = q->q_ptr;

    qprocson(q);
    return (0);
}

/*
 * ESP module close routine.
 */
/* ARGSUSED */
static int
ipsecesp_close(queue_t *q, int flags __unused, cred_t *credp __unused)
{
    ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr;

    /*
     * Clean up q_ptr, if needed.
     */
    qprocsoff(q);

    /* Keysock queue check is safe, because of OCEXCL perimeter. */

    if (q == espstack->esp_pfkey_q) {
        esp1dbg(espstack,
            ("ipsecesp_close: Ummm... keysock is closing ESP.\n"));
        espstack->esp_pfkey_q = NULL;
        /* Detach qtimeouts. */
        (void) quntimeout(q, espstack->esp_event);
    }

    netstack_rele(espstack->ipsecesp_netstack);
    return (0);
}

/*
 * Add a number of bytes to what the SA has protected so far.  Return
 * B_TRUE if the SA can still protect that many bytes.
 *
 * Caller must REFRELE the passed-in assoc.  This function must REFRELE
 * any obtained peer SA.
 */
static boolean_t
esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound)
{
    ipsa_t *inassoc, *outassoc;
    isaf_t *bucket;
    boolean_t inrc, outrc, isv6;
    sadb_t *sp;
    int outhash;
    netstack_t *ns = assoc->ipsa_netstack;
    ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;

    /* No peer?  No problem! */
    if (!assoc->ipsa_haspeer) {
        return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes,
            B_TRUE));
    }

    /*
     * Otherwise, we want to grab both the original assoc and its peer.
     * There might be a race for this, but if it's a real race, two
     * expire messages may occur.  We limit this by only sending the
     * expire message on one of the peers, we'll pick the inbound
     * arbitrarily.
     *
     * If we need tight synchronization on the peer SA, then we need to
     * reconsider.
     */

    /* Use address length to select IPv6/IPv4 */
    isv6 = (assoc->ipsa_addrfam == AF_INET6);
    sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;

    if (inbound) {
        inassoc = assoc;
        if (isv6) {
            outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
                &inassoc->ipsa_dstaddr));
        } else {
            outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
                &inassoc->ipsa_dstaddr));
        }
        bucket = &sp->sdb_of[outhash];
        mutex_enter(&bucket->isaf_lock);
        outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
            inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
            inassoc->ipsa_addrfam);
        mutex_exit(&bucket->isaf_lock);
        if (outassoc == NULL) {
            /* Q: Do we wish to set haspeer == B_FALSE? */
            esp0dbg(("esp_age_bytes: "
                "can't find peer for inbound.\n"));
            return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc,
                bytes, B_TRUE));
        }
    } else {
        outassoc = assoc;
        bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
        mutex_enter(&bucket->isaf_lock);
        inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
            outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
            outassoc->ipsa_addrfam);
        mutex_exit(&bucket->isaf_lock);
        if (inassoc == NULL) {
            /* Q: Do we wish to set haspeer == B_FALSE? */
            esp0dbg(("esp_age_bytes: "
                "can't find peer for outbound.\n"));
            return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc,
                bytes, B_TRUE));
        }
    }

    /* Only the inbound SA (sendmsg == B_TRUE) may emit the expire. */
    inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE);
    outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE);

    /*
     * REFRELE any peer SA.
     *
     * Because of the multi-line macro nature of IPSA_REFRELE, keep
     * them in { }.
     */
    if (inbound) {
        IPSA_REFRELE(outassoc);
    } else {
        IPSA_REFRELE(inassoc);
    }

    return (inrc && outrc);
}

/*
 * Do incoming NAT-T manipulations for packet.
 * Incrementally fixes the inner TCP/UDP checksum using the SA's
 * precomputed inbound adjustment.  Always returns data_mp (the chain
 * is never consumed here).
 */
static mblk_t *
esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc)
{
    ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
    tcpha_t *tcpha;
    udpha_t *udpha;
    /* Initialize to our inbound cksum adjustment... */
    uint32_t sum = assoc->ipsa_inbound_cksum;

    switch (ipha->ipha_protocol) {
    case IPPROTO_TCP:
        tcpha = (tcpha_t *)(data_mp->b_rptr +
            IPH_HDR_LENGTH(ipha));

/* Fold the carry bits back into the low 16 bits (ones-complement sum). */
#define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) + ((x) >> 16)
        sum += ~ntohs(tcpha->tha_sum) & 0xFFFF;
        DOWN_SUM(sum);
        DOWN_SUM(sum);
        tcpha->tha_sum = ~htons(sum);
        break;
    case IPPROTO_UDP:
        udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha));

        if (udpha->uha_checksum != 0) {
            /* Adjust if the inbound one was not zero. */
            sum += ~ntohs(udpha->uha_checksum) & 0xFFFF;
            DOWN_SUM(sum);
            DOWN_SUM(sum);
            udpha->uha_checksum = ~htons(sum);
            /* UDP checksum 0 means "no checksum"; use all-ones. */
            if (udpha->uha_checksum == 0)
                udpha->uha_checksum = 0xFFFF;
        }
#undef DOWN_SUM
        break;
    case IPPROTO_IP:
        /*
         * This case is only an issue for self-encapsulated
         * packets.  So for now, fall through.
         */
        break;
    }
    return (data_mp);
}


/*
 * Strip ESP header, check padding, and fix IP header.
 * Returns B_TRUE on success, B_FALSE if an error occurred.
 */
static boolean_t
esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen,
    kstat_named_t **counter, ipsecesp_stack_t *espstack)
{
    ipha_t *ipha;
    ip6_t *ip6h;
    uint_t divpoint;
    mblk_t *scratch;
    uint8_t nexthdr, padlen;
    uint8_t lastpad;
    ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
    uint8_t *lastbyte;

    /*
     * Strip ESP data and fix IP header.
     *
     * XXX In case the beginning of esp_inbound() changes to not do a
     * pullup, this part of the code can remain unchanged.
     */
    if (isv4) {
        ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t));
        ipha = (ipha_t *)data_mp->b_rptr;
        ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) +
            IPH_HDR_LENGTH(ipha));
        divpoint = IPH_HDR_LENGTH(ipha);
    } else {
        ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t));
        ip6h = (ip6_t *)data_mp->b_rptr;
        divpoint = ip_hdr_length_v6(data_mp, ip6h);
    }

    /* Walk to the last mblk; the ESP trailer lives at its tail. */
    scratch = data_mp;
    while (scratch->b_cont != NULL)
        scratch = scratch->b_cont;

    ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3);

    /*
     * "Next header" and padding length are the last two bytes in the
     * ESP-protected datagram, thus the explicit - 1 and - 2.
     * lastpad is the last byte of the padding, which can be used for
     * a quick check to see if the padding is correct.
     */
    lastbyte = scratch->b_wptr - 1;
    nexthdr = *lastbyte--;
    padlen = *lastbyte--;

    if (isv4) {
        /* Fix part of the IP header. */
        ipha->ipha_protocol = nexthdr;
        /*
         * Reality check the padlen.  The explicit - 2 is for the
         * padding length and the next-header bytes.
         */
        if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 -
            sizeof (esph_t) - ivlen) {
            ESP_BUMP_STAT(espstack, bad_decrypt);
            ipsec_rl_strlog(espstack->ipsecesp_netstack,
                info.mi_idnum, 0, 0,
                SL_ERROR | SL_WARN,
                "Corrupt ESP packet (padlen too big).\n");
            esp1dbg(espstack, ("padlen (%d) is greater than:\n",
                padlen));
            esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp "
                "hdr - ivlen(%d) = %d.\n",
                ntohs(ipha->ipha_length), ivlen,
                (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) -
                2 - sizeof (esph_t) - ivlen)));
            *counter = DROPPER(ipss, ipds_esp_bad_padlen);
            return (B_FALSE);
        }

        /*
         * Fix the rest of the header.  The explicit - 2 is for the
         * padding length and the next-header bytes.
         */
        ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen -
            2 - sizeof (esph_t) - ivlen);
        ipha->ipha_hdr_checksum = 0;
        ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
    } else {
        if (ip6h->ip6_nxt == IPPROTO_ESP) {
            ip6h->ip6_nxt = nexthdr;
        } else {
            /*
             * ESP is not the first extension header; patch the
             * next-header field of whichever header precedes it.
             */
            ip_pkt_t ipp;

            bzero(&ipp, sizeof (ipp));
            (void) ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp,
                NULL);
            if (ipp.ipp_dstopts != NULL) {
                ipp.ipp_dstopts->ip6d_nxt = nexthdr;
            } else if (ipp.ipp_rthdr != NULL) {
                ipp.ipp_rthdr->ip6r_nxt = nexthdr;
            } else if (ipp.ipp_hopopts != NULL) {
                ipp.ipp_hopopts->ip6h_nxt = nexthdr;
            } else {
                /* Panic a DEBUG kernel. */
                ASSERT(ipp.ipp_hopopts != NULL);
                /* Otherwise, pretend it's IP + ESP. */
                cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n");
                ip6h->ip6_nxt = nexthdr;
            }
        }

        if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) -
            ivlen) {
            ESP_BUMP_STAT(espstack, bad_decrypt);
            ipsec_rl_strlog(espstack->ipsecesp_netstack,
                info.mi_idnum, 0, 0,
                SL_ERROR | SL_WARN,
                "Corrupt ESP packet (v6 padlen too big).\n");
            esp1dbg(espstack, ("padlen (%d) is greater than:\n",
                padlen));
            esp1dbg(espstack,
                ("pkt len(%u) - ip hdr - esp hdr - ivlen(%d) = "
                "%u.\n", (unsigned)(ntohs(ip6h->ip6_plen)
                + sizeof (ip6_t)), ivlen,
                (unsigned)(ntohs(ip6h->ip6_plen) - 2 -
                sizeof (esph_t) - ivlen)));
            *counter = DROPPER(ipss, ipds_esp_bad_padlen);
            return (B_FALSE);
        }


        /*
         * Fix the rest of the header.  The explicit - 2 is for the
         * padding length and the next-header bytes.  IPv6 is nice,
         * because there's no hdr checksum!
         */
        ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen -
            2 - sizeof (esph_t) - ivlen);
    }

    if (espstack->ipsecesp_padding_check > 0 && padlen > 0) {
        /*
         * Weak padding check: compare last-byte to length, they
         * should be equal.
         */
        lastpad = *lastbyte--;

        if (padlen != lastpad) {
            ipsec_rl_strlog(espstack->ipsecesp_netstack,
                info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
                "Corrupt ESP packet (lastpad != padlen).\n");
            esp1dbg(espstack,
                ("lastpad (%d) not equal to padlen (%d):\n",
                lastpad, padlen));
            ESP_BUMP_STAT(espstack, bad_padding);
            *counter = DROPPER(ipss, ipds_esp_bad_padding);
            return (B_FALSE);
        }

        /*
         * Strong padding check: Check all pad bytes to see that
         * they're ascending.  Go backwards using a descending counter
         * to verify.  padlen == 1 is checked by previous block, so
         * only bother if we've more than 1 byte of padding.
         * Consequently, start the check one byte before the location
         * of "lastpad".
         */
        if (espstack->ipsecesp_padding_check > 1) {
            /*
             * This assert may have to become an if and a pullup
             * if we start accepting multi-dblk mblks. For now,
             * though, any packet here will have been pulled up in
             * esp_inbound.
             */
            ASSERT(MBLKL(scratch) >= lastpad + 3);

            /*
             * Use "--lastpad" because we already checked the very
             * last pad byte previously.
             */
            while (--lastpad != 0) {
                if (lastpad != *lastbyte) {
                    ipsec_rl_strlog(
                        espstack->ipsecesp_netstack,
                        info.mi_idnum, 0, 0,
                        SL_ERROR | SL_WARN, "Corrupt ESP "
                        "packet (bad padding).\n");
                    esp1dbg(espstack,
                        ("padding not in correct"
                        " format:\n"));
                    ESP_BUMP_STAT(espstack, bad_padding);
                    *counter = DROPPER(ipss,
                        ipds_esp_bad_padding);
                    return (B_FALSE);
                }
                lastbyte--;
            }
        }
    }

    /* Trim off the padding. */
    ASSERT(data_mp->b_cont == NULL);
    data_mp->b_wptr -= (padlen + 2);

    /*
     * Remove the ESP header.
     *
     * The above assertions about data_mp's size will make this work.
     *
     * XXX  Question:  If I send up and get back a contiguous mblk,
     * would it be quicker to bcopy over, or keep doing the dupb stuff?
     * I go with copying for now.
     */

    if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) &&
        IS_P2ALIGNED(ivlen, sizeof (uint32_t))) {
        /* Word-aligned: shift the IP header up a word at a time. */
        uint8_t *start = data_mp->b_rptr;
        uint32_t *src, *dst;

        src = (uint32_t *)(start + divpoint);
        dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen);

        ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) &&
            IS_P2ALIGNED(src, sizeof (uint32_t)));

        do {
            src--;
            dst--;
            *dst = *src;
        } while (src != (uint32_t *)start);

        data_mp->b_rptr = (uchar_t *)dst;
    } else {
        /* Unaligned: fall back to a byte-at-a-time copy. */
        uint8_t *start = data_mp->b_rptr;
        uint8_t *src, *dst;

        src = start + divpoint;
        dst = src + sizeof (esph_t) + ivlen;

        do {
            src--;
            dst--;
            *dst = *src;
        } while (src != start);

        data_mp->b_rptr = dst;
    }

    esp2dbg(espstack, ("data_mp after inbound ESP adjustment:\n"));
    esp2dbg(espstack, (dump_msg(data_mp)));

    return (B_TRUE);
}

/*
 * Updating use times can be tricky business if the ipsa_haspeer flag is
 * set.  This function is called once in an SA's lifetime.
 *
 * Caller has to REFRELE "assoc" which is passed in.  This function has
 * to REFRELE any peer SA that is obtained.
 */
static void
esp_set_usetime(ipsa_t *assoc, boolean_t inbound)
{
    ipsa_t *inassoc, *outassoc;
    isaf_t *bucket;
    sadb_t *sp;
    int outhash;
    boolean_t isv6;
    netstack_t *ns = assoc->ipsa_netstack;
    ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;

    /* No peer?  No problem! */
    if (!assoc->ipsa_haspeer) {
        sadb_set_usetime(assoc);
        return;
    }

    /*
     * Otherwise, we want to grab both the original assoc and its peer.
     * There might be a race for this, but if it's a real race, the times
     * will be out-of-synch by at most a second, and since our time
     * granularity is a second, this won't be a problem.
     *
     * If we need tight synchronization on the peer SA, then we need to
     * reconsider.
     */

    /* Use address length to select IPv6/IPv4 */
    isv6 = (assoc->ipsa_addrfam == AF_INET6);
    sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;

    if (inbound) {
        inassoc = assoc;
        if (isv6) {
            outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
                &inassoc->ipsa_dstaddr));
        } else {
            outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
                &inassoc->ipsa_dstaddr));
        }
        bucket = &sp->sdb_of[outhash];
        mutex_enter(&bucket->isaf_lock);
        outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
            inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
            inassoc->ipsa_addrfam);
        mutex_exit(&bucket->isaf_lock);
        if (outassoc == NULL) {
            /* Q: Do we wish to set haspeer == B_FALSE? */
            esp0dbg(("esp_set_usetime: "
                "can't find peer for inbound.\n"));
            sadb_set_usetime(inassoc);
            return;
        }
    } else {
        outassoc = assoc;
        bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
        mutex_enter(&bucket->isaf_lock);
        inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
            outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
            outassoc->ipsa_addrfam);
        mutex_exit(&bucket->isaf_lock);
        if (inassoc == NULL) {
            /* Q: Do we wish to set haspeer == B_FALSE? */
            esp0dbg(("esp_set_usetime: "
                "can't find peer for outbound.\n"));
            sadb_set_usetime(outassoc);
            return;
        }
    }

    /* Update usetime on both. */
    sadb_set_usetime(inassoc);
    sadb_set_usetime(outassoc);

    /*
     * REFRELE any peer SA.
     *
     * Because of the multi-line macro nature of IPSA_REFRELE, keep
     * them in { }.
     */
    if (inbound) {
        IPSA_REFRELE(outassoc);
    } else {
        IPSA_REFRELE(inassoc);
    }
}

/*
 * Handle ESP inbound data for IPv4 and IPv6.
 * On success, hands the packet to the crypto framework and returns the
 * resulting mblk chain; on failure (replay check), drops the packet and
 * returns NULL after freeing the mblk chain data_mp.
 */
mblk_t *
esp_inbound(mblk_t *data_mp, void *arg, ip_recv_attr_t *ira)
{
    esph_t *esph = (esph_t *)arg;
    ipsa_t *ipsa = ira->ira_ipsec_esp_sa;
    netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
    ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
    ipsec_stack_t *ipss = ns->netstack_ipsec;

    /*
     * We may wish to check replay in-range-only here as an optimization.
     * Include the reality check of ipsa->ipsa_replay >
     * ipsa->ipsa_replay_wsize for times when it's the first N packets,
     * where N == ipsa->ipsa_replay_wsize.
     *
     * Another check that may come here later is the "collision" check.
     * If legitimate packets flow quickly enough, this won't be a problem,
     * but collisions may cause authentication algorithm crunching to
     * take place when it doesn't need to.
     */
    if (!sadb_replay_peek(ipsa, esph->esph_replay)) {
        ESP_BUMP_STAT(espstack, replay_early_failures);
        IP_ESP_BUMP_STAT(ipss, in_discards);
        ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
            DROPPER(ipss, ipds_esp_early_replay),
            &espstack->esp_dropper);
        BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
        return (NULL);
    }

    /*
     * Adjust the IP header's payload length to reflect the removal
     * of the ICV.
     */
    if (!(ira->ira_flags & IRAF_IS_IPV4)) {
        ip6_t *ip6h = (ip6_t *)data_mp->b_rptr;
        ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) -
            ipsa->ipsa_mac_len);
    } else {
        ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
        ipha->ipha_length = htons(ntohs(ipha->ipha_length) -
            ipsa->ipsa_mac_len);
    }

    /* submit the request to the crypto framework */
    return (esp_submit_req_inbound(data_mp, ira, ipsa,
        (uint8_t *)esph - data_mp->b_rptr));
}

/* XXX refactor me */
/*
 * Handle the SADB_GETSPI message.  Create a larval SA.
 *
 * mp/ksi come from keysock; espstack is the per-netstack ESP state.
 * Replies (success or error) go back up esp_pfkey_q.
 */
static void
esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
{
	ipsa_t *newbie, *target;
	isaf_t *outbound, *inbound;
	int rc, diagnostic;
	sadb_sa_t *assoc;
	keysock_out_t *kso;
	uint32_t newspi;

	/*
	 * Randomly generate a proposed SPI value.  In a cluster, defer
	 * to the cluster hook (cl_inet_getspi) so SPIs are unique
	 * cluster-wide.
	 */
	if (cl_inet_getspi != NULL) {
		cl_inet_getspi(espstack->ipsecesp_netstack->netstack_stackid,
		    IPPROTO_ESP, (uint8_t *)&newspi, sizeof (uint32_t), NULL);
	} else {
		(void) random_get_pseudo_bytes((uint8_t *)&newspi,
		    sizeof (uint32_t));
	}
	newbie = sadb_getspi(ksi, newspi, &diagnostic,
	    espstack->ipsecesp_netstack, IPPROTO_ESP);

	/* NULL == allocation failure, (ipsa_t *)-1 == malformed request. */
	if (newbie == NULL) {
		sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, diagnostic,
		    ksi->ks_in_serial);
		return;
	} else if (newbie == (ipsa_t *)-1) {
		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
		    ksi->ks_in_serial);
		return;
	}

	/*
	 * XXX - We may randomly collide.  We really should recover from this.
	 *	 Unfortunately, that could require spending way-too-much-time
	 *	 in here.  For now, let the user retry.
	 */

	if (newbie->ipsa_addrfam == AF_INET6) {
		outbound = OUTBOUND_BUCKET_V6(&espstack->esp_sadb.s_v6,
		    *(uint32_t *)(newbie->ipsa_dstaddr));
		inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v6,
		    newbie->ipsa_spi);
	} else {
		ASSERT(newbie->ipsa_addrfam == AF_INET);
		outbound = OUTBOUND_BUCKET_V4(&espstack->esp_sadb.s_v4,
		    *(uint32_t *)(newbie->ipsa_dstaddr));
		inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v4,
		    newbie->ipsa_spi);
	}

	/* Lock order here: outbound bucket, then inbound bucket. */
	mutex_enter(&outbound->isaf_lock);
	mutex_enter(&inbound->isaf_lock);

	/*
	 * Check for collisions (i.e. did sadb_getspi() return with something
	 * that already exists?).
	 *
	 * Try outbound first.  Even though SADB_GETSPI is traditionally
	 * for inbound SAs, you never know what a user might do.
	 */
	target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi,
	    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam);
	if (target == NULL) {
		target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi,
		    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr,
		    newbie->ipsa_addrfam);
	}

	/*
	 * I don't have collisions elsewhere!
	 * (Nor will I because I'm still holding inbound/outbound locks.)
	 */

	if (target != NULL) {
		rc = EEXIST;
		IPSA_REFRELE(target);
	} else {
		/*
		 * sadb_insertassoc() also checks for collisions, so
		 * if there's a colliding entry, rc will be set
		 * to EEXIST.
		 *
		 * The larval SA is given a hard expiration so it is
		 * reaped if keying never completes.
		 */
		rc = sadb_insertassoc(newbie, inbound);
		newbie->ipsa_hardexpiretime = gethrestime_sec();
		newbie->ipsa_hardexpiretime +=
		    espstack->ipsecesp_larval_timeout;
	}

	/*
	 * Can exit outbound mutex.  Hold inbound until we're done
	 * with newbie.
	 */
	mutex_exit(&outbound->isaf_lock);

	if (rc != 0) {
		mutex_exit(&inbound->isaf_lock);
		IPSA_REFRELE(newbie);
		sadb_pfkey_error(espstack->esp_pfkey_q, mp, rc,
		    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
		return;
	}


	/* Can write here because I'm still holding the bucket lock. */
	newbie->ipsa_type = SADB_SATYPE_ESP;

	/*
	 * Construct successful return message.  We have one thing going
	 * for us in PF_KEY v2.  That's the fact that
	 *	sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
	 * so the SPIRANGE extension can be rewritten in place as an SA
	 * extension.
	 */
	assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
	assoc->sadb_sa_exttype = SADB_EXT_SA;
	assoc->sadb_sa_spi = newbie->ipsa_spi;
	*((uint64_t *)(&assoc->sadb_sa_replay)) = 0;
	mutex_exit(&inbound->isaf_lock);

	/* Convert KEYSOCK_IN to KEYSOCK_OUT. */
	kso = (keysock_out_t *)ksi;
	kso->ks_out_len = sizeof (*kso);
	kso->ks_out_serial = ksi->ks_in_serial;
	kso->ks_out_type = KEYSOCK_OUT;

	/*
	 * Can safely putnext() to esp_pfkey_q, because this is a turnaround
	 * from the esp_pfkey_q.
	 */
	putnext(espstack->esp_pfkey_q, mp);
}

/*
 * Insert the ESP header into a packet.  Duplicate an mblk, and insert a newly
 * allocated mblk with the ESP header in between the two.
 *
 * "divpoint" is the byte offset (over the whole chain) at which the ESP
 * header mblk must be spliced in.  Returns B_FALSE only on allocation
 * failure (the chain is left unmodified in that case).
 */
static boolean_t
esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint,
    ipsecesp_stack_t *espstack)
{
	mblk_t *split_mp = mp;
	uint_t wheretodiv = divpoint;

	/* Walk the chain to the mblk containing the division point. */
	while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) {
		wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr);
		split_mp = split_mp->b_cont;
		ASSERT(split_mp != NULL);
	}

	if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) {
		mblk_t *scratch;

		/* "scratch" is the 2nd half, split_mp is the first. */
		scratch = dupb(split_mp);
		if (scratch == NULL) {
			esp1dbg(espstack,
			    ("esp_insert_esp: can't allocate scratch.\n"));
			return (B_FALSE);
		}
		/* NOTE:  dupb() doesn't set b_cont appropriately. */
		scratch->b_cont = split_mp->b_cont;
		scratch->b_rptr += wheretodiv;
		split_mp->b_wptr = split_mp->b_rptr + wheretodiv;
		split_mp->b_cont = scratch;
	}
	/*
	 * At this point, split_mp is exactly "wheretodiv" bytes long, and
	 * holds the end of the pre-ESP part of the datagram.
	 */
	esp_mp->b_cont = split_mp->b_cont;
	split_mp->b_cont = esp_mp;

	return (B_TRUE);
}

/*
 * Section 7 of RFC 3947 says:
 *
 *   7.  Recovering from the Expiring NAT Mappings
 *
 *   There are cases where NAT box decides to remove mappings that are still
 *   alive (for example, when the keepalive interval is too long, or when the
 *   NAT box is rebooted.
 *   To recover from this, ends that are NOT behind
 *   NAT SHOULD use the last valid UDP encapsulated IKE or IPsec packet from
 *   the other end to determine which IP and port addresses should be used.
 *   The host behind dynamic NAT MUST NOT do this, as otherwise it opens a
 *   DoS attack possibility because the IP address or port of the other host
 *   will not change (it is not behind NAT).
 *
 *   Keepalives cannot be used for these purposes, as they are not
 *   authenticated, but any IKE authenticated IKE packet or ESP packet can be
 *   used to detect whether the IP address or the port has changed.
 *
 * The following function will check an SA and its explicitly-set pair to see
 * if the NAT-T remote port matches the received packet (which must have
 * passed ESP authentication, see esp_in_done() for the caller context).  If
 * there is a mismatch, the SAs are updated.  It is not important if we race
 * with a transmitting thread, as if there is a transmitting thread, it will
 * merely emit a packet that will most-likely be dropped.
 *
 * "ports" are ordered src,dst, and assoc is an inbound SA, where src should
 * match ipsa_remote_nat_port and dst should match ipsa_local_nat_port.
 */
/*
 * FIRST_16/NEXT_16 extract, respectively, the first and second
 * wire-order 16-bit fields packed into a 32-bit src,dst port pair.
 */
#ifdef _LITTLE_ENDIAN
#define	FIRST_16(x) ((x) & 0xFFFF)
#define	NEXT_16(x) (((x) >> 16) & 0xFFFF)
#else
#define	FIRST_16(x) (((x) >> 16) & 0xFFFF)
#define	NEXT_16(x) ((x) & 0xFFFF)
#endif
static void
esp_port_freshness(uint32_t ports, ipsa_t *assoc)
{
	uint16_t remote = FIRST_16(ports);
	uint16_t local = NEXT_16(ports);
	ipsa_t *outbound_peer;
	isaf_t *bucket;
	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;

	/* We found a conn_t, therefore local != 0. */
	ASSERT(local != 0);
	/* Assume an IPv4 SA. */
	ASSERT(assoc->ipsa_addrfam == AF_INET);

	/*
	 * On-the-wire rport == 0 means something's very wrong.
	 * An unpaired SA is also useless to us.
	 * If we are behind the NAT, don't bother.
	 * A zero local NAT port defaults to 4500, so check that too.
	 * And, of course, if the ports already match, we don't need to
	 * bother.
	 */
	if (remote == 0 || assoc->ipsa_otherspi == 0 ||
	    (assoc->ipsa_flags & IPSA_F_BEHIND_NAT) ||
	    (assoc->ipsa_remote_nat_port == 0 &&
	    remote == htons(IPPORT_IKE_NATT)) ||
	    remote == assoc->ipsa_remote_nat_port)
		return;

	/* Try and snag the peer.   NOTE:  Assume IPv4 for now. */
	bucket = OUTBOUND_BUCKET_V4(&(espstack->esp_sadb.s_v4),
	    assoc->ipsa_srcaddr[0]);
	mutex_enter(&bucket->isaf_lock);
	outbound_peer = ipsec_getassocbyspi(bucket, assoc->ipsa_otherspi,
	    assoc->ipsa_dstaddr, assoc->ipsa_srcaddr, AF_INET);
	mutex_exit(&bucket->isaf_lock);

	/* We probably lost a race to a deleting or expiring thread. */
	if (outbound_peer == NULL)
		return;

	/*
	 * Hold the mutexes for both SAs so we don't race another inbound
	 * thread.  A lock-entry order shouldn't matter, since all other
	 * per-ipsa locks are individually held-then-released.
	 *
	 * Luckily, this has nothing to do with the remote-NAT address,
	 * so we don't have to re-scribble the cached-checksum differential.
	 */
	mutex_enter(&outbound_peer->ipsa_lock);
	mutex_enter(&assoc->ipsa_lock);
	outbound_peer->ipsa_remote_nat_port = assoc->ipsa_remote_nat_port =
	    remote;
	mutex_exit(&assoc->ipsa_lock);
	mutex_exit(&outbound_peer->ipsa_lock);
	IPSA_REFRELE(outbound_peer);
	ESP_BUMP_STAT(espstack, sa_port_renumbers);
}
/*
 * Finish processing of an inbound ESP packet after processing by the
 * crypto framework.
 * - Remove the ESP header.
 * - Send packet back to IP.
1392 * If authentication was performed on the packet, this function is called 1393 * only if the authentication succeeded. 1394 * On success returns B_TRUE, on failure returns B_FALSE and frees the 1395 * mblk chain data_mp. 1396 */ 1397 static mblk_t * 1398 esp_in_done(mblk_t *data_mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic) 1399 { 1400 ipsa_t *assoc; 1401 uint_t espstart; 1402 uint32_t ivlen = 0; 1403 uint_t processed_len; 1404 esph_t *esph; 1405 kstat_named_t *counter; 1406 boolean_t is_natt; 1407 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 1408 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1409 ipsec_stack_t *ipss = ns->netstack_ipsec; 1410 1411 assoc = ira->ira_ipsec_esp_sa; 1412 ASSERT(assoc != NULL); 1413 1414 is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0); 1415 1416 /* get the pointer to the ESP header */ 1417 if (assoc->ipsa_encr_alg == SADB_EALG_NULL) { 1418 /* authentication-only ESP */ 1419 espstart = ic->ic_crypto_data.cd_offset; 1420 processed_len = ic->ic_crypto_data.cd_length; 1421 } else { 1422 /* encryption present */ 1423 ivlen = assoc->ipsa_iv_len; 1424 if (assoc->ipsa_auth_alg == SADB_AALG_NONE) { 1425 /* encryption-only ESP */ 1426 espstart = ic->ic_crypto_data.cd_offset - 1427 sizeof (esph_t) - assoc->ipsa_iv_len; 1428 processed_len = ic->ic_crypto_data.cd_length + 1429 ivlen; 1430 } else { 1431 /* encryption with authentication */ 1432 espstart = ic->ic_crypto_dual_data.dd_offset1; 1433 processed_len = ic->ic_crypto_dual_data.dd_len2 + 1434 ivlen; 1435 } 1436 } 1437 1438 esph = (esph_t *)(data_mp->b_rptr + espstart); 1439 1440 if (assoc->ipsa_auth_alg != IPSA_AALG_NONE || 1441 (assoc->ipsa_flags & IPSA_F_COMBINED)) { 1442 /* 1443 * Authentication passed if we reach this point. 1444 * Packets with authentication will have the ICV 1445 * after the crypto data. Adjust b_wptr before 1446 * making padlen checks. 
1447 */ 1448 ESP_BUMP_STAT(espstack, good_auth); 1449 data_mp->b_wptr -= assoc->ipsa_mac_len; 1450 1451 /* 1452 * Check replay window here! 1453 * For right now, assume keysock will set the replay window 1454 * size to zero for SAs that have an unspecified sender. 1455 * This may change... 1456 */ 1457 1458 if (!sadb_replay_check(assoc, esph->esph_replay)) { 1459 /* 1460 * Log the event. As of now we print out an event. 1461 * Do not print the replay failure number, or else 1462 * syslog cannot collate the error messages. Printing 1463 * the replay number that failed opens a denial-of- 1464 * service attack. 1465 */ 1466 ipsec_assocfailure(info.mi_idnum, 0, 0, 1467 SL_ERROR | SL_WARN, 1468 "Replay failed for ESP spi 0x%x, dst %s.\n", 1469 assoc->ipsa_spi, assoc->ipsa_dstaddr, 1470 assoc->ipsa_addrfam, espstack->ipsecesp_netstack); 1471 ESP_BUMP_STAT(espstack, replay_failures); 1472 counter = DROPPER(ipss, ipds_esp_replay); 1473 goto drop_and_bail; 1474 } 1475 1476 if (is_natt) { 1477 ASSERT(ira->ira_flags & IRAF_ESP_UDP_PORTS); 1478 ASSERT(ira->ira_esp_udp_ports != 0); 1479 esp_port_freshness(ira->ira_esp_udp_ports, assoc); 1480 } 1481 } 1482 1483 esp_set_usetime(assoc, B_TRUE); 1484 1485 if (!esp_age_bytes(assoc, processed_len, B_TRUE)) { 1486 /* The ipsa has hit hard expiration, LOG and AUDIT. */ 1487 ipsec_assocfailure(info.mi_idnum, 0, 0, 1488 SL_ERROR | SL_WARN, 1489 "ESP association 0x%x, dst %s had bytes expire.\n", 1490 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, 1491 espstack->ipsecesp_netstack); 1492 ESP_BUMP_STAT(espstack, bytes_expired); 1493 counter = DROPPER(ipss, ipds_esp_bytes_expire); 1494 goto drop_and_bail; 1495 } 1496 1497 /* 1498 * Remove ESP header and padding from packet. I hope the compiler 1499 * spews "branch, predict taken" code for this. 
1500 */ 1501 1502 if (esp_strip_header(data_mp, (ira->ira_flags & IRAF_IS_IPV4), 1503 ivlen, &counter, espstack)) { 1504 1505 if (is_system_labeled() && assoc->ipsa_tsl != NULL) { 1506 if (!ip_recv_attr_replace_label(ira, assoc->ipsa_tsl)) { 1507 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, 1508 DROPPER(ipss, ipds_ah_nomem), 1509 &espstack->esp_dropper); 1510 BUMP_MIB(ira->ira_ill->ill_ip_mib, 1511 ipIfStatsInDiscards); 1512 return (NULL); 1513 } 1514 } 1515 if (is_natt) 1516 return (esp_fix_natt_checksums(data_mp, assoc)); 1517 1518 if (assoc->ipsa_state == IPSA_STATE_IDLE) { 1519 /* 1520 * Cluster buffering case. Tell caller that we're 1521 * handling the packet. 1522 */ 1523 sadb_buf_pkt(assoc, data_mp, ira); 1524 return (NULL); 1525 } 1526 1527 return (data_mp); 1528 } 1529 1530 esp1dbg(espstack, ("esp_in_done: esp_strip_header() failed\n")); 1531 drop_and_bail: 1532 IP_ESP_BUMP_STAT(ipss, in_discards); 1533 ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, counter, 1534 &espstack->esp_dropper); 1535 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1536 return (NULL); 1537 } 1538 1539 /* 1540 * Called upon failing the inbound ICV check. The message passed as 1541 * argument is freed. 1542 */ 1543 static void 1544 esp_log_bad_auth(mblk_t *mp, ip_recv_attr_t *ira) 1545 { 1546 ipsa_t *assoc = ira->ira_ipsec_esp_sa; 1547 netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack; 1548 ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; 1549 ipsec_stack_t *ipss = ns->netstack_ipsec; 1550 1551 /* 1552 * Log the event. Don't print to the console, block 1553 * potential denial-of-service attack. 
1554 */ 1555 ESP_BUMP_STAT(espstack, bad_auth); 1556 1557 ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN, 1558 "ESP Authentication failed for spi 0x%x, dst %s.\n", 1559 assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam, 1560 espstack->ipsecesp_netstack); 1561 1562 IP_ESP_BUMP_STAT(ipss, in_discards); 1563 ip_drop_packet(mp, B_TRUE, ira->ira_ill, 1564 DROPPER(ipss, ipds_esp_bad_auth), 1565 &espstack->esp_dropper); 1566 } 1567 1568 1569 /* 1570 * Invoked for outbound packets after ESP processing. If the packet 1571 * also requires AH, performs the AH SA selection and AH processing. 1572 * 1573 * Returns data_mp (possibly with AH added) unless data_mp was consumed 1574 * due to an error, or queued due to async. crypto or an ACQUIRE trigger. 1575 */ 1576 static mblk_t * 1577 esp_do_outbound_ah(mblk_t *data_mp, ip_xmit_attr_t *ixa) 1578 { 1579 ipsec_action_t *ap; 1580 1581 ap = ixa->ixa_ipsec_action; 1582 if (ap == NULL) { 1583 ipsec_policy_t *pp = ixa->ixa_ipsec_policy; 1584 ap = pp->ipsp_act; 1585 } 1586 1587 if (!ap->ipa_want_ah) 1588 return (data_mp); 1589 1590 /* 1591 * Normally the AH SA would have already been put in place 1592 * but it could have been flushed so we need to look for it. 1593 */ 1594 if (ixa->ixa_ipsec_ah_sa == NULL) { 1595 if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_AH)) { 1596 sadb_acquire(data_mp, ixa, B_TRUE, B_FALSE); 1597 return (NULL); 1598 } 1599 } 1600 ASSERT(ixa->ixa_ipsec_ah_sa != NULL); 1601 1602 data_mp = ixa->ixa_ipsec_ah_sa->ipsa_output_func(data_mp, ixa); 1603 return (data_mp); 1604 } 1605 1606 1607 /* 1608 * Kernel crypto framework callback invoked after completion of async 1609 * crypto requests for outbound packets. 
 */
static void
esp_kcf_callback_outbound(void *arg, int status)
{
	mblk_t *mp = (mblk_t *)arg;
	mblk_t *async_mp;
	netstack_t *ns;
	ipsec_stack_t *ipss;
	ipsecesp_stack_t *espstack;
	mblk_t *data_mp;
	ip_xmit_attr_t ixas;
	ipsec_crypto_t *ic;
	ill_t *ill;

	/*
	 * First remove the ipsec_crypto_t mblk.
	 * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
	 */
	async_mp = ipsec_remove_crypto_data(mp, &ic);
	ASSERT(async_mp != NULL);

	/*
	 * Extract the ip_xmit_attr_t from the first mblk.
	 * Verifies that the netstack and ill is still around; could
	 * have vanished while kEF was doing its work.
	 * On successful return we have a nce_t and the ill/ipst can't
	 * disappear until we do the nce_refrele in ixa_cleanup.
	 */
	data_mp = async_mp->b_cont;
	async_mp->b_cont = NULL;
	if (!ip_xmit_attr_from_mblk(async_mp, &ixas)) {
		/* Disappeared on us - no ill/ipst for MIB */
		/* We have nowhere to do stats since ixa_ipst could be NULL */
		if (ixas.ixa_nce != NULL) {
			ill = ixas.ixa_nce->nce_ill;
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
			ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
		}
		freemsg(data_mp);
		goto done;
	}
	ns = ixas.ixa_ipst->ips_netstack;
	espstack = ns->netstack_ipsecesp;
	ipss = ns->netstack_ipsec;
	ill = ixas.ixa_nce->nce_ill;

	if (status == CRYPTO_SUCCESS) {
		/*
		 * If a ICV was computed, it was stored by the
		 * crypto framework at the end of the packet.
		 */
		ipha_t *ipha = (ipha_t *)data_mp->b_rptr;

		esp_set_usetime(ixas.ixa_ipsec_esp_sa, B_FALSE);
		/* NAT-T packet: recompute the outer UDP/IP checksums. */
		if (IPH_HDR_VERSION(ipha) == IP_VERSION &&
		    ipha->ipha_protocol == IPPROTO_UDP)
			esp_prepare_udp(ns, data_mp, ipha);

		/* do AH processing if needed */
		data_mp = esp_do_outbound_ah(data_mp, &ixas);
		if (data_mp == NULL)
			goto done;

		(void) ip_output_post_ipsec(data_mp, &ixas);
	} else {
		/* Outbound shouldn't see invalid MAC */
		ASSERT(status != CRYPTO_INVALID_MAC);

		esp1dbg(espstack,
		    ("esp_kcf_callback_outbound: crypto failed with 0x%x\n",
		    status));
		ESP_BUMP_STAT(espstack, crypto_failures);
		ESP_BUMP_STAT(espstack, out_discards);
		ip_drop_packet(data_mp, B_FALSE, ill,
		    DROPPER(ipss, ipds_esp_crypto_failed),
		    &espstack->esp_dropper);
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
	}
done:
	ixa_cleanup(&ixas);
	(void) ipsec_free_crypto_data(mp);
}

/*
 * Kernel crypto framework callback invoked after completion of async
 * crypto requests for inbound packets.
 */
static void
esp_kcf_callback_inbound(void *arg, int status)
{
	mblk_t *mp = (mblk_t *)arg;
	mblk_t *async_mp;
	netstack_t *ns;
	ipsecesp_stack_t *espstack;
	ipsec_stack_t *ipss;
	mblk_t *data_mp;
	ip_recv_attr_t iras;
	ipsec_crypto_t *ic;

	/*
	 * First remove the ipsec_crypto_t mblk.
	 * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
	 */
	async_mp = ipsec_remove_crypto_data(mp, &ic);
	ASSERT(async_mp != NULL);

	/*
	 * Extract the ip_recv_attr_t from the first mblk.
	 * Verifies that the netstack and ill is still around; could
	 * have vanished while kEF was doing its work.
	 */
	data_mp = async_mp->b_cont;
	async_mp->b_cont = NULL;
	if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
		/* The ill or ip_stack_t disappeared on us */
		ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL);
		freemsg(data_mp);
		goto done;
	}

	ns = iras.ira_ill->ill_ipst->ips_netstack;
	espstack = ns->netstack_ipsecesp;
	ipss = ns->netstack_ipsec;

	if (status == CRYPTO_SUCCESS) {
		data_mp = esp_in_done(data_mp, &iras, ic);
		if (data_mp == NULL)
			goto done;

		/* finish IPsec processing */
		ip_input_post_ipsec(data_mp, &iras);
	} else if (status == CRYPTO_INVALID_MAC) {
		esp_log_bad_auth(data_mp, &iras);
	} else {
		esp1dbg(espstack,
		    ("esp_kcf_callback: crypto failed with 0x%x\n",
		    status));
		ESP_BUMP_STAT(espstack, crypto_failures);
		IP_ESP_BUMP_STAT(ipss, in_discards);
		ip_drop_packet(data_mp, B_TRUE, iras.ira_ill,
		    DROPPER(ipss, ipds_esp_crypto_failed),
		    &espstack->esp_dropper);
		BUMP_MIB(iras.ira_ill->ill_ip_mib, ipIfStatsInDiscards);
	}
done:
	ira_cleanup(&iras, B_TRUE);
	(void) ipsec_free_crypto_data(mp);
}

/*
 * Invoked on crypto framework failure during inbound and outbound processing.
 * Drops data_mp and bumps the appropriate failure/discard statistics.
 */
static void
esp_crypto_failed(mblk_t *data_mp, boolean_t is_inbound, int kef_rc,
    ill_t *ill, ipsecesp_stack_t *espstack)
{
	ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec;

	esp1dbg(espstack, ("crypto failed for %s ESP with 0x%x\n",
	    is_inbound ? "inbound" : "outbound", kef_rc));
	ip_drop_packet(data_mp, is_inbound, ill,
	    DROPPER(ipss, ipds_esp_crypto_failed),
	    &espstack->esp_dropper);
	ESP_BUMP_STAT(espstack, crypto_failures);
	if (is_inbound)
		IP_ESP_BUMP_STAT(ipss, in_discards);
	else
		ESP_BUMP_STAT(espstack, out_discards);
}

/*
 * A statement-equivalent macro, _cr MUST point to a modifiable
 * crypto_call_req_t.  CRYPTO_ALWAYS_QUEUE forces the request onto the
 * async path, so _callback will always be invoked.
 */
#define	ESP_INIT_CALLREQ(_cr, _mp, _callback)				\
	(_cr)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_ALWAYS_QUEUE;		\
	(_cr)->cr_callback_arg = (_mp);					\
	(_cr)->cr_callback_func = (_callback)

/* Describe an ICV buffer of icvlen bytes at icvbuf as raw crypto data. */
#define	ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) {			\
	(mac)->cd_format = CRYPTO_DATA_RAW;				\
	(mac)->cd_offset = 0;						\
	(mac)->cd_length = icvlen;					\
	(mac)->cd_raw.iov_base = (char *)icvbuf;			\
	(mac)->cd_raw.iov_len = icvlen;					\
}

/*
 * Describe [off, off+len) of mp as crypto input; uses the cheaper RAW
 * form when the span fits in the first mblk, MBLK form otherwise.
 */
#define	ESP_INIT_CRYPTO_DATA(data, mp, off, len) {			\
	if (MBLKL(mp) >= (len) + (off)) {				\
		(data)->cd_format = CRYPTO_DATA_RAW;			\
		(data)->cd_raw.iov_base = (char *)(mp)->b_rptr;		\
		(data)->cd_raw.iov_len = MBLKL(mp);			\
		(data)->cd_offset = off;				\
	} else {							\
		(data)->cd_format = CRYPTO_DATA_MBLK;			\
		(data)->cd_mp = mp;					\
		(data)->cd_offset = off;				\
	}								\
	(data)->cd_length = len;					\
}

/* Dual-span descriptor for combined MAC-verify + decrypt operations. */
#define	ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) {	\
	(data)->dd_format = CRYPTO_DATA_MBLK;				\
	(data)->dd_mp = mp;						\
	(data)->dd_len1 = len1;						\
	(data)->dd_offset1 = off1;					\
	(data)->dd_len2 = len2;						\
	(data)->dd_offset2 = off2;					\
}

/*
 * Returns data_mp if successfully completed the request. Returns
 * NULL if it failed (and increments InDiscards) or if it is pending.
 */
static mblk_t *
esp_submit_req_inbound(mblk_t *esp_mp, ip_recv_attr_t *ira,
    ipsa_t *assoc, uint_t esph_offset)
{
	uint_t auth_offset, msg_len, auth_len;
	crypto_call_req_t call_req, *callrp;
	mblk_t *mp;
	esph_t *esph_ptr;
	int kef_rc;
	uint_t icv_len = assoc->ipsa_mac_len;
	crypto_ctx_template_t auth_ctx_tmpl;
	boolean_t do_auth, do_encr, force;
	uint_t encr_offset, encr_len;
	uint_t iv_len = assoc->ipsa_iv_len;
	crypto_ctx_template_t encr_ctx_tmpl;
	ipsec_crypto_t *ic, icstack;
	uchar_t *iv_ptr;
	netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;

	mp = NULL;
	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
	/* IPSA_F_ASYNC forces the async (queued) crypto path. */
	force = (assoc->ipsa_flags & IPSA_F_ASYNC);

#ifdef IPSEC_LATENCY_TEST
	kef_rc = CRYPTO_SUCCESS;
#else
	kef_rc = CRYPTO_FAILED;
#endif

	/*
	 * An inbound packet is of the form:
	 * [IP,options,ESP,IV,data,ICV,pad]
	 */
	esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
	iv_ptr = (uchar_t *)(esph_ptr + 1);
	/* Packet length starting at IP header ending after ESP ICV. */
	msg_len = MBLKL(esp_mp);

	encr_offset = esph_offset + sizeof (esph_t) + iv_len;
	encr_len = msg_len - encr_offset;

	/*
	 * Counter mode algs need a nonce. This is setup in sadb_common_add().
	 * If for some reason we are using a SA which does not have a nonce
	 * then we must fail here.
	 */
	if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
	    (assoc->ipsa_nonce == NULL)) {
		ip_drop_packet(esp_mp, B_TRUE, ira->ira_ill,
		    DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
		return (NULL);
	}

	if (force) {
		/* We are doing asynch; allocate mblks to hold state */
		if ((mp = ip_recv_attr_to_mblk(ira)) == NULL ||
		    (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
			BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", esp_mp,
			    ira->ira_ill);
			return (NULL);
		}
		linkb(mp, esp_mp);
		callrp = &call_req;
		ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_inbound);
	} else {
		/*
		 * If we know we are going to do sync then ipsec_crypto_t
		 * should be on the stack.
		 */
		ic = &icstack;
		bzero(ic, sizeof (*ic));
		callrp = NULL;
	}

	if (do_auth) {
		/* authentication context template */
		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
		    auth_ctx_tmpl);

		/* ICV to be verified (trailing icv_len bytes of the mblk) */
		ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac,
		    icv_len, esp_mp->b_wptr - icv_len);

		/* authentication starts at the ESP header */
		auth_offset = esph_offset;
		auth_len = msg_len - auth_offset - icv_len;
		if (!do_encr) {
			/* authentication only */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
			    esp_mp, auth_offset, auth_len);

			/* call the crypto framework */
			kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
			    &ic->ic_crypto_data,
			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
			    &ic->ic_crypto_mac, callrp);
		}
	}

	if (do_encr) {
		/* encryption template */
		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
		    encr_ctx_tmpl);

		/* Call the nonce update function. Also passes in IV */
		(assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, encr_len,
		    iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data);

		if (!do_auth) {
			/* decryption only */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
			    esp_mp, encr_offset, encr_len);

			/* call the crypto framework */
			kef_rc = crypto_decrypt((crypto_mechanism_t *)
			    &ic->ic_cmm, &ic->ic_crypto_data,
			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
			    NULL, callrp);
		}
	}

	if (do_auth && do_encr) {
		/* dual operation: verify ICV then decrypt in one request */
		/* initialize input data argument */
		ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data,
		    esp_mp, auth_offset, auth_len,
		    encr_offset, encr_len - icv_len);

		/* specify IV */
		ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr;

		/* call the framework */
		kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech,
		    &assoc->ipsa_emech, &ic->ic_crypto_dual_data,
		    &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey,
		    auth_ctx_tmpl, encr_ctx_tmpl, &ic->ic_crypto_mac,
		    NULL, callrp);
	}

	switch (kef_rc) {
	case CRYPTO_SUCCESS:
		ESP_BUMP_STAT(espstack, crypto_sync);
		esp_mp = esp_in_done(esp_mp, ira, ic);
		if (force) {
			/* Free mp after we are done with ic */
			mp = ipsec_free_crypto_data(mp);
			(void) ip_recv_attr_free_mblk(mp);
		}
		return (esp_mp);
	case CRYPTO_QUEUED:
		/* esp_kcf_callback_inbound() will be invoked on completion */
		ESP_BUMP_STAT(espstack, crypto_async);
		return (NULL);
	case CRYPTO_INVALID_MAC:
		if (force) {
			mp = ipsec_free_crypto_data(mp);
			esp_mp = ip_recv_attr_free_mblk(mp);
		}
		ESP_BUMP_STAT(espstack, crypto_sync);
		BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
		esp_log_bad_auth(esp_mp, ira);
		/* esp_mp was passed to ip_drop_packet */
		return (NULL);
	}

	if (force) {
		mp = ipsec_free_crypto_data(mp);
		esp_mp = ip_recv_attr_free_mblk(mp);
	}
	BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
	esp_crypto_failed(esp_mp, B_TRUE, kef_rc, ira->ira_ill, espstack);
	/* esp_mp was passed to ip_drop_packet */
	return (NULL);
}

/*
 * Compute the IP and UDP checksums -- common code for both keepalives and
 * actual ESP-in-UDP packets.  Be flexible with multiple mblks because ESP
 * uses mblk-insertion to insert the UDP header.
 * TODO - If there is an easy way to prep a packet for HW checksums, make
 * it happen here.
 * Note that this is used both before calling ip_output_simple and
 * in the esp datapath. The former could use IXAF_SET_ULP_CKSUM but not the
 * latter.
 */
static void
esp_prepare_udp(netstack_t *ns, mblk_t *mp, ipha_t *ipha)
{
	int offset;
	uint32_t cksum;
	uint16_t *arr;
	mblk_t *udpmp = mp;
	uint_t hlen = IPH_HDR_LENGTH(ipha);

	ASSERT(MBLKL(mp) >= sizeof (ipha_t));

	ipha->ipha_hdr_checksum = 0;
	ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);

	if (ns->netstack_udp->us_do_checksum) {
		ASSERT(MBLKL(udpmp) >= sizeof (udpha_t));
		/* arr points to the IP header. */
		arr = (uint16_t *)ipha;
		IP_STAT(ns->netstack_ip, ip_out_sw_cksum);
		IP_STAT_UPDATE(ns->netstack_ip, ip_out_sw_cksum_bytes,
		    ntohs(htons(ipha->ipha_length) - hlen));
		/* arr[6-9] are the IP addresses. */
		cksum = IP_UDP_CSUM_COMP + arr[6] + arr[7] + arr[8] + arr[9] +
		    ntohs(htons(ipha->ipha_length) - hlen);
		cksum = IP_CSUM(mp, hlen, cksum);
		offset = hlen + UDP_CHECKSUM_OFFSET;
		/* Walk the chain to the mblk holding the checksum field. */
		while (offset >= MBLKL(udpmp)) {
			offset -= MBLKL(udpmp);
			udpmp = udpmp->b_cont;
		}
		/* arr points to the UDP header's checksum field.
 */
		arr = (uint16_t *)(udpmp->b_rptr + offset);
		*arr = cksum;
	}
}

/*
 * taskq handler so we can send the NAT-T keepalive on a separate thread.
 * arg is the keepalive mblk; its b_prev smuggles the netstack id.
 */
static void
actually_send_keepalive(void *arg)
{
	mblk_t *mp = (mblk_t *)arg;
	ip_xmit_attr_t ixas;
	netstack_t *ns;
	netstackid_t stackid;

	stackid = (netstackid_t)(uintptr_t)mp->b_prev;
	mp->b_prev = NULL;
	ns = netstack_find_by_stackid(stackid);
	if (ns == NULL) {
		/* Disappeared */
		ip_drop_output("ipIfStatsOutDiscards", mp, NULL);
		freemsg(mp);
		return;
	}

	bzero(&ixas, sizeof (ixas));
	ixas.ixa_zoneid = ALL_ZONES;
	ixas.ixa_cred = kcred;
	ixas.ixa_cpid = NOPID;
	ixas.ixa_tsl = NULL;
	ixas.ixa_ipst = ns->netstack_ip;
	/* No ULP checksum; done by esp_prepare_udp */
	ixas.ixa_flags = (IXAF_IS_IPV4 | IXAF_NO_IPSEC | IXAF_VERIFY_SOURCE);

	(void) ip_output_simple(mp, &ixas);
	ixa_cleanup(&ixas);
	netstack_rele(ns);
}

/*
 * Send a one-byte UDP NAT-T keepalive.
 * The payload is the single octet 0xFF (presumably the RFC 3948
 * NAT-keepalive format -- NOTE(review): confirm against the RFC).
 */
void
ipsecesp_send_keepalive(ipsa_t *assoc)
{
	mblk_t *mp;
	ipha_t *ipha;
	udpha_t *udpha;
	netstack_t *ns = assoc->ipsa_netstack;

	ASSERT(MUTEX_NOT_HELD(&assoc->ipsa_lock));

	mp = allocb(sizeof (ipha_t) + sizeof (udpha_t) + 1, BPRI_HI);
	if (mp == NULL)
		return;
	ipha = (ipha_t *)mp->b_rptr;
	ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
	ipha->ipha_type_of_service = 0;
	ipha->ipha_length = htons(sizeof (ipha_t) + sizeof (udpha_t) + 1);
	/* Use the low-16 of the SPI so we have some clue where it came from. */
	ipha->ipha_ident = *(((uint16_t *)(&assoc->ipsa_spi)) + 1);
	ipha->ipha_fragment_offset_and_flags = 0;  /* Too small to fragment! */
	ipha->ipha_ttl = 0xFF;
	ipha->ipha_protocol = IPPROTO_UDP;
	ipha->ipha_hdr_checksum = 0;
	ipha->ipha_src = assoc->ipsa_srcaddr[0];
	ipha->ipha_dst = assoc->ipsa_dstaddr[0];
	udpha = (udpha_t *)(ipha + 1);
	/* Zero NAT ports default to 4500 (IPPORT_IKE_NATT). */
	udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
	    assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
	udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
	    assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
	udpha->uha_length = htons(sizeof (udpha_t) + 1);
	udpha->uha_checksum = 0;
	mp->b_wptr = (uint8_t *)(udpha + 1);
	*(mp->b_wptr++) = 0xFF;

	esp_prepare_udp(ns, mp, ipha);

	/*
	 * We're holding an isaf_t bucket lock, so pawn off the actual
	 * packet transmission to another thread.  Just in case syncq
	 * processing causes a same-bucket packet to be processed.
	 */
	mp->b_prev = (mblk_t *)(uintptr_t)ns->netstack_stackid;

	if (taskq_dispatch(esp_taskq, actually_send_keepalive, mp,
	    TQ_NOSLEEP) == TASKQID_INVALID) {
		/* Assume no memory if taskq_dispatch() fails. */
		mp->b_prev = NULL;
		ip_drop_packet(mp, B_FALSE, NULL,
		    DROPPER(ns->netstack_ipsec, ipds_esp_nomem),
		    &ns->netstack_ipsecesp->esp_dropper);
	}
}

/*
 * Returns mp if successfully completed the request. Returns
 * NULL if it failed (and increments InDiscards) or if it is pending.
 */
static mblk_t *
esp_submit_req_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa, ipsa_t *assoc,
    uchar_t *icv_buf, uint_t payload_len)
{
	uint_t auth_len;
	crypto_call_req_t call_req, *callrp;
	mblk_t *esp_mp;
	esph_t *esph_ptr;
	mblk_t *mp;
	int kef_rc = CRYPTO_FAILED;
	uint_t icv_len = assoc->ipsa_mac_len;
	crypto_ctx_template_t auth_ctx_tmpl;
	boolean_t do_auth, do_encr, force;
	uint_t iv_len = assoc->ipsa_iv_len;
	crypto_ctx_template_t encr_ctx_tmpl;
	boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
	size_t esph_offset = (is_natt ? UDPH_SIZE : 0);
	netstack_t *ns = ixa->ixa_ipst->ips_netstack;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_crypto_t *ic, icstack;
	uchar_t *iv_ptr;
	crypto_data_t *cd_ptr = NULL;
	ill_t *ill = ixa->ixa_nce->nce_ill;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	esp3dbg(espstack, ("esp_submit_req_outbound:%s",
	    is_natt ? "natt" : "not natt"));

	mp = NULL;
	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
	force = (assoc->ipsa_flags & IPSA_F_ASYNC);

	/*
	 * IPSEC_LATENCY_TEST pretends the crypto succeeded so the
	 * datapath can be measured without KCF overhead.
	 */
#ifdef IPSEC_LATENCY_TEST
	kef_rc = CRYPTO_SUCCESS;
#else
	kef_rc = CRYPTO_FAILED;
#endif

	/*
	 * Outbound IPsec packets are of the form:
	 * [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV]
	 * unless it's NATT, then it's
	 * [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV]
	 * Get a pointer to the mblk containing the ESP header.
	 */
	ASSERT(data_mp->b_cont != NULL);
	esp_mp = data_mp->b_cont;
	esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
	iv_ptr = (uchar_t *)(esph_ptr + 1);

	/*
	 * Combined mode algs need a nonce. This is setup in sadb_common_add().
	 * If for some reason we are using a SA which does not have a nonce
	 * then we must fail here.
	 */
	if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
	    (assoc->ipsa_nonce == NULL)) {
		ip_drop_packet(data_mp, B_FALSE, NULL,
		    DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
		return (NULL);
	}

	if (force) {
		/* We are doing asynch; allocate mblks to hold state */
		if ((mp = ip_xmit_attr_to_mblk(ixa)) == NULL ||
		    (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
			ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
			freemsg(data_mp);
			return (NULL);
		}

		linkb(mp, data_mp);
		callrp = &call_req;
		ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_outbound);
	} else {
		/*
		 * If we know we are going to do sync then ipsec_crypto_t
		 * should be on the stack.
		 */
		ic = &icstack;
		bzero(ic, sizeof (*ic));
		callrp = NULL;
	}


	if (do_auth) {
		/* authentication context template */
		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
		    auth_ctx_tmpl);

		/* where to store the computed mac */
		ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac,
		    icv_len, icv_buf);

		/* authentication starts at the ESP header */
		auth_len = payload_len + iv_len + sizeof (esph_t);
		if (!do_encr) {
			/* authentication only */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
			    esp_mp, esph_offset, auth_len);

			/* call the crypto framework */
			kef_rc = crypto_mac(&assoc->ipsa_amech,
			    &ic->ic_crypto_data,
			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
			    &ic->ic_crypto_mac, callrp);
		}
	}

	if (do_encr) {
		/* encryption context template */
		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
		    encr_ctx_tmpl);
		/* Call the nonce update function. */
		(assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, payload_len,
		    iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data);

		if (!do_auth) {
			/* encryption only, skip mblk that contains ESP hdr */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
			    esp_mp->b_cont, 0, payload_len);

			/*
			 * For combined mode ciphers, the ciphertext is the same
			 * size as the clear text, the ICV should follow the
			 * ciphertext. To convince the kcf to allow in-line
			 * encryption, with an ICV, use ipsec_out_crypto_mac
			 * to point to the same buffer as the data. The calling
			 * function need to ensure the buffer is large enough to
			 * include the ICV.
			 *
			 * The IV is already written to the packet buffer, the
			 * nonce setup function copied it to the params struct
			 * for the cipher to use.
			 */
			if (assoc->ipsa_flags & IPSA_F_COMBINED) {
				bcopy(&ic->ic_crypto_data,
				    &ic->ic_crypto_mac,
				    sizeof (crypto_data_t));
				ic->ic_crypto_mac.cd_length =
				    payload_len + icv_len;
				cd_ptr = &ic->ic_crypto_mac;
			}

			/* call the crypto framework */
			kef_rc = crypto_encrypt((crypto_mechanism_t *)
			    &ic->ic_cmm, &ic->ic_crypto_data,
			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
			    cd_ptr, callrp);

		}
	}

	if (do_auth && do_encr) {
		/*
		 * Encryption and authentication:
		 * Pass the pointer to the mblk chain starting at the ESP
		 * header to the framework. Skip the ESP header mblk
		 * for encryption, which is reflected by an encryption
		 * offset equal to the length of that mblk. Start
		 * the authentication at the ESP header, i.e. use an
		 * authentication offset of zero.
		 */
		ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data,
		    esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len);

		/* specify IV */
		ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr;

		/* call the framework */
		kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech,
		    &assoc->ipsa_amech, NULL,
		    &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey,
		    encr_ctx_tmpl, auth_ctx_tmpl,
		    &ic->ic_crypto_dual_data,
		    &ic->ic_crypto_mac, callrp);
	}

	switch (kef_rc) {
	case CRYPTO_SUCCESS:
		ESP_BUMP_STAT(espstack, crypto_sync);
		esp_set_usetime(assoc, B_FALSE);
		if (force) {
			/* Unwind the async state mblks we prepended. */
			mp = ipsec_free_crypto_data(mp);
			data_mp = ip_xmit_attr_free_mblk(mp);
		}
		if (is_natt)
			esp_prepare_udp(ns, data_mp, (ipha_t *)data_mp->b_rptr);
		return (data_mp);
	case CRYPTO_QUEUED:
		/* esp_kcf_callback_outbound() will be invoked on completion */
		ESP_BUMP_STAT(espstack, crypto_async);
		return (NULL);
	}

	/* Any other kef_rc is a failure; tear down async state and drop. */
	if (force) {
		mp = ipsec_free_crypto_data(mp);
		data_mp = ip_xmit_attr_free_mblk(mp);
	}
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
	esp_crypto_failed(data_mp, B_FALSE, kef_rc, NULL, espstack);
	/* data_mp was passed to ip_drop_packet */
	return (NULL);
}

/*
 * Handle outbound IPsec processing for IPv4 and IPv6
 *
 * Returns data_mp if successfully completed the request. Returns
 * NULL if it failed (and increments InDiscards) or if it is pending.
 */
static mblk_t *
esp_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa)
{
	mblk_t *espmp, *tailmp;
	ipha_t *ipha;
	ip6_t *ip6h;
	esph_t *esph_ptr, *iv_ptr;
	uint_t af;
	uint8_t *nhp;
	uintptr_t divpoint, datalen, adj, padlen, i, alloclen;
	uintptr_t esplen = sizeof (esph_t);
	uint8_t protocol;
	ipsa_t *assoc;
	uint_t iv_len, block_size, mac_len = 0;
	uchar_t *icv_buf;
	udpha_t *udpha;
	boolean_t is_natt = B_FALSE;
	netstack_t *ns = ixa->ixa_ipst->ips_netstack;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ill_t *ill = ixa->ixa_nce->nce_ill;
	boolean_t need_refrele = B_FALSE;

	ESP_BUMP_STAT(espstack, out_requests);

	/*
	 * <sigh> We have to copy the message here, because TCP (for example)
	 * keeps a dupb() of the message lying around for retransmission.
	 * Since ESP changes the whole of the datagram, we have to create our
	 * own copy lest we clobber TCP's data.  Since we have to copy anyway,
	 * we might as well make use of msgpullup() and get the mblk into one
	 * contiguous piece!
	 */
	tailmp = msgpullup(data_mp, -1);
	if (tailmp == NULL) {
		esp0dbg(("esp_outbound: msgpullup() failed, "
		    "dropping packet.\n"));
		ip_drop_packet(data_mp, B_FALSE, ill,
		    DROPPER(ipss, ipds_esp_nomem),
		    &espstack->esp_dropper);
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
		return (NULL);
	}
	freemsg(data_mp);
	data_mp = tailmp;

	assoc = ixa->ixa_ipsec_esp_sa;
	ASSERT(assoc != NULL);

	/*
	 * Get the outer IP header in shape to escape this system..
	 */
	if (is_system_labeled() && (assoc->ipsa_otsl != NULL)) {
		/*
		 * Need to update packet with any CIPSO option and update
		 * ixa_tsl to capture the new label.
		 * We allocate a separate ixa for that purpose.
		 *
		 * From here on, every error exit must ixa_refrele() the
		 * duplicated ixa (tracked by need_refrele).
		 */
		ixa = ip_xmit_attr_duplicate(ixa);
		if (ixa == NULL) {
			ip_drop_packet(data_mp, B_FALSE, ill,
			    DROPPER(ipss, ipds_esp_nomem),
			    &espstack->esp_dropper);
			return (NULL);
		}
		need_refrele = B_TRUE;

		label_hold(assoc->ipsa_otsl);
		ip_xmit_attr_replace_tsl(ixa, assoc->ipsa_otsl);

		data_mp = sadb_whack_label(data_mp, assoc, ixa,
		    DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
		if (data_mp == NULL) {
			/* Packet dropped by sadb_whack_label */
			ixa_refrele(ixa);
			return (NULL);
		}
	}

	/*
	 * Reality check....
	 */
	ipha = (ipha_t *)data_mp->b_rptr;	/* So we can call esp_acquire(). */
	ip6h = (ip6_t *)ipha;

	/*
	 * Locate the "division point" (where ESP gets inserted) and the
	 * next-header byte to rewrite, per address family.
	 */
	if (ixa->ixa_flags & IXAF_IS_IPV4) {
		ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);

		af = AF_INET;
		divpoint = IPH_HDR_LENGTH(ipha);
		datalen = ntohs(ipha->ipha_length) - divpoint;
		nhp = (uint8_t *)&ipha->ipha_protocol;
	} else {
		ip_pkt_t ipp;

		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);

		af = AF_INET6;
		bzero(&ipp, sizeof (ipp));
		divpoint = ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, NULL);
		if (ipp.ipp_dstopts != NULL &&
		    ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) {
			/*
			 * Destination options are tricky.  If we get in here,
			 * then we have a terminal header following the
			 * destination options.  We need to adjust backwards
			 * so we insert ESP BEFORE the destination options
			 * bag.  (So that the dstopts get encrypted!)
			 *
			 * Since this is for outbound packets only, we know
			 * that non-terminal destination options only precede
			 * routing headers.
			 */
			divpoint -= ipp.ipp_dstoptslen;
		}
		datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint;

		if (ipp.ipp_rthdr != NULL) {
			nhp = &ipp.ipp_rthdr->ip6r_nxt;
		} else if (ipp.ipp_hopopts != NULL) {
			nhp = &ipp.ipp_hopopts->ip6h_nxt;
		} else {
			ASSERT(divpoint == sizeof (ip6_t));
			/* It's probably IP + ESP. */
			nhp = &ip6h->ip6_nxt;
		}
	}

	mac_len = assoc->ipsa_mac_len;

	if (assoc->ipsa_flags & IPSA_F_NATT) {
		/* wedge in UDP header */
		is_natt = B_TRUE;
		esplen += UDPH_SIZE;
	}

	/*
	 * Set up ESP header and encryption padding for ENCR PI request.
	 */

	/* Determine the padding length.  Pad to 4-bytes for no-encryption. */
	if (assoc->ipsa_encr_alg != SADB_EALG_NULL) {
		iv_len = assoc->ipsa_iv_len;
		block_size = assoc->ipsa_datalen;

		/*
		 * Pad the data to the length of the cipher block size.
		 * Include the two additional bytes (hence the - 2) for the
		 * padding length and the next header.  Take this into account
		 * when calculating the actual length of the padding.
		 */
		ASSERT(ISP2(iv_len));
		padlen = ((unsigned)(block_size - datalen - 2)) &
		    (block_size - 1);
	} else {
		iv_len = 0;
		padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) &
		    (sizeof (uint32_t) - 1);
	}

	/* Allocate ESP header and IV. */
	esplen += iv_len;

	/*
	 * Update association byte-count lifetimes.  Don't forget to take
	 * into account the padding length and next-header (hence the + 2).
	 *
	 * Use the amount of data fed into the "encryption algorithm".  This
	 * is the IV, the data length, the padding length, and the final two
	 * bytes (padlen, and next-header).
	 *
	 */

	if (!esp_age_bytes(assoc, datalen + padlen + iv_len + 2, B_FALSE)) {
		ip_drop_packet(data_mp, B_FALSE, ill,
		    DROPPER(ipss, ipds_esp_bytes_expire),
		    &espstack->esp_dropper);
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
		if (need_refrele)
			ixa_refrele(ixa);
		return (NULL);
	}

	espmp = allocb(esplen, BPRI_HI);
	if (espmp == NULL) {
		ESP_BUMP_STAT(espstack, out_discards);
		esp1dbg(espstack, ("esp_outbound: can't allocate espmp.\n"));
		ip_drop_packet(data_mp, B_FALSE, ill,
		    DROPPER(ipss, ipds_esp_nomem),
		    &espstack->esp_dropper);
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
		if (need_refrele)
			ixa_refrele(ixa);
		return (NULL);
	}
	espmp->b_wptr += esplen;
	esph_ptr = (esph_t *)espmp->b_rptr;

	if (is_natt) {
		esp3dbg(espstack, ("esp_outbound: NATT"));

		/* The UDP encapsulation header precedes the ESP header. */
		udpha = (udpha_t *)espmp->b_rptr;
		udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
		    assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
		udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
		    assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
		/*
		 * Set the checksum to 0, so that the esp_prepare_udp() call
		 * can do the right thing.
		 */
		udpha->uha_checksum = 0;
		esph_ptr = (esph_t *)(udpha + 1);
	}

	esph_ptr->esph_spi = assoc->ipsa_spi;

	esph_ptr->esph_replay = htonl(atomic_inc_32_nv(&assoc->ipsa_replay));
	if (esph_ptr->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) {
		/*
		 * XXX We have replay counter wrapping.
		 * We probably want to nuke this SA (and its peer).
		 */
		ipsec_assocfailure(info.mi_idnum, 0, 0,
		    SL_ERROR | SL_CONSOLE | SL_WARN,
		    "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n",
		    esph_ptr->esph_spi, assoc->ipsa_dstaddr, af,
		    espstack->ipsecesp_netstack);

		ESP_BUMP_STAT(espstack, out_discards);
		sadb_replay_delete(assoc);
		ip_drop_packet(data_mp, B_FALSE, ill,
		    DROPPER(ipss, ipds_esp_replay),
		    &espstack->esp_dropper);
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
		if (need_refrele)
			ixa_refrele(ixa);
		return (NULL);
	}

	iv_ptr = (esph_ptr + 1);
	/*
	 * iv_ptr points to the mblk which will contain the IV once we have
	 * written it there.  This mblk will be part of a mblk chain that
	 * will make up the packet.
	 *
	 * For counter mode algorithms, the IV is a 64 bit quantity, it
	 * must NEVER repeat in the lifetime of the SA, otherwise an
	 * attacker who had recorded enough packets might be able to
	 * determine some clear text.
	 *
	 * To ensure this does not happen, the IV is stored in the SA and
	 * incremented for each packet, the IV is then copied into the
	 * "packet" for transmission to the receiving system.  The IV will
	 * also be copied into the nonce, when the packet is encrypted.
	 *
	 * CBC mode algorithms use a random IV for each packet.  We do not
	 * require the highest quality random bits, but for best security
	 * with CBC mode ciphers, the value must be unlikely to repeat and
	 * must not be known in advance to an adversary capable of influencing
	 * the clear text.
	 */
	if (!update_iv((uint8_t *)iv_ptr, espstack->esp_pfkey_q, assoc,
	    espstack)) {
		ip_drop_packet(data_mp, B_FALSE, ill,
		    DROPPER(ipss, ipds_esp_iv_wrap), &espstack->esp_dropper);
		if (need_refrele)
			ixa_refrele(ixa);
		return (NULL);
	}

	/* Fix the IP header. */
	alloclen = padlen + 2 + mac_len;
	adj = alloclen + (espmp->b_wptr - espmp->b_rptr);

	protocol = *nhp;

	if (ixa->ixa_flags & IXAF_IS_IPV4) {
		ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj);
		if (is_natt) {
			*nhp = IPPROTO_UDP;
			udpha->uha_length = htons(ntohs(ipha->ipha_length) -
			    IPH_HDR_LENGTH(ipha));
		} else {
			*nhp = IPPROTO_ESP;
		}
		ipha->ipha_hdr_checksum = 0;
		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
	} else {
		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj);
		*nhp = IPPROTO_ESP;
	}

	/* I've got the two ESP mblks, now insert them. */

	esp2dbg(espstack, ("data_mp before outbound ESP adjustment:\n"));
	esp2dbg(espstack, (dump_msg(data_mp)));

	if (!esp_insert_esp(data_mp, espmp, divpoint, espstack)) {
		ESP_BUMP_STAT(espstack, out_discards);
		/* NOTE:  esp_insert_esp() only fails if there's no memory. */
		ip_drop_packet(data_mp, B_FALSE, ill,
		    DROPPER(ipss, ipds_esp_nomem),
		    &espstack->esp_dropper);
		freeb(espmp);
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
		if (need_refrele)
			ixa_refrele(ixa);
		return (NULL);
	}

	/* Append padding (and leave room for ICV). */
	for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont)
		;
	if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) {
		tailmp->b_cont = allocb(alloclen, BPRI_HI);
		if (tailmp->b_cont == NULL) {
			ESP_BUMP_STAT(espstack, out_discards);
			esp0dbg(("esp_outbound: Can't allocate tailmp.\n"));
			ip_drop_packet(data_mp, B_FALSE, ill,
			    DROPPER(ipss, ipds_esp_nomem),
			    &espstack->esp_dropper);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
			if (need_refrele)
				ixa_refrele(ixa);
			return (NULL);
		}
		tailmp = tailmp->b_cont;
	}

	/*
	 * If there's padding, N bytes of padding must be of the form 0x1,
	 * 0x2, 0x3... 0xN.
	 */
	for (i = 0; i < padlen; ) {
		i++;
		*tailmp->b_wptr++ = i;
	}
	/* Trailer: pad length byte, then the saved next-header byte. */
	*tailmp->b_wptr++ = i;
	*tailmp->b_wptr++ = protocol;

	esp2dbg(espstack, ("data_Mp before encryption:\n"));
	esp2dbg(espstack, (dump_msg(data_mp)));

	/*
	 * Okay.  I've set up the pre-encryption ESP.  Let's do it!
	 */

	if (mac_len > 0) {
		ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim);
		icv_buf = tailmp->b_wptr;
		tailmp->b_wptr += mac_len;
	} else {
		icv_buf = NULL;
	}

	data_mp = esp_submit_req_outbound(data_mp, ixa, assoc, icv_buf,
	    datalen + padlen + 2);
	if (need_refrele)
		ixa_refrele(ixa);
	return (data_mp);
}

/*
 * IP calls this to validate the ICMP errors that
 * we got from the network.
 */
mblk_t *
ipsecesp_icmp_error(mblk_t *data_mp, ip_recv_attr_t *ira)
{
	netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	/*
	 * Unless we get an entire packet back, this function is useless.
	 * Why?
	 *
	 * 1.)	Partial packets are useless, because the "next header"
	 *	is at the end of the decrypted ESP packet.  Without the
	 *	whole packet, this is useless.
	 *
	 * 2.)	If we every use a stateful cipher, such as a stream or a
	 *	one-time pad, we can't do anything.
	 *
	 * Since the chances of us getting an entire packet back are very
	 * very small, we discard here.
	 */
	IP_ESP_BUMP_STAT(ipss, in_discards);
	ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
	    DROPPER(ipss, ipds_esp_icmp),
	    &espstack->esp_dropper);
	return (NULL);
}

/*
 * Construct an SADB_REGISTER message with the current algorithms.
 * This function gets called when 'ipsecalgs -s' is run or when
 * in.iked (or other KMD) starts.
 */
static boolean_t
esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial,
    ipsecesp_stack_t *espstack, cred_t *cr)
{
	mblk_t *pfkey_msg_mp, *keysock_out_mp;
	sadb_msg_t *samsg;
	sadb_supported_t *sasupp_auth = NULL;
	sadb_supported_t *sasupp_encr = NULL;
	sadb_alg_t *saalg;
	uint_t allocsize = sizeof (*samsg);
	uint_t i, numalgs_snap;
	int current_aalgs;
	ipsec_alginfo_t **authalgs;
	uint_t num_aalgs;
	int current_ealgs;
	ipsec_alginfo_t **encralgs;
	uint_t num_ealgs;
	ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec;
	sadb_sens_t *sens;
	size_t sens_len = 0;
	sadb_ext_t *nextext;
	ts_label_t *sens_tsl = NULL;

	/* Allocate the KEYSOCK_OUT. */
	keysock_out_mp = sadb_keysock_out(serial);
	if (keysock_out_mp == NULL) {
		esp0dbg(("esp_register_out: couldn't allocate mblk.\n"));
		return (B_FALSE);
	}

	/*
	 * On a labeled system, include a SENSITIVITY extension derived
	 * from the caller's credential label (when one is present).
	 */
	if (is_system_labeled() && (cr != NULL)) {
		sens_tsl = crgetlabel(cr);
		if (sens_tsl != NULL) {
			sens_len = sadb_sens_len_from_label(sens_tsl);
			allocsize += sens_len;
		}
	}

	/*
	 * Allocate the PF_KEY message that follows KEYSOCK_OUT.
	 */

	rw_enter(&ipss->ipsec_alg_lock, RW_READER);
	/*
	 * Fill SADB_REGISTER message's algorithm descriptors.  Hold
	 * down the lock while filling it.
	 *
	 * Return only valid algorithms, so the number of algorithms
	 * to send up may be less than the number of algorithm entries
	 * in the table.
	 */
	authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH];
	for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
		if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
			num_aalgs++;

	if (num_aalgs != 0) {
		allocsize += (num_aalgs * sizeof (*saalg));
		allocsize += sizeof (*sasupp_auth);
	}
	encralgs = ipss->ipsec_alglists[IPSEC_ALG_ENCR];
	for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
		if (encralgs[i] != NULL && ALG_VALID(encralgs[i]))
			num_ealgs++;

	if (num_ealgs != 0) {
		allocsize += (num_ealgs * sizeof (*saalg));
		allocsize += sizeof (*sasupp_encr);
	}
	keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI);
	if (keysock_out_mp->b_cont == NULL) {
		rw_exit(&ipss->ipsec_alg_lock);
		freemsg(keysock_out_mp);
		return (B_FALSE);
	}
	pfkey_msg_mp = keysock_out_mp->b_cont;
	pfkey_msg_mp->b_wptr += allocsize;

	/* Extensions are laid out immediately after the sadb_msg header. */
	nextext = (sadb_ext_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg));

	if (num_aalgs != 0) {
		sasupp_auth = (sadb_supported_t *)nextext;
		saalg = (sadb_alg_t *)(sasupp_auth + 1);

		ASSERT(((ulong_t)saalg & 0x7) == 0);

		numalgs_snap = 0;
		for (i = 0;
		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs));
		    i++) {
			if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
				continue;

			saalg->sadb_alg_id = authalgs[i]->alg_id;
			saalg->sadb_alg_ivlen = 0;
			saalg->sadb_alg_minbits = authalgs[i]->alg_ef_minbits;
			saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits;
			saalg->sadb_x_alg_increment =
			    authalgs[i]->alg_increment;
			saalg->sadb_x_alg_saltbits = SADB_8TO1(
			    authalgs[i]->alg_saltlen);
			numalgs_snap++;
			saalg++;
		}
		ASSERT(numalgs_snap == num_aalgs);
#ifdef DEBUG
		/*
		 * Reality check to make sure I snagged all of the
		 * algorithms.
		 */
		for (; i < IPSEC_MAX_ALGS; i++) {
			if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) {
				cmn_err(CE_PANIC, "esp_register_out()! "
				    "Missed aalg #%d.\n", i);
			}
		}
#endif /* DEBUG */
		nextext = (sadb_ext_t *)saalg;
	}

	if (num_ealgs != 0) {
		sasupp_encr = (sadb_supported_t *)nextext;
		saalg = (sadb_alg_t *)(sasupp_encr + 1);

		numalgs_snap = 0;
		for (i = 0;
		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) {
			if (encralgs[i] == NULL || !ALG_VALID(encralgs[i]))
				continue;
			saalg->sadb_alg_id = encralgs[i]->alg_id;
			saalg->sadb_alg_ivlen = encralgs[i]->alg_ivlen;
			saalg->sadb_alg_minbits = encralgs[i]->alg_ef_minbits;
			saalg->sadb_alg_maxbits = encralgs[i]->alg_ef_maxbits;
			/*
			 * We could advertise the ICV length, except there
			 * is not a value in sadb_x_algb to do this.
			 * saalg->sadb_alg_maclen = encralgs[i]->alg_maclen;
			 */
			saalg->sadb_x_alg_increment =
			    encralgs[i]->alg_increment;
			saalg->sadb_x_alg_saltbits =
			    SADB_8TO1(encralgs[i]->alg_saltlen);

			numalgs_snap++;
			saalg++;
		}
		ASSERT(numalgs_snap == num_ealgs);
#ifdef DEBUG
		/*
		 * Reality check to make sure I snagged all of the
		 * algorithms.
		 */
		for (; i < IPSEC_MAX_ALGS; i++) {
			if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) {
				cmn_err(CE_PANIC, "esp_register_out()! "
				    "Missed ealg #%d.\n", i);
			}
		}
#endif /* DEBUG */
		nextext = (sadb_ext_t *)saalg;
	}

	current_aalgs = num_aalgs;
	current_ealgs = num_ealgs;

	rw_exit(&ipss->ipsec_alg_lock);

	if (sens_tsl != NULL) {
		sens = (sadb_sens_t *)nextext;
		sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
		    sens_tsl, sens_len);

		nextext = (sadb_ext_t *)(((uint8_t *)sens) + sens_len);
	}

	/* Now fill the rest of the SADB_REGISTER message. */

	samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr;
	samsg->sadb_msg_version = PF_KEY_V2;
	samsg->sadb_msg_type = SADB_REGISTER;
	samsg->sadb_msg_errno = 0;
	samsg->sadb_msg_satype = SADB_SATYPE_ESP;
	samsg->sadb_msg_len = SADB_8TO64(allocsize);
	samsg->sadb_msg_reserved = 0;
	/*
	 * Assume caller has sufficient sequence/pid number info.  If it's one
	 * from me over a new alg., I could give two hoots about sequence.
	 */
	samsg->sadb_msg_seq = sequence;
	samsg->sadb_msg_pid = pid;

	if (sasupp_auth != NULL) {
		sasupp_auth->sadb_supported_len = SADB_8TO64(
		    sizeof (*sasupp_auth) + sizeof (*saalg) * current_aalgs);
		sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
		sasupp_auth->sadb_supported_reserved = 0;
	}

	if (sasupp_encr != NULL) {
		sasupp_encr->sadb_supported_len = SADB_8TO64(
		    sizeof (*sasupp_encr) + sizeof (*saalg) * current_ealgs);
		sasupp_encr->sadb_supported_exttype =
		    SADB_EXT_SUPPORTED_ENCRYPT;
		sasupp_encr->sadb_supported_reserved = 0;
	}

	if (espstack->esp_pfkey_q != NULL)
		putnext(espstack->esp_pfkey_q, keysock_out_mp);
	else {
		freemsg(keysock_out_mp);
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Invoked when the algorithm table changes. Causes SADB_REGISTER
 * messages containing the current list of algorithms to be
 * sent up to the ESP listeners.
 */
void
ipsecesp_algs_changed(netstack_t *ns)
{
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;

	/*
	 * Time to send a PF_KEY SADB_REGISTER message to ESP listeners
	 * everywhere.  (The function itself checks for NULL esp_pfkey_q.)
	 */
	(void) esp_register_out(0, 0, 0, espstack, NULL);
}

/*
 * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
 * and send() it into ESP and IP again.
 */
static void
inbound_task(void *arg)
{
	mblk_t *mp = (mblk_t *)arg;
	mblk_t *async_mp;
	ip_recv_attr_t iras;

	/*
	 * The dispatched mblk is [attr mblk] -> [packet]; peel off the
	 * attribute mblk and reconstitute the ip_recv_attr_t from it.
	 */
	async_mp = mp;
	mp = async_mp->b_cont;
	async_mp->b_cont = NULL;
	if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
		/* The ill or ip_stack_t disappeared on us */
		ip_drop_input("ip_recv_attr_from_mblk", mp, NULL);
		freemsg(mp);
		goto done;
	}

	esp_inbound_restart(mp, &iras);
done:
	/* Cleanup runs on both the success and the failure path. */
	ira_cleanup(&iras, B_TRUE);
}

/*
 * Restart ESP after the SA has been added.
 */
static void
esp_inbound_restart(mblk_t *mp, ip_recv_attr_t *ira)
{
	esph_t *esph;
	netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;

	esp2dbg(espstack, ("in ESP inbound_task"));
	ASSERT(espstack != NULL);

	/* Re-do the SA lookup now that the larval SA has matured. */
	mp = ipsec_inbound_esp_sa(mp, ira, &esph);
	if (mp == NULL)
		return;

	ASSERT(esph != NULL);
	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
	ASSERT(ira->ira_ipsec_esp_sa != NULL);

	mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, ira);
	if (mp == NULL) {
		/*
		 * Either it failed or is pending. In the former case
		 * ipIfStatsInDiscards was increased.
		 */
		return;
	}

	ip_input_post_ipsec(mp, ira);
}

/*
 * Now that weak-key passed, actually ADD the security association, and
 * send back a reply ADD message.
 */
/*
 * Inserts the SA into the inbound and/or outbound fanout tables, retires
 * any ACQUIRE record that was waiting on it, and transmits the packets
 * that were queued pending the SA.  Returns 0 or an errno, setting
 * *diagnostic for the PF_KEY reply on failure.
 */
static int
esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
    int *diagnostic, ipsecesp_stack_t *espstack)
{
	isaf_t *primary = NULL, *secondary;
	boolean_t clone = B_FALSE, is_inbound = B_FALSE;
	ipsa_t *larval = NULL;
	ipsacq_t *acqrec;
	iacqf_t *acq_bucket;
	mblk_t *acq_msgs = NULL;
	int rc;
	mblk_t *lpkt;
	int error;
	ipsa_query_t sq;
	ipsec_stack_t *ipss = espstack->ipsecesp_netstack->netstack_ipsec;

	/*
	 * Locate the appropriate table(s).
	 */
	sq.spp = &espstack->esp_sadb;	/* XXX */
	error = sadb_form_query(ksi, IPSA_Q_SA|IPSA_Q_DST,
	    IPSA_Q_SA|IPSA_Q_DST|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
	    &sq, diagnostic);
	if (error)
		return (error);

	/*
	 * Use the direction flags provided by the KMD to determine
	 * if the inbound or outbound table should be the primary
	 * for this SA. If these flags were absent then make this
	 * decision based on the addresses.
	 */
	if (sq.assoc->sadb_sa_flags & IPSA_F_INBOUND) {
		primary = sq.inbound;
		secondary = sq.outbound;
		is_inbound = B_TRUE;
		if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND)
			clone = B_TRUE;
	} else if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND) {
		primary = sq.outbound;
		secondary = sq.inbound;
	}

	if (primary == NULL) {
		/*
		 * The KMD did not set a direction flag, determine which
		 * table to insert the SA into based on addresses.
		 */
		switch (ksi->ks_in_dsttype) {
		case KS_IN_ADDR_MBCAST:
			clone = B_TRUE;	/* All mcast SAs can be bidirectional */
			sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
			/* FALLTHRU */
		/*
		 * If the source address is either one of mine, or unspecified
		 * (which is best summed up by saying "not 'not mine'"),
		 * then the association is potentially bi-directional,
		 * in that it can be used for inbound traffic and outbound
		 * traffic.  The best example of such an SA is a multicast
		 * SA (which allows me to receive the outbound traffic).
		 */
		case KS_IN_ADDR_ME:
			sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
			primary = sq.inbound;
			secondary = sq.outbound;
			if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
				clone = B_TRUE;
			is_inbound = B_TRUE;
			break;
		/*
		 * If the source address literally not mine (either
		 * unspecified or not mine), then this SA may have an
		 * address that WILL be mine after some configuration.
		 * We pay the price for this by making it a bi-directional
		 * SA.
		 */
		case KS_IN_ADDR_NOTME:
			sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
			primary = sq.outbound;
			secondary = sq.inbound;
			if (ksi->ks_in_srctype != KS_IN_ADDR_ME) {
				sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
				clone = B_TRUE;
			}
			break;
		default:
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
			return (EINVAL);
		}
	}

	/*
	 * Find a ACQUIRE list entry if possible. If we've added an SA that
	 * suits the needs of an ACQUIRE list entry, we can eliminate the
	 * ACQUIRE list entry and transmit the enqueued packets. Use the
	 * high-bit of the sequence number to queue it. Key off destination
	 * addr, and change acqrec's state.
	 */

	if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
		acq_bucket = &(sq.sp->sdb_acq[sq.outhash]);
		mutex_enter(&acq_bucket->iacqf_lock);
		for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
		    acqrec = acqrec->ipsacq_next) {
			mutex_enter(&acqrec->ipsacq_lock);
			/*
			 * Q: I only check sequence. Should I check dst?
			 * A: Yes, check dest because those are the packets
			 *    that are queued up.
			 */
			if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
			    IPSA_ARE_ADDR_EQUAL(sq.dstaddr,
			    acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
				break;
			mutex_exit(&acqrec->ipsacq_lock);
		}
		if (acqrec != NULL) {
			/*
			 * AHA! I found an ACQUIRE record for this SA.
			 * Grab the msg list, and free the acquire record.
			 * I already am holding the lock for this record,
			 * so all I have to do is free it.
			 */
			acq_msgs = acqrec->ipsacq_mp;
			acqrec->ipsacq_mp = NULL;
			mutex_exit(&acqrec->ipsacq_lock);
			sadb_destroy_acquire(acqrec,
			    espstack->ipsecesp_netstack);
		}
		mutex_exit(&acq_bucket->iacqf_lock);
	}

	/*
	 * Find PF_KEY message, and see if I'm an update. If so, find entry
	 * in larval list (if there).
	 */
	if (samsg->sadb_msg_type == SADB_UPDATE) {
		mutex_enter(&sq.inbound->isaf_lock);
		larval = ipsec_getassocbyspi(sq.inbound, sq.assoc->sadb_sa_spi,
		    ALL_ZEROES_PTR, sq.dstaddr, sq.dst->sin_family);
		mutex_exit(&sq.inbound->isaf_lock);

		if ((larval == NULL) ||
		    (larval->ipsa_state != IPSA_STATE_LARVAL)) {
			*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
			if (larval != NULL) {
				IPSA_REFRELE(larval);
			}
			esp0dbg(("Larval update, but larval disappeared.\n"));
			return (ESRCH);
		} /* Else sadb_common_add unlinks it for me! */
	}

	if (larval != NULL) {
		/*
		 * Hold again, because sadb_common_add() consumes a reference,
		 * and we don't want to clear_lpkt() without a reference.
		 */
		IPSA_REFHOLD(larval);
	}

	rc = sadb_common_add(espstack->esp_pfkey_q,
	    mp, samsg, ksi, primary, secondary, larval, clone, is_inbound,
	    diagnostic, espstack->ipsecesp_netstack, &espstack->esp_sadb);

	if (larval != NULL) {
		if (rc == 0) {
			lpkt = sadb_clear_lpkt(larval);
			if (lpkt != NULL) {
				/*
				 * rc becomes 1 (nonzero) if the taskq
				 * dispatch failed, 0 otherwise.
				 */
				rc = taskq_dispatch(esp_taskq, inbound_task,
				    lpkt, TQ_NOSLEEP) == TASKQID_INVALID;
			}
		}
		IPSA_REFRELE(larval);
	}

	/*
	 * How much more stack will I create with all of these
	 * esp_outbound() calls?
	 */

	/* Handle the packets queued waiting for the SA */
	while (acq_msgs != NULL) {
		mblk_t *asyncmp;
		mblk_t *data_mp;
		ip_xmit_attr_t ixas;
		ill_t *ill;

		asyncmp = acq_msgs;
		acq_msgs = acq_msgs->b_next;
		asyncmp->b_next = NULL;

		/*
		 * Extract the ip_xmit_attr_t from the first mblk.
		 * Verifies that the netstack and ill is still around; could
		 * have vanished while iked was doing its work.
		 * On succesful return we have a nce_t and the ill/ipst can't
		 * disappear until we do the nce_refrele in ixa_cleanup.
		 */
		data_mp = asyncmp->b_cont;
		asyncmp->b_cont = NULL;
		if (!ip_xmit_attr_from_mblk(asyncmp, &ixas)) {
			ESP_BUMP_STAT(espstack, out_discards);
			ip_drop_packet(data_mp, B_FALSE, NULL,
			    DROPPER(ipss, ipds_sadb_acquire_timeout),
			    &espstack->esp_dropper);
		} else if (rc != 0) {
			/* The add (or taskq dispatch) failed; drop. */
			ill = ixas.ixa_nce->nce_ill;
			ESP_BUMP_STAT(espstack, out_discards);
			ip_drop_packet(data_mp, B_FALSE, ill,
			    DROPPER(ipss, ipds_sadb_acquire_timeout),
			    &espstack->esp_dropper);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
		} else {
			esp_outbound_finish(data_mp, &ixas);
		}
		ixa_cleanup(&ixas);
	}

	return (rc);
}

/*
 * Process one of the queued messages (from ipsacq_mp) once the SA
 * has been added.
 */
static void
esp_outbound_finish(mblk_t *data_mp, ip_xmit_attr_t *ixa)
{
	netstack_t *ns = ixa->ixa_ipst->ips_netstack;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	ill_t *ill = ixa->ixa_nce->nce_ill;

	/* Select the outbound SA; drop the packet if none can be found. */
	if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_ESP)) {
		ESP_BUMP_STAT(espstack, out_discards);
		ip_drop_packet(data_mp, B_FALSE, ill,
		    DROPPER(ipss, ipds_sadb_acquire_timeout),
		    &espstack->esp_dropper);
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
		return;
	}

	data_mp = esp_outbound(data_mp, ixa);
	if (data_mp == NULL)
		return;

	/* do AH processing if needed */
	data_mp = esp_do_outbound_ah(data_mp, ixa);
	if (data_mp == NULL)
		return;

	(void) ip_output_post_ipsec(data_mp, ixa);
}

/*
 * Add new ESP security association.  This may become a generic AH/ESP
 * routine eventually.
 *
 * Validates the PF_KEY extensions (addresses, SA, keys, lifetimes,
 * NAT-T addresses, algorithms) and, if everything checks out, hands off
 * to esp_add_sa_finish().  Returns 0 or an errno, setting *diagnostic
 * for the PF_KEY reply on failure.
 */
static int
esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns)
{
	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
	sadb_address_t *srcext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
	sadb_address_t *dstext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
	sadb_address_t *isrcext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
	sadb_address_t *idstext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
	sadb_address_t *nttext_loc =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC];
	sadb_address_t *nttext_rem =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM];
	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
	struct sockaddr_in *src, *dst;
	struct sockaddr_in *natt_loc, *natt_rem;
	struct sockaddr_in6 *natt_loc6, *natt_rem6;
	sadb_lifetime_t *soft =
	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
	sadb_lifetime_t *hard =
	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
	sadb_lifetime_t *idle =
	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	/* I need certain extensions present for an ADD message. */
	if (srcext == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
		return (EINVAL);
	}
	if (dstext == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
		return (EINVAL);
	}
	if (isrcext == NULL && idstext != NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
		return (EINVAL);
	}
	if (isrcext != NULL && idstext == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
		return (EINVAL);
	}
	if (assoc == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
		return (EINVAL);
	}
	if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY;
		return (EINVAL);
	}

	/*
	 * NOTE(review): nttext_loc/nttext_rem may be NULL here; the
	 * (ext + 1) arithmetic below is only ever dereferenced after the
	 * SADB_X_SAFLAGS_NATT_* NULL checks further down.  Strictly,
	 * pointer arithmetic on NULL is undefined behavior — consider
	 * moving these assignments inside the flag-guarded blocks.
	 */
	src = (struct sockaddr_in *)(srcext + 1);
	dst = (struct sockaddr_in *)(dstext + 1);
	natt_loc = (struct sockaddr_in *)(nttext_loc + 1);
	natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1);
	natt_rem = (struct sockaddr_in *)(nttext_rem + 1);
	natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1);

	/* Sundry ADD-specific reality checks. */
	/* XXX STATS : Logging/stats here? */

	if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) &&
	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
		return (EINVAL);
	}
	if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) {
		*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
		return (EINVAL);
	}

#ifndef IPSEC_LATENCY_TEST
	/* NULL encryption with no authentication is disallowed. */
	if (assoc->sadb_sa_encrypt == SADB_EALG_NULL &&
	    assoc->sadb_sa_auth == SADB_AALG_NONE) {
		*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
		return (EINVAL);
	}
#endif

	if (assoc->sadb_sa_flags & ~espstack->esp_sadb.s_addflags) {
		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
		return (EINVAL);
	}

	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
		return (EINVAL);
	}
	ASSERT(src->sin_family == dst->sin_family);

	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) {
		if (nttext_loc == NULL) {
			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
			return (EINVAL);
		}

		if (natt_loc->sin_family == AF_INET6 &&
		    !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) {
			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC;
			return (EINVAL);
		}
	}

	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) {
		if (nttext_rem == NULL) {
			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
			return (EINVAL);
		}
		if (natt_rem->sin_family == AF_INET6 &&
		    !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) {
			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM;
			return (EINVAL);
		}
	}

	/* Stuff I don't support, for now.  XXX Diagnostic? */
	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL)
		return (EOPNOTSUPP);

	if ((*diagnostic = sadb_labelchk(ksi)) != 0)
		return (EINVAL);

	/*
	 * XXX Policy : I'm not checking identities at this time,
	 * but if I did, I'd do them here, before I sent
	 * the weak key check up to the algorithm.
	 */

	rw_enter(&ipss->ipsec_alg_lock, RW_READER);

	/*
	 * First locate the authentication algorithm.
	 */
#ifdef IPSEC_LATENCY_TEST
	if (akey != NULL && assoc->sadb_sa_auth != SADB_AALG_NONE) {
#else
	if (akey != NULL) {
#endif
		ipsec_alginfo_t *aalg;

		aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
		    [assoc->sadb_sa_auth];
		if (aalg == NULL || !ALG_VALID(aalg)) {
			rw_exit(&ipss->ipsec_alg_lock);
			esp1dbg(espstack, ("Couldn't find auth alg #%d.\n",
			    assoc->sadb_sa_auth));
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
			return (EINVAL);
		}

		/*
		 * Sanity check key sizes.
		 * Note: It's not possible to use SADB_AALG_NONE because
		 * this auth_alg is not defined with ALG_FLAG_VALID. If this
		 * ever changes, the same check for SADB_AALG_NONE and
		 * a auth_key != NULL should be made here ( see below).
		 */
		if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) {
			rw_exit(&ipss->ipsec_alg_lock);
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS;
			return (EINVAL);
		}
		ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID);

		/* check key and fix parity if needed */
		if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE,
		    diagnostic) != 0) {
			rw_exit(&ipss->ipsec_alg_lock);
			return (EINVAL);
		}
	}

	/*
	 * Then locate the encryption algorithm.
	 */
	if (ekey != NULL) {
		uint_t keybits;
		ipsec_alginfo_t *ealg;

		ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
		    [assoc->sadb_sa_encrypt];
		if (ealg == NULL || !ALG_VALID(ealg)) {
			rw_exit(&ipss->ipsec_alg_lock);
			esp1dbg(espstack, ("Couldn't find encr alg #%d.\n",
			    assoc->sadb_sa_encrypt));
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
			return (EINVAL);
		}

		/*
		 * Sanity check key sizes. If the encryption algorithm is
		 * SADB_EALG_NULL but the encryption key is NOT
		 * NULL then complain.
		 *
		 * The keying material includes salt bits if required by
		 * algorithm and optionally the Initial IV, check the
		 * length of whats left.
		 */
		keybits = ekey->sadb_key_bits;
		keybits -= ekey->sadb_key_reserved;
		keybits -= SADB_8TO1(ealg->alg_saltlen);
		if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) ||
		    (!ipsec_valid_key_size(keybits, ealg))) {
			rw_exit(&ipss->ipsec_alg_lock);
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
			return (EINVAL);
		}
		ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID);

		/* check key */
		if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE,
		    diagnostic) != 0) {
			rw_exit(&ipss->ipsec_alg_lock);
			return (EINVAL);
		}
	}
	rw_exit(&ipss->ipsec_alg_lock);

	return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
	    diagnostic, espstack));
}

/*
 * Update a security association. Updates come in two varieties. The first
 * is an update of lifetimes on a non-larval SA. The second is an update of
 * a larval SA, which ends up looking a lot more like an add.
 */
static int
esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
    ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
{
	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
	mblk_t *buf_pkt;
	int rcode;

	sadb_address_t *dstext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];

	if (dstext == NULL) {
		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
		return (EINVAL);
	}

	/* Common SADB update; esp_add_sa handles the larval "add" case. */
	rcode = sadb_update_sa(mp, ksi, &buf_pkt, &espstack->esp_sadb,
	    diagnostic, espstack->esp_pfkey_q, esp_add_sa,
	    espstack->ipsecesp_netstack, sadb_msg_type);

	if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) ||
	    (rcode != 0)) {
		return (rcode);
	}

	/*
	 * Dispose of any packet buffered on the SA (presumably one held
	 * while the SA was idle) — TODO confirm HANDLE_BUF_PKT semantics
	 * against its definition in sadb.h.
	 */
	HANDLE_BUF_PKT(esp_taskq, espstack->ipsecesp_netstack->netstack_ipsec,
	    espstack->esp_dropper, buf_pkt);

	return (rcode);
}

/* XXX refactor me */
/*
 * Delete a security association. This is REALLY likely to be code common to
 * both AH and ESP. Find the association, then unlink it.
 */
static int
esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
    ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
{
	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
	sadb_address_t *dstext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
	sadb_address_t *srcext =
	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
	struct sockaddr_in *sin;

	if (assoc == NULL) {
		/*
		 * No SA extension: purge by address instead, preferring
		 * the destination address.
		 */
		if (dstext != NULL) {
			sin = (struct sockaddr_in *)(dstext + 1);
		} else if (srcext != NULL) {
			sin = (struct sockaddr_in *)(srcext + 1);
		} else {
			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
			return (EINVAL);
		}
		return (sadb_purge_sa(mp, ksi,
		    (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 :
		    &espstack->esp_sadb.s_v4, diagnostic,
		    espstack->esp_pfkey_q));
	}

	return (sadb_delget_sa(mp, ksi, &espstack->esp_sadb, diagnostic,
	    espstack->esp_pfkey_q, sadb_msg_type));
}

/* XXX refactor me */
/*
 * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP
 * messages.
 */
static void
esp_dump(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
{
	int error;
	sadb_msg_t *samsg;

	/*
	 * Dump each fanout, bailing if error is non-zero.
	 */

	error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
	    &espstack->esp_sadb.s_v4);
	if (error != 0)
		goto bail;

	error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
	    &espstack->esp_sadb.s_v6);
bail:
	ASSERT(mp->b_cont != NULL);
	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
	/* Echo the request back with the (possibly zero) errno filled in. */
	samsg->sadb_msg_errno = (uint8_t)error;
	sadb_pfkey_echo(espstack->esp_pfkey_q, mp,
	    (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL);
}

/*
 * First-cut reality check for an inbound PF_KEY message.
 * Returns B_TRUE (and sends the PF_KEY error) on failure.
 */
static boolean_t
esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi,
    ipsecesp_stack_t *espstack)
{
	int diagnostic;

	if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
		diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
		goto badmsg;
	}
	if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
	    ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
		diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
		goto badmsg;
	}
	return (B_FALSE);	/* False ==> no failures */

badmsg:
	sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
	    ksi->ks_in_serial);
	return (B_TRUE);	/* True ==> failures */
}

/*
 * ESP parsing of PF_KEY messages.  Keysock did most of the really silly
 * error cases.
 * What I receive is a fully-formed, syntactically legal
 * PF_KEY message.  I then need to check semantics...
 *
 * This code may become common to AH and ESP. Stay tuned.
 *
 * I also make the assumption that db_ref's are cool. If this assumption
 * is wrong, this means that someone other than keysock or me has been
 * mucking with PF_KEY messages.
 */
static void
esp_parse_pfkey(mblk_t *mp, ipsecesp_stack_t *espstack)
{
	mblk_t *msg = mp->b_cont;
	sadb_msg_t *samsg;
	keysock_in_t *ksi;
	int error;
	int diagnostic = SADB_X_DIAGNOSTIC_NONE;

	ASSERT(msg != NULL);

	samsg = (sadb_msg_t *)msg->b_rptr;
	ksi = (keysock_in_t *)mp->b_rptr;

	/*
	 * If applicable, convert unspecified AF_INET6 to unspecified
	 * AF_INET. And do other address reality checks.
	 */
	if (!sadb_addrfix(ksi, espstack->esp_pfkey_q, mp,
	    espstack->ipsecesp_netstack) ||
	    esp_pfkey_reality_failures(mp, ksi, espstack)) {
		return;
	}

	/* Dispatch on the PF_KEY message type. */
	switch (samsg->sadb_msg_type) {
	case SADB_ADD:
		error = esp_add_sa(mp, ksi, &diagnostic,
		    espstack->ipsecesp_netstack);
		if (error != 0) {
			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
			    diagnostic, ksi->ks_in_serial);
		}
		/* else esp_add_sa() took care of things. */
		break;
	case SADB_DELETE:
	case SADB_X_DELPAIR:
	case SADB_X_DELPAIR_STATE:
		error = esp_del_sa(mp, ksi, &diagnostic, espstack,
		    samsg->sadb_msg_type);
		if (error != 0) {
			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
			    diagnostic, ksi->ks_in_serial);
		}
		/* Else esp_del_sa() took care of things. */
		break;
	case SADB_GET:
		error = sadb_delget_sa(mp, ksi, &espstack->esp_sadb,
		    &diagnostic, espstack->esp_pfkey_q, samsg->sadb_msg_type);
		if (error != 0) {
			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
			    diagnostic, ksi->ks_in_serial);
		}
		/* Else sadb_get_sa() took care of things. */
		break;
	case SADB_FLUSH:
		sadbp_flush(&espstack->esp_sadb, espstack->ipsecesp_netstack);
		sadb_pfkey_echo(espstack->esp_pfkey_q, mp, samsg, ksi, NULL);
		break;
	case SADB_REGISTER:
		/*
		 * Hmmm, let's do it! Check for extensions (there should
		 * be none), extract the fields, call esp_register_out(),
		 * then either free or report an error.
		 *
		 * Keysock takes care of the PF_KEY bookkeeping for this.
		 */
		if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid,
		    ksi->ks_in_serial, espstack, msg_getcred(mp, NULL))) {
			freemsg(mp);
		} else {
			/*
			 * Only way this path hits is if there is a memory
			 * failure. It will not return B_FALSE because of
			 * lack of esp_pfkey_q if I am in wput().
			 */
			sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM,
			    diagnostic, ksi->ks_in_serial);
		}
		break;
	case SADB_UPDATE:
	case SADB_X_UPDATEPAIR:
		/*
		 * Find a larval, if not there, find a full one and get
		 * strict.
		 */
		error = esp_update_sa(mp, ksi, &diagnostic, espstack,
		    samsg->sadb_msg_type);
		if (error != 0) {
			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
			    diagnostic, ksi->ks_in_serial);
		}
		/* else esp_update_sa() took care of things. */
		break;
	case SADB_GETSPI:
		/*
		 * Reserve a new larval entry.
		 */
		esp_getspi(mp, ksi, espstack);
		break;
	case SADB_ACQUIRE:
		/*
		 * Find larval and/or ACQUIRE record and kill it (them), I'm
		 * most likely an error.  Inbound ACQUIRE messages should only
		 * have the base header.
		 */
		sadb_in_acquire(samsg, &espstack->esp_sadb,
		    espstack->esp_pfkey_q, espstack->ipsecesp_netstack);
		freemsg(mp);
		break;
	case SADB_DUMP:
		/*
		 * Dump all entries.
		 */
		esp_dump(mp, ksi, espstack);
		/* esp_dump will take care of the return message, etc. */
		break;
	case SADB_EXPIRE:
		/* Should never reach me. */
		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EOPNOTSUPP,
		    diagnostic, ksi->ks_in_serial);
		break;
	default:
		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL,
		    SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial);
		break;
	}
}

/*
 * Handle case where PF_KEY says it can't find a keysock for one of my
 * ACQUIRE messages.
 */
static void
esp_keysock_no_socket(mblk_t *mp, ipsecesp_stack_t *espstack)
{
	sadb_msg_t *samsg;
	keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr;

	if (mp->b_cont == NULL) {
		freemsg(mp);
		return;
	}
	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;

	/*
	 * If keysock can't find any registered, delete the acquire record
	 * immediately, and handle errors.
	 */
	if (samsg->sadb_msg_type == SADB_ACQUIRE) {
		samsg->sadb_msg_errno = kse->ks_err_errno;
		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
		/*
		 * Use the write-side of the esp_pfkey_q
		 */
		sadb_in_acquire(samsg, &espstack->esp_sadb,
		    WR(espstack->esp_pfkey_q), espstack->ipsecesp_netstack);
	}

	freemsg(mp);
}

/*
 * ESP module read put routine.  Nothing to do on the read side; pass
 * everything straight through.
 */
static int
ipsecesp_rput(queue_t *q, mblk_t *mp)
{
	putnext(q, mp);
	return (0);
}

/*
 * ESP module write put routine.
 */
static int
ipsecesp_wput(queue_t *q, mblk_t *mp)
{
	ipsec_info_t *ii;
	struct iocblk *iocp;
	ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)q->q_ptr;

	esp3dbg(espstack, ("In esp_wput().\n"));

	/* NOTE: Each case must take care of freeing or passing mp. */
	switch (mp->b_datap->db_type) {
	case M_CTL:
		if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) {
			/* Not big enough message. */
			freemsg(mp);
			break;
		}
		ii = (ipsec_info_t *)mp->b_rptr;

		switch (ii->ipsec_info_type) {
		case KEYSOCK_OUT_ERR:
			esp1dbg(espstack, ("Got KEYSOCK_OUT_ERR message.\n"));
			esp_keysock_no_socket(mp, espstack);
			break;
		case KEYSOCK_IN:
			ESP_BUMP_STAT(espstack, keysock_in);
			esp3dbg(espstack, ("Got KEYSOCK_IN message.\n"));

			/* Parse the message. */
			esp_parse_pfkey(mp, espstack);
			break;
		case KEYSOCK_HELLO:
			sadb_keysock_hello(&espstack->esp_pfkey_q, q, mp,
			    esp_ager, (void *)espstack, &espstack->esp_event,
			    SADB_SATYPE_ESP);
			break;
		default:
			esp2dbg(espstack, ("Got M_CTL from above of 0x%x.\n",
			    ii->ipsec_info_type));
			freemsg(mp);
			break;
		}
		break;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		switch (iocp->ioc_cmd) {
		case ND_SET:
		case ND_GET:
			if (nd_getset(q, espstack->ipsecesp_g_nd, mp)) {
				qreply(q, mp);
				return (0);
			} else {
				iocp->ioc_error = ENOENT;
			}
			/* FALLTHRU */
		default:
			/* We really don't support any other ioctls, do we? */

			/* Return EINVAL */
			if (iocp->ioc_error != ENOENT)
				iocp->ioc_error = EINVAL;
			iocp->ioc_count = 0;
			mp->b_datap->db_type = M_IOCACK;
			qreply(q, mp);
			return (0);
		}
	default:
		esp3dbg(espstack,
		    ("Got default message, type %d, passing to IP.\n",
		    mp->b_datap->db_type));
		putnext(q, mp);
	}
	return (0);
}

/*
 * Wrapper to allow IP to trigger an ESP association failure message
 * during inbound SA selection.  Optionally logs the unknown-SPI event,
 * then drops the packet.
 */
void
ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt,
    uint32_t spi, void *addr, int af, ip_recv_attr_t *ira)
{
	netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	if (espstack->ipsecesp_log_unknown_spi) {
		ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi,
		    addr, af, espstack->ipsecesp_netstack);
	}

	ip_drop_packet(mp, B_TRUE, ira->ira_ill,
	    DROPPER(ipss, ipds_esp_no_sa),
	    &espstack->esp_dropper);
}

/*
 * Initialize the ESP input and output processing functions.
 */
void
ipsecesp_init_funcs(ipsa_t *sa)
{
	/* Only fill in handlers that have not already been set. */
	if (sa->ipsa_output_func == NULL)
		sa->ipsa_output_func = esp_outbound;
	if (sa->ipsa_input_func == NULL)
		sa->ipsa_input_func = esp_inbound;
}