/*	$OpenBSD: frag6.c,v 1.86 2019/08/26 18:47:53 bluhm Exp $	*/
/*	$KAME: frag6.c,v 1.40 2002/05/27 21:40:31 itojun Exp $	*/

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/mutex.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#include <netinet/ip.h>		/* for ECN definitions */

/* Protects `frag6_queue', `frag6_nfragpackets' and `frag6_nfrags'. */
struct mutex frag6_mutex = MUTEX_INITIALIZER(IPL_SOFTNET);

u_int frag6_nfragpackets;
u_int frag6_nfrags;
TAILQ_HEAD(ip6q_head, ip6q) frag6_queue;	/* ip6 reassembly queue */

void frag6_freef(struct ip6q *);
void frag6_unlink(struct ip6q *, struct ip6q_head *);

struct pool ip6af_pool;
struct pool ip6q_pool;

/*
 * Initialise reassembly queue and pools.
 */
void
frag6_init(void)
{
	pool_init(&ip6af_pool, sizeof(struct ip6asfrag),
	    0, IPL_SOFTNET, 0, "ip6af", NULL);
	pool_init(&ip6q_pool, sizeof(struct ip6q),
	    0, IPL_SOFTNET, 0, "ip6q", NULL);

	TAILQ_INIT(&frag6_queue);
}

/*
 * In RFC2460, the fragmentation and reassembly rules do not agree with
 * each other in terms of next header field handling in the fragment
 * header.  While the sender will use the same value for all of the
 * fragmented packets, the receiver is advised not to check them for
 * consistency.
 *
 * fragment rule (p20):
 *	(2) A Fragment header containing:
 *	The Next Header value that identifies the first header of
 *	the Fragmentable Part of the original packet.
 *		-> next header field is the same for all fragments
 *
 * reassembly rule (p21):
 *	The Next Header field of the last header of the Unfragmentable
 *	Part is obtained from the Next Header field of the first
 *	fragment's Fragment header.
 *		-> should grab it from the first fragment only
 *
 * The following note also contradicts the fragment rule - no one is going
 * to send different fragments with different next header fields.
 *
 * additional note (p22):
 *	The Next Header values in the Fragment headers of different
 *	fragments of the same original packet may differ.  Only the value
 *	from the Offset zero fragment packet is used for reassembly.
 *		-> should grab it from the first fragment only
 *
 * There is no explicit reason given in the RFC.  Historical reasons, maybe?
 */
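/*
 * (RFC 8200, which obsoletes RFC 2460, keeps the same reassembly rule in
 * section 4.5: only the Next Header value from the offset-zero fragment
 * is used.)
 */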
/*
 * Fragment input
 */
int
frag6_input(struct mbuf **mp, int *offp, int proto, int af)
{
	struct mbuf *m = *mp, *t;
	struct ip6_hdr *ip6;
	struct ip6_frag *ip6f;
	struct ip6q *q6;
	struct ip6asfrag *af6, *ip6af, *naf6, *paf6;
	int offset = *offp, nxt, i, next;
	int first_frag = 0;
	int fragoff, frgpartlen;	/* must be larger than u_int16_t */
	u_int8_t ecn, ecn0;

	ip6 = mtod(m, struct ip6_hdr *);
	IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f));
	if (ip6f == NULL)
		return IPPROTO_DONE;

	/* jumbo payload can't contain a fragment header */
	if (ip6->ip6_plen == 0) {
		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
		return IPPROTO_DONE;
	}

	/*
	 * Check whether the fragment's payload length is a multiple
	 * of 8 octets.
	 * sizeof(struct ip6_frag) == 8
	 * sizeof(struct ip6_hdr) == 40
	 */
	if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
	    (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
		    offsetof(struct ip6_hdr, ip6_plen));
		return IPPROTO_DONE;
	}

	ip6stat_inc(ip6s_fragments);

	/* offset now points to data portion */
	offset += sizeof(struct ip6_frag);

	/*
	 * RFC6946: A host that receives an IPv6 packet which includes
	 * a Fragment Header with the "Fragment Offset" equal to 0 and
	 * the "M" bit equal to 0 MUST process such packet in isolation
	 * from any other packets/fragments.
	 */
	fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
	if (fragoff == 0 && !(ip6f->ip6f_offlg & IP6F_MORE_FRAG)) {
		ip6stat_inc(ip6s_reassembled);
		*offp = offset;
		return ip6f->ip6f_nxt;
	}

	/* Ignore empty non-atomic fragment, do not classify as overlapping. */
	if (sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) <= offset) {
		m_freem(m);
		return IPPROTO_DONE;
	}

	mtx_enter(&frag6_mutex);

	/*
	 * Enforce upper bound on number of fragments.
	 * If maxfrag is 0, never accept fragments.
	 * If maxfrag is -1, accept all fragments without limitation.
	 */
	if (ip6_maxfrags >= 0 && frag6_nfrags >= (u_int)ip6_maxfrags) {
		mtx_leave(&frag6_mutex);
		goto dropfrag;
	}

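	/*
	 * Look up the reassembly queue for this packet.  Fragments belong
	 * to the same original packet if they carry the same source
	 * address, destination address and fragment identification
	 * (RFC 8200, section 4.5).
	 */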
	TAILQ_FOREACH(q6, &frag6_queue, ip6q_queue)
		if (ip6f->ip6f_ident == q6->ip6q_ident &&
		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst))
			break;

	if (q6 == NULL) {
		/*
		 * the first fragment to arrive, create a reassembly queue.
		 */
		first_frag = 1;

		/*
		 * Enforce upper bound on number of fragmented packets
		 * for which we attempt reassembly;
		 * If maxfragpackets is 0, never accept fragments.
		 * If maxfragpackets is -1, accept all fragments without
		 * limitation.
		 */
		if (ip6_maxfragpackets >= 0 &&
		    frag6_nfragpackets >= (u_int)ip6_maxfragpackets) {
			mtx_leave(&frag6_mutex);
			goto dropfrag;
		}
		frag6_nfragpackets++;
		q6 = pool_get(&ip6q_pool, PR_NOWAIT | PR_ZERO);
		if (q6 == NULL) {
			mtx_leave(&frag6_mutex);
			goto dropfrag;
		}

		TAILQ_INSERT_HEAD(&frag6_queue, q6, ip6q_queue);

		/* ip6q_nxt will be filled afterwards, from 1st fragment */
		LIST_INIT(&q6->ip6q_asfrag);
		q6->ip6q_ident = ip6f->ip6f_ident;
		q6->ip6q_ttl = IPV6_FRAGTTL;
		q6->ip6q_src = ip6->ip6_src;
		q6->ip6q_dst = ip6->ip6_dst;
		q6->ip6q_ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
		q6->ip6q_unfrglen = -1;	/* The 1st fragment has not arrived. */
		q6->ip6q_nfrag = 0;
	}

	/*
	 * If it's the 1st fragment, record the length of the
	 * unfragmentable part and the next header of the fragment header.
	 */
	if (fragoff == 0) {
		q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
		    sizeof(struct ip6_frag);
		q6->ip6q_nxt = ip6f->ip6f_nxt;
	}

	/*
	 * Check that the reassembled packet would not exceed 65535 bytes
	 * in size.
	 * If it would exceed, discard the fragment and return an ICMP error.
	 */
	frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
	if (q6->ip6q_unfrglen >= 0) {
		/* The 1st fragment has already arrived. */
		if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
			mtx_leave(&frag6_mutex);
			icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
			    offset - sizeof(struct ip6_frag) +
			    offsetof(struct ip6_frag, ip6f_offlg));
			return (IPPROTO_DONE);
		}
	} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
		mtx_leave(&frag6_mutex);
		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
		    offset - sizeof(struct ip6_frag) +
		    offsetof(struct ip6_frag, ip6f_offlg));
		return (IPPROTO_DONE);
	}
	/*
	 * If it's the first fragment, do the above check for each
	 * fragment already stored in the reassembly queue.
	 */
	if (fragoff == 0) {
		LIST_FOREACH_SAFE(af6, &q6->ip6q_asfrag, ip6af_list, naf6) {
			if (q6->ip6q_unfrglen + af6->ip6af_off +
			    af6->ip6af_frglen > IPV6_MAXPACKET) {
				struct mbuf *merr = af6->ip6af_m;
				struct ip6_hdr *ip6err;
				int erroff = af6->ip6af_offset;

				/* dequeue the fragment. */
				LIST_REMOVE(af6, ip6af_list);
				pool_put(&ip6af_pool, af6);

				/* adjust pointer. */
				ip6err = mtod(merr, struct ip6_hdr *);

				/*
				 * Restore source and destination addresses
				 * in the erroneous IPv6 header.
				 */
				ip6err->ip6_src = q6->ip6q_src;
				ip6err->ip6_dst = q6->ip6q_dst;

				icmp6_error(merr, ICMP6_PARAM_PROB,
				    ICMP6_PARAMPROB_HEADER,
				    erroff - sizeof(struct ip6_frag) +
				    offsetof(struct ip6_frag, ip6f_offlg));
			}
		}
	}

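	/*
	 * Allocate a descriptor for this fragment: ip6af_off is the byte
	 * offset of the fragment within the original payload, ip6af_offset
	 * is the offset of the fragment data within its own mbuf chain
	 * (i.e. past all headers), and ip6af_m keeps the mbuf around until
	 * reassembly is complete.
	 */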
	ip6af = pool_get(&ip6af_pool, PR_NOWAIT | PR_ZERO);
	if (ip6af == NULL) {
		mtx_leave(&frag6_mutex);
		goto dropfrag;
	}
	ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
	ip6af->ip6af_off = fragoff;
	ip6af->ip6af_frglen = frgpartlen;
	ip6af->ip6af_offset = offset;
	ip6af->ip6af_m = m;

	if (first_frag) {
		paf6 = NULL;
		goto insert;
	}

	/*
	 * Handle ECN by comparing this segment with the first one;
	 * if CE is set, do not lose CE.
	 * drop if CE and not-ECT are mixed for the same packet.
	 */
	ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
	ecn0 = q6->ip6q_ecn;
	if (ecn == IPTOS_ECN_CE) {
		if (ecn0 == IPTOS_ECN_NOTECT) {
			mtx_leave(&frag6_mutex);
			pool_put(&ip6af_pool, ip6af);
			goto dropfrag;
		}
		if (ecn0 != IPTOS_ECN_CE)
			q6->ip6q_ecn = IPTOS_ECN_CE;
	}
	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
		mtx_leave(&frag6_mutex);
		pool_put(&ip6af_pool, ip6af);
		goto dropfrag;
	}

	/*
	 * Find a segment which begins after this one does.
	 */
	for (paf6 = NULL, af6 = LIST_FIRST(&q6->ip6q_asfrag);
	    af6 != NULL;
	    paf6 = af6, af6 = LIST_NEXT(af6, ip6af_list))
		if (af6->ip6af_off > ip6af->ip6af_off)
			break;

	/*
	 * RFC 5722, Errata 3089:  When reassembling an IPv6 datagram, if one
	 * or more its constituent fragments is determined to be an overlapping
	 * fragment, the entire datagram (and any constituent fragments) MUST
	 * be silently discarded.
	 */
	if (paf6 != NULL) {
		i = (paf6->ip6af_off + paf6->ip6af_frglen) - ip6af->ip6af_off;
		if (i > 0)
			goto flushfrags;
	}
	if (af6 != NULL) {
		i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
		if (i > 0)
			goto flushfrags;
	}

insert:
	/*
	 * Stick new segment in its place;
	 * check for complete reassembly.
	 */
	if (paf6 != NULL)
		LIST_INSERT_AFTER(paf6, ip6af, ip6af_list);
	else
		LIST_INSERT_HEAD(&q6->ip6q_asfrag, ip6af, ip6af_list);
	frag6_nfrags++;
	q6->ip6q_nfrag++;
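	/*
	 * Scan the sorted fragment list: the packet is complete only if
	 * every fragment starts exactly where the previous one ended and
	 * the last fragment has the M (more fragments) bit clear.
	 */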
	next = 0;
	for (paf6 = NULL, af6 = LIST_FIRST(&q6->ip6q_asfrag);
	    af6 != NULL;
	    paf6 = af6, af6 = LIST_NEXT(af6, ip6af_list)) {
		if (af6->ip6af_off != next) {
			mtx_leave(&frag6_mutex);
			return IPPROTO_DONE;
		}
		next += af6->ip6af_frglen;
	}
	if (paf6->ip6af_mff) {
		mtx_leave(&frag6_mutex);
		return IPPROTO_DONE;
	}

	/*
	 * Reassembly is complete; concatenate fragments.
	 */
	ip6af = LIST_FIRST(&q6->ip6q_asfrag);
	LIST_REMOVE(ip6af, ip6af_list);
	t = m = ip6af->ip6af_m;
	while ((af6 = LIST_FIRST(&q6->ip6q_asfrag)) != NULL) {
		LIST_REMOVE(af6, ip6af_list);
		while (t->m_next)
			t = t->m_next;
		t->m_next = af6->ip6af_m;
		m_adj(t->m_next, af6->ip6af_offset);
		m_removehdr(t->m_next);
		pool_put(&ip6af_pool, af6);
	}

	/* adjust offset to point where the original next header starts */
	offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
	pool_put(&ip6af_pool, ip6af);
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
	ip6->ip6_src = q6->ip6q_src;
	ip6->ip6_dst = q6->ip6q_dst;
	if (q6->ip6q_ecn == IPTOS_ECN_CE)
		ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
	nxt = q6->ip6q_nxt;

	/* Delete frag6 header */
	if (frag6_deletefraghdr(m, offset) != 0) {
		TAILQ_REMOVE(&frag6_queue, q6, ip6q_queue);
		frag6_nfrags -= q6->ip6q_nfrag;
		frag6_nfragpackets--;
		mtx_leave(&frag6_mutex);
		pool_put(&ip6q_pool, q6);
		goto dropfrag;
	}

	TAILQ_REMOVE(&frag6_queue, q6, ip6q_queue);
	frag6_nfrags -= q6->ip6q_nfrag;
	frag6_nfragpackets--;

	mtx_leave(&frag6_mutex);

	pool_put(&ip6q_pool, q6);

	m_calchdrlen(m);

	/*
	 * Restore NXT to the original.
	 */
	{
		int prvnxt = ip6_get_prevhdr(m, offset);
		uint8_t *prvnxtp;

		IP6_EXTHDR_GET(prvnxtp, uint8_t *, m, prvnxt,
		    sizeof(*prvnxtp));
		if (prvnxtp == NULL)
			goto dropfrag;
		*prvnxtp = nxt;
	}

	ip6stat_inc(ip6s_reassembled);

	/*
	 * Tell launch routine the next header
	 */

	*mp = m;
	*offp = offset;

	return nxt;

flushfrags:
	TAILQ_REMOVE(&frag6_queue, q6, ip6q_queue);
	frag6_nfrags -= q6->ip6q_nfrag;
	frag6_nfragpackets--;

	mtx_leave(&frag6_mutex);

	pool_put(&ip6af_pool, ip6af);

	while ((af6 = LIST_FIRST(&q6->ip6q_asfrag)) != NULL) {
		LIST_REMOVE(af6, ip6af_list);
		m_freem(af6->ip6af_m);
		pool_put(&ip6af_pool, af6);
	}
	ip6stat_add(ip6s_fragdropped, q6->ip6q_nfrag + 1);
	pool_put(&ip6q_pool, q6);
	m_freem(m);
	return IPPROTO_DONE;

dropfrag:
	ip6stat_inc(ip6s_fragdropped);
	m_freem(m);
	return IPPROTO_DONE;
}

/*
 * Delete fragment header after the unfragmentable header portions.
 */
int
frag6_deletefraghdr(struct mbuf *m, int offset)
{
	struct mbuf *t;

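	/*
	 * If the fragment header lies entirely within the first mbuf,
	 * slide the preceding (unfragmentable) headers forward over it
	 * and trim the now unused space off the front of the mbuf.
	 * Otherwise split the chain right before the fragment header,
	 * strip the header from the tail part and rejoin the pieces.
	 */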
	if (m->m_len >= offset + sizeof(struct ip6_frag)) {
		memmove(mtod(m, caddr_t) + sizeof(struct ip6_frag),
		    mtod(m, caddr_t), offset);
		m->m_data += sizeof(struct ip6_frag);
		m->m_len -= sizeof(struct ip6_frag);
	} else {
		/* this comes with no copy if the boundary is on a cluster */
		if ((t = m_split(m, offset, M_DONTWAIT)) == NULL)
			return (ENOBUFS);
		m_adj(t, sizeof(struct ip6_frag));
		m_cat(m, t);
	}

	return (0);
}

/*
 * Free a fragment reassembly header and all
 * associated datagrams.
 * The header must not be in any queue.
 */
void
frag6_freef(struct ip6q *q6)
{
	struct ip6asfrag *af6;

	while ((af6 = LIST_FIRST(&q6->ip6q_asfrag)) != NULL) {
		struct mbuf *m = af6->ip6af_m;

		LIST_REMOVE(af6, ip6af_list);

		/*
		 * Return ICMP time exceeded error for the 1st fragment.
		 * Just free other fragments.
		 */
		if (af6->ip6af_off == 0) {
			struct ip6_hdr *ip6;

			/* adjust pointer */
			ip6 = mtod(m, struct ip6_hdr *);

			/* restore source and destination addresses */
			ip6->ip6_src = q6->ip6q_src;
			ip6->ip6_dst = q6->ip6q_dst;

			NET_LOCK();
			icmp6_error(m, ICMP6_TIME_EXCEEDED,
			    ICMP6_TIME_EXCEED_REASSEMBLY, 0);
			NET_UNLOCK();
		} else
			m_freem(m);
		pool_put(&ip6af_pool, af6);
	}
	pool_put(&ip6q_pool, q6);
}

/*
 * Unlinks a fragment reassembly header from the reassembly queue
 * and inserts it into a given remove queue.
 */
void
frag6_unlink(struct ip6q *q6, struct ip6q_head *rmq6)
{
	MUTEX_ASSERT_LOCKED(&frag6_mutex);

	TAILQ_REMOVE(&frag6_queue, q6, ip6q_queue);
	TAILQ_INSERT_HEAD(rmq6, q6, ip6q_queue);
	frag6_nfrags -= q6->ip6q_nfrag;
	frag6_nfragpackets--;
}

/*
 * IPv6 reassembling timer processing;
 * if a timer expires on a reassembly
 * queue, discard it.
 */
void
frag6_slowtimo(void)
{
	struct ip6q_head rmq6;
	struct ip6q *q6, *nq6;

	TAILQ_INIT(&rmq6);

	mtx_enter(&frag6_mutex);

	TAILQ_FOREACH_SAFE(q6, &frag6_queue, ip6q_queue, nq6) {
		if (--q6->ip6q_ttl == 0) {
			ip6stat_inc(ip6s_fragtimeout);
			frag6_unlink(q6, &rmq6);
		}
	}

	/*
	 * If we are over the maximum number of fragments
	 * (due to the limit being lowered), drain off
	 * enough to get down to the new limit.
	 */
	while (frag6_nfragpackets > (u_int)ip6_maxfragpackets &&
	    !TAILQ_EMPTY(&frag6_queue)) {
		ip6stat_inc(ip6s_fragoverflow);
		frag6_unlink(TAILQ_LAST(&frag6_queue, ip6q_head), &rmq6);
	}

	mtx_leave(&frag6_mutex);

	while ((q6 = TAILQ_FIRST(&rmq6)) != NULL) {
		TAILQ_REMOVE(&rmq6, q6, ip6q_queue);
		frag6_freef(q6);
	}
}