1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990 The Regents of the University of California. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
 *
 * from tahoe: in_cksum.c 1.2 86/01/05
 * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>

#include <machine/in_cksum.h>

/*
 * Checksum routine for Internet Protocol family headers.
 *
 * This routine is very heavily used in the network
 * code and should be modified for each CPU to be as fast as possible.
 *
 * This implementation is 386 version.
 */

/* Fold a carry out of the low 16 bits back into the sum (ones-complement). */
#undef ADDCARRY
#define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff
/* Reduce the 32-bit partial sum to 16 bits, folding carries back in. */
#define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);}

/*
 * These asm statements require __volatile because they pass information
 * via the condition codes.  GCC does not currently provide a way to specify
 * the condition codes as an input or output operand.
 *
 * The LOAD macro below is effectively a prefetch into cache.  GCC will
 * load the value into a register but will not use it.  Since modern CPUs
 * reorder operations, this will generally take place in parallel with
 * other calculations.
 */

/*
 * in_cksum_skip:
 *	Compute the 16-bit ones-complement checksum over 'len' bytes of
 *	the mbuf chain 'm', starting 'skip' bytes into the chain, and
 *	return the one's complement of the result (ready to store in a
 *	protocol header).  If the chain runs out of data before 'len'
 *	bytes have been summed, a diagnostic is printed and the partial
 *	checksum is returned.
 */
u_short
in_cksum_skip(struct mbuf *m, int len, int skip)
{
	u_short *w;		/* current read position in the mbuf */
	unsigned sum = 0;	/* running 32-bit partial sum */
	int mlen = 0;		/* bytes left in current mbuf; -1 flags an
				 * odd trailing byte carried into the next
				 * mbuf via su.c[0] */
	int byte_swapped = 0;	/* nonzero while 'sum' is rotated by 8 bits
				 * to compensate for an odd start address */
	union { char c[2]; u_short s; } su;	/* staging for a 16-bit word
						 * split across mbufs */

	len -= skip;
	/* Step over the first 'skip' bytes of the chain. */
	for (; skip && m; m = m->m_next) {
		if (m->m_len > skip) {
			mlen = m->m_len - skip;
			w = (u_short *)(mtod(m, u_char *) + skip);
			goto skip_start;
		} else {
			skip -= m->m_len;
		}
	}

	for (;m && len; m = m->m_next) {
		if (m->m_len == 0)
			continue;
		w = mtod(m, u_short *);
		if (mlen == -1) {
			/*
			 * The first byte of this mbuf is the continuation
			 * of a word spanning between this mbuf and the
			 * last mbuf.
			 */

			/* su.c[0] is already saved when scanning previous
			 * mbuf.  sum was REDUCEd when we found mlen == -1
			 */
			su.c[1] = *(u_char *)w;
			sum += su.s;
			w = (u_short *)((char *)w + 1);
			mlen = m->m_len - 1;
			len--;
		} else
			mlen = m->m_len;
skip_start:
		if (len < mlen)
			mlen = len;
		len -= mlen;
		/*
		 * Force to long boundary so we do longword aligned
		 * memory operations
		 */
		if (3 & (int) w) {
			REDUCE;
			if ((1 & (int) w) && (mlen > 0)) {
				/*
				 * Odd start address: rotate the sum by
				 * 8 bits; byte_swapped records that it
				 * must be rotated back below.
				 */
				sum <<= 8;
				su.c[0] = *(char *)w;
				w = (u_short *)((char *)w + 1);
				mlen--;
				byte_swapped = 1;
			}
			if ((2 & (int) w) && (mlen >= 2)) {
				sum += *w++;
				mlen -= 2;
			}
		}
		/*
		 * Advance to a 486 cache line boundary.
		 */
		if (4 & (int) w && mlen >= 4) {
			__asm __volatile (
				"addl %1, %0\n"
				"adcl $0, %0"
				: "+r" (sum)
				: "g" (((const u_int32_t *)w)[0])
			);
			w += 2;
			mlen -= 4;
		}
		if (8 & (int) w && mlen >= 8) {
			__asm __volatile (
				"addl %1, %0\n"
				"adcl %2, %0\n"
				"adcl $0, %0"
				: "+r" (sum)
				: "g" (((const u_int32_t *)w)[0]),
				  "g" (((const u_int32_t *)w)[1])
			);
			w += 4;
			mlen -= 8;
		}
		/*
		 * Do as much of the checksum as possible 32 bits at a time.
		 * In fact, this loop is unrolled to make overhead from
		 * branches &c small.
		 */
		mlen -= 1;
		while ((mlen -= 32) >= 0) {
			/*
			 * Add with carry 16 words and fold in the last
			 * carry by adding a 0 with carry.
			 *
			 * The early ADD(16) and the LOAD(32) are to load
			 * the next 2 cache lines in advance on 486's.  The
			 * 486 has a penalty of 2 clock cycles for loading
			 * a cache line, plus whatever time the external
			 * memory takes to load the first word(s) addressed.
			 * These penalties are unavoidable.  Subsequent
			 * accesses to a cache line being loaded (and to
			 * other external memory?) are delayed until the
			 * whole load finishes.  These penalties are mostly
			 * avoided by not accessing external memory for
			 * 8 cycles after the ADD(16) and 12 cycles after
			 * the LOAD(32).  The loop terminates when mlen
			 * is initially 33 (not 32) to guarantee that
			 * the LOAD(32) is within bounds.
			 */
			__asm __volatile (
				"addl %1, %0\n"
				"adcl %2, %0\n"
				"adcl %3, %0\n"
				"adcl %4, %0\n"
				"adcl %5, %0\n"
				"mov %6, %%eax\n"
				"adcl %7, %0\n"
				"adcl %8, %0\n"
				"adcl %9, %0\n"
				"adcl $0, %0"
				: "+r" (sum)
				: "g" (((const u_int32_t *)w)[4]),
				  "g" (((const u_int32_t *)w)[0]),
				  "g" (((const u_int32_t *)w)[1]),
				  "g" (((const u_int32_t *)w)[2]),
				  "g" (((const u_int32_t *)w)[3]),
				  "g" (((const u_int32_t *)w)[8]),
				  "g" (((const u_int32_t *)w)[5]),
				  "g" (((const u_int32_t *)w)[6]),
				  "g" (((const u_int32_t *)w)[7])
				: "eax"
			);
			w += 16;
		}
		/* Undo the -1/-32 over-decrements from the loop above. */
		mlen += 32 + 1;
		if (mlen >= 32) {
			__asm __volatile (
				"addl %1, %0\n"
				"adcl %2, %0\n"
				"adcl %3, %0\n"
				"adcl %4, %0\n"
				"adcl %5, %0\n"
				"adcl %6, %0\n"
				"adcl %7, %0\n"
				"adcl %8, %0\n"
				"adcl $0, %0"
				: "+r" (sum)
				: "g" (((const u_int32_t *)w)[4]),
				  "g" (((const u_int32_t *)w)[0]),
				  "g" (((const u_int32_t *)w)[1]),
				  "g" (((const u_int32_t *)w)[2]),
				  "g" (((const u_int32_t *)w)[3]),
				  "g" (((const u_int32_t *)w)[5]),
				  "g" (((const u_int32_t *)w)[6]),
				  "g" (((const u_int32_t *)w)[7])
			);
			w += 16;
			mlen -= 32;
		}
		if (mlen >= 16) {
			__asm __volatile (
				"addl %1, %0\n"
				"adcl %2, %0\n"
				"adcl %3, %0\n"
				"adcl %4, %0\n"
				"adcl $0, %0"
				: "+r" (sum)
				: "g" (((const u_int32_t *)w)[0]),
				  "g" (((const u_int32_t *)w)[1]),
				  "g" (((const u_int32_t *)w)[2]),
				  "g" (((const u_int32_t *)w)[3])
			);
			w += 8;
			mlen -= 16;
		}
		if (mlen >= 8) {
			__asm __volatile (
				"addl %1, %0\n"
				"adcl %2, %0\n"
				"adcl $0, %0"
				: "+r" (sum)
				: "g" (((const u_int32_t *)w)[0]),
				  "g" (((const u_int32_t *)w)[1])
			);
			w += 4;
			mlen -= 8;
		}
		if (mlen == 0 && byte_swapped == 0)
			continue;	/* worth 1% maybe ?? */
		REDUCE;
		/* Sum any remaining 16-bit words. */
		while ((mlen -= 2) >= 0) {
			sum += *w++;
		}
		if (byte_swapped) {
			/* Rotate the sum back after an odd start address. */
			sum <<= 8;
			byte_swapped = 0;
			if (mlen == -1) {
				su.c[1] = *(char *)w;
				sum += su.s;
				mlen = 0;
			} else
				mlen = -1;
		} else if (mlen == -1)
			/*
			 * This mbuf has odd number of bytes.
			 * There could be a word split between
			 * this mbuf and the next mbuf.
			 * Save the last byte (to prepend to next mbuf).
			 */
			su.c[0] = *(char *)w;
	}

	if (len)
		printf("%s: out of data by %d\n", __func__, len);
	if (mlen == -1) {
		/* The last mbuf has odd # of bytes.  Follow the
		   standard (the odd byte is shifted left by 8 bits) */
		su.c[1] = 0;
		sum += su.s;
	}
	REDUCE;
	return (~sum & 0xffff);
}