/*	$NetBSD: in_cksum.c,v 1.14 2010/09/18 16:43:50 tsutsui Exp $	*/

/*
 * Copyright (c) 1993 Regents of the University of California.
 * All rights reserved.
 *
 * Permission to use, copy, modify, and distribute this software and its
 * documentation for any purpose, without fee, and without written agreement is
 * hereby granted, provided that the above copyright notice and the following
 * paragraph appears in all copies of this software.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 * EVENT SHALL THE REGENTS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * ccsum.c - Highly optimized MIPS checksum function.
 * by Jonathan Kay, Computer Systems Lab, UCSD 4/2/93
 *
 * Version 2.0
 * Techniques and credits:
 *	Basic algorithm is 3-instruction inner loop sum by Peter Desnoyers.
 *	Full word-size reading as described in Usenix W'93 paper.
 *	Pipelined latency absorption technique as described in paper.
 *	Unrolling chosen through testing and examination of actual workload.
 *	Rewrite in 'C' without loss of performance suggested by Vernon Schryver.
 *	15% faster than version 1 ("Usenix version").
 *	150% faster than Ultrix 4.2A checksum routine.
37 * 38 * BSD changes: Jonathan Stone, Stanford Distributed Systems Group, 1997-08-11 39 * 40 * re-written for incremental checksumming of BSD mbufs 41 * and byteswap out-of-phase mbuf sums. 42 */ 43 44 #include <sys/cdefs.h> 45 __KERNEL_RCSID(0, "$NetBSD: in_cksum.c,v 1.14 2010/09/18 16:43:50 tsutsui Exp $"); 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/mbuf.h> 50 51 #include <netinet/in_systm.h> 52 #include <netinet/in.h> 53 #include <netinet/ip.h> 54 #include <netinet/ip_var.h> 55 56 #include <machine/endian.h> 57 58 union memptr { 59 uint32_t *l; 60 uintptr_t u; 61 uint16_t *s; 62 uint8_t *c; 63 }; 64 65 static inline uint32_t fastsum(union memptr, int, unsigned int, int); 66 67 /* 68 * Compute 1's complement sum over a contiguous block at 'buf' for 'n' bytes. 69 * 70 * Add the resulting checksum into 'oldsum' using 1's complement. 71 * 'odd_aligned' is a boolean which if set, indicate the data in 'buf' 72 * starts at an odd byte alignment within the containing packet, 73 * and so we must byteswap the memory-aligned 1's-complement sum 74 * over the data before adding it to `oldsum'. 75 */ 76 static inline uint32_t 77 fastsum(union memptr buf, int n, unsigned int oldsum, int odd_aligned) 78 { 79 unsigned long hilo = 0, high = 0; 80 unsigned long w0, w1; 81 unsigned int sum = 0; 82 83 /* Align to 32 bits. */ 84 if (buf.u & 0x3) { 85 /* 86 * 16-bit-align. 87 * If buf is odd-byte-aligned, add the byte and toggle 88 * our byte-alignment flag. 89 * If we were odd-aligned on entry, an odd-aligned 90 * byte makes a 16-bit word with the previous odd byte, 91 * unaligned, making us aligned again. 92 * If we were not already odd-aligned, we are now, 93 * and we must byteswap our 16-bit-aligned sum of 94 *'buf' before accumulating it. 
95 */ 96 if (buf.u & 0x1) { 97 #if BYTE_ORDER == BIG_ENDIAN 98 sum += *(buf.c++); 99 #else 100 sum += (*(buf.c++) << 8); 101 #endif 102 n -= 1; 103 odd_aligned = !odd_aligned; 104 } 105 /* Skip to the end for very small mbufs */ 106 if (n <= 2) 107 goto postunaligned; 108 109 /* 32-bit-align */ 110 if (buf.u & 0x2) { 111 sum += *(buf.s++); 112 n -= 2; 113 } 114 } 115 116 /* 32-bit-aligned sum. 117 Peter Desnoyers' unbelievable 3-instruction main loop. */ 118 if (n < 64 + 8) 119 goto notmuchleft; 120 w0 = buf.l[0]; 121 w1 = buf.l[1]; 122 do { 123 hilo += w0; 124 high += w0 >> 16; 125 w0 = buf.l[2]; 126 127 hilo += w1; 128 high += w1 >> 16; 129 w1 = buf.l[3]; 130 131 hilo += w0; 132 high += w0 >> 16; 133 w0 = buf.l[4]; 134 135 hilo += w1; 136 high += w1 >> 16; 137 w1 = buf.l[5]; 138 139 hilo += w0; 140 high += w0 >> 16; 141 w0 = buf.l[6]; 142 143 hilo += w1; 144 high += w1 >> 16; 145 w1 = buf.l[7]; 146 147 hilo += w0; 148 high += w0 >> 16; 149 w0 = buf.l[8]; 150 151 hilo += w1; 152 high += w1 >> 16; 153 w1 = buf.l[9]; 154 155 156 hilo += w0; 157 high += w0 >> 16; 158 w0 = buf.l[10]; 159 160 hilo += w1; 161 high += w1 >> 16; 162 w1 = buf.l[11]; 163 164 hilo += w0; 165 high += w0 >> 16; 166 w0 = buf.l[12]; 167 168 hilo += w1; 169 high += w1 >> 16; 170 w1 = buf.l[13]; 171 172 hilo += w0; 173 high += w0 >> 16; 174 w0 = buf.l[14]; 175 176 hilo += w1; 177 high += w1 >> 16; 178 w1 = buf.l[15]; 179 180 hilo += w0; 181 high += w0 >> 16; 182 w0 = buf.l[16]; 183 184 hilo += w1; 185 high += w1 >> 16; 186 w1 = buf.l[17]; 187 188 189 n -= 64; 190 buf.c += 64; 191 192 } while (n >= 64 + 8); 193 hilo -= (high << 16); 194 sum += hilo; 195 sum += high; 196 197 notmuchleft: 198 high = hilo = 0; 199 while (n >= sizeof(uint32_t)) { 200 w0 = *(buf.l++); 201 hilo += w0; 202 high += w0 >> 16; 203 n -= 4; 204 } 205 hilo -= (high << 16); 206 sum += hilo; 207 sum += high; 208 209 postunaligned: 210 /* handle post 32bit unaligned payloads */ 211 if (n >= sizeof(uint16_t)) { 212 sum += 
*(buf.s++); 213 n -= sizeof(uint16_t); 214 } 215 216 /* handle a trailing odd byte */ 217 if (n > 0) { 218 #if BYTE_ORDER == BIG_ENDIAN 219 sum += *(buf.c++) << 8; 220 #else 221 sum += *(buf.c++); 222 #endif 223 n = 0; 224 } 225 226 /* 227 * compensate for a trailing byte in previous mbuf 228 * by byteswapping the memory-aligned sum of this mbuf. 229 */ 230 if (odd_aligned) { 231 sum = (sum & 0xffff) + (sum >> 16); 232 sum = (sum & 0xffff) + (sum >> 16); 233 sum = oldsum + ((sum >> 8) & 0xff) + ((sum & 0xff) << 8); 234 } else { 235 /* add upper and lower halfwords together to get full sum */ 236 sum = oldsum + sum; 237 sum = (sum & 0xffff) + (sum >> 16); 238 } 239 240 /* fold carry from combining sums */ 241 sum = (sum & 0xffff) + (sum >> 16); 242 return(sum); 243 } 244 245 /* 246 * Checksum routine for Internet Protocol family headers. 247 * 248 */ 249 static inline int 250 in_cksum_internal(struct mbuf *m, int off, int len, uint32_t sum) 251 { 252 /*u_short **/ union memptr w; 253 int mlen; 254 int odd_aligned = 0; 255 256 for (; m && len; m = m->m_next) { 257 if (m->m_len == 0) 258 continue; 259 w.c = mtod(m, u_char *) + off; 260 mlen = m->m_len - off; 261 off = 0; 262 if (len < mlen) 263 mlen = len; 264 len -= mlen; 265 266 sum = fastsum(w, mlen, sum, odd_aligned); 267 odd_aligned = (odd_aligned + mlen) & 0x01; 268 } 269 if (len != 0) { 270 printf("cksum: out of data, %d\n", len); 271 } 272 return (~sum & 0xffff); 273 } 274 275 int 276 in_cksum(struct mbuf *m, int len) 277 { 278 279 return (in_cksum_internal(m, 0, len, 0)); 280 } 281 282 int 283 in4_cksum(struct mbuf *m, uint8_t nxt, int off, int len) 284 { 285 uint sum = 0; 286 287 if (nxt != 0) { 288 uint16_t *w; 289 union { 290 struct ipovly ipov; 291 u_int16_t w[10]; 292 } u; 293 294 /* pseudo header */ 295 memset(&u.ipov, 0, sizeof(u.ipov)); 296 u.ipov.ih_len = htons(len); 297 u.ipov.ih_pr = nxt; 298 u.ipov.ih_src = mtod(m, struct ip *)->ip_src; 299 u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst; 300 w = 
u.w; 301 /* assumes sizeof(ipov) == 20 */ 302 sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; 303 sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9]; 304 } 305 306 /* skip unnecessary part */ 307 while (m && off > 0) { 308 if (m->m_len > off) 309 break; 310 off -= m->m_len; 311 m = m->m_next; 312 } 313 314 return (in_cksum_internal(m, off, len, sum)); 315 } 316