1 /* $NetBSD: in_cksum.c,v 1.9 2002/07/29 09:14:36 itojun Exp $ */ 2 3 /* 4 * Copyright (c) 1993 Regents of the University of California. 5 * All rights reserved. 6 * 7 * Permission to use, copy, modify, and distribute this software and its 8 * documentation for any purpose, without fee, and without written agreement is 9 * hereby granted, provided that the above copyright notice and the following 10 * paragraph appears in all copies of this software. 11 * 12 * THIS SOFTWARE IS PROVIDED BY THE REGENTS ``AS IS'' AND ANY EXPRESS OR 13 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 14 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 15 * EVENT SHALL THE REGENTS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 16 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 17 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 18 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 19 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 20 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 */ 23 24 /* 25 * ccsum.c - Highly optimized MIPS checksum function. 26 * by Jonathan Kay, Computer Systems Lab, UCSD 4/2/93 27 * 28 * Version 2.0 29 * Techniques and credits: 30 * Basic algorithm is 3-instruction inner loop sum by Peter Desnoyers. 31 * Full word-size reading as described in Usenix W'93 paper. 32 * Pipelined latency absoption technique as described in paper. 33 * Unrolling chosen through testing and examination of actual workload. 34 * Rewrite in 'C' without loss of performance suggested by Vernon Schryver. 35 * 15% faster than version 1 ("Usenix version"). 36 * 150% faster than Ultrix 4.2A checksum routine. 37 * 38 * BSD changes: Jonathan Stone, Stanford Distributed Systems Group, 1997-08-11 39 * 40 * re-written for incremental checksumming of BSD mbufs 41 * and byteswap out-of-phase mbuf sums. 42 */ 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/mbuf.h> 47 #include <sys/cdefs.h> 48 49 #include <netinet/in_systm.h> 50 #include <netinet/in.h> 51 #include <netinet/ip.h> 52 #include <netinet/ip_var.h> 53 54 #include <machine/endian.h> 55 56 union memptr { 57 unsigned int *i; 58 unsigned long *l; 59 unsigned long u; 60 unsigned short *s; 61 unsigned char *c; 62 }; 63 64 static inline uint32_t fastsum(union memptr, int, unsigned int, int); 65 66 /* 67 * Compute 1's complement sum over a contiguous block at 'buf' for 'n' bytes. 68 * 69 * Add the resulting checksum into 'oldsum' using 1's complement. 70 * 'odd_aligned' is a boolean which if set, indicate the data in 'buf' 71 * starts at an odd byte alignment within the containing packet, 72 * and so we must byteswap the memory-aligned 1's-complement sum 73 * over the data before adding it to `oldsum'. 74 */ 75 static inline uint32_t 76 fastsum(union memptr buf, int n, unsigned int oldsum, int odd_aligned) 77 { 78 unsigned long hilo = 0, high = 0; 79 unsigned long w0, w1; 80 unsigned int sum = 0; 81 82 /* Align to 32 bits. */ 83 if (buf.u & 0x3) { 84 /* Skip to the end for very small mbufs */ 85 if (n < 3) 86 goto verylittleleft; 87 88 /* 89 * 16-bit-align. 90 * If buf is odd-byte-aligned, add the byte and toggle 91 * our byte-alignment flag. 92 * If we were odd-aligned on entry, an odd-aligned 93 * byte makes a 16-bit word with the previous odd byte, 94 * unaligned, making us aligned again. 95 * If we were not already odd-aligned, we are now, 96 * and we must byteswap our 16-bit-aligned sum of 97 *'buf' before accumulating it. 98 */ 99 if (buf.u & 0x1) { 100 #if BYTE_ORDER == BIG_ENDIAN 101 sum += *(buf.c++); 102 #else 103 sum += (*(buf.c++) << 8); 104 #endif 105 n -= 1; 106 odd_aligned = !odd_aligned; 107 } 108 109 /* 32-bit-align */ 110 if (buf.u & 0x2) { 111 sum += *(buf.s++); 112 n -= 2; 113 } 114 } 115 116 /* 32-bit-aligned sum. 117 Peter Desnoyers' unbelievable 3-instruction main loop. */ 118 if (n < 64 + 8) 119 goto notmuchleft; 120 w0 = buf.l[0]; 121 w1 = buf.l[1]; 122 do { 123 hilo += w0; 124 high += w0 >> 16; 125 w0 = buf.l[2]; 126 127 hilo += w1; 128 high += w1 >> 16; 129 w1 = buf.l[3]; 130 131 hilo += w0; 132 high += w0 >> 16; 133 w0 = buf.l[4]; 134 135 hilo += w1; 136 high += w1 >> 16; 137 w1 = buf.l[5]; 138 139 hilo += w0; 140 high += w0 >> 16; 141 w0 = buf.l[6]; 142 143 hilo += w1; 144 high += w1 >> 16; 145 w1 = buf.l[7]; 146 147 hilo += w0; 148 high += w0 >> 16; 149 w0 = buf.l[8]; 150 151 hilo += w1; 152 high += w1 >> 16; 153 w1 = buf.l[9]; 154 155 156 hilo += w0; 157 high += w0 >> 16; 158 w0 = buf.l[10]; 159 160 hilo += w1; 161 high += w1 >> 16; 162 w1 = buf.l[11]; 163 164 hilo += w0; 165 high += w0 >> 16; 166 w0 = buf.l[12]; 167 168 hilo += w1; 169 high += w1 >> 16; 170 w1 = buf.l[13]; 171 172 hilo += w0; 173 high += w0 >> 16; 174 w0 = buf.l[14]; 175 176 hilo += w1; 177 high += w1 >> 16; 178 w1 = buf.l[15]; 179 180 hilo += w0; 181 high += w0 >> 16; 182 w0 = buf.l[16]; 183 184 hilo += w1; 185 high += w1 >> 16; 186 w1 = buf.l[17]; 187 188 189 n -= 64; 190 buf.c += 64; 191 192 } while (n >= 64 + 8); 193 hilo -= (high << 16); 194 sum += hilo; 195 sum += high; 196 197 notmuchleft: 198 high = hilo = 0; 199 while (n >= 4) { 200 w0 = *(buf.l++); 201 hilo += w0; 202 high += w0 >> 16; 203 n -= 4; 204 } 205 hilo -= (high << 16); 206 sum += hilo; 207 sum += high; 208 209 while (n > 1) { 210 n -= sizeof(*buf.s); 211 sum += *(buf.s++); 212 } 213 214 verylittleleft: 215 /* handle trailing byte and short (possibly) unaligned payloads */ 216 while (n-- > 0) { 217 #if BYTE_ORDER == BIG_ENDIAN 218 sum += *buf.c << 8; 219 #else 220 sum += *buf.c; 221 #endif 222 } 223 224 /* 225 * compensate for a trailing byte in previous mbuf 226 * by byteswapping the memory-aligned sum of this mbuf. 227 */ 228 if (odd_aligned) { 229 sum = (sum & 0xffff) + (sum >> 16); 230 sum = (sum & 0xffff) + (sum >> 16); 231 sum = oldsum + ((sum >> 8) & 0xff) + ((sum & 0xff) << 8); 232 } else { 233 /* add upper and lower halfwords together to get full sum */ 234 sum = oldsum + sum; 235 sum = (sum & 0xffff) + (sum >> 16); 236 } 237 238 /* fold carry from combining sums */ 239 sum = (sum & 0xffff) + (sum >> 16); 240 return(sum); 241 } 242 243 /* 244 * Checksum routine for Internet Protocol family headers. 245 * 246 */ 247 static inline int 248 in_cksum_internal(struct mbuf *m, int off, int len, uint32_t sum) 249 { 250 /*u_short **/ union memptr w; 251 int mlen; 252 int odd_aligned = 0; 253 254 for (; m && len; m = m->m_next) { 255 if (m->m_len == 0) 256 continue; 257 w.c = mtod(m, u_char *) + off; 258 mlen = m->m_len - off; 259 off = 0; 260 if (len < mlen) 261 mlen = len; 262 len -= mlen; 263 264 sum = fastsum(w, mlen, sum, odd_aligned); 265 odd_aligned = (odd_aligned + mlen) & 0x01; 266 } 267 if (len != 0) { 268 printf("cksum: out of data, %d\n", len); 269 } 270 return (~sum & 0xffff); 271 } 272 273 int 274 in_cksum(struct mbuf *m, int len) 275 { 276 277 return (in_cksum_internal(m, 0, len, 0)); 278 } 279 280 int 281 in4_cksum(struct mbuf *m, uint8_t nxt, int off, int len) 282 { 283 uint sum = 0; 284 285 if (nxt != 0) { 286 uint16_t *w; 287 union { 288 struct ipovly ipov; 289 u_int16_t w[10]; 290 } u; 291 292 /* pseudo header */ 293 memset(&u.ipov, 0, sizeof(u.ipov)); 294 u.ipov.ih_len = htons(len); 295 u.ipov.ih_pr = nxt; 296 u.ipov.ih_src = mtod(m, struct ip *)->ip_src; 297 u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst; 298 w = u.w; 299 /* assumes sizeof(ipov) == 20 */ 300 sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; 301 sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9]; 302 } 303 304 /* skip unnecessary part */ 305 while (m && off > 0) { 306 if (m->m_len > off) 307 break; 308 off -= m->m_len; 309 m = m->m_next; 310 } 311 312 return (in_cksum_internal(m, off, len, sum)); 313 } 314