1 /* $NetBSD: in_cksum.c,v 1.7 2002/03/05 14:15:31 simonb Exp $ */ 2 3 /* 4 * Copyright (c) 1993 Regents of the University of California. 5 * All rights reserved. 6 * 7 * Permission to use, copy, modify, and distribute this software and its 8 * documentation for any purpose, without fee, and without written agreement is 9 * hereby granted, provided that the above copyright notice and the following 10 * paragraph appears in all copies of this software. 11 * 12 * THIS SOFTWARE IS PROVIDED BY THE REGENTS ``AS IS'' AND ANY EXPRESS OR 13 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 14 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 15 * EVENT SHALL THE REGENTS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 16 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 17 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 18 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 19 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 20 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 */ 23 24 /* 25 * ccsum.c - Highly optimized MIPS checksum function. 26 * by Jonathan Kay, Computer Systems Lab, UCSD 4/2/93 27 * 28 * Version 2.0 29 * Techniques and credits: 30 * Basic algorithm is 3-instruction inner loop sum by Peter Desnoyers. 31 * Full word-size reading as described in Usenix W'93 paper. 32 * Pipelined latency absoption technique as described in paper. 33 * Unrolling chosen through testing and examination of actual workload. 34 * Rewrite in 'C' without loss of performance suggested by Vernon Schryver. 35 * 15% faster than version 1 ("Usenix version"). 36 * 150% faster than Ultrix 4.2A checksum routine. 37 * 38 * BSD changes: Jonathan Stone, Stanford Distributed Systems Group, 1997-08-11 39 * 40 * re-written for incremental checksumming of BSD mbufs 41 * and byteswap out-of-phase mbuf sums. 42 */ 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/mbuf.h> 47 #include <sys/cdefs.h> 48 #include <netinet/in.h> 49 #include <machine/endian.h> 50 51 union memptr { 52 unsigned int *i; 53 unsigned long *l; 54 unsigned long u; 55 unsigned short *s; 56 unsigned char *c; 57 }; 58 59 static __inline u_int32_t fastsum(union memptr, int, u_int, int); 60 61 62 /* 63 * Compute 1's complement sum over a contiguous block at 'buf' for 'n' bytes. 64 * 65 * Add the resulting checksum into 'oldsum' using 1's complement. 66 * 'odd_aligned' is a boolean which if set, indicate the data in 'buf' 67 * starts at an odd byte alignment within the containing packet, 68 * and so we must byteswap the memory-aligned 1's-complement sum 69 * over the data before adding it to `oldsum'. 70 */ 71 u_int32_t 72 fastsum(union memptr buf, int n, unsigned int oldsum, int odd_aligned) 73 { 74 unsigned long hilo = 0, high = 0; 75 unsigned long w0, w1; 76 unsigned int sum = 0; 77 78 /* Align to 32 bits. */ 79 if (buf.u & 0x3) { 80 /* Skip to the end for very small mbufs */ 81 if (n < 3) 82 goto verylittleleft; 83 84 /* 85 * 16-bit-align. 86 * If buf is odd-byte-aligned, add the byte and toggle 87 * our byte-alignment flag. 88 * If we were odd-aligned on entry, an odd-aligned 89 * byte makes a 16-bit word with the previous odd byte, 90 * unaligned, making us aligned again. 91 * If we were not already odd-aligned, we are now, 92 * and we must byteswap our 16-bit-aligned sum of 93 *'buf' before accumulating it. 94 */ 95 if (buf.u & 0x1) { 96 #if BYTE_ORDER == BIG_ENDIAN 97 sum += *(buf.c++); 98 #else 99 sum += (*(buf.c++) << 8); 100 #endif 101 n -= 1; 102 odd_aligned = !odd_aligned; 103 } 104 105 /* 32-bit-align */ 106 if (buf.u & 0x2) { 107 sum += *(buf.s++); 108 n -= 2; 109 } 110 } 111 112 /* 32-bit-aligned sum. 113 Peter Desnoyers' unbelievable 3-instruction main loop. */ 114 if (n < 64 + 8) 115 goto notmuchleft; 116 w0 = buf.l[0]; 117 w1 = buf.l[1]; 118 do { 119 hilo += w0; 120 high += w0 >> 16; 121 w0 = buf.l[2]; 122 123 hilo += w1; 124 high += w1 >> 16; 125 w1 = buf.l[3]; 126 127 hilo += w0; 128 high += w0 >> 16; 129 w0 = buf.l[4]; 130 131 hilo += w1; 132 high += w1 >> 16; 133 w1 = buf.l[5]; 134 135 hilo += w0; 136 high += w0 >> 16; 137 w0 = buf.l[6]; 138 139 hilo += w1; 140 high += w1 >> 16; 141 w1 = buf.l[7]; 142 143 hilo += w0; 144 high += w0 >> 16; 145 w0 = buf.l[8]; 146 147 hilo += w1; 148 high += w1 >> 16; 149 w1 = buf.l[9]; 150 151 152 hilo += w0; 153 high += w0 >> 16; 154 w0 = buf.l[10]; 155 156 hilo += w1; 157 high += w1 >> 16; 158 w1 = buf.l[11]; 159 160 hilo += w0; 161 high += w0 >> 16; 162 w0 = buf.l[12]; 163 164 hilo += w1; 165 high += w1 >> 16; 166 w1 = buf.l[13]; 167 168 hilo += w0; 169 high += w0 >> 16; 170 w0 = buf.l[14]; 171 172 hilo += w1; 173 high += w1 >> 16; 174 w1 = buf.l[15]; 175 176 hilo += w0; 177 high += w0 >> 16; 178 w0 = buf.l[16]; 179 180 hilo += w1; 181 high += w1 >> 16; 182 w1 = buf.l[17]; 183 184 185 n -= 64; 186 buf.c += 64; 187 188 } while (n >= 64 + 8); 189 hilo -= (high << 16); 190 sum += hilo; 191 sum += high; 192 193 notmuchleft: 194 high = hilo = 0; 195 while (n >= 4) { 196 w0 = *(buf.l++); 197 hilo += w0; 198 high += w0 >> 16; 199 n -= 4; 200 } 201 hilo -= (high << 16); 202 sum += hilo; 203 sum += high; 204 205 while (n > 1) { 206 n -= sizeof(*buf.s); 207 sum += *(buf.s++); 208 } 209 210 verylittleleft: 211 /* handle trailing byte and short (possibly) unaligned payloads */ 212 while (n-- > 0) { 213 #if BYTE_ORDER == BIG_ENDIAN 214 sum += *buf.c << 8; 215 #else 216 sum += *buf.c; 217 #endif 218 } 219 220 /* 221 * compensate for a trailing byte in previous mbuf 222 * by byteswapping the memory-aligned sum of this mbuf. 223 */ 224 if (odd_aligned) { 225 sum = (sum & 0xffff) + (sum >> 16); 226 sum = (sum & 0xffff) + (sum >> 16); 227 sum = oldsum + ((sum >> 8) & 0xff) + ((sum & 0xff) << 8); 228 } else { 229 /* add upper and lower halfwords together to get full sum */ 230 sum = oldsum + sum; 231 sum = (sum & 0xffff) + (sum >> 16); 232 } 233 234 /* fold carry from combining sums */ 235 sum = (sum & 0xffff) + (sum >> 16); 236 return(sum); 237 } 238 239 240 /* 241 * Checksum routine for Internet Protocol family headers (mips r3000 Version). 242 * 243 */ 244 int 245 in_cksum(struct mbuf *m, int len) 246 { 247 /*u_short **/ union memptr w; 248 u_int32_t sum = 0; 249 int mlen; 250 int odd_aligned = 0; 251 252 for ( ; m && len; m = m->m_next) { 253 254 mlen = m->m_len; 255 if (mlen == 0) 256 continue; 257 if (mlen > len) 258 mlen = len; 259 w.c = mtod(m, u_char *); 260 sum = fastsum(w, mlen, sum, odd_aligned); 261 len -= mlen; 262 odd_aligned = (odd_aligned + mlen) & 0x01; 263 } 264 if (len != 0) { 265 printf("in_cksum: out of data, %d\n", len); 266 } 267 return (~sum & 0xffff); 268 } 269