xref: /netbsd/sys/arch/mips/mips/in_cksum.c (revision bf9ec67e)
1 /*	$NetBSD: in_cksum.c,v 1.7 2002/03/05 14:15:31 simonb Exp $	*/
2 
3 /*
4  * Copyright (c) 1993 Regents of the University of California.
5  * All rights reserved.
6  *
7  * Permission to use, copy, modify, and distribute this software and its
8  * documentation for any purpose, without fee, and without written agreement is
9  * hereby granted, provided that the above copyright notice and the following
10  * paragraph appears in all copies of this software.
11  *
12  * THIS SOFTWARE IS PROVIDED BY THE REGENTS ``AS IS'' AND ANY EXPRESS OR
13  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
15  * EVENT SHALL THE REGENTS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
16  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
17  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
18  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
19  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
20  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
21  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22  */
23 
24 /*
25  * ccsum.c - Highly optimized MIPS checksum function.
26  * by Jonathan Kay, Computer Systems Lab, UCSD         4/2/93
27  *
28  * Version 2.0
29  * Techniques and credits:
30  *   Basic algorithm is 3-instruction inner loop sum by Peter Desnoyers.
31  *   Full word-size reading as described in Usenix W'93 paper.
32  *   Pipelined latency absorption technique as described in paper.
33  *   Unrolling chosen through testing and examination of actual workload.
34  *   Rewrite in 'C' without loss of performance suggested by Vernon Schryver.
35  *   15% faster than version 1 ("Usenix version").
36  *   150% faster than Ultrix 4.2A checksum routine.
37  *
38  * BSD changes: Jonathan Stone, Stanford Distributed Systems Group, 1997-08-11
39  *
40  *   re-written for incremental checksumming of BSD mbufs
41  *   and byteswap out-of-phase mbuf sums.
42  */
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/mbuf.h>
47 #include <sys/cdefs.h>
48 #include <netinet/in.h>
49 #include <machine/endian.h>
50 
/*
 * View of the buffer being summed at each access width fastsum() needs.
 * The members share storage; 'u' exposes the address as an integer so
 * that its alignment can be tested.  'l' is a 32-bit access only where
 * long is 32 bits wide, so fastsum() performs its word loads through 'i'.
 */
union memptr {
	unsigned int *i;
	unsigned long *l;
	unsigned long u;
	unsigned short *s;
	unsigned char *c;
};

static __inline u_int32_t fastsum(union memptr buf, int n,
    unsigned int oldsum, int odd_aligned);


/*
 * Compute the 1's complement sum over the contiguous block of 'n' bytes
 * at 'buf' and add it, using 1's complement arithmetic, into 'oldsum'.
 *
 * 'odd_aligned' is a boolean.  When set, the data at 'buf' starts at an
 * odd byte offset within the containing packet, so the sum over this
 * block is byteswapped before it is added to 'oldsum'.
 *
 * Internally the block is summed in memory-aligned 16-bit units; a
 * leading byte at an odd address toggles 'odd_aligned' so that the final
 * byteswap decision accounts for both the packet offset and the memory
 * address.
 *
 * Returns the folded sum; it fits in 16 bits whenever 'oldsum' does.
 */
static __inline u_int32_t
fastsum(union memptr buf, int n, unsigned int oldsum, int odd_aligned)
{
	unsigned long hilo = 0, high = 0;
	unsigned long w0, w1;
	unsigned int sum = 0;
	unsigned int shift;

	/* Align to 32 bits. */
	if (buf.u & 0x3) {
		/* Skip to the end for very small mbufs */
		if (n < 3)
			goto verylittleleft;

		/*
		 * 16-bit-align.
		 * If buf is odd-byte-aligned, add the byte and toggle
		 * our byte-alignment flag.
		 *     If we were odd-aligned on entry, this byte completes
		 * the 16-bit word begun by the previous odd byte, making
		 * us aligned again.
		 *     If we were not already odd-aligned, we are now,
		 * and we must byteswap our 16-bit-aligned sum of
		 * 'buf' before accumulating it.
		 */
		if (buf.u & 0x1) {
#if BYTE_ORDER == BIG_ENDIAN
			sum += *(buf.c++);
#else
			sum += (*(buf.c++) << 8);
#endif
			n -= 1;
			odd_aligned = !odd_aligned;
		}

		/* 32-bit-align */
		if (buf.u & 0x2) {
			sum += *(buf.s++);
			n -= 2;
		}
	}

	/*
	 * 32-bit-aligned sum.
	 * Peter Desnoyers' unbelievable 3-instruction main loop.
	 *
	 * 'hilo' accumulates whole 32-bit words and 'high' accumulates
	 * their upper halfwords, so hilo - (high << 16) is the sum of the
	 * lower halfwords.  Each pass consumes 16 words (64 bytes) and
	 * preloads the first two words of the next pass, which is why 8
	 * extra bytes must be available.  Words are loaded through the
	 * 32-bit 'i' member so the byte accounting holds even where long
	 * is wider than 32 bits.
	 */
	if (n < 64 + 8)
		goto notmuchleft;
	w0 = buf.i[0];
	w1 = buf.i[1];
	do {
		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[2];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[3];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[4];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[5];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[6];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[7];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[8];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[9];


		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[10];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[11];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[12];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[13];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[14];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[15];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[16];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[17];


		n -= 64;
		buf.c += 64;

	} while (n >= 64 + 8);
	hilo -= (high << 16);
	sum += hilo;
	sum += high;

 notmuchleft:
	/* Remaining whole words, one at a time (same hilo/high scheme). */
	high = hilo = 0;
	while (n >= 4) {
		w0 = *(buf.i++);
		hilo += w0;
		high += w0 >> 16;
		n -= 4;
	}
	hilo -= (high << 16);
	sum += hilo;
	sum += high;

	/* Remaining whole halfword, if any. */
	while (n > 1) {
		n -= 2;
		sum += *(buf.s++);
	}

 verylittleleft:
	/*
	 * Handle the trailing byte and short (possibly) unaligned payloads.
	 * At most two bytes remain: one after the aligned path above, or
	 * up to two when a tiny unaligned buffer jumped straight here.
	 * 'odd_aligned' has not been adjusted for those bytes, so they are
	 * placed as if the first one opened a 16-bit word: it takes the
	 * leading byte position and the next one the trailing position.
	 * The pointer must advance so that each byte is added exactly once.
	 */
#if BYTE_ORDER == BIG_ENDIAN
	shift = 8;
#else
	shift = 0;
#endif
	while (n-- > 0) {
		sum += (unsigned int)*(buf.c++) << shift;
		shift ^= 8;
	}

	/*
	 * compensate for a trailing byte in previous mbuf
	 * by byteswapping the memory-aligned sum of this mbuf.
	 */
	if (odd_aligned) {
		/* fold to 16 bits first so the byteswap is well defined */
		sum = (sum & 0xffff) + (sum >> 16);
		sum = (sum & 0xffff) + (sum >> 16);
		sum = oldsum + ((sum >> 8) & 0xff) + ((sum & 0xff) << 8);
	} else {
		/* add upper and lower halfwords together to get full sum */
		sum = oldsum + sum;
		sum = (sum & 0xffff) + (sum >> 16);
	}

	/* fold carry from combining sums */
	sum = (sum & 0xffff) + (sum >> 16);
	return(sum);
}
238 
239 
240 /*
241  * Checksum routine for Internet Protocol family headers (mips r3000 Version).
242  *
243  */
244 int
245 in_cksum(struct mbuf *m, int len)
246 {
247 	/*u_short **/ union memptr w;
248 	u_int32_t sum = 0;
249 	int mlen;
250 	int odd_aligned = 0;
251 
252 	for ( ; m && len; m = m->m_next) {
253 
254 		mlen = m->m_len;
255 		if (mlen == 0)
256 			continue;
257 		if (mlen > len)
258 			mlen = len;
259 		w.c = mtod(m, u_char *);
260 		sum = fastsum(w, mlen, sum, odd_aligned);
261 		len -= mlen;
262 		odd_aligned = (odd_aligned + mlen) & 0x01;
263 	}
264 	if (len != 0) {
265 		printf("in_cksum: out of data, %d\n", len);
266 	}
267 	return (~sum & 0xffff);
268 }
269