xref: /netbsd/sys/arch/mips/mips/in_cksum.c (revision c4a72b64)
1 /* $NetBSD: in_cksum.c,v 1.9 2002/07/29 09:14:36 itojun Exp $ */
2 
3 /*
4  * Copyright (c) 1993 Regents of the University of California.
5  * All rights reserved.
6  *
7  * Permission to use, copy, modify, and distribute this software and its
8  * documentation for any purpose, without fee, and without written agreement is
9  * hereby granted, provided that the above copyright notice and the following
10  * paragraph appears in all copies of this software.
11  *
12  * THIS SOFTWARE IS PROVIDED BY THE REGENTS ``AS IS'' AND ANY EXPRESS OR
13  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
15  * EVENT SHALL THE REGENTS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
16  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
17  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
18  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
19  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
20  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
21  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22  */
23 
24 /*
25  * ccsum.c - Highly optimized MIPS checksum function.
26  * by Jonathan Kay, Computer Systems Lab, UCSD         4/2/93
27  *
28  * Version 2.0
29  * Techniques and credits:
30  *   Basic algorithm is 3-instruction inner loop sum by Peter Desnoyers.
31  *   Full word-size reading as described in Usenix W'93 paper.
32  *   Pipelined latency absorption technique as described in paper.
33  *   Unrolling chosen through testing and examination of actual workload.
34  *   Rewrite in 'C' without loss of performance suggested by Vernon Schryver.
35  *   15% faster than version 1 ("Usenix version").
36  *   150% faster than Ultrix 4.2A checksum routine.
37  *
38  * BSD changes: Jonathan Stone, Stanford Distributed Systems Group, 1997-08-11
39  *
40  *   re-written for incremental checksumming of BSD mbufs
41  *   and byteswap out-of-phase mbuf sums.
42  */
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/mbuf.h>
47 #include <sys/cdefs.h>
48 
49 #include <netinet/in_systm.h>
50 #include <netinet/in.h>
51 #include <netinet/ip.h>
52 #include <netinet/ip_var.h>
53 
54 #include <machine/endian.h>
55 
/*
 * Cursor over the data being summed.  Every member aliases the same
 * address: the pointer members read the data at different widths, and
 * 'u' exposes the address as an integer so its alignment can be tested.
 */
union memptr {
	unsigned int *i;
	unsigned long *l;
	unsigned long u;
	unsigned short *s;
	unsigned char *c;
};

static inline uint32_t fastsum(union memptr, int, unsigned int, int);

/*
 * Compute the 1's complement sum over the contiguous block of 'n' bytes
 * at 'buf' and add it into 'oldsum' using 1's complement arithmetic.
 *
 * 'odd_aligned' is a boolean which, if set, indicates that the data at
 * 'buf' starts at an odd byte offset within the containing packet, so
 * the memory-aligned sum over this block must be byteswapped before it
 * is added to 'oldsum'.
 *
 * Returns the updated running sum with the carries folded back into the
 * low 16 bits.
 *
 * The bulk of the data is read as 32-bit words through 'buf.i' and
 * accumulated in uint32_t, never through 'unsigned long': the loops step
 * the byte count by 4 (and by 64 per unrolled pass), which only matches
 * the access width if each load is exactly four bytes, also on ABIs
 * where long is 64 bits.
 */
static inline uint32_t
fastsum(union memptr buf, int n, unsigned int oldsum, int odd_aligned)
{
	uint32_t hilo = 0, high = 0;
	uint32_t w0, w1;
	unsigned int sum = 0;
	int first_of_pair;

	/* Align to 32 bits. */
	if (buf.u & 0x3) {
		/* Skip to the end for very small mbufs */
		if (n < 3)
			goto verylittleleft;

		/*
		 * 16-bit-align.
		 * If buf is odd-byte-aligned, add the byte and toggle
		 * our byte-alignment flag.
		 *     If we were odd-aligned on entry, an odd-aligned
		 * byte makes a 16-bit word with the previous odd byte,
		 * unaligned, making us aligned again.
		 *     If we were not already odd-aligned, we are now,
		 * and we must byteswap our 16-bit-aligned sum of
		 * 'buf' before accumulating it.
		 */
		if (buf.u & 0x1) {
#if BYTE_ORDER == BIG_ENDIAN
			sum += *(buf.c++);
#else
			sum += (*(buf.c++) << 8);
#endif
			n -= 1;
			odd_aligned = !odd_aligned;
		}

		/* 32-bit-align */
		if (buf.u & 0x2) {
			sum += *(buf.s++);
			n -= 2;
		}
	}

	/*
	 * 32-bit-aligned sum.
	 * Peter Desnoyers' unbelievable 3-instruction main loop.
	 *
	 * 'hilo' accumulates whole words (modulo 2^32) and 'high' the
	 * upper halfwords; the sum of the lower halfwords is recovered
	 * afterwards as hilo - (high << 16).
	 *
	 * Each pass consumes 16 words (64 bytes) and preloads the first
	 * two words of the next pass, hence the extra 8 bytes required by
	 * the loop condition.
	 */
	if (n < 64 + 8)
		goto notmuchleft;
	w0 = buf.i[0];
	w1 = buf.i[1];
	do {
		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[2];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[3];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[4];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[5];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[6];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[7];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[8];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[9];


		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[10];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[11];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[12];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[13];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[14];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[15];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.i[16];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.i[17];


		n -= 64;
		buf.c += 64;

	} while (n >= 64 + 8);
	hilo -= (high << 16);
	sum += hilo;
	sum += high;

 notmuchleft:
	/* Remaining whole 32-bit words, one at a time. */
	high = hilo = 0;
	while (n >= 4) {
		w0 = *(buf.i++);
		hilo += w0;
		high += w0 >> 16;
		n -= 4;
	}
	hilo -= (high << 16);
	sum += hilo;
	sum += high;

	/* Remaining whole 16-bit word, if any. */
	while (n > 1) {
		n -= (int)sizeof(*buf.s);
		sum += *(buf.s++);
	}

 verylittleleft:
	/*
	 * Handle the trailing byte and short (possibly) unaligned payloads.
	 *
	 * Up to two bytes can remain here (two only when we jumped
	 * straight in from the alignment check).  They are summed as
	 * consecutive bytes of 16-bit words counted from the current
	 * position: the first byte of a pair sits in the lane it would
	 * occupy in a memory-order halfword load, the second in the
	 * other lane.  The pointer must advance on every step, otherwise
	 * the first byte would be counted twice and the second ignored.
	 */
	first_of_pair = 1;
	while (n-- > 0) {
#if BYTE_ORDER == BIG_ENDIAN
		sum += first_of_pair ? (*buf.c << 8) : *buf.c;
#else
		sum += first_of_pair ? *buf.c : (*buf.c << 8);
#endif
		buf.c++;
		first_of_pair = !first_of_pair;
	}

	/*
	 * compensate for a trailing byte in previous mbuf
	 * by byteswapping the memory-aligned sum of this mbuf.
	 */
	if (odd_aligned) {
		/* reduce to 16 bits first so the swap sees both bytes */
		sum = (sum & 0xffff) + (sum >> 16);
		sum = (sum & 0xffff) + (sum >> 16);
		sum = oldsum + ((sum >> 8) & 0xff) + ((sum & 0xff) << 8);
	} else {
		/* add upper and lower halfwords together to get full sum */
		sum = oldsum + sum;
		sum = (sum & 0xffff) + (sum >> 16);
	}

	/* fold carry from combining sums */
	sum = (sum & 0xffff) + (sum >> 16);
	return(sum);
}
242 
243 /*
244  * Checksum routine for Internet Protocol family headers.
245  *
246  */
247 static inline int
248 in_cksum_internal(struct mbuf *m, int off, int len, uint32_t sum)
249 {
250 	/*u_short **/ union memptr w;
251 	int mlen;
252 	int odd_aligned = 0;
253 
254 	for (; m && len; m = m->m_next) {
255 		if (m->m_len == 0)
256 			continue;
257 		w.c = mtod(m, u_char *) + off;
258 		mlen = m->m_len - off;
259 		off = 0;
260 		if (len < mlen)
261 			mlen = len;
262 		len -= mlen;
263 
264 		sum = fastsum(w, mlen, sum, odd_aligned);
265 		odd_aligned = (odd_aligned + mlen) & 0x01;
266 	}
267 	if (len != 0) {
268 		printf("cksum: out of data, %d\n", len);
269 	}
270 	return (~sum & 0xffff);
271 }
272 
/*
 * Checksum the first 'len' bytes of the mbuf chain 'm', starting at the
 * beginning of the chain with an initial sum of zero.
 */
int
in_cksum(struct mbuf *m, int len)
{
	const int start_off = 0;	/* begin at the head of the chain */
	const uint32_t initial_sum = 0;	/* nothing accumulated yet */

	return (in_cksum_internal(m, start_off, len, initial_sum));
}
279 
280 int
281 in4_cksum(struct mbuf *m, uint8_t nxt, int off, int len)
282 {
283 	uint sum = 0;
284 
285 	if (nxt != 0) {
286 		uint16_t *w;
287 		union {
288 			struct ipovly ipov;
289 			u_int16_t w[10];
290 		} u;
291 
292 		/* pseudo header */
293 		memset(&u.ipov, 0, sizeof(u.ipov));
294 		u.ipov.ih_len = htons(len);
295 		u.ipov.ih_pr = nxt;
296 		u.ipov.ih_src = mtod(m, struct ip *)->ip_src;
297 		u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
298 		w = u.w;
299 		/* assumes sizeof(ipov) == 20 */
300 		sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4];
301 		sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9];
302 	}
303 
304 	/* skip unnecessary part */
305 	while (m && off > 0) {
306 		if (m->m_len > off)
307 			break;
308 		off -= m->m_len;
309 		m = m->m_next;
310 	}
311 
312 	return (in_cksum_internal(m, off, len, sum));
313 }
314