xref: /netbsd/sys/arch/mips/mips/in_cksum.c (revision 6550d01e)
1 /* $NetBSD: in_cksum.c,v 1.14 2010/09/18 16:43:50 tsutsui Exp $ */
2 
3 /*
4  * Copyright (c) 1993 Regents of the University of California.
5  * All rights reserved.
6  *
7  * Permission to use, copy, modify, and distribute this software and its
8  * documentation for any purpose, without fee, and without written agreement is
9  * hereby granted, provided that the above copyright notice and the following
10  * paragraph appears in all copies of this software.
11  *
12  * THIS SOFTWARE IS PROVIDED BY THE REGENTS ``AS IS'' AND ANY EXPRESS OR
13  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
15  * EVENT SHALL THE REGENTS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
16  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
17  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
18  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
19  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
20  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
21  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22  */
23 
24 /*
25  * ccsum.c - Highly optimized MIPS checksum function.
26  * by Jonathan Kay, Computer Systems Lab, UCSD         4/2/93
27  *
28  * Version 2.0
29  * Techniques and credits:
30  *   Basic algorithm is 3-instruction inner loop sum by Peter Desnoyers.
31  *   Full word-size reading as described in Usenix W'93 paper.
32  *   Pipelined latency absorption technique as described in paper.
33  *   Unrolling chosen through testing and examination of actual workload.
34  *   Rewrite in 'C' without loss of performance suggested by Vernon Schryver.
35  *   15% faster than version 1 ("Usenix version").
36  *   150% faster than Ultrix 4.2A checksum routine.
37  *
38  * BSD changes: Jonathan Stone, Stanford Distributed Systems Group, 1997-08-11
39  *
40  *   re-written for incremental checksumming of BSD mbufs
41  *   and byteswap out-of-phase mbuf sums.
42  */
43 
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: in_cksum.c,v 1.14 2010/09/18 16:43:50 tsutsui Exp $");
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/mbuf.h>
50 
51 #include <netinet/in_systm.h>
52 #include <netinet/in.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip_var.h>
55 
56 #include <machine/endian.h>
57 
/*
 * A position in a buffer, viewed either as a pointer to 32-bit words,
 * 16-bit words or bytes, or as an integer address (used for the
 * alignment tests below).
 */
union memptr {
	uint32_t *l;
	uintptr_t u;
	uint16_t *s;
	uint8_t *c;
};

static inline uint32_t fastsum(union memptr, int, unsigned int, int);

/*
 * Compute the 1's complement sum over a contiguous block at 'buf' for
 * 'n' bytes and add it into 'oldsum' using 1's complement arithmetic.
 *
 * 'odd_aligned' is a boolean which, if set, indicates that the data in
 * 'buf' starts at an odd byte offset within the containing packet, so
 * the memory-aligned 1's-complement sum over the data must be
 * byteswapped before it is added to 'oldsum'.
 *
 * 'oldsum' may be any 32-bit partial sum; it does not need to be folded.
 * A non-positive 'n' is treated as "no data": nothing is read from 'buf'.
 *
 * Returns the combined sum folded to 16 bits (0 .. 0xffff), not
 * complemented.
 */
static inline uint32_t
fastsum(union memptr buf, int n, unsigned int oldsum, int odd_aligned)
{
	unsigned long hilo = 0, high = 0;
	unsigned long w0, w1;
	unsigned int sum = 0;

	/* Align to 32 bits. */
	if (buf.u & 0x3) {
		/*
		 * 16-bit-align.
		 * If buf is odd-byte-aligned, add the byte and toggle
		 * our byte-alignment flag.
		 *     If we were odd-aligned on entry, an odd-aligned
		 * byte makes a 16-bit word with the previous odd byte,
		 * making us aligned again.
		 *     If we were not already odd-aligned, we are now,
		 * and we must byteswap our 16-bit-aligned sum of
		 * 'buf' before accumulating it.
		 *
		 * The n > 0 test keeps us from reading a byte that is
		 * not part of the block when the block is empty.
		 */
		if ((buf.u & 0x1) && n > 0) {
#if BYTE_ORDER == BIG_ENDIAN
			sum += *(buf.c++);
#else
			sum += (*(buf.c++) << 8);
#endif
			n -= 1;
			odd_aligned = !odd_aligned;
		}
		/* Skip to the end for very small mbufs */
		if (n <= 2)
			goto postunaligned;

		/* 32-bit-align */
		if (buf.u & 0x2) {
			sum += *(buf.s++);
			n -= 2;
		}
	}

	/*
	 * 32-bit-aligned sum.
	 * Peter Desnoyers' unbelievable 3-instruction main loop.
	 *
	 * 'hilo' accumulates whole 32-bit words and 'high' accumulates
	 * their upper halfwords; subtracting (high << 16) from 'hilo'
	 * afterwards leaves the sum of the lower halfwords.
	 *
	 * Each pass sums 16 words (64 bytes) and preloads the first two
	 * words of the next pass, hence the extra 8 bytes required by the
	 * loop condition.
	 */
	if (n < 64 + 8)
		goto notmuchleft;
	w0 = buf.l[0];
	w1 = buf.l[1];
	do {
		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[2];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[3];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[4];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[5];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[6];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[7];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[8];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[9];


		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[10];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[11];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[12];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[13];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[14];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[15];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[16];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[17];


		n -= 64;
		buf.c += 64;

	} while (n >= 64 + 8);
	hilo -= (high << 16);
	sum += hilo;
	sum += high;

 notmuchleft:
	/* Remaining whole 32-bit words, one at a time. */
	high = hilo = 0;
	while (n >= (int)sizeof(uint32_t)) {
		w0 = *(buf.l++);
		hilo += w0;
		high += w0 >> 16;
		n -= (int)sizeof(uint32_t);
	}
	hilo -= (high << 16);
	sum += hilo;
	sum += high;

 postunaligned:
	/*
	 * handle post 32bit unaligned payloads
	 * (comparisons are done in 'int' so that a non-positive n is
	 * never converted to a huge unsigned value)
	 */
	if (n >= (int)sizeof(uint16_t)) {
		sum += *(buf.s++);
		n -= (int)sizeof(uint16_t);
	}

	/* handle a trailing odd byte */
	if (n > 0) {
#if BYTE_ORDER == BIG_ENDIAN
		sum += *(buf.c++) << 8;
#else
		sum += *(buf.c++);
#endif
	}

	/*
	 * Reduce this block's sum to 16 bits; two folds are always
	 * enough for a 32-bit value.
	 */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);

	/*
	 * compensate for a trailing byte in previous mbuf
	 * by byteswapping the memory-aligned sum of this mbuf.
	 */
	if (odd_aligned)
		sum = ((sum >> 8) & 0xff) + ((sum & 0xff) << 8);

	/*
	 * Combine with the running sum.  'oldsum' is folded once first so
	 * the addition cannot overflow, and the result is folded until no
	 * carry remains so the return value always fits in 16 bits, even
	 * when 'oldsum' was wider than 16 bits.
	 */
	sum += (oldsum & 0xffff) + (oldsum >> 16);
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (sum);
}
244 
245 /*
246  * Checksum routine for Internet Protocol family headers.
247  *
248  */
249 static inline int
250 in_cksum_internal(struct mbuf *m, int off, int len, uint32_t sum)
251 {
252 	/*u_short **/ union memptr w;
253 	int mlen;
254 	int odd_aligned = 0;
255 
256 	for (; m && len; m = m->m_next) {
257 		if (m->m_len == 0)
258 			continue;
259 		w.c = mtod(m, u_char *) + off;
260 		mlen = m->m_len - off;
261 		off = 0;
262 		if (len < mlen)
263 			mlen = len;
264 		len -= mlen;
265 
266 		sum = fastsum(w, mlen, sum, odd_aligned);
267 		odd_aligned = (odd_aligned + mlen) & 0x01;
268 	}
269 	if (len != 0) {
270 		printf("cksum: out of data, %d\n", len);
271 	}
272 	return (~sum & 0xffff);
273 }
274 
/*
 * Checksum the first 'len' bytes of the mbuf chain 'm', starting at the
 * beginning of the chain with an initial sum of zero.
 */
int
in_cksum(struct mbuf *m, int len)
{
	const int start_off = 0;	/* begin at the first byte */
	const uint32_t seed = 0;	/* no prior partial sum */

	return in_cksum_internal(m, start_off, len, seed);
}
281 
282 int
283 in4_cksum(struct mbuf *m, uint8_t nxt, int off, int len)
284 {
285 	uint sum = 0;
286 
287 	if (nxt != 0) {
288 		uint16_t *w;
289 		union {
290 			struct ipovly ipov;
291 			u_int16_t w[10];
292 		} u;
293 
294 		/* pseudo header */
295 		memset(&u.ipov, 0, sizeof(u.ipov));
296 		u.ipov.ih_len = htons(len);
297 		u.ipov.ih_pr = nxt;
298 		u.ipov.ih_src = mtod(m, struct ip *)->ip_src;
299 		u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
300 		w = u.w;
301 		/* assumes sizeof(ipov) == 20 */
302 		sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4];
303 		sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9];
304 	}
305 
306 	/* skip unnecessary part */
307 	while (m && off > 0) {
308 		if (m->m_len > off)
309 			break;
310 		off -= m->m_len;
311 		m = m->m_next;
312 	}
313 
314 	return (in_cksum_internal(m, off, len, sum));
315 }
316