1 /*	$NetBSD: rf_nwayxor.c,v 1.11 2006/11/16 01:33:23 christos Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: Mark Holland, Daniel Stodolsky
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /************************************************************
30  *
31  * nwayxor.c -- code to do N-way xors for reconstruction
32  *
33  * nWayXorN xors N input buffers into the destination buffer.
34  * adapted from danner's longword_bxor code.
35  *
36  ************************************************************/
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: rf_nwayxor.c,v 1.11 2006/11/16 01:33:23 christos Exp $");
40 
41 #include "rf_nwayxor.h"
42 #include "rf_shutdown.h"
43 
/* Forward declaration of the shutdown hook defined later in this file. */
static void rf_ShutdownNWayXor(void *);
/* Invocation counters: rf_nWayXorN bumps callcount[N]. */
static int callcount[10];
46 
47 static void
rf_ShutdownNWayXor(void * ignored)48 rf_ShutdownNWayXor(void *ignored)
49 {
50 	int     i;
51 
52 	if (rf_showXorCallCounts == 0)
53 		return;
54 	printf("Call counts for n-way xor routines:  ");
55 	for (i = 0; i < 10; i++)
56 		printf("%d ", callcount[i]);
57 	printf("\n");
58 }
59 
60 int
rf_ConfigureNWayXor(RF_ShutdownList_t ** listp)61 rf_ConfigureNWayXor(RF_ShutdownList_t **listp)
62 {
63 	int     i;
64 
65 	for (i = 0; i < 10; i++)
66 		callcount[i] = 0;
67 	rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL);
68 	return (0);
69 }
70 
71 void
rf_nWayXor1(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)72 rf_nWayXor1(RF_ReconBuffer_t **src_rbs,	RF_ReconBuffer_t *dest_rb, int len)
73 {
74 	unsigned long *src = (unsigned long *) src_rbs[0]->buffer;
75 	unsigned long *dest = (unsigned long *) dest_rb->buffer;
76 	unsigned long *end = src + len;
77 	unsigned long d0, d1, d2, d3, s0, s1, s2, s3;
78 
79 	callcount[1]++;
80 	while (len >= 4) {
81 		d0 = dest[0];
82 		d1 = dest[1];
83 		d2 = dest[2];
84 		d3 = dest[3];
85 		s0 = src[0];
86 		s1 = src[1];
87 		s2 = src[2];
88 		s3 = src[3];
89 		dest[0] = d0 ^ s0;
90 		dest[1] = d1 ^ s1;
91 		dest[2] = d2 ^ s2;
92 		dest[3] = d3 ^ s3;
93 		src += 4;
94 		dest += 4;
95 		len -= 4;
96 	}
97 	while (src < end) {
98 		*dest++ ^= *src++;
99 	}
100 }
101 
102 void
rf_nWayXor2(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)103 rf_nWayXor2(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
104 {
105 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
106 	unsigned long *a = dst;
107 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
108 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
109 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
110 
111 	callcount[2]++;
112 	/* align dest to cache line */
113 	while ((((unsigned long) dst) & 0x1f)) {
114 		*dst++ = *a++ ^ *b++ ^ *c++;
115 		len--;
116 	}
117 	while (len > 4) {
118 		a0 = a[0];
119 		len -= 4;
120 
121 		a1 = a[1];
122 		a2 = a[2];
123 
124 		a3 = a[3];
125 		a += 4;
126 
127 		b0 = b[0];
128 		b1 = b[1];
129 
130 		b2 = b[2];
131 		b3 = b[3];
132 		/* start dual issue */
133 		a0 ^= b0;
134 		b0 = c[0];
135 
136 		b += 4;
137 		a1 ^= b1;
138 
139 		a2 ^= b2;
140 		a3 ^= b3;
141 
142 		b1 = c[1];
143 		a0 ^= b0;
144 
145 		b2 = c[2];
146 		a1 ^= b1;
147 
148 		b3 = c[3];
149 		a2 ^= b2;
150 
151 		dst[0] = a0;
152 		a3 ^= b3;
153 		dst[1] = a1;
154 		c += 4;
155 		dst[2] = a2;
156 		dst[3] = a3;
157 		dst += 4;
158 	}
159 	while (len) {
160 		*dst++ = *a++ ^ *b++ ^ *c++;
161 		len--;
162 	}
163 }
/*
 * LOAD_FIRST(_dst, _b): start one four-word step.
 *
 * Loads _dst[0..3] into a0..a3 and _b[0..3] into b0..b3, subtracts 4
 * from len, and advances _b by four words.  _dst is NOT advanced.
 * The caller must have a0..a3, b0..b3 and len in scope.
 *
 * Wrapped in do { } while (0) so the macro is a single statement even
 * under an unbraced if/else.
 */
#define LOAD_FIRST(_dst,_b) do {	\
	a0 = (_dst)[0]; len -= 4;	\
	a1 = (_dst)[1];			\
	a2 = (_dst)[2];			\
	a3 = (_dst)[3];			\
	b0 = (_b)[0];			\
	b1 = (_b)[1];			\
	b2 = (_b)[2];			\
	b3 = (_b)[3];			\
	(_b) += 4;			\
} while (/* CONSTCOND */ 0)
174 
/*
 * XOR_AND_LOAD_NEXT(_n): fold the words held in b0..b3 into a0..a3,
 * then refill b0..b3 from _n[0..3] and advance _n by four words.
 * The caller must have a0..a3 and b0..b3 in scope.
 *
 * Wrapped in do { } while (0) so the macro is a single statement even
 * under an unbraced if/else.
 */
#define XOR_AND_LOAD_NEXT(_n) do {	\
	a0 ^= b0; b0 = (_n)[0];		\
	a1 ^= b1; b1 = (_n)[1];		\
	a2 ^= b2; b2 = (_n)[2];		\
	a3 ^= b3; b3 = (_n)[3];		\
	(_n) += 4;			\
} while (/* CONSTCOND */ 0)
182 
/*
 * XOR_AND_STORE(_dst): fold the words held in b0..b3 into a0..a3,
 * write a0..a3 to _dst[0..3], and advance _dst by four words.
 * The caller must have a0..a3 and b0..b3 in scope.
 *
 * Wrapped in do { } while (0) so the macro is a single statement even
 * under an unbraced if/else.
 */
#define XOR_AND_STORE(_dst) do {	\
	a0 ^= b0; (_dst)[0] = a0;	\
	a1 ^= b1; (_dst)[1] = a1;	\
	a2 ^= b2; (_dst)[2] = a2;	\
	a3 ^= b3; (_dst)[3] = a3;	\
	(_dst) += 4;			\
} while (/* CONSTCOND */ 0)
190 
191 
192 void
rf_nWayXor3(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)193 rf_nWayXor3(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
194 {
195 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
196 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
197 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
198 	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
199 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
200 
201 	callcount[3]++;
202 	/* align dest to cache line */
203 	while ((((unsigned long) dst) & 0x1f)) {
204 		*dst++ ^= *b++ ^ *c++ ^ *d++;
205 		len--;
206 	}
207 	while (len > 4) {
208 		LOAD_FIRST(dst, b);
209 		XOR_AND_LOAD_NEXT(c);
210 		XOR_AND_LOAD_NEXT(d);
211 		XOR_AND_STORE(dst);
212 	}
213 	while (len) {
214 		*dst++ ^= *b++ ^ *c++ ^ *d++;
215 		len--;
216 	}
217 }
218 
219 void
rf_nWayXor4(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)220 rf_nWayXor4(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
221 {
222 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
223 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
224 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
225 	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
226 	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
227 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
228 
229 	callcount[4]++;
230 	/* align dest to cache line */
231 	while ((((unsigned long) dst) & 0x1f)) {
232 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
233 		len--;
234 	}
235 	while (len > 4) {
236 		LOAD_FIRST(dst, b);
237 		XOR_AND_LOAD_NEXT(c);
238 		XOR_AND_LOAD_NEXT(d);
239 		XOR_AND_LOAD_NEXT(e);
240 		XOR_AND_STORE(dst);
241 	}
242 	while (len) {
243 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
244 		len--;
245 	}
246 }
247 
248 void
rf_nWayXor5(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)249 rf_nWayXor5(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
250 {
251 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
252 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
253 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
254 	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
255 	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
256 	unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
257 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
258 
259 	callcount[5]++;
260 	/* align dest to cache line */
261 	while ((((unsigned long) dst) & 0x1f)) {
262 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
263 		len--;
264 	}
265 	while (len > 4) {
266 		LOAD_FIRST(dst, b);
267 		XOR_AND_LOAD_NEXT(c);
268 		XOR_AND_LOAD_NEXT(d);
269 		XOR_AND_LOAD_NEXT(e);
270 		XOR_AND_LOAD_NEXT(f);
271 		XOR_AND_STORE(dst);
272 	}
273 	while (len) {
274 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
275 		len--;
276 	}
277 }
278 
279 void
rf_nWayXor6(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)280 rf_nWayXor6(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
281 {
282 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
283 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
284 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
285 	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
286 	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
287 	unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
288 	unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
289 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
290 
291 	callcount[6]++;
292 	/* align dest to cache line */
293 	while ((((unsigned long) dst) & 0x1f)) {
294 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
295 		len--;
296 	}
297 	while (len > 4) {
298 		LOAD_FIRST(dst, b);
299 		XOR_AND_LOAD_NEXT(c);
300 		XOR_AND_LOAD_NEXT(d);
301 		XOR_AND_LOAD_NEXT(e);
302 		XOR_AND_LOAD_NEXT(f);
303 		XOR_AND_LOAD_NEXT(g);
304 		XOR_AND_STORE(dst);
305 	}
306 	while (len) {
307 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
308 		len--;
309 	}
310 }
311 
312 void
rf_nWayXor7(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)313 rf_nWayXor7(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
314 {
315 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
316 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
317 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
318 	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
319 	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
320 	unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
321 	unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
322 	unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
323 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
324 
325 	callcount[7]++;
326 	/* align dest to cache line */
327 	while ((((unsigned long) dst) & 0x1f)) {
328 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
329 		len--;
330 	}
331 	while (len > 4) {
332 		LOAD_FIRST(dst, b);
333 		XOR_AND_LOAD_NEXT(c);
334 		XOR_AND_LOAD_NEXT(d);
335 		XOR_AND_LOAD_NEXT(e);
336 		XOR_AND_LOAD_NEXT(f);
337 		XOR_AND_LOAD_NEXT(g);
338 		XOR_AND_LOAD_NEXT(h);
339 		XOR_AND_STORE(dst);
340 	}
341 	while (len) {
342 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
343 		len--;
344 	}
345 }
346 
347 void
rf_nWayXor8(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)348 rf_nWayXor8(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
349 {
350 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
351 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
352 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
353 	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
354 	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
355 	unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
356 	unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
357 	unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
358 	unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
359 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
360 
361 	callcount[8]++;
362 	/* align dest to cache line */
363 	while ((((unsigned long) dst) & 0x1f)) {
364 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
365 		len--;
366 	}
367 	while (len > 4) {
368 		LOAD_FIRST(dst, b);
369 		XOR_AND_LOAD_NEXT(c);
370 		XOR_AND_LOAD_NEXT(d);
371 		XOR_AND_LOAD_NEXT(e);
372 		XOR_AND_LOAD_NEXT(f);
373 		XOR_AND_LOAD_NEXT(g);
374 		XOR_AND_LOAD_NEXT(h);
375 		XOR_AND_LOAD_NEXT(i);
376 		XOR_AND_STORE(dst);
377 	}
378 	while (len) {
379 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
380 		len--;
381 	}
382 }
383 
384 
385 void
rf_nWayXor9(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)386 rf_nWayXor9(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
387 {
388 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
389 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
390 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
391 	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
392 	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
393 	unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
394 	unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
395 	unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
396 	unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
397 	unsigned long *j = (unsigned long *) src_rbs[8]->buffer;
398 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
399 
400 	callcount[9]++;
401 	/* align dest to cache line */
402 	while ((((unsigned long) dst) & 0x1f)) {
403 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
404 		len--;
405 	}
406 	while (len > 4) {
407 		LOAD_FIRST(dst, b);
408 		XOR_AND_LOAD_NEXT(c);
409 		XOR_AND_LOAD_NEXT(d);
410 		XOR_AND_LOAD_NEXT(e);
411 		XOR_AND_LOAD_NEXT(f);
412 		XOR_AND_LOAD_NEXT(g);
413 		XOR_AND_LOAD_NEXT(h);
414 		XOR_AND_LOAD_NEXT(i);
415 		XOR_AND_LOAD_NEXT(j);
416 		XOR_AND_STORE(dst);
417 	}
418 	while (len) {
419 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
420 		len--;
421 	}
422 }
423