1 /* $NetBSD: rf_nwayxor.c,v 1.5 2001/11/13 07:11:15 lukem Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland, Daniel Stodolsky 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /************************************************************ 30 * 31 * nwayxor.c -- code to do N-way xors for reconstruction 32 * 33 * nWayXorN xors N input buffers into the destination buffer. 34 * adapted from danner's longword_bxor code. 35 * 36 ************************************************************/ 37 38 #include <sys/cdefs.h> 39 __KERNEL_RCSID(0, "$NetBSD: rf_nwayxor.c,v 1.5 2001/11/13 07:11:15 lukem Exp $"); 40 41 #include "rf_nwayxor.h" 42 #include "rf_shutdown.h" 43 44 static int callcount[10]; 45 static void rf_ShutdownNWayXor(void *); 46 47 static void 48 rf_ShutdownNWayXor(ignored) 49 void *ignored; 50 { 51 int i; 52 53 if (rf_showXorCallCounts == 0) 54 return; 55 printf("Call counts for n-way xor routines: "); 56 for (i = 0; i < 10; i++) 57 printf("%d ", callcount[i]); 58 printf("\n"); 59 } 60 61 int 62 rf_ConfigureNWayXor(listp) 63 RF_ShutdownList_t **listp; 64 { 65 int i, rc; 66 67 for (i = 0; i < 10; i++) 68 callcount[i] = 0; 69 rc = rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL); 70 return (rc); 71 } 72 73 void 74 rf_nWayXor1(src_rbs, dest_rb, len) 75 RF_ReconBuffer_t **src_rbs; 76 RF_ReconBuffer_t *dest_rb; 77 int len; 78 { 79 unsigned long *src = (unsigned long *) src_rbs[0]->buffer; 80 unsigned long *dest = (unsigned long *) dest_rb->buffer; 81 unsigned long *end = src + len; 82 unsigned long d0, d1, d2, d3, s0, s1, s2, s3; 83 84 callcount[1]++; 85 while (len >= 4) { 86 d0 = dest[0]; 87 d1 = dest[1]; 88 d2 = dest[2]; 89 d3 = dest[3]; 90 s0 = src[0]; 91 s1 = src[1]; 92 s2 = src[2]; 93 s3 = src[3]; 94 dest[0] = d0 ^ s0; 95 dest[1] = d1 ^ s1; 96 dest[2] = d2 ^ s2; 97 dest[3] = d3 ^ s3; 98 src += 4; 99 dest += 4; 100 len -= 4; 101 } 102 while (src < end) { 103 *dest++ ^= *src++; 104 } 105 } 106 107 void 108 rf_nWayXor2(src_rbs, dest_rb, len) 109 RF_ReconBuffer_t **src_rbs; 110 RF_ReconBuffer_t *dest_rb; 111 int len; 112 { 113 unsigned long *dst = (unsigned long *) dest_rb->buffer; 114 unsigned long *a = dst; 115 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 116 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 117 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 118 119 callcount[2]++; 120 /* align dest to cache line */ 121 while ((((unsigned long) dst) & 0x1f)) { 122 *dst++ = *a++ ^ *b++ ^ *c++; 123 len--; 124 } 125 while (len > 4) { 126 a0 = a[0]; 127 len -= 4; 128 129 a1 = a[1]; 130 a2 = a[2]; 131 132 a3 = a[3]; 133 a += 4; 134 135 b0 = b[0]; 136 b1 = b[1]; 137 138 b2 = b[2]; 139 b3 = b[3]; 140 /* start dual issue */ 141 a0 ^= b0; 142 b0 = c[0]; 143 144 b += 4; 145 a1 ^= b1; 146 147 a2 ^= b2; 148 a3 ^= b3; 149 150 b1 = c[1]; 151 a0 ^= b0; 152 153 b2 = c[2]; 154 a1 ^= b1; 155 156 b3 = c[3]; 157 a2 ^= b2; 158 159 dst[0] = a0; 160 a3 ^= b3; 161 dst[1] = a1; 162 c += 4; 163 dst[2] = a2; 164 dst[3] = a3; 165 dst += 4; 166 } 167 while (len) { 168 *dst++ = *a++ ^ *b++ ^ *c++; 169 len--; 170 } 171 } 172 /* note that first arg is not incremented but 2nd arg is */ 173 #define LOAD_FIRST(_dst,_b) \ 174 a0 = _dst[0]; len -= 4; \ 175 a1 = _dst[1]; \ 176 a2 = _dst[2]; \ 177 a3 = _dst[3]; \ 178 b0 = _b[0]; \ 179 b1 = _b[1]; \ 180 b2 = _b[2]; \ 181 b3 = _b[3]; _b += 4; 182 183 /* note: arg is incremented */ 184 #define XOR_AND_LOAD_NEXT(_n) \ 185 a0 ^= b0; b0 = _n[0]; \ 186 a1 ^= b1; b1 = _n[1]; \ 187 a2 ^= b2; b2 = _n[2]; \ 188 a3 ^= b3; b3 = _n[3]; \ 189 _n += 4; 190 191 /* arg is incremented */ 192 #define XOR_AND_STORE(_dst) \ 193 a0 ^= b0; _dst[0] = a0; \ 194 a1 ^= b1; _dst[1] = a1; \ 195 a2 ^= b2; _dst[2] = a2; \ 196 a3 ^= b3; _dst[3] = a3; \ 197 _dst += 4; 198 199 200 void 201 rf_nWayXor3(src_rbs, dest_rb, len) 202 RF_ReconBuffer_t **src_rbs; 203 RF_ReconBuffer_t *dest_rb; 204 int len; 205 { 206 unsigned long *dst = (unsigned long *) dest_rb->buffer; 207 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 208 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 209 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 210 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 211 212 callcount[3]++; 213 /* align dest to cache line */ 214 while ((((unsigned long) dst) & 0x1f)) { 215 *dst++ ^= *b++ ^ *c++ ^ *d++; 216 len--; 217 } 218 while (len > 4) { 219 LOAD_FIRST(dst, b); 220 XOR_AND_LOAD_NEXT(c); 221 XOR_AND_LOAD_NEXT(d); 222 XOR_AND_STORE(dst); 223 } 224 while (len) { 225 *dst++ ^= *b++ ^ *c++ ^ *d++; 226 len--; 227 } 228 } 229 230 void 231 rf_nWayXor4(src_rbs, dest_rb, len) 232 RF_ReconBuffer_t **src_rbs; 233 RF_ReconBuffer_t *dest_rb; 234 int len; 235 { 236 unsigned long *dst = (unsigned long *) dest_rb->buffer; 237 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 238 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 239 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 240 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 241 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 242 243 callcount[4]++; 244 /* align dest to cache line */ 245 while ((((unsigned long) dst) & 0x1f)) { 246 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; 247 len--; 248 } 249 while (len > 4) { 250 LOAD_FIRST(dst, b); 251 XOR_AND_LOAD_NEXT(c); 252 XOR_AND_LOAD_NEXT(d); 253 XOR_AND_LOAD_NEXT(e); 254 XOR_AND_STORE(dst); 255 } 256 while (len) { 257 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; 258 len--; 259 } 260 } 261 262 void 263 rf_nWayXor5(src_rbs, dest_rb, len) 264 RF_ReconBuffer_t **src_rbs; 265 RF_ReconBuffer_t *dest_rb; 266 int len; 267 { 268 unsigned long *dst = (unsigned long *) dest_rb->buffer; 269 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 270 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 271 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 272 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 273 unsigned long *f = (unsigned long *) src_rbs[4]->buffer; 274 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 275 276 callcount[5]++; 277 /* align dest to cache line */ 278 while ((((unsigned long) dst) & 0x1f)) { 279 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; 280 len--; 281 } 282 while (len > 4) { 283 LOAD_FIRST(dst, b); 284 XOR_AND_LOAD_NEXT(c); 285 XOR_AND_LOAD_NEXT(d); 286 XOR_AND_LOAD_NEXT(e); 287 XOR_AND_LOAD_NEXT(f); 288 XOR_AND_STORE(dst); 289 } 290 while (len) { 291 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; 292 len--; 293 } 294 } 295 296 void 297 rf_nWayXor6(src_rbs, dest_rb, len) 298 RF_ReconBuffer_t **src_rbs; 299 RF_ReconBuffer_t *dest_rb; 300 int len; 301 { 302 unsigned long *dst = (unsigned long *) dest_rb->buffer; 303 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 304 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 305 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 306 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 307 unsigned long *f = (unsigned long *) src_rbs[4]->buffer; 308 unsigned long *g = (unsigned long *) src_rbs[5]->buffer; 309 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 310 311 callcount[6]++; 312 /* align dest to cache line */ 313 while ((((unsigned long) dst) & 0x1f)) { 314 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; 315 len--; 316 } 317 while (len > 4) { 318 LOAD_FIRST(dst, b); 319 XOR_AND_LOAD_NEXT(c); 320 XOR_AND_LOAD_NEXT(d); 321 XOR_AND_LOAD_NEXT(e); 322 XOR_AND_LOAD_NEXT(f); 323 XOR_AND_LOAD_NEXT(g); 324 XOR_AND_STORE(dst); 325 } 326 while (len) { 327 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; 328 len--; 329 } 330 } 331 332 void 333 rf_nWayXor7(src_rbs, dest_rb, len) 334 RF_ReconBuffer_t **src_rbs; 335 RF_ReconBuffer_t *dest_rb; 336 int len; 337 { 338 unsigned long *dst = (unsigned long *) dest_rb->buffer; 339 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 340 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 341 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 342 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 343 unsigned long *f = (unsigned long *) src_rbs[4]->buffer; 344 unsigned long *g = (unsigned long *) src_rbs[5]->buffer; 345 unsigned long *h = (unsigned long *) src_rbs[6]->buffer; 346 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 347 348 callcount[7]++; 349 /* align dest to cache line */ 350 while ((((unsigned long) dst) & 0x1f)) { 351 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; 352 len--; 353 } 354 while (len > 4) { 355 LOAD_FIRST(dst, b); 356 XOR_AND_LOAD_NEXT(c); 357 XOR_AND_LOAD_NEXT(d); 358 XOR_AND_LOAD_NEXT(e); 359 XOR_AND_LOAD_NEXT(f); 360 XOR_AND_LOAD_NEXT(g); 361 XOR_AND_LOAD_NEXT(h); 362 XOR_AND_STORE(dst); 363 } 364 while (len) { 365 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; 366 len--; 367 } 368 } 369 370 void 371 rf_nWayXor8(src_rbs, dest_rb, len) 372 RF_ReconBuffer_t **src_rbs; 373 RF_ReconBuffer_t *dest_rb; 374 int len; 375 { 376 unsigned long *dst = (unsigned long *) dest_rb->buffer; 377 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 378 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 379 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 380 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 381 unsigned long *f = (unsigned long *) src_rbs[4]->buffer; 382 unsigned long *g = (unsigned long *) src_rbs[5]->buffer; 383 unsigned long *h = (unsigned long *) src_rbs[6]->buffer; 384 unsigned long *i = (unsigned long *) src_rbs[7]->buffer; 385 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 386 387 callcount[8]++; 388 /* align dest to cache line */ 389 while ((((unsigned long) dst) & 0x1f)) { 390 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; 391 len--; 392 } 393 while (len > 4) { 394 LOAD_FIRST(dst, b); 395 XOR_AND_LOAD_NEXT(c); 396 XOR_AND_LOAD_NEXT(d); 397 XOR_AND_LOAD_NEXT(e); 398 XOR_AND_LOAD_NEXT(f); 399 XOR_AND_LOAD_NEXT(g); 400 XOR_AND_LOAD_NEXT(h); 401 XOR_AND_LOAD_NEXT(i); 402 XOR_AND_STORE(dst); 403 } 404 while (len) { 405 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; 406 len--; 407 } 408 } 409 410 411 void 412 rf_nWayXor9(src_rbs, dest_rb, len) 413 RF_ReconBuffer_t **src_rbs; 414 RF_ReconBuffer_t *dest_rb; 415 int len; 416 { 417 unsigned long *dst = (unsigned long *) dest_rb->buffer; 418 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 419 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 420 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 421 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 422 unsigned long *f = (unsigned long *) src_rbs[4]->buffer; 423 unsigned long *g = (unsigned long *) src_rbs[5]->buffer; 424 unsigned long *h = (unsigned long *) src_rbs[6]->buffer; 425 unsigned long *i = (unsigned long *) src_rbs[7]->buffer; 426 unsigned long *j = (unsigned long *) src_rbs[8]->buffer; 427 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 428 429 callcount[9]++; 430 /* align dest to cache line */ 431 while ((((unsigned long) dst) & 0x1f)) { 432 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; 433 len--; 434 } 435 while (len > 4) { 436 LOAD_FIRST(dst, b); 437 XOR_AND_LOAD_NEXT(c); 438 XOR_AND_LOAD_NEXT(d); 439 XOR_AND_LOAD_NEXT(e); 440 XOR_AND_LOAD_NEXT(f); 441 XOR_AND_LOAD_NEXT(g); 442 XOR_AND_LOAD_NEXT(h); 443 XOR_AND_LOAD_NEXT(i); 444 XOR_AND_LOAD_NEXT(j); 445 XOR_AND_STORE(dst); 446 } 447 while (len) { 448 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; 449 len--; 450 } 451 } 452