1 /* $NetBSD: rf_dagfuncs.c,v 1.8 2001/11/13 07:11:13 lukem Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland, William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /* 30 * dagfuncs.c -- DAG node execution routines 31 * 32 * Rules: 33 * 1. Every DAG execution function must eventually cause node->status to 34 * get set to "good" or "bad", and "FinishNode" to be called. In the 35 * case of nodes that complete immediately (xor, NullNodeFunc, etc), 36 * the node execution function can do these two things directly. In 37 * the case of nodes that have to wait for some event (a disk read to 38 * complete, a lock to be released, etc) to occur before they can 39 * complete, this is typically achieved by having whatever module 40 * is doing the operation call GenericWakeupFunc upon completion. 41 * 2. DAG execution functions should check the status in the DAG header 42 * and NOP out their operations if the status is not "enable". However, 43 * execution functions that release resources must be sure to release 44 * them even when they NOP out the function that would use them. 45 * Functions that acquire resources should go ahead and acquire them 46 * even when they NOP, so that a downstream release node will not have 47 * to check to find out whether or not the acquire was suppressed. 48 */ 49 50 #include <sys/cdefs.h> 51 __KERNEL_RCSID(0, "$NetBSD: rf_dagfuncs.c,v 1.8 2001/11/13 07:11:13 lukem Exp $"); 52 53 #include <sys/param.h> 54 #include <sys/ioctl.h> 55 56 #include "rf_archs.h" 57 #include "rf_raid.h" 58 #include "rf_dag.h" 59 #include "rf_layout.h" 60 #include "rf_etimer.h" 61 #include "rf_acctrace.h" 62 #include "rf_diskqueue.h" 63 #include "rf_dagfuncs.h" 64 #include "rf_general.h" 65 #include "rf_engine.h" 66 #include "rf_dagutils.h" 67 68 #include "rf_kintf.h" 69 70 #if RF_INCLUDE_PARITYLOGGING > 0 71 #include "rf_paritylog.h" 72 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 73 74 int (*rf_DiskReadFunc) (RF_DagNode_t *); 75 int (*rf_DiskWriteFunc) (RF_DagNode_t *); 76 int (*rf_DiskReadUndoFunc) (RF_DagNode_t *); 77 int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *); 78 int (*rf_DiskUnlockFunc) (RF_DagNode_t *); 79 int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *); 80 int (*rf_RegularXorUndoFunc) (RF_DagNode_t *); 81 int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *); 82 int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *); 83 84 /***************************************************************************************** 85 * main (only) configuration routine for this module 86 ****************************************************************************************/ 87 int 88 rf_ConfigureDAGFuncs(listp) 89 RF_ShutdownList_t **listp; 90 { 91 RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) || ((sizeof(long) == 4) && RF_LONGSHIFT == 2)); 92 rf_DiskReadFunc = rf_DiskReadFuncForThreads; 93 rf_DiskReadUndoFunc = rf_DiskUndoFunc; 94 rf_DiskWriteFunc = rf_DiskWriteFuncForThreads; 95 rf_DiskWriteUndoFunc = rf_DiskUndoFunc; 96 rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads; 97 rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc; 98 rf_RegularXorUndoFunc = rf_NullNodeUndoFunc; 99 rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc; 100 rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc; 101 return (0); 102 } 103 104 105 106 /***************************************************************************************** 107 * the execution function associated with a terminate node 108 ****************************************************************************************/ 109 int 110 rf_TerminateFunc(node) 111 RF_DagNode_t *node; 112 { 113 RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes); 114 node->status = rf_good; 115 return (rf_FinishNode(node, RF_THREAD_CONTEXT)); 116 } 117 118 int 119 rf_TerminateUndoFunc(node) 120 RF_DagNode_t *node; 121 { 122 return (0); 123 } 124 125 126 /***************************************************************************************** 127 * execution functions associated with a mirror node 128 * 129 * parameters: 130 * 131 * 0 - physical disk addres of data 132 * 1 - buffer for holding read data 133 * 2 - parity stripe ID 134 * 3 - flags 135 * 4 - physical disk address of mirror (parity) 136 * 137 ****************************************************************************************/ 138 139 int 140 rf_DiskReadMirrorIdleFunc(node) 141 RF_DagNode_t *node; 142 { 143 /* select the mirror copy with the shortest queue and fill in node 144 * parameters with physical disk address */ 145 146 rf_SelectMirrorDiskIdle(node); 147 return (rf_DiskReadFunc(node)); 148 } 149 150 int 151 rf_DiskReadMirrorPartitionFunc(node) 152 RF_DagNode_t *node; 153 { 154 /* select the mirror copy with the shortest queue and fill in node 155 * parameters with physical disk address */ 156 157 rf_SelectMirrorDiskPartition(node); 158 return (rf_DiskReadFunc(node)); 159 } 160 161 int 162 rf_DiskReadMirrorUndoFunc(node) 163 RF_DagNode_t *node; 164 { 165 return (0); 166 } 167 168 169 170 #if RF_INCLUDE_PARITYLOGGING > 0 171 /***************************************************************************************** 172 * the execution function associated with a parity log update node 173 ****************************************************************************************/ 174 int 175 rf_ParityLogUpdateFunc(node) 176 RF_DagNode_t *node; 177 { 178 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 179 caddr_t buf = (caddr_t) node->params[1].p; 180 RF_ParityLogData_t *logData; 181 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 182 RF_Etimer_t timer; 183 184 if (node->dagHdr->status == rf_enable) { 185 RF_ETIMER_START(timer); 186 logData = rf_CreateParityLogData(RF_UPDATE, pda, buf, 187 (RF_Raid_t *) (node->dagHdr->raidPtr), 188 node->wakeFunc, (void *) node, 189 node->dagHdr->tracerec, timer); 190 if (logData) 191 rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); 192 else { 193 RF_ETIMER_STOP(timer); 194 RF_ETIMER_EVAL(timer); 195 tracerec->plog_us += RF_ETIMER_VAL_US(timer); 196 (node->wakeFunc) (node, ENOMEM); 197 } 198 } 199 return (0); 200 } 201 202 203 /***************************************************************************************** 204 * the execution function associated with a parity log overwrite node 205 ****************************************************************************************/ 206 int 207 rf_ParityLogOverwriteFunc(node) 208 RF_DagNode_t *node; 209 { 210 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 211 caddr_t buf = (caddr_t) node->params[1].p; 212 RF_ParityLogData_t *logData; 213 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 214 RF_Etimer_t timer; 215 216 if (node->dagHdr->status == rf_enable) { 217 RF_ETIMER_START(timer); 218 logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, (RF_Raid_t *) (node->dagHdr->raidPtr), 219 node->wakeFunc, (void *) node, node->dagHdr->tracerec, timer); 220 if (logData) 221 rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); 222 else { 223 RF_ETIMER_STOP(timer); 224 RF_ETIMER_EVAL(timer); 225 tracerec->plog_us += RF_ETIMER_VAL_US(timer); 226 (node->wakeFunc) (node, ENOMEM); 227 } 228 } 229 return (0); 230 } 231 #else /* RF_INCLUDE_PARITYLOGGING > 0 */ 232 233 int 234 rf_ParityLogUpdateFunc(node) 235 RF_DagNode_t *node; 236 { 237 return (0); 238 } 239 int 240 rf_ParityLogOverwriteFunc(node) 241 RF_DagNode_t *node; 242 { 243 return (0); 244 } 245 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 246 247 int 248 rf_ParityLogUpdateUndoFunc(node) 249 RF_DagNode_t *node; 250 { 251 return (0); 252 } 253 254 int 255 rf_ParityLogOverwriteUndoFunc(node) 256 RF_DagNode_t *node; 257 { 258 return (0); 259 } 260 /***************************************************************************************** 261 * the execution function associated with a NOP node 262 ****************************************************************************************/ 263 int 264 rf_NullNodeFunc(node) 265 RF_DagNode_t *node; 266 { 267 node->status = rf_good; 268 return (rf_FinishNode(node, RF_THREAD_CONTEXT)); 269 } 270 271 int 272 rf_NullNodeUndoFunc(node) 273 RF_DagNode_t *node; 274 { 275 node->status = rf_undone; 276 return (rf_FinishNode(node, RF_THREAD_CONTEXT)); 277 } 278 279 280 /***************************************************************************************** 281 * the execution function associated with a disk-read node 282 ****************************************************************************************/ 283 int 284 rf_DiskReadFuncForThreads(node) 285 RF_DagNode_t *node; 286 { 287 RF_DiskQueueData_t *req; 288 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 289 caddr_t buf = (caddr_t) node->params[1].p; 290 RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; 291 unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); 292 unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v); 293 unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); 294 unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); 295 RF_DiskQueueDataFlags_t flags = 0; 296 RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_READ : RF_IO_TYPE_NOP; 297 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 298 void *b_proc = NULL; 299 300 if (node->dagHdr->bp) 301 b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc; 302 303 RF_ASSERT(!(lock && unlock)); 304 flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; 305 flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0; 306 307 req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, 308 buf, parityStripeID, which_ru, 309 (int (*) (void *, int)) node->wakeFunc, 310 node, NULL, node->dagHdr->tracerec, 311 (void *) (node->dagHdr->raidPtr), flags, b_proc); 312 if (!req) { 313 (node->wakeFunc) (node, ENOMEM); 314 } else { 315 node->dagFuncData = (void *) req; 316 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority); 317 } 318 return (0); 319 } 320 321 322 /***************************************************************************************** 323 * the execution function associated with a disk-write node 324 ****************************************************************************************/ 325 int 326 rf_DiskWriteFuncForThreads(node) 327 RF_DagNode_t *node; 328 { 329 RF_DiskQueueData_t *req; 330 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 331 caddr_t buf = (caddr_t) node->params[1].p; 332 RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; 333 unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); 334 unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v); 335 unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); 336 unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); 337 RF_DiskQueueDataFlags_t flags = 0; 338 RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP; 339 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 340 void *b_proc = NULL; 341 342 if (node->dagHdr->bp) 343 b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc; 344 345 /* normal processing (rollaway or forward recovery) begins here */ 346 RF_ASSERT(!(lock && unlock)); 347 flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; 348 flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0; 349 req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, 350 buf, parityStripeID, which_ru, 351 (int (*) (void *, int)) node->wakeFunc, 352 (void *) node, NULL, 353 node->dagHdr->tracerec, 354 (void *) (node->dagHdr->raidPtr), 355 flags, b_proc); 356 357 if (!req) { 358 (node->wakeFunc) (node, ENOMEM); 359 } else { 360 node->dagFuncData = (void *) req; 361 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority); 362 } 363 364 return (0); 365 } 366 /***************************************************************************************** 367 * the undo function for disk nodes 368 * Note: this is not a proper undo of a write node, only locks are released. 369 * old data is not restored to disk! 370 ****************************************************************************************/ 371 int 372 rf_DiskUndoFunc(node) 373 RF_DagNode_t *node; 374 { 375 RF_DiskQueueData_t *req; 376 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 377 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 378 379 req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 380 0L, 0, NULL, 0L, 0, 381 (int (*) (void *, int)) node->wakeFunc, 382 (void *) node, 383 NULL, node->dagHdr->tracerec, 384 (void *) (node->dagHdr->raidPtr), 385 RF_UNLOCK_DISK_QUEUE, NULL); 386 if (!req) 387 (node->wakeFunc) (node, ENOMEM); 388 else { 389 node->dagFuncData = (void *) req; 390 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY); 391 } 392 393 return (0); 394 } 395 /***************************************************************************************** 396 * the execution function associated with an "unlock disk queue" node 397 ****************************************************************************************/ 398 int 399 rf_DiskUnlockFuncForThreads(node) 400 RF_DagNode_t *node; 401 { 402 RF_DiskQueueData_t *req; 403 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 404 RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 405 406 req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 407 0L, 0, NULL, 0L, 0, 408 (int (*) (void *, int)) node->wakeFunc, 409 (void *) node, 410 NULL, node->dagHdr->tracerec, 411 (void *) (node->dagHdr->raidPtr), 412 RF_UNLOCK_DISK_QUEUE, NULL); 413 if (!req) 414 (node->wakeFunc) (node, ENOMEM); 415 else { 416 node->dagFuncData = (void *) req; 417 rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY); 418 } 419 420 return (0); 421 } 422 /***************************************************************************************** 423 * Callback routine for DiskRead and DiskWrite nodes. When the disk op completes, 424 * the routine is called to set the node status and inform the execution engine that 425 * the node has fired. 426 ****************************************************************************************/ 427 int 428 rf_GenericWakeupFunc(node, status) 429 RF_DagNode_t *node; 430 int status; 431 { 432 switch (node->status) { 433 case rf_bwd1: 434 node->status = rf_bwd2; 435 if (node->dagFuncData) 436 rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); 437 return (rf_DiskWriteFuncForThreads(node)); 438 break; 439 case rf_fired: 440 if (status) 441 node->status = rf_bad; 442 else 443 node->status = rf_good; 444 break; 445 case rf_recover: 446 /* probably should never reach this case */ 447 if (status) 448 node->status = rf_panic; 449 else 450 node->status = rf_undone; 451 break; 452 default: 453 printf("rf_GenericWakeupFunc:"); 454 printf("node->status is %d,", node->status); 455 printf("status is %d \n", status); 456 RF_PANIC(); 457 break; 458 } 459 if (node->dagFuncData) 460 rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); 461 return (rf_FinishNode(node, RF_INTR_CONTEXT)); 462 } 463 464 465 /***************************************************************************************** 466 * there are three distinct types of xor nodes 467 * A "regular xor" is used in the fault-free case where the access spans a complete 468 * stripe unit. It assumes that the result buffer is one full stripe unit in size, 469 * and uses the stripe-unit-offset values that it computes from the PDAs to determine 470 * where within the stripe unit to XOR each argument buffer. 471 * 472 * A "simple xor" is used in the fault-free case where the access touches only a portion 473 * of one (or two, in some cases) stripe unit(s). It assumes that all the argument 474 * buffers are of the same size and have the same stripe unit offset. 475 * 476 * A "recovery xor" is used in the degraded-mode case. It's similar to the regular 477 * xor function except that it takes the failed PDA as an additional parameter, and 478 * uses it to determine what portions of the argument buffers need to be xor'd into 479 * the result buffer, and where in the result buffer they should go. 480 ****************************************************************************************/ 481 482 /* xor the params together and store the result in the result field. 483 * assume the result field points to a buffer that is the size of one SU, 484 * and use the pda params to determine where within the buffer to XOR 485 * the input buffers. 486 */ 487 int 488 rf_RegularXorFunc(node) 489 RF_DagNode_t *node; 490 { 491 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; 492 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 493 RF_Etimer_t timer; 494 int i, retcode; 495 496 retcode = 0; 497 if (node->dagHdr->status == rf_enable) { 498 /* don't do the XOR if the input is the same as the output */ 499 RF_ETIMER_START(timer); 500 for (i = 0; i < node->numParams - 1; i += 2) 501 if (node->params[i + 1].p != node->results[0]) { 502 retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p, 503 (char *) node->params[i + 1].p, (char *) node->results[0], node->dagHdr->bp); 504 } 505 RF_ETIMER_STOP(timer); 506 RF_ETIMER_EVAL(timer); 507 tracerec->xor_us += RF_ETIMER_VAL_US(timer); 508 } 509 return (rf_GenericWakeupFunc(node, retcode)); /* call wake func 510 * explicitly since no 511 * I/O in this node */ 512 } 513 /* xor the inputs into the result buffer, ignoring placement issues */ 514 int 515 rf_SimpleXorFunc(node) 516 RF_DagNode_t *node; 517 { 518 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; 519 int i, retcode = 0; 520 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 521 RF_Etimer_t timer; 522 523 if (node->dagHdr->status == rf_enable) { 524 RF_ETIMER_START(timer); 525 /* don't do the XOR if the input is the same as the output */ 526 for (i = 0; i < node->numParams - 1; i += 2) 527 if (node->params[i + 1].p != node->results[0]) { 528 retcode = rf_bxor((char *) node->params[i + 1].p, (char *) node->results[0], 529 rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[i].p)->numSector), 530 (struct buf *) node->dagHdr->bp); 531 } 532 RF_ETIMER_STOP(timer); 533 RF_ETIMER_EVAL(timer); 534 tracerec->xor_us += RF_ETIMER_VAL_US(timer); 535 } 536 return (rf_GenericWakeupFunc(node, retcode)); /* call wake func 537 * explicitly since no 538 * I/O in this node */ 539 } 540 /* this xor is used by the degraded-mode dag functions to recover lost data. 541 * the second-to-last parameter is the PDA for the failed portion of the access. 542 * the code here looks at this PDA and assumes that the xor target buffer is 543 * equal in size to the number of sectors in the failed PDA. It then uses 544 * the other PDAs in the parameter list to determine where within the target 545 * buffer the corresponding data should be xored. 546 */ 547 int 548 rf_RecoveryXorFunc(node) 549 RF_DagNode_t *node; 550 { 551 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; 552 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; 553 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; 554 int i, retcode = 0; 555 RF_PhysDiskAddr_t *pda; 556 int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); 557 char *srcbuf, *destbuf; 558 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 559 RF_Etimer_t timer; 560 561 if (node->dagHdr->status == rf_enable) { 562 RF_ETIMER_START(timer); 563 for (i = 0; i < node->numParams - 2; i += 2) 564 if (node->params[i + 1].p != node->results[0]) { 565 pda = (RF_PhysDiskAddr_t *) node->params[i].p; 566 srcbuf = (char *) node->params[i + 1].p; 567 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); 568 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); 569 retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), node->dagHdr->bp); 570 } 571 RF_ETIMER_STOP(timer); 572 RF_ETIMER_EVAL(timer); 573 tracerec->xor_us += RF_ETIMER_VAL_US(timer); 574 } 575 return (rf_GenericWakeupFunc(node, retcode)); 576 } 577 /***************************************************************************************** 578 * The next three functions are utilities used by the above xor-execution functions. 579 ****************************************************************************************/ 580 581 582 /* 583 * this is just a glorified buffer xor. targbuf points to a buffer that is one full stripe unit 584 * in size. srcbuf points to a buffer that may be less than 1 SU, but never more. When the 585 * access described by pda is one SU in size (which by implication means it's SU-aligned), 586 * all that happens is (targbuf) <- (srcbuf ^ targbuf). When the access is less than one 587 * SU in size the XOR occurs on only the portion of targbuf identified in the pda. 588 */ 589 590 int 591 rf_XorIntoBuffer(raidPtr, pda, srcbuf, targbuf, bp) 592 RF_Raid_t *raidPtr; 593 RF_PhysDiskAddr_t *pda; 594 char *srcbuf; 595 char *targbuf; 596 void *bp; 597 { 598 char *targptr; 599 int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 600 int SUOffset = pda->startSector % sectPerSU; 601 int length, retcode = 0; 602 603 RF_ASSERT(pda->numSector <= sectPerSU); 604 605 targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset); 606 length = rf_RaidAddressToByte(raidPtr, pda->numSector); 607 retcode = rf_bxor(srcbuf, targptr, length, bp); 608 return (retcode); 609 } 610 /* it really should be the case that the buffer pointers (returned by malloc) 611 * are aligned to the natural word size of the machine, so this is the only 612 * case we optimize for. The length should always be a multiple of the sector 613 * size, so there should be no problem with leftover bytes at the end. 614 */ 615 int 616 rf_bxor(src, dest, len, bp) 617 char *src; 618 char *dest; 619 int len; 620 void *bp; 621 { 622 unsigned mask = sizeof(long) - 1, retcode = 0; 623 624 if (!(((unsigned long) src) & mask) && !(((unsigned long) dest) & mask) && !(len & mask)) { 625 retcode = rf_longword_bxor((unsigned long *) src, (unsigned long *) dest, len >> RF_LONGSHIFT, bp); 626 } else { 627 RF_ASSERT(0); 628 } 629 return (retcode); 630 } 631 /* map a user buffer into kernel space, if necessary */ 632 #define REMAP_VA(_bp,x,y) (y) = (x) 633 634 /* When XORing in kernel mode, we need to map each user page to kernel space before we can access it. 635 * We don't want to assume anything about which input buffers are in kernel/user 636 * space, nor about their alignment, so in each loop we compute the maximum number 637 * of bytes that we can xor without crossing any page boundaries, and do only this many 638 * bytes before the next remap. 639 */ 640 int 641 rf_longword_bxor(src, dest, len, bp) 642 unsigned long *src; 643 unsigned long *dest; 644 int len; /* longwords */ 645 void *bp; 646 { 647 unsigned long *end = src + len; 648 unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */ 649 unsigned long *pg_src, *pg_dest; /* per-page source/dest 650 * pointers */ 651 int longs_this_time;/* # longwords to xor in the current iteration */ 652 653 REMAP_VA(bp, src, pg_src); 654 REMAP_VA(bp, dest, pg_dest); 655 if (!pg_src || !pg_dest) 656 return (EFAULT); 657 658 while (len >= 4) { 659 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT); /* note len in longwords */ 660 src += longs_this_time; 661 dest += longs_this_time; 662 len -= longs_this_time; 663 while (longs_this_time >= 4) { 664 d0 = pg_dest[0]; 665 d1 = pg_dest[1]; 666 d2 = pg_dest[2]; 667 d3 = pg_dest[3]; 668 s0 = pg_src[0]; 669 s1 = pg_src[1]; 670 s2 = pg_src[2]; 671 s3 = pg_src[3]; 672 pg_dest[0] = d0 ^ s0; 673 pg_dest[1] = d1 ^ s1; 674 pg_dest[2] = d2 ^ s2; 675 pg_dest[3] = d3 ^ s3; 676 pg_src += 4; 677 pg_dest += 4; 678 longs_this_time -= 4; 679 } 680 while (longs_this_time > 0) { /* cannot cross any page 681 * boundaries here */ 682 *pg_dest++ ^= *pg_src++; 683 longs_this_time--; 684 } 685 686 /* either we're done, or we've reached a page boundary on one 687 * (or possibly both) of the pointers */ 688 if (len) { 689 if (RF_PAGE_ALIGNED(src)) 690 REMAP_VA(bp, src, pg_src); 691 if (RF_PAGE_ALIGNED(dest)) 692 REMAP_VA(bp, dest, pg_dest); 693 if (!pg_src || !pg_dest) 694 return (EFAULT); 695 } 696 } 697 while (src < end) { 698 *pg_dest++ ^= *pg_src++; 699 src++; 700 dest++; 701 len--; 702 if (RF_PAGE_ALIGNED(src)) 703 REMAP_VA(bp, src, pg_src); 704 if (RF_PAGE_ALIGNED(dest)) 705 REMAP_VA(bp, dest, pg_dest); 706 } 707 RF_ASSERT(len == 0); 708 return (0); 709 } 710 711 712 /* 713 dst = a ^ b ^ c; 714 a may equal dst 715 see comment above longword_bxor 716 */ 717 int 718 rf_longword_bxor3(dst, a, b, c, len, bp) 719 unsigned long *dst; 720 unsigned long *a; 721 unsigned long *b; 722 unsigned long *c; 723 int len; /* length in longwords */ 724 void *bp; 725 { 726 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 727 unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest 728 * pointers */ 729 int longs_this_time;/* # longs to xor in the current iteration */ 730 char dst_is_a = 0; 731 732 REMAP_VA(bp, a, pg_a); 733 REMAP_VA(bp, b, pg_b); 734 REMAP_VA(bp, c, pg_c); 735 if (a == dst) { 736 pg_dst = pg_a; 737 dst_is_a = 1; 738 } else { 739 REMAP_VA(bp, dst, pg_dst); 740 } 741 742 /* align dest to cache line. Can't cross a pg boundary on dst here. */ 743 while ((((unsigned long) pg_dst) & 0x1f)) { 744 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 745 dst++; 746 a++; 747 b++; 748 c++; 749 if (RF_PAGE_ALIGNED(a)) { 750 REMAP_VA(bp, a, pg_a); 751 if (!pg_a) 752 return (EFAULT); 753 } 754 if (RF_PAGE_ALIGNED(b)) { 755 REMAP_VA(bp, a, pg_b); 756 if (!pg_b) 757 return (EFAULT); 758 } 759 if (RF_PAGE_ALIGNED(c)) { 760 REMAP_VA(bp, a, pg_c); 761 if (!pg_c) 762 return (EFAULT); 763 } 764 len--; 765 } 766 767 while (len > 4) { 768 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT); 769 a += longs_this_time; 770 b += longs_this_time; 771 c += longs_this_time; 772 dst += longs_this_time; 773 len -= longs_this_time; 774 while (longs_this_time >= 4) { 775 a0 = pg_a[0]; 776 longs_this_time -= 4; 777 778 a1 = pg_a[1]; 779 a2 = pg_a[2]; 780 781 a3 = pg_a[3]; 782 pg_a += 4; 783 784 b0 = pg_b[0]; 785 b1 = pg_b[1]; 786 787 b2 = pg_b[2]; 788 b3 = pg_b[3]; 789 /* start dual issue */ 790 a0 ^= b0; 791 b0 = pg_c[0]; 792 793 pg_b += 4; 794 a1 ^= b1; 795 796 a2 ^= b2; 797 a3 ^= b3; 798 799 b1 = pg_c[1]; 800 a0 ^= b0; 801 802 b2 = pg_c[2]; 803 a1 ^= b1; 804 805 b3 = pg_c[3]; 806 a2 ^= b2; 807 808 pg_dst[0] = a0; 809 a3 ^= b3; 810 pg_dst[1] = a1; 811 pg_c += 4; 812 pg_dst[2] = a2; 813 pg_dst[3] = a3; 814 pg_dst += 4; 815 } 816 while (longs_this_time > 0) { /* cannot cross any page 817 * boundaries here */ 818 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 819 longs_this_time--; 820 } 821 822 if (len) { 823 if (RF_PAGE_ALIGNED(a)) { 824 REMAP_VA(bp, a, pg_a); 825 if (!pg_a) 826 return (EFAULT); 827 if (dst_is_a) 828 pg_dst = pg_a; 829 } 830 if (RF_PAGE_ALIGNED(b)) { 831 REMAP_VA(bp, b, pg_b); 832 if (!pg_b) 833 return (EFAULT); 834 } 835 if (RF_PAGE_ALIGNED(c)) { 836 REMAP_VA(bp, c, pg_c); 837 if (!pg_c) 838 return (EFAULT); 839 } 840 if (!dst_is_a) 841 if (RF_PAGE_ALIGNED(dst)) { 842 REMAP_VA(bp, dst, pg_dst); 843 if (!pg_dst) 844 return (EFAULT); 845 } 846 } 847 } 848 while (len) { 849 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 850 dst++; 851 a++; 852 b++; 853 c++; 854 if (RF_PAGE_ALIGNED(a)) { 855 REMAP_VA(bp, a, pg_a); 856 if (!pg_a) 857 return (EFAULT); 858 if (dst_is_a) 859 pg_dst = pg_a; 860 } 861 if (RF_PAGE_ALIGNED(b)) { 862 REMAP_VA(bp, b, pg_b); 863 if (!pg_b) 864 return (EFAULT); 865 } 866 if (RF_PAGE_ALIGNED(c)) { 867 REMAP_VA(bp, c, pg_c); 868 if (!pg_c) 869 return (EFAULT); 870 } 871 if (!dst_is_a) 872 if (RF_PAGE_ALIGNED(dst)) { 873 REMAP_VA(bp, dst, pg_dst); 874 if (!pg_dst) 875 return (EFAULT); 876 } 877 len--; 878 } 879 return (0); 880 } 881 882 int 883 rf_bxor3(dst, a, b, c, len, bp) 884 unsigned char *dst; 885 unsigned char *a; 886 unsigned char *b; 887 unsigned char *c; 888 unsigned long len; 889 void *bp; 890 { 891 RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) == 0); 892 893 return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a, 894 (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, bp)); 895 } 896