/*	$NetBSD: rf_reconbuffer.c,v 1.7 2002/01/09 03:10:20 oster Exp $	*/
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
27 */ 28 29 /*************************************************** 30 * 31 * rf_reconbuffer.c -- reconstruction buffer manager 32 * 33 ***************************************************/ 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: rf_reconbuffer.c,v 1.7 2002/01/09 03:10:20 oster Exp $"); 37 38 #include "rf_raid.h" 39 #include "rf_reconbuffer.h" 40 #include "rf_acctrace.h" 41 #include "rf_etimer.h" 42 #include "rf_general.h" 43 #include "rf_debugprint.h" 44 #include "rf_revent.h" 45 #include "rf_reconutil.h" 46 #include "rf_nwayxor.h" 47 48 #ifdef DEBUG 49 50 #define Dprintf1(s,a) if (rf_reconbufferDebug) printf(s,a) 51 #define Dprintf2(s,a,b) if (rf_reconbufferDebug) printf(s,a,b) 52 #define Dprintf3(s,a,b,c) if (rf_reconbufferDebug) printf(s,a,b,c) 53 #define Dprintf4(s,a,b,c,d) if (rf_reconbufferDebug) printf(s,a,b,c,d) 54 #define Dprintf5(s,a,b,c,d,e) if (rf_reconbufferDebug) printf(s,a,b,c,d,e) 55 56 #else /* DEBUG */ 57 58 #define Dprintf1(s,a) {} 59 #define Dprintf2(s,a,b) {} 60 #define Dprintf3(s,a,b,c) {} 61 #define Dprintf4(s,a,b,c,d) {} 62 #define Dprintf5(s,a,b,c,d,e) {} 63 64 #endif 65 66 /***************************************************************************** 67 * 68 * Submit a reconstruction buffer to the manager for XOR. We can only 69 * submit a buffer if (1) we can xor into an existing buffer, which 70 * means we don't have to acquire a new one, (2) we can acquire a 71 * floating recon buffer, or (3) the caller has indicated that we are 72 * allowed to keep the submitted buffer. 73 * 74 * Returns non-zero if and only if we were not able to submit. 75 * In this case, we append the current disk ID to the wait list on the 76 * indicated RU, so that it will be re-enabled when we acquire a buffer 77 * for this RU. 78 * 79 ****************************************************************************/ 80 81 /* 82 * nWayXorFuncs[i] is a pointer to a function that will xor "i" 83 * bufs into the accumulating sum. 
84 */ 85 static RF_VoidFuncPtr nWayXorFuncs[] = { 86 NULL, 87 (RF_VoidFuncPtr) rf_nWayXor1, 88 (RF_VoidFuncPtr) rf_nWayXor2, 89 (RF_VoidFuncPtr) rf_nWayXor3, 90 (RF_VoidFuncPtr) rf_nWayXor4, 91 (RF_VoidFuncPtr) rf_nWayXor5, 92 (RF_VoidFuncPtr) rf_nWayXor6, 93 (RF_VoidFuncPtr) rf_nWayXor7, 94 (RF_VoidFuncPtr) rf_nWayXor8, 95 (RF_VoidFuncPtr) rf_nWayXor9 96 }; 97 98 int 99 rf_SubmitReconBuffer(rbuf, keep_it, use_committed) 100 RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ 101 int keep_it; /* whether we can keep this buffer or we have 102 * to return it */ 103 int use_committed; /* whether to use a committed or an available 104 * recon buffer */ 105 { 106 RF_LayoutSW_t *lp; 107 int rc; 108 109 lp = rbuf->raidPtr->Layout.map; 110 rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed); 111 return (rc); 112 } 113 114 int 115 rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed) 116 RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ 117 int keep_it; /* whether we can keep this buffer or we have 118 * to return it */ 119 int use_committed; /* whether to use a committed or an available 120 * recon buffer */ 121 { 122 RF_Raid_t *raidPtr = rbuf->raidPtr; 123 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 124 RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row]; 125 RF_ReconParityStripeStatus_t *pssPtr; 126 RF_ReconBuffer_t *targetRbuf, *t = NULL; /* temporary rbuf 127 * pointers */ 128 caddr_t ta; /* temporary data buffer pointer */ 129 RF_CallbackDesc_t *cb, *p; 130 int retcode = 0, created = 0; 131 132 RF_Etimer_t timer; 133 134 /* makes no sense to have a submission from the failed disk */ 135 RF_ASSERT(rbuf); 136 RF_ASSERT(rbuf->col != reconCtrlPtr->fcol); 137 138 Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n", 139 rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset); 140 141 RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); 142 143 
RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); 144 145 pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created); 146 RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten 147 * an rbuf for it */ 148 149 /* check to see if enough buffers have accumulated to do an XOR. If 150 * so, there's no need to acquire a floating rbuf. Before we can do 151 * any XORing, we must have acquired a destination buffer. If we 152 * have, then we can go ahead and do the XOR if (1) including this 153 * buffer, enough bufs have accumulated, or (2) this is the last 154 * submission for this stripe. Otherwise, we have to go acquire a 155 * floating rbuf. */ 156 157 targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; 158 if ((targetRbuf != NULL) && 159 ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) { 160 pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf; /* install this buffer */ 161 Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n", rbuf->row, rbuf->col, pssPtr->xorBufCount); 162 RF_ETIMER_START(timer); 163 rf_MultiWayReconXor(raidPtr, pssPtr); 164 RF_ETIMER_STOP(timer); 165 RF_ETIMER_EVAL(timer); 166 raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer); 167 if (!keep_it) { 168 raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer); 169 RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 170 RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 171 raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += 172 RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 173 RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 174 175 rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); 176 } 177 rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); 178 179 /* if use_committed is on, we _must_ consume a buffer off the 180 * committed 
list. */ 181 if (use_committed) { 182 t = reconCtrlPtr->committedRbufs; 183 RF_ASSERT(t); 184 reconCtrlPtr->committedRbufs = t->next; 185 rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t); 186 } 187 if (keep_it) { 188 RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); 189 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); 190 rf_FreeReconBuffer(rbuf); 191 return (retcode); 192 } 193 goto out; 194 } 195 /* set the value of "t", which we'll use as the rbuf from here on */ 196 if (keep_it) { 197 t = rbuf; 198 } else { 199 if (use_committed) { /* if a buffer has been committed to 200 * us, use it */ 201 t = reconCtrlPtr->committedRbufs; 202 RF_ASSERT(t); 203 reconCtrlPtr->committedRbufs = t->next; 204 t->next = NULL; 205 } else 206 if (reconCtrlPtr->floatingRbufs) { 207 t = reconCtrlPtr->floatingRbufs; 208 reconCtrlPtr->floatingRbufs = t->next; 209 t->next = NULL; 210 } 211 } 212 213 /* If we weren't able to acquire a buffer, append to the end of the 214 * buf list in the recon ctrl struct. */ 215 if (!t) { 216 RF_ASSERT(!keep_it && !use_committed); 217 Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n", rbuf->row, rbuf->col); 218 219 raidPtr->procsInBufWait++; 220 if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) { 221 printf("Buffer wait deadlock detected. 
Exiting.\n"); 222 rf_PrintPSStatusTable(raidPtr, rbuf->row); 223 RF_PANIC(); 224 } 225 pssPtr->flags |= RF_PSS_BUFFERWAIT; 226 cb = rf_AllocCallbackDesc(); /* append to buf wait list in 227 * recon ctrl structure */ 228 cb->row = rbuf->row; 229 cb->col = rbuf->col; 230 cb->callbackArg.v = rbuf->parityStripeID; 231 cb->callbackArg2.v = rbuf->which_ru; 232 cb->next = NULL; 233 if (!reconCtrlPtr->bufferWaitList) 234 reconCtrlPtr->bufferWaitList = cb; 235 else { /* might want to maintain head/tail pointers 236 * here rather than search for end of list */ 237 for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next); 238 p->next = cb; 239 } 240 retcode = 1; 241 goto out; 242 } 243 Dprintf2("RECON: row %d col %d acquired rbuf\n", rbuf->row, rbuf->col); 244 RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 245 RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 246 raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += 247 RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 248 RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 249 250 rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); 251 252 /* initialize the buffer */ 253 if (t != rbuf) { 254 t->row = rbuf->row; 255 t->col = reconCtrlPtr->fcol; 256 t->parityStripeID = rbuf->parityStripeID; 257 t->which_ru = rbuf->which_ru; 258 t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset; 259 t->spRow = rbuf->spRow; 260 t->spCol = rbuf->spCol; 261 t->spOffset = rbuf->spOffset; 262 263 ta = t->buffer; 264 t->buffer = rbuf->buffer; 265 rbuf->buffer = ta; /* swap buffers */ 266 } 267 /* the first installation always gets installed as the destination 268 * buffer. 
subsequent installations get stacked up to allow for 269 * multi-way XOR */ 270 if (!pssPtr->rbuf) { 271 pssPtr->rbuf = t; 272 t->count = 1; 273 } else 274 pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t; /* install this buffer */ 275 276 rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); /* the buffer is full if 277 * G=2 */ 278 279 out: 280 RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); 281 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); 282 return (retcode); 283 } 284 285 int 286 rf_MultiWayReconXor(raidPtr, pssPtr) 287 RF_Raid_t *raidPtr; 288 RF_ReconParityStripeStatus_t *pssPtr; /* the pss descriptor for this 289 * parity stripe */ 290 { 291 int i, numBufs = pssPtr->xorBufCount; 292 int numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU); 293 RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor; 294 RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; 295 296 RF_ASSERT(pssPtr->rbuf != NULL); 297 RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS); 298 #ifdef _KERNEL 299 #ifndef __NetBSD__ 300 thread_block(); /* yield the processor before doing a big XOR */ 301 #endif 302 #endif /* _KERNEL */ 303 /* 304 * XXX 305 * 306 * What if more than 9 bufs? 307 */ 308 nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long)); 309 310 /* release all the reconstruction buffers except the last one, which 311 * belongs to the disk whose submission caused this XOR to take place */ 312 for (i = 0; i < numBufs - 1; i++) { 313 if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING) 314 rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]); 315 else 316 if (rbufs[i]->type == RF_RBUF_TYPE_FORCED) 317 rf_FreeReconBuffer(rbufs[i]); 318 else 319 RF_ASSERT(0); 320 } 321 targetRbuf->count += pssPtr->xorBufCount; 322 pssPtr->xorBufCount = 0; 323 return (0); 324 } 325 /* removes one full buffer from one of the full-buffer lists and returns it. 
326 * 327 * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY. 328 */ 329 RF_ReconBuffer_t * 330 rf_GetFullReconBuffer(reconCtrlPtr) 331 RF_ReconCtrl_t *reconCtrlPtr; 332 { 333 RF_ReconBuffer_t *p; 334 335 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); 336 337 if ((p = reconCtrlPtr->priorityList) != NULL) { 338 reconCtrlPtr->priorityList = p->next; 339 p->next = NULL; 340 goto out; 341 } 342 if ((p = reconCtrlPtr->fullBufferList) != NULL) { 343 reconCtrlPtr->fullBufferList = p->next; 344 p->next = NULL; 345 goto out; 346 } 347 out: 348 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); 349 return (p); 350 } 351 352 353 /* if the reconstruction buffer is full, move it to the full list, 354 * which is maintained sorted by failed disk sector offset 355 * 356 * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY. */ 357 int 358 rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol) 359 RF_Raid_t *raidPtr; 360 RF_ReconCtrl_t *reconCtrl; 361 RF_ReconParityStripeStatus_t *pssPtr; 362 int numDataCol; 363 { 364 RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; 365 366 if (rbuf->count == numDataCol) { 367 raidPtr->numFullReconBuffers++; 368 Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n", 369 (long) rbuf->parityStripeID, rbuf->which_ru); 370 if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) { 371 Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n", 372 (long) rbuf->parityStripeID, rbuf->which_ru); 373 rbuf->next = reconCtrl->fullBufferList; 374 reconCtrl->fullBufferList = rbuf; 375 } else { 376 for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next); 377 rbuf->next = p; 378 pt->next = rbuf; 379 Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n", 380 (long) rbuf->parityStripeID, rbuf->which_ru); 381 } 382 #if 0 383 pssPtr->writeRbuf = pssPtr->rbuf; /* DEBUG ONLY: we like 384 * to be able to find 385 * this rbuf while it's 386 
* awaiting write */ 387 #else 388 rbuf->pssPtr = pssPtr; 389 #endif 390 pssPtr->rbuf = NULL; 391 rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY); 392 } 393 return (0); 394 } 395 396 397 /* release a floating recon buffer for someone else to use. 398 * assumes the rb_mutex is LOCKED at entry 399 */ 400 void 401 rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf) 402 RF_Raid_t *raidPtr; 403 RF_RowCol_t row; 404 RF_ReconBuffer_t *rbuf; 405 { 406 RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row]; 407 RF_CallbackDesc_t *cb; 408 409 Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n", 410 (long) rbuf->parityStripeID, rbuf->which_ru); 411 412 /* if anyone is waiting on buffers, wake one of them up. They will 413 * subsequently wake up anyone else waiting on their RU */ 414 if (rcPtr->bufferWaitList) { 415 rbuf->next = rcPtr->committedRbufs; 416 rcPtr->committedRbufs = rbuf; 417 cb = rcPtr->bufferWaitList; 418 rcPtr->bufferWaitList = cb->next; 419 rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've 420 * committed a buffer */ 421 rf_FreeCallbackDesc(cb); 422 raidPtr->procsInBufWait--; 423 } else { 424 rbuf->next = rcPtr->floatingRbufs; 425 rcPtr->floatingRbufs = rbuf; 426 } 427 } 428 /* release any disk that is waiting on a buffer for the indicated RU. 
429 * assumes the rb_mutex is LOCKED at entry 430 */ 431 void 432 rf_ReleaseBufferWaiters(raidPtr, pssPtr) 433 RF_Raid_t *raidPtr; 434 RF_ReconParityStripeStatus_t *pssPtr; 435 { 436 RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList; 437 438 Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n", 439 (long) pssPtr->parityStripeID, pssPtr->which_ru); 440 pssPtr->flags &= ~RF_PSS_BUFFERWAIT; 441 while (cb) { 442 cb1 = cb->next; 443 cb->next = NULL; 444 rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR); /* arg==0 => we haven't 445 * committed a buffer */ 446 rf_FreeCallbackDesc(cb); 447 cb = cb1; 448 } 449 pssPtr->bufWaitList = NULL; 450 } 451 /* when reconstruction is forced on an RU, there may be some disks waiting to 452 * acquire a buffer for that RU. Since we allocate a new buffer as part of 453 * the forced-reconstruction process, we no longer have to wait for any 454 * buffers, so we wakeup any waiter that we find in the bufferWaitList 455 * 456 * assumes the rb_mutex is LOCKED at entry 457 */ 458 void 459 rf_ReleaseBufferWaiter(rcPtr, rbuf) 460 RF_ReconCtrl_t *rcPtr; 461 RF_ReconBuffer_t *rbuf; 462 { 463 RF_CallbackDesc_t *cb, *cbt; 464 465 for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb = cb->next) { 466 if ((cb->callbackArg.v == rbuf->parityStripeID) && (cb->callbackArg2.v == rbuf->which_ru)) { 467 Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col); 468 if (cbt) 469 cbt->next = cb->next; 470 else 471 rcPtr->bufferWaitList = cb->next; 472 rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY); /* arg==0 => no 473 * committed buffer */ 474 rf_FreeCallbackDesc(cb); 475 return; 476 } 477 } 478 } 479