1 /* $NetBSD: rf_reconbuffer.c,v 1.24 2007/03/04 06:02:39 christos Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /*************************************************** 30 * 31 * rf_reconbuffer.c -- reconstruction buffer manager 32 * 33 ***************************************************/ 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: rf_reconbuffer.c,v 1.24 2007/03/04 06:02:39 christos Exp $"); 37 38 #include "rf_raid.h" 39 #include "rf_reconbuffer.h" 40 #include "rf_acctrace.h" 41 #include "rf_etimer.h" 42 #include "rf_general.h" 43 #include "rf_revent.h" 44 #include "rf_reconutil.h" 45 #include "rf_nwayxor.h" 46 47 #ifdef DEBUG 48 49 #define Dprintf1(s,a) if (rf_reconbufferDebug) printf(s,a) 50 #define Dprintf2(s,a,b) if (rf_reconbufferDebug) printf(s,a,b) 51 #define Dprintf3(s,a,b,c) if (rf_reconbufferDebug) printf(s,a,b,c) 52 #define Dprintf4(s,a,b,c,d) if (rf_reconbufferDebug) printf(s,a,b,c,d) 53 #define Dprintf5(s,a,b,c,d,e) if (rf_reconbufferDebug) printf(s,a,b,c,d,e) 54 55 #else /* DEBUG */ 56 57 #define Dprintf1(s,a) {} 58 #define Dprintf2(s,a,b) {} 59 #define Dprintf3(s,a,b,c) {} 60 #define Dprintf4(s,a,b,c,d) {} 61 #define Dprintf5(s,a,b,c,d,e) {} 62 63 #endif 64 65 /***************************************************************************** 66 * 67 * Submit a reconstruction buffer to the manager for XOR. We can only 68 * submit a buffer if (1) we can xor into an existing buffer, which 69 * means we don't have to acquire a new one, (2) we can acquire a 70 * floating recon buffer, or (3) the caller has indicated that we are 71 * allowed to keep the submitted buffer. 72 * 73 * Returns non-zero if and only if we were not able to submit. 74 * In this case, we append the current disk ID to the wait list on the 75 * indicated RU, so that it will be re-enabled when we acquire a buffer 76 * for this RU. 77 * 78 ****************************************************************************/ 79 80 /* 81 * nWayXorFuncs[i] is a pointer to a function that will xor "i" 82 * bufs into the accumulating sum. 83 */ 84 static const RF_VoidFuncPtr nWayXorFuncs[] = { 85 NULL, 86 (RF_VoidFuncPtr) rf_nWayXor1, 87 (RF_VoidFuncPtr) rf_nWayXor2, 88 (RF_VoidFuncPtr) rf_nWayXor3, 89 (RF_VoidFuncPtr) rf_nWayXor4, 90 (RF_VoidFuncPtr) rf_nWayXor5, 91 (RF_VoidFuncPtr) rf_nWayXor6, 92 (RF_VoidFuncPtr) rf_nWayXor7, 93 (RF_VoidFuncPtr) rf_nWayXor8, 94 (RF_VoidFuncPtr) rf_nWayXor9 95 }; 96 97 /* 98 * rbuf - the recon buffer to submit 99 * keep_it - whether we can keep this buffer or we have to return it 100 * use_committed - whether to use a committed or an available recon buffer 101 */ 102 int 103 rf_SubmitReconBuffer(RF_ReconBuffer_t *rbuf, int keep_it, int use_committed) 104 { 105 const RF_LayoutSW_t *lp; 106 int rc; 107 108 lp = rbuf->raidPtr->Layout.map; 109 rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed); 110 return (rc); 111 } 112 113 /* 114 * rbuf - the recon buffer to submit 115 * keep_it - whether we can keep this buffer or we have to return it 116 * use_committed - whether to use a committed or an available recon buffer 117 */ 118 int 119 rf_SubmitReconBufferBasic(RF_ReconBuffer_t *rbuf, int keep_it, 120 int use_committed) 121 { 122 RF_Raid_t *raidPtr = rbuf->raidPtr; 123 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 124 RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl; 125 RF_ReconParityStripeStatus_t *pssPtr; 126 RF_ReconBuffer_t *targetRbuf, *t = NULL; /* temporary rbuf 127 * pointers */ 128 void *ta; /* temporary data buffer pointer */ 129 RF_CallbackDesc_t *cb, *p; 130 int retcode = 0; 131 132 RF_Etimer_t timer; 133 134 /* makes no sense to have a submission from the failed disk */ 135 RF_ASSERT(rbuf); 136 RF_ASSERT(rbuf->col != reconCtrlPtr->fcol); 137 138 Dprintf4("RECON: submission by col %d for psid %ld ru %d (failed offset %ld)\n", 139 rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset); 140 141 RF_LOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID); 142 143 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); 144 while(reconCtrlPtr->rb_lock) { 145 ltsleep(&reconCtrlPtr->rb_lock, PRIBIO, "reconctlcnmhs", 0, &reconCtrlPtr->rb_mutex); 146 } 147 reconCtrlPtr->rb_lock = 1; 148 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); 149 150 pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, NULL); 151 RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten 152 * an rbuf for it */ 153 154 /* check to see if enough buffers have accumulated to do an XOR. If 155 * so, there's no need to acquire a floating rbuf. Before we can do 156 * any XORing, we must have acquired a destination buffer. If we 157 * have, then we can go ahead and do the XOR if (1) including this 158 * buffer, enough bufs have accumulated, or (2) this is the last 159 * submission for this stripe. Otherwise, we have to go acquire a 160 * floating rbuf. */ 161 162 targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; 163 if ((targetRbuf != NULL) && 164 ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) { 165 pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf; /* install this buffer */ 166 Dprintf2("RECON: col %d invoking a %d-way XOR\n", rbuf->col, pssPtr->xorBufCount); 167 RF_ETIMER_START(timer); 168 rf_MultiWayReconXor(raidPtr, pssPtr); 169 RF_ETIMER_STOP(timer); 170 RF_ETIMER_EVAL(timer); 171 raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer); 172 if (!keep_it) { 173 #if RF_ACC_TRACE > 0 174 raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer); 175 RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 176 RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 177 raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += 178 RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 179 RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 180 181 rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); 182 #endif 183 } 184 rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); 185 186 /* if use_committed is on, we _must_ consume a buffer off the 187 * committed list. */ 188 if (use_committed) { 189 t = reconCtrlPtr->committedRbufs; 190 RF_ASSERT(t); 191 reconCtrlPtr->committedRbufs = t->next; 192 rf_ReleaseFloatingReconBuffer(raidPtr, t); 193 } 194 if (keep_it) { 195 RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID); 196 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); 197 reconCtrlPtr->rb_lock = 0; 198 wakeup(&reconCtrlPtr->rb_lock); 199 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); 200 rf_FreeReconBuffer(rbuf); 201 return (retcode); 202 } 203 goto out; 204 } 205 /* set the value of "t", which we'll use as the rbuf from here on */ 206 if (keep_it) { 207 t = rbuf; 208 } else { 209 if (use_committed) { /* if a buffer has been committed to 210 * us, use it */ 211 t = reconCtrlPtr->committedRbufs; 212 RF_ASSERT(t); 213 reconCtrlPtr->committedRbufs = t->next; 214 t->next = NULL; 215 } else 216 if (reconCtrlPtr->floatingRbufs) { 217 t = reconCtrlPtr->floatingRbufs; 218 reconCtrlPtr->floatingRbufs = t->next; 219 t->next = NULL; 220 } 221 } 222 223 /* If we weren't able to acquire a buffer, append to the end of the 224 * buf list in the recon ctrl struct. */ 225 if (!t) { 226 RF_ASSERT(!keep_it && !use_committed); 227 Dprintf1("RECON: col %d failed to acquire floating rbuf\n", rbuf->col); 228 229 raidPtr->procsInBufWait++; 230 if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) { 231 printf("Buffer wait deadlock detected. Exiting.\n"); 232 rf_PrintPSStatusTable(raidPtr); 233 RF_PANIC(); 234 } 235 pssPtr->flags |= RF_PSS_BUFFERWAIT; 236 cb = rf_AllocCallbackDesc(); /* append to buf wait list in 237 * recon ctrl structure */ 238 cb->col = rbuf->col; 239 cb->callbackArg.v = rbuf->parityStripeID; 240 cb->next = NULL; 241 if (!reconCtrlPtr->bufferWaitList) 242 reconCtrlPtr->bufferWaitList = cb; 243 else { /* might want to maintain head/tail pointers 244 * here rather than search for end of list */ 245 for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next); 246 p->next = cb; 247 } 248 retcode = 1; 249 goto out; 250 } 251 Dprintf1("RECON: col %d acquired rbuf\n", rbuf->col); 252 #if RF_ACC_TRACE > 0 253 RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 254 RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 255 raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += 256 RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 257 RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 258 259 rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); 260 #endif 261 262 /* initialize the buffer */ 263 if (t != rbuf) { 264 t->col = reconCtrlPtr->fcol; 265 t->parityStripeID = rbuf->parityStripeID; 266 t->which_ru = rbuf->which_ru; 267 t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset; 268 t->spCol = rbuf->spCol; 269 t->spOffset = rbuf->spOffset; 270 271 ta = t->buffer; 272 t->buffer = rbuf->buffer; 273 rbuf->buffer = ta; /* swap buffers */ 274 } 275 /* the first installation always gets installed as the destination 276 * buffer. subsequent installations get stacked up to allow for 277 * multi-way XOR */ 278 if (!pssPtr->rbuf) { 279 pssPtr->rbuf = t; 280 t->count = 1; 281 } else 282 pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t; /* install this buffer */ 283 284 rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); /* the buffer is full if 285 * G=2 */ 286 287 out: 288 RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID); 289 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); 290 reconCtrlPtr->rb_lock = 0; 291 wakeup(&reconCtrlPtr->rb_lock); 292 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); 293 return (retcode); 294 } 295 /* pssPtr - the pss descriptor for this parity stripe */ 296 int 297 rf_MultiWayReconXor(RF_Raid_t *raidPtr, RF_ReconParityStripeStatus_t *pssPtr) 298 { 299 int i, numBufs = pssPtr->xorBufCount; 300 int numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU); 301 RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor; 302 RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; 303 304 RF_ASSERT(pssPtr->rbuf != NULL); 305 RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS); 306 #ifdef _KERNEL 307 #ifndef __NetBSD__ 308 thread_block(); /* yield the processor before doing a big XOR */ 309 #endif 310 #endif /* _KERNEL */ 311 /* 312 * XXX 313 * 314 * What if more than 9 bufs? 315 */ 316 nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long)); 317 318 /* release all the reconstruction buffers except the last one, which 319 * belongs to the disk whose submission caused this XOR to take place */ 320 for (i = 0; i < numBufs - 1; i++) { 321 if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING) 322 rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]); 323 else 324 if (rbufs[i]->type == RF_RBUF_TYPE_FORCED) 325 rf_FreeReconBuffer(rbufs[i]); 326 else 327 RF_ASSERT(0); 328 } 329 targetRbuf->count += pssPtr->xorBufCount; 330 pssPtr->xorBufCount = 0; 331 return (0); 332 } 333 /* removes one full buffer from one of the full-buffer lists and returns it. 334 * 335 * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY. 336 */ 337 RF_ReconBuffer_t * 338 rf_GetFullReconBuffer(RF_ReconCtrl_t *reconCtrlPtr) 339 { 340 RF_ReconBuffer_t *p; 341 342 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); 343 while(reconCtrlPtr->rb_lock) { 344 ltsleep(&reconCtrlPtr->rb_lock, PRIBIO, "reconctlcnmhs", 0, &reconCtrlPtr->rb_mutex); 345 } 346 reconCtrlPtr->rb_lock = 1; 347 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); 348 349 if ((p = reconCtrlPtr->fullBufferList) != NULL) { 350 reconCtrlPtr->fullBufferList = p->next; 351 p->next = NULL; 352 } 353 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); 354 reconCtrlPtr->rb_lock = 0; 355 wakeup(&reconCtrlPtr->rb_lock); 356 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); 357 return (p); 358 } 359 360 361 /* if the reconstruction buffer is full, move it to the full list, 362 * which is maintained sorted by failed disk sector offset 363 * 364 * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY. */ 365 int 366 rf_CheckForFullRbuf(RF_Raid_t *raidPtr, RF_ReconCtrl_t *reconCtrl, 367 RF_ReconParityStripeStatus_t *pssPtr, int numDataCol) 368 { 369 RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; 370 371 if (rbuf->count == numDataCol) { 372 raidPtr->numFullReconBuffers++; 373 Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n", 374 (long) rbuf->parityStripeID, rbuf->which_ru); 375 if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) { 376 Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n", 377 (long) rbuf->parityStripeID, rbuf->which_ru); 378 rbuf->next = reconCtrl->fullBufferList; 379 reconCtrl->fullBufferList = rbuf; 380 } else { 381 for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next); 382 rbuf->next = p; 383 pt->next = rbuf; 384 Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n", 385 (long) rbuf->parityStripeID, rbuf->which_ru); 386 } 387 rbuf->pssPtr = pssPtr; 388 pssPtr->rbuf = NULL; 389 rf_CauseReconEvent(raidPtr, rbuf->col, NULL, RF_REVENT_BUFREADY); 390 } 391 return (0); 392 } 393 394 395 /* release a floating recon buffer for someone else to use. 396 * assumes the rb_mutex is LOCKED at entry 397 */ 398 void 399 rf_ReleaseFloatingReconBuffer(RF_Raid_t *raidPtr, RF_ReconBuffer_t *rbuf) 400 { 401 RF_ReconCtrl_t *rcPtr = raidPtr->reconControl; 402 RF_CallbackDesc_t *cb; 403 404 Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n", 405 (long) rbuf->parityStripeID, rbuf->which_ru); 406 407 /* if anyone is waiting on buffers, wake one of them up. They will 408 * subsequently wake up anyone else waiting on their RU */ 409 if (rcPtr->bufferWaitList) { 410 rbuf->next = rcPtr->committedRbufs; 411 rcPtr->committedRbufs = rbuf; 412 cb = rcPtr->bufferWaitList; 413 rcPtr->bufferWaitList = cb->next; 414 rf_CauseReconEvent(raidPtr, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've 415 * committed a buffer */ 416 rf_FreeCallbackDesc(cb); 417 raidPtr->procsInBufWait--; 418 } else { 419 rbuf->next = rcPtr->floatingRbufs; 420 rcPtr->floatingRbufs = rbuf; 421 } 422 } 423