1 /* $NetBSD: rf_chaindecluster.c,v 1.15 2006/11/16 01:33:23 christos Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Khalil Amiri 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /****************************************************************************** 30 * 31 * rf_chaindecluster.c -- implements chained declustering 32 * 33 *****************************************************************************/ 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: rf_chaindecluster.c,v 1.15 2006/11/16 01:33:23 christos Exp $"); 37 38 #include "rf_archs.h" 39 40 #if (RF_INCLUDE_CHAINDECLUSTER > 0) 41 42 #include <dev/raidframe/raidframevar.h> 43 44 #include "rf_raid.h" 45 #include "rf_chaindecluster.h" 46 #include "rf_dag.h" 47 #include "rf_dagutils.h" 48 #include "rf_dagffrd.h" 49 #include "rf_dagffwr.h" 50 #include "rf_dagdegrd.h" 51 #include "rf_dagfuncs.h" 52 #include "rf_general.h" 53 #include "rf_utils.h" 54 55 typedef struct RF_ChaindeclusterConfigInfo_s { 56 RF_RowCol_t **stripeIdentifier; /* filled in at config time and used 57 * by IdentifyStripe */ 58 RF_StripeCount_t numSparingRegions; 59 RF_StripeCount_t stripeUnitsPerSparingRegion; 60 RF_SectorNum_t mirrorStripeOffset; 61 } RF_ChaindeclusterConfigInfo_t; 62 63 int 64 rf_ConfigureChainDecluster(RF_ShutdownList_t **listp, 65 RF_Raid_t *raidPtr, RF_Config_t *cfgPtr) 66 { 67 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 68 RF_StripeCount_t num_used_stripeUnitsPerDisk; 69 RF_ChaindeclusterConfigInfo_t *info; 70 RF_RowCol_t i; 71 72 /* create a Chained Declustering configuration structure */ 73 RF_MallocAndAdd(info, sizeof(RF_ChaindeclusterConfigInfo_t), (RF_ChaindeclusterConfigInfo_t *), raidPtr->cleanupList); 74 if (info == NULL) 75 return (ENOMEM); 76 layoutPtr->layoutSpecificInfo = (void *) info; 77 78 /* fill in the config structure. */ 79 info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2, raidPtr->cleanupList); 80 if (info->stripeIdentifier == NULL) 81 return (ENOMEM); 82 for (i = 0; i < raidPtr->numCol; i++) { 83 info->stripeIdentifier[i][0] = i % raidPtr->numCol; 84 info->stripeIdentifier[i][1] = (i + 1) % raidPtr->numCol; 85 } 86 87 /* fill in the remaining layout parameters */ 88 num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk % 89 (2 * raidPtr->numCol - 2)); 90 info->numSparingRegions = num_used_stripeUnitsPerDisk / (2 * raidPtr->numCol - 2); 91 info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1); 92 info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol - 1); 93 layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion; 94 layoutPtr->numDataCol = 1; 95 layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; 96 layoutPtr->numParityCol = 1; 97 98 layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk; 99 100 raidPtr->sectorsPerDisk = 101 num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; 102 103 raidPtr->totalSectors = 104 (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit; 105 106 layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; 107 108 return (0); 109 } 110 111 RF_ReconUnitCount_t 112 rf_GetNumSpareRUsChainDecluster(RF_Raid_t *raidPtr) 113 { 114 RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; 115 116 /* 117 * The layout uses two stripe units per disk as spare within each 118 * sparing region. 119 */ 120 return (2 * info->numSparingRegions); 121 } 122 123 124 /* Maps to the primary copy of the data, i.e. the first mirror pair */ 125 void 126 rf_MapSectorChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, 127 RF_RowCol_t *col, RF_SectorNum_t *diskSector, 128 int remap) 129 { 130 RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; 131 RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; 132 RF_SectorNum_t index_within_region, index_within_disk; 133 RF_StripeNum_t sparing_region_id; 134 int col_before_remap; 135 136 sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; 137 index_within_region = SUID % info->stripeUnitsPerSparingRegion; 138 index_within_disk = index_within_region / raidPtr->numCol; 139 col_before_remap = SUID % raidPtr->numCol; 140 141 if (!remap) { 142 *col = col_before_remap; 143 *diskSector = (index_within_disk + ((raidPtr->numCol - 1) * sparing_region_id)) * 144 raidPtr->Layout.sectorsPerStripeUnit; 145 *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 146 } else { 147 /* remap sector to spare space... */ 148 *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; 149 *diskSector += (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit; 150 *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 151 index_within_disk = index_within_region / raidPtr->numCol; 152 if (index_within_disk < col_before_remap) 153 *col = index_within_disk; 154 else 155 if (index_within_disk == raidPtr->numCol - 2) { 156 *col = (col_before_remap + raidPtr->numCol - 1) % raidPtr->numCol; 157 *diskSector += raidPtr->Layout.sectorsPerStripeUnit; 158 } else 159 *col = (index_within_disk + 2) % raidPtr->numCol; 160 } 161 162 } 163 164 165 166 /* Maps to the second copy of the mirror pair, which is chain declustered. The second copy is contained 167 in the next disk (mod numCol) after the disk containing the primary copy. 168 The offset into the disk is one-half disk down */ 169 void 170 rf_MapParityChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, 171 RF_RowCol_t *col, RF_SectorNum_t *diskSector, 172 int remap) 173 { 174 RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; 175 RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; 176 RF_SectorNum_t index_within_region, index_within_disk; 177 RF_StripeNum_t sparing_region_id; 178 int col_before_remap; 179 180 if (!remap) { 181 *col = SUID % raidPtr->numCol; 182 *col = (*col + 1) % raidPtr->numCol; 183 *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit; 184 *diskSector += (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; 185 *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 186 } else { 187 /* remap parity to spare space ... */ 188 sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; 189 index_within_region = SUID % info->stripeUnitsPerSparingRegion; 190 index_within_disk = index_within_region / raidPtr->numCol; 191 *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; 192 *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; 193 *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 194 col_before_remap = SUID % raidPtr->numCol; 195 if (index_within_disk < col_before_remap) 196 *col = index_within_disk; 197 else 198 if (index_within_disk == raidPtr->numCol - 2) { 199 *col = (col_before_remap + 2) % raidPtr->numCol; 200 *diskSector -= raidPtr->Layout.sectorsPerStripeUnit; 201 } else 202 *col = (index_within_disk + 2) % raidPtr->numCol; 203 } 204 205 } 206 207 void 208 rf_IdentifyStripeChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, 209 RF_RowCol_t **diskids) 210 { 211 RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; 212 RF_StripeNum_t SUID; 213 RF_RowCol_t col; 214 215 SUID = addr / raidPtr->Layout.sectorsPerStripeUnit; 216 col = SUID % raidPtr->numCol; 217 *diskids = info->stripeIdentifier[col]; 218 } 219 220 void 221 rf_MapSIDToPSIDChainDecluster(RF_RaidLayout_t *layoutPtr, 222 RF_StripeNum_t stripeID, 223 RF_StripeNum_t *psID, 224 RF_ReconUnitNum_t *which_ru) 225 { 226 *which_ru = 0; 227 *psID = stripeID; 228 } 229 /****************************************************************************** 230 * select a graph to perform a single-stripe access 231 * 232 * Parameters: raidPtr - description of the physical array 233 * type - type of operation (read or write) requested 234 * asmap - logical & physical addresses for this access 235 * createFunc - function to use to create the graph (return value) 236 *****************************************************************************/ 237 238 void 239 rf_RAIDCDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, 240 RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc) 241 #if 0 242 void (**createFunc) (RF_Raid_t *, RF_AccessStripeMap_t *, 243 RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, 244 RF_AllocListElem_t *) 245 #endif 246 { 247 RF_ASSERT(RF_IO_IS_R_OR_W(type)); 248 249 if (asmap->numDataFailed + asmap->numParityFailed > 1) { 250 RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); 251 *createFunc = NULL; 252 return; 253 } 254 *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; 255 256 if (type == RF_IO_TYPE_READ) { 257 if ((raidPtr->status == rf_rs_degraded) || (raidPtr->status == rf_rs_reconstructing)) 258 *createFunc = (RF_VoidFuncPtr) rf_CreateRaidCDegradedReadDAG; /* array status is 259 * degraded, implement 260 * workload shifting */ 261 else 262 *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorPartitionReadDAG; /* array status not 263 * degraded, so use 264 * mirror partition dag */ 265 } else 266 *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; 267 } 268 #endif /* (RF_INCLUDE_CHAINDECLUSTER > 0) */ 269