1 /*- 2 * Copyright (c) 1997, 1998 3 * Nan Yang Computer Services Limited. All rights reserved. 4 * 5 * This software is distributed under the so-called ``Berkeley 6 * License'': 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by Nan Yang Computer 19 * Services Limited. 20 * 4. Neither the name of the Company nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * This software is provided ``as is'', and any express or implied 25 * warranties, including, but not limited to, the implied warranties of 26 * merchantability and fitness for a particular purpose are disclaimed. 27 * In no event shall the company or contributors be liable for any 28 * direct, indirect, incidental, special, exemplary, or consequential 29 * damages (including, but not limited to, procurement of substitute 30 * goods or services; loss of use, data, or profits; or business 31 * interruption) however caused and on any theory of liability, whether 32 * in contract, strict liability, or tort (including negligence or 33 * otherwise) arising in any way out of the use of this software, even if 34 * advised of the possibility of such damage. 
35 */ 36 37 /* Information needed to set up a transfer */ 38 39 enum xferinfo { 40 XFR_NORMAL_READ = 1, 41 XFR_NORMAL_WRITE = 2, /* write request in normal mode */ 42 XFR_RECOVERY_READ = 4, 43 XFR_DEGRADED_WRITE = 8, 44 XFR_PARITYLESS_WRITE = 0x10, 45 XFR_NO_PARITY_STRIPE = 0x20, /* parity stripe is not available */ 46 XFR_DATA_BLOCK = 0x40, /* data block in request */ 47 XFR_PARITY_BLOCK = 0x80, /* parity block in request */ 48 XFR_BAD_SUBDISK = 0x100, /* this subdisk is dead */ 49 XFR_MALLOCED = 0x200, /* this buffer is malloced */ 50 #ifdef VINUMDEBUG 51 XFR_PHASE2 = 0x800, /* debug only: 2nd phase write */ 52 #endif 53 XFR_REVIVECONFLICT = 0x1000, /* possible conflict with a revive */ 54 XFR_BUFLOCKED = 0x2000, /* BUF_LOCK performed on this buffer */ 55 56 /* 57 * operations that need a parity block 58 */ 59 XFR_PARITYOP = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | 60 XFR_DEGRADED_WRITE), 61 62 /* 63 * operations that use the group parameters 64 */ 65 XFR_GROUPOP = (XFR_DEGRADED_WRITE | XFR_RECOVERY_READ), 66 67 /* 68 * operations that that use the data parameters 69 */ 70 XFR_DATAOP = (XFR_NORMAL_READ | XFR_NORMAL_WRITE | 71 XFR_PARITYLESS_WRITE), 72 73 /* 74 * operations requiring read before write 75 */ 76 XFR_RBW = (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE), 77 78 /* 79 * operations that need a malloced buffer 80 */ 81 XFR_NEEDS_MALLOC = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | 82 XFR_DEGRADED_WRITE) 83 }; 84 85 /* 86 * Describe one low-level request, part of a 87 * high-level request. This is an extended 88 * struct buf buffer, and the first element 89 * *must* be a struct buf. We pass this 90 * structure to the I/O routines instead of a 91 * struct buf in order to be able to locate the 92 * high-level request when it completes. 93 * 94 * All offsets and lengths are in sectors. 
95 */ 96 struct rqelement { 97 struct buf b; /* buf structure */ 98 struct rqgroup *rqg; /* pointer to our group */ 99 100 vinum_off_t sdoffset; /* offset in subdisk */ 101 int useroffset; /* offset of data in user buffer */ 102 103 /* 104 * dataoffset and datalen refer to "individual" data 105 * transfers which involve only this drive (normal read, 106 * parityless write) and also degraded write. 107 * 108 * groupoffset and grouplen refer to the other "group" 109 * operations (normal write, recovery read) which involve 110 * more than one drive. Both the offsets are relative to 111 * the start of the local buffer. 112 */ 113 int dataoffset; /* offset of the normal data */ 114 int groupoffset; /* offset of group data */ 115 short datalen; /* length of normal data (sectors) */ 116 short grouplen; /* length of group data (sectors) */ 117 short buflen; /* total buffer length to allocate */ 118 short flags; /* really enum xferinfo (see above) */ 119 120 /* 121 * Ways to find other components 122 */ 123 short sdno; /* subdisk number */ 124 short driveno; /* drive number */ 125 }; 126 127 /* 128 * A group of requests built to satisfy an I/O 129 * transfer on a single plex. 130 */ 131 struct rqgroup { 132 struct rqgroup *next; /* pointer to next group */ 133 struct request *rq; /* pointer to the request */ 134 short count; /* number of requests in this group */ 135 short active; /* and number active */ 136 short plexno; /* index of plex */ 137 int badsdno; /* index of bad subdisk or -1 */ 138 enum xferinfo flags; /* description of transfer */ 139 struct rangelock *lock; /* lock for this transfer */ 140 vinum_off_t lockbase; /* and lock address */ 141 struct rqelement rqe[0]; /* and the elements of this request */ 142 }; 143 144 /* 145 * Describe one high-level request and the 146 * work we have to do to satisfy it. 
147 */ 148 struct request { 149 struct bio *bio; /* pointer to the high-level request */ 150 enum xferinfo flags; 151 union { 152 int volno; /* volume index */ 153 int plexno; /* or plex index */ 154 } volplex; 155 int error; /* current error indication */ 156 int sdno; /* reviving subdisk */ 157 /* (XFR_REVIVECONFLICT) */ 158 short isplex; /* set if this is a plex request */ 159 short active; /* number of subrequests still active */ 160 struct rqgroup *rqg; /* ptr to the first group of requests */ 161 struct rqgroup *lrqg; /* and to the last group of requests */ 162 struct request *next; /* link of waiting requests */ 163 }; 164 165 /* 166 * Extended buffer header for subdisk I/O. Includes 167 * a pointer to the user I/O request. 168 */ 169 struct sdbuf { 170 struct buf b; /* our buffer */ 171 struct bio *bio; /* and pointer to parent */ 172 short driveno; /* drive index */ 173 short sdno; /* and subdisk index */ 174 }; 175 176 /* 177 * Values returned by rqe and friends. Be careful 178 * with these: they are in order of increasing 179 * seriousness. Some routines check for 180 * > REQUEST_RECOVERED to indicate a failed request. 
XXX 181 */ 182 enum requeststatus { 183 REQUEST_OK, /* request built OK */ 184 REQUEST_RECOVERED, /* request OK, but involves RAID5 recovery */ 185 REQUEST_DEGRADED, /* parts of request failed */ 186 REQUEST_EOF, /* parts of request failed: outside plex */ 187 REQUEST_DOWN, /* all of request failed: subdisk(s) down */ 188 REQUEST_ENOMEM /* all of request failed: ran out of memory */ 189 }; 190 191 #ifdef VINUMDEBUG 192 193 /* 194 * Trace entry for request info (DEBUG_LASTREQS) 195 */ 196 enum rqinfo_type { 197 loginfo_unused, /* never been used */ 198 loginfo_user_bp, /* this is the bp when strategy is called */ 199 loginfo_user_bpl, /* and this is the bp at launch time */ 200 loginfo_rqe, /* user RQE */ 201 loginfo_iodone, /* iodone */ 202 loginfo_raid5_data, /* write RAID-5 data block */ 203 loginfo_raid5_parity, /* write RAID-5 parity block */ 204 loginfo_sdio, /* subdisk I/O */ 205 loginfo_sdiol, /* subdisk I/O launch */ 206 loginfo_sdiodone, /* subdisk iodone */ 207 loginfo_lockwait, /* wait for range lock */ 208 loginfo_lock, /* lock range */ 209 loginfo_unlock, /* unlock range */ 210 }; 211 212 /* 213 * Info to pass to logrq 214 */ 215 union rqinfou { 216 struct bio *bio; 217 struct rqelement *rqe; /* addr of request, for correlation */ 218 struct rangelock *lockinfo; 219 }; 220 221 struct rqinfo { 222 enum rqinfo_type type; /* kind of event */ 223 struct timeval timestamp; /* time it happened */ 224 struct bio *bio; /* point to user buffer */ 225 int devmajor; /* major and minor device info */ 226 int devminor; 227 union { 228 struct buf b; /* yup, the *whole* buffer header */ 229 struct bio bio; 230 struct rqelement rqe; /* and the whole rqe */ 231 struct rangelock lockinfo; 232 } info; 233 }; 234 235 #define RQINFO_SIZE 128 /* number of info slots in buffer */ 236 237 void logrq(enum rqinfo_type type, union rqinfou info, struct bio *ubio); 238 239 #endif 240 241 /* Structures for the daemon */ 242 243 /* 244 * Types of request to the daemon 245 */ 246 
enum daemonrq {
    daemonrq_none,                      /* dummy to catch bugs */
    daemonrq_ioerror,                   /* error occurred on I/O */
    daemonrq_saveconfig,                /* save configuration */
    daemonrq_return,                    /* return to userland */
    daemonrq_ping,                      /* show sign of life */
    daemonrq_init,                      /* initialize a plex */
    daemonrq_revive,                    /* revive a subdisk */
    daemonrq_closedrive,                /* close a drive */
};

/*
 * Info field for daemon requests: the argument that
 * accompanies a request, selected by the request type.
 */
union daemoninfo {
    struct request *rq;                 /* for daemonrq_ioerror */
    struct sd *sd;                      /* for daemonrq_revive */
    struct plex *plex;                  /* for daemonrq_init */
    struct drive *drive;                /* for daemonrq_closedrive */
    int nothing;                        /* for passing NULL */
};

/*
 * One entry in the daemon's request queue.
 */
struct daemonq {
    struct daemonq *next;               /* pointer to next element in queue */
    enum daemonrq type;                 /* type of request */
    int privateinuse;                   /* private element, being used */
    union daemoninfo info;              /* and the request information */
};

void queue_daemon_request(enum daemonrq type, union daemoninfo info);

extern int daemon_options;

/*
 * Bit values for daemon_options; each is a distinct bit.
 */
enum daemon_option {
    daemon_verbose = 1,                 /* talk about what we're doing */
    daemon_stopped = 2,
    daemon_noupdate = 4,                /* don't update the disk config, for recovery */
};

void freerq(struct request *rq);
void unlockrange(int plexno, struct rangelock *lock);