1 /* libbdev - IPC and recovery functions */ 2 3 #include <minix/drivers.h> 4 #include <minix/bdev.h> 5 #include <assert.h> 6 7 #include "const.h" 8 #include "type.h" 9 #include "proto.h" 10 11 static void bdev_cancel(dev_t dev) 12 { 13 /* Recovering the driver for the given device has failed repeatedly. Mark it as 14 * permanently unusable, and clean up any associated calls and resources. 15 */ 16 bdev_call_t *call, *next; 17 18 printf("bdev: giving up on major %d\n", major(dev)); 19 20 /* Cancel all pending asynchronous requests. */ 21 call = NULL; 22 23 while ((call = bdev_call_iter_maj(dev, call, &next)) != NULL) 24 bdev_callback_asyn(call, EDEADSRCDST); 25 26 /* Mark the driver as unusable. */ 27 bdev_driver_clear(dev); 28 } 29 30 static int bdev_recover(dev_t dev, int update_endpt) 31 { 32 /* The IPC subsystem has signaled an error communicating to the driver 33 * associated with the given device. Try to recover. If 'update_endpt' is set, 34 * we need to find the new endpoint of the driver first. Return TRUE iff 35 * recovery has been successful. 36 */ 37 bdev_call_t *call, *next; 38 endpoint_t endpt; 39 int r, active, nr_tries; 40 41 /* Only print output if there is something to recover. Some drivers may be 42 * shut down and later restarted legitimately, and if they were not in use 43 * while that happened, there is no need to flood the console with messages. 44 */ 45 active = bdev_minor_is_open(dev) || bdev_call_iter_maj(dev, NULL, &next); 46 47 if (active) 48 printf("bdev: recovering from a driver restart on major %d\n", 49 major(dev)); 50 51 for (nr_tries = 0; nr_tries < RECOVER_TRIES; nr_tries++) { 52 /* First update the endpoint, if necessary. */ 53 if (update_endpt) 54 (void) bdev_driver_update(dev); 55 56 if ((endpt = bdev_driver_get(dev)) == NONE) 57 break; 58 59 /* If anything goes wrong, update the endpoint again next time. */ 60 update_endpt = TRUE; 61 62 /* Reopen all minor devices on the new driver. */ 63 if ((r = bdev_minor_reopen(dev)) != OK) { 64 /* If the driver died again, we may give it another try. */ 65 if (r == EDEADSRCDST) 66 continue; 67 68 /* If another error occurred, we cannot continue using the 69 * driver as is, but we also cannot force it to restart. 70 */ 71 break; 72 } 73 74 /* Resend all asynchronous requests. */ 75 call = NULL; 76 77 while ((call = bdev_call_iter_maj(dev, call, &next)) != NULL) { 78 /* It is not strictly necessary that we manage to reissue all 79 * asynchronous requests successfully. We can fail them on an 80 * individual basis here, without affecting the overall 81 * recovery. Note that we will never get new IPC failures here. 82 */ 83 if ((r = bdev_restart_asyn(call)) != OK) 84 bdev_callback_asyn(call, r); 85 } 86 87 /* Recovery seems successful. We can now reissue the current 88 * synchronous request (if any), and continue normal operation. 89 */ 90 if (active) 91 printf("bdev: recovery successful, new driver at %d\n", endpt); 92 93 return TRUE; 94 } 95 96 /* Recovery failed repeatedly. Give up on this driver. */ 97 bdev_cancel(dev); 98 99 return FALSE; 100 } 101 102 void bdev_update(dev_t dev, char *label) 103 { 104 /* Set the endpoint for a driver. Perform recovery if necessary. 105 */ 106 endpoint_t endpt, old_endpt; 107 108 old_endpt = bdev_driver_get(dev); 109 110 endpt = bdev_driver_set(dev, label); 111 112 /* If updating the driver causes an endpoint change, we need to perform 113 * recovery, but not update the endpoint yet again. 114 */ 115 if (old_endpt != NONE && old_endpt != endpt) 116 bdev_recover(dev, FALSE /*update_endpt*/); 117 } 118 119 int bdev_senda(dev_t dev, const message *m_orig, bdev_id_t id) 120 { 121 /* Send an asynchronous request for the given device. This function will never 122 * get any new IPC errors sending to the driver. If sending an asynchronous 123 * request fails, we will find out through other ways later. 124 */ 125 endpoint_t endpt; 126 message m; 127 int r; 128 129 /* If we have no usable driver endpoint, fail instantly. */ 130 if ((endpt = bdev_driver_get(dev)) == NONE) 131 return EDEADSRCDST; 132 133 m = *m_orig; 134 m.m_lbdev_lblockdriver_msg.id = id; 135 136 r = asynsend(endpt, &m); 137 138 if (r != OK) 139 printf("bdev: asynsend to driver (%d) failed (%d)\n", endpt, r); 140 141 return r; 142 } 143 144 int bdev_sendrec(dev_t dev, const message *m_orig) 145 { 146 /* Send a synchronous request for the given device, and wait for the reply. 147 * Return ERESTART if the caller should try to reissue the request. 148 */ 149 endpoint_t endpt; 150 message m; 151 int r; 152 153 /* If we have no usable driver endpoint, fail instantly. */ 154 if ((endpt = bdev_driver_get(dev)) == NONE) 155 return EDEADSRCDST; 156 157 /* Send the request and block until we receive a reply. */ 158 m = *m_orig; 159 m.m_lbdev_lblockdriver_msg.id = NO_ID; 160 161 r = ipc_sendrec(endpt, &m); 162 163 /* If communication failed, the driver has died. We assume it will be 164 * restarted soon after, so we attempt recovery. Upon success, we let the 165 * caller reissue the synchronous request. 166 */ 167 if (r == EDEADSRCDST) { 168 if (!bdev_recover(dev, TRUE /*update_endpt*/)) 169 return EDEADSRCDST; 170 171 return ERESTART; 172 } 173 174 if (r != OK) { 175 printf("bdev: IPC to driver (%d) failed (%d)\n", endpt, r); 176 return r; 177 } 178 179 if (m.m_type != BDEV_REPLY) { 180 printf("bdev: driver (%d) sent weird response (%d)\n", 181 endpt, m.m_type); 182 return EINVAL; 183 } 184 185 /* The protocol contract states that no asynchronous reply can satisfy a 186 * synchronous SENDREC call, so we can never get an asynchronous reply here. 187 */ 188 if (m.m_lblockdriver_lbdev_reply.id != NO_ID) { 189 printf("bdev: driver (%d) sent invalid ID (%d)\n", endpt, 190 m.m_lblockdriver_lbdev_reply.id); 191 return EINVAL; 192 } 193 194 /* Unless the caller is misusing libbdev, we will only get ERESTART if we 195 * have managed to resend a raw block I/O request to the driver after a 196 * restart, but before VFS has had a chance to reopen the associated device 197 * first. This is highly exceptional, and hard to deal with correctly. We 198 * take the easiest route: sleep for a while so that VFS can reopen the 199 * device, and then resend the request. If the call keeps failing, the caller 200 * will eventually give up. 201 */ 202 if (m.m_lblockdriver_lbdev_reply.status == ERESTART) { 203 printf("bdev: got ERESTART from driver (%d), sleeping for reopen\n", 204 endpt); 205 206 micro_delay(1000); 207 208 return ERESTART; 209 } 210 211 /* Return the result of our request. */ 212 return m.m_lblockdriver_lbdev_reply.status; 213 } 214 215 static int bdev_receive(dev_t dev, message *m) 216 { 217 /* Receive one valid message. 218 */ 219 endpoint_t endpt; 220 int r, nr_tries = 0; 221 222 for (;;) { 223 /* Retrieve and check the driver endpoint on every try, as it will 224 * change with each driver restart. 225 */ 226 if ((endpt = bdev_driver_get(dev)) == NONE) 227 return EDEADSRCDST; 228 229 r = sef_receive(endpt, m); 230 231 if (r == EDEADSRCDST) { 232 /* If we reached the maximum number of retries, give up. */ 233 if (++nr_tries == DRIVER_TRIES) 234 break; 235 236 /* Attempt recovery. If successful, all asynchronous requests 237 * will have been resent, and we can retry receiving a reply. 238 */ 239 if (!bdev_recover(dev, TRUE /*update_endpt*/)) 240 return EDEADSRCDST; 241 242 continue; 243 } 244 245 if (r != OK) { 246 printf("bdev: IPC to driver (%d) failed (%d)\n", endpt, r); 247 248 return r; 249 } 250 251 if (m->m_type != BDEV_REPLY) { 252 printf("bdev: driver (%d) sent weird response (%d)\n", 253 endpt, m->m_type); 254 return EINVAL; 255 } 256 257 /* The caller is responsible for checking the ID and status. */ 258 return OK; 259 } 260 261 /* All tries failed, even though all recovery attempts succeeded. In this 262 * case, we let the caller recheck whether it wants to keep calling us, 263 * returning ERESTART to indicate we can be called again but did not actually 264 * receive a message. 265 */ 266 return ERESTART; 267 } 268 269 void bdev_reply_asyn(message *m) 270 { 271 /* A reply has come in from a disk driver. 272 */ 273 bdev_call_t *call; 274 endpoint_t endpt; 275 bdev_id_t id; 276 int r; 277 278 /* This is a requirement for the caller. */ 279 assert(m->m_type == BDEV_REPLY); 280 281 /* Get the corresponding asynchronous call structure. */ 282 id = m->m_lblockdriver_lbdev_reply.id; 283 284 if ((call = bdev_call_get(id)) == NULL) { 285 printf("bdev: driver (%d) replied to unknown request (%d)\n", 286 m->m_source, m->m_lblockdriver_lbdev_reply.id); 287 return; 288 } 289 290 /* Make sure the reply was sent from the right endpoint. */ 291 endpt = bdev_driver_get(call->dev); 292 293 if (m->m_source != endpt) { 294 /* If the endpoint is NONE, this may be a stray reply. */ 295 if (endpt != NONE) 296 printf("bdev: driver (%d) replied to request not sent to it\n", 297 m->m_source); 298 return; 299 } 300 301 /* See the ERESTART comment in bdev_sendrec(). */ 302 if (m->m_lblockdriver_lbdev_reply.status == ERESTART) { 303 printf("bdev: got ERESTART from driver (%d), sleeping for reopen\n", 304 endpt); 305 306 micro_delay(1000); 307 308 if ((r = bdev_restart_asyn(call)) != OK) 309 bdev_callback_asyn(call, r); 310 311 return; 312 } 313 314 bdev_callback_asyn(call, m->m_lblockdriver_lbdev_reply.status); 315 } 316 317 int bdev_wait_asyn(bdev_id_t id) 318 { 319 /* Wait for an asynchronous request to complete. 320 */ 321 bdev_call_t *call; 322 dev_t dev; 323 message m; 324 int r; 325 326 if ((call = bdev_call_get(id)) == NULL) 327 return ENOENT; 328 329 dev = call->dev; 330 331 do { 332 if ((r = bdev_receive(dev, &m)) != OK && r != ERESTART) 333 return r; 334 335 /* Processing the reply will free up the call structure as a side 336 * effect. If we repeatedly get ERESTART, we will repeatedly resend the 337 * asynchronous request, which will then eventually hit the retry limit 338 * and we will break out of the loop. 339 */ 340 if (r == OK) 341 bdev_reply_asyn(&m); 342 343 } while (bdev_call_get(id) != NULL); 344 345 return OK; 346 } 347