1 /*
2 BAREOS® - Backup Archiving REcovery Open Sourced
3
4 Copyright (C) 2000-2012 Free Software Foundation Europe e.V.
5 Copyright (C) 2016-2019 Bareos GmbH & Co. KG
6
7 This program is Free Software; you can redistribute it and/or
8 modify it under the terms of version three of the GNU Affero General Public
9 License as published by the Free Software Foundation and included
10 in the file LICENSE.
11
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Affero General Public License for more details.
16
17 You should have received a copy of the GNU Affero General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA.
21 */
22 /*
23 * Kern Sibbald, MM
24 * Split from job.c and acquire.c June 2005
25 */
26 /**
27 * @file
28 * Drive reservation functions for Storage Daemon
29 */
30
31 #include "include/bareos.h"
32 #include "stored/stored.h"
33 #include "stored/stored_globals.h"
34 #include "stored/acquire.h"
35 #include "stored/autochanger.h"
36 #include "stored/jcr_private.h"
37 #include "stored/wait.h"
38 #include "lib/berrno.h"
39 #include "lib/util.h"
40 #include "lib/bsock.h"
41 #include "include/jcr.h"
42 #include "lib/parse_conf.h"
43
44 namespace storagedaemon {
45
46 const int debuglevel = 150;
47
48 /* Global static variables */
49 #ifdef SD_DEBUG_LOCK
50 int reservations_lock_count = 0;
51 #else
52 static int reservations_lock_count = 0;
53 #endif
54
55 static brwlock_t reservation_lock;
56
57 /* Forward referenced functions */
58 static int CanReserveDrive(DeviceControlRecord* dcr, ReserveContext& rctx);
59 static int ReserveDevice(ReserveContext& rctx);
60 static bool ReserveDeviceForRead(DeviceControlRecord* dcr);
61 static bool ReserveDeviceForAppend(DeviceControlRecord* dcr,
62 ReserveContext& rctx);
63 static bool UseDeviceCmd(JobControlRecord* jcr);
64 static void QueueReserveMessage(JobControlRecord* jcr);
65 static void PopReserveMessages(JobControlRecord* jcr);
66 // void SwitchDevice(DeviceControlRecord *dcr, Device *dev);
67
68 /* Requests from the Director daemon */
69 static char use_storage[] =
70 "use storage=%127s media_type=%127s "
71 "pool_name=%127s pool_type=%127s append=%d copy=%d stripe=%d\n";
72 static char use_device[] = "use device=%127s\n";
73
74 /* Responses sent to Director daemon */
75 static char OK_device[] = "3000 OK use device device=%s\n";
76 static char NO_device[] =
77 "3924 Device \"%s\" not in SD Device"
78 " resources or no matching Media Type.\n";
79 static char BAD_use[] = "3913 Bad use command: %s\n";
80
use_cmd(JobControlRecord * jcr)81 bool use_cmd(JobControlRecord* jcr)
82 {
83 /*
84 * Get the device, media, and pool information
85 */
86 if (!UseDeviceCmd(jcr)) {
87 jcr->setJobStatus(JS_ErrorTerminated);
88 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
89 return false;
90 }
91 return true;
92 }
93
94 /**
95 * This allows a given thread to recursively call LockReservations.
96 * It must, of course, call unlock_... the same number of times.
97 */
InitReservationsLock()98 void InitReservationsLock()
99 {
100 int errstat;
101 if ((errstat = RwlInit(&reservation_lock)) != 0) {
102 BErrNo be;
103 Emsg1(M_ABORT, 0, _("Unable to initialize reservation lock. ERR=%s\n"),
104 be.bstrerror(errstat));
105 }
106
107 InitVolListLock();
108 }
109
TermReservationsLock()110 void TermReservationsLock()
111 {
112 RwlDestroy(&reservation_lock);
113 TermVolListLock();
114 }
115
116 /**
117 * This applies to a drive and to Volumes
118 */
_lockReservations(const char * file,int line)119 void _lockReservations(const char* file, int line)
120 {
121 int errstat;
122 reservations_lock_count++;
123 if ((errstat = RwlWritelock_p(&reservation_lock, file, line)) != 0) {
124 BErrNo be;
125 Emsg2(M_ABORT, 0, "RwlWritelock failure. stat=%d: ERR=%s\n", errstat,
126 be.bstrerror(errstat));
127 }
128 }
129
_unLockReservations()130 void _unLockReservations()
131 {
132 int errstat;
133 reservations_lock_count--;
134 if ((errstat = RwlWriteunlock(&reservation_lock)) != 0) {
135 BErrNo be;
136 Emsg2(M_ABORT, 0, "RwlWriteunlock failure. stat=%d: ERR=%s\n", errstat,
137 be.bstrerror(errstat));
138 }
139 }
140
SetReserved()141 void DeviceControlRecord::SetReserved()
142 {
143 reserved_ = true;
144 Dmsg2(debuglevel, "Inc reserve=%d dev=%s\n", dev->NumReserved(),
145 dev->print_name());
146 dev->IncReserved();
147 }
148
ClearReserved()149 void DeviceControlRecord::ClearReserved()
150 {
151 if (reserved_) {
152 reserved_ = false;
153 dev->DecReserved();
154 Dmsg2(debuglevel, "Dec reserve=%d dev=%s\n", dev->NumReserved(),
155 dev->print_name());
156 }
157 }
158
159 /**
160 * Remove any reservation from a drive and tell the system
161 * that the volume is unused at least by us.
162 */
UnreserveDevice()163 void DeviceControlRecord::UnreserveDevice()
164 {
165 dev->Lock();
166 if (IsReserved()) {
167 ClearReserved();
168 reserved_volume = false;
169
170 /*
171 * If we set read mode in reserving, remove it
172 */
173 if (dev->CanRead()) { dev->ClearRead(); }
174
175 if (dev->num_writers < 0) {
176 Jmsg1(jcr, M_ERROR, 0, _("Hey! num_writers=%d!!!!\n"), dev->num_writers);
177 dev->num_writers = 0;
178 }
179
180 if (dev->NumReserved() == 0 && dev->num_writers == 0) {
181 VolumeUnused(this);
182 }
183 }
184 dev->Unlock();
185 }
186
187 /**
188 * We get the following type of information:
189 *
190 * use storage=xxx media_type=yyy pool_name=xxx pool_type=yyy append=1 copy=0
191 * strip=0 use device=zzz use device=aaa use device=bbb use storage=xxx
192 * media_type=yyy pool_name=xxx pool_type=yyy append=0 copy=0 strip=0 use
193 * device=bbb
194 */
UseDeviceCmd(JobControlRecord * jcr)195 static bool UseDeviceCmd(JobControlRecord* jcr)
196 {
197 PoolMem StoreName, dev_name, media_type, pool_name, pool_type;
198 BareosSocket* dir = jcr->dir_bsock;
199 int32_t append;
200 bool ok;
201 int32_t Copy, Stripe;
202 DirectorStorage* store;
203 ReserveContext rctx;
204 alist* dirstore;
205
206 memset(&rctx, 0, sizeof(ReserveContext));
207 rctx.jcr = jcr;
208
209 /*
210 * If there are multiple devices, the director sends us
211 * use_device for each device that it wants to use.
212 */
213 jcr->impl->reserve_msgs = new alist(10, not_owned_by_alist);
214 do {
215 Dmsg1(debuglevel, "<dird: %s", dir->msg);
216 ok = sscanf(dir->msg, use_storage, StoreName.c_str(), media_type.c_str(),
217 pool_name.c_str(), pool_type.c_str(), &append, &Copy,
218 &Stripe) == 7;
219 if (!ok) { break; }
220 dirstore = new alist(10, not_owned_by_alist);
221 if (append) {
222 jcr->impl->write_store = dirstore;
223 } else {
224 jcr->impl->read_store = dirstore;
225 }
226 rctx.append = append;
227 UnbashSpaces(StoreName);
228 UnbashSpaces(media_type);
229 UnbashSpaces(pool_name);
230 UnbashSpaces(pool_type);
231 store = new DirectorStorage;
232 dirstore->append(store);
233 memset(store, 0, sizeof(DirectorStorage));
234 store->device = new alist(10);
235 bstrncpy(store->name, StoreName, sizeof(store->name));
236 bstrncpy(store->media_type, media_type, sizeof(store->media_type));
237 bstrncpy(store->pool_name, pool_name, sizeof(store->pool_name));
238 bstrncpy(store->pool_type, pool_type, sizeof(store->pool_type));
239 store->append = append;
240
241 /*
242 * Now get all devices
243 */
244 while (dir->recv() >= 0) {
245 Dmsg1(debuglevel, "<dird device: %s", dir->msg);
246 ok = sscanf(dir->msg, use_device, dev_name.c_str()) == 1;
247 if (!ok) { break; }
248 UnbashSpaces(dev_name);
249 store->device->append(strdup(dev_name.c_str()));
250 }
251 } while (ok && dir->recv() >= 0);
252
253 InitJcrDeviceWaitTimers(jcr);
254 jcr->impl->dcr = new StorageDaemonDeviceControlRecord;
255 SetupNewDcrDevice(jcr, jcr->impl->dcr, NULL, NULL);
256 if (rctx.append) { jcr->impl->dcr->SetWillWrite(); }
257
258 if (!jcr->impl->dcr) {
259 BareosSocket* dir = jcr->dir_bsock;
260 dir->fsend(_("3939 Could not get dcr\n"));
261 Dmsg1(debuglevel, ">dird: %s", dir->msg);
262 ok = false;
263 }
264
265 /*
266 * At this point, we have a list of all the Director's Storage resources
267 * indicated for this Job, which include Pool, PoolType, storage name, and
268 * Media type.
269 *
270 * Then for each of the Storage resources, we have a list of device names that
271 * were given.
272 *
273 * Wiffle through them and find one that can do the backup.
274 */
275 if (ok) {
276 int wait_for_device_retries = 0;
277 int repeat = 0;
278 bool fail = false;
279 rctx.notify_dir = true;
280
281 /*
282 * Put new dcr in proper location
283 */
284 if (rctx.append) {
285 rctx.jcr->impl->dcr = jcr->impl->dcr;
286 } else {
287 rctx.jcr->impl->read_dcr = jcr->impl->dcr;
288 }
289
290 LockReservations();
291 for (; !fail && !JobCanceled(jcr);) {
292 PopReserveMessages(jcr);
293 rctx.suitable_device = false;
294 rctx.have_volume = false;
295 rctx.VolumeName[0] = 0;
296 rctx.any_drive = false;
297 if (!jcr->impl->PreferMountedVols) {
298 /*
299 * Here we try to find a drive that is not used.
300 * This will maximize the use of available drives.
301 */
302 rctx.num_writers = 20000000; /* start with impossible number */
303 rctx.low_use_drive = NULL;
304 rctx.PreferMountedVols = false;
305 rctx.exact_match = false;
306 rctx.autochanger_only = true;
307 if ((ok = FindSuitableDeviceForJob(jcr, rctx))) { break; }
308
309 /*
310 * Look through all drives possibly for low_use drive
311 */
312 if (rctx.low_use_drive) {
313 rctx.try_low_use_drive = true;
314 if ((ok = FindSuitableDeviceForJob(jcr, rctx))) { break; }
315 rctx.try_low_use_drive = false;
316 }
317 rctx.autochanger_only = false;
318 if ((ok = FindSuitableDeviceForJob(jcr, rctx))) { break; }
319 }
320
321 /*
322 * Now we look for a drive that may or may not be in use.
323 * Look for an exact Volume match all drives
324 */
325 rctx.PreferMountedVols = true;
326 rctx.exact_match = true;
327 rctx.autochanger_only = false;
328 if ((ok = FindSuitableDeviceForJob(jcr, rctx))) { break; }
329
330 /*
331 * Look for any mounted drive
332 */
333 rctx.exact_match = false;
334 if ((ok = FindSuitableDeviceForJob(jcr, rctx))) { break; }
335
336 /*
337 * Try any drive
338 */
339 rctx.any_drive = true;
340 if ((ok = FindSuitableDeviceForJob(jcr, rctx))) { break; }
341
342 /*
343 * Keep reservations locked *except* during WaitForDevice()
344 */
345 UnlockReservations();
346
347 /*
348 * The idea of looping on repeat a few times it to ensure
349 * that if there is some subtle timing problem between two
350 * jobs, we will simply try again, and most likely succeed.
351 * This can happen if one job reserves a drive or finishes using
352 * a drive at the same time a second job wants it.
353 */
354 if (repeat++ > 1) { /* try algorithm 3 times */
355 Bmicrosleep(30, 0); /* wait a bit */
356 Dmsg0(debuglevel, "repeat reserve algorithm\n");
357 } else if (!rctx.suitable_device ||
358 !WaitForDevice(jcr, wait_for_device_retries)) {
359 Dmsg0(debuglevel, "Fail. !suitable_device || !WaitForDevice\n");
360 fail = true;
361 }
362 LockReservations();
363 dir->signal(BNET_HEARTBEAT); /* Inform Dir that we are alive */
364 }
365 UnlockReservations();
366
367 if (!ok) {
368 /*
369 * If we get here, there are no suitable devices available, which
370 * means nothing configured. If a device is suitable but busy
371 * with another Volume, we will not come here.
372 */
373 UnbashSpaces(dir->msg);
374 PmStrcpy(jcr->errmsg, dir->msg);
375 Jmsg(jcr, M_FATAL, 0, _("Device reservation failed for JobId=%d: %s\n"),
376 jcr->JobId, jcr->errmsg);
377 dir->fsend(NO_device, dev_name.c_str());
378
379 Dmsg1(debuglevel, ">dird: %s", dir->msg);
380 }
381 } else {
382 UnbashSpaces(dir->msg);
383 PmStrcpy(jcr->errmsg, dir->msg);
384 Jmsg(jcr, M_FATAL, 0, _("Failed command: %s\n"), jcr->errmsg);
385 dir->fsend(BAD_use, jcr->errmsg);
386 Dmsg1(debuglevel, ">dird: %s", dir->msg);
387 }
388
389 ReleaseReserveMessages(jcr);
390 return ok;
391 }
392
393 /**
394 * Walk through the autochanger resources and check if the volume is in one of
395 * them.
396 *
397 * Returns: true if volume is in device
398 * false otherwise
399 */
IsVolInAutochanger(ReserveContext & rctx,VolumeReservationItem * vol)400 static bool IsVolInAutochanger(ReserveContext& rctx, VolumeReservationItem* vol)
401 {
402 AutochangerResource* changer = vol->dev->device->changer_res;
403
404 if (!changer) { return false; }
405
406 /*
407 * Find resource, and make sure we were able to open it
408 */
409 if (bstrcmp(rctx.device_name, changer->resource_name_)) {
410 Dmsg1(debuglevel, "Found changer device %s\n",
411 vol->dev->device->resource_name_);
412 return true;
413 }
414 Dmsg1(debuglevel, "Incorrect changer device %s\n", changer->resource_name_);
415
416 return false;
417 }
418
419 /**
420 * Search for a device suitable for this job.
421 *
422 * Note, this routine sets sets rctx.suitable_device if any
423 * device exists within the SD. The device may not be actually useable.
424 * It also returns if it finds a useable device.
425 */
FindSuitableDeviceForJob(JobControlRecord * jcr,ReserveContext & rctx)426 bool FindSuitableDeviceForJob(JobControlRecord* jcr, ReserveContext& rctx)
427 {
428 bool ok = false;
429 DirectorStorage* store;
430 char* device_name = nullptr;
431 alist* dirstore;
432 DeviceControlRecord* dcr = jcr->impl->dcr;
433
434 if (rctx.append) {
435 dirstore = jcr->impl->write_store;
436 } else {
437 dirstore = jcr->impl->read_store;
438 }
439 Dmsg5(debuglevel,
440 "Start find_suit_dev PrefMnt=%d exact=%d suitable=%d chgronly=%d "
441 "any=%d\n",
442 rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device,
443 rctx.autochanger_only, rctx.any_drive);
444
445 /*
446 * If the appropriate conditions of this if are met, namely that
447 * we are appending and the user wants mounted drive (or we
448 * force try a mounted drive because they are all busy), we
449 * start by looking at all the Volumes in the volume list.
450 */
451 if (!IsVolListEmpty() && rctx.append && rctx.PreferMountedVols) {
452 dlist* temp_vol_list;
453 VolumeReservationItem* vol = NULL;
454 temp_vol_list = dup_vol_list(jcr);
455
456 /*
457 * Look through reserved volumes for one we can use
458 */
459 Dmsg0(debuglevel, "look for vol in vol list\n");
460 foreach_dlist (vol, temp_vol_list) {
461 if (!vol->dev) {
462 Dmsg1(debuglevel, "vol=%s no dev\n", vol->vol_name);
463 continue;
464 }
465
466 /*
467 * Check with Director if this Volume is OK
468 */
469 bstrncpy(dcr->VolumeName, vol->vol_name, sizeof(dcr->VolumeName));
470 if (!dcr->DirGetVolumeInfo(GET_VOL_INFO_FOR_WRITE)) { continue; }
471
472 Dmsg1(debuglevel, "vol=%s OK for this job\n", vol->vol_name);
473 foreach_alist (store, dirstore) {
474 int status;
475 rctx.store = store;
476 foreach_alist (device_name, store->device) {
477 /*
478 * Found a device, try to use it
479 */
480 rctx.device_name = device_name;
481 rctx.device = vol->dev->device;
482
483 if (vol->dev->IsAutochanger()) {
484 Dmsg1(debuglevel, "vol=%s is in changer\n", vol->vol_name);
485 if (!IsVolInAutochanger(rctx, vol) || !vol->dev->autoselect) {
486 continue;
487 }
488 } else if (!bstrcmp(device_name, vol->dev->device->resource_name_)) {
489 Dmsg2(debuglevel, "device=%s not suitable want %s\n",
490 vol->dev->device->resource_name_, device_name);
491 continue;
492 }
493
494 bstrncpy(rctx.VolumeName, vol->vol_name, sizeof(rctx.VolumeName));
495 rctx.have_volume = true;
496
497 /*
498 * Try reserving this device and volume
499 */
500 Dmsg2(debuglevel, "try vol=%s on device=%s\n", rctx.VolumeName,
501 device_name);
502 status = ReserveDevice(rctx);
503 if (status == 1) { /* found available device */
504 Dmsg1(debuglevel, "Suitable device found=%s\n", device_name);
505 ok = true;
506 break;
507 } else if (status == 0) { /* device busy */
508 Dmsg1(debuglevel, "Suitable device=%s, busy: not use\n",
509 device_name);
510 } else {
511 Dmsg0(debuglevel, "No suitable device found.\n");
512 }
513 rctx.have_volume = false;
514 rctx.VolumeName[0] = 0;
515 }
516 if (ok) { break; }
517 }
518 if (ok) { break; }
519 } /* end for loop over reserved volumes */
520
521 Dmsg0(debuglevel, "lock volumes\n");
522 FreeTempVolList(temp_vol_list);
523 temp_vol_list = NULL;
524 }
525
526 if (ok) {
527 Dmsg1(debuglevel, "OK dev found. Vol=%s from in-use vols list\n",
528 rctx.VolumeName);
529 return true;
530 }
531
532 /*
533 * No reserved volume we can use, so now search for an available device.
534 *
535 * For each storage device that the user specified, we
536 * search and see if there is a resource for that device.
537 */
538 foreach_alist (store, dirstore) {
539 rctx.store = store;
540 foreach_alist (device_name, store->device) {
541 int status;
542 rctx.device_name = device_name;
543 status = SearchResForDevice(rctx);
544 if (status == 1) { /* found available device */
545 Dmsg1(debuglevel, "available device found=%s\n", device_name);
546 ok = true;
547 break;
548 } else if (status == 0) { /* device busy */
549 Dmsg1(debuglevel, "No usable device=%s, busy: not use\n", device_name);
550 } else {
551 Dmsg0(debuglevel, "No usable device found.\n");
552 }
553 }
554 if (ok) { break; }
555 }
556 if (ok) {
557 Dmsg1(debuglevel, "OK dev found. Vol=%s\n", rctx.VolumeName);
558 } else {
559 Dmsg0(debuglevel, "Leave find_suit_dev: no dev found.\n");
560 }
561 return ok;
562 }
563
564 /**
565 * Search for a particular storage device with particular storage
566 * characteristics (MediaType).
567 */
SearchResForDevice(ReserveContext & rctx)568 int SearchResForDevice(ReserveContext& rctx)
569 {
570 int status;
571 AutochangerResource* changer;
572
573 /*
574 * Look through Autochangers first
575 */
576 foreach_res (changer, R_AUTOCHANGER) {
577 Dmsg2(debuglevel, "Try match changer res=%s, wanted %s\n",
578 changer->resource_name_, rctx.device_name);
579 /*
580 * Find resource, and make sure we were able to open it
581 */
582 if (bstrcmp(rctx.device_name, changer->resource_name_)) {
583 /*
584 * Try each device in this AutoChanger
585 */
586 foreach_alist (rctx.device, changer->device) {
587 Dmsg1(debuglevel, "Try changer device %s\n",
588 rctx.device->resource_name_);
589 if (!rctx.device->autoselect) {
590 Dmsg1(100, "Device %s not autoselect skipped.\n",
591 rctx.device->resource_name_);
592 continue; /* Device is not available */
593 }
594 status = ReserveDevice(rctx);
595 if (status != 1) { /* Try another device */
596 continue;
597 }
598
599 /*
600 * Debug code
601 */
602 if (rctx.store->append == SD_APPEND) {
603 Dmsg2(debuglevel, "Device %s reserved=%d for append.\n",
604 rctx.device->resource_name_,
605 rctx.jcr->impl->dcr->dev->NumReserved());
606 } else {
607 Dmsg2(debuglevel, "Device %s reserved=%d for read.\n",
608 rctx.device->resource_name_,
609 rctx.jcr->impl->read_dcr->dev->NumReserved());
610 }
611 return status;
612 }
613 }
614 }
615
616 /*
617 * Now if requested look through regular devices
618 */
619 if (!rctx.autochanger_only) {
620 foreach_res (rctx.device, R_DEVICE) {
621 Dmsg2(debuglevel, "Try match res=%s wanted %s\n",
622 rctx.device->resource_name_, rctx.device_name);
623
624 /*
625 * Find resource, and make sure we were able to open it
626 */
627 if (bstrcmp(rctx.device_name, rctx.device->resource_name_)) {
628 status = ReserveDevice(rctx);
629 if (status != 1) { /* Try another device */
630 continue;
631 }
632 /*
633 * Debug code
634 */
635 if (rctx.store->append == SD_APPEND) {
636 Dmsg2(debuglevel, "Device %s reserved=%d for append.\n",
637 rctx.device->resource_name_,
638 rctx.jcr->impl->dcr->dev->NumReserved());
639 } else {
640 Dmsg2(debuglevel, "Device %s reserved=%d for read.\n",
641 rctx.device->resource_name_,
642 rctx.jcr->impl->read_dcr->dev->NumReserved());
643 }
644 return status;
645 }
646 }
647
648 /*
649 * If we haven't found a available device and the devicereservebymediatype
650 * option is set we try one more time where we allow any device with a
651 * matching mediatype.
652 */
653 if (me->device_reserve_by_mediatype) {
654 foreach_res (rctx.device, R_DEVICE) {
655 Dmsg3(debuglevel,
656 "Try match res=%s, mediatype=%s wanted mediatype=%s\n",
657 rctx.device->resource_name_, rctx.store->media_type,
658 rctx.store->media_type);
659
660 if (bstrcmp(rctx.store->media_type, rctx.device->media_type)) {
661 status = ReserveDevice(rctx);
662 if (status != 1) { /* Try another device */
663 continue;
664 }
665
666 /*
667 * Debug code
668 */
669 if (rctx.store->append == SD_APPEND) {
670 Dmsg2(debuglevel, "Device %s reserved=%d for append.\n",
671 rctx.device->resource_name_,
672 rctx.jcr->impl->dcr->dev->NumReserved());
673 } else {
674 Dmsg2(debuglevel, "Device %s reserved=%d for read.\n",
675 rctx.device->resource_name_,
676 rctx.jcr->impl->read_dcr->dev->NumReserved());
677 }
678 return status;
679 }
680 }
681 }
682 }
683
684 return -1; /* Nothing found */
685 }
686
687 /**
688 * Try to reserve a specific device.
689 *
690 * Returns: 1 -- OK, have DeviceControlRecord
691 * 0 -- must wait
692 * -1 -- fatal error
693 */
ReserveDevice(ReserveContext & rctx)694 static int ReserveDevice(ReserveContext& rctx)
695 {
696 bool ok;
697 DeviceControlRecord* dcr;
698 const int name_len = MAX_NAME_LENGTH;
699
700 /*
701 * Make sure MediaType is OK
702 */
703 Dmsg2(debuglevel, "chk MediaType device=%s request=%s\n",
704 rctx.device->media_type, rctx.store->media_type);
705 if (!bstrcmp(rctx.device->media_type, rctx.store->media_type)) { return -1; }
706
707 /*
708 * Make sure device exists -- i.e. we can stat() it
709 */
710 if (!rctx.device->dev) { rctx.device->dev = InitDev(rctx.jcr, rctx.device); }
711 if (!rctx.device->dev) {
712 if (rctx.device->changer_res) {
713 Jmsg(rctx.jcr, M_WARNING, 0,
714 _("\n"
715 " Device \"%s\" in changer \"%s\" requested by DIR could not "
716 "be opened or does not exist.\n"),
717 rctx.device->resource_name_, rctx.device_name);
718 } else {
719 Jmsg(rctx.jcr, M_WARNING, 0,
720 _("\n"
721 " Device \"%s\" requested by DIR could not be opened or does "
722 "not exist.\n"),
723 rctx.device_name);
724 }
725 return -1; /* no use waiting */
726 }
727
728 rctx.suitable_device = true;
729 Dmsg1(debuglevel, "try reserve %s\n", rctx.device->resource_name_);
730
731 if (rctx.store->append) {
732 SetupNewDcrDevice(rctx.jcr, rctx.jcr->impl->dcr, rctx.device->dev, NULL);
733 dcr = rctx.jcr->impl->dcr;
734 } else {
735 SetupNewDcrDevice(rctx.jcr, rctx.jcr->impl->read_dcr, rctx.device->dev,
736 NULL);
737 dcr = rctx.jcr->impl->read_dcr;
738 }
739
740 if (!dcr) {
741 BareosSocket* dir = rctx.jcr->dir_bsock;
742
743 dir->fsend(_("3926 Could not get dcr for device: %s\n"), rctx.device_name);
744 Dmsg1(debuglevel, ">dird: %s", dir->msg);
745 return -1;
746 }
747
748 if (rctx.store->append) { dcr->SetWillWrite(); }
749
750 bstrncpy(dcr->pool_name, rctx.store->pool_name, name_len);
751 bstrncpy(dcr->pool_type, rctx.store->pool_type, name_len);
752 bstrncpy(dcr->media_type, rctx.store->media_type, name_len);
753 bstrncpy(dcr->dev_name, rctx.device_name, name_len);
754 if (rctx.store->append == SD_APPEND) {
755 Dmsg2(debuglevel, "call reserve for append: have_vol=%d vol=%s\n",
756 rctx.have_volume, rctx.VolumeName);
757 ok = ReserveDeviceForAppend(dcr, rctx);
758 if (!ok) { goto bail_out; }
759
760 rctx.jcr->impl->dcr = dcr;
761 Dmsg5(debuglevel, "Reserved=%d dev_name=%s mediatype=%s pool=%s ok=%d\n",
762 dcr->dev->NumReserved(), dcr->dev_name, dcr->media_type,
763 dcr->pool_name, ok);
764 Dmsg3(debuglevel, "Vol=%s num_writers=%d, have_vol=%d\n", rctx.VolumeName,
765 dcr->dev->num_writers, rctx.have_volume);
766 if (rctx.have_volume) {
767 Dmsg0(debuglevel, "Call reserve_volume for append.\n");
768 if (reserve_volume(dcr, rctx.VolumeName)) {
769 Dmsg1(debuglevel, "Reserved vol=%s\n", rctx.VolumeName);
770 } else {
771 Dmsg1(debuglevel, "Could not reserve vol=%s\n", rctx.VolumeName);
772 goto bail_out;
773 }
774 } else {
775 dcr->any_volume = true;
776 Dmsg0(debuglevel, "no vol, call find_next_appendable_vol.\n");
777 if (dcr->DirFindNextAppendableVolume()) {
778 bstrncpy(rctx.VolumeName, dcr->VolumeName, sizeof(rctx.VolumeName));
779 rctx.have_volume = true;
780 Dmsg1(debuglevel, "looking for Volume=%s\n", rctx.VolumeName);
781 } else {
782 Dmsg0(debuglevel, "No next volume found\n");
783 rctx.have_volume = false;
784 rctx.VolumeName[0] = 0;
785
786 /*
787 * If there is at least one volume that is valid and in use,
788 * but we get here, check if we are running with prefers
789 * non-mounted drives. In that case, we have selected a
790 * non-used drive and our one and only volume is mounted
791 * elsewhere, so we bail out and retry using that drive.
792 */
793 if (dcr->FoundInUse() && !rctx.PreferMountedVols) {
794 rctx.PreferMountedVols = true;
795 if (dcr->VolumeName[0]) { dcr->UnreserveDevice(); }
796 goto bail_out;
797 }
798
799 /*
800 * Note. Under some circumstances, the Director can hand us
801 * a Volume name that is not the same as the one on the current
802 * drive, and in that case, the call above to find the next
803 * volume will fail because in attempting to reserve the Volume
804 * the code will realize that we already have a tape mounted,
805 * and it will fail. This *should* only happen if there are
806 * writers, thus the following test. In that case, we simply
807 * bail out, and continue waiting, rather than plunging on
808 * and hoping that the operator can resolve the problem.
809 */
810 if (dcr->dev->num_writers != 0) {
811 if (dcr->VolumeName[0]) { dcr->UnreserveDevice(); }
812 goto bail_out;
813 }
814 }
815 }
816 } else {
817 ok = ReserveDeviceForRead(dcr);
818 if (ok) {
819 rctx.jcr->impl->read_dcr = dcr;
820 Dmsg5(debuglevel,
821 "Read reserved=%d dev_name=%s mediatype=%s pool=%s ok=%d\n",
822 dcr->dev->NumReserved(), dcr->dev_name, dcr->media_type,
823 dcr->pool_name, ok);
824 }
825 }
826 if (!ok) { goto bail_out; }
827
828 if (rctx.notify_dir) {
829 PoolMem dev_name;
830 BareosSocket* dir = rctx.jcr->dir_bsock;
831 PmStrcpy(dev_name, rctx.device->resource_name_);
832 BashSpaces(dev_name);
833 ok = dir->fsend(OK_device, dev_name.c_str()); /* Return real device name */
834 Dmsg1(debuglevel, ">dird: %s", dir->msg);
835 } else {
836 ok = true;
837 }
838 return ok ? 1 : -1;
839
840 bail_out:
841 rctx.have_volume = false;
842 rctx.VolumeName[0] = 0;
843 Dmsg0(debuglevel, "Not OK.\n");
844 return 0;
845 }
846
847 /**
848 * We "reserve" the drive by setting the ST_READREADY bit.
849 * No one else should touch the drive until that is cleared.
850 * This allows the DIR to "reserve" the device before actually starting the job.
851 */
ReserveDeviceForRead(DeviceControlRecord * dcr)852 static bool ReserveDeviceForRead(DeviceControlRecord* dcr)
853 {
854 Device* dev = dcr->dev;
855 JobControlRecord* jcr = dcr->jcr;
856 bool ok = false;
857
858 ASSERT(dcr);
859 if (JobCanceled(jcr)) { return false; }
860
861 dev->Lock();
862
863 if (dev->IsDeviceUnmounted()) {
864 Dmsg1(debuglevel, "Device %s is BLOCKED due to user unmount.\n",
865 dev->print_name());
866 Mmsg(jcr->errmsg,
867 _("3601 JobId=%u device %s is BLOCKED due to user unmount.\n"),
868 jcr->JobId, dev->print_name());
869 QueueReserveMessage(jcr);
870 goto bail_out;
871 }
872
873 if (dev->IsBusy()) {
874 Dmsg4(debuglevel,
875 "Device %s is busy ST_READREADY=%d num_writers=%d reserved=%d.\n",
876 dev->print_name(), BitIsSet(ST_READREADY, dev->state) ? 1 : 0,
877 dev->num_writers, dev->NumReserved());
878 Mmsg(jcr->errmsg,
879 _("3602 JobId=%u device %s is busy (already reading/writing).\n"),
880 jcr->JobId, dev->print_name());
881 QueueReserveMessage(jcr);
882 goto bail_out;
883 }
884
885 /*
886 * Note: on failure this returns jcr->errmsg properly edited
887 */
888 if (GeneratePluginEvent(jcr, bsdEventDeviceReserve, dcr) != bRC_OK) {
889 QueueReserveMessage(jcr);
890 goto bail_out;
891 }
892 dev->ClearAppend();
893 dev->SetRead();
894 dcr->SetReserved();
895 ok = true;
896
897 bail_out:
898 dev->Unlock();
899 return ok;
900 }
901
902 /**
903 * We reserve the device for appending by incrementing
904 * NumReserved(). We do virtually all the same work that
905 * is done in AcquireDeviceForAppend(), but we do
906 * not attempt to mount the device. This routine allows
907 * the DIR to reserve multiple devices before *really*
908 * starting the job. It also permits the SD to refuse
909 * certain devices (not up, ...).
910 *
911 * Note, in reserving a device, if the device is for the
912 * same pool and the same pool type, then it is acceptable.
913 * The Media Type has already been checked. If we are
914 * the first to reserve the device, we put the pool
915 * name and pool type in the device record.
916 */
ReserveDeviceForAppend(DeviceControlRecord * dcr,ReserveContext & rctx)917 static bool ReserveDeviceForAppend(DeviceControlRecord* dcr,
918 ReserveContext& rctx)
919 {
920 JobControlRecord* jcr = dcr->jcr;
921 Device* dev = dcr->dev;
922 bool ok = false;
923
924 ASSERT(dcr);
925 if (JobCanceled(jcr)) { return false; }
926
927 dev->Lock();
928
929 /*
930 * If device is being read, we cannot write it
931 */
932 if (dev->CanRead()) {
933 Mmsg(jcr->errmsg, _("3603 JobId=%u device %s is busy reading.\n"),
934 jcr->JobId, dev->print_name());
935 Dmsg1(debuglevel, "Failed: %s", jcr->errmsg);
936 QueueReserveMessage(jcr);
937 goto bail_out;
938 }
939
940 /*
941 * If device is unmounted, we are out of luck
942 */
943 if (dev->IsDeviceUnmounted()) {
944 Mmsg(jcr->errmsg,
945 _("3604 JobId=%u device %s is BLOCKED due to user unmount.\n"),
946 jcr->JobId, dev->print_name());
947 Dmsg1(debuglevel, "Failed: %s", jcr->errmsg);
948 QueueReserveMessage(jcr);
949 goto bail_out;
950 }
951
952 Dmsg1(debuglevel, "reserve_append device is %s\n", dev->print_name());
953
954 /*
955 * Now do detailed tests ...
956 */
957 if (CanReserveDrive(dcr, rctx) != 1) {
958 Dmsg0(debuglevel, "CanReserveDrive!=1\n");
959 goto bail_out;
960 }
961
962 /*
963 * Note: on failure this returns jcr->errmsg properly edited
964 */
965 if (GeneratePluginEvent(jcr, bsdEventDeviceReserve, dcr) != bRC_OK) {
966 QueueReserveMessage(jcr);
967 goto bail_out;
968 }
969 dcr->SetReserved();
970 ok = true;
971
972 bail_out:
973 dev->Unlock();
974 return ok;
975 }
976
IsPoolOk(DeviceControlRecord * dcr)977 static int IsPoolOk(DeviceControlRecord* dcr)
978 {
979 Device* dev = dcr->dev;
980 JobControlRecord* jcr = dcr->jcr;
981
982 /*
983 * Now check if we want the same Pool and pool type
984 */
985 if (bstrcmp(dev->pool_name, dcr->pool_name) &&
986 bstrcmp(dev->pool_type, dcr->pool_type)) {
987 /*
988 * OK, compatible device
989 */
990 Dmsg1(debuglevel, "OK dev: %s num_writers=0, reserved, pool matches\n",
991 dev->print_name());
992 return 1;
993 } else {
994 /* Drive Pool not suitable for us */
995 Mmsg(jcr->errmsg,
996 _("3608 JobId=%u wants Pool=\"%s\" but have Pool=\"%s\" nreserve=%d "
997 "on drive %s.\n"),
998 (uint32_t)jcr->JobId, dcr->pool_name, dev->pool_name,
999 dev->NumReserved(), dev->print_name());
1000 Dmsg1(debuglevel, "Failed: %s", jcr->errmsg);
1001 QueueReserveMessage(jcr);
1002 }
1003 return 0;
1004 }
1005
IsMaxJobsOk(DeviceControlRecord * dcr)1006 static bool IsMaxJobsOk(DeviceControlRecord* dcr)
1007 {
1008 Device* dev = dcr->dev;
1009 JobControlRecord* jcr = dcr->jcr;
1010
1011 Dmsg5(debuglevel, "MaxJobs=%d Jobs=%d reserves=%d Status=%s Vol=%s\n",
1012 dcr->VolCatInfo.VolCatMaxJobs, dcr->VolCatInfo.VolCatJobs,
1013 dev->NumReserved(), dcr->VolCatInfo.VolCatStatus, dcr->VolumeName);
1014
1015 /*
1016 * Limit max concurrent jobs on this drive
1017 */
1018 if (dev->max_concurrent_jobs > 0 &&
1019 dev->max_concurrent_jobs <=
1020 (uint32_t)(dev->num_writers + dev->NumReserved())) {
1021 /*
1022 * Max Concurrent Jobs depassed or already reserved
1023 */
1024 Mmsg(jcr->errmsg,
1025 _("3609 JobId=%u Max concurrent jobs exceeded on drive %s.\n"),
1026 (uint32_t)jcr->JobId, dev->print_name());
1027 Dmsg1(debuglevel, "Failed: %s", jcr->errmsg);
1028 QueueReserveMessage(jcr);
1029 return false;
1030 }
1031 if (bstrcmp(dcr->VolCatInfo.VolCatStatus, "Recycle")) { return true; }
1032 if (dcr->VolCatInfo.VolCatMaxJobs > 0 &&
1033 dcr->VolCatInfo.VolCatMaxJobs <=
1034 (dcr->VolCatInfo.VolCatJobs + dev->NumReserved())) {
1035 /*
1036 * Max Job Vols depassed or already reserved
1037 */
1038 Mmsg(jcr->errmsg,
1039 _("3610 JobId=%u Volume max jobs exceeded on drive %s.\n"),
1040 (uint32_t)jcr->JobId, dev->print_name());
1041 Dmsg1(debuglevel, "reserve dev failed: %s", jcr->errmsg);
1042 QueueReserveMessage(jcr);
1043 return false; /* wait */
1044 }
1045 return true;
1046 }
1047
/**
 * Decide whether the device attached to dcr can be reserved by this job.
 *
 * The checks run in this order:
 *  1. job limits via IsMaxJobsOk();
 *  2. unless rctx.any_drive is set: the low-use-drive shortcut, the
 *     "wants a free drive" test, the "prefers mounted volumes" test and the
 *     exact Volume name match;
 *  3. the unused autochanger drive case;
 *  4. the case of a device without writers;
 *  5. the case of a device that can append or already has writers.
 *
 * Refusals decided in this function format an explanation into jcr->errmsg
 * and queue it with QueueReserveMessage(); IsMaxJobsOk() and IsPoolOk() do
 * the same for their own refusals. The Can_i_use_volume() refusal returns 0
 * without queueing anything here.
 *
 * Side effects visible in this function: rctx.num_writers and
 * rctx.low_use_drive may be updated, and dev->pool_name / dev->pool_type are
 * overwritten from dcr when an idle device is claimed.
 *
 * Returns: 1 if drive can be reserved
 *          0 if we should wait
 *         -1 on error or impossibility
 */
static int CanReserveDrive(DeviceControlRecord* dcr, ReserveContext& rctx)
{
  Device* dev = dcr->dev;
  JobControlRecord* jcr = dcr->jcr;

  Dmsg5(debuglevel, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n",
        rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device,
        rctx.autochanger_only, rctx.any_drive);

  /*
   * Check the drive and Volume job limits; the failure message has already
   * been queued by IsMaxJobsOk().
   */
  if (!IsMaxJobsOk(dcr)) { return 0; }

  /*
   * Setting any_drive overrides PreferMountedVols flag
   * (the whole block of preference checks below is skipped).
   */
  if (!rctx.any_drive) {
    /*
     * When the job does not prefer mounted volumes, busy drives are refused
     * further down, but the one with the fewest writers plus reservations
     * is remembered in rctx.low_use_drive. Once rctx.try_low_use_drive is
     * set, that remembered drive is accepted right here. This helps spread
     * the load to the least used drives.
     */
    if (rctx.try_low_use_drive && dev == rctx.low_use_drive) {
      Dmsg2(debuglevel, "OK dev=%s == low_drive=%s.\n", dev->print_name(),
            rctx.low_use_drive->print_name());
      return 1;
    }

    /*
     * If he wants a free drive, but this one is busy, no go
     */
    if (!rctx.PreferMountedVols && dev->IsBusy()) {
      /*
       * Save least used drive: usage is writers plus reservations, and the
       * drive is recorded only when it is strictly below the best so far.
       */
      if ((dev->num_writers + dev->NumReserved()) < rctx.num_writers) {
        rctx.num_writers = dev->num_writers + dev->NumReserved();
        rctx.low_use_drive = dev;
        Dmsg2(debuglevel, "set low use drive=%s num_writers=%d\n",
              dev->print_name(), rctx.num_writers);
      } else {
        Dmsg1(debuglevel, "not low use num_writers=%d\n",
              dev->num_writers + dev->NumReserved());
      }
      Mmsg(jcr->errmsg,
           _("3605 JobId=%u wants free drive but device %s is busy.\n"),
           jcr->JobId, dev->print_name());
      Dmsg1(debuglevel, "Failed: %s", jcr->errmsg);
      QueueReserveMessage(jcr);
      return 0;
    }

    /*
     * Check for prefer mounted volumes: a tape device without an attached
     * volume (dev->vol) is refused.
     */
    if (rctx.PreferMountedVols && !dev->vol && dev->IsTape()) {
      Mmsg(jcr->errmsg,
           _("3606 JobId=%u prefers mounted drives, but drive %s has no "
             "Volume.\n"),
           jcr->JobId, dev->print_name());
      Dmsg1(debuglevel, "Failed: %s", jcr->errmsg);
      QueueReserveMessage(jcr);
      return 0; /* No volume mounted */
    }

    /*
     * Check for exact Volume name match
     * ***FIXME*** for Disk, we can accept any volume that goes with this drive.
     */
    if (rctx.exact_match && rctx.have_volume) {
      bool ok;

      Dmsg5(debuglevel, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n",
            rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device,
            rctx.autochanger_only, rctx.any_drive);
      Dmsg4(debuglevel, "have_vol=%d have=%s resvol=%s want=%s\n",
            rctx.have_volume, dev->VolHdr.VolumeName,
            dev->vol ? dev->vol->vol_name : "*None*", rctx.VolumeName);
      /*
       * The wanted name may match either the name in the volume header or
       * the name of the volume attached to the device.
       */
      ok = bstrcmp(dev->VolHdr.VolumeName, rctx.VolumeName) ||
           (dev->vol && bstrcmp(dev->vol->vol_name, rctx.VolumeName));
      if (!ok) {
        Mmsg(jcr->errmsg,
             _("3607 JobId=%u wants Vol=\"%s\" drive has Vol=\"%s\" on drive "
               "%s.\n"),
             jcr->JobId, rctx.VolumeName, dev->VolHdr.VolumeName,
             dev->print_name());
        QueueReserveMessage(jcr);
        Dmsg3(debuglevel, "not OK: dev have=%s resvol=%s want=%s\n",
              dev->VolHdr.VolumeName, dev->vol ? dev->vol->vol_name : "*None*",
              rctx.VolumeName);
        return 0;
      }
      /*
       * No message is queued here for this refusal.
       */
      if (!dcr->Can_i_use_volume()) {
        return 0; /* fail if volume on another drive */
      }
    }
  }

  /*
   * Check for unused autochanger drive: not busy and no volume name in the
   * volume header.
   */
  if (rctx.autochanger_only && !dev->IsBusy() &&
      dev->VolHdr.VolumeName[0] == 0) {
    /*
     * Device is available but not yet reserved, reserve it for us
     * and record our pool on the device.
     */
    Dmsg1(debuglevel, "OK Res Unused autochanger %s.\n", dev->print_name());
    bstrncpy(dev->pool_name, dcr->pool_name, sizeof(dev->pool_name));
    bstrncpy(dev->pool_type, dcr->pool_type, sizeof(dev->pool_type));
    return 1; /* reserve drive */
  }

  /*
   * Handle the case that there are no writers
   */
  if (dev->num_writers == 0) {
    /*
     * Now check if there are any reservations on the drive; if so the
     * answer depends only on the pool match.
     */
    if (dev->NumReserved()) {
      return IsPoolOk(dcr);
    } else if (dev->CanAppend()) {
      if (IsPoolOk(dcr)) {
        return 1;
      } else {
        /*
         * Changing pool, unload old tape if any in drive.
         * Note: IsPoolOk() has already set jcr->errmsg and queued message
         * 3608 at this point, although the reservation goes ahead below.
         */
        Dmsg0(debuglevel,
              "OK dev: num_writers=0, not reserved, pool change, unload "
              "changer\n");
        /*
         * ***FIXME*** use SetUnload()
         */
        UnloadAutochanger(dcr, -1);
      }
    }

    /*
     * Device is available but not yet reserved, reserve it for us
     * and record our pool on the device.
     */
    Dmsg1(debuglevel, "OK Dev avail reserved %s\n", dev->print_name());
    bstrncpy(dev->pool_name, dcr->pool_name, sizeof(dev->pool_name));
    bstrncpy(dev->pool_type, dcr->pool_type, sizeof(dev->pool_type));
    return 1; /* reserve drive */
  }

  /*
   * Check if the device is in append mode with writers (i.e. available if pool
   * is the same). num_writers == 0 was handled above, so the else branch is
   * reached only when the device cannot append and num_writers is negative.
   */
  if (dev->CanAppend() || dev->num_writers > 0) {
    return IsPoolOk(dcr);
  } else {
    Pmsg1(000, _("Logic error!!!! JobId=%u Should not get here.\n"),
          (int)jcr->JobId);
    Mmsg(jcr->errmsg,
         _("3910 JobId=%u Logic error!!!! drive %s Should not get here.\n"),
         jcr->JobId, dev->print_name());
    QueueReserveMessage(jcr);
    Jmsg0(jcr, M_FATAL, 0, _("Logic error!!!! Should not get here.\n"));

    return -1; /* error, should not get here */
  }
}
1220
1221 /**
1222 * Queue a reservation error or failure message for this jcr
1223 */
QueueReserveMessage(JobControlRecord * jcr)1224 static void QueueReserveMessage(JobControlRecord* jcr)
1225 {
1226 int i;
1227 alist* msgs;
1228 char* msg;
1229
1230 jcr->lock();
1231
1232 msgs = jcr->impl->reserve_msgs;
1233 if (!msgs) { goto bail_out; }
1234 /*
1235 * Look for duplicate message. If found, do not insert
1236 */
1237 for (i = msgs->size() - 1; i >= 0; i--) {
1238 msg = (char*)msgs->get(i);
1239 if (!msg) { goto bail_out; }
1240
1241 /*
1242 * Comparison based on 4 digit message number
1243 */
1244 if (bstrncmp(msg, jcr->errmsg, 4)) { goto bail_out; }
1245 }
1246
1247 /*
1248 * Message unique, so insert it.
1249 */
1250 jcr->impl->reserve_msgs->push(strdup(jcr->errmsg));
1251
1252 bail_out:
1253 jcr->unlock();
1254 }
1255
1256 /**
1257 * Pop and release any reservations messages
1258 */
PopReserveMessages(JobControlRecord * jcr)1259 static void PopReserveMessages(JobControlRecord* jcr)
1260 {
1261 alist* msgs;
1262 char* msg;
1263
1264 jcr->lock();
1265 msgs = jcr->impl->reserve_msgs;
1266 if (!msgs) { goto bail_out; }
1267 while ((msg = (char*)msgs->pop())) { free(msg); }
1268 bail_out:
1269 jcr->unlock();
1270 }
1271
1272 /**
1273 * Also called from acquire.c
1274 */
ReleaseReserveMessages(JobControlRecord * jcr)1275 void ReleaseReserveMessages(JobControlRecord* jcr)
1276 {
1277 PopReserveMessages(jcr);
1278 jcr->lock();
1279 if (!jcr->impl->reserve_msgs) { goto bail_out; }
1280 delete jcr->impl->reserve_msgs;
1281 jcr->impl->reserve_msgs = NULL;
1282
1283 bail_out:
1284 jcr->unlock();
1285 }
1286
1287 } /* namespace storagedaemon */
1288