1 /*
2    BAREOS® - Backup Archiving REcovery Open Sourced
3 
4    Copyright (C) 2000-2012 Free Software Foundation Europe e.V.
5    Copyright (C) 2016-2019 Bareos GmbH & Co. KG
6 
7    This program is Free Software; you can redistribute it and/or
8    modify it under the terms of version three of the GNU Affero General Public
9    License as published by the Free Software Foundation and included
10    in the file LICENSE.
11 
12    This program is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15    Affero General Public License for more details.
16 
17    You should have received a copy of the GNU Affero General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20    02110-1301, USA.
21 */
22 /*
23  * Kern Sibbald, MM
24  * Split from job.c and acquire.c June 2005
25  */
26 /**
27  * @file
28  * Drive reservation functions for Storage Daemon
29  */
30 
31 #include "include/bareos.h"
32 #include "stored/stored.h"
33 #include "stored/stored_globals.h"
34 #include "stored/acquire.h"
35 #include "stored/autochanger.h"
36 #include "stored/jcr_private.h"
37 #include "stored/wait.h"
38 #include "lib/berrno.h"
39 #include "lib/util.h"
40 #include "lib/bsock.h"
41 #include "include/jcr.h"
42 #include "lib/parse_conf.h"
43 
44 namespace storagedaemon {
45 
46 const int debuglevel = 150;
47 
48 /* Global static variables */
49 #ifdef SD_DEBUG_LOCK
50 int reservations_lock_count = 0;
51 #else
52 static int reservations_lock_count = 0;
53 #endif
54 
55 static brwlock_t reservation_lock;
56 
57 /* Forward referenced functions */
58 static int CanReserveDrive(DeviceControlRecord* dcr, ReserveContext& rctx);
59 static int ReserveDevice(ReserveContext& rctx);
60 static bool ReserveDeviceForRead(DeviceControlRecord* dcr);
61 static bool ReserveDeviceForAppend(DeviceControlRecord* dcr,
62                                    ReserveContext& rctx);
63 static bool UseDeviceCmd(JobControlRecord* jcr);
64 static void QueueReserveMessage(JobControlRecord* jcr);
65 static void PopReserveMessages(JobControlRecord* jcr);
66 // void SwitchDevice(DeviceControlRecord *dcr, Device *dev);
67 
68 /* Requests from the Director daemon */
69 static char use_storage[] =
70     "use storage=%127s media_type=%127s "
71     "pool_name=%127s pool_type=%127s append=%d copy=%d stripe=%d\n";
72 static char use_device[] = "use device=%127s\n";
73 
74 /* Responses sent to Director daemon */
75 static char OK_device[] = "3000 OK use device device=%s\n";
76 static char NO_device[] =
77     "3924 Device \"%s\" not in SD Device"
78     " resources or no matching Media Type.\n";
79 static char BAD_use[] = "3913 Bad use command: %s\n";
80 
use_cmd(JobControlRecord * jcr)81 bool use_cmd(JobControlRecord* jcr)
82 {
83   /*
84    * Get the device, media, and pool information
85    */
86   if (!UseDeviceCmd(jcr)) {
87     jcr->setJobStatus(JS_ErrorTerminated);
88     memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
89     return false;
90   }
91   return true;
92 }
93 
94 /**
95  * This allows a given thread to recursively call LockReservations.
96  * It must, of course, call unlock_... the same number of times.
97  */
InitReservationsLock()98 void InitReservationsLock()
99 {
100   int errstat;
101   if ((errstat = RwlInit(&reservation_lock)) != 0) {
102     BErrNo be;
103     Emsg1(M_ABORT, 0, _("Unable to initialize reservation lock. ERR=%s\n"),
104           be.bstrerror(errstat));
105   }
106 
107   InitVolListLock();
108 }
109 
TermReservationsLock()110 void TermReservationsLock()
111 {
112   RwlDestroy(&reservation_lock);
113   TermVolListLock();
114 }
115 
116 /**
117  * This applies to a drive and to Volumes
118  */
_lockReservations(const char * file,int line)119 void _lockReservations(const char* file, int line)
120 {
121   int errstat;
122   reservations_lock_count++;
123   if ((errstat = RwlWritelock_p(&reservation_lock, file, line)) != 0) {
124     BErrNo be;
125     Emsg2(M_ABORT, 0, "RwlWritelock failure. stat=%d: ERR=%s\n", errstat,
126           be.bstrerror(errstat));
127   }
128 }
129 
_unLockReservations()130 void _unLockReservations()
131 {
132   int errstat;
133   reservations_lock_count--;
134   if ((errstat = RwlWriteunlock(&reservation_lock)) != 0) {
135     BErrNo be;
136     Emsg2(M_ABORT, 0, "RwlWriteunlock failure. stat=%d: ERR=%s\n", errstat,
137           be.bstrerror(errstat));
138   }
139 }
140 
SetReserved()141 void DeviceControlRecord::SetReserved()
142 {
143   reserved_ = true;
144   Dmsg2(debuglevel, "Inc reserve=%d dev=%s\n", dev->NumReserved(),
145         dev->print_name());
146   dev->IncReserved();
147 }
148 
ClearReserved()149 void DeviceControlRecord::ClearReserved()
150 {
151   if (reserved_) {
152     reserved_ = false;
153     dev->DecReserved();
154     Dmsg2(debuglevel, "Dec reserve=%d dev=%s\n", dev->NumReserved(),
155           dev->print_name());
156   }
157 }
158 
159 /**
160  * Remove any reservation from a drive and tell the system
161  * that the volume is unused at least by us.
162  */
UnreserveDevice()163 void DeviceControlRecord::UnreserveDevice()
164 {
165   dev->Lock();
166   if (IsReserved()) {
167     ClearReserved();
168     reserved_volume = false;
169 
170     /*
171      * If we set read mode in reserving, remove it
172      */
173     if (dev->CanRead()) { dev->ClearRead(); }
174 
175     if (dev->num_writers < 0) {
176       Jmsg1(jcr, M_ERROR, 0, _("Hey! num_writers=%d!!!!\n"), dev->num_writers);
177       dev->num_writers = 0;
178     }
179 
180     if (dev->NumReserved() == 0 && dev->num_writers == 0) {
181       VolumeUnused(this);
182     }
183   }
184   dev->Unlock();
185 }
186 
187 /**
188  * We get the following type of information:
189  *
190  * use storage=xxx media_type=yyy pool_name=xxx pool_type=yyy append=1 copy=0
191  * strip=0 use device=zzz use device=aaa use device=bbb use storage=xxx
192  * media_type=yyy pool_name=xxx pool_type=yyy append=0 copy=0 strip=0 use
193  * device=bbb
194  */
UseDeviceCmd(JobControlRecord * jcr)195 static bool UseDeviceCmd(JobControlRecord* jcr)
196 {
197   PoolMem StoreName, dev_name, media_type, pool_name, pool_type;
198   BareosSocket* dir = jcr->dir_bsock;
199   int32_t append;
200   bool ok;
201   int32_t Copy, Stripe;
202   DirectorStorage* store;
203   ReserveContext rctx;
204   alist* dirstore;
205 
206   memset(&rctx, 0, sizeof(ReserveContext));
207   rctx.jcr = jcr;
208 
209   /*
210    * If there are multiple devices, the director sends us
211    * use_device for each device that it wants to use.
212    */
213   jcr->impl->reserve_msgs = new alist(10, not_owned_by_alist);
214   do {
215     Dmsg1(debuglevel, "<dird: %s", dir->msg);
216     ok = sscanf(dir->msg, use_storage, StoreName.c_str(), media_type.c_str(),
217                 pool_name.c_str(), pool_type.c_str(), &append, &Copy,
218                 &Stripe) == 7;
219     if (!ok) { break; }
220     dirstore = new alist(10, not_owned_by_alist);
221     if (append) {
222       jcr->impl->write_store = dirstore;
223     } else {
224       jcr->impl->read_store = dirstore;
225     }
226     rctx.append = append;
227     UnbashSpaces(StoreName);
228     UnbashSpaces(media_type);
229     UnbashSpaces(pool_name);
230     UnbashSpaces(pool_type);
231     store = new DirectorStorage;
232     dirstore->append(store);
233     memset(store, 0, sizeof(DirectorStorage));
234     store->device = new alist(10);
235     bstrncpy(store->name, StoreName, sizeof(store->name));
236     bstrncpy(store->media_type, media_type, sizeof(store->media_type));
237     bstrncpy(store->pool_name, pool_name, sizeof(store->pool_name));
238     bstrncpy(store->pool_type, pool_type, sizeof(store->pool_type));
239     store->append = append;
240 
241     /*
242      * Now get all devices
243      */
244     while (dir->recv() >= 0) {
245       Dmsg1(debuglevel, "<dird device: %s", dir->msg);
246       ok = sscanf(dir->msg, use_device, dev_name.c_str()) == 1;
247       if (!ok) { break; }
248       UnbashSpaces(dev_name);
249       store->device->append(strdup(dev_name.c_str()));
250     }
251   } while (ok && dir->recv() >= 0);
252 
253   InitJcrDeviceWaitTimers(jcr);
254   jcr->impl->dcr = new StorageDaemonDeviceControlRecord;
255   SetupNewDcrDevice(jcr, jcr->impl->dcr, NULL, NULL);
256   if (rctx.append) { jcr->impl->dcr->SetWillWrite(); }
257 
258   if (!jcr->impl->dcr) {
259     BareosSocket* dir = jcr->dir_bsock;
260     dir->fsend(_("3939 Could not get dcr\n"));
261     Dmsg1(debuglevel, ">dird: %s", dir->msg);
262     ok = false;
263   }
264 
265   /*
266    * At this point, we have a list of all the Director's Storage resources
267    * indicated for this Job, which include Pool, PoolType, storage name, and
268    * Media type.
269    *
270    * Then for each of the Storage resources, we have a list of device names that
271    * were given.
272    *
273    * Wiffle through them and find one that can do the backup.
274    */
275   if (ok) {
276     int wait_for_device_retries = 0;
277     int repeat = 0;
278     bool fail = false;
279     rctx.notify_dir = true;
280 
281     /*
282      * Put new dcr in proper location
283      */
284     if (rctx.append) {
285       rctx.jcr->impl->dcr = jcr->impl->dcr;
286     } else {
287       rctx.jcr->impl->read_dcr = jcr->impl->dcr;
288     }
289 
290     LockReservations();
291     for (; !fail && !JobCanceled(jcr);) {
292       PopReserveMessages(jcr);
293       rctx.suitable_device = false;
294       rctx.have_volume = false;
295       rctx.VolumeName[0] = 0;
296       rctx.any_drive = false;
297       if (!jcr->impl->PreferMountedVols) {
298         /*
299          * Here we try to find a drive that is not used.
300          * This will maximize the use of available drives.
301          */
302         rctx.num_writers = 20000000; /* start with impossible number */
303         rctx.low_use_drive = NULL;
304         rctx.PreferMountedVols = false;
305         rctx.exact_match = false;
306         rctx.autochanger_only = true;
307         if ((ok = FindSuitableDeviceForJob(jcr, rctx))) { break; }
308 
309         /*
310          * Look through all drives possibly for low_use drive
311          */
312         if (rctx.low_use_drive) {
313           rctx.try_low_use_drive = true;
314           if ((ok = FindSuitableDeviceForJob(jcr, rctx))) { break; }
315           rctx.try_low_use_drive = false;
316         }
317         rctx.autochanger_only = false;
318         if ((ok = FindSuitableDeviceForJob(jcr, rctx))) { break; }
319       }
320 
321       /*
322        * Now we look for a drive that may or may not be in use.
323        * Look for an exact Volume match all drives
324        */
325       rctx.PreferMountedVols = true;
326       rctx.exact_match = true;
327       rctx.autochanger_only = false;
328       if ((ok = FindSuitableDeviceForJob(jcr, rctx))) { break; }
329 
330       /*
331        * Look for any mounted drive
332        */
333       rctx.exact_match = false;
334       if ((ok = FindSuitableDeviceForJob(jcr, rctx))) { break; }
335 
336       /*
337        * Try any drive
338        */
339       rctx.any_drive = true;
340       if ((ok = FindSuitableDeviceForJob(jcr, rctx))) { break; }
341 
342       /*
343        * Keep reservations locked *except* during WaitForDevice()
344        */
345       UnlockReservations();
346 
347       /*
348        * The idea of looping on repeat a few times it to ensure
349        * that if there is some subtle timing problem between two
350        * jobs, we will simply try again, and most likely succeed.
351        * This can happen if one job reserves a drive or finishes using
352        * a drive at the same time a second job wants it.
353        */
354       if (repeat++ > 1) {   /* try algorithm 3 times */
355         Bmicrosleep(30, 0); /* wait a bit */
356         Dmsg0(debuglevel, "repeat reserve algorithm\n");
357       } else if (!rctx.suitable_device ||
358                  !WaitForDevice(jcr, wait_for_device_retries)) {
359         Dmsg0(debuglevel, "Fail. !suitable_device || !WaitForDevice\n");
360         fail = true;
361       }
362       LockReservations();
363       dir->signal(BNET_HEARTBEAT); /* Inform Dir that we are alive */
364     }
365     UnlockReservations();
366 
367     if (!ok) {
368       /*
369        * If we get here, there are no suitable devices available, which
370        * means nothing configured.  If a device is suitable but busy
371        * with another Volume, we will not come here.
372        */
373       UnbashSpaces(dir->msg);
374       PmStrcpy(jcr->errmsg, dir->msg);
375       Jmsg(jcr, M_FATAL, 0, _("Device reservation failed for JobId=%d: %s\n"),
376            jcr->JobId, jcr->errmsg);
377       dir->fsend(NO_device, dev_name.c_str());
378 
379       Dmsg1(debuglevel, ">dird: %s", dir->msg);
380     }
381   } else {
382     UnbashSpaces(dir->msg);
383     PmStrcpy(jcr->errmsg, dir->msg);
384     Jmsg(jcr, M_FATAL, 0, _("Failed command: %s\n"), jcr->errmsg);
385     dir->fsend(BAD_use, jcr->errmsg);
386     Dmsg1(debuglevel, ">dird: %s", dir->msg);
387   }
388 
389   ReleaseReserveMessages(jcr);
390   return ok;
391 }
392 
393 /**
394  * Walk through the autochanger resources and check if the volume is in one of
395  * them.
396  *
397  * Returns:  true  if volume is in device
398  *           false otherwise
399  */
IsVolInAutochanger(ReserveContext & rctx,VolumeReservationItem * vol)400 static bool IsVolInAutochanger(ReserveContext& rctx, VolumeReservationItem* vol)
401 {
402   AutochangerResource* changer = vol->dev->device->changer_res;
403 
404   if (!changer) { return false; }
405 
406   /*
407    * Find resource, and make sure we were able to open it
408    */
409   if (bstrcmp(rctx.device_name, changer->resource_name_)) {
410     Dmsg1(debuglevel, "Found changer device %s\n",
411           vol->dev->device->resource_name_);
412     return true;
413   }
414   Dmsg1(debuglevel, "Incorrect changer device %s\n", changer->resource_name_);
415 
416   return false;
417 }
418 
419 /**
420  * Search for a device suitable for this job.
421  *
422  * Note, this routine sets sets rctx.suitable_device if any
423  * device exists within the SD. The device may not be actually useable.
424  * It also returns if it finds a useable device.
425  */
FindSuitableDeviceForJob(JobControlRecord * jcr,ReserveContext & rctx)426 bool FindSuitableDeviceForJob(JobControlRecord* jcr, ReserveContext& rctx)
427 {
428   bool ok = false;
429   DirectorStorage* store;
430   char* device_name = nullptr;
431   alist* dirstore;
432   DeviceControlRecord* dcr = jcr->impl->dcr;
433 
434   if (rctx.append) {
435     dirstore = jcr->impl->write_store;
436   } else {
437     dirstore = jcr->impl->read_store;
438   }
439   Dmsg5(debuglevel,
440         "Start find_suit_dev PrefMnt=%d exact=%d suitable=%d chgronly=%d "
441         "any=%d\n",
442         rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device,
443         rctx.autochanger_only, rctx.any_drive);
444 
445   /*
446    * If the appropriate conditions of this if are met, namely that
447    * we are appending and the user wants mounted drive (or we
448    * force try a mounted drive because they are all busy), we
449    * start by looking at all the Volumes in the volume list.
450    */
451   if (!IsVolListEmpty() && rctx.append && rctx.PreferMountedVols) {
452     dlist* temp_vol_list;
453     VolumeReservationItem* vol = NULL;
454     temp_vol_list = dup_vol_list(jcr);
455 
456     /*
457      * Look through reserved volumes for one we can use
458      */
459     Dmsg0(debuglevel, "look for vol in vol list\n");
460     foreach_dlist (vol, temp_vol_list) {
461       if (!vol->dev) {
462         Dmsg1(debuglevel, "vol=%s no dev\n", vol->vol_name);
463         continue;
464       }
465 
466       /*
467        * Check with Director if this Volume is OK
468        */
469       bstrncpy(dcr->VolumeName, vol->vol_name, sizeof(dcr->VolumeName));
470       if (!dcr->DirGetVolumeInfo(GET_VOL_INFO_FOR_WRITE)) { continue; }
471 
472       Dmsg1(debuglevel, "vol=%s OK for this job\n", vol->vol_name);
473       foreach_alist (store, dirstore) {
474         int status;
475         rctx.store = store;
476         foreach_alist (device_name, store->device) {
477           /*
478            * Found a device, try to use it
479            */
480           rctx.device_name = device_name;
481           rctx.device = vol->dev->device;
482 
483           if (vol->dev->IsAutochanger()) {
484             Dmsg1(debuglevel, "vol=%s is in changer\n", vol->vol_name);
485             if (!IsVolInAutochanger(rctx, vol) || !vol->dev->autoselect) {
486               continue;
487             }
488           } else if (!bstrcmp(device_name, vol->dev->device->resource_name_)) {
489             Dmsg2(debuglevel, "device=%s not suitable want %s\n",
490                   vol->dev->device->resource_name_, device_name);
491             continue;
492           }
493 
494           bstrncpy(rctx.VolumeName, vol->vol_name, sizeof(rctx.VolumeName));
495           rctx.have_volume = true;
496 
497           /*
498            * Try reserving this device and volume
499            */
500           Dmsg2(debuglevel, "try vol=%s on device=%s\n", rctx.VolumeName,
501                 device_name);
502           status = ReserveDevice(rctx);
503           if (status == 1) { /* found available device */
504             Dmsg1(debuglevel, "Suitable device found=%s\n", device_name);
505             ok = true;
506             break;
507           } else if (status == 0) { /* device busy */
508             Dmsg1(debuglevel, "Suitable device=%s, busy: not use\n",
509                   device_name);
510           } else {
511             Dmsg0(debuglevel, "No suitable device found.\n");
512           }
513           rctx.have_volume = false;
514           rctx.VolumeName[0] = 0;
515         }
516         if (ok) { break; }
517       }
518       if (ok) { break; }
519     } /* end for loop over reserved volumes */
520 
521     Dmsg0(debuglevel, "lock volumes\n");
522     FreeTempVolList(temp_vol_list);
523     temp_vol_list = NULL;
524   }
525 
526   if (ok) {
527     Dmsg1(debuglevel, "OK dev found. Vol=%s from in-use vols list\n",
528           rctx.VolumeName);
529     return true;
530   }
531 
532   /*
533    * No reserved volume we can use, so now search for an available device.
534    *
535    * For each storage device that the user specified, we
536    * search and see if there is a resource for that device.
537    */
538   foreach_alist (store, dirstore) {
539     rctx.store = store;
540     foreach_alist (device_name, store->device) {
541       int status;
542       rctx.device_name = device_name;
543       status = SearchResForDevice(rctx);
544       if (status == 1) { /* found available device */
545         Dmsg1(debuglevel, "available device found=%s\n", device_name);
546         ok = true;
547         break;
548       } else if (status == 0) { /* device busy */
549         Dmsg1(debuglevel, "No usable device=%s, busy: not use\n", device_name);
550       } else {
551         Dmsg0(debuglevel, "No usable device found.\n");
552       }
553     }
554     if (ok) { break; }
555   }
556   if (ok) {
557     Dmsg1(debuglevel, "OK dev found. Vol=%s\n", rctx.VolumeName);
558   } else {
559     Dmsg0(debuglevel, "Leave find_suit_dev: no dev found.\n");
560   }
561   return ok;
562 }
563 
564 /**
565  * Search for a particular storage device with particular storage
566  * characteristics (MediaType).
567  */
SearchResForDevice(ReserveContext & rctx)568 int SearchResForDevice(ReserveContext& rctx)
569 {
570   int status;
571   AutochangerResource* changer;
572 
573   /*
574    * Look through Autochangers first
575    */
576   foreach_res (changer, R_AUTOCHANGER) {
577     Dmsg2(debuglevel, "Try match changer res=%s, wanted %s\n",
578           changer->resource_name_, rctx.device_name);
579     /*
580      * Find resource, and make sure we were able to open it
581      */
582     if (bstrcmp(rctx.device_name, changer->resource_name_)) {
583       /*
584        * Try each device in this AutoChanger
585        */
586       foreach_alist (rctx.device, changer->device) {
587         Dmsg1(debuglevel, "Try changer device %s\n",
588               rctx.device->resource_name_);
589         if (!rctx.device->autoselect) {
590           Dmsg1(100, "Device %s not autoselect skipped.\n",
591                 rctx.device->resource_name_);
592           continue; /* Device is not available */
593         }
594         status = ReserveDevice(rctx);
595         if (status != 1) { /* Try another device */
596           continue;
597         }
598 
599         /*
600          * Debug code
601          */
602         if (rctx.store->append == SD_APPEND) {
603           Dmsg2(debuglevel, "Device %s reserved=%d for append.\n",
604                 rctx.device->resource_name_,
605                 rctx.jcr->impl->dcr->dev->NumReserved());
606         } else {
607           Dmsg2(debuglevel, "Device %s reserved=%d for read.\n",
608                 rctx.device->resource_name_,
609                 rctx.jcr->impl->read_dcr->dev->NumReserved());
610         }
611         return status;
612       }
613     }
614   }
615 
616   /*
617    * Now if requested look through regular devices
618    */
619   if (!rctx.autochanger_only) {
620     foreach_res (rctx.device, R_DEVICE) {
621       Dmsg2(debuglevel, "Try match res=%s wanted %s\n",
622             rctx.device->resource_name_, rctx.device_name);
623 
624       /*
625        * Find resource, and make sure we were able to open it
626        */
627       if (bstrcmp(rctx.device_name, rctx.device->resource_name_)) {
628         status = ReserveDevice(rctx);
629         if (status != 1) { /* Try another device */
630           continue;
631         }
632         /*
633          * Debug code
634          */
635         if (rctx.store->append == SD_APPEND) {
636           Dmsg2(debuglevel, "Device %s reserved=%d for append.\n",
637                 rctx.device->resource_name_,
638                 rctx.jcr->impl->dcr->dev->NumReserved());
639         } else {
640           Dmsg2(debuglevel, "Device %s reserved=%d for read.\n",
641                 rctx.device->resource_name_,
642                 rctx.jcr->impl->read_dcr->dev->NumReserved());
643         }
644         return status;
645       }
646     }
647 
648     /*
649      * If we haven't found a available device and the devicereservebymediatype
650      * option is set we try one more time where we allow any device with a
651      * matching mediatype.
652      */
653     if (me->device_reserve_by_mediatype) {
654       foreach_res (rctx.device, R_DEVICE) {
655         Dmsg3(debuglevel,
656               "Try match res=%s, mediatype=%s wanted mediatype=%s\n",
657               rctx.device->resource_name_, rctx.store->media_type,
658               rctx.store->media_type);
659 
660         if (bstrcmp(rctx.store->media_type, rctx.device->media_type)) {
661           status = ReserveDevice(rctx);
662           if (status != 1) { /* Try another device */
663             continue;
664           }
665 
666           /*
667            * Debug code
668            */
669           if (rctx.store->append == SD_APPEND) {
670             Dmsg2(debuglevel, "Device %s reserved=%d for append.\n",
671                   rctx.device->resource_name_,
672                   rctx.jcr->impl->dcr->dev->NumReserved());
673           } else {
674             Dmsg2(debuglevel, "Device %s reserved=%d for read.\n",
675                   rctx.device->resource_name_,
676                   rctx.jcr->impl->read_dcr->dev->NumReserved());
677           }
678           return status;
679         }
680       }
681     }
682   }
683 
684   return -1; /* Nothing found */
685 }
686 
687 /**
688  * Try to reserve a specific device.
689  *
690  * Returns: 1 -- OK, have DeviceControlRecord
691  *          0 -- must wait
692  *         -1 -- fatal error
693  */
ReserveDevice(ReserveContext & rctx)694 static int ReserveDevice(ReserveContext& rctx)
695 {
696   bool ok;
697   DeviceControlRecord* dcr;
698   const int name_len = MAX_NAME_LENGTH;
699 
700   /*
701    * Make sure MediaType is OK
702    */
703   Dmsg2(debuglevel, "chk MediaType device=%s request=%s\n",
704         rctx.device->media_type, rctx.store->media_type);
705   if (!bstrcmp(rctx.device->media_type, rctx.store->media_type)) { return -1; }
706 
707   /*
708    * Make sure device exists -- i.e. we can stat() it
709    */
710   if (!rctx.device->dev) { rctx.device->dev = InitDev(rctx.jcr, rctx.device); }
711   if (!rctx.device->dev) {
712     if (rctx.device->changer_res) {
713       Jmsg(rctx.jcr, M_WARNING, 0,
714            _("\n"
715              "     Device \"%s\" in changer \"%s\" requested by DIR could not "
716              "be opened or does not exist.\n"),
717            rctx.device->resource_name_, rctx.device_name);
718     } else {
719       Jmsg(rctx.jcr, M_WARNING, 0,
720            _("\n"
721              "     Device \"%s\" requested by DIR could not be opened or does "
722              "not exist.\n"),
723            rctx.device_name);
724     }
725     return -1; /* no use waiting */
726   }
727 
728   rctx.suitable_device = true;
729   Dmsg1(debuglevel, "try reserve %s\n", rctx.device->resource_name_);
730 
731   if (rctx.store->append) {
732     SetupNewDcrDevice(rctx.jcr, rctx.jcr->impl->dcr, rctx.device->dev, NULL);
733     dcr = rctx.jcr->impl->dcr;
734   } else {
735     SetupNewDcrDevice(rctx.jcr, rctx.jcr->impl->read_dcr, rctx.device->dev,
736                       NULL);
737     dcr = rctx.jcr->impl->read_dcr;
738   }
739 
740   if (!dcr) {
741     BareosSocket* dir = rctx.jcr->dir_bsock;
742 
743     dir->fsend(_("3926 Could not get dcr for device: %s\n"), rctx.device_name);
744     Dmsg1(debuglevel, ">dird: %s", dir->msg);
745     return -1;
746   }
747 
748   if (rctx.store->append) { dcr->SetWillWrite(); }
749 
750   bstrncpy(dcr->pool_name, rctx.store->pool_name, name_len);
751   bstrncpy(dcr->pool_type, rctx.store->pool_type, name_len);
752   bstrncpy(dcr->media_type, rctx.store->media_type, name_len);
753   bstrncpy(dcr->dev_name, rctx.device_name, name_len);
754   if (rctx.store->append == SD_APPEND) {
755     Dmsg2(debuglevel, "call reserve for append: have_vol=%d vol=%s\n",
756           rctx.have_volume, rctx.VolumeName);
757     ok = ReserveDeviceForAppend(dcr, rctx);
758     if (!ok) { goto bail_out; }
759 
760     rctx.jcr->impl->dcr = dcr;
761     Dmsg5(debuglevel, "Reserved=%d dev_name=%s mediatype=%s pool=%s ok=%d\n",
762           dcr->dev->NumReserved(), dcr->dev_name, dcr->media_type,
763           dcr->pool_name, ok);
764     Dmsg3(debuglevel, "Vol=%s num_writers=%d, have_vol=%d\n", rctx.VolumeName,
765           dcr->dev->num_writers, rctx.have_volume);
766     if (rctx.have_volume) {
767       Dmsg0(debuglevel, "Call reserve_volume for append.\n");
768       if (reserve_volume(dcr, rctx.VolumeName)) {
769         Dmsg1(debuglevel, "Reserved vol=%s\n", rctx.VolumeName);
770       } else {
771         Dmsg1(debuglevel, "Could not reserve vol=%s\n", rctx.VolumeName);
772         goto bail_out;
773       }
774     } else {
775       dcr->any_volume = true;
776       Dmsg0(debuglevel, "no vol, call find_next_appendable_vol.\n");
777       if (dcr->DirFindNextAppendableVolume()) {
778         bstrncpy(rctx.VolumeName, dcr->VolumeName, sizeof(rctx.VolumeName));
779         rctx.have_volume = true;
780         Dmsg1(debuglevel, "looking for Volume=%s\n", rctx.VolumeName);
781       } else {
782         Dmsg0(debuglevel, "No next volume found\n");
783         rctx.have_volume = false;
784         rctx.VolumeName[0] = 0;
785 
786         /*
787          * If there is at least one volume that is valid and in use,
788          * but we get here, check if we are running with prefers
789          * non-mounted drives.  In that case, we have selected a
790          * non-used drive and our one and only volume is mounted
791          * elsewhere, so we bail out and retry using that drive.
792          */
793         if (dcr->FoundInUse() && !rctx.PreferMountedVols) {
794           rctx.PreferMountedVols = true;
795           if (dcr->VolumeName[0]) { dcr->UnreserveDevice(); }
796           goto bail_out;
797         }
798 
799         /*
800          * Note. Under some circumstances, the Director can hand us
801          * a Volume name that is not the same as the one on the current
802          * drive, and in that case, the call above to find the next
803          * volume will fail because in attempting to reserve the Volume
804          * the code will realize that we already have a tape mounted,
805          * and it will fail.  This *should* only happen if there are
806          * writers, thus the following test.  In that case, we simply
807          * bail out, and continue waiting, rather than plunging on
808          * and hoping that the operator can resolve the problem.
809          */
810         if (dcr->dev->num_writers != 0) {
811           if (dcr->VolumeName[0]) { dcr->UnreserveDevice(); }
812           goto bail_out;
813         }
814       }
815     }
816   } else {
817     ok = ReserveDeviceForRead(dcr);
818     if (ok) {
819       rctx.jcr->impl->read_dcr = dcr;
820       Dmsg5(debuglevel,
821             "Read reserved=%d dev_name=%s mediatype=%s pool=%s ok=%d\n",
822             dcr->dev->NumReserved(), dcr->dev_name, dcr->media_type,
823             dcr->pool_name, ok);
824     }
825   }
826   if (!ok) { goto bail_out; }
827 
828   if (rctx.notify_dir) {
829     PoolMem dev_name;
830     BareosSocket* dir = rctx.jcr->dir_bsock;
831     PmStrcpy(dev_name, rctx.device->resource_name_);
832     BashSpaces(dev_name);
833     ok = dir->fsend(OK_device, dev_name.c_str()); /* Return real device name */
834     Dmsg1(debuglevel, ">dird: %s", dir->msg);
835   } else {
836     ok = true;
837   }
838   return ok ? 1 : -1;
839 
840 bail_out:
841   rctx.have_volume = false;
842   rctx.VolumeName[0] = 0;
843   Dmsg0(debuglevel, "Not OK.\n");
844   return 0;
845 }
846 
847 /**
848  * We "reserve" the drive by setting the ST_READREADY bit.
849  * No one else should touch the drive until that is cleared.
850  * This allows the DIR to "reserve" the device before actually starting the job.
851  */
ReserveDeviceForRead(DeviceControlRecord * dcr)852 static bool ReserveDeviceForRead(DeviceControlRecord* dcr)
853 {
854   Device* dev = dcr->dev;
855   JobControlRecord* jcr = dcr->jcr;
856   bool ok = false;
857 
858   ASSERT(dcr);
859   if (JobCanceled(jcr)) { return false; }
860 
861   dev->Lock();
862 
863   if (dev->IsDeviceUnmounted()) {
864     Dmsg1(debuglevel, "Device %s is BLOCKED due to user unmount.\n",
865           dev->print_name());
866     Mmsg(jcr->errmsg,
867          _("3601 JobId=%u device %s is BLOCKED due to user unmount.\n"),
868          jcr->JobId, dev->print_name());
869     QueueReserveMessage(jcr);
870     goto bail_out;
871   }
872 
873   if (dev->IsBusy()) {
874     Dmsg4(debuglevel,
875           "Device %s is busy ST_READREADY=%d num_writers=%d reserved=%d.\n",
876           dev->print_name(), BitIsSet(ST_READREADY, dev->state) ? 1 : 0,
877           dev->num_writers, dev->NumReserved());
878     Mmsg(jcr->errmsg,
879          _("3602 JobId=%u device %s is busy (already reading/writing).\n"),
880          jcr->JobId, dev->print_name());
881     QueueReserveMessage(jcr);
882     goto bail_out;
883   }
884 
885   /*
886    * Note: on failure this returns jcr->errmsg properly edited
887    */
888   if (GeneratePluginEvent(jcr, bsdEventDeviceReserve, dcr) != bRC_OK) {
889     QueueReserveMessage(jcr);
890     goto bail_out;
891   }
892   dev->ClearAppend();
893   dev->SetRead();
894   dcr->SetReserved();
895   ok = true;
896 
897 bail_out:
898   dev->Unlock();
899   return ok;
900 }
901 
902 /**
903  * We reserve the device for appending by incrementing
904  * NumReserved(). We do virtually all the same work that
905  * is done in AcquireDeviceForAppend(), but we do
906  * not attempt to mount the device. This routine allows
907  * the DIR to reserve multiple devices before *really*
908  * starting the job. It also permits the SD to refuse
909  * certain devices (not up, ...).
910  *
911  * Note, in reserving a device, if the device is for the
912  * same pool and the same pool type, then it is acceptable.
913  * The Media Type has already been checked. If we are
914  * the first to reserve the device, we put the pool
915  * name and pool type in the device record.
916  */
ReserveDeviceForAppend(DeviceControlRecord * dcr,ReserveContext & rctx)917 static bool ReserveDeviceForAppend(DeviceControlRecord* dcr,
918                                    ReserveContext& rctx)
919 {
920   JobControlRecord* jcr = dcr->jcr;
921   Device* dev = dcr->dev;
922   bool ok = false;
923 
924   ASSERT(dcr);
925   if (JobCanceled(jcr)) { return false; }
926 
927   dev->Lock();
928 
929   /*
930    * If device is being read, we cannot write it
931    */
932   if (dev->CanRead()) {
933     Mmsg(jcr->errmsg, _("3603 JobId=%u device %s is busy reading.\n"),
934          jcr->JobId, dev->print_name());
935     Dmsg1(debuglevel, "Failed: %s", jcr->errmsg);
936     QueueReserveMessage(jcr);
937     goto bail_out;
938   }
939 
940   /*
941    * If device is unmounted, we are out of luck
942    */
943   if (dev->IsDeviceUnmounted()) {
944     Mmsg(jcr->errmsg,
945          _("3604 JobId=%u device %s is BLOCKED due to user unmount.\n"),
946          jcr->JobId, dev->print_name());
947     Dmsg1(debuglevel, "Failed: %s", jcr->errmsg);
948     QueueReserveMessage(jcr);
949     goto bail_out;
950   }
951 
952   Dmsg1(debuglevel, "reserve_append device is %s\n", dev->print_name());
953 
954   /*
955    * Now do detailed tests ...
956    */
957   if (CanReserveDrive(dcr, rctx) != 1) {
958     Dmsg0(debuglevel, "CanReserveDrive!=1\n");
959     goto bail_out;
960   }
961 
962   /*
963    * Note: on failure this returns jcr->errmsg properly edited
964    */
965   if (GeneratePluginEvent(jcr, bsdEventDeviceReserve, dcr) != bRC_OK) {
966     QueueReserveMessage(jcr);
967     goto bail_out;
968   }
969   dcr->SetReserved();
970   ok = true;
971 
972 bail_out:
973   dev->Unlock();
974   return ok;
975 }
976 
IsPoolOk(DeviceControlRecord * dcr)977 static int IsPoolOk(DeviceControlRecord* dcr)
978 {
979   Device* dev = dcr->dev;
980   JobControlRecord* jcr = dcr->jcr;
981 
982   /*
983    * Now check if we want the same Pool and pool type
984    */
985   if (bstrcmp(dev->pool_name, dcr->pool_name) &&
986       bstrcmp(dev->pool_type, dcr->pool_type)) {
987     /*
988      * OK, compatible device
989      */
990     Dmsg1(debuglevel, "OK dev: %s num_writers=0, reserved, pool matches\n",
991           dev->print_name());
992     return 1;
993   } else {
994     /* Drive Pool not suitable for us */
995     Mmsg(jcr->errmsg,
996          _("3608 JobId=%u wants Pool=\"%s\" but have Pool=\"%s\" nreserve=%d "
997            "on drive %s.\n"),
998          (uint32_t)jcr->JobId, dcr->pool_name, dev->pool_name,
999          dev->NumReserved(), dev->print_name());
1000     Dmsg1(debuglevel, "Failed: %s", jcr->errmsg);
1001     QueueReserveMessage(jcr);
1002   }
1003   return 0;
1004 }
1005 
IsMaxJobsOk(DeviceControlRecord * dcr)1006 static bool IsMaxJobsOk(DeviceControlRecord* dcr)
1007 {
1008   Device* dev = dcr->dev;
1009   JobControlRecord* jcr = dcr->jcr;
1010 
1011   Dmsg5(debuglevel, "MaxJobs=%d Jobs=%d reserves=%d Status=%s Vol=%s\n",
1012         dcr->VolCatInfo.VolCatMaxJobs, dcr->VolCatInfo.VolCatJobs,
1013         dev->NumReserved(), dcr->VolCatInfo.VolCatStatus, dcr->VolumeName);
1014 
1015   /*
1016    * Limit max concurrent jobs on this drive
1017    */
1018   if (dev->max_concurrent_jobs > 0 &&
1019       dev->max_concurrent_jobs <=
1020           (uint32_t)(dev->num_writers + dev->NumReserved())) {
1021     /*
1022      * Max Concurrent Jobs depassed or already reserved
1023      */
1024     Mmsg(jcr->errmsg,
1025          _("3609 JobId=%u Max concurrent jobs exceeded on drive %s.\n"),
1026          (uint32_t)jcr->JobId, dev->print_name());
1027     Dmsg1(debuglevel, "Failed: %s", jcr->errmsg);
1028     QueueReserveMessage(jcr);
1029     return false;
1030   }
1031   if (bstrcmp(dcr->VolCatInfo.VolCatStatus, "Recycle")) { return true; }
1032   if (dcr->VolCatInfo.VolCatMaxJobs > 0 &&
1033       dcr->VolCatInfo.VolCatMaxJobs <=
1034           (dcr->VolCatInfo.VolCatJobs + dev->NumReserved())) {
1035     /*
1036      * Max Job Vols depassed or already reserved
1037      */
1038     Mmsg(jcr->errmsg,
1039          _("3610 JobId=%u Volume max jobs exceeded on drive %s.\n"),
1040          (uint32_t)jcr->JobId, dev->print_name());
1041     Dmsg1(debuglevel, "reserve dev failed: %s", jcr->errmsg);
1042     QueueReserveMessage(jcr);
1043     return false; /* wait */
1044   }
1045   return true;
1046 }
1047 
1048 /**
1049  * Returns: 1 if drive can be reserved
1050  *          0 if we should wait
1051  *         -1 on error or impossibility
1052  */
CanReserveDrive(DeviceControlRecord * dcr,ReserveContext & rctx)1053 static int CanReserveDrive(DeviceControlRecord* dcr, ReserveContext& rctx)
1054 {
1055   Device* dev = dcr->dev;
1056   JobControlRecord* jcr = dcr->jcr;
1057 
1058   Dmsg5(debuglevel, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n",
1059         rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device,
1060         rctx.autochanger_only, rctx.any_drive);
1061 
1062   /*
1063    * Check for max jobs on this Volume
1064    */
1065   if (!IsMaxJobsOk(dcr)) { return 0; }
1066 
1067   /*
1068    * Setting any_drive overrides PreferMountedVols flag
1069    */
1070   if (!rctx.any_drive) {
1071     /*
1072      * When PreferMountedVols is set, we keep track of the
1073      * drive in use that has the least number of writers, then if
1074      * no unmounted drive is found, we try that drive. This
1075      * helps spread the load to the least used drives.
1076      */
1077     if (rctx.try_low_use_drive && dev == rctx.low_use_drive) {
1078       Dmsg2(debuglevel, "OK dev=%s == low_drive=%s.\n", dev->print_name(),
1079             rctx.low_use_drive->print_name());
1080       return 1;
1081     }
1082 
1083     /*
1084      * If he wants a free drive, but this one is busy, no go
1085      */
1086     if (!rctx.PreferMountedVols && dev->IsBusy()) {
1087       /*
1088        * Save least used drive
1089        */
1090       if ((dev->num_writers + dev->NumReserved()) < rctx.num_writers) {
1091         rctx.num_writers = dev->num_writers + dev->NumReserved();
1092         rctx.low_use_drive = dev;
1093         Dmsg2(debuglevel, "set low use drive=%s num_writers=%d\n",
1094               dev->print_name(), rctx.num_writers);
1095       } else {
1096         Dmsg1(debuglevel, "not low use num_writers=%d\n",
1097               dev->num_writers + dev->NumReserved());
1098       }
1099       Mmsg(jcr->errmsg,
1100            _("3605 JobId=%u wants free drive but device %s is busy.\n"),
1101            jcr->JobId, dev->print_name());
1102       Dmsg1(debuglevel, "Failed: %s", jcr->errmsg);
1103       QueueReserveMessage(jcr);
1104       return 0;
1105     }
1106 
1107     /*
1108      * Check for prefer mounted volumes
1109      */
1110     if (rctx.PreferMountedVols && !dev->vol && dev->IsTape()) {
1111       Mmsg(jcr->errmsg,
1112            _("3606 JobId=%u prefers mounted drives, but drive %s has no "
1113              "Volume.\n"),
1114            jcr->JobId, dev->print_name());
1115       Dmsg1(debuglevel, "Failed: %s", jcr->errmsg);
1116       QueueReserveMessage(jcr);
1117       return 0; /* No volume mounted */
1118     }
1119 
1120     /*
1121      * Check for exact Volume name match
1122      * ***FIXME*** for Disk, we can accept any volume that goes with this drive.
1123      */
1124     if (rctx.exact_match && rctx.have_volume) {
1125       bool ok;
1126 
1127       Dmsg5(debuglevel, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n",
1128             rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device,
1129             rctx.autochanger_only, rctx.any_drive);
1130       Dmsg4(debuglevel, "have_vol=%d have=%s resvol=%s want=%s\n",
1131             rctx.have_volume, dev->VolHdr.VolumeName,
1132             dev->vol ? dev->vol->vol_name : "*None*", rctx.VolumeName);
1133       ok = bstrcmp(dev->VolHdr.VolumeName, rctx.VolumeName) ||
1134            (dev->vol && bstrcmp(dev->vol->vol_name, rctx.VolumeName));
1135       if (!ok) {
1136         Mmsg(jcr->errmsg,
1137              _("3607 JobId=%u wants Vol=\"%s\" drive has Vol=\"%s\" on drive "
1138                "%s.\n"),
1139              jcr->JobId, rctx.VolumeName, dev->VolHdr.VolumeName,
1140              dev->print_name());
1141         QueueReserveMessage(jcr);
1142         Dmsg3(debuglevel, "not OK: dev have=%s resvol=%s want=%s\n",
1143               dev->VolHdr.VolumeName, dev->vol ? dev->vol->vol_name : "*None*",
1144               rctx.VolumeName);
1145         return 0;
1146       }
1147       if (!dcr->Can_i_use_volume()) {
1148         return 0; /* fail if volume on another drive */
1149       }
1150     }
1151   }
1152 
1153   /*
1154    * Check for unused autochanger drive
1155    */
1156   if (rctx.autochanger_only && !dev->IsBusy() &&
1157       dev->VolHdr.VolumeName[0] == 0) {
1158     /*
1159      * Device is available but not yet reserved, reserve it for us
1160      */
1161     Dmsg1(debuglevel, "OK Res Unused autochanger %s.\n", dev->print_name());
1162     bstrncpy(dev->pool_name, dcr->pool_name, sizeof(dev->pool_name));
1163     bstrncpy(dev->pool_type, dcr->pool_type, sizeof(dev->pool_type));
1164     return 1; /* reserve drive */
1165   }
1166 
1167   /*
1168    * Handle the case that there are no writers
1169    */
1170   if (dev->num_writers == 0) {
1171     /*
1172      * Now check if there are any reservations on the drive
1173      */
1174     if (dev->NumReserved()) {
1175       return IsPoolOk(dcr);
1176     } else if (dev->CanAppend()) {
1177       if (IsPoolOk(dcr)) {
1178         return 1;
1179       } else {
1180         /*
1181          * Changing pool, unload old tape if any in drive
1182          */
1183         Dmsg0(debuglevel,
1184               "OK dev: num_writers=0, not reserved, pool change, unload "
1185               "changer\n");
1186         /*
1187          * ***FIXME*** use SetUnload()
1188          */
1189         UnloadAutochanger(dcr, -1);
1190       }
1191     }
1192 
1193     /*
1194      * Device is available but not yet reserved, reserve it for us
1195      */
1196     Dmsg1(debuglevel, "OK Dev avail reserved %s\n", dev->print_name());
1197     bstrncpy(dev->pool_name, dcr->pool_name, sizeof(dev->pool_name));
1198     bstrncpy(dev->pool_type, dcr->pool_type, sizeof(dev->pool_type));
1199     return 1; /* reserve drive */
1200   }
1201 
1202   /*
1203    * Check if the device is in append mode with writers (i.e. available if pool
1204    * is the same).
1205    */
1206   if (dev->CanAppend() || dev->num_writers > 0) {
1207     return IsPoolOk(dcr);
1208   } else {
1209     Pmsg1(000, _("Logic error!!!! JobId=%u Should not get here.\n"),
1210           (int)jcr->JobId);
1211     Mmsg(jcr->errmsg,
1212          _("3910 JobId=%u Logic error!!!! drive %s Should not get here.\n"),
1213          jcr->JobId, dev->print_name());
1214     QueueReserveMessage(jcr);
1215     Jmsg0(jcr, M_FATAL, 0, _("Logic error!!!! Should not get here.\n"));
1216 
1217     return -1; /* error, should not get here */
1218   }
1219 }
1220 
1221 /**
1222  * Queue a reservation error or failure message for this jcr
1223  */
QueueReserveMessage(JobControlRecord * jcr)1224 static void QueueReserveMessage(JobControlRecord* jcr)
1225 {
1226   int i;
1227   alist* msgs;
1228   char* msg;
1229 
1230   jcr->lock();
1231 
1232   msgs = jcr->impl->reserve_msgs;
1233   if (!msgs) { goto bail_out; }
1234   /*
1235    * Look for duplicate message.  If found, do not insert
1236    */
1237   for (i = msgs->size() - 1; i >= 0; i--) {
1238     msg = (char*)msgs->get(i);
1239     if (!msg) { goto bail_out; }
1240 
1241     /*
1242      * Comparison based on 4 digit message number
1243      */
1244     if (bstrncmp(msg, jcr->errmsg, 4)) { goto bail_out; }
1245   }
1246 
1247   /*
1248    * Message unique, so insert it.
1249    */
1250   jcr->impl->reserve_msgs->push(strdup(jcr->errmsg));
1251 
1252 bail_out:
1253   jcr->unlock();
1254 }
1255 
1256 /**
1257  * Pop and release any reservations messages
1258  */
PopReserveMessages(JobControlRecord * jcr)1259 static void PopReserveMessages(JobControlRecord* jcr)
1260 {
1261   alist* msgs;
1262   char* msg;
1263 
1264   jcr->lock();
1265   msgs = jcr->impl->reserve_msgs;
1266   if (!msgs) { goto bail_out; }
1267   while ((msg = (char*)msgs->pop())) { free(msg); }
1268 bail_out:
1269   jcr->unlock();
1270 }
1271 
1272 /**
1273  * Also called from acquire.c
1274  */
ReleaseReserveMessages(JobControlRecord * jcr)1275 void ReleaseReserveMessages(JobControlRecord* jcr)
1276 {
1277   PopReserveMessages(jcr);
1278   jcr->lock();
1279   if (!jcr->impl->reserve_msgs) { goto bail_out; }
1280   delete jcr->impl->reserve_msgs;
1281   jcr->impl->reserve_msgs = NULL;
1282 
1283 bail_out:
1284   jcr->unlock();
1285 }
1286 
1287 } /* namespace storagedaemon */
1288