1 /*
2    BAREOS® - Backup Archiving REcovery Open Sourced
3 
4    Copyright (C) 2000-2011 Free Software Foundation Europe e.V.
5    Copyright (C) 2016-2020 Bareos GmbH & Co. KG
6 
7    This program is Free Software; you can redistribute it and/or
8    modify it under the terms of version three of the GNU Affero General Public
9    License as published by the Free Software Foundation and included
10    in the file LICENSE.
11 
12    This program is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15    Affero General Public License for more details.
16 
17    You should have received a copy of the GNU Affero General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20    02110-1301, USA.
21 */
22 /*
23  * Kern Sibbald, March 2005
24  */
25 /**
26  * @file
27  * Subroutines to handle waiting for operator intervention
28  * or waiting for a Device to be released
29  *
30  * Code for WaitForSysop() pulled from askdir.c
31  */
32 
33 #include "include/bareos.h" /* pull in global headers */
34 #include "stored/stored.h"  /* pull in Storage Daemon headers */
35 #include "stored/stored_globals.h"
36 #include "stored/device_control_record.h"
37 #include "stored/wait.h"
38 #include "lib/berrno.h"
39 #include "lib/bsock.h"
40 #include "lib/edit.h"
41 #include "include/jcr.h"
42 
43 namespace storagedaemon {
44 
/* Debug level passed to every Dmsg*() call in this file. */
const int debuglevel = 400;

/*
 * Mutex/condition pair used by WaitForDevice() to sleep until some device
 * is released; ReleaseDeviceCond() broadcasts on the condition variable.
 */
static pthread_mutex_t device_release_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wait_device_release = PTHREAD_COND_INITIALIZER;
49 
50 /**
51  * Wait for SysOp to mount a tape on a specific device
52  *
53  *   Returns: W_ERROR, W_TIMEOUT, W_POLL, W_MOUNT, or W_WAKE
54  */
WaitForSysop(DeviceControlRecord * dcr)55 int WaitForSysop(DeviceControlRecord* dcr)
56 {
57   struct timeval tv;
58   struct timezone tz;
59   struct timespec timeout;
60   time_t last_heartbeat = 0;
61   time_t first_start = time(NULL);
62   int status = 0;
63   int add_wait;
64   bool unmounted;
65   Device* dev = dcr->dev;
66   JobControlRecord* jcr = dcr->jcr;
67 
68   dev->Lock();
69   Dmsg1(debuglevel, "Enter blocked=%s\n", dev->print_blocked());
70 
71   /*
72    * Since we want to mount a tape, make sure current one is
73    *  not marked as using this drive.
74    */
75   VolumeUnused(dcr);
76 
77   unmounted = dev->IsDeviceUnmounted();
78   dev->poll = false;
79   /*
80    * Wait requested time (dev->rem_wait_sec).  However, we also wake up every
81    *    HB_TIME seconds and send a heartbeat to the FD and the Director
82    *    to keep stateful firewalls from closing them down while waiting
83    *    for the operator.
84    */
85   add_wait = dev->rem_wait_sec;
86   if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
87     add_wait = me->heartbeat_interval;
88   }
89   /* If the user did not unmount the tape and we are polling, ensure
90    *  that we poll at the correct interval.
91    */
92   if (!unmounted && dev->vol_poll_interval
93       && add_wait > dev->vol_poll_interval) {
94     add_wait = dev->vol_poll_interval;
95   }
96 
97   if (!unmounted) {
98     Dmsg1(debuglevel, "blocked=%s\n", dev->print_blocked());
99     dev->dev_prev_blocked = dev->blocked();
100     dev->SetBlocked(BST_WAITING_FOR_SYSOP); /* indicate waiting for mount */
101   }
102 
103   while (!JobCanceled(jcr)) {
104     time_t now, start, total_waited;
105 
106     gettimeofday(&tv, &tz);
107     timeout.tv_nsec = tv.tv_usec * 1000;
108     timeout.tv_sec = tv.tv_sec + add_wait;
109 
110     Dmsg4(debuglevel,
111           "I'm going to sleep on device %s. HB=%d rem_wait=%d add_wait=%d\n",
112           dev->print_name(), (int)me->heartbeat_interval, dev->rem_wait_sec,
113           add_wait);
114     start = time(NULL);
115 
116     /* Wait required time */
117     status
118         = pthread_cond_timedwait(&dev->wait_next_vol, &dev->mutex_, &timeout);
119 
120     Dmsg2(debuglevel, "Wokeup from sleep on device status=%d blocked=%s\n",
121           status, dev->print_blocked());
122     now = time(NULL);
123     total_waited = now - first_start;
124     dev->rem_wait_sec -= (now - start);
125 
126     /* Note, this always triggers the first time. We want that. */
127     if (me->heartbeat_interval) {
128       if (now - last_heartbeat >= me->heartbeat_interval) {
129         /* send heartbeats */
130         if (jcr->file_bsock) {
131           jcr->file_bsock->signal(BNET_HEARTBEAT);
132           Dmsg0(debuglevel, "Send heartbeat to FD.\n");
133         }
134         if (jcr->dir_bsock) { jcr->dir_bsock->signal(BNET_HEARTBEAT); }
135         last_heartbeat = now;
136       }
137     }
138 
139     if (status == EINVAL) {
140       BErrNo be;
141       Jmsg1(jcr, M_FATAL, 0, _("pthread timedwait error. ERR=%s\n"),
142             be.bstrerror(status));
143       status = W_ERROR; /* error */
144       break;
145     }
146 
147     /*
148      * Continue waiting if operator is labeling volumes
149      */
150     if (dev->blocked() == BST_WRITING_LABEL) { continue; }
151 
152     if (dev->rem_wait_sec <= 0) { /* on exceeding wait time return */
153       Dmsg0(debuglevel, "Exceed wait time.\n");
154       status = W_TIMEOUT;
155       break;
156     }
157 
158     /*
159      * Check if user unmounted the device while we were waiting
160      */
161     unmounted = dev->IsDeviceUnmounted();
162 
163     if (!unmounted && dev->vol_poll_interval
164         && (total_waited >= dev->vol_poll_interval)) {
165       Dmsg1(debuglevel, "poll return in wait blocked=%s\n",
166             dev->print_blocked());
167       dev->poll = true; /* returning a poll event */
168       status = W_POLL;
169       break;
170     }
171     /*
172      * Check if user mounted the device while we were waiting
173      */
174     if (dev->blocked() == BST_MOUNT) { /* mount request ? */
175       Dmsg0(debuglevel, "Mounted return.\n");
176       status = W_MOUNT;
177       break;
178     }
179 
180     /*
181      * If we did not timeout, then some event happened, so
182      *   return to check if state changed.
183      */
184     if (status != ETIMEDOUT) {
185       BErrNo be;
186       Dmsg2(debuglevel, "Wake return. status=%d. ERR=%s\n", status,
187             be.bstrerror(status));
188       status = W_WAKE; /* someone woke us */
189       break;
190     }
191 
192     /*
193      * At this point, we know we woke up because of a timeout,
194      *   that was due to a heartbeat, because any other reason would
195      *   have caused us to return, so update the wait counters and continue.
196      */
197     add_wait = dev->rem_wait_sec;
198     if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
199       add_wait = me->heartbeat_interval;
200     }
201     /* If the user did not unmount the tape and we are polling, ensure
202      *  that we poll at the correct interval.
203      */
204     if (!unmounted && dev->vol_poll_interval
205         && add_wait > dev->vol_poll_interval - total_waited) {
206       add_wait = dev->vol_poll_interval - total_waited;
207     }
208     if (add_wait < 0) { add_wait = 0; }
209   }
210 
211   if (!unmounted) {
212     dev->SetBlocked(dev->dev_prev_blocked); /* restore entry state */
213     Dmsg1(debuglevel, "set %s\n", dev->print_blocked());
214   }
215   Dmsg1(debuglevel, "Exit blocked=%s\n", dev->print_blocked());
216   dev->Unlock();
217   return status;
218 }
219 
220 
221 /**
222  * Wait for any device to be released, then we return, so
223  * higher level code can rescan possible devices.  Since there
224  * could be a job waiting for a drive to free up, we wait a maximum
225  * of 1 minute then retry just in case a broadcast was lost, and
226  * we return to rescan the devices.
227  *
228  * Returns: true  if a device has changed state
229  *          false if the total wait time has expired.
230  */
WaitForDevice(JobControlRecord * jcr,int & retries)231 bool WaitForDevice(JobControlRecord* jcr, int& retries)
232 {
233   struct timeval tv;
234   struct timezone tz;
235   struct timespec timeout;
236   int status = 0;
237   bool ok = true;
238   const int max_wait_time = 1 * 60; /* wait 1 minute */
239   char ed1[50];
240 
241   Dmsg0(debuglevel, "Enter WaitForDevice\n");
242   P(device_release_mutex);
243 
244   if (++retries % 5 == 0) {
245     /* Print message every 5 minutes */
246     Jmsg(jcr, M_MOUNT, 0, _("JobId=%s, Job %s waiting to reserve a device.\n"),
247          edit_uint64(jcr->JobId, ed1), jcr->Job);
248   }
249 
250   gettimeofday(&tv, &tz);
251   timeout.tv_nsec = tv.tv_usec * 1000;
252   timeout.tv_sec = tv.tv_sec + max_wait_time;
253 
254   Dmsg0(debuglevel, "Going to wait for a device.\n");
255 
256   /* Wait required time */
257   status = pthread_cond_timedwait(&wait_device_release, &device_release_mutex,
258                                   &timeout);
259   Dmsg1(debuglevel, "Wokeup from sleep on device status=%d\n", status);
260 
261   V(device_release_mutex);
262   Dmsg1(debuglevel, "Return from wait_device ok=%d\n", ok);
263   return ok;
264 }
265 
266 /**
267  * Signal the above WaitForDevice function.
268  */
ReleaseDeviceCond()269 void ReleaseDeviceCond() { pthread_cond_broadcast(&wait_device_release); }
270 
271 } /* namespace storagedaemon */
272