1 /*
2    BAREOS® - Backup Archiving REcovery Open Sourced
3 
4    Copyright (C) 2000-2011 Free Software Foundation Europe e.V.
5    Copyright (C) 2016-2016 Bareos GmbH & Co. KG
6 
7    This program is Free Software; you can redistribute it and/or
8    modify it under the terms of version three of the GNU Affero General Public
9    License as published by the Free Software Foundation and included
10    in the file LICENSE.
11 
12    This program is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15    Affero General Public License for more details.
16 
17    You should have received a copy of the GNU Affero General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20    02110-1301, USA.
21 */
22 /*
23  * Kern Sibbald, March 2005
24  */
25 /**
26  * @file
27  * Subroutines to handle waiting for operator intervention
28  * or waiting for a Device to be released
29  *
30  * Code for WaitForSysop() pulled from askdir.c
31  */
32 
33 #include "include/bareos.h" /* pull in global headers */
34 #include "stored/stored.h"  /* pull in Storage Daemon headers */
35 #include "stored/stored_globals.h"
36 #include "lib/berrno.h"
37 #include "lib/bsock.h"
38 #include "lib/edit.h"
39 #include "include/jcr.h"
40 
41 namespace storagedaemon {
42 
/* Debug level passed as first argument to the Dmsg calls in this file */
const int debuglevel = 400;

/* Mutex held by WaitForDevice() around its timed wait on wait_device_release */
static pthread_mutex_t device_release_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Condition variable WaitForDevice() sleeps on; ReleaseDeviceCond() broadcasts
 * on it */
static pthread_cond_t wait_device_release = PTHREAD_COND_INITIALIZER;
47 
48 /**
49  * Wait for SysOp to mount a tape on a specific device
50  *
51  *   Returns: W_ERROR, W_TIMEOUT, W_POLL, W_MOUNT, or W_WAKE
52  */
WaitForSysop(DeviceControlRecord * dcr)53 int WaitForSysop(DeviceControlRecord* dcr)
54 {
55   struct timeval tv;
56   struct timezone tz;
57   struct timespec timeout;
58   time_t last_heartbeat = 0;
59   time_t first_start = time(NULL);
60   int status = 0;
61   int add_wait;
62   bool unmounted;
63   Device* dev = dcr->dev;
64   JobControlRecord* jcr = dcr->jcr;
65 
66   dev->Lock();
67   Dmsg1(debuglevel, "Enter blocked=%s\n", dev->print_blocked());
68 
69   /*
70    * Since we want to mount a tape, make sure current one is
71    *  not marked as using this drive.
72    */
73   VolumeUnused(dcr);
74 
75   unmounted = dev->IsDeviceUnmounted();
76   dev->poll = false;
77   /*
78    * Wait requested time (dev->rem_wait_sec).  However, we also wake up every
79    *    HB_TIME seconds and send a heartbeat to the FD and the Director
80    *    to keep stateful firewalls from closing them down while waiting
81    *    for the operator.
82    */
83   add_wait = dev->rem_wait_sec;
84   if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
85     add_wait = me->heartbeat_interval;
86   }
87   /* If the user did not unmount the tape and we are polling, ensure
88    *  that we poll at the correct interval.
89    */
90   if (!unmounted && dev->vol_poll_interval &&
91       add_wait > dev->vol_poll_interval) {
92     add_wait = dev->vol_poll_interval;
93   }
94 
95   if (!unmounted) {
96     Dmsg1(debuglevel, "blocked=%s\n", dev->print_blocked());
97     dev->dev_prev_blocked = dev->blocked();
98     dev->SetBlocked(BST_WAITING_FOR_SYSOP); /* indicate waiting for mount */
99   }
100 
101   while (!JobCanceled(jcr)) {
102     time_t now, start, total_waited;
103 
104     gettimeofday(&tv, &tz);
105     timeout.tv_nsec = tv.tv_usec * 1000;
106     timeout.tv_sec = tv.tv_sec + add_wait;
107 
108     Dmsg4(debuglevel,
109           "I'm going to sleep on device %s. HB=%d rem_wait=%d add_wait=%d\n",
110           dev->print_name(), (int)me->heartbeat_interval, dev->rem_wait_sec,
111           add_wait);
112     start = time(NULL);
113 
114     /* Wait required time */
115     status =
116         pthread_cond_timedwait(&dev->wait_next_vol, &dev->mutex_, &timeout);
117 
118     Dmsg2(debuglevel, "Wokeup from sleep on device status=%d blocked=%s\n",
119           status, dev->print_blocked());
120     now = time(NULL);
121     total_waited = now - first_start;
122     dev->rem_wait_sec -= (now - start);
123 
124     /* Note, this always triggers the first time. We want that. */
125     if (me->heartbeat_interval) {
126       if (now - last_heartbeat >= me->heartbeat_interval) {
127         /* send heartbeats */
128         if (jcr->file_bsock) {
129           jcr->file_bsock->signal(BNET_HEARTBEAT);
130           Dmsg0(debuglevel, "Send heartbeat to FD.\n");
131         }
132         if (jcr->dir_bsock) { jcr->dir_bsock->signal(BNET_HEARTBEAT); }
133         last_heartbeat = now;
134       }
135     }
136 
137     if (status == EINVAL) {
138       BErrNo be;
139       Jmsg1(jcr, M_FATAL, 0, _("pthread timedwait error. ERR=%s\n"),
140             be.bstrerror(status));
141       status = W_ERROR; /* error */
142       break;
143     }
144 
145     /*
146      * Continue waiting if operator is labeling volumes
147      */
148     if (dev->blocked() == BST_WRITING_LABEL) { continue; }
149 
150     if (dev->rem_wait_sec <= 0) { /* on exceeding wait time return */
151       Dmsg0(debuglevel, "Exceed wait time.\n");
152       status = W_TIMEOUT;
153       break;
154     }
155 
156     /*
157      * Check if user unmounted the device while we were waiting
158      */
159     unmounted = dev->IsDeviceUnmounted();
160 
161     if (!unmounted && dev->vol_poll_interval &&
162         (total_waited >= dev->vol_poll_interval)) {
163       Dmsg1(debuglevel, "poll return in wait blocked=%s\n",
164             dev->print_blocked());
165       dev->poll = true; /* returning a poll event */
166       status = W_POLL;
167       break;
168     }
169     /*
170      * Check if user mounted the device while we were waiting
171      */
172     if (dev->blocked() == BST_MOUNT) { /* mount request ? */
173       Dmsg0(debuglevel, "Mounted return.\n");
174       status = W_MOUNT;
175       break;
176     }
177 
178     /*
179      * If we did not timeout, then some event happened, so
180      *   return to check if state changed.
181      */
182     if (status != ETIMEDOUT) {
183       BErrNo be;
184       Dmsg2(debuglevel, "Wake return. status=%d. ERR=%s\n", status,
185             be.bstrerror(status));
186       status = W_WAKE; /* someone woke us */
187       break;
188     }
189 
190     /*
191      * At this point, we know we woke up because of a timeout,
192      *   that was due to a heartbeat, because any other reason would
193      *   have caused us to return, so update the wait counters and continue.
194      */
195     add_wait = dev->rem_wait_sec;
196     if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
197       add_wait = me->heartbeat_interval;
198     }
199     /* If the user did not unmount the tape and we are polling, ensure
200      *  that we poll at the correct interval.
201      */
202     if (!unmounted && dev->vol_poll_interval &&
203         add_wait > dev->vol_poll_interval - total_waited) {
204       add_wait = dev->vol_poll_interval - total_waited;
205     }
206     if (add_wait < 0) { add_wait = 0; }
207   }
208 
209   if (!unmounted) {
210     dev->SetBlocked(dev->dev_prev_blocked); /* restore entry state */
211     Dmsg1(debuglevel, "set %s\n", dev->print_blocked());
212   }
213   Dmsg1(debuglevel, "Exit blocked=%s\n", dev->print_blocked());
214   dev->Unlock();
215   return status;
216 }
217 
218 
219 /**
220  * Wait for any device to be released, then we return, so
221  * higher level code can rescan possible devices.  Since there
222  * could be a job waiting for a drive to free up, we wait a maximum
223  * of 1 minute then retry just in case a broadcast was lost, and
224  * we return to rescan the devices.
225  *
226  * Returns: true  if a device has changed state
227  *          false if the total wait time has expired.
228  */
WaitForDevice(JobControlRecord * jcr,int & retries)229 bool WaitForDevice(JobControlRecord* jcr, int& retries)
230 {
231   struct timeval tv;
232   struct timezone tz;
233   struct timespec timeout;
234   int status = 0;
235   bool ok = true;
236   const int max_wait_time = 1 * 60; /* wait 1 minute */
237   char ed1[50];
238 
239   Dmsg0(debuglevel, "Enter WaitForDevice\n");
240   P(device_release_mutex);
241 
242   if (++retries % 5 == 0) {
243     /* Print message every 5 minutes */
244     Jmsg(jcr, M_MOUNT, 0, _("JobId=%s, Job %s waiting to reserve a device.\n"),
245          edit_uint64(jcr->JobId, ed1), jcr->Job);
246   }
247 
248   gettimeofday(&tv, &tz);
249   timeout.tv_nsec = tv.tv_usec * 1000;
250   timeout.tv_sec = tv.tv_sec + max_wait_time;
251 
252   Dmsg0(debuglevel, "Going to wait for a device.\n");
253 
254   /* Wait required time */
255   status = pthread_cond_timedwait(&wait_device_release, &device_release_mutex,
256                                   &timeout);
257   Dmsg1(debuglevel, "Wokeup from sleep on device status=%d\n", status);
258 
259   V(device_release_mutex);
260   Dmsg1(debuglevel, "Return from wait_device ok=%d\n", ok);
261   return ok;
262 }
263 
264 /**
265  * Signal the above WaitForDevice function.
266  */
ReleaseDeviceCond()267 void ReleaseDeviceCond() { pthread_cond_broadcast(&wait_device_release); }
268 
269 } /* namespace storagedaemon */
270