1 /*
2 BAREOS® - Backup Archiving REcovery Open Sourced
3
4 Copyright (C) 2000-2011 Free Software Foundation Europe e.V.
5 Copyright (C) 2016-2016 Bareos GmbH & Co. KG
6
7 This program is Free Software; you can redistribute it and/or
8 modify it under the terms of version three of the GNU Affero General Public
9 License as published by the Free Software Foundation and included
10 in the file LICENSE.
11
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Affero General Public License for more details.
16
17 You should have received a copy of the GNU Affero General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA.
21 */
22 /*
23 * Kern Sibbald, March 2005
24 */
25 /**
26 * @file
27 * Subroutines to handle waiting for operator intervention
28 * or waiting for a Device to be released
29 *
30 * Code for WaitForSysop() pulled from askdir.c
31 */
32
33 #include "include/bareos.h" /* pull in global headers */
34 #include "stored/stored.h" /* pull in Storage Daemon headers */
35 #include "stored/stored_globals.h"
36 #include "lib/berrno.h"
37 #include "lib/bsock.h"
38 #include "lib/edit.h"
39 #include "include/jcr.h"
40
41 namespace storagedaemon {
42
/* Debug level passed to every Dmsg call in this file. */
const int debuglevel = 400;

/*
 * Synchronisation objects shared by WaitForDevice() and ReleaseDeviceCond():
 * WaitForDevice() sleeps on wait_device_release while holding
 * device_release_mutex, and ReleaseDeviceCond() broadcasts on the
 * condition variable.
 */
static pthread_mutex_t device_release_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wait_device_release = PTHREAD_COND_INITIALIZER;
47
48 /**
49 * Wait for SysOp to mount a tape on a specific device
50 *
51 * Returns: W_ERROR, W_TIMEOUT, W_POLL, W_MOUNT, or W_WAKE
52 */
WaitForSysop(DeviceControlRecord * dcr)53 int WaitForSysop(DeviceControlRecord* dcr)
54 {
55 struct timeval tv;
56 struct timezone tz;
57 struct timespec timeout;
58 time_t last_heartbeat = 0;
59 time_t first_start = time(NULL);
60 int status = 0;
61 int add_wait;
62 bool unmounted;
63 Device* dev = dcr->dev;
64 JobControlRecord* jcr = dcr->jcr;
65
66 dev->Lock();
67 Dmsg1(debuglevel, "Enter blocked=%s\n", dev->print_blocked());
68
69 /*
70 * Since we want to mount a tape, make sure current one is
71 * not marked as using this drive.
72 */
73 VolumeUnused(dcr);
74
75 unmounted = dev->IsDeviceUnmounted();
76 dev->poll = false;
77 /*
78 * Wait requested time (dev->rem_wait_sec). However, we also wake up every
79 * HB_TIME seconds and send a heartbeat to the FD and the Director
80 * to keep stateful firewalls from closing them down while waiting
81 * for the operator.
82 */
83 add_wait = dev->rem_wait_sec;
84 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
85 add_wait = me->heartbeat_interval;
86 }
87 /* If the user did not unmount the tape and we are polling, ensure
88 * that we poll at the correct interval.
89 */
90 if (!unmounted && dev->vol_poll_interval &&
91 add_wait > dev->vol_poll_interval) {
92 add_wait = dev->vol_poll_interval;
93 }
94
95 if (!unmounted) {
96 Dmsg1(debuglevel, "blocked=%s\n", dev->print_blocked());
97 dev->dev_prev_blocked = dev->blocked();
98 dev->SetBlocked(BST_WAITING_FOR_SYSOP); /* indicate waiting for mount */
99 }
100
101 while (!JobCanceled(jcr)) {
102 time_t now, start, total_waited;
103
104 gettimeofday(&tv, &tz);
105 timeout.tv_nsec = tv.tv_usec * 1000;
106 timeout.tv_sec = tv.tv_sec + add_wait;
107
108 Dmsg4(debuglevel,
109 "I'm going to sleep on device %s. HB=%d rem_wait=%d add_wait=%d\n",
110 dev->print_name(), (int)me->heartbeat_interval, dev->rem_wait_sec,
111 add_wait);
112 start = time(NULL);
113
114 /* Wait required time */
115 status =
116 pthread_cond_timedwait(&dev->wait_next_vol, &dev->mutex_, &timeout);
117
118 Dmsg2(debuglevel, "Wokeup from sleep on device status=%d blocked=%s\n",
119 status, dev->print_blocked());
120 now = time(NULL);
121 total_waited = now - first_start;
122 dev->rem_wait_sec -= (now - start);
123
124 /* Note, this always triggers the first time. We want that. */
125 if (me->heartbeat_interval) {
126 if (now - last_heartbeat >= me->heartbeat_interval) {
127 /* send heartbeats */
128 if (jcr->file_bsock) {
129 jcr->file_bsock->signal(BNET_HEARTBEAT);
130 Dmsg0(debuglevel, "Send heartbeat to FD.\n");
131 }
132 if (jcr->dir_bsock) { jcr->dir_bsock->signal(BNET_HEARTBEAT); }
133 last_heartbeat = now;
134 }
135 }
136
137 if (status == EINVAL) {
138 BErrNo be;
139 Jmsg1(jcr, M_FATAL, 0, _("pthread timedwait error. ERR=%s\n"),
140 be.bstrerror(status));
141 status = W_ERROR; /* error */
142 break;
143 }
144
145 /*
146 * Continue waiting if operator is labeling volumes
147 */
148 if (dev->blocked() == BST_WRITING_LABEL) { continue; }
149
150 if (dev->rem_wait_sec <= 0) { /* on exceeding wait time return */
151 Dmsg0(debuglevel, "Exceed wait time.\n");
152 status = W_TIMEOUT;
153 break;
154 }
155
156 /*
157 * Check if user unmounted the device while we were waiting
158 */
159 unmounted = dev->IsDeviceUnmounted();
160
161 if (!unmounted && dev->vol_poll_interval &&
162 (total_waited >= dev->vol_poll_interval)) {
163 Dmsg1(debuglevel, "poll return in wait blocked=%s\n",
164 dev->print_blocked());
165 dev->poll = true; /* returning a poll event */
166 status = W_POLL;
167 break;
168 }
169 /*
170 * Check if user mounted the device while we were waiting
171 */
172 if (dev->blocked() == BST_MOUNT) { /* mount request ? */
173 Dmsg0(debuglevel, "Mounted return.\n");
174 status = W_MOUNT;
175 break;
176 }
177
178 /*
179 * If we did not timeout, then some event happened, so
180 * return to check if state changed.
181 */
182 if (status != ETIMEDOUT) {
183 BErrNo be;
184 Dmsg2(debuglevel, "Wake return. status=%d. ERR=%s\n", status,
185 be.bstrerror(status));
186 status = W_WAKE; /* someone woke us */
187 break;
188 }
189
190 /*
191 * At this point, we know we woke up because of a timeout,
192 * that was due to a heartbeat, because any other reason would
193 * have caused us to return, so update the wait counters and continue.
194 */
195 add_wait = dev->rem_wait_sec;
196 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
197 add_wait = me->heartbeat_interval;
198 }
199 /* If the user did not unmount the tape and we are polling, ensure
200 * that we poll at the correct interval.
201 */
202 if (!unmounted && dev->vol_poll_interval &&
203 add_wait > dev->vol_poll_interval - total_waited) {
204 add_wait = dev->vol_poll_interval - total_waited;
205 }
206 if (add_wait < 0) { add_wait = 0; }
207 }
208
209 if (!unmounted) {
210 dev->SetBlocked(dev->dev_prev_blocked); /* restore entry state */
211 Dmsg1(debuglevel, "set %s\n", dev->print_blocked());
212 }
213 Dmsg1(debuglevel, "Exit blocked=%s\n", dev->print_blocked());
214 dev->Unlock();
215 return status;
216 }
217
218
219 /**
220 * Wait for any device to be released, then we return, so
221 * higher level code can rescan possible devices. Since there
222 * could be a job waiting for a drive to free up, we wait a maximum
223 * of 1 minute then retry just in case a broadcast was lost, and
224 * we return to rescan the devices.
225 *
226 * Returns: true if a device has changed state
227 * false if the total wait time has expired.
228 */
WaitForDevice(JobControlRecord * jcr,int & retries)229 bool WaitForDevice(JobControlRecord* jcr, int& retries)
230 {
231 struct timeval tv;
232 struct timezone tz;
233 struct timespec timeout;
234 int status = 0;
235 bool ok = true;
236 const int max_wait_time = 1 * 60; /* wait 1 minute */
237 char ed1[50];
238
239 Dmsg0(debuglevel, "Enter WaitForDevice\n");
240 P(device_release_mutex);
241
242 if (++retries % 5 == 0) {
243 /* Print message every 5 minutes */
244 Jmsg(jcr, M_MOUNT, 0, _("JobId=%s, Job %s waiting to reserve a device.\n"),
245 edit_uint64(jcr->JobId, ed1), jcr->Job);
246 }
247
248 gettimeofday(&tv, &tz);
249 timeout.tv_nsec = tv.tv_usec * 1000;
250 timeout.tv_sec = tv.tv_sec + max_wait_time;
251
252 Dmsg0(debuglevel, "Going to wait for a device.\n");
253
254 /* Wait required time */
255 status = pthread_cond_timedwait(&wait_device_release, &device_release_mutex,
256 &timeout);
257 Dmsg1(debuglevel, "Wokeup from sleep on device status=%d\n", status);
258
259 V(device_release_mutex);
260 Dmsg1(debuglevel, "Return from wait_device ok=%d\n", ok);
261 return ok;
262 }
263
264 /**
265 * Signal the above WaitForDevice function.
266 */
ReleaseDeviceCond()267 void ReleaseDeviceCond() { pthread_cond_broadcast(&wait_device_release); }
268
269 } /* namespace storagedaemon */
270