1 /*
2 BAREOS® - Backup Archiving REcovery Open Sourced
3
4 Copyright (C) 2000-2011 Free Software Foundation Europe e.V.
5 Copyright (C) 2016-2020 Bareos GmbH & Co. KG
6
7 This program is Free Software; you can redistribute it and/or
8 modify it under the terms of version three of the GNU Affero General Public
9 License as published by the Free Software Foundation and included
10 in the file LICENSE.
11
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Affero General Public License for more details.
16
17 You should have received a copy of the GNU Affero General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA.
21 */
22 /*
23 * Kern Sibbald, March 2005
24 */
25 /**
26 * @file
27 * Subroutines to handle waiting for operator intervention
28 * or waiting for a Device to be released
29 *
30 * Code for WaitForSysop() pulled from askdir.c
31 */
32
33 #include "include/bareos.h" /* pull in global headers */
34 #include "stored/stored.h" /* pull in Storage Daemon headers */
35 #include "stored/stored_globals.h"
36 #include "stored/device_control_record.h"
37 #include "stored/wait.h"
38 #include "lib/berrno.h"
39 #include "lib/bsock.h"
40 #include "lib/edit.h"
41 #include "include/jcr.h"
42
43 namespace storagedaemon {
44
/* Debug level used by every Dmsg call in this file. */
const int debuglevel = 400;

/*
 * Mutex / condition variable pair used by WaitForDevice() to sleep and by
 * ReleaseDeviceCond() to wake the sleepers up again.
 */
static pthread_mutex_t device_release_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wait_device_release = PTHREAD_COND_INITIALIZER;
49
50 /**
51 * Wait for SysOp to mount a tape on a specific device
52 *
53 * Returns: W_ERROR, W_TIMEOUT, W_POLL, W_MOUNT, or W_WAKE
54 */
WaitForSysop(DeviceControlRecord * dcr)55 int WaitForSysop(DeviceControlRecord* dcr)
56 {
57 struct timeval tv;
58 struct timezone tz;
59 struct timespec timeout;
60 time_t last_heartbeat = 0;
61 time_t first_start = time(NULL);
62 int status = 0;
63 int add_wait;
64 bool unmounted;
65 Device* dev = dcr->dev;
66 JobControlRecord* jcr = dcr->jcr;
67
68 dev->Lock();
69 Dmsg1(debuglevel, "Enter blocked=%s\n", dev->print_blocked());
70
71 /*
72 * Since we want to mount a tape, make sure current one is
73 * not marked as using this drive.
74 */
75 VolumeUnused(dcr);
76
77 unmounted = dev->IsDeviceUnmounted();
78 dev->poll = false;
79 /*
80 * Wait requested time (dev->rem_wait_sec). However, we also wake up every
81 * HB_TIME seconds and send a heartbeat to the FD and the Director
82 * to keep stateful firewalls from closing them down while waiting
83 * for the operator.
84 */
85 add_wait = dev->rem_wait_sec;
86 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
87 add_wait = me->heartbeat_interval;
88 }
89 /* If the user did not unmount the tape and we are polling, ensure
90 * that we poll at the correct interval.
91 */
92 if (!unmounted && dev->vol_poll_interval
93 && add_wait > dev->vol_poll_interval) {
94 add_wait = dev->vol_poll_interval;
95 }
96
97 if (!unmounted) {
98 Dmsg1(debuglevel, "blocked=%s\n", dev->print_blocked());
99 dev->dev_prev_blocked = dev->blocked();
100 dev->SetBlocked(BST_WAITING_FOR_SYSOP); /* indicate waiting for mount */
101 }
102
103 while (!JobCanceled(jcr)) {
104 time_t now, start, total_waited;
105
106 gettimeofday(&tv, &tz);
107 timeout.tv_nsec = tv.tv_usec * 1000;
108 timeout.tv_sec = tv.tv_sec + add_wait;
109
110 Dmsg4(debuglevel,
111 "I'm going to sleep on device %s. HB=%d rem_wait=%d add_wait=%d\n",
112 dev->print_name(), (int)me->heartbeat_interval, dev->rem_wait_sec,
113 add_wait);
114 start = time(NULL);
115
116 /* Wait required time */
117 status
118 = pthread_cond_timedwait(&dev->wait_next_vol, &dev->mutex_, &timeout);
119
120 Dmsg2(debuglevel, "Wokeup from sleep on device status=%d blocked=%s\n",
121 status, dev->print_blocked());
122 now = time(NULL);
123 total_waited = now - first_start;
124 dev->rem_wait_sec -= (now - start);
125
126 /* Note, this always triggers the first time. We want that. */
127 if (me->heartbeat_interval) {
128 if (now - last_heartbeat >= me->heartbeat_interval) {
129 /* send heartbeats */
130 if (jcr->file_bsock) {
131 jcr->file_bsock->signal(BNET_HEARTBEAT);
132 Dmsg0(debuglevel, "Send heartbeat to FD.\n");
133 }
134 if (jcr->dir_bsock) { jcr->dir_bsock->signal(BNET_HEARTBEAT); }
135 last_heartbeat = now;
136 }
137 }
138
139 if (status == EINVAL) {
140 BErrNo be;
141 Jmsg1(jcr, M_FATAL, 0, _("pthread timedwait error. ERR=%s\n"),
142 be.bstrerror(status));
143 status = W_ERROR; /* error */
144 break;
145 }
146
147 /*
148 * Continue waiting if operator is labeling volumes
149 */
150 if (dev->blocked() == BST_WRITING_LABEL) { continue; }
151
152 if (dev->rem_wait_sec <= 0) { /* on exceeding wait time return */
153 Dmsg0(debuglevel, "Exceed wait time.\n");
154 status = W_TIMEOUT;
155 break;
156 }
157
158 /*
159 * Check if user unmounted the device while we were waiting
160 */
161 unmounted = dev->IsDeviceUnmounted();
162
163 if (!unmounted && dev->vol_poll_interval
164 && (total_waited >= dev->vol_poll_interval)) {
165 Dmsg1(debuglevel, "poll return in wait blocked=%s\n",
166 dev->print_blocked());
167 dev->poll = true; /* returning a poll event */
168 status = W_POLL;
169 break;
170 }
171 /*
172 * Check if user mounted the device while we were waiting
173 */
174 if (dev->blocked() == BST_MOUNT) { /* mount request ? */
175 Dmsg0(debuglevel, "Mounted return.\n");
176 status = W_MOUNT;
177 break;
178 }
179
180 /*
181 * If we did not timeout, then some event happened, so
182 * return to check if state changed.
183 */
184 if (status != ETIMEDOUT) {
185 BErrNo be;
186 Dmsg2(debuglevel, "Wake return. status=%d. ERR=%s\n", status,
187 be.bstrerror(status));
188 status = W_WAKE; /* someone woke us */
189 break;
190 }
191
192 /*
193 * At this point, we know we woke up because of a timeout,
194 * that was due to a heartbeat, because any other reason would
195 * have caused us to return, so update the wait counters and continue.
196 */
197 add_wait = dev->rem_wait_sec;
198 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
199 add_wait = me->heartbeat_interval;
200 }
201 /* If the user did not unmount the tape and we are polling, ensure
202 * that we poll at the correct interval.
203 */
204 if (!unmounted && dev->vol_poll_interval
205 && add_wait > dev->vol_poll_interval - total_waited) {
206 add_wait = dev->vol_poll_interval - total_waited;
207 }
208 if (add_wait < 0) { add_wait = 0; }
209 }
210
211 if (!unmounted) {
212 dev->SetBlocked(dev->dev_prev_blocked); /* restore entry state */
213 Dmsg1(debuglevel, "set %s\n", dev->print_blocked());
214 }
215 Dmsg1(debuglevel, "Exit blocked=%s\n", dev->print_blocked());
216 dev->Unlock();
217 return status;
218 }
219
220
221 /**
222 * Wait for any device to be released, then we return, so
223 * higher level code can rescan possible devices. Since there
224 * could be a job waiting for a drive to free up, we wait a maximum
225 * of 1 minute then retry just in case a broadcast was lost, and
226 * we return to rescan the devices.
227 *
228 * Returns: true if a device has changed state
229 * false if the total wait time has expired.
230 */
WaitForDevice(JobControlRecord * jcr,int & retries)231 bool WaitForDevice(JobControlRecord* jcr, int& retries)
232 {
233 struct timeval tv;
234 struct timezone tz;
235 struct timespec timeout;
236 int status = 0;
237 bool ok = true;
238 const int max_wait_time = 1 * 60; /* wait 1 minute */
239 char ed1[50];
240
241 Dmsg0(debuglevel, "Enter WaitForDevice\n");
242 P(device_release_mutex);
243
244 if (++retries % 5 == 0) {
245 /* Print message every 5 minutes */
246 Jmsg(jcr, M_MOUNT, 0, _("JobId=%s, Job %s waiting to reserve a device.\n"),
247 edit_uint64(jcr->JobId, ed1), jcr->Job);
248 }
249
250 gettimeofday(&tv, &tz);
251 timeout.tv_nsec = tv.tv_usec * 1000;
252 timeout.tv_sec = tv.tv_sec + max_wait_time;
253
254 Dmsg0(debuglevel, "Going to wait for a device.\n");
255
256 /* Wait required time */
257 status = pthread_cond_timedwait(&wait_device_release, &device_release_mutex,
258 &timeout);
259 Dmsg1(debuglevel, "Wokeup from sleep on device status=%d\n", status);
260
261 V(device_release_mutex);
262 Dmsg1(debuglevel, "Return from wait_device ok=%d\n", ok);
263 return ok;
264 }
265
266 /**
267 * Signal the above WaitForDevice function.
268 */
ReleaseDeviceCond()269 void ReleaseDeviceCond() { pthread_cond_broadcast(&wait_device_release); }
270
271 } /* namespace storagedaemon */
272