1 /*
2 Bacula(R) - The Network Backup Solution
3
4 Copyright (C) 2000-2020 Kern Sibbald
5
6 The original author of Bacula is Kern Sibbald, with contributions
7 from many others, a complete list can be found in the file AUTHORS.
8
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
13
14 This notice must be preserved when any source code is
15 conveyed and/or propagated.
16
17 Bacula(R) is a registered trademark of Kern Sibbald.
18 */
19 /*
20 * Job control and execution for Storage Daemon
21 *
22 * Written by Kern Sibbald, MM
23 *
24 */
25
26 #include "bacula.h"
27 #include "stored.h"
28
/*
 * File-scope mutex handed to pthread_cond_timedwait() in run_cmd() while a
 * job waits on its job_start_wait condition for the File daemon to be
 * authenticated.  Being static to this file, it is shared by every job
 * that goes through run_cmd().
 */
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

/* Imported variables */
extern STORES *me;                     /* our Global resource */
extern uint32_t VolSessionTime;        /* copied into jcr->VolSessionTime when the job is not rerunning */

/* Imported functions */
extern uint32_t newVolSessionId();     /* supplies jcr->VolSessionId when the job is not rerunning */
extern bool do_vbackup(JCR *jcr);      /* called by run_cmd() when jcr->no_client_used() is true */
38
/*
 * Requests from the Director daemon
 *
 * Scan format for the Job command.  Every %s conversion carries an explicit
 * maximum field width so that an over-long token cannot run past the buffer
 * it is scanned into:
 *   - the name/MD5 fields are limited to 127 characters,
 *   - SpoolSize is limited to 29 characters (job_cmd() scans it into a
 *     30 byte buffer),
 *   - Authorization is limited to 199 characters (job_cmd() scans it into a
 *     200 byte buffer).
 * When a token is longer than its width the scan either stops matching at
 * the next literal (so fewer than 19 fields are converted) or, for the
 * final Authorization field, the value is truncated.
 */
static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
      "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
      "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%29s "
      "rerunning=%d VolSessionId=%d VolSessionTime=%d sd_client=%d "
      "Authorization=%199s\n";

/*
 * Responses sent to Director daemon
 *
 * OKjob   - sent by job_cmd() with VolSessionId, VolSessionTime and the
 *           authorization key string.
 * BAD_job - sent by job_cmd() when the Job command does not scan into 19
 *           fields; stat is the number of fields that were converted and
 *           CMD echoes the command that was received.
 */
static char OKjob[]     = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
static char BAD_job[]   = "3915 Bad Job command. stat=%d CMD: %s\n";
49
50 /*
51 * Director requests us to start a job
52 * Basic tasks done here:
53 * - We pickup the JobId to be run from the Director.
54 * - We pickup the device, media, and pool from the Director
55 * - Wait for a connection from the File Daemon (FD)
56 * - Accept commands from the FD (i.e. run the job)
57 * - Return when the connection is terminated or
58 * there is an error.
59 */
job_cmd(JCR * jcr)60 bool job_cmd(JCR *jcr)
61 {
62 int32_t JobId;
63 char sd_auth_key[200];
64 char spool_size[30];
65 char seed[100];
66 BSOCK *dir = jcr->dir_bsock;
67 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
68 int32_t JobType, level, spool_attributes, no_attributes, spool_data;
69 int32_t write_part_after_job, PreferMountedVols;
70 int32_t rerunning;
71 int32_t is_client;
72 int stat;
73 JCR *ojcr;
74
75 /*
76 * Get JobId and permissions from Director
77 */
78 Dmsg1(100, "<dird: %s", dir->msg);
79 bstrncpy(spool_size, "0", sizeof(spool_size));
80 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
81 client_name.c_str(),
82 &JobType, &level, fileset_name.c_str(), &no_attributes,
83 &spool_attributes, fileset_md5.c_str(), &spool_data,
84 &write_part_after_job, &PreferMountedVols, spool_size,
85 &rerunning, &jcr->VolSessionId, &jcr->VolSessionTime,
86 &is_client, &sd_auth_key);
87 if (stat != 19) {
88 pm_strcpy(jcr->errmsg, dir->msg);
89 dir->fsend(BAD_job, stat, jcr->errmsg);
90 Dmsg1(100, ">dird: %s", dir->msg);
91 jcr->setJobStatus(JS_ErrorTerminated);
92 return false;
93 }
94 jcr->rerunning = rerunning;
95 jcr->sd_client = is_client;
96 if (is_client) {
97 jcr->sd_auth_key = bstrdup(sd_auth_key);
98 }
99 Dmsg3(100, "rerunning=%d VolSesId=%d VolSesTime=%d\n", jcr->rerunning,
100 jcr->VolSessionId, jcr->VolSessionTime);
101 /*
102 * Since this job could be rescheduled, we
103 * check to see if we have it already. If so
104 * free the old jcr and use the new one.
105 */
106 ojcr = get_jcr_by_full_name(job.c_str());
107 if (ojcr && !ojcr->authenticated) {
108 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(intptr_t)ojcr, job.c_str());
109 free_jcr(ojcr);
110 }
111 jcr->JobId = JobId;
112 Dmsg2(800, "Start JobId=%d %p\n", JobId, jcr);
113 set_jcr_in_tsd(jcr);
114
115 /*
116 * If job rescheduled because previous was incomplete,
117 * the Resched flag is set and VolSessionId and VolSessionTime
118 * are given to us (same as restarted job).
119 */
120 if (!jcr->rerunning) {
121 jcr->VolSessionId = newVolSessionId();
122 jcr->VolSessionTime = VolSessionTime;
123 }
124 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
125 unbash_spaces(job_name);
126 jcr->job_name = get_pool_memory(PM_NAME);
127 pm_strcpy(jcr->job_name, job_name);
128 unbash_spaces(client_name);
129 jcr->client_name = get_pool_memory(PM_NAME);
130 pm_strcpy(jcr->client_name, client_name);
131 unbash_spaces(fileset_name);
132 jcr->fileset_name = get_pool_memory(PM_NAME);
133 pm_strcpy(jcr->fileset_name, fileset_name);
134 jcr->setJobType(JobType);
135 jcr->setJobLevel(level);
136 jcr->no_attributes = no_attributes;
137 jcr->spool_attributes = spool_attributes;
138 jcr->spool_data = spool_data;
139 jcr->spool_size = str_to_int64(spool_size);
140 jcr->write_part_after_job = write_part_after_job;
141 jcr->fileset_md5 = get_pool_memory(PM_NAME);
142 pm_strcpy(jcr->fileset_md5, fileset_md5);
143 jcr->PreferMountedVols = PreferMountedVols;
144
145
146 jcr->authenticated = false;
147
148 /*
149 * Pass back an authorization key for the File daemon
150 */
151 if (jcr->sd_client) {
152 bstrncpy(sd_auth_key, "xxx", 3);
153 } else {
154 bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
155 make_session_key(sd_auth_key, seed, 1);
156 }
157 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, sd_auth_key);
158 Dmsg2(150, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
159 /* If not client, set key, otherwise it is already set */
160 if (!jcr->sd_client) {
161 jcr->sd_auth_key = bstrdup(sd_auth_key);
162 memset(sd_auth_key, 0, sizeof(sd_auth_key));
163 }
164 new_plugins(jcr); /* instantiate the plugins */
165 generate_daemon_event(jcr, "JobStart");
166 generate_plugin_event(jcr, bsdEventJobStart, (void *)"JobStart");
167 return true;
168 }
169
run_cmd(JCR * jcr)170 bool run_cmd(JCR *jcr)
171 {
172 struct timeval tv;
173 struct timezone tz;
174 struct timespec timeout;
175 int errstat = 0;
176
177 Dsm_check(200);
178 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
179
180 /*
181 * If we do not need the FD,
182 * we are doing a migration, copy, or virtual backup.
183 */
184 if (jcr->no_client_used()) {
185 do_vbackup(jcr);
186 return false;
187 }
188
189 jcr->sendJobStatus(JS_WaitFD); /* wait for FD to connect */
190
191 Dmsg2(050, "sd_calls_client=%d sd_client=%d\n", jcr->sd_calls_client, jcr->sd_client);
192 if (jcr->sd_calls_client) {
193 if (!read_client_hello(jcr)) {
194 return false;
195 }
196 /*
197 * Authenticate the File daemon
198 */
199 Dmsg0(050, "=== Authenticate FD\n");
200 if (jcr->authenticated || !authenticate_filed(jcr, jcr->file_bsock, jcr->FDVersion)) {
201 Dmsg1(050, "Authentication failed Job %s\n", jcr->Job);
202 Qmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
203 } else {
204 jcr->authenticated = true;
205 }
206 } else if (!jcr->sd_client) {
207 /* We wait to receive connection from Client */
208 gettimeofday(&tv, &tz);
209 timeout.tv_nsec = tv.tv_usec * 1000;
210 timeout.tv_sec = tv.tv_sec + me->client_wait;
211
212 Dmsg3(050, "%s waiting %d sec for FD to contact SD key=%s\n",
213 jcr->Job, (int)(timeout.tv_sec-time(NULL)), jcr->sd_auth_key);
214
215 Dmsg3(800, "=== Block Job=%s jid=%d %p\n", jcr->Job, jcr->JobId, jcr);
216
217 /*
218 * Wait for the File daemon to contact us to start the Job,
219 * when he does, we will be released, unless the 30 minutes
220 * expires.
221 */
222 P(mutex);
223 while ( !jcr->authenticated && !job_canceled(jcr) ) {
224 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
225 if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
226 break;
227 }
228 Dmsg1(800, "=== Auth cond errstat=%d\n", errstat);
229 }
230 Dmsg4(050, "=== Auth=%d jid=%d canceled=%d errstat=%d\n",
231 jcr->JobId, jcr->authenticated, job_canceled(jcr), errstat);
232 V(mutex);
233 Dmsg2(800, "Auth fail or cancel for jid=%d %p\n", jcr->JobId, jcr);
234 }
235
236 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
237
238 if (jcr->authenticated && !job_canceled(jcr)) {
239 Dmsg2(050, "Running jid=%d %p\n", jcr->JobId, jcr);
240 run_job(jcr); /* Run the job */
241 }
242 Dmsg2(800, "Done jid=%d %p\n", jcr->JobId, jcr);
243 return false;
244 }
245
246
247 #ifdef needed
248 /*
249 * Query Device command from Director
250 * Sends Storage Daemon's information on the device to the
251 * caller (presumably the Director).
252 * This command always returns "true" so that the line is
253 * not closed on an error.
254 *
255 */
query_cmd(JCR * jcr)256 bool query_cmd(JCR *jcr)
257 {
258 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
259 BSOCK *dir = jcr->dir_bsock;
260 DEVRES *device;
261 AUTOCHANGER *changer;
262 bool ok;
263
264 Dmsg1(100, "Query_cmd: %s", dir->msg);
265 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
266 Dmsg1(100, "<dird: %s\n", dir->msg);
267 if (ok) {
268 unbash_spaces(dev_name);
269 foreach_res(device, R_DEVICE) {
270 /* Find resource, and make sure we were able to open it */
271 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
272 if (!device->dev) {
273 device->dev = init_dev(jcr, device, false, statcollector);
274 }
275 if (!device->dev) {
276 break;
277 }
278 ok = dir_update_device(jcr, device->dev);
279 if (ok) {
280 ok = dir->fsend(OK_query);
281 } else {
282 dir->fsend(NO_query);
283 }
284 return ok;
285 }
286 }
287 foreach_res(changer, R_AUTOCHANGER) {
288 /* Find resource, and make sure we were able to open it */
289 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
290 if (!changer->device || changer->device->size() == 0) {
291 continue; /* no devices */
292 }
293 ok = dir_update_changer(jcr, changer);
294 if (ok) {
295 ok = dir->fsend(OK_query);
296 } else {
297 dir->fsend(NO_query);
298 }
299 return ok;
300 }
301 }
302 /* If we get here, the device/autochanger was not found */
303 unbash_spaces(dir->msg);
304 pm_strcpy(jcr->errmsg, dir->msg);
305 dir->fsend(NO_device, dev_name.c_str());
306 Dmsg1(100, ">dird: %s\n", dir->msg);
307 } else {
308 unbash_spaces(dir->msg);
309 pm_strcpy(jcr->errmsg, dir->msg);
310 dir->fsend(BAD_query, jcr->errmsg);
311 Dmsg1(100, ">dird: %s\n", dir->msg);
312 }
313
314 return true;
315 }
316
317 #endif
318
319
320 /*
321 * Destroy the Job Control Record and associated
322 * resources (sockets).
323 */
stored_free_jcr(JCR * jcr)324 void stored_free_jcr(JCR *jcr)
325 {
326 Dmsg2(800, "End Job JobId=%u %p\n", jcr->JobId, jcr);
327 if (jcr->jobmedia_queue) {
328 flush_jobmedia_queue(jcr);
329 delete jcr->jobmedia_queue;
330 jcr->jobmedia_queue = NULL;
331 }
332
333 if (jcr->dir_bsock) {
334 Dmsg2(800, "Send terminate jid=%d %p\n", jcr->JobId, jcr);
335 jcr->dir_bsock->signal(BNET_EOD);
336 jcr->dir_bsock->signal(BNET_TERMINATE);
337 jcr->dir_bsock->destroy();
338 }
339 if (jcr->file_bsock) {
340 jcr->file_bsock->destroy();
341 }
342 if (jcr->job_name) {
343 free_pool_memory(jcr->job_name);
344 }
345 if (jcr->client_name) {
346 free_memory(jcr->client_name);
347 jcr->client_name = NULL;
348 }
349 if (jcr->fileset_name) {
350 free_memory(jcr->fileset_name);
351 }
352 if (jcr->fileset_md5) {
353 free_memory(jcr->fileset_md5);
354 }
355 if (jcr->bsr) {
356 free_bsr(jcr->bsr);
357 jcr->bsr = NULL;
358 }
359 /* Free any restore volume list created */
360 free_restore_volume_list(jcr);
361 if (jcr->RestoreBootstrap) {
362 unlink(jcr->RestoreBootstrap);
363 free_pool_memory(jcr->RestoreBootstrap);
364 jcr->RestoreBootstrap = NULL;
365 }
366 if (jcr->next_dev || jcr->prev_dev) {
367 Qmsg0(NULL, M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
368 }
369 pthread_cond_destroy(&jcr->job_start_wait);
370 if (jcr->dcrs) {
371 delete jcr->dcrs;
372 }
373 jcr->dcrs = NULL;
374
375 /* Avoid a double free */
376 if (jcr->dcr == jcr->read_dcr) {
377 jcr->read_dcr = NULL;
378 }
379 if (jcr->dcr) {
380 free_dcr(jcr->dcr);
381 jcr->dcr = NULL;
382 }
383 if (jcr->read_dcr) {
384 free_dcr(jcr->read_dcr);
385 jcr->read_dcr = NULL;
386 }
387
388 if (jcr->read_store) {
389 DIRSTORE *store;
390 foreach_alist(store, jcr->read_store) {
391 delete store->device;
392 delete store;
393 }
394 delete jcr->read_store;
395 jcr->read_store = NULL;
396 }
397 if (jcr->write_store) {
398 DIRSTORE *store;
399 foreach_alist(store, jcr->write_store) {
400 delete store->device;
401 delete store;
402 }
403 delete jcr->write_store;
404 jcr->write_store = NULL;
405 }
406 Dsm_check(200);
407
408 if (jcr->JobId != 0)
409 write_state_file(me->working_directory, "bacula-sd", get_first_port_host_order(me->sdaddrs));
410
411 return;
412 }
413