1 /*
2 Bacula(R) - The Network Backup Solution
3
4 Copyright (C) 2000-2020 Kern Sibbald
5
6 The original author of Bacula is Kern Sibbald, with contributions
7 from many others, a complete list can be found in the file AUTHORS.
8
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
13
14 This notice must be preserved when any source code is
15 conveyed and/or propagated.
16
17 Bacula(R) is a registered trademark of Kern Sibbald.
18 */
19 /*
20 * Job control and execution for Storage Daemon
21 *
22 * Written by Kern Sibbald, MM
23 *
24 */
25
26 #include "bacula.h"
27 #include "stored.h"
28
29 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
30
31 /* Imported variables */
32 extern STORES *me; /* our Global resource */
33 extern uint32_t VolSessionTime;
34
35 /* Imported functions */
36 extern uint32_t newVolSessionId();
37 extern bool do_vbackup(JCR *jcr);
38
/*
 * Requests from the Director daemon
 *
 * jobcmd is the sscanf() format job_cmd() uses to parse the Director's
 * job command.  It contains 19 conversions.  Every %s conversion carries
 * a field width so that an oversized token cannot overrun the buffer it
 * is stored in: the name fields are limited to 127 characters,
 * SpoolSize to 29 (job_cmd() stores it in spool_size[30]) and
 * Authorization to 199 (job_cmd() stores it in sd_auth_key[200]).
 * Keep the widths one less than the size of the destination buffers.
 */
static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
   "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
   "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%29s "
   "rerunning=%d VolSessionId=%d VolSessionTime=%d sd_client=%d "
   "Authorization=%199s\n";

/* Responses sent to Director daemon */
static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
49
50 /*
51 * Director requests us to start a job
52 * Basic tasks done here:
53 * - We pickup the JobId to be run from the Director.
54 * - We pickup the device, media, and pool from the Director
55 * - Wait for a connection from the File Daemon (FD)
56 * - Accept commands from the FD (i.e. run the job)
57 * - Return when the connection is terminated or
58 * there is an error.
59 */
job_cmd(JCR * jcr)60 bool job_cmd(JCR *jcr)
61 {
62 int32_t JobId;
63 char sd_auth_key[200];
64 char spool_size[30];
65 char seed[100];
66 BSOCK *dir = jcr->dir_bsock;
67 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
68 int32_t JobType, level, spool_attributes, no_attributes, spool_data;
69 int32_t write_part_after_job, PreferMountedVols;
70 int32_t rerunning;
71 int32_t is_client;
72 int stat;
73 JCR *ojcr;
74
75 /*
76 * Get JobId and permissions from Director
77 */
78 Dmsg1(100, "<dird: %s", dir->msg);
79 bstrncpy(spool_size, "0", sizeof(spool_size));
80 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
81 client_name.c_str(),
82 &JobType, &level, fileset_name.c_str(), &no_attributes,
83 &spool_attributes, fileset_md5.c_str(), &spool_data,
84 &write_part_after_job, &PreferMountedVols, spool_size,
85 &rerunning, &jcr->VolSessionId, &jcr->VolSessionTime,
86 &is_client, &sd_auth_key);
87 if (stat != 19) {
88 pm_strcpy(jcr->errmsg, dir->msg);
89 dir->fsend(BAD_job, stat, jcr->errmsg);
90 Dmsg1(100, ">dird: %s", dir->msg);
91 jcr->setJobStatus(JS_ErrorTerminated);
92 return false;
93 }
94 jcr->rerunning = rerunning;
95 jcr->sd_client = is_client;
96 if (is_client) {
97 jcr->sd_auth_key = bstrdup(sd_auth_key);
98 }
99 Dmsg3(100, "rerunning=%d VolSesId=%d VolSesTime=%d\n", jcr->rerunning,
100 jcr->VolSessionId, jcr->VolSessionTime);
101 /*
102 * Since this job could be rescheduled, we
103 * check to see if we have it already. If so
104 * free the old jcr and use the new one.
105 */
106 ojcr = get_jcr_by_full_name(job.c_str());
107 if (ojcr && !ojcr->authenticated) {
108 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(intptr_t)ojcr, job.c_str());
109 free_jcr(ojcr);
110 }
111 jcr->JobId = JobId;
112 Dmsg2(800, "Start JobId=%d %p\n", JobId, jcr);
113 set_jcr_in_tsd(jcr);
114
115 /*
116 * If job rescheduled because previous was incomplete,
117 * the Resched flag is set and VolSessionId and VolSessionTime
118 * are given to us (same as restarted job).
119 */
120 if (!jcr->rerunning) {
121 jcr->VolSessionId = newVolSessionId();
122 jcr->VolSessionTime = VolSessionTime;
123 }
124 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
125 unbash_spaces(job_name);
126 jcr->job_name = get_pool_memory(PM_NAME);
127 pm_strcpy(jcr->job_name, job_name);
128 unbash_spaces(client_name);
129 jcr->client_name = get_pool_memory(PM_NAME);
130 pm_strcpy(jcr->client_name, client_name);
131 unbash_spaces(fileset_name);
132 jcr->fileset_name = get_pool_memory(PM_NAME);
133 pm_strcpy(jcr->fileset_name, fileset_name);
134 jcr->setJobType(JobType);
135 jcr->setJobLevel(level);
136 jcr->no_attributes = no_attributes;
137 jcr->spool_attributes = spool_attributes;
138 jcr->spool_data = spool_data;
139 jcr->spool_size = str_to_int64(spool_size);
140 jcr->write_part_after_job = write_part_after_job;
141 jcr->fileset_md5 = get_pool_memory(PM_NAME);
142 pm_strcpy(jcr->fileset_md5, fileset_md5);
143 jcr->PreferMountedVols = PreferMountedVols;
144
145
146 jcr->authenticated = false;
147
148 /*
149 * Pass back an authorization key for the File daemon
150 */
151 if (jcr->sd_client) {
152 bstrncpy(sd_auth_key, "xxx", 3);
153 } else {
154 bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
155 make_session_key(sd_auth_key, seed, 1);
156 }
157 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, sd_auth_key);
158 Dmsg2(150, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
159 /* If not client, set key, otherwise it is already set */
160 if (!jcr->sd_client) {
161 jcr->sd_auth_key = bstrdup(sd_auth_key);
162 memset(sd_auth_key, 0, sizeof(sd_auth_key));
163 }
164 new_plugins(jcr); /* instantiate the plugins */
165 generate_daemon_event(jcr, "JobStart");
166 generate_plugin_event(jcr, bsdEventJobStart, (void *)"JobStart");
167
168 /* Keep track of the important events */
169 events_send_msg(jcr, "SJ0001", EVENTS_TYPE_JOB, jcr->director->hdr.name, (intptr_t)jcr,
170 "Job Start jobid=%d job=%s", jcr->JobId, jcr->Job);
171 return true;
172 }
173
run_cmd(JCR * jcr)174 bool run_cmd(JCR *jcr)
175 {
176 struct timeval tv;
177 struct timezone tz;
178 struct timespec timeout;
179 int errstat = 0;
180
181 Dsm_check(200);
182 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
183
184 /* If we do not need the FD, we are doing a virtual backup. */
185 if (jcr->no_client_used()) {
186 do_vbackup(jcr);
187 return false;
188 }
189
190 jcr->sendJobStatus(JS_WaitFD); /* wait for FD to connect */
191
192 Dmsg2(050, "sd_calls_client=%d sd_client=%d\n", jcr->sd_calls_client, jcr->sd_client);
193 if (jcr->sd_calls_client) {
194 if (!read_client_hello(jcr)) {
195 return false;
196 }
197 /*
198 * Authenticate the File daemon
199 */
200 Dmsg0(050, "=== Authenticate FD\n");
201 if (jcr->authenticated || !authenticate_filed(jcr, jcr->file_bsock, jcr->FDVersion)) {
202 Dmsg1(050, "Authentication failed Job %s\n", jcr->Job);
203 Qmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
204 } else {
205 jcr->authenticated = true;
206 }
207 } else if (!jcr->sd_client) {
208 /* We wait to receive connection from Client */
209 gettimeofday(&tv, &tz);
210 timeout.tv_nsec = tv.tv_usec * 1000;
211 timeout.tv_sec = tv.tv_sec + me->client_wait;
212
213 Dmsg3(050, "%s waiting %d sec for FD to contact SD key=%s\n",
214 jcr->Job, (int)(timeout.tv_sec-time(NULL)), jcr->sd_auth_key);
215
216 Dmsg3(800, "=== Block Job=%s jid=%d %p\n", jcr->Job, jcr->JobId, jcr);
217
218 /*
219 * Wait for the File daemon to contact us to start the Job,
220 * when he does, we will be released, unless the 30 minutes
221 * expires.
222 */
223 P(mutex);
224 while ( !jcr->authenticated && !job_canceled(jcr) ) {
225 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
226 if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
227 break;
228 }
229 Dmsg1(800, "=== Auth cond errstat=%d\n", errstat);
230 }
231 Dmsg4(050, "=== Auth=%d jid=%d canceled=%d errstat=%d\n",
232 jcr->JobId, jcr->authenticated, job_canceled(jcr), errstat);
233 V(mutex);
234 Dmsg2(800, "Auth fail or cancel for jid=%d %p\n", jcr->JobId, jcr);
235 }
236
237 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
238
239 if (jcr->authenticated && !job_canceled(jcr)) {
240 Dmsg2(800, "Running jid=%d %p\n", jcr->JobId, jcr);
241 run_job(jcr); /* Run the job */
242 }
243 Dmsg2(800, "Done jid=%d %p\n", jcr->JobId, jcr);
244 return false;
245 }
246
247
248 #ifdef needed
249 /*
250 * Query Device command from Director
251 * Sends Storage Daemon's information on the device to the
252 * caller (presumably the Director).
253 * This command always returns "true" so that the line is
254 * not closed on an error.
255 *
256 */
query_cmd(JCR * jcr)257 bool query_cmd(JCR *jcr)
258 {
259 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
260 BSOCK *dir = jcr->dir_bsock;
261 DEVRES *device;
262 AUTOCHANGER *changer;
263 bool ok;
264
265 Dmsg1(100, "Query_cmd: %s", dir->msg);
266 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
267 Dmsg1(100, "<dird: %s\n", dir->msg);
268 if (ok) {
269 unbash_spaces(dev_name);
270 foreach_res(device, R_DEVICE) {
271 /* Find resource, and make sure we were able to open it */
272 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
273 if (!device->dev) {
274 device->dev = init_dev(jcr, device, false, statcollector);
275 }
276 if (!device->dev) {
277 break;
278 }
279 ok = dir_update_device(jcr, device->dev);
280 if (ok) {
281 ok = dir->fsend(OK_query);
282 } else {
283 dir->fsend(NO_query);
284 }
285 return ok;
286 }
287 }
288 foreach_res(changer, R_AUTOCHANGER) {
289 /* Find resource, and make sure we were able to open it */
290 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
291 if (!changer->device || changer->device->size() == 0) {
292 continue; /* no devices */
293 }
294 ok = dir_update_changer(jcr, changer);
295 if (ok) {
296 ok = dir->fsend(OK_query);
297 } else {
298 dir->fsend(NO_query);
299 }
300 return ok;
301 }
302 }
303 /* If we get here, the device/autochanger was not found */
304 unbash_spaces(dir->msg);
305 pm_strcpy(jcr->errmsg, dir->msg);
306 dir->fsend(NO_device, dev_name.c_str());
307 Dmsg1(100, ">dird: %s\n", dir->msg);
308 } else {
309 unbash_spaces(dir->msg);
310 pm_strcpy(jcr->errmsg, dir->msg);
311 dir->fsend(BAD_query, jcr->errmsg);
312 Dmsg1(100, ">dird: %s\n", dir->msg);
313 }
314
315 return true;
316 }
317
318 #endif
319
320
321 /*
322 * Destroy the Job Control Record and associated
323 * resources (sockets).
324 */
stored_free_jcr(JCR * jcr)325 void stored_free_jcr(JCR *jcr)
326 {
327 Dmsg2(800, "End Job JobId=%u %p\n", jcr->JobId, jcr);
328 if (jcr->jobmedia_queue) {
329 flush_jobmedia_queue(jcr);
330 delete jcr->jobmedia_queue;
331 jcr->jobmedia_queue = NULL;
332
333 /* ***BEEF*** */
334 delete jcr->filemedia_queue;
335 jcr->filemedia_queue = NULL;
336 }
337 free_bsock(jcr->file_bsock);
338 free_bsock(jcr->dir_bsock);
339 if (jcr->job_name) {
340 free_pool_memory(jcr->job_name);
341 }
342 if (jcr->client_name) {
343 free_memory(jcr->client_name);
344 jcr->client_name = NULL;
345 }
346 if (jcr->fileset_name) {
347 free_memory(jcr->fileset_name);
348 }
349 if (jcr->fileset_md5) {
350 free_memory(jcr->fileset_md5);
351 }
352 if (jcr->bsr) {
353 free_bsr(jcr->bsr);
354 jcr->bsr = NULL;
355 }
356 /* Free any restore volume list created */
357 free_restore_volume_list(jcr);
358 if (jcr->RestoreBootstrap) {
359 unlink(jcr->RestoreBootstrap);
360 bfree_and_null(jcr->RestoreBootstrap);
361 }
362 if (jcr->next_dev || jcr->prev_dev) {
363 Qmsg0(NULL, M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
364 }
365 pthread_cond_destroy(&jcr->job_start_wait);
366 if (jcr->dcrs) {
367 delete jcr->dcrs;
368 }
369 jcr->dcrs = NULL;
370
371 /* Avoid a double free */
372 if (jcr->dcr == jcr->read_dcr) {
373 jcr->read_dcr = NULL;
374 }
375 if (jcr->dcr) {
376 free_dcr(jcr->dcr);
377 jcr->dcr = NULL;
378 }
379 if (jcr->read_dcr) {
380 free_dcr(jcr->read_dcr);
381 jcr->read_dcr = NULL;
382 }
383
384 if (jcr->read_store) {
385 DIRSTORE *store;
386 foreach_alist(store, jcr->read_store) {
387 delete store->device;
388 delete store;
389 }
390 delete jcr->read_store;
391 jcr->read_store = NULL;
392 }
393 if (jcr->write_store) {
394 DIRSTORE *store;
395 foreach_alist(store, jcr->write_store) {
396 delete store->device;
397 delete store;
398 }
399 delete jcr->write_store;
400 jcr->write_store = NULL;
401 }
402 Dsm_check(200);
403
404 if (jcr->JobId != 0)
405 write_state_file(me->working_directory, "bacula-sd", get_first_port_host_order(me->sdaddrs));
406
407 return;
408 }
409