1 /*
2    Bacula(R) - The Network Backup Solution
3 
4    Copyright (C) 2000-2020 Kern Sibbald
5 
6    The original author of Bacula is Kern Sibbald, with contributions
7    from many others, a complete list can be found in the file AUTHORS.
8 
9    You may use this file and others of this release according to the
10    license defined in the LICENSE file, which includes the Affero General
11    Public License, v3.0 ("AGPLv3") and some additional permissions and
12    terms pursuant to its AGPLv3 Section 7.
13 
14    This notice must be preserved when any source code is
15    conveyed and/or propagated.
16 
17    Bacula(R) is a registered trademark of Kern Sibbald.
18 */
19 /*
20  *   Job control and execution for Storage Daemon
21  *
22  *   Written by Kern Sibbald, MM
23  *
24  */
25 
26 #include "bacula.h"
27 #include "stored.h"
28 
/*
 * File-local mutex handed to pthread_cond_timedwait() together with a
 * JCR's job_start_wait condition while run_cmd() waits for the File
 * daemon to connect.
 */
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
30 
31 /* Imported variables */
32 extern STORES *me;                    /* our Global resource */
33 extern uint32_t VolSessionTime;
34 
35 /* Imported functions */
36 extern uint32_t newVolSessionId();
37 extern bool do_vbackup(JCR *jcr);
38 
/*
 * Requests from the Director daemon
 *
 * sscanf() format used by job_cmd() to parse the Director's job command.
 * It yields 19 conversions.  Every string conversion carries an explicit
 * field width so that it cannot write past the buffer job_cmd() supplies:
 *   SpoolSize      -> char spool_size[30]   (29 characters + NUL)
 *   Authorization  -> char sd_auth_key[200] (199 characters + NUL)
 */
static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
      "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
      "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%29s "
      "rerunning=%d VolSessionId=%d VolSessionTime=%d sd_client=%d "
      "Authorization=%199s\n";
45 
/*
 * Responses sent to Director daemon
 *
 *  OKjob   - sent by job_cmd() once the job command has been accepted;
 *            carries the VolSessionId, the VolSessionTime and an
 *            authorization key.
 *  BAD_job - sent by job_cmd() when the job command cannot be parsed;
 *            carries the sscanf() conversion count and the command text.
 */
static char OKjob[]     = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
static char BAD_job[]   = "3915 Bad Job command. stat=%d CMD: %s\n";
49 
50 /*
51  * Director requests us to start a job
52  * Basic tasks done here:
53  *  - We pickup the JobId to be run from the Director.
54  *  - We pickup the device, media, and pool from the Director
55  *  - Wait for a connection from the File Daemon (FD)
56  *  - Accept commands from the FD (i.e. run the job)
57  *  - Return when the connection is terminated or
58  *    there is an error.
59  */
job_cmd(JCR * jcr)60 bool job_cmd(JCR *jcr)
61 {
62    int32_t JobId;
63    char sd_auth_key[200];
64    char spool_size[30];
65    char seed[100];
66    BSOCK *dir = jcr->dir_bsock;
67    POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
68    int32_t JobType, level, spool_attributes, no_attributes, spool_data;
69    int32_t write_part_after_job, PreferMountedVols;
70    int32_t rerunning;
71    int32_t is_client;
72    int stat;
73    JCR *ojcr;
74 
75    /*
76     * Get JobId and permissions from Director
77     */
78    Dmsg1(100, "<dird: %s", dir->msg);
79    bstrncpy(spool_size, "0", sizeof(spool_size));
80    stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
81               client_name.c_str(),
82               &JobType, &level, fileset_name.c_str(), &no_attributes,
83               &spool_attributes, fileset_md5.c_str(), &spool_data,
84               &write_part_after_job, &PreferMountedVols, spool_size,
85               &rerunning, &jcr->VolSessionId, &jcr->VolSessionTime,
86               &is_client, &sd_auth_key);
87    if (stat != 19) {
88       pm_strcpy(jcr->errmsg, dir->msg);
89       dir->fsend(BAD_job, stat, jcr->errmsg);
90       Dmsg1(100, ">dird: %s", dir->msg);
91       jcr->setJobStatus(JS_ErrorTerminated);
92       return false;
93    }
94    jcr->rerunning = rerunning;
95    jcr->sd_client = is_client;
96    if (is_client) {
97       jcr->sd_auth_key = bstrdup(sd_auth_key);
98    }
99    Dmsg3(100, "rerunning=%d VolSesId=%d VolSesTime=%d\n", jcr->rerunning,
100          jcr->VolSessionId, jcr->VolSessionTime);
101    /*
102     * Since this job could be rescheduled, we
103     *  check to see if we have it already. If so
104     *  free the old jcr and use the new one.
105     */
106    ojcr = get_jcr_by_full_name(job.c_str());
107    if (ojcr && !ojcr->authenticated) {
108       Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(intptr_t)ojcr, job.c_str());
109       free_jcr(ojcr);
110    }
111    jcr->JobId = JobId;
112    Dmsg2(800, "Start JobId=%d %p\n", JobId, jcr);
113    set_jcr_in_tsd(jcr);
114 
115    /*
116     * If job rescheduled because previous was incomplete,
117     * the Resched flag is set and VolSessionId and VolSessionTime
118     * are given to us (same as restarted job).
119     */
120    if (!jcr->rerunning) {
121       jcr->VolSessionId = newVolSessionId();
122       jcr->VolSessionTime = VolSessionTime;
123    }
124    bstrncpy(jcr->Job, job, sizeof(jcr->Job));
125    unbash_spaces(job_name);
126    jcr->job_name = get_pool_memory(PM_NAME);
127    pm_strcpy(jcr->job_name, job_name);
128    unbash_spaces(client_name);
129    jcr->client_name = get_pool_memory(PM_NAME);
130    pm_strcpy(jcr->client_name, client_name);
131    unbash_spaces(fileset_name);
132    jcr->fileset_name = get_pool_memory(PM_NAME);
133    pm_strcpy(jcr->fileset_name, fileset_name);
134    jcr->setJobType(JobType);
135    jcr->setJobLevel(level);
136    jcr->no_attributes = no_attributes;
137    jcr->spool_attributes = spool_attributes;
138    jcr->spool_data = spool_data;
139    jcr->spool_size = str_to_int64(spool_size);
140    jcr->write_part_after_job = write_part_after_job;
141    jcr->fileset_md5 = get_pool_memory(PM_NAME);
142    pm_strcpy(jcr->fileset_md5, fileset_md5);
143    jcr->PreferMountedVols = PreferMountedVols;
144 
145 
146    jcr->authenticated = false;
147 
148    /*
149     * Pass back an authorization key for the File daemon
150     */
151    if (jcr->sd_client) {
152       bstrncpy(sd_auth_key, "xxx", 3);
153    } else {
154       bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
155       make_session_key(sd_auth_key, seed, 1);
156    }
157    dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, sd_auth_key);
158    Dmsg2(150, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
159    /* If not client, set key, otherwise it is already set */
160    if (!jcr->sd_client) {
161       jcr->sd_auth_key = bstrdup(sd_auth_key);
162       memset(sd_auth_key, 0, sizeof(sd_auth_key));
163    }
164    new_plugins(jcr);            /* instantiate the plugins */
165    generate_daemon_event(jcr, "JobStart");
166    generate_plugin_event(jcr, bsdEventJobStart, (void *)"JobStart");
167 
168    /* Keep track of the important events */
169    events_send_msg(jcr, "SJ0001", EVENTS_TYPE_JOB, jcr->director->hdr.name, (intptr_t)jcr,
170                    "Job Start jobid=%d job=%s", jcr->JobId, jcr->Job);
171    return true;
172 }
173 
run_cmd(JCR * jcr)174 bool run_cmd(JCR *jcr)
175 {
176    struct timeval tv;
177    struct timezone tz;
178    struct timespec timeout;
179    int errstat = 0;
180 
181    Dsm_check(200);
182    Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
183 
184    /* If we do not need the FD, we are doing a virtual backup. */
185    if (jcr->no_client_used()) {
186       do_vbackup(jcr);
187       return false;
188    }
189 
190    jcr->sendJobStatus(JS_WaitFD);          /* wait for FD to connect */
191 
192    Dmsg2(050, "sd_calls_client=%d sd_client=%d\n", jcr->sd_calls_client, jcr->sd_client);
193    if (jcr->sd_calls_client) {
194       if (!read_client_hello(jcr)) {
195          return false;
196       }
197       /*
198        * Authenticate the File daemon
199        */
200       Dmsg0(050, "=== Authenticate FD\n");
201       if (jcr->authenticated || !authenticate_filed(jcr, jcr->file_bsock, jcr->FDVersion)) {
202          Dmsg1(050, "Authentication failed Job %s\n", jcr->Job);
203          Qmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
204       } else {
205          jcr->authenticated = true;
206       }
207    } else if (!jcr->sd_client) {
208       /* We wait to receive connection from Client */
209       gettimeofday(&tv, &tz);
210       timeout.tv_nsec = tv.tv_usec * 1000;
211       timeout.tv_sec = tv.tv_sec + me->client_wait;
212 
213       Dmsg3(050, "%s waiting %d sec for FD to contact SD key=%s\n",
214             jcr->Job, (int)(timeout.tv_sec-time(NULL)), jcr->sd_auth_key);
215 
216       Dmsg3(800, "=== Block Job=%s jid=%d %p\n", jcr->Job, jcr->JobId, jcr);
217 
218       /*
219        * Wait for the File daemon to contact us to start the Job,
220        *  when he does, we will be released, unless the 30 minutes
221        *  expires.
222        */
223       P(mutex);
224       while ( !jcr->authenticated && !job_canceled(jcr) ) {
225          errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
226          if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
227             break;
228          }
229          Dmsg1(800, "=== Auth cond errstat=%d\n", errstat);
230       }
231       Dmsg4(050, "=== Auth=%d jid=%d canceled=%d errstat=%d\n",
232          jcr->JobId, jcr->authenticated, job_canceled(jcr), errstat);
233       V(mutex);
234       Dmsg2(800, "Auth fail or cancel for jid=%d %p\n", jcr->JobId, jcr);
235    }
236 
237    memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
238 
239    if (jcr->authenticated && !job_canceled(jcr)) {
240       Dmsg2(800, "Running jid=%d %p\n", jcr->JobId, jcr);
241       run_job(jcr);                   /* Run the job */
242    }
243    Dmsg2(800, "Done jid=%d %p\n", jcr->JobId, jcr);
244    return false;
245 }
246 
247 
248 #ifdef needed
/*
 *   Query Device command from Director
 *   Sends Storage Daemon's information on the device to the
 *    caller (presumably the Director).
 *
 *   The name in the command is looked up first among the device
 *    resources, then among the autochanger resources.
 *
 *   Returns: the result of the update/OK_query exchange when a matching
 *            device or autochanger is found (this can be false);
 *            true in all other cases (bad command, name not found),
 *            so that the line is not closed on such an error.
 *
 *   Note: this function is only compiled when "needed" is defined.
 */
bool query_cmd(JCR *jcr)
{
   POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
   BSOCK *dir = jcr->dir_bsock;
   DEVRES *device;
   AUTOCHANGER *changer;
   bool ok;

   Dmsg1(100, "Query_cmd: %s", dir->msg);
   /* Pick the device name out of the command */
   ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
   Dmsg1(100, "<dird: %s\n", dir->msg);
   if (ok) {
      unbash_spaces(dev_name);
      /* First try: a device resource with that name */
      foreach_res(device, R_DEVICE) {
         /* Find resource, and make sure we were able to open it */
         if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
            if (!device->dev) {
               device->dev = init_dev(jcr, device, false, statcollector);
            }
            if (!device->dev) {
               /* Could not initialize it; stop looking at devices */
               break;
            }
            ok = dir_update_device(jcr, device->dev);
            if (ok) {
               ok = dir->fsend(OK_query);
            } else {
               dir->fsend(NO_query);
            }
            return ok;
         }
      }
      /* Second try: an autochanger resource with that name */
      foreach_res(changer, R_AUTOCHANGER) {
         /* Find resource, and make sure we were able to open it */
         if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
            if (!changer->device || changer->device->size() == 0) {
               continue;              /* no devices */
            }
            ok = dir_update_changer(jcr, changer);
            if (ok) {
               ok = dir->fsend(OK_query);
            } else {
               dir->fsend(NO_query);
            }
            return ok;
         }
      }
      /* If we get here, the device/autochanger was not found */
      unbash_spaces(dir->msg);
      pm_strcpy(jcr->errmsg, dir->msg);
      dir->fsend(NO_device, dev_name.c_str());
      Dmsg1(100, ">dird: %s\n", dir->msg);
   } else {
      /* The command did not match query_device; echo it back */
      unbash_spaces(dir->msg);
      pm_strcpy(jcr->errmsg, dir->msg);
      dir->fsend(BAD_query, jcr->errmsg);
      Dmsg1(100, ">dird: %s\n", dir->msg);
   }

   return true;
}
317 
318 #endif
319 
320 
321 /*
322  * Destroy the Job Control Record and associated
323  * resources (sockets).
324  */
stored_free_jcr(JCR * jcr)325 void stored_free_jcr(JCR *jcr)
326 {
327    Dmsg2(800, "End Job JobId=%u %p\n", jcr->JobId, jcr);
328    if (jcr->jobmedia_queue) {
329       flush_jobmedia_queue(jcr);
330       delete jcr->jobmedia_queue;
331       jcr->jobmedia_queue = NULL;
332 
333       /* ***BEEF*** */
334       delete jcr->filemedia_queue;
335       jcr->filemedia_queue = NULL;
336    }
337    free_bsock(jcr->file_bsock);
338    free_bsock(jcr->dir_bsock);
339    if (jcr->job_name) {
340       free_pool_memory(jcr->job_name);
341    }
342    if (jcr->client_name) {
343       free_memory(jcr->client_name);
344       jcr->client_name = NULL;
345    }
346    if (jcr->fileset_name) {
347       free_memory(jcr->fileset_name);
348    }
349    if (jcr->fileset_md5) {
350       free_memory(jcr->fileset_md5);
351    }
352    if (jcr->bsr) {
353       free_bsr(jcr->bsr);
354       jcr->bsr = NULL;
355    }
356    /* Free any restore volume list created */
357    free_restore_volume_list(jcr);
358    if (jcr->RestoreBootstrap) {
359       unlink(jcr->RestoreBootstrap);
360       bfree_and_null(jcr->RestoreBootstrap);
361    }
362    if (jcr->next_dev || jcr->prev_dev) {
363       Qmsg0(NULL, M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
364    }
365    pthread_cond_destroy(&jcr->job_start_wait);
366    if (jcr->dcrs) {
367       delete jcr->dcrs;
368    }
369    jcr->dcrs = NULL;
370 
371    /* Avoid a double free */
372    if (jcr->dcr == jcr->read_dcr) {
373       jcr->read_dcr = NULL;
374    }
375    if (jcr->dcr) {
376       free_dcr(jcr->dcr);
377       jcr->dcr = NULL;
378    }
379    if (jcr->read_dcr) {
380       free_dcr(jcr->read_dcr);
381       jcr->read_dcr = NULL;
382    }
383 
384    if (jcr->read_store) {
385       DIRSTORE *store;
386       foreach_alist(store, jcr->read_store) {
387          delete store->device;
388          delete store;
389       }
390       delete jcr->read_store;
391       jcr->read_store = NULL;
392    }
393    if (jcr->write_store) {
394       DIRSTORE *store;
395       foreach_alist(store, jcr->write_store) {
396          delete store->device;
397          delete store;
398       }
399       delete jcr->write_store;
400       jcr->write_store = NULL;
401    }
402    Dsm_check(200);
403 
404    if (jcr->JobId != 0)
405       write_state_file(me->working_directory, "bacula-sd", get_first_port_host_order(me->sdaddrs));
406 
407    return;
408 }
409