1 /*
2    Bacula(R) - The Network Backup Solution
3 
4    Copyright (C) 2000-2020 Kern Sibbald
5 
6    The original author of Bacula is Kern Sibbald, with contributions
7    from many others, a complete list can be found in the file AUTHORS.
8 
9    You may use this file and others of this release according to the
10    license defined in the LICENSE file, which includes the Affero General
11    Public License, v3.0 ("AGPLv3") and some additional permissions and
12    terms pursuant to its AGPLv3 Section 7.
13 
14    This notice must be preserved when any source code is
15    conveyed and/or propagated.
16 
17    Bacula(R) is a registered trademark of Kern Sibbald.
18 */
19 /*
20  *   Job control and execution for Storage Daemon
21  *
22  *   Written by Kern Sibbald, MM
23  *
24  */
25 
26 #include "bacula.h"
27 #include "stored.h"
28 
29 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
30 
31 /* Imported variables */
32 extern STORES *me;                    /* our Global resource */
33 extern uint32_t VolSessionTime;
34 
35 /* Imported functions */
36 extern uint32_t newVolSessionId();
37 extern bool do_vbackup(JCR *jcr);
38 
/*
 * Requests from the Director daemon
 *
 * sscanf() format used by job_cmd() to split the Director's job command
 * into 19 fields.  Every %s conversion carries an explicit field width so
 * that an over-long field cannot overrun its destination:
 *   - SpoolSize      -> char spool_size[30]   (29 chars + NUL)
 *   - Authorization  -> char sd_auth_key[200] (199 chars + NUL)
 * An over-long SpoolSize makes the following literal fail to match, so
 * fewer than 19 fields are converted and the command is rejected; an
 * over-long Authorization value is truncated to 199 characters.
 */
static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
      "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
      "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%29s "
      "rerunning=%d VolSessionId=%d VolSessionTime=%d sd_client=%d "
      "Authorization=%199s\n";

/* Responses sent to Director daemon */
static char OKjob[]     = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
static char BAD_job[]   = "3915 Bad Job command. stat=%d CMD: %s\n";
49 
50 /*
51  * Director requests us to start a job
52  * Basic tasks done here:
53  *  - We pickup the JobId to be run from the Director.
54  *  - We pickup the device, media, and pool from the Director
55  *  - Wait for a connection from the File Daemon (FD)
56  *  - Accept commands from the FD (i.e. run the job)
57  *  - Return when the connection is terminated or
58  *    there is an error.
59  */
job_cmd(JCR * jcr)60 bool job_cmd(JCR *jcr)
61 {
62    int32_t JobId;
63    char sd_auth_key[200];
64    char spool_size[30];
65    char seed[100];
66    BSOCK *dir = jcr->dir_bsock;
67    POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
68    int32_t JobType, level, spool_attributes, no_attributes, spool_data;
69    int32_t write_part_after_job, PreferMountedVols;
70    int32_t rerunning;
71    int32_t is_client;
72    int stat;
73    JCR *ojcr;
74 
75    /*
76     * Get JobId and permissions from Director
77     */
78    Dmsg1(100, "<dird: %s", dir->msg);
79    bstrncpy(spool_size, "0", sizeof(spool_size));
80    stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
81               client_name.c_str(),
82               &JobType, &level, fileset_name.c_str(), &no_attributes,
83               &spool_attributes, fileset_md5.c_str(), &spool_data,
84               &write_part_after_job, &PreferMountedVols, spool_size,
85               &rerunning, &jcr->VolSessionId, &jcr->VolSessionTime,
86               &is_client, &sd_auth_key);
87    if (stat != 19) {
88       pm_strcpy(jcr->errmsg, dir->msg);
89       dir->fsend(BAD_job, stat, jcr->errmsg);
90       Dmsg1(100, ">dird: %s", dir->msg);
91       jcr->setJobStatus(JS_ErrorTerminated);
92       return false;
93    }
94    jcr->rerunning = rerunning;
95    jcr->sd_client = is_client;
96    if (is_client) {
97       jcr->sd_auth_key = bstrdup(sd_auth_key);
98    }
99    Dmsg3(100, "rerunning=%d VolSesId=%d VolSesTime=%d\n", jcr->rerunning,
100          jcr->VolSessionId, jcr->VolSessionTime);
101    /*
102     * Since this job could be rescheduled, we
103     *  check to see if we have it already. If so
104     *  free the old jcr and use the new one.
105     */
106    ojcr = get_jcr_by_full_name(job.c_str());
107    if (ojcr && !ojcr->authenticated) {
108       Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(intptr_t)ojcr, job.c_str());
109       free_jcr(ojcr);
110    }
111    jcr->JobId = JobId;
112    Dmsg2(800, "Start JobId=%d %p\n", JobId, jcr);
113    set_jcr_in_tsd(jcr);
114 
115    /*
116     * If job rescheduled because previous was incomplete,
117     * the Resched flag is set and VolSessionId and VolSessionTime
118     * are given to us (same as restarted job).
119     */
120    if (!jcr->rerunning) {
121       jcr->VolSessionId = newVolSessionId();
122       jcr->VolSessionTime = VolSessionTime;
123    }
124    bstrncpy(jcr->Job, job, sizeof(jcr->Job));
125    unbash_spaces(job_name);
126    jcr->job_name = get_pool_memory(PM_NAME);
127    pm_strcpy(jcr->job_name, job_name);
128    unbash_spaces(client_name);
129    jcr->client_name = get_pool_memory(PM_NAME);
130    pm_strcpy(jcr->client_name, client_name);
131    unbash_spaces(fileset_name);
132    jcr->fileset_name = get_pool_memory(PM_NAME);
133    pm_strcpy(jcr->fileset_name, fileset_name);
134    jcr->setJobType(JobType);
135    jcr->setJobLevel(level);
136    jcr->no_attributes = no_attributes;
137    jcr->spool_attributes = spool_attributes;
138    jcr->spool_data = spool_data;
139    jcr->spool_size = str_to_int64(spool_size);
140    jcr->write_part_after_job = write_part_after_job;
141    jcr->fileset_md5 = get_pool_memory(PM_NAME);
142    pm_strcpy(jcr->fileset_md5, fileset_md5);
143    jcr->PreferMountedVols = PreferMountedVols;
144 
145 
146    jcr->authenticated = false;
147 
148    /*
149     * Pass back an authorization key for the File daemon
150     */
151    if (jcr->sd_client) {
152       bstrncpy(sd_auth_key, "xxx", 3);
153    } else {
154       bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
155       make_session_key(sd_auth_key, seed, 1);
156    }
157    dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, sd_auth_key);
158    Dmsg2(150, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
159    /* If not client, set key, otherwise it is already set */
160    if (!jcr->sd_client) {
161       jcr->sd_auth_key = bstrdup(sd_auth_key);
162       memset(sd_auth_key, 0, sizeof(sd_auth_key));
163    }
164    new_plugins(jcr);            /* instantiate the plugins */
165    generate_daemon_event(jcr, "JobStart");
166    generate_plugin_event(jcr, bsdEventJobStart, (void *)"JobStart");
167    return true;
168 }
169 
run_cmd(JCR * jcr)170 bool run_cmd(JCR *jcr)
171 {
172    struct timeval tv;
173    struct timezone tz;
174    struct timespec timeout;
175    int errstat = 0;
176 
177    Dsm_check(200);
178    Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
179 
180    /*
181     * If we do not need the FD,
182     *  we are doing a migration, copy, or virtual backup.
183     */
184    if (jcr->no_client_used()) {
185       do_vbackup(jcr);
186       return false;
187    }
188 
189    jcr->sendJobStatus(JS_WaitFD);          /* wait for FD to connect */
190 
191    Dmsg2(050, "sd_calls_client=%d sd_client=%d\n", jcr->sd_calls_client, jcr->sd_client);
192    if (jcr->sd_calls_client) {
193       if (!read_client_hello(jcr)) {
194          return false;
195       }
196       /*
197        * Authenticate the File daemon
198        */
199       Dmsg0(050, "=== Authenticate FD\n");
200       if (jcr->authenticated || !authenticate_filed(jcr, jcr->file_bsock, jcr->FDVersion)) {
201          Dmsg1(050, "Authentication failed Job %s\n", jcr->Job);
202          Qmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
203       } else {
204          jcr->authenticated = true;
205       }
206    } else if (!jcr->sd_client) {
207       /* We wait to receive connection from Client */
208       gettimeofday(&tv, &tz);
209       timeout.tv_nsec = tv.tv_usec * 1000;
210       timeout.tv_sec = tv.tv_sec + me->client_wait;
211 
212       Dmsg3(050, "%s waiting %d sec for FD to contact SD key=%s\n",
213             jcr->Job, (int)(timeout.tv_sec-time(NULL)), jcr->sd_auth_key);
214 
215       Dmsg3(800, "=== Block Job=%s jid=%d %p\n", jcr->Job, jcr->JobId, jcr);
216 
217       /*
218        * Wait for the File daemon to contact us to start the Job,
219        *  when he does, we will be released, unless the 30 minutes
220        *  expires.
221        */
222       P(mutex);
223       while ( !jcr->authenticated && !job_canceled(jcr) ) {
224          errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
225          if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
226             break;
227          }
228          Dmsg1(800, "=== Auth cond errstat=%d\n", errstat);
229       }
230       Dmsg4(050, "=== Auth=%d jid=%d canceled=%d errstat=%d\n",
231          jcr->JobId, jcr->authenticated, job_canceled(jcr), errstat);
232       V(mutex);
233       Dmsg2(800, "Auth fail or cancel for jid=%d %p\n", jcr->JobId, jcr);
234    }
235 
236    memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
237 
238    if (jcr->authenticated && !job_canceled(jcr)) {
239       Dmsg2(050, "Running jid=%d %p\n", jcr->JobId, jcr);
240       run_job(jcr);                   /* Run the job */
241    }
242    Dmsg2(800, "Done jid=%d %p\n", jcr->JobId, jcr);
243    return false;
244 }
245 
246 
247 #ifdef needed
248 /*
249  *   Query Device command from Director
250  *   Sends Storage Daemon's information on the device to the
251  *    caller (presumably the Director).
252  *   This command always returns "true" so that the line is
253  *    not closed on an error.
254  *
255  */
query_cmd(JCR * jcr)256 bool query_cmd(JCR *jcr)
257 {
258    POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
259    BSOCK *dir = jcr->dir_bsock;
260    DEVRES *device;
261    AUTOCHANGER *changer;
262    bool ok;
263 
264    Dmsg1(100, "Query_cmd: %s", dir->msg);
265    ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
266    Dmsg1(100, "<dird: %s\n", dir->msg);
267    if (ok) {
268       unbash_spaces(dev_name);
269       foreach_res(device, R_DEVICE) {
270          /* Find resource, and make sure we were able to open it */
271          if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
272             if (!device->dev) {
273                device->dev = init_dev(jcr, device, false, statcollector);
274             }
275             if (!device->dev) {
276                break;
277             }
278             ok = dir_update_device(jcr, device->dev);
279             if (ok) {
280                ok = dir->fsend(OK_query);
281             } else {
282                dir->fsend(NO_query);
283             }
284             return ok;
285          }
286       }
287       foreach_res(changer, R_AUTOCHANGER) {
288          /* Find resource, and make sure we were able to open it */
289          if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
290             if (!changer->device || changer->device->size() == 0) {
291                continue;              /* no devices */
292             }
293             ok = dir_update_changer(jcr, changer);
294             if (ok) {
295                ok = dir->fsend(OK_query);
296             } else {
297                dir->fsend(NO_query);
298             }
299             return ok;
300          }
301       }
302       /* If we get here, the device/autochanger was not found */
303       unbash_spaces(dir->msg);
304       pm_strcpy(jcr->errmsg, dir->msg);
305       dir->fsend(NO_device, dev_name.c_str());
306       Dmsg1(100, ">dird: %s\n", dir->msg);
307    } else {
308       unbash_spaces(dir->msg);
309       pm_strcpy(jcr->errmsg, dir->msg);
310       dir->fsend(BAD_query, jcr->errmsg);
311       Dmsg1(100, ">dird: %s\n", dir->msg);
312    }
313 
314    return true;
315 }
316 
317 #endif
318 
319 
320 /*
321  * Destroy the Job Control Record and associated
322  * resources (sockets).
323  */
stored_free_jcr(JCR * jcr)324 void stored_free_jcr(JCR *jcr)
325 {
326    Dmsg2(800, "End Job JobId=%u %p\n", jcr->JobId, jcr);
327    if (jcr->jobmedia_queue) {
328       flush_jobmedia_queue(jcr);
329       delete jcr->jobmedia_queue;
330       jcr->jobmedia_queue = NULL;
331    }
332 
333    if (jcr->dir_bsock) {
334       Dmsg2(800, "Send terminate jid=%d %p\n", jcr->JobId, jcr);
335       jcr->dir_bsock->signal(BNET_EOD);
336       jcr->dir_bsock->signal(BNET_TERMINATE);
337       jcr->dir_bsock->destroy();
338    }
339    if (jcr->file_bsock) {
340       jcr->file_bsock->destroy();
341    }
342    if (jcr->job_name) {
343       free_pool_memory(jcr->job_name);
344    }
345    if (jcr->client_name) {
346       free_memory(jcr->client_name);
347       jcr->client_name = NULL;
348    }
349    if (jcr->fileset_name) {
350       free_memory(jcr->fileset_name);
351    }
352    if (jcr->fileset_md5) {
353       free_memory(jcr->fileset_md5);
354    }
355    if (jcr->bsr) {
356       free_bsr(jcr->bsr);
357       jcr->bsr = NULL;
358    }
359    /* Free any restore volume list created */
360    free_restore_volume_list(jcr);
361    if (jcr->RestoreBootstrap) {
362       unlink(jcr->RestoreBootstrap);
363       free_pool_memory(jcr->RestoreBootstrap);
364       jcr->RestoreBootstrap = NULL;
365    }
366    if (jcr->next_dev || jcr->prev_dev) {
367       Qmsg0(NULL, M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
368    }
369    pthread_cond_destroy(&jcr->job_start_wait);
370    if (jcr->dcrs) {
371       delete jcr->dcrs;
372    }
373    jcr->dcrs = NULL;
374 
375    /* Avoid a double free */
376    if (jcr->dcr == jcr->read_dcr) {
377       jcr->read_dcr = NULL;
378    }
379    if (jcr->dcr) {
380       free_dcr(jcr->dcr);
381       jcr->dcr = NULL;
382    }
383    if (jcr->read_dcr) {
384       free_dcr(jcr->read_dcr);
385       jcr->read_dcr = NULL;
386    }
387 
388    if (jcr->read_store) {
389       DIRSTORE *store;
390       foreach_alist(store, jcr->read_store) {
391          delete store->device;
392          delete store;
393       }
394       delete jcr->read_store;
395       jcr->read_store = NULL;
396    }
397    if (jcr->write_store) {
398       DIRSTORE *store;
399       foreach_alist(store, jcr->write_store) {
400          delete store->device;
401          delete store;
402       }
403       delete jcr->write_store;
404       jcr->write_store = NULL;
405    }
406    Dsm_check(200);
407 
408    if (jcr->JobId != 0)
409       write_state_file(me->working_directory, "bacula-sd", get_first_port_host_order(me->sdaddrs));
410 
411    return;
412 }
413