1 /*
2 BAREOS® - Backup Archiving REcovery Open Sourced
3
4 Copyright (C) 2000-2012 Free Software Foundation Europe e.V.
5 Copyright (C) 2011-2012 Planets Communications B.V.
6 Copyright (C) 2013-2013 Bareos GmbH & Co. KG
7
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version three of the GNU Affero General Public
10 License as published by the Free Software Foundation and included
11 in the file LICENSE.
12
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Affero General Public License for more details.
17
18 You should have received a copy of the GNU Affero General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 02110-1301, USA.
22 */
23 /*
24 * Manipulation routines for Job Control Records and
25 * handling of last_jobs_list.
26 *
27 * Kern E. Sibbald, December 2000
28 *
29 * These routines are thread safe.
30 *
31 * The job list routines were re-written in May 2005 to
32 * eliminate the global lock while traversing the list, and
33 * to use the dlist subroutines. The locking is now done
34 * on the list each time the list is modified or traversed.
35 * That is it is "micro-locked" rather than globally locked.
36 * The result is that there is one lock/unlock for each entry
37 * in the list while traversing it rather than a single lock
38 * at the beginning of a traversal and one at the end. This
39 * incurs slightly more overhead, but effectively eliminates
 * the possibility of race conditions. In addition, with the
41 * exception of the global locking of the list during the
42 * re-reading of the config file, no recursion is needed.
43 *
44 */
45
46 #include "include/bareos.h"
47 #include "include/jcr.h"
48 #include "lib/edit.h"
49 #include "lib/tls_conf.h"
50
51 const int debuglevel = 3400;
52
53 /* External variables we reference */
54
55 /* External referenced functions */
56 void FreeBregexps(alist *bregexps);
57
58 /* Forward referenced functions */
59 extern "C" void TimeoutHandler(int sig);
60 static void JcrTimeoutCheck(watchdog_t *self);
61 #ifdef TRACE_JCR_CHAIN
62 static void b_lock_jcr_chain(const char *filen, int line);
63 static void b_unlock_jcr_chain(const char *filen, int line);
64 #define lock_jcr_chain() b_lock_jcr_chain(__FILE__, __LINE__);
65 #define unlock_jcr_chain() b_unlock_jcr_chain(__FILE__, __LINE__);
66 #else
67 static void lock_jcr_chain();
68 static void unlock_jcr_chain();
69 #endif
70
71 int num_jobs_run;
72 dlist *last_jobs = nullptr;
73 const int max_last_jobs = 10;
74
75 static dlist *job_control_record_chain = nullptr;
76 static int watch_dog_timeout = 0;
77
78 static pthread_mutex_t jcr_lock = PTHREAD_MUTEX_INITIALIZER;
79 static pthread_mutex_t job_start_mutex = PTHREAD_MUTEX_INITIALIZER;
80 static pthread_mutex_t last_jobs_mutex = PTHREAD_MUTEX_INITIALIZER;
81
82 #ifdef HAVE_WIN32
83 static bool tsd_initialized = false;
84 static pthread_key_t jcr_key; /* Pointer to jcr for each thread */
85 #else
86 #ifdef PTHREAD_ONCE_KEY_NP
87 static pthread_key_t jcr_key = PTHREAD_ONCE_KEY_NP;
88 #else
89 static pthread_key_t jcr_key; /* Pointer to jcr for each thread */
90 static pthread_once_t key_once = PTHREAD_ONCE_INIT;
91 #endif
92 #endif
93
94 static char Job_status[] = "Status Job=%s JobStatus=%d\n";
95
LockJobs()96 void LockJobs() { P(job_start_mutex); }
97
UnlockJobs()98 void UnlockJobs() { V(job_start_mutex); }
99
InitLastJobsList()100 void InitLastJobsList()
101 {
102 JobControlRecord *jcr = nullptr;
103 struct s_last_job *job_entry = nullptr;
104 if (!last_jobs) { last_jobs = New(dlist(job_entry, &job_entry->link)); }
105 if (!job_control_record_chain) { job_control_record_chain = New(dlist(jcr, &jcr->link)); }
106 }
107
TermLastJobsList()108 void TermLastJobsList()
109 {
110 if (last_jobs) {
111 LockLastJobsList();
112 while (!last_jobs->empty()) {
113 void *je = last_jobs->first();
114 last_jobs->remove(je);
115 free(je);
116 }
117 delete last_jobs;
118 last_jobs = nullptr;
119 UnlockLastJobsList();
120 }
121 if (job_control_record_chain) {
122 delete job_control_record_chain;
123 job_control_record_chain = nullptr;
124 }
125 }
126
ReadLastJobsList(int fd,uint64_t addr)127 bool ReadLastJobsList(int fd, uint64_t addr)
128 {
129 struct s_last_job *je, job;
130 uint32_t num;
131 bool ok = true;
132
133 Dmsg1(100, "read_last_jobs seek to %d\n", (int)addr);
134 if (addr == 0 || lseek(fd, (boffset_t)addr, SEEK_SET) < 0) { return false; }
135 if (read(fd, &num, sizeof(num)) != sizeof(num)) { return false; }
136 Dmsg1(100, "Read num_items=%d\n", num);
137 if (num > 4 * max_last_jobs) { /* sanity check */
138 return false;
139 }
140 LockLastJobsList();
141 for (; num; num--) {
142 if (read(fd, &job, sizeof(job)) != sizeof(job)) {
143 BErrNo be;
144 Pmsg1(000, "Read job entry. ERR=%s\n", be.bstrerror());
145 ok = false;
146 break;
147 }
148 if (job.JobId > 0) {
149 je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
150 memcpy((char *)je, (char *)&job, sizeof(job));
151 if (!last_jobs) { InitLastJobsList(); }
152 last_jobs->append(je);
153 if (last_jobs->size() > max_last_jobs) {
154 je = (struct s_last_job *)last_jobs->first();
155 last_jobs->remove(je);
156 free(je);
157 }
158 }
159 }
160 UnlockLastJobsList();
161 return ok;
162 }
163
WriteLastJobsList(int fd,uint64_t addr)164 uint64_t WriteLastJobsList(int fd, uint64_t addr)
165 {
166 struct s_last_job *je;
167 uint32_t num;
168 ssize_t status;
169
170 Dmsg1(100, "write_last_jobs seek to %d\n", (int)addr);
171 if (lseek(fd, (boffset_t)addr, SEEK_SET) < 0) { return 0; }
172 if (last_jobs) {
173 LockLastJobsList();
174 /*
175 * First record is number of entires
176 */
177 num = last_jobs->size();
178 if (write(fd, &num, sizeof(num)) != sizeof(num)) {
179 BErrNo be;
180 Pmsg1(000, "Error writing num_items: ERR=%s\n", be.bstrerror());
181 goto bail_out;
182 }
183 foreach_dlist (je, last_jobs) {
184 if (write(fd, je, sizeof(struct s_last_job)) != sizeof(struct s_last_job)) {
185 BErrNo be;
186 Pmsg1(000, "Error writing job: ERR=%s\n", be.bstrerror());
187 goto bail_out;
188 }
189 }
190 UnlockLastJobsList();
191 }
192
193 /*
194 * Return current address
195 */
196 status = lseek(fd, 0, SEEK_CUR);
197 if (status < 0) { status = 0; }
198 return status;
199
200 bail_out:
201 UnlockLastJobsList();
202 return 0;
203 }
204
LockLastJobsList()205 void LockLastJobsList() { P(last_jobs_mutex); }
206
UnlockLastJobsList()207 void UnlockLastJobsList() { V(last_jobs_mutex); }
208
209 /*
210 * Get an ASCII representation of the Operation being performed as an english Noun
211 */
get_OperationName()212 const char *JobControlRecord::get_OperationName()
213 {
214 switch (JobType_) {
215 case JT_BACKUP:
216 return _("Backup");
217 case JT_VERIFY:
218 return _("Verifying");
219 case JT_RESTORE:
220 return _("Restoring");
221 case JT_ARCHIVE:
222 return _("Archiving");
223 case JT_COPY:
224 return _("Copying");
225 case JT_MIGRATE:
226 return _("Migration");
227 case JT_SCAN:
228 return _("Scanning");
229 case JT_CONSOLIDATE:
230 return _("Consolidating");
231 default:
232 return _("Unknown operation");
233 }
234 }
235
236 /*
237 * Get an ASCII representation of the Action being performed either an english Verb or Adjective
238 */
get_ActionName(bool past)239 const char *JobControlRecord::get_ActionName(bool past)
240 {
241 switch (JobType_) {
242 case JT_BACKUP:
243 return _("backup");
244 case JT_VERIFY:
245 return (past) ? _("verified") : _("verify");
246 case JT_RESTORE:
247 return (past) ? _("restored") : _("restore");
248 case JT_ARCHIVE:
249 return (past) ? _("archived") : _("archive");
250 case JT_COPY:
251 return (past) ? _("copied") : _("copy");
252 case JT_MIGRATE:
253 return (past) ? _("migrated") : _("migrate");
254 case JT_SCAN:
255 return (past) ? _("scanned") : _("scan");
256 case JT_CONSOLIDATE:
257 return (past) ? _("consolidated") : _("consolidate");
258 default:
259 return _("unknown action");
260 }
261 }
262
JobReads()263 bool JobControlRecord::JobReads()
264 {
265 switch (JobType_) {
266 case JT_VERIFY:
267 case JT_RESTORE:
268 case JT_COPY:
269 case JT_MIGRATE:
270 return true;
271 case JT_BACKUP:
272 if (JobLevel_ == L_VIRTUAL_FULL) { return true; }
273 break;
274 default:
275 break;
276 }
277 return false;
278 }
279
280 /*
281 * Push a job_callback_item onto the job end callback stack.
282 */
RegisterJobEndCallback(JobControlRecord * jcr,void JobEndCb (JobControlRecord * jcr,void *),void * ctx)283 void RegisterJobEndCallback(JobControlRecord *jcr, void JobEndCb(JobControlRecord *jcr, void *), void *ctx)
284 {
285 job_callback_item *item;
286
287 item = (job_callback_item *)malloc(sizeof(job_callback_item));
288
289 item->JobEndCb = JobEndCb;
290 item->ctx = ctx;
291
292 jcr->job_end_callbacks.push((void *)item);
293 }
294
295 /*
296 * Pop each job_callback_item and process it.
297 */
CallJobEndCallbacks(JobControlRecord * jcr)298 static void CallJobEndCallbacks(JobControlRecord *jcr)
299 {
300 job_callback_item *item;
301
302 if (jcr->job_end_callbacks.size() > 0) {
303 item = (job_callback_item *)jcr->job_end_callbacks.pop();
304 while (item) {
305 item->JobEndCb(jcr, item->ctx);
306 free(item);
307 item = (job_callback_item *)jcr->job_end_callbacks.pop();
308 }
309 }
310 }
311
312 /*
313 * Create thread key for thread specific data.
314 */
create_jcr_key()315 static void create_jcr_key()
316 {
317 int status;
318
319 #ifdef PTHREAD_ONCE_KEY_NP
320 status = pthread_key_create_once_np(&jcr_key, nullptr);
321 #else
322 status = pthread_key_create(&jcr_key, nullptr);
323 #endif
324 if (status != 0) {
325 BErrNo be;
326 Jmsg1(nullptr, M_ABORT, 0, _("pthread key create failed: ERR=%s\n"), be.bstrerror(status));
327 }
328 }
329
330 /*
331 * Setup thread key for thread specific data.
332 */
setup_tsd_key()333 void setup_tsd_key()
334 {
335 #ifdef HAVE_WIN32
336 P(jcr_lock);
337 if (!tsd_initialized) {
338 create_jcr_key();
339 tsd_initialized = true;
340 }
341 V(jcr_lock);
342 #else
343 #ifdef PTHREAD_ONCE_KEY_NP
344 create_jcr_key();
345 #else
346 int status;
347
348 status = pthread_once(&key_once, create_jcr_key);
349 if (status != 0) {
350 BErrNo be;
351 Jmsg1(nullptr, M_ABORT, 0, _("pthread_once failed. ERR=%s\n"), be.bstrerror(status));
352 }
353 #endif
354 #endif
355 }
356
357 /*
358 * Create a Job Control Record and link it into JobControlRecord chain
359 * Returns newly allocated JobControlRecord
360 *
361 * Note, since each daemon has a different JobControlRecord, he passes us the size.
362 */
new_jcr(int size,JCR_free_HANDLER * daemon_free_jcr)363 JobControlRecord *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr)
364 {
365 JobControlRecord *jcr;
366 MessageQeueItem *item = nullptr;
367 struct sigaction sigtimer;
368 int status;
369
370 Dmsg0(debuglevel, "Enter new_jcr\n");
371
372 setup_tsd_key();
373
374 jcr = (JobControlRecord *)malloc(size);
375 memset(jcr, 0, size);
376 jcr = new (jcr) JobControlRecord();
377
378 jcr->msg_queue = New(dlist(item, &item->link));
379 if ((status = pthread_mutex_init(&jcr->msg_queue_mutex, nullptr)) != 0) {
380 BErrNo be;
381 Jmsg(nullptr, M_ABORT, 0, _("Could not init msg_queue mutex. ERR=%s\n"), be.bstrerror(status));
382 }
383
384 jcr->my_thread_id = pthread_self();
385 jcr->job_end_callbacks.init(1, false);
386 jcr->sched_time = time(nullptr);
387 jcr->initial_sched_time = jcr->sched_time;
388 jcr->daemon_free_jcr = daemon_free_jcr; /* plug daemon free routine */
389 jcr->InitMutex();
390 jcr->IncUseCount();
391 jcr->VolumeName = GetPoolMemory(PM_FNAME);
392 jcr->VolumeName[0] = 0;
393 jcr->errmsg = GetPoolMemory(PM_MESSAGE);
394 jcr->errmsg[0] = 0;
395 jcr->comment = GetPoolMemory(PM_FNAME);
396 jcr->comment[0] = 0;
397
398 /*
399 * Setup some dummy values
400 */
401 bstrncpy(jcr->Job, "*System*", sizeof(jcr->Job));
402 jcr->JobId = 0;
403 jcr->setJobType(JT_SYSTEM); /* internal job until defined */
404 jcr->setJobLevel(L_NONE);
405 jcr->setJobStatus(JS_Created); /* ready to run */
406 sigtimer.sa_flags = 0;
407 sigtimer.sa_handler = TimeoutHandler;
408 sigfillset(&sigtimer.sa_mask);
409 sigaction(TIMEOUT_SIGNAL, &sigtimer, nullptr);
410
411 /*
412 * Locking jobs is a global lock that is needed
413 * so that the Director can stop new jobs from being
414 * added to the jcr chain while it processes a new
415 * conf file and does the RegisterJobEndCallback().
416 */
417 LockJobs();
418 lock_jcr_chain();
419 if (!job_control_record_chain) { job_control_record_chain = New(dlist(jcr, &jcr->link)); }
420 job_control_record_chain->append(jcr);
421 unlock_jcr_chain();
422 UnlockJobs();
423
424 return jcr;
425 }
426
427 /*
428 * Remove a JobControlRecord from the chain
429 *
430 * NOTE! The chain must be locked prior to calling this routine.
431 */
RemoveJcr(JobControlRecord * jcr)432 static void RemoveJcr(JobControlRecord *jcr)
433 {
434 Dmsg0(debuglevel, "Enter RemoveJcr\n");
435 if (!jcr) { Emsg0(M_ABORT, 0, _("nullptr jcr.\n")); }
436 job_control_record_chain->remove(jcr);
437 Dmsg0(debuglevel, "Leave RemoveJcr\n");
438 }
439
440 /*
441 * Free stuff common to all JCRs. N.B. Be careful to include only
442 * generic stuff in the common part of the jcr.
443 */
FreeCommonJcr(JobControlRecord * jcr)444 static void FreeCommonJcr(JobControlRecord *jcr)
445 {
446 Dmsg1(100, "FreeCommonJcr: %p \n", jcr);
447
448 if (!jcr) { Dmsg0(100, "FreeCommonJcr: Invalid jcr\n"); }
449
450 /*
451 * Uses jcr lock/unlock
452 */
453 RemoveJcrFromTsd(jcr);
454 jcr->SetKillable(false);
455
456 jcr->DestroyMutex();
457
458 if (jcr->msg_queue) {
459 delete jcr->msg_queue;
460 jcr->msg_queue = nullptr;
461 pthread_mutex_destroy(&jcr->msg_queue_mutex);
462 }
463
464 if (jcr->client_name) {
465 FreePoolMemory(jcr->client_name);
466 jcr->client_name = nullptr;
467 }
468
469 if (jcr->attr) {
470 FreePoolMemory(jcr->attr);
471 jcr->attr = nullptr;
472 }
473
474 if (jcr->sd_auth_key) {
475 free(jcr->sd_auth_key);
476 jcr->sd_auth_key = nullptr;
477 }
478
479 if (jcr->VolumeName) {
480 FreePoolMemory(jcr->VolumeName);
481 jcr->VolumeName = nullptr;
482 }
483
484 if (jcr->dir_bsock) {
485 jcr->dir_bsock->close();
486 delete jcr->dir_bsock;
487 jcr->dir_bsock = nullptr;
488 }
489
490 if (jcr->errmsg) {
491 FreePoolMemory(jcr->errmsg);
492 jcr->errmsg = nullptr;
493 }
494
495 if (jcr->where) {
496 free(jcr->where);
497 jcr->where = nullptr;
498 }
499
500 if (jcr->RegexWhere) {
501 free(jcr->RegexWhere);
502 jcr->RegexWhere = nullptr;
503 }
504
505 if (jcr->where_bregexp) {
506 FreeBregexps(jcr->where_bregexp);
507 delete jcr->where_bregexp;
508 jcr->where_bregexp = nullptr;
509 }
510
511 if (jcr->cached_path) {
512 FreePoolMemory(jcr->cached_path);
513 jcr->cached_path = nullptr;
514 jcr->cached_pnl = 0;
515 }
516
517 if (jcr->id_list) {
518 FreeGuidList(jcr->id_list);
519 jcr->id_list = nullptr;
520 }
521
522 if (jcr->comment) {
523 FreePoolMemory(jcr->comment);
524 jcr->comment = nullptr;
525 }
526
527 free(jcr);
528 }
529
530 /*
531 * Global routine to free a jcr
532 */
533 #ifdef DEBUG
b_free_jcr(const char * file,int line,JobControlRecord * jcr)534 void b_free_jcr(const char *file, int line, JobControlRecord *jcr)
535 {
536 struct s_last_job *je;
537
538 Dmsg3(debuglevel, "Enter FreeJcr jid=%u from %s:%d\n", jcr->JobId, file, line);
539
540 #else
541
542 void FreeJcr(JobControlRecord *jcr)
543 {
544 struct s_last_job *je;
545
546 Dmsg3(debuglevel, "Enter FreeJcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
547
548 #endif
549
550 lock_jcr_chain();
551 jcr->DecUseCount(); /* decrement use count */
552 if (jcr->UseCount() < 0) {
553 Jmsg2(jcr, M_ERROR, 0, _("JobControlRecord UseCount=%d JobId=%d\n"), jcr->UseCount(), jcr->JobId);
554 }
555 if (jcr->JobId > 0) {
556 Dmsg3(debuglevel, "Dec FreeJcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
557 }
558 if (jcr->UseCount() > 0) { /* if in use */
559 unlock_jcr_chain();
560 return;
561 }
562 if (jcr->JobId > 0) {
563 Dmsg3(debuglevel, "remove jcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
564 }
565 RemoveJcr(jcr); /* remove Jcr from chain */
566 unlock_jcr_chain();
567
568 DequeueMessages(jcr);
569 CallJobEndCallbacks(jcr); /* call registered callbacks */
570
571 Dmsg1(debuglevel, "End job=%d\n", jcr->JobId);
572
573 /*
574 * Keep some statistics
575 */
576 switch (jcr->getJobType()) {
577 case JT_BACKUP:
578 case JT_VERIFY:
579 case JT_RESTORE:
580 case JT_MIGRATE:
581 case JT_COPY:
582 case JT_ADMIN:
583 /*
584 * Keep list of last jobs, but not Console where JobId==0
585 */
586 if (jcr->JobId > 0) {
587 LockLastJobsList();
588 num_jobs_run++;
589 je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
590 memset(je, 0, sizeof(struct s_last_job)); /* zero in case unset fields */
591 je->Errors = jcr->JobErrors;
592 je->JobType = jcr->getJobType();
593 je->JobId = jcr->JobId;
594 je->VolSessionId = jcr->VolSessionId;
595 je->VolSessionTime = jcr->VolSessionTime;
596 bstrncpy(je->Job, jcr->Job, sizeof(je->Job));
597 je->JobFiles = jcr->JobFiles;
598 je->JobBytes = jcr->JobBytes;
599 je->JobStatus = jcr->JobStatus;
600 je->JobLevel = jcr->getJobLevel();
601 je->start_time = jcr->start_time;
602 je->end_time = time(nullptr);
603
604 if (!last_jobs) { InitLastJobsList(); }
605 last_jobs->append(je);
606 if (last_jobs->size() > max_last_jobs) {
607 je = (struct s_last_job *)last_jobs->first();
608 last_jobs->remove(je);
609 free(je);
610 }
611 UnlockLastJobsList();
612 }
613 break;
614 default:
615 break;
616 }
617
618 CloseMsg(jcr); /* close messages for this job */
619
620 if (jcr->daemon_free_jcr) { jcr->daemon_free_jcr(jcr); /* call daemon free routine */ }
621
622 FreeCommonJcr(jcr);
623 CloseMsg(nullptr); /* flush any daemon messages */
624 Dmsg0(debuglevel, "Exit FreeJcr\n");
625 }
626
627 void JobControlRecord::SetKillable(bool killable)
628 {
629 lock();
630
631 my_thread_killable = killable;
632 if (killable) {
633 my_thread_id = pthread_self();
634 } else {
635 memset(&my_thread_id, 0, sizeof(my_thread_id));
636 }
637
638 unlock();
639 }
640
641 void JobControlRecord::MyThreadSendSignal(int sig)
642 {
643 lock();
644
645 if (IsKillable() && !pthread_equal(my_thread_id, pthread_self())) {
646 Dmsg1(800, "Send kill to jid=%d\n", JobId);
647 pthread_kill(my_thread_id, sig);
648 } else if (!IsKillable()) {
649 Dmsg1(10, "Warning, can't send kill to jid=%d\n", JobId);
650 }
651
652 unlock();
653 }
654
655 /*
656 * Remove jcr from thread specific data, but but make sure it is us who are attached.
657 */
658 void RemoveJcrFromTsd(JobControlRecord *jcr)
659 {
660 JobControlRecord *tjcr = get_jcr_from_tsd();
661
662 if (tjcr == jcr) { SetJcrInTsd(INVALID_JCR); }
663 }
664
665 /*
666 * Put this jcr in the thread specifc data
667 */
668 void SetJcrInTsd(JobControlRecord *jcr)
669 {
670 int status;
671
672 status = pthread_setspecific(jcr_key, (void *)jcr);
673 if (status != 0) {
674 BErrNo be;
675 Jmsg1(jcr, M_ABORT, 0, _("pthread_setspecific failed: ERR=%s\n"), be.bstrerror(status));
676 }
677 }
678
679 /*
680 * Give me the jcr that is attached to this thread
681 */
682 JobControlRecord *get_jcr_from_tsd()
683 {
684 JobControlRecord *jcr = (JobControlRecord *)pthread_getspecific(jcr_key);
685
686 /*
687 * Set any INVALID_JCR to nullptr which the rest of BAREOS understands
688 */
689 if (jcr == INVALID_JCR) { jcr = nullptr; }
690
691 return jcr;
692 }
693
694 /*
695 * Find which JobId corresponds to the current thread
696 */
697 uint32_t GetJobidFromTsd()
698 {
699 JobControlRecord *jcr = (JobControlRecord *)pthread_getspecific(jcr_key);
700 uint32_t JobId = 0;
701
702 if (jcr && jcr != INVALID_JCR) { JobId = (uint32_t)jcr->JobId; }
703
704 return JobId;
705 }
706
707 /*
708 * Given a JobId, find the JobControlRecord
709 *
710 * Returns: jcr on success
711 * nullptr on failure
712 */
713 JobControlRecord *get_jcr_by_id(uint32_t JobId)
714 {
715 JobControlRecord *jcr;
716
717 foreach_jcr (jcr) {
718 if (jcr->JobId == JobId) {
719 jcr->IncUseCount();
720 Dmsg3(debuglevel, "Inc get_jcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
721 break;
722 }
723 }
724 endeach_jcr(jcr);
725
726 return jcr;
727 }
728
729 /*
730 * Given a thread id, find the JobId
731 *
732 * Returns: JobId on success
733 * 0 on failure
734 */
735 uint32_t GetJobidFromTid(pthread_t tid)
736 {
737 JobControlRecord *jcr = nullptr;
738 bool found = false;
739
740 foreach_jcr (jcr) {
741 if (pthread_equal(jcr->my_thread_id, tid)) {
742 found = true;
743 break;
744 }
745 }
746 endeach_jcr(jcr);
747
748 if (found) { return jcr->JobId; }
749
750 return 0;
751 }
752
753 /*
754 * Given a SessionId and SessionTime, find the JobControlRecord
755 *
756 * Returns: jcr on success
757 * nullptr on failure
758 */
759 JobControlRecord *get_jcr_by_session(uint32_t SessionId, uint32_t SessionTime)
760 {
761 JobControlRecord *jcr;
762
763 foreach_jcr (jcr) {
764 if (jcr->VolSessionId == SessionId && jcr->VolSessionTime == SessionTime) {
765 jcr->IncUseCount();
766 Dmsg3(debuglevel, "Inc get_jcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
767 break;
768 }
769 }
770 endeach_jcr(jcr);
771
772 return jcr;
773 }
774
775 /*
776 * Given a Job, find the JobControlRecord compares on the number of
777 * characters in Job thus allowing partial matches.
778 *
779 * Returns: jcr on success
780 * nullptr on failure
781 */
782 JobControlRecord *get_jcr_by_partial_name(char *Job)
783 {
784 JobControlRecord *jcr;
785 int len;
786
787 if (!Job) { return nullptr; }
788
789 len = strlen(Job);
790 foreach_jcr (jcr) {
791 if (bstrncmp(Job, jcr->Job, len)) {
792 jcr->IncUseCount();
793 Dmsg3(debuglevel, "Inc get_jcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
794 break;
795 }
796 }
797 endeach_jcr(jcr);
798
799 return jcr;
800 }
801
802 /*
803 * Given a Job, find the JobControlRecord requires an exact match of names.
804 *
805 * Returns: jcr on success
806 * nullptr on failure
807 */
808 JobControlRecord *get_jcr_by_full_name(char *Job)
809 {
810 JobControlRecord *jcr;
811
812 if (!Job) { return nullptr; }
813
814 foreach_jcr (jcr) {
815 if (bstrcmp(jcr->Job, Job)) {
816 jcr->IncUseCount();
817 Dmsg3(debuglevel, "Inc get_jcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
818 break;
819 }
820 }
821 endeach_jcr(jcr);
822
823 return jcr;
824 }
825
826 const char *JcrGetAuthenticateKey(const char *unified_job_name)
827 {
828 if (!unified_job_name) { return nullptr; }
829
830 JobControlRecord *jcr;
831 const char *auth_key = nullptr;
832 foreach_jcr (jcr) {
833 if (bstrcmp(jcr->Job, unified_job_name)) {
834 auth_key = jcr->sd_auth_key;
835 Dmsg3(debuglevel, "Inc get_jcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
836 break;
837 }
838 }
839 endeach_jcr(jcr);
840
841 return auth_key;
842 }
843
844 TlsPolicy JcrGetTlsPolicy(const char *unified_job_name)
845 {
846 if (!unified_job_name) { return kBnetTlsUnknown; }
847
848 TlsPolicy policy = kBnetTlsUnknown;
849 JobControlRecord *jcr;
850
851 foreach_jcr (jcr) {
852 if (bstrcmp(jcr->Job, unified_job_name)) {
853 policy = jcr->sd_tls_policy;
854 Dmsg4(debuglevel, "Inc get_jcr jid=%u UseCount=%d Job=%s TlsPolicy=%d\n",
855 jcr->JobId, jcr->UseCount(), jcr->Job, policy);
856 break;
857 }
858 }
859 endeach_jcr(jcr);
860
861 return policy;
862 }
863
864 static void UpdateWaitTime(JobControlRecord *jcr, int newJobStatus)
865 {
866 bool enter_in_waittime;
867 int oldJobStatus = jcr->JobStatus;
868
869 switch (newJobStatus) {
870 case JS_WaitFD:
871 case JS_WaitSD:
872 case JS_WaitMedia:
873 case JS_WaitMount:
874 case JS_WaitStoreRes:
875 case JS_WaitJobRes:
876 case JS_WaitClientRes:
877 case JS_WaitMaxJobs:
878 case JS_WaitPriority:
879 enter_in_waittime = true;
880 break;
881 default:
882 enter_in_waittime = false; /* not a Wait situation */
883 break;
884 }
885
886 /*
887 * If we were previously waiting and are not any more
888 * we want to update the wait_time variable, which is
889 * the start of waiting.
890 */
891 switch (oldJobStatus) {
892 case JS_WaitFD:
893 case JS_WaitSD:
894 case JS_WaitMedia:
895 case JS_WaitMount:
896 case JS_WaitStoreRes:
897 case JS_WaitJobRes:
898 case JS_WaitClientRes:
899 case JS_WaitMaxJobs:
900 case JS_WaitPriority:
901 if (!enter_in_waittime) { /* we get out the wait time */
902 jcr->wait_time_sum += (time(nullptr) - jcr->wait_time);
903 jcr->wait_time = 0;
904 }
905 break;
906 default:
907 /*
908 * If wait state is new, we keep current time for watchdog MaxWaitTime
909 */
910 if (enter_in_waittime) { jcr->wait_time = time(nullptr); }
911 break;
912 }
913 }
914
915 /*
916 * Priority runs from 0 (lowest) to 10 (highest)
917 */
918 static int GetStatusPriority(int JobStatus)
919 {
920 int priority = 0;
921
922 switch (JobStatus) {
923 case JS_Incomplete:
924 priority = 10;
925 break;
926 case JS_ErrorTerminated:
927 case JS_FatalError:
928 case JS_Canceled:
929 priority = 9;
930 break;
931 case JS_Error:
932 priority = 8;
933 break;
934 case JS_Differences:
935 priority = 7;
936 break;
937 }
938
939 return priority;
940 }
941
942 /*
943 * Send Job status to Director
944 */
945 bool JobControlRecord::sendJobStatus()
946 {
947 if (dir_bsock) { return dir_bsock->fsend(Job_status, Job, JobStatus); }
948
949 return true;
950 }
951
952 /*
953 * Set and send Job status to Director
954 */
955 bool JobControlRecord::sendJobStatus(int newJobStatus)
956 {
957 if (!is_JobStatus(newJobStatus)) {
958 setJobStatus(newJobStatus);
959 if (dir_bsock) { return dir_bsock->fsend(Job_status, Job, JobStatus); }
960 }
961
962 return true;
963 }
964
965 void JobControlRecord::setJobStarted()
966 {
967 job_started = true;
968 job_started_time = time(nullptr);
969 }
970
971 void JobControlRecord::resetJobStatus(int newJobStatus)
972 {
973 JobStatus = newJobStatus;
974 }
975
976 void JobControlRecord::setJobStatus(int newJobStatus)
977 {
978 int priority;
979 int old_priority = 0;
980 int oldJobStatus = ' ';
981
982 if (JobStatus) {
983 oldJobStatus = JobStatus;
984 old_priority = GetStatusPriority(oldJobStatus);
985 }
986 priority = GetStatusPriority(newJobStatus);
987
988 Dmsg2(800, "SetJcrJobStatus(%s, %c)\n", Job, newJobStatus);
989
990 /*
991 * Update wait_time depending on newJobStatus and oldJobStatus
992 */
993 UpdateWaitTime(this, newJobStatus);
994
995 /*
996 * For a set of errors, ... keep the current status
997 * so it isn't lost. For all others, set it.
998 */
999 Dmsg2(800, "OnEntry JobStatus=%c newJobstatus=%c\n", oldJobStatus, newJobStatus);
1000
1001 /*
1002 * If status priority is > than proposed new status, change it.
1003 * If status priority == new priority and both are zero, take the new status.
1004 * If it is not zero, then we keep the first non-zero "error" that occurred.
1005 */
1006 if (priority > old_priority || (priority == 0 && old_priority == 0)) {
1007 Dmsg4(800, "Set new stat. old: %c,%d new: %c,%d\n", oldJobStatus, old_priority, newJobStatus, priority);
1008 JobStatus = newJobStatus; /* replace with new status */
1009 }
1010
1011 if (oldJobStatus != JobStatus) {
1012 Dmsg2(800, "leave setJobStatus old=%c new=%c\n", oldJobStatus, newJobStatus);
1013 // GeneratePluginEvent(this, bEventStatusChange, nullptr);
1014 }
1015 }
1016
1017 #ifdef TRACE_JCR_CHAIN
1018 static int lock_count = 0;
1019 #endif
1020
1021 /*
1022 * Lock the chain
1023 */
1024 #ifdef TRACE_JCR_CHAIN
1025 static void b_lock_jcr_chain(const char *fname, int line)
1026 #else
1027 static void lock_jcr_chain()
1028 #endif
1029 {
1030 #ifdef TRACE_JCR_CHAIN
1031 Dmsg3(debuglevel, "Lock jcr chain %d from %s:%d\n", ++lock_count, fname, line);
1032 #endif
1033 P(jcr_lock);
1034 }
1035
1036 /*
1037 * Unlock the chain
1038 */
1039 #ifdef TRACE_JCR_CHAIN
1040 static void b_unlock_jcr_chain(const char *fname, int line)
1041 #else
1042 static void unlock_jcr_chain()
1043 #endif
1044 {
1045 #ifdef TRACE_JCR_CHAIN
1046 Dmsg3(debuglevel, "Unlock jcr chain %d from %s:%d\n", lock_count--, fname, line);
1047 #endif
1048 V(jcr_lock);
1049 }
1050
1051 /*
1052 * Start walk of jcr chain
1053 * The proper way to walk the jcr chain is:
1054 * JobControlRecord *jcr;
1055 * foreach_jcr(jcr) {
1056 * ...
1057 * }
1058 * endeach_jcr(jcr);
1059 *
1060 * It is possible to leave out the endeach_jcr(jcr), but
1061 * in that case, the last jcr referenced must be explicitly
1062 * released with:
1063 *
1064 * FreeJcr(jcr);
1065 */
1066 JobControlRecord *jcr_walk_start()
1067 {
1068 JobControlRecord *jcr;
1069 lock_jcr_chain();
1070 jcr = (JobControlRecord *)job_control_record_chain->first();
1071 if (jcr) {
1072 jcr->IncUseCount();
1073 if (jcr->JobId > 0) {
1074 Dmsg3(debuglevel, "Inc walk_start jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
1075 }
1076 }
1077 unlock_jcr_chain();
1078 return jcr;
1079 }
1080
1081 /*
1082 * Get next jcr from chain, and release current one
1083 */
1084 JobControlRecord *jcr_walk_next(JobControlRecord *prev_jcr)
1085 {
1086 JobControlRecord *jcr;
1087
1088 lock_jcr_chain();
1089 jcr = (JobControlRecord *)job_control_record_chain->next(prev_jcr);
1090 if (jcr) {
1091 jcr->IncUseCount();
1092 if (jcr->JobId > 0) {
1093 Dmsg3(debuglevel, "Inc walk_next jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
1094 }
1095 }
1096 unlock_jcr_chain();
1097 if (prev_jcr) { FreeJcr(prev_jcr); }
1098 return jcr;
1099 }
1100
1101 /*
1102 * Release last jcr referenced
1103 */
1104 void JcrWalkEnd(JobControlRecord *jcr)
1105 {
1106 if (jcr) {
1107 if (jcr->JobId > 0) {
1108 Dmsg3(debuglevel, "Free walk_end jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
1109 }
1110 FreeJcr(jcr);
1111 }
1112 }
1113
1114 /*
1115 * Return number of Jobs
1116 */
1117 int JobCount()
1118 {
1119 JobControlRecord *jcr;
1120 int count = 0;
1121
1122 lock_jcr_chain();
1123 for (jcr = (JobControlRecord *)job_control_record_chain->first();
1124 (jcr = (JobControlRecord *)job_control_record_chain->next(jcr));) {
1125 if (jcr->JobId > 0) { count++; }
1126 }
1127 unlock_jcr_chain();
1128 return count;
1129 }
1130
1131 /*
1132 * Setup to call the timeout check routine every 30 seconds
1133 * This routine will check any timers that have been enabled.
1134 */
1135 bool InitJcrSubsystem(int timeout)
1136 {
1137 watchdog_t *wd = new_watchdog();
1138
1139 watch_dog_timeout = timeout;
1140 wd->one_shot = false;
1141 wd->interval = 30; /* FIXME: should be configurable somewhere, even
1142 if only with a #define */
1143 wd->callback = JcrTimeoutCheck;
1144
1145 RegisterWatchdog(wd);
1146
1147 return true;
1148 }
1149
1150 static void JcrTimeoutCheck(watchdog_t *self)
1151 {
1152 JobControlRecord *jcr;
1153 BareosSocket *bs;
1154 time_t timer_start;
1155
1156 Dmsg0(debuglevel, "Start JobControlRecord timeout checks\n");
1157
1158 /* Walk through all JCRs checking if any one is
1159 * blocked for more than specified max time.
1160 */
1161 foreach_jcr (jcr) {
1162 Dmsg2(debuglevel, "JcrTimeoutCheck JobId=%u jcr=0x%x\n", jcr->JobId, jcr);
1163 if (jcr->JobId == 0) { continue; }
1164 bs = jcr->store_bsock;
1165 if (bs) {
1166 timer_start = bs->timer_start;
1167 if (timer_start && (watchdog_time - timer_start) > watch_dog_timeout) {
1168 bs->timer_start = 0; /* turn off timer */
1169 bs->SetTimedOut();
1170 Qmsg(jcr, M_ERROR, 0,
1171 _("Watchdog sending kill after %d secs to thread stalled reading Storage daemon.\n"),
1172 watchdog_time - timer_start);
1173 jcr->MyThreadSendSignal(TIMEOUT_SIGNAL);
1174 }
1175 }
1176 bs = jcr->file_bsock;
1177 if (bs) {
1178 timer_start = bs->timer_start;
1179 if (timer_start && (watchdog_time - timer_start) > watch_dog_timeout) {
1180 bs->timer_start = 0; /* turn off timer */
1181 bs->SetTimedOut();
1182 Qmsg(jcr, M_ERROR, 0,
1183 _("Watchdog sending kill after %d secs to thread stalled reading File daemon.\n"),
1184 watchdog_time - timer_start);
1185 jcr->MyThreadSendSignal(TIMEOUT_SIGNAL);
1186 }
1187 }
1188 bs = jcr->dir_bsock;
1189 if (bs) {
1190 timer_start = bs->timer_start;
1191 if (timer_start && (watchdog_time - timer_start) > watch_dog_timeout) {
1192 bs->timer_start = 0; /* turn off timer */
1193 bs->SetTimedOut();
1194 Qmsg(jcr, M_ERROR, 0, _("Watchdog sending kill after %d secs to thread stalled reading Director.\n"),
1195 watchdog_time - timer_start);
1196 jcr->MyThreadSendSignal(TIMEOUT_SIGNAL);
1197 }
1198 }
1199 }
1200 endeach_jcr(jcr);
1201
1202 Dmsg0(debuglevel, "Finished JobControlRecord timeout checks\n");
1203 }
1204
1205 /*
1206 * Return next JobId from comma separated list
1207 *
1208 * Returns:
1209 * 1 if next JobId returned
1210 * 0 if no more JobIds are in list
1211 * -1 there is an error
1212 */
1213 int GetNextJobidFromList(char **p, uint32_t *JobId)
1214 {
1215 const int maxlen = 30;
1216 char jobid[maxlen + 1];
1217 char *q = *p;
1218
1219 jobid[0] = 0;
1220 for (int i = 0; i < maxlen; i++) {
1221 if (*q == 0) {
1222 break;
1223 } else if (*q == ',') {
1224 q++;
1225 break;
1226 }
1227 jobid[i] = *q++;
1228 jobid[i + 1] = 0;
1229 }
1230 if (jobid[0] == 0) {
1231 return 0;
1232 } else if (!Is_a_number(jobid)) {
1233 return -1; /* error */
1234 }
1235 *p = q;
1236 *JobId = str_to_int64(jobid);
1237 return 1;
1238 }
1239
/*
 * Timeout signal comes here.
 *
 * The handler does nothing by itself: delivering the signal is enough
 * to interrupt the function the signalled thread is executing.
 */
extern "C" void TimeoutHandler(int sig)
{
  (void)sig;
}
1244
1245 /*
1246 * Used to display specific daemon information after a fatal signal
1247 * (like BareosDb in the director)
1248 */
1249 #define MAX_DBG_HOOK 10
1250 static dbg_jcr_hook_t *dbg_jcr_hooks[MAX_DBG_HOOK];
1251 static int dbg_jcr_handler_count;
1252
1253 void DbgJcrAddHook(dbg_jcr_hook_t *hook)
1254 {
1255 ASSERT(dbg_jcr_handler_count < MAX_DBG_HOOK);
1256 dbg_jcr_hooks[dbg_jcr_handler_count++] = hook;
1257 }
1258
1259 /*
1260 * !!! WARNING !!!
1261 *
1262 * This function should be used ONLY after a fatal signal. We walk through the
1263 * JobControlRecord chain without doing any lock, BAREOS should not be running.
1264 */
1265 void DbgPrintJcr(FILE *fp)
1266 {
1267 char ed1[50], buf1[128], buf2[128], buf3[128], buf4[128];
1268 if (!job_control_record_chain) { return; }
1269
1270 fprintf(fp, "Attempt to dump current JCRs. njcrs=%d\n", job_control_record_chain->size());
1271
1272 for (JobControlRecord *jcr = (JobControlRecord *)job_control_record_chain->first(); jcr;
1273 jcr = (JobControlRecord *)job_control_record_chain->next(jcr)) {
1274 fprintf(fp, "threadid=%s JobId=%d JobStatus=%c jcr=%p name=%s\n",
1275 edit_pthread(jcr->my_thread_id, ed1, sizeof(ed1)), (int)jcr->JobId, jcr->JobStatus, jcr, jcr->Job);
1276 fprintf(fp, "threadid=%s killable=%d JobId=%d JobStatus=%c jcr=%p name=%s\n",
1277 edit_pthread(jcr->my_thread_id, ed1, sizeof(ed1)), jcr->IsKillable(), (int)jcr->JobId,
1278 jcr->JobStatus, jcr, jcr->Job);
1279 fprintf(fp, "\tUseCount=%i\n", jcr->UseCount());
1280 fprintf(fp, "\tJobType=%c JobLevel=%c\n", jcr->getJobType(), jcr->getJobLevel());
1281 bstrftime(buf1, sizeof(buf1), jcr->sched_time);
1282 bstrftime(buf2, sizeof(buf2), jcr->start_time);
1283 bstrftime(buf3, sizeof(buf3), jcr->end_time);
1284 bstrftime(buf4, sizeof(buf4), jcr->wait_time);
1285 fprintf(fp, "\tsched_time=%s start_time=%s\n\tend_time=%s wait_time=%s\n", buf1, buf2, buf3, buf4);
1286 fprintf(fp, "\tdb=%p db_batch=%p batch_started=%i\n", jcr->db, jcr->db_batch, jcr->batch_started);
1287
1288 /*
1289 * Call all the jcr debug hooks
1290 */
1291 for (int i = 0; i < dbg_jcr_handler_count; i++) {
1292 dbg_jcr_hook_t *hook = dbg_jcr_hooks[i];
1293 hook(jcr, fp);
1294 }
1295 }
1296 }
1297