1 /*
2    BAREOS® - Backup Archiving REcovery Open Sourced
3 
4    Copyright (C) 2000-2012 Free Software Foundation Europe e.V.
5    Copyright (C) 2011-2012 Planets Communications B.V.
6    Copyright (C) 2013-2013 Bareos GmbH & Co. KG
7 
8    This program is Free Software; you can redistribute it and/or
9    modify it under the terms of version three of the GNU Affero General Public
10    License as published by the Free Software Foundation and included
11    in the file LICENSE.
12 
13    This program is distributed in the hope that it will be useful, but
14    WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16    Affero General Public License for more details.
17 
18    You should have received a copy of the GNU Affero General Public License
19    along with this program; if not, write to the Free Software
20    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21    02110-1301, USA.
22 */
23 /*
24  * Manipulation routines for Job Control Records and
25  *  handling of last_jobs_list.
26  *
27  *  Kern E. Sibbald, December 2000
28  *
29  *  These routines are thread safe.
30  *
31  *  The job list routines were re-written in May 2005 to
32  *  eliminate the global lock while traversing the list, and
33  *  to use the dlist subroutines.  The locking is now done
34  *  on the list each time the list is modified or traversed.
35  *  That is it is "micro-locked" rather than globally locked.
36  *  The result is that there is one lock/unlock for each entry
37  *  in the list while traversing it rather than a single lock
38  *  at the beginning of a traversal and one at the end.  This
39  *  incurs slightly more overhead, but effectively eliminates
 *  the possibility of race conditions.  In addition, with the
41  *  exception of the global locking of the list during the
42  *  re-reading of the config file, no recursion is needed.
43  *
44  */
45 
46 #include "include/bareos.h"
47 #include "include/jcr.h"
48 #include "lib/edit.h"
49 #include "lib/tls_conf.h"
50 
/* Debug level passed to the Dmsg calls in this file */
const int debuglevel = 3400;

/* External variables we reference */

/* External referenced functions */
void FreeBregexps(alist *bregexps);

/* Forward referenced functions */
extern "C" void TimeoutHandler(int sig);
static void JcrTimeoutCheck(watchdog_t *self);
/*
 * With TRACE_JCR_CHAIN defined, lock_jcr_chain()/unlock_jcr_chain() are
 * macros that forward the call site (__FILE__/__LINE__) to the tracing
 * variants; otherwise they are plain static functions.
 * NOTE(review): the macro expansions already end in ';'.
 */
#ifdef TRACE_JCR_CHAIN
static void b_lock_jcr_chain(const char *filen, int line);
static void b_unlock_jcr_chain(const char *filen, int line);
#define lock_jcr_chain() b_lock_jcr_chain(__FILE__, __LINE__);
#define unlock_jcr_chain() b_unlock_jcr_chain(__FILE__, __LINE__);
#else
static void lock_jcr_chain();
static void unlock_jcr_chain();
#endif

/* Incremented in FreeJcr each time a finished job is added to last_jobs */
int num_jobs_run;
/* List of s_last_job entries describing recently finished jobs */
dlist *last_jobs        = nullptr;
/* Entries beyond this count are dropped from the front of last_jobs */
const int max_last_jobs = 10;

/* Chain of all JobControlRecords; accessed under lock_jcr_chain() */
static dlist *job_control_record_chain = nullptr;
/* Timeout value stored by InitJcrSubsystem() */
static int watch_dog_timeout           = 0;

/* jcr_lock: taken by lock_jcr_chain(); job_start_mutex: taken by LockJobs();
 * last_jobs_mutex: taken by LockLastJobsList() */
static pthread_mutex_t jcr_lock        = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t job_start_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t last_jobs_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Thread specific data key holding the jcr of each thread. How the key
 * gets created once depends on the platform, see setup_tsd_key().
 */
#ifdef HAVE_WIN32
static bool tsd_initialized = false;
static pthread_key_t jcr_key; /* Pointer to jcr for each thread */
#else
#ifdef PTHREAD_ONCE_KEY_NP
static pthread_key_t jcr_key = PTHREAD_ONCE_KEY_NP;
#else
static pthread_key_t jcr_key; /* Pointer to jcr for each thread */
static pthread_once_t key_once = PTHREAD_ONCE_INIT;
#endif
#endif

/* Format of the status message sent by JobControlRecord::sendJobStatus() */
static char Job_status[] = "Status Job=%s JobStatus=%d\n";
95 
LockJobs()96 void LockJobs() { P(job_start_mutex); }
97 
UnlockJobs()98 void UnlockJobs() { V(job_start_mutex); }
99 
InitLastJobsList()100 void InitLastJobsList()
101 {
102   JobControlRecord *jcr        = nullptr;
103   struct s_last_job *job_entry = nullptr;
104   if (!last_jobs) { last_jobs = New(dlist(job_entry, &job_entry->link)); }
105   if (!job_control_record_chain) { job_control_record_chain = New(dlist(jcr, &jcr->link)); }
106 }
107 
TermLastJobsList()108 void TermLastJobsList()
109 {
110   if (last_jobs) {
111     LockLastJobsList();
112     while (!last_jobs->empty()) {
113       void *je = last_jobs->first();
114       last_jobs->remove(je);
115       free(je);
116     }
117     delete last_jobs;
118     last_jobs = nullptr;
119     UnlockLastJobsList();
120   }
121   if (job_control_record_chain) {
122     delete job_control_record_chain;
123     job_control_record_chain = nullptr;
124   }
125 }
126 
ReadLastJobsList(int fd,uint64_t addr)127 bool ReadLastJobsList(int fd, uint64_t addr)
128 {
129   struct s_last_job *je, job;
130   uint32_t num;
131   bool ok = true;
132 
133   Dmsg1(100, "read_last_jobs seek to %d\n", (int)addr);
134   if (addr == 0 || lseek(fd, (boffset_t)addr, SEEK_SET) < 0) { return false; }
135   if (read(fd, &num, sizeof(num)) != sizeof(num)) { return false; }
136   Dmsg1(100, "Read num_items=%d\n", num);
137   if (num > 4 * max_last_jobs) { /* sanity check */
138     return false;
139   }
140   LockLastJobsList();
141   for (; num; num--) {
142     if (read(fd, &job, sizeof(job)) != sizeof(job)) {
143       BErrNo be;
144       Pmsg1(000, "Read job entry. ERR=%s\n", be.bstrerror());
145       ok = false;
146       break;
147     }
148     if (job.JobId > 0) {
149       je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
150       memcpy((char *)je, (char *)&job, sizeof(job));
151       if (!last_jobs) { InitLastJobsList(); }
152       last_jobs->append(je);
153       if (last_jobs->size() > max_last_jobs) {
154         je = (struct s_last_job *)last_jobs->first();
155         last_jobs->remove(je);
156         free(je);
157       }
158     }
159   }
160   UnlockLastJobsList();
161   return ok;
162 }
163 
WriteLastJobsList(int fd,uint64_t addr)164 uint64_t WriteLastJobsList(int fd, uint64_t addr)
165 {
166   struct s_last_job *je;
167   uint32_t num;
168   ssize_t status;
169 
170   Dmsg1(100, "write_last_jobs seek to %d\n", (int)addr);
171   if (lseek(fd, (boffset_t)addr, SEEK_SET) < 0) { return 0; }
172   if (last_jobs) {
173     LockLastJobsList();
174     /*
175      * First record is number of entires
176      */
177     num = last_jobs->size();
178     if (write(fd, &num, sizeof(num)) != sizeof(num)) {
179       BErrNo be;
180       Pmsg1(000, "Error writing num_items: ERR=%s\n", be.bstrerror());
181       goto bail_out;
182     }
183     foreach_dlist (je, last_jobs) {
184       if (write(fd, je, sizeof(struct s_last_job)) != sizeof(struct s_last_job)) {
185         BErrNo be;
186         Pmsg1(000, "Error writing job: ERR=%s\n", be.bstrerror());
187         goto bail_out;
188       }
189     }
190     UnlockLastJobsList();
191   }
192 
193   /*
194    * Return current address
195    */
196   status = lseek(fd, 0, SEEK_CUR);
197   if (status < 0) { status = 0; }
198   return status;
199 
200 bail_out:
201   UnlockLastJobsList();
202   return 0;
203 }
204 
LockLastJobsList()205 void LockLastJobsList() { P(last_jobs_mutex); }
206 
UnlockLastJobsList()207 void UnlockLastJobsList() { V(last_jobs_mutex); }
208 
209 /*
210  * Get an ASCII representation of the Operation being performed as an english Noun
211  */
get_OperationName()212 const char *JobControlRecord::get_OperationName()
213 {
214   switch (JobType_) {
215     case JT_BACKUP:
216       return _("Backup");
217     case JT_VERIFY:
218       return _("Verifying");
219     case JT_RESTORE:
220       return _("Restoring");
221     case JT_ARCHIVE:
222       return _("Archiving");
223     case JT_COPY:
224       return _("Copying");
225     case JT_MIGRATE:
226       return _("Migration");
227     case JT_SCAN:
228       return _("Scanning");
229     case JT_CONSOLIDATE:
230       return _("Consolidating");
231     default:
232       return _("Unknown operation");
233   }
234 }
235 
236 /*
237  * Get an ASCII representation of the Action being performed either an english Verb or Adjective
238  */
get_ActionName(bool past)239 const char *JobControlRecord::get_ActionName(bool past)
240 {
241   switch (JobType_) {
242     case JT_BACKUP:
243       return _("backup");
244     case JT_VERIFY:
245       return (past) ? _("verified") : _("verify");
246     case JT_RESTORE:
247       return (past) ? _("restored") : _("restore");
248     case JT_ARCHIVE:
249       return (past) ? _("archived") : _("archive");
250     case JT_COPY:
251       return (past) ? _("copied") : _("copy");
252     case JT_MIGRATE:
253       return (past) ? _("migrated") : _("migrate");
254     case JT_SCAN:
255       return (past) ? _("scanned") : _("scan");
256     case JT_CONSOLIDATE:
257       return (past) ? _("consolidated") : _("consolidate");
258     default:
259       return _("unknown action");
260   }
261 }
262 
JobReads()263 bool JobControlRecord::JobReads()
264 {
265   switch (JobType_) {
266     case JT_VERIFY:
267     case JT_RESTORE:
268     case JT_COPY:
269     case JT_MIGRATE:
270       return true;
271     case JT_BACKUP:
272       if (JobLevel_ == L_VIRTUAL_FULL) { return true; }
273       break;
274     default:
275       break;
276   }
277   return false;
278 }
279 
280 /*
281  * Push a job_callback_item onto the job end callback stack.
282  */
RegisterJobEndCallback(JobControlRecord * jcr,void JobEndCb (JobControlRecord * jcr,void *),void * ctx)283 void RegisterJobEndCallback(JobControlRecord *jcr, void JobEndCb(JobControlRecord *jcr, void *), void *ctx)
284 {
285   job_callback_item *item;
286 
287   item = (job_callback_item *)malloc(sizeof(job_callback_item));
288 
289   item->JobEndCb = JobEndCb;
290   item->ctx      = ctx;
291 
292   jcr->job_end_callbacks.push((void *)item);
293 }
294 
295 /*
296  * Pop each job_callback_item and process it.
297  */
CallJobEndCallbacks(JobControlRecord * jcr)298 static void CallJobEndCallbacks(JobControlRecord *jcr)
299 {
300   job_callback_item *item;
301 
302   if (jcr->job_end_callbacks.size() > 0) {
303     item = (job_callback_item *)jcr->job_end_callbacks.pop();
304     while (item) {
305       item->JobEndCb(jcr, item->ctx);
306       free(item);
307       item = (job_callback_item *)jcr->job_end_callbacks.pop();
308     }
309   }
310 }
311 
312 /*
313  * Create thread key for thread specific data.
314  */
create_jcr_key()315 static void create_jcr_key()
316 {
317   int status;
318 
319 #ifdef PTHREAD_ONCE_KEY_NP
320   status = pthread_key_create_once_np(&jcr_key, nullptr);
321 #else
322   status = pthread_key_create(&jcr_key, nullptr);
323 #endif
324   if (status != 0) {
325     BErrNo be;
326     Jmsg1(nullptr, M_ABORT, 0, _("pthread key create failed: ERR=%s\n"), be.bstrerror(status));
327   }
328 }
329 
330 /*
331  * Setup thread key for thread specific data.
332  */
setup_tsd_key()333 void setup_tsd_key()
334 {
335 #ifdef HAVE_WIN32
336   P(jcr_lock);
337   if (!tsd_initialized) {
338     create_jcr_key();
339     tsd_initialized = true;
340   }
341   V(jcr_lock);
342 #else
343 #ifdef PTHREAD_ONCE_KEY_NP
344   create_jcr_key();
345 #else
346   int status;
347 
348   status = pthread_once(&key_once, create_jcr_key);
349   if (status != 0) {
350     BErrNo be;
351     Jmsg1(nullptr, M_ABORT, 0, _("pthread_once failed. ERR=%s\n"), be.bstrerror(status));
352   }
353 #endif
354 #endif
355 }
356 
/*
 * Create a Job Control Record and link it into JobControlRecord chain
 * Returns newly allocated JobControlRecord
 *
 * Note, since each daemon has a different JobControlRecord, he passes us the size.
 *
 * size            - number of bytes to allocate for the record
 * daemon_free_jcr - daemon specific free routine, stored in the record and
 *                   called from FreeJcr()
 *
 * The record is returned with its use count incremented once and with
 * dummy "*System*" job values. As a side effect the TIMEOUT_SIGNAL
 * handler is (re)installed on every call.
 */
JobControlRecord *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr)
{
  JobControlRecord *jcr;
  MessageQeueItem *item = nullptr;
  struct sigaction sigtimer;
  int status;

  Dmsg0(debuglevel, "Enter new_jcr\n");

  setup_tsd_key();

  /*
   * Allocate size bytes, zero them, then construct the common
   * JobControlRecord part in place.
   */
  jcr = (JobControlRecord *)malloc(size);
  memset(jcr, 0, size);
  jcr = new (jcr) JobControlRecord();

  jcr->msg_queue = New(dlist(item, &item->link));
  if ((status = pthread_mutex_init(&jcr->msg_queue_mutex, nullptr)) != 0) {
    BErrNo be;
    Jmsg(nullptr, M_ABORT, 0, _("Could not init msg_queue mutex. ERR=%s\n"), be.bstrerror(status));
  }

  jcr->my_thread_id = pthread_self();
  jcr->job_end_callbacks.init(1, false);
  jcr->sched_time         = time(nullptr);
  jcr->initial_sched_time = jcr->sched_time;
  jcr->daemon_free_jcr    = daemon_free_jcr; /* plug daemon free routine */
  jcr->InitMutex();
  jcr->IncUseCount();
  /* Pool memory buffers start out as empty strings */
  jcr->VolumeName    = GetPoolMemory(PM_FNAME);
  jcr->VolumeName[0] = 0;
  jcr->errmsg        = GetPoolMemory(PM_MESSAGE);
  jcr->errmsg[0]     = 0;
  jcr->comment       = GetPoolMemory(PM_FNAME);
  jcr->comment[0]    = 0;

  /*
   * Setup some dummy values
   */
  bstrncpy(jcr->Job, "*System*", sizeof(jcr->Job));
  jcr->JobId = 0;
  jcr->setJobType(JT_SYSTEM); /* internal job until defined */
  jcr->setJobLevel(L_NONE);
  jcr->setJobStatus(JS_Created); /* ready to run */
  /* Install TimeoutHandler for TIMEOUT_SIGNAL, blocking all signals while it runs */
  sigtimer.sa_flags   = 0;
  sigtimer.sa_handler = TimeoutHandler;
  sigfillset(&sigtimer.sa_mask);
  sigaction(TIMEOUT_SIGNAL, &sigtimer, nullptr);

  /*
   * Locking jobs is a global lock that is needed
   * so that the Director can stop new jobs from being
   * added to the jcr chain while it processes a new
   * conf file and does the RegisterJobEndCallback().
   */
  LockJobs();
  lock_jcr_chain();
  if (!job_control_record_chain) { job_control_record_chain = New(dlist(jcr, &jcr->link)); }
  job_control_record_chain->append(jcr);
  unlock_jcr_chain();
  UnlockJobs();

  return jcr;
}
426 
427 /*
428  * Remove a JobControlRecord from the chain
429  *
430  * NOTE! The chain must be locked prior to calling this routine.
431  */
RemoveJcr(JobControlRecord * jcr)432 static void RemoveJcr(JobControlRecord *jcr)
433 {
434   Dmsg0(debuglevel, "Enter RemoveJcr\n");
435   if (!jcr) { Emsg0(M_ABORT, 0, _("nullptr jcr.\n")); }
436   job_control_record_chain->remove(jcr);
437   Dmsg0(debuglevel, "Leave RemoveJcr\n");
438 }
439 
440 /*
441  * Free stuff common to all JCRs.  N.B. Be careful to include only
442  * generic stuff in the common part of the jcr.
443  */
FreeCommonJcr(JobControlRecord * jcr)444 static void FreeCommonJcr(JobControlRecord *jcr)
445 {
446   Dmsg1(100, "FreeCommonJcr: %p \n", jcr);
447 
448   if (!jcr) { Dmsg0(100, "FreeCommonJcr: Invalid jcr\n"); }
449 
450   /*
451    * Uses jcr lock/unlock
452    */
453   RemoveJcrFromTsd(jcr);
454   jcr->SetKillable(false);
455 
456   jcr->DestroyMutex();
457 
458   if (jcr->msg_queue) {
459     delete jcr->msg_queue;
460     jcr->msg_queue = nullptr;
461     pthread_mutex_destroy(&jcr->msg_queue_mutex);
462   }
463 
464   if (jcr->client_name) {
465     FreePoolMemory(jcr->client_name);
466     jcr->client_name = nullptr;
467   }
468 
469   if (jcr->attr) {
470     FreePoolMemory(jcr->attr);
471     jcr->attr = nullptr;
472   }
473 
474   if (jcr->sd_auth_key) {
475     free(jcr->sd_auth_key);
476     jcr->sd_auth_key = nullptr;
477   }
478 
479   if (jcr->VolumeName) {
480     FreePoolMemory(jcr->VolumeName);
481     jcr->VolumeName = nullptr;
482   }
483 
484   if (jcr->dir_bsock) {
485     jcr->dir_bsock->close();
486     delete jcr->dir_bsock;
487     jcr->dir_bsock = nullptr;
488   }
489 
490   if (jcr->errmsg) {
491     FreePoolMemory(jcr->errmsg);
492     jcr->errmsg = nullptr;
493   }
494 
495   if (jcr->where) {
496     free(jcr->where);
497     jcr->where = nullptr;
498   }
499 
500   if (jcr->RegexWhere) {
501     free(jcr->RegexWhere);
502     jcr->RegexWhere = nullptr;
503   }
504 
505   if (jcr->where_bregexp) {
506     FreeBregexps(jcr->where_bregexp);
507     delete jcr->where_bregexp;
508     jcr->where_bregexp = nullptr;
509   }
510 
511   if (jcr->cached_path) {
512     FreePoolMemory(jcr->cached_path);
513     jcr->cached_path = nullptr;
514     jcr->cached_pnl  = 0;
515   }
516 
517   if (jcr->id_list) {
518     FreeGuidList(jcr->id_list);
519     jcr->id_list = nullptr;
520   }
521 
522   if (jcr->comment) {
523     FreePoolMemory(jcr->comment);
524     jcr->comment = nullptr;
525   }
526 
527   free(jcr);
528 }
529 
/*
 * Global routine to free a jcr
 *
 * Decrements the use count of jcr. While the count stays above zero the
 * record is kept and the routine returns. Otherwise the record is removed
 * from the chain, queued messages are dequeued, the job end callbacks are
 * called, an entry is added to last_jobs for the job types listed below
 * (only when JobId > 0), and finally the daemon specific and the common
 * parts of the record are freed.
 *
 * With DEBUG defined the routine is compiled as b_free_jcr() and also
 * receives the file and line of the caller; both variants share one body.
 */
#ifdef DEBUG
void b_free_jcr(const char *file, int line, JobControlRecord *jcr)
{
  struct s_last_job *je;

  Dmsg3(debuglevel, "Enter FreeJcr jid=%u from %s:%d\n", jcr->JobId, file, line);

#else

void FreeJcr(JobControlRecord *jcr)
{
  struct s_last_job *je;

  Dmsg3(debuglevel, "Enter FreeJcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);

#endif

  /* The use count is only changed while the chain lock is held */
  lock_jcr_chain();
  jcr->DecUseCount(); /* decrement use count */
  if (jcr->UseCount() < 0) {
    Jmsg2(jcr, M_ERROR, 0, _("JobControlRecord UseCount=%d JobId=%d\n"), jcr->UseCount(), jcr->JobId);
  }
  if (jcr->JobId > 0) {
    Dmsg3(debuglevel, "Dec FreeJcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
  }
  if (jcr->UseCount() > 0) { /* if in use */
    unlock_jcr_chain();
    return;
  }
  if (jcr->JobId > 0) {
    Dmsg3(debuglevel, "remove jcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
  }
  RemoveJcr(jcr); /* remove Jcr from chain */
  unlock_jcr_chain();

  /* From here on the record is no longer reachable through the chain */
  DequeueMessages(jcr);
  CallJobEndCallbacks(jcr); /* call registered callbacks */

  Dmsg1(debuglevel, "End job=%d\n", jcr->JobId);

  /*
   * Keep some statistics
   */
  switch (jcr->getJobType()) {
    case JT_BACKUP:
    case JT_VERIFY:
    case JT_RESTORE:
    case JT_MIGRATE:
    case JT_COPY:
    case JT_ADMIN:
      /*
       * Keep list of last jobs, but not Console where JobId==0
       */
      if (jcr->JobId > 0) {
        LockLastJobsList();
        num_jobs_run++;
        je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
        memset(je, 0, sizeof(struct s_last_job)); /* zero in case unset fields */
        je->Errors         = jcr->JobErrors;
        je->JobType        = jcr->getJobType();
        je->JobId          = jcr->JobId;
        je->VolSessionId   = jcr->VolSessionId;
        je->VolSessionTime = jcr->VolSessionTime;
        bstrncpy(je->Job, jcr->Job, sizeof(je->Job));
        je->JobFiles   = jcr->JobFiles;
        je->JobBytes   = jcr->JobBytes;
        je->JobStatus  = jcr->JobStatus;
        je->JobLevel   = jcr->getJobLevel();
        je->start_time = jcr->start_time;
        je->end_time   = time(nullptr);

        if (!last_jobs) { InitLastJobsList(); }
        last_jobs->append(je);
        /* Drop the oldest entry once the list exceeds max_last_jobs */
        if (last_jobs->size() > max_last_jobs) {
          je = (struct s_last_job *)last_jobs->first();
          last_jobs->remove(je);
          free(je);
        }
        UnlockLastJobsList();
      }
      break;
    default:
      break;
  }

  CloseMsg(jcr); /* close messages for this job */

  if (jcr->daemon_free_jcr) { jcr->daemon_free_jcr(jcr); /* call daemon free routine */ }

  /* Frees the record itself; jcr must not be touched after this call */
  FreeCommonJcr(jcr);
  CloseMsg(nullptr); /* flush any daemon messages */
  Dmsg0(debuglevel, "Exit FreeJcr\n");
}
626 
627 void JobControlRecord::SetKillable(bool killable)
628 {
629   lock();
630 
631   my_thread_killable = killable;
632   if (killable) {
633     my_thread_id = pthread_self();
634   } else {
635     memset(&my_thread_id, 0, sizeof(my_thread_id));
636   }
637 
638   unlock();
639 }
640 
641 void JobControlRecord::MyThreadSendSignal(int sig)
642 {
643   lock();
644 
645   if (IsKillable() && !pthread_equal(my_thread_id, pthread_self())) {
646     Dmsg1(800, "Send kill to jid=%d\n", JobId);
647     pthread_kill(my_thread_id, sig);
648   } else if (!IsKillable()) {
649     Dmsg1(10, "Warning, can't send kill to jid=%d\n", JobId);
650   }
651 
652   unlock();
653 }
654 
655 /*
656  * Remove jcr from thread specific data, but but make sure it is us who are attached.
657  */
658 void RemoveJcrFromTsd(JobControlRecord *jcr)
659 {
660   JobControlRecord *tjcr = get_jcr_from_tsd();
661 
662   if (tjcr == jcr) { SetJcrInTsd(INVALID_JCR); }
663 }
664 
665 /*
666  * Put this jcr in the thread specifc data
667  */
668 void SetJcrInTsd(JobControlRecord *jcr)
669 {
670   int status;
671 
672   status = pthread_setspecific(jcr_key, (void *)jcr);
673   if (status != 0) {
674     BErrNo be;
675     Jmsg1(jcr, M_ABORT, 0, _("pthread_setspecific failed: ERR=%s\n"), be.bstrerror(status));
676   }
677 }
678 
679 /*
680  * Give me the jcr that is attached to this thread
681  */
682 JobControlRecord *get_jcr_from_tsd()
683 {
684   JobControlRecord *jcr = (JobControlRecord *)pthread_getspecific(jcr_key);
685 
686   /*
687    * Set any INVALID_JCR to nullptr which the rest of BAREOS understands
688    */
689   if (jcr == INVALID_JCR) { jcr = nullptr; }
690 
691   return jcr;
692 }
693 
694 /*
695  * Find which JobId corresponds to the current thread
696  */
697 uint32_t GetJobidFromTsd()
698 {
699   JobControlRecord *jcr = (JobControlRecord *)pthread_getspecific(jcr_key);
700   uint32_t JobId        = 0;
701 
702   if (jcr && jcr != INVALID_JCR) { JobId = (uint32_t)jcr->JobId; }
703 
704   return JobId;
705 }
706 
707 /*
708  * Given a JobId, find the JobControlRecord
709  *
710  * Returns: jcr on success
711  *          nullptr on failure
712  */
713 JobControlRecord *get_jcr_by_id(uint32_t JobId)
714 {
715   JobControlRecord *jcr;
716 
717   foreach_jcr (jcr) {
718     if (jcr->JobId == JobId) {
719       jcr->IncUseCount();
720       Dmsg3(debuglevel, "Inc get_jcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
721       break;
722     }
723   }
724   endeach_jcr(jcr);
725 
726   return jcr;
727 }
728 
729 /*
730  * Given a thread id, find the JobId
731  *
732  * Returns: JobId on success
733  *          0 on failure
734  */
735 uint32_t GetJobidFromTid(pthread_t tid)
736 {
737   JobControlRecord *jcr = nullptr;
738   bool found            = false;
739 
740   foreach_jcr (jcr) {
741     if (pthread_equal(jcr->my_thread_id, tid)) {
742       found = true;
743       break;
744     }
745   }
746   endeach_jcr(jcr);
747 
748   if (found) { return jcr->JobId; }
749 
750   return 0;
751 }
752 
753 /*
754  * Given a SessionId and SessionTime, find the JobControlRecord
755  *
756  * Returns: jcr on success
757  *          nullptr on failure
758  */
759 JobControlRecord *get_jcr_by_session(uint32_t SessionId, uint32_t SessionTime)
760 {
761   JobControlRecord *jcr;
762 
763   foreach_jcr (jcr) {
764     if (jcr->VolSessionId == SessionId && jcr->VolSessionTime == SessionTime) {
765       jcr->IncUseCount();
766       Dmsg3(debuglevel, "Inc get_jcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
767       break;
768     }
769   }
770   endeach_jcr(jcr);
771 
772   return jcr;
773 }
774 
775 /*
776  * Given a Job, find the JobControlRecord compares on the number of
777  * characters in Job thus allowing partial matches.
778  *
779  * Returns: jcr on success
780  *          nullptr on failure
781  */
782 JobControlRecord *get_jcr_by_partial_name(char *Job)
783 {
784   JobControlRecord *jcr;
785   int len;
786 
787   if (!Job) { return nullptr; }
788 
789   len = strlen(Job);
790   foreach_jcr (jcr) {
791     if (bstrncmp(Job, jcr->Job, len)) {
792       jcr->IncUseCount();
793       Dmsg3(debuglevel, "Inc get_jcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
794       break;
795     }
796   }
797   endeach_jcr(jcr);
798 
799   return jcr;
800 }
801 
802 /*
803  * Given a Job, find the JobControlRecord requires an exact match of names.
804  *
805  * Returns: jcr on success
806  *          nullptr on failure
807  */
808 JobControlRecord *get_jcr_by_full_name(char *Job)
809 {
810   JobControlRecord *jcr;
811 
812   if (!Job) { return nullptr; }
813 
814   foreach_jcr (jcr) {
815     if (bstrcmp(jcr->Job, Job)) {
816       jcr->IncUseCount();
817       Dmsg3(debuglevel, "Inc get_jcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
818       break;
819     }
820   }
821   endeach_jcr(jcr);
822 
823   return jcr;
824 }
825 
826 const char *JcrGetAuthenticateKey(const char *unified_job_name)
827 {
828   if (!unified_job_name) { return nullptr; }
829 
830   JobControlRecord *jcr;
831   const char *auth_key = nullptr;
832   foreach_jcr (jcr) {
833     if (bstrcmp(jcr->Job, unified_job_name)) {
834       auth_key = jcr->sd_auth_key;
835       Dmsg3(debuglevel, "Inc get_jcr jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
836       break;
837     }
838   }
839   endeach_jcr(jcr);
840 
841   return auth_key;
842 }
843 
844 TlsPolicy JcrGetTlsPolicy(const char *unified_job_name)
845 {
846   if (!unified_job_name) { return kBnetTlsUnknown; }
847 
848   TlsPolicy policy = kBnetTlsUnknown;
849   JobControlRecord *jcr;
850 
851   foreach_jcr (jcr) {
852     if (bstrcmp(jcr->Job, unified_job_name)) {
853       policy = jcr->sd_tls_policy;
854       Dmsg4(debuglevel, "Inc get_jcr jid=%u UseCount=%d Job=%s TlsPolicy=%d\n",
855             jcr->JobId, jcr->UseCount(), jcr->Job, policy);
856       break;
857     }
858   }
859   endeach_jcr(jcr);
860 
861   return policy;
862 }
863 
864 static void UpdateWaitTime(JobControlRecord *jcr, int newJobStatus)
865 {
866   bool enter_in_waittime;
867   int oldJobStatus = jcr->JobStatus;
868 
869   switch (newJobStatus) {
870     case JS_WaitFD:
871     case JS_WaitSD:
872     case JS_WaitMedia:
873     case JS_WaitMount:
874     case JS_WaitStoreRes:
875     case JS_WaitJobRes:
876     case JS_WaitClientRes:
877     case JS_WaitMaxJobs:
878     case JS_WaitPriority:
879       enter_in_waittime = true;
880       break;
881     default:
882       enter_in_waittime = false; /* not a Wait situation */
883       break;
884   }
885 
886   /*
887    * If we were previously waiting and are not any more
888    * we want to update the wait_time variable, which is
889    * the start of waiting.
890    */
891   switch (oldJobStatus) {
892     case JS_WaitFD:
893     case JS_WaitSD:
894     case JS_WaitMedia:
895     case JS_WaitMount:
896     case JS_WaitStoreRes:
897     case JS_WaitJobRes:
898     case JS_WaitClientRes:
899     case JS_WaitMaxJobs:
900     case JS_WaitPriority:
901       if (!enter_in_waittime) { /* we get out the wait time */
902         jcr->wait_time_sum += (time(nullptr) - jcr->wait_time);
903         jcr->wait_time = 0;
904       }
905       break;
906     default:
907       /*
908        * If wait state is new, we keep current time for watchdog MaxWaitTime
909        */
910       if (enter_in_waittime) { jcr->wait_time = time(nullptr); }
911       break;
912   }
913 }
914 
915 /*
916  * Priority runs from 0 (lowest) to 10 (highest)
917  */
918 static int GetStatusPriority(int JobStatus)
919 {
920   int priority = 0;
921 
922   switch (JobStatus) {
923     case JS_Incomplete:
924       priority = 10;
925       break;
926     case JS_ErrorTerminated:
927     case JS_FatalError:
928     case JS_Canceled:
929       priority = 9;
930       break;
931     case JS_Error:
932       priority = 8;
933       break;
934     case JS_Differences:
935       priority = 7;
936       break;
937   }
938 
939   return priority;
940 }
941 
942 /*
943  * Send Job status to Director
944  */
945 bool JobControlRecord::sendJobStatus()
946 {
947   if (dir_bsock) { return dir_bsock->fsend(Job_status, Job, JobStatus); }
948 
949   return true;
950 }
951 
952 /*
953  * Set and send Job status to Director
954  */
955 bool JobControlRecord::sendJobStatus(int newJobStatus)
956 {
957   if (!is_JobStatus(newJobStatus)) {
958     setJobStatus(newJobStatus);
959     if (dir_bsock) { return dir_bsock->fsend(Job_status, Job, JobStatus); }
960   }
961 
962   return true;
963 }
964 
965 void JobControlRecord::setJobStarted()
966 {
967   job_started      = true;
968   job_started_time = time(nullptr);
969 }
970 
971 void JobControlRecord::resetJobStatus(int newJobStatus)
972 {
973   JobStatus = newJobStatus;
974 }
975 
976 void JobControlRecord::setJobStatus(int newJobStatus)
977 {
978   int priority;
979   int old_priority = 0;
980   int oldJobStatus = ' ';
981 
982   if (JobStatus) {
983     oldJobStatus = JobStatus;
984     old_priority = GetStatusPriority(oldJobStatus);
985   }
986   priority = GetStatusPriority(newJobStatus);
987 
988   Dmsg2(800, "SetJcrJobStatus(%s, %c)\n", Job, newJobStatus);
989 
990   /*
991    * Update wait_time depending on newJobStatus and oldJobStatus
992    */
993   UpdateWaitTime(this, newJobStatus);
994 
995   /*
996    * For a set of errors, ... keep the current status
997    * so it isn't lost. For all others, set it.
998    */
999   Dmsg2(800, "OnEntry JobStatus=%c newJobstatus=%c\n", oldJobStatus, newJobStatus);
1000 
1001   /*
1002    * If status priority is > than proposed new status, change it.
1003    * If status priority == new priority and both are zero, take the new status.
1004    * If it is not zero, then we keep the first non-zero "error" that occurred.
1005    */
1006   if (priority > old_priority || (priority == 0 && old_priority == 0)) {
1007     Dmsg4(800, "Set new stat. old: %c,%d new: %c,%d\n", oldJobStatus, old_priority, newJobStatus, priority);
1008     JobStatus = newJobStatus; /* replace with new status */
1009   }
1010 
1011   if (oldJobStatus != JobStatus) {
1012     Dmsg2(800, "leave setJobStatus old=%c new=%c\n", oldJobStatus, newJobStatus);
1013     //    GeneratePluginEvent(this, bEventStatusChange, nullptr);
1014   }
1015 }
1016 
#ifdef TRACE_JCR_CHAIN
/* Counter printed by the tracing lock/unlock variants */
static int lock_count = 0;
#endif

/*
 * Lock the chain
 *
 * Takes jcr_lock. With TRACE_JCR_CHAIN defined the routine is compiled
 * as b_lock_jcr_chain() and first logs the incremented lock_count along
 * with the file and line of the caller.
 * NOTE(review): in the tracing variant lock_count is incremented before
 * jcr_lock is taken.
 */
#ifdef TRACE_JCR_CHAIN
static void b_lock_jcr_chain(const char *fname, int line)
#else
static void lock_jcr_chain()
#endif
{
#ifdef TRACE_JCR_CHAIN
  Dmsg3(debuglevel, "Lock jcr chain %d from %s:%d\n", ++lock_count, fname, line);
#endif
  P(jcr_lock);
}
1035 
/*
 * Unlock the chain
 *
 * Releases jcr_lock. With TRACE_JCR_CHAIN defined the routine is
 * compiled as b_unlock_jcr_chain() and first logs lock_count (which is
 * then decremented) along with the file and line of the caller.
 */
#ifdef TRACE_JCR_CHAIN
static void b_unlock_jcr_chain(const char *fname, int line)
#else
static void unlock_jcr_chain()
#endif
{
#ifdef TRACE_JCR_CHAIN
  Dmsg3(debuglevel, "Unlock jcr chain %d from %s:%d\n", lock_count--, fname, line);
#endif
  V(jcr_lock);
}
1050 
1051 /*
1052  * Start walk of jcr chain
1053  * The proper way to walk the jcr chain is:
1054  *    JobControlRecord *jcr;
1055  *    foreach_jcr(jcr) {
1056  *      ...
1057  *    }
1058  *    endeach_jcr(jcr);
1059  *
1060  * It is possible to leave out the endeach_jcr(jcr), but
1061  * in that case, the last jcr referenced must be explicitly
1062  * released with:
1063  *
1064  * FreeJcr(jcr);
1065  */
1066 JobControlRecord *jcr_walk_start()
1067 {
1068   JobControlRecord *jcr;
1069   lock_jcr_chain();
1070   jcr = (JobControlRecord *)job_control_record_chain->first();
1071   if (jcr) {
1072     jcr->IncUseCount();
1073     if (jcr->JobId > 0) {
1074       Dmsg3(debuglevel, "Inc walk_start jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
1075     }
1076   }
1077   unlock_jcr_chain();
1078   return jcr;
1079 }
1080 
1081 /*
1082  * Get next jcr from chain, and release current one
1083  */
1084 JobControlRecord *jcr_walk_next(JobControlRecord *prev_jcr)
1085 {
1086   JobControlRecord *jcr;
1087 
1088   lock_jcr_chain();
1089   jcr = (JobControlRecord *)job_control_record_chain->next(prev_jcr);
1090   if (jcr) {
1091     jcr->IncUseCount();
1092     if (jcr->JobId > 0) {
1093       Dmsg3(debuglevel, "Inc walk_next jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
1094     }
1095   }
1096   unlock_jcr_chain();
1097   if (prev_jcr) { FreeJcr(prev_jcr); }
1098   return jcr;
1099 }
1100 
1101 /*
1102  * Release last jcr referenced
1103  */
1104 void JcrWalkEnd(JobControlRecord *jcr)
1105 {
1106   if (jcr) {
1107     if (jcr->JobId > 0) {
1108       Dmsg3(debuglevel, "Free walk_end jid=%u UseCount=%d Job=%s\n", jcr->JobId, jcr->UseCount(), jcr->Job);
1109     }
1110     FreeJcr(jcr);
1111   }
1112 }
1113 
1114 /*
1115  * Return number of Jobs
1116  */
1117 int JobCount()
1118 {
1119   JobControlRecord *jcr;
1120   int count = 0;
1121 
1122   lock_jcr_chain();
1123   for (jcr = (JobControlRecord *)job_control_record_chain->first();
1124        (jcr = (JobControlRecord *)job_control_record_chain->next(jcr));) {
1125     if (jcr->JobId > 0) { count++; }
1126   }
1127   unlock_jcr_chain();
1128   return count;
1129 }
1130 
1131 /*
1132  * Setup to call the timeout check routine every 30 seconds
1133  * This routine will check any timers that have been enabled.
1134  */
1135 bool InitJcrSubsystem(int timeout)
1136 {
1137   watchdog_t *wd = new_watchdog();
1138 
1139   watch_dog_timeout = timeout;
1140   wd->one_shot      = false;
1141   wd->interval      = 30; /* FIXME: should be configurable somewhere, even
1142                            if only with a #define */
1143   wd->callback = JcrTimeoutCheck;
1144 
1145   RegisterWatchdog(wd);
1146 
1147   return true;
1148 }
1149 
1150 static void JcrTimeoutCheck(watchdog_t *self)
1151 {
1152   JobControlRecord *jcr;
1153   BareosSocket *bs;
1154   time_t timer_start;
1155 
1156   Dmsg0(debuglevel, "Start JobControlRecord timeout checks\n");
1157 
1158   /* Walk through all JCRs checking if any one is
1159    * blocked for more than specified max time.
1160    */
1161   foreach_jcr (jcr) {
1162     Dmsg2(debuglevel, "JcrTimeoutCheck JobId=%u jcr=0x%x\n", jcr->JobId, jcr);
1163     if (jcr->JobId == 0) { continue; }
1164     bs = jcr->store_bsock;
1165     if (bs) {
1166       timer_start = bs->timer_start;
1167       if (timer_start && (watchdog_time - timer_start) > watch_dog_timeout) {
1168         bs->timer_start = 0; /* turn off timer */
1169         bs->SetTimedOut();
1170         Qmsg(jcr, M_ERROR, 0,
1171              _("Watchdog sending kill after %d secs to thread stalled reading Storage daemon.\n"),
1172              watchdog_time - timer_start);
1173         jcr->MyThreadSendSignal(TIMEOUT_SIGNAL);
1174       }
1175     }
1176     bs = jcr->file_bsock;
1177     if (bs) {
1178       timer_start = bs->timer_start;
1179       if (timer_start && (watchdog_time - timer_start) > watch_dog_timeout) {
1180         bs->timer_start = 0; /* turn off timer */
1181         bs->SetTimedOut();
1182         Qmsg(jcr, M_ERROR, 0,
1183              _("Watchdog sending kill after %d secs to thread stalled reading File daemon.\n"),
1184              watchdog_time - timer_start);
1185         jcr->MyThreadSendSignal(TIMEOUT_SIGNAL);
1186       }
1187     }
1188     bs = jcr->dir_bsock;
1189     if (bs) {
1190       timer_start = bs->timer_start;
1191       if (timer_start && (watchdog_time - timer_start) > watch_dog_timeout) {
1192         bs->timer_start = 0; /* turn off timer */
1193         bs->SetTimedOut();
1194         Qmsg(jcr, M_ERROR, 0, _("Watchdog sending kill after %d secs to thread stalled reading Director.\n"),
1195              watchdog_time - timer_start);
1196         jcr->MyThreadSendSignal(TIMEOUT_SIGNAL);
1197       }
1198     }
1199   }
1200   endeach_jcr(jcr);
1201 
1202   Dmsg0(debuglevel, "Finished JobControlRecord timeout checks\n");
1203 }
1204 
1205 /*
1206  * Return next JobId from comma separated list
1207  *
1208  * Returns:
1209  *   1 if next JobId returned
1210  *   0 if no more JobIds are in list
1211  *  -1 there is an error
1212  */
1213 int GetNextJobidFromList(char **p, uint32_t *JobId)
1214 {
1215   const int maxlen = 30;
1216   char jobid[maxlen + 1];
1217   char *q = *p;
1218 
1219   jobid[0] = 0;
1220   for (int i = 0; i < maxlen; i++) {
1221     if (*q == 0) {
1222       break;
1223     } else if (*q == ',') {
1224       q++;
1225       break;
1226     }
1227     jobid[i]     = *q++;
1228     jobid[i + 1] = 0;
1229   }
1230   if (jobid[0] == 0) {
1231     return 0;
1232   } else if (!Is_a_number(jobid)) {
1233     return -1; /* error */
1234   }
1235   *p     = q;
1236   *JobId = str_to_int64(jobid);
1237   return 1;
1238 }
1239 
/*
 * Timeout signal comes here
 *
 * The handler has no work of its own: returning from it is all that is
 * needed, thus interrupting the function that was running.
 */
extern "C" void TimeoutHandler(int sig)
{
  (void)sig; /* the signal number is not needed */
}
1244 
/*
 * Used to display specific daemon information after a fatal signal
 * (like BareosDb in the director)
 *
 * Hooks are added with DbgJcrAddHook() and every registered hook is called
 * by DbgPrintJcr() for each JobControlRecord it dumps.
 */
/* Number of slots in the hook table */
#define MAX_DBG_HOOK 10
/* Registered hooks, stored in the order in which they were added */
static dbg_jcr_hook_t *dbg_jcr_hooks[MAX_DBG_HOOK];
/* Number of slots of dbg_jcr_hooks that are in use */
static int dbg_jcr_handler_count;
1252 
1253 void DbgJcrAddHook(dbg_jcr_hook_t *hook)
1254 {
1255   ASSERT(dbg_jcr_handler_count < MAX_DBG_HOOK);
1256   dbg_jcr_hooks[dbg_jcr_handler_count++] = hook;
1257 }
1258 
1259 /*
1260  * !!! WARNING !!!
1261  *
1262  * This function should be used ONLY after a fatal signal. We walk through the
1263  * JobControlRecord chain without doing any lock, BAREOS should not be running.
1264  */
1265 void DbgPrintJcr(FILE *fp)
1266 {
1267   char ed1[50], buf1[128], buf2[128], buf3[128], buf4[128];
1268   if (!job_control_record_chain) { return; }
1269 
1270   fprintf(fp, "Attempt to dump current JCRs. njcrs=%d\n", job_control_record_chain->size());
1271 
1272   for (JobControlRecord *jcr = (JobControlRecord *)job_control_record_chain->first(); jcr;
1273        jcr                   = (JobControlRecord *)job_control_record_chain->next(jcr)) {
1274     fprintf(fp, "threadid=%s JobId=%d JobStatus=%c jcr=%p name=%s\n",
1275             edit_pthread(jcr->my_thread_id, ed1, sizeof(ed1)), (int)jcr->JobId, jcr->JobStatus, jcr, jcr->Job);
1276     fprintf(fp, "threadid=%s killable=%d JobId=%d JobStatus=%c jcr=%p name=%s\n",
1277             edit_pthread(jcr->my_thread_id, ed1, sizeof(ed1)), jcr->IsKillable(), (int)jcr->JobId,
1278             jcr->JobStatus, jcr, jcr->Job);
1279     fprintf(fp, "\tUseCount=%i\n", jcr->UseCount());
1280     fprintf(fp, "\tJobType=%c JobLevel=%c\n", jcr->getJobType(), jcr->getJobLevel());
1281     bstrftime(buf1, sizeof(buf1), jcr->sched_time);
1282     bstrftime(buf2, sizeof(buf2), jcr->start_time);
1283     bstrftime(buf3, sizeof(buf3), jcr->end_time);
1284     bstrftime(buf4, sizeof(buf4), jcr->wait_time);
1285     fprintf(fp, "\tsched_time=%s start_time=%s\n\tend_time=%s wait_time=%s\n", buf1, buf2, buf3, buf4);
1286     fprintf(fp, "\tdb=%p db_batch=%p batch_started=%i\n", jcr->db, jcr->db_batch, jcr->batch_started);
1287 
1288     /*
1289      * Call all the jcr debug hooks
1290      */
1291     for (int i = 0; i < dbg_jcr_handler_count; i++) {
1292       dbg_jcr_hook_t *hook = dbg_jcr_hooks[i];
1293       hook(jcr, fp);
1294     }
1295   }
1296 }
1297