1 /*
2    Bacula(R) - The Network Backup Solution
3 
4    Copyright (C) 2000-2020 Kern Sibbald
5 
6    The original author of Bacula is Kern Sibbald, with contributions
7    from many others, a complete list can be found in the file AUTHORS.
8 
9    You may use this file and others of this release according to the
10    license defined in the LICENSE file, which includes the Affero General
11    Public License, v3.0 ("AGPLv3") and some additional permissions and
12    terms pursuant to its AGPLv3 Section 7.
13 
14    This notice must be preserved when any source code is
15    conveyed and/or propagated.
16 
17    Bacula(R) is a registered trademark of Kern Sibbald.
18 */
19 /*
20  *
21  *  Dumb program to extract files from a Bacula backup.
22  *
23  *   Kern E. Sibbald, MM
24  */
25 
26 #include "bacula.h"
27 #include "stored.h"
28 #include "ch.h"
29 #include "findlib/find.h"
30 
31 #ifdef HAVE_LZO
32 #include <lzo/lzoconf.h>
33 #include <lzo/lzo1x.h>
34 #endif
35 
36 extern bool parse_sd_config(CONFIG *config, const char *configfile, int exit_code);
37 
38 static void do_extract(char *fname);
39 static bool record_cb(DCR *dcr, DEV_RECORD *rec);
40 
41 static DEVICE *dev = NULL;
42 static DCR *dcr;
43 static BFILE bfd;
44 static JCR *jcr;
45 static FF_PKT *ff;
46 static BSR *bsr = NULL;
47 static bool extract = false;
48 static int non_support_data = 0;
49 static long total = 0;
50 static ATTR *attr;
51 static POOLMEM *curr_fname;
52 static char *where;
53 static uint64_t num_errors = 0;
54 static uint64_t num_records = 0;
55 static uint32_t num_files = 0;
56 static uint32_t compress_buf_size = 70000;
57 static POOLMEM *compress_buf;
58 static int prog_name_msg = 0;
59 static int win32_data_msg = 0;
60 static char *VolumeName = NULL;
61 
62 static char *wbuf;                    /* write buffer address */
63 static uint32_t wsize;                /* write size */
64 static uint64_t fileAddr = 0;         /* file write address */
65 
66 static CONFIG *config;
67 #define CONFIG_FILE "bacula-sd.conf"
68 
69 void *start_heap;
70 char *configfile = NULL;
71 bool skip_extract = false;
72 
usage()73 static void usage()
74 {
75    fprintf(stderr, _(
76 PROG_COPYRIGHT
77 "\n%sVersion: %s (%s)\n\n"
78 "Usage: bextract <options> <bacula-archive-device-name> <directory-to-store-files>\n"
79 "       -b <file>       specify a bootstrap file\n"
80 "       -c <file>       specify a Storage configuration file\n"
81 "       -d <nn>         set debug level to <nn>\n"
82 "       -dt             print timestamp in debug output\n"
83 "       -T              send debug traces to trace file (stored in /tmp)\n"
84 "       -e <file>       exclude list\n"
85 "       -i <file>       include list\n"
86 "       -p              proceed inspite of I/O errors\n"
87 "       -t              read data from volume, do not write anything\n"
88 "       -v              verbose\n"
89 "       -V <volumes>    specify Volume names (separated by |)\n"
90 "       -?              print this message\n\n"), 2000, "", VERSION, BDATE);
91    exit(1);
92 }
93 
94 
main(int argc,char * argv[])95 int main (int argc, char *argv[])
96 {
97    int ch;
98    FILE *fd;
99    char line[1000];
100    bool got_inc = false;
101    BtoolsAskDirHandler askdir_handler;
102 
103    init_askdir_handler(&askdir_handler);
104    setlocale(LC_ALL, "");
105    bindtextdomain("bacula", LOCALEDIR);
106    textdomain("bacula");
107    init_stack_dump();
108    lmgr_init_thread();
109 
110    working_directory = "/tmp";
111    my_name_is(argc, argv, "bextract");
112    init_msg(NULL, NULL);              /* setup message handler */
113 
114    OSDependentInit();
115 
116    ff = init_find_files();
117    binit(&bfd);
118 
119    while ((ch = getopt(argc, argv, "Ttb:c:d:e:i:pvV:?")) != -1) {
120       switch (ch) {
121       case 't':
122          skip_extract = true;
123          break;
124 
125       case 'b':                    /* bootstrap file */
126          bsr = parse_bsr(NULL, optarg);
127          break;
128 
129       case 'T':                 /* Send debug to trace file */
130          set_trace(1);
131          break;
132 
133       case 'c':                    /* specify config file */
134          if (configfile != NULL) {
135             free(configfile);
136          }
137          configfile = bstrdup(optarg);
138          break;
139 
140       case 'd':                    /* debug level */
141          if (*optarg == 't') {
142             dbg_timestamp = true;
143          } else {
144             char *p;
145             /* We probably find a tag list -d 10,sql,bvfs */
146             if ((p = strchr(optarg, ',')) != NULL) {
147                *p = 0;
148             }
149             debug_level = atoi(optarg);
150             if (debug_level <= 0) {
151                debug_level = 1;
152             }
153             if (p) {
154                debug_parse_tags(p+1, &debug_level_tags);
155             }
156          }
157          break;
158 
159       case 'e':                    /* exclude list */
160          if ((fd = bfopen(optarg, "rb")) == NULL) {
161             berrno be;
162             Pmsg2(0, _("Could not open exclude file: %s, ERR=%s\n"),
163                optarg, be.bstrerror());
164             exit(1);
165          }
166          while (fgets(line, sizeof(line), fd) != NULL) {
167             strip_trailing_junk(line);
168             Dmsg1(900, "add_exclude %s\n", line);
169             add_fname_to_exclude_list(ff, line);
170          }
171          fclose(fd);
172          break;
173 
174       case 'i':                    /* include list */
175          if ((fd = bfopen(optarg, "rb")) == NULL) {
176             berrno be;
177             Pmsg2(0, _("Could not open include file: %s, ERR=%s\n"),
178                optarg, be.bstrerror());
179             exit(1);
180          }
181          while (fgets(line, sizeof(line), fd) != NULL) {
182             strip_trailing_junk(line);
183             Dmsg1(900, "add_include %s\n", line);
184             add_fname_to_include_list(ff, 0, line);
185          }
186          fclose(fd);
187          got_inc = true;
188          break;
189 
190       case 'p':
191          forge_on = true;
192          break;
193 
194       case 'v':
195          verbose++;
196          break;
197 
198       case 'V':                    /* Volume name */
199          VolumeName = optarg;
200          break;
201 
202       case '?':
203       default:
204          usage();
205 
206       } /* end switch */
207    } /* end while */
208    argc -= optind;
209    argv += optind;
210 
211    if (argc != 2) {
212       Pmsg0(0, _("Wrong number of arguments: \n"));
213       usage();
214    }
215 
216    if (configfile == NULL) {
217       configfile = bstrdup(CONFIG_FILE);
218    }
219 
220    config = New(CONFIG());
221    parse_sd_config(config, configfile, M_ERROR_TERM);
222    setup_me();
223    load_sd_plugins(me->plugin_directory);
224 
225    if (!got_inc) {                            /* If no include file, */
226       add_fname_to_include_list(ff, 0, "/");  /*   include everything */
227    }
228 
229    where = argv[1];
230    do_extract(argv[0]);
231 
232    if (bsr) {
233       free_bsr(bsr);
234    }
235    if (prog_name_msg) {
236       Pmsg1(000, _("%d Program Name and/or Program Data Stream records ignored.\n"),
237          prog_name_msg);
238    }
239    if (win32_data_msg) {
240       Pmsg1(000, _("%d Win32 data or Win32 gzip data stream records. Ignored.\n"),
241          win32_data_msg);
242    }
243    term_include_exclude_files(ff);
244    term_find_files(ff);
245    return 0;
246 }
247 
do_extract(char * devname)248 static void do_extract(char *devname)
249 {
250    char ed1[50];
251    struct stat statp;
252 
253    enable_backup_privileges(NULL, 1);
254 
255    jcr = setup_jcr("bextract", devname, bsr, VolumeName, SD_READ, true/*read dedup data*/);
256    if (!jcr) {
257       exit(1);
258    }
259    dev = jcr->read_dcr->dev;
260    if (!dev) {
261       exit(1);
262    }
263    dcr = jcr->read_dcr;
264 
265    /* Make sure where directory exists and that it is a directory */
266    if (stat(where, &statp) < 0) {
267       berrno be;
268       Emsg2(M_ERROR_TERM, 0, _("Cannot stat %s. It must exist. ERR=%s\n"),
269          where, be.bstrerror());
270    }
271    if (!S_ISDIR(statp.st_mode)) {
272       Emsg1(M_ERROR_TERM, 0, _("%s must be a directory.\n"), where);
273    }
274 
275    free(jcr->where);
276    jcr->where = bstrdup(where);
277    attr = new_attr(jcr);
278 
279    compress_buf = get_memory(compress_buf_size);
280    curr_fname = get_pool_memory(PM_FNAME);
281    *curr_fname = 0;
282 
283    read_records(dcr, record_cb, mount_next_read_volume);
284    /* If output file is still open, it was the last one in the
285     * archive since we just hit an end of file, so close the file.
286     */
287    if (is_bopen(&bfd)) {
288       set_attributes(jcr, attr, &bfd);
289    }
290    release_device(dcr);
291    free_attr(attr);
292    free_jcr(jcr);
293    dev->term(NULL);
294    free_pool_memory(curr_fname);
295 
296    printf(_("%u files restored.\n"), num_files);
297    if (num_errors) {
298       printf(_("Found %s error%s\n"), edit_uint64(num_errors, ed1), num_errors>1? "s":"");
299    }
300    return;
301 }
302 
store_data(BFILE * bfd,char * data,const int32_t length)303 static bool store_data(BFILE *bfd, char *data, const int32_t length)
304 {
305    if (is_win32_stream(attr->data_stream) && !have_win32_api()) {
306       set_portable_backup(bfd);
307       if (!processWin32BackupAPIBlock(bfd, data, length)) {
308          berrno be;
309          Emsg2(M_ERROR_TERM, 0, _("Write error on %s: %s\n"),
310                attr->ofname, be.bstrerror());
311          return false;
312       }
313    } else if (bwrite(bfd, data, length) != (ssize_t)length) {
314       berrno be;
315       Emsg2(M_ERROR_TERM, 0, _("Write error on %s: %s\n"),
316             attr->ofname, be.bstrerror());
317       return false;
318    }
319 
320    return true;
321 }
322 
323 /*
324  * Called here for each record from read_records()
325  */
record_cb(DCR * dcr,DEV_RECORD * rec)326 static bool record_cb(DCR *dcr, DEV_RECORD *rec)
327 {
328    int stat, ret=true;
329    JCR *jcr = dcr->jcr;
330    char ed1[50];
331 
332    bool     restoredatap = false;
333    POOLMEM *orgdata = NULL;
334    uint32_t orgdata_len = 0;
335 
336    if (rec->FileIndex < 0) {
337       return true;                    /* we don't want labels */
338    }
339 
340    /* In this mode, we do not create any file on disk, just read
341     * everything from the volume.
342     */
343    if (skip_extract) {
344       switch (rec->maskedStream) {
345       case STREAM_UNIX_ATTRIBUTES:
346       case STREAM_UNIX_ATTRIBUTES_EX:
347          if (!unpack_attributes_record(jcr, rec->Stream, rec->data, rec->data_len, attr)) {
348             Emsg0(M_ERROR_TERM, 0, _("Cannot continue.\n"));
349          }
350          if (verbose) {
351             attr->data_stream = decode_stat(attr->attr, &attr->statp, sizeof(attr->statp), &attr->LinkFI);
352             build_attr_output_fnames(jcr, attr);
353             print_ls_output(jcr, attr);
354          }
355          pm_strcpy(curr_fname, attr->fname);
356          num_files++;
357          break;
358       }
359       num_records++;
360 
361       /* We display some progress information if verbose not set or set to 2 */
362       if (verbose != 1 && (num_records % 200000) == 0L) {
363          fprintf(stderr, "\rfiles=%d records=%s\n", num_files, edit_uint64(num_records, ed1));
364       }
365       ret = true;
366       goto bail_out;
367    }
368 
369    /* File Attributes stream */
370 
371    switch (rec->maskedStream) {
372    case STREAM_UNIX_ATTRIBUTES:
373    case STREAM_UNIX_ATTRIBUTES_EX:
374 
375       /* If extracting, it was from previous stream, so
376        * close the output file.
377        */
378       if (extract) {
379          if (!is_bopen(&bfd)) {
380             Emsg0(M_ERROR, 0, _("Logic error output file should be open but is not.\n"));
381          }
382          set_attributes(jcr, attr, &bfd);
383          extract = false;
384       }
385 
386       if (!unpack_attributes_record(jcr, rec->Stream, rec->data, rec->data_len, attr)) {
387          Emsg0(M_ERROR_TERM, 0, _("Cannot continue.\n"));
388       }
389 
390       /* Keep the name of the current file if we find a bad block */
391       pm_strcpy(curr_fname, attr->fname);
392 
393       if (file_is_included(ff, attr->fname) && !file_is_excluded(ff, attr->fname)) {
394          attr->data_stream = decode_stat(attr->attr, &attr->statp, sizeof(attr->statp), &attr->LinkFI);
395          if (!is_restore_stream_supported(attr->data_stream)) {
396             if (!non_support_data++) {
397                Jmsg(jcr, M_ERROR, 0, _("%s stream not supported on this Client.\n"),
398                   stream_to_ascii(attr->data_stream));
399             }
400             extract = false;
401             goto bail_out;
402          }
403 
404          build_attr_output_fnames(jcr, attr);
405 
406          if (attr->type == FT_DELETED) { /* TODO: choose the right fname/ofname */
407             Jmsg(jcr, M_INFO, 0, _("%s was deleted.\n"), attr->fname);
408             extract = false;
409             goto bail_out;
410          }
411 
412          extract = false;
413          stat = create_file(jcr, attr, &bfd, REPLACE_ALWAYS);
414 
415          switch (stat) {
416          case CF_ERROR:
417          case CF_SKIP:
418             break;
419          case CF_EXTRACT:
420             extract = true;
421             print_ls_output(jcr, attr);
422             num_files++;
423             fileAddr = 0;
424             break;
425          case CF_CREATED:
426             set_attributes(jcr, attr, &bfd);
427             print_ls_output(jcr, attr);
428             num_files++;
429             fileAddr = 0;
430             break;
431          }
432       }
433       break;
434 
435    case STREAM_RESTORE_OBJECT:
436       /* nothing to do */
437       break;
438 
439    /* Data stream and extracting */
440    case STREAM_FILE_DATA:
441    case STREAM_SPARSE_DATA:
442    case STREAM_WIN32_DATA:
443       if (extract) {
444          if (rec->maskedStream == STREAM_SPARSE_DATA) {
445             ser_declare;
446             uint64_t faddr;
447             wbuf = rec->data + OFFSET_FADDR_SIZE;
448             wsize = rec->data_len - OFFSET_FADDR_SIZE;
449             ser_begin(rec->data, OFFSET_FADDR_SIZE);
450             unser_uint64(faddr);
451             /* We seek only for real SPARSE data, not for OFFSET option */
452             if ((rec->Stream & STREAM_BIT_OFFSETS) == 0 && fileAddr != faddr) {
453                fileAddr = faddr;
454                if (blseek(&bfd, (boffset_t)fileAddr, SEEK_SET) < 0) {
455                   berrno be;
456                   Emsg3(M_ERROR_TERM, 0, _("Seek error Addr=%llu on %s: %s\n"),
457                      fileAddr, attr->ofname, be.bstrerror());
458                }
459             }
460          } else {
461             wbuf = rec->data;
462             wsize = rec->data_len;
463          }
464          total += wsize;
465          Dmsg2(8, "Write %u bytes, total=%u\n", wsize, total);
466          store_data(&bfd, wbuf, wsize);
467          fileAddr += wsize;
468       }
469       break;
470 
471    /* GZIP data stream */
472    case STREAM_GZIP_DATA:
473    case STREAM_SPARSE_GZIP_DATA:
474    case STREAM_WIN32_GZIP_DATA:
475 #ifdef HAVE_LIBZ
476       if (extract) {
477          uLong compress_len = compress_buf_size;
478          int stat = Z_BUF_ERROR;
479 
480          if (rec->maskedStream == STREAM_SPARSE_GZIP_DATA) {
481             ser_declare;
482             uint64_t faddr;
483             char ec1[50];
484             wbuf = rec->data + OFFSET_FADDR_SIZE;
485             wsize = rec->data_len - OFFSET_FADDR_SIZE;
486             ser_begin(rec->data, OFFSET_FADDR_SIZE);
487             unser_uint64(faddr);
488             if ((rec->Stream & STREAM_BIT_OFFSETS) == 0 && fileAddr != faddr) {
489                fileAddr = faddr;
490                if (blseek(&bfd, (boffset_t)fileAddr, SEEK_SET) < 0) {
491                   berrno be;
492                   Emsg3(M_ERROR, 0, _("Seek to %s error on %s: ERR=%s\n"),
493                      edit_uint64(fileAddr, ec1), attr->ofname, be.bstrerror());
494                   extract = false;
495                   goto bail_out;
496                }
497             }
498          } else {
499             wbuf = rec->data;
500             wsize = rec->data_len;
501          }
502 
503          while (compress_len < 10000000 && (stat=uncompress((Byte *)compress_buf, &compress_len,
504                                  (const Byte *)wbuf, (uLong)wsize)) == Z_BUF_ERROR) {
505             /* The buffer size is too small, try with a bigger one */
506             compress_len = 2 * compress_len;
507             compress_buf = check_pool_memory_size(compress_buf,
508                                                   compress_len);
509          }
510          if (stat != Z_OK) {
511             Emsg1(M_ERROR, 0, _("Uncompression error. ERR=%d\n"), stat);
512             extract = false;
513             goto bail_out;
514          }
515 
516          Dmsg2(100, "Write uncompressed %d bytes, total before write=%d\n", compress_len, total);
517          store_data(&bfd, compress_buf, compress_len);
518          total += compress_len;
519          fileAddr += compress_len;
520          Dmsg2(100, "Compress len=%d uncompressed=%d\n", rec->data_len,
521             compress_len);
522       }
523 #else
524       if (extract) {
525          Emsg0(M_ERROR, 0, _("GZIP data stream found, but GZIP not configured!\n"));
526          extract = false;
527          goto bail_out;
528       }
529 #endif
530       break;
531 
532    /* Compressed data stream */
533    case STREAM_COMPRESSED_DATA:
534    case STREAM_SPARSE_COMPRESSED_DATA:
535    case STREAM_WIN32_COMPRESSED_DATA:
536       if (extract) {
537          uint32_t comp_magic, comp_len;
538          uint16_t comp_level, comp_version;
539 #ifdef HAVE_LZO
540          lzo_uint compress_len;
541          const unsigned char *cbuf;
542          int r, real_compress_len;
543 #endif
544 
545          if (rec->maskedStream == STREAM_SPARSE_COMPRESSED_DATA) {
546             ser_declare;
547             uint64_t faddr;
548             char ec1[50];
549             wbuf = rec->data + OFFSET_FADDR_SIZE;
550             wsize = rec->data_len - OFFSET_FADDR_SIZE;
551             ser_begin(rec->data, OFFSET_FADDR_SIZE);
552             unser_uint64(faddr);
553             if ((rec->Stream & STREAM_BIT_OFFSETS) == 0 && fileAddr != faddr) {
554                fileAddr = faddr;
555                if (blseek(&bfd, (boffset_t)fileAddr, SEEK_SET) < 0) {
556                   berrno be;
557                   Emsg3(M_ERROR, 0, _("Seek to %s error on %s: ERR=%s\n"),
558                      edit_uint64(fileAddr, ec1), attr->ofname, be.bstrerror());
559                   extract = false;
560                   goto bail_out;
561                }
562             }
563          } else {
564             wbuf = rec->data;
565             wsize = rec->data_len;
566          }
567 
568          /* read compress header */
569          unser_declare;
570          unser_begin(wbuf, sizeof(comp_stream_header));
571          unser_uint32(comp_magic);
572          unser_uint32(comp_len);
573          unser_uint16(comp_level);
574          unser_uint16(comp_version);
575          Dmsg4(200, "Compressed data stream found: magic=0x%x, len=%d, level=%d, ver=0x%x\n", comp_magic, comp_len,
576                                  comp_level, comp_version);
577 
578          /* version check */
579          if (comp_version != COMP_HEAD_VERSION) {
580             Emsg1(M_ERROR, 0, _("Compressed header version error. version=0x%x\n"), comp_version);
581             ret = false;
582             goto bail_out;
583          }
584          /* size check */
585          if (comp_len + sizeof(comp_stream_header) != wsize) {
586             Emsg2(M_ERROR, 0, _("Compressed header size error. comp_len=%d, msglen=%d\n"),
587                  comp_len, wsize);
588             ret = false;
589             goto bail_out;
590          }
591 
592           switch(comp_magic) {
593 #ifdef HAVE_LZO
594             case COMPRESS_LZO1X:
595                compress_len = compress_buf_size;
596                cbuf = (const unsigned char*) wbuf + sizeof(comp_stream_header);
597                real_compress_len = wsize - sizeof(comp_stream_header);
598                Dmsg2(200, "Comp_len=%d msglen=%d\n", compress_len, wsize);
599                while ((r=lzo1x_decompress_safe(cbuf, real_compress_len,
600                                                (unsigned char *)compress_buf, &compress_len, NULL)) == LZO_E_OUTPUT_OVERRUN)
601                {
602 
603                   /* The buffer size is too small, try with a bigger one */
604                   compress_len = 2 * compress_len;
605                   compress_buf = check_pool_memory_size(compress_buf,
606                                                   compress_len);
607                }
608                if (r != LZO_E_OK) {
609                   Emsg1(M_ERROR, 0, _("LZO uncompression error. ERR=%d\n"), r);
610                   extract = false;
611                   goto bail_out;
612                }
613                Dmsg2(100, "Write uncompressed %d bytes, total before write=%d\n", compress_len, total);
614                store_data(&bfd, compress_buf, compress_len);
615                total += compress_len;
616                fileAddr += compress_len;
617                Dmsg2(100, "Compress len=%d uncompressed=%d\n", rec->data_len, compress_len);
618                break;
619 #endif
620             default:
621                Emsg1(M_ERROR, 0, _("Compression algorithm 0x%x found, but not supported!\n"), comp_magic);
622                extract = false;
623                goto bail_out;
624          }
625 
626       }
627       break;
628 
629    case STREAM_MD5_DIGEST:
630    case STREAM_SHA1_DIGEST:
631    case STREAM_SHA256_DIGEST:
632    case STREAM_SHA512_DIGEST:
633       break;
634 
635    case STREAM_SIGNED_DIGEST:
636    case STREAM_ENCRYPTED_SESSION_DATA:
637       // TODO landonf: Investigate crypto support in the storage daemon
638       break;
639 
640    case STREAM_PROGRAM_NAMES:
641    case STREAM_PROGRAM_DATA:
642       if (!prog_name_msg) {
643          Pmsg0(000, _("Got Program Name or Data Stream. Ignored.\n"));
644          prog_name_msg++;
645       }
646       break;
647    case STREAM_PLUGIN_NAME:
648       /* Just ignore the plugin name */
649       break;
650    default:
651       /* If extracting, weird stream (not 1 or 2), close output file anyway */
652       if (extract) {
653          if (!is_bopen(&bfd)) {
654             Emsg0(M_ERROR, 0, _("Logic error output file should be open but is not.\n"));
655          }
656          set_attributes(jcr, attr, &bfd);
657          extract = false;
658       }
659       Jmsg(jcr, M_ERROR, 0, _("Unknown stream=%d ignored. This shouldn't happen!\n"),
660          rec->Stream);
661       break;
662 
663    } /* end switch */
664 bail_out:
665    if (restoredatap) {
666       rec->data = orgdata;
667       rec->data_len = orgdata_len;
668    }
669    return ret;
670 }
671