1 /*===========================================================================
2  *
3  *                            PUBLIC DOMAIN NOTICE
4  *               National Center for Biotechnology Information
5  *
6  *  This software/database is a "United States Government Work" under the
7  *  terms of the United States Copyright Act.  It was written as part of
8  *  the author's official duties as a United States Government employee and
9  *  thus cannot be copyrighted.  This software/database is freely available
10  *  to the public for use. The National Library of Medicine and the U.S.
11  *  Government have not placed any restriction on its use or reproduction.
12  *
13  *  Although all reasonable efforts have been taken to ensure the accuracy
14  *  and reliability of the software and data, the NLM and the U.S.
15  *  Government do not and cannot warrant the performance or results that
16  *  may be obtained by using this software or data. The NLM and the U.S.
17  *  Government disclaim all warranties, express or implied, including
18  *  warranties of performance, merchantability or fitness for any particular
19  *  purpose.
20  *
21  *  Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  */
26 
27 #include "copycat-priv.h"
28 #include "cctree-priv.h"
29 
30 #include <vfs/manager.h>
31 #include <vfs/path.h>
32 #include <vfs/path-priv.h>
33 #include <kfs/directory.h>
34 #include <kfs/file.h>
35 #include <kfs/nullfile.h>
36 #include <kfs/crc.h>
37 #include <klib/checksum.h>
38 #include <klib/writer.h>
39 #include <klib/log.h>
40 #include <klib/status.h>
41 #include <klib/debug.h>
42 #include <klib/out.h>
43 #include <klib/status.h>
44 #include <klib/text.h>
45 #include <kapp/main.h>
46 #include <kapp/args.h>
47 #include <klib/rc.h>
48 #include <klib/vector.h>
49 
50 #include <strtol.h>
51 
52 #include <time.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <assert.h>
57 
/*
 * some program globals
 *
 * These are file-scope definitions with external linkage unless marked
 * static; other translation units of the tool may refer to them.
 */
const char * program_name = "copycat"; /* default it but try to set it */
const char * full_path = "copycat"; /* default it but try to set it */
int verbose = 0;
/* NOTE(review): presumably a shared "null" output file - confirm where it is created */
KFile *fnull;

/* cache information */
CCTree *ctree;
/* directory opened on the cache path by copycat_run; NULL when no cache path was given */
KDirectory *cdir;

/* NOTE(review): presumably set from the input-buffer / output-buffer options - confirm in option parsing */
uint32_t in_block = 0;
uint32_t out_block = 0;

CCTree *etree;
/* opened on the extract path by copycat_run; NULL when no extract path was given */
KDirectory * edir; /* extracted file base kdir */
char epath[8192];
char * ehere;
/* NOTE(review): the four flags below presumably mirror the xml-dir, extract-to-dir,
 * no-bzip2 and no-md5 options - confirm in option parsing */
bool xml_dir = false;
bool extract_dir = false;
bool no_bzip2 = false;
bool no_md5 = false;
/* copycat_file2file points this at stderr when the destination is
 * /dev/stdout or /dev/fd/1 */
void * dump_out;
/* when non-NULL, copycat_files2dir uses it instead of the source leaf name
 * as the name handed to copycat_file2file */
const char * xml_base = NULL;

/* file name extensions that copycat_file2file compares against when
 * stripping or appending an encryption extension */
char ncbi_encryption_extension[] = ".nenc";
char wga_encryption_extension[] = ".ncbi_enc";

/* log writers and their data as they were before copycat installed its own;
 * filled in by copycat_file2file, used by the copycat_log_* functions */
static
KWrtWriter  log_writer;
static
KWrtWriter  log_lib_writer;
static
void * log_data;
static
void * log_lib_data;
95 
copycat_log_writer(void * self,const char * buffer,size_t buffer_size,size_t * num_writ)96 rc_t CC copycat_log_writer (void * self, const char * buffer, size_t buffer_size,
97                             size_t * num_writ)
98 {
99     if (self)
100     {
101         void * bf = malloc (sizeof (SLNode) + buffer_size);
102         if (bf)
103         {
104             const char * ps, * pc;
105             size_t z;
106             ps = strchr (buffer, ' ');
107             if (ps)
108             {
109                 ++ps;
110                 pc = strchr (ps, '-');
111                 if (pc == NULL)
112                     pc = strchr (ps, ':');
113                 if (pc)
114                     pc = pc + 2;
115                 else
116                     pc = ps;
117             }
118             else
119                 pc = buffer;
120             z = buffer_size - (pc - buffer);
121             memmove ( (void*)(((SLNode*)bf)+1), pc, z);
122             ((char*)(((SLNode*)bf)+1))[z-1] = '\0';
123             SLListPushTail (self, bf);
124         }
125     }
126     return (log_writer != NULL)
127         ? log_writer (log_data, buffer, buffer_size, num_writ) : 0;
128 }
copycat_log_lib_writer(void * self,const char * buffer,size_t buffer_size,size_t * num_writ)129 rc_t CC copycat_log_lib_writer  (void * self, const char * buffer, size_t buffer_size,
130                                  size_t * num_writ)
131 {
132     if (self)
133     {
134         void * bf = malloc (sizeof (SLNode) + buffer_size);
135         if (bf)
136         {
137             const char * ps, * pc;
138             size_t z;
139             ps = strchr (buffer, ' ');
140             if (ps)
141             {
142                 ++ps;
143                 pc = strchr (ps, ':');
144                 if (pc)
145                     pc = pc + 2;
146                 else
147                     pc = ps;
148             }
149             else
150                 pc = buffer;
151             z = buffer_size - (pc - buffer);
152             memmove ( (void*)(((SLNode*)bf)+1), pc, z);
153             ((char*)(((SLNode*)bf)+1))[z-1] = '\0';
154             SLListPushTail (self, bf);
155         }
156     }
157     return (log_lib_writer != NULL)
158         ? log_lib_writer (log_lib_data, buffer, buffer_size, num_writ) : 0;
159 }
160 
161 static
copycat_log_unset()162 rc_t copycat_log_unset ()
163 {
164     rc_t rc_l, rc_ll;
165 
166     rc_l = KLogHandlerSet (log_writer, log_data);
167     rc_ll = KLogHandlerSet (log_lib_writer, log_lib_data);
168 
169     return (rc_l != 0) ? rc_l : rc_ll;
170 }
171 
copycat_log_set(void * new,void ** prev)172 rc_t copycat_log_set (void * new, void ** prev)
173 {
174     rc_t rc;
175 
176     if (prev)
177         *prev = KLogDataGet();
178 
179     rc = KLogHandlerSet (copycat_log_writer, new);
180     if (rc == 0)
181         rc = KLogLibHandlerSet (copycat_log_lib_writer, new);
182 
183     if (rc)
184         copycat_log_unset ();
185 
186     return rc;
187 }
188 
/* global create mode */
/* passed to VFSManagerCreateFile by copycat_file2file when the destination
 * file is created */
KCreateMode cm = kcmParents | kcmCreate;

/* long option names; each is paired with the ALIAS_ macro of the same
 * suffix in the help output and in the option table */
#define OPTION_CACHE   "cache-dir"
#define OPTION_FORCE   "force"
#define OPTION_DEST    "output"
#define OPTION_EXTRACT "extract"
#define OPTION_EXTDIR  "extract-to-dir"
#define OPTION_XMLDIR  "xml-dir"
#define OPTION_DECPWD  "decryption-password"
#define OPTION_ENCPWD  "encryption-password"
#define OPTION_XMLBASE "xml-base-node"
#define OPTION_INBLOCK "input-buffer"
#define OPTION_OUTBLOCK "output-buffer"
#define OPTION_NOBZIP2 "no-bzip2"
#define OPTION_NOMD5   "no-md5"

/* alias strings for the options above; an empty string is used where no
 * alias is defined */
#define ALIAS_CACHE   "x"
#define ALIAS_FORCE   "f"
#define ALIAS_DEST    "o"
#define ALIAS_EXTRACT "e"
#define ALIAS_EXTDIR  "E"
#define ALIAS_XMLDIR  "X"
#define ALIAS_DECPWD  ""
#define ALIAS_ENCPWD  ""
#define ALIAS_XMLBASE ""
#define ALIAS_INBLOCK ""
#define ALIAS_OUTBLOCK ""
#define ALIAS_NOBZIP2 ""
#define ALIAS_NOMD5   ""
219 
220 
221 
/* help text for each option: NULL terminated arrays of lines, passed to
 * HelpOptionLine in Usage and referenced from the Options table */
static
const char * extract_usage[] =
{ "location of extracted files", NULL };
static
const char * cache_usage[] =
{ "location of output cached files", NULL };
static
const char * force_usage[] =
{ "force overwrite of existing files", NULL };
static
const char * dest_usage[] =
{ "location of output", NULL };
static
const char * xmldir_usage[] =
{ "XML matches extracted files", NULL };
static
const char * extdir_usage[] =
{ "extracted directories match normal XML", NULL };
static
const char * xmlbase_usage[] =
{ "use this to base the XML not destination; can only be used with a single source", NULL };
static
const char * inblock_usage[] =
{ "system file reads are of blocks of this size", NULL };
static
const char * outblock_usage[] =
{ "system file writes are of blocks of this size", NULL };
static
const char * no_bzip2_usage[] =
{ "do not decompress files compressed with bzip2", NULL };
/* NOTE(review): unlike its siblings this array is not static - confirm
 * whether external linkage is intended before changing it */
const char * no_md5_usage[] =
{ "do not calculate md5 hashes", NULL };


/* program name used by Usage when it cannot be taken from the arguments */
const char UsageDefaultName [] = "copycat";
257 
258 
UsageSummary(const char * progname)259 rc_t CC UsageSummary (const char * progname)
260 {
261     return KOutMsg (
262         "\n"
263         "Usage:\n"
264         "  %s [options] src-file dst-file\n"
265         "  %s [options] src-file [src-file...] dst-dir\n"
266         "  %s [options] -o dst-dir src-file [src-file...]\n"
267         "\n"
268         "Summary:\n"
269         "  Copies files and/or directories, creating a catalog of the copied files.\n",
270         progname, progname, progname);
271 }
272 
Usage(const Args * args)273 rc_t CC Usage (const Args * args)
274 {
275     const char * progname = UsageDefaultName;
276     const char * fullpath = UsageDefaultName;
277     rc_t rc;
278 
279     if (args == NULL)
280         rc = RC (rcApp, rcArgv, rcAccessing, rcSelf, rcNull);
281     else
282         rc = ArgsProgram (args, &fullpath, &progname);
283 
284     UsageSummary (progname);
285 
286     HelpOptionLine (ALIAS_CACHE, OPTION_CACHE, "dir-path", cache_usage);
287     HelpOptionLine (ALIAS_FORCE, OPTION_FORCE, NULL, force_usage);
288     HelpOptionLine (ALIAS_DEST, OPTION_DEST, "file-path", dest_usage);
289     HelpOptionLine (ALIAS_EXTRACT, OPTION_EXTRACT, "dir-path", extract_usage);
290     HelpOptionLine (ALIAS_EXTDIR, OPTION_EXTDIR, NULL, extdir_usage);
291     HelpOptionLine (ALIAS_XMLDIR, OPTION_XMLDIR, NULL, xmldir_usage);
292     HelpOptionLine (ALIAS_INBLOCK, OPTION_INBLOCK, "size-in-KB", inblock_usage);
293     HelpOptionLine (ALIAS_OUTBLOCK,OPTION_OUTBLOCK, "size-in-KB", outblock_usage);
294     HelpOptionLine (ALIAS_NOBZIP2,OPTION_NOBZIP2, NULL, no_bzip2_usage);
295     HelpOptionLine (ALIAS_NOMD5,OPTION_NOMD5, NULL, no_md5_usage);
296     HelpOptionsStandard ();
297 
298 
299 
300 /*                     1         2         3         4         5         6         7         8 */
301 /*            12345678901234567890123456789012345678901234567890123456789012345678901234567890 */
302     OUTMSG (("Use:\n"
303              " Copy and catalog:\n"
304              "  Some quick examples:\n"
305              "    %s dir/file.tar other-dir/file.tar\n"
306              "      copy file.tar from dir to other-dir and write the catalog to stdout\n"
307              "    %s dir/file.tar otherdir/\n"
308              "      the same\n", progname,progname));
309     OUTMSG (("    %s \"ncbi-file:dir/file.tar.nenc?encrypt&pwfile=pw other-dir.file.tar\n"
310              "      copy and decrypt file.tar.nenc from dir to other-dir and catalog\n"
311              "    %s dir/file.tar \"ncbi-file:other-dir/file.tar.nenc?encrypt&pwfile=pw\n"
312              "      copy and encrypt file.tar from dir to other-dir/file.tar.nenc and catalog\n"
313              "    %s \"ncbi-file:dir/file.tar.nenc?encrypt&pwfile=pw1 \\\n"
314              "                   \"ncbi-file:other-dir/file.tar.nenc?encrypt&pwfile=pw2\n"
315              "      copy the file as above while changing the encryption\n"
316              "\n", progname, progname, progname));
317     OUTMSG (("Use:\n"
318              "  Copy source file[s] to a destination file or directory.\n"
319              "  File names can either be typical path names or they can be URLs (IRLs) using\n"
320              "  the standard \"file\" or extended \"ncbi-file\" schemes.\n"
321              "  The catalog is XML output sent by default to stdout.\n"
322              "  As UTF-8 is accepted in the paths they are IRLs for International Resource\n"
323              "  Locators.\n"
324              "\n"));
325     OUTMSG (("  If the specified destination does not exist, there could be an ambiguity\n"
326              "  whether the destination is supposed to be a file or directory.  If the\n"
327              "  entered path ends in a '/' character or if there is more than one source\n"
328              "  it is assumed to mean a directory and is created as such.  If neither of\n"
329              "  of those apply it is assumed to be a file.\n"
330              "\n"));
331     OUTMSG (("  The sources or destination may also be special Unix devices:\n"
332              "    /dev/stdin is supported as a source.\n"
333              "    /dev/stdout and /dev/stderr is supported as a destination.\n"
334              "  Other file descriptor devices can be used in the form:\n"
335              "    /dev/fd/<fd-number>\n"));
336     OUTMSG (("  For example /dev/stdin is synonymous with /dev/fd/0 as a source.\n"
337              "  If /dev/stdout or /dev/fd/1 is used as the destination then the XML\n"
338              "  output is redirected to /dev/stderr (/dev/fd/2).\n"
339              "  Device /dev/null as the destination is treated as a file with only one\n"
340              "  source but as a directory if more than one source.  Using this device\n"
341              "  means no actual file will be copied but the cataloging will be done but\n"
342              "  " OPTION_XMLBASE " must be used.\n"
343              "\n"));
344     OUTMSG (("  These special devices can be entered using the URL (IRL) schemes if\n"
345              "  desired.  This allows the use of 'query' decorators.\n"
346              "  \n"));
347     OUTMSG (("  If a query is added to the URL it will need to be enclosed within '\"\'\n"
348              "  characters on a command line to prevent premature interpretation.\n"
349              "  The query for the 'ncbi-file' extension to the 'file' scheme allows\n"
350              "  encryption and decryption.  The supported query is introduced by the\n"
351              "  standard URI/IRI syntax of a '?' character with a '&' character\n"
352              "  separating individual query-entries.\n"
353              "\n"));
354     OUTMSG (("  The supported query entries are:\n"
355              "    'encrypt' or 'enc' to mean the input may be encrypted or the output\n"
356              "      will be encrypted,\n"
357              "    'pwfile=<path>' gives the path to file containing the password.\n"
358              "    'pwfd=<FD>' gives the numerical file descriptor from which to read\n"
359              "      the password,\n"
360              "\n"));
361     OUTMSG (("  In this program the encrypted input can apply to a file contained within\n"
362              "  the source rather than just the source file itself.  The tool is fully\n"
363              "  compatible with all versions of NCBI encryption.\n"
364              "\n"
365              "  If the output is to be encrypted only the newer FIPS compliant encryption\n"
366              "  will be used and applies to the whole file.\n"
367              "\n"));
368     OUTMSG (("NOTE: Not all combinations of URL specifications will work at this point.\n"
369              "\n"
370              "NOTE: using the same file descriptor for multiple sources or overlapping with\n"
371              "      stdin/stdout/stderr may cause undefined behavior including hanging the\n"
372              "      the program.\n"
373              "\n"));
374     OUTMSG (("  The '-x' option allows small files that are typed as eligible for\n"
375              "  caching to be copied to the cache directory provided. the directory\n"
376              "  will be created if necessary.\n"
377              "  the intent is to capture top-level files, such that files are copied\n"
378              "  into the flat cache directory without regard to where they were found\n"
379              "  in the input hierarchy. in the case of name conflict, output files will\n"
380              "  be renamed.\n"
381              "\n"));
382     OUTMSG (("  To prevent internal decompression of bzipped files, use the option\n"
383              "    '--no-bzip2'\n"
384              "\n"));
385     OUTMSG (("  To prevent calculation of MD5 hashes, use the option\n"
386              "    '--no-md5'\n"
387              "\n"));
388 
389     HelpVersion (fullpath, KAppVersion());
390 
391     return rc;
392 }
393 
394 static
395 OptDef Options[] =
396 {
397     /* name            alias max times oparam required fmtfunc help text loc */
398     { OPTION_EXTRACT, ALIAS_EXTRACT, NULL, extract_usage, 1, true,  false },
399     { OPTION_EXTDIR,  ALIAS_EXTDIR,  NULL, extdir_usage,  0, false, false },
400     { OPTION_XMLDIR,  ALIAS_XMLDIR,  NULL, xmldir_usage,  0, false, false },
401     { OPTION_CACHE,   ALIAS_CACHE,   NULL, cache_usage,   1, true,  false },
402     { OPTION_FORCE,   ALIAS_FORCE,   NULL, force_usage,   0, false, false },
403     { OPTION_DEST,    ALIAS_DEST,    NULL, dest_usage,    1, true,  false },
404     { OPTION_XMLBASE, ALIAS_XMLBASE, NULL, xmlbase_usage, 1, true,  false },
405     { OPTION_INBLOCK, ALIAS_OUTBLOCK,NULL, inblock_usage, 1, true,  false },
406     { OPTION_OUTBLOCK,ALIAS_OUTBLOCK,NULL, outblock_usage,1, true,  false },
407     { OPTION_NOBZIP2, ALIAS_NOBZIP2, NULL, no_bzip2_usage,0, false, false },
408     { OPTION_NOMD5,   ALIAS_NOMD5,   NULL, no_md5_usage,  0, false, false }
409 };
410 
411 /* file2file
412  */
413 static
copycat_file2file(CCTree * tree,SLList * logs,VFSManager * mgr,VPath * _src,VPath * _dst,const char * leaf)414 rc_t copycat_file2file (CCTree * tree,
415                         SLList * logs,
416                         VFSManager * mgr,
417                         VPath * _src,
418                         VPath * _dst,
419                         const char * leaf)
420 {
421     size_t sz;
422     rc_t rc;
423     bool do_encrypt;
424     bool do_decrypt;
425     char spath [8192];
426 
427     do_decrypt = (VPathOption (_src, vpopt_encrypted, spath, sizeof spath, &sz) == 0);
428     do_encrypt = (VPathOption (_dst, vpopt_encrypted, spath, sizeof spath, &sz) == 0);
429 
430     /* we can't use the automagical nature of the VPath and its query part
431      * because copycat needs to peek under the hood; but we want the automagical
432      * ability to handle it's path part.
433      */
434 
435     rc = VPathReadPath (_src, spath, sizeof spath, &sz);
436     if (rc)
437         LOGERR (klogInt, rc, "error rereading built source path");
438     else
439     {
440         char dpath [8192];
441         size_t dz;
442 
443         rc = VPathReadPath (_dst, dpath, sizeof dpath, &dz);
444         if (rc)
445             LOGERR (klogInt, rc, "error rereading built source path");
446         else
447         {
448             KDirectory * cwd;
449 
450             rc = VFSManagerGetCWD (mgr, &cwd);
451             if (rc)
452                 LOGERR (klogInt, rc, "error pulling directory out of manager");
453             else
454             {
455                 KTime_t mtime = 0;
456                 bool src_dev = false;
457                 bool dst_dev = false;
458 
459                 if (strncmp (spath, "/dev/", sizeof "/dev/"-1) == 0)
460                 {
461                     /* get date from file system
462                        [this won't be either the submitter original date
463                        nor the mod-date within the file system, unless
464                        the date gets reset...] */
465                     mtime = time (NULL);
466                     src_dev = true;
467                 }
468                 else
469                 {
470                     rc = KDirectoryDate (cwd, &mtime, "%s", spath);
471                     if (rc)
472                     {
473                         PLOGERR (klogErr,
474                                  (klogErr, rc,
475                                   "failed to determine modtime for '$(path)' continuing", "path=%s", spath ));
476                         mtime = time (NULL);
477                         rc = 0;
478                     }
479                 }
480                 if (strncmp (dpath, "/dev/", sizeof "/dev/" - 1) == 0)
481                 {
482                     if (strcmp(dpath, "/dev/stdout") == 0 ||
483                         strcmp(dpath, "/dev/fd/1") == 0)
484                     {
485                         dump_out = stderr;
486                     }
487                     dst_dev = true;
488 
489                     if (src_dev && (xml_base == NULL))
490                     {
491                         rc = RC (rcExe, rcArgv, rcAccessing, rcParam, rcNull);
492                         LOGERR (klogErr, rc, "Must provide " OPTION_XMLBASE
493                                 " when using a device stream as output");
494                     }
495                 }
496                 if (rc == 0)
497                 {
498                     char * sleaf;
499                     char * dleaf;
500                     char * ext;
501                     VPath * src;
502                     size_t xz;
503                     char xpath [8192]; /* way over sized - its a leaf only */
504 
505                     sleaf = strrchr (spath, '/');
506                     if (sleaf++ == NULL)
507                         sleaf = spath;
508 
509                     dleaf = strrchr (dpath, '/');
510                     if (dleaf++ == NULL)
511                         dleaf = dpath;
512 
513                     xz = strlen (leaf);
514                     memmove (xpath, leaf, xz + 1);
515 
516                     /* if we are encrypting the output make sure we have an encryption
517                      * extension on the destination.
518                      */
519                     if (do_decrypt)
520                     {
521                         ext = strrchr (xpath, '.');
522                         if (ext == NULL)
523                             ext = xpath;
524                         if ((strcmp (ext, ncbi_encryption_extension) == 0) ||
525                             (strcmp (ext, wga_encryption_extension) == 0))
526                             *ext = '\0';
527                     }
528                     else
529                         ext = xpath + strlen (xpath);
530 
531                     if (do_encrypt)
532                     {
533                         strcpy (ext, ncbi_encryption_extension);
534 
535                         if (!dst_dev)
536                         {
537                             ext = strrchr (dleaf, '.');
538                             if (ext == NULL)
539                                 ext = dleaf + strlen (dleaf);
540 
541                             if (strcmp (ext, ncbi_encryption_extension) != 0)
542                                 strcat (ext, ncbi_encryption_extension);
543                         }
544                     }
545 
546                     rc = VFSManagerMakePath (mgr, &src, "%s", spath);
547                     if (rc)
548                         LOGERR (klogErr, rc, "error rebuilding source path");
549                     else
550                     {
551                         VPath * dst;
552 
553                         rc = VFSManagerMakePath (mgr, &dst, "%s", dpath);
554                         if (rc)
555                             LOGERR (klogErr, rc, "error rebuilding source path");
556                         else
557                         {
558 
559                             /* never allow overwrite of something already there */
560                             if (CCTreeFind (tree, xpath) != NULL ) /* dleaf? xpath? */
561                             {
562                                 rc = RC ( rcExe, rcFile, rcCopying, rcPath, rcExists );
563                                 PLOGERR ( klogInt,  (klogInt, rc, "will not overwrite "
564                                                      "just-created '$(path)'", "path=%s", xpath ));
565                             }
566                             else
567                             {
568                                 const KFile * sf;
569 
570                                 rc = VFSManagerOpenFileRead (mgr, &sf, src);
571                                 if (rc)
572                                     PLOGERR (klogFatal,
573                                              (klogFatal, rc,
574                                               "error opening input '$(P)'", "P=%s", spath));
575                                 else
576                                 {
577                                     uint64_t expected;
578 
579                                     rc = KFileSize (sf, &expected);
580                                     if (rc)
581                                     {
582                                         if (GetRCState (rc) == rcUnsupported)
583                                         {
584                                             expected = rcUnsupported;
585                                             rc = 0;
586                                         }
587                                     }
588                                     if (rc == 0)
589                                     {
590                                         KFile * df;
591 
592                                         rc = VFSManagerCreateFile (mgr, &df, false, 0640, cm, dst);
593                                         if (GetRCState (rc) == rcUnauthorized)
594                                         {
595                                             uint32_t access;
596                                             rc_t orc;
597 
598                                             orc = KDirectoryAccess (cwd, &access, "%s", dpath);
599                                             if (orc == 0)
600                                             {
601                                                 orc = KDirectorySetAccess (cwd, false, 0640, 0777, "%s", dpath);
602                                                 if (orc == 0)
603                                                 {
604                                                     rc = VFSManagerCreateFile (mgr, &df, false, 0640, cm, dst);
605                                                     if (rc)
606                                                         KDirectorySetAccess (cwd, false, access, 0777, "%s", dpath);
607                                                 }
608                                             }
609                                         }
610                                         if (rc)
611                                             PLOGERR (klogErr,
612                                                      (klogErr, rc, "failed to creat destination file '$(path)'",
613                                                       "path=%s", dpath));
614                                         else
615                                         {
616                                             rc_t orc;
617 
618                                             log_writer = KLogWriterGet();
619                                             log_lib_writer = KLogLibWriterGet();
620                                             log_data = KLogDataGet();
621                                             log_lib_data = KLogLibDataGet();
622 
623                                             rc = copycat_log_set (logs, NULL);
624                                             if (rc == 0)
625                                             {
626                                                 DEBUG_STATUS (("\n-----\n%s: call copycat (tree(%p), mtime(%lu),"
627                                                                " cwd(%p), _src(%p), sf(%p), _dst(%p), df(%p), "
628                                                                "spath(%s), leaf(%s), expected(%lu), do_decrypt(%d)"
629                                                                " do_encrypt(%d))\n\n", __func__,
630                                                                tree, mtime, cwd, _src, sf, _dst, df, spath,
631                                                                xpath, expected, do_decrypt, do_encrypt));
632                                                 rc = copycat (tree, mtime, cwd, _src, sf, _dst, df, spath,
633                                                               xpath, expected, do_decrypt, do_encrypt);
634 
635                                                 orc = copycat_log_unset();
636                                             }
637 
638                                             if (rc)
639                                                 LOGERR (klogFatal, rc, "copycat function failed");
640                                             else
641                                                 rc = orc;
642 
643                                             KFileRelease (df);
644                                         }
645                                     }
646                                     KFileRelease (sf);
647                                 }
648                             }
649                             VPathRelease (dst);
650                         }
651                         VPathRelease (src);
652                     }
653                 }
654                 KDirectoryRelease (cwd);
655             }
656         }
657     }
658     return rc;
659 }
660 
661 
662 /* files2dir
663  */
664 static
copycat_files2dir(CCTree * tree,SLList * logs,VFSManager * mgr,Vector * v,VPath * dst)665 rc_t copycat_files2dir (CCTree * tree, SLList * logs, VFSManager * mgr, Vector * v, VPath * dst)
666 {
667     size_t dz;
668     uint32_t ix;
669     rc_t rc;
670     char dbuff [8192];
671 
672     /* xml-base only works for a single file */
673     if ((VectorLength (v) > 1) && (xml_base != NULL))
674     {
675         rc = RC (rcExe, rcArgv, rcParsing, rcParam, rcIncorrect);
676         LOGERR (klogErr, rc, "Can only use " OPTION_XMLBASE " with a single source file");
677         return rc;
678     }
679 
680     /* get the path out of the destination VPath */
681     rc = VPathReadPath (dst, dbuff, sizeof dbuff, &dz);
682     if (rc)
683         return rc;
684 
685     for (ix = 0; ix < VectorLength (v); ++ix)
686     {
687         VPath * new_dst;
688         VPath * src;
689         char * sleaf;
690         size_t sz;
691         char sbuff [8192];
692 
693         src = (VPath*) VectorGet (v, ix);
694         if (src == NULL) /* warn? error? abort? */
695             continue;
696 
697 
698         rc = VPathReadPath (src, sbuff, sizeof sbuff, &sz);
699         if (rc)
700             return rc;
701 
702         sleaf = strrchr (sbuff, '/');
703         if (sleaf++ == NULL)
704             sleaf = sbuff;
705 
706         /* the special case destination is the null device which we treat
707          * as if it was a directory at first and then as a file
708          */
709         if (strcmp (dbuff, "/dev/null") == 0)
710         {
711             rc = VPathAddRef (dst);
712             if (rc != 0)
713                 break;
714             new_dst = dst;
715         }
716         else
717         {
718             DEBUG_STATUS(("%s: %s (%lu)\n", __func__, dbuff, dz));
719 
720             /* fix up the destination path if it's missing a final '/'
721              * this is inside the loop because of the null device special case
722              */
723             if (dbuff [dz-1] != '/')
724             {
725                 dbuff [dz++] = '/';
726                 dbuff [dz] = '\0';
727             }
728 
729             /* append source leaf to destination path */
730             string_copy (dbuff + dz, sizeof dbuff - dz, sleaf, strlen (sleaf));
731 
732             DEBUG_STATUS(("%s: %s\n", __func__, dbuff));
733 
734             /* make a new VPath - no URI stuff gets transferred here */
735             rc = VFSManagerMakePath (mgr, &new_dst, "%s", dbuff);
736             if (rc)
737                 break;
738         }
739 
740         /* do this one file copy and catalog now */
741         rc = copycat_file2file (tree, logs, mgr, src, new_dst, xml_base ? xml_base : sleaf);
742 
743         VPathRelease (new_dst);
744     }
745     return rc;
746 }
747 
748 
749 /* run
750  *
751  * dest will be set if the -o option was used.
752  *
753  */
754 static
copycat_run(CCTree * tree,SLList * logs,VFSManager * mgr,const char * cache,VPath * _dest,const char * extract,Vector * v)755 rc_t copycat_run ( CCTree *tree, SLList * logs, VFSManager * mgr,
756                    const char *cache, VPath * _dest, const char *extract,
757                    Vector * v)
758 {
759     rc_t rc;
760     int dest_type;
761     KDirectory * cwd;
762     VPath * dest;
763     size_t sz;
764     const char * pleaf;
765     char pbuff [4096];
766 
767     /* =====
768      * directories aren't yet using the VFSManager to open them
769      * because we have to get more under the covers for our cataloging
770      */
771 
772     rc = VFSManagerGetCWD (mgr, &cwd);
773     if (rc)
774         return rc;
775 
776     /* if there's a cache path, create directory */
777     if ( cache != NULL )
778     {
779         rc = KDirectoryCreateDir ( cwd, 0775, kcmParents | kcmOpen, "%s", cache );
780         if ( rc == 0 )
781             rc = KDirectoryOpenDirUpdate ( cwd, & cdir, true, "%s", cache );
782         if ( rc != 0 )
783         {
784             PLOGERR (klogErr,
785                      (klogErr, rc, "failed to open cache directory '$(path)'",
786                       "path=%s", cache ));
787             return rc;
788         }
789     }
790     else
791         cdir = NULL;
792 
793     /* if there's a extract path, create directory */
794     if ( extract != NULL )
795     {
796         rc = KDirectoryCreateDir (cwd, 0775, kcmParents | kcmOpen, "%s", extract);
797         if ( rc == 0 )
798             rc = KDirectoryOpenDirUpdate (cwd, & edir, true, "%s", extract);
799         if ( rc != 0 )
800         {
801             PLOGERR (klogErr,
802                      (klogErr, rc,
803                       "failed to open extract directory '$(path)'",
804                       "path=%s", extract ));
805             return rc;
806         }
807     }
808     else
809         edir = NULL;
810 
811     dest = _dest;
812     if (dest == NULL)
813     {
814         rc = VectorRemove (v, VectorLength(v) - 1, (void**)&dest);
815         if (rc)
816             return rc;
817     }
818 
819     rc = VPathReadPath (dest, pbuff, sizeof pbuff, &sz);
820     if (rc)
821         return rc;
822 
823     if (xml_base)
824         pleaf = xml_base;
825     else
826     {
827         pleaf = strrchr (pbuff, '/');
828         if (pleaf++ == NULL)
829             pleaf = pbuff;
830     }
831 
832 
833     /* check destination type */
834     dest_type = KDirectoryPathType (cwd, "%s", pbuff);
835     DEBUG_STATUS(("%s: checked destination type for '%s' got '%u'\n", __func__, pbuff, dest_type));
836     switch (dest_type & ~ kptAlias)
837     {
838     case kptNotFound:
839         /* this is the potentially ambiguous situation
840          * if only two arguments and the last isn't definitively a directory
841          * we assume its supposed to be a file.
842          *
843          * If the target does not exist but it's path ends in '/' or if
844          * there is more than one source we know it is supposed to be a
845          * directory.
846          */
847         if ((pbuff[sz-1] != '/')
848 /*              ((pbuff[sz-1] != '.') */
849 
850 
851 
852 /*              ( */
853 /* )) */
854             && (VectorLength (v) == 1))
855         {
856             return copycat_file2file (tree, logs, mgr, VectorGet (v, 0), dest, pleaf);
857         }
858 
859         /* create a directory at the given path */
860         rc = KDirectoryCreateDir ( cwd, 0775, kcmParents | kcmOpen, "%s", pbuff );
861         if ( rc != 0 )
862             return rc;
863 
864         /* fall through */
865     case kptDir:
866         return copycat_files2dir (tree, logs, mgr, v, dest);
867 
868 
869     case kptCharDev:
870         /*
871          * special case NULL device can act like a directory here
872          * all other 'devices' we treat as a file
873          */
874         if ( strcmp ( pbuff, "/dev/null" ) == 0 )
875         {
876 #if 0
877             if (VectorLength (v) > 1)
878 #endif
879                 return copycat_files2dir (tree, logs, mgr, v, dest);
880         }
881         /* fall through */
882     case kptBlockDev:
883     case kptFIFO:
884     case kptFile:
885         if (VectorLength (v) == 1)
886             return copycat_file2file (tree, logs, mgr, VectorGet (v, 0), dest, pleaf);
887 
888         rc = RC (rcExe, rcDirectory, rcAccessing, rcPath, rcNotFound);
889         PLOGERR (klogFatal,
890                  (klogFatal, rc, "copying multiple files, but target argument "
891                   "[$(D)] is not a directory", "D=%s", pbuff));
892         return rc;
893     }
894 
895     fprintf ( stderr, "%s: '%s': specified destination path is not a directory\n", program_name, pbuff );
896     return RC ( rcExe, rcDirectory, rcAccessing, rcPath, rcIncorrect );
897 }
898 /* dump
899  */
900 static
copycat_fwrite(void * out,const void * buffer,size_t bytes)901 rc_t copycat_fwrite ( void *out, const void *buffer, size_t bytes )
902 {
903     size_t writ = fwrite ( buffer, 1, bytes, out );
904     if ( writ != bytes )
905         return RC ( rcExe, rcFile, rcWriting, rcTransfer, rcIncomplete );
906     return 0;
907 }
908 
909 static
copycat_dump(const CCTree * tree,SLList * logs)910 rc_t copycat_dump ( const CCTree *tree, SLList * logs )
911 {
912     return CCTreeDump ( tree, copycat_fwrite, dump_out, logs );
913 }
914 
915 static
param_whack(void * path,void * ignored)916 void param_whack (void * path, void * ignored)
917 {
918     (void)VPathRelease ((const VPath*)path);
919 }
920 
921 /* KMain
922  */
923 
KMain(int argc,char * argv[])924 rc_t KMain ( int argc, char *argv [] )
925 {
926     Args * args;
927     rc_t rc, orc;
928 
929     KStsHandlerSetStdErr();
930     KStsLibHandlerSetStdErr();
931 
932     rc = ArgsMakeAndHandle (&args, argc, argv, 1, Options, sizeof Options / sizeof (OptDef));
933     if (rc == 0)
934     {
935         /* CS-101: DO WHILE ( 0 ) LOOPS ARE CUTE FOR AVOIDING GOTO,
936            BUT IT MEANS YOU HAVE TO PUT ALL OF YOUR CLEANUP OUTSIDE
937            OF THE LOOP, IN ORDER TO BREAK OUT IN A NON-STRUCTURED WAY */
938         do
939         {
940             const char * dest;
941             const char * cache;
942             const char * extract = NULL;
943 
944             uint32_t pcount;
945             CCTree * tree;
946             VFSManager * mgr = NULL;
947             VPath * dp = NULL;
948             Vector params = { NULL, 0, 0, 0};
949             uint32_t ix;
950 
951             rc = ArgsProgram (args, &full_path, &program_name);
952             if (rc)
953                 break;
954 
955             extract_dir = false;
956             xml_dir = false;
957             memset (epath, 0, sizeof (epath));
958             ehere = epath;
959 
960             rc = ArgsOptionCount (args, OPTION_CACHE, &pcount);
961             if (rc)
962                 break;
963             if (pcount)
964             {
965                 rc = ArgsOptionValue (args, OPTION_CACHE, 0, (const void **)&cache);
966                 if (rc)
967                     break;
968             }
969             else
970                 cache = NULL;
971 
972             rc = ArgsOptionCount (args, OPTION_DEST, &pcount);
973             if (rc)
974                 break;
975             if (pcount)
976             {
977                 rc = ArgsOptionValue (args, OPTION_DEST, 0, (const void **)&dest);
978                 if (rc)
979                     break;
980             }
981             else
982             {
983                 dest = NULL;
984             }
985 
986             rc = ArgsOptionCount (args, OPTION_EXTRACT, &pcount);
987             if (rc)
988                 break;
989             if (pcount)
990             {
991                 rc = ArgsOptionValue (args, OPTION_EXTRACT, 0, (const void **)&extract);
992                 if (rc)
993                     break;
994             }
995 
996             rc = ArgsOptionCount (args, OPTION_EXTDIR, &pcount);
997             if (rc)
998                 break;
999             extract_dir = pcount > 0;
1000 
1001             rc = ArgsOptionCount (args, OPTION_XMLDIR, &pcount);
1002             if (rc)
1003                 break;
1004             xml_dir = pcount > 0;
1005 
1006             rc = ArgsOptionCount (args, OPTION_FORCE, &pcount);
1007             if (rc)
1008                 break;
1009             if (pcount)
1010                 cm = kcmParents | kcmInit;
1011 
1012             rc = ArgsOptionCount (args, OPTION_XMLBASE, &pcount);
1013             if (pcount == 1)
1014             {
1015                 rc = ArgsOptionValue (args, OPTION_XMLBASE, 0, (const void **)&xml_base);
1016                 if (rc)
1017                     break;
1018 
1019                 /* we might want a few more checks here... */
1020                 if (strchr (xml_base, '/') != NULL)
1021                 {
1022                     rc = RC (rcExe, rcArgv, rcAccessing, rcParam, rcInvalid);
1023                     break;
1024                 }
1025             }
1026 
1027             rc = ArgsOptionCount (args, OPTION_INBLOCK, &pcount);
1028             if (pcount == 1)
1029             {
1030                 const char * start;
1031                 char * end;
1032                 uint32_t val;
1033 
1034                 rc = ArgsOptionValue (args, OPTION_INBLOCK, 0, (const void **)&start);
1035                 if (rc)
1036                     break;
1037 
1038                 val = strtou32 (start, &end, 10);
1039 
1040                 if (*end != '\0')
1041                 {
1042                     rc = RC (rcExe, rcArgv, rcAccessing, rcParam, rcInvalid);
1043                     break;
1044                 }
1045                 in_block = val * 1024;
1046             }
1047 
1048             rc = ArgsOptionCount (args, OPTION_OUTBLOCK, &pcount);
1049             if (pcount == 1)
1050             {
1051                 const char * start;
1052                 char * end;
1053                 uint32_t val;
1054 
1055                 rc = ArgsOptionValue (args, OPTION_OUTBLOCK, 0, (const void **)&start);
1056                 if (rc)
1057                     break;
1058 
1059                 val = strtou32 (start, &end, 10);
1060 
1061                 if (*end != '\0')
1062                 {
1063                     rc = RC (rcExe, rcArgv, rcAccessing, rcParam, rcInvalid);
1064                     break;
1065                 }
1066                 out_block = val * 1024;
1067             }
1068 
1069             rc = ArgsOptionCount ( args, OPTION_NOBZIP2, & pcount );
1070             if ( pcount > 0 )
1071             {
1072                 no_bzip2 = true;
1073             }
1074 
1075             rc = ArgsOptionCount ( args, OPTION_NOMD5, & pcount );
1076             if ( pcount > 0 )
1077             {
1078                 no_md5 = true;
1079             }
1080 
1081             /* all parameters plus the possible dest option parameter */
1082             rc = ArgsParamCount (args, &pcount);
1083             if (rc)
1084                 break;
1085 
1086             if (pcount == 0)
1087             {
1088                 rc = RC ( rcExe, rcArgv, rcReading, rcParam, rcInsufficient );
1089                 MiniUsage (args);
1090                 break;
1091             }
1092 
1093             if ((dest == NULL) && (extract == NULL) && (pcount < 2))
1094             {
1095                 rc = RC ( rcExe, rcArgv, rcReading, rcParam, rcInvalid );
1096                 if (pcount)
1097                     LOGERR (klogFatal, rc, "missing source and destination arguments\n");
1098                 else
1099                     LOGERR (klogFatal, rc, "missing destination argument[s]\n");
1100                 break;
1101             }
1102 
1103             VectorInit (&params, 0, 8); /* 8 is arbirary - seems long enough for no realloc */
1104 
1105             rc = VFSManagerMake (&mgr);
1106             if (rc)
1107             {
1108                 LOGERR (klogFatal, rc,
1109                         "unable to build file system manager");
1110                 break;
1111             }
1112 
1113             for (ix = 0; ix < pcount; ++ix)
1114             {
1115                 VPath * kp;
1116                 const char * pc;
1117 
1118                 rc = ArgsParamValue (args, ix, (const void **)&pc);
1119                 if (rc)
1120                 {
1121                     LOGERR (klogFatal, rc, "unable to extract path parameter");
1122                     break;
1123                 }
1124 
1125                 rc = VFSManagerMakePath (mgr, &kp, "%s", pc);
1126                 if (rc)
1127                 {
1128                     LOGERR (klogFatal, rc, "unable to build path parameter");
1129                     break;
1130                 }
1131 
1132                 rc = VectorSet (&params, ix, kp);
1133                 if (rc)
1134                 {
1135                     LOGERR (klogFatal, rc, "unable to stow path parameter");
1136                     break;
1137                 }
1138             }
1139             if (rc == 0)
1140             {
1141                 if (dest)
1142                 {
1143                     rc = VFSManagerMakePath (mgr, &dp, "%s", dest);
1144                     if (rc)
1145                     {
1146                         LOGERR (klogFatal, rc, "unable to build dest parameter");
1147                         break;
1148                     }
1149                 }
1150                 DEBUG_STATUS(("%s: Create file tree\n", __func__));
1151                 rc = CCTreeMake (&tree);
1152                 if (rc)
1153                 {
1154                     LOGERR ( klogInt, rc, "failed to create parse tree" );
1155                 }
1156                 else
1157                 {
1158                     DEBUG_STATUS(("%s: Create cache file tree\n", __func__));
1159 
1160                     rc = CCTreeMake (&ctree);
1161                     if (rc)
1162                     {
1163                         LOGERR ( klogInt, rc, "failed to create cache tree" );
1164                     }
1165                     else
1166                     {
1167                         DEBUG_STATUS(("%s: Create extracted file tree\n",
1168                                       __func__));
1169 
1170                         rc = CCTreeMake (&etree);
1171                         if (rc)
1172                         {
1173                             LOGERR ( klogInt, rc,
1174                                      "failed to create extract tree" );
1175                         }
1176                         else
1177                         {
1178                             DEBUG_STATUS(("%s: Create  NULL output file\n",
1179                                           __func__));
1180                             rc = KFileMakeNullUpdate (&fnull);
1181                             if (rc)
1182                                 LOGERR (klogInt, rc,
1183                                         "failed to create null output");
1184                             else
1185                             {
1186                                 DEBUG_STATUS(("%s: Open File Format Tester\n",
1187                                               __func__));
1188 
1189                                 rc = CCFileFormatMake ( & filefmt );
1190                                 if ( rc != 0 )
1191                                     LOGERR (klogInt, rc,
1192                                             "failed to create file format" );
1193                                 else
1194                                 {
1195                                     SLList logs;
1196 
1197                                     DEBUG_STATUS(("%s: Initialize CRC32\n",
1198                                                   __func__));
1199 
1200                                     SLListInit (&logs);
1201                                     CRC32Init ();
1202 
1203                                     DEBUG_STATUS(("%s: Copy and catalog\n",
1204                                                   __func__));
1205 
1206                                     dump_out = stdout; /* kludge */
1207 
1208                                     rc = copycat_run (tree, &logs, mgr, cache,
1209                                                       dp, extract, &params);
1210                                     if ( rc == 0 )
1211                                         rc = copycat_dump ( xml_dir ? etree : tree, &logs );
1212                                     DEBUG_STATUS(("%s: Output XML\n", __func__));
1213 
1214 
1215                                     CCFileFormatRelease ( filefmt );
1216                                 }
1217 
1218                                 DEBUG_STATUS(("%s: Release NULL output file\n", __func__));
1219 
1220                                 orc = KFileRelease ( fnull ), fnull = NULL;
1221                                 if (rc == 0)
1222                                     rc = orc;
1223                             }
1224                             DEBUG_STATUS(("%s: Whack extracted file tree;\n", __func__));
1225                             CCTreeWhack (etree);
1226                         }
1227                         DEBUG_STATUS(("%s: Whack cache file tree;\n", __func__));
1228                         CCTreeWhack (ctree);
1229                     }
1230                     DEBUG_STATUS(("%s: Whack file tree;\n", __func__));
1231                     CCTreeWhack (tree);
1232                 }
1233 
1234                 VPathRelease (dp);
1235             }
1236             VFSManagerRelease (mgr);
1237             VectorWhack (&params, param_whack, NULL);
1238         } while (0);
1239     }
1240     ArgsWhack (args);
1241     orc = KDirectoryRelease (cdir); /* class extren should be NULL if never used */
1242     if (orc)
1243     {
1244         LOGERR (klogInt, rc, "Error shutting file system access");
1245         if (rc == 0)
1246             rc = orc;
1247     }
1248     DEBUG_STATUS(("%s: exit rc %R(%x);\n", __func__, rc, rc));
1249     return rc;
1250 }
1251 
1252