1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include "copycat-priv.h"
28 #include "cctree-priv.h"
29
30 #include <vfs/manager.h>
31 #include <vfs/path.h>
32 #include <vfs/path-priv.h>
33 #include <kfs/directory.h>
34 #include <kfs/file.h>
35 #include <kfs/nullfile.h>
36 #include <kfs/crc.h>
37 #include <klib/checksum.h>
38 #include <klib/writer.h>
39 #include <klib/log.h>
40 #include <klib/status.h>
41 #include <klib/debug.h>
42 #include <klib/out.h>
43 #include <klib/status.h>
44 #include <klib/text.h>
45 #include <kapp/main.h>
46 #include <kapp/args.h>
47 #include <klib/rc.h>
48 #include <klib/vector.h>
49
50 #include <strtol.h>
51
52 #include <time.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <assert.h>
57
58 /*
59 * some program globals
60 */
61 const char * program_name = "copycat"; /* default it but try to set it */
62 const char * full_path = "copycat"; /* default it but try to set it */
63 int verbose = 0;
64 KFile *fnull;
65
66 /* cache information */
67 CCTree *ctree;
68 KDirectory *cdir;
69
70 uint32_t in_block = 0;
71 uint32_t out_block = 0;
72
73 CCTree *etree;
74 KDirectory * edir; /* extracted file base kdir */
75 char epath[8192];
76 char * ehere;
77 bool xml_dir = false;
78 bool extract_dir = false;
79 bool no_bzip2 = false;
80 bool no_md5 = false;
81 void * dump_out;
82 const char * xml_base = NULL;
83
84 char ncbi_encryption_extension[] = ".nenc";
85 char wga_encryption_extension[] = ".ncbi_enc";
86
87 static
88 KWrtWriter log_writer;
89 static
90 KWrtWriter log_lib_writer;
91 static
92 void * log_data;
93 static
94 void * log_lib_data;
95
copycat_log_writer(void * self,const char * buffer,size_t buffer_size,size_t * num_writ)96 rc_t CC copycat_log_writer (void * self, const char * buffer, size_t buffer_size,
97 size_t * num_writ)
98 {
99 if (self)
100 {
101 void * bf = malloc (sizeof (SLNode) + buffer_size);
102 if (bf)
103 {
104 const char * ps, * pc;
105 size_t z;
106 ps = strchr (buffer, ' ');
107 if (ps)
108 {
109 ++ps;
110 pc = strchr (ps, '-');
111 if (pc == NULL)
112 pc = strchr (ps, ':');
113 if (pc)
114 pc = pc + 2;
115 else
116 pc = ps;
117 }
118 else
119 pc = buffer;
120 z = buffer_size - (pc - buffer);
121 memmove ( (void*)(((SLNode*)bf)+1), pc, z);
122 ((char*)(((SLNode*)bf)+1))[z-1] = '\0';
123 SLListPushTail (self, bf);
124 }
125 }
126 return (log_writer != NULL)
127 ? log_writer (log_data, buffer, buffer_size, num_writ) : 0;
128 }
copycat_log_lib_writer(void * self,const char * buffer,size_t buffer_size,size_t * num_writ)129 rc_t CC copycat_log_lib_writer (void * self, const char * buffer, size_t buffer_size,
130 size_t * num_writ)
131 {
132 if (self)
133 {
134 void * bf = malloc (sizeof (SLNode) + buffer_size);
135 if (bf)
136 {
137 const char * ps, * pc;
138 size_t z;
139 ps = strchr (buffer, ' ');
140 if (ps)
141 {
142 ++ps;
143 pc = strchr (ps, ':');
144 if (pc)
145 pc = pc + 2;
146 else
147 pc = ps;
148 }
149 else
150 pc = buffer;
151 z = buffer_size - (pc - buffer);
152 memmove ( (void*)(((SLNode*)bf)+1), pc, z);
153 ((char*)(((SLNode*)bf)+1))[z-1] = '\0';
154 SLListPushTail (self, bf);
155 }
156 }
157 return (log_lib_writer != NULL)
158 ? log_lib_writer (log_lib_data, buffer, buffer_size, num_writ) : 0;
159 }
160
161 static
copycat_log_unset()162 rc_t copycat_log_unset ()
163 {
164 rc_t rc_l, rc_ll;
165
166 rc_l = KLogHandlerSet (log_writer, log_data);
167 rc_ll = KLogHandlerSet (log_lib_writer, log_lib_data);
168
169 return (rc_l != 0) ? rc_l : rc_ll;
170 }
171
copycat_log_set(void * new,void ** prev)172 rc_t copycat_log_set (void * new, void ** prev)
173 {
174 rc_t rc;
175
176 if (prev)
177 *prev = KLogDataGet();
178
179 rc = KLogHandlerSet (copycat_log_writer, new);
180 if (rc == 0)
181 rc = KLogLibHandlerSet (copycat_log_lib_writer, new);
182
183 if (rc)
184 copycat_log_unset ();
185
186 return rc;
187 }
188
189 /* global create mode */
190 KCreateMode cm = kcmParents | kcmCreate;
191
/* Command line options.
 * Each long option name is followed by its single character alias;
 * an empty alias means the option has no short form.
 */
#define OPTION_CACHE    "cache-dir"
#define ALIAS_CACHE     "x"

#define OPTION_FORCE    "force"
#define ALIAS_FORCE     "f"

#define OPTION_DEST     "output"
#define ALIAS_DEST      "o"

#define OPTION_EXTRACT  "extract"
#define ALIAS_EXTRACT   "e"

#define OPTION_EXTDIR   "extract-to-dir"
#define ALIAS_EXTDIR    "E"

#define OPTION_XMLDIR   "xml-dir"
#define ALIAS_XMLDIR    "X"

#define OPTION_DECPWD   "decryption-password"
#define ALIAS_DECPWD    ""

#define OPTION_ENCPWD   "encryption-password"
#define ALIAS_ENCPWD    ""

#define OPTION_XMLBASE  "xml-base-node"
#define ALIAS_XMLBASE   ""

#define OPTION_INBLOCK  "input-buffer"
#define ALIAS_INBLOCK   ""

#define OPTION_OUTBLOCK "output-buffer"
#define ALIAS_OUTBLOCK  ""

#define OPTION_NOBZIP2  "no-bzip2"
#define ALIAS_NOBZIP2   ""

#define OPTION_NOMD5    "no-md5"
#define ALIAS_NOMD5     ""
219
220
221
/* One line help texts, one NULL terminated table per option.
 * All of them are private to this file.
 */
static
const char * extract_usage[] =
{ "location of extracted files", NULL };
static
const char * cache_usage[] =
{ "location of output cached files", NULL };
static
const char * force_usage[] =
{ "force overwrite of existing files", NULL };
static
const char * dest_usage[] =
{ "location of output", NULL };
static
const char * xmldir_usage[] =
{ "XML matches extracted files", NULL };
static
const char * extdir_usage[] =
{ "extracted directories match normal XML", NULL };
static
const char * xmlbase_usage[] =
{ "use this to base the XML not destination; can only be used with a single source", NULL };
static
const char * inblock_usage[] =
{ "system file reads are of blocks of this size", NULL };
static
const char * outblock_usage[] =
{ "system file writes are of blocks of this size", NULL };
static
const char * no_bzip2_usage[] =
{ "do not decompress files compressed with bzip2", NULL };
/* was the only table without 'static'; made file-local like its siblings */
static
const char * no_md5_usage[] =
{ "do not calculate md5 hashes", NULL };


/* program name shown in help output when the real one is not available;
 * left with external linkage */
const char UsageDefaultName [] = "copycat";
257
258
UsageSummary(const char * progname)259 rc_t CC UsageSummary (const char * progname)
260 {
261 return KOutMsg (
262 "\n"
263 "Usage:\n"
264 " %s [options] src-file dst-file\n"
265 " %s [options] src-file [src-file...] dst-dir\n"
266 " %s [options] -o dst-dir src-file [src-file...]\n"
267 "\n"
268 "Summary:\n"
269 " Copies files and/or directories, creating a catalog of the copied files.\n",
270 progname, progname, progname);
271 }
272
Usage(const Args * args)273 rc_t CC Usage (const Args * args)
274 {
275 const char * progname = UsageDefaultName;
276 const char * fullpath = UsageDefaultName;
277 rc_t rc;
278
279 if (args == NULL)
280 rc = RC (rcApp, rcArgv, rcAccessing, rcSelf, rcNull);
281 else
282 rc = ArgsProgram (args, &fullpath, &progname);
283
284 UsageSummary (progname);
285
286 HelpOptionLine (ALIAS_CACHE, OPTION_CACHE, "dir-path", cache_usage);
287 HelpOptionLine (ALIAS_FORCE, OPTION_FORCE, NULL, force_usage);
288 HelpOptionLine (ALIAS_DEST, OPTION_DEST, "file-path", dest_usage);
289 HelpOptionLine (ALIAS_EXTRACT, OPTION_EXTRACT, "dir-path", extract_usage);
290 HelpOptionLine (ALIAS_EXTDIR, OPTION_EXTDIR, NULL, extdir_usage);
291 HelpOptionLine (ALIAS_XMLDIR, OPTION_XMLDIR, NULL, xmldir_usage);
292 HelpOptionLine (ALIAS_INBLOCK, OPTION_INBLOCK, "size-in-KB", inblock_usage);
293 HelpOptionLine (ALIAS_OUTBLOCK,OPTION_OUTBLOCK, "size-in-KB", outblock_usage);
294 HelpOptionLine (ALIAS_NOBZIP2,OPTION_NOBZIP2, NULL, no_bzip2_usage);
295 HelpOptionLine (ALIAS_NOMD5,OPTION_NOMD5, NULL, no_md5_usage);
296 HelpOptionsStandard ();
297
298
299
300 /* 1 2 3 4 5 6 7 8 */
301 /* 12345678901234567890123456789012345678901234567890123456789012345678901234567890 */
302 OUTMSG (("Use:\n"
303 " Copy and catalog:\n"
304 " Some quick examples:\n"
305 " %s dir/file.tar other-dir/file.tar\n"
306 " copy file.tar from dir to other-dir and write the catalog to stdout\n"
307 " %s dir/file.tar otherdir/\n"
308 " the same\n", progname,progname));
309 OUTMSG ((" %s \"ncbi-file:dir/file.tar.nenc?encrypt&pwfile=pw other-dir.file.tar\n"
310 " copy and decrypt file.tar.nenc from dir to other-dir and catalog\n"
311 " %s dir/file.tar \"ncbi-file:other-dir/file.tar.nenc?encrypt&pwfile=pw\n"
312 " copy and encrypt file.tar from dir to other-dir/file.tar.nenc and catalog\n"
313 " %s \"ncbi-file:dir/file.tar.nenc?encrypt&pwfile=pw1 \\\n"
314 " \"ncbi-file:other-dir/file.tar.nenc?encrypt&pwfile=pw2\n"
315 " copy the file as above while changing the encryption\n"
316 "\n", progname, progname, progname));
317 OUTMSG (("Use:\n"
318 " Copy source file[s] to a destination file or directory.\n"
319 " File names can either be typical path names or they can be URLs (IRLs) using\n"
320 " the standard \"file\" or extended \"ncbi-file\" schemes.\n"
321 " The catalog is XML output sent by default to stdout.\n"
322 " As UTF-8 is accepted in the paths they are IRLs for International Resource\n"
323 " Locators.\n"
324 "\n"));
325 OUTMSG ((" If the specified destination does not exist, there could be an ambiguity\n"
326 " whether the destination is supposed to be a file or directory. If the\n"
327 " entered path ends in a '/' character or if there is more than one source\n"
328 " it is assumed to mean a directory and is created as such. If neither of\n"
329 " of those apply it is assumed to be a file.\n"
330 "\n"));
331 OUTMSG ((" The sources or destination may also be special Unix devices:\n"
332 " /dev/stdin is supported as a source.\n"
333 " /dev/stdout and /dev/stderr is supported as a destination.\n"
334 " Other file descriptor devices can be used in the form:\n"
335 " /dev/fd/<fd-number>\n"));
336 OUTMSG ((" For example /dev/stdin is synonymous with /dev/fd/0 as a source.\n"
337 " If /dev/stdout or /dev/fd/1 is used as the destination then the XML\n"
338 " output is redirected to /dev/stderr (/dev/fd/2).\n"
339 " Device /dev/null as the destination is treated as a file with only one\n"
340 " source but as a directory if more than one source. Using this device\n"
341 " means no actual file will be copied but the cataloging will be done but\n"
342 " " OPTION_XMLBASE " must be used.\n"
343 "\n"));
344 OUTMSG ((" These special devices can be entered using the URL (IRL) schemes if\n"
345 " desired. This allows the use of 'query' decorators.\n"
346 " \n"));
347 OUTMSG ((" If a query is added to the URL it will need to be enclosed within '\"\'\n"
348 " characters on a command line to prevent premature interpretation.\n"
349 " The query for the 'ncbi-file' extension to the 'file' scheme allows\n"
350 " encryption and decryption. The supported query is introduced by the\n"
351 " standard URI/IRI syntax of a '?' character with a '&' character\n"
352 " separating individual query-entries.\n"
353 "\n"));
354 OUTMSG ((" The supported query entries are:\n"
355 " 'encrypt' or 'enc' to mean the input may be encrypted or the output\n"
356 " will be encrypted,\n"
357 " 'pwfile=<path>' gives the path to file containing the password.\n"
358 " 'pwfd=<FD>' gives the numerical file descriptor from which to read\n"
359 " the password,\n"
360 "\n"));
361 OUTMSG ((" In this program the encrypted input can apply to a file contained within\n"
362 " the source rather than just the source file itself. The tool is fully\n"
363 " compatible with all versions of NCBI encryption.\n"
364 "\n"
365 " If the output is to be encrypted only the newer FIPS compliant encryption\n"
366 " will be used and applies to the whole file.\n"
367 "\n"));
368 OUTMSG (("NOTE: Not all combinations of URL specifications will work at this point.\n"
369 "\n"
370 "NOTE: using the same file descriptor for multiple sources or overlapping with\n"
371 " stdin/stdout/stderr may cause undefined behavior including hanging the\n"
372 " the program.\n"
373 "\n"));
374 OUTMSG ((" The '-x' option allows small files that are typed as eligible for\n"
375 " caching to be copied to the cache directory provided. the directory\n"
376 " will be created if necessary.\n"
377 " the intent is to capture top-level files, such that files are copied\n"
378 " into the flat cache directory without regard to where they were found\n"
379 " in the input hierarchy. in the case of name conflict, output files will\n"
380 " be renamed.\n"
381 "\n"));
382 OUTMSG ((" To prevent internal decompression of bzipped files, use the option\n"
383 " '--no-bzip2'\n"
384 "\n"));
385 OUTMSG ((" To prevent calculation of MD5 hashes, use the option\n"
386 " '--no-md5'\n"
387 "\n"));
388
389 HelpVersion (fullpath, KAppVersion());
390
391 return rc;
392 }
393
394 static
395 OptDef Options[] =
396 {
397 /* name alias max times oparam required fmtfunc help text loc */
398 { OPTION_EXTRACT, ALIAS_EXTRACT, NULL, extract_usage, 1, true, false },
399 { OPTION_EXTDIR, ALIAS_EXTDIR, NULL, extdir_usage, 0, false, false },
400 { OPTION_XMLDIR, ALIAS_XMLDIR, NULL, xmldir_usage, 0, false, false },
401 { OPTION_CACHE, ALIAS_CACHE, NULL, cache_usage, 1, true, false },
402 { OPTION_FORCE, ALIAS_FORCE, NULL, force_usage, 0, false, false },
403 { OPTION_DEST, ALIAS_DEST, NULL, dest_usage, 1, true, false },
404 { OPTION_XMLBASE, ALIAS_XMLBASE, NULL, xmlbase_usage, 1, true, false },
405 { OPTION_INBLOCK, ALIAS_OUTBLOCK,NULL, inblock_usage, 1, true, false },
406 { OPTION_OUTBLOCK,ALIAS_OUTBLOCK,NULL, outblock_usage,1, true, false },
407 { OPTION_NOBZIP2, ALIAS_NOBZIP2, NULL, no_bzip2_usage,0, false, false },
408 { OPTION_NOMD5, ALIAS_NOMD5, NULL, no_md5_usage, 0, false, false }
409 };
410
411 /* file2file
412 */
413 static
copycat_file2file(CCTree * tree,SLList * logs,VFSManager * mgr,VPath * _src,VPath * _dst,const char * leaf)414 rc_t copycat_file2file (CCTree * tree,
415 SLList * logs,
416 VFSManager * mgr,
417 VPath * _src,
418 VPath * _dst,
419 const char * leaf)
420 {
421 size_t sz;
422 rc_t rc;
423 bool do_encrypt;
424 bool do_decrypt;
425 char spath [8192];
426
427 do_decrypt = (VPathOption (_src, vpopt_encrypted, spath, sizeof spath, &sz) == 0);
428 do_encrypt = (VPathOption (_dst, vpopt_encrypted, spath, sizeof spath, &sz) == 0);
429
430 /* we can't use the automagical nature of the VPath and its query part
431 * because copycat needs to peek under the hood; but we want the automagical
432 * ability to handle it's path part.
433 */
434
435 rc = VPathReadPath (_src, spath, sizeof spath, &sz);
436 if (rc)
437 LOGERR (klogInt, rc, "error rereading built source path");
438 else
439 {
440 char dpath [8192];
441 size_t dz;
442
443 rc = VPathReadPath (_dst, dpath, sizeof dpath, &dz);
444 if (rc)
445 LOGERR (klogInt, rc, "error rereading built source path");
446 else
447 {
448 KDirectory * cwd;
449
450 rc = VFSManagerGetCWD (mgr, &cwd);
451 if (rc)
452 LOGERR (klogInt, rc, "error pulling directory out of manager");
453 else
454 {
455 KTime_t mtime = 0;
456 bool src_dev = false;
457 bool dst_dev = false;
458
459 if (strncmp (spath, "/dev/", sizeof "/dev/"-1) == 0)
460 {
461 /* get date from file system
462 [this won't be either the submitter original date
463 nor the mod-date within the file system, unless
464 the date gets reset...] */
465 mtime = time (NULL);
466 src_dev = true;
467 }
468 else
469 {
470 rc = KDirectoryDate (cwd, &mtime, "%s", spath);
471 if (rc)
472 {
473 PLOGERR (klogErr,
474 (klogErr, rc,
475 "failed to determine modtime for '$(path)' continuing", "path=%s", spath ));
476 mtime = time (NULL);
477 rc = 0;
478 }
479 }
480 if (strncmp (dpath, "/dev/", sizeof "/dev/" - 1) == 0)
481 {
482 if (strcmp(dpath, "/dev/stdout") == 0 ||
483 strcmp(dpath, "/dev/fd/1") == 0)
484 {
485 dump_out = stderr;
486 }
487 dst_dev = true;
488
489 if (src_dev && (xml_base == NULL))
490 {
491 rc = RC (rcExe, rcArgv, rcAccessing, rcParam, rcNull);
492 LOGERR (klogErr, rc, "Must provide " OPTION_XMLBASE
493 " when using a device stream as output");
494 }
495 }
496 if (rc == 0)
497 {
498 char * sleaf;
499 char * dleaf;
500 char * ext;
501 VPath * src;
502 size_t xz;
503 char xpath [8192]; /* way over sized - its a leaf only */
504
505 sleaf = strrchr (spath, '/');
506 if (sleaf++ == NULL)
507 sleaf = spath;
508
509 dleaf = strrchr (dpath, '/');
510 if (dleaf++ == NULL)
511 dleaf = dpath;
512
513 xz = strlen (leaf);
514 memmove (xpath, leaf, xz + 1);
515
516 /* if we are encrypting the output make sure we have an encryption
517 * extension on the destination.
518 */
519 if (do_decrypt)
520 {
521 ext = strrchr (xpath, '.');
522 if (ext == NULL)
523 ext = xpath;
524 if ((strcmp (ext, ncbi_encryption_extension) == 0) ||
525 (strcmp (ext, wga_encryption_extension) == 0))
526 *ext = '\0';
527 }
528 else
529 ext = xpath + strlen (xpath);
530
531 if (do_encrypt)
532 {
533 strcpy (ext, ncbi_encryption_extension);
534
535 if (!dst_dev)
536 {
537 ext = strrchr (dleaf, '.');
538 if (ext == NULL)
539 ext = dleaf + strlen (dleaf);
540
541 if (strcmp (ext, ncbi_encryption_extension) != 0)
542 strcat (ext, ncbi_encryption_extension);
543 }
544 }
545
546 rc = VFSManagerMakePath (mgr, &src, "%s", spath);
547 if (rc)
548 LOGERR (klogErr, rc, "error rebuilding source path");
549 else
550 {
551 VPath * dst;
552
553 rc = VFSManagerMakePath (mgr, &dst, "%s", dpath);
554 if (rc)
555 LOGERR (klogErr, rc, "error rebuilding source path");
556 else
557 {
558
559 /* never allow overwrite of something already there */
560 if (CCTreeFind (tree, xpath) != NULL ) /* dleaf? xpath? */
561 {
562 rc = RC ( rcExe, rcFile, rcCopying, rcPath, rcExists );
563 PLOGERR ( klogInt, (klogInt, rc, "will not overwrite "
564 "just-created '$(path)'", "path=%s", xpath ));
565 }
566 else
567 {
568 const KFile * sf;
569
570 rc = VFSManagerOpenFileRead (mgr, &sf, src);
571 if (rc)
572 PLOGERR (klogFatal,
573 (klogFatal, rc,
574 "error opening input '$(P)'", "P=%s", spath));
575 else
576 {
577 uint64_t expected;
578
579 rc = KFileSize (sf, &expected);
580 if (rc)
581 {
582 if (GetRCState (rc) == rcUnsupported)
583 {
584 expected = rcUnsupported;
585 rc = 0;
586 }
587 }
588 if (rc == 0)
589 {
590 KFile * df;
591
592 rc = VFSManagerCreateFile (mgr, &df, false, 0640, cm, dst);
593 if (GetRCState (rc) == rcUnauthorized)
594 {
595 uint32_t access;
596 rc_t orc;
597
598 orc = KDirectoryAccess (cwd, &access, "%s", dpath);
599 if (orc == 0)
600 {
601 orc = KDirectorySetAccess (cwd, false, 0640, 0777, "%s", dpath);
602 if (orc == 0)
603 {
604 rc = VFSManagerCreateFile (mgr, &df, false, 0640, cm, dst);
605 if (rc)
606 KDirectorySetAccess (cwd, false, access, 0777, "%s", dpath);
607 }
608 }
609 }
610 if (rc)
611 PLOGERR (klogErr,
612 (klogErr, rc, "failed to creat destination file '$(path)'",
613 "path=%s", dpath));
614 else
615 {
616 rc_t orc;
617
618 log_writer = KLogWriterGet();
619 log_lib_writer = KLogLibWriterGet();
620 log_data = KLogDataGet();
621 log_lib_data = KLogLibDataGet();
622
623 rc = copycat_log_set (logs, NULL);
624 if (rc == 0)
625 {
626 DEBUG_STATUS (("\n-----\n%s: call copycat (tree(%p), mtime(%lu),"
627 " cwd(%p), _src(%p), sf(%p), _dst(%p), df(%p), "
628 "spath(%s), leaf(%s), expected(%lu), do_decrypt(%d)"
629 " do_encrypt(%d))\n\n", __func__,
630 tree, mtime, cwd, _src, sf, _dst, df, spath,
631 xpath, expected, do_decrypt, do_encrypt));
632 rc = copycat (tree, mtime, cwd, _src, sf, _dst, df, spath,
633 xpath, expected, do_decrypt, do_encrypt);
634
635 orc = copycat_log_unset();
636 }
637
638 if (rc)
639 LOGERR (klogFatal, rc, "copycat function failed");
640 else
641 rc = orc;
642
643 KFileRelease (df);
644 }
645 }
646 KFileRelease (sf);
647 }
648 }
649 VPathRelease (dst);
650 }
651 VPathRelease (src);
652 }
653 }
654 KDirectoryRelease (cwd);
655 }
656 }
657 }
658 return rc;
659 }
660
661
662 /* files2dir
663 */
664 static
copycat_files2dir(CCTree * tree,SLList * logs,VFSManager * mgr,Vector * v,VPath * dst)665 rc_t copycat_files2dir (CCTree * tree, SLList * logs, VFSManager * mgr, Vector * v, VPath * dst)
666 {
667 size_t dz;
668 uint32_t ix;
669 rc_t rc;
670 char dbuff [8192];
671
672 /* xml-base only works for a single file */
673 if ((VectorLength (v) > 1) && (xml_base != NULL))
674 {
675 rc = RC (rcExe, rcArgv, rcParsing, rcParam, rcIncorrect);
676 LOGERR (klogErr, rc, "Can only use " OPTION_XMLBASE " with a single source file");
677 return rc;
678 }
679
680 /* get the path out of the destination VPath */
681 rc = VPathReadPath (dst, dbuff, sizeof dbuff, &dz);
682 if (rc)
683 return rc;
684
685 for (ix = 0; ix < VectorLength (v); ++ix)
686 {
687 VPath * new_dst;
688 VPath * src;
689 char * sleaf;
690 size_t sz;
691 char sbuff [8192];
692
693 src = (VPath*) VectorGet (v, ix);
694 if (src == NULL) /* warn? error? abort? */
695 continue;
696
697
698 rc = VPathReadPath (src, sbuff, sizeof sbuff, &sz);
699 if (rc)
700 return rc;
701
702 sleaf = strrchr (sbuff, '/');
703 if (sleaf++ == NULL)
704 sleaf = sbuff;
705
706 /* the special case destination is the null device which we treat
707 * as if it was a directory at first and then as a file
708 */
709 if (strcmp (dbuff, "/dev/null") == 0)
710 {
711 rc = VPathAddRef (dst);
712 if (rc != 0)
713 break;
714 new_dst = dst;
715 }
716 else
717 {
718 DEBUG_STATUS(("%s: %s (%lu)\n", __func__, dbuff, dz));
719
720 /* fix up the destination path if it's missing a final '/'
721 * this is inside the loop because of the null device special case
722 */
723 if (dbuff [dz-1] != '/')
724 {
725 dbuff [dz++] = '/';
726 dbuff [dz] = '\0';
727 }
728
729 /* append source leaf to destination path */
730 string_copy (dbuff + dz, sizeof dbuff - dz, sleaf, strlen (sleaf));
731
732 DEBUG_STATUS(("%s: %s\n", __func__, dbuff));
733
734 /* make a new VPath - no URI stuff gets transferred here */
735 rc = VFSManagerMakePath (mgr, &new_dst, "%s", dbuff);
736 if (rc)
737 break;
738 }
739
740 /* do this one file copy and catalog now */
741 rc = copycat_file2file (tree, logs, mgr, src, new_dst, xml_base ? xml_base : sleaf);
742
743 VPathRelease (new_dst);
744 }
745 return rc;
746 }
747
748
749 /* run
750 *
751 * dest will be set if the -o option was used.
752 *
753 */
754 static
copycat_run(CCTree * tree,SLList * logs,VFSManager * mgr,const char * cache,VPath * _dest,const char * extract,Vector * v)755 rc_t copycat_run ( CCTree *tree, SLList * logs, VFSManager * mgr,
756 const char *cache, VPath * _dest, const char *extract,
757 Vector * v)
758 {
759 rc_t rc;
760 int dest_type;
761 KDirectory * cwd;
762 VPath * dest;
763 size_t sz;
764 const char * pleaf;
765 char pbuff [4096];
766
767 /* =====
768 * directories aren't yet using the VFSManager to open them
769 * because we have to get more under the covers for our cataloging
770 */
771
772 rc = VFSManagerGetCWD (mgr, &cwd);
773 if (rc)
774 return rc;
775
776 /* if there's a cache path, create directory */
777 if ( cache != NULL )
778 {
779 rc = KDirectoryCreateDir ( cwd, 0775, kcmParents | kcmOpen, "%s", cache );
780 if ( rc == 0 )
781 rc = KDirectoryOpenDirUpdate ( cwd, & cdir, true, "%s", cache );
782 if ( rc != 0 )
783 {
784 PLOGERR (klogErr,
785 (klogErr, rc, "failed to open cache directory '$(path)'",
786 "path=%s", cache ));
787 return rc;
788 }
789 }
790 else
791 cdir = NULL;
792
793 /* if there's a extract path, create directory */
794 if ( extract != NULL )
795 {
796 rc = KDirectoryCreateDir (cwd, 0775, kcmParents | kcmOpen, "%s", extract);
797 if ( rc == 0 )
798 rc = KDirectoryOpenDirUpdate (cwd, & edir, true, "%s", extract);
799 if ( rc != 0 )
800 {
801 PLOGERR (klogErr,
802 (klogErr, rc,
803 "failed to open extract directory '$(path)'",
804 "path=%s", extract ));
805 return rc;
806 }
807 }
808 else
809 edir = NULL;
810
811 dest = _dest;
812 if (dest == NULL)
813 {
814 rc = VectorRemove (v, VectorLength(v) - 1, (void**)&dest);
815 if (rc)
816 return rc;
817 }
818
819 rc = VPathReadPath (dest, pbuff, sizeof pbuff, &sz);
820 if (rc)
821 return rc;
822
823 if (xml_base)
824 pleaf = xml_base;
825 else
826 {
827 pleaf = strrchr (pbuff, '/');
828 if (pleaf++ == NULL)
829 pleaf = pbuff;
830 }
831
832
833 /* check destination type */
834 dest_type = KDirectoryPathType (cwd, "%s", pbuff);
835 DEBUG_STATUS(("%s: checked destination type for '%s' got '%u'\n", __func__, pbuff, dest_type));
836 switch (dest_type & ~ kptAlias)
837 {
838 case kptNotFound:
839 /* this is the potentially ambiguous situation
840 * if only two arguments and the last isn't definitively a directory
841 * we assume its supposed to be a file.
842 *
843 * If the target does not exist but it's path ends in '/' or if
844 * there is more than one source we know it is supposed to be a
845 * directory.
846 */
847 if ((pbuff[sz-1] != '/')
848 /* ((pbuff[sz-1] != '.') */
849
850
851
852 /* ( */
853 /* )) */
854 && (VectorLength (v) == 1))
855 {
856 return copycat_file2file (tree, logs, mgr, VectorGet (v, 0), dest, pleaf);
857 }
858
859 /* create a directory at the given path */
860 rc = KDirectoryCreateDir ( cwd, 0775, kcmParents | kcmOpen, "%s", pbuff );
861 if ( rc != 0 )
862 return rc;
863
864 /* fall through */
865 case kptDir:
866 return copycat_files2dir (tree, logs, mgr, v, dest);
867
868
869 case kptCharDev:
870 /*
871 * special case NULL device can act like a directory here
872 * all other 'devices' we treat as a file
873 */
874 if ( strcmp ( pbuff, "/dev/null" ) == 0 )
875 {
876 #if 0
877 if (VectorLength (v) > 1)
878 #endif
879 return copycat_files2dir (tree, logs, mgr, v, dest);
880 }
881 /* fall through */
882 case kptBlockDev:
883 case kptFIFO:
884 case kptFile:
885 if (VectorLength (v) == 1)
886 return copycat_file2file (tree, logs, mgr, VectorGet (v, 0), dest, pleaf);
887
888 rc = RC (rcExe, rcDirectory, rcAccessing, rcPath, rcNotFound);
889 PLOGERR (klogFatal,
890 (klogFatal, rc, "copying multiple files, but target argument "
891 "[$(D)] is not a directory", "D=%s", pbuff));
892 return rc;
893 }
894
895 fprintf ( stderr, "%s: '%s': specified destination path is not a directory\n", program_name, pbuff );
896 return RC ( rcExe, rcDirectory, rcAccessing, rcPath, rcIncorrect );
897 }
898 /* dump
899 */
900 static
copycat_fwrite(void * out,const void * buffer,size_t bytes)901 rc_t copycat_fwrite ( void *out, const void *buffer, size_t bytes )
902 {
903 size_t writ = fwrite ( buffer, 1, bytes, out );
904 if ( writ != bytes )
905 return RC ( rcExe, rcFile, rcWriting, rcTransfer, rcIncomplete );
906 return 0;
907 }
908
909 static
copycat_dump(const CCTree * tree,SLList * logs)910 rc_t copycat_dump ( const CCTree *tree, SLList * logs )
911 {
912 return CCTreeDump ( tree, copycat_fwrite, dump_out, logs );
913 }
914
915 static
param_whack(void * path,void * ignored)916 void param_whack (void * path, void * ignored)
917 {
918 (void)VPathRelease ((const VPath*)path);
919 }
920
921 /* KMain
922 */
923
KMain(int argc,char * argv[])924 rc_t KMain ( int argc, char *argv [] )
925 {
926 Args * args;
927 rc_t rc, orc;
928
929 KStsHandlerSetStdErr();
930 KStsLibHandlerSetStdErr();
931
932 rc = ArgsMakeAndHandle (&args, argc, argv, 1, Options, sizeof Options / sizeof (OptDef));
933 if (rc == 0)
934 {
935 /* CS-101: DO WHILE ( 0 ) LOOPS ARE CUTE FOR AVOIDING GOTO,
936 BUT IT MEANS YOU HAVE TO PUT ALL OF YOUR CLEANUP OUTSIDE
937 OF THE LOOP, IN ORDER TO BREAK OUT IN A NON-STRUCTURED WAY */
938 do
939 {
940 const char * dest;
941 const char * cache;
942 const char * extract = NULL;
943
944 uint32_t pcount;
945 CCTree * tree;
946 VFSManager * mgr = NULL;
947 VPath * dp = NULL;
948 Vector params = { NULL, 0, 0, 0};
949 uint32_t ix;
950
951 rc = ArgsProgram (args, &full_path, &program_name);
952 if (rc)
953 break;
954
955 extract_dir = false;
956 xml_dir = false;
957 memset (epath, 0, sizeof (epath));
958 ehere = epath;
959
960 rc = ArgsOptionCount (args, OPTION_CACHE, &pcount);
961 if (rc)
962 break;
963 if (pcount)
964 {
965 rc = ArgsOptionValue (args, OPTION_CACHE, 0, (const void **)&cache);
966 if (rc)
967 break;
968 }
969 else
970 cache = NULL;
971
972 rc = ArgsOptionCount (args, OPTION_DEST, &pcount);
973 if (rc)
974 break;
975 if (pcount)
976 {
977 rc = ArgsOptionValue (args, OPTION_DEST, 0, (const void **)&dest);
978 if (rc)
979 break;
980 }
981 else
982 {
983 dest = NULL;
984 }
985
986 rc = ArgsOptionCount (args, OPTION_EXTRACT, &pcount);
987 if (rc)
988 break;
989 if (pcount)
990 {
991 rc = ArgsOptionValue (args, OPTION_EXTRACT, 0, (const void **)&extract);
992 if (rc)
993 break;
994 }
995
996 rc = ArgsOptionCount (args, OPTION_EXTDIR, &pcount);
997 if (rc)
998 break;
999 extract_dir = pcount > 0;
1000
1001 rc = ArgsOptionCount (args, OPTION_XMLDIR, &pcount);
1002 if (rc)
1003 break;
1004 xml_dir = pcount > 0;
1005
1006 rc = ArgsOptionCount (args, OPTION_FORCE, &pcount);
1007 if (rc)
1008 break;
1009 if (pcount)
1010 cm = kcmParents | kcmInit;
1011
1012 rc = ArgsOptionCount (args, OPTION_XMLBASE, &pcount);
1013 if (pcount == 1)
1014 {
1015 rc = ArgsOptionValue (args, OPTION_XMLBASE, 0, (const void **)&xml_base);
1016 if (rc)
1017 break;
1018
1019 /* we might want a few more checks here... */
1020 if (strchr (xml_base, '/') != NULL)
1021 {
1022 rc = RC (rcExe, rcArgv, rcAccessing, rcParam, rcInvalid);
1023 break;
1024 }
1025 }
1026
1027 rc = ArgsOptionCount (args, OPTION_INBLOCK, &pcount);
1028 if (pcount == 1)
1029 {
1030 const char * start;
1031 char * end;
1032 uint32_t val;
1033
1034 rc = ArgsOptionValue (args, OPTION_INBLOCK, 0, (const void **)&start);
1035 if (rc)
1036 break;
1037
1038 val = strtou32 (start, &end, 10);
1039
1040 if (*end != '\0')
1041 {
1042 rc = RC (rcExe, rcArgv, rcAccessing, rcParam, rcInvalid);
1043 break;
1044 }
1045 in_block = val * 1024;
1046 }
1047
1048 rc = ArgsOptionCount (args, OPTION_OUTBLOCK, &pcount);
1049 if (pcount == 1)
1050 {
1051 const char * start;
1052 char * end;
1053 uint32_t val;
1054
1055 rc = ArgsOptionValue (args, OPTION_OUTBLOCK, 0, (const void **)&start);
1056 if (rc)
1057 break;
1058
1059 val = strtou32 (start, &end, 10);
1060
1061 if (*end != '\0')
1062 {
1063 rc = RC (rcExe, rcArgv, rcAccessing, rcParam, rcInvalid);
1064 break;
1065 }
1066 out_block = val * 1024;
1067 }
1068
1069 rc = ArgsOptionCount ( args, OPTION_NOBZIP2, & pcount );
1070 if ( pcount > 0 )
1071 {
1072 no_bzip2 = true;
1073 }
1074
1075 rc = ArgsOptionCount ( args, OPTION_NOMD5, & pcount );
1076 if ( pcount > 0 )
1077 {
1078 no_md5 = true;
1079 }
1080
1081 /* all parameters plus the possible dest option parameter */
1082 rc = ArgsParamCount (args, &pcount);
1083 if (rc)
1084 break;
1085
1086 if (pcount == 0)
1087 {
1088 rc = RC ( rcExe, rcArgv, rcReading, rcParam, rcInsufficient );
1089 MiniUsage (args);
1090 break;
1091 }
1092
1093 if ((dest == NULL) && (extract == NULL) && (pcount < 2))
1094 {
1095 rc = RC ( rcExe, rcArgv, rcReading, rcParam, rcInvalid );
1096 if (pcount)
1097 LOGERR (klogFatal, rc, "missing source and destination arguments\n");
1098 else
1099 LOGERR (klogFatal, rc, "missing destination argument[s]\n");
1100 break;
1101 }
1102
1103 VectorInit (¶ms, 0, 8); /* 8 is arbirary - seems long enough for no realloc */
1104
1105 rc = VFSManagerMake (&mgr);
1106 if (rc)
1107 {
1108 LOGERR (klogFatal, rc,
1109 "unable to build file system manager");
1110 break;
1111 }
1112
1113 for (ix = 0; ix < pcount; ++ix)
1114 {
1115 VPath * kp;
1116 const char * pc;
1117
1118 rc = ArgsParamValue (args, ix, (const void **)&pc);
1119 if (rc)
1120 {
1121 LOGERR (klogFatal, rc, "unable to extract path parameter");
1122 break;
1123 }
1124
1125 rc = VFSManagerMakePath (mgr, &kp, "%s", pc);
1126 if (rc)
1127 {
1128 LOGERR (klogFatal, rc, "unable to build path parameter");
1129 break;
1130 }
1131
1132 rc = VectorSet (¶ms, ix, kp);
1133 if (rc)
1134 {
1135 LOGERR (klogFatal, rc, "unable to stow path parameter");
1136 break;
1137 }
1138 }
1139 if (rc == 0)
1140 {
1141 if (dest)
1142 {
1143 rc = VFSManagerMakePath (mgr, &dp, "%s", dest);
1144 if (rc)
1145 {
1146 LOGERR (klogFatal, rc, "unable to build dest parameter");
1147 break;
1148 }
1149 }
1150 DEBUG_STATUS(("%s: Create file tree\n", __func__));
1151 rc = CCTreeMake (&tree);
1152 if (rc)
1153 {
1154 LOGERR ( klogInt, rc, "failed to create parse tree" );
1155 }
1156 else
1157 {
1158 DEBUG_STATUS(("%s: Create cache file tree\n", __func__));
1159
1160 rc = CCTreeMake (&ctree);
1161 if (rc)
1162 {
1163 LOGERR ( klogInt, rc, "failed to create cache tree" );
1164 }
1165 else
1166 {
1167 DEBUG_STATUS(("%s: Create extracted file tree\n",
1168 __func__));
1169
1170 rc = CCTreeMake (&etree);
1171 if (rc)
1172 {
1173 LOGERR ( klogInt, rc,
1174 "failed to create extract tree" );
1175 }
1176 else
1177 {
1178 DEBUG_STATUS(("%s: Create NULL output file\n",
1179 __func__));
1180 rc = KFileMakeNullUpdate (&fnull);
1181 if (rc)
1182 LOGERR (klogInt, rc,
1183 "failed to create null output");
1184 else
1185 {
1186 DEBUG_STATUS(("%s: Open File Format Tester\n",
1187 __func__));
1188
1189 rc = CCFileFormatMake ( & filefmt );
1190 if ( rc != 0 )
1191 LOGERR (klogInt, rc,
1192 "failed to create file format" );
1193 else
1194 {
1195 SLList logs;
1196
1197 DEBUG_STATUS(("%s: Initialize CRC32\n",
1198 __func__));
1199
1200 SLListInit (&logs);
1201 CRC32Init ();
1202
1203 DEBUG_STATUS(("%s: Copy and catalog\n",
1204 __func__));
1205
1206 dump_out = stdout; /* kludge */
1207
1208 rc = copycat_run (tree, &logs, mgr, cache,
1209 dp, extract, ¶ms);
1210 if ( rc == 0 )
1211 rc = copycat_dump ( xml_dir ? etree : tree, &logs );
1212 DEBUG_STATUS(("%s: Output XML\n", __func__));
1213
1214
1215 CCFileFormatRelease ( filefmt );
1216 }
1217
1218 DEBUG_STATUS(("%s: Release NULL output file\n", __func__));
1219
1220 orc = KFileRelease ( fnull ), fnull = NULL;
1221 if (rc == 0)
1222 rc = orc;
1223 }
1224 DEBUG_STATUS(("%s: Whack extracted file tree;\n", __func__));
1225 CCTreeWhack (etree);
1226 }
1227 DEBUG_STATUS(("%s: Whack cache file tree;\n", __func__));
1228 CCTreeWhack (ctree);
1229 }
1230 DEBUG_STATUS(("%s: Whack file tree;\n", __func__));
1231 CCTreeWhack (tree);
1232 }
1233
1234 VPathRelease (dp);
1235 }
1236 VFSManagerRelease (mgr);
1237 VectorWhack (¶ms, param_whack, NULL);
1238 } while (0);
1239 }
1240 ArgsWhack (args);
1241 orc = KDirectoryRelease (cdir); /* class extren should be NULL if never used */
1242 if (orc)
1243 {
1244 LOGERR (klogInt, rc, "Error shutting file system access");
1245 if (rc == 0)
1246 rc = orc;
1247 }
1248 DEBUG_STATUS(("%s: exit rc %R(%x);\n", __func__, rc, rc));
1249 return rc;
1250 }
1251
1252