1 /*===========================================================================
2  *
3  *                            Public DOMAIN NOTICE
4  *               National Center for Biotechnology Information
5  *
6  *  This software/database is a "United States Government Work" under the
7  *  terms of the United States Copyright Act.  It was written as part of
8  *  the author's official duties as a United States Government employee and
9  *  thus cannot be copyrighted.  This software/database is freely available
10  *  to the public for use. The National Library of Medicine and the U.S.
11  *  Government have not placed any restriction on its use or reproduction.
12  *
13  *  Although all reasonable efforts have been taken to ensure the accuracy
14  *  and reliability of the software and data, the NLM and the U.S.
15  *  Government do not and cannot warrant the performance or results that
16  *  may be obtained by using this software or data. The NLM and the U.S.
17  *  Government disclaim all warranties, express or implied, including
18  *  warranties of performance, merchantability or fitness for any particular
19  *  purpose.
20  *
21  *  Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  */
26 
27 #include "copycat-priv.h"
28 #include "cctree-priv.h"
29 
30 #include <vfs/path.h>
31 #include <vfs/path-priv.h>
32 #include <krypto/key.h>
33 #include <krypto/encfile.h>
34 #include <krypto/encfile-priv.h>
35 #include <krypto/wgaencrypt.h>
36 #include <kfs/kfs-priv.h>
37 #include <kfs/file.h>
38 #include <kfs/teefile.h>
39 #include <kfs/gzip.h>
40 #include <kfs/bzip.h>
41 #include <kfs/md5.h>
42 #include <kfs/countfile.h>
43 #include <kfs/readheadfile.h>
44 #include <kfs/buffile.h>
45 #include <kfs/crc.h>
46 #include <klib/checksum.h>
47 #include <klib/log.h>
48 #include <klib/rc.h>
49 #include <klib/text.h>
50 
51 #include <os-native.h>
52 
53 #include <stdlib.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <assert.h>
57 
58 /* make it last include */
59 #include "debug.h"
60 
61 #define EXAMINE_KAR_FILES 0
62 #define DECRYPT_FAIL_AS_PLAIN_FILE 0
63 /* the readhead file isn't working yet */
64 #define USE_KBUFFILE 1
65 
66 static
67 const VPath * src_path = NULL;
68 
69 static
70 const VPath * dst_path = NULL;
71 
72 static
73 bool do_encrypt = false;
74 static
75 bool do_decrypt = false;
76 static
77 bool wga_pw_read = false;
78 static
79 bool src_pw_read = false;
80 static
81 bool dst_pw_read = false;
82 
83 static
84 char wga_pwd [256];
85 static
86 char src_pwd [256];
87 static
88 char dst_pwd [256];
89 
90 static
91 size_t wga_pwd_sz;
92 static
93 size_t src_pwd_sz;
94 static
95 size_t dst_pwd_sz;
96 
97 static
98 KKey src_key;
99 
100 static
101 KKey dst_key;
102 
103 static
104 KDirectory * cwd = NULL;
105 
106 
107 static
get_password(const VPath * path,char * pw,size_t pwz,size_t * num_read,KKey * key,bool * read)108 rc_t get_password (const VPath * path, char * pw, size_t pwz, size_t * num_read, KKey * key, bool * read)
109 {
110     const KFile * pwfile;
111     size_t z;
112     rc_t rc;
113     char obuff [8096];
114 
115     if (VPathOption (path, vpopt_encrypted, obuff, sizeof obuff, &z) == 0)
116     {
117         if (VPathOption (path, vpopt_pwpath, obuff, sizeof obuff, &z) == 0)
118             rc = KDirectoryOpenFileRead (cwd, &pwfile, "%s", obuff);
119 
120         else if (VPathOption (path, vpopt_pwfd, obuff, sizeof obuff, &z) == 0)
121             rc = KFileMakeFDFileRead (&pwfile, atoi (obuff));
122 
123         else
124             rc = RC (rcExe, rcPath, rcAccessing, rcParam, rcUnsupported);
125         if (rc == 0)
126         {
127             rc = KFileRead (pwfile, 0, pw, pwz, num_read);
128 
129             if (rc == 0)
130             {
131                 char * pc;
132 
133                 if (*num_read < pwz)
134                     pw[*num_read] = '\0';
135 
136                 pc = string_chr (pw, *num_read, '\r');
137                 if (pc)
138                 {
139                     *pc = '\0';
140                     *num_read = pc - pw;
141                 }
142 
143                 pc = string_chr (pw, *num_read, '\n');
144                 if (pc)
145                 {
146                     *pc = '\0';
147                     *num_read = pc - pw;
148                 }
149 
150                 *read = true;
151                 rc = KKeyInitRead (key, kkeyAES128, pw, *num_read);
152             }
153 
154             KFileRelease (pwfile);
155         }
156     }
157     else
158         rc = RC (rcExe, rcPath, rcAccessing, rcFunction, rcNotFound);
159     return rc;
160 }
161 
162 
163 static
wga_password(const VPath * path,char * pw,size_t pwz,size_t * num_read,bool * read)164 rc_t wga_password (const VPath * path, char * pw, size_t pwz, size_t * num_read, bool * read)
165 {
166     const KFile * pwfile;
167     size_t z;
168     rc_t rc;
169     char obuff [8096];
170 
171     if (VPathOption (path, vpopt_encrypted, obuff, sizeof obuff, &z) == 0)
172     {
173         if (VPathOption (path, vpopt_pwpath, obuff, sizeof obuff, &z) == 0)
174             rc = KDirectoryOpenFileRead (cwd, &pwfile, "%s", obuff);
175 
176         else if (VPathOption (path, vpopt_pwfd, obuff, sizeof obuff, &z) == 0)
177             rc = KFileMakeFDFileRead (&pwfile, atoi (obuff));
178 
179         else
180             rc = RC (rcExe, rcPath, rcAccessing, rcParam, rcUnsupported);
181         if (rc == 0)
182         {
183             rc = KFileRead (pwfile, 0, pw, pwz, num_read);
184 
185             if (rc == 0)
186             {
187                 char * pc;
188 
189                 if (*num_read < pwz)
190                     pw[*num_read] = '\0';
191 
192                 pc = string_chr (pw, *num_read, '\r');
193                 if (pc)
194                 {
195                     *pc = '\0';
196                     *num_read = pc - pw;
197                 }
198 
199                 pc = string_chr (pw, *num_read, '\n');
200                 if (pc)
201                 {
202                     *pc = '\0';
203                     *num_read = pc - pw;
204                 }
205 
206                 *read = true;
207             }
208 
209             KFileRelease (pwfile);
210         }
211     }
212     else
213         rc = RC (rcExe, rcPath, rcAccessing, rcFunction, rcNotFound);
214     return rc;
215 }
216 
217 
218 /*--------------------------------------------------------------------------
219  * copycat
220  */
221 static
ccat_cache(CCCachedFileNode ** np,const KFile * sf,enum CCType ntype,CCFileNode * node,const char * name)222 rc_t ccat_cache ( CCCachedFileNode **np, const KFile *sf,
223                   enum CCType ntype, CCFileNode *node, const char *name )
224 {
225     rc_t rc;
226     KFile *out;
227 
228     /* create path */
229     char path [ 256 ];
230     int len = snprintf ( path, sizeof path, "%s", name );
231 
232     DEBUG_STATUS (("%s: name '%s'\n", __func__, name));
233 
234     if ( len < 0 || len >= sizeof path )
235         return RC ( rcExe, rcFile, rcWriting, rcPath, rcExcessive );
236 
237     /* look for a name that has not yet been written */
238     if ( CCTreeFind ( ctree, path ) != NULL )
239     {
240         uint32_t i;
241         const char *ext = strrchr ( name, '.' );
242         if ( ext != NULL )
243         {
244             for ( i = 2; ; ++ i )
245             {
246                 len = snprintf ( path, sizeof path, "%.*s-%u%s", ( int ) ( ext - name ), name, i, ext );
247                 if ( len < 0 || len >= sizeof path )
248                     return RC ( rcExe, rcFile, rcWriting, rcPath, rcExcessive );
249                 if ( CCTreeFind ( ctree, path ) == NULL )
250                     break;
251             }
252         }
253         else
254         {
255             for ( i = 2; ; ++ i )
256             {
257                 len = snprintf ( path, sizeof path, "%s-%u", name, i );
258                 if ( len < 0 || len >= sizeof path )
259                     return RC ( rcExe, rcFile, rcWriting, rcPath, rcExcessive );
260                 if ( CCTreeFind ( ctree, path ) == NULL )
261                     break;
262             }
263         }
264     }
265 
266     /* create an output file */
267     rc = KDirectoryCreateFile ( cdir, & out, false, 0640, cm, "%s", path );
268     if ( rc != 0 && GetRCState ( rc ) == rcUnauthorized )
269     {
270         /* respond to a file that has no write privs */
271         uint32_t access;
272         rc_t rc2 = KDirectoryAccess ( cdir, & access, "%s", path );
273         if ( rc2 == 0 )
274         {
275             rc2 = KDirectorySetAccess ( cdir, false, 0640, 0777, "%s", path );
276             if ( rc2 == 0 )
277             {
278                 rc = KDirectoryCreateFile ( cdir, & out, false, 0640, cm, "%s", path );
279                 if ( rc != 0 )
280                     KDirectorySetAccess ( cdir, false, access, 0777, "%s", path );
281             }
282         }
283     }
284 
285     if ( rc != 0 )
286         PLOGERR ( klogErr,  (klogErr, rc, "failed to create cached file '$(path)'", "path=%s", path ));
287     else
288     {
289         const KFile *tee;
290         rc = KFileMakeTeeRead ( & tee, sf, out );
291         if ( rc != 0 )
292             PLOGERR ( klogInt,  (klogInt, rc, "failed to create cache tee file on '$(path)'", "path=%s", path ));
293         else
294         {
295             KFileAddRef ( sf );
296             KFileAddRef ( out );
297             rc = KFileRelease ( tee );
298             if ( rc != 0 )
299                 PLOGERR ( klogInt,  (klogInt, rc, "failed to close cache tee file on '$(path)'", "path=%s", path ));
300         }
301 
302         KFileRelease ( out );
303 
304         if ( rc == 0 )
305         {
306             rc = CCCachedFileNodeMake ( np, path, ntype, node );
307             if ( rc != 0 )
308                 LOGERR ( klogInt, rc, "failed to create cached file node" );
309             else
310             {
311                 rc = KDirectorySetAccess ( cdir, false, 0440, 0777, "%s", path );
312 
313                 /* create named entry in ctree */
314                 rc = CCTreeInsert ( ctree, 0, ccFile, NULL, path );
315                 if ( rc != 0 )
316                     LOGERR ( klogInt, rc, "failed to record cached file" );
317             }
318         }
319     }
320 
321     return rc;
322 }
323 
324 
325 static
ccat_extract_path(char * path,size_t pathz,const char * name)326 rc_t ccat_extract_path (char * path, size_t pathz, const char * name)
327 {
328     rc_t rc = 0;
329     int len;
330 
331     DEBUG_STATUS (("%s: name '%s'\n", __func__, name));
332 
333     if (extract_dir)
334         len = snprintf ( path, pathz, "%s%s", epath, name );
335     else
336         len = snprintf ( path, pathz, "%s", name );
337     DEBUG_STATUS (("%s: path '%s'\n",__func__, path));
338 
339     if ( len < 0 || len >= pathz )
340         return RC ( rcExe, rcFile, rcWriting, rcPath, rcExcessive );
341 
342     /* look for a name that has not yet been written */
343     if ( CCTreeFind ( etree, path ) != NULL )
344     {
345         uint32_t i;
346         const char *ext = strrchr ( name, '.' );
347         if ( ext != NULL )
348         {
349             for ( i = 2; ; ++ i )
350             {
351                 len = snprintf ( path, pathz, "%.*s-%u%s", ( int ) ( ext - name ), name, i, ext );
352                 if ( len < 0 || len >= pathz )
353                     return RC ( rcExe, rcFile, rcWriting, rcPath, rcExcessive );
354                 if ( CCTreeFind ( etree, path ) == NULL )
355                     break;
356             }
357         }
358         else
359         {
360             for ( i = 2; ; ++ i )
361             {
362                 len = snprintf ( path, pathz, "%s-%u", name, i );
363                 if ( len < 0 || len >= pathz )
364                     return RC ( rcExe, rcFile, rcWriting, rcPath, rcExcessive );
365                 if ( CCTreeFind ( etree, path ) == NULL )
366                     break;
367             }
368         }
369     }
370     DEBUG_STATUS (("%s: rc '%u(%R)' path '%s'\n",__func__, rc, rc, path));
371     return rc;
372 }
373 
374 
375 static
ccat_extract(const KFile * sf,const char * path)376 rc_t ccat_extract (const KFile *sf, const char * path)
377 {
378     rc_t rc;
379     KFile *out;
380 
381     rc = KDirectoryCreateFile ( edir, & out, false, 0640, cm, "%s", path );
382     if ( rc != 0 && GetRCState ( rc ) == rcUnauthorized )
383     {
384         /* respond to a file that has no write privs */
385         uint32_t access;
386         rc_t rc2 = KDirectoryAccess ( edir, & access, "%s", path );
387         if ( rc2 == 0 )
388         {
389             rc2 = KDirectorySetAccess ( edir, false, 0640, 0777, "%s", path );
390             if ( rc2 == 0 )
391             {
392                 rc = KDirectoryCreateFile ( edir, & out, false, 0640, cm, "%s", path );
393                 DBG_KFILE(("%s: called KDirectoryCreateFile rc %R path %s\n",__func__,rc,path));
394                 DBG_KFile(out);
395                 if ( rc != 0 )
396                     KDirectorySetAccess ( edir, false, access, 0777, "%s", path );
397             }
398         }
399     }
400     if ( rc != 0 )
401         PLOGERR ( klogErr,
402                   ( klogErr, rc,
403                     "failed to create extracted file '$(path)'",
404                     "path=%s", path ));
405     else
406     {
407         const KFile *tee;
408         rc = KFileMakeTeeRead ( & tee, sf, out );
409         DBG_KFILE(("%s: called KFileMakeTeeRead rc %R \n",__func__,rc));
410         DBG_KFile(tee);
411         if ( rc != 0 )
412             PLOGERR ( klogInt,  ( klogInt, rc, "failed to create extract tee file on '$(path)'", "path=%s", path ));
413         else
414         {
415             KFileAddRef ( sf );
416             KFileAddRef ( out );
417             rc = KFileRelease ( tee );
418             if ( rc != 0 )
419                 PLOGERR (klogInt,
420                          (klogInt, rc,
421                           "failed to close extract tee file on '$(path)'",
422                           "path=%s", path));
423             else if (!xml_dir)
424             {
425                 CCCachedFileNode * np;
426 
427                 rc = CCCachedFileNodeMake (&np, path, ccCached, NULL );
428                 if ( rc != 0 )
429                     LOGERR ( klogInt, rc, "failed to create cached extract file node" );
430                 else
431                 {
432                     rc = CCTreeInsert (etree, 0, ccFile, NULL, path);
433                     if ( rc != 0 )
434                         LOGERR ( klogInt, rc, "failed to record cached extract file" );
435                 }
436             }
437         }
438         KFileRelease ( out );
439     }
440 
441     return rc;
442 }
443 
444 static
ccat_arc(CCContainerNode ** np,const KFile * sf,KTime_t mtime,enum CCType ntype,CCFileNode * node,const char * name,uint32_t type_id)445 rc_t ccat_arc ( CCContainerNode **np, const KFile *sf, KTime_t mtime,
446                 enum CCType ntype, CCFileNode *node, const char *name, uint32_t type_id )
447 {
448     rc_t rc /*, orc */;
449 
450     /* ensure we handle this type of archive */
451     switch ( type_id )
452     {
453     case ccfftaSra:
454 #if ! EXAMINE_KAR_FILES
455         * np = NULL;
456         return 0;
457 #endif
458     case ccfftaHD5:
459         * np = NULL;
460         return 0;
461     case ccfftaTar:
462         break;
463     default:
464         /* don't recognize archive format - treat as a normal file */
465         PLOGMSG ( klogWarn, ( klogWarn, "archive '$(name)' type '$(ftype)' will not be analyzed: "
466                               "unknown format", "name=%s,ftype=%s", name, node -> ftype ));
467         * np = NULL;
468         return 0;
469     }
470 
471     /* create container node */
472     rc = CCContainerNodeMake ( np, ntype, node );
473     if ( rc != 0 )
474         LOGERR ( klogInt, rc, "failed to create container node" );
475     else
476     {
477         CCContainerNode *cont = * np;
478 
479         /* orc = 0; */
480         switch ( type_id )
481         {
482         case ccfftaTar:
483             /* orc = */ ccat_tar ( cont, sf, name );
484             break;
485         case ccfftaSra:
486             /* orc = */ ccat_sra (cont, sf, name);
487             break;
488         }
489     }
490 
491     return rc;
492 }
493 
494 static
ccat_enc(CCContainerNode ** np,const KFile * sf,KTime_t mtime,enum CCType ntype,CCFileNode * node,const char * name,uint32_t type_id)495 rc_t ccat_enc ( CCContainerNode **np, const KFile *sf, KTime_t mtime,
496                 enum CCType ntype, CCFileNode *node, const char *name,
497                 uint32_t type_id )
498 {
499     rc_t rc = 0;
500     const KFile *df;
501     uint64_t expected = SIZE_UNKNOWN;      /* assume we won't know */
502 
503     switch ( type_id )
504     {
505     case ccffteNCBI:
506         if (!src_pw_read)
507         {
508             rc = get_password (src_path, src_pwd, sizeof src_pwd, &src_pwd_sz, &src_key, &src_pw_read);
509             if (rc)
510             {
511             validate_instead:
512                 rc = KFileAddRef (sf);
513                 if (rc == 0)
514                 {
515                     PLOGMSG ( klogWarn, ( klogWarn, "file '$(name)' type '$(ftype)' will be validated but not be decoded: "
516                                           "no password given", "name=%s,ftype=%s", name, node -> ftype ));
517                     /* can't decompress it - treat it as a normal file */
518                     rc = KEncFileValidate (sf);
519                     if (rc)
520                     {
521                         memmove (node->ftype + sizeof "Errored" - 1, node->ftype, strlen (node->ftype));
522                         memmove (node->ftype, "Errored", sizeof "Errored" - 1);
523                     }
524                     * np = NULL;
525                     return 0;
526                 }
527                 return rc;
528             }
529         }
530         rc = KEncFileMakeRead (&df, sf, &src_key);
531         if (rc)
532             goto validate_instead;
533         break;
534 
535     case ccffteWGA:
536         if (!wga_pw_read)
537             rc = wga_password (src_path, wga_pwd, sizeof wga_pwd, &wga_pwd_sz, &wga_pw_read);
538         if (rc == 0)
539         {
540             rc = KFileMakeWGAEncRead (&df, sf, wga_pwd, wga_pwd_sz);
541             break;
542         }
543         /* can't decompress it - treat it as a normal file */
544         PLOGMSG ( klogWarn, ( klogWarn, "file '$(name)' type '$(ftype)' will not be decoded: "
545                               "no password given", "name=%s,ftype=%s", name, node -> ftype ));
546         * np = NULL;
547         return 0;
548 
549     default:
550         /* can't decrypt it - treat it as a normal file */
551         PLOGMSG ( klogWarn, ( klogWarn, "file '$(name)' type '$(ftype)' will not be decoded: "
552                               "unknown encoding format", "name=%s,ftype=%s", name, node -> ftype ));
553         * np = NULL;
554         return 0;
555     }
556 
557     if ( rc != 0 )
558         PLOGERR ( klogInt,  (klogInt, rc, "failed to decode file '$(path)'", "path=%s", name ));
559     else
560     {
561         rc = CCContainerNodeMake ( np, ntype, node );
562         if ( rc != 0 )
563             LOGERR ( klogInt, rc, "failed to create container node" );
564         else
565         {
566             CCContainerNode *cont = * np;
567             CCFileNode *nnode;
568 
569             /* now create a new contained file node */
570             rc = CCFileNodeMake ( & nnode, expected );
571             if ( rc != 0 )
572                 LOGERR ( klogInt, rc, "failed to create contained file node" );
573             else
574             {
575                 int len;
576                 char newname [ 256 ];
577 
578                 /* invent a new name for file */
579                 const char *ext = strrchr ( name, '.' );
580                 if ( ext == NULL )
581                     len = snprintf ( newname, sizeof newname, "%s", name );
582                 else
583                     len = snprintf ( newname, sizeof newname, "%.*s", ( int ) ( ext - name ), name );
584 
585                 if ( len < 0 || len >= sizeof newname )
586                 {
587                     rc = RC ( rcExe, rcNode, rcConstructing, rcName, rcExcessive );
588                     LOGERR ( klogErr, rc, "failed to create contained file node" );
589                 }
590                 else
591                 {
592                     void * save;
593                     const KFile * cf;
594                     /* rc_t krc; */
595 
596                     copycat_log_set (&node->logs, &save);
597 
598                     rc = CCFileMakeRead (&cf, df, &node->rc);
599                     if (rc == 0)
600                     {
601                         /* recurse with buffer on decoding */
602                         /* krc = */ ccat_buf ( & cont -> sub, cf, mtime,
603                                          ccContFile, nnode, newname);
604 
605                         KFileRelease (cf);
606                         /* if successful, "node" ( allocated locally above )
607                            will have been entered into "cont->sub" */
608                     }
609 
610                     copycat_log_set (&save, NULL);
611                 }
612             }
613         }
614 
615         KFileRelease ( df );
616     }
617 
618     return rc;
619 }
620 
621 static
ccat_cmp(CCContainerNode ** np,const KFile * sf,KTime_t mtime,enum CCType ntype,CCFileNode * node,const char * name,uint32_t type_id)622 rc_t ccat_cmp ( CCContainerNode **np, const KFile *sf, KTime_t mtime,
623                 enum CCType ntype, CCFileNode *node, const char *name, uint32_t type_id )
624 {
625     const KFile *zf;
626     /* use a variable incase we ever get a compression that can be queried
627      * about file size */
628     uint64_t expected = SIZE_UNKNOWN;
629     rc_t rc;
630 
631     switch ( type_id )
632     {
633 
634     case ccfftcGzip:
635         /* this code attaches a new reference to "sf" */
636         rc = KFileMakeGzipForRead ( & zf, sf );
637         break;
638     case ccfftcBzip2:
639         /* this code attaches a new reference to "sf" */
640         rc = KFileMakeBzip2ForRead ( & zf, sf );
641         break;
642     default:
643         /* can't decompress it - treat it as a normal file */
644         PLOGMSG ( klogWarn, ( klogWarn, "file '$(name)' type '$(ftype)' will not be decompressed: "
645                               "unknown compression format", "name=%s,ftype=%s", name, node -> ftype ));
646         * np = NULL;
647         return 0;
648     }
649 
650     if ( rc != 0 )
651         PLOGERR ( klogInt,  (klogInt, rc, "failed to decompress file '$(path)'", "path=%s", name ));
652     else
653     {
654         rc = CCContainerNodeMake ( np, ntype, node );
655         if ( rc != 0 )
656             LOGERR ( klogInt, rc, "failed to create container node" );
657         else
658         {
659             CCContainerNode *cont = * np;
660             CCFileNode *nnode;
661 
662             /* now create a new contained file node */
663             rc = CCFileNodeMake ( & nnode, expected );
664             if ( rc != 0 )
665                 LOGERR ( klogInt, rc, "failed to create contained file node" );
666             else
667             {
668                 int len;
669                 char newname [ 256 ];
670 
671                 /* invent a new name for file */
672                 const char *ext = strrchr ( name, '.' );
673                 if ( ext == NULL )
674                     len = snprintf ( newname, sizeof newname, "%s", name );
675                 else if ( strcasecmp ( ext, ".tgz" ) == 0 )
676                     len = snprintf ( newname, sizeof newname, "%.*s.tar", ( int ) ( ext - name ), name );
677                 else
678                     len = snprintf ( newname, sizeof newname, "%.*s", ( int ) ( ext - name ), name );
679 
680                 if ( len < 0 || len >= sizeof newname )
681                 {
682                     rc = RC ( rcExe, rcNode, rcConstructing, rcName, rcExcessive );
683                     LOGERR ( klogErr, rc, "failed to create contained file node" );
684                 }
685                 else
686                 {
687                     void * save;
688                     const KFile * cf;
689                     /* rc_t krc; */
690 
691                     copycat_log_set (&nnode->logs, &save);
692 
693                     rc = CCFileMakeRead (&cf, zf, &node->rc);
694                     if (rc == 0)
695                     {
696                         /* recurse with buffer on decompression */
697                         /* krc = */ ccat_buf (& cont -> sub, cf, mtime,
698                                         ccContFile, nnode, newname);
699 
700                         KFileRelease (cf);
701                         /* if successful, "node" ( allocated locally above )
702                            will have been entered into "cont->sub" */
703                     }
704 
705                     copycat_log_set (save, NULL);
706                 }
707             }
708         }
709 
710         KFileRelease ( zf );
711     }
712 
713     return rc;
714 }
715 
716 static
ccat_path_append(const char * name)717 rc_t ccat_path_append (const char * name)
718 {
719     size_t z;
720     z = string_size (name);
721     DEBUG_STATUS (("%s:in epath %s name %s z %zu \n",__func__, epath, name, z));
722     if (ehere + z + 1 >= epath + sizeof (epath))
723         return RC (rcExe, rcNoTarg, rcConcatenating, rcBuffer, rcTooShort);
724     memmove(ehere, name, z);
725     ehere += z;
726     *ehere++ = '/';
727     *ehere = '\0';
728     DEBUG_STATUS (("%s: out name %s epath %s\n",__func__,name, epath));
729     return 0;
730 }
731 
732 static
ccat_main(CCTree * tree,const KFile * sf,KTime_t mtime,enum CCType ntype,CCFileNode * node,const char * name)733 rc_t ccat_main ( CCTree *tree, const KFile *sf, KTime_t mtime,
734                  enum CCType ntype, CCFileNode *node, const char *name )
735 {
736     /* the pointer e_go_back allows us to remove additions to the stored
737      * path built up as we descend into deeper container/archive/directories */
738     char * e_go_back = ehere;
739 
740     /* determine file type based upon contents and name */
741     uint32_t type_id, class_id;
742 /*     rc_t orc; */
743     rc_t rc = CCFileFormatGetType ( filefmt, sf, name,
744                                     node -> ftype, sizeof node -> ftype, & type_id, & class_id );
745 
746     DEBUG_STATUS (("%s: name '%s' type '%s'\n",__func__,name,node->ftype));
747 
748     if ( rc != 0 )
749         PLOGERR ( klogErr,  (klogErr, rc, "failed to determine type of file '$(path)'", "path=%s", name ));
750     else
751     {
752         /* file could be a container */
753         CCContainerNode *cont;
754         CCCachedFileNode *cfile;
755 
756         /* assume this node will get name */
757         void *entry = node;
758 
759         bool xml_insert = false;
760         bool tee_done = false;
761         /* create path */
762         const char * basename;
763         char path [ 8192 + 256 ];
764 
765         if ((basename = strrchr (name, '/')) != NULL)
766             ++basename;
767         else
768             basename = name;
769 
770 
771         /* look for special files */
772         switch ( class_id )
773         {
774         case ccffcEncoded:
775             rc = ccat_path_append (name);
776             if (rc == 0)
777             {
778                 rc = ccat_enc ( & cont, sf, mtime, ntype, node, basename, type_id );
779                 if ( rc == 0 && cont != NULL )
780                 {
781                     ntype = ccContainer;
782                     entry = cont;
783                 }
784             }
785             ehere = e_go_back;
786             *ehere = '\0';
787             break;
788         case ccffcCompressed:
789             rc = ccat_path_append (name);
790             if (rc == 0)
791             {
792                 rc = ccat_cmp ( & cont, sf, mtime, ntype, node, basename, type_id );
793                 if ( rc == 0 && cont != NULL )
794                 {
795                     ntype = ccContainer;
796                     entry = cont;
797                 }
798             }
799             ehere = e_go_back;
800             *ehere = '\0';
801             break;
802         case ccffcArchive:
803             rc = ccat_path_append (name);
804             if (rc == 0)
805             {
806                 rc = ccat_arc ( & cont, sf, mtime, ntype, node, basename, type_id );
807                 if ( rc == 0 && cont != NULL )
808                 {
809                     ntype = ccArchive;
810                     entry = cont;
811                 }
812             }
813             ehere = e_go_back;
814             *ehere = '\0';
815             break;
816         case ccffcCached:
817             if ( cdir != NULL )
818             {
819                 rc = ccat_cache ( & cfile, sf, ntype, node, basename );
820                 if ( rc == 0 && cont != NULL )
821                 {
822                     tee_done = true;
823                     ntype = ccCached;
824                     entry = cfile;
825                 }
826             }
827             /* fall through */
828         default:
829             rc = ccat_extract_path (path, sizeof path, name);
830             DEBUG_STATUS (("%s: extract_path '%s'\n",__func__,path));
831             if ((edir != NULL) && (! tee_done))
832             {
833                 rc = ccat_extract (sf, path);
834                 if (rc == 0)
835                     tee_done = true;
836             }
837             if (rc == 0)
838             {
839                 xml_insert = true;
840             }
841             if (!tee_done)
842             {
843                 const KFile * cf;
844 
845                 rc = CCFileMakeRead (&cf, sf, &node->rc);
846                 if (rc == 0)
847                 {
848                     const KFile * tee;
849 
850                     rc = KFileMakeTeeRead (&tee, cf, fnull);
851                     if (rc == 0)
852                     {
853                         rc = KFileAddRef (fnull);
854 
855                         KFileRelease (tee);
856                     }
857                 }
858             }
859             break;
860         }
861 
862         /* create entry into tree */
863         if ( rc == 0 )
864         {
865             if (! xml_dir)
866             {
867                 DEBUG_STATUS (("%s: ready to insert '%s'\n",__func__,name));
868                 rc = CCTreeInsert ( tree, mtime, ntype, entry, name );
869                 /* if we are extracting create a name node with other information being incorrect */
870                 if (edir != NULL)
871                 {
872                 }
873             }
874             else if (xml_insert)
875             {
876                 DEBUG_STATUS (("%s: ready to insert '%s' for '%s'\n",__func__,path, name));
877                 rc = CCTreeInsert (etree, mtime, ntype, entry, path);
878             }
879 
880             if ( rc != 0 )
881                 PLOGERR (klogInt,  (klogInt, rc,
882                                     "failed to enter node '$(name)'",
883                                     "name=%s", name ));
884         }
885     }
886 
887     return rc;
888 }
889 
890 static
ccat_sz(CCTree * tree,const KFile * sf,KTime_t mtime,enum CCType ntype,CCFileNode * node,const char * name)891 rc_t ccat_sz ( CCTree *tree, const KFile *sf, KTime_t mtime,
892                enum CCType ntype, CCFileNode *node, const char *name )
893 {
894     /* create a counting file to fill out its size */
895     const KFile *sz;
896     rc_t rc, orc;
897 
898     rc = KFileMakeCounterRead ( & sz, sf, & node -> size, & node -> lines, true );
899 
900 
901     if ( rc != 0 )
902         PLOGERR ( klogInt,  (klogInt, rc, "failed to create counting wrapper for '$(path)'", "path=%s", name ));
903     else
904     {
905         /* give the wrapper its own reference
906            rather than taking the one we gave it */
907         rc = KFileAddRef ( sf );
908         if (rc)
909             PLOGERR (klogInt,
910                      (klogInt, rc, "Error in reference counting for '$(path)'", "path=%s", name ));
911         else
912             /* the main catalog function */
913             rc = ccat_main ( tree, sz, mtime, ntype, node, name );
914 
915         /* release the sizer */
916         orc = KFileRelease ( sz );
917         if (orc)
918         {
919             PLOGERR (klogInt,
920                      (klogInt, orc, "Error in closing byte counting for '$(path)'", "path=%s", name ));
921             if (rc == 0)
922                 rc = orc;
923         }
924     }
925 
926     return rc;
927 }
928 
ccat_md5(CCTree * tree,const KFile * sf,KTime_t mtime,enum CCType ntype,CCFileNode * node,const char * name)929 rc_t ccat_md5 ( CCTree *tree, const KFile *sf, KTime_t mtime,
930                 enum CCType ntype, CCFileNode *node, const char *name )
931 {
932     /* all files have an MD5 hash for identification.
933        the following code is for expediency.
934        we already have a formatter for output,
935        and to reuse it we simply pipe its output
936        to /dev/null, so to speak */
937     KMD5SumFmt *fmt;
938     rc_t rc, orc;
939 
940     /* NEW - there are some cases where md5sums would not be useful
941        and only take up CPU power. */
942     if ( no_md5 )
943         return ccat_sz ( tree, sf, mtime, ntype, node, name );
944 
945     /* normal md5 path */
946     rc = KMD5SumFmtMakeUpdate ( & fmt, fnull );
947     if ( rc != 0 )
948         PLOGERR ( klogInt,  (klogInt, rc, "failed to create md5sum formatter for '$(path)'", "path=%s", name ));
949     else
950     {
951         const KFile *md5;
952 
953         /* give another fnull reference to formatter */
954         rc = KFileAddRef ( fnull );
955         if (rc)
956             LOGERR (klogInt, rc, "Error referencing MD5 format file");
957         else
958         {
959 
960             /* this is the wrapper that calculates MD5 */
961             rc = KFileMakeNewMD5Read ( & md5, sf, fmt, name );
962             if ( rc != 0 )
963                 PLOGERR ( klogInt,  (klogInt, rc, "failed to create md5 wrapper for '$(path)'", "path=%s", name ));
964             else
965             {
966                 /* give the wrapper its own reference
967                    rather than taking the one we gave it */
968                 rc = KFileAddRef ( sf );
969                 if (rc)
970                     PLOGERR (klogInt,
971                              (klogInt, rc,
972                               "failure in reference counting file for '$(path)'",
973                               "path=%s", name ));
974                 else
975                 {
976                     /* continue on to obtaining file size */
977                     rc = ccat_sz ( tree, md5, mtime, ntype, node, name );
978 
979                     /* this will drop the MD5 calculator, but not
980                        its source file, and cause the digest to be
981                        written to its formatter */
982                     orc = KFileRelease ( md5 );
983                     if (orc)
984                     {
985                         PLOGERR (klogInt,
986                                  (klogInt, rc,
987                                   "failure in release reference counting file for '$(path)'",
988                                   "path=%s", name ));
989                         if (rc == 0)
990                             rc = orc;
991                     }
992 
993                     /* if there were no errors, read the MD5 from formatter.
994                        this must be done AFTER releasing the MD5 file,
995                        or nothing will ever get written */
996                     if ( rc == 0 )
997                     {
998                         bool bin;
999                         rc = KMD5SumFmtFind ( fmt, name, node -> _md5, & bin );
1000                     }
1001                 }
1002             }
1003         }
1004 
1005         /* dump the formatter, but not fnull */
1006         orc = KMD5SumFmtRelease ( fmt );
1007         if (orc)
1008         {
1009             PLOGERR (klogInt,
1010                      (klogInt, rc,
1011                       "failure in releasing  MD5 format file for '$(path)'",
1012                       "path=%s", name ));
1013             if (rc == 0)
1014                 rc = orc;
1015         }
1016     }
1017 
1018     return rc;
1019 }
1020 
1021 /* buffered recursion entrypoint */
ccat_buf(CCTree * tree,const KFile * sf,KTime_t mtime,enum CCType ntype,CCFileNode * node,const char * name)1022 rc_t ccat_buf ( CCTree *tree, const KFile *sf, KTime_t mtime,
1023                 enum CCType ntype, CCFileNode *node, const char *name )
1024 {
1025     /* create a buffered file to counter random access */
1026     const KFile *buf;
1027     rc_t rc, orc;
1028 
1029 #if USE_KBUFFILE
1030     rc = KBufFileMakeRead (&buf, sf, 2 * 32 * 1024);
1031 #else
1032     rc = KFileMakeReadHead (&buf, sf, 4096);
1033 #endif
1034     if ( rc != 0 )
1035         PLOGERR ( klogInt,  (klogInt, rc,
1036                              "failed to create buffer for '$(path)'",
1037                              "path=%s", name ));
1038     else
1039     {
1040         /* skip ccat */
1041         rc = ccat_md5 ( tree, buf, mtime, ntype, node, name );
1042 
1043         /* release the buffer */
1044         orc = KFileRelease ( buf );
1045         if (orc)
1046         {
1047             PLOGERR (klogInt, (klogInt, orc, "Error closing buffer for '$(path)'", "path=%s", name ));
1048             if (rc == 0)
1049                 rc = orc;
1050         }
1051     }
1052     return rc;
1053 }
1054 
1055 
1056 typedef struct copycat_pb
1057 {
1058     CCTree * tree;
1059     const KFile * sf;
1060     KFile * df;
1061     KTime_t mtime;
1062     enum CCType ntype;
1063     CCFileNode * node;
1064     const char * name;
1065 } copycat_pb;
1066 
1067 
copycat_add_tee(const copycat_pb * ppb)1068 rc_t copycat_add_tee (const copycat_pb * ppb)
1069 {
1070     const KFile * tee;
1071     rc_t rc, orc;
1072 
1073     rc = KFileMakeTeeRead (&tee, ppb->sf, ppb->df);
1074     if (rc)
1075         PLOGERR (klogInt,
1076                  (klogInt, rc, "failed to create encrypter for '$(path)'",
1077                   "path=%s", ppb->name ));
1078     else
1079     {
1080         rc = KFileAddRef (ppb->df);
1081         if (rc)
1082             LOGERR (klogInt, rc, "Reference counting error");
1083         else
1084         {
1085             rc = KFileAddRef (ppb->sf);
1086             if (rc)
1087                 LOGERR (klogInt, rc, "Reference counting error");
1088             else
1089             {
1090                 orc = ccat_md5 (ppb->tree, tee, ppb->mtime, ppb->ntype, ppb->node, ppb->name);
1091 
1092                 /* report? */
1093                 orc = KFileRelease (tee);
1094                 if (orc)
1095                     PLOGERR (klogInt,
1096                              (klogInt, orc,
1097                               "Error in closing byte counting for '$(path)'",
1098                               "path=%s", ppb->name ));
1099 
1100 /*                 orc = KFileRelease (ppb->sf); */
1101 /*                 if (orc) */
1102 /*                     PLOGERR (klogInt,  */
1103 /*                              (klogInt, orc, */
1104 /*                               "Error in closing byte counting of tee source for '$(path)'", */
1105 /*                               "path=%s", ppb->name )); */
1106             }
1107 /*             orc = KFileRelease (ppb->df); */
1108 /*             if (orc) */
1109 /*                 PLOGERR (klogInt,  */
1110 /*                          (klogInt, orc, */
1111 /*                           "Error in closing byte counting of tee destination for '$(path)'", */
1112 /*                           "path=%s", ppb->name )); */
1113         }
1114     }
1115     return rc;
1116 }
1117 
1118 
1119 static
copycat_add_node(const copycat_pb * ppb)1120 rc_t copycat_add_node (const copycat_pb * ppb)
1121 {
1122     rc_t rc;
1123 
1124     rc = ccat_path_append (ppb->name);
1125     if (rc == 0)
1126     {
1127         CCContainerNode *cont;
1128 
1129         /* make a container node for the encrypted output file */
1130         rc = CCContainerNodeMake (&cont, ccFile, ppb->node);
1131         if (rc)
1132         {
1133             PLOGERR (klogInt,
1134                      (klogInt, rc, "error creating container node for '$(path)'",
1135                       "path=%s", ppb->name));
1136         }
1137         else
1138         {
1139             copycat_pb pb = *ppb;
1140             uint64_t expected = /*(ppb->node->expected != SIZE_UNKNOWN)
1141                                   ? ppb->node->expected :*/ SIZE_UNKNOWN;
1142             pb.tree = &cont->sub;
1143 
1144             /* this will be the node for the unencrypted version of the output file */
1145             rc = CCFileNodeMake (&pb.node, expected );
1146             if (rc)
1147                 LOGERR (klogInt, rc, "failed to create contained node");
1148             else
1149             {
1150                 void * save;
1151                 size_t len;
1152                 size_t elen;
1153                 size_t clen;
1154                 rc_t orc;
1155                 char name [ 1024 ];
1156 
1157                 len = strlen (pb.name);
1158                 elen = strlen (ncbi_encryption_extension);
1159                 clen = len - elen;
1160 
1161                 /*
1162                  *if the name had .ncbi_enc at the end remove it
1163                  * for the inner node.
1164                  */
1165                 if (strcmp (ncbi_encryption_extension, pb.name + clen) == 0)
1166                 {
1167                     memmove (name, pb.name, clen);
1168                     name[clen] = '\0';
1169                 }
1170                 /*
1171                  * if it did not, prepend '.' to the name
1172                  */
1173                 else
1174                 {
1175                     name[0] = '.';
1176                     if (len > sizeof name - 2)
1177                         len = sizeof name - 2;
1178                     strncpy (name+1, pb.name, len);
1179                     name[len+1] = '\0';
1180                 }
1181                 pb.name = name;
1182 
1183                 copycat_log_set (&pb.node->logs, &save);
1184 
1185                 rc = copycat_add_tee (&pb);
1186 
1187                 copycat_log_set (save, NULL);
1188 
1189                 orc = CCTreeInsert (ppb->tree, ppb->mtime, ccContainer, cont, ppb->name);
1190 
1191                 if (rc == 0)
1192                     rc = orc;
1193             }
1194         }
1195     }
1196     return rc;
1197 }
1198 
1199 /* -----
1200  * copycat_add_enc
1201  *
1202  * add an encryptor to the copy chain
1203  */
copycat_add_enc(const copycat_pb * ppb)1204 rc_t copycat_add_enc (const copycat_pb * ppb)
1205 {
1206     copycat_pb pb = *ppb;
1207     rc_t rc = 0;
1208 
1209     if (!dst_pw_read)
1210         rc = get_password (dst_path, dst_pwd, sizeof dst_pwd, &dst_pwd_sz, &dst_key, &dst_pw_read);
1211     if (rc)
1212         return rc;
1213 
1214     rc = KEncFileMakeWrite (&pb.df, ppb->df, &dst_key);
1215     if (rc)
1216         PLOGERR (klogInt,
1217                  (klogInt, rc, "failed to create encrypter for '$(path)'",
1218                   "path=%s", pb.name ));
1219     else
1220     {
1221         strncpy (pb.node->ftype, "Encoded/NCBI", sizeof pb.node->ftype);
1222 
1223 
1224         /* add the container to the cataloging tree */
1225         rc = copycat_add_node (&pb);
1226         if (rc == 0)
1227         {
1228             rc = KFileRelease (pb.df);
1229             if (rc)
1230             {
1231                 PLOGERR (klogInt,
1232                          (klogInt, rc,
1233                           "Error in closing byte counting for '$(path)'",
1234                           "path=%s", pb.name ));
1235             }
1236         }
1237     }
1238     return rc;
1239 }
1240 
1241 
1242 /* -----
1243  * copycat_add_sz
1244  *
1245  * if we are encrypting the output we need to do an actual byte count across the
1246  * write side of the encryption so we add a filter into the copy chain.  If we
1247  * are not encrypting we do this calculation in the ccat chain for the outside file
1248  * and never reach this function.
1249  */
copycat_add_sz(const copycat_pb * ppb)1250 rc_t copycat_add_sz (const copycat_pb * ppb)
1251 {
1252     /* create a counting file to fill out its size */
1253     copycat_pb pb = *ppb;
1254     rc_t rc, orc;
1255 
1256     rc = KFileMakeCounterWrite (&pb.df, ppb->df, &pb.node->size, &pb.node->lines, true);
1257     if (rc)
1258         PLOGERR (klogInt,
1259                  (klogInt, rc,
1260                   "failed to create counting wrapper for '$(path)'",
1261                   "path=%s", pb.name ));
1262     else
1263     {
1264         /* give the wrapper its own reference
1265            rather than taking the one we gave it */
1266         rc = KFileAddRef ( ppb->df );
1267         if (rc)
1268             PLOGERR (klogInt,
1269                      (klogInt, rc, "Error in reference counting for '$(path)'",
1270                       "path=%s", pb.name));
1271         else
1272             rc = copycat_add_enc (&pb);
1273 
1274         /* release the sizer */
1275         orc = KFileRelease (pb.df);
1276         if (orc)
1277         {
1278             PLOGERR (klogInt,
1279                      (klogInt, orc, "Error in closing byte counting "
1280                       "(probable read error) for '$(path)'", "path=%s",
1281                       pb.name ));
1282             /* a failure in the counter destructor might mean a failure
1283              * in the copy so return the error */
1284             if (rc == 0)
1285                 rc = orc;
1286         }
1287     }
1288 
1289     return rc;
1290 }
1291 
1292 
1293 /* -----
1294  * copycat_add_md5
1295  *
1296  * if we are encrypting the output we need to do an md5 calculation across the
1297  * write side of the encryption so we add a filter into the copy chain.  If we
1298  * are not encrypting we do the md5 calculation in the ccat chain for the outside file
1299  * and never reach this function.
1300  */
copycat_add_md5(const copycat_pb * ppb)1301 rc_t copycat_add_md5 (const copycat_pb * ppb)
1302 {
1303     /* all files have an MD5 hash for identification.
1304        the following code is for expediency.
1305        we already have a formatter for output,
1306        and to reuse it we simply pipe its output
1307        to /dev/null, so to speak */
1308     KMD5SumFmt *fmt;
1309     rc_t rc;
1310 
1311     if ( no_md5 )
1312         return copycat_add_sz (ppb);
1313 
1314     rc = KMD5SumFmtMakeUpdate (&fmt, fnull);
1315     if ( rc != 0 )
1316         PLOGERR (klogInt,
1317                  (klogInt, rc,
1318                   "failed to create md5sum formatter for '$(path)'",
1319                   "path=%s", ppb->name ));
1320     else
1321     {
1322         rc_t orc;
1323         /* give another fnull reference to formatter */
1324         rc = KFileAddRef (fnull);
1325         if (rc)
1326             LOGERR (klogInt, rc, "Error referencing MD5 format file");
1327         else
1328         {
1329             copycat_pb pb = *ppb;
1330 
1331             /* this is the wrapper that calculates MD5 */
1332             rc = KMD5FileMakeWrite ((struct KMD5File**)&pb.df, ppb->df, fmt, ppb->name );
1333             if ( rc != 0 )
1334                 PLOGERR (klogInt,
1335                          (klogInt, rc,
1336                           "failed to create md5 wrapper for '$(path)'",
1337                           "path=%s", ppb->name ));
1338             else
1339             {
1340                 /* give the wrapper its own reference
1341                    rather than taking the one we gave it */
1342                 rc = KFileAddRef (ppb->df);
1343                 if (rc)
1344                     PLOGERR (klogInt,
1345                              (klogInt, rc,
1346                               "failure in reference counting file for '$(path)'",
1347                               "path=%s", ppb->name ));
1348                 else
1349                     /* continue on to obtaining file size */
1350                     rc = copycat_add_sz (&pb);
1351 
1352                 /* this will drop the MD5 calculator, but not
1353                    its source file, and cause the digest to be
1354                    written to its formatter */
1355                 orc = KFileRelease (pb.df);
1356                 if (orc)
1357                 {
1358                     PLOGERR (klogInt,
1359                              (klogInt, orc,
1360                               "failure in releasing md5 calculation file for '$(path)'",
1361                               "path=%s", ppb->name ));
1362                 }
1363 
1364                 /* if there were no errors, read the MD5 from formatter.
1365                    this must be done AFTER releasing the MD5 file,
1366                    or nothing will ever get written */
1367                 if (orc == 0)
1368                 {
1369                     bool bin;
1370                     orc = KMD5SumFmtFind (fmt, ppb->name, ppb->node->_md5, &bin);
1371                     if (orc)
1372                         PLOGERR (klogWarn,
1373                                  (klogWarn, orc,
1374                                   "Error in obtaining the MD5 for '$(path)'",
1375                                   "path=%s", pb.name));
1376                 }
1377             }
1378         }
1379 
1380         /* dump the formatter, but not fnull */
1381         orc = KMD5SumFmtRelease (fmt);
1382         if (orc)
1383         {
1384             PLOGERR (klogInt,
1385                      (klogInt, rc,
1386                       "failure in releasing  MD5 format file for '$(path)'",
1387                       "path=%s", ppb->name ));
1388             /* we can 'forget' this error after logging it */
1389             if (rc == 0)
1390                 rc = orc;
1391         }
1392     }
1393 
1394     return rc;
1395 }
1396 
1397 /* -----
1398  * copycat_add_crc
1399  *
1400  * this is the first function in building the write side of the copy chain
1401  *
1402  * We calculate a crc on the outgoing file and none of the interior files
1403  * and we do that at this point in the chain regardless of whether we will
1404  * encrypt the outgoing file.
1405  *
1406  * We'll add the crc calculator to the write side stream and then decide
1407  * whether to add an encryptor into the chain.  We add one to the write side
1408  * of the chain if we have an encrypting password or jump to finishing the
1409  * copy chain if we do not
1410  */
copycat_add_crc(const copycat_pb * ppb)1411 rc_t copycat_add_crc (const  copycat_pb * ppb)
1412 {
1413     /* external files have a CRC32 checksum
1414        the following code is for expediency.
1415        we already have a formatter for output,
1416        and to reuse it we simply pipe its output
1417        to /dev/null, so to speak */
1418     rc_t rc;
1419     KCRC32SumFmt * fmt;
1420 
1421     rc = KCRC32SumFmtMakeUpdate ( &fmt, fnull );
1422     if ( rc != 0 )
1423         PLOGERR (klogInt,
1424                  (klogInt, rc,
1425                   "failed to create crc32sum formatter for '$(path)'",
1426                   "path=%s", ppb->name ));
1427     else
1428     {
1429         rc_t orc = 0;
1430 
1431         /* give another fnull reference to formatter */
1432         rc = KFileAddRef ( fnull );
1433         if (rc)
1434             PLOGERR (klogInt,
1435                      (klogInt, rc,
1436                       "error in reference counting crc formatter fnull for '$(path)'",
1437                       "path=%s", ppb->name ));
1438         else
1439         {
1440             copycat_pb pb = *ppb;
1441 
1442             /* this is the wrapper that calculates CRC32 */
1443             rc = KCRC32FileMakeWrite ( (KCRC32File**)&pb.df, ppb->df, fmt, ppb->name );
1444             if ( rc != 0 )
1445                 PLOGERR (klogInt,
1446                          (klogInt, rc,
1447                           "failed to create crc32 wrapper for '$(path)'",
1448                           "path=%s", ppb->name ));
1449             else
1450             {
1451                 /* give the wrapper its own reference
1452                    rather than taking the one we gave it */
1453                 rc = KFileAddRef (ppb->df);
1454                 if (rc)
1455                     PLOGERR (klogInt,
1456                              (klogInt, rc,
1457                               "error in reference counting file for '$(path)'",
1458                               "path=%s", ppb->name ));
1459                 else if (do_encrypt)
1460                 {
1461                     /* add in the size of the enc header */
1462                     if (pb.node->expected != SIZE_UNKNOWN)
1463                     {
1464                         uint64_t temp;
1465 
1466                         temp = pb.node->expected; /* current expected count */
1467 
1468                         temp += (ENC_DATA_BLOCK_SIZE - 1); /* add enough to fill last block */
1469                         temp /= ENC_DATA_BLOCK_SIZE; /* how many blocks */
1470                         temp *= sizeof (KEncFileBlock); /* size of encrypted blocks */
1471                         temp += sizeof (KEncFileHeader) + sizeof (KEncFileFooter);
1472                         pb.node->expected = temp;
1473                     }
1474                     rc = copycat_add_md5 (&pb);
1475                 }
1476                 else
1477                     rc = copycat_add_tee (&pb);
1478 
1479                 /* this will drop the CRC calculator, but not
1480                    its source file, and cause the CRC to be
1481                    written to its formatter */
1482                 orc = KFileRelease (pb.df);
1483                 if (orc)
1484                 {
1485                     LOGERR (klogErr, orc, "Failed to close out crc calculator");
1486                     /* an error here implies an error in the copy so report it */
1487                     rc = orc;
1488                 }
1489             }
1490             /* if there were no errors, read the CRC from formatter.
1491                this must be done AFTER releasing the CRC file,
1492                or nothing will ever get written */
1493             if ( rc == 0 )
1494             {
1495                 bool bin;
1496                 orc = KCRC32SumFmtFind ( fmt, ppb->name, &pb.node->crc32, &bin );
1497                 if (orc)
1498                     PLOGERR (klogWarn,
1499                              (klogWarn, orc,
1500                               "Failed to obtain the CRC for '$(path)'",
1501                               "path=%s", pb.name));
1502                 /* an error here isn't an error in copy */
1503             }
1504         }
1505         /* dump the formatter, but not fnull */
1506         orc = KCRC32SumFmtRelease ( fmt );
1507         if (orc)
1508             LOGERR (klogWarn, orc, "Failed to close off CRC storage");
1509         /* this error we do not need to track */
1510         if (rc == 0)
1511             rc = orc;
1512     }
1513     return rc;
1514 }
1515 
1516 
1517 /* -----
1518  * copycat_add_buf2
1519  *
1520  * This function adds buffering to the outermostDecrypted file read so that we
1521  * can read the first portion of the file for file type analysis and then
1522  * re-read it for cataloging.
1523  *
1524  * At this point the only file type analysis needed is to determine
1525  * whether we have in incoming encrypted file We've already checked for
1526  * the existence of the key or we wouldn't get here.
1527  */
copycat_add_buf2(const copycat_pb * ppb)1528 rc_t copycat_add_buf2 (const copycat_pb * ppb)
1529 {
1530     copycat_pb pb = *ppb;
1531     rc_t rc = 0;
1532 
1533     /* create a buffered file to allow re-reading of the first 128 bytes
1534      * a tad overkill */
1535 #if USE_KBUFFILE
1536     rc = KBufFileMakeRead (&pb.sf, ppb->sf, 2*32*1024);
1537 #else
1538     rc = KFileMakeReadHead (&pb.sf, ppb->sf, 4*1024);
1539 #endif
1540     if (rc)
1541     {
1542         PLOGERR (klogInt,
1543                  (klogInt, rc,
1544                   "failed to create buffer for '$(path)'",
1545                   "path=%s", pb.name ));
1546     }
1547     else
1548     {
1549         rc_t orc;
1550 
1551         rc = copycat_add_crc (&pb);
1552 
1553         orc = KFileRelease (pb.sf);
1554         if (orc)
1555             PLOGERR (klogInt,
1556                      (klogInt, orc,
1557                       "failed to read buffer for '$(path)'",
1558                       "path=%s", pb.name ));
1559     }
1560     return rc;
1561 }
1562 
1563 
1564 /* -----
1565  * copycat_add_dec
1566  *
1567  * add a decryption to the read side of the copy chain.
1568  * no other decisions made here
1569  */
copycat_add_dec_ncbi(const copycat_pb * ppb)1570 rc_t copycat_add_dec_ncbi (const copycat_pb * ppb)
1571 {
1572     copycat_pb pb = *ppb;
1573     rc_t rc = 0;
1574 
1575     if (!src_pw_read)
1576         rc = get_password (src_path, src_pwd, sizeof src_pwd, &src_pwd_sz, &src_key, &src_pw_read);
1577 
1578     if (rc == 0)
1579     {
1580         rc = KEncFileMakeRead (&pb.sf, ppb->sf, &src_key);
1581         if (rc)
1582             PLOGERR (klogInt,
1583                      (klogInt, rc,
1584                       "failed to create decrypter for '$(path)'",
1585                       "path=%s", pb.name ));
1586     }
1587     if (rc == 0)
1588     {
1589         rc_t orc;
1590 
1591         /* this decryption won't know its output size until it gets there */
1592         pb.node->expected = SIZE_UNKNOWN;
1593         rc = copycat_add_buf2 (&pb);
1594         orc = KFileRelease (pb.sf);
1595         if (orc)
1596             PLOGERR (klogInt,
1597                      (klogInt, orc,
1598                       "failed to close decrypter for '$(path)'",
1599                       "path=%s", pb.name ));
1600     }
1601     return rc;
1602 }
1603 
copycat_add_dec_wga(const copycat_pb * ppb)1604 rc_t copycat_add_dec_wga (const copycat_pb * ppb)
1605 {
1606     copycat_pb pb = *ppb;
1607     rc_t rc = 0;
1608 
1609     if (!src_pw_read)
1610         rc = wga_password (src_path, src_pwd, sizeof src_pwd, &src_pwd_sz, &src_pw_read);
1611 
1612     if (rc == 0)
1613     {
1614         rc = KFileMakeWGAEncRead (&pb.sf, ppb->sf, src_pwd, src_pwd_sz);
1615         if (rc)
1616             PLOGERR (klogInt,
1617                      (klogInt, rc,
1618                       "failed to create decrypter for '$(path)'",
1619                       "path=%s", pb.name ));
1620     }
1621     if (rc == 0)
1622     {
1623         rc_t orc;
1624 
1625         if (pb.node->expected != SIZE_UNKNOWN)
1626             pb.node->expected -= 128; /* subtract off the size of the WGA header */
1627         rc = copycat_add_buf2 (&pb);
1628         orc = KFileRelease (pb.sf);
1629         if (orc)
1630             PLOGERR (klogInt,
1631                      (klogInt, orc,
1632                       "failed to close decrypter for '$(path)'",
1633                       "path=%s", pb.name ));
1634     }
1635     return rc;
1636 }
1637 
1638 /* -----
1639  * copycat_add_dec
1640  *
1641  * At this point the only file type analysis needed is to determine
1642  * whether we have in incoming encrypted file We've already checked for
1643  * the existence of the key or we wouldn't get here.
1644  */
copycat_add_dec(const copycat_pb * ppb)1645 rc_t copycat_add_dec (const copycat_pb * ppb)
1646 {
1647     copycat_pb pb = *ppb;
1648     rc_t rc = 0;
1649     size_t num_read;
1650     uint8_t buff [128];
1651 
1652     rc = KFileReadAll (pb.sf, 0, buff, sizeof buff, &num_read);
1653     if (rc)
1654         PLOGERR (klogInt,
1655                  (klogInt, rc,
1656                   "failed to read buffer for '$(path)'",
1657                   "path=%s", pb.name ));
1658     else
1659     {
1660         rc_t orc;
1661 
1662         /*
1663          * if we have an encrypted file add decryption to the chain
1664          * if not jump sraight to the write side of the chain
1665          */
1666         if (CCFileFormatIsNCBIEncrypted (buff))
1667             rc = copycat_add_dec_ncbi (&pb);
1668         else if (KFileIsWGAEnc (buff, num_read) == 0)
1669             rc = copycat_add_dec_wga (&pb);
1670         else
1671             rc = copycat_add_crc (&pb);
1672 
1673         orc = KFileRelease (pb.sf);
1674         if (orc)
1675             PLOGERR (klogInt,
1676                      (klogInt, orc,
1677                       "failed to read buffer for '$(path)'",
1678                       "path=%s", pb.name ));
1679     }
1680     return rc;
1681 }
1682 
1683 
1684 /* -----
1685  * copycat
1686  *
1687  * The copycat function is the actual copy and catalog function.
1688  * All before this function is called is building toward this.
1689  *
1690  * The functions prefixed copycat_add_ are functions used in building the
1691  * chain of KFS and other filters for doing the copy function.  Some
1692  * cataloging is included where the output file is encoded and to
1693  * generate the CRC unique to the outer file in the catalog.
1694  * The ccat_add_ functions are filters toward the leafs in the catlog
1695  * portion of the program.
1696  *
1697  * Much of this could be inlined but the indention creep across the screen
1698  * and the handling of levels of indention would get intense.  Also some
1699  * filters are skipped which also would have made it more unweildy to
1700  * write and maintain.
1701  *
1702  * In this function we build a node for cataloging the outermost layer of
1703  * the output file which may differ from the file as read here if a
1704  * decryption and/or encryption is added.
1705  *
1706  * We then
1707  */
copycat(CCTree * tree,KTime_t mtime,KDirectory * _cwd,const VPath * src,const KFile * sf,const VPath * dst,KFile * df,const char * spath,const char * name,uint64_t expected,bool _do_decrypt,bool _do_encrypt)1708 rc_t copycat (CCTree *tree, KTime_t mtime, KDirectory * _cwd,
1709               const VPath * src, const KFile *sf,
1710               const VPath * dst,  KFile *df,
1711               const char *spath, const char *name,
1712               uint64_t expected, bool _do_decrypt, bool _do_encrypt)
1713 {
1714     void * save;
1715     copycat_pb pb;
1716     rc_t rc;
1717 
1718     DEBUG_STATUS (("%s: copy file %s\n",__func__, spath));
1719 
1720     cwd = _cwd;
1721     src_path = src;
1722     dst_path = dst;
1723 
1724     src_pw_read = false;
1725 
1726     do_decrypt = _do_decrypt;
1727     do_encrypt = _do_encrypt;
1728 
1729     /* -----
1730      * Create a cataloging node for the outer most file as written
1731      */
1732     rc = CCFileNodeMake ( &pb.node, expected );
1733     if (rc)
1734     {
1735         LOGERR ( klogInt, rc, "failed to allocate file node" );
1736         return rc;
1737     }
1738 
1739     pb.tree = tree;
1740     pb.mtime = mtime;
1741     pb.ntype = ccFile;
1742     pb.name = name;
1743 
1744 
1745     copycat_log_set (&pb.node->logs, &save);
1746 
1747     if (out_block)
1748     {
1749         rc = KBufWriteFileMakeWrite (&pb.df, df, out_block);
1750         if (rc)
1751         {
1752             PLOGERR (klogInt,
1753                      (klogInt, rc,
1754                       "failed to create buffer for '$(path)'",
1755                       "path=%s", pb.name ));
1756             return rc;
1757         }
1758     }
1759     else
1760     {
1761         pb.df = df;
1762     }
1763 
1764     if (in_block)
1765     {
1766         rc = KBufReadFileMakeRead (&pb.sf, sf, in_block);
1767     }
1768     else
1769     {
1770 #if USE_KBUFFILE
1771         rc = KBufFileMakeRead (&pb.sf, sf, 2*32*1024);
1772 #else
1773         rc = KFileMakeReadHead (&pb.sf, sf, 4*1024);
1774 #endif
1775     }
1776     if (rc)
1777     {
1778         PLOGERR (klogInt,
1779                  (klogInt, rc,
1780                   "failed to create buffer for '$(path)'",
1781                   "path=%s", spath ));
1782         return rc;
1783     }
1784     else
1785     {
1786         /*
1787          * if we have a decryption password prepare a decryption read path
1788          * if not jump to preparing the write path
1789          */
1790         rc = do_decrypt
1791             ? copycat_add_dec (&pb)
1792             : copycat_add_crc (&pb);
1793     }
1794     copycat_log_set (save, NULL);
1795 
1796     return rc;
1797 }
1798