1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #include "copycat-priv.h"
28 #include "cctree-priv.h"
29 #include <klib/rc.h>
30 #include <klib/log.h>
31 
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <assert.h>
36 
37 
38 /*--------------------------------------------------------------------------
39  * CCFileNode
40  *  a node with a size and modification timestamp
41  *  has a file type determined by magic/etc.
42  *  has an md5 checksum
43  *
44  *  how would an access mode be used? access mode of a file is
45  *  whatever the filesystem says it is, and within an archive,
46  *  it's read-only based upon access mode of outer file...
47  */
48 
49 static
CCFileNodeInit(CCFileNode * self,uint64_t expected)50 void CCFileNodeInit ( CCFileNode *self, uint64_t expected )
51 {
52 #if STORE_ID_IN_NODE
53     static uint32_t file_id;
54     self -> id = ++ file_id;
55 #endif
56     self -> expected = expected;
57     self -> size = 0;   /* we learn this with the counter file */
58     self -> lines = 0;   /* we learn this with the counter file */
59     self -> crc32 = 0;
60     self -> rc = 0;
61     self -> err = false;
62     memset ( self -> ftype, 0, sizeof self -> ftype );
63     memset ( self -> _md5, 0, sizeof self -> _md5 );
64     SLListInit ( &self->logs );
65 }
66 
67 /* Make
68  *  creates an object with provided properties
69  *  md5 digest needs to be filled in afterward
70  */
CCFileNodeMake(CCFileNode ** np,uint64_t expected)71 rc_t CCFileNodeMake ( CCFileNode **np, uint64_t expected )
72 {
73     CCFileNode *n = malloc ( sizeof * n );
74     if ( n == NULL )
75         return RC ( rcExe, rcTree, rcInserting, rcMemory, rcExhausted );
76 
77     CCFileNodeInit ( n, expected );
78 
79     * np = n;
80     return 0;
81 }
82 
83 /*--------------------------------------------------------------------------
84  * CCArcFileNode
85  *  a file with an offset into another file
86  */
87 
88 /* Make
89  *  creates an object with provided properties
90  *  md5 digest needs to be filled in afterward
91  */
CCArcFileNodeMake(CCArcFileNode ** np,uint64_t offset,uint64_t expected)92 rc_t CCArcFileNodeMake ( CCArcFileNode **np,
93     uint64_t offset, uint64_t expected )
94 {
95     CCArcFileNode *n = malloc ( sizeof * n );
96     if ( n == NULL )
97         return RC ( rcExe, rcTree, rcInserting, rcMemory, rcExhausted );
98 
99     CCFileNodeInit ( & n -> dad, expected );
100     n -> offset = offset;
101 
102     * np = n;
103     return 0;
104 }
105 
106 
107 /*--------------------------------------------------------------------------
108  * CChunkFileNode
109  *  a file with one or more chunks (offset/size) into another file
110  */
111 
112 /* Whack
113  */
114 static
CChunkWhack(SLNode * n,void * ignore)115 void CChunkWhack ( SLNode *n, void *ignore )
116 {
117     free ( n );
118 }
119 
CChunkFileNodeWhack(CChunkFileNode * self)120 void CChunkFileNodeWhack ( CChunkFileNode *self )
121 {
122     if ( self != NULL )
123     {
124         SLListWhack ( & self -> chunks, CChunkWhack, NULL );
125         CCFileNodeWhack ( & self -> dad );
126     }
127 }
128 
129 /* Make
130  *  creates an object with provided properties
131  *  md5 digest needs to be filled in afterward
132  */
CChunkFileNodeMake(CChunkFileNode ** np,uint64_t expected)133 rc_t CChunkFileNodeMake ( CChunkFileNode **np, uint64_t expected )
134 {
135     CChunkFileNode *n = malloc ( sizeof * n );
136     if ( n == NULL )
137         return RC ( rcExe, rcTree, rcInserting, rcMemory, rcExhausted );
138 
139     CCFileNodeInit ( & n -> dad, expected );
140     SLListInit ( & n -> chunks );
141 
142     * np = n;
143     return 0;
144 }
145 
146 /* AddChunk
147  *  adds a chunk to the chunk file
148  */
CChunkFileNodeAddChunk(CChunkFileNode * self,uint64_t offset,uint64_t size)149 rc_t CChunkFileNodeAddChunk ( CChunkFileNode *self,
150     uint64_t offset, uint64_t size )
151 {
152     CChunk *c = malloc ( sizeof * c );
153     if ( c == NULL )
154         return RC ( rcExe, rcTree, rcInserting, rcMemory, rcExhausted );
155 
156     c -> offset = offset;
157     c -> size = size;
158 
159     SLListPushTail ( & self -> chunks, & c -> n );
160     return 0;
161 }
162 
163 
164 /*--------------------------------------------------------------------------
165  * CCCachedFileNode
166  *  a file wrapper with cached file name
167  */
168 
169 /* Make
170  *  creates a cached file wrapper
171  */
CCCachedFileNodeMake(CCCachedFileNode ** np,const char * path,enum CCType type,const void * entry)172 rc_t CCCachedFileNodeMake ( CCCachedFileNode **np, const char *path,
173     enum CCType type, const void *entry )
174 {
175     CCCachedFileNode *n = malloc ( sizeof * n + strlen ( path ) + 1 );
176     if ( n == NULL )
177         return RC ( rcExe, rcTree, rcInserting, rcMemory, rcExhausted );
178 
179     strcpy ( ( char* ) ( n + 1 ), path );
180     StringInitCString ( & n -> cached, ( char* ) ( n + 1 ) );
181     n -> entry = ( void* ) entry;
182     n -> type = type;
183 
184     * np = n;
185     return 0;
186 }
187 
188 /* Whack
189  */
CCCachedFileNodeWhack(CCCachedFileNode * self)190 void CCCachedFileNodeWhack ( CCCachedFileNode *self )
191 {
192     if ( self != NULL )
193     {
194         switch ( self -> type )
195         {
196         case ccFile:
197             CCFileNodeWhack ( self -> entry );
198             break;
199         case ccArcFile:
200             CCArcFileNodeWhack ( self -> entry );
201             break;
202         case ccChunkFile:
203             CChunkFileNodeWhack ( self -> entry );
204             break;
205         }
206         free ( self );
207     }
208 }
209 
210 
211 /*--------------------------------------------------------------------------
212  * CCSymlinkNode
213  *  a directory entry with a substitution path
214  */
215 
216 /* Make
217  *  creates a symlink object
218  */
CCSymlinkNodeMake(CCSymlinkNode ** np,const char * path)219 rc_t CCSymlinkNodeMake ( CCSymlinkNode **np, const char *path )
220 {
221     CCSymlinkNode *n = malloc ( sizeof * n + strlen ( path ) + 1 );
222     if ( n == NULL )
223         return RC ( rcExe, rcTree, rcInserting, rcMemory, rcExhausted );
224 
225     strcpy ( ( char* ) ( n + 1 ), path );
226     StringInitCString ( & n -> path, ( char* ) ( n + 1 ) );
227 
228     * np = n;
229     return 0;
230 }
231 
232 
233 /*--------------------------------------------------------------------------
234  * CCName
235  *  an entry name in a CCTree
236  */
237 
238 
239 /* Whack
240  */
241 static
CCNameWhack(BSTNode * n,void * ignore)242 void CCNameWhack ( BSTNode *n, void *ignore )
243 {
244     CCName *self = ( CCName* ) n;
245     if ( self -> entry != NULL ) switch ( self -> type )
246     {
247     case ccFile:
248     case ccContFile:
249         CCFileNodeWhack ( self -> entry );
250         break;
251     case ccArcFile:
252         CCArcFileNodeWhack ( self -> entry );
253         break;
254     case ccChunkFile:
255         CChunkFileNodeWhack ( self -> entry );
256         break;
257     case ccContainer:
258         CCContainerNodeWhack ( self -> entry );
259         break;
260     case ccSymlink:
261         CCSymlinkNodeWhack ( self -> entry );
262         break;
263     case ccHardlink:
264         break;
265     case ccDirectory:
266         CCTreeWhack ( self -> entry );
267         break;
268     }
269 
270     free ( self );
271 }
272 
273 
274 /* Make
275  *  make a node name
276  */
277 static
CCNameMake(CCName ** np,KTime_t mtime,CCName * dad,const String * name,enum CCType type,const void * entry)278 rc_t CCNameMake ( CCName **np, KTime_t mtime, CCName *dad,
279     const String *name, enum CCType type, const void *entry )
280 {
281     CCName *n = malloc ( sizeof * n + name -> size + 1 );
282     if ( n == NULL )
283         return RC ( rcExe, rcTree, rcInserting, rcMemory, rcExhausted );
284 
285     string_copy ( ( char* ) ( n + 1 ), name -> size + 1, name -> addr, name -> size );
286     n -> mtime = mtime;
287     n -> dad = dad;
288     n -> entry = ( void* ) entry;
289     StringInit ( & n -> name, ( char* ) ( n + 1 ), name -> size, name -> len );
290     n -> type = ( uint32_t ) type;
291 
292     * np = n;
293     return 0;
294 }
295 
296 /* Cmp
297  * Sort
298  */
299 static
CCNameCmp(const void * item,const BSTNode * n)300 int64_t CCNameCmp ( const void *item, const BSTNode *n )
301 {
302     const String *a = item;
303     const CCName *b = ( const CCName* ) n;
304     return StringCompare ( a, & b -> name );
305 }
306 
307 static
CCNameSort(const BSTNode * item,const BSTNode * n)308 int64_t CCNameSort ( const BSTNode *item, const BSTNode *n )
309 {
310     const CCName *a = ( const CCName* ) item;
311     const CCName *b = ( const CCName* ) n;
312     int64_t cmp = StringCompare ( & a -> name, & b -> name );
313     if (cmp != 0)
314         return cmp;
315 #if 0
316     if (b->type == ccReplaced)
317         return 1;
318 #endif
319     return 1; /* make new item always greater than existing n */
320 }
321 
322 
323 /*--------------------------------------------------------------------------
324  * CCContainerNode
325  *  an archive file entry
326  *  a file with a sub-directory
327  */
328 
329 
330 /* Whack
331  */
CCContainerNodeWhack(CCContainerNode * self)332 void CCContainerNodeWhack ( CCContainerNode *self )
333 {
334     if ( self != NULL )
335     {
336         BSTreeWhack ( & self -> sub, CCNameWhack, NULL );
337         switch ( self -> type )
338         {
339         case ccFile:
340             CCFileNodeWhack ( self -> entry );
341             break;
342         case ccArcFile:
343             CCArcFileNodeWhack ( self -> entry );
344             break;
345         case ccChunkFile:
346             CChunkFileNodeWhack ( self -> entry );
347             break;
348         }
349         free ( self );
350     }
351 }
352 
353 /* Make
354  *  creates an archive object
355  */
CCContainerNodeMake(CCContainerNode ** np,enum CCType type,const void * entry)356 rc_t CCContainerNodeMake ( CCContainerNode **np,
357     enum CCType type, const void *entry )
358 {
359     CCContainerNode *n = malloc ( sizeof * n );
360     if ( n == NULL )
361         return RC ( rcExe, rcTree, rcInserting, rcMemory, rcExhausted );
362 
363     BSTreeInit ( & n -> sub );
364     n -> entry = ( void* ) entry;
365     n -> type = ( uint32_t ) type;
366 
367     * np = n;
368     return 0;
369 }
370 
371 
372 
373 /*--------------------------------------------------------------------------
374  * CCReplacedNode
375  *  an archive file entry
376  *  a file with a sub-directory
377  */
378 
379 
380 /* Whack
381  */
CCReplacedNodeWhack(CCReplacedNode * self)382 void CCReplacedNodeWhack ( CCReplacedNode *self )
383 {
384     if ( self != NULL )
385     {
386         switch ( self -> type )
387         {
388         case ccFile:
389             CCFileNodeWhack ( self -> entry );
390             break;
391         case ccArcFile:
392             CCArcFileNodeWhack ( self -> entry );
393             break;
394         case ccChunkFile:
395             CChunkFileNodeWhack ( self -> entry );
396             break;
397         }
398         free ( self );
399     }
400 }
401 
402 /* Make
403  *  creates an archive object
404  */
CCReplacedNodeMake(CCReplacedNode ** np,enum CCType type,const void * entry)405 rc_t CCReplacedNodeMake ( CCReplacedNode **np,
406     enum CCType type, const void *entry )
407 {
408     CCReplacedNode *n = malloc ( sizeof * n );
409     if ( n == NULL )
410         return RC ( rcExe, rcTree, rcInserting, rcMemory, rcExhausted );
411 
412     n -> entry = ( void* ) entry;
413     n -> type = ( uint32_t ) type;
414 
415     * np = n;
416     return 0;
417 }
418 
419 
420 
421 /*--------------------------------------------------------------------------
422  * CCTree
423  *  a binary search tree with CCNodes
424  */
425 
426 
427 /* Whack
428  */
CCTreeWhack(CCTree * self)429 void CCTreeWhack ( CCTree *self )
430 {
431     if ( self != NULL )
432     {
433         BSTreeWhack ( self, CCNameWhack, NULL );
434         free ( self );
435     }
436 }
437 
438 
439 /* Make
440  *  make a root tree or sub-directory
441  */
CCTreeMake(CCTree ** tp)442 rc_t CCTreeMake ( CCTree **tp )
443 {
444     CCTree *t = malloc ( sizeof * t );
445     if ( t == NULL )
446         return RC ( rcExe, rcTree, rcInserting, rcMemory, rcExhausted );
447 
448     BSTreeInit ( t );
449 
450     * tp = t;
451     return 0;
452 }
453 
454 
455 /* Insert
456  *  create an entry into a tree
457  *  parses path into required sub-directories
458  */
459 static
CCTreePatchSubdirPath(BSTNode * n,void * data)460 void CCTreePatchSubdirPath ( BSTNode *n, void *data )
461 {
462     CCName *sym = ( CCName* ) n;
463     sym -> dad = data;
464 }
465 
466 static
CCTreeVInsert(CCTree * self,KTime_t mtime,enum CCType type,const void * entry,const char * fmt,va_list args)467 rc_t CCTreeVInsert ( CCTree *self, KTime_t mtime,
468     enum CCType type, const void *entry, const char *fmt, va_list args )
469 {
470     rc_t rc;
471     size_t sz;
472     String name;
473     CCName *dad, *sym;
474 
475     char path [ 4096 ];
476     int i, j, len = vsnprintf ( path, sizeof path, fmt, args );
477     if ( len < 0 || len >= sizeof path )
478         return RC ( rcExe, rcTree, rcInserting, rcPath, rcExcessive );
479 
480     while ( len > 0 && path [ len - 1 ] == '/' )
481         path [ -- len ] = 0;
482 
483     /* create/navigate path */
484     for ( dad = NULL, i = 0; i < len; i = j + 1 )
485     {
486         for ( j = i; j < len; ++ j )
487         {
488             if ( path [ j ] == '/' )
489             {
490                 /* detect non-empty names */
491                 sz = j - i;
492                 if ( sz != 0 )
493                 {
494                     CCTree *dir;
495 
496                     /* ignore '.' */
497                     if ( sz == 1 && path [ i ] == '.' )
498                         break;
499 
500                     /* '..' is not allowed */
501                     if ( sz == 2 && path [ i ] == '.' && path [ i + 1 ] == '.' )
502                         return RC ( rcExe, rcTree, rcInserting, rcPath, rcIncorrect );
503 
504                     /* get name of directory */
505                     StringInit ( & name, & path [ i ], sz, string_len ( & path [ i ], sz ) );
506 
507                     /* find existing */
508                     sym = ( CCName* ) BSTreeFind ( self, & name, CCNameCmp );
509 
510                     /* handle a hard link */
511                     while ( sym != NULL && sym -> type == ccHardlink )
512                         sym = sym -> entry;
513 
514                     /* should be a directory-ish thing */
515                     if ( sym != NULL )
516                     {
517                         switch ( sym -> type )
518                         {
519                         case ccContainer:
520                         case ccArchive:
521                             self = & ( ( CCContainerNode* ) sym -> entry ) -> sub;
522                             break;
523                         case ccDirectory:
524                             self = sym -> entry;
525                             break;
526                         default:
527                             return RC ( rcExe, rcTree, rcInserting, rcPath, rcIncorrect );
528                         }
529 
530                         dad = sym;
531                         break;
532                     }
533 
534                     /* create new sub-directory */
535                     rc = CCTreeMake ( & dir );
536                     if ( rc != 0 )
537                         return rc;
538 
539                     /* create directory name */
540                     rc = CCNameMake ( & sym, mtime, dad, & name, ccDirectory, dir );
541                     if ( rc != 0 )
542                     {
543                         CCTreeWhack ( dir );
544                         return rc;
545                     }
546 
547                     /* enter it into current directory
548                        don't need to validate it's unique */
549                     BSTreeInsert ( self, & sym -> n, CCNameSort );
550                     dad = sym;
551                     self = dir;
552                 }
553                 break;
554             }
555         }
556 
557         if ( j == len )
558             break;
559     }
560 
561     /* create entry name */
562     if ( i >= len )
563         return RC ( rcExe, rcTree, rcInserting, rcPath, rcIncorrect );
564     sz = len - i;
565     StringInit ( & name, & path [ i ], sz, string_len ( & path [ i ], sz ) );
566 
567 
568     /* create named entry */
569     rc = CCNameMake ( & sym, mtime, dad, & name, type, entry );
570     if ( rc == 0 )
571     {
572 #if 0
573         /* enter it into tree */
574         rc = BSTreeInsertUnique ( self, & sym -> n, NULL, CCNameSort );
575         if ( rc != 0 )
576             free ( sym );
577 #else
578         BSTNode * n = BSTreeFind (self, &sym->name, CCNameCmp);
579         if (n != NULL)
580         {
581             CCReplacedNode * rn;
582             CCName * nn = (CCName*)n;
583 
584             switch (nn->type)
585             {
586             case ccDirectory:
587                 if (sym->type == ccDirectory)
588                 {
589                     /* better would be to capture directory traits then goto */
590                     nn->mtime = sym->mtime;
591 
592                     /* we aren't yet handling a directory duplicate other than tar files */
593 
594                     if (((CCContainerNode*)sym->entry)->sub.root != NULL)
595                         rc = RC (rcExe, rcTree, rcInserting, rcNode, rcIncorrect);
596 
597                     goto skip_insert;
598                 }
599             default:
600                 rc = CCReplacedNodeMake (&rn, nn->type, nn->entry);
601                 if (rc == 0)
602                 {
603                     nn->type = ccReplaced;
604                     nn->entry = rn;
605                 }
606             }
607         }
608         if (rc == 0)
609             rc = BSTreeInsert (self, &sym->n, CCNameSort);
610     skip_insert:
611         if (rc)
612             free (sym);
613 #endif
614         /* if this guy has children, become dad */
615         else if ( entry != NULL ) switch ( type )
616         {
617         case ccContainer:
618         case ccArchive:
619             BSTreeForEach ( & ( ( CCContainerNode* ) entry ) -> sub, false, CCTreePatchSubdirPath, sym );
620             break;
621         case ccDirectory:
622             BSTreeForEach ( entry, false, CCTreePatchSubdirPath, sym );
623             break;
624 	default: /* shushing warnings */
625 	    break;
626         }
627     }
628 
629     return rc;
630 }
631 
CCTreeInsert(CCTree * self,KTime_t mtime,enum CCType type,const void * entry,const char * path,...)632 rc_t CCTreeInsert ( CCTree *self, KTime_t mtime,
633     enum CCType type, const void *entry, const char *path, ... )
634 {
635     rc_t rc;
636     va_list args;
637 
638     va_start ( args, path );
639     rc = CCTreeVInsert ( self, mtime, type, entry, path, args );
640     va_end ( args );
641 
642     return rc;
643 }
644 
645 /* Find
646  *  find a named node
647  *  returns NULL if not found
648  */
649 static
CCTreeVFind(const CCTree * self,const char * fmt,va_list args)650 const CCName *CCTreeVFind ( const CCTree *self, const char *fmt, va_list args )
651 {
652     size_t sz;
653     String name;
654     CCName /* *dad, */ *sym;
655 
656     char path [ 4096 ];
657     int i, j, len = vsnprintf ( path, sizeof path, fmt, args );
658     if ( len < 0 || len >= sizeof path )
659         return NULL;
660 
661     while ( len > 0 && path [ len - 1 ] == '/' )
662         path [ -- len ] = 0;
663 
664     /* create/navigate path */
665     for ( /* dad = NULL, */ i = 0; i < len; i = j + 1 )
666     {
667         for ( j = i; j < len; ++ j )
668         {
669             if ( path [ j ] == '/' )
670             {
671                 /* detect non-empty names */
672                 sz = j - i;
673                 if ( sz != 0 )
674                 {
675                     /* ignore '.' */
676                     if ( sz == 1 && path [ i ] == '.' )
677                         break;
678 
679                     /* '..' is not allowed */
680                     if ( sz == 2 && path [ i ] == '.' && path [ i + 1 ] == '.' )
681                         return NULL;
682 
683                     /* get name of directory */
684                     StringInit ( & name, & path [ i ], sz, string_len ( & path [ i ], sz ) );
685 
686                     /* find existing */
687                     sym = ( CCName* ) BSTreeFind ( self, & name, CCNameCmp );
688 
689                     /* handle hard-link */
690                     while ( sym != NULL && sym -> type == ccHardlink )
691                         sym = sym -> entry;
692 
693                     /* handle not found */
694                     if ( sym == NULL )
695                         return NULL;
696 
697                     /* loop or return the found object */
698                     switch ( sym -> type )
699                     {
700                     case ccContainer:
701                     case ccArchive:
702                         self = & ( ( CCContainerNode* ) sym -> entry ) -> sub;
703                         break;
704                     case ccDirectory:
705                         self = sym -> entry;
706                         break;
707                     default:
708                         return NULL;
709                     }
710 
711                     /* dad = sym; */
712                     break;
713                 }
714             }
715         }
716 
717         if ( j == len )
718             break;
719     }
720 
721     if ( i >= len )
722         return NULL;
723 
724     sz = len - i;
725     StringInit ( & name, & path [ i ], sz, string_len ( & path [ i ], sz ) );
726     return ( const CCName* ) BSTreeFind ( self, & name, CCNameCmp );
727 }
728 
729 
CCTreeFind(const CCTree * self,const char * path,...)730 const CCName *CCTreeFind ( const CCTree *self, const char *path, ... )
731 {
732     va_list args;
733     const CCName *name;
734 
735     va_start ( args, path );
736     name = CCTreeVFind ( self, path, args );
737     va_end ( args );
738 
739     return name;
740 }
741 
742 
743 /* Link
744  *  create a symlink to existing node
745  */
CCTreeLink(CCTree * self,KTime_t mtime,const char * targ,const char * alias)746 rc_t CCTreeLink ( CCTree *self, KTime_t mtime,
747     const char *targ, const char *alias )
748 {
749     const CCName *orig = CCTreeFind ( self, targ );
750     if ( orig == NULL )
751         return RC ( rcExe, rcTree, rcInserting, rcPath, rcNotFound );
752 
753     return CCTreeInsert ( self, mtime, ccHardlink, orig, alias );
754 }
755