xref: /dragonfly/bin/cpdup/cpdup.c (revision 49781055)
1 /*-
2  * CPDUP.C
3  *
4  * CPDUP <options> source destination
5  *
6  * (c) Copyright 1997-1999 by Matthew Dillon and Dima Ruban.  Permission to
7  *     use and distribute based on the FreeBSD copyright.  Supplied as-is,
8  *     USE WITH EXTREME CAUTION.
9  *
10  * This program attempts to duplicate the source onto the destination as
11  * exactly as possible, retaining modify times, flags, perms, uid, and gid.
12  * It can duplicate devices, files (including hardlinks), softlinks,
13  * directories, and so forth.  It is recursive by default!  The duplication
14  * is inclusive of removal of files/directories on the destination that do
15  * not exist on the source.  This program supports a per-directory exception
16  * file called .cpignore, or a user-specified exception file.
17  *
18  * Safety features:
19  *
20  *	- does not cross partition boundries on source
21  *	- asks for confirmation on deletions unless -i0 is specified
22  *	- refuses to replace a destination directory with a source file
23  *	  unless -s0 is specified.
24  *	- terminates on error
25  *
26  * Copying features:
27  *
28  *	- does not copy file if mtime, flags, perms, and size match unless
29  *	  forced
30  *
31  *	- copies to temporary and renames-over the original, allowing
32  *	  you to update live systems
33  *
34  *	- copies uid, gid, mtime, perms, flags, softlinks, devices, hardlinks,
35  *	  and recurses through directories.
36  *
37  *	- accesses a per-directory exclusion file, .cpignore, containing
38  *	  standard wildcarded ( ? / * style, NOT regex) exclusions.
39  *
40  *	- tries to play permissions and flags smart in regards to overwriting
41  *	  schg files and doing related stuff.
42  *
43  *	- Can do MD5 consistancy checks
44  *
45  * $DragonFly: src/bin/cpdup/cpdup.c,v 1.9 2005/10/15 19:09:17 swildner Exp $
46  */
47 
48 /*-
49  * Example: cc -O cpdup.c -o cpdup -lmd
50  *
51  * ".MD5.CHECKSUMS" contains md5 checksumms for the current directory.
52  * This file is stored on the source.
53  */
54 
55 #include "cpdup.h"
56 
57 #define HSIZE	16384
58 #define HMASK	(HSIZE-1)
59 #define HASHF 16
60 
61 const char *MD5CacheFile;
62 
63 typedef struct Node {
64     struct Node *no_Next;
65     struct Node *no_HNext;
66     int  no_Value;
67     char no_Name[4];
68 } Node;
69 
70 typedef struct List {
71     Node	li_Node;
72     Node	*li_Hash[HSIZE];
73 } List;
74 
75 struct hlink {
76     ino_t ino;
77     ino_t dino;
78     char name[2048];
79     struct hlink *next;
80     struct hlink *prev;
81     nlink_t nlinked;
82 };
83 
84 struct hlink *hltable[HASHF];
85 
86 typedef struct MD5Node {
87     struct MD5Node *md_Next;
88     char *md_Name;
89     char *md_Code;
90     int md_Accessed;
91 } MD5Node;
92 
93 
94 void RemoveRecur(const char *dpath, dev_t devNo);
95 void InitList(List *list);
96 void ResetList(List *list);
97 int AddList(List *list, const char *name, int n);
98 static struct hlink *hltlookup(struct stat *);
99 static struct hlink *hltadd(struct stat *, const char *);
100 static int shash(const char *s);
101 static void hltdelete(struct hlink *);
102 int YesNo(const char *path);
103 static int xrename(const char *src, const char *dst, u_long flags);
104 static int xlink(const char *src, const char *dst, u_long flags);
105 int WildCmp(const char *s1, const char *s2);
106 static MD5Node *md5_lookup(const char *sfile);
107 static int md5_check(const char *spath, const char *dpath);
108 static void md5_flush(void);
109 static void md5_cache(const char *spath, int sdirlen);
110 static char *fextract(FILE *fi, int n, int *pc, int skip);
111 int DoCopy(const char *spath, const char *dpath, dev_t sdevNo, dev_t ddevNo);
112 char *doMD5File(const char *filename, char *buf);
113 
114 int AskConfirmation = 1;
115 int SafetyOpt = 1;
116 int ForceOpt = 0;
117 int VerboseOpt = 0;
118 int QuietOpt = 0;
119 int NoRemoveOpt = 0;
120 int UseMD5Opt = 0;
121 int SummaryOpt = 0;
122 const char *UseCpFile;
123 
124 int64_t CountSourceBytes = 0;
125 int64_t CountSourceItems = 0;
126 int64_t CountCopiedItems = 0;
127 int64_t CountReadBytes = 0;
128 int64_t CountWriteBytes = 0;
129 int64_t CountRemovedItems = 0;
130 
131 char *MD5SCache;		/* cache source directory name */
132 MD5Node *MD5Base;
133 int MD5SCacheDirLen;
134 int MD5SCacheDirty;
135 
136 int
137 main(int ac, char **av)
138 {
139     int i;
140     char *src = NULL;
141     char *dst = NULL;
142     struct timeval start;
143 
144     gettimeofday(&start, NULL);
145     for (i = 1; i < ac; ++i) {
146 	char *ptr = av[i];
147 	int v = 1;
148 
149 	if (*ptr != '-') {
150 	    if (src == NULL) {
151 		src = ptr;
152 	    } else if (dst == NULL) {
153 		dst = ptr;
154 	    } else {
155 		fatal("too many arguments");
156 		/* not reached */
157 	    }
158 	    continue;
159 	}
160 	ptr += 2;
161 
162 	if (*ptr)
163 	    v = strtol(ptr, NULL, 0);
164 
165 	switch(ptr[-1]) {
166 	case 'v':
167 	    VerboseOpt = 1;
168 	    while (*ptr == 'v') {
169 		++VerboseOpt;
170 		++ptr;
171 	    }
172 	    if (*ptr >= '0' && *ptr <= '9')
173 		VerboseOpt = strtol(ptr, NULL, 0);
174 	    break;
175 	case 'I':
176 	    SummaryOpt = v;
177 	    break;
178 	case 'o':
179 	    NoRemoveOpt = v;
180 	    break;
181 	case 'x':
182 	    UseCpFile = ".cpignore";
183 	    break;
184 	case 'X':
185 	    UseCpFile = (*ptr) ? ptr : av[++i];
186 	    break;
187 	case 'f':
188 	    ForceOpt = v;
189 	    break;
190 	case 'i':
191 	    AskConfirmation = v;
192 	    break;
193 	case 's':
194 	    SafetyOpt = v;
195 	    break;
196 	case 'q':
197 	    QuietOpt = v;
198 	    break;
199 	case 'M':
200 	    UseMD5Opt = v;
201 	    MD5CacheFile = av[++i];
202 	    break;
203 	case 'm':
204 	    UseMD5Opt = v;
205 	    MD5CacheFile = ".MD5.CHECKSUMS";
206 	    break;
207 	case 'u':
208 	    setvbuf(stdout, NULL, _IOLBF, 0);
209 	    break;
210 	default:
211 	    fatal("illegal option: %s\n", ptr - 2);
212 	    /* not reached */
213 	    break;
214 	}
215     }
216 
217     /*
218      * dst may be NULL only if -m option is specified,
219      * which forces an update of the MD5 checksums
220      */
221 
222     if (dst == NULL && UseMD5Opt == 0) {
223 	fatal(NULL);
224 	/* not reached */
225     }
226     if (dst) {
227 	i = DoCopy(src, dst, (dev_t)-1, (dev_t)-1);
228     } else {
229 	i = DoCopy(src, NULL, (dev_t)-1, (dev_t)-1);
230     }
231     md5_flush();
232 
233     if (SummaryOpt && i == 0) {
234 	long duration;
235 	struct timeval end;
236 
237 	gettimeofday(&end, NULL);
238 	CountSourceBytes += sizeof(struct stat) * CountSourceItems;
239 	CountReadBytes += sizeof(struct stat) * CountSourceItems;
240 	CountWriteBytes +=  sizeof(struct stat) * CountCopiedItems;
241 	CountWriteBytes +=  sizeof(struct stat) * CountRemovedItems;
242 
243 	duration = end.tv_sec - start.tv_sec;
244 	duration *= 1000000;
245 	duration += end.tv_usec - start.tv_usec;
246 	if (duration == 0) duration = 1;
247 	logstd("cpdup completed successfully\n");
248 	logstd("%lld bytes source %lld bytes read %lld bytes written (%.1fX speedup)\n",
249 	    (long long)CountSourceBytes,
250 	    (long long)CountReadBytes,
251 	    (long long)CountWriteBytes,
252 	    ((double)CountSourceBytes * 2.0) / ((double)(CountReadBytes + CountWriteBytes)));
253  	logstd("%lld source items %lld items copied %lld things deleted\n",
254 	    (long long)CountSourceItems,
255 	    (long long)CountCopiedItems,
256 	    (long long)CountRemovedItems);
257 	logstd("%.1f seconds %5d Kbytes/sec synced %5d Kbytes/sec scanned\n",
258 	    (float)duration / (float)1000000,
259 	    (long)((long)1000000 * (CountReadBytes + CountWriteBytes) / duration  / 1024.0),
260 	    (long)((long)1000000 * CountSourceBytes / duration / 1024.0));
261     }
262     exit((i == 0) ? 0 : 1);
263 }
264 
265 static struct hlink *
266 hltlookup(struct stat *stp)
267 {
268     struct hlink *hl;
269     int n;
270 
271     n = stp->st_ino % HASHF;
272 
273     for (hl = hltable[n]; hl; hl = hl->next)
274         if (hl->ino == stp->st_ino)
275               return hl;
276 
277     return NULL;
278 }
279 
280 static struct hlink *
281 hltadd(struct stat *stp, const char *path)
282 {
283     struct hlink *new;
284     int n;
285 
286     if (!(new = malloc(sizeof (struct hlink)))) {
287         fprintf(stderr, "out of memory\n");
288         exit(EXIT_FAILURE);
289     }
290 
291     /* initialize and link the new element into the table */
292     new->ino = stp->st_ino;
293     new->dino = 0;
294     strncpy(new->name, path, 2048);
295     new->nlinked = 1;
296     new->prev = NULL;
297     n = stp->st_ino % HASHF;
298     new->next = hltable[n];
299     if (hltable[n])
300         hltable[n]->prev = new;
301     hltable[n] = new;
302 
303     return new;
304 }
305 
306 static void
307 hltdelete(struct hlink *hl)
308 {
309     if (hl->prev) {
310         if (hl->next)
311             hl->next->prev = hl->prev;
312         hl->prev->next = hl->next;
313     } else {
314         if (hl->next)
315             hl->next->prev = NULL;
316 
317         hltable[hl->ino % HASHF] = hl->next;
318     }
319 
320     free(hl);
321 }
322 
323 int
324 DoCopy(const char *spath, const char *dpath, dev_t sdevNo, dev_t ddevNo)
325 {
326     struct stat st1;
327     struct stat st2;
328     int r, mres, st2Valid;
329     struct hlink *hln;
330     List list;
331     u_int64_t size;
332 
333     InitList(&list);
334 
335     r = mres = st2Valid = 0;
336     size = 0;
337     hln = NULL;
338 
339     if (lstat(spath, &st1) != 0)
340 	return(0);
341     st2.st_mode = 0;	/* in case lstat fails */
342     st2.st_flags = 0;	/* in case lstat fails */
343     if (dpath && lstat(dpath, &st2) == 0)
344 	st2Valid = 1;
345 
346     if (S_ISREG(st1.st_mode)) {
347 	size = st1.st_blocks * 512;
348 	if (st1.st_size % 512)
349 	    size += st1.st_size % 512 - 512;
350     }
351 
352     /*
353      * Handle hardlinks
354      */
355 
356     if (S_ISREG(st1.st_mode) && st1.st_nlink > 1 && dpath) {
357         if ((hln = hltlookup(&st1)) != NULL) {
358             hln->nlinked++;
359 
360             if (st2Valid) {
361                 if (st2.st_ino == hln->dino) {
362 		    /*
363 		     * hard link is already correct, nothing to do
364 		     */
365 		    if (VerboseOpt >= 3)
366 			logstd("%-32s nochange\n", (dpath) ? dpath : spath);
367                     if (hln->nlinked == st1.st_nlink)
368                         hltdelete(hln);
369 		    CountSourceItems++;
370                     return 0;
371                 } else {
372 		    /*
373 		     * hard link is not correct, attempt to unlink it
374 		     */
375                     if (unlink(dpath) < 0) {
376 			logerr("%-32s hardlink: unable to unlink: %s\n",
377 			    ((dpath) ? dpath : spath), strerror(errno));
378                         hltdelete(hln);
379 			return (r + 1);
380 		    }
381                 }
382             }
383 
384             if (xlink(hln->name, dpath, st1.st_flags) < 0) {
385 		logerr("%-32s hardlink: unable to link to %s: %s\n",
386 		    (dpath ? dpath : spath), hln->name, strerror(errno)
387 		);
388                 hltdelete(hln);
389                 hln = NULL;
390 		++r;
391             } else {
392                 if (hln->nlinked == st1.st_nlink) {
393                     hltdelete(hln);
394 		    hln = NULL;
395 		}
396                 if (r == 0) {
397 		    if (VerboseOpt) {
398 			logstd("%-32s hardlink: %s\n",
399 			    (dpath ? dpath : spath),
400 			    (st2Valid ? "relinked" : "linked")
401 			);
402 		    }
403 		    CountSourceItems++;
404 		    CountCopiedItems++;
405                     return 0;
406 		}
407             }
408         } else {
409 	    /*
410 	     * first instance of hardlink must be copied normally
411 	     */
412             hln = hltadd(&st1, dpath);
413 	}
414     }
415 
416     /*
417      * Do we need to copy the file/dir/link/whatever?  Early termination
418      * if we do not.  Always traverse directories.  Always redo links.
419      *
420      * NOTE: st2Valid is true only if dpath != NULL *and* dpath stats good.
421      */
422 
423     if (
424 	st2Valid &&
425 	st1.st_mode == st2.st_mode &&
426 	st1.st_flags == st2.st_flags
427     ) {
428 	if (S_ISLNK(st1.st_mode) || S_ISDIR(st1.st_mode)) {
429 	    ;
430 	} else {
431 	    if (ForceOpt == 0 &&
432 		st1.st_size == st2.st_size &&
433 		st1.st_uid == st2.st_uid &&
434 		st1.st_gid == st2.st_gid &&
435 		st1.st_mtime == st2.st_mtime
436 		&& (UseMD5Opt == 0 || (mres = md5_check(spath, dpath)) == 0)
437 	    ) {
438                 if (hln)
439                     hln->dino = st2.st_ino;
440 		if (VerboseOpt >= 3) {
441 		    if (UseMD5Opt)
442 			logstd("%-32s md5-nochange\n", (dpath ? dpath : spath));
443 		    else
444 			logstd("%-32s nochange\n", (dpath ? dpath : spath));
445 		}
446 		CountSourceBytes += size;
447 		CountSourceItems++;
448 
449 		return(0);
450 	    }
451 	}
452     }
453     if (st2Valid && !S_ISDIR(st1.st_mode) && S_ISDIR(st2.st_mode)) {
454 	if (SafetyOpt) {
455 	    logerr("%-32s SAFETY - refusing to copy file over directory\n",
456 		(dpath ? dpath : spath)
457 	    );
458 	    ++r;		/* XXX */
459 	    return(0);	/* continue with the cpdup anyway */
460 	}
461 	if (QuietOpt == 0 || AskConfirmation) {
462 	    logstd("%-32s WARNING: non-directory source will blow away\n"
463 		   "%-32s preexisting dest directory, continuing anyway!\n",
464 		   ((dpath) ? dpath : spath), "");
465 	}
466 	if (dpath)
467 	    RemoveRecur(dpath, ddevNo);
468     }
469 
470     if (S_ISDIR(st1.st_mode)) {
471 	DIR *dir;
472 
473 	if ((dir = opendir(spath)) != NULL) {
474 	    struct dirent *den;
475 	    int noLoop = 0;
476 
477 	    if (dpath) {
478 		if (S_ISDIR(st2.st_mode) == 0) {
479 		    remove(dpath);
480 		    if (mkdir(dpath, st1.st_mode | 0700) != 0) {
481 			logerr("%s: mkdir failed: %s\n",
482 			    (dpath ? dpath : spath), strerror(errno));
483 			r = 1;
484 			noLoop = 1;
485 		    }
486 		    /*
487 		     * Matt: why don't you check error codes here?
488 		     */
489 		    lstat(dpath, &st2);
490 		    chown(dpath, st1.st_uid, st1.st_gid);
491 		    CountCopiedItems++;
492 		} else {
493 		    /*
494 		     * Directory must be scanable by root for cpdup to
495 		     * work.  We'll fix it later if the directory isn't
496 		     * supposed to be readable ( which is why we fixup
497 		     * st2.st_mode to match what we did ).
498 		     */
499 		    if ((st2.st_mode & 0700) != 0700) {
500 			chmod(dpath, st2.st_mode | 0700);
501 			st2.st_mode |= 0700;
502 		    }
503 		    if (VerboseOpt >= 2)
504 			logstd("%s\n", dpath ? dpath : spath);
505 		}
506 	    }
507 
508 	    if ((int)sdevNo >= 0 && st1.st_dev != sdevNo) {
509 		noLoop = 1;
510 	    } else {
511 		sdevNo = st1.st_dev;
512 	    }
513 
514 	    if ((int)ddevNo >= 0 && st2.st_dev != ddevNo) {
515 		noLoop = 1;
516 	    } else {
517 		ddevNo = st2.st_dev;
518 	    }
519 
520 	    /*
521 	     * scan .cpignore file for files/directories
522 	     * to ignore.
523 	     */
524 
525 	    if (UseCpFile) {
526 		FILE *fi;
527 		char buf[8192];
528 		char *fpath;
529 
530 		if (UseCpFile[0] == '/') {
531 		    fpath = mprintf("%s", UseCpFile);
532 		} else {
533 		    fpath = mprintf("%s/%s", spath, UseCpFile);
534 		}
535 		AddList(&list, strrchr(fpath, '/') + 1, 1);
536 		if ((fi = fopen(fpath, "r")) != NULL) {
537 		    while (fgets(buf, sizeof(buf), fi) != NULL) {
538 			int l = strlen(buf);
539 			CountReadBytes += l;
540 			if (l && buf[l-1] == '\n')
541 			    buf[--l] = 0;
542 			if (buf[0] && buf[0] != '#')
543 			    AddList(&list, buf, 1);
544 		    }
545 		    fclose(fi);
546 		}
547 		free(fpath);
548 	    }
549 
550 	    /*
551 	     * Automatically exclude MD5CacheFile that we create on the
552 	     * source from the copy to the destination.
553 	     */
554 	    if (UseMD5Opt)
555 		AddList(&list, MD5CacheFile, 1);
556 
557 	    while (noLoop == 0 && (den = readdir(dir)) != NULL) {
558 		/*
559 		 * ignore . and ..
560 		 */
561 		char *nspath;
562 		char *ndpath = NULL;
563 
564 		if (strcmp(den->d_name, ".") == 0 ||
565 		    strcmp(den->d_name, "..") == 0
566 		) {
567 		    continue;
568 		}
569 		/*
570 		 * ignore if on .cpignore list
571 		 */
572 		if (AddList(&list, den->d_name, 0) == 1) {
573 		    continue;
574 		}
575 		nspath = mprintf("%s/%s", spath, den->d_name);
576 		if (dpath)
577 		    ndpath = mprintf("%s/%s", dpath, den->d_name);
578 		r += DoCopy(
579 		    nspath,
580 		    ndpath,
581 		    sdevNo,
582 		    ddevNo
583 		);
584 		free(nspath);
585 		if (ndpath)
586 		    free(ndpath);
587 	    }
588 
589 	    closedir(dir);
590 
591 	    /*
592 	     * Remove files/directories from destination that do not appear
593 	     * in the source.
594 	     */
595 	    if (dpath && (dir = opendir(dpath)) != NULL) {
596 		while (noLoop == 0 && (den = readdir(dir)) != NULL) {
597 		    /*
598 		     * ignore . or ..
599 		     */
600 		    if (strcmp(den->d_name, ".") == 0 ||
601 			strcmp(den->d_name, "..") == 0
602 		    ) {
603 			continue;
604 		    }
605 		    /*
606 		     * If object does not exist in source or .cpignore
607 		     * then recursively remove it.
608 		     */
609 		    if (AddList(&list, den->d_name, 3) == 3) {
610 			char *ndpath;
611 
612 			ndpath = mprintf("%s/%s", dpath, den->d_name);
613 			RemoveRecur(ndpath, ddevNo);
614 			free(ndpath);
615 		    }
616 		}
617 		closedir(dir);
618 	    }
619 
620 	    if (dpath) {
621 		if (ForceOpt ||
622 		    st2Valid == 0 ||
623 		    st1.st_uid != st2.st_uid ||
624 		    st1.st_gid != st2.st_gid
625 		) {
626 		    chown(dpath, st1.st_uid, st1.st_gid);
627 		}
628 		if (st2Valid == 0 || st1.st_mode != st2.st_mode) {
629 		    chmod(dpath, st1.st_mode);
630 		}
631 		if (st2Valid == 0 || st1.st_flags != st2.st_flags) {
632 		    chflags(dpath, st1.st_flags);
633 		}
634 	    }
635 	}
636     } else if (dpath == NULL) {
637 	/*
638 	 * If dpath is NULL, we are just updating the MD5
639 	 */
640 	if (UseMD5Opt && S_ISREG(st1.st_mode)) {
641 	    mres = md5_check(spath, NULL);
642 
643 	    if (VerboseOpt > 1) {
644 		if (mres < 0)
645 		    logstd("%-32s md5-update\n", (dpath) ? dpath : spath);
646 		else
647 		    logstd("%-32s md5-ok\n", (dpath) ? dpath : spath);
648 	    } else if (!QuietOpt && mres < 0) {
649 		logstd("%-32s md5-update\n", (dpath) ? dpath : spath);
650 	    }
651 	}
652     } else if (S_ISREG(st1.st_mode)) {
653 	char *path;
654 	int fd1;
655 	int fd2;
656 
657 	path = mprintf("%s.tmp", dpath);
658 
659 	/*
660 	 * Handle check failure message.
661 	 */
662 	if (mres < 0)
663 	    logerr("%-32s md5-CHECK-FAILED\n", (dpath) ? dpath : spath);
664 
665 	if ((fd1 = open(spath, O_RDONLY)) >= 0) {
666 	    if ((fd2 = open(path, O_WRONLY|O_CREAT|O_EXCL, 0600)) < 0) {
667 		/*
668 		 * There could be a .tmp file from a previously interrupted
669 		 * run, delete and retry.  Fail if we still can't get at it.
670 		 */
671 		chflags(path, 0);
672 		remove(path);
673 		fd2 = open(path, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC, 0600);
674 	    }
675 	    if (fd2 >= 0) {
676 		/*
677 		 * Matt: I think 64k would be faster here
678 		 */
679 		char buf[32768];
680 		const char *op;
681 		int n;
682 
683 		/*
684 		 * Matt: What about holes?
685 		 */
686 		op = "read";
687 		while ((n = read(fd1, buf, sizeof(buf))) > 0) {
688 		    op = "write";
689 		    if (write(fd2, buf, n) != n)
690 			break;
691 		    op = "read";
692 		}
693 		close(fd2);
694 		if (n == 0) {
695 		    struct timeval tv[2];
696 
697 		    bzero(tv, sizeof(tv));
698 		    tv[0].tv_sec = st1.st_mtime;
699 		    tv[1].tv_sec = st1.st_mtime;
700 
701 		    utimes(path, tv);
702 		    chown(path, st1.st_uid, st1.st_gid);
703 		    chmod(path, st1.st_mode);
704 		    if (xrename(path, dpath, st2.st_flags) != 0) {
705 			logerr("%-32s rename-after-copy failed: %s\n",
706 			    (dpath ? dpath : spath), strerror(errno)
707 			);
708 			++r;
709 		    } else {
710 			if (VerboseOpt)
711 			    logstd("%-32s copy-ok\n", (dpath ? dpath : spath));
712 			if (st1.st_flags)
713 			    chflags(dpath, st1.st_flags);
714 		    }
715 		    CountReadBytes += size;
716 		    CountWriteBytes += size;
717 		    CountSourceBytes += size;
718 		    CountSourceItems++;
719 		    CountCopiedItems++;
720 		} else {
721 		    logerr("%-32s %s failed: %s\n",
722 			(dpath ? dpath : spath), op, strerror(errno)
723 		    );
724 		    remove(path);
725 		    ++r;
726 		}
727 	    } else {
728 		logerr("%-32s create (uid %d, euid %d) failed: %s\n",
729 		    (dpath ? dpath : spath), getuid(), geteuid(),
730 		    strerror(errno)
731 		);
732 		++r;
733 	    }
734 	    close(fd1);
735 	} else {
736 	    logerr("%-32s copy: open failed: %s\n",
737 		(dpath ? dpath : spath),
738 		strerror(errno)
739 	    );
740 	    ++r;
741 	}
742 	free(path);
743 
744         if (hln) {
745             if (!r && stat(dpath, &st2) == 0)
746                 hln->dino = st2.st_ino;
747             else
748                 hltdelete(hln);
749         }
750     } else if (S_ISLNK(st1.st_mode)) {
751 	char link1[1024];
752 	char link2[1024];
753 	char path[2048];
754 	int n1;
755 	int n2;
756 
757 	snprintf(path, sizeof(path), "%s.tmp", dpath);
758 	n1 = readlink(spath, link1, sizeof(link1) - 1);
759 	n2 = readlink(dpath, link2, sizeof(link2) - 1);
760 	if (n1 >= 0) {
761 	    if (ForceOpt || n1 != n2 || bcmp(link1, link2, n1) != 0) {
762 		umask(~st1.st_mode);
763 		remove(path);
764 		link1[n1] = 0;
765 		if (symlink(link1, path) < 0) {
766                       logerr("%-32s symlink (%s->%s) failed: %s\n",
767 			  (dpath ? dpath : spath), link1, path,
768 			  strerror(errno)
769 		      );
770 		      ++r;
771 		} else {
772 		    lchown(path, st1.st_uid, st1.st_gid);
773 		    /*
774 		     * there is no lchmod() or lchflags(), we
775 		     * cannot chmod or chflags a softlink.
776 		     */
777 		    if (xrename(path, dpath, st2.st_flags) != 0) {
778 			logerr("%-32s rename softlink (%s->%s) failed: %s\n",
779 			    (dpath ? dpath : spath),
780 			    path, dpath, strerror(errno));
781 		    } else if (VerboseOpt) {
782 			logstd("%-32s softlink-ok\n", (dpath ? dpath : spath));
783 		    }
784 		    umask(000);
785 		    CountWriteBytes += n1;
786 		    CountCopiedItems++;
787 	  	}
788 	    } else {
789 		if (VerboseOpt >= 3)
790 		    logstd("%-32s nochange\n", (dpath ? dpath : spath));
791 	    }
792 	    CountSourceBytes += n1;
793 	    CountReadBytes += n1;
794 	    if (n2 > 0) CountReadBytes += n2;
795 	    CountSourceItems++;
796 	} else {
797 	    r = 1;
798 	    logerr("%-32s softlink-failed\n", (dpath ? dpath : spath));
799 	}
800     } else if (S_ISCHR(st1.st_mode) || S_ISBLK(st1.st_mode)) {
801 	char path[2048];
802 
803 	if (ForceOpt ||
804 	    st2Valid == 0 ||
805 	    st1.st_mode != st2.st_mode ||
806 	    st1.st_rdev != st2.st_rdev ||
807 	    st1.st_uid != st2.st_uid ||
808 	    st1.st_gid != st2.st_gid
809 	) {
810 	    snprintf(path, sizeof(path), "%s.tmp", dpath);
811 
812 	    remove(path);
813 	    if (mknod(path, st1.st_mode, st1.st_rdev) == 0) {
814 		chmod(path, st1.st_mode);
815 		chown(path, st1.st_uid, st1.st_gid);
816 		remove(dpath);
817 		if (xrename(path, dpath, st2.st_flags) != 0) {
818 		    logerr("%-32s dev-rename-after-create failed: %s\n",
819 			(dpath ? dpath : spath),
820 			strerror(errno)
821 		    );
822 		} else if (VerboseOpt) {
823 		    logstd("%-32s dev-ok\n", (dpath ? dpath : spath));
824 		}
825 		CountCopiedItems++;
826 	    } else {
827 		r = 1;
828 		logerr("%-32s dev failed: %s\n",
829 		    (dpath ? dpath : spath), strerror(errno)
830 		);
831 	    }
832 	} else {
833 	    if (VerboseOpt >= 3)
834 		logstd("%-32s nochange\n", (dpath ? dpath : spath));
835 	}
836 	CountSourceItems++;
837     }
838     ResetList(&list);
839     return(r);
840 }
841 
842 /*
843  * RemoveRecur()
844  */
845 
846 void
847 RemoveRecur(const char *dpath, dev_t devNo)
848 {
849     struct stat st;
850 
851     if (lstat(dpath, &st) == 0) {
852 	if ((int)devNo < 0)
853 	    devNo = st.st_dev;
854 	if (st.st_dev == devNo) {
855 	    if (S_ISDIR(st.st_mode)) {
856 		DIR *dir;
857 
858 		if ((dir = opendir(dpath)) != NULL) {
859 		    struct dirent *den;
860 		    while ((den = readdir(dir)) != NULL) {
861 			char *ndpath;
862 
863 			if (strcmp(den->d_name, ".") == 0)
864 			    continue;
865 			if (strcmp(den->d_name, "..") == 0)
866 			    continue;
867 			ndpath = mprintf("%s/%s", dpath, den->d_name);
868 			RemoveRecur(ndpath, devNo);
869 			free(ndpath);
870 		    }
871 		    closedir(dir);
872 		}
873 		if (AskConfirmation && NoRemoveOpt == 0) {
874 		    if (YesNo(dpath)) {
875 			if (rmdir(dpath) < 0) {
876 			    logerr("%-32s rmdir failed: %s\n",
877 				dpath, strerror(errno)
878 			    );
879 			}
880 			CountRemovedItems++;
881 		    }
882 		} else {
883 		    if (NoRemoveOpt) {
884 			if (VerboseOpt)
885 			    logstd("%-32s not-removed\n", dpath);
886 		    } else if (rmdir(dpath) == 0) {
887 			if (VerboseOpt)
888 			    logstd("%-32s rmdir-ok\n", dpath);
889 			CountRemovedItems++;
890 		    } else {
891 			logerr("%-32s rmdir failed: %s\n",
892 			    dpath, strerror(errno)
893 			);
894 		    }
895 		}
896 	    } else {
897 		if (AskConfirmation && NoRemoveOpt == 0) {
898 		    if (YesNo(dpath)) {
899 			if (remove(dpath) < 0) {
900 			    logerr("%-32s remove failed: %s\n",
901 				dpath, strerror(errno)
902 			    );
903 			}
904 			CountRemovedItems++;
905 		    }
906 		} else {
907 		    if (NoRemoveOpt) {
908 			if (VerboseOpt)
909 			    logstd("%-32s not-removed\n", dpath);
910 		    } else if (remove(dpath) == 0) {
911 			if (VerboseOpt)
912 			    logstd("%-32s remove-ok\n", dpath);
913 			CountRemovedItems++;
914 		    } else {
915 			logerr("%-32s remove failed: %s\n",
916 			    dpath, strerror(errno)
917 			);
918 		    }
919 		}
920 	    }
921 	}
922     }
923 }
924 
925 void
926 InitList(List *list)
927 {
928     bzero(list, sizeof(List));
929     list->li_Node.no_Next = &list->li_Node;
930 }
931 
932 void
933 ResetList(List *list)
934 {
935     Node *node;
936 
937     while ((node = list->li_Node.no_Next) != &list->li_Node) {
938 	list->li_Node.no_Next = node->no_Next;
939 	free(node);
940     }
941     InitList(list);
942 }
943 
944 int
945 AddList(List *list, const char *name, int n)
946 {
947     Node *node;
948     int hv;
949 
950     hv = shash(name);
951 
952     /*
953      * Scan against wildcards.  Only a node value of 1 can be a wildcard
954      * ( usually scanned from .cpignore )
955      */
956 
957     for (node = list->li_Hash[0]; node; node = node->no_HNext) {
958 	if (strcmp(name, node->no_Name) == 0 ||
959 	    (n != 1 && node->no_Value == 1 && WildCmp(node->no_Name, name) == 0)
960 	) {
961 	    return(node->no_Value);
962 	}
963     }
964 
965     /*
966      * Look for exact match
967      */
968 
969     for (node = list->li_Hash[hv]; node; node = node->no_HNext) {
970 	if (strcmp(name, node->no_Name) == 0) {
971 	    return(node->no_Value);
972 	}
973     }
974     node = malloc(sizeof(Node) + strlen(name) + 1);
975     if (node == NULL) {
976         fprintf(stderr, "out of memory\n");
977         exit(EXIT_FAILURE);
978     }
979 
980     node->no_Next = list->li_Node.no_Next;
981     list->li_Node.no_Next = node;
982 
983     node->no_HNext = list->li_Hash[hv];
984     list->li_Hash[hv] = node;
985 
986     strcpy(node->no_Name, name);
987     node->no_Value = n;
988 
989     return(n);
990 }
991 
992 static int
993 shash(const char *s)
994 {
995     int hv;
996 
997     hv = 0xA4FB3255;
998 
999     while (*s) {
1000 	if (*s == '*' || *s == '?' ||
1001 	    *s == '{' || *s == '}' ||
1002 	    *s == '[' || *s == ']' ||
1003 	    *s == '|'
1004 	) {
1005 	    return(0);
1006 	}
1007 	hv = (hv << 5) ^ *s ^ (hv >> 23);
1008 	++s;
1009     }
1010     return(((hv >> 16) ^ hv) & HMASK);
1011 }
1012 
1013 /*
1014  * WildCmp() - compare wild string to sane string
1015  *
1016  *	Return 0 on success, -1 on failure.
1017  */
1018 
1019 int
1020 WildCmp(const char *w, const char *s)
1021 {
1022     /*
1023      * skip fixed portion
1024      */
1025 
1026     for (;;) {
1027 	switch(*w) {
1028 	case '*':
1029 	    if (w[1] == 0)	/* optimize wild* case */
1030 		return(0);
1031 	    {
1032 		int i;
1033 		int l = strlen(s);
1034 
1035 		for (i = 0; i <= l; ++i) {
1036 		    if (WildCmp(w + 1, s + i) == 0)
1037 			return(0);
1038 		}
1039 	    }
1040 	    return(-1);
1041 	case '?':
1042 	    if (*s == 0)
1043 		return(-1);
1044 	    ++w;
1045 	    ++s;
1046 	    break;
1047 	default:
1048 	    if (*w != *s)
1049 		return(-1);
1050 	    if (*w == 0)	/* terminator */
1051 		return(0);
1052 	    ++w;
1053 	    ++s;
1054 	    break;
1055 	}
1056     }
1057     /* not reached */
1058     return(-1);
1059 }
1060 
1061 int
1062 YesNo(const char *path)
1063 {
1064     int ch, first;
1065 
1066     fprintf(stderr, "remove %s (Yes/No) [No]? ", path);
1067     fflush(stderr);
1068 
1069     first = ch = getchar();
1070     while (ch != '\n' && ch != EOF)
1071 	ch = getchar();
1072     return ((first == 'y' || first == 'Y'));
1073 }
1074 
1075 static void
1076 md5_flush(void)
1077 {
1078     if (MD5SCacheDirty && MD5SCache) {
1079 	FILE *fo;
1080 
1081 	if ((fo = fopen(MD5SCache, "w")) != NULL) {
1082 	    MD5Node *node;
1083 
1084 	    for (node = MD5Base; node; node = node->md_Next) {
1085 		if (node->md_Accessed && node->md_Code) {
1086 		    fprintf(fo, "%s %d %s\n",
1087 			node->md_Code,
1088 			strlen(node->md_Name),
1089 			node->md_Name
1090 		    );
1091 		}
1092 	    }
1093 	    fclose(fo);
1094 	}
1095     }
1096 
1097     MD5SCacheDirty = 0;
1098 
1099     if (MD5SCache) {
1100 	MD5Node *node;
1101 
1102 	while ((node = MD5Base) != NULL) {
1103 	    MD5Base = node->md_Next;
1104 
1105 	    if (node->md_Code)
1106 		free(node->md_Code);
1107 	    if (node->md_Name)
1108 		free(node->md_Name);
1109 	    free(node);
1110 	}
1111 	free(MD5SCache);
1112 	MD5SCache = NULL;
1113     }
1114 }
1115 
1116 static void
1117 md5_cache(const char *spath, int sdirlen)
1118 {
1119     FILE *fi;
1120 
1121     /*
1122      * Already cached
1123      */
1124 
1125     if (
1126 	MD5SCache &&
1127 	sdirlen == MD5SCacheDirLen &&
1128 	strncmp(spath, MD5SCache, sdirlen) == 0
1129     ) {
1130 	return;
1131     }
1132 
1133     /*
1134      * Different cache, flush old cache
1135      */
1136 
1137     if (MD5SCache != NULL)
1138 	md5_flush();
1139 
1140     /*
1141      * Create new cache
1142      */
1143 
1144     MD5SCacheDirLen = sdirlen;
1145     MD5SCache = mprintf("%*.*s%s", sdirlen, sdirlen, spath, MD5CacheFile);
1146 
1147     if ((fi = fopen(MD5SCache, "r")) != NULL) {
1148 	MD5Node **pnode = &MD5Base;
1149 	int c;
1150 
1151 	c = fgetc(fi);
1152 	while (c != EOF) {
1153 	    MD5Node *node = *pnode = malloc(sizeof(MD5Node));
1154 	    char *s;
1155 	    int nlen;
1156 
1157 	    nlen = 0;
1158 
1159 	    if (pnode == NULL || node == NULL) {
1160 		fprintf(stderr, "out of memory\n");
1161 		exit(EXIT_FAILURE);
1162 	    }
1163 
1164 	    bzero(node, sizeof(MD5Node));
1165 	    node->md_Code = fextract(fi, -1, &c, ' ');
1166 	    node->md_Accessed = 1;
1167 	    if ((s = fextract(fi, -1, &c, ' ')) != NULL) {
1168 		nlen = strtol(s, NULL, 0);
1169 		free(s);
1170 	    }
1171 	    /*
1172 	     * extracting md_Name - name may contain embedded control
1173 	     * characters.
1174 	     */
1175 	    CountReadBytes += nlen+1;
1176 	    node->md_Name = fextract(fi, nlen, &c, EOF);
1177 	    if (c != '\n') {
1178 		fprintf(stderr, "Error parsing MD5 Cache: %s (%c)\n", MD5SCache, c);
1179 		while (c != EOF && c != '\n')
1180 		    c = fgetc(fi);
1181 	    }
1182 	    if (c != EOF)
1183 		c = fgetc(fi);
1184 	    pnode = &node->md_Next;
1185 	}
1186 	fclose(fi);
1187     }
1188 }
1189 
1190 /*
1191  * md5_lookup:	lookup/create md5 entry
1192  */
1193 
1194 static MD5Node *
1195 md5_lookup(const char *sfile)
1196 {
1197     MD5Node **pnode;
1198     MD5Node *node;
1199 
1200     for (pnode = &MD5Base; (node = *pnode) != NULL; pnode = &node->md_Next) {
1201 	if (strcmp(sfile, node->md_Name) == 0) {
1202 	    break;
1203 	}
1204     }
1205     if (node == NULL) {
1206 
1207 	if ((node = *pnode = malloc(sizeof(MD5Node))) == NULL) {
1208 		fprintf(stderr,"out of memory\n");
1209 		exit(EXIT_FAILURE);
1210 	}
1211 
1212 	bzero(node, sizeof(MD5Node));
1213 	node->md_Name = strdup(sfile);
1214     }
1215     node->md_Accessed = 1;
1216     return(node);
1217 }
1218 
1219 /*
1220  * md5_check:  check MD5 against file
1221  *
1222  *	Return -1 if check failed
1223  *	Return 0  if check succeeded
1224  *
1225  * dpath can be NULL, in which case we are force-updating
1226  * the source MD5.
1227  */
1228 
1229 static int
1230 md5_check(const char *spath, const char *dpath)
1231 {
1232     const char *sfile;
1233     char *dcode;
1234     int sdirlen;
1235     int r;
1236     MD5Node *node;
1237 
1238     r = -1;
1239 
1240     if ((sfile = strrchr(spath, '/')) != NULL)
1241 	++sfile;
1242     else
1243 	sfile = spath;
1244     sdirlen = sfile - spath;
1245 
1246     md5_cache(spath, sdirlen);
1247 
1248     node = md5_lookup(sfile);
1249 
1250     /*
1251      * If dpath == NULL, we are force-updating the source .MD5* files
1252      */
1253 
1254     if (dpath == NULL) {
1255 	char *scode = doMD5File(spath, NULL);
1256 
1257 	r = 0;
1258 	if (node->md_Code == NULL) {
1259 	    r = -1;
1260 	    node->md_Code = scode;
1261 	    MD5SCacheDirty = 1;
1262 	} else if (strcmp(scode, node->md_Code) != 0) {
1263 	    r = -1;
1264 	    free(node->md_Code);
1265 	    node->md_Code = scode;
1266 	    MD5SCacheDirty = 1;
1267 	} else {
1268 	    free(scode);
1269 	}
1270 	return(r);
1271     }
1272 
1273     /*
1274      * Otherwise the .MD5* file is used as a cache.
1275      */
1276 
1277     if (node->md_Code == NULL) {
1278 	node->md_Code = doMD5File(spath, NULL);
1279 	MD5SCacheDirty = 1;
1280     }
1281 
1282     dcode = doMD5File(dpath, NULL);
1283     if (dcode) {
1284 	if (strcmp(node->md_Code, dcode) == 0) {
1285 	    r = 0;
1286 	} else {
1287 	    char *scode = doMD5File(spath, NULL);
1288 
1289 	    if (strcmp(node->md_Code, scode) == 0) {
1290 		    free(scode);
1291 	    } else {
1292 		    free(node->md_Code);
1293 		    node->md_Code = scode;
1294 		    MD5SCacheDirty = 1;
1295 		    if (strcmp(node->md_Code, dcode) == 0)
1296 			r = 0;
1297 	    }
1298 	}
1299 	free(dcode);
1300     }
1301     return(r);
1302 }
1303 
1304 /*
1305  * xrename() - rename with override
1306  *
1307  *	If the rename fails, attempt to override st_flags on the
1308  *	destination and rename again.  If that fails too, try to
1309  *	set the flags back the way they were and give up.
1310  */
1311 
1312 static int
1313 xrename(const char *src, const char *dst, u_long flags)
1314 {
1315     int r;
1316 
1317     r = 0;
1318 
1319     if ((r = rename(src, dst)) < 0) {
1320 	chflags(dst, 0);
1321 	if ((r = rename(src, dst)) < 0)
1322 		chflags(dst, flags);
1323     }
1324     return(r);
1325 }
1326 
1327 static int
1328 xlink(const char *src, const char *dst, u_long flags)
1329 {
1330     int r, e;
1331 
1332     r = 0;
1333 
1334     if ((r = link(src, dst)) < 0) {
1335 	chflags(src, 0);
1336 	r = link(src, dst);
1337 	e = errno;
1338 	chflags(src, flags);
1339 	errno = e;
1340     }
1341     return(r);
1342 }
1343 
1344 static char *
1345 fextract(FILE *fi, int n, int *pc, int skip)
1346 {
1347     int i;
1348     int c;
1349     int imax;
1350     char *s;
1351 
1352     i = 0;
1353     c = *pc;
1354     imax = (n < 0) ? 64 : n + 1;
1355 
1356     s = malloc(imax);
1357     if (s == NULL) {
1358 	fprintf(stderr, "out of memory\n");
1359 	exit(EXIT_FAILURE);
1360     }
1361 
1362     while (c != EOF) {
1363 	if (n == 0 || (n < 0 && (c == ' ' || c == '\n')))
1364 	    break;
1365 
1366 	s[i++] = c;
1367 	if (i == imax) {
1368 	    imax += 64;
1369 	    s = realloc(s, imax);
1370     	    if (s == NULL) {
1371                 fprintf(stderr, "out of memory\n");
1372   	        exit(EXIT_FAILURE);
1373  	    }
1374 	}
1375 	if (n > 0)
1376 	    --n;
1377 	c = getc(fi);
1378     }
1379     if (c == skip && skip != EOF)
1380 	c = getc(fi);
1381     *pc = c;
1382     s[i] = 0;
1383     return(s);
1384 }
1385 
1386 char *
1387 doMD5File(const char *filename, char *buf)
1388 {
1389     if (SummaryOpt) {
1390 	struct stat st;
1391 	if (stat(filename, &st) == 0) {
1392 	    u_int64_t size = st.st_blocks * 512;
1393 	    if (st.st_size % 512)
1394 		size += st.st_size % 512 - 512;
1395 	    CountReadBytes += size;
1396 	}
1397     }
1398     return MD5File(filename, buf);
1399 }
1400