1 /* $OpenBSD: flist.c,v 1.38 2023/12/27 17:22:25 claudio Exp $ */
2 /*
3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2019 Florian Obser <florian@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include <sys/stat.h>
19
20 #include <assert.h>
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <fts.h>
24 #include <limits.h>
25 #include <inttypes.h>
26 #include <search.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31
32 #include "extern.h"
33
/*
 * The file list is grown in chunks of this many entries instead of one
 * entry at a time.
 * Preferably a transfer needs only one or two allocations.
 */
#define	FLIST_CHUNK_SIZE	(1024)
40
/*
 * Flag bits defined by the rsync protocol.
 * They are sent as the first byte of a file's transmission and encode
 * information that affects the fields transmitted after it.
 * All of them fit in that single byte.
 */
#define	FLIST_TOP_LEVEL	0x01	/* needed for remote --delete */
#define	FLIST_MODE_SAME	0x02	/* mode is repeat */
#define	FLIST_RDEV_SAME	0x04	/* rdev is repeat */
#define	FLIST_UID_SAME	0x08	/* uid is repeat */
#define	FLIST_GID_SAME	0x10	/* gid is repeat */
#define	FLIST_NAME_SAME	0x20	/* name is repeat */
#define	FLIST_NAME_LONG	0x40	/* name >255 bytes */
#define	FLIST_TIME_SAME	0x80	/* time is repeat */
54
55 /*
56 * Required way to sort a filename list.
57 */
58 static int
flist_cmp(const void * p1,const void * p2)59 flist_cmp(const void *p1, const void *p2)
60 {
61 const struct flist *f1 = p1, *f2 = p2;
62
63 return strcmp(f1->wpath, f2->wpath);
64 }
65
66 /*
67 * Deduplicate our file list (which may be zero-length).
68 * Returns zero on failure, non-zero on success.
69 */
70 static int
flist_dedupe(struct flist ** fl,size_t * sz)71 flist_dedupe(struct flist **fl, size_t *sz)
72 {
73 size_t i, j;
74 struct flist *new;
75 struct flist *f, *fnext;
76
77 if (*sz == 0)
78 return 1;
79
80 /* Create a new buffer, "new", and copy. */
81
82 new = calloc(*sz, sizeof(struct flist));
83 if (new == NULL) {
84 ERR("calloc");
85 return 0;
86 }
87
88 for (i = j = 0; i < *sz - 1; i++) {
89 f = &(*fl)[i];
90 fnext = &(*fl)[i + 1];
91
92 if (strcmp(f->wpath, fnext->wpath)) {
93 new[j++] = *f;
94 continue;
95 }
96
97 /*
98 * Our working (destination) paths are the same.
99 * If the actual file is the same (as given on the
100 * command-line), then we can just discard the first.
101 * Otherwise, we need to bail out: it means we have two
102 * different files with the relative path on the
103 * destination side.
104 */
105
106 if (strcmp(f->path, fnext->path) == 0) {
107 new[j++] = *f;
108 i++;
109 WARNX("%s: duplicate path: %s",
110 f->wpath, f->path);
111 free(fnext->path);
112 free(fnext->link);
113 fnext->path = fnext->link = NULL;
114 continue;
115 }
116
117 ERRX("%s: duplicate working path for "
118 "possibly different file: %s, %s",
119 f->wpath, f->path, fnext->path);
120 free(new);
121 return 0;
122 }
123
124 /* Don't forget the last entry. */
125
126 if (i == *sz - 1)
127 new[j++] = (*fl)[i];
128
129 /*
130 * Reassign to the deduplicated array.
131 * If we started out with *sz > 0, which we check for at the
132 * beginning, then we'll always continue having *sz > 0.
133 */
134
135 free(*fl);
136 *fl = new;
137 *sz = j;
138 assert(*sz);
139 return 1;
140 }
141
142 /*
143 * We're now going to find our top-level directories.
144 * This only applies to recursive mode.
145 * If we have the first element as the ".", then that's the "top
146 * directory" of our transfer.
147 * Otherwise, mark up all top-level directories in the set.
148 * XXX: the FLIST_TOP_LEVEL flag should indicate what is and what isn't
149 * a top-level directory, but I'm not sure if GPL rsync(1) respects it
150 * the same way.
151 */
152 static void
flist_topdirs(struct sess * sess,struct flist * fl,size_t flsz)153 flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz)
154 {
155 size_t i;
156 const char *cp;
157
158 if (!sess->opts->recursive)
159 return;
160
161 if (flsz && strcmp(fl[0].wpath, ".")) {
162 for (i = 0; i < flsz; i++) {
163 if (!S_ISDIR(fl[i].st.mode))
164 continue;
165 cp = strchr(fl[i].wpath, '/');
166 if (cp != NULL && cp[1] != '\0')
167 continue;
168 fl[i].st.flags |= FLSTAT_TOP_DIR;
169 LOG4("%s: top-level", fl[i].wpath);
170 }
171 } else if (flsz) {
172 fl[0].st.flags |= FLSTAT_TOP_DIR;
173 LOG4("%s: top-level", fl[0].wpath);
174 }
175 }
176
177 /*
178 * Filter through the fts() file information.
179 * We want directories (pre-order), regular files, and symlinks.
180 * Everything else is skipped and possibly warned about.
181 * Return zero to skip, non-zero to examine.
182 */
183 static int
flist_fts_check(struct sess * sess,FTSENT * ent)184 flist_fts_check(struct sess *sess, FTSENT *ent)
185 {
186
187 if (ent->fts_info == FTS_F ||
188 ent->fts_info == FTS_D ||
189 ent->fts_info == FTS_SL ||
190 ent->fts_info == FTS_SLNONE)
191 return 1;
192
193 if (ent->fts_info == FTS_DC) {
194 WARNX("%s: directory cycle", ent->fts_path);
195 } else if (ent->fts_info == FTS_DNR) {
196 errno = ent->fts_errno;
197 WARN("%s: unreadable directory", ent->fts_path);
198 } else if (ent->fts_info == FTS_DOT) {
199 WARNX("%s: skipping dot-file", ent->fts_path);
200 } else if (ent->fts_info == FTS_ERR) {
201 errno = ent->fts_errno;
202 WARN("%s", ent->fts_path);
203 } else if (ent->fts_info == FTS_DEFAULT) {
204 if ((sess->opts->devices && (S_ISBLK(ent->fts_statp->st_mode) ||
205 S_ISCHR(ent->fts_statp->st_mode))) ||
206 (sess->opts->specials &&
207 (S_ISFIFO(ent->fts_statp->st_mode) ||
208 S_ISSOCK(ent->fts_statp->st_mode)))) {
209 return 1;
210 }
211 WARNX("%s: skipping special", ent->fts_path);
212 } else if (ent->fts_info == FTS_NS) {
213 errno = ent->fts_errno;
214 WARN("%s: could not stat", ent->fts_path);
215 }
216
217 return 0;
218 }
219
220 /*
221 * Copy necessary elements in "st" into the fields of "f".
222 */
223 static void
flist_copy_stat(struct flist * f,const struct stat * st)224 flist_copy_stat(struct flist *f, const struct stat *st)
225 {
226 f->st.mode = st->st_mode;
227 f->st.uid = st->st_uid;
228 f->st.gid = st->st_gid;
229 f->st.size = st->st_size;
230 f->st.mtime = st->st_mtime;
231 f->st.rdev = st->st_rdev;
232 }
233
234 void
flist_free(struct flist * f,size_t sz)235 flist_free(struct flist *f, size_t sz)
236 {
237 size_t i;
238
239 if (f == NULL)
240 return;
241
242 for (i = 0; i < sz; i++) {
243 free(f[i].path);
244 free(f[i].link);
245 }
246 free(f);
247 }
248
249 /*
250 * Serialise our file list (which may be zero-length) to the wire.
251 * Makes sure that the receiver isn't going to block on sending us
252 * return messages on the log channel.
253 * Return zero on failure, non-zero on success.
254 */
255 int
flist_send(struct sess * sess,int fdin,int fdout,const struct flist * fl,size_t flsz)256 flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl,
257 size_t flsz)
258 {
259 size_t i, sz, gidsz = 0, uidsz = 0;
260 uint8_t flag;
261 const struct flist *f;
262 const char *fn;
263 struct ident *gids = NULL, *uids = NULL;
264 int rc = 0;
265
266 /* Double-check that we've no pending multiplexed data. */
267
268 LOG2("sending file metadata list: %zu", flsz);
269
270 for (i = 0; i < flsz; i++) {
271 f = &fl[i];
272 fn = f->wpath;
273 sz = strlen(f->wpath);
274 assert(sz > 0);
275 assert(sz < INT32_MAX);
276
277 /*
278 * If applicable, unclog the read buffer.
279 * This happens when the receiver has a lot of log
280 * messages and all we're doing is sending our file list
281 * without checking for messages.
282 */
283
284 if (sess->mplex_reads &&
285 io_read_check(fdin) &&
286 !io_read_flush(sess, fdin)) {
287 ERRX1("io_read_flush");
288 goto out;
289 }
290
291 /*
292 * For ease, make all of our filenames be "long"
293 * regardless their actual length.
294 * This also makes sure that we don't transmit a zero
295 * byte unintentionally.
296 */
297
298 flag = FLIST_NAME_LONG;
299 if ((FLSTAT_TOP_DIR & f->st.flags))
300 flag |= FLIST_TOP_LEVEL;
301
302 LOG3("%s: sending file metadata: "
303 "size %jd, mtime %jd, mode %o",
304 fn, (intmax_t)f->st.size,
305 (intmax_t)f->st.mtime, f->st.mode);
306
307 /* Now write to the wire. */
308 /* FIXME: buffer this. */
309
310 if (!io_write_byte(sess, fdout, flag)) {
311 ERRX1("io_write_byte");
312 goto out;
313 } else if (!io_write_int(sess, fdout, sz)) {
314 ERRX1("io_write_int");
315 goto out;
316 } else if (!io_write_buf(sess, fdout, fn, sz)) {
317 ERRX1("io_write_buf");
318 goto out;
319 } else if (!io_write_long(sess, fdout, f->st.size)) {
320 ERRX1("io_write_long");
321 goto out;
322 } else if (!io_write_uint(sess, fdout, (uint32_t)f->st.mtime)) {
323 ERRX1("io_write_uint");
324 goto out;
325 } else if (!io_write_uint(sess, fdout, f->st.mode)) {
326 ERRX1("io_write_uint");
327 goto out;
328 }
329
330 /* Conditional part: uid. */
331
332 if (sess->opts->preserve_uids) {
333 if (!io_write_uint(sess, fdout, f->st.uid)) {
334 ERRX1("io_write_uint");
335 goto out;
336 }
337 if (!idents_add(0, &uids, &uidsz, f->st.uid)) {
338 ERRX1("idents_add");
339 goto out;
340 }
341 }
342
343 /* Conditional part: gid. */
344
345 if (sess->opts->preserve_gids) {
346 if (!io_write_uint(sess, fdout, f->st.gid)) {
347 ERRX1("io_write_uint");
348 goto out;
349 }
350 if (!idents_add(1, &gids, &gidsz, f->st.gid)) {
351 ERRX1("idents_add");
352 goto out;
353 }
354 }
355
356 /* Conditional part: devices & special files. */
357
358 if ((sess->opts->devices && (S_ISBLK(f->st.mode) ||
359 S_ISCHR(f->st.mode))) ||
360 (sess->opts->specials && (S_ISFIFO(f->st.mode) ||
361 S_ISSOCK(f->st.mode)))) {
362 if (!io_write_int(sess, fdout, f->st.rdev)) {
363 ERRX1("io_write_int");
364 goto out;
365 }
366 }
367
368 /* Conditional part: link. */
369
370 if (S_ISLNK(f->st.mode) &&
371 sess->opts->preserve_links) {
372 fn = f->link;
373 sz = strlen(f->link);
374 assert(sz < INT32_MAX);
375 if (!io_write_int(sess, fdout, sz)) {
376 ERRX1("io_write_int");
377 goto out;
378 }
379 if (!io_write_buf(sess, fdout, fn, sz)) {
380 ERRX1("io_write_buf");
381 goto out;
382 }
383 }
384
385 if (S_ISREG(f->st.mode))
386 sess->total_size += f->st.size;
387 }
388
389 /* Signal end of file list. */
390
391 if (!io_write_byte(sess, fdout, 0)) {
392 ERRX1("io_write_byte");
393 goto out;
394 }
395
396 /* Conditionally write identifier lists. */
397
398 if (sess->opts->preserve_uids && !sess->opts->numeric_ids) {
399 LOG2("sending uid list: %zu", uidsz);
400 if (!idents_send(sess, fdout, uids, uidsz)) {
401 ERRX1("idents_send");
402 goto out;
403 }
404 }
405
406 if (sess->opts->preserve_gids && !sess->opts->numeric_ids) {
407 LOG2("sending gid list: %zu", gidsz);
408 if (!idents_send(sess, fdout, gids, gidsz)) {
409 ERRX1("idents_send");
410 goto out;
411 }
412 }
413
414 rc = 1;
415 out:
416 idents_free(gids, gidsz);
417 idents_free(uids, uidsz);
418 return rc;
419 }
420
421 /*
422 * Read the filename of a file list.
423 * This is the most expensive part of the file list transfer, so a lot
424 * of attention has gone into transmitting as little as possible.
425 * Micro-optimisation, but whatever.
426 * Fills in "f" with the full path on success.
427 * Returns zero on failure, non-zero on success.
428 */
429 static int
flist_recv_name(struct sess * sess,int fd,struct flist * f,uint8_t flags,char last[PATH_MAX])430 flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags,
431 char last[PATH_MAX])
432 {
433 uint8_t bval;
434 size_t partial = 0;
435 size_t pathlen = 0, len;
436
437 /*
438 * Read our filename.
439 * If we have FLIST_NAME_SAME, we inherit some of the last
440 * transmitted name.
441 * If we have FLIST_NAME_LONG, then the string length is greater
442 * than byte-size.
443 */
444
445 if (flags & FLIST_NAME_SAME) {
446 if (!io_read_byte(sess, fd, &bval)) {
447 ERRX1("io_read_byte");
448 return 0;
449 }
450 partial = bval;
451 }
452
453 /* Get the (possibly-remaining) filename length. */
454
455 if (flags & FLIST_NAME_LONG) {
456 if (!io_read_size(sess, fd, &pathlen)) {
457 ERRX1("io_read_size");
458 return 0;
459 }
460 } else {
461 if (!io_read_byte(sess, fd, &bval)) {
462 ERRX1("io_read_byte");
463 return 0;
464 }
465 pathlen = bval;
466 }
467
468 /* Allocate our full filename length. */
469 /* FIXME: maximum pathname length. */
470
471 if ((len = pathlen + partial) == 0) {
472 ERRX("security violation: zero-length pathname");
473 return 0;
474 }
475
476 if ((f->path = malloc(len + 1)) == NULL) {
477 ERR("malloc");
478 return 0;
479 }
480 f->path[len] = '\0';
481
482 if (flags & FLIST_NAME_SAME)
483 memcpy(f->path, last, partial);
484
485 if (!io_read_buf(sess, fd, f->path + partial, pathlen)) {
486 ERRX1("io_read_buf");
487 return 0;
488 }
489
490 if (f->path[0] == '/') {
491 ERRX("security violation: absolute pathname: %s",
492 f->path);
493 return 0;
494 }
495
496 if (strstr(f->path, "/../") != NULL ||
497 (len > 2 && strcmp(f->path + len - 3, "/..") == 0) ||
498 (len > 2 && strncmp(f->path, "../", 3) == 0) ||
499 strcmp(f->path, "..") == 0) {
500 ERRX("%s: security violation: backtracking pathname",
501 f->path);
502 return 0;
503 }
504
505 /* Record our last path and construct our filename. */
506
507 strlcpy(last, f->path, PATH_MAX);
508 f->wpath = f->path;
509 return 1;
510 }
511
512 /*
513 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE;
514 * Returns zero on failure, non-zero on success.
515 */
516 static int
flist_realloc(struct flist ** fl,size_t * sz,size_t * max)517 flist_realloc(struct flist **fl, size_t *sz, size_t *max)
518 {
519 void *pp;
520
521 if (*sz + 1 <= *max) {
522 (*sz)++;
523 return 1;
524 }
525
526 pp = recallocarray(*fl, *max,
527 *max + FLIST_CHUNK_SIZE, sizeof(struct flist));
528 if (pp == NULL) {
529 ERR("recallocarray");
530 return 0;
531 }
532 *fl = pp;
533 *max += FLIST_CHUNK_SIZE;
534 (*sz)++;
535 return 1;
536 }
537
538 /*
539 * Copy a regular or symbolic link file "path" into "f".
540 * This handles the correct path creation and symbolic linking.
541 * Returns zero on failure, non-zero on success.
542 */
543 static int
flist_append(struct flist * f,struct stat * st,const char * path)544 flist_append(struct flist *f, struct stat *st, const char *path)
545 {
546
547 /*
548 * Copy the full path for local addressing and transmit
549 * only the filename part for the receiver.
550 */
551
552 if ((f->path = strdup(path)) == NULL) {
553 ERR("strdup");
554 return 0;
555 }
556
557 if ((f->wpath = strrchr(f->path, '/')) == NULL)
558 f->wpath = f->path;
559 else
560 f->wpath++;
561
562 /*
563 * On the receiving end, we'll strip out all bits on the
564 * mode except for the file permissions.
565 * No need to warn about it here.
566 */
567
568 flist_copy_stat(f, st);
569
570 /* Optionally copy link information. */
571
572 if (S_ISLNK(st->st_mode)) {
573 f->link = symlink_read(f->path);
574 if (f->link == NULL) {
575 ERRX1("symlink_read");
576 return 0;
577 }
578 }
579
580 return 1;
581 }
582
583 /*
584 * Receive a file list from the wire, filling in length "sz" (which may
585 * possibly be zero) and list "flp" on success.
586 * Return zero on failure, non-zero on success.
587 */
588 int
flist_recv(struct sess * sess,int fd,struct flist ** flp,size_t * sz)589 flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz)
590 {
591 struct flist *fl = NULL;
592 struct flist *ff;
593 const struct flist *fflast = NULL;
594 size_t flsz = 0, flmax = 0, lsz, gidsz = 0, uidsz = 0;
595 uint8_t flag;
596 char last[PATH_MAX];
597 int64_t lval; /* temporary values... */
598 int32_t ival;
599 uint32_t uival;
600 struct ident *gids = NULL, *uids = NULL;
601
602 last[0] = '\0';
603
604 for (;;) {
605 if (!io_read_byte(sess, fd, &flag)) {
606 ERRX1("io_read_byte");
607 goto out;
608 } else if (flag == 0)
609 break;
610
611 if (!flist_realloc(&fl, &flsz, &flmax)) {
612 ERRX1("flist_realloc");
613 goto out;
614 }
615
616 ff = &fl[flsz - 1];
617 fflast = flsz > 1 ? &fl[flsz - 2] : NULL;
618
619 /* Filename first. */
620
621 if (!flist_recv_name(sess, fd, ff, flag, last)) {
622 ERRX1("flist_recv_name");
623 goto out;
624 }
625
626 /* Read the file size. */
627
628 if (!io_read_long(sess, fd, &lval)) {
629 ERRX1("io_read_long");
630 goto out;
631 }
632 ff->st.size = lval;
633
634 /* Read the modification time. */
635
636 if (!(flag & FLIST_TIME_SAME)) {
637 if (!io_read_uint(sess, fd, &uival)) {
638 ERRX1("io_read_uint");
639 goto out;
640 }
641 ff->st.mtime = uival; /* beyond 2038 */
642 } else if (fflast == NULL) {
643 ff->st.mtime = 0;
644 } else
645 ff->st.mtime = fflast->st.mtime;
646
647 /* Read the file mode. */
648
649 if (!(flag & FLIST_MODE_SAME)) {
650 if (!io_read_uint(sess, fd, &uival)) {
651 ERRX1("io_read_uint");
652 goto out;
653 }
654 ff->st.mode = uival;
655 } else if (fflast == NULL) {
656 ff->st.mode = 0;
657 } else
658 ff->st.mode = fflast->st.mode;
659
660 /* Conditional part: uid. */
661
662 if (sess->opts->preserve_uids) {
663 if (!(flag & FLIST_UID_SAME)) {
664 if (!io_read_uint(sess, fd, &uival)) {
665 ERRX1("io_read_int");
666 goto out;
667 }
668 ff->st.uid = uival;
669 } else if (fflast == NULL) {
670 ff->st.uid = 0;
671 } else
672 ff->st.uid = fflast->st.uid;
673 }
674
675 /* Conditional part: gid. */
676
677 if (sess->opts->preserve_gids) {
678 if (!(flag & FLIST_GID_SAME)) {
679 if (!io_read_uint(sess, fd, &uival)) {
680 ERRX1("io_read_uint");
681 goto out;
682 }
683 ff->st.gid = uival;
684 } else if (fflast == NULL) {
685 ff->st.gid = 0;
686 } else
687 ff->st.gid = fflast->st.gid;
688 }
689
690 /* Conditional part: devices & special files. */
691
692 if ((sess->opts->devices && (S_ISBLK(ff->st.mode) ||
693 S_ISCHR(ff->st.mode))) ||
694 (sess->opts->specials && (S_ISFIFO(ff->st.mode) ||
695 S_ISSOCK(ff->st.mode)))) {
696 if (!(flag & FLIST_RDEV_SAME)) {
697 if (!io_read_int(sess, fd, &ival)) {
698 ERRX1("io_read_int");
699 goto out;
700 }
701 ff->st.rdev = ival;
702 } else if (fflast == NULL) {
703 ff->st.rdev = 0;
704 } else
705 ff->st.rdev = fflast->st.rdev;
706 }
707
708 /* Conditional part: link. */
709
710 if (S_ISLNK(ff->st.mode) &&
711 sess->opts->preserve_links) {
712 if (!io_read_size(sess, fd, &lsz)) {
713 ERRX1("io_read_size");
714 goto out;
715 } else if (lsz == 0) {
716 ERRX("empty link name");
717 goto out;
718 }
719 ff->link = calloc(lsz + 1, 1);
720 if (ff->link == NULL) {
721 ERR("calloc");
722 goto out;
723 }
724 if (!io_read_buf(sess, fd, ff->link, lsz)) {
725 ERRX1("io_read_buf");
726 goto out;
727 }
728 }
729
730 LOG3("%s: received file metadata: "
731 "size %jd, mtime %jd, mode %o, rdev (%d, %d)",
732 ff->path, (intmax_t)ff->st.size,
733 (intmax_t)ff->st.mtime, ff->st.mode,
734 major(ff->st.rdev), minor(ff->st.rdev));
735
736 if (S_ISREG(ff->st.mode))
737 sess->total_size += ff->st.size;
738 }
739
740 /* Conditionally read the user/group list. */
741
742 if (sess->opts->preserve_uids && !sess->opts->numeric_ids) {
743 if (!idents_recv(sess, fd, &uids, &uidsz)) {
744 ERRX1("idents_recv");
745 goto out;
746 }
747 LOG2("received uid list: %zu", uidsz);
748 }
749
750 if (sess->opts->preserve_gids && !sess->opts->numeric_ids) {
751 if (!idents_recv(sess, fd, &gids, &gidsz)) {
752 ERRX1("idents_recv");
753 goto out;
754 }
755 LOG2("received gid list: %zu", gidsz);
756 }
757
758 /* Remember to order the received list. */
759
760 LOG2("received file metadata list: %zu", flsz);
761 qsort(fl, flsz, sizeof(struct flist), flist_cmp);
762 flist_topdirs(sess, fl, flsz);
763 *sz = flsz;
764 *flp = fl;
765
766 /* Conditionally remap and reassign identifiers. */
767
768 if (sess->opts->preserve_uids && !sess->opts->numeric_ids) {
769 idents_remap(sess, 0, uids, uidsz);
770 idents_assign_uid(sess, fl, flsz, uids, uidsz);
771 }
772
773 if (sess->opts->preserve_gids && !sess->opts->numeric_ids) {
774 idents_remap(sess, 1, gids, gidsz);
775 idents_assign_gid(sess, fl, flsz, gids, gidsz);
776 }
777
778 idents_free(gids, gidsz);
779 idents_free(uids, uidsz);
780 return 1;
781 out:
782 flist_free(fl, flsz);
783 idents_free(gids, gidsz);
784 idents_free(uids, uidsz);
785 *sz = 0;
786 *flp = NULL;
787 return 0;
788 }
789
790 /*
791 * Generate a flist possibly-recursively given a file root, which may
792 * also be a regular file or symlink.
793 * On success, augments the generated list in "flp" of length "sz".
794 * Returns zero on failure, non-zero on success.
795 */
796 static int
flist_gen_dirent(struct sess * sess,char * root,struct flist ** fl,size_t * sz,size_t * max)797 flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
798 size_t *max)
799 {
800 char *cargv[2], *cp;
801 int rc = 0, flag;
802 FTS *fts;
803 FTSENT *ent;
804 struct flist *f;
805 size_t i, flsz = 0, nxdev = 0, stripdir;
806 dev_t *newxdev, *xdev = NULL;
807 struct stat st;
808
809 cargv[0] = root;
810 cargv[1] = NULL;
811
812 /*
813 * If we're a file, then revert to the same actions we use for
814 * the non-recursive scan.
815 */
816
817 if (lstat(root, &st) == -1) {
818 ERR("%s: lstat", root);
819 return 0;
820 } else if (S_ISREG(st.st_mode)) {
821 /* filter files */
822 if (rules_match(root, 0) == -1) {
823 WARNX("%s: skipping excluded file", root);
824 return 1;
825 }
826 if (!flist_realloc(fl, sz, max)) {
827 ERRX1("flist_realloc");
828 return 0;
829 }
830 f = &(*fl)[(*sz) - 1];
831 assert(f != NULL);
832
833 if (!flist_append(f, &st, root)) {
834 ERRX1("flist_append");
835 return 0;
836 }
837 return 1;
838 } else if (S_ISLNK(st.st_mode)) {
839 if (!sess->opts->preserve_links) {
840 WARNX("%s: skipping symlink", root);
841 return 1;
842 }
843 /* filter files */
844 if (rules_match(root, 0) == -1) {
845 WARNX("%s: skipping excluded symlink", root);
846 return 1;
847 }
848 if (!flist_realloc(fl, sz, max)) {
849 ERRX1("flist_realloc");
850 return 0;
851 }
852 f = &(*fl)[(*sz) - 1];
853 assert(f != NULL);
854
855 if (!flist_append(f, &st, root)) {
856 ERRX1("flist_append");
857 return 0;
858 }
859 return 1;
860 } else if (!S_ISDIR(st.st_mode)) {
861 WARNX("%s: skipping special", root);
862 return 1;
863 }
864
865 /*
866 * If we end with a slash, it means that we're not supposed to
867 * copy the directory part itself---only the contents.
868 * So set "stripdir" to be what we take out.
869 */
870
871 stripdir = strlen(root);
872 assert(stripdir > 0);
873 if (root[stripdir - 1] != '/')
874 stripdir = 0;
875
876 /*
877 * If we're not stripping anything, then see if we need to strip
878 * out the leading material in the path up to and including the
879 * last directory component.
880 */
881
882 if (stripdir == 0)
883 if ((cp = strrchr(root, '/')) != NULL)
884 stripdir = cp - root + 1;
885
886 /*
887 * If we're recursive, then we need to take down all of the
888 * files and directory components, so use fts(3).
889 * Copying the information file-by-file into the flstat.
890 * We'll make sense of it in flist_send.
891 */
892
893 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
894 ERR("fts_open");
895 return 0;
896 }
897
898 errno = 0;
899 while ((ent = fts_read(fts)) != NULL) {
900 if (!flist_fts_check(sess, ent)) {
901 errno = 0;
902 continue;
903 }
904
905 /* We don't allow symlinks without -l. */
906
907 assert(ent->fts_statp != NULL);
908 if (S_ISLNK(ent->fts_statp->st_mode) &&
909 !sess->opts->preserve_links) {
910 WARNX("%s: skipping symlink", ent->fts_path);
911 continue;
912 }
913
914 /*
915 * If rsync is told to avoid crossing a filesystem
916 * boundary when recursing, then replace all mount point
917 * directories with empty directories. The latter is
918 * prevented by telling rsync multiple times to avoid
919 * crossing a filesystem boundary when recursing.
920 * Replacing mount point directories is tricky. We need
921 * to sort out which directories to include. As such,
922 * keep track of unique device inodes, and use these for
923 * comparison.
924 */
925
926 if (sess->opts->one_file_system &&
927 ent->fts_statp->st_dev != st.st_dev) {
928 if (sess->opts->one_file_system > 1 ||
929 !S_ISDIR(ent->fts_statp->st_mode))
930 continue;
931
932 flag = 0;
933 for (i = 0; i < nxdev; i++)
934 if (xdev[i] == ent->fts_statp->st_dev) {
935 flag = 1;
936 break;
937 }
938 if (flag)
939 continue;
940
941 if ((newxdev = reallocarray(xdev, nxdev + 1,
942 sizeof(dev_t))) == NULL) {
943 ERRX1("reallocarray");
944 goto out;
945 }
946 xdev = newxdev;
947 xdev[nxdev] = ent->fts_statp->st_dev;
948 nxdev++;
949 }
950
951 /* filter files */
952 if (rules_match(ent->fts_path + stripdir,
953 (ent->fts_info == FTS_D)) == -1) {
954 WARNX("%s: skipping excluded file",
955 ent->fts_path + stripdir);
956 fts_set(fts, ent, FTS_SKIP);
957 continue;
958 }
959
960 /* Allocate a new file entry. */
961
962 if (!flist_realloc(fl, sz, max)) {
963 ERRX1("flist_realloc");
964 goto out;
965 }
966 flsz++;
967 f = &(*fl)[*sz - 1];
968
969 /* Our path defaults to "." for the root. */
970
971 if (ent->fts_path[stripdir] == '\0') {
972 if (asprintf(&f->path, "%s.", ent->fts_path) == -1) {
973 ERR("asprintf");
974 f->path = NULL;
975 goto out;
976 }
977 } else {
978 if ((f->path = strdup(ent->fts_path)) == NULL) {
979 ERR("strdup");
980 goto out;
981 }
982 }
983
984 f->wpath = f->path + stripdir;
985 flist_copy_stat(f, ent->fts_statp);
986
987 /* Optionally copy link information. */
988
989 if (S_ISLNK(ent->fts_statp->st_mode)) {
990 f->link = symlink_read(ent->fts_accpath);
991 if (f->link == NULL) {
992 ERRX1("symlink_read");
993 goto out;
994 }
995 }
996
997 /* Reset errno for next fts_read() call. */
998 errno = 0;
999 }
1000 if (errno) {
1001 ERR("fts_read");
1002 goto out;
1003 }
1004
1005 LOG3("generated %zu filenames: %s", flsz, root);
1006 rc = 1;
1007 out:
1008 fts_close(fts);
1009 free(xdev);
1010 return rc;
1011 }
1012
/*
 * Generate a flist recursively given the array of directories (or
 * files, symlinks, doesn't matter) specified in argv (argc >0).
 * Any previous content of "flp" and "sz" is ignored: the list is built
 * from scratch.
 * On success, stores the generated list in "flp" with length "sz",
 * which may be zero.
 * On failure, the partial list is freed and "flp" and "sz" are set to
 * NULL and zero.
 * Returns zero on failure, non-zero on success.
 */
static int
flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp,
    size_t *sz)
{
	size_t		 i, max = 0;

	/*
	 * Our capacity counter starts at zero, so the list itself must
	 * start out empty as well instead of relying on what the caller
	 * left in the output parameters.
	 */

	*flp = NULL;
	*sz = 0;

	for (i = 0; i < argc; i++) {
		if (flist_gen_dirent(sess, argv[i], flp, sz, &max))
			continue;

		/*
		 * Free by capacity, not length: slots past the length
		 * are zeroed, so this is safe.
		 */

		ERRX1("flist_gen_dirent");
		flist_free(*flp, max);
		*flp = NULL;
		*sz = 0;
		return 0;
	}

	LOG2("recursively generated %zu filenames", *sz);
	return 1;
}
1041
1042 /*
1043 * Generate list of files from the command-line argc (>0) and argv.
1044 * On success, stores the generated list in "flp" with length "sz",
1045 * which may be zero.
1046 * Returns zero on failure, non-zero on success.
1047 */
1048 static int
flist_gen_files(struct sess * sess,size_t argc,char ** argv,struct flist ** flp,size_t * sz)1049 flist_gen_files(struct sess *sess, size_t argc, char **argv,
1050 struct flist **flp, size_t *sz)
1051 {
1052 struct flist *fl = NULL, *f;
1053 size_t i, flsz = 0;
1054 struct stat st;
1055
1056 assert(argc);
1057
1058 if ((fl = calloc(argc, sizeof(struct flist))) == NULL) {
1059 ERR("calloc");
1060 return 0;
1061 }
1062
1063 for (i = 0; i < argc; i++) {
1064 if (argv[i][0] == '\0')
1065 continue;
1066 if (lstat(argv[i], &st) == -1) {
1067 ERR("%s: lstat", argv[i]);
1068 goto out;
1069 }
1070
1071 /*
1072 * File type checks.
1073 * In non-recursive mode, we don't accept directories.
1074 * We also skip symbolic links without -l.
1075 * Beyond that, we only accept regular files.
1076 */
1077
1078 if (S_ISDIR(st.st_mode)) {
1079 WARNX("%s: skipping directory", argv[i]);
1080 continue;
1081 } else if (S_ISLNK(st.st_mode)) {
1082 if (!sess->opts->preserve_links) {
1083 WARNX("%s: skipping symlink", argv[i]);
1084 continue;
1085 }
1086 } else if (!S_ISREG(st.st_mode)) {
1087 WARNX("%s: skipping special", argv[i]);
1088 continue;
1089 }
1090
1091 /* filter files */
1092 if (rules_match(argv[i], S_ISDIR(st.st_mode)) == -1) {
1093 WARNX("%s: skipping excluded file", argv[i]);
1094 continue;
1095 }
1096
1097 f = &fl[flsz++];
1098 assert(f != NULL);
1099
1100 /* Add this file to our file-system worldview. */
1101
1102 if (!flist_append(f, &st, argv[i])) {
1103 ERRX1("flist_append");
1104 goto out;
1105 }
1106 }
1107
1108 LOG2("non-recursively generated %zu filenames", flsz);
1109 *sz = flsz;
1110 *flp = fl;
1111 return 1;
1112 out:
1113 flist_free(fl, argc);
1114 *sz = 0;
1115 *flp = NULL;
1116 return 0;
1117 }
1118
1119 /*
1120 * Generate a sorted, de-duplicated list of file metadata.
1121 * In non-recursive mode (the default), we use only the files we're
1122 * given.
1123 * Otherwise, directories are recursively examined.
1124 * Returns zero on failure, non-zero on success.
1125 * On success, "fl" will need to be freed with flist_free().
1126 */
1127 int
flist_gen(struct sess * sess,size_t argc,char ** argv,struct flist ** flp,size_t * sz)1128 flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp,
1129 size_t *sz)
1130 {
1131 int rc;
1132
1133 assert(argc > 0);
1134 rc = sess->opts->recursive ?
1135 flist_gen_dirs(sess, argc, argv, flp, sz) :
1136 flist_gen_files(sess, argc, argv, flp, sz);
1137
1138 /* After scanning, lock our file-system view. */
1139
1140 if (!rc)
1141 return 0;
1142
1143 qsort(*flp, *sz, sizeof(struct flist), flist_cmp);
1144
1145 if (flist_dedupe(flp, sz)) {
1146 flist_topdirs(sess, *flp, *sz);
1147 return 1;
1148 }
1149
1150 ERRX1("flist_dedupe");
1151 flist_free(*flp, *sz);
1152 *flp = NULL;
1153 *sz = 0;
1154 return 0;
1155 }
1156
1157 /*
1158 * Generate a list of files in root to delete that are within the
1159 * top-level directories stipulated by "wfl".
1160 * Only handles symbolic links, directories, and regular files.
1161 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero
1162 * on success.
1163 * On success, "fl" will need to be freed with flist_free().
1164 */
1165 int
flist_gen_dels(struct sess * sess,const char * root,struct flist ** fl,size_t * sz,const struct flist * wfl,size_t wflsz)1166 flist_gen_dels(struct sess *sess, const char *root, struct flist **fl,
1167 size_t *sz, const struct flist *wfl, size_t wflsz)
1168 {
1169 char **cargv = NULL;
1170 int rc = 0, c, flag;
1171 FTS *fts = NULL;
1172 FTSENT *ent;
1173 struct flist *f;
1174 struct stat st;
1175 size_t cargvs = 0, i, j, max = 0, stripdir;
1176 ENTRY hent;
1177 ENTRY *hentp;
1178
1179 *fl = NULL;
1180 *sz = 0;
1181
1182 /* Only run this code when we're recursive. */
1183
1184 if (!sess->opts->recursive)
1185 return 1;
1186
1187 /*
1188 * Gather up all top-level directories for scanning.
1189 * This is stipulated by rsync's --delete behaviour, where we
1190 * only delete things in the top-level directories given on the
1191 * command line.
1192 */
1193
1194 assert(wflsz > 0);
1195 for (i = 0; i < wflsz; i++)
1196 if (FLSTAT_TOP_DIR & wfl[i].st.flags)
1197 cargvs++;
1198 if (cargvs == 0)
1199 return 1;
1200
1201 if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) {
1202 ERR("calloc");
1203 return 0;
1204 }
1205
1206 /*
1207 * If we're given just a "." as the first entry, that means
1208 * we're doing a relative copy with a trailing slash.
1209 * Special-case this just for the sake of simplicity.
1210 * Otherwise, look through all top-levels.
1211 */
1212
1213 if (wflsz && strcmp(wfl[0].wpath, ".") == 0) {
1214 assert(cargvs == 1);
1215 assert(S_ISDIR(wfl[0].st.mode));
1216 if (asprintf(&cargv[0], "%s/", root) == -1) {
1217 ERR("asprintf");
1218 cargv[0] = NULL;
1219 goto out;
1220 }
1221 cargv[1] = NULL;
1222 } else {
1223 for (i = j = 0; i < wflsz; i++) {
1224 if (!(FLSTAT_TOP_DIR & wfl[i].st.flags))
1225 continue;
1226 assert(S_ISDIR(wfl[i].st.mode));
1227 assert(strcmp(wfl[i].wpath, "."));
1228 c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath);
1229 if (c == -1) {
1230 ERR("asprintf");
1231 cargv[j] = NULL;
1232 goto out;
1233 }
1234 LOG4("%s: will scan for deletions", cargv[j]);
1235 j++;
1236 }
1237 assert(j == cargvs);
1238 cargv[j] = NULL;
1239 }
1240
1241 LOG2("delete from %zu directories", cargvs);
1242
1243 /*
1244 * Next, use the standard hcreate(3) hashtable interface to hash
1245 * all of the files that we want to synchronise.
1246 * This way, we'll be able to determine which files we want to
1247 * delete in O(n) time instead of O(n * search) time.
1248 * Plus, we can do the scan in-band and only allocate the files
1249 * we want to delete.
1250 */
1251
1252 if (!hcreate(wflsz)) {
1253 ERR("hcreate");
1254 goto out;
1255 }
1256
1257 for (i = 0; i < wflsz; i++) {
1258 memset(&hent, 0, sizeof(ENTRY));
1259 if ((hent.key = strdup(wfl[i].wpath)) == NULL) {
1260 ERR("strdup");
1261 goto out;
1262 }
1263 if ((hentp = hsearch(hent, ENTER)) == NULL) {
1264 ERR("hsearch");
1265 goto out;
1266 } else if (hentp->key != hent.key) {
1267 ERRX("%s: duplicate", wfl[i].wpath);
1268 free(hent.key);
1269 goto out;
1270 }
1271 }
1272
1273 /*
1274 * Now we're going to try to descend into all of the top-level
1275 * directories stipulated by the file list.
1276 * If the directories don't exist, it's ok.
1277 */
1278
1279 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
1280 ERR("fts_open");
1281 goto out;
1282 }
1283
1284 stripdir = strlen(root) + 1;
1285 errno = 0;
1286 while ((ent = fts_read(fts)) != NULL) {
1287 if (ent->fts_info == FTS_NS)
1288 continue;
1289 if (!flist_fts_check(sess, ent)) {
1290 errno = 0;
1291 continue;
1292 } else if (stripdir >= ent->fts_pathlen)
1293 continue;
1294
1295 assert(ent->fts_statp != NULL);
1296
1297 /*
1298 * If rsync is told to avoid crossing a filesystem
1299 * boundary when recursing, then exclude all entries
1300 * from the list with a device inode, which does not
1301 * match that of one of the top-level directories.
1302 */
1303
1304 if (sess->opts->one_file_system) {
1305 flag = 0;
1306 for (i = 0; i < wflsz; i++) {
1307 if (stat(wfl[i].path, &st) == -1) {
1308 ERR("%s: stat", wfl[i].path);
1309 goto out;
1310 }
1311 if (ent->fts_statp->st_dev == st.st_dev) {
1312 flag = 1;
1313 break;
1314 }
1315 }
1316 if (!flag)
1317 continue;
1318 }
1319
1320 /* filter files on delete */
1321 /* TODO handle --delete-excluded */
1322 if (rules_match(ent->fts_path + stripdir,
1323 (ent->fts_info == FTS_D)) == -1) {
1324 WARNX("skip excluded file %s",
1325 ent->fts_path + stripdir);
1326 fts_set(fts, ent, FTS_SKIP);
1327 continue;
1328 }
1329
1330 /* Look up in hashtable. */
1331
1332 memset(&hent, 0, sizeof(ENTRY));
1333 hent.key = ent->fts_path + stripdir;
1334 if (hsearch(hent, FIND) != NULL)
1335 continue;
1336
1337 /* Not found: we'll delete it. */
1338
1339 if (!flist_realloc(fl, sz, &max)) {
1340 ERRX1("flist_realloc");
1341 goto out;
1342 }
1343 f = &(*fl)[*sz - 1];
1344
1345 if ((f->path = strdup(ent->fts_path)) == NULL) {
1346 ERR("strdup");
1347 goto out;
1348 }
1349 f->wpath = f->path + stripdir;
1350 flist_copy_stat(f, ent->fts_statp);
1351 errno = 0;
1352 }
1353
1354 if (errno) {
1355 ERR("fts_read");
1356 goto out;
1357 }
1358
1359 qsort(*fl, *sz, sizeof(struct flist), flist_cmp);
1360 rc = 1;
1361 out:
1362 if (fts != NULL)
1363 fts_close(fts);
1364 for (i = 0; i < cargvs; i++)
1365 free(cargv[i]);
1366 free(cargv);
1367 hdestroy();
1368 return rc;
1369 }
1370
1371 /*
1372 * Delete all files and directories in "fl".
1373 * If called with a zero-length "fl", does nothing.
1374 * If dry_run is specified, simply write what would be done.
1375 * Return zero on failure, non-zero on success.
1376 */
1377 int
flist_del(struct sess * sess,int root,const struct flist * fl,size_t flsz)1378 flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz)
1379 {
1380 ssize_t i;
1381 int flag;
1382
1383 if (flsz == 0)
1384 return 1;
1385
1386 assert(sess->opts->del);
1387 assert(sess->opts->recursive);
1388
1389 for (i = flsz - 1; i >= 0; i--) {
1390 LOG1("%s: deleting", fl[i].wpath);
1391 if (sess->opts->dry_run)
1392 continue;
1393 assert(root != -1);
1394 flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0;
1395 if (unlinkat(root, fl[i].wpath, flag) == -1 &&
1396 errno != ENOENT) {
1397 ERR("%s: unlinkat", fl[i].wpath);
1398 return 0;
1399 }
1400 }
1401
1402 return 1;
1403 }
1404