1 /* $Id$ */
2 /*
3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include "config.h"
18
19 #include <sys/mman.h>
20 #include <sys/stat.h>
21 #include COMPAT_ENDIAN_H
22
23 #include <assert.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <inttypes.h>
27 #include <math.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <time.h>
32 #include <unistd.h>
33
34 #include "extern.h"
35 #include "md4.h"
36
/*
 * A small optimisation: have a 1 MB pre-write buffer that collects
 * output before it is handed to write().
 * This value becomes the "obufmax" of every download context.
 * Disable the pre-write buffer by having this be zero, in which case
 * no buffer is allocated and data is written out directly.
 * (It doesn't affect performance much.)
 */
#define	OBUF_SIZE	(1024 * 1024)
43
/*
 * Phase of the download state machine for the current file.
 * The downloader is re-entered once per phase.
 */
enum downloadst {
	/* No file in progress: read the next file index (or end of phase). */
	DOWNLOAD_READ_NEXT = 0,
	/* Origin file was opened (or is absent): stat/mmap it, make temp file. */
	DOWNLOAD_READ_LOCAL,
	/* Temporary file is open: read data blocks and tokens from sender. */
	DOWNLOAD_READ_REMOTE
};
49
/*
 * Like struct upload, but used to keep track of what we're downloading.
 * This also is managed by the receiver process.
 * The per-file members are reset by download_reinit(); the file list,
 * descriptors passed at allocation and the pre-write buffer persist
 * across files.
 */
struct download {
	enum downloadst state; /* current phase for this file */
	size_t idx; /* index of current file in fl */
	struct blkset blk; /* its blocks */
	void *map; /* mmap of origin file or MAP_FAILED */
	size_t mapsz; /* length of map in bytes */
	int ofd; /* open origin file or -1 */
	int fd; /* open output (temporary) file or -1 */
	char *fname; /* output (temporary) filename or NULL */
	MD4_CTX ctx; /* current hashing context */
	off_t downloaded; /* bytes received as data from sender */
	off_t total; /* bytes written to the output so far */
	const struct flist *fl; /* file list */
	size_t flsz; /* size of file list */
	int rootfd; /* destination directory */
	int fdin; /* read descriptor from sender */
	char *obuf; /* pre-write buffer */
	size_t obufsz; /* bytes currently held in obuf */
	size_t obufmax; /* capacity of obuf (zero: no buffering) */
};
74
75
76 /*
77 * Simply log the filename.
78 */
79 static void
log_file(struct sess * sess,const struct download * dl,const struct flist * f)80 log_file(struct sess *sess,
81 const struct download *dl, const struct flist *f)
82 {
83 float frac, tot = dl->total;
84 int prec = 0;
85 const char *unit = "B";
86
87 if (sess->opts->server)
88 return;
89
90 frac = (dl->total == 0) ? 100.0 :
91 100.0 * dl->downloaded / dl->total;
92
93 if (dl->total > 1024 * 1024 * 1024) {
94 tot = dl->total / (1024. * 1024. * 1024.);
95 prec = 3;
96 unit = "GB";
97 } else if (dl->total > 1024 * 1024) {
98 tot = dl->total / (1024. * 1024.);
99 prec = 2;
100 unit = "MB";
101 } else if (dl->total > 1024) {
102 tot = dl->total / 1024.;
103 prec = 1;
104 unit = "KB";
105 }
106
107 LOG1("%s (%.*f %s, %.1f%% downloaded)",
108 f->path, prec, tot, unit, frac);
109 }
110
111 /*
112 * Reinitialise a download context w/o overwriting the persistent parts
113 * of the structure (like p->fl or p->flsz) for index "idx".
114 * The MD4 context is pre-seeded.
115 */
116 static void
download_reinit(struct sess * sess,struct download * p,size_t idx)117 download_reinit(struct sess *sess, struct download *p, size_t idx)
118 {
119 int32_t seed = htole32(sess->seed);
120
121 assert(p->state == DOWNLOAD_READ_NEXT);
122
123 p->idx = idx;
124 memset(&p->blk, 0, sizeof(struct blkset));
125 p->map = MAP_FAILED;
126 p->mapsz = 0;
127 p->ofd = -1;
128 p->fd = -1;
129 p->fname = NULL;
130 MD4_Init(&p->ctx);
131 p->downloaded = p->total = 0;
132 /* Don't touch p->fl. */
133 /* Don't touch p->flsz. */
134 /* Don't touch p->rootfd. */
135 /* Don't touch p->fdin. */
136 MD4_Update(&p->ctx, &seed, sizeof(int32_t));
137 }
138
139 /*
140 * Free a download context.
141 * If "cleanup" is non-zero, we also try to clean up the temporary file,
142 * assuming that it has been opened in p->fd.
143 */
144 static void
download_cleanup(struct download * p,int cleanup)145 download_cleanup(struct download *p, int cleanup)
146 {
147
148 if (p->map != MAP_FAILED) {
149 assert(p->mapsz);
150 munmap(p->map, p->mapsz);
151 p->map = MAP_FAILED;
152 p->mapsz = 0;
153 }
154 if (p->ofd != -1) {
155 close(p->ofd);
156 p->ofd = -1;
157 }
158 if (p->fd != -1) {
159 close(p->fd);
160 if (cleanup && p->fname != NULL)
161 unlinkat(p->rootfd, p->fname, 0);
162 p->fd = -1;
163 }
164 free(p->fname);
165 p->fname = NULL;
166 p->state = DOWNLOAD_READ_NEXT;
167 }
168
169 /*
170 * Initial allocation of the download object using the file list "fl" of
171 * size "flsz", the destination "rootfd", and the sender read "fdin".
172 * Returns NULL on allocation failure.
173 * On success, download_free() must be called with the pointer.
174 */
175 struct download *
download_alloc(struct sess * sess,int fdin,const struct flist * fl,size_t flsz,int rootfd)176 download_alloc(struct sess *sess, int fdin,
177 const struct flist *fl, size_t flsz, int rootfd)
178 {
179 struct download *p;
180
181 if ((p = malloc(sizeof(struct download))) == NULL) {
182 ERR("malloc");
183 return NULL;
184 }
185
186 p->state = DOWNLOAD_READ_NEXT;
187 p->fl = fl;
188 p->flsz = flsz;
189 p->rootfd = rootfd;
190 p->fdin = fdin;
191 download_reinit(sess, p, 0);
192 p->obufsz = 0;
193 p->obuf = NULL;
194 p->obufmax = OBUF_SIZE;
195 if (p->obufmax && (p->obuf = malloc(p->obufmax)) == NULL) {
196 ERR("malloc");
197 free(p);
198 return NULL;
199 }
200 return p;
201 }
202
203 /*
204 * Perform all cleanups (including removing stray files) and free.
205 * Passing a NULL to this function is ok.
206 */
207 void
download_free(struct download * p)208 download_free(struct download *p)
209 {
210
211 if (p == NULL)
212 return;
213 download_cleanup(p, 1);
214 free(p->obuf);
215 free(p);
216 }
217
218 /*
219 * Optimisation: instead of dumping directly into the output file, keep
220 * a buffer and write as much as we can into the buffer.
221 * That way, we can avoid calling write() too much, and instead call it
222 * with big buffers.
223 * To flush the buffer w/o changing it, pass 0 as "sz".
224 * Returns zero on failure, non-zero on success.
225 */
226 static int
buf_copy(const char * buf,size_t sz,struct download * p)227 buf_copy(const char *buf, size_t sz, struct download *p)
228 {
229 size_t rem, tocopy;
230 ssize_t ssz;
231
232 assert(p->obufsz <= p->obufmax);
233
234 /*
235 * Copy as much as we can.
236 * If we've copied everything, exit.
237 * If we have no pre-write buffer (obufmax of zero), this never
238 * gets called, so we never buffer anything.
239 */
240
241 if (sz && p->obufsz < p->obufmax) {
242 assert(p->obuf != NULL);
243 rem = p->obufmax - p->obufsz;
244 assert(rem > 0);
245 tocopy = rem < sz ? rem : sz;
246 memcpy(p->obuf + p->obufsz, buf, tocopy);
247 sz -= tocopy;
248 buf += tocopy;
249 p->obufsz += tocopy;
250 assert(p->obufsz <= p->obufmax);
251 if (sz == 0)
252 return 1;
253 }
254
255 /* Drain the main buffer. */
256
257 if (p->obufsz) {
258 assert(p->obufmax);
259 assert(p->obufsz <= p->obufmax);
260 assert(p->obuf != NULL);
261 if ((ssz = write(p->fd, p->obuf, p->obufsz)) < 0) {
262 ERR("%s: write", p->fname);
263 return 0;
264 } else if ((size_t)ssz != p->obufsz) {
265 ERRX("%s: short write", p->fname);
266 return 0;
267 }
268 p->obufsz = 0;
269 }
270
271 /*
272 * Now drain anything left.
273 * If we have no pre-write buffer, this is it.
274 */
275
276 if (sz) {
277 if ((ssz = write(p->fd, buf, sz)) < 0) {
278 ERR("%s: write", p->fname);
279 return 0;
280 } else if ((size_t)ssz != sz) {
281 ERRX("%s: short write", p->fname);
282 return 0;
283 }
284 }
285 return 1;
286 }
287
288 /*
289 * The downloader waits on a file the sender is going to give us, opens
290 * and mmaps the existing file, opens a temporary file, dumps the file
291 * (or metadata) into the temporary file, then renames.
292 * This happens in several possible phases to avoid blocking.
293 * Returns <0 on failure, 0 on no more data (end of phase), >0 on
294 * success (more data to be read from the sender).
295 */
296 int
rsync_downloader(struct download * p,struct sess * sess,int * ofd)297 rsync_downloader(struct download *p, struct sess *sess, int *ofd)
298 {
299 int c;
300 int32_t idx, rawtok;
301 const struct flist *f;
302 size_t sz, tok;
303 struct stat st;
304 char *buf = NULL;
305 unsigned char ourmd[MD4_DIGEST_LENGTH],
306 md[MD4_DIGEST_LENGTH];
307
308 /*
309 * If we don't have a download already in session, then the next
310 * one is coming in.
311 * Read either the stop (phase) signal from the sender or block
312 * metadata, in which case we open our file and wait for data.
313 */
314
315 if (p->state == DOWNLOAD_READ_NEXT) {
316 if (!io_read_int(sess, p->fdin, &idx)) {
317 ERRX1("io_read_int");
318 return -1;
319 } else if (idx >= 0 && (size_t)idx >= p->flsz) {
320 ERRX("index out of bounds");
321 return -1;
322 } else if (idx < 0) {
323 LOG3("downloader: phase complete");
324 return 0;
325 }
326
327 /* Short-circuit: dry_run mode does nothing. */
328
329 if (sess->opts->dry_run)
330 return 1;
331
332 /*
333 * Now get our block information.
334 * This is all we'll need to reconstruct the file from
335 * the map, as block sizes are regular.
336 */
337
338 download_reinit(sess, p, idx);
339 if (!blk_send_ack(sess, p->fdin, &p->blk)) {
340 ERRX1("blk_send_ack");
341 goto out;
342 }
343
344 /*
345 * Next, we want to open the existing file for using as
346 * block input.
347 * We do this in a non-blocking way, so if the open
348 * succeeds, then we'll go reentrant til the file is
349 * readable and we can mmap() it.
350 * Set the file descriptor that we want to wait for.
351 */
352
353 p->state = DOWNLOAD_READ_LOCAL;
354 f = &p->fl[idx];
355 p->ofd = openat(p->rootfd, f->path, O_RDONLY | O_NONBLOCK, 0);
356
357 if (p->ofd == -1 && errno != ENOENT) {
358 ERR("%s: openat", f->path);
359 goto out;
360 } else if (p->ofd != -1) {
361 *ofd = p->ofd;
362 return 1;
363 }
364
365 /* Fall-through: there's no file. */
366 }
367
368 /*
369 * At this point, the server is sending us data and we want to
370 * hoover it up as quickly as possible or we'll deadlock.
371 * We want to be pulling off of f->fdin as quickly as possible,
372 * so perform as much buffering as we can.
373 */
374
375 f = &p->fl[p->idx];
376
377 /*
378 * Next in sequence: we have an open download session but
379 * haven't created our temporary file.
380 * This means that we've already opened (or tried to open) the
381 * original file in a nonblocking way, and we can map it.
382 */
383
384 if (p->state == DOWNLOAD_READ_LOCAL) {
385 assert(p->fname == NULL);
386
387 /*
388 * Try to fstat() the file descriptor if valid and make
389 * sure that we're still a regular file.
390 * Then, if it has non-zero size, mmap() it for hashing.
391 */
392
393 if (p->ofd != -1 &&
394 fstat(p->ofd, &st) == -1) {
395 ERR("%s: fstat", f->path);
396 goto out;
397 } else if (p->ofd != -1 && !S_ISREG(st.st_mode)) {
398 WARNX("%s: not regular", f->path);
399 goto out;
400 }
401
402 if (p->ofd != -1 && st.st_size > 0) {
403 p->mapsz = st.st_size;
404 p->map = mmap(NULL, p->mapsz,
405 PROT_READ, MAP_SHARED, p->ofd, 0);
406 if (p->map == MAP_FAILED) {
407 ERR("%s: mmap", f->path);
408 goto out;
409 }
410 }
411
412 /* Success either way: we don't need this. */
413
414 *ofd = -1;
415
416 /* Create the temporary file. */
417
418 if (mktemplate(&p->fname, f->path, sess->opts->recursive) ==
419 -1) {
420 ERRX1("mktemplate");
421 goto out;
422 }
423
424 if ((p->fd = mkstempat(p->rootfd, p->fname)) == -1) {
425 ERR("mkstempat");
426 goto out;
427 }
428
429 /*
430 * FIXME: we can technically wait until the temporary
431 * file is writable, but since it's guaranteed to be
432 * empty, I don't think this is a terribly expensive
433 * operation as it doesn't involve reading the file into
434 * memory beforehand.
435 */
436
437 LOG3("%s: temporary: %s", f->path, p->fname);
438 p->state = DOWNLOAD_READ_REMOTE;
439 return 1;
440 }
441
442 /*
443 * This matches the sequence in blk_flush().
444 * If we've gotten here, then we have a possibly-open map file
445 * (not for new files) and our temporary file is writable.
446 * We read the size/token, then optionally the data.
447 * The size >0 for reading data, 0 for no more data, and <0 for
448 * a token indicator.
449 */
450
451 again:
452 assert(p->state == DOWNLOAD_READ_REMOTE);
453 assert(p->fname != NULL);
454 assert(p->fd != -1);
455 assert(p->fdin != -1);
456
457 if (!io_read_int(sess, p->fdin, &rawtok)) {
458 ERRX1("io_read_int");
459 goto out;
460 }
461
462 if (rawtok > 0) {
463 sz = rawtok;
464 if ((buf = malloc(sz)) == NULL) {
465 ERR("realloc");
466 goto out;
467 }
468 if (!io_read_buf(sess, p->fdin, buf, sz)) {
469 ERRX1("io_read_int");
470 goto out;
471 } else if (!buf_copy(buf, sz, p)) {
472 ERRX1("buf_copy");
473 goto out;
474 }
475 p->total += sz;
476 p->downloaded += sz;
477 LOG4("%s: received %zu B block", p->fname, sz);
478 MD4_Update(&p->ctx, buf, sz);
479 free(buf);
480
481 /* Fast-track more reads as they arrive. */
482
483 if ((c = io_read_check(p->fdin)) < 0) {
484 ERRX1("io_read_check");
485 goto out;
486 } else if (c > 0)
487 goto again;
488
489 return 1;
490 } else if (rawtok < 0) {
491 tok = -rawtok - 1;
492 if (tok >= p->blk.blksz) {
493 ERRX("%s: token not in block set: %zu (have %zu blocks)",
494 p->fname, tok, p->blk.blksz);
495 goto out;
496 }
497 sz = tok == p->blk.blksz - 1 ? p->blk.rem : p->blk.len;
498 assert(sz);
499 assert(p->map != MAP_FAILED);
500 buf = p->map + (tok * p->blk.len);
501
502 /*
503 * Now we read from our block.
504 * We should only be at this point if we have a
505 * block to read from, i.e., if we were able to
506 * map our origin file and create a block
507 * profile from it.
508 */
509
510 assert(p->map != MAP_FAILED);
511 if (!buf_copy(buf, sz, p)) {
512 ERRX1("buf_copy");
513 goto out;
514 }
515 p->total += sz;
516 LOG4("%s: copied %zu B", p->fname, sz);
517 MD4_Update(&p->ctx, buf, sz);
518
519 /* Fast-track more reads as they arrive. */
520
521 if ((c = io_read_check(p->fdin)) < 0) {
522 ERRX1("io_read_check");
523 goto out;
524 } else if (c > 0)
525 goto again;
526
527 return 1;
528 }
529
530 if (!buf_copy(NULL, 0, p)) {
531 ERRX1("buf_copy");
532 goto out;
533 }
534
535 assert(rawtok == 0);
536 assert(p->obufsz == 0);
537
538 /*
539 * Make sure our resulting MD4 hashes match.
540 * FIXME: if the MD4 hashes don't match, then our file has
541 * changed out from under us.
542 * This should require us to re-run the sequence in another
543 * phase.
544 */
545
546 MD4_Final(ourmd, &p->ctx);
547
548 if (!io_read_buf(sess, p->fdin, md, MD4_DIGEST_LENGTH)) {
549 ERRX1("io_read_buf");
550 goto out;
551 } else if (memcmp(md, ourmd, MD4_DIGEST_LENGTH)) {
552 ERRX("%s: hash does not match", p->fname);
553 goto out;
554 }
555
556 /* Adjust our file metadata (uid, mode, etc.). */
557
558 if (!rsync_set_metadata(sess, 1, p->fd, f, p->fname)) {
559 ERRX1("rsync_set_metadata");
560 goto out;
561 }
562
563 /* Finally, rename the temporary to the real file. */
564
565 if (renameat(p->rootfd, p->fname, p->rootfd, f->path) == -1) {
566 ERR("%s: renameat: %s", p->fname, f->path);
567 goto out;
568 }
569
570 log_file(sess, p, f);
571 download_cleanup(p, 0);
572 return 1;
573 out:
574 download_cleanup(p, 1);
575 return -1;
576 }
577