1 
2 /*
3  *   zsync - client side rsync over http
4  *   Copyright (C) 2004,2005,2007,2009 Colin Phipps <cph@moria.org.uk>
5  *
6  *   This program is free software; you can redistribute it and/or modify
7  *   it under the terms of the Artistic License v2 (see the accompanying
8  *   file COPYING for the full license terms), or, at your option, any later
9  *   version of the same license.
10  *
11  *   This program is distributed in the hope that it will be useful,
12  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *   COPYING file for details.
15  */
16 
17 /* This is the heart of zsync.
18  *
19  * .zsync file parsing and glue between all the main components of zsync.
20  *
21  * This file is where the .zsync metadata format is understood and read; it
22  * extracts it and creates the corresponding rcksum object to apply the rsync
23  * algorithm in constructing the target. It applies the zmap to convert byte
24  * ranges between compressed and uncompressed versions of the data as needed,
25  * and does decompression on compressed data received. It joins the HTTP code
26  * to the rsync algorithm by converting lists of blocks from rcksum into lists
27  * of byte ranges at particular URLs to be retrieved by the HTTP code.
28  *
29  * It also handles:
30  * - blocking edge cases (decompressed data not lining up with blocks for rcksum;
31  *   last block of the file only containing partial data)
32  * - recompression of the compressed data at the end of the transfer;
33  * - checksum verification of the entire output.
34  */
35 #include "zsglobal.h"
36 
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <sys/types.h>
40 #include <unistd.h>
41 #include <string.h>
42 #include <ctype.h>
43 #include <time.h>
44 
45 #include <arpa/inet.h>
46 
47 #ifdef WITH_DMALLOC
48 # include <dmalloc.h>
49 #endif
50 
51 #include "zlib/zlib.h"
52 
53 #include "librcksum/rcksum.h"
54 #include "zsync.h"
55 #include "sha1.h"
56 #include "zmap.h"
57 
/* Name of the whole-file checksum method, exactly as it appears as a header
 * tag in the .zsync file. Ideally there would be a table of supported checksum
 * methods here, but SHA-1 is the only one implemented so far. */
const char ckmeth_sha1[] = "SHA-1";
61 
/* Whitelist of gzip(1) option strings that a .zsync file is allowed to ask
 * for when requesting recompression. Anything not listed here is refused, so
 * that a hostile .zsync cannot put arbitrary text on the gzip command line. */
static const char *const gzip_safe_option[] = {
    "--best",
    "",
    "--rsync",
    "--rsync --best",
    "--best --no-name",
    "--no-name",
    "--rsync --no-name",
    "--rsync --best --no-name",
};

/* Number of entries in gzip_safe_option[] */
const int gzip_safe_options =
    sizeof gzip_safe_option / sizeof gzip_safe_option[0];
75 
/****************************************************************************
 *
 * zsync_state object and methods
 *
 * One zsync_state describes a single target file: everything learned from the
 * .zsync control file, plus (through the contained rcksum_state) the state of
 * the local copy of that target which is being assembled.
 */
struct zsync_state {
    /* rsync algorithm state: the per-block checksums, and the in-progress
     * local version of the target */
    struct rcksum_state *rs;

    off_t filelen;      /* Length of the target file (Length: header) */
    int blocks;         /* Number of blocks in the target (length / blocksize, rounded up) */
    size_t blocksize;   /* Size of one block (Blocksize: header) */

    /* Whole-file checksum as a hex string, and the method that produced it */
    char *checksum;
    const char *checksum_method;

    /* URLs of uncompressed copies of the target, and how many there are */
    char **url;
    int nurl;

    /* URLs of compressed copies of the target, how many there are, and the
     * zmap describing that compressed version */
    struct zmap *zmap;
    char **zurl;
    int nzurl;

    /* Name of the local working file, once it has been obtained from rcksum */
    char *cur_filename;

    /* Output file name hints from the .zsync */
    char *filename;     /* Filename: header */
    char *zfilename;    /* Z-Filename: header */

    /* Recompression request (Recompress: header) */
    char *gzopts;       /* options to pass to gzip(1) */
    char *gzhead;       /* hex-encoded gzip file header to write (carries the mtime) */

    time_t mtime;       /* MTime: header, or -1 when absent */
};
116 
/* Forward declarations of file-local helpers that are defined further down */
static int zsync_read_blocksums(struct zsync_state *zs, FILE *f,
                                int rsum_bytes, int checksum_bytes,
                                int seq_matches);
static int zsync_sha1(struct zsync_state *zs, int fh);
static int zsync_recompress(struct zsync_state *zs);
static time_t parse_822(const char *ts);
123 
/* char*[] = append_ptrlist(&num, char*[], "to add")
 * Minimal growable list of strings. Grows the array p (currently holding *n
 * entries) by one slot, stores a in the new slot and increments *n. A NULL a
 * is ignored and the list is handed back untouched. Returns the (possibly
 * moved) array; running out of memory is fatal and ends the program. */
static char **append_ptrlist(int *n, char **p, char *a) {
    char **grown;

    if (a == NULL)
        return p;

    grown = realloc(p, (*n + 1) * sizeof *grown);
    if (grown == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(1);
    }

    grown[*n] = a;
    *n += 1;
    return grown;
}
139 
140 /* Constructor */
zsync_begin(FILE * f)141 struct zsync_state *zsync_begin(FILE * f) {
142     /* Defaults for the checksum bytes and sequential matches properties of the
143      * rcksum_state. These are the defaults from versions of zsync before these
144      * were variable. */
145     int checksum_bytes = 16, rsum_bytes = 4, seq_matches = 1;
146 
147     /* Field names that we can ignore if present and not
148      * understood. This allows new headers to be added without breaking
149      * backwards compatibility, and conversely to add headers that do break
150      * backwards compat and have old clients give meaningful errors. */
151     char *safelines = NULL;
152 
153     /* Allocate memory for the object */
154     struct zsync_state *zs = calloc(sizeof *zs, 1);
155 
156     if (!zs)
157         return NULL;
158 
159     /* Any non-zero defaults here. */
160     zs->mtime = -1;
161 
162     for (;;) {
163         char buf[1024];
164         char *p = NULL;
165         int l;
166 
167         if (fgets(buf, sizeof(buf), f) != NULL) {
168             if (buf[0] == '\n')
169                 break;
170             l = strlen(buf) - 1;
171             while (l >= 0
172                    && (buf[l] == '\n' || buf[l] == '\r' || buf[l] == ' '))
173                 buf[l--] = 0;
174 
175             p = strchr(buf, ':');
176         }
177         if (p && *(p + 1) == ' ') {
178             *p++ = 0;
179             p++;
180             if (!strcmp(buf, "zsync")) {
181                 if (!strcmp(p, "0.0.4")) {
182                     fprintf(stderr, "This version of zsync is not compatible with zsync 0.0.4 streams.\n");
183                     free(zs);
184                     return NULL;
185                 }
186             }
187             else if (!strcmp(buf, "Min-Version")) {
188                 if (strcmp(p, VERSION) > 0) {
189                     fprintf(stderr,
190                             "control file indicates that zsync-%s or better is required\n",
191                             p);
192                     free(zs);
193                     return NULL;
194                 }
195             }
196             else if (!strcmp(buf, "Length")) {
197                 zs->filelen = atoll(p);
198             }
199             else if (!strcmp(buf, "Filename")) {
200                 zs->filename = strdup(p);
201             }
202             else if (!strcmp(buf, "Z-Filename")) {
203                 zs->zfilename = strdup(p);
204             }
205             else if (!strcmp(buf, "URL")) {
206                 zs->url = (char **)append_ptrlist(&(zs->nurl), zs->url, strdup(p));
207             }
208             else if (!strcmp(buf, "Z-URL")) {
209                 zs->zurl = (char **)append_ptrlist(&(zs->nzurl), zs->zurl, strdup(p));
210             }
211             else if (!strcmp(buf, "Blocksize")) {
212                 zs->blocksize = atol(p);
213                 if (zs->blocksize < 0 || (zs->blocksize & (zs->blocksize - 1))) {
214                     fprintf(stderr, "nonsensical blocksize %ld\n", zs->blocksize);
215                     free(zs);
216                     return NULL;
217                 }
218             }
219             else if (!strcmp(buf, "Hash-Lengths")) {
220                 if (sscanf
221                     (p, "%d,%d,%d", &seq_matches, &rsum_bytes,
222                      &checksum_bytes) != 3 || rsum_bytes < 1 || rsum_bytes > 4
223                     || checksum_bytes < 3 || checksum_bytes > 16
224                     || seq_matches > 2 || seq_matches < 1) {
225                     fprintf(stderr, "nonsensical hash lengths line %s\n", p);
226                     free(zs);
227                     return NULL;
228                 }
229             }
230             else if (zs->blocks && !strcmp(buf, "Z-Map2")) {
231                 int nzblocks;
232                 struct gzblock *zblock;
233 
234                 nzblocks = atoi(p);
235                 if (nzblocks < 0) {
236                     fprintf(stderr, "bad Z-Map line\n");
237                     free(zs);
238                     return NULL;
239                 }
240 
241                 zblock = malloc(nzblocks * sizeof *zblock);
242                 if (zblock) {
243                     if (fread(zblock, sizeof *zblock, nzblocks, f) < nzblocks) {
244                         fprintf(stderr, "premature EOF after Z-Map\n");
245                         free(zs);
246                         return NULL;
247                     }
248 
249                     zs->zmap = zmap_make(zblock, nzblocks);
250                     free(zblock);
251                 }
252             }
253             else if (!strcmp(buf, ckmeth_sha1)) {
254                 if (strlen(p) != SHA1_DIGEST_LENGTH * 2) {
255                     fprintf(stderr, "SHA-1 digest from control file is wrong length.\n");
256                 }
257                 else {
258                     zs->checksum = strdup(p);
259                     zs->checksum_method = ckmeth_sha1;
260                 }
261             }
262             else if (!strcmp(buf, "Safe")) {
263                 safelines = strdup(p);
264             }
265             else if (!strcmp(buf, "Recompress")) {
266                 zs->gzhead = strdup(p);
267                 if (zs->gzhead) {
268                     int i;
269                     char *q = strchr(zs->gzhead, ' ');
270                     if (!q)
271                         q = zs->gzhead + strlen(zs->gzhead);
272 
273                     if (*q)
274                         *q++ = 0;
275                     /* Whitelist for safe options for gzip command line */
276                     for (i = 0; i < gzip_safe_options; i++)
277                         if (!strcmp(q, gzip_safe_option[i])) {
278                             zs->gzopts = strdup(q);
279                             break;
280                         }
281                     if( !zs->gzopts ) {
282                         fprintf(stderr, "bad recompress options, rejected\n");
283                         free(zs->gzhead);
284                     }
285                 }
286             }
287             else if (!strcmp(buf, "MTime")) {
288                 zs->mtime = parse_822(p);
289             }
290             else if (!safelines || !strstr(safelines, buf)) {
291                 fprintf(stderr,
292                         "unrecognised tag %s - you need a newer version of zsync.\n",
293                         buf);
294                 free(zs);
295                 return NULL;
296             }
297             if (zs->filelen && zs->blocksize)
298                 zs->blocks = (zs->filelen + zs->blocksize - 1) / zs->blocksize;
299         }
300         else {
301             fprintf(stderr, "Bad line - not a zsync file? \"%s\"\n", buf);
302             free(zs);
303             return NULL;
304         }
305     }
306     if (!zs->filelen || !zs->blocksize) {
307         fprintf(stderr, "Not a zsync file (looked for Blocksize and Length lines)\n");
308         free(zs);
309         return NULL;
310     }
311     if (zsync_read_blocksums(zs, f, rsum_bytes, checksum_bytes, seq_matches) != 0) {
312         free(zs);
313         return NULL;
314     }
315     return zs;
316 }
317 
318 /* zsync_read_blocksums(self, FILE*, rsum_bytes, checksum_bytes, seq_matches)
319  * Called during construction only, this creates the rcksum_state that stores
320  * the per-block checksums of the target file and holds the local working copy
321  * of the in-progress target. And it populates the per-block checksums from the
322  * given file handle, which must be reading from the .zsync at the start of the
323  * checksums.
324  * rsum_bytes, checksum_bytes, seq_matches are settings for the checksums,
325  * passed through to the rcksum_state. */
zsync_read_blocksums(struct zsync_state * zs,FILE * f,int rsum_bytes,int checksum_bytes,int seq_matches)326 static int zsync_read_blocksums(struct zsync_state *zs, FILE * f,
327                                 int rsum_bytes, int checksum_bytes,
328                                 int seq_matches) {
329     /* Make the rcksum_state first */
330     if (!(zs->rs = rcksum_init(zs->blocks, zs->blocksize, rsum_bytes,
331                                checksum_bytes, seq_matches))) {
332         return -1;
333     }
334 
335     /* Now read in and store the checksums */
336     zs_blockid id = 0;
337     for (; id < zs->blocks; id++) {
338         struct rsum r = { 0, 0 };
339         unsigned char checksum[CHECKSUM_SIZE];
340 
341         /* Read in */
342         if (fread(((char *)&r) + 4 - rsum_bytes, rsum_bytes, 1, f) < 1
343             || fread((void *)&checksum, checksum_bytes, 1, f) < 1) {
344 
345             /* Error - free the rcksum_state and tell the caller to bail */
346             fprintf(stderr, "short read on control file; %s\n",
347                     strerror(ferror(f)));
348             rcksum_end(zs->rs);
349             return -1;
350         }
351 
352         /* Convert to host endian and store */
353         r.a = ntohs(r.a);
354         r.b = ntohs(r.b);
355         rcksum_add_target_block(zs->rs, id, r, checksum);
356     }
357     return 0;
358 }
359 
/* parse_822(buf[])
 * Parse an RFC822 date string, with or without the leading day name.
 * E.g. Tue, 25 Jul 2006 20:02:17 +0000
 * Returns a time_t, or -1 if the string matches neither form.
 *
 * strptime() only stores the fields it parses, so the struct tm is cleared
 * before every attempt; tm_isdst is set to -1 so that mktime() works out
 * daylight saving itself instead of reading an indeterminate value.
 *
 * NOTE(review): mktime() treats the broken-down time as local time, so the
 * zone offset parsed by %z does not appear to be applied - confirm whether
 * callers expect UTC. */
static time_t parse_822(const char* ts) {
    static const char *const formats[] = {
        "%a, %d %b %Y %H:%M:%S %z",
        "%d %b %Y %H:%M:%S %z"
    };
    size_t i;

    for (i = 0; i < sizeof formats / sizeof formats[0]; i++) {
        struct tm t;

        memset(&t, 0, sizeof t);
        t.tm_isdst = -1;
        if (strptime(ts, formats[i], &t) != NULL)
            return mktime(&t);
    }
    return -1;
}
373 
374 /* zsync_hint_decompress(self)
375  * Returns true if we think we'll be able to download compressed data to get
376  * the needed data to complete the target file */
zsync_hint_decompress(const struct zsync_state * zs)377 int zsync_hint_decompress(const struct zsync_state *zs) {
378     return (zs->nzurl > 0 ? 1 : 0);
379 }
380 
381 /* zsync_blocksize(self)
382  * Returns the blocksize used by zsync on this target. */
zsync_blocksize(const struct zsync_state * zs)383 int zsync_blocksize(const struct zsync_state *zs) {
384     return zs->blocksize;
385 }
386 
387 /* char* = zsync_filename(self)
388  * Returns the suggested filename to be used for the final result of this
389  * zsync.  Malloced string to be freed by the caller. */
zsync_filename(const struct zsync_state * zs)390 char *zsync_filename(const struct zsync_state *zs) {
391     return strdup(zs->gzhead && zs->zfilename ? zs->zfilename : zs->filename);
392 }
393 
394 /* time_t = zsync_mtime(self)
395  * Returns the mtime on the original copy of the target; for the client program
396  * to set the mtime of the local file to match, if it so chooses.
397  * Or -1 if no mtime specified in the .zsync */
zsync_mtime(const struct zsync_state * zs)398 time_t zsync_mtime(const struct zsync_state *zs) {
399     return zs->mtime;
400 }
401 
402 /* zsync_status(self)
403  * Returns  0 if we have no data in the target file yet.
404  *          1 if we have some but not all
405  *          2 or more if we have all.
406  * The caller should not rely on exact values 2+; just test >= 2. Values >2 may
407  * be used in later versions of libzsync. */
zsync_status(const struct zsync_state * zs)408 int zsync_status(const struct zsync_state *zs) {
409     int todo = rcksum_blocks_todo(zs->rs);
410 
411     if (todo == zs->blocks)
412         return 0;
413     if (todo > 0)
414         return 1;
415     return 2;                   /* TODO: more? */
416 }
417 
418 /* zsync_progress(self, &got, &total)
419  * Writes the number of bytes got, and the total to get, into the long longs.
420  */
zsync_progress(const struct zsync_state * zs,long long * got,long long * total)421 void zsync_progress(const struct zsync_state *zs, long long *got,
422                     long long *total) {
423 
424     if (got) {
425         int todo = zs->blocks - rcksum_blocks_todo(zs->rs);
426         *got = todo * zs->blocksize;
427     }
428     if (total)
429         *total = zs->blocks * zs->blocksize;
430 }
431 
432 /* zsync_get_urls(self, &num, &type)
433  * Returns a (pointer to an) array of URLs (returning the number of them in
434  * num) that are remote available copies of the target file (according to the
435  * .zsync).
436  * Note that these URLs could be for encoded versions of the target; a 'type'
437  * is returned in *type which tells libzsync in later calls what version of the
438  * target is being retrieved. */
zsync_get_urls(struct zsync_state * zs,int * n,int * t)439 const char *const *zsync_get_urls(struct zsync_state *zs, int *n, int *t) {
440     if (zs->zmap && zs->nzurl) {
441         *n = zs->nzurl;
442         *t = 1;
443         return zs->zurl;
444     }
445     else {
446         *n = zs->nurl;
447         *t = 0;
448         return zs->url;
449     }
450 }
451 
452 /* zsync_needed_byte_ranges(self, &num, type)
453  * Returns an array of offsets (2*num of them) for the start and end of num
454  * byte ranges in the given type of version of the target (type as returned by
455  * a zsync_get_urls call), such that retrieving all these byte ranges would be
456  * sufficient to obtain a complete copy of the target file.
457  */
zsync_needed_byte_ranges(struct zsync_state * zs,int * num,int type)458 off_t *zsync_needed_byte_ranges(struct zsync_state * zs, int *num, int type) {
459     int nrange;
460     off_t *byterange;
461     int i;
462 
463     /* Request all needed block ranges */
464     zs_blockid *blrange = rcksum_needed_block_ranges(zs->rs, &nrange, 0, 0x7fffffff);
465     if (!blrange)
466         return NULL;
467 
468     /* Allocate space for byte ranges */
469     byterange = malloc(2 * nrange * sizeof *byterange);
470     if (!byterange) {
471         free(blrange);
472         return NULL;
473     }
474 
475     /* Now convert blocks to bytes.
476      * Note: Must cast one operand to off_t as both blocksize and blrange[x]
477      * are int's whereas the product must be a file offfset. Needed so we don't
478      * truncate file offsets to 32bits on 32bit platforms. */
479     for (i = 0; i < nrange; i++) {
480         byterange[2 * i] = blrange[2 * i] * (off_t)zs->blocksize;
481         byterange[2 * i + 1] = blrange[2 * i + 1] * (off_t)zs->blocksize - 1;
482     }
483     free(blrange);      /* And release the blocks, we're done with them */
484 
485     switch (type) {
486     case 0:
487         *num = nrange;
488         return byterange;
489     case 1:
490         {   /* Convert ranges in the uncompressed data to ranges in the compressed data */
491             off_t *zbyterange =
492                 zmap_to_compressed_ranges(zs->zmap, byterange, nrange, &nrange);
493 
494             /* Store the number of compressed ranges and return them, freeing
495              * the uncompressed ones now we've used them. */
496             if (zbyterange) {
497                 *num = nrange;
498             }
499             free(byterange);
500             return zbyterange;
501         }
502     default:
503         free(byterange);
504         return NULL;
505     }
506 }
507 
508 /* zsync_submit_source_file(self, FILE*, progress)
509  * Read the given stream, applying the rsync rolling checksum algorithm to
510  * identify any blocks of data in common with the target file. Blocks found are
511  * written to our local copy of the target in progress. Progress reports if
512  * progress != 0  */
zsync_submit_source_file(struct zsync_state * zs,FILE * f,int progress)513 int zsync_submit_source_file(struct zsync_state *zs, FILE * f, int progress) {
514     return rcksum_submit_source_file(zs->rs, f, progress);
515 }
516 
zsync_cur_filename(struct zsync_state * zs)517 char *zsync_cur_filename(struct zsync_state *zs) {
518     if (!zs->cur_filename)
519         zs->cur_filename = rcksum_filename(zs->rs);
520 
521     return zs->cur_filename;
522 }
523 
524 /* zsync_rename_file(self, filename)
525  * Tell libzsync to move the local copy of the target (or under construction
526  * target) to the given filename. */
zsync_rename_file(struct zsync_state * zs,const char * f)527 int zsync_rename_file(struct zsync_state *zs, const char *f) {
528     char *rf = zsync_cur_filename(zs);
529 
530     int x = rename(rf, f);
531 
532     if (!x) {
533         free(rf);
534         zs->cur_filename = strdup(f);
535     }
536     else
537         perror("rename");
538 
539     return x;
540 }
541 
/* int hexdigit(char)
 * Maps a character to 0..15 as a hex digit (or 0 if not valid hex digit).
 * Only 0-9, a-f and A-F count as hex digits; every other character,
 * including the letters g-z / G-Z and bytes with the top bit set, maps to 0.
 */
static int hexdigit(char c) {
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return 0xa + (c - 'a');
    if (c >= 'A' && c <= 'F')
        return 0xa + (c - 'A');
    return 0;
}
549 
550 /* zsync_complete(self)
551  * Finish a zsync download. Should be called once all blocks have been
552  * retrieved successfully. This returns 0 if the file passes the final
553  * whole-file checksum and if any recompression requested by the .zsync file is
554  * done.
555  * Returns -1 on error (and prints the error to stderr)
556  *          0 if successful but no checksum verified
557  *          1 if successful including checksum verified
558  */
zsync_complete(struct zsync_state * zs)559 int zsync_complete(struct zsync_state *zs) {
560     int rc = 0;
561 
562     /* We've finished with the rsync algorithm. Take over the local copy from
563      * librcksum and free our rcksum state. */
564     int fh = rcksum_filehandle(zs->rs);
565     zsync_cur_filename(zs);
566     rcksum_end(zs->rs);
567     zs->rs = NULL;
568 
569     /* Truncate the file to the exact length (to remove any trailing NULs from
570      * the last block); return to the start of the file ready to verify. */
571     if (ftruncate(fh, zs->filelen) != 0) {
572         perror("ftruncate");
573         rc = -1;
574     }
575     if (lseek(fh, 0, SEEK_SET) != 0) {
576         perror("lseek");
577         rc = -1;
578     }
579 
580     /* Do checksum check */
581     if (rc == 0 && zs->checksum && !strcmp(zs->checksum_method, ckmeth_sha1)) {
582         rc = zsync_sha1(zs, fh);
583     }
584     close(fh);
585 
586     /* Do any requested recompression */
587     if (rc >= 0 && zs->gzhead && zs->gzopts) {
588         if (zsync_recompress(zs) != 0) {
589             return -1;
590         }
591     }
592     return rc;
593 }
594 
595 /* zsync_sha1(self, filedesc)
596  * Given the currently-open-and-at-start-of-file complete local copy of the
597  * target, read it and compare the SHA1 checksum with the one from the .zsync.
598  * Returns -1 or 1 as per zsync_complete.
599  */
zsync_sha1(struct zsync_state * zs,int fh)600 static int zsync_sha1(struct zsync_state *zs, int fh) {
601     SHA1_CTX shactx;
602 
603     {                           /* Do SHA1 of file contents */
604         unsigned char buf[4096];
605         int rc;
606 
607         SHA1Init(&shactx);
608         while (0 < (rc = read(fh, buf, sizeof buf))) {
609             SHA1Update(&shactx, buf, rc);
610         }
611         if (rc < 0) {
612             perror("read");
613             return -1;
614         }
615     }
616 
617     {                           /* And compare result of the SHA1 with the one from the .zsync */
618         unsigned char digest[SHA1_DIGEST_LENGTH];
619         int i;
620 
621         SHA1Final(digest, &shactx);
622 
623         for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
624             int j;
625             sscanf(&(zs->checksum[2 * i]), "%2x", &j);
626             if (j != digest[i]) {
627                 return -1;
628             }
629         }
630         return 1; /* Checksum verified okay */
631     }
632 }
633 
634 /* zsync_recompress(self)
635  * Called when we have a complete local copy of the uncompressed data, to
636  * perform compression requested in the .zsync.
637  *
638  * Shells out to the standard system gzip(1). Replaces the gzip file header
639  * with the one supplied in the .zsync; this means we should get an identical
640  * compressed file output to the original compressed file on the source system
641  * (to allow the user to verify a checksum on the compressed file, or just
642  * because the user is picky and wants their compressed file to match the
643  * original).
644  *
645  * Returns 0 on success, -1 on error (which is reported on stderr). */
zsync_recompress(struct zsync_state * zs)646 static int zsync_recompress(struct zsync_state *zs) {
647     /* Recompression. This is a fugly mess, calling gzip on the temporary file with options
648      *  read out of the .zsync, reading its output and replacing the gzip header. Ugh. */
649     FILE *g;
650     FILE *zout;
651     int rc = 0;
652 
653     char cmd[1024];
654     snprintf(cmd, sizeof(cmd), "gzip -n %s < ", zs->gzopts);
655 
656     {   /* Add input filename, shell-escaped, to the command line */
657         int i = 0;
658         size_t j = strlen(cmd);
659         char c;
660 
661         while ((c = zs->cur_filename[i++]) != 0 && j < sizeof(cmd) - 2) {
662             if (!isalnum(c))
663                 cmd[j++] = '\\';
664             cmd[j++] = c;
665         }
666         cmd[j] = 0;
667     }
668 
669     /* Read gzipped version of the data via pipe from gzip; write it to our new
670      * output file, except that we replace the gzip header with our own from
671      * the .zsync file. */
672     g = popen(cmd, "r");
673     if (g) {
674         char zoname[1024];
675 
676         snprintf(zoname, sizeof(zoname), "%s.gz", zs->cur_filename);
677         zout = fopen(zoname, "w");
678 
679         if (zout) {
680             char *p = zs->gzhead;
681             int skip = 1;
682 
683             while (p[0] && p[1]) {
684                 if (fputc((hexdigit(p[0]) << 4) + hexdigit(p[1]), zout) == EOF) {
685                     perror("putc");
686                     rc = -1;
687                 }
688                 p += 2;
689             }
690             while (!feof(g)) {
691                 char buf[1024];
692                 int r;
693                 const char *p = buf;
694 
695                 if ((r = fread(buf, 1, sizeof(buf), g)) < 0) {
696                     perror("fread");
697                     rc = -1;
698                     goto leave_it;
699                 }
700                 if (skip) {
701                     p = skip_zhead(buf);
702                     skip = 0;
703                 }
704                 if (fwrite(p, 1, r - (p - buf), zout) != r - (p - buf)) {
705                     perror("fwrite");
706                     rc = -1;
707                     goto leave_it;
708                 }
709             }
710 
711           leave_it:
712             if (fclose(zout) != 0) {
713                 perror("close");
714                 rc = -1;
715             }
716         }
717         if (fclose(g) != 0) {
718             perror("close");
719             rc = -1;
720         }
721 
722         /* Free our old filename and replace with the new one */
723         unlink(zs->cur_filename);
724         free(zs->cur_filename);
725         zs->cur_filename = strdup(zoname);
726     }
727     else {
728         fprintf(stderr, "problem with gzip, unable to compress.\n");
729     }
730     return rc;
731 }
732 
733 /* Destructor */
zsync_end(struct zsync_state * zs)734 char *zsync_end(struct zsync_state *zs) {
735     int i;
736     char *f = zsync_cur_filename(zs);
737 
738     /* Free rcksum object and zmap */
739     if (zs->rs)
740         rcksum_end(zs->rs);
741     if (zs->zmap)
742         zmap_free(zs->zmap);
743 
744     /* Clear download URLs */
745     for (i = 0; i < zs->nurl; i++)
746         free(zs->url[i]);
747     for (i = 0; i < zs->nzurl; i++)
748         free(zs->zurl[i]);
749 
750     /* And the rest. */
751     free(zs->url);
752     free(zs->zurl);
753     free(zs->checksum);
754     free(zs->filename);
755     free(zs->zfilename);
756     free(zs);
757     return f;
758 }
759 
/* Next come the methods for accepting data received from the remote copies of
 * the target and incorporating them into the local copy under construction. */
762 
763 /* zsync_configure_zstream_for_zdata(self, &z_stream_s, zoffset, &outoffset)
764  * Rewrites the state in the given zlib stream object to be ready to decompress
765  * data from the compressed version of this zsync stream at the given offset in
766  * the compressed file. Returns the offset in the uncompressed stream that this
767  * corresponds to in the 4th parameter.
768  */
zsync_configure_zstream_for_zdata(const struct zsync_state * zs,struct z_stream_s * zstrm,long zoffset,long long * poutoffset)769 void zsync_configure_zstream_for_zdata(const struct zsync_state *zs,
770                                        struct z_stream_s *zstrm,
771                                        long zoffset, long long *poutoffset) {
772     configure_zstream_for_zdata(zs->zmap, zstrm, zoffset, poutoffset);
773     {                           /* Load in prev 32k sliding window for backreferences */
774         long long pos = *poutoffset;
775         int lookback = (pos > 32768) ? 32768 : pos;
776 
777         /* Read in 32k of leading uncompressed context - needed because the deflate
778          * compression method includes back-references to previously-seen strings. */
779         unsigned char wbuf[32768];
780         rcksum_read_known_data(zs->rs, wbuf, pos - lookback, lookback);
781 
782         /* Fake an output buffer of 32k filled with data to zlib */
783         zstrm->next_out = wbuf + lookback;
784         zstrm->avail_out = 0;
785         updatewindow(zstrm, lookback);
786     }
787 }
788 
789 /* zsync_submit_data(self, buf[], offset, blocks)
790  * Passes data retrieved from the remote copy of
791  * the target file to libzsync, to be written into our local copy. The data is
792  * the given number of blocks at the given offset (must be block-aligned), data
793  * in buf[].  */
zsync_submit_data(struct zsync_state * zs,const unsigned char * buf,off_t offset,int blocks)794 static int zsync_submit_data(struct zsync_state *zs,
795                              const unsigned char *buf, off_t offset,
796                              int blocks) {
797     zs_blockid blstart = offset / zs->blocksize;
798     zs_blockid blend = blstart + blocks - 1;
799 
800     return rcksum_submit_blocks(zs->rs, buf, blstart, blend);
801 }
802 
/****************************************************************************
 *
 * zsync_receiver object definition and methods.
 * Stores the state for a currently-running download of blocks from a
 * particular URL or version of a file to complete a file using zsync.
 *
 * This is mostly a wrapper for the zsync_state which keeps various additional
 * state needed per-download: in particular the zlib stream object to
 * decompress the incoming data if this is a URL of a compressed version of the
 * target file.
 *
 * Created by zsync_begin_receive, fed through zsync_receive_data and released
 * by zsync_end_receive.
 */
struct zsync_receiver {
    struct zsync_state *zs;     /* The zsync_state that we are downloading for */
    struct z_stream_s strm;     /* Decompression object, for compressed URLs */
    int url_type;               /* Compressed or not; zsync_receive_data treats
                                 * the value 1 as "compressed" */
    unsigned char *outbuf;      /* Working buffer of zs->blocksize bytes, to
                                 * keep incomplete blocks of data */
    off_t outoffset;            /* Offset in the (uncompressed) target of the
                                 * next data expected; outoffset % blocksize is
                                 * the fill position in outbuf. The compressed
                                 * receive path treats -1 as "not aligned with
                                 * a block boundary". */
};
821 
822 /* Constructor */
zsync_begin_receive(struct zsync_state * zs,int url_type)823 struct zsync_receiver *zsync_begin_receive(struct zsync_state *zs, int url_type) {
824     struct zsync_receiver *zr = malloc(sizeof(struct zsync_receiver));
825 
826     if (!zr)
827         return NULL;
828     zr->zs = zs;
829 
830     zr->outbuf = malloc(zs->blocksize);
831     if (!zr->outbuf) {
832         free(zr);
833         return NULL;
834     }
835 
836     /* Set up new inflate object */
837     zr->strm.zalloc = Z_NULL;
838     zr->strm.zfree = Z_NULL;
839     zr->strm.opaque = NULL;
840     zr->strm.total_in = 0;
841 
842     zr->url_type = url_type;
843     zr->outoffset = 0;
844 
845     return zr;
846 }
847 
848 /* zsync_receive_data_uncompressed(self, buf[], offset, buflen)
849  * Adds the data in buf (buflen bytes) to this file at the given offset.
850  * Returns 0 unless there's an error (e.g. the submitted data doesn't match the
851  * expected checksum for the corresponding blocks)
852  */
zsync_receive_data_uncompressed(struct zsync_receiver * zr,const unsigned char * buf,off_t offset,size_t len)853 static int zsync_receive_data_uncompressed(struct zsync_receiver *zr,
854                                            const unsigned char *buf,
855                                            off_t offset, size_t len) {
856     int ret = 0;
857     size_t blocksize = zr->zs->blocksize;
858 
859     if (0 != (offset % blocksize)) {
860         size_t x = len;
861 
862         if (x > blocksize - (offset % blocksize))
863             x = blocksize - (offset % blocksize);
864 
865         if (zr->outoffset == offset) {
866             /* Half-way through a block, so let's try and complete it */
867             if (len)
868                 memcpy(zr->outbuf + offset % blocksize, buf, x);
869             else {
870                 // Pad with 0s to length.
871                 memset(zr->outbuf + offset % blocksize, 0, len = x =
872                        blocksize - (offset % blocksize));
873             }
874 
875             if ((x + offset) % blocksize == 0)
876                 if (zsync_submit_data
877                     (zr->zs, zr->outbuf, zr->outoffset + x - blocksize, 1))
878                     ret = 1;
879         }
880         buf += x;
881         len -= x;
882         offset += x;
883     }
884 
885     /* Now we are block-aligned */
886     if (len >= blocksize) {
887         int w = len / blocksize;
888 
889         if (zsync_submit_data(zr->zs, buf, offset, w))
890             ret = 1;
891 
892         w *= blocksize;
893         buf += w;
894         len -= w;
895         offset += w;
896 
897     }
898     /* Store incomplete block */
899     if (len) {
900         memcpy(zr->outbuf, buf, len);
901         offset += len;          /* not needed: buf += len; len -= len; */
902     }
903 
904     zr->outoffset = offset;
905     return ret;
906 }
907 
908 /* zsync_receive_data_compressed(self, buf[], offset, buflen)
909  * Passes data received corresponding to the compressed version of this file at
910  * the given offset; data in buf, buflen bytes.
911  * Returns 0 unless there's an error (e.g. the submitted data doesn't match the
912  * expected checksum for the corresponding blocks)
913  */
zsync_receive_data_compressed(struct zsync_receiver * zr,const unsigned char * buf,off_t offset,size_t len)914 static int zsync_receive_data_compressed(struct zsync_receiver *zr,
915                               const unsigned char *buf, off_t offset,
916                               size_t len) {
917     int ret = 0;
918     int eoz = 0;
919     size_t blocksize = zr->zs->blocksize;
920 
921     if (!len)
922         return 0;
923 
924     /* Now set up for the downloaded block */
925     zr->strm.next_in = buf;
926     zr->strm.avail_in = len;
927 
928     if (zr->strm.total_in == 0 || offset != zr->strm.total_in) {
929         zsync_configure_zstream_for_zdata(zr->zs, &(zr->strm), offset,
930                                           &(zr->outoffset));
931 
932         /* On first iteration, we might be reading an incomplete block from zsync's point of view. Limit avail_out so we can stop after doing that and realign with the buffer. */
933         zr->strm.avail_out = blocksize - (zr->outoffset % blocksize);
934         zr->strm.next_out = zr->outbuf;
935     }
936     else {
937         if (zr->outoffset == -1) {
938             fprintf(stderr,
939                     "data didn't align with block boundary in compressed stream\n");
940             return 1;
941         }
942         zr->strm.next_in = buf;
943         zr->strm.avail_in = len;
944     }
945 
946     while (zr->strm.avail_in && !eoz) {
947         int rc;
948 
949         /* Read in up to the next block (in the libzsync sense on the output stream) boundary */
950 
951         rc = inflate(&(zr->strm), Z_SYNC_FLUSH);
952         switch (rc) {
953         case Z_STREAM_END:
954             eoz = 1;
955         case Z_BUF_ERROR:
956         case Z_OK:
957             if (zr->strm.avail_out == 0 || eoz) {
958                 /* If this was at the start of a block, try submitting it */
959                 if (!(zr->outoffset % blocksize)) {
960                     int rc;
961 
962                     if (zr->strm.avail_out)
963                         memset(zr->strm.next_out, 0, zr->strm.avail_out);
964                     rc = zsync_submit_data(zr->zs, zr->outbuf,
965                                            zr->outoffset, 1);
966                     if (!zr->strm.avail_out)
967                         ret |= rc;
968                     zr->outoffset += blocksize;
969                 }
970                 else {
971                     /* We were reading a block fragment; update outoffset, and we are now block-aligned. */
972                     zr->outoffset += (zr->strm.next_out - zr->outbuf);
973                 }
974                 zr->strm.avail_out = blocksize;
975                 zr->strm.next_out = zr->outbuf;
976             }
977             break;
978         default:
979             fprintf(stderr, "zlib error: %s (%d)\n", zr->strm.msg, rc);
980             eoz = 1;
981             ret = -1;
982             break;
983         }
984     }
985     return ret;
986 }
987 
988 /* zsync_receive_data(self, buf[], offset, buflen)
989  * Passes data received from the source URL at the given offset;
990  * data is buflen bytes in buf[].
991  * Returns 0 unless there's an error (e.g. the submitted data doesn't match the
992  * expected checksum for the corresponding blocks)
993  */
zsync_receive_data(struct zsync_receiver * zr,const unsigned char * buf,off_t offset,size_t len)994 int zsync_receive_data(struct zsync_receiver *zr, const unsigned char *buf,
995                        off_t offset, size_t len) {
996     if (zr->url_type == 1) {
997         return zsync_receive_data_compressed(zr, buf, offset, len);
998     }
999     else {
1000         return zsync_receive_data_uncompressed(zr, buf, offset, len);
1001     }
1002 }
1003 
1004 /* Destructor */
zsync_end_receive(struct zsync_receiver * zr)1005 void zsync_end_receive(struct zsync_receiver *zr) {
1006     if (zr->strm.total_in > 0) {
1007         inflateEnd(&(zr->strm));
1008     }
1009     free(zr->outbuf);
1010     free(zr);
1011 }
1012