1 /*
2  * Copyright 2018 Jonathan Dieter <jdieter@gmail.com>
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  1. Redistributions of source code must retain the above copyright notice,
8  *     this list of conditions and the following disclaimer.
9  *
10  *  2. Redistributions in binary form must reproduce the above copyright notice,
11  *     this list of conditions and the following disclaimer in the documentation
12  *     and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
18  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24  * POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <stdlib.h>
28 #include <stdint.h>
29 #include <stdbool.h>
30 #include <string.h>
31 #include <curl/curl.h>
32 #include <unistd.h>
33 #include <sys/types.h>
34 #include <errno.h>
35 #include <zck.h>
36 
37 #include "zck_private.h"
38 
39 /* Free zckDL header regex used for downloading ranges */
clear_dl_regex(zckDL * dl)40 static void clear_dl_regex(zckDL *dl) {
41     if(dl == NULL)
42         return;
43 
44     if(dl->hdr_regex) {
45         regfree(dl->hdr_regex);
46         free(dl->hdr_regex);
47         dl->hdr_regex = NULL;
48     }
49     if(dl->dl_regex) {
50         regfree(dl->dl_regex);
51         free(dl->dl_regex);
52         dl->dl_regex = NULL;
53     }
54     if(dl->end_regex) {
55         regfree(dl->end_regex);
56         free(dl->end_regex);
57         dl->end_regex = NULL;
58     }
59 }
60 
61 /* Write zeros to tgt->fd in location of tgt_idx */
zero_chunk(zckCtx * tgt,zckChunk * tgt_idx)62 static bool zero_chunk(zckCtx *tgt, zckChunk *tgt_idx) {
63     char buf[BUF_SIZE] = {0};
64     size_t to_read = tgt_idx->comp_length;
65     if(!seek_data(tgt, tgt->data_offset + tgt_idx->start, SEEK_SET))
66         return false;
67     while(to_read > 0) {
68         int rb = BUF_SIZE;
69         if(rb > to_read)
70             rb = to_read;
71         if(!write_data(tgt, tgt->fd, buf, rb))
72             return false;
73         to_read -= rb;
74     }
75     return true;
76 }
77 
78 /* Check whether last downloaded chunk is valid and zero it out if it isn't */
set_chunk_valid(zckDL * dl)79 static bool set_chunk_valid(zckDL *dl) {
80     ALLOCD_BOOL(NULL, dl);
81     VALIDATE_BOOL(dl->zck);
82 
83     int retval = validate_chunk(dl->tgt_check, ZCK_LOG_WARNING);
84     if(retval < 1) {
85         if(!zero_chunk(dl->zck, dl->tgt_check))
86             return false;
87         dl->tgt_check->valid = -1;
88         return false;
89     } else {
90         dl->tgt_check->valid = 1;
91     }
92     dl->tgt_check = NULL;
93     return true;
94 }
95 
96 /* Write length or to end of current chunk, whichever comes first */
dl_write(zckDL * dl,const char * at,size_t length)97 static int dl_write(zckDL *dl, const char *at, size_t length) {
98     ALLOCD_INT(NULL, dl);
99     VALIDATE_INT(dl->zck);
100 
101     int wb = 0;
102     if(dl->write_in_chunk > 0) {
103         if(dl->write_in_chunk < length)
104             wb = dl->write_in_chunk;
105         else
106             wb = length;
107         if(!write_data(dl->zck, dl->zck->fd, at, wb))
108             return -1;
109         dl->write_in_chunk -= wb;
110         if(!hash_update(dl->zck, &(dl->zck->check_chunk_hash), at, wb))
111             return -1;
112         zck_log(ZCK_LOG_DEBUG, "Writing %lu bytes", wb);
113         dl->dl_chunk_data += wb;
114     }
115     return wb;
116 }
117 
118 /* Copy chunk identified by src_idx into location specified by tgt_idx */
write_and_verify_chunk(zckCtx * src,zckCtx * tgt,zckChunk * src_idx,zckChunk * tgt_idx)119 static bool write_and_verify_chunk(zckCtx *src, zckCtx *tgt,
120                                    zckChunk *src_idx,
121                                    zckChunk *tgt_idx) {
122     VALIDATE_READ_BOOL(src);
123     VALIDATE_READ_BOOL(tgt);
124 
125     static char buf[BUF_SIZE] = {0};
126 
127     size_t to_read = src_idx->comp_length;
128     if(!seek_data(src, src->data_offset + src_idx->start, SEEK_SET))
129         return false;
130     if(!seek_data(tgt, tgt->data_offset + tgt_idx->start, SEEK_SET))
131         return false;
132     zckHash check_hash = {0};
133     if(!hash_init(tgt, &check_hash, &(src->chunk_hash_type)))
134         return false;
135     while(to_read > 0) {
136         int rb = BUF_SIZE;
137         if(rb > to_read)
138             rb = to_read;
139         if(!read_data(src, buf, rb))
140             return false;
141         if(!hash_update(tgt, &check_hash, buf, rb))
142             return false;
143         if(!write_data(tgt, tgt->fd, buf, rb))
144             return false;
145         to_read -= rb;
146     }
147     char *digest = hash_finalize(tgt, &check_hash);
148     /* If chunk is invalid, overwrite with zeros and add to download range */
149     if(memcmp(digest, src_idx->digest, src_idx->digest_size) != 0) {
150         char *pdigest = zck_get_chunk_digest(src_idx);
151         zck_log(ZCK_LOG_INFO, "Corrupted chunk found in file, will redownload");
152         zck_log(ZCK_LOG_INFO, "Source hash: %s", pdigest);
153         free(pdigest);
154         pdigest = get_digest_string(digest, src_idx->digest_size);
155         zck_log(ZCK_LOG_INFO, "Target hash: %s", pdigest);
156         free(pdigest);
157         if(!zero_chunk(tgt, tgt_idx))
158             return false;
159         tgt_idx->valid = -1;
160     } else {
161         tgt_idx->valid = 1;
162         zck_log(ZCK_LOG_DEBUG, "Wrote %lu bytes at %lu",
163                 tgt_idx->comp_length, tgt_idx->start);
164     }
165     free(digest);
166     return true;
167 }
168 
169 /* Split current read into the appropriate chunks and write appropriately */
dl_write_range(zckDL * dl,const char * at,size_t length)170 int dl_write_range(zckDL *dl, const char *at, size_t length) {
171     ALLOCD_BOOL(NULL, dl);
172     VALIDATE_BOOL(dl->zck);
173 
174     if(dl->range == NULL) {
175         set_error(dl->zck, "zckDL range not initialized");
176         return 0;
177     }
178 
179     if(dl->range->index.first == NULL) {
180         set_error(dl->zck, "zckDL index not initialized");
181         return 0;
182     }
183     if(dl->zck->index.first == NULL) {
184         set_error(dl->zck, "zckCtx index not initialized");
185         return 0;
186     }
187     int wb = dl_write(dl, at, length);
188     if(wb < 0)
189         return 0;
190     if(dl->write_in_chunk == 0) {
191         /* Check whether we just finished downloading a chunk and verify it */
192         if(dl->tgt_check && !set_chunk_valid(dl))
193             return false;
194 
195         if(dl->range->index.current == NULL)
196             dl->range->index.current = dl->range->index.first;
197         for(zckChunk *chk = dl->range->index.current; chk; chk = chk->next) {
198             if(dl->dl_chunk_data != chk->start)
199                 continue;
200 
201             int count = 0;
202             zckChunk *tgt_chk = chk->src;
203             if(tgt_chk->valid == 1)
204                 continue;
205             if(chk->comp_length == tgt_chk->comp_length &&
206                memcmp(chk->digest, tgt_chk->digest,
207                       chk->digest_size) == 0) {
208                 dl->tgt_check = tgt_chk;
209                 dl->tgt_number = count;
210                 if(!hash_init(dl->zck, &(dl->zck->check_chunk_hash),
211                               &(dl->zck->chunk_hash_type)))
212                     return 0;
213                 dl->write_in_chunk = chk->comp_length;
214                 if(!seek_data(dl->zck,
215                               dl->zck->data_offset + tgt_chk->start,
216                               SEEK_SET))
217                     return 0;
218                 dl->range->index.current = chk->next;
219                 chk = NULL;
220                 tgt_chk = NULL;
221                 break;
222             }
223         }
224     }
225     int wb2 = 0;
226     /* We've still got data, call recursively */
227     if(dl->write_in_chunk > 0 && wb < length) {
228         wb2 = dl_write_range(dl, at+wb, length-wb);
229         if(wb2 == 0)
230             return 0;
231     }
232     return wb + wb2;
233 }
234 
zck_copy_chunks(zckCtx * src,zckCtx * tgt)235 bool PUBLIC zck_copy_chunks(zckCtx *src, zckCtx *tgt) {
236     VALIDATE_READ_BOOL(src);
237     VALIDATE_READ_BOOL(tgt);
238 
239     zckIndex *tgt_info = &(tgt->index);
240     zckIndex *src_info = &(src->index);
241     zckChunk *tgt_idx = tgt_info->first;
242     while(tgt_idx) {
243         /* No need to copy already valid chunk */
244         if(tgt_idx->valid == 1) {
245             tgt_idx = tgt_idx->next;
246             continue;
247         }
248         zckChunk *f = NULL;
249 
250         HASH_FIND(hh, src_info->ht, tgt_idx->digest, tgt_idx->digest_size, f);
251         if(f && f->length == tgt_idx->length &&
252            f->comp_length == tgt_idx->comp_length)
253             write_and_verify_chunk(src, tgt, f, tgt_idx);
254         tgt_idx = tgt_idx->next;
255     }
256     return true;
257 }
258 
zck_dl_get_bytes_downloaded(zckDL * dl)259 ssize_t PUBLIC zck_dl_get_bytes_downloaded(zckDL *dl) {
260     ALLOCD_INT(NULL, dl);
261 
262     return dl->dl;
263 }
264 
zck_dl_get_bytes_uploaded(zckDL * dl)265 ssize_t PUBLIC zck_dl_get_bytes_uploaded(zckDL *dl) {
266     ALLOCD_INT(NULL, dl);
267 
268     return dl->ul;
269 }
270 
271 /* Initialize zckDL.  When finished, zckDL *must* be freed by zck_dl_free() */
zck_dl_init(zckCtx * zck)272 zckDL PUBLIC *zck_dl_init(zckCtx *zck) {
273     zckDL *dl = zmalloc(sizeof(zckDL));
274     dl->mp = zmalloc(sizeof(zckMP));
275     dl->zck = zck;
276     return dl;
277 }
278 
279 /* Reset dl while maintaining download statistics and private information */
zck_dl_reset(zckDL * dl)280 void PUBLIC zck_dl_reset(zckDL *dl) {
281     if(!dl)
282         return;
283 
284     reset_mp(dl->mp);
285     dl->dl_chunk_data = 0;
286     clear_dl_regex(dl);
287     if(dl->boundary)
288         free(dl->boundary);
289     dl->boundary = NULL;
290 
291     zckCtx *zck = dl->zck;
292     size_t db = dl->dl;
293     size_t ub = dl->ul;
294     zckMP *mp = dl->mp;
295     memset(dl, 0, sizeof(zckDL));
296     dl->zck = zck;
297     dl->dl = db;
298     dl->ul = ub;
299     dl->mp = mp;
300 }
301 
302 /* Free zckDL and set pointer to NULL */
zck_dl_free(zckDL ** dl)303 void PUBLIC zck_dl_free(zckDL **dl) {
304     zck_dl_reset(*dl);
305     if((*dl)->mp)
306         free((*dl)->mp);
307     free(*dl);
308     *dl = NULL;
309 }
310 
zck_dl_get_zck(zckDL * dl)311 zckCtx PUBLIC *zck_dl_get_zck(zckDL *dl) {
312     ALLOCD_PTR(NULL, dl);
313 
314     return dl->zck;
315 }
316 
zck_dl_set_zck(zckDL * dl,zckCtx * zck)317 bool PUBLIC zck_dl_set_zck(zckDL *dl, zckCtx *zck) {
318     ALLOCD_BOOL(NULL, dl);
319 
320     dl->zck = zck;
321     return true;
322 }
zck_dl_set_range(zckDL * dl,zckRange * range)323 bool PUBLIC zck_dl_set_range(zckDL *dl, zckRange *range) {
324     ALLOCD_BOOL(NULL, dl);
325 
326     dl->range = range;
327     return true;
328 }
329 
zck_dl_get_range(zckDL * dl)330 zckRange PUBLIC *zck_dl_get_range(zckDL *dl) {
331     ALLOCD_PTR(NULL, dl);
332 
333     return dl->range;
334 }
335 
zck_dl_set_header_cb(zckDL * dl,zck_wcb func)336 bool PUBLIC zck_dl_set_header_cb(zckDL *dl, zck_wcb func) {
337     ALLOCD_BOOL(NULL, dl);
338 
339     dl->header_cb = func;
340     return true;
341 }
342 
zck_dl_set_header_data(zckDL * dl,void * data)343 bool PUBLIC zck_dl_set_header_data(zckDL *dl, void *data) {
344     ALLOCD_BOOL(NULL, dl);
345 
346     dl->header_data = data;
347     return true;
348 }
349 
zck_dl_set_write_cb(zckDL * dl,zck_wcb func)350 bool PUBLIC zck_dl_set_write_cb(zckDL *dl, zck_wcb func) {
351     ALLOCD_BOOL(NULL, dl);
352 
353     dl->write_cb = func;
354     return true;
355 }
356 
zck_dl_set_write_data(zckDL * dl,void * data)357 bool PUBLIC zck_dl_set_write_data(zckDL *dl, void *data) {
358     ALLOCD_BOOL(NULL, dl);
359 
360     dl->write_data = data;
361     return true;
362 }
363 
364 /*******************************************************************
365  * Callbacks
366  *******************************************************************/
367 
zck_header_cb(char * b,size_t l,size_t c,void * dl_v)368 size_t PUBLIC zck_header_cb(char *b, size_t l, size_t c, void *dl_v) {
369     ALLOCD_BOOL(NULL, dl_v);
370     zckDL *dl = (zckDL*)dl_v;
371 
372     if(multipart_get_boundary(dl, b, c*l) == 0)
373         zck_log(ZCK_LOG_DEBUG, "No boundary detected");
374 
375     if(dl->header_cb)
376         return dl->header_cb(b, l, c, dl->header_data);
377     return c*l;
378 }
379 
zck_write_zck_header_cb(void * ptr,size_t l,size_t c,void * dl_v)380 size_t PUBLIC zck_write_zck_header_cb(void *ptr, size_t l, size_t c,
381                                       void *dl_v) {
382     ALLOCD_BOOL(NULL, dl_v);
383     zckDL *dl = (zckDL*)dl_v;
384 
385     size_t wb = 0;
386     dl->dl += l*c;
387     size_t loc = tell_data(dl->zck);
388     zck_log(ZCK_LOG_DEBUG, "Downloading %lu bytes to position %lu", l*c, loc);
389     wb = write(dl->zck->fd, ptr, l*c);
390     if(dl->write_cb)
391         return dl->write_cb(ptr, l, c, dl->write_data);
392     return wb;
393 }
394 
zck_write_chunk_cb(void * ptr,size_t l,size_t c,void * dl_v)395 size_t PUBLIC zck_write_chunk_cb(void *ptr, size_t l, size_t c, void *dl_v) {
396     ALLOCD_BOOL(NULL, dl_v);
397     zckDL *dl = (zckDL*)dl_v;
398 
399     size_t wb = 0;
400     dl->dl += l*c;
401     if(dl->boundary != NULL) {
402         int retval = multipart_extract(dl, ptr, l*c);
403         if(retval == 0)
404             wb = 0;
405         else
406             wb = l*c;
407     } else {
408         int retval = dl_write_range(dl, ptr, l*c);
409         if(retval == 0)
410             wb = 0;
411         else
412             wb = l*c;
413     }
414     if(dl->write_cb)
415         return dl->write_cb(ptr, l, c, dl->write_data);
416     return wb;
417 }
418