1 /* $Id: blocks.c,v 1.19 2019/06/02 17:43:34 deraadt Exp $ */ 2 /* 3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #include <sys/queue.h> 18 #include <sys/stat.h> 19 20 #include <assert.h> 21 #include <endian.h> 22 #include <errno.h> 23 #include <inttypes.h> 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <unistd.h> 28 29 #include <openssl/md4.h> 30 31 #include "extern.h" 32 33 struct blkhash { 34 const struct blk *blk; 35 TAILQ_ENTRY(blkhash) entries; 36 }; 37 38 TAILQ_HEAD(blkhashq, blkhash); 39 40 /* 41 * Table used by the sender for looking up blocks. 42 * The blocks are those sent by the receiver; we're looking up using 43 * hashes computed from our local file. 44 */ 45 struct blktab { 46 struct blkhashq *q; /* entries in the hashtable */ 47 size_t qsz; /* size of the hashtable */ 48 struct blkhash *blks; /* pre-allocated hashtable entries */ 49 }; 50 51 /* 52 * This is the number of buckets in the hashtable. 53 * Use the same that GPL rsync uses. 54 * (It should be dynamic?) 55 */ 56 #define BLKTAB_SZ 65536 57 58 /* 59 * Initialise an empty hashtable with BLKTAB_SZ entries in it. 60 * Populate it for each file with blkhash_set. 61 * When we've processed all files, call blkhash_free. 62 * Returns NULL on allocation failure. 63 */ 64 struct blktab * 65 blkhash_alloc(void) 66 { 67 struct blktab *p; 68 69 if ((p = calloc(1, sizeof(struct blktab))) == NULL) { 70 ERR("calloc"); 71 return NULL; 72 } 73 p->qsz = BLKTAB_SZ; 74 p->q = calloc(p->qsz, sizeof(struct blkhashq)); 75 if (p->q == NULL) { 76 ERR("calloc"); 77 free(p); 78 return NULL; 79 } 80 return p; 81 } 82 83 /* 84 * Populate the hashtable with an incoming file's blocks. 85 * This will clear out any existing hashed data. 86 * Returns zero on allocation failure, non-zero otherwise. 87 */ 88 int 89 blkhash_set(struct blktab *p, const struct blkset *bset) 90 { 91 size_t i, idx; 92 93 if (bset == NULL) 94 return 1; 95 96 /* Wipe clean the table. */ 97 98 for (i = 0; i < p->qsz; i++) 99 TAILQ_INIT(&p->q[i]); 100 101 /* Fill in the hashtable. */ 102 103 p->blks = reallocarray(p->blks, bset->blksz, sizeof(struct blkhash)); 104 if (p->blks == NULL) { 105 ERR("reallocarray"); 106 return 0; 107 } 108 for (i = 0; i < bset->blksz; i++) { 109 p->blks[i].blk = &bset->blks[i]; 110 idx = bset->blks[i].chksum_short % p->qsz; 111 assert(idx < p->qsz); 112 TAILQ_INSERT_TAIL(&p->q[idx], &p->blks[i], entries); 113 } 114 115 return 1; 116 } 117 118 /* 119 * Free as allocated with blkhash_alloc(). 120 */ 121 void 122 blkhash_free(struct blktab *p) 123 { 124 125 free(p->blks); 126 free(p); 127 } 128 129 /* 130 * From our current position of "offs" in buffer "buf" of total size 131 * "size", see if we can find a matching block in our list of blocks. 132 * The "hint" refers to the block that *might* work. 133 * Returns the blk or NULL if no matching block was found. 134 */ 135 static const struct blk * 136 blk_find(struct sess *sess, struct blkstat *st, 137 const struct blkset *blks, const char *path, int recomp) 138 { 139 unsigned char md[MD4_DIGEST_LENGTH]; 140 uint32_t fhash; 141 off_t remain, osz; 142 int have_md = 0; 143 char *map; 144 const struct blkhashq *q; 145 const struct blkhash *ent; 146 147 remain = st->mapsz - st->offs; 148 assert(remain); 149 osz = MINIMUM(remain, (off_t)blks->len); 150 151 /* 152 * First, compute our fast hash the hard way (if we're 153 * reentering this function from a previous block match, or the 154 * first time) or from our existing s1 and s2 values. 155 */ 156 157 if (!recomp) { 158 fhash = (st->s1 & 0xFFFF) | (st->s2 << 16); 159 } else { 160 fhash = hash_fast(st->map + st->offs, (size_t)osz); 161 st->s1 = fhash & 0xFFFF; 162 st->s2 = fhash >> 16; 163 } 164 165 /* 166 * Start with our match hint. 167 * This just runs the fast and slow check with the hint. 168 */ 169 170 if (st->hint < blks->blksz && 171 fhash == blks->blks[st->hint].chksum_short && 172 (size_t)osz == blks->blks[st->hint].len) { 173 hash_slow(st->map + st->offs, (size_t)osz, md, sess); 174 have_md = 1; 175 if (memcmp(md, blks->blks[st->hint].chksum_long, blks->csum) == 0) { 176 LOG4("%s: found matching hinted match: " 177 "position %jd, block %zu (position %jd, size %zu)", 178 path, 179 (intmax_t)st->offs, blks->blks[st->hint].idx, 180 (intmax_t)blks->blks[st->hint].offs, 181 blks->blks[st->hint].len); 182 return &blks->blks[st->hint]; 183 } 184 } 185 186 /* 187 * Look for the fast hash modulus in our hashtable, filter for 188 * those matching the full hash and length, then move to the 189 * slow hash. 190 * The slow hash is computed only once. 191 */ 192 193 q = &st->blktab->q[fhash % st->blktab->qsz]; 194 195 TAILQ_FOREACH(ent, q, entries) { 196 if (fhash != ent->blk->chksum_short || 197 (size_t)osz != ent->blk->len) 198 continue; 199 200 LOG4("%s: found matching fast match: " 201 "position %jd, block %zu (position %jd, size %zu)", 202 path, (intmax_t)st->offs, ent->blk->idx, 203 (intmax_t)ent->blk->offs, ent->blk->len); 204 205 if (have_md == 0) { 206 hash_slow(st->map + st->offs, (size_t)osz, md, sess); 207 have_md = 1; 208 } 209 210 if (memcmp(md, ent->blk->chksum_long, blks->csum)) 211 continue; 212 213 LOG4("%s: sender verifies slow match", path); 214 return ent->blk; 215 } 216 217 /* 218 * Adjust our partial sums for the hashing. 219 * We first remove the first byte from the sum. 220 * We then, if we have space, add the first byte of the next 221 * block in the sequence. 222 */ 223 224 map = st->map + st->offs; 225 st->s1 -= map[0]; 226 st->s2 -= osz * map[0]; 227 228 if (osz < remain) { 229 st->s1 += map[osz]; 230 st->s2 += st->s1; 231 } 232 233 return NULL; 234 } 235 236 /* 237 * Given a local file "path" and the blocks created by a remote machine, 238 * find out which blocks of our file they don't have and send them. 239 * This function is reentrant: it must be called while there's still 240 * data to send. 241 */ 242 void 243 blk_match(struct sess *sess, const struct blkset *blks, 244 const char *path, struct blkstat *st) 245 { 246 off_t last, end, sz; 247 int32_t tok; 248 size_t i; 249 const struct blk *blk; 250 251 /* 252 * If the file's empty or we don't have any blocks from the 253 * sender, then simply send the whole file. 254 * Otherwise, run the hash matching routine and send raw chunks 255 * and subsequent matching tokens. 256 */ 257 258 if (st->mapsz && blks->blksz) { 259 /* 260 * Stop searching at the length of the file minus the 261 * size of the last block. 262 * The reason for this being that we don't need to do an 263 * incremental hash within the last block---if it 264 * doesn't match, it doesn't match. 265 */ 266 267 end = st->mapsz + 1 - blks->blks[blks->blksz - 1].len; 268 last = st->offs; 269 270 for (i = 0; st->offs < end; st->offs++, i++) { 271 blk = blk_find(sess, st, blks, path, i == 0); 272 if (blk == NULL) 273 continue; 274 275 sz = st->offs - last; 276 st->dirty += sz; 277 st->total += sz; 278 LOG4("%s: flushing %jd B before %zu B block %zu", 279 path, (intmax_t)sz, 280 blk->len, blk->idx); 281 tok = -(blk->idx + 1); 282 283 /* 284 * Write the data we have, then follow it with 285 * the tag of the block that matches. 286 */ 287 288 st->curpos = last; 289 st->curlen = st->curpos + sz; 290 st->curtok = tok; 291 assert(st->curtok != 0); 292 st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK; 293 st->total += blk->len; 294 st->offs += blk->len; 295 st->hint = blk->idx + 1; 296 return; 297 } 298 299 /* Emit remaining data and send terminator token. */ 300 301 sz = st->mapsz - last; 302 LOG4("%s: flushing remaining %jd B", 303 path, (intmax_t)sz); 304 305 st->total += sz; 306 st->dirty += sz; 307 st->curpos = last; 308 st->curlen = st->curpos + sz; 309 st->curtok = 0; 310 st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK; 311 } else { 312 st->curpos = 0; 313 st->curlen = st->mapsz; 314 st->curtok = 0; 315 st->curst = st->mapsz ? BLKSTAT_DATA : BLKSTAT_TOK; 316 st->dirty = st->total = st->mapsz; 317 318 LOG4("%s: flushing whole file %zu B", 319 path, st->mapsz); 320 } 321 } 322 323 /* 324 * Buffer the message from sender to the receiver indicating that the 325 * block set has been received. 326 * Symmetrises blk_send_ack(). 327 */ 328 void 329 blk_recv_ack(char buf[20], const struct blkset *blocks, int32_t idx) 330 { 331 size_t pos = 0, sz; 332 333 sz = sizeof(int32_t) + /* index */ 334 sizeof(int32_t) + /* block count */ 335 sizeof(int32_t) + /* block length */ 336 sizeof(int32_t) + /* checksum length */ 337 sizeof(int32_t); /* block remainder */ 338 assert(sz == 20); 339 340 io_buffer_int(buf, &pos, sz, idx); 341 io_buffer_int(buf, &pos, sz, blocks->blksz); 342 io_buffer_int(buf, &pos, sz, blocks->len); 343 io_buffer_int(buf, &pos, sz, blocks->csum); 344 io_buffer_int(buf, &pos, sz, blocks->rem); 345 assert(pos == sz); 346 } 347 348 /* 349 * Read all of the checksums for a file's blocks. 350 * Returns the set of blocks or NULL on failure. 351 */ 352 struct blkset * 353 blk_recv(struct sess *sess, int fd, const char *path) 354 { 355 struct blkset *s; 356 int32_t i; 357 size_t j; 358 struct blk *b; 359 off_t offs = 0; 360 361 if ((s = calloc(1, sizeof(struct blkset))) == NULL) { 362 ERR("calloc"); 363 return NULL; 364 } 365 366 /* 367 * The block prologue consists of a few values that we'll need 368 * in reading the individual blocks for this file. 369 * FIXME: read into buffer and unbuffer. 370 */ 371 372 if (!io_read_size(sess, fd, &s->blksz)) { 373 ERRX1("io_read_size"); 374 goto out; 375 } else if (!io_read_size(sess, fd, &s->len)) { 376 ERRX1("io_read_size"); 377 goto out; 378 } else if (!io_read_size(sess, fd, &s->csum)) { 379 ERRX1("io_read_int"); 380 goto out; 381 } else if (!io_read_size(sess, fd, &s->rem)) { 382 ERRX1("io_read_int"); 383 goto out; 384 } else if (s->rem && s->rem >= s->len) { 385 ERRX("block remainder is " 386 "greater than block size"); 387 goto out; 388 } 389 390 LOG3("%s: read block prologue: %zu blocks of " 391 "%zu B, %zu B remainder, %zu B checksum", path, 392 s->blksz, s->len, s->rem, s->csum); 393 394 if (s->blksz) { 395 s->blks = calloc(s->blksz, sizeof(struct blk)); 396 if (s->blks == NULL) { 397 ERR("calloc"); 398 goto out; 399 } 400 } 401 402 /* 403 * Read each block individually. 404 * FIXME: read buffer and unbuffer. 405 */ 406 407 for (j = 0; j < s->blksz; j++) { 408 b = &s->blks[j]; 409 if (!io_read_int(sess, fd, &i)) { 410 ERRX1("io_read_int"); 411 goto out; 412 } 413 b->chksum_short = i; 414 415 assert(s->csum <= sizeof(b->chksum_long)); 416 if (!io_read_buf(sess, 417 fd, b->chksum_long, s->csum)) { 418 ERRX1("io_read_buf"); 419 goto out; 420 } 421 422 /* 423 * If we're the last block, then we're assigned the 424 * remainder of the data. 425 */ 426 427 b->offs = offs; 428 b->idx = j; 429 b->len = (j == (s->blksz - 1) && s->rem) ? 430 s->rem : s->len; 431 offs += b->len; 432 433 LOG4("%s: read block %zu, length %zu B", 434 path, b->idx, b->len); 435 } 436 437 s->size = offs; 438 LOG3("%s: read blocks: %zu blocks, %jd B total blocked data", 439 path, s->blksz, (intmax_t)s->size); 440 return s; 441 out: 442 free(s->blks); 443 free(s); 444 return NULL; 445 } 446 447 /* 448 * Symmetrise blk_recv_ack(), except w/o the leading identifier. 449 * Return zero on failure, non-zero on success. 450 */ 451 int 452 blk_send_ack(struct sess *sess, int fd, struct blkset *p) 453 { 454 char buf[16]; 455 size_t pos = 0, sz; 456 457 /* Put the entire send routine into a buffer. */ 458 459 sz = sizeof(int32_t) + /* block count */ 460 sizeof(int32_t) + /* block length */ 461 sizeof(int32_t) + /* checksum length */ 462 sizeof(int32_t); /* block remainder */ 463 assert(sz <= sizeof(buf)); 464 465 if (!io_read_buf(sess, fd, buf, sz)) { 466 ERRX1("io_read_buf"); 467 return 0; 468 } 469 470 if (!io_unbuffer_size(buf, &pos, sz, &p->blksz)) 471 ERRX1("io_unbuffer_size"); 472 else if (!io_unbuffer_size(buf, &pos, sz, &p->len)) 473 ERRX1("io_unbuffer_size"); 474 else if (!io_unbuffer_size(buf, &pos, sz, &p->csum)) 475 ERRX1("io_unbuffer_size"); 476 else if (!io_unbuffer_size(buf, &pos, sz, &p->rem)) 477 ERRX1("io_unbuffer_size"); 478 else if (p->len && p->rem >= p->len) 479 ERRX1("non-zero length is less than remainder"); 480 else if (p->csum == 0 || p->csum > 16) 481 ERRX1("inappropriate checksum length"); 482 else 483 return 1; 484 485 return 0; 486 } 487 488 /* 489 * Transmit the metadata for set and blocks. 490 * Return zero on failure, non-zero on success. 491 */ 492 int 493 blk_send(struct sess *sess, int fd, size_t idx, 494 const struct blkset *p, const char *path) 495 { 496 char *buf; 497 size_t i, pos = 0, sz; 498 int rc = 0; 499 500 /* Put the entire send routine into a buffer. */ 501 502 sz = sizeof(int32_t) + /* identifier */ 503 sizeof(int32_t) + /* block count */ 504 sizeof(int32_t) + /* block length */ 505 sizeof(int32_t) + /* checksum length */ 506 sizeof(int32_t) + /* block remainder */ 507 p->blksz * 508 (sizeof(int32_t) + /* short checksum */ 509 p->csum); /* long checksum */ 510 511 if ((buf = malloc(sz)) == NULL) { 512 ERR("malloc"); 513 return 0; 514 } 515 516 io_buffer_int(buf, &pos, sz, idx); 517 io_buffer_int(buf, &pos, sz, p->blksz); 518 io_buffer_int(buf, &pos, sz, p->len); 519 io_buffer_int(buf, &pos, sz, p->csum); 520 io_buffer_int(buf, &pos, sz, p->rem); 521 522 for (i = 0; i < p->blksz; i++) { 523 io_buffer_int(buf, &pos, sz, p->blks[i].chksum_short); 524 io_buffer_buf(buf, &pos, sz, p->blks[i].chksum_long, p->csum); 525 } 526 527 assert(pos == sz); 528 529 if (!io_write_buf(sess, fd, buf, sz)) { 530 ERRX1("io_write_buf"); 531 goto out; 532 } 533 534 LOG3("%s: sent block prologue: %zu blocks of %zu B, " 535 "%zu B remainder, %zu B checksum", 536 path, p->blksz, p->len, p->rem, p->csum); 537 rc = 1; 538 out: 539 free(buf); 540 return rc; 541 } 542