1 /*
2 * Routines to provide reference counting
3 *
4 * Copyright (C) 2013 Craig Barratt.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, visit the http://fsf.org website.
18 */
19
20 #include "backuppc.h"
21
22 /*
23 * magic number that appears at the start of the reference count (or delta count file)
24 */
25 #define BPC_POOL_REF_MAGIC (0x178e553c)
26
27 #define CONV_BUF_TO_UINT32(buf) ((buf)[0] << 24 | (buf)[1] << 16 | (buf)[2] << 8 | (buf)[3])
28
29 #define CONV_UINT32_TO_BUF(buf, val) { *(buf)++ = ((val) >> 24) & 0xff; \
30 *(buf)++ = ((val) >> 16) & 0xff; \
31 *(buf)++ = ((val) >> 8) & 0xff; \
32 *(buf)++ = ((val) >> 0) & 0xff; }
33
34 typedef struct {
35 bpc_hashtable_key key;
36 int32 count;
37 bpc_digest digest;
38 } DigestInfo;
39
40 typedef struct {
41 int fd;
42 uchar *bufP;
43 int errorCnt;
44 uchar buf[4 * 65536];
45 } write_info;
46
bpc_poolRefInit(bpc_refCount_info * info,int entryCnt)47 void bpc_poolRefInit(bpc_refCount_info *info, int entryCnt)
48 {
49 bpc_hashtable_create(&info->ht, entryCnt, sizeof(DigestInfo));
50 }
51
bpc_poolRefDestroy(bpc_refCount_info * info)52 void bpc_poolRefDestroy(bpc_refCount_info *info)
53 {
54 bpc_hashtable_destroy(&info->ht);
55 }
56
bpc_poolRefSet(bpc_refCount_info * info,bpc_digest * digest,int32 count)57 void bpc_poolRefSet(bpc_refCount_info *info, bpc_digest *digest, int32 count)
58 {
59 DigestInfo *d = bpc_hashtable_find(&info->ht, digest->digest, digest->len, 1);
60 if ( d->key.key == digest ) {
61 /*
62 * new entry - copy in digest
63 */
64 d->digest = *digest;
65 d->key.key = d->digest.digest;
66 }
67 d->count = count;
68 return;
69 }
70
bpc_poolRefGet(bpc_refCount_info * info,bpc_digest * digest,int32 * count)71 int bpc_poolRefGet(bpc_refCount_info *info, bpc_digest *digest, int32 *count)
72 {
73
74 DigestInfo *d = bpc_hashtable_find(&info->ht, digest->digest, digest->len, 0);
75 if ( !d ) return -1;
76 *count = d->count;
77 return 0;
78 }
79
bpc_poolRefDelete(bpc_refCount_info * info,bpc_digest * digest)80 int bpc_poolRefDelete(bpc_refCount_info *info, bpc_digest *digest)
81 {
82 DigestInfo *d = bpc_hashtable_find(&info->ht, digest->digest, digest->len, 0);
83 if ( !d ) return -1;
84 bpc_hashtable_nodeDelete(&info->ht, d);
85 return 0;
86 }
87
bpc_poolRefIncr(bpc_refCount_info * info,bpc_digest * digest,int32 delta)88 int bpc_poolRefIncr(bpc_refCount_info *info, bpc_digest *digest, int32 delta)
89 {
90 DigestInfo *d = bpc_hashtable_find(&info->ht, digest->digest, digest->len, 1);
91 if ( d->key.key == digest ) {
92 /*
93 * new entry - copy in digest
94 */
95 d->digest = *digest;
96 d->key.key = d->digest.digest;
97 }
98 d->count += delta;
99 if ( BPC_LogLevel >= 8 ) {
100 char hexStr[BPC_DIGEST_LEN_MAX * 2 + 1];
101
102 bpc_digest_digest2str(&d->digest, hexStr);
103 bpc_logMsgf("bpc_poolRefIncr(%s, %d), count now %d\n", hexStr, delta, d->count);
104 }
105 return d->count;
106 }
107
bpc_poolRefIterate(bpc_refCount_info * info,bpc_digest * digest,int32 * count,uint * idx)108 int bpc_poolRefIterate(bpc_refCount_info *info, bpc_digest *digest, int32 *count, uint *idx)
109 {
110 DigestInfo *d = bpc_hashtable_nextEntry(&info->ht, idx);
111 if ( !d ) return -1;
112 *digest = d->digest;
113 *count = d->count;
114 return 0;
115 }
116
bpc_poolRefPrintEntry(DigestInfo * info)117 void bpc_poolRefPrintEntry(DigestInfo *info)
118 {
119 char hexStr[BPC_DIGEST_LEN_MAX * 2 + 1];
120
121 bpc_digest_digest2str(&info->digest, hexStr);
122 fprintf(stderr, "%-20s %d\n", hexStr, info->count);
123 }
124
bpc_poolRefCountPrint(bpc_refCount_info * info)125 void bpc_poolRefCountPrint(bpc_refCount_info *info)
126 {
127 bpc_hashtable_iterate(&info->ht, (void*)bpc_poolRefPrintEntry, NULL);
128 }
129
write_file_flush(write_info * out)130 static void write_file_flush(write_info *out)
131 {
132 uchar *p = out->buf;
133 while ( p < out->bufP ) {
134 int nWrite = write(out->fd, p, out->bufP - p);
135 if ( nWrite < 0 ) {
136 if ( errno == EINTR ) continue;
137 out->errorCnt++;
138 return;
139 }
140 p += nWrite;
141 }
142 out->bufP = out->buf;
143 }
144
bpc_poolRef_read_more_data(int fd,uchar * buf,size_t bufSize,size_t * nRead,uchar ** bufPP,char * fileName)145 static int bpc_poolRef_read_more_data(int fd, uchar *buf, size_t bufSize, size_t *nRead, uchar **bufPP, char *fileName)
146 {
147 int thisRead;
148
149 /*
150 * move the remaining part of the buffer down, and read more data
151 */
152 *nRead = (buf + *nRead) - *bufPP;
153 if ( *nRead > 0 ) memmove(buf, *bufPP, *nRead);
154 *bufPP = buf;
155 do {
156 do {
157 thisRead = read(fd, buf + *nRead, bufSize - *nRead);
158 } while ( thisRead < 0 && errno == EINTR );
159 if ( thisRead < 0 ) {
160 bpc_logErrf("bpc_poolRefFileRead: can't read more bytes from %s (errno %d)\n", fileName, errno);
161 return -1;
162 }
163 if ( BPC_LogLevel >= 8 ) bpc_logMsgf("bpc_poolRef_read_more_data: read %d bytes (nRead = %d, sizeof(buf) = %d)\n", thisRead, *nRead, bufSize);
164 *nRead += thisRead;
165 } while ( thisRead > 0 && *nRead < sizeof(buf) );
166 return 0;
167 }
168
169 /*
170 * Read variable-length unsigned integer in 7 bit chunks, LSB first.
171 *
172 * To handle signed numbers, the very first LSB is a sign bit, meaning the first byte
173 * stores just 6 bits.
174 */
getVarInt(uchar ** bufPP,uchar * bufLast)175 static int64 getVarInt(uchar **bufPP, uchar *bufLast)
176 {
177 int64 result = 0;
178 uchar *bufP = *bufPP, c = '\0';
179 int i = 6, negative = 0;
180
181 if ( bufP < bufLast ) {
182 c = *bufP++;
183 negative = c & 0x1;
184 result = (c & 0x7e) >> 1;
185 }
186 while ( bufP < bufLast && (c & 0x80) ) {
187 c = *bufP++;
188 result |= (c & 0x7f) << i;
189 i += 7;
190 }
191 *bufPP = bufP;
192 if ( negative ) result = -result;
193 return result;
194 }
195
196 /*
197 * Write variable-length unsigned integer in 7 bit chunks, LSB first.
198 *
199 * To handle signed numbers, the very first LSB is a sign bit, meaning the first byte
200 * stores just 6 bits.
201 */
setVarInt(uchar ** bufPP,uchar * bufLast,int64 value)202 static void setVarInt(uchar **bufPP, uchar *bufLast, int64 value)
203 {
204 uchar *bufP = *bufPP;
205 int negative = 0;
206
207 if ( value < 0 ) {
208 value = -value;
209 negative = 1;
210 }
211 if ( bufP < bufLast ) {
212 uchar c = ((value & 0x3f) << 1) | negative;
213 value >>= 6;
214 if ( value ) c |= 0x80;
215 *bufP++ = c;
216 }
217 while ( value && bufP < bufLast ) {
218 uchar c = value & 0x7f;
219 value >>= 7;
220 if ( value ) c |= 0x80;
221 *bufP++ = c;
222 }
223 *bufPP = bufP;
224 }
225
bpc_poolRefFileWriteEntry(DigestInfo * info,write_info * out)226 static void bpc_poolRefFileWriteEntry(DigestInfo *info, write_info *out)
227 {
228 if ( out->bufP > out->buf + sizeof(out->buf) - BPC_DIGEST_LEN_MAX - 16 ) write_file_flush(out);
229 *out->bufP++ = (uchar)info->digest.len;
230 memcpy(out->bufP, info->digest.digest, info->digest.len);
231 out->bufP += info->digest.len;
232 setVarInt(&out->bufP, out->buf + sizeof(out->buf), info->count);
233 }
234
235 /*
236 * Write a pool reference file from the hash table.
237 */
bpc_poolRefFileWrite(bpc_refCount_info * info,char * fileName)238 int bpc_poolRefFileWrite(bpc_refCount_info *info, char *fileName)
239 {
240 write_info out;
241
242 out.errorCnt = 0;
243 out.bufP = out.buf;
244 out.fd = open(fileName, O_WRONLY | O_CREAT | O_TRUNC, 0666);
245 if ( out.fd < 0 ) {
246 /*
247 * Maybe the directory doesn't exist - try to create it and try again
248 */
249 char dir[BPC_MAXPATHLEN], *p;
250
251 snprintf(dir, sizeof(dir), "%s", fileName);
252 if ( (p = strrchr(dir, '/')) ) {
253 *p = '\0';
254 bpc_path_create(dir);
255 out.fd = open(fileName, O_WRONLY | O_CREAT | O_TRUNC, 0666);
256 }
257 if ( out.fd < 0 ) {
258 bpc_logErrf("bpc_poolRefFileWrite: can't open/create pool delta file name %s (errno %d)\n", fileName, errno);
259 out.errorCnt++;
260 return out.errorCnt;
261 }
262 }
263
264 /*
265 * start with the magic number, then the total number of entries
266 */
267 CONV_UINT32_TO_BUF(out.bufP, BPC_POOL_REF_MAGIC);
268 setVarInt(&out.bufP, out.buf + sizeof(out.buf), bpc_hashtable_entryCount(&info->ht));
269
270 /*
271 * now write all the digests and counts
272 */
273 bpc_hashtable_iterate(&info->ht, (void*)bpc_poolRefFileWriteEntry, &out);
274
275 if ( out.bufP > out.buf ) write_file_flush(&out);
276 if ( close(out.fd) < 0 ) {
277 bpc_logErrf("bpc_poolRefFileWrite: pool delta close failed to %s (errno %d)\n", fileName, errno);
278 out.errorCnt++;
279 }
280 return out.errorCnt;
281 }
282
283 /*
284 * Read a pool reference file into the hash table, which should be already initialized.
285 */
bpc_poolRefFileRead(bpc_refCount_info * info,char * fileName)286 int bpc_poolRefFileRead(bpc_refCount_info *info, char *fileName)
287 {
288 int fd = open(fileName, O_RDONLY);
289 uint32 entryCnt, i;
290 bpc_digest digest;
291 int64 count;
292 size_t nRead = 0;
293 uint32 magic;
294 uchar buf[8 * 65536];
295 uchar *bufP = buf;
296
297 if ( fd < 0 ) {
298 bpc_logErrf("bpc_poolRefFileRead: can't open %s (errno %d)\n", fileName, errno);
299 return -1;
300 }
301 if ( bpc_poolRef_read_more_data(fd, buf, sizeof(buf), &nRead, &bufP, fileName) < 0 ) {
302 bpc_logErrf("bpc_poolRefFileRead: can't read data from %s (errno %d)\n", fileName, errno);
303 return -1;
304 }
305 magic = CONV_BUF_TO_UINT32(bufP);
306 bufP += 4;
307
308 if ( magic != BPC_POOL_REF_MAGIC ) {
309 bpc_logErrf("bpc_poolRefFileRead: bad magic number 0x%x (expected 0x%x)\n", magic, BPC_POOL_REF_MAGIC);
310 return -1;
311 }
312
313 entryCnt = getVarInt(&bufP, buf + nRead);
314 if ( BPC_LogLevel >= 4 ) bpc_logMsgf("bpc_poolRefFileRead: got %d entries (nRead = %d)\n", entryCnt, nRead);
315 /*
316 * make sure the hash table is big enough in one go to avoid multiple doublings
317 */
318 bpc_hashtable_growSize(&info->ht, entryCnt * 4 / 3);
319
320 for ( i = 0 ; i < entryCnt ; i++ ) {
321 DigestInfo *digestInfo;
322
323 if ( nRead == sizeof(buf) && bufP > buf + nRead - 64
324 && bpc_poolRef_read_more_data(fd, buf, sizeof(buf), &nRead, &bufP, fileName) < 0 ) {
325 bpc_logErrf("bpc_poolRefFileRead: can't read more data from %s (errno %d)\n", fileName, errno);
326 return -1;
327 }
328 digest.len = *bufP++;
329 if ( digest.len > (int)sizeof(digest.digest) ) digest.len = sizeof(digest.digest);
330 memcpy(digest.digest, bufP, digest.len);
331 bufP += digest.len;
332 count = getVarInt(&bufP, buf + nRead);
333
334 if ( BPC_LogLevel >= 7 ) bpc_logMsgf("bpc_poolRefFileRead: entry %d: digest len = %d, digest = 0x%02x%02x%02x...., count = %d\n",
335 i, digest.len, digest.digest[0], digest.digest[1], digest.digest[2], count);
336
337 digestInfo = bpc_hashtable_find(&info->ht, digest.digest, digest.len, 1);
338
339 if ( digestInfo->key.key == digest.digest ) {
340 /*
341 * new entry since the key points to our key - copy info into new node and set key locally
342 */
343 digestInfo->digest = digest;
344 digestInfo->key.key = digestInfo->digest.digest;
345 }
346 digestInfo->count = count;
347 }
348
349 close(fd);
350
351 return 0;
352 }
353
354 /*
355 * Mark this host backup as needing an fsck. Multiple requests can be supported with
356 * unique numbers. ext == 0 is used for the overall backup process, and it is removed when
357 * the backup finished. Various errors can use other extensions. If any files are
358 * present, an fsck is done either by the next backup, BackupPC_refCountUpdate or
359 * BackupPC_fsck.
360 */
bpc_poolRefRequestFsck(char * backupDir,int ext)361 void bpc_poolRefRequestFsck(char *backupDir, int ext)
362 {
363 char fileName[BPC_MAXPATHLEN];
364 int fd;
365
366 snprintf(fileName, sizeof(fileName), "%s/refCnt/needFsck%d", backupDir, ext);
367 if ( (fd = open(fileName, O_CREAT | O_WRONLY, 0660)) < 0 ) {
368 bpc_logErrf("bpc_poolRefRequestFsck: can't open/create fsck request file %s (errno %d)\n", fileName, errno);
369 }
370 }
371
372 /***********************************************************************
373 * Reference count deltas - we maintain two hash tables for uncompressed
374 * and compressed deltas.
375 ***********************************************************************/
376
377 /*
378 * Legacy support for <= 4.0.0beta3.
379 */
380 static bpc_deltaCount_info DeltaInfoOld;
381
382 static int OutputFileCnt = 0;
383
bpc_poolRefDeltaFileInit(bpc_deltaCount_info * info,char * hostDir)384 void bpc_poolRefDeltaFileInit(bpc_deltaCount_info *info, char *hostDir)
385 {
386 if ( snprintf(info->targetDir, sizeof(info->targetDir), "%s", hostDir)
387 >= (int)sizeof(info->targetDir) - 1 ) {
388 bpc_logErrf("bpc_poolRefDeltaFileInit: targetDir %s truncated\n", hostDir);
389 }
390 bpc_poolRefInit(&info->refCnt[0], 256);
391 bpc_poolRefInit(&info->refCnt[1], 1 << 20);
392 info->refCnt[0].initDone = info->refCnt[1].initDone = 1;
393 }
394
bpc_poolRefDeltaFileDestroy(bpc_deltaCount_info * info)395 void bpc_poolRefDeltaFileDestroy(bpc_deltaCount_info *info)
396 {
397 bpc_poolRefDestroy(&info->refCnt[0]);
398 bpc_poolRefDestroy(&info->refCnt[1]);
399 }
400
bpc_poolRefDeltaFileFlush(bpc_deltaCount_info * info)401 uint32 bpc_poolRefDeltaFileFlush(bpc_deltaCount_info *info)
402 {
403 char tempFileName[BPC_MAXPATHLEN], finalFileName[BPC_MAXPATHLEN];
404 int compress;
405 int errorCnt = 0;
406 int fd;
407
408 if ( !info ) info = &DeltaInfoOld; /* backward compatibility */
409 if ( !info->refCnt[0].initDone ) return 1;
410 for ( compress = 0 ; compress < 2 ; compress++ ) {
411 uint entryCnt = bpc_hashtable_entryCount(&info->refCnt[compress].ht);
412
413 if ( entryCnt == 0 ) continue;
414
415 do {
416 if ( snprintf(tempFileName, sizeof(tempFileName), "%s/refCnt/tpoolCntDelta_%d_%d_%d_%d",
417 info->targetDir, compress, BPC_TmpFileUnique, OutputFileCnt, getpid()) >= (int)sizeof(tempFileName) - 1 ) {
418 bpc_logErrf("bpc_poolRefDeltaFileFlush: pool delta file name %s truncated\n", tempFileName);
419 errorCnt++;
420 }
421 if ( (fd = open(tempFileName, O_RDONLY, 0666)) >= 0 ) {
422 close(fd);
423 OutputFileCnt++;
424 }
425 } while ( fd >= 0 );
426
427 errorCnt += bpc_poolRefFileWrite(&info->refCnt[compress], tempFileName);
428
429 if ( snprintf(finalFileName, sizeof(finalFileName), "%s/refCnt/poolCntDelta_%d_%d_%d_%d",
430 info->targetDir, compress, BPC_TmpFileUnique >= 0 ? BPC_TmpFileUnique : 0,
431 OutputFileCnt, getpid()) >= (int)sizeof(finalFileName) - 1 ) {
432 bpc_logErrf("bpc_poolRefDeltaFileFlush: pool delta file name %s truncated\n", finalFileName);
433 errorCnt++;
434 }
435 if ( errorCnt ) {
436 unlink(tempFileName);
437 continue;
438 }
439 if ( rename(tempFileName, finalFileName) != 0 ) {
440 bpc_logErrf("bpc_poolRefDeltaFileFlush: can't rename %s to %s (errno %d)\n", tempFileName, finalFileName, errno);
441 unlink(tempFileName);
442 errorCnt++;
443 }
444 if ( !errorCnt ) {
445 bpc_hashtable_erase(&info->refCnt[compress].ht);
446 }
447 }
448 OutputFileCnt++;
449 if ( errorCnt ) {
450 /*
451 * Need to fsck this particular backup on this host
452 */
453 bpc_poolRefRequestFsck(info->targetDir, getpid());
454 }
455 return errorCnt;
456 }
457
bpc_poolRefDeltaUpdate(bpc_deltaCount_info * info,int compress,bpc_digest * digest,int32 count)458 void bpc_poolRefDeltaUpdate(bpc_deltaCount_info *info, int compress, bpc_digest *digest, int32 count)
459 {
460 DigestInfo *digestInfo;
461
462 if ( !info ) info = &DeltaInfoOld; /* backward compatibility */
463 if ( !digest || digest->len == 0 ) return;
464 if ( !info->refCnt[0].initDone ) return;
465
466 digestInfo = bpc_hashtable_find(&info->refCnt[compress ? 1 : 0].ht, digest->digest, digest->len, 1);
467 if ( digestInfo->key.key == digest->digest ) {
468 /*
469 * new entry since the key points to our key - copy info into new node and set key locally
470 */
471 digestInfo->digest = *digest;
472 digestInfo->key.key = digestInfo->digest.digest;
473 }
474 digestInfo->count += count;
475 if ( BPC_LogLevel >= 8 ) {
476 char hexStr[BPC_DIGEST_LEN_MAX * 2 + 1];
477
478 bpc_digest_digest2str(&digestInfo->digest, hexStr);
479 bpc_logMsgf("bpc_poolRefDeltaUpdate(%s, %d), count now %d\n", hexStr, count, digestInfo->count);
480 }
481 if ( bpc_hashtable_entryCount(&info->refCnt[compress ? 1 : 0].ht) > (1 << 20) ) {
482 bpc_poolRefDeltaFileFlush(info);
483 }
484 }
485
bpc_poolRefDeltaPrint(bpc_deltaCount_info * info)486 void bpc_poolRefDeltaPrint(bpc_deltaCount_info *info)
487 {
488 if ( !info ) info = &DeltaInfoOld; /* backward compatibility */
489 if ( !info->refCnt[0].initDone ) return;
490 fprintf(stderr, "Uncompressed HT:\n");
491 bpc_hashtable_iterate(&info->refCnt[0].ht, (void*)bpc_poolRefPrintEntry, NULL);
492 fprintf(stderr, "Compressed HT:\n");
493 bpc_hashtable_iterate(&info->refCnt[1].ht, (void*)bpc_poolRefPrintEntry, NULL);
494 }
495
496 /*
497 * Legacy support for <= 4.0.0beta3.
498 */
bpc_poolRefDeltaFileInitOld(char * hostDir)499 void bpc_poolRefDeltaFileInitOld(char *hostDir)
500 {
501 bpc_poolRefDeltaFileInit(&DeltaInfoOld, hostDir);
502 }
503
bpc_poolRefDeltaFileFlushOld(void)504 uint32 bpc_poolRefDeltaFileFlushOld(void)
505 {
506 return bpc_poolRefDeltaFileFlush(&DeltaInfoOld);
507 }
508
509 /*
510 * Increment/decrement the reference count for the given digest
511 */
bpc_poolRefDeltaUpdateOld(int compress,bpc_digest * digest,int32 count)512 void bpc_poolRefDeltaUpdateOld(int compress, bpc_digest *digest, int32 count)
513 {
514 bpc_poolRefDeltaUpdate(&DeltaInfoOld, compress, digest, count);
515 }
516
bpc_poolRefDeltaPrintOld(void)517 void bpc_poolRefDeltaPrintOld(void)
518 {
519 bpc_poolRefDeltaPrint(&DeltaInfoOld);
520 }
521