1 /*
2  * Routines to provide reference counting
3  *
4  * Copyright (C) 2013 Craig Barratt.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with this program; if not, visit the http://fsf.org website.
18  */
19 
20 #include "backuppc.h"
21 
22 /*
23  * magic number that appears at the start of the reference count (or delta count file)
24  */
25 #define BPC_POOL_REF_MAGIC    (0x178e553c)
26 
27 #define CONV_BUF_TO_UINT32(buf)    ((buf)[0] << 24 | (buf)[1] << 16 | (buf)[2] << 8 | (buf)[3])
28 
29 #define CONV_UINT32_TO_BUF(buf, val)   { *(buf)++ = ((val) >> 24) & 0xff;               \
30                                          *(buf)++ = ((val) >> 16) & 0xff;               \
31                                          *(buf)++ = ((val) >> 8)  & 0xff;               \
32                                          *(buf)++ = ((val) >> 0)  & 0xff; }
33 
34 typedef struct {
35     bpc_hashtable_key key;
36     int32 count;
37     bpc_digest digest;
38 } DigestInfo;
39 
40 typedef struct {
41     int fd;
42     uchar *bufP;
43     int errorCnt;
44     uchar buf[4 * 65536];
45 } write_info;
46 
bpc_poolRefInit(bpc_refCount_info * info,int entryCnt)47 void bpc_poolRefInit(bpc_refCount_info *info, int entryCnt)
48 {
49     bpc_hashtable_create(&info->ht, entryCnt, sizeof(DigestInfo));
50 }
51 
bpc_poolRefDestroy(bpc_refCount_info * info)52 void bpc_poolRefDestroy(bpc_refCount_info *info)
53 {
54     bpc_hashtable_destroy(&info->ht);
55 }
56 
bpc_poolRefSet(bpc_refCount_info * info,bpc_digest * digest,int32 count)57 void bpc_poolRefSet(bpc_refCount_info *info, bpc_digest *digest, int32 count)
58 {
59     DigestInfo *d = bpc_hashtable_find(&info->ht, digest->digest, digest->len, 1);
60     if ( d->key.key == digest ) {
61         /*
62          * new entry - copy in digest
63          */
64         d->digest  = *digest;
65         d->key.key = d->digest.digest;
66     }
67     d->count = count;
68     return;
69 }
70 
bpc_poolRefGet(bpc_refCount_info * info,bpc_digest * digest,int32 * count)71 int bpc_poolRefGet(bpc_refCount_info *info, bpc_digest *digest, int32 *count)
72 {
73 
74     DigestInfo *d = bpc_hashtable_find(&info->ht, digest->digest, digest->len, 0);
75     if ( !d ) return -1;
76     *count = d->count;
77     return 0;
78 }
79 
bpc_poolRefDelete(bpc_refCount_info * info,bpc_digest * digest)80 int bpc_poolRefDelete(bpc_refCount_info *info, bpc_digest *digest)
81 {
82     DigestInfo *d = bpc_hashtable_find(&info->ht, digest->digest, digest->len, 0);
83     if ( !d ) return -1;
84     bpc_hashtable_nodeDelete(&info->ht, d);
85     return 0;
86 }
87 
bpc_poolRefIncr(bpc_refCount_info * info,bpc_digest * digest,int32 delta)88 int bpc_poolRefIncr(bpc_refCount_info *info, bpc_digest *digest, int32 delta)
89 {
90     DigestInfo *d = bpc_hashtable_find(&info->ht, digest->digest, digest->len, 1);
91     if ( d->key.key == digest ) {
92         /*
93          * new entry - copy in digest
94          */
95         d->digest  = *digest;
96         d->key.key = d->digest.digest;
97     }
98     d->count += delta;
99     if ( BPC_LogLevel >= 8 ) {
100         char hexStr[BPC_DIGEST_LEN_MAX * 2 + 1];
101 
102         bpc_digest_digest2str(&d->digest, hexStr);
103         bpc_logMsgf("bpc_poolRefIncr(%s, %d), count now %d\n", hexStr, delta, d->count);
104     }
105     return d->count;
106 }
107 
bpc_poolRefIterate(bpc_refCount_info * info,bpc_digest * digest,int32 * count,uint * idx)108 int bpc_poolRefIterate(bpc_refCount_info *info, bpc_digest *digest, int32 *count, uint *idx)
109 {
110     DigestInfo *d = bpc_hashtable_nextEntry(&info->ht, idx);
111     if ( !d ) return -1;
112     *digest = d->digest;
113     *count  = d->count;
114     return 0;
115 }
116 
bpc_poolRefPrintEntry(DigestInfo * info)117 void bpc_poolRefPrintEntry(DigestInfo *info)
118 {
119     char hexStr[BPC_DIGEST_LEN_MAX * 2 + 1];
120 
121     bpc_digest_digest2str(&info->digest, hexStr);
122     fprintf(stderr, "%-20s %d\n", hexStr, info->count);
123 }
124 
bpc_poolRefCountPrint(bpc_refCount_info * info)125 void bpc_poolRefCountPrint(bpc_refCount_info *info)
126 {
127     bpc_hashtable_iterate(&info->ht, (void*)bpc_poolRefPrintEntry, NULL);
128 }
129 
write_file_flush(write_info * out)130 static void write_file_flush(write_info *out)
131 {
132     uchar *p = out->buf;
133     while ( p < out->bufP ) {
134         int nWrite = write(out->fd, p, out->bufP - p);
135         if ( nWrite < 0 ) {
136             if ( errno == EINTR ) continue;
137             out->errorCnt++;
138             return;
139         }
140         p += nWrite;
141     }
142     out->bufP = out->buf;
143 }
144 
bpc_poolRef_read_more_data(int fd,uchar * buf,size_t bufSize,size_t * nRead,uchar ** bufPP,char * fileName)145 static int bpc_poolRef_read_more_data(int fd, uchar *buf, size_t bufSize, size_t *nRead, uchar **bufPP, char *fileName)
146 {
147     int thisRead;
148 
149     /*
150      * move the remaining part of the buffer down, and read more data
151      */
152     *nRead = (buf + *nRead) - *bufPP;
153     if ( *nRead > 0 ) memmove(buf, *bufPP, *nRead);
154     *bufPP = buf;
155     do {
156         do {
157             thisRead = read(fd, buf + *nRead, bufSize - *nRead);
158         } while ( thisRead < 0 && errno == EINTR );
159         if ( thisRead < 0 ) {
160             bpc_logErrf("bpc_poolRefFileRead: can't read more bytes from %s (errno %d)\n", fileName, errno);
161             return -1;
162         }
163         if ( BPC_LogLevel >= 8 ) bpc_logMsgf("bpc_poolRef_read_more_data: read %d bytes (nRead = %d, sizeof(buf) = %d)\n", thisRead, *nRead, bufSize);
164         *nRead += thisRead;
165     } while ( thisRead > 0 && *nRead < sizeof(buf) );
166     return 0;
167 }
168 
169 /*
170  * Read variable-length unsigned integer in 7 bit chunks, LSB first.
171  *
172  * To handle signed numbers, the very first LSB is a sign bit, meaning the first byte
173  * stores just 6 bits.
174  */
getVarInt(uchar ** bufPP,uchar * bufLast)175 static int64 getVarInt(uchar **bufPP, uchar *bufLast)
176 {
177     int64 result = 0;
178     uchar *bufP = *bufPP, c = '\0';
179     int i = 6, negative = 0;
180 
181     if ( bufP < bufLast ) {
182         c = *bufP++;
183         negative = c & 0x1;
184         result = (c & 0x7e) >> 1;
185     }
186     while ( bufP < bufLast && (c & 0x80) ) {
187         c = *bufP++;
188         result |= (c & 0x7f) << i;
189         i += 7;
190     }
191     *bufPP = bufP;
192     if ( negative ) result = -result;
193     return result;
194 }
195 
196 /*
197  * Write variable-length unsigned integer in 7 bit chunks, LSB first.
198  *
199  * To handle signed numbers, the very first LSB is a sign bit, meaning the first byte
200  * stores just 6 bits.
201  */
setVarInt(uchar ** bufPP,uchar * bufLast,int64 value)202 static void setVarInt(uchar **bufPP, uchar *bufLast, int64 value)
203 {
204     uchar *bufP = *bufPP;
205     int negative = 0;
206 
207     if ( value < 0 ) {
208         value = -value;
209         negative = 1;
210     }
211     if ( bufP < bufLast ) {
212         uchar c = ((value & 0x3f) << 1) | negative;
213         value >>= 6;
214         if ( value ) c |= 0x80;
215         *bufP++ = c;
216     }
217     while ( value && bufP < bufLast ) {
218         uchar c = value & 0x7f;
219         value >>= 7;
220         if ( value ) c |= 0x80;
221         *bufP++ = c;
222     }
223     *bufPP = bufP;
224 }
225 
bpc_poolRefFileWriteEntry(DigestInfo * info,write_info * out)226 static void bpc_poolRefFileWriteEntry(DigestInfo *info, write_info *out)
227 {
228     if ( out->bufP > out->buf + sizeof(out->buf) - BPC_DIGEST_LEN_MAX - 16 ) write_file_flush(out);
229     *out->bufP++ = (uchar)info->digest.len;
230     memcpy(out->bufP, info->digest.digest, info->digest.len);
231     out->bufP += info->digest.len;
232     setVarInt(&out->bufP, out->buf + sizeof(out->buf), info->count);
233 }
234 
235 /*
236  * Write a pool reference file from the hash table.
237  */
bpc_poolRefFileWrite(bpc_refCount_info * info,char * fileName)238 int bpc_poolRefFileWrite(bpc_refCount_info *info, char *fileName)
239 {
240     write_info out;
241 
242     out.errorCnt = 0;
243     out.bufP     = out.buf;
244     out.fd       = open(fileName, O_WRONLY | O_CREAT | O_TRUNC, 0666);
245     if ( out.fd < 0 ) {
246         /*
247          * Maybe the directory doesn't exist - try to create it and try again
248          */
249         char dir[BPC_MAXPATHLEN], *p;
250 
251         snprintf(dir, sizeof(dir), "%s", fileName);
252         if ( (p = strrchr(dir, '/')) ) {
253             *p = '\0';
254             bpc_path_create(dir);
255             out.fd = open(fileName, O_WRONLY | O_CREAT | O_TRUNC, 0666);
256         }
257         if ( out.fd < 0 ) {
258             bpc_logErrf("bpc_poolRefFileWrite: can't open/create pool delta file name %s (errno %d)\n", fileName, errno);
259             out.errorCnt++;
260             return out.errorCnt;
261         }
262     }
263 
264     /*
265      * start with the magic number, then the total number of entries
266      */
267     CONV_UINT32_TO_BUF(out.bufP, BPC_POOL_REF_MAGIC);
268     setVarInt(&out.bufP, out.buf + sizeof(out.buf), bpc_hashtable_entryCount(&info->ht));
269 
270     /*
271      * now write all the digests and counts
272      */
273     bpc_hashtable_iterate(&info->ht, (void*)bpc_poolRefFileWriteEntry, &out);
274 
275     if ( out.bufP > out.buf ) write_file_flush(&out);
276     if ( close(out.fd) < 0 ) {
277         bpc_logErrf("bpc_poolRefFileWrite: pool delta close failed to %s (errno %d)\n", fileName, errno);
278         out.errorCnt++;
279     }
280     return out.errorCnt;
281 }
282 
283 /*
284  * Read a pool reference file into the hash table, which should be already initialized.
285  */
bpc_poolRefFileRead(bpc_refCount_info * info,char * fileName)286 int bpc_poolRefFileRead(bpc_refCount_info *info, char *fileName)
287 {
288     int fd = open(fileName, O_RDONLY);
289     uint32 entryCnt, i;
290     bpc_digest digest;
291     int64 count;
292     size_t nRead = 0;
293     uint32 magic;
294     uchar buf[8 * 65536];
295     uchar *bufP = buf;
296 
297     if ( fd < 0 ) {
298         bpc_logErrf("bpc_poolRefFileRead: can't open %s (errno %d)\n", fileName, errno);
299         return -1;
300     }
301     if ( bpc_poolRef_read_more_data(fd, buf, sizeof(buf), &nRead, &bufP, fileName) < 0 ) {
302         bpc_logErrf("bpc_poolRefFileRead: can't read data from %s (errno %d)\n", fileName, errno);
303         return -1;
304     }
305     magic = CONV_BUF_TO_UINT32(bufP);
306     bufP += 4;
307 
308     if ( magic != BPC_POOL_REF_MAGIC ) {
309         bpc_logErrf("bpc_poolRefFileRead: bad magic number 0x%x (expected 0x%x)\n", magic, BPC_POOL_REF_MAGIC);
310         return -1;
311     }
312 
313     entryCnt = getVarInt(&bufP, buf + nRead);
314     if ( BPC_LogLevel >= 4 ) bpc_logMsgf("bpc_poolRefFileRead: got %d entries (nRead = %d)\n", entryCnt, nRead);
315     /*
316      * make sure the hash table is big enough in one go to avoid multiple doublings
317      */
318     bpc_hashtable_growSize(&info->ht, entryCnt * 4 / 3);
319 
320     for ( i = 0 ; i < entryCnt ; i++ ) {
321         DigestInfo *digestInfo;
322 
323         if ( nRead == sizeof(buf) && bufP > buf + nRead - 64
324                 && bpc_poolRef_read_more_data(fd, buf, sizeof(buf), &nRead, &bufP, fileName) < 0 ) {
325             bpc_logErrf("bpc_poolRefFileRead: can't read more data from %s (errno %d)\n", fileName, errno);
326             return -1;
327         }
328         digest.len = *bufP++;
329         if ( digest.len > (int)sizeof(digest.digest) ) digest.len = sizeof(digest.digest);
330         memcpy(digest.digest, bufP, digest.len);
331         bufP += digest.len;
332         count = getVarInt(&bufP, buf + nRead);
333 
334         if ( BPC_LogLevel >= 7 ) bpc_logMsgf("bpc_poolRefFileRead: entry %d: digest len = %d, digest = 0x%02x%02x%02x...., count = %d\n",
335                                               i, digest.len, digest.digest[0], digest.digest[1], digest.digest[2], count);
336 
337         digestInfo = bpc_hashtable_find(&info->ht, digest.digest, digest.len, 1);
338 
339         if ( digestInfo->key.key == digest.digest ) {
340             /*
341              * new entry since the key points to our key - copy info into new node and set key locally
342              */
343             digestInfo->digest  = digest;
344             digestInfo->key.key = digestInfo->digest.digest;
345         }
346         digestInfo->count = count;
347     }
348 
349     close(fd);
350 
351     return 0;
352 }
353 
354 /*
355  * Mark this host backup as needing an fsck.  Multiple requests can be supported with
356  * unique numbers.  ext == 0 is used for the overall backup process, and it is removed when
357  * the backup finished.  Various errors can use other extensions.  If any files are
358  * present, an fsck is done either by the next backup, BackupPC_refCountUpdate or
359  * BackupPC_fsck.
360  */
bpc_poolRefRequestFsck(char * backupDir,int ext)361 void bpc_poolRefRequestFsck(char *backupDir, int ext)
362 {
363     char fileName[BPC_MAXPATHLEN];
364     int fd;
365 
366     snprintf(fileName, sizeof(fileName), "%s/refCnt/needFsck%d", backupDir, ext);
367     if ( (fd = open(fileName, O_CREAT | O_WRONLY, 0660)) < 0 ) {
368         bpc_logErrf("bpc_poolRefRequestFsck: can't open/create fsck request file %s (errno %d)\n", fileName, errno);
369     }
370 }
371 
372 /***********************************************************************
373  * Reference count deltas - we maintain two hash tables for uncompressed
374  * and compressed deltas.
375  ***********************************************************************/
376 
377 /*
378  * Legacy support for <= 4.0.0beta3.
379  */
380 static bpc_deltaCount_info DeltaInfoOld;
381 
382 static int OutputFileCnt = 0;
383 
bpc_poolRefDeltaFileInit(bpc_deltaCount_info * info,char * hostDir)384 void bpc_poolRefDeltaFileInit(bpc_deltaCount_info *info, char *hostDir)
385 {
386     if ( snprintf(info->targetDir, sizeof(info->targetDir), "%s", hostDir)
387 		>= (int)sizeof(info->targetDir) - 1 ) {
388 	bpc_logErrf("bpc_poolRefDeltaFileInit: targetDir %s truncated\n", hostDir);
389     }
390     bpc_poolRefInit(&info->refCnt[0], 256);
391     bpc_poolRefInit(&info->refCnt[1], 1 << 20);
392     info->refCnt[0].initDone = info->refCnt[1].initDone = 1;
393 }
394 
bpc_poolRefDeltaFileDestroy(bpc_deltaCount_info * info)395 void bpc_poolRefDeltaFileDestroy(bpc_deltaCount_info *info)
396 {
397     bpc_poolRefDestroy(&info->refCnt[0]);
398     bpc_poolRefDestroy(&info->refCnt[1]);
399 }
400 
bpc_poolRefDeltaFileFlush(bpc_deltaCount_info * info)401 uint32 bpc_poolRefDeltaFileFlush(bpc_deltaCount_info *info)
402 {
403     char tempFileName[BPC_MAXPATHLEN], finalFileName[BPC_MAXPATHLEN];
404     int compress;
405     int errorCnt = 0;
406     int fd;
407 
408     if ( !info ) info = &DeltaInfoOld;         /* backward compatibility */
409     if ( !info->refCnt[0].initDone ) return 1;
410     for ( compress = 0 ; compress < 2 ; compress++ ) {
411         uint entryCnt = bpc_hashtable_entryCount(&info->refCnt[compress].ht);
412 
413         if ( entryCnt == 0 ) continue;
414 
415         do {
416             if ( snprintf(tempFileName, sizeof(tempFileName), "%s/refCnt/tpoolCntDelta_%d_%d_%d_%d",
417                           info->targetDir, compress, BPC_TmpFileUnique, OutputFileCnt, getpid()) >= (int)sizeof(tempFileName) - 1 ) {
418                 bpc_logErrf("bpc_poolRefDeltaFileFlush: pool delta file name %s truncated\n", tempFileName);
419                 errorCnt++;
420             }
421             if ( (fd = open(tempFileName, O_RDONLY, 0666)) >= 0 ) {
422                 close(fd);
423                 OutputFileCnt++;
424             }
425         } while ( fd >= 0 );
426 
427         errorCnt += bpc_poolRefFileWrite(&info->refCnt[compress], tempFileName);
428 
429         if ( snprintf(finalFileName, sizeof(finalFileName), "%s/refCnt/poolCntDelta_%d_%d_%d_%d",
430                       info->targetDir, compress, BPC_TmpFileUnique >= 0 ? BPC_TmpFileUnique : 0,
431                       OutputFileCnt, getpid()) >= (int)sizeof(finalFileName) - 1 ) {
432             bpc_logErrf("bpc_poolRefDeltaFileFlush: pool delta file name %s truncated\n", finalFileName);
433             errorCnt++;
434         }
435         if ( errorCnt ) {
436             unlink(tempFileName);
437             continue;
438         }
439         if ( rename(tempFileName, finalFileName) != 0 ) {
440             bpc_logErrf("bpc_poolRefDeltaFileFlush: can't rename %s to %s (errno %d)\n", tempFileName, finalFileName, errno);
441             unlink(tempFileName);
442             errorCnt++;
443         }
444         if ( !errorCnt ) {
445             bpc_hashtable_erase(&info->refCnt[compress].ht);
446         }
447     }
448     OutputFileCnt++;
449     if ( errorCnt ) {
450         /*
451          * Need to fsck this particular backup on this host
452          */
453         bpc_poolRefRequestFsck(info->targetDir, getpid());
454     }
455     return errorCnt;
456 }
457 
bpc_poolRefDeltaUpdate(bpc_deltaCount_info * info,int compress,bpc_digest * digest,int32 count)458 void bpc_poolRefDeltaUpdate(bpc_deltaCount_info *info, int compress, bpc_digest *digest, int32 count)
459 {
460     DigestInfo *digestInfo;
461 
462     if ( !info ) info = &DeltaInfoOld;         /* backward compatibility */
463     if ( !digest || digest->len == 0 ) return;
464     if ( !info->refCnt[0].initDone ) return;
465 
466     digestInfo = bpc_hashtable_find(&info->refCnt[compress ? 1 : 0].ht, digest->digest, digest->len, 1);
467     if ( digestInfo->key.key == digest->digest ) {
468         /*
469          * new entry since the key points to our key - copy info into new node and set key locally
470          */
471         digestInfo->digest  = *digest;
472         digestInfo->key.key = digestInfo->digest.digest;
473     }
474     digestInfo->count += count;
475     if ( BPC_LogLevel >= 8 ) {
476         char hexStr[BPC_DIGEST_LEN_MAX * 2 + 1];
477 
478         bpc_digest_digest2str(&digestInfo->digest, hexStr);
479         bpc_logMsgf("bpc_poolRefDeltaUpdate(%s, %d), count now %d\n", hexStr, count, digestInfo->count);
480     }
481     if ( bpc_hashtable_entryCount(&info->refCnt[compress ? 1 : 0].ht) > (1 << 20) ) {
482         bpc_poolRefDeltaFileFlush(info);
483     }
484 }
485 
bpc_poolRefDeltaPrint(bpc_deltaCount_info * info)486 void bpc_poolRefDeltaPrint(bpc_deltaCount_info *info)
487 {
488     if ( !info ) info = &DeltaInfoOld;         /* backward compatibility */
489     if ( !info->refCnt[0].initDone ) return;
490     fprintf(stderr, "Uncompressed HT:\n");
491     bpc_hashtable_iterate(&info->refCnt[0].ht, (void*)bpc_poolRefPrintEntry, NULL);
492     fprintf(stderr, "Compressed HT:\n");
493     bpc_hashtable_iterate(&info->refCnt[1].ht, (void*)bpc_poolRefPrintEntry, NULL);
494 }
495 
496 /*
497  * Legacy support for <= 4.0.0beta3.
498  */
bpc_poolRefDeltaFileInitOld(char * hostDir)499 void bpc_poolRefDeltaFileInitOld(char *hostDir)
500 {
501     bpc_poolRefDeltaFileInit(&DeltaInfoOld, hostDir);
502 }
503 
bpc_poolRefDeltaFileFlushOld(void)504 uint32 bpc_poolRefDeltaFileFlushOld(void)
505 {
506     return bpc_poolRefDeltaFileFlush(&DeltaInfoOld);
507 }
508 
509 /*
510  * Increment/decrement the reference count for the given digest
511  */
bpc_poolRefDeltaUpdateOld(int compress,bpc_digest * digest,int32 count)512 void bpc_poolRefDeltaUpdateOld(int compress, bpc_digest *digest, int32 count)
513 {
514     bpc_poolRefDeltaUpdate(&DeltaInfoOld, compress, digest, count);
515 }
516 
bpc_poolRefDeltaPrintOld(void)517 void bpc_poolRefDeltaPrintOld(void)
518 {
519     bpc_poolRefDeltaPrint(&DeltaInfoOld);
520 }
521