1 /*
2 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3 *
4 * SPDX-License-Identifier: MPL-2.0
5 *
6 * This Source Code Form is subject to the terms of the Mozilla Public
7 * License, v. 2.0. If a copy of the MPL was not distributed with this
8 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
9 *
10 * See the COPYRIGHT file distributed with this work for additional
11 * information regarding copyright ownership.
12 */
13
14 #include <errno.h>
15 #include <inttypes.h>
16 #include <stdbool.h>
17 #include <stdlib.h>
18 #include <unistd.h>
19
20 #include <isc/file.h>
21 #include <isc/mem.h>
22 #include <isc/print.h>
23 #include <isc/serial.h>
24 #include <isc/stdio.h>
25 #include <isc/string.h>
26 #include <isc/util.h>
27
28 #include <dns/compress.h>
29 #include <dns/db.h>
30 #include <dns/dbiterator.h>
31 #include <dns/diff.h>
32 #include <dns/fixedname.h>
33 #include <dns/journal.h>
34 #include <dns/log.h>
35 #include <dns/rdataset.h>
36 #include <dns/rdatasetiter.h>
37 #include <dns/result.h>
38 #include <dns/soa.h>
39
40 /*! \file
41 * \brief Journaling.
42 *
43 * A journal file consists of
44 *
45 * \li A fixed-size header of type journal_rawheader_t.
46 *
47 * \li The index. This is an unordered array of index entries
48 * of type journal_rawpos_t giving the locations
49 * of some arbitrary subset of the journal's addressable
50 * transactions. The index entries are used as hints to
51 * speed up the process of locating a transaction with a given
52 * serial number. Unused index entries have an "offset"
53 * field of zero. The size of the index can vary between
54 * journal files, but does not change during the lifetime
55 * of a file. The size can be zero.
56 *
57 * \li The journal data. This consists of one or more transactions.
58 * Each transaction begins with a transaction header of type
59 * journal_rawxhdr_t. The transaction header is followed by a
60 * sequence of RRs, similar in structure to an IXFR difference
61 * sequence (RFC1995). That is, the pre-transaction SOA,
62 * zero or more other deleted RRs, the post-transaction SOA,
63 * and zero or more other added RRs. Unlike in IXFR, each RR
64 * is prefixed with a 32-bit length.
65 *
66 * The journal data part grows as new transactions are
67 * appended to the file. Only those transactions
68 * whose serial number is current-(2^31-1) to current
69 * are considered "addressable" and may be pointed
70 * to from the header or index. They may be preceded
71 * by old transactions that are no longer addressable,
72 * and they may be followed by transactions that were
73 * appended to the journal but never committed by updating
74 * the "end" position in the header. The latter will
75 * be overwritten when new transactions are added.
76 */
77
78 /**************************************************************************/
79 /*
80 * Miscellaneous utilities.
81 */
82
/*%
 * Leading isc_log_write() arguments used by the journal code:
 * logging context, category and module.
 */
#define JOURNAL_COMMON_LOGARGS \
	dns_lctx, DNS_LOGCATEGORY_GENERAL, DNS_LOGMODULE_JOURNAL

/*% The common logging arguments followed by debug level 'n'. */
#define JOURNAL_DEBUG_LOGARGS(n) JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(n)
87
/*%
 * Store 'code' in the local variable 'result' and, unless it is
 * ISC_R_SUCCESS, jump to the enclosing function's 'failure' label.
 *
 * It would be nonsensical (or at least obtuse) to use FAIL() with an
 * ISC_R_SUCCESS code, but the test is there to keep the Solaris compiler
 * from complaining about "end-of-loop code not reached".
 */
#define FAIL(code)                           \
	do {                                 \
		result = (code);             \
		if (result != ISC_R_SUCCESS) \
			goto failure;        \
	} while (0)

/*%
 * Evaluate 'op', keep its value in 'result', and jump to 'failure' if
 * it did not succeed.  The expansion is the same as FAIL().
 */
#define CHECK(op) FAIL(op)
106
/*%
 * Bit in the raw header's 'flags' byte that mirrors the in-core
 * 'serialset' field.
 */
#define JOURNAL_SERIALSET 0x01U
108
109 static isc_result_t
110 index_to_disk(dns_journal_t *);
111
/*%
 * Assemble a 32-bit unsigned value from the four bytes at 'p',
 * most significant byte first.
 */
static inline uint32_t
decode_uint32(unsigned char *p) {
	uint32_t value = 0;

	for (int i = 0; i < 4; i++) {
		value = (value << 8) | (uint32_t)p[i];
	}
	return (value);
}
117
/*%
 * Store 'val' into the four bytes at 'p', most significant byte first.
 */
static inline void
encode_uint32(uint32_t val, unsigned char *p) {
	/* Fill from the last byte backwards, peeling off the low octet. */
	for (int i = 3; i >= 0; i--) {
		p[i] = (uint8_t)(val & 0xffU);
		val >>= 8;
	}
}
125
126 isc_result_t
dns_db_createsoatuple(dns_db_t * db,dns_dbversion_t * ver,isc_mem_t * mctx,dns_diffop_t op,dns_difftuple_t ** tp)127 dns_db_createsoatuple(dns_db_t *db, dns_dbversion_t *ver, isc_mem_t *mctx,
128 dns_diffop_t op, dns_difftuple_t **tp) {
129 isc_result_t result;
130 dns_dbnode_t *node;
131 dns_rdataset_t rdataset;
132 dns_rdata_t rdata = DNS_RDATA_INIT;
133 dns_fixedname_t fixed;
134 dns_name_t *zonename;
135
136 zonename = dns_fixedname_initname(&fixed);
137 dns_name_copynf(dns_db_origin(db), zonename);
138
139 node = NULL;
140 result = dns_db_findnode(db, zonename, false, &node);
141 if (result != ISC_R_SUCCESS) {
142 goto nonode;
143 }
144
145 dns_rdataset_init(&rdataset);
146 result = dns_db_findrdataset(db, node, ver, dns_rdatatype_soa, 0,
147 (isc_stdtime_t)0, &rdataset, NULL);
148 if (result != ISC_R_SUCCESS) {
149 goto freenode;
150 }
151
152 result = dns_rdataset_first(&rdataset);
153 if (result != ISC_R_SUCCESS) {
154 goto freenode;
155 }
156
157 dns_rdataset_current(&rdataset, &rdata);
158 dns_rdataset_getownercase(&rdataset, zonename);
159
160 result = dns_difftuple_create(mctx, op, zonename, rdataset.ttl, &rdata,
161 tp);
162
163 dns_rdataset_disassociate(&rdataset);
164 dns_db_detachnode(db, &node);
165 return (result);
166
167 freenode:
168 dns_db_detachnode(db, &node);
169 nonode:
170 UNEXPECTED_ERROR(__FILE__, __LINE__, "missing SOA");
171 return (result);
172 }
173
174 /* Journaling */
175
/*%
 * On-disk representation of a "pointer" to a journal entry: a serial
 * number paired with a file offset.  The journal header uses these to
 * locate the beginning and end of the journal, and the journal index
 * uses them to locate other transactions.
 */
typedef struct {
	/*% SOA serial before update. */
	unsigned char serial[4];
	/*%
	 * Offset from beginning of file.
	 *
	 * XXXRTH Should offset be 8 bytes?
	 * XXXDCL ... probably, since isc_offset_t is 8 bytes on many OSs.
	 * XXXAG  ... but we will not be able to seek >2G anyway on many
	 *            platforms as long as we are using fseek() rather
	 *            than lseek().
	 */
	unsigned char offset[4];
} journal_rawpos_t;
193
/*%
 * Size of the on-disk journal header, in bytes.  The header has a
 * fixed size, leaving some spare room for future extensions.
 */
#define JOURNAL_HEADER_SIZE 64
199
/*%
 * Transaction header layouts: version 1 is journal_rawxhdr_ver1_t
 * (no record count), version 2 is journal_rawxhdr_t.
 */
typedef enum {
	XHDR_VERSION1 = 1, /*%< size, serial0, serial1 */
	XHDR_VERSION2 = 2, /*%< size, count, serial0, serial1 */
} xhdr_version_t;
204
205 /*%
206 * The on-disk representation of the journal header.
207 * All numbers are stored in big-endian order.
208 */
209 typedef union {
210 struct {
211 /*% File format version ID. */
212 unsigned char format[16];
213 /*% Position of the first addressable transaction */
214 journal_rawpos_t begin;
215 /*% Position of the next (yet nonexistent) transaction. */
216 journal_rawpos_t end;
217 /*% Number of index entries following the header. */
218 unsigned char index_size[4];
219 /*% Source serial number. */
220 unsigned char sourceserial[4];
221 unsigned char flags;
222 } h;
223 /* Pad the header to a fixed size. */
224 unsigned char pad[JOURNAL_HEADER_SIZE];
225 } journal_rawheader_t;
226
/*%
 * Version 2 transaction header as stored on disk.  Every transaction
 * starts with one of these.
 */
typedef struct {
	/*% In bytes, excluding header. */
	unsigned char size[4];
	/*% Number of records in transaction. */
	unsigned char count[4];
	/*% SOA serial before update. */
	unsigned char serial0[4];
	/*% SOA serial after update. */
	unsigned char serial1[4];
} journal_rawxhdr_t;
237
/*%
 * Version 1 (old-style) raw transaction header, kept for the backward
 * compatibility mode.  It is the version 2 header without the record
 * count.
 */
typedef struct {
	/*% Transaction size field. */
	unsigned char size[4];
	/*% First serial field. */
	unsigned char serial0[4];
	/*% Second serial field. */
	unsigned char serial1[4];
} journal_rawxhdr_ver1_t;
247
/*%
 * RR header as stored on disk.  Every RR in a transaction is preceded
 * by one of these.
 */
typedef struct {
	/*% In bytes, excluding header. */
	unsigned char size[4];
} journal_rawrrhdr_t;
255
256 /*%
257 * The in-core representation of the journal header.
258 */
259 typedef struct {
260 uint32_t serial;
261 isc_offset_t offset;
262 } journal_pos_t;
263
/*% True if 'pos' is in use, i.e. its offset is nonzero. */
#define POS_VALID(pos) ((pos).offset != 0)

/*% Mark 'pos' as unused by clearing its offset and its serial. */
#define POS_INVALIDATE(pos) ((pos).offset = 0, (pos).serial = 0)
266
267 typedef struct {
268 unsigned char format[16];
269 journal_pos_t begin;
270 journal_pos_t end;
271 uint32_t index_size;
272 uint32_t sourceserial;
273 bool serialset;
274 } journal_header_t;
275
/*%
 * Decoded (in-core) form of a transaction header.
 */
typedef struct {
	uint32_t size;	  /*%< Transaction size field. */
	uint32_t count;	  /*%< Record count; 0 when read from a version 1
			   *   header. */
	uint32_t serial0; /*%< First serial field. */
	uint32_t serial1; /*%< Second serial field. */
} journal_xhdr_t;
285
/*%
 * Decoded (in-core) form of an RR header.
 */
typedef struct {
	uint32_t size; /*%< Decoded 'size' field of the raw RR header. */
} journal_rrhdr_t;
292
293 /*%
294 * Initial contents to store in the header of a newly created
295 * journal file.
296 *
297 * The header starts with the magic string ";BIND LOG V9.2\n"
298 * to identify the file as a BIND 9 journal file. An ASCII
299 * identification string is used rather than a binary magic
300 * number to be consistent with BIND 8 (BIND 8 journal files
301 * are ASCII text files).
302 */
303
304 static journal_header_t journal_header_ver1 = {
305 ";BIND LOG V9\n", { 0, 0 }, { 0, 0 }, 0, 0, 0
306 };
307 static journal_header_t initial_journal_header = {
308 ";BIND LOG V9.2\n", { 0, 0 }, { 0, 0 }, 0, 0, 0
309 };
310
/*%
 * True if the header pointed to by 'h' has its begin and end positions
 * at the same file offset.
 */
#define JOURNAL_EMPTY(h) ((h)->begin.offset == (h)->end.offset)
312
/*%
 * State of an open journal.
 */
typedef enum {
	JOURNAL_STATE_INVALID = 0, /*%< Initial state, before a successful
				    *   open. */
	JOURNAL_STATE_READ = 1,	   /*%< Opened read-only. */
	JOURNAL_STATE_WRITE = 2,   /*%< Opened writable. */
	JOURNAL_STATE_TRANSACTION = 3,
	JOURNAL_STATE_INLINE = 4
} journal_state_t;
320
321 struct dns_journal {
322 unsigned int magic; /*%< JOUR */
323 isc_mem_t *mctx; /*%< Memory context */
324 journal_state_t state;
325 xhdr_version_t xhdr_version; /*%< Expected transaction header version */
326 bool header_ver1; /*%< Transaction header compatibility
327 * mode is allowed */
328 bool recovered; /*%< A recoverable error was found
329 * while reading the journal */
330 char *filename; /*%< Journal file name */
331 FILE *fp; /*%< File handle */
332 isc_offset_t offset; /*%< Current file offset */
333 journal_xhdr_t curxhdr; /*%< Current transaction header */
334 journal_header_t header; /*%< In-core journal header */
335 unsigned char *rawindex; /*%< In-core buffer for journal index
336 * in on-disk format */
337 journal_pos_t *index; /*%< In-core journal index */
338
339 /*% Current transaction state (when writing). */
340 struct {
341 unsigned int n_soa; /*%< Number of SOAs seen */
342 unsigned int n_rr; /*%< Number of RRs to write */
343 journal_pos_t pos[2]; /*%< Begin/end position */
344 } x;
345
346 /*% Iteration state (when reading). */
347 struct {
348 /* These define the part of the journal we iterate over. */
349 journal_pos_t bpos; /*%< Position before first, */
350 journal_pos_t cpos; /*%< before current, */
351 journal_pos_t epos; /*%< and after last transaction */
352 /* The rest is iterator state. */
353 uint32_t current_serial; /*%< Current SOA serial */
354 isc_buffer_t source; /*%< Data from disk */
355 isc_buffer_t target; /*%< Data from _fromwire check */
356 dns_decompress_t dctx; /*%< Dummy decompression ctx */
357 dns_name_t name; /*%< Current domain name */
358 dns_rdata_t rdata; /*%< Current rdata */
359 uint32_t ttl; /*%< Current TTL */
360 unsigned int xsize; /*%< Size of transaction data */
361 unsigned int xpos; /*%< Current position in it */
362 isc_result_t result; /*%< Result of last call */
363 } it;
364 };
365
/*% Magic number stored in dns_journal.magic. */
#define DNS_JOURNAL_MAGIC ISC_MAGIC('J', 'O', 'U', 'R')

/*% Magic-number validity check for the journal pointer 't'. */
#define DNS_JOURNAL_VALID(t) ISC_MAGIC_VALID(t, DNS_JOURNAL_MAGIC)
368
369 static void
journal_pos_decode(journal_rawpos_t * raw,journal_pos_t * cooked)370 journal_pos_decode(journal_rawpos_t *raw, journal_pos_t *cooked) {
371 cooked->serial = decode_uint32(raw->serial);
372 cooked->offset = decode_uint32(raw->offset);
373 }
374
375 static void
journal_pos_encode(journal_rawpos_t * raw,journal_pos_t * cooked)376 journal_pos_encode(journal_rawpos_t *raw, journal_pos_t *cooked) {
377 encode_uint32(cooked->serial, raw->serial);
378 encode_uint32(cooked->offset, raw->offset);
379 }
380
381 static void
journal_header_decode(journal_rawheader_t * raw,journal_header_t * cooked)382 journal_header_decode(journal_rawheader_t *raw, journal_header_t *cooked) {
383 INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
384
385 memmove(cooked->format, raw->h.format, sizeof(cooked->format));
386 journal_pos_decode(&raw->h.begin, &cooked->begin);
387 journal_pos_decode(&raw->h.end, &cooked->end);
388 cooked->index_size = decode_uint32(raw->h.index_size);
389 cooked->sourceserial = decode_uint32(raw->h.sourceserial);
390 cooked->serialset = ((raw->h.flags & JOURNAL_SERIALSET) != 0);
391 }
392
393 static void
journal_header_encode(journal_header_t * cooked,journal_rawheader_t * raw)394 journal_header_encode(journal_header_t *cooked, journal_rawheader_t *raw) {
395 unsigned char flags = 0;
396
397 INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
398
399 memset(raw->pad, 0, sizeof(raw->pad));
400 memmove(raw->h.format, cooked->format, sizeof(raw->h.format));
401 journal_pos_encode(&raw->h.begin, &cooked->begin);
402 journal_pos_encode(&raw->h.end, &cooked->end);
403 encode_uint32(cooked->index_size, raw->h.index_size);
404 encode_uint32(cooked->sourceserial, raw->h.sourceserial);
405 if (cooked->serialset) {
406 flags |= JOURNAL_SERIALSET;
407 }
408 raw->h.flags = flags;
409 }
410
411 /*
412 * Journal file I/O subroutines, with error checking and reporting.
413 */
414 static isc_result_t
journal_seek(dns_journal_t * j,uint32_t offset)415 journal_seek(dns_journal_t *j, uint32_t offset) {
416 isc_result_t result;
417
418 result = isc_stdio_seek(j->fp, (off_t)offset, SEEK_SET);
419 if (result != ISC_R_SUCCESS) {
420 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
421 "%s: seek: %s", j->filename,
422 isc_result_totext(result));
423 return (ISC_R_UNEXPECTED);
424 }
425 j->offset = offset;
426 return (ISC_R_SUCCESS);
427 }
428
429 static isc_result_t
journal_read(dns_journal_t * j,void * mem,size_t nbytes)430 journal_read(dns_journal_t *j, void *mem, size_t nbytes) {
431 isc_result_t result;
432
433 result = isc_stdio_read(mem, 1, nbytes, j->fp, NULL);
434 if (result != ISC_R_SUCCESS) {
435 if (result == ISC_R_EOF) {
436 return (ISC_R_NOMORE);
437 }
438 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
439 "%s: read: %s", j->filename,
440 isc_result_totext(result));
441 return (ISC_R_UNEXPECTED);
442 }
443 j->offset += (isc_offset_t)nbytes;
444 return (ISC_R_SUCCESS);
445 }
446
447 static isc_result_t
journal_write(dns_journal_t * j,void * mem,size_t nbytes)448 journal_write(dns_journal_t *j, void *mem, size_t nbytes) {
449 isc_result_t result;
450
451 result = isc_stdio_write(mem, 1, nbytes, j->fp, NULL);
452 if (result != ISC_R_SUCCESS) {
453 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
454 "%s: write: %s", j->filename,
455 isc_result_totext(result));
456 return (ISC_R_UNEXPECTED);
457 }
458 j->offset += (isc_offset_t)nbytes;
459 return (ISC_R_SUCCESS);
460 }
461
462 static isc_result_t
journal_fsync(dns_journal_t * j)463 journal_fsync(dns_journal_t *j) {
464 isc_result_t result;
465
466 result = isc_stdio_flush(j->fp);
467 if (result != ISC_R_SUCCESS) {
468 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
469 "%s: flush: %s", j->filename,
470 isc_result_totext(result));
471 return (ISC_R_UNEXPECTED);
472 }
473 result = isc_stdio_sync(j->fp);
474 if (result != ISC_R_SUCCESS) {
475 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
476 "%s: fsync: %s", j->filename,
477 isc_result_totext(result));
478 return (ISC_R_UNEXPECTED);
479 }
480 return (ISC_R_SUCCESS);
481 }
482
483 /*
484 * Read/write a transaction header at the current file position.
485 */
486 static isc_result_t
journal_read_xhdr(dns_journal_t * j,journal_xhdr_t * xhdr)487 journal_read_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr) {
488 isc_result_t result;
489
490 j->it.cpos.offset = j->offset;
491
492 switch (j->xhdr_version) {
493 case XHDR_VERSION1: {
494 journal_rawxhdr_ver1_t raw;
495 result = journal_read(j, &raw, sizeof(raw));
496 if (result != ISC_R_SUCCESS) {
497 return (result);
498 }
499 xhdr->size = decode_uint32(raw.size);
500 xhdr->count = 0;
501 xhdr->serial0 = decode_uint32(raw.serial0);
502 xhdr->serial1 = decode_uint32(raw.serial1);
503 j->curxhdr = *xhdr;
504 return (ISC_R_SUCCESS);
505 }
506
507 case XHDR_VERSION2: {
508 journal_rawxhdr_t raw;
509 result = journal_read(j, &raw, sizeof(raw));
510 if (result != ISC_R_SUCCESS) {
511 return (result);
512 }
513 xhdr->size = decode_uint32(raw.size);
514 xhdr->count = decode_uint32(raw.count);
515 xhdr->serial0 = decode_uint32(raw.serial0);
516 xhdr->serial1 = decode_uint32(raw.serial1);
517 j->curxhdr = *xhdr;
518 return (ISC_R_SUCCESS);
519 }
520
521 default:
522 return (ISC_R_NOTIMPLEMENTED);
523 }
524 }
525
526 static isc_result_t
journal_write_xhdr(dns_journal_t * j,uint32_t size,uint32_t count,uint32_t serial0,uint32_t serial1)527 journal_write_xhdr(dns_journal_t *j, uint32_t size, uint32_t count,
528 uint32_t serial0, uint32_t serial1) {
529 if (j->header_ver1) {
530 journal_rawxhdr_ver1_t raw;
531 encode_uint32(size, raw.size);
532 encode_uint32(serial0, raw.serial0);
533 encode_uint32(serial1, raw.serial1);
534 return (journal_write(j, &raw, sizeof(raw)));
535 } else {
536 journal_rawxhdr_t raw;
537 encode_uint32(size, raw.size);
538 encode_uint32(count, raw.count);
539 encode_uint32(serial0, raw.serial0);
540 encode_uint32(serial1, raw.serial1);
541 return (journal_write(j, &raw, sizeof(raw)));
542 }
543 }
544
545 /*
546 * Read an RR header at the current file position.
547 */
548
549 static isc_result_t
journal_read_rrhdr(dns_journal_t * j,journal_rrhdr_t * rrhdr)550 journal_read_rrhdr(dns_journal_t *j, journal_rrhdr_t *rrhdr) {
551 journal_rawrrhdr_t raw;
552 isc_result_t result;
553
554 result = journal_read(j, &raw, sizeof(raw));
555 if (result != ISC_R_SUCCESS) {
556 return (result);
557 }
558 rrhdr->size = decode_uint32(raw.size);
559 return (ISC_R_SUCCESS);
560 }
561
562 static isc_result_t
journal_file_create(isc_mem_t * mctx,bool downgrade,const char * filename)563 journal_file_create(isc_mem_t *mctx, bool downgrade, const char *filename) {
564 FILE *fp = NULL;
565 isc_result_t result;
566 journal_header_t header;
567 journal_rawheader_t rawheader;
568 int index_size = 56; /* XXX configurable */
569 int size;
570 void *mem = NULL; /* Memory for temporary index image. */
571
572 INSIST(sizeof(journal_rawheader_t) == JOURNAL_HEADER_SIZE);
573
574 result = isc_stdio_open(filename, "wb", &fp);
575 if (result != ISC_R_SUCCESS) {
576 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
577 "%s: create: %s", filename,
578 isc_result_totext(result));
579 return (ISC_R_UNEXPECTED);
580 }
581
582 if (downgrade) {
583 header = journal_header_ver1;
584 } else {
585 header = initial_journal_header;
586 }
587 header.index_size = index_size;
588 journal_header_encode(&header, &rawheader);
589
590 size = sizeof(journal_rawheader_t) +
591 index_size * sizeof(journal_rawpos_t);
592
593 mem = isc_mem_get(mctx, size);
594 memset(mem, 0, size);
595 memmove(mem, &rawheader, sizeof(rawheader));
596
597 result = isc_stdio_write(mem, 1, (size_t)size, fp, NULL);
598 if (result != ISC_R_SUCCESS) {
599 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
600 "%s: write: %s", filename,
601 isc_result_totext(result));
602 (void)isc_stdio_close(fp);
603 (void)isc_file_remove(filename);
604 isc_mem_put(mctx, mem, size);
605 return (ISC_R_UNEXPECTED);
606 }
607 isc_mem_put(mctx, mem, size);
608
609 result = isc_stdio_close(fp);
610 if (result != ISC_R_SUCCESS) {
611 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
612 "%s: close: %s", filename,
613 isc_result_totext(result));
614 (void)isc_file_remove(filename);
615 return (ISC_R_UNEXPECTED);
616 }
617
618 return (ISC_R_SUCCESS);
619 }
620
621 static isc_result_t
journal_open(isc_mem_t * mctx,const char * filename,bool writable,bool create,bool downgrade,dns_journal_t ** journalp)622 journal_open(isc_mem_t *mctx, const char *filename, bool writable, bool create,
623 bool downgrade, dns_journal_t **journalp) {
624 FILE *fp = NULL;
625 isc_result_t result;
626 journal_rawheader_t rawheader;
627 dns_journal_t *j;
628
629 REQUIRE(journalp != NULL && *journalp == NULL);
630
631 j = isc_mem_get(mctx, sizeof(*j));
632 *j = (dns_journal_t){ .state = JOURNAL_STATE_INVALID,
633 .filename = isc_mem_strdup(mctx, filename),
634 .xhdr_version = XHDR_VERSION2 };
635 isc_mem_attach(mctx, &j->mctx);
636
637 result = isc_stdio_open(j->filename, writable ? "rb+" : "rb", &fp);
638 if (result == ISC_R_FILENOTFOUND) {
639 if (create) {
640 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(1),
641 "journal file %s does not exist, "
642 "creating it",
643 j->filename);
644 CHECK(journal_file_create(mctx, downgrade, filename));
645 /*
646 * Retry.
647 */
648 result = isc_stdio_open(j->filename, "rb+", &fp);
649 } else {
650 FAIL(ISC_R_NOTFOUND);
651 }
652 }
653 if (result != ISC_R_SUCCESS) {
654 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
655 "%s: open: %s", j->filename,
656 isc_result_totext(result));
657 FAIL(ISC_R_UNEXPECTED);
658 }
659
660 j->fp = fp;
661
662 /*
663 * Set magic early so that seek/read can succeed.
664 */
665 j->magic = DNS_JOURNAL_MAGIC;
666
667 CHECK(journal_seek(j, 0));
668 CHECK(journal_read(j, &rawheader, sizeof(rawheader)));
669
670 if (memcmp(rawheader.h.format, journal_header_ver1.format,
671 sizeof(journal_header_ver1.format)) == 0)
672 {
673 /*
674 * The file header says it's the old format, but it
675 * still might have the new xhdr format because we
676 * forgot to change the format string when we introduced
677 * the new xhdr. When we first try to read it, we assume
678 * it uses the new xhdr format. If that fails, we'll be
679 * called a second time with compat set to true, in which
680 * case we can lower xhdr_version to 1 if we find a
681 * corrupt transaction.
682 */
683 j->header_ver1 = true;
684 } else if (memcmp(rawheader.h.format, initial_journal_header.format,
685 sizeof(initial_journal_header.format)) == 0)
686 {
687 /*
688 * File header says this is format version 2; all
689 * transactions have to match.
690 */
691 j->header_ver1 = false;
692 } else {
693 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
694 "%s: journal format not recognized", j->filename);
695 FAIL(ISC_R_UNEXPECTED);
696 }
697 journal_header_decode(&rawheader, &j->header);
698
699 /*
700 * If there is an index, read the raw index into a dynamically
701 * allocated buffer and then convert it into a cooked index.
702 */
703 if (j->header.index_size != 0) {
704 unsigned int i;
705 unsigned int rawbytes;
706 unsigned char *p;
707
708 rawbytes = j->header.index_size * sizeof(journal_rawpos_t);
709 j->rawindex = isc_mem_get(mctx, rawbytes);
710
711 CHECK(journal_read(j, j->rawindex, rawbytes));
712
713 j->index = isc_mem_get(mctx, j->header.index_size *
714 sizeof(journal_pos_t));
715
716 p = j->rawindex;
717 for (i = 0; i < j->header.index_size; i++) {
718 j->index[i].serial = decode_uint32(p);
719 p += 4;
720 j->index[i].offset = decode_uint32(p);
721 p += 4;
722 }
723 INSIST(p == j->rawindex + rawbytes);
724 }
725 j->offset = -1; /* Invalid, must seek explicitly. */
726
727 /*
728 * Initialize the iterator.
729 */
730 dns_name_init(&j->it.name, NULL);
731 dns_rdata_init(&j->it.rdata);
732
733 /*
734 * Set up empty initial buffers for unchecked and checked
735 * wire format RR data. They will be reallocated
736 * later.
737 */
738 isc_buffer_init(&j->it.source, NULL, 0);
739 isc_buffer_init(&j->it.target, NULL, 0);
740 dns_decompress_init(&j->it.dctx, -1, DNS_DECOMPRESS_NONE);
741
742 j->state = writable ? JOURNAL_STATE_WRITE : JOURNAL_STATE_READ;
743
744 *journalp = j;
745 return (ISC_R_SUCCESS);
746
747 failure:
748 j->magic = 0;
749 if (j->rawindex != NULL) {
750 isc_mem_put(j->mctx, j->rawindex,
751 j->header.index_size * sizeof(journal_rawpos_t));
752 }
753 if (j->index != NULL) {
754 isc_mem_put(j->mctx, j->index,
755 j->header.index_size * sizeof(journal_pos_t));
756 }
757 isc_mem_free(j->mctx, j->filename);
758 if (j->fp != NULL) {
759 (void)isc_stdio_close(j->fp);
760 }
761 isc_mem_putanddetach(&j->mctx, j, sizeof(*j));
762 return (result);
763 }
764
765 isc_result_t
dns_journal_open(isc_mem_t * mctx,const char * filename,unsigned int mode,dns_journal_t ** journalp)766 dns_journal_open(isc_mem_t *mctx, const char *filename, unsigned int mode,
767 dns_journal_t **journalp) {
768 isc_result_t result;
769 size_t namelen;
770 char backup[1024];
771 bool writable, create;
772
773 create = ((mode & DNS_JOURNAL_CREATE) != 0);
774 writable = ((mode & (DNS_JOURNAL_WRITE | DNS_JOURNAL_CREATE)) != 0);
775
776 result = journal_open(mctx, filename, writable, create, false,
777 journalp);
778 if (result == ISC_R_NOTFOUND) {
779 namelen = strlen(filename);
780 if (namelen > 4U && strcmp(filename + namelen - 4, ".jnl") == 0)
781 {
782 namelen -= 4;
783 }
784
785 result = snprintf(backup, sizeof(backup), "%.*s.jbk",
786 (int)namelen, filename);
787 if (result >= sizeof(backup)) {
788 return (ISC_R_NOSPACE);
789 }
790 result = journal_open(mctx, backup, writable, writable, false,
791 journalp);
792 }
793 return (result);
794 }
795
796 /*
797 * A comparison function defining the sorting order for
798 * entries in the IXFR-style journal file.
799 *
800 * The IXFR format requires that deletions are sorted before
801 * additions, and within either one, SOA records are sorted
802 * before others.
803 *
804 * Also sort the non-SOA records by type as a courtesy to the
805 * server receiving the IXFR - it may help reduce the amount of
806 * rdataset merging it has to do.
807 */
808 static int
ixfr_order(const void * av,const void * bv)809 ixfr_order(const void *av, const void *bv) {
810 dns_difftuple_t const *const *ap = av;
811 dns_difftuple_t const *const *bp = bv;
812 dns_difftuple_t const *a = *ap;
813 dns_difftuple_t const *b = *bp;
814 int r;
815 int bop = 0, aop = 0;
816
817 switch (a->op) {
818 case DNS_DIFFOP_DEL:
819 case DNS_DIFFOP_DELRESIGN:
820 aop = 1;
821 break;
822 case DNS_DIFFOP_ADD:
823 case DNS_DIFFOP_ADDRESIGN:
824 aop = 0;
825 break;
826 default:
827 INSIST(0);
828 ISC_UNREACHABLE();
829 }
830
831 switch (b->op) {
832 case DNS_DIFFOP_DEL:
833 case DNS_DIFFOP_DELRESIGN:
834 bop = 1;
835 break;
836 case DNS_DIFFOP_ADD:
837 case DNS_DIFFOP_ADDRESIGN:
838 bop = 0;
839 break;
840 default:
841 INSIST(0);
842 ISC_UNREACHABLE();
843 }
844
845 r = bop - aop;
846 if (r != 0) {
847 return (r);
848 }
849
850 r = (b->rdata.type == dns_rdatatype_soa) -
851 (a->rdata.type == dns_rdatatype_soa);
852 if (r != 0) {
853 return (r);
854 }
855
856 r = (a->rdata.type - b->rdata.type);
857 return (r);
858 }
859
860 static isc_result_t
maybe_fixup_xhdr(dns_journal_t * j,journal_xhdr_t * xhdr,uint32_t serial,isc_offset_t offset)861 maybe_fixup_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr, uint32_t serial,
862 isc_offset_t offset) {
863 isc_result_t result = ISC_R_SUCCESS;
864
865 /*
866 * Handle mixture of version 1 and version 2
867 * transaction headers in a version 1 journal.
868 */
869 if ((xhdr->serial0 != serial ||
870 isc_serial_le(xhdr->serial1, xhdr->serial0))) {
871 if (j->xhdr_version == XHDR_VERSION1 && xhdr->serial1 == serial)
872 {
873 isc_log_write(
874 JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(3),
875 "%s: XHDR_VERSION1 -> XHDR_VERSION2 at %u",
876 j->filename, serial);
877 j->xhdr_version = XHDR_VERSION2;
878 CHECK(journal_seek(j, offset));
879 CHECK(journal_read_xhdr(j, xhdr));
880 j->recovered = true;
881 } else if (j->xhdr_version == XHDR_VERSION2 &&
882 xhdr->count == serial) {
883 isc_log_write(
884 JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(3),
885 "%s: XHDR_VERSION2 -> XHDR_VERSION1 at %u",
886 j->filename, serial);
887 j->xhdr_version = XHDR_VERSION1;
888 CHECK(journal_seek(j, offset));
889 CHECK(journal_read_xhdr(j, xhdr));
890 j->recovered = true;
891 }
892 }
893
894 /*
895 * Handle <size, serial0, serial1, 0> transaction header.
896 */
897 if (j->xhdr_version == XHDR_VERSION1) {
898 uint32_t value;
899
900 CHECK(journal_read(j, &value, sizeof(value)));
901 if (value != 0L) {
902 CHECK(journal_seek(j, offset + 12));
903 } else {
904 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(3),
905 "%s: XHDR_VERSION1 count zero at %u",
906 j->filename, serial);
907 j->xhdr_version = XHDR_VERSION2;
908 j->recovered = true;
909 }
910 } else if (j->xhdr_version == XHDR_VERSION2 && xhdr->count == serial &&
911 xhdr->serial1 == 0U &&
912 isc_serial_gt(xhdr->serial0, xhdr->count))
913 {
914 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(3),
915 "%s: XHDR_VERSION2 count zero at %u", j->filename,
916 serial);
917 xhdr->serial1 = xhdr->serial0;
918 xhdr->serial0 = xhdr->count;
919 xhdr->count = 0;
920 j->recovered = true;
921 }
922
923 failure:
924 return (result);
925 }
926
927 /*
928 * Advance '*pos' to the next journal transaction.
929 *
930 * Requires:
931 * *pos refers to a valid journal transaction.
932 *
933 * Ensures:
934 * When ISC_R_SUCCESS is returned,
935 * *pos refers to the next journal transaction.
936 *
937 * Returns one of:
938 *
939 * ISC_R_SUCCESS
940 * ISC_R_NOMORE *pos pointed at the last transaction
941 * Other results due to file errors are possible.
942 */
943 static isc_result_t
journal_next(dns_journal_t * j,journal_pos_t * pos)944 journal_next(dns_journal_t *j, journal_pos_t *pos) {
945 isc_result_t result;
946 journal_xhdr_t xhdr;
947 size_t hdrsize;
948
949 REQUIRE(DNS_JOURNAL_VALID(j));
950
951 result = journal_seek(j, pos->offset);
952 if (result != ISC_R_SUCCESS) {
953 return (result);
954 }
955
956 if (pos->serial == j->header.end.serial) {
957 return (ISC_R_NOMORE);
958 }
959
960 /*
961 * Read the header of the current transaction.
962 * This will return ISC_R_NOMORE if we are at EOF.
963 */
964 result = journal_read_xhdr(j, &xhdr);
965 if (result != ISC_R_SUCCESS) {
966 return (result);
967 }
968
969 if (j->header_ver1) {
970 CHECK(maybe_fixup_xhdr(j, &xhdr, pos->serial, pos->offset));
971 }
972
973 /*
974 * Check serial number consistency.
975 */
976 if (xhdr.serial0 != pos->serial ||
977 isc_serial_le(xhdr.serial1, xhdr.serial0)) {
978 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
979 "%s: journal file corrupt: "
980 "expected serial %u, got %u",
981 j->filename, pos->serial, xhdr.serial0);
982 return (ISC_R_UNEXPECTED);
983 }
984
985 /*
986 * Check for offset wraparound.
987 */
988 hdrsize = (j->xhdr_version == XHDR_VERSION2)
989 ? sizeof(journal_rawxhdr_t)
990 : sizeof(journal_rawxhdr_ver1_t);
991
992 if ((isc_offset_t)(pos->offset + hdrsize + xhdr.size) < pos->offset) {
993 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
994 "%s: offset too large", j->filename);
995 return (ISC_R_UNEXPECTED);
996 }
997
998 pos->offset += hdrsize + xhdr.size;
999 pos->serial = xhdr.serial1;
1000 return (ISC_R_SUCCESS);
1001
1002 failure:
1003 return (result);
1004 }
1005
1006 /*
1007 * If the index of the journal 'j' contains an entry "better"
1008 * than '*best_guess', replace '*best_guess' with it.
1009 *
1010 * "Better" means having a serial number closer to 'serial'
1011 * but not greater than 'serial'.
1012 */
1013 static void
index_find(dns_journal_t * j,uint32_t serial,journal_pos_t * best_guess)1014 index_find(dns_journal_t *j, uint32_t serial, journal_pos_t *best_guess) {
1015 unsigned int i;
1016 if (j->index == NULL) {
1017 return;
1018 }
1019 for (i = 0; i < j->header.index_size; i++) {
1020 if (POS_VALID(j->index[i]) &&
1021 DNS_SERIAL_GE(serial, j->index[i].serial) &&
1022 DNS_SERIAL_GT(j->index[i].serial, best_guess->serial))
1023 {
1024 *best_guess = j->index[i];
1025 }
1026 }
1027 }
1028
1029 /*
1030 * Add a new index entry. If there is no room, make room by removing
1031 * the odd-numbered entries and compacting the others into the first
1032 * half of the index. This decimates old index entries exponentially
1033 * over time, so that the index always contains a much larger fraction
1034 * of recent serial numbers than of old ones. This is deliberate -
1035 * most index searches are for outgoing IXFR, and IXFR tends to request
1036 * recent versions more often than old ones.
1037 */
1038 static void
index_add(dns_journal_t * j,journal_pos_t * pos)1039 index_add(dns_journal_t *j, journal_pos_t *pos) {
1040 unsigned int i;
1041
1042 if (j->index == NULL) {
1043 return;
1044 }
1045
1046 /*
1047 * Search for a vacant position.
1048 */
1049 for (i = 0; i < j->header.index_size; i++) {
1050 if (!POS_VALID(j->index[i])) {
1051 break;
1052 }
1053 }
1054 if (i == j->header.index_size) {
1055 unsigned int k = 0;
1056 /*
1057 * Found no vacant position. Make some room.
1058 */
1059 for (i = 0; i < j->header.index_size; i += 2) {
1060 j->index[k++] = j->index[i];
1061 }
1062 i = k; /* 'i' identifies the first vacant position. */
1063 while (k < j->header.index_size) {
1064 POS_INVALIDATE(j->index[k]);
1065 k++;
1066 }
1067 }
1068 INSIST(i < j->header.index_size);
1069 INSIST(!POS_VALID(j->index[i]));
1070
1071 /*
1072 * Store the new index entry.
1073 */
1074 j->index[i] = *pos;
1075 }
1076
1077 /*
1078 * Invalidate any existing index entries that could become
1079 * ambiguous when a new transaction with number 'serial' is added.
1080 */
1081 static void
index_invalidate(dns_journal_t * j,uint32_t serial)1082 index_invalidate(dns_journal_t *j, uint32_t serial) {
1083 unsigned int i;
1084 if (j->index == NULL) {
1085 return;
1086 }
1087 for (i = 0; i < j->header.index_size; i++) {
1088 if (!DNS_SERIAL_GT(serial, j->index[i].serial)) {
1089 POS_INVALIDATE(j->index[i]);
1090 }
1091 }
1092 }
1093
1094 /*
1095 * Try to find a transaction with initial serial number 'serial'
1096 * in the journal 'j'.
1097 *
1098 * If found, store its position at '*pos' and return ISC_R_SUCCESS.
1099 *
1100 * If 'serial' is current (= the ending serial number of the
1101 * last transaction in the journal), set '*pos' to
1102 * the position immediately following the last transaction and
1103 * return ISC_R_SUCCESS.
1104 *
1105 * If 'serial' is within the range of addressable serial numbers
1106 * covered by the journal but that particular serial number is missing
1107 * (from the journal, not just from the index), return ISC_R_NOTFOUND.
1108 *
1109 * If 'serial' is outside the range of addressable serial numbers
1110 * covered by the journal, return ISC_R_RANGE.
1111 *
1112 */
1113 static isc_result_t
journal_find(dns_journal_t * j,uint32_t serial,journal_pos_t * pos)1114 journal_find(dns_journal_t *j, uint32_t serial, journal_pos_t *pos) {
1115 isc_result_t result;
1116 journal_pos_t current_pos;
1117
1118 REQUIRE(DNS_JOURNAL_VALID(j));
1119
1120 if (DNS_SERIAL_GT(j->header.begin.serial, serial)) {
1121 return (ISC_R_RANGE);
1122 }
1123 if (DNS_SERIAL_GT(serial, j->header.end.serial)) {
1124 return (ISC_R_RANGE);
1125 }
1126 if (serial == j->header.end.serial) {
1127 *pos = j->header.end;
1128 return (ISC_R_SUCCESS);
1129 }
1130
1131 current_pos = j->header.begin;
1132 index_find(j, serial, ¤t_pos);
1133
1134 while (current_pos.serial != serial) {
1135 if (DNS_SERIAL_GT(current_pos.serial, serial)) {
1136 return (ISC_R_NOTFOUND);
1137 }
1138 result = journal_next(j, ¤t_pos);
1139 if (result != ISC_R_SUCCESS) {
1140 return (result);
1141 }
1142 }
1143 *pos = current_pos;
1144 return (ISC_R_SUCCESS);
1145 }
1146
1147 isc_result_t
dns_journal_begin_transaction(dns_journal_t * j)1148 dns_journal_begin_transaction(dns_journal_t *j) {
1149 uint32_t offset;
1150 isc_result_t result;
1151
1152 REQUIRE(DNS_JOURNAL_VALID(j));
1153 REQUIRE(j->state == JOURNAL_STATE_WRITE ||
1154 j->state == JOURNAL_STATE_INLINE);
1155
1156 /*
1157 * Find the file offset where the new transaction should
1158 * be written, and seek there.
1159 */
1160 if (JOURNAL_EMPTY(&j->header)) {
1161 offset = sizeof(journal_rawheader_t) +
1162 j->header.index_size * sizeof(journal_rawpos_t);
1163 } else {
1164 offset = j->header.end.offset;
1165 }
1166 j->x.pos[0].offset = offset;
1167 j->x.pos[1].offset = offset; /* Initial value, will be incremented. */
1168 j->x.n_soa = 0;
1169
1170 CHECK(journal_seek(j, offset));
1171
1172 /*
1173 * Write a dummy transaction header of all zeroes to reserve
1174 * space. It will be filled in when the transaction is
1175 * finished.
1176 */
1177 CHECK(journal_write_xhdr(j, 0, 0, 0, 0));
1178 j->x.pos[1].offset = j->offset;
1179
1180 j->state = JOURNAL_STATE_TRANSACTION;
1181 result = ISC_R_SUCCESS;
1182 failure:
1183 return (result);
1184 }
1185
1186 isc_result_t
dns_journal_writediff(dns_journal_t * j,dns_diff_t * diff)1187 dns_journal_writediff(dns_journal_t *j, dns_diff_t *diff) {
1188 dns_difftuple_t *t;
1189 isc_buffer_t buffer;
1190 void *mem = NULL;
1191 uint64_t size = 0;
1192 uint32_t rrcount = 0;
1193 isc_result_t result;
1194 isc_region_t used;
1195
1196 REQUIRE(DNS_DIFF_VALID(diff));
1197 REQUIRE(j->state == JOURNAL_STATE_TRANSACTION);
1198
1199 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "writing to journal");
1200 (void)dns_diff_print(diff, NULL);
1201
1202 /*
1203 * Pass 1: determine the buffer size needed, and
1204 * keep track of SOA serial numbers.
1205 */
1206 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL;
1207 t = ISC_LIST_NEXT(t, link)) {
1208 if (t->rdata.type == dns_rdatatype_soa) {
1209 if (j->x.n_soa < 2) {
1210 j->x.pos[j->x.n_soa].serial =
1211 dns_soa_getserial(&t->rdata);
1212 }
1213 j->x.n_soa++;
1214 }
1215 size += sizeof(journal_rawrrhdr_t);
1216 size += t->name.length; /* XXX should have access macro? */
1217 size += 10;
1218 size += t->rdata.length;
1219 }
1220
1221 if (size >= DNS_JOURNAL_SIZE_MAX) {
1222 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1223 "dns_journal_writediff: %s: journal entry "
1224 "too big to be stored: %" PRIu64 " bytes",
1225 j->filename, size);
1226 return (ISC_R_NOSPACE);
1227 }
1228
1229 mem = isc_mem_get(j->mctx, size);
1230
1231 isc_buffer_init(&buffer, mem, size);
1232
1233 /*
1234 * Pass 2. Write RRs to buffer.
1235 */
1236 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL;
1237 t = ISC_LIST_NEXT(t, link)) {
1238 /*
1239 * Write the RR header.
1240 */
1241 isc_buffer_putuint32(&buffer,
1242 t->name.length + 10 + t->rdata.length);
1243 /*
1244 * Write the owner name, RR header, and RR data.
1245 */
1246 isc_buffer_putmem(&buffer, t->name.ndata, t->name.length);
1247 isc_buffer_putuint16(&buffer, t->rdata.type);
1248 isc_buffer_putuint16(&buffer, t->rdata.rdclass);
1249 isc_buffer_putuint32(&buffer, t->ttl);
1250 INSIST(t->rdata.length < 65536);
1251 isc_buffer_putuint16(&buffer, (uint16_t)t->rdata.length);
1252 INSIST(isc_buffer_availablelength(&buffer) >= t->rdata.length);
1253 isc_buffer_putmem(&buffer, t->rdata.data, t->rdata.length);
1254
1255 rrcount++;
1256 }
1257
1258 isc_buffer_usedregion(&buffer, &used);
1259 INSIST(used.length == size);
1260
1261 j->x.pos[1].offset += used.length;
1262 j->x.n_rr = rrcount;
1263
1264 /*
1265 * Write the buffer contents to the journal file.
1266 */
1267 CHECK(journal_write(j, used.base, used.length));
1268
1269 result = ISC_R_SUCCESS;
1270
1271 failure:
1272 if (mem != NULL) {
1273 isc_mem_put(j->mctx, mem, size);
1274 }
1275 return (result);
1276 }
1277
/*
 * Commit the work pending on journal 'j'.
 *
 * The journal must be in JOURNAL_STATE_TRANSACTION or
 * JOURNAL_STATE_INLINE.
 *
 * In JOURNAL_STATE_INLINE only the journal header is rewritten and
 * synced.  In JOURNAL_STATE_TRANSACTION the open transaction is
 * checked for consistency, its data is synced, the placeholder
 * transaction header is overwritten with the real values, and the
 * journal header and index are rewritten and synced.
 *
 * On success the journal is left in JOURNAL_STATE_WRITE and
 * ISC_R_SUCCESS is returned.  ISC_R_UNEXPECTED is returned for a
 * malformed or oversized transaction; otherwise the result of the
 * failing helper is returned.
 */
isc_result_t
dns_journal_commit(dns_journal_t *j) {
	isc_result_t result;
	journal_rawheader_t rawheader;
	uint64_t total;

	REQUIRE(DNS_JOURNAL_VALID(j));
	REQUIRE(j->state == JOURNAL_STATE_TRANSACTION ||
		j->state == JOURNAL_STATE_INLINE);

	/*
	 * Just write out an updated header.
	 */
	if (j->state == JOURNAL_STATE_INLINE) {
		CHECK(journal_fsync(j));
		journal_header_encode(&j->header, &rawheader);
		CHECK(journal_seek(j, 0));
		CHECK(journal_write(j, &rawheader, sizeof(rawheader)));
		CHECK(journal_fsync(j));
		j->state = JOURNAL_STATE_WRITE;
		return (ISC_R_SUCCESS);
	}

	/*
	 * Perform some basic consistency checks: the transaction must
	 * contain exactly two SOAs, its serial must increase, and it
	 * must start at the serial where the journal currently ends.
	 */
	if (j->x.n_soa != 2) {
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
			      "%s: malformed transaction: %d SOAs", j->filename,
			      j->x.n_soa);
		return (ISC_R_UNEXPECTED);
	}
	if (!DNS_SERIAL_GT(j->x.pos[1].serial, j->x.pos[0].serial)) {
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
			      "%s: malformed transaction: serial number "
			      "did not increase",
			      j->filename);
		return (ISC_R_UNEXPECTED);
	}
	if (!JOURNAL_EMPTY(&j->header)) {
		if (j->x.pos[0].serial != j->header.end.serial) {
			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
				      "malformed transaction: "
				      "%s last serial %u != "
				      "transaction first serial %u",
				      j->filename, j->header.end.serial,
				      j->x.pos[0].serial);
			return (ISC_R_UNEXPECTED);
		}
	}

	/*
	 * We currently don't support huge journal entries.
	 * 'total' is the span of the transaction in the file,
	 * transaction header included.
	 */
	total = j->x.pos[1].offset - j->x.pos[0].offset;
	if (total >= DNS_JOURNAL_SIZE_MAX) {
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
			      "transaction too big to be stored in journal: "
			      "%" PRIu64 "b (max is %" PRIu64 "b)",
			      total, (uint64_t)DNS_JOURNAL_SIZE_MAX);
		return (ISC_R_UNEXPECTED);
	}

	/*
	 * Some old journal entries may become non-addressable
	 * when we increment the current serial number.  Purge them
	 * by stepping header.begin forward to the first addressable
	 * transaction.  Also purge them from the index.
	 */
	if (!JOURNAL_EMPTY(&j->header)) {
		while (!DNS_SERIAL_GT(j->x.pos[1].serial,
				      j->header.begin.serial)) {
			CHECK(journal_next(j, &j->header.begin));
		}
		index_invalidate(j, j->x.pos[1].serial);
	}
#ifdef notyet
	if (DNS_SERIAL_GT(last_dumped_serial, j->x.pos[1].serial)) {
		force_dump(...);
	}
#endif /* ifdef notyet */

	/*
	 * Commit the transaction data to stable storage.
	 */
	CHECK(journal_fsync(j));

	/*
	 * The INLINE case returned above, so unless a helper called in
	 * between changed j->state this condition holds here.
	 */
	if (j->state == JOURNAL_STATE_TRANSACTION) {
		/*
		 * Despite its name, 'offset' is a length: the span of
		 * the transaction minus the size of its header.  It is
		 * passed as the first argument of journal_write_xhdr().
		 */
		isc_offset_t offset;
		offset = (j->x.pos[1].offset - j->x.pos[0].offset) -
			 (j->header_ver1 ? sizeof(journal_rawxhdr_ver1_t)
					 : sizeof(journal_rawxhdr_t));
		/*
		 * Update the transaction header.
		 */
		CHECK(journal_seek(j, j->x.pos[0].offset));
		CHECK(journal_write_xhdr(j, offset, j->x.n_rr,
					 j->x.pos[0].serial,
					 j->x.pos[1].serial));
	}

	/*
	 * Update the journal header.  The first transaction of an
	 * empty journal also defines where the journal begins.
	 */
	if (JOURNAL_EMPTY(&j->header)) {
		j->header.begin = j->x.pos[0];
	}
	j->header.end = j->x.pos[1];
	journal_header_encode(&j->header, &rawheader);
	CHECK(journal_seek(j, 0));
	CHECK(journal_write(j, &rawheader, sizeof(rawheader)));

	/*
	 * Update the index.
	 */
	index_add(j, &j->x.pos[0]);

	/*
	 * Convert the index into on-disk format and write
	 * it to disk.
	 */
	CHECK(index_to_disk(j));

	/*
	 * Commit the header to stable storage.
	 */
	CHECK(journal_fsync(j));

	/*
	 * We no longer have a transaction open.
	 */
	j->state = JOURNAL_STATE_WRITE;

	result = ISC_R_SUCCESS;

failure:
	return (result);
}
1416
1417 isc_result_t
dns_journal_write_transaction(dns_journal_t * j,dns_diff_t * diff)1418 dns_journal_write_transaction(dns_journal_t *j, dns_diff_t *diff) {
1419 isc_result_t result;
1420
1421 CHECK(dns_diff_sort(diff, ixfr_order));
1422 CHECK(dns_journal_begin_transaction(j));
1423 CHECK(dns_journal_writediff(j, diff));
1424 CHECK(dns_journal_commit(j));
1425 result = ISC_R_SUCCESS;
1426 failure:
1427 return (result);
1428 }
1429
1430 void
dns_journal_destroy(dns_journal_t ** journalp)1431 dns_journal_destroy(dns_journal_t **journalp) {
1432 dns_journal_t *j = NULL;
1433
1434 REQUIRE(journalp != NULL);
1435 REQUIRE(DNS_JOURNAL_VALID(*journalp));
1436
1437 j = *journalp;
1438 *journalp = NULL;
1439
1440 j->it.result = ISC_R_FAILURE;
1441 dns_name_invalidate(&j->it.name);
1442 dns_decompress_invalidate(&j->it.dctx);
1443 if (j->rawindex != NULL) {
1444 isc_mem_put(j->mctx, j->rawindex,
1445 j->header.index_size * sizeof(journal_rawpos_t));
1446 }
1447 if (j->index != NULL) {
1448 isc_mem_put(j->mctx, j->index,
1449 j->header.index_size * sizeof(journal_pos_t));
1450 }
1451 if (j->it.target.base != NULL) {
1452 isc_mem_put(j->mctx, j->it.target.base, j->it.target.length);
1453 }
1454 if (j->it.source.base != NULL) {
1455 isc_mem_put(j->mctx, j->it.source.base, j->it.source.length);
1456 }
1457 if (j->filename != NULL) {
1458 isc_mem_free(j->mctx, j->filename);
1459 }
1460 if (j->fp != NULL) {
1461 (void)isc_stdio_close(j->fp);
1462 }
1463 j->magic = 0;
1464 isc_mem_putanddetach(&j->mctx, j, sizeof(*j));
1465 }
1466
/*
 * Roll the open journal 'j' into the database 'db'.
 * A new database version will be created.
 *
 * The journal is replayed from the database's current SOA serial up
 * to the last serial in the journal.  When DNS_JOURNALOPT_RESIGN is
 * set in 'options', tuples are created with DNS_DIFFOP_DELRESIGN /
 * DNS_DIFFOP_ADDRESIGN instead of DNS_DIFFOP_DEL / DNS_DIFFOP_ADD.
 *
 * Returns ISC_R_SUCCESS when the journal was applied,
 * DNS_R_UPTODATE when the database serial already equals the last
 * journal serial, ISC_R_UNEXPECTED when the first RR read is not an
 * SOA, or the result of the failing helper.  The new version is
 * closed with a 'true' flag only when the result is ISC_R_SUCCESS.
 */

/* XXX Share code with incoming IXFR? */

isc_result_t
dns_journal_rollforward(dns_journal_t *j, dns_db_t *db, unsigned int options) {
	isc_buffer_t source; /* Transaction data from disk */
	isc_buffer_t target; /* Ditto after _fromwire check */
	uint32_t db_serial;  /* Database SOA serial */
	uint32_t end_serial; /* Last journal SOA serial */
	isc_result_t result;
	dns_dbversion_t *ver = NULL;
	journal_pos_t pos;
	dns_diff_t diff;
	unsigned int n_soa = 0; /* SOAs seen in the current transaction */
	unsigned int n_put = 0; /* Tuples queued since the last apply */
	dns_diffop_t op;

	REQUIRE(DNS_JOURNAL_VALID(j));
	REQUIRE(DNS_DB_VALID(db));

	dns_diff_init(j->mctx, &diff);

	/*
	 * Set up empty initial buffers for unchecked and checked
	 * wire format transaction data.
	 *
	 * NOTE(review): nothing in this function assigns to 'source'
	 * or 'target' after this point, so the matching cleanup below
	 * looks like dead code -- confirm before removing.
	 */
	isc_buffer_init(&source, NULL, 0);
	isc_buffer_init(&target, NULL, 0);

	/*
	 * Create the new database version.
	 */
	CHECK(dns_db_newversion(db, &ver));

	/*
	 * Get the current database SOA serial number.
	 */
	CHECK(dns_db_getsoaserial(db, ver, &db_serial));

	/*
	 * Locate a journal entry for the current database serial.
	 * Only the result matters here; 'pos' is not used afterwards.
	 */
	CHECK(journal_find(j, db_serial, &pos));

	end_serial = dns_journal_last_serial(j);

	/*
	 * If we're reading a version 1 file, scan all the transactions
	 * to see if the journal needs rewriting: if any outdated
	 * transaction headers are found, j->recovered will be set.
	 * The result the scan ends with is not checked.
	 */
	if (j->header_ver1) {
		uint32_t start_serial = dns_journal_first_serial(j);

		CHECK(dns_journal_iter_init(j, start_serial, db_serial, NULL));
		for (result = dns_journal_first_rr(j); result == ISC_R_SUCCESS;
		     result = dns_journal_next_rr(j))
		{
			continue;
		}
	}

	/*
	 * Nothing to replay: leave through the failure path with
	 * DNS_R_UPTODATE as the result.
	 */
	if (db_serial == end_serial) {
		CHECK(DNS_R_UPTODATE);
	}

	CHECK(dns_journal_iter_init(j, db_serial, end_serial, NULL));
	for (result = dns_journal_first_rr(j); result == ISC_R_SUCCESS;
	     result = dns_journal_next_rr(j))
	{
		dns_name_t *name = NULL;
		dns_rdata_t *rdata = NULL;
		dns_difftuple_t *tuple = NULL;
		uint32_t ttl;

		dns_journal_current_rr(j, &name, &ttl, &rdata);

		/*
		 * Count SOAs to track where we are in a transaction.
		 * 'db_serial' is refreshed at the second SOA and is
		 * only used in the log messages below.
		 */
		if (rdata->type == dns_rdatatype_soa) {
			n_soa++;
			if (n_soa == 2) {
				db_serial = j->it.current_serial;
			}
		}

		/* A third SOA starts the next transaction. */
		if (n_soa == 3) {
			n_soa = 1;
		}
		if (n_soa == 0) {
			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
				      "%s: journal file corrupt: missing "
				      "initial SOA",
				      j->filename);
			FAIL(ISC_R_UNEXPECTED);
		}

		/*
		 * RRs from the first SOA up to the second are
		 * deletions; from the second SOA on they are additions.
		 */
		if ((options & DNS_JOURNALOPT_RESIGN) != 0) {
			op = (n_soa == 1) ? DNS_DIFFOP_DELRESIGN
					  : DNS_DIFFOP_ADDRESIGN;
		} else {
			op = (n_soa == 1) ? DNS_DIFFOP_DEL : DNS_DIFFOP_ADD;
		}

		CHECK(dns_difftuple_create(diff.mctx, op, name, ttl, rdata,
					   &tuple));
		dns_diff_append(&diff, &tuple);

		/*
		 * Apply in batches so the diff does not grow without
		 * bound.
		 */
		if (++n_put > 100) {
			isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
				      "%s: applying diff to database (%u)",
				      j->filename, db_serial);
			(void)dns_diff_print(&diff, NULL);
			CHECK(dns_diff_apply(&diff, db, ver));
			dns_diff_clear(&diff);
			n_put = 0;
		}
	}
	/* Running out of RRs is the normal way for the loop to end. */
	if (result == ISC_R_NOMORE) {
		result = ISC_R_SUCCESS;
	}
	CHECK(result);

	/*
	 * Apply whatever is left over from the last batch.
	 */
	if (n_put != 0) {
		isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
			      "%s: applying final diff to database (%u)",
			      j->filename, db_serial);
		(void)dns_diff_print(&diff, NULL);
		CHECK(dns_diff_apply(&diff, db, ver));
		dns_diff_clear(&diff);
	}

failure:
	/*
	 * Reached on success as well as on failure; the flag passed to
	 * dns_db_closeversion() is true only for ISC_R_SUCCESS.
	 */
	if (ver != NULL) {
		dns_db_closeversion(db, &ver,
				    result == ISC_R_SUCCESS ? true : false);
	}

	if (source.base != NULL) {
		isc_mem_put(j->mctx, source.base, source.length);
	}
	if (target.base != NULL) {
		isc_mem_put(j->mctx, target.base, target.length);
	}

	dns_diff_clear(&diff);

	INSIST(ver == NULL);

	return (result);
}
1620
1621 isc_result_t
dns_journal_print(isc_mem_t * mctx,uint32_t flags,const char * filename,FILE * file)1622 dns_journal_print(isc_mem_t *mctx, uint32_t flags, const char *filename,
1623 FILE *file) {
1624 dns_journal_t *j = NULL;
1625 isc_buffer_t source; /* Transaction data from disk */
1626 isc_buffer_t target; /* Ditto after _fromwire check */
1627 uint32_t start_serial; /* Database SOA serial */
1628 uint32_t end_serial; /* Last journal SOA serial */
1629 isc_result_t result;
1630 dns_diff_t diff;
1631 unsigned int n_soa = 0;
1632 unsigned int n_put = 0;
1633 bool printxhdr = ((flags & DNS_JOURNAL_PRINTXHDR) != 0);
1634
1635 REQUIRE(filename != NULL);
1636
1637 result = dns_journal_open(mctx, filename, DNS_JOURNAL_READ, &j);
1638 if (result == ISC_R_NOTFOUND) {
1639 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no journal file");
1640 return (DNS_R_NOJOURNAL);
1641 } else if (result != ISC_R_SUCCESS) {
1642 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1643 "journal open failure: %s: %s",
1644 isc_result_totext(result), filename);
1645 return (result);
1646 }
1647
1648 if (printxhdr) {
1649 fprintf(file, "Journal format = %sHeader version = %d\n",
1650 j->header.format + 1, j->header_ver1 ? 1 : 2);
1651 fprintf(file, "Start serial = %u\n", j->header.begin.serial);
1652 fprintf(file, "End serial = %u\n", j->header.end.serial);
1653 fprintf(file, "Index (size = %u):\n", j->header.index_size);
1654 for (uint32_t i = 0; i < j->header.index_size; i++) {
1655 if (j->index[i].offset == 0) {
1656 fputc('\n', file);
1657 break;
1658 }
1659 fprintf(file, "%lld", (long long)j->index[i].offset);
1660 fputc((i + 1) % 8 == 0 ? '\n' : ' ', file);
1661 }
1662 }
1663 if (j->header.serialset) {
1664 fprintf(file, "Source serial = %u\n", j->header.sourceserial);
1665 }
1666 dns_diff_init(j->mctx, &diff);
1667
1668 /*
1669 * Set up empty initial buffers for unchecked and checked
1670 * wire format transaction data. They will be reallocated
1671 * later.
1672 */
1673 isc_buffer_init(&source, NULL, 0);
1674 isc_buffer_init(&target, NULL, 0);
1675
1676 start_serial = dns_journal_first_serial(j);
1677 end_serial = dns_journal_last_serial(j);
1678
1679 CHECK(dns_journal_iter_init(j, start_serial, end_serial, NULL));
1680
1681 for (result = dns_journal_first_rr(j); result == ISC_R_SUCCESS;
1682 result = dns_journal_next_rr(j))
1683 {
1684 dns_name_t *name = NULL;
1685 dns_rdata_t *rdata = NULL;
1686 dns_difftuple_t *tuple = NULL;
1687 static uint32_t i = 0;
1688 bool print = false;
1689 uint32_t ttl;
1690
1691 dns_journal_current_rr(j, &name, &ttl, &rdata);
1692
1693 if (rdata->type == dns_rdatatype_soa) {
1694 n_soa++;
1695 if (n_soa == 3) {
1696 n_soa = 1;
1697 }
1698 if (n_soa == 1) {
1699 print = printxhdr;
1700 }
1701 }
1702 if (n_soa == 0) {
1703 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1704 "%s: journal file corrupt: missing "
1705 "initial SOA",
1706 j->filename);
1707 FAIL(ISC_R_UNEXPECTED);
1708 }
1709
1710 if (print) {
1711 fprintf(file,
1712 "Transaction: version %d offset %lld size %u "
1713 "rrcount %u start %u end %u\n",
1714 j->xhdr_version, (long long)j->it.cpos.offset,
1715 j->curxhdr.size, j->curxhdr.count,
1716 j->curxhdr.serial0, j->curxhdr.serial1);
1717 if (j->it.cpos.offset > j->index[i].offset) {
1718 fprintf(file,
1719 "ERROR: Offset mismatch, "
1720 "expected %lld\n",
1721 (long long)j->index[i].offset);
1722 } else if (j->it.cpos.offset == j->index[i].offset) {
1723 i++;
1724 }
1725 }
1726 CHECK(dns_difftuple_create(
1727 diff.mctx, n_soa == 1 ? DNS_DIFFOP_DEL : DNS_DIFFOP_ADD,
1728 name, ttl, rdata, &tuple));
1729 dns_diff_append(&diff, &tuple);
1730
1731 if (++n_put > 100 || printxhdr) {
1732 result = dns_diff_print(&diff, file);
1733 dns_diff_clear(&diff);
1734 n_put = 0;
1735 if (result != ISC_R_SUCCESS) {
1736 break;
1737 }
1738 }
1739 }
1740 if (result == ISC_R_NOMORE) {
1741 result = ISC_R_SUCCESS;
1742 }
1743 CHECK(result);
1744
1745 if (n_put != 0) {
1746 result = dns_diff_print(&diff, file);
1747 dns_diff_clear(&diff);
1748 }
1749 goto cleanup;
1750
1751 failure:
1752 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1753 "%s: cannot print: journal file corrupt", j->filename);
1754
1755 cleanup:
1756 if (source.base != NULL) {
1757 isc_mem_put(j->mctx, source.base, source.length);
1758 }
1759 if (target.base != NULL) {
1760 isc_mem_put(j->mctx, target.base, target.length);
1761 }
1762
1763 dns_diff_clear(&diff);
1764 dns_journal_destroy(&j);
1765
1766 return (result);
1767 }
1768
1769 /**************************************************************************/
1770 /*
1771 * Miscellaneous accessors.
1772 */
1773 bool
dns_journal_empty(dns_journal_t * j)1774 dns_journal_empty(dns_journal_t *j) {
1775 return (JOURNAL_EMPTY(&j->header));
1776 }
1777
1778 bool
dns_journal_recovered(dns_journal_t * j)1779 dns_journal_recovered(dns_journal_t *j) {
1780 return (j->recovered);
1781 }
1782
1783 uint32_t
dns_journal_first_serial(dns_journal_t * j)1784 dns_journal_first_serial(dns_journal_t *j) {
1785 return (j->header.begin.serial);
1786 }
1787
1788 uint32_t
dns_journal_last_serial(dns_journal_t * j)1789 dns_journal_last_serial(dns_journal_t *j) {
1790 return (j->header.end.serial);
1791 }
1792
1793 void
dns_journal_set_sourceserial(dns_journal_t * j,uint32_t sourceserial)1794 dns_journal_set_sourceserial(dns_journal_t *j, uint32_t sourceserial) {
1795 REQUIRE(j->state == JOURNAL_STATE_WRITE ||
1796 j->state == JOURNAL_STATE_INLINE ||
1797 j->state == JOURNAL_STATE_TRANSACTION);
1798
1799 j->header.sourceserial = sourceserial;
1800 j->header.serialset = true;
1801 if (j->state == JOURNAL_STATE_WRITE) {
1802 j->state = JOURNAL_STATE_INLINE;
1803 }
1804 }
1805
1806 bool
dns_journal_get_sourceserial(dns_journal_t * j,uint32_t * sourceserial)1807 dns_journal_get_sourceserial(dns_journal_t *j, uint32_t *sourceserial) {
1808 REQUIRE(sourceserial != NULL);
1809
1810 if (!j->header.serialset) {
1811 return (false);
1812 }
1813 *sourceserial = j->header.sourceserial;
1814 return (true);
1815 }
1816
1817 /**************************************************************************/
1818 /*
1819 * Iteration support.
1820 *
 * When serving an outgoing IXFR, we transmit a part of the journal starting
1822 * at the serial number in the IXFR request and ending at the serial
1823 * number that is current when the IXFR request arrives. The ending
1824 * serial number is not necessarily at the end of the journal:
1825 * the journal may grow while the IXFR is in progress, but we stop
1826 * when we reach the serial number that was current when the IXFR started.
1827 */
1828
/*
 * Forward declaration: read_one_rr() is called by
 * dns_journal_first_rr() before its definition appears.
 */
static isc_result_t
read_one_rr(dns_journal_t *j);
1831
1832 /*
1833 * Make sure the buffer 'b' is has at least 'size' bytes
1834 * allocated, and clear it.
1835 *
1836 * Requires:
1837 * Either b->base is NULL, or it points to b->length bytes of memory
1838 * previously allocated by isc_mem_get().
1839 */
1840
1841 static isc_result_t
size_buffer(isc_mem_t * mctx,isc_buffer_t * b,unsigned size)1842 size_buffer(isc_mem_t *mctx, isc_buffer_t *b, unsigned size) {
1843 if (b->length < size) {
1844 void *mem = isc_mem_get(mctx, size);
1845 if (mem == NULL) {
1846 return (ISC_R_NOMEMORY);
1847 }
1848 if (b->base != NULL) {
1849 isc_mem_put(mctx, b->base, b->length);
1850 }
1851 b->base = mem;
1852 b->length = size;
1853 }
1854 isc_buffer_clear(b);
1855 return (ISC_R_SUCCESS);
1856 }
1857
1858 isc_result_t
dns_journal_iter_init(dns_journal_t * j,uint32_t begin_serial,uint32_t end_serial,size_t * xfrsizep)1859 dns_journal_iter_init(dns_journal_t *j, uint32_t begin_serial,
1860 uint32_t end_serial, size_t *xfrsizep) {
1861 isc_result_t result;
1862
1863 CHECK(journal_find(j, begin_serial, &j->it.bpos));
1864 INSIST(j->it.bpos.serial == begin_serial);
1865
1866 CHECK(journal_find(j, end_serial, &j->it.epos));
1867 INSIST(j->it.epos.serial == end_serial);
1868
1869 if (xfrsizep != NULL) {
1870 journal_pos_t pos = j->it.bpos;
1871 journal_xhdr_t xhdr;
1872 uint64_t size = 0;
1873 uint32_t count = 0;
1874
1875 /*
1876 * We already know the beginning and ending serial
1877 * numbers are in the journal. Scan through them,
1878 * adding up sizes and RR counts so we can calculate
1879 * the IXFR size.
1880 */
1881 do {
1882 CHECK(journal_seek(j, pos.offset));
1883 CHECK(journal_read_xhdr(j, &xhdr));
1884
1885 if (j->header_ver1) {
1886 CHECK(maybe_fixup_xhdr(j, &xhdr, pos.serial,
1887 pos.offset));
1888 }
1889
1890 /*
1891 * Check that xhdr is consistent.
1892 */
1893 if (xhdr.serial0 != pos.serial ||
1894 isc_serial_le(xhdr.serial1, xhdr.serial0)) {
1895 CHECK(ISC_R_UNEXPECTED);
1896 }
1897
1898 size += xhdr.size;
1899 count += xhdr.count;
1900
1901 result = journal_next(j, &pos);
1902 if (result == ISC_R_NOMORE) {
1903 result = ISC_R_SUCCESS;
1904 }
1905 CHECK(result);
1906 } while (pos.serial != end_serial);
1907
1908 /*
1909 * For each RR, subtract the length of the RR header,
1910 * as this would not be present in IXFR messages.
1911 * (We don't need to worry about the transaction header
1912 * because that was already excluded from xdr.size.)
1913 */
1914 *xfrsizep = size - (count * sizeof(journal_rawrrhdr_t));
1915 }
1916
1917 result = ISC_R_SUCCESS;
1918 failure:
1919 j->it.result = result;
1920 return (j->it.result);
1921 }
1922
1923 isc_result_t
dns_journal_first_rr(dns_journal_t * j)1924 dns_journal_first_rr(dns_journal_t *j) {
1925 isc_result_t result;
1926
1927 /*
1928 * Seek to the beginning of the first transaction we are
1929 * interested in.
1930 */
1931 CHECK(journal_seek(j, j->it.bpos.offset));
1932 j->it.current_serial = j->it.bpos.serial;
1933
1934 j->it.xsize = 0; /* We have no transaction data yet... */
1935 j->it.xpos = 0; /* ...and haven't used any of it. */
1936
1937 return (read_one_rr(j));
1938
1939 failure:
1940 return (result);
1941 }
1942
/*
 * Read the next RR of the journal 'j' into the iterator state
 * (j->it.name, j->it.rdata, j->it.ttl).  When the previous
 * transaction has been fully consumed, the next transaction header is
 * read and checked first.
 *
 * Returns ISC_R_SUCCESS when an RR was read, ISC_R_NOMORE when the
 * read position has reached j->it.epos, ISC_R_UNEXPECTED,
 * DNS_R_FORMERR or ISC_R_FAILURE for the corruption cases detected
 * below, or the result of the failing helper.
 *
 * Results produced after the two initial position checks are also
 * stored in j->it.result; the two early returns do not touch it.
 */
static isc_result_t
read_one_rr(dns_journal_t *j) {
	isc_result_t result;
	dns_rdatatype_t rdtype;
	dns_rdataclass_t rdclass;
	unsigned int rdlen;
	uint32_t ttl;
	journal_xhdr_t xhdr;
	journal_rrhdr_t rrhdr;
	/*
	 * Copy of the journal state on entry.  Only save.offset is
	 * used below: it is the offset, taken before anything is
	 * read, that is handed to maybe_fixup_xhdr().
	 */
	dns_journal_t save = *j;

	/* The read position must never pass the end position. */
	if (j->offset > j->it.epos.offset) {
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
			      "%s: journal corrupt: possible integer overflow",
			      j->filename);
		return (ISC_R_UNEXPECTED);
	}
	if (j->offset == j->it.epos.offset) {
		return (ISC_R_NOMORE);
	}
	if (j->it.xpos == j->it.xsize) {
		/*
		 * We are at a transaction boundary.
		 * Read another transaction header.
		 */
		CHECK(journal_read_xhdr(j, &xhdr));
		if (xhdr.size == 0) {
			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
				      "%s: journal corrupt: empty transaction",
				      j->filename);
			FAIL(ISC_R_UNEXPECTED);
		}

		if (j->header_ver1) {
			CHECK(maybe_fixup_xhdr(j, &xhdr, j->it.current_serial,
					       save.offset));
		}

		/*
		 * The transaction must start at the serial we are at
		 * and must move the serial forward.
		 */
		if (xhdr.serial0 != j->it.current_serial ||
		    isc_serial_le(xhdr.serial1, xhdr.serial0))
		{
			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
				      "%s: journal file corrupt: "
				      "expected serial %u, got %u",
				      j->filename, j->it.current_serial,
				      xhdr.serial0);
			FAIL(ISC_R_UNEXPECTED);
		}

		j->it.xsize = xhdr.size;
		j->it.xpos = 0;
	}
	/*
	 * Read an RR.
	 */
	CHECK(journal_read_rrhdr(j, &rrhdr));
	/*
	 * Perform a sanity check on the journal RR size.
	 * The smallest possible RR has a 1-byte owner name
	 * and a 10-byte header.  The largest possible
	 * RR has 65535 bytes of data, a header, and a maximum-
	 * size owner name, well below 70 k total.
	 */
	if (rrhdr.size < 1 + 10 || rrhdr.size > 70000) {
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
			      "%s: journal corrupt: impossible RR size "
			      "(%d bytes)",
			      j->filename, rrhdr.size);
		FAIL(ISC_R_UNEXPECTED);
	}

	/* Pull the whole RR into the source buffer. */
	CHECK(size_buffer(j->mctx, &j->it.source, rrhdr.size));
	CHECK(journal_read(j, j->it.source.base, rrhdr.size));
	isc_buffer_add(&j->it.source, rrhdr.size);

	/*
	 * The target buffer is made the same size
	 * as the source buffer, with the assumption that when
	 * no compression is present, the output of dns_*_fromwire()
	 * is no larger than the input.
	 */
	CHECK(size_buffer(j->mctx, &j->it.target, rrhdr.size));

	/*
	 * Parse the owner name.  We don't know where it
	 * ends yet, so we make the entire "remaining"
	 * part of the buffer "active".
	 */
	isc_buffer_setactive(&j->it.source,
			     j->it.source.used - j->it.source.current);
	CHECK(dns_name_fromwire(&j->it.name, &j->it.source, &j->it.dctx, 0,
				&j->it.target));

	/*
	 * Check that the RR header is there, and parse it.
	 * It is 10 bytes: type (2), class (2), TTL (4), rdlength (2).
	 */
	if (isc_buffer_remaininglength(&j->it.source) < 10) {
		FAIL(DNS_R_FORMERR);
	}

	rdtype = isc_buffer_getuint16(&j->it.source);
	rdclass = isc_buffer_getuint16(&j->it.source);
	ttl = isc_buffer_getuint32(&j->it.source);
	rdlen = isc_buffer_getuint16(&j->it.source);

	if (rdlen > DNS_RDATA_MAXLENGTH) {
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
			      "%s: journal corrupt: impossible rdlen "
			      "(%u bytes)",
			      j->filename, rdlen);
		FAIL(ISC_R_FAILURE);
	}

	/*
	 * Parse the rdata.  It must account for exactly the rest of
	 * the RR.
	 */
	if (isc_buffer_remaininglength(&j->it.source) != rdlen) {
		FAIL(DNS_R_FORMERR);
	}
	isc_buffer_setactive(&j->it.source, rdlen);
	dns_rdata_reset(&j->it.rdata);
	CHECK(dns_rdata_fromwire(&j->it.rdata, rdclass, rdtype, &j->it.source,
				 &j->it.dctx, 0, &j->it.target));
	j->it.ttl = ttl;

	/* Account for the journal RR header plus the RR just consumed. */
	j->it.xpos += sizeof(journal_rawrrhdr_t) + rrhdr.size;
	if (rdtype == dns_rdatatype_soa) {
		/* XXX could do additional consistency checks here */
		j->it.current_serial = dns_soa_getserial(&j->it.rdata);
	}

	result = ISC_R_SUCCESS;

failure:
	j->it.result = result;
	return (result);
}
2080
2081 isc_result_t
dns_journal_next_rr(dns_journal_t * j)2082 dns_journal_next_rr(dns_journal_t *j) {
2083 j->it.result = read_one_rr(j);
2084 return (j->it.result);
2085 }
2086
2087 void
dns_journal_current_rr(dns_journal_t * j,dns_name_t ** name,uint32_t * ttl,dns_rdata_t ** rdata)2088 dns_journal_current_rr(dns_journal_t *j, dns_name_t **name, uint32_t *ttl,
2089 dns_rdata_t **rdata) {
2090 REQUIRE(j->it.result == ISC_R_SUCCESS);
2091 *name = &j->it.name;
2092 *ttl = j->it.ttl;
2093 *rdata = &j->it.rdata;
2094 }
2095
2096 /**************************************************************************/
2097 /*
2098 * Generating diffs from databases
2099 */
2100
2101 /*
2102 * Construct a diff containing all the RRs at the current name of the
2103 * database iterator 'dbit' in database 'db', version 'ver'.
2104 * Set '*name' to the current name, and append the diff to 'diff'.
2105 * All new tuples will have the operation 'op'.
2106 *
2107 * Requires: 'name' must have buffer large enough to hold the name.
2108 * Typically, a dns_fixedname_t would be used.
2109 */
2110 static isc_result_t
get_name_diff(dns_db_t * db,dns_dbversion_t * ver,isc_stdtime_t now,dns_dbiterator_t * dbit,dns_name_t * name,dns_diffop_t op,dns_diff_t * diff)2111 get_name_diff(dns_db_t *db, dns_dbversion_t *ver, isc_stdtime_t now,
2112 dns_dbiterator_t *dbit, dns_name_t *name, dns_diffop_t op,
2113 dns_diff_t *diff) {
2114 isc_result_t result;
2115 dns_dbnode_t *node = NULL;
2116 dns_rdatasetiter_t *rdsiter = NULL;
2117 dns_difftuple_t *tuple = NULL;
2118
2119 result = dns_dbiterator_current(dbit, &node, name);
2120 if (result != ISC_R_SUCCESS) {
2121 return (result);
2122 }
2123
2124 result = dns_db_allrdatasets(db, node, ver, now, &rdsiter);
2125 if (result != ISC_R_SUCCESS) {
2126 goto cleanup_node;
2127 }
2128
2129 for (result = dns_rdatasetiter_first(rdsiter); result == ISC_R_SUCCESS;
2130 result = dns_rdatasetiter_next(rdsiter))
2131 {
2132 dns_rdataset_t rdataset;
2133
2134 dns_rdataset_init(&rdataset);
2135 dns_rdatasetiter_current(rdsiter, &rdataset);
2136
2137 for (result = dns_rdataset_first(&rdataset);
2138 result == ISC_R_SUCCESS;
2139 result = dns_rdataset_next(&rdataset))
2140 {
2141 dns_rdata_t rdata = DNS_RDATA_INIT;
2142 dns_rdataset_current(&rdataset, &rdata);
2143 result = dns_difftuple_create(diff->mctx, op, name,
2144 rdataset.ttl, &rdata,
2145 &tuple);
2146 if (result != ISC_R_SUCCESS) {
2147 dns_rdataset_disassociate(&rdataset);
2148 goto cleanup_iterator;
2149 }
2150 dns_diff_append(diff, &tuple);
2151 }
2152 dns_rdataset_disassociate(&rdataset);
2153 if (result != ISC_R_NOMORE) {
2154 goto cleanup_iterator;
2155 }
2156 }
2157 if (result != ISC_R_NOMORE) {
2158 goto cleanup_iterator;
2159 }
2160
2161 result = ISC_R_SUCCESS;
2162
2163 cleanup_iterator:
2164 dns_rdatasetiter_destroy(&rdsiter);
2165
2166 cleanup_node:
2167 dns_db_detachnode(db, &node);
2168
2169 return (result);
2170 }
2171
2172 /*
2173 * Comparison function for use by dns_diff_subtract when sorting
2174 * the diffs to be subtracted. The sort keys are the rdata type
2175 * and the rdata itself. The owner name is ignored, because
2176 * it is known to be the same for all tuples.
2177 */
2178 static int
rdata_order(const void * av,const void * bv)2179 rdata_order(const void *av, const void *bv) {
2180 dns_difftuple_t const *const *ap = av;
2181 dns_difftuple_t const *const *bp = bv;
2182 dns_difftuple_t const *a = *ap;
2183 dns_difftuple_t const *b = *bp;
2184 int r;
2185 r = (b->rdata.type - a->rdata.type);
2186 if (r != 0) {
2187 return (r);
2188 }
2189 r = dns_rdata_compare(&a->rdata, &b->rdata);
2190 return (r);
2191 }
2192
2193 static isc_result_t
dns_diff_subtract(dns_diff_t diff[2],dns_diff_t * r)2194 dns_diff_subtract(dns_diff_t diff[2], dns_diff_t *r) {
2195 isc_result_t result;
2196 dns_difftuple_t *p[2];
2197 int i, t;
2198 bool append;
2199 dns_difftuplelist_t add, del;
2200
2201 CHECK(dns_diff_sort(&diff[0], rdata_order));
2202 CHECK(dns_diff_sort(&diff[1], rdata_order));
2203 ISC_LIST_INIT(add);
2204 ISC_LIST_INIT(del);
2205
2206 for (;;) {
2207 p[0] = ISC_LIST_HEAD(diff[0].tuples);
2208 p[1] = ISC_LIST_HEAD(diff[1].tuples);
2209 if (p[0] == NULL && p[1] == NULL) {
2210 break;
2211 }
2212
2213 for (i = 0; i < 2; i++) {
2214 if (p[!i] == NULL) {
2215 dns_difftuplelist_t *l = (i == 0) ? &add : &del;
2216 ISC_LIST_UNLINK(diff[i].tuples, p[i], link);
2217 ISC_LIST_APPEND(*l, p[i], link);
2218 goto next;
2219 }
2220 }
2221 t = rdata_order(&p[0], &p[1]);
2222 if (t < 0) {
2223 ISC_LIST_UNLINK(diff[0].tuples, p[0], link);
2224 ISC_LIST_APPEND(add, p[0], link);
2225 goto next;
2226 }
2227 if (t > 0) {
2228 ISC_LIST_UNLINK(diff[1].tuples, p[1], link);
2229 ISC_LIST_APPEND(del, p[1], link);
2230 goto next;
2231 }
2232 INSIST(t == 0);
2233 /*
2234 * Identical RRs in both databases; skip them both
2235 * if the ttl differs.
2236 */
2237 append = (p[0]->ttl != p[1]->ttl);
2238 for (i = 0; i < 2; i++) {
2239 ISC_LIST_UNLINK(diff[i].tuples, p[i], link);
2240 if (append) {
2241 dns_difftuplelist_t *l = (i == 0) ? &add : &del;
2242 ISC_LIST_APPEND(*l, p[i], link);
2243 } else {
2244 dns_difftuple_free(&p[i]);
2245 }
2246 }
2247 next:;
2248 }
2249 ISC_LIST_APPENDLIST(r->tuples, del, link);
2250 ISC_LIST_APPENDLIST(r->tuples, add, link);
2251 result = ISC_R_SUCCESS;
2252 failure:
2253 return (result);
2254 }
2255
2256 static isc_result_t
diff_namespace(dns_db_t * dba,dns_dbversion_t * dbvera,dns_db_t * dbb,dns_dbversion_t * dbverb,unsigned int options,dns_diff_t * resultdiff)2257 diff_namespace(dns_db_t *dba, dns_dbversion_t *dbvera, dns_db_t *dbb,
2258 dns_dbversion_t *dbverb, unsigned int options,
2259 dns_diff_t *resultdiff) {
2260 dns_db_t *db[2];
2261 dns_dbversion_t *ver[2];
2262 dns_dbiterator_t *dbit[2] = { NULL, NULL };
2263 bool have[2] = { false, false };
2264 dns_fixedname_t fixname[2];
2265 isc_result_t result, itresult[2];
2266 dns_diff_t diff[2];
2267 int i, t;
2268
2269 db[0] = dba, db[1] = dbb;
2270 ver[0] = dbvera, ver[1] = dbverb;
2271
2272 dns_diff_init(resultdiff->mctx, &diff[0]);
2273 dns_diff_init(resultdiff->mctx, &diff[1]);
2274
2275 dns_fixedname_init(&fixname[0]);
2276 dns_fixedname_init(&fixname[1]);
2277
2278 result = dns_db_createiterator(db[0], options, &dbit[0]);
2279 if (result != ISC_R_SUCCESS) {
2280 return (result);
2281 }
2282 result = dns_db_createiterator(db[1], options, &dbit[1]);
2283 if (result != ISC_R_SUCCESS) {
2284 goto cleanup_iterator;
2285 }
2286
2287 itresult[0] = dns_dbiterator_first(dbit[0]);
2288 itresult[1] = dns_dbiterator_first(dbit[1]);
2289
2290 for (;;) {
2291 for (i = 0; i < 2; i++) {
2292 if (!have[i] && itresult[i] == ISC_R_SUCCESS) {
2293 CHECK(get_name_diff(
2294 db[i], ver[i], 0, dbit[i],
2295 dns_fixedname_name(&fixname[i]),
2296 i == 0 ? DNS_DIFFOP_ADD
2297 : DNS_DIFFOP_DEL,
2298 &diff[i]));
2299 itresult[i] = dns_dbiterator_next(dbit[i]);
2300 have[i] = true;
2301 }
2302 }
2303
2304 if (!have[0] && !have[1]) {
2305 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
2306 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
2307 break;
2308 }
2309
2310 for (i = 0; i < 2; i++) {
2311 if (!have[!i]) {
2312 ISC_LIST_APPENDLIST(resultdiff->tuples,
2313 diff[i].tuples, link);
2314 INSIST(ISC_LIST_EMPTY(diff[i].tuples));
2315 have[i] = false;
2316 goto next;
2317 }
2318 }
2319
2320 t = dns_name_compare(dns_fixedname_name(&fixname[0]),
2321 dns_fixedname_name(&fixname[1]));
2322 if (t < 0) {
2323 ISC_LIST_APPENDLIST(resultdiff->tuples, diff[0].tuples,
2324 link);
2325 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
2326 have[0] = false;
2327 continue;
2328 }
2329 if (t > 0) {
2330 ISC_LIST_APPENDLIST(resultdiff->tuples, diff[1].tuples,
2331 link);
2332 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
2333 have[1] = false;
2334 continue;
2335 }
2336 INSIST(t == 0);
2337 CHECK(dns_diff_subtract(diff, resultdiff));
2338 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
2339 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
2340 have[0] = have[1] = false;
2341 next:;
2342 }
2343 if (itresult[0] != ISC_R_NOMORE) {
2344 FAIL(itresult[0]);
2345 }
2346 if (itresult[1] != ISC_R_NOMORE) {
2347 FAIL(itresult[1]);
2348 }
2349
2350 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
2351 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
2352
2353 failure:
2354 dns_dbiterator_destroy(&dbit[1]);
2355
2356 cleanup_iterator:
2357 dns_dbiterator_destroy(&dbit[0]);
2358 dns_diff_clear(&diff[0]);
2359 dns_diff_clear(&diff[1]);
2360 return (result);
2361 }
2362
2363 /*
2364 * Compare the databases 'dba' and 'dbb' and generate a journal
2365 * entry containing the changes to make 'dba' from 'dbb' (note
2366 * the order). This journal entry will consist of a single,
2367 * possibly very large transaction.
2368 */
2369 isc_result_t
dns_db_diff(isc_mem_t * mctx,dns_db_t * dba,dns_dbversion_t * dbvera,dns_db_t * dbb,dns_dbversion_t * dbverb,const char * filename)2370 dns_db_diff(isc_mem_t *mctx, dns_db_t *dba, dns_dbversion_t *dbvera,
2371 dns_db_t *dbb, dns_dbversion_t *dbverb, const char *filename) {
2372 isc_result_t result;
2373 dns_diff_t diff;
2374
2375 dns_diff_init(mctx, &diff);
2376
2377 result = dns_db_diffx(&diff, dba, dbvera, dbb, dbverb, filename);
2378
2379 dns_diff_clear(&diff);
2380
2381 return (result);
2382 }
2383
2384 isc_result_t
dns_db_diffx(dns_diff_t * diff,dns_db_t * dba,dns_dbversion_t * dbvera,dns_db_t * dbb,dns_dbversion_t * dbverb,const char * filename)2385 dns_db_diffx(dns_diff_t *diff, dns_db_t *dba, dns_dbversion_t *dbvera,
2386 dns_db_t *dbb, dns_dbversion_t *dbverb, const char *filename) {
2387 isc_result_t result;
2388 dns_journal_t *journal = NULL;
2389
2390 if (filename != NULL) {
2391 result = dns_journal_open(diff->mctx, filename,
2392 DNS_JOURNAL_CREATE, &journal);
2393 if (result != ISC_R_SUCCESS) {
2394 return (result);
2395 }
2396 }
2397
2398 CHECK(diff_namespace(dba, dbvera, dbb, dbverb, DNS_DB_NONSEC3, diff));
2399 CHECK(diff_namespace(dba, dbvera, dbb, dbverb, DNS_DB_NSEC3ONLY, diff));
2400
2401 if (journal != NULL) {
2402 if (ISC_LIST_EMPTY(diff->tuples)) {
2403 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no changes");
2404 } else {
2405 CHECK(dns_journal_write_transaction(journal, diff));
2406 }
2407 }
2408
2409 failure:
2410 if (journal != NULL) {
2411 dns_journal_destroy(&journal);
2412 }
2413 return (result);
2414 }
2415
2416 static uint32_t
rrcount(unsigned char * buf,unsigned int size)2417 rrcount(unsigned char *buf, unsigned int size) {
2418 isc_buffer_t b;
2419 uint32_t rrsize, count = 0;
2420
2421 isc_buffer_init(&b, buf, size);
2422 isc_buffer_add(&b, size);
2423 while (isc_buffer_remaininglength(&b) > 0) {
2424 rrsize = isc_buffer_getuint32(&b);
2425 INSIST(isc_buffer_remaininglength(&b) >= rrsize);
2426 isc_buffer_forward(&b, rrsize);
2427 count++;
2428 }
2429
2430 return (count);
2431 }
2432
2433 static bool
check_delta(unsigned char * buf,size_t size)2434 check_delta(unsigned char *buf, size_t size) {
2435 isc_buffer_t b;
2436 uint32_t rrsize;
2437
2438 isc_buffer_init(&b, buf, size);
2439 isc_buffer_add(&b, size);
2440 while (isc_buffer_remaininglength(&b) > 0) {
2441 if (isc_buffer_remaininglength(&b) < 4) {
2442 return (false);
2443 }
2444 rrsize = isc_buffer_getuint32(&b);
2445 /* "." + type + class + ttl + rdlen => 11U */
2446 if (rrsize < 11U || isc_buffer_remaininglength(&b) < rrsize) {
2447 return (false);
2448 }
2449 isc_buffer_forward(&b, rrsize);
2450 }
2451
2452 return (true);
2453 }
2454
2455 isc_result_t
dns_journal_compact(isc_mem_t * mctx,char * filename,uint32_t serial,uint32_t flags,uint32_t target_size)2456 dns_journal_compact(isc_mem_t *mctx, char *filename, uint32_t serial,
2457 uint32_t flags, uint32_t target_size) {
2458 unsigned int i;
2459 journal_pos_t best_guess;
2460 journal_pos_t current_pos;
2461 dns_journal_t *j1 = NULL;
2462 dns_journal_t *j2 = NULL;
2463 journal_rawheader_t rawheader;
2464 unsigned int len;
2465 size_t namelen;
2466 unsigned char *buf = NULL;
2467 unsigned int size = 0;
2468 isc_result_t result;
2469 unsigned int indexend;
2470 char newname[PATH_MAX];
2471 char backup[PATH_MAX];
2472 bool is_backup = false;
2473 bool rewrite = false;
2474 bool downgrade = false;
2475
2476 REQUIRE(filename != NULL);
2477
2478 namelen = strlen(filename);
2479 if (namelen > 4U && strcmp(filename + namelen - 4, ".jnl") == 0) {
2480 namelen -= 4;
2481 }
2482
2483 result = snprintf(newname, sizeof(newname), "%.*s.jnw", (int)namelen,
2484 filename);
2485 RUNTIME_CHECK(result < sizeof(newname));
2486
2487 result = snprintf(backup, sizeof(backup), "%.*s.jbk", (int)namelen,
2488 filename);
2489 RUNTIME_CHECK(result < sizeof(backup));
2490
2491 result = journal_open(mctx, filename, false, false, false, &j1);
2492 if (result == ISC_R_NOTFOUND) {
2493 is_backup = true;
2494 result = journal_open(mctx, backup, false, false, false, &j1);
2495 }
2496 if (result != ISC_R_SUCCESS) {
2497 return (result);
2498 }
2499
2500 /*
2501 * Always perform a re-write when processing a version 1 journal.
2502 */
2503 rewrite = j1->header_ver1;
2504
2505 /*
2506 * Check whether we need to rewrite the whole journal
2507 * file (for example, to upversion it).
2508 */
2509 if ((flags & DNS_JOURNAL_COMPACTALL) != 0) {
2510 if ((flags & DNS_JOURNAL_VERSION1) != 0) {
2511 downgrade = true;
2512 }
2513 rewrite = true;
2514 serial = dns_journal_first_serial(j1);
2515 } else if (JOURNAL_EMPTY(&j1->header)) {
2516 dns_journal_destroy(&j1);
2517 return (ISC_R_SUCCESS);
2518 }
2519
2520 if (DNS_SERIAL_GT(j1->header.begin.serial, serial) ||
2521 DNS_SERIAL_GT(serial, j1->header.end.serial))
2522 {
2523 dns_journal_destroy(&j1);
2524 return (ISC_R_RANGE);
2525 }
2526
2527 /*
2528 * Cope with very small target sizes.
2529 */
2530 indexend = sizeof(journal_rawheader_t) +
2531 j1->header.index_size * sizeof(journal_rawpos_t);
2532 if (target_size < DNS_JOURNAL_SIZE_MIN) {
2533 target_size = DNS_JOURNAL_SIZE_MIN;
2534 }
2535 if (target_size < indexend * 2) {
2536 target_size = target_size / 2 + indexend;
2537 }
2538
2539 /*
2540 * See if there is any work to do.
2541 */
2542 if (!rewrite && (uint32_t)j1->header.end.offset < target_size) {
2543 dns_journal_destroy(&j1);
2544 return (ISC_R_SUCCESS);
2545 }
2546
2547 CHECK(journal_open(mctx, newname, true, true, downgrade, &j2));
2548 CHECK(journal_seek(j2, indexend));
2549
2550 /*
2551 * Remove overhead so space test below can succeed.
2552 */
2553 if (target_size >= indexend) {
2554 target_size -= indexend;
2555 }
2556
2557 /*
2558 * Find if we can create enough free space.
2559 */
2560 best_guess = j1->header.begin;
2561 for (i = 0; i < j1->header.index_size; i++) {
2562 if (POS_VALID(j1->index[i]) &&
2563 DNS_SERIAL_GE(serial, j1->index[i].serial) &&
2564 ((uint32_t)(j1->header.end.offset - j1->index[i].offset) >=
2565 target_size / 2) &&
2566 j1->index[i].offset > best_guess.offset)
2567 {
2568 best_guess = j1->index[i];
2569 }
2570 }
2571
2572 current_pos = best_guess;
2573 while (current_pos.serial != serial) {
2574 CHECK(journal_next(j1, ¤t_pos));
2575 if (current_pos.serial == j1->header.end.serial) {
2576 break;
2577 }
2578
2579 if (DNS_SERIAL_GE(serial, current_pos.serial) &&
2580 ((uint32_t)(j1->header.end.offset - current_pos.offset) >=
2581 (target_size / 2)) &&
2582 current_pos.offset > best_guess.offset)
2583 {
2584 best_guess = current_pos;
2585 } else {
2586 break;
2587 }
2588 }
2589
2590 INSIST(best_guess.serial != j1->header.end.serial);
2591 if (best_guess.serial != serial) {
2592 CHECK(journal_next(j1, &best_guess));
2593 serial = best_guess.serial;
2594 }
2595
2596 /*
2597 * We should now be roughly half target_size provided
2598 * we did not reach 'serial'. If not we will just copy
2599 * all uncommitted deltas regardless of the size.
2600 */
2601 len = j1->header.end.offset - best_guess.offset;
2602 if (len != 0) {
2603 CHECK(journal_seek(j1, best_guess.offset));
2604
2605 /* Prepare new header */
2606 j2->header.begin.serial = best_guess.serial;
2607 j2->header.begin.offset = indexend;
2608 j2->header.sourceserial = j1->header.sourceserial;
2609 j2->header.serialset = j1->header.serialset;
2610 j2->header.end.serial = j1->header.end.serial;
2611
2612 /*
2613 * Only use this method if we're rewriting the
2614 * journal to fix outdated transaction headers;
2615 * otherwise we'll copy the whole journal without
2616 * parsing individual deltas below.
2617 */
2618 while (rewrite && len > 0) {
2619 journal_xhdr_t xhdr;
2620 isc_offset_t offset = j1->offset;
2621 uint32_t count;
2622
2623 result = journal_read_xhdr(j1, &xhdr);
2624 if (rewrite && result == ISC_R_NOMORE) {
2625 break;
2626 }
2627 CHECK(result);
2628
2629 size = xhdr.size;
2630 if (size > len) {
2631 isc_log_write(JOURNAL_COMMON_LOGARGS,
2632 ISC_LOG_ERROR,
2633 "%s: journal file corrupt, "
2634 "transaction too large",
2635 j1->filename);
2636 CHECK(ISC_R_FAILURE);
2637 }
2638 buf = isc_mem_get(mctx, size);
2639 result = journal_read(j1, buf, size);
2640
2641 /*
2642 * If we're repairing an outdated journal, the
2643 * xhdr format may be wrong.
2644 */
2645 if (rewrite && (result != ISC_R_SUCCESS ||
2646 !check_delta(buf, size))) {
2647 if (j1->xhdr_version == XHDR_VERSION2) {
2648 /* XHDR_VERSION2 -> XHDR_VERSION1 */
2649 j1->xhdr_version = XHDR_VERSION1;
2650 CHECK(journal_seek(j1, offset));
2651 CHECK(journal_read_xhdr(j1, &xhdr));
2652 } else if (j1->xhdr_version == XHDR_VERSION1) {
2653 /* XHDR_VERSION1 -> XHDR_VERSION2 */
2654 j1->xhdr_version = XHDR_VERSION2;
2655 CHECK(journal_seek(j1, offset));
2656 CHECK(journal_read_xhdr(j1, &xhdr));
2657 }
2658
2659 /* Check again */
2660 isc_mem_put(mctx, buf, size);
2661 size = xhdr.size;
2662 if (size > len) {
2663 isc_log_write(
2664 JOURNAL_COMMON_LOGARGS,
2665 ISC_LOG_ERROR,
2666 "%s: journal file corrupt, "
2667 "transaction too large",
2668 j1->filename);
2669 CHECK(ISC_R_FAILURE);
2670 }
2671 buf = isc_mem_get(mctx, size);
2672 CHECK(journal_read(j1, buf, size));
2673
2674 if (!check_delta(buf, size)) {
2675 CHECK(ISC_R_UNEXPECTED);
2676 }
2677 } else {
2678 CHECK(result);
2679 }
2680
2681 /*
2682 * Recover from incorrectly written transaction header.
2683 * The incorrect header was written as size, serial0,
2684 * serial1, and 0. XHDR_VERSION2 is expecting size,
2685 * count, serial0, and serial1.
2686 */
2687 if (j1->xhdr_version == XHDR_VERSION2 &&
2688 xhdr.count == serial && xhdr.serial1 == 0U &&
2689 isc_serial_gt(xhdr.serial0, xhdr.count))
2690 {
2691 xhdr.serial1 = xhdr.serial0;
2692 xhdr.serial0 = xhdr.count;
2693 xhdr.count = 0;
2694 }
2695
2696 /*
2697 * Check that xhdr is consistent.
2698 */
2699 if (xhdr.serial0 != serial ||
2700 isc_serial_le(xhdr.serial1, xhdr.serial0)) {
2701 CHECK(ISC_R_UNEXPECTED);
2702 }
2703
2704 /*
2705 * Extract record count from the transaction. This
2706 * is needed when converting from XHDR_VERSION1 to
2707 * XHDR_VERSION2, and when recovering from an
2708 * incorrectly written XHDR_VERSION2.
2709 */
2710 count = rrcount(buf, size);
2711 CHECK(journal_write_xhdr(j2, xhdr.size, count,
2712 xhdr.serial0, xhdr.serial1));
2713 CHECK(journal_write(j2, buf, size));
2714
2715 j2->header.end.offset = j2->offset;
2716
2717 serial = xhdr.serial1;
2718
2719 len = j1->header.end.offset - j1->offset;
2720 isc_mem_put(mctx, buf, size);
2721 }
2722
2723 /*
2724 * If we're not rewriting transaction headers, we can use
2725 * this faster method instead.
2726 */
2727 if (!rewrite) {
2728 size = ISC_MIN(64 * 1024, len);
2729 buf = isc_mem_get(mctx, size);
2730 for (i = 0; i < len; i += size) {
2731 unsigned int blob = ISC_MIN(size, len - i);
2732 CHECK(journal_read(j1, buf, blob));
2733 CHECK(journal_write(j2, buf, blob));
2734 }
2735
2736 j2->header.end.offset = indexend + len;
2737 }
2738
2739 CHECK(journal_fsync(j2));
2740
2741 /*
2742 * Update the journal header.
2743 */
2744 journal_header_encode(&j2->header, &rawheader);
2745 CHECK(journal_seek(j2, 0));
2746 CHECK(journal_write(j2, &rawheader, sizeof(rawheader)));
2747 CHECK(journal_fsync(j2));
2748
2749 /*
2750 * Build new index.
2751 */
2752 current_pos = j2->header.begin;
2753 while (current_pos.serial != j2->header.end.serial) {
2754 index_add(j2, ¤t_pos);
2755 CHECK(journal_next(j2, ¤t_pos));
2756 }
2757
2758 /*
2759 * Write index.
2760 */
2761 CHECK(index_to_disk(j2));
2762 CHECK(journal_fsync(j2));
2763
2764 indexend = j2->header.end.offset;
2765 POST(indexend);
2766 }
2767
2768 /*
2769 * Close both journals before trying to rename files (this is
2770 * necessary on WIN32).
2771 */
2772 dns_journal_destroy(&j1);
2773 dns_journal_destroy(&j2);
2774
2775 /*
2776 * With a UFS file system this should just succeed and be atomic.
2777 * Any IXFR outs will just continue and the old journal will be
2778 * removed on final close.
2779 *
2780 * With MSDOS / NTFS we need to do a two stage rename, triggered
2781 * by EEXIST. (If any IXFR's are running in other threads, however,
2782 * this will fail, and the journal will not be compacted. But
2783 * if so, hopefully they'll be finished by the next time we
2784 * compact.)
2785 */
2786 if (rename(newname, filename) == -1) {
2787 if (errno == EEXIST && !is_backup) {
2788 result = isc_file_remove(backup);
2789 if (result != ISC_R_SUCCESS &&
2790 result != ISC_R_FILENOTFOUND) {
2791 goto failure;
2792 }
2793 if (rename(filename, backup) == -1) {
2794 goto maperrno;
2795 }
2796 if (rename(newname, filename) == -1) {
2797 goto maperrno;
2798 }
2799 (void)isc_file_remove(backup);
2800 } else {
2801 maperrno:
2802 result = ISC_R_FAILURE;
2803 goto failure;
2804 }
2805 }
2806
2807 result = ISC_R_SUCCESS;
2808
2809 failure:
2810 (void)isc_file_remove(newname);
2811 if (buf != NULL) {
2812 isc_mem_put(mctx, buf, size);
2813 }
2814 if (j1 != NULL) {
2815 dns_journal_destroy(&j1);
2816 }
2817 if (j2 != NULL) {
2818 dns_journal_destroy(&j2);
2819 }
2820 return (result);
2821 }
2822
2823 static isc_result_t
index_to_disk(dns_journal_t * j)2824 index_to_disk(dns_journal_t *j) {
2825 isc_result_t result = ISC_R_SUCCESS;
2826
2827 if (j->header.index_size != 0) {
2828 unsigned int i;
2829 unsigned char *p;
2830 unsigned int rawbytes;
2831
2832 rawbytes = j->header.index_size * sizeof(journal_rawpos_t);
2833
2834 p = j->rawindex;
2835 for (i = 0; i < j->header.index_size; i++) {
2836 encode_uint32(j->index[i].serial, p);
2837 p += 4;
2838 encode_uint32(j->index[i].offset, p);
2839 p += 4;
2840 }
2841 INSIST(p == j->rawindex + rawbytes);
2842
2843 CHECK(journal_seek(j, sizeof(journal_rawheader_t)));
2844 CHECK(journal_write(j, j->rawindex, rawbytes));
2845 }
2846 failure:
2847 return (result);
2848 }
2849