1 /*
2  * Parse and rearrange a svnadmin dump.
3  * Create the dump with:
4  * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile
5  *
6  * Licensed under a two-clause BSD-style license.
7  * See LICENSE for details.
8  */
9 
10 #include "cache.h"
11 #include "fast_export.h"
12 #include "line_buffer.h"
13 #include "strbuf.h"
14 #include "svndump.h"
15 
/*
 * Compare the leading bytes of s against the string literal ref,
 * looking at exactly as many bytes as ref has characters (the
 * terminating NUL is excluded).  Evaluates to 0 on a match, like
 * memcmp().  The caller must already know that s is at least that long.
 */
#define constcmp(s, ref) memcmp((s), (ref), sizeof(ref) - 1)
21 
/* Descriptor handed to fast_export_init() when reading from a named file. */
#define REPORT_FILENO 3

/* Values stored in node_ctx.action, selected by the Node-action header. */
enum {
	NODEACT_UNKNOWN = 0,
	NODEACT_CHANGE = 1,
	NODEACT_ADD = 2,
	NODEACT_DELETE = 3,
	NODEACT_REPLACE = 4
};

/* States: */
enum {
	DUMP_CTX = 0,		/* dump metadata */
	REV_CTX = 1,		/* revision metadata */
	NODE_CTX = 2,		/* node metadata */
	INTERNODE_CTX = 3	/* between nodes */
};

#define DATE_RFC2822_LEN 31
37 
38 static struct line_buffer input = LINE_BUFFER_INIT;
39 
40 static struct {
41 	uint32_t action, srcRev, type;
42 	off_t prop_length, text_length;
43 	struct strbuf src, dst;
44 	uint32_t text_delta, prop_delta;
45 } node_ctx;
46 
47 static struct {
48 	uint32_t revision;
49 	timestamp_t timestamp;
50 	struct strbuf log, author, note;
51 } rev_ctx;
52 
53 static struct {
54 	uint32_t version;
55 	struct strbuf uuid, url;
56 } dump_ctx;
57 
reset_node_ctx(char * fname)58 static void reset_node_ctx(char *fname)
59 {
60 	node_ctx.type = 0;
61 	node_ctx.action = NODEACT_UNKNOWN;
62 	node_ctx.prop_length = -1;
63 	node_ctx.text_length = -1;
64 	strbuf_reset(&node_ctx.src);
65 	node_ctx.srcRev = 0;
66 	strbuf_reset(&node_ctx.dst);
67 	if (fname)
68 		strbuf_addstr(&node_ctx.dst, fname);
69 	node_ctx.text_delta = 0;
70 	node_ctx.prop_delta = 0;
71 }
72 
reset_rev_ctx(uint32_t revision)73 static void reset_rev_ctx(uint32_t revision)
74 {
75 	rev_ctx.revision = revision;
76 	rev_ctx.timestamp = 0;
77 	strbuf_reset(&rev_ctx.log);
78 	strbuf_reset(&rev_ctx.author);
79 	strbuf_reset(&rev_ctx.note);
80 }
81 
reset_dump_ctx(const char * url)82 static void reset_dump_ctx(const char *url)
83 {
84 	strbuf_reset(&dump_ctx.url);
85 	if (url)
86 		strbuf_addstr(&dump_ctx.url, url);
87 	dump_ctx.version = 1;
88 	strbuf_reset(&dump_ctx.uuid);
89 }
90 
handle_property(const struct strbuf * key_buf,struct strbuf * val,uint32_t * type_set)91 static void handle_property(const struct strbuf *key_buf,
92 				struct strbuf *val,
93 				uint32_t *type_set)
94 {
95 	const char *key = key_buf->buf;
96 	size_t keylen = key_buf->len;
97 
98 	switch (keylen + 1) {
99 	case sizeof("svn:log"):
100 		if (constcmp(key, "svn:log"))
101 			break;
102 		if (!val)
103 			die("invalid dump: unsets svn:log");
104 		strbuf_swap(&rev_ctx.log, val);
105 		break;
106 	case sizeof("svn:author"):
107 		if (constcmp(key, "svn:author"))
108 			break;
109 		if (!val)
110 			strbuf_reset(&rev_ctx.author);
111 		else
112 			strbuf_swap(&rev_ctx.author, val);
113 		break;
114 	case sizeof("svn:date"):
115 		if (constcmp(key, "svn:date"))
116 			break;
117 		if (!val)
118 			die("invalid dump: unsets svn:date");
119 		if (parse_date_basic(val->buf, &rev_ctx.timestamp, NULL))
120 			warning("invalid timestamp: %s", val->buf);
121 		break;
122 	case sizeof("svn:executable"):
123 	case sizeof("svn:special"):
124 		if (keylen == strlen("svn:executable") &&
125 		    constcmp(key, "svn:executable"))
126 			break;
127 		if (keylen == strlen("svn:special") &&
128 		    constcmp(key, "svn:special"))
129 			break;
130 		if (*type_set) {
131 			if (!val)
132 				return;
133 			die("invalid dump: sets type twice");
134 		}
135 		if (!val) {
136 			node_ctx.type = S_IFREG | 0644;
137 			return;
138 		}
139 		*type_set = 1;
140 		node_ctx.type = keylen == strlen("svn:executable") ?
141 				(S_IFREG | 0755) :
142 				S_IFLNK;
143 	}
144 }
145 
die_short_read(void)146 static void die_short_read(void)
147 {
148 	if (buffer_ferror(&input))
149 		die_errno("error reading dump file");
150 	die("invalid dump: unexpected end of file");
151 }
152 
read_props(void)153 static void read_props(void)
154 {
155 	static struct strbuf key = STRBUF_INIT;
156 	static struct strbuf val = STRBUF_INIT;
157 	const char *t;
158 	/*
159 	 * NEEDSWORK: to support simple mode changes like
160 	 *	K 11
161 	 *	svn:special
162 	 *	V 1
163 	 *	*
164 	 *	D 14
165 	 *	svn:executable
166 	 * we keep track of whether a mode has been set and reset to
167 	 * plain file only if not.  We should be keeping track of the
168 	 * symlink and executable bits separately instead.
169 	 */
170 	uint32_t type_set = 0;
171 	while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) {
172 		uint32_t len;
173 		const char type = t[0];
174 		int ch;
175 
176 		if (!type || t[1] != ' ')
177 			die("invalid property line: %s", t);
178 		len = atoi(&t[2]);
179 		strbuf_reset(&val);
180 		buffer_read_binary(&input, &val, len);
181 		if (val.len < len)
182 			die_short_read();
183 
184 		/* Discard trailing newline. */
185 		ch = buffer_read_char(&input);
186 		if (ch == EOF)
187 			die_short_read();
188 		if (ch != '\n')
189 			die("invalid dump: expected newline after %s", val.buf);
190 
191 		switch (type) {
192 		case 'K':
193 			strbuf_swap(&key, &val);
194 			continue;
195 		case 'D':
196 			handle_property(&val, NULL, &type_set);
197 			continue;
198 		case 'V':
199 			handle_property(&key, &val, &type_set);
200 			strbuf_reset(&key);
201 			continue;
202 		default:
203 			die("invalid property line: %s", t);
204 		}
205 	}
206 }
207 
handle_node(void)208 static void handle_node(void)
209 {
210 	const uint32_t type = node_ctx.type;
211 	const int have_props = node_ctx.prop_length != -1;
212 	const int have_text = node_ctx.text_length != -1;
213 	/*
214 	 * Old text for this node:
215 	 *  NULL	- directory or bug
216 	 *  empty_blob	- empty
217 	 *  "<dataref>"	- data retrievable from fast-import
218 	 */
219 	static const char *const empty_blob = "::empty::";
220 	const char *old_data = NULL;
221 	uint32_t old_mode = S_IFREG | 0644;
222 
223 	if (node_ctx.action == NODEACT_DELETE) {
224 		if (have_text || have_props || node_ctx.srcRev)
225 			die("invalid dump: deletion node has "
226 				"copyfrom info, text, or properties");
227 		fast_export_delete(node_ctx.dst.buf);
228 		return;
229 	}
230 	if (node_ctx.action == NODEACT_REPLACE) {
231 		fast_export_delete(node_ctx.dst.buf);
232 		node_ctx.action = NODEACT_ADD;
233 	}
234 	if (node_ctx.srcRev) {
235 		fast_export_copy(node_ctx.srcRev, node_ctx.src.buf, node_ctx.dst.buf);
236 		if (node_ctx.action == NODEACT_ADD)
237 			node_ctx.action = NODEACT_CHANGE;
238 	}
239 	if (have_text && type == S_IFDIR)
240 		die("invalid dump: directories cannot have text attached");
241 
242 	/*
243 	 * Find old content (old_data) and decide on the new mode.
244 	 */
245 	if (node_ctx.action == NODEACT_CHANGE && !*node_ctx.dst.buf) {
246 		if (type != S_IFDIR)
247 			die("invalid dump: root of tree is not a regular file");
248 		old_data = NULL;
249 	} else if (node_ctx.action == NODEACT_CHANGE) {
250 		uint32_t mode;
251 		old_data = fast_export_read_path(node_ctx.dst.buf, &mode);
252 		if (mode == S_IFDIR && type != S_IFDIR)
253 			die("invalid dump: cannot modify a directory into a file");
254 		if (mode != S_IFDIR && type == S_IFDIR)
255 			die("invalid dump: cannot modify a file into a directory");
256 		node_ctx.type = mode;
257 		old_mode = mode;
258 	} else if (node_ctx.action == NODEACT_ADD) {
259 		if (type == S_IFDIR)
260 			old_data = NULL;
261 		else if (have_text)
262 			old_data = empty_blob;
263 		else
264 			die("invalid dump: adds node without text");
265 	} else {
266 		die("invalid dump: Node-path block lacks Node-action");
267 	}
268 
269 	/*
270 	 * Adjust mode to reflect properties.
271 	 */
272 	if (have_props) {
273 		if (!node_ctx.prop_delta)
274 			node_ctx.type = type;
275 		if (node_ctx.prop_length)
276 			read_props();
277 	}
278 
279 	/*
280 	 * Save the result.
281 	 */
282 	if (type == S_IFDIR)	/* directories are not tracked. */
283 		return;
284 	assert(old_data);
285 	if (old_data == empty_blob)
286 		/* For the fast_export_* functions, NULL means empty. */
287 		old_data = NULL;
288 	if (!have_text) {
289 		fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data);
290 		return;
291 	}
292 	if (!node_ctx.text_delta) {
293 		fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
294 		fast_export_data(node_ctx.type, node_ctx.text_length, &input);
295 		return;
296 	}
297 	fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
298 	fast_export_blob_delta(node_ctx.type, old_mode, old_data,
299 				node_ctx.text_length, &input);
300 }
301 
begin_revision(const char * remote_ref)302 static void begin_revision(const char *remote_ref)
303 {
304 	if (!rev_ctx.revision)	/* revision 0 gets no git commit. */
305 		return;
306 	fast_export_begin_commit(rev_ctx.revision, rev_ctx.author.buf,
307 		&rev_ctx.log, dump_ctx.uuid.buf, dump_ctx.url.buf,
308 		rev_ctx.timestamp, remote_ref);
309 }
310 
end_revision(const char * note_ref)311 static void end_revision(const char *note_ref)
312 {
313 	struct strbuf mark = STRBUF_INIT;
314 	if (rev_ctx.revision) {
315 		fast_export_end_commit(rev_ctx.revision);
316 		fast_export_begin_note(rev_ctx.revision, "remote-svn",
317 				"Note created by remote-svn.", rev_ctx.timestamp, note_ref);
318 		strbuf_addf(&mark, ":%"PRIu32, rev_ctx.revision);
319 		fast_export_note(mark.buf, "inline");
320 		fast_export_buf_to_data(&rev_ctx.note);
321 		strbuf_release(&mark);
322 	}
323 }
324 
svndump_read(const char * url,const char * local_ref,const char * notes_ref)325 void svndump_read(const char *url, const char *local_ref, const char *notes_ref)
326 {
327 	char *val;
328 	char *t;
329 	uint32_t active_ctx = DUMP_CTX;
330 	uint32_t len;
331 
332 	reset_dump_ctx(url);
333 	while ((t = buffer_read_line(&input))) {
334 		val = strchr(t, ':');
335 		if (!val)
336 			continue;
337 		val++;
338 		if (*val != ' ')
339 			continue;
340 		val++;
341 
342 		/* strlen(key) + 1 */
343 		switch (val - t - 1) {
344 		case sizeof("SVN-fs-dump-format-version"):
345 			if (constcmp(t, "SVN-fs-dump-format-version"))
346 				continue;
347 			dump_ctx.version = atoi(val);
348 			if (dump_ctx.version > 3)
349 				die("expected svn dump format version <= 3, found %"PRIu32,
350 				    dump_ctx.version);
351 			break;
352 		case sizeof("UUID"):
353 			if (constcmp(t, "UUID"))
354 				continue;
355 			strbuf_reset(&dump_ctx.uuid);
356 			strbuf_addstr(&dump_ctx.uuid, val);
357 			break;
358 		case sizeof("Revision-number"):
359 			if (constcmp(t, "Revision-number"))
360 				continue;
361 			if (active_ctx == NODE_CTX)
362 				handle_node();
363 			if (active_ctx == REV_CTX)
364 				begin_revision(local_ref);
365 			if (active_ctx != DUMP_CTX)
366 				end_revision(notes_ref);
367 			active_ctx = REV_CTX;
368 			reset_rev_ctx(atoi(val));
369 			strbuf_addf(&rev_ctx.note, "%s\n", t);
370 			break;
371 		case sizeof("Node-path"):
372 			if (constcmp(t, "Node-"))
373 				continue;
374 			if (!constcmp(t + strlen("Node-"), "path")) {
375 				if (active_ctx == NODE_CTX)
376 					handle_node();
377 				if (active_ctx == REV_CTX)
378 					begin_revision(local_ref);
379 				active_ctx = NODE_CTX;
380 				reset_node_ctx(val);
381 				strbuf_addf(&rev_ctx.note, "%s\n", t);
382 				break;
383 			}
384 			if (constcmp(t + strlen("Node-"), "kind"))
385 				continue;
386 			strbuf_addf(&rev_ctx.note, "%s\n", t);
387 			if (!strcmp(val, "dir"))
388 				node_ctx.type = S_IFDIR;
389 			else if (!strcmp(val, "file"))
390 				node_ctx.type = S_IFREG | 0644;
391 			else
392 				fprintf(stderr, "Unknown node-kind: %s\n", val);
393 			break;
394 		case sizeof("Node-action"):
395 			if (constcmp(t, "Node-action"))
396 				continue;
397 			strbuf_addf(&rev_ctx.note, "%s\n", t);
398 			if (!strcmp(val, "delete")) {
399 				node_ctx.action = NODEACT_DELETE;
400 			} else if (!strcmp(val, "add")) {
401 				node_ctx.action = NODEACT_ADD;
402 			} else if (!strcmp(val, "change")) {
403 				node_ctx.action = NODEACT_CHANGE;
404 			} else if (!strcmp(val, "replace")) {
405 				node_ctx.action = NODEACT_REPLACE;
406 			} else {
407 				fprintf(stderr, "Unknown node-action: %s\n", val);
408 				node_ctx.action = NODEACT_UNKNOWN;
409 			}
410 			break;
411 		case sizeof("Node-copyfrom-path"):
412 			if (constcmp(t, "Node-copyfrom-path"))
413 				continue;
414 			strbuf_reset(&node_ctx.src);
415 			strbuf_addstr(&node_ctx.src, val);
416 			strbuf_addf(&rev_ctx.note, "%s\n", t);
417 			break;
418 		case sizeof("Node-copyfrom-rev"):
419 			if (constcmp(t, "Node-copyfrom-rev"))
420 				continue;
421 			node_ctx.srcRev = atoi(val);
422 			strbuf_addf(&rev_ctx.note, "%s\n", t);
423 			break;
424 		case sizeof("Text-content-length"):
425 			if (constcmp(t, "Text") && constcmp(t, "Prop"))
426 				continue;
427 			if (constcmp(t + 4, "-content-length"))
428 				continue;
429 			{
430 				char *end;
431 				uintmax_t len;
432 
433 				len = strtoumax(val, &end, 10);
434 				if (!isdigit(*val) || *end)
435 					die("invalid dump: non-numeric length %s", val);
436 				if (len > maximum_signed_value_of_type(off_t))
437 					die("unrepresentable length in dump: %s", val);
438 
439 				if (*t == 'T')
440 					node_ctx.text_length = (off_t) len;
441 				else
442 					node_ctx.prop_length = (off_t) len;
443 				break;
444 			}
445 		case sizeof("Text-delta"):
446 			if (!constcmp(t, "Text-delta")) {
447 				node_ctx.text_delta = !strcmp(val, "true");
448 				break;
449 			}
450 			if (constcmp(t, "Prop-delta"))
451 				continue;
452 			node_ctx.prop_delta = !strcmp(val, "true");
453 			break;
454 		case sizeof("Content-length"):
455 			if (constcmp(t, "Content-length"))
456 				continue;
457 			len = atoi(val);
458 			t = buffer_read_line(&input);
459 			if (!t)
460 				die_short_read();
461 			if (*t)
462 				die("invalid dump: expected blank line after content length header");
463 			if (active_ctx == REV_CTX) {
464 				read_props();
465 			} else if (active_ctx == NODE_CTX) {
466 				handle_node();
467 				active_ctx = INTERNODE_CTX;
468 			} else {
469 				fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len);
470 				if (buffer_skip_bytes(&input, len) != len)
471 					die_short_read();
472 			}
473 		}
474 	}
475 	if (buffer_ferror(&input))
476 		die_short_read();
477 	if (active_ctx == NODE_CTX)
478 		handle_node();
479 	if (active_ctx == REV_CTX)
480 		begin_revision(local_ref);
481 	if (active_ctx != DUMP_CTX)
482 		end_revision(notes_ref);
483 }
484 
init(int report_fd)485 static void init(int report_fd)
486 {
487 	fast_export_init(report_fd);
488 	strbuf_init(&dump_ctx.uuid, 4096);
489 	strbuf_init(&dump_ctx.url, 4096);
490 	strbuf_init(&rev_ctx.log, 4096);
491 	strbuf_init(&rev_ctx.author, 4096);
492 	strbuf_init(&rev_ctx.note, 4096);
493 	strbuf_init(&node_ctx.src, 4096);
494 	strbuf_init(&node_ctx.dst, 4096);
495 	reset_dump_ctx(NULL);
496 	reset_rev_ctx(0);
497 	reset_node_ctx(NULL);
498 	return;
499 }
500 
svndump_init(const char * filename)501 int svndump_init(const char *filename)
502 {
503 	if (buffer_init(&input, filename))
504 		return error_errno("cannot open %s", filename ? filename : "NULL");
505 	init(REPORT_FILENO);
506 	return 0;
507 }
508 
svndump_init_fd(int in_fd,int back_fd)509 int svndump_init_fd(int in_fd, int back_fd)
510 {
511 	if(buffer_fdinit(&input, xdup(in_fd)))
512 		return error_errno("cannot open fd %d", in_fd);
513 	init(xdup(back_fd));
514 	return 0;
515 }
516 
svndump_deinit(void)517 void svndump_deinit(void)
518 {
519 	fast_export_deinit();
520 	reset_dump_ctx(NULL);
521 	reset_rev_ctx(0);
522 	reset_node_ctx(NULL);
523 	strbuf_release(&rev_ctx.log);
524 	strbuf_release(&rev_ctx.author);
525 	strbuf_release(&rev_ctx.note);
526 	strbuf_release(&node_ctx.src);
527 	strbuf_release(&node_ctx.dst);
528 	if (buffer_deinit(&input))
529 		fprintf(stderr, "Input error\n");
530 	if (ferror(stdout))
531 		fprintf(stderr, "Output error\n");
532 }
533 
svndump_reset(void)534 void svndump_reset(void)
535 {
536 	strbuf_release(&dump_ctx.uuid);
537 	strbuf_release(&dump_ctx.url);
538 	strbuf_release(&rev_ctx.log);
539 	strbuf_release(&rev_ctx.author);
540 }
541