1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "config_parse.h"
9 
10 #include "buf_text.h"
11 
12 #include <ctype.h>
13 
/*
 * Escape lookup tables, kept in lockstep: the character at index i of
 * git_config_escapes (the letter that follows a backslash in the file)
 * corresponds to the character at index i of git_config_escaped (the
 * byte it stands for).
 */
const char *git_config_escapes = "n"  "t"  "b"  "\"" "\\";
const char *git_config_escaped = "\n" "\t" "\b" "\"" "\\";
16 
/*
 * Report a config parse failure through git_error_set().
 *
 * The message always carries `error_str`, the reader's path and the
 * line number stored in its parse context. A non-zero `col` is
 * appended as the column; pass 0 when no column is known.
 */
static void set_parse_error(git_config_parser *reader, int col, const char *error_str)
{
	if (!col) {
		git_error_set(GIT_ERROR_CONFIG,
		              "failed to parse config file: %s (in %s:%"PRIuZ")",
		              error_str, reader->path, reader->ctx.line_num);
		return;
	}

	git_error_set(GIT_ERROR_CONFIG,
	              "failed to parse config file: %s (in %s:%"PRIuZ", column %d)",
	              error_str, reader->path, reader->ctx.line_num, col);
}
28 
29 
config_keychar(int c)30 GIT_INLINE(int) config_keychar(int c)
31 {
32 	return isalnum(c) || c == '-';
33 }
34 
/*
 * Cut a trailing comment and trailing whitespace off `line`, in place.
 *
 * A ';' or '#' starts a comment only when the running quote count
 * (seeded with `in_quotes`) is even and the character does not follow
 * an odd run of backslashes. The line is terminated at that character
 * and any whitespace left at the end is removed.
 *
 * Returns `in_quotes` plus the number of double quotes counted before
 * the cut, so the caller can carry the quote parity over to a
 * continuation line.
 */
static int strip_comments(char *line, int in_quotes)
{
	int quote_count = in_quotes, backslash_count = 0;
	char *ptr;

	for (ptr = line; *ptr; ++ptr) {
		/*
		 * Count every quote that is not preceded by a backslash.
		 * A quote in the very first column has no predecessor and
		 * must be counted too; otherwise a continuation line that
		 * starts with '"' leaves the parity wrong.
		 */
		if (ptr[0] == '"' && (ptr == line || ptr[-1] != '\\'))
			quote_count++;

		if ((ptr[0] == ';' || ptr[0] == '#') &&
			(quote_count % 2) == 0 &&
			(backslash_count % 2) == 0) {
			ptr[0] = '\0';
			break;
		}

		/* Track the length of the current run of backslashes. */
		if (ptr[0] == '\\')
			backslash_count++;
		else
			backslash_count = 0;
	}

	/* skip any space at the end */
	while (ptr > line && git__isspace(ptr[-1])) {
		ptr--;
	}
	ptr[0] = '\0';

	return quote_count;
}
65 
66 
/*
 * Parse the quoted subsection part of a header such as
 * `[base "sub section"]`.
 *
 * `line` is the whole header line and `pos` the offset at which to
 * start looking for the opening quote (leading whitespace is skipped).
 * On success `*section_name` receives a newly allocated string of the
 * form "<base_name>.<subsection>", with backslash escapes resolved by
 * dropping the backslash and keeping the following character.
 *
 * Returns the offset, relative to the start of `line`, of the first
 * character after the closing `"]`, or -1 on error.
 */
static int parse_subsection_header(git_config_parser *reader, const char *line, size_t pos, const char *base_name, char **section_name)
{
	const char *open_quote, *final_quote;
	git_buf name = GIT_BUF_INIT;
	size_t quoted_len, alloc_len, base_name_len = strlen(base_name);
	int ch, rpos;

	/* Skip any additional whitespace before our section name */
	for (; git__isspace(line[pos]); pos++)
		;

	/* We should be at the first quotation mark. */
	if (line[pos] != '"') {
		set_parse_error(reader, 0, "missing quotation marks in section header");
		goto end_error;
	}

	open_quote = line + pos;
	final_quote = strrchr(line, '"');
	quoted_len = final_quote - open_quote;

	/* Offsets are reported as int, so the line must fit in one. */
	if ((final_quote - line) > INT_MAX) {
		set_parse_error(reader, 0, "invalid section header, line too long");
		goto end_error;
	}

	/* The last quote on the line is the opening one: nothing closes it. */
	if (quoted_len == 0) {
		set_parse_error(reader, 0, "missing closing quotation mark in section header");
		goto end_error;
	}

	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, base_name_len, quoted_len);
	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 2);

	if (git_buf_grow(&name, alloc_len) < 0 ||
	    git_buf_printf(&name, "%s.", base_name) < 0)
		goto end_error;

	/* rpos indexes from the opening quote; start just past it. */
	rpos = 1;
	ch = open_quote[rpos];

	/*
	 * Whatever ends up in ch at the bottom of an iteration is appended
	 * to the name. The loop stops at an unescaped quote or once the
	 * read position reaches the last quote on the line.
	 */
	for (;;) {
		if (ch == 0) {
			set_parse_error(reader, 0, "unexpected end-of-line in section header");
			goto end_error;
		}

		if (ch == '"')
			break;

		if (ch == '\\') {
			/* Take the escaped character literally. */
			ch = open_quote[++rpos];

			if (ch == 0) {
				set_parse_error(reader, rpos, "unexpected end-of-line in section header");
				goto end_error;
			}
		}

		git_buf_putc(&name, (char)ch);
		ch = open_quote[++rpos];

		if (open_quote + rpos >= final_quote)
			break;
	}

	/* git_buf_putc results were not checked individually. */
	if (git_buf_oom(&name))
		goto end_error;

	if (open_quote[rpos] != '"' || open_quote[rpos + 1] != ']') {
		set_parse_error(reader, rpos, "unexpected text after closing quotes");
		goto end_error;
	}

	*section_name = git_buf_detach(&name);

	/* rpos is at the closing quote; step over it and the ']' */
	return (int)(&open_quote[rpos + 2] - line);

end_error:
	git_buf_dispose(&name);

	return -1;
}
159 
/*
 * Parse the section header at the reader's current line.
 *
 * Leading whitespace is skipped, then a private copy of the line is
 * examined. A plain header (`[name]`) yields the lower-cased name; when
 * whitespace follows the name, the remainder is handed to
 * parse_subsection_header(), which produces "name.subsection".
 *
 * On success `*section_out` receives a newly allocated string and the
 * return value is the number of characters of the line that the header
 * occupies (the caller advances the parser by that amount). Returns a
 * negative value on error.
 */
static int parse_section_header(git_config_parser *reader, char **section_out)
{
	char *name = NULL, *name_end;
	int name_length = 0, c, pos = 0;
	int result = -1;
	char *line;
	size_t line_len;

	git_parse_advance_ws(&reader->ctx);
	line = git__strndup(reader->ctx.line, reader->ctx.line_len);
	if (line == NULL)
		return -1;

	/* find the end of the variable's name */
	name_end = strrchr(line, ']');
	if (name_end == NULL) {
		set_parse_error(reader, 0, "missing ']' in section header");
		goto out;
	}

	/*
	 * The name can never be longer than the text before the last ']'.
	 * Failures go through the common exit so that `line` is released.
	 */
	if (GIT_ADD_SIZET_OVERFLOW(&line_len, (size_t)(name_end - line), 1) ||
	    (name = git__malloc(line_len)) == NULL)
		goto out;

	/* Make sure we were given a section header */
	c = line[pos++];
	assert(c == '[');

	c = line[pos++];

	do {
		if (git__isspace(c)) {
			/* Whitespace ends the base name; a subsection follows. */
			name[name_length] = '\0';
			result = parse_subsection_header(reader, line, pos, name, section_out);
			goto out;
		}

		if (!config_keychar(c) && c != '.') {
			set_parse_error(reader, pos, "unexpected character in header");
			goto out;
		}

		name[name_length++] = (char)git__tolower(c);

	} while ((c = line[pos++]) != ']');

	name[name_length] = '\0';

	/* Hand the name over to the caller; it must not be freed below. */
	*section_out = name;
	name = NULL;
	result = pos;

out:
	git__free(line);
	git__free(name);
	return result;
}
229 
/*
 * Step the parser over a leading UTF-8 byte order mark, if
 * git_buf_text_detect_bom() reports one for the parser's content.
 * Any other detection result leaves the parser untouched.
 *
 * Always returns 0.
 */
static int skip_bom(git_parse_ctx *parser)
{
	git_bom_t bom_type;
	git_buf content = GIT_BUF_INIT_CONST(parser->content, parser->content_len);
	int bom_len = git_buf_text_detect_bom(&bom_type, &content);

	/* TODO: reference implementation is pretty stupid with BoM */
	if (bom_type != GIT_BOM_UTF8)
		return 0;

	git_parse_advance_chars(parser, bom_len);

	return 0;
}
243 
244 /*
245 	(* basic types *)
246 	digit = "0".."9"
247 	integer = digit { digit }
248 	alphabet = "a".."z" + "A" .. "Z"
249 
250 	section_char = alphabet | "." | "-"
251 	extension_char = (* any character except newline *)
252 	any_char = (* any character *)
253 	variable_char = "alphabet" | "-"
254 
255 
256 	(* actual grammar *)
257 	config = { section }
258 
259 	section = header { definition }
260 
261 	header = "[" section [subsection | subsection_ext] "]"
262 
263 	subsection = "." section
264 	subsection_ext = "\"" extension "\""
265 
266 	section = section_char { section_char }
267 	extension = extension_char { extension_char }
268 
269 	definition = variable_name ["=" variable_value] "\n"
270 
271 	variable_name = variable_char { variable_char }
272 	variable_value = string | boolean | integer
273 
274 	string = quoted_string | plain_string
275 	quoted_string = "\"" plain_string "\""
276 	plain_string = { any_char }
277 
278 	boolean = boolean_true | boolean_false
279 	boolean_true = "yes" | "1" | "true" | "on"
280 	boolean_false = "no" | "0" | "false" | "off"
281 */
282 
283 /* '\"' -> '"' etc */
unescape_line(char ** out,bool * is_multi,const char * ptr,int quote_count)284 static int unescape_line(
285 	char **out, bool *is_multi, const char *ptr, int quote_count)
286 {
287 	char *str, *fixed, *esc;
288 	size_t ptr_len = strlen(ptr), alloc_len;
289 
290 	*is_multi = false;
291 
292 	if (GIT_ADD_SIZET_OVERFLOW(&alloc_len, ptr_len, 1) ||
293 		(str = git__malloc(alloc_len)) == NULL) {
294 		return -1;
295 	}
296 
297 	fixed = str;
298 
299 	while (*ptr != '\0') {
300 		if (*ptr == '"') {
301 			quote_count++;
302 		} else if (*ptr != '\\') {
303 			*fixed++ = *ptr;
304 		} else {
305 			/* backslash, check the next char */
306 			ptr++;
307 			/* if we're at the end, it's a multiline, so keep the backslash */
308 			if (*ptr == '\0') {
309 				*is_multi = true;
310 				goto done;
311 			}
312 			if ((esc = strchr(git_config_escapes, *ptr)) != NULL) {
313 				*fixed++ = git_config_escaped[esc - git_config_escapes];
314 			} else {
315 				git__free(str);
316 				git_error_set(GIT_ERROR_CONFIG, "invalid escape at %s", ptr);
317 				return -1;
318 			}
319 		}
320 		ptr++;
321 	}
322 
323 done:
324 	*fixed = '\0';
325 	*out = str;
326 
327 	return 0;
328 }
329 
/*
 * Read the continuation lines of a multi-line value and append them,
 * unescaped, to `value`.
 *
 * `in_quotes` is the running count of double quotes seen so far in the
 * value; only its parity matters to strip_comments(), which uses it to
 * decide whether a ';' or '#' on the next line is inside a quoted
 * string. The count is updated after every line and fed to the next.
 *
 * Reading stops when a line does not end in a backslash or when an
 * empty line is returned by the parser. Returns 0 on success and a
 * negative value on error.
 */
static int parse_multiline_variable(git_config_parser *reader, git_buf *value, int in_quotes)
{
	int quote_count;
	bool multiline = true;

	while (multiline) {
		char *line = NULL, *proc_line = NULL;
		int error;

		/* Check that the next line exists */
		git_parse_advance_line(&reader->ctx);
		line = git__strndup(reader->ctx.line, reader->ctx.line_len);
		GIT_ERROR_CHECK_ALLOC(line);

		/*
		 * We've reached the end of the file, there is no continuation.
		 * (this is not an error).
		 */
		if (line[0] == '\0') {
			error = 0;
			goto out;
		}

		/*
		 * If it was just a comment, pretend it didn't exist.
		 *
		 * The quote count is passed through unchanged: collapsing it
		 * to a boolean would turn an even, non-zero count (all quotes
		 * closed) into "inside quotes" and keep comments that should
		 * be stripped.
		 */
		quote_count = strip_comments(line, in_quotes);
		if (line[0] == '\0')
			goto next;

		if ((error = unescape_line(&proc_line, &multiline,
					   line, in_quotes)) < 0)
			goto out;

		/* Add this line to the multiline var */
		if ((error = git_buf_puts(value, proc_line)) < 0)
			goto out;

next:
		git__free(line);
		git__free(proc_line);
		in_quotes = quote_count;
		continue;

out:
		git__free(line);
		git__free(proc_line);
		return error;
	}

	return 0;
}
380 
is_namechar(char c)381 GIT_INLINE(bool) is_namechar(char c)
382 {
383 	return isalnum(c) || c == '-';
384 }
385 
/*
 * Split a variable line into its name and the start of its value.
 *
 * The name is the leading run of is_namechar() characters in `line`
 * and must not be empty. After optional whitespace the line must
 * either end (a variable without a value) or continue with '='.
 *
 * On success returns 0, stores a newly allocated copy of the name in
 * `*name`, and points `*value` just past the '=' (or leaves it NULL
 * when there is no '='). Returns -1 on error.
 */
static int parse_name(
	char **name, const char **value, git_config_parser *reader, const char *line)
{
	const char *cursor = line;
	size_t name_len;

	*name = NULL;
	*value = NULL;

	/* Consume the key. */
	for (; *cursor && is_namechar(*cursor); cursor++)
		;

	name_len = (size_t)(cursor - line);

	if (name_len == 0) {
		set_parse_error(reader, 0, "invalid configuration key");
		return -1;
	}

	/* Whitespace is allowed between the key and '='. */
	for (; *cursor && git__isspace(*cursor); cursor++)
		;

	switch (*cursor) {
	case '=':
		*value = cursor + 1;
		break;
	case '\0':
		/* A bare key with no value. */
		break;
	default:
		set_parse_error(reader, 0, "invalid configuration key");
		return -1;
	}

	*name = git__strndup(line, name_len);

	return (*name == NULL) ? -1 : 0;
}
419 
/*
 * Parse the variable definition at the reader's current line.
 *
 * Works on a private copy of the line: comments are stripped, the name
 * is split off, and the value (if any) is unescaped. When the value
 * ends in a continuation backslash, the following lines are pulled in
 * through parse_multiline_variable().
 *
 * On success returns 0 and hands newly allocated strings to the caller
 * through `*var_name` and `*var_value`; `*var_value` stays NULL for a
 * variable without '='. On failure both outputs are NULL and a
 * negative value is returned.
 */
static int parse_variable(git_config_parser *reader, char **var_name, char **var_value)
{
	const char *raw_value = NULL;
	char *line_copy = NULL, *key = NULL, *parsed = NULL;
	int quotes, error;
	bool continued;

	*var_name = NULL;
	*var_value = NULL;

	git_parse_advance_ws(&reader->ctx);
	line_copy = git__strndup(reader->ctx.line, reader->ctx.line_len);
	GIT_ERROR_CHECK_ALLOC(line_copy);

	/* Remember how many quotes the first line had for continuations. */
	quotes = strip_comments(line_copy, 0);

	error = parse_name(&key, &raw_value, reader, line_copy);
	if (error < 0)
		goto out;

	/* Only lines with an '=' carry a value to parse. */
	if (raw_value != NULL) {
		for (; git__isspace(raw_value[0]); raw_value++)
			;

		error = unescape_line(&parsed, &continued, raw_value, 0);
		if (error < 0)
			goto out;

		if (continued) {
			git_buf joined = GIT_BUF_INIT;

			/* The buffer takes over the string while lines are appended. */
			git_buf_attach(&joined, parsed, 0);
			parsed = NULL;

			if (parse_multiline_variable(reader, &joined, quotes) < 0 ||
			    git_buf_oom(&joined)) {
				git_buf_dispose(&joined);
				error = -1;
				goto out;
			}

			parsed = git_buf_detach(&joined);
		}
	}

	/* Transfer ownership to the caller; nothing is left to free below. */
	*var_name = key;
	key = NULL;
	*var_value = parsed;
	parsed = NULL;

out:
	git__free(key);
	git__free(parsed);
	git__free(line_copy);
	return error;
}
476 
/*
 * Prepare `out` for parsing `datalen` bytes at `data`.
 *
 * `path` is stored as given (the pointer is not copied) and is used
 * when reporting parse errors. Returns the result of
 * git_parse_ctx_init().
 */
int git_config_parser_init(git_config_parser *out, const char *path, const char *data, size_t datalen)
{
	int error;

	out->path = path;
	error = git_parse_ctx_init(&out->ctx, data, datalen);

	return error;
}
482 
/*
 * Clear the parse context embedded in `parser`. The `path` member is
 * left alone.
 */
void git_config_parser_dispose(git_config_parser *parser)
{
	git_parse_ctx *ctx = &parser->ctx;

	git_parse_ctx_clear(ctx);
}
487 
/*
 * Drive the parser over its whole input, invoking the given callbacks.
 *
 * Each line is classified by its first non-whitespace character (or,
 * for whitespace-only lines, its first character):
 *   - '[' starts a section header; `on_section` is called with the
 *     parsed section name,
 *   - whitespace, ';' and '#' are reported through `on_comment`,
 *   - anything else is parsed as a variable and passed to `on_variable`.
 * `on_eof` is called once after the last line. Any callback may be
 * NULL, in which case the corresponding item is parsed and skipped.
 *
 * Parsing stops at the first parse failure or negative callback
 * result, which is returned. Otherwise the result of `on_eof` is
 * returned when it is set, or the result of the last callback that
 * ran (0 when none ran).
 */
int git_config_parse(
	git_config_parser *parser,
	git_config_parser_section_cb on_section,
	git_config_parser_variable_cb on_variable,
	git_config_parser_comment_cb on_comment,
	git_config_parser_eof_cb on_eof,
	void *payload)
{
	git_parse_ctx *ctx;
	char *current_section = NULL, *var_name = NULL, *var_value = NULL;
	int result = 0;

	ctx = &parser->ctx;

	skip_bom(ctx);

	for (; ctx->remain_len > 0; git_parse_advance_line(ctx)) {
		const char *line_start;
		size_t line_len;
		char c;

	restart:
		line_start = ctx->line;
		line_len = ctx->line_len;

		/*
		 * Get either first non-whitespace character or, if that does
		 * not exist, the first whitespace character. This is required
		 * to preserve whitespaces when writing back the file.
		 */
		if (git_parse_peek(&c, ctx, GIT_PARSE_PEEK_SKIP_WHITESPACE) < 0 &&
		    git_parse_peek(&c, ctx, 0) < 0)
			continue;

		switch (c) {
		case '[': /* section header, new section begins */
			git__free(current_section);
			current_section = NULL;

			result = parse_section_header(parser, &current_section);
			if (result < 0)
				break;

			git_parse_advance_chars(ctx, result);

			/*
			 * The header length has served its purpose; do not let
			 * it leak out as the function's return value.
			 */
			result = 0;

			if (on_section)
				result = on_section(parser, current_section, line_start, line_len, payload);

			/*
			 * A failing callback must stop the parse even when the
			 * header is followed by more data on the same line.
			 */
			if (result < 0)
				break;

			/*
			 * After we've parsed the section header we may not be
			 * done with the line. If there's still data in there,
			 * run the next loop with the rest of the current line
			 * instead of moving forward.
			 */
			if (!git_parse_peek(&c, ctx, GIT_PARSE_PEEK_SKIP_WHITESPACE))
				goto restart;

			break;

		case '\n': /* comment or whitespace-only */
		case '\r':
		case ' ':
		case '\t':
		case ';':
		case '#':
			if (on_comment) {
				result = on_comment(parser, line_start, line_len, payload);
			}
			break;

		default: /* assume variable declaration */
			result = parse_variable(parser, &var_name, &var_value);

			if (result == 0 && on_variable)
				result = on_variable(parser, current_section, var_name, var_value, line_start, line_len, payload);

			/*
			 * The strings belong to us whether or not a callback
			 * consumed them; parse_variable() leaves both NULL on
			 * failure, so freeing unconditionally is safe.
			 */
			git__free(var_name);
			git__free(var_value);
			var_name = NULL;
			var_value = NULL;

			break;
		}

		if (result < 0)
			goto out;
	}

	if (on_eof)
		result = on_eof(parser, current_section, payload);

out:
	git__free(current_section);
	return result;
}
579