1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "config_parse.h"
9 
10 #include "buf_text.h"
11 
12 #include <ctype.h>
13 
/*
 * Record a config parse error through giterr_set().
 *
 * The message combines `error_str` with the path of the file being read,
 * the parser context's current line number and the supplied column.
 */
static void set_parse_error(git_config_parser *reader, int col, const char *error_str)
{
	const char *path = reader->file->path;

	giterr_set(
		GITERR_CONFIG,
		"failed to parse config file: %s (in %s:%"PRIuZ", column %d)",
		error_str,
		path,
		reader->ctx.line_num,
		col);
}
19 
20 
config_keychar(int c)21 GIT_INLINE(int) config_keychar(int c)
22 {
23 	return isalnum(c) || c == '-';
24 }
25 
/*
 * Truncate `line` in place at the first unquoted, unescaped comment
 * character (';' or '#') and then strip trailing whitespace.
 *
 * `in_quotes` seeds the quote counter; an odd value means the line starts
 * inside a quoted string carried over from a previous line.
 *
 * Returns the seed plus the number of unescaped double quotes seen before
 * the scan stopped, so the caller can carry the parity into the next line.
 */
static int strip_comments(char *line, int in_quotes)
{
	int quote_count = in_quotes, backslash_count = 0;
	char *ptr;

	for (ptr = line; *ptr; ++ptr) {
		/*
		 * A quote counts when it is not preceded by a backslash. A quote
		 * in the very first column has no predecessor and must be counted
		 * too, otherwise the parity is wrong for lines starting with '"'.
		 */
		if (ptr[0] == '"' && (ptr == line || ptr[-1] != '\\'))
			quote_count++;

		if ((ptr[0] == ';' || ptr[0] == '#') &&
			(quote_count % 2) == 0 &&
			(backslash_count % 2) == 0) {
			ptr[0] = '\0';
			break;
		}

		/* track the length of the current run of backslashes */
		if (ptr[0] == '\\')
			backslash_count++;
		else
			backslash_count = 0;
	}

	/* skip any space at the end */
	while (ptr > line && git__isspace(ptr[-1])) {
		ptr--;
	}
	ptr[0] = '\0';

	return quote_count;
}
56 
57 
/*
 * Parse the quoted subsection part of a header such as `[base "ext"]`.
 *
 * `line` is the header text and `base_name` the already-parsed name that
 * preceded the whitespace. On success `*section_name` receives a newly
 * allocated "<base_name>.<ext>" string, where a backslash inside the quotes
 * escapes the character that follows it, and 0 is returned. On failure -1
 * is returned and `*section_name` is left untouched.
 */
static int parse_section_header_ext(git_config_parser *reader, const char *line, const char *base_name, char **section_name)
{
	git_buf name = GIT_BUF_INIT;
	const char *open_quote, *close_quote;
	size_t inner_len, needed, base_len = strlen(base_name);
	int ch, idx;

	/*
	 * The header text is only used to locate the quotes and to size the
	 * output buffer; the base name was extracted by the caller.
	 */
	open_quote = strchr(line, '"');
	if (open_quote == NULL) {
		set_parse_error(reader, 0, "Missing quotation marks in section header");
		goto end_error;
	}

	close_quote = strrchr(line, '"');
	inner_len = close_quote - open_quote;

	/* first and last quote coincide: there is only one quote */
	if (inner_len == 0) {
		set_parse_error(reader, 0, "Missing closing quotation mark in section header");
		goto end_error;
	}

	GITERR_CHECK_ALLOC_ADD(&needed, base_len, inner_len);
	GITERR_CHECK_ALLOC_ADD(&needed, needed, 2);

	if (git_buf_grow(&name, needed) < 0 ||
	    git_buf_printf(&name, "%s.", base_name) < 0)
		goto end_error;

	/* start with the character right after the opening quote */
	idx = 1;
	ch = open_quote[idx];

	/*
	 * Each iteration appends whatever ends up in `ch` to the buffer,
	 * unless it terminates the subsection or the line.
	 */
	do {
		if (ch == 0) {
			set_parse_error(reader, 0, "Unexpected end-of-line in section header");
			goto end_error;
		}

		/* unescaped quote: the subsection name is complete */
		if (ch == '"')
			break;

		if (ch == '\\') {
			/* take the escaped character literally */
			ch = open_quote[++idx];

			if (ch == 0) {
				set_parse_error(reader, idx, "Unexpected end-of-line in section header");
				goto end_error;
			}
		}

		git_buf_putc(&name, (char)ch);
		ch = open_quote[++idx];
	} while (open_quote + idx < close_quote);

	if (git_buf_oom(&name))
		goto end_error;

	/* the closing quote must be immediately followed by ']' */
	if (open_quote[idx] != '"' || open_quote[idx + 1] != ']') {
		set_parse_error(reader, idx, "Unexpected text after closing quotes");
		goto end_error;
	}

	*section_name = git_buf_detach(&name);
	return 0;

end_error:
	git_buf_free(&name);

	return -1;
}
145 
/*
 * Parse the section header on the parser's current line.
 *
 * Leading whitespace is skipped first. A plain header (`[name]`) yields the
 * lower-cased name; a header containing whitespace after the base name is
 * delegated to parse_section_header_ext() to handle the quoted subsection.
 *
 * On success 0 is returned and `*section_out` receives a newly allocated
 * string. On failure -1 is returned and all temporary allocations made here
 * are released.
 */
static int parse_section_header(git_config_parser *reader, char **section_out)
{
	char *name = NULL, *name_end;
	int name_length, c, pos;
	int result;
	char *line;
	size_t line_len;

	git_parse_advance_ws(&reader->ctx);
	line = git__strndup(reader->ctx.line, reader->ctx.line_len);
	if (line == NULL)
		return -1;

	/* find the end of the variable's name */
	name_end = strrchr(line, ']');
	if (name_end == NULL) {
		git__free(line);
		set_parse_error(reader, 0, "Missing ']' in section header");
		return -1;
	}

	/*
	 * Everything up to the last ']' plus a terminator is enough room for
	 * the name. Go through the common cleanup path on failure so that
	 * `line` is not leaked.
	 */
	if (GIT_ADD_SIZET_OVERFLOW(&line_len, (size_t)(name_end - line), 1) ||
	    (name = git__malloc(line_len)) == NULL)
		goto fail_parse;

	name_length = 0;
	pos = 0;

	/* Make sure we were given a section header */
	c = line[pos++];
	assert(c == '[');

	c = line[pos++];

	do {
		if (git__isspace(c)){
			/* whitespace ends the base name; a quoted subsection follows */
			name[name_length] = '\0';
			result = parse_section_header_ext(reader, line, name, section_out);
			git__free(line);
			git__free(name);
			return result;
		}

		if (!config_keychar(c) && c != '.') {
			set_parse_error(reader, pos, "Unexpected character in header");
			goto fail_parse;
		}

		name[name_length++] = (char)git__tolower(c);

	} while ((c = line[pos++]) != ']');

	/* the loop only terminates on ']', so the header is complete here */
	git__free(line);

	name[name_length] = 0;
	*section_out = name;

	return 0;

fail_parse:
	git__free(line);
	git__free(name);
	return -1;
}
215 
/*
 * Advance the parse context past a leading UTF-8 byte order mark, if the
 * content starts with one. Other BOM kinds are left in place.
 *
 * Always returns 0.
 */
static int skip_bom(git_parse_ctx *parser)
{
	git_buf content = GIT_BUF_INIT_CONST(parser->content, parser->content_len);
	git_bom_t detected;
	int skip = git_buf_text_detect_bom(&detected, &content);

	/* TODO: reference implementation is pretty stupid with BoM */

	if (detected != GIT_BOM_UTF8)
		return 0;

	git_parse_advance_chars(parser, skip);
	return 0;
}
229 
230 /*
231 	(* basic types *)
232 	digit = "0".."9"
233 	integer = digit { digit }
234 	alphabet = "a".."z" + "A" .. "Z"
235 
	section_char = alphabet | digit | "." | "-"
	extension_char = (* any character except newline *)
	any_char = (* any character *)
	variable_char = alphabet | digit | "-"
240 
241 
242 	(* actual grammar *)
243 	config = { section }
244 
245 	section = header { definition }
246 
247 	header = "[" section [subsection | subsection_ext] "]"
248 
249 	subsection = "." section
250 	subsection_ext = "\"" extension "\""
251 
252 	section = section_char { section_char }
253 	extension = extension_char { extension_char }
254 
255 	definition = variable_name ["=" variable_value] "\n"
256 
257 	variable_name = variable_char { variable_char }
258 	variable_value = string | boolean | integer
259 
260 	string = quoted_string | plain_string
261 	quoted_string = "\"" plain_string "\""
262 	plain_string = { any_char }
263 
264 	boolean = boolean_true | boolean_false
265 	boolean_true = "yes" | "1" | "true" | "on"
266 	boolean_false = "no" | "0" | "false" | "off"
267 */
268 
269 /* '\"' -> '"' etc */
unescape_line(char ** out,bool * is_multi,const char * ptr,int quote_count)270 static int unescape_line(
271 	char **out, bool *is_multi, const char *ptr, int quote_count)
272 {
273 	char *str, *fixed, *esc;
274 	size_t ptr_len = strlen(ptr), alloc_len;
275 
276 	*is_multi = false;
277 
278 	if (GIT_ADD_SIZET_OVERFLOW(&alloc_len, ptr_len, 1) ||
279 		(str = git__malloc(alloc_len)) == NULL) {
280 		return -1;
281 	}
282 
283 	fixed = str;
284 
285 	while (*ptr != '\0') {
286 		if (*ptr == '"') {
287 			quote_count++;
288 		} else if (*ptr != '\\') {
289 			*fixed++ = *ptr;
290 		} else {
291 			/* backslash, check the next char */
292 			ptr++;
293 			/* if we're at the end, it's a multiline, so keep the backslash */
294 			if (*ptr == '\0') {
295 				*is_multi = true;
296 				goto done;
297 			}
298 			if ((esc = strchr(git_config_escapes, *ptr)) != NULL) {
299 				*fixed++ = git_config_escaped[esc - git_config_escapes];
300 			} else {
301 				git__free(str);
302 				giterr_set(GITERR_CONFIG, "invalid escape at %s", ptr);
303 				return -1;
304 			}
305 		}
306 		ptr++;
307 	}
308 
309 done:
310 	*fixed = '\0';
311 	*out = str;
312 
313 	return 0;
314 }
315 
/*
 * Append the continuation lines of a multi-line value to `value`.
 *
 * Called after a line ended in a backslash. Lines are consumed from the
 * parser one at a time until one does not end in a continuation backslash
 * or the end of the input is reached (which is not an error). Lines that
 * are empty after comment stripping are skipped.
 *
 * `in_quotes` is the running count of unescaped quotes seen so far; only
 * its parity matters when deciding whether a comment character is quoted.
 *
 * This is a loop rather than recursion so that the stack depth does not
 * depend on the number of continuation or comment lines in the input.
 *
 * Returns 0 on success, -1 on allocation failure or an invalid escape.
 */
static int parse_multiline_variable(git_config_parser *reader, git_buf *value, int in_quotes)
{
	bool multiline = true;

	while (multiline) {
		char *line, *proc_line = NULL;
		int quote_count, error;

		/* Check that the next line exists */
		git_parse_advance_line(&reader->ctx);
		line = git__strndup(reader->ctx.line, reader->ctx.line_len);
		if (line == NULL)
			return -1;

		/*
		 * We've reached the end of the file, there is no continuation.
		 * (this is not an error).
		 */
		if (line[0] == '\0') {
			git__free(line);
			return 0;
		}

		/*
		 * Pass the parity of the running count: collapsing an even,
		 * non-zero count to 1 would wrongly treat the line as quoted.
		 */
		quote_count = strip_comments(line, in_quotes % 2);

		/* If it was just a comment, pretend it didn't exist */
		if (line[0] != '\0') {
			if (unescape_line(&proc_line, &multiline, line, in_quotes) < 0) {
				git__free(line);
				return -1;
			}

			/* add this line to the multiline var */
			error = git_buf_puts(value, proc_line);
			git__free(proc_line);

			if (error < 0) {
				git__free(line);
				return -1;
			}
		}

		git__free(line);

		/* carry the quote state over to the next continuation line */
		in_quotes = quote_count;
	}

	return 0;
}
364 
is_namechar(char c)365 GIT_INLINE(bool) is_namechar(char c)
366 {
367 	return isalnum(c) || c == '-';
368 }
369 
/*
 * Split a variable line into its name and the start of its value.
 *
 * `*name` receives a newly allocated copy of the leading run of name
 * characters. `*value` points just past the '=' inside `line`, or stays
 * NULL when the line holds a name only. Anything other than whitespace
 * between the name and '=' (or end of line) is rejected.
 *
 * Returns 0 on success, -1 on an invalid key or allocation failure; in the
 * failure case `*name` is NULL.
 */
static int parse_name(
	char **name, const char **value, git_config_parser *reader, const char *line)
{
	const char *key_end, *cursor;

	*name = NULL;
	*value = NULL;

	/* the key is the longest prefix made of name characters */
	for (key_end = line; *key_end && is_namechar(*key_end); key_end++)
		;

	if (key_end == line) {
		set_parse_error(reader, 0, "Invalid configuration key");
		return -1;
	}

	/* whitespace may separate the key from '=' */
	for (cursor = key_end; *cursor && git__isspace(*cursor); cursor++)
		;

	if (*cursor == '=') {
		*value = cursor + 1;
	} else if (*cursor != '\0') {
		set_parse_error(reader, 0, "Invalid configuration key");
		return -1;
	}

	*name = git__strndup(line, key_end - line);

	return (*name == NULL) ? -1 : 0;
}
403 
/*
 * Parse a variable definition starting at the parser's current line.
 *
 * On success 0 is returned, `*var_name` holds a newly allocated name and
 * `*var_value` holds a newly allocated value, or NULL when the line has no
 * '=' (boolean true is assumed in that case). A value ending in a backslash
 * is continued with the following lines.
 *
 * On failure -1 is returned and both outputs are NULL, so the caller never
 * sees a pointer to released memory.
 */
static int parse_variable(git_config_parser *reader, char **var_name, char **var_value)
{
	const char *value_start = NULL;
	char *line;
	int quote_count;
	bool multiline;

	*var_name = NULL;
	/* If there is no value, boolean true is assumed */
	*var_value = NULL;

	git_parse_advance_ws(&reader->ctx);
	line = git__strndup(reader->ctx.line, reader->ctx.line_len);
	if (line == NULL)
		return -1;

	quote_count = strip_comments(line, 0);

	if (parse_name(var_name, &value_start, reader, line) < 0)
		goto on_error;

	/*
	 * Now, let's try to parse the value
	 */
	if (value_start != NULL) {
		while (git__isspace(value_start[0]))
			value_start++;

		if (unescape_line(var_value, &multiline, value_start, 0) < 0)
			goto on_error;

		if (multiline) {
			git_buf multi_value = GIT_BUF_INIT;

			/*
			 * The buffer now owns the string; clear the output so it
			 * cannot dangle if the continuation fails below.
			 */
			git_buf_attach(&multi_value, *var_value, 0);
			*var_value = NULL;

			if (parse_multiline_variable(reader, &multi_value, quote_count) < 0 ||
				git_buf_oom(&multi_value)) {
				git_buf_free(&multi_value);
				goto on_error;
			}

			*var_value = git_buf_detach(&multi_value);
		}
	}

	git__free(line);
	return 0;

on_error:
	git__free(*var_name);
	*var_name = NULL;
	git__free(line);
	return -1;
}
456 
/*
 * Walk the parser's content line by line and report what is found.
 *
 * A leading UTF-8 BOM is skipped first. Each line is then classified by
 * its first non-whitespace character (or its first character, if the line
 * holds only whitespace):
 *   - '['                      section header, reported via `on_section`
 *   - whitespace, ';' or '#'   comment or blank line, via `on_comment`
 *   - anything else            variable definition, via `on_variable`
 * After the last line `on_eof` is invoked. Any callback may be NULL, and
 * `data` is passed through to every callback unchanged.
 *
 * Parsing stops at the first negative result from a parse step or a
 * callback, and that value is returned; otherwise the result of the last
 * step is returned (0 when nothing failed).
 */
int git_config_parse(
	git_config_parser *parser,
	git_config_parser_section_cb on_section,
	git_config_parser_variable_cb on_variable,
	git_config_parser_comment_cb on_comment,
	git_config_parser_eof_cb on_eof,
	void *data)
{
	git_parse_ctx *ctx;
	char *current_section = NULL, *var_name, *var_value;
	int result = 0;

	ctx = &parser->ctx;

	skip_bom(ctx);

	for (; ctx->remain_len > 0; git_parse_advance_line(ctx)) {
		const char *line_start = parser->ctx.line;
		size_t line_len = parser->ctx.line_len;
		char c;

		/*
		 * Get either first non-whitespace character or, if that does
		 * not exist, the first whitespace character. This is required
		 * to preserve whitespaces when writing back the file.
		 */
		if (git_parse_peek(&c, ctx, GIT_PARSE_PEEK_SKIP_WHITESPACE) < 0 &&
		    git_parse_peek(&c, ctx, 0) < 0)
			continue;

		switch (c) {
		case '[': /* section header, new section begins */
			git__free(current_section);
			current_section = NULL;

			if ((result = parse_section_header(parser, &current_section)) == 0 && on_section) {
				result = on_section(parser, current_section, line_start, line_len, data);
			}
			break;

		case '\n': /* comment or whitespace-only */
		case '\r':
		case ' ':
		case '\t':
		case ';':
		case '#':
			if (on_comment) {
				result = on_comment(parser, line_start, line_len, data);
			}
			break;

		default: /* assume variable declaration */
			if ((result = parse_variable(parser, &var_name, &var_value)) < 0)
				break;

			if (on_variable) {
				/*
				 * NOTE(review): the callback is presumed to take
				 * ownership of var_name and var_value -- confirm
				 * against the registered callbacks.
				 */
				result = on_variable(parser, current_section, var_name, var_value, line_start, line_len, data);
			} else {
				/* nobody else can reference these, so release them */
				git__free(var_name);
				git__free(var_value);
			}
			break;
		}

		if (result < 0)
			goto out;
	}

	if (on_eof)
		result = on_eof(parser, current_section, data);

out:
	git__free(current_section);
	return result;
}
526