1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "config_parse.h"
9 
10 #include <ctype.h>
11 
/*
 * Parallel escape tables: the character at index i of git_config_escapes,
 * when it follows a backslash in a value, stands for the character at
 * index i of git_config_escaped (see unescape_line()).
 */
const char *git_config_escapes = "ntb\"\\";
const char *git_config_escaped = "\n\t\b\"\\";
14 
/*
 * Record a config parse error for the line the reader is currently on.
 *
 * reader:    supplies the file path and current line number for the message
 * col:       column to report; 0 means "no column", and it is left out
 * error_str: short description of what went wrong
 */
static void set_parse_error(git_config_parser *reader, int col, const char *error_str)
{
	if (!col) {
		git_error_set(GIT_ERROR_CONFIG,
		              "failed to parse config file: %s (in %s:%"PRIuZ")",
		              error_str, reader->path, reader->ctx.line_num);
		return;
	}

	git_error_set(GIT_ERROR_CONFIG,
	              "failed to parse config file: %s (in %s:%"PRIuZ", column %d)",
	              error_str, reader->path, reader->ctx.line_num, col);
}
26 
27 
config_keychar(int c)28 GIT_INLINE(int) config_keychar(int c)
29 {
30 	return isalnum(c) || c == '-';
31 }
32 
/*
 * Truncate `line` in place at the first unquoted, unescaped comment
 * character (';' or '#') and then trim trailing whitespace.
 *
 * line:      NUL-terminated, writable line buffer
 * in_quotes: quote count carried over from previous lines; only its
 *            parity matters (odd means the line starts inside quotes)
 *
 * Returns `in_quotes` plus the number of unescaped double quotes seen
 * before the cut, so the caller can carry the quoting state forward.
 */
static int strip_comments(char *line, int in_quotes)
{
	int quote_count = in_quotes, backslash_count = 0;
	char *ptr;

	for (ptr = line; *ptr; ++ptr) {
		/*
		 * A character is escaped only when preceded by an odd number
		 * of consecutive backslashes. Using the parity (rather than
		 * peeking at ptr[-1]) counts a quote in the very first column
		 * and treats `\\"` as an escaped backslash followed by a real
		 * quote, which is how unescape_line() reads it.
		 */
		bool escaped = (backslash_count % 2) != 0;

		if (ptr[0] == '"' && !escaped)
			quote_count++;

		if ((ptr[0] == ';' || ptr[0] == '#') &&
		    (quote_count % 2) == 0 && !escaped) {
			ptr[0] = '\0';
			break;
		}

		if (ptr[0] == '\\')
			backslash_count++;
		else
			backslash_count = 0;
	}

	/* skip any space at the end */
	while (ptr > line && git__isspace(ptr[-1]))
		ptr--;
	ptr[0] = '\0';

	return quote_count;
}
63 
64 
/*
 * Parse the quoted subsection part of a header such as `[base "sub"]`.
 *
 * reader:       used only for error reporting
 * line:         NUL-terminated copy of the header line
 * pos:          offset in `line` at which to start looking for the quote
 * base_name:    the already parsed section name
 * section_name: on success receives a newly allocated "base_name.sub"
 *
 * Inside the quotes a backslash makes the following character literal.
 * Returns the offset, from the start of `line`, of the character just
 * after the closing "]", or -1 on failure.
 */
static int parse_subsection_header(git_config_parser *reader, const char *line, size_t pos, const char *base_name, char **section_name)
{
	git_buf buf = GIT_BUF_INIT;
	const char *first_quote, *last_quote, *cursor;
	size_t quoted_len, alloc_len, base_name_len = strlen(base_name);
	int c, rpos;

	/* Skip any additional whitespace before our section name */
	for (; git__isspace(line[pos]); pos++)
		;

	/* We should be at the first quotation mark. */
	if (line[pos] != '"') {
		set_parse_error(reader, 0, "missing quotation marks in section header");
		goto fail;
	}

	first_quote = line + pos;
	last_quote = strrchr(line, '"');
	quoted_len = last_quote - first_quote;

	/* offsets are tracked in an int below, so bound the line length */
	if ((last_quote - line) > INT_MAX) {
		set_parse_error(reader, 0, "invalid section header, line too long");
		goto fail;
	}

	/* first and last quote coincide: there is no closing quote */
	if (quoted_len == 0) {
		set_parse_error(reader, 0, "missing closing quotation mark in section header");
		goto fail;
	}

	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, base_name_len, quoted_len);
	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 2);

	if (git_buf_grow(&buf, alloc_len) < 0 ||
	    git_buf_printf(&buf, "%s.", base_name) < 0)
		goto fail;

	/* rpos indexes relative to the opening quote; start just after it */
	cursor = first_quote;
	rpos = 1;
	c = cursor[rpos];

	/*
	 * Whatever ends up in c at the bottom of an iteration is appended
	 * to the buffer.
	 */
	for (;;) {
		if (c == 0) {
			set_parse_error(reader, 0, "unexpected end-of-line in section header");
			goto fail;
		}

		if (c == '"')
			break;

		if (c == '\\') {
			/* take the escaped character verbatim */
			c = cursor[++rpos];

			if (c == 0) {
				set_parse_error(reader, rpos, "unexpected end-of-line in section header");
				goto fail;
			}
		}

		git_buf_putc(&buf, (char)c);
		c = cursor[++rpos];

		if (cursor + rpos >= last_quote)
			break;
	}

	/* git_buf_putc() results are not checked above; catch OOM here */
	if (git_buf_oom(&buf))
		goto fail;

	if (cursor[rpos] != '"' || cursor[rpos + 1] != ']') {
		set_parse_error(reader, rpos, "unexpected text after closing quotes");
		goto fail;
	}

	*section_name = git_buf_detach(&buf);

	/* rpos is at the closing quote; +2 steps over `"]` */
	return (int)(&cursor[rpos + 2] - line);

fail:
	git_buf_dispose(&buf);
	return -1;
}
157 
/*
 * Parse the section header at the reader's current line.
 *
 * reader:      parser positioned on a line whose first non-whitespace
 *              character is '['
 * section_out: on success receives a newly allocated section name; the
 *              caller frees it. Plain names are lower-cased; for
 *              `[name "sub"]` the result is produced by
 *              parse_subsection_header().
 *
 * Returns the number of characters of the line (after leading
 * whitespace) taken up by the header, or -1 on failure. All temporary
 * allocations are released on every exit path.
 */
static int parse_section_header(git_config_parser *reader, char **section_out)
{
	char *name = NULL, *name_end;
	int name_length, c, pos;
	int result;
	char *line;
	size_t line_len;

	git_parse_advance_ws(&reader->ctx);

	/*
	 * Make sure we were given a section header. This is checked before
	 * anything is allocated so that a failing assertion cannot leak.
	 */
	GIT_ASSERT(reader->ctx.line_len > 0 && reader->ctx.line[0] == '[');

	line = git__strndup(reader->ctx.line, reader->ctx.line_len);
	if (line == NULL)
		return -1;

	/* find the end of the variable's name */
	name_end = strrchr(line, ']');
	if (name_end == NULL) {
		set_parse_error(reader, 0, "missing ']' in section header");
		goto fail_parse;
	}

	/* the name can never be longer than the text before the last ']' */
	if (GIT_ADD_SIZET_OVERFLOW(&line_len, (size_t)(name_end - line), 1) ||
	    (name = git__malloc(line_len)) == NULL)
		goto fail_parse;

	name_length = 0;

	/* step over the '[' and load the first character of the name */
	pos = 1;
	c = line[pos++];

	do {
		if (git__isspace(c)){
			/* whitespace ends the base name; a quoted subsection follows */
			name[name_length] = '\0';
			result = parse_subsection_header(reader, line, pos, name, section_out);
			git__free(line);
			git__free(name);
			return result;
		}

		if (!config_keychar(c) && c != '.') {
			set_parse_error(reader, pos, "unexpected character in header");
			goto fail_parse;
		}

		name[name_length++] = (char)git__tolower(c);

	} while ((c = line[pos++]) != ']');

	/*
	 * The loop only ends on ']' (a NUL is rejected above as an
	 * unexpected character), so pos is now just past the header.
	 */
	git__free(line);

	name[name_length] = 0;
	*section_out = name;

	return pos;

fail_parse:
	git__free(line);
	git__free(name);
	return -1;
}
227 
/*
 * Step the parse context over a leading UTF-8 byte order mark, if the
 * content starts with one. Any other detection result leaves the
 * context untouched. Always returns 0.
 */
static int skip_bom(git_parse_ctx *parser)
{
	git_buf content = GIT_BUF_INIT_CONST(parser->content, parser->content_len);
	git_buf_bom_t detected;
	int skip = git_buf_detect_bom(&detected, &content);

	/* TODO: reference implementation is pretty stupid with BoM */
	if (detected != GIT_BUF_BOM_UTF8)
		return 0;

	git_parse_advance_chars(parser, skip);
	return 0;
}
241 
242 /*
243 	(* basic types *)
244 	digit = "0".."9"
245 	integer = digit { digit }
246 	alphabet = "a".."z" + "A" .. "Z"
247 
	section_char = alphabet | digit | "." | "-"
	extension_char = (* any character except newline *)
	any_char = (* any character *)
	variable_char = alphabet | digit | "-"
252 
253 
254 	(* actual grammar *)
255 	config = { section }
256 
257 	section = header { definition }
258 
259 	header = "[" section [subsection | subsection_ext] "]"
260 
261 	subsection = "." section
262 	subsection_ext = "\"" extension "\""
263 
264 	section = section_char { section_char }
265 	extension = extension_char { extension_char }
266 
267 	definition = variable_name ["=" variable_value] "\n"
268 
269 	variable_name = variable_char { variable_char }
270 	variable_value = string | boolean | integer
271 
272 	string = quoted_string | plain_string
273 	quoted_string = "\"" plain_string "\""
274 	plain_string = { any_char }
275 
276 	boolean = boolean_true | boolean_false
277 	boolean_true = "yes" | "1" | "true" | "on"
278 	boolean_false = "no" | "0" | "false" | "off"
279 */
280 
281 /* '\"' -> '"' etc */
unescape_line(char ** out,bool * is_multi,const char * ptr,int quote_count)282 static int unescape_line(
283 	char **out, bool *is_multi, const char *ptr, int quote_count)
284 {
285 	char *str, *fixed, *esc;
286 	size_t ptr_len = strlen(ptr), alloc_len;
287 
288 	*is_multi = false;
289 
290 	if (GIT_ADD_SIZET_OVERFLOW(&alloc_len, ptr_len, 1) ||
291 		(str = git__malloc(alloc_len)) == NULL) {
292 		return -1;
293 	}
294 
295 	fixed = str;
296 
297 	while (*ptr != '\0') {
298 		if (*ptr == '"') {
299 			quote_count++;
300 		} else if (*ptr != '\\') {
301 			*fixed++ = *ptr;
302 		} else {
303 			/* backslash, check the next char */
304 			ptr++;
305 			/* if we're at the end, it's a multiline, so keep the backslash */
306 			if (*ptr == '\0') {
307 				*is_multi = true;
308 				goto done;
309 			}
310 			if ((esc = strchr(git_config_escapes, *ptr)) != NULL) {
311 				*fixed++ = git_config_escaped[esc - git_config_escapes];
312 			} else {
313 				git__free(str);
314 				git_error_set(GIT_ERROR_CONFIG, "invalid escape at %s", ptr);
315 				return -1;
316 			}
317 		}
318 		ptr++;
319 	}
320 
321 done:
322 	*fixed = '\0';
323 	*out = str;
324 
325 	return 0;
326 }
327 
/*
 * Append the continuation lines of a multi-line value to `value`.
 *
 * reader:    parser positioned on the line that ended in a backslash
 * value:     buffer holding the value so far; each continuation line is
 *            comment-stripped, unescaped and appended
 * in_quotes: quote count at the end of the previous line; only its
 *            parity matters (odd means a quote is still open)
 *
 * Lines that are empty after comment stripping are skipped. Reading
 * stops at a line that does not itself end in a backslash, or at end of
 * input (which is not an error). Returns 0 on success, negative on
 * failure.
 */
static int parse_multiline_variable(git_config_parser *reader, git_buf *value, int in_quotes)
{
	int quote_count;
	bool multiline = true;

	while (multiline) {
		char *line = NULL, *proc_line = NULL;
		int error;

		/* Check that the next line exists */
		git_parse_advance_line(&reader->ctx);
		line = git__strndup(reader->ctx.line, reader->ctx.line_len);
		GIT_ERROR_CHECK_ALLOC(line);

		/*
		 * We've reached the end of the file, there is no continuation.
		 * (this is not an error).
		 */
		if (line[0] == '\0') {
			error = 0;
			goto out;
		}

		/*
		 * If it was just a comment, pretend it didn't exist.
		 *
		 * Hand over the parity of the running count: collapsing it
		 * with `!!` would turn an even, non-zero count (all quotes
		 * closed) into 1 and make comments look quoted.
		 */
		quote_count = strip_comments(line, in_quotes % 2);
		if (line[0] == '\0')
			goto next;

		if ((error = unescape_line(&proc_line, &multiline,
					   line, in_quotes)) < 0)
			goto out;

		/* Add this line to the multiline var */
		if ((error = git_buf_puts(value, proc_line)) < 0)
			goto out;

next:
		git__free(line);
		git__free(proc_line);
		/* carry only the open/closed state into the next line */
		in_quotes = quote_count % 2;
		continue;

out:
		git__free(line);
		git__free(proc_line);
		return error;
	}

	return 0;
}
378 
is_namechar(char c)379 GIT_INLINE(bool) is_namechar(char c)
380 {
381 	return isalnum(c) || c == '-';
382 }
383 
/*
 * Split a comment-stripped line into a variable name and the start of
 * its value.
 *
 * name:   receives a newly allocated copy of the name (caller frees);
 *         NULL on failure
 * value:  receives a pointer into `line` just after the '=', or NULL
 *         when the line holds a name with no '='
 * reader: used only for error reporting
 * line:   NUL-terminated text starting at the name
 *
 * Returns 0 on success, -1 when the name is empty, when something other
 * than '=' follows it, or when allocation fails.
 */
static int parse_name(
	char **name, const char **value, git_config_parser *reader, const char *line)
{
	const char *key_end, *cursor;

	*name = NULL;
	*value = NULL;

	/* the name is the leading run of name characters */
	for (key_end = line; *key_end && is_namechar(*key_end); key_end++)
		;

	if (key_end == line) {
		set_parse_error(reader, 0, "invalid configuration key");
		return -1;
	}

	/* whitespace may separate the name from '=' */
	for (cursor = key_end; *cursor && git__isspace(*cursor); cursor++)
		;

	if (*cursor == '=') {
		*value = cursor + 1;
	} else if (*cursor != '\0') {
		set_parse_error(reader, 0, "invalid configuration key");
		return -1;
	}

	*name = git__strndup(line, key_end - line);

	return (*name == NULL) ? -1 : 0;
}
417 
/*
 * Parse the variable definition at the reader's current line.
 *
 * reader:    parser positioned on the line to read
 * var_name:  receives a newly allocated name, or NULL on failure
 * var_value: receives a newly allocated, unescaped value; stays NULL
 *            when the line has no '=' or on failure
 *
 * When the value ends in a backslash the following lines are read and
 * appended. On success the caller owns both strings. Returns 0 on
 * success, negative on failure.
 */
static int parse_variable(git_config_parser *reader, char **var_name, char **var_value)
{
	char *raw = NULL, *key = NULL, *val = NULL;
	const char *rhs = NULL;
	bool continued;
	int quotes, error;

	*var_name = NULL;
	*var_value = NULL;

	git_parse_advance_ws(&reader->ctx);
	raw = git__strndup(reader->ctx.line, reader->ctx.line_len);
	GIT_ERROR_CHECK_ALLOC(raw);

	quotes = strip_comments(raw, 0);

	error = parse_name(&key, &rhs, reader, raw);
	if (error < 0)
		goto cleanup;

	/* rhs is only set when an '=' followed the name */
	if (rhs != NULL) {
		for (; git__isspace(rhs[0]); rhs++)
			;

		error = unescape_line(&val, &continued, rhs, 0);
		if (error < 0)
			goto cleanup;

		if (continued) {
			git_buf joined = GIT_BUF_INIT;

			/* the buffer takes over `val` while lines are appended */
			git_buf_attach(&joined, val, 0);
			val = NULL;

			if (parse_multiline_variable(reader, &joined, quotes % 2) < 0 ||
			    git_buf_oom(&joined)) {
				error = -1;
				git_buf_dispose(&joined);
				goto cleanup;
			}

			val = git_buf_detach(&joined);
		}
	}

	/* hand both strings to the caller; clear locals so cleanup skips them */
	*var_name = key;
	*var_value = val;
	key = NULL;
	val = NULL;

cleanup:
	git__free(key);
	git__free(val);
	git__free(raw);
	return error;
}
474 
/*
 * Prepare `out` to parse `datalen` bytes at `data`. `path` is stored as
 * given (not copied) and is used when reporting parse errors. Returns
 * the result of initialising the underlying parse context.
 */
int git_config_parser_init(git_config_parser *out, const char *path, const char *data, size_t datalen)
{
	git_parse_ctx *ctx = &out->ctx;

	out->path = path;

	return git_parse_ctx_init(ctx, data, datalen);
}
480 
/*
 * Clear the parse context embedded in `parser`. The stored path is not
 * touched.
 */
void git_config_parser_dispose(git_config_parser *parser)
{
	git_parse_ctx *ctx = &parser->ctx;

	git_parse_ctx_clear(ctx);
}
485 
/*
 * Walk the parser's content line by line and report what is found.
 *
 * parser:      initialised parser
 * on_section:  called after each section header with the section name
 * on_variable: called for each variable with the current section, the
 *              name and the value (NULL when the line has no '=')
 * on_comment:  called for comment and whitespace-only lines
 * on_eof:      called once after the last line
 * payload:     passed through to every callback
 *
 * Any callback may be NULL. Strings handed to callbacks are owned by
 * this function and released after the callback returns. Parsing stops
 * at the first negative result from a parse step or callback, and that
 * value is returned; otherwise the result of the last step is returned.
 */
int git_config_parse(
	git_config_parser *parser,
	git_config_parser_section_cb on_section,
	git_config_parser_variable_cb on_variable,
	git_config_parser_comment_cb on_comment,
	git_config_parser_eof_cb on_eof,
	void *payload)
{
	git_parse_ctx *ctx;
	char *current_section = NULL, *var_name = NULL, *var_value = NULL;
	int result = 0;

	ctx = &parser->ctx;

	skip_bom(ctx);

	for (; ctx->remain_len > 0; git_parse_advance_line(ctx)) {
		const char *line_start;
		size_t line_len;
		char c;

	restart:
		line_start = ctx->line;
		line_len = ctx->line_len;

		/*
		 * Get either first non-whitespace character or, if that does
		 * not exist, the first whitespace character. This is required
		 * to preserve whitespaces when writing back the file.
		 */
		if (git_parse_peek(&c, ctx, GIT_PARSE_PEEK_SKIP_WHITESPACE) < 0 &&
		    git_parse_peek(&c, ctx, 0) < 0)
			continue;

		switch (c) {
		case '[': /* section header, new section begins */
			git__free(current_section);
			current_section = NULL;

			result = parse_section_header(parser, &current_section);
			if (result < 0)
				break;

			git_parse_advance_chars(ctx, result);

			/*
			 * The header length has served its purpose; reset so it
			 * is never mistaken for this function's return value.
			 */
			result = 0;

			if (on_section)
				result = on_section(parser, current_section, line_start, line_len, payload);

			/* do not let the restart below swallow a callback failure */
			if (result < 0)
				break;

			/*
			 * After we've parsed the section header we may not be
			 * done with the line. If there's still data in there,
			 * run the next loop with the rest of the current line
			 * instead of moving forward.
			 */
			if (!git_parse_peek(&c, ctx, GIT_PARSE_PEEK_SKIP_WHITESPACE))
				goto restart;

			break;

		case '\n': /* comment or whitespace-only */
		case '\r':
		case ' ':
		case '\t':
		case ';':
		case '#':
			if (on_comment)
				result = on_comment(parser, line_start, line_len, payload);
			break;

		default: /* assume variable declaration */
			result = parse_variable(parser, &var_name, &var_value);
			if (result < 0)
				break;

			if (on_variable)
				result = on_variable(parser, current_section, var_name, var_value, line_start, line_len, payload);

			/* release the strings whether or not a callback consumed them */
			git__free(var_name);
			git__free(var_value);
			var_name = NULL;
			var_value = NULL;

			break;
		}

		if (result < 0)
			goto out;
	}

	if (on_eof)
		result = on_eof(parser, current_section, payload);

out:
	git__free(current_section);
	return result;
}
577