1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 #include "array.h"
8 #include "common.h"
9 #include "git2/message.h"
10 
11 #include <stddef.h>
12 #include <string.h>
13 #include <ctype.h>
14 
/* A line whose first character is this one is treated as a comment line. */
#define COMMENT_LINE_CHAR '#'
/* Set of characters accepted as the separator between a trailer key and its value. */
#define TRAILER_SEPARATORS ":"

/*
 * NULL-terminated list of line prefixes.  A line starting with one of these
 * is always counted as a trailer by find_trailer_start(), and seeing one
 * relaxes the "every line must be a trailer" rule for the enclosing block.
 */
static const char *const git_generated_prefixes[] = {
	"Signed-off-by: ",
	"(cherry picked from commit ",
	NULL
};
23 
/*
 * Return 1 if the line starting at `str` is blank, i.e. nothing but
 * whitespace separates `str` from the next newline or from the end of the
 * string; return 0 otherwise.
 */
static int is_blank_line(const char *str)
{
	const char *s = str;

	/*
	 * The <ctype.h> classifiers are only defined for EOF and for values
	 * representable as unsigned char.  Messages routinely contain bytes
	 * >= 0x80, which are negative where plain char is signed, so convert
	 * before classifying.
	 */
	while (*s && *s != '\n' && isspace((unsigned char)*s))
		s++;

	return !*s || *s == '\n';
}
31 
/*
 * Return a pointer to the first character after the next newline in `str`.
 * When `str` contains no newline, return a pointer to its NUL terminator.
 */
static const char *next_line(const char *str)
{
	const char *newline = strchr(str, '\n');

	return newline ? newline + 1 : str + strlen(str);
}
43 
/*
 * Find the start of the last line in the first `len` bytes of `buf`.
 *
 * On return `*out` holds the offset of that line's first character (0 when
 * the region holds a single line or is empty).  Returns false only when
 * `len` is 0, i.e. when there is no line left to report; true otherwise.
 */
static bool last_line(size_t *out, const char *buf, size_t len)
{
	size_t i;

	*out = 0;

	if (len == 0)
		return false;

	/*
	 * The final byte (index len - 1) is deliberately not examined: if it
	 * is a newline it terminates the last line and belongs to it.
	 *
	 * Scan every remaining index down to and including 0.  The decrement
	 * lives in the loop condition so that index 0 is inspected without
	 * relying on an unsigned counter going below zero; a newline at the
	 * very start of the buffer must still end the search.
	 */
	for (i = len - 1; i-- > 0; ) {
		if (buf[i] == '\n') {
			*out = i + 1;
			return true;
		}
	}

	return true;
}
73 
/*
 * If the given line is of the form
 * "<token><optional whitespace><separator>..." or "<separator>...", sets
 * *out to the offset of the separator and returns true.  Otherwise returns
 * false and leaves *out untouched.  <token> consists of alphanumerics and
 * '-'; the optional whitespace (spaces or tabs) is allowed primarily to
 * support things like "Bug #43", where <token> is "Bug" and <separator>
 * is "#".
 *
 * `separators` is the set of characters that count as a separator.
 *
 * The separator-starts-line case (true, *out == 0) is distinguished from
 * the not-well-formed case (false) because some callers need to tell the
 * two apart.
 */
static bool find_separator(size_t *out, const char *line, const char *separators)
{
	int whitespace_found = 0;
	const char *c;

	for (c = line; *c; c++) {
		if (strchr(separators, *c)) {
			*out = (size_t)(c - line);
			return true;
		}

		/*
		 * Token characters are only accepted before any whitespace.
		 * The cast keeps isalnum() defined for bytes >= 0x80, which
		 * are negative where plain char is signed.
		 */
		if (!whitespace_found && (isalnum((unsigned char)*c) || *c == '-'))
			continue;

		/* whitespace may follow the token but may not start the line */
		if (c != line && (*c == ' ' || *c == '\t')) {
			whitespace_found = 1;
			continue;
		}

		break;
	}

	return false;
}
106 
107 /*
108  * Inspect the given string and determine the true "end" of the log message, in
109  * order to find where to put a new Signed-off-by: line.  Ignored are
110  * trailing comment lines and blank lines.  To support "git commit -s
111  * --amend" on an existing commit, we also ignore "Conflicts:".  To
112  * support "git commit -v", we truncate at cut lines.
113  *
114  * Returns the number of bytes from the tail to ignore, to be fed as
115  * the second parameter to append_signoff().
116  */
ignore_non_trailer(const char * buf,size_t len)117 static size_t ignore_non_trailer(const char *buf, size_t len)
118 {
119 	size_t boc = 0, bol = 0;
120 	int in_old_conflicts_block = 0;
121 	size_t cutoff = len;
122 
123 	while (bol < cutoff) {
124 		const char *next_line = memchr(buf + bol, '\n', len - bol);
125 
126 		if (!next_line)
127 			next_line = buf + len;
128 		else
129 			next_line++;
130 
131 		if (buf[bol] == COMMENT_LINE_CHAR || buf[bol] == '\n') {
132 			/* is this the first of the run of comments? */
133 			if (!boc)
134 				boc = bol;
135 			/* otherwise, it is just continuing */
136 		} else if (git__prefixcmp(buf + bol, "Conflicts:\n") == 0) {
137 			in_old_conflicts_block = 1;
138 			if (!boc)
139 				boc = bol;
140 		} else if (in_old_conflicts_block && buf[bol] == '\t') {
141 			; /* a pathname in the conflicts block */
142 		} else if (boc) {
143 			/* the previous was not trailing comment */
144 			boc = 0;
145 			in_old_conflicts_block = 0;
146 		}
147 		bol = next_line - buf;
148 	}
149 	return boc ? len - boc : len - cutoff;
150 }
151 
/*
 * Return the offset of the first line of `str` that begins with "---"
 * (taken as the start of a patch), or the length of `str` when no line
 * does.
 */
static size_t find_patch_start(const char *str)
{
	const char *line = str;

	while (*line != '\0' && git__prefixcmp(line, "---") != 0)
		line = next_line(line);

	return line - str;
}
167 
168 /*
169  * Return the position of the first trailer line or len if there are no
170  * trailers.
171  */
find_trailer_start(const char * buf,size_t len)172 static size_t find_trailer_start(const char *buf, size_t len)
173 {
174 	const char *s;
175 	size_t end_of_title, l;
176 	int only_spaces = 1;
177 	int recognized_prefix = 0, trailer_lines = 0, non_trailer_lines = 0;
178 	/*
179 	 * Number of possible continuation lines encountered. This will be
180 	 * reset to 0 if we encounter a trailer (since those lines are to be
181 	 * considered continuations of that trailer), and added to
182 	 * non_trailer_lines if we encounter a non-trailer (since those lines
183 	 * are to be considered non-trailers).
184 	 */
185 	int possible_continuation_lines = 0;
186 
187 	/* The first paragraph is the title and cannot be trailers */
188 	for (s = buf; s < buf + len; s = next_line(s)) {
189 		if (s[0] == COMMENT_LINE_CHAR)
190 			continue;
191 		if (is_blank_line(s))
192 			break;
193 	}
194 	end_of_title = s - buf;
195 
196 	/*
197 	 * Get the start of the trailers by looking starting from the end for a
198 	 * blank line before a set of non-blank lines that (i) are all
199 	 * trailers, or (ii) contains at least one Git-generated trailer and
200 	 * consists of at least 25% trailers.
201 	 */
202 	l = len;
203 	while (last_line(&l, buf, l) && l >= end_of_title) {
204 		const char *bol = buf + l;
205 		const char *const *p;
206 		size_t separator_pos = 0;
207 
208 		if (bol[0] == COMMENT_LINE_CHAR) {
209 			non_trailer_lines += possible_continuation_lines;
210 			possible_continuation_lines = 0;
211 			continue;
212 		}
213 		if (is_blank_line(bol)) {
214 			if (only_spaces)
215 				continue;
216 			non_trailer_lines += possible_continuation_lines;
217 			if (recognized_prefix &&
218 			    trailer_lines * 3 >= non_trailer_lines)
219 				return next_line(bol) - buf;
220 			else if (trailer_lines && !non_trailer_lines)
221 				return next_line(bol) - buf;
222 			return len;
223 		}
224 		only_spaces = 0;
225 
226 		for (p = git_generated_prefixes; *p; p++) {
227 			if (git__prefixcmp(bol, *p) == 0) {
228 				trailer_lines++;
229 				possible_continuation_lines = 0;
230 				recognized_prefix = 1;
231 				goto continue_outer_loop;
232 			}
233 		}
234 
235 		find_separator(&separator_pos, bol, TRAILER_SEPARATORS);
236 		if (separator_pos >= 1 && !isspace(bol[0])) {
237 			trailer_lines++;
238 			possible_continuation_lines = 0;
239 			if (recognized_prefix)
240 				continue;
241 		} else if (isspace(bol[0]))
242 			possible_continuation_lines++;
243 		else {
244 			non_trailer_lines++;
245 			non_trailer_lines += possible_continuation_lines;
246 			possible_continuation_lines = 0;
247 		}
248 continue_outer_loop:
249 		;
250 	}
251 
252 	return len;
253 }
254 
/*
 * Return the offset just past the last trailer: `len` minus the ignorable
 * tail reported by ignore_non_trailer().
 */
static size_t find_trailer_end(const char *buf, size_t len)
{
	size_t ignored_tail = ignore_non_trailer(buf, len);

	return len - ignored_tail;
}
260 
/*
 * Copy the trailer block of `message` into a freshly allocated,
 * NUL-terminated buffer.
 *
 * The block runs from find_trailer_start() to find_trailer_end(), looking
 * only at the part of the message before any patch.  On success the buffer
 * is returned and its length (excluding the terminator) is stored in *len;
 * the caller owns the buffer.  Returns NULL, leaving *len untouched, if the
 * allocation fails.
 */
static char *extract_trailer_block(const char *message, size_t *len)
{
	size_t end, start, block_len;
	char *block;

	end = find_trailer_end(message, find_patch_start(message));
	start = find_trailer_start(message, end);
	block_len = end - start;

	block = git__malloc(block_len + 1);
	if (!block)
		return NULL;

	memcpy(block, message + start, block_len);
	block[block_len] = '\0';
	*len = block_len;

	return block;
}
280 
/* States of the trailer parser in git_message_trailers(). */
enum trailer_state {
	S_START,     /* about to read the key of a new trailer */
	S_KEY,       /* consuming key characters */
	S_KEY_WS,    /* whitespace between the key and the separator */
	S_SEP_WS,    /* whitespace between the separator and the value */
	S_VALUE,     /* consuming value characters */
	S_VALUE_NL,  /* newline seen in a value: continuation or end? */
	S_VALUE_END, /* a complete key/value pair is ready to be stored */
	S_IGNORE     /* skipping the rest of a malformed line */
};
291 
/*
 * State-transition helpers for a parser loop.  Both expect variables named
 * `state` and `ptr` to be in scope and must be expanded directly inside the
 * loop that drives the state machine:
 *
 *   NEXT(st) - switch to state `st` and consume the current character
 *   GOTO(st) - switch to state `st` and look at the same character again
 *
 * They are plain brace blocks rather than do { } while (0) on purpose: the
 * `continue` has to bind to the enclosing loop, not to a wrapper loop.
 */
#define NEXT(st) \
	{ \
		state = (st); \
		ptr++; \
		continue; \
	}
#define GOTO(st) \
	{ \
		state = (st); \
		continue; \
	}
294 
295 typedef git_array_t(git_message_trailer) git_array_trailer_t;
296 
git_message_trailers(git_message_trailer_array * trailer_arr,const char * message)297 int git_message_trailers(git_message_trailer_array *trailer_arr, const char *message)
298 {
299 	enum trailer_state state = S_START;
300 	int rc = 0;
301 	char *ptr;
302 	char *key = NULL;
303 	char *value = NULL;
304 	git_array_trailer_t arr = GIT_ARRAY_INIT;
305 
306 	size_t trailer_len;
307 	char *trailer = extract_trailer_block(message, &trailer_len);
308 	if (trailer == NULL)
309 		return -1;
310 
311 	for (ptr = trailer;;) {
312 		switch (state) {
313 			case S_START: {
314 				if (*ptr == 0) {
315 					goto ret;
316 				}
317 
318 				key = ptr;
319 				GOTO(S_KEY);
320 			}
321 			case S_KEY: {
322 				if (*ptr == 0) {
323 					goto ret;
324 				}
325 
326 				if (isalnum(*ptr) || *ptr == '-') {
327 					/* legal key character */
328 					NEXT(S_KEY);
329 				}
330 
331 				if (*ptr == ' ' || *ptr == '\t') {
332 					/* optional whitespace before separator */
333 					*ptr = 0;
334 					NEXT(S_KEY_WS);
335 				}
336 
337 				if (strchr(TRAILER_SEPARATORS, *ptr)) {
338 					*ptr = 0;
339 					NEXT(S_SEP_WS);
340 				}
341 
342 				/* illegal character */
343 				GOTO(S_IGNORE);
344 			}
345 			case S_KEY_WS: {
346 				if (*ptr == 0) {
347 					goto ret;
348 				}
349 
350 				if (*ptr == ' ' || *ptr == '\t') {
351 					NEXT(S_KEY_WS);
352 				}
353 
354 				if (strchr(TRAILER_SEPARATORS, *ptr)) {
355 					NEXT(S_SEP_WS);
356 				}
357 
358 				/* illegal character */
359 				GOTO(S_IGNORE);
360 			}
361 			case S_SEP_WS: {
362 				if (*ptr == 0) {
363 					goto ret;
364 				}
365 
366 				if (*ptr == ' ' || *ptr == '\t') {
367 					NEXT(S_SEP_WS);
368 				}
369 
370 				value = ptr;
371 				NEXT(S_VALUE);
372 			}
373 			case S_VALUE: {
374 				if (*ptr == 0) {
375 					GOTO(S_VALUE_END);
376 				}
377 
378 				if (*ptr == '\n') {
379 					NEXT(S_VALUE_NL);
380 				}
381 
382 				NEXT(S_VALUE);
383 			}
384 			case S_VALUE_NL: {
385 				if (*ptr == ' ') {
386 					/* continuation; */
387 					NEXT(S_VALUE);
388 				}
389 
390 				ptr[-1] = 0;
391 				GOTO(S_VALUE_END);
392 			}
393 			case S_VALUE_END: {
394 				git_message_trailer *t = git_array_alloc(arr);
395 
396 				t->key = key;
397 				t->value = value;
398 
399 				key = NULL;
400 				value = NULL;
401 
402 				GOTO(S_START);
403 			}
404 			case S_IGNORE: {
405 				if (*ptr == 0) {
406 					goto ret;
407 				}
408 
409 				if (*ptr == '\n') {
410 					NEXT(S_START);
411 				}
412 
413 				NEXT(S_IGNORE);
414 			}
415 		}
416 	}
417 
418 ret:
419 	trailer_arr->_trailer_block = trailer;
420 	trailer_arr->trailers = arr.ptr;
421 	trailer_arr->count = arr.size;
422 
423 	return rc;
424 }
425 
/*
 * Release the memory owned by a trailer array filled in by
 * git_message_trailers(): the private copy of the trailer block (which the
 * key and value strings point into) and the array of trailers itself.
 *
 * `arr` may be NULL.  The fields are reset afterwards, so calling this
 * again on the same array is harmless.  The struct itself is not freed.
 */
void git_message_trailer_array_free(git_message_trailer_array *arr)
{
	if (arr == NULL)
		return;

	git__free(arr->_trailer_block);
	git__free(arr->trailers);

	/* do not leave dangling pointers behind */
	arr->_trailer_block = NULL;
	arr->trailers = NULL;
	arr->count = 0;
}
431