1 #include "cache.h"
2 #include "config.h"
3 #include "utf8.h"
4 #include "strbuf.h"
5 #include "mailinfo.h"
6 
cleanup_space(struct strbuf * sb)7 static void cleanup_space(struct strbuf *sb)
8 {
9 	size_t pos, cnt;
10 	for (pos = 0; pos < sb->len; pos++) {
11 		if (isspace(sb->buf[pos])) {
12 			sb->buf[pos] = ' ';
13 			for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++);
14 			strbuf_remove(sb, pos + 1, cnt);
15 		}
16 	}
17 }
18 
get_sane_name(struct strbuf * out,struct strbuf * name,struct strbuf * email)19 static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email)
20 {
21 	struct strbuf *src = name;
22 	if (!name->len || 60 < name->len || strpbrk(name->buf, "@<>"))
23 		src = email;
24 	else if (name == out)
25 		return;
26 	strbuf_reset(out);
27 	strbuf_addbuf(out, src);
28 }
29 
parse_bogus_from(struct mailinfo * mi,const struct strbuf * line)30 static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line)
31 {
32 	/* John Doe <johndoe> */
33 
34 	char *bra, *ket;
35 	/* This is fallback, so do not bother if we already have an
36 	 * e-mail address.
37 	 */
38 	if (mi->email.len)
39 		return;
40 
41 	bra = strchr(line->buf, '<');
42 	if (!bra)
43 		return;
44 	ket = strchr(bra, '>');
45 	if (!ket)
46 		return;
47 
48 	strbuf_reset(&mi->email);
49 	strbuf_add(&mi->email, bra + 1, ket - bra - 1);
50 
51 	strbuf_reset(&mi->name);
52 	strbuf_add(&mi->name, line->buf, bra - line->buf);
53 	strbuf_trim(&mi->name);
54 	get_sane_name(&mi->name, &mi->name, &mi->email);
55 }
56 
/*
 * Copy a parenthesized comment to "outbuf", removing the backslash of
 * each quoted-pair.
 *
 * "in" points just past the opening '(' (which is re-emitted here).
 * Nested comments are copied with their parentheses and tracked with a
 * depth counter rather than recursion, so deeply nested input cannot
 * exhaust the stack.
 *
 * Returns a pointer just past the ')' that closes the outermost
 * comment, or a pointer to the terminating NUL when the comment is not
 * closed.  The returned pointer never goes beyond the NUL, so the
 * caller can safely keep scanning from it.
 */
static const char *unquote_comment(struct strbuf *outbuf, const char *in)
{
	int take_next_literally = 0;
	int depth = 1;

	strbuf_addch(outbuf, '(');

	while (*in) {
		int c = *in++;

		if (take_next_literally) {
			/* previous byte was a backslash: copy this one as-is */
			take_next_literally = 0;
		} else {
			switch (c) {
			case '\\':
				take_next_literally = 1;
				continue;
			case '(':
				strbuf_addch(outbuf, '(');
				depth++;
				continue;
			case ')':
				strbuf_addch(outbuf, ')');
				if (!--depth)
					return in;
				continue;
			}
		}

		strbuf_addch(outbuf, c);
	}

	/* unterminated comment: "in" points at the NUL */
	return in;
}
86 
/*
 * Copy the contents of a double-quoted string to "outbuf", dropping
 * the surrounding quotes and the backslash of each quoted-pair.
 *
 * "in" points just past the opening '"'.  Returns a pointer just past
 * the closing '"', or a pointer to the terminating NUL when the string
 * is not closed.  The returned pointer never goes beyond the NUL, so
 * the caller can safely keep scanning from it.
 */
static const char *unquote_quoted_string(struct strbuf *outbuf, const char *in)
{
	int take_next_literally = 0;

	while (*in) {
		int c = *in++;

		if (take_next_literally) {
			/* previous byte was a backslash: copy this one as-is */
			take_next_literally = 0;
		} else if (c == '\\') {
			take_next_literally = 1;
			continue;
		} else if (c == '"') {
			return in;
		}

		strbuf_addch(outbuf, c);
	}

	/* unterminated string: "in" points at the NUL */
	return in;
}
110 
unquote_quoted_pair(struct strbuf * line)111 static void unquote_quoted_pair(struct strbuf *line)
112 {
113 	struct strbuf outbuf;
114 	const char *in = line->buf;
115 	int c;
116 
117 	strbuf_init(&outbuf, line->len);
118 
119 	while ((c = *in++) != 0) {
120 		switch (c) {
121 		case '"':
122 			in = unquote_quoted_string(&outbuf, in);
123 			continue;
124 		case '(':
125 			in = unquote_comment(&outbuf, in);
126 			continue;
127 		}
128 
129 		strbuf_addch(&outbuf, c);
130 	}
131 
132 	strbuf_swap(&outbuf, line);
133 	strbuf_release(&outbuf);
134 
135 }
136 
handle_from(struct mailinfo * mi,const struct strbuf * from)137 static void handle_from(struct mailinfo *mi, const struct strbuf *from)
138 {
139 	char *at;
140 	size_t el;
141 	struct strbuf f;
142 
143 	strbuf_init(&f, from->len);
144 	strbuf_addbuf(&f, from);
145 
146 	unquote_quoted_pair(&f);
147 
148 	at = strchr(f.buf, '@');
149 	if (!at) {
150 		parse_bogus_from(mi, from);
151 		goto out;
152 	}
153 
154 	/*
155 	 * If we already have one email, don't take any confusing lines
156 	 */
157 	if (mi->email.len && strchr(at + 1, '@'))
158 		goto out;
159 
160 	/* Pick up the string around '@', possibly delimited with <>
161 	 * pair; that is the email part.
162 	 */
163 	while (at > f.buf) {
164 		char c = at[-1];
165 		if (isspace(c))
166 			break;
167 		if (c == '<') {
168 			at[-1] = ' ';
169 			break;
170 		}
171 		at--;
172 	}
173 	el = strcspn(at, " \n\t\r\v\f>");
174 	strbuf_reset(&mi->email);
175 	strbuf_add(&mi->email, at, el);
176 	strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0));
177 
178 	/* The remainder is name.  It could be
179 	 *
180 	 * - "John Doe <john.doe@xz>"			(a), or
181 	 * - "john.doe@xz (John Doe)"			(b), or
182 	 * - "John (zzz) Doe <john.doe@xz> (Comment)"	(c)
183 	 *
184 	 * but we have removed the email part, so
185 	 *
186 	 * - remove extra spaces which could stay after email (case 'c'), and
187 	 * - trim from both ends, possibly removing the () pair at the end
188 	 *   (cases 'a' and 'b').
189 	 */
190 	cleanup_space(&f);
191 	strbuf_trim(&f);
192 	if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') {
193 		strbuf_remove(&f, 0, 1);
194 		strbuf_setlen(&f, f.len - 1);
195 	}
196 
197 	get_sane_name(&mi->name, &f, &mi->email);
198 out:
199 	strbuf_release(&f);
200 }
201 
handle_header(struct strbuf ** out,const struct strbuf * line)202 static void handle_header(struct strbuf **out, const struct strbuf *line)
203 {
204 	if (!*out) {
205 		*out = xmalloc(sizeof(struct strbuf));
206 		strbuf_init(*out, line->len);
207 	} else
208 		strbuf_reset(*out);
209 
210 	strbuf_addbuf(*out, line);
211 }
212 
213 /* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
214  * to have enough heuristics to grok MIME encoded patches often found
215  * on our mailing lists.  For example, we do not even treat header lines
216  * case insensitively.
217  */
218 
/*
 * Look for "name" (compared case-insensitively, and expected to
 * include its trailing '=') anywhere in "line" and copy the value
 * that follows it into "attr".
 *
 * A value that starts with '"' runs up to the next '"'; otherwise it
 * runs up to the next ';', space or tab.  "attr" is always emptied
 * first.  Returns 1 when "name" was found, 0 otherwise.
 */
static int slurp_attr(const char *line, const char *name, struct strbuf *attr)
{
	const char *value = strcasestr(line, name);
	const char *terminators = "; \t";

	strbuf_setlen(attr, 0);
	if (!value)
		return 0;

	value += strlen(name);
	if (*value == '"') {
		value++;
		terminators = "\"";
	}
	strbuf_add(attr, value, strcspn(value, terminators));
	return 1;
}
238 
has_attr_value(const char * line,const char * name,const char * value)239 static int has_attr_value(const char *line, const char *name, const char *value)
240 {
241 	struct strbuf sb = STRBUF_INIT;
242 	int rc = slurp_attr(line, name, &sb) && !strcasecmp(sb.buf, value);
243 	strbuf_release(&sb);
244 	return rc;
245 }
246 
handle_content_type(struct mailinfo * mi,struct strbuf * line)247 static void handle_content_type(struct mailinfo *mi, struct strbuf *line)
248 {
249 	struct strbuf *boundary = xmalloc(sizeof(struct strbuf));
250 	strbuf_init(boundary, line->len);
251 
252 	mi->format_flowed = has_attr_value(line->buf, "format=", "flowed");
253 	mi->delsp = has_attr_value(line->buf, "delsp=", "yes");
254 
255 	if (slurp_attr(line->buf, "boundary=", boundary)) {
256 		strbuf_insertstr(boundary, 0, "--");
257 		if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) {
258 			error("Too many boundaries to handle");
259 			mi->input_error = -1;
260 			mi->content_top = &mi->content[MAX_BOUNDARIES] - 1;
261 			return;
262 		}
263 		*(mi->content_top) = boundary;
264 		boundary = NULL;
265 	}
266 	slurp_attr(line->buf, "charset=", &mi->charset);
267 
268 	if (boundary) {
269 		strbuf_release(boundary);
270 		free(boundary);
271 	}
272 }
273 
handle_content_transfer_encoding(struct mailinfo * mi,const struct strbuf * line)274 static void handle_content_transfer_encoding(struct mailinfo *mi,
275 					     const struct strbuf *line)
276 {
277 	if (strcasestr(line->buf, "base64"))
278 		mi->transfer_encoding = TE_BASE64;
279 	else if (strcasestr(line->buf, "quoted-printable"))
280 		mi->transfer_encoding = TE_QP;
281 	else
282 		mi->transfer_encoding = TE_DONTCARE;
283 }
284 
is_multipart_boundary(struct mailinfo * mi,const struct strbuf * line)285 static int is_multipart_boundary(struct mailinfo *mi, const struct strbuf *line)
286 {
287 	struct strbuf *content_top = *(mi->content_top);
288 
289 	return ((content_top->len <= line->len) &&
290 		!memcmp(line->buf, content_top->buf, content_top->len));
291 }
292 
cleanup_subject(struct mailinfo * mi,struct strbuf * subject)293 static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject)
294 {
295 	size_t at = 0;
296 
297 	while (at < subject->len) {
298 		char *pos;
299 		size_t remove;
300 
301 		switch (subject->buf[at]) {
302 		case 'r': case 'R':
303 			if (subject->len <= at + 3)
304 				break;
305 			if ((subject->buf[at + 1] == 'e' ||
306 			     subject->buf[at + 1] == 'E') &&
307 			    subject->buf[at + 2] == ':') {
308 				strbuf_remove(subject, at, 3);
309 				continue;
310 			}
311 			at++;
312 			break;
313 		case ' ': case '\t': case ':':
314 			strbuf_remove(subject, at, 1);
315 			continue;
316 		case '[':
317 			pos = strchr(subject->buf + at, ']');
318 			if (!pos)
319 				break;
320 			remove = pos - subject->buf + at + 1;
321 			if (!mi->keep_non_patch_brackets_in_subject ||
322 			    (7 <= remove &&
323 			     memmem(subject->buf + at, remove, "PATCH", 5)))
324 				strbuf_remove(subject, at, remove);
325 			else {
326 				at += remove;
327 				/*
328 				 * If the input had a space after the ], keep
329 				 * it.  We don't bother with finding the end of
330 				 * the space, since we later normalize it
331 				 * anyway.
332 				 */
333 				if (isspace(subject->buf[at]))
334 					at += 1;
335 			}
336 			continue;
337 		}
338 		break;
339 	}
340 	strbuf_trim(subject);
341 }
342 
/*
 * Names of the headers whose values are collected.  Loops over this
 * table stop at the first NULL entry, i.e. at the first slot that is
 * not explicitly initialized below.
 */
#define MAX_HDR_PARSED 10
static const char *header[MAX_HDR_PARSED] = {
	"From",
	"Subject",
	"Date",
};
347 
skip_header(const struct strbuf * line,const char * hdr,const char ** outval)348 static inline int skip_header(const struct strbuf *line, const char *hdr,
349 			      const char **outval)
350 {
351 	const char *val;
352 	if (!skip_iprefix(line->buf, hdr, &val) ||
353 	    *val++ != ':')
354 		return 0;
355 	while (isspace(*val))
356 		val++;
357 	*outval = val;
358 	return 1;
359 }
360 
/*
 * Returns 1 when the "len" bytes at "line" look like the "From " line
 * written at the top of a format-patch output: same length as SAMPLE,
 * the "From " prefix, exactly 40 lowercase hex digits, and then the
 * same fixed tail as SAMPLE.  Returns 0 otherwise.
 */
static int is_format_patch_separator(const char *line, int len)
{
	static const char SAMPLE[] =
		"From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n";
	static const char PREFIX[] = "From ";
	const size_t sample_len = strlen(SAMPLE);
	const size_t prefix_len = strlen(PREFIX);
	const char *hash, *tail;
	size_t tail_off;

	if ((size_t)len != sample_len)
		return 0;
	if (strncmp(line, PREFIX, prefix_len))
		return 0;

	hash = line + prefix_len;
	if (strspn(hash, "0123456789abcdef") != 40)
		return 0;

	/* everything after the object name must match the sample */
	tail = hash + 40;
	tail_off = tail - line;
	return !memcmp(SAMPLE + tail_off, tail, sample_len - tail_off);
}
376 
decode_q_segment(const struct strbuf * q_seg,int rfc2047)377 static struct strbuf *decode_q_segment(const struct strbuf *q_seg, int rfc2047)
378 {
379 	const char *in = q_seg->buf;
380 	int c;
381 	struct strbuf *out = xmalloc(sizeof(struct strbuf));
382 	strbuf_init(out, q_seg->len);
383 
384 	while ((c = *in++) != 0) {
385 		if (c == '=') {
386 			int ch, d = *in;
387 			if (d == '\n' || !d)
388 				break; /* drop trailing newline */
389 			ch = hex2chr(in);
390 			if (ch >= 0) {
391 				strbuf_addch(out, ch);
392 				in += 2;
393 				continue;
394 			}
395 			/* garbage -- fall through */
396 		}
397 		if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
398 			c = 0x20;
399 		strbuf_addch(out, c);
400 	}
401 	return out;
402 }
403 
decode_b_segment(const struct strbuf * b_seg)404 static struct strbuf *decode_b_segment(const struct strbuf *b_seg)
405 {
406 	/* Decode in..ep, possibly in-place to ot */
407 	int c, pos = 0, acc = 0;
408 	const char *in = b_seg->buf;
409 	struct strbuf *out = xmalloc(sizeof(struct strbuf));
410 	strbuf_init(out, b_seg->len);
411 
412 	while ((c = *in++) != 0) {
413 		if (c == '+')
414 			c = 62;
415 		else if (c == '/')
416 			c = 63;
417 		else if ('A' <= c && c <= 'Z')
418 			c -= 'A';
419 		else if ('a' <= c && c <= 'z')
420 			c -= 'a' - 26;
421 		else if ('0' <= c && c <= '9')
422 			c -= '0' - 52;
423 		else
424 			continue; /* garbage */
425 		switch (pos++) {
426 		case 0:
427 			acc = (c << 2);
428 			break;
429 		case 1:
430 			strbuf_addch(out, (acc | (c >> 4)));
431 			acc = (c & 15) << 4;
432 			break;
433 		case 2:
434 			strbuf_addch(out, (acc | (c >> 2)));
435 			acc = (c & 3) << 6;
436 			break;
437 		case 3:
438 			strbuf_addch(out, (acc | c));
439 			acc = pos = 0;
440 			break;
441 		}
442 	}
443 	return out;
444 }
445 
convert_to_utf8(struct mailinfo * mi,struct strbuf * line,const char * charset)446 static int convert_to_utf8(struct mailinfo *mi,
447 			   struct strbuf *line, const char *charset)
448 {
449 	char *out;
450 	size_t out_len;
451 
452 	if (!mi->metainfo_charset || !charset || !*charset)
453 		return 0;
454 
455 	if (same_encoding(mi->metainfo_charset, charset))
456 		return 0;
457 	out = reencode_string_len(line->buf, line->len,
458 				  mi->metainfo_charset, charset, &out_len);
459 	if (!out) {
460 		mi->input_error = -1;
461 		return error("cannot convert from %s to %s",
462 			     charset, mi->metainfo_charset);
463 	}
464 	strbuf_attach(line, out, out_len, out_len);
465 	return 0;
466 }
467 
decode_header(struct mailinfo * mi,struct strbuf * it)468 static void decode_header(struct mailinfo *mi, struct strbuf *it)
469 {
470 	char *in, *ep, *cp;
471 	struct strbuf outbuf = STRBUF_INIT, *dec;
472 	struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT;
473 	int found_error = 1; /* pessimism */
474 
475 	in = it->buf;
476 	while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) {
477 		int encoding;
478 		strbuf_reset(&charset_q);
479 		strbuf_reset(&piecebuf);
480 
481 		if (in != ep) {
482 			/*
483 			 * We are about to process an encoded-word
484 			 * that begins at ep, but there is something
485 			 * before the encoded word.
486 			 */
487 			char *scan;
488 			for (scan = in; scan < ep; scan++)
489 				if (!isspace(*scan))
490 					break;
491 
492 			if (scan != ep || in == it->buf) {
493 				/*
494 				 * We should not lose that "something",
495 				 * unless we have just processed an
496 				 * encoded-word, and there is only LWS
497 				 * before the one we are about to process.
498 				 */
499 				strbuf_add(&outbuf, in, ep - in);
500 			}
501 		}
502 		/* E.g.
503 		 * ep : "=?iso-2022-jp?B?GyR...?= foo"
504 		 * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
505 		 */
506 		ep += 2;
507 
508 		if (ep - it->buf >= it->len || !(cp = strchr(ep, '?')))
509 			goto release_return;
510 
511 		if (cp + 3 - it->buf > it->len)
512 			goto release_return;
513 		strbuf_add(&charset_q, ep, cp - ep);
514 
515 		encoding = cp[1];
516 		if (!encoding || cp[2] != '?')
517 			goto release_return;
518 		ep = strstr(cp + 3, "?=");
519 		if (!ep)
520 			goto release_return;
521 		strbuf_add(&piecebuf, cp + 3, ep - cp - 3);
522 		switch (tolower(encoding)) {
523 		default:
524 			goto release_return;
525 		case 'b':
526 			dec = decode_b_segment(&piecebuf);
527 			break;
528 		case 'q':
529 			dec = decode_q_segment(&piecebuf, 1);
530 			break;
531 		}
532 		if (convert_to_utf8(mi, dec, charset_q.buf))
533 			goto release_return;
534 
535 		strbuf_addbuf(&outbuf, dec);
536 		strbuf_release(dec);
537 		free(dec);
538 		in = ep + 2;
539 	}
540 	strbuf_addstr(&outbuf, in);
541 	strbuf_reset(it);
542 	strbuf_addbuf(it, &outbuf);
543 	found_error = 0;
544 release_return:
545 	strbuf_release(&outbuf);
546 	strbuf_release(&charset_q);
547 	strbuf_release(&piecebuf);
548 
549 	if (found_error)
550 		mi->input_error = -1;
551 }
552 
/*
 * Returns true if "line" contains a header matching "hdr".  In that
 * case the header's value is appended to "val" and "val" is then run
 * through decode_header(), which unwraps RFC2047 B and Q encoding and
 * optionally normalizes the meta information to utf8.
 */
static int parse_header(const struct strbuf *line,
			const char *hdr,
			struct mailinfo *mi,
			struct strbuf *val)
{
	const char *raw;

	if (skip_header(line, hdr, &raw)) {
		strbuf_addstr(val, raw);
		decode_header(mi, val);
		return 1;
	}
	return 0;
}
571 
check_header(struct mailinfo * mi,const struct strbuf * line,struct strbuf * hdr_data[],int overwrite)572 static int check_header(struct mailinfo *mi,
573 			const struct strbuf *line,
574 			struct strbuf *hdr_data[], int overwrite)
575 {
576 	int i, ret = 0;
577 	struct strbuf sb = STRBUF_INIT;
578 
579 	/* search for the interesting parts */
580 	for (i = 0; header[i]; i++) {
581 		if ((!hdr_data[i] || overwrite) &&
582 		    parse_header(line, header[i], mi, &sb)) {
583 			handle_header(&hdr_data[i], &sb);
584 			ret = 1;
585 			goto check_header_out;
586 		}
587 	}
588 
589 	/* Content stuff */
590 	if (parse_header(line, "Content-Type", mi, &sb)) {
591 		handle_content_type(mi, &sb);
592 		ret = 1;
593 		goto check_header_out;
594 	}
595 	if (parse_header(line, "Content-Transfer-Encoding", mi, &sb)) {
596 		handle_content_transfer_encoding(mi, &sb);
597 		ret = 1;
598 		goto check_header_out;
599 	}
600 	if (parse_header(line, "Message-Id", mi, &sb)) {
601 		if (mi->add_message_id)
602 			mi->message_id = strbuf_detach(&sb, NULL);
603 		ret = 1;
604 		goto check_header_out;
605 	}
606 
607 check_header_out:
608 	strbuf_release(&sb);
609 	return ret;
610 }
611 
612 /*
613  * Returns 1 if the given line or any line beginning with the given line is an
614  * in-body header (that is, check_header will succeed when passed
615  * mi->s_hdr_data).
616  */
is_inbody_header(const struct mailinfo * mi,const struct strbuf * line)617 static int is_inbody_header(const struct mailinfo *mi,
618 			    const struct strbuf *line)
619 {
620 	int i;
621 	const char *val;
622 	for (i = 0; header[i]; i++)
623 		if (!mi->s_hdr_data[i] && skip_header(line, header[i], &val))
624 			return 1;
625 	return 0;
626 }
627 
decode_transfer_encoding(struct mailinfo * mi,struct strbuf * line)628 static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line)
629 {
630 	struct strbuf *ret;
631 
632 	switch (mi->transfer_encoding) {
633 	case TE_QP:
634 		ret = decode_q_segment(line, 0);
635 		break;
636 	case TE_BASE64:
637 		ret = decode_b_segment(line);
638 		break;
639 	case TE_DONTCARE:
640 	default:
641 		return;
642 	}
643 	strbuf_reset(line);
644 	strbuf_addbuf(line, ret);
645 	strbuf_release(ret);
646 	free(ret);
647 }
648 
patchbreak(const struct strbuf * line)649 static inline int patchbreak(const struct strbuf *line)
650 {
651 	size_t i;
652 
653 	/* Beginning of a "diff -" header? */
654 	if (starts_with(line->buf, "diff -"))
655 		return 1;
656 
657 	/* CVS "Index: " line? */
658 	if (starts_with(line->buf, "Index: "))
659 		return 1;
660 
661 	/*
662 	 * "--- <filename>" starts patches without headers
663 	 * "---<sp>*" is a manual separator
664 	 */
665 	if (line->len < 4)
666 		return 0;
667 
668 	if (starts_with(line->buf, "---")) {
669 		/* space followed by a filename? */
670 		if (line->buf[3] == ' ' && !isspace(line->buf[4]))
671 			return 1;
672 		/* Just whitespace? */
673 		for (i = 3; i < line->len; i++) {
674 			unsigned char c = line->buf[i];
675 			if (c == '\n')
676 				return 1;
677 			if (!isspace(c))
678 				break;
679 		}
680 		return 0;
681 	}
682 	return 0;
683 }
684 
/*
 * Returns 1 when "line" looks like a scissors line such as
 * "-- >8 --", 0 otherwise.
 *
 * A "perforation" is made of dashes, scissors marks (">8", "8<", ">%"
 * or "%<") and the whitespace that follows them.  The line qualifies
 * when it has at least one scissors mark, its visible part is at
 * least 8 bytes wide, the perforation occupies more than a third of
 * the visible width, and whitespace makes up less than half of the
 * perforation.  The visible width is measured from the first
 * non-blank byte to the last byte recorded as non-blank, which for a
 * scissors mark is its first byte.
 */
static int is_scissors_line(const char *line)
{
	const char *p, *first = NULL, *last = NULL;
	int scissors = 0, gap = 0;
	int perforation = 0, in_perforation = 0;
	int visible;

	for (p = line; *p; p++) {
		if (isspace((unsigned char)*p)) {
			/* whitespace only counts while inside a perforation */
			if (in_perforation) {
				perforation++;
				gap++;
			}
			continue;
		}

		last = p;
		if (!first)
			first = p;

		if (*p == '-') {
			in_perforation = 1;
			perforation++;
		} else if ((p[0] == '>' && (p[1] == '8' || p[1] == '%')) ||
			   ((p[0] == '8' || p[0] == '%') && p[1] == '<')) {
			in_perforation = 1;
			perforation += 2;
			scissors += 2;
			p++; /* the mark is two bytes wide */
		} else {
			in_perforation = 0;
		}
	}

	visible = first ? (int)(last - first + 1) : 0;

	if (!scissors || visible < 8)
		return 0;
	return visible < perforation * 3 && gap * 2 < perforation;
}
736 
flush_inbody_header_accum(struct mailinfo * mi)737 static void flush_inbody_header_accum(struct mailinfo *mi)
738 {
739 	if (!mi->inbody_header_accum.len)
740 		return;
741 	if (!check_header(mi, &mi->inbody_header_accum, mi->s_hdr_data, 0))
742 		BUG("inbody_header_accum, if not empty, must always contain a valid in-body header");
743 	strbuf_reset(&mi->inbody_header_accum);
744 }
745 
check_inbody_header(struct mailinfo * mi,const struct strbuf * line)746 static int check_inbody_header(struct mailinfo *mi, const struct strbuf *line)
747 {
748 	if (mi->inbody_header_accum.len &&
749 	    (line->buf[0] == ' ' || line->buf[0] == '\t')) {
750 		if (mi->use_scissors && is_scissors_line(line->buf)) {
751 			/*
752 			 * This is a scissors line; do not consider this line
753 			 * as a header continuation line.
754 			 */
755 			flush_inbody_header_accum(mi);
756 			return 0;
757 		}
758 		strbuf_strip_suffix(&mi->inbody_header_accum, "\n");
759 		strbuf_addbuf(&mi->inbody_header_accum, line);
760 		return 1;
761 	}
762 
763 	flush_inbody_header_accum(mi);
764 
765 	if (starts_with(line->buf, ">From") && isspace(line->buf[5]))
766 		return is_format_patch_separator(line->buf + 1, line->len - 1);
767 	if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) {
768 		int i;
769 		for (i = 0; header[i]; i++)
770 			if (!strcmp("Subject", header[i])) {
771 				handle_header(&mi->s_hdr_data[i], line);
772 				return 1;
773 			}
774 		return 0;
775 	}
776 	if (is_inbody_header(mi, line)) {
777 		strbuf_addbuf(&mi->inbody_header_accum, line);
778 		return 1;
779 	}
780 	return 0;
781 }
782 
handle_commit_msg(struct mailinfo * mi,struct strbuf * line)783 static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
784 {
785 	assert(!mi->filter_stage);
786 
787 	if (mi->header_stage) {
788 		if (!line->len || (line->len == 1 && line->buf[0] == '\n')) {
789 			if (mi->inbody_header_accum.len) {
790 				flush_inbody_header_accum(mi);
791 				mi->header_stage = 0;
792 			}
793 			return 0;
794 		}
795 	}
796 
797 	if (mi->use_inbody_headers && mi->header_stage) {
798 		mi->header_stage = check_inbody_header(mi, line);
799 		if (mi->header_stage)
800 			return 0;
801 	} else
802 		/* Only trim the first (blank) line of the commit message
803 		 * when ignoring in-body headers.
804 		 */
805 		mi->header_stage = 0;
806 
807 	/* normalize the log message to UTF-8. */
808 	if (convert_to_utf8(mi, line, mi->charset.buf))
809 		return 0; /* mi->input_error already set */
810 
811 	if (mi->use_scissors && is_scissors_line(line->buf)) {
812 		int i;
813 
814 		strbuf_setlen(&mi->log_message, 0);
815 		mi->header_stage = 1;
816 
817 		/*
818 		 * We may have already read "secondary headers"; purge
819 		 * them to give ourselves a clean restart.
820 		 */
821 		for (i = 0; header[i]; i++) {
822 			if (mi->s_hdr_data[i])
823 				strbuf_release(mi->s_hdr_data[i]);
824 			FREE_AND_NULL(mi->s_hdr_data[i]);
825 		}
826 		return 0;
827 	}
828 
829 	if (patchbreak(line)) {
830 		if (mi->message_id)
831 			strbuf_addf(&mi->log_message,
832 				    "Message-Id: %s\n", mi->message_id);
833 		return 1;
834 	}
835 
836 	strbuf_addbuf(&mi->log_message, line);
837 	return 0;
838 }
839 
handle_patch(struct mailinfo * mi,const struct strbuf * line)840 static void handle_patch(struct mailinfo *mi, const struct strbuf *line)
841 {
842 	fwrite(line->buf, 1, line->len, mi->patchfile);
843 	mi->patch_lines++;
844 }
845 
handle_filter(struct mailinfo * mi,struct strbuf * line)846 static void handle_filter(struct mailinfo *mi, struct strbuf *line)
847 {
848 	switch (mi->filter_stage) {
849 	case 0:
850 		if (!handle_commit_msg(mi, line))
851 			break;
852 		mi->filter_stage++;
853 		/* fallthrough */
854 	case 1:
855 		handle_patch(mi, line);
856 		break;
857 	}
858 }
859 
is_rfc2822_header(const struct strbuf * line)860 static int is_rfc2822_header(const struct strbuf *line)
861 {
862 	/*
863 	 * The section that defines the loosest possible
864 	 * field name is "3.6.8 Optional fields".
865 	 *
866 	 * optional-field = field-name ":" unstructured CRLF
867 	 * field-name = 1*ftext
868 	 * ftext = %d33-57 / %59-126
869 	 */
870 	int ch;
871 	char *cp = line->buf;
872 
873 	/* Count mbox From headers as headers */
874 	if (starts_with(cp, "From ") || starts_with(cp, ">From "))
875 		return 1;
876 
877 	while ((ch = *cp++)) {
878 		if (ch == ':')
879 			return 1;
880 		if ((33 <= ch && ch <= 57) ||
881 		    (59 <= ch && ch <= 126))
882 			continue;
883 		break;
884 	}
885 	return 0;
886 }
887 
read_one_header_line(struct strbuf * line,FILE * in)888 static int read_one_header_line(struct strbuf *line, FILE *in)
889 {
890 	struct strbuf continuation = STRBUF_INIT;
891 
892 	/* Get the first part of the line. */
893 	if (strbuf_getline_lf(line, in))
894 		return 0;
895 
896 	/*
897 	 * Is it an empty line or not a valid rfc2822 header?
898 	 * If so, stop here, and return false ("not a header")
899 	 */
900 	strbuf_rtrim(line);
901 	if (!line->len || !is_rfc2822_header(line)) {
902 		/* Re-add the newline */
903 		strbuf_addch(line, '\n');
904 		return 0;
905 	}
906 
907 	/*
908 	 * Now we need to eat all the continuation lines..
909 	 * Yuck, 2822 header "folding"
910 	 */
911 	for (;;) {
912 		int peek;
913 
914 		peek = fgetc(in);
915 		if (peek == EOF)
916 			break;
917 		ungetc(peek, in);
918 		if (peek != ' ' && peek != '\t')
919 			break;
920 		if (strbuf_getline_lf(&continuation, in))
921 			break;
922 		continuation.buf[0] = ' ';
923 		strbuf_rtrim(&continuation);
924 		strbuf_addbuf(line, &continuation);
925 	}
926 	strbuf_release(&continuation);
927 
928 	return 1;
929 }
930 
find_boundary(struct mailinfo * mi,struct strbuf * line)931 static int find_boundary(struct mailinfo *mi, struct strbuf *line)
932 {
933 	while (!strbuf_getline_lf(line, mi->input)) {
934 		if (*(mi->content_top) && is_multipart_boundary(mi, line))
935 			return 1;
936 	}
937 	return 0;
938 }
939 
handle_boundary(struct mailinfo * mi,struct strbuf * line)940 static int handle_boundary(struct mailinfo *mi, struct strbuf *line)
941 {
942 	struct strbuf newline = STRBUF_INIT;
943 
944 	strbuf_addch(&newline, '\n');
945 again:
946 	if (line->len >= (*(mi->content_top))->len + 2 &&
947 	    !memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) {
948 		/* we hit an end boundary */
949 		/* pop the current boundary off the stack */
950 		strbuf_release(*(mi->content_top));
951 		FREE_AND_NULL(*(mi->content_top));
952 
953 		/* technically won't happen as is_multipart_boundary()
954 		   will fail first.  But just in case..
955 		 */
956 		if (--mi->content_top < mi->content) {
957 			error("Detected mismatched boundaries, can't recover");
958 			mi->input_error = -1;
959 			mi->content_top = mi->content;
960 			strbuf_release(&newline);
961 			return 0;
962 		}
963 		handle_filter(mi, &newline);
964 		strbuf_release(&newline);
965 		if (mi->input_error)
966 			return 0;
967 
968 		/* skip to the next boundary */
969 		if (!find_boundary(mi, line))
970 			return 0;
971 		goto again;
972 	}
973 
974 	/* set some defaults */
975 	mi->transfer_encoding = TE_DONTCARE;
976 	strbuf_reset(&mi->charset);
977 
978 	/* slurp in this section's info */
979 	while (read_one_header_line(line, mi->input))
980 		check_header(mi, line, mi->p_hdr_data, 0);
981 
982 	strbuf_release(&newline);
983 	/* replenish line */
984 	if (strbuf_getline_lf(line, mi->input))
985 		return 0;
986 	strbuf_addch(line, '\n');
987 	return 1;
988 }
989 
handle_filter_flowed(struct mailinfo * mi,struct strbuf * line,struct strbuf * prev)990 static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
991 				 struct strbuf *prev)
992 {
993 	size_t len = line->len;
994 	const char *rest;
995 
996 	if (!mi->format_flowed) {
997 		if (len >= 2 &&
998 		    line->buf[len - 2] == '\r' &&
999 		    line->buf[len - 1] == '\n') {
1000 			mi->have_quoted_cr = 1;
1001 			if (mi->quoted_cr == quoted_cr_strip) {
1002 				strbuf_setlen(line, len - 2);
1003 				strbuf_addch(line, '\n');
1004 				len--;
1005 			}
1006 		}
1007 		handle_filter(mi, line);
1008 		return;
1009 	}
1010 
1011 	if (line->buf[len - 1] == '\n') {
1012 		len--;
1013 		if (len && line->buf[len - 1] == '\r')
1014 			len--;
1015 	}
1016 
1017 	/* Keep signature separator as-is. */
1018 	if (skip_prefix(line->buf, "-- ", &rest) && rest - line->buf == len) {
1019 		if (prev->len) {
1020 			handle_filter(mi, prev);
1021 			strbuf_reset(prev);
1022 		}
1023 		handle_filter(mi, line);
1024 		return;
1025 	}
1026 
1027 	/* Unstuff space-stuffed line. */
1028 	if (len && line->buf[0] == ' ') {
1029 		strbuf_remove(line, 0, 1);
1030 		len--;
1031 	}
1032 
1033 	/* Save flowed line for later, but without the soft line break. */
1034 	if (len && line->buf[len - 1] == ' ') {
1035 		strbuf_add(prev, line->buf, len - !!mi->delsp);
1036 		return;
1037 	}
1038 
1039 	/* Prepend any previous partial lines */
1040 	strbuf_insert(line, 0, prev->buf, prev->len);
1041 	strbuf_reset(prev);
1042 
1043 	handle_filter(mi, line);
1044 }
1045 
summarize_quoted_cr(struct mailinfo * mi)1046 static void summarize_quoted_cr(struct mailinfo *mi)
1047 {
1048 	if (mi->have_quoted_cr &&
1049 	    mi->quoted_cr == quoted_cr_warn)
1050 		warning(_("quoted CRLF detected"));
1051 }
1052 
handle_body(struct mailinfo * mi,struct strbuf * line)1053 static void handle_body(struct mailinfo *mi, struct strbuf *line)
1054 {
1055 	struct strbuf prev = STRBUF_INIT;
1056 
1057 	/* Skip up to the first boundary */
1058 	if (*(mi->content_top)) {
1059 		if (!find_boundary(mi, line))
1060 			goto handle_body_out;
1061 	}
1062 
1063 	do {
1064 		/* process any boundary lines */
1065 		if (*(mi->content_top) && is_multipart_boundary(mi, line)) {
1066 			/* flush any leftover */
1067 			if (prev.len) {
1068 				handle_filter(mi, &prev);
1069 				strbuf_reset(&prev);
1070 			}
1071 			summarize_quoted_cr(mi);
1072 			mi->have_quoted_cr = 0;
1073 			if (!handle_boundary(mi, line))
1074 				goto handle_body_out;
1075 		}
1076 
1077 		/* Unwrap transfer encoding */
1078 		decode_transfer_encoding(mi, line);
1079 
1080 		switch (mi->transfer_encoding) {
1081 		case TE_BASE64:
1082 		case TE_QP:
1083 		{
1084 			struct strbuf **lines, **it, *sb;
1085 
1086 			/* Prepend any previous partial lines */
1087 			strbuf_insert(line, 0, prev.buf, prev.len);
1088 			strbuf_reset(&prev);
1089 
1090 			/*
1091 			 * This is a decoded line that may contain
1092 			 * multiple new lines.  Pass only one chunk
1093 			 * at a time to handle_filter()
1094 			 */
1095 			lines = strbuf_split(line, '\n');
1096 			for (it = lines; (sb = *it); it++) {
1097 				if (*(it + 1) == NULL) /* The last line */
1098 					if (sb->buf[sb->len - 1] != '\n') {
1099 						/* Partial line, save it for later. */
1100 						strbuf_addbuf(&prev, sb);
1101 						break;
1102 					}
1103 				handle_filter_flowed(mi, sb, &prev);
1104 			}
1105 			/*
1106 			 * The partial chunk is saved in "prev" and will be
1107 			 * appended by the next iteration of read_line_with_nul().
1108 			 */
1109 			strbuf_list_free(lines);
1110 			break;
1111 		}
1112 		default:
1113 			handle_filter_flowed(mi, line, &prev);
1114 		}
1115 
1116 		if (mi->input_error)
1117 			break;
1118 	} while (!strbuf_getwholeline(line, mi->input, '\n'));
1119 
1120 	if (prev.len)
1121 		handle_filter(mi, &prev);
1122 	summarize_quoted_cr(mi);
1123 
1124 	flush_inbody_header_accum(mi);
1125 
1126 handle_body_out:
1127 	strbuf_release(&prev);
1128 }
1129 
output_header_lines(FILE * fout,const char * hdr,const struct strbuf * data)1130 static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data)
1131 {
1132 	const char *sp = data->buf;
1133 	while (1) {
1134 		char *ep = strchr(sp, '\n');
1135 		int len;
1136 		if (!ep)
1137 			len = strlen(sp);
1138 		else
1139 			len = ep - sp;
1140 		fprintf(fout, "%s: %.*s\n", hdr, len, sp);
1141 		if (!ep)
1142 			break;
1143 		sp = ep + 1;
1144 	}
1145 }
1146 
handle_info(struct mailinfo * mi)1147 static void handle_info(struct mailinfo *mi)
1148 {
1149 	struct strbuf *hdr;
1150 	int i;
1151 
1152 	for (i = 0; header[i]; i++) {
1153 		/* only print inbody headers if we output a patch file */
1154 		if (mi->patch_lines && mi->s_hdr_data[i])
1155 			hdr = mi->s_hdr_data[i];
1156 		else if (mi->p_hdr_data[i])
1157 			hdr = mi->p_hdr_data[i];
1158 		else
1159 			continue;
1160 
1161 		if (memchr(hdr->buf, '\0', hdr->len)) {
1162 			error("a NUL byte in '%s' is not allowed.", header[i]);
1163 			mi->input_error = -1;
1164 		}
1165 
1166 		if (!strcmp(header[i], "Subject")) {
1167 			if (!mi->keep_subject) {
1168 				cleanup_subject(mi, hdr);
1169 				cleanup_space(hdr);
1170 			}
1171 			output_header_lines(mi->output, "Subject", hdr);
1172 		} else if (!strcmp(header[i], "From")) {
1173 			cleanup_space(hdr);
1174 			handle_from(mi, hdr);
1175 			fprintf(mi->output, "Author: %s\n", mi->name.buf);
1176 			fprintf(mi->output, "Email: %s\n", mi->email.buf);
1177 		} else {
1178 			cleanup_space(hdr);
1179 			fprintf(mi->output, "%s: %s\n", header[i], hdr->buf);
1180 		}
1181 	}
1182 	fprintf(mi->output, "\n");
1183 }
1184 
mailinfo(struct mailinfo * mi,const char * msg,const char * patch)1185 int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
1186 {
1187 	FILE *cmitmsg;
1188 	int peek;
1189 	struct strbuf line = STRBUF_INIT;
1190 
1191 	cmitmsg = fopen(msg, "w");
1192 	if (!cmitmsg) {
1193 		perror(msg);
1194 		return -1;
1195 	}
1196 	mi->patchfile = fopen(patch, "w");
1197 	if (!mi->patchfile) {
1198 		perror(patch);
1199 		fclose(cmitmsg);
1200 		return -1;
1201 	}
1202 
1203 	mi->p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->p_hdr_data)));
1204 	mi->s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->s_hdr_data)));
1205 
1206 	do {
1207 		peek = fgetc(mi->input);
1208 		if (peek == EOF) {
1209 			fclose(cmitmsg);
1210 			return error("empty patch: '%s'", patch);
1211 		}
1212 	} while (isspace(peek));
1213 	ungetc(peek, mi->input);
1214 
1215 	/* process the email header */
1216 	while (read_one_header_line(&line, mi->input))
1217 		check_header(mi, &line, mi->p_hdr_data, 1);
1218 
1219 	handle_body(mi, &line);
1220 	fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg);
1221 	fclose(cmitmsg);
1222 	fclose(mi->patchfile);
1223 
1224 	handle_info(mi);
1225 	strbuf_release(&line);
1226 	return mi->input_error;
1227 }
1228 
mailinfo_parse_quoted_cr_action(const char * actionstr,int * action)1229 int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action)
1230 {
1231 	if (!strcmp(actionstr, "nowarn"))
1232 		*action = quoted_cr_nowarn;
1233 	else if (!strcmp(actionstr, "warn"))
1234 		*action = quoted_cr_warn;
1235 	else if (!strcmp(actionstr, "strip"))
1236 		*action = quoted_cr_strip;
1237 	else
1238 		return -1;
1239 	return 0;
1240 }
1241 
git_mailinfo_config(const char * var,const char * value,void * mi_)1242 static int git_mailinfo_config(const char *var, const char *value, void *mi_)
1243 {
1244 	struct mailinfo *mi = mi_;
1245 
1246 	if (!starts_with(var, "mailinfo."))
1247 		return git_default_config(var, value, NULL);
1248 	if (!strcmp(var, "mailinfo.scissors")) {
1249 		mi->use_scissors = git_config_bool(var, value);
1250 		return 0;
1251 	}
1252 	if (!strcmp(var, "mailinfo.quotedcr")) {
1253 		if (mailinfo_parse_quoted_cr_action(value, &mi->quoted_cr) != 0)
1254 			return error(_("bad action '%s' for '%s'"), value, var);
1255 		return 0;
1256 	}
1257 	/* perhaps others here */
1258 	return 0;
1259 }
1260 
setup_mailinfo(struct mailinfo * mi)1261 void setup_mailinfo(struct mailinfo *mi)
1262 {
1263 	memset(mi, 0, sizeof(*mi));
1264 	strbuf_init(&mi->name, 0);
1265 	strbuf_init(&mi->email, 0);
1266 	strbuf_init(&mi->charset, 0);
1267 	strbuf_init(&mi->log_message, 0);
1268 	strbuf_init(&mi->inbody_header_accum, 0);
1269 	mi->quoted_cr = quoted_cr_warn;
1270 	mi->header_stage = 1;
1271 	mi->use_inbody_headers = 1;
1272 	mi->content_top = mi->content;
1273 	git_config(git_mailinfo_config, mi);
1274 }
1275 
clear_mailinfo(struct mailinfo * mi)1276 void clear_mailinfo(struct mailinfo *mi)
1277 {
1278 	strbuf_release(&mi->name);
1279 	strbuf_release(&mi->email);
1280 	strbuf_release(&mi->charset);
1281 	strbuf_release(&mi->inbody_header_accum);
1282 	free(mi->message_id);
1283 
1284 	strbuf_list_free(mi->p_hdr_data);
1285 	strbuf_list_free(mi->s_hdr_data);
1286 
1287 	while (mi->content < mi->content_top) {
1288 		free(*(mi->content_top));
1289 		mi->content_top--;
1290 	}
1291 
1292 	strbuf_release(&mi->log_message);
1293 }
1294