1 #include "cache.h"
2 #include "config.h"
3 #include "utf8.h"
4 #include "strbuf.h"
5 #include "mailinfo.h"
6 
cleanup_space(struct strbuf * sb)7 static void cleanup_space(struct strbuf *sb)
8 {
9 	size_t pos, cnt;
10 	for (pos = 0; pos < sb->len; pos++) {
11 		if (isspace(sb->buf[pos])) {
12 			sb->buf[pos] = ' ';
13 			for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++);
14 			strbuf_remove(sb, pos + 1, cnt);
15 		}
16 	}
17 }
18 
get_sane_name(struct strbuf * out,struct strbuf * name,struct strbuf * email)19 static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email)
20 {
21 	struct strbuf *src = name;
22 	if (name->len < 3 || 60 < name->len || strchr(name->buf, '@') ||
23 		strchr(name->buf, '<') || strchr(name->buf, '>'))
24 		src = email;
25 	else if (name == out)
26 		return;
27 	strbuf_reset(out);
28 	strbuf_addbuf(out, src);
29 }
30 
parse_bogus_from(struct mailinfo * mi,const struct strbuf * line)31 static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line)
32 {
33 	/* John Doe <johndoe> */
34 
35 	char *bra, *ket;
36 	/* This is fallback, so do not bother if we already have an
37 	 * e-mail address.
38 	 */
39 	if (mi->email.len)
40 		return;
41 
42 	bra = strchr(line->buf, '<');
43 	if (!bra)
44 		return;
45 	ket = strchr(bra, '>');
46 	if (!ket)
47 		return;
48 
49 	strbuf_reset(&mi->email);
50 	strbuf_add(&mi->email, bra + 1, ket - bra - 1);
51 
52 	strbuf_reset(&mi->name);
53 	strbuf_add(&mi->name, line->buf, bra - line->buf);
54 	strbuf_trim(&mi->name);
55 	get_sane_name(&mi->name, &mi->name, &mi->email);
56 }
57 
/*
 * Copy an RFC 2822 style comment into outbuf with backslash quoting
 * removed.
 *
 * "in" points just past the opening '(' that the caller has already
 * consumed.  The parentheses themselves (including those of nested
 * comments) are kept in the output; a backslash is dropped and the
 * character after it is copied literally.
 *
 * Returns a pointer just past the ')' that closes the outermost comment,
 * or a pointer to the terminating NUL when the comment is unterminated.
 * It never returns a pointer beyond the NUL, so callers can keep
 * scanning from the returned position safely.
 */
static const char *unquote_comment(struct strbuf *outbuf, const char *in)
{
	int take_next_literally = 0;
	int depth = 1;

	strbuf_addch(outbuf, '(');

	/*
	 * Test *in before advancing: stepping over the NUL would make
	 * the caller resume reading past the end of the string.
	 */
	while (*in) {
		int c = *in++;

		if (take_next_literally == 1) {
			take_next_literally = 0;
		} else {
			switch (c) {
			case '\\':
				take_next_literally = 1;
				continue;
			case '(':
				/*
				 * Track nesting with a counter rather than
				 * recursing, so deeply nested input cannot
				 * exhaust the stack.
				 */
				strbuf_addch(outbuf, '(');
				depth++;
				continue;
			case ')':
				strbuf_addch(outbuf, ')');
				if (!--depth)
					return in;
				continue;
			}
		}

		strbuf_addch(outbuf, c);
	}

	return in;
}
87 
/*
 * Copy the contents of a double-quoted string into outbuf with the
 * quoting removed.
 *
 * "in" points just past the opening '"' that the caller has already
 * consumed.  A backslash is dropped and the character after it is copied
 * literally; the closing '"' is not copied.
 *
 * Returns a pointer just past the closing '"', or a pointer to the
 * terminating NUL when the string is unterminated.  It never returns a
 * pointer beyond the NUL, so callers can keep scanning from the returned
 * position safely.
 */
static const char *unquote_quoted_string(struct strbuf *outbuf, const char *in)
{
	int take_next_literally = 0;

	/*
	 * Test *in before advancing: stepping over the NUL would make
	 * the caller resume reading past the end of the string.
	 */
	while (*in) {
		int c = *in++;

		if (take_next_literally == 1) {
			take_next_literally = 0;
		} else {
			switch (c) {
			case '\\':
				take_next_literally = 1;
				continue;
			case '"':
				return in;
			}
		}

		strbuf_addch(outbuf, c);
	}

	return in;
}
111 
unquote_quoted_pair(struct strbuf * line)112 static void unquote_quoted_pair(struct strbuf *line)
113 {
114 	struct strbuf outbuf;
115 	const char *in = line->buf;
116 	int c;
117 
118 	strbuf_init(&outbuf, line->len);
119 
120 	while ((c = *in++) != 0) {
121 		switch (c) {
122 		case '"':
123 			in = unquote_quoted_string(&outbuf, in);
124 			continue;
125 		case '(':
126 			in = unquote_comment(&outbuf, in);
127 			continue;
128 		}
129 
130 		strbuf_addch(&outbuf, c);
131 	}
132 
133 	strbuf_swap(&outbuf, line);
134 	strbuf_release(&outbuf);
135 
136 }
137 
/*
 * Split the value of a From header into an e-mail address and a name,
 * storing them in mi->email and mi->name.
 *
 * The work is done on "f", a private copy of "from" with quoting
 * removed, so "from" itself is not modified.  A line without any '@'
 * is handed to parse_bogus_from() instead.
 */
static void handle_from(struct mailinfo *mi, const struct strbuf *from)
{
	char *at;
	size_t el;
	struct strbuf f;

	strbuf_init(&f, from->len);
	strbuf_addbuf(&f, from);

	unquote_quoted_pair(&f);

	at = strchr(f.buf, '@');
	if (!at) {
		/* note: the fallback sees the original, still-quoted line */
		parse_bogus_from(mi, from);
		goto out;
	}

	/*
	 * If we already have one email, don't take any confusing lines
	 */
	if (mi->email.len && strchr(at + 1, '@'))
		goto out;

	/* Pick up the string around '@', possibly delimited with <>
	 * pair; that is the email part.
	 */
	while (at > f.buf) {
		/* walk "at" back to the first character of the address */
		char c = at[-1];
		if (isspace(c))
			break;
		if (c == '<') {
			/* blank out the '<' so it does not end up in the name */
			at[-1] = ' ';
			break;
		}
		at--;
	}
	/* the address runs up to the next whitespace character or '>' */
	el = strcspn(at, " \n\t\r\v\f>");
	strbuf_reset(&mi->email);
	strbuf_add(&mi->email, at, el);
	/* cut the address out of f, along with the one delimiter after it */
	strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0));

	/* The remainder is name.  It could be
	 *
	 * - "John Doe <john.doe@xz>"			(a), or
	 * - "john.doe@xz (John Doe)"			(b), or
	 * - "John (zzz) Doe <john.doe@xz> (Comment)"	(c)
	 *
	 * but we have removed the email part, so
	 *
	 * - remove extra spaces which could stay after email (case 'c'), and
	 * - trim from both ends, possibly removing the () pair at the end
	 *   (cases 'a' and 'b').
	 */
	cleanup_space(&f);
	strbuf_trim(&f);
	if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') {
		strbuf_remove(&f, 0, 1);
		strbuf_setlen(&f, f.len - 1);
	}

	get_sane_name(&mi->name, &f, &mi->email);
out:
	strbuf_release(&f);
}
202 
handle_header(struct strbuf ** out,const struct strbuf * line)203 static void handle_header(struct strbuf **out, const struct strbuf *line)
204 {
205 	if (!*out) {
206 		*out = xmalloc(sizeof(struct strbuf));
207 		strbuf_init(*out, line->len);
208 	} else
209 		strbuf_reset(*out);
210 
211 	strbuf_addbuf(*out, line);
212 }
213 
214 /* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
215  * to have enough heuristics to grok MIME encoded patches often found
216  * on our mailing lists.  For example, we do not even treat header lines
217  * case insensitively.
218  */
219 
/*
 * Find the first case-insensitive occurrence of name (e.g. "charset=")
 * in line and copy the value that follows it into attr.
 *
 * A value starting with '"' extends to the next '"'; any other value
 * ends at the first ';', space or tab.  attr is always emptied first.
 * Returns 1 if name was found, 0 otherwise.
 */
static int slurp_attr(const char *line, const char *name, struct strbuf *attr)
{
	const char *value = strcasestr(line, name);
	const char *terminators = "; \t";

	strbuf_setlen(attr, 0);
	if (!value)
		return 0;

	value += strlen(name);
	if (*value == '"') {
		value++;
		terminators = "\"";
	}
	strbuf_add(attr, value, strcspn(value, terminators));
	return 1;
}
239 
has_attr_value(const char * line,const char * name,const char * value)240 static int has_attr_value(const char *line, const char *name, const char *value)
241 {
242 	struct strbuf sb = STRBUF_INIT;
243 	int rc = slurp_attr(line, name, &sb) && !strcasecmp(sb.buf, value);
244 	strbuf_release(&sb);
245 	return rc;
246 }
247 
handle_content_type(struct mailinfo * mi,struct strbuf * line)248 static void handle_content_type(struct mailinfo *mi, struct strbuf *line)
249 {
250 	struct strbuf *boundary = xmalloc(sizeof(struct strbuf));
251 	strbuf_init(boundary, line->len);
252 
253 	mi->format_flowed = has_attr_value(line->buf, "format=", "flowed");
254 	mi->delsp = has_attr_value(line->buf, "delsp=", "yes");
255 
256 	if (slurp_attr(line->buf, "boundary=", boundary)) {
257 		strbuf_insert(boundary, 0, "--", 2);
258 		if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) {
259 			error("Too many boundaries to handle");
260 			mi->input_error = -1;
261 			mi->content_top = &mi->content[MAX_BOUNDARIES] - 1;
262 			return;
263 		}
264 		*(mi->content_top) = boundary;
265 		boundary = NULL;
266 	}
267 	slurp_attr(line->buf, "charset=", &mi->charset);
268 
269 	if (boundary) {
270 		strbuf_release(boundary);
271 		free(boundary);
272 	}
273 }
274 
handle_content_transfer_encoding(struct mailinfo * mi,const struct strbuf * line)275 static void handle_content_transfer_encoding(struct mailinfo *mi,
276 					     const struct strbuf *line)
277 {
278 	if (strcasestr(line->buf, "base64"))
279 		mi->transfer_encoding = TE_BASE64;
280 	else if (strcasestr(line->buf, "quoted-printable"))
281 		mi->transfer_encoding = TE_QP;
282 	else
283 		mi->transfer_encoding = TE_DONTCARE;
284 }
285 
is_multipart_boundary(struct mailinfo * mi,const struct strbuf * line)286 static int is_multipart_boundary(struct mailinfo *mi, const struct strbuf *line)
287 {
288 	struct strbuf *content_top = *(mi->content_top);
289 
290 	return ((content_top->len <= line->len) &&
291 		!memcmp(line->buf, content_top->buf, content_top->len));
292 }
293 
cleanup_subject(struct mailinfo * mi,struct strbuf * subject)294 static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject)
295 {
296 	size_t at = 0;
297 
298 	while (at < subject->len) {
299 		char *pos;
300 		size_t remove;
301 
302 		switch (subject->buf[at]) {
303 		case 'r': case 'R':
304 			if (subject->len <= at + 3)
305 				break;
306 			if ((subject->buf[at + 1] == 'e' ||
307 			     subject->buf[at + 1] == 'E') &&
308 			    subject->buf[at + 2] == ':') {
309 				strbuf_remove(subject, at, 3);
310 				continue;
311 			}
312 			at++;
313 			break;
314 		case ' ': case '\t': case ':':
315 			strbuf_remove(subject, at, 1);
316 			continue;
317 		case '[':
318 			pos = strchr(subject->buf + at, ']');
319 			if (!pos)
320 				break;
321 			remove = pos - subject->buf + at + 1;
322 			if (!mi->keep_non_patch_brackets_in_subject ||
323 			    (7 <= remove &&
324 			     memmem(subject->buf + at, remove, "PATCH", 5)))
325 				strbuf_remove(subject, at, remove);
326 			else {
327 				at += remove;
328 				/*
329 				 * If the input had a space after the ], keep
330 				 * it.  We don't bother with finding the end of
331 				 * the space, since we later normalize it
332 				 * anyway.
333 				 */
334 				if (isspace(subject->buf[at]))
335 					at += 1;
336 			}
337 			continue;
338 		}
339 		break;
340 	}
341 	strbuf_trim(subject);
342 }
343 
/*
 * Names of the headers whose values are collected; a header's position
 * here is also its index into the p_hdr_data[] and s_hdr_data[] arrays.
 * The array is larger than its initializer list, so the unused slots are
 * NULL and terminate the "for (i = 0; header[i]; i++)" loops in this
 * file.
 */
#define MAX_HDR_PARSED 10
static const char *header[MAX_HDR_PARSED] = {
	"From","Subject","Date",
};
348 
cmp_header(const struct strbuf * line,const char * hdr)349 static inline int cmp_header(const struct strbuf *line, const char *hdr)
350 {
351 	int len = strlen(hdr);
352 	return !strncasecmp(line->buf, hdr, len) && line->len > len &&
353 			line->buf[len] == ':' && isspace(line->buf[len + 1]);
354 }
355 
/*
 * Return 1 if line (of length len) has the shape of the "From <hash>
 * <fixed date>" line used as a separator: same length as SAMPLE,
 * "From " followed by exactly 40 lowercase hex digits, and a tail
 * identical to SAMPLE's.  Return 0 otherwise.
 */
static int is_format_patch_separator(const char *line, int len)
{
	static const char SAMPLE[] =
		"From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n";
	const char *after_from;
	size_t tail_offset;

	if (len != strlen(SAMPLE) ||
	    !skip_prefix(line, "From ", &after_from) ||
	    strspn(after_from, "0123456789abcdef") != 40)
		return 0;

	/* everything after the 40-digit object name must match SAMPLE */
	tail_offset = (after_from + 40) - line;
	return !memcmp(SAMPLE + tail_offset, line + tail_offset,
		       strlen(SAMPLE) - tail_offset);
}
371 
decode_q_segment(const struct strbuf * q_seg,int rfc2047)372 static struct strbuf *decode_q_segment(const struct strbuf *q_seg, int rfc2047)
373 {
374 	const char *in = q_seg->buf;
375 	int c;
376 	struct strbuf *out = xmalloc(sizeof(struct strbuf));
377 	strbuf_init(out, q_seg->len);
378 
379 	while ((c = *in++) != 0) {
380 		if (c == '=') {
381 			int ch, d = *in;
382 			if (d == '\n' || !d)
383 				break; /* drop trailing newline */
384 			ch = hex2chr(in);
385 			if (ch >= 0) {
386 				strbuf_addch(out, ch);
387 				in += 2;
388 				continue;
389 			}
390 			/* garbage -- fall through */
391 		}
392 		if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
393 			c = 0x20;
394 		strbuf_addch(out, c);
395 	}
396 	return out;
397 }
398 
decode_b_segment(const struct strbuf * b_seg)399 static struct strbuf *decode_b_segment(const struct strbuf *b_seg)
400 {
401 	/* Decode in..ep, possibly in-place to ot */
402 	int c, pos = 0, acc = 0;
403 	const char *in = b_seg->buf;
404 	struct strbuf *out = xmalloc(sizeof(struct strbuf));
405 	strbuf_init(out, b_seg->len);
406 
407 	while ((c = *in++) != 0) {
408 		if (c == '+')
409 			c = 62;
410 		else if (c == '/')
411 			c = 63;
412 		else if ('A' <= c && c <= 'Z')
413 			c -= 'A';
414 		else if ('a' <= c && c <= 'z')
415 			c -= 'a' - 26;
416 		else if ('0' <= c && c <= '9')
417 			c -= '0' - 52;
418 		else
419 			continue; /* garbage */
420 		switch (pos++) {
421 		case 0:
422 			acc = (c << 2);
423 			break;
424 		case 1:
425 			strbuf_addch(out, (acc | (c >> 4)));
426 			acc = (c & 15) << 4;
427 			break;
428 		case 2:
429 			strbuf_addch(out, (acc | (c >> 2)));
430 			acc = (c & 3) << 6;
431 			break;
432 		case 3:
433 			strbuf_addch(out, (acc | c));
434 			acc = pos = 0;
435 			break;
436 		}
437 	}
438 	return out;
439 }
440 
convert_to_utf8(struct mailinfo * mi,struct strbuf * line,const char * charset)441 static int convert_to_utf8(struct mailinfo *mi,
442 			   struct strbuf *line, const char *charset)
443 {
444 	char *out;
445 
446 	if (!mi->metainfo_charset || !charset || !*charset)
447 		return 0;
448 
449 	if (same_encoding(mi->metainfo_charset, charset))
450 		return 0;
451 	out = reencode_string(line->buf, mi->metainfo_charset, charset);
452 	if (!out) {
453 		mi->input_error = -1;
454 		return error("cannot convert from %s to %s",
455 			     charset, mi->metainfo_charset);
456 	}
457 	strbuf_attach(line, out, strlen(out), strlen(out));
458 	return 0;
459 }
460 
decode_header(struct mailinfo * mi,struct strbuf * it)461 static void decode_header(struct mailinfo *mi, struct strbuf *it)
462 {
463 	char *in, *ep, *cp;
464 	struct strbuf outbuf = STRBUF_INIT, *dec;
465 	struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT;
466 	int found_error = 1; /* pessimism */
467 
468 	in = it->buf;
469 	while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) {
470 		int encoding;
471 		strbuf_reset(&charset_q);
472 		strbuf_reset(&piecebuf);
473 
474 		if (in != ep) {
475 			/*
476 			 * We are about to process an encoded-word
477 			 * that begins at ep, but there is something
478 			 * before the encoded word.
479 			 */
480 			char *scan;
481 			for (scan = in; scan < ep; scan++)
482 				if (!isspace(*scan))
483 					break;
484 
485 			if (scan != ep || in == it->buf) {
486 				/*
487 				 * We should not lose that "something",
488 				 * unless we have just processed an
489 				 * encoded-word, and there is only LWS
490 				 * before the one we are about to process.
491 				 */
492 				strbuf_add(&outbuf, in, ep - in);
493 			}
494 		}
495 		/* E.g.
496 		 * ep : "=?iso-2022-jp?B?GyR...?= foo"
497 		 * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
498 		 */
499 		ep += 2;
500 
501 		if (ep - it->buf >= it->len || !(cp = strchr(ep, '?')))
502 			goto release_return;
503 
504 		if (cp + 3 - it->buf > it->len)
505 			goto release_return;
506 		strbuf_add(&charset_q, ep, cp - ep);
507 
508 		encoding = cp[1];
509 		if (!encoding || cp[2] != '?')
510 			goto release_return;
511 		ep = strstr(cp + 3, "?=");
512 		if (!ep)
513 			goto release_return;
514 		strbuf_add(&piecebuf, cp + 3, ep - cp - 3);
515 		switch (tolower(encoding)) {
516 		default:
517 			goto release_return;
518 		case 'b':
519 			dec = decode_b_segment(&piecebuf);
520 			break;
521 		case 'q':
522 			dec = decode_q_segment(&piecebuf, 1);
523 			break;
524 		}
525 		if (convert_to_utf8(mi, dec, charset_q.buf))
526 			goto release_return;
527 
528 		strbuf_addbuf(&outbuf, dec);
529 		strbuf_release(dec);
530 		free(dec);
531 		in = ep + 2;
532 	}
533 	strbuf_addstr(&outbuf, in);
534 	strbuf_reset(it);
535 	strbuf_addbuf(it, &outbuf);
536 	found_error = 0;
537 release_return:
538 	strbuf_release(&outbuf);
539 	strbuf_release(&charset_q);
540 	strbuf_release(&piecebuf);
541 
542 	if (found_error)
543 		mi->input_error = -1;
544 }
545 
check_header(struct mailinfo * mi,const struct strbuf * line,struct strbuf * hdr_data[],int overwrite)546 static int check_header(struct mailinfo *mi,
547 			const struct strbuf *line,
548 			struct strbuf *hdr_data[], int overwrite)
549 {
550 	int i, ret = 0, len;
551 	struct strbuf sb = STRBUF_INIT;
552 
553 	/* search for the interesting parts */
554 	for (i = 0; header[i]; i++) {
555 		int len = strlen(header[i]);
556 		if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) {
557 			/* Unwrap inline B and Q encoding, and optionally
558 			 * normalize the meta information to utf8.
559 			 */
560 			strbuf_add(&sb, line->buf + len + 2, line->len - len - 2);
561 			decode_header(mi, &sb);
562 			handle_header(&hdr_data[i], &sb);
563 			ret = 1;
564 			goto check_header_out;
565 		}
566 	}
567 
568 	/* Content stuff */
569 	if (cmp_header(line, "Content-Type")) {
570 		len = strlen("Content-Type: ");
571 		strbuf_add(&sb, line->buf + len, line->len - len);
572 		decode_header(mi, &sb);
573 		strbuf_insert(&sb, 0, "Content-Type: ", len);
574 		handle_content_type(mi, &sb);
575 		ret = 1;
576 		goto check_header_out;
577 	}
578 	if (cmp_header(line, "Content-Transfer-Encoding")) {
579 		len = strlen("Content-Transfer-Encoding: ");
580 		strbuf_add(&sb, line->buf + len, line->len - len);
581 		decode_header(mi, &sb);
582 		handle_content_transfer_encoding(mi, &sb);
583 		ret = 1;
584 		goto check_header_out;
585 	}
586 	if (cmp_header(line, "Message-Id")) {
587 		len = strlen("Message-Id: ");
588 		strbuf_add(&sb, line->buf + len, line->len - len);
589 		decode_header(mi, &sb);
590 		if (mi->add_message_id)
591 			mi->message_id = strbuf_detach(&sb, NULL);
592 		ret = 1;
593 		goto check_header_out;
594 	}
595 
596 check_header_out:
597 	strbuf_release(&sb);
598 	return ret;
599 }
600 
601 /*
602  * Returns 1 if the given line or any line beginning with the given line is an
603  * in-body header (that is, check_header will succeed when passed
604  * mi->s_hdr_data).
605  */
is_inbody_header(const struct mailinfo * mi,const struct strbuf * line)606 static int is_inbody_header(const struct mailinfo *mi,
607 			    const struct strbuf *line)
608 {
609 	int i;
610 	for (i = 0; header[i]; i++)
611 		if (!mi->s_hdr_data[i] && cmp_header(line, header[i]))
612 			return 1;
613 	return 0;
614 }
615 
decode_transfer_encoding(struct mailinfo * mi,struct strbuf * line)616 static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line)
617 {
618 	struct strbuf *ret;
619 
620 	switch (mi->transfer_encoding) {
621 	case TE_QP:
622 		ret = decode_q_segment(line, 0);
623 		break;
624 	case TE_BASE64:
625 		ret = decode_b_segment(line);
626 		break;
627 	case TE_DONTCARE:
628 	default:
629 		return;
630 	}
631 	strbuf_reset(line);
632 	strbuf_addbuf(line, ret);
633 	strbuf_release(ret);
634 	free(ret);
635 }
636 
patchbreak(const struct strbuf * line)637 static inline int patchbreak(const struct strbuf *line)
638 {
639 	size_t i;
640 
641 	/* Beginning of a "diff -" header? */
642 	if (starts_with(line->buf, "diff -"))
643 		return 1;
644 
645 	/* CVS "Index: " line? */
646 	if (starts_with(line->buf, "Index: "))
647 		return 1;
648 
649 	/*
650 	 * "--- <filename>" starts patches without headers
651 	 * "---<sp>*" is a manual separator
652 	 */
653 	if (line->len < 4)
654 		return 0;
655 
656 	if (starts_with(line->buf, "---")) {
657 		/* space followed by a filename? */
658 		if (line->buf[3] == ' ' && !isspace(line->buf[4]))
659 			return 1;
660 		/* Just whitespace? */
661 		for (i = 3; i < line->len; i++) {
662 			unsigned char c = line->buf[i];
663 			if (c == '\n')
664 				return 1;
665 			if (!isspace(c))
666 				break;
667 		}
668 		return 0;
669 	}
670 	return 0;
671 }
672 
/*
 * Return 1 if line looks like a scissors mark such as "-- >8 --",
 * 0 otherwise.
 *
 * A "perforation" is a stretch made of dashes, scissors (">8", "8<",
 * ">%", "%<") and the whitespace in between them.
 */
static int is_scissors_line(const char *line)
{
	const char *p = line;
	const char *first = NULL, *last = NULL;
	int scissors = 0, gap = 0;
	int perforation = 0, in_perforation = 0;
	int visible = 0;

	while (*p) {
		if (isspace(*p)) {
			/* blanks only count while inside a perforation */
			if (in_perforation) {
				perforation++;
				gap++;
			}
			p++;
			continue;
		}

		last = p;
		if (!first)
			first = p;

		if (*p == '-') {
			in_perforation = 1;
			perforation++;
			p++;
		} else if (!memcmp(p, ">8", 2) || !memcmp(p, "8<", 2) ||
			   !memcmp(p, ">%", 2) || !memcmp(p, "%<", 2)) {
			in_perforation = 1;
			perforation += 2;
			scissors += 2;
			p += 2;
		} else {
			in_perforation = 0;
			p++;
		}
	}

	/*
	 * The mark must be at least 8 bytes long (e.g. "-- >8 --").
	 * Even though there can be arbitrary cruft on the same line
	 * (e.g. "cut here"), in order to avoid misidentification, the
	 * perforation must occupy more than a third of the visible
	 * width of the line, and dashes and scissors must occupy more
	 * than half of the perforation.
	 */
	if (first && last)
		visible = last - first + 1;

	return (scissors && 8 <= visible &&
		visible < perforation * 3 &&
		gap * 2 < perforation);
}
724 
flush_inbody_header_accum(struct mailinfo * mi)725 static void flush_inbody_header_accum(struct mailinfo *mi)
726 {
727 	if (!mi->inbody_header_accum.len)
728 		return;
729 	if (!check_header(mi, &mi->inbody_header_accum, mi->s_hdr_data, 0))
730 		BUG("inbody_header_accum, if not empty, must always contain a valid in-body header");
731 	strbuf_reset(&mi->inbody_header_accum);
732 }
733 
check_inbody_header(struct mailinfo * mi,const struct strbuf * line)734 static int check_inbody_header(struct mailinfo *mi, const struct strbuf *line)
735 {
736 	if (mi->inbody_header_accum.len &&
737 	    (line->buf[0] == ' ' || line->buf[0] == '\t')) {
738 		if (mi->use_scissors && is_scissors_line(line->buf)) {
739 			/*
740 			 * This is a scissors line; do not consider this line
741 			 * as a header continuation line.
742 			 */
743 			flush_inbody_header_accum(mi);
744 			return 0;
745 		}
746 		strbuf_strip_suffix(&mi->inbody_header_accum, "\n");
747 		strbuf_addbuf(&mi->inbody_header_accum, line);
748 		return 1;
749 	}
750 
751 	flush_inbody_header_accum(mi);
752 
753 	if (starts_with(line->buf, ">From") && isspace(line->buf[5]))
754 		return is_format_patch_separator(line->buf + 1, line->len - 1);
755 	if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) {
756 		int i;
757 		for (i = 0; header[i]; i++)
758 			if (!strcmp("Subject", header[i])) {
759 				handle_header(&mi->s_hdr_data[i], line);
760 				return 1;
761 			}
762 		return 0;
763 	}
764 	if (is_inbody_header(mi, line)) {
765 		strbuf_addbuf(&mi->inbody_header_accum, line);
766 		return 1;
767 	}
768 	return 0;
769 }
770 
handle_commit_msg(struct mailinfo * mi,struct strbuf * line)771 static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
772 {
773 	assert(!mi->filter_stage);
774 
775 	if (mi->header_stage) {
776 		if (!line->len || (line->len == 1 && line->buf[0] == '\n')) {
777 			if (mi->inbody_header_accum.len) {
778 				flush_inbody_header_accum(mi);
779 				mi->header_stage = 0;
780 			}
781 			return 0;
782 		}
783 	}
784 
785 	if (mi->use_inbody_headers && mi->header_stage) {
786 		mi->header_stage = check_inbody_header(mi, line);
787 		if (mi->header_stage)
788 			return 0;
789 	} else
790 		/* Only trim the first (blank) line of the commit message
791 		 * when ignoring in-body headers.
792 		 */
793 		mi->header_stage = 0;
794 
795 	/* normalize the log message to UTF-8. */
796 	if (convert_to_utf8(mi, line, mi->charset.buf))
797 		return 0; /* mi->input_error already set */
798 
799 	if (mi->use_scissors && is_scissors_line(line->buf)) {
800 		int i;
801 
802 		strbuf_setlen(&mi->log_message, 0);
803 		mi->header_stage = 1;
804 
805 		/*
806 		 * We may have already read "secondary headers"; purge
807 		 * them to give ourselves a clean restart.
808 		 */
809 		for (i = 0; header[i]; i++) {
810 			if (mi->s_hdr_data[i])
811 				strbuf_release(mi->s_hdr_data[i]);
812 			mi->s_hdr_data[i] = NULL;
813 		}
814 		return 0;
815 	}
816 
817 	if (patchbreak(line)) {
818 		if (mi->message_id)
819 			strbuf_addf(&mi->log_message,
820 				    "Message-Id: %s\n", mi->message_id);
821 		return 1;
822 	}
823 
824 	strbuf_addbuf(&mi->log_message, line);
825 	return 0;
826 }
827 
handle_patch(struct mailinfo * mi,const struct strbuf * line)828 static void handle_patch(struct mailinfo *mi, const struct strbuf *line)
829 {
830 	fwrite(line->buf, 1, line->len, mi->patchfile);
831 	mi->patch_lines++;
832 }
833 
handle_filter(struct mailinfo * mi,struct strbuf * line)834 static void handle_filter(struct mailinfo *mi, struct strbuf *line)
835 {
836 	switch (mi->filter_stage) {
837 	case 0:
838 		if (!handle_commit_msg(mi, line))
839 			break;
840 		mi->filter_stage++;
841 		/* fallthrough */
842 	case 1:
843 		handle_patch(mi, line);
844 		break;
845 	}
846 }
847 
is_rfc2822_header(const struct strbuf * line)848 static int is_rfc2822_header(const struct strbuf *line)
849 {
850 	/*
851 	 * The section that defines the loosest possible
852 	 * field name is "3.6.8 Optional fields".
853 	 *
854 	 * optional-field = field-name ":" unstructured CRLF
855 	 * field-name = 1*ftext
856 	 * ftext = %d33-57 / %59-126
857 	 */
858 	int ch;
859 	char *cp = line->buf;
860 
861 	/* Count mbox From headers as headers */
862 	if (starts_with(cp, "From ") || starts_with(cp, ">From "))
863 		return 1;
864 
865 	while ((ch = *cp++)) {
866 		if (ch == ':')
867 			return 1;
868 		if ((33 <= ch && ch <= 57) ||
869 		    (59 <= ch && ch <= 126))
870 			continue;
871 		break;
872 	}
873 	return 0;
874 }
875 
/*
 * Read one logical header line from "in" into line, joining any
 * continuation ("folded") lines that follow it.
 *
 * Returns 1 when a header line was read.  Returns 0 at end of input, or
 * when the line read is empty or does not look like a header; in the
 * latter case line holds that line, right-trimmed, with a newline
 * appended.
 */
static int read_one_header_line(struct strbuf *line, FILE *in)
{
	struct strbuf continuation = STRBUF_INIT;

	/* Get the first part of the line. */
	if (strbuf_getline_lf(line, in))
		return 0;

	/*
	 * Is it an empty line or not a valid rfc2822 header?
	 * If so, stop here, and return false ("not a header")
	 */
	strbuf_rtrim(line);
	if (!line->len || !is_rfc2822_header(line)) {
		/* Re-add the newline */
		strbuf_addch(line, '\n');
		return 0;
	}

	/*
	 * Now we need to eat all the continuation lines..
	 * Yuck, 2822 header "folding"
	 */
	for (;;) {
		int peek;

		/* look at the next character without consuming it */
		peek = fgetc(in);
		if (peek == EOF)
			break;
		ungetc(peek, in);
		if (peek != ' ' && peek != '\t')
			break;
		if (strbuf_getline_lf(&continuation, in))
			break;
		/* the leading blank or tab is joined as a single space */
		continuation.buf[0] = ' ';
		strbuf_rtrim(&continuation);
		strbuf_addbuf(line, &continuation);
	}
	strbuf_release(&continuation);

	return 1;
}
918 
find_boundary(struct mailinfo * mi,struct strbuf * line)919 static int find_boundary(struct mailinfo *mi, struct strbuf *line)
920 {
921 	while (!strbuf_getline_lf(line, mi->input)) {
922 		if (*(mi->content_top) && is_multipart_boundary(mi, line))
923 			return 1;
924 	}
925 	return 0;
926 }
927 
/*
 * Process a multipart boundary line held in "line".
 *
 * An end boundary (the boundary string followed by "--") pops the
 * current boundary off the mi->content stack and skips ahead to the next
 * boundary line.  For a part-opening boundary, the per-part defaults are
 * reset, the part's headers are read through check_header(), and "line"
 * is refilled with the part's first body line.
 *
 * Returns 1 when "line" holds a body line to process, 0 when the input
 * ended or an error was flagged in mi->input_error.
 */
static int handle_boundary(struct mailinfo *mi, struct strbuf *line)
{
	struct strbuf newline = STRBUF_INIT;

	strbuf_addch(&newline, '\n');
again:
	if (line->len >= (*(mi->content_top))->len + 2 &&
	    !memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) {
		/* we hit an end boundary */
		/* pop the current boundary off the stack */
		strbuf_release(*(mi->content_top));
		FREE_AND_NULL(*(mi->content_top));

		/* technically won't happen as is_multipart_boundary()
		   will fail first.  But just in case..
		 */
		if (--mi->content_top < mi->content) {
			error("Detected mismatched boundaries, can't recover");
			mi->input_error = -1;
			mi->content_top = mi->content;
			strbuf_release(&newline);
			return 0;
		}
		/*
		 * NOTE(review): "newline" is released right below, so when
		 * a second end boundary brings us back here via "again",
		 * handle_filter() is given an empty buffer rather than
		 * "\n" -- confirm that this is intended.
		 */
		handle_filter(mi, &newline);
		strbuf_release(&newline);
		if (mi->input_error)
			return 0;

		/* skip to the next boundary */
		if (!find_boundary(mi, line))
			return 0;
		goto again;
	}

	/* set some defaults */
	mi->transfer_encoding = TE_DONTCARE;
	strbuf_reset(&mi->charset);

	/* slurp in this section's info */
	while (read_one_header_line(line, mi->input))
		check_header(mi, line, mi->p_hdr_data, 0);

	strbuf_release(&newline);
	/* replenish line */
	if (strbuf_getline_lf(line, mi->input))
		return 0;
	strbuf_addch(line, '\n');
	return 1;
}
977 
handle_filter_flowed(struct mailinfo * mi,struct strbuf * line,struct strbuf * prev)978 static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
979 				 struct strbuf *prev)
980 {
981 	size_t len = line->len;
982 	const char *rest;
983 
984 	if (!mi->format_flowed) {
985 		handle_filter(mi, line);
986 		return;
987 	}
988 
989 	if (line->buf[len - 1] == '\n') {
990 		len--;
991 		if (len && line->buf[len - 1] == '\r')
992 			len--;
993 	}
994 
995 	/* Keep signature separator as-is. */
996 	if (skip_prefix(line->buf, "-- ", &rest) && rest - line->buf == len) {
997 		if (prev->len) {
998 			handle_filter(mi, prev);
999 			strbuf_reset(prev);
1000 		}
1001 		handle_filter(mi, line);
1002 		return;
1003 	}
1004 
1005 	/* Unstuff space-stuffed line. */
1006 	if (len && line->buf[0] == ' ') {
1007 		strbuf_remove(line, 0, 1);
1008 		len--;
1009 	}
1010 
1011 	/* Save flowed line for later, but without the soft line break. */
1012 	if (len && line->buf[len - 1] == ' ') {
1013 		strbuf_add(prev, line->buf, len - !!mi->delsp);
1014 		return;
1015 	}
1016 
1017 	/* Prepend any previous partial lines */
1018 	strbuf_insert(line, 0, prev->buf, prev->len);
1019 	strbuf_reset(prev);
1020 
1021 	handle_filter(mi, line);
1022 }
1023 
/*
 * Process the message body, starting with the line already held in
 * "line" and then reading the rest from mi->input.
 *
 * Multipart boundaries are handled, each line has its transfer encoding
 * undone, and the result is passed on, one line at a time, through
 * handle_filter_flowed().  "prev" carries text that does not yet form a
 * complete line from one iteration to the next.
 */
static void handle_body(struct mailinfo *mi, struct strbuf *line)
{
	struct strbuf prev = STRBUF_INIT;

	/* Skip up to the first boundary */
	if (*(mi->content_top)) {
		if (!find_boundary(mi, line))
			goto handle_body_out;
	}

	do {
		/* process any boundary lines */
		if (*(mi->content_top) && is_multipart_boundary(mi, line)) {
			/* flush any leftover */
			if (prev.len) {
				handle_filter(mi, &prev);
				strbuf_reset(&prev);
			}
			if (!handle_boundary(mi, line))
				goto handle_body_out;
		}

		/* Unwrap transfer encoding */
		decode_transfer_encoding(mi, line);

		switch (mi->transfer_encoding) {
		case TE_BASE64:
		case TE_QP:
		{
			struct strbuf **lines, **it, *sb;

			/* Prepend any previous partial lines */
			strbuf_insert(line, 0, prev.buf, prev.len);
			strbuf_reset(&prev);

			/*
			 * This is a decoded line that may contain
			 * multiple new lines.  Pass only one chunk
			 * at a time to handle_filter()
			 */
			lines = strbuf_split(line, '\n');
			for (it = lines; (sb = *it); it++) {
				if (*(it + 1) == NULL) /* The last line */
					if (sb->buf[sb->len - 1] != '\n') {
						/* Partial line, save it for later. */
						strbuf_addbuf(&prev, sb);
						break;
					}
				handle_filter_flowed(mi, sb, &prev);
			}
			/*
			 * The partial chunk is saved in "prev" and will be
			 * prepended to the decoded line in the next
			 * iteration of this loop.
			 */
			strbuf_list_free(lines);
			break;
		}
		default:
			handle_filter_flowed(mi, line, &prev);
		}

		if (mi->input_error)
			break;
	} while (!strbuf_getwholeline(line, mi->input, '\n'));

	/* pass on whatever is still pending at the end of the input */
	if (prev.len)
		handle_filter(mi, &prev);

	flush_inbody_header_accum(mi);

handle_body_out:
	strbuf_release(&prev);
}
1097 
output_header_lines(FILE * fout,const char * hdr,const struct strbuf * data)1098 static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data)
1099 {
1100 	const char *sp = data->buf;
1101 	while (1) {
1102 		char *ep = strchr(sp, '\n');
1103 		int len;
1104 		if (!ep)
1105 			len = strlen(sp);
1106 		else
1107 			len = ep - sp;
1108 		fprintf(fout, "%s: %.*s\n", hdr, len, sp);
1109 		if (!ep)
1110 			break;
1111 		sp = ep + 1;
1112 	}
1113 }
1114 
handle_info(struct mailinfo * mi)1115 static void handle_info(struct mailinfo *mi)
1116 {
1117 	struct strbuf *hdr;
1118 	int i;
1119 
1120 	for (i = 0; header[i]; i++) {
1121 		/* only print inbody headers if we output a patch file */
1122 		if (mi->patch_lines && mi->s_hdr_data[i])
1123 			hdr = mi->s_hdr_data[i];
1124 		else if (mi->p_hdr_data[i])
1125 			hdr = mi->p_hdr_data[i];
1126 		else
1127 			continue;
1128 
1129 		if (!strcmp(header[i], "Subject")) {
1130 			if (!mi->keep_subject) {
1131 				cleanup_subject(mi, hdr);
1132 				cleanup_space(hdr);
1133 			}
1134 			output_header_lines(mi->output, "Subject", hdr);
1135 		} else if (!strcmp(header[i], "From")) {
1136 			cleanup_space(hdr);
1137 			handle_from(mi, hdr);
1138 			fprintf(mi->output, "Author: %s\n", mi->name.buf);
1139 			fprintf(mi->output, "Email: %s\n", mi->email.buf);
1140 		} else {
1141 			cleanup_space(hdr);
1142 			fprintf(mi->output, "%s: %s\n", header[i], hdr->buf);
1143 		}
1144 	}
1145 	fprintf(mi->output, "\n");
1146 }
1147 
mailinfo(struct mailinfo * mi,const char * msg,const char * patch)1148 int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
1149 {
1150 	FILE *cmitmsg;
1151 	int peek;
1152 	struct strbuf line = STRBUF_INIT;
1153 
1154 	cmitmsg = fopen(msg, "w");
1155 	if (!cmitmsg) {
1156 		perror(msg);
1157 		return -1;
1158 	}
1159 	mi->patchfile = fopen(patch, "w");
1160 	if (!mi->patchfile) {
1161 		perror(patch);
1162 		fclose(cmitmsg);
1163 		return -1;
1164 	}
1165 
1166 	mi->p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->p_hdr_data)));
1167 	mi->s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->s_hdr_data)));
1168 
1169 	do {
1170 		peek = fgetc(mi->input);
1171 		if (peek == EOF) {
1172 			fclose(cmitmsg);
1173 			return error("empty patch: '%s'", patch);
1174 		}
1175 	} while (isspace(peek));
1176 	ungetc(peek, mi->input);
1177 
1178 	/* process the email header */
1179 	while (read_one_header_line(&line, mi->input))
1180 		check_header(mi, &line, mi->p_hdr_data, 1);
1181 
1182 	handle_body(mi, &line);
1183 	fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg);
1184 	fclose(cmitmsg);
1185 	fclose(mi->patchfile);
1186 
1187 	handle_info(mi);
1188 	strbuf_release(&line);
1189 	return mi->input_error;
1190 }
1191 
git_mailinfo_config(const char * var,const char * value,void * mi_)1192 static int git_mailinfo_config(const char *var, const char *value, void *mi_)
1193 {
1194 	struct mailinfo *mi = mi_;
1195 
1196 	if (!starts_with(var, "mailinfo."))
1197 		return git_default_config(var, value, NULL);
1198 	if (!strcmp(var, "mailinfo.scissors")) {
1199 		mi->use_scissors = git_config_bool(var, value);
1200 		return 0;
1201 	}
1202 	/* perhaps others here */
1203 	return 0;
1204 }
1205 
setup_mailinfo(struct mailinfo * mi)1206 void setup_mailinfo(struct mailinfo *mi)
1207 {
1208 	memset(mi, 0, sizeof(*mi));
1209 	strbuf_init(&mi->name, 0);
1210 	strbuf_init(&mi->email, 0);
1211 	strbuf_init(&mi->charset, 0);
1212 	strbuf_init(&mi->log_message, 0);
1213 	strbuf_init(&mi->inbody_header_accum, 0);
1214 	mi->header_stage = 1;
1215 	mi->use_inbody_headers = 1;
1216 	mi->content_top = mi->content;
1217 	git_config(git_mailinfo_config, mi);
1218 }
1219 
clear_mailinfo(struct mailinfo * mi)1220 void clear_mailinfo(struct mailinfo *mi)
1221 {
1222 	int i;
1223 
1224 	strbuf_release(&mi->name);
1225 	strbuf_release(&mi->email);
1226 	strbuf_release(&mi->charset);
1227 	strbuf_release(&mi->inbody_header_accum);
1228 	free(mi->message_id);
1229 
1230 	if (mi->p_hdr_data)
1231 		for (i = 0; mi->p_hdr_data[i]; i++)
1232 			strbuf_release(mi->p_hdr_data[i]);
1233 	free(mi->p_hdr_data);
1234 	if (mi->s_hdr_data)
1235 		for (i = 0; mi->s_hdr_data[i]; i++)
1236 			strbuf_release(mi->s_hdr_data[i]);
1237 	free(mi->s_hdr_data);
1238 
1239 	while (mi->content < mi->content_top) {
1240 		free(*(mi->content_top));
1241 		mi->content_top--;
1242 	}
1243 
1244 	strbuf_release(&mi->log_message);
1245 }
1246