1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 8 -*- */
2 /*
3  * soup-headers.c: HTTP message header parsing
4  *
5  * Copyright (C) 2001-2003, Ximian, Inc.
6  */
7 
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11 
12 #include <stdlib.h>
13 #include <string.h>
14 
15 #include "soup-misc.h"
16 #include "soup-headers.h"
17 #include "soup-message-headers-private.h"
18 #include "soup.h"
19 
20 /**
21  * SECTION:soup-headers
22  * @section_id: SoupHeaders
23  * @title: SoupHeaders
 * @short_description: Functions for working with HTTP headers
 *
 * These are utility functions for parsing HTTP message headers and for
 * parsing and building individual header values.
27  */
28 
29 /**
30  * soup_headers_parse:
31  * @str: the header string (including the Request-Line or Status-Line,
32  *   but not the trailing blank line)
33  * @len: length of @str
34  * @dest: #SoupMessageHeaders to store the header values in
35  *
36  * Parses the headers of an HTTP request or response in @str and
37  * stores the results in @dest. Beware that @dest may be modified even
38  * on failure.
39  *
40  * This is a low-level method; normally you would use
41  * soup_headers_parse_request() or soup_headers_parse_response().
42  *
43  * Returns: success or failure
44  *
45  **/
46 gboolean
soup_headers_parse(const char * str,int len,SoupMessageHeaders * dest)47 soup_headers_parse (const char *str, int len, SoupMessageHeaders *dest)
48 {
49 	const char *headers_start;
50 	char *headers_copy, *name, *name_end, *value, *value_end;
51 	char *eol, *sol, *p;
52 	gsize copy_len;
53 	gboolean success = FALSE;
54 
55 	g_return_val_if_fail (str != NULL, FALSE);
56 	g_return_val_if_fail (dest != NULL, FALSE);
57 
58 	/* As per RFC 2616 section 19.3, we treat '\n' as the
59 	 * line terminator, and '\r', if it appears, merely as
60 	 * ignorable trailing whitespace.
61 	 */
62 
63 	/* Skip over the Request-Line / Status-Line */
64 	headers_start = memchr (str, '\n', len);
65 	if (!headers_start)
66 		return FALSE;
67 	/* No '\0's in the Request-Line / Status-Line */
68 	if (memchr (str, '\0', headers_start - str))
69 		return FALSE;
70 
71 	/* We work on a copy of the headers, which we can write '\0's
72 	 * into, so that we don't have to individually g_strndup and
73 	 * then g_free each header name and value.
74 	 */
75 	copy_len = len - (headers_start - str);
76 	headers_copy = g_malloc (copy_len + 1);
77 	memcpy (headers_copy, headers_start, copy_len);
78 	headers_copy[copy_len] = '\0';
79 	value_end = headers_copy;
80 
81 	/* There shouldn't be any '\0's in the headers already, but
82 	 * this is the web we're talking about.
83 	 */
84 	while ((p = memchr (headers_copy, '\0', copy_len))) {
85 		memmove (p, p + 1, copy_len - (p - headers_copy));
86 		copy_len--;
87 	}
88 
89 	while (*(value_end + 1)) {
90 		name = value_end + 1;
91 		name_end = strchr (name, ':');
92 
93 		/* Reject if there is no ':', or the header name is
94 		 * empty, or it contains whitespace.
95 		 */
96 		if (!name_end ||
97 		    name_end == name ||
98 		    name + strcspn (name, " \t\r\n") < name_end) {
99 			/* Ignore this line. Note that if it has
100 			 * continuation lines, we'll end up ignoring
101 			 * them too since they'll start with spaces.
102 			 */
103 			value_end = strchr (name, '\n');
104 			if (!value_end)
105 				goto done;
106 			continue;
107 		}
108 
109 		/* Find the end of the value; ie, an end-of-line that
110 		 * isn't followed by a continuation line.
111 		 */
112 		value = name_end + 1;
113 		value_end = strchr (name, '\n');
114 		if (!value_end)
115 			goto done;
116 		while (*(value_end + 1) == ' ' || *(value_end + 1) == '\t') {
117 			value_end = strchr (value_end + 1, '\n');
118 			if (!value_end)
119 				goto done;
120 		}
121 
122 		*name_end = '\0';
123 		*value_end = '\0';
124 
125 		/* Skip leading whitespace */
126 		while (value < value_end &&
127 		       (*value == ' ' || *value == '\t' ||
128 			*value == '\r' || *value == '\n'))
129 			value++;
130 
131 		/* Collapse continuation lines */
132 		while ((eol = strchr (value, '\n'))) {
133 			/* find start of next line */
134 			sol = eol + 1;
135 			while (*sol == ' ' || *sol == '\t')
136 				sol++;
137 
138 			/* back up over trailing whitespace on current line */
139 			while (eol[-1] == ' ' || eol[-1] == '\t' || eol[-1] == '\r')
140 				eol--;
141 
142 			/* Delete all but one SP */
143 			*eol = ' ';
144 			memmove (eol + 1, sol, strlen (sol) + 1);
145 		}
146 
147 		/* clip trailing whitespace */
148 		eol = strchr (value, '\0');
149 		while (eol > value &&
150 		       (eol[-1] == ' ' || eol[-1] == '\t' || eol[-1] == '\r'))
151 			eol--;
152 		*eol = '\0';
153 
154 		/* convert (illegal) '\r's to spaces */
155 		for (p = strchr (value, '\r'); p; p = strchr (p, '\r'))
156 			*p = ' ';
157 
158 		soup_message_headers_append_untrusted_data (dest, name, value);
159         }
160 	success = TRUE;
161 
162 done:
163 	g_free (headers_copy);
164 	return success;
165 }
166 
167 /**
168  * soup_headers_parse_request:
169  * @str: the headers (up to, but not including, the trailing blank line)
170  * @len: length of @str
171  * @req_headers: #SoupMessageHeaders to store the header values in
172  * @req_method: (out) (optional): if non-%NULL, will be filled in with the
173  * request method
174  * @req_path: (out) (optional): if non-%NULL, will be filled in with the
175  * request path
176  * @ver: (out) (optional): if non-%NULL, will be filled in with the HTTP
177  * version
178  *
179  * Parses the headers of an HTTP request in @str and stores the
180  * results in @req_method, @req_path, @ver, and @req_headers.
181  *
182  * Beware that @req_headers may be modified even on failure.
183  *
184  * Returns: %SOUP_STATUS_OK if the headers could be parsed, or an
185  * HTTP error to be returned to the client if they could not be.
186  **/
187 guint
soup_headers_parse_request(const char * str,int len,SoupMessageHeaders * req_headers,char ** req_method,char ** req_path,SoupHTTPVersion * ver)188 soup_headers_parse_request (const char          *str,
189 			    int                  len,
190 			    SoupMessageHeaders  *req_headers,
191 			    char               **req_method,
192 			    char               **req_path,
193 			    SoupHTTPVersion     *ver)
194 {
195 	const char *method, *method_end, *path, *path_end;
196 	const char *version, *version_end, *headers;
197 	unsigned long major_version, minor_version;
198 	char *p;
199 
200 	g_return_val_if_fail (str != NULL, SOUP_STATUS_BAD_REQUEST);
201 
202 	/* RFC 2616 4.1 "servers SHOULD ignore any empty line(s)
203 	 * received where a Request-Line is expected."
204 	 */
205 	while ((*str == '\r' || *str == '\n') && len > 0) {
206 		str++;
207 		len--;
208 	}
209 	if (!len)
210 		return SOUP_STATUS_BAD_REQUEST;
211 
212 	/* RFC 2616 19.3 "[servers] SHOULD accept any amount of SP or
213 	 * HT characters between [Request-Line] fields"
214 	 */
215 
216 	method = method_end = str;
217 	while (method_end < str + len && *method_end != ' ' && *method_end != '\t')
218 		method_end++;
219 	if (method_end >= str + len)
220 		return SOUP_STATUS_BAD_REQUEST;
221 
222 	path = method_end;
223 	while (path < str + len && (*path == ' ' || *path == '\t'))
224 		path++;
225 	if (path >= str + len)
226 		return SOUP_STATUS_BAD_REQUEST;
227 
228 	path_end = path;
229 	while (path_end < str + len && *path_end != ' ' && *path_end != '\t')
230 		path_end++;
231 	if (path_end >= str + len)
232 		return SOUP_STATUS_BAD_REQUEST;
233 
234 	version = path_end;
235 	while (version < str + len && (*version == ' ' || *version == '\t'))
236 		version++;
237 	if (version + 8 >= str + len)
238 		return SOUP_STATUS_BAD_REQUEST;
239 
240 	if (strncmp (version, "HTTP/", 5) != 0 ||
241 	    !g_ascii_isdigit (version[5]))
242 		return SOUP_STATUS_BAD_REQUEST;
243 	major_version = strtoul (version + 5, &p, 10);
244 	if (*p != '.' || !g_ascii_isdigit (p[1]))
245 		return SOUP_STATUS_BAD_REQUEST;
246 	minor_version = strtoul (p + 1, &p, 10);
247 	version_end = p;
248 	if (major_version != 1)
249 		return SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
250 	if (minor_version > 1)
251 		return SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
252 
253 	headers = version_end;
254 	while (headers < str + len && (*headers == '\r' || *headers == ' '))
255 		headers++;
256 	if (headers >= str + len || *headers != '\n')
257 		return SOUP_STATUS_BAD_REQUEST;
258 
259 	if (!soup_headers_parse (str, len, req_headers))
260 		return SOUP_STATUS_BAD_REQUEST;
261 
262 	if (soup_message_headers_get_expectations (req_headers) &
263 	    SOUP_EXPECTATION_UNRECOGNIZED)
264 		return SOUP_STATUS_EXPECTATION_FAILED;
265 	/* RFC 2616 14.10 */
266 	if (minor_version == 0)
267 		soup_message_headers_clean_connection_headers (req_headers);
268 
269 	if (req_method)
270 		*req_method = g_strndup (method, method_end - method);
271 	if (req_path)
272 		*req_path = g_strndup (path, path_end - path);
273 	if (ver)
274 		*ver = (minor_version == 0) ? SOUP_HTTP_1_0 : SOUP_HTTP_1_1;
275 
276 	return SOUP_STATUS_OK;
277 }
278 
279 /**
280  * soup_headers_parse_status_line:
281  * @status_line: an HTTP Status-Line
282  * @ver: (out) (optional): if non-%NULL, will be filled in with the HTTP
283  * version
284  * @status_code: (out) (optional): if non-%NULL, will be filled in with
285  * the status code
286  * @reason_phrase: (out) (optional): if non-%NULL, will be filled in with
287  * the reason phrase
288  *
289  * Parses the HTTP Status-Line string in @status_line into @ver,
290  * @status_code, and @reason_phrase. @status_line must be terminated by
291  * either "\0" or "\r\n".
292  *
293  * Returns: %TRUE if @status_line was parsed successfully.
294  **/
295 gboolean
soup_headers_parse_status_line(const char * status_line,SoupHTTPVersion * ver,guint * status_code,char ** reason_phrase)296 soup_headers_parse_status_line (const char       *status_line,
297 				SoupHTTPVersion  *ver,
298 				guint            *status_code,
299 				char            **reason_phrase)
300 {
301 	unsigned long major_version, minor_version, code;
302 	const char *code_start, *code_end, *phrase_start, *phrase_end;
303 	char *p;
304 
305 	g_return_val_if_fail (status_line != NULL, FALSE);
306 
307 	if (strncmp (status_line, "HTTP/", 5) == 0 &&
308 	    g_ascii_isdigit (status_line[5])) {
309 		major_version = strtoul (status_line + 5, &p, 10);
310 		if (*p != '.' || !g_ascii_isdigit (p[1]))
311 			return FALSE;
312 		minor_version = strtoul (p + 1, &p, 10);
313 		if (major_version != 1)
314 			return FALSE;
315 		if (minor_version > 1)
316 			return FALSE;
317 		if (ver)
318 			*ver = (minor_version == 0) ? SOUP_HTTP_1_0 : SOUP_HTTP_1_1;
319 	} else if (!strncmp (status_line, "ICY", 3)) {
320 		/* Shoutcast not-quite-HTTP format */
321 		if (ver)
322 			*ver = SOUP_HTTP_1_0;
323 		p = (char *)status_line + 3;
324 	} else
325 		return FALSE;
326 
327 	code_start = p;
328 	while (*code_start == ' ' || *code_start == '\t')
329 		code_start++;
330 	code_end = code_start;
331 	while (*code_end >= '0' && *code_end <= '9')
332 		code_end++;
333 	if (code_end != code_start + 3)
334 		return FALSE;
335 	code = atoi (code_start);
336 	if (code < 100 || code > 999)
337 		return FALSE;
338 	if (status_code)
339 		*status_code = code;
340 
341 	phrase_start = code_end;
342 	while (*phrase_start == ' ' || *phrase_start == '\t')
343 		phrase_start++;
344 	phrase_end = phrase_start + strcspn (phrase_start, "\n");
345 	while (phrase_end > phrase_start &&
346 	       (phrase_end[-1] == '\r' || phrase_end[-1] == ' ' || phrase_end[-1] == '\t'))
347 		phrase_end--;
348 	if (reason_phrase)
349 		*reason_phrase = g_strndup (phrase_start, phrase_end - phrase_start);
350 
351 	return TRUE;
352 }
353 
354 /**
355  * soup_headers_parse_response:
356  * @str: the headers (up to, but not including, the trailing blank line)
357  * @len: length of @str
358  * @headers: #SoupMessageHeaders to store the header values in
359  * @ver: (out) (optional): if non-%NULL, will be filled in with the HTTP
360  * version
361  * @status_code: (out) (optional): if non-%NULL, will be filled in with
362  * the status code
363  * @reason_phrase: (out) (optional): if non-%NULL, will be filled in with
364  * the reason phrase
365  *
366  * Parses the headers of an HTTP response in @str and stores the
367  * results in @ver, @status_code, @reason_phrase, and @headers.
368  *
369  * Beware that @headers may be modified even on failure.
370  *
371  * Returns: success or failure.
372  **/
373 gboolean
soup_headers_parse_response(const char * str,int len,SoupMessageHeaders * headers,SoupHTTPVersion * ver,guint * status_code,char ** reason_phrase)374 soup_headers_parse_response (const char          *str,
375 			     int                  len,
376 			     SoupMessageHeaders  *headers,
377 			     SoupHTTPVersion     *ver,
378 			     guint               *status_code,
379 			     char               **reason_phrase)
380 {
381 	SoupHTTPVersion version;
382 
383 	g_return_val_if_fail (str != NULL, FALSE);
384 
385 	/* Workaround for broken servers that send extra line breaks
386 	 * after a response, which we then see prepended to the next
387 	 * response on that connection.
388 	 */
389 	while ((*str == '\r' || *str == '\n') && len > 0) {
390 		str++;
391 		len--;
392 	}
393 	if (!len)
394 		return FALSE;
395 
396 	if (!soup_headers_parse (str, len, headers))
397 		return FALSE;
398 
399 	if (!soup_headers_parse_status_line (str,
400 					     &version,
401 					     status_code,
402 					     reason_phrase))
403 		return FALSE;
404 	if (ver)
405 		*ver = version;
406 
407 	/* RFC 2616 14.10 */
408 	if (version == SOUP_HTTP_1_0)
409 		soup_message_headers_clean_connection_headers (headers);
410 
411 	return TRUE;
412 }
413 
414 
415 /*
416  * Parsing of specific HTTP header types
417  */
418 
/* Returns a pointer to the first character of @s that is not ASCII
 * whitespace (possibly its terminating '\0').
 */
static const char *
skip_lws (const char *s)
{
	const char *p;

	for (p = s; g_ascii_isspace (*p); p++)
		;
	return p;
}
426 
/* Walks backwards from @s over ASCII whitespace, never going before
 * @start, and returns the resulting position.
 */
static const char *
unskip_lws (const char *s, const char *start)
{
	const char *p = s;

	while (p > start) {
		if (!g_ascii_isspace (p[-1]))
			break;
		p--;
	}
	return p;
}
434 
/* Returns a pointer to the first character of @s that is neither
 * ASCII whitespace nor @delim. The grammar allows for multiple
 * delimiters in a row, so all of them are skipped.
 */
static const char *
skip_delims (const char *s, char delim)
{
	const char *p;

	for (p = s; ; p++) {
		if (!g_ascii_isspace (*p) && *p != delim)
			return p;
	}
}
443 
444 static const char *
skip_item(const char * s,char delim)445 skip_item (const char *s, char delim)
446 {
447 	gboolean quoted = FALSE;
448 	const char *start = s;
449 
450 	/* A list item ends at the last non-whitespace character
451 	 * before a delimiter which is not inside a quoted-string. Or
452 	 * at the end of the string.
453 	 */
454 
455 	while (*s) {
456 		if (*s == '"')
457 			quoted = !quoted;
458 		else if (quoted) {
459 			if (*s == '\\' && *(s + 1))
460 				s++;
461 		} else {
462 			if (*s == delim)
463 				break;
464 		}
465 		s++;
466 	}
467 
468 	return unskip_lws (s, start);
469 }
470 
471 static GSList *
parse_list(const char * header,char delim)472 parse_list (const char *header, char delim)
473 {
474 	GSList *list = NULL;
475 	const char *end;
476 
477 	header = skip_delims (header, delim);
478 	while (*header) {
479 		end = skip_item (header, delim);
480 		list = g_slist_prepend (list, g_strndup (header, end - header));
481 		header = skip_delims (end, delim);
482 	}
483 
484 	return g_slist_reverse (list);
485 }
486 
487 /**
488  * soup_header_parse_list:
489  * @header: a header value
490  *
491  * Parses a header whose content is described by RFC2616 as
492  * "#something", where "something" does not itself contain commas,
493  * except as part of quoted-strings.
494  *
495  * Returns: (transfer full) (element-type utf8): a #GSList of
496  * list elements, as allocated strings
497  **/
498 GSList *
soup_header_parse_list(const char * header)499 soup_header_parse_list (const char *header)
500 {
501 	g_return_val_if_fail (header != NULL, NULL);
502 
503 	return parse_list (header, ',');
504 }
505 
/* One entry of a quality list: the item text and its qvalue. */
typedef struct {
	char *item;
	double qval;
} QualityItem;

/* qsort() comparison function ordering #QualityItem entries by
 * descending qvalue: the entry with the higher qvalue sorts first.
 */
static int
sort_by_qval (const void *a, const void *b)
{
	double lhs = ((const QualityItem *)a)->qval;
	double rhs = ((const QualityItem *)b)->qval;

	if (lhs == rhs)
		return 0;

	return (lhs < rhs) ? 1 : -1;
}
524 
525 /**
526  * soup_header_parse_quality_list:
527  * @header: a header value
528  * @unacceptable: (out) (optional) (transfer full) (element-type utf8): on
529  * return, will contain a list of unacceptable values
530  *
531  * Parses a header whose content is a list of items with optional
532  * "qvalue"s (eg, Accept, Accept-Charset, Accept-Encoding,
533  * Accept-Language, TE).
534  *
535  * If @unacceptable is not %NULL, then on return, it will contain the
536  * items with qvalue 0. Either way, those items will be removed from
537  * the main list.
538  *
539  * Returns: (transfer full) (element-type utf8): a #GSList of
540  * acceptable values (as allocated strings), highest-qvalue first.
541  **/
542 GSList *
soup_header_parse_quality_list(const char * header,GSList ** unacceptable)543 soup_header_parse_quality_list (const char *header, GSList **unacceptable)
544 {
545 	GSList *unsorted;
546 	QualityItem *array;
547 	GSList *sorted, *iter;
548 	char *item, *semi;
549 	const char *param, *equal, *value;
550 	double qval;
551 	int n;
552 
553 	g_return_val_if_fail (header != NULL, NULL);
554 
555 	if (unacceptable)
556 		*unacceptable = NULL;
557 
558 	unsorted = soup_header_parse_list (header);
559 	array = g_new0 (QualityItem, g_slist_length (unsorted));
560 	for (iter = unsorted, n = 0; iter; iter = iter->next) {
561 		item = iter->data;
562 		qval = 1.0;
563 		for (semi = strchr (item, ';'); semi; semi = strchr (semi + 1, ';')) {
564 			param = skip_lws (semi + 1);
565 			if (*param != 'q')
566 				continue;
567 			equal = skip_lws (param + 1);
568 			if (!equal || *equal != '=')
569 				continue;
570 			value = skip_lws (equal + 1);
571 			if (!value)
572 				continue;
573 
574 			if (value[0] != '0' && value[0] != '1')
575 				continue;
576 			qval = (double)(value[0] - '0');
577 			if (value[0] == '0' && value[1] == '.') {
578 				if (g_ascii_isdigit (value[2])) {
579 					qval += (double)(value[2] - '0') / 10;
580 					if (g_ascii_isdigit (value[3])) {
581 						qval += (double)(value[3] - '0') / 100;
582 						if (g_ascii_isdigit (value[4]))
583 							qval += (double)(value[4] - '0') / 1000;
584 					}
585 				}
586 			}
587 
588 			*semi = '\0';
589 			break;
590 		}
591 
592 		if (qval == 0.0) {
593 			if (unacceptable) {
594 				*unacceptable = g_slist_prepend (*unacceptable,
595 								 item);
596 			}
597 		} else {
598 			array[n].item = item;
599 			array[n].qval = qval;
600 			n++;
601 		}
602 	}
603 	g_slist_free (unsorted);
604 
605 	qsort (array, n, sizeof (QualityItem), sort_by_qval);
606 	sorted = NULL;
607 	while (n--)
608 		sorted = g_slist_prepend (sorted, array[n].item);
609 	g_free (array);
610 
611 	return sorted;
612 }
613 
614 /**
615  * soup_header_free_list: (skip)
616  * @list: a #GSList returned from soup_header_parse_list() or
617  * soup_header_parse_quality_list()
618  *
619  * Frees @list.
620  **/
621 void
soup_header_free_list(GSList * list)622 soup_header_free_list (GSList *list)
623 {
624 	g_slist_free_full (list, g_free);
625 }
626 
627 /**
628  * soup_header_contains:
629  * @header: An HTTP header suitable for parsing with
630  * soup_header_parse_list()
631  * @token: a token
632  *
633  * Parses @header to see if it contains the token @token (matched
634  * case-insensitively). Note that this can't be used with lists
635  * that have qvalues.
636  *
637  * Returns: whether or not @header contains @token
638  **/
639 gboolean
soup_header_contains(const char * header,const char * token)640 soup_header_contains (const char *header, const char *token)
641 {
642 	const char *end;
643 	guint len;
644 
645 	g_return_val_if_fail (header != NULL, FALSE);
646 	g_return_val_if_fail (token != NULL, FALSE);
647 
648 	len = strlen (token);
649 
650 	header = skip_delims (header, ',');
651 	while (*header) {
652 		end = skip_item (header, ',');
653 		if (end - header == len &&
654 		    !g_ascii_strncasecmp (header, token, len))
655 			return TRUE;
656 		header = skip_delims (end, ',');
657 	}
658 
659 	return FALSE;
660 }
661 
/* Decodes a quoted-string in place. @quoted_string must start with
 * its opening quote, which is skipped; the text up to the closing
 * quote (or the end of the string) is moved to the front of the
 * buffer with backslash escapes removed, and NUL-terminated there.
 */
static void
decode_quoted_string (char *quoted_string)
{
	char *in = quoted_string + 1;
	char *out = quoted_string;

	for (;;) {
		char c = *in;

		if (c == '\0' || c == '"')
			break;
		/* A backslash makes the next character literal, unless
		 * it is the very last character of the string.
		 */
		if (c == '\\' && in[1] != '\0')
			in++;
		*out++ = *in++;
	}
	*out = '\0';
}
676 
677 static gboolean
decode_rfc5987(char * encoded_string)678 decode_rfc5987 (char *encoded_string)
679 {
680 	char *q, *decoded;
681 	gboolean iso_8859_1 = FALSE;
682 
683 	q = strchr (encoded_string, '\'');
684 	if (!q)
685 		return FALSE;
686 	if (g_ascii_strncasecmp (encoded_string, "UTF-8",
687 				 q - encoded_string) == 0)
688 		;
689 	else if (g_ascii_strncasecmp (encoded_string, "iso-8859-1",
690 				      q - encoded_string) == 0)
691 		iso_8859_1 = TRUE;
692 	else
693 		return FALSE;
694 
695 	q = strchr (q + 1, '\'');
696 	if (!q)
697 		return FALSE;
698 
699 	decoded = g_uri_unescape_string (q + 1, NULL);
700 	if (iso_8859_1) {
701 		char *utf8 =  g_convert_with_fallback (decoded, -1, "UTF-8",
702 						       "iso-8859-1", "_",
703 						       NULL, NULL, NULL);
704 		g_free (decoded);
705 		if (!utf8)
706 			return FALSE;
707 		decoded = utf8;
708 	}
709 
710 	/* If encoded_string was UTF-8, then each 3-character %-escape
711 	 * will be converted to a single byte, and so decoded is
712 	 * shorter than encoded_string. If encoded_string was
713 	 * iso-8859-1, then each 3-character %-escape will be
714 	 * converted into at most 2 bytes in UTF-8, and so it's still
715 	 * shorter.
716 	 */
717 	strcpy (encoded_string, decoded);
718 	g_free (decoded);
719 	return TRUE;
720 }
721 
722 static GHashTable *
parse_param_list(const char * header,char delim,gboolean strict)723 parse_param_list (const char *header, char delim, gboolean strict)
724 {
725 	GHashTable *params;
726 	GSList *list, *iter;
727 	char *item, *eq, *name_end, *value;
728 	gboolean override, duplicated;
729 
730 	params = g_hash_table_new_full (soup_str_case_hash,
731 					soup_str_case_equal,
732 					g_free, NULL);
733 
734 	list = parse_list (header, delim);
735 	for (iter = list; iter; iter = iter->next) {
736 		item = iter->data;
737 		override = FALSE;
738 
739 		eq = strchr (item, '=');
740 		if (eq) {
741 			name_end = (char *)unskip_lws (eq, item);
742 			if (name_end == item) {
743 				/* That's no good... */
744 				g_free (item);
745 				continue;
746 			}
747 
748 			*name_end = '\0';
749 
750 			value = (char *)skip_lws (eq + 1);
751 
752 			if (name_end[-1] == '*' && name_end > item + 1) {
753 				name_end[-1] = '\0';
754 				if (!decode_rfc5987 (value)) {
755 					g_free (item);
756 					continue;
757 				}
758 				override = TRUE;
759 			} else if (*value == '"')
760 				decode_quoted_string (value);
761 		} else
762 			value = NULL;
763 
764 		duplicated = g_hash_table_lookup_extended (params, item, NULL, NULL);
765 
766 		if (strict && duplicated) {
767 			soup_header_free_param_list (params);
768 			params = NULL;
769 			g_slist_foreach (iter, (GFunc)g_free, NULL);
770 			break;
771 		} else if (override || !duplicated)
772 			g_hash_table_replace (params, item, value);
773 		else
774 			g_free (item);
775 	}
776 
777 	g_slist_free (list);
778 	return params;
779 }
780 
781 /**
782  * soup_header_parse_param_list:
783  * @header: a header value
784  *
785  * Parses a header which is a comma-delimited list of something like:
786  * <literal>token [ "=" ( token | quoted-string ) ]</literal>.
787  *
788  * Tokens that don't have an associated value will still be added to
789  * the resulting hash table, but with a %NULL value.
790  *
791  * This also handles RFC5987 encoding (which in HTTP is mostly used
792  * for giving UTF8-encoded filenames in the Content-Disposition
793  * header).
794  *
795  * Returns: (element-type utf8 utf8) (transfer full): a
796  * #GHashTable of list elements, which can be freed with
797  * soup_header_free_param_list().
798  **/
799 GHashTable *
soup_header_parse_param_list(const char * header)800 soup_header_parse_param_list (const char *header)
801 {
802 	g_return_val_if_fail (header != NULL, NULL);
803 
804 	return parse_param_list (header, ',', FALSE);
805 }
806 
807 /**
808  * soup_header_parse_semi_param_list:
809  * @header: a header value
810  *
811  * Parses a header which is a semicolon-delimited list of something
812  * like: <literal>token [ "=" ( token | quoted-string ) ]</literal>.
813  *
814  * Tokens that don't have an associated value will still be added to
815  * the resulting hash table, but with a %NULL value.
816  *
817  * This also handles RFC5987 encoding (which in HTTP is mostly used
818  * for giving UTF8-encoded filenames in the Content-Disposition
819  * header).
820  *
821  * Returns: (element-type utf8 utf8) (transfer full): a
822  * #GHashTable of list elements, which can be freed with
823  * soup_header_free_param_list().
824  *
825  **/
826 GHashTable *
soup_header_parse_semi_param_list(const char * header)827 soup_header_parse_semi_param_list (const char *header)
828 {
829 	g_return_val_if_fail (header != NULL, NULL);
830 
831 	return parse_param_list (header, ';', FALSE);
832 }
833 
834 /**
835  * soup_header_parse_param_list_strict:
836  * @header: a header value
837  *
838  * A strict version of soup_header_parse_param_list()
839  * that bails out if there are duplicate parameters.
840  * Note that this function will treat RFC5987-encoded
841  * parameters as duplicated if an ASCII version is also
842  * present. For header fields that might contain
843  * RFC5987-encoded parameters, use
844  * soup_header_parse_param_list() instead.
845  *
846  * Returns: (element-type utf8 utf8) (transfer full) (nullable):
847  * a #GHashTable of list elements, which can be freed with
848  * soup_header_free_param_list() or %NULL if there are duplicate
849  * elements.
850  *
851  **/
852 GHashTable *
soup_header_parse_param_list_strict(const char * header)853 soup_header_parse_param_list_strict (const char *header)
854 {
855 	g_return_val_if_fail (header != NULL, NULL);
856 
857 	return parse_param_list (header, ',', TRUE);
858 }
859 
860 /**
861  * soup_header_parse_semi_param_list_strict:
862  * @header: a header value
863  *
864  * A strict version of soup_header_parse_semi_param_list()
865  * that bails out if there are duplicate parameters.
866  * Note that this function will treat RFC5987-encoded
867  * parameters as duplicated if an ASCII version is also
868  * present. For header fields that might contain
869  * RFC5987-encoded parameters, use
870  * soup_header_parse_semi_param_list() instead.
871  *
872  * Returns: (element-type utf8 utf8) (transfer full) (nullable):
873  * a #GHashTable of list elements, which can be freed with
874  * soup_header_free_param_list() or %NULL if there are duplicate
875  * elements.
876  *
877  **/
878 GHashTable *
soup_header_parse_semi_param_list_strict(const char * header)879 soup_header_parse_semi_param_list_strict (const char *header)
880 {
881 	g_return_val_if_fail (header != NULL, NULL);
882 
883 	return parse_param_list (header, ';', TRUE);
884 }
885 
886 /**
887  * soup_header_free_param_list:
888  * @param_list: (element-type utf8 utf8): a #GHashTable returned from soup_header_parse_param_list()
889  * or soup_header_parse_semi_param_list()
890  *
891  * Frees @param_list.
892  **/
893 void
soup_header_free_param_list(GHashTable * param_list)894 soup_header_free_param_list (GHashTable *param_list)
895 {
896 	g_return_if_fail (param_list != NULL);
897 
898 	g_hash_table_destroy (param_list);
899 }
900 
901 static void
append_param_rfc5987(GString * string,const char * name,const char * value)902 append_param_rfc5987 (GString    *string,
903 		      const char *name,
904 		      const char *value)
905 {
906 	char *encoded;
907 
908 	g_string_append (string, name);
909 	g_string_append (string, "*=UTF-8''");
910 	encoded = g_uri_escape_string (value, "*'%()<>@,;:\\\"/[]?=", FALSE);
911 	g_string_append (string, encoded);
912 	g_free (encoded);
913 }
914 
915 static void
append_param_quoted(GString * string,const char * name,const char * value)916 append_param_quoted (GString    *string,
917 		     const char *name,
918 		     const char *value)
919 {
920 	int len;
921 
922 	g_string_append (string, name);
923 	g_string_append (string, "=\"");
924 	while (*value) {
925 		while (*value == '\\' || *value == '"') {
926 			g_string_append_c (string, '\\');
927 			g_string_append_c (string, *value++);
928 		}
929 		len = strcspn (value, "\\\"");
930 		g_string_append_len (string, value, len);
931 		value += len;
932 	}
933 	g_string_append_c (string, '"');
934 }
935 
936 static void
append_param_internal(GString * string,const char * name,const char * value,gboolean allow_token)937 append_param_internal (GString    *string,
938 		       const char *name,
939 		       const char *value,
940 		       gboolean    allow_token)
941 {
942 	const char *v;
943 	gboolean use_token = allow_token;
944 
945 	for (v = value; *v; v++) {
946 		if (*v & 0x80) {
947 			if (g_utf8_validate (value, -1, NULL)) {
948 				append_param_rfc5987 (string, name, value);
949 				return;
950 			} else {
951 				use_token = FALSE;
952 				break;
953 			}
954 		} else if (!soup_char_is_token (*v))
955 			use_token = FALSE;
956 	}
957 
958 	if (use_token) {
959 		g_string_append (string, name);
960 		g_string_append_c (string, '=');
961 		g_string_append (string, value);
962 	} else
963 		append_param_quoted (string, name, value);
964 }
965 
966 /**
967  * soup_header_g_string_append_param_quoted:
968  * @string: a #GString being used to construct an HTTP header value
969  * @name: a parameter name
970  * @value: a parameter value
971  *
972  * Appends something like <literal>@name="@value"</literal> to
973  * @string, taking care to escape any quotes or backslashes in @value.
974  *
975  * If @value is (non-ASCII) UTF-8, this will instead use RFC 5987
976  * encoding, just like soup_header_g_string_append_param().
977  *
978  **/
979 void
soup_header_g_string_append_param_quoted(GString * string,const char * name,const char * value)980 soup_header_g_string_append_param_quoted (GString    *string,
981 					  const char *name,
982 					  const char *value)
983 {
984 	g_return_if_fail (string != NULL);
985 	g_return_if_fail (name != NULL);
986 	g_return_if_fail (value != NULL);
987 
988 	append_param_internal (string, name, value, FALSE);
989 }
990 
991 /**
992  * soup_header_g_string_append_param:
993  * @string: a #GString being used to construct an HTTP header value
994  * @name: a parameter name
995  * @value: a parameter value, or %NULL
996  *
997  * Appends something like <literal>@name=@value</literal> to @string,
998  * taking care to quote @value if needed, and if so, to escape any
999  * quotes or backslashes in @value.
1000  *
1001  * Alternatively, if @value is a non-ASCII UTF-8 string, it will be
1002  * appended using RFC5987 syntax. Although in theory this is supposed
1003  * to work anywhere in HTTP that uses this style of parameter, in
1004  * reality, it can only be used portably with the Content-Disposition
1005  * "filename" parameter.
1006  *
1007  * If @value is %NULL, this will just append @name to @string.
1008  *
1009  **/
1010 void
soup_header_g_string_append_param(GString * string,const char * name,const char * value)1011 soup_header_g_string_append_param (GString    *string,
1012 				   const char *name,
1013 				   const char *value)
1014 {
1015 	g_return_if_fail (string != NULL);
1016 	g_return_if_fail (name != NULL);
1017 
1018 	if (!value) {
1019 		g_string_append (string, name);
1020 		return;
1021 	}
1022 
1023 	append_param_internal (string, name, value, TRUE);
1024 }
1025