1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 8 -*- */
2 /*
3 * soup-headers.c: HTTP message header parsing
4 *
5 * Copyright (C) 2001-2003, Ximian, Inc.
6 */
7
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11
12 #include <stdlib.h>
13 #include <string.h>
14
15 #include "soup-misc.h"
16 #include "soup-headers.h"
17 #include "soup-message-headers-private.h"
18 #include "soup.h"
19
20 /**
21 * SECTION:soup-headers
22 * @section_id: SoupHeaders
23 * @title: SoupHeaders
24 * @short_description: Functions to help working with HTTP Headers
25 *
26 * These are utility functions to help working with HTTP headers.
27 */
28
29 /**
30 * soup_headers_parse:
31 * @str: the header string (including the Request-Line or Status-Line,
32 * but not the trailing blank line)
33 * @len: length of @str
34 * @dest: #SoupMessageHeaders to store the header values in
35 *
36 * Parses the headers of an HTTP request or response in @str and
37 * stores the results in @dest. Beware that @dest may be modified even
38 * on failure.
39 *
40 * This is a low-level method; normally you would use
41 * soup_headers_parse_request() or soup_headers_parse_response().
42 *
43 * Returns: success or failure
44 *
45 **/
46 gboolean
soup_headers_parse(const char * str,int len,SoupMessageHeaders * dest)47 soup_headers_parse (const char *str, int len, SoupMessageHeaders *dest)
48 {
49 const char *headers_start;
50 char *headers_copy, *name, *name_end, *value, *value_end;
51 char *eol, *sol, *p;
52 gsize copy_len;
53 gboolean success = FALSE;
54
55 g_return_val_if_fail (str != NULL, FALSE);
56 g_return_val_if_fail (dest != NULL, FALSE);
57
58 /* As per RFC 2616 section 19.3, we treat '\n' as the
59 * line terminator, and '\r', if it appears, merely as
60 * ignorable trailing whitespace.
61 */
62
63 /* Skip over the Request-Line / Status-Line */
64 headers_start = memchr (str, '\n', len);
65 if (!headers_start)
66 return FALSE;
67 /* No '\0's in the Request-Line / Status-Line */
68 if (memchr (str, '\0', headers_start - str))
69 return FALSE;
70
71 /* We work on a copy of the headers, which we can write '\0's
72 * into, so that we don't have to individually g_strndup and
73 * then g_free each header name and value.
74 */
75 copy_len = len - (headers_start - str);
76 headers_copy = g_malloc (copy_len + 1);
77 memcpy (headers_copy, headers_start, copy_len);
78 headers_copy[copy_len] = '\0';
79 value_end = headers_copy;
80
81 /* There shouldn't be any '\0's in the headers already, but
82 * this is the web we're talking about.
83 */
84 while ((p = memchr (headers_copy, '\0', copy_len))) {
85 memmove (p, p + 1, copy_len - (p - headers_copy));
86 copy_len--;
87 }
88
89 while (*(value_end + 1)) {
90 name = value_end + 1;
91 name_end = strchr (name, ':');
92
93 /* Reject if there is no ':', or the header name is
94 * empty, or it contains whitespace.
95 */
96 if (!name_end ||
97 name_end == name ||
98 name + strcspn (name, " \t\r\n") < name_end) {
99 /* Ignore this line. Note that if it has
100 * continuation lines, we'll end up ignoring
101 * them too since they'll start with spaces.
102 */
103 value_end = strchr (name, '\n');
104 if (!value_end)
105 goto done;
106 continue;
107 }
108
109 /* Find the end of the value; ie, an end-of-line that
110 * isn't followed by a continuation line.
111 */
112 value = name_end + 1;
113 value_end = strchr (name, '\n');
114 if (!value_end)
115 goto done;
116 while (*(value_end + 1) == ' ' || *(value_end + 1) == '\t') {
117 value_end = strchr (value_end + 1, '\n');
118 if (!value_end)
119 goto done;
120 }
121
122 *name_end = '\0';
123 *value_end = '\0';
124
125 /* Skip leading whitespace */
126 while (value < value_end &&
127 (*value == ' ' || *value == '\t' ||
128 *value == '\r' || *value == '\n'))
129 value++;
130
131 /* Collapse continuation lines */
132 while ((eol = strchr (value, '\n'))) {
133 /* find start of next line */
134 sol = eol + 1;
135 while (*sol == ' ' || *sol == '\t')
136 sol++;
137
138 /* back up over trailing whitespace on current line */
139 while (eol[-1] == ' ' || eol[-1] == '\t' || eol[-1] == '\r')
140 eol--;
141
142 /* Delete all but one SP */
143 *eol = ' ';
144 memmove (eol + 1, sol, strlen (sol) + 1);
145 }
146
147 /* clip trailing whitespace */
148 eol = strchr (value, '\0');
149 while (eol > value &&
150 (eol[-1] == ' ' || eol[-1] == '\t' || eol[-1] == '\r'))
151 eol--;
152 *eol = '\0';
153
154 /* convert (illegal) '\r's to spaces */
155 for (p = strchr (value, '\r'); p; p = strchr (p, '\r'))
156 *p = ' ';
157
158 soup_message_headers_append_untrusted_data (dest, name, value);
159 }
160 success = TRUE;
161
162 done:
163 g_free (headers_copy);
164 return success;
165 }
166
167 /**
168 * soup_headers_parse_request:
169 * @str: the headers (up to, but not including, the trailing blank line)
170 * @len: length of @str
171 * @req_headers: #SoupMessageHeaders to store the header values in
172 * @req_method: (out) (optional): if non-%NULL, will be filled in with the
173 * request method
174 * @req_path: (out) (optional): if non-%NULL, will be filled in with the
175 * request path
176 * @ver: (out) (optional): if non-%NULL, will be filled in with the HTTP
177 * version
178 *
179 * Parses the headers of an HTTP request in @str and stores the
180 * results in @req_method, @req_path, @ver, and @req_headers.
181 *
182 * Beware that @req_headers may be modified even on failure.
183 *
184 * Returns: %SOUP_STATUS_OK if the headers could be parsed, or an
185 * HTTP error to be returned to the client if they could not be.
186 **/
187 guint
soup_headers_parse_request(const char * str,int len,SoupMessageHeaders * req_headers,char ** req_method,char ** req_path,SoupHTTPVersion * ver)188 soup_headers_parse_request (const char *str,
189 int len,
190 SoupMessageHeaders *req_headers,
191 char **req_method,
192 char **req_path,
193 SoupHTTPVersion *ver)
194 {
195 const char *method, *method_end, *path, *path_end;
196 const char *version, *version_end, *headers;
197 unsigned long major_version, minor_version;
198 char *p;
199
200 g_return_val_if_fail (str != NULL, SOUP_STATUS_BAD_REQUEST);
201
202 /* RFC 2616 4.1 "servers SHOULD ignore any empty line(s)
203 * received where a Request-Line is expected."
204 */
205 while ((*str == '\r' || *str == '\n') && len > 0) {
206 str++;
207 len--;
208 }
209 if (!len)
210 return SOUP_STATUS_BAD_REQUEST;
211
212 /* RFC 2616 19.3 "[servers] SHOULD accept any amount of SP or
213 * HT characters between [Request-Line] fields"
214 */
215
216 method = method_end = str;
217 while (method_end < str + len && *method_end != ' ' && *method_end != '\t')
218 method_end++;
219 if (method_end >= str + len)
220 return SOUP_STATUS_BAD_REQUEST;
221
222 path = method_end;
223 while (path < str + len && (*path == ' ' || *path == '\t'))
224 path++;
225 if (path >= str + len)
226 return SOUP_STATUS_BAD_REQUEST;
227
228 path_end = path;
229 while (path_end < str + len && *path_end != ' ' && *path_end != '\t')
230 path_end++;
231 if (path_end >= str + len)
232 return SOUP_STATUS_BAD_REQUEST;
233
234 version = path_end;
235 while (version < str + len && (*version == ' ' || *version == '\t'))
236 version++;
237 if (version + 8 >= str + len)
238 return SOUP_STATUS_BAD_REQUEST;
239
240 if (strncmp (version, "HTTP/", 5) != 0 ||
241 !g_ascii_isdigit (version[5]))
242 return SOUP_STATUS_BAD_REQUEST;
243 major_version = strtoul (version + 5, &p, 10);
244 if (*p != '.' || !g_ascii_isdigit (p[1]))
245 return SOUP_STATUS_BAD_REQUEST;
246 minor_version = strtoul (p + 1, &p, 10);
247 version_end = p;
248 if (major_version != 1)
249 return SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
250 if (minor_version > 1)
251 return SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
252
253 headers = version_end;
254 while (headers < str + len && (*headers == '\r' || *headers == ' '))
255 headers++;
256 if (headers >= str + len || *headers != '\n')
257 return SOUP_STATUS_BAD_REQUEST;
258
259 if (!soup_headers_parse (str, len, req_headers))
260 return SOUP_STATUS_BAD_REQUEST;
261
262 if (soup_message_headers_get_expectations (req_headers) &
263 SOUP_EXPECTATION_UNRECOGNIZED)
264 return SOUP_STATUS_EXPECTATION_FAILED;
265 /* RFC 2616 14.10 */
266 if (minor_version == 0)
267 soup_message_headers_clean_connection_headers (req_headers);
268
269 if (req_method)
270 *req_method = g_strndup (method, method_end - method);
271 if (req_path)
272 *req_path = g_strndup (path, path_end - path);
273 if (ver)
274 *ver = (minor_version == 0) ? SOUP_HTTP_1_0 : SOUP_HTTP_1_1;
275
276 return SOUP_STATUS_OK;
277 }
278
279 /**
280 * soup_headers_parse_status_line:
281 * @status_line: an HTTP Status-Line
282 * @ver: (out) (optional): if non-%NULL, will be filled in with the HTTP
283 * version
284 * @status_code: (out) (optional): if non-%NULL, will be filled in with
285 * the status code
286 * @reason_phrase: (out) (optional): if non-%NULL, will be filled in with
287 * the reason phrase
288 *
289 * Parses the HTTP Status-Line string in @status_line into @ver,
290 * @status_code, and @reason_phrase. @status_line must be terminated by
291 * either "\0" or "\r\n".
292 *
293 * Returns: %TRUE if @status_line was parsed successfully.
294 **/
295 gboolean
soup_headers_parse_status_line(const char * status_line,SoupHTTPVersion * ver,guint * status_code,char ** reason_phrase)296 soup_headers_parse_status_line (const char *status_line,
297 SoupHTTPVersion *ver,
298 guint *status_code,
299 char **reason_phrase)
300 {
301 unsigned long major_version, minor_version, code;
302 const char *code_start, *code_end, *phrase_start, *phrase_end;
303 char *p;
304
305 g_return_val_if_fail (status_line != NULL, FALSE);
306
307 if (strncmp (status_line, "HTTP/", 5) == 0 &&
308 g_ascii_isdigit (status_line[5])) {
309 major_version = strtoul (status_line + 5, &p, 10);
310 if (*p != '.' || !g_ascii_isdigit (p[1]))
311 return FALSE;
312 minor_version = strtoul (p + 1, &p, 10);
313 if (major_version != 1)
314 return FALSE;
315 if (minor_version > 1)
316 return FALSE;
317 if (ver)
318 *ver = (minor_version == 0) ? SOUP_HTTP_1_0 : SOUP_HTTP_1_1;
319 } else if (!strncmp (status_line, "ICY", 3)) {
320 /* Shoutcast not-quite-HTTP format */
321 if (ver)
322 *ver = SOUP_HTTP_1_0;
323 p = (char *)status_line + 3;
324 } else
325 return FALSE;
326
327 code_start = p;
328 while (*code_start == ' ' || *code_start == '\t')
329 code_start++;
330 code_end = code_start;
331 while (*code_end >= '0' && *code_end <= '9')
332 code_end++;
333 if (code_end != code_start + 3)
334 return FALSE;
335 code = atoi (code_start);
336 if (code < 100 || code > 999)
337 return FALSE;
338 if (status_code)
339 *status_code = code;
340
341 phrase_start = code_end;
342 while (*phrase_start == ' ' || *phrase_start == '\t')
343 phrase_start++;
344 phrase_end = phrase_start + strcspn (phrase_start, "\n");
345 while (phrase_end > phrase_start &&
346 (phrase_end[-1] == '\r' || phrase_end[-1] == ' ' || phrase_end[-1] == '\t'))
347 phrase_end--;
348 if (reason_phrase)
349 *reason_phrase = g_strndup (phrase_start, phrase_end - phrase_start);
350
351 return TRUE;
352 }
353
354 /**
355 * soup_headers_parse_response:
356 * @str: the headers (up to, but not including, the trailing blank line)
357 * @len: length of @str
358 * @headers: #SoupMessageHeaders to store the header values in
359 * @ver: (out) (optional): if non-%NULL, will be filled in with the HTTP
360 * version
361 * @status_code: (out) (optional): if non-%NULL, will be filled in with
362 * the status code
363 * @reason_phrase: (out) (optional): if non-%NULL, will be filled in with
364 * the reason phrase
365 *
366 * Parses the headers of an HTTP response in @str and stores the
367 * results in @ver, @status_code, @reason_phrase, and @headers.
368 *
369 * Beware that @headers may be modified even on failure.
370 *
371 * Returns: success or failure.
372 **/
373 gboolean
soup_headers_parse_response(const char * str,int len,SoupMessageHeaders * headers,SoupHTTPVersion * ver,guint * status_code,char ** reason_phrase)374 soup_headers_parse_response (const char *str,
375 int len,
376 SoupMessageHeaders *headers,
377 SoupHTTPVersion *ver,
378 guint *status_code,
379 char **reason_phrase)
380 {
381 SoupHTTPVersion version;
382
383 g_return_val_if_fail (str != NULL, FALSE);
384
385 /* Workaround for broken servers that send extra line breaks
386 * after a response, which we then see prepended to the next
387 * response on that connection.
388 */
389 while ((*str == '\r' || *str == '\n') && len > 0) {
390 str++;
391 len--;
392 }
393 if (!len)
394 return FALSE;
395
396 if (!soup_headers_parse (str, len, headers))
397 return FALSE;
398
399 if (!soup_headers_parse_status_line (str,
400 &version,
401 status_code,
402 reason_phrase))
403 return FALSE;
404 if (ver)
405 *ver = version;
406
407 /* RFC 2616 14.10 */
408 if (version == SOUP_HTTP_1_0)
409 soup_message_headers_clean_connection_headers (headers);
410
411 return TRUE;
412 }
413
414
415 /*
416 * Parsing of specific HTTP header types
417 */
418
/* Returns the first position at or after @s that does not hold an
 * ASCII whitespace character (possibly the terminating '\0').
 */
static const char *
skip_lws (const char *s)
{
	for (; g_ascii_isspace (*s); s++)
		;
	return s;
}
426
/* Walks backwards from @s over ASCII whitespace, never going before
 * @start, and returns the position just past the last character that
 * is kept.
 */
static const char *
unskip_lws (const char *s, const char *start)
{
	const char *cursor = s;

	for (;;) {
		if (cursor <= start)
			break;
		if (!g_ascii_isspace (cursor[-1]))
			break;
		cursor--;
	}
	return cursor;
}
434
/* Advances past any mix of ASCII whitespace and @delim characters.
 * The grammar allows for multiple delimiters in a row, so all of them
 * are consumed.
 */
static const char *
skip_delims (const char *s, char delim)
{
	for (; *s == delim || g_ascii_isspace (*s); s++)
		;
	return s;
}
443
444 static const char *
skip_item(const char * s,char delim)445 skip_item (const char *s, char delim)
446 {
447 gboolean quoted = FALSE;
448 const char *start = s;
449
450 /* A list item ends at the last non-whitespace character
451 * before a delimiter which is not inside a quoted-string. Or
452 * at the end of the string.
453 */
454
455 while (*s) {
456 if (*s == '"')
457 quoted = !quoted;
458 else if (quoted) {
459 if (*s == '\\' && *(s + 1))
460 s++;
461 } else {
462 if (*s == delim)
463 break;
464 }
465 s++;
466 }
467
468 return unskip_lws (s, start);
469 }
470
471 static GSList *
parse_list(const char * header,char delim)472 parse_list (const char *header, char delim)
473 {
474 GSList *list = NULL;
475 const char *end;
476
477 header = skip_delims (header, delim);
478 while (*header) {
479 end = skip_item (header, delim);
480 list = g_slist_prepend (list, g_strndup (header, end - header));
481 header = skip_delims (end, delim);
482 }
483
484 return g_slist_reverse (list);
485 }
486
487 /**
488 * soup_header_parse_list:
489 * @header: a header value
490 *
491 * Parses a header whose content is described by RFC2616 as
492 * "#something", where "something" does not itself contain commas,
493 * except as part of quoted-strings.
494 *
495 * Returns: (transfer full) (element-type utf8): a #GSList of
496 * list elements, as allocated strings
497 **/
498 GSList *
soup_header_parse_list(const char * header)499 soup_header_parse_list (const char *header)
500 {
501 g_return_val_if_fail (header != NULL, NULL);
502
503 return parse_list (header, ',');
504 }
505
/* One entry of a quality list: the item text and its "q" value. */
typedef struct {
	char *item;
	double qval;
} QualityItem;

/* qsort() comparison callback that orders QualityItems by descending
 * qval: returns -1 when @a has the larger qval, 1 when @b has, and 0
 * when both are equal. The elements are only read, so they are
 * accessed through const pointers.
 */
static int
sort_by_qval (const void *a, const void *b)
{
	const QualityItem *qia = a;
	const QualityItem *qib = b;

	if (qia->qval == qib->qval)
		return 0;
	else if (qia->qval < qib->qval)
		return 1;
	else
		return -1;
}
524
525 /**
526 * soup_header_parse_quality_list:
527 * @header: a header value
528 * @unacceptable: (out) (optional) (transfer full) (element-type utf8): on
529 * return, will contain a list of unacceptable values
530 *
531 * Parses a header whose content is a list of items with optional
532 * "qvalue"s (eg, Accept, Accept-Charset, Accept-Encoding,
533 * Accept-Language, TE).
534 *
535 * If @unacceptable is not %NULL, then on return, it will contain the
536 * items with qvalue 0. Either way, those items will be removed from
537 * the main list.
538 *
539 * Returns: (transfer full) (element-type utf8): a #GSList of
540 * acceptable values (as allocated strings), highest-qvalue first.
541 **/
542 GSList *
soup_header_parse_quality_list(const char * header,GSList ** unacceptable)543 soup_header_parse_quality_list (const char *header, GSList **unacceptable)
544 {
545 GSList *unsorted;
546 QualityItem *array;
547 GSList *sorted, *iter;
548 char *item, *semi;
549 const char *param, *equal, *value;
550 double qval;
551 int n;
552
553 g_return_val_if_fail (header != NULL, NULL);
554
555 if (unacceptable)
556 *unacceptable = NULL;
557
558 unsorted = soup_header_parse_list (header);
559 array = g_new0 (QualityItem, g_slist_length (unsorted));
560 for (iter = unsorted, n = 0; iter; iter = iter->next) {
561 item = iter->data;
562 qval = 1.0;
563 for (semi = strchr (item, ';'); semi; semi = strchr (semi + 1, ';')) {
564 param = skip_lws (semi + 1);
565 if (*param != 'q')
566 continue;
567 equal = skip_lws (param + 1);
568 if (!equal || *equal != '=')
569 continue;
570 value = skip_lws (equal + 1);
571 if (!value)
572 continue;
573
574 if (value[0] != '0' && value[0] != '1')
575 continue;
576 qval = (double)(value[0] - '0');
577 if (value[0] == '0' && value[1] == '.') {
578 if (g_ascii_isdigit (value[2])) {
579 qval += (double)(value[2] - '0') / 10;
580 if (g_ascii_isdigit (value[3])) {
581 qval += (double)(value[3] - '0') / 100;
582 if (g_ascii_isdigit (value[4]))
583 qval += (double)(value[4] - '0') / 1000;
584 }
585 }
586 }
587
588 *semi = '\0';
589 break;
590 }
591
592 if (qval == 0.0) {
593 if (unacceptable) {
594 *unacceptable = g_slist_prepend (*unacceptable,
595 item);
596 }
597 } else {
598 array[n].item = item;
599 array[n].qval = qval;
600 n++;
601 }
602 }
603 g_slist_free (unsorted);
604
605 qsort (array, n, sizeof (QualityItem), sort_by_qval);
606 sorted = NULL;
607 while (n--)
608 sorted = g_slist_prepend (sorted, array[n].item);
609 g_free (array);
610
611 return sorted;
612 }
613
614 /**
615 * soup_header_free_list: (skip)
616 * @list: a #GSList returned from soup_header_parse_list() or
617 * soup_header_parse_quality_list()
618 *
619 * Frees @list.
620 **/
621 void
soup_header_free_list(GSList * list)622 soup_header_free_list (GSList *list)
623 {
624 g_slist_free_full (list, g_free);
625 }
626
627 /**
628 * soup_header_contains:
629 * @header: An HTTP header suitable for parsing with
630 * soup_header_parse_list()
631 * @token: a token
632 *
633 * Parses @header to see if it contains the token @token (matched
634 * case-insensitively). Note that this can't be used with lists
635 * that have qvalues.
636 *
637 * Returns: whether or not @header contains @token
638 **/
639 gboolean
soup_header_contains(const char * header,const char * token)640 soup_header_contains (const char *header, const char *token)
641 {
642 const char *end;
643 guint len;
644
645 g_return_val_if_fail (header != NULL, FALSE);
646 g_return_val_if_fail (token != NULL, FALSE);
647
648 len = strlen (token);
649
650 header = skip_delims (header, ',');
651 while (*header) {
652 end = skip_item (header, ',');
653 if (end - header == len &&
654 !g_ascii_strncasecmp (header, token, len))
655 return TRUE;
656 header = skip_delims (end, ',');
657 }
658
659 return FALSE;
660 }
661
/* Decodes a quoted-string in place.
 *
 * The first character of @quoted_string is taken to be the opening
 * quote and is dropped. The text after it is copied down to the start
 * of the buffer until an unescaped '"' or the end of the string is
 * reached; a backslash that is followed by another character is
 * removed and that character is kept literally. Anything after the
 * closing quote is discarded.
 *
 * An empty string is left untouched, because there is no opening
 * quote to step over.
 */
static void
decode_quoted_string (char *quoted_string)
{
	char *src, *dst;

	if (*quoted_string == '\0')
		return;

	src = quoted_string + 1;
	dst = quoted_string;
	while (*src && *src != '"') {
		if (*src == '\\' && *(src + 1))
			src++;
		*dst++ = *src++;
	}
	*dst = '\0';
}
676
677 static gboolean
decode_rfc5987(char * encoded_string)678 decode_rfc5987 (char *encoded_string)
679 {
680 char *q, *decoded;
681 gboolean iso_8859_1 = FALSE;
682
683 q = strchr (encoded_string, '\'');
684 if (!q)
685 return FALSE;
686 if (g_ascii_strncasecmp (encoded_string, "UTF-8",
687 q - encoded_string) == 0)
688 ;
689 else if (g_ascii_strncasecmp (encoded_string, "iso-8859-1",
690 q - encoded_string) == 0)
691 iso_8859_1 = TRUE;
692 else
693 return FALSE;
694
695 q = strchr (q + 1, '\'');
696 if (!q)
697 return FALSE;
698
699 decoded = g_uri_unescape_string (q + 1, NULL);
700 if (iso_8859_1) {
701 char *utf8 = g_convert_with_fallback (decoded, -1, "UTF-8",
702 "iso-8859-1", "_",
703 NULL, NULL, NULL);
704 g_free (decoded);
705 if (!utf8)
706 return FALSE;
707 decoded = utf8;
708 }
709
710 /* If encoded_string was UTF-8, then each 3-character %-escape
711 * will be converted to a single byte, and so decoded is
712 * shorter than encoded_string. If encoded_string was
713 * iso-8859-1, then each 3-character %-escape will be
714 * converted into at most 2 bytes in UTF-8, and so it's still
715 * shorter.
716 */
717 strcpy (encoded_string, decoded);
718 g_free (decoded);
719 return TRUE;
720 }
721
722 static GHashTable *
parse_param_list(const char * header,char delim,gboolean strict)723 parse_param_list (const char *header, char delim, gboolean strict)
724 {
725 GHashTable *params;
726 GSList *list, *iter;
727 char *item, *eq, *name_end, *value;
728 gboolean override, duplicated;
729
730 params = g_hash_table_new_full (soup_str_case_hash,
731 soup_str_case_equal,
732 g_free, NULL);
733
734 list = parse_list (header, delim);
735 for (iter = list; iter; iter = iter->next) {
736 item = iter->data;
737 override = FALSE;
738
739 eq = strchr (item, '=');
740 if (eq) {
741 name_end = (char *)unskip_lws (eq, item);
742 if (name_end == item) {
743 /* That's no good... */
744 g_free (item);
745 continue;
746 }
747
748 *name_end = '\0';
749
750 value = (char *)skip_lws (eq + 1);
751
752 if (name_end[-1] == '*' && name_end > item + 1) {
753 name_end[-1] = '\0';
754 if (!decode_rfc5987 (value)) {
755 g_free (item);
756 continue;
757 }
758 override = TRUE;
759 } else if (*value == '"')
760 decode_quoted_string (value);
761 } else
762 value = NULL;
763
764 duplicated = g_hash_table_lookup_extended (params, item, NULL, NULL);
765
766 if (strict && duplicated) {
767 soup_header_free_param_list (params);
768 params = NULL;
769 g_slist_foreach (iter, (GFunc)g_free, NULL);
770 break;
771 } else if (override || !duplicated)
772 g_hash_table_replace (params, item, value);
773 else
774 g_free (item);
775 }
776
777 g_slist_free (list);
778 return params;
779 }
780
781 /**
782 * soup_header_parse_param_list:
783 * @header: a header value
784 *
785 * Parses a header which is a comma-delimited list of something like:
786 * <literal>token [ "=" ( token | quoted-string ) ]</literal>.
787 *
788 * Tokens that don't have an associated value will still be added to
789 * the resulting hash table, but with a %NULL value.
790 *
791 * This also handles RFC5987 encoding (which in HTTP is mostly used
792 * for giving UTF8-encoded filenames in the Content-Disposition
793 * header).
794 *
795 * Returns: (element-type utf8 utf8) (transfer full): a
796 * #GHashTable of list elements, which can be freed with
797 * soup_header_free_param_list().
798 **/
799 GHashTable *
soup_header_parse_param_list(const char * header)800 soup_header_parse_param_list (const char *header)
801 {
802 g_return_val_if_fail (header != NULL, NULL);
803
804 return parse_param_list (header, ',', FALSE);
805 }
806
807 /**
808 * soup_header_parse_semi_param_list:
809 * @header: a header value
810 *
811 * Parses a header which is a semicolon-delimited list of something
812 * like: <literal>token [ "=" ( token | quoted-string ) ]</literal>.
813 *
814 * Tokens that don't have an associated value will still be added to
815 * the resulting hash table, but with a %NULL value.
816 *
817 * This also handles RFC5987 encoding (which in HTTP is mostly used
818 * for giving UTF8-encoded filenames in the Content-Disposition
819 * header).
820 *
821 * Returns: (element-type utf8 utf8) (transfer full): a
822 * #GHashTable of list elements, which can be freed with
823 * soup_header_free_param_list().
824 *
825 **/
826 GHashTable *
soup_header_parse_semi_param_list(const char * header)827 soup_header_parse_semi_param_list (const char *header)
828 {
829 g_return_val_if_fail (header != NULL, NULL);
830
831 return parse_param_list (header, ';', FALSE);
832 }
833
834 /**
835 * soup_header_parse_param_list_strict:
836 * @header: a header value
837 *
838 * A strict version of soup_header_parse_param_list()
839 * that bails out if there are duplicate parameters.
840 * Note that this function will treat RFC5987-encoded
841 * parameters as duplicated if an ASCII version is also
842 * present. For header fields that might contain
843 * RFC5987-encoded parameters, use
844 * soup_header_parse_param_list() instead.
845 *
846 * Returns: (element-type utf8 utf8) (transfer full) (nullable):
847 * a #GHashTable of list elements, which can be freed with
848 * soup_header_free_param_list() or %NULL if there are duplicate
849 * elements.
850 *
851 **/
852 GHashTable *
soup_header_parse_param_list_strict(const char * header)853 soup_header_parse_param_list_strict (const char *header)
854 {
855 g_return_val_if_fail (header != NULL, NULL);
856
857 return parse_param_list (header, ',', TRUE);
858 }
859
860 /**
861 * soup_header_parse_semi_param_list_strict:
862 * @header: a header value
863 *
864 * A strict version of soup_header_parse_semi_param_list()
865 * that bails out if there are duplicate parameters.
866 * Note that this function will treat RFC5987-encoded
867 * parameters as duplicated if an ASCII version is also
868 * present. For header fields that might contain
869 * RFC5987-encoded parameters, use
870 * soup_header_parse_semi_param_list() instead.
871 *
872 * Returns: (element-type utf8 utf8) (transfer full) (nullable):
873 * a #GHashTable of list elements, which can be freed with
874 * soup_header_free_param_list() or %NULL if there are duplicate
875 * elements.
876 *
877 **/
878 GHashTable *
soup_header_parse_semi_param_list_strict(const char * header)879 soup_header_parse_semi_param_list_strict (const char *header)
880 {
881 g_return_val_if_fail (header != NULL, NULL);
882
883 return parse_param_list (header, ';', TRUE);
884 }
885
886 /**
887 * soup_header_free_param_list:
888 * @param_list: (element-type utf8 utf8): a #GHashTable returned from soup_header_parse_param_list()
889 * or soup_header_parse_semi_param_list()
890 *
891 * Frees @param_list.
892 **/
893 void
soup_header_free_param_list(GHashTable * param_list)894 soup_header_free_param_list (GHashTable *param_list)
895 {
896 g_return_if_fail (param_list != NULL);
897
898 g_hash_table_destroy (param_list);
899 }
900
901 static void
append_param_rfc5987(GString * string,const char * name,const char * value)902 append_param_rfc5987 (GString *string,
903 const char *name,
904 const char *value)
905 {
906 char *encoded;
907
908 g_string_append (string, name);
909 g_string_append (string, "*=UTF-8''");
910 encoded = g_uri_escape_string (value, "*'%()<>@,;:\\\"/[]?=", FALSE);
911 g_string_append (string, encoded);
912 g_free (encoded);
913 }
914
915 static void
append_param_quoted(GString * string,const char * name,const char * value)916 append_param_quoted (GString *string,
917 const char *name,
918 const char *value)
919 {
920 int len;
921
922 g_string_append (string, name);
923 g_string_append (string, "=\"");
924 while (*value) {
925 while (*value == '\\' || *value == '"') {
926 g_string_append_c (string, '\\');
927 g_string_append_c (string, *value++);
928 }
929 len = strcspn (value, "\\\"");
930 g_string_append_len (string, value, len);
931 value += len;
932 }
933 g_string_append_c (string, '"');
934 }
935
936 static void
append_param_internal(GString * string,const char * name,const char * value,gboolean allow_token)937 append_param_internal (GString *string,
938 const char *name,
939 const char *value,
940 gboolean allow_token)
941 {
942 const char *v;
943 gboolean use_token = allow_token;
944
945 for (v = value; *v; v++) {
946 if (*v & 0x80) {
947 if (g_utf8_validate (value, -1, NULL)) {
948 append_param_rfc5987 (string, name, value);
949 return;
950 } else {
951 use_token = FALSE;
952 break;
953 }
954 } else if (!soup_char_is_token (*v))
955 use_token = FALSE;
956 }
957
958 if (use_token) {
959 g_string_append (string, name);
960 g_string_append_c (string, '=');
961 g_string_append (string, value);
962 } else
963 append_param_quoted (string, name, value);
964 }
965
966 /**
967 * soup_header_g_string_append_param_quoted:
968 * @string: a #GString being used to construct an HTTP header value
969 * @name: a parameter name
970 * @value: a parameter value
971 *
972 * Appends something like <literal>@name="@value"</literal> to
973 * @string, taking care to escape any quotes or backslashes in @value.
974 *
975 * If @value is (non-ASCII) UTF-8, this will instead use RFC 5987
976 * encoding, just like soup_header_g_string_append_param().
977 *
978 **/
979 void
soup_header_g_string_append_param_quoted(GString * string,const char * name,const char * value)980 soup_header_g_string_append_param_quoted (GString *string,
981 const char *name,
982 const char *value)
983 {
984 g_return_if_fail (string != NULL);
985 g_return_if_fail (name != NULL);
986 g_return_if_fail (value != NULL);
987
988 append_param_internal (string, name, value, FALSE);
989 }
990
991 /**
992 * soup_header_g_string_append_param:
993 * @string: a #GString being used to construct an HTTP header value
994 * @name: a parameter name
995 * @value: a parameter value, or %NULL
996 *
997 * Appends something like <literal>@name=@value</literal> to @string,
998 * taking care to quote @value if needed, and if so, to escape any
999 * quotes or backslashes in @value.
1000 *
1001 * Alternatively, if @value is a non-ASCII UTF-8 string, it will be
1002 * appended using RFC5987 syntax. Although in theory this is supposed
1003 * to work anywhere in HTTP that uses this style of parameter, in
1004 * reality, it can only be used portably with the Content-Disposition
1005 * "filename" parameter.
1006 *
1007 * If @value is %NULL, this will just append @name to @string.
1008 *
1009 **/
1010 void
soup_header_g_string_append_param(GString * string,const char * name,const char * value)1011 soup_header_g_string_append_param (GString *string,
1012 const char *name,
1013 const char *value)
1014 {
1015 g_return_if_fail (string != NULL);
1016 g_return_if_fail (name != NULL);
1017
1018 if (!value) {
1019 g_string_append (string, name);
1020 return;
1021 }
1022
1023 append_param_internal (string, name, value, TRUE);
1024 }
1025