1 /*
2  * Copyright (c) 2017-2021 Free Software Foundation, Inc.
3  *
4  * This file is part of libwget.
5  *
6  * Libwget is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Libwget is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
18  *
19  *
20  * HTTP parsing routines
21  *
22  * Resources:
23  * RFC 2616
24  * RFC 6265
25  *
26  */
27 
28 #include <config.h>
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <c-ctype.h>
34 #include <time.h>
35 #include <errno.h>
36 #include <stdint.h>
37 
38 #include <wget.h>
39 #include "private.h"
40 #include "http.h"
41 
/* Bit flag stored in http_ctype[] for octets that are HTTP 'separators'. */
#define HTTP_CTYPE_SEPARATOR (1<<0)

/*
 * Non-zero if octet c is flagged as separator in http_ctype[].
 * The cast to unsigned char keeps the index within 0..255.
 */
#define _http_isseparator(c) (http_ctype[(unsigned char)(c)]&HTTP_CTYPE_SEPARATOR)

/*
 * Per-octet classification table, listed in ASCII order.
 * Every octet not named here is implicitly zero (no flags set).
 */
static const unsigned char
	http_ctype[256] = {
		['\t'] = HTTP_CTYPE_SEPARATOR,
		[' '] = HTTP_CTYPE_SEPARATOR,
		['\"'] = HTTP_CTYPE_SEPARATOR,
		['('] = HTTP_CTYPE_SEPARATOR,
		[')'] = HTTP_CTYPE_SEPARATOR,
		[','] = HTTP_CTYPE_SEPARATOR,
		['/'] = HTTP_CTYPE_SEPARATOR,
		[':'] = HTTP_CTYPE_SEPARATOR,
		[';'] = HTTP_CTYPE_SEPARATOR,
		['<'] = HTTP_CTYPE_SEPARATOR,
		['='] = HTTP_CTYPE_SEPARATOR,
		['>'] = HTTP_CTYPE_SEPARATOR,
		['?'] = HTTP_CTYPE_SEPARATOR,
		['@'] = HTTP_CTYPE_SEPARATOR,
		['['] = HTTP_CTYPE_SEPARATOR,
		['\\'] = HTTP_CTYPE_SEPARATOR,
		[']'] = HTTP_CTYPE_SEPARATOR,
		['{'] = HTTP_CTYPE_SEPARATOR,
		['}'] = HTTP_CTYPE_SEPARATOR
	};
67 
68 /**Gets the hostname of the remote endpoint.
69  * \param conn a wget_http_connection
70  * \return A string containing hostname. Returned memory is owned by
71  *         _conn_ and should not be modified or freed.
72  */
wget_http_get_host(const wget_http_connection * conn)73 const char *wget_http_get_host(const wget_http_connection *conn)
74 {
75 	return conn->esc_host;
76 }
77 
78 /**Gets the port number of the remote endpoint.
79  * \param conn a wget_http_connection
80  * \return A string containing port number. Returned memory is owned by
81  *         _conn_ and should not be modified or freed.
82  */
wget_http_get_port(const wget_http_connection * conn)83 uint16_t wget_http_get_port(const wget_http_connection *conn)
84 {
85 	return conn->port;
86 }
87 
88 /**Get the scheme used by the connection.
89  * \param conn a wget_http_connection
90  * \return A WGET_IRI_SCHEM_* value.
91  */
wget_http_get_scheme(const wget_http_connection * conn)92 wget_iri_scheme wget_http_get_scheme(const wget_http_connection *conn)
93 {
94 	return conn->scheme;
95 }
96 
97 /**Gets the protocol used by the connection
98  * \param conn a wget_http_connection
99  * \return Either WGET_PROTOCOL_HTTP_1_1 or WGET_PROTOCOL_HTTP_2_0
100  */
wget_http_get_protocol(const wget_http_connection * conn)101 int wget_http_get_protocol(const wget_http_connection *conn)
102 {
103 	return conn->protocol;
104 }
105 
wget_http_isseparator(char c)106 bool wget_http_isseparator(char c)
107 {
108 	// return strchr("()<>@,;:\\\"/[]?={} \t", c) != NULL;
109 	return _http_isseparator(c);
110 }
111 
112 // TEXT           = <any OCTET except CTLs, but including LWS>
113 //int http_istext(char c)
114 //{
115 //	return (c>=32 && c<=126) || c=='\r' || c=='\n' || c=='\t';
116 //}
117 
118 // token          = 1*<any CHAR except CTLs or separators>
119 
/**
 * Checks whether a character may be part of an HTTP token.
 *
 * \param c character to classify
 * \return true if \p c is in the range 33..126 and is not a separator
 */
bool wget_http_istoken(char c)
{
	// control characters, space, DEL and everything outside 7-bit ASCII
	if (c <= 32 || c > 126)
		return false;

	return !_http_isseparator(c);
}
124 
/**
 * Reads the run of token characters at the start of \p s.
 *
 * \param s input string
 * \param token receives a copy of the token made by wget_strmemdup();
 *              the copy has length 0 if \p s does not start with a token character
 * \return pointer to the first character behind the token
 */
const char *wget_http_parse_token(const char *s, const char **token)
{
	const char *start = s;

	while (wget_http_istoken(*s))
		s++;

	*token = wget_strmemdup(start, s - start);

	return s;
}
135 
136 // quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
137 // qdtext         = <any TEXT except <">>
138 // quoted-pair    = "\" CHAR
139 // TEXT           = <any OCTET except CTLs, but including LWS>
140 // CTL            = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
141 // LWS            = [CRLF] 1*( SP | HT )
142 
/**
 * Reads a quoted-string (`"..."`) at the start of \p s.
 *
 * Scanning is relaxed: a backslash followed by any character is skipped as a
 * pair, and a missing closing quote ends the string at the end of input.
 * The copy keeps backslashes as they are; no unescaping takes place.
 *
 * \param s input string
 * \param qstring receives a copy (wget_strmemdup()) of the text between the quotes,
 *                or NULL if \p s does not start with a double quote
 * \return pointer behind the closing quote, or \p s unchanged if there was no opening quote
 */
const char *wget_http_parse_quoted_string(const char *s, const char **qstring)
{
	if (*s != '\"') {
		*qstring = NULL;
		return s;
	}

	const char *start = ++s;

	// relaxed scanning
	for (; *s && *s != '\"'; s++) {
		if (*s == '\\' && s[1])
			s++; // step over the escaped character, too
	}

	*qstring = wget_strmemdup(start, s - start);

	if (*s == '\"')
		s++; // consume the closing quote

	return s;
}
164 
165 // generic-param  =  token [ EQUAL gen-value ]
166 // gen-value      =  token / host / quoted-string
167 
/**
 * Parses one parameter of the form `[;] name [= value]`.
 *
 * Leading blanks and one optional ';' are skipped. The value may be a token
 * or a quoted-string.
 *
 * \param s input string
 * \param param receives a copy of the name, or NULL if the end of the input
 *              was reached before a name could start
 * \param value receives a copy of the value, or NULL if there is no `= value` part
 * \return pointer behind the parsed part of \p s
 */
const char *wget_http_parse_param(const char *s, const char **param, const char **value)
{
	*param = NULL;
	*value = NULL;

	while (c_isblank(*s)) s++;

	if (*s == ';') {
		s++;
		while (c_isblank(*s)) s++;
	}

	if (*s == '\0')
		return s;

	const char *name = s;

	while (wget_http_istoken(*s))
		s++;
	*param = wget_strmemdup(name, s - name);

	while (c_isblank(*s)) s++;

	if (*s == '\0')
		return s;

	// The character behind the name is consumed even if it is not '='.
	if (*s++ != '=')
		return s;

	while (c_isblank(*s)) s++;

	if (*s == '\"')
		return wget_http_parse_quoted_string(s, value);

	return wget_http_parse_token(s, value);
}
198 
199 // message-header = field-name ":" [ field-value ]
200 // field-name     = token
201 // field-value    = *( field-content | LWS )
202 // field-content  = <the OCTETs making up the field-value
203 //                  and consisting of either *TEXT or combinations
204 //                  of token, separators, and quoted-string>
205 
/**
 * Parses the field name of a header line (`name: value`).
 *
 * \param s input string, leading blanks are skipped
 * \param name receives a copy of the name token (see wget_http_parse_token())
 * \return pointer behind the first ':' following the name,
 *         or pointer to the end of the string if there is no ':'
 */
const char *wget_http_parse_name(const char *s, const char **name)
{
	while (c_isblank(*s)) s++;

	s = wget_http_parse_token(s, name);

	// ignore anything between the token and the colon
	for (; *s; s++) {
		if (*s == ':')
			return s + 1;
	}

	return s;
}
216 
/**
 * Like wget_http_parse_name(), but without copying the name.
 *
 * \param s input string, leading blanks are skipped
 * \param name receives a pointer into \p s where the name token starts
 * \param namelen receives the length of the name token
 * \return pointer behind the first ':' following the name,
 *         or pointer to the end of the string if there is no ':'
 */
const char *wget_parse_name_fixed(const char *s, const char **name, size_t *namelen)
{
	while (c_isblank(*s)) s++;

	const char *start = s;

	while (wget_http_istoken(*s))
		s++;

	*name = start;
	*namelen = s - start;

	// ignore anything between the token and the colon
	for (; *s; s++) {
		if (*s == ':')
			return s + 1;
	}

	return s;
}
232 
compare_param(wget_http_header_param * p1,wget_http_header_param * p2)233 static int WGET_GCC_NONNULL_ALL compare_param(wget_http_header_param *p1, wget_http_header_param *p2)
234 {
235 	return wget_strcasecmp_ascii(p1->name, p2->name);
236 }
237 
/**
 * Appends a copy of \p param to the vector \p *params.
 *
 * \param params address of the vector; if it holds NULL, a vector using
 *               compare_param() as compare function is created and stored here first
 * \param param the parameter; sizeof(*param) bytes are handed to wget_vector_add_memdup()
 */
void wget_http_add_param(wget_vector **params, wget_http_header_param *param)
{
	wget_vector *vec = *params;

	if (vec == NULL) {
		vec = wget_vector_create(4, (wget_vector_compare_fn *) compare_param);
		*params = vec;
	}

	wget_vector_add_memdup(vec, param, sizeof(*param));
}
243 
244 /*
245   Link           = "Link" ":" #link-value
246   link-value     = "<" URI-Reference ">" *( ";" link-param )
247   link-param     = ( ( "rel" "=" relation-types )
248 					  | ( "anchor" "=" <"> URI-Reference <"> )
249 					  | ( "rev" "=" relation-types )
250 					  | ( "hreflang" "=" Language-Tag )
251 					  | ( "media" "=" ( MediaDesc | ( <"> MediaDesc <"> ) ) )
252 					  | ( "title" "=" quoted-string )
253 					  | ( "title*" "=" ext-value )
254 					  | ( "type" "=" ( media-type | quoted-mt ) )
255 					  | ( link-extension ) )
256   link-extension = ( parmname [ "=" ( ptoken | quoted-string ) ] )
257 					  | ( ext-name-star "=" ext-value )
258   ext-name-star  = parmname "*" ; reserved for RFC2231-profiled
259 										  ; extensions.  Whitespace NOT
260 										  ; allowed in between.
261   ptoken         = 1*ptokenchar
262   ptokenchar     = "!" | "#" | "$" | "%" | "&" | "'" | "("
263 					  | ")" | "*" | "+" | "-" | "." | "/" | DIGIT
264 					  | ":" | "<" | "=" | ">" | "?" | "@" | ALPHA
265 					  | "[" | "]" | "^" | "_" | "`" | "{" | "|"
266 					  | "}" | "~"
267   media-type     = type-name "/" subtype-name
268   quoted-mt      = <"> media-type <">
269   relation-types = relation-type
270 					  | <"> relation-type *( 1*SP relation-type ) <">
271   relation-type  = reg-rel-type | ext-rel-type
272   reg-rel-type   = LOALPHA *( LOALPHA | DIGIT | "." | "-" )
273   ext-rel-type   = URI
274 */
/**
 * Parses one link-value (`<URI-Reference> *( ";" link-param )`) of a Link header.
 *
 * \p link is zeroed first. Of the link-params, only `rel` (values
 * `describedby` and `duplicate`), `pri` and the first `type` are evaluated;
 * all others are skipped.
 *
 * \param s input string, leading blanks are skipped
 * \param link receives the parsed data; `uri` and `type` are allocated copies
 * \return pointer behind the parsed link-value. If \p s does not start with '<'
 *         or the closing '>' is missing, nothing is consumed and the pointer
 *         to the '<' resp. to the first non-blank character is returned (never NULL).
 */
const char *wget_http_parse_link(const char *s, wget_http_link *link)
{
	memset(link, 0, sizeof(*link));

	while (c_isblank(*s)) s++;

	if (*s != '<')
		return s;

	// URI reference as of RFC 3987 (if relative, resolve as of RFC 3986)
	const char *uri = s + 1;
	const char *end = strchr(uri, '>');

	// Unterminated URI reference: keep 's' valid instead of handing the
	// NULL result of strchr() back to the caller.
	if (!end)
		return s;

	link->uri = wget_strmemdup(uri, end - uri);
	s = end + 1;

	while (c_isblank(*s)) s++;

	while (*s == ';') {
		const char *name = NULL, *value = NULL;

		s = wget_http_parse_param(s, &name, &value);

		if (name && value) {
			if (!wget_strcasecmp_ascii(name, "rel")) {
				if (!wget_strcasecmp_ascii(value, "describedby"))
					link->rel = link_rel_describedby;
				else if (!wget_strcasecmp_ascii(value, "duplicate"))
					link->rel = link_rel_duplicate;
			} else if (!wget_strcasecmp_ascii(name, "pri")) {
				link->pri = atoi(value);
			} else if (!wget_strcasecmp_ascii(name, "type")) {
				if (!link->type) {
					// hand the string over to 'link', so it must not be freed below
					link->type = value;
					value = NULL;
				}
			}

			while (c_isblank(*s)) s++;
		}

		xfree(name);
		xfree(value);
	}

	// skip the remainder up to the next blank
	while (*s && !c_isblank(*s)) s++;

	return s;
}
325 
326 // from RFC 3230:
327 // Digest = "Digest" ":" #(instance-digest)
328 // instance-digest = digest-algorithm "=" <encoded digest output>
329 // digest-algorithm = token
330 
/**
 * Parses one instance-digest (`algorithm "=" encoded-digest`) of a Digest header.
 *
 * \p digest is zeroed first. The encoded digest may be a quoted-string;
 * otherwise it ends at the next blank, ',' or ';'.
 *
 * \param s input string, leading blanks are skipped
 * \param digest receives allocated copies of the algorithm and, if an '=' is
 *               present, of the encoded digest
 * \return pointer to the next blank behind the parsed part, or to the end of the string
 */
const char *wget_http_parse_digest(const char *s, wget_http_digest *digest)
{
	memset(digest, 0, sizeof(*digest));

	while (c_isblank(*s)) s++;
	s = wget_http_parse_token(s, &digest->algorithm);

	while (c_isblank(*s)) s++;

	if (*s == '=') {
		// step over '=' and the blanks behind it
		do {
			s++;
		} while (c_isblank(*s));

		if (*s == '\"') {
			s = wget_http_parse_quoted_string(s, &digest->encoded_digest);
		} else {
			const char *start = s;

			while (*s && *s != ',' && *s != ';' && !c_isblank(*s))
				s++;

			digest->encoded_digest = wget_strmemdup(start, s - start);
		}
	}

	// skip the remainder up to the next blank
	for (; *s; s++) {
		if (c_isblank(*s))
			break;
	}

	return s;
}
357 
358 // RFC 2617:
359 // challenge   = auth-scheme 1*SP 1#auth-param
360 // auth-scheme = token
361 // auth-param  = token "=" ( token | quoted-string )
362 
/**
 * Parses one authentication challenge (`auth-scheme SP auth-param *( "," auth-param )`).
 *
 * \p challenge is zeroed first. Each `name=value` pair is put into the
 * case-insensitive string map `challenge->params` (created on demand).
 * A name without value is taken as the auth-scheme of the next challenge.
 *
 * \param s input string, leading blanks are skipped
 * \param challenge receives the auth-scheme and the params; on a syntax error
 *                  (no space behind the scheme token) `auth_scheme` is released
 * \return pointer behind the parsed challenge; on a syntax error the pointer to
 *         the offending character; if a new scheme was detected, the pointer
 *         to where that scheme's parameter parsing started
 */
const char *wget_http_parse_challenge(const char *s, wget_http_challenge *challenge)
{
	memset(challenge, 0, sizeof(*challenge));

	while (c_isblank(*s)) s++;
	s = wget_http_parse_token(s, &challenge->auth_scheme);

	// Auth scheme must have a space at the end of the token
	if (*s != ' ') {
		// parse/syntax error
		xfree(challenge->auth_scheme);
		return s;
	}
	s++;

	wget_http_header_param param;

	do {
		const char *param_start = s;

		s = wget_http_parse_param(s, &param.name, &param.value);

		if (param.name && !param.value) {
			// remember this before the name is released
			const bool has_name = *param.name != '\0';

			xfree(param.name);

			if (has_name)
				return param_start; // a new scheme detected

			continue; // empty name and no value: nothing to store
		}

		if (param.name) {
			if (!challenge->params)
				challenge->params = wget_stringmap_create_nocase(8);
			wget_stringmap_put(challenge->params, param.name, param.value);
		}

		while (c_isblank(*s)) s++;

		if (*s != ',')
			break;

		s++; // step over the comma
	} while (*s);

	return s;
}
406 
/**
 * Parses a list of authentication challenges.
 *
 * wget_http_parse_challenge() is called until the end of \p s is reached.
 * Each challenge that comes back with an auth-scheme is appended (as a
 * shallow copy) to \p challenges.
 *
 * \param s input string
 * \param challenges vector that receives the parsed challenges
 * \return pointer to the end of \p s
 */
const char *wget_http_parse_challenges(const char *s, wget_vector *challenges)
{
	wget_http_challenge challenge;

	while (*s) {
		const char *next = wget_http_parse_challenge(s, &challenge);

		if (challenge.auth_scheme) {
			wget_vector_add_memdup(challenges, &challenge, sizeof(challenge));
		}

		// wget_http_parse_challenge() consumes nothing if the input starts
		// with a non-blank character that cannot start a token (e.g. ",").
		// Step over that character, else we would loop here forever.
		if (next == s)
			next++;

		s = next;
	}

	return s;
}
420 
/**
 * Parses the value of a Location header.
 *
 * The value spans from the first non-blank character up to the end of the
 * line (CR, LF or end of string), without trailing blanks.
 *
 * \param s input string
 * \param location receives a copy (wget_strmemdup()) of the value
 * \return pointer behind the last character that went into \p *location
 */
const char *wget_http_parse_location(const char *s, const char **location)
{
	while (c_isblank(*s)) s++;

	const char *start = s;

	/*
	 * The correct (and still lenient) variant was to stop at the first blank.
	 *
	 * And then there were spaces in the URI, see
	 *   https://gitlab.com/gnuwget/wget2/issues/420
	 *
	 * So take everything up to the end of the line.
	 */
	while (*s != '\0' && *s != '\r' && *s != '\n')
		s++;

	// remove trailing spaces (OWS - optional white space)
	while (s > start && c_isblank(s[-1]))
		s--;

	*location = wget_strmemdup(start, s - start);

	return s;
}
442 
443 // Transfer-Encoding       = "Transfer-Encoding" ":" 1#transfer-coding
444 // transfer-coding         = "chunked" | transfer-extension
445 // transfer-extension      = token *( ";" parameter )
446 // parameter               = attribute "=" value
447 // attribute               = token
448 // value                   = token | quoted-string
449 
/**
 * Parses the value of a Transfer-Encoding header.
 *
 * The whole remaining string (behind leading blanks) is compared, ignoring
 * ASCII case, with "identity". Any other value is taken as 'chunked'.
 *
 * \param s input string
 * \param transfer_encoding receives wget_transfer_encoding_identity or
 *                          wget_transfer_encoding_chunked
 * \return pointer behind the leading token of the value
 */
const char *wget_http_parse_transfer_encoding(const char *s, wget_transfer_encoding *transfer_encoding)
{
	while (c_isblank(*s)) s++;

	*transfer_encoding = wget_strcasecmp_ascii(s, "identity") == 0
		? wget_transfer_encoding_identity
		: wget_transfer_encoding_chunked;

	while (wget_http_istoken(*s))
		s++;

	return s;
}
463 
464 // Content-Type   = "Content-Type" ":" media-type
465 // media-type     = type "/" subtype *( ";" parameter )
466 // type           = token
467 // subtype        = token
468 // example: Content-Type: text/html; charset=ISO-8859-4
469 
/**
 * Parses the value of a Content-Type header, e.g. `text/html; charset=ISO-8859-4`.
 *
 * \param s input string, leading blanks are skipped
 * \param content_type if not NULL, receives a copy of the media type (`type/subtype`)
 * \param charset if not NULL, receives the value of the first `charset`
 *                parameter, or NULL if there is none. If \p charset is NULL,
 *                the parameters are not parsed at all.
 * \return pointer behind the parsed part of \p s
 */
const char *wget_http_parse_content_type(const char *s, const char **content_type, const char **charset)
{
	while (c_isblank(*s)) s++;

	const char *start = s;

	// the media type consists of token characters and '/'
	while (wget_http_istoken(*s) || *s == '/')
		s++;

	if (content_type)
		*content_type = wget_strmemdup(start, s - start);

	if (!charset)
		return s;

	*charset = NULL;

	while (*s) {
		const char *name, *value;

		s = wget_http_parse_param(s, &name, &value);

		const int is_charset = !wget_strcasecmp_ascii("charset", name);

		xfree(name);

		if (is_charset) {
			*charset = value; // the caller takes over the string
			break;
		}

		xfree(value);
	}

	return s;
}
498 
499 // RFC 6266 - Use of the Content-Disposition Header Field in the Hypertext Transfer Protocol (HTTP)
500 // content-disposition = "Content-Disposition" ":" disposition-type *( ";" disposition-parm )
501 // disposition-type    = "inline" | "attachment" | disp-ext-type ; case-insensitive
502 // disp-ext-type       = token
503 // disposition-parm    = filename-parm | disp-ext-parm
504 // filename-parm       = "filename" "=" value | "filename*" "=" ext-value
505 // disp-ext-parm       = token "=" value | ext-token "=" ext-value
506 // ext-token           = <the characters in token, followed by "*">
507 //
508 // Defined in [RFC2616]:
509 //
510 // token         = <token, defined in [RFC2616], Section 2.2>
511 // quoted-string = <quoted-string, defined in [RFC2616], Section 2.2>
512 // value         = <value, defined in [RFC2616], Section 3.6> ; token | quoted-string
513 //
514 // Defined in [RFC5987]:
515 //
516 // ext-value   = <ext-value, defined in [RFC5987], Section 3.2>
517 
wget_http_parse_content_disposition(const char * s,const char ** filename)518 const char *wget_http_parse_content_disposition(const char *s, const char **filename)
519 {
520 	wget_http_header_param param;
521 	char *p;
522 
523 	if (filename) {
524 		*filename = NULL;
525 
526 		while (*s && !*filename) {
527 			s = wget_http_parse_param(s, &param.name, &param.value);
528 			if (param.value && !wget_strcasecmp_ascii("filename", param.name)) {
529 				// just take the last path part as filename
530 				if (!*filename) {
531 					if ((p = strpbrk(param.value,"/\\"))) {
532 						p = wget_strdup(p + 1);
533 					} else {
534 						p = (char *) param.value;
535 						param.value = NULL;
536 					}
537 
538 					wget_percent_unescape(p);
539 					if (!wget_str_is_valid_utf8(p)) {
540 						// if it is not UTF-8, assume ISO-8859-1
541 						// see https://stackoverflow.com/questions/93551/how-to-encode-the-filename-parameter-of-content-disposition-header-in-http
542 						*filename = wget_str_to_utf8(p, "iso-8859-1");
543 						xfree(p);
544 					} else {
545 						*filename = p;
546 						p = NULL;
547 					}
548 				}
549 			} else if (param.value && !wget_strcasecmp_ascii("filename*", param.name)) {
550 				// RFC5987
551 				// ext-value     = charset  "'" [ language ] "'" value-chars
552 				// ; like RFC 2231's <extended-initial-value>
553 				// ; (see [RFC2231], Section 7)
554 
555 				// charset       = "UTF-8" / "ISO-8859-1" / mime-charset
556 
557 				// mime-charset  = 1*mime-charsetc
558 				// mime-charsetc = ALPHA / DIGIT
559 				//		/ "!" / "#" / "$" / "%" / "&"
560 				//		/ "+" / "-" / "^" / "_" / "`"
561 				//		/ "{" / "}" / "~"
562 				//		; as <mime-charset> in Section 2.3 of [RFC2978]
563 				//		; except that the single quote is not included
564 				//		; SHOULD be registered in the IANA charset registry
565 
566 				// language      = <Language-Tag, defined in [RFC5646], Section 2.1>
567 
568 				// value-chars   = *( pct-encoded / attr-char )
569 
570 				// pct-encoded   = "%" HEXDIG HEXDIG
571 				//		; see [RFC3986], Section 2.1
572 
573 				// attr-char     = ALPHA / DIGIT
574 				//		/ "!" / "#" / "$" / "&" / "+" / "-" / "."
575 				//		/ "^" / "_" / "`" / "|" / "~"
576 				//		; token except ( "*" / "'" / "%" )
577 
578 				if ((p = strchr(param.value, '\''))) {
579 					const char *charset = param.value;
580 					const char *language = p + 1;
581 					*p = 0;
582 					if ((p = strchr(language, '\''))) {
583 						*p++ = 0;
584 						if (*p) {
585 							wget_percent_unescape(p);
586 							if (wget_str_needs_encoding(p))
587 								*filename = wget_str_to_utf8(p, charset);
588 							else
589 								*filename = wget_strdup(p);
590 
591 							// just take the last path part as filename
592 							if (*filename && (p = strpbrk(*filename, "/\\"))) {
593 								p = wget_strdup(p + 1);
594 								xfree(*filename);
595 								*filename = p;
596 							}
597 
598 							xfree(param.name);
599 							xfree(param.value);
600 							break; // stop looping, we found the final filename
601 						}
602 					}
603 				}
604 			}
605 			xfree(param.name);
606 			xfree(param.value);
607 		}
608 	}
609 
610 	return s;
611 }
612 
613 // RFC 7469
614 // Example:
615 //   Public-Key-Pins:
616 //        pin-sha256="d6qzRu9zOECb90Uez27xWltNsj0e1Md7GkYYkVoZWmM=";
617 //	       pin-sha256="E9CZ9INDbd+2eRQozYqqbQ2yXLVKB9+xcprMF+44U1g=";
618 //	       pin-sha256="LPJNul+wow4m6DsqxbninhsWHlwfp0JecwQzYpOLmCQ=";
619 //	       max-age=10000; includeSubDomains
/**
 * Parses the value of a Public-Key-Pins header into \p hpkp.
 *
 * 'include subdomains' is reset to false first. Then each directive is evaluated:
 *  - `max-age=<n>` sets the max age,
 *  - `pin-<type>=<value>` adds a pin of type `<type>`,
 *  - `includeSubDomains` (without value) sets 'include subdomains' to true.
 * Directive names are compared ignoring ASCII case, unknown ones are skipped.
 *
 * \param s input string
 * \param hpkp object that receives the parsed values
 * \return pointer to the end of \p s
 */
const char *wget_http_parse_public_key_pins(const char *s, wget_hpkp *hpkp)
{
	wget_hpkp_set_include_subdomains(hpkp, false);

	while (*s) {
		wget_http_header_param param;

		s = wget_http_parse_param(s, &param.name, &param.value);

		if (!param.value) {
			// directive without value
			if (wget_strcasecmp_ascii(param.name, "includeSubDomains") == 0)
				wget_hpkp_set_include_subdomains(hpkp, true);
		} else if (wget_strcasecmp_ascii(param.name, "max-age") == 0) {
			wget_hpkp_set_maxage(hpkp, (int64_t) atoll(param.value));
		} else if (wget_strncasecmp_ascii(param.name, "pin-", 4) == 0) {
			// the pin type is what follows "pin-", e.g. "sha256"
			wget_hpkp_pin_add(hpkp, param.name + 4, param.value);
		}

		xfree(param.name);
		xfree(param.value);
	}

	return s;
}
646 
647 // RFC 6797
648 //
649 // Strict-Transport-Security = "Strict-Transport-Security" ":" [ directive ]  *( ";" [ directive ] )
650 // directive                 = directive-name [ "=" directive-value ]
651 // directive-name            = token
652 // directive-value           = token | quoted-string
653 
wget_http_parse_strict_transport_security(const char * s,int64_t * maxage,bool * include_subdomains)654 const char *wget_http_parse_strict_transport_security(const char *s, int64_t *maxage, bool *include_subdomains)
655 {
656 	wget_http_header_param param;
657 
658 	*maxage = 0;
659 	*include_subdomains = 0;
660 
661 	while (*s) {
662 		s = wget_http_parse_param(s, &param.name, &param.value);
663 
664 		if (param.value) {
665 			if (!wget_strcasecmp_ascii(param.name, "max-age")) {
666 				*maxage = (int64_t) atoll(param.value);
667 			}
668 		} else {
669 			if (!wget_strcasecmp_ascii(param.name, "includeSubDomains")) {
670 				*include_subdomains = 1;
671 			}
672 		}
673 
674 		xfree(param.name);
675 		xfree(param.value);
676 	}
677 
678 	return s;
679 }
680 
681 // Content-Encoding  = "Content-Encoding" ":" 1#content-coding
682 
wget_http_parse_content_encoding(const char * s,char * content_encoding)683 const char *wget_http_parse_content_encoding(const char *s, char *content_encoding)
684 {
685 	while (c_isblank(*s)) s++;
686 
687 	if (!wget_strcasecmp_ascii(s, "gzip") || !wget_strcasecmp_ascii(s, "x-gzip"))
688 		*content_encoding = wget_content_encoding_gzip;
689 	else if (!wget_strcasecmp_ascii(s, "deflate"))
690 		*content_encoding = wget_content_encoding_deflate;
691 	else if (!wget_strcasecmp_ascii(s, "bzip2"))
692 		*content_encoding = wget_content_encoding_bzip2;
693 	else if (!wget_strcasecmp_ascii(s, "xz") || !wget_strcasecmp_ascii(s, "lzma") || !wget_strcasecmp_ascii(s, "x-lzma"))
694 		// 'xz' is the tag currently understood by Firefox (2.1.2014)
695 		// 'lzma' / 'x-lzma' are the tags currently understood by ELinks
696 		*content_encoding = wget_content_encoding_lzma;
697 	else if (!wget_strcasecmp_ascii(s, "br"))
698 		*content_encoding = wget_content_encoding_brotli;
699 	else if (!wget_strcasecmp_ascii(s, "zstd"))
700 		*content_encoding = wget_content_encoding_zstd;
701 	else if (!wget_strcasecmp_ascii(s, "lzip"))
702 		*content_encoding = wget_content_encoding_lzip;
703 	else
704 		*content_encoding = wget_content_encoding_identity;
705 
706 	while (wget_http_istoken(*s)) s++;
707 
708 	return s;
709 }
710 
/**
 * Parses the value of a Connection header (a comma-separated list).
 *
 * \param s input string
 * \param keep_alive receives true if any non-empty list element starts
 *                   (behind leading blanks, ignoring ASCII case) with
 *                   "keep-alive", else false
 * \return pointer to the end of \p s (the terminating 0 byte)
 */
const char *wget_http_parse_connection(const char *s, bool *keep_alive)
{
	*keep_alive = false;

	while (*s) {
		// e points to the comma that ends this element, or to the end of the string
		const char *e = strchrnul(s, ',');

		if (e != s) {
			while (c_isblank(*s)) s++;

			if (!wget_strncasecmp_ascii(s, "keep-alive", 10))
				*keep_alive = true;
		}

		// Last element: stop at the terminating 0 byte. Stepping over it
		// would hand a pointer behind the string back to the caller.
		if (!*e)
			return e;

		s = e + 1;
	}

	return s;
}
728 
/**
 * Parses the value of an ETag header.
 *
 * The value spans from the first non-blank character up to the next blank
 * or the end of the string.
 *
 * \param s input string
 * \param etag receives a copy (wget_strmemdup()) of the value
 * \return pointer behind the value
 */
const char *wget_http_parse_etag(const char *s, const char **etag)
{
	while (c_isblank(*s)) s++;

	const char *start = s;

	while (*s != '\0' && !c_isblank(*s))
		s++;

	*etag = wget_strmemdup(start, s - start);

	return s;
}
740 
741 /*
742 // returns GMT/UTC time as an integer of format YYYYMMDDHHMMSS
743 // this makes us independent from size of time_t - work around possible year 2038 problems
744 static long long NONNULL_ALL parse_rfc1123_date(const char *s)
745 {
746 	// we simply can't use strptime() since it requires us to setlocale()
747 	// which is not thread-safe !!!
748 	static const char *mnames[12] = {
749 		"Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
750 	};
751 	static int days_per_month[12] = {
752 		31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
753 	};
754 	int day, mon = 0, year, hour, min, sec, leap, it;
755 	char mname[4] = "";
756 
757 	if (sscanf(s, " %*[a-zA-Z], %02d %3s %4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) {
758 		// RFC 822 / 1123: Wed, 09 Jun 2021 10:18:14 GMT
759 	}
760 	else if (sscanf(s, " %*[a-zA-Z], %2d-%3s-%4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) {
761 		// RFC 850 / 1036 or Netscape: Wednesday, 09-Jun-21 10:18:14 or Wed, 09-Jun-2021 10:18:14
762 	}
763 	else if (sscanf(s, " %*[a-zA-Z], %3s %2d %2d:%2d:%2d %4d", mname, &day, &hour, &min, &sec, &year) >= 6) {
764 		// ANSI C's asctime(): Wed Jun 09 10:18:14 2021
765 	} else {
766 		error_printf(_("Failed to parse date '%s'\n"), s);
767 		return 0; // return as session cookie
768 	}
769 
770 	if (*mname) {
771 		for (it = 0; it < countof(mnames); it++) {
772 			if (!wget_strcasecmp_ascii(mname, mnames[it])) {
773 				mon = it + 1;
774 				break;
775 			}
776 		}
777 	}
778 
779 	if (year < 70 && year >= 0) year += 2000;
780 	else if (year >= 70 && year <= 99) year += 1900;
781 
782 	if (mon == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
783 		leap = 1;
784 	else
785 		leap = 0;
786 
787 	// we don't handle leap seconds
788 
789 	if (year < 1601 || mon < 1 || mon > 12 || day < 1 || (day > days_per_month[mon - 1] + leap) ||
790 		hour < 0 || hour > 23 || min < 0 || min > 60 || sec < 0 || sec > 60)
791 	{
792 		error_printf(_("Failed to parse date '%s'\n"), s);
793 		return 0; // return as session cookie
794 	}
795 
796 	return(((((long long)year*100 + mon)*100 + day)*100 + hour)*100 + min)*100 + sec;
797 }
798 */
799 
800 // copied this routine from
801 // https://ftp.netbsd.org/pub/pkgsrc/current/pkgsrc/pkgtools/libnbcompat/files/timegm.c
802 
/*
 * Returns the number of leap days between the beginning of year y1 and the
 * beginning of year y2, i.e. the number of leap years y with y1 <= y < y2
 * (Gregorian rules: every 4th year, except every 100th, but every 400th).
 */
static int leap_days(int y1, int y2)
{
	const int from = y1 - 1;
	const int to = y2 - 1;

	const int every_4th = to / 4 - from / 4;
	const int every_100th = to / 100 - from / 100;
	const int every_400th = to / 400 - from / 400;

	return every_4th - every_100th + every_400th;
}
809 
810 /*
811 RFC 2616, 3.3.1 Full Date
812 HTTP-date    = rfc1123-date | rfc850-date | asctime-date
813 rfc1123-date = wkday "," SP date1 SP time SP "GMT"
814 rfc850-date  = weekday "," SP date2 SP time SP "GMT"
815 asctime-date = wkday SP date3 SP time SP 4DIGIT
816 date1        = 2DIGIT SP month SP 4DIGIT
817 					; day month year (e.g., 02 Jun 1982)
818 date2        = 2DIGIT "-" month "-" 2DIGIT
819 					; day-month-year (e.g., 02-Jun-82)
820 date3        = month SP ( 2DIGIT | ( SP 1DIGIT ))
821 					; month day (e.g., Jun  2)
822 time         = 2DIGIT ":" 2DIGIT ":" 2DIGIT
823 					; 00:00:00 - 23:59:59
824 wkday        = "Mon" | "Tue" | "Wed"
825 				 | "Thu" | "Fri" | "Sat" | "Sun"
826 weekday      = "Monday" | "Tuesday" | "Wednesday"
827 				 | "Thursday" | "Friday" | "Saturday" | "Sunday"
828 month        = "Jan" | "Feb" | "Mar" | "Apr"
829 				 | "May" | "Jun" | "Jul" | "Aug"
830 				 | "Sep" | "Oct" | "Nov" | "Dec"
831 */
832 
/**
 * Parses an HTTP date string into the number of seconds since
 * 1970-01-01 00:00:00 GMT/UTC.
 *
 * These layouts are tried in order:
 *  - RFC 822 / 1123:             Wed, 09 Jun 2021 10:18:14 GMT
 *  - RFC 850 / 1036 or Netscape: Wednesday, 09-Jun-21 10:18:14 or Wed, 09-Jun-2021 10:18:14
 *  - ANSI C's asctime():         Wed Jun 09 10:18:14 2021
 *  - non-standard:               1 Mar 2027 09:23:12 GMT
 *
 * Years 0-69 are taken as 2000-2069, years 70-99 as 1970-1999. Years before
 * 1970 are set to 1970. Text behind the recognized fields (e.g. the time
 * zone) is not evaluated. Leap seconds are not handled, but a seconds value
 * of 60 is accepted.
 *
 * \param s date string
 * \return seconds since 1970-01-01 00:00:00, or 0 if the string could not be
 *         parsed or contains out-of-range values (an error message is printed then)
 */
int64_t wget_http_parse_full_date(const char *s)
{
	// we simply can't use strptime() since it requires us to setlocale()
	// which is not thread-safe !!!
	static const char *mnames[12] = {
		"Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	static const int days_per_month[12] = {
		31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
	};
	// cumulated number of days until beginning of month for non-leap years
	static const int sum_of_days[12] = {
		0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
	};

	int day, mon = 0, year, hour, min, sec, leap_month, leap_year, days;
	char mname[4] = "";

	if (sscanf(s, " %*[a-zA-Z], %02d %3s %4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) {
		// RFC 822 / 1123: Wed, 09 Jun 2021 10:18:14 GMT
	}
	else if (sscanf(s, " %*[a-zA-Z], %2d-%3s-%4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) {
		// RFC 850 / 1036 or Netscape: Wednesday, 09-Jun-21 10:18:14 or Wed, 09-Jun-2021 10:18:14
	}
	else if (sscanf(s, " %*[a-zA-Z] %3s %2d %2d:%2d:%2d %4d", mname, &day, &hour, &min, &sec, &year) >= 6) {
		// ANSI C's asctime(): Wed Jun 09 10:18:14 2021
	}
	else if (sscanf(s, " %d %3s %4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) {
		// non-standard: 1 Mar 2027 09:23:12 GMT
	} else {
		error_printf(_("Failed to parse date '%s'\n"), s);
		return 0; // return as session cookie
	}

	// translate the month name into 1..12, 'mon' stays 0 if the name is unknown
	if (*mname) {
		for (unsigned it = 0; it < countof(mnames); it++) {
			if (!wget_strcasecmp_ascii(mname, mnames[it])) {
				mon = it + 1;
				break;
			}
		}
	}

	if (year < 70 && year >= 0) year += 2000;
	else if (year >= 70 && year <= 99) year += 1900;
	if (year < 1970) year = 1970;

	// we don't handle leap seconds

	leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
	leap_month = (mon == 2 && leap_year);

	// 'mon' is checked first, so days_per_month[] is only indexed with 0..11.
	// Minutes are 00..59; only the seconds may be 60 (leap second notation).
	if (mon < 1 || mon > 12 || day < 1 || (day > days_per_month[mon - 1] + leap_month) ||
		hour < 0 || hour > 23 || min < 0 || min > 59 || sec < 0 || sec > 60)
	{
		error_printf(_("Failed to parse date '%s'\n"), s);
		return 0; // return as session cookie
	}

	// calculate time_t (represented as int64_t) from GMT/UTC time values

	days = 365 * (year - 1970) + leap_days(1970, year);
	days += sum_of_days[mon - 1] + (mon > 2 && leap_year);
	days += day - 1;

	return (((int64_t)days * 24 + hour) * 60 + min) * 60 + sec;
}
900 
/**
 * Prints a point in time as HTTP date, e.g. `Wed, 09 Jun 2021 10:18:14 GMT`.
 *
 * \param t seconds since 1970-01-01 00:00:00 GMT/UTC
 * \param buf buffer that receives the 0-terminated string
 * \param bufsize size of \p buf in bytes; if 0, \p buf is not touched
 * \return \p buf. It holds an empty string if the time could not be
 *         converted by gmtime_r().
 */
char *wget_http_print_date(int64_t t, char *buf, size_t bufsize)
{
	static const char *dnames[7] = {
		"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
	};
	static const char *mnames[12] = {
		"Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	struct tm tm;
	time_t tt;

	if (!bufsize)
		return buf;

	// Cap the value only if time_t really is narrower than 64 bits.
	// The width of time_t is decided here, not the data model of the
	// platform: there are non-LP64 platforms with a 64bit time_t.
	if (sizeof(time_t) < sizeof(t) && t > 2147483647)
		tt = 2147483647;
	else
		tt = (time_t) t;

	if (gmtime_r(&tt, &tm)) {
		wget_snprintf(buf, bufsize, "%s, %02d %s %d %02d:%02d:%02d GMT",
			dnames[tm.tm_wday],tm.tm_mday,mnames[tm.tm_mon],tm.tm_year+1900,
			tm.tm_hour, tm.tm_min, tm.tm_sec);
	} else
		*buf = 0;

	return buf;
}
934 
935 // adjust time (t) by number of seconds (n)
936 /*
937 static long long adjust_time(long long t, int n)
938 {
939 	static int days_per_month[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
940 	int day, mon, year, hour, min, sec, leap;
941 
942 	sec = t % 100;
943 	min = (t /= 100) % 100;
944 	hour = (t /= 100) % 100;
945 	day = (t /= 100) % 100;
946 	mon = (t /= 100) % 100;
947 	year = t / 100;
948 
949 	sec += n;
950 
951 	if (n >= 0) {
952 		if (sec >= 60) {
953 			min += sec / 60;
954 			sec %= 60;
955 		}
956 		if (min >= 60) {
957 			hour += min / 60;
958 			min %= 60;
959 		}
960 		if (hour >= 24) {
961 			day += hour / 24;
962 			hour %= 24;
963 		}
964 		while (1) {
965 			if (mon == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
966 				leap = 1;
967 			else
968 				leap = 0;
969 			if (day > days_per_month[mon - 1] + leap) {
970 				day -= (days_per_month[mon - 1] + leap);
971 				mon++;
972 				if (mon > 12) {
973 					mon = 1;
974 					year++;
975 				}
976 			} else break;
977 		}
978 	} else { // n<0
979 		if (sec < 0) {
980 			min += (sec - 59) / 60;
981 			sec = 59 + (sec + 1) % 60;
982 		}
983 		if (min < 0) {
984 			hour += (min - 59) / 60;
985 			min = 59 + (min + 1) % 60;
986 		}
987 		if (hour < 0) {
988 			day += (hour - 23) / 24;
989 			hour = 23 + (hour + 1) % 24;
990 		}
991 		for (;;) {
992 			if (day <= 0) {
993 				if (--mon < 1) {
994 					mon = 12;
995 					year--;
996 				}
997 				if (mon == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
998 					leap = 1;
999 				else
1000 					leap = 0;
1001 				day += (days_per_month[mon - 1] + leap);
1002 			} else break;
1003 		}
1004 	}
1005 
1006 	return (((((long long)year*100 + mon)*100 + day)*100 + hour)*100 + min)*100 + sec;
1007 }
1008 
1009 // return current GMT/UTC
1010 
1011 static int64_t get_current_time(void)
1012 {
1013 	int64_t t = time(NULL);
1014 	struct tm tm;
1015 
1016 	gmtime_r(&t, &tm);
1017 
1018 	return (((((int64_t)(tm.tm_year + 1900)*100 + tm.tm_mon + 1)*100 + tm.tm_mday)*100 + tm.tm_hour)*100 + tm.tm_min)*100 + tm.tm_sec;
1019 }
1020 */
1021 
1022 /*
1023  RFC 6265
1024 
1025  set-cookie-header = "Set-Cookie:" SP set-cookie-string
1026  set-cookie-string = cookie-pair *( ";" SP cookie-av )
1027  cookie-pair       = cookie-name "=" cookie-value
1028  cookie-name       = token
1029  cookie-value      = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )
1030  cookie-octet      = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E
1031                        ; US-ASCII characters excluding CTLs,
1032                        ; whitespace DQUOTE, comma, semicolon,
1033                        ; and backslash
1034  token             = <token, defined in [RFC2616], Section 2.2>
1035 
1036  cookie-av         = expires-av / max-age-av / domain-av /
1037                      path-av / secure-av / httponly-av /
1038                      extension-av
1039  expires-av        = "Expires=" sane-cookie-date
1040  sane-cookie-date  = <rfc1123-date, defined in [RFC2616], Section 3.3.1>
1041  max-age-av        = "Max-Age=" non-zero-digit *DIGIT
1042                        ; In practice, both expires-av and max-age-av
1043                        ; are limited to dates representable by the
1044                        ; user agent.
1045  non-zero-digit    = %x31-39
1046                        ; digits 1 through 9
1047  domain-av         = "Domain=" domain-value
1048  domain-value      = <subdomain>
1049                        ; defined in [RFC1034], Section 3.5, as
1050                        ; enhanced by [RFC1123], Section 2.1
1051  path-av           = "Path=" path-value
1052  path-value        = <any CHAR except CTLs or ";">
1053  secure-av         = "Secure"
1054  httponly-av       = "HttpOnly"
1055  extension-av      = <any CHAR except CTLs or ";">
1056 */
// HTTP-level entry point for parsing a Set-Cookie header value (grammar above).
// Forwards <s> and <cookie> unchanged to wget_cookie_parse_setcookie() and
// returns whatever that function returns.
const char *wget_http_parse_setcookie(const char *s, wget_cookie **cookie)
{
	return wget_cookie_parse_setcookie(s, cookie);
}
1061 
cookie_free(void * cookie)1062 static void cookie_free(void *cookie)
1063 {
1064 	if (cookie)
1065 		wget_cookie_free((wget_cookie **) &cookie);
1066 }
1067 
// Return non-zero if the <namelen> bytes at <name> are exactly <literal>,
// ignoring ASCII case. Comparing the length first rejects empty names and
// names that are merely a prefix of <literal>.
static int header_name_equals(const char *name, size_t namelen, const char *literal)
{
	return namelen == strlen(literal) && !wget_strncasecmp_ascii(name, literal, namelen);
}

// Parse the authentication challenge in <value0> and append it to
// resp->challenges, creating that vector on first use.
// Returns WGET_E_SUCCESS, or WGET_E_MEMORY if the challenge could not be allocated.
static int add_challenge(wget_http_response *resp, const char *value0)
{
	wget_http_challenge *challenge = wget_malloc(sizeof(wget_http_challenge));

	if (!challenge)
		return WGET_E_MEMORY;

	wget_http_parse_challenge(value0, challenge);

	if (!resp->challenges) {
		resp->challenges = wget_vector_create(2, NULL);
		wget_vector_set_destructor(resp->challenges, (wget_vector_destructor *) wget_http_free_challenge);
	}
	wget_vector_add(resp->challenges, challenge);

	return WGET_E_SUCCESS;
}

/*
 * Parse a single response header field and store the result in <resp>.
 *
 * resp     - response object to update
 * name     - header field name, not necessarily 0-terminated
 * namelen  - length of name in bytes
 * value    - header field value, not necessarily 0-terminated
 * valuelen - length of value in bytes
 *
 * The name must match a known header exactly (ASCII case-insensitive, except
 * for the HTTP/2 pseudo header ":status" which is compared case-sensitively).
 *
 * Returns
 *   WGET_E_SUCCESS  the header is known and has been processed
 *   WGET_E_UNKNOWN  the header is not handled by this function
 *   WGET_E_INVALID  name or value is NULL
 *   WGET_E_MEMORY   a memory allocation failed
 */
int wget_http_parse_header_line(wget_http_response *resp, const char *name, size_t namelen, const char *value, size_t valuelen)
{
	if (!name || !value)
		return WGET_E_INVALID;

	char valuebuf[256];
	char *value0;
	int ret = WGET_E_SUCCESS;

	// 0-terminated copy of the value for the string based sub-parsers;
	// it is either valuebuf itself or separately allocated memory (see cleanup below)
	value0 = wget_strmemcpy_a(valuebuf, sizeof(valuebuf), value, valuelen);
	if (!value0)
		return WGET_E_MEMORY;

	// dispatch on the lower-cased first character to limit the number of comparisons
	switch (*name | 0x20) {
	case ':':
		if (namelen == sizeof(":status") - 1 && !memcmp(name, ":status", namelen) && valuelen == 3) {
			resp->code = ((value[0] - '0') * 10 + (value[1] - '0')) * 10 + (value[2] - '0');
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'c':
		if (header_name_equals(name, namelen, "content-encoding")) {
			wget_http_parse_content_encoding(value0, &resp->content_encoding);
		} else if (header_name_equals(name, namelen, "content-type")) {
			// first occurrence wins
			if (!resp->content_type && !resp->content_type_encoding)
				wget_http_parse_content_type(value0, &resp->content_type, &resp->content_type_encoding);
		} else if (header_name_equals(name, namelen, "content-length")) {
			resp->content_length = (size_t)atoll(value0);
			resp->content_length_valid = 1;
		} else if (header_name_equals(name, namelen, "content-disposition")) {
			// first occurrence wins
			if (!resp->content_filename)
				wget_http_parse_content_disposition(value0, &resp->content_filename);
		} else if (header_name_equals(name, namelen, "connection")) {
			wget_http_parse_connection(value0, &resp->keep_alive);
		} else if (header_name_equals(name, namelen, "Content-Security-Policy")) {
			resp->csp = 1;
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'd':
		if (header_name_equals(name, namelen, "digest")) {
			// https://tools.ietf.org/html/rfc3230
			wget_http_digest digest;
			wget_http_parse_digest(value0, &digest);
			// debug_printf("%s: %s\n",digest.algorithm,digest.encoded_digest);
			if (!resp->digests) {
				resp->digests = wget_vector_create(4, NULL);
				wget_vector_set_destructor(resp->digests, (wget_vector_destructor *) wget_http_free_digest);
			}
			wget_vector_add_memdup(resp->digests, &digest, sizeof(digest));
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'e':
		if (header_name_equals(name, namelen, "etag")) {
			// first occurrence wins
			if (!resp->etag)
				wget_http_parse_etag(value0, &resp->etag);
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'i':
		if (header_name_equals(name, namelen, "icy-metaint")) {
			resp->icy_metaint = atoi(value0);
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'l':
		if (header_name_equals(name, namelen, "last-modified")) {
			// Last-Modified: Thu, 07 Feb 2008 15:03:24 GMT
			resp->last_modified = wget_http_parse_full_date(value0);
		} else if (resp->code / 100 == 3 && header_name_equals(name, namelen, "location")) {
			// only evaluated for 3xx responses; first occurrence wins
			if (!resp->location)
				wget_http_parse_location(value0, &resp->location);
		} else if (resp->code / 100 == 3 && header_name_equals(name, namelen, "link")) {
			// only evaluated for 3xx responses
			wget_http_link link;
			wget_http_parse_link(value0, &link);
			// debug_printf("link->uri=%s\n",link.uri);
			if (!resp->links) {
				resp->links = wget_vector_create(8, NULL);
				wget_vector_set_destructor(resp->links, (wget_vector_destructor *) wget_http_free_link);
			}
			wget_vector_add_memdup(resp->links, &link, sizeof(link));
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'p':
		if (header_name_equals(name, namelen, "public-key-pins")) {
			// first occurrence wins
			if (!resp->hpkp) {
				resp->hpkp = wget_hpkp_new();
				wget_http_parse_public_key_pins(value0, resp->hpkp);
				debug_printf("new host pubkey pinnings added to hpkp db\n");
			}
		} else if (header_name_equals(name, namelen, "proxy-authenticate")) {
			ret = add_challenge(resp, value0);
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 's':
		if (header_name_equals(name, namelen, "set-cookie")) {
			// this is a parser. content validation must be done by higher level functions.
			wget_cookie *cookie = NULL; // defined value even if the parser does not set it
			wget_http_parse_setcookie(value0, &cookie);

			if (cookie) {
				if (!resp->cookies) {
					resp->cookies = wget_vector_create(4, NULL);
					wget_vector_set_destructor(resp->cookies, cookie_free);
				}
				wget_vector_add(resp->cookies, cookie);
			}
		} else if (header_name_equals(name, namelen, "strict-transport-security")) {
			resp->hsts = 1;
			wget_http_parse_strict_transport_security(value0, &resp->hsts_maxage, &resp->hsts_include_subdomains);
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 't':
		if (header_name_equals(name, namelen, "transfer-encoding")) {
			wget_http_parse_transfer_encoding(value0, &resp->transfer_encoding);
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'w':
		if (header_name_equals(name, namelen, "www-authenticate")) {
			ret = add_challenge(resp, value0);
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'x':
		if (header_name_equals(name, namelen, "x-archive-orig-last-modified")) {
			resp->last_modified = wget_http_parse_full_date(value0);
		} else
			ret = WGET_E_UNKNOWN;
		break;
	default:
		ret = WGET_E_UNKNOWN;
		break;
	}

	// release the value copy unless it lives in the stack buffer
	if (value0 != valuebuf)
		xfree(value0);

	return ret;
}
1242 
1243 /* content of <buf> will be destroyed */
1244 /* buf must be 0-terminated */
wget_http_parse_response_header(char * buf)1245 wget_http_response *wget_http_parse_response_header(char *buf)
1246 {
1247 	char *eol;
1248 
1249 	wget_http_response *resp = wget_calloc(1, sizeof(wget_http_response));
1250 	if (!resp)
1251 		return NULL;
1252 
1253 	if (sscanf(buf, " HTTP/%3hd.%3hd %3hd %31[^\r\n] ",
1254 		&resp->major, &resp->minor, &resp->code, resp->reason) >= 3) {
1255 		if ((eol = strchr(buf + 10, '\n'))) {
1256 			// eol[-1]=0;
1257 			// debug_printf("# %s\n",buf);
1258 		} else {
1259 			// empty HTTP header
1260 			return resp;
1261 		}
1262 	} else if (sscanf(buf, " ICY %3hd %31[^\r\n] ", &resp->code, resp->reason) >= 1) {
1263 		if ((eol = strchr(buf + 4, '\n'))) {
1264 			// eol[-1]=0;
1265 			// debug_printf("# %s\n",buf);
1266 		} else {
1267 			// empty HTTP header
1268 			return resp;
1269 		}
1270 	} else {
1271 		error_printf(_("HTTP response header not found\n"));
1272 		xfree(resp);
1273 		return NULL;
1274 	}
1275 
1276 	for (char *line = eol + 1; eol && *line && *line != '\r' && *line != '\n'; line = eol ? eol + 1 : NULL) {
1277 		eol = strchr(line, '\n');
1278 		while (eol && c_isblank(eol[1])) { // handle split lines
1279 			*eol = eol[-1] = ' ';
1280 			eol = strchr(eol, '\n');
1281 		}
1282 
1283 		if (eol) {
1284 			if (eol[-1] == '\r')
1285 				eol[-1] = 0;
1286 			else
1287 				*eol = 0;
1288 		}
1289 
1290 		size_t namelen, valuelen;
1291 		const char *name;
1292 		const char *value = wget_parse_name_fixed(line, &name, &namelen);
1293 		// value now points directly after :
1294 
1295 		if (eol)
1296 			valuelen = eol - value - (eol[-1] == 0);
1297 		else
1298 			valuelen = strlen(value);
1299 
1300 		wget_http_parse_header_line(resp, name, namelen, value, valuelen);
1301 	}
1302 
1303 	return resp;
1304 }
1305 
// Release a header parameter: its name, its value and the structure itself.
// A NULL <param> is accepted and ignored.
void wget_http_free_param(wget_http_header_param *param)
{
	if (!param)
		return;

	xfree(param->name);
	xfree(param->value);
	xfree(param);
}
1312 
// Release a link entry: its uri, its type and the structure itself.
// A NULL <link> is accepted and ignored.
void wget_http_free_link(wget_http_link *link)
{
	if (!link)
		return;

	xfree(link->uri);
	xfree(link->type);
	xfree(link);
}
1319 
// Release a vector of links by passing it on to wget_vector_free().
void wget_http_free_links(wget_vector **links)
{
	wget_vector_free(links);
}
1324 
// Release a digest entry: its algorithm, its encoded digest and the structure itself.
// A NULL <digest> is accepted and ignored.
void wget_http_free_digest(wget_http_digest *digest)
{
	if (!digest)
		return;

	xfree(digest->algorithm);
	xfree(digest->encoded_digest);
	xfree(digest);
}
1331 
// Release a vector of digests by passing it on to wget_vector_free().
void wget_http_free_digests(wget_vector **digests)
{
	wget_vector_free(digests);
}
1336 
// Release an authentication challenge: its scheme name, its parameter map
// and the structure itself. A NULL <challenge> is accepted and ignored.
void wget_http_free_challenge(wget_http_challenge *challenge)
{
	if (!challenge)
		return;

	xfree(challenge->auth_scheme);
	wget_stringmap_free(&challenge->params);
	xfree(challenge);
}
1343 
// Release a vector of authentication challenges by passing it on to wget_vector_free().
void wget_http_free_challenges(wget_vector **challenges)
{
	wget_vector_free(challenges);
}
1348 
// Release a vector of cookies by passing it on to wget_vector_free().
void wget_http_free_cookies(wget_vector **cookies)
{
	wget_vector_free(cookies);
}
1353 
// Free the HPKP object that <hpkp> points to and reset the caller's pointer
// to NULL. Does nothing if <hpkp> itself is NULL.
void wget_http_free_hpkp_entries(wget_hpkp **hpkp)
{
	if (!hpkp)
		return;

	wget_hpkp_free(*hpkp);
	*hpkp = NULL;
}
1361 
// Release a response object together with everything it owns: the parsed
// header collections, the string members and the header/body buffers.
// Does nothing if <resp> or *resp is NULL.
void wget_http_free_response(wget_http_response **resp)
{
	if (!resp || !*resp)
		return;

	wget_http_response *r = *resp;

	// collections filled while parsing the header lines
	wget_http_free_links(&r->links);
	wget_http_free_digests(&r->digests);
	wget_http_free_challenges(&r->challenges);
	wget_http_free_cookies(&r->cookies);
	wget_http_free_hpkp_entries(&r->hpkp);

	// string members
	xfree(r->content_type);
	xfree(r->content_type_encoding);
	xfree(r->content_filename);
	xfree(r->location);
	xfree(r->etag);
	// xfree(r->reason);

	// raw header and body data
	wget_buffer_free(&r->header);
	wget_buffer_free(&r->body);

	// finally the object itself
	xfree(*resp);
}
1381 
1382 /* for security reasons: set all freed pointers to NULL */
// Release a request object: the escaped resource and host buffers, the
// header vector, the body and the structure itself.
// Does nothing if <req> or *req is NULL.
void wget_http_free_request(wget_http_request **req)
{
	if (!req || !*req)
		return;

	wget_http_request *r = *req;

	wget_buffer_deinit(&r->esc_resource);
	wget_buffer_deinit(&r->esc_host);
	wget_vector_free(&r->headers);
	xfree(r->body);

	xfree(*req);
}
1393