1 /*
2 * Copyright (c) 2017-2021 Free Software Foundation, Inc.
3 *
4 * This file is part of libwget.
5 *
6 * Libwget is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Libwget is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with libwget. If not, see <https://www.gnu.org/licenses/>.
18 *
19 *
20 * HTTP parsing routines
21 *
22 * Resources:
23 * RFC 2616
24 * RFC 6265
25 *
26 */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <c-ctype.h>
34 #include <time.h>
35 #include <errno.h>
36 #include <stdint.h>
37
38 #include <wget.h>
39 #include "private.h"
40 #include "http.h"
41
/* Bit flag stored in http_ctype[]: the octet is an HTTP separator character. */
#define HTTP_CTYPE_SEPARATOR (1<<0)

/* Non-zero if c is a separator. The cast keeps the table index in 0..255 even for negative chars. */
#define _http_isseparator(c) (http_ctype[(unsigned char)(c)]&HTTP_CTYPE_SEPARATOR)

/*
 * Character class table indexed by octet value.
 * Entries that are not listed below are implicitly 0 (not a separator).
 * The entries are sorted by octet value.
 */
static const unsigned char
	http_ctype[256] = {
		['\t'] = HTTP_CTYPE_SEPARATOR,
		[' ']  = HTTP_CTYPE_SEPARATOR,
		['\"'] = HTTP_CTYPE_SEPARATOR,
		['(']  = HTTP_CTYPE_SEPARATOR,
		[')']  = HTTP_CTYPE_SEPARATOR,
		[',']  = HTTP_CTYPE_SEPARATOR,
		['/']  = HTTP_CTYPE_SEPARATOR,
		[':']  = HTTP_CTYPE_SEPARATOR,
		[';']  = HTTP_CTYPE_SEPARATOR,
		['<']  = HTTP_CTYPE_SEPARATOR,
		['=']  = HTTP_CTYPE_SEPARATOR,
		['>']  = HTTP_CTYPE_SEPARATOR,
		['?']  = HTTP_CTYPE_SEPARATOR,
		['@']  = HTTP_CTYPE_SEPARATOR,
		['[']  = HTTP_CTYPE_SEPARATOR,
		['\\'] = HTTP_CTYPE_SEPARATOR,
		[']']  = HTTP_CTYPE_SEPARATOR,
		['{']  = HTTP_CTYPE_SEPARATOR,
		['}']  = HTTP_CTYPE_SEPARATOR
	};
67
68 /**Gets the hostname of the remote endpoint.
69 * \param conn a wget_http_connection
70 * \return A string containing hostname. Returned memory is owned by
71 * _conn_ and should not be modified or freed.
72 */
wget_http_get_host(const wget_http_connection * conn)73 const char *wget_http_get_host(const wget_http_connection *conn)
74 {
75 return conn->esc_host;
76 }
77
78 /**Gets the port number of the remote endpoint.
79 * \param conn a wget_http_connection
80 * \return A string containing port number. Returned memory is owned by
81 * _conn_ and should not be modified or freed.
82 */
wget_http_get_port(const wget_http_connection * conn)83 uint16_t wget_http_get_port(const wget_http_connection *conn)
84 {
85 return conn->port;
86 }
87
88 /**Get the scheme used by the connection.
89 * \param conn a wget_http_connection
90 * \return A WGET_IRI_SCHEM_* value.
91 */
wget_http_get_scheme(const wget_http_connection * conn)92 wget_iri_scheme wget_http_get_scheme(const wget_http_connection *conn)
93 {
94 return conn->scheme;
95 }
96
97 /**Gets the protocol used by the connection
98 * \param conn a wget_http_connection
99 * \return Either WGET_PROTOCOL_HTTP_1_1 or WGET_PROTOCOL_HTTP_2_0
100 */
wget_http_get_protocol(const wget_http_connection * conn)101 int wget_http_get_protocol(const wget_http_connection *conn)
102 {
103 return conn->protocol;
104 }
105
wget_http_isseparator(char c)106 bool wget_http_isseparator(char c)
107 {
108 // return strchr("()<>@,;:\\\"/[]?={} \t", c) != NULL;
109 return _http_isseparator(c);
110 }
111
112 // TEXT = <any OCTET except CTLs, but including LWS>
113 //int http_istext(char c)
114 //{
115 // return (c>=32 && c<=126) || c=='\r' || c=='\n' || c=='\t';
116 //}
117
118 // token = 1*<any CHAR except CTLs or separators>
119
/**
 * Checks whether a character may be part of an HTTP token.
 *
 * \param c character to check
 * \return true if _c_ is in the range 33..126 and is not a separator
 */
bool wget_http_istoken(char c)
{
	// control characters, SP, DEL and everything outside of 33..126 never belong to a token
	if (c <= 32 || c > 126)
		return false;

	return !_http_isseparator(c);
}
124
/**
 * Parses a token, i.e. the longest run of token characters at the start of _s_.
 *
 * \param s string to parse
 * \param token receives a copy of the token, created by wget_strmemdup()
 * (an empty run is copied with length 0)
 * \return pointer to the first character after the token
 */
const char *wget_http_parse_token(const char *s, const char **token)
{
	const char *start = s;

	while (wget_http_istoken(*s))
		s++;

	*token = wget_strmemdup(start, s - start);

	return s;
}
135
136 // quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
137 // qdtext = <any TEXT except <">>
138 // quoted-pair = "\" CHAR
139 // TEXT = <any OCTET except CTLs, but including LWS>
140 // CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
141 // LWS = [CRLF] 1*( SP | HT )
142
wget_http_parse_quoted_string(const char * s,const char ** qstring)143 const char *wget_http_parse_quoted_string(const char *s, const char **qstring)
144 {
145 if (*s == '\"') {
146 const char *p = ++s;
147
148 // relaxed scanning
149 while (*s) {
150 if (*s == '\"') break;
151 else if (*s == '\\' && s[1]) {
152 s += 2;
153 } else
154 s++;
155 }
156
157 *qstring = wget_strmemdup(p, s - p);
158 if (*s == '\"') s++;
159 } else
160 *qstring = NULL;
161
162 return s;
163 }
164
165 // generic-param = token [ EQUAL gen-value ]
166 // gen-value = token / host / quoted-string
167
wget_http_parse_param(const char * s,const char ** param,const char ** value)168 const char *wget_http_parse_param(const char *s, const char **param, const char **value)
169 {
170 const char *p;
171
172 *param = *value = NULL;
173
174 while (c_isblank(*s)) s++;
175
176 if (*s == ';') {
177 s++;
178 while (c_isblank(*s)) s++;
179 }
180 if (!*s) return s;
181
182 for (p = s; wget_http_istoken(*s); s++);
183 *param = wget_strmemdup(p, s - p);
184
185 while (c_isblank(*s)) s++;
186
187 if (*s && *s++ == '=') {
188 while (c_isblank(*s)) s++;
189 if (*s == '\"') {
190 s = wget_http_parse_quoted_string(s, value);
191 } else {
192 s = wget_http_parse_token(s, value);
193 }
194 }
195
196 return s;
197 }
198
199 // message-header = field-name ":" [ field-value ]
200 // field-name = token
201 // field-value = *( field-content | LWS )
202 // field-content = <the OCTETs making up the field-value
203 // and consisting of either *TEXT or combinations
204 // of token, separators, and quoted-string>
205
/**
 * Parses the field name of a header line.
 *
 * \param s string to parse, leading blanks are skipped
 * \param name receives a copy of the field name (see wget_http_parse_token())
 * \return pointer behind the next ':' following the name, or to the end of the
 * string if there is no ':'
 */
const char *wget_http_parse_name(const char *s, const char **name)
{
	for (; c_isblank(*s); s++)
		;

	s = wget_http_parse_token(s, name);

	// skip everything up to the colon
	for (; *s && *s != ':'; s++)
		;

	if (*s == ':')
		s++;

	return s;
}
216
/**
 * Parses the field name of a header line without copying it.
 *
 * \param s string to parse, leading blanks are skipped
 * \param name receives a pointer into _s_ where the field name starts
 * \param namelen receives the length of the field name
 * \return pointer behind the next ':' following the name, or to the end of the
 * string if there is no ':'
 */
const char *wget_parse_name_fixed(const char *s, const char **name, size_t *namelen)
{
	for (; c_isblank(*s); s++)
		;

	const char *start = s;

	for (; wget_http_istoken(*s); s++)
		;

	*name = start;
	*namelen = s - start;

	// skip everything up to the colon
	for (; *s && *s != ':'; s++)
		;

	if (*s == ':')
		s++;

	return s;
}
232
compare_param(wget_http_header_param * p1,wget_http_header_param * p2)233 static int WGET_GCC_NONNULL_ALL compare_param(wget_http_header_param *p1, wget_http_header_param *p2)
234 {
235 return wget_strcasecmp_ascii(p1->name, p2->name);
236 }
237
/**
 * Appends a copy of a header param to a vector of params.
 *
 * \param params address of the vector; if it holds NULL, a new vector is
 * created (using compare_param() as compare function) and stored there
 * \param param the param to add; the structure itself is copied with
 * wget_vector_add_memdup()
 */
void wget_http_add_param(wget_vector **params, wget_http_header_param *param)
{
	wget_vector *vec = *params;

	if (vec == NULL) {
		// lazily create the vector on first use
		vec = wget_vector_create(4, (wget_vector_compare_fn *) compare_param);
		*params = vec;
	}

	wget_vector_add_memdup(vec, param, sizeof(*param));
}
243
244 /*
245 Link = "Link" ":" #link-value
246 link-value = "<" URI-Reference ">" *( ";" link-param )
247 link-param = ( ( "rel" "=" relation-types )
248 | ( "anchor" "=" <"> URI-Reference <"> )
249 | ( "rev" "=" relation-types )
250 | ( "hreflang" "=" Language-Tag )
251 | ( "media" "=" ( MediaDesc | ( <"> MediaDesc <"> ) ) )
252 | ( "title" "=" quoted-string )
253 | ( "title*" "=" ext-value )
254 | ( "type" "=" ( media-type | quoted-mt ) )
255 | ( link-extension ) )
256 link-extension = ( parmname [ "=" ( ptoken | quoted-string ) ] )
257 | ( ext-name-star "=" ext-value )
258 ext-name-star = parmname "*" ; reserved for RFC2231-profiled
259 ; extensions. Whitespace NOT
260 ; allowed in between.
261 ptoken = 1*ptokenchar
262 ptokenchar = "!" | "#" | "$" | "%" | "&" | "'" | "("
263 | ")" | "*" | "+" | "-" | "." | "/" | DIGIT
264 | ":" | "<" | "=" | ">" | "?" | "@" | ALPHA
265 | "[" | "]" | "^" | "_" | "`" | "{" | "|"
266 | "}" | "~"
267 media-type = type-name "/" subtype-name
268 quoted-mt = <"> media-type <">
269 relation-types = relation-type
270 | <"> relation-type *( 1*SP relation-type ) <">
271 relation-type = reg-rel-type | ext-rel-type
272 reg-rel-type = LOALPHA *( LOALPHA | DIGIT | "." | "-" )
273 ext-rel-type = URI
274 */
/**
 * Parses one link-value of a `Link` header: `"<" URI-Reference ">" *( ";" link-param )`.
 *
 * _link_ is zeroed first. From the params, only `rel` (values `describedby`
 * and `duplicate`), `pri` and the first `type` are evaluated.
 *
 * \param s string to parse, leading blanks are skipped
 * \param link receives the parsed data; `uri` and `type` are allocated strings
 * \return pointer to the first unparsed character; _s_ (behind the leading blanks)
 * if it does not start with '<'
 *
 * NOTE(review): if the opening '<' has no matching '>', NULL is returned.
 * Callers that dereference the return value have to check for that.
 */
const char *wget_http_parse_link(const char *s, wget_http_link *link)
{
	memset(link, 0, sizeof(*link));

	while (c_isblank(*s))
		s++;

	if (*s != '<')
		return s;

	// URI reference as of RFC 3987 (if relative, resolve as of RFC 3986)
	const char *uri = s + 1;

	s = strchr(uri, '>');
	if (!s)
		return NULL; // unterminated URI reference

	link->uri = wget_strmemdup(uri, s - uri);
	s++;

	while (c_isblank(*s))
		s++;

	while (*s == ';') {
		const char *name = NULL, *value = NULL;

		s = wget_http_parse_param(s, &name, &value);

		if (name && value) {
			if (!wget_strcasecmp_ascii(name, "rel")) {
				if (!wget_strcasecmp_ascii(value, "describedby"))
					link->rel = link_rel_describedby;
				else if (!wget_strcasecmp_ascii(value, "duplicate"))
					link->rel = link_rel_duplicate;
			} else if (!wget_strcasecmp_ascii(name, "pri")) {
				link->pri = atoi(value);
			} else if (!wget_strcasecmp_ascii(name, "type")) {
				if (!link->type) {
					// hand the string over to 'link' instead of copying it
					link->type = value;
					value = NULL;
				}
			}

			while (c_isblank(*s))
				s++;
		}

		xfree(name);
		xfree(value);
	}

	// skip the remainder up to the next blank
	while (*s && !c_isblank(*s))
		s++;

	return s;
}
325
326 // from RFC 3230:
327 // Digest = "Digest" ":" #(instance-digest)
328 // instance-digest = digest-algorithm "=" <encoded digest output>
329 // digest-algorithm = token
330
/**
 * Parses one instance-digest of a `Digest` header: `digest-algorithm "=" <encoded digest output>`.
 *
 * _digest_ is zeroed first.
 *
 * \param s string to parse, leading blanks are skipped
 * \param digest receives allocated copies of the algorithm name and of the encoded
 * digest; `encoded_digest` stays NULL if there is no '=' after the algorithm
 * \return pointer to the next blank after the parsed part, or to the end of the string
 */
const char *wget_http_parse_digest(const char *s, wget_http_digest *digest)
{
	memset(digest, 0, sizeof(*digest));

	while (c_isblank(*s))
		s++;

	s = wget_http_parse_token(s, &digest->algorithm);

	while (c_isblank(*s))
		s++;

	if (*s == '=') {
		do
			s++;
		while (c_isblank(*s));

		if (*s == '\"') {
			s = wget_http_parse_quoted_string(s, &digest->encoded_digest);
		} else {
			// unquoted value: ends at a blank, ',' or ';'
			const char *begin = s;

			while (*s && !c_isblank(*s) && *s != ',' && *s != ';')
				s++;

			digest->encoded_digest = wget_strmemdup(begin, s - begin);
		}
	}

	while (*s && !c_isblank(*s))
		s++;

	return s;
}
357
358 // RFC 2617:
359 // challenge = auth-scheme 1*SP 1#auth-param
360 // auth-scheme = token
361 // auth-param = token "=" ( token | quoted-string )
362
/**
 * Parses a single authentication challenge: `auth-scheme SP auth-param *( "," auth-param )`.
 *
 * _challenge_ is zeroed first.
 *
 * \param s string to parse, leading blanks are skipped
 * \param challenge receives the auth scheme and a case-insensitive stringmap of the
 * auth-params (created on the first param that has a value)
 * \return pointer to the first unparsed character
 *
 * If the scheme is not followed by a space, this is treated as a syntax error:
 * `challenge->auth_scheme` is released (and is NULL afterwards) and the position
 * behind the scheme token is returned.
 *
 * A non-empty name without a value is regarded as the start of the next challenge.
 * In this case the position in front of that name is returned.
 */
const char *wget_http_parse_challenge(const char *s, wget_http_challenge *challenge)
{
	memset(challenge, 0, sizeof(*challenge));

	while (c_isblank(*s))
		s++;

	s = wget_http_parse_token(s, &challenge->auth_scheme);

	if (*s != ' ') {
		// parse/syntax error: auth scheme must have a space at the end of the token
		xfree(challenge->auth_scheme);
		return s;
	}
	s++;

	for (;;) {
		const char *start = s;
		const char *name, *value;

		s = wget_http_parse_param(s, &name, &value);

		if (name && !value) {
			const char first = *name; // has to be read before 'name' is released

			xfree(name);

			if (first)
				return start; // a new scheme detected

			// empty name without value: nothing to store, go on parsing
		} else {
			if (name) {
				if (!challenge->params)
					challenge->params = wget_stringmap_create_nocase(8);

				// the map takes over name and value
				wget_stringmap_put(challenge->params, name, value);
			}

			while (c_isblank(*s))
				s++;

			if (*s != ',')
				break;

			s++;
		}

		if (!*s)
			break;
	}

	return s;
}
406
/**
 * Parses a list of authentication challenges.
 *
 * Each challenge found by wget_http_parse_challenge() that has an auth scheme
 * is copied into _challenges_ with wget_vector_add_memdup().
 *
 * \param s string to parse
 * \param challenges vector that the parsed challenges are added to
 * \return pointer to the first unparsed character. This is the end of the string
 * unless parsing had to stop at malformed input.
 */
const char *wget_http_parse_challenges(const char *s, wget_vector *challenges)
{
	wget_http_challenge challenge;

	while (*s) {
		const char *before = s;

		s = wget_http_parse_challenge(s, &challenge);

		if (challenge.auth_scheme) {
			wget_vector_add_memdup(challenges, &challenge, sizeof(challenge));
		}

		// On a syntax error wget_http_parse_challenge() may return its input
		// position unchanged (e.g. for "@" or ","). Stop in that case,
		// otherwise we would loop forever on the same character.
		if (s == before)
			break;
	}

	return s;
}
420
/**
 * Parses the value of a `Location` header.
 *
 * \param s string to parse, leading blanks are skipped
 * \param location receives a copy of the value, which extends up to the first CR or LF
 * (or the end of the string), with trailing blanks removed
 * \return pointer to the first character behind the copied value
 */
const char *wget_http_parse_location(const char *s, const char **location)
{
	while (c_isblank(*s))
		s++;

	const char *begin = s;

	/*
	 * The correct (and still lenient) variant was to stop at the first blank.
	 *
	 * And then there were spaces in the URI, see
	 * https://gitlab.com/gnuwget/wget2/issues/420
	 *
	 * So we take everything up to the end of the line.
	 */
	s += strcspn(s, "\r\n");

	// remove trailing spaces (OWS - optional white space)
	while (s > begin && c_isblank(s[-1]))
		s--;

	*location = wget_strmemdup(begin, s - begin);

	return s;
}
442
443 // Transfer-Encoding = "Transfer-Encoding" ":" 1#transfer-coding
444 // transfer-coding = "chunked" | transfer-extension
445 // transfer-extension = token *( ";" parameter )
446 // parameter = attribute "=" value
447 // attribute = token
448 // value = token | quoted-string
449
/**
 * Parses the value of a `Transfer-Encoding` header.
 *
 * \param s string to parse, leading blanks are skipped
 * \param transfer_encoding receives wget_transfer_encoding_identity if the rest of
 * the string equals "identity" (ASCII case-insensitive). Any other value is
 * reported as wget_transfer_encoding_chunked.
 * \return pointer to the first character that is not part of the leading token
 */
const char *wget_http_parse_transfer_encoding(const char *s, wget_transfer_encoding *transfer_encoding)
{
	while (c_isblank(*s))
		s++;

	*transfer_encoding = wget_strcasecmp_ascii(s, "identity")
		? wget_transfer_encoding_chunked
		: wget_transfer_encoding_identity;

	for (; wget_http_istoken(*s); s++)
		;

	return s;
}
463
464 // Content-Type = "Content-Type" ":" media-type
465 // media-type = type "/" subtype *( ";" parameter )
466 // type = token
467 // subtype = token
468 // example: Content-Type: text/html; charset=ISO-8859-4
469
/**
 * Parses the value of a `Content-Type` header, e.g. `text/html; charset=ISO-8859-4`.
 *
 * \param s string to parse, leading blanks are skipped
 * \param content_type if not NULL, receives a copy of the media type (`type/subtype`)
 * \param charset if not NULL, receives the value of the first `charset` parameter,
 * or NULL if there is none
 * \return pointer to the first unparsed character. If _charset_ is NULL, the
 * parameters are not parsed and the position behind the media type is returned.
 */
const char *wget_http_parse_content_type(const char *s, const char **content_type, const char **charset)
{
	while (c_isblank(*s))
		s++;

	const char *media_type = s;

	while (wget_http_istoken(*s) || *s == '/')
		s++;

	if (content_type)
		*content_type = wget_strmemdup(media_type, s - media_type);

	if (!charset)
		return s;

	*charset = NULL;

	while (*s) {
		const char *name, *value;

		s = wget_http_parse_param(s, &name, &value);

		const int found = !wget_strcasecmp_ascii("charset", name);

		xfree(name);

		if (found) {
			*charset = value; // ownership goes to the caller
			break;
		}

		xfree(value);
	}

	return s;
}
498
499 // RFC 6266 - Use of the Content-Disposition Header Field in the Hypertext Transfer Protocol (HTTP)
500 // content-disposition = "Content-Disposition" ":" disposition-type *( ";" disposition-parm )
501 // disposition-type = "inline" | "attachment" | disp-ext-type ; case-insensitive
502 // disp-ext-type = token
503 // disposition-parm = filename-parm | disp-ext-parm
504 // filename-parm = "filename" "=" value | "filename*" "=" ext-value
505 // disp-ext-parm = token "=" value | ext-token "=" ext-value
506 // ext-token = <the characters in token, followed by "*">
507 //
508 // Defined in [RFC2616]:
509 //
510 // token = <token, defined in [RFC2616], Section 2.2>
511 // quoted-string = <quoted-string, defined in [RFC2616], Section 2.2>
512 // value = <value, defined in [RFC2616], Section 3.6> ; token | quoted-string
513 //
514 // Defined in [RFC5987]:
515 //
516 // ext-value = <ext-value, defined in [RFC5987], Section 3.2>
517
wget_http_parse_content_disposition(const char * s,const char ** filename)518 const char *wget_http_parse_content_disposition(const char *s, const char **filename)
519 {
520 wget_http_header_param param;
521 char *p;
522
523 if (filename) {
524 *filename = NULL;
525
526 while (*s && !*filename) {
527 s = wget_http_parse_param(s, ¶m.name, ¶m.value);
528 if (param.value && !wget_strcasecmp_ascii("filename", param.name)) {
529 // just take the last path part as filename
530 if (!*filename) {
531 if ((p = strpbrk(param.value,"/\\"))) {
532 p = wget_strdup(p + 1);
533 } else {
534 p = (char *) param.value;
535 param.value = NULL;
536 }
537
538 wget_percent_unescape(p);
539 if (!wget_str_is_valid_utf8(p)) {
540 // if it is not UTF-8, assume ISO-8859-1
541 // see https://stackoverflow.com/questions/93551/how-to-encode-the-filename-parameter-of-content-disposition-header-in-http
542 *filename = wget_str_to_utf8(p, "iso-8859-1");
543 xfree(p);
544 } else {
545 *filename = p;
546 p = NULL;
547 }
548 }
549 } else if (param.value && !wget_strcasecmp_ascii("filename*", param.name)) {
550 // RFC5987
551 // ext-value = charset "'" [ language ] "'" value-chars
552 // ; like RFC 2231's <extended-initial-value>
553 // ; (see [RFC2231], Section 7)
554
555 // charset = "UTF-8" / "ISO-8859-1" / mime-charset
556
557 // mime-charset = 1*mime-charsetc
558 // mime-charsetc = ALPHA / DIGIT
559 // / "!" / "#" / "$" / "%" / "&"
560 // / "+" / "-" / "^" / "_" / "`"
561 // / "{" / "}" / "~"
562 // ; as <mime-charset> in Section 2.3 of [RFC2978]
563 // ; except that the single quote is not included
564 // ; SHOULD be registered in the IANA charset registry
565
566 // language = <Language-Tag, defined in [RFC5646], Section 2.1>
567
568 // value-chars = *( pct-encoded / attr-char )
569
570 // pct-encoded = "%" HEXDIG HEXDIG
571 // ; see [RFC3986], Section 2.1
572
573 // attr-char = ALPHA / DIGIT
574 // / "!" / "#" / "$" / "&" / "+" / "-" / "."
575 // / "^" / "_" / "`" / "|" / "~"
576 // ; token except ( "*" / "'" / "%" )
577
578 if ((p = strchr(param.value, '\''))) {
579 const char *charset = param.value;
580 const char *language = p + 1;
581 *p = 0;
582 if ((p = strchr(language, '\''))) {
583 *p++ = 0;
584 if (*p) {
585 wget_percent_unescape(p);
586 if (wget_str_needs_encoding(p))
587 *filename = wget_str_to_utf8(p, charset);
588 else
589 *filename = wget_strdup(p);
590
591 // just take the last path part as filename
592 if (*filename && (p = strpbrk(*filename, "/\\"))) {
593 p = wget_strdup(p + 1);
594 xfree(*filename);
595 *filename = p;
596 }
597
598 xfree(param.name);
599 xfree(param.value);
600 break; // stop looping, we found the final filename
601 }
602 }
603 }
604 }
605 xfree(param.name);
606 xfree(param.value);
607 }
608 }
609
610 return s;
611 }
612
613 // RFC 7469
614 // Example:
615 // Public-Key-Pins:
616 // pin-sha256="d6qzRu9zOECb90Uez27xWltNsj0e1Md7GkYYkVoZWmM=";
617 // pin-sha256="E9CZ9INDbd+2eRQozYqqbQ2yXLVKB9+xcprMF+44U1g=";
618 // pin-sha256="LPJNul+wow4m6DsqxbninhsWHlwfp0JecwQzYpOLmCQ=";
619 // max-age=10000; includeSubDomains
/**
 * Parses the value of a `Public-Key-Pins` header (RFC 7469).
 *
 * `include_subdomains` of _hpkp_ is reset to false first. Then each parameter is evaluated:
 * - `max-age=<n>` sets the max age
 * - `pin-<type>=<value>` adds a pin, where `<type>` is the parameter name without the `pin-` prefix
 * - `includeSubDomains` (without a value) sets `include_subdomains` to true
 *
 * Parameter names are matched ASCII case-insensitive, other parameters are ignored.
 *
 * \param s string to parse
 * \param hpkp the HPKP object to be filled
 * \return pointer to the end of the parsed string
 */
const char *wget_http_parse_public_key_pins(const char *s, wget_hpkp *hpkp)
{
	wget_hpkp_set_include_subdomains(hpkp, false);

	while (*s) {
		const char *name, *value;

		s = wget_http_parse_param(s, &name, &value);

		if (!value) {
			if (!wget_strcasecmp_ascii(name, "includeSubDomains"))
				wget_hpkp_set_include_subdomains(hpkp, true);
		} else if (!wget_strcasecmp_ascii(name, "max-age")) {
			wget_hpkp_set_maxage(hpkp, (int64_t) atoll(value));
		} else if (!wget_strncasecmp_ascii(name, "pin-", 4)) {
			// the part behind "pin-" names the type of the pin
			wget_hpkp_pin_add(hpkp, name + 4, value);
		}

		xfree(name);
		xfree(value);
	}

	return s;
}
646
647 // RFC 6797
648 //
649 // Strict-Transport-Security = "Strict-Transport-Security" ":" [ directive ] *( ";" [ directive ] )
650 // directive = directive-name [ "=" directive-value ]
651 // directive-name = token
652 // directive-value = token | quoted-string
653
/**
 * Parses the value of a `Strict-Transport-Security` header (RFC 6797).
 *
 * Directive names are matched ASCII case-insensitive, unknown directives are ignored.
 *
 * \param s string to parse
 * \param maxage receives the value of the `max-age` directive, or 0 if there is none
 * \param include_subdomains receives whether an `includeSubDomains` directive
 * (without a value) is present
 * \return pointer to the end of the parsed string
 */
const char *wget_http_parse_strict_transport_security(const char *s, int64_t *maxage, bool *include_subdomains)
{
	*maxage = 0;
	*include_subdomains = false;

	while (*s) {
		const char *name, *value;

		s = wget_http_parse_param(s, &name, &value);

		if (!value) {
			if (!wget_strcasecmp_ascii(name, "includeSubDomains"))
				*include_subdomains = true;
		} else if (!wget_strcasecmp_ascii(name, "max-age")) {
			*maxage = (int64_t) atoll(value);
		}

		xfree(name);
		xfree(value);
	}

	return s;
}
680
681 // Content-Encoding = "Content-Encoding" ":" 1#content-coding
682
wget_http_parse_content_encoding(const char * s,char * content_encoding)683 const char *wget_http_parse_content_encoding(const char *s, char *content_encoding)
684 {
685 while (c_isblank(*s)) s++;
686
687 if (!wget_strcasecmp_ascii(s, "gzip") || !wget_strcasecmp_ascii(s, "x-gzip"))
688 *content_encoding = wget_content_encoding_gzip;
689 else if (!wget_strcasecmp_ascii(s, "deflate"))
690 *content_encoding = wget_content_encoding_deflate;
691 else if (!wget_strcasecmp_ascii(s, "bzip2"))
692 *content_encoding = wget_content_encoding_bzip2;
693 else if (!wget_strcasecmp_ascii(s, "xz") || !wget_strcasecmp_ascii(s, "lzma") || !wget_strcasecmp_ascii(s, "x-lzma"))
694 // 'xz' is the tag currently understood by Firefox (2.1.2014)
695 // 'lzma' / 'x-lzma' are the tags currently understood by ELinks
696 *content_encoding = wget_content_encoding_lzma;
697 else if (!wget_strcasecmp_ascii(s, "br"))
698 *content_encoding = wget_content_encoding_brotli;
699 else if (!wget_strcasecmp_ascii(s, "zstd"))
700 *content_encoding = wget_content_encoding_zstd;
701 else if (!wget_strcasecmp_ascii(s, "lzip"))
702 *content_encoding = wget_content_encoding_lzip;
703 else
704 *content_encoding = wget_content_encoding_identity;
705
706 while (wget_http_istoken(*s)) s++;
707
708 return s;
709 }
710
/**
 * Parses the value of a `Connection` header, a comma-separated list of options.
 *
 * \param s string to parse
 * \param keep_alive receives true if any list element starts (behind leading blanks)
 * with "keep-alive" (ASCII case-insensitive), else false
 * \return pointer to the terminating 0 byte of _s_
 */
const char *wget_http_parse_connection(const char *s, bool *keep_alive)
{
	*keep_alive = false;

	while (*s) {
		// e points to the ',' ending the current element or to the end of the string
		const char *e = strchrnul(s, ',');

		if (e != s) {
			while (c_isblank(*s))
				s++;

			if (!wget_strncasecmp_ascii(s, "keep-alive", 10))
				*keep_alive = true;
		}

		// Last element: return the position of the 0 byte.
		// Stepping to e + 1 here would leave the string.
		if (!*e)
			return e;

		s = e + 1;
	}

	return s;
}
728
/**
 * Parses the value of an `ETag` header.
 *
 * \param s string to parse, leading blanks are skipped
 * \param etag receives a copy of the value, which extends up to the next blank
 * or the end of the string
 * \return pointer to the first character behind the copied value
 */
const char *wget_http_parse_etag(const char *s, const char **etag)
{
	while (c_isblank(*s))
		s++;

	const char *begin = s;

	while (*s && !c_isblank(*s))
		s++;

	*etag = wget_strmemdup(begin, s - begin);

	return s;
}
740
741 /*
742 // returns GMT/UTC time as an integer of format YYYYMMDDHHMMSS
743 // this makes us independent from size of time_t - work around possible year 2038 problems
744 static long long NONNULL_ALL parse_rfc1123_date(const char *s)
745 {
746 // we simply can't use strptime() since it requires us to setlocale()
747 // which is not thread-safe !!!
748 static const char *mnames[12] = {
749 "Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
750 };
751 static int days_per_month[12] = {
752 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
753 };
754 int day, mon = 0, year, hour, min, sec, leap, it;
755 char mname[4] = "";
756
757 if (sscanf(s, " %*[a-zA-Z], %02d %3s %4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) {
758 // RFC 822 / 1123: Wed, 09 Jun 2021 10:18:14 GMT
759 }
760 else if (sscanf(s, " %*[a-zA-Z], %2d-%3s-%4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) {
761 // RFC 850 / 1036 or Netscape: Wednesday, 09-Jun-21 10:18:14 or Wed, 09-Jun-2021 10:18:14
762 }
763 else if (sscanf(s, " %*[a-zA-Z], %3s %2d %2d:%2d:%2d %4d", mname, &day, &hour, &min, &sec, &year) >= 6) {
764 // ANSI C's asctime(): Wed Jun 09 10:18:14 2021
765 } else {
766 error_printf(_("Failed to parse date '%s'\n"), s);
767 return 0; // return as session cookie
768 }
769
770 if (*mname) {
771 for (it = 0; it < countof(mnames); it++) {
772 if (!wget_strcasecmp_ascii(mname, mnames[it])) {
773 mon = it + 1;
774 break;
775 }
776 }
777 }
778
779 if (year < 70 && year >= 0) year += 2000;
780 else if (year >= 70 && year <= 99) year += 1900;
781
782 if (mon == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
783 leap = 1;
784 else
785 leap = 0;
786
787 // we don't handle leap seconds
788
789 if (year < 1601 || mon < 1 || mon > 12 || day < 1 || (day > days_per_month[mon - 1] + leap) ||
790 hour < 0 || hour > 23 || min < 0 || min > 60 || sec < 0 || sec > 60)
791 {
792 error_printf(_("Failed to parse date '%s'\n"), s);
793 return 0; // return as session cookie
794 }
795
796 return(((((long long)year*100 + mon)*100 + day)*100 + hour)*100 + min)*100 + sec;
797 }
798 */
799
800 // copied this routine from
801 // https://ftp.netbsd.org/pub/pkgsrc/current/pkgsrc/pkgtools/libnbcompat/files/timegm.c
802
/*
 * Returns the number of leap days between January 1st of year y1 and
 * January 1st of year y2, following the Gregorian rules (every 4th year,
 * except every 100th year, but again every 400th year).
 */
static int leap_days(int y1, int y2)
{
	// the leap day of a year only counts once that year has passed
	const int from = y1 - 1;
	const int to = y2 - 1;

	const int every_4 = to / 4 - from / 4;
	const int every_100 = to / 100 - from / 100;
	const int every_400 = to / 400 - from / 400;

	return every_4 - every_100 + every_400;
}
809
810 /*
811 RFC 2616, 3.3.1 Full Date
812 HTTP-date = rfc1123-date | rfc850-date | asctime-date
813 rfc1123-date = wkday "," SP date1 SP time SP "GMT"
814 rfc850-date = weekday "," SP date2 SP time SP "GMT"
815 asctime-date = wkday SP date3 SP time SP 4DIGIT
816 date1 = 2DIGIT SP month SP 4DIGIT
817 ; day month year (e.g., 02 Jun 1982)
818 date2 = 2DIGIT "-" month "-" 2DIGIT
819 ; day-month-year (e.g., 02-Jun-82)
820 date3 = month SP ( 2DIGIT | ( SP 1DIGIT ))
821 ; month day (e.g., Jun 2)
822 time = 2DIGIT ":" 2DIGIT ":" 2DIGIT
823 ; 00:00:00 - 23:59:59
824 wkday = "Mon" | "Tue" | "Wed"
825 | "Thu" | "Fri" | "Sat" | "Sun"
826 weekday = "Monday" | "Tuesday" | "Wednesday"
827 | "Thursday" | "Friday" | "Saturday" | "Sunday"
828 month = "Jan" | "Feb" | "Mar" | "Apr"
829 | "May" | "Jun" | "Jul" | "Aug"
830 | "Sep" | "Oct" | "Nov" | "Dec"
831 */
832
/**
 * Parses an HTTP date string into the number of seconds since 1970-01-01 00:00:00.
 *
 * Recognized formats:
 * - RFC 822 / 1123: `Wed, 09 Jun 2021 10:18:14 GMT`
 * - RFC 850 / 1036 or Netscape: `Wednesday, 09-Jun-21 10:18:14` or `Wed, 09-Jun-2021 10:18:14`
 * - ANSI C's asctime(): `Wed Jun 09 10:18:14 2021`
 * - non-standard: `1 Mar 2027 09:23:12 GMT`
 *
 * The date and time fields are taken as GMT/UTC, anything behind them is not evaluated.
 * Years 0-69 are mapped to 2000-2069, years 70-99 to 1970-1999.
 * Years before 1970 are set to 1970.
 *
 * \param s date string to parse
 * \return seconds since 1970-01-01 00:00:00, or 0 if the string does not match any
 * of the formats or if a field is out of range
 */
int64_t wget_http_parse_full_date(const char *s)
{
	// we simply can't use strptime() since it requires us to setlocale()
	// which is not thread-safe !!!
	static const char *mnames[12] = {
		"Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	static const int days_per_month[12] = {
		31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
	};
	// cumulated number of days until beginning of month for non-leap years
	static const int sum_of_days[12] = {
		0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
	};

	int day, mon = 0, year, hour, min, sec, leap_month, leap_year, days;
	char mname[4] = "";

	if (sscanf(s, " %*[a-zA-Z], %02d %3s %4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) {
		// RFC 822 / 1123: Wed, 09 Jun 2021 10:18:14 GMT
	}
	else if (sscanf(s, " %*[a-zA-Z], %2d-%3s-%4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) {
		// RFC 850 / 1036 or Netscape: Wednesday, 09-Jun-21 10:18:14 or Wed, 09-Jun-2021 10:18:14
	}
	else if (sscanf(s, " %*[a-zA-Z] %3s %2d %2d:%2d:%2d %4d", mname, &day, &hour, &min, &sec, &year) >= 6) {
		// ANSI C's asctime(): Wed Jun 09 10:18:14 2021
	}
	else if (sscanf(s, " %d %3s %4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) {
		// non-standard: 1 Mar 2027 09:23:12 GMT
	} else {
		error_printf(_("Failed to parse date '%s'\n"), s);
		return 0; // return as session cookie
	}

	// translate the month name into 1..12, mon stays 0 if the name is unknown
	if (*mname) {
		for (unsigned it = 0; it < countof(mnames); it++) {
			if (!wget_strcasecmp_ascii(mname, mnames[it])) {
				mon = it + 1;
				break;
			}
		}
	}

	if (year < 70 && year >= 0) year += 2000;
	else if (year >= 70 && year <= 99) year += 1900;
	if (year < 1970) year = 1970;

	// we don't handle leap seconds (but sec == 60 is tolerated)

	leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
	leap_month = (mon == 2 && leap_year);

	// the range check of mon has to come first, it guards the access to days_per_month[]
	if (mon < 1 || mon > 12 || day < 1 || (day > days_per_month[mon - 1] + leap_month) ||
		hour < 0 || hour > 23 || min < 0 || min > 59 || sec < 0 || sec > 60)
	{
		error_printf(_("Failed to parse date '%s'\n"), s);
		return 0; // return as session cookie
	}

	// calculate time_t (represented as int64_t) from GMT/UTC time values

	days = 365 * (year - 1970) + leap_days(1970, year);
	days += sum_of_days[mon - 1] + (mon > 2 && leap_year);
	days += day - 1;

	return (((int64_t)days * 24 + hour) * 60 + min) * 60 + sec;
}
900
/**
 * Prints a time value as an HTTP date, e.g. `Wed, 09 Jun 2021 10:18:14 GMT`.
 *
 * \param t time as seconds since 1970-01-01 00:00:00 UTC
 * \param buf buffer that receives the 0-terminated date string
 * \param bufsize size of _buf_
 * \return _buf_. It holds an empty string if _t_ could not be converted by gmtime_r()
 * and it is left untouched if _bufsize_ is 0.
 *
 * If time_t is narrower than 64 bits, values of _t_ outside of the
 * signed 32bit range are clamped to the limits of that range.
 */
char *wget_http_print_date(int64_t t, char *buf, size_t bufsize)
{
	static const char *dnames[7] = {
		"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
	};
	static const char *mnames[12] = {
		"Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	struct tm tm;
	time_t tt;

	if (!bufsize)
		return buf;

	// Check the real size of time_t instead of the data model of the platform.
	// There are 64bit time_t on non-LP64 systems, e.g. on 64bit Windows.
	if (sizeof(time_t) < sizeof(t)) {
		// 32bit time_t
		if (t > 2147483647)
			tt = 2147483647;
		else if (t < -2147483647 - 1)
			tt = -2147483647 - 1;
		else
			tt = (time_t) t;
	} else
		tt = (time_t) t; // 64bit time_t

	if (gmtime_r(&tt, &tm)) {
		wget_snprintf(buf, bufsize, "%s, %02d %s %d %02d:%02d:%02d GMT",
			dnames[tm.tm_wday],tm.tm_mday,mnames[tm.tm_mon],tm.tm_year+1900,
			tm.tm_hour, tm.tm_min, tm.tm_sec);
	} else
		*buf = 0;

	return buf;
}
934
935 // adjust time (t) by number of seconds (n)
936 /*
937 static long long adjust_time(long long t, int n)
938 {
939 static int days_per_month[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
940 int day, mon, year, hour, min, sec, leap;
941
942 sec = t % 100;
943 min = (t /= 100) % 100;
944 hour = (t /= 100) % 100;
945 day = (t /= 100) % 100;
946 mon = (t /= 100) % 100;
947 year = t / 100;
948
949 sec += n;
950
951 if (n >= 0) {
952 if (sec >= 60) {
953 min += sec / 60;
954 sec %= 60;
955 }
956 if (min >= 60) {
957 hour += min / 60;
958 min %= 60;
959 }
960 if (hour >= 24) {
961 day += hour / 24;
962 hour %= 24;
963 }
964 while (1) {
965 if (mon == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
966 leap = 1;
967 else
968 leap = 0;
969 if (day > days_per_month[mon - 1] + leap) {
970 day -= (days_per_month[mon - 1] + leap);
971 mon++;
972 if (mon > 12) {
973 mon = 1;
974 year++;
975 }
976 } else break;
977 }
978 } else { // n<0
979 if (sec < 0) {
980 min += (sec - 59) / 60;
981 sec = 59 + (sec + 1) % 60;
982 }
983 if (min < 0) {
984 hour += (min - 59) / 60;
985 min = 59 + (min + 1) % 60;
986 }
987 if (hour < 0) {
988 day += (hour - 23) / 24;
989 hour = 23 + (hour + 1) % 24;
990 }
991 for (;;) {
992 if (day <= 0) {
993 if (--mon < 1) {
994 mon = 12;
995 year--;
996 }
997 if (mon == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
998 leap = 1;
999 else
1000 leap = 0;
1001 day += (days_per_month[mon - 1] + leap);
1002 } else break;
1003 }
1004 }
1005
1006 return (((((long long)year*100 + mon)*100 + day)*100 + hour)*100 + min)*100 + sec;
1007 }
1008
1009 // return current GMT/UTC
1010
1011 static int64_t get_current_time(void)
1012 {
1013 int64_t t = time(NULL);
1014 struct tm tm;
1015
1016 gmtime_r(&t, &tm);
1017
1018 return (((((int64_t)(tm.tm_year + 1900)*100 + tm.tm_mon + 1)*100 + tm.tm_mday)*100 + tm.tm_hour)*100 + tm.tm_min)*100 + tm.tm_sec;
1019 }
1020 */
1021
1022 /*
1023 RFC 6265
1024
1025 set-cookie-header = "Set-Cookie:" SP set-cookie-string
1026 set-cookie-string = cookie-pair *( ";" SP cookie-av )
1027 cookie-pair = cookie-name "=" cookie-value
1028 cookie-name = token
1029 cookie-value = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )
1030 cookie-octet = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E
1031 ; US-ASCII characters excluding CTLs,
1032 ; whitespace DQUOTE, comma, semicolon,
1033 ; and backslash
1034 token = <token, defined in [RFC2616], Section 2.2>
1035
1036 cookie-av = expires-av / max-age-av / domain-av /
1037 path-av / secure-av / httponly-av /
1038 extension-av
1039 expires-av = "Expires=" sane-cookie-date
1040 sane-cookie-date = <rfc1123-date, defined in [RFC2616], Section 3.3.1>
1041 max-age-av = "Max-Age=" non-zero-digit *DIGIT
1042 ; In practice, both expires-av and max-age-av
1043 ; are limited to dates representable by the
1044 ; user agent.
1045 non-zero-digit = %x31-39
1046 ; digits 1 through 9
1047 domain-av = "Domain=" domain-value
1048 domain-value = <subdomain>
1049 ; defined in [RFC1034], Section 3.5, as
1050 ; enhanced by [RFC1123], Section 2.1
1051 path-av = "Path=" path-value
1052 path-value = <any CHAR except CTLs or ";">
1053 secure-av = "Secure"
1054 httponly-av = "HttpOnly"
1055 extension-av = <any CHAR except CTLs or ";">
1056 */
/**
 * Parse the value of a Set-Cookie header.
 *
 * This is a thin forwarding wrapper: both arguments are handed unchanged to
 * wget_cookie_parse_setcookie() and its result is returned as is.
 *
 * \param[in] s Set-Cookie header value
 * \param[out] cookie Passed through to wget_cookie_parse_setcookie()
 * \return The value returned by wget_cookie_parse_setcookie()
 */
const char *wget_http_parse_setcookie(const char *s, wget_cookie **cookie)
{
	const char *rest = wget_cookie_parse_setcookie(s, cookie);

	return rest;
}
1061
cookie_free(void * cookie)1062 static void cookie_free(void *cookie)
1063 {
1064 if (cookie)
1065 wget_cookie_free((wget_cookie **) &cookie);
1066 }
1067
/*
 * Return non-zero if the <namelen> bytes at <name> spell exactly <expected>,
 * ignoring ASCII case.
 *
 * The length check rejects mere prefixes (e.g. "c" for "content-type") and
 * guarantees that the comparison never reads past the end of <expected>.
 */
static int header_name_equals(const char *name, size_t namelen, const char *expected)
{
	return namelen == strlen(expected) && !wget_strncasecmp_ascii(name, expected, namelen);
}

/*
 * Parse an authentication challenge from <value> and append it to
 * resp->challenges, creating the vector on first use.
 *
 * Returns WGET_E_SUCCESS, or WGET_E_MEMORY if the challenge could not be
 * allocated or stored. On failure the parsed challenge is released again.
 */
static int add_challenge(wget_http_response *resp, const char *value)
{
	wget_http_challenge *challenge = wget_malloc(sizeof(wget_http_challenge));

	if (!challenge)
		return WGET_E_MEMORY;

	wget_http_parse_challenge(value, challenge);

	if (!resp->challenges) {
		resp->challenges = wget_vector_create(2, NULL);
		wget_vector_set_destructor(resp->challenges, (wget_vector_destructor *) wget_http_free_challenge);
	}

	if (wget_vector_add(resp->challenges, challenge) < 0) {
		// the vector did not take ownership
		wget_http_free_challenge(challenge);
		return WGET_E_MEMORY;
	}

	return WGET_E_SUCCESS;
}

/**
 * Parse a single HTTP response header and store the result in \p resp.
 *
 * \p name and \p value do not need to be 0-terminated; their lengths are
 * given explicitly. The header name must match a known name completely
 * (ASCII case-insensitive, except for the HTTP/2 pseudo header ":status"
 * which is compared case-sensitively).
 *
 * \param[in,out] resp Response structure to update
 * \param[in] name Start of the header name
 * \param[in] namelen Length of the header name in bytes
 * \param[in] value Start of the header value
 * \param[in] valuelen Length of the header value in bytes
 * \return
 *   WGET_E_SUCCESS if the header was recognized and processed,<br>
 *   WGET_E_UNKNOWN if the header is not handled here (this includes an empty
 *   name, a malformed ":status" value, and "location"/"link" headers on
 *   non-3xx responses),<br>
 *   WGET_E_INVALID if \p resp, \p name or \p value is NULL,<br>
 *   WGET_E_MEMORY if an allocation failed
 */
int wget_http_parse_header_line(wget_http_response *resp, const char *name, size_t namelen, const char *value, size_t valuelen)
{
	static const char status_name[] = ":status";

	if (!resp || !name || !value)
		return WGET_E_INVALID;

	// An empty name can't match anything; don't even look at *name.
	if (!namelen)
		return WGET_E_UNKNOWN;

	char valuebuf[256];
	char *value0;
	int ret = WGET_E_SUCCESS;

	// 0-terminated copy of the value for the string based parsers below.
	// Short values land in valuebuf, longer ones are heap-allocated.
	value0 = wget_strmemcpy_a(valuebuf, sizeof(valuebuf), value, valuelen);
	if (!value0)
		return WGET_E_MEMORY;

	// Dispatch on the lowercased first character of the name.
	switch (*name | 0x20) {
	case ':':
		// HTTP/2 pseudo header: exactly three decimal digits are expected
		if (namelen == sizeof(status_name) - 1 && !memcmp(name, status_name, namelen)
			&& valuelen == 3
			&& c_isdigit(value[0]) && c_isdigit(value[1]) && c_isdigit(value[2]))
		{
			resp->code = ((value[0] - '0') * 10 + (value[1] - '0')) * 10 + (value[2] - '0');
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'c':
		if (header_name_equals(name, namelen, "content-encoding")) {
			wget_http_parse_content_encoding(value0, &resp->content_encoding);
		} else if (header_name_equals(name, namelen, "content-type")) {
			// first occurrence wins
			if (!resp->content_type && !resp->content_type_encoding)
				wget_http_parse_content_type(value0, &resp->content_type, &resp->content_type_encoding);
		} else if (header_name_equals(name, namelen, "content-length")) {
			resp->content_length = (size_t)atoll(value0);
			resp->content_length_valid = 1;
		} else if (header_name_equals(name, namelen, "content-disposition")) {
			// first occurrence wins
			if (!resp->content_filename)
				wget_http_parse_content_disposition(value0, &resp->content_filename);
		} else if (header_name_equals(name, namelen, "connection")) {
			wget_http_parse_connection(value0, &resp->keep_alive);
		} else if (header_name_equals(name, namelen, "Content-Security-Policy")) {
			resp->csp = 1;
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'd':
		if (header_name_equals(name, namelen, "digest")) {
			// https://tools.ietf.org/html/rfc3230
			wget_http_digest digest;
			wget_http_parse_digest(value0, &digest);
			if (!resp->digests) {
				resp->digests = wget_vector_create(4, NULL);
				wget_vector_set_destructor(resp->digests, (wget_vector_destructor *) wget_http_free_digest);
			}
			if (wget_vector_add_memdup(resp->digests, &digest, sizeof(digest)) < 0) {
				// not stored: release the strings owned by the local copy
				xfree(digest.algorithm);
				xfree(digest.encoded_digest);
				ret = WGET_E_MEMORY;
			}
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'e':
		if (header_name_equals(name, namelen, "etag")) {
			// first occurrence wins
			if (!resp->etag)
				wget_http_parse_etag(value0, &resp->etag);
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'i':
		if (header_name_equals(name, namelen, "icy-metaint")) {
			resp->icy_metaint = atoi(value0);
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'l':
		if (header_name_equals(name, namelen, "last-modified")) {
			// Last-Modified: Thu, 07 Feb 2008 15:03:24 GMT
			resp->last_modified = wget_http_parse_full_date(value0);
		} else if (resp->code / 100 == 3 && header_name_equals(name, namelen, "location")) {
			// only evaluated for 3xx responses; first occurrence wins
			if (!resp->location)
				wget_http_parse_location(value0, &resp->location);
		} else if (resp->code / 100 == 3 && header_name_equals(name, namelen, "link")) {
			// only evaluated for 3xx responses
			wget_http_link link;
			wget_http_parse_link(value0, &link);
			if (!resp->links) {
				resp->links = wget_vector_create(8, NULL);
				wget_vector_set_destructor(resp->links, (wget_vector_destructor *) wget_http_free_link);
			}
			if (wget_vector_add_memdup(resp->links, &link, sizeof(link)) < 0) {
				// not stored: release the strings owned by the local copy
				xfree(link.uri);
				xfree(link.type);
				ret = WGET_E_MEMORY;
			}
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'p':
		if (header_name_equals(name, namelen, "public-key-pins")) {
			// first occurrence wins
			if (!resp->hpkp) {
				resp->hpkp = wget_hpkp_new();
				if (resp->hpkp) {
					wget_http_parse_public_key_pins(value0, resp->hpkp);
					debug_printf("new host pubkey pinnings added to hpkp db\n");
				} else
					ret = WGET_E_MEMORY;
			}
		} else if (header_name_equals(name, namelen, "proxy-authenticate")) {
			ret = add_challenge(resp, value0);
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 's':
		if (header_name_equals(name, namelen, "set-cookie")) {
			// this is a parser. content validation must be done by higher level functions.
			wget_cookie *cookie = NULL;
			wget_http_parse_setcookie(value0, &cookie);

			if (cookie) {
				if (!resp->cookies) {
					resp->cookies = wget_vector_create(4, NULL);
					wget_vector_set_destructor(resp->cookies, cookie_free);
				}
				if (wget_vector_add(resp->cookies, cookie) < 0) {
					// the vector did not take ownership
					wget_cookie_free(&cookie);
					ret = WGET_E_MEMORY;
				}
			}
		} else if (header_name_equals(name, namelen, "strict-transport-security")) {
			resp->hsts = 1;
			wget_http_parse_strict_transport_security(value0, &resp->hsts_maxage, &resp->hsts_include_subdomains);
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 't':
		if (header_name_equals(name, namelen, "transfer-encoding")) {
			wget_http_parse_transfer_encoding(value0, &resp->transfer_encoding);
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'w':
		if (header_name_equals(name, namelen, "www-authenticate")) {
			ret = add_challenge(resp, value0);
		} else
			ret = WGET_E_UNKNOWN;
		break;
	case 'x':
		if (header_name_equals(name, namelen, "x-archive-orig-last-modified")) {
			resp->last_modified = wget_http_parse_full_date(value0);
		} else
			ret = WGET_E_UNKNOWN;
		break;
	default:
		ret = WGET_E_UNKNOWN;
		break;
	}

	// release the value copy if it did not fit into the stack buffer
	if (value0 != valuebuf)
		xfree(value0);

	return ret;
}
1242
1243 /* content of <buf> will be destroyed */
1244 /* buf must be 0-terminated */
wget_http_parse_response_header(char * buf)1245 wget_http_response *wget_http_parse_response_header(char *buf)
1246 {
1247 char *eol;
1248
1249 wget_http_response *resp = wget_calloc(1, sizeof(wget_http_response));
1250 if (!resp)
1251 return NULL;
1252
1253 if (sscanf(buf, " HTTP/%3hd.%3hd %3hd %31[^\r\n] ",
1254 &resp->major, &resp->minor, &resp->code, resp->reason) >= 3) {
1255 if ((eol = strchr(buf + 10, '\n'))) {
1256 // eol[-1]=0;
1257 // debug_printf("# %s\n",buf);
1258 } else {
1259 // empty HTTP header
1260 return resp;
1261 }
1262 } else if (sscanf(buf, " ICY %3hd %31[^\r\n] ", &resp->code, resp->reason) >= 1) {
1263 if ((eol = strchr(buf + 4, '\n'))) {
1264 // eol[-1]=0;
1265 // debug_printf("# %s\n",buf);
1266 } else {
1267 // empty HTTP header
1268 return resp;
1269 }
1270 } else {
1271 error_printf(_("HTTP response header not found\n"));
1272 xfree(resp);
1273 return NULL;
1274 }
1275
1276 for (char *line = eol + 1; eol && *line && *line != '\r' && *line != '\n'; line = eol ? eol + 1 : NULL) {
1277 eol = strchr(line, '\n');
1278 while (eol && c_isblank(eol[1])) { // handle split lines
1279 *eol = eol[-1] = ' ';
1280 eol = strchr(eol, '\n');
1281 }
1282
1283 if (eol) {
1284 if (eol[-1] == '\r')
1285 eol[-1] = 0;
1286 else
1287 *eol = 0;
1288 }
1289
1290 size_t namelen, valuelen;
1291 const char *name;
1292 const char *value = wget_parse_name_fixed(line, &name, &namelen);
1293 // value now points directly after :
1294
1295 if (eol)
1296 valuelen = eol - value - (eol[-1] == 0);
1297 else
1298 valuelen = strlen(value);
1299
1300 wget_http_parse_header_line(resp, name, namelen, value, valuelen);
1301 }
1302
1303 return resp;
1304 }
1305
/**
 * Release a header parameter together with its name and value strings.
 *
 * \param[in] param Parameter to free; NULL is silently ignored
 */
void wget_http_free_param(wget_http_header_param *param)
{
	// tolerate NULL like the other wget_http_free_*() functions
	if (!param)
		return;

	xfree(param->name);
	xfree(param->value);
	xfree(param);
}
1312
/**
 * Release a link entry together with its uri and type strings.
 *
 * \param[in] link Link to free; NULL is silently ignored
 */
void wget_http_free_link(wget_http_link *link)
{
	// tolerate NULL like the other wget_http_free_*() functions
	if (!link)
		return;

	xfree(link->uri);
	xfree(link->type);
	xfree(link);
}
1319
/**
 * Release a vector of links.
 *
 * Simply forwards \p links to wget_vector_free().
 *
 * \param[in] links Address of the vector pointer, passed on unchanged
 */
void wget_http_free_links(wget_vector **links)
{
	wget_vector_free(links);
}
1324
/**
 * Release a digest entry together with its algorithm and encoded_digest strings.
 *
 * \param[in] digest Digest to free; NULL is silently ignored
 */
void wget_http_free_digest(wget_http_digest *digest)
{
	// tolerate NULL like the other wget_http_free_*() functions
	if (!digest)
		return;

	xfree(digest->algorithm);
	xfree(digest->encoded_digest);
	xfree(digest);
}
1331
/**
 * Release a vector of digests.
 *
 * Simply forwards \p digests to wget_vector_free().
 *
 * \param[in] digests Address of the vector pointer, passed on unchanged
 */
void wget_http_free_digests(wget_vector **digests)
{
	wget_vector_free(digests);
}
1336
/**
 * Release an authentication challenge together with its auth_scheme string
 * and its parameter map.
 *
 * \param[in] challenge Challenge to free; NULL is silently ignored
 */
void wget_http_free_challenge(wget_http_challenge *challenge)
{
	// tolerate NULL like the other wget_http_free_*() functions
	if (!challenge)
		return;

	xfree(challenge->auth_scheme);
	wget_stringmap_free(&challenge->params);
	xfree(challenge);
}
1343
/**
 * Release a vector of authentication challenges.
 *
 * Simply forwards \p challenges to wget_vector_free().
 *
 * \param[in] challenges Address of the vector pointer, passed on unchanged
 */
void wget_http_free_challenges(wget_vector **challenges)
{
	wget_vector_free(challenges);
}
1348
/**
 * Release a vector of cookies.
 *
 * Simply forwards \p cookies to wget_vector_free().
 *
 * \param[in] cookies Address of the vector pointer, passed on unchanged
 */
void wget_http_free_cookies(wget_vector **cookies)
{
	wget_vector_free(cookies);
}
1353
/**
 * Release an HPKP object and reset the caller's pointer.
 *
 * \param[in,out] hpkp Address of the HPKP pointer. If \p hpkp is NULL nothing
 *   happens; otherwise `*hpkp` is passed to wget_hpkp_free() and then set to NULL.
 */
void wget_http_free_hpkp_entries(wget_hpkp **hpkp)
{
	if (!hpkp)
		return;

	wget_hpkp_free(*hpkp);
	*hpkp = NULL;
}
1361
/**
 * Release a response object and everything it owns.
 *
 * Frees the link, digest, challenge and cookie vectors, the HPKP data, the
 * content type / encoding / filename, location and etag strings as well as
 * the header and body buffers, then the response itself.
 * The reason field is not released separately.
 *
 * \param[in,out] resp Address of the response pointer. Nothing happens if
 *   \p resp or `*resp` is NULL; otherwise `*resp` is freed via xfree().
 */
void wget_http_free_response(wget_http_response **resp)
{
	if (!resp || !*resp)
		return;

	wget_http_response *r = *resp;

	// containers and nested objects
	wget_http_free_links(&r->links);
	wget_http_free_digests(&r->digests);
	wget_http_free_challenges(&r->challenges);
	wget_http_free_cookies(&r->cookies);
	wget_http_free_hpkp_entries(&r->hpkp);

	// plain strings
	xfree(r->content_type);
	xfree(r->content_type_encoding);
	xfree(r->content_filename);
	xfree(r->location);
	xfree(r->etag);

	// buffers
	wget_buffer_free(&r->header);
	wget_buffer_free(&r->body);

	// finally the object itself (through the caller's pointer)
	xfree(*resp);
}
1381
1382 /* for security reasons: set all freed pointers to NULL */
wget_http_free_request(wget_http_request ** req)1383 void wget_http_free_request(wget_http_request **req)
1384 {
1385 if (req && *req) {
1386 wget_buffer_deinit(&(*req)->esc_resource);
1387 wget_buffer_deinit(&(*req)->esc_host);
1388 wget_vector_free(&(*req)->headers);
1389 xfree((*req)->body);
1390 xfree(*req);
1391 }
1392 }
1393