1 /****************************************************************************
2 **
3 ** Copyright (C) 2017 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtNetwork module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
40 #include "qhsts_p.h"
41 
42 #include "QtCore/private/qipaddress_p.h"
43 #include "QtCore/qvector.h"
44 #include "QtCore/qlist.h"
45 
46 #if QT_CONFIG(settings)
47 #include "qhstsstore_p.h"
48 #endif // QT_CONFIG(settings)
49 
50 QT_BEGIN_NAMESPACE
51 
is_valid_domain_name(const QString & host)52 static bool is_valid_domain_name(const QString &host)
53 {
54     if (!host.size())
55         return false;
56 
57     // RFC6797 8.1.1
58     // If the substring matching the host production from the Request-URI
59     // (of the message to which the host responded) syntactically matches
60     //the IP-literal or IPv4address productions from Section 3.2.2 of
61     //[RFC3986], then the UA MUST NOT note this host as a Known HSTS Host.
62     using namespace QIPAddressUtils;
63 
64     IPv4Address ipv4Addr = {};
65     if (parseIp4(ipv4Addr, host.constBegin(), host.constEnd()))
66         return false;
67 
68     IPv6Address ipv6Addr = {};
69     // Unlike parseIp4, parseIp6 returns nullptr if it managed to parse IPv6
70     // address successfully.
71     if (!parseIp6(ipv6Addr, host.constBegin(), host.constEnd()))
72         return false;
73 
74     // TODO: for now we do not test IPvFuture address, it must be addressed
75     // by introducing parseIpFuture (actually, there is an implementation
76     // in QUrl that can be adopted/modified/moved to QIPAddressUtils).
77     return true;
78 }
79 
updateFromHeaders(const QList<QPair<QByteArray,QByteArray>> & headers,const QUrl & url)80 void QHstsCache::updateFromHeaders(const QList<QPair<QByteArray, QByteArray>> &headers,
81                                    const QUrl &url)
82 {
83     if (!url.isValid())
84         return;
85 
86     QHstsHeaderParser parser;
87     if (parser.parse(headers)) {
88         updateKnownHost(url.host(), parser.expirationDate(), parser.includeSubDomains());
89 #if QT_CONFIG(settings)
90         if (hstsStore)
91             hstsStore->synchronize();
92 #endif // QT_CONFIG(settings)
93     }
94 }
95 
updateFromPolicies(const QVector<QHstsPolicy> & policies)96 void QHstsCache::updateFromPolicies(const QVector<QHstsPolicy> &policies)
97 {
98     for (const auto &policy : policies)
99         updateKnownHost(policy.host(), policy.expiry(), policy.includesSubDomains());
100 
101 #if QT_CONFIG(settings)
102     if (hstsStore && policies.size()) {
103         // These policies are coming either from store or from QNAM's setter
104         // function. As a result we can notice expired or new policies, time
105         // to sync ...
106         hstsStore->synchronize();
107     }
108 #endif // QT_CONFIG(settings)
109 }
110 
updateKnownHost(const QUrl & url,const QDateTime & expires,bool includeSubDomains)111 void QHstsCache::updateKnownHost(const QUrl &url, const QDateTime &expires,
112                                  bool includeSubDomains)
113 {
114     if (!url.isValid())
115         return;
116 
117     updateKnownHost(url.host(), expires, includeSubDomains);
118 #if QT_CONFIG(settings)
119     if (hstsStore)
120         hstsStore->synchronize();
121 #endif // QT_CONFIG(settings)
122 }
123 
updateKnownHost(const QString & host,const QDateTime & expires,bool includeSubDomains)124 void QHstsCache::updateKnownHost(const QString &host, const QDateTime &expires,
125                                  bool includeSubDomains)
126 {
127     if (!is_valid_domain_name(host))
128         return;
129 
130     // HSTS is a per-host policy, regardless of protocol, port or any of the other
131     // details in an URL; so we only want the host part.  QUrl::host handles
132     // IDNA 2003 (RFC3490) for us, as required by HSTS (RFC6797, section 10).
133     const HostName hostName(host);
134     const auto pos = knownHosts.find(hostName);
135     QHstsPolicy::PolicyFlags flags;
136     if (includeSubDomains)
137         flags = QHstsPolicy::IncludeSubDomains;
138 
139     const QHstsPolicy newPolicy(expires, flags, hostName.name);
140     if (pos == knownHosts.end()) {
141         // A new, previously unknown host.
142         if (newPolicy.isExpired()) {
143             // Nothing to do at all - we did not know this host previously,
144             // we do not have to - since its policy expired.
145             return;
146         }
147 
148         knownHosts.insert({hostName, newPolicy});
149 #if QT_CONFIG(settings)
150         if (hstsStore)
151             hstsStore->addToObserved(newPolicy);
152 #endif // QT_CONFIG(settings)
153         return;
154     }
155 
156     if (newPolicy.isExpired())
157         knownHosts.erase(pos);
158     else  if (pos->second != newPolicy)
159         pos->second = newPolicy;
160     else
161         return;
162 
163 #if QT_CONFIG(settings)
164     if (hstsStore)
165         hstsStore->addToObserved(newPolicy);
166 #endif // QT_CONFIG(settings)
167 }
168 
isKnownHost(const QUrl & url) const169 bool QHstsCache::isKnownHost(const QUrl &url) const
170 {
171     if (!url.isValid() || !is_valid_domain_name(url.host()))
172         return false;
173 
174     /*
175         RFC6797, 8.2.  Known HSTS Host Domain Name Matching
176 
177         * Superdomain Match
178           If a label-for-label match between an entire Known HSTS Host's
179           domain name and a right-hand portion of the given domain name
180           is found, then this Known HSTS Host's domain name is a
181           superdomain match for the given domain name.  There could be
182           multiple superdomain matches for a given domain name.
183         * Congruent Match
184           If a label-for-label match between a Known HSTS Host's domain
185           name and the given domain name is found -- i.e., there are no
186           further labels to compare -- then the given domain name
187           congruently matches this Known HSTS Host.
188 
189         We start from the congruent match, and then chop labels and dots and
190         proceed with superdomain match. While RFC6797 recommends to start from
191         superdomain, the result is the same - some valid policy will make a host
192         known.
193     */
194 
195     bool superDomainMatch = false;
196     const QString hostNameAsString(url.host());
197     HostName nameToTest(static_cast<QStringRef>(&hostNameAsString));
198     while (nameToTest.fragment.size()) {
199         auto const pos = knownHosts.find(nameToTest);
200         if (pos != knownHosts.end()) {
201             if (pos->second.isExpired()) {
202                 knownHosts.erase(pos);
203 #if QT_CONFIG(settings)
204                 if (hstsStore) {
205                     // Inform our store that this policy has expired.
206                     hstsStore->addToObserved(pos->second);
207                 }
208 #endif // QT_CONFIG(settings)
209             } else if (!superDomainMatch || pos->second.includesSubDomains()) {
210                 return true;
211             }
212         }
213 
214         const int dot = nameToTest.fragment.indexOf(QLatin1Char('.'));
215         if (dot == -1)
216             break;
217 
218         nameToTest.fragment = nameToTest.fragment.mid(dot + 1);
219         superDomainMatch = true;
220     }
221 
222     return false;
223 }
224 
clear()225 void QHstsCache::clear()
226 {
227     knownHosts.clear();
228 }
229 
policies() const230 QVector<QHstsPolicy> QHstsCache::policies() const
231 {
232     QVector<QHstsPolicy> values;
233     values.reserve(int(knownHosts.size()));
234     for (const auto &host : knownHosts)
235         values << host.second;
236     return values;
237 }
238 
239 #if QT_CONFIG(settings)
setStore(QHstsStore * store)240 void QHstsCache::setStore(QHstsStore *store)
241 {
242     // Caller retains ownership of store, which must outlive this cache.
243     if (store != hstsStore) {
244         hstsStore = store;
245 
246         if (!hstsStore)
247             return;
248 
249         // First we augment our store with the policies we already know about
250         // (and thus the cached policy takes priority over whatever policy we
251         // had in the store for the same host, if any).
252         if (knownHosts.size()) {
253             const QVector<QHstsPolicy> observed(policies());
254             for (const auto &policy : observed)
255                 hstsStore->addToObserved(policy);
256             hstsStore->synchronize();
257         }
258 
259         // Now we update the cache with anything we have not observed yet, but
260         // the store knows about (well, it can happen we synchronize again as a
261         // result if some policies managed to expire or if we add a new one
262         // from the store to cache):
263         const QVector<QHstsPolicy> restored(store->readPolicies());
264         updateFromPolicies(restored);
265     }
266 }
267 #endif // QT_CONFIG(settings)
268 
// The parser is quite simple: 'nextToken' knows exactly what kind of tokens
// are valid and it will return false if something else was found; then
// we immediately stop parsing. 'parseDirective' knows how these tokens can
// be combined into a valid directive and if some weird combination of
// valid tokens is found - we immediately stop.
// And finally we call parseDirective again and again until some error is
// found or we have no more bytes in the header.
276 
277 // The following isXXX functions are based on RFC2616, 2.2 Basic Rules.
278 
// Returns true for values in the US-ASCII range 0..127 and false for
// everything else, negative values included.
static bool isCHAR(int c)
{
    // CHAR           = <any US-ASCII character (octets 0 - 127)>
    // The unsigned conversion maps negative values far above 127, so one
    // comparison covers both bounds.
    return static_cast<unsigned int>(c) < 128u;
}
284 
// Returns true for the US-ASCII control characters: 0..31 and DEL (127).
static bool isCTL(int c)
{
    // CTL            = <any US-ASCII control character
    //                  (octets 0 - 31) and DEL (127)>
    const bool isC0Control = 0 <= c && c < 32;
    const bool isDel = c == 127;
    return isC0Control || isDel;
}
291 
292 
// Returns true if 'c' is a space or a horizontal tab.
static bool isLWS(int c)
{
    // LWS            = [CRLF] 1*( SP | HT )
    //
    // CRLF           = CR LF
    // CR             = <US-ASCII CR, carriage return (13)>
    // LF             = <US-ASCII LF, linefeed (10)>
    // SP             = <US-ASCII SP, space (32)>
    // HT             = <US-ASCII HT, horizontal-tab (9)>
    //
    // CRLF is handled by the time we parse a header (they were replaced with
    // spaces). We only have to deal with remaining SP|HT
    switch (c) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
307 
isTEXT(char c)308 static bool isTEXT(char c)
309 {
310     // TEXT           = <any OCTET except CTLs,
311     //                  but including LWS>
312     return !isCTL(c) || isLWS(c);
313 }
314 
isSeparator(char c)315 static bool isSeparator(char c)
316 {
317     // separators     = "(" | ")" | "<" | ">" | "@"
318     //                      | "," | ";" | ":" | "\" | <">
319     //                      | "/" | "[" | "]" | "?" | "="
320     //                      | "{" | "}" | SP | HT
321     static const char separators[] = "()<>@,;:\\\"/[]?={}";
322     static const char *end = separators + sizeof separators - 1;
323     return isLWS(c) || std::find(separators, end, c) != end;
324 }
325 
// Strips the surrounding double quotes from a quoted-string 'value'.
// Values shorter than two bytes, or not starting with '"', are returned
// unchanged. Only the outer quotes are removed: backslash escapes inside
// the string are left as they are.
static QByteArray unescapeMaxAge(const QByteArray &value)
{
    const auto length = value.size();
    const bool startsQuoted = length >= 2 && value[0] == '"';
    if (!startsQuoted)
        return value;

    // A value that opens with a quote is expected to close with one, too.
    Q_ASSERT(value[length - 1] == '"');
    return value.mid(1, length - 2);
}
334 
isTOKEN(char c)335 static bool isTOKEN(char c)
336 {
337     // token          = 1*<any CHAR except CTLs or separators>
338     return isCHAR(c) && !isCTL(c) && !isSeparator(c);
339 }
340 
341 /*
342 
343 RFC6797, 6.1 Strict-Transport-Security HTTP Response Header Field.
344 Syntax:
345 
Strict-Transport-Security = "Strict-Transport-Security" ":"
347                               [ directive ] *( ";" [ directive ] )
348 
349 directive = directive-name [ "=" directive-value ]
350 directive-name = token
351 directive-value = token | quoted-string
352 
353 RFC 2616, 2.2 Basic Rules.
354 
355 token          = 1*<any CHAR except CTLs or separators>
356 quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
357 
358 
359 qdtext         = <any TEXT except <">>
360 quoted-pair    = "\" CHAR
361 
362 */
363 
parse(const QList<QPair<QByteArray,QByteArray>> & headers)364 bool QHstsHeaderParser::parse(const QList<QPair<QByteArray, QByteArray>> &headers)
365 {
366     for (const auto &h : headers) {
367         // We use '==' since header name was already 'trimmed' for us:
368         if (h.first == "Strict-Transport-Security") {
369             header = h.second;
370             // RFC6797, 8.1:
371             //
372             //  The UA MUST ignore any STS header fields not conforming to the
373             // grammar specified in Section 6.1 ("Strict-Transport-Security HTTP
374             // Response Header Field").
375             //
376             // If a UA receives more than one STS header field in an HTTP
377             // response message over secure transport, then the UA MUST process
378             // only the first such header field.
379             //
380             // We read this as: ignore all invalid headers and take the first valid:
381             if (parseSTSHeader() && maxAgeFound) {
382                 expiry = QDateTime::currentDateTimeUtc().addSecs(maxAge);
383                 return true;
384             }
385         }
386     }
387 
388     // In case it was set by a syntactically correct header (but without
389     // REQUIRED max-age directive):
390     subDomainsFound = false;
391 
392     return false;
393 }
394 
parseSTSHeader()395 bool QHstsHeaderParser::parseSTSHeader()
396 {
397     expiry = QDateTime();
398     maxAgeFound = false;
399     subDomainsFound = false;
400     maxAge = 0;
401     tokenPos = 0;
402     token.clear();
403 
404     while (tokenPos < header.size()) {
405         if (!parseDirective())
406             return false;
407 
408         if (token.size() && token != ";") {
409             // After a directive we can only have a ";" or no more tokens.
410             // Invalid syntax.
411             return false;
412         }
413     }
414 
415     return true;
416 }
417 
parseDirective()418 bool QHstsHeaderParser::parseDirective()
419 {
420     // RFC 6797, 6.1:
421     //
422     // directive = directive-name [ "=" directive-value ]
423     // directive-name = token
424     // directive-value = token | quoted-string
425 
426 
427     // RFC 2616, 2.2:
428     //
429     // token          = 1*<any CHAR except CTLs or separators>
430 
431     if (!nextToken())
432         return false;
433 
434     if (!token.size()) // No more data, but no error.
435         return true;
436 
437     if (token == ";") // That's a weird grammar, but that's what it is.
438         return true;
439 
440     if (!isTOKEN(token[0])) // Not a valid directive-name.
441         return false;
442 
443     const QByteArray directiveName = token;
444     // 2. Try to read "=" or ";".
445     if (!nextToken())
446         return false;
447 
448     QByteArray directiveValue;
449     if (token == ";") // No directive-value
450         return processDirective(directiveName, directiveValue);
451 
452     if (token == "=") {
453         // We expect a directive-value now:
454         if (!nextToken() || !token.size())
455             return false;
456         directiveValue = token;
457     } else if (token.size()) {
458         // Invalid syntax:
459         return false;
460     }
461 
462     if (!processDirective(directiveName, directiveValue))
463         return false;
464 
465     // Read either ";", or 'end of header', or some invalid token.
466     return nextToken();
467 }
468 
processDirective(const QByteArray & name,const QByteArray & value)469 bool QHstsHeaderParser::processDirective(const QByteArray &name, const QByteArray &value)
470 {
471     Q_ASSERT(name.size());
472     // RFC6797 6.1/3 Directive names are case-insensitive
473     if (name.compare("max-age", Qt::CaseInsensitive) == 0) {
474         // RFC 6797, 6.1.1
475         // The syntax of the max-age directive's REQUIRED value (after
476         // quoted-string unescaping, if necessary) is defined as:
477         //
478         // max-age-value = delta-seconds
479         if (maxAgeFound) {
480             // RFC 6797, 6.1/2:
481             // All directives MUST appear only once in an STS header field.
482             return false;
483         }
484 
485         const QByteArray unescapedValue = unescapeMaxAge(value);
486         if (!unescapedValue.size())
487             return false;
488 
489         bool ok = false;
490         const qint64 age = unescapedValue.toLongLong(&ok);
491         if (!ok || age < 0)
492             return false;
493 
494         maxAge = age;
495         maxAgeFound = true;
496     } else if (name.compare("includesubdomains", Qt::CaseInsensitive) == 0) {
497         // RFC 6797, 6.1.2.  The includeSubDomains Directive.
498         // The OPTIONAL "includeSubDomains" directive is a valueless directive.
499 
500         if (subDomainsFound) {
501             // RFC 6797, 6.1/2:
502             // All directives MUST appear only once in an STS header field.
503             return false;
504         }
505 
506         subDomainsFound = true;
507     } // else we do nothing, skip unknown directives (RFC 6797, 6.1/5)
508 
509     return true;
510 }
511 
nextToken()512 bool QHstsHeaderParser::nextToken()
513 {
514     // Returns true if we found a valid token or we have no more data (token is
515     // empty then).
516 
517     token.clear();
518 
519     // Fortunately enough, by this point qhttpnetworkreply already got rid of
520     // [CRLF] parts, but we can have 1*(SP|HT) yet.
521     while (tokenPos < header.size() && isLWS(header[tokenPos]))
522         ++tokenPos;
523 
524     if (tokenPos == header.size())
525         return true;
526 
527     const char ch = header[tokenPos];
528     if (ch == ';' || ch == '=') {
529         token.append(ch);
530         ++tokenPos;
531         return true;
532     }
533 
534     // RFC 2616, 2.2.
535     //
536     // quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
537     // qdtext         = <any TEXT except <">>
538     if (ch == '"') {
539         int last = tokenPos + 1;
540         while (last < header.size()) {
541             if (header[last] == '"') {
542                 // The end of a quoted-string.
543                 break;
544             } else if (header[last] == '\\') {
545                 // quoted-pair    = "\" CHAR
546                 if (last + 1 < header.size() && isCHAR(header[last + 1]))
547                     last += 2;
548                 else
549                     return false;
550             } else {
551                 if (!isTEXT(header[last]))
552                     return false;
553                 ++last;
554             }
555         }
556 
557         if (last >= header.size()) // no closing '"':
558             return false;
559 
560         token = header.mid(tokenPos, last - tokenPos + 1);
561         tokenPos = last + 1;
562         return true;
563     }
564 
565     // RFC 2616, 2.2:
566     //
567     // token          = 1*<any CHAR except CTLs or separators>
568     if (!isTOKEN(ch))
569         return false;
570 
571     int last = tokenPos + 1;
572     while (last < header.size() && isTOKEN(header[last]))
573         ++last;
574 
575     token = header.mid(tokenPos, last - tokenPos);
576     tokenPos = last;
577 
578     return true;
579 }
580 
581 QT_END_NAMESPACE
582