1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/safe_browsing/core/db/v4_protocol_manager_util.h"
6
7 #include "base/base64.h"
8 #include "base/hash/hash.h"
9 #include "base/hash/sha1.h"
10 #include "base/metrics/histogram_functions.h"
11 #include "base/rand_util.h"
12 #include "base/strings/string_util.h"
13 #include "base/strings/stringprintf.h"
14 #include "build/build_config.h"
15 #include "components/version_info/version_info.h"
16 #include "crypto/sha2.h"
17 #include "google_apis/google_api_keys.h"
18 #include "net/base/escape.h"
19 #include "net/base/ip_address.h"
20 #include "net/base/net_errors.h"
21 #include "net/http/http_request_headers.h"
22 #include "url/url_util.h"
23
24 using base::Time;
25 using base::TimeDelta;
26
27 namespace safe_browsing {
28
// URL prefix override for V4 requests. Can be overridden by tests via
// SetSbV4UrlPrefixForTesting(); while null, kSbV4UrlPrefix is used instead.
const char* g_sbv4_url_prefix_for_testing = nullptr;

// Default URL prefix used when composing V4 request URLs.
const char kSbV4UrlPrefix[] = "https://safebrowsing.googleapis.com/v4";

// File extension that GetUmaSuffixForStore() expects on store file names.
const base::FilePath::CharType kStoreSuffix[] = FILE_PATH_LITERAL(".store");
35
36 namespace {
37
// The default URL prefix where browser reports safe browsing hits and malware
// details. GetReportUrl() starts every URL it builds with this prefix.
const char kSbReportsURLPrefix[] =
    "https://safebrowsing.google.com/safebrowsing";
42
Unescape(const std::string & url)43 std::string Unescape(const std::string& url) {
44 std::string unescaped_str(url);
45 const int kMaxLoopIterations = 1024;
46 size_t old_size = 0;
47 int loop_var = 0;
48 do {
49 old_size = unescaped_str.size();
50 unescaped_str = net::UnescapeBinaryURLComponent(unescaped_str);
51 } while (old_size != unescaped_str.size() &&
52 ++loop_var <= kMaxLoopIterations);
53
54 return unescaped_str;
55 }
56
// Percent-escapes every byte of |url| that is <= ' ' (ASCII 32), > '~'
// (ASCII 126), '#' or '%'. Escapes use uppercase hex digits; all other bytes
// are copied through unchanged.
std::string Escape(const std::string& url) {
  static const char kHexDigits[] = "0123456789ABCDEF";

  std::string result;
  // Escaping only ever grows the string, so reserve twice the input length to
  // make reallocation less likely.
  result.reserve(url.length() * 2);

  for (const char raw : url) {
    const unsigned char byte = static_cast<unsigned char>(raw);
    const bool is_plain =
        byte > ' ' && byte <= '~' && byte != '#' && byte != '%';
    if (is_plain) {
      result.push_back(raw);
      continue;
    }
    result.push_back('%');
    result.push_back(kHexDigits[byte >> 4]);
    result.push_back(kHexDigits[byte & 0xf]);
  }
  return result;
}
76
77 } // namespace
78
GetV4ProtocolConfig(const std::string & client_name,bool disable_auto_update)79 V4ProtocolConfig GetV4ProtocolConfig(const std::string& client_name,
80 bool disable_auto_update) {
81 return V4ProtocolConfig(client_name, disable_auto_update,
82 google_apis::GetAPIKey(),
83 version_info::GetVersionNumber());
84 }
85
SetSbV4UrlPrefixForTesting(const char * url_prefix)86 void SetSbV4UrlPrefixForTesting(const char* url_prefix) {
87 g_sbv4_url_prefix_for_testing = url_prefix;
88 }
89
GetReportUrl(const V4ProtocolConfig & config,const std::string & method,const ExtendedReportingLevel * reporting_level,const bool is_enhanced_protection)90 std::string GetReportUrl(const V4ProtocolConfig& config,
91 const std::string& method,
92 const ExtendedReportingLevel* reporting_level,
93 const bool is_enhanced_protection) {
94 std::string url = base::StringPrintf(
95 "%s/%s?client=%s&appver=%s&pver=4.0", kSbReportsURLPrefix, method.c_str(),
96 config.client_name.c_str(), config.version.c_str());
97 std::string api_key = google_apis::GetAPIKey();
98 if (!api_key.empty()) {
99 base::StringAppendF(&url, "&key=%s",
100 net::EscapeQueryParamValue(api_key, true).c_str());
101 }
102 if (reporting_level)
103 url.append(base::StringPrintf("&ext=%d", *reporting_level));
104 if (is_enhanced_protection)
105 url.append(base::StringPrintf("&enh=%d", is_enhanced_protection));
106 return url;
107 }
108
operator <<(std::ostream & os,const ListIdentifier & id)109 std::ostream& operator<<(std::ostream& os, const ListIdentifier& id) {
110 os << "{hash: " << id.hash() << "; platform_type: " << id.platform_type()
111 << "; threat_entry_type: " << id.threat_entry_type()
112 << "; threat_type: " << id.threat_type() << "}";
113 return os;
114 }
115
GetCurrentPlatformType()116 PlatformType GetCurrentPlatformType() {
117 #if defined(OS_WIN)
118 return WINDOWS_PLATFORM;
119 #elif defined(OS_LINUX)
120 return LINUX_PLATFORM;
121 #elif defined(OS_IOS)
122 return IOS_PLATFORM;
123 #elif defined(OS_MACOSX)
124 return OSX_PLATFORM;
125 #else
126 // TODO(crbug.com/1030487): This file is, in fact, intended to be compiled on
127 // Android, the comment below is obsolete. We should be able to return
128 // ANDROID_PLATFORM here.
129 //
130 // This should ideally never compile but it is getting compiled on Android.
131 // See: https://bugs.chromium.org/p/chromium/issues/detail?id=621647
132 // TODO(vakh): Once that bug is fixed, this should be removed. If we leave
133 // the platform_type empty, the server won't recognize the request and
134 // return an error response which will pollute our UMA metrics.
135 return LINUX_PLATFORM;
136 #endif
137 }
138
GetCertCsdDownloadWhitelistId()139 ListIdentifier GetCertCsdDownloadWhitelistId() {
140 return ListIdentifier(GetCurrentPlatformType(), CERT, CSD_DOWNLOAD_WHITELIST);
141 }
142
GetChromeExtMalwareId()143 ListIdentifier GetChromeExtMalwareId() {
144 return ListIdentifier(CHROME_PLATFORM, CHROME_EXTENSION, MALWARE_THREAT);
145 }
146
GetChromeUrlApiId()147 ListIdentifier GetChromeUrlApiId() {
148 // TODO(crbug.com/1030487): This special case for Android will no longer be
149 // needed once GetCurrentPlatformType() returns ANDROID_PLATFORM on Android.
150 #if defined(OS_ANDROID)
151 return ListIdentifier(ANDROID_PLATFORM, URL, API_ABUSE);
152 #else
153 return ListIdentifier(GetCurrentPlatformType(), URL, API_ABUSE);
154 #endif
155 }
156
GetChromeUrlClientIncidentId()157 ListIdentifier GetChromeUrlClientIncidentId() {
158 return ListIdentifier(CHROME_PLATFORM, URL, CLIENT_INCIDENT);
159 }
160
GetIpMalwareId()161 ListIdentifier GetIpMalwareId() {
162 return ListIdentifier(GetCurrentPlatformType(), IP_RANGE, MALWARE_THREAT);
163 }
164
GetUrlBillingId()165 ListIdentifier GetUrlBillingId() {
166 return ListIdentifier(GetCurrentPlatformType(), URL, BILLING);
167 }
168
GetUrlCsdDownloadWhitelistId()169 ListIdentifier GetUrlCsdDownloadWhitelistId() {
170 return ListIdentifier(GetCurrentPlatformType(), URL, CSD_DOWNLOAD_WHITELIST);
171 }
172
GetUrlCsdWhitelistId()173 ListIdentifier GetUrlCsdWhitelistId() {
174 return ListIdentifier(GetCurrentPlatformType(), URL, CSD_WHITELIST);
175 }
176
GetUrlHighConfidenceAllowlistId()177 ListIdentifier GetUrlHighConfidenceAllowlistId() {
178 return ListIdentifier(GetCurrentPlatformType(), URL,
179 HIGH_CONFIDENCE_ALLOWLIST);
180 }
181
GetUrlMalwareId()182 ListIdentifier GetUrlMalwareId() {
183 return ListIdentifier(GetCurrentPlatformType(), URL, MALWARE_THREAT);
184 }
185
GetUrlMalBinId()186 ListIdentifier GetUrlMalBinId() {
187 return ListIdentifier(GetCurrentPlatformType(), URL, MALICIOUS_BINARY);
188 }
189
GetUrlSocEngId()190 ListIdentifier GetUrlSocEngId() {
191 return ListIdentifier(GetCurrentPlatformType(), URL, SOCIAL_ENGINEERING);
192 }
193
GetUrlSubresourceFilterId()194 ListIdentifier GetUrlSubresourceFilterId() {
195 return ListIdentifier(GetCurrentPlatformType(), URL, SUBRESOURCE_FILTER);
196 }
197
GetUrlSuspiciousSiteId()198 ListIdentifier GetUrlSuspiciousSiteId() {
199 return ListIdentifier(GetCurrentPlatformType(), URL, SUSPICIOUS);
200 }
201
GetUrlUwsId()202 ListIdentifier GetUrlUwsId() {
203 return ListIdentifier(GetCurrentPlatformType(), URL, UNWANTED_SOFTWARE);
204 }
205
GetUmaSuffixForStore(const base::FilePath & file_path)206 std::string GetUmaSuffixForStore(const base::FilePath& file_path) {
207 DCHECK_EQ(kStoreSuffix, file_path.BaseName().Extension());
208 return base::StringPrintf(
209 ".%" PRFilePath, file_path.BaseName().RemoveExtension().value().c_str());
210 }
211
StoreAndHashPrefix(ListIdentifier list_id,const HashPrefix & hash_prefix)212 StoreAndHashPrefix::StoreAndHashPrefix(ListIdentifier list_id,
213 const HashPrefix& hash_prefix)
214 : list_id(list_id), hash_prefix(hash_prefix) {}
215
~StoreAndHashPrefix()216 StoreAndHashPrefix::~StoreAndHashPrefix() {}
217
operator ==(const StoreAndHashPrefix & other) const218 bool StoreAndHashPrefix::operator==(const StoreAndHashPrefix& other) const {
219 return list_id == other.list_id && hash_prefix == other.hash_prefix;
220 }
221
operator !=(const StoreAndHashPrefix & other) const222 bool StoreAndHashPrefix::operator!=(const StoreAndHashPrefix& other) const {
223 return !operator==(other);
224 }
225
hash() const226 size_t StoreAndHashPrefix::hash() const {
227 std::size_t first = list_id.hash();
228 std::size_t second = std::hash<std::string>()(hash_prefix);
229
230 return base::HashInts(first, second);
231 }
232
SBThreatTypeSetIsValidForCheckBrowseUrl(const SBThreatTypeSet & set)233 bool SBThreatTypeSetIsValidForCheckBrowseUrl(const SBThreatTypeSet& set) {
234 for (SBThreatType type : set) {
235 switch (type) {
236 case SB_THREAT_TYPE_URL_PHISHING:
237 case SB_THREAT_TYPE_URL_MALWARE:
238 case SB_THREAT_TYPE_URL_UNWANTED:
239 case SB_THREAT_TYPE_SUSPICIOUS_SITE:
240 case SB_THREAT_TYPE_BILLING:
241 break;
242
243 default:
244 return false;
245 }
246 }
247 return true;
248 }
249
operator ==(const ListIdentifier & other) const250 bool ListIdentifier::operator==(const ListIdentifier& other) const {
251 return platform_type_ == other.platform_type_ &&
252 threat_entry_type_ == other.threat_entry_type_ &&
253 threat_type_ == other.threat_type_;
254 }
255
operator !=(const ListIdentifier & other) const256 bool ListIdentifier::operator!=(const ListIdentifier& other) const {
257 return !operator==(other);
258 }
259
hash() const260 size_t ListIdentifier::hash() const {
261 std::size_t first = std::hash<unsigned int>()(platform_type_);
262 std::size_t second = std::hash<unsigned int>()(threat_entry_type_);
263 std::size_t third = std::hash<unsigned int>()(threat_type_);
264
265 std::size_t interim = base::HashInts(first, second);
266 return base::HashInts(interim, third);
267 }
268
ListIdentifier(PlatformType platform_type,ThreatEntryType threat_entry_type,ThreatType threat_type)269 ListIdentifier::ListIdentifier(PlatformType platform_type,
270 ThreatEntryType threat_entry_type,
271 ThreatType threat_type)
272 : platform_type_(platform_type),
273 threat_entry_type_(threat_entry_type),
274 threat_type_(threat_type) {
275 DCHECK(PlatformType_IsValid(platform_type));
276 DCHECK(ThreatEntryType_IsValid(threat_entry_type));
277 DCHECK(ThreatType_IsValid(threat_type));
278 }
279
ListIdentifier(const ListUpdateResponse & response)280 ListIdentifier::ListIdentifier(const ListUpdateResponse& response)
281 : ListIdentifier(response.platform_type(),
282 response.threat_entry_type(),
283 response.threat_type()) {}
284
V4ProtocolConfig(const std::string & client_name,bool disable_auto_update,const std::string & key_param,const std::string & version)285 V4ProtocolConfig::V4ProtocolConfig(const std::string& client_name,
286 bool disable_auto_update,
287 const std::string& key_param,
288 const std::string& version)
289 : client_name(client_name),
290 disable_auto_update(disable_auto_update),
291 key_param(key_param),
292 version(version) {}
293
294 V4ProtocolConfig::V4ProtocolConfig(const V4ProtocolConfig& other) = default;
295
~V4ProtocolConfig()296 V4ProtocolConfig::~V4ProtocolConfig() {}
297
298 // static
GetNextBackOffInterval(size_t * error_count,size_t * multiplier)299 base::TimeDelta V4ProtocolManagerUtil::GetNextBackOffInterval(
300 size_t* error_count,
301 size_t* multiplier) {
302 DCHECK(multiplier && error_count);
303 (*error_count)++;
304 if (*error_count > 1 && *error_count < 9) {
305 // With error count 9 and above we will hit the 24 hour max interval.
306 // Cap the multiplier here to prevent integer overflow errors.
307 *multiplier *= 2;
308 }
309 base::TimeDelta next =
310 base::TimeDelta::FromMinutes(*multiplier * (1 + base::RandDouble()) * 15);
311 base::TimeDelta day = base::TimeDelta::FromHours(24);
312 return next < day ? next : day;
313 }
314
315 // static
RecordHttpResponseOrErrorCode(const char * metric_name,int net_error,int response_code)316 void V4ProtocolManagerUtil::RecordHttpResponseOrErrorCode(
317 const char* metric_name,
318 int net_error,
319 int response_code) {
320 base::UmaHistogramSparse(metric_name,
321 net_error == net::OK ? response_code : net_error);
322 }
323
324 // static
GetRequestUrlAndHeaders(const std::string & request_base64,const std::string & method_name,const V4ProtocolConfig & config,GURL * gurl,net::HttpRequestHeaders * headers)325 void V4ProtocolManagerUtil::GetRequestUrlAndHeaders(
326 const std::string& request_base64,
327 const std::string& method_name,
328 const V4ProtocolConfig& config,
329 GURL* gurl,
330 net::HttpRequestHeaders* headers) {
331 const char* url_prefix = g_sbv4_url_prefix_for_testing
332 ? g_sbv4_url_prefix_for_testing
333 : kSbV4UrlPrefix;
334 *gurl = GURL(
335 ComposeUrl(url_prefix, method_name, request_base64, config.key_param));
336 UpdateHeaders(headers);
337 }
338
339 // static
ComposeUrl(const std::string & prefix,const std::string & method,const std::string & request_base64,const std::string & key_param)340 std::string V4ProtocolManagerUtil::ComposeUrl(const std::string& prefix,
341 const std::string& method,
342 const std::string& request_base64,
343 const std::string& key_param) {
344 DCHECK(!prefix.empty() && !method.empty());
345 std::string url = base::StringPrintf(
346 "%s/%s?$req=%s&$ct=application/x-protobuf", prefix.c_str(),
347 method.c_str(), request_base64.c_str());
348 if (!key_param.empty()) {
349 base::StringAppendF(&url, "&key=%s",
350 net::EscapeQueryParamValue(key_param, true).c_str());
351 }
352 return url;
353 }
354
355 // static
UpdateHeaders(net::HttpRequestHeaders * headers)356 void V4ProtocolManagerUtil::UpdateHeaders(net::HttpRequestHeaders* headers) {
357 // NOTE(vakh): The following header informs the envelope server (which sits in
358 // front of Google's stubby server) that the received GET request should be
359 // interpreted as a POST.
360 headers->SetHeaderIfMissing("X-HTTP-Method-Override", "POST");
361 }
362
363 // static
UrlToFullHashes(const GURL & url,std::vector<FullHash> * full_hashes)364 void V4ProtocolManagerUtil::UrlToFullHashes(
365 const GURL& url,
366 std::vector<FullHash>* full_hashes) {
367 std::string canon_host, canon_path, canon_query;
368 CanonicalizeUrl(url, &canon_host, &canon_path, &canon_query);
369
370 std::vector<std::string> hosts;
371 if (url.HostIsIPAddress()) {
372 hosts.push_back(url.host());
373 } else {
374 GenerateHostVariantsToCheck(canon_host, &hosts);
375 }
376
377 std::vector<std::string> paths;
378 GeneratePathVariantsToCheck(canon_path, canon_query, &paths);
379 for (const std::string& host : hosts) {
380 for (const std::string& path : paths) {
381 full_hashes->push_back(crypto::SHA256HashString(host + path));
382 }
383 }
384 }
385
386 // static
FullHashToHashPrefix(const FullHash & full_hash,PrefixSize prefix_size,HashPrefix * hash_prefix)387 bool V4ProtocolManagerUtil::FullHashToHashPrefix(const FullHash& full_hash,
388 PrefixSize prefix_size,
389 HashPrefix* hash_prefix) {
390 if (full_hash.size() < prefix_size) {
391 return false;
392 }
393 *hash_prefix = full_hash.substr(0, prefix_size);
394 return true;
395 }
396
397 // static
FullHashToSmallestHashPrefix(const FullHash & full_hash,HashPrefix * hash_prefix)398 bool V4ProtocolManagerUtil::FullHashToSmallestHashPrefix(
399 const FullHash& full_hash,
400 HashPrefix* hash_prefix) {
401 return FullHashToHashPrefix(full_hash, kMinHashPrefixLength, hash_prefix);
402 }
403
404 // static
FullHashMatchesHashPrefix(const FullHash & full_hash,const HashPrefix & hash_prefix)405 bool V4ProtocolManagerUtil::FullHashMatchesHashPrefix(
406 const FullHash& full_hash,
407 const HashPrefix& hash_prefix) {
408 return full_hash.compare(0, hash_prefix.length(), hash_prefix) == 0;
409 }
410
411 // static
GenerateHostsToCheck(const GURL & url,std::vector<std::string> * hosts)412 void V4ProtocolManagerUtil::GenerateHostsToCheck(
413 const GURL& url,
414 std::vector<std::string>* hosts) {
415 std::string canon_host;
416 CanonicalizeUrl(url, &canon_host, nullptr, nullptr);
417 GenerateHostVariantsToCheck(canon_host, hosts);
418 }
419
420 // static
GeneratePathsToCheck(const GURL & url,std::vector<std::string> * paths)421 void V4ProtocolManagerUtil::GeneratePathsToCheck(
422 const GURL& url,
423 std::vector<std::string>* paths) {
424 std::string canon_path;
425 std::string canon_query;
426 CanonicalizeUrl(url, nullptr, &canon_path, &canon_query);
427 GeneratePathVariantsToCheck(canon_path, canon_query, paths);
428 }
429
430 // static
GeneratePatternsToCheck(const GURL & url,std::vector<std::string> * urls)431 void V4ProtocolManagerUtil::GeneratePatternsToCheck(
432 const GURL& url,
433 std::vector<std::string>* urls) {
434 std::string canon_host;
435 std::string canon_path;
436 std::string canon_query;
437 CanonicalizeUrl(url, &canon_host, &canon_path, &canon_query);
438
439 std::vector<std::string> hosts, paths;
440 GenerateHostVariantsToCheck(canon_host, &hosts);
441 GeneratePathVariantsToCheck(canon_path, canon_query, &paths);
442 for (size_t h = 0; h < hosts.size(); ++h) {
443 for (size_t p = 0; p < paths.size(); ++p) {
444 urls->push_back(hosts[h] + paths[p]);
445 }
446 }
447 }
448
449 // static
GetFullHash(const GURL & url)450 FullHash V4ProtocolManagerUtil::GetFullHash(const GURL& url) {
451 std::string host;
452 std::string path;
453 CanonicalizeUrl(url, &host, &path, nullptr);
454
455 return crypto::SHA256HashString(host + path);
456 }
457
458 // static
CanonicalizeUrl(const GURL & url,std::string * canonicalized_hostname,std::string * canonicalized_path,std::string * canonicalized_query)459 void V4ProtocolManagerUtil::CanonicalizeUrl(const GURL& url,
460 std::string* canonicalized_hostname,
461 std::string* canonicalized_path,
462 std::string* canonicalized_query) {
463 DCHECK(url.is_valid());
464
465 // We only canonicalize "normal" URLs.
466 if (!url.IsStandard())
467 return;
468
469 // Following canonicalization steps are excluded since url parsing takes care
470 // of those :-
471 // 1. Remove any tab (0x09), CR (0x0d), and LF (0x0a) chars from url.
472 // (Exclude escaped version of these chars).
473 // 2. Normalize hostname to 4 dot-seperated decimal values.
474 // 3. Lowercase hostname.
475 // 4. Resolve path sequences "/../" and "/./".
476
477 // That leaves us with the following :-
478 // 1. Remove fragment in URL.
479 GURL url_without_fragment;
480 GURL::Replacements f_replacements;
481 f_replacements.ClearRef();
482 f_replacements.ClearUsername();
483 f_replacements.ClearPassword();
484 url_without_fragment = url.ReplaceComponents(f_replacements);
485
486 // 2. Do URL unescaping until no more hex encoded characters exist.
487 std::string url_unescaped_str(Unescape(url_without_fragment.spec()));
488 url::Parsed parsed;
489 url::ParseStandardURL(url_unescaped_str.data(), url_unescaped_str.length(),
490 &parsed);
491
492 // 3. In hostname, remove all leading and trailing dots.
493 base::StringPiece host;
494 if (parsed.host.len > 0)
495 host = base::StringPiece(url_unescaped_str.data() + parsed.host.begin,
496 parsed.host.len);
497
498 base::StringPiece host_without_end_dots =
499 base::TrimString(host, ".", base::TrimPositions::TRIM_ALL);
500
501 // 4. In hostname, replace consecutive dots with a single dot.
502 std::string host_without_consecutive_dots(
503 RemoveConsecutiveChars(host_without_end_dots, '.'));
504
505 // 5. In path, replace runs of consecutive slashes with a single slash.
506 base::StringPiece path;
507 if (parsed.path.len > 0)
508 path = base::StringPiece(url_unescaped_str.data() + parsed.path.begin,
509 parsed.path.len);
510 std::string path_without_consecutive_slash(RemoveConsecutiveChars(path, '/'));
511
512 url::Replacements<char> hp_replacements;
513 hp_replacements.SetHost(
514 host_without_consecutive_dots.data(),
515 url::Component(0, host_without_consecutive_dots.length()));
516 hp_replacements.SetPath(
517 path_without_consecutive_slash.data(),
518 url::Component(0, path_without_consecutive_slash.length()));
519
520 std::string url_unescaped_with_can_hostpath;
521 url::StdStringCanonOutput output(&url_unescaped_with_can_hostpath);
522 url::Parsed temp_parsed;
523 url::ReplaceComponents(url_unescaped_str.data(), url_unescaped_str.length(),
524 parsed, hp_replacements, nullptr, &output,
525 &temp_parsed);
526 output.Complete();
527
528 // 6. Step needed to revert escaping done in url::ReplaceComponents.
529 url_unescaped_with_can_hostpath = Unescape(url_unescaped_with_can_hostpath);
530
531 // 7. After performing all above steps, percent-escape all chars in url which
532 // are <= ASCII 32, >= 127, #, %. Escapes must be uppercase hex characters.
533 std::string escaped_canon_url_str(Escape(url_unescaped_with_can_hostpath));
534 url::Parsed final_parsed;
535 url::ParseStandardURL(escaped_canon_url_str.data(),
536 escaped_canon_url_str.length(), &final_parsed);
537
538 if (canonicalized_hostname && final_parsed.host.len > 0) {
539 *canonicalized_hostname = escaped_canon_url_str.substr(
540 final_parsed.host.begin, final_parsed.host.len);
541 }
542 if (canonicalized_path && final_parsed.path.len > 0) {
543 *canonicalized_path = escaped_canon_url_str.substr(final_parsed.path.begin,
544 final_parsed.path.len);
545 }
546 if (canonicalized_query && final_parsed.query.len > 0) {
547 *canonicalized_query = escaped_canon_url_str.substr(
548 final_parsed.query.begin, final_parsed.query.len);
549 }
550 }
551
552 // static
RemoveConsecutiveChars(base::StringPiece str,const char c)553 std::string V4ProtocolManagerUtil::RemoveConsecutiveChars(base::StringPiece str,
554 const char c) {
555 std::string output;
556 // Output is at most the length of the original string.
557 output.reserve(str.size());
558
559 size_t i = 0;
560 while (i < str.size()) {
561 output.append(1, str[i++]);
562 if (str[i - 1] == c) {
563 while (i < str.size() && str[i] == c) {
564 i++;
565 }
566 }
567 }
568
569 return output;
570 }
571
572 // static
GenerateHostVariantsToCheck(const std::string & host,std::vector<std::string> * hosts)573 void V4ProtocolManagerUtil::GenerateHostVariantsToCheck(
574 const std::string& host,
575 std::vector<std::string>* hosts) {
576 hosts->clear();
577
578 if (host.empty())
579 return;
580
581 // Per the Safe Browsing Protocol v2 spec, we try the host, and also up to 4
582 // hostnames formed by starting with the last 5 components and successively
583 // removing the leading component. The last component isn't examined alone,
584 // since it's the TLD or a subcomponent thereof.
585 //
586 // Note that we don't need to be clever about stopping at the "real" eTLD --
587 // the data on the server side has been filtered to ensure it will not
588 // blacklist a whole TLD, and it's not significantly slower on our side to
589 // just check too much.
590 //
591 // Also note that because we have a simple blacklist, not some sort of complex
592 // whitelist-in-blacklist or vice versa, it doesn't matter what order we check
593 // these in.
594 const size_t kMaxHostsToCheck = 4;
595 bool skipped_last_component = false;
596 for (std::string::const_reverse_iterator i(host.rbegin());
597 i != host.rend() && hosts->size() < kMaxHostsToCheck; ++i) {
598 if (*i == '.') {
599 if (skipped_last_component)
600 hosts->push_back(std::string(i.base(), host.end()));
601 else
602 skipped_last_component = true;
603 }
604 }
605 hosts->push_back(host);
606 }
607
608 // static
GeneratePathVariantsToCheck(const std::string & path,const std::string & query,std::vector<std::string> * paths)609 void V4ProtocolManagerUtil::GeneratePathVariantsToCheck(
610 const std::string& path,
611 const std::string& query,
612 std::vector<std::string>* paths) {
613 paths->clear();
614
615 if (path.empty())
616 return;
617
618 // Per the Safe Browsing Protocol v2 spec, we try the exact path with/without
619 // the query parameters, and also up to 4 paths formed by starting at the root
620 // and adding more path components.
621 //
622 // As with the hosts above, it doesn't matter what order we check these in.
623 const size_t kMaxPathsToCheck = 4;
624 for (std::string::const_iterator i(path.begin());
625 i != path.end() && paths->size() < kMaxPathsToCheck; ++i) {
626 if (*i == '/')
627 paths->push_back(std::string(path.begin(), i + 1));
628 }
629
630 if (!paths->empty() && paths->back() != path)
631 paths->push_back(path);
632
633 if (!query.empty())
634 paths->push_back(path + "?" + query);
635 }
636
637 // static
SetClientInfoFromConfig(ClientInfo * client_info,const V4ProtocolConfig & config)638 void V4ProtocolManagerUtil::SetClientInfoFromConfig(
639 ClientInfo* client_info,
640 const V4ProtocolConfig& config) {
641 DCHECK(client_info);
642 client_info->set_client_id(config.client_name);
643 client_info->set_client_version(config.version);
644 }
645
646 // static
GetIPV6AddressFromString(const std::string & ip_address,net::IPAddress * address)647 bool V4ProtocolManagerUtil::GetIPV6AddressFromString(
648 const std::string& ip_address,
649 net::IPAddress* address) {
650 DCHECK(address);
651 if (!address->AssignFromIPLiteral(ip_address))
652 return false;
653 if (address->IsIPv4())
654 *address = net::ConvertIPv4ToIPv4MappedIPv6(*address);
655 return address->IsIPv6();
656 }
657
658 // static
IPAddressToEncodedIPV6Hash(const std::string & ip_address,FullHash * hashed_encoded_ip)659 bool V4ProtocolManagerUtil::IPAddressToEncodedIPV6Hash(
660 const std::string& ip_address,
661 FullHash* hashed_encoded_ip) {
662 net::IPAddress address;
663 if (!GetIPV6AddressFromString(ip_address, &address)) {
664 return false;
665 }
666 std::string packed_ip = net::IPAddressToPackedString(address);
667 if (packed_ip.empty()) {
668 return false;
669 }
670
671 const std::string hash = base::SHA1HashString(packed_ip);
672 DCHECK_EQ(20u, hash.size());
673 hashed_encoded_ip->resize(hash.size() + 1);
674 hashed_encoded_ip->replace(0, hash.size(), hash);
675 (*hashed_encoded_ip)[hash.size()] = static_cast<unsigned char>(128);
676 return true;
677 }
678
679 // static
GetListClientStatesFromStoreStateMap(const std::unique_ptr<StoreStateMap> & store_state_map,std::vector<std::string> * list_client_states)680 void V4ProtocolManagerUtil::GetListClientStatesFromStoreStateMap(
681 const std::unique_ptr<StoreStateMap>& store_state_map,
682 std::vector<std::string>* list_client_states) {
683 std::transform(
684 store_state_map->begin(), store_state_map->end(),
685 std::back_inserter(*list_client_states),
686 [](const std::map<ListIdentifier, std::string>::value_type& pair) {
687 return pair.second;
688 });
689 }
690
691 } // namespace safe_browsing
692