1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/safe_browsing/core/db/v4_protocol_manager_util.h"
6 
7 #include "base/base64.h"
8 #include "base/hash/hash.h"
9 #include "base/hash/sha1.h"
10 #include "base/metrics/histogram_functions.h"
11 #include "base/rand_util.h"
12 #include "base/strings/string_util.h"
13 #include "base/strings/stringprintf.h"
14 #include "build/build_config.h"
15 #include "components/version_info/version_info.h"
16 #include "crypto/sha2.h"
17 #include "google_apis/google_api_keys.h"
18 #include "net/base/escape.h"
19 #include "net/base/ip_address.h"
20 #include "net/base/net_errors.h"
21 #include "net/http/http_request_headers.h"
22 #include "url/url_util.h"
23 
24 using base::Time;
25 using base::TimeDelta;
26 
27 namespace safe_browsing {
28 
// Optional override for the Safe Browsing V4 URL prefix. Null by default; it
// can be overridden by tests through SetSbV4UrlPrefixForTesting(). When it is
// non-null, V4ProtocolManagerUtil::GetRequestUrlAndHeaders() uses it in place
// of kSbV4UrlPrefix.
const char* g_sbv4_url_prefix_for_testing = nullptr;

// URL prefix used for V4 requests when no testing override has been set.
const char kSbV4UrlPrefix[] = "https://safebrowsing.googleapis.com/v4";

// Extension that GetUmaSuffixForStore() expects on a store's file name.
const base::FilePath::CharType kStoreSuffix[] = FILE_PATH_LITERAL(".store");
35 
36 namespace {
37 
// Base URL under which the browser files its reports of safe browsing hits
// and malware details. GetReportUrl() appends the method and query to it.
constexpr char kSbReportsURLPrefix[] =
    "https://safebrowsing.google.com/safebrowsing";
42 
Unescape(const std::string & url)43 std::string Unescape(const std::string& url) {
44   std::string unescaped_str(url);
45   const int kMaxLoopIterations = 1024;
46   size_t old_size = 0;
47   int loop_var = 0;
48   do {
49     old_size = unescaped_str.size();
50     unescaped_str = net::UnescapeBinaryURLComponent(unescaped_str);
51   } while (old_size != unescaped_str.size() &&
52            ++loop_var <= kMaxLoopIterations);
53 
54   return unescaped_str;
55 }
56 
// Returns a copy of |url| in which every byte that is <= ' ' (0x20), > '~'
// (0x7E), '#' or '%' is replaced by "%XX", where XX is the byte value written
// as two uppercase hexadecimal digits. All other bytes are copied unchanged.
std::string Escape(const std::string& url) {
  static constexpr char kUpperHexDigits[] = "0123456789ABCDEF";

  std::string result;
  // Escaping can only grow the string; reserving twice the input length makes
  // reallocation while appending less likely.
  result.reserve(url.size() * 2);

  for (const char raw : url) {
    // Work on the unsigned value so bytes >= 0x80 compare as large values.
    const unsigned char byte = static_cast<unsigned char>(raw);
    const bool in_printable_range = byte > ' ' && byte <= '~';
    if (in_printable_range && byte != '#' && byte != '%') {
      result.push_back(raw);
      continue;
    }
    result.push_back('%');
    result.push_back(kUpperHexDigits[byte >> 4]);
    result.push_back(kUpperHexDigits[byte & 0xf]);
  }
  return result;
}
76 
77 }  // namespace
78 
GetV4ProtocolConfig(const std::string & client_name,bool disable_auto_update)79 V4ProtocolConfig GetV4ProtocolConfig(const std::string& client_name,
80                                      bool disable_auto_update) {
81   return V4ProtocolConfig(client_name, disable_auto_update,
82                           google_apis::GetAPIKey(),
83                           version_info::GetVersionNumber());
84 }
85 
// Stores |url_prefix| in g_sbv4_url_prefix_for_testing. Only the pointer is
// stored, not a copy of the characters, so the string it points to must stay
// alive for as long as the override is in use. Passing nullptr removes the
// override.
void SetSbV4UrlPrefixForTesting(const char* url_prefix) {
  g_sbv4_url_prefix_for_testing = url_prefix;
}
89 
GetReportUrl(const V4ProtocolConfig & config,const std::string & method,const ExtendedReportingLevel * reporting_level,const bool is_enhanced_protection)90 std::string GetReportUrl(const V4ProtocolConfig& config,
91                          const std::string& method,
92                          const ExtendedReportingLevel* reporting_level,
93                          const bool is_enhanced_protection) {
94   std::string url = base::StringPrintf(
95       "%s/%s?client=%s&appver=%s&pver=4.0", kSbReportsURLPrefix, method.c_str(),
96       config.client_name.c_str(), config.version.c_str());
97   std::string api_key = google_apis::GetAPIKey();
98   if (!api_key.empty()) {
99     base::StringAppendF(&url, "&key=%s",
100                         net::EscapeQueryParamValue(api_key, true).c_str());
101   }
102   if (reporting_level)
103     url.append(base::StringPrintf("&ext=%d", *reporting_level));
104   if (is_enhanced_protection)
105     url.append(base::StringPrintf("&enh=%d", is_enhanced_protection));
106   return url;
107 }
108 
operator <<(std::ostream & os,const ListIdentifier & id)109 std::ostream& operator<<(std::ostream& os, const ListIdentifier& id) {
110   os << "{hash: " << id.hash() << "; platform_type: " << id.platform_type()
111      << "; threat_entry_type: " << id.threat_entry_type()
112      << "; threat_type: " << id.threat_type() << "}";
113   return os;
114 }
115 
GetCurrentPlatformType()116 PlatformType GetCurrentPlatformType() {
117 #if defined(OS_WIN)
118   return WINDOWS_PLATFORM;
119 #elif defined(OS_LINUX)
120   return LINUX_PLATFORM;
121 #elif defined(OS_IOS)
122   return IOS_PLATFORM;
123 #elif defined(OS_MACOSX)
124   return OSX_PLATFORM;
125 #else
126   // TODO(crbug.com/1030487): This file is, in fact, intended to be compiled on
127   // Android, the comment below is obsolete. We should be able to return
128   // ANDROID_PLATFORM here.
129   //
130   // This should ideally never compile but it is getting compiled on Android.
131   // See: https://bugs.chromium.org/p/chromium/issues/detail?id=621647
132   // TODO(vakh): Once that bug is fixed, this should be removed. If we leave
133   // the platform_type empty, the server won't recognize the request and
134   // return an error response which will pollute our UMA metrics.
135   return LINUX_PLATFORM;
136 #endif
137 }
138 
GetCertCsdDownloadWhitelistId()139 ListIdentifier GetCertCsdDownloadWhitelistId() {
140   return ListIdentifier(GetCurrentPlatformType(), CERT, CSD_DOWNLOAD_WHITELIST);
141 }
142 
GetChromeExtMalwareId()143 ListIdentifier GetChromeExtMalwareId() {
144   return ListIdentifier(CHROME_PLATFORM, CHROME_EXTENSION, MALWARE_THREAT);
145 }
146 
GetChromeUrlApiId()147 ListIdentifier GetChromeUrlApiId() {
148   // TODO(crbug.com/1030487): This special case for Android will no longer be
149   // needed once GetCurrentPlatformType() returns ANDROID_PLATFORM on Android.
150 #if defined(OS_ANDROID)
151   return ListIdentifier(ANDROID_PLATFORM, URL, API_ABUSE);
152 #else
153   return ListIdentifier(GetCurrentPlatformType(), URL, API_ABUSE);
154 #endif
155 }
156 
GetChromeUrlClientIncidentId()157 ListIdentifier GetChromeUrlClientIncidentId() {
158   return ListIdentifier(CHROME_PLATFORM, URL, CLIENT_INCIDENT);
159 }
160 
GetIpMalwareId()161 ListIdentifier GetIpMalwareId() {
162   return ListIdentifier(GetCurrentPlatformType(), IP_RANGE, MALWARE_THREAT);
163 }
164 
GetUrlBillingId()165 ListIdentifier GetUrlBillingId() {
166   return ListIdentifier(GetCurrentPlatformType(), URL, BILLING);
167 }
168 
GetUrlCsdDownloadWhitelistId()169 ListIdentifier GetUrlCsdDownloadWhitelistId() {
170   return ListIdentifier(GetCurrentPlatformType(), URL, CSD_DOWNLOAD_WHITELIST);
171 }
172 
GetUrlCsdWhitelistId()173 ListIdentifier GetUrlCsdWhitelistId() {
174   return ListIdentifier(GetCurrentPlatformType(), URL, CSD_WHITELIST);
175 }
176 
GetUrlHighConfidenceAllowlistId()177 ListIdentifier GetUrlHighConfidenceAllowlistId() {
178   return ListIdentifier(GetCurrentPlatformType(), URL,
179                         HIGH_CONFIDENCE_ALLOWLIST);
180 }
181 
GetUrlMalwareId()182 ListIdentifier GetUrlMalwareId() {
183   return ListIdentifier(GetCurrentPlatformType(), URL, MALWARE_THREAT);
184 }
185 
GetUrlMalBinId()186 ListIdentifier GetUrlMalBinId() {
187   return ListIdentifier(GetCurrentPlatformType(), URL, MALICIOUS_BINARY);
188 }
189 
GetUrlSocEngId()190 ListIdentifier GetUrlSocEngId() {
191   return ListIdentifier(GetCurrentPlatformType(), URL, SOCIAL_ENGINEERING);
192 }
193 
GetUrlSubresourceFilterId()194 ListIdentifier GetUrlSubresourceFilterId() {
195   return ListIdentifier(GetCurrentPlatformType(), URL, SUBRESOURCE_FILTER);
196 }
197 
GetUrlSuspiciousSiteId()198 ListIdentifier GetUrlSuspiciousSiteId() {
199   return ListIdentifier(GetCurrentPlatformType(), URL, SUSPICIOUS);
200 }
201 
GetUrlUwsId()202 ListIdentifier GetUrlUwsId() {
203   return ListIdentifier(GetCurrentPlatformType(), URL, UNWANTED_SOFTWARE);
204 }
205 
GetUmaSuffixForStore(const base::FilePath & file_path)206 std::string GetUmaSuffixForStore(const base::FilePath& file_path) {
207   DCHECK_EQ(kStoreSuffix, file_path.BaseName().Extension());
208   return base::StringPrintf(
209       ".%" PRFilePath, file_path.BaseName().RemoveExtension().value().c_str());
210 }
211 
StoreAndHashPrefix(ListIdentifier list_id,const HashPrefix & hash_prefix)212 StoreAndHashPrefix::StoreAndHashPrefix(ListIdentifier list_id,
213                                        const HashPrefix& hash_prefix)
214     : list_id(list_id), hash_prefix(hash_prefix) {}
215 
~StoreAndHashPrefix()216 StoreAndHashPrefix::~StoreAndHashPrefix() {}
217 
operator ==(const StoreAndHashPrefix & other) const218 bool StoreAndHashPrefix::operator==(const StoreAndHashPrefix& other) const {
219   return list_id == other.list_id && hash_prefix == other.hash_prefix;
220 }
221 
operator !=(const StoreAndHashPrefix & other) const222 bool StoreAndHashPrefix::operator!=(const StoreAndHashPrefix& other) const {
223   return !operator==(other);
224 }
225 
hash() const226 size_t StoreAndHashPrefix::hash() const {
227   std::size_t first = list_id.hash();
228   std::size_t second = std::hash<std::string>()(hash_prefix);
229 
230   return base::HashInts(first, second);
231 }
232 
SBThreatTypeSetIsValidForCheckBrowseUrl(const SBThreatTypeSet & set)233 bool SBThreatTypeSetIsValidForCheckBrowseUrl(const SBThreatTypeSet& set) {
234   for (SBThreatType type : set) {
235     switch (type) {
236       case SB_THREAT_TYPE_URL_PHISHING:
237       case SB_THREAT_TYPE_URL_MALWARE:
238       case SB_THREAT_TYPE_URL_UNWANTED:
239       case SB_THREAT_TYPE_SUSPICIOUS_SITE:
240       case SB_THREAT_TYPE_BILLING:
241         break;
242 
243       default:
244         return false;
245     }
246   }
247   return true;
248 }
249 
operator ==(const ListIdentifier & other) const250 bool ListIdentifier::operator==(const ListIdentifier& other) const {
251   return platform_type_ == other.platform_type_ &&
252          threat_entry_type_ == other.threat_entry_type_ &&
253          threat_type_ == other.threat_type_;
254 }
255 
operator !=(const ListIdentifier & other) const256 bool ListIdentifier::operator!=(const ListIdentifier& other) const {
257   return !operator==(other);
258 }
259 
hash() const260 size_t ListIdentifier::hash() const {
261   std::size_t first = std::hash<unsigned int>()(platform_type_);
262   std::size_t second = std::hash<unsigned int>()(threat_entry_type_);
263   std::size_t third = std::hash<unsigned int>()(threat_type_);
264 
265   std::size_t interim = base::HashInts(first, second);
266   return base::HashInts(interim, third);
267 }
268 
ListIdentifier(PlatformType platform_type,ThreatEntryType threat_entry_type,ThreatType threat_type)269 ListIdentifier::ListIdentifier(PlatformType platform_type,
270                                ThreatEntryType threat_entry_type,
271                                ThreatType threat_type)
272     : platform_type_(platform_type),
273       threat_entry_type_(threat_entry_type),
274       threat_type_(threat_type) {
275   DCHECK(PlatformType_IsValid(platform_type));
276   DCHECK(ThreatEntryType_IsValid(threat_entry_type));
277   DCHECK(ThreatType_IsValid(threat_type));
278 }
279 
ListIdentifier(const ListUpdateResponse & response)280 ListIdentifier::ListIdentifier(const ListUpdateResponse& response)
281     : ListIdentifier(response.platform_type(),
282                      response.threat_entry_type(),
283                      response.threat_type()) {}
284 
V4ProtocolConfig(const std::string & client_name,bool disable_auto_update,const std::string & key_param,const std::string & version)285 V4ProtocolConfig::V4ProtocolConfig(const std::string& client_name,
286                                    bool disable_auto_update,
287                                    const std::string& key_param,
288                                    const std::string& version)
289     : client_name(client_name),
290       disable_auto_update(disable_auto_update),
291       key_param(key_param),
292       version(version) {}
293 
294 V4ProtocolConfig::V4ProtocolConfig(const V4ProtocolConfig& other) = default;
295 
~V4ProtocolConfig()296 V4ProtocolConfig::~V4ProtocolConfig() {}
297 
298 // static
GetNextBackOffInterval(size_t * error_count,size_t * multiplier)299 base::TimeDelta V4ProtocolManagerUtil::GetNextBackOffInterval(
300     size_t* error_count,
301     size_t* multiplier) {
302   DCHECK(multiplier && error_count);
303   (*error_count)++;
304   if (*error_count > 1 && *error_count < 9) {
305     // With error count 9 and above we will hit the 24 hour max interval.
306     // Cap the multiplier here to prevent integer overflow errors.
307     *multiplier *= 2;
308   }
309   base::TimeDelta next =
310       base::TimeDelta::FromMinutes(*multiplier * (1 + base::RandDouble()) * 15);
311   base::TimeDelta day = base::TimeDelta::FromHours(24);
312   return next < day ? next : day;
313 }
314 
315 // static
RecordHttpResponseOrErrorCode(const char * metric_name,int net_error,int response_code)316 void V4ProtocolManagerUtil::RecordHttpResponseOrErrorCode(
317     const char* metric_name,
318     int net_error,
319     int response_code) {
320   base::UmaHistogramSparse(metric_name,
321                            net_error == net::OK ? response_code : net_error);
322 }
323 
324 // static
GetRequestUrlAndHeaders(const std::string & request_base64,const std::string & method_name,const V4ProtocolConfig & config,GURL * gurl,net::HttpRequestHeaders * headers)325 void V4ProtocolManagerUtil::GetRequestUrlAndHeaders(
326     const std::string& request_base64,
327     const std::string& method_name,
328     const V4ProtocolConfig& config,
329     GURL* gurl,
330     net::HttpRequestHeaders* headers) {
331   const char* url_prefix = g_sbv4_url_prefix_for_testing
332                                ? g_sbv4_url_prefix_for_testing
333                                : kSbV4UrlPrefix;
334   *gurl = GURL(
335       ComposeUrl(url_prefix, method_name, request_base64, config.key_param));
336   UpdateHeaders(headers);
337 }
338 
339 // static
ComposeUrl(const std::string & prefix,const std::string & method,const std::string & request_base64,const std::string & key_param)340 std::string V4ProtocolManagerUtil::ComposeUrl(const std::string& prefix,
341                                               const std::string& method,
342                                               const std::string& request_base64,
343                                               const std::string& key_param) {
344   DCHECK(!prefix.empty() && !method.empty());
345   std::string url = base::StringPrintf(
346       "%s/%s?$req=%s&$ct=application/x-protobuf", prefix.c_str(),
347       method.c_str(), request_base64.c_str());
348   if (!key_param.empty()) {
349     base::StringAppendF(&url, "&key=%s",
350                         net::EscapeQueryParamValue(key_param, true).c_str());
351   }
352   return url;
353 }
354 
355 // static
UpdateHeaders(net::HttpRequestHeaders * headers)356 void V4ProtocolManagerUtil::UpdateHeaders(net::HttpRequestHeaders* headers) {
357   // NOTE(vakh): The following header informs the envelope server (which sits in
358   // front of Google's stubby server) that the received GET request should be
359   // interpreted as a POST.
360   headers->SetHeaderIfMissing("X-HTTP-Method-Override", "POST");
361 }
362 
363 // static
UrlToFullHashes(const GURL & url,std::vector<FullHash> * full_hashes)364 void V4ProtocolManagerUtil::UrlToFullHashes(
365     const GURL& url,
366     std::vector<FullHash>* full_hashes) {
367   std::string canon_host, canon_path, canon_query;
368   CanonicalizeUrl(url, &canon_host, &canon_path, &canon_query);
369 
370   std::vector<std::string> hosts;
371   if (url.HostIsIPAddress()) {
372     hosts.push_back(url.host());
373   } else {
374     GenerateHostVariantsToCheck(canon_host, &hosts);
375   }
376 
377   std::vector<std::string> paths;
378   GeneratePathVariantsToCheck(canon_path, canon_query, &paths);
379   for (const std::string& host : hosts) {
380     for (const std::string& path : paths) {
381       full_hashes->push_back(crypto::SHA256HashString(host + path));
382     }
383   }
384 }
385 
386 // static
FullHashToHashPrefix(const FullHash & full_hash,PrefixSize prefix_size,HashPrefix * hash_prefix)387 bool V4ProtocolManagerUtil::FullHashToHashPrefix(const FullHash& full_hash,
388                                                  PrefixSize prefix_size,
389                                                  HashPrefix* hash_prefix) {
390   if (full_hash.size() < prefix_size) {
391     return false;
392   }
393   *hash_prefix = full_hash.substr(0, prefix_size);
394   return true;
395 }
396 
397 // static
FullHashToSmallestHashPrefix(const FullHash & full_hash,HashPrefix * hash_prefix)398 bool V4ProtocolManagerUtil::FullHashToSmallestHashPrefix(
399     const FullHash& full_hash,
400     HashPrefix* hash_prefix) {
401   return FullHashToHashPrefix(full_hash, kMinHashPrefixLength, hash_prefix);
402 }
403 
404 // static
FullHashMatchesHashPrefix(const FullHash & full_hash,const HashPrefix & hash_prefix)405 bool V4ProtocolManagerUtil::FullHashMatchesHashPrefix(
406     const FullHash& full_hash,
407     const HashPrefix& hash_prefix) {
408   return full_hash.compare(0, hash_prefix.length(), hash_prefix) == 0;
409 }
410 
411 // static
GenerateHostsToCheck(const GURL & url,std::vector<std::string> * hosts)412 void V4ProtocolManagerUtil::GenerateHostsToCheck(
413     const GURL& url,
414     std::vector<std::string>* hosts) {
415   std::string canon_host;
416   CanonicalizeUrl(url, &canon_host, nullptr, nullptr);
417   GenerateHostVariantsToCheck(canon_host, hosts);
418 }
419 
420 // static
GeneratePathsToCheck(const GURL & url,std::vector<std::string> * paths)421 void V4ProtocolManagerUtil::GeneratePathsToCheck(
422     const GURL& url,
423     std::vector<std::string>* paths) {
424   std::string canon_path;
425   std::string canon_query;
426   CanonicalizeUrl(url, nullptr, &canon_path, &canon_query);
427   GeneratePathVariantsToCheck(canon_path, canon_query, paths);
428 }
429 
430 // static
GeneratePatternsToCheck(const GURL & url,std::vector<std::string> * urls)431 void V4ProtocolManagerUtil::GeneratePatternsToCheck(
432     const GURL& url,
433     std::vector<std::string>* urls) {
434   std::string canon_host;
435   std::string canon_path;
436   std::string canon_query;
437   CanonicalizeUrl(url, &canon_host, &canon_path, &canon_query);
438 
439   std::vector<std::string> hosts, paths;
440   GenerateHostVariantsToCheck(canon_host, &hosts);
441   GeneratePathVariantsToCheck(canon_path, canon_query, &paths);
442   for (size_t h = 0; h < hosts.size(); ++h) {
443     for (size_t p = 0; p < paths.size(); ++p) {
444       urls->push_back(hosts[h] + paths[p]);
445     }
446   }
447 }
448 
449 // static
GetFullHash(const GURL & url)450 FullHash V4ProtocolManagerUtil::GetFullHash(const GURL& url) {
451   std::string host;
452   std::string path;
453   CanonicalizeUrl(url, &host, &path, nullptr);
454 
455   return crypto::SHA256HashString(host + path);
456 }
457 
458 // static
CanonicalizeUrl(const GURL & url,std::string * canonicalized_hostname,std::string * canonicalized_path,std::string * canonicalized_query)459 void V4ProtocolManagerUtil::CanonicalizeUrl(const GURL& url,
460                                             std::string* canonicalized_hostname,
461                                             std::string* canonicalized_path,
462                                             std::string* canonicalized_query) {
463   DCHECK(url.is_valid());
464 
465   // We only canonicalize "normal" URLs.
466   if (!url.IsStandard())
467     return;
468 
469   // Following canonicalization steps are excluded since url parsing takes care
470   // of those :-
471   // 1. Remove any tab (0x09), CR (0x0d), and LF (0x0a) chars from url.
472   //    (Exclude escaped version of these chars).
473   // 2. Normalize hostname to 4 dot-seperated decimal values.
474   // 3. Lowercase hostname.
475   // 4. Resolve path sequences "/../" and "/./".
476 
477   // That leaves us with the following :-
478   // 1. Remove fragment in URL.
479   GURL url_without_fragment;
480   GURL::Replacements f_replacements;
481   f_replacements.ClearRef();
482   f_replacements.ClearUsername();
483   f_replacements.ClearPassword();
484   url_without_fragment = url.ReplaceComponents(f_replacements);
485 
486   // 2. Do URL unescaping until no more hex encoded characters exist.
487   std::string url_unescaped_str(Unescape(url_without_fragment.spec()));
488   url::Parsed parsed;
489   url::ParseStandardURL(url_unescaped_str.data(), url_unescaped_str.length(),
490                         &parsed);
491 
492   // 3. In hostname, remove all leading and trailing dots.
493   base::StringPiece host;
494   if (parsed.host.len > 0)
495     host = base::StringPiece(url_unescaped_str.data() + parsed.host.begin,
496                              parsed.host.len);
497 
498   base::StringPiece host_without_end_dots =
499       base::TrimString(host, ".", base::TrimPositions::TRIM_ALL);
500 
501   // 4. In hostname, replace consecutive dots with a single dot.
502   std::string host_without_consecutive_dots(
503       RemoveConsecutiveChars(host_without_end_dots, '.'));
504 
505   // 5. In path, replace runs of consecutive slashes with a single slash.
506   base::StringPiece path;
507   if (parsed.path.len > 0)
508     path = base::StringPiece(url_unescaped_str.data() + parsed.path.begin,
509                              parsed.path.len);
510   std::string path_without_consecutive_slash(RemoveConsecutiveChars(path, '/'));
511 
512   url::Replacements<char> hp_replacements;
513   hp_replacements.SetHost(
514       host_without_consecutive_dots.data(),
515       url::Component(0, host_without_consecutive_dots.length()));
516   hp_replacements.SetPath(
517       path_without_consecutive_slash.data(),
518       url::Component(0, path_without_consecutive_slash.length()));
519 
520   std::string url_unescaped_with_can_hostpath;
521   url::StdStringCanonOutput output(&url_unescaped_with_can_hostpath);
522   url::Parsed temp_parsed;
523   url::ReplaceComponents(url_unescaped_str.data(), url_unescaped_str.length(),
524                          parsed, hp_replacements, nullptr, &output,
525                          &temp_parsed);
526   output.Complete();
527 
528   // 6. Step needed to revert escaping done in url::ReplaceComponents.
529   url_unescaped_with_can_hostpath = Unescape(url_unescaped_with_can_hostpath);
530 
531   // 7. After performing all above steps, percent-escape all chars in url which
532   // are <= ASCII 32, >= 127, #, %. Escapes must be uppercase hex characters.
533   std::string escaped_canon_url_str(Escape(url_unescaped_with_can_hostpath));
534   url::Parsed final_parsed;
535   url::ParseStandardURL(escaped_canon_url_str.data(),
536                         escaped_canon_url_str.length(), &final_parsed);
537 
538   if (canonicalized_hostname && final_parsed.host.len > 0) {
539     *canonicalized_hostname = escaped_canon_url_str.substr(
540         final_parsed.host.begin, final_parsed.host.len);
541   }
542   if (canonicalized_path && final_parsed.path.len > 0) {
543     *canonicalized_path = escaped_canon_url_str.substr(final_parsed.path.begin,
544                                                        final_parsed.path.len);
545   }
546   if (canonicalized_query && final_parsed.query.len > 0) {
547     *canonicalized_query = escaped_canon_url_str.substr(
548         final_parsed.query.begin, final_parsed.query.len);
549   }
550 }
551 
552 // static
RemoveConsecutiveChars(base::StringPiece str,const char c)553 std::string V4ProtocolManagerUtil::RemoveConsecutiveChars(base::StringPiece str,
554                                                           const char c) {
555   std::string output;
556   // Output is at most the length of the original string.
557   output.reserve(str.size());
558 
559   size_t i = 0;
560   while (i < str.size()) {
561     output.append(1, str[i++]);
562     if (str[i - 1] == c) {
563       while (i < str.size() && str[i] == c) {
564         i++;
565       }
566     }
567   }
568 
569   return output;
570 }
571 
572 // static
GenerateHostVariantsToCheck(const std::string & host,std::vector<std::string> * hosts)573 void V4ProtocolManagerUtil::GenerateHostVariantsToCheck(
574     const std::string& host,
575     std::vector<std::string>* hosts) {
576   hosts->clear();
577 
578   if (host.empty())
579     return;
580 
581   // Per the Safe Browsing Protocol v2 spec, we try the host, and also up to 4
582   // hostnames formed by starting with the last 5 components and successively
583   // removing the leading component.  The last component isn't examined alone,
584   // since it's the TLD or a subcomponent thereof.
585   //
586   // Note that we don't need to be clever about stopping at the "real" eTLD --
587   // the data on the server side has been filtered to ensure it will not
588   // blacklist a whole TLD, and it's not significantly slower on our side to
589   // just check too much.
590   //
591   // Also note that because we have a simple blacklist, not some sort of complex
592   // whitelist-in-blacklist or vice versa, it doesn't matter what order we check
593   // these in.
594   const size_t kMaxHostsToCheck = 4;
595   bool skipped_last_component = false;
596   for (std::string::const_reverse_iterator i(host.rbegin());
597        i != host.rend() && hosts->size() < kMaxHostsToCheck; ++i) {
598     if (*i == '.') {
599       if (skipped_last_component)
600         hosts->push_back(std::string(i.base(), host.end()));
601       else
602         skipped_last_component = true;
603     }
604   }
605   hosts->push_back(host);
606 }
607 
608 // static
GeneratePathVariantsToCheck(const std::string & path,const std::string & query,std::vector<std::string> * paths)609 void V4ProtocolManagerUtil::GeneratePathVariantsToCheck(
610     const std::string& path,
611     const std::string& query,
612     std::vector<std::string>* paths) {
613   paths->clear();
614 
615   if (path.empty())
616     return;
617 
618   // Per the Safe Browsing Protocol v2 spec, we try the exact path with/without
619   // the query parameters, and also up to 4 paths formed by starting at the root
620   // and adding more path components.
621   //
622   // As with the hosts above, it doesn't matter what order we check these in.
623   const size_t kMaxPathsToCheck = 4;
624   for (std::string::const_iterator i(path.begin());
625        i != path.end() && paths->size() < kMaxPathsToCheck; ++i) {
626     if (*i == '/')
627       paths->push_back(std::string(path.begin(), i + 1));
628   }
629 
630   if (!paths->empty() && paths->back() != path)
631     paths->push_back(path);
632 
633   if (!query.empty())
634     paths->push_back(path + "?" + query);
635 }
636 
637 // static
SetClientInfoFromConfig(ClientInfo * client_info,const V4ProtocolConfig & config)638 void V4ProtocolManagerUtil::SetClientInfoFromConfig(
639     ClientInfo* client_info,
640     const V4ProtocolConfig& config) {
641   DCHECK(client_info);
642   client_info->set_client_id(config.client_name);
643   client_info->set_client_version(config.version);
644 }
645 
646 // static
GetIPV6AddressFromString(const std::string & ip_address,net::IPAddress * address)647 bool V4ProtocolManagerUtil::GetIPV6AddressFromString(
648     const std::string& ip_address,
649     net::IPAddress* address) {
650   DCHECK(address);
651   if (!address->AssignFromIPLiteral(ip_address))
652     return false;
653   if (address->IsIPv4())
654     *address = net::ConvertIPv4ToIPv4MappedIPv6(*address);
655   return address->IsIPv6();
656 }
657 
658 // static
IPAddressToEncodedIPV6Hash(const std::string & ip_address,FullHash * hashed_encoded_ip)659 bool V4ProtocolManagerUtil::IPAddressToEncodedIPV6Hash(
660     const std::string& ip_address,
661     FullHash* hashed_encoded_ip) {
662   net::IPAddress address;
663   if (!GetIPV6AddressFromString(ip_address, &address)) {
664     return false;
665   }
666   std::string packed_ip = net::IPAddressToPackedString(address);
667   if (packed_ip.empty()) {
668     return false;
669   }
670 
671   const std::string hash = base::SHA1HashString(packed_ip);
672   DCHECK_EQ(20u, hash.size());
673   hashed_encoded_ip->resize(hash.size() + 1);
674   hashed_encoded_ip->replace(0, hash.size(), hash);
675   (*hashed_encoded_ip)[hash.size()] = static_cast<unsigned char>(128);
676   return true;
677 }
678 
679 // static
GetListClientStatesFromStoreStateMap(const std::unique_ptr<StoreStateMap> & store_state_map,std::vector<std::string> * list_client_states)680 void V4ProtocolManagerUtil::GetListClientStatesFromStoreStateMap(
681     const std::unique_ptr<StoreStateMap>& store_state_map,
682     std::vector<std::string>* list_client_states) {
683   std::transform(
684       store_state_map->begin(), store_state_map->end(),
685       std::back_inserter(*list_client_states),
686       [](const std::map<ListIdentifier, std::string>::value_type& pair) {
687         return pair.second;
688       });
689 }
690 
691 }  // namespace safe_browsing
692