1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef COMPONENTS_SAFE_BROWSING_CORE_DB_V4_PROTOCOL_MANAGER_UTIL_H_
6 #define COMPONENTS_SAFE_BROWSING_CORE_DB_V4_PROTOCOL_MANAGER_UTIL_H_
7
8 // A class that implements the stateless methods used by the GetHashUpdate and
9 // GetFullHash stubby calls made by Chrome using the SafeBrowsing V4 protocol.
10
11 #include <functional>
12 #include <initializer_list>
13 #include <memory>
14 #include <ostream>
15 #include <string>
16 #include <unordered_map>
17 #include <unordered_set>
18 #include <vector>
19
20 #include "base/containers/flat_set.h"
21 #include "base/gtest_prod_util.h"
22 #include "base/strings/string_piece.h"
23 #include "components/safe_browsing/core/common/safe_browsing_prefs.h"
24 #include "components/safe_browsing/core/db/safebrowsing.pb.h"
25 #include "url/gurl.h"
26
27 namespace net {
28 class HttpRequestHeaders;
29 class IPAddress;
30 } // namespace net
31
32 namespace safe_browsing {
33
34
// The size of a hash prefix, in bytes. It should be between 4 and 32 (a full
// hash).
using PrefixSize = size_t;

// The minimum expected size (in bytes) of a hash-prefix.
constexpr PrefixSize kMinHashPrefixLength = 4;

// The maximum expected size (in bytes) of a hash-prefix. This represents the
// length of a SHA256 hash.
constexpr PrefixSize kMaxHashPrefixLength = 32;

// A hash prefix sent by the SafeBrowsing PVer4 service.
using HashPrefix = std::string;

// A full SHA256 hash. Shares its representation with HashPrefix, so the two
// names are interchangeable as far as the type system is concerned.
using FullHash = HashPrefix;
51
52 using ListUpdateRequest = FetchThreatListUpdatesRequest::ListUpdateRequest;
53 using ListUpdateResponse = FetchThreatListUpdatesResponse::ListUpdateResponse;
54
55 void SetSbV4UrlPrefixForTesting(const char* url_prefix);
56
// Config passed to the constructor of a V4 protocol manager.
struct V4ProtocolConfig {
  // The safe browsing client name sent in each request.
  std::string client_name;

  // Disable auto-updates using a command line switch.
  bool disable_auto_update;

  // The Google API key.
  std::string key_param;

  // Current product version sent in each request.
  std::string version;

  // A config must be built from all four fields (or copied from another
  // config); default construction is explicitly disallowed. The deleted
  // constructor is public so that misuse is reported as "deleted" rather than
  // as an access error.
  V4ProtocolConfig() = delete;
  V4ProtocolConfig(const std::string& client_name,
                   bool disable_auto_update,
                   const std::string& key_param,
                   const std::string& version);
  V4ProtocolConfig(const V4ProtocolConfig& other);
  ~V4ProtocolConfig();
};
81
82 // Get the v4 protocol config struct with a given client name, and ability to
83 // enable/disable database auto update.
84 V4ProtocolConfig GetV4ProtocolConfig(const std::string& client_name,
85 bool disable_auto_update);
86
87 // Returns the URL to use for sending threat reports and other Safe Browsing
88 // hits back to Safe Browsing service.
89 std::string GetReportUrl(
90 const V4ProtocolConfig& config,
91 const std::string& method,
92 const ExtendedReportingLevel* reporting_level = nullptr,
93 const bool is_enhanced_protection = false);
94
// Different types of threats that SafeBrowsing protects against. This is the
// type that's returned to the clients of SafeBrowsing in Chromium.
// The enumerators rely on implicit numbering, so their order determines their
// numeric values.
// GENERATED_JAVA_ENUM_PACKAGE: org.chromium.components.safe_browsing
// GENERATED_JAVA_PREFIX_TO_STRIP: SB_THREAT_TYPE_
enum SBThreatType {
  // This type can be used for lists that can be checked synchronously so a
  // client callback isn't required, or for whitelists.
  SB_THREAT_TYPE_UNUSED,

  // No threat at all.
  SB_THREAT_TYPE_SAFE,

  // The URL is being used for phishing.
  SB_THREAT_TYPE_URL_PHISHING,

  // The URL hosts malware.
  SB_THREAT_TYPE_URL_MALWARE,

  // The URL hosts unwanted programs.
  SB_THREAT_TYPE_URL_UNWANTED,

  // The download URL is malware.
  SB_THREAT_TYPE_URL_BINARY_MALWARE,

  // Url detected by the client-side phishing model. Note that unlike the
  // above values, this does not correspond to a downloaded list.
  SB_THREAT_TYPE_URL_CLIENT_SIDE_PHISHING,

  // The Chrome extension or app (given by its ID) is malware.
  SB_THREAT_TYPE_EXTENSION,

  // Url detected by the client-side malware IP list. This IP list is part
  // of the client side detection model.
  SB_THREAT_TYPE_URL_CLIENT_SIDE_MALWARE,

  // Url leads to a blacklisted resource script. Note that no warnings should
  // be shown on this threat type, but an incident report might be sent.
  SB_THREAT_TYPE_BLACKLISTED_RESOURCE,

  // Url abuses a permission API.
  SB_THREAT_TYPE_API_ABUSE,

  // Activation patterns for the Subresource Filter.
  SB_THREAT_TYPE_SUBRESOURCE_FILTER,

  // CSD Phishing whitelist. This "threat" means a URL matched the whitelist.
  SB_THREAT_TYPE_CSD_WHITELIST,

  // DEPRECATED. Url detected by password protection service.
  DEPRECATED_SB_THREAT_TYPE_URL_PASSWORD_PROTECTION_PHISHING,

  // Saved password reuse detected on low reputation page.
  SB_THREAT_TYPE_SAVED_PASSWORD_REUSE,

  // Chrome signed in and syncing gaia password reuse detected on low
  // reputation page.
  SB_THREAT_TYPE_SIGNED_IN_SYNC_PASSWORD_REUSE,

  // Chrome signed in non syncing gaia password reuse detected on low
  // reputation page.
  SB_THREAT_TYPE_SIGNED_IN_NON_SYNC_PASSWORD_REUSE,

  // A Google ad that caused a blocked autoredirect was collected.
  SB_THREAT_TYPE_BLOCKED_AD_REDIRECT,

  // A sample of an ad was collected.
  SB_THREAT_TYPE_AD_SAMPLE,

  // A report of Google ad that caused a blocked popup was collected.
  SB_THREAT_TYPE_BLOCKED_AD_POPUP,

  // The page loaded a resource from the Suspicious Site list.
  SB_THREAT_TYPE_SUSPICIOUS_SITE,

  // Enterprise password reuse detected on low reputation page.
  SB_THREAT_TYPE_ENTERPRISE_PASSWORD_REUSE,

  // Potential billing detected.
  SB_THREAT_TYPE_BILLING,

  // Off-market APK file downloaded, which could be potentially dangerous.
  SB_THREAT_TYPE_APK_DOWNLOAD,

  // Match found in the local high-confidence allowlist.
  SB_THREAT_TYPE_HIGH_CONFIDENCE_ALLOWLIST,
};
181
182 using SBThreatTypeSet = base::flat_set<SBThreatType>;
183
184 // Return true if |set| only contains types that are valid for CheckBrowseUrl().
185 // Intended for use in DCHECK().
186 bool SBThreatTypeSetIsValidForCheckBrowseUrl(const SBThreatTypeSet& set);
187
188 // Shorthand for creating an SBThreatTypeSet from a list of SBThreatTypes. Use
189 // like CreateSBThreatTypeSet({SB_THREAT_TYPE_URL_PHISHING,
190 // SB_THREAT_TYPE_URL_MALWARE})
CreateSBThreatTypeSet(std::initializer_list<SBThreatType> set)191 inline SBThreatTypeSet CreateSBThreatTypeSet(
192 std::initializer_list<SBThreatType> set) {
193 return SBThreatTypeSet(set);
194 }
195
196 // The information required to uniquely identify each list the client is
197 // interested in maintaining and downloading from the SafeBrowsing servers.
198 // For example, for digests of Malware binaries on Windows:
199 // platform_type = WINDOWS,
200 // threat_entry_type = EXECUTABLE,
201 // threat_type = MALWARE
202 class ListIdentifier {
203 public:
204 ListIdentifier(PlatformType platform_type,
205 ThreatEntryType threat_entry_type,
206 ThreatType threat_type);
207 explicit ListIdentifier(const ListUpdateResponse&);
208
209 bool operator==(const ListIdentifier& other) const;
210 bool operator!=(const ListIdentifier& other) const;
211 size_t hash() const;
212
platform_type()213 PlatformType platform_type() const { return platform_type_; }
threat_entry_type()214 ThreatEntryType threat_entry_type() const { return threat_entry_type_; }
threat_type()215 ThreatType threat_type() const { return threat_type_; }
216
217 private:
218 PlatformType platform_type_;
219 ThreatEntryType threat_entry_type_;
220 ThreatType threat_type_;
221
222 ListIdentifier() = delete;
223 };
224
225 std::ostream& operator<<(std::ostream& os, const ListIdentifier& id);
226
227 PlatformType GetCurrentPlatformType();
228 ListIdentifier GetCertCsdDownloadWhitelistId();
229 ListIdentifier GetChromeExtMalwareId();
230 ListIdentifier GetChromeUrlApiId();
231 ListIdentifier GetChromeUrlClientIncidentId();
232 ListIdentifier GetIpMalwareId();
233 ListIdentifier GetUrlBillingId();
234 ListIdentifier GetUrlCsdDownloadWhitelistId();
235 ListIdentifier GetUrlCsdWhitelistId();
236 ListIdentifier GetUrlHighConfidenceAllowlistId();
237 ListIdentifier GetUrlMalBinId();
238 ListIdentifier GetUrlMalwareId();
239 ListIdentifier GetUrlSocEngId();
240 ListIdentifier GetUrlSubresourceFilterId();
241 ListIdentifier GetUrlSuspiciousSiteId();
242 ListIdentifier GetUrlUwsId();
243
244 // Returns the basename of the store file, without the ".store" extension.
245 std::string GetUmaSuffixForStore(const base::FilePath& file_path);
246
247 // Represents the state of each store.
248 using StoreStateMap = std::unordered_map<ListIdentifier, std::string>;
249
250 // Sever response, parsed in vector form.
251 using ParsedServerResponse = std::vector<std::unique_ptr<ListUpdateResponse>>;
252
253 // Holds the hash prefix and the store that it matched in.
254 struct StoreAndHashPrefix {
255 public:
256 ListIdentifier list_id;
257 HashPrefix hash_prefix;
258
259 StoreAndHashPrefix(ListIdentifier list_id, const HashPrefix& hash_prefix);
260 ~StoreAndHashPrefix();
261
262 bool operator==(const StoreAndHashPrefix& other) const;
263 bool operator!=(const StoreAndHashPrefix& other) const;
264 size_t hash() const;
265
266 private:
267 StoreAndHashPrefix() = delete;
268 };
269
270 // Used to track the hash prefix and the store in which a full hash's prefix
271 // matched.
272 using StoreAndHashPrefixes = std::vector<StoreAndHashPrefix>;
273
// Enumerate failures for histogramming purposes. DO NOT CHANGE THE
// ORDERING OF THESE VALUES.
enum V4OperationResult {
  // 200 response code means that the server recognized the request.
  STATUS_200 = 0,

  // Subset of successful responses where the response body wasn't parsable.
  PARSE_ERROR = 1,

  // Operation request failed (network error).
  NETWORK_ERROR = 2,

  // Operation request returned HTTP result code other than 200.
  HTTP_ERROR = 3,

  // Operation attempted during error backoff, no request sent.
  BACKOFF_ERROR = 4,

  // Operation attempted before min wait duration elapsed, no request sent.
  MIN_WAIT_DURATION_ERROR = 5,

  // Identical operation already pending.
  ALREADY_PENDING_ERROR = 6,

  // Memory space for histograms is determined by the max. ALWAYS
  // ADD NEW VALUES BEFORE THIS ONE.
  OPERATION_RESULT_MAX = 7
};
302
303 // A class that provides static methods related to the Pver4 protocol.
304 class V4ProtocolManagerUtil {
305 public:
306 // Canonicalizes url as per Google Safe Browsing Specification.
307 // See: https://developers.google.com/safe-browsing/v4/urls-hashing
308 static void CanonicalizeUrl(const GURL& url,
309 std::string* canonicalized_hostname,
310 std::string* canonicalized_path,
311 std::string* canonicalized_query);
312
313 // This method returns the host suffix combinations from the hostname in the
314 // URL, as described here:
315 // https://developers.google.com/safe-browsing/v4/urls-hashing
316 static void GenerateHostVariantsToCheck(const std::string& host,
317 std::vector<std::string>* hosts);
318
319 // This method returns the path prefix combinations from the path in the
320 // URL, as described here:
321 // https://developers.google.com/safe-browsing/v4/urls-hashing
322 static void GeneratePathVariantsToCheck(const std::string& path,
323 const std::string& query,
324 std::vector<std::string>* paths);
325
326 // Given a URL, returns all the patterns we need to check.
327 static void GeneratePatternsToCheck(const GURL& url,
328 std::vector<std::string>* urls);
329
330 // Returns a FullHash for the basic host+path pattern for a given URL after
331 // canonicalization. Not intended for general use.
332 static FullHash GetFullHash(const GURL& url);
333
334 // Generates a Pver4 request URL and sets the appropriate header values.
335 // |request_base64| is the serialized request protocol buffer encoded in
336 // base 64.
337 // |method_name| is the name of the method to call, as specified in the proto,
338 // |config| is an instance of V4ProtocolConfig that stores the client config,
339 // |gurl| is set to the value of the PVer4 request URL,
340 // |headers| is populated with the appropriate header values.
341 static void GetRequestUrlAndHeaders(const std::string& request_base64,
342 const std::string& method_name,
343 const V4ProtocolConfig& config,
344 GURL* gurl,
345 net::HttpRequestHeaders* headers);
346
347 // Worker function for calculating the backoff times.
348 // |multiplier| is doubled for each consecutive error after the
349 // first, and |error_count| is incremented with each call.
350 // Backoff interval is MIN(((2^(n-1))*15 minutes) * (RAND + 1), 24 hours)
351 // where n is the number of consecutive errors.
352 static base::TimeDelta GetNextBackOffInterval(size_t* error_count,
353 size_t* multiplier);
354
355 // Record HTTP response code when there's no error in fetching an HTTP
356 // request, and the error code, when there is.
357 // |metric_name| is the name of the UMA metric to record the response code or
358 // error code against, |net_error| represents the net error code of the HTTP
359 // request, and |response code| represents the HTTP response code received
360 // from the server.
361 static void RecordHttpResponseOrErrorCode(const char* metric_name,
362 int net_error,
363 int response_code);
364
365 // Generate the set of FullHashes to check for |url|.
366 static void UrlToFullHashes(const GURL& url,
367 std::vector<FullHash>* full_hashes);
368
369 static bool FullHashToHashPrefix(const FullHash& full_hash,
370 PrefixSize prefix_size,
371 HashPrefix* hash_prefix);
372
373 static bool FullHashToSmallestHashPrefix(const FullHash& full_hash,
374 HashPrefix* hash_prefix);
375
376 static bool FullHashMatchesHashPrefix(const FullHash& full_hash,
377 const HashPrefix& hash_prefix);
378
379 static void SetClientInfoFromConfig(ClientInfo* client_info,
380 const V4ProtocolConfig& config);
381
382 static bool GetIPV6AddressFromString(const std::string& ip_address,
383 net::IPAddress* address);
384
385 // Converts a IPV4 or IPV6 address in |ip_address| to the SHA1 hash of the
386 // corresponding packed IPV6 address in |hashed_encoded_ip|, and adds an
387 // extra byte containing the value 128 at the end. This is done to match the
388 // server implementation for calculating the hash prefix of an IP address.
389 static bool IPAddressToEncodedIPV6Hash(const std::string& ip_address,
390 FullHash* hashed_encoded_ip);
391
392 // Stores the client state values for each of the lists in |store_state_map|
393 // into |list_client_states|.
394 static void GetListClientStatesFromStoreStateMap(
395 const std::unique_ptr<StoreStateMap>& store_state_map,
396 std::vector<std::string>* list_client_states);
397
398 private:
V4ProtocolManagerUtil()399 V4ProtocolManagerUtil() {}
400
401 FRIEND_TEST_ALL_PREFIXES(V4ProtocolManagerUtilTest, TestBackOffLogic);
402 FRIEND_TEST_ALL_PREFIXES(V4ProtocolManagerUtilTest,
403 TestGetRequestUrlAndUpdateHeaders);
404 FRIEND_TEST_ALL_PREFIXES(V4ProtocolManagerUtilTest, UrlParsing);
405 FRIEND_TEST_ALL_PREFIXES(V4ProtocolManagerUtilTest, CanonicalizeUrl);
406
407 // Composes a URL using |prefix|, |method| (e.g.: encodedFullHashes).
408 // |request_base64|, |client_id|, |version| and |key_param|. |prefix|
409 // should contain the entire url prefix including scheme, host and path.
410 static std::string ComposeUrl(const std::string& prefix,
411 const std::string& method,
412 const std::string& request_base64,
413 const std::string& key_param);
414
415 // Sets the HTTP headers expected by a standard PVer4 request.
416 static void UpdateHeaders(net::HttpRequestHeaders* headers);
417
418 // Given a URL, returns all the hosts we need to check. They are returned
419 // in order of size (i.e. b.c is first, then a.b.c).
420 static void GenerateHostsToCheck(const GURL& url,
421 std::vector<std::string>* hosts);
422
423 // Given a URL, returns all the paths we need to check.
424 static void GeneratePathsToCheck(const GURL& url,
425 std::vector<std::string>* paths);
426
427 static std::string RemoveConsecutiveChars(base::StringPiece str,
428 const char c);
429
430 DISALLOW_COPY_AND_ASSIGN(V4ProtocolManagerUtil);
431 };
432
433 using StoresToCheck = std::unordered_set<ListIdentifier>;
434
435 } // namespace safe_browsing
436
437 namespace std {
438
439 template <>
440 struct hash<safe_browsing::PlatformType> {
441 std::size_t operator()(const safe_browsing::PlatformType& p) const {
442 return std::hash<unsigned int>()(p);
443 }
444 };
445
446 template <>
447 struct hash<safe_browsing::ThreatEntryType> {
448 std::size_t operator()(const safe_browsing::ThreatEntryType& tet) const {
449 return std::hash<unsigned int>()(tet);
450 }
451 };
452
453 template <>
454 struct hash<safe_browsing::ThreatType> {
455 std::size_t operator()(const safe_browsing::ThreatType& tt) const {
456 return std::hash<unsigned int>()(tt);
457 }
458 };
459
460 template <>
461 struct hash<safe_browsing::ListIdentifier> {
462 std::size_t operator()(const safe_browsing::ListIdentifier& id) const {
463 return id.hash();
464 }
465 };
466
467 } // namespace std
468
469 #endif // COMPONENTS_SAFE_BROWSING_CORE_DB_V4_PROTOCOL_MANAGER_UTIL_H_
470