1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_SAFE_BROWSING_CORE_DB_V4_GET_HASH_PROTOCOL_MANAGER_H_
6 #define COMPONENTS_SAFE_BROWSING_CORE_DB_V4_GET_HASH_PROTOCOL_MANAGER_H_
7 
8 // A class that implements Chrome's interface with the SafeBrowsing V4 protocol.
9 //
10 // The V4GetHashProtocolManager handles formatting and making requests of, and
11 // handling responses from, Google's SafeBrowsing servers. The purpose of this
12 // class is to get full hash matches from the SB server for the given set of
13 // hash prefixes.
14 //
15 // Design doc: go/design-doc-v4-full-hash-manager
16 
17 #include <memory>
18 #include <string>
19 #include <unordered_map>
20 #include <utility>
21 #include <vector>
22 
23 #include "base/gtest_prod_util.h"
24 #include "base/macros.h"
25 #include "base/sequence_checker.h"
26 #include "base/time/default_clock.h"
27 #include "base/time/time.h"
28 #include "base/timer/timer.h"
29 #include "components/safe_browsing/core/db/safebrowsing.pb.h"
30 #include "components/safe_browsing/core/db/util.h"
31 #include "components/safe_browsing/core/db/v4_protocol_manager_util.h"
32 #include "components/safe_browsing/core/proto/webui.pb.h"
33 
34 class GURL;
35 
36 namespace network {
37 class SimpleURLLoader;
38 class SharedURLLoaderFactory;
39 }  // namespace network
40 
41 namespace safe_browsing {
42 
43 class V4GetHashProtocolManagerFuzzer;
44 
45 // The matching hash prefixes and corresponding stores, for each full hash
46 // generated for a given URL.
47 typedef std::unordered_map<FullHash, StoreAndHashPrefixes>
48     FullHashToStoreAndHashPrefixesMap;
49 
50 // ----------------------------------------------------------------
51 
52 // All information about a particular full hash i.e. negative TTL, store for
53 // which it is valid, and metadata associated with that store.
54 struct FullHashInfo {
55  public:
56   FullHash full_hash;
57 
58   // The list for which this full hash is applicable.
59   ListIdentifier list_id;
60 
61   // The expiration time of the full hash for a particular store.
62   base::Time positive_expiry;
63 
64   // Any metadata for this full hash for a particular store.
65   ThreatMetadata metadata;
66 
67   FullHashInfo(const FullHash& full_hash,
68                const ListIdentifier& list_id,
69                const base::Time& positive_expiry);
70   FullHashInfo(const FullHashInfo& other);
71   ~FullHashInfo();
72 
73   bool operator==(const FullHashInfo& other) const;
74   bool operator!=(const FullHashInfo& other) const;
75 
76  private:
77   FullHashInfo();
78 };
79 
80 // Caches individual response from GETHASH response.
81 struct CachedHashPrefixInfo {
82   // The negative TTL for the hash prefix that leads to this
83   // CachedHashPrefixInfo. The client should not send any more requests for that
84   // hash prefix until this time.
85   base::Time negative_expiry;
86 
87   // The list of all full hashes (and related info) that start with a
88   // particular hash prefix and are known to be unsafe.
89   std::vector<FullHashInfo> full_hash_infos;
90 
91   CachedHashPrefixInfo();
92   CachedHashPrefixInfo(const CachedHashPrefixInfo& other);
93   ~CachedHashPrefixInfo();
94 };
95 
96 // Cached full hashes received from the server for the corresponding hash
97 // prefixes.
98 typedef std::unordered_map<HashPrefix, CachedHashPrefixInfo> FullHashCache;
99 
100 // FullHashCallback is invoked when GetFullHashes completes. The parameter is
101 // the vector of full hash results. If empty, indicates that there were no
102 // matches, and that the resource is safe.
103 using FullHashCallback =
104     base::OnceCallback<void(const std::vector<FullHashInfo>&)>;
105 
106 // Information needed to update the cache and call the callback to post the
107 // results.
108 struct FullHashCallbackInfo {
109   FullHashCallbackInfo();
110   FullHashCallbackInfo(const std::vector<FullHashInfo>& cached_full_hash_infos,
111                        const std::vector<HashPrefix>& prefixes_requested,
112                        std::unique_ptr<network::SimpleURLLoader> loader,
113                        const FullHashToStoreAndHashPrefixesMap&
114                            full_hash_to_store_and_hash_prefixes,
115                        FullHashCallback callback,
116                        const base::Time& network_start_time);
117   ~FullHashCallbackInfo();
118 
119   // The FullHashInfo objects retrieved from cache. These are merged with the
120   // results received from the server before invoking the callback.
121   std::vector<FullHashInfo> cached_full_hash_infos;
122 
123   // The callback method to call after collecting the full hashes for given
124   // hash prefixes.
125   FullHashCallback callback;
126 
127   // The loader that will return the response from the server. This is stored
128   // here as a unique pointer to be able to reason about its lifetime easily.
129   std::unique_ptr<network::SimpleURLLoader> loader;
130 
131   // The generated full hashes and the corresponding prefixes and the stores in
132   // which to look for a full hash match.
133   FullHashToStoreAndHashPrefixesMap full_hash_to_store_and_hash_prefixes;
134 
135   // Used to measure how long did it take to fetch the full hash response from
136   // the server.
137   base::Time network_start_time;
138 
139   // The prefixes that were requested from the server.
140   std::vector<HashPrefix> prefixes_requested;
141 };
142 
143 // ----------------------------------------------------------------
144 
145 class V4GetHashProtocolManagerFactory;
146 
147 class V4GetHashProtocolManager {
148  public:
149   // Invoked when GetFullHashesWithApis completes.
150   // Parameters:
151   //   - The API threat metadata for the given URL.
152   using ThreatMetadataForApiCallback =
153       base::OnceCallback<void(const ThreatMetadata& md)>;
154 
155   virtual ~V4GetHashProtocolManager();
156 
157   // Create an instance of the safe browsing v4 protocol manager.
158   static std::unique_ptr<V4GetHashProtocolManager> Create(
159       scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
160       const StoresToCheck& stores_to_check,
161       const V4ProtocolConfig& config);
162 
163   // Makes the passed |factory| the factory used to instantiate
164   // a V4GetHashProtocolManager. Useful for tests.
165   static void RegisterFactory(
166       std::unique_ptr<V4GetHashProtocolManagerFactory> factory);
167 
168   // Empties the cache.
169   void ClearCache();
170 
171   // Retrieve the full hash for a set of prefixes, and invoke the callback
172   // argument when the results are retrieved. The callback may be invoked
173   // synchronously. |list_client_states| is needed for reporting the current
174   // state of the lists on the client; it does not affect the response from the
175   // server.
176   virtual void GetFullHashes(const FullHashToStoreAndHashPrefixesMap
177                                  full_hash_to_matching_hash_prefixes,
178                              const std::vector<std::string>& list_client_states,
179                              FullHashCallback callback);
180 
181   // Retrieve the full hash and API metadata for the origin of |url|, and invoke
182   // the callback argument when the results are retrieved. The callback may be
183   // invoked synchronously.
184   virtual void GetFullHashesWithApis(
185       const GURL& url,
186       const std::vector<std::string>& list_client_states,
187       ThreatMetadataForApiCallback api_callback);
188 
189   // Callback when the request completes
190   void OnURLLoaderComplete(network::SimpleURLLoader* url_loader,
191                            std::unique_ptr<std::string> response_body);
192 
193   // Populates the protobuf with the FullHashCache data.
194   void CollectFullHashCacheInfo(FullHashCacheInfo* full_hash_cache_info);
195 
196  protected:
197   // Constructs a V4GetHashProtocolManager that issues network requests using
198   // |url_loader_factory|.
199   V4GetHashProtocolManager(
200       scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
201       const StoresToCheck& stores_to_check,
202       const V4ProtocolConfig& config);
203 
204  private:
205   FRIEND_TEST_ALL_PREFIXES(V4GetHashProtocolManagerTest, TestGetHashRequest);
206   FRIEND_TEST_ALL_PREFIXES(V4GetHashProtocolManagerTest, TestParseHashResponse);
207   FRIEND_TEST_ALL_PREFIXES(V4GetHashProtocolManagerTest,
208                            TestParseHashResponseWrongThreatEntryType);
209   FRIEND_TEST_ALL_PREFIXES(V4GetHashProtocolManagerTest,
210                            TestParseHashThreatPatternType);
211   FRIEND_TEST_ALL_PREFIXES(V4GetHashProtocolManagerTest,
212                            TestParseSubresourceFilterMetadata);
213   FRIEND_TEST_ALL_PREFIXES(V4GetHashProtocolManagerTest,
214                            TestParseHashResponseNonPermissionMetadata);
215   FRIEND_TEST_ALL_PREFIXES(V4GetHashProtocolManagerTest,
216                            TestParseHashResponseInconsistentThreatTypes);
217   FRIEND_TEST_ALL_PREFIXES(V4GetHashProtocolManagerTest,
218                            TestGetHashErrorHandlingOK);
219   FRIEND_TEST_ALL_PREFIXES(V4GetHashProtocolManagerTest,
220                            TestResultsNotCachedForNegativeCacheDuration);
221   FRIEND_TEST_ALL_PREFIXES(V4GetHashProtocolManagerTest,
222                            TestGetHashErrorHandlingNetwork);
223   FRIEND_TEST_ALL_PREFIXES(V4GetHashProtocolManagerTest,
224                            TestGetHashErrorHandlingResponseCode);
225   FRIEND_TEST_ALL_PREFIXES(V4GetHashProtocolManagerTest,
226                            TestGetHashErrorHandlingParallelRequests);
227   FRIEND_TEST_ALL_PREFIXES(V4GetHashProtocolManagerTest, GetCachedResults);
228   FRIEND_TEST_ALL_PREFIXES(V4GetHashProtocolManagerTest, TestUpdatesAreMerged);
229   friend class V4GetHashProtocolManagerTest;
230   friend class V4GetHashProtocolManagerFuzzer;
231   friend class V4GetHashProtocolManagerFactoryImpl;
232 
full_hash_cache_for_tests()233   FullHashCache* full_hash_cache_for_tests() { return &full_hash_cache_; }
234 
235   void OnURLLoaderCompleteInternal(network::SimpleURLLoader* url_loader,
236                                    int net_error,
237                                    int response_code,
238                                    const std::string& data);
239 
240   // Looks up the cached results for full hashes in
241   // |full_hash_to_store_and_hash_prefixes|. Fills |prefixes_to_request| with
242   // the prefixes that need to be requested. Fills |cached_full_hash_infos|
243   // with the cached results.
244   // Note: It is valid for both |prefixes_to_request| and
245   // |cached_full_hash_infos| to be empty after this function finishes.
246   void GetFullHashCachedResults(
247       const FullHashToStoreAndHashPrefixesMap&
248           full_hash_to_store_and_hash_prefixes,
249       const base::Time& now,
250       std::vector<HashPrefix>* prefixes_to_request,
251       std::vector<FullHashInfo>* cached_full_hash_infos);
252 
253   // Fills a FindFullHashesRequest protocol buffer for a request.
254   // Returns the serialized and base 64 encoded request as a string.
255   // |prefixes_to_request| is the list of hash prefixes to get full hashes for.
256   // |list_client_states| is the client_state of each of the lists being synced.
257   std::string GetHashRequest(
258       const std::vector<HashPrefix>& prefixes_to_request,
259       const std::vector<std::string>& list_client_states);
260 
261   void GetHashUrlAndHeaders(const std::string& request_base64,
262                             GURL* gurl,
263                             net::HttpRequestHeaders* headers) const;
264 
265   // Updates internal state for each GetHash response error, assuming that
266   // the current time is |now|.
267   void HandleGetHashError(const base::Time& now);
268 
269   // Merges the results from the cache and the results from the server. The
270   // response from the server may include information for full hashes from
271   // stores other than those required by this client so it filters out those
272   // results that the client did not ask for.
273   void MergeResults(const FullHashToStoreAndHashPrefixesMap&
274                         full_hash_to_store_and_hash_prefixes,
275                     const std::vector<FullHashInfo>& full_hash_infos,
276                     std::vector<FullHashInfo>* merged_full_hash_infos);
277 
278   // Calls |api_callback| with an object of ThreatMetadata that contains
279   // permission API metadata for full hashes in those |full_hash_infos| that
280   // have a full hash in |full_hashes|.
281   void OnFullHashForApi(ThreatMetadataForApiCallback api_callback,
282                         const std::vector<FullHash>& full_hashes,
283                         const std::vector<FullHashInfo>& full_hash_infos);
284 
285   // Parses a FindFullHashesResponse protocol buffer and fills the results in
286   // |full_hash_infos| and |negative_cache_expire|. |response_data| is a
287   // serialized FindFullHashes protocol buffer. |negative_cache_expire| is the
288   // cache expiry time of the hash prefixes that were requested. Returns true if
289   // parsing is successful; false otherwise.
290   bool ParseHashResponse(const std::string& response_data,
291                          std::vector<FullHashInfo>* full_hash_infos,
292                          base::Time* negative_cache_expire);
293 
294   // Parses the store specific |metadata| information from |match|. Logs errors
295   // to UMA if the metadata information was not parsed correctly or was
296   // inconsistent with what's expected from that corresponding store.
297   static void ParseMetadata(const ThreatMatch& match, ThreatMetadata* metadata);
298 
299   // Resets the gethash error counter and multiplier.
300   void ResetGetHashErrors();
301 
302   // Overrides the clock used to check the time.
303   void SetClockForTests(base::Clock* clock);
304 
305   // Updates the state of the full hash cache upon receiving a valid response
306   // from the server.
307   void UpdateCache(const std::vector<HashPrefix>& prefixes_requested,
308                    const std::vector<FullHashInfo>& full_hash_infos,
309                    const base::Time& negative_cache_expire);
310 
311  protected:
312   // A cache of full hash results.
313   FullHashCache full_hash_cache_;
314 
315  private:
316   // Map of GetHash requests to parameters which created it.
317   using PendingHashRequests =
318       std::unordered_map<const network::SimpleURLLoader*,
319                          std::unique_ptr<FullHashCallbackInfo>>;
320 
321   // The factory that controls the creation of V4GetHashProtocolManager.
322   // This is used by tests.
323   static V4GetHashProtocolManagerFactory* factory_;
324 
325   // The number of HTTP response errors since the the last successful HTTP
326   // response, used for request backoff timing.
327   size_t gethash_error_count_;
328 
329   // Multiplier for the backoff error after the second.
330   size_t gethash_back_off_mult_;
331 
332   PendingHashRequests pending_hash_requests_;
333 
334   // For v4, the next gethash time is set to the backoff time is the last
335   // response was an error, or the minimum wait time if the last response was
336   // successful.
337   base::Time next_gethash_time_;
338 
339   // The config of the client making Pver4 requests.
340   const V4ProtocolConfig config_;
341 
342   // The URLLoaderFactory we use to issue network requests.
343   scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory_;
344 
345   // Records number of cache hits since the beginning of this session.
346   int number_of_hits_ = 0;
347 
348   // The clock used to vend times.
349   base::Clock* clock_;
350 
351   // The following sets represent the combination of lists that we would always
352   // request from the server, irrespective of which list we found the hash
353   // prefix match in.
354   std::vector<PlatformType> platform_types_;
355   std::vector<ThreatEntryType> threat_entry_types_;
356   std::vector<ThreatType> threat_types_;
357 
358   SEQUENCE_CHECKER(sequence_checker_);
359 
360   DISALLOW_COPY_AND_ASSIGN(V4GetHashProtocolManager);
361 };
362 
363 // Interface of a factory to create V4GetHashProtocolManager.  Useful for tests.
364 class V4GetHashProtocolManagerFactory {
365  public:
V4GetHashProtocolManagerFactory()366   V4GetHashProtocolManagerFactory() {}
~V4GetHashProtocolManagerFactory()367   virtual ~V4GetHashProtocolManagerFactory() {}
368   virtual std::unique_ptr<V4GetHashProtocolManager> CreateProtocolManager(
369       scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
370       const StoresToCheck& stores_to_check,
371       const V4ProtocolConfig& config) = 0;
372 
373  private:
374   DISALLOW_COPY_AND_ASSIGN(V4GetHashProtocolManagerFactory);
375 };
376 
377 #ifndef NDEBUG
378 std::ostream& operator<<(std::ostream& os, const FullHashInfo& id);
379 #endif
380 
381 }  // namespace safe_browsing
382 
383 #endif  // COMPONENTS_SAFE_BROWSING_CORE_DB_V4_GET_HASH_PROTOCOL_MANAGER_H_
384