1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_SEARCH_ENGINES_TEMPLATE_URL_H_
6 #define COMPONENTS_SEARCH_ENGINES_TEMPLATE_URL_H_
7 
8 #include <cstddef>
9 #include <memory>
10 #include <string>
11 #include <utility>
12 #include <vector>
13 
14 #include "base/gtest_prod_util.h"
15 #include "base/macros.h"
16 #include "base/time/time.h"
17 #include "components/search_engines/omnibox_focus_type.h"
18 #include "components/search_engines/search_engine_type.h"
19 #include "components/search_engines/template_url_data.h"
20 #include "components/search_engines/template_url_id.h"
21 #include "third_party/metrics_proto/omnibox_event.pb.h"
22 #include "third_party/metrics_proto/omnibox_input_type.pb.h"
23 #include "ui/gfx/geometry/size.h"
24 #include "url/gurl.h"
25 #include "url/third_party/mozilla/url_parse.h"
26 
27 class SearchTermsData;
28 class TemplateURL;
29 
30 
31 // TemplateURLRef -------------------------------------------------------------
32 
33 // A TemplateURLRef represents a single URL within the larger TemplateURL class
34 // (which represents an entire "search engine", see below).  If
35 // SupportsReplacement() is true, this URL has placeholders in it, for which
36 // callers can substitute values to get a "real" URL using ReplaceSearchTerms().
37 //
38 // TemplateURLRefs always have a non-NULL |owner_| TemplateURL, which they
39 // access in order to get at important data like the underlying URL string or
40 // the associated Profile.
41 class TemplateURLRef {
42  public:
43   // Magic numbers to pass to ReplaceSearchTerms() for the |accepted_suggestion|
44   // parameter.  Most callers aren't using Suggest capabilities and should just
45   // pass NO_SUGGESTIONS_AVAILABLE.
46   // NOTE: Because positive values are meaningful, make sure these are negative!
47   enum AcceptedSuggestion {
48     NO_SUGGESTION_CHOSEN = -1,
49     NO_SUGGESTIONS_AVAILABLE = -2,
50   };
51 
52   // Which kind of URL within our owner we are.  This allows us to get at the
53   // correct string field. Use |INDEXED| to indicate that the numerical
54   // |index_in_owner_| should be used instead.
55   enum Type {
56     SEARCH,
57     SUGGEST,
58     IMAGE,
59     NEW_TAB,
60     CONTEXTUAL_SEARCH,
61     INDEXED
62   };
63 
64   // Type to store <content_type, post_data> pair for POST URLs.
65   // The |content_type|(first part of the pair) is the content-type of
66   // the |post_data|(second part of the pair) which is encoded in
67   // "multipart/form-data" format, it also contains the MIME boundary used in
68   // the |post_data|. See http://tools.ietf.org/html/rfc2046 for the details.
69   typedef std::pair<std::string, std::string> PostContent;
70 
71   // Enumeration of the known search or suggest request sources.
72   enum RequestSource {
73     SEARCHBOX,          // Omnibox or the NTP realbox. The default.
74     CROS_APP_LIST,      // Chrome OS app list search box.
75     NON_SEARCHBOX_NTP,  // Non-searchbox NTP surfaces.
76   };
77 
78   // This struct encapsulates arguments passed to
79   // TemplateURLRef::ReplaceSearchTerms methods.  By default, only search_terms
80   // is required and is passed in the constructor.
81   struct SearchTermsArgs {
82     SearchTermsArgs();
83     explicit SearchTermsArgs(const base::string16& search_terms);
84     SearchTermsArgs(const SearchTermsArgs& other);
85     ~SearchTermsArgs();
86 
87     struct ContextualSearchParams {
88       ContextualSearchParams();
89       // Modern constructor, used when the content is sent in the HTTP header
90       // instead of as CGI parameters.
91       // The |version| tell the server which version of the client is making
92       // this request.
93       // The |contextual_cards_version| tells the server which version of
94       // contextual cards integration is being used by the client.
95       // The |home_country| is an ISO country code for the country that the user
96       // considers their permanent home (which may be different from the country
97       // they are currently visiting).  Pass an empty string if none available.
98       // The |previous_event_id| is an identifier previously returned by the
99       // server to identify that user interaction.
100       // The |previous_event_results| are the results of the user-interaction of
101       // that previous request.
102       // The "previous_xyz" parameters are documented in go/cs-sanitized.
103       // The |is_exact_search| allows the search request to be narrowed down to
104       // an "exact" search only, meaning just search for X rather than X +
105       // whatever else is in the context.  The returned search term should not
106       // be expanded, and the server will honor this along with creating a
107       // narrow Search Term.
108       // The |source_lang| specifies a source language hint to apply for
109       // translation or to indicate that translation might be appropriate.
110       // This comes from CLD evaluating the selection and/or page content.
111       // The |target_lang| specifies the best language to translate into for
112       // the user, which also indicates when translation is appropriate or
113       // helpful.  This comes from the Chrome Language Model.
114       // The |fluent_languages| string specifies the languages the user
115       // is fluent in reading.  This acts as an alternate set of languages
116       // to consider translating into.  The languages are ordered by
117       // fluency, and encoded as a comma-separated list of BCP 47 languages.
118       // The |related_searches_stamp| string contains an information that
119       // indicates experiment status and server processing results so that
120       // can be logged in GWS Sawmill logs for offline analysis for the
121       // Related Searches MVP experiment.
122       ContextualSearchParams(int version,
123                              int contextual_cards_version,
124                              std::string home_country,
125                              int64_t previous_event_id,
126                              int previous_event_results,
127                              bool is_exact_search,
128                              std::string source_lang,
129                              std::string target_lang,
130                              std::string fluent_languages,
131                              std::string related_searches_stamp);
132       ContextualSearchParams(const ContextualSearchParams& other);
133       ~ContextualSearchParams();
134 
135       // Estimates dynamic memory usage.
136       // See base/trace_event/memory_usage_estimator.h for more info.
137       size_t EstimateMemoryUsage() const;
138 
139       // The version of contextual search.
140       int version = -1;
141 
142       // The version of Contextual Cards data to request.
143       // A value of 0 indicates no data needed.
144       int contextual_cards_version = 0;
145 
146       // The locale of the user's home country in an ISO country code format,
147       // or an empty string if not available.  This indicates where the user
148       // resides, not where they currently are.
149       std::string home_country;
150 
151       // An EventID from a previous interaction (sent by server, recorded by
152       // client).
153       int64_t previous_event_id = 0l;
154 
155       // An encoded set of booleans that represent the interaction results from
156       // the previous event.
157       int previous_event_results = 0;
158 
159       // A flag that restricts the search to exactly match the selection rather
160       // than expanding the Search Term to include other words in the context.
161       bool is_exact_search = false;
162 
163       // Source language string to translate from.
164       std::string source_lang;
165 
166       // Target language string to be translated into.
167       std::string target_lang;
168 
169       // Alternate target languages that the user is fluent in, encoded in a
170       // single string.
171       std::string fluent_languages;
172 
173       // Experiment arm and processing information for the Related Searches
174       // experiment. The value is an arbitrary string that starts with a
175       // schema version number.
176       std::string related_searches_stamp;
177     };
178 
179     // Estimates dynamic memory usage.
180     // See base/trace_event/memory_usage_estimator.h for more info.
181     size_t EstimateMemoryUsage() const;
182 
183     // The search terms (query).
184     base::string16 search_terms;
185 
186     // The original (input) query.
187     base::string16 original_query;
188 
189     // The type the original input query was identified as.
190     metrics::OmniboxInputType input_type = metrics::OmniboxInputType::EMPTY;
191 
192     // Specifies how the user last interacted with the searchbox UI element.
193     OmniboxFocusType focus_type = OmniboxFocusType::DEFAULT;
194 
195     // The optional assisted query stats, aka AQS, used for logging purposes.
196     // This string contains impressions of all autocomplete matches shown
197     // at the query submission time.  For privacy reasons, we require the
198     // search provider to support HTTPS protocol in order to receive the AQS
199     // param.
200     // For more details, see http://goto.google.com/binary-clients-logging .
201     std::string assisted_query_stats;
202 
203     // TODO: Remove along with "aq" CGI param.
204     int accepted_suggestion = NO_SUGGESTIONS_AVAILABLE;
205 
206     // The 0-based position of the cursor within the query string at the time
207     // the request was issued.  Set to base::string16::npos if not used.
208     size_t cursor_position = base::string16::npos;
209 
210     // The URL of the current webpage to be used for experimental zero-prefix
211     // suggestions.
212     std::string current_page_url;
213 
214     // Which omnibox the user used to type the prefix.
215     metrics::OmniboxEventProto::PageClassification page_classification =
216         metrics::OmniboxEventProto::INVALID_SPEC;
217 
218     // Optional session token.
219     std::string session_token;
220 
221     // Prefetch query and type.
222     std::string prefetch_query;
223     std::string prefetch_query_type;
224 
225     // Additional query params to append to the request.
226     std::string additional_query_params;
227 
228     // If set, ReplaceSearchTerms() will automatically append any extra query
229     // params specified via the --extra-search-query-params command-line
230     // argument.  Generally, this should be set when dealing with the search
231     // TemplateURLRefs of the default search engine and the caller cares
232     // about the query portion of the URL.  Since neither TemplateURLRef nor
233     // indeed TemplateURL know whether a TemplateURL is the default search
234     // engine, callers instead must set this manually.
235     bool append_extra_query_params_from_command_line = false;
236 
237     // The raw content of an image thumbnail that will be used as a query for
238     // search-by-image frontend.
239     std::string image_thumbnail_content;
240 
241     // When searching for an image, the URL of the original image. Callers
242     // should leave this empty for images specified via data: URLs.
243     GURL image_url;
244 
245     // When searching for an image, the original size of the image.
246     gfx::Size image_original_size;
247 
248     // Source of the search or suggest request.
249     RequestSource request_source = SEARCHBOX;
250 
251     ContextualSearchParams contextual_search_params;
252   };
253 
254   TemplateURLRef(const TemplateURL* owner, Type type);
255   TemplateURLRef(const TemplateURL* owner, size_t index_in_owner);
256   ~TemplateURLRef();
257 
258   TemplateURLRef(const TemplateURLRef& source);
259   TemplateURLRef& operator=(const TemplateURLRef& source);
260 
261   // Returns the raw URL. None of the parameters will have been replaced.
262   std::string GetURL() const;
263 
264   // Returns the raw string of the post params. Please see comments in
265   // prepopulated_engines_schema.json for the format.
266   std::string GetPostParamsString() const;
267 
268   // Returns true if this URL supports search term replacement.
269   bool SupportsReplacement(const SearchTermsData& search_terms_data) const;
270 
271   // Returns a string that is the result of replacing the search terms in
272   // the url with the specified arguments.  We use our owner's input encoding.
273   //
274   // If this TemplateURLRef does not support replacement (SupportsReplacement
275   // returns false), an empty string is returned.
276   // If this TemplateURLRef uses POST, and |post_content| is not NULL, the
277   // |post_params_| will be replaced, encoded in "multipart/form-data" format
278   // and stored into |post_content|.
279   std::string ReplaceSearchTerms(const SearchTermsArgs& search_terms_args,
280                                  const SearchTermsData& search_terms_data,
281                                  PostContent* post_content) const;
282 
283   // TODO(jnd): remove the following ReplaceSearchTerms definition which does
284   // not have |post_content| parameter once all reference callers pass
285   // |post_content| parameter.
ReplaceSearchTerms(const SearchTermsArgs & search_terms_args,const SearchTermsData & search_terms_data)286   std::string ReplaceSearchTerms(
287       const SearchTermsArgs& search_terms_args,
288       const SearchTermsData& search_terms_data) const {
289     return ReplaceSearchTerms(search_terms_args, search_terms_data, NULL);
290   }
291 
292   // Returns true if the TemplateURLRef is valid. An invalid TemplateURLRef is
293   // one that contains unknown terms, or invalid characters.
294   bool IsValid(const SearchTermsData& search_terms_data) const;
295 
296   // Returns a string representation of this TemplateURLRef suitable for
297   // display. The display format is the same as the format used by Firefox.
298   base::string16 DisplayURL(const SearchTermsData& search_terms_data) const;
299 
300   // Converts a string as returned by DisplayURL back into a string as
301   // understood by TemplateURLRef.
302   static std::string DisplayURLToURLRef(const base::string16& display_url);
303 
304   // If this TemplateURLRef is valid and contains one search term, this returns
305   // the host/path of the URL, otherwise this returns an empty string.
306   const std::string& GetHost(const SearchTermsData& search_terms_data) const;
307   std::string GetPath(const SearchTermsData& search_terms_data) const;
308 
309   // If this TemplateURLRef is valid and contains one search term
310   // in its query or ref, this returns the key of the search term,
311   // otherwise this returns an empty string.
312   const std::string& GetSearchTermKey(
313       const SearchTermsData& search_terms_data) const;
314 
315   // If this TemplateURLRef is valid and contains one search term,
316   // this returns the location of the search term,
317   // otherwise this returns url::Parsed::QUERY.
318   url::Parsed::ComponentType GetSearchTermKeyLocation(
319       const SearchTermsData& search_terms_data) const;
320 
321   // If this TemplateURLRef is valid and contains one search term,
322   // this returns the fixed prefix before the search term,
323   // otherwise this returns an empty string.
324   const std::string& GetSearchTermValuePrefix(
325       const SearchTermsData& search_terms_data) const;
326 
327   // If this TemplateURLRef is valid and contains one search term,
328   // this returns the fixed suffix after the search term,
329   // otherwise this returns an empty string.
330   const std::string& GetSearchTermValueSuffix(
331       const SearchTermsData& search_terms_data) const;
332 
333   // Converts the specified term in our owner's encoding to a base::string16.
334   base::string16 SearchTermToString16(const base::StringPiece& term) const;
335 
336   // Returns true if this TemplateURLRef has a replacement term of
337   // {google:baseURL} or {google:baseSuggestURL}.
338   bool HasGoogleBaseURLs(const SearchTermsData& search_terms_data) const;
339 
340   // Use the pattern referred to by this TemplateURLRef to match the provided
341   // |url| and extract |search_terms| from it. Returns true if the pattern
342   // matches, even if |search_terms| is empty. In this case
343   // |search_term_component|, if not NULL, indicates whether the search terms
344   // were found in the query or the ref parameters; and |search_terms_position|,
345   // if not NULL, contains the position of the search terms in the query or the
346   // ref parameters. Returns false and an empty |search_terms| if the pattern
347   // does not match.
348   bool ExtractSearchTermsFromURL(
349       const GURL& url,
350       base::string16* search_terms,
351       const SearchTermsData& search_terms_data,
352       url::Parsed::ComponentType* search_term_component,
353       url::Component* search_terms_position) const;
354 
355   // Whether the URL uses POST (as opposed to GET).
356   bool UsesPOSTMethod(const SearchTermsData& search_terms_data) const;
357 
358   // Estimates dynamic memory usage.
359   // See base/trace_event/memory_usage_estimator.h for more info.
360   size_t EstimateMemoryUsage() const;
361 
362  private:
363   friend class TemplateURL;
364   friend class TemplateURLTest;
365   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, SetPrepopulatedAndParse);
366   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterKnown);
367   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterUnknown);
368   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLEmpty);
369   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoTemplateEnd);
370   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoKnownParameters);
371   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLTwoParameters);
372   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNestedParameter);
373   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, URLRefTestImageURLWithPOST);
374 
375   // Enumeration of the known types.
376   enum ReplacementType {
377     ENCODING,
378     GOOGLE_ASSISTED_QUERY_STATS,
379     GOOGLE_BASE_URL,
380     GOOGLE_BASE_SUGGEST_URL,
381     GOOGLE_CONTEXTUAL_SEARCH_VERSION,
382     GOOGLE_CONTEXTUAL_SEARCH_CONTEXT_DATA,
383     GOOGLE_CURRENT_PAGE_URL,
384     GOOGLE_CURSOR_POSITION,
385     GOOGLE_IMAGE_ORIGINAL_HEIGHT,
386     GOOGLE_IMAGE_ORIGINAL_WIDTH,
387     GOOGLE_IMAGE_SEARCH_SOURCE,
388     GOOGLE_IMAGE_THUMBNAIL,
389     GOOGLE_IMAGE_THUMBNAIL_BASE64,
390     GOOGLE_IMAGE_URL,
391     GOOGLE_INPUT_TYPE,
392     GOOGLE_IOS_SEARCH_LANGUAGE,
393     GOOGLE_NTP_IS_THEMED,
394     GOOGLE_OMNIBOX_FOCUS_TYPE,
395     GOOGLE_ORIGINAL_QUERY_FOR_SUGGESTION,
396     GOOGLE_PAGE_CLASSIFICATION,
397     GOOGLE_PREFETCH_QUERY,
398     GOOGLE_RLZ,
399     GOOGLE_SEARCH_CLIENT,
400     GOOGLE_SEARCH_FIELDTRIAL_GROUP,
401     GOOGLE_SEARCH_VERSION,
402     GOOGLE_SESSION_TOKEN,
403     GOOGLE_SUGGEST_CLIENT,
404     GOOGLE_SUGGEST_REQUEST_ID,
405     GOOGLE_UNESCAPED_SEARCH_TERMS,
406     LANGUAGE,
407     MAIL_RU_REFERRAL_ID,
408     SEARCH_TERMS,
409     YANDEX_REFERRAL_ID,
410   };
411 
412   // Used to identify an element of the raw url that can be replaced.
413   struct Replacement {
ReplacementReplacement414     Replacement(ReplacementType type, size_t index)
415         : type(type), index(index), is_post_param(false) {}
416     ReplacementType type;
417     size_t index;
418     // Indicates the location in where the replacement is replaced. If
419     // |is_post_param| is false, |index| indicates the byte position in
420     // |parsed_url_|. Otherwise, |index| is the index of |post_params_|.
421     bool is_post_param;
422   };
423 
424   // Stores a single parameter for a POST.
425   struct PostParam {
426     std::string name;
427     std::string value;
428     std::string content_type;
429 
430     // Estimates dynamic memory usage.
431     // See base/trace_event/memory_usage_estimator.h for more info.
432     size_t EstimateMemoryUsage() const;
433   };
434 
435   // The list of elements to replace.
436   typedef std::vector<struct Replacement> Replacements;
437   typedef std::vector<PostParam> PostParams;
438 
439   // TemplateURLRef internally caches values to make replacement quick. This
440   // method invalidates any cached values.
441   void InvalidateCachedValues() const;
442 
443   // Parses the parameter in url at the specified offset. start/end specify the
444   // range of the parameter in the url, including the braces. If the parameter
445   // is valid, url is updated to reflect the appropriate parameter. If
446   // the parameter is one of the known parameters an element is added to
447   // replacements indicating the type and range of the element. The original
448   // parameter is erased from the url.
449   //
450   // If the parameter is not a known parameter, false is returned. If this is a
451   // prepopulated URL, the parameter is erased, otherwise it is left alone.
452   bool ParseParameter(size_t start,
453                       size_t end,
454                       std::string* url,
455                       Replacements* replacements) const;
456 
457   // Parses the specified url, replacing parameters as necessary. If
458   // successful, valid is set to true, and the parsed url is returned. For all
459   // known parameters that are encountered an entry is added to replacements.
460   // If there is an error parsing the url, valid is set to false, and an empty
461   // string is returned.  If the URL has the POST parameters, they will be
462   // parsed into |post_params| which will be further replaced with real search
463   // terms data and encoded in "multipart/form-data" format to generate the
464   // POST data.
465   std::string ParseURL(const std::string& url,
466                        Replacements* replacements,
467                        PostParams* post_params,
468                        bool* valid) const;
469 
470   // If the url has not yet been parsed, ParseURL is invoked.
  // NOTE: While this is const, it modifies mutable cached state such as
  // parsed_, valid_ and parsed_url_.
473   void ParseIfNecessary(const SearchTermsData& search_terms_data) const;
474 
475   // Parses a wildcard out of |path|, putting the parsed path in |path_prefix_|
476   // and |path_suffix_| and setting |path_wildcard_present_| to true.
477   // In the absence of a wildcard, the full path will be contained in
478   // |path_prefix_| and |path_wildcard_present_| will be false.
479   void ParsePath(const std::string& path) const;
480 
481   // Returns whether the path portion of this template URL is equal to the path
482   // in |url|, checking that URL is prefixed/suffixed by
483   // |path_prefix_|/|path_suffix_| if |path_wildcard_present_| is true, or equal
484   // to |path_prefix_| otherwise.
485   bool PathIsEqual(const GURL& url) const;
486 
487   // Extracts the query key and host from the url.
488   void ParseHostAndSearchTermKey(
489       const SearchTermsData& search_terms_data) const;
490 
491   // Encode post parameters in "multipart/form-data" format and store it
492   // inside |post_content|. Returns false if errors are encountered during
493   // encoding. This method is called each time ReplaceSearchTerms gets called.
494   bool EncodeFormData(const PostParams& post_params,
495                       PostContent* post_content) const;
496 
497   // Handles a replacement by using real term data. If the replacement
498   // belongs to a PostParam, the PostParam will be replaced by the term data.
499   // Otherwise, the term data will be inserted at the place that the
500   // replacement points to.
501   void HandleReplacement(const std::string& name,
502                          const std::string& value,
503                          const Replacement& replacement,
504                          std::string* url) const;
505 
506   // Replaces all replacements in |parsed_url_| with their actual values and
507   // returns the result.  This is the main functionality of
508   // ReplaceSearchTerms().
509   std::string HandleReplacements(
510       const SearchTermsArgs& search_terms_args,
511       const SearchTermsData& search_terms_data,
512       PostContent* post_content) const;
513 
514   // The TemplateURL that contains us.  This should outlive us.
515   const TemplateURL* owner_;
516 
517   // What kind of URL we are.
518   Type type_;
519 
520   // If |type_| is |INDEXED|, this |index_in_owner_| is used instead to refer to
521   // a url within our owner.
522   size_t index_in_owner_ = 0;
523 
524   // Whether the URL has been parsed.
525   mutable bool parsed_ = false;
526 
527   // Whether the url was successfully parsed.
528   mutable bool valid_ = false;
529 
530   // The parsed URL. All terms have been stripped out of this with
531   // replacements_ giving the index of the terms to replace.
532   mutable std::string parsed_url_;
533 
534   // Do we support search term replacement?
535   mutable bool supports_replacements_ = false;
536 
537   // The replaceable parts of url (parsed_url_). These are ordered by index
538   // into the string, and may be empty.
539   mutable Replacements replacements_;
540 
541   // Whether the path contains a wildcard.
542   mutable bool path_wildcard_present_ = false;
543 
544   // Host, port, path, key and location of the search term. These are only set
545   // if the url contains one search term.
546   mutable std::string host_;
547   mutable std::string port_;
548   mutable std::string path_prefix_;
549   mutable std::string path_suffix_;
550   mutable std::string search_term_key_;
551   mutable url::Parsed::ComponentType search_term_key_location_ =
552       url::Parsed::QUERY;
553   mutable std::string search_term_value_prefix_;
554   mutable std::string search_term_value_suffix_;
555 
556   mutable PostParams post_params_;
557 
558   // Whether the contained URL is a pre-populated URL.
559   bool prepopulated_ = false;
560 };
561 
562 
563 // TemplateURL ----------------------------------------------------------------
564 
565 // A TemplateURL represents a single "search engine", defined primarily as a
566 // subset of the Open Search Description Document
567 // (http://www.opensearch.org/Specifications/OpenSearch) plus some extensions.
568 // One TemplateURL contains several TemplateURLRefs, which correspond to various
569 // different capabilities (e.g. doing searches or getting suggestions), as well
570 // as a TemplateURLData containing other details like the name, keyword, etc.
571 //
572 // TemplateURLs are intended to be read-only for most users.
573 // The TemplateURLService, which handles storing and manipulating TemplateURLs,
574 // is made a friend so that it can be the exception to this pattern.
575 class TemplateURL {
576  public:
577   using TemplateURLVector = std::vector<TemplateURL*>;
578   using OwnedTemplateURLVector = std::vector<std::unique_ptr<TemplateURL>>;
579 
580   // These values are not persisted and can be freely changed.
581   // Their integer values are used for choosing the best engine during keyword
582   // conflicts, so their relative ordering should not be changed without careful
583   // thought about what happens during version skew.
584   enum Type {
585     // Installed only on this device. Should not be synced. This is not common.
586     LOCAL = 0,
587     // Regular search engine. This is the most common.
588     NORMAL = 1,
589     // Installed by extension through Override Settings API.
590     NORMAL_CONTROLLED_BY_EXTENSION = 2,
591     // The keyword associated with an extension that uses the Omnibox API.
592     OMNIBOX_API_EXTENSION = 3,
593   };
594 
595   // An AssociatedExtensionInfo represents information about the extension that
596   // added the search engine.
597   struct AssociatedExtensionInfo {
598     AssociatedExtensionInfo(const std::string& extension_id,
599                             base::Time install_time,
600                             bool wants_to_be_default_engine);
601     ~AssociatedExtensionInfo();
602 
603     // Estimates dynamic memory usage.
604     // See base/trace_event/memory_usage_estimator.h for more info.
605     size_t EstimateMemoryUsage() const;
606 
607     std::string extension_id;
608 
609     // Used to resolve conflicts when there are multiple extensions specifying
610     // the default search engine. The most recently-installed wins.
611     base::Time install_time;
612 
613     // Whether the search engine is supposed to be default.
614     bool wants_to_be_default_engine;
615   };
616 
617   explicit TemplateURL(const TemplateURLData& data, Type type = NORMAL);
618 
619   // Constructor for extension controlled engine. |type| must be
620   // NORMAL_CONTROLLED_BY_EXTENSION or OMNIBOX_API_EXTENSION.
621   TemplateURL(const TemplateURLData& data,
622               Type type,
623               std::string extension_id,
624               base::Time install_time,
625               bool wants_to_be_default_engine);
626 
627   ~TemplateURL();
628 
629   // For two engines with the same keyword, |this| and |other|,
630   // returns true if |this| is strictly better than |other|.
631   //
632   // While normal engines must all have distinct keywords, policy-created,
633   // extension-controlled and omnibox API engines may have the same keywords as
  // each other or as normal engines.  In these cases, policy-created engines
635   // override omnibox API engines, which override extension-controlled engines,
636   // which override normal engines.
637   //
638   // If there is still a conflict after this, compare by safe-for-autoreplace,
639   // then last modified date, then use the sync guid as a tiebreaker.
640   //
641   // TODO(tommycli): I'd like to use this to resolve Sync conflicts in the
642   // future, but we need a total ordering of TemplateURLs. That's not the case
643   // today, because the sync GUIDs are not actually globally unique, so there
644   // can be a genuine tie, which is not good, because then two different clients
645   // could choose to resolve the conflict in two different ways.
646   bool IsBetterThanEngineWithConflictingKeyword(const TemplateURL* other) const;
647 
648   // Generates a suitable keyword for the specified url, which must be valid.
649   // This is guaranteed not to return an empty string, since TemplateURLs should
650   // never have an empty keyword.
651   static base::string16 GenerateKeyword(const GURL& url);
652 
653   // Generates a favicon URL from the specified url.
654   static GURL GenerateFaviconURL(const GURL& url);
655 
656   // Returns true if |t_url| and |data| are equal in all meaningful respects.
657   // Static to allow either or both params to be NULL.
658   static bool MatchesData(const TemplateURL* t_url,
659                           const TemplateURLData* data,
660                           const SearchTermsData& search_terms_data);
661 
data()662   const TemplateURLData& data() const { return data_; }
663 
short_name()664   const base::string16& short_name() const { return data_.short_name(); }
665   // An accessor for the short_name, but adjusted so it can be appropriately
666   // displayed even if it is LTR and the UI is RTL.
667   base::string16 AdjustedShortNameForLocaleDirection() const;
668 
keyword()669   const base::string16& keyword() const { return data_.keyword(); }
670 
url()671   const std::string& url() const { return data_.url(); }
suggestions_url()672   const std::string& suggestions_url() const { return data_.suggestions_url; }
image_url()673   const std::string& image_url() const { return data_.image_url; }
new_tab_url()674   const std::string& new_tab_url() const { return data_.new_tab_url; }
contextual_search_url()675   const std::string& contextual_search_url() const {
676     return data_.contextual_search_url;
677   }
search_url_post_params()678   const std::string& search_url_post_params() const {
679     return data_.search_url_post_params;
680   }
suggestions_url_post_params()681   const std::string& suggestions_url_post_params() const {
682     return data_.suggestions_url_post_params;
683   }
image_url_post_params()684   const std::string& image_url_post_params() const {
685     return data_.image_url_post_params;
686   }
alternate_urls()687   const std::vector<std::string>& alternate_urls() const {
688     return data_.alternate_urls;
689   }
favicon_url()690   const GURL& favicon_url() const { return data_.favicon_url; }
691 
logo_url()692   const GURL& logo_url() const { return data_.logo_url; }
693 
doodle_url()694   const GURL& doodle_url() const { return data_.doodle_url; }
695 
originating_url()696   const GURL& originating_url() const { return data_.originating_url; }
697 
safe_for_autoreplace()698   bool safe_for_autoreplace() const { return data_.safe_for_autoreplace; }
699 
input_encodings()700   const std::vector<std::string>& input_encodings() const {
701     return data_.input_encodings;
702   }
703 
id()704   TemplateURLID id() const { return data_.id; }
705 
date_created()706   base::Time date_created() const { return data_.date_created; }
last_modified()707   base::Time last_modified() const { return data_.last_modified; }
last_visited()708   base::Time last_visited() const { return data_.last_visited; }
709 
created_by_policy()710   bool created_by_policy() const { return data_.created_by_policy; }
created_from_play_api()711   bool created_from_play_api() const { return data_.created_from_play_api; }
712 
usage_count()713   int usage_count() const { return data_.usage_count; }
714 
prepopulate_id()715   int prepopulate_id() const { return data_.prepopulate_id; }
716 
sync_guid()717   const std::string& sync_guid() const { return data_.sync_guid; }
718 
url_refs()719   const std::vector<TemplateURLRef>& url_refs() const { return url_refs_; }
url_ref()720   const TemplateURLRef& url_ref() const {
721     // Sanity check for https://crbug.com/781703.
722     CHECK(!url_refs_.empty());
723     return url_refs_.back();
724   }
suggestions_url_ref()725   const TemplateURLRef& suggestions_url_ref() const {
726     return suggestions_url_ref_;
727   }
image_url_ref()728   const TemplateURLRef& image_url_ref() const { return image_url_ref_; }
new_tab_url_ref()729   const TemplateURLRef& new_tab_url_ref() const { return new_tab_url_ref_; }
contextual_search_url_ref()730   const TemplateURLRef& contextual_search_url_ref() const {
731     return contextual_search_url_ref_;
732   }
733 
type()734   Type type() const { return type_; }
735 
GetExtensionInfoForTesting()736   const AssociatedExtensionInfo* GetExtensionInfoForTesting() const {
737     return extension_info_.get();
738   }
739 
740   // Returns true if |url| supports replacement.
741   bool SupportsReplacement(const SearchTermsData& search_terms_data) const;
742 
743   // Returns true if any URLRefs use Googe base URLs.
744   bool HasGoogleBaseURLs(const SearchTermsData& search_terms_data) const;
745 
746   // Returns true if this TemplateURL uses Google base URLs and has a keyword
747   // of "google.TLD".  We use this to decide whether we can automatically
748   // update the keyword to reflect the current Google base URL TLD.
749   bool IsGoogleSearchURLWithReplaceableKeyword(
750       const SearchTermsData& search_terms_data) const;
751 
752   // Returns true if the keywords match or if
753   // IsGoogleSearchURLWithReplaceableKeyword() is true for both |this| and
754   // |other|.
755   bool HasSameKeywordAs(const TemplateURLData& other,
756                         const SearchTermsData& search_terms_data) const;
757 
758   // Returns the id of the extension that added this search engine. Only call
759   // this for TemplateURLs of type NORMAL_CONTROLLED_BY_EXTENSION or
760   // OMNIBOX_API_EXTENSION.
761   std::string GetExtensionId() const;
762 
763   // Returns the type of this search engine, or SEARCH_ENGINE_OTHER if no
764   // engines match.
765   SearchEngineType GetEngineType(
766       const SearchTermsData& search_terms_data) const;
767 
768   // Use the alternate URLs and the search URL to match the provided |url|
769   // and extract |search_terms| from it. Returns false and an empty
770   // |search_terms| if no search terms can be matched. The URLs are matched in
771   // the order listed in |url_refs_| (see comment there).
772   bool ExtractSearchTermsFromURL(const GURL& url,
773                                  const SearchTermsData& search_terms_data,
774                                  base::string16* search_terms) const;
775 
776   // Returns true if non-empty search terms could be extracted from |url| using
777   // ExtractSearchTermsFromURL(). In other words, this returns whether |url|
778   // could be the result of performing a search with |this|.
779   bool IsSearchURL(const GURL& url,
780                    const SearchTermsData& search_terms_data) const;
781 
782   // Given a |url| corresponding to this TemplateURL, identifies the search
783   // terms and replaces them with the ones in |search_terms_args|, leaving the
784   // other parameters untouched. If the replacement fails, returns false and
785   // leaves |result| untouched. This is used by mobile ports to perform query
786   // refinement.
787   bool ReplaceSearchTermsInURL(
788       const GURL& url,
789       const TemplateURLRef::SearchTermsArgs& search_terms_args,
790       const SearchTermsData& search_terms_data,
791       GURL* result) const;
792 
793   // Encodes the search terms from |search_terms_args| so that we know the
794   // |input_encoding|. Returns the |encoded_terms| and the
795   // |encoded_original_query|. |encoded_terms| may be escaped as path or query
796   // depending on |is_in_query|; |encoded_original_query| is always escaped as
797   // query.
798   void EncodeSearchTerms(
799       const TemplateURLRef::SearchTermsArgs& search_terms_args,
800       bool is_in_query,
801       std::string* input_encoding,
802       base::string16* encoded_terms,
803       base::string16* encoded_original_query) const;
804 
805   // Returns the search url for this template URL.
806   // Returns an empty GURL if this template URL has no url().
807   GURL GenerateSearchURL(const SearchTermsData& search_terms_data) const;
808 
809   // TemplateURL internally caches values derived from a passed SearchTermsData
810   // to make its functions quick. This method invalidates any cached values and
811   // it should be called after SearchTermsData has been changed.
812   void InvalidateCachedValues() const;
813 
814   // Estimates dynamic memory usage.
815   // See base/trace_event/memory_usage_estimator.h for more info.
816   size_t EstimateMemoryUsage() const;
817 
818  private:
819   friend class TemplateURLService;
820 
821   void CopyFrom(const TemplateURL& other);
822 
823   void SetURL(const std::string& url);
824   void SetPrepopulateId(int id);
825 
826   // Resets the keyword if IsGoogleSearchURLWithReplaceableKeyword() or |force|.
827   // The |force| parameter is useful when the existing keyword is known to be
828   // a placeholder.  The resulting keyword is generated using
829   // GenerateSearchURL() and GenerateKeyword().
830   void ResetKeywordIfNecessary(const SearchTermsData& search_terms_data,
831                                bool force);
832 
833   // Resizes the |url_refs_| vector, which always holds the search URL as the
834   // last item.
835   void ResizeURLRefVector();
836 
837   // Uses the alternate URLs and the search URL to match the provided |url|
838   // and extract |search_terms| from it as well as the |search_terms_component|
839   // (either REF or QUERY) and |search_terms_component| at which the
840   // |search_terms| are found in |url|. See also ExtractSearchTermsFromURL().
841   bool FindSearchTermsInURL(const GURL& url,
842                             const SearchTermsData& search_terms_data,
843                             base::string16* search_terms,
844                             url::Parsed::ComponentType* search_terms_component,
845                             url::Component* search_terms_position) const;
846 
847   TemplateURLData data_;
848 
849   // Contains TemplateURLRefs corresponding to the alternate URLs and the search
850   // URL, in priority order: the URL at index 0 is treated as the highest
851   // priority and the primary search URL is treated as the lowest priority.  For
852   // example, if a TemplateURL has alternate URL "http://foo/#q={searchTerms}"
853   // and search URL "http://foo/?q={searchTerms}", and the URL to be decoded is
854   // "http://foo/?q=a#q=b", the alternate URL will match first and the decoded
855   // search term will be "b".  Note that since every TemplateURLRef has a
856   // primary search URL, this vector is never empty.
857   std::vector<TemplateURLRef> url_refs_;
858 
859   TemplateURLRef suggestions_url_ref_;
860   TemplateURLRef image_url_ref_;
861   TemplateURLRef new_tab_url_ref_;
862   TemplateURLRef contextual_search_url_ref_;
863   std::unique_ptr<AssociatedExtensionInfo> extension_info_;
864 
865   const Type type_;
866 
867   // Caches the computed engine type across successive calls to GetEngineType().
868   mutable SearchEngineType engine_type_;
869 
  // TODO(sky): Add the date the OSD file was last parsed.
871 
872   DISALLOW_COPY_AND_ASSIGN(TemplateURL);
873 };
874 
875 #endif  // COMPONENTS_SEARCH_ENGINES_TEMPLATE_URL_H_
876