1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef COMPONENTS_SEARCH_ENGINES_TEMPLATE_URL_H_ 6 #define COMPONENTS_SEARCH_ENGINES_TEMPLATE_URL_H_ 7 8 #include <cstddef> 9 #include <memory> 10 #include <string> 11 #include <utility> 12 #include <vector> 13 14 #include "base/gtest_prod_util.h" 15 #include "base/macros.h" 16 #include "base/time/time.h" 17 #include "components/search_engines/omnibox_focus_type.h" 18 #include "components/search_engines/search_engine_type.h" 19 #include "components/search_engines/template_url_data.h" 20 #include "components/search_engines/template_url_id.h" 21 #include "third_party/metrics_proto/omnibox_event.pb.h" 22 #include "third_party/metrics_proto/omnibox_input_type.pb.h" 23 #include "ui/gfx/geometry/size.h" 24 #include "url/gurl.h" 25 #include "url/third_party/mozilla/url_parse.h" 26 27 class SearchTermsData; 28 class TemplateURL; 29 30 31 // TemplateURLRef ------------------------------------------------------------- 32 33 // A TemplateURLRef represents a single URL within the larger TemplateURL class 34 // (which represents an entire "search engine", see below). If 35 // SupportsReplacement() is true, this URL has placeholders in it, for which 36 // callers can substitute values to get a "real" URL using ReplaceSearchTerms(). 37 // 38 // TemplateURLRefs always have a non-NULL |owner_| TemplateURL, which they 39 // access in order to get at important data like the underlying URL string or 40 // the associated Profile. 41 class TemplateURLRef { 42 public: 43 // Magic numbers to pass to ReplaceSearchTerms() for the |accepted_suggestion| 44 // parameter. Most callers aren't using Suggest capabilities and should just 45 // pass NO_SUGGESTIONS_AVAILABLE. 46 // NOTE: Because positive values are meaningful, make sure these are negative! 
47 enum AcceptedSuggestion { 48 NO_SUGGESTION_CHOSEN = -1, 49 NO_SUGGESTIONS_AVAILABLE = -2, 50 }; 51 52 // Which kind of URL within our owner we are. This allows us to get at the 53 // correct string field. Use |INDEXED| to indicate that the numerical 54 // |index_in_owner_| should be used instead. 55 enum Type { 56 SEARCH, 57 SUGGEST, 58 IMAGE, 59 NEW_TAB, 60 CONTEXTUAL_SEARCH, 61 INDEXED 62 }; 63 64 // Type to store <content_type, post_data> pair for POST URLs. 65 // The |content_type|(first part of the pair) is the content-type of 66 // the |post_data|(second part of the pair) which is encoded in 67 // "multipart/form-data" format, it also contains the MIME boundary used in 68 // the |post_data|. See http://tools.ietf.org/html/rfc2046 for the details. 69 typedef std::pair<std::string, std::string> PostContent; 70 71 // Enumeration of the known search or suggest request sources. 72 enum RequestSource { 73 SEARCHBOX, // Omnibox or the NTP realbox. The default. 74 CROS_APP_LIST, // Chrome OS app list search box. 75 NON_SEARCHBOX_NTP, // Non-searchbox NTP surfaces. 76 }; 77 78 // This struct encapsulates arguments passed to 79 // TemplateURLRef::ReplaceSearchTerms methods. By default, only search_terms 80 // is required and is passed in the constructor. 81 struct SearchTermsArgs { 82 SearchTermsArgs(); 83 explicit SearchTermsArgs(const base::string16& search_terms); 84 SearchTermsArgs(const SearchTermsArgs& other); 85 ~SearchTermsArgs(); 86 87 struct ContextualSearchParams { 88 ContextualSearchParams(); 89 // Modern constructor, used when the content is sent in the HTTP header 90 // instead of as CGI parameters. 91 // The |version| tell the server which version of the client is making 92 // this request. 93 // The |contextual_cards_version| tells the server which version of 94 // contextual cards integration is being used by the client. 
95 // The |home_country| is an ISO country code for the country that the user 96 // considers their permanent home (which may be different from the country 97 // they are currently visiting). Pass an empty string if none available. 98 // The |previous_event_id| is an identifier previously returned by the 99 // server to identify that user interaction. 100 // The |previous_event_results| are the results of the user-interaction of 101 // that previous request. 102 // The "previous_xyz" parameters are documented in go/cs-sanitized. 103 // The |is_exact_search| allows the search request to be narrowed down to 104 // an "exact" search only, meaning just search for X rather than X + 105 // whatever else is in the context. The returned search term should not 106 // be expanded, and the server will honor this along with creating a 107 // narrow Search Term. 108 // The |source_lang| specifies a source language hint to apply for 109 // translation or to indicate that translation might be appropriate. 110 // This comes from CLD evaluating the selection and/or page content. 111 // The |target_lang| specifies the best language to translate into for 112 // the user, which also indicates when translation is appropriate or 113 // helpful. This comes from the Chrome Language Model. 114 // The |fluent_languages| string specifies the languages the user 115 // is fluent in reading. This acts as an alternate set of languages 116 // to consider translating into. The languages are ordered by 117 // fluency, and encoded as a comma-separated list of BCP 47 languages. 118 // The |related_searches_stamp| string contains an information that 119 // indicates experiment status and server processing results so that 120 // can be logged in GWS Sawmill logs for offline analysis for the 121 // Related Searches MVP experiment. 
122 ContextualSearchParams(int version, 123 int contextual_cards_version, 124 std::string home_country, 125 int64_t previous_event_id, 126 int previous_event_results, 127 bool is_exact_search, 128 std::string source_lang, 129 std::string target_lang, 130 std::string fluent_languages, 131 std::string related_searches_stamp); 132 ContextualSearchParams(const ContextualSearchParams& other); 133 ~ContextualSearchParams(); 134 135 // Estimates dynamic memory usage. 136 // See base/trace_event/memory_usage_estimator.h for more info. 137 size_t EstimateMemoryUsage() const; 138 139 // The version of contextual search. 140 int version = -1; 141 142 // The version of Contextual Cards data to request. 143 // A value of 0 indicates no data needed. 144 int contextual_cards_version = 0; 145 146 // The locale of the user's home country in an ISO country code format, 147 // or an empty string if not available. This indicates where the user 148 // resides, not where they currently are. 149 std::string home_country; 150 151 // An EventID from a previous interaction (sent by server, recorded by 152 // client). 153 int64_t previous_event_id = 0l; 154 155 // An encoded set of booleans that represent the interaction results from 156 // the previous event. 157 int previous_event_results = 0; 158 159 // A flag that restricts the search to exactly match the selection rather 160 // than expanding the Search Term to include other words in the context. 161 bool is_exact_search = false; 162 163 // Source language string to translate from. 164 std::string source_lang; 165 166 // Target language string to be translated into. 167 std::string target_lang; 168 169 // Alternate target languages that the user is fluent in, encoded in a 170 // single string. 171 std::string fluent_languages; 172 173 // Experiment arm and processing information for the Related Searches 174 // experiment. The value is an arbitrary string that starts with a 175 // schema version number. 
176 std::string related_searches_stamp; 177 }; 178 179 // Estimates dynamic memory usage. 180 // See base/trace_event/memory_usage_estimator.h for more info. 181 size_t EstimateMemoryUsage() const; 182 183 // The search terms (query). 184 base::string16 search_terms; 185 186 // The original (input) query. 187 base::string16 original_query; 188 189 // The type the original input query was identified as. 190 metrics::OmniboxInputType input_type = metrics::OmniboxInputType::EMPTY; 191 192 // Specifies how the user last interacted with the searchbox UI element. 193 OmniboxFocusType focus_type = OmniboxFocusType::DEFAULT; 194 195 // The optional assisted query stats, aka AQS, used for logging purposes. 196 // This string contains impressions of all autocomplete matches shown 197 // at the query submission time. For privacy reasons, we require the 198 // search provider to support HTTPS protocol in order to receive the AQS 199 // param. 200 // For more details, see http://goto.google.com/binary-clients-logging . 201 std::string assisted_query_stats; 202 203 // TODO: Remove along with "aq" CGI param. 204 int accepted_suggestion = NO_SUGGESTIONS_AVAILABLE; 205 206 // The 0-based position of the cursor within the query string at the time 207 // the request was issued. Set to base::string16::npos if not used. 208 size_t cursor_position = base::string16::npos; 209 210 // The URL of the current webpage to be used for experimental zero-prefix 211 // suggestions. 212 std::string current_page_url; 213 214 // Which omnibox the user used to type the prefix. 215 metrics::OmniboxEventProto::PageClassification page_classification = 216 metrics::OmniboxEventProto::INVALID_SPEC; 217 218 // Optional session token. 219 std::string session_token; 220 221 // Prefetch query and type. 222 std::string prefetch_query; 223 std::string prefetch_query_type; 224 225 // Additional query params to append to the request. 
226 std::string additional_query_params; 227 228 // If set, ReplaceSearchTerms() will automatically append any extra query 229 // params specified via the --extra-search-query-params command-line 230 // argument. Generally, this should be set when dealing with the search 231 // TemplateURLRefs of the default search engine and the caller cares 232 // about the query portion of the URL. Since neither TemplateURLRef nor 233 // indeed TemplateURL know whether a TemplateURL is the default search 234 // engine, callers instead must set this manually. 235 bool append_extra_query_params_from_command_line = false; 236 237 // The raw content of an image thumbnail that will be used as a query for 238 // search-by-image frontend. 239 std::string image_thumbnail_content; 240 241 // When searching for an image, the URL of the original image. Callers 242 // should leave this empty for images specified via data: URLs. 243 GURL image_url; 244 245 // When searching for an image, the original size of the image. 246 gfx::Size image_original_size; 247 248 // Source of the search or suggest request. 249 RequestSource request_source = SEARCHBOX; 250 251 ContextualSearchParams contextual_search_params; 252 }; 253 254 TemplateURLRef(const TemplateURL* owner, Type type); 255 TemplateURLRef(const TemplateURL* owner, size_t index_in_owner); 256 ~TemplateURLRef(); 257 258 TemplateURLRef(const TemplateURLRef& source); 259 TemplateURLRef& operator=(const TemplateURLRef& source); 260 261 // Returns the raw URL. None of the parameters will have been replaced. 262 std::string GetURL() const; 263 264 // Returns the raw string of the post params. Please see comments in 265 // prepopulated_engines_schema.json for the format. 266 std::string GetPostParamsString() const; 267 268 // Returns true if this URL supports search term replacement. 
269 bool SupportsReplacement(const SearchTermsData& search_terms_data) const; 270 271 // Returns a string that is the result of replacing the search terms in 272 // the url with the specified arguments. We use our owner's input encoding. 273 // 274 // If this TemplateURLRef does not support replacement (SupportsReplacement 275 // returns false), an empty string is returned. 276 // If this TemplateURLRef uses POST, and |post_content| is not NULL, the 277 // |post_params_| will be replaced, encoded in "multipart/form-data" format 278 // and stored into |post_content|. 279 std::string ReplaceSearchTerms(const SearchTermsArgs& search_terms_args, 280 const SearchTermsData& search_terms_data, 281 PostContent* post_content) const; 282 283 // TODO(jnd): remove the following ReplaceSearchTerms definition which does 284 // not have |post_content| parameter once all reference callers pass 285 // |post_content| parameter. ReplaceSearchTerms(const SearchTermsArgs & search_terms_args,const SearchTermsData & search_terms_data)286 std::string ReplaceSearchTerms( 287 const SearchTermsArgs& search_terms_args, 288 const SearchTermsData& search_terms_data) const { 289 return ReplaceSearchTerms(search_terms_args, search_terms_data, NULL); 290 } 291 292 // Returns true if the TemplateURLRef is valid. An invalid TemplateURLRef is 293 // one that contains unknown terms, or invalid characters. 294 bool IsValid(const SearchTermsData& search_terms_data) const; 295 296 // Returns a string representation of this TemplateURLRef suitable for 297 // display. The display format is the same as the format used by Firefox. 298 base::string16 DisplayURL(const SearchTermsData& search_terms_data) const; 299 300 // Converts a string as returned by DisplayURL back into a string as 301 // understood by TemplateURLRef. 
302 static std::string DisplayURLToURLRef(const base::string16& display_url); 303 304 // If this TemplateURLRef is valid and contains one search term, this returns 305 // the host/path of the URL, otherwise this returns an empty string. 306 const std::string& GetHost(const SearchTermsData& search_terms_data) const; 307 std::string GetPath(const SearchTermsData& search_terms_data) const; 308 309 // If this TemplateURLRef is valid and contains one search term 310 // in its query or ref, this returns the key of the search term, 311 // otherwise this returns an empty string. 312 const std::string& GetSearchTermKey( 313 const SearchTermsData& search_terms_data) const; 314 315 // If this TemplateURLRef is valid and contains one search term, 316 // this returns the location of the search term, 317 // otherwise this returns url::Parsed::QUERY. 318 url::Parsed::ComponentType GetSearchTermKeyLocation( 319 const SearchTermsData& search_terms_data) const; 320 321 // If this TemplateURLRef is valid and contains one search term, 322 // this returns the fixed prefix before the search term, 323 // otherwise this returns an empty string. 324 const std::string& GetSearchTermValuePrefix( 325 const SearchTermsData& search_terms_data) const; 326 327 // If this TemplateURLRef is valid and contains one search term, 328 // this returns the fixed suffix after the search term, 329 // otherwise this returns an empty string. 330 const std::string& GetSearchTermValueSuffix( 331 const SearchTermsData& search_terms_data) const; 332 333 // Converts the specified term in our owner's encoding to a base::string16. 334 base::string16 SearchTermToString16(const base::StringPiece& term) const; 335 336 // Returns true if this TemplateURLRef has a replacement term of 337 // {google:baseURL} or {google:baseSuggestURL}. 
338 bool HasGoogleBaseURLs(const SearchTermsData& search_terms_data) const; 339 340 // Use the pattern referred to by this TemplateURLRef to match the provided 341 // |url| and extract |search_terms| from it. Returns true if the pattern 342 // matches, even if |search_terms| is empty. In this case 343 // |search_term_component|, if not NULL, indicates whether the search terms 344 // were found in the query or the ref parameters; and |search_terms_position|, 345 // if not NULL, contains the position of the search terms in the query or the 346 // ref parameters. Returns false and an empty |search_terms| if the pattern 347 // does not match. 348 bool ExtractSearchTermsFromURL( 349 const GURL& url, 350 base::string16* search_terms, 351 const SearchTermsData& search_terms_data, 352 url::Parsed::ComponentType* search_term_component, 353 url::Component* search_terms_position) const; 354 355 // Whether the URL uses POST (as opposed to GET). 356 bool UsesPOSTMethod(const SearchTermsData& search_terms_data) const; 357 358 // Estimates dynamic memory usage. 359 // See base/trace_event/memory_usage_estimator.h for more info. 360 size_t EstimateMemoryUsage() const; 361 362 private: 363 friend class TemplateURL; 364 friend class TemplateURLTest; 365 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, SetPrepopulatedAndParse); 366 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterKnown); 367 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterUnknown); 368 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLEmpty); 369 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoTemplateEnd); 370 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoKnownParameters); 371 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLTwoParameters); 372 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNestedParameter); 373 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, URLRefTestImageURLWithPOST); 374 375 // Enumeration of the known types. 
376 enum ReplacementType { 377 ENCODING, 378 GOOGLE_ASSISTED_QUERY_STATS, 379 GOOGLE_BASE_URL, 380 GOOGLE_BASE_SUGGEST_URL, 381 GOOGLE_CONTEXTUAL_SEARCH_VERSION, 382 GOOGLE_CONTEXTUAL_SEARCH_CONTEXT_DATA, 383 GOOGLE_CURRENT_PAGE_URL, 384 GOOGLE_CURSOR_POSITION, 385 GOOGLE_IMAGE_ORIGINAL_HEIGHT, 386 GOOGLE_IMAGE_ORIGINAL_WIDTH, 387 GOOGLE_IMAGE_SEARCH_SOURCE, 388 GOOGLE_IMAGE_THUMBNAIL, 389 GOOGLE_IMAGE_THUMBNAIL_BASE64, 390 GOOGLE_IMAGE_URL, 391 GOOGLE_INPUT_TYPE, 392 GOOGLE_IOS_SEARCH_LANGUAGE, 393 GOOGLE_NTP_IS_THEMED, 394 GOOGLE_OMNIBOX_FOCUS_TYPE, 395 GOOGLE_ORIGINAL_QUERY_FOR_SUGGESTION, 396 GOOGLE_PAGE_CLASSIFICATION, 397 GOOGLE_PREFETCH_QUERY, 398 GOOGLE_RLZ, 399 GOOGLE_SEARCH_CLIENT, 400 GOOGLE_SEARCH_FIELDTRIAL_GROUP, 401 GOOGLE_SEARCH_VERSION, 402 GOOGLE_SESSION_TOKEN, 403 GOOGLE_SUGGEST_CLIENT, 404 GOOGLE_SUGGEST_REQUEST_ID, 405 GOOGLE_UNESCAPED_SEARCH_TERMS, 406 LANGUAGE, 407 MAIL_RU_REFERRAL_ID, 408 SEARCH_TERMS, 409 YANDEX_REFERRAL_ID, 410 }; 411 412 // Used to identify an element of the raw url that can be replaced. 413 struct Replacement { ReplacementReplacement414 Replacement(ReplacementType type, size_t index) 415 : type(type), index(index), is_post_param(false) {} 416 ReplacementType type; 417 size_t index; 418 // Indicates the location in where the replacement is replaced. If 419 // |is_post_param| is false, |index| indicates the byte position in 420 // |parsed_url_|. Otherwise, |index| is the index of |post_params_|. 421 bool is_post_param; 422 }; 423 424 // Stores a single parameter for a POST. 425 struct PostParam { 426 std::string name; 427 std::string value; 428 std::string content_type; 429 430 // Estimates dynamic memory usage. 431 // See base/trace_event/memory_usage_estimator.h for more info. 432 size_t EstimateMemoryUsage() const; 433 }; 434 435 // The list of elements to replace. 
436 typedef std::vector<struct Replacement> Replacements; 437 typedef std::vector<PostParam> PostParams; 438 439 // TemplateURLRef internally caches values to make replacement quick. This 440 // method invalidates any cached values. 441 void InvalidateCachedValues() const; 442 443 // Parses the parameter in url at the specified offset. start/end specify the 444 // range of the parameter in the url, including the braces. If the parameter 445 // is valid, url is updated to reflect the appropriate parameter. If 446 // the parameter is one of the known parameters an element is added to 447 // replacements indicating the type and range of the element. The original 448 // parameter is erased from the url. 449 // 450 // If the parameter is not a known parameter, false is returned. If this is a 451 // prepopulated URL, the parameter is erased, otherwise it is left alone. 452 bool ParseParameter(size_t start, 453 size_t end, 454 std::string* url, 455 Replacements* replacements) const; 456 457 // Parses the specified url, replacing parameters as necessary. If 458 // successful, valid is set to true, and the parsed url is returned. For all 459 // known parameters that are encountered an entry is added to replacements. 460 // If there is an error parsing the url, valid is set to false, and an empty 461 // string is returned. If the URL has the POST parameters, they will be 462 // parsed into |post_params| which will be further replaced with real search 463 // terms data and encoded in "multipart/form-data" format to generate the 464 // POST data. 465 std::string ParseURL(const std::string& url, 466 Replacements* replacements, 467 PostParams* post_params, 468 bool* valid) const; 469 470 // If the url has not yet been parsed, ParseURL is invoked. 471 // NOTE: While this is const, it modifies parsed_, valid_, parsed_url_ and 472 // search_offset_. 
473 void ParseIfNecessary(const SearchTermsData& search_terms_data) const; 474 475 // Parses a wildcard out of |path|, putting the parsed path in |path_prefix_| 476 // and |path_suffix_| and setting |path_wildcard_present_| to true. 477 // In the absence of a wildcard, the full path will be contained in 478 // |path_prefix_| and |path_wildcard_present_| will be false. 479 void ParsePath(const std::string& path) const; 480 481 // Returns whether the path portion of this template URL is equal to the path 482 // in |url|, checking that URL is prefixed/suffixed by 483 // |path_prefix_|/|path_suffix_| if |path_wildcard_present_| is true, or equal 484 // to |path_prefix_| otherwise. 485 bool PathIsEqual(const GURL& url) const; 486 487 // Extracts the query key and host from the url. 488 void ParseHostAndSearchTermKey( 489 const SearchTermsData& search_terms_data) const; 490 491 // Encode post parameters in "multipart/form-data" format and store it 492 // inside |post_content|. Returns false if errors are encountered during 493 // encoding. This method is called each time ReplaceSearchTerms gets called. 494 bool EncodeFormData(const PostParams& post_params, 495 PostContent* post_content) const; 496 497 // Handles a replacement by using real term data. If the replacement 498 // belongs to a PostParam, the PostParam will be replaced by the term data. 499 // Otherwise, the term data will be inserted at the place that the 500 // replacement points to. 501 void HandleReplacement(const std::string& name, 502 const std::string& value, 503 const Replacement& replacement, 504 std::string* url) const; 505 506 // Replaces all replacements in |parsed_url_| with their actual values and 507 // returns the result. This is the main functionality of 508 // ReplaceSearchTerms(). 509 std::string HandleReplacements( 510 const SearchTermsArgs& search_terms_args, 511 const SearchTermsData& search_terms_data, 512 PostContent* post_content) const; 513 514 // The TemplateURL that contains us. 
This should outlive us. 515 const TemplateURL* owner_; 516 517 // What kind of URL we are. 518 Type type_; 519 520 // If |type_| is |INDEXED|, this |index_in_owner_| is used instead to refer to 521 // a url within our owner. 522 size_t index_in_owner_ = 0; 523 524 // Whether the URL has been parsed. 525 mutable bool parsed_ = false; 526 527 // Whether the url was successfully parsed. 528 mutable bool valid_ = false; 529 530 // The parsed URL. All terms have been stripped out of this with 531 // replacements_ giving the index of the terms to replace. 532 mutable std::string parsed_url_; 533 534 // Do we support search term replacement? 535 mutable bool supports_replacements_ = false; 536 537 // The replaceable parts of url (parsed_url_). These are ordered by index 538 // into the string, and may be empty. 539 mutable Replacements replacements_; 540 541 // Whether the path contains a wildcard. 542 mutable bool path_wildcard_present_ = false; 543 544 // Host, port, path, key and location of the search term. These are only set 545 // if the url contains one search term. 546 mutable std::string host_; 547 mutable std::string port_; 548 mutable std::string path_prefix_; 549 mutable std::string path_suffix_; 550 mutable std::string search_term_key_; 551 mutable url::Parsed::ComponentType search_term_key_location_ = 552 url::Parsed::QUERY; 553 mutable std::string search_term_value_prefix_; 554 mutable std::string search_term_value_suffix_; 555 556 mutable PostParams post_params_; 557 558 // Whether the contained URL is a pre-populated URL. 559 bool prepopulated_ = false; 560 }; 561 562 563 // TemplateURL ---------------------------------------------------------------- 564 565 // A TemplateURL represents a single "search engine", defined primarily as a 566 // subset of the Open Search Description Document 567 // (http://www.opensearch.org/Specifications/OpenSearch) plus some extensions. 
568 // One TemplateURL contains several TemplateURLRefs, which correspond to various 569 // different capabilities (e.g. doing searches or getting suggestions), as well 570 // as a TemplateURLData containing other details like the name, keyword, etc. 571 // 572 // TemplateURLs are intended to be read-only for most users. 573 // The TemplateURLService, which handles storing and manipulating TemplateURLs, 574 // is made a friend so that it can be the exception to this pattern. 575 class TemplateURL { 576 public: 577 using TemplateURLVector = std::vector<TemplateURL*>; 578 using OwnedTemplateURLVector = std::vector<std::unique_ptr<TemplateURL>>; 579 580 // These values are not persisted and can be freely changed. 581 // Their integer values are used for choosing the best engine during keyword 582 // conflicts, so their relative ordering should not be changed without careful 583 // thought about what happens during version skew. 584 enum Type { 585 // Installed only on this device. Should not be synced. This is not common. 586 LOCAL = 0, 587 // Regular search engine. This is the most common. 588 NORMAL = 1, 589 // Installed by extension through Override Settings API. 590 NORMAL_CONTROLLED_BY_EXTENSION = 2, 591 // The keyword associated with an extension that uses the Omnibox API. 592 OMNIBOX_API_EXTENSION = 3, 593 }; 594 595 // An AssociatedExtensionInfo represents information about the extension that 596 // added the search engine. 597 struct AssociatedExtensionInfo { 598 AssociatedExtensionInfo(const std::string& extension_id, 599 base::Time install_time, 600 bool wants_to_be_default_engine); 601 ~AssociatedExtensionInfo(); 602 603 // Estimates dynamic memory usage. 604 // See base/trace_event/memory_usage_estimator.h for more info. 605 size_t EstimateMemoryUsage() const; 606 607 std::string extension_id; 608 609 // Used to resolve conflicts when there are multiple extensions specifying 610 // the default search engine. The most recently-installed wins. 
611 base::Time install_time; 612 613 // Whether the search engine is supposed to be default. 614 bool wants_to_be_default_engine; 615 }; 616 617 explicit TemplateURL(const TemplateURLData& data, Type type = NORMAL); 618 619 // Constructor for extension controlled engine. |type| must be 620 // NORMAL_CONTROLLED_BY_EXTENSION or OMNIBOX_API_EXTENSION. 621 TemplateURL(const TemplateURLData& data, 622 Type type, 623 std::string extension_id, 624 base::Time install_time, 625 bool wants_to_be_default_engine); 626 627 ~TemplateURL(); 628 629 // For two engines with the same keyword, |this| and |other|, 630 // returns true if |this| is strictly better than |other|. 631 // 632 // While normal engines must all have distinct keywords, policy-created, 633 // extension-controlled and omnibox API engines may have the same keywords as 634 // each other or as normal engines. In these cases, policy-created engines 635 // override omnibox API engines, which override extension-controlled engines, 636 // which override normal engines. 637 // 638 // If there is still a conflict after this, compare by safe-for-autoreplace, 639 // then last modified date, then use the sync guid as a tiebreaker. 640 // 641 // TODO(tommycli): I'd like to use this to resolve Sync conflicts in the 642 // future, but we need a total ordering of TemplateURLs. That's not the case 643 // today, because the sync GUIDs are not actually globally unique, so there 644 // can be a genuine tie, which is not good, because then two different clients 645 // could choose to resolve the conflict in two different ways. 646 bool IsBetterThanEngineWithConflictingKeyword(const TemplateURL* other) const; 647 648 // Generates a suitable keyword for the specified url, which must be valid. 649 // This is guaranteed not to return an empty string, since TemplateURLs should 650 // never have an empty keyword. 651 static base::string16 GenerateKeyword(const GURL& url); 652 653 // Generates a favicon URL from the specified url. 
654 static GURL GenerateFaviconURL(const GURL& url); 655 656 // Returns true if |t_url| and |data| are equal in all meaningful respects. 657 // Static to allow either or both params to be NULL. 658 static bool MatchesData(const TemplateURL* t_url, 659 const TemplateURLData* data, 660 const SearchTermsData& search_terms_data); 661 data()662 const TemplateURLData& data() const { return data_; } 663 short_name()664 const base::string16& short_name() const { return data_.short_name(); } 665 // An accessor for the short_name, but adjusted so it can be appropriately 666 // displayed even if it is LTR and the UI is RTL. 667 base::string16 AdjustedShortNameForLocaleDirection() const; 668 keyword()669 const base::string16& keyword() const { return data_.keyword(); } 670 url()671 const std::string& url() const { return data_.url(); } suggestions_url()672 const std::string& suggestions_url() const { return data_.suggestions_url; } image_url()673 const std::string& image_url() const { return data_.image_url; } new_tab_url()674 const std::string& new_tab_url() const { return data_.new_tab_url; } contextual_search_url()675 const std::string& contextual_search_url() const { 676 return data_.contextual_search_url; 677 } search_url_post_params()678 const std::string& search_url_post_params() const { 679 return data_.search_url_post_params; 680 } suggestions_url_post_params()681 const std::string& suggestions_url_post_params() const { 682 return data_.suggestions_url_post_params; 683 } image_url_post_params()684 const std::string& image_url_post_params() const { 685 return data_.image_url_post_params; 686 } alternate_urls()687 const std::vector<std::string>& alternate_urls() const { 688 return data_.alternate_urls; 689 } favicon_url()690 const GURL& favicon_url() const { return data_.favicon_url; } 691 logo_url()692 const GURL& logo_url() const { return data_.logo_url; } 693 doodle_url()694 const GURL& doodle_url() const { return data_.doodle_url; } 695 originating_url()696 const 
GURL& originating_url() const { return data_.originating_url; } 697 safe_for_autoreplace()698 bool safe_for_autoreplace() const { return data_.safe_for_autoreplace; } 699 input_encodings()700 const std::vector<std::string>& input_encodings() const { 701 return data_.input_encodings; 702 } 703 id()704 TemplateURLID id() const { return data_.id; } 705 date_created()706 base::Time date_created() const { return data_.date_created; } last_modified()707 base::Time last_modified() const { return data_.last_modified; } last_visited()708 base::Time last_visited() const { return data_.last_visited; } 709 created_by_policy()710 bool created_by_policy() const { return data_.created_by_policy; } created_from_play_api()711 bool created_from_play_api() const { return data_.created_from_play_api; } 712 usage_count()713 int usage_count() const { return data_.usage_count; } 714 prepopulate_id()715 int prepopulate_id() const { return data_.prepopulate_id; } 716 sync_guid()717 const std::string& sync_guid() const { return data_.sync_guid; } 718 url_refs()719 const std::vector<TemplateURLRef>& url_refs() const { return url_refs_; } url_ref()720 const TemplateURLRef& url_ref() const { 721 // Sanity check for https://crbug.com/781703. 722 CHECK(!url_refs_.empty()); 723 return url_refs_.back(); 724 } suggestions_url_ref()725 const TemplateURLRef& suggestions_url_ref() const { 726 return suggestions_url_ref_; 727 } image_url_ref()728 const TemplateURLRef& image_url_ref() const { return image_url_ref_; } new_tab_url_ref()729 const TemplateURLRef& new_tab_url_ref() const { return new_tab_url_ref_; } contextual_search_url_ref()730 const TemplateURLRef& contextual_search_url_ref() const { 731 return contextual_search_url_ref_; 732 } 733 type()734 Type type() const { return type_; } 735 GetExtensionInfoForTesting()736 const AssociatedExtensionInfo* GetExtensionInfoForTesting() const { 737 return extension_info_.get(); 738 } 739 740 // Returns true if |url| supports replacement. 
741 bool SupportsReplacement(const SearchTermsData& search_terms_data) const; 742 743 // Returns true if any URLRefs use Googe base URLs. 744 bool HasGoogleBaseURLs(const SearchTermsData& search_terms_data) const; 745 746 // Returns true if this TemplateURL uses Google base URLs and has a keyword 747 // of "google.TLD". We use this to decide whether we can automatically 748 // update the keyword to reflect the current Google base URL TLD. 749 bool IsGoogleSearchURLWithReplaceableKeyword( 750 const SearchTermsData& search_terms_data) const; 751 752 // Returns true if the keywords match or if 753 // IsGoogleSearchURLWithReplaceableKeyword() is true for both |this| and 754 // |other|. 755 bool HasSameKeywordAs(const TemplateURLData& other, 756 const SearchTermsData& search_terms_data) const; 757 758 // Returns the id of the extension that added this search engine. Only call 759 // this for TemplateURLs of type NORMAL_CONTROLLED_BY_EXTENSION or 760 // OMNIBOX_API_EXTENSION. 761 std::string GetExtensionId() const; 762 763 // Returns the type of this search engine, or SEARCH_ENGINE_OTHER if no 764 // engines match. 765 SearchEngineType GetEngineType( 766 const SearchTermsData& search_terms_data) const; 767 768 // Use the alternate URLs and the search URL to match the provided |url| 769 // and extract |search_terms| from it. Returns false and an empty 770 // |search_terms| if no search terms can be matched. The URLs are matched in 771 // the order listed in |url_refs_| (see comment there). 772 bool ExtractSearchTermsFromURL(const GURL& url, 773 const SearchTermsData& search_terms_data, 774 base::string16* search_terms) const; 775 776 // Returns true if non-empty search terms could be extracted from |url| using 777 // ExtractSearchTermsFromURL(). In other words, this returns whether |url| 778 // could be the result of performing a search with |this|. 
779 bool IsSearchURL(const GURL& url, 780 const SearchTermsData& search_terms_data) const; 781 782 // Given a |url| corresponding to this TemplateURL, identifies the search 783 // terms and replaces them with the ones in |search_terms_args|, leaving the 784 // other parameters untouched. If the replacement fails, returns false and 785 // leaves |result| untouched. This is used by mobile ports to perform query 786 // refinement. 787 bool ReplaceSearchTermsInURL( 788 const GURL& url, 789 const TemplateURLRef::SearchTermsArgs& search_terms_args, 790 const SearchTermsData& search_terms_data, 791 GURL* result) const; 792 793 // Encodes the search terms from |search_terms_args| so that we know the 794 // |input_encoding|. Returns the |encoded_terms| and the 795 // |encoded_original_query|. |encoded_terms| may be escaped as path or query 796 // depending on |is_in_query|; |encoded_original_query| is always escaped as 797 // query. 798 void EncodeSearchTerms( 799 const TemplateURLRef::SearchTermsArgs& search_terms_args, 800 bool is_in_query, 801 std::string* input_encoding, 802 base::string16* encoded_terms, 803 base::string16* encoded_original_query) const; 804 805 // Returns the search url for this template URL. 806 // Returns an empty GURL if this template URL has no url(). 807 GURL GenerateSearchURL(const SearchTermsData& search_terms_data) const; 808 809 // TemplateURL internally caches values derived from a passed SearchTermsData 810 // to make its functions quick. This method invalidates any cached values and 811 // it should be called after SearchTermsData has been changed. 812 void InvalidateCachedValues() const; 813 814 // Estimates dynamic memory usage. 815 // See base/trace_event/memory_usage_estimator.h for more info. 
816 size_t EstimateMemoryUsage() const; 817 818 private: 819 friend class TemplateURLService; 820 821 void CopyFrom(const TemplateURL& other); 822 823 void SetURL(const std::string& url); 824 void SetPrepopulateId(int id); 825 826 // Resets the keyword if IsGoogleSearchURLWithReplaceableKeyword() or |force|. 827 // The |force| parameter is useful when the existing keyword is known to be 828 // a placeholder. The resulting keyword is generated using 829 // GenerateSearchURL() and GenerateKeyword(). 830 void ResetKeywordIfNecessary(const SearchTermsData& search_terms_data, 831 bool force); 832 833 // Resizes the |url_refs_| vector, which always holds the search URL as the 834 // last item. 835 void ResizeURLRefVector(); 836 837 // Uses the alternate URLs and the search URL to match the provided |url| 838 // and extract |search_terms| from it as well as the |search_terms_component| 839 // (either REF or QUERY) and |search_terms_component| at which the 840 // |search_terms| are found in |url|. See also ExtractSearchTermsFromURL(). 841 bool FindSearchTermsInURL(const GURL& url, 842 const SearchTermsData& search_terms_data, 843 base::string16* search_terms, 844 url::Parsed::ComponentType* search_terms_component, 845 url::Component* search_terms_position) const; 846 847 TemplateURLData data_; 848 849 // Contains TemplateURLRefs corresponding to the alternate URLs and the search 850 // URL, in priority order: the URL at index 0 is treated as the highest 851 // priority and the primary search URL is treated as the lowest priority. For 852 // example, if a TemplateURL has alternate URL "http://foo/#q={searchTerms}" 853 // and search URL "http://foo/?q={searchTerms}", and the URL to be decoded is 854 // "http://foo/?q=a#q=b", the alternate URL will match first and the decoded 855 // search term will be "b". Note that since every TemplateURLRef has a 856 // primary search URL, this vector is never empty. 
857 std::vector<TemplateURLRef> url_refs_; 858 859 TemplateURLRef suggestions_url_ref_; 860 TemplateURLRef image_url_ref_; 861 TemplateURLRef new_tab_url_ref_; 862 TemplateURLRef contextual_search_url_ref_; 863 std::unique_ptr<AssociatedExtensionInfo> extension_info_; 864 865 const Type type_; 866 867 // Caches the computed engine type across successive calls to GetEngineType(). 868 mutable SearchEngineType engine_type_; 869 870 // TODO(sky): Add date last parsed OSD file. 871 872 DISALLOW_COPY_AND_ASSIGN(TemplateURL); 873 }; 874 875 #endif // COMPONENTS_SEARCH_ENGINES_TEMPLATE_URL_H_ 876