// url-classifier/chromium/safebrowsing.proto

// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file includes Safe Browsing V4 API blacklist request and response
// protocol buffers. They should be kept in sync with the server implementation.

syntax = "proto2";

option optimize_for = LITE_RUNTIME;

package mozilla.safebrowsing;

// Describes what a lookup request wants checked: the threat, platform, and
// entry types of interest, together with the entries themselves.
//
// Fields are declared in ascending field-number order; only the numbers, not
// the declaration order, identify fields on the wire.
message ThreatInfo {
  // Threat types to check for.
  repeated ThreatType threat_types = 1;

  // Platform types to check for.
  repeated PlatformType platform_types = 2;

  // The threat entries that are to be checked.
  repeated ThreatEntry threat_entries = 3;

  // Entry types to check for.
  repeated ThreatEntryType threat_entry_types = 4;
}

// A single match found while checking a threat entry against the Safe
// Browsing threat lists.
//
// Fields are declared in ascending field-number order; only the numbers, not
// the declaration order, identify fields on the wire.
message ThreatMatch {
  // Threat type under which the entry matched.
  optional ThreatType threat_type = 1;

  // Platform type under which the entry matched.
  optional PlatformType platform_type = 2;

  // The threat entry that matched.
  optional ThreatEntry threat = 3;

  // Metadata associated with the matched threat. Optional.
  optional ThreatEntryMetadata threat_entry_metadata = 4;

  // Cache lifetime of this match. To avoid false positives, clients must not
  // cache the response for longer than this duration.
  optional Duration cache_duration = 5;

  // Threat entry type under which the entry matched.
  optional ThreatEntryType threat_entry_type = 6;
}

// Request to check a set of entries against the threat lists. The answer is
// carried by FindThreatMatchesResponse.
message FindThreatMatchesRequest {
  // Metadata describing the client that issues the request.
  optional ClientInfo client = 1;

  // The lists (threat/platform/entry types) and the entries to be checked for
  // matches.
  optional ThreatInfo threat_info = 2;
}

// Response type for requests to find threat matches
// (see FindThreatMatchesRequest).
message FindThreatMatchesResponse {
  // The threat list matches, one ThreatMatch per match. As a repeated field it
  // can hold zero or more elements.
  repeated ThreatMatch matches = 1;
}

// A Safe Browsing API update request. One request may ask for updates to
// several lists at once.
//
// Nested types are declared ahead of the fields that use them, and fields are
// listed in ascending field-number order; only the numbers identify fields on
// the wire.
message FetchThreatListUpdatesRequest {
  // The update request for one individual list.
  message ListUpdateRequest {
    // Limits the client places on the update of this list.
    message Constraints {
      // Upper bound, in number of entries, on the size of the update: the
      // update will not contain more entries than this. Should be a power of 2
      // between 2**10 and 2**20. Zero means no update size limit is set.
      optional int32 max_update_entries = 1;

      // Upper bound on the number of entries the client is willing to hold in
      // its local database. Should be a power of 2 between 2**10 and 2**20.
      // Zero means no database size limit is set.
      optional int32 max_database_entries = 2;

      // Geographic location the list is requested for, in ISO 3166-1 alpha-2
      // format. When unset, the server may pick the value based on the user's
      // IP address.
      optional string region = 3;

      // Compression types the client supports.
      repeated CompressionType supported_compressions = 4;
    }

    // Type of threat posed by the entries of the requested list.
    optional ThreatType threat_type = 1;

    // Type of platform put at risk by the entries of the requested list.
    optional PlatformType platform_type = 2;

    // Client's current state for the requested list, i.e. the encrypted
    // ClientState that was sent to the client for the previous update request.
    optional bytes state = 3;

    // Constraints that apply to this list's update.
    optional Constraints constraints = 4;

    // Type of the entries present in the requested list.
    optional ThreatEntryType threat_entry_type = 5;
  }

  // Metadata describing the client.
  optional ClientInfo client = 1;

  // The per-list update requests. (Field number 2 is not declared in this
  // message.)
  repeated ListUpdateRequest list_update_requests = 3;

  // Client information that is specific to Chrome.
  optional ChromeClientInfo chrome_client_info = 4;
}

// Response type for threat list update requests
// (see FetchThreatListUpdatesRequest).
//
// Nested types are declared ahead of the fields that use them; field numbers
// are unchanged and remain the only thing that identifies fields on the wire.
message FetchThreatListUpdatesResponse {
  // The update for one individual list.
  message ListUpdateResponse {
    // Kind of response that is being sent to the client.
    enum ResponseType {
      // Unknown.
      RESPONSE_TYPE_UNSPECIFIED = 0;

      // A partial update, to be applied on top of the client's existing local
      // database.
      PARTIAL_UPDATE = 1;

      // A full update, which replaces the client's entire local database.
      // Sent when the client was seriously out-of-date or is believed to be
      // corrupt.
      FULL_UPDATE = 2;
    }

    // Threat type the returned data is for.
    optional ThreatType threat_type = 1;

    // Format of the returned threats.
    optional ThreatEntryType threat_entry_type = 2;

    // Platform type the returned data is for.
    optional PlatformType platform_type = 3;

    // Type of this response. It may indicate that the client has to take an
    // action when it receives the response.
    optional ResponseType response_type = 4;

    // Entry sets to add to the local list of this threat type. Repeated so
    // that a single response can combine compressed and raw data.
    repeated ThreatEntrySet additions = 5;

    // Entry sets to remove from the local list of this threat type. In
    // practice this is either empty or holds exactly one ThreatEntrySet.
    repeated ThreatEntrySet removals = 6;

    // The new client state, in encrypted format. Clients treat it as opaque.
    optional bytes new_client_state = 7;

    // Expected SHA256 hash of the client state, i.e. of the sorted list of all
    // hashes present in the database once the provided update has been
    // applied. If the client state does not match this expected state, the
    // client must disregard the update and retry later.
    optional Checksum checksum = 8;
  }

  // The list updates that the client requested.
  repeated ListUpdateResponse list_update_responses = 1;

  // Minimum time the client must wait before issuing any update request. When
  // this field is not set, clients may update as soon as they want.
  optional Duration minimum_wait_duration = 2;
}

// Request to return the full hashes matched by the provided hash prefixes.
// The answer is carried by FindFullHashesResponse.
message FindFullHashesRequest {
  // Metadata describing the client that issues the request.
  optional ClientInfo client = 1;

  // The current client states for each of the client's local threat lists.
  repeated bytes client_states = 2;

  // The lists and hashes to be checked.
  optional ThreatInfo threat_info = 3;
}

// Response type for requests to find full hashes (see FindFullHashesRequest).
message FindFullHashesResponse {
  // The full hashes that matched the requested prefixes.
  repeated ThreatMatch matches = 1;

  // The minimum duration the client must wait before issuing any find-hashes
  // request. If this field is not set, clients can issue a request as soon as
  // they want.
  optional Duration minimum_wait_duration = 2;

  // How long to cache the response for requested entities that did not match
  // the threat list.
  optional Duration negative_cache_duration = 3;
}

// A hit made up of several resources: one is the threat list entry the client
// encountered, the others give context on how the client arrived at the
// unsafe entry.
//
// Nested types are grouped ahead of the fields; field numbers are unchanged
// and remain the only thing that identifies fields on the wire.
message ThreatHit {
  // Kinds of resources a client can report as part of a single hit.
  enum ThreatSourceType {
    // Unknown.
    THREAT_SOURCE_TYPE_UNSPECIFIED = 0;

    // The URL that matched the threat list (the one for which GetFullHash
    // returned a valid hash).
    MATCHING_URL = 1;

    // The final top-level URL of the tab the client was browsing when the
    // match occurred.
    TAB_URL = 2;

    // A redirect URL fetched before the final TAB_URL was reached.
    TAB_REDIRECT = 3;

    // A resource that was loaded within the final TAB_URL.
    TAB_RESOURCE = 4;
  }

  // One resource related to a threat hit.
  message ThreatSource {
    // URL of the resource.
    optional string url = 1;

    // Type of the reported source.
    optional ThreatSourceType type = 2;

    // Remote IP of the resource, in ASCII format; either IPv4 or IPv6.
    optional string remote_ip = 3;

    // Referrer of the resource. Set only when the referrer is available.
    optional string referrer = 4;
  }

  // Details about the user who encountered the threat.
  message UserInfo {
    // UN M.49 region code associated with the user's location.
    optional string region_code = 1;

    // A unique ID that stays stable over a week or two.
    optional bytes user_id = 2;
  }

  // The reported threat type.
  optional ThreatType threat_type = 1;

  // The reported platform type.
  optional PlatformType platform_type = 2;

  // The threat entry responsible for the hit. For hash-based hits the full
  // hash should be reported.
  optional ThreatEntry entry = 3;

  // The resources related to this threat hit.
  repeated ThreatSource resources = 4;

  // Identification reported by the client.
  optional ClientInfo client_info = 5;

  // Details about the user who encountered the threat.
  optional UserInfo user_info = 6;
}

// Types of threats.
//
// Values 11 and 12 are not defined in this file.
// NOTE(review): presumably they are assigned or retired on the server side;
// confirm before giving either number a new meaning.
enum ThreatType {
  // Unknown. As the first-declared value, this is also the proto2 default for
  // an unset field of this type.
  THREAT_TYPE_UNSPECIFIED = 0;

  // Malware threat type.
  MALWARE_THREAT = 1;

  // Social engineering threat type.
  SOCIAL_ENGINEERING_PUBLIC = 2;

  // Unwanted software threat type.
  UNWANTED_SOFTWARE = 3;

  // Potentially harmful application threat type.
  POTENTIALLY_HARMFUL_APPLICATION = 4;

  // Social engineering threat type for internal use.
  SOCIAL_ENGINEERING = 5;

  // API abuse threat type.
  API_ABUSE = 6;

  // Malicious binary threat type.
  MALICIOUS_BINARY = 7;

  // Client-side detection whitelist threat type.
  CSD_WHITELIST = 8;

  // Client-side download detection whitelist threat type.
  CSD_DOWNLOAD_WHITELIST = 9;

  // Client incident threat type.
  CLIENT_INCIDENT = 10;

  // Patterns to be used for activating the subresource filter. No
  // interstitial will be shown for patterns from this list.
  SUBRESOURCE_FILTER = 13;
}

// Types of platforms.
enum PlatformType {
  // Unknown platform. As the first-declared value, this is also the proto2
  // default for an unset field of this type.
  PLATFORM_TYPE_UNSPECIFIED = 0;

  // Threat posed to Windows.
  WINDOWS_PLATFORM = 1;

  // Threat posed to Linux.
  LINUX_PLATFORM = 2;

  // Threat posed to Android.
  // This value cannot be named ANDROID because that symbol is already defined
  // for Android builds; see build/config/android/BUILD.gn, line 21.
  ANDROID_PLATFORM = 3;

  // Threat posed to OSX.
  OSX_PLATFORM = 4;

  // Threat posed to iOS.
  IOS_PLATFORM = 5;

  // Threat posed to at least one of the defined platforms.
  ANY_PLATFORM = 6;

  // Threat posed to all defined platforms.
  ALL_PLATFORMS = 7;

  // Threat posed to Chrome.
  CHROME_PLATFORM = 8;
}

// The client metadata associated with Safe Browsing API requests. Request
// messages in this file carry it in a `client` / `client_info` field.
message ClientInfo {
  // A client ID that (hopefully) uniquely identifies the client implementation
  // of the Safe Browsing API.
  optional string client_id = 1;

  // The version of the client implementation.
  optional string client_version = 2;
}

// The client metadata associated with Safe Browsing API requests that is
// specific to users of Chrome.
message ChromeClientInfo {
  // Safe Browsing reporting populations in Chrome.
  enum SafeBrowsingReportingPopulation {
    // Unspecified reporting verbosity. As the first-declared value, this is
    // also the proto2 default for an unset field of this type.
    UNSPECIFIED = 0;

    // Client is opted out of reporting.
    OPT_OUT = 1;

    // Legacy extended reporting population.
    EXTENDED = 2;

    // Scout reporting population.
    SCOUT = 3;
  }

  // The reporting population of the user.
  optional SafeBrowsingReportingPopulation reporting_population = 1;
}

// The expected state of a client's local database, expressed as a hash.
message Checksum {
  // The SHA256 hash of the client state; that is, of the sorted list of all
  // hashes present in the database.
  optional bytes sha256 = 1;
}

// The ways in which threat entry sets can be compressed.
enum CompressionType {
  // Unknown. As the first-declared value, this is also the proto2 default for
  // an unset field of this type.
  COMPRESSION_TYPE_UNSPECIFIED = 0;

  // Raw, uncompressed data.
  RAW = 1;

  // Rice-Golomb encoded data (see RiceDeltaEncoding).
  RICE = 2;
}

// An individual threat; for example, a malicious URL or its hash
// representation. Only one of these fields should be set. The schema does not
// enforce this (the fields are plain optionals, not a oneof), so it is a
// convention for writers to follow.
message ThreatEntry {
  // A variable-length SHA256 hash with size between 4 and 32 bytes inclusive.
  optional bytes hash = 1;

  // A URL.
  optional string url = 2;
}

// Types of entries that pose threats. Threat lists are collections of entries
// of a single type.
enum ThreatEntryType {
  // Unspecified. As the first-declared value, this is also the proto2 default
  // for an unset field of this type.
  THREAT_ENTRY_TYPE_UNSPECIFIED = 0;

  // A host-suffix/path-prefix URL expression; for example, "foo.bar.com/baz/".
  URL = 1;

  // An executable program.
  EXECUTABLE = 2;

  // An IP range.
  IP_RANGE = 3;

  // A Chrome extension.
  CHROME_EXTENSION = 4;

  // A filename.
  FILENAME = 5;

  // CERT.
  // NOTE(review): presumably a certificate; confirm against the server
  // definition.
  CERT = 6;
}

// A set of threats that should be added or removed from a client's local
// database.
message ThreatEntrySet {
  // The compression type for the entries in this set.
  optional CompressionType compression_type = 1;

  // At most one of the following fields should be set. The schema does not
  // enforce this (the fields are plain optionals, not a oneof).

  // The raw SHA256-formatted entries.
  optional RawHashes raw_hashes = 2;

  // The raw removal indices for a local list.
  optional RawIndices raw_indices = 3;

  // The encoded 4-byte prefixes of SHA256-formatted entries, using a
  // Golomb-Rice encoding.
  optional RiceDeltaEncoding rice_hashes = 4;

  // The encoded local, lexicographically-sorted list indices, using a
  // Golomb-Rice encoding. Used for sending compressed removal indices.
  optional RiceDeltaEncoding rice_indices = 5;
}

// A set of raw indices to remove from a local list.
message RawIndices {
  // The indices to remove from a lexicographically-sorted local list.
  repeated int32 indices = 1;
}

// The uncompressed threat entries in hash format of a particular prefix length.
// Hashes can be anywhere from 4 to 32 bytes in size. A large majority are 4
// bytes, but some hashes are lengthened if they collide with the hash of a
// popular URL.
//
// Used for sending ThreatEntrySet to clients that do not support compression,
// or when sending non-4-byte hashes to clients that do support compression.
message RawHashes {
  // The number of bytes for each prefix encoded below. This field can be
  // anywhere from 4 (shortest prefix) to 32 (full SHA256 hash).
  optional int32 prefix_size = 1;

  // The hashes, all concatenated into one long byte string with no separator;
  // each hash is |prefix_size| bytes long (see above). Hashes are sorted in
  // lexicographic order.
  optional bytes raw_hashes = 2;
}

// The Rice-Golomb encoded data. Used for sending compressed 4-byte hashes or
// compressed removal indices.
message RiceDeltaEncoding {
  // The offset of the first entry in the encoded data, or, if only a single
  // integer was encoded, that single integer's value.
  optional int64 first_value = 1;

  // The Golomb-Rice parameter, which is a number between 2 and 28. This field
  // is missing (that is, zero) if num_entries is zero.
  optional int32 rice_parameter = 2;

  // The number of entries that are delta encoded in the encoded data. If only a
  // single integer was encoded, this will be zero and the single value will be
  // stored in first_value.
  optional int32 num_entries = 3;

  // The deltas, encoded using the Golomb-Rice coder.
  optional bytes encoded_data = 4;
}

// The metadata associated with a specific threat entry. The client is expected
// to know the metadata key/value pairs associated with each threat type; the
// schema itself treats keys and values as opaque bytes.
message ThreatEntryMetadata {
  // A single metadata entry (one key/value pair).
  message MetadataEntry {
    // The metadata entry key.
    optional bytes key = 1;

    // The metadata entry value.
    optional bytes value = 2;
  }

  // The metadata entries.
  repeated MetadataEntry entries = 1;
}

// Describes an individual threat list. A list is defined by three parameters:
// the type of threat posed, the type of platform targeted by the threat, and
// the type of entries in the list.
message ThreatListDescriptor {
  // The threat type posed by the list's entries.
  optional ThreatType threat_type = 1;

  // The platform type targeted by the list's entries.
  optional PlatformType platform_type = 2;

  // The type of the entries contained in the list.
  optional ThreatEntryType threat_entry_type = 3;
}

// A collection of lists available for download.
message ListThreatListsResponse {
  // The lists available for download, one descriptor per list.
  repeated ThreatListDescriptor threat_lists = 1;
}

// A signed span of time, expressed as whole seconds plus a nanosecond
// fraction. Used in this file for cache lifetimes and minimum wait times.
// NOTE(review): the two fields mirror google.protobuf.Duration; presumably it
// is defined locally so the file has no imports. Confirm before replacing it
// with the well-known type.
message Duration {
  // Signed seconds of the span of time. Must be from -315,576,000,000
  // to +315,576,000,000 inclusive.
  optional int64 seconds = 1;

  // Signed fractions of a second at nanosecond resolution of the span
  // of time. Durations less than one second are represented with a 0
  // `seconds` field and a positive or negative `nanos` field. For durations
  // of one second or more, a non-zero value for the `nanos` field must be
  // of the same sign as the `seconds` field. Must be from -999,999,999
  // to +999,999,999 inclusive.
  optional int32 nanos = 2;
}