1 // Copyright 2020 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef THIRD_PARTY_BLINK_PUBLIC_COMMON_PRIVACY_BUDGET_IDENTIFIABLE_TOKEN_H_ 6 #define THIRD_PARTY_BLINK_PUBLIC_COMMON_PRIVACY_BUDGET_IDENTIFIABLE_TOKEN_H_ 7 8 #include <cstdint> 9 #include <type_traits> 10 11 #include "base/containers/span.h" 12 #include "base/numerics/safe_conversions.h" 13 #include "base/strings/string_piece.h" 14 #include "third_party/blink/public/common/privacy_budget/identifiability_internal_templates.h" 15 #include "third_party/blink/public/common/privacy_budget/identifiability_metrics.h" 16 17 namespace blink { 18 19 // Constructs a token that can be used for reporting a metric or constructing an 20 // identifiable surface. 21 // 22 // The token construction is a single step conversion that takes one of several 23 // constrained inputs and emits a value. The method by which the value is 24 // constructed intentionally cannot be chained. If such behavior is required, 25 // then this class should be modified to accommodate the new use case rather 26 // than implementing custom chaining schemes at call sites. 27 // 28 // Once constructed, a token can only be consumed by 29 // IdentifiabiltyMetricsBuilder and IdentifiableSurface. For all others, it is a 30 // copyable, opaque token. 31 // 32 // Reliance on implicit conversion imposes limitations on how 33 // IdentifiableToken class is to be used. For example the following works: 34 // 35 // std::string foo = ....; 36 // IdentifiableToken sample(foo); 37 // 38 // .. due to the following implicit conversion: 39 // 40 // 1. std::string -> const std::string& 41 // : lvalue -> lvalue reference + cv-qualification 42 // 2. const std::string& -> base::StringPiece 43 // : user-defined conversion via constructor 44 // base::StringPiece(const std::string&) 45 // 46 // However, when used within a builder expression, the user-defined conversion 47 // doesn't occur due to there not being a single user defined conversion from 48 // std::string -> IdentifiableToken. I.e. the following does not work: 49 // 50 // std::string foo = ....; 51 // IdentifiabilityMetricBuilder(...).Set(surface, foo); 52 // ^^^ 53 // The compiler can't deduce a two step user-defined conversion for |foo|. 54 // 55 // All overrides of the constructor should ensure that there exists a unique 56 // representation of the data type being sampled, and that the sample value is 57 // constructed based on this unique representation. 58 // 59 // TODO(asanka): Also require that the representation be portable. 60 // 61 // Extending IdentifiableToken to support more data types: 62 // ----------------------------------------------------------- 63 // 64 // This class is intentionally placed in blink/public/common due to the 65 // requirement that these primitives be made available to both the renderer and 66 // the browser. However, it would be desirable to have renderer or browser 67 // specific functions for mapping common types in either domain into a sample. 68 // 69 // The recommended methods to do so are (one-of): 70 // 71 // 1. Use an existing byte span representation. 72 // 73 // E.g.: Assuming |v| is a WTF::Vector 74 // IdentifiabilityMetricBuilder(...).Set(..., 75 // base::as_bytes(base::make_span(v.Data(), v.Size()))); 76 // 77 // Note again that serializing to a stream of bytes may not be sufficient 78 // if the underlying types don't have a unique representation. 79 // 80 // 2. Construct a byte-wise unique representation and invoke 81 // IdentifiableToken(ByteSpan) either explicitly or implicitly via 82 // user-defined conversions. 83 // 84 // Note: Avoid doing template magic. There's already too much here. Templates 85 // make it difficult to verify that the correct stable representation is 86 // the one getting ingested into the reporting workflow. 87 // 88 // Instead, explicitly invoke some wrapper that emits a ByteSpan (a.k.a. 89 // base::span<const uint8_t>. 90 class IdentifiableToken { 91 public: 92 // Generic buffer of bytes. 93 using ByteSpan = base::span<const uint8_t>; 94 95 // Representation type of the sample. 96 using TokenType = int64_t; 97 98 // Required for use in certain data structures. Represents no bytes. IdentifiableToken()99 constexpr IdentifiableToken() : value_(kIdentifiabilityDigestOfNoBytes) {} 100 101 // A byte buffer specified as a span. 102 // 103 // This is essentially the base case. If it were the base case, then 104 // IdentifiableToken would be closer to a proper digest. 105 // 106 // NOLINTNEXTLINE(google-explicit-constructor) IdentifiableToken(ByteSpan span)107 IdentifiableToken(ByteSpan span) 108 : value_(IdentifiabilityDigestOfBytes(span)) {} 109 110 // Integers, big and small. Includes char. 111 template <typename T, 112 typename U = internal::remove_cvref_t<T>, 113 typename std::enable_if_t<std::is_integral<U>::value>* = nullptr> IdentifiableToken(T in)114 constexpr IdentifiableToken(T in) // NOLINT(google-explicit-constructor) 115 : value_(base::IsValueInRangeForNumericType<TokenType, U>(in) 116 ? in 117 : internal::DigestOfObjectRepresentation<U>(in)) {} 118 119 // Enums. Punt to the underlying type. 120 template <typename T, 121 // Set dummy type before U to avoid GCC compile errors 122 typename std::enable_if_t<std::is_enum<T>::value>* = nullptr, 123 typename U = typename std::underlying_type<T>::type> IdentifiableToken(T in)124 constexpr IdentifiableToken(T in) // NOLINT(google-explicit-constructor) 125 : IdentifiableToken(static_cast<U>(in)) {} 126 127 // All floating point values get converted to double before encoding. 128 // 129 // Why? We'd like to minimize accidental divergence of values due to the data 130 // type that the callsite happened to be using at the time. 131 // 132 // On some platforms sizeof(long double) gives us 16 (i.e. 128 bits), while 133 // only 10 of those bytes are initialized. If the whole sizeof(long double) 134 // buffer were to be ingested, then the uninitialized memory will cause the 135 // resulting digest to be useless. 136 template < 137 typename T, 138 typename U = internal::remove_cvref_t<T>, 139 typename std::enable_if_t<std::is_floating_point<U>::value>* = nullptr> IdentifiableToken(T in)140 constexpr IdentifiableToken(T in) // NOLINT(google-explicit-constructor) 141 : value_(internal::DigestOfObjectRepresentation<double>( 142 static_cast<double>(in))) {} 143 144 // StringPiece. Decays to base::span<> but requires an explicit constructor 145 // invocation. 146 // 147 // Care must be taken when using string types with IdentifiableToken() since 148 // there's not privacy expectation in the resulting token value. If the string 149 // used as an input is privacy sensitive, it should not be passed in as-is. IdentifiableToken(base::StringPiece s)150 explicit IdentifiableToken(base::StringPiece s) 151 : IdentifiableToken(base::as_bytes(base::make_span(s))) { 152 // The cart is before the horse, but it's a static_assert<>. 153 static_assert( 154 std::is_same<ByteSpan, 155 decltype(base::as_bytes(base::make_span(s)))>::value, 156 "base::as_bytes() doesn't return ByteSpan"); 157 } 158 159 // Span of known trivial types except for BytesSpan, which is the base case. 160 template <typename T, 161 size_t Extent, 162 typename U = internal::remove_cvref_t<T>, 163 typename std::enable_if_t< 164 std::is_arithmetic<U>::value && 165 !std::is_same<ByteSpan::element_type, T>::value>* = nullptr> 166 // NOLINTNEXTLINE(google-explicit-constructor) IdentifiableToken(base::span<T,Extent> span)167 IdentifiableToken(base::span<T, Extent> span) 168 : IdentifiableToken(base::as_bytes(span)) {} 169 170 // A span of non-trivial things where each thing can be digested individually. 171 template <typename T, 172 size_t Extent, 173 typename std::enable_if_t< 174 !std::is_arithmetic<T>::value && 175 !std::is_same<ByteSpan::element_type, T>::value>* = nullptr> 176 // NOLINTNEXTLINE(google-explicit-constructor) IdentifiableToken(base::span<T,Extent> span)177 IdentifiableToken(base::span<T, Extent> span) { 178 TokenType cur_digest = 0; 179 for (const auto& element : span) { 180 TokenType digests[2]; 181 digests[0] = cur_digest; 182 digests[1] = IdentifiableToken(element).value_; 183 cur_digest = IdentifiabilityDigestOfBytes( 184 base::as_bytes(base::make_span(digests))); 185 } 186 value_ = cur_digest; 187 } 188 189 // Parameter pack where each parameter can be digested individually. Requires 190 // at least two parameters. 191 template <typename T1, typename T2, typename... Trest> IdentifiableToken(T1 first,T2 second,Trest...rest)192 constexpr IdentifiableToken(T1 first, T2 second, Trest... rest) { 193 TokenType samples[] = {IdentifiableToken(first).value_, 194 IdentifiableToken(second).value_, 195 (IdentifiableToken(rest).value_)...}; 196 value_ = IdentifiableToken(base::make_span(samples)).value_; 197 } 198 199 constexpr bool operator<(const IdentifiableToken& that) const { 200 return value_ < that.value_; 201 } 202 203 constexpr bool operator<=(const IdentifiableToken& that) const { 204 return value_ <= that.value_; 205 } 206 207 constexpr bool operator>(const IdentifiableToken& that) const { 208 return value_ > that.value_; 209 } 210 211 constexpr bool operator>=(const IdentifiableToken& that) const { 212 return value_ >= that.value_; 213 } 214 215 constexpr bool operator==(const IdentifiableToken& that) const { 216 return value_ == that.value_; 217 } 218 219 constexpr bool operator!=(const IdentifiableToken& that) const { 220 return value_ != that.value_; 221 } 222 223 // Returns a value that can be passed into the UKM metrics recording 224 // interfaces. ToUkmMetricValue()225 int64_t ToUkmMetricValue() const { return value_; } 226 227 private: 228 friend class IdentifiabilityMetricBuilder; 229 friend class IdentifiableSurface; 230 friend class IdentifiableTokenBuilder; 231 232 // TODO(asanka): This should be const. Switch over once the incremental digest 233 // functions land. 234 TokenType value_ = 0; 235 }; 236 237 } // namespace blink 238 #endif // THIRD_PARTY_BLINK_PUBLIC_COMMON_PRIVACY_BUDGET_IDENTIFIABLE_TOKEN_H_ 239