1 // Copyright 2020 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef THIRD_PARTY_BLINK_PUBLIC_COMMON_PRIVACY_BUDGET_IDENTIFIABLE_TOKEN_H_
6 #define THIRD_PARTY_BLINK_PUBLIC_COMMON_PRIVACY_BUDGET_IDENTIFIABLE_TOKEN_H_
7 
8 #include <cstdint>
9 #include <type_traits>
10 
11 #include "base/containers/span.h"
12 #include "base/numerics/safe_conversions.h"
13 #include "base/strings/string_piece.h"
14 #include "third_party/blink/public/common/privacy_budget/identifiability_internal_templates.h"
15 #include "third_party/blink/public/common/privacy_budget/identifiability_metrics.h"
16 
17 namespace blink {
18 
19 // Constructs a token that can be used for reporting a metric or constructing an
20 // identifiable surface.
21 //
22 // The token construction is a single step conversion that takes one of several
23 // constrained inputs and emits a value. The method by which the value is
24 // constructed intentionally cannot be chained. If such behavior is required,
25 // then this class should be modified to accommodate the new use case rather
26 // than implementing custom chaining schemes at call sites.
27 //
28 // Once constructed, a token can only be consumed by
// IdentifiabilityMetricBuilder and IdentifiableSurface. For all others, it is a
30 // copyable, opaque token.
31 //
32 // Reliance on implicit conversion imposes limitations on how
33 // IdentifiableToken class is to be used. For example the following works:
34 //
35 //     std::string foo = ....;
36 //     IdentifiableToken sample(foo);
37 //
38 // .. due to the following implicit conversion:
39 //
40 //    1. std::string -> const std::string&
41 //             : lvalue -> lvalue reference + cv-qualification
42 //    2. const std::string& -> base::StringPiece
43 //             : user-defined conversion via constructor
44 //               base::StringPiece(const std::string&)
45 //
46 // However, when used within a builder expression, the user-defined conversion
47 // doesn't occur due to there not being a single user defined conversion from
48 // std::string -> IdentifiableToken. I.e. the following does not work:
49 //
50 //     std::string foo = ....;
51 //     IdentifiabilityMetricBuilder(...).Set(surface, foo);
52 //                                                    ^^^
53 //      The compiler can't deduce a two step user-defined conversion for |foo|.
54 //
// All overloads of the constructor should ensure that there exists a unique
56 // representation of the data type being sampled, and that the sample value is
57 // constructed based on this unique representation.
58 //
59 // TODO(asanka): Also require that the representation be portable.
60 //
61 // Extending IdentifiableToken to support more data types:
62 // -----------------------------------------------------------
63 //
64 // This class is intentionally placed in blink/public/common due to the
65 // requirement that these primitives be made available to both the renderer and
66 // the browser. However, it would be desirable to have renderer or browser
67 // specific functions for mapping common types in either domain into a sample.
68 //
69 // The recommended methods to do so are (one-of):
70 //
71 //   1. Use an existing byte span representation.
72 //
73 //      E.g.: Assuming |v| is a WTF::Vector
74 //          IdentifiabilityMetricBuilder(...).Set(...,
75 //              base::as_bytes(base::make_span(v.Data(), v.Size())));
76 //
77 //      Note again that serializing to a stream of bytes may not be sufficient
78 //      if the underlying types don't have a unique representation.
79 //
80 //   2. Construct a byte-wise unique representation and invoke
81 //      IdentifiableToken(ByteSpan) either explicitly or implicitly via
82 //      user-defined conversions.
83 //
84 // Note: Avoid doing template magic. There's already too much here. Templates
85 //       make it difficult to verify that the correct stable representation is
86 //       the one getting ingested into the reporting workflow.
87 //
//       Instead, explicitly invoke some wrapper that emits a ByteSpan (a.k.a.
//       base::span<const uint8_t>).
90 class IdentifiableToken {
91  public:
92   // Generic buffer of bytes.
93   using ByteSpan = base::span<const uint8_t>;
94 
95   // Representation type of the sample.
96   using TokenType = int64_t;
97 
98   // Required for use in certain data structures. Represents no bytes.
IdentifiableToken()99   constexpr IdentifiableToken() : value_(kIdentifiabilityDigestOfNoBytes) {}
100 
101   // A byte buffer specified as a span.
102   //
103   // This is essentially the base case. If it were the base case, then
104   // IdentifiableToken would be closer to a proper digest.
105   //
106   // NOLINTNEXTLINE(google-explicit-constructor)
IdentifiableToken(ByteSpan span)107   IdentifiableToken(ByteSpan span)
108       : value_(IdentifiabilityDigestOfBytes(span)) {}
109 
110   // Integers, big and small. Includes char.
111   template <typename T,
112             typename U = internal::remove_cvref_t<T>,
113             typename std::enable_if_t<std::is_integral<U>::value>* = nullptr>
IdentifiableToken(T in)114   constexpr IdentifiableToken(T in)  // NOLINT(google-explicit-constructor)
115       : value_(base::IsValueInRangeForNumericType<TokenType, U>(in)
116                    ? in
117                    : internal::DigestOfObjectRepresentation<U>(in)) {}
118 
119   // Enums. Punt to the underlying type.
120   template <typename T,
121             // Set dummy type before U to avoid GCC compile errors
122             typename std::enable_if_t<std::is_enum<T>::value>* = nullptr,
123             typename U = typename std::underlying_type<T>::type>
IdentifiableToken(T in)124   constexpr IdentifiableToken(T in)  // NOLINT(google-explicit-constructor)
125       : IdentifiableToken(static_cast<U>(in)) {}
126 
127   // All floating point values get converted to double before encoding.
128   //
129   // Why? We'd like to minimize accidental divergence of values due to the data
130   // type that the callsite happened to be using at the time.
131   //
132   // On some platforms sizeof(long double) gives us 16 (i.e. 128 bits), while
133   // only 10 of those bytes are initialized. If the whole sizeof(long double)
134   // buffer were to be ingested, then the uninitialized memory will cause the
135   // resulting digest to be useless.
136   template <
137       typename T,
138       typename U = internal::remove_cvref_t<T>,
139       typename std::enable_if_t<std::is_floating_point<U>::value>* = nullptr>
IdentifiableToken(T in)140   constexpr IdentifiableToken(T in)  // NOLINT(google-explicit-constructor)
141       : value_(internal::DigestOfObjectRepresentation<double>(
142             static_cast<double>(in))) {}
143 
144   // StringPiece. Decays to base::span<> but requires an explicit constructor
145   // invocation.
146   //
147   // Care must be taken when using string types with IdentifiableToken() since
148   // there's not privacy expectation in the resulting token value. If the string
149   // used as an input is privacy sensitive, it should not be passed in as-is.
IdentifiableToken(base::StringPiece s)150   explicit IdentifiableToken(base::StringPiece s)
151       : IdentifiableToken(base::as_bytes(base::make_span(s))) {
152     // The cart is before the horse, but it's a static_assert<>.
153     static_assert(
154         std::is_same<ByteSpan,
155                      decltype(base::as_bytes(base::make_span(s)))>::value,
156         "base::as_bytes() doesn't return ByteSpan");
157   }
158 
159   // Span of known trivial types except for BytesSpan, which is the base case.
160   template <typename T,
161             size_t Extent,
162             typename U = internal::remove_cvref_t<T>,
163             typename std::enable_if_t<
164                 std::is_arithmetic<U>::value &&
165                 !std::is_same<ByteSpan::element_type, T>::value>* = nullptr>
166   // NOLINTNEXTLINE(google-explicit-constructor)
IdentifiableToken(base::span<T,Extent> span)167   IdentifiableToken(base::span<T, Extent> span)
168       : IdentifiableToken(base::as_bytes(span)) {}
169 
170   // A span of non-trivial things where each thing can be digested individually.
171   template <typename T,
172             size_t Extent,
173             typename std::enable_if_t<
174                 !std::is_arithmetic<T>::value &&
175                 !std::is_same<ByteSpan::element_type, T>::value>* = nullptr>
176   // NOLINTNEXTLINE(google-explicit-constructor)
IdentifiableToken(base::span<T,Extent> span)177   IdentifiableToken(base::span<T, Extent> span) {
178     TokenType cur_digest = 0;
179     for (const auto& element : span) {
180       TokenType digests[2];
181       digests[0] = cur_digest;
182       digests[1] = IdentifiableToken(element).value_;
183       cur_digest = IdentifiabilityDigestOfBytes(
184           base::as_bytes(base::make_span(digests)));
185     }
186     value_ = cur_digest;
187   }
188 
189   // Parameter pack where each parameter can be digested individually. Requires
190   // at least two parameters.
191   template <typename T1, typename T2, typename... Trest>
IdentifiableToken(T1 first,T2 second,Trest...rest)192   constexpr IdentifiableToken(T1 first, T2 second, Trest... rest) {
193     TokenType samples[] = {IdentifiableToken(first).value_,
194                            IdentifiableToken(second).value_,
195                            (IdentifiableToken(rest).value_)...};
196     value_ = IdentifiableToken(base::make_span(samples)).value_;
197   }
198 
199   constexpr bool operator<(const IdentifiableToken& that) const {
200     return value_ < that.value_;
201   }
202 
203   constexpr bool operator<=(const IdentifiableToken& that) const {
204     return value_ <= that.value_;
205   }
206 
207   constexpr bool operator>(const IdentifiableToken& that) const {
208     return value_ > that.value_;
209   }
210 
211   constexpr bool operator>=(const IdentifiableToken& that) const {
212     return value_ >= that.value_;
213   }
214 
215   constexpr bool operator==(const IdentifiableToken& that) const {
216     return value_ == that.value_;
217   }
218 
219   constexpr bool operator!=(const IdentifiableToken& that) const {
220     return value_ != that.value_;
221   }
222 
223   // Returns a value that can be passed into the UKM metrics recording
224   // interfaces.
ToUkmMetricValue()225   int64_t ToUkmMetricValue() const { return value_; }
226 
227  private:
228   friend class IdentifiabilityMetricBuilder;
229   friend class IdentifiableSurface;
230   friend class IdentifiableTokenBuilder;
231 
232   // TODO(asanka): This should be const. Switch over once the incremental digest
233   // functions land.
234   TokenType value_ = 0;
235 };
236 
237 }  // namespace blink
238 #endif  // THIRD_PARTY_BLINK_PUBLIC_COMMON_PRIVACY_BUDGET_IDENTIFIABLE_TOKEN_H_
239