1 // Copyright 2020 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef THIRD_PARTY_BLINK_PUBLIC_COMMON_PRIVACY_BUDGET_IDENTIFIABLE_SURFACE_H_
6 #define THIRD_PARTY_BLINK_PUBLIC_COMMON_PRIVACY_BUDGET_IDENTIFIABLE_SURFACE_H_
7 
8 #include <stdint.h>
9 
10 #include <cstddef>
11 #include <functional>
12 #include <tuple>
13 
14 #include "third_party/blink/public/common/privacy_budget/identifiable_token.h"
15 
16 namespace blink {
17 
18 // An identifiable surface.
19 //
20 // This class intends to be a lightweight wrapper over a simple 64-bit integer.
21 // It exhibits the following characteristics:
22 //
23 //   * All methods are constexpr.
24 //   * Immutable.
25 //   * Efficient enough to pass by value.
26 //
27 // Internally, an identifiable surface is represented as a 64-bit unsigned
28 // integer that can be used as the metric hash for reporting metrics via UKM.
29 //
// The least-significant |kTypeBits| of the value are used to store
// an IdentifiableSurface::Type value. The remainder stores the 56
// least-significant bits of an `IdentifiableToken` as illustrated below:
33 //              ✂
34 //    ┌─────────┊────────────────────────────────────────┐ ┌──────────┐
35 //    │(discard)✂           IdentifiableToken            │ │   Type   │
36 //    └─────────┊───────────────────┬────────────────────┘ └────┬─────┘
37 // Bit 64       ┊55                 ┊                   0   7   ┊    0
38 //              ✂                   ↓                           ↓
39 //              ┌────────────────────────────────────────┬──────────┐
40 //              │                                        │          │
41 //              └────────────────────────────────────────┴──────────┘
42 //           Bit 64                                     8 7        0
43 //              │←────────────── IdentifiableSurface ──────────────→│
44 //
45 // Only the lower 56 bits of `IdentifiableToken` contribute to an
46 // `IdentifiableSurface`.
47 //
48 // See descriptions for the `Type` enum values for details on how the
49 // `IdentifiableToken` is generated for each type. The descriptions use the
50 // following notation to indicate how the value is recorded:
51 //
52 //     IdentifiableSurface = { IdentifiableToken value, Type value }
53 //     Value = [description of how the value is constructed]
54 class IdentifiableSurface {
55  public:
56   // Number of bits used by Type.
57   static constexpr int kTypeBits = 8;
58 
59   // Bitmask for extracting Type value from a surface hash.
60   static constexpr uint64_t kTypeMask = (1 << kTypeBits) - 1;
61 
62   // Indicator for an uninitialized IdentifiableSurface. Maps to
63   // {Type::kReservedInternal, 0} which is not possible for a valid surface.
64   static constexpr uint64_t kInvalidHash = 0;
65 
66   // HTML canvas readback -- bits [0-3] of the 64-bit input are the context type
67   // (Type::kCanvasReadback), bits [4-6] are skipped ops, sensitive ops, and
68   // partial image ops bits, respectively. The remaining bits are for the canvas
69   // operations digest. If the digest wasn't calculated (there's no digest for
70   // WebGL, for instance), the digest field is 0.
71   enum CanvasTaintBit : uint64_t {
72     // At least one drawing operation didn't update the digest -- this is ether
73     // due to performance or resource consumption reasons.
74     kSkipped = UINT64_C(0x10),
75 
76     // At least one drawing operation operated on a sensitive string. Sensitive
77     // strings use a 16-bit hash digest.
78     kSensitive = UINT64_C(0x20),
79 
80     // At least one drawing operation was only partially digested, for
81     // performance reasons.
82     kPartiallyDigested = UINT64_C(0x40)
83   };
84 
85   // Type of identifiable surface.
86   //
87   // Even though the data type is uint64_t, we can only use 8 bits due to how we
88   // pack the surface type and a digest of the input into a 64 bits.
89   //
90   // These values are used for aggregation across versions. Entries should not
91   // be renumbered and numeric values should never be reused.
92   enum class Type : uint64_t {
93     // This type is reserved for internal use and should not be used for
94     // reporting any identifiability metrics.
95     //
96     // All metrics defined under the Identifiability event in
97     // tools/metrics/ukm.xml fall into this type. Hence using
98     // `ukm::builders::Identifiability` results in metrics with this type.
99     kReservedInternal = 0,
100 
101     // Represents a web feature whose output directly contributes to
102     // identifiability.
103     //
104     // These APIs are annotated with the `[HighEntropy=Direct]` extended WebIDL
105     // attribute in their respective IDL file. Each such API also has an
106     // associated `UseCounter` value specified directly via the
107     // `[MeasureAs=??]` attribute or indirectly via the `[Measure]` attribute.
108     // This `UseCounter` value is the key for recording the output of the API.
109     // `web_feature.mojom`[1] defines all the `UseCounter` values and is
110     // available as mojom::WebFeature.
111     //
112     //     IdentifiableSurface = { mojom::WebFeature, kWebFeature }
113     //     Value = IdentifiableToken( $(output of the attribute or method) )
114     //
115     // [1]: //blink/public/mojom/web_feature/web_feature.mojom
116     kWebFeature = 1,
117 
118     // Represents a readback of a canvas. Input is the
119     // CanvasRenderingContextType.
120     kCanvasReadback = 2,
121 
122     // Represents loading a font locally based on a name lookup that is allowed
123     // to match either a unique name or a family name. This occurs when a
124     // font-family CSS rule doesn't match any @font-face rule. Input is the
125     // combination of the lookup name and the FontSelectionRequest (i.e. weight,
126     // width and slope).
127     kLocalFontLookupByUniqueOrFamilyName = 3,
128 
129     // Represents looking up the family name of a generic font. Input is the
130     // combination of the generic font family name, script code and
131     // GenericFamilyType.
132     kGenericFontLookup = 4,
133 
134     // Represents an attempt to access files made publicly accessible by
135     // extensions via web_accessible_resources. This may be recorded both in the
136     // renderer and the browser. Browser-side events will be associated with
137     // the top frame's navigation ID, not a child frame. Render-side events are
138     // associated with document's ID.
139     kExtensionFileAccess = 5,
140 
141     // Extension running content-script. Input is the extension ID.
142     kExtensionContentScript = 6,
143 
144     // Represents making a measurement of one of the above surfacess. This
145     // metric is retained even if filtering discards the surface.
146     kMeasuredSurface = 7,
147 
148     // WebGL parameter for WebGLRenderingContext.getParameter().
149     kWebGLParameter = 8,
150 
151     // Represents a call to |MediaRecorder.isTypeSupported(mimeType)|. Input is
152     // the mime type supplied to the method.
153     kMediaRecorder_IsTypeSupported = 9,
154 
155     // Represents a call to |MediaSource.isTypeSupported(mimeType)|. Input is
156     // the mime type supplied to the method.
157     kMediaSource_IsTypeSupported = 10,
158 
159     // Represents a call to |HTMLMediaElement.canPlayType(mimeType)|. Input is
160     // the mime type supplied to the method.
161     kHTMLMediaElement_CanPlayType = 11,
162 
163     // Represents loading a font locally based on a name lookup that is only
164     // allowed to match a unique name. This occurs in @font-face CSS rules with
165     // a src:local attribute. Input is the combination of the lookup name and
166     // the FontSelectionRequest (i.e. weight, width and slope).
167     kLocalFontLookupByUniqueNameOnly = 12,
168 
169     // Represents loading a font locally based on a fallback character. Input is
170     // the combination of the fallback character, FallbackPriority and the
171     // FontSelectionRequest (i.e. weight, width and slope).
172     kLocalFontLookupByFallbackCharacter = 13,
173 
174     // Represents looking up a font locally as a last resort. Input is the
175     // FontSelectionRequest (i.e. weight, width and slope).
176     kLocalFontLookupAsLastResort = 14,
177 
178     // Extension cancelled a network request. Input is the extension ID.
179     kExtensionCancelRequest = 15,
180 
181     // WebGLRenderingContext.getShaderPrecisionFormat() is a high entropy API
182     // that leaks entropy about the underlying GL implementation.
183     // The output is keyed on two enums, but for the identifiability study we
184     // will key this type on a digest of both the enums' values.
185     kWebGLShaderPrecisionFormat = 16,
186 
187     // A type for recording reads of the offsetWidth and offsetHeight properties
188     // when we believe it may be trying to detect the size of the scrollbar.
189     // The input for this surface should be a member of ScrollbarSurfaces.
190     kScrollbarSize = 17,
191 
192     // WebGL2RenderingContext.getInternal
193     kWebGLInternalFormatParameter = 18,
194 
195     // Represents a call to GPU.requestAdapter. Input is the options filter.
196     kGPU_RequestAdapter = 20,
197 
198     // For instrumenting HTMLCanvas.getContext() fingerprinting. Some scripts
199     // will iterate through the different possible arguments and record whether
200     // each type of context is supported.
201     // The input should be an instance of CanvasRenderingContext::ContextType.
202     kCanvasRenderingContext = 21,
203 
204     // Represents a call to MediaDevices.getUserMedia. Input is the set of
205     // constraints.
206     kMediaDevices_GetUserMedia = 22,
207 
208     // NavigatorUAData.getHighEntropyValues() is, shockingly, a high entropy
209     // API to provide more detailed User-Agent data. The output is keyed on
210     // the hint parameter.
211     kNavigatorUAData_GetHighEntropyValues = 24,
212 
213     // MediaCapabilities.decodingInfo() reveals information about whether
214     // media decoding will be supported, smooth and/or power efficient,
215     // according to its codec, size, and other parameters. It can further reveal
216     // details about encrypted decoding support according to the key system
217     // configuration provided.
218     kMediaCapabilities_DecodingInfo = 25,
219 
220     // Represents determining that a local font exists or does not, based on a
221     // name lookup that is only allowed to match a unique name. This occurs in
222     // @font-face CSS rules with a src:local attribute, as well as calls to
223     // FontFace.load() for a FontFace object with a src:local attribute. The
224     // latter can reveal whether a font exists before the full font data are
225     // obtained. Input is the lookup name. Output is a bool.
226     kLocalFontExistenceByUniqueNameOnly = 26,
227 
228     // Represents a call to Navigator.getUserMedia. Input is the set of
229     // constraints.
230     kNavigator_GetUserMedia = 27,
231 
232     // Represents a media query being tested. Input is combination of property
233     // name and the target value. Output is the result --- true or false.
234     kMediaQuery = 28,
235 
236     // Represents loading a font locally. Input is the PostScript name.
237     kLocalFontLoadPostScriptName = 29,
238 
239     // Getting supported codecs, etc. for WebRTC sender -- key is hash of kind
240     // (audio or video).
241     kRtcRtpSenderGetCapabilities = 31,
242 
243     // Getting supported codecs, etc. for WebRTC receiver -- key is hash of kind
244     // (audio or video).
245     kRtcRtpReceiverGetCapabilities = 32,
246 
247     // We can use values up to and including |kMax|.
248     kMax = (1 << kTypeBits) - 1
249   };
250 
251   enum class ScrollbarSurface : uint64_t {
252     kScrollingElementWidth = 0,
253     kScrollingElementHeight = 1,
254     kElemScrollbarWidth = 2,
255     kElemScrollbarHeight = 3,
256   };
257 
258   // Default constructor is invalid.
IdentifiableSurface()259   IdentifiableSurface() : IdentifiableSurface(kInvalidHash) {}
260 
261   // Construct an IdentifiableSurface based on a precalculated metric hash. Can
262   // also be used as the first step in decoding an encoded metric hash.
FromMetricHash(uint64_t metric_hash)263   static constexpr IdentifiableSurface FromMetricHash(uint64_t metric_hash) {
264     return IdentifiableSurface(metric_hash);
265   }
266 
267   // Construct an IdentifiableSurface based on a surface type and an input
268   // token.
FromTypeAndToken(Type type,IdentifiableToken token)269   static constexpr IdentifiableSurface FromTypeAndToken(
270       Type type,
271       IdentifiableToken token) {
272     return IdentifiableSurface(KeyFromSurfaceTypeAndInput(type, token.value_));
273   }
274 
275   // Construct an invalid identifiable surface.
Invalid()276   static constexpr IdentifiableSurface Invalid() {
277     return IdentifiableSurface(kInvalidHash);
278   }
279 
280   // Returns the UKM metric hash corresponding to this IdentifiableSurface.
ToUkmMetricHash()281   constexpr uint64_t ToUkmMetricHash() const { return metric_hash_; }
282 
283   // Returns the type of this IdentifiableSurface.
GetType()284   constexpr Type GetType() const {
285     return std::get<0>(SurfaceTypeAndInputFromMetricKey(metric_hash_));
286   }
287 
288   // Returns the input hash for this IdentifiableSurface.
289   //
290   // The value that's returned can be different from what's used for
291   // constructing the IdentifiableSurface via FromTypeAndToken() if the input is
292   // >= 2^56.
GetInputHash()293   constexpr uint64_t GetInputHash() const {
294     return std::get<1>(SurfaceTypeAndInputFromMetricKey(metric_hash_));
295   }
296 
IsValid()297   constexpr bool IsValid() const { return metric_hash_ != kInvalidHash; }
298 
299  private:
IdentifiableSurface(uint64_t metric_hash)300   constexpr explicit IdentifiableSurface(uint64_t metric_hash)
301       : metric_hash_(metric_hash) {}
302 
303   // Returns a 64-bit metric key given an IdentifiableSurfaceType and a 64 bit
304   // input digest.
305   //
306   // The returned key can be used as the metric hash when invoking
307   // UkmEntryBuilderBase::SetMetricInternal().
KeyFromSurfaceTypeAndInput(Type type,uint64_t input)308   static constexpr uint64_t KeyFromSurfaceTypeAndInput(Type type,
309                                                        uint64_t input) {
310     uint64_t type_as_int = static_cast<uint64_t>(type);
311     return type_as_int | (input << kTypeBits);
312   }
313 
314   // Returns the IdentifiableSurfaceType and the input hash given a metric key.
315   //
316   // This is approximately the inverse of MetricKeyFromSurfaceTypeAndInput().
317   // See caveat in GetInputHash() about cases where the input hash can differ
318   // from that used to construct this IdentifiableSurface.
SurfaceTypeAndInputFromMetricKey(uint64_t metric)319   static constexpr std::tuple<Type, uint64_t> SurfaceTypeAndInputFromMetricKey(
320       uint64_t metric) {
321     return std::make_tuple(static_cast<Type>(metric & kTypeMask),
322                            metric >> kTypeBits);
323   }
324 
325   uint64_t metric_hash_;
326 };
327 
328 constexpr bool operator<(const IdentifiableSurface& left,
329                          const IdentifiableSurface& right) {
330   return left.ToUkmMetricHash() < right.ToUkmMetricHash();
331 }
332 
333 constexpr bool operator<=(const IdentifiableSurface& left,
334                           const IdentifiableSurface& right) {
335   return left.ToUkmMetricHash() <= right.ToUkmMetricHash();
336 }
337 
338 constexpr bool operator>(const IdentifiableSurface& left,
339                          const IdentifiableSurface& right) {
340   return left.ToUkmMetricHash() > right.ToUkmMetricHash();
341 }
342 
343 constexpr bool operator>=(const IdentifiableSurface& left,
344                           const IdentifiableSurface& right) {
345   return left.ToUkmMetricHash() >= right.ToUkmMetricHash();
346 }
347 
348 constexpr bool operator==(const IdentifiableSurface& left,
349                           const IdentifiableSurface& right) {
350   return left.ToUkmMetricHash() == right.ToUkmMetricHash();
351 }
352 
353 constexpr bool operator!=(const IdentifiableSurface& left,
354                           const IdentifiableSurface& right) {
355   return left.ToUkmMetricHash() != right.ToUkmMetricHash();
356 }
357 
358 // Hash function compatible with std::hash.
359 struct IdentifiableSurfaceHash {
operatorIdentifiableSurfaceHash360   size_t operator()(const IdentifiableSurface& s) const {
361     return std::hash<uint64_t>{}(s.ToUkmMetricHash());
362   }
363 };
364 
365 // Compare function compatible with std::less
366 struct IdentifiableSurfaceCompLess {
operatorIdentifiableSurfaceCompLess367   bool operator()(const IdentifiableSurface& lhs,
368                   const IdentifiableSurface& rhs) const {
369     return lhs.ToUkmMetricHash() < rhs.ToUkmMetricHash();
370   }
371 };
372 
373 }  // namespace blink
374 
375 #endif  // THIRD_PARTY_BLINK_PUBLIC_COMMON_PRIVACY_BUDGET_IDENTIFIABLE_SURFACE_H_
376