1 // Copyright 2020 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef THIRD_PARTY_BLINK_PUBLIC_COMMON_PRIVACY_BUDGET_IDENTIFIABLE_TOKEN_BUILDER_H_ 6 #define THIRD_PARTY_BLINK_PUBLIC_COMMON_PRIVACY_BUDGET_IDENTIFIABLE_TOKEN_BUILDER_H_ 7 8 #include <array> 9 10 #include "base/containers/span.h" 11 #include "base/strings/string_piece.h" 12 #include "base/sys_byteorder.h" 13 #include "third_party/blink/public/common/common_export.h" 14 #include "third_party/blink/public/common/privacy_budget/identifiability_internal_templates.h" 15 #include "third_party/blink/public/common/privacy_budget/identifiable_token.h" 16 17 namespace blink { 18 19 // Builds an IdentifiableToken incrementally. 20 // 21 // Use this when the input to a sample is a bunch of disjoint objects, or the 22 // sample needs to include objects that are incrementally encountered. 23 // 24 // Notes: 25 // * The digest returned by this class is *NOT* the same as the one 26 // IdentifiabilityDigestOfBytes for the same set of bytes. This is due to 27 // block based chaining of digests used by this class. 28 // IdentifiabilityDigestOfBytes and this class are *NOT* interchangeable. 29 // 30 // TODO(asanka): IdentifiabilityDigestOfBytes() and this class should 31 // interop better. Perhaps by making the latter use the former. 32 // 33 // * The digest returned by this class is *NOT* the same as what you would 34 // acquire by invoking IdentifiableToken() over the same object. 35 // IdentifiableToken() and this class are *NOT* interchangeable. 36 // 37 // * The digest returned by this class only depends on the cumulative sequence 38 // of bytes that are fed to it. The partitioning thereof is irrelevant. 39 // 40 // * This object never finalizes. Partial digests can be extracted at any 41 // point. 42 class BLINK_COMMON_EXPORT IdentifiableTokenBuilder { 43 public: 44 // Convenient alias for a span of const uint8_t. 45 using ByteSpan = IdentifiableToken::ByteSpan; 46 47 // Initializes an "empty" incremental digest for the purpose of constructing 48 // an identifiability sample. 49 IdentifiableTokenBuilder(); 50 51 // Initializes an incremental digest and populates it with the data contained 52 // in |message|. 53 explicit IdentifiableTokenBuilder(ByteSpan message); 54 55 // Copies the intermediate state. 56 IdentifiableTokenBuilder(const IdentifiableTokenBuilder&); 57 58 // Feeds data contained in |buffer| to the digest. 59 IdentifiableTokenBuilder& AddBytes(ByteSpan buffer); 60 61 // Feeds data contained in |buffer| to the digest, but precedes the buffer 62 // contents with an integer indicating the length. Use this when: 63 // 64 // * |buffer| is atomic. I.e. it will always be added as a single buffer. 65 // 66 // * The boundary between |buffer| and adjacent objects cannot be uniquely 67 // established based on content. 68 // 69 // E.g.: Ignoring NUL terminators, the pair of strings "abcd", "efgh" will be 70 // assigned token as the strings "abcdefg", "h" if both are added 71 // individually via AddBytes(). But they will have distinct digests if 72 // added via AddAtomic(). 73 // 74 // If the contents of the object cannot be specified in a contiguous span of 75 // memory, then consider adding a length directly via AddValue() prior to 76 // adding the contents of the buffer. Doing so will achieve the same ends as 77 // AddAtomic(). 78 IdentifiableTokenBuilder& AddAtomic(ByteSpan buffer); AddAtomic(base::StringPiece string)79 IdentifiableTokenBuilder& AddAtomic(base::StringPiece string) { 80 return AddAtomic(base::as_bytes(base::make_span(string))); 81 } 82 83 // Feeds the underlying value of the |token| itself to the digest. Use this 84 // when |token| is computed in parallel in order to preserve the ordering of 85 // values that were seen in a concurrent sequence that cannot be 86 // deterministically interleaved into the primary stream. 87 IdentifiableTokenBuilder& AddToken(IdentifiableToken token); 88 89 // Helper for feeding primitive types by value efficiently. Anything more 90 // complicated than that should be passed in as a base::span<const uint8_t>. 91 // 92 // Adds eight bytes to the digest. If the type of the value doesn't consume 93 // all of the bytes, pads the remainder with NUL bytes. 94 template <typename T, 95 typename std::enable_if_t< 96 std::is_same<T, internal::remove_cvref_t<T>>::value && 97 internal::has_unique_object_representations<T>::value && 98 sizeof(T) <= sizeof(uint64_t)>* = nullptr> AddValue(T in)99 IdentifiableTokenBuilder& AddValue(T in) { 100 AlignPartialBuffer(); 101 int64_t clean_buffer = 102 base::ByteSwapToLE64(internal::DigestOfObjectRepresentation(in)); 103 return AddBytes(base::make_span( 104 reinterpret_cast<const uint8_t*>(&clean_buffer), sizeof(clean_buffer))); 105 } 106 107 // Conversion operator captures an intermediate digest. 108 // 109 // The sample captures all the data that's been fed into the digest so far, 110 // but doesn't finalize the digest. It is valid to continue adding data after 111 // constructing an intermediate sample. 112 // 113 // (google-explicit-constructor also flags user-defined conversion operators.) 114 // NOLINTNEXTLINE(google-explicit-constructor) 115 operator IdentifiableToken() const; 116 117 // Captures an intermediate digest. 118 // 119 // The sample captures all the data that's been fed into the digest so far, 120 // but doesn't finalize the digest. It is valid to continue adding data after 121 // constructing an intermediate sample. 122 IdentifiableToken GetToken() const; 123 124 // No comparisons. 125 bool operator==(const IdentifiableTokenBuilder&) const = delete; 126 bool operator<(const IdentifiableTokenBuilder&) const = delete; 127 128 private: 129 // Block size. Must be a multiple of 64. Higher block sizes consume more 130 // memory. The extra cost is unlikely to be worth it. 131 // 132 // Under the covers we use CityHash64. It can pretty efficiently digest 133 // 64-byte blocks. 134 static constexpr size_t kBlockSizeInBytes = 64; 135 136 // Target alignment for new buffers. This is set to 8 for all platforms and 137 // must always stay constant across platforms. 138 static constexpr size_t kBlockAlignment = 8; 139 140 // An array of exactly |kBlockSizeInBytes| bytes. 141 using BlockBuffer = std::array<uint8_t, kBlockSizeInBytes>; 142 143 // A view of a full block. 144 using ConstFullBlockSpan = base::span<const uint8_t, kBlockSizeInBytes>; 145 146 // Returns true if the partial buffer is aligned on |kBlockAlignment| 147 // boundary. 148 bool IsAligned() const; 149 150 // Appends enough NUL bytes to |partial_| until the next insertion point is 151 // aligned on a |kBlockAlignment| boundary. 152 // 153 // If the partial buffer is non-empty, its size is unlikely to be aligned at 154 // machine word boundary. This makes subsequent append operations slow for 155 // data types that are already aligned. 156 // 157 // This should only be called prior to adding an atomic buffer. 158 void AlignPartialBuffer(); 159 160 // Captures the |kBlockSizeInBytes| bytes of data in |block| into the digest. 161 // |block| must be exactly this many bytes. 162 void DigestBlock(ConstFullBlockSpan block); 163 164 // Captures as many bytes as possible from |message| into the partial block in 165 // |partial_|. It captures a maximum of |kBlockSizeInBytes - 1| bytes. 166 // 167 // Returns a span covering the remainder of |message| that was not consumed. 168 ByteSpan SkimIntoPartial(ByteSpan message); 169 170 // Returns a span for the contents of the partial block. 171 // 172 // Can be called at any point. Does not change the state of the partial 173 // buffer. 174 ByteSpan GetPartialBlock() const; 175 176 // Returns a span that includes the contents of the partial block and backed 177 // by |partial_|. 178 // 179 // NOTE: Should only be called once |kBlockSizeInBytes| bytes have been 180 // accumulated. Resets |partial_size_| upon completion. 181 // 182 // NOTE: Any subsequent AddBytes(), AddValue(), AddAtomic() calls will 183 // invalidate the returned FullBlock. 184 ConstFullBlockSpan TakeCompletedBlock(); 185 186 // Size of partially filled buffer. 187 size_t PartialSize() const; 188 189 // Accumulates smaller pieces of data until we have a full block. 190 alignas(int64_t) BlockBuffer partial_; 191 192 // Next available position in `partial_`. std::array iterators are never 193 // invalidated. 194 BlockBuffer::iterator position_ = partial_.begin(); 195 196 // Merkle-Damgård chaining. 197 uint64_t chaining_value_; 198 }; 199 200 } // namespace blink 201 202 #endif // THIRD_PARTY_BLINK_PUBLIC_COMMON_PRIVACY_BUDGET_IDENTIFIABLE_TOKEN_BUILDER_H_ 203