//  Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <cmath>

#include "port/port.h"  // for PREFETCH
#include "util/fastrange.h"
#include "util/ribbon_alg.h"

namespace ROCKSDB_NAMESPACE {

namespace ribbon {

// RIBBON PHSF & RIBBON Filter (Rapid Incremental Boolean Banding ON-the-fly)
//
// ribbon_impl.h: templated (parameterized) standard implementations
//
// Ribbon is a Perfect Hash Static Function construction useful as a compact
// static Bloom filter alternative. See ribbon_alg.h for core algorithms
// and core design details.
//
// TODO: more details on trade-offs and practical issues.
//
// APIs for configuring Ribbon are in ribbon_config.h

// Ribbon implementations in this file take these parameters, which must be
// provided in a class/struct type with members expressed in this concept:

// concept TypesAndSettings {
//   // See RibbonTypes and *Hasher in ribbon_alg.h, except here we have
//   // the added constraint that Hash be equivalent to either uint32_t or
//   // uint64_t.
//   typename Hash;
//   typename CoeffRow;
//   typename ResultRow;
//   typename Index;
//   typename Key;
//   static constexpr bool kFirstCoeffAlwaysOne;
//
//   // An unsigned integer type for identifying a hash seed, typically
//   // uint32_t or uint64_t. Importantly, this is the amount of data
//   // stored in memory for identifying a raw seed. See StandardHasher.
//   typename Seed;
//
//   // When true, the PHSF implements a static filter, expecting just
//   // keys as inputs for construction. When false, implements a general
//   // PHSF and expects std::pair<Key, ResultRow> as inputs for
//   // construction.
//   static constexpr bool kIsFilter;
//
//   // When true, enables a special "homogeneous" filter implementation that
//   // is slightly faster to construct and never fails to construct, though
//   // the FP rate can quickly explode in cases where the corresponding
//   // non-homogeneous filter would fail (or nearly fail?) to construct.
//   // For smaller filters, you can configure with ConstructionFailureChance
//   // smaller than the desired FP rate to largely counteract this effect.
//   // TODO: configuring Homogeneous Ribbon for arbitrarily large filters
//   // based on data from OptimizeHomogAtScale
//   static constexpr bool kHomogeneous;
//
//   // When true, adds a tiny bit more hashing logic on queries and
//   // construction to improve utilization at the beginning and end of
//   // the structure.  Recommended when CoeffRow is only 64 bits (or
//   // less), so typical num_starts < 10k. Although this is compatible
//   // with kHomogeneous, the competing space vs. time priorities might
//   // not be useful.
//   static constexpr bool kUseSmash;
//
//   // When true, allows the number of "starts" to be zero, for best
//   // support of the "no keys to add" case by always returning false for
//   // filter queries. (This is distinct from the "keys added but no space
//   // for any data" case, in which a filter always returns true.) The cost
//   // of supporting this is a conditional branch (probably predictable) in
//   // queries.
//   static constexpr bool kAllowZeroStarts;
//
//   // A seedable stock hash function on Keys. All bits of Hash must
//   // be reasonably high quality. XXH functions recommended, but
//   // Murmur, City, Farm, etc. also work.
//   static Hash HashFn(const Key &, Seed raw_seed);
// };
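
// For illustration only, a hypothetical instantiation of the concept for a
// 64-bit filter might look like the sketch below. (The struct name and the
// use of ROCKSDB_NAMESPACE::Hash64 are assumptions for the example, not part
// of this header; any seedable hash meeting the requirements above would do.)
//
//   struct ExampleFilterTypesAndSettings {
//     using CoeffRow = uint64_t;   // 64 coefficient bits per entry
//     using ResultRow = uint8_t;   // 8 solution bits -> ~1/256 FP rate
//     using Index = uint32_t;
//     using Hash = uint64_t;
//     using Key = ROCKSDB_NAMESPACE::Slice;
//     using Seed = uint32_t;
//     static constexpr bool kFirstCoeffAlwaysOne = true;
//     static constexpr bool kIsFilter = true;   // keys only, no values
//     static constexpr bool kHomogeneous = false;
//     static constexpr bool kUseSmash = false;  // fine for larger filters
//     static constexpr bool kAllowZeroStarts = true;
//     static Hash HashFn(const Key& key, Seed raw_seed) {
//       return ROCKSDB_NAMESPACE::Hash64(key.data(), key.size(), raw_seed);
//     }
//   };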

// A bit of a hack to automatically construct the type for
// AddInput based on a constexpr bool.
template <typename Key, typename ResultRow, bool IsFilter>
struct AddInputSelector {
  // For general PHSF, not filter
  using T = std::pair<Key, ResultRow>;
};

template <typename Key, typename ResultRow>
struct AddInputSelector<Key, ResultRow, true /*IsFilter*/> {
  // For Filter
  using T = Key;
};

// To avoid writing 'typename' everywhere that we use types like 'Index'
#define IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings)                   \
  using CoeffRow = typename TypesAndSettings::CoeffRow;                      \
  using ResultRow = typename TypesAndSettings::ResultRow;                    \
  using Index = typename TypesAndSettings::Index;                            \
  using Hash = typename TypesAndSettings::Hash;                              \
  using Key = typename TypesAndSettings::Key;                                \
  using Seed = typename TypesAndSettings::Seed;                              \
                                                                             \
  /* Some more additions */                                                  \
  using QueryInput = Key;                                                    \
  using AddInput = typename ROCKSDB_NAMESPACE::ribbon::AddInputSelector<     \
      Key, ResultRow, TypesAndSettings::kIsFilter>::T;                       \
  static constexpr auto kCoeffBits =                                         \
      static_cast<Index>(sizeof(CoeffRow) * 8U);                             \
                                                                             \
  /* Export to algorithm */                                                  \
  static constexpr bool kFirstCoeffAlwaysOne =                               \
      TypesAndSettings::kFirstCoeffAlwaysOne;                                \
                                                                             \
  static_assert(sizeof(CoeffRow) + sizeof(ResultRow) + sizeof(Index) +       \
                        sizeof(Hash) + sizeof(Key) + sizeof(Seed) +          \
                        sizeof(QueryInput) + sizeof(AddInput) + kCoeffBits + \
                        kFirstCoeffAlwaysOne >                               \
                    0,                                                       \
                "avoid unused warnings, semicolon expected after macro call")

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4309)  // cast truncating constant
#pragma warning(disable : 4307)  // arithmetic constant overflow
#endif

// StandardHasher: A standard implementation of concepts RibbonTypes,
// PhsfQueryHasher, FilterQueryHasher, and BandingHasher from ribbon_alg.h.
//
// This implementation should be suitable for almost all practical purposes
// as it "behaves" across a wide range of settings, with little room left
// for improvement. The key functionality in this hasher is generating
// CoeffRows, starts, and (for filters) ResultRows, which could be ~150
// bits of data or more, from a modest hash of 64 or even just 32 bits, with
// enough uniformity and bitwise independence to be close to "the best you
// can do" with available hash information in terms of FP rate and
// compactness. (64 bits recommended and sufficient for PHSF practical
// purposes.)
//
// Another feature of this hasher is a minimal "premixing" of seeds before
// they are provided to TypesAndSettings::HashFn in case that function does
// not provide sufficiently independent hashes when iterating merely
// sequentially on seeds. (This, for example, works around a problem with the
// preview version 0.7.2 of XXH3 used in RocksDB, a.k.a. XXPH3 or Hash64, and
// MurmurHash1 used in RocksDB, a.k.a. Hash.) We say this pre-mixing step
// translates "ordinal seeds," which we iterate sequentially to find a
// solution, into "raw seeds," with many more bits changing for each
// iteration. The translation is an easily reversible lightweight mixing,
// not suitable for hashing on its own. An advantage of this approach is that
// StandardHasher can store just the raw seed (e.g. 64 bits) for fast query
// times, while from the application perspective, we can limit to a small
// number of ordinal seeds (e.g. 64 in 6 bits) for saving in metadata.
//
// The default constructor initializes the seed to ordinal seed zero, which
// is equal to raw seed zero.
//
template <class TypesAndSettings>
class StandardHasher {
 public:
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);

  inline Hash GetHash(const Key& key) const {
    return TypesAndSettings::HashFn(key, raw_seed_);
  }
  // For when AddInput == pair<Key, ResultRow> (kIsFilter == false)
  inline Hash GetHash(const std::pair<Key, ResultRow>& bi) const {
    return GetHash(bi.first);
  }
  inline Index GetStart(Hash h, Index num_starts) const {
    // This is "critical path" code because it's required before memory
    // lookup.
    //
    // FastRange gives us a fast and effective mapping from h to the
    // appropriate range. This depends most, sometimes exclusively, on
    // upper bits of h.
    //
    if (TypesAndSettings::kUseSmash) {
      // Extra logic to "smash" entries at the beginning and end, for
      // better utilization. For example, without smash and with
      // kFirstCoeffAlwaysOne, there's about a 30% chance that the
      // first slot in the banding will be unused, and worse without
      // kFirstCoeffAlwaysOne. The ending slots are even less utilized
      // without smash.
      //
      // But since this only affects roughly kCoeffBits of the slots,
      // it's usually small enough to be ignorable (less computation in
      // this function) when the number of slots is roughly 10k or larger.
      //
      // The best values for these smash weights might depend on how
      // densely you're packing entries, and also kCoeffBits, but this
      // seems to work well for roughly 95% success probability.
      //
      constexpr Index kFrontSmash = kCoeffBits / 4;
      constexpr Index kBackSmash = kCoeffBits / 4;
      Index start = FastRangeGeneric(h, num_starts + kFrontSmash + kBackSmash);
      start = std::max(start, kFrontSmash);
      start -= kFrontSmash;
      start = std::min(start, num_starts - 1);
      return start;
    } else {
      // For query speed, we allow a small number of initial and final
      // entries to be under-utilized.
      // NOTE: This call statically enforces that Hash is equivalent to
      // either uint32_t or uint64_t.
      return FastRangeGeneric(h, num_starts);
    }
  }
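  // Worked example of the smash mapping above (illustrative numbers, not
  // normative): with kCoeffBits == 64, kFrontSmash == kBackSmash == 16.
  // FastRangeGeneric spreads h over [0, num_starts + 32). Any raw value in
  // [0, 16] is clamped to start 0, and any raw value in
  // [num_starts + 15, num_starts + 31] is clamped to start num_starts - 1,
  // so the first and last starts each receive roughly 17x the probability
  // mass of an interior start, which is what lets entries "stack up" near
  // the otherwise under-utilized ends.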
  inline CoeffRow GetCoeffRow(Hash h) const {
    // This is not so much "critical path" code because it can be done in
    // parallel (instruction level) with memory lookup.
    //
    // When we might have many entries squeezed into a single start,
    // we need reasonably good remixing for CoeffRow.
    if (TypesAndSettings::kUseSmash) {
      // Reasonably good, reasonably fast, reasonably general.
      // Probably not 1:1 but probably close enough.
      Unsigned128 a = Multiply64to128(h, kAltCoeffFactor1);
      Unsigned128 b = Multiply64to128(h, kAltCoeffFactor2);
      auto cr = static_cast<CoeffRow>(b ^ (a << 64) ^ (a >> 64));

      // Now ensure the value is non-zero
      if (kFirstCoeffAlwaysOne) {
        cr |= 1;
      } else {
        // Still have to ensure some bit is non-zero
        cr |= (cr == 0) ? 1 : 0;
      }
      return cr;
    }
    // If not kUseSmash, we ensure we're not squeezing many entries into a
    // single start, in part by ensuring num_starts > num_slots / 2. Thus,
    // here we do not need good remixing for CoeffRow, but just enough that
    // (a) every bit is reasonably independent from Start.
    // (b) every Hash-length bit subsequence of the CoeffRow has full or
    // nearly full entropy from h.
    // (c) if nontrivial bit subsequences within are correlated, the
    // correlation needs to be more complicated than exact copy or bitwise
    // not (at least without kFirstCoeffAlwaysOne), or else there seems to
    // be a kind of correlated clustering effect.
    // (d) the CoeffRow is not zero, so that no one input on its own can
    // doom construction success. (Preferably a mix of 1's and 0's if
    // satisfying above.)

    // First, establish sufficient bitwise independence from Start, with
    // multiplication by a large random prime.
    // Note that we cast to Hash because if we use product bits beyond
    // original input size, that's going to correlate with Start (FastRange)
    // even with a (likely) different multiplier here.
    Hash a = h * kCoeffAndResultFactor;

    static_assert(
        sizeof(Hash) == sizeof(uint64_t) || sizeof(Hash) == sizeof(uint32_t),
        "Supported sizes");
    // If that's big enough, we're done. If not, we have to expand it,
    // maybe up to 4x size.
    uint64_t b;
    if (sizeof(Hash) < sizeof(uint64_t)) {
      // Almost-trivial hash expansion (OK - see above), favoring roughly
      // equal number of 1's and 0's in result
      b = (uint64_t{a} << 32) ^ (a ^ kCoeffXor32);
    } else {
      b = a;
    }
    static_assert(sizeof(CoeffRow) <= sizeof(Unsigned128), "Supported sizes");
    Unsigned128 c;
    if (sizeof(uint64_t) < sizeof(CoeffRow)) {
      // Almost-trivial hash expansion (OK - see above), favoring roughly
      // equal number of 1's and 0's in result
      c = (Unsigned128{b} << 64) ^ (b ^ kCoeffXor64);
    } else {
      c = b;
    }
    auto cr = static_cast<CoeffRow>(c);

    // Now ensure the value is non-zero
    if (kFirstCoeffAlwaysOne) {
      cr |= 1;
    } else if (sizeof(CoeffRow) == sizeof(Hash)) {
      // Still have to ensure some bit is non-zero
      cr |= (cr == 0) ? 1 : 0;
    } else {
      // (We did trivial expansion with constant xor, which ensures some
      // bits are non-zero.)
    }
    return cr;
  }
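  // Illustrative trace of the expansion above (not normative): with a
  // 32-bit Hash and a 64-bit CoeffRow, a = h * kCoeffAndResultFactor
  // (truncated to 32 bits), and b spreads a into 64 bits with high half a
  // and low half a ^ kCoeffXor32, so the two halves differ by a constant
  // xor and cannot both be zero. With a 64-bit Hash and a 64-bit CoeffRow,
  // b == a and c == b, so only the kFirstCoeffAlwaysOne / cr |= 1 fix-up
  // above guards against a zero row.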
  inline ResultRow GetResultRowMask() const {
    // TODO: will be used with InterleavedSolutionStorage?
    // For now, all bits set (note: might be a small type so might need to
    // narrow after promotion)
    return static_cast<ResultRow>(~ResultRow{0});
  }
  inline ResultRow GetResultRowFromHash(Hash h) const {
    if (TypesAndSettings::kIsFilter && !TypesAndSettings::kHomogeneous) {
      // This is not so much "critical path" code because it can be done in
      // parallel (instruction level) with memory lookup.
      //
      // ResultRow bits only need to be independent from CoeffRow bits if
      // many entries might have the same start location, where "many" is
      // comparable to number of hash bits or kCoeffBits. If !kUseSmash
      // and num_starts > kCoeffBits, it is safe and efficient to draw from
      // the same bits computed for CoeffRow, which are reasonably
      // independent from Start. (Inlining and common subexpression
      // elimination with GetCoeffRow should make this
      // a single shared multiplication in generated code when !kUseSmash.)
      Hash a = h * kCoeffAndResultFactor;

      // The bits here that are *most* independent of Start are the highest
      // order bits (as in Knuth multiplicative hash). To make those the
      // most preferred for use in the result row, we do a bswap here.
      auto rr = static_cast<ResultRow>(EndianSwapValue(a));
      return rr & GetResultRowMask();
    } else {
      // Must be zero
      return 0;
    }
  }
  // For when AddInput == Key (kIsFilter == true)
  inline ResultRow GetResultRowFromInput(const Key&) const {
    // Must be zero
    return 0;
  }
  // For when AddInput == pair<Key, ResultRow> (kIsFilter == false)
  inline ResultRow GetResultRowFromInput(
      const std::pair<Key, ResultRow>& bi) const {
    // Simple extraction
    return bi.second;
  }

  // Seed tracking APIs - see class comment
  void SetRawSeed(Seed seed) { raw_seed_ = seed; }
  Seed GetRawSeed() { return raw_seed_; }
  void SetOrdinalSeed(Seed count) {
    // A simple, reversible mixing of any size (whole bytes) up to 64 bits.
    // This allows casting the raw seed to any smaller size we use for
    // ordinal seeds without risk of duplicate raw seeds for unique ordinal
    // seeds.

    // Seed type might be smaller than numerical promotion size, but Hash
    // should be at least that size, so we use Hash as the intermediate type.
    static_assert(sizeof(Seed) <= sizeof(Hash),
                  "Hash must be at least size of Seed");

    // Multiply by a large random prime (one-to-one for any prefix of bits)
    Hash tmp = count * kToRawSeedFactor;
    // Within-byte one-to-one mixing
    static_assert((kSeedMixMask & (kSeedMixMask >> kSeedMixShift)) == 0,
                  "Illegal mask+shift");
    tmp ^= (tmp & kSeedMixMask) >> kSeedMixShift;
    raw_seed_ = static_cast<Seed>(tmp);
    // dynamic verification
    assert(GetOrdinalSeed() == count);
  }
  Seed GetOrdinalSeed() {
    Hash tmp = raw_seed_;
    // Within-byte one-to-one mixing (its own inverse)
    tmp ^= (tmp & kSeedMixMask) >> kSeedMixShift;
    // Multiply by 64-bit multiplicative inverse
    static_assert(kToRawSeedFactor * kFromRawSeedFactor == Hash{1},
                  "Must be inverses");
    return static_cast<Seed>(tmp * kFromRawSeedFactor);
  }
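  // Illustrative round trip (hypothetical values): an application that
  // persists only a 6-bit ordinal seed in metadata might do
  //
  //   hasher.SetOrdinalSeed(5);        // premix 5 into a raw seed with
  //                                    // many bits differing from ordinal 4
  //   Seed raw = hasher.GetRawSeed();  // kept in memory for fast queries
  //   assert(hasher.GetOrdinalSeed() == 5);  // the mixing is reversible
  //
  // storing just the ordinal value 5, and reconstructing the raw seed via
  // SetOrdinalSeed at load time.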

 protected:
  // For expanding hash:
  // large random prime
  static constexpr Hash kCoeffAndResultFactor =
      static_cast<Hash>(0xc28f82822b650bedULL);
  static constexpr uint64_t kAltCoeffFactor1 = 0x876f170be4f1fcb9U;
  static constexpr uint64_t kAltCoeffFactor2 = 0xf0433a4aecda4c5fU;
  // random-ish data
  static constexpr uint32_t kCoeffXor32 = 0xa6293635U;
  static constexpr uint64_t kCoeffXor64 = 0xc367844a6e52731dU;

  // For pre-mixing seeds
  static constexpr Hash kSeedMixMask = static_cast<Hash>(0xf0f0f0f0f0f0f0f0ULL);
  static constexpr unsigned kSeedMixShift = 4U;
  static constexpr Hash kToRawSeedFactor =
      static_cast<Hash>(0xc78219a23eeadd03ULL);
  static constexpr Hash kFromRawSeedFactor =
      static_cast<Hash>(0xfe1a137d14b475abULL);

  // See class description
  Seed raw_seed_ = 0;
};

// StandardRehasher (and StandardRehasherAdapter): A variant of
// StandardHasher that uses the same type for keys as for hashes.
// This is primarily intended for building a Ribbon filter
// from existing hashes without going back to original inputs in
// order to apply a different seed. This hasher seeds a 1-to-1 mixing
// transformation to apply a seed to an existing hash. (Untested for
// hash-sized keys that are not already uniformly distributed.) This
// transformation builds on the seed pre-mixing done in StandardHasher.
//
// Testing suggests essentially no degradation of solution success rate
// vs. going back to original inputs when changing hash seeds. For example:
// Average re-seeds for solution with r=128, 1.02x overhead, and ~100k keys
// is about 1.10 for both StandardHasher and StandardRehasher.
//
// StandardRehasher is not really recommended for general PHSFs (not
// filters) because a collision in the original hash could prevent
// construction despite re-seeding the Rehasher. (Such collisions
// do not interfere with filter construction.)
//
// concept RehasherTypesAndSettings: like TypesAndSettings but
// does not require Key or HashFn.
template <class RehasherTypesAndSettings>
class StandardRehasherAdapter : public RehasherTypesAndSettings {
 public:
  using Hash = typename RehasherTypesAndSettings::Hash;
  using Key = Hash;
  using Seed = typename RehasherTypesAndSettings::Seed;

  static Hash HashFn(const Hash& input, Seed raw_seed) {
    // Note: raw_seed is already lightly pre-mixed, and this multiplication
    // by a large prime is sufficient mixing (low-to-high bits) on top of
    // that for good FastRange results, which depend primarily on the
    // highest bits. (The hashed CoeffRow and ResultRow are less sensitive
    // to mixing than Start.)
    // Also note: did consider adding ^ (input >> some) before the
    // multiplication, but doesn't appear to be necessary.
    return (input ^ raw_seed) * kRehashFactor;
  }

 private:
  static constexpr Hash kRehashFactor =
      static_cast<Hash>(0x6193d459236a3a0dULL);
};

// See comment on StandardRehasherAdapter
template <class RehasherTypesAndSettings>
using StandardRehasher =
    StandardHasher<StandardRehasherAdapter<RehasherTypesAndSettings>>;

#ifdef _MSC_VER
#pragma warning(pop)
#endif

// Especially with smaller hashes (e.g. 32 bit), there can be noticeable
// false positives due to collisions in the Hash returned by GetHash.
// This function returns the expected FP rate due to those collisions,
// which can be added to the expected FP rate from the underlying data
// structure. (Note: technically, a + b is only a good approximation of
// 1-(1-a)(1-b) == a + b - a*b, if a and b are much closer to 0 than to 1.)
// The number of entries added can be a double here in case it's an
// average.
template <class Hasher, typename Numerical>
double ExpectedCollisionFpRate(const Hasher& hasher, Numerical added) {
  // Standardize on the 'double' specialization
  return ExpectedCollisionFpRate(hasher, 1.0 * added);
}
template <class Hasher>
double ExpectedCollisionFpRate(const Hasher& /*hasher*/, double added) {
  // Technically, there could be overlap among the added, but ignoring that
  // is typically close enough.
  return added / std::pow(256.0, sizeof(typename Hasher::Hash));
}
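
// Worked example (illustrative): with a 32-bit Hash and one million added
// keys, the additional FP rate from hash collisions is roughly
// 1e6 / 256^4 == 1e6 / 2^32 ~= 0.00023 (about 0.023%), which is why 64-bit
// hashes are recommended except for small filters. With a 64-bit Hash the
// same term is about 1e6 / 2^64 ~= 5.4e-14, i.e. negligible.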

// StandardBanding: a canonical implementation of BandingStorage and
// BacktrackStorage, with convenience API for banding (solving with on-the-fly
// Gaussian elimination) with and without backtracking.
template <class TypesAndSettings>
class StandardBanding : public StandardHasher<TypesAndSettings> {
 public:
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);

  StandardBanding(Index num_slots = 0, Index backtrack_size = 0) {
    Reset(num_slots, backtrack_size);
  }

  void Reset(Index num_slots, Index backtrack_size = 0) {
    if (num_slots == 0) {
      // Unusual (TypesAndSettings::kAllowZeroStarts) or "uninitialized"
      num_starts_ = 0;
    } else {
      // Normal
      assert(num_slots >= kCoeffBits);
      if (num_slots > num_slots_allocated_) {
        coeff_rows_.reset(new CoeffRow[num_slots]());
        if (!TypesAndSettings::kHomogeneous) {
          // Note: don't strictly have to zero-init result_rows,
          // except possible information leakage, etc ;)
          result_rows_.reset(new ResultRow[num_slots]());
        }
        num_slots_allocated_ = num_slots;
      } else {
        for (Index i = 0; i < num_slots; ++i) {
          coeff_rows_[i] = 0;
          if (!TypesAndSettings::kHomogeneous) {
            // Note: don't strictly have to zero-init result_rows,
            // except possible information leakage, etc ;)
            result_rows_[i] = 0;
          }
        }
      }
      num_starts_ = num_slots - kCoeffBits + 1;
    }
    EnsureBacktrackSize(backtrack_size);
  }

  void EnsureBacktrackSize(Index backtrack_size) {
    if (backtrack_size > backtrack_size_) {
      backtrack_.reset(new Index[backtrack_size]);
      backtrack_size_ = backtrack_size;
    }
  }

  // ********************************************************************
  // From concept BandingStorage

  inline bool UsePrefetch() const {
    // A rough guesstimate of when prefetching during construction pays off.
    // TODO: verify/validate
    return num_starts_ > 1500;
  }
  inline void Prefetch(Index i) const {
    PREFETCH(&coeff_rows_[i], 1 /* rw */, 1 /* locality */);
    if (!TypesAndSettings::kHomogeneous) {
      PREFETCH(&result_rows_[i], 1 /* rw */, 1 /* locality */);
    }
  }
  inline void LoadRow(Index i, CoeffRow* cr, ResultRow* rr,
                      bool for_back_subst) const {
    *cr = coeff_rows_[i];
    if (TypesAndSettings::kHomogeneous) {
      if (for_back_subst && *cr == 0) {
        // Cheap pseudorandom data to fill unconstrained solution rows
        *rr = static_cast<ResultRow>(i * 0x9E3779B185EBCA87ULL);
      } else {
        *rr = 0;
      }
    } else {
      *rr = result_rows_[i];
    }
  }
  inline void StoreRow(Index i, CoeffRow cr, ResultRow rr) {
    coeff_rows_[i] = cr;
    if (TypesAndSettings::kHomogeneous) {
      assert(rr == 0);
    } else {
      result_rows_[i] = rr;
    }
  }
  inline Index GetNumStarts() const { return num_starts_; }

  // from concept BacktrackStorage, for when backtracking is used
  inline bool UseBacktrack() const { return true; }
  inline void BacktrackPut(Index i, Index to_save) { backtrack_[i] = to_save; }
  inline Index BacktrackGet(Index i) const { return backtrack_[i]; }

  // ********************************************************************
  // Some useful API, still somewhat low level. Here an input is
  // a Key for filters, or std::pair<Key, ResultRow> for general PHSF.

  // Adds a range of inputs to the banding, returning true if successful.
  // False means that none, or only some, of the inputs may have been added,
  // so it's best to Reset this banding before any further use.
  //
  // Adding can fail even before all the "slots" are completely "full".
  //
  template <typename InputIterator>
  bool AddRange(InputIterator begin, InputIterator end) {
    assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts);
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual. Can't add any in this case.
      return begin == end;
    }
    // Normal
    return BandingAddRange(this, *this, begin, end);
  }

  // Adds a range of inputs to the banding, returning true if successful,
  // or if unsuccessful, rolls back to the state before this call and returns
  // false. Caller guarantees that the number of inputs in this batch
  // does not exceed `backtrack_size` provided to Reset.
  //
  // Adding can fail even before all the "slots" are completely "full".
  //
  template <typename InputIterator>
  bool AddRangeOrRollBack(InputIterator begin, InputIterator end) {
    assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts);
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual. Can't add any in this case.
      return begin == end;
    }
    // else Normal
    return BandingAddRange(this, this, *this, begin, end);
  }

  // Adds a single input to the banding, returning true if successful.
  // If unsuccessful, returns false and banding state is unchanged.
  //
  // Adding can fail even before all the "slots" are completely "full".
  //
  bool Add(const AddInput& input) {
    // Pointer can act as iterator
    return AddRange(&input, &input + 1);
  }

  // Returns the number of "occupied" rows (with non-zero coefficients
  // stored).
  Index GetOccupiedCount() const {
    Index count = 0;
    if (num_starts_ > 0) {
      const Index num_slots = num_starts_ + kCoeffBits - 1;
      for (Index i = 0; i < num_slots; ++i) {
        if (coeff_rows_[i] != 0) {
          ++count;
        }
      }
    }
    return count;
  }

  // Returns whether a row is "occupied" in the banding (non-zero
  // coefficients stored). (Only recommended for debug/test)
  bool IsOccupied(Index i) { return coeff_rows_[i] != 0; }

  // ********************************************************************
  // High-level API

  // Iteratively (a) resets the structure for `num_slots`, (b) attempts
  // to add the range of inputs, and (c) if unsuccessful, chooses the next
  // hash seed, until either successful or unsuccessful with all the
  // allowed seeds. Returns true if successful. In that case, use
  // GetOrdinalSeed() or GetRawSeed() to get the successful seed.
  //
  // The allowed sequence of hash seeds is determined by
  // `starting_ordinal_seed`, the first ordinal seed to be attempted
  // (see StandardHasher), and `ordinal_seed_mask`, a bit mask (power of
  // two minus one) for the range of ordinal seeds to consider. The
  // max number of seeds considered will be ordinal_seed_mask + 1.
  // For filters we suggest `starting_ordinal_seed` be chosen randomly
  // or round-robin, to minimize false positive correlations between keys.
  //
  // If unsuccessful, how best to continue is going to be application
  // specific. It should be possible to choose parameters such that
  // failure is extremely unlikely, using an `ordinal_seed_mask` allowing
  // around 32 to 64 seeds. (TODO: APIs to help choose parameters) One
  // option for fallback in constructing a filter is to construct a Bloom
  // filter instead. Increasing num_slots is an option, but should not be
  // used often unless maximum construction latency is a concern (rather
  // than average running time of construction). Instead, choose parameters
  // appropriately and trust that seeds are independent. (Also,
  // increasing num_slots without changing hash seed would have a
  // significant correlation in success, rather than independence.)
  template <typename InputIterator>
  bool ResetAndFindSeedToSolve(Index num_slots, InputIterator begin,
                               InputIterator end,
                               Seed starting_ordinal_seed = 0U,
                               Seed ordinal_seed_mask = 63U) {
    // power of 2 minus 1
    assert((ordinal_seed_mask & (ordinal_seed_mask + 1)) == 0);
    // starting seed is within mask
    assert((starting_ordinal_seed & ordinal_seed_mask) ==
           starting_ordinal_seed);
    starting_ordinal_seed &= ordinal_seed_mask;  // if not debug

    Seed cur_ordinal_seed = starting_ordinal_seed;
    do {
      StandardHasher<TypesAndSettings>::SetOrdinalSeed(cur_ordinal_seed);
      Reset(num_slots);
      bool success = AddRange(begin, end);
      if (success) {
        return true;
      }
      cur_ordinal_seed = (cur_ordinal_seed + 1) & ordinal_seed_mask;
    } while (cur_ordinal_seed != starting_ordinal_seed);
    // Reached limit by circling around
    return false;
  }
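  // Illustrative usage (hypothetical names such as MyTypesAndSettings and
  // `keys`, a std::vector of keys), assuming num_slots has already been
  // rounded up for the intended solution storage:
  //
  //   StandardBanding<MyTypesAndSettings> banding;
  //   if (banding.ResetAndFindSeedToSolve(num_slots, keys.begin(), keys.end(),
  //                                       /*starting seed*/ 0, /*mask*/ 63)) {
  //     Seed ordinal = banding.GetOrdinalSeed();  // persist (6 bits here)
  //     // ... back-substitute into a solution storage (see below) ...
  //   } else {
  //     // fall back, e.g. to a Bloom filter, or retry with more slots
  //   }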

 protected:
  // TODO: explore combining in a struct
  std::unique_ptr<CoeffRow[]> coeff_rows_;
  std::unique_ptr<ResultRow[]> result_rows_;
  // We generally store "starts" instead of slots for speed of GetStart(),
  // as in StandardHasher.
  Index num_starts_ = 0;
  Index num_slots_allocated_ = 0;
  std::unique_ptr<Index[]> backtrack_;
  Index backtrack_size_ = 0;
};

// Implements concept SimpleSolutionStorage, mostly for demonstration
// purposes. This is "in memory" only because it does not handle byte
// ordering issues for serialization.
template <class TypesAndSettings>
class InMemSimpleSolution {
 public:
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);

  void PrepareForNumStarts(Index num_starts) {
    if (TypesAndSettings::kAllowZeroStarts && num_starts == 0) {
      // Unusual
      num_starts_ = 0;
    } else {
      // Normal
      const Index num_slots = num_starts + kCoeffBits - 1;
      assert(num_slots >= kCoeffBits);
      if (num_slots > num_slots_allocated_) {
        // Do not need to init the memory
        solution_rows_.reset(new ResultRow[num_slots]);
        num_slots_allocated_ = num_slots;
      }
      num_starts_ = num_starts;
    }
  }

  Index GetNumStarts() const { return num_starts_; }

  ResultRow Load(Index slot_num) const { return solution_rows_[slot_num]; }

  void Store(Index slot_num, ResultRow solution_row) {
    solution_rows_[slot_num] = solution_row;
  }

  // ********************************************************************
  // High-level API

  template <typename BandingStorage>
  void BackSubstFrom(const BandingStorage& bs) {
    if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) {
      // Unusual
      PrepareForNumStarts(0);
    } else {
      // Normal
      SimpleBackSubst(this, bs);
    }
  }

  template <typename PhsfQueryHasher>
  ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) const {
    // assert(!TypesAndSettings::kIsFilter);  Can be useful in testing
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual
      return 0;
    } else {
      // Normal
      return SimplePhsfQuery(input, hasher, *this);
    }
  }

  template <typename FilterQueryHasher>
  bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) const {
    assert(TypesAndSettings::kIsFilter);
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual. Zero starts presumes no keys added -> always false
      return false;
    } else {
      // Normal
      return SimpleFilterQuery(input, hasher, *this);
    }
  }

  double ExpectedFpRate() const {
    assert(TypesAndSettings::kIsFilter);
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual, but we don't have FPs if we always return false.
      return 0.0;
    }
    // else Normal

    // Each result (solution) bit (column) cuts FP rate in half
    return std::pow(0.5, 8U * sizeof(ResultRow));
  }
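  // For example (illustrative): with ResultRow == uint8_t, the expected FP
  // rate is 0.5^8 == 1/256 ~= 0.39%; with uint16_t it is 0.5^16 ~= 0.0015%.
  // The fixed ResultRow width is why this storage cannot express fractional
  // bits per key, unlike SerializableInterleavedSolution below.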

  // ********************************************************************
  // Static high-level API

  // Round up to a number of slots supported by this structure. Note that
  // this must be taken into account for the banding if this
  // solution layout/storage is to be used.
  static Index RoundUpNumSlots(Index num_slots) {
    // Must be at least kCoeffBits for at least one start.
    // Or, if not using smash, even more, because the hashing is not
    // equipped for stacking up so many entries on a single start location.
    auto min_slots = kCoeffBits * (TypesAndSettings::kUseSmash ? 1 : 2);
    return std::max(num_slots, static_cast<Index>(min_slots));
  }

 protected:
  // We generally store "starts" instead of slots for speed of GetStart(),
  // as in StandardHasher.
  Index num_starts_ = 0;
  Index num_slots_allocated_ = 0;
  std::unique_ptr<ResultRow[]> solution_rows_;
};

// Implements concept InterleavedSolutionStorage always using little-endian
// byte order, so easy for serialization/deserialization. This implementation
// fully supports fractional bits per key, where any number of segments
// (number of bytes multiple of sizeof(CoeffRow)) can be used with any number
// of slots that is a multiple of kCoeffBits.
//
// The structure is passed an externally allocated/de-allocated byte buffer
// that is optionally pre-populated (from storage) for answering queries,
// or can be populated by BackSubstFrom.
//
template <class TypesAndSettings>
class SerializableInterleavedSolution {
 public:
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);

  // Does not take ownership of `data` but uses it (up to `data_len` bytes)
  // throughout its lifetime
  SerializableInterleavedSolution(char* data, size_t data_len)
      : data_(data), data_len_(data_len) {}

  void PrepareForNumStarts(Index num_starts) {
    assert(num_starts == 0 || (num_starts % kCoeffBits == 1));
    num_starts_ = num_starts;

    InternalConfigure();
  }

  Index GetNumStarts() const { return num_starts_; }

  Index GetNumBlocks() const {
    const Index num_slots = num_starts_ + kCoeffBits - 1;
    return num_slots / kCoeffBits;
  }

  Index GetUpperNumColumns() const { return upper_num_columns_; }

  Index GetUpperStartBlock() const { return upper_start_block_; }

  Index GetNumSegments() const {
    return static_cast<Index>(data_len_ / sizeof(CoeffRow));
  }

  CoeffRow LoadSegment(Index segment_num) const {
    assert(data_ != nullptr);  // suppress clang analyzer report
    return DecodeFixedGeneric<CoeffRow>(data_ + segment_num * sizeof(CoeffRow));
  }
  void StoreSegment(Index segment_num, CoeffRow val) {
    assert(data_ != nullptr);  // suppress clang analyzer report
    EncodeFixedGeneric(data_ + segment_num * sizeof(CoeffRow), val);
  }
  void PrefetchSegmentRange(Index begin_segment_num,
                            Index end_segment_num) const {
    if (end_segment_num == begin_segment_num) {
      // Nothing to do
      return;
    }
    char* cur = data_ + begin_segment_num * sizeof(CoeffRow);
    char* last = data_ + (end_segment_num - 1) * sizeof(CoeffRow);
    while (cur < last) {
      PREFETCH(cur, 0 /* rw */, 1 /* locality */);
      cur += CACHE_LINE_SIZE;
    }
    PREFETCH(last, 0 /* rw */, 1 /* locality */);
  }

  // ********************************************************************
  // High-level API

  void ConfigureForNumBlocks(Index num_blocks) {
    if (num_blocks == 0) {
      PrepareForNumStarts(0);
    } else {
      PrepareForNumStarts(num_blocks * kCoeffBits - kCoeffBits + 1);
    }
  }

  void ConfigureForNumSlots(Index num_slots) {
    assert(num_slots % kCoeffBits == 0);
    ConfigureForNumBlocks(num_slots / kCoeffBits);
  }

  template <typename BandingStorage>
  void BackSubstFrom(const BandingStorage& bs) {
    if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) {
      // Unusual
      PrepareForNumStarts(0);
    } else {
      // Normal
      InterleavedBackSubst(this, bs);
    }
  }

  template <typename PhsfQueryHasher>
  ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) const {
    // assert(!TypesAndSettings::kIsFilter);  Can be useful in testing
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual
      return 0;
    } else {
      // Normal
      // NOTE: not using a struct to encourage compiler optimization
      Hash hash;
      Index segment_num;
      Index num_columns;
      Index start_bit;
      InterleavedPrepareQuery(input, hasher, *this, &hash, &segment_num,
                              &num_columns, &start_bit);
      return InterleavedPhsfQuery(hash, segment_num, num_columns, start_bit,
                                  hasher, *this);
    }
  }

  template <typename FilterQueryHasher>
  bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) const {
    assert(TypesAndSettings::kIsFilter);
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual. Zero starts presumes no keys added -> always false
      return false;
    } else {
      // Normal, or upper_num_columns_ == 0 means "no space for data" and
      // thus will always return true.
      // NOTE: not using a struct to encourage compiler optimization
      Hash hash;
      Index segment_num;
      Index num_columns;
      Index start_bit;
      InterleavedPrepareQuery(input, hasher, *this, &hash, &segment_num,
                              &num_columns, &start_bit);
      return InterleavedFilterQuery(hash, segment_num, num_columns, start_bit,
                                    hasher, *this);
    }
  }

  double ExpectedFpRate() const {
    assert(TypesAndSettings::kIsFilter);
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual. Zero starts presumes no keys added -> always false
      return 0.0;
    }
    // else Normal

    // Note: Ignoring smash setting; still close enough in that case
    double lower_portion =
        (upper_start_block_ * 1.0 * kCoeffBits) / num_starts_;

    // Each result (solution) bit (column) cuts FP rate in half. Weight that
    // for upper and lower number of bits (columns).
    return lower_portion * std::pow(0.5, upper_num_columns_ - 1) +
           (1.0 - lower_portion) * std::pow(0.5, upper_num_columns_);
  }
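  // Worked example (illustrative): suppose upper_num_columns_ == 8 and
  // upper_start_block_ covers the first quarter of the starts, so
  // lower_portion == 0.25. Queries landing in that portion see 7 solution
  // columns and the rest see 8, for an expected FP rate of
  // 0.25 * 2^-7 + 0.75 * 2^-8 ~= 0.0049, an effective accuracy between the
  // pure 7-column and 8-column rates even though no single block stores a
  // fractional column.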

  // ********************************************************************
  // Static high-level API

  // Round up to a number of slots supported by this structure. Note that
  // this must be taken into account for the banding if this
  // solution layout/storage is to be used.
  static Index RoundUpNumSlots(Index num_slots) {
    // Must be multiple of kCoeffBits
    Index corrected = (num_slots + kCoeffBits - 1) / kCoeffBits * kCoeffBits;

    // Do not use num_starts==1 unless kUseSmash, because the hashing
    // might not be equipped for stacking up so many entries on a
    // single start location.
    if (!TypesAndSettings::kUseSmash && corrected == kCoeffBits) {
      corrected += kCoeffBits;
    }
    return corrected;
  }

  // Round down to a number of slots supported by this structure. Note that
  // this must be taken into account for the banding if this
  // solution layout/storage is to be used.
  static Index RoundDownNumSlots(Index num_slots) {
    // Must be multiple of kCoeffBits
    Index corrected = num_slots / kCoeffBits * kCoeffBits;

    // Do not use num_starts==1 unless kUseSmash, because the hashing
    // might not be equipped for stacking up so many entries on a
    // single start location.
    if (!TypesAndSettings::kUseSmash && corrected == kCoeffBits) {
      corrected = 0;
    }
    return corrected;
  }

  // Compute the number of bytes for a given number of slots and desired
  // FP rate. Since the desired FP rate might not be exactly achievable,
  // rounding_bias32==0 means to always round toward lower FP rate
  // than desired (more bytes); rounding_bias32==max uint32_t means always
  // round toward higher FP rate than desired (fewer bytes); other values
  // act as a proportional threshold or bias between the two.
  static size_t GetBytesForFpRate(Index num_slots, double desired_fp_rate,
                                  uint32_t rounding_bias32) {
    return InternalGetBytesForFpRate(num_slots, desired_fp_rate,
                                     1.0 / desired_fp_rate, rounding_bias32);
  }

  // The same, but specifying desired accuracy as 1.0 / FP rate, or
  // one_in_fp_rate. E.g. desired_one_in_fp_rate=100 means 1% FP rate.
  static size_t GetBytesForOneInFpRate(Index num_slots,
                                       double desired_one_in_fp_rate,
                                       uint32_t rounding_bias32) {
    return InternalGetBytesForFpRate(num_slots, 1.0 / desired_one_in_fp_rate,
                                     desired_one_in_fp_rate, rounding_bias32);
  }

 protected:
  static size_t InternalGetBytesForFpRate(Index num_slots,
                                          double desired_fp_rate,
                                          double desired_one_in_fp_rate,
                                          uint32_t rounding_bias32) {
    assert(TypesAndSettings::kIsFilter);
    if (TypesAndSettings::kAllowZeroStarts) {
      if (num_slots == 0) {
        // Unusual. Zero starts presumes no keys added -> always false (no FPs)
        return 0U;
      }
    } else {
      assert(num_slots > 0);
    }
    // Must be rounded up already.
    assert(RoundUpNumSlots(num_slots) == num_slots);

    if (desired_one_in_fp_rate > 1.0 && desired_fp_rate < 1.0) {
      // Typical: less than 100% FP rate
      if (desired_one_in_fp_rate <= static_cast<ResultRow>(-1)) {
        // Typical: Less than maximum result row entropy
        ResultRow rounded = static_cast<ResultRow>(desired_one_in_fp_rate);
        int lower_columns = FloorLog2(rounded);
        double lower_columns_fp_rate = std::pow(2.0, -lower_columns);
        double upper_columns_fp_rate = std::pow(2.0, -(lower_columns + 1));
        // Floating point don't let me down!
        assert(lower_columns_fp_rate >= desired_fp_rate);
        assert(upper_columns_fp_rate <= desired_fp_rate);

        double lower_portion = (desired_fp_rate - upper_columns_fp_rate) /
                               (lower_columns_fp_rate - upper_columns_fp_rate);
        // Floating point don't let me down!
        assert(lower_portion >= 0.0);
        assert(lower_portion <= 1.0);

        double rounding_bias = (rounding_bias32 + 0.5) / double{0x100000000};
        assert(rounding_bias > 0.0);
        assert(rounding_bias < 1.0);

        // Note: Ignoring smash setting; still close enough in that case
        Index num_starts = num_slots - kCoeffBits + 1;
        // Lower upper_start_block means lower FP rate (higher accuracy)
        Index upper_start_block = static_cast<Index>(
            (lower_portion * num_starts + rounding_bias) / kCoeffBits);
        Index num_blocks = num_slots / kCoeffBits;
        assert(upper_start_block < num_blocks);

        // Start by assuming all blocks use the lower number of columns
        Index num_segments = num_blocks * static_cast<Index>(lower_columns);
        // Correct by 1 each for blocks using the upper number of columns
        num_segments += (num_blocks - upper_start_block);
        // Total bytes
        return num_segments * sizeof(CoeffRow);
      } else {
        // one_in_fp_rate is too big, so the requested FP rate is smaller than
        // supported. Use the max number of columns for the minimum supported
        // FP rate.
        return num_slots * sizeof(ResultRow);
      }
    } else {
      // Effectively asking for 100% FP rate, or NaN etc.
      if (TypesAndSettings::kAllowZeroStarts) {
        // Zero segments
        return 0U;
      } else {
        // One segment (minimum size, maximizing FP rate)
        return sizeof(CoeffRow);
      }
    }
  }
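  // Worked example of the computation above (illustrative): with
  // CoeffRow == uint64_t (kCoeffBits == 64), ResultRow == uint8_t,
  // num_slots == 1024, and desired_one_in_fp_rate == 100:
  // lower_columns == FloorLog2(100) == 6, so the achievable per-column FP
  // rates bracketing the request are 2^-6 and 2^-7, and
  // lower_portion == (0.01 - 0.0078125) / (0.015625 - 0.0078125) == 0.28.
  // With num_starts == 961 and a mid-range rounding bias (~0.5),
  // upper_start_block == (0.28 * 961 + 0.5) / 64 ~= 4 of the 16 blocks, so
  // num_segments == 16 * 6 + (16 - 4) == 108, i.e. 864 bytes and an average
  // of 6.75 solution columns per block.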

  void InternalConfigure() {
    const Index num_blocks = GetNumBlocks();
    Index num_segments = GetNumSegments();

    if (num_blocks == 0) {
      // Exceptional
      upper_num_columns_ = 0;
      upper_start_block_ = 0;
    } else {
      // Normal
      upper_num_columns_ =
          (num_segments + /*round up*/ num_blocks - 1) / num_blocks;
      upper_start_block_ = upper_num_columns_ * num_blocks - num_segments;
      // Unless that's more columns than supported by the ResultRow data type
      if (upper_num_columns_ > 8U * sizeof(ResultRow)) {
        // Use maximum columns (there will be space unused)
        upper_num_columns_ = static_cast<Index>(8U * sizeof(ResultRow));
        upper_start_block_ = 0;
        num_segments = num_blocks * upper_num_columns_;
      }
    }
    // Update data_len_ for correct rounding and/or unused space
    // NOTE: unused space stays gone if we PrepareForNumStarts again.
    // We are prioritizing minimizing the number of fields over making
    // the "unused space" feature work well.
    data_len_ = num_segments * sizeof(CoeffRow);
  }
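  // Worked example (illustrative): with num_blocks == 10 and a buffer of
  // data_len_ == 584 bytes of 8-byte CoeffRow segments (num_segments == 73):
  // upper_num_columns_ == ceil(73 / 10) == 8 and
  // upper_start_block_ == 8 * 10 - 73 == 7, so the first 7 blocks get 7
  // columns each and the last 3 get 8 (7 * 7 + 3 * 8 == 73), and data_len_
  // remains 73 * 8 == 584 bytes.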

  char* const data_;
  size_t data_len_;
  Index num_starts_ = 0;
  Index upper_num_columns_ = 0;
  Index upper_start_block_ = 0;
};

}  // namespace ribbon

}  // namespace ROCKSDB_NAMESPACE

// For convenience working with templates
#define IMPORT_RIBBON_IMPL_TYPES(TypesAndSettings)                            \
  using Hasher = ROCKSDB_NAMESPACE::ribbon::StandardHasher<TypesAndSettings>; \
  using Banding =                                                             \
      ROCKSDB_NAMESPACE::ribbon::StandardBanding<TypesAndSettings>;           \
  using SimpleSoln =                                                          \
      ROCKSDB_NAMESPACE::ribbon::InMemSimpleSolution<TypesAndSettings>;       \
  using InterleavedSoln =                                                     \
      ROCKSDB_NAMESPACE::ribbon::SerializableInterleavedSolution<             \
          TypesAndSettings>;                                                  \
  static_assert(sizeof(Hasher) + sizeof(Banding) + sizeof(SimpleSoln) +       \
                        sizeof(InterleavedSoln) >                             \
                    0,                                                        \
                "avoid unused warnings, semicolon expected after macro call")
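
// End-to-end usage sketch (illustrative, with hypothetical names such as
// MyTypesAndSettings, `keys`, and `some_key`; sizing policy and error
// handling omitted). Assumes a configuration along the lines of the example
// near the top of this file (Index == uint32_t, ResultRow == uint8_t):
//
//   IMPORT_RIBBON_IMPL_TYPES(MyTypesAndSettings);
//
//   // ~5% more slots than keys is a typical starting point
//   uint32_t num_slots = InterleavedSoln::RoundUpNumSlots(
//       static_cast<uint32_t>(keys.size() + keys.size() / 20));
//   Banding banding;
//   if (!banding.ResetAndFindSeedToSolve(num_slots, keys.begin(),
//                                        keys.end())) {
//     // fall back (e.g. to a Bloom filter), or retry with more slots
//   }
//   // One byte per slot allows up to 8 solution columns; fewer bytes give
//   // fractional bits per key.
//   std::string buf(num_slots, '\0');
//   InterleavedSoln soln(&buf[0], buf.size());
//   soln.BackSubstFrom(banding);
//
//   // Querying requires the same TypesAndSettings and the same seed
//   Hasher hasher;
//   hasher.SetOrdinalSeed(banding.GetOrdinalSeed());
//   bool may_match = soln.FilterQuery(some_key, hasher);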