1 // Copyright 2018 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS-IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 
16 // Author: ericv@google.com (Eric Veach)
17 
18 #include "s2/encoded_s2point_vector.h"
19 
20 #include "s2/third_party/absl/base/internal/unaligned_access.h"
21 #include "s2/util/bits/bits.h"
22 #include "s2/s2cell_id.h"
23 #include "s2/s2coords.h"
24 
25 using absl::MakeSpan;
26 using absl::Span;
27 using std::max;
28 using std::min;
29 using std::vector;
30 
31 namespace s2coding {
32 
33 // Like util_bits::InterleaveUint32, but interleaves bit pairs rather than
34 // individual bits.  This format is faster to decode than the fully interleaved
35 // format, and produces the same results for our use case.
InterleaveUint32BitPairs(const uint32 val0,const uint32 val1)36 inline uint64 InterleaveUint32BitPairs(const uint32 val0, const uint32 val1) {
37   uint64 v0 = val0, v1 = val1;
38   v0 = (v0 | (v0 << 16)) & 0x0000ffff0000ffff;
39   v1 = (v1 | (v1 << 16)) & 0x0000ffff0000ffff;
40   v0 = (v0 | (v0 << 8)) & 0x00ff00ff00ff00ff;
41   v1 = (v1 | (v1 << 8)) & 0x00ff00ff00ff00ff;
42   v0 = (v0 | (v0 << 4)) & 0x0f0f0f0f0f0f0f0f;
43   v1 = (v1 | (v1 << 4)) & 0x0f0f0f0f0f0f0f0f;
44   v0 = (v0 | (v0 << 2)) & 0x3333333333333333;
45   v1 = (v1 | (v1 << 2)) & 0x3333333333333333;
46   return v0 | (v1 << 2);
47 }
48 
49 // This code is about 50% faster than util_bits::DeinterleaveUint32, which
50 // uses a lookup table.  The speed advantage is expected to be even larger in
51 // code that mixes bit interleaving with other significant operations since it
52 // doesn't require keeping a 256-byte lookup table in the L1 data cache.
DeinterleaveUint32BitPairs(uint64 code,uint32 * val0,uint32 * val1)53 inline void DeinterleaveUint32BitPairs(uint64 code,
54                                        uint32 *val0, uint32 *val1) {
55   uint64 v0 = code, v1 = code >> 2;
56   v0 &= 0x3333333333333333;
57   v0 |= v0 >> 2;
58   v1 &= 0x3333333333333333;
59   v1 |= v1 >> 2;
60   v0 &= 0x0f0f0f0f0f0f0f0f;
61   v0 |= v0 >> 4;
62   v1 &= 0x0f0f0f0f0f0f0f0f;
63   v1 |= v1 >> 4;
64   v0 &= 0x00ff00ff00ff00ff;
65   v0 |= v0 >> 8;
66   v1 &= 0x00ff00ff00ff00ff;
67   v1 |= v1 >> 8;
68   v0 &= 0x0000ffff0000ffff;
69   v0 |= v0 >> 16;
70   v1 &= 0x0000ffff0000ffff;
71   v1 |= v1 >> 16;
72   *val0 = v0;
73   *val1 = v1;
74 }
75 
76 // Forward declarations.
77 void EncodeS2PointVectorFast(Span<const S2Point> points, Encoder* encoder);
78 void EncodeS2PointVectorCompact(Span<const S2Point> points, Encoder* encoder);
79 
80 // To save space (especially for vectors of length 0, 1, and 2), the encoding
81 // format is encoded in the low-order 3 bits of the vector size.  Up to 7
82 // encoding formats are supported (only 2 are currently defined).  Additional
83 // formats could be supported by using "7" as an overflow indicator and
84 // encoding the actual format separately, but it seems unlikely we will ever
85 // need to do that.
86 static const int kEncodingFormatBits = 3;
87 static const uint8 kEncodingFormatMask = (1 << kEncodingFormatBits) - 1;
88 
EncodeS2PointVector(Span<const S2Point> points,CodingHint hint,Encoder * encoder)89 void EncodeS2PointVector(Span<const S2Point> points, CodingHint hint,
90                          Encoder* encoder) {
91   switch (hint) {
92     case CodingHint::FAST:
93       return EncodeS2PointVectorFast(points, encoder);
94 
95     case CodingHint::COMPACT:
96       return EncodeS2PointVectorCompact(points, encoder);
97 
98     default:
99       S2_LOG(DFATAL) << "Unknown CodingHint: " << static_cast<int>(hint);
100   }
101 }
102 
Init(Decoder * decoder)103 bool EncodedS2PointVector::Init(Decoder* decoder) {
104   if (decoder->avail() < 1) return false;
105 
106   // Peek at the format but don't advance the decoder; the format-specific
107   // Init functions will do that.
108   format_ = static_cast<Format>(*decoder->ptr() & kEncodingFormatMask);
109   switch (format_) {
110     case UNCOMPRESSED:
111       return InitUncompressedFormat(decoder);
112 
113     case CELL_IDS:
114       return InitCellIdsFormat(decoder);
115 
116     default:
117       return false;
118   }
119 }
120 
Decode() const121 vector<S2Point> EncodedS2PointVector::Decode() const {
122   vector<S2Point> points;
123   points.reserve(size_);
124   for (int i = 0; i < size_; ++i) {
125     points.push_back((*this)[i]);
126   }
127   return points;
128 }
129 
130 
131 //////////////////////////////////////////////////////////////////////////////
132 //                     UNCOMPRESSED Encoding Format
133 //////////////////////////////////////////////////////////////////////////////
134 
135 // Encodes a vector of points, optimizing for (encoding and decoding) speed.
EncodeS2PointVectorFast(Span<const S2Point> points,Encoder * encoder)136 void EncodeS2PointVectorFast(Span<const S2Point> points, Encoder* encoder) {
137 #ifndef IS_LITTLE_ENDIAN
138   S2_LOG(FATAL) << "Not implemented on big-endian architectures";
139 #endif
140 
141   // This function always uses the UNCOMPRESSED encoding.  The header consists
142   // of a varint64 in the following format:
143   //
144   //   bits 0-2:  encoding format (UNCOMPRESSED)
145   //   bits 3-63: vector size
146   //
147   // This is followed by an array of S2Points in little-endian order.
148   encoder->Ensure(Varint::kMax64 + points.size() * sizeof(S2Point));
149   uint64 size_format = (points.size() << kEncodingFormatBits |
150                         EncodedS2PointVector::UNCOMPRESSED);
151   encoder->put_varint64(size_format);
152   encoder->putn(points.data(), points.size() * sizeof(S2Point));
153 }
154 
InitUncompressedFormat(Decoder * decoder)155 bool EncodedS2PointVector::InitUncompressedFormat(Decoder* decoder) {
156 #if !defined(IS_LITTLE_ENDIAN) || defined(__arm__) || \
157   defined(ABSL_INTERNAL_NEED_ALIGNED_LOADS)
158   // TODO(ericv): Make this work on platforms that don't support unaligned
159   // 64-bit little-endian reads, e.g. by falling back to
160   //
161   //   bit_cast<double>(little_endian::Load64()).
162   //
163   // Maybe the compiler is smart enough that we can do this all the time,
164   // but more likely we will need two cases using the #ifdef above.
165   // (Note that even ARMv7 does not support unaligned 64-bit loads.)
166   S2_LOG(DFATAL) << "Needs architecture with 64-bit little-endian unaligned loads";
167   return false;
168 #endif
169 
170   uint64 size;
171   if (!decoder->get_varint64(&size)) return false;
172   size >>= kEncodingFormatBits;
173 
174   // Note that the encoding format supports up to 2**59 vertices, but we
175   // currently only support decoding up to 2**32 vertices.
176   if (size > std::numeric_limits<uint32>::max()) return false;
177   size_ = size;
178 
179   size_t bytes = size_t{size_} * sizeof(S2Point);
180   if (decoder->avail() < bytes) return false;
181 
182   uncompressed_.points = reinterpret_cast<const S2Point*>(decoder->ptr());
183   decoder->skip(bytes);
184   return true;
185 }
186 
187 
188 //////////////////////////////////////////////////////////////////////////////
189 //                     CELL_IDS Encoding Format
190 //////////////////////////////////////////////////////////////////////////////
191 
192 // Represents a point that can be encoded as an S2CellId center.
193 // (If such an encoding is not possible then level < 0.)
194 struct CellPoint {
195   // Constructor necessary in order to narrow "int" arguments to "int8".
CellPoints2coding::CellPoint196   CellPoint(int level, int face, uint32 si, uint32 ti)
197     : level(level), face(face), si(si), ti(ti) {}
198 
199   int8 level, face;
200   uint32 si, ti;
201 };
202 
203 // S2CellIds are represented in a special 64-bit format and are encoded in
204 // fixed-size blocks.  kBlockSize represents the number of values per block.
205 // Block sizes of 4, 8, 16, and 32 were tested and kBlockSize == 16 seems to
206 // offer the best compression.  (Note that kBlockSize == 32 requires some code
207 // modifications which have since been removed.)
208 constexpr int kBlockShift = 4;
209 constexpr size_t kBlockSize = 1 << kBlockShift;
210 
211 // Used to indicate that a point must be encoded as an exception (a 24-byte
212 // S2Point) rather than as an S2CellId.
213 constexpr uint64 kException = ~0ULL;
214 
215 // Represents the encoding parameters to be used for a given block (consisting
216 // of kBlockSize encodable 64-bit values).  See below.
217 struct BlockCode {
218   int delta_bits;     // Delta length in bits (multiple of 4)
219   int offset_bits;    // Offset length in bits (multiple of 8)
220   int overlap_bits;   // {Delta, Offset} overlap in bits (0 or 4)
221 };
222 
223 // Returns a bit mask with "n" low-order 1 bits, for 0 <= n <= 64.
BitMask(int n)224 inline uint64 BitMask(int n) {
225   return (n == 0) ? 0 : (~0ULL >> (64 - n));
226 }
227 
228 // Returns the maximum number of bits per value at the given S2CellId level.
MaxBitsForLevel(int level)229 inline int MaxBitsForLevel(int level) {
230   return 2 * level + 3;
231 }
232 
233 // Returns the number of bits that "base" should be right-shifted in order to
234 // encode only its leading "base_bits" bits, assuming that all points are
235 // encoded at the given S2CellId level.
BaseShift(int level,int base_bits)236 inline int BaseShift(int level, int base_bits) {
237   return max(0, MaxBitsForLevel(level) - base_bits);
238 }
239 
240 // Forward declarations.
241 int ChooseBestLevel(Span<const S2Point> points, vector<CellPoint>* cell_points);
242 vector<uint64> ConvertCellsToValues(const vector<CellPoint>& cell_points,
243                                     int level, bool* have_exceptions);
244 uint64 ChooseBase(const vector<uint64>& values, int level, bool have_exceptions,
245                   int* base_bits);
246 BlockCode GetBlockCode(Span<const uint64> values, uint64 base,
247                        bool have_exceptions);
248 
249 // Encodes a vector of points, optimizing for space.
EncodeS2PointVectorCompact(Span<const S2Point> points,Encoder * encoder)250 void EncodeS2PointVectorCompact(Span<const S2Point> points, Encoder* encoder) {
251   // OVERVIEW
252   // --------
253   //
254   // We attempt to represent each S2Point as the center of an S2CellId.  All
255   // S2CellIds must be at the same level.  Any points that cannot be encoded
256   // exactly as S2CellId centers are stored as exceptions using 24 bytes each.
257   // If there are so many exceptions that the CELL_IDS encoding does not save
258   // significant space, we give up and use the uncompressed encoding.
259   //
260   // The first step is to choose the best S2CellId level.  This requires
261   // converting each point to (face, si, ti) coordinates and checking whether
262   // the point can be represented exactly as an S2CellId center at some level.
263   // We then build a histogram of S2CellId levels (just like the similar code
264   // in S2Polygon::Encode) and choose the best level (or give up, if there are
265   // not enough S2CellId-encodable points).
266   //
267   // The simplest approach would then be to take all the S2CellIds and
268   // right-shift them to remove all the constant bits at the chosen level.
269   // This would give the best spatial locality and hence the smallest deltas.
270   // However instead we give up some spatial locality and use the similar but
271   // faster transformation described below.
272   //
273   // Each encodable point is first converted to the (sj, tj) representation
274   // defined below:
275   //
276   //   sj = (((face & 3) << 30) | (si >> 1)) >> (30 - level);
277   //   tj = (((face & 4) << 29) | ti) >> (31 - level);
278   //
279   // These two values encode the (face, si, ti) tuple using (2 * level + 3)
280   // bits.  To see this, recall that "si" and "ti" are 31-bit values that all
281   // share a common suffix consisting of a "1" bit followed by (30 - level)
282   // "0" bits.  The code above right-shifts these values to remove the
283   // constant bits and then prepends the bits for "face", yielding a total of
284   // (level + 2) bits for "sj" and (level + 1) bits for "tj".
285   //
286   // We then combine (sj, tj) into one 64-bit value by interleaving bit pairs:
287   //
288   //   v = InterleaveBitPairs(sj, tj);
289   //
290   // (We could also interleave individual bits, but it is faster this way.)
291   // The result is similar to right-shifting an S2CellId by (61 - 2 * level),
292   // except that it is faster to decode and the spatial locality is not quite
293   // as good.
294   //
295   // The 64-bit values are divided into blocks of size 8, and then each value is
296   // encoded as the sum of a base value, a per-block offset, and a per-value
297   // delta within that block:
298   //
299   //   v[i,j] = base + offset[i] + delta[i, j]
300   //
301   // where "i" represents a block and "j" represents an entry in that block.
302   //
303   // The deltas for each block are encoded using a fixed number of 4-bit nibbles
304   // (1-16 nibbles per delta).  This allows any delta to be accessed in constant
305   // time.
306   //
307   // The "offset" for each block is a 64-bit value encoded in 0-8 bytes.  The
308   // offset is left-shifted such that it overlaps the deltas by a configurable
309   // number of bits (either 0 or 4), called the "overlap".  The overlap and
310   // offset length (0-8 bytes) are specified per block.  The reason for the
311   // overlap is that it allows fewer delta bits to be used in some cases.  For
312   // example if base == 0 and the range within a block is 0xf0 to 0x110, then
313   // rather than using 12-bits deltas with an offset of 0, the overlap lets us
314   // use 8-bits deltas with an offset of 0xf0 (saving 7 bytes per block).
315   //
316   // The global minimum value "base" is encoded using 0-7 bytes starting with
317   // the most-significant non-zero bit possible for the chosen level.  For
318   // example, if (level == 7) then the encoded values have at most 17 bits, so
319   // if "base" is encoded in 1 byte then it is shifted to occupy bits 9-16.
320   //
321   // Example: at level == 15, there are at most 33 non-zero value bits.  The
322   // following shows the bit positions covered by "base", "offset", and "delta"
323   // assuming that "base" and "offset" are encoded in 2 bytes each, deltas are
324   // encoded in 2 nibbles (1 byte) each, and "overlap" is 4 bits:
325   //
326   //   Base:             1111111100000000-----------------
327   //   Offset:           -------------1111111100000000----
328   //   Delta:            -------------------------00000000
329   //   Overlap:                                   ^^^^
330   //
331   // The numbers (0 or 1) in this diagram denote the byte number of the encoded
332   // value.  Notice that "base" is shifted so that it starts at the leftmost
333   // possible bit, "delta" always starts at the rightmost possible bit (bit 0),
334   // and "offset" is shifted so that it overlaps "delta" by the chosen "overlap"
335   // (either 0 or 4 bits).  Also note that all of these values are summed, and
336   // therefore each value can affect higher-order bits due to carries.
337   //
338   // NOTE(ericv): Encoding deltas in 4-bit rather than 8-bit length increments
339   // reduces encoded sizes by about 7%.  Allowing a 4-bit overlap between the
340   // offset and deltas reduces encoded sizes by about 1%.  Both optimizations
341   // make the code more complex but don't affect running times significantly.
342   //
343   // ENCODING DETAILS
344   // ----------------
345   //
346   // Now we can move on to the actual encodings.  First, there is a 2 byte
347   // header encoded as follows:
348   //
349   //  Byte 0, bits 0-2: encoding_format (CELL_IDS)
350   //  Byte 0, bit  3:   have_exceptions
351   //  Byte 0, bits 4-7: (last_block_size - 1)
352   //  Byte 1, bits 0-2: base_bytes
353   //  Byte 1, bits 3-7: level (0-30)
354   //
355   // This is followed by an EncodedStringVector containing the encoded blocks.
356   // Each block contains kBlockSize (8) values.  The total size of the
357   // EncodeS2PointVector is not stored explicity, but instead is calculated as
358   //
359   //     num_values == kBlockSize * (num_blocks - 1) + last_block_size .
360   //
361   // (An empty vector has num_blocks == 0 and last_block_size == kBlockSize.)
362   //
363   // Each block starts with a 1 byte header containing the following:
364   //
365   //  Byte 0, bits 0-2: (offset_bytes - overlap_nibbles)
366   //  Byte 0, bit  3:   overlap_nibbles
367   //  Byte 0, bits 4-7: (delta_nibbles - 1)
368   //
369   // "overlap_nibbles" is either 0 or 1 (indicating an overlap of 0 or 4 bits),
370   // while "offset_bytes" is in the range 0-8 (indicating the number of bytes
371   // used to encode the offset for this block).  Note that some combinations
372   // cannot be encoded: in particular, offset_bytes == 0 can only be encoded
373   // with an overlap of 0 bits, and offset_bytes == 8 can only be encoded with
374   // an overlap of 4 bits.  This allows us to encode offset lengths of 0-8
375   // rather than just 0-7 without using an extra bit.  (Note that the
376   // combinations that can't be encoded are not useful anyway.)
377   //
378   // The header is followed by "offset_bytes" bytes for the offset, and then
379   // (4 * delta_nibbles) bytes for the deltas.
380   //
381   // If there are any points that could not be represented as S2CellIds, then
382   // "have_exceptions" in the header is true.  In that case the delta values
383   // within each block are encoded as (delta + 8), and values 0-7 are used to
384   // represent exceptions.  If a block has exceptions, they are encoded
385   // immediately following the array of deltas, and are referenced by encoding
386   // the corresponding exception index (0-7) as the delta.
387   //
388   // TODO(ericv): A vector containing a single leaf cell is currently encoded as
389   // 13 bytes (2 byte header, 7 byte base, 1 byte block count, 1 byte block
390   // length, 1 byte block header, 1 byte delta).  However if this case occurs
391   // often, a better solution would be implement a separate format that encodes
392   // the leading k bytes of an S2CellId.  It would have a one-byte header
393   // consisting of the encoding format (3 bits) and the number of bytes encoded
394   // (3 bits), followed by the S2CellId bytes.  The extra 2 header bits could be
395   // used to store single points using other encodings, e.g. E7.
396   //
397   // If we wind up using 8-value blocks, we could also use the extra bit in the
398   // first byte of the header to indicate that there is only one value, and
399   // then skip the 2nd byte of header and the EncodedStringVector.  But this
400   // would be messy because it also requires special cases while decoding.
401   // Essentially this would be a sub-format within the CELL_IDS format.
402 
403   // 1. Compute (level, face, si, ti) for each point, build a histogram of
404   // levels, and determine the optimal level to use for encoding (if any).
405   vector<CellPoint> cell_points;
406   int level = ChooseBestLevel(points, &cell_points);
407   if (level < 0) {
408     return EncodeS2PointVectorFast(points, encoder);
409   }
410 
411   // 2. Convert the points into encodable 64-bit values.  We don't use the
412   // S2CellId itself because it requires a somewhat more complicated bit
413   // interleaving operation.
414   //
415   // TODO(ericv): Benchmark using shifted S2CellIds instead.
416   bool have_exceptions;
417   vector<uint64> values = ConvertCellsToValues(cell_points, level,
418                                                &have_exceptions);
419 
420   // 3. Choose the global encoding parameter "base" (consisting of the bit
421   // prefix shared by all values to be encoded).
422   int base_bits;
423   uint64 base = ChooseBase(values, level, have_exceptions, &base_bits);
424 
425   // Now encode the output, starting with the 2-byte header (see above).
426   int num_blocks = (values.size() + kBlockSize - 1) >> kBlockShift;
427   int base_bytes = base_bits >> 3;
428   encoder->Ensure(2 + base_bytes);
429   int last_block_count = values.size() - kBlockSize * (num_blocks - 1);
430   S2_DCHECK_GE(last_block_count, 0);
431   S2_DCHECK_LE(last_block_count, kBlockSize);
432   S2_DCHECK_LE(base_bytes, 7);
433   S2_DCHECK_LE(level, 30);
434   encoder->put8(EncodedS2PointVector::CELL_IDS |
435                 (have_exceptions << 3) |
436                 ((last_block_count - 1) << 4));
437   encoder->put8(base_bytes | (level << 3));
438 
439   // Next we encode 0-7 bytes of "base".
440   int base_shift = BaseShift(level, base_bits);
441   EncodeUintWithLength(base >> base_shift, base_bytes, encoder);
442 
443   // Now we encode the contents of each block.
444   StringVectorEncoder blocks;
445   vector<S2Point> exceptions;
446   uint64 offset_bytes_sum = 0;
447   uint64 delta_nibbles_sum = 0;
448   uint64 exceptions_sum = 0;
449   for (int i = 0; i < values.size(); i += kBlockSize) {
450     int block_size = min(kBlockSize, values.size() - i);
451     BlockCode code = GetBlockCode(MakeSpan(&values[i], block_size),
452                                   base, have_exceptions);
453 
454     // Encode the one-byte block header (see above).
455     Encoder* block = blocks.AddViaEncoder();
456     int offset_bytes = code.offset_bits >> 3;
457     int delta_nibbles = code.delta_bits >> 2;
458     int overlap_nibbles = code.overlap_bits >> 2;
459     block->Ensure(1 + offset_bytes + (kBlockSize / 2) * delta_nibbles);
460     S2_DCHECK_LE(offset_bytes - overlap_nibbles, 7);
461     S2_DCHECK_LE(overlap_nibbles, 1);
462     S2_DCHECK_LE(delta_nibbles, 16);
463     block->put8((offset_bytes - overlap_nibbles) |
464                 (overlap_nibbles << 3) | (delta_nibbles - 1) << 4);
465 
466     // Determine the offset for this block, and whether there are exceptions.
467     uint64 offset = ~0ULL;
468     int num_exceptions = 0;
469     for (int j = 0; j < block_size; ++j) {
470       if (values[i + j] == kException) {
471         num_exceptions += 1;
472       } else {
473         S2_DCHECK_GE(values[i + j], base);
474         offset = min(offset, values[i + j] - base);
475       }
476     }
477     if (num_exceptions == block_size) offset = 0;
478 
479     // Encode the offset.
480     int offset_shift = code.delta_bits - code.overlap_bits;
481     offset &= ~BitMask(offset_shift);
482     S2_DCHECK_EQ(offset == 0, offset_bytes == 0);
483     if (offset > 0) {
484       EncodeUintWithLength(offset >> offset_shift, offset_bytes, block);
485     }
486 
487     // Encode the deltas, and also gather any exceptions present.
488     int delta_bytes = (delta_nibbles + 1) >> 1;
489     exceptions.clear();
490     for (int j = 0; j < block_size; ++j) {
491       uint64 delta;
492       if (values[i + j] == kException) {
493         delta = exceptions.size();
494         exceptions.push_back(points[i + j]);
495       } else {
496         S2_DCHECK_GE(values[i + j], offset + base);
497         delta = values[i + j] - (offset + base);
498         if (have_exceptions) {
499           S2_DCHECK_LE(delta, ~0ULL - kBlockSize);
500           delta += kBlockSize;
501         }
502       }
503       S2_DCHECK_LE(delta, BitMask(code.delta_bits));
504       if ((delta_nibbles & 1) && (j & 1)) {
505         // Combine this delta with the high-order 4 bits of the previous delta.
506         uint8 last_byte = *(block->base() + block->length() - 1);
507         block->RemoveLast(1);
508         delta = (delta << 4) | (last_byte & 0xf);
509       }
510       EncodeUintWithLength(delta, delta_bytes, block);
511     }
512     // Append any exceptions to the end of the block.
513     if (num_exceptions > 0) {
514       int exceptions_bytes = exceptions.size() * sizeof(S2Point);
515       block->Ensure(exceptions_bytes);
516       block->putn(exceptions.data(), exceptions_bytes);
517     }
518     offset_bytes_sum += offset_bytes;
519     delta_nibbles_sum += delta_nibbles;
520     exceptions_sum += num_exceptions;
521   }
522   blocks.Encode(encoder);
523 }
524 
525 // Returns the S2CellId level for which the greatest number of the given points
526 // can be represented as the center of an S2CellId.  Initializes "cell_points"
527 // to contain the S2CellId representation of each point (if any).  Returns -1
528 // if there is no S2CellId that would result in significant space savings.
ChooseBestLevel(Span<const S2Point> points,vector<CellPoint> * cell_points)529 int ChooseBestLevel(Span<const S2Point> points,
530                     vector<CellPoint>* cell_points) {
531   cell_points->clear();
532   cell_points->reserve(points.size());
533 
534   // Count the number of points at each level.
535   int level_counts[S2CellId::kMaxLevel + 1] = { 0 };
536   for (const S2Point& point : points) {
537     int face;
538     uint32 si, ti;
539     int level = S2::XYZtoFaceSiTi(point, &face, &si, &ti);
540     cell_points->push_back(CellPoint(level, face, si, ti));
541     if (level >= 0) ++level_counts[level];
542   }
543   // Choose the level for which the most points can be encoded.
544   int best_level = 0;
545   for (int level = 1; level <= S2CellId::kMaxLevel; ++level) {
546     if (level_counts[level] > level_counts[best_level]) {
547       best_level = level;
548     }
549   }
550   // The uncompressed encoding is smaller *and* faster when very few of the
551   // points are encodable as S2CellIds.  The CELL_IDS encoding uses about 1
552   // extra byte per point in this case, consisting of up to a 3 byte
553   // EncodedStringVector offset for each block, a 1 byte block header, and 4
554   // bits per delta (encoding an exception number from 0-7), for a total of 8
555   // bytes per block.  This represents a space overhead of about 4%, so we
556   // require that at least 5% of the input points should be encodable as
557   // S2CellIds in order for the CELL_IDS format to be worthwhile.
558   constexpr double kMinEncodableFraction = 0.05;
559   if (level_counts[best_level] <= kMinEncodableFraction * points.size()) {
560     return -1;
561   }
562   return best_level;
563 }
564 
565 // Given a vector of points in CellPoint format and an S2CellId level that has
566 // been chosen for encoding, returns a vector of 64-bit values that should be
567 // encoded in order to represent these points.  Points that cannot be
568 // represented losslessly as the center of an S2CellId at the chosen level are
569 // indicated by the value "kException".  "have_exceptions" is set to indicate
570 // whether any exceptions were present.
ConvertCellsToValues(const vector<CellPoint> & cell_points,int level,bool * have_exceptions)571 vector<uint64> ConvertCellsToValues(const vector<CellPoint>& cell_points,
572                                     int level, bool* have_exceptions) {
573   vector<uint64> values;
574   values.reserve(cell_points.size());
575   *have_exceptions = false;
576   int shift = S2CellId::kMaxLevel - level;
577   for (CellPoint cp : cell_points) {
578     if (cp.level != level) {
579       values.push_back(kException);
580       *have_exceptions = true;
581     } else {
582       // Note that bit 31 of tj is always zero, and that bits are interleaved in
583       // such a way that bit 63 of the result is always zero.
584       //
585       // The S2CellId version of the following code is:
586       // uint64 v = S2CellId::FromFaceIJ(cp.face, cp.si >> 1, cp.ti >> 1).
587       //            parent(level).id() >> (2 * shift + 1);
588       uint32 sj = (((cp.face & 3) << 30) | (cp.si >> 1)) >> shift;
589       uint32 tj = (((cp.face & 4) << 29) | cp.ti) >> (shift + 1);
590       uint64 v = InterleaveUint32BitPairs(sj, tj);
591       S2_DCHECK_LE(v, BitMask(MaxBitsForLevel(level)));
592       values.push_back(v);
593     }
594   }
595   return values;
596 }
597 
ChooseBase(const vector<uint64> & values,int level,bool have_exceptions,int * base_bits)598 uint64 ChooseBase(const vector<uint64>& values, int level, bool have_exceptions,
599                   int* base_bits) {
600   // Find the minimum and maximum non-exception values to be represented.
601   uint64 v_min = kException, v_max = 0;
602   for (auto v : values) {
603     if (v != kException) {
604       v_min = min(v_min, v);
605       v_max = max(v_max, v);
606     }
607   }
608   if (v_min == kException) return 0;
609 
610   // Generally "base" is chosen as the bit prefix shared by v_min and v_max.
611   // However there are a few adjustments we need to make.
612   //
613   // 1. Encodings are usually smaller if the bits represented by "base" and
614   // "delta" do not overlap.  Usually the shared prefix rule does this
615   // automatically, but if v_min == v_max or there are special circumstances
616   // that increase delta_bits (such as values.size() == 1) then we need to
617   // make an adjustment.
618   //
619   // 2. The format only allows us to represent up to 7 bytes (56 bits) of
620   // "base", so we need to ensure that "base" conforms to this requirement.
621   int min_delta_bits = (have_exceptions || values.size() == 1) ? 8 : 4;
622   int excluded_bits = max(Bits::Log2Floor64(v_min ^ v_max) + 1,
623                           max(min_delta_bits, BaseShift(level, 56)));
624   uint64 base = v_min & ~BitMask(excluded_bits);
625 
626   // Determine how many bytes are needed to represent this prefix.
627   if (base == 0) {
628     *base_bits = 0;
629   } else {
630     int low_bit = Bits::FindLSBSetNonZero64(base);
631     *base_bits = (MaxBitsForLevel(level) - low_bit + 7) & ~7;
632   }
633 
634   // Since base_bits has been rounded up to a multiple of 8, we may now be
635   // able to represent additional bits of v_min.  In general this reduces the
636   // final encoded size.
637   //
638   // NOTE(ericv): A different strategy for choosing "base" is to encode all
639   // blocks under the assumption that "base" equals v_min exactly, and then
640   // set base equal to the minimum-length prefix of "v_min" that allows these
641   // encodings to be used.  This strategy reduces the encoded sizes by
642   // about 0.2% relative to the strategy here, but is more complicated.
643   return v_min & ~BitMask(BaseShift(level, *base_bits));
644 }
645 
646 // Returns true if the range of values [d_min, d_max] can be encoded using the
647 // specified parameters (delta_bits, overlap_bits, and have_exceptions).
CanEncode(uint64 d_min,uint64 d_max,int delta_bits,int overlap_bits,bool have_exceptions)648 bool CanEncode(uint64 d_min, uint64 d_max, int delta_bits,
649                int overlap_bits, bool have_exceptions) {
650   // "offset" can't represent the lowest (delta_bits - overlap_bits) of d_min.
651   d_min &= ~BitMask(delta_bits - overlap_bits);
652 
653   // The maximum delta is reduced by kBlockSize if any exceptions exist, since
654   // deltas 0..kBlockSize-1 are used to indicate exceptions.
655   uint64 max_delta = BitMask(delta_bits);
656   if (have_exceptions) {
657     if (max_delta < kBlockSize) return false;
658     max_delta -= kBlockSize;
659   }
660   // The first test below is necessary to avoid 64-bit overflow.
661   return (d_min > ~max_delta) || (d_min + max_delta >= d_max);
662 }
663 
664 // Given a vector of 64-bit values to be encoded and an S2CellId level, returns
665 // the optimal encoding parameters that should be used to encode each block.
666 // Also returns the global minimum value "base" and the number of bits that
667 // should be used to encode it ("base_bits").
GetBlockCode(Span<const uint64> values,uint64 base,bool have_exceptions)668 BlockCode GetBlockCode(Span<const uint64> values, uint64 base,
669                        bool have_exceptions) {
670   // "b_min" and "b_max"n are the minimum and maximum values within this block.
671   uint64 b_min = kException, b_max = 0;
672   for (uint64 v : values) {
673     if (v != kException) {
674       b_min = min(b_min, v);
675       b_max = max(b_max, v);
676     }
677   }
678   if (b_min == kException) {
679     // All values in this block are exceptions.
680     return BlockCode{4, 0, 0};
681   }
682 
683   // Adjust the min/max values so that they are relative to "base".
684   b_min -= base;
685   b_max -= base;
686 
687   // Determine the minimum possible delta length and overlap that can be used
688   // to encode this block.  The block will usually be encodable using the
689   // number of bits in (b_max - b_min) rounded up to a multiple of 4.  If this
690   // is not possible, the preferred solution is to shift "offset" so that the
691   // delta and offset values overlap by 4 bits (since this only costs an
692   // average of 4 extra bits per block).  Otherwise we increase the delta size
693   // by 4 bits.  Certain cases require that both of these techniques are used.
694   //
695   // Example 1: b_min = 0x72, b_max = 0x7e.  The range is 0x0c.  This can be
696   // encoded using delta_bits = 4 and overlap_bits = 0, which allows us to
697   // represent an offset of 0x70 and a maximum delta of 0x0f, so that we can
698   // encode values up to 0x7f.
699   //
700   // Example 2: b_min = 0x78, b_max = 0x84.  The range is 0x0c, but in this
701   // case it is not sufficient to use delta_bits = 4 and overlap_bits = 0
702   // because we can again only represent an offset of 0x70, so the maximum
703   // delta of 0x0f only lets us encode values up to 0x7f.  However if we
704   // increase the overlap to 4 bits then we can represent an offset of 0x78,
705   // which lets us encode values up to 0x78 + 0x0f = 0x87.
706   //
707   // Example 3: b_min = 0x08, b_max = 0x104.  The range is 0xfc, so we should
708   // be able to use 8-bit deltas.  But even with a 4-bit overlap, we can still
709   // only encode offset = 0 and a maximum value of 0xff.  (We don't allow
710   // bigger overlaps because statistically they are not worthwhile.)  Instead
711   // we increase the delta size to 12 bits, which handles this case easily.
712   //
713   // Example 4: b_min = 0xf08, b_max = 0x1004.  The range is 0xfc, so we
714   // should be able to use 8-bit deltas.  With 8-bit deltas and no overlap, we
715   // have offset = 0xf00 and a maximum encodable value of 0xfff.  With 8-bit
716   // deltas and a 4-bit overlap, we still have offset = 0xf00 and a maximum
717   // encodable value of 0xfff.  Even with 12-bit deltas, we have offset = 0
718   // and we can still only represent 0xfff.  However with delta_bits = 12 and
719   // overlap_bits = 4, we can represent offset = 0xf00 and a maximum encodable
720   // value of 0xf00 + 0xfff = 0x1eff.
721   //
722   // It is possible to show that this last example is the worst case, i.e.  we
723   // do not need to consider increasing delta_bits or overlap_bits further.
724   int delta_bits = (max(1, Bits::Log2Floor64(b_max - b_min)) + 3) & ~3;
725   int overlap_bits = 0;
726   if (!CanEncode(b_min, b_max, delta_bits, 0, have_exceptions)) {
727     if (CanEncode(b_min, b_max, delta_bits, 4, have_exceptions)) {
728       overlap_bits = 4;
729     } else {
730       S2_DCHECK_LE(delta_bits, 60);
731       delta_bits += 4;
732       if (!CanEncode(b_min, b_max, delta_bits, 0, have_exceptions)) {
733         S2_DCHECK(CanEncode(b_min, b_max, delta_bits, 4, have_exceptions));
734         overlap_bits = 4;
735       }
736     }
737   }
738 
739   // Avoid wasting 4 bits of delta when the block size is 1.  This reduces the
740   // encoding size for single leaf cells by one byte.
741   if (values.size() == 1) {
742     S2_DCHECK(delta_bits == 4 && overlap_bits == 0);
743     delta_bits = 8;
744   }
745 
746   // Now determine the number of bytes needed to encode "offset", given the
747   // chosen delta length.
748   uint64 max_delta = BitMask(delta_bits) - (have_exceptions ? kBlockSize : 0);
749   int offset_bits = 0;
750   if (b_max > max_delta) {
751     // At least one byte of offset is required.  Round up the minimum offset
752     // to the next encodable value, and determine how many bits it has.
753     int offset_shift = delta_bits - overlap_bits;
754     uint64 mask = BitMask(offset_shift);
755     uint64 min_offset = (b_max - max_delta + mask) & ~mask;
756     S2_DCHECK_GT(min_offset, 0);
757     offset_bits =
758         (Bits::FindMSBSetNonZero64(min_offset) + 1 - offset_shift + 7) & ~7;
759     // A 64-bit offset can only be encoded with an overlap of 4 bits.
760     if (offset_bits == 64) overlap_bits = 4;
761   }
762   return BlockCode{delta_bits, offset_bits, overlap_bits};
763 }
764 
InitCellIdsFormat(Decoder * decoder)765 bool EncodedS2PointVector::InitCellIdsFormat(Decoder* decoder) {
766   // This function inverts the encodings documented above.
767   // First we decode the two-byte header.
768   if (decoder->avail() < 2) return false;
769   uint8 header1 = decoder->get8();
770   uint8 header2 = decoder->get8();
771   S2_DCHECK_EQ(header1 & 7, CELL_IDS);
772   int last_block_count, base_bytes;
773   cell_ids_.have_exceptions = (header1 & 8) != 0;
774   last_block_count = (header1 >> 4) + 1;
775   base_bytes = header2 & 7;
776   cell_ids_.level = header2 >> 3;
777 
778   // Decode the base value (if any).
779   uint64 base;
780   if (!DecodeUintWithLength(base_bytes, decoder, &base)) return false;
781   cell_ids_.base = base << BaseShift(cell_ids_.level, base_bytes << 3);
782 
783   // Initialize the vector of encoded blocks.
784   if (!cell_ids_.blocks.Init(decoder)) return false;
785   size_ = kBlockSize * (cell_ids_.blocks.size() - 1) + last_block_count;
786   return true;
787 }
788 
DecodeCellIdsFormat(int i) const789 S2Point EncodedS2PointVector::DecodeCellIdsFormat(int i) const {
790   // This function inverts the encodings documented above.
791 
792   // First we decode the block header.
793   const char* ptr = cell_ids_.blocks.GetStart(i >> kBlockShift);
794   uint8 header = *ptr++;
795   int overlap_nibbles = (header >> 3) & 1;
796   int offset_bytes = (header & 7) + overlap_nibbles;
797   int delta_nibbles = (header >> 4) + 1;
798 
799   // Decode the offset for this block.
800   int offset_shift = (delta_nibbles - overlap_nibbles) << 2;
801   uint64 offset = GetUintWithLength<uint64>(ptr, offset_bytes) << offset_shift;
802   ptr += offset_bytes;
803 
804   // Decode the delta for the requested value.
805   int delta_nibble_offset = (i & (kBlockSize - 1)) * delta_nibbles;
806   int delta_bytes = (delta_nibbles + 1) >> 1;
807   const char* delta_ptr = ptr + (delta_nibble_offset >> 1);
808   uint64 delta = GetUintWithLength<uint64>(delta_ptr, delta_bytes);
809   delta >>= (delta_nibble_offset & 1) << 2;
810   delta &= BitMask(delta_nibbles << 2);
811 
812   // Test whether this point is encoded as an exception.
813   if (cell_ids_.have_exceptions) {
814     if (delta < kBlockSize) {
815       int block_size = min(kBlockSize, size_ - (i & ~(kBlockSize - 1)));
816       ptr += (block_size * delta_nibbles + 1) >> 1;
817       ptr += delta * sizeof(S2Point);
818       return *reinterpret_cast<const S2Point*>(ptr);
819     }
820     delta -= kBlockSize;
821   }
822 
823   // Otherwise convert the 64-bit value back to an S2Point.
824   uint64 value = cell_ids_.base + offset + delta;
825   int shift = S2CellId::kMaxLevel - cell_ids_.level;
826 
827   // The S2CellId version of the following code is:
828   //   return S2CellId(((value << 1) | 1) << (2 * shift)).ToPoint();
829   uint32 sj, tj;
830   DeinterleaveUint32BitPairs(value, &sj, &tj);
831   int si = (((sj << 1) | 1) << shift) & 0x7fffffff;
832   int ti = (((tj << 1) | 1) << shift) & 0x7fffffff;
833   int face = ((sj << shift) >> 30) | (((tj << (shift + 1)) >> 29) & 4);
834   return S2::FaceUVtoXYZ(face, S2::STtoUV(S2::SiTitoST(si)),
835                          S2::STtoUV(S2::SiTitoST(ti))).Normalize();
836 }
837 
838 }  // namespace s2coding
839