1 // Copyright 2018 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS-IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15
16 // Author: ericv@google.com (Eric Veach)
17
18 #include "s2/encoded_s2point_vector.h"
19
20 #include "s2/third_party/absl/base/internal/unaligned_access.h"
21 #include "s2/util/bits/bits.h"
22 #include "s2/s2cell_id.h"
23 #include "s2/s2coords.h"
24
25 using absl::MakeSpan;
26 using absl::Span;
27 using std::max;
28 using std::min;
29 using std::vector;
30
31 namespace s2coding {
32
33 // Like util_bits::InterleaveUint32, but interleaves bit pairs rather than
34 // individual bits. This format is faster to decode than the fully interleaved
35 // format, and produces the same results for our use case.
InterleaveUint32BitPairs(const uint32 val0,const uint32 val1)36 inline uint64 InterleaveUint32BitPairs(const uint32 val0, const uint32 val1) {
37 uint64 v0 = val0, v1 = val1;
38 v0 = (v0 | (v0 << 16)) & 0x0000ffff0000ffff;
39 v1 = (v1 | (v1 << 16)) & 0x0000ffff0000ffff;
40 v0 = (v0 | (v0 << 8)) & 0x00ff00ff00ff00ff;
41 v1 = (v1 | (v1 << 8)) & 0x00ff00ff00ff00ff;
42 v0 = (v0 | (v0 << 4)) & 0x0f0f0f0f0f0f0f0f;
43 v1 = (v1 | (v1 << 4)) & 0x0f0f0f0f0f0f0f0f;
44 v0 = (v0 | (v0 << 2)) & 0x3333333333333333;
45 v1 = (v1 | (v1 << 2)) & 0x3333333333333333;
46 return v0 | (v1 << 2);
47 }
48
49 // This code is about 50% faster than util_bits::DeinterleaveUint32, which
50 // uses a lookup table. The speed advantage is expected to be even larger in
51 // code that mixes bit interleaving with other significant operations since it
52 // doesn't require keeping a 256-byte lookup table in the L1 data cache.
DeinterleaveUint32BitPairs(uint64 code,uint32 * val0,uint32 * val1)53 inline void DeinterleaveUint32BitPairs(uint64 code,
54 uint32 *val0, uint32 *val1) {
55 uint64 v0 = code, v1 = code >> 2;
56 v0 &= 0x3333333333333333;
57 v0 |= v0 >> 2;
58 v1 &= 0x3333333333333333;
59 v1 |= v1 >> 2;
60 v0 &= 0x0f0f0f0f0f0f0f0f;
61 v0 |= v0 >> 4;
62 v1 &= 0x0f0f0f0f0f0f0f0f;
63 v1 |= v1 >> 4;
64 v0 &= 0x00ff00ff00ff00ff;
65 v0 |= v0 >> 8;
66 v1 &= 0x00ff00ff00ff00ff;
67 v1 |= v1 >> 8;
68 v0 &= 0x0000ffff0000ffff;
69 v0 |= v0 >> 16;
70 v1 &= 0x0000ffff0000ffff;
71 v1 |= v1 >> 16;
72 *val0 = v0;
73 *val1 = v1;
74 }
75
76 // Forward declarations.
77 void EncodeS2PointVectorFast(Span<const S2Point> points, Encoder* encoder);
78 void EncodeS2PointVectorCompact(Span<const S2Point> points, Encoder* encoder);
79
80 // To save space (especially for vectors of length 0, 1, and 2), the encoding
81 // format is encoded in the low-order 3 bits of the vector size. Up to 7
82 // encoding formats are supported (only 2 are currently defined). Additional
83 // formats could be supported by using "7" as an overflow indicator and
84 // encoding the actual format separately, but it seems unlikely we will ever
85 // need to do that.
86 static const int kEncodingFormatBits = 3;
87 static const uint8 kEncodingFormatMask = (1 << kEncodingFormatBits) - 1;
88
EncodeS2PointVector(Span<const S2Point> points,CodingHint hint,Encoder * encoder)89 void EncodeS2PointVector(Span<const S2Point> points, CodingHint hint,
90 Encoder* encoder) {
91 switch (hint) {
92 case CodingHint::FAST:
93 return EncodeS2PointVectorFast(points, encoder);
94
95 case CodingHint::COMPACT:
96 return EncodeS2PointVectorCompact(points, encoder);
97
98 default:
99 S2_LOG(DFATAL) << "Unknown CodingHint: " << static_cast<int>(hint);
100 }
101 }
102
Init(Decoder * decoder)103 bool EncodedS2PointVector::Init(Decoder* decoder) {
104 if (decoder->avail() < 1) return false;
105
106 // Peek at the format but don't advance the decoder; the format-specific
107 // Init functions will do that.
108 format_ = static_cast<Format>(*decoder->ptr() & kEncodingFormatMask);
109 switch (format_) {
110 case UNCOMPRESSED:
111 return InitUncompressedFormat(decoder);
112
113 case CELL_IDS:
114 return InitCellIdsFormat(decoder);
115
116 default:
117 return false;
118 }
119 }
120
Decode() const121 vector<S2Point> EncodedS2PointVector::Decode() const {
122 vector<S2Point> points;
123 points.reserve(size_);
124 for (int i = 0; i < size_; ++i) {
125 points.push_back((*this)[i]);
126 }
127 return points;
128 }
129
130
131 //////////////////////////////////////////////////////////////////////////////
132 // UNCOMPRESSED Encoding Format
133 //////////////////////////////////////////////////////////////////////////////
134
135 // Encodes a vector of points, optimizing for (encoding and decoding) speed.
EncodeS2PointVectorFast(Span<const S2Point> points,Encoder * encoder)136 void EncodeS2PointVectorFast(Span<const S2Point> points, Encoder* encoder) {
137 #ifndef IS_LITTLE_ENDIAN
138 S2_LOG(FATAL) << "Not implemented on big-endian architectures";
139 #endif
140
141 // This function always uses the UNCOMPRESSED encoding. The header consists
142 // of a varint64 in the following format:
143 //
144 // bits 0-2: encoding format (UNCOMPRESSED)
145 // bits 3-63: vector size
146 //
147 // This is followed by an array of S2Points in little-endian order.
148 encoder->Ensure(Varint::kMax64 + points.size() * sizeof(S2Point));
149 uint64 size_format = (points.size() << kEncodingFormatBits |
150 EncodedS2PointVector::UNCOMPRESSED);
151 encoder->put_varint64(size_format);
152 encoder->putn(points.data(), points.size() * sizeof(S2Point));
153 }
154
InitUncompressedFormat(Decoder * decoder)155 bool EncodedS2PointVector::InitUncompressedFormat(Decoder* decoder) {
156 #if !defined(IS_LITTLE_ENDIAN) || defined(__arm__) || \
157 defined(ABSL_INTERNAL_NEED_ALIGNED_LOADS)
158 // TODO(ericv): Make this work on platforms that don't support unaligned
159 // 64-bit little-endian reads, e.g. by falling back to
160 //
161 // bit_cast<double>(little_endian::Load64()).
162 //
163 // Maybe the compiler is smart enough that we can do this all the time,
164 // but more likely we will need two cases using the #ifdef above.
165 // (Note that even ARMv7 does not support unaligned 64-bit loads.)
166 S2_LOG(DFATAL) << "Needs architecture with 64-bit little-endian unaligned loads";
167 return false;
168 #endif
169
170 uint64 size;
171 if (!decoder->get_varint64(&size)) return false;
172 size >>= kEncodingFormatBits;
173
174 // Note that the encoding format supports up to 2**59 vertices, but we
175 // currently only support decoding up to 2**32 vertices.
176 if (size > std::numeric_limits<uint32>::max()) return false;
177 size_ = size;
178
179 size_t bytes = size_t{size_} * sizeof(S2Point);
180 if (decoder->avail() < bytes) return false;
181
182 uncompressed_.points = reinterpret_cast<const S2Point*>(decoder->ptr());
183 decoder->skip(bytes);
184 return true;
185 }
186
187
188 //////////////////////////////////////////////////////////////////////////////
189 // CELL_IDS Encoding Format
190 //////////////////////////////////////////////////////////////////////////////
191
192 // Represents a point that can be encoded as an S2CellId center.
193 // (If such an encoding is not possible then level < 0.)
194 struct CellPoint {
195 // Constructor necessary in order to narrow "int" arguments to "int8".
CellPoints2coding::CellPoint196 CellPoint(int level, int face, uint32 si, uint32 ti)
197 : level(level), face(face), si(si), ti(ti) {}
198
199 int8 level, face;
200 uint32 si, ti;
201 };
202
203 // S2CellIds are represented in a special 64-bit format and are encoded in
204 // fixed-size blocks. kBlockSize represents the number of values per block.
205 // Block sizes of 4, 8, 16, and 32 were tested and kBlockSize == 16 seems to
206 // offer the best compression. (Note that kBlockSize == 32 requires some code
207 // modifications which have since been removed.)
208 constexpr int kBlockShift = 4;
209 constexpr size_t kBlockSize = 1 << kBlockShift;
210
211 // Used to indicate that a point must be encoded as an exception (a 24-byte
212 // S2Point) rather than as an S2CellId.
213 constexpr uint64 kException = ~0ULL;
214
215 // Represents the encoding parameters to be used for a given block (consisting
216 // of kBlockSize encodable 64-bit values). See below.
217 struct BlockCode {
218 int delta_bits; // Delta length in bits (multiple of 4)
219 int offset_bits; // Offset length in bits (multiple of 8)
220 int overlap_bits; // {Delta, Offset} overlap in bits (0 or 4)
221 };
222
223 // Returns a bit mask with "n" low-order 1 bits, for 0 <= n <= 64.
BitMask(int n)224 inline uint64 BitMask(int n) {
225 return (n == 0) ? 0 : (~0ULL >> (64 - n));
226 }
227
228 // Returns the maximum number of bits per value at the given S2CellId level.
MaxBitsForLevel(int level)229 inline int MaxBitsForLevel(int level) {
230 return 2 * level + 3;
231 }
232
233 // Returns the number of bits that "base" should be right-shifted in order to
234 // encode only its leading "base_bits" bits, assuming that all points are
235 // encoded at the given S2CellId level.
BaseShift(int level,int base_bits)236 inline int BaseShift(int level, int base_bits) {
237 return max(0, MaxBitsForLevel(level) - base_bits);
238 }
239
240 // Forward declarations.
241 int ChooseBestLevel(Span<const S2Point> points, vector<CellPoint>* cell_points);
242 vector<uint64> ConvertCellsToValues(const vector<CellPoint>& cell_points,
243 int level, bool* have_exceptions);
244 uint64 ChooseBase(const vector<uint64>& values, int level, bool have_exceptions,
245 int* base_bits);
246 BlockCode GetBlockCode(Span<const uint64> values, uint64 base,
247 bool have_exceptions);
248
249 // Encodes a vector of points, optimizing for space.
EncodeS2PointVectorCompact(Span<const S2Point> points,Encoder * encoder)250 void EncodeS2PointVectorCompact(Span<const S2Point> points, Encoder* encoder) {
251 // OVERVIEW
252 // --------
253 //
254 // We attempt to represent each S2Point as the center of an S2CellId. All
255 // S2CellIds must be at the same level. Any points that cannot be encoded
256 // exactly as S2CellId centers are stored as exceptions using 24 bytes each.
257 // If there are so many exceptions that the CELL_IDS encoding does not save
258 // significant space, we give up and use the uncompressed encoding.
259 //
260 // The first step is to choose the best S2CellId level. This requires
261 // converting each point to (face, si, ti) coordinates and checking whether
262 // the point can be represented exactly as an S2CellId center at some level.
263 // We then build a histogram of S2CellId levels (just like the similar code
264 // in S2Polygon::Encode) and choose the best level (or give up, if there are
265 // not enough S2CellId-encodable points).
266 //
267 // The simplest approach would then be to take all the S2CellIds and
268 // right-shift them to remove all the constant bits at the chosen level.
269 // This would give the best spatial locality and hence the smallest deltas.
270 // However instead we give up some spatial locality and use the similar but
271 // faster transformation described below.
272 //
273 // Each encodable point is first converted to the (sj, tj) representation
274 // defined below:
275 //
276 // sj = (((face & 3) << 30) | (si >> 1)) >> (30 - level);
277 // tj = (((face & 4) << 29) | ti) >> (31 - level);
278 //
279 // These two values encode the (face, si, ti) tuple using (2 * level + 3)
280 // bits. To see this, recall that "si" and "ti" are 31-bit values that all
281 // share a common suffix consisting of a "1" bit followed by (30 - level)
282 // "0" bits. The code above right-shifts these values to remove the
283 // constant bits and then prepends the bits for "face", yielding a total of
284 // (level + 2) bits for "sj" and (level + 1) bits for "tj".
285 //
286 // We then combine (sj, tj) into one 64-bit value by interleaving bit pairs:
287 //
288 // v = InterleaveBitPairs(sj, tj);
289 //
290 // (We could also interleave individual bits, but it is faster this way.)
291 // The result is similar to right-shifting an S2CellId by (61 - 2 * level),
292 // except that it is faster to decode and the spatial locality is not quite
293 // as good.
294 //
295 // The 64-bit values are divided into blocks of size 8, and then each value is
296 // encoded as the sum of a base value, a per-block offset, and a per-value
297 // delta within that block:
298 //
299 // v[i,j] = base + offset[i] + delta[i, j]
300 //
301 // where "i" represents a block and "j" represents an entry in that block.
302 //
303 // The deltas for each block are encoded using a fixed number of 4-bit nibbles
304 // (1-16 nibbles per delta). This allows any delta to be accessed in constant
305 // time.
306 //
307 // The "offset" for each block is a 64-bit value encoded in 0-8 bytes. The
308 // offset is left-shifted such that it overlaps the deltas by a configurable
309 // number of bits (either 0 or 4), called the "overlap". The overlap and
310 // offset length (0-8 bytes) are specified per block. The reason for the
311 // overlap is that it allows fewer delta bits to be used in some cases. For
312 // example if base == 0 and the range within a block is 0xf0 to 0x110, then
313 // rather than using 12-bits deltas with an offset of 0, the overlap lets us
314 // use 8-bits deltas with an offset of 0xf0 (saving 7 bytes per block).
315 //
316 // The global minimum value "base" is encoded using 0-7 bytes starting with
317 // the most-significant non-zero bit possible for the chosen level. For
318 // example, if (level == 7) then the encoded values have at most 17 bits, so
319 // if "base" is encoded in 1 byte then it is shifted to occupy bits 9-16.
320 //
321 // Example: at level == 15, there are at most 33 non-zero value bits. The
322 // following shows the bit positions covered by "base", "offset", and "delta"
323 // assuming that "base" and "offset" are encoded in 2 bytes each, deltas are
324 // encoded in 2 nibbles (1 byte) each, and "overlap" is 4 bits:
325 //
326 // Base: 1111111100000000-----------------
327 // Offset: -------------1111111100000000----
328 // Delta: -------------------------00000000
329 // Overlap: ^^^^
330 //
331 // The numbers (0 or 1) in this diagram denote the byte number of the encoded
332 // value. Notice that "base" is shifted so that it starts at the leftmost
333 // possible bit, "delta" always starts at the rightmost possible bit (bit 0),
334 // and "offset" is shifted so that it overlaps "delta" by the chosen "overlap"
335 // (either 0 or 4 bits). Also note that all of these values are summed, and
336 // therefore each value can affect higher-order bits due to carries.
337 //
338 // NOTE(ericv): Encoding deltas in 4-bit rather than 8-bit length increments
339 // reduces encoded sizes by about 7%. Allowing a 4-bit overlap between the
340 // offset and deltas reduces encoded sizes by about 1%. Both optimizations
341 // make the code more complex but don't affect running times significantly.
342 //
343 // ENCODING DETAILS
344 // ----------------
345 //
346 // Now we can move on to the actual encodings. First, there is a 2 byte
347 // header encoded as follows:
348 //
349 // Byte 0, bits 0-2: encoding_format (CELL_IDS)
350 // Byte 0, bit 3: have_exceptions
351 // Byte 0, bits 4-7: (last_block_size - 1)
352 // Byte 1, bits 0-2: base_bytes
353 // Byte 1, bits 3-7: level (0-30)
354 //
355 // This is followed by an EncodedStringVector containing the encoded blocks.
356 // Each block contains kBlockSize (8) values. The total size of the
357 // EncodeS2PointVector is not stored explicity, but instead is calculated as
358 //
359 // num_values == kBlockSize * (num_blocks - 1) + last_block_size .
360 //
361 // (An empty vector has num_blocks == 0 and last_block_size == kBlockSize.)
362 //
363 // Each block starts with a 1 byte header containing the following:
364 //
365 // Byte 0, bits 0-2: (offset_bytes - overlap_nibbles)
366 // Byte 0, bit 3: overlap_nibbles
367 // Byte 0, bits 4-7: (delta_nibbles - 1)
368 //
369 // "overlap_nibbles" is either 0 or 1 (indicating an overlap of 0 or 4 bits),
370 // while "offset_bytes" is in the range 0-8 (indicating the number of bytes
371 // used to encode the offset for this block). Note that some combinations
372 // cannot be encoded: in particular, offset_bytes == 0 can only be encoded
373 // with an overlap of 0 bits, and offset_bytes == 8 can only be encoded with
374 // an overlap of 4 bits. This allows us to encode offset lengths of 0-8
375 // rather than just 0-7 without using an extra bit. (Note that the
376 // combinations that can't be encoded are not useful anyway.)
377 //
378 // The header is followed by "offset_bytes" bytes for the offset, and then
379 // (4 * delta_nibbles) bytes for the deltas.
380 //
381 // If there are any points that could not be represented as S2CellIds, then
382 // "have_exceptions" in the header is true. In that case the delta values
383 // within each block are encoded as (delta + 8), and values 0-7 are used to
384 // represent exceptions. If a block has exceptions, they are encoded
385 // immediately following the array of deltas, and are referenced by encoding
386 // the corresponding exception index (0-7) as the delta.
387 //
388 // TODO(ericv): A vector containing a single leaf cell is currently encoded as
389 // 13 bytes (2 byte header, 7 byte base, 1 byte block count, 1 byte block
390 // length, 1 byte block header, 1 byte delta). However if this case occurs
391 // often, a better solution would be implement a separate format that encodes
392 // the leading k bytes of an S2CellId. It would have a one-byte header
393 // consisting of the encoding format (3 bits) and the number of bytes encoded
394 // (3 bits), followed by the S2CellId bytes. The extra 2 header bits could be
395 // used to store single points using other encodings, e.g. E7.
396 //
397 // If we wind up using 8-value blocks, we could also use the extra bit in the
398 // first byte of the header to indicate that there is only one value, and
399 // then skip the 2nd byte of header and the EncodedStringVector. But this
400 // would be messy because it also requires special cases while decoding.
401 // Essentially this would be a sub-format within the CELL_IDS format.
402
403 // 1. Compute (level, face, si, ti) for each point, build a histogram of
404 // levels, and determine the optimal level to use for encoding (if any).
405 vector<CellPoint> cell_points;
406 int level = ChooseBestLevel(points, &cell_points);
407 if (level < 0) {
408 return EncodeS2PointVectorFast(points, encoder);
409 }
410
411 // 2. Convert the points into encodable 64-bit values. We don't use the
412 // S2CellId itself because it requires a somewhat more complicated bit
413 // interleaving operation.
414 //
415 // TODO(ericv): Benchmark using shifted S2CellIds instead.
416 bool have_exceptions;
417 vector<uint64> values = ConvertCellsToValues(cell_points, level,
418 &have_exceptions);
419
420 // 3. Choose the global encoding parameter "base" (consisting of the bit
421 // prefix shared by all values to be encoded).
422 int base_bits;
423 uint64 base = ChooseBase(values, level, have_exceptions, &base_bits);
424
425 // Now encode the output, starting with the 2-byte header (see above).
426 int num_blocks = (values.size() + kBlockSize - 1) >> kBlockShift;
427 int base_bytes = base_bits >> 3;
428 encoder->Ensure(2 + base_bytes);
429 int last_block_count = values.size() - kBlockSize * (num_blocks - 1);
430 S2_DCHECK_GE(last_block_count, 0);
431 S2_DCHECK_LE(last_block_count, kBlockSize);
432 S2_DCHECK_LE(base_bytes, 7);
433 S2_DCHECK_LE(level, 30);
434 encoder->put8(EncodedS2PointVector::CELL_IDS |
435 (have_exceptions << 3) |
436 ((last_block_count - 1) << 4));
437 encoder->put8(base_bytes | (level << 3));
438
439 // Next we encode 0-7 bytes of "base".
440 int base_shift = BaseShift(level, base_bits);
441 EncodeUintWithLength(base >> base_shift, base_bytes, encoder);
442
443 // Now we encode the contents of each block.
444 StringVectorEncoder blocks;
445 vector<S2Point> exceptions;
446 uint64 offset_bytes_sum = 0;
447 uint64 delta_nibbles_sum = 0;
448 uint64 exceptions_sum = 0;
449 for (int i = 0; i < values.size(); i += kBlockSize) {
450 int block_size = min(kBlockSize, values.size() - i);
451 BlockCode code = GetBlockCode(MakeSpan(&values[i], block_size),
452 base, have_exceptions);
453
454 // Encode the one-byte block header (see above).
455 Encoder* block = blocks.AddViaEncoder();
456 int offset_bytes = code.offset_bits >> 3;
457 int delta_nibbles = code.delta_bits >> 2;
458 int overlap_nibbles = code.overlap_bits >> 2;
459 block->Ensure(1 + offset_bytes + (kBlockSize / 2) * delta_nibbles);
460 S2_DCHECK_LE(offset_bytes - overlap_nibbles, 7);
461 S2_DCHECK_LE(overlap_nibbles, 1);
462 S2_DCHECK_LE(delta_nibbles, 16);
463 block->put8((offset_bytes - overlap_nibbles) |
464 (overlap_nibbles << 3) | (delta_nibbles - 1) << 4);
465
466 // Determine the offset for this block, and whether there are exceptions.
467 uint64 offset = ~0ULL;
468 int num_exceptions = 0;
469 for (int j = 0; j < block_size; ++j) {
470 if (values[i + j] == kException) {
471 num_exceptions += 1;
472 } else {
473 S2_DCHECK_GE(values[i + j], base);
474 offset = min(offset, values[i + j] - base);
475 }
476 }
477 if (num_exceptions == block_size) offset = 0;
478
479 // Encode the offset.
480 int offset_shift = code.delta_bits - code.overlap_bits;
481 offset &= ~BitMask(offset_shift);
482 S2_DCHECK_EQ(offset == 0, offset_bytes == 0);
483 if (offset > 0) {
484 EncodeUintWithLength(offset >> offset_shift, offset_bytes, block);
485 }
486
487 // Encode the deltas, and also gather any exceptions present.
488 int delta_bytes = (delta_nibbles + 1) >> 1;
489 exceptions.clear();
490 for (int j = 0; j < block_size; ++j) {
491 uint64 delta;
492 if (values[i + j] == kException) {
493 delta = exceptions.size();
494 exceptions.push_back(points[i + j]);
495 } else {
496 S2_DCHECK_GE(values[i + j], offset + base);
497 delta = values[i + j] - (offset + base);
498 if (have_exceptions) {
499 S2_DCHECK_LE(delta, ~0ULL - kBlockSize);
500 delta += kBlockSize;
501 }
502 }
503 S2_DCHECK_LE(delta, BitMask(code.delta_bits));
504 if ((delta_nibbles & 1) && (j & 1)) {
505 // Combine this delta with the high-order 4 bits of the previous delta.
506 uint8 last_byte = *(block->base() + block->length() - 1);
507 block->RemoveLast(1);
508 delta = (delta << 4) | (last_byte & 0xf);
509 }
510 EncodeUintWithLength(delta, delta_bytes, block);
511 }
512 // Append any exceptions to the end of the block.
513 if (num_exceptions > 0) {
514 int exceptions_bytes = exceptions.size() * sizeof(S2Point);
515 block->Ensure(exceptions_bytes);
516 block->putn(exceptions.data(), exceptions_bytes);
517 }
518 offset_bytes_sum += offset_bytes;
519 delta_nibbles_sum += delta_nibbles;
520 exceptions_sum += num_exceptions;
521 }
522 blocks.Encode(encoder);
523 }
524
525 // Returns the S2CellId level for which the greatest number of the given points
526 // can be represented as the center of an S2CellId. Initializes "cell_points"
527 // to contain the S2CellId representation of each point (if any). Returns -1
528 // if there is no S2CellId that would result in significant space savings.
ChooseBestLevel(Span<const S2Point> points,vector<CellPoint> * cell_points)529 int ChooseBestLevel(Span<const S2Point> points,
530 vector<CellPoint>* cell_points) {
531 cell_points->clear();
532 cell_points->reserve(points.size());
533
534 // Count the number of points at each level.
535 int level_counts[S2CellId::kMaxLevel + 1] = { 0 };
536 for (const S2Point& point : points) {
537 int face;
538 uint32 si, ti;
539 int level = S2::XYZtoFaceSiTi(point, &face, &si, &ti);
540 cell_points->push_back(CellPoint(level, face, si, ti));
541 if (level >= 0) ++level_counts[level];
542 }
543 // Choose the level for which the most points can be encoded.
544 int best_level = 0;
545 for (int level = 1; level <= S2CellId::kMaxLevel; ++level) {
546 if (level_counts[level] > level_counts[best_level]) {
547 best_level = level;
548 }
549 }
550 // The uncompressed encoding is smaller *and* faster when very few of the
551 // points are encodable as S2CellIds. The CELL_IDS encoding uses about 1
552 // extra byte per point in this case, consisting of up to a 3 byte
553 // EncodedStringVector offset for each block, a 1 byte block header, and 4
554 // bits per delta (encoding an exception number from 0-7), for a total of 8
555 // bytes per block. This represents a space overhead of about 4%, so we
556 // require that at least 5% of the input points should be encodable as
557 // S2CellIds in order for the CELL_IDS format to be worthwhile.
558 constexpr double kMinEncodableFraction = 0.05;
559 if (level_counts[best_level] <= kMinEncodableFraction * points.size()) {
560 return -1;
561 }
562 return best_level;
563 }
564
565 // Given a vector of points in CellPoint format and an S2CellId level that has
566 // been chosen for encoding, returns a vector of 64-bit values that should be
567 // encoded in order to represent these points. Points that cannot be
568 // represented losslessly as the center of an S2CellId at the chosen level are
569 // indicated by the value "kException". "have_exceptions" is set to indicate
570 // whether any exceptions were present.
ConvertCellsToValues(const vector<CellPoint> & cell_points,int level,bool * have_exceptions)571 vector<uint64> ConvertCellsToValues(const vector<CellPoint>& cell_points,
572 int level, bool* have_exceptions) {
573 vector<uint64> values;
574 values.reserve(cell_points.size());
575 *have_exceptions = false;
576 int shift = S2CellId::kMaxLevel - level;
577 for (CellPoint cp : cell_points) {
578 if (cp.level != level) {
579 values.push_back(kException);
580 *have_exceptions = true;
581 } else {
582 // Note that bit 31 of tj is always zero, and that bits are interleaved in
583 // such a way that bit 63 of the result is always zero.
584 //
585 // The S2CellId version of the following code is:
586 // uint64 v = S2CellId::FromFaceIJ(cp.face, cp.si >> 1, cp.ti >> 1).
587 // parent(level).id() >> (2 * shift + 1);
588 uint32 sj = (((cp.face & 3) << 30) | (cp.si >> 1)) >> shift;
589 uint32 tj = (((cp.face & 4) << 29) | cp.ti) >> (shift + 1);
590 uint64 v = InterleaveUint32BitPairs(sj, tj);
591 S2_DCHECK_LE(v, BitMask(MaxBitsForLevel(level)));
592 values.push_back(v);
593 }
594 }
595 return values;
596 }
597
ChooseBase(const vector<uint64> & values,int level,bool have_exceptions,int * base_bits)598 uint64 ChooseBase(const vector<uint64>& values, int level, bool have_exceptions,
599 int* base_bits) {
600 // Find the minimum and maximum non-exception values to be represented.
601 uint64 v_min = kException, v_max = 0;
602 for (auto v : values) {
603 if (v != kException) {
604 v_min = min(v_min, v);
605 v_max = max(v_max, v);
606 }
607 }
608 if (v_min == kException) return 0;
609
610 // Generally "base" is chosen as the bit prefix shared by v_min and v_max.
611 // However there are a few adjustments we need to make.
612 //
613 // 1. Encodings are usually smaller if the bits represented by "base" and
614 // "delta" do not overlap. Usually the shared prefix rule does this
615 // automatically, but if v_min == v_max or there are special circumstances
616 // that increase delta_bits (such as values.size() == 1) then we need to
617 // make an adjustment.
618 //
619 // 2. The format only allows us to represent up to 7 bytes (56 bits) of
620 // "base", so we need to ensure that "base" conforms to this requirement.
621 int min_delta_bits = (have_exceptions || values.size() == 1) ? 8 : 4;
622 int excluded_bits = max(Bits::Log2Floor64(v_min ^ v_max) + 1,
623 max(min_delta_bits, BaseShift(level, 56)));
624 uint64 base = v_min & ~BitMask(excluded_bits);
625
626 // Determine how many bytes are needed to represent this prefix.
627 if (base == 0) {
628 *base_bits = 0;
629 } else {
630 int low_bit = Bits::FindLSBSetNonZero64(base);
631 *base_bits = (MaxBitsForLevel(level) - low_bit + 7) & ~7;
632 }
633
634 // Since base_bits has been rounded up to a multiple of 8, we may now be
635 // able to represent additional bits of v_min. In general this reduces the
636 // final encoded size.
637 //
638 // NOTE(ericv): A different strategy for choosing "base" is to encode all
639 // blocks under the assumption that "base" equals v_min exactly, and then
640 // set base equal to the minimum-length prefix of "v_min" that allows these
641 // encodings to be used. This strategy reduces the encoded sizes by
642 // about 0.2% relative to the strategy here, but is more complicated.
643 return v_min & ~BitMask(BaseShift(level, *base_bits));
644 }
645
646 // Returns true if the range of values [d_min, d_max] can be encoded using the
647 // specified parameters (delta_bits, overlap_bits, and have_exceptions).
CanEncode(uint64 d_min,uint64 d_max,int delta_bits,int overlap_bits,bool have_exceptions)648 bool CanEncode(uint64 d_min, uint64 d_max, int delta_bits,
649 int overlap_bits, bool have_exceptions) {
650 // "offset" can't represent the lowest (delta_bits - overlap_bits) of d_min.
651 d_min &= ~BitMask(delta_bits - overlap_bits);
652
653 // The maximum delta is reduced by kBlockSize if any exceptions exist, since
654 // deltas 0..kBlockSize-1 are used to indicate exceptions.
655 uint64 max_delta = BitMask(delta_bits);
656 if (have_exceptions) {
657 if (max_delta < kBlockSize) return false;
658 max_delta -= kBlockSize;
659 }
660 // The first test below is necessary to avoid 64-bit overflow.
661 return (d_min > ~max_delta) || (d_min + max_delta >= d_max);
662 }
663
664 // Given a vector of 64-bit values to be encoded and an S2CellId level, returns
665 // the optimal encoding parameters that should be used to encode each block.
666 // Also returns the global minimum value "base" and the number of bits that
667 // should be used to encode it ("base_bits").
GetBlockCode(Span<const uint64> values,uint64 base,bool have_exceptions)668 BlockCode GetBlockCode(Span<const uint64> values, uint64 base,
669 bool have_exceptions) {
670 // "b_min" and "b_max"n are the minimum and maximum values within this block.
671 uint64 b_min = kException, b_max = 0;
672 for (uint64 v : values) {
673 if (v != kException) {
674 b_min = min(b_min, v);
675 b_max = max(b_max, v);
676 }
677 }
678 if (b_min == kException) {
679 // All values in this block are exceptions.
680 return BlockCode{4, 0, 0};
681 }
682
683 // Adjust the min/max values so that they are relative to "base".
684 b_min -= base;
685 b_max -= base;
686
687 // Determine the minimum possible delta length and overlap that can be used
688 // to encode this block. The block will usually be encodable using the
689 // number of bits in (b_max - b_min) rounded up to a multiple of 4. If this
690 // is not possible, the preferred solution is to shift "offset" so that the
691 // delta and offset values overlap by 4 bits (since this only costs an
692 // average of 4 extra bits per block). Otherwise we increase the delta size
693 // by 4 bits. Certain cases require that both of these techniques are used.
694 //
695 // Example 1: b_min = 0x72, b_max = 0x7e. The range is 0x0c. This can be
696 // encoded using delta_bits = 4 and overlap_bits = 0, which allows us to
697 // represent an offset of 0x70 and a maximum delta of 0x0f, so that we can
698 // encode values up to 0x7f.
699 //
700 // Example 2: b_min = 0x78, b_max = 0x84. The range is 0x0c, but in this
701 // case it is not sufficient to use delta_bits = 4 and overlap_bits = 0
702 // because we can again only represent an offset of 0x70, so the maximum
703 // delta of 0x0f only lets us encode values up to 0x7f. However if we
704 // increase the overlap to 4 bits then we can represent an offset of 0x78,
705 // which lets us encode values up to 0x78 + 0x0f = 0x87.
706 //
707 // Example 3: b_min = 0x08, b_max = 0x104. The range is 0xfc, so we should
708 // be able to use 8-bit deltas. But even with a 4-bit overlap, we can still
709 // only encode offset = 0 and a maximum value of 0xff. (We don't allow
710 // bigger overlaps because statistically they are not worthwhile.) Instead
711 // we increase the delta size to 12 bits, which handles this case easily.
712 //
713 // Example 4: b_min = 0xf08, b_max = 0x1004. The range is 0xfc, so we
714 // should be able to use 8-bit deltas. With 8-bit deltas and no overlap, we
715 // have offset = 0xf00 and a maximum encodable value of 0xfff. With 8-bit
716 // deltas and a 4-bit overlap, we still have offset = 0xf00 and a maximum
717 // encodable value of 0xfff. Even with 12-bit deltas, we have offset = 0
718 // and we can still only represent 0xfff. However with delta_bits = 12 and
719 // overlap_bits = 4, we can represent offset = 0xf00 and a maximum encodable
720 // value of 0xf00 + 0xfff = 0x1eff.
721 //
722 // It is possible to show that this last example is the worst case, i.e. we
723 // do not need to consider increasing delta_bits or overlap_bits further.
724 int delta_bits = (max(1, Bits::Log2Floor64(b_max - b_min)) + 3) & ~3;
725 int overlap_bits = 0;
726 if (!CanEncode(b_min, b_max, delta_bits, 0, have_exceptions)) {
727 if (CanEncode(b_min, b_max, delta_bits, 4, have_exceptions)) {
728 overlap_bits = 4;
729 } else {
730 S2_DCHECK_LE(delta_bits, 60);
731 delta_bits += 4;
732 if (!CanEncode(b_min, b_max, delta_bits, 0, have_exceptions)) {
733 S2_DCHECK(CanEncode(b_min, b_max, delta_bits, 4, have_exceptions));
734 overlap_bits = 4;
735 }
736 }
737 }
738
739 // Avoid wasting 4 bits of delta when the block size is 1. This reduces the
740 // encoding size for single leaf cells by one byte.
741 if (values.size() == 1) {
742 S2_DCHECK(delta_bits == 4 && overlap_bits == 0);
743 delta_bits = 8;
744 }
745
746 // Now determine the number of bytes needed to encode "offset", given the
747 // chosen delta length.
748 uint64 max_delta = BitMask(delta_bits) - (have_exceptions ? kBlockSize : 0);
749 int offset_bits = 0;
750 if (b_max > max_delta) {
751 // At least one byte of offset is required. Round up the minimum offset
752 // to the next encodable value, and determine how many bits it has.
753 int offset_shift = delta_bits - overlap_bits;
754 uint64 mask = BitMask(offset_shift);
755 uint64 min_offset = (b_max - max_delta + mask) & ~mask;
756 S2_DCHECK_GT(min_offset, 0);
757 offset_bits =
758 (Bits::FindMSBSetNonZero64(min_offset) + 1 - offset_shift + 7) & ~7;
759 // A 64-bit offset can only be encoded with an overlap of 4 bits.
760 if (offset_bits == 64) overlap_bits = 4;
761 }
762 return BlockCode{delta_bits, offset_bits, overlap_bits};
763 }
764
InitCellIdsFormat(Decoder * decoder)765 bool EncodedS2PointVector::InitCellIdsFormat(Decoder* decoder) {
766 // This function inverts the encodings documented above.
767 // First we decode the two-byte header.
768 if (decoder->avail() < 2) return false;
769 uint8 header1 = decoder->get8();
770 uint8 header2 = decoder->get8();
771 S2_DCHECK_EQ(header1 & 7, CELL_IDS);
772 int last_block_count, base_bytes;
773 cell_ids_.have_exceptions = (header1 & 8) != 0;
774 last_block_count = (header1 >> 4) + 1;
775 base_bytes = header2 & 7;
776 cell_ids_.level = header2 >> 3;
777
778 // Decode the base value (if any).
779 uint64 base;
780 if (!DecodeUintWithLength(base_bytes, decoder, &base)) return false;
781 cell_ids_.base = base << BaseShift(cell_ids_.level, base_bytes << 3);
782
783 // Initialize the vector of encoded blocks.
784 if (!cell_ids_.blocks.Init(decoder)) return false;
785 size_ = kBlockSize * (cell_ids_.blocks.size() - 1) + last_block_count;
786 return true;
787 }
788
DecodeCellIdsFormat(int i) const789 S2Point EncodedS2PointVector::DecodeCellIdsFormat(int i) const {
790 // This function inverts the encodings documented above.
791
792 // First we decode the block header.
793 const char* ptr = cell_ids_.blocks.GetStart(i >> kBlockShift);
794 uint8 header = *ptr++;
795 int overlap_nibbles = (header >> 3) & 1;
796 int offset_bytes = (header & 7) + overlap_nibbles;
797 int delta_nibbles = (header >> 4) + 1;
798
799 // Decode the offset for this block.
800 int offset_shift = (delta_nibbles - overlap_nibbles) << 2;
801 uint64 offset = GetUintWithLength<uint64>(ptr, offset_bytes) << offset_shift;
802 ptr += offset_bytes;
803
804 // Decode the delta for the requested value.
805 int delta_nibble_offset = (i & (kBlockSize - 1)) * delta_nibbles;
806 int delta_bytes = (delta_nibbles + 1) >> 1;
807 const char* delta_ptr = ptr + (delta_nibble_offset >> 1);
808 uint64 delta = GetUintWithLength<uint64>(delta_ptr, delta_bytes);
809 delta >>= (delta_nibble_offset & 1) << 2;
810 delta &= BitMask(delta_nibbles << 2);
811
812 // Test whether this point is encoded as an exception.
813 if (cell_ids_.have_exceptions) {
814 if (delta < kBlockSize) {
815 int block_size = min(kBlockSize, size_ - (i & ~(kBlockSize - 1)));
816 ptr += (block_size * delta_nibbles + 1) >> 1;
817 ptr += delta * sizeof(S2Point);
818 return *reinterpret_cast<const S2Point*>(ptr);
819 }
820 delta -= kBlockSize;
821 }
822
823 // Otherwise convert the 64-bit value back to an S2Point.
824 uint64 value = cell_ids_.base + offset + delta;
825 int shift = S2CellId::kMaxLevel - cell_ids_.level;
826
827 // The S2CellId version of the following code is:
828 // return S2CellId(((value << 1) | 1) << (2 * shift)).ToPoint();
829 uint32 sj, tj;
830 DeinterleaveUint32BitPairs(value, &sj, &tj);
831 int si = (((sj << 1) | 1) << shift) & 0x7fffffff;
832 int ti = (((tj << 1) | 1) << shift) & 0x7fffffff;
833 int face = ((sj << shift) >> 30) | (((tj << (shift + 1)) >> 29) & 4);
834 return S2::FaceUVtoXYZ(face, S2::STtoUV(S2::SiTitoST(si)),
835 S2::STtoUV(S2::SiTitoST(ti))).Normalize();
836 }
837
838 } // namespace s2coding
839