1 // Copyright 2018 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS-IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15
16 // Author: ericv@google.com (Eric Veach)
17
18 #include "s2/encoded_s2cell_id_vector.h"
19
20 using absl::Span;
21 using std::max;
22 using std::min;
23 using std::vector;
24
25 namespace s2coding {
26
EncodeS2CellIdVector(Span<const S2CellId> v,Encoder * encoder)27 void EncodeS2CellIdVector(Span<const S2CellId> v, Encoder* encoder) {
28 // v[i] is encoded as (base + (deltas[i] << shift)).
29 //
30 // "base" consists of 0-7 bytes, and is always shifted so that its bytes are
31 // the most-significant bytes of a uint64.
32 //
33 // "deltas" is an EncodedUintVector<uint64>, which means that all deltas
34 // have a fixed-length encoding determined by the largest delta.
35 //
36 // "shift" is in the range 0..56. The shift value is odd only if all
37 // S2CellIds are at the same level, in which case the bit at position
38 // (shift - 1) is automatically set to 1 in "base".
39 //
40 // "base" (3 bits) and "shift" (6 bits) are encoded in either one or two
41 // bytes as follows:
42 //
43 // - if (shift <= 4 or shift is even), then 1 byte
44 // - otherwise 2 bytes
45 //
46 // Note that (shift == 1) means that all S2CellIds are leaf cells, and
47 // (shift == 2) means that all S2CellIds are at level 29.
48 //
49 // The full encoded format is as follows:
50 //
51 // Byte 0, bits 0-2: base length (0-7 bytes)
52 // Byte 0, bits 3-7: encoded shift (see below)
53 // Byte 1: extended shift code (only needed for odd shifts >= 5)
54 // Followed by 0-7 bytes of "base"
55 // Followed by an EncodedUintVector of deltas.
56
57 uint64 v_or = 0, v_and = ~0ULL, v_min = ~0ULL, v_max = 0;
58 for (auto cellid : v) {
59 v_or |= cellid.id();
60 v_and &= cellid.id();
61 v_min = min(v_min, cellid.id());
62 v_max = max(v_max, cellid.id());
63 }
64 // These variables represent the values that will used during encoding.
65 uint64 e_base = 0; // Base value.
66 int e_base_len = 0; // Number of bytes to represent "base".
67 int e_shift = 0; // Delta shift.
68 int e_max_delta_msb = 0; // Bit position of the MSB of the largest delta.
69 if (v_or > 0) {
70 // We only allow even shifts, unless all values have the same low bit (in
71 // which case the shift is odd and the preceding bit is implicitly on).
72 // There is no point in allowing shifts > 56 since deltas are encoded in
73 // at least 1 byte each.
74 e_shift = min(56, Bits::FindLSBSetNonZero64(v_or) & ~1);
75 if (v_and & (1ULL << e_shift)) ++e_shift; // All S2CellIds same level.
76
77 // "base" consists of the "base_len" most significant bytes of the minimum
78 // S2CellId. We consider all possible values of "base_len" (0..7) and
79 // choose the one that minimizes the total encoding size.
80 uint64 e_bytes = ~0ULL; // Best encoding size so far.
81 for (int len = 0; len < 8; ++len) {
82 // "t_base" is the base value being tested (first "len" bytes of v_min).
83 // "t_max_delta_msb" is the most-significant bit position of the largest
84 // delta (or zero if there are no deltas, i.e. if v.size() == 0).
85 // "t_bytes" is the total size of the variable portion of the encoding.
86 uint64 t_base = v_min & ~(~0ULL >> (8 * len));
87 int t_max_delta_msb =
88 max(0, Bits::Log2Floor64((v_max - t_base) >> e_shift));
89 uint64 t_bytes = len + v.size() * ((t_max_delta_msb >> 3) + 1);
90 if (t_bytes < e_bytes) {
91 e_base = t_base;
92 e_base_len = len;
93 e_max_delta_msb = t_max_delta_msb;
94 e_bytes = t_bytes;
95 }
96 }
97 // It takes one extra byte to encode odd shifts (i.e., the case where all
98 // S2CellIds are at the same level), so check whether we can get the same
99 // encoding size per delta using an even shift.
100 if ((e_shift & 1) && (e_max_delta_msb & 7) != 7) --e_shift;
101 }
102 S2_DCHECK_LE(e_base_len, 7);
103 S2_DCHECK_LE(e_shift, 56);
104 encoder->Ensure(2 + e_base_len);
105
106 // As described above, "shift" and "base_len" are encoded in 1 or 2 bytes.
107 // "shift_code" is 5 bits:
108 // values <= 28 represent even shifts in the range 0..56
109 // values 29, 30 represent odd shifts 1 and 3
110 // value 31 indicates that the shift is odd and encoded in the next byte
111 int shift_code = e_shift >> 1;
112 if (e_shift & 1) shift_code = min(31, shift_code + 29);
113 encoder->put8((shift_code << 3) | e_base_len);
114 if (shift_code == 31) {
115 encoder->put8(e_shift >> 1); // Shift is always odd, so 3 bits unused.
116 }
117 // Encode the "base_len" most-significant bytes of "base".
118 uint64 base_bytes = e_base >> (64 - 8 * max(1, e_base_len));
119 EncodeUintWithLength<uint64>(base_bytes, e_base_len, encoder);
120
121 // Finally, encode the vector of deltas.
122 vector<uint64> deltas;
123 deltas.reserve(v.size());
124 for (auto cellid : v) {
125 deltas.push_back((cellid.id() - e_base) >> e_shift);
126 }
127 EncodeUintVector<uint64>(deltas, encoder);
128 }
129
Init(Decoder * decoder)130 bool EncodedS2CellIdVector::Init(Decoder* decoder) {
131 // All encodings have at least 2 bytes (one for our header and one for the
132 // EncodedUintVector header), so this is safe.
133 if (decoder->avail() < 2) return false;
134
135 // Invert the encoding of (shift_code, base_len) described above.
136 int code_plus_len = decoder->get8();
137 int shift_code = code_plus_len >> 3;
138 if (shift_code == 31) {
139 shift_code = 29 + decoder->get8();
140 }
141 // Decode the "base_len" most-significant bytes of "base".
142 int base_len = code_plus_len & 7;
143 if (!DecodeUintWithLength(base_len, decoder, &base_)) return false;
144 base_ <<= 64 - 8 * max(1, base_len);
145
146 // Invert the encoding of "shift_code" described above.
147 if (shift_code >= 29) {
148 shift_ = 2 * (shift_code - 29) + 1;
149 base_ |= 1ULL << (shift_ - 1);
150 } else {
151 shift_ = 2 * shift_code;
152 }
153 return deltas_.Init(decoder);
154 }
155
Decode() const156 vector<S2CellId> EncodedS2CellIdVector::Decode() const {
157 vector<S2CellId> result(size());
158 for (int i = 0; i < size(); ++i) {
159 result[i] = (*this)[i];
160 }
161 return result;
162 }
163
164 } // namespace s2coding
165