1 // Copyright 2018 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS-IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 
16 // Author: ericv@google.com (Eric Veach)
17 
18 #include "s2/encoded_s2cell_id_vector.h"
19 
20 using absl::Span;
21 using std::max;
22 using std::min;
23 using std::vector;
24 
25 namespace s2coding {
26 
EncodeS2CellIdVector(Span<const S2CellId> v,Encoder * encoder)27 void EncodeS2CellIdVector(Span<const S2CellId> v, Encoder* encoder) {
28   // v[i] is encoded as (base + (deltas[i] << shift)).
29   //
30   // "base" consists of 0-7 bytes, and is always shifted so that its bytes are
31   // the most-significant bytes of a uint64.
32   //
33   // "deltas" is an EncodedUintVector<uint64>, which means that all deltas
34   // have a fixed-length encoding determined by the largest delta.
35   //
36   // "shift" is in the range 0..56.  The shift value is odd only if all
37   // S2CellIds are at the same level, in which case the bit at position
38   // (shift - 1) is automatically set to 1 in "base".
39   //
40   // "base" (3 bits) and "shift" (6 bits) are encoded in either one or two
41   // bytes as follows:
42   //
43   //   - if (shift <= 4 or shift is even), then 1 byte
44   //   - otherwise 2 bytes
45   //
46   // Note that (shift == 1) means that all S2CellIds are leaf cells, and
47   // (shift == 2) means that all S2CellIds are at level 29.
48   //
49   // The full encoded format is as follows:
50   //
51   //  Byte 0, bits 0-2: base length (0-7 bytes)
52   //  Byte 0, bits 3-7: encoded shift (see below)
53   //  Byte 1: extended shift code (only needed for odd shifts >= 5)
54   //  Followed by 0-7 bytes of "base"
55   //  Followed by an EncodedUintVector of deltas.
56 
57   uint64 v_or = 0, v_and = ~0ULL, v_min = ~0ULL, v_max = 0;
58   for (auto cellid : v) {
59     v_or |= cellid.id();
60     v_and &= cellid.id();
61     v_min = min(v_min, cellid.id());
62     v_max = max(v_max, cellid.id());
63   }
64   // These variables represent the values that will used during encoding.
65   uint64 e_base = 0;        // Base value.
66   int e_base_len = 0;       // Number of bytes to represent "base".
67   int e_shift = 0;          // Delta shift.
68   int e_max_delta_msb = 0;  // Bit position of the MSB of the largest delta.
69   if (v_or > 0) {
70     // We only allow even shifts, unless all values have the same low bit (in
71     // which case the shift is odd and the preceding bit is implicitly on).
72     // There is no point in allowing shifts > 56 since deltas are encoded in
73     // at least 1 byte each.
74     e_shift = min(56, Bits::FindLSBSetNonZero64(v_or) & ~1);
75     if (v_and & (1ULL << e_shift)) ++e_shift;  // All S2CellIds same level.
76 
77     // "base" consists of the "base_len" most significant bytes of the minimum
78     // S2CellId.  We consider all possible values of "base_len" (0..7) and
79     // choose the one that minimizes the total encoding size.
80     uint64 e_bytes = ~0ULL;  // Best encoding size so far.
81     for (int len = 0; len < 8; ++len) {
82       // "t_base" is the base value being tested (first "len" bytes of v_min).
83       // "t_max_delta_msb" is the most-significant bit position of the largest
84       // delta (or zero if there are no deltas, i.e. if v.size() == 0).
85       // "t_bytes" is the total size of the variable portion of the encoding.
86       uint64 t_base = v_min & ~(~0ULL >> (8 * len));
87       int t_max_delta_msb =
88           max(0, Bits::Log2Floor64((v_max - t_base) >> e_shift));
89       uint64 t_bytes = len + v.size() * ((t_max_delta_msb >> 3) + 1);
90       if (t_bytes < e_bytes) {
91         e_base = t_base;
92         e_base_len = len;
93         e_max_delta_msb = t_max_delta_msb;
94         e_bytes = t_bytes;
95       }
96     }
97     // It takes one extra byte to encode odd shifts (i.e., the case where all
98     // S2CellIds are at the same level), so check whether we can get the same
99     // encoding size per delta using an even shift.
100     if ((e_shift & 1) && (e_max_delta_msb & 7) != 7) --e_shift;
101   }
102   S2_DCHECK_LE(e_base_len, 7);
103   S2_DCHECK_LE(e_shift, 56);
104   encoder->Ensure(2 + e_base_len);
105 
106   // As described above, "shift" and "base_len" are encoded in 1 or 2 bytes.
107   // "shift_code" is 5 bits:
108   //   values <= 28 represent even shifts in the range 0..56
109   //   values 29, 30 represent odd shifts 1 and 3
110   //   value 31 indicates that the shift is odd and encoded in the next byte
111   int shift_code = e_shift >> 1;
112   if (e_shift & 1) shift_code = min(31, shift_code + 29);
113   encoder->put8((shift_code << 3) | e_base_len);
114   if (shift_code == 31) {
115     encoder->put8(e_shift >> 1);  // Shift is always odd, so 3 bits unused.
116   }
117   // Encode the "base_len" most-significant bytes of "base".
118   uint64 base_bytes = e_base >> (64 - 8 * max(1, e_base_len));
119   EncodeUintWithLength<uint64>(base_bytes, e_base_len, encoder);
120 
121   // Finally, encode the vector of deltas.
122   vector<uint64> deltas;
123   deltas.reserve(v.size());
124   for (auto cellid : v) {
125     deltas.push_back((cellid.id() - e_base) >> e_shift);
126   }
127   EncodeUintVector<uint64>(deltas, encoder);
128 }
129 
Init(Decoder * decoder)130 bool EncodedS2CellIdVector::Init(Decoder* decoder) {
131   // All encodings have at least 2 bytes (one for our header and one for the
132   // EncodedUintVector header), so this is safe.
133   if (decoder->avail() < 2) return false;
134 
135   // Invert the encoding of (shift_code, base_len) described above.
136   int code_plus_len = decoder->get8();
137   int shift_code = code_plus_len >> 3;
138   if (shift_code == 31) {
139     shift_code = 29 + decoder->get8();
140   }
141   // Decode the "base_len" most-significant bytes of "base".
142   int base_len = code_plus_len & 7;
143   if (!DecodeUintWithLength(base_len, decoder, &base_)) return false;
144   base_ <<= 64 - 8 * max(1, base_len);
145 
146   // Invert the encoding of "shift_code" described above.
147   if (shift_code >= 29) {
148     shift_ = 2 * (shift_code - 29) + 1;
149     base_ |= 1ULL << (shift_ - 1);
150   } else {
151     shift_ = 2 * shift_code;
152   }
153   return deltas_.Init(decoder);
154 }
155 
Decode() const156 vector<S2CellId> EncodedS2CellIdVector::Decode() const {
157   vector<S2CellId> result(size());
158   for (int i = 0; i < size(); ++i) {
159     result[i] = (*this)[i];
160   }
161   return result;
162 }
163 
164 }  // namespace s2coding
165