1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <folly/GroupVarint.h>
18 
19 #include <folly/container/Array.h>
20 
21 #if FOLLY_HAVE_GROUP_VARINT
22 namespace folly {
23 
24 const uint32_t GroupVarint32::kMask[] = {
25     0xff,
26     0xffff,
27     0xffffff,
28     0xffffffff,
29 };
30 
31 const uint64_t GroupVarint64::kMask[] = {
32     0xff,
33     0xffff,
34     0xffffff,
35     0xffffffff,
36     0xffffffffffULL,
37     0xffffffffffffULL,
38     0xffffffffffffffULL,
39     0xffffffffffffffffULL,
40 };
41 
42 namespace detail {
43 
44 struct group_varint_table_base_make_item {
get_dfolly::detail::group_varint_table_base_make_item45   constexpr std::size_t get_d(std::size_t index, std::size_t j) const {
46     return 1u + ((index >> (2 * j)) & 3u);
47   }
get_offsetfolly::detail::group_varint_table_base_make_item48   constexpr std::size_t get_offset(std::size_t index, std::size_t j) const {
49     // clang-format off
50     return
51         (j > 0 ? get_d(index, 0) : 0) +
52         (j > 1 ? get_d(index, 1) : 0) +
53         (j > 2 ? get_d(index, 2) : 0) +
54         (j > 3 ? get_d(index, 3) : 0) +
55         0;
56     // clang-format on
57   }
58 };
59 
60 struct group_varint_table_length_make_item : group_varint_table_base_make_item {
operator ()folly::detail::group_varint_table_length_make_item61   constexpr std::uint8_t operator()(std::size_t index) const {
62     return 1u + get_offset(index, 4);
63   }
64 };
65 
66 //  Reference: http://www.stepanovpapers.com/CIKM_2011.pdf
67 //
68 //  From 17 encoded bytes, we may use between 5 and 17 bytes to encode 4
69 //  integers.  The first byte is a key that indicates how many bytes each of
70 //  the 4 integers takes:
71 //
72 //  bit 0..1: length-1 of first integer
73 //  bit 2..3: length-1 of second integer
74 //  bit 4..5: length-1 of third integer
75 //  bit 6..7: length-1 of fourth integer
76 //
77 //  The value of the first byte is used as the index in a table which returns
78 //  a mask value for the SSSE3 PSHUFB instruction, which takes an XMM register
79 //  (16 bytes) and shuffles bytes from it into a destination XMM register
80 //  (optionally setting some of them to 0)
81 //
82 //  For example, if the key has value 4, that means that the first integer
83 //  uses 1 byte, the second uses 2 bytes, the third and fourth use 1 byte each,
84 //  so we set the mask value so that
85 //
86 //  r[0] = a[0]
87 //  r[1] = 0
88 //  r[2] = 0
89 //  r[3] = 0
90 //
91 //  r[4] = a[1]
92 //  r[5] = a[2]
93 //  r[6] = 0
94 //  r[7] = 0
95 //
96 //  r[8] = a[3]
97 //  r[9] = 0
98 //  r[10] = 0
99 //  r[11] = 0
100 //
101 //  r[12] = a[4]
102 //  r[13] = 0
103 //  r[14] = 0
104 //  r[15] = 0
105 
106 struct group_varint_table_sse_mask_make_item
107     : group_varint_table_base_make_item {
partial_itemfolly::detail::group_varint_table_sse_mask_make_item108   constexpr auto partial_item(
109       std::size_t d, std::size_t offset, std::size_t k) const {
110     // if k < d, the j'th integer uses d bytes, consume them
111     // set remaining bytes in result to 0
112     // 0xff: set corresponding byte in result to 0
113     return std::uint32_t((k < d ? offset + k : std::size_t(0xff)) << (8 * k));
114   }
115 
item_implfolly::detail::group_varint_table_sse_mask_make_item116   constexpr auto item_impl(std::size_t d, std::size_t offset) const {
117     // clang-format off
118     return
119         partial_item(d, offset, 0) |
120         partial_item(d, offset, 1) |
121         partial_item(d, offset, 2) |
122         partial_item(d, offset, 3) |
123         0;
124     // clang-format on
125   }
126 
itemfolly::detail::group_varint_table_sse_mask_make_item127   constexpr auto item(std::size_t index, std::size_t j) const {
128     return item_impl(get_d(index, j), get_offset(index, j));
129   }
130 
operator ()folly::detail::group_varint_table_sse_mask_make_item131   constexpr auto operator()(std::size_t index) const {
132     return std::array<std::uint32_t, 4>{{
133         item(index, 0),
134         item(index, 1),
135         item(index, 2),
136         item(index, 3),
137     }};
138   }
139 };
140 
141 #if FOLLY_SSE >= 4
142 alignas(16) FOLLY_STORAGE_CONSTEXPR
143     decltype(groupVarintSSEMasks) groupVarintSSEMasks =
144         make_array_with<256>(group_varint_table_sse_mask_make_item{});
145 #endif
146 
147 FOLLY_STORAGE_CONSTEXPR decltype(groupVarintLengths) groupVarintLengths =
148     make_array_with<256>(group_varint_table_length_make_item{});
149 
150 } // namespace detail
151 
152 } // namespace folly
153 #endif
154