1 #include <stdint.h>
2 #include <immintrin.h>
3 #include <string.h>
4 #include "params.h"
5 #include "consts.h"
6 #include "rejsample.h"
7 
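/* Uncomment to derive the compaction shuffle indices with pdep/pext
 * instead of loading them from the idx[] lookup table. */
//#define BMI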
9 
#ifndef BMI
/*
 * Compaction table for 8 lanes of 16 bits, indexed by an 8-bit "keep" mask.
 *
 * Row m lists, in ascending order, the byte offset 2*k of every lane k whose
 * bit is set in m. Unused trailing slots hold -1, which is stored as 0xFF in
 * a uint8_t. Only compiled when BMI is not defined.
 */
static const uint8_t idx[256][8] = {
  {-1, -1, -1, -1, -1, -1, -1, -1},
  { 0, -1, -1, -1, -1, -1, -1, -1},
  { 2, -1, -1, -1, -1, -1, -1, -1},
  { 0,  2, -1, -1, -1, -1, -1, -1},
  { 4, -1, -1, -1, -1, -1, -1, -1},
  { 0,  4, -1, -1, -1, -1, -1, -1},
  { 2,  4, -1, -1, -1, -1, -1, -1},
  { 0,  2,  4, -1, -1, -1, -1, -1},
  { 6, -1, -1, -1, -1, -1, -1, -1},
  { 0,  6, -1, -1, -1, -1, -1, -1},
  { 2,  6, -1, -1, -1, -1, -1, -1},
  { 0,  2,  6, -1, -1, -1, -1, -1},
  { 4,  6, -1, -1, -1, -1, -1, -1},
  { 0,  4,  6, -1, -1, -1, -1, -1},
  { 2,  4,  6, -1, -1, -1, -1, -1},
  { 0,  2,  4,  6, -1, -1, -1, -1},
  { 8, -1, -1, -1, -1, -1, -1, -1},
  { 0,  8, -1, -1, -1, -1, -1, -1},
  { 2,  8, -1, -1, -1, -1, -1, -1},
  { 0,  2,  8, -1, -1, -1, -1, -1},
  { 4,  8, -1, -1, -1, -1, -1, -1},
  { 0,  4,  8, -1, -1, -1, -1, -1},
  { 2,  4,  8, -1, -1, -1, -1, -1},
  { 0,  2,  4,  8, -1, -1, -1, -1},
  { 6,  8, -1, -1, -1, -1, -1, -1},
  { 0,  6,  8, -1, -1, -1, -1, -1},
  { 2,  6,  8, -1, -1, -1, -1, -1},
  { 0,  2,  6,  8, -1, -1, -1, -1},
  { 4,  6,  8, -1, -1, -1, -1, -1},
  { 0,  4,  6,  8, -1, -1, -1, -1},
  { 2,  4,  6,  8, -1, -1, -1, -1},
  { 0,  2,  4,  6,  8, -1, -1, -1},
  {10, -1, -1, -1, -1, -1, -1, -1},
  { 0, 10, -1, -1, -1, -1, -1, -1},
  { 2, 10, -1, -1, -1, -1, -1, -1},
  { 0,  2, 10, -1, -1, -1, -1, -1},
  { 4, 10, -1, -1, -1, -1, -1, -1},
  { 0,  4, 10, -1, -1, -1, -1, -1},
  { 2,  4, 10, -1, -1, -1, -1, -1},
  { 0,  2,  4, 10, -1, -1, -1, -1},
  { 6, 10, -1, -1, -1, -1, -1, -1},
  { 0,  6, 10, -1, -1, -1, -1, -1},
  { 2,  6, 10, -1, -1, -1, -1, -1},
  { 0,  2,  6, 10, -1, -1, -1, -1},
  { 4,  6, 10, -1, -1, -1, -1, -1},
  { 0,  4,  6, 10, -1, -1, -1, -1},
  { 2,  4,  6, 10, -1, -1, -1, -1},
  { 0,  2,  4,  6, 10, -1, -1, -1},
  { 8, 10, -1, -1, -1, -1, -1, -1},
  { 0,  8, 10, -1, -1, -1, -1, -1},
  { 2,  8, 10, -1, -1, -1, -1, -1},
  { 0,  2,  8, 10, -1, -1, -1, -1},
  { 4,  8, 10, -1, -1, -1, -1, -1},
  { 0,  4,  8, 10, -1, -1, -1, -1},
  { 2,  4,  8, 10, -1, -1, -1, -1},
  { 0,  2,  4,  8, 10, -1, -1, -1},
  { 6,  8, 10, -1, -1, -1, -1, -1},
  { 0,  6,  8, 10, -1, -1, -1, -1},
  { 2,  6,  8, 10, -1, -1, -1, -1},
  { 0,  2,  6,  8, 10, -1, -1, -1},
  { 4,  6,  8, 10, -1, -1, -1, -1},
  { 0,  4,  6,  8, 10, -1, -1, -1},
  { 2,  4,  6,  8, 10, -1, -1, -1},
  { 0,  2,  4,  6,  8, 10, -1, -1},
  {12, -1, -1, -1, -1, -1, -1, -1},
  { 0, 12, -1, -1, -1, -1, -1, -1},
  { 2, 12, -1, -1, -1, -1, -1, -1},
  { 0,  2, 12, -1, -1, -1, -1, -1},
  { 4, 12, -1, -1, -1, -1, -1, -1},
  { 0,  4, 12, -1, -1, -1, -1, -1},
  { 2,  4, 12, -1, -1, -1, -1, -1},
  { 0,  2,  4, 12, -1, -1, -1, -1},
  { 6, 12, -1, -1, -1, -1, -1, -1},
  { 0,  6, 12, -1, -1, -1, -1, -1},
  { 2,  6, 12, -1, -1, -1, -1, -1},
  { 0,  2,  6, 12, -1, -1, -1, -1},
  { 4,  6, 12, -1, -1, -1, -1, -1},
  { 0,  4,  6, 12, -1, -1, -1, -1},
  { 2,  4,  6, 12, -1, -1, -1, -1},
  { 0,  2,  4,  6, 12, -1, -1, -1},
  { 8, 12, -1, -1, -1, -1, -1, -1},
  { 0,  8, 12, -1, -1, -1, -1, -1},
  { 2,  8, 12, -1, -1, -1, -1, -1},
  { 0,  2,  8, 12, -1, -1, -1, -1},
  { 4,  8, 12, -1, -1, -1, -1, -1},
  { 0,  4,  8, 12, -1, -1, -1, -1},
  { 2,  4,  8, 12, -1, -1, -1, -1},
  { 0,  2,  4,  8, 12, -1, -1, -1},
  { 6,  8, 12, -1, -1, -1, -1, -1},
  { 0,  6,  8, 12, -1, -1, -1, -1},
  { 2,  6,  8, 12, -1, -1, -1, -1},
  { 0,  2,  6,  8, 12, -1, -1, -1},
  { 4,  6,  8, 12, -1, -1, -1, -1},
  { 0,  4,  6,  8, 12, -1, -1, -1},
  { 2,  4,  6,  8, 12, -1, -1, -1},
  { 0,  2,  4,  6,  8, 12, -1, -1},
  {10, 12, -1, -1, -1, -1, -1, -1},
  { 0, 10, 12, -1, -1, -1, -1, -1},
  { 2, 10, 12, -1, -1, -1, -1, -1},
  { 0,  2, 10, 12, -1, -1, -1, -1},
  { 4, 10, 12, -1, -1, -1, -1, -1},
  { 0,  4, 10, 12, -1, -1, -1, -1},
  { 2,  4, 10, 12, -1, -1, -1, -1},
  { 0,  2,  4, 10, 12, -1, -1, -1},
  { 6, 10, 12, -1, -1, -1, -1, -1},
  { 0,  6, 10, 12, -1, -1, -1, -1},
  { 2,  6, 10, 12, -1, -1, -1, -1},
  { 0,  2,  6, 10, 12, -1, -1, -1},
  { 4,  6, 10, 12, -1, -1, -1, -1},
  { 0,  4,  6, 10, 12, -1, -1, -1},
  { 2,  4,  6, 10, 12, -1, -1, -1},
  { 0,  2,  4,  6, 10, 12, -1, -1},
  { 8, 10, 12, -1, -1, -1, -1, -1},
  { 0,  8, 10, 12, -1, -1, -1, -1},
  { 2,  8, 10, 12, -1, -1, -1, -1},
  { 0,  2,  8, 10, 12, -1, -1, -1},
  { 4,  8, 10, 12, -1, -1, -1, -1},
  { 0,  4,  8, 10, 12, -1, -1, -1},
  { 2,  4,  8, 10, 12, -1, -1, -1},
  { 0,  2,  4,  8, 10, 12, -1, -1},
  { 6,  8, 10, 12, -1, -1, -1, -1},
  { 0,  6,  8, 10, 12, -1, -1, -1},
  { 2,  6,  8, 10, 12, -1, -1, -1},
  { 0,  2,  6,  8, 10, 12, -1, -1},
  { 4,  6,  8, 10, 12, -1, -1, -1},
  { 0,  4,  6,  8, 10, 12, -1, -1},
  { 2,  4,  6,  8, 10, 12, -1, -1},
  { 0,  2,  4,  6,  8, 10, 12, -1},
  {14, -1, -1, -1, -1, -1, -1, -1},
  { 0, 14, -1, -1, -1, -1, -1, -1},
  { 2, 14, -1, -1, -1, -1, -1, -1},
  { 0,  2, 14, -1, -1, -1, -1, -1},
  { 4, 14, -1, -1, -1, -1, -1, -1},
  { 0,  4, 14, -1, -1, -1, -1, -1},
  { 2,  4, 14, -1, -1, -1, -1, -1},
  { 0,  2,  4, 14, -1, -1, -1, -1},
  { 6, 14, -1, -1, -1, -1, -1, -1},
  { 0,  6, 14, -1, -1, -1, -1, -1},
  { 2,  6, 14, -1, -1, -1, -1, -1},
  { 0,  2,  6, 14, -1, -1, -1, -1},
  { 4,  6, 14, -1, -1, -1, -1, -1},
  { 0,  4,  6, 14, -1, -1, -1, -1},
  { 2,  4,  6, 14, -1, -1, -1, -1},
  { 0,  2,  4,  6, 14, -1, -1, -1},
  { 8, 14, -1, -1, -1, -1, -1, -1},
  { 0,  8, 14, -1, -1, -1, -1, -1},
  { 2,  8, 14, -1, -1, -1, -1, -1},
  { 0,  2,  8, 14, -1, -1, -1, -1},
  { 4,  8, 14, -1, -1, -1, -1, -1},
  { 0,  4,  8, 14, -1, -1, -1, -1},
  { 2,  4,  8, 14, -1, -1, -1, -1},
  { 0,  2,  4,  8, 14, -1, -1, -1},
  { 6,  8, 14, -1, -1, -1, -1, -1},
  { 0,  6,  8, 14, -1, -1, -1, -1},
  { 2,  6,  8, 14, -1, -1, -1, -1},
  { 0,  2,  6,  8, 14, -1, -1, -1},
  { 4,  6,  8, 14, -1, -1, -1, -1},
  { 0,  4,  6,  8, 14, -1, -1, -1},
  { 2,  4,  6,  8, 14, -1, -1, -1},
  { 0,  2,  4,  6,  8, 14, -1, -1},
  {10, 14, -1, -1, -1, -1, -1, -1},
  { 0, 10, 14, -1, -1, -1, -1, -1},
  { 2, 10, 14, -1, -1, -1, -1, -1},
  { 0,  2, 10, 14, -1, -1, -1, -1},
  { 4, 10, 14, -1, -1, -1, -1, -1},
  { 0,  4, 10, 14, -1, -1, -1, -1},
  { 2,  4, 10, 14, -1, -1, -1, -1},
  { 0,  2,  4, 10, 14, -1, -1, -1},
  { 6, 10, 14, -1, -1, -1, -1, -1},
  { 0,  6, 10, 14, -1, -1, -1, -1},
  { 2,  6, 10, 14, -1, -1, -1, -1},
  { 0,  2,  6, 10, 14, -1, -1, -1},
  { 4,  6, 10, 14, -1, -1, -1, -1},
  { 0,  4,  6, 10, 14, -1, -1, -1},
  { 2,  4,  6, 10, 14, -1, -1, -1},
  { 0,  2,  4,  6, 10, 14, -1, -1},
  { 8, 10, 14, -1, -1, -1, -1, -1},
  { 0,  8, 10, 14, -1, -1, -1, -1},
  { 2,  8, 10, 14, -1, -1, -1, -1},
  { 0,  2,  8, 10, 14, -1, -1, -1},
  { 4,  8, 10, 14, -1, -1, -1, -1},
  { 0,  4,  8, 10, 14, -1, -1, -1},
  { 2,  4,  8, 10, 14, -1, -1, -1},
  { 0,  2,  4,  8, 10, 14, -1, -1},
  { 6,  8, 10, 14, -1, -1, -1, -1},
  { 0,  6,  8, 10, 14, -1, -1, -1},
  { 2,  6,  8, 10, 14, -1, -1, -1},
  { 0,  2,  6,  8, 10, 14, -1, -1},
  { 4,  6,  8, 10, 14, -1, -1, -1},
  { 0,  4,  6,  8, 10, 14, -1, -1},
  { 2,  4,  6,  8, 10, 14, -1, -1},
  { 0,  2,  4,  6,  8, 10, 14, -1},
  {12, 14, -1, -1, -1, -1, -1, -1},
  { 0, 12, 14, -1, -1, -1, -1, -1},
  { 2, 12, 14, -1, -1, -1, -1, -1},
  { 0,  2, 12, 14, -1, -1, -1, -1},
  { 4, 12, 14, -1, -1, -1, -1, -1},
  { 0,  4, 12, 14, -1, -1, -1, -1},
  { 2,  4, 12, 14, -1, -1, -1, -1},
  { 0,  2,  4, 12, 14, -1, -1, -1},
  { 6, 12, 14, -1, -1, -1, -1, -1},
  { 0,  6, 12, 14, -1, -1, -1, -1},
  { 2,  6, 12, 14, -1, -1, -1, -1},
  { 0,  2,  6, 12, 14, -1, -1, -1},
  { 4,  6, 12, 14, -1, -1, -1, -1},
  { 0,  4,  6, 12, 14, -1, -1, -1},
  { 2,  4,  6, 12, 14, -1, -1, -1},
  { 0,  2,  4,  6, 12, 14, -1, -1},
  { 8, 12, 14, -1, -1, -1, -1, -1},
  { 0,  8, 12, 14, -1, -1, -1, -1},
  { 2,  8, 12, 14, -1, -1, -1, -1},
  { 0,  2,  8, 12, 14, -1, -1, -1},
  { 4,  8, 12, 14, -1, -1, -1, -1},
  { 0,  4,  8, 12, 14, -1, -1, -1},
  { 2,  4,  8, 12, 14, -1, -1, -1},
  { 0,  2,  4,  8, 12, 14, -1, -1},
  { 6,  8, 12, 14, -1, -1, -1, -1},
  { 0,  6,  8, 12, 14, -1, -1, -1},
  { 2,  6,  8, 12, 14, -1, -1, -1},
  { 0,  2,  6,  8, 12, 14, -1, -1},
  { 4,  6,  8, 12, 14, -1, -1, -1},
  { 0,  4,  6,  8, 12, 14, -1, -1},
  { 2,  4,  6,  8, 12, 14, -1, -1},
  { 0,  2,  4,  6,  8, 12, 14, -1},
  {10, 12, 14, -1, -1, -1, -1, -1},
  { 0, 10, 12, 14, -1, -1, -1, -1},
  { 2, 10, 12, 14, -1, -1, -1, -1},
  { 0,  2, 10, 12, 14, -1, -1, -1},
  { 4, 10, 12, 14, -1, -1, -1, -1},
  { 0,  4, 10, 12, 14, -1, -1, -1},
  { 2,  4, 10, 12, 14, -1, -1, -1},
  { 0,  2,  4, 10, 12, 14, -1, -1},
  { 6, 10, 12, 14, -1, -1, -1, -1},
  { 0,  6, 10, 12, 14, -1, -1, -1},
  { 2,  6, 10, 12, 14, -1, -1, -1},
  { 0,  2,  6, 10, 12, 14, -1, -1},
  { 4,  6, 10, 12, 14, -1, -1, -1},
  { 0,  4,  6, 10, 12, 14, -1, -1},
  { 2,  4,  6, 10, 12, 14, -1, -1},
  { 0,  2,  4,  6, 10, 12, 14, -1},
  { 8, 10, 12, 14, -1, -1, -1, -1},
  { 0,  8, 10, 12, 14, -1, -1, -1},
  { 2,  8, 10, 12, 14, -1, -1, -1},
  { 0,  2,  8, 10, 12, 14, -1, -1},
  { 4,  8, 10, 12, 14, -1, -1, -1},
  { 0,  4,  8, 10, 12, 14, -1, -1},
  { 2,  4,  8, 10, 12, 14, -1, -1},
  { 0,  2,  4,  8, 10, 12, 14, -1},
  { 6,  8, 10, 12, 14, -1, -1, -1},
  { 0,  6,  8, 10, 12, 14, -1, -1},
  { 2,  6,  8, 10, 12, 14, -1, -1},
  { 0,  2,  6,  8, 10, 12, 14, -1},
  { 4,  6,  8, 10, 12, 14, -1, -1},
  { 0,  4,  6,  8, 10, 12, 14, -1},
  { 2,  4,  6,  8, 10, 12, 14, -1},
  { 0,  2,  4,  6,  8, 10, 12, 14}
};
#endif
270 
/*
 * Unsigned 16-bit "a >= b" comparison for 256-bit and 128-bit vectors.
 * A lane is all-ones exactly when max(a, b) == a, i.e. when a >= b as
 * unsigned values, and zero otherwise.
 * The argument `a` is expanded twice, so do not pass an expression with
 * side effects.
 */
#define _mm256_cmpge_epu16(a, b) _mm256_cmpeq_epi16(_mm256_max_epu16((a), (b)), (a))
#define _mm_cmpge_epu16(a, b) _mm_cmpeq_epi16(_mm_max_epu16((a), (b)), (a))
273 
rej_uniform_avx(int16_t * restrict r,const uint8_t * buf)274 unsigned int rej_uniform_avx(int16_t * restrict r, const uint8_t *buf)
275 {
276   unsigned int ctr, pos;
277   uint16_t val0, val1;
278   uint32_t good;
279 #ifdef BMI
280   uint64_t idx0, idx1, idx2, idx3;
281 #endif
282   const __m256i bound  = _mm256_load_si256(&qdata.vec[_16XQ/16]);
283   const __m256i ones   = _mm256_set1_epi8(1);
284   const __m256i mask  = _mm256_set1_epi16(0xFFF);
285   const __m256i idx8  = _mm256_set_epi8(15,14,14,13,12,11,11,10,
286                                          9, 8, 8, 7, 6, 5, 5, 4,
287                                         11,10,10, 9, 8, 7, 7, 6,
288                                          5, 4, 4, 3, 2, 1, 1, 0);
289   __m256i f0, f1, g0, g1, g2, g3;
290   __m128i f, t, pilo, pihi;
291 
292   ctr = pos = 0;
293   while(ctr <= KYBER_N - 32 && pos <= REJ_UNIFORM_AVX_BUFLEN - 48) {
294     f0 = _mm256_loadu_si256((__m256i *)&buf[pos]);
295     f1 = _mm256_loadu_si256((__m256i *)&buf[pos+24]);
296     f0 = _mm256_permute4x64_epi64(f0, 0x94);
297     f1 = _mm256_permute4x64_epi64(f1, 0x94);
298     f0 = _mm256_shuffle_epi8(f0, idx8);
299     f1 = _mm256_shuffle_epi8(f1, idx8);
300     g0 = _mm256_srli_epi16(f0, 4);
301     g1 = _mm256_srli_epi16(f1, 4);
302     f0 = _mm256_blend_epi16(f0, g0, 0xAA);
303     f1 = _mm256_blend_epi16(f1, g1, 0xAA);
304     f0 = _mm256_and_si256(f0, mask);
305     f1 = _mm256_and_si256(f1, mask);
306     pos += 48;
307 
308     g0 = _mm256_cmpgt_epi16(bound, f0);
309     g1 = _mm256_cmpgt_epi16(bound, f1);
310 
311     g0 = _mm256_packs_epi16(g0, g1);
312     good = _mm256_movemask_epi8(g0);
313 
314 #ifdef BMI
315     idx0 = _pdep_u64(good >>  0, 0x0101010101010101);
316     idx1 = _pdep_u64(good >>  8, 0x0101010101010101);
317     idx2 = _pdep_u64(good >> 16, 0x0101010101010101);
318     idx3 = _pdep_u64(good >> 24, 0x0101010101010101);
319     idx0 = (idx0 << 8) - idx0;
320     idx0  = _pext_u64(0x0E0C0A0806040200, idx0);
321     idx1 = (idx1 << 8) - idx1;
322     idx1  = _pext_u64(0x0E0C0A0806040200, idx1);
323     idx2 = (idx2 << 8) - idx2;
324     idx2  = _pext_u64(0x0E0C0A0806040200, idx2);
325     idx3 = (idx3 << 8) - idx3;
326     idx3  = _pext_u64(0x0E0C0A0806040200, idx3);
327 
328     g0 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx0));
329     g1 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx1));
330     g0 = _mm256_inserti128_si256(g0, _mm_cvtsi64_si128(idx2), 1);
331     g1 = _mm256_inserti128_si256(g1, _mm_cvtsi64_si128(idx3), 1);
332 #else
333     g0 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >>  0) & 0xFF]));
334     g1 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >>  8) & 0xFF]));
335     g0 = _mm256_inserti128_si256(g0, _mm_loadl_epi64((__m128i *)&idx[(good >> 16) & 0xFF]), 1);
336     g1 = _mm256_inserti128_si256(g1, _mm_loadl_epi64((__m128i *)&idx[(good >> 24) & 0xFF]), 1);
337 #endif
338 
339     g2 = _mm256_add_epi8(g0, ones);
340     g3 = _mm256_add_epi8(g1, ones);
341     g0 = _mm256_unpacklo_epi8(g0, g2);
342     g1 = _mm256_unpacklo_epi8(g1, g3);
343 
344     f0 = _mm256_shuffle_epi8(f0, g0);
345     f1 = _mm256_shuffle_epi8(f1, g1);
346 
347     _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f0));
348     ctr += _mm_popcnt_u32((good >>  0) & 0xFF);
349     _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f0, 1));
350     ctr += _mm_popcnt_u32((good >> 16) & 0xFF);
351     _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f1));
352     ctr += _mm_popcnt_u32((good >>  8) & 0xFF);
353     _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f1, 1));
354     ctr += _mm_popcnt_u32((good >> 24) & 0xFF);
355   }
356 
357   while(ctr <= KYBER_N - 8 && pos <= REJ_UNIFORM_AVX_BUFLEN - 12) {
358     f = _mm_loadu_si128((__m128i *)&buf[pos]);
359     f = _mm_shuffle_epi8(f, _mm256_castsi256_si128(idx8));
360     t = _mm_srli_epi16(f, 4);
361     f = _mm_blend_epi16(f, t, 0xAA);
362     f = _mm_and_si128(f, _mm256_castsi256_si128(mask));
363     pos += 12;
364 
365     t = _mm_cmpgt_epi16(_mm256_castsi256_si128(bound), f);
366     good = _mm_movemask_epi8(t);
367 
368 #ifdef BMI
369     good &= 0x5555;
370     idx0 = _pdep_u64(good, 0x1111111111111111);
371     idx0 = (idx0 << 8) - idx0;
372     idx0 = _pext_u64(0x0E0C0A0806040200, idx0);
373     pilo = _mm_cvtsi64_si128(idx0);
374 #else
375     good = _pext_u32(good, 0x5555);
376     pilo = _mm_loadl_epi64((__m128i *)&idx[good]);
377 #endif
378 
379     pihi = _mm_add_epi8(pilo, _mm256_castsi256_si128(ones));
380     pilo = _mm_unpacklo_epi8(pilo, pihi);
381     f = _mm_shuffle_epi8(f, pilo);
382     _mm_storeu_si128((__m128i *)&r[ctr], f);
383     ctr += _mm_popcnt_u32(good);
384   }
385 
386   while(ctr < KYBER_N && pos <= REJ_UNIFORM_AVX_BUFLEN - 3) {
387     val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF;
388     val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4));
389     pos += 3;
390 
391     if(val0 < KYBER_Q)
392       r[ctr++] = val0;
393     if(val1 < KYBER_Q && ctr < KYBER_N)
394       r[ctr++] = val1;
395   }
396 
397   return ctr;
398 }
399