1 /*
2 * NIST prime reductions
3 * (C) 2014,2015,2018 Jack Lloyd
4 *
5 * Botan is released under the Simplified BSD License (see license.txt)
6 */
7 
8 #include <botan/curve_nistp.h>
9 #include <botan/internal/mp_core.h>
10 #include <botan/internal/mp_asmi.h>
11 #include <botan/internal/ct_utils.h>
12 
13 namespace Botan {
14 
prime_p521()15 const BigInt& prime_p521()
16    {
17    static const BigInt p521("0x1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
18                                "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF");
19 
20    return p521;
21    }
22 
redc_p521(BigInt & x,secure_vector<word> & ws)23 void redc_p521(BigInt& x, secure_vector<word>& ws)
24    {
25    const size_t p_full_words = 521 / BOTAN_MP_WORD_BITS;
26    const size_t p_top_bits = 521 % BOTAN_MP_WORD_BITS;
27    const size_t p_words = p_full_words + 1;
28 
29 #if (BOTAN_MP_WORD_BITS == 64)
30    static const word p521_words[p_words] = {
31       0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF,
32       0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF,
33       0x1FF };
34 #else
35    static const word p521_words[p_words] = {
36       0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
37       0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
38       0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
39       0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
40       0x1FF };
41 #endif
42 
43    if(ws.size() < p_words + 1)
44       ws.resize(p_words + 1);
45 
46    clear_mem(ws.data(), ws.size());
47    bigint_shr2(ws.data(), x.data(), std::min(x.size(), 2*p_words), p_full_words, p_top_bits);
48 
49    x.mask_bits(521);
50    x.grow_to(p_words);
51 
52    // Word-level carry will be zero
53    word carry = bigint_add3_nc(x.mutable_data(), x.data(), p_words, ws.data(), p_words);
54    BOTAN_ASSERT_EQUAL(carry, 0, "Final carry in P-521 reduction");
55 
56    const word top_word = x.word_at(p_full_words);
57 
58    /*
59    * Check if we need to reduce modulo P
60    * There are two possible cases:
61    * - The result overflowed past 521 bits, in which case bit 522 will be set
62    * - The result is exactly 2**521 - 1
63    */
64    const auto bit_522_set = CT::Mask<word>::expand(top_word >> p_top_bits);
65 
66    word and_512 = MP_WORD_MAX;
67    for(size_t i = 0; i != p_full_words; ++i)
68       and_512 &= x.word_at(i);
69    const auto all_512_low_bits_set = CT::Mask<word>::is_equal(and_512, MP_WORD_MAX);
70    const auto has_p521_top_word = CT::Mask<word>::is_equal(top_word, 0x1FF);
71    const auto is_p521 = all_512_low_bits_set & has_p521_top_word;
72 
73    const auto needs_reduction = is_p521 | bit_522_set;
74 
75    bigint_cnd_sub(needs_reduction.value(), x.mutable_data(), p521_words, p_words);
76    }
77 
78 namespace {
79 
80 /**
81 * Treating this MPI as a sequence of 32-bit words in big-endian
82 * order, return word i. The array is assumed to be large enough.
83 */
get_uint32(const word xw[],size_t i)84 inline uint32_t get_uint32(const word xw[], size_t i)
85    {
86 #if (BOTAN_MP_WORD_BITS == 32)
87    return xw[i];
88 #else
89    return static_cast<uint32_t>(xw[i/2] >> ((i % 2)*32));
90 #endif
91    }
92 
set_words(word x[],size_t i,uint32_t R0,uint32_t R1)93 inline void set_words(word x[], size_t i, uint32_t R0, uint32_t R1)
94    {
95 #if (BOTAN_MP_WORD_BITS == 32)
96    x[i] = R0;
97    x[i+1] = R1;
98 #else
99    x[i/2] = (static_cast<uint64_t>(R1) << 32) | R0;
100 #endif
101    }
102 
103 }
104 
prime_p192()105 const BigInt& prime_p192()
106    {
107    static const BigInt p192("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFF");
108    return p192;
109    }
110 
redc_p192(BigInt & x,secure_vector<word> & ws)111 void redc_p192(BigInt& x, secure_vector<word>& ws)
112    {
113    BOTAN_UNUSED(ws);
114 
115    static const size_t p192_limbs = 192 / BOTAN_MP_WORD_BITS;
116 
117    x.grow_to(2*p192_limbs);
118    word* xw = x.mutable_data();
119 
120    const uint64_t X00 = get_uint32(xw,  0);
121    const uint64_t X01 = get_uint32(xw,  1);
122    const uint64_t X02 = get_uint32(xw,  2);
123    const uint64_t X03 = get_uint32(xw,  3);
124    const uint64_t X04 = get_uint32(xw,  4);
125    const uint64_t X05 = get_uint32(xw,  5);
126    const uint64_t X06 = get_uint32(xw,  6);
127    const uint64_t X07 = get_uint32(xw,  7);
128    const uint64_t X08 = get_uint32(xw,  8);
129    const uint64_t X09 = get_uint32(xw,  9);
130    const uint64_t X10 = get_uint32(xw, 10);
131    const uint64_t X11 = get_uint32(xw, 11);
132 
133    const uint64_t S0 = X00 + X06 + X10;
134    const uint64_t S1 = X01 + X07 + X11;
135    const uint64_t S2 = X02 + X06 + X08 + X10;
136    const uint64_t S3 = X03 + X07 + X09 + X11;
137    const uint64_t S4 = X04 + X08 + X10;
138    const uint64_t S5 = X05 + X09 + X11;
139 
140    uint64_t S = 0;
141    uint32_t R0 = 0, R1 = 0;
142 
143    S += S0;
144    R0 = static_cast<uint32_t>(S);
145    S >>= 32;
146 
147    S += S1;
148    R1 = static_cast<uint32_t>(S);
149    S >>= 32;
150 
151    set_words(xw, 0, R0, R1);
152 
153    S += S2;
154    R0 = static_cast<uint32_t>(S);
155    S >>= 32;
156 
157    S += S3;
158    R1 = static_cast<uint32_t>(S);
159    S >>= 32;
160 
161    set_words(xw, 2, R0, R1);
162 
163    S += S4;
164    R0 = static_cast<uint32_t>(S);
165    S >>= 32;
166 
167    S += S5;
168    R1 = static_cast<uint32_t>(S);
169    S >>= 32;
170 
171    set_words(xw, 4, R0, R1);
172 
173    // No underflow possible
174 
175    /*
176    This is a table of (i*P-192) % 2**192 for i in 1...3
177    */
178    static const word p192_mults[3][p192_limbs] = {
179 #if (BOTAN_MP_WORD_BITS == 64)
180       {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF},
181       {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFF},
182       {0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF},
183 #else
184       {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
185       {0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
186       {0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
187 #endif
188    };
189 
190    CT::unpoison(S);
191    BOTAN_ASSERT(S <= 2, "Expected overflow");
192 
193    BOTAN_ASSERT_NOMSG(x.size() >= p192_limbs + 1);
194    x.mask_bits(192);
195    word borrow = bigint_sub2(x.mutable_data(), p192_limbs + 1, p192_mults[S], p192_limbs);
196    BOTAN_DEBUG_ASSERT(borrow == 0 || borrow == 1);
197    bigint_cnd_add(borrow, x.mutable_data(), p192_limbs + 1, p192_mults[0], p192_limbs);
198    }
199 
prime_p224()200 const BigInt& prime_p224()
201    {
202    static const BigInt p224("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF000000000000000000000001");
203    return p224;
204    }
205 
redc_p224(BigInt & x,secure_vector<word> & ws)206 void redc_p224(BigInt& x, secure_vector<word>& ws)
207    {
208    static const size_t p224_limbs = (BOTAN_MP_WORD_BITS == 32) ? 7 : 4;
209 
210    BOTAN_UNUSED(ws);
211 
212    x.grow_to(2*p224_limbs);
213    word* xw = x.mutable_data();
214 
215    const int64_t X00 = get_uint32(xw,  0);
216    const int64_t X01 = get_uint32(xw,  1);
217    const int64_t X02 = get_uint32(xw,  2);
218    const int64_t X03 = get_uint32(xw,  3);
219    const int64_t X04 = get_uint32(xw,  4);
220    const int64_t X05 = get_uint32(xw,  5);
221    const int64_t X06 = get_uint32(xw,  6);
222    const int64_t X07 = get_uint32(xw,  7);
223    const int64_t X08 = get_uint32(xw,  8);
224    const int64_t X09 = get_uint32(xw,  9);
225    const int64_t X10 = get_uint32(xw, 10);
226    const int64_t X11 = get_uint32(xw, 11);
227    const int64_t X12 = get_uint32(xw, 12);
228    const int64_t X13 = get_uint32(xw, 13);
229 
230    // One full copy of P224 is added, so the result is always positive
231 
232    const int64_t S0 = 0x00000001 + X00 - X07 - X11;
233    const int64_t S1 = 0x00000000 + X01 - X08 - X12;
234    const int64_t S2 = 0x00000000 + X02 - X09 - X13;
235    const int64_t S3 = 0xFFFFFFFF + X03 + X07 + X11 - X10;
236    const int64_t S4 = 0xFFFFFFFF + X04 + X08 + X12 - X11;
237    const int64_t S5 = 0xFFFFFFFF + X05 + X09 + X13 - X12;
238    const int64_t S6 = 0xFFFFFFFF + X06 + X10 - X13;
239 
240    int64_t S = 0;
241    uint32_t R0 = 0, R1 = 0;
242 
243    S += S0;
244    R0 = static_cast<uint32_t>(S);
245    S >>= 32;
246 
247    S += S1;
248    R1 = static_cast<uint32_t>(S);
249    S >>= 32;
250 
251    set_words(xw, 0, R0, R1);
252 
253    S += S2;
254    R0 = static_cast<uint32_t>(S);
255    S >>= 32;
256 
257    S += S3;
258    R1 = static_cast<uint32_t>(S);
259    S >>= 32;
260 
261    set_words(xw, 2, R0, R1);
262 
263    S += S4;
264    R0 = static_cast<uint32_t>(S);
265    S >>= 32;
266 
267    S += S5;
268    R1 = static_cast<uint32_t>(S);
269    S >>= 32;
270 
271    set_words(xw, 4, R0, R1);
272 
273    S += S6;
274    R0 = static_cast<uint32_t>(S);
275    S >>= 32;
276 
277    set_words(xw, 6, R0, 0);
278 
279    static const word p224_mults[3][p224_limbs] = {
280 #if (BOTAN_MP_WORD_BITS == 64)
281     {0x0000000000000001, 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF},
282     {0x0000000000000002, 0xFFFFFFFE00000000, 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF},
283     {0x0000000000000003, 0xFFFFFFFD00000000, 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF},
284 #else
285     {0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
286     {0x00000002, 0x00000000, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
287     {0x00000003, 0x00000000, 0x00000000, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
288 #endif
289 
290    };
291 
292    CT::unpoison(S);
293    BOTAN_ASSERT(S >= 0 && S <= 2, "Expected overflow");
294 
295    BOTAN_ASSERT_NOMSG(x.size() >= p224_limbs + 1);
296    x.mask_bits(224);
297    word borrow = bigint_sub2(x.mutable_data(), p224_limbs + 1, p224_mults[S], p224_limbs);
298    BOTAN_DEBUG_ASSERT(borrow == 0 || borrow == 1);
299    bigint_cnd_add(borrow, x.mutable_data(), p224_limbs + 1, p224_mults[0], p224_limbs);
300    }
301 
prime_p256()302 const BigInt& prime_p256()
303    {
304    static const BigInt p256("0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF");
305    return p256;
306    }
307 
redc_p256(BigInt & x,secure_vector<word> & ws)308 void redc_p256(BigInt& x, secure_vector<word>& ws)
309    {
310    static const size_t p256_limbs = (BOTAN_MP_WORD_BITS == 32) ? 8 : 4;
311 
312    BOTAN_UNUSED(ws);
313 
314    x.grow_to(2*p256_limbs);
315    word* xw = x.mutable_data();
316 
317    const int64_t X00 = get_uint32(xw,  0);
318    const int64_t X01 = get_uint32(xw,  1);
319    const int64_t X02 = get_uint32(xw,  2);
320    const int64_t X03 = get_uint32(xw,  3);
321    const int64_t X04 = get_uint32(xw,  4);
322    const int64_t X05 = get_uint32(xw,  5);
323    const int64_t X06 = get_uint32(xw,  6);
324    const int64_t X07 = get_uint32(xw,  7);
325    const int64_t X08 = get_uint32(xw,  8);
326    const int64_t X09 = get_uint32(xw,  9);
327    const int64_t X10 = get_uint32(xw, 10);
328    const int64_t X11 = get_uint32(xw, 11);
329    const int64_t X12 = get_uint32(xw, 12);
330    const int64_t X13 = get_uint32(xw, 13);
331    const int64_t X14 = get_uint32(xw, 14);
332    const int64_t X15 = get_uint32(xw, 15);
333 
334    // Adds 6 * P-256 to prevent underflow
335    const int64_t S0 = 0xFFFFFFFA + X00 + X08 + X09 - (X11 + X12 + X13) - X14;
336    const int64_t S1 = 0xFFFFFFFF + X01 + X09 + X10 - X12 - (X13 + X14 + X15);
337    const int64_t S2 = 0xFFFFFFFF + X02 + X10 + X11 - (X13 + X14 + X15);
338    const int64_t S3 = 0x00000005 + X03 + (X11 + X12)*2 + X13 - X15 - X08 - X09;
339    const int64_t S4 = 0x00000000 + X04 + (X12 + X13)*2 + X14 - X09 - X10;
340    const int64_t S5 = 0x00000000 + X05 + (X13 + X14)*2 + X15 - X10 - X11;
341    const int64_t S6 = 0x00000006 + X06 + X13 + X14*3 + X15*2 - X08 - X09;
342    const int64_t S7 = 0xFFFFFFFA + X07 + X15*3 + X08 - X10 - (X11 + X12 + X13);
343 
344    int64_t S = 0;
345 
346    uint32_t R0 = 0, R1 = 0;
347 
348    S += S0;
349    R0 = static_cast<uint32_t>(S);
350    S >>= 32;
351 
352    S += S1;
353    R1 = static_cast<uint32_t>(S);
354    S >>= 32;
355 
356    set_words(xw, 0, R0, R1);
357 
358    S += S2;
359    R0 = static_cast<uint32_t>(S);
360    S >>= 32;
361 
362    S += S3;
363    R1 = static_cast<uint32_t>(S);
364    S >>= 32;
365 
366    set_words(xw, 2, R0, R1);
367 
368    S += S4;
369    R0 = static_cast<uint32_t>(S);
370    S >>= 32;
371 
372    S += S5;
373    R1 = static_cast<uint32_t>(S);
374    S >>= 32;
375 
376    set_words(xw, 4, R0, R1);
377 
378    S += S6;
379    R0 = static_cast<uint32_t>(S);
380    S >>= 32;
381 
382    S += S7;
383    R1 = static_cast<uint32_t>(S);
384    S >>= 32;
385    set_words(xw, 6, R0, R1);
386 
387    S += 5; // the top digits of 6*P-256
388 
389    /*
390    This is a table of (i*P-256) % 2**256 for i in 1...10
391    */
392    static const word p256_mults[11][p256_limbs] = {
393 #if (BOTAN_MP_WORD_BITS == 64)
394       {0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001},
395       {0xFFFFFFFFFFFFFFFE, 0x00000001FFFFFFFF, 0x0000000000000000, 0xFFFFFFFE00000002},
396       {0xFFFFFFFFFFFFFFFD, 0x00000002FFFFFFFF, 0x0000000000000000, 0xFFFFFFFD00000003},
397       {0xFFFFFFFFFFFFFFFC, 0x00000003FFFFFFFF, 0x0000000000000000, 0xFFFFFFFC00000004},
398       {0xFFFFFFFFFFFFFFFB, 0x00000004FFFFFFFF, 0x0000000000000000, 0xFFFFFFFB00000005},
399       {0xFFFFFFFFFFFFFFFA, 0x00000005FFFFFFFF, 0x0000000000000000, 0xFFFFFFFA00000006},
400       {0xFFFFFFFFFFFFFFF9, 0x00000006FFFFFFFF, 0x0000000000000000, 0xFFFFFFF900000007},
401       {0xFFFFFFFFFFFFFFF8, 0x00000007FFFFFFFF, 0x0000000000000000, 0xFFFFFFF800000008},
402       {0xFFFFFFFFFFFFFFF7, 0x00000008FFFFFFFF, 0x0000000000000000, 0xFFFFFFF700000009},
403       {0xFFFFFFFFFFFFFFF6, 0x00000009FFFFFFFF, 0x0000000000000000, 0xFFFFFFF60000000A},
404       {0xFFFFFFFFFFFFFFF5, 0x0000000AFFFFFFFF, 0x0000000000000000, 0xFFFFFFF50000000B},
405 #else
406       {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF},
407       {0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000001, 0x00000000, 0x00000000, 0x00000002, 0xFFFFFFFE},
408       {0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000002, 0x00000000, 0x00000000, 0x00000003, 0xFFFFFFFD},
409       {0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000003, 0x00000000, 0x00000000, 0x00000004, 0xFFFFFFFC},
410       {0xFFFFFFFB, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000004, 0x00000000, 0x00000000, 0x00000005, 0xFFFFFFFB},
411       {0xFFFFFFFA, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000005, 0x00000000, 0x00000000, 0x00000006, 0xFFFFFFFA},
412       {0xFFFFFFF9, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000006, 0x00000000, 0x00000000, 0x00000007, 0xFFFFFFF9},
413       {0xFFFFFFF8, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000007, 0x00000000, 0x00000000, 0x00000008, 0xFFFFFFF8},
414       {0xFFFFFFF7, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000008, 0x00000000, 0x00000000, 0x00000009, 0xFFFFFFF7},
415       {0xFFFFFFF6, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000009, 0x00000000, 0x00000000, 0x0000000A, 0xFFFFFFF6},
416       {0xFFFFFFF5, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000000A, 0x00000000, 0x00000000, 0x0000000B, 0xFFFFFFF5},
417 #endif
418    };
419 
420    CT::unpoison(S);
421    BOTAN_ASSERT(S >= 0 && S <= 10, "Expected overflow");
422 
423    BOTAN_ASSERT_NOMSG(x.size() >= p256_limbs + 1);
424    x.mask_bits(256);
425    word borrow = bigint_sub2(x.mutable_data(), p256_limbs + 1, p256_mults[S], p256_limbs);
426    BOTAN_DEBUG_ASSERT(borrow == 0 || borrow == 1);
427    bigint_cnd_add(borrow, x.mutable_data(), p256_limbs + 1, p256_mults[0], p256_limbs);
428    }
429 
prime_p384()430 const BigInt& prime_p384()
431    {
432    static const BigInt p384("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFF");
433    return p384;
434    }
435 
redc_p384(BigInt & x,secure_vector<word> & ws)436 void redc_p384(BigInt& x, secure_vector<word>& ws)
437    {
438    BOTAN_UNUSED(ws);
439 
440    static const size_t p384_limbs = (BOTAN_MP_WORD_BITS == 32) ? 12 : 6;
441 
442    x.grow_to(2*p384_limbs);
443    word* xw = x.mutable_data();
444 
445    const int64_t X00 = get_uint32(xw,  0);
446    const int64_t X01 = get_uint32(xw,  1);
447    const int64_t X02 = get_uint32(xw,  2);
448    const int64_t X03 = get_uint32(xw,  3);
449    const int64_t X04 = get_uint32(xw,  4);
450    const int64_t X05 = get_uint32(xw,  5);
451    const int64_t X06 = get_uint32(xw,  6);
452    const int64_t X07 = get_uint32(xw,  7);
453    const int64_t X08 = get_uint32(xw,  8);
454    const int64_t X09 = get_uint32(xw,  9);
455    const int64_t X10 = get_uint32(xw, 10);
456    const int64_t X11 = get_uint32(xw, 11);
457    const int64_t X12 = get_uint32(xw, 12);
458    const int64_t X13 = get_uint32(xw, 13);
459    const int64_t X14 = get_uint32(xw, 14);
460    const int64_t X15 = get_uint32(xw, 15);
461    const int64_t X16 = get_uint32(xw, 16);
462    const int64_t X17 = get_uint32(xw, 17);
463    const int64_t X18 = get_uint32(xw, 18);
464    const int64_t X19 = get_uint32(xw, 19);
465    const int64_t X20 = get_uint32(xw, 20);
466    const int64_t X21 = get_uint32(xw, 21);
467    const int64_t X22 = get_uint32(xw, 22);
468    const int64_t X23 = get_uint32(xw, 23);
469 
470    // One copy of P-384 is added to prevent underflow
471    const int64_t S0 = 0xFFFFFFFF + X00 + X12 + X20 + X21 - X23;
472    const int64_t S1 = 0x00000000 + X01 + X13 + X22 + X23 - X12 - X20;
473    const int64_t S2 = 0x00000000 + X02 + X14 + X23 - X13 - X21;
474    const int64_t S3 = 0xFFFFFFFF + X03 + X12 + X15 + X20 + X21 - X14 - X22 - X23;
475    const int64_t S4 = 0xFFFFFFFE + X04 + X12 + X13 + X16 + X20 + X21*2 + X22 - X15 - X23*2;
476    const int64_t S5 = 0xFFFFFFFF + X05 + X13 + X14 + X17 + X21 + X22*2 + X23 - X16;
477    const int64_t S6 = 0xFFFFFFFF + X06 + X14 + X15 + X18 + X22 + X23*2 - X17;
478    const int64_t S7 = 0xFFFFFFFF + X07 + X15 + X16 + X19 + X23 - X18;
479    const int64_t S8 = 0xFFFFFFFF + X08 + X16 + X17 + X20 - X19;
480    const int64_t S9 = 0xFFFFFFFF + X09 + X17 + X18 + X21 - X20;
481    const int64_t SA = 0xFFFFFFFF + X10 + X18 + X19 + X22 - X21;
482    const int64_t SB = 0xFFFFFFFF + X11 + X19 + X20 + X23 - X22;
483 
484    int64_t S = 0;
485 
486    uint32_t R0 = 0, R1 = 0;
487 
488    S += S0;
489    R0 = static_cast<uint32_t>(S);
490    S >>= 32;
491 
492    S += S1;
493    R1 = static_cast<uint32_t>(S);
494    S >>= 32;
495 
496    set_words(xw, 0, R0, R1);
497 
498    S += S2;
499    R0 = static_cast<uint32_t>(S);
500    S >>= 32;
501 
502    S += S3;
503    R1 = static_cast<uint32_t>(S);
504    S >>= 32;
505 
506    set_words(xw, 2, R0, R1);
507 
508    S += S4;
509    R0 = static_cast<uint32_t>(S);
510    S >>= 32;
511 
512    S += S5;
513    R1 = static_cast<uint32_t>(S);
514    S >>= 32;
515 
516    set_words(xw, 4, R0, R1);
517 
518    S += S6;
519    R0 = static_cast<uint32_t>(S);
520    S >>= 32;
521 
522    S += S7;
523    R1 = static_cast<uint32_t>(S);
524    S >>= 32;
525 
526    set_words(xw, 6, R0, R1);
527 
528    S += S8;
529    R0 = static_cast<uint32_t>(S);
530    S >>= 32;
531 
532    S += S9;
533    R1 = static_cast<uint32_t>(S);
534    S >>= 32;
535 
536    set_words(xw, 8, R0, R1);
537 
538    S += SA;
539    R0 = static_cast<uint32_t>(S);
540    S >>= 32;
541 
542    S += SB;
543    R1 = static_cast<uint32_t>(S);
544    S >>= 32;
545 
546    set_words(xw, 10, R0, R1);
547 
548    /*
549    This is a table of (i*P-384) % 2**384 for i in 1...4
550    */
551    static const word p384_mults[5][p384_limbs] = {
552 #if (BOTAN_MP_WORD_BITS == 64)
553       {0x00000000FFFFFFFF, 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
554       {0x00000001FFFFFFFE, 0xFFFFFFFE00000000, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
555       {0x00000002FFFFFFFD, 0xFFFFFFFD00000000, 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
556       {0x00000003FFFFFFFC, 0xFFFFFFFC00000000, 0xFFFFFFFFFFFFFFFB, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
557       {0x00000004FFFFFFFB, 0xFFFFFFFB00000000, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
558 
559 #else
560       {0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
561        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
562       {0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFF,
563        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
564       {0xFFFFFFFD, 0x00000002, 0x00000000, 0xFFFFFFFD, 0xFFFFFFFC, 0xFFFFFFFF,
565        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
566       {0xFFFFFFFC, 0x00000003, 0x00000000, 0xFFFFFFFC, 0xFFFFFFFB, 0xFFFFFFFF,
567        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
568       {0xFFFFFFFB, 0x00000004, 0x00000000, 0xFFFFFFFB, 0xFFFFFFFA, 0xFFFFFFFF,
569        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
570 #endif
571    };
572 
573    CT::unpoison(S);
574    BOTAN_ASSERT(S >= 0 && S <= 4, "Expected overflow");
575 
576    BOTAN_ASSERT_NOMSG(x.size() >= p384_limbs + 1);
577    x.mask_bits(384);
578    word borrow = bigint_sub2(x.mutable_data(), p384_limbs + 1, p384_mults[S], p384_limbs);
579    BOTAN_DEBUG_ASSERT(borrow == 0 || borrow == 1);
580    bigint_cnd_add(borrow, x.mutable_data(), p384_limbs + 1, p384_mults[0], p384_limbs);
581    }
582 
583 }
584