1 /*****************************************************************************
2  * Copyright (c) 2013, 2014, 2017 Pieter Wuille, Andrew Poelstra, Jonas Nick *
3  * Distributed under the MIT software license, see the accompanying          *
4  * file COPYING or http://www.opensource.org/licenses/mit-license.php.       *
5  *****************************************************************************/
6 
7 #ifndef SECP256K1_ECMULT_IMPL_H
8 #define SECP256K1_ECMULT_IMPL_H
9 
10 #include <string.h>
11 #include <stdint.h>
12 
13 #include "util.h"
14 #include "group.h"
15 #include "scalar.h"
16 #include "ecmult.h"
17 
18 #if defined(EXHAUSTIVE_TEST_ORDER)
19 /* We need to lower these values for exhaustive tests because
20  * the tables cannot have infinities in them (this breaks the
21  * affine-isomorphism stuff which tracks z-ratios) */
22 #  if EXHAUSTIVE_TEST_ORDER > 128
23 #    define WINDOW_A 5
24 #    define WINDOW_G 8
25 #  elif EXHAUSTIVE_TEST_ORDER > 8
26 #    define WINDOW_A 4
27 #    define WINDOW_G 4
28 #  else
29 #    define WINDOW_A 2
30 #    define WINDOW_G 2
31 #  endif
32 #else
33 /* optimal for 128-bit and 256-bit exponents. */
34 #  define WINDOW_A 5
35 /** Larger values for ECMULT_WINDOW_SIZE result in possibly better
36  *  performance at the cost of an exponentially larger precomputed
37  *  table. The exact table size is
38  *      (1 << (WINDOW_G - 2)) * sizeof(secp256k1_ge_storage)  bytes,
39  *  where sizeof(secp256k1_ge_storage) is typically 64 bytes but can
40  *  be larger due to platform-specific padding and alignment.
41  *  Two tables of this size are used (due to the endomorphism
42  *  optimization).
43  */
44 #  define WINDOW_G ECMULT_WINDOW_SIZE
45 #endif
46 
47 /* Noone will ever need more than a window size of 24. The code might
48  * be correct for larger values of ECMULT_WINDOW_SIZE but this is not
49  * not tested.
50  *
51  * The following limitations are known, and there are probably more:
52  * If WINDOW_G > 27 and size_t has 32 bits, then the code is incorrect
53  * because the size of the memory object that we allocate (in bytes)
54  * will not fit in a size_t.
55  * If WINDOW_G > 31 and int has 32 bits, then the code is incorrect
56  * because certain expressions will overflow.
57  */
58 #if ECMULT_WINDOW_SIZE < 2 || ECMULT_WINDOW_SIZE > 24
59 #  error Set ECMULT_WINDOW_SIZE to an integer in range [2..24].
60 #endif
61 
62 #define WNAF_BITS 128
63 #define WNAF_SIZE_BITS(bits, w) (((bits) + (w) - 1) / (w))
64 #define WNAF_SIZE(w) WNAF_SIZE_BITS(WNAF_BITS, w)
65 
66 /** The number of entries a table with precomputed multiples needs to have. */
67 #define ECMULT_TABLE_SIZE(w) (1 << ((w)-2))
68 
69 /* The number of objects allocated on the scratch space for ecmult_multi algorithms */
70 #define PIPPENGER_SCRATCH_OBJECTS 6
71 #define STRAUSS_SCRATCH_OBJECTS 6
72 
73 #define PIPPENGER_MAX_BUCKET_WINDOW 12
74 
75 /* Minimum number of points for which pippenger_wnaf is faster than strauss wnaf */
76 #define ECMULT_PIPPENGER_THRESHOLD 88
77 
78 #define ECMULT_MAX_POINTS_PER_BATCH 5000000
79 
80 /** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain
81  *  the values [1*a,3*a,...,(2*n-1)*a], so it space for n values. zr[0] will
82  *  contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z.
83  *  Prej's Z values are undefined, except for the last value.
84  */
secp256k1_ecmult_odd_multiples_table(int n,secp256k1_gej * prej,secp256k1_fe * zr,const secp256k1_gej * a)85 static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej *prej, secp256k1_fe *zr, const secp256k1_gej *a) {
86     secp256k1_gej d;
87     secp256k1_ge a_ge, d_ge;
88     int i;
89 
90     VERIFY_CHECK(!a->infinity);
91 
92     secp256k1_gej_double_var(&d, a, NULL);
93 
94     /*
95      * Perform the additions on an isomorphism where 'd' is affine: drop the z coordinate
96      * of 'd', and scale the 1P starting value's x/y coordinates without changing its z.
97      */
98     d_ge.x = d.x;
99     d_ge.y = d.y;
100     d_ge.infinity = 0;
101 
102     secp256k1_ge_set_gej_zinv(&a_ge, a, &d.z);
103     prej[0].x = a_ge.x;
104     prej[0].y = a_ge.y;
105     prej[0].z = a->z;
106     prej[0].infinity = 0;
107 
108     zr[0] = d.z;
109     for (i = 1; i < n; i++) {
110         secp256k1_gej_add_ge_var(&prej[i], &prej[i-1], &d_ge, &zr[i]);
111     }
112 
113     /*
114      * Each point in 'prej' has a z coordinate too small by a factor of 'd.z'. Only
115      * the final point's z coordinate is actually used though, so just update that.
116      */
117     secp256k1_fe_mul(&prej[n-1].z, &prej[n-1].z, &d.z);
118 }
119 
120 /** Fill a table 'pre' with precomputed odd multiples of a.
121  *
122  *  There are two versions of this function:
123  *  - secp256k1_ecmult_odd_multiples_table_globalz_windowa which brings its
124  *    resulting point set to a single constant Z denominator, stores the X and Y
125  *    coordinates as ge_storage points in pre, and stores the global Z in rz.
126  *    It only operates on tables sized for WINDOW_A wnaf multiples.
127  *  - secp256k1_ecmult_odd_multiples_table_storage_var, which converts its
128  *    resulting point set to actually affine points, and stores those in pre.
129  *    It operates on tables of any size.
130  *
131  *  To compute a*P + b*G, we compute a table for P using the first function,
132  *  and for G using the second (which requires an inverse, but it only needs to
133  *  happen once).
134  */
secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge * pre,secp256k1_fe * globalz,const secp256k1_gej * a)135 static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge *pre, secp256k1_fe *globalz, const secp256k1_gej *a) {
136     secp256k1_gej prej[ECMULT_TABLE_SIZE(WINDOW_A)];
137     secp256k1_fe zr[ECMULT_TABLE_SIZE(WINDOW_A)];
138 
139     /* Compute the odd multiples in Jacobian form. */
140     secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), prej, zr, a);
141     /* Bring them to the same Z denominator. */
142     secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A), pre, globalz, prej, zr);
143 }
144 
secp256k1_ecmult_odd_multiples_table_storage_var(const int n,secp256k1_ge_storage * pre,const secp256k1_gej * a)145 static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp256k1_ge_storage *pre, const secp256k1_gej *a) {
146     secp256k1_gej d;
147     secp256k1_ge d_ge, p_ge;
148     secp256k1_gej pj;
149     secp256k1_fe zi;
150     secp256k1_fe zr;
151     secp256k1_fe dx_over_dz_squared;
152     int i;
153 
154     VERIFY_CHECK(!a->infinity);
155 
156     secp256k1_gej_double_var(&d, a, NULL);
157 
158     /* First, we perform all the additions in an isomorphic curve obtained by multiplying
159      * all `z` coordinates by 1/`d.z`. In these coordinates `d` is affine so we can use
160      * `secp256k1_gej_add_ge_var` to perform the additions. For each addition, we store
161      * the resulting y-coordinate and the z-ratio, since we only have enough memory to
162      * store two field elements. These are sufficient to efficiently undo the isomorphism
163      * and recompute all the `x`s.
164      */
165     d_ge.x = d.x;
166     d_ge.y = d.y;
167     d_ge.infinity = 0;
168 
169     secp256k1_ge_set_gej_zinv(&p_ge, a, &d.z);
170     pj.x = p_ge.x;
171     pj.y = p_ge.y;
172     pj.z = a->z;
173     pj.infinity = 0;
174 
175     for (i = 0; i < (n - 1); i++) {
176         secp256k1_fe_normalize_var(&pj.y);
177         secp256k1_fe_to_storage(&pre[i].y, &pj.y);
178         secp256k1_gej_add_ge_var(&pj, &pj, &d_ge, &zr);
179         secp256k1_fe_normalize_var(&zr);
180         secp256k1_fe_to_storage(&pre[i].x, &zr);
181     }
182 
183     /* Invert d.z in the same batch, preserving pj.z so we can extract 1/d.z */
184     secp256k1_fe_mul(&zi, &pj.z, &d.z);
185     secp256k1_fe_inv_var(&zi, &zi);
186 
187     /* Directly set `pre[n - 1]` to `pj`, saving the inverted z-coordinate so
188      * that we can combine it with the saved z-ratios to compute the other zs
189      * without any more inversions. */
190     secp256k1_ge_set_gej_zinv(&p_ge, &pj, &zi);
191     secp256k1_ge_to_storage(&pre[n - 1], &p_ge);
192 
193     /* Compute the actual x-coordinate of D, which will be needed below. */
194     secp256k1_fe_mul(&d.z, &zi, &pj.z);  /* d.z = 1/d.z */
195     secp256k1_fe_sqr(&dx_over_dz_squared, &d.z);
196     secp256k1_fe_mul(&dx_over_dz_squared, &dx_over_dz_squared, &d.x);
197 
198     /* Going into the second loop, we have set `pre[n-1]` to its final affine
199      * form, but still need to set `pre[i]` for `i` in 0 through `n-2`. We
200      * have `zi = (p.z * d.z)^-1`, where
201      *
202      *     `p.z` is the z-coordinate of the point on the isomorphic curve
203      *           which was ultimately assigned to `pre[n-1]`.
204      *     `d.z` is the multiplier that must be applied to all z-coordinates
205      *           to move from our isomorphic curve back to secp256k1; so the
206      *           product `p.z * d.z` is the z-coordinate of the secp256k1
207      *           point assigned to `pre[n-1]`.
208      *
209      * All subsequent inverse-z-coordinates can be obtained by multiplying this
210      * factor by successive z-ratios, which is much more efficient than directly
211      * computing each one.
212      *
213      * Importantly, these inverse-zs will be coordinates of points on secp256k1,
214      * while our other stored values come from computations on the isomorphic
215      * curve. So in the below loop, we will take care not to actually use `zi`
216      * or any derived values until we're back on secp256k1.
217      */
218     i = n - 1;
219     while (i > 0) {
220         secp256k1_fe zi2, zi3;
221         const secp256k1_fe *rzr;
222         i--;
223 
224         secp256k1_ge_from_storage(&p_ge, &pre[i]);
225 
226         /* For each remaining point, we extract the z-ratio from the stored
227          * x-coordinate, compute its z^-1 from that, and compute the full
228          * point from that. */
229         rzr = &p_ge.x;
230         secp256k1_fe_mul(&zi, &zi, rzr);
231         secp256k1_fe_sqr(&zi2, &zi);
232         secp256k1_fe_mul(&zi3, &zi2, &zi);
233         /* To compute the actual x-coordinate, we use the stored z ratio and
234          * y-coordinate, which we obtained from `secp256k1_gej_add_ge_var`
235          * in the loop above, as well as the inverse of the square of its
236          * z-coordinate. We store the latter in the `zi2` variable, which is
237          * computed iteratively starting from the overall Z inverse then
238          * multiplying by each z-ratio in turn.
239          *
240          * Denoting the z-ratio as `rzr`, we observe that it is equal to `h`
241          * from the inside of the above `gej_add_ge_var` call. This satisfies
242          *
243          *    rzr = d_x * z^2 - x * d_z^2
244          *
245          * where (`d_x`, `d_z`) are Jacobian coordinates of `D` and `(x, z)`
246          * are Jacobian coordinates of our desired point -- except both are on
247          * the isomorphic curve that we were using when we called `gej_add_ge_var`.
248          * To get back to secp256k1, we must multiply both `z`s by `d_z`, or
249          * equivalently divide both `x`s by `d_z^2`. Our equation then becomes
250          *
251          *    rzr = d_x * z^2 / d_z^2 - x
252          *
253          * (The left-hand-side, being a ratio of z-coordinates, is unaffected
254          * by the isomorphism.)
255          *
256          * Rearranging to solve for `x`, we have
257          *
258          *     x = d_x * z^2 / d_z^2 - rzr
259          *
260          * But what we actually want is the affine coordinate `X = x/z^2`,
261          * which will satisfy
262          *
263          *     X = d_x / d_z^2 - rzr / z^2
264          *       = dx_over_dz_squared - rzr * zi2
265          */
266         secp256k1_fe_mul(&p_ge.x, rzr, &zi2);
267         secp256k1_fe_negate(&p_ge.x, &p_ge.x, 1);
268         secp256k1_fe_add(&p_ge.x, &dx_over_dz_squared);
269         /* y is stored_y/z^3, as we expect */
270         secp256k1_fe_mul(&p_ge.y, &p_ge.y, &zi3);
271         /* Store */
272         secp256k1_ge_to_storage(&pre[i], &p_ge);
273     }
274 }
275 
276 /** The following two macro retrieves a particular odd multiple from a table
277  *  of precomputed multiples. */
278 #define ECMULT_TABLE_GET_GE(r,pre,n,w) do { \
279     VERIFY_CHECK(((n) & 1) == 1); \
280     VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
281     VERIFY_CHECK((n) <=  ((1 << ((w)-1)) - 1)); \
282     if ((n) > 0) { \
283         *(r) = (pre)[((n)-1)/2]; \
284     } else { \
285         *(r) = (pre)[(-(n)-1)/2]; \
286         secp256k1_fe_negate(&((r)->y), &((r)->y), 1); \
287     } \
288 } while(0)
289 
290 #define ECMULT_TABLE_GET_GE_STORAGE(r,pre,n,w) do { \
291     VERIFY_CHECK(((n) & 1) == 1); \
292     VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
293     VERIFY_CHECK((n) <=  ((1 << ((w)-1)) - 1)); \
294     if ((n) > 0) { \
295         secp256k1_ge_from_storage((r), &(pre)[((n)-1)/2]); \
296     } else { \
297         secp256k1_ge_from_storage((r), &(pre)[(-(n)-1)/2]); \
298         secp256k1_fe_negate(&((r)->y), &((r)->y), 1); \
299     } \
300 } while(0)
301 
302 static const size_t SECP256K1_ECMULT_CONTEXT_PREALLOCATED_SIZE =
303     ROUND_TO_ALIGN(sizeof((*((secp256k1_ecmult_context*) NULL)->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G))
304     + ROUND_TO_ALIGN(sizeof((*((secp256k1_ecmult_context*) NULL)->pre_g_128)[0]) * ECMULT_TABLE_SIZE(WINDOW_G))
305     ;
306 
secp256k1_ecmult_context_init(secp256k1_ecmult_context * ctx)307 static void secp256k1_ecmult_context_init(secp256k1_ecmult_context *ctx) {
308     ctx->pre_g = NULL;
309     ctx->pre_g_128 = NULL;
310 }
311 
secp256k1_ecmult_context_build(secp256k1_ecmult_context * ctx,void ** prealloc)312 static void secp256k1_ecmult_context_build(secp256k1_ecmult_context *ctx, void **prealloc) {
313     secp256k1_gej gj;
314     void* const base = *prealloc;
315     size_t const prealloc_size = SECP256K1_ECMULT_CONTEXT_PREALLOCATED_SIZE;
316 
317     if (ctx->pre_g != NULL) {
318         return;
319     }
320 
321     /* get the generator */
322     secp256k1_gej_set_ge(&gj, &secp256k1_ge_const_g);
323 
324     {
325         size_t size = sizeof((*ctx->pre_g)[0]) * ((size_t)ECMULT_TABLE_SIZE(WINDOW_G));
326         /* check for overflow */
327         VERIFY_CHECK(size / sizeof((*ctx->pre_g)[0]) == ((size_t)ECMULT_TABLE_SIZE(WINDOW_G)));
328         ctx->pre_g = (secp256k1_ge_storage (*)[])manual_alloc(prealloc, sizeof((*ctx->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G), base, prealloc_size);
329     }
330 
331     /* precompute the tables with odd multiples */
332     secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj);
333 
334     {
335         secp256k1_gej g_128j;
336         int i;
337 
338         size_t size = sizeof((*ctx->pre_g_128)[0]) * ((size_t) ECMULT_TABLE_SIZE(WINDOW_G));
339         /* check for overflow */
340         VERIFY_CHECK(size / sizeof((*ctx->pre_g_128)[0]) == ((size_t)ECMULT_TABLE_SIZE(WINDOW_G)));
341         ctx->pre_g_128 = (secp256k1_ge_storage (*)[])manual_alloc(prealloc, sizeof((*ctx->pre_g_128)[0]) * ECMULT_TABLE_SIZE(WINDOW_G), base, prealloc_size);
342 
343         /* calculate 2^128*generator */
344         g_128j = gj;
345         for (i = 0; i < 128; i++) {
346             secp256k1_gej_double_var(&g_128j, &g_128j, NULL);
347         }
348         secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j);
349     }
350 }
351 
secp256k1_ecmult_context_finalize_memcpy(secp256k1_ecmult_context * dst,const secp256k1_ecmult_context * src)352 static void secp256k1_ecmult_context_finalize_memcpy(secp256k1_ecmult_context *dst, const secp256k1_ecmult_context *src) {
353     if (src->pre_g != NULL) {
354         /* We cast to void* first to suppress a -Wcast-align warning. */
355         dst->pre_g = (secp256k1_ge_storage (*)[])(void*)((unsigned char*)dst + ((unsigned char*)(src->pre_g) - (unsigned char*)src));
356     }
357     if (src->pre_g_128 != NULL) {
358         dst->pre_g_128 = (secp256k1_ge_storage (*)[])(void*)((unsigned char*)dst + ((unsigned char*)(src->pre_g_128) - (unsigned char*)src));
359     }
360 }
361 
secp256k1_ecmult_context_is_built(const secp256k1_ecmult_context * ctx)362 static int secp256k1_ecmult_context_is_built(const secp256k1_ecmult_context *ctx) {
363     return ctx->pre_g != NULL;
364 }
365 
secp256k1_ecmult_context_clear(secp256k1_ecmult_context * ctx)366 static void secp256k1_ecmult_context_clear(secp256k1_ecmult_context *ctx) {
367     secp256k1_ecmult_context_init(ctx);
368 }
369 
370 /** Convert a number to WNAF notation. The number becomes represented by sum(2^i * wnaf[i], i=0..bits),
371  *  with the following guarantees:
372  *  - each wnaf[i] is either 0, or an odd integer between -(1<<(w-1) - 1) and (1<<(w-1) - 1)
373  *  - two non-zero entries in wnaf are separated by at least w-1 zeroes.
374  *  - the number of set values in wnaf is returned. This number is at most 256, and at most one more
375  *    than the number of bits in the (absolute value) of the input.
376  */
secp256k1_ecmult_wnaf(int * wnaf,int len,const secp256k1_scalar * a,int w)377 static int secp256k1_ecmult_wnaf(int *wnaf, int len, const secp256k1_scalar *a, int w) {
378     secp256k1_scalar s;
379     int last_set_bit = -1;
380     int bit = 0;
381     int sign = 1;
382     int carry = 0;
383 
384     VERIFY_CHECK(wnaf != NULL);
385     VERIFY_CHECK(0 <= len && len <= 256);
386     VERIFY_CHECK(a != NULL);
387     VERIFY_CHECK(2 <= w && w <= 31);
388 
389     memset(wnaf, 0, len * sizeof(wnaf[0]));
390 
391     s = *a;
392     if (secp256k1_scalar_get_bits(&s, 255, 1)) {
393         secp256k1_scalar_negate(&s, &s);
394         sign = -1;
395     }
396 
397     while (bit < len) {
398         int now;
399         int word;
400         if (secp256k1_scalar_get_bits(&s, bit, 1) == (unsigned int)carry) {
401             bit++;
402             continue;
403         }
404 
405         now = w;
406         if (now > len - bit) {
407             now = len - bit;
408         }
409 
410         word = secp256k1_scalar_get_bits_var(&s, bit, now) + carry;
411 
412         carry = (word >> (w-1)) & 1;
413         word -= carry << w;
414 
415         wnaf[bit] = sign * word;
416         last_set_bit = bit;
417 
418         bit += now;
419     }
420 #ifdef VERIFY
421     CHECK(carry == 0);
422     while (bit < 256) {
423         CHECK(secp256k1_scalar_get_bits(&s, bit++, 1) == 0);
424     }
425 #endif
426     return last_set_bit + 1;
427 }
428 
429 struct secp256k1_strauss_point_state {
430     secp256k1_scalar na_1, na_lam;
431     int wnaf_na_1[129];
432     int wnaf_na_lam[129];
433     int bits_na_1;
434     int bits_na_lam;
435     size_t input_pos;
436 };
437 
438 struct secp256k1_strauss_state {
439     secp256k1_gej* prej;
440     secp256k1_fe* zr;
441     secp256k1_ge* pre_a;
442     secp256k1_ge* pre_a_lam;
443     struct secp256k1_strauss_point_state* ps;
444 };
445 
secp256k1_ecmult_strauss_wnaf(const secp256k1_ecmult_context * ctx,const struct secp256k1_strauss_state * state,secp256k1_gej * r,size_t num,const secp256k1_gej * a,const secp256k1_scalar * na,const secp256k1_scalar * ng)446 static void secp256k1_ecmult_strauss_wnaf(const secp256k1_ecmult_context *ctx, const struct secp256k1_strauss_state *state, secp256k1_gej *r, size_t num, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) {
447     secp256k1_ge tmpa;
448     secp256k1_fe Z;
449     /* Splitted G factors. */
450     secp256k1_scalar ng_1, ng_128;
451     int wnaf_ng_1[129];
452     int bits_ng_1 = 0;
453     int wnaf_ng_128[129];
454     int bits_ng_128 = 0;
455     int i;
456     int bits = 0;
457     size_t np;
458     size_t no = 0;
459 
460     for (np = 0; np < num; ++np) {
461         if (secp256k1_scalar_is_zero(&na[np]) || secp256k1_gej_is_infinity(&a[np])) {
462             continue;
463         }
464         state->ps[no].input_pos = np;
465         /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */
466         secp256k1_scalar_split_lambda(&state->ps[no].na_1, &state->ps[no].na_lam, &na[np]);
467 
468         /* build wnaf representation for na_1 and na_lam. */
469         state->ps[no].bits_na_1   = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_1,   129, &state->ps[no].na_1,   WINDOW_A);
470         state->ps[no].bits_na_lam = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_lam, 129, &state->ps[no].na_lam, WINDOW_A);
471         VERIFY_CHECK(state->ps[no].bits_na_1 <= 129);
472         VERIFY_CHECK(state->ps[no].bits_na_lam <= 129);
473         if (state->ps[no].bits_na_1 > bits) {
474             bits = state->ps[no].bits_na_1;
475         }
476         if (state->ps[no].bits_na_lam > bits) {
477             bits = state->ps[no].bits_na_lam;
478         }
479         ++no;
480     }
481 
482     /* Calculate odd multiples of a.
483      * All multiples are brought to the same Z 'denominator', which is stored
484      * in Z. Due to secp256k1' isomorphism we can do all operations pretending
485      * that the Z coordinate was 1, use affine addition formulae, and correct
486      * the Z coordinate of the result once at the end.
487      * The exception is the precomputed G table points, which are actually
488      * affine. Compared to the base used for other points, they have a Z ratio
489      * of 1/Z, so we can use secp256k1_gej_add_zinv_var, which uses the same
490      * isomorphism to efficiently add with a known Z inverse.
491      */
492     if (no > 0) {
493         /* Compute the odd multiples in Jacobian form. */
494         secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->prej, state->zr, &a[state->ps[0].input_pos]);
495         for (np = 1; np < no; ++np) {
496             secp256k1_gej tmp = a[state->ps[np].input_pos];
497 #ifdef VERIFY
498             secp256k1_fe_normalize_var(&(state->prej[(np - 1) * ECMULT_TABLE_SIZE(WINDOW_A) + ECMULT_TABLE_SIZE(WINDOW_A) - 1].z));
499 #endif
500             secp256k1_gej_rescale(&tmp, &(state->prej[(np - 1) * ECMULT_TABLE_SIZE(WINDOW_A) + ECMULT_TABLE_SIZE(WINDOW_A) - 1].z));
501             secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->prej + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &tmp);
502             secp256k1_fe_mul(state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &(a[state->ps[np].input_pos].z));
503         }
504         /* Bring them to the same Z denominator. */
505         secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A) * no, state->pre_a, &Z, state->prej, state->zr);
506     } else {
507         secp256k1_fe_set_int(&Z, 1);
508     }
509 
510     for (np = 0; np < no; ++np) {
511         for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
512             secp256k1_ge_mul_lambda(&state->pre_a_lam[np * ECMULT_TABLE_SIZE(WINDOW_A) + i], &state->pre_a[np * ECMULT_TABLE_SIZE(WINDOW_A) + i]);
513         }
514     }
515 
516     if (ng) {
517         /* split ng into ng_1 and ng_128 (where gn = gn_1 + gn_128*2^128, and gn_1 and gn_128 are ~128 bit) */
518         secp256k1_scalar_split_128(&ng_1, &ng_128, ng);
519 
520         /* Build wnaf representation for ng_1 and ng_128 */
521         bits_ng_1   = secp256k1_ecmult_wnaf(wnaf_ng_1,   129, &ng_1,   WINDOW_G);
522         bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, 129, &ng_128, WINDOW_G);
523         if (bits_ng_1 > bits) {
524             bits = bits_ng_1;
525         }
526         if (bits_ng_128 > bits) {
527             bits = bits_ng_128;
528         }
529     }
530 
531     secp256k1_gej_set_infinity(r);
532 
533     for (i = bits - 1; i >= 0; i--) {
534         int n;
535         secp256k1_gej_double_var(r, r, NULL);
536         for (np = 0; np < no; ++np) {
537             if (i < state->ps[np].bits_na_1 && (n = state->ps[np].wnaf_na_1[i])) {
538                 ECMULT_TABLE_GET_GE(&tmpa, state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A);
539                 secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
540             }
541             if (i < state->ps[np].bits_na_lam && (n = state->ps[np].wnaf_na_lam[i])) {
542                 ECMULT_TABLE_GET_GE(&tmpa, state->pre_a_lam + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A);
543                 secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
544             }
545         }
546         if (i < bits_ng_1 && (n = wnaf_ng_1[i])) {
547             ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G);
548             secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
549         }
550         if (i < bits_ng_128 && (n = wnaf_ng_128[i])) {
551             ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g_128, n, WINDOW_G);
552             secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
553         }
554     }
555 
556     if (!r->infinity) {
557         secp256k1_fe_mul(&r->z, &r->z, &Z);
558     }
559 }
560 
secp256k1_ecmult(const secp256k1_ecmult_context * ctx,secp256k1_gej * r,const secp256k1_gej * a,const secp256k1_scalar * na,const secp256k1_scalar * ng)561 static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) {
562     secp256k1_gej prej[ECMULT_TABLE_SIZE(WINDOW_A)];
563     secp256k1_fe zr[ECMULT_TABLE_SIZE(WINDOW_A)];
564     secp256k1_ge pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
565     struct secp256k1_strauss_point_state ps[1];
566     secp256k1_ge pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
567     struct secp256k1_strauss_state state;
568 
569     state.prej = prej;
570     state.zr = zr;
571     state.pre_a = pre_a;
572     state.pre_a_lam = pre_a_lam;
573     state.ps = ps;
574     secp256k1_ecmult_strauss_wnaf(ctx, &state, r, 1, a, na, ng);
575 }
576 
secp256k1_strauss_scratch_size(size_t n_points)577 static size_t secp256k1_strauss_scratch_size(size_t n_points) {
578     static const size_t point_size = (2 * sizeof(secp256k1_ge) + sizeof(secp256k1_gej) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar);
579     return n_points*point_size;
580 }
581 
secp256k1_ecmult_strauss_batch(const secp256k1_callback * error_callback,const secp256k1_ecmult_context * ctx,secp256k1_scratch * scratch,secp256k1_gej * r,const secp256k1_scalar * inp_g_sc,secp256k1_ecmult_multi_callback cb,void * cbdata,size_t n_points,size_t cb_offset)582 static int secp256k1_ecmult_strauss_batch(const secp256k1_callback* error_callback, const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points, size_t cb_offset) {
583     secp256k1_gej* points;
584     secp256k1_scalar* scalars;
585     struct secp256k1_strauss_state state;
586     size_t i;
587     const size_t scratch_checkpoint = secp256k1_scratch_checkpoint(error_callback, scratch);
588 
589     secp256k1_gej_set_infinity(r);
590     if (inp_g_sc == NULL && n_points == 0) {
591         return 1;
592     }
593 
594     points = (secp256k1_gej*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(secp256k1_gej));
595     scalars = (secp256k1_scalar*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(secp256k1_scalar));
596     state.prej = (secp256k1_gej*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_gej));
597     state.zr = (secp256k1_fe*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe));
598     state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(error_callback, scratch, n_points * 2 * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge));
599     state.pre_a_lam = state.pre_a + n_points * ECMULT_TABLE_SIZE(WINDOW_A);
600     state.ps = (struct secp256k1_strauss_point_state*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(struct secp256k1_strauss_point_state));
601 
602     if (points == NULL || scalars == NULL || state.prej == NULL || state.zr == NULL || state.pre_a == NULL) {
603         secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
604         return 0;
605     }
606 
607     for (i = 0; i < n_points; i++) {
608         secp256k1_ge point;
609         if (!cb(&scalars[i], &point, i+cb_offset, cbdata)) {
610             secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
611             return 0;
612         }
613         secp256k1_gej_set_ge(&points[i], &point);
614     }
615     secp256k1_ecmult_strauss_wnaf(ctx, &state, r, n_points, points, scalars, inp_g_sc);
616     secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
617     return 1;
618 }
619 
620 /* Wrapper for secp256k1_ecmult_multi_func interface */
secp256k1_ecmult_strauss_batch_single(const secp256k1_callback * error_callback,const secp256k1_ecmult_context * actx,secp256k1_scratch * scratch,secp256k1_gej * r,const secp256k1_scalar * inp_g_sc,secp256k1_ecmult_multi_callback cb,void * cbdata,size_t n)621 static int secp256k1_ecmult_strauss_batch_single(const secp256k1_callback* error_callback, const secp256k1_ecmult_context *actx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) {
622     return secp256k1_ecmult_strauss_batch(error_callback, actx, scratch, r, inp_g_sc, cb, cbdata, n, 0);
623 }
624 
secp256k1_strauss_max_points(const secp256k1_callback * error_callback,secp256k1_scratch * scratch)625 static size_t secp256k1_strauss_max_points(const secp256k1_callback* error_callback, secp256k1_scratch *scratch) {
626     return secp256k1_scratch_max_allocation(error_callback, scratch, STRAUSS_SCRATCH_OBJECTS) / secp256k1_strauss_scratch_size(1);
627 }
628 
629 /** Convert a number to WNAF notation.
630  *  The number becomes represented by sum(2^{wi} * wnaf[i], i=0..WNAF_SIZE(w)+1) - return_val.
631  *  It has the following guarantees:
632  *  - each wnaf[i] is either 0 or an odd integer between -(1 << w) and (1 << w)
633  *  - the number of words set is always WNAF_SIZE(w)
634  *  - the returned skew is 0 or 1
635  */
secp256k1_wnaf_fixed(int * wnaf,const secp256k1_scalar * s,int w)636 static int secp256k1_wnaf_fixed(int *wnaf, const secp256k1_scalar *s, int w) {
637     int skew = 0;
638     int pos;
639     int max_pos;
640     int last_w;
641     const secp256k1_scalar *work = s;
642 
643     if (secp256k1_scalar_is_zero(s)) {
644         for (pos = 0; pos < WNAF_SIZE(w); pos++) {
645             wnaf[pos] = 0;
646         }
647         return 0;
648     }
649 
650     if (secp256k1_scalar_is_even(s)) {
651         skew = 1;
652     }
653 
654     wnaf[0] = secp256k1_scalar_get_bits_var(work, 0, w) + skew;
655     /* Compute last window size. Relevant when window size doesn't divide the
656      * number of bits in the scalar */
657     last_w = WNAF_BITS - (WNAF_SIZE(w) - 1) * w;
658 
659     /* Store the position of the first nonzero word in max_pos to allow
660      * skipping leading zeros when calculating the wnaf. */
661     for (pos = WNAF_SIZE(w) - 1; pos > 0; pos--) {
662         int val = secp256k1_scalar_get_bits_var(work, pos * w, pos == WNAF_SIZE(w)-1 ? last_w : w);
663         if(val != 0) {
664             break;
665         }
666         wnaf[pos] = 0;
667     }
668     max_pos = pos;
669     pos = 1;
670 
671     while (pos <= max_pos) {
672         int val = secp256k1_scalar_get_bits_var(work, pos * w, pos == WNAF_SIZE(w)-1 ? last_w : w);
673         if ((val & 1) == 0) {
674             wnaf[pos - 1] -= (1 << w);
675             wnaf[pos] = (val + 1);
676         } else {
677             wnaf[pos] = val;
678         }
679         /* Set a coefficient to zero if it is 1 or -1 and the proceeding digit
680          * is strictly negative or strictly positive respectively. Only change
681          * coefficients at previous positions because above code assumes that
682          * wnaf[pos - 1] is odd.
683          */
684         if (pos >= 2 && ((wnaf[pos - 1] == 1 && wnaf[pos - 2] < 0) || (wnaf[pos - 1] == -1 && wnaf[pos - 2] > 0))) {
685             if (wnaf[pos - 1] == 1) {
686                 wnaf[pos - 2] += 1 << w;
687             } else {
688                 wnaf[pos - 2] -= 1 << w;
689             }
690             wnaf[pos - 1] = 0;
691         }
692         ++pos;
693     }
694 
695     return skew;
696 }
697 
698 struct secp256k1_pippenger_point_state {
699     int skew_na;
700     size_t input_pos;
701 };
702 
703 struct secp256k1_pippenger_state {
704     int *wnaf_na;
705     struct secp256k1_pippenger_point_state* ps;
706 };
707 
708 /*
709  * pippenger_wnaf computes the result of a multi-point multiplication as
710  * follows: The scalars are brought into wnaf with n_wnaf elements each. Then
711  * for every i < n_wnaf, first each point is added to a "bucket" corresponding
712  * to the point's wnaf[i]. Second, the buckets are added together such that
713  * r += 1*bucket[0] + 3*bucket[1] + 5*bucket[2] + ...
714  */
secp256k1_ecmult_pippenger_wnaf(secp256k1_gej * buckets,int bucket_window,struct secp256k1_pippenger_state * state,secp256k1_gej * r,const secp256k1_scalar * sc,const secp256k1_ge * pt,size_t num)715 static int secp256k1_ecmult_pippenger_wnaf(secp256k1_gej *buckets, int bucket_window, struct secp256k1_pippenger_state *state, secp256k1_gej *r, const secp256k1_scalar *sc, const secp256k1_ge *pt, size_t num) {
716     size_t n_wnaf = WNAF_SIZE(bucket_window+1);
717     size_t np;
718     size_t no = 0;
719     int i;
720     int j;
721 
722     for (np = 0; np < num; ++np) {
723         if (secp256k1_scalar_is_zero(&sc[np]) || secp256k1_ge_is_infinity(&pt[np])) {
724             continue;
725         }
726         state->ps[no].input_pos = np;
727         state->ps[no].skew_na = secp256k1_wnaf_fixed(&state->wnaf_na[no*n_wnaf], &sc[np], bucket_window+1);
728         no++;
729     }
730     secp256k1_gej_set_infinity(r);
731 
732     if (no == 0) {
733         return 1;
734     }
735 
736     for (i = n_wnaf - 1; i >= 0; i--) {
737         secp256k1_gej running_sum;
738 
739         for(j = 0; j < ECMULT_TABLE_SIZE(bucket_window+2); j++) {
740             secp256k1_gej_set_infinity(&buckets[j]);
741         }
742 
743         for (np = 0; np < no; ++np) {
744             int n = state->wnaf_na[np*n_wnaf + i];
745             struct secp256k1_pippenger_point_state point_state = state->ps[np];
746             secp256k1_ge tmp;
747             int idx;
748 
749             if (i == 0) {
750                 /* correct for wnaf skew */
751                 int skew = point_state.skew_na;
752                 if (skew) {
753                     secp256k1_ge_neg(&tmp, &pt[point_state.input_pos]);
754                     secp256k1_gej_add_ge_var(&buckets[0], &buckets[0], &tmp, NULL);
755                 }
756             }
757             if (n > 0) {
758                 idx = (n - 1)/2;
759                 secp256k1_gej_add_ge_var(&buckets[idx], &buckets[idx], &pt[point_state.input_pos], NULL);
760             } else if (n < 0) {
761                 idx = -(n + 1)/2;
762                 secp256k1_ge_neg(&tmp, &pt[point_state.input_pos]);
763                 secp256k1_gej_add_ge_var(&buckets[idx], &buckets[idx], &tmp, NULL);
764             }
765         }
766 
767         for(j = 0; j < bucket_window; j++) {
768             secp256k1_gej_double_var(r, r, NULL);
769         }
770 
771         secp256k1_gej_set_infinity(&running_sum);
772         /* Accumulate the sum: bucket[0] + 3*bucket[1] + 5*bucket[2] + 7*bucket[3] + ...
773          *                   = bucket[0] +   bucket[1] +   bucket[2] +   bucket[3] + ...
774          *                   +         2 *  (bucket[1] + 2*bucket[2] + 3*bucket[3] + ...)
775          * using an intermediate running sum:
776          * running_sum = bucket[0] +   bucket[1] +   bucket[2] + ...
777          *
778          * The doubling is done implicitly by deferring the final window doubling (of 'r').
779          */
780         for(j = ECMULT_TABLE_SIZE(bucket_window+2) - 1; j > 0; j--) {
781             secp256k1_gej_add_var(&running_sum, &running_sum, &buckets[j], NULL);
782             secp256k1_gej_add_var(r, r, &running_sum, NULL);
783         }
784 
785         secp256k1_gej_add_var(&running_sum, &running_sum, &buckets[0], NULL);
786         secp256k1_gej_double_var(r, r, NULL);
787         secp256k1_gej_add_var(r, r, &running_sum, NULL);
788     }
789     return 1;
790 }
791 
792 /**
793  * Returns optimal bucket_window (number of bits of a scalar represented by a
794  * set of buckets) for a given number of points.
795  */
secp256k1_pippenger_bucket_window(size_t n)796 static int secp256k1_pippenger_bucket_window(size_t n) {
797     if (n <= 1) {
798         return 1;
799     } else if (n <= 4) {
800         return 2;
801     } else if (n <= 20) {
802         return 3;
803     } else if (n <= 57) {
804         return 4;
805     } else if (n <= 136) {
806         return 5;
807     } else if (n <= 235) {
808         return 6;
809     } else if (n <= 1260) {
810         return 7;
811     } else if (n <= 4420) {
812         return 9;
813     } else if (n <= 7880) {
814         return 10;
815     } else if (n <= 16050) {
816         return 11;
817     } else {
818         return PIPPENGER_MAX_BUCKET_WINDOW;
819     }
820 }
821 
822 /**
823  * Returns the maximum optimal number of points for a bucket_window.
824  */
secp256k1_pippenger_bucket_window_inv(int bucket_window)825 static size_t secp256k1_pippenger_bucket_window_inv(int bucket_window) {
826     switch(bucket_window) {
827         case 1: return 1;
828         case 2: return 4;
829         case 3: return 20;
830         case 4: return 57;
831         case 5: return 136;
832         case 6: return 235;
833         case 7: return 1260;
834         case 8: return 1260;
835         case 9: return 4420;
836         case 10: return 7880;
837         case 11: return 16050;
838         case PIPPENGER_MAX_BUCKET_WINDOW: return SIZE_MAX;
839     }
840     return 0;
841 }
842 
843 
secp256k1_ecmult_endo_split(secp256k1_scalar * s1,secp256k1_scalar * s2,secp256k1_ge * p1,secp256k1_ge * p2)844 SECP256K1_INLINE static void secp256k1_ecmult_endo_split(secp256k1_scalar *s1, secp256k1_scalar *s2, secp256k1_ge *p1, secp256k1_ge *p2) {
845     secp256k1_scalar tmp = *s1;
846     secp256k1_scalar_split_lambda(s1, s2, &tmp);
847     secp256k1_ge_mul_lambda(p2, p1);
848 
849     if (secp256k1_scalar_is_high(s1)) {
850         secp256k1_scalar_negate(s1, s1);
851         secp256k1_ge_neg(p1, p1);
852     }
853     if (secp256k1_scalar_is_high(s2)) {
854         secp256k1_scalar_negate(s2, s2);
855         secp256k1_ge_neg(p2, p2);
856     }
857 }
858 
859 /**
860  * Returns the scratch size required for a given number of points (excluding
861  * base point G) without considering alignment.
862  */
secp256k1_pippenger_scratch_size(size_t n_points,int bucket_window)863 static size_t secp256k1_pippenger_scratch_size(size_t n_points, int bucket_window) {
864     size_t entries = 2*n_points + 2;
865     size_t entry_size = sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) + sizeof(struct secp256k1_pippenger_point_state) + (WNAF_SIZE(bucket_window+1)+1)*sizeof(int);
866     return (sizeof(secp256k1_gej) << bucket_window) + sizeof(struct secp256k1_pippenger_state) + entries * entry_size;
867 }
868 
secp256k1_ecmult_pippenger_batch(const secp256k1_callback * error_callback,const secp256k1_ecmult_context * ctx,secp256k1_scratch * scratch,secp256k1_gej * r,const secp256k1_scalar * inp_g_sc,secp256k1_ecmult_multi_callback cb,void * cbdata,size_t n_points,size_t cb_offset)869 static int secp256k1_ecmult_pippenger_batch(const secp256k1_callback* error_callback, const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points, size_t cb_offset) {
870     const size_t scratch_checkpoint = secp256k1_scratch_checkpoint(error_callback, scratch);
871     /* Use 2(n+1) with the endomorphism, when calculating batch
872      * sizes. The reason for +1 is that we add the G scalar to the list of
873      * other scalars. */
874     size_t entries = 2*n_points + 2;
875     secp256k1_ge *points;
876     secp256k1_scalar *scalars;
877     secp256k1_gej *buckets;
878     struct secp256k1_pippenger_state *state_space;
879     size_t idx = 0;
880     size_t point_idx = 0;
881     int i, j;
882     int bucket_window;
883 
884     (void)ctx;
885     secp256k1_gej_set_infinity(r);
886     if (inp_g_sc == NULL && n_points == 0) {
887         return 1;
888     }
889 
890     bucket_window = secp256k1_pippenger_bucket_window(n_points);
891     points = (secp256k1_ge *) secp256k1_scratch_alloc(error_callback, scratch, entries * sizeof(*points));
892     scalars = (secp256k1_scalar *) secp256k1_scratch_alloc(error_callback, scratch, entries * sizeof(*scalars));
893     state_space = (struct secp256k1_pippenger_state *) secp256k1_scratch_alloc(error_callback, scratch, sizeof(*state_space));
894     if (points == NULL || scalars == NULL || state_space == NULL) {
895         secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
896         return 0;
897     }
898 
899     state_space->ps = (struct secp256k1_pippenger_point_state *) secp256k1_scratch_alloc(error_callback, scratch, entries * sizeof(*state_space->ps));
900     state_space->wnaf_na = (int *) secp256k1_scratch_alloc(error_callback, scratch, entries*(WNAF_SIZE(bucket_window+1)) * sizeof(int));
901     buckets = (secp256k1_gej *) secp256k1_scratch_alloc(error_callback, scratch, (1<<bucket_window) * sizeof(*buckets));
902     if (state_space->ps == NULL || state_space->wnaf_na == NULL || buckets == NULL) {
903         secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
904         return 0;
905     }
906 
907     if (inp_g_sc != NULL) {
908         scalars[0] = *inp_g_sc;
909         points[0] = secp256k1_ge_const_g;
910         idx++;
911         secp256k1_ecmult_endo_split(&scalars[0], &scalars[1], &points[0], &points[1]);
912         idx++;
913     }
914 
915     while (point_idx < n_points) {
916         if (!cb(&scalars[idx], &points[idx], point_idx + cb_offset, cbdata)) {
917             secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
918             return 0;
919         }
920         idx++;
921         secp256k1_ecmult_endo_split(&scalars[idx - 1], &scalars[idx], &points[idx - 1], &points[idx]);
922         idx++;
923         point_idx++;
924     }
925 
926     secp256k1_ecmult_pippenger_wnaf(buckets, bucket_window, state_space, r, scalars, points, idx);
927 
928     /* Clear data */
929     for(i = 0; (size_t)i < idx; i++) {
930         secp256k1_scalar_clear(&scalars[i]);
931         state_space->ps[i].skew_na = 0;
932         for(j = 0; j < WNAF_SIZE(bucket_window+1); j++) {
933             state_space->wnaf_na[i * WNAF_SIZE(bucket_window+1) + j] = 0;
934         }
935     }
936     for(i = 0; i < 1<<bucket_window; i++) {
937         secp256k1_gej_clear(&buckets[i]);
938     }
939     secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
940     return 1;
941 }
942 
943 /* Wrapper for secp256k1_ecmult_multi_func interface */
secp256k1_ecmult_pippenger_batch_single(const secp256k1_callback * error_callback,const secp256k1_ecmult_context * actx,secp256k1_scratch * scratch,secp256k1_gej * r,const secp256k1_scalar * inp_g_sc,secp256k1_ecmult_multi_callback cb,void * cbdata,size_t n)944 static int secp256k1_ecmult_pippenger_batch_single(const secp256k1_callback* error_callback, const secp256k1_ecmult_context *actx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) {
945     return secp256k1_ecmult_pippenger_batch(error_callback, actx, scratch, r, inp_g_sc, cb, cbdata, n, 0);
946 }
947 
948 /**
949  * Returns the maximum number of points in addition to G that can be used with
950  * a given scratch space. The function ensures that fewer points may also be
951  * used.
952  */
secp256k1_pippenger_max_points(const secp256k1_callback * error_callback,secp256k1_scratch * scratch)953 static size_t secp256k1_pippenger_max_points(const secp256k1_callback* error_callback, secp256k1_scratch *scratch) {
954     size_t max_alloc = secp256k1_scratch_max_allocation(error_callback, scratch, PIPPENGER_SCRATCH_OBJECTS);
955     int bucket_window;
956     size_t res = 0;
957 
958     for (bucket_window = 1; bucket_window <= PIPPENGER_MAX_BUCKET_WINDOW; bucket_window++) {
959         size_t n_points;
960         size_t max_points = secp256k1_pippenger_bucket_window_inv(bucket_window);
961         size_t space_for_points;
962         size_t space_overhead;
963         size_t entry_size = sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) + sizeof(struct secp256k1_pippenger_point_state) + (WNAF_SIZE(bucket_window+1)+1)*sizeof(int);
964 
965         entry_size = 2*entry_size;
966         space_overhead = (sizeof(secp256k1_gej) << bucket_window) + entry_size + sizeof(struct secp256k1_pippenger_state);
967         if (space_overhead > max_alloc) {
968             break;
969         }
970         space_for_points = max_alloc - space_overhead;
971 
972         n_points = space_for_points/entry_size;
973         n_points = n_points > max_points ? max_points : n_points;
974         if (n_points > res) {
975             res = n_points;
976         }
977         if (n_points < max_points) {
978             /* A larger bucket_window may support even more points. But if we
979              * would choose that then the caller couldn't safely use any number
980              * smaller than what this function returns */
981             break;
982         }
983     }
984     return res;
985 }
986 
987 /* Computes ecmult_multi by simply multiplying and adding each point. Does not
988  * require a scratch space */
secp256k1_ecmult_multi_simple_var(const secp256k1_ecmult_context * ctx,secp256k1_gej * r,const secp256k1_scalar * inp_g_sc,secp256k1_ecmult_multi_callback cb,void * cbdata,size_t n_points)989 static int secp256k1_ecmult_multi_simple_var(const secp256k1_ecmult_context *ctx, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points) {
990     size_t point_idx;
991     secp256k1_scalar szero;
992     secp256k1_gej tmpj;
993 
994     secp256k1_scalar_set_int(&szero, 0);
995     secp256k1_gej_set_infinity(r);
996     secp256k1_gej_set_infinity(&tmpj);
997     /* r = inp_g_sc*G */
998     secp256k1_ecmult(ctx, r, &tmpj, &szero, inp_g_sc);
999     for (point_idx = 0; point_idx < n_points; point_idx++) {
1000         secp256k1_ge point;
1001         secp256k1_gej pointj;
1002         secp256k1_scalar scalar;
1003         if (!cb(&scalar, &point, point_idx, cbdata)) {
1004             return 0;
1005         }
1006         /* r += scalar*point */
1007         secp256k1_gej_set_ge(&pointj, &point);
1008         secp256k1_ecmult(ctx, &tmpj, &pointj, &scalar, NULL);
1009         secp256k1_gej_add_var(r, r, &tmpj, NULL);
1010     }
1011     return 1;
1012 }
1013 
1014 /* Compute the number of batches and the batch size given the maximum batch size and the
1015  * total number of points */
secp256k1_ecmult_multi_batch_size_helper(size_t * n_batches,size_t * n_batch_points,size_t max_n_batch_points,size_t n)1016 static int secp256k1_ecmult_multi_batch_size_helper(size_t *n_batches, size_t *n_batch_points, size_t max_n_batch_points, size_t n) {
1017     if (max_n_batch_points == 0) {
1018         return 0;
1019     }
1020     if (max_n_batch_points > ECMULT_MAX_POINTS_PER_BATCH) {
1021         max_n_batch_points = ECMULT_MAX_POINTS_PER_BATCH;
1022     }
1023     if (n == 0) {
1024         *n_batches = 0;
1025         *n_batch_points = 0;
1026         return 1;
1027     }
1028     /* Compute ceil(n/max_n_batch_points) and ceil(n/n_batches) */
1029     *n_batches = 1 + (n - 1) / max_n_batch_points;
1030     *n_batch_points = 1 + (n - 1) / *n_batches;
1031     return 1;
1032 }
1033 
1034 typedef int (*secp256k1_ecmult_multi_func)(const secp256k1_callback* error_callback, const secp256k1_ecmult_context*, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t);
secp256k1_ecmult_multi_var(const secp256k1_callback * error_callback,const secp256k1_ecmult_context * ctx,secp256k1_scratch * scratch,secp256k1_gej * r,const secp256k1_scalar * inp_g_sc,secp256k1_ecmult_multi_callback cb,void * cbdata,size_t n)1035 static int secp256k1_ecmult_multi_var(const secp256k1_callback* error_callback, const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) {
1036     size_t i;
1037 
1038     int (*f)(const secp256k1_callback* error_callback, const secp256k1_ecmult_context*, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t, size_t);
1039     size_t n_batches;
1040     size_t n_batch_points;
1041 
1042     secp256k1_gej_set_infinity(r);
1043     if (inp_g_sc == NULL && n == 0) {
1044         return 1;
1045     } else if (n == 0) {
1046         secp256k1_scalar szero;
1047         secp256k1_scalar_set_int(&szero, 0);
1048         secp256k1_ecmult(ctx, r, r, &szero, inp_g_sc);
1049         return 1;
1050     }
1051     if (scratch == NULL) {
1052         return secp256k1_ecmult_multi_simple_var(ctx, r, inp_g_sc, cb, cbdata, n);
1053     }
1054 
1055     /* Compute the batch sizes for Pippenger's algorithm given a scratch space. If it's greater than
1056      * a threshold use Pippenger's algorithm. Otherwise use Strauss' algorithm.
1057      * As a first step check if there's enough space for Pippenger's algo (which requires less space
1058      * than Strauss' algo) and if not, use the simple algorithm. */
1059     if (!secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, secp256k1_pippenger_max_points(error_callback, scratch), n)) {
1060         return secp256k1_ecmult_multi_simple_var(ctx, r, inp_g_sc, cb, cbdata, n);
1061     }
1062     if (n_batch_points >= ECMULT_PIPPENGER_THRESHOLD) {
1063         f = secp256k1_ecmult_pippenger_batch;
1064     } else {
1065         if (!secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, secp256k1_strauss_max_points(error_callback, scratch), n)) {
1066             return secp256k1_ecmult_multi_simple_var(ctx, r, inp_g_sc, cb, cbdata, n);
1067         }
1068         f = secp256k1_ecmult_strauss_batch;
1069     }
1070     for(i = 0; i < n_batches; i++) {
1071         size_t nbp = n < n_batch_points ? n : n_batch_points;
1072         size_t offset = n_batch_points*i;
1073         secp256k1_gej tmp;
1074         if (!f(error_callback, ctx, scratch, &tmp, i == 0 ? inp_g_sc : NULL, cb, cbdata, nbp, offset)) {
1075             return 0;
1076         }
1077         secp256k1_gej_add_var(r, r, &tmp, NULL);
1078         n -= nbp;
1079     }
1080     return 1;
1081 }
1082 
1083 #endif /* SECP256K1_ECMULT_IMPL_H */
1084