1 /*****************************************************************************
2  * Copyright (c) 2013, 2014, 2017 Pieter Wuille, Andrew Poelstra, Jonas Nick *
3  * Distributed under the MIT software license, see the accompanying          *
4  * file COPYING or http://www.opensource.org/licenses/mit-license.php.       *
5  *****************************************************************************/
6 
7 #ifndef SECP256K1_ECMULT_IMPL_H
8 #define SECP256K1_ECMULT_IMPL_H
9 
10 #include <string.h>
11 #include <stdint.h>
12 
13 #include "group.h"
14 #include "scalar.h"
15 #include "ecmult.h"
16 
17 #if defined(EXHAUSTIVE_TEST_ORDER)
18 /* We need to lower these values for exhaustive tests because
19  * the tables cannot have infinities in them (this breaks the
20  * affine-isomorphism stuff which tracks z-ratios) */
21 #  if EXHAUSTIVE_TEST_ORDER > 128
22 #    define WINDOW_A 5
23 #    define WINDOW_G 8
24 #  elif EXHAUSTIVE_TEST_ORDER > 8
25 #    define WINDOW_A 4
26 #    define WINDOW_G 4
27 #  else
28 #    define WINDOW_A 2
29 #    define WINDOW_G 2
30 #  endif
31 #else
32 /* optimal for 128-bit and 256-bit exponents. */
33 #define WINDOW_A 5
34 /** larger numbers may result in slightly better performance, at the cost of
35     exponentially larger precomputed tables. */
36 #ifdef USE_ENDOMORPHISM
37 /** Two tables for window size 15: 1.375 MiB. */
38 #define WINDOW_G 15
39 #else
40 /** One table for window size 16: 1.375 MiB. */
41 #define WINDOW_G 16
42 #endif
43 #endif
44 
45 #ifdef USE_ENDOMORPHISM
46     #define WNAF_BITS 128
47 #else
48     #define WNAF_BITS 256
49 #endif
50 #define WNAF_SIZE_BITS(bits, w) (((bits) + (w) - 1) / (w))
51 #define WNAF_SIZE(w) WNAF_SIZE_BITS(WNAF_BITS, w)
52 
53 /** The number of entries a table with precomputed multiples needs to have. */
54 #define ECMULT_TABLE_SIZE(w) (1 << ((w)-2))
55 
56 /* The number of objects allocated on the scratch space for ecmult_multi algorithms */
57 #define PIPPENGER_SCRATCH_OBJECTS 6
58 #define STRAUSS_SCRATCH_OBJECTS 6
59 
60 #define PIPPENGER_MAX_BUCKET_WINDOW 12
61 
62 /* Minimum number of points for which pippenger_wnaf is faster than strauss wnaf */
63 #ifdef USE_ENDOMORPHISM
64     #define ECMULT_PIPPENGER_THRESHOLD 88
65 #else
66     #define ECMULT_PIPPENGER_THRESHOLD 160
67 #endif
68 
69 #ifdef USE_ENDOMORPHISM
70     #define ECMULT_MAX_POINTS_PER_BATCH 5000000
71 #else
72     #define ECMULT_MAX_POINTS_PER_BATCH 10000000
73 #endif
74 
75 /** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain
76  *  the values [1*a,3*a,...,(2*n-1)*a], so it space for n values. zr[0] will
77  *  contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z.
78  *  Prej's Z values are undefined, except for the last value.
79  */
secp256k1_ecmult_odd_multiples_table(int n,secp256k1_gej * prej,secp256k1_fe * zr,const secp256k1_gej * a)80 static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej *prej, secp256k1_fe *zr, const secp256k1_gej *a) {
81     secp256k1_gej d;
82     secp256k1_ge a_ge, d_ge;
83     int i;
84 
85     VERIFY_CHECK(!a->infinity);
86 
87     secp256k1_gej_double_var(&d, a, NULL);
88 
89     /*
90      * Perform the additions on an isomorphism where 'd' is affine: drop the z coordinate
91      * of 'd', and scale the 1P starting value's x/y coordinates without changing its z.
92      */
93     d_ge.x = d.x;
94     d_ge.y = d.y;
95     d_ge.infinity = 0;
96 
97     secp256k1_ge_set_gej_zinv(&a_ge, a, &d.z);
98     prej[0].x = a_ge.x;
99     prej[0].y = a_ge.y;
100     prej[0].z = a->z;
101     prej[0].infinity = 0;
102 
103     zr[0] = d.z;
104     for (i = 1; i < n; i++) {
105         secp256k1_gej_add_ge_var(&prej[i], &prej[i-1], &d_ge, &zr[i]);
106     }
107 
108     /*
109      * Each point in 'prej' has a z coordinate too small by a factor of 'd.z'. Only
110      * the final point's z coordinate is actually used though, so just update that.
111      */
112     secp256k1_fe_mul(&prej[n-1].z, &prej[n-1].z, &d.z);
113 }
114 
115 /** Fill a table 'pre' with precomputed odd multiples of a.
116  *
117  *  There are two versions of this function:
118  *  - secp256k1_ecmult_odd_multiples_table_globalz_windowa which brings its
119  *    resulting point set to a single constant Z denominator, stores the X and Y
120  *    coordinates as ge_storage points in pre, and stores the global Z in rz.
121  *    It only operates on tables sized for WINDOW_A wnaf multiples.
122  *  - secp256k1_ecmult_odd_multiples_table_storage_var, which converts its
123  *    resulting point set to actually affine points, and stores those in pre.
124  *    It operates on tables of any size, but uses heap-allocated temporaries.
125  *
126  *  To compute a*P + b*G, we compute a table for P using the first function,
127  *  and for G using the second (which requires an inverse, but it only needs to
128  *  happen once).
129  */
secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge * pre,secp256k1_fe * globalz,const secp256k1_gej * a)130 static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge *pre, secp256k1_fe *globalz, const secp256k1_gej *a) {
131     secp256k1_gej prej[ECMULT_TABLE_SIZE(WINDOW_A)];
132     secp256k1_fe zr[ECMULT_TABLE_SIZE(WINDOW_A)];
133 
134     /* Compute the odd multiples in Jacobian form. */
135     secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), prej, zr, a);
136     /* Bring them to the same Z denominator. */
137     secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A), pre, globalz, prej, zr);
138 }
139 
secp256k1_ecmult_odd_multiples_table_storage_var(const int n,secp256k1_ge_storage * pre,const secp256k1_gej * a)140 static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp256k1_ge_storage *pre, const secp256k1_gej *a) {
141     secp256k1_gej d;
142     secp256k1_ge d_ge, p_ge;
143     secp256k1_gej pj;
144     secp256k1_fe zi;
145     secp256k1_fe zr;
146     secp256k1_fe dx_over_dz_squared;
147     int i;
148 
149     VERIFY_CHECK(!a->infinity);
150 
151     secp256k1_gej_double_var(&d, a, NULL);
152 
153     /* First, we perform all the additions in an isomorphic curve obtained by multiplying
154      * all `z` coordinates by 1/`d.z`. In these coordinates `d` is affine so we can use
155      * `secp256k1_gej_add_ge_var` to perform the additions. For each addition, we store
156      * the resulting y-coordinate and the z-ratio, since we only have enough memory to
157      * store two field elements. These are sufficient to efficiently undo the isomorphism
158      * and recompute all the `x`s.
159      */
160     d_ge.x = d.x;
161     d_ge.y = d.y;
162     d_ge.infinity = 0;
163 
164     secp256k1_ge_set_gej_zinv(&p_ge, a, &d.z);
165     pj.x = p_ge.x;
166     pj.y = p_ge.y;
167     pj.z = a->z;
168     pj.infinity = 0;
169 
170     for (i = 0; i < (n - 1); i++) {
171         secp256k1_fe_normalize_var(&pj.y);
172         secp256k1_fe_to_storage(&pre[i].y, &pj.y);
173         secp256k1_gej_add_ge_var(&pj, &pj, &d_ge, &zr);
174         secp256k1_fe_normalize_var(&zr);
175         secp256k1_fe_to_storage(&pre[i].x, &zr);
176     }
177 
178     /* Invert d.z in the same batch, preserving pj.z so we can extract 1/d.z */
179     secp256k1_fe_mul(&zi, &pj.z, &d.z);
180     secp256k1_fe_inv_var(&zi, &zi);
181 
182     /* Directly set `pre[n - 1]` to `pj`, saving the inverted z-coordinate so
183      * that we can combine it with the saved z-ratios to compute the other zs
184      * without any more inversions. */
185     secp256k1_ge_set_gej_zinv(&p_ge, &pj, &zi);
186     secp256k1_ge_to_storage(&pre[n - 1], &p_ge);
187 
188     /* Compute the actual x-coordinate of D, which will be needed below. */
189     secp256k1_fe_mul(&d.z, &zi, &pj.z);  /* d.z = 1/d.z */
190     secp256k1_fe_sqr(&dx_over_dz_squared, &d.z);
191     secp256k1_fe_mul(&dx_over_dz_squared, &dx_over_dz_squared, &d.x);
192 
193     /* Going into the second loop, we have set `pre[n-1]` to its final affine
194      * form, but still need to set `pre[i]` for `i` in 0 through `n-2`. We
195      * have `zi = (p.z * d.z)^-1`, where
196      *
197      *     `p.z` is the z-coordinate of the point on the isomorphic curve
198      *           which was ultimately assigned to `pre[n-1]`.
199      *     `d.z` is the multiplier that must be applied to all z-coordinates
200      *           to move from our isomorphic curve back to secp256k1; so the
201      *           product `p.z * d.z` is the z-coordinate of the secp256k1
202      *           point assigned to `pre[n-1]`.
203      *
204      * All subsequent inverse-z-coordinates can be obtained by multiplying this
205      * factor by successive z-ratios, which is much more efficient than directly
206      * computing each one.
207      *
208      * Importantly, these inverse-zs will be coordinates of points on secp256k1,
209      * while our other stored values come from computations on the isomorphic
210      * curve. So in the below loop, we will take care not to actually use `zi`
211      * or any derived values until we're back on secp256k1.
212      */
213     i = n - 1;
214     while (i > 0) {
215         secp256k1_fe zi2, zi3;
216         const secp256k1_fe *rzr;
217         i--;
218 
219         secp256k1_ge_from_storage(&p_ge, &pre[i]);
220 
221         /* For each remaining point, we extract the z-ratio from the stored
222          * x-coordinate, compute its z^-1 from that, and compute the full
223          * point from that. */
224         rzr = &p_ge.x;
225         secp256k1_fe_mul(&zi, &zi, rzr);
226         secp256k1_fe_sqr(&zi2, &zi);
227         secp256k1_fe_mul(&zi3, &zi2, &zi);
228         /* To compute the actual x-coordinate, we use the stored z ratio and
229          * y-coordinate, which we obtained from `secp256k1_gej_add_ge_var`
230          * in the loop above, as well as the inverse of the square of its
231          * z-coordinate. We store the latter in the `zi2` variable, which is
232          * computed iteratively starting from the overall Z inverse then
233          * multiplying by each z-ratio in turn.
234          *
235          * Denoting the z-ratio as `rzr`, we observe that it is equal to `h`
236          * from the inside of the above `gej_add_ge_var` call. This satisfies
237          *
238          *    rzr = d_x * z^2 - x * d_z^2
239          *
240          * where (`d_x`, `d_z`) are Jacobian coordinates of `D` and `(x, z)`
241          * are Jacobian coordinates of our desired point -- except both are on
242          * the isomorphic curve that we were using when we called `gej_add_ge_var`.
243          * To get back to secp256k1, we must multiply both `z`s by `d_z`, or
244          * equivalently divide both `x`s by `d_z^2`. Our equation then becomes
245          *
246          *    rzr = d_x * z^2 / d_z^2 - x
247          *
248          * (The left-hand-side, being a ratio of z-coordinates, is unaffected
249          * by the isomorphism.)
250          *
251          * Rearranging to solve for `x`, we have
252          *
253          *     x = d_x * z^2 / d_z^2 - rzr
254          *
255          * But what we actually want is the affine coordinate `X = x/z^2`,
256          * which will satisfy
257          *
258          *     X = d_x / d_z^2 - rzr / z^2
259          *       = dx_over_dz_squared - rzr * zi2
260          */
261         secp256k1_fe_mul(&p_ge.x, rzr, &zi2);
262         secp256k1_fe_negate(&p_ge.x, &p_ge.x, 1);
263         secp256k1_fe_add(&p_ge.x, &dx_over_dz_squared);
264         /* y is stored_y/z^3, as we expect */
265         secp256k1_fe_mul(&p_ge.y, &p_ge.y, &zi3);
266         /* Store */
267         secp256k1_ge_to_storage(&pre[i], &p_ge);
268     }
269 }
270 
271 /** The following two macro retrieves a particular odd multiple from a table
272  *  of precomputed multiples. */
273 #define ECMULT_TABLE_GET_GE(r,pre,n,w) do { \
274     VERIFY_CHECK(((n) & 1) == 1); \
275     VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
276     VERIFY_CHECK((n) <=  ((1 << ((w)-1)) - 1)); \
277     if ((n) > 0) { \
278         *(r) = (pre)[((n)-1)/2]; \
279     } else { \
280         *(r) = (pre)[(-(n)-1)/2]; \
281         secp256k1_fe_negate(&((r)->y), &((r)->y), 1); \
282     } \
283 } while(0)
284 
285 #define ECMULT_TABLE_GET_GE_STORAGE(r,pre,n,w) do { \
286     VERIFY_CHECK(((n) & 1) == 1); \
287     VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
288     VERIFY_CHECK((n) <=  ((1 << ((w)-1)) - 1)); \
289     if ((n) > 0) { \
290         secp256k1_ge_from_storage((r), &(pre)[((n)-1)/2]); \
291     } else { \
292         secp256k1_ge_from_storage((r), &(pre)[(-(n)-1)/2]); \
293         secp256k1_fe_negate(&((r)->y), &((r)->y), 1); \
294     } \
295 } while(0)
296 
secp256k1_ecmult_context_init(secp256k1_ecmult_context * ctx)297 static void secp256k1_ecmult_context_init(secp256k1_ecmult_context *ctx) {
298     ctx->pre_g = NULL;
299 #ifdef USE_ENDOMORPHISM
300     ctx->pre_g_128 = NULL;
301 #endif
302 }
303 
secp256k1_ecmult_context_build(secp256k1_ecmult_context * ctx,const secp256k1_callback * cb)304 static void secp256k1_ecmult_context_build(secp256k1_ecmult_context *ctx, const secp256k1_callback *cb) {
305     secp256k1_gej gj;
306 
307     if (ctx->pre_g != NULL) {
308         return;
309     }
310 
311     /* get the generator */
312     secp256k1_gej_set_ge(&gj, &secp256k1_ge_const_g);
313 
314     ctx->pre_g = (secp256k1_ge_storage (*)[])checked_malloc(cb, sizeof((*ctx->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G));
315 
316     /* precompute the tables with odd multiples */
317     secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj);
318 
319 #ifdef USE_ENDOMORPHISM
320     {
321         secp256k1_gej g_128j;
322         int i;
323 
324         ctx->pre_g_128 = (secp256k1_ge_storage (*)[])checked_malloc(cb, sizeof((*ctx->pre_g_128)[0]) * ECMULT_TABLE_SIZE(WINDOW_G));
325 
326         /* calculate 2^128*generator */
327         g_128j = gj;
328         for (i = 0; i < 128; i++) {
329             secp256k1_gej_double_var(&g_128j, &g_128j, NULL);
330         }
331         secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j);
332     }
333 #endif
334 }
335 
secp256k1_ecmult_context_clone(secp256k1_ecmult_context * dst,const secp256k1_ecmult_context * src,const secp256k1_callback * cb)336 static void secp256k1_ecmult_context_clone(secp256k1_ecmult_context *dst,
337                                            const secp256k1_ecmult_context *src, const secp256k1_callback *cb) {
338     if (src->pre_g == NULL) {
339         dst->pre_g = NULL;
340     } else {
341         size_t size = sizeof((*dst->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G);
342         dst->pre_g = (secp256k1_ge_storage (*)[])checked_malloc(cb, size);
343         memcpy(dst->pre_g, src->pre_g, size);
344     }
345 #ifdef USE_ENDOMORPHISM
346     if (src->pre_g_128 == NULL) {
347         dst->pre_g_128 = NULL;
348     } else {
349         size_t size = sizeof((*dst->pre_g_128)[0]) * ECMULT_TABLE_SIZE(WINDOW_G);
350         dst->pre_g_128 = (secp256k1_ge_storage (*)[])checked_malloc(cb, size);
351         memcpy(dst->pre_g_128, src->pre_g_128, size);
352     }
353 #endif
354 }
355 
secp256k1_ecmult_context_is_built(const secp256k1_ecmult_context * ctx)356 static int secp256k1_ecmult_context_is_built(const secp256k1_ecmult_context *ctx) {
357     return ctx->pre_g != NULL;
358 }
359 
secp256k1_ecmult_context_clear(secp256k1_ecmult_context * ctx)360 static void secp256k1_ecmult_context_clear(secp256k1_ecmult_context *ctx) {
361     free(ctx->pre_g);
362 #ifdef USE_ENDOMORPHISM
363     free(ctx->pre_g_128);
364 #endif
365     secp256k1_ecmult_context_init(ctx);
366 }
367 
368 /** Convert a number to WNAF notation. The number becomes represented by sum(2^i * wnaf[i], i=0..bits),
369  *  with the following guarantees:
370  *  - each wnaf[i] is either 0, or an odd integer between -(1<<(w-1) - 1) and (1<<(w-1) - 1)
371  *  - two non-zero entries in wnaf are separated by at least w-1 zeroes.
372  *  - the number of set values in wnaf is returned. This number is at most 256, and at most one more
373  *    than the number of bits in the (absolute value) of the input.
374  */
secp256k1_ecmult_wnaf(int * wnaf,int len,const secp256k1_scalar * a,int w)375 static int secp256k1_ecmult_wnaf(int *wnaf, int len, const secp256k1_scalar *a, int w) {
376     secp256k1_scalar s = *a;
377     int last_set_bit = -1;
378     int bit = 0;
379     int sign = 1;
380     int carry = 0;
381 
382     VERIFY_CHECK(wnaf != NULL);
383     VERIFY_CHECK(0 <= len && len <= 256);
384     VERIFY_CHECK(a != NULL);
385     VERIFY_CHECK(2 <= w && w <= 31);
386 
387     memset(wnaf, 0, len * sizeof(wnaf[0]));
388 
389     if (secp256k1_scalar_get_bits(&s, 255, 1)) {
390         secp256k1_scalar_negate(&s, &s);
391         sign = -1;
392     }
393 
394     while (bit < len) {
395         int now;
396         int word;
397         if (secp256k1_scalar_get_bits(&s, bit, 1) == (unsigned int)carry) {
398             bit++;
399             continue;
400         }
401 
402         now = w;
403         if (now > len - bit) {
404             now = len - bit;
405         }
406 
407         word = secp256k1_scalar_get_bits_var(&s, bit, now) + carry;
408 
409         carry = (word >> (w-1)) & 1;
410         word -= carry << w;
411 
412         wnaf[bit] = sign * word;
413         last_set_bit = bit;
414 
415         bit += now;
416     }
417 #ifdef VERIFY
418     CHECK(carry == 0);
419     while (bit < 256) {
420         CHECK(secp256k1_scalar_get_bits(&s, bit++, 1) == 0);
421     }
422 #endif
423     return last_set_bit + 1;
424 }
425 
426 struct secp256k1_strauss_point_state {
427 #ifdef USE_ENDOMORPHISM
428     secp256k1_scalar na_1, na_lam;
429     int wnaf_na_1[130];
430     int wnaf_na_lam[130];
431     int bits_na_1;
432     int bits_na_lam;
433 #else
434     int wnaf_na[256];
435     int bits_na;
436 #endif
437     size_t input_pos;
438 };
439 
440 struct secp256k1_strauss_state {
441     secp256k1_gej* prej;
442     secp256k1_fe* zr;
443     secp256k1_ge* pre_a;
444 #ifdef USE_ENDOMORPHISM
445     secp256k1_ge* pre_a_lam;
446 #endif
447     struct secp256k1_strauss_point_state* ps;
448 };
449 
secp256k1_ecmult_strauss_wnaf(const secp256k1_ecmult_context * ctx,const struct secp256k1_strauss_state * state,secp256k1_gej * r,int num,const secp256k1_gej * a,const secp256k1_scalar * na,const secp256k1_scalar * ng)450 static void secp256k1_ecmult_strauss_wnaf(const secp256k1_ecmult_context *ctx, const struct secp256k1_strauss_state *state, secp256k1_gej *r, int num, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) {
451     secp256k1_ge tmpa;
452     secp256k1_fe Z;
453 #ifdef USE_ENDOMORPHISM
454     /* Splitted G factors. */
455     secp256k1_scalar ng_1, ng_128;
456     int wnaf_ng_1[129];
457     int bits_ng_1 = 0;
458     int wnaf_ng_128[129];
459     int bits_ng_128 = 0;
460 #else
461     int wnaf_ng[256];
462     int bits_ng = 0;
463 #endif
464     int i;
465     int bits = 0;
466     int np;
467     int no = 0;
468 
469     for (np = 0; np < num; ++np) {
470         if (secp256k1_scalar_is_zero(&na[np]) || secp256k1_gej_is_infinity(&a[np])) {
471             continue;
472         }
473         state->ps[no].input_pos = np;
474 #ifdef USE_ENDOMORPHISM
475         /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */
476         secp256k1_scalar_split_lambda(&state->ps[no].na_1, &state->ps[no].na_lam, &na[np]);
477 
478         /* build wnaf representation for na_1 and na_lam. */
479         state->ps[no].bits_na_1   = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_1,   130, &state->ps[no].na_1,   WINDOW_A);
480         state->ps[no].bits_na_lam = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_lam, 130, &state->ps[no].na_lam, WINDOW_A);
481         VERIFY_CHECK(state->ps[no].bits_na_1 <= 130);
482         VERIFY_CHECK(state->ps[no].bits_na_lam <= 130);
483         if (state->ps[no].bits_na_1 > bits) {
484             bits = state->ps[no].bits_na_1;
485         }
486         if (state->ps[no].bits_na_lam > bits) {
487             bits = state->ps[no].bits_na_lam;
488         }
489 #else
490         /* build wnaf representation for na. */
491         state->ps[no].bits_na     = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na,     256, &na[np],      WINDOW_A);
492         if (state->ps[no].bits_na > bits) {
493             bits = state->ps[no].bits_na;
494         }
495 #endif
496         ++no;
497     }
498 
499     /* Calculate odd multiples of a.
500      * All multiples are brought to the same Z 'denominator', which is stored
501      * in Z. Due to secp256k1' isomorphism we can do all operations pretending
502      * that the Z coordinate was 1, use affine addition formulae, and correct
503      * the Z coordinate of the result once at the end.
504      * The exception is the precomputed G table points, which are actually
505      * affine. Compared to the base used for other points, they have a Z ratio
506      * of 1/Z, so we can use secp256k1_gej_add_zinv_var, which uses the same
507      * isomorphism to efficiently add with a known Z inverse.
508      */
509     if (no > 0) {
510         /* Compute the odd multiples in Jacobian form. */
511         secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->prej, state->zr, &a[state->ps[0].input_pos]);
512         for (np = 1; np < no; ++np) {
513             secp256k1_gej tmp = a[state->ps[np].input_pos];
514 #ifdef VERIFY
515             secp256k1_fe_normalize_var(&(state->prej[(np - 1) * ECMULT_TABLE_SIZE(WINDOW_A) + ECMULT_TABLE_SIZE(WINDOW_A) - 1].z));
516 #endif
517             secp256k1_gej_rescale(&tmp, &(state->prej[(np - 1) * ECMULT_TABLE_SIZE(WINDOW_A) + ECMULT_TABLE_SIZE(WINDOW_A) - 1].z));
518             secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->prej + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &tmp);
519             secp256k1_fe_mul(state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &(a[state->ps[np].input_pos].z));
520         }
521         /* Bring them to the same Z denominator. */
522         secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A) * no, state->pre_a, &Z, state->prej, state->zr);
523     } else {
524         secp256k1_fe_set_int(&Z, 1);
525     }
526 
527 #ifdef USE_ENDOMORPHISM
528     for (np = 0; np < no; ++np) {
529         for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
530             secp256k1_ge_mul_lambda(&state->pre_a_lam[np * ECMULT_TABLE_SIZE(WINDOW_A) + i], &state->pre_a[np * ECMULT_TABLE_SIZE(WINDOW_A) + i]);
531         }
532     }
533 
534     if (ng) {
535         /* split ng into ng_1 and ng_128 (where gn = gn_1 + gn_128*2^128, and gn_1 and gn_128 are ~128 bit) */
536         secp256k1_scalar_split_128(&ng_1, &ng_128, ng);
537 
538         /* Build wnaf representation for ng_1 and ng_128 */
539         bits_ng_1   = secp256k1_ecmult_wnaf(wnaf_ng_1,   129, &ng_1,   WINDOW_G);
540         bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, 129, &ng_128, WINDOW_G);
541         if (bits_ng_1 > bits) {
542             bits = bits_ng_1;
543         }
544         if (bits_ng_128 > bits) {
545             bits = bits_ng_128;
546         }
547     }
548 #else
549     if (ng) {
550         bits_ng     = secp256k1_ecmult_wnaf(wnaf_ng,     256, ng,      WINDOW_G);
551         if (bits_ng > bits) {
552             bits = bits_ng;
553         }
554     }
555 #endif
556 
557     secp256k1_gej_set_infinity(r);
558 
559     for (i = bits - 1; i >= 0; i--) {
560         int n;
561         secp256k1_gej_double_var(r, r, NULL);
562 #ifdef USE_ENDOMORPHISM
563         for (np = 0; np < no; ++np) {
564             if (i < state->ps[np].bits_na_1 && (n = state->ps[np].wnaf_na_1[i])) {
565                 ECMULT_TABLE_GET_GE(&tmpa, state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A);
566                 secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
567             }
568             if (i < state->ps[np].bits_na_lam && (n = state->ps[np].wnaf_na_lam[i])) {
569                 ECMULT_TABLE_GET_GE(&tmpa, state->pre_a_lam + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A);
570                 secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
571             }
572         }
573         if (i < bits_ng_1 && (n = wnaf_ng_1[i])) {
574             ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G);
575             secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
576         }
577         if (i < bits_ng_128 && (n = wnaf_ng_128[i])) {
578             ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g_128, n, WINDOW_G);
579             secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
580         }
581 #else
582         for (np = 0; np < no; ++np) {
583             if (i < state->ps[np].bits_na && (n = state->ps[np].wnaf_na[i])) {
584                 ECMULT_TABLE_GET_GE(&tmpa, state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A);
585                 secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
586             }
587         }
588         if (i < bits_ng && (n = wnaf_ng[i])) {
589             ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G);
590             secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
591         }
592 #endif
593     }
594 
595     if (!r->infinity) {
596         secp256k1_fe_mul(&r->z, &r->z, &Z);
597     }
598 }
599 
secp256k1_ecmult(const secp256k1_ecmult_context * ctx,secp256k1_gej * r,const secp256k1_gej * a,const secp256k1_scalar * na,const secp256k1_scalar * ng)600 static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) {
601     secp256k1_gej prej[ECMULT_TABLE_SIZE(WINDOW_A)];
602     secp256k1_fe zr[ECMULT_TABLE_SIZE(WINDOW_A)];
603     secp256k1_ge pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
604     struct secp256k1_strauss_point_state ps[1];
605 #ifdef USE_ENDOMORPHISM
606     secp256k1_ge pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
607 #endif
608     struct secp256k1_strauss_state state;
609 
610     state.prej = prej;
611     state.zr = zr;
612     state.pre_a = pre_a;
613 #ifdef USE_ENDOMORPHISM
614     state.pre_a_lam = pre_a_lam;
615 #endif
616     state.ps = ps;
617     secp256k1_ecmult_strauss_wnaf(ctx, &state, r, 1, a, na, ng);
618 }
619 
secp256k1_strauss_scratch_size(size_t n_points)620 static size_t secp256k1_strauss_scratch_size(size_t n_points) {
621 #ifdef USE_ENDOMORPHISM
622     static const size_t point_size = (2 * sizeof(secp256k1_ge) + sizeof(secp256k1_gej) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar);
623 #else
624     static const size_t point_size = (sizeof(secp256k1_ge) + sizeof(secp256k1_gej) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar);
625 #endif
626     return n_points*point_size;
627 }
628 
secp256k1_ecmult_strauss_batch(const secp256k1_ecmult_context * ctx,secp256k1_scratch * scratch,secp256k1_gej * r,const secp256k1_scalar * inp_g_sc,secp256k1_ecmult_multi_callback cb,void * cbdata,size_t n_points,size_t cb_offset)629 static int secp256k1_ecmult_strauss_batch(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points, size_t cb_offset) {
630     secp256k1_gej* points;
631     secp256k1_scalar* scalars;
632     struct secp256k1_strauss_state state;
633     size_t i;
634 
635     secp256k1_gej_set_infinity(r);
636     if (inp_g_sc == NULL && n_points == 0) {
637         return 1;
638     }
639 
640     if (!secp256k1_scratch_allocate_frame(scratch, secp256k1_strauss_scratch_size(n_points), STRAUSS_SCRATCH_OBJECTS)) {
641         return 0;
642     }
643     points = (secp256k1_gej*)secp256k1_scratch_alloc(scratch, n_points * sizeof(secp256k1_gej));
644     scalars = (secp256k1_scalar*)secp256k1_scratch_alloc(scratch, n_points * sizeof(secp256k1_scalar));
645     state.prej = (secp256k1_gej*)secp256k1_scratch_alloc(scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_gej));
646     state.zr = (secp256k1_fe*)secp256k1_scratch_alloc(scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe));
647 #ifdef USE_ENDOMORPHISM
648     state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(scratch, n_points * 2 * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge));
649     state.pre_a_lam = state.pre_a + n_points * ECMULT_TABLE_SIZE(WINDOW_A);
650 #else
651     state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge));
652 #endif
653     state.ps = (struct secp256k1_strauss_point_state*)secp256k1_scratch_alloc(scratch, n_points * sizeof(struct secp256k1_strauss_point_state));
654 
655     for (i = 0; i < n_points; i++) {
656         secp256k1_ge point;
657         if (!cb(&scalars[i], &point, i+cb_offset, cbdata)) {
658             secp256k1_scratch_deallocate_frame(scratch);
659             return 0;
660         }
661         secp256k1_gej_set_ge(&points[i], &point);
662     }
663     secp256k1_ecmult_strauss_wnaf(ctx, &state, r, n_points, points, scalars, inp_g_sc);
664     secp256k1_scratch_deallocate_frame(scratch);
665     return 1;
666 }
667 
668 /* Wrapper for secp256k1_ecmult_multi_func interface */
secp256k1_ecmult_strauss_batch_single(const secp256k1_ecmult_context * actx,secp256k1_scratch * scratch,secp256k1_gej * r,const secp256k1_scalar * inp_g_sc,secp256k1_ecmult_multi_callback cb,void * cbdata,size_t n)669 static int secp256k1_ecmult_strauss_batch_single(const secp256k1_ecmult_context *actx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) {
670     return secp256k1_ecmult_strauss_batch(actx, scratch, r, inp_g_sc, cb, cbdata, n, 0);
671 }
672 
secp256k1_strauss_max_points(secp256k1_scratch * scratch)673 static size_t secp256k1_strauss_max_points(secp256k1_scratch *scratch) {
674     return secp256k1_scratch_max_allocation(scratch, STRAUSS_SCRATCH_OBJECTS) / secp256k1_strauss_scratch_size(1);
675 }
676 
677 /** Convert a number to WNAF notation.
678  *  The number becomes represented by sum(2^{wi} * wnaf[i], i=0..WNAF_SIZE(w)+1) - return_val.
679  *  It has the following guarantees:
680  *  - each wnaf[i] is either 0 or an odd integer between -(1 << w) and (1 << w)
681  *  - the number of words set is always WNAF_SIZE(w)
682  *  - the returned skew is 0 or 1
683  */
secp256k1_wnaf_fixed(int * wnaf,const secp256k1_scalar * s,int w)684 static int secp256k1_wnaf_fixed(int *wnaf, const secp256k1_scalar *s, int w) {
685     int skew = 0;
686     int pos;
687     int max_pos;
688     int last_w;
689     const secp256k1_scalar *work = s;
690 
691     if (secp256k1_scalar_is_zero(s)) {
692         for (pos = 0; pos < WNAF_SIZE(w); pos++) {
693             wnaf[pos] = 0;
694         }
695         return 0;
696     }
697 
698     if (secp256k1_scalar_is_even(s)) {
699         skew = 1;
700     }
701 
702     wnaf[0] = secp256k1_scalar_get_bits_var(work, 0, w) + skew;
703     /* Compute last window size. Relevant when window size doesn't divide the
704      * number of bits in the scalar */
705     last_w = WNAF_BITS - (WNAF_SIZE(w) - 1) * w;
706 
707     /* Store the position of the first nonzero word in max_pos to allow
708      * skipping leading zeros when calculating the wnaf. */
709     for (pos = WNAF_SIZE(w) - 1; pos > 0; pos--) {
710         int val = secp256k1_scalar_get_bits_var(work, pos * w, pos == WNAF_SIZE(w)-1 ? last_w : w);
711         if(val != 0) {
712             break;
713         }
714         wnaf[pos] = 0;
715     }
716     max_pos = pos;
717     pos = 1;
718 
719     while (pos <= max_pos) {
720         int val = secp256k1_scalar_get_bits_var(work, pos * w, pos == WNAF_SIZE(w)-1 ? last_w : w);
721         if ((val & 1) == 0) {
722             wnaf[pos - 1] -= (1 << w);
723             wnaf[pos] = (val + 1);
724         } else {
725             wnaf[pos] = val;
726         }
727         /* Set a coefficient to zero if it is 1 or -1 and the proceeding digit
728          * is strictly negative or strictly positive respectively. Only change
729          * coefficients at previous positions because above code assumes that
730          * wnaf[pos - 1] is odd.
731          */
732         if (pos >= 2 && ((wnaf[pos - 1] == 1 && wnaf[pos - 2] < 0) || (wnaf[pos - 1] == -1 && wnaf[pos - 2] > 0))) {
733             if (wnaf[pos - 1] == 1) {
734                 wnaf[pos - 2] += 1 << w;
735             } else {
736                 wnaf[pos - 2] -= 1 << w;
737             }
738             wnaf[pos - 1] = 0;
739         }
740         ++pos;
741     }
742 
743     return skew;
744 }
745 
746 struct secp256k1_pippenger_point_state {
747     int skew_na;
748     size_t input_pos;
749 };
750 
751 struct secp256k1_pippenger_state {
752     int *wnaf_na;
753     struct secp256k1_pippenger_point_state* ps;
754 };
755 
756 /*
757  * pippenger_wnaf computes the result of a multi-point multiplication as
758  * follows: The scalars are brought into wnaf with n_wnaf elements each. Then
759  * for every i < n_wnaf, first each point is added to a "bucket" corresponding
760  * to the point's wnaf[i]. Second, the buckets are added together such that
761  * r += 1*bucket[0] + 3*bucket[1] + 5*bucket[2] + ...
762  */
secp256k1_ecmult_pippenger_wnaf(secp256k1_gej * buckets,int bucket_window,struct secp256k1_pippenger_state * state,secp256k1_gej * r,const secp256k1_scalar * sc,const secp256k1_ge * pt,size_t num)763 static int secp256k1_ecmult_pippenger_wnaf(secp256k1_gej *buckets, int bucket_window, struct secp256k1_pippenger_state *state, secp256k1_gej *r, const secp256k1_scalar *sc, const secp256k1_ge *pt, size_t num) {
764     size_t n_wnaf = WNAF_SIZE(bucket_window+1);
765     size_t np;
766     size_t no = 0;
767     int i;
768     int j;
769 
770     for (np = 0; np < num; ++np) {
771         if (secp256k1_scalar_is_zero(&sc[np]) || secp256k1_ge_is_infinity(&pt[np])) {
772             continue;
773         }
774         state->ps[no].input_pos = np;
775         state->ps[no].skew_na = secp256k1_wnaf_fixed(&state->wnaf_na[no*n_wnaf], &sc[np], bucket_window+1);
776         no++;
777     }
778     secp256k1_gej_set_infinity(r);
779 
780     if (no == 0) {
781         return 1;
782     }
783 
784     for (i = n_wnaf - 1; i >= 0; i--) {
785         secp256k1_gej running_sum;
786 
787         for(j = 0; j < ECMULT_TABLE_SIZE(bucket_window+2); j++) {
788             secp256k1_gej_set_infinity(&buckets[j]);
789         }
790 
791         for (np = 0; np < no; ++np) {
792             int n = state->wnaf_na[np*n_wnaf + i];
793             struct secp256k1_pippenger_point_state point_state = state->ps[np];
794             secp256k1_ge tmp;
795             int idx;
796 
797             if (i == 0) {
798                 /* correct for wnaf skew */
799                 int skew = point_state.skew_na;
800                 if (skew) {
801                     secp256k1_ge_neg(&tmp, &pt[point_state.input_pos]);
802                     secp256k1_gej_add_ge_var(&buckets[0], &buckets[0], &tmp, NULL);
803                 }
804             }
805             if (n > 0) {
806                 idx = (n - 1)/2;
807                 secp256k1_gej_add_ge_var(&buckets[idx], &buckets[idx], &pt[point_state.input_pos], NULL);
808             } else if (n < 0) {
809                 idx = -(n + 1)/2;
810                 secp256k1_ge_neg(&tmp, &pt[point_state.input_pos]);
811                 secp256k1_gej_add_ge_var(&buckets[idx], &buckets[idx], &tmp, NULL);
812             }
813         }
814 
815         for(j = 0; j < bucket_window; j++) {
816             secp256k1_gej_double_var(r, r, NULL);
817         }
818 
819         secp256k1_gej_set_infinity(&running_sum);
820         /* Accumulate the sum: bucket[0] + 3*bucket[1] + 5*bucket[2] + 7*bucket[3] + ...
821          *                   = bucket[0] +   bucket[1] +   bucket[2] +   bucket[3] + ...
822          *                   +         2 *  (bucket[1] + 2*bucket[2] + 3*bucket[3] + ...)
823          * using an intermediate running sum:
824          * running_sum = bucket[0] +   bucket[1] +   bucket[2] + ...
825          *
826          * The doubling is done implicitly by deferring the final window doubling (of 'r').
827          */
828         for(j = ECMULT_TABLE_SIZE(bucket_window+2) - 1; j > 0; j--) {
829             secp256k1_gej_add_var(&running_sum, &running_sum, &buckets[j], NULL);
830             secp256k1_gej_add_var(r, r, &running_sum, NULL);
831         }
832 
833         secp256k1_gej_add_var(&running_sum, &running_sum, &buckets[0], NULL);
834         secp256k1_gej_double_var(r, r, NULL);
835         secp256k1_gej_add_var(r, r, &running_sum, NULL);
836     }
837     return 1;
838 }
839 
840 /**
841  * Returns optimal bucket_window (number of bits of a scalar represented by a
842  * set of buckets) for a given number of points.
843  */
secp256k1_pippenger_bucket_window(size_t n)844 static int secp256k1_pippenger_bucket_window(size_t n) {
845 #ifdef USE_ENDOMORPHISM
846     if (n <= 1) {
847         return 1;
848     } else if (n <= 4) {
849         return 2;
850     } else if (n <= 20) {
851         return 3;
852     } else if (n <= 57) {
853         return 4;
854     } else if (n <= 136) {
855         return 5;
856     } else if (n <= 235) {
857         return 6;
858     } else if (n <= 1260) {
859         return 7;
860     } else if (n <= 4420) {
861         return 9;
862     } else if (n <= 7880) {
863         return 10;
864     } else if (n <= 16050) {
865         return 11;
866     } else {
867         return PIPPENGER_MAX_BUCKET_WINDOW;
868     }
869 #else
870     if (n <= 1) {
871         return 1;
872     } else if (n <= 11) {
873         return 2;
874     } else if (n <= 45) {
875         return 3;
876     } else if (n <= 100) {
877         return 4;
878     } else if (n <= 275) {
879         return 5;
880     } else if (n <= 625) {
881         return 6;
882     } else if (n <= 1850) {
883         return 7;
884     } else if (n <= 3400) {
885         return 8;
886     } else if (n <= 9630) {
887         return 9;
888     } else if (n <= 17900) {
889         return 10;
890     } else if (n <= 32800) {
891         return 11;
892     } else {
893         return PIPPENGER_MAX_BUCKET_WINDOW;
894     }
895 #endif
896 }
897 
898 /**
899  * Returns the maximum optimal number of points for a bucket_window.
900  */
secp256k1_pippenger_bucket_window_inv(int bucket_window)901 static size_t secp256k1_pippenger_bucket_window_inv(int bucket_window) {
902     switch(bucket_window) {
903 #ifdef USE_ENDOMORPHISM
904         case 1: return 1;
905         case 2: return 4;
906         case 3: return 20;
907         case 4: return 57;
908         case 5: return 136;
909         case 6: return 235;
910         case 7: return 1260;
911         case 8: return 1260;
912         case 9: return 4420;
913         case 10: return 7880;
914         case 11: return 16050;
915         case PIPPENGER_MAX_BUCKET_WINDOW: return SIZE_MAX;
916 #else
917         case 1: return 1;
918         case 2: return 11;
919         case 3: return 45;
920         case 4: return 100;
921         case 5: return 275;
922         case 6: return 625;
923         case 7: return 1850;
924         case 8: return 3400;
925         case 9: return 9630;
926         case 10: return 17900;
927         case 11: return 32800;
928         case PIPPENGER_MAX_BUCKET_WINDOW: return SIZE_MAX;
929 #endif
930     }
931     return 0;
932 }
933 
934 
935 #ifdef USE_ENDOMORPHISM
secp256k1_ecmult_endo_split(secp256k1_scalar * s1,secp256k1_scalar * s2,secp256k1_ge * p1,secp256k1_ge * p2)936 SECP256K1_INLINE static void secp256k1_ecmult_endo_split(secp256k1_scalar *s1, secp256k1_scalar *s2, secp256k1_ge *p1, secp256k1_ge *p2) {
937     secp256k1_scalar tmp = *s1;
938     secp256k1_scalar_split_lambda(s1, s2, &tmp);
939     secp256k1_ge_mul_lambda(p2, p1);
940 
941     if (secp256k1_scalar_is_high(s1)) {
942         secp256k1_scalar_negate(s1, s1);
943         secp256k1_ge_neg(p1, p1);
944     }
945     if (secp256k1_scalar_is_high(s2)) {
946         secp256k1_scalar_negate(s2, s2);
947         secp256k1_ge_neg(p2, p2);
948     }
949 }
950 #endif
951 
952 /**
953  * Returns the scratch size required for a given number of points (excluding
954  * base point G) without considering alignment.
955  */
secp256k1_pippenger_scratch_size(size_t n_points,int bucket_window)956 static size_t secp256k1_pippenger_scratch_size(size_t n_points, int bucket_window) {
957 #ifdef USE_ENDOMORPHISM
958     size_t entries = 2*n_points + 2;
959 #else
960     size_t entries = n_points + 1;
961 #endif
962     size_t entry_size = sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) + sizeof(struct secp256k1_pippenger_point_state) + (WNAF_SIZE(bucket_window+1)+1)*sizeof(int);
963     return ((1<<bucket_window) * sizeof(secp256k1_gej) + sizeof(struct secp256k1_pippenger_state) + entries * entry_size);
964 }
965 
secp256k1_ecmult_pippenger_batch(const secp256k1_ecmult_context * ctx,secp256k1_scratch * scratch,secp256k1_gej * r,const secp256k1_scalar * inp_g_sc,secp256k1_ecmult_multi_callback cb,void * cbdata,size_t n_points,size_t cb_offset)966 static int secp256k1_ecmult_pippenger_batch(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points, size_t cb_offset) {
967     /* Use 2(n+1) with the endomorphism, n+1 without, when calculating batch
968      * sizes. The reason for +1 is that we add the G scalar to the list of
969      * other scalars. */
970 #ifdef USE_ENDOMORPHISM
971     size_t entries = 2*n_points + 2;
972 #else
973     size_t entries = n_points + 1;
974 #endif
975     secp256k1_ge *points;
976     secp256k1_scalar *scalars;
977     secp256k1_gej *buckets;
978     struct secp256k1_pippenger_state *state_space;
979     size_t idx = 0;
980     size_t point_idx = 0;
981     int i, j;
982     int bucket_window;
983 
984     (void)ctx;
985     secp256k1_gej_set_infinity(r);
986     if (inp_g_sc == NULL && n_points == 0) {
987         return 1;
988     }
989 
990     bucket_window = secp256k1_pippenger_bucket_window(n_points);
991     if (!secp256k1_scratch_allocate_frame(scratch, secp256k1_pippenger_scratch_size(n_points, bucket_window), PIPPENGER_SCRATCH_OBJECTS)) {
992         return 0;
993     }
994     points = (secp256k1_ge *) secp256k1_scratch_alloc(scratch, entries * sizeof(*points));
995     scalars = (secp256k1_scalar *) secp256k1_scratch_alloc(scratch, entries * sizeof(*scalars));
996     state_space = (struct secp256k1_pippenger_state *) secp256k1_scratch_alloc(scratch, sizeof(*state_space));
997     state_space->ps = (struct secp256k1_pippenger_point_state *) secp256k1_scratch_alloc(scratch, entries * sizeof(*state_space->ps));
998     state_space->wnaf_na = (int *) secp256k1_scratch_alloc(scratch, entries*(WNAF_SIZE(bucket_window+1)) * sizeof(int));
999     buckets = (secp256k1_gej *) secp256k1_scratch_alloc(scratch, (1<<bucket_window) * sizeof(*buckets));
1000 
1001     if (inp_g_sc != NULL) {
1002         scalars[0] = *inp_g_sc;
1003         points[0] = secp256k1_ge_const_g;
1004         idx++;
1005 #ifdef USE_ENDOMORPHISM
1006         secp256k1_ecmult_endo_split(&scalars[0], &scalars[1], &points[0], &points[1]);
1007         idx++;
1008 #endif
1009     }
1010 
1011     while (point_idx < n_points) {
1012         if (!cb(&scalars[idx], &points[idx], point_idx + cb_offset, cbdata)) {
1013             secp256k1_scratch_deallocate_frame(scratch);
1014             return 0;
1015         }
1016         idx++;
1017 #ifdef USE_ENDOMORPHISM
1018         secp256k1_ecmult_endo_split(&scalars[idx - 1], &scalars[idx], &points[idx - 1], &points[idx]);
1019         idx++;
1020 #endif
1021         point_idx++;
1022     }
1023 
1024     secp256k1_ecmult_pippenger_wnaf(buckets, bucket_window, state_space, r, scalars, points, idx);
1025 
1026     /* Clear data */
1027     for(i = 0; (size_t)i < idx; i++) {
1028         secp256k1_scalar_clear(&scalars[i]);
1029         state_space->ps[i].skew_na = 0;
1030         for(j = 0; j < WNAF_SIZE(bucket_window+1); j++) {
1031             state_space->wnaf_na[i * WNAF_SIZE(bucket_window+1) + j] = 0;
1032         }
1033     }
1034     for(i = 0; i < 1<<bucket_window; i++) {
1035         secp256k1_gej_clear(&buckets[i]);
1036     }
1037     secp256k1_scratch_deallocate_frame(scratch);
1038     return 1;
1039 }
1040 
1041 /* Wrapper for secp256k1_ecmult_multi_func interface */
secp256k1_ecmult_pippenger_batch_single(const secp256k1_ecmult_context * actx,secp256k1_scratch * scratch,secp256k1_gej * r,const secp256k1_scalar * inp_g_sc,secp256k1_ecmult_multi_callback cb,void * cbdata,size_t n)1042 static int secp256k1_ecmult_pippenger_batch_single(const secp256k1_ecmult_context *actx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) {
1043     return secp256k1_ecmult_pippenger_batch(actx, scratch, r, inp_g_sc, cb, cbdata, n, 0);
1044 }
1045 
1046 /**
1047  * Returns the maximum number of points in addition to G that can be used with
1048  * a given scratch space. The function ensures that fewer points may also be
1049  * used.
1050  */
secp256k1_pippenger_max_points(secp256k1_scratch * scratch)1051 static size_t secp256k1_pippenger_max_points(secp256k1_scratch *scratch) {
1052     size_t max_alloc = secp256k1_scratch_max_allocation(scratch, PIPPENGER_SCRATCH_OBJECTS);
1053     int bucket_window;
1054     size_t res = 0;
1055 
1056     for (bucket_window = 1; bucket_window <= PIPPENGER_MAX_BUCKET_WINDOW; bucket_window++) {
1057         size_t n_points;
1058         size_t max_points = secp256k1_pippenger_bucket_window_inv(bucket_window);
1059         size_t space_for_points;
1060         size_t space_overhead;
1061         size_t entry_size = sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) + sizeof(struct secp256k1_pippenger_point_state) + (WNAF_SIZE(bucket_window+1)+1)*sizeof(int);
1062 
1063 #ifdef USE_ENDOMORPHISM
1064         entry_size = 2*entry_size;
1065 #endif
1066         space_overhead = ((1<<bucket_window) * sizeof(secp256k1_gej) + entry_size + sizeof(struct secp256k1_pippenger_state));
1067         if (space_overhead > max_alloc) {
1068             break;
1069         }
1070         space_for_points = max_alloc - space_overhead;
1071 
1072         n_points = space_for_points/entry_size;
1073         n_points = n_points > max_points ? max_points : n_points;
1074         if (n_points > res) {
1075             res = n_points;
1076         }
1077         if (n_points < max_points) {
1078             /* A larger bucket_window may support even more points. But if we
1079              * would choose that then the caller couldn't safely use any number
1080              * smaller than what this function returns */
1081             break;
1082         }
1083     }
1084     return res;
1085 }
1086 
1087 /* Computes ecmult_multi by simply multiplying and adding each point. Does not
1088  * require a scratch space */
secp256k1_ecmult_multi_simple_var(const secp256k1_ecmult_context * ctx,secp256k1_gej * r,const secp256k1_scalar * inp_g_sc,secp256k1_ecmult_multi_callback cb,void * cbdata,size_t n_points)1089 static int secp256k1_ecmult_multi_simple_var(const secp256k1_ecmult_context *ctx, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points) {
1090     size_t point_idx;
1091     secp256k1_scalar szero;
1092     secp256k1_gej tmpj;
1093 
1094     secp256k1_scalar_set_int(&szero, 0);
1095     secp256k1_gej_set_infinity(r);
1096     secp256k1_gej_set_infinity(&tmpj);
1097     /* r = inp_g_sc*G */
1098     secp256k1_ecmult(ctx, r, &tmpj, &szero, inp_g_sc);
1099     for (point_idx = 0; point_idx < n_points; point_idx++) {
1100         secp256k1_ge point;
1101         secp256k1_gej pointj;
1102         secp256k1_scalar scalar;
1103         if (!cb(&scalar, &point, point_idx, cbdata)) {
1104             return 0;
1105         }
1106         /* r += scalar*point */
1107         secp256k1_gej_set_ge(&pointj, &point);
1108         secp256k1_ecmult(ctx, &tmpj, &pointj, &scalar, NULL);
1109         secp256k1_gej_add_var(r, r, &tmpj, NULL);
1110     }
1111     return 1;
1112 }
1113 
1114 /* Compute the number of batches and the batch size given the maximum batch size and the
1115  * total number of points */
secp256k1_ecmult_multi_batch_size_helper(size_t * n_batches,size_t * n_batch_points,size_t max_n_batch_points,size_t n)1116 static int secp256k1_ecmult_multi_batch_size_helper(size_t *n_batches, size_t *n_batch_points, size_t max_n_batch_points, size_t n) {
1117     if (max_n_batch_points == 0) {
1118         return 0;
1119     }
1120     if (max_n_batch_points > ECMULT_MAX_POINTS_PER_BATCH) {
1121         max_n_batch_points = ECMULT_MAX_POINTS_PER_BATCH;
1122     }
1123     if (n == 0) {
1124         *n_batches = 0;
1125         *n_batch_points = 0;
1126         return 1;
1127     }
1128     /* Compute ceil(n/max_n_batch_points) and ceil(n/n_batches) */
1129     *n_batches = 1 + (n - 1) / max_n_batch_points;
1130     *n_batch_points = 1 + (n - 1) / *n_batches;
1131     return 1;
1132 }
1133 
1134 typedef int (*secp256k1_ecmult_multi_func)(const secp256k1_ecmult_context*, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t);
secp256k1_ecmult_multi_var(const secp256k1_ecmult_context * ctx,secp256k1_scratch * scratch,secp256k1_gej * r,const secp256k1_scalar * inp_g_sc,secp256k1_ecmult_multi_callback cb,void * cbdata,size_t n)1135 static int secp256k1_ecmult_multi_var(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) {
1136     size_t i;
1137 
1138     int (*f)(const secp256k1_ecmult_context*, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t, size_t);
1139     size_t n_batches;
1140     size_t n_batch_points;
1141 
1142     secp256k1_gej_set_infinity(r);
1143     if (inp_g_sc == NULL && n == 0) {
1144         return 1;
1145     } else if (n == 0) {
1146         secp256k1_scalar szero;
1147         secp256k1_scalar_set_int(&szero, 0);
1148         secp256k1_ecmult(ctx, r, r, &szero, inp_g_sc);
1149         return 1;
1150     }
1151     if (scratch == NULL) {
1152         return secp256k1_ecmult_multi_simple_var(ctx, r, inp_g_sc, cb, cbdata, n);
1153     }
1154 
1155     /* Compute the batch sizes for pippenger given a scratch space. If it's greater than a threshold
1156      * use pippenger. Otherwise use strauss */
1157     if (!secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, secp256k1_pippenger_max_points(scratch), n)) {
1158         return 0;
1159     }
1160     if (n_batch_points >= ECMULT_PIPPENGER_THRESHOLD) {
1161         f = secp256k1_ecmult_pippenger_batch;
1162     } else {
1163         if (!secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, secp256k1_strauss_max_points(scratch), n)) {
1164             return 0;
1165         }
1166         f = secp256k1_ecmult_strauss_batch;
1167     }
1168     for(i = 0; i < n_batches; i++) {
1169         size_t nbp = n < n_batch_points ? n : n_batch_points;
1170         size_t offset = n_batch_points*i;
1171         secp256k1_gej tmp;
1172         if (!f(ctx, scratch, &tmp, i == 0 ? inp_g_sc : NULL, cb, cbdata, nbp, offset)) {
1173             return 0;
1174         }
1175         secp256k1_gej_add_var(r, r, &tmp, NULL);
1176         n -= nbp;
1177     }
1178     return 1;
1179 }
1180 
1181 #endif /* SECP256K1_ECMULT_IMPL_H */
1182