1 // integer.cpp - originally written and placed in the public domain by Wei Dai
2 // contains public domain code contributed by Alister Lee and Leonard Janke
3
4 // Notes by JW: The Integer class needs to do two things. First, it needs
5 // to set function pointers on some platforms, like X86 and X64. The
6 // function pointers select a fast multiply and addition based on the cpu.
7 // Second, it wants to create Integer::Zero(), Integer::One() and
8 // Integer::Two().
9 // The function pointers are initialized in the InitializeInteger class by
10 // calling SetFunctionPointers(). The call to SetFunctionPointers() is
11 // guarded to run once using a double-checked pattern. We don't use C++
12 // std::call_once due to bad interactions between libstdc++, glibc and
13 // pthreads. The bad interactions were causing crashes for us on platforms
14 // like Sparc and PowerPC. Since we are only setting function pointers we
15 // don't have to worry about leaking memory. The worst case seems to be the
16 // pointers gets written twice until the init flag is set and visible to
17 // all threads.
18 // For Integer::Zero(), Integer::One() and Integer::Two(), we use one of three
19 // strategies. First, if initialization priorities are available then we use
20 // them. Initialization priorities are init_priority() on Linux and init_seg()
21 // on Windows. OS X and several other platforms lack them. Initialization
22 // priorities are platform specific but they are also the most trouble free
23 // with determisitic destruction.
24 // Second, if C++11 dynamic initialization is available, then we use it. After
25 // the std::call_once fiasco we moved to dynamic initialization to avoid
26 // unknown troubles platforms that are tested less frequently. In addition
27 // Microsoft platforms mostly do not provide dynamic initialization.
28 // The MSDN docs claim they do but they don't in practice because we need
29 // Visual Studio 2017 and Windows 10 or above.
30 // Third, we fall back to Wei's original code of a Singleton. Wei's original
31 // code was much simpler. It simply used the Singleton pattern, but it always
32 // produced memory findings on some platforms. The Singleton generates memory
33 // findings because it uses a Create On First Use pattern (a dumb Nifty
34 // Counter) and the compiler had to be smart enough to fold them to return
35 // the same object. Unix and Linux compilers do a good job of folding objects,
36 // but Microsoft compilers do a rather poor job for some versions of the
37 // compiler.
38 // Another problem with the Singleton is resource destruction requires running
39 // resource acquisition in reverse. For resources provided through the
40 // Singletons, there is no way to express the dependency order to safely
41 // destroy resources. (That's one of the problems C++11 dynamic
42 // intitialization with concurrent execution is supposed to solve).
43 // The final problem with Singletons is resource/memory exhaustion in languages
44 // like Java and .Net. Java and .Net load and unload a native DLL hundreds or
45 // thousands of times during the life of a program. Each load produces a
46 // memory leak and they can add up quickly. If they library is being used in
47 // Java or .Net then Singleton must be avoided at all costs.
48 //
49 // The code below has a path cut-in for BMI2 using mulx and adcx instructions.
50 // There was a modest speedup of approximately 0.03 ms in public key Integer
51 // operations. We had to disable BMI2 for the moment because some OS X machines
52 // were advertising BMI/BMI2 support but caused SIGILL's at runtime. Also see
53 // https://github.com/weidai11/cryptopp/issues/850.
54
55 #include "pch.h"
56 #include "config.h"
57
58 #ifndef CRYPTOPP_IMPORTS
59
60 #include "integer.h"
61 #include "secblock.h"
62 #include "modarith.h"
63 #include "nbtheory.h"
64 #include "smartptr.h"
65 #include "algparam.h"
66 #include "filters.h"
67 #include "stdcpp.h"
68 #include "asn.h"
69 #include "oids.h"
70 #include "words.h"
71 #include "pubkey.h" // for P1363_KDF2
72 #include "sha.h"
73 #include "cpu.h"
74 #include "misc.h"
75
76 #include <iostream>
77
78 #if (_MSC_VER >= 1400) && !defined(_M_ARM)
79 #include <intrin.h>
80 #endif
81
82 #ifdef __DECCXX
83 #include <c_asm.h>
84 #endif
85
86 // "Error: The operand ___LKDB cannot be assigned to",
87 // http://github.com/weidai11/cryptopp/issues/188
88 #if (__SUNPRO_CC >= 0x5130)
89 # define MAYBE_CONST
90 # define MAYBE_UNCONST_CAST(x) const_cast<word*>(x)
91 #else
92 # define MAYBE_CONST const
93 # define MAYBE_UNCONST_CAST(x) x
94 #endif
95
96 // "Inline assembly operands don't work with .intel_syntax",
97 // http://llvm.org/bugs/show_bug.cgi?id=24232
98 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_MIXED_ASM)
99 # undef CRYPTOPP_X86_ASM_AVAILABLE
100 # undef CRYPTOPP_X32_ASM_AVAILABLE
101 # undef CRYPTOPP_X64_ASM_AVAILABLE
102 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
103 # undef CRYPTOPP_SSSE3_ASM_AVAILABLE
104 #else
105 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86))
106 #endif
107
108 // ***************** C++ Static Initialization ********************
109
110 NAMESPACE_BEGIN(CryptoPP)
111
112 // Function body near the middle of the file
113 static void SetFunctionPointers();
114
115 // Use a double-checked pattern. We are not leaking anything so it
116 // does not matter if a pointer is written twice during a race.
117 // Avoid std::call_once due to too many problems on platforms like
118 // Solaris and Sparc. Also see
119 // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=66146 and
120 // http://github.com/weidai11/cryptopp/issues/707.
InitializeInteger()121 InitializeInteger::InitializeInteger()
122 {
123 static bool s_flag;
124 MEMORY_BARRIER();
125 if (s_flag == false)
126 {
127 SetFunctionPointers();
128 s_flag = true;
129 MEMORY_BARRIER();
130 }
131 }
132
133 template <long i>
134 struct NewInteger
135 {
operator ()NewInteger136 Integer * operator()() const
137 {
138 return new Integer(i);
139 }
140 };
141
142 // ***************** Library code ********************
143
Compare(const word * A,const word * B,size_t N)144 inline static int Compare(const word *A, const word *B, size_t N)
145 {
146 while (N--)
147 if (A[N] > B[N])
148 return 1;
149 else if (A[N] < B[N])
150 return -1;
151
152 return 0;
153 }
154
Increment(word * A,size_t N,word B=1)155 inline static int Increment(word *A, size_t N, word B=1)
156 {
157 CRYPTOPP_ASSERT(N);
158 word t = A[0];
159 A[0] = t+B;
160 if (A[0] >= t)
161 return 0;
162 for (unsigned i=1; i<N; i++)
163 if (++A[i])
164 return 0;
165 return 1;
166 }
167
Decrement(word * A,size_t N,word B=1)168 inline static int Decrement(word *A, size_t N, word B=1)
169 {
170 CRYPTOPP_ASSERT(N);
171 word t = A[0];
172 A[0] = t-B;
173 if (A[0] <= t)
174 return 0;
175 for (unsigned i=1; i<N; i++)
176 if (A[i]--)
177 return 0;
178 return 1;
179 }
180
TwosComplement(word * A,size_t N)181 static void TwosComplement(word *A, size_t N)
182 {
183 Decrement(A, N);
184 for (unsigned i=0; i<N; i++)
185 A[i] = ~A[i];
186 }
187
AtomicInverseModPower2(word A)188 static word AtomicInverseModPower2(word A)
189 {
190 CRYPTOPP_ASSERT(A%2==1);
191
192 word R=A%8;
193
194 for (unsigned i=3; i<WORD_BITS; i*=2)
195 R = R*(2-R*A);
196
197 CRYPTOPP_ASSERT(R*A==1);
198 return R;
199 }
200
201 // ********************************************************
202
203 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || ((defined(__aarch64__) || defined(__x86_64__)) && defined(CRYPTOPP_WORD128_AVAILABLE))
204 #define TWO_64_BIT_WORDS 1
205 #define Declare2Words(x) word x##0, x##1;
206 #define AssignWord(a, b) a##0 = b; a##1 = 0;
207 #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
208 #define LowWord(a) a##0
209 #define HighWord(a) a##1
210 #ifdef _MSC_VER
211 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
212 #ifndef __INTEL_COMPILER
213 #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
214 #endif
215 #elif defined(__aarch32__) || defined(__aarch64__)
216 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; asm ("umulh %0,%1,%2" : "=r"(p1) : "r"(a), "r"(b));
217 #elif defined(__DECCXX)
218 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
219 #elif defined(__x86_64__)
220 #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
221 // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
222 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
223 #elif defined(__BMI2__) && 0
224 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulxq %3, %0, %1" : "=r"(p0), "=r"(p1) : "d"(a), "r"(b));
225 #define MulAcc(c, d, a, b) asm ("mulxq %6, %3, %4; addq %3, %0; adcxq %4, %1; adcxq %7, %2;" : "+&r"(c), "+&r"(d##0), "+&r"(d##1), "=&r"(p0), "=&r"(p1) : "d"(a), "r"(b), "r"(W64LIT(0)) : "cc");
226 #define Double3Words(c, d) asm ("addq %0, %0; adcxq %1, %1; adcxq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
227 #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcxq %3, %1;" : "+&r"(a##0), "+r"(a##1) : "r"(b), "r"(W64LIT(0)) : "cc");
228 #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcxq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
229 #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcxq %6, %1; adcxq %7, %2;" : "+r"(c), "=&r"(e##0), "=&r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1), "r"(W64LIT(0)) : "cc");
230 #else
231 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
232 #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
233 #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
234 #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
235 #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
236 #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
237 #endif
238 #endif
239 #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
240 #ifndef Double3Words
241 #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
242 #endif
243 #ifndef Acc2WordsBy2
244 #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
245 #endif
246 #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
247 #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
248 #define GetCarry(u) u##1
249 #define GetBorrow(u) u##1
250 #else
251 #define Declare2Words(x) dword x;
252 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64))
253 #define MultiplyWords(p, a, b) p = __emulu(a, b);
254 #else
255 #define MultiplyWords(p, a, b) p = (dword)a*b;
256 #endif
257 #define AssignWord(a, b) a = b;
258 #define Add2WordsBy1(a, b, c) a = b + c;
259 #define Acc2WordsBy2(a, b) a += b;
260 #define LowWord(a) word(a)
261 #define HighWord(a) word(a>>WORD_BITS)
262 #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
263 #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
264 #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
265 #define GetCarry(u) HighWord(u)
266 #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
267 #endif
268 #ifndef MulAcc
269 #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
270 #endif
271 #ifndef Acc2WordsBy1
272 #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
273 #endif
274 #ifndef Acc3WordsBy2
275 #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
276 #endif
277
278 class DWord
279 {
280 public:
281 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
DWord()282 DWord() {std::memset(&m_whole, 0x00, sizeof(m_whole));}
283 #else
284 DWord() {std::memset(&m_halfs, 0x00, sizeof(m_halfs));}
285 #endif
286
287 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
DWord(word low)288 explicit DWord(word low) : m_whole(low) { }
289 #else
DWord(word low)290 explicit DWord(word low)
291 {
292 m_halfs.high = 0;
293 m_halfs.low = low;
294 }
295 #endif
296
297 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
DWord(word low,word high)298 DWord(word low, word high) : m_whole()
299 #else
300 DWord(word low, word high) : m_halfs()
301 #endif
302 {
303 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
304 # if (CRYPTOPP_LITTLE_ENDIAN)
305 const word t[2] = {low,high};
306 memcpy(&m_whole, t, sizeof(m_whole));
307 # else
308 const word t[2] = {high,low};
309 memcpy(&m_whole, t, sizeof(m_whole));
310 # endif
311 #else
312 m_halfs.low = low;
313 m_halfs.high = high;
314 #endif
315 }
316
Multiply(word a,word b)317 static DWord Multiply(word a, word b)
318 {
319 DWord r;
320 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
321 r.m_whole = (dword)a * b;
322 #elif defined(MultiplyWordsLoHi)
323 MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
324 #else
325 CRYPTOPP_ASSERT(0);
326 #endif
327 return r;
328 }
329
MultiplyAndAdd(word a,word b,word c)330 static DWord MultiplyAndAdd(word a, word b, word c)
331 {
332 DWord r = Multiply(a, b);
333 return r += c;
334 }
335
operator +=(word a)336 DWord & operator+=(word a)
337 {
338 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
339 m_whole = m_whole + a;
340 #else
341 m_halfs.low += a;
342 m_halfs.high += (m_halfs.low < a);
343 #endif
344 return *this;
345 }
346
operator +(word a)347 DWord operator+(word a)
348 {
349 DWord r;
350 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
351 r.m_whole = m_whole + a;
352 #else
353 r.m_halfs.low = m_halfs.low + a;
354 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
355 #endif
356 return r;
357 }
358
operator -(DWord a)359 DWord operator-(DWord a)
360 {
361 DWord r;
362 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
363 r.m_whole = m_whole - a.m_whole;
364 #else
365 r.m_halfs.low = m_halfs.low - a.m_halfs.low;
366 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
367 #endif
368 return r;
369 }
370
operator -(word a)371 DWord operator-(word a)
372 {
373 DWord r;
374 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
375 r.m_whole = m_whole - a;
376 #else
377 r.m_halfs.low = m_halfs.low - a;
378 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
379 #endif
380 return r;
381 }
382
383 // returns quotient, which must fit in a word
384 word operator/(word divisor);
385
386 word operator%(word a);
387
operator !() const388 bool operator!() const
389 {
390 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
391 return !m_whole;
392 #else
393 return !m_halfs.high && !m_halfs.low;
394 #endif
395 }
396
397 // TODO: When NATIVE_DWORD is in effect, we access high and low, which are inactive
398 // union members, and that's UB. Also see http://stackoverflow.com/q/11373203.
GetLowHalf() const399 word GetLowHalf() const {return m_halfs.low;}
GetHighHalf() const400 word GetHighHalf() const {return m_halfs.high;}
GetHighHalfAsBorrow() const401 word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
402
403 private:
404 // Issue 274, "Types cannot be declared in anonymous union",
405 // http://github.com/weidai11/cryptopp/issues/274
406 // Thanks to Martin Bonner at http://stackoverflow.com/a/39507183
407 struct half_words
408 {
409 #if (CRYPTOPP_LITTLE_ENDIAN)
410 word low;
411 word high;
412 #else
413 word high;
414 word low;
415 #endif
416 };
417 union
418 {
419 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
420 dword m_whole;
421 #endif
422 half_words m_halfs;
423 };
424 };
425
426 class Word
427 {
428 public:
Word()429 Word() : m_whole(0) {}
Word(word value)430 Word(word value) : m_whole(value) {}
Word(hword low,hword high)431 Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
432
Multiply(hword a,hword b)433 static Word Multiply(hword a, hword b)
434 {
435 Word r;
436 r.m_whole = (word)a * b;
437 return r;
438 }
439
operator -(Word a)440 Word operator-(Word a)
441 {
442 Word r;
443 r.m_whole = m_whole - a.m_whole;
444 return r;
445 }
446
operator -(hword a)447 Word operator-(hword a)
448 {
449 Word r;
450 r.m_whole = m_whole - a;
451 return r;
452 }
453
454 // returns quotient, which must fit in a word
operator /(hword divisor)455 hword operator/(hword divisor)
456 {
457 return hword(m_whole / divisor);
458 }
459
operator !() const460 bool operator!() const
461 {
462 return !m_whole;
463 }
464
GetWhole() const465 word GetWhole() const {return m_whole;}
GetLowHalf() const466 hword GetLowHalf() const {return hword(m_whole);}
GetHighHalf() const467 hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
GetHighHalfAsBorrow() const468 hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
469
470 private:
471 word m_whole;
472 };
473
474 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
475 template <class S, class D>
476 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULLPTR)
477 {
478 CRYPTOPP_UNUSED(dummy);
479
480 // Assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
481 CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0));
482
483 // Profiling guided the flow below.
484
485 // estimate the quotient: do a 2 S by 1 S divide.
486 S Q; bool pre = (S(B1+1) == 0);
487 if (B1 > 0 && !pre)
488 Q = D(A[1], A[2]) / S(B1+1);
489 else if (pre)
490 Q = A[2];
491 else
492 Q = D(A[0], A[1]) / B0;
493
494 // now subtract Q*B from A
495 D p = D::Multiply(B0, Q);
496 D u = (D) A[0] - p.GetLowHalf();
497 A[0] = u.GetLowHalf();
498 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
499 A[1] = u.GetLowHalf();
500 A[2] += u.GetHighHalf();
501
502 // Q <= actual quotient, so fix it
503 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
504 {
505 u = (D) A[0] - B0;
506 A[0] = u.GetLowHalf();
507 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
508 A[1] = u.GetLowHalf();
509 A[2] += u.GetHighHalf();
510 Q++;
511 CRYPTOPP_ASSERT(Q); // shouldn't overflow
512 }
513
514 return Q;
515 }
516
517 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
518 template <class S, class D>
DivideFourWordsByTwo(S * T,const D & Al,const D & Ah,const D & B)519 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
520 {
521 // Profiling guided the flow below.
522
523 if (!!B)
524 {
525 S Q[2];
526 T[0] = Al.GetLowHalf();
527 T[1] = Al.GetHighHalf();
528 T[2] = Ah.GetLowHalf();
529 T[3] = Ah.GetHighHalf();
530 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
531 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
532 return D(Q[0], Q[1]);
533 }
534 else // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
535 {
536 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
537 }
538 }
539
540 // returns quotient, which must fit in a word
operator /(word a)541 inline word DWord::operator/(word a)
542 {
543 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
544 return word(m_whole / a);
545 #else
546 hword r[4];
547 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
548 #endif
549 }
550
operator %(word a)551 inline word DWord::operator%(word a)
552 {
553 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
554 return word(m_whole % a);
555 #else
556 if (a < (word(1) << (WORD_BITS/2)))
557 {
558 hword h = hword(a);
559 word r = m_halfs.high % h;
560 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
561 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
562 }
563 else
564 {
565 hword r[4];
566 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
567 return Word(r[0], r[1]).GetWhole();
568 }
569 #endif
570 }
571
572 // ********************************************************
573
574 // Use some tricks to share assembly code between MSVC, GCC, Clang and Sun CC.
575 #if defined(__GNUC__)
576 #define AddPrologue \
577 int result; \
578 __asm__ __volatile__ \
579 ( \
580 INTEL_NOPREFIX
581 #define AddEpilogue \
582 ATT_PREFIX \
583 : "=a" (result)\
584 : "d" (C), "a" (A), "D" (B), "c" (N) \
585 : "%esi", "memory", "cc" \
586 );\
587 return result;
588 #define MulPrologue \
589 __asm__ __volatile__ \
590 ( \
591 INTEL_NOPREFIX \
592 AS1( push ebx) \
593 AS2( mov ebx, edx)
594 #define MulEpilogue \
595 AS1( pop ebx) \
596 ATT_PREFIX \
597 : \
598 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
599 : "%esi", "memory", "cc" \
600 );
601 #define SquPrologue MulPrologue
602 #define SquEpilogue \
603 AS1( pop ebx) \
604 ATT_PREFIX \
605 : \
606 : "d" (s_maskLow16), "c" (C), "a" (A) \
607 : "%esi", "%edi", "memory", "cc" \
608 );
609 #define TopPrologue MulPrologue
610 #define TopEpilogue \
611 AS1( pop ebx) \
612 ATT_PREFIX \
613 : \
614 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
615 : "memory", "cc" \
616 );
617 #else
618 #define AddPrologue \
619 __asm push edi \
620 __asm push esi \
621 __asm mov eax, [esp+12] \
622 __asm mov edi, [esp+16]
623 #define AddEpilogue \
624 __asm pop esi \
625 __asm pop edi \
626 __asm ret 8
627 #define SaveEBX
628 #define RestoreEBX
629 #define SquPrologue \
630 AS2( mov eax, A) \
631 AS2( mov ecx, C) \
632 SaveEBX \
633 AS2( lea ebx, s_maskLow16)
634 #define MulPrologue \
635 AS2( mov eax, A) \
636 AS2( mov edi, B) \
637 AS2( mov ecx, C) \
638 SaveEBX \
639 AS2( lea ebx, s_maskLow16)
640 #define TopPrologue \
641 AS2( mov eax, A) \
642 AS2( mov edi, B) \
643 AS2( mov ecx, C) \
644 AS2( mov esi, L) \
645 SaveEBX \
646 AS2( lea ebx, s_maskLow16)
647 #define SquEpilogue RestoreEBX
648 #define MulEpilogue RestoreEBX
649 #define TopEpilogue RestoreEBX
650 #endif
651
652 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
653 extern "C" {
654 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
655 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
656 }
657 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
Baseline_Add(size_t N,word * C,const word * A,const word * B)658 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
659 {
660 word result;
661 __asm__ __volatile__
662 (
663 INTEL_NOPREFIX
664 AS1( neg %1)
665 ASJ( jz, 1, f)
666 AS2( mov %0,[%3+8*%1])
667 AS2( add %0,[%4+8*%1])
668 AS2( mov [%2+8*%1],%0)
669 ASL(0)
670 AS2( mov %0,[%3+8*%1+8])
671 AS2( adc %0,[%4+8*%1+8])
672 AS2( mov [%2+8*%1+8],%0)
673 AS2( lea %1,[%1+2])
674 ASJ( jrcxz, 1, f)
675 AS2( mov %0,[%3+8*%1])
676 AS2( adc %0,[%4+8*%1])
677 AS2( mov [%2+8*%1],%0)
678 ASJ( jmp, 0, b)
679 ASL(1)
680 AS2( mov %0, 0)
681 AS2( adc %0, %0)
682 ATT_NOPREFIX
683 : "=&r" (result), "+c" (N)
684 : "r" (C+N), "r" (A+N), "r" (B+N)
685 : "memory", "cc"
686 );
687 return (int)result;
688 }
689
Baseline_Sub(size_t N,word * C,const word * A,const word * B)690 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
691 {
692 word result;
693 __asm__ __volatile__
694 (
695 INTEL_NOPREFIX
696 AS1( neg %1)
697 ASJ( jz, 1, f)
698 AS2( mov %0,[%3+8*%1])
699 AS2( sub %0,[%4+8*%1])
700 AS2( mov [%2+8*%1],%0)
701 ASL(0)
702 AS2( mov %0,[%3+8*%1+8])
703 AS2( sbb %0,[%4+8*%1+8])
704 AS2( mov [%2+8*%1+8],%0)
705 AS2( lea %1,[%1+2])
706 ASJ( jrcxz, 1, f)
707 AS2( mov %0,[%3+8*%1])
708 AS2( sbb %0,[%4+8*%1])
709 AS2( mov [%2+8*%1],%0)
710 ASJ( jmp, 0, b)
711 ASL(1)
712 AS2( mov %0, 0)
713 AS2( adc %0, %0)
714 ATT_NOPREFIX
715 : "=&r" (result), "+c" (N)
716 : "r" (C+N), "r" (A+N), "r" (B+N)
717 : "memory", "cc"
718 );
719 return (int)result;
720 }
721 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
Baseline_Add(size_t N,word * C,const word * A,const word * B)722 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
723 {
724 AddPrologue
725
726 // now: eax = A, edi = B, edx = C, ecx = N
727 AS2( lea eax, [eax+4*ecx])
728 AS2( lea edi, [edi+4*ecx])
729 AS2( lea edx, [edx+4*ecx])
730
731 AS1( neg ecx) // ecx is negative index
732 AS2( test ecx, 2) // this clears carry flag
733 ASJ( jz, 0, f)
734 AS2( sub ecx, 2)
735 ASJ( jmp, 1, f)
736
737 ASL(0)
738 ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
739 AS2( mov esi,[eax+4*ecx])
740 AS2( adc esi,[edi+4*ecx])
741 AS2( mov [edx+4*ecx],esi)
742 AS2( mov esi,[eax+4*ecx+4])
743 AS2( adc esi,[edi+4*ecx+4])
744 AS2( mov [edx+4*ecx+4],esi)
745 ASL(1)
746 AS2( mov esi,[eax+4*ecx+8])
747 AS2( adc esi,[edi+4*ecx+8])
748 AS2( mov [edx+4*ecx+8],esi)
749 AS2( mov esi,[eax+4*ecx+12])
750 AS2( adc esi,[edi+4*ecx+12])
751 AS2( mov [edx+4*ecx+12],esi)
752
753 AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
754 ASJ( jmp, 0, b)
755
756 ASL(2)
757 AS2( mov eax, 0)
758 AS1( setc al) // store carry into eax (return result register)
759
760 AddEpilogue
761
762 // http://github.com/weidai11/cryptopp/issues/340
763 CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
764 CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
765 }
766
Baseline_Sub(size_t N,word * C,const word * A,const word * B)767 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
768 {
769 AddPrologue
770
771 // now: eax = A, edi = B, edx = C, ecx = N
772 AS2( lea eax, [eax+4*ecx])
773 AS2( lea edi, [edi+4*ecx])
774 AS2( lea edx, [edx+4*ecx])
775
776 AS1( neg ecx) // ecx is negative index
777 AS2( test ecx, 2) // this clears carry flag
778 ASJ( jz, 0, f)
779 AS2( sub ecx, 2)
780 ASJ( jmp, 1, f)
781
782 ASL(0)
783 ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
784 AS2( mov esi,[eax+4*ecx])
785 AS2( sbb esi,[edi+4*ecx])
786 AS2( mov [edx+4*ecx],esi)
787 AS2( mov esi,[eax+4*ecx+4])
788 AS2( sbb esi,[edi+4*ecx+4])
789 AS2( mov [edx+4*ecx+4],esi)
790 ASL(1)
791 AS2( mov esi,[eax+4*ecx+8])
792 AS2( sbb esi,[edi+4*ecx+8])
793 AS2( mov [edx+4*ecx+8],esi)
794 AS2( mov esi,[eax+4*ecx+12])
795 AS2( sbb esi,[edi+4*ecx+12])
796 AS2( mov [edx+4*ecx+12],esi)
797
798 AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
799 ASJ( jmp, 0, b)
800
801 ASL(2)
802 AS2( mov eax, 0)
803 AS1( setc al) // store carry into eax (return result register)
804
805 AddEpilogue
806
807 // http://github.com/weidai11/cryptopp/issues/340
808 CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
809 CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
810 }
811
812 #if CRYPTOPP_INTEGER_SSE2
SSE2_Add(size_t N,word * C,const word * A,const word * B)813 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
814 {
815 AddPrologue
816
817 // now: eax = A, edi = B, edx = C, ecx = N
818 AS2( lea eax, [eax+4*ecx])
819 AS2( lea edi, [edi+4*ecx])
820 AS2( lea edx, [edx+4*ecx])
821
822 AS1( neg ecx) // ecx is negative index
823 AS2( pxor mm2, mm2)
824 ASJ( jz, 2, f)
825 AS2( test ecx, 2) // this clears carry flag
826 ASJ( jz, 0, f)
827 AS2( sub ecx, 2)
828 ASJ( jmp, 1, f)
829
830 ASL(0)
831 AS2( movd mm0, DWORD PTR [eax+4*ecx])
832 AS2( movd mm1, DWORD PTR [edi+4*ecx])
833 AS2( paddq mm0, mm1)
834 AS2( paddq mm2, mm0)
835 AS2( movd DWORD PTR [edx+4*ecx], mm2)
836 AS2( psrlq mm2, 32)
837
838 AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
839 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
840 AS2( paddq mm0, mm1)
841 AS2( paddq mm2, mm0)
842 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
843 AS2( psrlq mm2, 32)
844
845 ASL(1)
846 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
847 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
848 AS2( paddq mm0, mm1)
849 AS2( paddq mm2, mm0)
850 AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
851 AS2( psrlq mm2, 32)
852
853 AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
854 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
855 AS2( paddq mm0, mm1)
856 AS2( paddq mm2, mm0)
857 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
858 AS2( psrlq mm2, 32)
859
860 AS2( add ecx, 4)
861 ASJ( jnz, 0, b)
862
863 ASL(2)
864 AS2( movd eax, mm2)
865 AS1( emms)
866
867 AddEpilogue
868
869 // http://github.com/weidai11/cryptopp/issues/340
870 CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
871 CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
872 }
SSE2_Sub(size_t N,word * C,const word * A,const word * B)873 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
874 {
875 AddPrologue
876
877 // now: eax = A, edi = B, edx = C, ecx = N
878 AS2( lea eax, [eax+4*ecx])
879 AS2( lea edi, [edi+4*ecx])
880 AS2( lea edx, [edx+4*ecx])
881
882 AS1( neg ecx) // ecx is negative index
883 AS2( pxor mm2, mm2)
884 ASJ( jz, 2, f)
885 AS2( test ecx, 2) // this clears carry flag
886 ASJ( jz, 0, f)
887 AS2( sub ecx, 2)
888 ASJ( jmp, 1, f)
889
890 ASL(0)
891 AS2( movd mm0, DWORD PTR [eax+4*ecx])
892 AS2( movd mm1, DWORD PTR [edi+4*ecx])
893 AS2( psubq mm0, mm1)
894 AS2( psubq mm0, mm2)
895 AS2( movd DWORD PTR [edx+4*ecx], mm0)
896 AS2( psrlq mm0, 63)
897
898 AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
899 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
900 AS2( psubq mm2, mm1)
901 AS2( psubq mm2, mm0)
902 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
903 AS2( psrlq mm2, 63)
904
905 ASL(1)
906 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
907 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
908 AS2( psubq mm0, mm1)
909 AS2( psubq mm0, mm2)
910 AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
911 AS2( psrlq mm0, 63)
912
913 AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
914 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
915 AS2( psubq mm2, mm1)
916 AS2( psubq mm2, mm0)
917 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
918 AS2( psrlq mm2, 63)
919
920 AS2( add ecx, 4)
921 ASJ( jnz, 0, b)
922
923 ASL(2)
924 AS2( movd eax, mm2)
925 AS1( emms)
926
927 AddEpilogue
928
929 // http://github.com/weidai11/cryptopp/issues/340
930 CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
931 CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
932 }
933 #endif // CRYPTOPP_INTEGER_SSE2
934 #else // CRYPTOPP_SSE2_ASM_AVAILABLE
Baseline_Add(size_t N,word * C,const word * A,const word * B)935 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
936 {
937 CRYPTOPP_ASSERT (N%2 == 0);
938
939 Declare2Words(u);
940 AssignWord(u, 0);
941 for (size_t i=0; i<N; i+=2)
942 {
943 AddWithCarry(u, A[i], B[i]);
944 C[i] = LowWord(u);
945 AddWithCarry(u, A[i+1], B[i+1]);
946 C[i+1] = LowWord(u);
947 }
948 return int(GetCarry(u));
949 }
950
Baseline_Sub(size_t N,word * C,const word * A,const word * B)951 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
952 {
953 CRYPTOPP_ASSERT (N%2 == 0);
954
955 Declare2Words(u);
956 AssignWord(u, 0);
957 for (size_t i=0; i<N; i+=2)
958 {
959 SubtractWithBorrow(u, A[i], B[i]);
960 C[i] = LowWord(u);
961 SubtractWithBorrow(u, A[i+1], B[i+1]);
962 C[i+1] = LowWord(u);
963 }
964 return int(GetBorrow(u));
965 }
966 #endif
967
LinearMultiply(word * C,const word * AA,word B,size_t N)968 static word LinearMultiply(word *C, const word *AA, word B, size_t N)
969 {
970 // http://github.com/weidai11/cryptopp/issues/188
971 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
972
973 word carry=0;
974 for(unsigned i=0; i<N; i++)
975 {
976 Declare2Words(p);
977 MultiplyWords(p, A[i], B);
978 Acc2WordsBy1(p, carry);
979 C[i] = LowWord(p);
980 carry = HighWord(p);
981 }
982 return carry;
983 }
984
985 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
986
987 #define Mul_2 \
988 Mul_Begin(2) \
989 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
990 Mul_End(1, 1)
991
992 #define Mul_4 \
993 Mul_Begin(4) \
994 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
995 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
996 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
997 Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
998 Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
999 Mul_End(5, 3)
1000
1001 #define Mul_8 \
1002 Mul_Begin(8) \
1003 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1004 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1005 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1006 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1007 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1008 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1009 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1010 Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1011 Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1012 Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1013 Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1014 Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1015 Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
1016 Mul_End(13, 7)
1017
1018 #define Mul_16 \
1019 Mul_Begin(16) \
1020 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1021 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1022 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1023 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1024 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1025 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1026 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1027 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1028 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1029 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1030 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1031 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1032 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1033 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1034 Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1035 Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1036 Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1037 Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1038 Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1039 Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1040 Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1041 Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1042 Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1043 Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1044 Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1045 Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1046 Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1047 Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1048 Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
1049 Mul_End(29, 15)
1050
1051 #define Squ_2 \
1052 Squ_Begin(2) \
1053 Squ_End(2)
1054
1055 #define Squ_4 \
1056 Squ_Begin(4) \
1057 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1058 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1059 Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
1060 Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
1061 Squ_End(4)
1062
1063 #define Squ_8 \
1064 Squ_Begin(8) \
1065 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1066 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1067 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
1068 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
1069 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
1070 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1071 Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1072 Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
1073 Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1074 Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1075 Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
1076 Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
1077 Squ_End(8)
1078
1079 #define Squ_16 \
1080 Squ_Begin(16) \
1081 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1082 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1083 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
1084 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
1085 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
1086 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1087 Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1088 Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
1089 Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1090 Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1091 Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
1092 Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
1093 Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
1094 Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
1095 Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
1096 Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1097 Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1098 Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1099 Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1100 Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1101 Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1102 Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1103 Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1104 Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1105 Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1106 Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1107 Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1108 Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1109 Squ_End(16)
1110
1111 #define Bot_2 \
1112 Mul_Begin(2) \
1113 Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1114 Bot_End(2)
1115
1116 #define Bot_4 \
1117 Mul_Begin(4) \
1118 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1119 Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
1120 Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
1121 Bot_End(4)
1122
1123 #define Bot_8 \
1124 Mul_Begin(8) \
1125 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1126 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1127 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1128 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1129 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1130 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1131 Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1132 Bot_End(8)
1133
1134 #define Bot_16 \
1135 Mul_Begin(16) \
1136 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1137 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1138 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1139 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1140 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1141 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1142 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1143 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1144 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1145 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1146 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1147 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1148 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1149 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1150 Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1151 Bot_End(16)
1152
1153 #endif
1154
1155 #if 0
1156 #define Mul_Begin(n) \
1157 Declare2Words(p) \
1158 Declare2Words(c) \
1159 Declare2Words(d) \
1160 MultiplyWords(p, A[0], B[0]) \
1161 AssignWord(c, LowWord(p)) \
1162 AssignWord(d, HighWord(p))
1163
1164 #define Mul_Acc(i, j) \
1165 MultiplyWords(p, A[i], B[j]) \
1166 Acc2WordsBy1(c, LowWord(p)) \
1167 Acc2WordsBy1(d, HighWord(p))
1168
1169 #define Mul_SaveAcc(k, i, j) \
1170 R[k] = LowWord(c); \
1171 Add2WordsBy1(c, d, HighWord(c)) \
1172 MultiplyWords(p, A[i], B[j]) \
1173 AssignWord(d, HighWord(p)) \
1174 Acc2WordsBy1(c, LowWord(p))
1175
1176 #define Mul_End(n) \
1177 R[2*n-3] = LowWord(c); \
1178 Acc2WordsBy1(d, HighWord(c)) \
1179 MultiplyWords(p, A[n-1], B[n-1])\
1180 Acc2WordsBy2(d, p) \
1181 R[2*n-2] = LowWord(d); \
1182 R[2*n-1] = HighWord(d);
1183
1184 #define Bot_SaveAcc(k, i, j) \
1185 R[k] = LowWord(c); \
1186 word e = LowWord(d) + HighWord(c); \
1187 e += A[i] * B[j];
1188
1189 #define Bot_Acc(i, j) \
1190 e += A[i] * B[j];
1191
1192 #define Bot_End(n) \
1193 R[n-1] = e;
1194 #else
1195 #define Mul_Begin(n) \
1196 Declare2Words(p) \
1197 word c; \
1198 Declare2Words(d) \
1199 MultiplyWords(p, A[0], B[0]) \
1200 c = LowWord(p); \
1201 AssignWord(d, HighWord(p))
1202
1203 #define Mul_Acc(i, j) \
1204 MulAcc(c, d, A[i], B[j])
1205
1206 #define Mul_SaveAcc(k, i, j) \
1207 R[k] = c; \
1208 c = LowWord(d); \
1209 AssignWord(d, HighWord(d)) \
1210 MulAcc(c, d, A[i], B[j])
1211
1212 #define Mul_End(k, i) \
1213 R[k] = c; \
1214 MultiplyWords(p, A[i], B[i]) \
1215 Acc2WordsBy2(p, d) \
1216 R[k+1] = LowWord(p); \
1217 R[k+2] = HighWord(p);
1218
1219 #define Bot_SaveAcc(k, i, j) \
1220 R[k] = c; \
1221 c = LowWord(d); \
1222 c += A[i] * B[j];
1223
1224 #define Bot_Acc(i, j) \
1225 c += A[i] * B[j];
1226
1227 #define Bot_End(n) \
1228 R[n-1] = c;
1229 #endif
1230
1231 #define Squ_Begin(n) \
1232 Declare2Words(p) \
1233 word c; \
1234 Declare2Words(d) \
1235 Declare2Words(e) \
1236 MultiplyWords(p, A[0], A[0]) \
1237 R[0] = LowWord(p); \
1238 AssignWord(e, HighWord(p)) \
1239 MultiplyWords(p, A[0], A[1]) \
1240 c = LowWord(p); \
1241 AssignWord(d, HighWord(p)) \
1242 Squ_NonDiag \
1243
1244 #define Squ_NonDiag \
1245 Double3Words(c, d)
1246
1247 #define Squ_SaveAcc(k, i, j) \
1248 Acc3WordsBy2(c, d, e) \
1249 R[k] = c; \
1250 MultiplyWords(p, A[i], A[j]) \
1251 c = LowWord(p); \
1252 AssignWord(d, HighWord(p)) \
1253
1254 #define Squ_Acc(i, j) \
1255 MulAcc(c, d, A[i], A[j])
1256
1257 #define Squ_Diag(i) \
1258 Squ_NonDiag \
1259 MulAcc(c, d, A[i], A[i])
1260
1261 #define Squ_End(n) \
1262 Acc3WordsBy2(c, d, e) \
1263 R[2*n-3] = c; \
1264 MultiplyWords(p, A[n-1], A[n-1])\
1265 Acc2WordsBy2(p, e) \
1266 R[2*n-2] = LowWord(p); \
1267 R[2*n-1] = HighWord(p);
1268
1269
Baseline_Multiply2(word * R,const word * AA,const word * BB)1270 void Baseline_Multiply2(word *R, const word *AA, const word *BB)
1271 {
1272 // http://github.com/weidai11/cryptopp/issues/188
1273 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1274 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1275
1276 Mul_2
1277 }
1278
Baseline_Multiply4(word * R,const word * AA,const word * BB)1279 void Baseline_Multiply4(word *R, const word *AA, const word *BB)
1280 {
1281 // http://github.com/weidai11/cryptopp/issues/188
1282 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1283 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1284
1285 Mul_4
1286 }
1287
Baseline_Multiply8(word * R,const word * AA,const word * BB)1288 void Baseline_Multiply8(word *R, const word *AA, const word *BB)
1289 {
1290 // http://github.com/weidai11/cryptopp/issues/188
1291 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1292 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1293
1294 Mul_8
1295 }
1296
Baseline_Square2(word * R,const word * AA)1297 void Baseline_Square2(word *R, const word *AA)
1298 {
1299 // http://github.com/weidai11/cryptopp/issues/188
1300 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1301
1302 Squ_2
1303 }
1304
Baseline_Square4(word * R,const word * AA)1305 void Baseline_Square4(word *R, const word *AA)
1306 {
1307 // http://github.com/weidai11/cryptopp/issues/188
1308 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1309
1310 Squ_4
1311 }
1312
Baseline_Square8(word * R,const word * AA)1313 void Baseline_Square8(word *R, const word *AA)
1314 {
1315 // http://github.com/weidai11/cryptopp/issues/188
1316 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1317
1318 Squ_8
1319 }
1320
Baseline_MultiplyBottom2(word * R,const word * AA,const word * BB)1321 void Baseline_MultiplyBottom2(word *R, const word *AA, const word *BB)
1322 {
1323 // http://github.com/weidai11/cryptopp/issues/188
1324 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1325 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1326
1327 Bot_2
1328
1329 // http://github.com/weidai11/cryptopp/issues/340
1330 #if defined(TWO_64_BIT_WORDS)
1331 CRYPTOPP_UNUSED(d0); CRYPTOPP_UNUSED(d1);
1332 #endif
1333 }
1334
Baseline_MultiplyBottom4(word * R,const word * AA,const word * BB)1335 void Baseline_MultiplyBottom4(word *R, const word *AA, const word *BB)
1336 {
1337 // http://github.com/weidai11/cryptopp/issues/188
1338 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1339 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1340
1341 Bot_4
1342 }
1343
Baseline_MultiplyBottom8(word * R,const word * AA,const word * BB)1344 void Baseline_MultiplyBottom8(word *R, const word *AA, const word *BB)
1345 {
1346 // http://github.com/weidai11/cryptopp/issues/188
1347 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1348 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1349
1350 Bot_8
1351 }
1352
1353 #define Top_Begin(n) \
1354 Declare2Words(p) \
1355 word c; \
1356 Declare2Words(d) \
1357 MultiplyWords(p, A[0], B[n-2]);\
1358 AssignWord(d, HighWord(p));
1359
1360 #define Top_Acc(i, j) \
1361 MultiplyWords(p, A[i], B[j]);\
1362 Acc2WordsBy1(d, HighWord(p));
1363
1364 #define Top_SaveAcc0(i, j) \
1365 c = LowWord(d); \
1366 AssignWord(d, HighWord(d)) \
1367 MulAcc(c, d, A[i], B[j])
1368
1369 #define Top_SaveAcc1(i, j) \
1370 c = L<c; \
1371 Acc2WordsBy1(d, c); \
1372 c = LowWord(d); \
1373 AssignWord(d, HighWord(d)) \
1374 MulAcc(c, d, A[i], B[j])
1375
Baseline_MultiplyTop2(word * R,const word * A,const word * B,word L)1376 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
1377 {
1378 CRYPTOPP_UNUSED(L);
1379 word T[4];
1380 Baseline_Multiply2(T, A, B);
1381 R[0] = T[2];
1382 R[1] = T[3];
1383 }
1384
Baseline_MultiplyTop4(word * R,const word * AA,const word * BB,word L)1385 void Baseline_MultiplyTop4(word *R, const word *AA, const word *BB, word L)
1386 {
1387 // http://github.com/weidai11/cryptopp/issues/188
1388 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1389 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1390
1391 Top_Begin(4)
1392 Top_Acc(1, 1) Top_Acc(2, 0) \
1393 Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1394 Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1395 Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1396 Mul_End(1, 3)
1397 }
1398
Baseline_MultiplyTop8(word * R,const word * AA,const word * BB,word L)1399 void Baseline_MultiplyTop8(word *R, const word *AA, const word *BB, word L)
1400 {
1401 // http://github.com/weidai11/cryptopp/issues/188
1402 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1403 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1404
1405 Top_Begin(8)
1406 Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1407 Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1408 Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1409 Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1410 Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1411 Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1412 Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1413 Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1414 Mul_End(5, 7)
1415 }
1416
1417 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
Baseline_Multiply16(word * R,const word * AA,const word * BB)1418 void Baseline_Multiply16(word *R, const word *AA, const word *BB)
1419 {
1420 // http://github.com/weidai11/cryptopp/issues/188
1421 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1422 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1423
1424 Mul_16
1425 }
1426
Baseline_Square16(word * R,const word * AA)1427 void Baseline_Square16(word *R, const word *AA)
1428 {
1429 // http://github.com/weidai11/cryptopp/issues/188
1430 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1431
1432 Squ_16
1433 }
1434
Baseline_MultiplyBottom16(word * R,const word * AA,const word * BB)1435 void Baseline_MultiplyBottom16(word *R, const word *AA, const word *BB)
1436 {
1437 // http://github.com/weidai11/cryptopp/issues/188
1438 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1439 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1440
1441 Bot_16
1442 }
1443
Baseline_MultiplyTop16(word * R,const word * AA,const word * BB,word L)1444 void Baseline_MultiplyTop16(word *R, const word *AA, const word *BB, word L)
1445 {
1446 // http://github.com/weidai11/cryptopp/issues/188
1447 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1448 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1449
1450 Top_Begin(16)
1451 Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1452 Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1453 Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1454 Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1455 Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1456 Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1457 Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1458 Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1459 Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1460 Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1461 Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1462 Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1463 Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1464 Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1465 Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1466 Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1467 Mul_End(13, 15)
1468 }
1469 #endif
1470
1471 // ********************************************************
1472
1473 #if CRYPTOPP_INTEGER_SSE2
1474
1475 CRYPTOPP_ALIGN_DATA(16)
1476 CRYPTOPP_TABLE
1477 const word32 s_maskLow16[4] = {
1478 0xffff,0xffff,0xffff,0xffff
1479 };
1480
1481 #undef Mul_Begin
1482 #undef Mul_Acc
1483 #undef Top_Begin
1484 #undef Top_Acc
1485 #undef Squ_Acc
1486 #undef Squ_NonDiag
1487 #undef Squ_Diag
1488 #undef Squ_SaveAcc
1489 #undef Squ_Begin
1490 #undef Mul_SaveAcc
1491 #undef Bot_Acc
1492 #undef Bot_SaveAcc
1493 #undef Bot_End
1494 #undef Squ_End
1495 #undef Mul_End
1496
1497 #define SSE2_FinalSave(k) \
1498 AS2( psllq xmm5, 16) \
1499 AS2( paddq xmm4, xmm5) \
1500 AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
1501
1502 #define SSE2_SaveShift(k) \
1503 AS2( movq xmm0, xmm6) \
1504 AS2( punpckhqdq xmm6, xmm0) \
1505 AS2( movq xmm1, xmm7) \
1506 AS2( punpckhqdq xmm7, xmm1) \
1507 AS2( paddd xmm6, xmm0) \
1508 AS2( pslldq xmm6, 4) \
1509 AS2( paddd xmm7, xmm1) \
1510 AS2( paddd xmm4, xmm6) \
1511 AS2( pslldq xmm7, 4) \
1512 AS2( movq xmm6, xmm4) \
1513 AS2( paddd xmm5, xmm7) \
1514 AS2( movq xmm7, xmm5) \
1515 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1516 AS2( psrlq xmm6, 16) \
1517 AS2( paddq xmm6, xmm7) \
1518 AS2( punpckhqdq xmm4, xmm0) \
1519 AS2( punpckhqdq xmm5, xmm0) \
1520 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
1521 AS2( psrlq xmm6, 3*16) \
1522 AS2( paddd xmm4, xmm6) \
1523
1524 #define Squ_SSE2_SaveShift(k) \
1525 AS2( movq xmm0, xmm6) \
1526 AS2( punpckhqdq xmm6, xmm0) \
1527 AS2( movq xmm1, xmm7) \
1528 AS2( punpckhqdq xmm7, xmm1) \
1529 AS2( paddd xmm6, xmm0) \
1530 AS2( pslldq xmm6, 4) \
1531 AS2( paddd xmm7, xmm1) \
1532 AS2( paddd xmm4, xmm6) \
1533 AS2( pslldq xmm7, 4) \
1534 AS2( movhlps xmm6, xmm4) \
1535 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1536 AS2( paddd xmm5, xmm7) \
1537 AS2( movhps QWORD PTR [esp+12], xmm5)\
1538 AS2( psrlq xmm4, 16) \
1539 AS2( paddq xmm4, xmm5) \
1540 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
1541 AS2( psrlq xmm4, 3*16) \
1542 AS2( paddd xmm4, xmm6) \
1543 AS2( movq QWORD PTR [esp+4], xmm4)\
1544
1545 #define SSE2_FirstMultiply(i) \
1546 AS2( movdqa xmm7, [esi+(i)*16])\
1547 AS2( movdqa xmm5, [edi-(i)*16])\
1548 AS2( pmuludq xmm5, xmm7) \
1549 AS2( movdqa xmm4, [ebx])\
1550 AS2( movdqa xmm6, xmm4) \
1551 AS2( pand xmm4, xmm5) \
1552 AS2( psrld xmm5, 16) \
1553 AS2( pmuludq xmm7, [edx-(i)*16])\
1554 AS2( pand xmm6, xmm7) \
1555 AS2( psrld xmm7, 16)
1556
1557 #define Squ_Begin(n) \
1558 SquPrologue \
1559 AS2( mov esi, esp)\
1560 AS2( and esp, 0xfffffff0)\
1561 AS2( lea edi, [esp-32*n])\
1562 AS2( sub esp, 32*n+16)\
1563 AS1( push esi)\
1564 AS2( mov esi, edi) \
1565 AS2( xor edx, edx) \
1566 ASL(1) \
1567 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1568 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1569 AS2( movdqa [edi+2*edx], xmm0) \
1570 AS2( psrlq xmm0, 32) \
1571 AS2( movdqa [edi+2*edx+16], xmm0) \
1572 AS2( movdqa [edi+16*n+2*edx], xmm1) \
1573 AS2( psrlq xmm1, 32) \
1574 AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
1575 AS2( add edx, 16) \
1576 AS2( cmp edx, 8*(n)) \
1577 ASJ( jne, 1, b) \
1578 AS2( lea edx, [edi+16*n])\
1579 SSE2_FirstMultiply(0) \
1580
1581 #define Squ_Acc(i) \
1582 ASL(LSqu##i) \
1583 AS2( movdqa xmm1, [esi+(i)*16]) \
1584 AS2( movdqa xmm0, [edi-(i)*16]) \
1585 AS2( movdqa xmm2, [ebx]) \
1586 AS2( pmuludq xmm0, xmm1) \
1587 AS2( pmuludq xmm1, [edx-(i)*16]) \
1588 AS2( movdqa xmm3, xmm2) \
1589 AS2( pand xmm2, xmm0) \
1590 AS2( psrld xmm0, 16) \
1591 AS2( paddd xmm4, xmm2) \
1592 AS2( paddd xmm5, xmm0) \
1593 AS2( pand xmm3, xmm1) \
1594 AS2( psrld xmm1, 16) \
1595 AS2( paddd xmm6, xmm3) \
1596 AS2( paddd xmm7, xmm1) \
1597
1598 #define Squ_Acc1(i)
1599 #define Squ_Acc2(i) ASC(call, LSqu##i)
1600 #define Squ_Acc3(i) Squ_Acc2(i)
1601 #define Squ_Acc4(i) Squ_Acc2(i)
1602 #define Squ_Acc5(i) Squ_Acc2(i)
1603 #define Squ_Acc6(i) Squ_Acc2(i)
1604 #define Squ_Acc7(i) Squ_Acc2(i)
1605 #define Squ_Acc8(i) Squ_Acc2(i)
1606
1607 #define SSE2_End(E, n) \
1608 SSE2_SaveShift(2*(n)-3) \
1609 AS2( movdqa xmm7, [esi+16]) \
1610 AS2( movdqa xmm0, [edi]) \
1611 AS2( pmuludq xmm0, xmm7) \
1612 AS2( movdqa xmm2, [ebx]) \
1613 AS2( pmuludq xmm7, [edx]) \
1614 AS2( movdqa xmm6, xmm2) \
1615 AS2( pand xmm2, xmm0) \
1616 AS2( psrld xmm0, 16) \
1617 AS2( paddd xmm4, xmm2) \
1618 AS2( paddd xmm5, xmm0) \
1619 AS2( pand xmm6, xmm7) \
1620 AS2( psrld xmm7, 16) \
1621 SSE2_SaveShift(2*(n)-2) \
1622 SSE2_FinalSave(2*(n)-1) \
1623 AS1( pop esp)\
1624 E
1625
1626 #define Squ_End(n) SSE2_End(SquEpilogue, n)
1627 #define Mul_End(n) SSE2_End(MulEpilogue, n)
1628 #define Top_End(n) SSE2_End(TopEpilogue, n)
1629
1630 #define Squ_Column1(k, i) \
1631 Squ_SSE2_SaveShift(k) \
1632 AS2( add esi, 16) \
1633 SSE2_FirstMultiply(1)\
1634 Squ_Acc##i(i) \
1635 AS2( paddd xmm4, xmm4) \
1636 AS2( paddd xmm5, xmm5) \
1637 AS2( movdqa xmm3, [esi]) \
1638 AS2( movq xmm1, QWORD PTR [esi+8]) \
1639 AS2( pmuludq xmm1, xmm3) \
1640 AS2( pmuludq xmm3, xmm3) \
1641 AS2( movdqa xmm0, [ebx])\
1642 AS2( movdqa xmm2, xmm0) \
1643 AS2( pand xmm0, xmm1) \
1644 AS2( psrld xmm1, 16) \
1645 AS2( paddd xmm6, xmm0) \
1646 AS2( paddd xmm7, xmm1) \
1647 AS2( pand xmm2, xmm3) \
1648 AS2( psrld xmm3, 16) \
1649 AS2( paddd xmm6, xmm6) \
1650 AS2( paddd xmm7, xmm7) \
1651 AS2( paddd xmm4, xmm2) \
1652 AS2( paddd xmm5, xmm3) \
1653 AS2( movq xmm0, QWORD PTR [esp+4])\
1654 AS2( movq xmm1, QWORD PTR [esp+12])\
1655 AS2( paddd xmm4, xmm0)\
1656 AS2( paddd xmm5, xmm1)\
1657
1658 #define Squ_Column0(k, i) \
1659 Squ_SSE2_SaveShift(k) \
1660 AS2( add edi, 16) \
1661 AS2( add edx, 16) \
1662 SSE2_FirstMultiply(1)\
1663 Squ_Acc##i(i) \
1664 AS2( paddd xmm6, xmm6) \
1665 AS2( paddd xmm7, xmm7) \
1666 AS2( paddd xmm4, xmm4) \
1667 AS2( paddd xmm5, xmm5) \
1668 AS2( movq xmm0, QWORD PTR [esp+4])\
1669 AS2( movq xmm1, QWORD PTR [esp+12])\
1670 AS2( paddd xmm4, xmm0)\
1671 AS2( paddd xmm5, xmm1)\
1672
1673 #define SSE2_MulAdd45 \
1674 AS2( movdqa xmm7, [esi]) \
1675 AS2( movdqa xmm0, [edi]) \
1676 AS2( pmuludq xmm0, xmm7) \
1677 AS2( movdqa xmm2, [ebx]) \
1678 AS2( pmuludq xmm7, [edx]) \
1679 AS2( movdqa xmm6, xmm2) \
1680 AS2( pand xmm2, xmm0) \
1681 AS2( psrld xmm0, 16) \
1682 AS2( paddd xmm4, xmm2) \
1683 AS2( paddd xmm5, xmm0) \
1684 AS2( pand xmm6, xmm7) \
1685 AS2( psrld xmm7, 16)
1686
1687 #define Mul_Begin(n) \
1688 MulPrologue \
1689 AS2( mov esi, esp)\
1690 AS2( and esp, 0xfffffff0)\
1691 AS2( sub esp, 48*n+16)\
1692 AS1( push esi)\
1693 AS2( xor edx, edx) \
1694 ASL(1) \
1695 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1696 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1697 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1698 AS2( movdqa [esp+20+2*edx], xmm0) \
1699 AS2( psrlq xmm0, 32) \
1700 AS2( movdqa [esp+20+2*edx+16], xmm0) \
1701 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1702 AS2( psrlq xmm1, 32) \
1703 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1704 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1705 AS2( psrlq xmm2, 32) \
1706 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1707 AS2( add edx, 16) \
1708 AS2( cmp edx, 8*(n)) \
1709 ASJ( jne, 1, b) \
1710 AS2( lea edi, [esp+20])\
1711 AS2( lea edx, [esp+20+16*n])\
1712 AS2( lea esi, [esp+20+32*n])\
1713 SSE2_FirstMultiply(0) \
1714
1715 #define Mul_Acc(i) \
1716 ASL(LMul##i) \
1717 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1718 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1719 AS2( movdqa xmm2, [ebx]) \
1720 AS2( pmuludq xmm0, xmm1) \
1721 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1722 AS2( movdqa xmm3, xmm2) \
1723 AS2( pand xmm2, xmm0) \
1724 AS2( psrld xmm0, 16) \
1725 AS2( paddd xmm4, xmm2) \
1726 AS2( paddd xmm5, xmm0) \
1727 AS2( pand xmm3, xmm1) \
1728 AS2( psrld xmm1, 16) \
1729 AS2( paddd xmm6, xmm3) \
1730 AS2( paddd xmm7, xmm1) \
1731
1732 #define Mul_Acc1(i)
1733 #define Mul_Acc2(i) ASC(call, LMul##i)
1734 #define Mul_Acc3(i) Mul_Acc2(i)
1735 #define Mul_Acc4(i) Mul_Acc2(i)
1736 #define Mul_Acc5(i) Mul_Acc2(i)
1737 #define Mul_Acc6(i) Mul_Acc2(i)
1738 #define Mul_Acc7(i) Mul_Acc2(i)
1739 #define Mul_Acc8(i) Mul_Acc2(i)
1740 #define Mul_Acc9(i) Mul_Acc2(i)
1741 #define Mul_Acc10(i) Mul_Acc2(i)
1742 #define Mul_Acc11(i) Mul_Acc2(i)
1743 #define Mul_Acc12(i) Mul_Acc2(i)
1744 #define Mul_Acc13(i) Mul_Acc2(i)
1745 #define Mul_Acc14(i) Mul_Acc2(i)
1746 #define Mul_Acc15(i) Mul_Acc2(i)
1747 #define Mul_Acc16(i) Mul_Acc2(i)
1748
1749 #define Mul_Column1(k, i) \
1750 SSE2_SaveShift(k) \
1751 AS2( add esi, 16) \
1752 SSE2_MulAdd45\
1753 Mul_Acc##i(i) \
1754
1755 #define Mul_Column0(k, i) \
1756 SSE2_SaveShift(k) \
1757 AS2( add edi, 16) \
1758 AS2( add edx, 16) \
1759 SSE2_MulAdd45\
1760 Mul_Acc##i(i) \
1761
1762 #define Bot_Acc(i) \
1763 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1764 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1765 AS2( pmuludq xmm0, xmm1) \
1766 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1767 AS2( paddq xmm4, xmm0) \
1768 AS2( paddd xmm6, xmm1)
1769
1770 #define Bot_SaveAcc(k) \
1771 SSE2_SaveShift(k) \
1772 AS2( add edi, 16) \
1773 AS2( add edx, 16) \
1774 AS2( movdqa xmm6, [esi]) \
1775 AS2( movdqa xmm0, [edi]) \
1776 AS2( pmuludq xmm0, xmm6) \
1777 AS2( paddq xmm4, xmm0) \
1778 AS2( psllq xmm5, 16) \
1779 AS2( paddq xmm4, xmm5) \
1780 AS2( pmuludq xmm6, [edx])
1781
1782 #define Bot_End(n) \
1783 AS2( movhlps xmm7, xmm6) \
1784 AS2( paddd xmm6, xmm7) \
1785 AS2( psllq xmm6, 32) \
1786 AS2( paddd xmm4, xmm6) \
1787 AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
1788 AS1( pop esp)\
1789 MulEpilogue
1790
1791 #define Top_Begin(n) \
1792 TopPrologue \
1793 AS2( mov edx, esp)\
1794 AS2( and esp, 0xfffffff0)\
1795 AS2( sub esp, 48*n+16)\
1796 AS1( push edx)\
1797 AS2( xor edx, edx) \
1798 ASL(1) \
1799 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1800 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1801 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1802 AS2( movdqa [esp+20+2*edx], xmm0) \
1803 AS2( psrlq xmm0, 32) \
1804 AS2( movdqa [esp+20+2*edx+16], xmm0) \
1805 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1806 AS2( psrlq xmm1, 32) \
1807 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1808 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1809 AS2( psrlq xmm2, 32) \
1810 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1811 AS2( add edx, 16) \
1812 AS2( cmp edx, 8*(n)) \
1813 ASJ( jne, 1, b) \
1814 AS2( mov eax, esi) \
1815 AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
1816 AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
1817 AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
1818 AS2( pxor xmm4, xmm4)\
1819 AS2( pxor xmm5, xmm5)
1820
1821 #define Top_Acc(i) \
1822 AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
1823 AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1824 AS2( psrlq xmm0, 48) \
1825 AS2( paddd xmm5, xmm0)\
1826
1827 #define Top_Column0(i) \
1828 AS2( psllq xmm5, 32) \
1829 AS2( add edi, 16) \
1830 AS2( add edx, 16) \
1831 SSE2_MulAdd45\
1832 Mul_Acc##i(i) \
1833
1834 #define Top_Column1(i) \
1835 SSE2_SaveShift(0) \
1836 AS2( add esi, 16) \
1837 SSE2_MulAdd45\
1838 Mul_Acc##i(i) \
1839 AS2( shr eax, 16) \
1840 AS2( movd xmm0, eax)\
1841 AS2( movd xmm1, [ecx+4])\
1842 AS2( psrld xmm1, 16)\
1843 AS2( pcmpgtd xmm1, xmm0)\
1844 AS2( psrld xmm1, 31)\
1845 AS2( paddd xmm4, xmm1)\
1846
SSE2_Square4(word * C,const word * A)1847 void SSE2_Square4(word *C, const word *A)
1848 {
1849 Squ_Begin(2)
1850 Squ_Column0(0, 1)
1851 Squ_End(2)
1852 }
1853
SSE2_Square8(word * C,const word * A)1854 void SSE2_Square8(word *C, const word *A)
1855 {
1856 Squ_Begin(4)
1857 #ifndef __GNUC__
1858 ASJ( jmp, 0, f)
1859 Squ_Acc(2)
1860 AS1( ret) ASL(0)
1861 #endif
1862 Squ_Column0(0, 1)
1863 Squ_Column1(1, 1)
1864 Squ_Column0(2, 2)
1865 Squ_Column1(3, 1)
1866 Squ_Column0(4, 1)
1867 Squ_End(4)
1868 }
1869
SSE2_Square16(word * C,const word * A)1870 void SSE2_Square16(word *C, const word *A)
1871 {
1872 Squ_Begin(8)
1873 #ifndef __GNUC__
1874 ASJ( jmp, 0, f)
1875 Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1876 AS1( ret) ASL(0)
1877 #endif
1878 Squ_Column0(0, 1)
1879 Squ_Column1(1, 1)
1880 Squ_Column0(2, 2)
1881 Squ_Column1(3, 2)
1882 Squ_Column0(4, 3)
1883 Squ_Column1(5, 3)
1884 Squ_Column0(6, 4)
1885 Squ_Column1(7, 3)
1886 Squ_Column0(8, 3)
1887 Squ_Column1(9, 2)
1888 Squ_Column0(10, 2)
1889 Squ_Column1(11, 1)
1890 Squ_Column0(12, 1)
1891 Squ_End(8)
1892 }
1893
SSE2_Square32(word * C,const word * A)1894 void SSE2_Square32(word *C, const word *A)
1895 {
1896 Squ_Begin(16)
1897 ASJ( jmp, 0, f)
1898 Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1899 AS1( ret) ASL(0)
1900 Squ_Column0(0, 1)
1901 Squ_Column1(1, 1)
1902 Squ_Column0(2, 2)
1903 Squ_Column1(3, 2)
1904 Squ_Column0(4, 3)
1905 Squ_Column1(5, 3)
1906 Squ_Column0(6, 4)
1907 Squ_Column1(7, 4)
1908 Squ_Column0(8, 5)
1909 Squ_Column1(9, 5)
1910 Squ_Column0(10, 6)
1911 Squ_Column1(11, 6)
1912 Squ_Column0(12, 7)
1913 Squ_Column1(13, 7)
1914 Squ_Column0(14, 8)
1915 Squ_Column1(15, 7)
1916 Squ_Column0(16, 7)
1917 Squ_Column1(17, 6)
1918 Squ_Column0(18, 6)
1919 Squ_Column1(19, 5)
1920 Squ_Column0(20, 5)
1921 Squ_Column1(21, 4)
1922 Squ_Column0(22, 4)
1923 Squ_Column1(23, 3)
1924 Squ_Column0(24, 3)
1925 Squ_Column1(25, 2)
1926 Squ_Column0(26, 2)
1927 Squ_Column1(27, 1)
1928 Squ_Column0(28, 1)
1929 Squ_End(16)
1930 }
1931
SSE2_Multiply4(word * C,const word * A,const word * B)1932 void SSE2_Multiply4(word *C, const word *A, const word *B)
1933 {
1934 Mul_Begin(2)
1935 #ifndef __GNUC__
1936 ASJ( jmp, 0, f)
1937 Mul_Acc(2)
1938 AS1( ret) ASL(0)
1939 #endif
1940 Mul_Column0(0, 2)
1941 Mul_End(2)
1942 }
1943
SSE2_Multiply8(word * C,const word * A,const word * B)1944 void SSE2_Multiply8(word *C, const word *A, const word *B)
1945 {
1946 Mul_Begin(4)
1947 #ifndef __GNUC__
1948 ASJ( jmp, 0, f)
1949 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1950 AS1( ret) ASL(0)
1951 #endif
1952 Mul_Column0(0, 2)
1953 Mul_Column1(1, 3)
1954 Mul_Column0(2, 4)
1955 Mul_Column1(3, 3)
1956 Mul_Column0(4, 2)
1957 Mul_End(4)
1958 }
1959
SSE2_Multiply16(word * C,const word * A,const word * B)1960 void SSE2_Multiply16(word *C, const word *A, const word *B)
1961 {
1962 Mul_Begin(8)
1963 #ifndef __GNUC__
1964 ASJ( jmp, 0, f)
1965 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1966 AS1( ret) ASL(0)
1967 #endif
1968 Mul_Column0(0, 2)
1969 Mul_Column1(1, 3)
1970 Mul_Column0(2, 4)
1971 Mul_Column1(3, 5)
1972 Mul_Column0(4, 6)
1973 Mul_Column1(5, 7)
1974 Mul_Column0(6, 8)
1975 Mul_Column1(7, 7)
1976 Mul_Column0(8, 6)
1977 Mul_Column1(9, 5)
1978 Mul_Column0(10, 4)
1979 Mul_Column1(11, 3)
1980 Mul_Column0(12, 2)
1981 Mul_End(8)
1982 }
1983
SSE2_Multiply32(word * C,const word * A,const word * B)1984 void SSE2_Multiply32(word *C, const word *A, const word *B)
1985 {
1986 Mul_Begin(16)
1987 ASJ( jmp, 0, f)
1988 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1989 AS1( ret) ASL(0)
1990 Mul_Column0(0, 2)
1991 Mul_Column1(1, 3)
1992 Mul_Column0(2, 4)
1993 Mul_Column1(3, 5)
1994 Mul_Column0(4, 6)
1995 Mul_Column1(5, 7)
1996 Mul_Column0(6, 8)
1997 Mul_Column1(7, 9)
1998 Mul_Column0(8, 10)
1999 Mul_Column1(9, 11)
2000 Mul_Column0(10, 12)
2001 Mul_Column1(11, 13)
2002 Mul_Column0(12, 14)
2003 Mul_Column1(13, 15)
2004 Mul_Column0(14, 16)
2005 Mul_Column1(15, 15)
2006 Mul_Column0(16, 14)
2007 Mul_Column1(17, 13)
2008 Mul_Column0(18, 12)
2009 Mul_Column1(19, 11)
2010 Mul_Column0(20, 10)
2011 Mul_Column1(21, 9)
2012 Mul_Column0(22, 8)
2013 Mul_Column1(23, 7)
2014 Mul_Column0(24, 6)
2015 Mul_Column1(25, 5)
2016 Mul_Column0(26, 4)
2017 Mul_Column1(27, 3)
2018 Mul_Column0(28, 2)
2019 Mul_End(16)
2020 }
2021
SSE2_MultiplyBottom4(word * C,const word * A,const word * B)2022 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
2023 {
2024 Mul_Begin(2)
2025 Bot_SaveAcc(0) Bot_Acc(2)
2026 Bot_End(2)
2027 }
2028
SSE2_MultiplyBottom8(word * C,const word * A,const word * B)2029 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
2030 {
2031 Mul_Begin(4)
2032 #ifndef __GNUC__
2033 ASJ( jmp, 0, f)
2034 Mul_Acc(3) Mul_Acc(2)
2035 AS1( ret) ASL(0)
2036 #endif
2037 Mul_Column0(0, 2)
2038 Mul_Column1(1, 3)
2039 Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2040 Bot_End(4)
2041 }
2042
SSE2_MultiplyBottom16(word * C,const word * A,const word * B)2043 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
2044 {
2045 Mul_Begin(8)
2046 #ifndef __GNUC__
2047 ASJ( jmp, 0, f)
2048 Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2049 AS1( ret) ASL(0)
2050 #endif
2051 Mul_Column0(0, 2)
2052 Mul_Column1(1, 3)
2053 Mul_Column0(2, 4)
2054 Mul_Column1(3, 5)
2055 Mul_Column0(4, 6)
2056 Mul_Column1(5, 7)
2057 Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2058 Bot_End(8)
2059 }
2060
SSE2_MultiplyBottom32(word * C,const word * A,const word * B)2061 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
2062 {
2063 Mul_Begin(16)
2064 #ifndef __GNUC__
2065 ASJ( jmp, 0, f)
2066 Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2067 AS1( ret) ASL(0)
2068 #endif
2069 Mul_Column0(0, 2)
2070 Mul_Column1(1, 3)
2071 Mul_Column0(2, 4)
2072 Mul_Column1(3, 5)
2073 Mul_Column0(4, 6)
2074 Mul_Column1(5, 7)
2075 Mul_Column0(6, 8)
2076 Mul_Column1(7, 9)
2077 Mul_Column0(8, 10)
2078 Mul_Column1(9, 11)
2079 Mul_Column0(10, 12)
2080 Mul_Column1(11, 13)
2081 Mul_Column0(12, 14)
2082 Mul_Column1(13, 15)
2083 Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2084 Bot_End(16)
2085 }
2086
SSE2_MultiplyTop8(word * C,const word * A,const word * B,word L)2087 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
2088 {
2089 Top_Begin(4)
2090 Top_Acc(3) Top_Acc(2) Top_Acc(1)
2091 #ifndef __GNUC__
2092 ASJ( jmp, 0, f)
2093 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2094 AS1( ret) ASL(0)
2095 #endif
2096 Top_Column0(4)
2097 Top_Column1(3)
2098 Mul_Column0(0, 2)
2099 Top_End(2)
2100 }
2101
SSE2_MultiplyTop16(word * C,const word * A,const word * B,word L)2102 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
2103 {
2104 Top_Begin(8)
2105 Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2106 #ifndef __GNUC__
2107 ASJ( jmp, 0, f)
2108 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2109 AS1( ret) ASL(0)
2110 #endif
2111 Top_Column0(8)
2112 Top_Column1(7)
2113 Mul_Column0(0, 6)
2114 Mul_Column1(1, 5)
2115 Mul_Column0(2, 4)
2116 Mul_Column1(3, 3)
2117 Mul_Column0(4, 2)
2118 Top_End(4)
2119 }
2120
SSE2_MultiplyTop32(word * C,const word * A,const word * B,word L)2121 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
2122 {
2123 Top_Begin(16)
2124 Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2125 #ifndef __GNUC__
2126 ASJ( jmp, 0, f)
2127 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2128 AS1( ret) ASL(0)
2129 #endif
2130 Top_Column0(16)
2131 Top_Column1(15)
2132 Mul_Column0(0, 14)
2133 Mul_Column1(1, 13)
2134 Mul_Column0(2, 12)
2135 Mul_Column1(3, 11)
2136 Mul_Column0(4, 10)
2137 Mul_Column1(5, 9)
2138 Mul_Column0(6, 8)
2139 Mul_Column1(7, 7)
2140 Mul_Column0(8, 6)
2141 Mul_Column1(9, 5)
2142 Mul_Column0(10, 4)
2143 Mul_Column1(11, 3)
2144 Mul_Column0(12, 2)
2145 Top_End(8)
2146 }
2147
2148 #endif // #if CRYPTOPP_INTEGER_SSE2
2149
2150 // ********************************************************
2151
2152 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
2153 typedef void (* PMul)(word *C, const word *A, const word *B);
2154 typedef void (* PSqu)(word *C, const word *A);
2155 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
2156
2157 #if CRYPTOPP_INTEGER_SSE2
2158 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
2159 static size_t s_recursionLimit = 8;
2160 #else
2161 static const size_t s_recursionLimit = 16;
2162 #endif // CRYPTOPP_INTEGER_SSE2
2163
2164 static PMul s_pMul[9], s_pBot[9];
2165 static PSqu s_pSqu[9];
2166 static PMulTop s_pTop[9];
2167
SetFunctionPointers()2168 void SetFunctionPointers()
2169 {
2170 s_pMul[0] = &Baseline_Multiply2;
2171 s_pBot[0] = &Baseline_MultiplyBottom2;
2172 s_pSqu[0] = &Baseline_Square2;
2173 s_pTop[0] = &Baseline_MultiplyTop2;
2174 s_pTop[1] = &Baseline_MultiplyTop4;
2175
2176 #if CRYPTOPP_INTEGER_SSE2
2177 if (HasSSE2())
2178 {
2179 if (IsP4())
2180 {
2181 s_pAdd = &SSE2_Add;
2182 s_pSub = &SSE2_Sub;
2183 }
2184
2185 s_recursionLimit = 32;
2186
2187 s_pMul[1] = &SSE2_Multiply4;
2188 s_pMul[2] = &SSE2_Multiply8;
2189 s_pMul[4] = &SSE2_Multiply16;
2190 s_pMul[8] = &SSE2_Multiply32;
2191
2192 s_pBot[1] = &SSE2_MultiplyBottom4;
2193 s_pBot[2] = &SSE2_MultiplyBottom8;
2194 s_pBot[4] = &SSE2_MultiplyBottom16;
2195 s_pBot[8] = &SSE2_MultiplyBottom32;
2196
2197 s_pSqu[1] = &SSE2_Square4;
2198 s_pSqu[2] = &SSE2_Square8;
2199 s_pSqu[4] = &SSE2_Square16;
2200 s_pSqu[8] = &SSE2_Square32;
2201
2202 s_pTop[2] = &SSE2_MultiplyTop8;
2203 s_pTop[4] = &SSE2_MultiplyTop16;
2204 s_pTop[8] = &SSE2_MultiplyTop32;
2205 }
2206 else
2207 #endif // CRYPTOPP_INTEGER_SSE2
2208 {
2209 s_pMul[1] = &Baseline_Multiply4;
2210 s_pMul[2] = &Baseline_Multiply8;
2211
2212 s_pBot[1] = &Baseline_MultiplyBottom4;
2213 s_pBot[2] = &Baseline_MultiplyBottom8;
2214
2215 s_pSqu[1] = &Baseline_Square4;
2216 s_pSqu[2] = &Baseline_Square8;
2217
2218 s_pTop[2] = &Baseline_MultiplyTop8;
2219
2220 #if !CRYPTOPP_INTEGER_SSE2
2221 s_pMul[4] = &Baseline_Multiply16;
2222 s_pBot[4] = &Baseline_MultiplyBottom16;
2223 s_pSqu[4] = &Baseline_Square16;
2224 s_pTop[4] = &Baseline_MultiplyTop16;
2225 #endif // !CRYPTOPP_INTEGER_SSE2
2226 }
2227 }
2228
Add(word * C,const word * A,const word * B,size_t N)2229 inline int Add(word *C, const word *A, const word *B, size_t N)
2230 {
2231 #if CRYPTOPP_INTEGER_SSE2
2232 return s_pAdd(N, C, A, B);
2233 #else
2234 return Baseline_Add(N, C, A, B);
2235 #endif // CRYPTOPP_INTEGER_SSE2
2236 }
2237
Subtract(word * C,const word * A,const word * B,size_t N)2238 inline int Subtract(word *C, const word *A, const word *B, size_t N)
2239 {
2240 #if CRYPTOPP_INTEGER_SSE2
2241 return s_pSub(N, C, A, B);
2242 #else
2243 return Baseline_Sub(N, C, A, B);
2244 #endif // CRYPTOPP_INTEGER_SSE2
2245 }
2246
2247 // ********************************************************
2248
2249
2250 #define A0 A
2251 #define A1 (A+N2)
2252 #define B0 B
2253 #define B1 (B+N2)
2254
2255 #define T0 T
2256 #define T1 (T+N2)
2257 #define T2 (T+N)
2258 #define T3 (T+N+N2)
2259
2260 #define R0 R
2261 #define R1 (R+N2)
2262 #define R2 (R+N)
2263 #define R3 (R+N+N2)
2264
2265 // R[2*N] - result = A*B
2266 // T[2*N] - temporary work space
2267 // A[N] --- multiplier
2268 // B[N] --- multiplicant
2269
RecursiveMultiply(word * R,word * T,const word * A,const word * B,size_t N)2270 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
2271 {
2272 CRYPTOPP_ASSERT(N>=2 && N%2==0);
2273
2274 if (N <= s_recursionLimit)
2275 s_pMul[N/4](R, A, B);
2276 else
2277 {
2278 const size_t N2 = N/2;
2279
2280 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2281 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2282
2283 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2284 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2285
2286 RecursiveMultiply(R2, T2, A1, B1, N2);
2287 RecursiveMultiply(T0, T2, R0, R1, N2);
2288 RecursiveMultiply(R0, T2, A0, B0, N2);
2289
2290 // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
2291
2292 int c2 = Add(R2, R2, R1, N2);
2293 int c3 = c2;
2294 c2 += Add(R1, R2, R0, N2);
2295 c3 += Add(R2, R2, R3, N2);
2296
2297 if (AN2 == BN2)
2298 c3 -= Subtract(R1, R1, T0, N);
2299 else
2300 c3 += Add(R1, R1, T0, N);
2301
2302 c3 += Increment(R2, N2, c2);
2303 CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2304 Increment(R3, N2, c3);
2305 }
2306 }
2307
2308 // R[2*N] - result = A*A
2309 // T[2*N] - temporary work space
2310 // A[N] --- number to be squared
2311
RecursiveSquare(word * R,word * T,const word * A,size_t N)2312 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
2313 {
2314 CRYPTOPP_ASSERT(N && N%2==0);
2315
2316 if (N <= s_recursionLimit)
2317 s_pSqu[N/4](R, A);
2318 else
2319 {
2320 const size_t N2 = N/2;
2321
2322 RecursiveSquare(R0, T2, A0, N2);
2323 RecursiveSquare(R2, T2, A1, N2);
2324 RecursiveMultiply(T0, T2, A0, A1, N2);
2325
2326 int carry = Add(R1, R1, T0, N);
2327 carry += Add(R1, R1, T0, N);
2328 Increment(R3, N2, carry);
2329 }
2330 }
2331
2332 // R[N] - bottom half of A*B
2333 // T[3*N/2] - temporary work space
2334 // A[N] - multiplier
2335 // B[N] - multiplicant
2336
RecursiveMultiplyBottom(word * R,word * T,const word * A,const word * B,size_t N)2337 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2338 {
2339 CRYPTOPP_ASSERT(N>=2 && N%2==0);
2340
2341 if (N <= s_recursionLimit)
2342 s_pBot[N/4](R, A, B);
2343 else
2344 {
2345 const size_t N2 = N/2;
2346
2347 RecursiveMultiply(R, T, A0, B0, N2);
2348 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2349 Add(R1, R1, T0, N2);
2350 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2351 Add(R1, R1, T0, N2);
2352 }
2353 }
2354
2355 // R[N] --- upper half of A*B
2356 // T[2*N] - temporary work space
2357 // L[N] --- lower half of A*B
2358 // A[N] --- multiplier
2359 // B[N] --- multiplicant
2360
MultiplyTop(word * R,word * T,const word * L,const word * A,const word * B,size_t N)2361 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
2362 {
2363 CRYPTOPP_ASSERT(N>=2 && N%2==0);
2364
2365 if (N <= s_recursionLimit)
2366 s_pTop[N/4](R, A, B, L[N-1]);
2367 else
2368 {
2369 const size_t N2 = N/2;
2370
2371 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2372 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2373
2374 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2375 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2376
2377 RecursiveMultiply(T0, T2, R0, R1, N2);
2378 RecursiveMultiply(R0, T2, A1, B1, N2);
2379
2380 // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
2381
2382 int t, c3;
2383 int c2 = Subtract(T2, L+N2, L, N2);
2384
2385 if (AN2 == BN2)
2386 {
2387 c2 -= Add(T2, T2, T0, N2);
2388 t = (Compare(T2, R0, N2) == -1);
2389 c3 = t - Subtract(T2, T2, T1, N2);
2390 }
2391 else
2392 {
2393 c2 += Subtract(T2, T2, T0, N2);
2394 t = (Compare(T2, R0, N2) == -1);
2395 c3 = t + Add(T2, T2, T1, N2);
2396 }
2397
2398 c2 += t;
2399 if (c2 >= 0)
2400 c3 += Increment(T2, N2, c2);
2401 else
2402 c3 -= Decrement(T2, N2, -c2);
2403 c3 += Add(R0, T2, R1, N2);
2404
2405 CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2406 Increment(R1, N2, c3);
2407 }
2408 }
2409
Multiply(word * R,word * T,const word * A,const word * B,size_t N)2410 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
2411 {
2412 RecursiveMultiply(R, T, A, B, N);
2413 }
2414
Square(word * R,word * T,const word * A,size_t N)2415 inline void Square(word *R, word *T, const word *A, size_t N)
2416 {
2417 RecursiveSquare(R, T, A, N);
2418 }
2419
MultiplyBottom(word * R,word * T,const word * A,const word * B,size_t N)2420 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2421 {
2422 RecursiveMultiplyBottom(R, T, A, B, N);
2423 }
2424
2425 // R[NA+NB] - result = A*B
2426 // T[NA+NB] - temporary work space
2427 // A[NA] ---- multiplier
2428 // B[NB] ---- multiplicant
2429
AsymmetricMultiply(word * R,word * T,const word * A,size_t NA,const word * B,size_t NB)2430 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
2431 {
2432 if (NA == NB)
2433 {
2434 // Profiling guided the flow below.
2435 if (A != B)
2436 Multiply(R, T, A, B, NA);
2437 else
2438 Square(R, T, A, NA);
2439
2440 return;
2441 }
2442
2443 if (NA > NB)
2444 {
2445 std::swap(A, B);
2446 std::swap(NA, NB);
2447 }
2448
2449 CRYPTOPP_ASSERT(NB % NA == 0);
2450
2451 if (NA==2 && !A[1])
2452 {
2453 // Profiling guided the flow below.
2454 switch (A[0])
2455 {
2456 default:
2457 R[NB] = LinearMultiply(R, B, A[0], NB);
2458 R[NB+1] = 0;
2459 return;
2460 case 0:
2461 SetWords(R, 0, NB+2);
2462 return;
2463 case 1:
2464 CopyWords(R, B, NB);
2465 R[NB] = R[NB+1] = 0;
2466 return;
2467 }
2468 }
2469
2470 size_t i;
2471 if ((NB/NA)%2 == 0)
2472 {
2473 Multiply(R, T, A, B, NA);
2474 CopyWords(T+2*NA, R+NA, NA);
2475
2476 for (i=2*NA; i<NB; i+=2*NA)
2477 Multiply(T+NA+i, T, A, B+i, NA);
2478 for (i=NA; i<NB; i+=2*NA)
2479 Multiply(R+i, T, A, B+i, NA);
2480 }
2481 else
2482 {
2483 for (i=0; i<NB; i+=2*NA)
2484 Multiply(R+i, T, A, B+i, NA);
2485 for (i=NA; i<NB; i+=2*NA)
2486 Multiply(T+NA+i, T, A, B+i, NA);
2487 }
2488
2489 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2490 Increment(R+NB, NA);
2491 }
2492
2493 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
2494 // T[3*N/2] - temporary work space
2495 // A[N] ----- an odd number as input
2496
RecursiveInverseModPower2(word * R,word * T,const word * A,size_t N)2497 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
2498 {
2499 // Profiling guided the flow below.
2500 if (N!=2)
2501 {
2502 const size_t N2 = N/2;
2503 RecursiveInverseModPower2(R0, T0, A0, N2);
2504 T0[0] = 1;
2505 SetWords(T0+1, 0, N2-1);
2506 MultiplyTop(R1, T1, T0, R0, A0, N2);
2507 MultiplyBottom(T0, T1, R0, A1, N2);
2508 Add(T0, R1, T0, N2);
2509 TwosComplement(T0, N2);
2510 MultiplyBottom(R1, T1, R0, T0, N2);
2511 }
2512 else
2513 {
2514 T[0] = AtomicInverseModPower2(A[0]);
2515 T[1] = 0;
2516 s_pBot[0](T+2, T, A);
2517 TwosComplement(T+2, 2);
2518 Increment(T+2, 2, 2);
2519 s_pBot[0](R, T, T+2);
2520 }
2521 }
2522
2523 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
2524 // T[3*N] - temporary work space
2525 // X[2*N] - number to be reduced
2526 // M[N] --- modulus
2527 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
2528
MontgomeryReduce(word * R,word * T,word * X,const word * M,const word * U,size_t N)2529 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
2530 {
2531 #if 1
2532 MultiplyBottom(R, T, X, U, N);
2533 MultiplyTop(T, T+N, X, R, M, N);
2534 word borrow = Subtract(T, X+N, T, N);
2535 // defend against timing attack by doing this Add even when not needed
2536 word carry = Add(T+N, T, M, N);
2537 CRYPTOPP_ASSERT(carry | !borrow);
2538 CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2539 CopyWords(R, T + ((0-borrow) & N), N);
2540 #elif 0
2541 const word u = 0-U[0];
2542 Declare2Words(p)
2543 for (size_t i=0; i<N; i++)
2544 {
2545 const word t = u * X[i];
2546 word c = 0;
2547 for (size_t j=0; j<N; j+=2)
2548 {
2549 MultiplyWords(p, t, M[j]);
2550 Acc2WordsBy1(p, X[i+j]);
2551 Acc2WordsBy1(p, c);
2552 X[i+j] = LowWord(p);
2553 c = HighWord(p);
2554 MultiplyWords(p, t, M[j+1]);
2555 Acc2WordsBy1(p, X[i+j+1]);
2556 Acc2WordsBy1(p, c);
2557 X[i+j+1] = LowWord(p);
2558 c = HighWord(p);
2559 }
2560
2561 if (Increment(X+N+i, N-i, c))
2562 while (!Subtract(X+N, X+N, M, N)) {}
2563 }
2564
2565 memcpy(R, X+N, N*WORD_SIZE);
2566 #else
2567 __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2568 for (size_t i=0; i<N; i++)
2569 {
2570 __m64 t = _mm_cvtsi32_si64(X[i]);
2571 t = _mm_mul_su32(t, u);
2572 __m64 c = _mm_setzero_si64();
2573 for (size_t j=0; j<N; j+=2)
2574 {
2575 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2576 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2577 c = _mm_add_si64(c, p);
2578 X[i+j] = _mm_cvtsi64_si32(c);
2579 c = _mm_srli_si64(c, 32);
2580 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2581 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2582 c = _mm_add_si64(c, p);
2583 X[i+j+1] = _mm_cvtsi64_si32(c);
2584 c = _mm_srli_si64(c, 32);
2585 }
2586
2587 if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2588 while (!Subtract(X+N, X+N, M, N)) {}
2589 }
2590
2591 memcpy(R, X+N, N*WORD_SIZE);
2592 _mm_empty();
2593 #endif
2594 }
2595
2596 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
2597 // T[2*N] - temporary work space
2598 // X[2*N] - number to be reduced
2599 // M[N] --- modulus
2600 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
2601 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
2602
HalfMontgomeryReduce(word * R,word * T,const word * X,const word * M,const word * U,const word * V,size_t N)2603 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
2604 {
2605 CRYPTOPP_ASSERT(N%2==0 && N>=4);
2606
2607 #define M0 M
2608 #define M1 (M+N2)
2609 #define V0 V
2610 #define V1 (V+N2)
2611
2612 #define X0 X
2613 #define X1 (X+N2)
2614 #define X2 (X+N)
2615 #define X3 (X+N+N2)
2616
2617 const size_t N2 = N/2;
2618 Multiply(T0, T2, V0, X3, N2);
2619 int c2 = Add(T0, T0, X0, N);
2620 MultiplyBottom(T3, T2, T0, U, N2);
2621 MultiplyTop(T2, R, T0, T3, M0, N2);
2622 c2 -= Subtract(T2, T1, T2, N2);
2623 Multiply(T0, R, T3, M1, N2);
2624 c2 -= Subtract(T0, T2, T0, N2);
2625 int c3 = -(int)Subtract(T1, X2, T1, N2);
2626 Multiply(R0, T2, V1, X3, N2);
2627 c3 += Add(R, R, T, N);
2628
2629 if (c2>0)
2630 c3 += Increment(R1, N2);
2631 else if (c2<0)
2632 c3 -= Decrement(R1, N2, -c2);
2633
2634 CRYPTOPP_ASSERT(c3>=-1 && c3<=1);
2635 if (c3>0)
2636 Subtract(R, R, M, N);
2637 else if (c3<0)
2638 Add(R, R, M, N);
2639
2640 #undef M0
2641 #undef M1
2642 #undef V0
2643 #undef V1
2644
2645 #undef X0
2646 #undef X1
2647 #undef X2
2648 #undef X3
2649 }
2650
2651 #undef A0
2652 #undef A1
2653 #undef B0
2654 #undef B1
2655
2656 #undef T0
2657 #undef T1
2658 #undef T2
2659 #undef T3
2660
2661 #undef R0
2662 #undef R1
2663 #undef R2
2664 #undef R3
2665
2666 /*
2667 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
2668 static word SubatomicDivide(word *A, word B0, word B1)
2669 {
2670 // Assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
2671 CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0));
2672
2673 // estimate the quotient: do a 2 word by 1 word divide
2674 word Q;
2675 if (B1+1 == 0)
2676 Q = A[2];
2677 else
2678 Q = DWord(A[1], A[2]).DividedBy(B1+1);
2679
2680 // now subtract Q*B from A
2681 DWord p = DWord::Multiply(B0, Q);
2682 DWord u = (DWord) A[0] - p.GetLowHalf();
2683 A[0] = u.GetLowHalf();
2684 u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
2685 A[1] = u.GetLowHalf();
2686 A[2] += u.GetHighHalf();
2687
2688 // Q <= actual quotient, so fix it
2689 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
2690 {
2691 u = (DWord) A[0] - B0;
2692 A[0] = u.GetLowHalf();
2693 u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
2694 A[1] = u.GetLowHalf();
2695 A[2] += u.GetHighHalf();
2696 Q++;
2697 CRYPTOPP_ASSERT(Q); // shouldn't overflow
2698 }
2699
2700 return Q;
2701 }
2702
2703 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
2704 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2705 {
2706 if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
2707 {
2708 Q[0] = A[2];
2709 Q[1] = A[3];
2710 }
2711 else
2712 {
2713 word T[4];
2714 T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
2715 Q[1] = SubatomicDivide(T+1, B[0], B[1]);
2716 Q[0] = SubatomicDivide(T, B[0], B[1]);
2717
2718 #if defined(CRYPTOPP_DEBUG)
2719 // multiply quotient and divisor and add remainder, make sure it equals dividend
2720 CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2721 word P[4];
2722 LowLevel::Multiply2(P, Q, B);
2723 Add(P, P, T, 4);
2724 CRYPTOPP_ASSERT(memcmp(P, A, 4*WORD_SIZE)==0);
2725 #endif
2726 }
2727 }
2728 */
2729
AtomicDivide(word * Q,const word * A,const word * B)2730 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2731 {
2732 word T[4];
2733 DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
2734 Q[0] = q.GetLowHalf();
2735 Q[1] = q.GetHighHalf();
2736
2737 #if defined(CRYPTOPP_DEBUG)
2738 if (B[0] || B[1])
2739 {
2740 // multiply quotient and divisor and add remainder, make sure it equals dividend
2741 CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2742 word P[4];
2743 s_pMul[0](P, Q, B);
2744 Add(P, P, T, 4);
2745 CRYPTOPP_ASSERT(memcmp(P, A, 4*WORD_SIZE)==0);
2746 }
2747 #endif
2748 }
2749
2750 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
CorrectQuotientEstimate(word * R,word * T,word * Q,const word * B,size_t N)2751 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
2752 {
2753 CRYPTOPP_ASSERT(N && N%2==0);
2754
2755 AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2756
2757 word borrow = Subtract(R, R, T, N+2);
2758 CRYPTOPP_ASSERT(!borrow && !R[N+1]);
2759 CRYPTOPP_UNUSED(borrow);
2760
2761 while (R[N] || Compare(R, B, N) >= 0)
2762 {
2763 R[N] -= Subtract(R, R, B, N);
2764 Q[1] += (++Q[0]==0);
2765 CRYPTOPP_ASSERT(Q[0] || Q[1]); // no overflow
2766 }
2767 }
2768
2769 // R[NB] -------- remainder = A%B
2770 // Q[NA-NB+2] --- quotient = A/B
2771 // T[NA+3*(NB+2)] - temp work space
2772 // A[NA] -------- dividend
2773 // B[NB] -------- divisor
2774
Divide(word * R,word * Q,word * T,const word * A,size_t NA,const word * B,size_t NB)2775 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
2776 {
2777 CRYPTOPP_ASSERT(NA && NB && NA%2==0 && NB%2==0);
2778 CRYPTOPP_ASSERT(B[NB-1] || B[NB-2]);
2779 CRYPTOPP_ASSERT(NB <= NA);
2780
2781 // set up temporary work space
2782 word *const TA=T;
2783 word *const TB=T+NA+2;
2784 word *const TP=T+NA+2+NB;
2785
2786 // copy B into TB and normalize it so that TB has highest bit set to 1
2787 unsigned shiftWords = (B[NB-1]==0);
2788 TB[0] = TB[NB-1] = 0;
2789 CopyWords(TB+shiftWords, B, NB-shiftWords);
2790 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
2791 CRYPTOPP_ASSERT(shiftBits < WORD_BITS);
2792 ShiftWordsLeftByBits(TB, NB, shiftBits);
2793
2794 // copy A into TA and normalize it
2795 TA[0] = TA[NA] = TA[NA+1] = 0;
2796 CopyWords(TA+shiftWords, A, NA);
2797 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2798
2799 if (TA[NA+1]==0 && TA[NA] <= 1)
2800 {
2801 Q[NA-NB+1] = Q[NA-NB] = 0;
2802 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2803 {
2804 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2805 ++Q[NA-NB];
2806 }
2807 }
2808 else
2809 {
2810 NA+=2;
2811 CRYPTOPP_ASSERT(Compare(TA+NA-NB, TB, NB) < 0);
2812 }
2813
2814 word BT[2];
2815 BT[0] = TB[NB-2] + 1;
2816 BT[1] = TB[NB-1] + (BT[0]==0);
2817
2818 // start reducing TA mod TB, 2 words at a time
2819 for (size_t i=NA-2; i>=NB; i-=2)
2820 {
2821 AtomicDivide(Q+i-NB, TA+i-2, BT);
2822 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
2823 }
2824
2825 // copy TA into R, and denormalize it
2826 CopyWords(R, TA+shiftWords, NB);
2827 ShiftWordsRightByBits(R, NB, shiftBits);
2828 }
2829
EvenWordCount(const word * X,size_t N)2830 static inline size_t EvenWordCount(const word *X, size_t N)
2831 {
2832 while (N && X[N-2]==0 && X[N-1]==0)
2833 N-=2;
2834 return N;
2835 }
2836
2837 // return k
2838 // R[N] --- result = A^(-1) * 2^k mod M
2839 // T[4*N] - temporary work space
2840 // A[NA] -- number to take inverse of
2841 // M[N] --- modulus
2842
AlmostInverse(word * R,word * T,const word * A,size_t NA,const word * M,size_t N)2843 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
2844 {
2845 CRYPTOPP_ASSERT(NA<=N && N && N%2==0);
2846
2847 word *b = T;
2848 word *c = T+N;
2849 word *f = T+2*N;
2850 word *g = T+3*N;
2851 size_t bcLen=2, fgLen=EvenWordCount(M, N);
2852 unsigned int k=0;
2853 bool s=false;
2854
2855 SetWords(T, 0, 3*N);
2856 b[0]=1;
2857 CopyWords(f, A, NA);
2858 CopyWords(g, M, N);
2859
2860 while (1)
2861 {
2862 word t=f[0];
2863 while (!t)
2864 {
2865 if (EvenWordCount(f, fgLen)==0)
2866 {
2867 SetWords(R, 0, N);
2868 return 0;
2869 }
2870
2871 ShiftWordsRightByWords(f, fgLen, 1);
2872 bcLen += 2 * (c[bcLen-1] != 0);
2873 CRYPTOPP_ASSERT(bcLen <= N);
2874 ShiftWordsLeftByWords(c, bcLen, 1);
2875 k+=WORD_BITS;
2876 t=f[0];
2877 }
2878
2879 // t must be non-0; otherwise undefined.
2880 unsigned int i = TrailingZeros(t);
2881 t >>= i;
2882 k += i;
2883
2884 if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2885 {
2886 if (s)
2887 Subtract(R, M, b, N);
2888 else
2889 CopyWords(R, b, N);
2890 return k;
2891 }
2892
2893 ShiftWordsRightByBits(f, fgLen, i);
2894 t = ShiftWordsLeftByBits(c, bcLen, i);
2895 c[bcLen] += t;
2896 bcLen += 2 * (t!=0);
2897 CRYPTOPP_ASSERT(bcLen <= N);
2898
2899 bool swap = Compare(f, g, fgLen)==-1;
2900 ConditionalSwapPointers(swap, f, g);
2901 ConditionalSwapPointers(swap, b, c);
2902 s ^= swap;
2903
2904 fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2905
2906 Subtract(f, f, g, fgLen);
2907 t = Add(b, b, c, bcLen);
2908 b[bcLen] += t;
2909 bcLen += 2*t;
2910 CRYPTOPP_ASSERT(bcLen <= N);
2911 }
2912 }
2913
2914 // R[N] - result = A/(2^k) mod M
2915 // A[N] - input
2916 // M[N] - modulus
2917
DivideByPower2Mod(word * R,const word * A,size_t k,const word * M,size_t N)2918 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2919 {
2920 CopyWords(R, A, N);
2921
2922 while (k--)
2923 {
2924 if (R[0]%2==0)
2925 ShiftWordsRightByBits(R, N, 1);
2926 else
2927 {
2928 word carry = Add(R, R, M, N);
2929 ShiftWordsRightByBits(R, N, 1);
2930 R[N-1] += carry<<(WORD_BITS-1);
2931 }
2932 }
2933 }
2934
2935 // R[N] - result = A*(2^k) mod M
2936 // A[N] - input
2937 // M[N] - modulus
2938
MultiplyByPower2Mod(word * R,const word * A,size_t k,const word * M,size_t N)2939 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2940 {
2941 CopyWords(R, A, N);
2942
2943 while (k--)
2944 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2945 Subtract(R, R, M, N);
2946 }
2947
2948 // ******************************************************************
2949
2950 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
2951
RoundupSize(size_t n)2952 static inline size_t RoundupSize(size_t n)
2953 {
2954 if (n<=8)
2955 return RoundupSizeTable[n];
2956 else if (n<=16)
2957 return 16;
2958 else if (n<=32)
2959 return 32;
2960 else if (n<=64)
2961 return 64;
2962 else
2963 return size_t(1) << BitPrecision(n-1);
2964 }
2965
Integer()2966 Integer::Integer()
2967 : reg(2), sign(POSITIVE)
2968 {
2969 reg[0] = reg[1] = 0;
2970 }
2971
Integer(const Integer & t)2972 Integer::Integer(const Integer& t)
2973 : reg(RoundupSize(t.WordCount())), sign(t.sign)
2974 {
2975 CopyWords(reg, t.reg, reg.size());
2976 }
2977
Integer(Sign s,lword value)2978 Integer::Integer(Sign s, lword value)
2979 : reg(2), sign(s)
2980 {
2981 reg[0] = word(value);
2982 reg[1] = word(SafeRightShift<WORD_BITS>(value));
2983 }
2984
Integer(signed long value)2985 Integer::Integer(signed long value)
2986 : reg(2)
2987 {
2988 if (value >= 0)
2989 sign = POSITIVE;
2990 else
2991 {
2992 sign = NEGATIVE;
2993 value = -value;
2994 }
2995 reg[0] = word(value);
2996 reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
2997 }
2998
Integer(Sign s,word high,word low)2999 Integer::Integer(Sign s, word high, word low)
3000 : reg(2), sign(s)
3001 {
3002 reg[0] = low;
3003 reg[1] = high;
3004 }
3005
IsConvertableToLong() const3006 bool Integer::IsConvertableToLong() const
3007 {
3008 if (ByteCount() > sizeof(long))
3009 return false;
3010
3011 unsigned long value = (unsigned long)reg[0];
3012 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
3013
3014 if (sign==POSITIVE)
3015 return (signed long)value >= 0;
3016 else
3017 return -(signed long)value < 0;
3018 }
3019
ConvertToLong() const3020 signed long Integer::ConvertToLong() const
3021 {
3022 CRYPTOPP_ASSERT(IsConvertableToLong());
3023
3024 unsigned long value = (unsigned long)reg[0];
3025 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
3026 return sign==POSITIVE ? value : -(signed long)value;
3027 }
3028
Integer(BufferedTransformation & encodedInteger,size_t byteCount,Signedness s,ByteOrder o)3029 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
3030 {
3031 CRYPTOPP_ASSERT(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER);
3032
3033 if (o != LITTLE_ENDIAN_ORDER)
3034 {
3035 Decode(encodedInteger, byteCount, s);
3036 }
3037 else
3038 {
3039 SecByteBlock block(byteCount);
3040 encodedInteger.Get(block, block.size());
3041 std::reverse(block.begin(), block.begin()+block.size());
3042
3043 Decode(block.begin(), block.size(), s);
3044 }
3045 }
3046
Integer(const byte * encodedInteger,size_t byteCount,Signedness s,ByteOrder o)3047 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
3048 {
3049 CRYPTOPP_ASSERT(encodedInteger && byteCount); // NULL buffer
3050 CRYPTOPP_ASSERT(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER);
3051
3052 if (o != LITTLE_ENDIAN_ORDER)
3053 {
3054 Decode(encodedInteger, byteCount, s);
3055 }
3056 else
3057 {
3058 SecByteBlock block(byteCount);
3059 #if (_MSC_VER >= 1500)
3060 std::reverse_copy(encodedInteger, encodedInteger+byteCount,
3061 stdext::make_checked_array_iterator(block.begin(), block.size()));
3062 #else
3063 std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin());
3064 #endif
3065 Decode(block.begin(), block.size(), s);
3066 return;
3067 }
3068 }
3069
Integer(BufferedTransformation & bt)3070 Integer::Integer(BufferedTransformation &bt)
3071 {
3072 // Make explicit call to avoid virtual-dispatch findings in ctor
3073 Integer::BERDecode(bt);
3074 }
3075
Integer(RandomNumberGenerator & rng,size_t bitcount)3076 Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
3077 {
3078 Randomize(rng, bitcount);
3079 }
3080
Integer(RandomNumberGenerator & rng,const Integer & min,const Integer & max,RandomNumberType rnType,const Integer & equiv,const Integer & mod)3081 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3082 {
3083 if (!Randomize(rng, min, max, rnType, equiv, mod))
3084 throw Integer::RandomNumberNotFound();
3085 }
3086
Power2(size_t e)3087 Integer Integer::Power2(size_t e)
3088 {
3089 Integer r((word)0, BitsToWords(e+1));
3090 r.SetBit(e);
3091 return r;
3092 }
3093
operator !() const3094 bool Integer::operator!() const
3095 {
3096 return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
3097 }
3098
operator =(const Integer & t)3099 Integer& Integer::operator=(const Integer& t)
3100 {
3101 if (this != &t)
3102 {
3103 if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
3104 reg.New(RoundupSize(t.WordCount()));
3105 CopyWords(reg, t.reg, reg.size());
3106 sign = t.sign;
3107 }
3108 return *this;
3109 }
3110
GetBit(size_t n) const3111 bool Integer::GetBit(size_t n) const
3112 {
3113 // Profiling guided the flow below.
3114 if (n/WORD_BITS < reg.size())
3115 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
3116 else
3117 return 0;
3118 }
3119
SetBit(size_t n,bool value)3120 void Integer::SetBit(size_t n, bool value)
3121 {
3122 if (value)
3123 {
3124 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
3125 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
3126 }
3127 else
3128 {
3129 if (n/WORD_BITS < reg.size())
3130 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
3131 }
3132 }
3133
GetByte(size_t n) const3134 byte Integer::GetByte(size_t n) const
3135 {
3136 // Profiling guided the flow below.
3137 if (n/WORD_SIZE < reg.size())
3138 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
3139 else
3140 return 0;
3141 }
3142
SetByte(size_t n,byte value)3143 void Integer::SetByte(size_t n, byte value)
3144 {
3145 reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
3146 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3147 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
3148 }
3149
GetBits(size_t i,size_t n) const3150 lword Integer::GetBits(size_t i, size_t n) const
3151 {
3152 lword v = 0;
3153 CRYPTOPP_ASSERT(n <= sizeof(v)*8);
3154 for (unsigned int j=0; j<n; j++)
3155 v |= lword(GetBit(i+j)) << j;
3156 return v;
3157 }
3158
operator -() const3159 Integer Integer::operator-() const
3160 {
3161 Integer result(*this);
3162 result.Negate();
3163 return result;
3164 }
3165
AbsoluteValue() const3166 Integer Integer::AbsoluteValue() const
3167 {
3168 Integer result(*this);
3169 result.sign = POSITIVE;
3170 return result;
3171 }
3172
swap(Integer & a)3173 void Integer::swap(Integer &a)
3174 {
3175 reg.swap(a.reg);
3176 std::swap(sign, a.sign);
3177 }
3178
Integer(word value,size_t length)3179 Integer::Integer(word value, size_t length)
3180 : reg(RoundupSize(length)), sign(POSITIVE)
3181 {
3182 reg[0] = value;
3183 SetWords(reg+1, 0, reg.size()-1);
3184 }
3185
3186 template <class T>
StringToInteger(const T * str,ByteOrder order)3187 static Integer StringToInteger(const T *str, ByteOrder order)
3188 {
3189 CRYPTOPP_ASSERT( order == BIG_ENDIAN_ORDER || order == LITTLE_ENDIAN_ORDER );
3190
3191 int radix, sign = 1;
3192 // GCC workaround
3193 // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
3194 unsigned int length;
3195 for (length = 0; str[length] != 0; length++) {}
3196
3197 Integer v;
3198
3199 if (length == 0)
3200 return Integer::Zero();
3201
3202 switch (str[length-1])
3203 {
3204 case 'h':
3205 case 'H':
3206 radix=16;
3207 break;
3208 case 'o':
3209 case 'O':
3210 radix=8;
3211 break;
3212 case 'b':
3213 case 'B':
3214 radix=2;
3215 break;
3216 default:
3217 radix=10;
3218 }
3219
3220 // 'str' is of length 1 or more
3221 if (str[0] == '-')
3222 {
3223 sign = -1;
3224 str += 1, length -= 1;
3225 }
3226
3227 if (length > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
3228 {
3229 radix = 16;
3230 str += 2, length -= 2;
3231 }
3232
3233 if (order == BIG_ENDIAN_ORDER)
3234 {
3235 for (unsigned int i=0; i<length; i++)
3236 {
3237 int digit, ch = static_cast<int>(str[i]);
3238
3239 // Profiling guided the flow below.
3240 if (ch >= '0' && ch <= '9')
3241 digit = ch - '0';
3242 else if (ch >= 'a' && ch <= 'f')
3243 digit = ch - 'a' + 10;
3244 else if (ch >= 'A' && ch <= 'F')
3245 digit = ch - 'A' + 10;
3246 else
3247 digit = radix;
3248
3249 if (digit < radix)
3250 {
3251 v *= radix;
3252 v += digit;
3253 }
3254 }
3255 }
3256 else if (radix == 16 && order == LITTLE_ENDIAN_ORDER)
3257 {
3258 // Nibble high, low and count
3259 unsigned int nh = 0, nl = 0, nc = 0;
3260 Integer position(Integer::One());
3261
3262 for (unsigned int i=0; i<length; i++)
3263 {
3264 int digit, ch = static_cast<int>(str[i]);
3265
3266 if (ch >= '0' && ch <= '9')
3267 digit = ch - '0';
3268 else if (ch >= 'a' && ch <= 'f')
3269 digit = ch - 'a' + 10;
3270 else if (ch >= 'A' && ch <= 'F')
3271 digit = ch - 'A' + 10;
3272 else
3273 digit = radix;
3274
3275 if (digit < radix)
3276 {
3277 if (nc++ == 0)
3278 nh = digit;
3279 else
3280 nl = digit;
3281
3282 if (nc == 2)
3283 {
3284 v += position * (nh << 4 | nl);
3285 nc = 0, position <<= 8;
3286 }
3287 }
3288 }
3289
3290 if (nc == 1)
3291 v += nh * position;
3292 }
3293 else // LITTLE_ENDIAN_ORDER && radix != 16
3294 {
3295 for (int i=static_cast<int>(length)-1; i>=0; i--)
3296 {
3297 int digit, ch = static_cast<int>(str[i]);
3298
3299 if (ch >= '0' && ch <= '9')
3300 digit = ch - '0';
3301 else if (ch >= 'a' && ch <= 'f')
3302 digit = ch - 'a' + 10;
3303 else if (ch >= 'A' && ch <= 'F')
3304 digit = ch - 'A' + 10;
3305 else
3306 digit = radix;
3307
3308 if (digit < radix)
3309 {
3310 v *= radix;
3311 v += digit;
3312 }
3313 }
3314 }
3315
3316 if (sign == -1)
3317 v.Negate();
3318
3319 return v;
3320 }
3321
Integer(const char * str,ByteOrder order)3322 Integer::Integer(const char *str, ByteOrder order)
3323 : reg(2), sign(POSITIVE)
3324 {
3325 *this = StringToInteger(str,order);
3326 }
3327
Integer(const wchar_t * str,ByteOrder order)3328 Integer::Integer(const wchar_t *str, ByteOrder order)
3329 : reg(2), sign(POSITIVE)
3330 {
3331 *this = StringToInteger(str,order);
3332 }
3333
WordCount() const3334 unsigned int Integer::WordCount() const
3335 {
3336 return (unsigned int)CountWords(reg, reg.size());
3337 }
3338
ByteCount() const3339 unsigned int Integer::ByteCount() const
3340 {
3341 unsigned wordCount = WordCount();
3342 if (wordCount)
3343 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
3344 else
3345 return 0;
3346 }
3347
BitCount() const3348 unsigned int Integer::BitCount() const
3349 {
3350 unsigned wordCount = WordCount();
3351 if (wordCount)
3352 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
3353 else
3354 return 0;
3355 }
3356
Decode(const byte * input,size_t inputLen,Signedness s)3357 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
3358 {
3359 CRYPTOPP_ASSERT(input && inputLen); // NULL buffer
3360 StringStore store(input, inputLen);
3361 Decode(store, inputLen, s);
3362 }
3363
Decode(BufferedTransformation & bt,size_t inputLen,Signedness s)3364 void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
3365 {
3366 CRYPTOPP_ASSERT(bt.MaxRetrievable() >= inputLen);
3367 if (bt.MaxRetrievable() < inputLen)
3368 throw InvalidArgument("Integer: input length is too small");
3369
3370 byte b;
3371 bt.Peek(b);
3372 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
3373
3374 while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
3375 {
3376 bt.Skip(1);
3377 inputLen--;
3378 bt.Peek(b);
3379 }
3380
3381 reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
3382 for (size_t i=inputLen; i > 0; i--)
3383 {
3384 (void)bt.Get(b);
3385 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3386 }
3387
3388 if (sign == NEGATIVE)
3389 {
3390 for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
3391 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3392 TwosComplement(reg, reg.size());
3393 }
3394 }
3395
MinEncodedSize(Signedness signedness) const3396 size_t Integer::MinEncodedSize(Signedness signedness) const
3397 {
3398 // Profiling guided the flow below.
3399 unsigned int outputLen = STDMAX(1U, ByteCount());
3400 const bool pre = (signedness == UNSIGNED);
3401 if (!pre && NotNegative() && (GetByte(outputLen-1) & 0x80))
3402 outputLen++;
3403 if (pre)
3404 return outputLen;
3405 if (IsNegative() && *this < -Power2(outputLen*8-1))
3406 outputLen++;
3407 return outputLen;
3408 }
3409
3410 // PKCS12_PBKDF and other classes use undersized buffers
Encode(byte * output,size_t outputLen,Signedness signedness) const3411 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
3412 {
3413 CRYPTOPP_ASSERT(output && outputLen); // NULL buffer
3414 ArraySink sink(output, outputLen);
3415 Encode(sink, outputLen, signedness);
3416 }
3417
Encode(BufferedTransformation & bt,size_t outputLen,Signedness signedness) const3418 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
3419 {
3420 if (signedness == UNSIGNED || NotNegative())
3421 {
3422 for (size_t i=outputLen; i > 0; i--)
3423 bt.Put(GetByte(i-1));
3424 }
3425 else
3426 {
3427 // take two's complement of *this
3428 Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
3429 temp.Encode(bt, outputLen, UNSIGNED);
3430 }
3431 }
3432
DEREncode(BufferedTransformation & bt) const3433 void Integer::DEREncode(BufferedTransformation &bt) const
3434 {
3435 DERGeneralEncoder enc(bt, INTEGER);
3436 Encode(enc, MinEncodedSize(SIGNED), SIGNED);
3437 enc.MessageEnd();
3438 }
3439
BERDecode(const byte * input,size_t len)3440 void Integer::BERDecode(const byte *input, size_t len)
3441 {
3442 CRYPTOPP_ASSERT(input && len); // NULL buffer
3443 StringStore store(input, len);
3444 BERDecode(store);
3445 }
3446
BERDecode(BufferedTransformation & bt)3447 void Integer::BERDecode(BufferedTransformation &bt)
3448 {
3449 BERGeneralDecoder dec(bt, INTEGER);
3450 if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
3451 BERDecodeError();
3452 Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
3453 dec.MessageEnd();
3454 }
3455
DEREncodeAsOctetString(BufferedTransformation & bt,size_t length) const3456 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
3457 {
3458 DERGeneralEncoder enc(bt, OCTET_STRING);
3459 Encode(enc, length);
3460 enc.MessageEnd();
3461 }
3462
BERDecodeAsOctetString(BufferedTransformation & bt,size_t length)3463 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
3464 {
3465 BERGeneralDecoder dec(bt, OCTET_STRING);
3466 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3467 BERDecodeError();
3468 Decode(dec, length);
3469 dec.MessageEnd();
3470 }
3471
OpenPGPEncode(byte * output,size_t bufferSize) const3472 size_t Integer::OpenPGPEncode(byte *output, size_t bufferSize) const
3473 {
3474 CRYPTOPP_ASSERT(output && bufferSize); // NULL buffer
3475 CRYPTOPP_ASSERT(bufferSize >= 2+ByteCount()); // Undersized buffer
3476 ArraySink sink(output, bufferSize);
3477 return OpenPGPEncode(sink);
3478 }
3479
OpenPGPEncode(BufferedTransformation & bt) const3480 size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
3481 {
3482 word16 bitCount = word16(BitCount());
3483 bt.PutWord16(bitCount);
3484 size_t byteCount = BitsToBytes(bitCount);
3485 Encode(bt, byteCount);
3486 return 2 + byteCount;
3487 }
3488
OpenPGPDecode(const byte * input,size_t len)3489 void Integer::OpenPGPDecode(const byte *input, size_t len)
3490 {
3491 CRYPTOPP_ASSERT(input && len); // NULL buffer
3492 StringStore store(input, len);
3493 OpenPGPDecode(store);
3494 }
3495
OpenPGPDecode(BufferedTransformation & bt)3496 void Integer::OpenPGPDecode(BufferedTransformation &bt)
3497 {
3498 word16 bitCount;
3499 if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
3500 throw OpenPGPDecodeErr();
3501 Decode(bt, BitsToBytes(bitCount));
3502 }
3503
Randomize(RandomNumberGenerator & rng,size_t nbits)3504 void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
3505 {
3506 const size_t nbytes = nbits/8 + 1;
3507 SecByteBlock buf(nbytes);
3508 rng.GenerateBlock(buf, nbytes);
3509 if (nbytes)
3510 buf[0] = (byte)Crop(buf[0], nbits % 8);
3511 Decode(buf, nbytes, UNSIGNED);
3512 }
3513
Randomize(RandomNumberGenerator & rng,const Integer & min,const Integer & max)3514 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
3515 {
3516 if (min > max)
3517 throw InvalidArgument("Integer: Min must be no greater than Max");
3518
3519 Integer range = max - min;
3520 const unsigned int nbits = range.BitCount();
3521
3522 do
3523 {
3524 Randomize(rng, nbits);
3525 }
3526 while (*this > range);
3527
3528 *this += min;
3529 }
3530
Randomize(RandomNumberGenerator & rng,const Integer & min,const Integer & max,RandomNumberType rnType,const Integer & equiv,const Integer & mod)3531 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3532 {
3533 return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)
3534 ("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
3535 }
3536
3537 class KDF2_RNG : public RandomNumberGenerator
3538 {
3539 public:
KDF2_RNG(const byte * seed,size_t seedSize)3540 KDF2_RNG(const byte *seed, size_t seedSize)
3541 : m_counter(0), m_counterAndSeed(ClampSize(seedSize) + 4)
3542 {
3543 memcpy(m_counterAndSeed + 4, seed, ClampSize(seedSize));
3544 }
3545
GenerateBlock(byte * output,size_t size)3546 void GenerateBlock(byte *output, size_t size)
3547 {
3548 CRYPTOPP_ASSERT(output && size); // NULL buffer
3549 PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
3550 ++m_counter;
3551 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULLPTR, 0);
3552 }
3553
3554 // UBsan finding, -Wstringop-overflow
ClampSize(size_t req) const3555 inline size_t ClampSize(size_t req) const
3556 {
3557 // Clamp at 16 MB
3558 if (req > 16U*1024*1024)
3559 return 16U*1024*1024;
3560 return req;
3561 }
3562
3563 private:
3564 word32 m_counter;
3565 SecByteBlock m_counterAndSeed;
3566 };
3567
GenerateRandomNoThrow(RandomNumberGenerator & i_rng,const NameValuePairs & params)3568 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms)
3569 {
3570 Integer min = params.GetValueWithDefault("Min", Integer::Zero());
3571 Integer max;
3572 if (!params.GetValue("Max", max))
3573 {
3574 int bitLength;
3575 if (params.GetIntValue("BitLength", bitLength))
3576 max = Integer::Power2(bitLength);
3577 else
3578 throw InvalidArgument("Integer: missing Max argument");
3579 }
3580 if (min > max)
3581 throw InvalidArgument("Integer: Min must be no greater than Max");
3582
3583 Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
3584 Integer mod = params.GetValueWithDefault("Mod", Integer::One());
3585
3586 if (equiv.IsNegative() || equiv >= mod)
3587 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
3588
3589 Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
3590
3591 member_ptr<KDF2_RNG> kdf2Rng;
3592 ConstByteArrayParameter seed;
3593 if (params.GetValue(Name::Seed(), seed))
3594 {
3595 ByteQueue bq;
3596 DERSequenceEncoder seq(bq);
3597 min.DEREncode(seq);
3598 max.DEREncode(seq);
3599 equiv.DEREncode(seq);
3600 mod.DEREncode(seq);
3601 DEREncodeUnsigned(seq, rnType);
3602 DEREncodeOctetString(seq, seed.begin(), seed.size());
3603 seq.MessageEnd();
3604
3605 SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
3606 bq.Get(finalSeed, finalSeed.size());
3607 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3608 }
3609 RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
3610
3611 switch (rnType)
3612 {
3613 case ANY:
3614 if (mod == One())
3615 Randomize(rng, min, max);
3616 else
3617 {
3618 Integer min1 = min + (equiv-min)%mod;
3619 if (max < min1)
3620 return false;
3621 Randomize(rng, Zero(), (max - min1) / mod);
3622 *this *= mod;
3623 *this += min1;
3624 }
3625 return true;
3626
3627 case PRIME:
3628 {
3629 const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULLPTR);
3630
3631 int i;
3632 i = 0;
3633 while (1)
3634 {
3635 if (++i==16)
3636 {
3637 // check if there are any suitable primes in [min, max]
3638 Integer first = min;
3639 if (FirstPrime(first, max, equiv, mod, pSelector))
3640 {
3641 // if there is only one suitable prime, we're done
3642 *this = first;
3643 if (!FirstPrime(first, max, equiv, mod, pSelector))
3644 return true;
3645 }
3646 else
3647 return false;
3648 }
3649
3650 Randomize(rng, min, max);
3651 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3652 return true;
3653 }
3654 }
3655
3656 default:
3657 throw InvalidArgument("Integer: invalid RandomNumberType argument");
3658 }
3659 }
3660
operator >>(std::istream & in,Integer & a)3661 std::istream& operator>>(std::istream& in, Integer &a)
3662 {
3663 char c;
3664 unsigned int length = 0;
3665 SecBlock<char> str(length + 16);
3666
3667 std::ws(in);
3668
3669 do
3670 {
3671 in.read(&c, 1);
3672 str[length++] = c;
3673 if (length >= str.size())
3674 str.Grow(length + 16);
3675 }
3676 while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
3677
3678 if (in.gcount())
3679 in.putback(c);
3680 str[length-1] = '\0';
3681 a = Integer(str);
3682
3683 return in;
3684 }
3685
3686 // Ensure base 10 is default
FlagToBase(long f)3687 inline int FlagToBase(long f) {
3688 return f == std::ios::hex ? 16 : (f == std::ios::oct ? 8 : 10);
3689 }
3690
FlagToSuffix(long f)3691 inline char FlagToSuffix(long f) {
3692 return f == std::ios::hex ? 'h' : (f == std::ios::oct ? 'o' : '.');
3693 }
3694
3695 // Ensure base 10 is default
operator <<(std::ostream & out,const Integer & a)3696 std::ostream& operator<<(std::ostream& out, const Integer &a)
3697 {
3698 // Get relevant conversion specifications from ostream.
3699 const long f = out.flags() & std::ios::basefield;
3700 const int base = FlagToBase(f);
3701 const char suffix = FlagToSuffix(f);
3702
3703 Integer temp1=a, temp2;
3704 if (a.IsNegative())
3705 {
3706 out << '-';
3707 temp1.Negate();
3708 }
3709
3710 if (!a)
3711 out << '0';
3712
3713 static const char upper[]="0123456789ABCDEF";
3714 static const char lower[]="0123456789abcdef";
3715
3716 const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3717 unsigned int i=0;
3718 SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
3719
3720 while (!!temp1)
3721 {
3722 word digit;
3723 Integer::Divide(digit, temp2, temp1, base);
3724 s[i++]=vec[digit];
3725 temp1.swap(temp2);
3726 }
3727
3728 while (i--)
3729 {
3730 out << s[i];
3731 }
3732
3733 #ifdef CRYPTOPP_USE_STD_SHOWBASE
3734 if (out.flags() & std::ios_base::showbase)
3735 out << suffix;
3736
3737 return out;
3738 #else
3739 return out << suffix;
3740 #endif
3741 }
3742
operator ++()3743 Integer& Integer::operator++()
3744 {
3745 if (NotNegative())
3746 {
3747 if (Increment(reg, reg.size()))
3748 {
3749 reg.CleanGrow(2*reg.size());
3750 reg[reg.size()/2]=1;
3751 }
3752 }
3753 else
3754 {
3755 word borrow = Decrement(reg, reg.size());
3756 CRYPTOPP_ASSERT(!borrow); CRYPTOPP_UNUSED(borrow);
3757
3758 if (WordCount()==0)
3759 *this = Zero();
3760 }
3761 return *this;
3762 }
3763
operator --()3764 Integer& Integer::operator--()
3765 {
3766 if (IsNegative())
3767 {
3768 if (Increment(reg, reg.size()))
3769 {
3770 reg.CleanGrow(2*reg.size());
3771 reg[reg.size()/2]=1;
3772 }
3773 }
3774 else
3775 {
3776 if (Decrement(reg, reg.size()))
3777 *this = -One();
3778 }
3779 return *this;
3780 }
3781
3782 // This is a bit operation. We set sign to POSITIVE, so there's no need to
3783 // worry about negative zero. Also see http://stackoverflow.com/q/11644362.
And(const Integer & t) const3784 Integer Integer::And(const Integer& t) const
3785 {
3786 if (this == &t)
3787 {
3788 return AbsoluteValue();
3789 }
3790 else if (reg.size() >= t.reg.size())
3791 {
3792 Integer result(t);
3793 AndWords(result.reg, reg, t.reg.size());
3794
3795 result.sign = POSITIVE;
3796 return result;
3797 }
3798 else // reg.size() < t.reg.size()
3799 {
3800 Integer result(*this);
3801 AndWords(result.reg, t.reg, reg.size());
3802
3803 result.sign = POSITIVE;
3804 return result;
3805 }
3806 }
3807
3808 // This is a bit operation. We set sign to POSITIVE, so there's no need to
3809 // worry about negative zero. Also see http://stackoverflow.com/q/11644362.
Or(const Integer & t) const3810 Integer Integer::Or(const Integer& t) const
3811 {
3812 if (this == &t)
3813 {
3814 return AbsoluteValue();
3815 }
3816 else if (reg.size() >= t.reg.size())
3817 {
3818 Integer result(*this);
3819 OrWords(result.reg, t.reg, t.reg.size());
3820
3821 result.sign = POSITIVE;
3822 return result;
3823 }
3824 else // reg.size() < t.reg.size()
3825 {
3826 Integer result(t);
3827 OrWords(result.reg, reg, reg.size());
3828
3829 result.sign = POSITIVE;
3830 return result;
3831 }
3832 }
3833
3834 // This is a bit operation. We set sign to POSITIVE, so there's no need to
3835 // worry about negative zero. Also see http://stackoverflow.com/q/11644362.
Xor(const Integer & t) const3836 Integer Integer::Xor(const Integer& t) const
3837 {
3838 if (this == &t)
3839 {
3840 return Integer::Zero();
3841 }
3842 else if (reg.size() >= t.reg.size())
3843 {
3844 Integer result(*this);
3845 XorWords(result.reg, t.reg, t.reg.size());
3846
3847 result.sign = POSITIVE;
3848 return result;
3849 }
3850 else // reg.size() < t.reg.size()
3851 {
3852 Integer result(t);
3853 XorWords(result.reg, reg, reg.size());
3854
3855 result.sign = POSITIVE;
3856 return result;
3857 }
3858 }
3859
PositiveAdd(Integer & sum,const Integer & a,const Integer & b)3860 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
3861 {
3862 // Profiling guided the flow below.
3863 int carry; const bool pre = (a.reg.size() == b.reg.size());
3864 if (!pre && a.reg.size() > b.reg.size())
3865 {
3866 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3867 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3868 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3869 }
3870 else if (pre)
3871 {
3872 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3873 }
3874 else
3875 {
3876 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3877 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3878 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3879 }
3880
3881 if (carry)
3882 {
3883 sum.reg.CleanGrow(2*sum.reg.size());
3884 sum.reg[sum.reg.size()/2] = 1;
3885 }
3886 sum.sign = Integer::POSITIVE;
3887 }
3888
PositiveSubtract(Integer & diff,const Integer & a,const Integer & b)3889 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
3890 {
3891 unsigned aSize = a.WordCount();
3892 aSize += aSize%2;
3893 unsigned bSize = b.WordCount();
3894 bSize += bSize%2;
3895
3896 // Profiling guided the flow below.
3897 if (aSize > bSize)
3898 {
3899 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3900 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3901 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3902 CRYPTOPP_ASSERT(!borrow); CRYPTOPP_UNUSED(borrow);
3903 diff.sign = Integer::POSITIVE;
3904 }
3905 else if (aSize == bSize)
3906 {
3907 if (Compare(a.reg, b.reg, aSize) >= 0)
3908 {
3909 Subtract(diff.reg, a.reg, b.reg, aSize);
3910 diff.sign = Integer::POSITIVE;
3911 }
3912 else
3913 {
3914 Subtract(diff.reg, b.reg, a.reg, aSize);
3915 diff.sign = Integer::NEGATIVE;
3916 }
3917 }
3918 else
3919 {
3920 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3921 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3922 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
3923 CRYPTOPP_ASSERT(!borrow); CRYPTOPP_UNUSED(borrow);
3924 diff.sign = Integer::NEGATIVE;
3925 }
3926 }
3927
3928 // MSVC .NET 2003 workaround
STDMAX2(const T & a,const T & b)3929 template <class T> inline const T& STDMAX2(const T& a, const T& b)
3930 {
3931 return a < b ? b : a;
3932 }
3933
Plus(const Integer & b) const3934 Integer Integer::Plus(const Integer& b) const
3935 {
3936 Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
3937 if (NotNegative())
3938 {
3939 if (b.NotNegative())
3940 PositiveAdd(sum, *this, b);
3941 else
3942 PositiveSubtract(sum, *this, b);
3943 }
3944 else
3945 {
3946 if (b.NotNegative())
3947 PositiveSubtract(sum, b, *this);
3948 else
3949 {
3950 PositiveAdd(sum, *this, b);
3951 sum.sign = Integer::NEGATIVE;
3952 }
3953 }
3954 return sum;
3955 }
3956
operator +=(const Integer & t)3957 Integer& Integer::operator+=(const Integer& t)
3958 {
3959 reg.CleanGrow(t.reg.size());
3960 if (NotNegative())
3961 {
3962 if (t.NotNegative())
3963 PositiveAdd(*this, *this, t);
3964 else
3965 PositiveSubtract(*this, *this, t);
3966 }
3967 else
3968 {
3969 if (t.NotNegative())
3970 PositiveSubtract(*this, t, *this);
3971 else
3972 {
3973 PositiveAdd(*this, *this, t);
3974 sign = Integer::NEGATIVE;
3975 }
3976 }
3977 return *this;
3978 }
3979
Minus(const Integer & b) const3980 Integer Integer::Minus(const Integer& b) const
3981 {
3982 Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
3983 if (NotNegative())
3984 {
3985 if (b.NotNegative())
3986 PositiveSubtract(diff, *this, b);
3987 else
3988 PositiveAdd(diff, *this, b);
3989 }
3990 else
3991 {
3992 if (b.NotNegative())
3993 {
3994 PositiveAdd(diff, *this, b);
3995 diff.sign = Integer::NEGATIVE;
3996 }
3997 else
3998 PositiveSubtract(diff, b, *this);
3999 }
4000 return diff;
4001 }
4002
operator -=(const Integer & t)4003 Integer& Integer::operator-=(const Integer& t)
4004 {
4005 reg.CleanGrow(t.reg.size());
4006 if (NotNegative())
4007 {
4008 if (t.NotNegative())
4009 PositiveSubtract(*this, *this, t);
4010 else
4011 PositiveAdd(*this, *this, t);
4012 }
4013 else
4014 {
4015 if (t.NotNegative())
4016 {
4017 PositiveAdd(*this, *this, t);
4018 sign = Integer::NEGATIVE;
4019 }
4020 else
4021 PositiveSubtract(*this, t, *this);
4022 }
4023 return *this;
4024 }
4025
operator <<=(size_t n)4026 Integer& Integer::operator<<=(size_t n)
4027 {
4028 const size_t wordCount = WordCount();
4029 const size_t shiftWords = n / WORD_BITS;
4030 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
4031
4032 reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
4033 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
4034 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
4035 return *this;
4036 }
4037
operator >>=(size_t n)4038 Integer& Integer::operator>>=(size_t n)
4039 {
4040 const size_t wordCount = WordCount();
4041 const size_t shiftWords = n / WORD_BITS;
4042 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
4043
4044 ShiftWordsRightByWords(reg, wordCount, shiftWords);
4045 if (wordCount > shiftWords)
4046 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
4047 if (IsNegative() && WordCount()==0) // avoid -0
4048 *this = Zero();
4049 return *this;
4050 }
4051
operator &=(const Integer & t)4052 Integer& Integer::operator&=(const Integer& t)
4053 {
4054 if (this != &t)
4055 {
4056 const size_t size = STDMIN(reg.size(), t.reg.size());
4057 reg.resize(size);
4058 AndWords(reg, t.reg, size);
4059 }
4060 sign = POSITIVE;
4061 return *this;
4062 }
4063
operator |=(const Integer & t)4064 Integer& Integer::operator|=(const Integer& t)
4065 {
4066 if (this != &t)
4067 {
4068 if (reg.size() >= t.reg.size())
4069 {
4070 OrWords(reg, t.reg, t.reg.size());
4071 }
4072 else // reg.size() < t.reg.size()
4073 {
4074 const size_t head = reg.size();
4075 const size_t tail = t.reg.size() - reg.size();
4076 reg.resize(head+tail);
4077 OrWords(reg, t.reg, head);
4078 CopyWords(reg+head,t.reg+head,tail);
4079 }
4080 }
4081 sign = POSITIVE;
4082 return *this;
4083 }
4084
operator ^=(const Integer & t)4085 Integer& Integer::operator^=(const Integer& t)
4086 {
4087 if (this == &t)
4088 {
4089 *this = Zero();
4090 }
4091 else
4092 {
4093 if (reg.size() >= t.reg.size())
4094 {
4095 XorWords(reg, t.reg, t.reg.size());
4096 }
4097 else // reg.size() < t.reg.size()
4098 {
4099 const size_t head = reg.size();
4100 const size_t tail = t.reg.size() - reg.size();
4101 reg.resize(head+tail);
4102 XorWords(reg, t.reg, head);
4103 CopyWords(reg+head,t.reg+head,tail);
4104 }
4105 }
4106 sign = POSITIVE;
4107 return *this;
4108 }
4109
PositiveMultiply(Integer & product,const Integer & a,const Integer & b)4110 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
4111 {
4112 size_t aSize = RoundupSize(a.WordCount());
4113 size_t bSize = RoundupSize(b.WordCount());
4114
4115 product.reg.CleanNew(RoundupSize(aSize+bSize));
4116 product.sign = Integer::POSITIVE;
4117
4118 IntegerSecBlock workspace(aSize + bSize);
4119 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
4120 }
4121
Multiply(Integer & product,const Integer & a,const Integer & b)4122 void Multiply(Integer &product, const Integer &a, const Integer &b)
4123 {
4124 PositiveMultiply(product, a, b);
4125
4126 if (a.NotNegative() != b.NotNegative())
4127 product.Negate();
4128 }
4129
Times(const Integer & b) const4130 Integer Integer::Times(const Integer &b) const
4131 {
4132 Integer product;
4133 Multiply(product, *this, b);
4134 return product;
4135 }
4136
4137 /*
4138 void PositiveDivide(Integer &remainder, Integer "ient,
4139 const Integer ÷nd, const Integer &divisor)
4140 {
4141 remainder.reg.CleanNew(divisor.reg.size());
4142 remainder.sign = Integer::POSITIVE;
4143 quotient.reg.New(0);
4144 quotient.sign = Integer::POSITIVE;
4145 unsigned i=dividend.BitCount();
4146 while (i--)
4147 {
4148 word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
4149 remainder.reg[0] |= dividend[i];
4150 if (overflow || remainder >= divisor)
4151 {
4152 Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
4153 quotient.SetBit(i);
4154 }
4155 }
4156 }
4157 */
4158
PositiveDivide(Integer & remainder,Integer & quotient,const Integer & a,const Integer & b)4159 void PositiveDivide(Integer &remainder, Integer "ient,
4160 const Integer &a, const Integer &b)
4161 {
4162 unsigned aSize = a.WordCount();
4163 unsigned bSize = b.WordCount();
4164
4165 if (!bSize)
4166 throw Integer::DivideByZero();
4167
4168 if (aSize < bSize)
4169 {
4170 remainder = a;
4171 remainder.sign = Integer::POSITIVE;
4172 quotient = Integer::Zero();
4173 return;
4174 }
4175
4176 aSize += aSize%2; // round up to next even number
4177 bSize += bSize%2;
4178
4179 remainder.reg.CleanNew(RoundupSize(bSize));
4180 remainder.sign = Integer::POSITIVE;
4181 quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
4182 quotient.sign = Integer::POSITIVE;
4183
4184 IntegerSecBlock T(aSize+3*(bSize+2));
4185 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
4186 }
4187
Divide(Integer & remainder,Integer & quotient,const Integer & dividend,const Integer & divisor)4188 void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor)
4189 {
4190 PositiveDivide(remainder, quotient, dividend, divisor);
4191
4192 if (dividend.IsNegative())
4193 {
4194 quotient.Negate();
4195 if (remainder.NotZero())
4196 {
4197 --quotient;
4198 remainder = divisor.AbsoluteValue() - remainder;
4199 }
4200 }
4201
4202 if (divisor.IsNegative())
4203 quotient.Negate();
4204 }
4205
DivideByPowerOf2(Integer & r,Integer & q,const Integer & a,unsigned int n)4206 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
4207 {
4208 q = a;
4209 q >>= n;
4210
4211 const size_t wordCount = BitsToWords(n);
4212 if (wordCount <= a.WordCount())
4213 {
4214 r.reg.resize(RoundupSize(wordCount));
4215 CopyWords(r.reg, a.reg, wordCount);
4216 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
4217 if (n % WORD_BITS != 0)
4218 r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
4219 }
4220 else
4221 {
4222 r.reg.resize(RoundupSize(a.WordCount()));
4223 CopyWords(r.reg, a.reg, r.reg.size());
4224 }
4225 r.sign = POSITIVE;
4226
4227 if (a.IsNegative() && r.NotZero())
4228 {
4229 --q;
4230 r = Power2(n) - r;
4231 }
4232 }
4233
DividedBy(const Integer & b) const4234 Integer Integer::DividedBy(const Integer &b) const
4235 {
4236 Integer remainder, quotient;
4237 Integer::Divide(remainder, quotient, *this, b);
4238 return quotient;
4239 }
4240
Modulo(const Integer & b) const4241 Integer Integer::Modulo(const Integer &b) const
4242 {
4243 Integer remainder, quotient;
4244 Integer::Divide(remainder, quotient, *this, b);
4245 return remainder;
4246 }
4247
Divide(word & remainder,Integer & quotient,const Integer & dividend,word divisor)4248 void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor)
4249 {
4250 if (!divisor)
4251 throw Integer::DivideByZero();
4252
4253 // IsPowerOf2 uses BMI on x86 if available. There is a small
4254 // but measurable improvement during decryption and signing.
4255 if (IsPowerOf2(divisor))
4256 {
4257 quotient = dividend >> (BitPrecision(divisor)-1);
4258 remainder = dividend.reg[0] & (divisor-1);
4259 return;
4260 }
4261
4262 unsigned int i = dividend.WordCount();
4263 quotient.reg.CleanNew(RoundupSize(i));
4264 remainder = 0;
4265 while (i--)
4266 {
4267 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
4268 remainder = DWord(dividend.reg[i], remainder) % divisor;
4269 }
4270
4271 if (dividend.NotNegative())
4272 quotient.sign = POSITIVE;
4273 else
4274 {
4275 quotient.sign = NEGATIVE;
4276 if (remainder)
4277 {
4278 --quotient;
4279 remainder = divisor - remainder;
4280 }
4281 }
4282 }
4283
DividedBy(word b) const4284 Integer Integer::DividedBy(word b) const
4285 {
4286 word remainder;
4287 Integer quotient;
4288 Integer::Divide(remainder, quotient, *this, b);
4289 return quotient;
4290 }
4291
Modulo(word divisor) const4292 word Integer::Modulo(word divisor) const
4293 {
4294 if (!divisor)
4295 throw Integer::DivideByZero();
4296
4297 word remainder;
4298
4299 // Profiling guided the flow below.
4300 if ((divisor & (divisor-1)) != 0) // divisor is not a power of 2
4301 {
4302 // Profiling guided the flow below.
4303 unsigned int i = WordCount();
4304 if (divisor > 5)
4305 {
4306 remainder = 0;
4307 while (i--)
4308 remainder = DWord(reg[i], remainder) % divisor;
4309 }
4310 else
4311 {
4312 DWord sum(0, 0);
4313 while (i--)
4314 sum += reg[i];
4315 remainder = sum % divisor;
4316 }
4317 }
4318 else // divisor is a power of 2
4319 {
4320 remainder = reg[0] & (divisor-1);
4321 }
4322
4323 if (IsNegative() && remainder)
4324 remainder = divisor - remainder;
4325
4326 return remainder;
4327 }
4328
Negate()4329 void Integer::Negate()
4330 {
4331 if (!!(*this)) // don't flip sign if *this==0
4332 sign = Sign(1-sign);
4333 }
4334
PositiveCompare(const Integer & t) const4335 int Integer::PositiveCompare(const Integer& t) const
4336 {
4337 // Profiling guided the flow below.
4338 const unsigned size = WordCount(), tSize = t.WordCount();
4339 if (size != tSize)
4340 return size > tSize ? 1 : -1;
4341 else
4342 return CryptoPP::Compare(reg, t.reg, size);
4343 }
4344
Compare(const Integer & t) const4345 int Integer::Compare(const Integer& t) const
4346 {
4347 if (NotNegative())
4348 {
4349 if (t.NotNegative())
4350 return PositiveCompare(t);
4351 else
4352 return 1;
4353 }
4354 else
4355 {
4356 if (t.NotNegative())
4357 return -1;
4358 else
4359 return -PositiveCompare(t);
4360 }
4361 }
4362
SquareRoot() const4363 Integer Integer::SquareRoot() const
4364 {
4365 if (!IsPositive())
4366 return Zero();
4367
4368 // overestimate square root
4369 Integer x, y = Power2((BitCount()+1)/2);
4370 CRYPTOPP_ASSERT(y*y >= *this);
4371
4372 do
4373 {
4374 x = y;
4375 y = (x + *this/x) >> 1;
4376 } while (y<x);
4377
4378 return x;
4379 }
4380
IsSquare() const4381 bool Integer::IsSquare() const
4382 {
4383 Integer r = SquareRoot();
4384 return *this == r.Squared();
4385 }
4386
IsUnit() const4387 bool Integer::IsUnit() const
4388 {
4389 return (WordCount() == 1) && (reg[0] == 1);
4390 }
4391
MultiplicativeInverse() const4392 Integer Integer::MultiplicativeInverse() const
4393 {
4394 return IsUnit() ? *this : Zero();
4395 }
4396
a_times_b_mod_c(const Integer & x,const Integer & y,const Integer & m)4397 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
4398 {
4399 CRYPTOPP_ASSERT(m.NotZero());
4400 if (m.IsZero())
4401 throw Integer::DivideByZero();
4402
4403 return x*y%m;
4404 }
4405
a_exp_b_mod_c(const Integer & x,const Integer & e,const Integer & m)4406 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
4407 {
4408 CRYPTOPP_ASSERT(m.NotZero());
4409 if (m.IsZero())
4410 throw Integer::DivideByZero();
4411
4412 ModularArithmetic mr(m);
4413 return mr.Exponentiate(x, e);
4414 }
4415
Gcd(const Integer & a,const Integer & b)4416 Integer Integer::Gcd(const Integer &a, const Integer &b)
4417 {
4418 return EuclideanDomainOf<Integer>().Gcd(a, b);
4419 }
4420
InverseMod(const Integer & m) const4421 Integer Integer::InverseMod(const Integer &m) const
4422 {
4423 CRYPTOPP_ASSERT(m.NotNegative());
4424 CRYPTOPP_ASSERT(m.NotZero());
4425
4426 if (IsNegative())
4427 return Modulo(m).InverseModNext(m);
4428
4429 // http://github.com/weidai11/cryptopp/issues/602
4430 if (*this >= m)
4431 return Modulo(m).InverseModNext(m);
4432
4433 return InverseModNext(m);
4434 }
4435
InverseModNext(const Integer & m) const4436 Integer Integer::InverseModNext(const Integer &m) const
4437 {
4438 CRYPTOPP_ASSERT(m.NotNegative());
4439 CRYPTOPP_ASSERT(m.NotZero());
4440
4441 if (m.IsEven())
4442 {
4443 if (!m || IsEven())
4444 return Zero(); // no inverse
4445 if (*this == One())
4446 return One();
4447
4448 Integer u = m.Modulo(*this).InverseModNext(*this);
4449 return !u ? Zero() : (m*(*this-u)+1)/(*this);
4450 }
4451
4452 // AlmostInverse requires a 4x workspace
4453 IntegerSecBlock T(m.reg.size() * 4);
4454 Integer r((word)0, m.reg.size());
4455 unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
4456 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
4457 return r;
4458 }
4459
InverseMod(word mod) const4460 word Integer::InverseMod(word mod) const
4461 {
4462 CRYPTOPP_ASSERT(mod != 0);
4463
4464 word g0 = mod, g1 = *this % mod;
4465 word v0 = 0, v1 = 1;
4466 word y;
4467
4468 while (g1)
4469 {
4470 if (g1 == 1)
4471 return v1;
4472 y = g0 / g1;
4473 g0 = g0 % g1;
4474 v0 += y * v1;
4475
4476 if (!g0)
4477 break;
4478 if (g0 == 1)
4479 return mod-v0;
4480 y = g1 / g0;
4481 g1 = g1 % g0;
4482 v1 += y * v0;
4483 }
4484 return 0;
4485 }
4486
4487 // ********************************************************
4488
ModularArithmetic(BufferedTransformation & bt)4489 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
4490 {
4491 BERSequenceDecoder seq(bt);
4492 OID oid(seq);
4493 if (oid != ASN1::prime_field())
4494 BERDecodeError();
4495 m_modulus.BERDecode(seq);
4496 seq.MessageEnd();
4497 m_result.reg.resize(m_modulus.reg.size());
4498 }
4499
DEREncode(BufferedTransformation & bt) const4500 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
4501 {
4502 DERSequenceEncoder seq(bt);
4503 ASN1::prime_field().DEREncode(seq);
4504 m_modulus.DEREncode(seq);
4505 seq.MessageEnd();
4506 }
4507
DEREncodeElement(BufferedTransformation & out,const Element & a) const4508 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
4509 {
4510 a.DEREncodeAsOctetString(out, MaxElementByteLength());
4511 }
4512
BERDecodeElement(BufferedTransformation & in,Element & a) const4513 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
4514 {
4515 a.BERDecodeAsOctetString(in, MaxElementByteLength());
4516 }
4517
Half(const Integer & a) const4518 const Integer& ModularArithmetic::Half(const Integer &a) const
4519 {
4520 if (a.reg.size()==m_modulus.reg.size())
4521 {
4522 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4523 return m_result;
4524 }
4525 else
4526 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4527 }
4528
Add(const Integer & a,const Integer & b) const4529 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
4530 {
4531 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4532 {
4533 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
4534 || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4535 {
4536 CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4537 }
4538 return m_result;
4539 }
4540 else
4541 {
4542 m_result1 = a+b;
4543 if (m_result1 >= m_modulus)
4544 m_result1 -= m_modulus;
4545 return m_result1;
4546 }
4547 }
4548
Accumulate(Integer & a,const Integer & b) const4549 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
4550 {
4551 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4552 {
4553 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4554 || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4555 {
4556 CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4557 }
4558 }
4559 else
4560 {
4561 a+=b;
4562 if (a>=m_modulus)
4563 a-=m_modulus;
4564 }
4565
4566 return a;
4567 }
4568
Subtract(const Integer & a,const Integer & b) const4569 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
4570 {
4571 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4572 {
4573 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
4574 CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4575 return m_result;
4576 }
4577 else
4578 {
4579 m_result1 = a-b;
4580 if (m_result1.IsNegative())
4581 m_result1 += m_modulus;
4582 return m_result1;
4583 }
4584 }
4585
Reduce(Integer & a,const Integer & b) const4586 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
4587 {
4588 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4589 {
4590 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4591 CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4592 }
4593 else
4594 {
4595 a-=b;
4596 if (a.IsNegative())
4597 a+=m_modulus;
4598 }
4599
4600 return a;
4601 }
4602
Inverse(const Integer & a) const4603 const Integer& ModularArithmetic::Inverse(const Integer &a) const
4604 {
4605 if (!a)
4606 return a;
4607
4608 CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
4609 if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
4610 Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
4611
4612 return m_result;
4613 }
4614
CascadeExponentiate(const Integer & x,const Integer & e1,const Integer & y,const Integer & e2) const4615 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
4616 {
4617 if (m_modulus.IsOdd())
4618 {
4619 MontgomeryRepresentation dr(m_modulus);
4620 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
4621 }
4622 else
4623 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
4624 }
4625
SimultaneousExponentiate(Integer * results,const Integer & base,const Integer * exponents,unsigned int exponentsCount) const4626 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
4627 {
4628 if (m_modulus.IsOdd())
4629 {
4630 MontgomeryRepresentation dr(m_modulus);
4631 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
4632 for (unsigned int i=0; i<exponentsCount; i++)
4633 results[i] = dr.ConvertOut(results[i]);
4634 }
4635 else
4636 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
4637 }
4638
MontgomeryRepresentation(const Integer & m)4639 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m) // modulus must be odd
4640 : ModularArithmetic(m),
4641 m_u((word)0, m_modulus.reg.size()),
4642 m_workspace(5*m_modulus.reg.size())
4643 {
4644 if (!m_modulus.IsOdd())
4645 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4646
4647 RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
4648 }
4649
Multiply(const Integer & a,const Integer & b) const4650 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
4651 {
4652 word *const T = m_workspace.begin();
4653 word *const R = m_result.reg.begin();
4654 const size_t N = m_modulus.reg.size();
4655 CRYPTOPP_ASSERT(a.reg.size()<=N && b.reg.size()<=N);
4656
4657 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4658 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4659 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4660 return m_result;
4661 }
4662
Square(const Integer & a) const4663 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
4664 {
4665 word *const T = m_workspace.begin();
4666 word *const R = m_result.reg.begin();
4667 const size_t N = m_modulus.reg.size();
4668 CRYPTOPP_ASSERT(a.reg.size()<=N);
4669
4670 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4671 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4672 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4673 return m_result;
4674 }
4675
ConvertOut(const Integer & a) const4676 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
4677 {
4678 word *const T = m_workspace.begin();
4679 word *const R = m_result.reg.begin();
4680 const size_t N = m_modulus.reg.size();
4681 CRYPTOPP_ASSERT(a.reg.size()<=N);
4682
4683 CopyWords(T, a.reg, a.reg.size());
4684 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4685 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4686 return m_result;
4687 }
4688
MultiplicativeInverse(const Integer & a) const4689 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
4690 {
4691 // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
4692 word *const T = m_workspace.begin();
4693 word *const R = m_result.reg.begin();
4694 const size_t N = m_modulus.reg.size();
4695 CRYPTOPP_ASSERT(a.reg.size()<=N);
4696
4697 CopyWords(T, a.reg, a.reg.size());
4698 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4699 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4700 unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4701
4702 // cout << "k=" << k << " N*32=" << 32*N << endl;
4703
4704 if (k>N*WORD_BITS)
4705 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4706 else
4707 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4708
4709 return m_result;
4710 }
4711
4712 // Specialization declared in misc.h to allow us to print integers
4713 // with additional control options, like arbirary bases and uppercase.
4714 template <> CRYPTOPP_DLL
IntToString(Integer value,unsigned int base)4715 std::string IntToString<Integer>(Integer value, unsigned int base)
4716 {
4717 // Hack... set the high bit for uppercase. Set the next bit fo a suffix.
4718 static const unsigned int BIT_32 = (1U << 31);
4719 const bool UPPER = !!(base & BIT_32);
4720 static const unsigned int BIT_31 = (1U << 30);
4721 const bool BASE = !!(base & BIT_31);
4722
4723 const char CH = UPPER ? 'A' : 'a';
4724 base &= ~(BIT_32|BIT_31);
4725 CRYPTOPP_ASSERT(base >= 2 && base <= 32);
4726
4727 if (value == 0)
4728 return "0";
4729
4730 bool negative = false, zero = false;
4731 if (value.IsNegative())
4732 {
4733 negative = true;
4734 value.Negate();
4735 }
4736
4737 if (!value)
4738 zero = true;
4739
4740 SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
4741 Integer temp;
4742
4743 unsigned int i=0;
4744 while (!!value)
4745 {
4746 word digit;
4747 Integer::Divide(digit, temp, value, word(base));
4748 s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit);
4749 value.swap(temp);
4750 }
4751
4752 std::string result;
4753 result.reserve(i+2);
4754
4755 if (negative)
4756 result += '-';
4757
4758 if (zero)
4759 result += '0';
4760
4761 while (i--)
4762 result += s[i];
4763
4764 if (BASE)
4765 {
4766 if (base == 10)
4767 result += '.';
4768 else if (base == 16)
4769 result += 'h';
4770 else if (base == 8)
4771 result += 'o';
4772 else if (base == 2)
4773 result += 'b';
4774 }
4775
4776 return result;
4777 }
4778
4779 // Specialization declared in misc.h to avoid Coverity findings.
4780 template <> CRYPTOPP_DLL
IntToString(word64 value,unsigned int base)4781 std::string IntToString<word64>(word64 value, unsigned int base)
4782 {
4783 // Hack... set the high bit for uppercase.
4784 static const unsigned int HIGH_BIT = (1U << 31);
4785 const char CH = !!(base & HIGH_BIT) ? 'A' : 'a';
4786 base &= ~HIGH_BIT;
4787
4788 CRYPTOPP_ASSERT(base >= 2);
4789 if (value == 0)
4790 return "0";
4791
4792 std::string result;
4793 while (value > 0)
4794 {
4795 word64 digit = value % base;
4796 result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result;
4797 value /= base;
4798 }
4799 return result;
4800 }
4801
4802 #ifndef CRYPTOPP_NO_ASSIGN_TO_INTEGER
4803 // Allow the linker to discard Integer code if not needed.
4804 // Also see http://github.com/weidai11/cryptopp/issues/389.
AssignIntToInteger(const std::type_info & valueType,void * pInteger,const void * pInt)4805 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
4806 {
4807 if (valueType != typeid(Integer))
4808 return false;
4809 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
4810 return true;
4811 }
4812 #endif // CRYPTOPP_NO_ASSIGN_TO_INTEGER
4813
4814 // *************************** C++ Static Initialization ***************************
4815
4816 class InitInteger
4817 {
4818 public:
InitInteger()4819 InitInteger()
4820 {
4821 SetFunctionPointers();
4822 }
4823 };
4824
4825 // This is not really needed because each Integer can dynamically initialize
4826 // itself, but we take a peephole optimization and initialize the class once
4827 // if init priorities are available. Dynamic initialization will be used if
4828 // init priorities are not available.
4829
4830 #if defined(HAVE_GCC_INIT_PRIORITY)
4831 const InitInteger s_init __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 10))) = InitInteger();
4832 const Integer g_zero __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 11))) = Integer(0L);
4833 const Integer g_one __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 12))) = Integer(1L);
4834 const Integer g_two __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 13))) = Integer(2L);
4835 #elif defined(HAVE_MSC_INIT_PRIORITY)
4836 #pragma warning(disable: 4075)
4837 #pragma init_seg(".CRT$XCU")
4838 const InitInteger s_init;
4839 const Integer g_zero(0L);
4840 const Integer g_one(1L);
4841 const Integer g_two(2L);
4842 #pragma warning(default: 4075)
4843 #elif HAVE_XLC_INIT_PRIORITY
4844 // XLC needs constant, not a define
4845 #pragma priority(280)
4846 const InitInteger s_init;
4847 const Integer g_zero(0L);
4848 const Integer g_one(1L);
4849 const Integer g_two(2L);
4850 #else
4851 const InitInteger s_init;
4852 #endif
4853
4854 // ***************** Library code ********************
4855
Zero()4856 const Integer &Integer::Zero()
4857 {
4858 #if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY) || defined(HAVE_XLC_INIT_PRIORITY)
4859 return g_zero;
4860 #elif defined(CRYPTOPP_CXX11_STATIC_INIT)
4861 static const Integer s_zero(0L);
4862 return s_zero;
4863 #else // Potential memory leak. Avoid if possible.
4864 return Singleton<Integer, NewInteger<0L> >().Ref();
4865 #endif
4866 }
4867
One()4868 const Integer &Integer::One()
4869 {
4870 #if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY) || defined(HAVE_XLC_INIT_PRIORITY)
4871 return g_one;
4872 #elif defined(CRYPTOPP_CXX11_STATIC_INIT)
4873 static const Integer s_one(1L);
4874 return s_one;
4875 #else // Potential memory leak. Avoid if possible.
4876 return Singleton<Integer, NewInteger<1L> >().Ref();
4877 #endif
4878 }
4879
Two()4880 const Integer &Integer::Two()
4881 {
4882 #if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY) || defined(HAVE_XLC_INIT_PRIORITY)
4883 return g_two;
4884 #elif defined(CRYPTOPP_CXX11_STATIC_INIT)
4885 static const Integer s_two(2L);
4886 return s_two;
4887 #else // Potential memory leak. Avoid if possible.
4888 return Singleton<Integer, NewInteger<2L> >().Ref();
4889 #endif
4890 }
4891
4892 NAMESPACE_END
4893
4894 #endif // CRYPTOPP_IMPORTS
4895