Crypto++  5.6.5
Free C++ class library of cryptographic schemes
integer.cpp
1 // integer.cpp - written and placed in the public domain by Wei Dai
2 // contains public domain code contributed by Alister Lee and Leonard Janke
3 
4 #include "pch.h"
5 #include "config.h"
6 
7 #if CRYPTOPP_MSC_VERSION
8 # pragma warning(disable: 4100)
9 #endif
10 
11 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
12 # pragma GCC diagnostic ignored "-Wunused"
13 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
14 #endif
15 
16 #ifndef CRYPTOPP_IMPORTS
17 
18 #include "integer.h"
19 #include "secblock.h"
20 #include "modarith.h"
21 #include "nbtheory.h"
22 #include "smartptr.h"
23 #include "algparam.h"
24 #include "filters.h"
25 #include "asn.h"
26 #include "oids.h"
27 #include "words.h"
28 #include "pubkey.h" // for P1363_KDF2
29 #include "sha.h"
30 #include "cpu.h"
31 #include "misc.h"
32 
33 #include <iostream>
34 
35 #if (_MSC_VER >= 1400) && !defined(_M_ARM)
36  #include <intrin.h>
37 #endif
38 
39 #ifdef __DECCXX
40  #include <c_asm.h>
41 #endif
42 
43 #ifdef CRYPTOPP_MSVC6_NO_PP
44  #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
45 #endif
46 
47 // "Error: The operand ___LKDB cannot be assigned to", http://github.com/weidai11/cryptopp/issues/188
48 #if (__SUNPRO_CC >= 0x5130)
49 # define MAYBE_CONST
50 # define MAYBE_UNCONST_CAST const_cast<word*>
51 #else
52 # define MAYBE_CONST const
53 # define MAYBE_UNCONST_CAST
54 #endif
55 
56 // "Inline assembly operands don't work with .intel_syntax",
57 // http://llvm.org/bugs/show_bug.cgi?id=24232
58 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM)
59 # undef CRYPTOPP_X86_ASM_AVAILABLE
60 # undef CRYPTOPP_X32_ASM_AVAILABLE
61 # undef CRYPTOPP_X64_ASM_AVAILABLE
62 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
63 # undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
64 # define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
65 # define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
66 #else
67 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
68 #endif
69 
70 NAMESPACE_BEGIN(CryptoPP)
71 
72 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
73 {
74  if (valueType != typeid(Integer))
75  return false;
76  *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
77  return true;
78 }
79 
80 inline static int Compare(const word *A, const word *B, size_t N)
81 {
82  while (N--)
83  if (A[N] > B[N])
84  return 1;
85  else if (A[N] < B[N])
86  return -1;
87 
88  return 0;
89 }
90 
91 inline static int Increment(word *A, size_t N, word B=1)
92 {
93  CRYPTOPP_ASSERT(N);
94  word t = A[0];
95  A[0] = t+B;
96  if (A[0] >= t)
97  return 0;
98  for (unsigned i=1; i<N; i++)
99  if (++A[i])
100  return 0;
101  return 1;
102 }
103 
104 inline static int Decrement(word *A, size_t N, word B=1)
105 {
106  CRYPTOPP_ASSERT(N);
107  word t = A[0];
108  A[0] = t-B;
109  if (A[0] <= t)
110  return 0;
111  for (unsigned i=1; i<N; i++)
112  if (A[i]--)
113  return 0;
114  return 1;
115 }
116 
117 static void TwosComplement(word *A, size_t N)
118 {
119  Decrement(A, N);
120  for (unsigned i=0; i<N; i++)
121  A[i] = ~A[i];
122 }
123 
124 static word AtomicInverseModPower2(word A)
125 {
126  CRYPTOPP_ASSERT(A%2==1);
127 
128  word R=A%8;
129 
130  for (unsigned i=3; i<WORD_BITS; i*=2)
131  R = R*(2-R*A);
132 
133  CRYPTOPP_ASSERT(R*A==1);
134  return R;
135 }
136 
137 // ********************************************************
138 
139 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
140  #define Declare2Words(x) word x##0, x##1;
141  #define AssignWord(a, b) a##0 = b; a##1 = 0;
142  #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
143  #define LowWord(a) a##0
144  #define HighWord(a) a##1
145  #ifdef _MSC_VER
146  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
147  #ifndef __INTEL_COMPILER
148  #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
149  #endif
150  #elif defined(__DECCXX)
151  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
152  #elif defined(__x86_64__)
153  #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
154  // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
155  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
156  #else
157  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
158  #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
159  #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
160  #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
161  #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
162  #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
163  #endif
164  #endif
165  #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
166  #ifndef Double3Words
167  #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
168  #endif
169  #ifndef Acc2WordsBy2
170  #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
171  #endif
172  #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
173  #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
174  #define GetCarry(u) u##1
175  #define GetBorrow(u) u##1
176 #else
177  #define Declare2Words(x) dword x;
178  #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
179  #define MultiplyWords(p, a, b) p = __emulu(a, b);
180  #else
181  #define MultiplyWords(p, a, b) p = (dword)a*b;
182  #endif
183  #define AssignWord(a, b) a = b;
184  #define Add2WordsBy1(a, b, c) a = b + c;
185  #define Acc2WordsBy2(a, b) a += b;
186  #define LowWord(a) word(a)
187  #define HighWord(a) word(a>>WORD_BITS)
188  #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
189  #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
190  #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
191  #define GetCarry(u) HighWord(u)
192  #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
193 #endif
194 #ifndef MulAcc
195  #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
196 #endif
197 #ifndef Acc2WordsBy1
198  #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
199 #endif
200 #ifndef Acc3WordsBy2
201  #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
202 #endif
203 
204 class DWord
205 {
206 public:
207 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
208  DWord() : m_whole() { }
209 #else
210  DWord() : m_halfs() { }
211 #endif
212 
213 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
214  explicit DWord(word low) : m_whole(low) { }
215 #else
216  explicit DWord(word low) : m_halfs()
217  {
218  m_halfs.low = low;
219  }
220 #endif
221 
222 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
223  DWord(word low, word high) : m_whole()
224 #else
225  DWord(word low, word high) : m_halfs()
226 #endif
227  {
228 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
229 # if defined(IS_LITTLE_ENDIAN)
230  const word t[2] = {low,high};
231  memcpy(&m_whole, &t, sizeof(m_whole));
232 # else
233  const word t[2] = {high,low};
234  memcpy(&m_whole, &t, sizeof(m_whole));
235 # endif
236 #else
237  m_halfs.low = low;
238  m_halfs.high = high;
239 #endif
240  }
241 
242  static DWord Multiply(word a, word b)
243  {
244  DWord r;
245  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
246  r.m_whole = (dword)a * b;
247  #elif defined(MultiplyWordsLoHi)
248  MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
249  #else
250  CRYPTOPP_ASSERT(0);
251  #endif
252  return r;
253  }
254 
255  static DWord MultiplyAndAdd(word a, word b, word c)
256  {
257  DWord r = Multiply(a, b);
258  return r += c;
259  }
260 
261  DWord & operator+=(word a)
262  {
263  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
264  m_whole = m_whole + a;
265  #else
266  m_halfs.low += a;
267  m_halfs.high += (m_halfs.low < a);
268  #endif
269  return *this;
270  }
271 
272  DWord operator+(word a)
273  {
274  DWord r;
275  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
276  r.m_whole = m_whole + a;
277  #else
278  r.m_halfs.low = m_halfs.low + a;
279  r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
280  #endif
281  return r;
282  }
283 
284  DWord operator-(DWord a)
285  {
286  DWord r;
287  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
288  r.m_whole = m_whole - a.m_whole;
289  #else
290  r.m_halfs.low = m_halfs.low - a.m_halfs.low;
291  r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
292  #endif
293  return r;
294  }
295 
296  DWord operator-(word a)
297  {
298  DWord r;
299  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
300  r.m_whole = m_whole - a;
301  #else
302  r.m_halfs.low = m_halfs.low - a;
303  r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
304  #endif
305  return r;
306  }
307 
308  // returns quotient, which must fit in a word
309  word operator/(word divisor);
310 
311  word operator%(word a);
312 
313  bool operator!() const
314  {
315  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
316  return !m_whole;
317  #else
318  return !m_halfs.high && !m_halfs.low;
319  #endif
320  }
321 
322  // TODO: When NATIVE_DWORD is in effect, we access high and low, which are inactive
323  // union members, and that's UB. Also see http://stackoverflow.com/q/11373203.
324  word GetLowHalf() const {return m_halfs.low;}
325  word GetHighHalf() const {return m_halfs.high;}
326  word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
327 
328 private:
329  // Issue 274, "Types cannot be declared in anonymous union",
330  // http://github.com/weidai11/cryptopp/issues/274
331  // Thanks to Martin Bonner at http://stackoverflow.com/a/39507183
332  struct half_words
333  {
334  #ifdef IS_LITTLE_ENDIAN
335  word low;
336  word high;
337  #else
338  word high;
339  word low;
340  #endif
341  };
342  union
343  {
344  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
345  dword m_whole;
346  #endif
347  half_words m_halfs;
348  };
349 };
350 
351 class Word
352 {
353 public:
354  // Converity finding on default ctor. We've isntrumented the code,
355  // and cannot uncover a case where it affects a result.
356 #if defined(__COVERITY__)
357  Word() : m_whole(0) {}
358 #elif CRYPTOPP_DEBUG
359  // Repeating pattern of 1010 for debug builds to break things...
360  Word() : m_whole(0) {memset(&m_whole, 0xaa, sizeof(m_whole));}
361 #else
362  Word() {}
363 #endif
364 
365  Word(word value) : m_whole(value) {}
366  Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
367 
368  static Word Multiply(hword a, hword b)
369  {
370  Word r;
371  r.m_whole = (word)a * b;
372  return r;
373  }
374 
375  Word operator-(Word a)
376  {
377  Word r;
378  r.m_whole = m_whole - a.m_whole;
379  return r;
380  }
381 
382  Word operator-(hword a)
383  {
384  Word r;
385  r.m_whole = m_whole - a;
386  return r;
387  }
388 
389  // returns quotient, which must fit in a word
390  hword operator/(hword divisor)
391  {
392  return hword(m_whole / divisor);
393  }
394 
395  bool operator!() const
396  {
397  return !m_whole;
398  }
399 
400  word GetWhole() const {return m_whole;}
401  hword GetLowHalf() const {return hword(m_whole);}
402  hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
403  hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
404 
405 private:
406  word m_whole;
407 };
408 
409 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
410 template <class S, class D>
411 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
412 {
413  CRYPTOPP_UNUSED(dummy);
414 
415  // CRYPTOPP_ASSERT {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
416  CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0));
417 
418  // estimate the quotient: do a 2 S by 1 S divide.
419  // Profiling tells us the original second case was dominant, so it was promoted to the first If statement.
420  // The code change occurred at Commit dc99266599a0e72d.
421 
422  S Q; bool pre = (S(B1+1) == 0);
423  if (B1 > 0 && !pre)
424  Q = D(A[1], A[2]) / S(B1+1);
425  else if (pre)
426  Q = A[2];
427  else
428  Q = D(A[0], A[1]) / B0;
429 
430  // now subtract Q*B from A
431  D p = D::Multiply(B0, Q);
432  D u = (D) A[0] - p.GetLowHalf();
433  A[0] = u.GetLowHalf();
434  u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
435  A[1] = u.GetLowHalf();
436  A[2] += u.GetHighHalf();
437 
438  // Q <= actual quotient, so fix it
439  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
440  {
441  u = (D) A[0] - B0;
442  A[0] = u.GetLowHalf();
443  u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
444  A[1] = u.GetLowHalf();
445  A[2] += u.GetHighHalf();
446  Q++;
447  CRYPTOPP_ASSERT(Q); // shouldn't overflow
448  }
449 
450  return Q;
451 }
452 
453 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
454 template <class S, class D>
455 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
456 {
457  // Profiling tells us the original second case was dominant, so it was promoted to the first If statement.
458  // The code change occurred at Commit dc99266599a0e72d.
459 
460  if (!!B)
461  {
462  S Q[2];
463  T[0] = Al.GetLowHalf();
464  T[1] = Al.GetHighHalf();
465  T[2] = Ah.GetLowHalf();
466  T[3] = Ah.GetHighHalf();
467  Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
468  Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
469  return D(Q[0], Q[1]);
470  }
471  else // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
472  {
473  return D(Ah.GetLowHalf(), Ah.GetHighHalf());
474  }
475 }
476 
477 // returns quotient, which must fit in a word
478 inline word DWord::operator/(word a)
479 {
480  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
481  return word(m_whole / a);
482  #else
483  hword r[4];
484  return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
485  #endif
486 }
487 
488 inline word DWord::operator%(word a)
489 {
490  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
491  return word(m_whole % a);
492  #else
493  if (a < (word(1) << (WORD_BITS/2)))
494  {
495  hword h = hword(a);
496  word r = m_halfs.high % h;
497  r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
498  return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
499  }
500  else
501  {
502  hword r[4];
503  DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
504  return Word(r[0], r[1]).GetWhole();
505  }
506  #endif
507 }
508 
509 // ********************************************************
510 
511 // Use some tricks to share assembly code between MSVC, GCC, Clang and Sun CC.
512 #if defined(__GNUC__)
513  #define AddPrologue \
514  int result; \
515  __asm__ __volatile__ \
516  ( \
517  INTEL_NOPREFIX
518  #define AddEpilogue \
519  ATT_PREFIX \
520  : "=a" (result)\
521  : "d" (C), "a" (A), "D" (B), "c" (N) \
522  : "%esi", "memory", "cc" \
523  );\
524  return result;
525  #define MulPrologue \
526  __asm__ __volatile__ \
527  ( \
528  INTEL_NOPREFIX \
529  AS1( push ebx) \
530  AS2( mov ebx, edx)
531  #define MulEpilogue \
532  AS1( pop ebx) \
533  ATT_PREFIX \
534  : \
535  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
536  : "%esi", "memory", "cc" \
537  );
538  #define SquPrologue MulPrologue
539  #define SquEpilogue \
540  AS1( pop ebx) \
541  ATT_PREFIX \
542  : \
543  : "d" (s_maskLow16), "c" (C), "a" (A) \
544  : "%esi", "%edi", "memory", "cc" \
545  );
546  #define TopPrologue MulPrologue
547  #define TopEpilogue \
548  AS1( pop ebx) \
549  ATT_PREFIX \
550  : \
551  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
552  : "memory", "cc" \
553  );
554 #else
555  #define AddPrologue \
556  __asm push edi \
557  __asm push esi \
558  __asm mov eax, [esp+12] \
559  __asm mov edi, [esp+16]
560  #define AddEpilogue \
561  __asm pop esi \
562  __asm pop edi \
563  __asm ret 8
564 #if _MSC_VER < 1300
565  #define SaveEBX __asm push ebx
566  #define RestoreEBX __asm pop ebx
567 #else
568  #define SaveEBX
569  #define RestoreEBX
570 #endif
571  #define SquPrologue \
572  AS2( mov eax, A) \
573  AS2( mov ecx, C) \
574  SaveEBX \
575  AS2( lea ebx, s_maskLow16)
576  #define MulPrologue \
577  AS2( mov eax, A) \
578  AS2( mov edi, B) \
579  AS2( mov ecx, C) \
580  SaveEBX \
581  AS2( lea ebx, s_maskLow16)
582  #define TopPrologue \
583  AS2( mov eax, A) \
584  AS2( mov edi, B) \
585  AS2( mov ecx, C) \
586  AS2( mov esi, L) \
587  SaveEBX \
588  AS2( lea ebx, s_maskLow16)
589  #define SquEpilogue RestoreEBX
590  #define MulEpilogue RestoreEBX
591  #define TopEpilogue RestoreEBX
592 #endif
593 
594 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
595 extern "C" {
596 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
597 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
598 }
599 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
600 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
601 {
602  word result;
603  __asm__ __volatile__
604  (
605  INTEL_NOPREFIX
606  AS1( neg %1)
607  ASJ( jz, 1, f)
608  AS2( mov %0,[%3+8*%1])
609  AS2( add %0,[%4+8*%1])
610  AS2( mov [%2+8*%1],%0)
611  ASL(0)
612  AS2( mov %0,[%3+8*%1+8])
613  AS2( adc %0,[%4+8*%1+8])
614  AS2( mov [%2+8*%1+8],%0)
615  AS2( lea %1,[%1+2])
616  ASJ( jrcxz, 1, f)
617  AS2( mov %0,[%3+8*%1])
618  AS2( adc %0,[%4+8*%1])
619  AS2( mov [%2+8*%1],%0)
620  ASJ( jmp, 0, b)
621  ASL(1)
622  AS2( mov %0, 0)
623  AS2( adc %0, %0)
624  ATT_NOPREFIX
625  : "=&r" (result), "+c" (N)
626  : "r" (C+N), "r" (A+N), "r" (B+N)
627  : "memory", "cc"
628  );
629  return (int)result;
630 }
631 
632 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
633 {
634  word result;
635  __asm__ __volatile__
636  (
637  INTEL_NOPREFIX
638  AS1( neg %1)
639  ASJ( jz, 1, f)
640  AS2( mov %0,[%3+8*%1])
641  AS2( sub %0,[%4+8*%1])
642  AS2( mov [%2+8*%1],%0)
643  ASL(0)
644  AS2( mov %0,[%3+8*%1+8])
645  AS2( sbb %0,[%4+8*%1+8])
646  AS2( mov [%2+8*%1+8],%0)
647  AS2( lea %1,[%1+2])
648  ASJ( jrcxz, 1, f)
649  AS2( mov %0,[%3+8*%1])
650  AS2( sbb %0,[%4+8*%1])
651  AS2( mov [%2+8*%1],%0)
652  ASJ( jmp, 0, b)
653  ASL(1)
654  AS2( mov %0, 0)
655  AS2( adc %0, %0)
656  ATT_NOPREFIX
657  : "=&r" (result), "+c" (N)
658  : "r" (C+N), "r" (A+N), "r" (B+N)
659  : "memory", "cc"
660  );
661  return (int)result;
662 }
663 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
664 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
665 {
666  AddPrologue
667 
668  // now: eax = A, edi = B, edx = C, ecx = N
669  AS2( lea eax, [eax+4*ecx])
670  AS2( lea edi, [edi+4*ecx])
671  AS2( lea edx, [edx+4*ecx])
672 
673  AS1( neg ecx) // ecx is negative index
674  AS2( test ecx, 2) // this clears carry flag
675  ASJ( jz, 0, f)
676  AS2( sub ecx, 2)
677  ASJ( jmp, 1, f)
678 
679  ASL(0)
680  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
681  AS2( mov esi,[eax+4*ecx])
682  AS2( adc esi,[edi+4*ecx])
683  AS2( mov [edx+4*ecx],esi)
684  AS2( mov esi,[eax+4*ecx+4])
685  AS2( adc esi,[edi+4*ecx+4])
686  AS2( mov [edx+4*ecx+4],esi)
687  ASL(1)
688  AS2( mov esi,[eax+4*ecx+8])
689  AS2( adc esi,[edi+4*ecx+8])
690  AS2( mov [edx+4*ecx+8],esi)
691  AS2( mov esi,[eax+4*ecx+12])
692  AS2( adc esi,[edi+4*ecx+12])
693  AS2( mov [edx+4*ecx+12],esi)
694 
695  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
696  ASJ( jmp, 0, b)
697 
698  ASL(2)
699  AS2( mov eax, 0)
700  AS1( setc al) // store carry into eax (return result register)
701 
702  AddEpilogue
703 }
704 
705 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
706 {
707  AddPrologue
708 
709  // now: eax = A, edi = B, edx = C, ecx = N
710  AS2( lea eax, [eax+4*ecx])
711  AS2( lea edi, [edi+4*ecx])
712  AS2( lea edx, [edx+4*ecx])
713 
714  AS1( neg ecx) // ecx is negative index
715  AS2( test ecx, 2) // this clears carry flag
716  ASJ( jz, 0, f)
717  AS2( sub ecx, 2)
718  ASJ( jmp, 1, f)
719 
720  ASL(0)
721  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
722  AS2( mov esi,[eax+4*ecx])
723  AS2( sbb esi,[edi+4*ecx])
724  AS2( mov [edx+4*ecx],esi)
725  AS2( mov esi,[eax+4*ecx+4])
726  AS2( sbb esi,[edi+4*ecx+4])
727  AS2( mov [edx+4*ecx+4],esi)
728  ASL(1)
729  AS2( mov esi,[eax+4*ecx+8])
730  AS2( sbb esi,[edi+4*ecx+8])
731  AS2( mov [edx+4*ecx+8],esi)
732  AS2( mov esi,[eax+4*ecx+12])
733  AS2( sbb esi,[edi+4*ecx+12])
734  AS2( mov [edx+4*ecx+12],esi)
735 
736  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
737  ASJ( jmp, 0, b)
738 
739  ASL(2)
740  AS2( mov eax, 0)
741  AS1( setc al) // store carry into eax (return result register)
742 
743  AddEpilogue
744 }
745 
746 #if CRYPTOPP_INTEGER_SSE2
747 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
748 {
749  AddPrologue
750 
751  // now: eax = A, edi = B, edx = C, ecx = N
752  AS2( lea eax, [eax+4*ecx])
753  AS2( lea edi, [edi+4*ecx])
754  AS2( lea edx, [edx+4*ecx])
755 
756  AS1( neg ecx) // ecx is negative index
757  AS2( pxor mm2, mm2)
758  ASJ( jz, 2, f)
759  AS2( test ecx, 2) // this clears carry flag
760  ASJ( jz, 0, f)
761  AS2( sub ecx, 2)
762  ASJ( jmp, 1, f)
763 
764  ASL(0)
765  AS2( movd mm0, DWORD PTR [eax+4*ecx])
766  AS2( movd mm1, DWORD PTR [edi+4*ecx])
767  AS2( paddq mm0, mm1)
768  AS2( paddq mm2, mm0)
769  AS2( movd DWORD PTR [edx+4*ecx], mm2)
770  AS2( psrlq mm2, 32)
771 
772  AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
773  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
774  AS2( paddq mm0, mm1)
775  AS2( paddq mm2, mm0)
776  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
777  AS2( psrlq mm2, 32)
778 
779  ASL(1)
780  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
781  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
782  AS2( paddq mm0, mm1)
783  AS2( paddq mm2, mm0)
784  AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
785  AS2( psrlq mm2, 32)
786 
787  AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
788  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
789  AS2( paddq mm0, mm1)
790  AS2( paddq mm2, mm0)
791  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
792  AS2( psrlq mm2, 32)
793 
794  AS2( add ecx, 4)
795  ASJ( jnz, 0, b)
796 
797  ASL(2)
798  AS2( movd eax, mm2)
799  AS1( emms)
800 
801  AddEpilogue
802 }
803 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
804 {
805  AddPrologue
806 
807  // now: eax = A, edi = B, edx = C, ecx = N
808  AS2( lea eax, [eax+4*ecx])
809  AS2( lea edi, [edi+4*ecx])
810  AS2( lea edx, [edx+4*ecx])
811 
812  AS1( neg ecx) // ecx is negative index
813  AS2( pxor mm2, mm2)
814  ASJ( jz, 2, f)
815  AS2( test ecx, 2) // this clears carry flag
816  ASJ( jz, 0, f)
817  AS2( sub ecx, 2)
818  ASJ( jmp, 1, f)
819 
820  ASL(0)
821  AS2( movd mm0, DWORD PTR [eax+4*ecx])
822  AS2( movd mm1, DWORD PTR [edi+4*ecx])
823  AS2( psubq mm0, mm1)
824  AS2( psubq mm0, mm2)
825  AS2( movd DWORD PTR [edx+4*ecx], mm0)
826  AS2( psrlq mm0, 63)
827 
828  AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
829  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
830  AS2( psubq mm2, mm1)
831  AS2( psubq mm2, mm0)
832  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
833  AS2( psrlq mm2, 63)
834 
835  ASL(1)
836  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
837  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
838  AS2( psubq mm0, mm1)
839  AS2( psubq mm0, mm2)
840  AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
841  AS2( psrlq mm0, 63)
842 
843  AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
844  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
845  AS2( psubq mm2, mm1)
846  AS2( psubq mm2, mm0)
847  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
848  AS2( psrlq mm2, 63)
849 
850  AS2( add ecx, 4)
851  ASJ( jnz, 0, b)
852 
853  ASL(2)
854  AS2( movd eax, mm2)
855  AS1( emms)
856 
857  AddEpilogue
858 }
859 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
860 #else
861 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
862 {
863  CRYPTOPP_ASSERT (N%2 == 0);
864 
865  Declare2Words(u);
866  AssignWord(u, 0);
867  for (size_t i=0; i<N; i+=2)
868  {
869  AddWithCarry(u, A[i], B[i]);
870  C[i] = LowWord(u);
871  AddWithCarry(u, A[i+1], B[i+1]);
872  C[i+1] = LowWord(u);
873  }
874  return int(GetCarry(u));
875 }
876 
877 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
878 {
879  CRYPTOPP_ASSERT (N%2 == 0);
880 
881  Declare2Words(u);
882  AssignWord(u, 0);
883  for (size_t i=0; i<N; i+=2)
884  {
885  SubtractWithBorrow(u, A[i], B[i]);
886  C[i] = LowWord(u);
887  SubtractWithBorrow(u, A[i+1], B[i+1]);
888  C[i+1] = LowWord(u);
889  }
890  return int(GetBorrow(u));
891 }
892 #endif
893 
894 static word LinearMultiply(word *C, const word *AA, word B, size_t N)
895 {
896  // http://github.com/weidai11/cryptopp/issues/188
897  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
898 
899  word carry=0;
900  for(unsigned i=0; i<N; i++)
901  {
902  Declare2Words(p);
903  MultiplyWords(p, A[i], B);
904  Acc2WordsBy1(p, carry);
905  C[i] = LowWord(p);
906  carry = HighWord(p);
907  }
908  return carry;
909 }
910 
911 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
912 
913 #define Mul_2 \
914  Mul_Begin(2) \
915  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
916  Mul_End(1, 1)
917 
918 #define Mul_4 \
919  Mul_Begin(4) \
920  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
921  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
922  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
923  Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
924  Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
925  Mul_End(5, 3)
926 
927 #define Mul_8 \
928  Mul_Begin(8) \
929  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
930  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
931  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
932  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
933  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
934  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
935  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
936  Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
937  Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
938  Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
939  Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
940  Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
941  Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
942  Mul_End(13, 7)
943 
944 #define Mul_16 \
945  Mul_Begin(16) \
946  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
947  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
948  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
949  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
950  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
951  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
952  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
953  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
954  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
955  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
956  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
957  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
958  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
959  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
960  Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
961  Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
962  Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
963  Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
964  Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
965  Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
966  Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
967  Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
968  Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
969  Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
970  Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
971  Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
972  Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
973  Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
974  Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
975  Mul_End(29, 15)
976 
977 #define Squ_2 \
978  Squ_Begin(2) \
979  Squ_End(2)
980 
981 #define Squ_4 \
982  Squ_Begin(4) \
983  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
984  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
985  Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
986  Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
987  Squ_End(4)
988 
989 #define Squ_8 \
990  Squ_Begin(8) \
991  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
992  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
993  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
994  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
995  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
996  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
997  Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
998  Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
999  Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1000  Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1001  Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
1002  Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
1003  Squ_End(8)
1004 
1005 #define Squ_16 \
1006  Squ_Begin(16) \
1007  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1008  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1009  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
1010  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
1011  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
1012  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1013  Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1014  Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
1015  Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1016  Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1017  Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
1018  Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
1019  Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
1020  Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
1021  Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
1022  Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1023  Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1024  Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1025  Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1026  Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1027  Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1028  Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1029  Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1030  Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1031  Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1032  Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1033  Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1034  Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1035  Squ_End(16)
1036 
1037 #define Bot_2 \
1038  Mul_Begin(2) \
1039  Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1040  Bot_End(2)
1041 
1042 #define Bot_4 \
1043  Mul_Begin(4) \
1044  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1045  Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
1046  Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
1047  Bot_End(4)
1048 
1049 #define Bot_8 \
1050  Mul_Begin(8) \
1051  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1052  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1053  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1054  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1055  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1056  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1057  Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1058  Bot_End(8)
1059 
1060 #define Bot_16 \
1061  Mul_Begin(16) \
1062  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1063  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1064  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1065  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1066  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1067  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1068  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1069  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1070  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1071  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1072  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1073  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1074  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1075  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1076  Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1077  Bot_End(16)
1078 
1079 #endif
1080 
1081 #if 0
1082 #define Mul_Begin(n) \
1083  Declare2Words(p) \
1084  Declare2Words(c) \
1085  Declare2Words(d) \
1086  MultiplyWords(p, A[0], B[0]) \
1087  AssignWord(c, LowWord(p)) \
1088  AssignWord(d, HighWord(p))
1089 
1090 #define Mul_Acc(i, j) \
1091  MultiplyWords(p, A[i], B[j]) \
1092  Acc2WordsBy1(c, LowWord(p)) \
1093  Acc2WordsBy1(d, HighWord(p))
1094 
1095 #define Mul_SaveAcc(k, i, j) \
1096  R[k] = LowWord(c); \
1097  Add2WordsBy1(c, d, HighWord(c)) \
1098  MultiplyWords(p, A[i], B[j]) \
1099  AssignWord(d, HighWord(p)) \
1100  Acc2WordsBy1(c, LowWord(p))
1101 
1102 #define Mul_End(n) \
1103  R[2*n-3] = LowWord(c); \
1104  Acc2WordsBy1(d, HighWord(c)) \
1105  MultiplyWords(p, A[n-1], B[n-1])\
1106  Acc2WordsBy2(d, p) \
1107  R[2*n-2] = LowWord(d); \
1108  R[2*n-1] = HighWord(d);
1109 
1110 #define Bot_SaveAcc(k, i, j) \
1111  R[k] = LowWord(c); \
1112  word e = LowWord(d) + HighWord(c); \
1113  e += A[i] * B[j];
1114 
1115 #define Bot_Acc(i, j) \
1116  e += A[i] * B[j];
1117 
1118 #define Bot_End(n) \
1119  R[n-1] = e;
1120 #else
1121 #define Mul_Begin(n) \
1122  Declare2Words(p) \
1123  word c; \
1124  Declare2Words(d) \
1125  MultiplyWords(p, A[0], B[0]) \
1126  c = LowWord(p); \
1127  AssignWord(d, HighWord(p))
1128 
1129 #define Mul_Acc(i, j) \
1130  MulAcc(c, d, A[i], B[j])
1131 
1132 #define Mul_SaveAcc(k, i, j) \
1133  R[k] = c; \
1134  c = LowWord(d); \
1135  AssignWord(d, HighWord(d)) \
1136  MulAcc(c, d, A[i], B[j])
1137 
1138 #define Mul_End(k, i) \
1139  R[k] = c; \
1140  MultiplyWords(p, A[i], B[i]) \
1141  Acc2WordsBy2(p, d) \
1142  R[k+1] = LowWord(p); \
1143  R[k+2] = HighWord(p);
1144 
1145 #define Bot_SaveAcc(k, i, j) \
1146  R[k] = c; \
1147  c = LowWord(d); \
1148  c += A[i] * B[j];
1149 
1150 #define Bot_Acc(i, j) \
1151  c += A[i] * B[j];
1152 
1153 #define Bot_End(n) \
1154  R[n-1] = c;
1155 #endif
1156 
1157 #define Squ_Begin(n) \
1158  Declare2Words(p) \
1159  word c; \
1160  Declare2Words(d) \
1161  Declare2Words(e) \
1162  MultiplyWords(p, A[0], A[0]) \
1163  R[0] = LowWord(p); \
1164  AssignWord(e, HighWord(p)) \
1165  MultiplyWords(p, A[0], A[1]) \
1166  c = LowWord(p); \
1167  AssignWord(d, HighWord(p)) \
1168  Squ_NonDiag \
1169 
1170 #define Squ_NonDiag \
1171  Double3Words(c, d)
1172 
1173 #define Squ_SaveAcc(k, i, j) \
1174  Acc3WordsBy2(c, d, e) \
1175  R[k] = c; \
1176  MultiplyWords(p, A[i], A[j]) \
1177  c = LowWord(p); \
1178  AssignWord(d, HighWord(p)) \
1179 
1180 #define Squ_Acc(i, j) \
1181  MulAcc(c, d, A[i], A[j])
1182 
1183 #define Squ_Diag(i) \
1184  Squ_NonDiag \
1185  MulAcc(c, d, A[i], A[i])
1186 
1187 #define Squ_End(n) \
1188  Acc3WordsBy2(c, d, e) \
1189  R[2*n-3] = c; \
1190  MultiplyWords(p, A[n-1], A[n-1])\
1191  Acc2WordsBy2(p, e) \
1192  R[2*n-2] = LowWord(p); \
1193  R[2*n-1] = HighWord(p);
1194 
1195 
1196 void Baseline_Multiply2(word *R, const word *AA, const word *BB)
1197 {
1198  // http://github.com/weidai11/cryptopp/issues/188
1199  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1200  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1201 
1202  Mul_2
1203 }
1204 
1205 void Baseline_Multiply4(word *R, const word *AA, const word *BB)
1206 {
1207  // http://github.com/weidai11/cryptopp/issues/188
1208  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1209  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1210 
1211  Mul_4
1212 }
1213 
1214 void Baseline_Multiply8(word *R, const word *AA, const word *BB)
1215 {
1216  // http://github.com/weidai11/cryptopp/issues/188
1217  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1218  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1219 
1220  Mul_8
1221 }
1222 
1223 void Baseline_Square2(word *R, const word *AA)
1224 {
1225  // http://github.com/weidai11/cryptopp/issues/188
1226  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1227 
1228  Squ_2
1229 }
1230 
1231 void Baseline_Square4(word *R, const word *AA)
1232 {
1233  // http://github.com/weidai11/cryptopp/issues/188
1234  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1235 
1236  Squ_4
1237 }
1238 
1239 void Baseline_Square8(word *R, const word *AA)
1240 {
1241  // http://github.com/weidai11/cryptopp/issues/188
1242  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1243 
1244  Squ_8
1245 }
1246 
1247 void Baseline_MultiplyBottom2(word *R, const word *AA, const word *BB)
1248 {
1249  // http://github.com/weidai11/cryptopp/issues/188
1250  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1251  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1252 
1253  Bot_2
1254 }
1255 
1256 void Baseline_MultiplyBottom4(word *R, const word *AA, const word *BB)
1257 {
1258  // http://github.com/weidai11/cryptopp/issues/188
1259  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1260  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1261 
1262  Bot_4
1263 }
1264 
1265 void Baseline_MultiplyBottom8(word *R, const word *AA, const word *BB)
1266 {
1267  // http://github.com/weidai11/cryptopp/issues/188
1268  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1269  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1270 
1271  Bot_8
1272 }
1273 
1274 #define Top_Begin(n) \
1275  Declare2Words(p) \
1276  word c; \
1277  Declare2Words(d) \
1278  MultiplyWords(p, A[0], B[n-2]);\
1279  AssignWord(d, HighWord(p));
1280 
1281 #define Top_Acc(i, j) \
1282  MultiplyWords(p, A[i], B[j]);\
1283  Acc2WordsBy1(d, HighWord(p));
1284 
1285 #define Top_SaveAcc0(i, j) \
1286  c = LowWord(d); \
1287  AssignWord(d, HighWord(d)) \
1288  MulAcc(c, d, A[i], B[j])
1289 
1290 #define Top_SaveAcc1(i, j) \
1291  c = L<c; \
1292  Acc2WordsBy1(d, c); \
1293  c = LowWord(d); \
1294  AssignWord(d, HighWord(d)) \
1295  MulAcc(c, d, A[i], B[j])
1296 
1297 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
1298 {
1299  CRYPTOPP_UNUSED(L);
1300  word T[4];
1301  Baseline_Multiply2(T, A, B);
1302  R[0] = T[2];
1303  R[1] = T[3];
1304 }
1305 
1306 void Baseline_MultiplyTop4(word *R, const word *AA, const word *BB, word L)
1307 {
1308  // http://github.com/weidai11/cryptopp/issues/188
1309  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1310  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1311 
1312  Top_Begin(4)
1313  Top_Acc(1, 1) Top_Acc(2, 0) \
1314  Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1315  Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1316  Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1317  Mul_End(1, 3)
1318 }
1319 
1320 void Baseline_MultiplyTop8(word *R, const word *AA, const word *BB, word L)
1321 {
1322  // http://github.com/weidai11/cryptopp/issues/188
1323  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1324  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1325 
1326  Top_Begin(8)
1327  Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1328  Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1329  Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1330  Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1331  Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1332  Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1333  Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1334  Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1335  Mul_End(5, 7)
1336 }
1337 
1338 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
1339 void Baseline_Multiply16(word *R, const word *AA, const word *BB)
1340 {
1341  // http://github.com/weidai11/cryptopp/issues/188
1342  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1343  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1344 
1345  Mul_16
1346 }
1347 
1348 void Baseline_Square16(word *R, const word *AA)
1349 {
1350  // http://github.com/weidai11/cryptopp/issues/188
1351  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1352 
1353  Squ_16
1354 }
1355 
1356 void Baseline_MultiplyBottom16(word *R, const word *AA, const word *BB)
1357 {
1358  // http://github.com/weidai11/cryptopp/issues/188
1359  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1360  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1361 
1362  Bot_16
1363 }
1364 
1365 void Baseline_MultiplyTop16(word *R, const word *AA, const word *BB, word L)
1366 {
1367  // http://github.com/weidai11/cryptopp/issues/188
1368  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1369  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1370 
1371  Top_Begin(16)
1372  Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1373  Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1374  Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1375  Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1376  Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1377  Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1378  Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1379  Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1380  Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1381  Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1382  Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1383  Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1384  Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1385  Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1386  Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1387  Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1388  Mul_End(13, 15)
1389 }
1390 #endif
1391 
1392 // ********************************************************
1393 
1394 #if CRYPTOPP_INTEGER_SSE2
1395 
1396 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
1397 
1398 #undef Mul_Begin
1399 #undef Mul_Acc
1400 #undef Top_Begin
1401 #undef Top_Acc
1402 #undef Squ_Acc
1403 #undef Squ_NonDiag
1404 #undef Squ_Diag
1405 #undef Squ_SaveAcc
1406 #undef Squ_Begin
1407 #undef Mul_SaveAcc
1408 #undef Bot_Acc
1409 #undef Bot_SaveAcc
1410 #undef Bot_End
1411 #undef Squ_End
1412 #undef Mul_End
1413 
1414 #define SSE2_FinalSave(k) \
1415  AS2( psllq xmm5, 16) \
1416  AS2( paddq xmm4, xmm5) \
1417  AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
1418 
1419 #define SSE2_SaveShift(k) \
1420  AS2( movq xmm0, xmm6) \
1421  AS2( punpckhqdq xmm6, xmm0) \
1422  AS2( movq xmm1, xmm7) \
1423  AS2( punpckhqdq xmm7, xmm1) \
1424  AS2( paddd xmm6, xmm0) \
1425  AS2( pslldq xmm6, 4) \
1426  AS2( paddd xmm7, xmm1) \
1427  AS2( paddd xmm4, xmm6) \
1428  AS2( pslldq xmm7, 4) \
1429  AS2( movq xmm6, xmm4) \
1430  AS2( paddd xmm5, xmm7) \
1431  AS2( movq xmm7, xmm5) \
1432  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1433  AS2( psrlq xmm6, 16) \
1434  AS2( paddq xmm6, xmm7) \
1435  AS2( punpckhqdq xmm4, xmm0) \
1436  AS2( punpckhqdq xmm5, xmm0) \
1437  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
1438  AS2( psrlq xmm6, 3*16) \
1439  AS2( paddd xmm4, xmm6) \
1440 
1441 #define Squ_SSE2_SaveShift(k) \
1442  AS2( movq xmm0, xmm6) \
1443  AS2( punpckhqdq xmm6, xmm0) \
1444  AS2( movq xmm1, xmm7) \
1445  AS2( punpckhqdq xmm7, xmm1) \
1446  AS2( paddd xmm6, xmm0) \
1447  AS2( pslldq xmm6, 4) \
1448  AS2( paddd xmm7, xmm1) \
1449  AS2( paddd xmm4, xmm6) \
1450  AS2( pslldq xmm7, 4) \
1451  AS2( movhlps xmm6, xmm4) \
1452  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1453  AS2( paddd xmm5, xmm7) \
1454  AS2( movhps QWORD PTR [esp+12], xmm5)\
1455  AS2( psrlq xmm4, 16) \
1456  AS2( paddq xmm4, xmm5) \
1457  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
1458  AS2( psrlq xmm4, 3*16) \
1459  AS2( paddd xmm4, xmm6) \
1460  AS2( movq QWORD PTR [esp+4], xmm4)\
1461 
1462 #define SSE2_FirstMultiply(i) \
1463  AS2( movdqa xmm7, [esi+(i)*16])\
1464  AS2( movdqa xmm5, [edi-(i)*16])\
1465  AS2( pmuludq xmm5, xmm7) \
1466  AS2( movdqa xmm4, [ebx])\
1467  AS2( movdqa xmm6, xmm4) \
1468  AS2( pand xmm4, xmm5) \
1469  AS2( psrld xmm5, 16) \
1470  AS2( pmuludq xmm7, [edx-(i)*16])\
1471  AS2( pand xmm6, xmm7) \
1472  AS2( psrld xmm7, 16)
1473 
1474 #define Squ_Begin(n) \
1475  SquPrologue \
1476  AS2( mov esi, esp)\
1477  AS2( and esp, 0xfffffff0)\
1478  AS2( lea edi, [esp-32*n])\
1479  AS2( sub esp, 32*n+16)\
1480  AS1( push esi)\
1481  AS2( mov esi, edi) \
1482  AS2( xor edx, edx) \
1483  ASL(1) \
1484  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1485  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1486  AS2( movdqa [edi+2*edx], xmm0) \
1487  AS2( psrlq xmm0, 32) \
1488  AS2( movdqa [edi+2*edx+16], xmm0) \
1489  AS2( movdqa [edi+16*n+2*edx], xmm1) \
1490  AS2( psrlq xmm1, 32) \
1491  AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
1492  AS2( add edx, 16) \
1493  AS2( cmp edx, 8*(n)) \
1494  ASJ( jne, 1, b) \
1495  AS2( lea edx, [edi+16*n])\
1496  SSE2_FirstMultiply(0) \
1497 
1498 #define Squ_Acc(i) \
1499  ASL(LSqu##i) \
1500  AS2( movdqa xmm1, [esi+(i)*16]) \
1501  AS2( movdqa xmm0, [edi-(i)*16]) \
1502  AS2( movdqa xmm2, [ebx]) \
1503  AS2( pmuludq xmm0, xmm1) \
1504  AS2( pmuludq xmm1, [edx-(i)*16]) \
1505  AS2( movdqa xmm3, xmm2) \
1506  AS2( pand xmm2, xmm0) \
1507  AS2( psrld xmm0, 16) \
1508  AS2( paddd xmm4, xmm2) \
1509  AS2( paddd xmm5, xmm0) \
1510  AS2( pand xmm3, xmm1) \
1511  AS2( psrld xmm1, 16) \
1512  AS2( paddd xmm6, xmm3) \
1513  AS2( paddd xmm7, xmm1) \
1514 
1515 #define Squ_Acc1(i)
1516 #define Squ_Acc2(i) ASC(call, LSqu##i)
1517 #define Squ_Acc3(i) Squ_Acc2(i)
1518 #define Squ_Acc4(i) Squ_Acc2(i)
1519 #define Squ_Acc5(i) Squ_Acc2(i)
1520 #define Squ_Acc6(i) Squ_Acc2(i)
1521 #define Squ_Acc7(i) Squ_Acc2(i)
1522 #define Squ_Acc8(i) Squ_Acc2(i)
1523 
1524 #define SSE2_End(E, n) \
1525  SSE2_SaveShift(2*(n)-3) \
1526  AS2( movdqa xmm7, [esi+16]) \
1527  AS2( movdqa xmm0, [edi]) \
1528  AS2( pmuludq xmm0, xmm7) \
1529  AS2( movdqa xmm2, [ebx]) \
1530  AS2( pmuludq xmm7, [edx]) \
1531  AS2( movdqa xmm6, xmm2) \
1532  AS2( pand xmm2, xmm0) \
1533  AS2( psrld xmm0, 16) \
1534  AS2( paddd xmm4, xmm2) \
1535  AS2( paddd xmm5, xmm0) \
1536  AS2( pand xmm6, xmm7) \
1537  AS2( psrld xmm7, 16) \
1538  SSE2_SaveShift(2*(n)-2) \
1539  SSE2_FinalSave(2*(n)-1) \
1540  AS1( pop esp)\
1541  E
1542 
1543 #define Squ_End(n) SSE2_End(SquEpilogue, n)
1544 #define Mul_End(n) SSE2_End(MulEpilogue, n)
1545 #define Top_End(n) SSE2_End(TopEpilogue, n)
1546 
1547 #define Squ_Column1(k, i) \
1548  Squ_SSE2_SaveShift(k) \
1549  AS2( add esi, 16) \
1550  SSE2_FirstMultiply(1)\
1551  Squ_Acc##i(i) \
1552  AS2( paddd xmm4, xmm4) \
1553  AS2( paddd xmm5, xmm5) \
1554  AS2( movdqa xmm3, [esi]) \
1555  AS2( movq xmm1, QWORD PTR [esi+8]) \
1556  AS2( pmuludq xmm1, xmm3) \
1557  AS2( pmuludq xmm3, xmm3) \
1558  AS2( movdqa xmm0, [ebx])\
1559  AS2( movdqa xmm2, xmm0) \
1560  AS2( pand xmm0, xmm1) \
1561  AS2( psrld xmm1, 16) \
1562  AS2( paddd xmm6, xmm0) \
1563  AS2( paddd xmm7, xmm1) \
1564  AS2( pand xmm2, xmm3) \
1565  AS2( psrld xmm3, 16) \
1566  AS2( paddd xmm6, xmm6) \
1567  AS2( paddd xmm7, xmm7) \
1568  AS2( paddd xmm4, xmm2) \
1569  AS2( paddd xmm5, xmm3) \
1570  AS2( movq xmm0, QWORD PTR [esp+4])\
1571  AS2( movq xmm1, QWORD PTR [esp+12])\
1572  AS2( paddd xmm4, xmm0)\
1573  AS2( paddd xmm5, xmm1)\
1574 
1575 #define Squ_Column0(k, i) \
1576  Squ_SSE2_SaveShift(k) \
1577  AS2( add edi, 16) \
1578  AS2( add edx, 16) \
1579  SSE2_FirstMultiply(1)\
1580  Squ_Acc##i(i) \
1581  AS2( paddd xmm6, xmm6) \
1582  AS2( paddd xmm7, xmm7) \
1583  AS2( paddd xmm4, xmm4) \
1584  AS2( paddd xmm5, xmm5) \
1585  AS2( movq xmm0, QWORD PTR [esp+4])\
1586  AS2( movq xmm1, QWORD PTR [esp+12])\
1587  AS2( paddd xmm4, xmm0)\
1588  AS2( paddd xmm5, xmm1)\
1589 
1590 #define SSE2_MulAdd45 \
1591  AS2( movdqa xmm7, [esi]) \
1592  AS2( movdqa xmm0, [edi]) \
1593  AS2( pmuludq xmm0, xmm7) \
1594  AS2( movdqa xmm2, [ebx]) \
1595  AS2( pmuludq xmm7, [edx]) \
1596  AS2( movdqa xmm6, xmm2) \
1597  AS2( pand xmm2, xmm0) \
1598  AS2( psrld xmm0, 16) \
1599  AS2( paddd xmm4, xmm2) \
1600  AS2( paddd xmm5, xmm0) \
1601  AS2( pand xmm6, xmm7) \
1602  AS2( psrld xmm7, 16)
1603 
1604 #define Mul_Begin(n) \
1605  MulPrologue \
1606  AS2( mov esi, esp)\
1607  AS2( and esp, 0xfffffff0)\
1608  AS2( sub esp, 48*n+16)\
1609  AS1( push esi)\
1610  AS2( xor edx, edx) \
1611  ASL(1) \
1612  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1613  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1614  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1615  AS2( movdqa [esp+20+2*edx], xmm0) \
1616  AS2( psrlq xmm0, 32) \
1617  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1618  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1619  AS2( psrlq xmm1, 32) \
1620  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1621  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1622  AS2( psrlq xmm2, 32) \
1623  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1624  AS2( add edx, 16) \
1625  AS2( cmp edx, 8*(n)) \
1626  ASJ( jne, 1, b) \
1627  AS2( lea edi, [esp+20])\
1628  AS2( lea edx, [esp+20+16*n])\
1629  AS2( lea esi, [esp+20+32*n])\
1630  SSE2_FirstMultiply(0) \
1631 
1632 #define Mul_Acc(i) \
1633  ASL(LMul##i) \
1634  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1635  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1636  AS2( movdqa xmm2, [ebx]) \
1637  AS2( pmuludq xmm0, xmm1) \
1638  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1639  AS2( movdqa xmm3, xmm2) \
1640  AS2( pand xmm2, xmm0) \
1641  AS2( psrld xmm0, 16) \
1642  AS2( paddd xmm4, xmm2) \
1643  AS2( paddd xmm5, xmm0) \
1644  AS2( pand xmm3, xmm1) \
1645  AS2( psrld xmm1, 16) \
1646  AS2( paddd xmm6, xmm3) \
1647  AS2( paddd xmm7, xmm1) \
1648 
1649 #define Mul_Acc1(i)
1650 #define Mul_Acc2(i) ASC(call, LMul##i)
1651 #define Mul_Acc3(i) Mul_Acc2(i)
1652 #define Mul_Acc4(i) Mul_Acc2(i)
1653 #define Mul_Acc5(i) Mul_Acc2(i)
1654 #define Mul_Acc6(i) Mul_Acc2(i)
1655 #define Mul_Acc7(i) Mul_Acc2(i)
1656 #define Mul_Acc8(i) Mul_Acc2(i)
1657 #define Mul_Acc9(i) Mul_Acc2(i)
1658 #define Mul_Acc10(i) Mul_Acc2(i)
1659 #define Mul_Acc11(i) Mul_Acc2(i)
1660 #define Mul_Acc12(i) Mul_Acc2(i)
1661 #define Mul_Acc13(i) Mul_Acc2(i)
1662 #define Mul_Acc14(i) Mul_Acc2(i)
1663 #define Mul_Acc15(i) Mul_Acc2(i)
1664 #define Mul_Acc16(i) Mul_Acc2(i)
1665 
1666 #define Mul_Column1(k, i) \
1667  SSE2_SaveShift(k) \
1668  AS2( add esi, 16) \
1669  SSE2_MulAdd45\
1670  Mul_Acc##i(i) \
1671 
1672 #define Mul_Column0(k, i) \
1673  SSE2_SaveShift(k) \
1674  AS2( add edi, 16) \
1675  AS2( add edx, 16) \
1676  SSE2_MulAdd45\
1677  Mul_Acc##i(i) \
1678 
1679 #define Bot_Acc(i) \
1680  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1681  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1682  AS2( pmuludq xmm0, xmm1) \
1683  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1684  AS2( paddq xmm4, xmm0) \
1685  AS2( paddd xmm6, xmm1)
1686 
1687 #define Bot_SaveAcc(k) \
1688  SSE2_SaveShift(k) \
1689  AS2( add edi, 16) \
1690  AS2( add edx, 16) \
1691  AS2( movdqa xmm6, [esi]) \
1692  AS2( movdqa xmm0, [edi]) \
1693  AS2( pmuludq xmm0, xmm6) \
1694  AS2( paddq xmm4, xmm0) \
1695  AS2( psllq xmm5, 16) \
1696  AS2( paddq xmm4, xmm5) \
1697  AS2( pmuludq xmm6, [edx])
1698 
1699 #define Bot_End(n) \
1700  AS2( movhlps xmm7, xmm6) \
1701  AS2( paddd xmm6, xmm7) \
1702  AS2( psllq xmm6, 32) \
1703  AS2( paddd xmm4, xmm6) \
1704  AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
1705  AS1( pop esp)\
1706  MulEpilogue
1707 
1708 #define Top_Begin(n) \
1709  TopPrologue \
1710  AS2( mov edx, esp)\
1711  AS2( and esp, 0xfffffff0)\
1712  AS2( sub esp, 48*n+16)\
1713  AS1( push edx)\
1714  AS2( xor edx, edx) \
1715  ASL(1) \
1716  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1717  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1718  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1719  AS2( movdqa [esp+20+2*edx], xmm0) \
1720  AS2( psrlq xmm0, 32) \
1721  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1722  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1723  AS2( psrlq xmm1, 32) \
1724  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1725  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1726  AS2( psrlq xmm2, 32) \
1727  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1728  AS2( add edx, 16) \
1729  AS2( cmp edx, 8*(n)) \
1730  ASJ( jne, 1, b) \
1731  AS2( mov eax, esi) \
1732  AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
1733  AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
1734  AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
1735  AS2( pxor xmm4, xmm4)\
1736  AS2( pxor xmm5, xmm5)
1737 
1738 #define Top_Acc(i) \
1739  AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
1740  AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1741  AS2( psrlq xmm0, 48) \
1742  AS2( paddd xmm5, xmm0)\
1743 
1744 #define Top_Column0(i) \
1745  AS2( psllq xmm5, 32) \
1746  AS2( add edi, 16) \
1747  AS2( add edx, 16) \
1748  SSE2_MulAdd45\
1749  Mul_Acc##i(i) \
1750 
1751 #define Top_Column1(i) \
1752  SSE2_SaveShift(0) \
1753  AS2( add esi, 16) \
1754  SSE2_MulAdd45\
1755  Mul_Acc##i(i) \
1756  AS2( shr eax, 16) \
1757  AS2( movd xmm0, eax)\
1758  AS2( movd xmm1, [ecx+4])\
1759  AS2( psrld xmm1, 16)\
1760  AS2( pcmpgtd xmm1, xmm0)\
1761  AS2( psrld xmm1, 31)\
1762  AS2( paddd xmm4, xmm1)\
1763 
1764 void SSE2_Square4(word *C, const word *A)
1765 {
1766  Squ_Begin(2)
1767  Squ_Column0(0, 1)
1768  Squ_End(2)
1769 }
1770 
1771 void SSE2_Square8(word *C, const word *A)
1772 {
1773  Squ_Begin(4)
1774 #ifndef __GNUC__
1775  ASJ( jmp, 0, f)
1776  Squ_Acc(2)
1777  AS1( ret) ASL(0)
1778 #endif
1779  Squ_Column0(0, 1)
1780  Squ_Column1(1, 1)
1781  Squ_Column0(2, 2)
1782  Squ_Column1(3, 1)
1783  Squ_Column0(4, 1)
1784  Squ_End(4)
1785 }
1786 
1787 void SSE2_Square16(word *C, const word *A)
1788 {
1789  Squ_Begin(8)
1790 #ifndef __GNUC__
1791  ASJ( jmp, 0, f)
1792  Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1793  AS1( ret) ASL(0)
1794 #endif
1795  Squ_Column0(0, 1)
1796  Squ_Column1(1, 1)
1797  Squ_Column0(2, 2)
1798  Squ_Column1(3, 2)
1799  Squ_Column0(4, 3)
1800  Squ_Column1(5, 3)
1801  Squ_Column0(6, 4)
1802  Squ_Column1(7, 3)
1803  Squ_Column0(8, 3)
1804  Squ_Column1(9, 2)
1805  Squ_Column0(10, 2)
1806  Squ_Column1(11, 1)
1807  Squ_Column0(12, 1)
1808  Squ_End(8)
1809 }
1810 
1811 void SSE2_Square32(word *C, const word *A)
1812 {
1813  Squ_Begin(16)
1814  ASJ( jmp, 0, f)
1815  Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1816  AS1( ret) ASL(0)
1817  Squ_Column0(0, 1)
1818  Squ_Column1(1, 1)
1819  Squ_Column0(2, 2)
1820  Squ_Column1(3, 2)
1821  Squ_Column0(4, 3)
1822  Squ_Column1(5, 3)
1823  Squ_Column0(6, 4)
1824  Squ_Column1(7, 4)
1825  Squ_Column0(8, 5)
1826  Squ_Column1(9, 5)
1827  Squ_Column0(10, 6)
1828  Squ_Column1(11, 6)
1829  Squ_Column0(12, 7)
1830  Squ_Column1(13, 7)
1831  Squ_Column0(14, 8)
1832  Squ_Column1(15, 7)
1833  Squ_Column0(16, 7)
1834  Squ_Column1(17, 6)
1835  Squ_Column0(18, 6)
1836  Squ_Column1(19, 5)
1837  Squ_Column0(20, 5)
1838  Squ_Column1(21, 4)
1839  Squ_Column0(22, 4)
1840  Squ_Column1(23, 3)
1841  Squ_Column0(24, 3)
1842  Squ_Column1(25, 2)
1843  Squ_Column0(26, 2)
1844  Squ_Column1(27, 1)
1845  Squ_Column0(28, 1)
1846  Squ_End(16)
1847 }
1848 
1849 void SSE2_Multiply4(word *C, const word *A, const word *B)
1850 {
1851  Mul_Begin(2)
1852 #ifndef __GNUC__
1853  ASJ( jmp, 0, f)
1854  Mul_Acc(2)
1855  AS1( ret) ASL(0)
1856 #endif
1857  Mul_Column0(0, 2)
1858  Mul_End(2)
1859 }
1860 
1861 void SSE2_Multiply8(word *C, const word *A, const word *B)
1862 {
1863  Mul_Begin(4)
1864 #ifndef __GNUC__
1865  ASJ( jmp, 0, f)
1866  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1867  AS1( ret) ASL(0)
1868 #endif
1869  Mul_Column0(0, 2)
1870  Mul_Column1(1, 3)
1871  Mul_Column0(2, 4)
1872  Mul_Column1(3, 3)
1873  Mul_Column0(4, 2)
1874  Mul_End(4)
1875 }
1876 
1877 void SSE2_Multiply16(word *C, const word *A, const word *B)
1878 {
1879  Mul_Begin(8)
1880 #ifndef __GNUC__
1881  ASJ( jmp, 0, f)
1882  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1883  AS1( ret) ASL(0)
1884 #endif
1885  Mul_Column0(0, 2)
1886  Mul_Column1(1, 3)
1887  Mul_Column0(2, 4)
1888  Mul_Column1(3, 5)
1889  Mul_Column0(4, 6)
1890  Mul_Column1(5, 7)
1891  Mul_Column0(6, 8)
1892  Mul_Column1(7, 7)
1893  Mul_Column0(8, 6)
1894  Mul_Column1(9, 5)
1895  Mul_Column0(10, 4)
1896  Mul_Column1(11, 3)
1897  Mul_Column0(12, 2)
1898  Mul_End(8)
1899 }
1900 
1901 void SSE2_Multiply32(word *C, const word *A, const word *B)
1902 {
1903  Mul_Begin(16)
1904  ASJ( jmp, 0, f)
1905  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1906  AS1( ret) ASL(0)
1907  Mul_Column0(0, 2)
1908  Mul_Column1(1, 3)
1909  Mul_Column0(2, 4)
1910  Mul_Column1(3, 5)
1911  Mul_Column0(4, 6)
1912  Mul_Column1(5, 7)
1913  Mul_Column0(6, 8)
1914  Mul_Column1(7, 9)
1915  Mul_Column0(8, 10)
1916  Mul_Column1(9, 11)
1917  Mul_Column0(10, 12)
1918  Mul_Column1(11, 13)
1919  Mul_Column0(12, 14)
1920  Mul_Column1(13, 15)
1921  Mul_Column0(14, 16)
1922  Mul_Column1(15, 15)
1923  Mul_Column0(16, 14)
1924  Mul_Column1(17, 13)
1925  Mul_Column0(18, 12)
1926  Mul_Column1(19, 11)
1927  Mul_Column0(20, 10)
1928  Mul_Column1(21, 9)
1929  Mul_Column0(22, 8)
1930  Mul_Column1(23, 7)
1931  Mul_Column0(24, 6)
1932  Mul_Column1(25, 5)
1933  Mul_Column0(26, 4)
1934  Mul_Column1(27, 3)
1935  Mul_Column0(28, 2)
1936  Mul_End(16)
1937 }
1938 
1939 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
1940 {
1941  Mul_Begin(2)
1942  Bot_SaveAcc(0) Bot_Acc(2)
1943  Bot_End(2)
1944 }
1945 
1946 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
1947 {
1948  Mul_Begin(4)
1949 #ifndef __GNUC__
1950  ASJ( jmp, 0, f)
1951  Mul_Acc(3) Mul_Acc(2)
1952  AS1( ret) ASL(0)
1953 #endif
1954  Mul_Column0(0, 2)
1955  Mul_Column1(1, 3)
1956  Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1957  Bot_End(4)
1958 }
1959 
1960 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
1961 {
1962  Mul_Begin(8)
1963 #ifndef __GNUC__
1964  ASJ( jmp, 0, f)
1965  Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1966  AS1( ret) ASL(0)
1967 #endif
1968  Mul_Column0(0, 2)
1969  Mul_Column1(1, 3)
1970  Mul_Column0(2, 4)
1971  Mul_Column1(3, 5)
1972  Mul_Column0(4, 6)
1973  Mul_Column1(5, 7)
1974  Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1975  Bot_End(8)
1976 }
1977 
1978 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
1979 {
1980  Mul_Begin(16)
1981 #ifndef __GNUC__
1982  ASJ( jmp, 0, f)
1983  Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1984  AS1( ret) ASL(0)
1985 #endif
1986  Mul_Column0(0, 2)
1987  Mul_Column1(1, 3)
1988  Mul_Column0(2, 4)
1989  Mul_Column1(3, 5)
1990  Mul_Column0(4, 6)
1991  Mul_Column1(5, 7)
1992  Mul_Column0(6, 8)
1993  Mul_Column1(7, 9)
1994  Mul_Column0(8, 10)
1995  Mul_Column1(9, 11)
1996  Mul_Column0(10, 12)
1997  Mul_Column1(11, 13)
1998  Mul_Column0(12, 14)
1999  Mul_Column1(13, 15)
2000  Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2001  Bot_End(16)
2002 }
2003 
2004 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
2005 {
2006  Top_Begin(4)
2007  Top_Acc(3) Top_Acc(2) Top_Acc(1)
2008 #ifndef __GNUC__
2009  ASJ( jmp, 0, f)
2010  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2011  AS1( ret) ASL(0)
2012 #endif
2013  Top_Column0(4)
2014  Top_Column1(3)
2015  Mul_Column0(0, 2)
2016  Top_End(2)
2017 }
2018 
2019 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
2020 {
2021  Top_Begin(8)
2022  Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2023 #ifndef __GNUC__
2024  ASJ( jmp, 0, f)
2025  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2026  AS1( ret) ASL(0)
2027 #endif
2028  Top_Column0(8)
2029  Top_Column1(7)
2030  Mul_Column0(0, 6)
2031  Mul_Column1(1, 5)
2032  Mul_Column0(2, 4)
2033  Mul_Column1(3, 3)
2034  Mul_Column0(4, 2)
2035  Top_End(4)
2036 }
2037 
2038 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
2039 {
2040  Top_Begin(16)
2041  Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2042 #ifndef __GNUC__
2043  ASJ( jmp, 0, f)
2044  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2045  AS1( ret) ASL(0)
2046 #endif
2047  Top_Column0(16)
2048  Top_Column1(15)
2049  Mul_Column0(0, 14)
2050  Mul_Column1(1, 13)
2051  Mul_Column0(2, 12)
2052  Mul_Column1(3, 11)
2053  Mul_Column0(4, 10)
2054  Mul_Column1(5, 9)
2055  Mul_Column0(6, 8)
2056  Mul_Column1(7, 7)
2057  Mul_Column0(8, 6)
2058  Mul_Column1(9, 5)
2059  Mul_Column0(10, 4)
2060  Mul_Column1(11, 3)
2061  Mul_Column0(12, 2)
2062  Top_End(8)
2063 }
2064 
2065 #endif // #if CRYPTOPP_INTEGER_SSE2
2066 
2067 // ********************************************************
2068 
2069 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
2070 typedef void (* PMul)(word *C, const word *A, const word *B);
2071 typedef void (* PSqu)(word *C, const word *A);
2072 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
2073 
2074 #if CRYPTOPP_INTEGER_SSE2
2075 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
2076 static size_t s_recursionLimit = 8;
2077 #else
2078 static const size_t s_recursionLimit = 16;
2079 #endif
2080 
2081 static PMul s_pMul[9], s_pBot[9];
2082 static PSqu s_pSqu[9];
2083 static PMulTop s_pTop[9];
2084 
2085 static void SetFunctionPointers()
2086 {
2087  s_pMul[0] = &Baseline_Multiply2;
2088  s_pBot[0] = &Baseline_MultiplyBottom2;
2089  s_pSqu[0] = &Baseline_Square2;
2090  s_pTop[0] = &Baseline_MultiplyTop2;
2091  s_pTop[1] = &Baseline_MultiplyTop4;
2092 
2093 #if CRYPTOPP_INTEGER_SSE2
2094  if (HasSSE2())
2095  {
2096 #if _MSC_VER != 1200 || !(CRYPTOPP_DEBUG)
2097  if (IsP4())
2098  {
2099  s_pAdd = &SSE2_Add;
2100  s_pSub = &SSE2_Sub;
2101  }
2102 #endif
2103 
2104  s_recursionLimit = 32;
2105 
2106  s_pMul[1] = &SSE2_Multiply4;
2107  s_pMul[2] = &SSE2_Multiply8;
2108  s_pMul[4] = &SSE2_Multiply16;
2109  s_pMul[8] = &SSE2_Multiply32;
2110 
2111  s_pBot[1] = &SSE2_MultiplyBottom4;
2112  s_pBot[2] = &SSE2_MultiplyBottom8;
2113  s_pBot[4] = &SSE2_MultiplyBottom16;
2114  s_pBot[8] = &SSE2_MultiplyBottom32;
2115 
2116  s_pSqu[1] = &SSE2_Square4;
2117  s_pSqu[2] = &SSE2_Square8;
2118  s_pSqu[4] = &SSE2_Square16;
2119  s_pSqu[8] = &SSE2_Square32;
2120 
2121  s_pTop[2] = &SSE2_MultiplyTop8;
2122  s_pTop[4] = &SSE2_MultiplyTop16;
2123  s_pTop[8] = &SSE2_MultiplyTop32;
2124  }
2125  else
2126 #endif
2127  {
2128  s_pMul[1] = &Baseline_Multiply4;
2129  s_pMul[2] = &Baseline_Multiply8;
2130 
2131  s_pBot[1] = &Baseline_MultiplyBottom4;
2132  s_pBot[2] = &Baseline_MultiplyBottom8;
2133 
2134  s_pSqu[1] = &Baseline_Square4;
2135  s_pSqu[2] = &Baseline_Square8;
2136 
2137  s_pTop[2] = &Baseline_MultiplyTop8;
2138 
2139 #if !CRYPTOPP_INTEGER_SSE2
2140  s_pMul[4] = &Baseline_Multiply16;
2141  s_pBot[4] = &Baseline_MultiplyBottom16;
2142  s_pSqu[4] = &Baseline_Square16;
2143  s_pTop[4] = &Baseline_MultiplyTop16;
2144 #endif
2145  }
2146 }
2147 
2148 inline int Add(word *C, const word *A, const word *B, size_t N)
2149 {
2150 #if CRYPTOPP_INTEGER_SSE2
2151  return s_pAdd(N, C, A, B);
2152 #else
2153  return Baseline_Add(N, C, A, B);
2154 #endif
2155 }
2156 
2157 inline int Subtract(word *C, const word *A, const word *B, size_t N)
2158 {
2159 #if CRYPTOPP_INTEGER_SSE2
2160  return s_pSub(N, C, A, B);
2161 #else
2162  return Baseline_Sub(N, C, A, B);
2163 #endif
2164 }
2165 
2166 // ********************************************************
2167 
2168 
2169 #define A0 A
2170 #define A1 (A+N2)
2171 #define B0 B
2172 #define B1 (B+N2)
2173 
2174 #define T0 T
2175 #define T1 (T+N2)
2176 #define T2 (T+N)
2177 #define T3 (T+N+N2)
2178 
2179 #define R0 R
2180 #define R1 (R+N2)
2181 #define R2 (R+N)
2182 #define R3 (R+N+N2)
2183 
2184 // R[2*N] - result = A*B
2185 // T[2*N] - temporary work space
2186 // A[N] --- multiplier
2187 // B[N] --- multiplicant
2188 
2189 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
2190 {
2191  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2192 
2193  if (N <= s_recursionLimit)
2194  s_pMul[N/4](R, A, B);
2195  else
2196  {
2197  const size_t N2 = N/2;
2198 
2199  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2200  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2201 
2202  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2203  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2204 
2205  RecursiveMultiply(R2, T2, A1, B1, N2);
2206  RecursiveMultiply(T0, T2, R0, R1, N2);
2207  RecursiveMultiply(R0, T2, A0, B0, N2);
2208 
2209  // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
2210 
2211  int c2 = Add(R2, R2, R1, N2);
2212  int c3 = c2;
2213  c2 += Add(R1, R2, R0, N2);
2214  c3 += Add(R2, R2, R3, N2);
2215 
2216  if (AN2 == BN2)
2217  c3 -= Subtract(R1, R1, T0, N);
2218  else
2219  c3 += Add(R1, R1, T0, N);
2220 
2221  c3 += Increment(R2, N2, c2);
2222  CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2223  Increment(R3, N2, c3);
2224  }
2225 }
2226 
2227 // R[2*N] - result = A*A
2228 // T[2*N] - temporary work space
2229 // A[N] --- number to be squared
2230 
2231 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
2232 {
2233  CRYPTOPP_ASSERT(N && N%2==0);
2234 
2235  if (N <= s_recursionLimit)
2236  s_pSqu[N/4](R, A);
2237  else
2238  {
2239  const size_t N2 = N/2;
2240 
2241  RecursiveSquare(R0, T2, A0, N2);
2242  RecursiveSquare(R2, T2, A1, N2);
2243  RecursiveMultiply(T0, T2, A0, A1, N2);
2244 
2245  int carry = Add(R1, R1, T0, N);
2246  carry += Add(R1, R1, T0, N);
2247  Increment(R3, N2, carry);
2248  }
2249 }
2250 
2251 // R[N] - bottom half of A*B
2252 // T[3*N/2] - temporary work space
2253 // A[N] - multiplier
2254 // B[N] - multiplicant
2255 
2256 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2257 {
2258  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2259 
2260  if (N <= s_recursionLimit)
2261  s_pBot[N/4](R, A, B);
2262  else
2263  {
2264  const size_t N2 = N/2;
2265 
2266  RecursiveMultiply(R, T, A0, B0, N2);
2267  RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2268  Add(R1, R1, T0, N2);
2269  RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2270  Add(R1, R1, T0, N2);
2271  }
2272 }
2273 
2274 // R[N] --- upper half of A*B
2275 // T[2*N] - temporary work space
2276 // L[N] --- lower half of A*B
2277 // A[N] --- multiplier
2278 // B[N] --- multiplicant
2279 
2280 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
2281 {
2282  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2283 
2284  if (N <= s_recursionLimit)
2285  s_pTop[N/4](R, A, B, L[N-1]);
2286  else
2287  {
2288  const size_t N2 = N/2;
2289 
2290  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2291  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2292 
2293  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2294  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2295 
2296  RecursiveMultiply(T0, T2, R0, R1, N2);
2297  RecursiveMultiply(R0, T2, A1, B1, N2);
2298 
2299  // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
2300 
2301  int t, c3;
2302  int c2 = Subtract(T2, L+N2, L, N2);
2303 
2304  if (AN2 == BN2)
2305  {
2306  c2 -= Add(T2, T2, T0, N2);
2307  t = (Compare(T2, R0, N2) == -1);
2308  c3 = t - Subtract(T2, T2, T1, N2);
2309  }
2310  else
2311  {
2312  c2 += Subtract(T2, T2, T0, N2);
2313  t = (Compare(T2, R0, N2) == -1);
2314  c3 = t + Add(T2, T2, T1, N2);
2315  }
2316 
2317  c2 += t;
2318  if (c2 >= 0)
2319  c3 += Increment(T2, N2, c2);
2320  else
2321  c3 -= Decrement(T2, N2, -c2);
2322  c3 += Add(R0, T2, R1, N2);
2323 
2324  CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2325  Increment(R1, N2, c3);
2326  }
2327 }
2328 
2329 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
2330 {
2331  RecursiveMultiply(R, T, A, B, N);
2332 }
2333 
2334 inline void Square(word *R, word *T, const word *A, size_t N)
2335 {
2336  RecursiveSquare(R, T, A, N);
2337 }
2338 
2339 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2340 {
2341  RecursiveMultiplyBottom(R, T, A, B, N);
2342 }
2343 
2344 // R[NA+NB] - result = A*B
2345 // T[NA+NB] - temporary work space
2346 // A[NA] ---- multiplier
2347 // B[NB] ---- multiplicant
2348 
2349 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
2350 {
2351  if (NA == NB)
2352  {
2353  // Profiling tells us the original second case was dominant, so it was promoted to the first If statement.
2354  // The code change occurred at Commit dc99266599a0e72d.
2355  if (A != B)
2356  Multiply(R, T, A, B, NA);
2357  else
2358  Square(R, T, A, NA);
2359 
2360  return;
2361  }
2362 
2363  if (NA > NB)
2364  {
2365  std::swap(A, B);
2366  std::swap(NA, NB);
2367  }
2368 
2369  CRYPTOPP_ASSERT(NB % NA == 0);
2370 
2371  if (NA==2 && !A[1])
2372  {
2373  // Profiling tells us the original Default case was dominant, so it was promoted to the first Case statement.
2374  // The code change occurred at Commit dc99266599a0e72d.
2375  switch (A[0])
2376  {
2377  default:
2378  R[NB] = LinearMultiply(R, B, A[0], NB);
2379  R[NB+1] = 0;
2380  return;
2381  case 0:
2382  SetWords(R, 0, NB+2);
2383  return;
2384  case 1:
2385  CopyWords(R, B, NB);
2386  R[NB] = R[NB+1] = 0;
2387  return;
2388  }
2389  }
2390 
2391  size_t i;
2392  if ((NB/NA)%2 == 0)
2393  {
2394  Multiply(R, T, A, B, NA);
2395  CopyWords(T+2*NA, R+NA, NA);
2396 
2397  for (i=2*NA; i<NB; i+=2*NA)
2398  Multiply(T+NA+i, T, A, B+i, NA);
2399  for (i=NA; i<NB; i+=2*NA)
2400  Multiply(R+i, T, A, B+i, NA);
2401  }
2402  else
2403  {
2404  for (i=0; i<NB; i+=2*NA)
2405  Multiply(R+i, T, A, B+i, NA);
2406  for (i=NA; i<NB; i+=2*NA)
2407  Multiply(T+NA+i, T, A, B+i, NA);
2408  }
2409 
2410  if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2411  Increment(R+NB, NA);
2412 }
2413 
2414 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
2415 // T[3*N/2] - temporary work space
2416 // A[N] ----- an odd number as input
2417 
2418 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
2419 {
2420  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
2421  // The code change occurred at Commit dc99266599a0e72d.
2422  if (N!=2)
2423  {
2424  const size_t N2 = N/2;
2425  RecursiveInverseModPower2(R0, T0, A0, N2);
2426  T0[0] = 1;
2427  SetWords(T0+1, 0, N2-1);
2428  MultiplyTop(R1, T1, T0, R0, A0, N2);
2429  MultiplyBottom(T0, T1, R0, A1, N2);
2430  Add(T0, R1, T0, N2);
2431  TwosComplement(T0, N2);
2432  MultiplyBottom(R1, T1, R0, T0, N2);
2433  }
2434  else
2435  {
2436  T[0] = AtomicInverseModPower2(A[0]);
2437  T[1] = 0;
2438  s_pBot[0](T+2, T, A);
2439  TwosComplement(T+2, 2);
2440  Increment(T+2, 2, 2);
2441  s_pBot[0](R, T, T+2);
2442  }
2443 }
2444 
2445 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
2446 // T[3*N] - temporary work space
2447 // X[2*N] - number to be reduced
2448 // M[N] --- modulus
2449 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
2450 
2451 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
2452 {
2453 #if 1
2454  MultiplyBottom(R, T, X, U, N);
2455  MultiplyTop(T, T+N, X, R, M, N);
2456  word borrow = Subtract(T, X+N, T, N);
2457  // defend against timing attack by doing this Add even when not needed
2458  word carry = Add(T+N, T, M, N);
2459  CRYPTOPP_ASSERT(carry | !borrow);
2460  CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2461  CopyWords(R, T + ((0-borrow) & N), N);
2462 #elif 0
2463  const word u = 0-U[0];
2464  Declare2Words(p)
2465  for (size_t i=0; i<N; i++)
2466  {
2467  const word t = u * X[i];
2468  word c = 0;
2469  for (size_t j=0; j<N; j+=2)
2470  {
2471  MultiplyWords(p, t, M[j]);
2472  Acc2WordsBy1(p, X[i+j]);
2473  Acc2WordsBy1(p, c);
2474  X[i+j] = LowWord(p);
2475  c = HighWord(p);
2476  MultiplyWords(p, t, M[j+1]);
2477  Acc2WordsBy1(p, X[i+j+1]);
2478  Acc2WordsBy1(p, c);
2479  X[i+j+1] = LowWord(p);
2480  c = HighWord(p);
2481  }
2482 
2483  if (Increment(X+N+i, N-i, c))
2484  while (!Subtract(X+N, X+N, M, N)) {}
2485  }
2486 
2487  memcpy(R, X+N, N*WORD_SIZE);
2488 #else
2489  __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2490  for (size_t i=0; i<N; i++)
2491  {
2492  __m64 t = _mm_cvtsi32_si64(X[i]);
2493  t = _mm_mul_su32(t, u);
2494  __m64 c = _mm_setzero_si64();
2495  for (size_t j=0; j<N; j+=2)
2496  {
2497  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2498  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2499  c = _mm_add_si64(c, p);
2500  X[i+j] = _mm_cvtsi64_si32(c);
2501  c = _mm_srli_si64(c, 32);
2502  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2503  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2504  c = _mm_add_si64(c, p);
2505  X[i+j+1] = _mm_cvtsi64_si32(c);
2506  c = _mm_srli_si64(c, 32);
2507  }
2508 
2509  if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2510  while (!Subtract(X+N, X+N, M, N)) {}
2511  }
2512 
2513  memcpy(R, X+N, N*WORD_SIZE);
2514  _mm_empty();
2515 #endif
2516 }
2517 
2518 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
2519 // T[2*N] - temporary work space
2520 // X[2*N] - number to be reduced
2521 // M[N] --- modulus
2522 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
2523 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
2524 
2525 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
2526 {
2527  CRYPTOPP_ASSERT(N%2==0 && N>=4);
2528 
2529 #define M0 M
2530 #define M1 (M+N2)
2531 #define V0 V
2532 #define V1 (V+N2)
2533 
2534 #define X0 X
2535 #define X1 (X+N2)
2536 #define X2 (X+N)
2537 #define X3 (X+N+N2)
2538 
2539  const size_t N2 = N/2;
2540  Multiply(T0, T2, V0, X3, N2);
2541  int c2 = Add(T0, T0, X0, N);
2542  MultiplyBottom(T3, T2, T0, U, N2);
2543  MultiplyTop(T2, R, T0, T3, M0, N2);
2544  c2 -= Subtract(T2, T1, T2, N2);
2545  Multiply(T0, R, T3, M1, N2);
2546  c2 -= Subtract(T0, T2, T0, N2);
2547  int c3 = -(int)Subtract(T1, X2, T1, N2);
2548  Multiply(R0, T2, V1, X3, N2);
2549  c3 += Add(R, R, T, N);
2550 
2551  if (c2>0)
2552  c3 += Increment(R1, N2);
2553  else if (c2<0)
2554  c3 -= Decrement(R1, N2, -c2);
2555 
2556  CRYPTOPP_ASSERT(c3>=-1 && c3<=1);
2557  if (c3>0)
2558  Subtract(R, R, M, N);
2559  else if (c3<0)
2560  Add(R, R, M, N);
2561 
2562 #undef M0
2563 #undef M1
2564 #undef V0
2565 #undef V1
2566 
2567 #undef X0
2568 #undef X1
2569 #undef X2
2570 #undef X3
2571 }
2572 
2573 #undef A0
2574 #undef A1
2575 #undef B0
2576 #undef B1
2577 
2578 #undef T0
2579 #undef T1
2580 #undef T2
2581 #undef T3
2582 
2583 #undef R0
2584 #undef R1
2585 #undef R2
2586 #undef R3
2587 
2588 /*
2589 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
2590 static word SubatomicDivide(word *A, word B0, word B1)
2591 {
2592  // CRYPTOPP_ASSERT {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
2593  CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0));
2594 
2595  // estimate the quotient: do a 2 word by 1 word divide
2596  word Q;
2597  if (B1+1 == 0)
2598  Q = A[2];
2599  else
2600  Q = DWord(A[1], A[2]).DividedBy(B1+1);
2601 
2602  // now subtract Q*B from A
2603  DWord p = DWord::Multiply(B0, Q);
2604  DWord u = (DWord) A[0] - p.GetLowHalf();
2605  A[0] = u.GetLowHalf();
2606  u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
2607  A[1] = u.GetLowHalf();
2608  A[2] += u.GetHighHalf();
2609 
2610  // Q <= actual quotient, so fix it
2611  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
2612  {
2613  u = (DWord) A[0] - B0;
2614  A[0] = u.GetLowHalf();
2615  u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
2616  A[1] = u.GetLowHalf();
2617  A[2] += u.GetHighHalf();
2618  Q++;
2619  CRYPTOPP_ASSERT(Q); // shouldn't overflow
2620  }
2621 
2622  return Q;
2623 }
2624 
2625 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
2626 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2627 {
2628  if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
2629  {
2630  Q[0] = A[2];
2631  Q[1] = A[3];
2632  }
2633  else
2634  {
2635  word T[4];
2636  T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
2637  Q[1] = SubatomicDivide(T+1, B[0], B[1]);
2638  Q[0] = SubatomicDivide(T, B[0], B[1]);
2639 
2640 #if CRYPTOPP_DEBUG
2641  // multiply quotient and divisor and add remainder, make sure it equals dividend
2642  CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2643  word P[4];
2644  LowLevel::Multiply2(P, Q, B);
2645  Add(P, P, T, 4);
2646  CRYPTOPP_ASSERT(memcmp(P, A, 4*WORD_SIZE)==0);
2647 #endif
2648  }
2649 }
2650 */
2651 
2652 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2653 {
2654  word T[4];
2655  DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
2656  Q[0] = q.GetLowHalf();
2657  Q[1] = q.GetHighHalf();
2658 
2659 #if CRYPTOPP_DEBUG
2660  if (B[0] || B[1])
2661  {
2662  // multiply quotient and divisor and add remainder, make sure it equals dividend
2663  CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2664  word P[4];
2665  s_pMul[0](P, Q, B);
2666  Add(P, P, T, 4);
2667  CRYPTOPP_ASSERT(memcmp(P, A, 4*WORD_SIZE)==0);
2668  }
2669 #endif
2670 }
2671 
2672 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
2673 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
2674 {
2675  CRYPTOPP_ASSERT(N && N%2==0);
2676 
2677  AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2678 
2679  word borrow = Subtract(R, R, T, N+2);
2680  CRYPTOPP_ASSERT(!borrow && !R[N+1]);
2681  CRYPTOPP_UNUSED(borrow);
2682 
2683  while (R[N] || Compare(R, B, N) >= 0)
2684  {
2685  R[N] -= Subtract(R, R, B, N);
2686  Q[1] += (++Q[0]==0);
2687  CRYPTOPP_ASSERT(Q[0] || Q[1]); // no overflow
2688  }
2689 }
2690 
2691 // R[NB] -------- remainder = A%B
2692 // Q[NA-NB+2] --- quotient = A/B
2693 // T[NA+3*(NB+2)] - temp work space
2694 // A[NA] -------- dividend
2695 // B[NB] -------- divisor
2696 
2697 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
2698 {
2699  CRYPTOPP_ASSERT(NA && NB && NA%2==0 && NB%2==0);
2700  CRYPTOPP_ASSERT(B[NB-1] || B[NB-2]);
2701  CRYPTOPP_ASSERT(NB <= NA);
2702 
2703  // set up temporary work space
2704  word *const TA=T;
2705  word *const TB=T+NA+2;
2706  word *const TP=T+NA+2+NB;
2707 
2708  // copy B into TB and normalize it so that TB has highest bit set to 1
2709  unsigned shiftWords = (B[NB-1]==0);
2710  TB[0] = TB[NB-1] = 0;
2711  CopyWords(TB+shiftWords, B, NB-shiftWords);
2712  unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
2713  CRYPTOPP_ASSERT(shiftBits < WORD_BITS);
2714  ShiftWordsLeftByBits(TB, NB, shiftBits);
2715 
2716  // copy A into TA and normalize it
2717  TA[0] = TA[NA] = TA[NA+1] = 0;
2718  CopyWords(TA+shiftWords, A, NA);
2719  ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2720 
2721  if (TA[NA+1]==0 && TA[NA] <= 1)
2722  {
2723  Q[NA-NB+1] = Q[NA-NB] = 0;
2724  while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2725  {
2726  TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2727  ++Q[NA-NB];
2728  }
2729  }
2730  else
2731  {
2732  NA+=2;
2733  CRYPTOPP_ASSERT(Compare(TA+NA-NB, TB, NB) < 0);
2734  }
2735 
2736  word BT[2];
2737  BT[0] = TB[NB-2] + 1;
2738  BT[1] = TB[NB-1] + (BT[0]==0);
2739 
2740  // start reducing TA mod TB, 2 words at a time
2741  for (size_t i=NA-2; i>=NB; i-=2)
2742  {
2743  AtomicDivide(Q+i-NB, TA+i-2, BT);
2744  CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
2745  }
2746 
2747  // copy TA into R, and denormalize it
2748  CopyWords(R, TA+shiftWords, NB);
2749  ShiftWordsRightByBits(R, NB, shiftBits);
2750 }
2751 
2752 static inline size_t EvenWordCount(const word *X, size_t N)
2753 {
2754  while (N && X[N-2]==0 && X[N-1]==0)
2755  N-=2;
2756  return N;
2757 }
2758 
2759 // return k
2760 // R[N] --- result = A^(-1) * 2^k mod M
2761 // T[4*N] - temporary work space
2762 // A[NA] -- number to take inverse of
2763 // M[N] --- modulus
2764 
2765 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
2766 {
2767  CRYPTOPP_ASSERT(NA<=N && N && N%2==0);
2768 
2769  word *b = T;
2770  word *c = T+N;
2771  word *f = T+2*N;
2772  word *g = T+3*N;
2773  size_t bcLen=2, fgLen=EvenWordCount(M, N);
2774  unsigned int k=0;
2775  bool s=false;
2776 
2777  SetWords(T, 0, 3*N);
2778  b[0]=1;
2779  CopyWords(f, A, NA);
2780  CopyWords(g, M, N);
2781 
2782  while (1)
2783  {
2784  word t=f[0];
2785  while (!t)
2786  {
2787  if (EvenWordCount(f, fgLen)==0)
2788  {
2789  SetWords(R, 0, N);
2790  return 0;
2791  }
2792 
2793  ShiftWordsRightByWords(f, fgLen, 1);
2794  bcLen += 2 * (c[bcLen-1] != 0);
2795  CRYPTOPP_ASSERT(bcLen <= N);
2796  ShiftWordsLeftByWords(c, bcLen, 1);
2797  k+=WORD_BITS;
2798  t=f[0];
2799  }
2800 
2801  // t must be non-0; otherwise undefined.
2802  unsigned int i = TrailingZeros(t);
2803  t >>= i;
2804  k += i;
2805 
2806  if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2807  {
2808  if (s)
2809  Subtract(R, M, b, N);
2810  else
2811  CopyWords(R, b, N);
2812  return k;
2813  }
2814 
2815  ShiftWordsRightByBits(f, fgLen, i);
2816  t = ShiftWordsLeftByBits(c, bcLen, i);
2817  c[bcLen] += t;
2818  bcLen += 2 * (t!=0);
2819  CRYPTOPP_ASSERT(bcLen <= N);
2820 
2821  bool swap = Compare(f, g, fgLen)==-1;
2822  ConditionalSwapPointers(swap, f, g);
2823  ConditionalSwapPointers(swap, b, c);
2824  s ^= swap;
2825 
2826  fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2827 
2828  Subtract(f, f, g, fgLen);
2829  t = Add(b, b, c, bcLen);
2830  b[bcLen] += t;
2831  bcLen += 2*t;
2832  CRYPTOPP_ASSERT(bcLen <= N);
2833  }
2834 }
2835 
2836 // R[N] - result = A/(2^k) mod M
2837 // A[N] - input
2838 // M[N] - modulus
2839 
2840 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2841 {
2842  CopyWords(R, A, N);
2843 
2844  while (k--)
2845  {
2846  if (R[0]%2==0)
2847  ShiftWordsRightByBits(R, N, 1);
2848  else
2849  {
2850  word carry = Add(R, R, M, N);
2851  ShiftWordsRightByBits(R, N, 1);
2852  R[N-1] += carry<<(WORD_BITS-1);
2853  }
2854  }
2855 }
2856 
2857 // R[N] - result = A*(2^k) mod M
2858 // A[N] - input
2859 // M[N] - modulus
2860 
2861 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2862 {
2863  CopyWords(R, A, N);
2864 
2865  while (k--)
2866  if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2867  Subtract(R, R, M, N);
2868 }
2869 
2870 // ******************************************************************
2871 
2872 InitializeInteger::InitializeInteger()
2873 {
2874  if (!g_pAssignIntToInteger)
2875  {
2876  SetFunctionPointers();
2877  g_pAssignIntToInteger = (CryptoPP::PAssignIntToInteger)AssignIntToInteger;
2878  }
2879 }
2880 
2881 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
2882 
2883 static inline size_t RoundupSize(size_t n)
2884 {
2885  if (n<=8)
2886  return RoundupSizeTable[n];
2887  else if (n<=16)
2888  return 16;
2889  else if (n<=32)
2890  return 32;
2891  else if (n<=64)
2892  return 64;
2893  else
2894  return size_t(1) << BitPrecision(n-1);
2895 }
2896 
2898  : reg(2), sign(POSITIVE)
2899 {
2900  reg[0] = reg[1] = 0;
2901 }
2902 
2904  : reg(RoundupSize(t.WordCount())), sign(t.sign)
2905 {
2906  CopyWords(reg, t.reg, reg.size());
2907 }
2908 
2909 Integer::Integer(Sign s, lword value)
2910  : reg(2), sign(s)
2911 {
2912  reg[0] = word(value);
2913  reg[1] = word(SafeRightShift<WORD_BITS>(value));
2914 }
2915 
2916 Integer::Integer(signed long value)
2917  : reg(2)
2918 {
2919  if (value >= 0)
2920  sign = POSITIVE;
2921  else
2922  {
2923  sign = NEGATIVE;
2924  value = -value;
2925  }
2926  reg[0] = word(value);
2927  reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
2928 }
2929 
2930 Integer::Integer(Sign s, word high, word low)
2931  : reg(2), sign(s)
2932 {
2933  reg[0] = low;
2934  reg[1] = high;
2935 }
2936 
2938 {
2939  if (ByteCount() > sizeof(long))
2940  return false;
2941 
2942  unsigned long value = (unsigned long)reg[0];
2943  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2944 
2945  if (sign==POSITIVE)
2946  return (signed long)value >= 0;
2947  else
2948  return -(signed long)value < 0;
2949 }
2950 
2951 signed long Integer::ConvertToLong() const
2952 {
2954 
2955  unsigned long value = (unsigned long)reg[0];
2956  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2957  return sign==POSITIVE ? value : -(signed long)value;
2958 }
2959 
2960 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2961 {
2963 
2964  if (o == LITTLE_ENDIAN_ORDER)
2965  {
2966  SecByteBlock block(byteCount);
2967  encodedInteger.Get(block, block.size());
2968  std::reverse(block.begin(), block.begin()+block.size());
2969 
2970  Decode(block.begin(), block.size(), s);
2971  return;
2972  }
2973 
2974  Decode(encodedInteger, byteCount, s);
2975 }
2976 
2977 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2978 {
2980 
2981  if (o == LITTLE_ENDIAN_ORDER)
2982  {
2983  SecByteBlock block(byteCount);
2984 #if (CRYPTOPP_MSC_VERSION >= 1400)
2985  std::reverse_copy(encodedInteger, encodedInteger+byteCount,
2986  stdext::make_checked_array_iterator(block.begin(), block.size()));
2987 #else
2988  std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin());
2989 #endif
2990  Decode(block.begin(), block.size(), s);
2991  return;
2992  }
2993 
2994  Decode(encodedInteger, byteCount, s);
2995 }
2996 
2998 {
2999  BERDecode(bt);
3000 }
3001 
3003 {
3004  Randomize(rng, bitcount);
3005 }
3006 
3007 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3008 {
3009  if (!Randomize(rng, min, max, rnType, equiv, mod))
3011 }
3012 
3014 {
3015  Integer r((word)0, BitsToWords(e+1));
3016  r.SetBit(e);
3017  return r;
3018 }
3019 
3020 template <long i>
3022 {
3023  Integer * operator()() const
3024  {
3025  return new Integer(i);
3026  }
3027 };
3028 
3029 // File scope static due to subtle initialization problems in a threaded
3030 // Windows environment. See the comments for Singleton. Thanks DB.
3031 namespace { const Integer& s_zero = Singleton<Integer>().Ref(); }
3033 {
3034  return s_zero;
3035 }
3036 
3037 // File scope static due to subtle initialization problems in a threaded
3038 // Windows environment. See the comments for Singleton. Thanks DB.
3039 namespace { const Integer& s_one = Singleton<Integer, NewInteger<1> >().Ref(); }
3041 {
3042  return s_one;
3043 }
3044 
3045 // File scope static due to subtle initialization problems in a threaded
3046 // Windows environment. See the comments for Singleton. Thanks DB.
3047 namespace { const Integer& s_two = Singleton<Integer, NewInteger<2> >().Ref(); }
3049 {
3050  return s_two;
3051 }
3052 
3053 bool Integer::operator!() const
3054 {
3055  return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
3056 }
3057 
3058 Integer& Integer::operator=(const Integer& t)
3059 {
3060  if (this != &t)
3061  {
3062  if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
3063  reg.New(RoundupSize(t.WordCount()));
3064  CopyWords(reg, t.reg, reg.size());
3065  sign = t.sign;
3066  }
3067  return *this;
3068 }
3069 
3070 bool Integer::GetBit(size_t n) const
3071 {
3072  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
3073  // The code change occurred at Commit dc99266599a0e72d.
3074  if (n/WORD_BITS < reg.size())
3075  return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
3076  else
3077  return 0;
3078 }
3079 
3080 void Integer::SetBit(size_t n, bool value)
3081 {
3082  if (value)
3083  {
3084  reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
3085  reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
3086  }
3087  else
3088  {
3089  if (n/WORD_BITS < reg.size())
3090  reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
3091  }
3092 }
3093 
3094 byte Integer::GetByte(size_t n) const
3095 {
3096  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
3097  // The code change occurred at Commit dc99266599a0e72d.
3098  if (n/WORD_SIZE < reg.size())
3099  return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
3100  else
3101  return 0;
3102 }
3103 
3104 void Integer::SetByte(size_t n, byte value)
3105 {
3106  reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
3107  reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3108  reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
3109 }
3110 
3111 lword Integer::GetBits(size_t i, size_t n) const
3112 {
3113  lword v = 0;
3114  CRYPTOPP_ASSERT(n <= sizeof(v)*8);
3115  for (unsigned int j=0; j<n; j++)
3116  v |= lword(GetBit(i+j)) << j;
3117  return v;
3118 }
3119 
3120 Integer Integer::operator-() const
3121 {
3122  Integer result(*this);
3123  result.Negate();
3124  return result;
3125 }
3126 
3127 Integer Integer::AbsoluteValue() const
3128 {
3129  Integer result(*this);
3130  result.sign = POSITIVE;
3131  return result;
3132 }
3133 
3135 {
3136  reg.swap(a.reg);
3137  std::swap(sign, a.sign);
3138 }
3139 
3140 Integer::Integer(word value, size_t length)
3141  : reg(RoundupSize(length)), sign(POSITIVE)
3142 {
3143  reg[0] = value;
3144  SetWords(reg+1, 0, reg.size()-1);
3145 }
3146 
3147 template <class T>
3148 static Integer StringToInteger(const T *str, ByteOrder order)
3149 {
3150  CRYPTOPP_ASSERT( order == BIG_ENDIAN_ORDER || order == LITTLE_ENDIAN_ORDER );
3151 
3152  int radix, sign = 1;
3153  // GCC workaround
3154  // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
3155  unsigned int length;
3156  for (length = 0; str[length] != 0; length++) {}
3157 
3158  Integer v;
3159 
3160  if (length == 0)
3161  return Integer::Zero();
3162 
3163  switch (str[length-1])
3164  {
3165  case 'h':
3166  case 'H':
3167  radix=16;
3168  break;
3169  case 'o':
3170  case 'O':
3171  radix=8;
3172  break;
3173  case 'b':
3174  case 'B':
3175  radix=2;
3176  break;
3177  default:
3178  radix=10;
3179  }
3180 
3181  // 'str' is of length 1 or more
3182  if (str[0] == '-')
3183  {
3184  sign = -1;
3185  str += 1, length -= 1;
3186  }
3187 
3188  if (length > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
3189  {
3190  radix = 16;
3191  str += 2, length -= 2;
3192  }
3193 
3194  if (order == BIG_ENDIAN_ORDER)
3195  {
3196  for (unsigned int i=0; i<length; i++)
3197  {
3198  int digit, ch = static_cast<int>(str[i]);
3199 
3200  // Profiling showd the second and third Else needed to be swapped
3201  // The code change occurred at Commit dc99266599a0e72d.
3202  if (ch >= '0' && ch <= '9')
3203  digit = ch - '0';
3204  else if (ch >= 'a' && ch <= 'f')
3205  digit = ch - 'a' + 10;
3206  else if (ch >= 'A' && ch <= 'F')
3207  digit = ch - 'A' + 10;
3208  else
3209  digit = radix;
3210 
3211  if (digit < radix)
3212  {
3213  v *= radix;
3214  v += digit;
3215  }
3216  }
3217  }
3218  else if (radix == 16 && order == LITTLE_ENDIAN_ORDER)
3219  {
3220  // Nibble high, low and count
3221  unsigned int nh = 0, nl = 0, nc = 0;
3222  Integer position(Integer::One());
3223 
3224  for (unsigned int i=0; i<length; i++)
3225  {
3226  int digit, ch = static_cast<int>(str[i]);
3227 
3228  if (ch >= '0' && ch <= '9')
3229  digit = ch - '0';
3230  else if (ch >= 'a' && ch <= 'f')
3231  digit = ch - 'a' + 10;
3232  else if (ch >= 'A' && ch <= 'F')
3233  digit = ch - 'A' + 10;
3234  else
3235  digit = radix;
3236 
3237  if (digit < radix)
3238  {
3239  if (nc++ == 0)
3240  nh = digit;
3241  else
3242  nl = digit;
3243 
3244  if (nc == 2)
3245  {
3246  v += position * (nh << 4 | nl);
3247  nc = 0, position <<= 8;
3248  }
3249  }
3250  }
3251 
3252  if (nc == 1)
3253  v += nh * position;
3254  }
3255  else // LITTLE_ENDIAN_ORDER && radix != 16
3256  {
3257  for (int i=static_cast<int>(length)-1; i>=0; i--)
3258  {
3259  int digit, ch = static_cast<int>(str[i]);
3260 
3261  if (ch >= '0' && ch <= '9')
3262  digit = ch - '0';
3263  else if (ch >= 'a' && ch <= 'f')
3264  digit = ch - 'a' + 10;
3265  else if (ch >= 'A' && ch <= 'F')
3266  digit = ch - 'A' + 10;
3267  else
3268  digit = radix;
3269 
3270  if (digit < radix)
3271  {
3272  v *= radix;
3273  v += digit;
3274  }
3275  }
3276  }
3277 
3278  if (sign == -1)
3279  v.Negate();
3280 
3281  return v;
3282 }
3283 
3284 Integer::Integer(const char *str, ByteOrder order)
3285  : reg(2), sign(POSITIVE)
3286 {
3287  *this = StringToInteger(str,order);
3288 }
3289 
3290 Integer::Integer(const wchar_t *str, ByteOrder order)
3291  : reg(2), sign(POSITIVE)
3292 {
3293  *this = StringToInteger(str,order);
3294 }
3295 
3296 unsigned int Integer::WordCount() const
3297 {
3298  return (unsigned int)CountWords(reg, reg.size());
3299 }
3300 
3301 unsigned int Integer::ByteCount() const
3302 {
3303  unsigned wordCount = WordCount();
3304  if (wordCount)
3305  return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
3306  else
3307  return 0;
3308 }
3309 
3310 unsigned int Integer::BitCount() const
3311 {
3312  unsigned wordCount = WordCount();
3313  if (wordCount)
3314  return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
3315  else
3316  return 0;
3317 }
3318 
3319 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
3320 {
3321  StringStore store(input, inputLen);
3322  Decode(store, inputLen, s);
3323 }
3324 
3326 {
3327  CRYPTOPP_ASSERT(bt.MaxRetrievable() >= inputLen);
3328 
3329  byte b;
3330  bt.Peek(b);
3331  sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
3332 
3333  while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
3334  {
3335  bt.Skip(1);
3336  inputLen--;
3337  bt.Peek(b);
3338  }
3339 
3340  // The call to CleanNew is optimized away above -O0/-Og.
3341  const size_t size = RoundupSize(BytesToWords(inputLen));
3342  reg.CleanNew(size);
3343 
3344  CRYPTOPP_ASSERT(reg.SizeInBytes() >= inputLen);
3345  for (size_t i=inputLen; i > 0; i--)
3346  {
3347  bt.Get(b);
3348  reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3349  }
3350 
3351  if (sign == NEGATIVE)
3352  {
3353  for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
3354  reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3355  TwosComplement(reg, reg.size());
3356  }
3357 }
3358 
3359 size_t Integer::MinEncodedSize(Signedness signedness) const
3360 {
3361  // Profiling tells us the original second If was dominant, so it was promoted to the first If statement.
3362  // The code change occurred at Commit dc99266599a0e72d.
3363  unsigned int outputLen = STDMAX(1U, ByteCount());
3364  const bool pre = (signedness == UNSIGNED);
3365  if (!pre && NotNegative() && (GetByte(outputLen-1) & 0x80))
3366  outputLen++;
3367  if (pre)
3368  return outputLen;
3369  if (IsNegative() && *this < -Power2(outputLen*8-1))
3370  outputLen++;
3371  return outputLen;
3372 }
3373 
3374 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
3375 {
3376  CRYPTOPP_ASSERT(output && outputLen);
3377  ArraySink sink(output, outputLen);
3378  Encode(sink, outputLen, signedness);
3379 }
3380 
3381 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
3382 {
3383  if (signedness == UNSIGNED || NotNegative())
3384  {
3385  for (size_t i=outputLen; i > 0; i--)
3386  bt.Put(GetByte(i-1));
3387  }
3388  else
3389  {
3390  // take two's complement of *this
3391  Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
3392  temp.Encode(bt, outputLen, UNSIGNED);
3393  }
3394 }
3395 
3397 {
3398  DERGeneralEncoder enc(bt, INTEGER);
3400  enc.MessageEnd();
3401 }
3402 
3403 void Integer::BERDecode(const byte *input, size_t len)
3404 {
3405  StringStore store(input, len);
3406  BERDecode(store);
3407 }
3408 
3410 {
3411  BERGeneralDecoder dec(bt, INTEGER);
3412  if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
3413  BERDecodeError();
3414  Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
3415  dec.MessageEnd();
3416 }
3417 
3419 {
3420  DERGeneralEncoder enc(bt, OCTET_STRING);
3421  Encode(enc, length);
3422  enc.MessageEnd();
3423 }
3424 
3426 {
3427  BERGeneralDecoder dec(bt, OCTET_STRING);
3428  if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3429  BERDecodeError();
3430  Decode(dec, length);
3431  dec.MessageEnd();
3432 }
3433 
3434 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
3435 {
3436  ArraySink sink(output, len);
3437  return OpenPGPEncode(sink);
3438 }
3439 
3441 {
3442  word16 bitCount = word16(BitCount());
3443  bt.PutWord16(bitCount);
3444  size_t byteCount = BitsToBytes(bitCount);
3445  Encode(bt, byteCount);
3446  return 2 + byteCount;
3447 }
3448 
3449 void Integer::OpenPGPDecode(const byte *input, size_t len)
3450 {
3451  StringStore store(input, len);
3452  OpenPGPDecode(store);
3453 }
3454 
3456 {
3457  word16 bitCount;
3458  if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
3459  throw OpenPGPDecodeErr();
3460  Decode(bt, BitsToBytes(bitCount));
3461 }
3462 
3464 {
3465  const size_t nbytes = nbits/8 + 1;
3466  SecByteBlock buf(nbytes);
3467  rng.GenerateBlock(buf, nbytes);
3468  if (nbytes)
3469  buf[0] = (byte)Crop(buf[0], nbits % 8);
3470  Decode(buf, nbytes, UNSIGNED);
3471 }
3472 
3473 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
3474 {
3475  if (min > max)
3476  throw InvalidArgument("Integer: Min must be no greater than Max");
3477 
3478  Integer range = max - min;
3479  const unsigned int nbits = range.BitCount();
3480 
3481  do
3482  {
3483  Randomize(rng, nbits);
3484  }
3485  while (*this > range);
3486 
3487  *this += min;
3488 }
3489 
3490 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3491 {
3492  return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
3493 }
3494 
3496 {
3497 public:
3498  KDF2_RNG(const byte *seed, size_t seedSize)
3499  : m_counter(0), m_counterAndSeed(seedSize + 4)
3500  {
3501  memcpy(m_counterAndSeed + 4, seed, seedSize);
3502  }
3503 
3504  void GenerateBlock(byte *output, size_t size)
3505  {
3506  PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
3507  ++m_counter;
3508  P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
3509  }
3510 
3511 private:
3512  word32 m_counter;
3513  SecByteBlock m_counterAndSeed;
3514 };
3515 
3516 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
3517 {
3518  Integer min = params.GetValueWithDefault("Min", Integer::Zero());
3519  Integer max;
3520  if (!params.GetValue("Max", max))
3521  {
3522  int bitLength;
3523  if (params.GetIntValue("BitLength", bitLength))
3524  max = Integer::Power2(bitLength);
3525  else
3526  throw InvalidArgument("Integer: missing Max argument");
3527  }
3528  if (min > max)
3529  throw InvalidArgument("Integer: Min must be no greater than Max");
3530 
3531  Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
3532  Integer mod = params.GetValueWithDefault("Mod", Integer::One());
3533 
3534  if (equiv.IsNegative() || equiv >= mod)
3535  throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
3536 
3537  Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
3538 
3539  member_ptr<KDF2_RNG> kdf2Rng;
3541  if (params.GetValue(Name::Seed(), seed))
3542  {
3543  ByteQueue bq;
3544  DERSequenceEncoder seq(bq);
3545  min.DEREncode(seq);
3546  max.DEREncode(seq);
3547  equiv.DEREncode(seq);
3548  mod.DEREncode(seq);
3549  DEREncodeUnsigned(seq, rnType);
3550  DEREncodeOctetString(seq, seed.begin(), seed.size());
3551  seq.MessageEnd();
3552 
3553  SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
3554  bq.Get(finalSeed, finalSeed.size());
3555  kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3556  }
3557  RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
3558 
3559  switch (rnType)
3560  {
3561  case ANY:
3562  if (mod == One())
3563  Randomize(rng, min, max);
3564  else
3565  {
3566  Integer min1 = min + (equiv-min)%mod;
3567  if (max < min1)
3568  return false;
3569  Randomize(rng, Zero(), (max - min1) / mod);
3570  *this *= mod;
3571  *this += min1;
3572  }
3573  return true;
3574 
3575  case PRIME:
3576  {
3577  const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
3578 
3579  int i;
3580  i = 0;
3581  while (1)
3582  {
3583  if (++i==16)
3584  {
3585  // check if there are any suitable primes in [min, max]
3586  Integer first = min;
3587  if (FirstPrime(first, max, equiv, mod, pSelector))
3588  {
3589  // if there is only one suitable prime, we're done
3590  *this = first;
3591  if (!FirstPrime(first, max, equiv, mod, pSelector))
3592  return true;
3593  }
3594  else
3595  return false;
3596  }
3597 
3598  Randomize(rng, min, max);
3599  if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3600  return true;
3601  }
3602  }
3603 
3604  default:
3605  throw InvalidArgument("Integer: invalid RandomNumberType argument");
3606  }
3607 }
3608 
3609 std::istream& operator>>(std::istream& in, Integer &a)
3610 {
3611  char c;
3612  unsigned int length = 0;
3613  SecBlock<char> str(length + 16);
3614 
3615  std::ws(in);
3616 
3617  do
3618  {
3619  in.read(&c, 1);
3620  str[length++] = c;
3621  if (length >= str.size())
3622  str.Grow(length + 16);
3623  }
3624  while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
3625 
3626  if (in.gcount())
3627  in.putback(c);
3628  str[length-1] = '\0';
3629  a = Integer(str);
3630 
3631  return in;
3632 }
3633 
3634 std::ostream& operator<<(std::ostream& out, const Integer &a)
3635 {
3636  // Get relevant conversion specifications from ostream.
3637  const long f = out.flags() & std::ios::basefield; // Get base digits.
3638  int base, block;
3639  char suffix;
3640  switch(f)
3641  {
3642  case std::ios::oct :
3643  base = 8;
3644  block = 8;
3645  suffix = 'o';
3646  break;
3647  case std::ios::hex :
3648  base = 16;
3649  block = 4;
3650  suffix = 'h';
3651  break;
3652  default :
3653  base = 10;
3654  block = 3;
3655  suffix = '.';
3656  }
3657 
3658  Integer temp1=a, temp2;
3659 
3660  if (a.IsNegative())
3661  {
3662  out << '-';
3663  temp1.Negate();
3664  }
3665 
3666  if (!a)
3667  out << '0';
3668 
3669  static const char upper[]="0123456789ABCDEF";
3670  static const char lower[]="0123456789abcdef";
3671 
3672  const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3673  unsigned int i=0;
3674  SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
3675 
3676  while (!!temp1)
3677  {
3678  word digit;
3679  Integer::Divide(digit, temp2, temp1, base);
3680  s[i++]=vec[digit];
3681  temp1.swap(temp2);
3682  }
3683 
3684  while (i--)
3685  {
3686  out << s[i];
3687 // if (i && !(i%block))
3688 // out << ",";
3689  }
3690 
3691 #ifdef CRYPTOPP_USE_STD_SHOWBASE
3692  if (out.flags() & std::ios_base::showbase)
3693  out << suffix;
3694 
3695  return out;
3696 #else
3697  return out << suffix;
3698 #endif
3699 }
3700 
3701 Integer& Integer::operator++()
3702 {
3703  if (NotNegative())
3704  {
3705  if (Increment(reg, reg.size()))
3706  {
3707  reg.CleanGrow(2*reg.size());
3708  reg[reg.size()/2]=1;
3709  }
3710  }
3711  else
3712  {
3713  word borrow = Decrement(reg, reg.size());
3714  CRYPTOPP_ASSERT(!borrow);
3715  CRYPTOPP_UNUSED(borrow);
3716 
3717  if (WordCount()==0)
3718  *this = Zero();
3719  }
3720  return *this;
3721 }
3722 
3723 Integer& Integer::operator--()
3724 {
3725  if (IsNegative())
3726  {
3727  if (Increment(reg, reg.size()))
3728  {
3729  reg.CleanGrow(2*reg.size());
3730  reg[reg.size()/2]=1;
3731  }
3732  }
3733  else
3734  {
3735  if (Decrement(reg, reg.size()))
3736  *this = -One();
3737  }
3738  return *this;
3739 }
3740 
3741 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
3742 {
3743  // Profiling tells us the original second Else If was dominant, so it was promoted to the first If statement.
3744  // The code change occurred at Commit dc99266599a0e72d.
3745  int carry; const bool pre = (a.reg.size() == b.reg.size());
3746  if (!pre && a.reg.size() > b.reg.size())
3747  {
3748  carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3749  CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3750  carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3751  }
3752  else if (pre)
3753  {
3754  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3755  }
3756  else
3757  {
3758  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3759  CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3760  carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3761  }
3762 
3763  if (carry)
3764  {
3765  sum.reg.CleanGrow(2*sum.reg.size());
3766  sum.reg[sum.reg.size()/2] = 1;
3767  }
3768  sum.sign = Integer::POSITIVE;
3769 }
3770 
3771 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
3772 {
3773  unsigned aSize = a.WordCount();
3774  aSize += aSize%2;
3775  unsigned bSize = b.WordCount();
3776  bSize += bSize%2;
3777 
3778  // Profiling tells us the original second Else If was dominant, so it was promoted to the first If statement.
3779  // The code change occurred at Commit dc99266599a0e72d.
3780  if (aSize > bSize)
3781  {
3782  word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3783  CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3784  borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3785  CRYPTOPP_ASSERT(!borrow);
3786  diff.sign = Integer::POSITIVE;
3787  }
3788  else if (aSize == bSize)
3789  {
3790  if (Compare(a.reg, b.reg, aSize) >= 0)
3791  {
3792  Subtract(diff.reg, a.reg, b.reg, aSize);
3793  diff.sign = Integer::POSITIVE;
3794  }
3795  else
3796  {
3797  Subtract(diff.reg, b.reg, a.reg, aSize);
3798  diff.sign = Integer::NEGATIVE;
3799  }
3800  }
3801  else
3802  {
3803  word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3804  CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3805  borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
3806  CRYPTOPP_ASSERT(!borrow);
3807  diff.sign = Integer::NEGATIVE;
3808  }
3809 }
3810 
3811 // MSVC .NET 2003 workaround
3812 template <class T> inline const T& STDMAX2(const T& a, const T& b)
3813 {
3814  return a < b ? b : a;
3815 }
3816 
3817 Integer Integer::Plus(const Integer& b) const
3818 {
3819  Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
3820  if (NotNegative())
3821  {
3822  if (b.NotNegative())
3823  PositiveAdd(sum, *this, b);
3824  else
3825  PositiveSubtract(sum, *this, b);
3826  }
3827  else
3828  {
3829  if (b.NotNegative())
3830  PositiveSubtract(sum, b, *this);
3831  else
3832  {
3833  PositiveAdd(sum, *this, b);
3834  sum.sign = Integer::NEGATIVE;
3835  }
3836  }
3837  return sum;
3838 }
3839 
3840 Integer& Integer::operator+=(const Integer& t)
3841 {
3842  reg.CleanGrow(t.reg.size());
3843  if (NotNegative())
3844  {
3845  if (t.NotNegative())
3846  PositiveAdd(*this, *this, t);
3847  else
3848  PositiveSubtract(*this, *this, t);
3849  }
3850  else
3851  {
3852  if (t.NotNegative())
3853  PositiveSubtract(*this, t, *this);
3854  else
3855  {
3856  PositiveAdd(*this, *this, t);
3857  sign = Integer::NEGATIVE;
3858  }
3859  }
3860  return *this;
3861 }
3862 
3863 Integer Integer::Minus(const Integer& b) const
3864 {
3865  Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
3866  if (NotNegative())
3867  {
3868  if (b.NotNegative())
3869  PositiveSubtract(diff, *this, b);
3870  else
3871  PositiveAdd(diff, *this, b);
3872  }
3873  else
3874  {
3875  if (b.NotNegative())
3876  {
3877  PositiveAdd(diff, *this, b);
3878  diff.sign = Integer::NEGATIVE;
3879  }
3880  else
3881  PositiveSubtract(diff, b, *this);
3882  }
3883  return diff;
3884 }
3885 
3886 Integer& Integer::operator-=(const Integer& t)
3887 {
3888  reg.CleanGrow(t.reg.size());
3889  if (NotNegative())
3890  {
3891  if (t.NotNegative())
3892  PositiveSubtract(*this, *this, t);
3893  else
3894  PositiveAdd(*this, *this, t);
3895  }
3896  else
3897  {
3898  if (t.NotNegative())
3899  {
3900  PositiveAdd(*this, *this, t);
3901  sign = Integer::NEGATIVE;
3902  }
3903  else
3904  PositiveSubtract(*this, t, *this);
3905  }
3906  return *this;
3907 }
3908 
3909 Integer& Integer::operator<<=(size_t n)
3910 {
3911  const size_t wordCount = WordCount();
3912  const size_t shiftWords = n / WORD_BITS;
3913  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3914 
3915  reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
3916  ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
3917  ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
3918  return *this;
3919 }
3920 
3921 Integer& Integer::operator>>=(size_t n)
3922 {
3923  const size_t wordCount = WordCount();
3924  const size_t shiftWords = n / WORD_BITS;
3925  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3926 
3927  ShiftWordsRightByWords(reg, wordCount, shiftWords);
3928  if (wordCount > shiftWords)
3929  ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
3930  if (IsNegative() && WordCount()==0) // avoid -0
3931  *this = Zero();
3932  return *this;
3933 }
3934 
3935 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
3936 {
3937  size_t aSize = RoundupSize(a.WordCount());
3938  size_t bSize = RoundupSize(b.WordCount());
3939 
3940  product.reg.CleanNew(RoundupSize(aSize+bSize));
3941  product.sign = Integer::POSITIVE;
3942 
3943  IntegerSecBlock workspace(aSize + bSize);
3944  AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
3945 }
3946 
3947 void Multiply(Integer &product, const Integer &a, const Integer &b)
3948 {
3949  PositiveMultiply(product, a, b);
3950 
3951  if (a.NotNegative() != b.NotNegative())
3952  product.Negate();
3953 }
3954 
3956 {
3957  Integer product;
3958  Multiply(product, *this, b);
3959  return product;
3960 }
3961 
3962 /*
3963 void PositiveDivide(Integer &remainder, Integer &quotient,
3964  const Integer &dividend, const Integer &divisor)
3965 {
3966  remainder.reg.CleanNew(divisor.reg.size());
3967  remainder.sign = Integer::POSITIVE;
3968  quotient.reg.New(0);
3969  quotient.sign = Integer::POSITIVE;
3970  unsigned i=dividend.BitCount();
3971  while (i--)
3972  {
3973  word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
3974  remainder.reg[0] |= dividend[i];
3975  if (overflow || remainder >= divisor)
3976  {
3977  Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
3978  quotient.SetBit(i);
3979  }
3980  }
3981 }
3982 */
3983 
3984 void PositiveDivide(Integer &remainder, Integer &quotient,
3985  const Integer &a, const Integer &b)
3986 {
3987  unsigned aSize = a.WordCount();
3988  unsigned bSize = b.WordCount();
3989 
3990  if (!bSize)
3991  throw Integer::DivideByZero();
3992 
3993  if (aSize < bSize)
3994  {
3995  remainder = a;
3996  remainder.sign = Integer::POSITIVE;
3997  quotient = Integer::Zero();
3998  return;
3999  }
4000 
4001  aSize += aSize%2; // round up to next even number
4002  bSize += bSize%2;
4003 
4004  remainder.reg.CleanNew(RoundupSize(bSize));
4005  remainder.sign = Integer::POSITIVE;
4006  quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
4007  quotient.sign = Integer::POSITIVE;
4008 
4009  IntegerSecBlock T(aSize+3*(bSize+2));
4010  Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
4011 }
4012 
4013 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
4014 {
4015  PositiveDivide(remainder, quotient, dividend, divisor);
4016 
4017  if (dividend.IsNegative())
4018  {
4019  quotient.Negate();
4020  if (remainder.NotZero())
4021  {
4022  --quotient;
4023  remainder = divisor.AbsoluteValue() - remainder;
4024  }
4025  }
4026 
4027  if (divisor.IsNegative())
4028  quotient.Negate();
4029 }
4030 
4031 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
4032 {
4033  q = a;
4034  q >>= n;
4035 
4036  const size_t wordCount = BitsToWords(n);
4037  if (wordCount <= a.WordCount())
4038  {
4039  r.reg.resize(RoundupSize(wordCount));
4040  CopyWords(r.reg, a.reg, wordCount);
4041  SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
4042  if (n % WORD_BITS != 0)
4043  r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
4044  }
4045  else
4046  {
4047  r.reg.resize(RoundupSize(a.WordCount()));
4048  CopyWords(r.reg, a.reg, r.reg.size());
4049  }
4050  r.sign = POSITIVE;
4051 
4052  if (a.IsNegative() && r.NotZero())
4053  {
4054  --q;
4055  r = Power2(n) - r;
4056  }
4057 }
4058 
4059 Integer Integer::DividedBy(const Integer &b) const
4060 {
4061  Integer remainder, quotient;
4062  Integer::Divide(remainder, quotient, *this, b);
4063  return quotient;
4064 }
4065 
4067 {
4068  Integer remainder, quotient;
4069  Integer::Divide(remainder, quotient, *this, b);
4070  return remainder;
4071 }
4072 
4073 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
4074 {
4075  if (!divisor)
4076  throw Integer::DivideByZero();
4077 
4078  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
4079  {
4080  quotient = dividend >> (BitPrecision(divisor)-1);
4081  remainder = dividend.reg[0] & (divisor-1);
4082  return;
4083  }
4084 
4085  unsigned int i = dividend.WordCount();
4086  quotient.reg.CleanNew(RoundupSize(i));
4087  remainder = 0;
4088  while (i--)
4089  {
4090  quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
4091  remainder = DWord(dividend.reg[i], remainder) % divisor;
4092  }
4093 
4094  if (dividend.NotNegative())
4095  quotient.sign = POSITIVE;
4096  else
4097  {
4098  quotient.sign = NEGATIVE;
4099  if (remainder)
4100  {
4101  --quotient;
4102  remainder = divisor - remainder;
4103  }
4104  }
4105 }
4106 
4107 Integer Integer::DividedBy(word b) const
4108 {
4109  word remainder;
4110  Integer quotient;
4111  Integer::Divide(remainder, quotient, *this, b);
4112  return quotient;
4113 }
4114 
4115 word Integer::Modulo(word divisor) const
4116 {
4117  if (!divisor)
4118  throw Integer::DivideByZero();
4119 
4120  word remainder;
4121 
4122  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
4123  // The code change occurred at Commit dc99266599a0e72d.
4124  if ((divisor & (divisor-1)) != 0) // divisor is not a power of 2
4125  {
4126  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
4127  // The code change occurred at Commit dc99266599a0e72d.
4128  unsigned int i = WordCount();
4129  if (divisor > 5)
4130  {
4131  remainder = 0;
4132  while (i--)
4133  remainder = DWord(reg[i], remainder) % divisor;
4134  }
4135  else
4136  {
4137  DWord sum(0, 0);
4138  while (i--)
4139  sum += reg[i];
4140  remainder = sum % divisor;
4141  }
4142  }
4143  else // divisor is a power of 2
4144  {
4145  remainder = reg[0] & (divisor-1);
4146  }
4147 
4148  if (IsNegative() && remainder)
4149  remainder = divisor - remainder;
4150 
4151  return remainder;
4152 }
4153 
4155 {
4156  if (!!(*this)) // don't flip sign if *this==0
4157  sign = Sign(1-sign);
4158 }
4159 
4160 int Integer::PositiveCompare(const Integer& t) const
4161 {
4162  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
4163  // The code change occurred at Commit dc99266599a0e72d.
4164  const unsigned size = WordCount(), tSize = t.WordCount();
4165  if (size != tSize)
4166  return size > tSize ? 1 : -1;
4167  else
4168  return CryptoPP::Compare(reg, t.reg, size);
4169 }
4170 
4171 int Integer::Compare(const Integer& t) const
4172 {
4173  if (NotNegative())
4174  {
4175  if (t.NotNegative())
4176  return PositiveCompare(t);
4177  else
4178  return 1;
4179  }
4180  else
4181  {
4182  if (t.NotNegative())
4183  return -1;
4184  else
4185  return -PositiveCompare(t);
4186  }
4187 }
4188 
4190 {
4191  if (!IsPositive())
4192  return Zero();
4193 
4194  // overestimate square root
4195  Integer x, y = Power2((BitCount()+1)/2);
4196  CRYPTOPP_ASSERT(y*y >= *this);
4197 
4198  do
4199  {
4200  x = y;
4201  y = (x + *this/x) >> 1;
4202  } while (y<x);
4203 
4204  return x;
4205 }
4206 
4207 bool Integer::IsSquare() const
4208 {
4209  Integer r = SquareRoot();
4210  return *this == r.Squared();
4211 }
4212 
4213 bool Integer::IsUnit() const
4214 {
4215  return (WordCount() == 1) && (reg[0] == 1);
4216 }
4217 
4219 {
4220  return IsUnit() ? *this : Zero();
4221 }
4222 
4223 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
4224 {
4225  return x*y%m;
4226 }
4227 
4228 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
4229 {
4230  ModularArithmetic mr(m);
4231  return mr.Exponentiate(x, e);
4232 }
4233 
4235 {
4236  return EuclideanDomainOf<Integer>().Gcd(a, b);
4237 }
4238 
4240 {
4242 
4243  if (IsNegative())
4244  return Modulo(m).InverseMod(m);
4245 
4246  if (m.IsEven())
4247  {
4248  if (!m || IsEven())
4249  return Zero(); // no inverse
4250  if (*this == One())
4251  return One();
4252 
4253  Integer u = m.Modulo(*this).InverseMod(*this);
4254  return !u ? Zero() : (m*(*this-u)+1)/(*this);
4255  }
4256 
4257  SecBlock<word> T(m.reg.size() * 4);
4258  Integer r((word)0, m.reg.size());
4259  unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
4260  DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
4261  return r;
4262 }
4263 
4264 word Integer::InverseMod(word mod) const
4265 {
4266  word g0 = mod, g1 = *this % mod;
4267  word v0 = 0, v1 = 1;
4268  word y;
4269 
4270  while (g1)
4271  {
4272  if (g1 == 1)
4273  return v1;
4274  y = g0 / g1;
4275  g0 = g0 % g1;
4276  v0 += y * v1;
4277 
4278  if (!g0)
4279  break;
4280  if (g0 == 1)
4281  return mod-v0;
4282  y = g1 / g0;
4283  g1 = g1 % g0;
4284  v1 += y * v0;
4285  }
4286  return 0;
4287 }
4288 
4289 // ********************************************************
4290 
4292 {
4293  BERSequenceDecoder seq(bt);
4294  OID oid(seq);
4295  if (oid != ASN1::prime_field())
4296  BERDecodeError();
4297  m_modulus.BERDecode(seq);
4298  seq.MessageEnd();
4299  m_result.reg.resize(m_modulus.reg.size());
4300 }
4301 
4303 {
4304  DERSequenceEncoder seq(bt);
4305  ASN1::prime_field().DEREncode(seq);
4306  m_modulus.DEREncode(seq);
4307  seq.MessageEnd();
4308 }
4309 
4311 {
4313 }
4314 
4316 {
4318 }
4319 
4321 {
4322  if (a.reg.size()==m_modulus.reg.size())
4323  {
4324  CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4325  return m_result;
4326  }
4327  else
4328  return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4329 }
4330 
4331 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
4332 {
4333  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4334  {
4335  if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
4336  || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4337  {
4338  CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4339  }
4340  return m_result;
4341  }
4342  else
4343  {
4344  m_result1 = a+b;
4345  if (m_result1 >= m_modulus)
4346  m_result1 -= m_modulus;
4347  return m_result1;
4348  }
4349 }
4350 
4352 {
4353  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4354  {
4355  if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4356  || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4357  {
4358  CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4359  }
4360  }
4361  else
4362  {
4363  a+=b;
4364  if (a>=m_modulus)
4365  a-=m_modulus;
4366  }
4367 
4368  return a;
4369 }
4370 
4371 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
4372 {
4373  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4374  {
4375  if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
4376  CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4377  return m_result;
4378  }
4379  else
4380  {
4381  m_result1 = a-b;
4382  if (m_result1.IsNegative())
4383  m_result1 += m_modulus;
4384  return m_result1;
4385  }
4386 }
4387 
4389 {
4390  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4391  {
4392  if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4393  CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4394  }
4395  else
4396  {
4397  a-=b;
4398  if (a.IsNegative())
4399  a+=m_modulus;
4400  }
4401 
4402  return a;
4403 }
4404 
4406 {
4407  if (!a)
4408  return a;
4409 
4410  CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
4411  if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
4412  Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
4413 
4414  return m_result;
4415 }
4416 
4417 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
4418 {
4419  if (m_modulus.IsOdd())
4420  {
4421  MontgomeryRepresentation dr(m_modulus);
4422  return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
4423  }
4424  else
4425  return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
4426 }
4427 
4428 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
4429 {
4430  if (m_modulus.IsOdd())
4431  {
4432  MontgomeryRepresentation dr(m_modulus);
4433  dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
4434  for (unsigned int i=0; i<exponentsCount; i++)
4435  results[i] = dr.ConvertOut(results[i]);
4436  }
4437  else
4438  AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
4439 }
4440 
4442  : ModularArithmetic(m),
4443  m_u((word)0, m_modulus.reg.size()),
4444  m_workspace(5*m_modulus.reg.size())
4445 {
4446  if (!m_modulus.IsOdd())
4447  throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4448 
4449  RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
4450 }
4451 
4453 {
4454  word *const T = m_workspace.begin();
4455  word *const R = m_result.reg.begin();
4456  const size_t N = m_modulus.reg.size();
4457  CRYPTOPP_ASSERT(a.reg.size()<=N && b.reg.size()<=N);
4458 
4459  AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4460  SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4461  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4462  return m_result;
4463 }
4464 
4466 {
4467  word *const T = m_workspace.begin();
4468  word *const R = m_result.reg.begin();
4469  const size_t N = m_modulus.reg.size();
4470  CRYPTOPP_ASSERT(a.reg.size()<=N);
4471 
4472  CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4473  SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4474  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4475  return m_result;
4476 }
4477 
4479 {
4480  word *const T = m_workspace.begin();
4481  word *const R = m_result.reg.begin();
4482  const size_t N = m_modulus.reg.size();
4483  CRYPTOPP_ASSERT(a.reg.size()<=N);
4484 
4485  CopyWords(T, a.reg, a.reg.size());
4486  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4487  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4488  return m_result;
4489 }
4490 
4492 {
4493 // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
4494  word *const T = m_workspace.begin();
4495  word *const R = m_result.reg.begin();
4496  const size_t N = m_modulus.reg.size();
4497  CRYPTOPP_ASSERT(a.reg.size()<=N);
4498 
4499  CopyWords(T, a.reg, a.reg.size());
4500  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4501  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4502  unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4503 
4504 // cout << "k=" << k << " N*32=" << 32*N << endl;
4505 
4506  if (k>N*WORD_BITS)
4507  DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4508  else
4509  MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4510 
4511  return m_result;
4512 }
4513 
4514 // Specialization declared in misc.h to allow us to print integers
4515 // with additional control options, like arbirary bases and uppercase.
4516 template <> CRYPTOPP_DLL
4517 std::string IntToString<Integer>(Integer value, unsigned int base)
4518 {
4519  // Hack... set the high bit for uppercase. Set the next bit fo a suffix.
4520  static const unsigned int BIT_32 = (1U << 31);
4521  const bool UPPER = !!(base & BIT_32);
4522  static const unsigned int BIT_31 = (1U << 30);
4523  const bool BASE = !!(base & BIT_31);
4524 
4525  const char CH = UPPER ? 'A' : 'a';
4526  base &= ~(BIT_32|BIT_31);
4527  CRYPTOPP_ASSERT(base >= 2 && base <= 32);
4528 
4529  if (value == 0)
4530  return "0";
4531 
4532  bool negative = false, zero = false;
4533  if (value.IsNegative())
4534  {
4535  negative = true;
4536  value.Negate();
4537  }
4538 
4539  if (!value)
4540  zero = true;
4541 
4542  SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
4543  Integer temp;
4544 
4545  unsigned int i=0;
4546  while (!!value)
4547  {
4548  word digit;
4549  Integer::Divide(digit, temp, value, word(base));
4550  s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit);
4551  value.swap(temp);
4552  }
4553 
4554  std::string result;
4555  result.reserve(i+2);
4556 
4557  if (negative)
4558  result += '-';
4559 
4560  if (zero)
4561  result += '0';
4562 
4563  while (i--)
4564  result += s[i];
4565 
4566  if (BASE)
4567  {
4568  if (base == 10)
4569  result += '.';
4570  else if (base == 16)
4571  result += 'h';
4572  else if (base == 8)
4573  result += 'o';
4574  else if (base == 2)
4575  result += 'b';
4576  }
4577 
4578  return result;
4579 }
4580 
4581 // Specialization declared in misc.h to avoid Coverity findings.
4582 template <> CRYPTOPP_DLL
4583 std::string IntToString<word64>(word64 value, unsigned int base)
4584 {
4585  // Hack... set the high bit for uppercase.
4586  static const unsigned int HIGH_BIT = (1U << 31);
4587  const char CH = !!(base & HIGH_BIT) ? 'A' : 'a';
4588  base &= ~HIGH_BIT;
4589 
4590  CRYPTOPP_ASSERT(base >= 2);
4591  if (value == 0)
4592  return "0";
4593 
4594  std::string result;
4595  while (value > 0)
4596  {
4597  word64 digit = value % base;
4598  result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result;
4599  value /= base;
4600  }
4601  return result;
4602 }
4603 
4604 NAMESPACE_END
4605 
4606 #endif
Used to pass byte array input as part of a NameValuePairs object.
Definition: algparam.h:29
An invalid argument was detected.
Definition: cryptlib.h:187
void DEREncode(BufferedTransformation &bt) const
Encodes in DER format.
Definition: integer.cpp:4302
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: integer.cpp:4417
Classes for working with NameValuePairs.
const Integer & Square(const Integer &a) const
Square an element in the ring.
Definition: integer.cpp:4465
const Integer & MultiplicativeInverse(const Integer &a) const
Calculate the multiplicative inverse of an element in the ring.
Definition: integer.cpp:4491
void swap(SecBlock< T, A > &b)
Swap contents with another SecBlock.
Definition: secblock.h:714
a number which is probabilistically prime
Definition: integer.h:91
Utility functions for the Crypto++ library.
ByteOrder
Provides the byte ordering.
Definition: cryptlib.h:128
Restricts the instantiation of a class to one static object without locks.
Definition: misc.h:267
bool NotZero() const
Determines if the Integer is non-0.
Definition: integer.h:329
bool GetBit(size_t i) const
Provides the i-th bit of the Integer.
Definition: integer.cpp:3070
void CleanNew(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:660
void Encode(byte *output, size_t outputLen, Signedness sign=UNSIGNED) const
Encode in big-endian format.
Definition: integer.cpp:3374
an unsigned value
Definition: integer.h:81
Integer & Reduce(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4388
virtual size_t Peek(byte &outByte) const
Peek a 8-bit byte.
Definition: cryptlib.cpp:534
bool IsOdd() const
Determines if the Integer is odd parity.
Definition: integer.h:347
T GetValueWithDefault(const char *name, T defaultValue) const
Get a named value.
Definition: cryptlib.h:353
size_t DEREncodeUnsigned(BufferedTransformation &out, T w, byte asnTag=INTEGER)
DER Encode unsigned value.
Definition: asn.h:442
virtual void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: cryptlib.cpp:326
size_t BitsToBytes(size_t bitCount)
Returns the number of 8-bit bytes or octets required for the specified number of bits.
Definition: misc.h:743
This file contains helper classes/functions for implementing public key algorithms.
bool FirstPrime(Integer &p, const Integer &max, const Integer &equiv, const Integer &mod, const PrimeSelector *pSelector)
Finds a random prime of special form.
Definition: nbtheory.cpp:381
static Integer Gcd(const Integer &a, const Integer &n)
greatest common divisor
Definition: integer.cpp:4234
void resize(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:705
size_t BitsToWords(size_t bitCount)
Returns the number of words required for the specified number of bits.
Definition: misc.h:763
const Integer & Subtract(const Integer &a, const Integer &b) const
Subtracts elements in the ring.
Definition: integer.cpp:4371
unsigned int BytePrecision(const T &value)
Returns the number of 8-bit bytes or octets required for a value.
Definition: misc.h:626
void CleanGrow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:689
bool IsEven() const
Determines if the Integer is even parity.
Definition: integer.h:344
Secure memory block with allocator and cleanup.
Definition: secblock.h:437
unsigned int WordCount() const
Determines the number of words required to represent the Integer.
Definition: integer.cpp:3296
void OpenPGPDecode(const byte *input, size_t inputLen)
Decode from OpenPGP format.
Definition: integer.cpp:3449
Signedness
Used when importing and exporting integers.
Definition: integer.h:79
size_type size() const
Provides the count of elements in the SecBlock.
Definition: secblock.h:524
ASN.1 object identifiers for algorthms and schemes.
Square block cipher.
Definition: square.h:24
Classes for automatic resource management.
size_t size() const
Length of the memory block.
Definition: algparam.h:93
bool IsP4()
Determines if the CPU is an Intel P4.
Definition: cpu.h:215
byte GetByte(size_t i) const
Provides the i-th byte of the Integer.
Definition: integer.cpp:3094
Library configuration file.
MontgomeryRepresentation(const Integer &modulus)
Construct a IsMontgomeryRepresentation.
Definition: integer.cpp:4441
static void DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
returns same result as Divide(r, q, a, Power2(n)), but faster
Definition: integer.cpp:4031
Ring of congruence classes modulo n.
Definition: modarith.h:34
Interface for random number generators.
Definition: cryptlib.h:1193
size_t BytesToWords(size_t byteCount)
Returns the number of words required for the specified number of bytes.
Definition: misc.h:753
void Randomize(RandomNumberGenerator &rng, size_t bitCount)
Set this Integer to random integer.
Definition: integer.cpp:3463
std::string IntToString< word64 >(word64 value, unsigned int base)
Converts an unsigned value to a string.
Definition: integer.cpp:4583
size_t MinEncodedSize(Signedness sign=UNSIGNED) const
The minimum number of bytes to encode this integer.
Definition: integer.cpp:3359
void New(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:647
void SetByte(size_t n, byte value)
Set the n-th byte to value.
Definition: integer.cpp:3104
the value is negative
Definition: integer.h:73
SecBlock&lt;byte&gt; typedef.
Definition: secblock.h:731
virtual void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the Ring.
Definition: algebra.cpp:334
BER Sequence Decoder.
Definition: asn.h:294
Interface for buffered transformations.
Definition: cryptlib.h:1359
void DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
encode absolute value as big-endian octet string
Definition: integer.cpp:3418
lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: queue.h:35
const byte * begin() const
Pointer to the first byte in the memory block.
Definition: algparam.h:89
bool IsConvertableToLong() const
Determines if the Integer is convertable to Long.
Definition: integer.cpp:2937
Integer MultiplicativeInverse() const
return inverse if 1 or -1, otherwise return 0
Definition: integer.cpp:4218
static const Integer & One()
Integer representing 1.
Definition: integer.cpp:3040
bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:376
const Integer & Add(const Integer &a, const Integer &b) const
Adds elements in the ring.
Definition: integer.cpp:4331
byte order is little-endian
Definition: cryptlib.h:130
Sign
Used internally to represent the integer.
Definition: integer.h:69
Pointer that overloads operator -&gt;
Definition: smartptr.h:39
bool IsSquare() const
return whether this integer is a perfect square
Definition: integer.cpp:4207
size_t OpenPGPEncode(byte *output, size_t bufferSize) const
Encode absolute value in OpenPGP format.
Definition: integer.cpp:3434
Classes and functions for secure memory allocations.
unsigned int BitCount() const
Determines the number of bits required to represent the Integer.
Definition: integer.cpp:3310
void BERDecodeElement(BufferedTransformation &in, Element &a) const
Decodes element in DER format.
Definition: integer.cpp:4315
bool IsUnit() const
is 1 or -1
Definition: integer.cpp:4213
Copy input to a memory buffer.
Definition: filters.h:1025
Integer SquareRoot() const
extract square root, if negative return 0, else return floor of square root
Definition: integer.cpp:4189
bool IsPositive() const
Determines if the Integer is positive.
Definition: integer.h:338
bool GetValue(const char *name, T &value) const
Get a named value.
Definition: cryptlib.h:340
a number with no special properties
Definition: integer.h:89
Integer Squared() const
Definition: integer.h:496
size_t Put(byte inByte, bool blocking=true)
Input a byte for processing.
Definition: cryptlib.h:1385
bool IsNegative() const
Determines if the Integer is negative.
Definition: integer.h:332
AlgorithmParameters MakeParameters(const char *name, const T &value, bool throwIfNotUsed=true)
Create an object that implements NameValuePairs.
Definition: algparam.h:563
void swap(Integer &a)
Swaps this Integer with another Integer.
Definition: integer.cpp:3134
Integer()
Creates the zero integer.
Definition: integer.cpp:2897
unsigned int TrailingZeros(word32 v)
Determines the number of trailing 0-bits in a value.
Definition: misc.h:673
signed long ConvertToLong() const
Convert the Integer to Long.
Definition: integer.cpp:2951
Exception thrown when an error is encountered decoding an OpenPGP integer.
Definition: integer.h:278
void Negate()
Reverse the Sign of the Integer.
Definition: integer.cpp:4154
T Crop(T value, size_t bits)
Truncates the value to the specified number of bits.
Definition: misc.h:731
Integer Times(const Integer &b) const
Definition: integer.cpp:3955
void BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
Decode nonnegative value from big-endian octet string.
Definition: integer.cpp:3425
Application callback to signal suitability of a cabdidate prime.
Definition: nbtheory.h:80
void ConditionalSwapPointers(bool c, T &a, T &b)
Performs a branchless swap of pointers a and b if condition c is true.
Definition: misc.h:1067
static Integer Power2(size_t e)
Exponentiates to a power of 2.
Definition: integer.cpp:3013
Multiple precision integer with arithmetic operations.
Definition: integer.h:45
a signed value
Definition: integer.h:83
static const Integer & Two()
Integer representing 2.
Definition: integer.cpp:3048
Integer ConvertOut(const Integer &a) const
Reduces an element in the congruence class.
Definition: integer.cpp:4478
virtual Element CascadeExponentiate(const Element &x, const Integer &e1, const Element &y, const Integer &e2) const
TODO.
Definition: algebra.cpp:323
virtual lword Skip(lword skipMax=LWORD_MAX)
Discard skipMax bytes from the output buffer.
Definition: cryptlib.cpp:553
RandomNumberType
Properties of a random integer.
Definition: integer.h:87
const Integer & Inverse(const Integer &a) const
Inverts the element in the ring.
Definition: integer.cpp:4405
const char * Seed()
ConstByteArrayParameter.
Definition: argnames.h:19
byte order is big-endian
Definition: cryptlib.h:132
Safely right shift values when undefined behavior could occur.
const T & STDMIN(const T &a, const T &b)
Replacement function for std::min.
Definition: misc.h:470
String-based implementation of Store interface.
Definition: filters.h:1075
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:62
static void Divide(Integer &r, Integer &q, const Integer &a, const Integer &d)
calculate r and q such that (a == d*q + r) &amp;&amp; (0 &lt;= r &lt; abs(d))
Definition: integer.cpp:4013
void BERDecodeError()
Raises a BERDecodeErr.
Definition: asn.h:61
Data structure used to store byte strings.
Definition: queue.h:20
Functions for CPU features and intrinsics.
Classes and functions for working with ANS.1 objects.
Classes for SHA-1 and SHA-2 family of message digests.
void SetBit(size_t n, bool value=1)
Set the n-th bit to value.
Definition: integer.cpp:3080
size_t GetWord16(word16 &value, ByteOrder order=BIG_ENDIAN_ORDER)
Retrieve a 16-bit word.
Definition: cryptlib.cpp:750
iterator begin()
Provides an iterator pointing to the first element in the memory block.
Definition: secblock.h:499
Integer & Accumulate(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4351
const char * PointerToPrimeSelector()
const PrimeSelector *
Definition: argnames.h:41
Implementation of BufferedTransformation&#39;s attachment interface.
Classes and functions for number theoretic operations.
const Integer & Half(const Integer &a) const
TODO.
Definition: integer.cpp:4320
void DEREncode(BufferedTransformation &bt) const
Encode in DER format.
Definition: integer.cpp:3396
Exception thrown when division by 0 is encountered.
Definition: integer.h:51
DER Sequence Encoder.
Definition: asn.h:304
T1 SaturatingSubtract1(const T1 &a, const T2 &b)
Performs a saturating subtract clamped at 1.
Definition: misc.h:856
Exception thrown when a random number cannot be found that satisfies the condition.
Definition: integer.h:59
Performs modular arithmetic in Montgomery representation for increased speed.
Definition: modarith.h:274
DER General Encoder.
Definition: asn.h:271
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:160
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: modarith.h:308
const Integer & Multiply(const Integer &a, const Integer &b) const
Multiplies elements in the ring.
Definition: integer.cpp:4452
void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: integer.cpp:3504
Integer InverseMod(const Integer &n) const
calculate multiplicative inverse of *this mod n
Definition: integer.cpp:4239
void Decode(const byte *input, size_t inputLen, Signedness sign=UNSIGNED)
Decode from big-endian byte array.
Definition: integer.cpp:3319
size_t PutWord16(word16 value, ByteOrder order=BIG_ENDIAN_ORDER, bool blocking=true)
Input a 16-bit word for processing.
Definition: cryptlib.cpp:714
size_t DEREncodeOctetString(BufferedTransformation &bt, const byte *str, size_t strLen)
DER encode octet string.
Definition: asn.cpp:104
virtual lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: cryptlib.cpp:496
Multiple precision integer with arithmetic operations.
void DEREncodeElement(BufferedTransformation &out, const Element &a) const
Encodes element in DER format.
Definition: integer.cpp:4310
Safely left shift values when undefined behavior could occur.
static const Integer & Zero()
Integer representing 0.
Definition: integer.cpp:3032
const T & STDMAX(const T &a, const T &b)
Replacement function for std::max.
Definition: misc.h:480
Euclidean domain.
Definition: algebra.h:315
void Grow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:673
void BERDecode(const byte *input, size_t inputLen)
Decode from BER format.
Definition: integer.cpp:3403
std::string IntToString< Integer >(Integer value, unsigned int base)
Converts an Integer to a string.
Definition: integer.cpp:4517
lword GetBits(size_t i, size_t n) const
Provides the low order bits of the Integer.
Definition: integer.cpp:3111
virtual size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: cryptlib.cpp:515
Class file for performing modular arithmetic.
ModularArithmetic(const ModularArithmetic &ma)
Copy construct a ModularArithmetic.
Definition: modarith.h:52
size_type SizeInBytes() const
Provides the number of bytes in the SecBlock.
Definition: secblock.h:538
BER General Decoder.
Definition: asn.h:234
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: integer.cpp:4428
int Compare(const Integer &a) const
Perform signed comparison.
Definition: integer.cpp:4171
unsigned int MaxElementByteLength() const
Provides the maximum byte size of an element in the ring.
Definition: modarith.h:230
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: modarith.h:311
Object Identifier.
Definition: asn.h:158
Integer Modulo(const Integer &b) const
Definition: integer.cpp:4066
size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: queue.cpp:300
unsigned int BitPrecision(const T &value)
Returns the number of bits required for a value.
Definition: misc.h:648
unsigned int ByteCount() const
Determines the number of bytes required to represent the Integer.
Definition: integer.cpp:3301
Integer ConvertIn(const Integer &a) const
Reduces an element in the congruence class.
Definition: modarith.h:294
the value is positive or 0
Definition: integer.h:71
bool NotNegative() const
Determines if the Integer is non-negative.
Definition: integer.h:335
Interface for retrieving values given their names.
Definition: cryptlib.h:282