8 #ifndef CRYPTOPP_GENERATE_X64_MASM
15 #if CRYPTOPP_MSC_VERSION
16 # pragma warning(disable: 4702 4740)
23 #if defined(CRYPTOPP_DISABLE_SALSA_ASM)
24 # undef CRYPTOPP_X86_ASM_AVAILABLE
25 # undef CRYPTOPP_X32_ASM_AVAILABLE
26 # undef CRYPTOPP_X64_ASM_AVAILABLE
27 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
28 # undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
29 # define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
30 # define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
33 NAMESPACE_BEGIN(CryptoPP)
35 #if CRYPTOPP_DEBUG && !defined(CRYPTOPP_DOXYGEN_PROCESSING)
36 void Salsa20_TestInstantiations()
// Keys the Salsa20 state from `key` (`length` bytes).
// NOTE: this listing omits some lines of the function (braces and the
// declarations of get1/get2); only the visible statements are described.
43 void Salsa20_Policy::CipherSetKey(const NameValuePairs &params, const byte *key, size_t length)
// Only 8, 12 or 20 rounds are accepted; any other value throws InvalidRounds.
47 if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
48 throw InvalidRounds(Salsa20::StaticAlgorithmName(), m_rounds);
// get1 fills state slots 13,10,7,4 and get2 fills slots 15,12,9,6
// (presumably with the key words; get1/get2 are declared on lines not shown).
52 get1(m_state[13])(m_state[10])(m_state[7])(m_state[4]);
54 get2(m_state[15])(m_state[12])(m_state[9])(m_state[6]);
// Slots 0..3 hold the constant words. Read as little-endian ASCII they spell
// "expand 16-byte k" when length == 16 and "expand 32-byte k" otherwise.
57 m_state[0] = 0x61707865;
58 m_state[1] = (length == 16) ? 0x3120646e : 0x3320646e;
59 m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32;
60 m_state[3] = 0x6b206574;
// Installs a new IV and restarts the block counter.
// NOTE: some lines of this function are omitted from the listing (including
// the declaration of `get`).
63 void Salsa20_Policy::CipherResynchronize(byte *keystreamBuffer,
const byte *
IV,
size_t length)
// keystreamBuffer and length are not otherwise used by the visible statements.
65 CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length);
// `get` fills state slots 14 and 11 (presumably with the two IV words).
69 get(m_state[14])(m_state[11]);
// Slots 8 and 5 form the block counter (low word in 8, high word in 5, as the
// add/adc pair in the SSE2 path shows); both are reset to zero.
70 m_state[8] = m_state[5] = 0;
// Positions the keystream at block number `iterationCount`.
// NOTE: the listing shows only the first statement of the body.
73 void Salsa20_Policy::SeekToIteration(lword iterationCount)
// Slot 8 receives iterationCount truncated to 32 bits (the low counter word).
75 m_state[8] = (word32)iterationCount;
79 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) && !defined(CRYPTOPP_DISABLE_SALSA_ASM)
82 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
87 return GetAlignmentOf<word32>();
92 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
94 return 4*BYTES_PER_ITERATION;
97 return BYTES_PER_ITERATION;
101 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
// Declaration of the keystream routine used when CRYPTOPP_X64_MASM_AVAILABLE is
// defined. Salsa20_Policy::OperateKeystream calls it with
// (output, input, iterationCount, m_rounds, m_state.data()).
103 void Salsa20_OperateKeystream(byte *output,
const byte *input,
size_t iterationCount,
int rounds,
void *state);
107 #if CRYPTOPP_MSC_VERSION
108 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
// Entry point that generates keystream for `iterationCount` iterations.
// NOTE: braces and several lines of the body are omitted from this listing.
111 void Salsa20_Policy::OperateKeystream(
KeystreamOperation operation, byte *output,
const byte *input,
size_t iterationCount)
113 #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
// With the external MASM object available, the work is forwarded to it,
// passing the round count and a pointer to the state words.
115 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
116 Salsa20_OperateKeystream(output, input, iterationCount, m_rounds, m_state.
data());
120 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
121 #ifdef CRYPTOPP_GENERATE_X64_MASM
// MASM procedure prologue. The fifth argument (state) is fetched from the stack
// into r10, then 10*16 + 32*16 + 8 bytes of stack are reserved. xmm6..xmm15 are
// saved at offsets 0200h..0290h, i.e. directly above the first 32*16 (= 0200h)
// bytes, which the code addresses as SSE2_WORKSPACE. The same offsets are used
// to restore the registers before ENDP.
123 Salsa20_OperateKeystream PROC FRAME
124 mov r10, [rsp + 5*8] ; state
125 alloc_stack(10*16 + 32*16 + 8)
126 save_xmm128 xmm6, 0200h
127 save_xmm128 xmm7, 0210h
128 save_xmm128 xmm8, 0220h
129 save_xmm128 xmm9, 0230h
130 save_xmm128 xmm10, 0240h
131 save_xmm128 xmm11, 0250h
132 save_xmm128 xmm12, 0260h
133 save_xmm128 xmm13, 0270h
134 save_xmm128 xmm14, 0280h
135 save_xmm128 xmm15, 0290h
// Register assignments for the MASM build of Salsa20_OperateKeystream.
// output, input, iterationCount and rounds are the first four arguments and so
// live in rcx, rdx, r8 and r9; state was loaded into r10 by the prologue.
// rounds is an int, so its 32-bit view r9d is used. The earlier spelling "e9d"
// is not a register name and would not assemble.
138 #define REG_output rcx
139 #define REG_input rdx
140 #define REG_iterationCount r8
141 #define REG_state r10
142 #define REG_rounds r9d
143 #define REG_roundsLeft eax
144 #define REG_temp32 r11d
// The 32*16-byte scratch area sits at the bottom of the frame reserved by the prologue.
146 #define SSE2_WORKSPACE rsp
// Register/operand names for the inline-assembly builds.
// NOTE: several lines (including #else/#endif lines) are omitted from this listing.
150 #if CRYPTOPP_BOOL_X64
// X64: names map to numbered asm operands. In the operand lists further down,
// %1 is output, %2 is iterationCount, %3 is m_rounds and %5 is workspace.
151 #define REG_output %1
153 #define REG_iterationCount %2
155 #define REG_rounds %3
156 #define REG_roundsLeft eax
157 #define REG_temp32 edx
159 #define SSE2_WORKSPACE %5
// 16-byte aligned scratch buffer of 16*32 bytes passed to the asm as `workspace`.
161 CRYPTOPP_ALIGN_DATA(16) byte workspace[16*32];
// 32-bit registers: these agree with the "a"/"D"/"c"/"d"/"S" constraints used
// for input/output/iterationCount/m_rounds/m_state in the second operand list.
163 #define REG_output edi
164 #define REG_input eax
165 #define REG_iterationCount ecx
166 #define REG_state esi
167 #define REG_rounds edx
168 #define REG_roundsLeft ebx
// ebp is used as a scratch register here (see the warning 4731 pragma above).
169 #define REG_temp32 ebp
171 #define SSE2_WORKSPACE esp + WORD_SZ
// Arguments are moved into the chosen registers explicitly; `s` is the state
// pointer. (`r` is declared on a line not shown - presumably the round count.)
180 void *s = m_state.
data();
183 AS2( mov REG_iterationCount, iterationCount)
184 AS2( mov REG_input, input)
185 AS2( mov REG_output, output)
186 AS2( mov REG_state, s)
187 AS2( mov REG_rounds, r)
189 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
192 AS2( cmp REG_iterationCount, 4)
195 #if CRYPTOPP_BOOL_X86
// SSE2_EXPAND_S(i, j): copies dword j of xmm<i> into xmm4 via pshufd with
// selector (j, j, j, j) - presumably replicating it into all four lanes - and
// stores the result in workspace slot (i*4+j), in the half that starts at byte
// offset 256.
202 #define SSE2_EXPAND_S(i, j) \
203 ASS( pshufd xmm4, xmm##i, j, j, j, j) \
204 AS2( movdqa [SSE2_WORKSPACE + (i*4+j)*16 + 256], xmm4)
// Load the 16 state words (four 16-byte rows) into xmm0..xmm3.
206 AS2( movdqa xmm0, [REG_state + 0*16])
207 AS2( movdqa xmm1, [REG_state + 1*16])
208 AS2( movdqa xmm2, [REG_state + 2*16])
209 AS2( movdqa xmm3, [REG_state + 3*16])
// SSE2_EXPAND_S85(i): writes the current counter (low word in REG_roundsLeft,
// high word in REG_temp32) into lane i of workspace slots 8 and 5 (+256 half),
// then advances the 64-bit counter by one (add / adc).
225 #define SSE2_EXPAND_S85(i) \
226 AS2( mov dword ptr [SSE2_WORKSPACE + 8*16 + i*4 + 256], REG_roundsLeft) \
227 AS2( mov dword ptr [SSE2_WORKSPACE + 5*16 + i*4 + 256], REG_temp32) \
228 AS2( add REG_roundsLeft, 1) \
229 AS2( adc REG_temp32, 0)
// The counter is read from state words 8 (low) and 5 (high) ...
232 AS2( mov REG_roundsLeft, dword ptr [REG_state + 8*4])
233 AS2( mov REG_temp32, dword ptr [REG_state + 5*4])
// ... and the advanced value is written back to the state.
238 AS2( mov dword ptr [REG_state + 8*4], REG_roundsLeft)
239 AS2( mov dword ptr [REG_state + 5*4], REG_temp32)
// SSE2_QUARTER_ROUND(a, b, d, i): per 32-bit lane, xmm<b> ^= rotl(xmm<a> + xmm<d>, i).
// The rotate is built from a left shift by i and a right shift by 32-i, each
// XORed into xmm<b>. xmm4 and xmm5 are used as scratch.
241 #define SSE2_QUARTER_ROUND(a, b, d, i) \
242 AS2( movdqa xmm4, xmm##d) \
243 AS2( paddd xmm4, xmm##a) \
244 AS2( movdqa xmm5, xmm4) \
245 AS2( pslld xmm4, i) \
246 AS2( psrld xmm5, 32-i) \
247 AS2( pxor xmm##b, xmm4) \
248 AS2( pxor xmm##b, xmm5)
// L01..L32: the 32 instruction steps of one quarter-round over workspace slots
// a, b, c, d, with XMM registers A, B, C, D as scratch. All steps take the same
// parameter list so they can be interleaved by the _X8/_X16 macros below.
// Inputs are read from [SSE2_WORKSPACE + slot*16 + i*256] (i selects which
// 256-byte half is read); results are always stored to the half at offset 0.
//   L01-L09: b ^= rotl(a + d, 7),  stored to slot b
//   L10-L17: c ^= rotl(b + a, 9),  stored to slot c
//   L18-L25: d ^= rotl(c + b, 13), stored to slot d
//   L26-L32: a ^= rotl(d + c, 18), stored to slot a
250 #define L01(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##A, [SSE2_WORKSPACE + d*16 + i*256])
251 #define L02(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##C, [SSE2_WORKSPACE + a*16 + i*256])
252 #define L03(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C)
253 #define L04(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A)
254 #define L05(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 7)
255 #define L06(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-7)
256 #define L07(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + b*16 + i*256])
257 #define L08(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B)
258 #define L09(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + b*16], xmm##A)
259 #define L10(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A)
260 #define L11(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C)
261 #define L12(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A)
262 #define L13(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 9)
263 #define L14(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-9)
264 #define L15(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + c*16 + i*256])
265 #define L16(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D)
266 #define L17(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + c*16], xmm##A)
267 #define L18(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A)
268 #define L19(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##B)
269 #define L20(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A)
270 #define L21(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 13)
271 #define L22(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-13)
272 #define L23(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + d*16 + i*256])
273 #define L24(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B)
274 #define L25(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + d*16], xmm##A)
275 #define L26(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##D)
276 #define L27(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A)
277 #define L28(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 18)
278 #define L29(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-18)
279 #define L30(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##C)
280 #define L31(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D)
281 #define L32(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + a*16], xmm##A)
// SSE2_QUARTER_ROUND_X8(i, a..d, e..h): runs two quarter-rounds side by side,
// one on slots (a,b,c,d) using xmm0..xmm3 and one on slots (e,f,g,h) using
// xmm4..xmm7. Each step Lnn is emitted for both before moving to the next
// step, so the two instruction streams are interleaved. `i` is forwarded to
// the L steps, where it selects the workspace half that inputs are read from.
283 #define SSE2_QUARTER_ROUND_X8(i, a, b, c, d, e, f, g, h) \
284 L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) \
285 L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) \
286 L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) \
287 L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) \
288 L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) \
289 L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) \
290 L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) \
291 L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) \
292 L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) \
293 L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) \
294 L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) \
295 L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) \
296 L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) \
297 L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) \
298 L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) \
299 L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) \
300 L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) \
301 L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) \
302 L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) \
303 L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) \
304 L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) \
305 L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) \
306 L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) \
307 L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) \
308 L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) \
309 L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) \
310 L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) \
311 L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) \
312 L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) \
313 L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) \
314 L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) \
315 L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i)
// SSE2_QUARTER_ROUND_X16(i, a..d, e..h, A..D, E..H): the four-way version of
// SSE2_QUARTER_ROUND_X8. Four quarter-rounds are interleaved step by step using
// register groups xmm0-3, xmm4-7, xmm8-11 and xmm12-15, so it is only usable
// where sixteen XMM registers exist (it is invoked under CRYPTOPP_BOOL_X64).
317 #define SSE2_QUARTER_ROUND_X16(i, a, b, c, d, e, f, g, h, A, B, C, D, E, F, G, H) \
318 L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) L01(8,9,10,11, A,B,C,D, i) L01(12,13,14,15, E,F,G,H, i) \
319 L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) L02(8,9,10,11, A,B,C,D, i) L02(12,13,14,15, E,F,G,H, i) \
320 L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) L03(8,9,10,11, A,B,C,D, i) L03(12,13,14,15, E,F,G,H, i) \
321 L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) L04(8,9,10,11, A,B,C,D, i) L04(12,13,14,15, E,F,G,H, i) \
322 L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) L05(8,9,10,11, A,B,C,D, i) L05(12,13,14,15, E,F,G,H, i) \
323 L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) L06(8,9,10,11, A,B,C,D, i) L06(12,13,14,15, E,F,G,H, i) \
324 L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) L07(8,9,10,11, A,B,C,D, i) L07(12,13,14,15, E,F,G,H, i) \
325 L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) L08(8,9,10,11, A,B,C,D, i) L08(12,13,14,15, E,F,G,H, i) \
326 L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) L09(8,9,10,11, A,B,C,D, i) L09(12,13,14,15, E,F,G,H, i) \
327 L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) L10(8,9,10,11, A,B,C,D, i) L10(12,13,14,15, E,F,G,H, i) \
328 L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) L11(8,9,10,11, A,B,C,D, i) L11(12,13,14,15, E,F,G,H, i) \
329 L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) L12(8,9,10,11, A,B,C,D, i) L12(12,13,14,15, E,F,G,H, i) \
330 L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) L13(8,9,10,11, A,B,C,D, i) L13(12,13,14,15, E,F,G,H, i) \
331 L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) L14(8,9,10,11, A,B,C,D, i) L14(12,13,14,15, E,F,G,H, i) \
332 L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) L15(8,9,10,11, A,B,C,D, i) L15(12,13,14,15, E,F,G,H, i) \
333 L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) L16(8,9,10,11, A,B,C,D, i) L16(12,13,14,15, E,F,G,H, i) \
334 L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) L17(8,9,10,11, A,B,C,D, i) L17(12,13,14,15, E,F,G,H, i) \
335 L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) L18(8,9,10,11, A,B,C,D, i) L18(12,13,14,15, E,F,G,H, i) \
336 L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) L19(8,9,10,11, A,B,C,D, i) L19(12,13,14,15, E,F,G,H, i) \
337 L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) L20(8,9,10,11, A,B,C,D, i) L20(12,13,14,15, E,F,G,H, i) \
338 L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) L21(8,9,10,11, A,B,C,D, i) L21(12,13,14,15, E,F,G,H, i) \
339 L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) L22(8,9,10,11, A,B,C,D, i) L22(12,13,14,15, E,F,G,H, i) \
340 L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) L23(8,9,10,11, A,B,C,D, i) L23(12,13,14,15, E,F,G,H, i) \
341 L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) L24(8,9,10,11, A,B,C,D, i) L24(12,13,14,15, E,F,G,H, i) \
342 L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) L25(8,9,10,11, A,B,C,D, i) L25(12,13,14,15, E,F,G,H, i) \
343 L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) L26(8,9,10,11, A,B,C,D, i) L26(12,13,14,15, E,F,G,H, i) \
344 L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) L27(8,9,10,11, A,B,C,D, i) L27(12,13,14,15, E,F,G,H, i) \
345 L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) L28(8,9,10,11, A,B,C,D, i) L28(12,13,14,15, E,F,G,H, i) \
346 L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) L29(8,9,10,11, A,B,C,D, i) L29(12,13,14,15, E,F,G,H, i) \
347 L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) L30(8,9,10,11, A,B,C,D, i) L30(12,13,14,15, E,F,G,H, i) \
348 L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) L31(8,9,10,11, A,B,C,D, i) L31(12,13,14,15, E,F,G,H, i) \
349 L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i) L32(8,9,10,11, A,B,C,D, i) L32(12,13,14,15, E,F,G,H, i)
// First column round. It is invoked with i = 1, so the quarter-round steps read
// their inputs from the workspace half at offset 256 (where SSE2_EXPAND_S and
// SSE2_EXPAND_S85 wrote the expanded state) and store results to the half at
// offset 0. X64 covers all four columns in one X16 call; the two X8 calls are
// the alternative (the #else line is omitted from this listing).
351 #if CRYPTOPP_BOOL_X64
352 SSE2_QUARTER_ROUND_X16(1, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)
354 SSE2_QUARTER_ROUND_X8(1, 2, 6, 10, 14, 3, 7, 11, 15)
355 SSE2_QUARTER_ROUND_X8(1, 0, 4, 8, 12, 1, 5, 9, 13)
// Initialise the remaining-rounds counter from the requested round count.
357 AS2( mov REG_roundsLeft, REG_rounds)
// SSE2_Salsa_Output: local subroutine reached via `call` from SSE2_OUTPUT_4.
// It interleaves the dwords and then the qwords of xmm4..xmm7 (a 4x4 transpose
// of 32-bit words), leaving the results in xmm4, xmm2, xmm0 and xmm6, which are
// then handed to AS_XMM_OUTPUT4 together with the input/output pointers.
360 ASL(SSE2_Salsa_Output)
361 AS2( movdqa xmm0, xmm4)
362 AS2( punpckldq xmm4, xmm5)
363 AS2( movdqa xmm1, xmm6)
364 AS2( punpckldq xmm6, xmm7)
365 AS2( movdqa xmm2, xmm4)
366 AS2( punpcklqdq xmm4, xmm6)
367 AS2( punpckhqdq xmm2, xmm6)
368 AS2( punpckhdq xmm0, xmm5)
369 AS2( punpckhdq xmm1, xmm7)
370 AS2( movdqa xmm6, xmm0)
371 AS2( punpcklqdq xmm0, xmm1)
372 AS2( punpckhqdq xmm6, xmm1)
373 AS_XMM_OUTPUT4(SSE2_Salsa_Output_A, REG_input, REG_output, 4, 2, 0, 6, 1, 0, 4, 8, 12, 1)
// Remaining rounds, all with i = 0 (inputs and outputs both use the workspace
// half at offset 0). The slot groups (0,4,8,12)... and (0,13,10,7)... are the
// same groups the C++ fallback passes to QUARTER_ROUND. Each pass through this
// code consumes two rounds. Labels, jumps and #else/#endif lines are omitted
// from this listing.
377 #if CRYPTOPP_BOOL_X64
378 SSE2_QUARTER_ROUND_X16(0, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)
380 SSE2_QUARTER_ROUND_X16(0, 0, 13, 10, 7, 1, 14, 11, 4, 2, 15, 8, 5, 3, 12, 9, 6)
382 SSE2_QUARTER_ROUND_X8(0, 2, 6, 10, 14, 3, 7, 11, 15)
383 SSE2_QUARTER_ROUND_X8(0, 0, 4, 8, 12, 1, 5, 9, 13)
385 SSE2_QUARTER_ROUND_X8(0, 2, 15, 8, 5, 3, 12, 9, 6)
386 SSE2_QUARTER_ROUND_X8(0, 0, 13, 10, 7, 1, 14, 11, 4)
388 AS2( sub REG_roundsLeft, 2)
// SSE2_OUTPUT_4(a, b, c, d): for each of the four slots, loads the saved input
// value (workspace half at offset 256) and adds the round result (half at
// offset 0), producing xmm4..xmm7, then calls SSE2_Salsa_Output to emit them.
391 #define SSE2_OUTPUT_4(a, b, c, d) \
392 AS2( movdqa xmm4, [SSE2_WORKSPACE + a*16 + 256])\
393 AS2( paddd xmm4, [SSE2_WORKSPACE + a*16])\
394 AS2( movdqa xmm5, [SSE2_WORKSPACE + b*16 + 256])\
395 AS2( paddd xmm5, [SSE2_WORKSPACE + b*16])\
396 AS2( movdqa xmm6, [SSE2_WORKSPACE + c*16 + 256])\
397 AS2( paddd xmm6, [SSE2_WORKSPACE + c*16])\
398 AS2( movdqa xmm7, [SSE2_WORKSPACE + d*16 + 256])\
399 AS2( paddd xmm7, [SSE2_WORKSPACE + d*16])\
400 ASC( call, SSE2_Salsa_Output)
// Emit all sixteen slots, four at a time, in the same word order that
// SALSA_OUTPUT uses in the C++ fallback.
402 SSE2_OUTPUT_4(0, 13, 10, 7)
403 SSE2_OUTPUT_4(4, 1, 14, 11)
404 SSE2_OUTPUT_4(8, 5, 2, 15)
405 SSE2_OUTPUT_4(12, 9, 6, 3)
// Check whether input is null; the conditional jump and label that follow are
// omitted from this listing (presumably they skip the input advance).
406 AS2( test REG_input, REG_input)
408 AS2( add REG_input, 12*16)
410 AS2( add REG_output, 12*16)
// Four iterations were produced; continue while at least four remain
// (the jump itself is on a line not shown).
411 AS2( sub REG_iterationCount, 4)
412 AS2( cmp REG_iterationCount, 4)
// Single-iteration path: processes one block with the whole state held in
// xmm0..xmm3. Labels and jumps are omitted from this listing.
417 AS2( sub REG_iterationCount, 1)
// Load the four 16-byte state rows.
419 AS2( movdqa xmm0, [REG_state + 0*16])
420 AS2( movdqa xmm1, [REG_state + 1*16])
421 AS2( movdqa xmm2, [REG_state + 2*16])
422 AS2( movdqa xmm3, [REG_state + 3*16])
423 AS2( mov REG_roundsLeft, REG_rounds)
// First half of the double round: rotations by 7, 9, 13, 18.
426 SSE2_QUARTER_ROUND(0, 1, 3, 7)
427 SSE2_QUARTER_ROUND(1, 2, 0, 9)
428 SSE2_QUARTER_ROUND(2, 3, 1, 13)
429 SSE2_QUARTER_ROUND(3, 0, 2, 18)
// Permute the lanes of xmm1..xmm3 so the next four steps combine a different
// grouping of words.
430 ASS( pshufd xmm1, xmm1, 2, 1, 0, 3)
431 ASS( pshufd xmm2, xmm2, 1, 0, 3, 2)
432 ASS( pshufd xmm3, xmm3, 0, 3, 2, 1)
// Second half of the double round (roles of xmm1 and xmm3 swapped).
433 SSE2_QUARTER_ROUND(0, 3, 1, 7)
434 SSE2_QUARTER_ROUND(3, 2, 0, 9)
435 SSE2_QUARTER_ROUND(2, 1, 3, 13)
436 SSE2_QUARTER_ROUND(1, 0, 2, 18)
// Permute the lanes again (this appears to undo the earlier permutation).
437 ASS( pshufd xmm1, xmm1, 0, 3, 2, 1)
438 ASS( pshufd xmm2, xmm2, 1, 0, 3, 2)
439 ASS( pshufd xmm3, xmm3, 2, 1, 0, 3)
// Two rounds done.
440 AS2( sub REG_roundsLeft, 2)
// Feed-forward: add the original state to the round result.
443 AS2( paddd xmm0, [REG_state + 0*16])
444 AS2( paddd xmm1, [REG_state + 1*16])
445 AS2( paddd xmm2, [REG_state + 2*16])
446 AS2( paddd xmm3, [REG_state + 3*16])
// Advance the 64-bit block counter held in state words 8 (low) and 5 (high).
448 AS2( add dword ptr [REG_state + 8*4], 1)
449 AS2( adc dword ptr [REG_state + 5*4], 0)
// Reorders the words of xmm0..xmm3 before output. Several instructions between
// the visible lines are omitted from this listing, so only the visible steps
// are described. NOTE(review): confirm the exact mask values against the full
// source.
// pcmpeqb of a register with itself sets every bit of xmm6.
451 AS2( pcmpeqb xmm6, xmm6)
// xmm7 is a lane-permuted copy of xmm6; xmm6/xmm7 then serve as complementary
// masks for the pand instructions below.
453 ASS( pshufd xmm7, xmm6, 0, 1, 2, 3)
454 AS2( movdqa xmm4, xmm0)
455 AS2( movdqa xmm5, xmm3)
456 AS2( pand xmm0, xmm7)
457 AS2( pand xmm4, xmm6)
458 AS2( pand xmm3, xmm6)
459 AS2( pand xmm5, xmm7)
461 AS2( movdqa xmm5, xmm1)
462 AS2( pand xmm1, xmm7)
463 AS2( pand xmm5, xmm6)
465 AS2( pand xmm6, xmm2)
466 AS2( pand xmm2, xmm7)
// Exchange 64-bit halves between register pairs.
470 AS2( movdqa xmm5, xmm4)
471 AS2( movdqa xmm6, xmm0)
472 AS3( shufpd xmm4, xmm1, 2)
473 AS3( shufpd xmm0, xmm2, 2)
474 AS3( shufpd xmm1, xmm5, 2)
475 AS3( shufpd xmm2, xmm6, 2)
// Hand xmm0..xmm3 and the input/output pointers to the shared output macro.
478 AS_XMM_OUTPUT4(SSE2_Salsa_Output_B, REG_input, REG_output, 4, 0, 1, 2, 3, 0, 1, 2, 3, 4)
// Operand and clobber lists of the inline asm statement. The #else/#endif lines
// are omitted from this listing.
486 #if CRYPTOPP_BOOL_X64
// Read-write operands: %0 = input, %1 = output, %2 = iterationCount.
487 :
"+r" (input),
"+r" (output),
"+r" (iterationCount)
// Read-only operands: %3 = m_rounds, %4 = m_state.m_ptr, %5 = workspace.
488 :
"r" (m_rounds),
"r" (m_state.m_ptr),
"r" (workspace)
// Clobbers: the scratch registers named by REG_roundsLeft/REG_temp32, memory,
// condition codes, and all sixteen XMM registers.
489 :
"%eax",
"%rdx",
"memory",
"cc",
"%xmm0",
"%xmm1",
"%xmm2",
"%xmm3",
"%xmm4",
"%xmm5",
"%xmm6",
"%xmm7",
"%xmm8",
"%xmm9",
"%xmm10",
"%xmm11",
"%xmm12",
"%xmm13",
"%xmm14",
"%xmm15"
// Second list: operands pinned to fixed registers with the a, D, c, d and S
// constraints, matching the eax/edi/ecx/edx/esi REG_ defines.
491 :
"+a" (input),
"+D" (output),
"+c" (iterationCount)
492 :
"d" (m_rounds),
"S" (m_state.m_ptr)
// MASM epilogue: restores xmm6..xmm15 from the offsets used by the prologue's
// save_xmm128 lines, releases the stack reserved by alloc_stack (same size
// expression), and closes the procedure. The return instruction is on a line
// omitted from this listing.
497 #ifdef CRYPTOPP_GENERATE_X64_MASM
498 movdqa xmm6, [rsp + 0200h]
499 movdqa xmm7, [rsp + 0210h]
500 movdqa xmm8, [rsp + 0220h]
501 movdqa xmm9, [rsp + 0230h]
502 movdqa xmm10, [rsp + 0240h]
503 movdqa xmm11, [rsp + 0250h]
504 movdqa xmm12, [rsp + 0260h]
505 movdqa xmm13, [rsp + 0270h]
506 movdqa xmm14, [rsp + 0280h]
507 movdqa xmm15, [rsp + 0290h]
508 add rsp, 10*16 + 32*16 + 8
510 Salsa20_OperateKeystream ENDP
516 #ifndef CRYPTOPP_GENERATE_X64_MASM
// Portable C++ path. Braces are omitted from this listing.
// Working copy of the sixteen state words.
518 word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
// One pass per requested iteration.
520 while (iterationCount--)
522 x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
523 x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7];
524 x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11];
525 x12 = m_state[12]; x13 = m_state[13]; x14 = m_state[14]; x15 = m_state[15];
// Each loop pass performs two rounds, hence the step of 2.
527 for (
int i=m_rounds; i>0; i-=2)
// QUARTER_ROUND(a, b, c, d): updates b, c, d, a in that order. Each is XORed
// with the sum of the two previously updated words rotated left by 7, 9, 13
// and 18 bits. The macro expands to four statements and is used without a
// trailing semicolon.
529 #define QUARTER_ROUND(a, b, c, d) \
530 b = b ^ rotlFixed(a + d, 7); \
531 c = c ^ rotlFixed(b + a, 9); \
532 d = d ^ rotlFixed(c + b, 13); \
533 a = a ^ rotlFixed(d + c, 18);
// First round of the pair.
535 QUARTER_ROUND(x0, x4, x8, x12)
536 QUARTER_ROUND(x1, x5, x9, x13)
537 QUARTER_ROUND(x2, x6, x10, x14)
538 QUARTER_ROUND(x3, x7, x11, x15)
// Second round of the pair, over a different grouping of the same words.
540 QUARTER_ROUND(x0, x13, x10, x7)
541 QUARTER_ROUND(x1, x14, x11, x4)
542 QUARTER_ROUND(x2, x15, x8, x5)
543 QUARTER_ROUND(x3, x12, x9, x6)
// SALSA_OUTPUT(x): emits the sixteen output words in little-endian order. Output
// position k receives working word xN plus the matching original state word
// m_state[N] (feed-forward). N follows the sequence 0, 13, 10, 7, 4, 1, 14, 11,
// 8, 5, 2, 15, 12, 9, 6, 3, i.e. the reordered state layout is undone here.
// `x` is forwarded to CRYPTOPP_KEYSTREAM_OUTPUT_WORD.
546 #define SALSA_OUTPUT(x) {\
547 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\
548 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x13 + m_state[13]);\
549 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x10 + m_state[10]);\
550 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x7 + m_state[7]);\
551 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\
552 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x1 + m_state[1]);\
553 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x14 + m_state[14]);\
554 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x11 + m_state[11]);\
555 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\
556 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x5 + m_state[5]);\
557 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x2 + m_state[2]);\
558 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x15 + m_state[15]);\
559 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\
560 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 13, x9 + m_state[9]);\
561 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x6 + m_state[6]);\
562 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x3 + m_state[3]);}
564 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
568 if (++m_state[8] == 0)
// Key setup fragment for XSalsa20 (the function header and several statements
// are omitted from this listing - presumably XSalsa20_Policy::CipherSetKey).
// Only 8, 12 or 20 rounds are accepted; any other value throws InvalidRounds.
578 if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
579 throw InvalidRounds(XSalsa20::StaticAlgorithmName(), m_rounds);
// Copies the first 16 bytes of m_key to the position 4 elements in. With 32-bit
// elements this duplicates the first half of the key into the second half.
// NOTE(review): the guarding condition is on a line not shown - confirm.
583 memcpy(m_key.begin()+4, m_key.begin(), 16);
// Constant words; read as little-endian ASCII they spell "expand 32-byte k".
586 m_state[0] = 0x61707865;
587 m_state[1] = 0x3320646e;
588 m_state[2] = 0x79622d32;
589 m_state[3] = 0x6b206574;
// Resynchronisation fragment for XSalsa20 (the function header, braces and the
// declaration of `get` are omitted from this listing).
594 CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length);
597 word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
// `get` fills four working words and state slots 14 and 11 - presumably six
// consecutive words of the IV.
600 get(x14)(x11)(x8)(x5)(m_state[14])(m_state[11]);
// Remaining working words come from the stored key and the constant words.
602 x13 = m_key[0]; x10 = m_key[1]; x7 = m_key[2]; x4 = m_key[3];
603 x15 = m_key[4]; x12 = m_key[5]; x9 = m_key[6]; x6 = m_key[7];
604 x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
// Run m_rounds rounds, two per pass, with the same word groupings as the
// Salsa20 keystream loop.
606 for (
int i=m_rounds; i>0; i-=2)
608 QUARTER_ROUND(x0, x4, x8, x12)
609 QUARTER_ROUND(x1, x5, x9, x13)
610 QUARTER_ROUND(x2, x6, x10, x14)
611 QUARTER_ROUND(x3, x7, x11, x15)
613 QUARTER_ROUND(x0, x13, x10, x7)
614 QUARTER_ROUND(x1, x14, x11, x4)
615 QUARTER_ROUND(x2, x15, x8, x5)
616 QUARTER_ROUND(x3, x12, x9, x6)
// Eight of the resulting words (no feed-forward addition) are written into the
// state slots that Salsa20's CipherSetKey fills with key words, so they act as
// the key for subsequent keystream generation.
619 m_state[13] = x0; m_state[10] = x1; m_state[7] = x2; m_state[4] = x3;
620 m_state[15] = x14; m_state[12] = x11; m_state[9] = x8; m_state[6] = x5;
// Reset the block counter (slots 8 and 5).
621 m_state[8] = m_state[5] = 0;
626 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
Standard names for retrieving values by name when working with NameValuePairs.
virtual unsigned int GetOptimalBlockSize() const
Provides number of ideal bytes to process.
Utility functions for the Crypto++ library.
unsigned int GetAlignment() const
Provides data alignment requirements.
Library configuration file.
#define CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(x, y)
Helper macro to implement OperateKeystream.
byte order is little-endian
void CipherSetKey(const NameValuePairs &params, const byte *key, size_t length)
Key the cipher.
int GetIntValueWithDefault(const char *name, int defaultValue) const
Get a named value with type int, with default.
Exception thrown when an invalid number of rounds is encountered.
A::pointer data()
Provides a pointer to the first element in the memory block.
void CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length)
Resynchronize the cipher.
Safely right shift values when undefined behavior could occur.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Functions for CPU features and intrinsics.
Classes for Salsa and Salsa20 stream ciphers.
const char * IV()
ConstByteArrayParameter, also accepts const byte * for backwards compatibility.
bool HasSSE2()
Determines SSE2 availability.
Access a block of memory.
KeystreamOperation
Keystream operation flags.
SymmetricCipher implementation.
Interface for retrieving values given their names.