#if (CRYPTOPP_SSSE3_AVAILABLE)
# include "adv_simd.h"
# include <pmmintrin.h>
# include <tmmintrin.h>
#endif

#if (CRYPTOPP_SSE41_AVAILABLE)
# include <smmintrin.h>
#endif

#if defined(__XOP__)
# include <ammintrin.h>
#endif

#if defined(__AVX512F__) && defined(__AVX512VL__)
# define CRYPTOPP_AVX512_ROTATE 1
# include <immintrin.h>
#endif

#if (CRYPTOPP_ARM_NEON_AVAILABLE)
# include "adv_simd.h"
# include <arm_neon.h>
#endif

#if (CRYPTOPP_ARM_ACLE_AVAILABLE)
# include <stdint.h>
# include <arm_acle.h>
#endif

#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
# include "adv_simd.h"
# include "ppc_simd.h"
#endif

// Squash MS LNK4221 and libtool warnings
extern const char SPECK64_SIMD_FNAME[] = __FILE__;
ANONYMOUS_NAMESPACE_BEGIN

using CryptoPP::word32;
using CryptoPP::word64;
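
// All of the SIMD paths below vectorize the same SPECK-64 round function,
// shown here for reference (rk is the round subkey):
//   Encryption round: x = (RotateRight32<8>(x) + y) ^ rk;  y = RotateLeft32<3>(y) ^ x
//   Decryption round: y = RotateRight32<3>(y ^ x);  x = RotateLeft32<8>((x ^ rk) - y)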
// ***************************** ARM NEON ***************************** //

#if (CRYPTOPP_ARM_NEON_AVAILABLE)

template <class T>
inline T UnpackHigh32(const T& a, const T& b)
{
    const uint32x2_t x(vget_high_u32((uint32x4_t)a));
    const uint32x2_t y(vget_high_u32((uint32x4_t)b));
    const uint32x2x2_t r = vzip_u32(x, y);
    return (T)vcombine_u32(r.val[0], r.val[1]);
}

template <class T>
inline T UnpackLow32(const T& a, const T& b)
{
    const uint32x2_t x(vget_low_u32((uint32x4_t)a));
    const uint32x2_t y(vget_low_u32((uint32x4_t)b));
    const uint32x2x2_t r = vzip_u32(x, y);
    return (T)vcombine_u32(r.val[0], r.val[1]);
}
template <unsigned int R>
inline uint32x4_t RotateLeft32(const uint32x4_t& val)
{
    const uint32x4_t a(vshlq_n_u32(val, R));
    const uint32x4_t b(vshrq_n_u32(val, 32 - R));
    return vorrq_u32(a, b);
}

template <unsigned int R>
inline uint32x4_t RotateRight32(const uint32x4_t& val)
{
    const uint32x4_t a(vshlq_n_u32(val, 32 - R));
    const uint32x4_t b(vshrq_n_u32(val, R));
    return vorrq_u32(a, b);
}
#if defined(__aarch32__) || defined(__aarch64__)
// Rotate by 8 with a byte shuffle; faster than two shifts and an OR.
template <>
inline uint32x4_t RotateLeft32<8>(const uint32x4_t& val)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8_t maskb[16] = { 14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3 };
    const uint8x16_t mask = vld1q_u8(maskb);
#else
    const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
    const uint8x16_t mask = vld1q_u8(maskb);
#endif

    return vreinterpretq_u32_u8(
        vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
}

// Rotate by 8 with a byte shuffle; faster than two shifts and an OR.
template <>
inline uint32x4_t RotateRight32<8>(const uint32x4_t& val)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8_t maskb[16] = { 12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1 };
    const uint8x16_t mask = vld1q_u8(maskb);
#else
    const uint8_t maskb[16] = { 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,15,12 };
    const uint8x16_t mask = vld1q_u8(maskb);
#endif

    return vreinterpretq_u32_u8(
        vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
}
#endif  // Aarch32 or Aarch64

inline void SPECK64_Enc_Block(uint32x4_t &block0, uint32x4_t &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x1 = vaddq_u32(x1, y1);
        x1 = veorq_u32(x1, rk);
        y1 = RotateLeft32<3>(y1);
        y1 = veorq_u32(y1, x1);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
}
inline void SPECK64_Dec_Block(uint32x4_t &block0, uint32x4_t &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        y1 = veorq_u32(y1, x1);
        y1 = RotateRight32<3>(y1);
        x1 = veorq_u32(x1, rk);
        x1 = vsubq_u32(x1, y1);
        x1 = RotateLeft32<8>(x1);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
}
inline void SPECK64_Enc_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
    uint32x4_t &block2, uint32x4_t &block3, uint32x4_t &block4, uint32x4_t &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];
    uint32x4_t x2 = vuzpq_u32(block2, block3).val[1];
    uint32x4_t y2 = vuzpq_u32(block2, block3).val[0];
    uint32x4_t x3 = vuzpq_u32(block4, block5).val[1];
    uint32x4_t y3 = vuzpq_u32(block4, block5).val[0];

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x2 = RotateRight32<8>(x2);
        x3 = RotateRight32<8>(x3);
        x1 = vaddq_u32(x1, y1);
        x2 = vaddq_u32(x2, y2);
        x3 = vaddq_u32(x3, y3);
        x1 = veorq_u32(x1, rk);
        x2 = veorq_u32(x2, rk);
        x3 = veorq_u32(x3, rk);
        y1 = RotateLeft32<3>(y1);
        y2 = RotateLeft32<3>(y2);
        y3 = RotateLeft32<3>(y3);
        y1 = veorq_u32(y1, x1);
        y2 = veorq_u32(y2, x2);
        y3 = veorq_u32(y3, x3);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
    block2 = UnpackLow32(y2, x2);
    block3 = UnpackHigh32(y2, x2);
    block4 = UnpackLow32(y3, x3);
    block5 = UnpackHigh32(y3, x3);
}
inline void SPECK64_Dec_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
    uint32x4_t &block2, uint32x4_t &block3, uint32x4_t &block4, uint32x4_t &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];
    uint32x4_t x2 = vuzpq_u32(block2, block3).val[1];
    uint32x4_t y2 = vuzpq_u32(block2, block3).val[0];
    uint32x4_t x3 = vuzpq_u32(block4, block5).val[1];
    uint32x4_t y3 = vuzpq_u32(block4, block5).val[0];

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        y1 = veorq_u32(y1, x1);
        y2 = veorq_u32(y2, x2);
        y3 = veorq_u32(y3, x3);
        y1 = RotateRight32<3>(y1);
        y2 = RotateRight32<3>(y2);
        y3 = RotateRight32<3>(y3);
        x1 = veorq_u32(x1, rk);
        x2 = veorq_u32(x2, rk);
        x3 = veorq_u32(x3, rk);
        x1 = vsubq_u32(x1, y1);
        x2 = vsubq_u32(x2, y2);
        x3 = vsubq_u32(x3, y3);
        x1 = RotateLeft32<8>(x1);
        x2 = RotateLeft32<8>(x2);
        x3 = RotateLeft32<8>(x3);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
    block2 = UnpackLow32(y2, x2);
    block3 = UnpackHigh32(y2, x2);
    block4 = UnpackLow32(y3, x3);
    block5 = UnpackHigh32(y3, x3);
}
#endif  // CRYPTOPP_ARM_NEON_AVAILABLE

// ***************************** IA-32 ***************************** //

#if defined(CRYPTOPP_SSE41_AVAILABLE)

template <unsigned int R>
inline __m128i RotateLeft32(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, R);
#else
    return _mm_or_si128(
        _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
#endif
}

template <unsigned int R>
inline __m128i RotateRight32(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 32-R);
#else
    return _mm_or_si128(
        _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
#endif
}

// Rotate by 8 with a byte shuffle; faster than two shifts and an OR.
template <>
__m128i RotateLeft32<8>(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 8);
#else
    const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
    return _mm_shuffle_epi8(val, mask);
#endif
}

// Rotate by 8 with a byte shuffle; faster than two shifts and an OR.
template <>
__m128i RotateRight32<8>(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 32-8);
#else
    const __m128i mask = _mm_set_epi8(12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1);
    return _mm_shuffle_epi8(val, mask);
#endif
}
inline void SPECK64_Enc_Block(__m128i &block0, __m128i &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x1 = _mm_add_epi32(x1, y1);
        x1 = _mm_xor_si128(x1, rk);
        y1 = RotateLeft32<3>(y1);
        y1 = _mm_xor_si128(y1, x1);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
}
inline void SPECK64_Dec_Block(__m128i &block0, __m128i &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        y1 = _mm_xor_si128(y1, x1);
        y1 = RotateRight32<3>(y1);
        x1 = _mm_xor_si128(x1, rk);
        x1 = _mm_sub_epi32(x1, y1);
        x1 = RotateLeft32<8>(x1);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
}
inline void SPECK64_Enc_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t2 = _mm_castsi128_ps(block2);
    const __m128 t3 = _mm_castsi128_ps(block3);
    __m128i x2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(3,1,3,1)));
    __m128i y2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t4 = _mm_castsi128_ps(block4);
    const __m128 t5 = _mm_castsi128_ps(block5);
    __m128i x3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(3,1,3,1)));
    __m128i y3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(2,0,2,0)));

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x2 = RotateRight32<8>(x2);
        x3 = RotateRight32<8>(x3);
        x1 = _mm_add_epi32(x1, y1);
        x2 = _mm_add_epi32(x2, y2);
        x3 = _mm_add_epi32(x3, y3);
        x1 = _mm_xor_si128(x1, rk);
        x2 = _mm_xor_si128(x2, rk);
        x3 = _mm_xor_si128(x3, rk);
        y1 = RotateLeft32<3>(y1);
        y2 = RotateLeft32<3>(y2);
        y3 = RotateLeft32<3>(y3);
        y1 = _mm_xor_si128(y1, x1);
        y2 = _mm_xor_si128(y2, x2);
        y3 = _mm_xor_si128(y3, x3);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
    block2 = _mm_unpacklo_epi32(y2, x2);
    block3 = _mm_unpackhi_epi32(y2, x2);
    block4 = _mm_unpacklo_epi32(y3, x3);
    block5 = _mm_unpackhi_epi32(y3, x3);
}
inline void SPECK64_Dec_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t2 = _mm_castsi128_ps(block2);
    const __m128 t3 = _mm_castsi128_ps(block3);
    __m128i x2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(3,1,3,1)));
    __m128i y2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t4 = _mm_castsi128_ps(block4);
    const __m128 t5 = _mm_castsi128_ps(block5);
    __m128i x3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(3,1,3,1)));
    __m128i y3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(2,0,2,0)));

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        y1 = _mm_xor_si128(y1, x1);
        y2 = _mm_xor_si128(y2, x2);
        y3 = _mm_xor_si128(y3, x3);
        y1 = RotateRight32<3>(y1);
        y2 = RotateRight32<3>(y2);
        y3 = RotateRight32<3>(y3);
        x1 = _mm_xor_si128(x1, rk);
        x2 = _mm_xor_si128(x2, rk);
        x3 = _mm_xor_si128(x3, rk);
        x1 = _mm_sub_epi32(x1, y1);
        x2 = _mm_sub_epi32(x2, y2);
        x3 = _mm_sub_epi32(x3, y3);
        x1 = RotateLeft32<8>(x1);
        x2 = RotateLeft32<8>(x2);
        x3 = RotateLeft32<8>(x3);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
    block2 = _mm_unpacklo_epi32(y2, x2);
    block3 = _mm_unpackhi_epi32(y2, x2);
    block4 = _mm_unpacklo_epi32(y3, x3);
    block5 = _mm_unpackhi_epi32(y3, x3);
}
#endif  // CRYPTOPP_SSE41_AVAILABLE

// ***************************** Altivec ***************************** //

#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)

using CryptoPP::uint8x16_p;
using CryptoPP::uint32x4_p;

using CryptoPP::VecAdd;
using CryptoPP::VecSub;
using CryptoPP::VecXor;
using CryptoPP::VecLoad;
using CryptoPP::VecPermute;

// Rotate left by bit count
template<unsigned int C>
inline uint32x4_p RotateLeft32(const uint32x4_p val)
{
    const uint32x4_p m = {C, C, C, C};
    return vec_rl(val, m);
}

// Rotate right by bit count
template<unsigned int C>
inline uint32x4_p RotateRight32(const uint32x4_p val)
{
    const uint32x4_p m = {32-C, 32-C, 32-C, 32-C};
    return vec_rl(val, m);
}
void SPECK64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
    const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
#if CRYPTOPP_POWER7_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // subkeys has extra elements so memory backs the last subkey
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        const uint32x4_p rk = VecPermute(VecLoad(subkeys+i), VecLoad(subkeys+i), m);
#endif

        x1 = RotateRight32<8>(x1);
        x1 = VecAdd(x1, y1);
        x1 = VecXor(x1, rk);

        y1 = RotateLeft32<3>(y1);
        y1 = VecXor(y1, x1);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 A3 B1 B3][A2 A4 B2 B4] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
}
void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
    const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
#if CRYPTOPP_POWER7_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // subkeys has extra elements so memory backs the last subkey
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        const uint32x4_p rk = VecPermute(VecLoad(subkeys+i), VecLoad(subkeys+i), m);
#endif

        y1 = VecXor(y1, x1);
        y1 = RotateRight32<3>(y1);

        x1 = VecXor(x1, rk);
        x1 = VecSub(x1, y1);
        x1 = RotateLeft32<8>(x1);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 A3 B1 B3][A2 A4 B2 B4] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
}
void SPECK64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
    uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4,
    uint32x4_p &block5, const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);
    uint32x4_p x2 = VecPermute(block2, block3, m1);
    uint32x4_p y2 = VecPermute(block2, block3, m2);
    uint32x4_p x3 = VecPermute(block4, block5, m1);
    uint32x4_p y3 = VecPermute(block4, block5, m2);

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
#if CRYPTOPP_POWER7_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // subkeys has extra elements so memory backs the last subkey
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        const uint32x4_p rk = VecPermute(VecLoad(subkeys+i), VecLoad(subkeys+i), m);
#endif

        x1 = RotateRight32<8>(x1);
        x2 = RotateRight32<8>(x2);
        x3 = RotateRight32<8>(x3);
        x1 = VecAdd(x1, y1);
        x2 = VecAdd(x2, y2);
        x3 = VecAdd(x3, y3);
        x1 = VecXor(x1, rk);
        x2 = VecXor(x2, rk);
        x3 = VecXor(x3, rk);

        y1 = RotateLeft32<3>(y1);
        y2 = RotateLeft32<3>(y2);
        y3 = RotateLeft32<3>(y3);
        y1 = VecXor(y1, x1);
        y2 = VecXor(y2, x2);
        y3 = VecXor(y3, x3);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 A3 B1 B3][A2 A4 B2 B4] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
    block2 = VecPermute(x2, y2, m3);
    block3 = VecPermute(x2, y2, m4);
    block4 = VecPermute(x3, y3, m3);
    block5 = VecPermute(x3, y3, m4);
}
void SPECK64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
    uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4,
    uint32x4_p &block5, const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);
    uint32x4_p x2 = VecPermute(block2, block3, m1);
    uint32x4_p y2 = VecPermute(block2, block3, m2);
    uint32x4_p x3 = VecPermute(block4, block5, m1);
    uint32x4_p y3 = VecPermute(block4, block5, m2);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
#if CRYPTOPP_POWER7_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // subkeys has extra elements so memory backs the last subkey
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        const uint32x4_p rk = VecPermute(VecLoad(subkeys+i), VecLoad(subkeys+i), m);
#endif

        y1 = VecXor(y1, x1);
        y2 = VecXor(y2, x2);
        y3 = VecXor(y3, x3);
        y1 = RotateRight32<3>(y1);
        y2 = RotateRight32<3>(y2);
        y3 = RotateRight32<3>(y3);

        x1 = VecXor(x1, rk);
        x2 = VecXor(x2, rk);
        x3 = VecXor(x3, rk);
        x1 = VecSub(x1, y1);
        x2 = VecSub(x2, y2);
        x3 = VecSub(x3, y3);
        x1 = RotateLeft32<8>(x1);
        x2 = RotateLeft32<8>(x2);
        x3 = RotateLeft32<8>(x3);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 A3 B1 B3][A2 A4 B2 B4] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
    block2 = VecPermute(x2, y2, m3);
    block3 = VecPermute(x2, y2, m4);
    block4 = VecPermute(x3, y3, m3);
    block5 = VecPermute(x3, y3, m4);
}
#endif  // CRYPTOPP_ALTIVEC_AVAILABLE

ANONYMOUS_NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)
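
// The functions below are the entry points visible outside this file. Each
// wrapper hands the single-block and six-block kernels above to the
// AdvancedProcessBlocks64_6x2_* templates from adv_simd.h, which take care of
// loading, storing, xoring and partial-block handling. They are expected to
// be selected at run time (e.g. from speck.cpp) based on CPU feature detection.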
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
size_t SPECK64_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t SPECK64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif  // CRYPTOPP_ARM_NEON_AVAILABLE
#if defined(CRYPTOPP_SSE41_AVAILABLE)
size_t SPECK64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t SPECK64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif  // CRYPTOPP_SSE41_AVAILABLE
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
size_t SPECK64_Enc_AdvancedProcessBlocks_ALTIVEC(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_ALTIVEC(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t SPECK64_Dec_AdvancedProcessBlocks_ALTIVEC(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_ALTIVEC(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif  // CRYPTOPP_ALTIVEC_AVAILABLE

NAMESPACE_END
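
// Usage sketch (not part of this file; member names such as m_rkeys and
// m_rounds are assumed here for illustration): speck.cpp is expected to route
// AdvancedProcessBlocks() calls to these routines after run-time feature
// detection, roughly as follows:
//
//   size_t SPECK64::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
//       byte *outBlocks, size_t length, word32 flags) const
//   {
//   #if (CRYPTOPP_SSE41_AVAILABLE)
//       if (HasSSE41())
//           return SPECK64_Enc_AdvancedProcessBlocks_SSE41(m_rkeys, (size_t)m_rounds,
//               inBlocks, xorBlocks, outBlocks, length, flags);
//   #endif
//       return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
//   }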