20 #if (CRYPTOPP_SSSE3_AVAILABLE) 21 # include <pmmintrin.h> 22 # include <tmmintrin.h> 26 # include <ammintrin.h> 29 #if defined(__AVX512F__) && defined(__AVX512VL__) 30 # define CRYPTOPP_AVX512_ROTATE 1 31 # include <immintrin.h> 34 #if (CRYPTOPP_ARM_NEON_AVAILABLE) 35 # include <arm_neon.h> 40 #if (CRYPTOPP_ARM_ACLE_AVAILABLE) 42 # include <arm_acle.h> 59 #undef CRYPTOPP_POWER8_AVAILABLE 60 #if defined(CRYPTOPP_POWER8_AVAILABLE) 65 extern const char LEA_SIMD_FNAME[] = __FILE__;
67 ANONYMOUS_NAMESPACE_BEGIN
69 using CryptoPP::word32;
73 #if (CRYPTOPP_ARM_NEON_AVAILABLE) 75 inline uint32x4_t Xor(
const uint32x4_t& a,
const uint32x4_t& b)
77 return veorq_u32(a, b);
80 inline uint32x4_t Add(
const uint32x4_t& a,
const uint32x4_t& b)
82 return vaddq_u32(a, b);
85 inline uint32x4_t Sub(
const uint32x4_t& a,
const uint32x4_t& b)
87 return vsubq_u32(a, b);
90 template <
unsigned int R>
91 inline uint32x4_t RotateLeft(
const uint32x4_t& val)
93 const uint32x4_t a(vshlq_n_u32(val, R));
94 const uint32x4_t b(vshrq_n_u32(val, 32 - R));
95 return vorrq_u32(a, b);
98 template <
unsigned int R>
99 inline uint32x4_t RotateRight(
const uint32x4_t& val)
101 const uint32x4_t a(vshlq_n_u32(val, 32 - R));
102 const uint32x4_t b(vshrq_n_u32(val, R));
103 return vorrq_u32(a, b);
106 #if defined(__aarch32__) || defined(__aarch64__) 108 inline uint32x4_t RotateLeft<8>(
const uint32x4_t& val)
110 #if (CRYPTOPP_BIG_ENDIAN) 111 const uint8_t maskb[16] = { 14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3 };
112 const uint8x16_t mask = vld1q_u8(maskb);
114 const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
115 const uint8x16_t mask = vld1q_u8(maskb);
118 return vreinterpretq_u32_u8(
119 vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
123 inline uint32x4_t RotateRight<8>(
const uint32x4_t& val)
125 #if (CRYPTOPP_BIG_ENDIAN) 126 const uint8_t maskb[16] = { 12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1 };
127 const uint8x16_t mask = vld1q_u8(maskb);
129 const uint8_t maskb[16] = { 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,14,12 };
130 const uint8x16_t mask = vld1q_u8(maskb);
133 return vreinterpretq_u32_u8(
134 vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
138 uint32x4_t UnpackLow32(uint32x4_t a, uint32x4_t b)
140 uint32x2_t a1 = vget_low_u32(a);
141 uint32x2_t b1 = vget_low_u32(b);
142 uint32x2x2_t result = vzip_u32(a1, b1);
143 return vcombine_u32(result.val[0], result.val[1]);
146 uint32x4_t UnpackHigh32(uint32x4_t a, uint32x4_t b)
148 uint32x2_t a1 = vget_high_u32(a);
149 uint32x2_t b1 = vget_high_u32(b);
150 uint32x2x2_t result = vzip_u32(a1, b1);
151 return vcombine_u32(result.val[0], result.val[1]);
154 uint32x4_t UnpackLow64(uint32x4_t a, uint32x4_t b)
156 uint64x1_t a1 = vget_low_u64((uint64x2_t)a);
157 uint64x1_t b1 = vget_low_u64((uint64x2_t)b);
158 return (uint32x4_t)vcombine_u64(a1, b1);
161 uint32x4_t UnpackHigh64(uint32x4_t a, uint32x4_t b)
163 uint64x1_t a1 = vget_high_u64((uint64x2_t)a);
164 uint64x1_t b1 = vget_high_u64((uint64x2_t)b);
165 return (uint32x4_t)vcombine_u64(a1, b1);
168 template <
unsigned int IDX>
169 inline uint32x4_t LoadKey(
const word32 rkey[])
171 return vdupq_n_u32(rkey[IDX]);
174 template <
unsigned int IDX>
175 inline uint32x4_t UnpackNEON(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
179 return vmovq_n_u32(0);
183 inline uint32x4_t UnpackNEON<0>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
185 const uint32x4_t r1 = UnpackLow32(a, b);
186 const uint32x4_t r2 = UnpackLow32(c, d);
187 return UnpackLow64(r1, r2);
191 inline uint32x4_t UnpackNEON<1>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
193 const uint32x4_t r1 = UnpackLow32(a, b);
194 const uint32x4_t r2 = UnpackLow32(c, d);
195 return UnpackHigh64(r1, r2);
199 inline uint32x4_t UnpackNEON<2>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
201 const uint32x4_t r1 = UnpackHigh32(a, b);
202 const uint32x4_t r2 = UnpackHigh32(c, d);
203 return UnpackLow64(r1, r2);
207 inline uint32x4_t UnpackNEON<3>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
209 const uint32x4_t r1 = UnpackHigh32(a, b);
210 const uint32x4_t r2 = UnpackHigh32(c, d);
211 return UnpackHigh64(r1, r2);
214 template <
unsigned int IDX>
215 inline uint32x4_t UnpackNEON(
const uint32x4_t& v)
219 return vmovq_n_u32(0);
223 inline uint32x4_t UnpackNEON<0>(
const uint32x4_t& v)
226 return vdupq_n_u32(vgetq_lane_u32(v, 0));
230 inline uint32x4_t UnpackNEON<1>(
const uint32x4_t& v)
233 return vdupq_n_u32(vgetq_lane_u32(v, 1));
237 inline uint32x4_t UnpackNEON<2>(
const uint32x4_t& v)
240 return vdupq_n_u32(vgetq_lane_u32(v, 2));
244 inline uint32x4_t UnpackNEON<3>(
const uint32x4_t& v)
247 return vdupq_n_u32(vgetq_lane_u32(v, 3));
250 template <
unsigned int IDX>
251 inline uint32x4_t RepackNEON(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
253 return UnpackNEON<IDX>(a, b, c, d);
256 template <
unsigned int IDX>
257 inline uint32x4_t RepackNEON(
const uint32x4_t& v)
259 return UnpackNEON<IDX>(v);
262 #endif // CRYPTOPP_ARM_NEON_AVAILABLE 266 #if (CRYPTOPP_SSSE3_AVAILABLE) 268 inline __m128i Xor(
const __m128i& a,
const __m128i& b)
270 return _mm_xor_si128(a, b);
273 inline __m128i Add(
const __m128i& a,
const __m128i& b)
275 return _mm_add_epi32(a, b);
278 inline __m128i Sub(
const __m128i& a,
const __m128i& b)
280 return _mm_sub_epi32(a, b);
// Rotates each 32-bit element of val left by R bits; R is a compile-time constant.
// NOTE(review): two alternative implementations are visible below - a single
// _mm_roti_epi32 call, and a shift-left by R paired with a shift-right by 32-R.
// The lines that choose between them and that combine the two shifts are not
// visible here (presumably a preprocessor conditional and an OR) - confirm
// against the full file.
283 template <
unsigned int R>
284 inline __m128i RotateLeft(
const __m128i& val)
287 return _mm_roti_epi32(val, R);
290 _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
// Rotates each 32-bit element of val right by R bits; R is a compile-time constant.
// The first form expresses the right rotation as a left rotation by 32-R.
// NOTE(review): two alternative implementations are visible below - a single
// _mm_roti_epi32 call, and a shift-left by 32-R paired with a shift-right by R.
// The lines that choose between them and that combine the two shifts are not
// visible here (presumably a preprocessor conditional and an OR) - confirm
// against the full file.
294 template <
unsigned int R>
295 inline __m128i RotateRight(
const __m128i& val)
298 return _mm_roti_epi32(val, 32-R);
301 _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
// RotateLeft for an 8-bit rotation.
// NOTE(review): two alternative bodies are visible; the conditional selecting
// between them is not shown - confirm against the full file.
307 inline __m128i RotateLeft<8>(
const __m128i& val)
310 return _mm_roti_epi32(val, 8);
// Byte-shuffle form. _mm_set_epi8 lists bytes from 15 down to 0, so within each
// 32-bit element output bytes 0,1,2,3 take input bytes 3,0,1,2.
312 const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
313 return _mm_shuffle_epi8(val, mask);
// RotateRight for an 8-bit rotation.
// NOTE(review): two alternative bodies are visible; the conditional selecting
// between them is not shown - confirm against the full file.
319 inline __m128i RotateRight<8>(
const __m128i& val)
// Right rotation by 8 written as a left rotation by 32-8.
322 return _mm_roti_epi32(val, 32-8);
// Byte-shuffle form. _mm_set_epi8 lists bytes from 15 down to 0, so within each
// 32-bit element output bytes 0,1,2,3 take input bytes 1,2,3,0.
324 const __m128i mask = _mm_set_epi8(12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1);
325 return _mm_shuffle_epi8(val, mask);
329 template <
unsigned int IDX>
330 inline __m128i LoadKey(
const word32 rkey[])
332 float rk; std::memcpy(&rk, rkey+IDX,
sizeof(rk));
333 return _mm_castps_si128(_mm_load_ps1(&rk));
336 template <
unsigned int IDX>
337 inline __m128i UnpackXMM(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
340 CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
341 CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
343 return _mm_setzero_si128();
347 inline __m128i UnpackXMM<0>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
350 const __m128i r1 = _mm_unpacklo_epi32(a, b);
351 const __m128i r2 = _mm_unpacklo_epi32(c, d);
352 return _mm_unpacklo_epi64(r1, r2);
356 inline __m128i UnpackXMM<1>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
359 const __m128i r1 = _mm_unpacklo_epi32(a, b);
360 const __m128i r2 = _mm_unpacklo_epi32(c, d);
361 return _mm_unpackhi_epi64(r1, r2);
365 inline __m128i UnpackXMM<2>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
368 const __m128i r1 = _mm_unpackhi_epi32(a, b);
369 const __m128i r2 = _mm_unpackhi_epi32(c, d);
370 return _mm_unpacklo_epi64(r1, r2);
374 inline __m128i UnpackXMM<3>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
377 const __m128i r1 = _mm_unpackhi_epi32(a, b);
378 const __m128i r2 = _mm_unpackhi_epi32(c, d);
379 return _mm_unpackhi_epi64(r1, r2);
382 template <
unsigned int IDX>
383 inline __m128i UnpackXMM(
const __m128i& v)
387 return _mm_setzero_si128();
391 inline __m128i UnpackXMM<0>(
const __m128i& v)
394 return _mm_shuffle_epi8(v, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
398 inline __m128i UnpackXMM<1>(
const __m128i& v)
401 return _mm_shuffle_epi8(v, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
405 inline __m128i UnpackXMM<2>(
const __m128i& v)
408 return _mm_shuffle_epi8(v, _mm_set_epi8(11,10,9,8, 11,10,9,8, 11,10,9,8, 11,10,9,8));
412 inline __m128i UnpackXMM<3>(
const __m128i& v)
415 return _mm_shuffle_epi8(v, _mm_set_epi8(15,14,13,12, 15,14,13,12, 15,14,13,12, 15,14,13,12));
418 template <
unsigned int IDX>
419 inline __m128i RepackXMM(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
421 return UnpackXMM<IDX>(a, b, c, d);
424 template <
unsigned int IDX>
425 inline __m128i RepackXMM(
const __m128i& v)
427 return UnpackXMM<IDX>(v);
// POWER8 (Altivec) counterparts of the helpers defined for NEON and SSSE3.
// NOTE(review): most of this section is not visible here - function signatures
// and several bodies are missing - so only the visible statements are annotated.
// NOTE(review): CRYPTOPP_POWER8_AVAILABLE is #undef'd near the top of the file,
// so this section looks compiled out - confirm whether that is intentional.
430 #endif // CRYPTOPP_SSSE3_AVAILABLE 434 #if (CRYPTOPP_POWER8_AVAILABLE) 455 template <
unsigned int R>
// Rotate left using vec_rl; the definition of m is not visible here.
459 return vec_rl(val, m);
462 template <
unsigned int R>
// A rotate left by 32-R in every lane - presumably the body of the right-rotate helper.
465 const uint32x4_p m = {32-R, 32-R, 32-R, 32-R};
466 return vec_rl(val, m);
469 template <
unsigned int IDX>
// Broadcasts round-key word rkey[IDX] to every lane.
470 inline uint32x4_p LoadKey(
const word32 rkey[])
472 return vec_splats(rkey[IDX]);
475 template <
unsigned int IDX>
479 CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
480 CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
517 template <
unsigned int IDX>
// Byte-selection tables, each repeating one group of four byte indices.
// NOTE(review): the permute calls that consume them are not visible here.
529 const uint8x16_p m = {3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0};
537 const uint8x16_p m = {7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4};
545 const uint8x16_p m = {11,10,9,8, 11,10,9,8, 11,10,9,8, 11,10,9,8};
553 const uint8x16_p m = {15,14,13,12, 15,14,13,12, 15,14,13,12, 15,14,13,12};
557 template <
unsigned int IDX>
// The repack helpers forward to UnpackSIMD with the same index.
560 return UnpackSIMD<IDX>(a, b, c, d);
563 template <
unsigned int IDX>
566 return UnpackSIMD<IDX>(v);
// LEA_Encryption: applies the LEA encryption rounds to the four state words in
// temp, in place.
//   temp    - four SIMD state words of type W; every statement overwrites one of them
//   subkeys - round-key words; the visible statements read indices 0 through 191
//             through LoadKey<IDX>
//   rounds  - round count. NOTE(review): not referenced in the lines visible here.
//             The gaps in the embedded numbering before subkeys 144 and 168 suggest
//             round-count checks that are not shown - confirm against the full file.
// NOTE(review): W is presumably a template parameter declared on a line that is
// not visible here.
// Each round is three statements and consumes six subkeys. A statement XORs two
// neighbouring state words with one subkey each, adds the two results and rotates
// the sum (right by 3, right by 5, or left by 9). The word that is left untouched
// moves by one position every round, so the pattern repeats every four rounds.
569 #endif // CRYPTOPP_POWER8_AVAILABLE 573 #if (CRYPTOPP_ARM_NEON_AVAILABLE || CRYPTOPP_SSSE3_AVAILABLE) 576 inline void LEA_Encryption(W temp[4],
const word32 *subkeys,
unsigned int rounds)
// Rounds 0-3, subkeys 0..23
578 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<4>(subkeys)), Xor(temp[3], LoadKey<5>(subkeys))));
579 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<2>(subkeys)), Xor(temp[2], LoadKey<3>(subkeys))));
580 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<0>(subkeys)), Xor(temp[1], LoadKey<1>(subkeys))));
581 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<10>(subkeys)), Xor(temp[0], LoadKey<11>(subkeys))));
582 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<8>(subkeys)), Xor(temp[3], LoadKey<9>(subkeys))));
583 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<6>(subkeys)), Xor(temp[2], LoadKey<7>(subkeys))));
584 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<16>(subkeys)), Xor(temp[1], LoadKey<17>(subkeys))));
585 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<14>(subkeys)), Xor(temp[0], LoadKey<15>(subkeys))));
586 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<12>(subkeys)), Xor(temp[3], LoadKey<13>(subkeys))));
587 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<22>(subkeys)), Xor(temp[2], LoadKey<23>(subkeys))));
588 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<20>(subkeys)), Xor(temp[1], LoadKey<21>(subkeys))));
589 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<18>(subkeys)), Xor(temp[0], LoadKey<19>(subkeys))));
// Rounds 4-7, subkeys 24..47
591 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<28>(subkeys)), Xor(temp[3], LoadKey<29>(subkeys))));
592 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<26>(subkeys)), Xor(temp[2], LoadKey<27>(subkeys))));
593 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<24>(subkeys)), Xor(temp[1], LoadKey<25>(subkeys))));
594 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<34>(subkeys)), Xor(temp[0], LoadKey<35>(subkeys))));
595 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<32>(subkeys)), Xor(temp[3], LoadKey<33>(subkeys))));
596 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<30>(subkeys)), Xor(temp[2], LoadKey<31>(subkeys))));
597 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<40>(subkeys)), Xor(temp[1], LoadKey<41>(subkeys))));
598 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<38>(subkeys)), Xor(temp[0], LoadKey<39>(subkeys))));
599 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<36>(subkeys)), Xor(temp[3], LoadKey<37>(subkeys))));
600 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<46>(subkeys)), Xor(temp[2], LoadKey<47>(subkeys))));
601 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<44>(subkeys)), Xor(temp[1], LoadKey<45>(subkeys))));
602 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<42>(subkeys)), Xor(temp[0], LoadKey<43>(subkeys))));
// Rounds 8-11, subkeys 48..71
604 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<52>(subkeys)), Xor(temp[3], LoadKey<53>(subkeys))));
605 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<50>(subkeys)), Xor(temp[2], LoadKey<51>(subkeys))));
606 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<48>(subkeys)), Xor(temp[1], LoadKey<49>(subkeys))));
607 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<58>(subkeys)), Xor(temp[0], LoadKey<59>(subkeys))));
608 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<56>(subkeys)), Xor(temp[3], LoadKey<57>(subkeys))));
609 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<54>(subkeys)), Xor(temp[2], LoadKey<55>(subkeys))));
610 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<64>(subkeys)), Xor(temp[1], LoadKey<65>(subkeys))));
611 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<62>(subkeys)), Xor(temp[0], LoadKey<63>(subkeys))));
612 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<60>(subkeys)), Xor(temp[3], LoadKey<61>(subkeys))));
613 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<70>(subkeys)), Xor(temp[2], LoadKey<71>(subkeys))));
614 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<68>(subkeys)), Xor(temp[1], LoadKey<69>(subkeys))));
615 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<66>(subkeys)), Xor(temp[0], LoadKey<67>(subkeys))));
// Rounds 12-15, subkeys 72..95
617 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<76>(subkeys)), Xor(temp[3], LoadKey<77>(subkeys))));
618 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<74>(subkeys)), Xor(temp[2], LoadKey<75>(subkeys))));
619 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<72>(subkeys)), Xor(temp[1], LoadKey<73>(subkeys))));
620 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<82>(subkeys)), Xor(temp[0], LoadKey<83>(subkeys))));
621 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<80>(subkeys)), Xor(temp[3], LoadKey<81>(subkeys))));
622 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<78>(subkeys)), Xor(temp[2], LoadKey<79>(subkeys))));
623 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<88>(subkeys)), Xor(temp[1], LoadKey<89>(subkeys))));
624 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<86>(subkeys)), Xor(temp[0], LoadKey<87>(subkeys))));
625 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<84>(subkeys)), Xor(temp[3], LoadKey<85>(subkeys))));
626 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<94>(subkeys)), Xor(temp[2], LoadKey<95>(subkeys))));
627 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<92>(subkeys)), Xor(temp[1], LoadKey<93>(subkeys))));
628 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<90>(subkeys)), Xor(temp[0], LoadKey<91>(subkeys))));
// Rounds 16-19, subkeys 96..119
630 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<100>(subkeys)), Xor(temp[3], LoadKey<101>(subkeys))));
631 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<98>(subkeys)), Xor(temp[2], LoadKey<99>(subkeys))));
632 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<96>(subkeys)), Xor(temp[1], LoadKey<97>(subkeys))));
633 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<106>(subkeys)), Xor(temp[0], LoadKey<107>(subkeys))));
634 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<104>(subkeys)), Xor(temp[3], LoadKey<105>(subkeys))));
635 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<102>(subkeys)), Xor(temp[2], LoadKey<103>(subkeys))));
636 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<112>(subkeys)), Xor(temp[1], LoadKey<113>(subkeys))));
637 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<110>(subkeys)), Xor(temp[0], LoadKey<111>(subkeys))));
638 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<108>(subkeys)), Xor(temp[3], LoadKey<109>(subkeys))));
639 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<118>(subkeys)), Xor(temp[2], LoadKey<119>(subkeys))));
640 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<116>(subkeys)), Xor(temp[1], LoadKey<117>(subkeys))));
641 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<114>(subkeys)), Xor(temp[0], LoadKey<115>(subkeys))));
// Rounds 20-23, subkeys 120..143
643 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<124>(subkeys)), Xor(temp[3], LoadKey<125>(subkeys))));
644 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<122>(subkeys)), Xor(temp[2], LoadKey<123>(subkeys))));
645 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<120>(subkeys)), Xor(temp[1], LoadKey<121>(subkeys))));
646 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<130>(subkeys)), Xor(temp[0], LoadKey<131>(subkeys))));
647 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<128>(subkeys)), Xor(temp[3], LoadKey<129>(subkeys))));
648 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<126>(subkeys)), Xor(temp[2], LoadKey<127>(subkeys))));
649 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<136>(subkeys)), Xor(temp[1], LoadKey<137>(subkeys))));
650 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<134>(subkeys)), Xor(temp[0], LoadKey<135>(subkeys))));
651 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<132>(subkeys)), Xor(temp[3], LoadKey<133>(subkeys))));
652 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<142>(subkeys)), Xor(temp[2], LoadKey<143>(subkeys))));
653 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<140>(subkeys)), Xor(temp[1], LoadKey<141>(subkeys))));
654 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<138>(subkeys)), Xor(temp[0], LoadKey<139>(subkeys))));
// Rounds 24-27, subkeys 144..167
// NOTE(review): presumably executed only for the larger round counts - the guard is not visible; confirm.
658 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<148>(subkeys)), Xor(temp[3], LoadKey<149>(subkeys))));
659 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<146>(subkeys)), Xor(temp[2], LoadKey<147>(subkeys))));
660 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<144>(subkeys)), Xor(temp[1], LoadKey<145>(subkeys))));
661 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<154>(subkeys)), Xor(temp[0], LoadKey<155>(subkeys))));
662 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<152>(subkeys)), Xor(temp[3], LoadKey<153>(subkeys))));
663 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<150>(subkeys)), Xor(temp[2], LoadKey<151>(subkeys))));
664 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<160>(subkeys)), Xor(temp[1], LoadKey<161>(subkeys))));
665 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<158>(subkeys)), Xor(temp[0], LoadKey<159>(subkeys))));
666 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<156>(subkeys)), Xor(temp[3], LoadKey<157>(subkeys))));
667 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<166>(subkeys)), Xor(temp[2], LoadKey<167>(subkeys))));
668 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<164>(subkeys)), Xor(temp[1], LoadKey<165>(subkeys))));
669 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<162>(subkeys)), Xor(temp[0], LoadKey<163>(subkeys))));
// Rounds 28-31, subkeys 168..191
// NOTE(review): presumably executed only for the largest round count - the guard is not visible; confirm.
674 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<172>(subkeys)), Xor(temp[3], LoadKey<173>(subkeys))));
675 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<170>(subkeys)), Xor(temp[2], LoadKey<171>(subkeys))));
676 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<168>(subkeys)), Xor(temp[1], LoadKey<169>(subkeys))));
677 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<178>(subkeys)), Xor(temp[0], LoadKey<179>(subkeys))));
678 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<176>(subkeys)), Xor(temp[3], LoadKey<177>(subkeys))));
679 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<174>(subkeys)), Xor(temp[2], LoadKey<175>(subkeys))));
680 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<184>(subkeys)), Xor(temp[1], LoadKey<185>(subkeys))));
681 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<182>(subkeys)), Xor(temp[0], LoadKey<183>(subkeys))));
682 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<180>(subkeys)), Xor(temp[3], LoadKey<181>(subkeys))));
683 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<190>(subkeys)), Xor(temp[2], LoadKey<191>(subkeys))));
684 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<188>(subkeys)), Xor(temp[1], LoadKey<189>(subkeys))));
685 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<186>(subkeys)), Xor(temp[0], LoadKey<187>(subkeys))));
// LEA_Decryption: applies the LEA decryption rounds to the four state words in
// temp, in place.
//   temp    - four SIMD state words of type W; every statement overwrites one of them
//   subkeys - round-key words; the visible statements read indices 191 down to 0
//             through LoadKey<IDX>
//   rounds  - round count. NOTE(review): not referenced in the lines visible here.
//             The gaps in the embedded numbering around the first two groups suggest
//             round-count checks that are not shown - confirm against the full file.
// NOTE(review): W is presumably a template parameter declared on a line that is
// not visible here.
// The statements mirror the encryption statements in reverse order. Each one
// rotates a state word the opposite way (right by 9, left by 5, or left by 3),
// subtracts the XOR of the neighbouring word with its subkey, and XORs the
// result with the second subkey.
692 inline void LEA_Decryption(W temp[4],
const word32 *subkeys,
unsigned int rounds)
// Rounds 31-28, subkeys 191..168
// NOTE(review): presumably executed only for the largest round count - the guard is not visible; confirm.
696 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<186>(subkeys))), LoadKey<187>(subkeys));
697 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<188>(subkeys))), LoadKey<189>(subkeys));
698 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<190>(subkeys))), LoadKey<191>(subkeys));
699 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<180>(subkeys))), LoadKey<181>(subkeys));
700 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<182>(subkeys))), LoadKey<183>(subkeys));
701 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<184>(subkeys))), LoadKey<185>(subkeys));
702 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<174>(subkeys))), LoadKey<175>(subkeys));
703 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<176>(subkeys))), LoadKey<177>(subkeys));
704 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<178>(subkeys))), LoadKey<179>(subkeys));
705 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<168>(subkeys))), LoadKey<169>(subkeys));
706 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<170>(subkeys))), LoadKey<171>(subkeys));
707 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<172>(subkeys))), LoadKey<173>(subkeys));
// Rounds 27-24, subkeys 167..144
// NOTE(review): presumably executed only for the larger round counts - the guard is not visible; confirm.
712 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<162>(subkeys))), LoadKey<163>(subkeys));
713 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<164>(subkeys))), LoadKey<165>(subkeys));
714 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<166>(subkeys))), LoadKey<167>(subkeys));
715 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<156>(subkeys))), LoadKey<157>(subkeys));
716 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<158>(subkeys))), LoadKey<159>(subkeys));
717 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<160>(subkeys))), LoadKey<161>(subkeys));
718 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<150>(subkeys))), LoadKey<151>(subkeys));
719 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<152>(subkeys))), LoadKey<153>(subkeys));
720 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<154>(subkeys))), LoadKey<155>(subkeys));
721 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<144>(subkeys))), LoadKey<145>(subkeys));
722 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<146>(subkeys))), LoadKey<147>(subkeys));
723 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<148>(subkeys))), LoadKey<149>(subkeys));
// Rounds 23-20, subkeys 143..120
726 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<138>(subkeys))), LoadKey<139>(subkeys));
727 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<140>(subkeys))), LoadKey<141>(subkeys));
728 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<142>(subkeys))), LoadKey<143>(subkeys));
729 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<132>(subkeys))), LoadKey<133>(subkeys));
730 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<134>(subkeys))), LoadKey<135>(subkeys));
731 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<136>(subkeys))), LoadKey<137>(subkeys));
732 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<126>(subkeys))), LoadKey<127>(subkeys));
733 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<128>(subkeys))), LoadKey<129>(subkeys));
734 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<130>(subkeys))), LoadKey<131>(subkeys));
735 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<120>(subkeys))), LoadKey<121>(subkeys));
736 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<122>(subkeys))), LoadKey<123>(subkeys));
737 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<124>(subkeys))), LoadKey<125>(subkeys));
// Rounds 19-16, subkeys 119..96
739 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<114>(subkeys))), LoadKey<115>(subkeys));
740 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<116>(subkeys))), LoadKey<117>(subkeys));
741 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<118>(subkeys))), LoadKey<119>(subkeys));
742 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<108>(subkeys))), LoadKey<109>(subkeys));
743 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<110>(subkeys))), LoadKey<111>(subkeys));
744 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<112>(subkeys))), LoadKey<113>(subkeys));
745 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<102>(subkeys))), LoadKey<103>(subkeys));
746 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<104>(subkeys))), LoadKey<105>(subkeys));
747 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<106>(subkeys))), LoadKey<107>(subkeys));
748 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<96>(subkeys))), LoadKey<97>(subkeys));
749 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<98>(subkeys))), LoadKey<99>(subkeys));
750 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<100>(subkeys))), LoadKey<101>(subkeys));
// Rounds 15-12, subkeys 95..72
752 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<90>(subkeys))), LoadKey<91>(subkeys));
753 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<92>(subkeys))), LoadKey<93>(subkeys));
754 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<94>(subkeys))), LoadKey<95>(subkeys));
755 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<84>(subkeys))), LoadKey<85>(subkeys));
756 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<86>(subkeys))), LoadKey<87>(subkeys));
757 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<88>(subkeys))), LoadKey<89>(subkeys));
758 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<78>(subkeys))), LoadKey<79>(subkeys));
759 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<80>(subkeys))), LoadKey<81>(subkeys));
760 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<82>(subkeys))), LoadKey<83>(subkeys));
761 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<72>(subkeys))), LoadKey<73>(subkeys));
762 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<74>(subkeys))), LoadKey<75>(subkeys));
763 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<76>(subkeys))), LoadKey<77>(subkeys));
// Rounds 11-8, subkeys 71..48
765 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<66>(subkeys))), LoadKey<67>(subkeys));
766 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<68>(subkeys))), LoadKey<69>(subkeys));
767 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<70>(subkeys))), LoadKey<71>(subkeys));
768 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<60>(subkeys))), LoadKey<61>(subkeys));
769 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<62>(subkeys))), LoadKey<63>(subkeys));
770 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<64>(subkeys))), LoadKey<65>(subkeys));
771 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<54>(subkeys))), LoadKey<55>(subkeys));
772 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<56>(subkeys))), LoadKey<57>(subkeys));
773 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<58>(subkeys))), LoadKey<59>(subkeys));
774 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<48>(subkeys))), LoadKey<49>(subkeys));
775 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<50>(subkeys))), LoadKey<51>(subkeys));
776 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<52>(subkeys))), LoadKey<53>(subkeys));
// Rounds 7-4, subkeys 47..24
778 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<42>(subkeys))), LoadKey<43>(subkeys));
779 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<44>(subkeys))), LoadKey<45>(subkeys));
780 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<46>(subkeys))), LoadKey<47>(subkeys));
781 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<36>(subkeys))), LoadKey<37>(subkeys));
782 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<38>(subkeys))), LoadKey<39>(subkeys));
783 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<40>(subkeys))), LoadKey<41>(subkeys));
784 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<30>(subkeys))), LoadKey<31>(subkeys));
785 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<32>(subkeys))), LoadKey<33>(subkeys));
786 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<34>(subkeys))), LoadKey<35>(subkeys));
787 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<24>(subkeys))), LoadKey<25>(subkeys));
788 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<26>(subkeys))), LoadKey<27>(subkeys));
789 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<28>(subkeys))), LoadKey<29>(subkeys));
// Rounds 3-0, subkeys 23..0
791 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<18>(subkeys))), LoadKey<19>(subkeys));
792 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<20>(subkeys))), LoadKey<21>(subkeys));
793 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<22>(subkeys))), LoadKey<23>(subkeys));
794 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<12>(subkeys))), LoadKey<13>(subkeys));
795 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<14>(subkeys))), LoadKey<15>(subkeys));
796 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<16>(subkeys))), LoadKey<17>(subkeys));
797 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<6>(subkeys))), LoadKey<7>(subkeys));
798 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<8>(subkeys))), LoadKey<9>(subkeys));
799 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<10>(subkeys))), LoadKey<11>(subkeys));
800 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<0>(subkeys))), LoadKey<1>(subkeys));
801 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<2>(subkeys))), LoadKey<3>(subkeys));
802 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<4>(subkeys))), LoadKey<5>(subkeys));
// LEA_Enc_Block (NEON): encrypts the single block held in block0, in place.
//   block0  - the block, as four 32-bit lanes; overwritten with the result
//   subkeys - round-key words, passed through to LEA_Encryption
//   rounds  - round count, passed through to LEA_Encryption
// NOTE(review): the declaration of temp is not among the visible lines -
// presumably uint32x4_t temp[4]; confirm against the full file.
805 #endif // LEA Encryption and Decryption 809 #if (CRYPTOPP_ARM_NEON_AVAILABLE) 811 inline void LEA_Enc_Block(uint32x4_t &block0,
812 const word32 *subkeys,
unsigned int rounds)
// Broadcast each 32-bit lane of the block into its own vector.
815 temp[0] = UnpackNEON<0>(block0);
816 temp[1] = UnpackNEON<1>(block0);
817 temp[2] = UnpackNEON<2>(block0);
818 temp[3] = UnpackNEON<3>(block0);
820 LEA_Encryption(temp, subkeys, rounds);
// Lane 0 of each of the four result vectors forms the output block.
822 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
// LEA_Dec_Block (NEON): decrypts the single block held in block0, in place.
//   block0  - the block, as four 32-bit lanes; overwritten with the result
//   subkeys - round-key words, passed through to LEA_Decryption
//   rounds  - round count, passed through to LEA_Decryption
// NOTE(review): the declaration of temp is not among the visible lines -
// presumably uint32x4_t temp[4]; confirm against the full file.
825 inline void LEA_Dec_Block(uint32x4_t &block0,
826 const word32 *subkeys,
unsigned int rounds)
// Broadcast each 32-bit lane of the block into its own vector.
829 temp[0] = UnpackNEON<0>(block0);
830 temp[1] = UnpackNEON<1>(block0);
831 temp[2] = UnpackNEON<2>(block0);
832 temp[3] = UnpackNEON<3>(block0);
834 LEA_Decryption(temp, subkeys, rounds);
// Lane 0 of each of the four result vectors forms the output block.
836 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
// LEA_Enc_4_Blocks (NEON): encrypts four blocks at once, in place.
//   block0..block3 - the four blocks; each is overwritten with its result
//   subkeys        - round-key words, passed through to LEA_Encryption
//   rounds         - round count, passed through to LEA_Encryption
// NOTE(review): the declaration of temp is not among the visible lines -
// presumably uint32x4_t temp[4]; confirm against the full file.
839 inline void LEA_Enc_4_Blocks(uint32x4_t &block0, uint32x4_t &block1,
840 uint32x4_t &block2, uint32x4_t &block3,
const word32 *subkeys,
unsigned int rounds)
// Transpose: temp[i] collects lane i of all four blocks.
843 temp[0] = UnpackNEON<0>(block0, block1, block2, block3);
844 temp[1] = UnpackNEON<1>(block0, block1, block2, block3);
845 temp[2] = UnpackNEON<2>(block0, block1, block2, block3);
846 temp[3] = UnpackNEON<3>(block0, block1, block2, block3);
848 LEA_Encryption(temp, subkeys, rounds);
// Transpose back: block i collects lane i of the four result vectors.
850 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
851 block1 = RepackNEON<1>(temp[0], temp[1], temp[2], temp[3]);
852 block2 = RepackNEON<2>(temp[0], temp[1], temp[2], temp[3]);
853 block3 = RepackNEON<3>(temp[0], temp[1], temp[2], temp[3]);
// LEA_Dec_4_Blocks (NEON): decrypts four blocks at once, in place.
//   block0..block3 - the four blocks; each is overwritten with its result
//   subkeys        - round-key words, passed through to LEA_Decryption
//   rounds         - round count, passed through to LEA_Decryption
// NOTE(review): the declaration of temp is not among the visible lines -
// presumably uint32x4_t temp[4]; confirm against the full file.
856 inline void LEA_Dec_4_Blocks(uint32x4_t &block0, uint32x4_t &block1,
857 uint32x4_t &block2, uint32x4_t &block3,
const word32 *subkeys,
unsigned int rounds)
// Transpose: temp[i] collects lane i of all four blocks.
860 temp[0] = UnpackNEON<0>(block0, block1, block2, block3);
861 temp[1] = UnpackNEON<1>(block0, block1, block2, block3);
862 temp[2] = UnpackNEON<2>(block0, block1, block2, block3);
863 temp[3] = UnpackNEON<3>(block0, block1, block2, block3);
865 LEA_Decryption(temp, subkeys, rounds);
// Transpose back: block i collects lane i of the four result vectors.
867 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
868 block1 = RepackNEON<1>(temp[0], temp[1], temp[2], temp[3]);
869 block2 = RepackNEON<2>(temp[0], temp[1], temp[2], temp[3]);
870 block3 = RepackNEON<3>(temp[0], temp[1], temp[2], temp[3]);
// LEA_Enc_Block (SSSE3): encrypts the state held in a single __m128i.
//   block0  - in/out; read through UnpackXMM<0..3> and overwritten with the
//             RepackXMM<0> result.
//   subkeys - round-key words, forwarded unchanged to LEA_Encryption.
//   rounds  - round count, forwarded unchanged to LEA_Encryption.
// NOTE(review): the declaration of `temp` and the function braces are not
// visible in this listing - presumably `__m128i temp[4];` - confirm against
// the full source. The first line below also carries the #endif that closes
// the NEON section and the #if that opens the SSSE3 section.
873 #endif // CRYPTOPP_ARM_NEON_AVAILABLE 877 #if (CRYPTOPP_SSSE3_AVAILABLE) 879 inline void LEA_Enc_Block(__m128i &block0,
880 const word32 *subkeys,
unsigned int rounds)
// Split the input vector into the four working vectors LEA_Encryption expects.
883 temp[0] = UnpackXMM<0>(block0);
884 temp[1] = UnpackXMM<1>(block0);
885 temp[2] = UnpackXMM<2>(block0);
886 temp[3] = UnpackXMM<3>(block0);
// Run the cipher rounds over the working vectors.
888 LEA_Encryption(temp, subkeys, rounds);
// Only RepackXMM<0> is written back: there is a single output block.
890 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
// LEA_Dec_Block (SSSE3): decrypts the state held in a single __m128i.
//   block0  - in/out; read through UnpackXMM<0..3> and overwritten with the
//             RepackXMM<0> result.
//   subkeys - round-key words, forwarded unchanged to LEA_Decryption.
//   rounds  - round count, forwarded unchanged to LEA_Decryption.
// NOTE(review): the declaration of `temp` and the function braces are not
// visible in this listing - confirm against the full source.
893 inline void LEA_Dec_Block(__m128i &block0,
894 const word32 *subkeys,
unsigned int rounds)
// Split the input vector into the four working vectors LEA_Decryption expects.
897 temp[0] = UnpackXMM<0>(block0);
898 temp[1] = UnpackXMM<1>(block0);
899 temp[2] = UnpackXMM<2>(block0);
900 temp[3] = UnpackXMM<3>(block0);
// Run the inverse cipher rounds over the working vectors.
902 LEA_Decryption(temp, subkeys, rounds);
// Only RepackXMM<0> is written back: there is a single output block.
904 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
// LEA_Enc_4_Blocks (SSSE3): encrypts four __m128i vectors in one pass.
//   block0..block3 - in/out; all four are read by every UnpackXMM<i> call and
//                    each block<i> is overwritten with RepackXMM<i>.
//   subkeys        - round-key words, forwarded unchanged to LEA_Encryption.
//   rounds         - round count, forwarded unchanged to LEA_Encryption.
// NOTE(review): presumably UnpackXMM<i> gathers word i of each block so the
// four blocks are processed lane-parallel - confirm against UnpackXMM. The
// declaration of `temp` and the braces are not visible in this listing.
907 inline void LEA_Enc_4_Blocks(__m128i &block0, __m128i &block1,
908 __m128i &block2, __m128i &block3,
const word32 *subkeys,
unsigned int rounds)
// Build the four working vectors from all four input blocks.
911 temp[0] = UnpackXMM<0>(block0, block1, block2, block3);
912 temp[1] = UnpackXMM<1>(block0, block1, block2, block3);
913 temp[2] = UnpackXMM<2>(block0, block1, block2, block3);
914 temp[3] = UnpackXMM<3>(block0, block1, block2, block3);
// Same round function as the single-block path; only the packing differs.
916 LEA_Encryption(temp, subkeys, rounds);
// Rebuild each output block from the four working vectors.
918 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
919 block1 = RepackXMM<1>(temp[0], temp[1], temp[2], temp[3]);
920 block2 = RepackXMM<2>(temp[0], temp[1], temp[2], temp[3]);
921 block3 = RepackXMM<3>(temp[0], temp[1], temp[2], temp[3]);
// LEA_Dec_4_Blocks (SSSE3): decrypts four __m128i vectors in one pass.
//   block0..block3 - in/out; all four are read by every UnpackXMM<i> call and
//                    each block<i> is overwritten with RepackXMM<i>.
//   subkeys        - round-key words, forwarded unchanged to LEA_Decryption.
//   rounds         - round count, forwarded unchanged to LEA_Decryption.
// NOTE(review): presumably UnpackXMM<i> gathers word i of each block so the
// four blocks are processed lane-parallel - confirm against UnpackXMM. The
// declaration of `temp` and the braces are not visible in this listing.
924 inline void LEA_Dec_4_Blocks(__m128i &block0, __m128i &block1,
925 __m128i &block2, __m128i &block3,
const word32 *subkeys,
unsigned int rounds)
// Build the four working vectors from all four input blocks.
928 temp[0] = UnpackXMM<0>(block0, block1, block2, block3);
929 temp[1] = UnpackXMM<1>(block0, block1, block2, block3);
930 temp[2] = UnpackXMM<2>(block0, block1, block2, block3);
931 temp[3] = UnpackXMM<3>(block0, block1, block2, block3);
// Same inverse round function as the single-block path.
933 LEA_Decryption(temp, subkeys, rounds);
// Rebuild each output block from the four working vectors.
935 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
936 block1 = RepackXMM<1>(temp[0], temp[1], temp[2], temp[3]);
937 block2 = RepackXMM<2>(temp[0], temp[1], temp[2], temp[3]);
938 block3 = RepackXMM<3>(temp[0], temp[1], temp[2], temp[3]);
// POWER8 single-block encryption helper: unpacks block0 with UnpackSIMD<0..3>,
// runs LEA_Encryption, and stores RepackSIMD<0> back into block0.
// NOTE(review): the line that names this function and declares `block0` is not
// visible in this listing; presumably `inline void LEA_Enc_Block(uint32x4_p
// &block0,` since the POWER8 wrapper passes LEA_Enc_Block - confirm. The
// declaration of `temp` and the braces are not visible either.
// NOTE(review): the top of the file shows `#undef CRYPTOPP_POWER8_AVAILABLE`,
// so this section looks compiled out - confirm that is intentional.
941 #endif // CRYPTOPP_SSSE3_AVAILABLE 945 #if (CRYPTOPP_POWER8_AVAILABLE) 948 const word32 *subkeys,
unsigned int rounds)
// Split the input vector into the four working vectors LEA_Encryption expects.
951 temp[0] = UnpackSIMD<0>(block0);
952 temp[1] = UnpackSIMD<1>(block0);
953 temp[2] = UnpackSIMD<2>(block0);
954 temp[3] = UnpackSIMD<3>(block0);
// Run the cipher rounds over the working vectors.
956 LEA_Encryption(temp, subkeys, rounds);
// Only RepackSIMD<0> is written back: there is a single output block.
958 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
// POWER8 single-block decryption helper: unpacks block0 with UnpackSIMD<0..3>,
// runs LEA_Decryption, and stores RepackSIMD<0> back into block0.
// NOTE(review): the line that names this function and declares `block0` is not
// visible in this listing; presumably `inline void LEA_Dec_Block(uint32x4_p
// &block0,` since the POWER8 wrapper passes LEA_Dec_Block - confirm. The
// declaration of `temp` and the braces are not visible either.
962 const word32 *subkeys,
unsigned int rounds)
// Split the input vector into the four working vectors LEA_Decryption expects.
965 temp[0] = UnpackSIMD<0>(block0);
966 temp[1] = UnpackSIMD<1>(block0);
967 temp[2] = UnpackSIMD<2>(block0);
968 temp[3] = UnpackSIMD<3>(block0);
// Run the inverse cipher rounds over the working vectors.
970 LEA_Decryption(temp, subkeys, rounds);
// Only RepackSIMD<0> is written back: there is a single output block.
972 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
// POWER8 four-block encryption helper: builds four working vectors from
// block0..block3, runs LEA_Encryption, and writes RepackSIMD<i> to block<i>.
// NOTE(review): the whole signature is missing from this listing; presumably
// `LEA_Enc_4_Blocks(block0, block1, block2, block3, subkeys, rounds)` with the
// blocks taken by reference, mirroring the NEON/SSSE3 versions - confirm.
// Build the four working vectors from all four input blocks.
979 temp[0] = UnpackSIMD<0>(block0, block1, block2, block3);
980 temp[1] = UnpackSIMD<1>(block0, block1, block2, block3);
981 temp[2] = UnpackSIMD<2>(block0, block1, block2, block3);
982 temp[3] = UnpackSIMD<3>(block0, block1, block2, block3);
// Same round function as the single-block path; only the packing differs.
984 LEA_Encryption(temp, subkeys, rounds);
// Rebuild each output block from the four working vectors.
986 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
987 block1 = RepackSIMD<1>(temp[0], temp[1], temp[2], temp[3]);
988 block2 = RepackSIMD<2>(temp[0], temp[1], temp[2], temp[3]);
989 block3 = RepackSIMD<3>(temp[0], temp[1], temp[2], temp[3]);
// POWER8 four-block decryption helper: builds four working vectors from
// block0..block3, runs LEA_Decryption, and writes RepackSIMD<i> to block<i>.
// NOTE(review): the whole signature is missing from this listing; presumably
// `LEA_Dec_4_Blocks(block0, block1, block2, block3, subkeys, rounds)` with the
// blocks taken by reference, mirroring the NEON/SSSE3 versions - confirm.
// Build the four working vectors from all four input blocks.
996 temp[0] = UnpackSIMD<0>(block0, block1, block2, block3);
997 temp[1] = UnpackSIMD<1>(block0, block1, block2, block3);
998 temp[2] = UnpackSIMD<2>(block0, block1, block2, block3);
999 temp[3] = UnpackSIMD<3>(block0, block1, block2, block3);
// Same inverse round function as the single-block path.
1001 LEA_Decryption(temp, subkeys, rounds);
// Rebuild each output block from the four working vectors.
1003 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
1004 block1 = RepackSIMD<1>(temp[0], temp[1], temp[2], temp[3]);
1005 block2 = RepackSIMD<2>(temp[0], temp[1], temp[2], temp[3]);
1006 block3 = RepackSIMD<3>(temp[0], temp[1], temp[2], temp[3]);
1009 #endif // CRYPTOPP_POWER8_AVAILABLE 1011 ANONYMOUS_NAMESPACE_END
// LEA_Enc_AdvancedProcessBlocks_SSSE3: SSSE3 bulk-encryption entry point.
// Defined after ANONYMOUS_NAMESPACE_END, so unlike the block helpers it is not
// file-local. It does no work itself: it hands LEA_Enc_Block and
// LEA_Enc_4_Blocks to AdvancedProcessBlocks128_4x1_SSE together with every
// argument, and returns that template's size_t result unchanged.
//   subKeys, rounds                 - key schedule and round count, passed through.
//   inBlocks, xorBlocks, outBlocks  - byte buffers, passed through.
//   length, flags                   - passed through; their meaning is defined
//                                     by the AdvancedProcessBlocks template.
// NOTE(review): `rounds` is size_t here while the block helpers take
// unsigned int - presumably narrowed inside the template; confirm.
1017 #if defined(CRYPTOPP_SSSE3_AVAILABLE) 1018 size_t LEA_Enc_AdvancedProcessBlocks_SSSE3(
const word32* subKeys,
size_t rounds,
1019 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1021 return AdvancedProcessBlocks128_4x1_SSE(LEA_Enc_Block, LEA_Enc_4_Blocks,
1022 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
// LEA_Dec_AdvancedProcessBlocks_SSSE3: SSSE3 bulk-decryption entry point.
// Hands LEA_Dec_Block and LEA_Dec_4_Blocks to AdvancedProcessBlocks128_4x1_SSE
// together with every argument, and returns that template's size_t result
// unchanged.
//   subKeys, rounds                 - key schedule and round count, passed through.
//   inBlocks, xorBlocks, outBlocks  - byte buffers, passed through.
//   length, flags                   - passed through; their meaning is defined
//                                     by the AdvancedProcessBlocks template.
1025 size_t LEA_Dec_AdvancedProcessBlocks_SSSE3(
const word32* subKeys,
size_t rounds,
1026 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1028 return AdvancedProcessBlocks128_4x1_SSE(LEA_Dec_Block, LEA_Dec_4_Blocks,
1029 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
// LEA_Enc_AdvancedProcessBlocks_NEON: ARM NEON bulk-encryption entry point.
// Hands LEA_Enc_Block and LEA_Enc_4_Blocks to AdvancedProcessBlocks128_4x1_NEON
// together with every argument, and returns that template's size_t result
// unchanged. Unlike the SSSE3 wrapper, an extra `unused` argument precedes
// subKeys in the call.
//   subKeys, rounds                 - key schedule and round count, passed through.
//   inBlocks, xorBlocks, outBlocks  - byte buffers, passed through.
//   length, flags                   - passed through; their meaning is defined
//                                     by the AdvancedProcessBlocks template.
// NOTE(review): `unused` is not declared anywhere in the visible text;
// presumably a dummy `uint32x4_t` local that only steers template argument
// deduction - confirm against the full source. The first line below also
// carries the #endif closing the SSSE3 section and the #if opening this one.
1031 #endif // CRYPTOPP_SSSE3_AVAILABLE 1033 #if defined(CRYPTOPP_ARM_NEON_AVAILABLE) 1034 size_t LEA_Enc_AdvancedProcessBlocks_NEON(
const word32* subKeys,
size_t rounds,
1035 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1038 return AdvancedProcessBlocks128_4x1_NEON(LEA_Enc_Block, LEA_Enc_4_Blocks,
1039 unused, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
// LEA_Dec_AdvancedProcessBlocks_NEON: ARM NEON bulk-decryption entry point.
// Hands LEA_Dec_Block and LEA_Dec_4_Blocks to AdvancedProcessBlocks128_4x1_NEON
// together with every argument, and returns that template's size_t result
// unchanged. An extra `unused` argument precedes subKeys in the call.
//   subKeys, rounds                 - key schedule and round count, passed through.
//   inBlocks, xorBlocks, outBlocks  - byte buffers, passed through.
//   length, flags                   - passed through; their meaning is defined
//                                     by the AdvancedProcessBlocks template.
// NOTE(review): `unused` is not declared anywhere in the visible text;
// presumably a dummy `uint32x4_t` local that only steers template argument
// deduction - confirm against the full source.
1042 size_t LEA_Dec_AdvancedProcessBlocks_NEON(
const word32* subKeys,
size_t rounds,
1043 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1046 return AdvancedProcessBlocks128_4x1_NEON(LEA_Dec_Block, LEA_Dec_4_Blocks,
1047 unused, subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
// LEA_Enc_AdvancedProcessBlocks_POWER8: POWER8 bulk-encryption entry point.
// Hands LEA_Enc_Block and LEA_Enc_4_Blocks to
// AdvancedProcessBlocks128_4x1_ALTIVEC together with every argument, and
// returns that template's size_t result unchanged.
//   subKeys, rounds                 - key schedule and round count, passed through.
//   inBlocks, xorBlocks, outBlocks  - byte buffers, passed through.
//   length, flags                   - passed through; their meaning is defined
//                                     by the AdvancedProcessBlocks template.
// NOTE(review): the top of the file shows `#undef CRYPTOPP_POWER8_AVAILABLE`,
// so this `#if defined(...)` section looks compiled out - confirm that is
// intentional. The first line below also carries the #endif closing the NEON
// section.
1049 #endif // CRYPTOPP_ARM_NEON_AVAILABLE 1051 #if defined(CRYPTOPP_POWER8_AVAILABLE) 1052 size_t LEA_Enc_AdvancedProcessBlocks_POWER8(
const word32* subKeys,
size_t rounds,
1053 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1055 return AdvancedProcessBlocks128_4x1_ALTIVEC(LEA_Enc_Block, LEA_Enc_4_Blocks,
1056 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
// LEA_Dec_AdvancedProcessBlocks_POWER8: POWER8 bulk-decryption entry point.
// Hands LEA_Dec_Block and LEA_Dec_4_Blocks to
// AdvancedProcessBlocks128_4x1_ALTIVEC together with every argument, and
// returns that template's size_t result unchanged.
//   subKeys, rounds                 - key schedule and round count, passed through.
//   inBlocks, xorBlocks, outBlocks  - byte buffers, passed through.
//   length, flags                   - passed through; their meaning is defined
//                                     by the AdvancedProcessBlocks template.
1059 size_t LEA_Dec_AdvancedProcessBlocks_POWER8(
const word32* subKeys,
size_t rounds,
1060 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1062 return AdvancedProcessBlocks128_4x1_ALTIVEC(LEA_Dec_Block, LEA_Dec_4_Blocks,
1063 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1065 #endif // CRYPTOPP_POWER8_AVAILABLE Utility functions for the Crypto++ library.
Classes for the LEA block cipher.
T1 VecSub(const T1 vec1, const T2 vec2)
Subtract two vectors.
Library configuration file.
T1 VecAdd(const T1 vec1, const T2 vec2)
Add two vectors.
T1 VecPermute(const T1 vec, const T2 mask)
Permutes a vector.
__vector unsigned int uint32x4_p
Vector of 32-bit elements.
Support functions for PowerPC and vector operations.
Template for AdvancedProcessBlocks and SIMD processing.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
T1 VecXor(const T1 vec1, const T2 vec2)
XOR two vectors.
__vector unsigned long long uint64x2_p
Vector of 64-bit elements.
Crypto++ library namespace.
__vector unsigned char uint8x16_p
Vector of 8-bit elements.