#if defined(CRYPTOPP_DISABLE_SHA_ASM)
# undef CRYPTOPP_X86_ASM_AVAILABLE
# undef CRYPTOPP_X32_ASM_AVAILABLE
# undef CRYPTOPP_X64_ASM_AVAILABLE
# undef CRYPTOPP_SSE2_ASM_AVAILABLE
#endif

#if (CRYPTOPP_SHANI_AVAILABLE)
# include <nmmintrin.h>
# include <immintrin.h>
#endif

#if (CRYPTOPP_ARM_NEON_AVAILABLE)
# include <arm_neon.h>
#endif

#if (CRYPTOPP_ARM_ACLE_AVAILABLE)
# include <arm_acle.h>
#endif

#if CRYPTOPP_POWER8_SHA_AVAILABLE
# include "ppc_simd.h"
#endif

#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
# include <signal.h>
# include <setjmp.h>
#endif

#ifndef EXCEPTION_EXECUTE_HANDLER
# define EXCEPTION_EXECUTE_HANDLER 1
#endif

#define M128_CAST(x) ((__m128i *)(void *)(x))
#define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))

// Squash MS LNK4221 and libtool warnings
extern const char SHA_SIMD_FNAME[] = __FILE__;

NAMESPACE_BEGIN(CryptoPP)

// SHA-256 and SHA-512 round constants, defined elsewhere in the library
extern const word32 SHA256_K[64];
extern const word64 SHA512_K[80];

// ***************** SIGILL probes ********************

#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
extern "C" {
    typedef void (*SigHandler)(int);

    static jmp_buf s_jmpSIGILL;
    static void SigIllHandler(int)
    {
        longjmp(s_jmpSIGILL, 1);
    }
}
#endif  // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY

#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARMV8)

bool CPU_ProbeSHA1()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_ARM_SHA1_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
        uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
        uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
        uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
        uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) |
                    vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) |
                    vgetq_lane_u32(r5,0));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else

    // longjmp and clobber warnings. Volatile is required.
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
        uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
        uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
        uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
        uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) |
                    vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) |
                    vgetq_lane_u32(r5,0));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_ARM_SHA1_AVAILABLE
}

bool CPU_ProbeSHA2()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_ARM_SHA2_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
        uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
        uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
        uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) |
                    vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else

    // longjmp and clobber warnings. Volatile is required.
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
        uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
        uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
        uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) |
                    vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_ARM_SHA2_AVAILABLE
}

#endif  // ARM32 or ARM64
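// Illustrative only, not part of this translation unit: a minimal sketch of how a
// runtime dispatcher might consume the SIGILL probes above. The HasSHA1() wrapper
// name is hypothetical; the library itself caches the probe results during CPU
// feature detection (see cpu.cpp).
//
//   static bool HasSHA1()
//   {
//       static const bool available = CPU_ProbeSHA1();
//       return available;
//   }
//
//   // Pick SHA1_HashMultipleBlocks_ARMV8 when HasSHA1() returns true, otherwise
//   // fall back to the portable C++ transform.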
#if CRYPTOPP_SHANI_AVAILABLE

void SHA1_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order)
{
    CRYPTOPP_ASSERT(state);
    CRYPTOPP_ASSERT(data);
    CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE);

    __m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1;
    __m128i MASK, MSG0, MSG1, MSG2, MSG3;

    // Load initial values
    ABCD = _mm_loadu_si128(CONST_M128_CAST(state));
    E0 = _mm_set_epi32(state[4], 0, 0, 0);
    ABCD = _mm_shuffle_epi32(ABCD, 0x1B);

    // Select the byte shuffle for the caller's data arrangement
    MASK = order == BIG_ENDIAN_ORDER ?
        _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15) :
        _mm_set_epi8(3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12) ;

    while (length >= SHA1::BLOCKSIZE)
    {
        // Save current hash
        ABCD_SAVE = ABCD;
        E0_SAVE = E0;

        // Rounds 0-3
        MSG0 = _mm_loadu_si128(CONST_M128_CAST(data+0));
        MSG0 = _mm_shuffle_epi8(MSG0, MASK);
        E0 = _mm_add_epi32(E0, MSG0);
        E1 = ABCD;
        ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);

        // Rounds 4-7
        MSG1 = _mm_loadu_si128(CONST_M128_CAST(data+4));
        MSG1 = _mm_shuffle_epi8(MSG1, MASK);
        E1 = _mm_sha1nexte_epu32(E1, MSG1);
        E0 = ABCD;
        ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0);
        MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);

        // Rounds 8-11
        MSG2 = _mm_loadu_si128(CONST_M128_CAST(data+8));
        MSG2 = _mm_shuffle_epi8(MSG2, MASK);
        E0 = _mm_sha1nexte_epu32(E0, MSG2);
        E1 = ABCD;
        ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
        MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
        MSG0 = _mm_xor_si128(MSG0, MSG2);

        // Rounds 12-15
        MSG3 = _mm_loadu_si128(CONST_M128_CAST(data+12));
        MSG3 = _mm_shuffle_epi8(MSG3, MASK);
        E1 = _mm_sha1nexte_epu32(E1, MSG3);
        E0 = ABCD;
        MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0);
        MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
        MSG1 = _mm_xor_si128(MSG1, MSG3);

        // Rounds 16-19
        E0 = _mm_sha1nexte_epu32(E0, MSG0);
        E1 = ABCD;
        MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
        MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
        MSG2 = _mm_xor_si128(MSG2, MSG0);

        // Rounds 20-23
        E1 = _mm_sha1nexte_epu32(E1, MSG1);
        E0 = ABCD;
        MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
        MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
        MSG3 = _mm_xor_si128(MSG3, MSG1);

        // Rounds 24-27
        E0 = _mm_sha1nexte_epu32(E0, MSG2);
        E1 = ABCD;
        MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1);
        MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
        MSG0 = _mm_xor_si128(MSG0, MSG2);

        // Rounds 28-31
        E1 = _mm_sha1nexte_epu32(E1, MSG3);
        E0 = ABCD;
        MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
        MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
        MSG1 = _mm_xor_si128(MSG1, MSG3);

        // Rounds 32-35
        E0 = _mm_sha1nexte_epu32(E0, MSG0);
        E1 = ABCD;
        MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1);
        MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
        MSG2 = _mm_xor_si128(MSG2, MSG0);

        // Rounds 36-39
        E1 = _mm_sha1nexte_epu32(E1, MSG1);
        E0 = ABCD;
        MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
        MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
        MSG3 = _mm_xor_si128(MSG3, MSG1);

        // Rounds 40-43
        E0 = _mm_sha1nexte_epu32(E0, MSG2);
        E1 = ABCD;
        MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
        MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
        MSG0 = _mm_xor_si128(MSG0, MSG2);

        // Rounds 44-47
        E1 = _mm_sha1nexte_epu32(E1, MSG3);
        E0 = ABCD;
        MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2);
        MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
        MSG1 = _mm_xor_si128(MSG1, MSG3);

        // Rounds 48-51
        E0 = _mm_sha1nexte_epu32(E0, MSG0);
        E1 = ABCD;
        MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
        MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
        MSG2 = _mm_xor_si128(MSG2, MSG0);

        // Rounds 52-55
        E1 = _mm_sha1nexte_epu32(E1, MSG1);
        E0 = ABCD;
        MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2);
        MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
        MSG3 = _mm_xor_si128(MSG3, MSG1);

        // Rounds 56-59
        E0 = _mm_sha1nexte_epu32(E0, MSG2);
        E1 = ABCD;
        MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
        MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
        MSG0 = _mm_xor_si128(MSG0, MSG2);

        // Rounds 60-63
        E1 = _mm_sha1nexte_epu32(E1, MSG3);
        E0 = ABCD;
        MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
        MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
        MSG1 = _mm_xor_si128(MSG1, MSG3);

        // Rounds 64-67
        E0 = _mm_sha1nexte_epu32(E0, MSG0);
        E1 = ABCD;
        MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3);
        MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
        MSG2 = _mm_xor_si128(MSG2, MSG0);

        // Rounds 68-71
        E1 = _mm_sha1nexte_epu32(E1, MSG1);
        E0 = ABCD;
        MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
        MSG3 = _mm_xor_si128(MSG3, MSG1);

        // Rounds 72-75
        E0 = _mm_sha1nexte_epu32(E0, MSG2);
        E1 = ABCD;
        MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
        ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3);

        // Rounds 76-79
        E1 = _mm_sha1nexte_epu32(E1, MSG3);
        E0 = ABCD;
        ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);

        // Add values back to state
        E0 = _mm_sha1nexte_epu32(E0, E0_SAVE);
        ABCD = _mm_add_epi32(ABCD, ABCD_SAVE);

        data += SHA1::BLOCKSIZE/sizeof(word32);
        length -= SHA1::BLOCKSIZE;
    }

    // Save state
    ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
    _mm_storeu_si128(M128_CAST(state), ABCD);
    state[4] = _mm_extract_epi32(E0, 3);
}
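// Illustrative only: a minimal sketch of driving the routine above directly, assuming
// SHA-NI support has already been confirmed. The initial values are the standard
// SHA-1 IV from FIPS 180-4, and "block" stands for one fully padded 64-byte message
// block supplied by the caller as sixteen big-endian words.
//
//   word32 iv[5] = {0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0};
//   SHA1_HashMultipleBlocks_SHANI(iv, block, SHA1::BLOCKSIZE, BIG_ENDIAN_ORDER);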
void SHA256_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order)
{
    CRYPTOPP_ASSERT(state);
    CRYPTOPP_ASSERT(data);
    CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);

    __m128i STATE0, STATE1;
    __m128i MSG, TMP, MASK;
    __m128i TMSG0, TMSG1, TMSG2, TMSG3;
    __m128i ABEF_SAVE, CDGH_SAVE;

    // Load initial values
    TMP = _mm_loadu_si128(M128_CAST(&state[0]));
    STATE1 = _mm_loadu_si128(M128_CAST(&state[4]));

    // Select the byte shuffle for the caller's data arrangement
    MASK = order == BIG_ENDIAN_ORDER ?
        _mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3) :
        _mm_set_epi8(15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0) ;

    // Rearrange the state words into the ABEF/CDGH order used by sha256rnds2
    TMP = _mm_shuffle_epi32(TMP, 0xB1);
    STATE1 = _mm_shuffle_epi32(STATE1, 0x1B);
    STATE0 = _mm_alignr_epi8(TMP, STATE1, 8);
    STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0);

    while (length >= SHA256::BLOCKSIZE)
    {
        // Save current hash
        ABEF_SAVE = STATE0;
        CDGH_SAVE = STATE1;

        // Rounds 0-3
        MSG = _mm_loadu_si128(CONST_M128_CAST(data+0));
        TMSG0 = _mm_shuffle_epi8(MSG, MASK);
        MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0xE9B5DBA5B5C0FBCF), W64LIT(0x71374491428A2F98)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);

        // Rounds 4-7
        TMSG1 = _mm_loadu_si128(CONST_M128_CAST(data+4));
        TMSG1 = _mm_shuffle_epi8(TMSG1, MASK);
        MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0xAB1C5ED5923F82A4), W64LIT(0x59F111F13956C25B)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);

        // Rounds 8-11
        TMSG2 = _mm_loadu_si128(CONST_M128_CAST(data+8));
        TMSG2 = _mm_shuffle_epi8(TMSG2, MASK);
        MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0x550C7DC3243185BE), W64LIT(0x12835B01D807AA98)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);

        // Rounds 12-15
        TMSG3 = _mm_loadu_si128(CONST_M128_CAST(data+12));
        TMSG3 = _mm_shuffle_epi8(TMSG3, MASK);
        MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0xC19BF1749BDC06A7), W64LIT(0x80DEB1FE72BE5D74)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
        TMSG0 = _mm_add_epi32(TMSG0, TMP);
        TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);

        // Rounds 16-19
        MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0x240CA1CC0FC19DC6), W64LIT(0xEFBE4786E49B69C1)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
        TMSG1 = _mm_add_epi32(TMSG1, TMP);
        TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);

        // Rounds 20-23
        MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0x76F988DA5CB0A9DC), W64LIT(0x4A7484AA2DE92C6F)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
        TMSG2 = _mm_add_epi32(TMSG2, TMP);
        TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);

        // Rounds 24-27
        MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0xBF597FC7B00327C8), W64LIT(0xA831C66D983E5152)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
        TMSG3 = _mm_add_epi32(TMSG3, TMP);
        TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);

        // Rounds 28-31
        MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0x1429296706CA6351), W64LIT(0xD5A79147C6E00BF3)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
        TMSG0 = _mm_add_epi32(TMSG0, TMP);
        TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);

        // Rounds 32-35
        MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0x53380D134D2C6DFC), W64LIT(0x2E1B213827B70A85)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
        TMSG1 = _mm_add_epi32(TMSG1, TMP);
        TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);

        // Rounds 36-39
        MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0x92722C8581C2C92E), W64LIT(0x766A0ABB650A7354)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
        TMSG2 = _mm_add_epi32(TMSG2, TMP);
        TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);

        // Rounds 40-43
        MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0xC76C51A3C24B8B70), W64LIT(0xA81A664BA2BFE8A1)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
        TMSG3 = _mm_add_epi32(TMSG3, TMP);
        TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);

        // Rounds 44-47
        MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0x106AA070F40E3585), W64LIT(0xD6990624D192E819)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
        TMSG0 = _mm_add_epi32(TMSG0, TMP);
        TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);

        // Rounds 48-51
        MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0x34B0BCB52748774C), W64LIT(0x1E376C0819A4C116)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
        TMSG1 = _mm_add_epi32(TMSG1, TMP);
        TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
        TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);

        // Rounds 52-55
        MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0x682E6FF35B9CCA4F), W64LIT(0x4ED8AA4A391C0CB3)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
        TMSG2 = _mm_add_epi32(TMSG2, TMP);
        TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);

        // Rounds 56-59
        MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0x8CC7020884C87814), W64LIT(0x78A5636F748F82EE)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
        TMSG3 = _mm_add_epi32(TMSG3, TMP);
        TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);

        // Rounds 60-63
        MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0xC67178F2BEF9A3F7), W64LIT(0xA4506CEB90BEFFFA)));
        STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
        MSG = _mm_shuffle_epi32(MSG, 0x0E);
        STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);

        // Add values back to state
        STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE);
        STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE);

        data += SHA256::BLOCKSIZE/sizeof(word32);
        length -= SHA256::BLOCKSIZE;
    }

    // Repack ABEF/CDGH back into ABCD/EFGH order
    TMP = _mm_shuffle_epi32(STATE0, 0x1B);
    STATE1 = _mm_shuffle_epi32(STATE1, 0xB1);
    STATE0 = _mm_blend_epi16(TMP, STATE1, 0xF0);
    STATE1 = _mm_alignr_epi8(STATE1, TMP, 8);

    // Save state
    _mm_storeu_si128(M128_CAST(&state[0]), STATE0);
    _mm_storeu_si128(M128_CAST(&state[4]), STATE1);
}
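// Illustrative only: the same pattern for SHA-256, assuming SHA-NI support was
// detected. The initial values are the standard SHA-256 IV from FIPS 180-4 and
// "block" is a hypothetical pointer to one padded 64-byte message block.
//
//   word32 iv[8] = {0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
//                   0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19};
//   SHA256_HashMultipleBlocks_SHANI(iv, block, SHA256::BLOCKSIZE, BIG_ENDIAN_ORDER);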
#endif  // CRYPTOPP_SHANI_AVAILABLE

#if CRYPTOPP_ARM_SHA1_AVAILABLE
void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order)
{
    CRYPTOPP_ASSERT(state);
    CRYPTOPP_ASSERT(data);
    CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE);

    uint32x4_t C0, C1, C2, C3;
    uint32x4_t ABCD, ABCD_SAVED;
    uint32x4_t MSG0, MSG1, MSG2, MSG3;
    uint32x4_t TMP0, TMP1;
    uint32_t   E0, E0_SAVED, E1;

    // Load the four SHA-1 round constants
    C0 = vdupq_n_u32(0x5A827999);
    C1 = vdupq_n_u32(0x6ED9EBA1);
    C2 = vdupq_n_u32(0x8F1BBCDC);
    C3 = vdupq_n_u32(0xCA62C1D6);

    // Load initial values
    ABCD = vld1q_u32(&state[0]);
    E0 = state[4];

    while (length >= SHA1::BLOCKSIZE)
    {
        // Save current hash
        ABCD_SAVED = ABCD;
        E0_SAVED = E0;

        MSG0 = vld1q_u32(data +  0);
        MSG1 = vld1q_u32(data +  4);
        MSG2 = vld1q_u32(data +  8);
        MSG3 = vld1q_u32(data + 12);

        if (order == BIG_ENDIAN_ORDER)  // Data arrangement
        {
            MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG0)));
            MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG1)));
            MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG2)));
            MSG3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG3)));
        }

        TMP0 = vaddq_u32(MSG0, C0);
        TMP1 = vaddq_u32(MSG1, C0);

        // Rounds 0-3
        E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1cq_u32(ABCD, E0, TMP0);
        TMP0 = vaddq_u32(MSG2, C0);
        MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);

        // Rounds 4-7
        E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1cq_u32(ABCD, E1, TMP1);
        TMP1 = vaddq_u32(MSG3, C0);
        MSG0 = vsha1su1q_u32(MSG0, MSG3);
        MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);

        // Rounds 8-11
        E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1cq_u32(ABCD, E0, TMP0);
        TMP0 = vaddq_u32(MSG0, C0);
        MSG1 = vsha1su1q_u32(MSG1, MSG0);
        MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);

        // Rounds 12-15
        E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1cq_u32(ABCD, E1, TMP1);
        TMP1 = vaddq_u32(MSG1, C1);
        MSG2 = vsha1su1q_u32(MSG2, MSG1);
        MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);

        // Rounds 16-19
        E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1cq_u32(ABCD, E0, TMP0);
        TMP0 = vaddq_u32(MSG2, C1);
        MSG3 = vsha1su1q_u32(MSG3, MSG2);
        MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);

        // Rounds 20-23
        E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1pq_u32(ABCD, E1, TMP1);
        TMP1 = vaddq_u32(MSG3, C1);
        MSG0 = vsha1su1q_u32(MSG0, MSG3);
        MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);

        // Rounds 24-27
        E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1pq_u32(ABCD, E0, TMP0);
        TMP0 = vaddq_u32(MSG0, C1);
        MSG1 = vsha1su1q_u32(MSG1, MSG0);
        MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);

        // Rounds 28-31
        E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1pq_u32(ABCD, E1, TMP1);
        TMP1 = vaddq_u32(MSG1, C1);
        MSG2 = vsha1su1q_u32(MSG2, MSG1);
        MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);

        // Rounds 32-35
        E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1pq_u32(ABCD, E0, TMP0);
        TMP0 = vaddq_u32(MSG2, C2);
        MSG3 = vsha1su1q_u32(MSG3, MSG2);
        MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);

        // Rounds 36-39
        E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1pq_u32(ABCD, E1, TMP1);
        TMP1 = vaddq_u32(MSG3, C2);
        MSG0 = vsha1su1q_u32(MSG0, MSG3);
        MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);

        // Rounds 40-43
        E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1mq_u32(ABCD, E0, TMP0);
        TMP0 = vaddq_u32(MSG0, C2);
        MSG1 = vsha1su1q_u32(MSG1, MSG0);
        MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);

        // Rounds 44-47
        E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1mq_u32(ABCD, E1, TMP1);
        TMP1 = vaddq_u32(MSG1, C2);
        MSG2 = vsha1su1q_u32(MSG2, MSG1);
        MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);

        // Rounds 48-51
        E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1mq_u32(ABCD, E0, TMP0);
        TMP0 = vaddq_u32(MSG2, C2);
        MSG3 = vsha1su1q_u32(MSG3, MSG2);
        MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);

        // Rounds 52-55
        E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1mq_u32(ABCD, E1, TMP1);
        TMP1 = vaddq_u32(MSG3, C3);
        MSG0 = vsha1su1q_u32(MSG0, MSG3);
        MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);

        // Rounds 56-59
        E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1mq_u32(ABCD, E0, TMP0);
        TMP0 = vaddq_u32(MSG0, C3);
        MSG1 = vsha1su1q_u32(MSG1, MSG0);
        MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);

        // Rounds 60-63
        E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1pq_u32(ABCD, E1, TMP1);
        TMP1 = vaddq_u32(MSG1, C3);
        MSG2 = vsha1su1q_u32(MSG2, MSG1);
        MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);

        // Rounds 64-67
        E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1pq_u32(ABCD, E0, TMP0);
        TMP0 = vaddq_u32(MSG2, C3);
        MSG3 = vsha1su1q_u32(MSG3, MSG2);
        MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);

        // Rounds 68-71
        E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1pq_u32(ABCD, E1, TMP1);
        TMP1 = vaddq_u32(MSG3, C3);
        MSG0 = vsha1su1q_u32(MSG0, MSG3);

        // Rounds 72-75
        E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1pq_u32(ABCD, E0, TMP0);

        // Rounds 76-79
        E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
        ABCD = vsha1pq_u32(ABCD, E1, TMP1);

        // Add values back to state
        E0 += E0_SAVED;
        ABCD = vaddq_u32(ABCD_SAVED, ABCD);

        data += SHA1::BLOCKSIZE/sizeof(word32);
        length -= SHA1::BLOCKSIZE;
    }

    // Save state
    vst1q_u32(&state[0], ABCD);
    state[4] = E0;
}
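// Illustrative only: length is a byte count and may span several blocks, so a caller
// that has already verified the ARMv8 SHA extension can hash, say, two consecutive
// 64-byte blocks in one call ("blocks" is a hypothetical pointer to 32 words):
//
//   SHA1_HashMultipleBlocks_ARMV8(iv, blocks, 2*SHA1::BLOCKSIZE, BIG_ENDIAN_ORDER);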
#endif  // CRYPTOPP_ARM_SHA1_AVAILABLE

#if CRYPTOPP_ARM_SHA2_AVAILABLE
void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order)
{
    CRYPTOPP_ASSERT(state);
    CRYPTOPP_ASSERT(data);
    CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);

    uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE;
    uint32x4_t MSG0, MSG1, MSG2, MSG3;
    uint32x4_t TMP0, TMP1, TMP2;

    // Load initial values
    STATE0 = vld1q_u32(&state[0]);
    STATE1 = vld1q_u32(&state[4]);

    while (length >= SHA256::BLOCKSIZE)
    {
        // Save current hash
        ABEF_SAVE = STATE0;
        CDGH_SAVE = STATE1;

        // Load message
        MSG0 = vld1q_u32(data +  0);
        MSG1 = vld1q_u32(data +  4);
        MSG2 = vld1q_u32(data +  8);
        MSG3 = vld1q_u32(data + 12);

        if (order == BIG_ENDIAN_ORDER)  // Data arrangement
        {
            MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG0)));
            MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG1)));
            MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG2)));
            MSG3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG3)));
        }

        TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x00]));

        // Rounds 0-3
        MSG0 = vsha256su0q_u32(MSG0, MSG1);
        TMP2 = STATE0;
        TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x04]));
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);

        // Rounds 4-7
        MSG1 = vsha256su0q_u32(MSG1, MSG2);
        TMP2 = STATE0;
        TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x08]));
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
        MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);

        // Rounds 8-11
        MSG2 = vsha256su0q_u32(MSG2, MSG3);
        TMP2 = STATE0;
        TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x0c]));
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);

        // Rounds 12-15
        MSG3 = vsha256su0q_u32(MSG3, MSG0);
        TMP2 = STATE0;
        TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x10]));
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
        MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);

        // Rounds 16-19
        MSG0 = vsha256su0q_u32(MSG0, MSG1);
        TMP2 = STATE0;
        TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x14]));
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);

        // Rounds 20-23
        MSG1 = vsha256su0q_u32(MSG1, MSG2);
        TMP2 = STATE0;
        TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x18]));
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
        MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);

        // Rounds 24-27
        MSG2 = vsha256su0q_u32(MSG2, MSG3);
        TMP2 = STATE0;
        TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x1c]));
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);

        // Rounds 28-31
        MSG3 = vsha256su0q_u32(MSG3, MSG0);
        TMP2 = STATE0;
        TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x20]));
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
        MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);

        // Rounds 32-35
        MSG0 = vsha256su0q_u32(MSG0, MSG1);
        TMP2 = STATE0;
        TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x24]));
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);

        // Rounds 36-39
        MSG1 = vsha256su0q_u32(MSG1, MSG2);
        TMP2 = STATE0;
        TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x28]));
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
        MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);

        // Rounds 40-43
        MSG2 = vsha256su0q_u32(MSG2, MSG3);
        TMP2 = STATE0;
        TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x2c]));
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);

        // Rounds 44-47
        MSG3 = vsha256su0q_u32(MSG3, MSG0);
        TMP2 = STATE0;
        TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x30]));
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
        MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);

        // Rounds 48-51
        TMP2 = STATE0;
        TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x34]));
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);

        // Rounds 52-55
        TMP2 = STATE0;
        TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x38]));
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);

        // Rounds 56-59
        TMP2 = STATE0;
        TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x3c]));
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);

        // Rounds 60-63
        TMP2 = STATE0;
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);

        // Add values back to state
        STATE0 = vaddq_u32(STATE0, ABEF_SAVE);
        STATE1 = vaddq_u32(STATE1, CDGH_SAVE);

        data += SHA256::BLOCKSIZE/sizeof(word32);
        length -= SHA256::BLOCKSIZE;
    }

    // Save state
    vst1q_u32(&state[0], STATE0);
    vst1q_u32(&state[4], STATE1);
}
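// Note: each vld1q_u32(&SHA256_K[i]) above pulls four consecutive round constants
// K[i..i+3] from the table declared near the top of this file, so the sixteen
// vsha256hq/vsha256h2q pairs cover all 64 rounds of the compression function.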
#endif  // CRYPTOPP_ARM_SHA2_AVAILABLE

#if CRYPTOPP_POWER8_SHA_AVAILABLE

// Indexes into the S[] array
enum {A=0, B=1, C, D, E, F, G, H};

inline
uint32x4_p VecLoad32(const word32* data, int offset)
{
#if (CRYPTOPP_LITTLE_ENDIAN)
    // Load 16 bytes and restore big-endian word order on little-endian targets
    const uint8x16_p mask = {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12};
    const uint32x4_p val = VecLoad(offset, data);
    return VecPermute(val, val, mask);
#else
    return VecLoad(offset, data);
#endif
}

template<class T> inline
void VecStore32(const T data, word32 dest[4])
{
    VecStore(data, dest);
}

inline
uint32x4_p VectorCh(const uint32x4_p x, const uint32x4_p y, const uint32x4_p z)
{
    // Ch(x,y,z) = (x & y) ^ (~x & z), computed with a single vector select
    return vec_sel(z,y,x);
}

inline
uint32x4_p VectorMaj(const uint32x4_p x, const uint32x4_p y, const uint32x4_p z)
{
    // Maj(x,y,z) = (x & y) ^ (x & z) ^ (y & z), computed with a select on x^y
    return vec_sel(y, z, VecXor(x, y));
}

inline
uint32x4_p Vector_sigma0(const uint32x4_p val)
{
    return VecSHA256<0,0>(val);
}

inline
uint32x4_p Vector_sigma1(const uint32x4_p val)
{
    return VecSHA256<0,0xf>(val);
}

inline
uint32x4_p VectorSigma0(const uint32x4_p val)
{
    return VecSHA256<1,0>(val);
}

inline
uint32x4_p VectorSigma1(const uint32x4_p val)
{
    return VecSHA256<1,0xf>(val);
}

inline
uint32x4_p VectorPack(const uint32x4_p a, const uint32x4_p b,
                      const uint32x4_p c, const uint32x4_p d)
{
    // Gather lane 0 of a, b, c and d into a single vector
    const uint8x16_p m1 = {0,1,2,3, 16,17,18,19, 0,0,0,0, 0,0,0,0};
    const uint8x16_p m2 = {0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
    return VecPermute(VecPermute(a,b,m1), VecPermute(c,d,m1), m2);
}

template <unsigned int R> inline
void SHA256_ROUND1(uint32x4_p W[16], uint32x4_p S[8], const uint32x4_p K, const uint32x4_p M)
{
    uint32x4_p T1, T2;

    W[R] = M;
    T1 = S[H] + VectorSigma1(S[E]) + VectorCh(S[E],S[F],S[G]) + K + M;
    T2 = VectorSigma0(S[A]) + VectorMaj(S[A],S[B],S[C]);

    S[H] = S[G]; S[G] = S[F]; S[F] = S[E];
    S[E] = S[D] + T1;
    S[D] = S[C]; S[C] = S[B]; S[B] = S[A];
    S[A] = T1 + T2;
}

template <unsigned int R> inline
void SHA256_ROUND2(uint32x4_p W[16], uint32x4_p S[8], const uint32x4_p K)
{
    // Indexes into the circular W[] message schedule
    enum {IDX0=(R+0)&0xf, IDX1=(R+1)&0xf, IDX9=(R+9)&0xf, IDX14=(R+14)&0xf};

    const uint32x4_p s0 = Vector_sigma0(W[IDX1]);
    const uint32x4_p s1 = Vector_sigma1(W[IDX14]);

    uint32x4_p T1 = (W[IDX0] += s0 + s1 + W[IDX9]);
    T1 += S[H] + VectorSigma1(S[E]) + VectorCh(S[E],S[F],S[G]) + K;
    uint32x4_p T2 = VectorSigma0(S[A]) + VectorMaj(S[A],S[B],S[C]);

    S[H] = S[G]; S[G] = S[F]; S[F] = S[E];
    S[E] = S[D] + T1;
    S[D] = S[C]; S[C] = S[B]; S[B] = S[A];
    S[A] = T1 + T2;
}

void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t length, ByteOrder order)
{
    CRYPTOPP_ASSERT(state);
    CRYPTOPP_ASSERT(data);
    CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
    CRYPTOPP_UNUSED(order);

    const uint32_t* k = reinterpret_cast<const uint32_t*>(SHA256_K);
    const uint32_t* m = reinterpret_cast<const uint32_t*>(data);

    uint32x4_p abcd = VecLoad(state+0);
    uint32x4_p efgh = VecLoad(state+4);
    uint32x4_p W[16], S[8], vm, vk;

    size_t blocks = length / SHA256::BLOCKSIZE;
    while (blocks--)
    {
        unsigned int offset=0;

        S[A] = abcd; S[E] = efgh;
        S[B] = VecShiftLeftOctet<4>(S[A]);
        S[F] = VecShiftLeftOctet<4>(S[E]);
        S[C] = VecShiftLeftOctet<4>(S[B]);
        S[G] = VecShiftLeftOctet<4>(S[F]);
        S[D] = VecShiftLeftOctet<4>(S[C]);
        S[H] = VecShiftLeftOctet<4>(S[G]);

        // Rounds 0-15, feeding the message words into the schedule
        vk = VecLoad(offset, k);
        vm = VecLoad32(m, offset);
        SHA256_ROUND1<0>(W,S, vk,vm);
        offset+=16;

        vk = VecShiftLeftOctet<4>(vk);
        vm = VecShiftLeftOctet<4>(vm);
        SHA256_ROUND1<1>(W,S, vk,vm);

        vk = VecShiftLeftOctet<4>(vk);
        vm = VecShiftLeftOctet<4>(vm);
        SHA256_ROUND1<2>(W,S, vk,vm);

        vk = VecShiftLeftOctet<4>(vk);
        vm = VecShiftLeftOctet<4>(vm);
        SHA256_ROUND1<3>(W,S, vk,vm);

        vk = VecLoad(offset, k);
        vm = VecLoad32(m, offset);
        SHA256_ROUND1<4>(W,S, vk,vm);
        offset+=16;

        vk = VecShiftLeftOctet<4>(vk);
        vm = VecShiftLeftOctet<4>(vm);
        SHA256_ROUND1<5>(W,S, vk,vm);

        vk = VecShiftLeftOctet<4>(vk);
        vm = VecShiftLeftOctet<4>(vm);
        SHA256_ROUND1<6>(W,S, vk,vm);

        vk = VecShiftLeftOctet<4>(vk);
        vm = VecShiftLeftOctet<4>(vm);
        SHA256_ROUND1<7>(W,S, vk,vm);

        vk = VecLoad(offset, k);
        vm = VecLoad32(m, offset);
        SHA256_ROUND1<8>(W,S, vk,vm);
        offset+=16;

        vk = VecShiftLeftOctet<4>(vk);
        vm = VecShiftLeftOctet<4>(vm);
        SHA256_ROUND1<9>(W,S, vk,vm);

        vk = VecShiftLeftOctet<4>(vk);
        vm = VecShiftLeftOctet<4>(vm);
        SHA256_ROUND1<10>(W,S, vk,vm);

        vk = VecShiftLeftOctet<4>(vk);
        vm = VecShiftLeftOctet<4>(vm);
        SHA256_ROUND1<11>(W,S, vk,vm);

        vk = VecLoad(offset, k);
        vm = VecLoad32(m, offset);
        SHA256_ROUND1<12>(W,S, vk,vm);
        offset+=16;

        vk = VecShiftLeftOctet<4>(vk);
        vm = VecShiftLeftOctet<4>(vm);
        SHA256_ROUND1<13>(W,S, vk,vm);

        vk = VecShiftLeftOctet<4>(vk);
        vm = VecShiftLeftOctet<4>(vm);
        SHA256_ROUND1<14>(W,S, vk,vm);

        vk = VecShiftLeftOctet<4>(vk);
        vm = VecShiftLeftOctet<4>(vm);
        SHA256_ROUND1<15>(W,S, vk,vm);

        // Rounds 16-63, extending the message schedule
        for (unsigned int i=16; i<64; i+=16)
        {
            vk = VecLoad(offset, k);
            SHA256_ROUND2<0>(W,S, vk);
            SHA256_ROUND2<1>(W,S, VecShiftLeftOctet<4>(vk));
            SHA256_ROUND2<2>(W,S, VecShiftLeftOctet<8>(vk));
            SHA256_ROUND2<3>(W,S, VecShiftLeftOctet<12>(vk));
            offset+=16;

            vk = VecLoad(offset, k);
            SHA256_ROUND2<4>(W,S, vk);
            SHA256_ROUND2<5>(W,S, VecShiftLeftOctet<4>(vk));
            SHA256_ROUND2<6>(W,S, VecShiftLeftOctet<8>(vk));
            SHA256_ROUND2<7>(W,S, VecShiftLeftOctet<12>(vk));
            offset+=16;

            vk = VecLoad(offset, k);
            SHA256_ROUND2<8>(W,S, vk);
            SHA256_ROUND2<9>(W,S, VecShiftLeftOctet<4>(vk));
            SHA256_ROUND2<10>(W,S, VecShiftLeftOctet<8>(vk));
            SHA256_ROUND2<11>(W,S, VecShiftLeftOctet<12>(vk));
            offset+=16;

            vk = VecLoad(offset, k);
            SHA256_ROUND2<12>(W,S, vk);
            SHA256_ROUND2<13>(W,S, VecShiftLeftOctet<4>(vk));
            SHA256_ROUND2<14>(W,S, VecShiftLeftOctet<8>(vk));
            SHA256_ROUND2<15>(W,S, VecShiftLeftOctet<12>(vk));
            offset+=16;
        }

        abcd += VectorPack(S[A],S[B],S[C],S[D]);
        efgh += VectorPack(S[E],S[F],S[G],S[H]);

        m += 16;
    }

    VecStore32(abcd, state+0);
    VecStore32(efgh, state+4);
}
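// Note on the POWER8 kernels: the working state lives in lane 0 of each S[] vector.
// VecShiftLeftOctet slides the next word of abcd/efgh (and of vk/vm) into lane 0,
// VecSHA256/VecSHA512 map to the vshasigmaw/vshasigmad instructions for the sigma
// functions, and VectorPack gathers lane 0 of the S[] entries back into the packed
// state vectors after the final round.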
inline
void VecStore64(const uint64x2_p val, word64* data)
{
    VecStore(val, data);
}

inline
uint64x2_p VecLoad64(const word64* data, int offset)
{
#if (CRYPTOPP_LITTLE_ENDIAN)
    // Load 16 bytes and restore big-endian word order on little-endian targets
    const uint8x16_p mask = {0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15};
    const uint64x2_p val = VecLoad(offset, data);
    return VecPermute(val, val, mask);
#else
    return VecLoad(offset, data);
#endif
}

inline
uint64x2_p VectorCh(const uint64x2_p x, const uint64x2_p y, const uint64x2_p z)
{
    // Ch(x,y,z) = (x & y) ^ (~x & z), computed with a single vector select
    return vec_sel(z,y,x);
}

inline
uint64x2_p VectorMaj(const uint64x2_p x, const uint64x2_p y, const uint64x2_p z)
{
    // Maj(x,y,z) = (x & y) ^ (x & z) ^ (y & z), computed with a select on x^y
    return vec_sel(y, z, VecXor(x, y));
}

inline
uint64x2_p Vector_sigma0(const uint64x2_p val)
{
    return VecSHA512<0,0>(val);
}

inline
uint64x2_p Vector_sigma1(const uint64x2_p val)
{
    return VecSHA512<0,0xf>(val);
}

inline
uint64x2_p VectorSigma0(const uint64x2_p val)
{
    return VecSHA512<1,0>(val);
}

inline
uint64x2_p VectorSigma1(const uint64x2_p val)
{
    return VecSHA512<1,0xf>(val);
}

inline
uint64x2_p VectorPack(const uint64x2_p a, const uint64x2_p b)
{
    // Gather lane 0 of a and b into a single vector
    const uint8x16_p m = {0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
    return VecPermute(a, b, m);
}

template <unsigned int R> inline
void SHA512_ROUND1(uint64x2_p W[16], uint64x2_p S[8], const uint64x2_p K, const uint64x2_p M)
{
    uint64x2_p T1, T2;

    W[R] = M;
    T1 = S[H] + VectorSigma1(S[E]) + VectorCh(S[E],S[F],S[G]) + K + M;
    T2 = VectorSigma0(S[A]) + VectorMaj(S[A],S[B],S[C]);

    S[H] = S[G]; S[G] = S[F]; S[F] = S[E];
    S[E] = S[D] + T1;
    S[D] = S[C]; S[C] = S[B]; S[B] = S[A];
    S[A] = T1 + T2;
}

template <unsigned int R> inline
void SHA512_ROUND2(uint64x2_p W[16], uint64x2_p S[8], const uint64x2_p K)
{
    // Indexes into the circular W[] message schedule
    enum {IDX0=(R+0)&0xf, IDX1=(R+1)&0xf, IDX9=(R+9)&0xf, IDX14=(R+14)&0xf};

    const uint64x2_p s0 = Vector_sigma0(W[IDX1]);
    const uint64x2_p s1 = Vector_sigma1(W[IDX14]);

    uint64x2_p T1 = (W[IDX0] += s0 + s1 + W[IDX9]);
    T1 += S[H] + VectorSigma1(S[E]) + VectorCh(S[E],S[F],S[G]) + K;
    uint64x2_p T2 = VectorSigma0(S[A]) + VectorMaj(S[A],S[B],S[C]);

    S[H] = S[G]; S[G] = S[F]; S[F] = S[E];
    S[E] = S[D] + T1;
    S[D] = S[C]; S[C] = S[B]; S[B] = S[A];
    S[A] = T1 + T2;
}

void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t length, ByteOrder order)
{
    CRYPTOPP_ASSERT(state);
    CRYPTOPP_ASSERT(data);
    CRYPTOPP_ASSERT(length >= SHA512::BLOCKSIZE);
    CRYPTOPP_UNUSED(order);

    const uint64_t* k = reinterpret_cast<const uint64_t*>(SHA512_K);
    const uint64_t* m = reinterpret_cast<const uint64_t*>(data);

    uint64x2_p ab = VecLoad(state+0);
    uint64x2_p cd = VecLoad(state+2);
    uint64x2_p ef = VecLoad(state+4);
    uint64x2_p gh = VecLoad(state+6);
    uint64x2_p W[16], S[8], vm, vk;

    size_t blocks = length / SHA512::BLOCKSIZE;
    while (blocks--)
    {
        unsigned int offset=0;

        S[A] = ab; S[C] = cd;
        S[E] = ef; S[G] = gh;
        S[B] = VecShiftLeftOctet<8>(S[A]);
        S[D] = VecShiftLeftOctet<8>(S[C]);
        S[F] = VecShiftLeftOctet<8>(S[E]);
        S[H] = VecShiftLeftOctet<8>(S[G]);

        // Rounds 0-15, feeding the message words into the schedule
        vk = VecLoad(offset, k);
        vm = VecLoad64(m, offset);
        SHA512_ROUND1<0>(W,S, vk,vm);
        offset+=16;

        vk = VecShiftLeftOctet<8>(vk);
        vm = VecShiftLeftOctet<8>(vm);
        SHA512_ROUND1<1>(W,S, vk,vm);

        vk = VecLoad(offset, k);
        vm = VecLoad64(m, offset);
        SHA512_ROUND1<2>(W,S, vk,vm);
        offset+=16;

        vk = VecShiftLeftOctet<8>(vk);
        vm = VecShiftLeftOctet<8>(vm);
        SHA512_ROUND1<3>(W,S, vk,vm);

        vk = VecLoad(offset, k);
        vm = VecLoad64(m, offset);
        SHA512_ROUND1<4>(W,S, vk,vm);
        offset+=16;

        vk = VecShiftLeftOctet<8>(vk);
        vm = VecShiftLeftOctet<8>(vm);
        SHA512_ROUND1<5>(W,S, vk,vm);

        vk = VecLoad(offset, k);
        vm = VecLoad64(m, offset);
        SHA512_ROUND1<6>(W,S, vk,vm);
        offset+=16;

        vk = VecShiftLeftOctet<8>(vk);
        vm = VecShiftLeftOctet<8>(vm);
        SHA512_ROUND1<7>(W,S, vk,vm);

        vk = VecLoad(offset, k);
        vm = VecLoad64(m, offset);
        SHA512_ROUND1<8>(W,S, vk,vm);
        offset+=16;

        vk = VecShiftLeftOctet<8>(vk);
        vm = VecShiftLeftOctet<8>(vm);
        SHA512_ROUND1<9>(W,S, vk,vm);

        vk = VecLoad(offset, k);
        vm = VecLoad64(m, offset);
        SHA512_ROUND1<10>(W,S, vk,vm);
        offset+=16;

        vk = VecShiftLeftOctet<8>(vk);
        vm = VecShiftLeftOctet<8>(vm);
        SHA512_ROUND1<11>(W,S, vk,vm);

        vk = VecLoad(offset, k);
        vm = VecLoad64(m, offset);
        SHA512_ROUND1<12>(W,S, vk,vm);
        offset+=16;

        vk = VecShiftLeftOctet<8>(vk);
        vm = VecShiftLeftOctet<8>(vm);
        SHA512_ROUND1<13>(W,S, vk,vm);

        vk = VecLoad(offset, k);
        vm = VecLoad64(m, offset);
        SHA512_ROUND1<14>(W,S, vk,vm);
        offset+=16;

        vk = VecShiftLeftOctet<8>(vk);
        vm = VecShiftLeftOctet<8>(vm);
        SHA512_ROUND1<15>(W,S, vk,vm);

        // Rounds 16-79, extending the message schedule
        for (unsigned int i=16; i<80; i+=16)
        {
            vk = VecLoad(offset, k);
            SHA512_ROUND2<0>(W,S, vk);
            SHA512_ROUND2<1>(W,S, VecShiftLeftOctet<8>(vk));
            offset+=16;

            vk = VecLoad(offset, k);
            SHA512_ROUND2<2>(W,S, vk);
            SHA512_ROUND2<3>(W,S, VecShiftLeftOctet<8>(vk));
            offset+=16;

            vk = VecLoad(offset, k);
            SHA512_ROUND2<4>(W,S, vk);
            SHA512_ROUND2<5>(W,S, VecShiftLeftOctet<8>(vk));
            offset+=16;

            vk = VecLoad(offset, k);
            SHA512_ROUND2<6>(W,S, vk);
            SHA512_ROUND2<7>(W,S, VecShiftLeftOctet<8>(vk));
            offset+=16;

            vk = VecLoad(offset, k);
            SHA512_ROUND2<8>(W,S, vk);
            SHA512_ROUND2<9>(W,S, VecShiftLeftOctet<8>(vk));
            offset+=16;

            vk = VecLoad(offset, k);
            SHA512_ROUND2<10>(W,S, vk);
            SHA512_ROUND2<11>(W,S, VecShiftLeftOctet<8>(vk));
            offset+=16;

            vk = VecLoad(offset, k);
            SHA512_ROUND2<12>(W,S, vk);
            SHA512_ROUND2<13>(W,S, VecShiftLeftOctet<8>(vk));
            offset+=16;

            vk = VecLoad(offset, k);
            SHA512_ROUND2<14>(W,S, vk);
            SHA512_ROUND2<15>(W,S, VecShiftLeftOctet<8>(vk));
            offset+=16;
        }

        ab += VectorPack(S[A],S[B]);
        cd += VectorPack(S[C],S[D]);
        ef += VectorPack(S[E],S[F]);
        gh += VectorPack(S[G],S[H]);

        m += 16;
    }

    VecStore64(ab, state+0);
    VecStore64(cd, state+2);
    VecStore64(ef, state+4);
    VecStore64(gh, state+6);
}
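// Illustrative only: a minimal sketch, assuming POWER8 in-core crypto was detected.
// The initial values are the standard SHA-512 IV from FIPS 180-4; the ByteOrder
// argument is unused because these kernels always consume big-endian message words.
// "block" is a hypothetical pointer to one padded 128-byte message block.
//
//   word64 iv[8] = {W64LIT(0x6A09E667F3BCC908), W64LIT(0xBB67AE8584CAA73B),
//                   W64LIT(0x3C6EF372FE94F82B), W64LIT(0xA54FF53A5F1D36F1),
//                   W64LIT(0x510E527FADE682D1), W64LIT(0x9B05688C2B3E6C1F),
//                   W64LIT(0x1F83D9ABFB41BD6B), W64LIT(0x5BE0CD19137E2179)};
//   SHA512_HashMultipleBlocks_POWER8(iv, block, SHA512::BLOCKSIZE, BIG_ENDIAN_ORDER);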
#endif  // CRYPTOPP_POWER8_SHA_AVAILABLE

NAMESPACE_END