// blamka_ssse3.h
// SSSE3 helpers in namespace NArgonish: BlamkaG1SSSE3, BlamkaG2SSSE3, DiagonalizeSSSE3, UndiagonalizeSSSE3.
  1. #pragma once
  2. #include <library/cpp/digest/argonish/internal/rotations/rotations_ssse3.h>
  3. namespace NArgonish {
  4. static inline void BlamkaG1SSSE3(
  5. __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1,
  6. __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) {
  7. __m128i ml = _mm_mul_epu32(a0, b0);
  8. ml = _mm_add_epi64(ml, ml);
  9. a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml));
  10. ml = _mm_mul_epu32(a1, b1);
  11. ml = _mm_add_epi64(ml, ml);
  12. a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml));
  13. d0 = _mm_xor_si128(d0, a0);
  14. d1 = _mm_xor_si128(d1, a1);
  15. d0 = Rotr32(d0);
  16. d1 = Rotr32(d1);
  17. ml = _mm_mul_epu32(c0, d0);
  18. ml = _mm_add_epi64(ml, ml);
  19. c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml));
  20. ml = _mm_mul_epu32(c1, d1);
  21. ml = _mm_add_epi64(ml, ml);
  22. c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1));
  23. b0 = _mm_xor_si128(b0, c0);
  24. b1 = _mm_xor_si128(b1, c1);
  25. b0 = Rotr24(b0);
  26. b1 = Rotr24(b1);
  27. }
  28. static inline void BlamkaG2SSSE3(
  29. __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1,
  30. __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) {
  31. __m128i ml = _mm_mul_epu32(a0, b0);
  32. ml = _mm_add_epi64(ml, ml);
  33. a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml));
  34. ml = _mm_mul_epu32(a1, b1);
  35. ml = _mm_add_epi64(ml, ml);
  36. a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml));
  37. d0 = _mm_xor_si128(d0, a0);
  38. d1 = _mm_xor_si128(d1, a1);
  39. d0 = Rotr16(d0);
  40. d1 = Rotr16(d1);
  41. ml = _mm_mul_epu32(c0, d0);
  42. ml = _mm_add_epi64(ml, ml);
  43. c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml));
  44. ml = _mm_mul_epu32(c1, d1);
  45. ml = _mm_add_epi64(ml, ml);
  46. c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1));
  47. b0 = _mm_xor_si128(b0, c0);
  48. b1 = _mm_xor_si128(b1, c1);
  49. b0 = Rotr63(b0);
  50. b1 = Rotr63(b1);
  51. }
  52. static inline void DiagonalizeSSSE3(
  53. __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) {
  54. __m128i t0 = _mm_alignr_epi8(b1, b0, 8);
  55. __m128i t1 = _mm_alignr_epi8(b0, b1, 8);
  56. b0 = t0;
  57. b1 = t1;
  58. t0 = c0;
  59. c0 = c1;
  60. c1 = t0;
  61. t0 = _mm_alignr_epi8(d1, d0, 8);
  62. t1 = _mm_alignr_epi8(d0, d1, 8);
  63. d0 = t1;
  64. d1 = t0;
  65. }
  66. static inline void UndiagonalizeSSSE3(
  67. __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) {
  68. __m128i t0 = _mm_alignr_epi8(b0, b1, 8);
  69. __m128i t1 = _mm_alignr_epi8(b1, b0, 8);
  70. b0 = t0;
  71. b1 = t1;
  72. t0 = c0;
  73. c0 = c1;
  74. c1 = t0;
  75. t0 = _mm_alignr_epi8(d0, d1, 8);
  76. t1 = _mm_alignr_epi8(d1, d0, 8);
  77. d0 = t1;
  78. d1 = t0;
  79. }
  80. }