// blake3_portable.c (5.8 KB)

  1. #include "blake3_impl.h"
  2. #include <string.h>
  3. INLINE uint32_t rotr32(uint32_t w, uint32_t c) {
  4. return (w >> c) | (w << (32 - c));
  5. }
  6. INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
  7. uint32_t x, uint32_t y) {
  8. state[a] = state[a] + state[b] + x;
  9. state[d] = rotr32(state[d] ^ state[a], 16);
  10. state[c] = state[c] + state[d];
  11. state[b] = rotr32(state[b] ^ state[c], 12);
  12. state[a] = state[a] + state[b] + y;
  13. state[d] = rotr32(state[d] ^ state[a], 8);
  14. state[c] = state[c] + state[d];
  15. state[b] = rotr32(state[b] ^ state[c], 7);
  16. }
  17. INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) {
  18. // Select the message schedule based on the round.
  19. const uint8_t *schedule = MSG_SCHEDULE[round];
  20. // Mix the columns.
  21. g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
  22. g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
  23. g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
  24. g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);
  25. // Mix the rows.
  26. g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
  27. g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
  28. g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
  29. g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
  30. }
  31. INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8],
  32. const uint8_t block[BLAKE3_BLOCK_LEN],
  33. uint8_t block_len, uint64_t counter, uint8_t flags) {
  34. uint32_t block_words[16];
  35. block_words[0] = load32(block + 4 * 0);
  36. block_words[1] = load32(block + 4 * 1);
  37. block_words[2] = load32(block + 4 * 2);
  38. block_words[3] = load32(block + 4 * 3);
  39. block_words[4] = load32(block + 4 * 4);
  40. block_words[5] = load32(block + 4 * 5);
  41. block_words[6] = load32(block + 4 * 6);
  42. block_words[7] = load32(block + 4 * 7);
  43. block_words[8] = load32(block + 4 * 8);
  44. block_words[9] = load32(block + 4 * 9);
  45. block_words[10] = load32(block + 4 * 10);
  46. block_words[11] = load32(block + 4 * 11);
  47. block_words[12] = load32(block + 4 * 12);
  48. block_words[13] = load32(block + 4 * 13);
  49. block_words[14] = load32(block + 4 * 14);
  50. block_words[15] = load32(block + 4 * 15);
  51. state[0] = cv[0];
  52. state[1] = cv[1];
  53. state[2] = cv[2];
  54. state[3] = cv[3];
  55. state[4] = cv[4];
  56. state[5] = cv[5];
  57. state[6] = cv[6];
  58. state[7] = cv[7];
  59. state[8] = IV[0];
  60. state[9] = IV[1];
  61. state[10] = IV[2];
  62. state[11] = IV[3];
  63. state[12] = counter_low(counter);
  64. state[13] = counter_high(counter);
  65. state[14] = (uint32_t)block_len;
  66. state[15] = (uint32_t)flags;
  67. round_fn(state, &block_words[0], 0);
  68. round_fn(state, &block_words[0], 1);
  69. round_fn(state, &block_words[0], 2);
  70. round_fn(state, &block_words[0], 3);
  71. round_fn(state, &block_words[0], 4);
  72. round_fn(state, &block_words[0], 5);
  73. round_fn(state, &block_words[0], 6);
  74. }
  75. void blake3_compress_in_place_portable(uint32_t cv[8],
  76. const uint8_t block[BLAKE3_BLOCK_LEN],
  77. uint8_t block_len, uint64_t counter,
  78. uint8_t flags) {
  79. uint32_t state[16];
  80. compress_pre(state, cv, block, block_len, counter, flags);
  81. cv[0] = state[0] ^ state[8];
  82. cv[1] = state[1] ^ state[9];
  83. cv[2] = state[2] ^ state[10];
  84. cv[3] = state[3] ^ state[11];
  85. cv[4] = state[4] ^ state[12];
  86. cv[5] = state[5] ^ state[13];
  87. cv[6] = state[6] ^ state[14];
  88. cv[7] = state[7] ^ state[15];
  89. }
  90. void blake3_compress_xof_portable(const uint32_t cv[8],
  91. const uint8_t block[BLAKE3_BLOCK_LEN],
  92. uint8_t block_len, uint64_t counter,
  93. uint8_t flags, uint8_t out[64]) {
  94. uint32_t state[16];
  95. compress_pre(state, cv, block, block_len, counter, flags);
  96. store32(&out[0 * 4], state[0] ^ state[8]);
  97. store32(&out[1 * 4], state[1] ^ state[9]);
  98. store32(&out[2 * 4], state[2] ^ state[10]);
  99. store32(&out[3 * 4], state[3] ^ state[11]);
  100. store32(&out[4 * 4], state[4] ^ state[12]);
  101. store32(&out[5 * 4], state[5] ^ state[13]);
  102. store32(&out[6 * 4], state[6] ^ state[14]);
  103. store32(&out[7 * 4], state[7] ^ state[15]);
  104. store32(&out[8 * 4], state[8] ^ cv[0]);
  105. store32(&out[9 * 4], state[9] ^ cv[1]);
  106. store32(&out[10 * 4], state[10] ^ cv[2]);
  107. store32(&out[11 * 4], state[11] ^ cv[3]);
  108. store32(&out[12 * 4], state[12] ^ cv[4]);
  109. store32(&out[13 * 4], state[13] ^ cv[5]);
  110. store32(&out[14 * 4], state[14] ^ cv[6]);
  111. store32(&out[15 * 4], state[15] ^ cv[7]);
  112. }
  113. INLINE void hash_one_portable(const uint8_t *input, size_t blocks,
  114. const uint32_t key[8], uint64_t counter,
  115. uint8_t flags, uint8_t flags_start,
  116. uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
  117. uint32_t cv[8];
  118. memcpy(cv, key, BLAKE3_KEY_LEN);
  119. uint8_t block_flags = flags | flags_start;
  120. while (blocks > 0) {
  121. if (blocks == 1) {
  122. block_flags |= flags_end;
  123. }
  124. blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter,
  125. block_flags);
  126. input = &input[BLAKE3_BLOCK_LEN];
  127. blocks -= 1;
  128. block_flags = flags;
  129. }
  130. store_cv_words(out, cv);
  131. }
  132. void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
  133. size_t blocks, const uint32_t key[8],
  134. uint64_t counter, bool increment_counter,
  135. uint8_t flags, uint8_t flags_start,
  136. uint8_t flags_end, uint8_t *out) {
  137. while (num_inputs > 0) {
  138. hash_one_portable(inputs[0], blocks, key, counter, flags, flags_start,
  139. flags_end, out);
  140. if (increment_counter) {
  141. counter += 1;
  142. }
  143. inputs += 1;
  144. num_inputs -= 1;
  145. out = &out[BLAKE3_OUT_LEN];
  146. }
  147. }