// argon2_base.h
  1. #pragma once
  2. #include <util/generic/yexception.h>
  3. #include <library/cpp/digest/argonish/argon2.h>
  4. #include <library/cpp/digest/argonish/internal/blake2b/blake2b.h>
  5. #include <library/cpp/threading/poor_man_openmp/thread_helper.h>
  6. namespace NArgonish {
  7. const ui32 ARGON2_PREHASH_DIGEST_LENGTH = 64;
  8. const ui32 ARGON2_SECRET_MAX_LENGTH = 64;
  9. const ui32 ARGON2_PREHASH_SEED_LENGTH = 72;
  10. const ui32 ARGON2_BLOCK_SIZE = 1024;
  11. const ui32 ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8;
  12. const ui32 ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16;
  13. const ui32 ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32;
  14. const ui32 ARGON2_ADDRESSES_IN_BLOCK = 128;
  15. const ui32 ARGON2_SYNC_POINTS = 4;
  16. const ui32 ARGON2_SALT_MIN_LEN = 8;
  17. const ui32 ARGON2_MIN_OUTLEN = 4;
  18. struct TBlock {
  19. ui64 V[ARGON2_QWORDS_IN_BLOCK];
  20. };
    /**
     * TArgon2 — shared implementation of the Argon2 memory-hard password hash
     * (variants Argon2d / Argon2i / Argon2id, version 0x13).
     *
     * Template parameters:
     *   instructionSet - SIMD flavour of Blake2B / block mixing to instantiate;
     *   mcost          - requested memory cost, in 1 KiB blocks;
     *   threads        - number of lanes (also the number of worker threads).
     *
     * The instruction-set-specific block primitives FillBlock_ / CopyBlock_ /
     * XorBlock_ are pure virtual and supplied by subclasses.
     */
    template <EInstructionSet instructionSet, ui32 mcost, ui32 threads>
    class TArgon2: public IArgon2Base {
    public:
        /**
         * @param atype   Argon2 variant to compute
         * @param tcost   number of passes over memory (time cost)
         * @param key     optional secret value ("pepper"); may be nullptr
         * @param keylen  secret length in bytes.
         *                NOTE(review): not range-checked here — presumably
         *                validated against ARGON2_SECRET_MAX_LENGTH by the
         *                factory/proxy layer; confirm before reuse.
         */
        TArgon2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen)
            : SecretLen_(keylen)
            , Tcost_(tcost)
            , Atype_(atype)
        {
            if (SecretLen_)
                memcpy(Secret_, key, keylen);
        }

        /* Wipe the secret key so it does not linger in memory after the object dies. */
        virtual ~TArgon2() override {
            if (SecretLen_) {
                SecureZeroMemory_(Secret_, SecretLen_);
                SecretLen_ = 0;
            }
        }

        /* Computes the Argon2 tag of pwd into out, allocating the working memory internally. */
        virtual void Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen,
                          ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override {
            TArrayHolder<TBlock> buffer(new TBlock[MemoryBlocks_]);
            InternalHash_(buffer.Get(), pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen);
        }

        /* Recomputes the tag for pwd and compares it with the expected hash without early exit. */
        virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen,
                            const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override {
            TArrayHolder<ui8> hashResult(new ui8[hashlen]);
            Hash(pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen);
            return SecureCompare_(hash, hashResult.Get(), hashlen);
        }

        /* Same as Hash() but uses caller-provided working memory of at least GetMemorySize() bytes.
         * @throws yexception if memory is null or too small. */
        virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen,
                                          const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen,
                                          const ui8* aad = nullptr, ui32 aadlen = 0) const override {
            if (memory == nullptr || mlen < sizeof(TBlock) * MemoryBlocks_)
                ythrow yexception() << "memory is null or its size is not enough";
            InternalHash_((TBlock*)memory, pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen);
        }

        /* Same as Verify() but uses caller-provided working memory. */
        virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen,
                                            const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen,
                                            const ui8* aad = nullptr, ui32 aadlen = 0) const override {
            TArrayHolder<ui8> hashResult(new ui8[hashlen]);
            HashWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen);
            return SecureCompare_(hashResult.Get(), hash, hashlen);
        }

        /* Bytes of working memory required by HashWithCustomMemory()/VerifyWithCustomMemory(). */
        virtual size_t GetMemorySize() const override {
            return MemoryBlocks_ * sizeof(TBlock);
        }

    protected: /* Constants */
        ui8 Secret_[ARGON2_SECRET_MAX_LENGTH] = {0}; // optional secret key (pepper)
        ui32 SecretLen_ = 0;                         // actual secret length in bytes
        ui32 Tcost_;                                 // number of passes over memory
        EArgon2Type Atype_;                          // Argon2d / Argon2i / Argon2id
        static constexpr ui32 Lanes_ = threads;
        /* Requested mcost rounded down to a multiple of Lanes_ * ARGON2_SYNC_POINTS,
         * but never below the minimum of 2 * ARGON2_SYNC_POINTS blocks per lane. */
        static constexpr ui32 MemoryBlocks_ = (mcost >= 2 * ARGON2_SYNC_POINTS * Lanes_) ? (mcost - mcost % (Lanes_ * ARGON2_SYNC_POINTS)) : 2 * ARGON2_SYNC_POINTS * Lanes_;
        static constexpr ui32 SegmentLength_ = MemoryBlocks_ / (Lanes_ * ARGON2_SYNC_POINTS); // blocks per segment (one slice of one lane)
        static constexpr ui32 LaneLength_ = SegmentLength_ * ARGON2_SYNC_POINTS;              // blocks per lane

    protected: /* Prototypes */
        /* Mixes prevBlock and refBlock into nextBlock (the Argon2 compression
         * function G); when withXor is true the result is additionally XORed
         * with nextBlock's previous contents. */
        virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock,
                                TBlock* nextBlock, bool withXor) const = 0;
        virtual void CopyBlock_(TBlock* dst, const TBlock* src) const = 0;
        virtual void XorBlock_(TBlock* dst, const TBlock* src) const = 0;

    protected: /* Static functions */
        /* Compares two buffers without an early exit so that timing does not
         * reveal the position of the first mismatching byte. */
        static bool SecureCompare_(const ui8* buffer1, const ui8* buffer2, ui32 len) {
            bool result = true;
            for (ui32 i = 0; i < len; ++i) {
                result &= (buffer1[i] == buffer2[i]);
            }
            return result;
        }

        /* memset through a volatile function pointer: keeps the compiler from
         * optimizing away the wipe of memory that is about to go out of scope. */
        static void SecureZeroMemory_(void* src, size_t len) {
            static void* (*const volatile memset_v)(void*, int, size_t) = &memset;
            memset_v(src, 0, len);
        }

        /* Stores a 32-bit value at mem.
         * NOTE(review): assumes a little-endian, alignment-tolerant target —
         * confirm before porting to other architectures. */
        static void Store32_(ui32 value, void* mem) {
            *((ui32*)mem) = value;
        }

        /* out = Blake2b(in), both exactly BLAKE2B_OUTBYTES long. */
        static void Blake2BHash64_(ui8 out[BLAKE2B_OUTBYTES], const ui8 in[BLAKE2B_OUTBYTES]) {
            TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES);
            hash.Update(in, BLAKE2B_OUTBYTES);
            hash.Final(out, BLAKE2B_OUTBYTES);
        }

        /* The H' function specialized for a fixed 1 KiB output: stretches the
         * 72-byte prehash seed into one memory block by chaining Blake2b
         * digests and emitting the first half of each intermediate digest
         * (the final digest is emitted in full). */
        static void ExpandBlockhash_(ui8 expanded[ARGON2_BLOCK_SIZE], const ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]) {
            ui8 out_buffer[BLAKE2B_OUTBYTES];
            ui8 in_buffer[BLAKE2B_OUTBYTES];
            const ui32 HALF_OUT_BYTES = BLAKE2B_OUTBYTES / 2;
            const ui32 HASH_BLOCKS_COUNT = ((ARGON2_BLOCK_SIZE / HALF_OUT_BYTES));
            TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES);
            hash.Update(ARGON2_BLOCK_SIZE); // H' prepends the desired output length
            hash.Update(blockhash, ARGON2_PREHASH_SEED_LENGTH);
            hash.Final(out_buffer, BLAKE2B_OUTBYTES);
            memcpy(expanded, out_buffer, HALF_OUT_BYTES);
            /* Middle digests: only the first HALF_OUT_BYTES of each are kept. */
            for (ui32 i = 1; i < HASH_BLOCKS_COUNT - 2; ++i) {
                memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
                Blake2BHash64_(out_buffer, in_buffer);
                memcpy(expanded + (i * HALF_OUT_BYTES), out_buffer, HALF_OUT_BYTES);
            }
            /* The last digest fills the remaining BLAKE2B_OUTBYTES in full. */
            Blake2BHash64_(in_buffer, out_buffer);
            memcpy(expanded + HALF_OUT_BYTES * (HASH_BLOCKS_COUNT - 2), in_buffer, BLAKE2B_OUTBYTES);
        }

        /* The variable-length H' function used for the final tag: plain Blake2b
         * for outputs shorter than BLAKE2B_OUTBYTES; otherwise a chain of
         * Blake2b digests whose first halves are concatenated, with a final
         * digest sized to exactly the bytes still missing. */
        static void Blake2BLong_(ui8* out, ui32 outlen, const ui8* in, ui32 inlen) {
            if (outlen < BLAKE2B_OUTBYTES) {
                TBlake2B<instructionSet> hash(outlen);
                hash.Update(outlen); // H' prepends the output length
                hash.Update(in, inlen);
                hash.Final(out, outlen);
            } else {
                ui8 out_buffer[BLAKE2B_OUTBYTES];
                ui8 in_buffer[BLAKE2B_OUTBYTES];
                ui32 toproduce = outlen - BLAKE2B_OUTBYTES / 2;
                TBlake2B<instructionSet> hash1(BLAKE2B_OUTBYTES);
                hash1.Update(outlen);
                hash1.Update(in, inlen);
                hash1.Final(out_buffer, BLAKE2B_OUTBYTES);
                memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
                out += BLAKE2B_OUTBYTES / 2;
                while (toproduce > BLAKE2B_OUTBYTES) {
                    memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
                    TBlake2B<instructionSet> hash2(BLAKE2B_OUTBYTES);
                    hash2.Update(in_buffer, BLAKE2B_OUTBYTES);
                    hash2.Final(out_buffer, BLAKE2B_OUTBYTES);
                    memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
                    out += BLAKE2B_OUTBYTES / 2;
                    toproduce -= BLAKE2B_OUTBYTES / 2;
                }
                memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
                {
                    /* Final digest produces exactly the remaining toproduce bytes. */
                    TBlake2B<instructionSet> hash3(toproduce);
                    hash3.Update(in_buffer, BLAKE2B_OUTBYTES);
                    hash3.Final(out_buffer, toproduce);
                    memcpy(out, out_buffer, toproduce);
                }
            }
        }

        /* Fills every byte of block b with the value in. */
        static void InitBlockValue_(TBlock* b, ui8 in) {
            memset(b->V, in, sizeof(b->V));
        }

    protected: /* Functions */
        /* Full Argon2 pipeline over caller-provided block memory:
         * initial blocks -> Tcost_ filling passes -> final tag extraction. */
        void InternalHash_(TBlock* memory, const ui8* pwd, ui32 pwdlen,
                           const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen,
                           const ui8* aad, ui32 aadlen) const {
            /*
             * all parameters checks are in proxy objects
             */
            Initialize_(memory, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen);
            FillMemoryBlocks_(memory);
            Finalize_(memory, out, outlen);
        }

        /* Computes H0: a 64-byte Blake2b digest binding every parameter and
         * input; each variable-length field is preceded by its 32-bit length. */
        void InitialHash_(ui8 blockhash[ARGON2_PREHASH_DIGEST_LENGTH],
                          ui32 outlen, const ui8* pwd, ui32 pwdlen,
                          const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const {
            TBlake2B<instructionSet> hash(ARGON2_PREHASH_DIGEST_LENGTH);
            /* lanes, but lanes == threads */
            hash.Update(Lanes_);
            /* outlen */
            hash.Update(outlen);
            /* m_cost */
            hash.Update(mcost);
            /* t_cost */
            hash.Update(Tcost_);
            /* version */
            hash.Update(0x00000013);
            /* Argon2 type */
            hash.Update((ui32)Atype_);
            /* pwdlen */
            hash.Update(pwdlen);
            /* pwd */
            hash.Update(pwd, pwdlen);
            /* saltlen */
            hash.Update(saltlen);
            /* salt */
            if (saltlen)
                hash.Update(salt, saltlen);
            /* secret */
            hash.Update(SecretLen_);
            if (SecretLen_)
                hash.Update((void*)Secret_, SecretLen_);
            /* aadlen */
            hash.Update(aadlen);
            if (aadlen)
                hash.Update((void*)aad, aadlen);
            hash.Final(blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
        }

        /* Derives the first two blocks of every lane from H0. The two 32-bit
         * counters appended after the 64-byte digest are the block index and
         * the lane index, written via Store32_. */
        void FillFirstBlocks_(TBlock* blocks, ui8* blockhash) const {
            for (ui32 l = 0; l < Lanes_; l++) {
                /* fill the first block of the lane */
                Store32_(l, blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4);
                Store32_(0, blockhash + ARGON2_PREHASH_DIGEST_LENGTH);
                ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_]), blockhash);
                /* fill the second block of the lane */
                Store32_(1, blockhash + ARGON2_PREHASH_DIGEST_LENGTH);
                ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_ + 1]), blockhash);
            }
        }

        /* The 'if' will be optimized out as the number of threads is known at the compile time */
        void FillMemoryBlocks_(TBlock* memory) const {
            for (ui32 t = 0; t < Tcost_; ++t) {
                for (ui32 s = 0; s < ARGON2_SYNC_POINTS; ++s) {
                    if (Lanes_ == 1)
                        FillSegment_(memory, t, 0, s);
                    else {
                        /* Lanes within one slice are independent, so they run in
                         * parallel; completion of the parallel-for is the
                         * synchronization point between slices. */
                        NYmp::SetThreadCount(Lanes_);
                        NYmp::ParallelForStaticAutoChunk<ui32>(0, Lanes_, [this, &memory, s, t](int k) {
                            this->FillSegment_(memory, t, k, s);
                        });
                    }
                }
            }
        }

        /* Phase 1: compute H0 (with 8 spare bytes for the counters) and fill
         * the first two blocks of every lane. */
        void Initialize_(TBlock* memory, ui32 outlen, const ui8* pwd, ui32 pwdlen,
                         const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const {
            ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH];
            InitialHash_(blockhash, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen);
            FillFirstBlocks_(memory, blockhash);
        }

        /* Size of the window of blocks that the block at (pass, slice, index)
         * may reference: on pass 0 only the slices already finished in this
         * pass, afterwards the whole lane minus the current slice; same-lane
         * references additionally see the blocks produced so far in the
         * current segment. */
        ui32 ComputeReferenceArea_(ui32 pass, ui32 slice, ui32 index, bool sameLane) const {
            ui32 passVal = pass == 0 ? (slice * SegmentLength_) : (LaneLength_ - SegmentLength_);
            return sameLane ? passVal + (index - 1) : passVal + (index == 0 ? -1 : 0);
        }

        /* Maps the 32-bit pseudo-random value to a block index inside the
         * reference window using the spec's quadratic (x^2 / 2^32) mapping,
         * which biases references toward recently written blocks. */
        ui32 IndexAlpha_(ui32 pass, ui32 slice, ui32 index, ui32 pseudoRand, bool sameLane) const {
            ui32 referenceAreaSize = ComputeReferenceArea_(pass, slice, index, sameLane);
            ui64 relativePosition = pseudoRand;
            relativePosition = relativePosition * relativePosition >> 32;
            relativePosition = referenceAreaSize - 1 - (referenceAreaSize * relativePosition >> 32);
            ui32 startPosition = 0;
            /* From pass 1 on, the window starts right after the current slice (wrapping). */
            if (pass != 0)
                startPosition = (slice == ARGON2_SYNC_POINTS - 1) ? 0 : (slice + 1) * SegmentLength_;
            return (ui32)((startPosition + relativePosition) % LaneLength_);
        }

        /* Argon2i address generation: bumps the counter word of inputBlock and
         * produces the next block of 128 pseudo-random addresses by applying
         * the compression function twice. */
        void NextAddresses_(TBlock* addressBlock, TBlock* inputBlock, const TBlock* zeroBlock) const {
            inputBlock->V[6]++;
            FillBlock_(zeroBlock, inputBlock, addressBlock, false);
            FillBlock_(zeroBlock, addressBlock, addressBlock, false);
        }

        /* Phase 3: XOR the last block of every lane together and hash the
         * result into the outlen-byte tag via Blake2BLong_. */
        void Finalize_(const TBlock* memory, ui8* out, ui32 outlen) const {
            TBlock blockhash;
            CopyBlock_(&blockhash, memory + LaneLength_ - 1);
            /* XOR the last blocks */
            for (ui32 l = 1; l < Lanes_; ++l) {
                ui32 lastBlockInLane = l * LaneLength_ + (LaneLength_ - 1);
                XorBlock_(&blockhash, memory + lastBlockInLane);
            }
            Blake2BLong_(out, outlen, (ui8*)blockhash.V, ARGON2_BLOCK_SIZE);
        }

        /* The switch will be optimized out by the compiler as the type is known at the compile time */
        void FillSegment_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const {
            switch (Atype_) {
                case EArgon2Type::Argon2d:
                    FillSegmentD_(memory, pass, lane, slice);
                    return;
                case EArgon2Type::Argon2i:
                    FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2i);
                    return;
                case EArgon2Type::Argon2id:
                    /* Argon2id: data-independent addressing for the first half
                     * of pass 0, data-dependent everywhere else. */
                    if (pass == 0 && slice < ARGON2_SYNC_POINTS / 2)
                        FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2id);
                    else
                        FillSegmentD_(memory, pass, lane, slice);
                    return;
            }
        }

        /* Fills one segment with data-DEPENDENT addressing (Argon2d): the
         * reference index is taken from the first word of the previous block. */
        void FillSegmentD_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const {
            /* Blocks 0 and 1 of each lane were already produced by FillFirstBlocks_. */
            ui32 startingIndex = (pass == 0 && slice == 0) ? 2 : 0;
            ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex;
            ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1;
            for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) {
                /* At a lane boundary the "previous" block wraps to the lane start. */
                if (currOffset % LaneLength_ == 1) {
                    prevOffset = currOffset - 1;
                }
                ui64 pseudoRand = memory[prevOffset].V[0];
                /* During pass 0, slice 0 every reference stays in the current lane. */
                ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_);
                ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane);
                TBlock* refBlock = memory + LaneLength_ * refLane + refIndex;
                /* From pass 1 on, the new block is XORed over the old contents. */
                FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0);
            }
        }

        /* Fills one segment with data-INDEPENDENT addressing (Argon2i, and the
         * first half of Argon2id's pass 0): reference indices come from
         * pre-generated address blocks instead of the data being hashed. */
        void FillSegmentI_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice, EArgon2Type atp) const {
            TBlock addressBlock, inputBlock, zeroBlock;
            InitBlockValue_(&zeroBlock, 0);
            InitBlockValue_(&inputBlock, 0);
            /* Words 0..5 of the input block encode the position and parameters;
             * word 6 is the counter incremented by NextAddresses_. */
            inputBlock.V[0] = pass;
            inputBlock.V[1] = lane;
            inputBlock.V[2] = slice;
            inputBlock.V[3] = MemoryBlocks_;
            inputBlock.V[4] = Tcost_;
            inputBlock.V[5] = (ui64)atp;
            ui32 startingIndex = 0;
            if (pass == 0 && slice == 0) {
                startingIndex = 2; // blocks 0 and 1 already produced by FillFirstBlocks_
                NextAddresses_(&addressBlock, &inputBlock, &zeroBlock);
            }
            ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex;
            ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1;
            for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) {
                /* At a lane boundary the "previous" block wraps to the lane start. */
                if (currOffset % LaneLength_ == 1) {
                    prevOffset = currOffset - 1;
                }
                /* A fresh address block is generated every ARGON2_ADDRESSES_IN_BLOCK iterations. */
                if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
                    NextAddresses_(&addressBlock, &inputBlock, &zeroBlock);
                }
                ui64 pseudoRand = addressBlock.V[i % ARGON2_ADDRESSES_IN_BLOCK];
                ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_);
                ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane);
                TBlock* refBlock = memory + LaneLength_ * refLane + refIndex;
                FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0);
            }
        }
    };
  326. }