#pragma once #include #include #include #include namespace NArgonish { const ui32 ARGON2_PREHASH_DIGEST_LENGTH = 64; const ui32 ARGON2_SECRET_MAX_LENGTH = 64; const ui32 ARGON2_PREHASH_SEED_LENGTH = 72; const ui32 ARGON2_BLOCK_SIZE = 1024; const ui32 ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8; const ui32 ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16; const ui32 ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32; const ui32 ARGON2_ADDRESSES_IN_BLOCK = 128; const ui32 ARGON2_SYNC_POINTS = 4; const ui32 ARGON2_SALT_MIN_LEN = 8; const ui32 ARGON2_MIN_OUTLEN = 4; struct TBlock { ui64 V[ARGON2_QWORDS_IN_BLOCK]; }; template class TArgon2: public IArgon2Base { public: TArgon2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) : SecretLen_(keylen) , Tcost_(tcost) , Atype_(atype) { if (SecretLen_) memcpy(Secret_, key, keylen); } virtual ~TArgon2() override { if (SecretLen_) { SecureZeroMemory_(Secret_, SecretLen_); SecretLen_ = 0; } } virtual void Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override { TArrayHolder buffer(new TBlock[MemoryBlocks_]); InternalHash_(buffer.Get(), pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); } virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override { TArrayHolder hashResult(new ui8[hashlen]); Hash(pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen); return SecureCompare_(hash, hashResult.Get(), hashlen); } virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override { if (memory == nullptr || mlen < sizeof(TBlock) * MemoryBlocks_) ythrow yexception() << "memory is null or its size is not enough"; InternalHash_((TBlock*)memory, pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); } virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override { TArrayHolder hashResult(new ui8[hashlen]); HashWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen); return SecureCompare_(hashResult.Get(), hash, hashlen); } virtual size_t GetMemorySize() const override { return MemoryBlocks_ * sizeof(TBlock); } protected: /* Constants */ ui8 Secret_[ARGON2_SECRET_MAX_LENGTH] = {0}; ui32 SecretLen_ = 0; ui32 Tcost_; EArgon2Type Atype_; static constexpr ui32 Lanes_ = threads; static constexpr ui32 MemoryBlocks_ = (mcost >= 2 * ARGON2_SYNC_POINTS * Lanes_) ? (mcost - mcost % (Lanes_ * ARGON2_SYNC_POINTS)) : 2 * ARGON2_SYNC_POINTS * Lanes_; static constexpr ui32 SegmentLength_ = MemoryBlocks_ / (Lanes_ * ARGON2_SYNC_POINTS); static constexpr ui32 LaneLength_ = SegmentLength_ * ARGON2_SYNC_POINTS; protected: /* Prototypes */ virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const = 0; virtual void CopyBlock_(TBlock* dst, const TBlock* src) const = 0; virtual void XorBlock_(TBlock* dst, const TBlock* src) const = 0; protected: /* Static functions */ static bool SecureCompare_(const ui8* buffer1, const ui8* buffer2, ui32 len) { bool result = true; for (ui32 i = 0; i < len; ++i) { result &= (buffer1[i] == buffer2[i]); } return result; } static void SecureZeroMemory_(void* src, size_t len) { static void* (*const volatile memset_v)(void*, int, size_t) = &memset; memset_v(src, 0, len); } static void Store32_(ui32 value, void* mem) { *((ui32*)mem) = value; } static void Blake2BHash64_(ui8 out[BLAKE2B_OUTBYTES], const ui8 in[BLAKE2B_OUTBYTES]) { TBlake2B hash(BLAKE2B_OUTBYTES); hash.Update(in, BLAKE2B_OUTBYTES); hash.Final(out, BLAKE2B_OUTBYTES); } static void ExpandBlockhash_(ui8 expanded[ARGON2_BLOCK_SIZE], const ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]) { ui8 out_buffer[BLAKE2B_OUTBYTES]; ui8 in_buffer[BLAKE2B_OUTBYTES]; const ui32 HALF_OUT_BYTES = BLAKE2B_OUTBYTES / 2; const ui32 HASH_BLOCKS_COUNT = ((ARGON2_BLOCK_SIZE / HALF_OUT_BYTES)); TBlake2B hash(BLAKE2B_OUTBYTES); hash.Update(ARGON2_BLOCK_SIZE); hash.Update(blockhash, ARGON2_PREHASH_SEED_LENGTH); hash.Final(out_buffer, BLAKE2B_OUTBYTES); memcpy(expanded, out_buffer, HALF_OUT_BYTES); for (ui32 i = 1; i < HASH_BLOCKS_COUNT - 2; ++i) { memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); Blake2BHash64_(out_buffer, in_buffer); memcpy(expanded + (i * HALF_OUT_BYTES), out_buffer, HALF_OUT_BYTES); } Blake2BHash64_(in_buffer, out_buffer); memcpy(expanded + HALF_OUT_BYTES * (HASH_BLOCKS_COUNT - 2), in_buffer, BLAKE2B_OUTBYTES); } static void Blake2BLong_(ui8* out, ui32 outlen, const ui8* in, ui32 inlen) { if (outlen < BLAKE2B_OUTBYTES) { TBlake2B hash(outlen); hash.Update(outlen); hash.Update(in, inlen); hash.Final(out, outlen); } else { ui8 out_buffer[BLAKE2B_OUTBYTES]; ui8 in_buffer[BLAKE2B_OUTBYTES]; ui32 toproduce = outlen - BLAKE2B_OUTBYTES / 2; TBlake2B hash1(BLAKE2B_OUTBYTES); hash1.Update(outlen); hash1.Update(in, inlen); hash1.Final(out_buffer, BLAKE2B_OUTBYTES); memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); out += BLAKE2B_OUTBYTES / 2; while (toproduce > BLAKE2B_OUTBYTES) { memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); TBlake2B hash2(BLAKE2B_OUTBYTES); hash2.Update(in_buffer, BLAKE2B_OUTBYTES); hash2.Final(out_buffer, BLAKE2B_OUTBYTES); memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); out += BLAKE2B_OUTBYTES / 2; toproduce -= BLAKE2B_OUTBYTES / 2; } memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); { TBlake2B hash3(toproduce); hash3.Update(in_buffer, BLAKE2B_OUTBYTES); hash3.Final(out_buffer, toproduce); memcpy(out, out_buffer, toproduce); } } } static void InitBlockValue_(TBlock* b, ui8 in) { memset(b->V, in, sizeof(b->V)); } protected: /* Functions */ void InternalHash_(TBlock* memory, const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, const ui8* aad, ui32 aadlen) const { /* * all parameters checks are in proxy objects */ Initialize_(memory, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen); FillMemoryBlocks_(memory); Finalize_(memory, out, outlen); } void InitialHash_(ui8 blockhash[ARGON2_PREHASH_DIGEST_LENGTH], ui32 outlen, const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const { TBlake2B hash(ARGON2_PREHASH_DIGEST_LENGTH); /* lanes, but lanes == threads */ hash.Update(Lanes_); /* outlen */ hash.Update(outlen); /* m_cost */ hash.Update(mcost); /* t_cost */ hash.Update(Tcost_); /* version */ hash.Update(0x00000013); /* Argon2 type */ hash.Update((ui32)Atype_); /* pwdlen */ hash.Update(pwdlen); /* pwd */ hash.Update(pwd, pwdlen); /* saltlen */ hash.Update(saltlen); /* salt */ if (saltlen) hash.Update(salt, saltlen); /* secret */ hash.Update(SecretLen_); if (SecretLen_) hash.Update((void*)Secret_, SecretLen_); /* aadlen */ hash.Update(aadlen); if (aadlen) hash.Update((void*)aad, aadlen); hash.Final(blockhash, ARGON2_PREHASH_DIGEST_LENGTH); } void FillFirstBlocks_(TBlock* blocks, ui8* blockhash) const { for (ui32 l = 0; l < Lanes_; l++) { /* fill the first block of the lane */ Store32_(l, blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4); Store32_(0, blockhash + ARGON2_PREHASH_DIGEST_LENGTH); ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_]), blockhash); /* fill the second block of the lane */ Store32_(1, blockhash + ARGON2_PREHASH_DIGEST_LENGTH); ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_ + 1]), blockhash); } } /* The 'if' will be optimized out as the number of threads is known at the compile time */ void FillMemoryBlocks_(TBlock* memory) const { for (ui32 t = 0; t < Tcost_; ++t) { for (ui32 s = 0; s < ARGON2_SYNC_POINTS; ++s) { if (Lanes_ == 1) FillSegment_(memory, t, 0, s); else { NYmp::SetThreadCount(Lanes_); NYmp::ParallelForStaticAutoChunk(0, Lanes_, [this, &memory, s, t](int k) { this->FillSegment_(memory, t, k, s); }); } } } } void Initialize_(TBlock* memory, ui32 outlen, const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const { ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]; InitialHash_(blockhash, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen); FillFirstBlocks_(memory, blockhash); } ui32 ComputeReferenceArea_(ui32 pass, ui32 slice, ui32 index, bool sameLane) const { ui32 passVal = pass == 0 ? (slice * SegmentLength_) : (LaneLength_ - SegmentLength_); return sameLane ? passVal + (index - 1) : passVal + (index == 0 ? -1 : 0); } ui32 IndexAlpha_(ui32 pass, ui32 slice, ui32 index, ui32 pseudoRand, bool sameLane) const { ui32 referenceAreaSize = ComputeReferenceArea_(pass, slice, index, sameLane); ui64 relativePosition = pseudoRand; relativePosition = relativePosition * relativePosition >> 32; relativePosition = referenceAreaSize - 1 - (referenceAreaSize * relativePosition >> 32); ui32 startPosition = 0; if (pass != 0) startPosition = (slice == ARGON2_SYNC_POINTS - 1) ? 0 : (slice + 1) * SegmentLength_; return (ui32)((startPosition + relativePosition) % LaneLength_); } void NextAddresses_(TBlock* addressBlock, TBlock* inputBlock, const TBlock* zeroBlock) const { inputBlock->V[6]++; FillBlock_(zeroBlock, inputBlock, addressBlock, false); FillBlock_(zeroBlock, addressBlock, addressBlock, false); } void Finalize_(const TBlock* memory, ui8* out, ui32 outlen) const { TBlock blockhash; CopyBlock_(&blockhash, memory + LaneLength_ - 1); /* XOR the last blocks */ for (ui32 l = 1; l < Lanes_; ++l) { ui32 lastBlockInLane = l * LaneLength_ + (LaneLength_ - 1); XorBlock_(&blockhash, memory + lastBlockInLane); } Blake2BLong_(out, outlen, (ui8*)blockhash.V, ARGON2_BLOCK_SIZE); } /* The switch will be optimized out by the compiler as the type is known at the compile time */ void FillSegment_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const { switch (Atype_) { case EArgon2Type::Argon2d: FillSegmentD_(memory, pass, lane, slice); return; case EArgon2Type::Argon2i: FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2i); return; case EArgon2Type::Argon2id: if (pass == 0 && slice < ARGON2_SYNC_POINTS / 2) FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2id); else FillSegmentD_(memory, pass, lane, slice); return; } } void FillSegmentD_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const { ui32 startingIndex = (pass == 0 && slice == 0) ? 2 : 0; ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex; ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1; for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) { if (currOffset % LaneLength_ == 1) { prevOffset = currOffset - 1; } ui64 pseudoRand = memory[prevOffset].V[0]; ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_); ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane); TBlock* refBlock = memory + LaneLength_ * refLane + refIndex; FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0); } } void FillSegmentI_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice, EArgon2Type atp) const { TBlock addressBlock, inputBlock, zeroBlock; InitBlockValue_(&zeroBlock, 0); InitBlockValue_(&inputBlock, 0); inputBlock.V[0] = pass; inputBlock.V[1] = lane; inputBlock.V[2] = slice; inputBlock.V[3] = MemoryBlocks_; inputBlock.V[4] = Tcost_; inputBlock.V[5] = (ui64)atp; ui32 startingIndex = 0; if (pass == 0 && slice == 0) { startingIndex = 2; NextAddresses_(&addressBlock, &inputBlock, &zeroBlock); } ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex; ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1; for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) { if (currOffset % LaneLength_ == 1) { prevOffset = currOffset - 1; } if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { NextAddresses_(&addressBlock, &inputBlock, &zeroBlock); } ui64 pseudoRand = addressBlock.V[i % ARGON2_ADDRESSES_IN_BLOCK]; ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_); ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane); TBlock* refBlock = memory + LaneLength_ * refLane + refIndex; FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0); } } }; }