123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388 |
- #pragma once
- #include <util/generic/yexception.h>
- #include <library/cpp/digest/argonish/argon2.h>
- #include <library/cpp/digest/argonish/internal/blake2b/blake2b.h>
- #include <library/cpp/threading/poor_man_openmp/thread_helper.h>
- namespace NArgonish {
- const ui32 ARGON2_PREHASH_DIGEST_LENGTH = 64;
- const ui32 ARGON2_SECRET_MAX_LENGTH = 64;
- const ui32 ARGON2_PREHASH_SEED_LENGTH = 72;
- const ui32 ARGON2_BLOCK_SIZE = 1024;
- const ui32 ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8;
- const ui32 ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16;
- const ui32 ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32;
- const ui32 ARGON2_ADDRESSES_IN_BLOCK = 128;
- const ui32 ARGON2_SYNC_POINTS = 4;
- const ui32 ARGON2_SALT_MIN_LEN = 8;
- const ui32 ARGON2_MIN_OUTLEN = 4;
- struct TBlock {
- ui64 V[ARGON2_QWORDS_IN_BLOCK];
- };
- template <EInstructionSet instructionSet, ui32 mcost, ui32 threads>
- class TArgon2: public IArgon2Base {
- public:
- TArgon2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen)
- : SecretLen_(keylen)
- , Tcost_(tcost)
- , Atype_(atype)
- {
- if (SecretLen_)
- memcpy(Secret_, key, keylen);
- }
- virtual ~TArgon2() override {
- if (SecretLen_) {
- SecureZeroMemory_(Secret_, SecretLen_);
- SecretLen_ = 0;
- }
- }
- virtual void Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen,
- ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override {
- TArrayHolder<TBlock> buffer(new TBlock[MemoryBlocks_]);
- InternalHash_(buffer.Get(), pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen);
- }
- virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen,
- const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override {
- TArrayHolder<ui8> hashResult(new ui8[hashlen]);
- Hash(pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen);
- return SecureCompare_(hash, hashResult.Get(), hashlen);
- }
- virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen,
- const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen,
- const ui8* aad = nullptr, ui32 aadlen = 0) const override {
- if (memory == nullptr || mlen < sizeof(TBlock) * MemoryBlocks_)
- ythrow yexception() << "memory is null or its size is not enough";
- InternalHash_((TBlock*)memory, pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen);
- }
- virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen,
- const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen,
- const ui8* aad = nullptr, ui32 aadlen = 0) const override {
- TArrayHolder<ui8> hashResult(new ui8[hashlen]);
- HashWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen);
- return SecureCompare_(hashResult.Get(), hash, hashlen);
- }
- virtual size_t GetMemorySize() const override {
- return MemoryBlocks_ * sizeof(TBlock);
- }
- protected: /* Constants */
- ui8 Secret_[ARGON2_SECRET_MAX_LENGTH] = {0};
- ui32 SecretLen_ = 0;
- ui32 Tcost_;
- EArgon2Type Atype_;
- static constexpr ui32 Lanes_ = threads;
- static constexpr ui32 MemoryBlocks_ = (mcost >= 2 * ARGON2_SYNC_POINTS * Lanes_) ? (mcost - mcost % (Lanes_ * ARGON2_SYNC_POINTS)) : 2 * ARGON2_SYNC_POINTS * Lanes_;
- static constexpr ui32 SegmentLength_ = MemoryBlocks_ / (Lanes_ * ARGON2_SYNC_POINTS);
- static constexpr ui32 LaneLength_ = SegmentLength_ * ARGON2_SYNC_POINTS;
- protected: /* Prototypes */
- virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock,
- TBlock* nextBlock, bool withXor) const = 0;
- virtual void CopyBlock_(TBlock* dst, const TBlock* src) const = 0;
- virtual void XorBlock_(TBlock* dst, const TBlock* src) const = 0;
- protected: /* Static functions */
- static bool SecureCompare_(const ui8* buffer1, const ui8* buffer2, ui32 len) {
- bool result = true;
- for (ui32 i = 0; i < len; ++i) {
- result &= (buffer1[i] == buffer2[i]);
- }
- return result;
- }
/*
 * Overwrites `len` bytes at `src` with zeros.
 * memset is called through a volatile function pointer so that the compiler
 * cannot prove the call is a dead store and drop it.
 */
static void SecureZeroMemory_(void* src, size_t len) {
    using TMemsetFn = void* (*)(void*, int, size_t);
    static const volatile TMemsetFn wipe = &memset;
    wipe(src, 0, len);
}
- static void Store32_(ui32 value, void* mem) {
- *((ui32*)mem) = value;
- }
- static void Blake2BHash64_(ui8 out[BLAKE2B_OUTBYTES], const ui8 in[BLAKE2B_OUTBYTES]) {
- TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES);
- hash.Update(in, BLAKE2B_OUTBYTES);
- hash.Final(out, BLAKE2B_OUTBYTES);
- }
- static void ExpandBlockhash_(ui8 expanded[ARGON2_BLOCK_SIZE], const ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]) {
- ui8 out_buffer[BLAKE2B_OUTBYTES];
- ui8 in_buffer[BLAKE2B_OUTBYTES];
- const ui32 HALF_OUT_BYTES = BLAKE2B_OUTBYTES / 2;
- const ui32 HASH_BLOCKS_COUNT = ((ARGON2_BLOCK_SIZE / HALF_OUT_BYTES));
- TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES);
- hash.Update(ARGON2_BLOCK_SIZE);
- hash.Update(blockhash, ARGON2_PREHASH_SEED_LENGTH);
- hash.Final(out_buffer, BLAKE2B_OUTBYTES);
- memcpy(expanded, out_buffer, HALF_OUT_BYTES);
- for (ui32 i = 1; i < HASH_BLOCKS_COUNT - 2; ++i) {
- memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
- Blake2BHash64_(out_buffer, in_buffer);
- memcpy(expanded + (i * HALF_OUT_BYTES), out_buffer, HALF_OUT_BYTES);
- }
- Blake2BHash64_(in_buffer, out_buffer);
- memcpy(expanded + HALF_OUT_BYTES * (HASH_BLOCKS_COUNT - 2), in_buffer, BLAKE2B_OUTBYTES);
- }
- static void Blake2BLong_(ui8* out, ui32 outlen, const ui8* in, ui32 inlen) {
- if (outlen < BLAKE2B_OUTBYTES) {
- TBlake2B<instructionSet> hash(outlen);
- hash.Update(outlen);
- hash.Update(in, inlen);
- hash.Final(out, outlen);
- } else {
- ui8 out_buffer[BLAKE2B_OUTBYTES];
- ui8 in_buffer[BLAKE2B_OUTBYTES];
- ui32 toproduce = outlen - BLAKE2B_OUTBYTES / 2;
- TBlake2B<instructionSet> hash1(BLAKE2B_OUTBYTES);
- hash1.Update(outlen);
- hash1.Update(in, inlen);
- hash1.Final(out_buffer, BLAKE2B_OUTBYTES);
- memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
- out += BLAKE2B_OUTBYTES / 2;
- while (toproduce > BLAKE2B_OUTBYTES) {
- memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
- TBlake2B<instructionSet> hash2(BLAKE2B_OUTBYTES);
- hash2.Update(in_buffer, BLAKE2B_OUTBYTES);
- hash2.Final(out_buffer, BLAKE2B_OUTBYTES);
- memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
- out += BLAKE2B_OUTBYTES / 2;
- toproduce -= BLAKE2B_OUTBYTES / 2;
- }
- memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
- {
- TBlake2B<instructionSet> hash3(toproduce);
- hash3.Update(in_buffer, BLAKE2B_OUTBYTES);
- hash3.Final(out_buffer, toproduce);
- memcpy(out, out_buffer, toproduce);
- }
- }
- }
- static void InitBlockValue_(TBlock* b, ui8 in) {
- memset(b->V, in, sizeof(b->V));
- }
- protected: /* Functions */
- void InternalHash_(TBlock* memory, const ui8* pwd, ui32 pwdlen,
- const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen,
- const ui8* aad, ui32 aadlen) const {
- /*
- * all parameters checks are in proxy objects
- */
- Initialize_(memory, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen);
- FillMemoryBlocks_(memory);
- Finalize_(memory, out, outlen);
- }
- void InitialHash_(ui8 blockhash[ARGON2_PREHASH_DIGEST_LENGTH],
- ui32 outlen, const ui8* pwd, ui32 pwdlen,
- const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const {
- TBlake2B<instructionSet> hash(ARGON2_PREHASH_DIGEST_LENGTH);
- /* lanes, but lanes == threads */
- hash.Update(Lanes_);
- /* outlen */
- hash.Update(outlen);
- /* m_cost */
- hash.Update(mcost);
- /* t_cost */
- hash.Update(Tcost_);
- /* version */
- hash.Update(0x00000013);
- /* Argon2 type */
- hash.Update((ui32)Atype_);
- /* pwdlen */
- hash.Update(pwdlen);
- /* pwd */
- hash.Update(pwd, pwdlen);
- /* saltlen */
- hash.Update(saltlen);
- /* salt */
- if (saltlen)
- hash.Update(salt, saltlen);
- /* secret */
- hash.Update(SecretLen_);
- if (SecretLen_)
- hash.Update((void*)Secret_, SecretLen_);
- /* aadlen */
- hash.Update(aadlen);
- if (aadlen)
- hash.Update((void*)aad, aadlen);
- hash.Final(blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
- }
- void FillFirstBlocks_(TBlock* blocks, ui8* blockhash) const {
- for (ui32 l = 0; l < Lanes_; l++) {
- /* fill the first block of the lane */
- Store32_(l, blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4);
- Store32_(0, blockhash + ARGON2_PREHASH_DIGEST_LENGTH);
- ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_]), blockhash);
- /* fill the second block of the lane */
- Store32_(1, blockhash + ARGON2_PREHASH_DIGEST_LENGTH);
- ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_ + 1]), blockhash);
- }
- }
- /* The 'if' will be optimized out as the number of threads is known at the compile time */
- void FillMemoryBlocks_(TBlock* memory) const {
- for (ui32 t = 0; t < Tcost_; ++t) {
- for (ui32 s = 0; s < ARGON2_SYNC_POINTS; ++s) {
- if (Lanes_ == 1)
- FillSegment_(memory, t, 0, s);
- else {
- NYmp::SetThreadCount(Lanes_);
- NYmp::ParallelForStaticAutoChunk<ui32>(0, Lanes_, [this, &memory, s, t](int k) {
- this->FillSegment_(memory, t, k, s);
- });
- }
- }
- }
- }
- void Initialize_(TBlock* memory, ui32 outlen, const ui8* pwd, ui32 pwdlen,
- const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const {
- ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH];
- InitialHash_(blockhash, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen);
- FillFirstBlocks_(memory, blockhash);
- }
- ui32 ComputeReferenceArea_(ui32 pass, ui32 slice, ui32 index, bool sameLane) const {
- ui32 passVal = pass == 0 ? (slice * SegmentLength_) : (LaneLength_ - SegmentLength_);
- return sameLane ? passVal + (index - 1) : passVal + (index == 0 ? -1 : 0);
- }
- ui32 IndexAlpha_(ui32 pass, ui32 slice, ui32 index, ui32 pseudoRand, bool sameLane) const {
- ui32 referenceAreaSize = ComputeReferenceArea_(pass, slice, index, sameLane);
- ui64 relativePosition = pseudoRand;
- relativePosition = relativePosition * relativePosition >> 32;
- relativePosition = referenceAreaSize - 1 - (referenceAreaSize * relativePosition >> 32);
- ui32 startPosition = 0;
- if (pass != 0)
- startPosition = (slice == ARGON2_SYNC_POINTS - 1) ? 0 : (slice + 1) * SegmentLength_;
- return (ui32)((startPosition + relativePosition) % LaneLength_);
- }
- void NextAddresses_(TBlock* addressBlock, TBlock* inputBlock, const TBlock* zeroBlock) const {
- inputBlock->V[6]++;
- FillBlock_(zeroBlock, inputBlock, addressBlock, false);
- FillBlock_(zeroBlock, addressBlock, addressBlock, false);
- }
- void Finalize_(const TBlock* memory, ui8* out, ui32 outlen) const {
- TBlock blockhash;
- CopyBlock_(&blockhash, memory + LaneLength_ - 1);
- /* XOR the last blocks */
- for (ui32 l = 1; l < Lanes_; ++l) {
- ui32 lastBlockInLane = l * LaneLength_ + (LaneLength_ - 1);
- XorBlock_(&blockhash, memory + lastBlockInLane);
- }
- Blake2BLong_(out, outlen, (ui8*)blockhash.V, ARGON2_BLOCK_SIZE);
- }
- /* The switch will be optimized out by the compiler as the type is known at the compile time */
- void FillSegment_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const {
- switch (Atype_) {
- case EArgon2Type::Argon2d:
- FillSegmentD_(memory, pass, lane, slice);
- return;
- case EArgon2Type::Argon2i:
- FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2i);
- return;
- case EArgon2Type::Argon2id:
- if (pass == 0 && slice < ARGON2_SYNC_POINTS / 2)
- FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2id);
- else
- FillSegmentD_(memory, pass, lane, slice);
- return;
- }
- }
- void FillSegmentD_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const {
- ui32 startingIndex = (pass == 0 && slice == 0) ? 2 : 0;
- ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex;
- ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1;
- for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) {
- if (currOffset % LaneLength_ == 1) {
- prevOffset = currOffset - 1;
- }
- ui64 pseudoRand = memory[prevOffset].V[0];
- ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_);
- ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane);
- TBlock* refBlock = memory + LaneLength_ * refLane + refIndex;
- FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0);
- }
- }
- void FillSegmentI_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice, EArgon2Type atp) const {
- TBlock addressBlock, inputBlock, zeroBlock;
- InitBlockValue_(&zeroBlock, 0);
- InitBlockValue_(&inputBlock, 0);
- inputBlock.V[0] = pass;
- inputBlock.V[1] = lane;
- inputBlock.V[2] = slice;
- inputBlock.V[3] = MemoryBlocks_;
- inputBlock.V[4] = Tcost_;
- inputBlock.V[5] = (ui64)atp;
- ui32 startingIndex = 0;
- if (pass == 0 && slice == 0) {
- startingIndex = 2;
- NextAddresses_(&addressBlock, &inputBlock, &zeroBlock);
- }
- ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex;
- ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1;
- for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) {
- if (currOffset % LaneLength_ == 1) {
- prevOffset = currOffset - 1;
- }
- if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
- NextAddresses_(&addressBlock, &inputBlock, &zeroBlock);
- }
- ui64 pseudoRand = addressBlock.V[i % ARGON2_ADDRESSES_IN_BLOCK];
- ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_);
- ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane);
- TBlock* refBlock = memory + LaneLength_ * refLane + refIndex;
- FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0);
- }
- }
- };
- }
|