123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170 |
- #pragma once
- #include "public.h"
- #include "helpers.h"
- #include <library/cpp/yt/assert/assert.h>
- #include <util/generic/array_ref.h>
- #include <util/generic/ptr.h>
- #include <util/generic/singleton.h>
- #include <vector>
- extern "C" {
- #include <contrib/libs/isa-l/include/erasure_code.h>
- }
- namespace NErasure {
//! Reinterprets a blob's const iterator as a mutable byte pointer.
/*!
 *  The iterator is first viewed as |const unsigned char*| and then has its
 *  constness removed, so |TBlobType::const_iterator| must be a type for which
 *  |reinterpret_cast| to a byte pointer is well-formed (e.g. a raw pointer).
 *
 *  Intended for handing blob storage to C routines whose signatures take
 *  non-const |unsigned char*|; the cast itself does not make const storage
 *  writable.
 */
template <class TBlobType>
static inline unsigned char* ConstCast(typename TBlobType::const_iterator blobIter)
{
    const auto* bytes = reinterpret_cast<const unsigned char*>(blobIter);
    return const_cast<unsigned char*>(bytes);
}
- template <int DataPartCount, int ParityPartCount, class TCodecTraits, class TBlobType = typename TCodecTraits::TBlobType, class TMutableBlobType = typename TCodecTraits::TMutableBlobType>
- std::vector<TBlobType> ISAErasureEncode(
- const std::vector<unsigned char>& encodeGFTables,
- const std::vector<TBlobType>& dataBlocks)
- {
- YT_VERIFY(dataBlocks.size() == DataPartCount);
- size_t blockLength = dataBlocks.front().Size();
- for (size_t i = 1; i < dataBlocks.size(); ++i) {
- YT_VERIFY(dataBlocks[i].Size() == blockLength);
- }
- std::vector<unsigned char*> dataPointers;
- for (const auto& block : dataBlocks) {
- dataPointers.emplace_back(ConstCast<TBlobType>(block.Begin()));
- }
- std::vector<TMutableBlobType> parities(ParityPartCount);
- std::vector<unsigned char*> parityPointers(ParityPartCount);
- for (size_t i = 0; i < ParityPartCount; ++i) {
- parities[i] = TCodecTraits::AllocateBlob(blockLength);
- parityPointers[i] = ConstCast<TBlobType>(parities[i].Begin());
- memset(parityPointers[i], 0, blockLength);
- }
- ec_encode_data(
- blockLength,
- DataPartCount,
- ParityPartCount,
- const_cast<unsigned char*>(encodeGFTables.data()),
- dataPointers.data(),
- parityPointers.data());
- return std::vector<TBlobType>(parities.begin(), parities.end());
- }
- template <int DataPartCount, int ParityPartCount, class TCodecTraits, class TBlobType = typename TCodecTraits::TBlobType, class TMutableBlobType = typename TCodecTraits::TMutableBlobType>
- std::vector<TBlobType> ISAErasureDecode(
- const std::vector<TBlobType>& dataBlocks,
- const TPartIndexList& erasedIndices,
- TConstArrayRef<TPartIndexList> groups,
- const std::vector<unsigned char>& fullGeneratorMatrix)
- {
- YT_VERIFY(dataBlocks.size() >= DataPartCount);
- YT_VERIFY(erasedIndices.size() <= ParityPartCount);
- size_t blockLength = dataBlocks.front().Size();
- for (size_t i = 1; i < dataBlocks.size(); ++i) {
- YT_VERIFY(dataBlocks[i].Size() == blockLength);
- }
- std::vector<unsigned char> partialGeneratorMatrix(DataPartCount * DataPartCount, 0);
- std::vector<unsigned char*> recoveryBlocks;
- for (size_t i = 0; i < DataPartCount; ++i) {
- recoveryBlocks.emplace_back(ConstCast<TBlobType>(dataBlocks[i].Begin()));
- }
- // Groups check is specific for LRC.
- std::vector<int> isGroupHealthy(2, 1);
- for (size_t i = 0; i < 2; ++i) {
- for (const auto& index : erasedIndices) {
- if (!groups.empty() && Contains(groups[0], index)) {
- isGroupHealthy[0] = 0;
- } else if (!groups.empty() && Contains(groups[1], index)) {
- isGroupHealthy[1] = 0;
- }
- }
- }
- // When a group is healthy we cannot use its local parity, thus skip it using gap.
- size_t gap = 0;
- size_t decodeMatrixIndex = 0;
- size_t erasedBlockIndex = 0;
- while (decodeMatrixIndex < DataPartCount) {
- size_t globalIndex = decodeMatrixIndex + erasedBlockIndex + gap;
- if (erasedBlockIndex < erasedIndices.size() &&
- globalIndex == static_cast<size_t>(erasedIndices[erasedBlockIndex]))
- {
- ++erasedBlockIndex;
- continue;
- }
- if (!groups.empty() && globalIndex >= DataPartCount && globalIndex < DataPartCount + 2) {
- if (Contains(groups[0], globalIndex) && isGroupHealthy[0]) {
- ++gap;
- continue;
- }
- if (Contains(groups[1], globalIndex) && isGroupHealthy[1]) {
- ++gap;
- continue;
- }
- }
- memcpy(&partialGeneratorMatrix[decodeMatrixIndex * DataPartCount], &fullGeneratorMatrix[globalIndex * DataPartCount], DataPartCount);
- ++decodeMatrixIndex;
- }
- std::vector<unsigned char> invertedGeneratorMatrix(DataPartCount * DataPartCount, 0);
- int res = gf_invert_matrix(partialGeneratorMatrix.data(), invertedGeneratorMatrix.data(), DataPartCount);
- YT_VERIFY(res == 0);
- std::vector<unsigned char> decodeMatrix(DataPartCount * (DataPartCount + ParityPartCount), 0);
- //! Some magical code from library example.
- for (size_t i = 0; i < erasedIndices.size(); ++i) {
- if (erasedIndices[i] < DataPartCount) {
- memcpy(&decodeMatrix[i * DataPartCount], &invertedGeneratorMatrix[erasedIndices[i] * DataPartCount], DataPartCount);
- } else {
- for (int k = 0; k < DataPartCount; ++k) {
- int val = 0;
- for (int j = 0; j < DataPartCount; ++j) {
- val ^= gf_mul_erasure(invertedGeneratorMatrix[j * DataPartCount + k], fullGeneratorMatrix[DataPartCount * erasedIndices[i] + j]);
- }
- decodeMatrix[DataPartCount * i + k] = val;
- }
- }
- }
- std::vector<unsigned char> decodeGFTables(DataPartCount * erasedIndices.size() * 32);
- ec_init_tables(DataPartCount, erasedIndices.size(), decodeMatrix.data(), decodeGFTables.data());
- std::vector<TMutableBlobType> recoveredParts;
- std::vector<unsigned char*> recoveredPartsPointers;
- for (size_t i = 0; i < erasedIndices.size(); ++i) {
- recoveredParts.emplace_back(TCodecTraits::AllocateBlob(blockLength));
- recoveredPartsPointers.emplace_back(ConstCast<TBlobType>(recoveredParts.back().Begin()));
- memset(recoveredPartsPointers.back(), 0, blockLength);
- }
- ec_encode_data(
- blockLength,
- DataPartCount,
- erasedIndices.size(),
- decodeGFTables.data(),
- recoveryBlocks.data(),
- recoveredPartsPointers.data());
- return std::vector<TBlobType>(recoveredParts.begin(), recoveredParts.end());
- }
- } // namespace NErasure
|