SMusatov
/
ydb
mirror of https://github.com/ydb-platform/ydb.git


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
							//===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Define several functions to decode x86 specific shuffle semantics using
// constants from the constant pool.
//
//===----------------------------------------------------------------------===//

#include "X86ShuffleDecodeConstantPool.h"
#include "MCTargetDesc/X86ShuffleDecode.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"

//===----------------------------------------------------------------------===//
//  Vector Mask Decoding
//===----------------------------------------------------------------------===//

namespace llvm {

static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
                                APInt &UndefElts,
                                SmallVectorImpl<uint64_t> &RawMask) {
  // It is not an error for shuffle masks to not be a vector of
  // MaskEltSizeInBits because the constant pool uniques constants by their
  // bit representation.
  // e.g. the following take up the same space in the constant pool:
  //   i128 -170141183420855150465331762880109871104
  //
  //   <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
  //
  //   <4 x i32> <i32 -2147483648, i32 -2147483648,
  //              i32 -2147483648, i32 -2147483648>
  auto *CstTy = dyn_cast<FixedVectorType>(C->getType());
  if (!CstTy)
    return false;

  Type *CstEltTy = CstTy->getElementType();
  if (!CstEltTy->isIntegerTy())
    return false;

  unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
  unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
  unsigned NumCstElts = CstTy->getNumElements();

  assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
         "Unaligned shuffle mask size");

  unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
  UndefElts = APInt(NumMaskElts, 0);
  RawMask.resize(NumMaskElts, 0);

  // Fast path - if the constants match the mask size then copy direct.
  if (MaskEltSizeInBits == CstEltSizeInBits) {
    assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size");
    for (unsigned i = 0; i != NumMaskElts; ++i) {
      Constant *COp = C->getAggregateElement(i);
      if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
        return false;

      if (isa<UndefValue>(COp)) {
        UndefElts.setBit(i);
        RawMask[i] = 0;
        continue;
      }

      auto *Elt = cast<ConstantInt>(COp);
      RawMask[i] = Elt->getValue().getZExtValue();
    }
    return true;
  }

  // Extract all the undef/constant element data and pack into single bitsets.
  APInt UndefBits(CstSizeInBits, 0);
  APInt MaskBits(CstSizeInBits, 0);
  for (unsigned i = 0; i != NumCstElts; ++i) {
    Constant *COp = C->getAggregateElement(i);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return false;

    unsigned BitOffset = i * CstEltSizeInBits;

    if (isa<UndefValue>(COp)) {
      UndefBits.setBits(BitOffset, BitOffset + CstEltSizeInBits);
      continue;
    }

    MaskBits.insertBits(cast<ConstantInt>(COp)->getValue(), BitOffset);
  }

  // Now extract the undef/constant bit data into the raw shuffle masks.
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    unsigned BitOffset = i * MaskEltSizeInBits;
    APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset);

    // Only treat the element as UNDEF if all bits are UNDEF, otherwise
    // treat it as zero.
    if (EltUndef.isAllOnes()) {
      UndefElts.setBit(i);
      RawMask[i] = 0;
      continue;
    }

    APInt EltBits = MaskBits.extractBits(MaskEltSizeInBits, BitOffset);
    RawMask[i] = EltBits.getZExtValue();
  }

  return true;
}

void DecodePSHUFBMask(const Constant *C, unsigned Width,
                      SmallVectorImpl<int> &ShuffleMask) {
  assert((Width == 128 || Width == 256 || Width == 512) &&
         C->getType()->getPrimitiveSizeInBits() >= Width &&
         "Unexpected vector size.");

  // The shuffle mask requires a byte vector.
  APInt UndefElts;
  SmallVector<uint64_t, 64> RawMask;
  if (!extractConstantMask(C, 8, UndefElts, RawMask))
    return;

  unsigned NumElts = Width / 8;
  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
         "Unexpected number of vector elements.");

  for (unsigned i = 0; i != NumElts; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }

    uint64_t Element = RawMask[i];
    // If the high bit (7) of the byte is set, the element is zeroed.
    if (Element & (1 << 7))
      ShuffleMask.push_back(SM_SentinelZero);
    else {
      // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
      // lane of the vector we're inside.
      unsigned Base = i & ~0xf;

      // Only the least significant 4 bits of the byte are used.
      int Index = Base + (Element & 0xf);
      ShuffleMask.push_back(Index);
    }
  }
}

void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width,
                        SmallVectorImpl<int> &ShuffleMask) {
  assert((Width == 128 || Width == 256 || Width == 512) &&
         C->getType()->getPrimitiveSizeInBits() >= Width &&
         "Unexpected vector size.");
  assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");

  // The shuffle mask requires elements the same size as the target.
  APInt UndefElts;
  SmallVector<uint64_t, 16> RawMask;
  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
    return;

  unsigned NumElts = Width / ElSize;
  unsigned NumEltsPerLane = 128 / ElSize;
  assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) &&
         "Unexpected number of vector elements.");

  for (unsigned i = 0; i != NumElts; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }

    int Index = i & ~(NumEltsPerLane - 1);
    uint64_t Element = RawMask[i];
    if (ElSize == 64)
      Index += (Element >> 1) & 0x1;
    else
      Index += Element & 0x3;

    ShuffleMask.push_back(Index);
  }
}

void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
                         unsigned Width, SmallVectorImpl<int> &ShuffleMask) {
  Type *MaskTy = C->getType();
  unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
  (void)MaskTySize;
  assert((MaskTySize == 128 || MaskTySize == 256) && Width >= MaskTySize &&
         "Unexpected vector size.");

  // The shuffle mask requires elements the same size as the target.
  APInt UndefElts;
  SmallVector<uint64_t, 8> RawMask;
  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
    return;

  unsigned NumElts = Width / ElSize;
  unsigned NumEltsPerLane = 128 / ElSize;
  assert((NumElts == 2 || NumElts == 4 || NumElts == 8) &&
         "Unexpected number of vector elements.");

  for (unsigned i = 0; i != NumElts; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }

    // VPERMIL2 Operation.
    // Bits[3] - Match Bit.
    // Bits[2:1] - (Per Lane) PD Shuffle Mask.
    // Bits[2:0] - (Per Lane) PS Shuffle Mask.
    uint64_t Selector = RawMask[i];
    unsigned MatchBit = (Selector >> 3) & 0x1;

    // M2Z[0:1]     MatchBit
    //   0Xb           X        Source selected by Selector index.
    //   10b           0        Source selected by Selector index.
    //   10b           1        Zero.
    //   11b           0        Zero.
    //   11b           1        Source selected by Selector index.
    if ((M2Z & 0x2) != 0u && MatchBit != (M2Z & 0x1)) {
      ShuffleMask.push_back(SM_SentinelZero);
      continue;
    }

    int Index = i & ~(NumEltsPerLane - 1);
    if (ElSize == 64)
      Index += (Selector >> 1) & 0x1;
    else
      Index += Selector & 0x3;

    int Src = (Selector >> 2) & 0x1;
    Index += Src * NumElts;
    ShuffleMask.push_back(Index);
  }
}

void DecodeVPPERMMask(const Constant *C, unsigned Width,
                      SmallVectorImpl<int> &ShuffleMask) {
  Type *MaskTy = C->getType();
  unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
  (void)MaskTySize;
  assert(Width == 128 && Width >= MaskTySize && "Unexpected vector size.");

  // The shuffle mask requires a byte vector.
  APInt UndefElts;
  SmallVector<uint64_t, 16> RawMask;
  if (!extractConstantMask(C, 8, UndefElts, RawMask))
    return;

  unsigned NumElts = Width / 8;
  assert(NumElts == 16 && "Unexpected number of vector elements.");

  for (unsigned i = 0; i != NumElts; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }

    // VPPERM Operation
    // Bits[4:0] - Byte Index (0 - 31)
    // Bits[7:5] - Permute Operation
    //
    // Permute Operation:
    // 0 - Source byte (no logical operation).
    // 1 - Invert source byte.
    // 2 - Bit reverse of source byte.
    // 3 - Bit reverse of inverted source byte.
    // 4 - 00h (zero - fill).
    // 5 - FFh (ones - fill).
    // 6 - Most significant bit of source byte replicated in all bit positions.
    // 7 - Invert most significant bit of source byte and replicate in all bit
    // positions.
    uint64_t Element = RawMask[i];
    uint64_t Index = Element & 0x1F;
    uint64_t PermuteOp = (Element >> 5) & 0x7;

    if (PermuteOp == 4) {
      ShuffleMask.push_back(SM_SentinelZero);
      continue;
    }
    if (PermuteOp != 0) {
      ShuffleMask.clear();
      return;
    }
    ShuffleMask.push_back((int)Index);
  }
}

} // namespace llvm