const char *SrcName = isSrc1 ? Src1Name : Src2Name;
OS << (SrcName ? SrcName : "mem") << '[';
bool IsFirst = true;
- while (i != e &&
- (int)ShuffleMask[i] >= 0 &&
+ while (i != e && (int)ShuffleMask[i] != SM_SentinelZero &&
(ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
if (!IsFirst)
OS << ',';
else
IsFirst = false;
- OS << ShuffleMask[i] % ShuffleMask.size();
+ if (ShuffleMask[i] == SM_SentinelUndef)
+ OS << "u";
+ else
+ OS << ShuffleMask[i] % ShuffleMask.size();
++i;
}
OS << ']';
}
}
-void DecodePSHUFBMask(const ConstantDataSequential *C,
- SmallVectorImpl<int> &ShuffleMask) {
+void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
assert(MaskTy->isVectorTy() && "Expected a vector constant mask!");
assert(MaskTy->getVectorElementType()->isIntegerTy(8) &&
// FIXME: Add support for AVX-512.
assert((NumElements == 16 || NumElements == 32) &&
"Only 128-bit and 256-bit vectors supported!");
- assert((unsigned)NumElements == C->getNumElements() &&
- "Constant mask has a different number of elements!");
-
ShuffleMask.reserve(NumElements);
- for (int i = 0; i < NumElements; ++i) {
- // For AVX vectors with 32 bytes the base of the shuffle is the half of the
- // vector we're inside.
- int Base = i < 16 ? 0 : 16;
- uint64_t Element = C->getElementAsInteger(i);
- // If the high bit (7) of the byte is set, the element is zeroed.
- if (Element & (1 << 7))
- ShuffleMask.push_back(SM_SentinelZero);
- else {
- // Only the least significant 4 bits of the byte are used.
- int Index = Base + (Element & 0xf);
- ShuffleMask.push_back(Index);
+
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
+ assert((unsigned)NumElements == CDS->getNumElements() &&
+ "Constant mask has a different number of elements!");
+
+ for (int i = 0; i < NumElements; ++i) {
+ // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
+ // lane of the vector we're inside.
+ int Base = i < 16 ? 0 : 16;
+ uint64_t Element = CDS->getElementAsInteger(i);
+ // If the high bit (7) of the byte is set, the element is zeroed.
+ if (Element & (1 << 7))
+ ShuffleMask.push_back(SM_SentinelZero);
+ else {
+ // Only the least significant 4 bits of the byte are used.
+ int Index = Base + (Element & 0xf);
+ ShuffleMask.push_back(Index);
+ }
+ }
+ } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
+ assert((unsigned)NumElements == CV->getNumOperands() &&
+ "Constant mask has a different number of elements!");
+
+ for (int i = 0; i < NumElements; ++i) {
+ // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
+ // lane of the vector we're inside.
+ int Base = i < 16 ? 0 : 16;
+ Constant *COp = CV->getOperand(i);
+ if (isa<UndefValue>(COp)) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ // If the high bit (7) of the byte is set, the element is zeroed.
+ if (Element & (1 << 7))
+ ShuffleMask.push_back(SM_SentinelZero);
+ else {
+ // Only the least significant 4 bits of the byte are used.
+ int Index = Base + (Element & 0xf);
+ ShuffleMask.push_back(Index);
+ }
}
}
}
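As a quick standalone illustration of the byte semantics decoded above (not part of the patch; the sentinel enum is redeclared locally so the snippet compiles on its own): bit 7 of a PSHUFB control byte zeroes the destination byte, and otherwise only the low nibble selects a source byte within the 16-byte lane that element lives in.

#include <cassert>
#include <cstdint>

enum { SM_SentinelZero = -1, SM_SentinelUndef = -2 }; // same convention as the decoder

// Decode one PSHUFB control byte at position `i` of a 16- or 32-byte mask.
static int decodePSHUFBByte(int i, uint8_t Byte) {
  if (Byte & 0x80)            // bit 7 set: the destination byte is zeroed
    return SM_SentinelZero;
  int Base = i < 16 ? 0 : 16; // the shuffle never crosses a 16-byte lane
  return Base + (Byte & 0xf); // only the low 4 bits pick the source byte
}

int main() {
  assert(decodePSHUFBByte(0, 0x83) == SM_SentinelZero); // zeroing byte
  assert(decodePSHUFBByte(3, 0x04) == 4);               // low lane
  assert(decodePSHUFBByte(20, 0x03) == 16 + 3);         // high lane of a 32-byte mask
  return 0;
}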
SmallVectorImpl<int> &ShuffleMask) {
for (int i = 0, e = RawMask.size(); i < e; ++i) {
uint64_t M = RawMask[i];
+ if (M == (uint64_t)SM_SentinelUndef) {
+      ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
// For AVX vectors with 32 bytes the base of the shuffle is the half of
// the vector we're inside.
int Base = i < 16 ? 0 : 16;
}
}
-void DecodeVPERMILPMask(const ConstantDataSequential *C,
- SmallVectorImpl<int> &ShuffleMask) {
+void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
assert(MaskTy->isVectorTy() && "Expected a vector constant mask!");
assert(MaskTy->getVectorElementType()->isIntegerTy() &&
int NumElements = MaskTy->getVectorNumElements();
assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
"Unexpected number of vector elements.");
- assert((unsigned)NumElements == C->getNumElements() &&
- "Constant mask has a different number of elements!");
-
ShuffleMask.reserve(NumElements);
- for (int i = 0; i < NumElements; ++i) {
- int Base = (i * ElementBits / 128) * (128 / ElementBits);
- uint64_t Element = C->getElementAsInteger(i);
- // Only the least significant 2 bits of the integer are used.
- int Index = Base + (Element & 0x3);
- ShuffleMask.push_back(Index);
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
+ assert((unsigned)NumElements == CDS->getNumElements() &&
+ "Constant mask has a different number of elements!");
+
+ for (int i = 0; i < NumElements; ++i) {
+ int Base = (i * ElementBits / 128) * (128 / ElementBits);
+ uint64_t Element = CDS->getElementAsInteger(i);
+ // Only the least significant 2 bits of the integer are used.
+ int Index = Base + (Element & 0x3);
+ ShuffleMask.push_back(Index);
+ }
+ } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
+    assert((unsigned)NumElements == CV->getNumOperands() &&
+ "Constant mask has a different number of elements!");
+
+ for (int i = 0; i < NumElements; ++i) {
+ int Base = (i * ElementBits / 128) * (128 / ElementBits);
+ Constant *COp = CV->getOperand(i);
+ if (isa<UndefValue>(COp)) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ // Only the least significant 2 bits of the integer are used.
+ int Index = Base + (Element & 0x3);
+ ShuffleMask.push_back(Index);
+ }
}
}
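A matching standalone sketch for the VPERMILP index computation above (illustrative only; only the vpermilps case with 32-bit elements is exercised): the selector's low two bits are offset to the start of the 128-bit lane holding element `i`, mirroring `Base + (Element & 0x3)` in the decoder.

#include <cassert>

// Mirrors the decoder's index computation. ElementBits is 32 for vpermilps.
static int decodeVPERMILPElt(int i, unsigned Selector, int ElementBits) {
  int Base = (i * ElementBits / 128) * (128 / ElementBits);
  return Base + (Selector & 0x3); // only the low 2 bits are used
}

int main() {
  // 256-bit vpermilps: element 1 stays in the low lane, element 5 reads the
  // upper lane, so a selector of 2 there means overall index 4 + 2 = 6.
  assert(decodeVPERMILPElt(1, 3, 32) == 3);
  assert(decodeVPERMILPElt(5, 2, 32) == 6);
  return 0;
}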
//===----------------------------------------------------------------------===//
namespace llvm {
-class ConstantDataSequential;
+class Constant;
class MVT;
-enum {
- SM_SentinelZero = -1
-};
+enum { SM_SentinelZero = -1, SM_SentinelUndef = -2 };
void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a PSHUFB mask from an IR-level vector constant.
-void DecodePSHUFBMask(const ConstantDataSequential *C,
- SmallVectorImpl<int> &ShuffleMask);
+void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a PSHUFB mask from a raw array of constants such as from
/// BUILD_VECTOR.
void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
-void DecodeVPERMILPMask(const ConstantDataSequential *C,
- SmallVectorImpl<int> &ShuffleMask);
+void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
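To make the sentinel convention concrete: the decoders now emit SM_SentinelUndef (-2) alongside SM_SentinelZero (-1), and the asm-comment printers render undef lanes as "u" while zeroed lanes keep printing as "zero", which is the change visible in the test diffs below. A minimal sketch of that rendering, independent of the LLVM printers:

#include <cstdio>
#include <vector>

enum { SM_SentinelZero = -1, SM_SentinelUndef = -2 }; // same values as above

// Print a decoded mask the way the asm comments in the tests render it:
// plain index, "u" for undef lanes, "zero" for zeroed lanes.
static void printMask(const std::vector<int> &Mask) {
  for (size_t i = 0; i != Mask.size(); ++i) {
    if (i)
      std::printf(",");
    if (Mask[i] == SM_SentinelUndef)
      std::printf("u");
    else if (Mask[i] == SM_SentinelZero)
      std::printf("zero");
    else
      std::printf("%d", Mask[i]);
  }
  std::printf("\n");
}

int main() {
  printMask({0, 4, 8, 12, SM_SentinelUndef, SM_SentinelZero}); // 0,4,8,12,u,zero
  return 0;
}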
SmallVector<uint64_t, 32> RawMask;
for (int i = 0, e = MaskNode->getNumOperands(); i < e; ++i) {
- auto *CN = dyn_cast<ConstantSDNode>(MaskNode->getOperand(i));
+ SDValue Op = MaskNode->getOperand(i);
+ if (Op->getOpcode() == ISD::UNDEF) {
+ RawMask.push_back((uint64_t)SM_SentinelUndef);
+ continue;
+ }
+ auto *CN = dyn_cast<ConstantSDNode>(Op.getNode());
if (!CN)
return false;
APInt MaskElement = CN->getAPIntValue();
if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
return false;
- if (auto *C = dyn_cast<ConstantDataSequential>(MaskCP->getConstVal())) {
+ if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
// FIXME: Support AVX-512 here.
- if (!C->getType()->isVectorTy() ||
- (C->getNumElements() != 16 && C->getNumElements() != 32))
+ Type *Ty = C->getType();
+ if (!Ty->isVectorTy() || (Ty->getVectorNumElements() != 16 &&
+ Ty->getVectorNumElements() != 32))
return false;
- assert(C->getType()->isVectorTy() && "Expected a vector constant.");
DecodePSHUFBMask(C, Mask);
break;
}
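The cast above is widened from ConstantDataSequential to Constant because a shuffle mask that contains undef elements is materialized as a ConstantVector, which the narrower cast would reject. A rough sketch of such a mask, assuming a surrounding harness that provides an LLVMContext (the names here are illustrative, not from the patch):

// <16 x i8> PSHUFB mask with one undef byte; ConstantVector::get cannot fold
// this to a ConstantDataVector, so it stays a ConstantVector.
LLVMContext Ctx;
Type *I8 = Type::getInt8Ty(Ctx);
SmallVector<Constant *, 16> Bytes;
for (unsigned i = 0; i != 16; ++i) {
  if (i == 4)
    Bytes.push_back(UndefValue::get(I8)); // lane 4 is undef
  else
    Bytes.push_back(ConstantInt::get(I8, i));
}
Constant *MaskC = ConstantVector::get(Bytes);

SmallVector<int, 16> ShuffleMask;
DecodePSHUFBMask(MaskC, ShuffleMask); // ShuffleMask[4] == SM_SentinelUndef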
SDValue V2Mask[16];
for (int i = 0; i < 16; ++i)
if (Mask[i] == -1) {
- V1Mask[i] = V2Mask[i] = DAG.getConstant(0x80, MVT::i8);
+ V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
} else {
V1Mask[i] = DAG.getConstant(Mask[i] < 16 ? Mask[i] : 0x80, MVT::i8);
V2Mask[i] =
assert(Mask.size() <= 16 && "Can't shuffle elements smaller than bytes!");
int Ratio = 16 / Mask.size();
for (unsigned i = 0; i < 16; ++i) {
+ if (Mask[i / Ratio] == SM_SentinelUndef) {
+ PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8));
+ continue;
+ }
int M = Mask[i / Ratio] != SM_SentinelZero
? Ratio * Mask[i / Ratio] + i % Ratio
: 255;
// for this order is that we are recursing up the operation chain.
for (int i = 0, e = std::max(OpMask.size(), RootMask.size()); i < e; ++i) {
int RootIdx = i / RootRatio;
- if (RootMask[RootIdx] == SM_SentinelZero) {
- // This is a zero-ed lane, we're done.
- Mask.push_back(SM_SentinelZero);
+ if (RootMask[RootIdx] < 0) {
+      // This is a zero or undef lane; we're done.
+ Mask.push_back(RootMask[RootIdx]);
continue;
}
int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio;
int OpIdx = RootMaskedIdx / OpRatio;
- if (OpMask[OpIdx] == SM_SentinelZero) {
- // The incoming lanes are zero, it doesn't matter which ones we are using.
- Mask.push_back(SM_SentinelZero);
+ if (OpMask[OpIdx] < 0) {
+      // The incoming lanes are zero or undef; it doesn't matter which ones we
+ // are using.
+ Mask.push_back(OpMask[OpIdx]);
continue;
}
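For the mask composition above, a standalone sketch of the ratio arithmetic may help (illustrative only; it assumes the loop finishes by pushing OpMask[OpIdx] * OpRatio + RootMaskedIdx % OpRatio, which falls outside this excerpt). The root mask and the feeding operation's mask may describe the vector at different element widths, so each is scaled to the wider of the two before indices are chained, and negative sentinels short-circuit the chain:

#include <algorithm>
#include <cassert>
#include <vector>

enum { SM_SentinelZero = -1, SM_SentinelUndef = -2 };

// Chain a root shuffle mask with the mask of the operation feeding it, where
// the two masks may use different element granularities.
static std::vector<int> composeMasks(const std::vector<int> &RootMask,
                                     const std::vector<int> &OpMask) {
  int Width = (int)std::max(RootMask.size(), OpMask.size());
  int RootRatio = Width / (int)RootMask.size();
  int OpRatio = Width / (int)OpMask.size();
  std::vector<int> Mask;
  for (int i = 0; i != Width; ++i) {
    int RootIdx = i / RootRatio;
    if (RootMask[RootIdx] < 0) {
      Mask.push_back(RootMask[RootIdx]); // zero/undef lanes pass straight through
      continue;
    }
    int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio;
    int OpIdx = RootMaskedIdx / OpRatio;
    if (OpMask[OpIdx] < 0) {
      Mask.push_back(OpMask[OpIdx]);     // likewise for the feeding operation
      continue;
    }
    Mask.push_back(OpMask[OpIdx] * OpRatio + RootMaskedIdx % OpRatio);
  }
  return Mask;
}

int main() {
  // Root selects i32 elements {2, undef, 0, zero} of an op whose own mask is
  // an 8-element (i16 granularity) identity shuffle.
  std::vector<int> Expected = {4, 5, SM_SentinelUndef, SM_SentinelUndef,
                               0, 1, SM_SentinelZero, SM_SentinelZero};
  assert(composeMasks({2, SM_SentinelUndef, 0, SM_SentinelZero},
                      {0, 1, 2, 3, 4, 5, 6, 7}) == Expected);
  return 0;
}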
Type *MaskTy = MaskConstantEntry.getType();
(void)MaskTy;
if (!MaskConstantEntry.isMachineConstantPoolEntry())
- if (auto *C = dyn_cast<ConstantDataSequential>(
- MaskConstantEntry.Val.ConstVal)) {
+ if (auto *C = dyn_cast<Constant>(MaskConstantEntry.Val.ConstVal)) {
assert(MaskTy == C->getType() &&
"Expected a constant of the same type!");
DecodeVPERMILPMask(C, Mask);
}
- assert(Mask.size() == MaskTy->getVectorNumElements() &&
- "Shuffle mask has a different size than its type!");
+ assert(
+ (Mask.empty() || Mask.size() == MaskTy->getVectorNumElements()) &&
+ "Shuffle mask has a different size than its type!");
}
}
InSrc = true;
CS << SrcName << "[";
}
- CS << M;
+ if (M == SM_SentinelUndef)
+ CS << "u";
+ else
+ CS << M;
}
}
if (InSrc)
;
; SSSE3-LABEL: @trunc_v4i32_shuffle
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: @trunc_v4i32_shuffle
; SSE41: # BB#0:
-; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i8> %shuffle
define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: @shuffle_v8f32_002u6u44
; ALL: # BB#0:
-; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,u,6,u,4,4]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
ret <8 x float> %shuffle
define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: @shuffle_v8f32_00uu66uu
; ALL: # BB#0:
-; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,u,u,6,6,u,u]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
ret <8 x float> %shuffle
define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: @shuffle_v8f32_103245uu
; ALL: # BB#0:
-; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,0,3,2,4,5,u,u]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
ret <8 x float> %shuffle
define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: @shuffle_v8f32_1133uu67
; ALL: # BB#0:
-; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,1,3,3,u,u,6,7]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
ret <8 x float> %shuffle
define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: @shuffle_v8f32_0uu354uu
; ALL: # BB#0:
-; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,u,u,3,5,4,u,u]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
ret <8 x float> %shuffle
define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: @shuffle_v8f32_uuu3uu66
; ALL: # BB#0:
-; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[u,u,u,3,u,u,6,6]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
ret <8 x float> %shuffle
define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: @shuffle_v8i32_002u6u44
; ALL: # BB#0:
-; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,u,6,u,4,4]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
ret <8 x i32> %shuffle
define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: @shuffle_v8i32_00uu66uu
; ALL: # BB#0:
-; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,u,u,6,6,u,u]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
ret <8 x i32> %shuffle
define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: @shuffle_v8i32_103245uu
; ALL: # BB#0:
-; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,0,3,2,4,5,u,u]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
ret <8 x i32> %shuffle
define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: @shuffle_v8i32_1133uu67
; ALL: # BB#0:
-; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,1,3,3,u,u,6,7]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
ret <8 x i32> %shuffle
define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: @shuffle_v8i32_0uu354uu
; ALL: # BB#0:
-; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,u,u,3,5,4,u,u]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
ret <8 x i32> %shuffle
define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: @shuffle_v8i32_uuu3uu66
; ALL: # BB#0:
-; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[u,u,u,3,u,u,6,6]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
ret <8 x i32> %shuffle