return MI;
}
return nullptr;
}

+static Value *SimplifyX86immshift(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder,
+ bool ShiftLeft) {
+ // Simplify if the shift count is constant: fold to a zero vector when the
+ // count is >= the element bit width, otherwise to a vector shl/lshr.
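+ // For example, on the <4 x i32> forms this folds
+ //   psrli.d(%v, 3)  --> lshr <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>
+ //   psrli.d(%v, 33) --> zeroinitializer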
+ auto CDV = dyn_cast<ConstantDataVector>(II.getArgOperand(1));
+ auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(1));
+ if (!CDV && !CInt)
+ return nullptr;
+ ConstantInt *Count;
+ if (CDV)
+ Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
+ else
+ Count = CInt;
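+ // Note: for the vector shift-count forms (psll/psrl), only element 0 of
+ // the count vector is used.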
+
+ auto Vec = II.getArgOperand(0);
+ auto VT = cast<VectorType>(Vec->getType());
+ auto SVT = VT->getElementType();
+ if (Count->getZExtValue() > (SVT->getPrimitiveSizeInBits() - 1))
+ return ConstantAggregateZero::get(VT);
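+ // (The explicit zero fold is needed because x86 logical shifts define
+ // out-of-range counts to produce zero, whereas an IR shl/lshr by >= the
+ // element width would be undefined.)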
+
+ unsigned VWidth = VT->getNumElements();
+
+ // Build a scalar shift amount of the element type; it is splatted below
+ // into a constant vector of the same type as the first operand.
+ auto VTCI = ConstantInt::get(SVT, Count->getZExtValue());
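+ // e.g. for the <8 x i16> forms with Count == 5, the splat below is
+ //   <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>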
+
+ if (ShiftLeft)
+ return Builder.CreateShl(Vec, Builder.CreateVectorSplat(VWidth, VTCI));
+
+ return Builder.CreateLShr(Vec, Builder.CreateVectorSplat(VWidth, VTCI));
+}
+
static Value *SimplifyX86extend(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder,
bool SignExtend) {
VectorType *SrcTy = cast<VectorType>(II.getArgOperand(0)->getType());
VectorType *DstTy = cast<VectorType>(II.getType());
unsigned NumDstElts = DstTy->getNumElements();
II->setArgOperand(0, V);
return II;
}
break;
}

- // Constant fold <A x Bi> << Ci.
- // FIXME: We don't handle _dq because it's a shift of an i128, but is
- // represented in the IR as <2 x i64>. A per element shift is wrong.
+ // Constant fold lshr( <A x Bi>, Ci ).
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ if (Value *V = SimplifyX86immshift(*II, *Builder, false))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
+ // Constant fold shl( <A x Bi>, Ci ).
case Intrinsic::x86_sse2_psll_d:
case Intrinsic::x86_sse2_psll_q:
case Intrinsic::x86_sse2_psll_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
- case Intrinsic::x86_sse2_psrl_d:
- case Intrinsic::x86_sse2_psrl_q:
- case Intrinsic::x86_sse2_psrl_w:
- case Intrinsic::x86_sse2_psrli_d:
- case Intrinsic::x86_sse2_psrli_q:
- case Intrinsic::x86_sse2_psrli_w:
- case Intrinsic::x86_avx2_psrl_d:
- case Intrinsic::x86_avx2_psrl_q:
- case Intrinsic::x86_avx2_psrl_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- case Intrinsic::x86_avx2_psrli_w: {
- // Simplify if count is constant. To 0 if >= BitWidth,
- // otherwise to shl/lshr.
- auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1));
- auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1));
- if (!CDV && !CInt)
- break;
- ConstantInt *Count;
- if (CDV)
- Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
- else
- Count = CInt;
-
- auto Vec = II->getArgOperand(0);
- auto VT = cast<VectorType>(Vec->getType());
- if (Count->getZExtValue() >
- VT->getElementType()->getPrimitiveSizeInBits() - 1)
- return ReplaceInstUsesWith(
- CI, ConstantAggregateZero::get(Vec->getType()));
-
- bool isPackedShiftLeft = true;
- switch (II->getIntrinsicID()) {
- default : break;
- case Intrinsic::x86_sse2_psrl_d:
- case Intrinsic::x86_sse2_psrl_q:
- case Intrinsic::x86_sse2_psrl_w:
- case Intrinsic::x86_sse2_psrli_d:
- case Intrinsic::x86_sse2_psrli_q:
- case Intrinsic::x86_sse2_psrli_w:
- case Intrinsic::x86_avx2_psrl_d:
- case Intrinsic::x86_avx2_psrl_q:
- case Intrinsic::x86_avx2_psrl_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break;
- }
-
- unsigned VWidth = VT->getNumElements();
- // Get a constant vector of the same type as the first operand.
- auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
- if (isPackedShiftLeft)
- return BinaryOperator::CreateShl(Vec,
- Builder->CreateVectorSplat(VWidth, VTCI));
-
- return BinaryOperator::CreateLShr(Vec,
- Builder->CreateVectorSplat(VWidth, VTCI));
- }
+ if (Value *V = SimplifyX86immshift(*II, *Builder, true))
+ return ReplaceInstUsesWith(*II, V);
+ break;

case Intrinsic::x86_sse41_pmovsxbd:
case Intrinsic::x86_sse41_pmovsxbq:
case Intrinsic::x86_sse41_pmovsxbw:
case Intrinsic::x86_sse41_pmovsxdq:
case Intrinsic::x86_sse41_pmovsxwd:
case Intrinsic::x86_sse41_pmovsxwq: