From: Craig Topper Date: Mon, 16 Feb 2015 20:51:59 +0000 (+0000) Subject: [X86] Remove x86.avx2.psll.dq.bs and x86.avx2.psrl.dq.bs intrinsics. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=e124dc723b26c58f9dbfda49fc1713396f5d5be7;p=oota-llvm.git [X86] Remove x86.avx2.psll.dq.bs and x86.avx2.psrl.dq.bs intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229430 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 90ce74c30b5..fef39a61c5b 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -1586,12 +1586,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_psrl_dq : Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx2_psll_dq_bs : - Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx2_psrl_dq_bs : - Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, - llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 0c494ef12e4..3c5e469b34d 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -165,6 +165,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name == "x86.avx.vbroadcast.sd.256" || Name == "x86.sse2.psll.dq.bs" || Name == "x86.sse2.psrl.dq.bs" || + Name == "x86.avx2.psll.dq.bs" || + Name == "x86.avx2.psrl.dq.bs" || (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) { NewFn = nullptr; return true; @@ -526,6 +528,52 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = Builder.CreateBitCast(Op1, VectorType::get(Type::getInt64Ty(C), 2), "cast"); + } else if (Name == "llvm.x86.avx2.psll.dq.bs") { + Value *Op0 = ConstantVector::getSplat(32, Builder.getInt8(0)); + Value *Op1 = Builder.CreateBitCast(CI->getArgOperand(0), + VectorType::get(Type::getInt8Ty(C),32), + "cast"); + + unsigned Shift = cast(CI->getArgOperand(1))->getZExtValue(); + + if (Shift < 16) { + SmallVector Idxs; + for (unsigned l = 0; l < 32; l += 16) + for (unsigned i = 0; i != 16; ++i) { + unsigned Idx = i + Shift; + if (Idx >= 16) Idx += 16; // end of lane, switch operand. + Idxs.push_back(Builder.getInt32(Idx + l)); + } + + Op1 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs)); + } + + Rep = Builder.CreateBitCast(Op1, + VectorType::get(Type::getInt64Ty(C), 4), + "cast"); + } else if (Name == "llvm.x86.avx2.psrl.dq.bs") { + Value *Op0 = Builder.CreateBitCast(CI->getArgOperand(0), + VectorType::get(Type::getInt8Ty(C),32), + "cast"); + Value *Op1 = ConstantVector::getSplat(32, Builder.getInt8(0)); + + unsigned Shift = cast(CI->getArgOperand(1))->getZExtValue(); + + if (Shift < 16) { + SmallVector Idxs; + for (unsigned l = 0; l < 32; l += 16) + for (unsigned i = 0; i != 16; ++i) { + unsigned Idx = 32 + i - Shift; + if (Idx < 32) Idx -= 16; // end of lane, switch operand. + Idxs.push_back(Builder.getInt32(Idx + l)); + } + + Op0 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs)); + } + + Rep = Builder.CreateBitCast(Op0, + VectorType::get(Type::getInt64Ty(C), 4), + "cast"); } else { bool PD128 = false, PD256 = false, PS128 = false, PS256 = false; if (Name == "llvm.x86.avx.vpermil.pd.256") diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index dc54fc5c9ca..9799b61ca25 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4216,20 +4216,16 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR256, v8i32, v4i32, bc_v4i32, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; -let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in { // 256-bit logical shifts. def VPSLLDQYri : PDIi8<0x73, MRM7r, (outs VR256:$dst), (ins VR256:$src1, i32u8imm:$src2), "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, - (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>, - VEX_4V, VEX_L; + []>, VEX_4V, VEX_L; def VPSRLDQYri : PDIi8<0x73, MRM3r, (outs VR256:$dst), (ins VR256:$src1, i32u8imm:$src2), "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, - (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>, - VEX_4V, VEX_L; + []>, VEX_4V, VEX_L; // PSRADQYri doesn't exist in SSE[1-3]. } } // Predicates = [HasAVX2]