def int_x86_sse2_psrl_dq :
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psll_dq_bs : GCCBuiltin<"__builtin_ia32_pslldqi128_byteshift">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi128_byteshift">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
}
// Conversion ops
Name == "x86.avx.vbroadcast.ss" ||
Name == "x86.avx.vbroadcast.ss.256" ||
Name == "x86.avx.vbroadcast.sd.256" ||
+ Name == "x86.sse2.psll.dq.bs" ||
+ Name == "x86.sse2.psrl.dq.bs" ||
(Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
NewFn = nullptr;
return true;
for (unsigned I = 0; I < EltNum; ++I)
Rep = Builder.CreateInsertElement(Rep, Load,
ConstantInt::get(I32Ty, I));
+ } else if (Name == "llvm.x86.sse2.psll.dq.bs") { // Byte-wise left shift, rebuilt from two bitcasts and one shuffle.
+ Value *Op0 = ConstantVector::getSplat(16, Builder.getInt8(0)); // All-zero <16 x i8>; remains the result when Shift >= 16.
+ Value *Op1 = Builder.CreateBitCast(CI->getArgOperand(0), // First call argument viewed as 16 x i8.
+ VectorType::get(Type::getInt8Ty(C),16),
+ "cast");
+ // The shift count is read through cast<ConstantInt>, so a non-constant second operand is not tolerated.
+ unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ // Shift offsets i8 lanes below, i.e. it is counted in bytes.
+ if (Shift < 16) {
+ SmallVector<Constant*, 16> Idxs;
+ for (unsigned i = 16; i != 32; ++i)
+ Idxs.push_back(Builder.getInt32(i - Shift));
+ // Mask values 16..31 select bytes of Op1; the low Shift lanes get values below 16 and select zeros from Op0.
+ Op0 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
+ }
+ // The shuffled (or all-zero) bytes are stored in Rep as <2 x i64>.
+ Rep = Builder.CreateBitCast(Op0,
+ VectorType::get(Type::getInt64Ty(C), 2),
+ "cast");
+ } else if (Name == "llvm.x86.sse2.psrl.dq.bs") { // Byte-wise right shift; mirror image of the branch above.
+ Value *Op0 = Builder.CreateBitCast(CI->getArgOperand(0), // First call argument viewed as 16 x i8.
+ VectorType::get(Type::getInt8Ty(C),16),
+ "cast");
+ Value *Op1 = ConstantVector::getSplat(16, Builder.getInt8(0)); // All-zero <16 x i8>; remains the result when Shift >= 16.
+ // The shift count is read through cast<ConstantInt>, so a non-constant second operand is not tolerated.
+ unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ // Shift offsets i8 lanes below, i.e. it is counted in bytes.
+ if (Shift < 16) {
+ SmallVector<Constant*, 16> Idxs;
+ for (unsigned i = 0; i != 16; ++i)
+ Idxs.push_back(Builder.getInt32(i + Shift));
+ // Mask values below 16 select bytes of Op0; the high Shift lanes reach 16 or more and select zeros from Op1.
+ Op1 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
+ }
+ Rep = Builder.CreateBitCast(Op1, // Result stored in Rep as <2 x i64>.
+ VectorType::get(Type::getInt64Ty(C), 2),
+ "cast");
} else {
bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
if (Name == "llvm.x86.avx.vpermil.pd.256")
VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in { // hasSideEffects is stated explicitly; both defs below now carry an empty pattern ([]). NOTE(review): presumably required because nothing can be inferred from an empty pattern - confirm.
// 128-bit logical shifts.
def VPSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32u8imm:$src2),
"vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>,
- VEX_4V;
+ []>, VEX_4V;
def VPSRLDQri : PDIi8<0x73, MRM3r,
(outs VR128:$dst), (ins VR128:$src1, i32u8imm:$src2),
"vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>,
- VEX_4V;
+ []>, VEX_4V;
// PSRADQri doesn't exist in SSE[1-3].
}
} // Predicates = [HasAVX]
VR128, v4i32, v4i32, bc_v4i32, memopv2i64,
SSE_INTSHIFT_ITINS_P>;
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in { // hasSideEffects is stated explicitly; both defs below now carry an empty pattern ([]). NOTE(review): presumably required because nothing can be inferred from an empty pattern - confirm.
// 128-bit logical shifts.
def PSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32u8imm:$src2),
"pslldq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))],
- IIC_SSE_INTSHDQ_P_RI>;
+ [], IIC_SSE_INTSHDQ_P_RI>;
def PSRLDQri : PDIi8<0x73, MRM3r,
(outs VR128:$dst), (ins VR128:$src1, i32u8imm:$src2),
"psrldq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))],
- IIC_SSE_INTSHDQ_P_RI>;
+ [], IIC_SSE_INTSHDQ_P_RI>;
// PSRADQri doesn't exist in SSE[1-3].
}
} // Constraints = "$src1 = $dst"
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
-define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
- ; CHECK: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
- %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
-
-
define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsllq
%res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
-define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
- ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
- %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
-
-
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsrlq
%res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
-define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
- ; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
- %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
-
-
define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: psllq
%res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
-define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
- ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
- %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
-
-
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: psrlq
%res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]