From ae475496f69cc7bfe54e514108f648bdad52a585 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 29 Nov 2015 22:53:22 +0000 Subject: [PATCH] [X86] int_x86_avx2_permps and X86ISD::VPERMV should take an integer vector for its shuffle indices. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254269 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 2 +- lib/Target/X86/X86ISelLowering.cpp | 4 +--- lib/Target/X86/X86InstrFragmentsSIMD.td | 6 +++++- test/CodeGen/X86/avx2-intrinsics-x86.ll | 6 +++--- test/CodeGen/X86/stack-folding-int-avx2.ll | 6 +++--- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index a07c4c46abc..7ec0f4b86f1 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -2431,7 +2431,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">, - Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx2_vperm2i128 : GCCBuiltin<"__builtin_ia32_permti256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 3904d273c7d..59350ab2184 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -10539,9 +10539,7 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, if (Subtarget->hasAVX2()) return DAG.getNode( X86ISD::VPERMV, DL, MVT::v8f32, - DAG.getBitcast(MVT::v8f32, DAG.getNode(ISD::BUILD_VECTOR, DL, - MVT::v8i32, VPermMask)), - V1); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1); // Otherwise, fall back. return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 30ca3b470fc..8d7d48bd61a 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -376,7 +376,11 @@ def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD" , SDTPack>; def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>; def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>; -def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>; +def X86VPermv : SDNode<"X86ISD::VPERMV", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<1>, + SDTCisSameNumEltsAs<0,1>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<0,2>]>>; def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>; def X86VPermt2 : SDNode<"X86ISD::VPERMV3", SDTypeProfile<1, 3, [SDTCisVec<0>, diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll index 3b2a009f271..606aca9dc02 100644 --- a/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -668,15 +668,15 @@ define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) { declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly -define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x float> %a1) { +define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) { ; Check that the arguments are swapped between the intrinsic definition ; and its lowering. Indeed, the offsets are the first source in ; the instruction. ; CHECK: vpermps %ymm0, %ymm1, %ymm0 - %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1] ret <8 x float> %res } -declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly +declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) { diff --git a/test/CodeGen/X86/stack-folding-int-avx2.ll b/test/CodeGen/X86/stack-folding-int-avx2.ll index 7febee242cf..235a10ed467 100644 --- a/test/CodeGen/X86/stack-folding-int-avx2.ll +++ b/test/CodeGen/X86/stack-folding-int-avx2.ll @@ -455,14 +455,14 @@ define <4 x double> @stack_fold_permpd(<4 x double> %a0) { ret <4 x double> %3 } -define <8 x float> @stack_fold_permps(<8 x float> %a0, <8 x float> %a1) { +define <8 x float> @stack_fold_permps(<8 x i32> %a0, <8 x float> %a1) { ;CHECK-LABEL: stack_fold_permps ;CHECK: vpermps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x float> %a0) + %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x i32> %a0) ret <8 x float> %2 } -declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly +declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly define <4 x i64> @stack_fold_permq(<4 x i64> %a0) { ;CHECK-LABEL: stack_fold_permq -- 2.34.1