From: Michael Liao Date: Wed, 5 Jun 2013 18:12:26 +0000 (+0000) Subject: [PATCH] Fix VGATHER* operand constraints X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=9a508ef64a194f0f4a3362c55a6e33bec18b7554;p=oota-llvm.git [PATCH] Fix VGATHER* operand constraints Add earlyclobber constraints to prevent input register being allocated as the output register because, according to Intel spec [1], "If any pair of the index, mask, or destination registers are the same, this instruction results a UD fault." --- [1] http://software.intel.com/sites/default/files/319433-014.pdf git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183327 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 02f5fe49525..14ec2ddb4ec 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2041,6 +2041,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::x86_avx2_gather_d_d_256: case Intrinsic::x86_avx2_gather_q_d: case Intrinsic::x86_avx2_gather_q_d_256: { + if (!Subtarget->hasAVX2()) + break; unsigned Opc; switch (IntNo) { default: llvm_unreachable("Impossible intrinsic"); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index cce7788fb3d..a1d3e813119 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -8372,7 +8372,9 @@ multiclass avx2_gather opc, string OpcodeStr, RegisterClass RC256, []>, VEX_4VOp3, VEX_L; } -let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in { +let mayLoad = 1, Constraints + = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb" + in { defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W; defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W; defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>; diff --git a/test/CodeGen/X86/avx2-gather.ll 
b/test/CodeGen/X86/avx2-gather.ll new file mode 100644 index 00000000000..ee50c457fe8 --- /dev/null +++ b/test/CodeGen/X86/avx2-gather.ll @@ -0,0 +1,18 @@ +; RUN: not llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s + +declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, + <4 x i32>, <4 x float>, i8) nounwind readonly + +define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1, + <4 x i32> %idx, <4 x float> %mask) { + %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef, + i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ; + ret <4 x float> %res +} + +; CHECK: test_x86_avx2_gather_d_ps +; CHECK: vgatherdps +; CHECK-NOT: [[DST]] +; CHECK: [[DST:%xmm[0-9]+]]{{$}} +; CHECK: ret