From 3a7cd951c12dfafa9c182b2a027a283c63cdb4d0 Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Sat, 7 Oct 2006 21:55:32 +0000
Subject: [PATCH] completely disable folding of loads into scalar sse
 instructions and provide a framework for doing it right. This fixes
 CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll.

Once X86DAGToDAGISel::SelectScalarSSELoad is implemented right, this task
will be done.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30817 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelDAGToDAG.cpp | 25 +++++++++++++++
 lib/Target/X86/X86InstrSSE.td      | 50 +++++++++++++++++++++---------
 2 files changed, 61 insertions(+), 14 deletions(-)

diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 68d390cf5ca..785af30e3a7 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -147,6 +147,8 @@ namespace {
                     SDOperand &Index, SDOperand &Disp);
     bool SelectLEAAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
                        SDOperand &Index, SDOperand &Disp);
+    bool SelectScalarSSELoad(SDOperand N, SDOperand &Base, SDOperand &Scale,
+                             SDOperand &Index, SDOperand &Disp);
     bool TryFoldLoad(SDOperand P, SDOperand N, SDOperand &Base,
                      SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
 
@@ -724,6 +726,29 @@ bool X86DAGToDAGISel::SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
   return true;
 }
 
+/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
+/// match a load whose top elements are either undef or zeros. The load flavor
+/// is derived from the type of N, which is either v4f32 or v2f64.
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand N, SDOperand &Base,
+                                          SDOperand &Scale,
+                                          SDOperand &Index, SDOperand &Disp) {
+#if 0
+  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+    if (N.getOperand(0).getOpcode() == ISD::LOAD) {
+      SDOperand LoadAddr = N.getOperand(0).getOperand(0);
+      if (!SelectAddr(LoadAddr, Base, Scale, Index, Disp))
+        return false;
+      return true;
+    }
+  }
+  // TODO: Also handle the case where we explicitly require zeros in the top
+  // elements. This is a vector shuffle from the zero vector.
+#endif
+
+  return false;
+}
+
+
 /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
 /// mode it matches can be cost effectively emitted as an LEA instruction.
 bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 84ccfeaedfc..55f45287619 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -13,6 +13,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+
 //===----------------------------------------------------------------------===//
 // SSE specific DAG Nodes.
 //===----------------------------------------------------------------------===//
@@ -31,6 +32,27 @@ def X86s2vec : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
 def X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>;
 def X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>;
 
+//===----------------------------------------------------------------------===//
+// SSE Complex Patterns
+//===----------------------------------------------------------------------===//
+
+// These are 'extloads' from a scalar to the low element of a vector, zeroing
+// the top elements. These are used for the SSE 'ss' and 'sd' instruction
+// forms.
+def sse_load_f32 : ComplexPattern<v4f32, 4, "SelectScalarSSELoad", [load]>;
+def sse_load_f64 : ComplexPattern<v2f64, 4, "SelectScalarSSELoad", [load]>;
+
+def ssmem : Operand<v4f32> {
+  let PrintMethod = "printf32mem";
+  let NumMIOperands = 4;
+  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
+}
+def sdmem : Operand<v2f64> {
+  let PrintMethod = "printf64mem";
+  let NumMIOperands = 4;
+  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
+}
+
 //===----------------------------------------------------------------------===//
 // SSE pattern fragments
 //===----------------------------------------------------------------------===//
@@ -185,18 +207,18 @@ multiclass SS_IntUnary<bits<8> o, string OpcodeStr, Intrinsic IntId> {
   def r : SSI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src),
               !strconcat(OpcodeStr, " {$src, $dst|$dst, $src}"),
               [(set VR128:$dst, (v4f32 (IntId VR128:$src)))]>;
-  def m : SSI<o, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
+  def m : SSI<o, MRMSrcMem, (ops VR128:$dst, ssmem:$src),
               !strconcat(OpcodeStr, " {$src, $dst|$dst, $src}"),
-              [(set VR128:$dst, (v4f32 (IntId (load addr:$src))))]>;
+              [(set VR128:$dst, (v4f32 (IntId sse_load_f32:$src)))]>;
 }
 
 multiclass SD_IntUnary<bits<8> o, string OpcodeStr, Intrinsic IntId> {
   def r : SDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src),
               !strconcat(OpcodeStr, " {$src, $dst|$dst, $src}"),
               [(set VR128:$dst, (v2f64 (IntId VR128:$src)))]>;
-  def m : SDI<o, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
+  def m : SDI<o, MRMSrcMem, (ops VR128:$dst, sdmem:$src),
               !strconcat(OpcodeStr, " {$src, $dst|$dst, $src}"),
-              [(set VR128:$dst, (v2f64 (IntId (load addr:$src))))]>;
+              [(set VR128:$dst, (v2f64 (IntId sse_load_f64:$src)))]>;
 }
 
 class PS_Intr<bits<8> o, string OpcodeStr, Intrinsic IntId>
@@ -315,10 +337,10 @@ multiclass scalar_sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
   // Scalar operation, reg+mem.
   def SSrm : SSI<opc, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                  !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
-                 [(set FR32:$dst, (OpNode FR32:$src1, (loadf32 addr:$src2)))]>;
+                 [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
   def SDrm : SDI<opc, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
-                 [(set FR64:$dst, (OpNode FR64:$src1, (loadf64 addr:$src2)))]>;
+                 [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
 
   // Vector intrinsic operation, reg+reg.
   def SSrr_Int : SSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
@@ -332,11 +354,11 @@ multiclass scalar_sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
     let isCommutable = Commutable;
   }
   // Vector intrinsic operation, reg+mem.
-  def SSrm_Int : SSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
+  def SSrm_Int : SSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, ssmem:$src2),
                      !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
-                     [(set VR128:$dst, (F32Int VR128:$src1, (load addr:$src2)))]>;
-  def SDrm_Int : SDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
+                     [(set VR128:$dst, (F32Int VR128:$src1, sse_load_f32:$src2))]>;
+  def SDrm_Int : SDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, sdmem:$src2),
                      !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
-                     [(set VR128:$dst, (F64Int VR128:$src1, (load addr:$src2)))]>;
+                     [(set VR128:$dst, (F64Int VR128:$src1, sse_load_f64:$src2))]>;
   }
 }
@@ -373,17 +395,17 @@ class SS_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
        !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
 class SS_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
-  : SSI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
+  : SSI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, ssmem:$src2),
        !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (load addr:$src2))))]>;
+       [(set VR128:$dst, (v4f32 (IntId VR128:$src1, sse_load_f32:$src2)))]>;
 
 class SD_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
   : SDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
        !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
 class SD_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
-  : SDI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
+  : SDI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, sdmem:$src2),
        !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (load addr:$src2))))]>;
+       [(set VR128:$dst, (v2f64 (IntId VR128:$src1, sse_load_f64:$src2)))]>;
 
 // Aliases to match intrinsics which expect XMM operand(s).
-- 
2.34.1
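
A note for readers following the change (not part of the patch itself): below
is a small standalone C++ sketch contrasting the two operand shapes that the
new sse_load_f32/sse_load_f64 complex patterns are meant to separate. The
function names are invented for illustration; the behavior claims come from
the patch's own comments ("match a load whose top elements are either undef
or zeros") and from the testcase the commit message cites.

    // Hypothetical illustration; builds with any compiler providing SSE
    // intrinsics, e.g. g++ -O2 -msse2.
    #include <xmmintrin.h>

    // A genuine 4-byte scalar load: folding it into the memory form of
    // 'addss' is always safe, which is why the plain FR32/FR64 patterns
    // above keep using (load addr:$src2).
    float add_from_mem(float a, const float *p) {
      return a + *p;             // addss with a folded 4-byte load is fine
    }

    // A full 16-byte vector load feeding the 'ss' intrinsic form: the
    // memory form of subss reads only the low 4 bytes, so the fold is
    // legal only when the loaded vector's top elements are provably undef
    // or zero -- the condition SelectScalarSSELoad must verify before
    // sse_load_f32 will match anything. Until then the vector load stays
    // a separate instruction, which is what
    // CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll tests.
    __m128 sub_ss_from_mem(__m128 A, const __m128 *B) {
      return _mm_sub_ss(A, *B);
    }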