addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
// FIXME: add MMX packed arithmetic
- setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
}
if (TM.getSubtarget<X86Subtarget>().hasSSE1()) {
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
- setOperationAction(ISD::ADD , MVT::v4f32, Legal);
- setOperationAction(ISD::SUB , MVT::v4f32, Legal);
- setOperationAction(ISD::MUL , MVT::v4f32, Legal);
- setOperationAction(ISD::LOAD , MVT::v4f32, Legal);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Expand);
+ setOperationAction(ISD::ADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::SUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Expand);
}
if (TM.getSubtarget<X86Subtarget>().hasSSE2()) {
addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
- setOperationAction(ISD::ADD , MVT::v2f64, Legal);
- setOperationAction(ISD::SUB , MVT::v2f64, Legal);
- setOperationAction(ISD::MUL , MVT::v2f64, Legal);
- setOperationAction(ISD::LOAD , MVT::v2f64, Legal);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Expand);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Expand);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Expand);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Expand);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Expand);
+ setOperationAction(ISD::ADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::SUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::MUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
}
computeRegisterProperties();
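Taken together, the three subtarget blocks above populate a per-(opcode, value type) action table: Legal nodes are matched directly by instruction selection, Expand nodes are decomposed by the legalizer (for BUILD_VECTOR, typically via element stores to a stack slot reloaded as one vector), and Custom nodes are routed to the target's LowerOperation hook, which is what makes the new SCALAR_TO_VECTOR case below fire. A minimal sketch of that dispatch, assuming a heavily simplified model; the enum and function here are illustrative, not LLVM's actual API:

// Illustrative only: a stand-in for the (opcode, value type) -> Action
// table that setOperationAction fills in above.
enum Action { Legal, Expand, Custom };

Action lookupAction(bool isBuildVector, bool isScalarToVecI8I16) {
  if (isBuildVector)      return Expand;  // decomposed by the legalizer
  if (isScalarToVecI8I16) return Custom;  // handed to X86 LowerOperation
  return Legal;                           // e.g. ADD/SUB/MUL/LOAD on v4f32
}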
Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
Copy.getValue(1));
}
+ case ISD::SCALAR_TO_VECTOR: {
+ SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
+ return DAG.getNode(X86ISD::SCALAR_TO_VECTOR, Op.getValueType(), AnyExt);
+ }
}
}
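The ANY_EXTEND in the new case is the key step: movd only moves a full 32-bit GPR, so an i8 or i16 scalar (the two types marked Custom above; i32 already matches the generic scalar_to_vector pattern on MOVD128rr below) is first widened to i32 with unspecified high bits. That is safe because only the low 8 or 16 bits of element 0 are defined for the original node. A value-level sketch of the net effect for v8i16, with illustrative names:

#include <cstdint>

// What (v8i16 (scalar_to_vector x)) computes after this lowering: the i16
// is any-extended to i32 (zeroing is one valid choice for the undefined
// high bits), then movd places it in element 0 and clears the other lanes.
struct V8I16 { uint16_t e[8]; };

V8I16 scalarToVectorI16(uint16_t x) {
  uint32_t widened = x;                     // ISD::ANY_EXTEND to i32
  V8I16 v = {};                             // movd zeroes the upper lanes
  v.e[0] = static_cast<uint16_t>(widened);  // scalar lands in element 0
  return v;
}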
case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK";
case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
case X86ISD::Wrapper: return "X86ISD::Wrapper";
+ case X86ISD::SCALAR_TO_VECTOR: return "X86ISD::SCALAR_TO_VECTOR";
}
}
// SSE-specific DAG Nodes.
//===----------------------------------------------------------------------===//
-def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
- [SDNPHasChain]>;
-def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]>;
-def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]>;
+def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
+ [SDNPHasChain]>;
+def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86s2vec : SDNode<"X86ISD::SCALAR_TO_VECTOR",
+ SDTypeProfile<1, 1, []>, []>;
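Note the deliberately loose profile: SDTypeProfile<1, 1, []> declares one result and one operand with no type constraints, so the legal (result, operand) pairings for X86s2vec are pinned down entirely by the selection patterns at the end of this patch (v8i16 and v16i8 results from an R32 source).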
//===----------------------------------------------------------------------===//
// SSE pattern fragments
[(set VR128:$dst, (v4f32 (undef)))]>,
Requires<[HasSSE1]>;
-def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
-def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
-def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
-
// Move Instructions
def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
"movaps {$src, $dst|$dst, $src}", []>;
// Move Instructions
def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
- "movd {$src, $dst|$dst, $src}", []>;
+ "movd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector R32:$src)))]>;
def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
"movd {$src, $dst|$dst, $src}", []>;
def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
// SSE2 instructions with XS prefix
def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
- "movq {$src, $dst|$dst, $src}", []>, XS,
+ "movq {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
Requires<[HasSSE2]>;
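The movq form is meant to mirror movd one size up: a 64-bit value lands in the low quadword of the XMM register with the upper quadword zeroed, which is what lets it carry the (v2i64 (scalar_to_vector ...)) pattern.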
def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
- "movq {$src, $dst|$dst, $src}", []>, XS,
- Requires<[HasSSE2]>;
+ "movq {$src, $dst|$dst, $src}", []>, XS;
def MOVQ128mr : PDI<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src),
"movq {$src, $dst|$dst, $src}", []>;
"movapd {$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2f64 (scalar_to_vector FR64:$src)))]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// 128-bit vector undefs.
+def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
+def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
+def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
+
+// Store 128-bit integer vector values.
+def : Pat<(store (v16i8 VR128:$src), addr:$dst), (MOVAPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (v8i16 VR128:$src), addr:$dst), (MOVAPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (v4i32 VR128:$src), addr:$dst), (MOVAPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (v2i64 VR128:$src), addr:$dst), (MOVAPSmr addr:$dst, VR128:$src)>;
+
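These stores reuse the aligned float-domain instruction, movaps, for all four integer vector types, so the selected code assumes 16-byte-aligned addresses (movaps faults on unaligned ones). A user-level sketch of the same contract using SSE2 intrinsics, with an illustrative function name:

#include <emmintrin.h>

// Aligned 128-bit store, the contract the MOVAPSmr patterns above rely on:
// dst must be 16-byte aligned or the store faults at run time.
void storeV4I32(__m128i *dst, __m128i v) {
  _mm_store_si128(dst, v);  // compiles to an aligned 128-bit store
}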
+// Scalar to v8i16 / v16i8. The source may be an R32, but only the lower 8
+// or 16 bits matter.
+def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
+ Requires<[HasSSE2]>;
+def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
+ Requires<[HasSSE2]>;
+
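Both patterns select plain movd for the X86s2vec node produced by the custom lowering above, even though only part of the register is meaningful. A user-level analogue with SSE2 intrinsics (_mm_cvtsi32_si128 is the intrinsic that compiles to movd; the wrapper name is illustrative):

#include <emmintrin.h>

// movd copies the whole 32-bit register into element 0 and zeroes the
// remaining lanes; since only the low 16 (or 8) bits of element 0 were
// defined to begin with, the extra bits it carries along are never observed.
__m128i scalarToV8I16(unsigned short x) {
  return _mm_cvtsi32_si128(static_cast<int>(x));  // movd r32 -> xmm
}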