Create a new ARM-specific DAG node, VDUP, to represent a splat from a scalar_to_vector.

author Bob Wilson <bob.wilson@apple.com>

Fri, 14 Aug 2009 05:13:08 +0000 (05:13 +0000)

committer Bob Wilson <bob.wilson@apple.com>

Fri, 14 Aug 2009 05:13:08 +0000 (05:13 +0000)
author Bob Wilson <bob.wilson@apple.com>
Fri, 14 Aug 2009 05:13:08 +0000 (05:13 +0000)
committer Bob Wilson <bob.wilson@apple.com>
Fri, 14 Aug 2009 05:13:08 +0000 (05:13 +0000)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index 3c40192ea577952d47fe85dd5db4da8582bc9d0a..641476c090a33b0dc5e704fa96738cc5e3c1ab7c 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -477,6 +477,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
    case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
    case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
+  case ARMISD::VDUP:          return "ARMISD::VDUP";
    case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
    case ARMISD::VLD2D:         return "ARMISD::VLD2D";
    case ARMISD::VLD3D:         return "ARMISD::VLD3D";
@@ -2449,9 +2450,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
    // of the same time so that they get CSEd properly.
    if (SVN->isSplat()) {
      int Lane = SVN->getSplatIndex();
-    if (Lane != 0)
-      return DAG.getNode(ARMISD::VDUPLANE, dl, VT, SVN->getOperand(0),
-                        DAG.getConstant(Lane, MVT::i32));
+    SDValue Op0 = SVN->getOperand(0);
+    if (Lane == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+      return DAG.getNode(ARMISD::VDUP, dl, VT, Op0.getOperand(0));
+    }
+    return DAG.getNode(ARMISD::VDUPLANE, dl, VT, SVN->getOperand(0),
+                      DAG.getConstant(Lane, MVT::i32));
    }
    if (isVREVMask(SVN, 64))
      return DAG.getNode(ARMISD::VREV64, dl, VT, SVN->getOperand(0));
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h

index d98b6ef9878ee38ee56ac21d6959f86bd10b05c8..88dddf2289e1beb01ce98b650b45019867c871c3 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -115,7 +115,8 @@ namespace llvm {
        VGETLANEu,    // zero-extend vector extract element
        VGETLANEs,    // sign-extend vector extract element
  
-      // Vector duplicate lane:
+      // Vector duplicate:
+      VDUP,
        VDUPLANE,
  
        // Vector load/store with (de)interleaving
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td

index acd6533c27fd2fc5660563aad9f04fcbe3544364..fad3308c0d9593e078c9e37f054f2193d9414012 100644 (file)
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -65,6 +65,8 @@ def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
  def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
  def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
  
+def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
+
  // VDUPLANE can produce a quad-register result from a double-register source,
  // so the result is not constrained to match the source.
  def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
@@ -1747,20 +1749,14 @@ def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
  
  //   VDUP     : Vector Duplicate (from ARM core register to all elements)
  
-def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
-                       (vector_shuffle node:$lhs, node:$rhs), [{
-  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
-  return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
-}]>;
-
  class VDUPD<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
    : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
            NoItinerary, "vdup", !strconcat(asmSize, "\t$dst, $src"),
-          [(set DPR:$dst, (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>;
+          [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
  class VDUPQ<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
    : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
            NoItinerary, "vdup", !strconcat(asmSize, "\t$dst, $src"),
-          [(set QPR:$dst, (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>;
+          [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
  
  def  VDUP8d   : VDUPD<0b11101100, 0b00, ".8", v8i8>;
  def  VDUP16d  : VDUPD<0b11101000, 0b01, ".16", v4i16>;
@@ -1771,16 +1767,12 @@ def  VDUP32q  : VDUPQ<0b11101010, 0b00, ".32", v4i32>;
  
  def  VDUPfd   : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
                        NoItinerary, "vdup", ".32\t$dst, $src",
-                      [(set DPR:$dst, (v2f32 (splat_lo
-                                              (scalar_to_vector
-                                               (f32 (bitconvert GPR:$src))),
-                                              undef)))]>;
+                      [(set DPR:$dst, (v2f32 (NEONvdup
+                                              (f32 (bitconvert GPR:$src)))))]>;
  def  VDUPfq   : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
                        NoItinerary, "vdup", ".32\t$dst, $src",
-                      [(set QPR:$dst, (v4f32 (splat_lo
-                                              (scalar_to_vector
-                                               (f32 (bitconvert GPR:$src))),
-                                              undef)))]>;
+                      [(set QPR:$dst, (v4f32 (NEONvdup
+                                              (f32 (bitconvert GPR:$src)))))]>;
  
  //   VDUP     : Vector Duplicate Lane (from scalar to all elements)
  
@@ -1826,16 +1818,12 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
  def VDUPfdf   : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 0, 0,
                      (outs DPR:$dst), (ins SPR:$src),
                      NoItinerary, "vdup.32\t$dst, ${src:lane}", "",
-                    [(set DPR:$dst, (v2f32 (splat_lo
-                                            (scalar_to_vector SPR:$src),
-                                            undef)))]>;
+                    [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
  
  def VDUPfqf   : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 1, 0,
                      (outs QPR:$dst), (ins SPR:$src),
                      NoItinerary, "vdup.32\t$dst, ${src:lane}", "",
-                    [(set QPR:$dst, (v4f32 (splat_lo
-                                            (scalar_to_vector SPR:$src),
-                                            undef)))]>;
+                    [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
  
  //   VMOVN    : Vector Narrowing Move
  defm VMOVN    : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, "vmovn.i",
author	Bob Wilson <bob.wilson@apple.com>
	Fri, 14 Aug 2009 05:13:08 +0000 (05:13 +0000)
committer	Bob Wilson <bob.wilson@apple.com>
	Fri, 14 Aug 2009 05:13:08 +0000 (05:13 +0000)
lib/Target/ARM/ARMISelLowering.cpp		patch | blob | history
lib/Target/ARM/ARMISelLowering.h		patch | blob | history
lib/Target/ARM/ARMInstrNEON.td		patch | blob | history