Reverting 40504 for now. It's breaking oggenc.

author Evan Cheng <evan.cheng@apple.com>

Fri, 27 Jul 2007 01:37:47 +0000 (01:37 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Fri, 27 Jul 2007 01:37:47 +0000 (01:37 +0000)
author Evan Cheng <evan.cheng@apple.com>
Fri, 27 Jul 2007 01:37:47 +0000 (01:37 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Fri, 27 Jul 2007 01:37:47 +0000 (01:37 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 94505c45c0f6a63fd9d5803dc86b498a5e9c463c..23f9e9500c2dde606a1cc11adde7835c8aaf3e29 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3367,10 +3367,14 @@ SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
      CV.push_back(C);
      CV.push_back(C);
    }
-  Constant *C = ConstantVector::get(CV);
-  SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
-  SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
-                               false, 16);
+  Constant *CS = ConstantStruct::get(CV);
+  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
+  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+  SmallVector<SDOperand, 3> Ops;
+  Ops.push_back(DAG.getEntryNode());
+  Ops.push_back(CPIdx);
+  Ops.push_back(DAG.getSrcValue(NULL));
+  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
    return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
  }
  
@@ -3395,16 +3399,21 @@ SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
      CV.push_back(C);
      CV.push_back(C);
    }
-  Constant *C = ConstantVector::get(CV);
-  SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
-  SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
-                               false, 16);
+  Constant *CS = ConstantStruct::get(CV);
+  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
    if (MVT::isVector(VT)) {
+    SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0);
      return DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::XOR, MVT::v2i64,
                      DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
                      DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
    } else {
+    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+    SmallVector<SDOperand, 3> Ops;
+    Ops.push_back(DAG.getEntryNode());
+    Ops.push_back(CPIdx);
+    Ops.push_back(DAG.getSrcValue(NULL));
+    SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
      return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
    }
  }
@@ -3433,10 +3442,14 @@ SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
      CV.push_back(ConstantFP::get(SrcTy, 0.0));
      CV.push_back(ConstantFP::get(SrcTy, 0.0));
    }
-  Constant *C = ConstantVector::get(CV);
-  SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
-  SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0,
-                                false, 16);
+  Constant *CS = ConstantStruct::get(CV);
+  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
+  SDVTList Tys = DAG.getVTList(SrcVT, MVT::Other);
+  SmallVector<SDOperand, 3> Ops;
+  Ops.push_back(DAG.getEntryNode());
+  Ops.push_back(CPIdx);
+  Ops.push_back(DAG.getSrcValue(NULL));
+  SDOperand Mask1 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
    SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
  
    // Shift sign bit right or left if the two operands have different types.
@@ -3461,10 +3474,14 @@ SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
      CV.push_back(ConstantFP::get(SrcTy, 0.0));
      CV.push_back(ConstantFP::get(SrcTy, 0.0));
    }
-  C = ConstantVector::get(CV);
-  CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
-  SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
-                                false, 16);
+  CS = ConstantStruct::get(CV);
+  CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
+  Tys = DAG.getVTList(VT, MVT::Other);
+  Ops.clear();
+  Ops.push_back(DAG.getEntryNode());
+  Ops.push_back(CPIdx);
+  Ops.push_back(DAG.getSrcValue(NULL));
+  SDOperand Mask2 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
    SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
  
    // Or the value with the sign bit.
@@ -4340,6 +4357,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
    case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
    case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
    case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
+  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
+  case X86ISD::LOAD_UA:            return "X86ISD::LOAD_UA";
    case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
    case X86ISD::Wrapper:            return "X86ISD::Wrapper";
    case X86ISD::S2VEC:              return "X86ISD::S2VEC";
@@ -4737,14 +4756,19 @@ static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
    }
  
    bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
-  LoadSDNode *LD = cast<LoadSDNode>(Base);
    if (isAlign16) {
+    LoadSDNode *LD = cast<LoadSDNode>(Base);
      return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
-                       LD->getSrcValueOffset(), LD->isVolatile());
+                       LD->getSrcValueOffset());
    } else {
-    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
-                       LD->getSrcValueOffset(), LD->isVolatile(),
-                       LD->getAlignment());
+    // Just use movups, it's shorter.
+    SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
+    SmallVector<SDOperand, 3> Ops;
+    Ops.push_back(Base->getOperand(0));
+    Ops.push_back(Base->getOperand(1));
+    Ops.push_back(Base->getOperand(2));
+    return DAG.getNode(ISD::BIT_CONVERT, VT,
+                       DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
    }
  }
  
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index 521916e035acd1fe3f018ee0bfe106136ef83e9b..07a96d3569100f069164942c65ad5dc38f033511 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -143,6 +143,14 @@ namespace llvm {
        /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
        REP_MOVS,
  
+      /// LOAD_PACK Load a 128-bit packed float / double value. It has the same
+      /// operands as a normal load.
+      LOAD_PACK,
+
+      /// LOAD_UA Load an unaligned 128-bit value. It has the same operands as
+      /// a normal load.
+      LOAD_UA,
+
        /// GlobalBaseReg - On Darwin, this node represents the result of the popl
        /// at function entry, used for PIC code.
        GlobalBaseReg,
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 774b9bb4fc00782bea2193a7e183909207ce1112..9e0f75de997dd5d2d3778c6fa6b731527896474c 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -21,6 +21,8 @@
  def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
                                              SDTCisFP<0>, SDTCisInt<2> ]>;
  
+def X86loadp   : SDNode<"X86ISD::LOAD_PACK", SDTLoad, [SDNPHasChain]>;
+def X86loadu   : SDNode<"X86ISD::LOAD_UA",   SDTLoad, [SDNPHasChain]>;
  def X86fmin    : SDNode<"X86ISD::FMIN",      SDTFPBinOp>;
  def X86fmax    : SDNode<"X86ISD::FMAX",      SDTFPBinOp>;
  def X86fand    : SDNode<"X86ISD::FAND",      SDTFPBinOp,
@@ -80,6 +82,9 @@ def sdmem : Operand<v2f64> {
  // SSE pattern fragments
  //===----------------------------------------------------------------------===//
  
+def X86loadpf32  : PatFrag<(ops node:$ptr), (f32   (X86loadp node:$ptr))>;
+def X86loadpf64  : PatFrag<(ops node:$ptr), (f64   (X86loadp node:$ptr))>;
+
  def loadv4f32    : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
  def loadv2f64    : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
  def loadv4i32    : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
@@ -104,8 +109,6 @@ def alignedload : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
    return false;
  }]>;
  
-def alignedloadf32   : PatFrag<(ops node:$ptr), (f32   (alignedload node:$ptr))>;
-def alignedloadf64   : PatFrag<(ops node:$ptr), (f64   (alignedload node:$ptr))>;
  def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>;
  def alignedloadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (alignedload node:$ptr))>;
  def alignedloadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (alignedload node:$ptr))>;
@@ -407,7 +410,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
  // disregarded.
  def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                       "movaps {$src, $dst|$dst, $src}",
-                     [(set FR32:$dst, (alignedloadf32 addr:$src))]>;
+                     [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
  
  // Alias bitwise logical operations using SSE logical ops on packed FP values.
  let isTwoAddress = 1 in {
@@ -426,15 +429,15 @@ let isCommutable = 1 in {
  def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
                      "andps {$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst, (X86fand FR32:$src1,
-                                      (alignedloadf32 addr:$src2)))]>;
+                                      (X86loadpf32 addr:$src2)))]>;
  def FsORPSrm  : PSI<0x56, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
                      "orps {$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst, (X86for FR32:$src1,
-                                      (alignedloadf32 addr:$src2)))]>;
+                                      (X86loadpf32 addr:$src2)))]>;
  def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
                      "xorps {$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst, (X86fxor FR32:$src1,
-                                      (alignedloadf32 addr:$src2)))]>;
+                                      (X86loadpf32 addr:$src2)))]>;
  
  def FsANDNPSrr : PSI<0x55, MRMSrcReg,
                       (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
@@ -1080,7 +1083,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
  // disregarded.
  def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                       "movapd {$src, $dst|$dst, $src}",
-                     [(set FR64:$dst, (alignedloadf64 addr:$src))]>;
+                     [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
  
  // Alias bitwise logical operations using SSE logical ops on packed FP values.
  let isTwoAddress = 1 in {
@@ -1099,15 +1102,15 @@ let isCommutable = 1 in {
  def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
                      "andpd {$src2, $dst|$dst, $src2}",
                      [(set FR64:$dst, (X86fand FR64:$src1,
-                                      (alignedloadf64 addr:$src2)))]>;
+                                      (X86loadpf64 addr:$src2)))]>;
  def FsORPDrm  : PDI<0x56, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
                      "orpd {$src2, $dst|$dst, $src2}",
                      [(set FR64:$dst, (X86for FR64:$src1,
-                                      (alignedloadf64 addr:$src2)))]>;
+                                      (X86loadpf64 addr:$src2)))]>;
  def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
                      "xorpd {$src2, $dst|$dst, $src2}",
                      [(set FR64:$dst, (X86fxor FR64:$src1,
-                                      (alignedloadf64 addr:$src2)))]>;
+                                      (X86loadpf64 addr:$src2)))]>;
  
  def FsANDNPDrr : PDI<0x55, MRMSrcReg,
                       (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
@@ -2627,11 +2630,11 @@ def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
                    (load addr:$src2))),
            (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
  
+// Unaligned load
+def : Pat<(v4f32 (X86loadu addr:$src)), (MOVUPSrm addr:$src)>,
+      Requires<[HasSSE1]>;
+
  // Use movaps / movups for SSE integer load / store (one byte shorter).
-def : Pat<(alignedloadv4i32 addr:$src),
-          (MOVAPSrm addr:$src)>, Requires<[HasSSE1]>;
-def : Pat<(loadv4i32 addr:$src),
-          (MOVUPSrm addr:$src)>, Requires<[HasSSE1]>;
  def : Pat<(alignedloadv2i64 addr:$src),
            (MOVAPSrm addr:$src)>, Requires<[HasSSE2]>;
  def : Pat<(loadv2i64 addr:$src),
diff --git a/test/CodeGen/X86/vec_shuffle.ll b/test/CodeGen/X86/vec_shuffle.ll

index 16ce3dac5498e16217cfca28c9ffba3e73468310..d06efa5ff264dac19eb27ba538c116d989f57309 100644 (file)
--- a/test/CodeGen/X86/vec_shuffle.ll
+++ b/test/CodeGen/X86/vec_shuffle.ll
@@ -1,6 +1,6 @@
  ; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -mattr=+sse2 -o %t -f
  ; RUN: grep shufp   %t | wc -l | grep 1 
-; RUN: grep movupd  %t | wc -l | grep 1 
+; RUN: grep movups  %t | wc -l | grep 1 
  ; RUN: grep pshufhw %t | wc -l | grep 1
  
  void %test_v4sf(<4 x float>* %P, float %X, float %Y) {
author	Evan Cheng <evan.cheng@apple.com>
	Fri, 27 Jul 2007 01:37:47 +0000 (01:37 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Fri, 27 Jul 2007 01:37:47 +0000 (01:37 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
lib/Target/X86/X86ISelLowering.h		patch | blob | history
lib/Target/X86/X86InstrSSE.td		patch | blob | history
test/CodeGen/X86/vec_shuffle.ll		patch | blob | history