}
};
+/// LSBaseSDNode - Base class for LoadSDNode and StoreSDNode
+///
+class LSBaseSDNode : public SDNode {
+private:
+ //! SrcValue - Memory location for alias analysis.
+ const Value *SrcValue;
+
+ //! SVOffset - Memory location offset.
+ int SVOffset;
+
+ //! Alignment - Alignment of memory location in bytes.
+ unsigned Alignment;
+
+ //! IsVolatile - True if the load or store is volatile.
+ bool IsVolatile;
+protected:
+ //! Operand array for load and store
+ /*!
+ \note Moving this array to the base class captures more
+ common functionality shared between LoadSDNode and
+ StoreSDNode
+ */
+ SDOperand Ops[4];
+public:
+ LSBaseSDNode(ISD::NodeType NodeTy, SDVTList VTs, const Value *SV, int SVO,
+ unsigned Align, bool Vol)
+ : SDNode(NodeTy, VTs),
+ SrcValue(SV), SVOffset(SVO), Alignment(Align), IsVolatile(Vol)
+ { }
+
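+ // Operand layout: loads are (Chain, Ptr, Offset); stores are
+ // (Chain, Value, Ptr, Offset). The accessors below pick the right
+ // operand index based on the node's opcode.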
+ const SDOperand getChain() const {
+ return getOperand(0);
+ }
+ const SDOperand getBasePtr() const {
+ return getOperand(getOpcode() == ISD::LOAD ? 1 : 2);
+ }
+ const SDOperand getOffset() const {
+ return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
+ }
+ const SDOperand getValue() const {
+ assert(getOpcode() == ISD::STORE && "getValue() only valid for stores");
+ return getOperand(1);
+ }
+
+ const Value *getSrcValue() const { return SrcValue; }
+ int getSrcValueOffset() const { return SVOffset; }
+ unsigned getAlignment() const { return Alignment; }
+ bool isVolatile() const { return IsVolatile; }
+
+ static bool classof(const LSBaseSDNode *N) { return true; }
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::LOAD || N->getOpcode() == ISD::STORE;
+ }
+};
+
/// LoadSDNode - This class is used to represent ISD::LOAD nodes.
///
-class LoadSDNode : public SDNode {
+class LoadSDNode : public LSBaseSDNode {
virtual void ANCHOR(); // Out-of-line virtual method to give class a home.
- SDOperand Ops[3];
// AddrMode - unindexed, pre-indexed, post-indexed.
ISD::MemIndexedMode AddrMode;
// LoadedVT - VT of loaded value before extension.
MVT::ValueType LoadedVT;
-
- // SrcValue - Memory location for alias analysis.
- const Value *SrcValue;
-
- // SVOffset - Memory location offset.
- int SVOffset;
-
- // Alignment - Alignment of memory location in bytes.
- unsigned Alignment;
-
- // IsVolatile - True if the load is volatile.
- bool IsVolatile;
protected:
friend class SelectionDAG;
LoadSDNode(SDOperand *ChainPtrOff, SDVTList VTs,
ISD::MemIndexedMode AM, ISD::LoadExtType ETy, MVT::ValueType LVT,
const Value *SV, int O=0, unsigned Align=0, bool Vol=false)
- : SDNode(ISD::LOAD, VTs),
- AddrMode(AM), ExtType(ETy), LoadedVT(LVT), SrcValue(SV), SVOffset(O),
- Alignment(Align), IsVolatile(Vol) {
+ : LSBaseSDNode(ISD::LOAD, VTs, SV, O, Align, Vol),
+ AddrMode(AM), ExtType(ETy), LoadedVT(LVT) {
Ops[0] = ChainPtrOff[0]; // Chain
Ops[1] = ChainPtrOff[1]; // Ptr
Ops[2] = ChainPtrOff[2]; // Off
}
public:
- const SDOperand getChain() const { return getOperand(0); }
- const SDOperand getBasePtr() const { return getOperand(1); }
- const SDOperand getOffset() const { return getOperand(2); }
ISD::MemIndexedMode getAddressingMode() const { return AddrMode; }
ISD::LoadExtType getExtensionType() const { return ExtType; }
MVT::ValueType getLoadedVT() const { return LoadedVT; }
- const Value *getSrcValue() const { return SrcValue; }
- int getSrcValueOffset() const { return SVOffset; }
- unsigned getAlignment() const { return Alignment; }
- bool isVolatile() const { return IsVolatile; }
static bool classof(const LoadSDNode *) { return true; }
+ static bool classof(const LSBaseSDNode *N) { return N->getOpcode() == ISD::LOAD; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::LOAD;
}
/// StoreSDNode - This class is used to represent ISD::STORE nodes.
///
-class StoreSDNode : public SDNode {
+class StoreSDNode : public LSBaseSDNode {
virtual void ANCHOR(); // Out-of-line virtual method to give class a home.
- SDOperand Ops[4];
// AddrMode - unindexed, pre-indexed, post-indexed.
ISD::MemIndexedMode AddrMode;
// StoredVT - VT of the value after truncation.
MVT::ValueType StoredVT;
-
- // SrcValue - Memory location for alias analysis.
- const Value *SrcValue;
-
- // SVOffset - Memory location offset.
- int SVOffset;
-
- // Alignment - Alignment of memory location in bytes.
- unsigned Alignment;
-
- // IsVolatile - True if the store is volatile.
- bool IsVolatile;
protected:
friend class SelectionDAG;
StoreSDNode(SDOperand *ChainValuePtrOff, SDVTList VTs,
ISD::MemIndexedMode AM, bool isTrunc, MVT::ValueType SVT,
const Value *SV, int O=0, unsigned Align=0, bool Vol=false)
- : SDNode(ISD::STORE, VTs),
- AddrMode(AM), IsTruncStore(isTrunc), StoredVT(SVT), SrcValue(SV),
- SVOffset(O), Alignment(Align), IsVolatile(Vol) {
+ : LSBaseSDNode(ISD::STORE, VTs, SV, O, Align, Vol),
+ AddrMode(AM), IsTruncStore(isTrunc), StoredVT(SVT) {
Ops[0] = ChainValuePtrOff[0]; // Chain
Ops[1] = ChainValuePtrOff[1]; // Value
Ops[2] = ChainValuePtrOff[2]; // Ptr
}
public:
- const SDOperand getChain() const { return getOperand(0); }
- const SDOperand getValue() const { return getOperand(1); }
- const SDOperand getBasePtr() const { return getOperand(2); }
- const SDOperand getOffset() const { return getOperand(3); }
ISD::MemIndexedMode getAddressingMode() const { return AddrMode; }
bool isTruncatingStore() const { return IsTruncStore; }
MVT::ValueType getStoredVT() const { return StoredVT; }
- const Value *getSrcValue() const { return SrcValue; }
- int getSrcValueOffset() const { return SVOffset; }
- unsigned getAlignment() const { return Alignment; }
- bool isVolatile() const { return IsVolatile; }
static bool classof(const StoreSDNode *) { return true; }
+ static bool classof(const LSBaseSDNode *N) { return N->getOpcode() == ISD::STORE; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::STORE;
}
//===----------------------------------------------------------------------===//
// 7-bit integer type, used as an immediate:
-def cell_i7_ty: LLVMType<i16>; // Note: This was i8
-def cell_i8_ty: LLVMType<i16>; // Note: This was i8
+def cell_i7_ty: LLVMType<i8>;
+def cell_i8_ty: LLVMType<i8>;
class v16i8_u7imm<string builtin_suffix> :
GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
class v16i8_u8imm<string builtin_suffix> :
GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
+ Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
class v16i8_s10imm<string builtin_suffix> :
def CellSDKand:
RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "add\t $rT, $rA, $rB", IntegerOp,
+ "and\t $rT, $rA, $rB", IntegerOp,
[(set (v4i32 VECREG:$rT),
(int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
def CellSDKandc:
RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "addc\t $rT, $rA, $rB", IntegerOp,
+ "andc\t $rT, $rA, $rB", IntegerOp,
[(set (v4i32 VECREG:$rT),
(int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
def CellSDKandbi:
- RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
"andbi\t $rT, $rA, $val", BranchResolv,
[(set (v16i8 VECREG:$rT),
(int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>;
(int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
def CellSDKorbi:
- RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
"orbi\t $rT, $rA, $val", BranchResolv,
[(set (v16i8 VECREG:$rT),
(int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>;
(int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
def CellSDKxorbi:
- RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
"xorbi\t $rT, $rA, $val", BranchResolv,
[(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>;
(int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
def CellSDKceqbi:
- RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
"ceqbi\t $rT, $rA, $val", BranchResolv,
[(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>;
(int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
def CellSDKcgtbi:
- RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
"cgtbi\t $rT, $rA, $val", BranchResolv,
[(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
(int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
def CellSDKclgtbi:
- RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
"clgtbi\t $rT, $rA, $val", BranchResolv,
[(set (v16i8 VECREG:$rT),
(int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
SPUDAGToDAGISel::SelectAFormAddr(SDOperand Op, SDOperand N, SDOperand &Base,
SDOperand &Index) {
// These match the addr256k operand type:
- MVT::ValueType PtrVT = SPUtli.getPointerTy();
MVT::ValueType OffsVT = MVT::i16;
+ MVT::ValueType PtrVT = SPUtli.getPointerTy();
switch (N.getOpcode()) {
case ISD::Constant:
+ case ISD::ConstantPool:
+ case ISD::GlobalAddress:
+ cerr << "SPU SelectAFormAddr: Constant/Pool/Global not lowered.\n";
+ abort();
+ /*NOTREACHED*/
+
case ISD::TargetConstant: {
// Loading from a constant address.
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
return true;
}
}
- case ISD::ConstantPool:
- case ISD::TargetConstantPool: {
- // The constant pool address is N. Base is a dummy that will be ignored by
+ case ISD::TargetGlobalAddress:
+ case ISD::TargetConstantPool:
+ case SPUISD::AFormAddr: {
+ // The address is N (copied into Base); Index is a dummy zero ignored by
// the assembly printer.
Base = N;
Index = CurDAG->getTargetConstant(0, OffsVT);
return true;
}
-
- case ISD::GlobalAddress:
- case ISD::TargetGlobalAddress: {
- // The global address is N. Base is a dummy that is ignored by the
- // assembly printer.
- Base = N;
- Index = CurDAG->getTargetConstant(0, OffsVT);
- return true;
- }
}
return false;
Index = CurDAG->getTargetConstant(0, PtrTy);
return true;
} else if (Opc == ISD::FrameIndex) {
- // Stack frame index must be less than 512 (divided by 16):
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
DEBUG(cerr << "SelectDFormAddr: ISD::FrameIndex = "
- << FI->getIndex() << "\n");
+ << FI->getIndex() << "\n");
if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) {
Base = CurDAG->getTargetConstant(0, PtrTy);
Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy);
// Generated by getelementptr
const SDOperand Op0 = N.getOperand(0); // Frame index/base
const SDOperand Op1 = N.getOperand(1); // Offset within base
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1);
- // Not a constant?
- if (CN == 0)
- return false;
-
- int32_t offset = (int32_t) CN->getSignExtended();
- unsigned Opc0 = Op0.getOpcode();
-
- if ((offset & 0xf) != 0) {
- cerr << "SelectDFormAddr: unaligned offset = " << offset << "\n";
- abort();
- /*NOTREACHED*/
- }
+ if (Op1.getOpcode() == ISD::Constant
+ || Op1.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1);
+ assert(CN != 0 && "SelectDFormAddr: Expected a constant");
- if (Opc0 == ISD::FrameIndex) {
- FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0);
- DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset
- << " frame index = " << FI->getIndex() << "\n");
+ int32_t offset = (int32_t) CN->getSignExtended();
+ unsigned Opc0 = Op0.getOpcode();
- if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy);
- return true;
+ if ((offset & 0xf) != 0) {
+ // Unaligned offset: punt and let X-form address handle it.
+ // NOTE: This really doesn't have to be strictly 16-byte aligned,
+ // since the load/store quadword instructions will implicitly
+ // zero the lower 4 bits of the resulting address.
+ return false;
}
- } else if (offset > SPUFrameInfo::minFrameOffset()
- && offset < SPUFrameInfo::maxFrameOffset()) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- if (Opc0 == ISD::GlobalAddress) {
- // Convert global address to target global address
- GlobalAddressSDNode *GV = dyn_cast<GlobalAddressSDNode>(Op0);
- Index = CurDAG->getTargetGlobalAddress(GV->getGlobal(), PtrTy);
- return true;
- } else {
- // Otherwise, just take operand 0
- Index = Op0;
- return true;
+
+ if (Opc0 == ISD::FrameIndex) {
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0);
+ DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset
+ << " frame index = " << FI->getIndex() << "\n");
+
+ if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy);
+ return true;
+ }
+ } else if (offset > SPUFrameInfo::minFrameOffset()
+ && offset < SPUFrameInfo::maxFrameOffset()) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ if (Opc0 == ISD::GlobalAddress) {
+ // Convert global address to target global address
+ GlobalAddressSDNode *GV = dyn_cast<GlobalAddressSDNode>(Op0);
+ Index = CurDAG->getTargetGlobalAddress(GV->getGlobal(), PtrTy);
+ return true;
+ } else {
+ // Otherwise, just take operand 0
+ Index = Op0;
+ return true;
+ }
}
- }
+ } else
+ return false;
} else if (Opc == SPUISD::DFormAddr) {
// D-Form address: This is pretty straightforward, naturally...
ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
Base = CurDAG->getTargetConstant(CN->getValue(), PtrTy);
Index = N.getOperand(0);
return true;
+ } else if (Opc == ISD::FrameIndex) {
+ // Stack frame index must be less than 512 (divided by 16):
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
+ DEBUG(cerr << "SelectDFormAddr: ISD::FrameIndex = "
+ << FI->getIndex() << "\n");
+ if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) {
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy);
+ return true;
+ }
}
return false;
unsigned N2Opc = N2.getOpcode();
if ((N1Opc == SPUISD::Hi && N2Opc == SPUISD::Lo)
- || (N1Opc == SPUISD::Lo && N2Opc == SPUISD::Hi)) {
+ || (N1Opc == SPUISD::Lo && N2Opc == SPUISD::Hi)
+ || (N1Opc == SPUISD::XFormAddr)) {
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
abort();
/*UNREACHED*/
}
+ } else if (Opc == SPUISD::XFormAddr) {
+ Base = N;
+ Index = N.getOperand(1);
+ return true;
} else if (N.getNumOperands() == 2) {
SDOperand N1 = N.getOperand(0);
SDOperand N2 = N.getOperand(1);
} else if (Opc == ISD::FrameIndex) {
// Selects to AIr32 FI, 0 which in turn will become AIr32 SP, imm.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDOperand TFI = CurDAG->getTargetFrameIndex(FI, SPUtli.getPointerTy());
+ MVT::ValueType PtrVT = SPUtli.getPointerTy();
+ SDOperand Zero = CurDAG->getTargetConstant(0, PtrVT);
+ SDOperand TFI = CurDAG->getTargetFrameIndex(FI, PtrVT);
DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with AI32 <FI>, 0\n");
- return CurDAG->SelectNodeTo(N, SPU::AIr32, Op.getValueType(), TFI,
- CurDAG->getTargetConstant(0, MVT::i32));
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, SPU::AIr32, Op.getValueType(), TFI, Zero);
+ CurDAG->getTargetNode(SPU::AIr32, Op.getValueType(), TFI, Zero);
} else if (Opc == SPUISD::LDRESULT) {
// Custom select instructions for LDRESULT
unsigned VT = N->getValueType(0);
/*!
\arg Op Operand to test
\return true if the operand is a memory target (i.e., global
- address, external symbol, constant pool) or an existing D-Form
+ address, external symbol, constant pool) or an A-form
address.
*/
bool isMemoryOperand(const SDOperand &Op)
const unsigned Opc = Op.getOpcode();
return (Opc == ISD::GlobalAddress
|| Opc == ISD::GlobalTLSAddress
- || Opc == ISD::FrameIndex
+ /* || Opc == ISD::FrameIndex */
|| Opc == ISD::JumpTable
|| Opc == ISD::ConstantPool
|| Opc == ISD::ExternalSymbol
|| Opc == ISD::TargetGlobalAddress
|| Opc == ISD::TargetGlobalTLSAddress
- || Opc == ISD::TargetFrameIndex
+ /* || Opc == ISD::TargetFrameIndex */
|| Opc == ISD::TargetJumpTable
|| Opc == ISD::TargetConstantPool
|| Opc == ISD::TargetExternalSymbol
- || Opc == SPUISD::DFormAddr);
+ || Opc == SPUISD::AFormAddr);
}
}
setOperationAction(ISD::OR, MVT::v16i8, Custom);
setOperationAction(ISD::XOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
-
+
setSetCCResultType(MVT::i32);
setShiftAmountType(MVT::i32);
setSetCCResultContents(ZeroOrOneSetCCResult);
node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
+ node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
// LowerOperation implementation
//===----------------------------------------------------------------------===//
+/// Aligned load common code for CellSPU
+/*!
+ \param[in] Op The SelectionDAG load or store operand
+ \param[in] DAG The selection DAG
+ \param[in] ST CellSPU subtarget information structure
+ \param[in] LSN The load or store node (LSBaseSDNode)
+ \param[in,out] alignment Caller initializes this to the load or store node's
+ value from getAlignment(); may be updated while generating the aligned load
+ \param[out] alignOffs Set by AlignedLoad to the constant offset of the
+ access; the 16-byte aligned portion is (alignOffs & ~0xf)
+ \param[out] prefSlotOffs Set by AlignedLoad to the offset within the 16-byte
+ chunk, adjusted for the value type's preferred slot
+ \param[in,out] VT Caller initializes this value type to the load or store
+ node's loaded or stored value type; may be updated for an i1-extending load
+ or store.
+ \param[out] was16aligned true if the base pointer had 16-byte alignment,
+ otherwise false. Can help to determine if the chunk needs to be rotated.
+
+ Both load and store lowering load a block of data aligned on a 16-byte
+ boundary. This is the common aligned-load code shared between the two.
+ */
+static SDOperand
+AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
+ LSBaseSDNode *LSN,
+ unsigned &alignment, int &alignOffs, int &prefSlotOffs,
+ unsigned &VT, bool &was16aligned)
+{
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ const valtype_map_s *vtm = getValueTypeMapEntry(VT);
+ SDOperand basePtr = LSN->getBasePtr();
+ SDOperand chain = LSN->getChain();
+
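+ // If the base pointer is an ADD of a pointer and a constant (typically
+ // produced by getelementptr lowering), record the constant as alignOffs
+ // and derive the preferred-slot rotation from its low four bits.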
+ if (basePtr.getOpcode() == ISD::ADD) {
+ SDOperand Op1 = basePtr.Val->getOperand(1);
+
+ if (Op1.getOpcode() == ISD::Constant
+     || Op1.getOpcode() == ISD::TargetConstant) {
+ const ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+
+ alignOffs = (int) CN->getValue();
+ prefSlotOffs = (int) (alignOffs & 0xf);
+
+ // Adjust the rotation amount to ensure that the final result ends up in
+ // the preferred slot:
+ prefSlotOffs -= vtm->prefslot_byte;
+ basePtr = basePtr.getOperand(0);
+
+ // Modify alignment, since the ADD is likely from getElementPtr:
+ switch (basePtr.getOpcode()) {
+ case ISD::GlobalAddress:
+ case ISD::TargetGlobalAddress: {
+ GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(basePtr.Val);
+ const GlobalValue *GV = GN->getGlobal();
+ alignment = GV->getAlignment();
+ break;
+ }
+ }
+ } else {
+ alignOffs = 0;
+ prefSlotOffs = -vtm->prefslot_byte;
+ }
+ } else {
+ alignOffs = 0;
+ prefSlotOffs = -vtm->prefslot_byte;
+ }
+
+ if (alignment == 16) {
+ // Realign the base pointer as a D-Form address:
+ if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
+ if (isMemoryOperand(basePtr)) {
+ SDOperand Zero = DAG.getConstant(0, PtrVT);
+ unsigned Opc = (!ST->usingLargeMem()
+ ? SPUISD::AFormAddr
+ : SPUISD::XFormAddr);
+ basePtr = DAG.getNode(Opc, PtrVT, basePtr, Zero);
+ }
+ basePtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
+ basePtr, DAG.getConstant((alignOffs & ~0xf), PtrVT));
+ }
+
+ // Emit the vector load:
+ was16aligned = true;
+ return DAG.getLoad(MVT::v16i8, chain, basePtr,
+ LSN->getSrcValue(), LSN->getSrcValueOffset(),
+ LSN->isVolatile(), 16);
+ }
+
+ // Unaligned load or we're using the "large memory" model, which means that
+ // we have to be very pessimistic:
+ if (isMemoryOperand(basePtr)) {
+ basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
+ }
+
+ // Add the offset
+ basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, DAG.getConstant(alignOffs, PtrVT));
+ was16aligned = false;
+ return DAG.getLoad(MVT::v16i8, chain, basePtr,
+ LSN->getSrcValue(), LSN->getSrcValueOffset(),
+ LSN->isVolatile(), 16);
+}
+
/// Custom lower loads for CellSPU
/*!
All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
LoadSDNode *LN = cast<LoadSDNode>(Op);
- SDOperand basep = LN->getBasePtr();
SDOperand the_chain = LN->getChain();
- MVT::ValueType BasepOpc = basep.Val->getOpcode();
MVT::ValueType VT = LN->getLoadedVT();
MVT::ValueType OpVT = Op.Val->getValueType(0);
- MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
ISD::LoadExtType ExtType = LN->getExtensionType();
unsigned alignment = LN->getAlignment();
- const valtype_map_s *vtm = getValueTypeMapEntry(VT);
SDOperand Ops[8];
- if (BasepOpc == ISD::FrameIndex) {
- // Loading from a frame index is always properly aligned. Always.
- return SDOperand();
- }
-
// For an extending load of an i1 variable, just call it i8 (or whatever we
// were passed) and make it zero-extended:
if (VT == MVT::i1) {
switch (LN->getAddressingMode()) {
case ISD::UNINDEXED: {
- SDOperand result;
- SDOperand rot_op, rotamt;
- SDOperand ptrp;
- int c_offset;
- int c_rotamt;
-
- // The vector type we really want to be when we load the 16-byte chunk
- MVT::ValueType vecVT, opVecVT;
-
- vecVT = MVT::v16i8;
- if (VT != MVT::i1)
- vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
- opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
+ int offset, rotamt;
+ bool was16aligned;
+ SDOperand result =
+ AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);
- if (basep.getOpcode() == ISD::ADD) {
- const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
+ if (result.Val == 0)
+ return result;
- assert(CN != NULL
- && "LowerLOAD: ISD::ADD operand 1 is not constant");
+ the_chain = result.getValue(1);
+ // Rotate the chunk if necessary
+ if (rotamt < 0)
+ rotamt += 16;
+ if (rotamt != 0) {
+ SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
+
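+ // For a 16-byte aligned chunk the rotate amount is a known constant;
+ // otherwise use the base pointer of the generated load as the register
+ // rotate amount.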
+ if (was16aligned) {
+ Ops[0] = the_chain;
+ Ops[1] = result;
+ Ops[2] = DAG.getConstant(rotamt, MVT::i16);
+ } else {
+ LoadSDNode *LN1 = cast<LoadSDNode>(result);
+ Ops[0] = the_chain;
+ Ops[1] = result;
+ Ops[2] = LN1->getBasePtr();
+ }
- c_offset = (int) CN->getValue();
- c_rotamt = (int) (c_offset & 0xf);
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
+ the_chain = result.getValue(1);
+ }
- // Adjust the rotation amount to ensure that the final result ends up in
- // the preferred slot:
- c_rotamt -= vtm->prefslot_byte;
- ptrp = basep.getOperand(0);
+ if (VT == OpVT || ExtType == ISD::EXTLOAD) {
+ SDVTList scalarvts;
+ MVT::ValueType vecVT = MVT::v16i8;
+
+ // Convert the loaded v16i8 vector to the appropriate vector type
+ // specified by the operand:
+ if (OpVT == VT) {
+ if (VT != MVT::i1)
+ vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
+ } else
+ vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
+
+ Ops[0] = the_chain;
+ Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
+ scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
+ result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
+ the_chain = result.getValue(1);
} else {
- c_offset = 0;
- c_rotamt = -vtm->prefslot_byte;
- ptrp = basep;
- }
+ // Handle the sign and zero-extending loads for i1 and i8:
+ unsigned NewOpC;
- if (alignment == 16) {
- // 16-byte aligned load into preferred slot, no rotation
- if (c_rotamt == 0) {
- if (isMemoryOperand(ptrp))
- // Return unchanged
- return SDOperand();
- else {
- // Return modified D-Form address for pointer:
- ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
- ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
- if (VT == OpVT)
- return DAG.getLoad(VT, LN->getChain(), ptrp,
- LN->getSrcValue(), LN->getSrcValueOffset(),
- LN->isVolatile(), 16);
- else
- return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
- LN->getSrcValueOffset(), OpVT,
- LN->isVolatile(), 16);
- }
+ if (ExtType == ISD::SEXTLOAD) {
+ NewOpC = (OpVT == MVT::i1
+ ? SPUISD::EXTRACT_I1_SEXT
+ : SPUISD::EXTRACT_I8_SEXT);
} else {
- // Need to rotate...
- if (c_rotamt < 0)
- c_rotamt += 16;
- // Realign the base pointer, with a D-Form address
- if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
- basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
- ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
- else
- basep = ptrp;
-
- // Rotate the load:
- rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
- LN->getSrcValue(), LN->getSrcValueOffset(),
- LN->isVolatile(), 16);
- the_chain = rot_op.getValue(1);
- rotamt = DAG.getConstant(c_rotamt, MVT::i16);
-
- SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
- Ops[0] = the_chain;
- Ops[1] = rot_op;
- Ops[2] = rotamt;
-
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
- the_chain = result.getValue(1);
-
- if (VT == OpVT || ExtType == ISD::EXTLOAD) {
- SDVTList scalarvts;
- Ops[0] = the_chain;
- Ops[1] = result;
- if (OpVT == VT) {
- scalarvts = DAG.getVTList(VT, MVT::Other);
- } else {
- scalarvts = DAG.getVTList(OpVT, MVT::Other);
- }
-
- result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
- result);
- Ops[0] = the_chain;
- Ops[1] = result;
- result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
- the_chain = result.getValue(1);
- } else {
- // Handle the sign and zero-extending loads for i1 and i8:
- unsigned NewOpC;
-
- if (ExtType == ISD::SEXTLOAD) {
- NewOpC = (OpVT == MVT::i1
- ? SPUISD::EXTRACT_I1_SEXT
- : SPUISD::EXTRACT_I8_SEXT);
- } else {
- assert(ExtType == ISD::ZEXTLOAD);
- NewOpC = (OpVT == MVT::i1
- ? SPUISD::EXTRACT_I1_ZEXT
- : SPUISD::EXTRACT_I8_ZEXT);
- }
-
- result = DAG.getNode(NewOpC, OpVT, result);
- }
-
- SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
- SDOperand retops[2] = { result, the_chain };
-
- result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
- return result;
- /*UNREACHED*/
- }
- } else {
- // Misaligned 16-byte load:
- if (basep.getOpcode() == ISD::LOAD) {
- LN = cast<LoadSDNode>(basep);
- if (LN->getAlignment() == 16) {
- // We can verify that we're really loading from a 16-byte aligned
- // chunk. Encapsulate basep as a D-Form address and return a new
- // load:
- basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
- DAG.getConstant(0, PtrVT));
- if (OpVT == VT)
- return DAG.getLoad(VT, LN->getChain(), basep,
- LN->getSrcValue(), LN->getSrcValueOffset(),
- LN->isVolatile(), 16);
- else
- return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
- LN->getSrcValue(), LN->getSrcValueOffset(),
- OpVT, LN->isVolatile(), 16);
- }
+ assert(ExtType == ISD::ZEXTLOAD);
+ NewOpC = (OpVT == MVT::i1
+ ? SPUISD::EXTRACT_I1_ZEXT
+ : SPUISD::EXTRACT_I8_ZEXT);
}
- // Catch all other cases where we can't guarantee that we have a
- // 16-byte aligned entity, which means resorting to an X-form
- // address scheme:
-
- SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
- SDOperand loOp = DAG.getNode(SPUISD::Lo, PtrVT, basep, ZeroOffs);
- SDOperand hiOp = DAG.getNode(SPUISD::Hi, PtrVT, basep, ZeroOffs);
-
- ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
-
- SDOperand alignLoad =
- DAG.getLoad(opVecVT, LN->getChain(), ptrp,
- LN->getSrcValue(), LN->getSrcValueOffset(),
- LN->isVolatile(), 16);
-
- SDOperand insertEltOp =
- DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
-
- result = DAG.getNode(SPUISD::SHUFB, opVecVT,
- alignLoad,
- alignLoad,
- DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
-
- result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
+ result = DAG.getNode(NewOpC, OpVT, result);
+ }
- SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
- SDOperand retops[2] = { result, the_chain };
+ SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
+ SDOperand retops[2] = { result, the_chain };
- result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
- return result;
- }
- break;
+ result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
+ return result;
}
case ISD::PRE_INC:
case ISD::PRE_DEC:
MVT::ValueType VT = Value.getValueType();
MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDOperand the_chain = SN->getChain();
- //unsigned alignment = SN->getAlignment();
- //const valtype_map_s *vtm = getValueTypeMapEntry(VT);
+ unsigned alignment = SN->getAlignment();
switch (SN->getAddressingMode()) {
case ISD::UNINDEXED: {
- SDOperand basep = SN->getBasePtr();
- SDOperand ptrOp;
- int offset;
-
- if (basep.getOpcode() == ISD::FrameIndex) {
- // FrameIndex nodes are always properly aligned. Really.
- return SDOperand();
- }
-
- if (basep.getOpcode() == ISD::ADD) {
- const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
- assert(CN != NULL
- && "LowerSTORE: ISD::ADD operand 1 is not constant");
- offset = unsigned(CN->getValue());
- ptrOp = basep.getOperand(0);
- DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
- << offset
- << "\n");
- } else {
- ptrOp = basep;
- offset = 0;
- }
+ int chunk_offset, slot_offset;
+ bool was16aligned;
// The vector type we really want to load from the 16-byte chunk, except
// in the case of MVT::i1, which has to be v16i8.
- unsigned vecVT, stVecVT;
-
+ unsigned vecVT, stVecVT = MVT::v16i8;
+
if (StVT != MVT::i1)
stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
- else
- stVecVT = MVT::v16i8;
vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
- // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
- // the actual dform addr offs($reg).
- basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
- DAG.getConstant((offset & ~0xf), PtrVT));
+ SDOperand alignLoadVec =
+ AlignedLoad(Op, DAG, ST, SN, alignment,
+ chunk_offset, slot_offset, VT, was16aligned);
- // Create the 16-byte aligned vector load
- SDOperand alignLoad =
- DAG.getLoad(vecVT, the_chain, basep,
- SN->getSrcValue(), SN->getSrcValueOffset(),
- SN->isVolatile(), 16);
- the_chain = alignLoad.getValue(1);
+ if (alignLoadVec.Val == 0)
+ return alignLoadVec;
- LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
+ LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
+ SDOperand basePtr = LN->getBasePtr();
+ SDOperand the_chain = alignLoadVec.getValue(1);
SDOperand theValue = SN->getValue();
SDOperand result;
theValue = theValue.getOperand(0);
}
- SDOperand insertEltOp =
- DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
- DAG.getNode(SPUISD::DFormAddr, PtrVT,
- ptrOp,
- DAG.getConstant((offset & 0xf), PtrVT)));
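+ // Reduce the offset to its byte position within the 16-byte chunk, then
+ // scale from bytes to stored-element units for the insert mask offset.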
+ chunk_offset &= 0xf;
+ chunk_offset /= (MVT::getSizeInBits(StVT == MVT::i1 ? (unsigned) MVT::i8 : StVT) / 8);
+
+ SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
+ SDOperand insertEltPtr;
+ SDOperand insertEltOp;
+
+ // If the base pointer is already a D-form address, then just create
+ // a new D-form address with a slot offset and the original base pointer.
+ // Otherwise generate a D-form address with the slot offset relative
+ // to the stack pointer, which is always aligned.
+ if (basePtr.getOpcode() == SPUISD::DFormAddr) {
+ insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
+ basePtr.getOperand(0),
+ insertEltOffs);
+ } else {
+ insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ insertEltOffs);
+ }
+ insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
result = DAG.getNode(SPUISD::SHUFB, vecVT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
- alignLoad,
+ alignLoadVec,
DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
- result = DAG.getStore(the_chain, result, basep,
+ result = DAG.getStore(the_chain, result, basePtr,
LN->getSrcValue(), LN->getSrcValueOffset(),
LN->isVolatile(), LN->getAlignment());
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
Constant *C = CP->getConstVal();
SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
- const TargetMachine &TM = DAG.getTarget();
SDOperand Zero = DAG.getConstant(0, PtrVT);
+ const TargetMachine &TM = DAG.getTarget();
if (TM.getRelocationModel() == Reloc::Static) {
if (!ST->usingLargeMem()) {
// Just return the SDOperand with the constant pool address in it.
return CPI;
} else {
+#if 1
// Generate hi/lo address pair
SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
+#else
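+ // Alternative (disabled): emit a single SPUISD::XFormAddr node and let
+ // the SPUxform patterns expand it into the ILHU/IOHL pair.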
+ return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero);
+#endif
}
}
const TargetMachine &TM = DAG.getTarget();
if (TM.getRelocationModel() == Reloc::Static) {
- if (!ST->usingLargeMem()) {
- // Just return the SDOperand with the jump table address in it.
- return JTI;
- } else {
- // Generate hi/lo address pair
- SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
- SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
-
- return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
- }
+ return (!ST->usingLargeMem()
+ ? JTI
+ : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero));
}
assert(0 &&
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
GlobalValue *GV = GSDN->getGlobal();
SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
- SDOperand Zero = DAG.getConstant(0, PtrVT);
const TargetMachine &TM = DAG.getTarget();
+ SDOperand Zero = DAG.getConstant(0, PtrVT);
if (TM.getRelocationModel() == Reloc::Static) {
- if (!ST->usingLargeMem()) {
- // Generate a local store address
- return GA;
- } else {
- // Generate hi/lo address pair
- SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
- SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
-
- return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
- }
+ return (!ST->usingLargeMem()
+ ? GA
+ : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero));
} else {
cerr << "LowerGlobalAddress: Relocation model other than static not "
<< "supported.\n";
static
SDOperand
-LowerCALL(SDOperand Op, SelectionDAG &DAG) {
+LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
SDOperand Chain = Op.getOperand(0);
#if 0
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
GlobalValue *GV = G->getGlobal();
unsigned CalleeVT = Callee.getValueType();
+ SDOperand Zero = DAG.getConstant(0, PtrVT);
+ SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
- // Turn calls to targets that are defined (i.e., have bodies) into BRSL
- // style calls, otherwise, external symbols are BRASL calls.
- // NOTE:
- // This may be an unsafe assumption for JIT and really large compilation
- // units.
- if (GV->isDeclaration()) {
- Callee = DAG.getGlobalAddress(GV, CalleeVT);
+ if (!ST->usingLargeMem()) {
+ // Turn calls to targets that are defined (i.e., have bodies) into BRSL
+ // style calls; otherwise, external symbols are BRASL calls. This assumes
+ // that declared/defined symbols are in the same compilation unit and can
+ // be reached through PC-relative jumps.
+ //
+ // NOTE:
+ // This may be an unsafe assumption for JIT and really large compilation
+ // units.
+ if (GV->isDeclaration()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
+ }
} else {
- Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
- DAG.getTargetGlobalAddress(GV, CalleeVT),
- DAG.getConstant(0, PtrVT));
+ // "Large memory" mode: Turn all calls into indirect calls with a X-form
+ // address pairs:
+ Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
- else if (SDNode *Dest = isLSAAddress(Callee, DAG))
+ else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
// If this is an absolute destination address that appears to be a legal
// local store address, use the munged value.
Callee = SDOperand(Dest, 0);
+ }
Ops.push_back(Chain);
Ops.push_back(Callee);
case ISD::FORMAL_ARGUMENTS:
return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
case ISD::CALL:
- return LowerCALL(Op, DAG);
+ return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::RET:
return LowerRET(Op, DAG, getTargetMachine());
Hi, ///< High address component (upper 16)
Lo, ///< Low address component (lower 16)
PCRelAddr, ///< Program counter relative address
+ AFormAddr, ///< A-form address (local store)
DFormAddr, ///< D-Form address "imm($r)"
- XFormAddr, ///< X-Form address "$r1($r2)"
+ XFormAddr, ///< X-Form address "$r($r)"
LDRESULT, ///< Load result (value, chain)
CALL, ///< CALL instruction
def LQAr32:
RI16Form<0b100001100, (outs R32C:$rT), (ins addr256k:$src),
"lqa\t$rT, $src", LoadStore,
- [(set R32C:$rT, (load aform_addr:$src))]>;
+ [(set R32C:$rT, (load aform_addr:$src))]>;
def LQAf32:
RI16Form<0b100001100, (outs R32FP:$rT), (ins addr256k:$src),
RegConstraint<"$rS = $rT">,
NoEncode<"$rS">;
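+// IOHL ors a 16-bit immediate into the low halfword of $rT (the upper half
+// having typically been set by ILHU); the SPUxform patterns use the pair to
+// materialize a full 32-bit address.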
+def IOHLlo:
+ RI16Form<0b100000110, (outs R32C:$rT), (ins R32C:$rS, symbolLo:$val),
+ "iohl\t$rT, $val", ImmLoad,
+ [/* no pattern */]>,
+ RegConstraint<"$rS = $rT">,
+ NoEncode<"$rS">;
+
// Form select mask for bytes using immediate, used in conjunction with the
// SELB instruction:
// are used here for type checking (instances where ROTQBI is used actually
// use vector registers)
def ROTQBYvec:
- RRForm<0b00111011100, (outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB),
+ RRForm<0b00111011100, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
"rotqby\t$rT, $rA, $rB", RotateShift,
- [(set (v16i8 VECREG:$rT), (SPUrotbytes_left (v16i8 VECREG:$rA), R16C:$rB))]>;
+ [(set (v16i8 VECREG:$rT), (SPUrotbytes_left (v16i8 VECREG:$rA), R32C:$rB))]>;
-def : Pat<(SPUrotbytes_left_chained (v16i8 VECREG:$rA), R16C:$rB),
- (ROTQBYvec VECREG:$rA, R16C:$rB)>;
+def : Pat<(SPUrotbytes_left_chained (v16i8 VECREG:$rA), R32C:$rB),
+ (ROTQBYvec VECREG:$rA, R32C:$rB)>;
// See ROTQBY note above.
def ROTQBYIvec:
[/* no pattern to match: intrinsic */]>;
def CEQBIr8:
- RI10Form<0b01111110, (outs R8C:$rT), (ins R8C:$rA, s7imm:$val),
+ RI10Form<0b01111110, (outs R8C:$rT), (ins R8C:$rA, s7imm_i8:$val),
"ceqbi\t$rT, $rA, $val", ByteOp,
[/* no pattern to match: intrinsic */]>;
def CEQBIv16i8:
- RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, s7imm:$val),
+ RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, s7imm_i8:$val),
"ceqbi\t$rT, $rA, $val", ByteOp,
[/* no pattern to match: intrinsic */]>;
def BRASL:
BranchSetLink<0b011001100, (outs), (ins calltarget:$func, variable_ops),
"brasl\t$$lr, $func",
- [(SPUcall tglobaladdr:$func)]>;
+ [(SPUcall (SPUaform tglobaladdr:$func, 0))]>;
// Branch indirect and set link if external data. These instructions are not
// actually generated, matched by an intrinsic:
// low parts in order to load them into a register.
//===----------------------------------------------------------------------===//
-def : Pat<(SPUhi tglobaladdr:$in, 0), (ILHUhi tglobaladdr:$in)>;
-def : Pat<(SPUlo tglobaladdr:$in, 0), (ILAlo tglobaladdr:$in)>;
-def : Pat<(SPUdform tglobaladdr:$in, imm:$imm), (ILAlsa tglobaladdr:$in)>;
-def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>;
-def : Pat<(SPUlo tconstpool:$in , 0), (ILAlo tconstpool:$in)>;
-def : Pat<(SPUdform tconstpool:$in, imm:$imm), (ILAlsa tconstpool:$in)>;
-def : Pat<(SPUhi tjumptable:$in, 0), (ILHUhi tjumptable:$in)>;
-def : Pat<(SPUlo tjumptable:$in, 0), (ILAlo tjumptable:$in)>;
-def : Pat<(SPUdform tjumptable:$in, imm:$imm), (ILAlsa tjumptable:$in)>;
-
-// Force load of global address to a register. These forms show up in
-// SPUISD::DFormAddr pseudo instructions:
-def : Pat<(add tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
-def : Pat<(add tconstpool:$in, 0), (ILAlsa tglobaladdr:$in)>;
-def : Pat<(add tjumptable:$in, 0), (ILAlsa tglobaladdr:$in)>;
+def : Pat<(SPUhi tglobaladdr:$in, 0), (ILHUhi tglobaladdr:$in)>;
+def : Pat<(SPUlo tglobaladdr:$in, 0), (ILAlo tglobaladdr:$in)>;
+def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
+def : Pat<(SPUxform tglobaladdr:$in, 0),
+ (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
+def : Pat<(SPUhi tjumptable:$in, 0), (ILHUhi tjumptable:$in)>;
+def : Pat<(SPUlo tjumptable:$in, 0), (ILAlo tjumptable:$in)>;
+def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>;
+def : Pat<(SPUxform tjumptable:$in, 0),
+ (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
+def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>;
+def : Pat<(SPUlo tconstpool:$in , 0), (ILAlo tconstpool:$in)>;
+def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>;
+/* def : Pat<(SPUxform tconstpool:$in, 0),
+ (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; */
+
// Instrinsics:
include "CellSDKIntrinsics.td"
// PC-relative address
def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>;
+// A-Form local store addresses
+def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
+
// D-Form "imm($reg)" addresses
def SPUdform : SDNode<"SPUISD::DFormAddr", SDTIntBinOp, []>;
+// X-Form "$reg($reg)" addresses
+def SPUxform : SDNode<"SPUISD::XFormAddr", SDTIntBinOp, []>;
+
// SPU 32-bit sign-extension to 64-bits
def SPUsext32_to_64: SDNode<"SPUISD::SEXT32TO64", SDTIntExtendOp, []>;
return ((Value & ((1 << 19) - 1)) == Value);
}]>;
+def lo16 : PatLeaf<(imm), [{
+ // lo16 predicate - returns true if the immediate fits in the low order
+ // 16 bits of a 32-bit constant (all high order bits are zero):
+ if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = N->getValue();
+ return ((val & 0x0000ffff) == val);
+ }
+
+ return false;
+}], LO16>;
+
def hi16 : PatLeaf<(imm), [{
// hi16 predicate - returns true if the immediate has all zeros in the
// low order bits and is a 32-bit constant:
//===----------------------------------------------------------------------===//
// Operand Definitions.
-def s7imm: Operand<i16> {
+def s7imm: Operand<i8> {
+ let PrintMethod = "printS7ImmOperand";
+}
+
+def s7imm_i8: Operand<i8> {
let PrintMethod = "printS7ImmOperand";
}
; RUN: grep andi %t1.s | count 36
; RUN: grep andhi %t1.s | count 30
; RUN: grep andbi %t1.s | count 4
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; AND instruction generation:
define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
--- /dev/null
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep bisl %t1.s | count 6 &&
+; RUN: grep ila %t1.s | count 1 &&
+; RUN: grep rotqbyi %t1.s | count 4 &&
+; RUN: grep lqa %t1.s | count 4 &&
+; RUN: grep lqd %t1.s | count 6 &&
+; RUN: grep dispatch_tab %t1.s | count 10
+; ModuleID = 'call_indirect.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
+target triple = "spu-unknown-elf"
+
+@dispatch_tab = global [6 x void (i32, float)*] zeroinitializer, align 16
+
+define void @dispatcher(i32 %i_arg, float %f_arg) {
+entry:
+ %tmp2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 0), align 16
+ tail call void %tmp2( i32 %i_arg, float %f_arg )
+ %tmp2.1 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 1), align 4
+ tail call void %tmp2.1( i32 %i_arg, float %f_arg )
+ %tmp2.2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 2), align 4
+ tail call void %tmp2.2( i32 %i_arg, float %f_arg )
+ %tmp2.3 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 3), align 4
+ tail call void %tmp2.3( i32 %i_arg, float %f_arg )
+ %tmp2.4 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 4), align 4
+ tail call void %tmp2.4( i32 %i_arg, float %f_arg )
+ %tmp2.5 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 5), align 4
+ tail call void %tmp2.5( i32 %i_arg, float %f_arg )
+ ret void
+}
; RUN: grep andi %t1.s | count 3 &&
; RUN: grep rotmi %t1.s | count 2 &&
; RUN: grep rothmi %t1.s | count 1
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
declare i32 @llvm.ctpop.i8(i8)
declare i32 @llvm.ctpop.i16(i16)
; RUN: grep dfnms %t1.s | count 4
;
; This file includes double precision floating point arithmetic instructions
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define double @fadd(double %arg1, double %arg2) {
%A = add double %arg1, %arg2
; Alternatively, a ^ ~b, which the compiler will also match.
; ModuleID = 'eqv.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
%A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
; RUN: grep lqx %t2.s | count 27 &&
; RUN: grep space %t1.s | count 8 &&
; RUN: grep byte %t1.s | count 424
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define i32 @i32_extract_0(<4 x i32> %v) {
entry:
; RUN: grep fcmeq %t1.s | count 1
;
; This file includes standard floating point arithmetic instructions
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
declare double @fabs(double)
declare float @fabsf(float)
; RUN: grep fnms %t1.s | count 2
;
; This file includes standard floating point arithmetic instructions
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define float @fdiv32(float %arg1, float %arg2) {
%A = fdiv float %arg1, %arg2
; RUN: grep xor %t1.s | count 4 &&
; RUN: grep and %t1.s | count 5 &&
; RUN: grep andbi %t1.s | count 3
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define double @fneg_dp(double %X) {
%Y = sub double -0.000000e+00, %X
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep "ilh" %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define i16 @test_1() {
%x = alloca i16, align 16
; RUN: grep 49077 %t1.s | count 1 &&
; RUN: grep 1267 %t1.s | count 2 &&
; RUN: grep 16309 %t1.s | count 1
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define i32 @test_1() {
ret i32 4784128 ;; ILHU via pattern (0x49000)
; RUN: grep 128 %t1.s | count 30 &&
; RUN: grep 224 %t1.s | count 2
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
; 1311768467750121234 => 0x 12345678 abcdef12 (4660,22136/43981,61202)
; 18446744073709551591 => 0x ffffffff ffffffe7 (-25)
; 18446744073708516742 => 0x ffffffff fff03586 (-1034874)
; RUN: grep andi %t1.s | count 1 &&
; RUN: grep ila %t1.s | count 1
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
define float @sitofp_i32(i32 %arg1) {
%A = sitofp i32 %arg1 to float ; <float> [#uses=1]
ret float %A
--- /dev/null
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep ceq %t1.s | count 30 &&
+; RUN: grep ceqb %t1.s | count 10 &&
+; RUN: grep ceqhi %t1.s | count 5 &&
+; RUN: grep ceqi %t1.s | count 5 &&
+; RUN: grep cgt %t1.s | count 30 &&
+; RUN: grep cgtb %t1.s | count 10 &&
+; RUN: grep cgthi %t1.s | count 5 &&
+; RUN: grep cgti %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
+
+declare <4 x i32> @llvm.spu.si.ceq(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.spu.si.ceqb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.spu.si.ceqh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.spu.si.ceqi(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.ceqhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.ceqbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.cgt(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.spu.si.cgtb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.spu.si.cgth(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.spu.si.cgti(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.cgthi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.cgtbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.clgt(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.spu.si.clgtb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.spu.si.clgth(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.spu.si.clgti(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.clgthi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.clgtbi(<16 x i8>, i8)
+
+
+
+define <4 x i32> @test(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @ceqhtest(<8 x i16> %A, <8 x i16> %B) {
+ call <8 x i16> @llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @ceqbtest(<16 x i8> %A, <16 x i8> %B) {
+ call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @ceqitest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @ceqhitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @ceqbitest(<16 x i8> %A) {
+ call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @cgttest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @cgthtest(<8 x i16> %A, <8 x i16> %B) {
+ call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @cgtbtest(<16 x i8> %A, <16 x i8> %B) {
+ call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @cgtitest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @cgthitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @cgtbitest(<16 x i8> %A) {
+ call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @clgttest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @clgthtest(<8 x i16> %A, <8 x i16> %B) {
+ call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @clgtbtest(<16 x i8> %A, <16 x i8> %B) {
+ call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @clgtitest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @clgthitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @clgtbitest(<16 x i8> %A) {
+ call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
--- /dev/null
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep fa %t1.s | count 5 &&
+; RUN: grep fs %t1.s | count 5 &&
+; RUN: grep fm %t1.s | count 15 &&
+; RUN: grep fceq %t1.s | count 5 &&
+; RUN: grep fcmeq %t1.s | count 5 &&
+; RUN: grep fcgt %t1.s | count 5 &&
+; RUN: grep fcmgt %t1.s | count 5 &&
+; RUN: grep fma %t1.s | count 5 &&
+; RUN: grep fnms %t1.s | count 5 &&
+; RUN: grep fms %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
+
+declare <4 x float> @llvm.spu.si.fa(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fs(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fm(<4 x float>, <4 x float>)
+
+declare <4 x float> @llvm.spu.si.fceq(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fcmeq(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fcgt(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fcmgt(<4 x float>, <4 x float>)
+
+declare <4 x float> @llvm.spu.si.fma(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fnms(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fms(<4 x float>, <4 x float>, <4 x float>)
+
+define <4 x i32> @test(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x float> @fatest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fs(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fmtest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fceqtest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fcmeqtest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fcgttest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fcmgttest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fmatest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+ call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fnmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+ call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+ %Y = call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
+ ret <4 x float> %Y
+}
\ No newline at end of file
--- /dev/null
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep and %t1.s | count 20 &&
+; RUN: grep andc %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare <4 x i32> @llvm.spu.si.and(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.andc(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.andi(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.andhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.andbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.or(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.orc(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.ori(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.orhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.orbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.xor(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.xori(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.xorhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.xorbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.nand(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.nor(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @andtest(<4 x i32> %A, <4 x i32> %B) {
+ %Y = call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B)
+ ret <4 x i32> %Y
+}
+
+define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) {
+ %Y = call <4 x i32> @llvm.spu.si.andc(<4 x i32> %A, <4 x i32> %B)
+ ret <4 x i32> %Y
+}
+
+define <4 x i32> @anditest(<4 x i32> %A) {
+ %Y = call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65)
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @andhitest(<8 x i16> %A) {
+ %Y = call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65)
+ ret <8 x i16> %Y
+}
; RUN: grep and %t1.s | count 94
; RUN: grep xsbh %t1.s | count 2
; RUN: grep xshw %t1.s | count 4
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
%A = and <4 x i32> %arg2, %arg1 ; <<4 x i32>> [#uses=1]
; RUN: grep ori %t1.s | count 30
; RUN: grep orhi %t1.s | count 30
; RUN: grep orbi %t1.s | count 15
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; OR instruction generation:
define <4 x i32> @or_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
; RUN: grep rothi.*,.3 %t1.s | count 1
; RUN: grep andhi %t1.s | count 4
; RUN: grep shlhi %t1.s | count 4
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; Vector rotates are not currently supported in gcc or llvm assembly. These are
; not tested.
; RUN: grep and %t1.s | count 2
; RUN: grep xsbh %t1.s | count 1
; RUN: grep xshw %t1.s | count 2
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define <16 x i8> @selb_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
%A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
; RUN: grep shli %t1.s | count 51
; RUN: grep xshw %t1.s | count 5
; RUN: grep and %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; Vector shifts are not currently supported in gcc or llvm assembly. These are
; not tested.
;
; This file includes standard floating point arithmetic instructions
; NOTE fdiv is tested separately since it is a compound operation
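+; (SPU has no single-precision divide instruction; fdiv is presumably expanded
+; into a reciprocal-estimate/refinement sequence (frest/fi plus fnms/fma), so
+; its instruction counts are checked in a separate test rather than by the
+; simple greps used here.)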
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define float @fp_add(float %arg1, float %arg2) {
%A = add float %arg1, %arg2 ; <float> [#uses=1]
--- /dev/null
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep lqa %t1.s | count 10 &&
+; RUN: grep lqd %t1.s | count 2 &&
+; RUN: grep rotqbyi %t1.s | count 5 &&
+; RUN: grep xshw %t1.s | count 1 &&
+; RUN: grep andi %t1.s | count 4 &&
+; RUN: grep cbd %t1.s | count 3 &&
+; RUN: grep chd %t1.s | count 1 &&
+; RUN: grep cwd %t1.s | count 1 &&
+; RUN: grep shufb %t1.s | count 5 &&
+; RUN: grep stqa %t1.s | count 5
+; ModuleID = 'struct_1.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; struct hackstate {
+; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3)
+; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3)
+; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3)
+; int i1; // offset 4 (rotate left by 4 bytes to byte 0)
+; short s1; // offset 8 (rotate left by 6 bytes to byte 2)
+; int i2; // offset 12 [ignored]
+; unsigned char c4; // offset 16 [ignored]
+; unsigned char c5; // offset 17 [ignored]
+; unsigned char c6; // offset 18 [ignored]
+; unsigned char c7; // offset 19 (no rotate, in preferred slot)
+; int i3; // offset 20 [ignored]
+; int i4; // offset 24 [ignored]
+; int i5; // offset 28 [ignored]
+; int i6; // offset 32 (no rotate, in preferred slot)
+; }
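+;
+; The rotate amounts noted above presumably follow the SPU preferred-slot
+; rule (assuming byte 3 for an i8, byte 2 for an i16, and byte 0 for an i32
+; within each 16-byte quadword):
+;     rotate-left bytes = (offset mod 16 - preferred-slot byte) mod 16
+; e.g. c2 at offset 1: (1 - 3) mod 16 = 14;  i1 at offset 4: (4 - 0) mod 16 = 4.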
+%struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 }
+
+; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+@state = global %struct.hackstate zeroinitializer, align 16
+
+define i8 @get_hackstate_c1() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
+ ret i8 %tmp2
+}
+
+define i8 @get_hackstate_c2() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
+ ret i8 %tmp2
+}
+
+define i8 @get_hackstate_c3() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
+ ret i8 %tmp2
+}
+
+define i32 @get_hackstate_i1() {
+entry:
+ %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
+ ret i32 %tmp2
+}
+
+define i16 @get_hackstate_s1() signext {
+entry:
+ %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
+ ret i16 %tmp2
+}
+
+define i8 @get_hackstate_c7() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16
+ ret i8 %tmp2
+}
+
+define i32 @get_hackstate_i6() zeroext {
+entry:
+ %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16
+ ret i32 %tmp2
+}
+
+define void @set_hackstate_c1(i8 zeroext %c) {
+entry:
+ store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
+ ret void
+}
+
+define void @set_hackstate_c2(i8 zeroext %c) {
+entry:
+ store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
+ ret void
+}
+
+define void @set_hackstate_c3(i8 zeroext %c) {
+entry:
+ store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
+ ret void
+}
+
+define void @set_hackstate_i1(i32 %i) {
+entry:
+ store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
+ ret void
+}
+
+define void @set_hackstate_s1(i16 signext %s) {
+entry:
+ store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
+ ret void
+}