From 915432ca1306d10453c9eb523cbc4b257642f62a Mon Sep 17 00:00:00 2001
From: Daniel Sanders <daniel.sanders@imgtec.com>
Date: Mon, 23 Sep 2013 13:22:24 +0000
Subject: [PATCH] [mips][msa] Added support for matching nor from normal IR
 (i.e. not intrinsics)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191195 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsISelLowering.cpp   |  1 +
 lib/Target/Mips/MipsISelLowering.h     |  3 ++
 lib/Target/Mips/MipsMSAInstrInfo.td    | 16 +++++-
 lib/Target/Mips/MipsSEISelLowering.cpp | 50 ++++++++++++++++++-
 test/CodeGen/Mips/msa/bitwise.ll       | 68 ++++++++++++++++++++++++++
 5 files changed, 135 insertions(+), 3 deletions(-)

diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 21c5edb39cb..443653f5f98 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -214,6 +214,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case MipsISD::VANY_NONZERO:      return "MipsISD::VANY_NONZERO";
   case MipsISD::VSPLAT:            return "MipsISD::VSPLAT";
   case MipsISD::VSPLATD:           return "MipsISD::VSPLATD";
+  case MipsISD::VNOR:              return "MipsISD::VNOR";
   default:                         return NULL;
   }
 }
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 57b5603ac2d..b56ac41955c 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -164,6 +164,9 @@ namespace llvm {
       // constant, and the operand fits in a signed 10-bits value.
       VSPLATD,
 
+      // Combined (XOR (OR $a, $b), -1)
+      VNOR,
+
       // Load/Store Left/Right nodes.
       LWL = ISD::FIRST_TARGET_MEMORY_OPCODE,
       LWR,
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index b7bc357117e..9783256a3fb 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -20,6 +20,8 @@ def MipsVAllZero : SDNode<"MipsISD::VALL_ZERO", SDT_MipsVecCond>;
 def MipsVAnyZero : SDNode<"MipsISD::VANY_ZERO", SDT_MipsVecCond>;
 def MipsVSplat  : SDNode<"MipsISD::VSPLAT", SDT_MipsSplat>;
 def MipsVSplatD : SDNode<"MipsISD::VSPLATD", SDT_MipsSplat>;
+def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp,
+                      [SDNPCommutative, SDNPAssociative]>;
 
 def vsplati8  : PatFrag<(ops node:$in), (v16i8 (MipsVSplat (i32 node:$in)))>;
 def vsplati16 : PatFrag<(ops node:$in), (v8i16 (MipsVSplat (i32 node:$in)))>;
@@ -1729,7 +1731,10 @@ class NLZC_H_DESC : MSA_2R_DESC_BASE<"nlzc.h", ctlz, MSA128H>;
 class NLZC_W_DESC : MSA_2R_DESC_BASE<"nlzc.w", ctlz, MSA128W>;
 class NLZC_D_DESC : MSA_2R_DESC_BASE<"nlzc.d", ctlz, MSA128D>;
 
-class NOR_V_DESC : MSA_VEC_DESC_BASE<"nor.v", int_mips_nor_v, MSA128B>;
+class NOR_V_DESC : MSA_VEC_DESC_BASE<"nor.v", MipsVNOR, MSA128B>;
+class NOR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<MipsVNOR, MSA128H>;
+class NOR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<MipsVNOR, MSA128W>;
+class NOR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<MipsVNOR, MSA128D>;
 
 class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", int_mips_nori_b, MSA128B>;
 
@@ -2498,6 +2503,15 @@ def NLZC_W : NLZC_W_ENC, NLZC_W_DESC;
 def NLZC_D : NLZC_D_ENC, NLZC_D_DESC;
 
 def NOR_V : NOR_V_ENC, NOR_V_DESC;
+def NOR_V_H_PSEUDO : NOR_V_H_PSEUDO_DESC,
+                     PseudoInstExpansion<(NOR_V MSA128B:$wd,
+                                                MSA128B:$ws, MSA128B:$wt)>;
+def NOR_V_W_PSEUDO : NOR_V_W_PSEUDO_DESC,
+                     PseudoInstExpansion<(NOR_V MSA128B:$wd,
+                                                MSA128B:$ws, MSA128B:$wt)>;
+def NOR_V_D_PSEUDO : NOR_V_D_PSEUDO_DESC,
+                     PseudoInstExpansion<(NOR_V MSA128B:$wd,
+                                                MSA128B:$ws, MSA128B:$wt)>;
 
 def NORI_B : NORI_B_ENC, NORI_B_DESC;
 
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 9532c9d7275..ca3e06c8153 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -90,6 +90,8 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
     addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
     addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
     addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);
+
+    setTargetDAGCombine(ISD::XOR);
   }
 
   if (!Subtarget->mipsSEUsesSoftFloat()) {
@@ -567,6 +569,44 @@ static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
                      N->getOperand(2), SetCC.getOperand(2));
 }
 
+static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
+                                 const MipsSubtarget *Subtarget) {
+  EVT Ty = N->getValueType(0);
+
+  if (Subtarget->hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
+    // Try the following combines:
+    //   (xor (or $a, $b), (build_vector allones))
+    //   (xor (or $a, $b), (bitcast (build_vector allones)))
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 = N->getOperand(1);
+    SDValue NotOp;
+    ConstantSDNode *Const;
+
+    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
+      NotOp = Op1;
+    else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
+      NotOp = Op0;
+    else if ((Op0->getOpcode() == MipsISD::VSPLAT ||
+              Op0->getOpcode() == MipsISD::VSPLATD) &&
+             (Const = dyn_cast<ConstantSDNode>(Op0->getOperand(0))) &&
+             Const->isAllOnesValue())
+      NotOp = Op1;
+    else if ((Op1->getOpcode() == MipsISD::VSPLAT ||
+              Op1->getOpcode() == MipsISD::VSPLATD) &&
+             (Const = dyn_cast<ConstantSDNode>(Op1->getOperand(0))) &&
+             Const->isAllOnesValue())
+      NotOp = Op0;
+    else
+      return SDValue();
+
+    if (NotOp->getOpcode() == ISD::OR)
+      return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
+                         NotOp->getOperand(1));
+  }
+
+  return SDValue();
+}
+
 SDValue
 MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -587,11 +627,13 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
     return performSRLCombine(N, DAG, DCI, Subtarget);
   case ISD::VSELECT:
     return performVSELECTCombine(N, DAG);
-  case ISD::SETCC: {
+  case ISD::XOR:
+    Val = performXORCombine(N, DAG, Subtarget);
+    break;
+  case ISD::SETCC:
     Val = performSETCCCombine(N, DAG);
     break;
   }
-  }
 
   if (Val.getNode())
     return Val;
@@ -964,6 +1006,10 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_nlzc_w:
   case Intrinsic::mips_nlzc_d:
     return lowerMSAUnaryIntr(Op, DAG, ISD::CTLZ);
+  case Intrinsic::mips_nor_v: {
+    SDValue Res = lowerMSABinaryIntr(Op, DAG, ISD::OR);
+    return DAG.getNOT(SDLoc(Op), Res, Res->getValueType(0));
+  }
   case Intrinsic::mips_or_v:
     return lowerMSABinaryIntr(Op, DAG, ISD::OR);
   case Intrinsic::mips_sll_b:
diff --git a/test/CodeGen/Mips/msa/bitwise.ll b/test/CodeGen/Mips/msa/bitwise.ll
index 90b9589d9ba..6a428099b63 100644
--- a/test/CodeGen/Mips/msa/bitwise.ll
+++ b/test/CodeGen/Mips/msa/bitwise.ll
@@ -128,6 +128,74 @@ define void @or_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: .size or_v2i64
 }
 
+define void @nor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: nor_v16i8:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = or <16 x i8> %1, %2
+  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size nor_v16i8
+}
+
+define void @nor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: nor_v8i16:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = or <8 x i16> %1, %2
+  %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size nor_v8i16
+}
+
+define void @nor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: nor_v4i32:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = or <4 x i32> %1, %2
+  %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
+  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size nor_v4i32
+}
+
+define void @nor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: nor_v2i64:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = or <2 x i64> %1, %2
+  %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
+  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size nor_v2i64
+}
+
 define void @xor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: xor_v16i8:
 
-- 
2.34.1