From f7ea6c3ee89e605c8d0bb7cdb0ade79706c750e8 Mon Sep 17 00:00:00 2001
From: Mon P Wang <wangmp@apple.com>
Date: Wed, 10 Feb 2010 23:37:45 +0000
Subject: [PATCH] The previous fix of widening divides that trap was too
 fragile as it depends on custom lowering and requires that certain types
 exist in ValueTypes.h.  Modified widening to check if an op can trap and if
 so, the widening algorithm will apply only the op on the defined elements. 
 It is safer to do this in widening because the optimizer can't guarantee
 removing unused ops in some cases.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95823 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Target/TargetLowering.h          |  5 +
 .../SelectionDAG/LegalizeVectorTypes.cpp      | 92 ++++++++++++++++++-
 lib/CodeGen/SelectionDAG/TargetLowering.cpp   | 18 ++++
 lib/Target/X86/X86ISelLowering.cpp            | 21 -----
 test/CodeGen/X86/scalar_widen_div.ll          | 29 ++++++
 5 files changed, 141 insertions(+), 24 deletions(-)

diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index e172edf4dd7..c6ac89a8ac1 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -346,6 +346,11 @@ public:
     return true;
   }
 
+  /// canOpTrap - Returns true if the operation can trap for the value type.
+  /// VT must be a legal type. By default, we optimistically assume most
+  /// operations don't trap except for divide and remainder.
+  virtual bool canOpTrap(unsigned Op, EVT VT) const;
+
   /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is
   /// used by Targets can use this to indicate if there is a suitable
   /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index bf95bb532f8..e955e10619e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1242,10 +1242,96 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
 
 SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
   // Binary op widening.
+  unsigned Opcode = N->getOpcode();
+  DebugLoc dl = N->getDebugLoc();
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
-  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
-  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2);
+  EVT WidenEltVT = WidenVT.getVectorElementType();
+  EVT VT = WidenVT;
+  unsigned NumElts =  VT.getVectorNumElements();
+  while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+     NumElts = NumElts / 2;
+     VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+  }
+
+  if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
+    // Operation doesn't trap so just widen as normal.
+    SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+    SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+    return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+  } else if (NumElts == 1) {
+    // No legal vector version so unroll the vector operation and then widen.
+    return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+  } else {
+    // Since the operation can trap, apply operation on the original vector.
+    SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+    SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+    unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+    SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+    unsigned ConcatEnd = 0;  // Current ConcatOps index.
+    unsigned Idx = 0;        // Current Idx into input vectors.
+    while (CurNumElts != 0) {
+      while (CurNumElts >= NumElts) {
+        SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+                                   DAG.getIntPtrConstant(Idx));
+        SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+                                   DAG.getIntPtrConstant(Idx));
+        ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+        Idx += NumElts;
+        CurNumElts -= NumElts;
+      }
+      EVT PrevVecVT = VT;
+      do {
+        NumElts = NumElts / 2;
+        VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+      } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+
+      if (NumElts == 1) {
+        // Since we are using concat vector, build a vector from the scalar ops.
+        SDValue VecOp = DAG.getUNDEF(PrevVecVT);
+        for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+          SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, 
+                                     InOp1, DAG.getIntPtrConstant(Idx));
+          SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, 
+                                     InOp2, DAG.getIntPtrConstant(Idx));
+          VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, PrevVecVT, VecOp,
+                              DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2),
+                              DAG.getIntPtrConstant(i));
+        }
+        CurNumElts = 0;
+        ConcatOps[ConcatEnd++] = VecOp;
+      }
+    }
+
+    // Check to see if we have a single operation with the widen type.
+    if (ConcatEnd == 1) {
+      VT = ConcatOps[0].getValueType();
+      if (VT == WidenVT)
+        return ConcatOps[0];
+    }
+
+    // Rebuild vector to one with the widen type
+    Idx = ConcatEnd - 1;
+    while (Idx != 0) {
+      VT = ConcatOps[Idx--].getValueType();
+      while (Idx != 0 && ConcatOps[Idx].getValueType() == VT)
+        --Idx;
+      if (Idx != 0) {
+        VT = ConcatOps[Idx].getValueType();
+        ConcatOps[Idx+1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+                                     &ConcatOps[Idx+1], ConcatEnd - Idx - 1);
+        ConcatEnd = Idx + 2;
+      }
+    }
+    
+    unsigned NumOps = WidenVT.getVectorNumElements()/VT.getVectorNumElements();
+    if (NumOps != ConcatEnd ) {
+      SDValue UndefVal = DAG.getUNDEF(VT);
+      for (unsigned j = ConcatEnd; j < NumOps; ++j)
+        ConcatOps[j] = UndefVal;
+    }
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
+  }
 }
 
 SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index d74ec7e2e5d..1683d01bbab 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -540,6 +540,24 @@ TargetLowering::~TargetLowering() {
   delete &TLOF;
 }
 
+/// canOpTrap - Returns true if the operation can trap for the value type.
+/// VT must be a legal type.
+bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const {
+  assert(isTypeLegal(VT));
+  switch (Op) {
+  default:
+    return false;
+  case ISD::FDIV:
+  case ISD::FREM:
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::SREM:
+  case ISD::UREM:
+    return true;
+  }
+}
+
+
 static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
                                        unsigned &NumIntermediates,
                                        EVT &RegisterVT,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 515bc84dadb..a0f8e96f264 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1001,19 +1001,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
 
   computeRegisterProperties();
 
-  // Divide and reminder operations have no vector equivalent and can
-  // trap. Do a custom widening for these operations in which we never
-  // generate more divides/remainder than the original vector width.
-  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
-    if (!isTypeLegal((MVT::SimpleValueType)VT)) {
-      setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom);
-      setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom);
-      setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom);
-      setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom);
-    }
-  }
-
   // FIXME: These should be based on subtarget info. Plus, the values should
   // be smaller when we are in optimizing for size mode.
   maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
@@ -7572,14 +7559,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     Results.push_back(edx.getValue(1));
     return;
   }
-  case ISD::SDIV:
-  case ISD::UDIV:
-  case ISD::SREM:
-  case ISD::UREM: {
-    EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-    Results.push_back(DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()));
-    return;
-  }
   case ISD::ATOMIC_CMP_SWAP: {
     EVT T = N->getValueType(0);
     assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll
index fc67e4417c4..77f320f1056 100644
--- a/test/CodeGen/X86/scalar_widen_div.ll
+++ b/test/CodeGen/X86/scalar_widen_div.ll
@@ -152,3 +152,32 @@ define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
   %rem.r = urem <5 x i64> %num, %rem
   ret <5 x i64>  %rem.r
 }
+
+define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
+; CHECK: idivl
+; CHECK: idivl
+; CHECK: idivl
+; CHECK-NOT: idivl
+; CHECK: ret
+entry:
+  %cmp13 = icmp sgt i32 %n, 0
+  br i1 %cmp13, label %bb.nph, label %for.end
+
+bb.nph:  
+  br label %for.body
+
+for.body:
+  %i.014 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ] 
+  %arrayidx11 = getelementptr <3 x i32>* %dest, i32 %i.014
+  %tmp4 = load <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1]
+  %arrayidx7 = getelementptr inbounds <3 x i32>* %old, i32 %i.014
+  %tmp8 = load <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1]
+  %div = sdiv <3 x i32> %tmp4, %tmp8
+  store <3 x i32> %div, <3 x i32>* %arrayidx11
+  %inc = add nsw i32 %i.014, 1
+  %exitcond = icmp eq i32 %inc, %n 
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
-- 
2.34.1