Reapply "[Reassociate] Add initial support for vector instructions."

author Robert Lougher <rob.lougher@gmail.com>

Fri, 13 Mar 2015 20:53:01 +0000 (20:53 +0000)

committer Robert Lougher <rob.lougher@gmail.com>

Fri, 13 Mar 2015 20:53:01 +0000 (20:53 +0000)
author Robert Lougher <rob.lougher@gmail.com>
Fri, 13 Mar 2015 20:53:01 +0000 (20:53 +0000)
committer Robert Lougher <rob.lougher@gmail.com>
Fri, 13 Mar 2015 20:53:01 +0000 (20:53 +0000)
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp

index 98016b40c5643d1baaabec293a37511e3b4aab85..307cc73d991cf9a88a8a102ba7633e49ae8cebeb 100644 (file)
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -321,10 +321,8 @@ unsigned Reassociate::getRank(Value *V) {
  
    // If this is a not or neg instruction, do not count it for rank.  This
    // assures us that X and ~X will have the same rank.
-  Type *Ty = V->getType();
-  if ((!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) ||
-      (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
-       !BinaryOperator::isFNeg(I)))
+  if  (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
+       !BinaryOperator::isFNeg(I))
      ++Rank;
  
    DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank << "\n");
@@ -351,7 +349,7 @@ void Reassociate::canonicalizeOperands(Instruction *I) {
  
  static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
                                   Instruction *InsertBefore, Value *FlagsOp) {
-  if (S1->getType()->isIntegerTy())
+  if (S1->getType()->isIntOrIntVectorTy())
      return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
    else {
      BinaryOperator *Res =
@@ -363,7 +361,7 @@ static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
  
  static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
                                   Instruction *InsertBefore, Value *FlagsOp) {
-  if (S1->getType()->isIntegerTy())
+  if (S1->getType()->isIntOrIntVectorTy())
      return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
    else {
      BinaryOperator *Res =
@@ -375,7 +373,7 @@ static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
  
  static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
                                   Instruction *InsertBefore, Value *FlagsOp) {
-  if (S1->getType()->isIntegerTy())
+  if (S1->getType()->isIntOrIntVectorTy())
      return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
    else {
      BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore);
@@ -388,8 +386,8 @@ static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
  ///
  static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
    Type *Ty = Neg->getType();
-  Constant *NegOne = Ty->isIntegerTy() ? ConstantInt::getAllOnesValue(Ty)
-                                       : ConstantFP::get(Ty, -1.0);
+  Constant *NegOne = Ty->isIntOrIntVectorTy() ?
+    ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
  
    BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg);
    Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op.
@@ -872,7 +870,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
        Constant *Undef = UndefValue::get(I->getType());
        NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
                                       Undef, Undef, "", I);
-      if (NewOp->getType()->isFloatingPointTy())
+      if (NewOp->getType()->isFPOrFPVectorTy())
          NewOp->setFastMathFlags(I->getFastMathFlags());
      } else {
        NewOp = NodesToRewrite.pop_back_val();
@@ -1520,8 +1518,8 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
  
        // Insert a new multiply.
        Type *Ty = TheOp->getType();
-      Constant *C = Ty->isIntegerTy() ? ConstantInt::get(Ty, NumFound)
-                                      : ConstantFP::get(Ty, NumFound);
+      Constant *C = Ty->isIntOrIntVectorTy() ?
+        ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
        Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
  
        // Now that we have inserted a multiply, optimize it. This allows us to
@@ -1661,7 +1659,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
      // from an expression will drop a use of maxocc, and this can cause
      // RemoveFactorFromExpression on successive values to behave differently.
      Instruction *DummyInst =
-        I->getType()->isIntegerTy()
+        I->getType()->isIntOrIntVectorTy()
              ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
              : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
  
@@ -1792,7 +1790,7 @@ static Value *buildMultiplyTree(IRBuilder<> &Builder,
  
    Value *LHS = Ops.pop_back_val();
    do {
-    if (LHS->getType()->isIntegerTy())
+    if (LHS->getType()->isIntOrIntVectorTy())
        LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
      else
        LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
@@ -2090,8 +2088,9 @@ void Reassociate::OptimizeInst(Instruction *I) {
    if (I->isCommutative())
      canonicalizeOperands(I);
  
-  // Don't optimize vector instructions.
-  if (I->getType()->isVectorTy())
+  // TODO: We should optimize vector Xor instructions, but they are
+  // currently unsupported.
+  if (I->getType()->isVectorTy() && I->getOpcode() == Instruction::Xor)
      return;
  
    // Don't optimize floating point instructions that don't have unsafe algebra.
@@ -2170,9 +2169,6 @@ void Reassociate::OptimizeInst(Instruction *I) {
  }
  
  void Reassociate::ReassociateExpression(BinaryOperator *I) {
-  assert(!I->getType()->isVectorTy() &&
-         "Reassociation of vector instructions is not supported.");
-
    // First, walk the expression tree, linearizing the tree, collecting the
    // operand information.
    SmallVector<RepeatedValue, 8> Tree;
diff --git a/test/Transforms/Reassociate/fast-ReassociateVector.ll b/test/Transforms/Reassociate/fast-ReassociateVector.ll

index eeae096bf9449436df4066f3528a51dbb5e0e371..9fbb5ccfe9a25ad4ea85c949f1144408e59a06f3 100644 (file)
--- a/test/Transforms/Reassociate/fast-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/fast-ReassociateVector.ll
@@ -1,46 +1,192 @@
  ; RUN: opt < %s -reassociate -S | FileCheck %s
  
-; Canonicalize operands, but don't optimize floating point vector operations.
-define <4 x float> @test1() {
-; CHECK-LABEL: test1
-; CHECK-NEXT: %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer
-; CHECK-NEXT: %tmp2 = fmul fast <4 x float> %tmp1, zeroinitializer
-
-  %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer
-  %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1
-  ret <4 x float> %tmp2
-}
-
-; Commute integer vector operations.
-define <2 x i32> @test2(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-LABEL: test2
-; CHECK-NEXT: %tmp1 = add <2 x i32> %x, %y
-; CHECK-NEXT: %tmp2 = add <2 x i32> %x, %y
-; CHECK-NEXT: %tmp3 = add <2 x i32> %tmp1, %tmp2
-
-  %tmp1 = add <2 x i32> %x, %y
-  %tmp2 = add <2 x i32> %y, %x
-  %tmp3 = add <2 x i32> %tmp1, %tmp2
-  ret <2 x i32> %tmp3
+; Check that a*c+b*c is turned into (a+b)*c
+define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; CHECK-LABEL: @test1
+; CHECK-NEXT: %tmp = fadd fast <4 x float> %b, %a
+; CHECK-NEXT: %tmp1 = fmul fast <4 x float> %tmp, %c
+; CHECK-NEXT: ret <4 x float> %tmp1
+
+  %mul = fmul fast <4 x float> %a, %c
+  %mul1 = fmul fast <4 x float> %b, %c
+  %add = fadd fast <4 x float> %mul, %mul1
+  ret <4 x float> %add
  }
  
-define <2 x i32> @test3(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-LABEL: test3
-; CHECK-NEXT: %tmp1 = mul <2 x i32> %x, %y
-; CHECK-NEXT: %tmp2 = mul <2 x i32> %x, %y
-; CHECK-NEXT: %tmp3 = mul <2 x i32> %tmp1, %tmp2
+; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
+define <2 x float> @test2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: fadd fast <2 x float> %c, %b
+; CHECK-NEXT: fmul fast <2 x float> %a, %tmp2
+; CHECK-NEXT: fmul fast <2 x float> %tmp3, %a
+; CHECK-NEXT: ret <2 x float>
  
-  %tmp1 = mul <2 x i32> %x, %y
-  %tmp2 = mul <2 x i32> %y, %x
-  %tmp3 = mul <2 x i32> %tmp1, %tmp2
-  ret <2 x i32> %tmp3
+  %t0 = fmul fast <2 x float> %a, %b
+  %t1 = fmul fast <2 x float> %a, %t0
+  %t2 = fmul fast <2 x float> %a, %c
+  %t3 = fmul fast <2 x float> %a, %t2
+  %t4 = fadd fast <2 x float> %t1, %t3
+  ret <2 x float> %t4
+}
+
+; Check that a*b+a*c+d is turned into a*(b+c)+d.
+define <2 x double> @test3(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) {
+; CHECK-LABEL: @test3
+; CHECK-NEXT: fadd fast <2 x double> %c, %b
+; CHECK-NEXT: fmul fast <2 x double> %tmp, %a
+; CHECK-NEXT: fadd fast <2 x double> %tmp1, %d
+; CHECK-NEXT: ret <2 x double>
+
+  %t0 = fmul fast <2 x double> %a, %b
+  %t1 = fmul fast <2 x double> %a, %c
+  %t2 = fadd fast <2 x double> %t1, %d
+  %t3 = fadd fast <2 x double> %t0, %t2
+  ret <2 x double> %t3
+}
+
+; No fast-math.
+define <2 x float> @test4(<2 x float> %A) {
+; CHECK-LABEL: @test4
+; CHECK-NEXT: %X = fadd <2 x float> %A, <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: %Y = fadd <2 x float> %A, <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: %R = fsub <2 x float> %X, %Y
+; CHECK-NEXT: ret <2 x float> %R
+
+  %X = fadd <2 x float> %A, < float 1.000000e+00, float 1.000000e+00 >
+  %Y = fadd <2 x float> %A, < float 1.000000e+00, float 1.000000e+00 >
+  %R = fsub <2 x float> %X, %Y
+  ret <2 x float> %R
+}
+
+; Check 47*X + 47*X -> 94*X.
+define <2 x float> @test5(<2 x float> %X) {
+; CHECK-LABEL: @test5
+; CHECK-NEXT: fmul fast <2 x float> %X, <float 9.400000e+01, float 9.400000e+01>
+; CHECK-NEXT: ret <2 x float>
+
+  %Y = fmul fast <2 x float> %X, <float 4.700000e+01, float 4.700000e+01>
+  %Z = fadd fast <2 x float> %Y, %Y
+  ret <2 x float> %Z
+}
+
+; Check X+X+X -> 3*X.
+define <2 x float> @test6(<2 x float> %X) {
+; CHECK-LABEL: @test6
+; CHECK-NEXT: fmul fast <2 x float> %X, <float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT: ret <2 x float>
+
+  %Y = fadd fast <2 x float> %X ,%X
+  %Z = fadd fast <2 x float> %Y, %X
+  ret <2 x float> %Z
+}
+
+; Check 127*W+50*W -> 177*W.
+define <2 x double> @test7(<2 x double> %W) {
+; CHECK-LABEL: @test7
+; CHECK-NEXT: fmul fast <2 x double> %W, <double 1.770000e+02, double 1.770000e+02>
+; CHECK-NEXT: ret <2 x double>
+
+  %X = fmul fast <2 x double> %W, <double 127.0, double 127.0>
+  %Y = fmul fast <2 x double> %W, <double 50.0, double 50.0>
+  %Z = fadd fast <2 x double> %Y, %X
+  ret <2 x double> %Z
+}
+
+; Check X*12*12 -> X*144.
+define <2 x float> @test8(<2 x float> %arg) {
+; CHECK-LABEL: @test8
+; CHECK: fmul fast <2 x float> %arg, <float 1.440000e+02, float 1.440000e+02>
+; CHECK-NEXT: ret <2 x float> %tmp2
+
+  %tmp1 = fmul fast <2 x float> <float 1.200000e+01, float 1.200000e+01>, %arg
+  %tmp2 = fmul fast <2 x float> %tmp1, <float 1.200000e+01, float 1.200000e+01>
+  ret <2 x float> %tmp2
+}
+
+; Check (b+(a+1234))+-a -> b+1234.
+define <2 x double> @test9(<2 x double> %b, <2 x double> %a) {
+; CHECK-LABEL: @test9
+; CHECK: fadd fast <2 x double> %b, <double 1.234000e+03, double 1.234000e+03>
+; CHECK-NEXT: ret <2 x double>
+
+  %1 = fadd fast <2 x double> %a, <double 1.234000e+03, double 1.234000e+03>
+  %2 = fadd fast <2 x double> %b, %1
+  %3 = fsub fast <2 x double> <double 0.000000e+00, double 0.000000e+00>, %a
+  %4 = fadd fast <2 x double> %2, %3
+  ret <2 x double> %4
+}
+
+; Check -(-(z*40)*a) -> a*40*z.
+define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
+; CHECK-LABEL: @test10
+; CHECK: fmul fast <2 x float> %a, <float 4.000000e+01, float 4.000000e+01>
+; CHECK-NEXT: fmul fast <2 x float> %e, %z
+; CHECK-NEXT: ret <2 x float>
+
+  %d = fmul fast <2 x float> %z, <float 4.000000e+01, float 4.000000e+01>
+  %c = fsub fast <2 x float> <float 0.000000e+00, float 0.000000e+00>, %d
+  %e = fmul fast <2 x float> %a, %c
+  %f = fsub fast <2 x float> <float 0.000000e+00, float 0.000000e+00>, %e
+  ret <2 x float> %f
+}
+
+; Check x*y+y*x -> x*y*2.
+define <2 x double> @test11(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: @test11
+; CHECK-NEXT: %factor = fmul fast <2 x double> %y, <double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %x
+; CHECK-NEXT: ret <2 x double> %tmp1
+
+  %1 = fmul fast <2 x double> %x, %y
+  %2 = fmul fast <2 x double> %y, %x
+  %3 = fadd fast <2 x double> %1, %2
+  ret <2 x double> %3
+}
+
+; FIXME: shifts should be converted to mul to assist further reassociation.
+define <2 x i64> @test12(<2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: @test12
+; CHECK-NEXT:  %mul = mul <2 x i64> %c, %b
+; CHECK-NEXT:  %shl = shl <2 x i64> %mul, <i64 5, i64 5>
+; CHECK-NEXT:  ret <2 x i64> %shl
+
+  %mul = mul <2 x i64> %c, %b
+  %shl = shl <2 x i64> %mul, <i64 5, i64 5>
+  ret <2 x i64> %shl
+}
+
+; FIXME: expressions with a negative const should be canonicalized to assist
+; further reassociation.
+; We would expect (-5*b)+a -> a-(5*b) but only the constant operand is commuted.
+define <4 x float> @test13(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test13
+; CHECK-NEXT:  %mul = fmul fast <4 x float> %b, <float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00>
+; CHECK-NEXT:  %add = fadd fast <4 x float> %mul, %a
+; CHECK-NEXT:  ret <4 x float> %add
+
+  %mul = fmul fast <4 x float> <float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00>, %b
+  %add = fadd fast <4 x float> %mul, %a
+  ret <4 x float> %add
+}
+
+; Break up subtract to assist further reassociation.
+; Check a+b-c -> a+b+-c.
+define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: @test14
+; CHECK-NEXT: %add = add <2 x i64> %b, %a
+; CHECK-NEXT: %c.neg = sub <2 x i64> zeroinitializer, %c
+; CHECK-NEXT: %sub = add <2 x i64> %add, %c.neg
+; CHECK-NEXT: ret <2 x i64> %sub
+
+  %add = add <2 x i64> %b, %a
+  %sub = sub <2 x i64> %add, %c
+  ret <2 x i64> %sub
  }
  
-define <2 x i32> @test4(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-LABEL: test4
-; CHECK-NEXT: %tmp1 = and <2 x i32> %x, %y
-; CHECK-NEXT: %tmp2 = and <2 x i32> %x, %y
-; CHECK-NEXT: %tmp3 = and <2 x i32> %tmp1, %tmp2
+define <2 x i32> @test15(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: test15
+; CHECK-NEXT: %tmp3 = and <2 x i32> %y, %x
+; CHECK-NEXT: ret <2 x i32> %tmp3
  
    %tmp1 = and <2 x i32> %x, %y
    %tmp2 = and <2 x i32> %y, %x
@@ -48,11 +194,10 @@ define <2 x i32> @test4(<2 x i32> %x, <2 x i32> %y) {
    ret <2 x i32> %tmp3
  }
  
-define <2 x i32> @test5(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-LABEL: test5
-; CHECK-NEXT: %tmp1 = or <2 x i32> %x, %y
-; CHECK-NEXT: %tmp2 = or <2 x i32> %x, %y
-; CHECK-NEXT: %tmp3 = or <2 x i32> %tmp1, %tmp2
+define <2 x i32> @test16(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: test16
+; CHECK-NEXT: %tmp3 = or <2 x i32> %y, %x
+; CHECK-NEXT: ret <2 x i32> %tmp3
  
    %tmp1 = or <2 x i32> %x, %y
    %tmp2 = or <2 x i32> %y, %x
@@ -60,8 +205,9 @@ define <2 x i32> @test5(<2 x i32> %x, <2 x i32> %y) {
    ret <2 x i32> %tmp3
  }
  
-define <2 x i32> @test6(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-LABEL: test6
+; FIXME: Optimize vector xor.  Currently we only commute the operands.
+define <2 x i32> @test17(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: test17
  ; CHECK-NEXT: %tmp1 = xor <2 x i32> %x, %y
  ; CHECK-NEXT: %tmp2 = xor <2 x i32> %x, %y
  ; CHECK-NEXT: %tmp3 = xor <2 x i32> %tmp1, %tmp2
author	Robert Lougher <rob.lougher@gmail.com>
	Fri, 13 Mar 2015 20:53:01 +0000 (20:53 +0000)
committer	Robert Lougher <rob.lougher@gmail.com>
	Fri, 13 Mar 2015 20:53:01 +0000 (20:53 +0000)
lib/Transforms/Scalar/Reassociate.cpp		patch | blob | history
test/Transforms/Reassociate/fast-ReassociateVector.ll		patch | blob | history