return SDValue();
}
+/// sext(add_nsw(x, C)) --> add(sext(x), C_sext)
+/// Promoting a sign extension ahead of an 'add nsw' exposes opportunities
+/// to combine math ops, use an LEA, or use a complex addressing mode. This can
+/// eliminate extend, add, and shift instructions.
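+///
+/// For example, the tests below exercise this pattern:
+///   %add = add nsw i32 %i, 5
+///   %ext = sext i32 %add to i64
+/// which becomes:
+///   %ext = sext i32 %i to i64
+///   %add = add nsw i64 %ext, 5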
+static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ // TODO: This should be valid for other integer types.
+ EVT VT = Sext->getValueType(0);
+ if (VT != MVT::i64)
+ return SDValue();
+
+ // We need an 'add nsw' feeding into the 'sext'.
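+ // Without 'nsw' this transform would be unsound: if the narrow add wraps
+ // (e.g. x == INT32_MAX, C == 1), then sext(add(x, C)) != add(sext(x), C).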
+ SDValue Add = Sext->getOperand(0);
+ if (Add.getOpcode() != ISD::ADD || !Add->getFlags()->hasNoSignedWrap())
+ return SDValue();
+
+ // Having a constant operand to the 'add' ensures that we are not increasing
+ // the instruction count because the constant is extended for free below.
+ // A constant operand can also become the displacement field of an LEA.
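+ // For example, in add_nsw_sext_add in the test file below, the constant 5
+ // becomes the displacement in "leaq 5(%rax,%rsi), %rax".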
+ auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1));
+ if (!AddOp1)
+ return SDValue();
+
+ // Don't make the 'add' bigger if there's no hope of combining it with some
+ // other 'add' or 'shl' instruction.
+ // TODO: It may be profitable to generate simpler LEA instructions in place
+ // of single 'add' instructions, but the cost model for selecting an LEA
+ // currently has a high threshold.
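+ // When the combine does pay off, a 'sext' feeding 'shl'+'add' can fold into
+ // a single scaled-index LEA such as "leaq -40(%rsi,%rax,8)" (see
+ // add_nsw_sext_lsh_add in the test file below).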
+ bool HasLEAPotential = false;
+ for (auto *User : Sext->uses()) {
+ if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) {
+ HasLEAPotential = true;
+ break;
+ }
+ }
+ if (!HasLEAPotential)
+ return SDValue();
+
+ // Everything looks good, so pull the 'sext' ahead of the 'add'.
+ int64_t AddConstant = AddOp1->getSExtValue();
+ SDValue AddOp0 = Add.getOperand(0);
+ SDValue NewSext = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Sext), VT, AddOp0);
+ SDValue NewConstant = DAG.getConstant(AddConstant, SDLoc(Add), VT);
+
+ // The wider add is guaranteed not to wrap because both operands are
+ // sign-extended.
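+ // Each operand lies in [-2^31, 2^31), so the sum lies in [-2^32, 2^32) and
+ // cannot wrap in 64 bits.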
+ SDNodeFlags Flags;
+ Flags.setNoSignedWrap(true);
+ return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewSext, NewConstant, &Flags);
+}
+
static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
+ if (SDValue NewAdd = promoteSextBeforeAddNSW(N, DAG, Subtarget))
+ return NewAdd;
+
return SDValue();
}
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; The fundamental problem: an add separated from other arithmetic by a sext can't
; be combined with the later instructions. However, if the first add is 'nsw',
; then we can promote the sext ahead of that add to allow optimizations.
define i64 @add_nsw_consts(i32 %i) {
; CHECK-LABEL: add_nsw_consts:
; CHECK: # BB#0:
-; CHECK-NEXT: addl $5, %edi
; CHECK-NEXT: movslq %edi, %rax
-; CHECK-NEXT: addq $7, %rax
+; CHECK-NEXT: addq $12, %rax
; CHECK-NEXT: retq
%add = add nsw i32 %i, 5
define i64 @add_nsw_sext_add(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext_add:
; CHECK: # BB#0:
-; CHECK-NEXT: addl $5, %edi
; CHECK-NEXT: movslq %edi, %rax
-; CHECK-NEXT: addq %rsi, %rax
+; CHECK-NEXT: leaq 5(%rax,%rsi), %rax
; CHECK-NEXT: retq
%add = add nsw i32 %i, 5
define i64 @add_nsw_sext_lsh_add(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext_lsh_add:
; CHECK: # BB#0:
-; CHECK-NEXT: addl $-5, %edi
; CHECK-NEXT: movslq %edi, %rax
-; CHECK-NEXT: leaq (%rsi,%rax,8), %rax
+; CHECK-NEXT: leaq -40(%rsi,%rax,8), %rax
; CHECK-NEXT: retq
%add = add nsw i32 %i, -5
define i8* @gep8(i32 %i, i8* %x) {
; CHECK-LABEL: gep8:
; CHECK: # BB#0:
-; CHECK-NEXT: addl $5, %edi
; CHECK-NEXT: movslq %edi, %rax
-; CHECK-NEXT: addq %rsi, %rax
+; CHECK-NEXT: leaq 5(%rax,%rsi), %rax
; CHECK-NEXT: retq
%add = add nsw i32 %i, 5
define i16* @gep16(i32 %i, i16* %x) {
; CHECK-LABEL: gep16:
; CHECK: # BB#0:
-; CHECK-NEXT: addl $-5, %edi
; CHECK-NEXT: movslq %edi, %rax
-; CHECK-NEXT: leaq (%rsi,%rax,2), %rax
+; CHECK-NEXT: leaq -10(%rsi,%rax,2), %rax
; CHECK-NEXT: retq
%add = add nsw i32 %i, -5
define i32* @gep32(i32 %i, i32* %x) {
; CHECK-LABEL: gep32:
; CHECK: # BB#0:
-; CHECK-NEXT: addl $5, %edi
; CHECK-NEXT: movslq %edi, %rax
-; CHECK-NEXT: leaq (%rsi,%rax,4), %rax
+; CHECK-NEXT: leaq 20(%rsi,%rax,4), %rax
; CHECK-NEXT: retq
%add = add nsw i32 %i, 5
define i64* @gep64(i32 %i, i64* %x) {
; CHECK-LABEL: gep64:
; CHECK: # BB#0:
-; CHECK-NEXT: addl $-5, %edi
; CHECK-NEXT: movslq %edi, %rax
-; CHECK-NEXT: leaq (%rsi,%rax,8), %rax
+; CHECK-NEXT: leaq -40(%rsi,%rax,8), %rax
; CHECK-NEXT: retq
%add = add nsw i32 %i, -5
define i128* @gep128(i32 %i, i128* %x) {
; CHECK-LABEL: gep128:
; CHECK: # BB#0:
-; CHECK-NEXT: addl $5, %edi
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: shlq $4, %rax
-; CHECK-NEXT: addq %rsi, %rax
+; CHECK-NEXT: leaq 80(%rax,%rsi), %rax
; CHECK-NEXT: retq
%add = add nsw i32 %i, 5
define void @PR20134(i32* %a, i32 %i) {
; CHECK-LABEL: PR20134:
; CHECK: # BB#0:
-; CHECK-NEXT: leal 1(%rsi), %eax
-; CHECK-NEXT: cltq
-; CHECK-NEXT: movl (%rdi,%rax,4), %eax
-; CHECK-NEXT: leal 2(%rsi), %ecx
-; CHECK-NEXT: movslq %ecx, %rcx
-; CHECK-NEXT: addl (%rdi,%rcx,4), %eax
-; CHECK-NEXT: movslq %esi, %rcx
-; CHECK-NEXT: movl %eax, (%rdi,%rcx,4)
+; CHECK-NEXT: movslq %esi, %rax
+; CHECK-NEXT: movl 4(%rdi,%rax,4), %ecx
+; CHECK-NEXT: addl 8(%rdi,%rax,4), %ecx
+; CHECK-NEXT: movl %ecx, (%rdi,%rax,4)
; CHECK-NEXT: retq
%add1 = add nsw i32 %i, 1