AArch64/ARM64: make use of ANDS and BICS instructions for comparisons.

author Tim Northover <tnorthover@apple.com>

Tue, 22 Apr 2014 12:45:42 +0000 (12:45 +0000)

committer Tim Northover <tnorthover@apple.com>

Tue, 22 Apr 2014 12:45:42 +0000 (12:45 +0000)
author Tim Northover <tnorthover@apple.com>
Tue, 22 Apr 2014 12:45:42 +0000 (12:45 +0000)
committer Tim Northover <tnorthover@apple.com>
Tue, 22 Apr 2014 12:45:42 +0000 (12:45 +0000)
diff --git a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp b/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp

index e082baf8ae43780ce452beb4c69231fd0c2c1b00..7928c7e586e3896f4d7c37834bd40f001d99e707 100644 (file)
--- a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp
+++ b/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp
@@ -580,6 +580,10 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
    case ARM64::ANDXrr:
    case ARM64::BICWrr:
    case ARM64::BICXrr:
+  case ARM64::ANDSWrr:
+  case ARM64::ANDSXrr:
+  case ARM64::BICSWrr:
+  case ARM64::BICSXrr:
    case ARM64::EONWrr:
    case ARM64::EONXrr:
    case ARM64::EORWrr:
@@ -604,6 +608,10 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
      case ARM64::ANDXrr:      Opcode = ARM64::ANDXrs; break;
      case ARM64::BICWrr:      Opcode = ARM64::BICWrs; break;
      case ARM64::BICXrr:      Opcode = ARM64::BICXrs; break;
+    case ARM64::ANDSWrr:     Opcode = ARM64::ANDSWrs; break;
+    case ARM64::ANDSXrr:     Opcode = ARM64::ANDSXrs; break;
+    case ARM64::BICSWrr:     Opcode = ARM64::BICSWrs; break;
+    case ARM64::BICSXrr:     Opcode = ARM64::BICSXrs; break;
      case ARM64::EONWrr:      Opcode = ARM64::EONWrs; break;
      case ARM64::EONXrr:      Opcode = ARM64::EONXrs; break;
      case ARM64::EORWrr:      Opcode = ARM64::EORWrs; break;
diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp

index 5aa3a3652d0f02e65cf87bee9c23817aaaf87264..9ff9567ac281b5b24cd3067669986103bf47dd64 100644 (file)
--- a/lib/Target/ARM64/ARM64ISelLowering.cpp
+++ b/lib/Target/ARM64/ARM64ISelLowering.cpp
@@ -918,23 +918,32 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
    // SUBS means that it's possible to get CSE with subtract operations.
    // A later phase can perform the optimization of setting the destination
    // register to WZR/XZR if it ends up being unused.
-
-  // We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on the
-  // grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags can be
-  // set differently by this operation. It comes down to whether "SInt(~op2)+1
-  // == SInt(~op2+1)" (and the same for UInt). If they are then everything is
-  // fine. If not then the optimization is wrong. Thus general comparisons are
-  // only valid if op2 != 0.
-
-  // So, finally, the only LLVM-native comparisons that don't mention C and V
-  // are SETEQ and SETNE. They're the only ones we can safely use CMN for in the
-  // absence of information about op2.
    unsigned Opcode = ARM64ISD::SUBS;
+
    if (RHS.getOpcode() == ISD::SUB && isa<ConstantSDNode>(RHS.getOperand(0)) &&
        cast<ConstantSDNode>(RHS.getOperand(0))->getZExtValue() == 0 &&
        (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+    // We'd like to combine a (CMP op1, (sub 0, op2)) into a CMN instruction on
+    // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
+    // can be set differently by this operation. It comes down to whether
+    // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
+    // everything is fine. If not then the optimization is wrong. Thus general
+    // comparisons are only valid if op2 != 0.
+
+    // So, finally, the only LLVM-native comparisons that don't mention C and V
+    // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
+    // the absence of information about op2.
      Opcode = ARM64ISD::ADDS;
      RHS = RHS.getOperand(1);
+  } else if (LHS.getOpcode() == ISD::AND && isa<ConstantSDNode>(RHS) &&
+             cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
+             !isUnsignedIntSetCC(CC)) {
+    // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
+    // (a.k.a. ANDS) except that the flags are only guaranteed to work for
+    // comparisons that are not unsigned (hence the check on CC above).
+    Opcode = ARM64ISD::ANDS;
+    RHS = LHS.getOperand(1);
+    LHS = LHS.getOperand(0);
    }
  
    return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS)
diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/ARM64/ARM64InstrFormats.td

index 696b15fbf3f9060c316c8820438e10fc317988b1..e6239fcbc5c451e50da944e8cda2e71fcf4c858e 100644 (file)
--- a/lib/Target/ARM64/ARM64InstrFormats.td
+++ b/lib/Target/ARM64/ARM64InstrFormats.td
@@ -1798,12 +1798,18 @@ multiclass LogicalReg<bits<2> opc, bit N, string mnemonic,
  }
  
  // Split from LogicalReg to allow setting CPSR Defs
-multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic> {
+multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic,
+                       SDPatternOperator OpNode = null_frag> {
    let Defs = [CPSR], mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
-  def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic, []>{
+  def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>;
+  def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>;
+
+  def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic,
+            [(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_shifted_reg32:$Rm))]> {
      let Inst{31} = 0;
    }
-  def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic, []>{
+  def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic,
+            [(set GPR64:$Rd, (OpNode GPR64:$Rn, logical_shifted_reg64:$Rm))]> {
      let Inst{31} = 1;
    }
    } // Defs = [CPSR]
diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td

index 1d894eff14a66c28775260148dc1cbf2a477aeac..9cfb38f48f00ce658f710254bdf65115f1c3f295 100644 (file)
--- a/lib/Target/ARM64/ARM64InstrInfo.td
+++ b/lib/Target/ARM64/ARM64InstrInfo.td
@@ -125,7 +125,8 @@ def ARM64sbc       : SDNode<"ARM64ISD::SBC",  SDTBinaryArithWithFlagsIn>;
  def ARM64add_flag  : SDNode<"ARM64ISD::ADDS",  SDTBinaryArithWithFlagsOut,
                              [SDNPCommutative]>;
  def ARM64sub_flag  : SDNode<"ARM64ISD::SUBS",  SDTBinaryArithWithFlagsOut>;
-def ARM64and_flag  : SDNode<"ARM64ISD::ANDS",  SDTBinaryArithWithFlagsOut>;
+def ARM64and_flag  : SDNode<"ARM64ISD::ANDS",  SDTBinaryArithWithFlagsOut,
+                            [SDNPCommutative]>;
  def ARM64adc_flag  : SDNode<"ARM64ISD::ADCS",  SDTBinaryArithWithFlagsInOut>;
  def ARM64sbc_flag  : SDNode<"ARM64ISD::SBCS",  SDTBinaryArithWithFlagsInOut>;
  
@@ -619,8 +620,9 @@ def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
  
  
  // (register)
-defm ANDS : LogicalRegS<0b11, 0, "ands">;
-defm BICS : LogicalRegS<0b11, 1, "bics">;
+defm ANDS : LogicalRegS<0b11, 0, "ands", ARM64and_flag>;
+defm BICS : LogicalRegS<0b11, 1, "bics",
+                        BinOpFrag<(ARM64and_flag node:$LHS, (not node:$RHS))>>;
  defm AND  : LogicalReg<0b00, 0, "and", and>;
  defm BIC  : LogicalReg<0b00, 1, "bic",
                         BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
diff --git a/test/CodeGen/AArch64/fp128.ll b/test/CodeGen/AArch64/fp128.ll

index 317470be9d84e6e21f90b72af7e0c5122b4575e5..71c1cfe0e34f8090ba0b154154cb0455e50c74a8 100644 (file)
--- a/test/CodeGen/AArch64/fp128.ll
+++ b/test/CodeGen/AArch64/fp128.ll
@@ -1,6 +1,5 @@
  ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64
-; RUN: llc -mtriple=arm64-none-linux-gnu -mcpu=cyclone -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
-
+; arm64 has a separate copy of this test.
  @lhs = global fp128 zeroinitializer
  @rhs = global fp128 zeroinitializer
  
@@ -206,8 +205,9 @@ define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
  
    %val = select i1 %cond, fp128 %lhs, fp128 %rhs
    store fp128 %val, fp128* @lhs
-; CHECK: cmp {{w[0-9]+}}, #0
+; CHECK-AARCH64: cmp {{w[0-9]+}}, #0
  ; CHECK-AARCH64: str q1, [sp]
+; CHECK-ARM64: tst {{w[0-9]+}}, #0x1
  ; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]]
  ; CHECK-NEXT: BB#
  ; CHECK-AARCH64-NEXT: str q0, [sp]
diff --git a/test/CodeGen/AArch64/logical_shifted_reg.ll b/test/CodeGen/AArch64/logical_shifted_reg.ll

index a08ba20c7f119dd0704b03d484c79121b3fb22af..49b253bcfde62e2b17468aa118a5e20e49b484cf 100644 (file)
--- a/test/CodeGen/AArch64/logical_shifted_reg.ll
+++ b/test/CodeGen/AArch64/logical_shifted_reg.ll
@@ -1,4 +1,5 @@
  ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s
  
  @var1_32 = global i32 0
  @var2_32 = global i32 0
@@ -6,7 +7,7 @@
  @var1_64 = global i64 0
  @var2_64 = global i64 0
  
-define void @logical_32bit() {
+define void @logical_32bit() minsize {
  ; CHECK-LABEL: logical_32bit:
    %val1 = load i32* @var1_32
    %val2 = load i32* @var2_32
@@ -96,7 +97,7 @@ define void @logical_32bit() {
    ret void
  }
  
-define void @logical_64bit() {
+define void @logical_64bit() minsize {
  ; CHECK-LABEL: logical_64bit:
    %val1 = load i64* @var1_64
    %val2 = load i64* @var2_64
diff --git a/test/CodeGen/ARM64/fp128.ll b/test/CodeGen/ARM64/fp128.ll

index d3ac28c5076afe8166e6abf339088eba52199494..a1a956d23179a5da5095693f4e9ddd1ab0e94592 100644 (file)
--- a/test/CodeGen/ARM64/fp128.ll
+++ b/test/CodeGen/ARM64/fp128.ll
@@ -202,8 +202,7 @@ define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
  
    %val = select i1 %cond, fp128 %lhs, fp128 %rhs
    store fp128 %val, fp128* @lhs, align 16
-; CHECK: and [[BIT:w[0-9]+]], w0, #0x1
-; CHECK: cmp [[BIT]], #0
+; CHECK: tst w0, #0x1
  ; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]]
  ; CHECK-NEXT: BB#
  ; CHECK-NEXT: orr v[[VAL:[0-9]+]].16b, v0.16b, v0.16b
author	Tim Northover <tnorthover@apple.com>
	Tue, 22 Apr 2014 12:45:42 +0000 (12:45 +0000)
committer	Tim Northover <tnorthover@apple.com>
	Tue, 22 Apr 2014 12:45:42 +0000 (12:45 +0000)
lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp		patch \| blob \| history
lib/Target/ARM64/ARM64ISelLowering.cpp		patch \| blob \| history
lib/Target/ARM64/ARM64InstrFormats.td		patch \| blob \| history
lib/Target/ARM64/ARM64InstrInfo.td		patch \| blob \| history
test/CodeGen/AArch64/fp128.ll		patch \| blob \| history
test/CodeGen/AArch64/logical_shifted_reg.ll		patch \| blob \| history
test/CodeGen/ARM64/fp128.ll		patch \| blob \| history