[FastISel][AArch64] Fix the immediate versions of the {s|u}{add|sub}.with.overflow intrinsics.

author Juergen Ributzka <juergen@apple.com>

Fri, 1 Aug 2014 01:25:55 +0000 (01:25 +0000)

committer Juergen Ributzka <juergen@apple.com>

Fri, 1 Aug 2014 01:25:55 +0000 (01:25 +0000)
author Juergen Ributzka <juergen@apple.com>
Fri, 1 Aug 2014 01:25:55 +0000 (01:25 +0000)
committer Juergen Ributzka <juergen@apple.com>
Fri, 1 Aug 2014 01:25:55 +0000 (01:25 +0000)
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp

index 8d7bddbea95c5dff57fec2f0a75317ebd003938f..bfbf10a1ea1dda5fb1b17cc51d966eb39d6b2b16 100644 (file)
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -1759,59 +1759,53 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
        return false;
      bool LHSIsKill = hasTrivialKill(LHS);
  
-    unsigned RHSReg = 0;
-    bool RHSIsKill = false;
-    bool UseImm = true;
-    if (!isa<ConstantInt>(RHS)) {
-      RHSReg = getRegForValue(RHS);
-      if (!RHSReg)
-        return false;
-      RHSIsKill = hasTrivialKill(RHS);
-      UseImm = false;
+    // Check if the immediate can be encoded in the instruction and if we should
+    // invert the instruction (adds -> subs) to handle negative immediates.
+    bool UseImm = false;
+    bool UseInverse = false;
+    uint64_t Imm = 0;
+    if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
+      if (C->isNegative()) {
+        UseInverse = true;
+        Imm = -(C->getSExtValue());
+      } else
+        Imm = C->getZExtValue();
+
+      if (isUInt<12>(Imm))
+        UseImm = true;
+
+      UseInverse = UseImm && UseInverse;
      }
  
+    static const unsigned OpcTable[2][2][2] = {
+      { {AArch64::ADDSWrr, AArch64::ADDSXrr},
+        {AArch64::ADDSWri, AArch64::ADDSXri} },
+      { {AArch64::SUBSWrr, AArch64::SUBSXrr},
+        {AArch64::SUBSWri, AArch64::SUBSXri} }
+    };
      unsigned Opc = 0;
      unsigned MulReg = 0;
+    unsigned RHSReg = 0;
+    bool RHSIsKill = false;
      AArch64CC::CondCode CC = AArch64CC::Invalid;
      bool Is64Bit = VT == MVT::i64;
      switch (II->getIntrinsicID()) {
      default: llvm_unreachable("Unexpected intrinsic!");
      case Intrinsic::sadd_with_overflow:
-      if (UseImm)
-        Opc = Is64Bit ? AArch64::ADDSXri : AArch64::ADDSWri;
-      else
-        Opc = Is64Bit ? AArch64::ADDSXrr : AArch64::ADDSWrr;
-      CC = AArch64CC::VS;
-      break;
+      Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
      case Intrinsic::uadd_with_overflow:
-      if (UseImm)
-        Opc = Is64Bit ? AArch64::ADDSXri : AArch64::ADDSWri;
-      else
-        Opc = Is64Bit ? AArch64::ADDSXrr : AArch64::ADDSWrr;
-      CC = AArch64CC::HS;
-      break;
+      Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::HS; break;
      case Intrinsic::ssub_with_overflow:
-      if (UseImm)
-        Opc = Is64Bit ? AArch64::SUBSXri : AArch64::SUBSWri;
-      else
-        Opc = Is64Bit ? AArch64::SUBSXrr : AArch64::SUBSWrr;
-      CC = AArch64CC::VS;
-      break;
+      Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
      case Intrinsic::usub_with_overflow:
-      if (UseImm)
-        Opc = Is64Bit ? AArch64::SUBSXri : AArch64::SUBSWri;
-      else
-        Opc = Is64Bit ? AArch64::SUBSXrr : AArch64::SUBSWrr;
-      CC = AArch64CC::LO;
-      break;
+      Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::LO; break;
      case Intrinsic::smul_with_overflow: {
        CC = AArch64CC::NE;
-      if (UseImm) {
-        RHSReg = getRegForValue(RHS);
-        if (!RHSReg)
-          return false;
-        RHSIsKill = hasTrivialKill(RHS);
-      }
+      RHSReg = getRegForValue(RHS);
+      if (!RHSReg)
+        return false;
+      RHSIsKill = hasTrivialKill(RHS);
+
        if (VT == MVT::i32) {
          MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
          unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
@@ -1841,12 +1835,11 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
      }
      case Intrinsic::umul_with_overflow: {
        CC = AArch64CC::NE;
-      if (UseImm) {
-        RHSReg = getRegForValue(RHS);
-        if (!RHSReg)
-          return false;
-        RHSIsKill = hasTrivialKill(RHS);
-      }
+      RHSReg = getRegForValue(RHS);
+      if (!RHSReg)
+        return false;
+      RHSIsKill = hasTrivialKill(RHS);
+
        if (VT == MVT::i32) {
          MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
          unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64));
@@ -1872,15 +1865,23 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
      }
      }
  
+    if (!UseImm) {
+      RHSReg = getRegForValue(RHS);
+      if (!RHSReg)
+        return false;
+      RHSIsKill = hasTrivialKill(RHS);
+    }
+
      unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
      if (Opc) {
        MachineInstrBuilder MIB;
        MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                      ResultReg)
                .addReg(LHSReg, getKillRegState(LHSIsKill));
-      if (UseImm)
-        MIB.addImm(cast<ConstantInt>(RHS)->getZExtValue());
-      else
+      if (UseImm) {
+        MIB.addImm(Imm);
+        MIB.addImm(0);
+      } else
          MIB.addReg(RHSReg, getKillRegState(RHSIsKill));
      }
      else
diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll

index efc63dbbecf00abbb049263a4d5568d208e2c0b4..fe81d8d8debbab106b352464e454e0a575894059 100644 (file)
--- a/test/CodeGen/AArch64/arm64-xaluo.ll
+++ b/test/CodeGen/AArch64/arm64-xaluo.ll
@@ -4,9 +4,9 @@
  ;
  ; Get the actual value of the overflow bit.
  ;
-define zeroext i1 @saddo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @saddo1.i32(i32 %v1, i32 %v2, i32* %res) {
  entry:
-; CHECK-LABEL:  saddo.i32
+; CHECK-LABEL:  saddo1.i32
  ; CHECK:        adds {{w[0-9]+}}, w0, w1
  ; CHECK-NEXT:   cset {{w[0-9]+}}, vs
    %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
@@ -16,9 +16,48 @@ entry:
    ret i1 %obit
  }
  
-define zeroext i1 @saddo.i64(i64 %v1, i64 %v2, i64* %res) {
+; Test the immediate version.
+define zeroext i1 @saddo2.i32(i32 %v1, i32* %res) {
  entry:
-; CHECK-LABEL:  saddo.i64
+; CHECK-LABEL:  saddo2.i32
+; CHECK:        adds {{w[0-9]+}}, w0, #4
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+; Test negative immediates.
+define zeroext i1 @saddo3.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL:  saddo3.i32
+; CHECK:        subs {{w[0-9]+}}, w0, #4
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 -4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+; Test immediates that are too large to be encoded.
+define zeroext i1 @saddo4.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL:  saddo4.i32
+; CHECK:        adds {{w[0-9]+}}, w0, {{w[0-9]+}}
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 16777215)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo1.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; CHECK-LABEL:  saddo1.i64
  ; CHECK:        adds {{x[0-9]+}}, x0, x1
  ; CHECK-NEXT:   cset {{w[0-9]+}}, vs
    %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
@@ -28,6 +67,30 @@ entry:
    ret i1 %obit
  }
  
+define zeroext i1 @saddo2.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL:  saddo2.i64
+; CHECK:        adds {{x[0-9]+}}, x0, #4
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 4)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo3.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL:  saddo3.i64
+; CHECK:        subs {{x[0-9]+}}, x0, #4
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -4)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
  define zeroext i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) {
  entry:
  ; CHECK-LABEL:  uaddo.i32
@@ -52,9 +115,9 @@ entry:
    ret i1 %obit
  }
  
-define zeroext i1 @ssubo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @ssubo1.i32(i32 %v1, i32 %v2, i32* %res) {
  entry:
-; CHECK-LABEL:  ssubo.i32
+; CHECK-LABEL:  ssubo1.i32
  ; CHECK:        subs {{w[0-9]+}}, w0, w1
  ; CHECK-NEXT:   cset {{w[0-9]+}}, vs
    %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
@@ -64,6 +127,18 @@ entry:
    ret i1 %obit
  }
  
+define zeroext i1 @ssubo2.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL:  ssubo2.i32
+; CHECK:        adds {{w[0-9]+}}, w0, #4
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 -4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
  define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) {
  entry:
  ; CHECK-LABEL:  ssubo.i64
author	Juergen Ributzka <juergen@apple.com>
	Fri, 1 Aug 2014 01:25:55 +0000 (01:25 +0000)
committer	Juergen Ributzka <juergen@apple.com>
	Fri, 1 Aug 2014 01:25:55 +0000 (01:25 +0000)
lib/Target/AArch64/AArch64FastISel.cpp		patch \| blob \| history
test/CodeGen/AArch64/arm64-xaluo.ll		patch \| blob \| history