return false;
bool LHSIsKill = hasTrivialKill(LHS);
- unsigned RHSReg = 0;
- bool RHSIsKill = false;
- bool UseImm = true;
- if (!isa<ConstantInt>(RHS)) {
- RHSReg = getRegForValue(RHS);
- if (!RHSReg)
- return false;
- RHSIsKill = hasTrivialKill(RHS);
- UseImm = false;
+ // Check if the immediate can be encoded in the instruction and whether we
+ // should invert the instruction (adds -> subs) to handle negative immediates.
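+ // For example, sadd.with.overflow(%x, -4) can then be selected as
+ // 'subs Wd, Wn, #4' instead of first materializing -4 in a register.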
+ bool UseImm = false;
+ bool UseInverse = false;
+ uint64_t Imm = 0;
+ if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
+ if (C->isNegative()) {
+ UseInverse = true;
+ Imm = -(C->getSExtValue());
+ } else
+ Imm = C->getZExtValue();
+
+ if (isUInt<12>(Imm))
+ UseImm = true;
+
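+ // Only keep the inversion if the (negated) immediate can actually be
+ // encoded; otherwise fall back to the plain register form.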
+ UseInverse = UseImm && UseInverse;
}
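+ // The table is indexed as [Subtract][UseImm][Is64Bit]: the first index
+ // selects ADDS vs. SUBS, the second the register vs. immediate form, and
+ // the third the 32-bit vs. 64-bit variant.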
+ static const unsigned OpcTable[2][2][2] = {
+ { {AArch64::ADDSWrr, AArch64::ADDSXrr},
+ {AArch64::ADDSWri, AArch64::ADDSXri} },
+ { {AArch64::SUBSWrr, AArch64::SUBSXrr},
+ {AArch64::SUBSWri, AArch64::SUBSXri} }
+ };
unsigned Opc = 0;
unsigned MulReg = 0;
+ unsigned RHSReg = 0;
+ bool RHSIsKill = false;
AArch64CC::CondCode CC = AArch64CC::Invalid;
bool Is64Bit = VT == MVT::i64;
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
- if (UseImm)
- Opc = Is64Bit ? AArch64::ADDSXri : AArch64::ADDSWri;
- else
- Opc = Is64Bit ? AArch64::ADDSXrr : AArch64::ADDSWrr;
- CC = AArch64CC::VS;
- break;
+ Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
case Intrinsic::uadd_with_overflow:
- if (UseImm)
- Opc = Is64Bit ? AArch64::ADDSXri : AArch64::ADDSWri;
- else
- Opc = Is64Bit ? AArch64::ADDSXrr : AArch64::ADDSWrr;
- CC = AArch64CC::HS;
- break;
+ Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::HS; break;
case Intrinsic::ssub_with_overflow:
- if (UseImm)
- Opc = Is64Bit ? AArch64::SUBSXri : AArch64::SUBSWri;
- else
- Opc = Is64Bit ? AArch64::SUBSXrr : AArch64::SUBSWrr;
- CC = AArch64CC::VS;
- break;
+ Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
case Intrinsic::usub_with_overflow:
- if (UseImm)
- Opc = Is64Bit ? AArch64::SUBSXri : AArch64::SUBSWri;
- else
- Opc = Is64Bit ? AArch64::SUBSXrr : AArch64::SUBSWrr;
- CC = AArch64CC::LO;
- break;
+ Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::LO; break;
case Intrinsic::smul_with_overflow: {
CC = AArch64CC::NE;
- if (UseImm) {
- RHSReg = getRegForValue(RHS);
- if (!RHSReg)
- return false;
- RHSIsKill = hasTrivialKill(RHS);
- }
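+ // There is no multiply-with-immediate instruction, so the RHS always has
+ // to be materialized into a register.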
+ RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return false;
+ RHSIsKill = hasTrivialKill(RHS);
+
if (VT == MVT::i32) {
MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
}
case Intrinsic::umul_with_overflow: {
CC = AArch64CC::NE;
- if (UseImm) {
- RHSReg = getRegForValue(RHS);
- if (!RHSReg)
- return false;
- RHSIsKill = hasTrivialKill(RHS);
- }
+ RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return false;
+ RHSIsKill = hasTrivialKill(RHS);
+
if (VT == MVT::i32) {
MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64));
}
}
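+ // Materialize the RHS into a register whenever the immediate form is not
+ // used.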
+ if (!UseImm) {
+ RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return false;
+ RHSIsKill = hasTrivialKill(RHS);
+ }
+
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
if (Opc) {
MachineInstrBuilder MIB;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
ResultReg)
.addReg(LHSReg, getKillRegState(LHSIsKill));
- if (UseImm)
- MIB.addImm(cast<ConstantInt>(RHS)->getZExtValue());
- else
+ if (UseImm) {
+ MIB.addImm(Imm);
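+ // The immediate variants also take a shift amount; the immediate is used
+ // unshifted here.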
+ MIB.addImm(0);
+ } else
MIB.addReg(RHSReg, getKillRegState(RHSIsKill));
}
else
;
; Get the actual value of the overflow bit.
;
-define zeroext i1 @saddo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @saddo1.i32(i32 %v1, i32 %v2, i32* %res) {
entry:
-; CHECK-LABEL: saddo.i32
+; CHECK-LABEL: saddo1.i32
; CHECK: adds {{w[0-9]+}}, w0, w1
; CHECK-NEXT: cset {{w[0-9]+}}, vs
%t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
%val = extractvalue {i32, i1} %t, 0
%obit = extractvalue {i32, i1} %t, 1
store i32 %val, i32* %res
ret i1 %obit
}
-define zeroext i1 @saddo.i64(i64 %v1, i64 %v2, i64* %res) {
+; Test the immediate version.
+define zeroext i1 @saddo2.i32(i32 %v1, i32* %res) {
entry:
-; CHECK-LABEL: saddo.i64
+; CHECK-LABEL: saddo2.i32
+; CHECK: adds {{w[0-9]+}}, w0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 4)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+; Test negative immediates.
+define zeroext i1 @saddo3.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL: saddo3.i32
+; CHECK: subs {{w[0-9]+}}, w0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 -4)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+; Test immediates that are too large to be encoded in the 12-bit immediate field.
+define zeroext i1 @saddo4.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL: saddo4.i32
+; CHECK: adds {{w[0-9]+}}, w0, {{w[0-9]+}}
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 16777215)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @saddo1.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; CHECK-LABEL: saddo1.i64
; CHECK: adds {{x[0-9]+}}, x0, x1
; CHECK-NEXT: cset {{w[0-9]+}}, vs
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
%val = extractvalue {i64, i1} %t, 0
%obit = extractvalue {i64, i1} %t, 1
store i64 %val, i64* %res
ret i1 %obit
}
+define zeroext i1 @saddo2.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL: saddo2.i64
+; CHECK: adds {{x[0-9]+}}, x0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+ %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 4)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @saddo3.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL: saddo3.i64
+; CHECK: subs {{x[0-9]+}}, x0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+ %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -4)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
define zeroext i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) {
entry:
; CHECK-LABEL: uaddo.i32
; CHECK: adds {{w[0-9]+}}, w0, w1
; CHECK-NEXT: cset {{w[0-9]+}}, hs
%t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
%val = extractvalue {i32, i1} %t, 0
%obit = extractvalue {i32, i1} %t, 1
store i32 %val, i32* %res
ret i1 %obit
}
-define zeroext i1 @ssubo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @ssubo1.i32(i32 %v1, i32 %v2, i32* %res) {
entry:
-; CHECK-LABEL: ssubo.i32
+; CHECK-LABEL: ssubo1.i32
; CHECK: subs {{w[0-9]+}}, w0, w1
; CHECK-NEXT: cset {{w[0-9]+}}, vs
%t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
%val = extractvalue {i32, i1} %t, 0
%obit = extractvalue {i32, i1} %t, 1
store i32 %val, i32* %res
ret i1 %obit
}
+define zeroext i1 @ssubo2.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL: ssubo2.i32
+; CHECK: adds {{w[0-9]+}}, w0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+ %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 -4)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) {
entry:
; CHECK-LABEL: ssubo.i64