-; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 < %s | FileCheck %s
-; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
;
; Get the actual value of the overflow bit.
;
-define zeroext i1 @saddo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @saddo1.i32(i32 %v1, i32 %v2, i32* %res) {
entry:
-; CHECK-LABEL: saddo.i32
+; CHECK-LABEL: saddo1.i32
; CHECK: adds {{w[0-9]+}}, w0, w1
; CHECK-NEXT: cset {{w[0-9]+}}, vs
%t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
ret i1 %obit
}
-define zeroext i1 @saddo.i64(i64 %v1, i64 %v2, i64* %res) {
+; Test the immediate version.
+define zeroext i1 @saddo2.i32(i32 %v1, i32* %res) {
entry:
-; CHECK-LABEL: saddo.i64
+; CHECK-LABEL: saddo2.i32
+; CHECK: adds {{w[0-9]+}}, w0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 4)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+; Test negative immediates.
+define zeroext i1 @saddo3.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL: saddo3.i32
+; CHECK: subs {{w[0-9]+}}, w0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 -4)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+; Test immediates that are too large to be encoded.
+define zeroext i1 @saddo4.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL: saddo4.i32
+; CHECK: adds {{w[0-9]+}}, w0, {{w[0-9]+}}
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 16777215)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+; Test shift folding.
+define zeroext i1 @saddo5.i32(i32 %v1, i32 %v2, i32* %res) {
+entry:
+; CHECK-LABEL: saddo5.i32
+; CHECK: adds {{w[0-9]+}}, w0, w1
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+ %lsl = shl i32 %v2, 16
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %lsl)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @saddo1.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; CHECK-LABEL: saddo1.i64
; CHECK: adds {{x[0-9]+}}, x0, x1
; CHECK-NEXT: cset {{w[0-9]+}}, vs
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
ret i1 %obit
}
+define zeroext i1 @saddo2.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL: saddo2.i64
+; CHECK: adds {{x[0-9]+}}, x0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+ %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 4)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @saddo3.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL: saddo3.i64
+; CHECK: subs {{x[0-9]+}}, x0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+ %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -4)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
define zeroext i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) {
entry:
; CHECK-LABEL: uaddo.i32
ret i1 %obit
}
-define zeroext i1 @ssubo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @ssubo1.i32(i32 %v1, i32 %v2, i32* %res) {
entry:
-; CHECK-LABEL: ssubo.i32
+; CHECK-LABEL: ssubo1.i32
; CHECK: subs {{w[0-9]+}}, w0, w1
; CHECK-NEXT: cset {{w[0-9]+}}, vs
%t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
ret i1 %obit
}
+define zeroext i1 @ssubo2.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL: ssubo2.i32
+; CHECK: adds {{w[0-9]+}}, w0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+ %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 -4)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) {
entry:
; CHECK-LABEL: ssubo.i64
ret i1 %obit
}
+define zeroext i1 @smulo2.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL: smulo2.i64
+; CHECK: adds [[MREG:x[0-9]+]], x0, x0
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
entry:
; CHECK-LABEL: umulo.i32
ret i1 %obit
}
+define zeroext i1 @umulo2.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL: umulo2.i64
+; CHECK: adds [[MREG:x[0-9]+]], x0, x0
+; CHECK-NEXT: cset {{w[0-9]+}}, hs
+ %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
;
; Check the use of the overflow bit in combination with a select instruction.
ret i1 true
}
+define zeroext i1 @smulo2.br.i64(i64 %v1) {
+entry:
+; CHECK-LABEL: smulo2.br.i64
+; CHECK: cmn x0, x0
+; CHECK-NEXT: b.vc
+ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ br i1 %obit, label %overflow, label %continue
+
+overflow:
+ ret i1 false
+
+continue:
+ ret i1 true
+}
+
define zeroext i1 @umulo.br.i32(i32 %v1, i32 %v2) {
entry:
; CHECK-LABEL: umulo.br.i32
ret i1 true
}
+define zeroext i1 @umulo2.br.i64(i64 %v1) {
+entry:
+; CHECK-LABEL: umulo2.br.i64
+; CHECK: cmn x0, x0
+; CHECK-NEXT: b.lo
+ %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ br i1 %obit, label %overflow, label %continue
+
+overflow:
+ ret i1 false
+
+continue:
+ ret i1 true
+}
+
declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone