}
// If we have T2 ops, we can materialize the address directly via movt/movw
- // pair. This is always cheaper.
- if (Subtarget->useMovt()) {
+ // pair. This is always cheaper in terms of performance, but uses at least 2
+ // extra bytes.
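+ // Prefer the smaller literal-pool load when optimizing for size.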
+ if (Subtarget->useMovt() &&
+ !DAG.getMachineFunction().getFunction()->hasFnAttr(Attribute::OptimizeForSize)) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// FIXME: Enable this for static codegen when tool issues are fixed.
- if (Subtarget->useMovt() && RelocM != Reloc::Static) {
+ if (Subtarget->useMovt() && RelocM != Reloc::Static &&
+ !DAG.getMachineFunction().getFunction()->hasFnAttr(Attribute::OptimizeForSize)) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
--- /dev/null
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-unknown-linux-eabi | FileCheck %s
+
+; Check that when optimizing for size, a literal pool load is used
+; instead of the (potentially faster) movw/movt pair when materializing
+; a global address.
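+;
+; The literal pool sequence looks roughly like "ldr rN, LCPIn_m" followed
+; by loads through rN, while the movw/movt form writes the low and high
+; halves of the address into a register directly.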
+
+@x = global i32* inttoptr (i32 305419888 to i32*), align 4
+
+define i32 @f() optsize {
+ ; CHECK: f:
+ ; CHECK: ldr r{{.}}, {{.?}}LCPI{{.}}_{{.}}
+ ; CHECK: ldr r{{.}}, [{{(pc, )?}}r{{.}}]
+ ; CHECK: ldr r{{.}}, [r{{.}}]
+ %1 = load i32** @x, align 4
+ %2 = load i32* %1
+ ret i32 %2
+}
+
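+; @g is not marked optsize, so the movw/movt pair should still be used.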
+define i32 @g() {
+ ; CHECK: g:
+ ; CHECK: movw
+ ; CHECK: movt
+ %1 = load i32** @x, align 4
+ %2 = load i32* %1
+ ret i32 %2
+}