-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd %rsi, %mm0}
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd %rdi, %mm1}
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {paddusw %mm0, %mm1}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | FileCheck %s
+
+; CHECK: movd %rsi, [[MM0:%mm[0-9]+]]
+; CHECK: movd %rdi, [[MM1:%mm[0-9]+]]
+; CHECK: paddusw [[MM0]], [[MM1]]
@R = external global x86_mmx ; <x86_mmx*> [#uses=1]
-; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8 -pre-alloc-split -regalloc=linearscan
define i32 @main() nounwind {
bb4.i.thread:
; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \
-; RUN: -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false < %s | \
+; RUN: -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=linearscan < %s | \
; RUN: FileCheck %s
; rdar://6808032
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | grep orps | grep CPI0_2 | count 2
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=X86-64 %s
; This testcase shouldn't need to spill the -1 value,
; so it should just use pcmpeqd to materialize an all-ones vector.
; For i386, cp load of -1 are folded.
+; With -regalloc=greedy, the live range is split before spilling, so the first
+; pcmpeq doesn't get folded as a constant pool load.
+
+; I386-NOT: pcmpeqd
+; I386: orps LCPI0_2, %xmm
+; I386-NOT: pcmpeqd
+; I386: orps LCPI0_2, %xmm
+
+; X86-64: pcmpeqd
+; X86-64-NOT: pcmpeqd
+
%struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }>
%struct._cl_image_format_t = type <{ i32, i32, i32 }>
%struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }>
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -regalloc=linearscan | FileCheck %s
; This testcase should need to spill the -1 value on both x86-32 and x86-64,
; so it shouldn't use pcmpeqd to materialize an all-ones vector; it
-; RUN: llc < %s | grep {addl.\$4, %ecx}
-; RUN: llc < %s | not grep leal
+; RUN: llc < %s | FileCheck %s
; this should not sink %1 into bb1, that would increase reg pressure.
; rdar://6399178
+; CHECK: addl $4,
+; CHECK-NOT: leal
+
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
; By starting the IV at -64 instead of 0, a cmp is eliminated,
; as the flags from the add can be used directly.
-; STATIC: movl $-64, %ecx
+; STATIC: movl $-64, [[ECX:%e..]]
-; STATIC: movl %eax, _state+76(%ecx)
-; STATIC: addl $16, %ecx
+; STATIC: movl [[EAX:%e..]], _state+76([[ECX]])
+; STATIC: addl $16, [[ECX]]
; STATIC: jne
; In PIC mode the symbol can't be folded, so the change-compare-stride
; Full strength reduction wouldn't reduce register pressure, so LSR should
; stick with indexing here.
-; CHECK: movaps (%{{rsi|rdx}},%rax,4), %xmm3
-; CHECK: movaps %xmm3, (%{{rdi|rcx}},%rax,4)
+; CHECK: movaps (%{{rsi|rdx}},%rax,4), [[X3:%xmm[0-9]+]]
+; CHECK: movaps
+; CHECK: [[X3]], (%{{rdi|rcx}},%rax,4)
; CHECK: addq $4, %rax
; CHECK: cmpl %eax, (%{{rdx|r8}})
; CHECK-NEXT: jg
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64
-; X64: movq ({{%rsi|%rdx}}), %rax
-; RUN: llc < %s -march=x86 -mattr=-sse2 | FileCheck %s -check-prefix=X32
-; X32: movl 4(%eax),
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=XMM
-; XMM: movsd (%eax),
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=-sse2 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
; This test should use GPRs to copy the mmx value, not MMX regs. Using mmx regs,
; increases the places that need to use emms.
-
+; CHECK-NOT: %mm
+; CHECK-NOT: emms
; rdar://5741668
define void @foo(<1 x i64>* %x, <1 x i64>* %y) nounwind {
; CHECK-NEXT: align
; CHECK-NEXT: BB1_2:
; CHECK-NEXT: callq
-; CHECK-NEXT: incl [[BX:%ebx|%esi]]
-; CHECK-NEXT: cmpl [[R14:%r14d|%edi]], [[BX]]
+; CHECK-NEXT: incl [[BX:%[a-z0-9]+]]
+; CHECK-NEXT: cmpl [[R14:%[a-z0-9]+]], [[BX]]
; CHECK-NEXT: movq %rax, %r{{di|cx}}
; CHECK-NEXT: jl
target triple = "x86_64-apple-darwin10.3"
-; CHECK: movl %{{.*}}, (%rdi,%rdx,4)
-; CHECK: movl %{{.*}}, 8(%rdi,%rdx,4)
-; CHECK: movl %{{.*}}, 4(%rdi,%rdx,4)
-; CHECK: movl %{{.*}}, 12(%rdi,%rdx,4)
+; CHECK: movl %{{.*}}, (%rdi,[[R0:.+]],4)
+; CHECK: movl %{{.*}}, 8(%rdi,[[R0]],4)
+; CHECK: movl %{{.*}}, 4(%rdi,[[R0]],4)
+; CHECK: movl %{{.*}}, 12(%rdi,[[R0]],4)
define void @test(i32* nocapture %array, i32 %r0) nounwind ssp noredzone {
bb.nph:
; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=X86-64
; MachineLICM should be able to hoist loop invariant reload out of the loop.
+; Only linear scan needs this, -regalloc=greedy sinks the spill instead.
; rdar://7233099
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
bb23: ; preds = %imix_test.exit
unreachable
+; Verify that there are no loads inside the loop.
; X86-32: %bb26.preheader
-; X86-32: movl -16(%ebp),
-; X86-32-NEXT: .align 4
-; X86-32-NEXT: %bb28
+; X86-32: .align 4
+; X86-32-NOT: (%esp),
+; X86-32-NOT: (%ebp),
+; X86-32: jmp
bb28: ; preds = %bb28, %bb26.preheader
%counter.035 = phi i32 [ %3, %bb28 ], [ 0, %bb26.preheader ] ; <i32> [#uses=2]
-; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of loads added} | grep 1
+; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats -regalloc=linearscan |& grep {Number of loads added} | grep 1
; PR3495
;
; This test may not be testing what it was supposed to test.
-; RUN: llc < %s -march=x86 -stats |& grep {Number of loads added} | grep 2
-; RUN: llc < %s -march=x86 -stats |& grep {Number of register spills} | grep 1
-; RUN: llc < %s -march=x86 -stats |& grep {Number of machine instrs printed} | grep 34
+; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of loads added} | grep 2
+; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of register spills} | grep 1
+; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of machine instrs printed} | grep 34
; PR3495
target triple = "i386-pc-linux-gnu"
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
; XFAIL: *
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan
define i32 @main(i32 %argc, i8** %argv) nounwind {
entry:
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -pre-alloc-split | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -pre-alloc-split -regalloc=linearscan | FileCheck %s
@.str = private constant [28 x i8] c"\0A\0ADOUBLE D = %f\0A\00", align 1 ; <[28 x i8]*> [#uses=1]
@.str1 = private constant [37 x i8] c"double to long l1 = %ld\09\09(0x%lx)\0A\00", align 8 ; <[37 x i8]*> [#uses=1]
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats -regalloc=linearscan |& \
; RUN: grep {pre-alloc-split} | count 2
define i32 @t(i32 %arg) {
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
define i32 @t(i32 %arg) {
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 2
define i32 @main(i32 %argc, i8** %argv) nounwind {
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan
target triple = "i386-apple-darwin9.5"
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -pre-alloc-split | grep {divsd 24} | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -pre-alloc-split -regalloc=linearscan | grep {divsd 24} | count 1
@current_surfaces.b = external global i1 ; <i1*> [#uses=1]
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan
@object_distance = external global double, align 8 ; <double*> [#uses=1]
@axis_slope_angle = external global double, align 8 ; <double*> [#uses=1]
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
@current_surfaces.b = external global i1 ; <i1*> [#uses=1]
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
@current_surfaces.b = external global i1 ; <i1*> [#uses=1]
%tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
ret <4 x float> %tmp27
; CHECK: test14:
-; CHECK: addps %xmm1, %xmm0
-; CHECK: subps %xmm1, %xmm2
-; CHECK: movlhps %xmm2, %xmm0
+; CHECK: addps [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
+; CHECK: subps [[X1]], [[X2:%xmm[0-9]+]]
+; CHECK: movlhps [[X2]], [[X0]]
}
define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
store <4 x i16> %6, <4 x i16>* @g2, align 8
ret void
; X64: t10:
-; X64: pextrw $4, %xmm0, %eax
-; X64: unpcklpd %xmm1, %xmm1
-; X64: pshuflw $8, %xmm1, %xmm1
-; X64: pinsrw $2, %eax, %xmm1
-; X64: pextrw $6, %xmm0, %eax
-; X64: pinsrw $3, %eax, %xmm1
+; X64: pextrw $4, [[X0:%xmm[0-9]+]], %eax
+; X64: unpcklpd [[X1:%xmm[0-9]+]]
+; X64: pshuflw $8, [[X1]], [[X1]]
+; X64: pinsrw $2, %eax, [[X1]]
+; X64: pextrw $6, [[X0]], %eax
+; X64: pinsrw $3, %eax, [[X1]]
}
; an unconditional jump to complete a two-way conditional branch.
; CHECK: c_expand_expr_stmt:
-; CHECK: jmp .LBB3_11
-; CHECK-NEXT: .LBB3_9:
-; CHECK-NEXT: movq 8(%rax), %rax
-; CHECK-NEXT: xorb %dl, %dl
-; CHECK-NEXT: movb 16(%rax), %al
-; CHECK-NEXT: cmpb $16, %al
-; CHECK-NEXT: je .LBB3_11
-; CHECK-NEXT: cmpb $23, %al
-; CHECK-NEXT: jne .LBB3_14
-; CHECK-NEXT: .LBB3_11:
+;
+; This test only works when register allocation happens to use %rax for both
+; load addresses.
+;
+; CHE: jmp .LBB3_11
+; CHE-NEXT: .LBB3_9:
+; CHE-NEXT: movq 8(%rax), %rax
+; CHE-NEXT: xorb %dl, %dl
+; CHE-NEXT: movb 16(%rax), %al
+; CHE-NEXT: cmpb $16, %al
+; CHE-NEXT: je .LBB3_11
+; CHE-NEXT: cmpb $23, %al
+; CHE-NEXT: jne .LBB3_14
+; CHE-NEXT: .LBB3_11:
%0 = type { %struct.rtx_def* }
%struct.lang_decl = type opaque
; Check that lowered arguments on the stack do not overwrite each other.
; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: movl [[A1:32|144]](%rsp), %eax
+; CHECK: movl [[A1:32|144]](%rsp), [[R1:%e..]]
; Move param %in1 to temp register (%r10d).
-; CHECK: movl [[A2:40|152]](%rsp), %r10d
+; CHECK: movl [[A2:40|152]](%rsp), [[R2:%[a-z0-9]+]]
; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: addl {{%edi|%ecx}}, %eax
+; CHECK: addl {{%edi|%ecx}}, [[R1]]
; Move param %in2 to stack.
-; CHECK: movl %r10d, [[A1]](%rsp)
+; CHECK: movl [[R2]], [[A1]](%rsp)
; Move result of addition to stack.
-; CHECK: movl %eax, [[A2]](%rsp)
+; CHECK: movl [[R1]], [[A2]](%rsp)
; Eventually, do a TAILCALL
; CHECK: TAILCALL
; W64: subq %rax, %rsp
; W64: movq %rsp, %rax
-; EFI: leaq 15(%rcx), %rax
-; EFI: andq $-16, %rax
+; EFI: leaq 15(%rcx), [[R1:%r..]]
+; EFI: andq $-16, [[R1]]
; EFI: movq %rsp, [[R64:%r..]]
-; EFI: subq %rax, [[R64]]
+; EFI: subq [[R1]], [[R64]]
; EFI: movq [[R64]], %rsp
%r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind