}
}
}
-
-namespace {
- struct MSAH : public MachineFunctionPass {
- static char ID;
- MSAH() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- const X86TargetMachine *TM =
- static_cast<const X86TargetMachine *>(&MF.getTarget());
- const TargetFrameLowering *TFI = TM->getFrameLowering();
- MachineRegisterInfo &RI = MF.getRegInfo();
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- unsigned StackAlignment = TFI->getStackAlignment();
-
- // Be over-conservative: scan over all vreg defs and find whether vector
- // registers are used. If yes, there is a possibility that vector register
- // will be spilled and thus require dynamic stack realignment.
- for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
- FuncInfo->setForceFramePointer(true);
- return true;
- }
- }
- // Nothing to do
- return false;
- }
-
- virtual const char *getPassName() const {
- return "X86 Maximal Stack Alignment Check";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-
- char MSAH::ID = 0;
-}
-
-FunctionPass*
-llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); }
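The deleted pass was deliberately over-conservative: it forced a frame pointer as soon as any virtual register's class required more alignment than the incoming stack provides (32-byte YMM registers versus the 16-byte x86-64 stack), whether or not a spill ever materialized. A minimal illustration, assuming AVX codegen; the function name is hypothetical and not part of the patch:

; Under the old heuristic, merely defining YMM-class virtual registers was
; enough to force a frame pointer, even though no value here ever needs a
; stack slot.
define <8 x float> @no_spill_needed(<8 x float> %a, <8 x float> %b) nounwind {
entry:
  %sum = fadd <8 x float> %a, %b
  ret <8 x float> %sum
}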
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
; X64: call
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
%c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
ret <16 x float> %c
declare void @t4_helper(i32*, i32*, <8 x float>*)
-; Dynamic realignment + Spill
+; Spilling an AVX register shouldn't cause dynamic realignment
define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
entry:
%a = alloca i32, align 4
ret i32 %add
; CHECK: _t5
-; CHECK: pushq %rbp
-; CHECK: movq %rsp, %rbp
-; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]]
-; CHECK: vmovaps [[AVXREG]], (%rsp)
+; CHECK: vmovups [[AVXREG]], (%rsp)
; CHECK: leaq {{[0-9]+}}(%rsp), %rdi
; CHECK: callq _t5_helper1
-; CHECK: vmovaps (%rsp), %ymm0
+; CHECK: vmovups (%rsp), %ymm0
; CHECK: callq _t5_helper2
; CHECK: movl {{[0-9]+}}(%rsp), %eax
-;
-; CHECK: movq %rbp, %rsp
-; CHECK: popq %rbp
}
declare void @t5_helper1(i32*)
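The updated checks for @t5, like the callee-saved YMM saves in test_prolog_epilog above, expect unaligned vmovups stores and loads at %rsp-relative offsets: a YMM spill slot no longer needs 32-byte alignment, so there is no reason to set up %rbp and realign the stack just because a 32-byte register gets saved. A reduced sketch of the same situation, with hypothetical names not taken from the patch:

declare void @clobber_ymms()

; %v is live across the call and must be spilled; with this change the
; spill/reload pair is expected to be an unaligned vmovups to a plain
; (%rsp)-relative slot rather than an aligned vmovaps into a realigned frame.
define <8 x float> @spill_across_call(<8 x float> %v) nounwind {
entry:
  call void @clobber_ymms()
  ret <8 x float> %v
}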