{ X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 },
{ X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 },
{ X86::PXORrr, X86::PXORrm, TB_ALIGN_16 },
+ { X86::ROUNDSDr, X86::ROUNDSDm, 0 },
+ { X86::ROUNDSSr, X86::ROUNDSSm, 0 },
{ X86::SBB32rr, X86::SBB32rm, 0 },
{ X86::SBB64rr, X86::SBB64rm, 0 },
{ X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 },
{ X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 },
{ X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 },
{ X86::VPXORrr, X86::VPXORrm, 0 },
+ { X86::VROUNDSDr, X86::VROUNDSDm, 0 },
+ { X86::VROUNDSSr, X86::VROUNDSSm, 0 },
{ X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 },
{ X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 },
{ X86::VSUBPDrr, X86::VSUBPDrm, 0 },
hasVirtualRegDefsInBasicBlock(*MI1, MBB) &&
MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
return true;
-
+
return false;
}
MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]);
MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]);
MachineOperand &OpC = Root.getOperand(0);
-
+
unsigned RegA = OpA.getReg();
unsigned RegB = OpB.getReg();
unsigned RegX = OpX.getReg();
.addReg(RegX, getKillRegState(KillX))
.addReg(RegY, getKillRegState(KillY));
InsInstrs.push_back(MIB1);
-
+
MachineInstrBuilder MIB2 =
BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
.addReg(RegA, getKillRegState(KillA))
Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
}
assert(Prev && "Unknown pattern for machine combiner");
-
+
reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
return;
}
}
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
-; TODO stack_fold_roundsd
+define double @stack_fold_roundsd(double %a0) optsize {
+ ;CHECK-LABEL: stack_fold_roundsd
+ ;CHECK: vroundsd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call double @llvm.floor.f64(double %a0)
+ ret double %2
+}
+declare double @llvm.floor.f64(double) nounwind readnone
; TODO stack_fold_roundsd_int
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
-; TODO stack_fold_roundss
+define float @stack_fold_roundss(float %a0) optsize {
+ ;CHECK-LABEL: stack_fold_roundss
+ ;CHECK: vroundss $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call float @llvm.floor.f32(float %a0)
+ ret float %2
+}
+declare float @llvm.floor.f32(float) nounwind readnone
; TODO stack_fold_roundss_int
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
-; TODO stack_fold_roundsd
+define double @stack_fold_roundsd(double %a0) optsize {
+ ;CHECK-LABEL: stack_fold_roundsd
+ ;CHECK: roundsd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call double @llvm.floor.f64(double %a0)
+ ret double %2
+}
+declare double @llvm.floor.f64(double) nounwind readnone
+
; TODO stack_fold_roundsd_int
+declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+define float @stack_fold_roundss(float %a0) optsize {
+ ;CHECK-LABEL: stack_fold_roundss
+ ;CHECK: roundss $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call float @llvm.floor.f32(float %a0)
+ ret float %2
+}
+declare float @llvm.floor.f32(float) nounwind readnone
-; TODO stack_fold_roundss
; TODO stack_fold_roundss_int
+declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
; TODO stack_fold_rsqrtps