"true",
"Enable double precision operations">;
+def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
+ "FP64Denormals",
+ "true",
+ "Enable double precision denormal handling",
+ [FeatureFP64]>;
+
+// Some instructions do not support denormals despite this flag. Using
+// fp32 denormals also causes instructions to run at the double
+// precision rate for the device.
+def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
+ "FP32Denormals",
+ "true",
+ "Enable single precision denormal handling">;
+
def Feature64BitPtr : SubtargetFeature<"64BitPtr",
"Is64bit",
"true",
// We want to use these instructions, and using fp32 denormals also causes
// instructions to run at the double precision rate for the device so it's
// probably best to just report no single precision denormals.
-static uint32_t getFPMode(const MachineFunction &) {
+static uint32_t getFPMode(const MachineFunction &F) {
+ const AMDGPUSubtarget& ST = F.getTarget().getSubtarget<AMDGPUSubtarget>();
+ // TODO: Is there any real use for the flush in only / flush out only modes?
+
+ uint32_t FP32Denormals =
+ ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+
+ uint32_t FP64Denormals =
+ ST.hasFP64Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+
return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
- FP_DENORM_MODE_SP(FP_DENORM_FLUSH_IN_FLUSH_OUT) |
- FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);
+ FP_DENORM_MODE_SP(FP32Denormals) |
+ FP_DENORM_MODE_DP(FP64Denormals);
}
static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
}
+def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
+def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
+
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "SIInstrInfo.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallString.h"
TexVTXClauseSize(0),
Gen(AMDGPUSubtarget::R600),
FP64(false),
+ FP64Denormals(false),
+ FP32Denormals(false),
CaymanISA(false),
EnableIRStructurizer(true),
EnablePromoteAlloca(false),
CFALUBug(false),
LocalMemorySize(0),
InstrItins(getInstrItineraryForCPU(GPU)) {
+ // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
+ // enabled, but some instructions do not respect them and they run at the
+ // double precision rate, so don't enable by default.
+ //
+ // We want to be able to turn these off, but making this a subtarget feature
+ // for SI has the unhelpful behavior that it unsets everything else if you
+ // disable it.
- SmallString<256> FullFS("+promote-alloca,");
+ SmallString<256> FullFS("+promote-alloca,+fp64-denormals,");
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
InstrInfo.reset(new R600InstrInfo(*this));
+
+ // FIXME: I don't think think Evergreen has any useful support for
+ // denormals, but should be checked. Should we issue a warning somewhere if
+ // someone tries to enable these?
+ FP32Denormals = false;
+ FP64Denormals = false;
} else {
InstrInfo.reset(new SIInstrInfo(*this));
}
short TexVTXClauseSize;
Generation Gen;
bool FP64;
+ bool FP64Denormals;
+ bool FP32Denormals;
bool CaymanISA;
bool EnableIRStructurizer;
bool EnablePromoteAlloca;
return CaymanISA;
}
+ bool hasFP32Denormals() const {
+ return FP32Denormals;
+ }
+
+ bool hasFP64Denormals() const {
+ return FP64Denormals;
+ }
+
bool hasBFE() const {
return (getGeneration() >= EVERGREEN);
}
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; SI-LABEL: @test_kernel
-; SI: FloatMode: 192
-; SI: IeeeMode: 0
+; FUNC-LABEL: @test_kernel
+
+; DEFAULT: FloatMode: 192
+; DEFAULT: IeeeMode: 0
+
+; FP64-DENORMAL: FloatMode: 192
+; FP64-DENORMAL: IeeeMode: 0
+
+; FP32-DENORMAL: FloatMode: 48
+; FP32-DENORMAL: IeeeMode: 0
+
+; BOTH-DENORMAL: FloatMode: 240
+; BOTH-DENORMAL: IeeeMode: 0
+
+; NO-DENORMAL: FloatMode: 0
+; NO-DENORMAL: IeeeMode: 0
define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
store float 0.0, float addrspace(1)* %out0
store double 0.0, double addrspace(1)* %out1