"Enable double precision denormal handling",
[FeatureFP64]>;
+// Subtarget feature "fast-fmaf": sets the FastFMAF32 subtarget field to true.
+// Meant for devices where an f32 fma is assumed to be at least as fast as a
+// separate multiply and add. The trailing empty list means this feature does
+// not imply any other feature.
+def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
+ "FastFMAF32",
+ "true",
+ "Assuming f32 fma is at least as fast as mul + add",
+ []>;
+
// Some instructions do not support denormals despite this flag. Using
// fp32 denormals also causes instructions to run at the double
// precision rate for the device.
: AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false),
DumpCode(false), R600ALUInst(false), HasVertexCache(false),
TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
- FP64Denormals(false), FP32Denormals(false), CaymanISA(false),
+ FP64Denormals(false), FP32Denormals(false),
+ FastFMAF32(false), CaymanISA(false),
FlatAddressSpace(false), EnableIRStructurizer(true),
EnablePromoteAlloca(false), EnableIfCvt(true),
EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
bool FP64;
bool FP64Denormals;
bool FP32Denormals;
+ bool FastFMAF32;
bool CaymanISA;
bool FlatAddressSpace;
bool EnableIRStructurizer;
return FP64Denormals;
}
+ /// \returns the FastFMAF32 flag, i.e. whether f32 fma is assumed to be at
+ /// least as fast as mul + add on this subtarget.
+ bool hasFastFMAF32() const {
+ return FastFMAF32;
+ }
+
/// \returns the FlatAddressSpace flag of this subtarget.
bool hasFlatAddressSpace() const {
return FlatAddressSpace;
}
// Southern Islands
//===----------------------------------------------------------------------===//
+// Of the processors listed here, the ones using SIFullSpeedModel ("SI",
+// "tahiti", "hawaii") additionally get FeatureFastFMAF32; the ones using
+// SIQuarterSpeedModel ("pitcairn", "kaveri", "mullins") do not.
-def : ProcessorModel<"SI", SIFullSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"SI", SIFullSpeedModel,
+ [FeatureSouthernIslands, FeatureFastFMAF32]
+>;
-def : ProcessorModel<"tahiti", SIFullSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"tahiti", SIFullSpeedModel,
+ [FeatureSouthernIslands, FeatureFastFMAF32]
+>;
def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureSeaIslands]>;
-def : ProcessorModel<"hawaii", SIFullSpeedModel, [FeatureSeaIslands]>;
+def : ProcessorModel<"hawaii", SIFullSpeedModel,
+ [FeatureSeaIslands, FeatureFastFMAF32]
+>;
def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
- return false; /* There is V_MAD_F32 for f32 */
+ return Subtarget->hasFastFMAF32();
case MVT::f64:
return true;
default: