[NVPTX] expand extload/truncstore for vectors of floats

author Jingyue Wu <jingyue@google.com>

Wed, 1 Jul 2015 21:32:42 +0000 (21:32 +0000)

committer Jingyue Wu <jingyue@google.com>

Wed, 1 Jul 2015 21:32:42 +0000 (21:32 +0000)
author Jingyue Wu <jingyue@google.com>
Wed, 1 Jul 2015 21:32:42 +0000 (21:32 +0000)
committer Jingyue Wu <jingyue@google.com>
Wed, 1 Jul 2015 21:32:42 +0000 (21:32 +0000)
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp

index b5af72ab855aa43cfb975d229b401a5d292d0286..09e0bd5d3d8839044eb47821c1b6c2365347e3ff 100644 (file)
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -206,7 +206,14 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
    // Turn FP truncstore into trunc + store.
+  // FIXME: vector types should also be expanded
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
diff --git a/test/CodeGen/NVPTX/extloadv.ll b/test/CodeGen/NVPTX/extloadv.ll

new file mode 100644 (file)

index 0000000..8c264ae
--- /dev/null
+++ b/test/CodeGen/NVPTX/extloadv.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+
+define void @foo(float* nocapture readonly %x_value, double* nocapture %output) #0 {
+  %1 = bitcast float* %x_value to <4 x float>*
+  %2 = load <4 x float>, <4 x float>* %1, align 16
+  %3 = fpext <4 x float> %2 to <4 x double>
+; CHECK-NOT: ld.v2.f32 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}];
+; CHECK:  cvt.f64.f32
+; CHECK:  cvt.f64.f32
+; CHECK:  cvt.f64.f32
+; CHECK:  cvt.f64.f32
+  %4 = bitcast double* %output to <4 x double>*
+  store <4 x double> %3, <4 x double>* %4
+  ret void
+}
author	Jingyue Wu <jingyue@google.com>
	Wed, 1 Jul 2015 21:32:42 +0000 (21:32 +0000)
committer	Jingyue Wu <jingyue@google.com>
	Wed, 1 Jul 2015 21:32:42 +0000 (21:32 +0000)
lib/Target/NVPTX/NVPTXISelLowering.cpp		patch | blob | history
test/CodeGen/NVPTX/extloadv.ll	[new file with mode: 0644]	patch | blob