From 511a3c71fcbe64acb6dd4ac42bfd99249b52191f Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard@amd.com>
Date: Fri, 23 Jan 2015 22:05:45 +0000
Subject: [PATCH] R600/SI: Move i64 -> v2i32 load promotion into
 AMDGPUDAGToDAGISel::Select()

We used to do this promotion during DAG legalization, but this
caused an infinite loop in ExpandUnalignedLoad() because it assumed
that i64 loads were legal if i64 was a legal type.

It also seems better to report i64 loads as legal, since they
actually are and we were just promoting them to simplify our
tablegen files.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226945 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 22 ++++++++++++++++++++++
 lib/Target/R600/AMDGPUISelLowering.cpp |  3 ---
 test/CodeGen/R600/misaligned-load.ll   | 18 ++++++++++++++++++
 3 files changed, 40 insertions(+), 3 deletions(-)
 create mode 100644 test/CodeGen/R600/misaligned-load.ll

diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index e0e81680cc0..15112c7e54d 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -417,6 +417,28 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
                                 N->getValueType(0), Ops);
   }
 
+  case ISD::LOAD: {
+    // To simplify the TableGen patterns, we replace all i64 loads with
+    // v2i32 loads.  Alternatively, we could promote i64 loads to v2i32
+    // during DAG legalization, but some places in the DAG legalizer
+    // (e.g. ExpandUnalignedLoad()) assume that i64 loads are legal if
+    // i64 is a legal type, so doing the promotion early causes problems.
+    EVT VT = N->getValueType(0);
+    LoadSDNode *LD = cast<LoadSDNode>(N);
+    if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
+      break;
+
+    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
+                                      LD->getBasePtr(), LD->getMemOperand());
+    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+                                      MVT::i64, NewLoad);
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
+    SelectCode(NewLoad.getNode());
+    N = BitCast.getNode();
+    break;
+  }
+
   case AMDGPUISD::REGISTER_LOAD: {
     if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
       break;
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index e169f912624..dee61d31e6e 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -189,9 +189,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
 
-  setOperationAction(ISD::LOAD, MVT::i64, Promote);
-  AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
-
   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
 
diff --git a/test/CodeGen/R600/misaligned-load.ll b/test/CodeGen/R600/misaligned-load.ll
new file mode 100644
index 00000000000..6290ca09d50
--- /dev/null
+++ b/test/CodeGen/R600/misaligned-load.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI: @byte_aligned_load64
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: s_endpgm
+define void @byte_aligned_load64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) {
+entry:
+  %0 = load i64 addrspace(3)* %in, align 1
+  store i64 %0, i64 addrspace(1)* %out
+  ret void
+}
-- 
2.34.1