};
}
-#define END_OF_TEXT_LABEL_NAME "EndOfTextLabel"
-
} // End namespace llvm
namespace ShaderType {
}
}
-void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
-
- // This label is used to mark the end of the .text section.
- const TargetLoweringObjectFile &TLOF = getObjFileLowering();
- OutStreamer->SwitchSection(TLOF.getTextSection());
- MCSymbol *EndOfTextLabel =
- OutContext.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
- OutStreamer->EmitLabel(EndOfTextLabel);
-}
-
void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
void EmitFunctionBodyStart() override;
- void EmitEndOfAsmFile(Module &M) override;
-
void EmitFunctionEntryLabel() override;
void EmitGlobalVariable(const GlobalVariable *GV) override;
+++ /dev/null
-//===-- AMDGPUHSATargetObjectFile.cpp - AMDGPU Object Files ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUHSATargetObjectFile.h"
-#include "AMDGPU.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
-
-using namespace llvm;
-
-void AMDGPUHSATargetObjectFile::Initialize(MCContext &Ctx,
- const TargetMachine &TM){
- TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
-
- TextSection = AMDGPU::getHSATextSection(Ctx);
-
- DataGlobalAgentSection = AMDGPU::getHSADataGlobalAgentSection(Ctx);
- DataGlobalProgramSection = AMDGPU::getHSADataGlobalProgramSection(Ctx);
-
- RodataReadonlyAgentSection = AMDGPU::getHSARodataReadonlyAgentSection(Ctx);
-}
-
-bool AMDGPUHSATargetObjectFile::isAgentAllocationSection(
- const char *SectionName) const {
- return cast<MCSectionELF>(DataGlobalAgentSection)
- ->getSectionName()
- .equals(SectionName);
-}
-
-bool AMDGPUHSATargetObjectFile::isAgentAllocation(const GlobalValue *GV) const {
- // Read-only segments can only have agent allocation.
- return AMDGPU::isReadOnlySegment(GV) ||
- (AMDGPU::isGlobalSegment(GV) && GV->hasSection() &&
- isAgentAllocationSection(GV->getSection()));
-}
-
-bool AMDGPUHSATargetObjectFile::isProgramAllocation(
- const GlobalValue *GV) const {
- // The default for global segments is program allocation.
- return AMDGPU::isGlobalSegment(GV) && !isAgentAllocation(GV);
-}
-
-MCSection *AMDGPUHSATargetObjectFile::SelectSectionForGlobal(
- const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
- const TargetMachine &TM) const {
- if (Kind.isText() && !GV->hasComdat())
- return getTextSection();
-
- if (AMDGPU::isGlobalSegment(GV)) {
- if (isAgentAllocation(GV))
- return DataGlobalAgentSection;
-
- if (isProgramAllocation(GV))
- return DataGlobalProgramSection;
- }
-
- if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GV))
- return RodataReadonlyAgentSection;
-
- return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM);
-}
+++ /dev/null
-//===-- AMDGPUHSATargetObjectFile.h - AMDGPU HSA Object Info ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief This file declares the AMDGPU-specific subclass of
-/// TargetLoweringObjectFile use for targeting the HSA-runtime.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUHSATARGETOBJECTFILE_H
-#define LLVM_LIB_TARGET_AMDGPU_AMDGPUHSATARGETOBJECTFILE_H
-
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-
-class AMDGPUHSATargetObjectFile final : public TargetLoweringObjectFileELF {
-private:
- MCSection *DataGlobalAgentSection;
- MCSection *DataGlobalProgramSection;
- MCSection *RodataReadonlyAgentSection;
-
- bool isAgentAllocationSection(const char *SectionName) const;
- bool isAgentAllocation(const GlobalValue *GV) const;
- bool isProgramAllocation(const GlobalValue *GV) const;
-
-public:
- void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
-
- MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
- const TargetMachine &TM) const override;
-};
-
-} // end namespace llvm
-
-#endif
MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx));
break;
}
- case MachineOperand::MO_TargetIndex: {
- assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START);
- MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
- const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
- MCOp = MCOperand::createExpr(Expr);
- break;
- }
case MachineOperand::MO_ExternalSymbol: {
MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetMachine.h"
-#include "AMDGPUHSATargetObjectFile.h"
+#include "AMDGPUTargetObjectFile.h"
#include "AMDGPU.h"
#include "AMDGPUTargetTransformInfo.h"
#include "R600ISelLowering.h"
if (TT.getOS() == Triple::AMDHSA)
return make_unique<AMDGPUHSATargetObjectFile>();
- return make_unique<TargetLoweringObjectFileELF>();
+ return make_unique<AMDGPUTargetObjectFile>();
}
static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
--- /dev/null
+//===-- AMDGPUHSATargetObjectFile.cpp - AMDGPU Object Files ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetObjectFile.h"
+#include "AMDGPU.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Object File
+//===----------------------------------------------------------------------===//
+
+MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const {
+ if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GV))
+ return TextSection;
+
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM);
+}
+
+//===----------------------------------------------------------------------===//
+// HSA Object File
+//===----------------------------------------------------------------------===//
+
+
+void AMDGPUHSATargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM){
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+
+ TextSection = AMDGPU::getHSATextSection(Ctx);
+
+ DataGlobalAgentSection = AMDGPU::getHSADataGlobalAgentSection(Ctx);
+ DataGlobalProgramSection = AMDGPU::getHSADataGlobalProgramSection(Ctx);
+
+ RodataReadonlyAgentSection = AMDGPU::getHSARodataReadonlyAgentSection(Ctx);
+}
+
+bool AMDGPUHSATargetObjectFile::isAgentAllocationSection(
+ const char *SectionName) const {
+ return cast<MCSectionELF>(DataGlobalAgentSection)
+ ->getSectionName()
+ .equals(SectionName);
+}
+
+bool AMDGPUHSATargetObjectFile::isAgentAllocation(const GlobalValue *GV) const {
+ // Read-only segments can only have agent allocation.
+ return AMDGPU::isReadOnlySegment(GV) ||
+ (AMDGPU::isGlobalSegment(GV) && GV->hasSection() &&
+ isAgentAllocationSection(GV->getSection()));
+}
+
+bool AMDGPUHSATargetObjectFile::isProgramAllocation(
+ const GlobalValue *GV) const {
+ // The default for global segments is program allocation.
+ return AMDGPU::isGlobalSegment(GV) && !isAgentAllocation(GV);
+}
+
+MCSection *AMDGPUHSATargetObjectFile::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const {
+ if (Kind.isText() && !GV->hasComdat())
+ return getTextSection();
+
+ if (AMDGPU::isGlobalSegment(GV)) {
+ if (isAgentAllocation(GV))
+ return DataGlobalAgentSection;
+
+ if (isProgramAllocation(GV))
+ return DataGlobalProgramSection;
+ }
+
+ if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GV))
+ return RodataReadonlyAgentSection;
+
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM);
+}
--- /dev/null
+//===-- AMDGPUTargetObjectFile.h - AMDGPU Object Info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares the AMDGPU-specific subclass of
+/// TargetLoweringObjectFile.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AMDGPUTargetObjectFile : public TargetLoweringObjectFileELF {
+ public:
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
+};
+
+class AMDGPUHSATargetObjectFile final : public AMDGPUTargetObjectFile {
+private:
+ MCSection *DataGlobalAgentSection;
+ MCSection *DataGlobalProgramSection;
+ MCSection *RodataReadonlyAgentSection;
+
+ bool isAgentAllocationSection(const char *SectionName) const;
+ bool isAgentAllocation(const GlobalValue *GV) const;
+ bool isProgramAllocation(const GlobalValue *GV) const;
+
+public:
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
+};
+
+} // end namespace llvm
+
+#endif
AMDGPUAsmPrinter.cpp
AMDGPUDiagnosticInfoUnsupported.cpp
AMDGPUFrameLowering.cpp
- AMDGPUHSATargetObjectFile.cpp
+ AMDGPUTargetObjectFile.cpp
AMDGPUIntrinsicInfo.cpp
AMDGPUISelDAGToDAG.cpp
AMDGPUMCInstLower.cpp
case AMDGPU::fixup_si_rodata: {
uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
- *Dst = Value;
- break;
- }
-
- case AMDGPU::fixup_si_end_of_text: {
- uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
- // The value points to the last instruction in the text section, so we
- // need to add 4 bytes to get to the start of the constants.
+ // We emit constant data at the end of the text section and generate its
+ // address using the following code sequence:
+ // s_getpc_b64 s[0:1]
+ // s_add_u32 s0, s0, $symbol
+ // s_addc_u32 s1, s1, 0
+ //
+ // s_getpc_b64 returns the address of the s_add_u32 instruction and then
+ // the fixup replaces $symbol with a literal constant, which is a
+ // pc-relative offset from the encoding of the $symbol operand to the
+ // constant data.
+ //
+ // What we want here is an offset from the start of the s_add_u32
+ // instruction to the constant data, but since the encoding of $symbol
+ // starts 4 bytes after the start of the add instruction, we end up
+ // with an offset that is 4 bytes too small. This requires us to
+ // add 4 to the fixup value before applying it.
*Dst = Value + 4;
break;
}
const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_si_rodata", 0, 32, 0 },
- { "fixup_si_end_of_text", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
+ { "fixup_si_rodata", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
};
if (Kind < FirstTargetFixupKind)
/// fixup for global addresses with constant initializers
fixup_si_rodata,
- /// fixup for offset from instruction to end of text section
- fixup_si_end_of_text,
-
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
if (MO.isExpr()) {
const MCSymbolRefExpr *Expr = cast<MCSymbolRefExpr>(MO.getExpr());
- MCFixupKind Kind;
- const MCSymbol *Sym =
- Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
-
- if (&Expr->getSymbol() == Sym) {
- // Add the offset to the beginning of the constant values.
- Kind = (MCFixupKind)AMDGPU::fixup_si_end_of_text;
- } else {
- // This is used for constant data stored in .rodata.
- Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
- }
+ MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
Fixups.push_back(MCFixup::create(4, Expr, Kind, MI.getLoc()));
}
const GlobalValue *GV = GSD->getGlobal();
MVT PtrVT = getPointerTy(DAG.getDataLayout(), GSD->getAddressSpace());
- SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT);
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
-
- SDValue PtrLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue PtrHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
- DAG.getConstant(1, DL, MVT::i32));
-
- SDValue Lo = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i32, MVT::Glue),
- PtrLo, GA);
- SDValue Hi = DAG.getNode(ISD::ADDE, DL, DAG.getVTList(MVT::i32, MVT::Glue),
- PtrHi, DAG.getConstant(0, DL, MVT::i32),
- SDValue(Lo.getNode(), 1));
- return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+ return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT, GA);
}
SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
switch (MI->getOpcode()) {
default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
- case AMDGPU::SI_CONSTDATA_PTR: {
- unsigned Reg = MI->getOperand(0).getReg();
- unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
- unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
-
- BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg);
-
- // Add 32-bit offset from this instruction to the start of the constant data.
- BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo)
- .addReg(RegLo)
- .addTargetIndex(AMDGPU::TI_CONSTDATA_START)
- .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit);
- BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi)
- .addReg(RegHi)
- .addImm(0)
- .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit)
- .addReg(AMDGPU::SCC, RegState::Implicit);
- MI->eraseFromParent();
- break;
- }
case AMDGPU::SGPR_USE:
// This is just a placeholder for register allocation.
MI->eraseFromParent();
MI->eraseFromParent();
break;
}
+
+ case AMDGPU::SI_CONSTDATA_PTR: {
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
+ MachineFunction &MF = *MBB.getParent();
+ unsigned Reg = MI->getOperand(0).getReg();
+ unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0);
+ unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1);
+
+ // Create a bundle so these instructions won't be re-ordered by the
+ // post-RA scheduler.
+ MIBundleBuilder Bundler(MBB, MI);
+ Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
+
+ // Add 32-bit offset from this instruction to the start of the
+ // constant data.
+ Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
+ .addReg(RegLo)
+ .addOperand(MI->getOperand(1)));
+ Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
+ .addReg(RegHi)
+ .addImm(0));
+
+ llvm::finalizeBundle(MBB, Bundler.begin());
+
+ MI->eraseFromParent();
+ break;
+ }
}
return true;
}
def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
def SIconstdata_ptr : SDNode<
- "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]>
+ "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, i64>,
+ SDTCisVT<0, i64>]>
>;
//===----------------------------------------------------------------------===//
let ParserMatchClass = SoppBrTarget;
}
+def const_ga : Operand<iPTR>;
+
include "SIInstrFormats.td"
include "VIInstrFormats.td"
def SI_CONSTDATA_PTR : InstSI <
(outs SReg_64:$dst),
- (ins),
- "", [(set SReg_64:$dst, (i64 SIconstdata_ptr))]
+ (ins const_ga:$ptr),
+ "", [(set SReg_64:$dst, (i64 (SIconstdata_ptr (tglobaladdr:$ptr))))]
> {
let SALU = 1;
}
--- /dev/null
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+@readonly = private unnamed_addr addrspace(2) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0]
+@readonly2 = private unnamed_addr addrspace(2) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0]
+
+; GCN-LABEL: {{^}}main:
+; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
+; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], readonly
+; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], 0
+; GCN: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}}
+; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], readonly
+; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], 0
+; GCN: .text
+; GCN: readonly:
+; GCN: readonly2:
+define void @main(i32 %index, float addrspace(1)* %out) {
+ %ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @readonly, i32 0, i32 %index
+ %val = load float, float addrspace(2)* %ptr
+ store float %val, float addrspace(1)* %out
+ %ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @readonly2, i32 0, i32 %index
+ %val2 = load float, float addrspace(2)* %ptr2
+ store float %val2, float addrspace(1)* %out
+ ret void
+}
+