ISA version, *vendor*, and *arch* will all be stored in a single entry of the
.note section.
+
+.amd_kernel_code_t
+^^^^^^^^^^^^^^^^^^
+
+This directive marks the beginning of a list of key / value pairs that are used
+to specify the amd_kernel_code_t object that will be emitted by the assembler.
+The list must be terminated by the *.end_amd_kernel_code_t* directive. For
+any amd_kernel_code_t values that are unspecified a default value will be
+used. The default value for all keys is 0, with the following exceptions:
+
+- *kernel_code_version_major* defaults to 1.
+- *machine_kind* defaults to 1.
+- *machine_version_major*, *machine_version_minor*, and
+ *machine_version_stepping* are derived from the value of the -mcpu option
+ that is passed to the assembler.
+- *kernel_code_entry_byte_offset* defaults to 256.
+- *wavefront_size* defaults to 6.
+- *kernarg_segment_alignment*, *group_segment_alignment*, and
+ *private_segment_alignment* default to 4. Note that alignments are specified
+ as a power of two, so a value of **n** means an alignment of 2^ **n**.
+
+The *.amd_kernel_code_t* directive must be placed immediately after the
+function label and before any instructions.
+
+For a full list of amd_kernel_code_t keys, see the examples in
+test/CodeGen/AMDGPU/hsa.s. For an explanation of the meanings of the different
+keys, see the comments in lib/Target/AMDGPU/AmdKernelCodeT.h
+
+Here is an example of a minimal amd_kernel_code_t specification:
+
+.. code-block:: nasm
+
+ .hsa_code_object_version 1,0
+ .hsa_code_object_isa
+
+ .text
+
+ hello_world:
+
+ .amd_kernel_code_t
+ enable_sgpr_kernarg_segment_ptr = 1
+ is_ptr64 = 1
+ compute_pgm_rsrc1_vgprs = 0
+ compute_pgm_rsrc1_sgprs = 0
+ compute_pgm_rsrc2_user_sgpr = 2
+ kernarg_segment_byte_size = 8
+ wavefront_sgpr_count = 2
+ workitem_vgpr_count = 3
+ .end_amd_kernel_code_t
+
+ s_load_dwordx2 s[0:1], s[0:1] 0x0
+ v_mov_b32 v0, 3.14159
+ s_waitcnt lgkmcnt(0)
+ v_mov_b32 v1, s0
+ v_mov_b32 v2, s1
+ flat_store_dword v0, v[1:2]
+ s_endpgm
}
void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
- const SIProgramInfo &KernelInfo) const {
+ const SIProgramInfo &KernelInfo) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
amd_kernel_code_t header;
- memset(&header, 0, sizeof(header));
-
- header.amd_code_version_major = AMD_CODE_VERSION_MAJOR;
- header.amd_code_version_minor = AMD_CODE_VERSION_MINOR;
-
- header.struct_byte_size = sizeof(amd_kernel_code_t);
-
- header.target_chip = STM.getAmdKernelCodeChipID();
-
- header.kernel_code_entry_byte_offset = (1ULL << MF.getAlignment());
+ AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
header.compute_pgm_resource_registers =
KernelInfo.ComputePGMRSrc1 |
(KernelInfo.ComputePGMRSrc2 << 32);
+ header.code_properties =
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
+ AMD_CODE_PROPERTY_IS_PTR64;
- // Code Properties:
- header.code_properties = AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
- AMD_CODE_PROPERTY_IS_PTR64;
-
- if (KernelInfo.FlatUsed)
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
-
- if (KernelInfo.ScratchBlocks)
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
-
- header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
- header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
-
- // MFI->ABIArgOffset is the number of bytes for the kernel arguments
- // plus 36. 36 is the number of bytes reserved at the begining of the
- // input buffer to store work-group size information.
- // FIXME: We should be adding the size of the implicit arguments
- // to this value.
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
-
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
- // FIXME: What values do I put for these alignments
- header.kernarg_segment_alignment = 0;
- header.group_segment_alignment = 0;
- header.private_segment_alignment = 0;
-
- header.code_type = 1; // HSA_EXT_CODE_KERNEL
-
- header.wavefront_size = STM.getWavefrontSize();
-
- MCSectionELF *VersionSection =
- OutContext.getELFSection(".hsa.version", ELF::SHT_PROGBITS, 0);
- OutStreamer->SwitchSection(VersionSection);
- OutStreamer->EmitBytes(Twine("HSA Code Unit:" +
- Twine(header.hsail_version_major) + "." +
- Twine(header.hsail_version_minor) + ":" +
- "AMD:" +
- Twine(header.amd_code_version_major) + "." +
- Twine(header.amd_code_version_minor) + ":" +
- "GFX8.1:0").str());
-
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
-
- if (isVerbose()) {
- OutStreamer->emitRawComment("amd_code_version_major = " +
- Twine(header.amd_code_version_major), false);
- OutStreamer->emitRawComment("amd_code_version_minor = " +
- Twine(header.amd_code_version_minor), false);
- OutStreamer->emitRawComment("struct_byte_size = " +
- Twine(header.struct_byte_size), false);
- OutStreamer->emitRawComment("target_chip = " +
- Twine(header.target_chip), false);
- OutStreamer->emitRawComment(" compute_pgm_rsrc1: " +
- Twine::utohexstr(KernelInfo.ComputePGMRSrc1),
- false);
- OutStreamer->emitRawComment(" compute_pgm_rsrc2: " +
- Twine::utohexstr(KernelInfo.ComputePGMRSrc2),
- false);
- OutStreamer->emitRawComment("enable_sgpr_private_segment_buffer = " +
- Twine((bool)(header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)), false);
- OutStreamer->emitRawComment("enable_sgpr_kernarg_segment_ptr = " +
- Twine((bool)(header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR)), false);
- OutStreamer->emitRawComment("private_element_size = 2 ", false);
- OutStreamer->emitRawComment("is_ptr64 = " +
- Twine((bool)(header.code_properties & AMD_CODE_PROPERTY_IS_PTR64)), false);
- OutStreamer->emitRawComment("workitem_private_segment_byte_size = " +
- Twine(header.workitem_private_segment_byte_size),
- false);
- OutStreamer->emitRawComment("workgroup_group_segment_byte_size = " +
- Twine(header.workgroup_group_segment_byte_size),
- false);
- OutStreamer->emitRawComment("gds_segment_byte_size = " +
- Twine(header.gds_segment_byte_size), false);
- OutStreamer->emitRawComment("kernarg_segment_byte_size = " +
- Twine(header.kernarg_segment_byte_size), false);
- OutStreamer->emitRawComment("wavefront_sgpr_count = " +
- Twine(header.wavefront_sgpr_count), false);
- OutStreamer->emitRawComment("workitem_vgpr_count = " +
- Twine(header.workitem_vgpr_count), false);
- OutStreamer->emitRawComment("code_type = " + Twine(header.code_type), false);
- OutStreamer->emitRawComment("wavefront_size = " +
- Twine((int)header.wavefront_size), false);
- OutStreamer->emitRawComment("optimization_level = " +
- Twine(header.optimization_level), false);
- OutStreamer->emitRawComment("hsail_profile = " +
- Twine(header.hsail_profile), false);
- OutStreamer->emitRawComment("hsail_machine_model = " +
- Twine(header.hsail_machine_model), false);
- OutStreamer->emitRawComment("hsail_version_major = " +
- Twine(header.hsail_version_major), false);
- OutStreamer->emitRawComment("hsail_version_minor = " +
- Twine(header.hsail_version_minor), false);
- }
- OutStreamer->EmitBytes(StringRef((char*)&header, sizeof(header)));
+ AMDGPUTargetStreamer *TS =
+ static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+ TS->EmitAMDKernelCodeT(header);
}
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
#ifndef AMDKERNELCODET_H
#define AMDKERNELCODET_H
+#include "llvm/MC/SubtargetFeature.h"
+
#include <cstddef>
#include <cstdint>
+#include "llvm/Support/Debug.h"
//---------------------------------------------------------------------------//
// AMD Kernel Code, and its dependencies //
//---------------------------------------------------------------------------//
/// the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This
/// is generally DWORD.
///
- /// Use values from the amd_element_byte_size_t enum.
+ /// uSE VALUES FROM THE AMD_ELEMENT_BYTE_SIZE_T ENUM.
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 11,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE = ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,
/// Indicate if code generated has support for debugging.
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 15,
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1,
- AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT
+ AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT,
+
+ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT = 15,
+ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH = 1,
+ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT
};
/// @brief The hsa_ext_control_directives_t specifies the values for the HSAIL
/// Scratch Wave Offset must be added by the kernel code and moved to
/// SGPRn-4 for use as the FLAT SCRATCH BASE in flat memory instructions.
///
-/// The second SGPR is 32 bit byte size of a single work-item\92s scratch
+/// The second SGPR is 32 bit byte size of a single work-item's scratch
/// memory usage. This is directly loaded from the dispatch packet Private
/// Segment Byte Size and rounded up to a multiple of DWORD.
///
///
/// Private Segment Size (enable_sgpr_private_segment_size):
/// Number of User SGPR registers: 1. The 32 bit byte size of a single
-/// work-item\92s scratch memory allocation. This is the value from the dispatch
+/// work-item's scratch memory allocation. This is the value from the dispatch
/// packet. Private Segment Byte Size rounded up by CP to a multiple of DWORD.
///
/// \todo [Does CP need to round this to >4 byte alignment?]
/// present
///
/// Work-Group Info (enable_sgpr_workgroup_info):
-/// Number of System SGPR registers: 1. {first_wave, 14\92b0000,
+/// Number of System SGPR registers: 1. {first_wave, 14'b0000,
/// ordered_append_term[10:0], threadgroup_size_in_waves[5:0]}
///
/// Private Segment Wave Byte Offset
/// Alternatively scalar loads can be used if the kernarg offset is uniform, as
/// the kernarg segment is constant for the duration of the kernel execution.
///
-typedef struct amd_kernel_code_s {
- /// The AMD major version of the Code Object. Must be the value
- /// AMD_CODE_VERSION_MAJOR.
- amd_code_version32_t amd_code_version_major;
- /// The AMD minor version of the Code Object. Minor versions must be
- /// backward compatible. Must be the value
- /// AMD_CODE_VERSION_MINOR.
- amd_code_version32_t amd_code_version_minor;
-
- /// The byte size of this struct. Must be set to
- /// sizeof(amd_kernel_code_t). Used for backward
- /// compatibility.
- uint32_t struct_byte_size;
-
- /// The target chip instruction set for which code has been
- /// generated. Values are from the E_SC_INSTRUCTION_SET enumeration
- /// in sc/Interface/SCCommon.h.
- uint32_t target_chip;
+typedef struct amd_kernel_code_s {
+ uint32_t amd_kernel_code_version_major;
+ uint32_t amd_kernel_code_version_minor;
+ uint16_t amd_machine_kind;
+ uint16_t amd_machine_version_major;
+ uint16_t amd_machine_version_minor;
+ uint16_t amd_machine_version_stepping;
/// Byte offset (possibly negative) from start of amd_kernel_code_t
/// object to kernel's entry point instruction. The actual code for
/// and size. The offset is from the start (possibly negative) of
/// amd_kernel_code_t object. Set both to 0 if no prefetch
/// information is available.
- ///
- /// \todo ttye 11/15/2013 Is the prefetch definition we want? Did
- /// not make the size a uint64_t as prefetching more than 4GiB seems
- /// excessive.
int64_t kernel_code_prefetch_byte_offset;
uint64_t kernel_code_prefetch_byte_size;
/// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
/// COMPUTE_PGM_RSRC2 registers.
- amd_compute_pgm_resource_register64_t compute_pgm_resource_registers;
+ uint64_t compute_pgm_resource_registers;
/// Code properties. See amd_code_property_mask_t for a full list of
/// properties.
- amd_code_property32_t code_properties;
+ uint32_t code_properties;
/// The amount of memory required for the combined private, spill
/// and arg segments for a work-item in bytes. If
/// The maximum byte alignment of variables used by the kernel in
/// the specified memory segment. Expressed as a power of two. Must
/// be at least HSA_POWERTWO_16.
- hsa_powertwo8_t kernarg_segment_alignment;
- hsa_powertwo8_t group_segment_alignment;
- hsa_powertwo8_t private_segment_alignment;
-
- uint8_t reserved3;
-
- /// Type of code object.
- hsa_ext_code_kind32_t code_type;
-
- /// Reserved for code properties if any are defined in the future.
- /// There are currently no code properties so this field must be 0.
- uint32_t reserved4;
+ uint8_t kernarg_segment_alignment;
+ uint8_t group_segment_alignment;
+ uint8_t private_segment_alignment;
/// Wavefront size expressed as a power of two. Must be a power of 2
/// in range 1..64 inclusive. Used to support runtime query that
/// obtains wavefront size, which may be used by application to
/// allocated dynamic group memory and set the dispatch work-group
/// size.
- hsa_powertwo8_t wavefront_size;
-
- /// The optimization level specified when the kernel was
- /// finalized.
- uint8_t optimization_level;
-
- /// The HSAIL profile defines which features are used. This
- /// information is from the HSAIL version directive. If this
- /// amd_kernel_code_t is not generated from an HSAIL compilation
- /// unit then must be 0.
- hsa_ext_brig_profile8_t hsail_profile;
-
- /// The HSAIL machine model gives the address sizes used by the
- /// code. This information is from the HSAIL version directive. If
- /// not generated from an HSAIL compilation unit then must still
- /// indicate for what machine mode the code is generated.
- hsa_ext_brig_machine_model8_t hsail_machine_model;
-
- /// The HSAIL major version. This information is from the HSAIL
- /// version directive. If this amd_kernel_code_t is not
- /// generated from an HSAIL compilation unit then must be 0.
- uint32_t hsail_version_major;
-
- /// The HSAIL minor version. This information is from the HSAIL
- /// version directive. If this amd_kernel_code_t is not
- /// generated from an HSAIL compilation unit then must be 0.
- uint32_t hsail_version_minor;
-
- /// Reserved for HSAIL target options if any are defined in the
- /// future. There are currently no target options so this field
- /// must be 0.
- uint16_t reserved5;
-
- /// Reserved. Must be 0.
- uint16_t reserved6;
-
- /// The values should be the actually values used by the finalizer
- /// in generating the code. This may be the union of values
- /// specified as finalizer arguments and explicit HSAIL control
- /// directives. If the finalizer chooses to ignore a control
- /// directive, and not generate constrained code, then the control
- /// directive should not be marked as enabled even though it was
- /// present in the HSAIL or finalizer argument. The values are
- /// intended to reflect the constraints that the code actually
- /// requires to correctly execute, not the values that were
- /// actually specified at finalize time.
- hsa_ext_control_directives_t control_directive;
-
- /// The code can immediately follow the amd_kernel_code_t, or can
- /// come after subsequent amd_kernel_code_t structs when there are
- /// multiple kernels in the compilation unit.
+ uint8_t wavefront_size;
+ int32_t call_convention;
+ uint8_t reserved3[12];
+ uint64_t runtime_loader_kernel_symbol;
+ uint64_t control_directives[16];
} amd_kernel_code_t;
#endif // AMDKERNELCODET_H
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
bool ParseDirectiveHSACodeObjectVersion();
bool ParseDirectiveHSACodeObjectISA();
+ bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
+ bool ParseDirectiveAMDKernelCodeT();
public:
AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
return false;
}
+bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
+ amd_kernel_code_t &Header) {
+
+ if (getLexer().isNot(AsmToken::Equal))
+ return TokError("expected '='");
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError("amd_kernel_code_t values must be integers");
+
+ uint64_t Value = getLexer().getTok().getIntVal();
+ Lex();
+
+ if (ID == "kernel_code_version_major")
+ Header.amd_kernel_code_version_major = Value;
+ else if (ID == "kernel_code_version_minor")
+ Header.amd_kernel_code_version_minor = Value;
+ else if (ID == "machine_kind")
+ Header.amd_machine_kind = Value;
+ else if (ID == "machine_version_major")
+ Header.amd_machine_version_major = Value;
+ else if (ID == "machine_version_minor")
+ Header.amd_machine_version_minor = Value;
+ else if (ID == "machine_version_stepping")
+ Header.amd_machine_version_stepping = Value;
+ else if (ID == "kernel_code_entry_byte_offset")
+ Header.kernel_code_entry_byte_offset = Value;
+ else if (ID == "kernel_code_prefetch_byte_size")
+ Header.kernel_code_prefetch_byte_size = Value;
+ else if (ID == "max_scratch_backing_memory_byte_size")
+ Header.max_scratch_backing_memory_byte_size = Value;
+ else if (ID == "compute_pgm_rsrc1_vgprs")
+ Header.compute_pgm_resource_registers |= S_00B848_VGPRS(Value);
+ else if (ID == "compute_pgm_rsrc1_sgprs")
+ Header.compute_pgm_resource_registers |= S_00B848_SGPRS(Value);
+ else if (ID == "compute_pgm_rsrc1_priority")
+ Header.compute_pgm_resource_registers |= S_00B848_PRIORITY(Value);
+ else if (ID == "compute_pgm_rsrc1_float_mode")
+ Header.compute_pgm_resource_registers |= S_00B848_FLOAT_MODE(Value);
+ else if (ID == "compute_pgm_rsrc1_priv")
+ Header.compute_pgm_resource_registers |= S_00B848_PRIV(Value);
+ else if (ID == "compute_pgm_rsrc1_dx10_clamp")
+ Header.compute_pgm_resource_registers |= S_00B848_DX10_CLAMP(Value);
+ else if (ID == "compute_pgm_rsrc1_debug_mode")
+ Header.compute_pgm_resource_registers |= S_00B848_DEBUG_MODE(Value);
+ else if (ID == "compute_pgm_rsrc1_ieee_mode")
+ Header.compute_pgm_resource_registers |= S_00B848_IEEE_MODE(Value);
+ else if (ID == "compute_pgm_rsrc2_scratch_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_SCRATCH_EN(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_user_sgpr")
+ Header.compute_pgm_resource_registers |= (S_00B84C_USER_SGPR(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_tgid_x_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_TGID_X_EN(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_tgid_y_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Y_EN(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_tgid_z_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Z_EN(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_tg_size_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_TG_SIZE_EN(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_tidig_comp_cnt")
+ Header.compute_pgm_resource_registers |=
+ (S_00B84C_TIDIG_COMP_CNT(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_excp_en_msb")
+ Header.compute_pgm_resource_registers |=
+ (S_00B84C_EXCP_EN_MSB(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_lds_size")
+ Header.compute_pgm_resource_registers |= (S_00B84C_LDS_SIZE(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_excp_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_EXCP_EN(Value) << 32);
+ else if (ID == "compute_pgm_resource_registers")
+ Header.compute_pgm_resource_registers = Value;
+ else if (ID == "enable_sgpr_private_segment_buffer")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT);
+ else if (ID == "enable_sgpr_dispatch_ptr")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT);
+ else if (ID == "enable_sgpr_queue_ptr")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT);
+ else if (ID == "enable_sgpr_kernarg_segment_ptr")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT);
+ else if (ID == "enable_sgpr_dispatch_id")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT);
+ else if (ID == "enable_sgpr_flat_scratch_init")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT);
+ else if (ID == "enable_sgpr_private_segment_size")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT);
+ else if (ID == "enable_sgpr_grid_workgroup_count_x")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT);
+ else if (ID == "enable_sgpr_grid_workgroup_count_y")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT);
+ else if (ID == "enable_sgpr_grid_workgroup_count_z")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT);
+ else if (ID == "enable_ordered_append_gds")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT);
+ else if (ID == "private_element_size")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT);
+ else if (ID == "is_ptr64")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_IS_PTR64_SHIFT);
+ else if (ID == "is_dynamic_callstack")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT);
+ else if (ID == "is_debug_enabled")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT);
+ else if (ID == "is_xnack_enabled")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT);
+ else if (ID == "workitem_private_segment_byte_size")
+ Header.workitem_private_segment_byte_size = Value;
+ else if (ID == "workgroup_group_segment_byte_size")
+ Header.workgroup_group_segment_byte_size = Value;
+ else if (ID == "gds_segment_byte_size")
+ Header.gds_segment_byte_size = Value;
+ else if (ID == "kernarg_segment_byte_size")
+ Header.kernarg_segment_byte_size = Value;
+ else if (ID == "workgroup_fbarrier_count")
+ Header.workgroup_fbarrier_count = Value;
+ else if (ID == "wavefront_sgpr_count")
+ Header.wavefront_sgpr_count = Value;
+ else if (ID == "workitem_vgpr_count")
+ Header.workitem_vgpr_count = Value;
+ else if (ID == "reserved_vgpr_first")
+ Header.reserved_vgpr_first = Value;
+ else if (ID == "reserved_vgpr_count")
+ Header.reserved_vgpr_count = Value;
+ else if (ID == "reserved_sgpr_first")
+ Header.reserved_sgpr_first = Value;
+ else if (ID == "reserved_sgpr_count")
+ Header.reserved_sgpr_count = Value;
+ else if (ID == "debug_wavefront_private_segment_offset_sgpr")
+ Header.debug_wavefront_private_segment_offset_sgpr = Value;
+ else if (ID == "debug_private_segment_buffer_sgpr")
+ Header.debug_private_segment_buffer_sgpr = Value;
+ else if (ID == "kernarg_segment_alignment")
+ Header.kernarg_segment_alignment = Value;
+ else if (ID == "group_segment_alignment")
+ Header.group_segment_alignment = Value;
+ else if (ID == "private_segment_alignment")
+ Header.private_segment_alignment = Value;
+ else if (ID == "wavefront_size")
+ Header.wavefront_size = Value;
+ else if (ID == "call_convention")
+ Header.call_convention = Value;
+ else if (ID == "runtime_loader_kernel_symbol")
+ Header.runtime_loader_kernel_symbol = Value;
+ else
+ return TokError("amd_kernel_code_t value not recognized.");
+
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
+
+ amd_kernel_code_t Header;
+ AMDGPU::initDefaultAMDKernelCodeT(Header, STI.getFeatureBits());
+
+ while (true) {
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("amd_kernel_code_t values must begin on a new line");
+
+ // Lex EndOfStatement. This is in a while loop, because lexing a comment
+ // will set the current token to EndOfStatement.
+ while(getLexer().is(AsmToken::EndOfStatement))
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Identifier))
+ return TokError("expected value identifier or .end_amd_kernel_code_t");
+
+ StringRef ID = getLexer().getTok().getIdentifier();
+ Lex();
+
+ if (ID == ".end_amd_kernel_code_t")
+ break;
+
+ if (ParseAMDKernelCodeTValue(ID, Header))
+ return true;
+ }
+
+ getTargetStreamer().EmitAMDKernelCodeT(Header);
+
+ return false;
+}
+
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
if (IDVal == ".hsa_code_object_isa")
return ParseDirectiveHSACodeObjectISA();
+ if (IDVal == ".amd_kernel_code_t")
+ return ParseDirectiveAMDKernelCodeT();
+
return true;
}
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetStreamer.h"
+#include "SIDefines.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/FormattedStream.h"
}
+void
+AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
+ uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32);
+ bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
+ bool EnableSGPRDispatchPtr = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
+ bool EnableSGPRQueuePtr = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
+ bool EnableSGPRKernargSegmentPtr = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
+ bool EnableSGPRDispatchID = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
+ bool EnableSGPRFlatScratchInit = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ bool EnableSGPRPrivateSegmentSize = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+ bool EnableSGPRGridWorkgroupCountX = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X);
+ bool EnableSGPRGridWorkgroupCountY = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y);
+ bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z);
+ bool EnableOrderedAppendGDS = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS);
+ uint32_t PrivateElementSize = (Header.code_properties &
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >>
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT;
+ bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64);
+ bool IsDynamicCallstack = (Header.code_properties &
+ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK);
+ bool IsDebugEnabled = (Header.code_properties &
+ AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED);
+ bool IsXNackEnabled = (Header.code_properties &
+ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED);
+
+ OS << "\t.amd_kernel_code_t\n" <<
+ "\t\tkernel_code_version_major = " <<
+ Header.amd_kernel_code_version_major << '\n' <<
+ "\t\tkernel_code_version_minor = " <<
+ Header.amd_kernel_code_version_minor << '\n' <<
+ "\t\tmachine_kind = " <<
+ Header.amd_machine_kind << '\n' <<
+ "\t\tmachine_version_major = " <<
+ Header.amd_machine_version_major << '\n' <<
+ "\t\tmachine_version_minor = " <<
+ Header.amd_machine_version_minor << '\n' <<
+ "\t\tmachine_version_stepping = " <<
+ Header.amd_machine_version_stepping << '\n' <<
+ "\t\tkernel_code_entry_byte_offset = " <<
+ Header.kernel_code_entry_byte_offset << '\n' <<
+ "\t\tkernel_code_prefetch_byte_size = " <<
+ Header.kernel_code_prefetch_byte_size << '\n' <<
+ "\t\tmax_scratch_backing_memory_byte_size = " <<
+ Header.max_scratch_backing_memory_byte_size << '\n' <<
+ "\t\tcompute_pgm_rsrc1_vgprs = " <<
+ G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_sgprs = " <<
+ G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_priority = " <<
+ G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_float_mode = " <<
+ G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_priv = " <<
+ G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_dx10_clamp = " <<
+ G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_debug_mode = " <<
+ G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_ieee_mode = " <<
+ G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_scratch_en = " <<
+ G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_user_sgpr = " <<
+ G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_tgid_x_en = " <<
+ G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_tgid_y_en = " <<
+ G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_tgid_z_en = " <<
+ G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_tg_size_en = " <<
+ G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " <<
+ G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_excp_en_msb = " <<
+ G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_lds_size = " <<
+ G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_excp_en = " <<
+ G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' <<
+
+ "\t\tenable_sgpr_private_segment_buffer = " <<
+ EnableSGPRPrivateSegmentBuffer << '\n' <<
+ "\t\tenable_sgpr_dispatch_ptr = " <<
+ EnableSGPRDispatchPtr << '\n' <<
+ "\t\tenable_sgpr_queue_ptr = " <<
+ EnableSGPRQueuePtr << '\n' <<
+ "\t\tenable_sgpr_kernarg_segment_ptr = " <<
+ EnableSGPRKernargSegmentPtr << '\n' <<
+ "\t\tenable_sgpr_dispatch_id = " <<
+ EnableSGPRDispatchID << '\n' <<
+ "\t\tenable_sgpr_flat_scratch_init = " <<
+ EnableSGPRFlatScratchInit << '\n' <<
+ "\t\tenable_sgpr_private_segment_size = " <<
+ EnableSGPRPrivateSegmentSize << '\n' <<
+ "\t\tenable_sgpr_grid_workgroup_count_x = " <<
+ EnableSGPRGridWorkgroupCountX << '\n' <<
+ "\t\tenable_sgpr_grid_workgroup_count_y = " <<
+ EnableSGPRGridWorkgroupCountY << '\n' <<
+ "\t\tenable_sgpr_grid_workgroup_count_z = " <<
+ EnableSGPRGridWorkgroupCountZ << '\n' <<
+ "\t\tenable_ordered_append_gds = " <<
+ EnableOrderedAppendGDS << '\n' <<
+ "\t\tprivate_element_size = " <<
+ PrivateElementSize << '\n' <<
+ "\t\tis_ptr64 = " <<
+ IsPtr64 << '\n' <<
+ "\t\tis_dynamic_callstack = " <<
+ IsDynamicCallstack << '\n' <<
+ "\t\tis_debug_enabled = " <<
+ IsDebugEnabled << '\n' <<
+ "\t\tis_xnack_enabled = " <<
+ IsXNackEnabled << '\n' <<
+ "\t\tworkitem_private_segment_byte_size = " <<
+ Header.workitem_private_segment_byte_size << '\n' <<
+ "\t\tworkgroup_group_segment_byte_size = " <<
+ Header.workgroup_group_segment_byte_size << '\n' <<
+ "\t\tgds_segment_byte_size = " <<
+ Header.gds_segment_byte_size << '\n' <<
+ "\t\tkernarg_segment_byte_size = " <<
+ Header.kernarg_segment_byte_size << '\n' <<
+ "\t\tworkgroup_fbarrier_count = " <<
+ Header.workgroup_fbarrier_count << '\n' <<
+ "\t\twavefront_sgpr_count = " <<
+ Header.wavefront_sgpr_count << '\n' <<
+ "\t\tworkitem_vgpr_count = " <<
+ Header.workitem_vgpr_count << '\n' <<
+ "\t\treserved_vgpr_first = " <<
+ Header.reserved_vgpr_first << '\n' <<
+ "\t\treserved_vgpr_count = " <<
+ Header.reserved_vgpr_count << '\n' <<
+ "\t\treserved_sgpr_first = " <<
+ Header.reserved_sgpr_first << '\n' <<
+ "\t\treserved_sgpr_count = " <<
+ Header.reserved_sgpr_count << '\n' <<
+ "\t\tdebug_wavefront_private_segment_offset_sgpr = " <<
+ Header.debug_wavefront_private_segment_offset_sgpr << '\n' <<
+ "\t\tdebug_private_segment_buffer_sgpr = " <<
+ Header.debug_private_segment_buffer_sgpr << '\n' <<
+ "\t\tkernarg_segment_alignment = " <<
+ (uint32_t)Header.kernarg_segment_alignment << '\n' <<
+ "\t\tgroup_segment_alignment = " <<
+ (uint32_t)Header.group_segment_alignment << '\n' <<
+ "\t\tprivate_segment_alignment = " <<
+ (uint32_t)Header.private_segment_alignment << '\n' <<
+ "\t\twavefront_size = " <<
+ (uint32_t)Header.wavefront_size << '\n' <<
+ "\t\tcall_convention = " <<
+ Header.call_convention << '\n' <<
+ "\t\truntime_loader_kernel_symbol = " <<
+ Header.runtime_loader_kernel_symbol << '\n' <<
+ // TODO: control_directives
+ "\t.end_amd_kernel_code_t\n";
+
+}
+
//===----------------------------------------------------------------------===//
// AMDGPUTargetELFStreamer
//===----------------------------------------------------------------------===//
OS.EmitValueToAlignment(4);
OS.PopSection();
}
+
+void
+AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
+
+ MCStreamer &OS = getStreamer();
+ OS.PushSection();
+ OS.SwitchSection(OS.getContext().getObjectFileInfo()->getTextSection());
+ OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
+ OS.PopSection();
+}
//
//===----------------------------------------------------------------------===//
+#include "AMDKernelCodeT.h"
#include "llvm/MC/MCStreamer.h"
-
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
namespace llvm {
uint32_t Stepping,
StringRef VendorName,
StringRef ArchName) = 0;
+
+ virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) = 0;
};
class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
uint32_t Stepping, StringRef VendorName,
StringRef ArchName) override;
+
+ void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
};
class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
uint32_t Stepping, StringRef VendorName,
StringRef ArchName) override;
+
+ void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+
};
}
#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
#define S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
#define S_00B028_SGPRS(x) (((x) & 0x0F) << 6)
+
#define R_00B84C_COMPUTE_PGM_RSRC2 0x00B84C
#define S_00B84C_SCRATCH_EN(x) (((x) & 0x1) << 0)
+#define G_00B84C_SCRATCH_EN(x) (((x) >> 0) & 0x1)
+#define C_00B84C_SCRATCH_EN 0xFFFFFFFE
#define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
+#define G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F)
+#define C_00B84C_USER_SGPR 0xFFFFFFC1
#define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
+#define G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1)
+#define C_00B84C_TGID_X_EN 0xFFFFFF7F
#define S_00B84C_TGID_Y_EN(x) (((x) & 0x1) << 8)
+#define G_00B84C_TGID_Y_EN(x) (((x) >> 8) & 0x1)
+#define C_00B84C_TGID_Y_EN 0xFFFFFEFF
#define S_00B84C_TGID_Z_EN(x) (((x) & 0x1) << 9)
+#define G_00B84C_TGID_Z_EN(x) (((x) >> 9) & 0x1)
+#define C_00B84C_TGID_Z_EN 0xFFFFFDFF
#define S_00B84C_TG_SIZE_EN(x) (((x) & 0x1) << 10)
+#define G_00B84C_TG_SIZE_EN(x) (((x) >> 10) & 0x1)
+#define C_00B84C_TG_SIZE_EN 0xFFFFFBFF
#define S_00B84C_TIDIG_COMP_CNT(x) (((x) & 0x03) << 11)
-
+#define G_00B84C_TIDIG_COMP_CNT(x) (((x) >> 11) & 0x03)
+#define C_00B84C_TIDIG_COMP_CNT 0xFFFFE7FF
+/* CIK */
+#define S_00B84C_EXCP_EN_MSB(x) (((x) & 0x03) << 13)
+#define G_00B84C_EXCP_EN_MSB(x) (((x) >> 13) & 0x03)
+#define C_00B84C_EXCP_EN_MSB 0xFFFF9FFF
+/* */
#define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
+#define G_00B84C_LDS_SIZE(x) (((x) >> 15) & 0x1FF)
+#define C_00B84C_LDS_SIZE 0xFF007FFF
+#define S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24)
+#define G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F)
+#define C_00B84C_EXCP_EN
+
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
return {0, 0, 0};
}
+void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
+ const FeatureBitset &Features) {
+
+ IsaVersion ISA = getIsaVersion(Features);
+
+ memset(&Header, 0, sizeof(Header));
+
+ Header.amd_kernel_code_version_major = 1;
+ Header.amd_kernel_code_version_minor = 0;
+ Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
+ Header.amd_machine_version_major = ISA.Major;
+ Header.amd_machine_version_minor = ISA.Minor;
+ Header.amd_machine_version_stepping = ISA.Stepping;
+ Header.kernel_code_entry_byte_offset = sizeof(Header);
+ // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
+ Header.wavefront_size = 6;
+ // These alignment values are specified in powers of two, so alignment =
+ // 2^n. The minimum alignment is 2^4 = 16.
+ Header.kernarg_segment_alignment = 4;
+ Header.group_segment_alignment = 4;
+ Header.private_segment_alignment = 4;
+}
+
} // End namespace AMDGPU
} // End namespace llvm
};
IsaVersion getIsaVersion(const FeatureBitset &Features);
+void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
+ const FeatureBitset &Features);
} // end namespace AMDGPU
} // end namespace llvm
; HSA: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA: {{^}}simple:
-; HSA: .section .hsa.version
-; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0"
-; Test that the amd_kernel_code_t object is emitted
-; HSA: .asciz
+; HSA: .amd_kernel_code_t
+; HSA: .end_amd_kernel_code_t
; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[0:1], 0x0
; Make sure we are setting the ATC bit:
; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
.hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
+.text
+amd_kernel_code_t_test_all:
+; Test all amd_kernel_code_t members with non-default values.
+.amd_kernel_code_t
+ kernel_code_version_major = 100
+ kernel_code_version_minor = 100
+ machine_kind = 0
+ machine_version_major = 5
+ machine_version_minor = 5
+ machine_version_stepping = 5
+ kernel_code_entry_byte_offset = 512
+ kernel_code_prefetch_byte_size = 1
+ max_scratch_backing_memory_byte_size = 1
+ compute_pgm_rsrc1_vgprs = 1
+ compute_pgm_rsrc1_sgprs = 1
+ compute_pgm_rsrc1_priority = 1
+ compute_pgm_rsrc1_float_mode = 1
+ compute_pgm_rsrc1_priv = 1
+ compute_pgm_rsrc1_dx10_clamp = 1
+ compute_pgm_rsrc1_debug_mode = 1
+ compute_pgm_rsrc1_ieee_mode = 1
+ compute_pgm_rsrc2_scratch_en = 1
+ compute_pgm_rsrc2_user_sgpr = 1
+ compute_pgm_rsrc2_tgid_x_en = 1
+ compute_pgm_rsrc2_tgid_y_en = 1
+ compute_pgm_rsrc2_tgid_z_en = 1
+ compute_pgm_rsrc2_tg_size_en = 1
+ compute_pgm_rsrc2_tidig_comp_cnt = 1
+ compute_pgm_rsrc2_excp_en_msb = 1
+ compute_pgm_rsrc2_lds_size = 1
+ compute_pgm_rsrc2_excp_en = 1
+ enable_sgpr_private_segment_buffer = 1
+ enable_sgpr_dispatch_ptr = 1
+ enable_sgpr_queue_ptr = 1
+ enable_sgpr_kernarg_segment_ptr = 1
+ enable_sgpr_dispatch_id = 1
+ enable_sgpr_flat_scratch_init = 1
+ enable_sgpr_private_segment_size = 1
+ enable_sgpr_grid_workgroup_count_x = 1
+ enable_sgpr_grid_workgroup_count_y = 1
+ enable_sgpr_grid_workgroup_count_z = 1
+ enable_ordered_append_gds = 1
+ private_element_size = 1
+ is_ptr64 = 1
+ is_dynamic_callstack = 1
+ is_debug_enabled = 1
+ is_xnack_enabled = 1
+ workitem_private_segment_byte_size = 1
+ workgroup_group_segment_byte_size = 1
+ gds_segment_byte_size = 1
+ kernarg_segment_byte_size = 1
+ workgroup_fbarrier_count = 1
+ wavefront_sgpr_count = 1
+ workitem_vgpr_count = 1
+ reserved_vgpr_first = 1
+ reserved_vgpr_count = 1
+ reserved_sgpr_first = 1
+ reserved_sgpr_count = 1
+ debug_wavefront_private_segment_offset_sgpr = 1
+ debug_private_segment_buffer_sgpr = 1
+ kernarg_segment_alignment = 5
+ group_segment_alignment = 5
+ private_segment_alignment = 5
+ wavefront_size = 5
+ call_convention = 1
+ runtime_loader_kernel_symbol = 1
+.end_amd_kernel_code_t
+
+// ASM-LABEL: {{^}}amd_kernel_code_t_test_all:
+// ASM: .amd_kernel_code_t
+// ASM: kernel_code_version_major = 100
+// ASM: kernel_code_version_minor = 100
+// ASM: machine_kind = 0
+// ASM: machine_version_major = 5
+// ASM: machine_version_minor = 5
+// ASM: machine_version_stepping = 5
+// ASM: kernel_code_entry_byte_offset = 512
+// ASM: kernel_code_prefetch_byte_size = 1
+// ASM: max_scratch_backing_memory_byte_size = 1
+// ASM: compute_pgm_rsrc1_vgprs = 1
+// ASM: compute_pgm_rsrc1_sgprs = 1
+// ASM: compute_pgm_rsrc1_priority = 1
+// ASM: compute_pgm_rsrc1_float_mode = 1
+// ASM: compute_pgm_rsrc1_priv = 1
+// ASM: compute_pgm_rsrc1_dx10_clamp = 1
+// ASM: compute_pgm_rsrc1_debug_mode = 1
+// ASM: compute_pgm_rsrc1_ieee_mode = 1
+// ASM: compute_pgm_rsrc2_scratch_en = 1
+// ASM: compute_pgm_rsrc2_user_sgpr = 1
+// ASM: compute_pgm_rsrc2_tgid_x_en = 1
+// ASM: compute_pgm_rsrc2_tgid_y_en = 1
+// ASM: compute_pgm_rsrc2_tgid_z_en = 1
+// ASM: compute_pgm_rsrc2_tg_size_en = 1
+// ASM: compute_pgm_rsrc2_tidig_comp_cnt = 1
+// ASM: compute_pgm_rsrc2_excp_en_msb = 1
+// ASM: compute_pgm_rsrc2_lds_size = 1
+// ASM: compute_pgm_rsrc2_excp_en = 1
+// ASM: enable_sgpr_private_segment_buffer = 1
+// ASM: enable_sgpr_dispatch_ptr = 1
+// ASM: enable_sgpr_queue_ptr = 1
+// ASM: enable_sgpr_kernarg_segment_ptr = 1
+// ASM: enable_sgpr_dispatch_id = 1
+// ASM: enable_sgpr_flat_scratch_init = 1
+// ASM: enable_sgpr_private_segment_size = 1
+// ASM: enable_sgpr_grid_workgroup_count_x = 1
+// ASM: enable_sgpr_grid_workgroup_count_y = 1
+// ASM: enable_sgpr_grid_workgroup_count_z = 1
+// ASM: enable_ordered_append_gds = 1
+// ASM: private_element_size = 1
+// ASM: is_ptr64 = 1
+// ASM: is_dynamic_callstack = 1
+// ASM: is_debug_enabled = 1
+// ASM: is_xnack_enabled = 1
+// ASM: workitem_private_segment_byte_size = 1
+// ASM: workgroup_group_segment_byte_size = 1
+// ASM: gds_segment_byte_size = 1
+// ASM: kernarg_segment_byte_size = 1
+// ASM: workgroup_fbarrier_count = 1
+// ASM: wavefront_sgpr_count = 1
+// ASM: workitem_vgpr_count = 1
+// ASM: reserved_vgpr_first = 1
+// ASM: reserved_vgpr_count = 1
+// ASM: reserved_sgpr_first = 1
+// ASM: reserved_sgpr_count = 1
+// ASM: debug_wavefront_private_segment_offset_sgpr = 1
+// ASM: debug_private_segment_buffer_sgpr = 1
+// ASM: kernarg_segment_alignment = 5
+// ASM: group_segment_alignment = 5
+// ASM: private_segment_alignment = 5
+// ASM: wavefront_size = 5
+// ASM: call_convention = 1
+// ASM: runtime_loader_kernel_symbol = 1
+// ASM: .end_amd_kernel_code_t
+
+amd_kernel_code_t_minimal:
+.amd_kernel_code_t
+ enable_sgpr_kernarg_segment_ptr = 1
+ is_ptr64 = 1
+ compute_pgm_rsrc1_vgprs = 1
+ compute_pgm_rsrc1_sgprs = 1
+ compute_pgm_rsrc2_user_sgpr = 2
+ kernarg_segment_byte_size = 16
+ wavefront_sgpr_count = 8
+// wavefront_sgpr_count = 7
+; wavefront_sgpr_count = 7
+// Make sure a blank line won't break anything:
+
+// Make sure a line with whitespace won't break anything:
+
+ workitem_vgpr_count = 16
+.end_amd_kernel_code_t
+
+// ASM-LABEL: {{^}}amd_kernel_code_t_minimal:
+// ASM: .amd_kernel_code_t
+// ASM: kernel_code_version_major = 1
+// ASM: kernel_code_version_minor = 0
+// ASM: machine_kind = 1
+// ASM: machine_version_major = 7
+// ASM: machine_version_minor = 0
+// ASM: machine_version_stepping = 0
+// ASM: kernel_code_entry_byte_offset = 256
+// ASM: kernel_code_prefetch_byte_size = 0
+// ASM: max_scratch_backing_memory_byte_size = 0
+// ASM: compute_pgm_rsrc1_vgprs = 1
+// ASM: compute_pgm_rsrc1_sgprs = 1
+// ASM: compute_pgm_rsrc1_priority = 0
+// ASM: compute_pgm_rsrc1_float_mode = 0
+// ASM: compute_pgm_rsrc1_priv = 0
+// ASM: compute_pgm_rsrc1_dx10_clamp = 0
+// ASM: compute_pgm_rsrc1_debug_mode = 0
+// ASM: compute_pgm_rsrc1_ieee_mode = 0
+// ASM: compute_pgm_rsrc2_scratch_en = 0
+// ASM: compute_pgm_rsrc2_user_sgpr = 2
+// ASM: compute_pgm_rsrc2_tgid_x_en = 0
+// ASM: compute_pgm_rsrc2_tgid_y_en = 0
+// ASM: compute_pgm_rsrc2_tgid_z_en = 0
+// ASM: compute_pgm_rsrc2_tg_size_en = 0
+// ASM: compute_pgm_rsrc2_tidig_comp_cnt = 0
+// ASM: compute_pgm_rsrc2_excp_en_msb = 0
+// ASM: compute_pgm_rsrc2_lds_size = 0
+// ASM: compute_pgm_rsrc2_excp_en = 0
+// ASM: enable_sgpr_private_segment_buffer = 0
+// ASM: enable_sgpr_dispatch_ptr = 0
+// ASM: enable_sgpr_queue_ptr = 0
+// ASM: enable_sgpr_kernarg_segment_ptr = 1
+// ASM: enable_sgpr_dispatch_id = 0
+// ASM: enable_sgpr_flat_scratch_init = 0
+// ASM: enable_sgpr_private_segment_size = 0
+// ASM: enable_sgpr_grid_workgroup_count_x = 0
+// ASM: enable_sgpr_grid_workgroup_count_y = 0
+// ASM: enable_sgpr_grid_workgroup_count_z = 0
+// ASM: enable_ordered_append_gds = 0
+// ASM: private_element_size = 0
+// ASM: is_ptr64 = 1
+// ASM: is_dynamic_callstack = 0
+// ASM: is_debug_enabled = 0
+// ASM: is_xnack_enabled = 0
+// ASM: workitem_private_segment_byte_size = 0
+// ASM: workgroup_group_segment_byte_size = 0
+// ASM: gds_segment_byte_size = 0
+// ASM: kernarg_segment_byte_size = 16
+// ASM: workgroup_fbarrier_count = 0
+// ASM: wavefront_sgpr_count = 8
+// ASM: workitem_vgpr_count = 16
+// ASM: reserved_vgpr_first = 0
+// ASM: reserved_vgpr_count = 0
+// ASM: reserved_sgpr_first = 0
+// ASM: reserved_sgpr_count = 0
+// ASM: debug_wavefront_private_segment_offset_sgpr = 0
+// ASM: debug_private_segment_buffer_sgpr = 0
+// ASM: kernarg_segment_alignment = 4
+// ASM: group_segment_alignment = 4
+// ASM: private_segment_alignment = 4
+// ASM: wavefront_size = 6
+// ASM: call_convention = 0
+// ASM: runtime_loader_kernel_symbol = 0
+// ASM: .end_amd_kernel_code_t