return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i32);
}]>;
+def IMM8bit : PatLeaf <(imm),
+ [{return isUInt<8>(N->getZExtValue());}]
+>;
+
def IMM12bit : PatLeaf <(imm),
[{return isUInt<12>(N->getZExtValue());}]
>;
+def IMM16bit : PatLeaf <(imm),
+ [{return isUInt<16>(N->getZExtValue());}]
+>;
+
def mubuf_vaddr_offset : PatFrag<
(ops node:$ptr, node:$offset, node:$imm_offset),
(add (add node:$ptr, node:$offset), node:$imm_offset)
DS <op, outs, ins, asm, pat> {
bits<16> offset;
+ // Single load interpret the 2 i8imm operands as a single i16 offset.
let offset0 = offset{7-0};
let offset1 = offset{15-8};
}
/********** Load/Store Patterns **********/
/********** ======================= **********/
-class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
- (frag i32:$src0),
- (vt (inst 0, $src0, 0))
->;
+multiclass DSReadPat <DS inst, ValueType vt, PatFrag frag> {
+ def : Pat <
+ (vt (frag (add i32:$ptr, (i32 IMM16bit:$offset)))),
+ (inst (i1 0), $ptr, (as_i16imm $offset))
+ >;
-def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
-def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
-def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
-def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
-def : DSReadPat <DS_READ_B32, i32, local_load>;
-def : Pat <
- (local_load i32:$src0),
- (i32 (DS_READ_B32 0, $src0, 0))
->;
+ def : Pat <
+ (frag i32:$src0),
+ (vt (inst 0, $src0, 0))
+ >;
+}
-class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
- (frag i32:$src1, i32:$src0),
- (inst 0, $src0, $src1, 0)
->;
+defm : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
+defm : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
+defm : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
+defm : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
+defm : DSReadPat <DS_READ_B32, i32, local_load>;
+
+multiclass DSWritePat <DS inst, ValueType vt, PatFrag frag> {
+ def : Pat <
+ (frag vt:$value, (add i32:$ptr, (i32 IMM16bit:$offset))),
+ (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+ >;
+
+ def : Pat <
+ (frag i32:$src1, i32:$src0),
+ (inst 0, $src0, $src1, 0)
+ >;
+}
-def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
-def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
-def : DSWritePat <DS_WRITE_B32, i32, local_store>;
+defm : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
+defm : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
+defm : DSWritePat <DS_WRITE_B32, i32, local_store>;
def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
(DS_ADD_U32_RTN 0, $ptr, $val, 0)>;
; CHECK-LABEL: @local_address_load
; CHECK: V_MOV_B32_e{{32|64}} [[PTR:v[0-9]]]
-; CHECK: DS_READ_B32 [[PTR]]
+; CHECK: DS_READ_B32 v{{[0-9]+}}, [[PTR]]
define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = load i32 addrspace(3)* %in
}
; CHECK-LABEL: @local_address_gep_const_offset
-; CHECK: S_ADD_I32 [[SPTR:s[0-9]]]
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]]
+; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
+; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 4,
define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = getelementptr i32 addrspace(3)* %in, i32 1
ret void
}
+; Offset too large, can't fold into 16-bit immediate offset.
+; CHECK-LABEL: @local_address_gep_large_const_offset
+; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 65540
+; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; CHECK: DS_READ_B32 [[VPTR]]
+define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
+entry:
+ %0 = getelementptr i32 addrspace(3)* %in, i32 16385
+ %1 = load i32 addrspace(3)* %0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
; CHECK-LABEL: @null_32bit_lds_ptr:
; CHECK: V_CMP_NE_I32
; CHECK-NOT: V_CMP_NE_I32
store i32 addrspace(3)* getelementptr ([16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr
ret void
}
+
+; CHECK-LABEL: @local_address_store
+; CHECK: DS_WRITE_B32
+define void @local_address_store(i32 addrspace(3)* %out, i32 %val) {
+ store i32 %val, i32 addrspace(3)* %out
+ ret void
+}
+
+; CHECK-LABEL: @local_address_gep_store
+; CHECK: S_ADD_I32 [[SADDR:s[0-9]+]],
+; CHECK: V_MOV_B32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
+; CHECK: DS_WRITE_B32 [[ADDR]], v{{[0-9]+}},
+define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32 %offset) {
+ %gep = getelementptr i32 addrspace(3)* %out, i32 %offset
+ store i32 %val, i32 addrspace(3)* %gep, align 4
+ ret void
+}
+
+; CHECK-LABEL: @local_address_gep_const_offset_store
+; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
+; CHECK: V_MOV_B32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
+; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 4
+define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
+ %gep = getelementptr i32 addrspace(3)* %out, i32 1
+ store i32 %val, i32 addrspace(3)* %gep, align 4
+ ret void
+}
+
+; Offset too large, can't fold into 16-bit immediate offset.
+; CHECK-LABEL: @local_address_gep_large_const_offset_store
+; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 65540
+; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; CHECK: DS_WRITE_B32 [[VPTR]], v{{[0-9]+}}, 0
+define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
+ %gep = getelementptr i32 addrspace(3)* %out, i32 16385
+ store i32 %val, i32 addrspace(3)* %gep, align 4
+ ret void
+}
%struct.foo = type { [3 x float], [3 x float] }
+; FIXME: Extra V_MOV from SGPR to VGPR for second read. The address is
+; already in a VGPR after the first read.
+
; CHECK-LABEL: @do_as_ptr_calcs:
-; CHECK: S_ADD_I32 {{s[0-9]+}},
-; CHECK: S_ADD_I32 [[SREG1:s[0-9]+]],
+; CHECK: S_LOAD_DWORD [[SREG1:s[0-9]+]],
; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
-; CHECK: DS_READ_B32 [[VREG1]],
+; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VREG1]], 20
+; CHECK: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 12
define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
%x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
; CHECK-LABEL: @use_gep_address_space:
-; CHECK: S_ADD_I32
+; CHECK: V_MOV_B32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
+; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}}, 64
%p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16
store i32 99, i32 addrspace(3)* %p
ret void
}
+define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %array) nounwind {
+; CHECK-LABEL: @use_gep_address_space_large_offset:
+; CHECK: S_ADD_I32
+; CHECK: DS_WRITE_B32
+ %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16384
+ store i32 99, i32 addrspace(3)* %p
+ ret void
+}
+
define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind {
; CHECK-LABEL: @gep_as_vector_v4:
; CHECK: S_ADD_I32
; EG-CHECK: GROUP_BARRIER
; EG-CHECK-NEXT: ALU clause
-; Make sure the lds reads are using different addresses.
+; Make sure the lds reads are using different addresses, at different
+; constant offsets.
; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
-; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]]
-; SI-CHECK-NOT: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]]
+; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 16
+; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0,
define void @local_memory_two_objects(i32 addrspace(1)* %out) {
entry: