(EXTRACT_SUBREG $value, sub1), $offset0, $offset1)
>;
-multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
- def : Pat <
- (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$value),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset))
- >;
-
- def : Pat <
- (frag i32:$ptr, vt:$val),
- (inst 0, $ptr, $val, 0)
- >;
-}
+class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
+ (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+>;
// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
//
// We also load this -1 with s_mov_b32 / s_mov_b64 even though this
// needs to be a VGPR. The SGPR copy pass will fix this, and it's
// easier since there is no v_mov_b64.
-multiclass DSAtomicIncRetPat<DS inst, ValueType vt,
- Instruction LoadImm, PatFrag frag> {
- def : Pat <
- (frag (add i32:$ptr, (i32 IMM16bit:$offset)), (vt 1)),
- (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
- >;
-
- def : Pat <
- (frag i32:$ptr, (vt 1)),
- (inst 0, $ptr, (LoadImm (vt -1)), 0)
- >;
-}
+class DSAtomicIncRetPat<DS inst, ValueType vt,
+ Instruction LoadImm, PatFrag frag> : Pat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
+ (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
+>;
-multiclass DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> {
- def : Pat <
- (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$cmp, vt:$swap),
- (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
- >;
- def : Pat <
- (frag i32:$ptr, vt:$cmp, vt:$swap),
- (inst 0, $ptr, $cmp, $swap, 0)
- >;
-}
+class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
+ (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
+>;
// 32-bit atomics.
-defm : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
- S_MOV_B32, atomic_load_add_local>;
-defm : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
- S_MOV_B32, atomic_load_sub_local>;
-
-defm : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
-defm : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
-defm : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
-defm : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
-defm : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
-defm : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
-
-defm : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
+def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
+ S_MOV_B32, atomic_load_add_local>;
+def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
+ S_MOV_B32, atomic_load_sub_local>;
+
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
+
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
// 64-bit atomics.
-defm : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
- S_MOV_B64, atomic_load_add_local>;
-defm : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
- S_MOV_B64, atomic_load_sub_local>;
-
-defm : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
-defm : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
-defm : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
-defm : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
-defm : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
-defm : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
-
-defm : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
+def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
+ S_MOV_B64, atomic_load_add_local>;
+def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
+ S_MOV_B64, atomic_load_sub_local>;
+
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
+
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
//===----------------------------------------------------------------------===//
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; FUNC-LABEL: @lds_atomic_xchg_ret_i32:
; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
ret void
}
+; FUNC-LABEL: @lds_atomic_add_ret_i32_bad_si_offset
+; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
+; CI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+ %sub = sub i32 %a, %b
+ %add = add i32 %sub, 4
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
; FUNC-LABEL: @lds_atomic_inc_ret_i32:
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
ret void
}
+; FUNC-LABEL: @lds_atomic_inc_ret_i32_bad_si_offset:
+; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
+; CI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+ %sub = sub i32 %a, %b
+ %add = add i32 %sub, 4
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
; FUNC-LABEL: @lds_atomic_sub_ret_i32:
; SI: DS_SUB_RTN_U32
; SI: S_ENDPGM