test/CodeGen/R600/sext-in-reg.ll

   1 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
   2 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
   3
   4 declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone
   5
   6
   7 ; FUNC-LABEL: @sext_in_reg_i1_i32
   8 ; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
   9 ; SI: S_BFE_I32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
  10 ; SI: V_MOV_B32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
  11 ; SI: BUFFER_STORE_DWORD [[EXTRACT]],
  12
  13 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
  14 ; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
  15 ; EG-NEXT: LSHR * [[ADDR]]
     ; Sign-extends bit 0 of %in across the whole i32 (shl by 31, then ashr by 31)
     ; and stores the result to %out. The checks above expect one bitfield extract.
  16 define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
  17   %hi = shl i32 %in, 31
  18   %ext = ashr i32 %hi, 31
  19   store i32 %ext, i32 addrspace(1)* %out
  20   ret void
  21 }
  22
  23 ; FUNC-LABEL: @sext_in_reg_i8_to_i32
  24 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
  25 ; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
  26 ; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
  27 ; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
  28
  29 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
  30 ; EG: ADD_INT
  31 ; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
  32 ; EG-NEXT: LSHR * [[ADDR]]
     ; Sign-extends the low 8 bits of %a + %b to i32 (shl by 24, then ashr by 24)
     ; and stores the result to %out.
  33 define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  34   %sum = add i32 %a, %b ; the add keeps this from folding into an extload
  35   %hi = shl i32 %sum, 24
  36   %ext = ashr i32 %hi, 24
  37   store i32 %ext, i32 addrspace(1)* %out, align 4
  38   ret void
  39 }
  40
  41 ; FUNC-LABEL: @sext_in_reg_i16_to_i32
  42 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
  43 ; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
  44 ; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
  45 ; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
  46
  47 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
  48 ; EG: ADD_INT
  49 ; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
  50 ; EG-NEXT: LSHR * [[ADDR]]
     ; Sign-extends the low 16 bits of %a + %b to i32 (shl by 16, then ashr by 16)
     ; and stores the result to %out.
  51 define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  52   %sum = add i32 %a, %b ; the add keeps this from folding into an extload
  53   %hi = shl i32 %sum, 16
  54   %ext = ashr i32 %hi, 16
  55   store i32 %ext, i32 addrspace(1)* %out, align 4
  56   ret void
  57 }
  58
  59 ; FUNC-LABEL: @sext_in_reg_i8_to_v1i32
  60 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
  61 ; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
  62 ; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
  63 ; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
  64
  65 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
  66 ; EG: ADD_INT
  67 ; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
  68 ; EG-NEXT: LSHR * [[ADDR]]
     ; Single-element-vector form of the i8-in-i32 sign extension: shl by 24, then
     ; ashr by 24, on <1 x i32>. The expected instructions match the scalar test.
  69 define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  70   %sum = add <1 x i32> %a, %b ; the add keeps this from folding into an extload
  71   %hi = shl <1 x i32> %sum, <i32 24>
  72   %ext = ashr <1 x i32> %hi, <i32 24>
  73   store <1 x i32> %ext, <1 x i32> addrspace(1)* %out, align 4
  74   ret void
  75 }
  76
  77 ; FUNC-LABEL: @sext_in_reg_i1_to_i64
  78 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
  79 ; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
  80 ; SI: S_MOV_B32 {{s[0-9]+}}, -1
  81 ; SI: BUFFER_STORE_DWORDX2
     ; Sign-extends bit 0 of the i64 sum %a + %b across all 64 bits (shl by 63,
     ; then ashr by 63). Only the SI run has instruction checks for this function.
  82 define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  83   %sum = add i64 %a, %b
  84   %hi = shl i64 %sum, 63
  85   %ext = ashr i64 %hi, 63
  86   store i64 %ext, i64 addrspace(1)* %out, align 8
  87   ret void
  88 }
  89
  90 ; FUNC-LABEL: @sext_in_reg_i8_to_i64
  91 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
  92 ; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
  93 ; SI: S_MOV_B32 {{s[0-9]+}}, -1
  94 ; SI: BUFFER_STORE_DWORDX2
  95
  96 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
  97 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
  98 ; EG: ADD_INT
  99 ; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
 100 ; EG: ASHR [[RES_HI]]
 101 ; EG-NOT: BFE_INT
 102 ; EG: LSHR
 103 ; EG: LSHR
 104 ;; TODO Check address computation, using | with variables in {{}} does not work,
 105 ;; also the _LO/_HI order might be different
     ; Sign-extends the low 8 bits of the i64 sum %a + %b to 64 bits (shl by 56,
     ; then ashr by 56) and stores the result to %out.
 106 define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 107   %sum = add i64 %a, %b
 108   %hi = shl i64 %sum, 56
 109   %ext = ashr i64 %hi, 56
 110   store i64 %ext, i64 addrspace(1)* %out, align 8
 111   ret void
 112 }
 113
 114 ; FUNC-LABEL: @sext_in_reg_i16_to_i64
 115 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
 116 ; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
 117 ; SI: S_MOV_B32 {{s[0-9]+}}, -1
 118 ; SI: BUFFER_STORE_DWORDX2
 119
 120 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
 121 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
 122 ; EG: ADD_INT
 123 ; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
 124 ; EG: ASHR [[RES_HI]]
 125 ; EG-NOT: BFE_INT
 126 ; EG: LSHR
 127 ; EG: LSHR
 128 ;; TODO Check address computation, using | with variables in {{}} does not work,
 129 ;; also the _LO/_HI order might be different
     ; Sign-extends the low 16 bits of the i64 sum %a + %b to 64 bits (shl by 48,
     ; then ashr by 48) and stores the result to %out.
 130 define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 131   %sum = add i64 %a, %b
 132   %hi = shl i64 %sum, 48
 133   %ext = ashr i64 %hi, 48
 134   store i64 %ext, i64 addrspace(1)* %out, align 8
 135   ret void
 136 }
 137
 138 ; FUNC-LABEL: @sext_in_reg_i32_to_i64
 139 ; SI: S_LOAD_DWORD
 140 ; SI: S_LOAD_DWORD
 141 ; SI: S_ADD_I32 [[ADD:s[0-9]+]],
 142 ; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
 143 ; SI: BUFFER_STORE_DWORDX2
 144
 145 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
 146 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
 147 ; EG-NOT: BFE_INT
 148 ; EG: ADD_INT {{\*?}} [[RES_LO]]
 149 ; EG: ASHR [[RES_HI]]
 150 ; EG: ADD_INT
 151 ; EG: LSHR
 152 ; EG: LSHR
 153 ;; TODO Check address computation, using | with variables in {{}} does not work,
 154 ;; also the _LO/_HI order might be different
     ; Sign-extends the low 32 bits of the i64 sum %a + %b to 64 bits (shl by 32,
     ; then ashr by 32). The checks above expect an arithmetic shift for the high
     ; half and, on Evergreen, no BFE_INT.
 155 define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 156   %sum = add i64 %a, %b
 157   %hi = shl i64 %sum, 32
 158   %ext = ashr i64 %hi, 32
 159   store i64 %ext, i64 addrspace(1)* %out, align 8
 160   ret void
 161 }
 162
 163 ; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
 164 ; XFUNC-LABEL: @sext_in_reg_i8_to_v1i64
 165 ; XSI: S_BFE_I32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
 166 ; XSI: S_ASHR_I32 {{v[0-9]+}}, [[EXTRACT]], 31
 167 ; XSI: BUFFER_STORE_DWORD
 168 ; XEG: BFE_INT
 169 ; XEG: ASHR
 170 ; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
 171 ;   %c = add <1 x i64> %a, %b
 172 ;   %shl = shl <1 x i64> %c, <i64 56>
 173 ;   %ashr = ashr <1 x i64> %shl, <i64 56>
 174 ;   store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8
 175 ;   ret void
 176 ; }
 177
 178 ; FUNC-LABEL: @sext_in_reg_i1_in_i32_other_amount
 179 ; SI-NOT: BFE
 180 ; SI: S_LSHL_B32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
 181 ; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG]], 7
 182
 183 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
 184 ; EG-NOT: BFE
 185 ; EG: ADD_INT
 186 ; EG: LSHL
 187 ; EG: ASHR [[RES]]
 188 ; EG: LSHR {{\*?}} [[ADDR]]
     ; Shifts %a + %b left by 6 and then arithmetically right by 7. The two shift
     ; amounts differ, and the checks above require separate shifts and no BFE.
 189 define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
 190   %sum = add i32 %a, %b
 191   %up = shl i32 %sum, 6
 192   %down = ashr i32 %up, 7
 193   store i32 %down, i32 addrspace(1)* %out
 194   ret void
 195 }
 196
 197 ; FUNC-LABEL: @sext_in_reg_v2i1_in_v2i32_other_amount
 198 ; SI: S_LSHL_B32 [[REG0:s[0-9]+]], {{s[0-9]+}}, 6
 199 ; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG0]], 7
 200 ; SI: S_LSHL_B32 [[REG1:s[0-9]+]], {{s[0-9]+}}, 6
 201 ; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG1]], 7
 202
 203 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
 204 ; EG-NOT: BFE
 205 ; EG: ADD_INT
 206 ; EG: LSHL
 207 ; EG: ASHR [[RES]]
 208 ; EG: LSHL
 209 ; EG: ASHR [[RES]]
 210 ; EG: LSHR {{\*?}} [[ADDR]]
     ; Two-element vector form of the "other amount" test: each lane of %a + %b is
     ; shifted left by 6 and then arithmetically right by 7, so one shift pair per
     ; lane is expected. The shift source patterns accept any SGPR number so the
     ; checks do not depend on the allocator picking a single-digit register.
 211 define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
 212   %c = add <2 x i32> %a, %b
 213   %x = shl <2 x i32> %c, <i32 6, i32 6>
 214   %y = ashr <2 x i32> %x, <i32 7, i32 7>
 215   store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
 216   ret void
 217 }
 218
 219
 220 ; FUNC-LABEL: @sext_in_reg_v2i1_to_v2i32
 221 ; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
 222 ; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
 223 ; SI: BUFFER_STORE_DWORDX2
 224
 225 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
 226 ; EG: BFE_INT [[RES]]
 227 ; EG: BFE_INT [[RES]]
 228 ; EG: LSHR {{\*?}} [[ADDR]]
     ; Sign-extends bit 0 of each lane of the <2 x i32> sum %a + %b (shl by 31,
     ; then ashr by 31). One bitfield extract per lane is expected.
 229 define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
 230   %sum = add <2 x i32> %a, %b ; the add keeps this from folding into an extload
 231   %hi = shl <2 x i32> %sum, <i32 31, i32 31>
 232   %ext = ashr <2 x i32> %hi, <i32 31, i32 31>
 233   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out, align 8
 234   ret void
 235 }
 236
 237 ; FUNC-LABEL: @sext_in_reg_v4i1_to_v4i32
 238 ; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
 239 ; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
 240 ; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
 241 ; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
 242 ; SI: BUFFER_STORE_DWORDX4
 243
 244 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
 245 ; EG: BFE_INT [[RES]]
 246 ; EG: BFE_INT [[RES]]
 247 ; EG: BFE_INT [[RES]]
 248 ; EG: BFE_INT [[RES]]
 249 ; EG: LSHR {{\*?}} [[ADDR]]
     ; Sign-extends bit 0 of each lane of the <4 x i32> sum %a + %b (shl by 31,
     ; then ashr by 31). One bitfield extract per lane is expected.
 250 define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
 251   %sum = add <4 x i32> %a, %b ; the add keeps this from folding into an extload
 252   %hi = shl <4 x i32> %sum, <i32 31, i32 31, i32 31, i32 31>
 253   %ext = ashr <4 x i32> %hi, <i32 31, i32 31, i32 31, i32 31>
 254   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out, align 8
 255   ret void
 256 }
 257
 258 ; FUNC-LABEL: @sext_in_reg_v2i8_to_v2i32
 259 ; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
 260 ; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
 261 ; SI: BUFFER_STORE_DWORDX2
 262
 263 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
 264 ; EG: BFE_INT [[RES]]
 265 ; EG: BFE_INT [[RES]]
 266 ; EG: LSHR {{\*?}} [[ADDR]]
     ; Sign-extends the low 8 bits of each lane of the <2 x i32> sum %a + %b
     ; (shl by 24, then ashr by 24). One extension per lane is expected.
 267 define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
 268   %sum = add <2 x i32> %a, %b ; the add keeps this from folding into an extload
 269   %hi = shl <2 x i32> %sum, <i32 24, i32 24>
 270   %ext = ashr <2 x i32> %hi, <i32 24, i32 24>
 271   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out, align 8
 272   ret void
 273 }
 274
 275 ; FUNC-LABEL: @sext_in_reg_v4i8_to_v4i32
 276 ; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
 277 ; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
 278 ; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
 279 ; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
 280 ; SI: BUFFER_STORE_DWORDX4
 281
 282 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
 283 ; EG: BFE_INT [[RES]]
 284 ; EG: BFE_INT [[RES]]
 285 ; EG: BFE_INT [[RES]]
 286 ; EG: BFE_INT [[RES]]
 287 ; EG: LSHR {{\*?}} [[ADDR]]
     ; Sign-extends the low 8 bits of each lane of the <4 x i32> sum %a + %b
     ; (shl by 24, then ashr by 24). One extension per lane is expected.
 288 define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
 289   %sum = add <4 x i32> %a, %b ; the add keeps this from folding into an extload
 290   %hi = shl <4 x i32> %sum, <i32 24, i32 24, i32 24, i32 24>
 291   %ext = ashr <4 x i32> %hi, <i32 24, i32 24, i32 24, i32 24>
 292   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out, align 8
 293   ret void
 294 }
 295
 296 ; FUNC-LABEL: @sext_in_reg_v2i16_to_v2i32
 297 ; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
 298 ; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
 299 ; SI: BUFFER_STORE_DWORDX2
 300
 301 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
 302 ; EG: BFE_INT [[RES]]
 303 ; EG: BFE_INT [[RES]]
 304 ; EG: LSHR {{\*?}} [[ADDR]]
     ; Sign-extends the low 16 bits of each lane of the <2 x i32> sum %a + %b
     ; (shl by 16, then ashr by 16). One extension per lane is expected.
 305 define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
 306   %sum = add <2 x i32> %a, %b ; the add keeps this from folding into an extload
 307   %hi = shl <2 x i32> %sum, <i32 16, i32 16>
 308   %ext = ashr <2 x i32> %hi, <i32 16, i32 16>
 309   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out, align 8
 310   ret void
 311 }
 312
 313 ; FUNC-LABEL: @testcase
     ; Only the label is checked; no instruction patterns follow.
     ; NOTE(review): presumably a regression test that just has to compile - confirm.
     ; Computes two selects of %a against 0 (one keyed on the sign of %a, one on
     ; bit 0 of %a), xors them, and stores the i8 result to %out.
 314 define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
 315   %and_a_1 = and i8 %a, 1
 316   %cmp_eq = icmp eq i8 %and_a_1, 0
 317   %cmp_slt = icmp slt i8 %a, 0
 318   %sel0 = select i1 %cmp_slt, i8 0, i8 %a
 319   %sel1 = select i1 %cmp_eq, i8 0, i8 %a
 320   %xor = xor i8 %sel0, %sel1
 321   store i8 %xor, i8 addrspace(1)* %out
 322   ret void
 323 }
 324
 325 ; FUNC-LABEL: @testcase_3
     ; Only the label is checked; no instruction patterns follow.
     ; NOTE(review): the body is identical to @testcase apart from the function
     ; name - confirm whether a different variant was intended.
 326 define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
 327   %and_a_1 = and i8 %a, 1
 328   %cmp_eq = icmp eq i8 %and_a_1, 0
 329   %cmp_slt = icmp slt i8 %a, 0
 330   %sel0 = select i1 %cmp_slt, i8 0, i8 %a
 331   %sel1 = select i1 %cmp_eq, i8 0, i8 %a
 332   %xor = xor i8 %sel0, %sel1
 333   store i8 %xor, i8 addrspace(1)* %out
 334   ret void
 335 }
 336
 337 ; FUNC-LABEL: @vgpr_sext_in_reg_v4i8_to_v4i32
 338 ; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
 339 ; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
 340 ; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
 341 ; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
     ; Same per-lane 8-bit sign extension as the v4i8 argument test, but the
     ; operands are loaded from memory through %a and %b instead of being passed
     ; by value. Four V_BFE_I32 instructions with operands 0, 8 are expected.
 342 define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
 343   %lhs = load <4 x i32> addrspace(1)* %a, align 16
 344   %rhs = load <4 x i32> addrspace(1)* %b, align 16
 345   %sum = add <4 x i32> %lhs, %rhs ; the add keeps this from folding into an extload
 346   %hi = shl <4 x i32> %sum, <i32 24, i32 24, i32 24, i32 24>
 347   %ext = ashr <4 x i32> %hi, <i32 24, i32 24, i32 24, i32 24>
 348   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out, align 8
 349   ret void
 350 }
 351
 352 ; FUNC-LABEL: @vgpr_sext_in_reg_v4i16_to_v4i32
 353 ; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
 354 ; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
     ; Per-lane 16-bit sign extension (shl by 16, then ashr by 16) of a <4 x i32>
     ; sum whose operands are loaded from memory through %a and %b.
     ; NOTE(review): only two V_BFE_I32 lines are checked although the vector has
     ; four lanes - confirm whether four checks were intended.
 355 define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
 356   %lhs = load <4 x i32> addrspace(1)* %a, align 16
 357   %rhs = load <4 x i32> addrspace(1)* %b, align 16
 358   %sum = add <4 x i32> %lhs, %rhs ; the add keeps this from folding into an extload
 359   %hi = shl <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
 360   %ext = ashr <4 x i32> %hi, <i32 16, i32 16, i32 16, i32 16>
 361   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out, align 8
 362   ret void
 363 }
 364
 365 ; FIXME: The BFE should really be eliminated. I think it should happen
 366 ; when computeKnownBitsForTargetNode is implemented for imax.
 367
 368 ; FUNC-LABEL: @sext_in_reg_to_illegal_type
 369 ; SI: BUFFER_LOAD_SBYTE
 370 ; SI: V_MAX_I32
 371 ; SI: V_BFE_I32
 372 ; SI: BUFFER_STORE_SHORT
     ; Loads an i8, sign-extends it to i32, takes imax with 0, truncates back to
     ; i8 and sign-extends that to i16 before storing. The checks above expect the
     ; final extension to still show up as a V_BFE_I32.
 373 define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
 374   %byte = load i8 addrspace(1)* %src, align 1
 375   %wide = sext i8 %byte to i32
 376   %clamped = tail call i32 @llvm.AMDGPU.imax(i32 %wide, i32 0) nounwind readnone
 377   %narrow = trunc i32 %clamped to i8
 378   %half = sext i8 %narrow to i16
 379   store i16 %half, i16 addrspace(1)* %out, align 2
 380   ret void
 381 }
 382
 383 declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
 384
 385 ; FUNC-LABEL: @bfe_0_width
 386 ; SI-NOT: BFE
 387 ; SI: S_ENDPGM
     ; Calls the bfe intrinsic with operands (8, 0); going by the test name the
     ; last operand is the width. No BFE instruction may appear in the output.
 388 define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
 389   %val = load i32 addrspace(1)* %ptr, align 4
 390   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %val, i32 8, i32 0) nounwind readnone
 391   store i32 %field, i32 addrspace(1)* %out, align 4
 392   ret void
 393 }
 394
 395 ; FUNC-LABEL: @bfe_8_bfe_8
 396 ; SI: V_BFE_I32
 397 ; SI-NOT: BFE
 398 ; SI: S_ENDPGM
     ; Applies bfe with operands (0, 8) twice in a row. Exactly one V_BFE_I32 is
     ; expected, so the second call must not produce another instruction.
 399 define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
 400   %val = load i32 addrspace(1)* %ptr, align 4
 401   %inner = call i32 @llvm.AMDGPU.bfe.i32(i32 %val, i32 0, i32 8) nounwind readnone
 402   %outer = call i32 @llvm.AMDGPU.bfe.i32(i32 %inner, i32 0, i32 8) nounwind readnone
 403   store i32 %outer, i32 addrspace(1)* %out, align 4
 404   ret void
 405 }
 406
 407 ; FUNC-LABEL: @bfe_8_bfe_16
 408 ; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
 409 ; SI: S_ENDPGM
     ; Applies bfe with operands (0, 8) and then bfe with operands (0, 16) to the
     ; result. A V_BFE_I32 with operands 0, 8 is expected in the output.
 410 define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
 411   %val = load i32 addrspace(1)* %ptr, align 4
 412   %inner = call i32 @llvm.AMDGPU.bfe.i32(i32 %val, i32 0, i32 8) nounwind readnone
 413   %outer = call i32 @llvm.AMDGPU.bfe.i32(i32 %inner, i32 0, i32 16) nounwind readnone
 414   store i32 %outer, i32 addrspace(1)* %out, align 4
 415   ret void
 416 }
 417
 418 ; This really should be folded into a single BFE.
 419 ; FUNC-LABEL: @bfe_16_bfe_8
 420 ; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
 421 ; SI-NOT: BFE
 422 ; SI: S_ENDPGM
     ; Applies bfe with operands (0, 16) and then bfe with operands (0, 8) to the
     ; result. One V_BFE_I32 with operands 0, 8 and no further BFE is expected.
 423 define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
 424   %val = load i32 addrspace(1)* %ptr, align 4
 425   %inner = call i32 @llvm.AMDGPU.bfe.i32(i32 %val, i32 0, i32 16) nounwind readnone
 426   %outer = call i32 @llvm.AMDGPU.bfe.i32(i32 %inner, i32 0, i32 8) nounwind readnone
 427   store i32 %outer, i32 addrspace(1)* %out, align 4
 428   ret void
 429 }
 430
 431 ; The explicit bfe must not leave a redundant BFE next to the sign extension.
 432 ; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe
 433 ; SI: S_SEXT_I32_I8 s{{[0-9]+}}, s{{[0-9]+}}
 434 ; SI-NOT: BFE
 435 ; SI: S_ENDPGM
     ; Applies bfe with operands (0, 8) to %a + %b and then sign-extends the low
     ; 8 bits again with shl by 24 and ashr by 24.
 436 define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
 437   %sum = add i32 %a, %b ; the add keeps this from folding into an extload
 438   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %sum, i32 0, i32 8) nounwind readnone
 439   %hi = shl i32 %field, 24
 440   %ext = ashr i32 %hi, 24
 441   store i32 %ext, i32 addrspace(1)* %out, align 4
 442   ret void
 443 }
 444
 445 ; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe_wrong
     ; Only the label is checked. The bfe operands here are (8, 0), the reverse of
     ; the (0, 8) used in @sext_in_reg_i8_to_i32_bfe, followed by the same
     ; shl by 24 and ashr by 24.
 446 define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
 447   %sum = add i32 %a, %b ; the add keeps this from folding into an extload
 448   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %sum, i32 8, i32 0) nounwind readnone
 449   %hi = shl i32 %field, 24
 450   %ext = ashr i32 %hi, 24
 451   store i32 %ext, i32 addrspace(1)* %out, align 4
 452   ret void
 453 }
 454
 455 ; FUNC-LABEL: @sextload_i8_to_i32_bfe
 456 ; SI: BUFFER_LOAD_SBYTE
 457 ; SI-NOT: BFE
 458 ; SI: S_ENDPGM
     ; Loads an i8, sign-extends it to i32, applies bfe with operands (0, 8) and
     ; then shl by 24 and ashr by 24. A signed byte load and no BFE is expected.
 459 define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
 460   %byte = load i8 addrspace(1)* %ptr, align 1
 461   %wide = sext i8 %byte to i32
 462   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %wide, i32 0, i32 8) nounwind readnone
 463   %hi = shl i32 %field, 24
 464   %ext = ashr i32 %hi, 24
 465   store i32 %ext, i32 addrspace(1)* %out, align 4
 466   ret void
 467 }
 468
 469 ; FUNC-LABEL: @sextload_i8_to_i32_bfe_0:
 470 ; SI-NOT: BFE
 471 ; SI: S_ENDPGM
     ; Loads an i8, sign-extends it to i32, applies bfe with operands (8, 0) and
     ; then shl by 24 and ashr by 24. No BFE instruction may appear in the output.
 472 define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
 473   %byte = load i8 addrspace(1)* %ptr, align 1
 474   %wide = sext i8 %byte to i32
 475   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %wide, i32 8, i32 0) nounwind readnone
 476   %hi = shl i32 %field, 24
 477   %ext = ashr i32 %hi, 24
 478   store i32 %ext, i32 addrspace(1)* %out, align 4
 479   ret void
 480 }
 481
 482 ; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_0:
 483 ; SI-NOT: SHR
 484 ; SI-NOT: SHL
 485 ; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
 486 ; SI: S_ENDPGM
     ; Sign-extends bit 0 of the loaded value (shl by 31, then ashr by 31) and
     ; feeds it to bfe with operands (0, 1). The checks above expect a single
     ; V_BFE_I32 with operands 0, 1 and no shift before it.
 487 define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
 488   %val = load i32 addrspace(1)* %in, align 4
 489   %hi = shl i32 %val, 31
 490   %ext = ashr i32 %hi, 31
 491   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %ext, i32 0, i32 1)
 492   store i32 %field, i32 addrspace(1)* %out, align 4
 493   ret void
 494 }
 495
 496 ; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_1
 497 ; SI: BUFFER_LOAD_DWORD
 498 ; SI-NOT: SHL
 499 ; SI-NOT: SHR
 500 ; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
 501 ; SI: S_ENDPGM
     ; Sign-extends the low 2 bits of the loaded value (shl by 30, then ashr by 30)
     ; and feeds it to bfe with operands (1, 1). The checks above expect a single
     ; V_BFE_I32 with operands 1, 1 and no shift between the load and it.
 502 define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
 503   %val = load i32 addrspace(1)* %in, align 4
 504   %hi = shl i32 %val, 30
 505   %ext = ashr i32 %hi, 30
 506   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %ext, i32 1, i32 1)
 507   store i32 %field, i32 addrspace(1)* %out, align 4
 508   ret void
 509 }
 510
 511 ; FUNC-LABEL: @sext_in_reg_i2_bfe_offset_1:
 512 ; SI: BUFFER_LOAD_DWORD
 513 ; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
 514 ; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
 515 ; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
 516 ; SI: S_ENDPGM
     ; Sign-extends the low 2 bits of the loaded value (shl by 30, then ashr by 30)
     ; and feeds it to bfe with operands (1, 2). Here the checks above expect both
     ; shifts to remain in the output ahead of the V_BFE_I32.
 517 define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
 518   %val = load i32 addrspace(1)* %in, align 4
 519   %hi = shl i32 %val, 30
 520   %ext = ashr i32 %hi, 30
 521   %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %ext, i32 1, i32 2)
 522   store i32 %field, i32 addrspace(1)* %out, align 4
 523   ret void
 524 }