test/CodeGen/AMDGPU/ret.ll

   1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
   2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
   3
   4 attributes #0 = { "ShaderType"="1" }
     ; The attribute group above is attached to the tests that do not set an
     ; "InitialPSInputAddr" value: vgpr, vgpr_literal, sgpr, sgpr_literal, both
     ; and structure_literal.
   5
     ; Export intrinsic taking five i32 operands followed by four floats. The
     ; tests that call it (vgpr, vgpr_literal, both, structure_literal) each
     ; expect a matching `exp` instruction in the generated code.
   6 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
   7
   8 ; GCN-LABEL: {{^}}vgpr:
   9 ; GCN: v_mov_b32_e32 v1, v0
  10 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
  11 ; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
  12 ; GCN: s_waitcnt expcnt(0)
  13 ; GCN-NOT: s_endpgm
     ; Test vgpr - exports the float argument %3 and returns two floats,
     ; { %3 + 1.0, %3 }.
     ; The checks above expect the argument to be copied from v0 to v1 first,
     ; then (in either order) the sum written to v0 and the export reading v1,
     ; followed by a wait on the export counter. No s_endpgm may follow.
  14 define {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
  15   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  16   %x = fadd float %3, 1.0
     ; Build the returned pair: element 0 is the sum, element 1 the raw argument.
  17   %a = insertvalue {float, float} undef, float %x, 0
  18   %b = insertvalue {float, float} %a, float %3, 1
  19   ret {float, float} %b
  20 }
  21
  22 ; GCN-LABEL: {{^}}vgpr_literal:
  23 ; GCN: v_mov_b32_e32 v4, v0
  24 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
  25 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
  26 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
  27 ; GCN-DAG: v_mov_b32_e32 v3, -1.0
  28 ; GCN: exp 15, 0, 1, 1, 1, v4, v4, v4, v4
  29 ; GCN: s_waitcnt expcnt(0)
  30 ; GCN-NOT: s_endpgm
     ; Test vgpr_literal - exports the float argument %3 and returns a constant
     ; four-float struct { 1.0, 2.0, 4.0, -1.0 }.
     ; The checks above expect the argument to be moved out of v0 into v4, the
     ; four constants to be materialised in v0..v3 (any order), and the export
     ; to read v4. No s_endpgm may follow.
  31 define {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
  32   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  33   ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
  34 }
  35
  36
  37 ; GCN: .long 165580
  38 ; GCN-NEXT: .long 562
  39 ; GCN-NEXT: .long 165584
  40 ; GCN-NEXT: .long 562
  41 ; GCN-LABEL: {{^}}vgpr_ps_addr0:
  42 ; GCN-NOT: v_mov_b32_e32 v0
  43 ; GCN-NOT: v_mov_b32_e32 v1
  44 ; GCN-NOT: v_mov_b32_e32 v2
  45 ; GCN: v_mov_b32_e32 v3, v4
  46 ; GCN: v_mov_b32_e32 v4, v6
  47 ; GCN-NOT: s_endpgm
     ; Test vgpr_ps_addr0 - "InitialPSInputAddr" is 0. The function returns five
     ; floats: both lanes of %4, lane 0 of %7, lane 0 of %8 (each bitcast from
     ; i32), and the float argument %12.
     ; The four .long checks expect two consecutive (key, value) pairs emitted
     ; before the function label: (165580, 562) and (165584, 562).
     ; NOTE(review): 165580 = 0x286CC and 165584 = 0x286D0 look like the
     ; SPI_PS_INPUT_ENA / SPI_PS_INPUT_ADDR register offsets - confirm against
     ; the backend's register definitions.
     ; The code checks expect no moves into v0..v2, then v3 <- v4 and v4 <- v6.
     ; Attribute group #1 below is also used by ps_input_ena_no_inputs.
  48 attributes #1 = { "ShaderType"="0" "InitialPSInputAddr"="0" }
  49 define {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
     ; Pick the integer lanes that are returned.
  50   %i0 = extractelement <2 x i32> %4, i32 0
  51   %i1 = extractelement <2 x i32> %4, i32 1
  52   %i2 = extractelement <2 x i32> %7, i32 0
  53   %i3 = extractelement <2 x i32> %8, i32 0
     ; Reinterpret the i32 lanes as float so they fit the all-float return struct.
  54   %f0 = bitcast i32 %i0 to float
  55   %f1 = bitcast i32 %i1 to float
  56   %f2 = bitcast i32 %i2 to float
  57   %f3 = bitcast i32 %i3 to float
     ; Assemble the return value element by element; element 4 is argument %12.
  58   %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  59   %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  60   %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  61   %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  62   %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  63   ret {float, float, float, float, float} %r4
  64 }
  65
  66
  67 ; GCN: .long 165580
  68 ; GCN-NEXT: .long 1
  69 ; GCN-NEXT: .long 165584
  70 ; GCN-NEXT: .long 1
  71 ; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
  72 ; GCN: v_mov_b32_e32 v0, 1.0
  73 ; GCN-NOT: s_endpgm
     ; Test ps_input_ena_no_inputs - same signature and attribute group (#1) as
     ; vgpr_ps_addr0, but the body reads none of its arguments and returns the
     ; constant 1.0.
     ; The .long checks expect both values paired with 165580 and 165584 to be
     ; 1 (not 0), and the code check expects 1.0 to be materialised in v0.
     ; NOTE(review): presumably this pins that at least one input stays enabled
     ; even when the shader uses none - confirm against the backend.
  74 define float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
  75   ret float 1.0
  76 }
  77
  78
  79 ; GCN: .long 165580
  80 ; GCN-NEXT: .long 562
  81 ; GCN-NEXT: .long 165584
  82 ; GCN-NEXT: .long 563
  83 ; GCN-LABEL: {{^}}vgpr_ps_addr1:
  84 ; GCN-DAG: v_mov_b32_e32 v0, v2
  85 ; GCN-DAG: v_mov_b32_e32 v1, v3
  86 ; GCN: v_mov_b32_e32 v2, v4
  87 ; GCN-DAG: v_mov_b32_e32 v3, v6
  88 ; GCN-DAG: v_mov_b32_e32 v4, v8
  89 ; GCN-NOT: s_endpgm
     ; Test vgpr_ps_addr1 - same body as vgpr_ps_addr0, but with
     ; "InitialPSInputAddr" set to 1 (attribute group #2 below).
     ; The .long checks expect the value paired with 165580 to stay 562 and the
     ; value paired with 165584 to be 563, which equals 562 | 1.
     ; The code checks expect the returned lanes to come from v2, v3, v4, v6
     ; and v8, moved into v0..v4.
  90 attributes #2 = { "ShaderType"="0" "InitialPSInputAddr"="1" }
  91 define {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
     ; Pick the integer lanes that are returned.
  92   %i0 = extractelement <2 x i32> %4, i32 0
  93   %i1 = extractelement <2 x i32> %4, i32 1
  94   %i2 = extractelement <2 x i32> %7, i32 0
  95   %i3 = extractelement <2 x i32> %8, i32 0
     ; Reinterpret the i32 lanes as float so they fit the all-float return struct.
  96   %f0 = bitcast i32 %i0 to float
  97   %f1 = bitcast i32 %i1 to float
  98   %f2 = bitcast i32 %i2 to float
  99   %f3 = bitcast i32 %i3 to float
     ; Assemble the return value element by element; element 4 is argument %12.
 100   %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
 101   %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
 102   %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
 103   %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
 104   %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
 105   ret {float, float, float, float, float} %r4
 106 }
 107
 108
 109 ; GCN: .long 165580
 110 ; GCN-NEXT: .long 562
 111 ; GCN-NEXT: .long 165584
 112 ; GCN-NEXT: .long 631
 113 ; GCN-LABEL: {{^}}vgpr_ps_addr119:
 114 ; GCN-DAG: v_mov_b32_e32 v0, v2
 115 ; GCN-DAG: v_mov_b32_e32 v1, v3
 116 ; GCN: v_mov_b32_e32 v2, v6
 117 ; GCN: v_mov_b32_e32 v3, v8
 118 ; GCN: v_mov_b32_e32 v4, v12
 119 ; GCN-NOT: s_endpgm
     ; Test vgpr_ps_addr119 - same body as vgpr_ps_addr0, but with
     ; "InitialPSInputAddr" set to 119 (attribute group #3 below).
     ; The .long checks expect the value paired with 165580 to stay 562 and the
     ; value paired with 165584 to be 631, which equals 562 | 119.
     ; The code checks expect the returned lanes to come from v2, v3, v6, v8
     ; and v12, moved into v0..v4.
 120 attributes #3 = { "ShaderType"="0" "InitialPSInputAddr"="119" }
 121 define {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
     ; Pick the integer lanes that are returned.
 122   %i0 = extractelement <2 x i32> %4, i32 0
 123   %i1 = extractelement <2 x i32> %4, i32 1
 124   %i2 = extractelement <2 x i32> %7, i32 0
 125   %i3 = extractelement <2 x i32> %8, i32 0
     ; Reinterpret the i32 lanes as float so they fit the all-float return struct.
 126   %f0 = bitcast i32 %i0 to float
 127   %f1 = bitcast i32 %i1 to float
 128   %f2 = bitcast i32 %i2 to float
 129   %f3 = bitcast i32 %i3 to float
     ; Assemble the return value element by element; element 4 is argument %12.
 130   %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
 131   %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
 132   %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
 133   %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
 134   %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
 135   ret {float, float, float, float, float} %r4
 136 }
 137
 138
 139 ; GCN: .long 165580
 140 ; GCN-NEXT: .long 562
 141 ; GCN-NEXT: .long 165584
 142 ; GCN-NEXT: .long 946
 143 ; GCN-LABEL: {{^}}vgpr_ps_addr418:
 144 ; GCN-NOT: v_mov_b32_e32 v0
 145 ; GCN-NOT: v_mov_b32_e32 v1
 146 ; GCN-NOT: v_mov_b32_e32 v2
 147 ; GCN: v_mov_b32_e32 v3, v4
 148 ; GCN: v_mov_b32_e32 v4, v8
 149 ; GCN-NOT: s_endpgm
     ; Test vgpr_ps_addr418 - same body as vgpr_ps_addr0, but with
     ; "InitialPSInputAddr" set to 418 (attribute group #4 below).
     ; The .long checks expect the value paired with 165580 to stay 562 and the
     ; value paired with 165584 to be 946, which equals 562 | 418.
     ; The code checks expect no moves into v0..v2, then v3 <- v4 and v4 <- v8.
 150 attributes #4 = { "ShaderType"="0" "InitialPSInputAddr"="418" }
 151 define {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #4 {
     ; Pick the integer lanes that are returned.
 152   %i0 = extractelement <2 x i32> %4, i32 0
 153   %i1 = extractelement <2 x i32> %4, i32 1
 154   %i2 = extractelement <2 x i32> %7, i32 0
 155   %i3 = extractelement <2 x i32> %8, i32 0
     ; Reinterpret the i32 lanes as float so they fit the all-float return struct.
 156   %f0 = bitcast i32 %i0 to float
 157   %f1 = bitcast i32 %i1 to float
 158   %f2 = bitcast i32 %i2 to float
 159   %f3 = bitcast i32 %i3 to float
     ; Assemble the return value element by element; element 4 is argument %12.
 160   %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
 161   %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
 162   %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
 163   %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
 164   %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
 165   ret {float, float, float, float, float} %r4
 166 }
 167
 168
 169 ; GCN-LABEL: {{^}}sgpr:
 170 ; GCN-DAG: s_add_i32 s0, s3, 2
     ; GCN-DAG: s_mov_b32 s1, s2
 171 ; GCN: s_mov_b32 s2, s3
 172 ; GCN-NOT: s_endpgm
     ; Test sgpr - returns three i32 values, { %2 + 2, %1, %2 }.
     ; The checks above expect the sum in s0, %1 (checked as s2) copied to s1,
     ; and %2 (checked as s3) copied to s2; the copy into s1 has to be matched
     ; before s2 is overwritten. No s_endpgm may follow.
     ; The insertvalue chain must run %a -> %b -> %c: building %c from %a would
     ; leave %b dead and element 1 of the returned struct undefined.
 173 define {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
 174   %x = add i32 %2, 2
 175   %a = insertvalue {i32, i32, i32} undef, i32 %x, 0
 176   %b = insertvalue {i32, i32, i32} %a, i32 %1, 1
 177   %c = insertvalue {i32, i32, i32} %b, i32 %2, 2
 178   ret {i32, i32, i32} %c
 179 }
 180
 181
 182 ; GCN-LABEL: {{^}}sgpr_literal:
 183 ; GCN: s_mov_b32 s0, 5
 184 ; GCN-NOT: s_mov_b32 s0, s0
 185 ; GCN-DAG: s_mov_b32 s1, 6
 186 ; GCN-DAG: s_mov_b32 s2, 7
 187 ; GCN-DAG: s_mov_b32 s3, 8
 188 ; GCN-NOT: s_endpgm
     ; Test sgpr_literal - returns the constant i32 struct { 5, 6, 7, 8 } and
     ; reads none of its arguments.
     ; The checks above expect the constants to be materialised in s0..s3,
     ; forbid a redundant self-copy of s0 after it is set, and forbid s_endpgm.
 189 define {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
 191   ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
 192 }
 193
 194
 195 ; GCN-LABEL: {{^}}both:
 196 ; GCN: v_mov_b32_e32 v1, v0
 197 ; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
 198 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
 199 ; GCN-DAG: s_add_i32 s0, s3, 2
 200 ; GCN-DAG: s_mov_b32 s1, s2
 201 ; GCN: s_mov_b32 s2, s3
 202 ; GCN: s_waitcnt expcnt(0)
 203 ; GCN-NOT: s_endpgm
     ; Test both - exports the float argument %3 and returns a struct that mixes
     ; floats and integers: { %3 + 1.0, %2 + 2, %3, %1, %2 }.
     ; The checks above expect the float results in v0 and v1 and the integer
     ; results in s0, s1 and s2, i.e. float and i32 elements are numbered
     ; independently within the vector and scalar register files.
     ; No s_endpgm may follow.
 204 define {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
 205   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
 206   %v = fadd float %3, 1.0
 207   %s = add i32 %2, 2
     ; Interleave float and i32 elements in the returned struct.
 208   %a0 = insertvalue {float, i32, float, i32, i32} undef, float %v, 0
 209   %a1 = insertvalue {float, i32, float, i32, i32} %a0, i32 %s, 1
 210   %a2 = insertvalue {float, i32, float, i32, i32} %a1, float %3, 2
 211   %a3 = insertvalue {float, i32, float, i32, i32} %a2, i32 %1, 3
 212   %a4 = insertvalue {float, i32, float, i32, i32} %a3, i32 %2, 4
 213   ret {float, i32, float, i32, i32} %a4
 214 }
 215
 216
 217 ; GCN-LABEL: {{^}}structure_literal:
 218 ; GCN: v_mov_b32_e32 v3, v0
 219 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
 220 ; GCN-DAG: s_mov_b32 s0, 2
 221 ; GCN-DAG: s_mov_b32 s1, 3
 222 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
 223 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
 224 ; GCN-DAG: exp 15, 0, 1, 1, 1, v3, v3, v3, v3
     ; GCN-NOT: s_endpgm
     ; Test structure_literal - exports the float argument %3 and returns a
     ; constant nested struct { { 1.0, 2 }, { 3, < 2.0, 4.0 > } }.
     ; The checks above expect the argument to be moved out of v0 into v3, the
     ; float constants (including both vector lanes) in v0..v2, the integer
     ; constants in s0 and s1, and the export reading v3. As in every other
     ; value-returning test in this file, no s_endpgm may follow.
 225 define {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
 226   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
 227   ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> <float 2.0, float 4.0>}}
 228 }