test/CodeGen/R600/alu-split.ll

   1 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
     ; The command above feeds this file to llc for the r600 target
     ; (redwood CPU) and pipes the result into FileCheck, which validates it
     ; against the directives that follow.
   2
     ; Expected output: the text "ALU" is matched three times in order, and no
     ; further "ALU" may appear between the third match and "CF_END".
     ; NOTE(review): judging by the file name (alu-split), this presumably
     ; verifies that the large @main body is emitted as exactly three ALU
     ; clauses -- confirm against the R600 backend.
   3 ;CHECK: ALU
   4 ;CHECK: ALU
   5 ;CHECK: ALU
   6 ;CHECK-NOT: ALU
   7 ;CHECK: CF_END
   8
   9 define void @main() #0 {
  10 main_body:
  11   %0 = call float @llvm.R600.load.input(i32 4)
  12   %1 = call float @llvm.R600.load.input(i32 5)
  13   %2 = call float @llvm.R600.load.input(i32 6)
  14   %3 = call float @llvm.R600.load.input(i32 7)
  15   %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
  16   %5 = extractelement <4 x float> %4, i32 0
  17   %6 = fcmp une float 0x4016F2B020000000, %5
  18   %7 = select i1 %6, float 1.000000e+00, float 0.000000e+00
  19   %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
  20   %9 = extractelement <4 x float> %8, i32 1
  21   %10 = fcmp une float 0x401FDCC640000000, %9
  22   %11 = select i1 %10, float 1.000000e+00, float 0.000000e+00
  23   %12 = fsub float -0.000000e+00, %7
  24   %13 = fptosi float %12 to i32
  25   %14 = fsub float -0.000000e+00, %11
  26   %15 = fptosi float %14 to i32
  27   %16 = bitcast i32 %13 to float
  28   %17 = bitcast i32 %15 to float
  29   %18 = bitcast float %16 to i32
  30   %19 = bitcast float %17 to i32
  31   %20 = or i32 %18, %19
  32   %21 = bitcast i32 %20 to float
  33   %22 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 17)
  34   %23 = extractelement <4 x float> %22, i32 0
  35   %24 = fcmp une float 0xC00574BC60000000, %23
  36   %25 = select i1 %24, float 1.000000e+00, float 0.000000e+00
  37   %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 17)
  38   %27 = extractelement <4 x float> %26, i32 1
  39   %28 = fcmp une float 0x40210068E0000000, %27
  40   %29 = select i1 %28, float 1.000000e+00, float 0.000000e+00
  41   %30 = fsub float -0.000000e+00, %25
  42   %31 = fptosi float %30 to i32
  43   %32 = fsub float -0.000000e+00, %29
  44   %33 = fptosi float %32 to i32
  45   %34 = bitcast i32 %31 to float
  46   %35 = bitcast i32 %33 to float
  47   %36 = bitcast float %34 to i32
  48   %37 = bitcast float %35 to i32
  49   %38 = or i32 %36, %37
  50   %39 = bitcast i32 %38 to float
  51   %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 18)
  52   %41 = extractelement <4 x float> %40, i32 0
  53   %42 = fcmp une float 0xBFC9A6B500000000, %41
  54   %43 = select i1 %42, float 1.000000e+00, float 0.000000e+00
  55   %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 18)
  56   %45 = extractelement <4 x float> %44, i32 1
  57   %46 = fcmp une float 0xC0119BDA60000000, %45
  58   %47 = select i1 %46, float 1.000000e+00, float 0.000000e+00
  59   %48 = fsub float -0.000000e+00, %43
  60   %49 = fptosi float %48 to i32
  61   %50 = fsub float -0.000000e+00, %47
  62   %51 = fptosi float %50 to i32
  63   %52 = bitcast i32 %49 to float
  64   %53 = bitcast i32 %51 to float
  65   %54 = bitcast float %52 to i32
  66   %55 = bitcast float %53 to i32
  67   %56 = or i32 %54, %55
  68   %57 = bitcast i32 %56 to float
  69   %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 19)
  70   %59 = extractelement <4 x float> %58, i32 0
  71   %60 = fcmp une float 0xC02085D640000000, %59
  72   %61 = select i1 %60, float 1.000000e+00, float 0.000000e+00
  73   %62 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 19)
  74   %63 = extractelement <4 x float> %62, i32 1
  75   %64 = fcmp une float 0xBFD7C1BDA0000000, %63
  76   %65 = select i1 %64, float 1.000000e+00, float 0.000000e+00
  77   %66 = fsub float -0.000000e+00, %61
  78   %67 = fptosi float %66 to i32
  79   %68 = fsub float -0.000000e+00, %65
  80   %69 = fptosi float %68 to i32
  81   %70 = bitcast i32 %67 to float
  82   %71 = bitcast i32 %69 to float
  83   %72 = bitcast float %70 to i32
  84   %73 = bitcast float %71 to i32
  85   %74 = or i32 %72, %73
  86   %75 = bitcast i32 %74 to float
  87   %76 = insertelement <4 x float> undef, float %21, i32 0
  88   %77 = insertelement <4 x float> %76, float %39, i32 1
  89   %78 = insertelement <4 x float> %77, float %57, i32 2
  90   %79 = insertelement <4 x float> %78, float %75, i32 3
  91   %80 = insertelement <4 x float> undef, float %21, i32 0
  92   %81 = insertelement <4 x float> %80, float %39, i32 1
  93   %82 = insertelement <4 x float> %81, float %57, i32 2
  94   %83 = insertelement <4 x float> %82, float %75, i32 3
  95   %84 = call float @llvm.AMDGPU.dp4(<4 x float> %79, <4 x float> %83)
  96   %85 = bitcast float %84 to i32
  97   %86 = icmp ne i32 %85, 0
  98   %87 = sext i1 %86 to i32
  99   %88 = bitcast i32 %87 to float
 100   %89 = bitcast float %88 to i32
 101   %90 = xor i32 %89, -1
 102   %91 = bitcast i32 %90 to float
 103   %92 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 20)
 104   %93 = extractelement <4 x float> %92, i32 0
 105   %94 = fcmp une float 0x401FDCC640000000, %93
 106   %95 = select i1 %94, float 1.000000e+00, float 0.000000e+00
 107   %96 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 20)
 108   %97 = extractelement <4 x float> %96, i32 1
 109   %98 = fcmp une float 0xC00574BC60000000, %97
 110   %99 = select i1 %98, float 1.000000e+00, float 0.000000e+00
 111   %100 = fsub float -0.000000e+00, %95
 112   %101 = fptosi float %100 to i32
 113   %102 = fsub float -0.000000e+00, %99
 114   %103 = fptosi float %102 to i32
 115   %104 = bitcast i32 %101 to float
 116   %105 = bitcast i32 %103 to float
 117   %106 = bitcast float %104 to i32
 118   %107 = bitcast float %105 to i32
 119   %108 = or i32 %106, %107
 120   %109 = bitcast i32 %108 to float
 121   %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 21)
 122   %111 = extractelement <4 x float> %110, i32 0
 123   %112 = fcmp une float 0x40210068E0000000, %111
 124   %113 = select i1 %112, float 1.000000e+00, float 0.000000e+00
 125   %114 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 21)
 126   %115 = extractelement <4 x float> %114, i32 1
 127   %116 = fcmp une float 0xBFC9A6B500000000, %115
 128   %117 = select i1 %116, float 1.000000e+00, float 0.000000e+00
 129   %118 = fsub float -0.000000e+00, %113
 130   %119 = fptosi float %118 to i32
 131   %120 = fsub float -0.000000e+00, %117
 132   %121 = fptosi float %120 to i32
 133   %122 = bitcast i32 %119 to float
 134   %123 = bitcast i32 %121 to float
 135   %124 = bitcast float %122 to i32
 136   %125 = bitcast float %123 to i32
 137   %126 = or i32 %124, %125
 138   %127 = bitcast i32 %126 to float
 139   %128 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 22)
 140   %129 = extractelement <4 x float> %128, i32 0
 141   %130 = fcmp une float 0xC0119BDA60000000, %129
 142   %131 = select i1 %130, float 1.000000e+00, float 0.000000e+00
 143   %132 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 22)
 144   %133 = extractelement <4 x float> %132, i32 1
 145   %134 = fcmp une float 0xC02085D640000000, %133
 146   %135 = select i1 %134, float 1.000000e+00, float 0.000000e+00
 147   %136 = fsub float -0.000000e+00, %131
 148   %137 = fptosi float %136 to i32
 149   %138 = fsub float -0.000000e+00, %135
 150   %139 = fptosi float %138 to i32
 151   %140 = bitcast i32 %137 to float
 152   %141 = bitcast i32 %139 to float
 153   %142 = bitcast float %140 to i32
 154   %143 = bitcast float %141 to i32
 155   %144 = or i32 %142, %143
 156   %145 = bitcast i32 %144 to float
 157   %146 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
 158   %147 = extractelement <4 x float> %146, i32 0
 159   %148 = fcmp une float 0xBFD7C1BDA0000000, %147
 160   %149 = select i1 %148, float 1.000000e+00, float 0.000000e+00
 161   %150 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
 162   %151 = extractelement <4 x float> %150, i32 1
 163   %152 = fcmp une float 0x401E1D7DC0000000, %151
 164   %153 = select i1 %152, float 1.000000e+00, float 0.000000e+00
 165   %154 = fsub float -0.000000e+00, %149
 166   %155 = fptosi float %154 to i32
 167   %156 = fsub float -0.000000e+00, %153
 168   %157 = fptosi float %156 to i32
 169   %158 = bitcast i32 %155 to float
 170   %159 = bitcast i32 %157 to float
 171   %160 = bitcast float %158 to i32
 172   %161 = bitcast float %159 to i32
 173   %162 = or i32 %160, %161
 174   %163 = bitcast i32 %162 to float
 175   %164 = insertelement <4 x float> undef, float %109, i32 0
 176   %165 = insertelement <4 x float> %164, float %127, i32 1
 177   %166 = insertelement <4 x float> %165, float %145, i32 2
 178   %167 = insertelement <4 x float> %166, float %163, i32 3
 179   %168 = insertelement <4 x float> undef, float %109, i32 0
 180   %169 = insertelement <4 x float> %168, float %127, i32 1
 181   %170 = insertelement <4 x float> %169, float %145, i32 2
 182   %171 = insertelement <4 x float> %170, float %163, i32 3
 183   %172 = call float @llvm.AMDGPU.dp4(<4 x float> %167, <4 x float> %171)
 184   %173 = bitcast float %172 to i32
 185   %174 = icmp ne i32 %173, 0
 186   %175 = sext i1 %174 to i32
 187   %176 = bitcast i32 %175 to float
 188   %177 = bitcast float %176 to i32
 189   %178 = xor i32 %177, -1
 190   %179 = bitcast i32 %178 to float
 191   %180 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
 192   %181 = extractelement <4 x float> %180, i32 0
 193   %182 = fcmp une float 0x401FDCC640000000, %181
 194   %183 = select i1 %182, float 1.000000e+00, float 0.000000e+00
 195   %184 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
 196   %185 = extractelement <4 x float> %184, i32 1
 197   %186 = fcmp une float 0xC00574BC60000000, %185
 198   %187 = select i1 %186, float 1.000000e+00, float 0.000000e+00
 199   %188 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
 200   %189 = extractelement <4 x float> %188, i32 2
 201   %190 = fcmp une float 0x40210068E0000000, %189
 202   %191 = select i1 %190, float 1.000000e+00, float 0.000000e+00
 203   %192 = fsub float -0.000000e+00, %183
 204   %193 = fptosi float %192 to i32
 205   %194 = fsub float -0.000000e+00, %187
 206   %195 = fptosi float %194 to i32
 207   %196 = fsub float -0.000000e+00, %191
 208   %197 = fptosi float %196 to i32
 209   %198 = bitcast i32 %193 to float
 210   %199 = bitcast i32 %195 to float
 211   %200 = bitcast i32 %197 to float
 212   %201 = bitcast float %199 to i32
 213   %202 = bitcast float %200 to i32
 214   %203 = or i32 %201, %202
 215   %204 = bitcast i32 %203 to float
 216   %205 = bitcast float %198 to i32
 217   %206 = bitcast float %204 to i32
 218   %207 = or i32 %205, %206
 219   %208 = bitcast i32 %207 to float
 220   %209 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
 221   %210 = extractelement <4 x float> %209, i32 0
 222   %211 = fcmp une float 0xBFC9A6B500000000, %210
 223   %212 = select i1 %211, float 1.000000e+00, float 0.000000e+00
 224   %213 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
 225   %214 = extractelement <4 x float> %213, i32 1
 226   %215 = fcmp une float 0xC0119BDA60000000, %214
 227   %216 = select i1 %215, float 1.000000e+00, float 0.000000e+00
 228   %217 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
 229   %218 = extractelement <4 x float> %217, i32 2
 230   %219 = fcmp une float 0xC02085D640000000, %218
 231   %220 = select i1 %219, float 1.000000e+00, float 0.000000e+00
 232   %221 = fsub float -0.000000e+00, %212
 233   %222 = fptosi float %221 to i32
 234   %223 = fsub float -0.000000e+00, %216
 235   %224 = fptosi float %223 to i32
 236   %225 = fsub float -0.000000e+00, %220
 237   %226 = fptosi float %225 to i32
 238   %227 = bitcast i32 %222 to float
 239   %228 = bitcast i32 %224 to float
 240   %229 = bitcast i32 %226 to float
 241   %230 = bitcast float %228 to i32
 242   %231 = bitcast float %229 to i32
 243   %232 = or i32 %230, %231
 244   %233 = bitcast i32 %232 to float
 245   %234 = bitcast float %227 to i32
 246   %235 = bitcast float %233 to i32
 247   %236 = or i32 %234, %235
 248   %237 = bitcast i32 %236 to float
 249   %238 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
 250   %239 = extractelement <4 x float> %238, i32 0
 251   %240 = fcmp une float 0xBFD7C1BDA0000000, %239
 252   %241 = select i1 %240, float 1.000000e+00, float 0.000000e+00
 253   %242 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
 254   %243 = extractelement <4 x float> %242, i32 1
 255   %244 = fcmp une float 0x401E1D7DC0000000, %243
 256   %245 = select i1 %244, float 1.000000e+00, float 0.000000e+00
 257   %246 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
 258   %247 = extractelement <4 x float> %246, i32 2
 259   %248 = fcmp une float 0xC019893740000000, %247
 260   %249 = select i1 %248, float 1.000000e+00, float 0.000000e+00
 261   %250 = fsub float -0.000000e+00, %241
 262   %251 = fptosi float %250 to i32
 263   %252 = fsub float -0.000000e+00, %245
 264   %253 = fptosi float %252 to i32
 265   %254 = fsub float -0.000000e+00, %249
 266   %255 = fptosi float %254 to i32
 267   %256 = bitcast i32 %251 to float
 268   %257 = bitcast i32 %253 to float
 269   %258 = bitcast i32 %255 to float
 270   %259 = bitcast float %257 to i32
 271   %260 = bitcast float %258 to i32
 272   %261 = or i32 %259, %260
 273   %262 = bitcast i32 %261 to float
 274   %263 = bitcast float %256 to i32
 275   %264 = bitcast float %262 to i32
 276   %265 = or i32 %263, %264
 277   %266 = bitcast i32 %265 to float
 278   %267 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
 279   %268 = extractelement <4 x float> %267, i32 0
 280   %269 = fcmp une float 0x40220F0D80000000, %268
 281   %270 = select i1 %269, float 1.000000e+00, float 0.000000e+00
 282   %271 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
 283   %272 = extractelement <4 x float> %271, i32 1
 284   %273 = fcmp une float 0xC018E2EB20000000, %272
 285   %274 = select i1 %273, float 1.000000e+00, float 0.000000e+00
 286   %275 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
 287   %276 = extractelement <4 x float> %275, i32 2
 288   %277 = fcmp une float 0xBFEA8DB8C0000000, %276
 289   %278 = select i1 %277, float 1.000000e+00, float 0.000000e+00
 290   %279 = fsub float -0.000000e+00, %270
 291   %280 = fptosi float %279 to i32
 292   %281 = fsub float -0.000000e+00, %274
 293   %282 = fptosi float %281 to i32
 294   %283 = fsub float -0.000000e+00, %278
 295   %284 = fptosi float %283 to i32
 296   %285 = bitcast i32 %280 to float
 297   %286 = bitcast i32 %282 to float
 298   %287 = bitcast i32 %284 to float
 299   %288 = bitcast float %286 to i32
 300   %289 = bitcast float %287 to i32
 301   %290 = or i32 %288, %289
 302   %291 = bitcast i32 %290 to float
 303   %292 = bitcast float %285 to i32
 304   %293 = bitcast float %291 to i32
 305   %294 = or i32 %292, %293
 306   %295 = bitcast i32 %294 to float
 307   %296 = insertelement <4 x float> undef, float %208, i32 0
 308   %297 = insertelement <4 x float> %296, float %237, i32 1
 309   %298 = insertelement <4 x float> %297, float %266, i32 2
 310   %299 = insertelement <4 x float> %298, float %295, i32 3
 311   %300 = insertelement <4 x float> undef, float %208, i32 0
 312   %301 = insertelement <4 x float> %300, float %237, i32 1
 313   %302 = insertelement <4 x float> %301, float %266, i32 2
 314   %303 = insertelement <4 x float> %302, float %295, i32 3
 315   %304 = call float @llvm.AMDGPU.dp4(<4 x float> %299, <4 x float> %303)
 316   %305 = bitcast float %304 to i32
 317   %306 = icmp ne i32 %305, 0
 318   %307 = sext i1 %306 to i32
 319   %308 = bitcast i32 %307 to float
 320   %309 = bitcast float %308 to i32
 321   %310 = xor i32 %309, -1
 322   %311 = bitcast i32 %310 to float
 323   %312 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
 324   %313 = extractelement <4 x float> %312, i32 0
 325   %314 = fcmp une float 0xC00574BC60000000, %313
 326   %315 = select i1 %314, float 1.000000e+00, float 0.000000e+00
 327   %316 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
 328   %317 = extractelement <4 x float> %316, i32 1
 329   %318 = fcmp une float 0x40210068E0000000, %317
 330   %319 = select i1 %318, float 1.000000e+00, float 0.000000e+00
 331   %320 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
 332   %321 = extractelement <4 x float> %320, i32 2
 333   %322 = fcmp une float 0xBFC9A6B500000000, %321
 334   %323 = select i1 %322, float 1.000000e+00, float 0.000000e+00
 335   %324 = fsub float -0.000000e+00, %315
 336   %325 = fptosi float %324 to i32
 337   %326 = fsub float -0.000000e+00, %319
 338   %327 = fptosi float %326 to i32
 339   %328 = fsub float -0.000000e+00, %323
 340   %329 = fptosi float %328 to i32
 341   %330 = bitcast i32 %325 to float
 342   %331 = bitcast i32 %327 to float
 343   %332 = bitcast i32 %329 to float
 344   %333 = bitcast float %331 to i32
 345   %334 = bitcast float %332 to i32
 346   %335 = or i32 %333, %334
 347   %336 = bitcast i32 %335 to float
 348   %337 = bitcast float %330 to i32
 349   %338 = bitcast float %336 to i32
 350   %339 = or i32 %337, %338
 351   %340 = bitcast i32 %339 to float
 352   %341 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
 353   %342 = extractelement <4 x float> %341, i32 0
 354   %343 = fcmp une float 0xC0119BDA60000000, %342
 355   %344 = select i1 %343, float 1.000000e+00, float 0.000000e+00
 356   %345 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
 357   %346 = extractelement <4 x float> %345, i32 1
 358   %347 = fcmp une float 0xC02085D640000000, %346
 359   %348 = select i1 %347, float 1.000000e+00, float 0.000000e+00
 360   %349 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
 361   %350 = extractelement <4 x float> %349, i32 2
 362   %351 = fcmp une float 0xBFD7C1BDA0000000, %350
 363   %352 = select i1 %351, float 1.000000e+00, float 0.000000e+00
 364   %353 = fsub float -0.000000e+00, %344
 365   %354 = fptosi float %353 to i32
 366   %355 = fsub float -0.000000e+00, %348
 367   %356 = fptosi float %355 to i32
 368   %357 = fsub float -0.000000e+00, %352
 369   %358 = fptosi float %357 to i32
 370   %359 = bitcast i32 %354 to float
 371   %360 = bitcast i32 %356 to float
 372   %361 = bitcast i32 %358 to float
 373   %362 = bitcast float %360 to i32
 374   %363 = bitcast float %361 to i32
 375   %364 = or i32 %362, %363
 376   %365 = bitcast i32 %364 to float
 377   %366 = bitcast float %359 to i32
 378   %367 = bitcast float %365 to i32
 379   %368 = or i32 %366, %367
 380   %369 = bitcast i32 %368 to float
 381   %370 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
 382   %371 = extractelement <4 x float> %370, i32 0
 383   %372 = fcmp une float 0x401E1D7DC0000000, %371
 384   %373 = select i1 %372, float 1.000000e+00, float 0.000000e+00
 385   %374 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
 386   %375 = extractelement <4 x float> %374, i32 1
 387   %376 = fcmp une float 0xC019893740000000, %375
 388   %377 = select i1 %376, float 1.000000e+00, float 0.000000e+00
 389   %378 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
 390   %379 = extractelement <4 x float> %378, i32 2
 391   %380 = fcmp une float 0x40220F0D80000000, %379
 392   %381 = select i1 %380, float 1.000000e+00, float 0.000000e+00
 393   %382 = fsub float -0.000000e+00, %373
 394   %383 = fptosi float %382 to i32
 395   %384 = fsub float -0.000000e+00, %377
 396   %385 = fptosi float %384 to i32
 397   %386 = fsub float -0.000000e+00, %381
 398   %387 = fptosi float %386 to i32
 399   %388 = bitcast i32 %383 to float
 400   %389 = bitcast i32 %385 to float
 401   %390 = bitcast i32 %387 to float
 402   %391 = bitcast float %389 to i32
 403   %392 = bitcast float %390 to i32
 404   %393 = or i32 %391, %392
 405   %394 = bitcast i32 %393 to float
 406   %395 = bitcast float %388 to i32
 407   %396 = bitcast float %394 to i32
 408   %397 = or i32 %395, %396
 409   %398 = bitcast i32 %397 to float
 410   %399 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
 411   %400 = extractelement <4 x float> %399, i32 0
 412   %401 = fcmp une float 0xC018E2EB20000000, %400
 413   %402 = select i1 %401, float 1.000000e+00, float 0.000000e+00
 414   %403 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
 415   %404 = extractelement <4 x float> %403, i32 1
 416   %405 = fcmp une float 0xBFEA8DB8C0000000, %404
 417   %406 = select i1 %405, float 1.000000e+00, float 0.000000e+00
 418   %407 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
 419   %408 = extractelement <4 x float> %407, i32 2
 420   %409 = fcmp une float 0x4015236E20000000, %408
 421   %410 = select i1 %409, float 1.000000e+00, float 0.000000e+00
 422   %411 = fsub float -0.000000e+00, %402
 423   %412 = fptosi float %411 to i32
 424   %413 = fsub float -0.000000e+00, %406
 425   %414 = fptosi float %413 to i32
 426   %415 = fsub float -0.000000e+00, %410
 427   %416 = fptosi float %415 to i32
 428   %417 = bitcast i32 %412 to float
 429   %418 = bitcast i32 %414 to float
 430   %419 = bitcast i32 %416 to float
 431   %420 = bitcast float %418 to i32
 432   %421 = bitcast float %419 to i32
 433   %422 = or i32 %420, %421
 434   %423 = bitcast i32 %422 to float
 435   %424 = bitcast float %417 to i32
 436   %425 = bitcast float %423 to i32
 437   %426 = or i32 %424, %425
 438   %427 = bitcast i32 %426 to float
 439   %428 = insertelement <4 x float> undef, float %340, i32 0
 440   %429 = insertelement <4 x float> %428, float %369, i32 1
 441   %430 = insertelement <4 x float> %429, float %398, i32 2
 442   %431 = insertelement <4 x float> %430, float %427, i32 3
 443   %432 = insertelement <4 x float> undef, float %340, i32 0
 444   %433 = insertelement <4 x float> %432, float %369, i32 1
 445   %434 = insertelement <4 x float> %433, float %398, i32 2
 446   %435 = insertelement <4 x float> %434, float %427, i32 3
 447   %436 = call float @llvm.AMDGPU.dp4(<4 x float> %431, <4 x float> %435)
 448   %437 = bitcast float %436 to i32
 449   %438 = icmp ne i32 %437, 0
 450   %439 = sext i1 %438 to i32
 451   %440 = bitcast i32 %439 to float
 452   %441 = bitcast float %440 to i32
 453   %442 = xor i32 %441, -1
 454   %443 = bitcast i32 %442 to float
 455   %444 = load <4 x float> addrspace(8)* null
 456   %445 = extractelement <4 x float> %444, i32 0
 457   %446 = fcmp une float 0xC00574BC60000000, %445
 458   %447 = select i1 %446, float 1.000000e+00, float 0.000000e+00
 459   %448 = load <4 x float> addrspace(8)* null
 460   %449 = extractelement <4 x float> %448, i32 1
 461   %450 = fcmp une float 0x40210068E0000000, %449
 462   %451 = select i1 %450, float 1.000000e+00, float 0.000000e+00
 463   %452 = load <4 x float> addrspace(8)* null
 464   %453 = extractelement <4 x float> %452, i32 2
 465   %454 = fcmp une float 0xBFC9A6B500000000, %453
 466   %455 = select i1 %454, float 1.000000e+00, float 0.000000e+00
 467   %456 = load <4 x float> addrspace(8)* null
 468   %457 = extractelement <4 x float> %456, i32 3
 469   %458 = fcmp une float 0xC0119BDA60000000, %457
 470   %459 = select i1 %458, float 1.000000e+00, float 0.000000e+00
 471   %460 = fsub float -0.000000e+00, %447
 472   %461 = fptosi float %460 to i32
 473   %462 = fsub float -0.000000e+00, %451
 474   %463 = fptosi float %462 to i32
 475   %464 = fsub float -0.000000e+00, %455
 476   %465 = fptosi float %464 to i32
 477   %466 = fsub float -0.000000e+00, %459
 478   %467 = fptosi float %466 to i32
 479   %468 = bitcast i32 %461 to float
 480   %469 = bitcast i32 %463 to float
 481   %470 = bitcast i32 %465 to float
 482   %471 = bitcast i32 %467 to float
 483   %472 = bitcast float %468 to i32
 484   %473 = bitcast float %469 to i32
 485   %474 = or i32 %472, %473
 486   %475 = bitcast i32 %474 to float
 487   %476 = bitcast float %470 to i32
 488   %477 = bitcast float %471 to i32
 489   %478 = or i32 %476, %477
 490   %479 = bitcast i32 %478 to float
 491   %480 = bitcast float %475 to i32
 492   %481 = bitcast float %479 to i32
 493   %482 = or i32 %480, %481
 494   %483 = bitcast i32 %482 to float
 495   %484 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
 496   %485 = extractelement <4 x float> %484, i32 0
 497   %486 = fcmp une float 0xC02085D640000000, %485
 498   %487 = select i1 %486, float 1.000000e+00, float 0.000000e+00
 499   %488 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
 500   %489 = extractelement <4 x float> %488, i32 1
 501   %490 = fcmp une float 0xBFD7C1BDA0000000, %489
 502   %491 = select i1 %490, float 1.000000e+00, float 0.000000e+00
 503   %492 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
 504   %493 = extractelement <4 x float> %492, i32 2
 505   %494 = fcmp une float 0x401E1D7DC0000000, %493
 506   %495 = select i1 %494, float 1.000000e+00, float 0.000000e+00
 507   %496 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
 508   %497 = extractelement <4 x float> %496, i32 3
 509   %498 = fcmp une float 0xC019893740000000, %497
 510   %499 = select i1 %498, float 1.000000e+00, float 0.000000e+00
 511   %500 = fsub float -0.000000e+00, %487
 512   %501 = fptosi float %500 to i32
 513   %502 = fsub float -0.000000e+00, %491
 514   %503 = fptosi float %502 to i32
 515   %504 = fsub float -0.000000e+00, %495
 516   %505 = fptosi float %504 to i32
 517   %506 = fsub float -0.000000e+00, %499
 518   %507 = fptosi float %506 to i32
 519   %508 = bitcast i32 %501 to float
 520   %509 = bitcast i32 %503 to float
 521   %510 = bitcast i32 %505 to float
 522   %511 = bitcast i32 %507 to float
 523   %512 = bitcast float %508 to i32
 524   %513 = bitcast float %509 to i32
 525   %514 = or i32 %512, %513
 526   %515 = bitcast i32 %514 to float
 527   %516 = bitcast float %510 to i32
 528   %517 = bitcast float %511 to i32
 529   %518 = or i32 %516, %517
 530   %519 = bitcast i32 %518 to float
 531   %520 = bitcast float %515 to i32
 532   %521 = bitcast float %519 to i32
 533   %522 = or i32 %520, %521
 534   %523 = bitcast i32 %522 to float
 535   %524 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
 536   %525 = extractelement <4 x float> %524, i32 0
 537   %526 = fcmp une float 0x40220F0D80000000, %525
 538   %527 = select i1 %526, float 1.000000e+00, float 0.000000e+00
 539   %528 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
 540   %529 = extractelement <4 x float> %528, i32 1
 541   %530 = fcmp une float 0xC018E2EB20000000, %529
 542   %531 = select i1 %530, float 1.000000e+00, float 0.000000e+00
 543   %532 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
 544   %533 = extractelement <4 x float> %532, i32 2
 545   %534 = fcmp une float 0xBFEA8DB8C0000000, %533
 546   %535 = select i1 %534, float 1.000000e+00, float 0.000000e+00
 547   %536 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
 548   %537 = extractelement <4 x float> %536, i32 3
 549   %538 = fcmp une float 0x4015236E20000000, %537
 550   %539 = select i1 %538, float 1.000000e+00, float 0.000000e+00
 551   %540 = fsub float -0.000000e+00, %527
 552   %541 = fptosi float %540 to i32
 553   %542 = fsub float -0.000000e+00, %531
 554   %543 = fptosi float %542 to i32
 555   %544 = fsub float -0.000000e+00, %535
 556   %545 = fptosi float %544 to i32
 557   %546 = fsub float -0.000000e+00, %539
 558   %547 = fptosi float %546 to i32
 559   %548 = bitcast i32 %541 to float
 560   %549 = bitcast i32 %543 to float
 561   %550 = bitcast i32 %545 to float
 562   %551 = bitcast i32 %547 to float
 563   %552 = bitcast float %548 to i32
 564   %553 = bitcast float %549 to i32
 565   %554 = or i32 %552, %553
 566   %555 = bitcast i32 %554 to float
 567   %556 = bitcast float %550 to i32
 568   %557 = bitcast float %551 to i32
 569   %558 = or i32 %556, %557
 570   %559 = bitcast i32 %558 to float
 571   %560 = bitcast float %555 to i32
 572   %561 = bitcast float %559 to i32
 573   %562 = or i32 %560, %561
 574   %563 = bitcast i32 %562 to float
 575   %564 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
 576   %565 = extractelement <4 x float> %564, i32 0
 577   %566 = fcmp une float 0x4016ED5D00000000, %565
 578   %567 = select i1 %566, float 1.000000e+00, float 0.000000e+00
 579   %568 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
 580   %569 = extractelement <4 x float> %568, i32 1
 581   %570 = fcmp une float 0x402332FEC0000000, %569
 582   %571 = select i1 %570, float 1.000000e+00, float 0.000000e+00
 583   %572 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
 584   %573 = extractelement <4 x float> %572, i32 2
 585   %574 = fcmp une float 0xC01484B5E0000000, %573
 586   %575 = select i1 %574, float 1.000000e+00, float 0.000000e+00
 587   %576 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
 588   %577 = extractelement <4 x float> %576, i32 3
 589   %578 = fcmp une float 0x400179A6C0000000, %577
 590   %579 = select i1 %578, float 1.000000e+00, float 0.000000e+00
 591   %580 = fsub float -0.000000e+00, %567
 592   %581 = fptosi float %580 to i32
 593   %582 = fsub float -0.000000e+00, %571
 594   %583 = fptosi float %582 to i32
 595   %584 = fsub float -0.000000e+00, %575
 596   %585 = fptosi float %584 to i32
 597   %586 = fsub float -0.000000e+00, %579
 598   %587 = fptosi float %586 to i32
 599   %588 = bitcast i32 %581 to float
 600   %589 = bitcast i32 %583 to float
 601   %590 = bitcast i32 %585 to float
 602   %591 = bitcast i32 %587 to float
 603   %592 = bitcast float %588 to i32
 604   %593 = bitcast float %589 to i32
 605   %594 = or i32 %592, %593
 606   %595 = bitcast i32 %594 to float
 607   %596 = bitcast float %590 to i32
 608   %597 = bitcast float %591 to i32
 609   %598 = or i32 %596, %597
 610   %599 = bitcast i32 %598 to float
 611   %600 = bitcast float %595 to i32
 612   %601 = bitcast float %599 to i32
 613   %602 = or i32 %600, %601
 614   %603 = bitcast i32 %602 to float
 615   %604 = insertelement <4 x float> undef, float %483, i32 0
 616   %605 = insertelement <4 x float> %604, float %523, i32 1
 617   %606 = insertelement <4 x float> %605, float %563, i32 2
 618   %607 = insertelement <4 x float> %606, float %603, i32 3
 619   %608 = insertelement <4 x float> undef, float %483, i32 0
 620   %609 = insertelement <4 x float> %608, float %523, i32 1
 621   %610 = insertelement <4 x float> %609, float %563, i32 2
 622   %611 = insertelement <4 x float> %610, float %603, i32 3
 623   %612 = call float @llvm.AMDGPU.dp4(<4 x float> %607, <4 x float> %611)
 624   %613 = bitcast float %612 to i32
 625   %614 = icmp ne i32 %613, 0
 626   %615 = sext i1 %614 to i32
 627   %616 = bitcast i32 %615 to float
 628   %617 = bitcast float %616 to i32
 629   %618 = xor i32 %617, -1
 630   %619 = bitcast i32 %618 to float
 631   %620 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
 632   %621 = extractelement <4 x float> %620, i32 0
 633   %622 = fcmp une float 0x40210068E0000000, %621
 634   %623 = select i1 %622, float 1.000000e+00, float 0.000000e+00
 635   %624 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
 636   %625 = extractelement <4 x float> %624, i32 1
 637   %626 = fcmp une float 0xBFC9A6B500000000, %625
 638   %627 = select i1 %626, float 1.000000e+00, float 0.000000e+00
 639   %628 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
 640   %629 = extractelement <4 x float> %628, i32 2
 641   %630 = fcmp une float 0xC0119BDA60000000, %629
 642   %631 = select i1 %630, float 1.000000e+00, float 0.000000e+00
 643   %632 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
 644   %633 = extractelement <4 x float> %632, i32 3
 645   %634 = fcmp une float 0xC02085D640000000, %633
 646   %635 = select i1 %634, float 1.000000e+00, float 0.000000e+00
 647   %636 = fsub float -0.000000e+00, %623
 648   %637 = fptosi float %636 to i32
 649   %638 = fsub float -0.000000e+00, %627
 650   %639 = fptosi float %638 to i32
 651   %640 = fsub float -0.000000e+00, %631
 652   %641 = fptosi float %640 to i32
 653   %642 = fsub float -0.000000e+00, %635
 654   %643 = fptosi float %642 to i32
 655   %644 = bitcast i32 %637 to float
 656   %645 = bitcast i32 %639 to float
 657   %646 = bitcast i32 %641 to float
 658   %647 = bitcast i32 %643 to float
 659   %648 = bitcast float %644 to i32
 660   %649 = bitcast float %645 to i32
 661   %650 = or i32 %648, %649
 662   %651 = bitcast i32 %650 to float
 663   %652 = bitcast float %646 to i32
 664   %653 = bitcast float %647 to i32
 665   %654 = or i32 %652, %653
 666   %655 = bitcast i32 %654 to float
 667   %656 = bitcast float %651 to i32
 668   %657 = bitcast float %655 to i32
 669   %658 = or i32 %656, %657
 670   %659 = bitcast i32 %658 to float
 671   %660 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
 672   %661 = extractelement <4 x float> %660, i32 0
 673   %662 = fcmp une float 0xBFD7C1BDA0000000, %661
 674   %663 = select i1 %662, float 1.000000e+00, float 0.000000e+00
 675   %664 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
 676   %665 = extractelement <4 x float> %664, i32 1
 677   %666 = fcmp une float 0x401E1D7DC0000000, %665
 678   %667 = select i1 %666, float 1.000000e+00, float 0.000000e+00
 679   %668 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
 680   %669 = extractelement <4 x float> %668, i32 2
 681   %670 = fcmp une float 0xC019893740000000, %669
 682   %671 = select i1 %670, float 1.000000e+00, float 0.000000e+00
 683   %672 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
 684   %673 = extractelement <4 x float> %672, i32 3
 685   %674 = fcmp une float 0x40220F0D80000000, %673
 686   %675 = select i1 %674, float 1.000000e+00, float 0.000000e+00
 687   %676 = fsub float -0.000000e+00, %663
 688   %677 = fptosi float %676 to i32
 689   %678 = fsub float -0.000000e+00, %667
 690   %679 = fptosi float %678 to i32
 691   %680 = fsub float -0.000000e+00, %671
 692   %681 = fptosi float %680 to i32
 693   %682 = fsub float -0.000000e+00, %675
 694   %683 = fptosi float %682 to i32
 695   %684 = bitcast i32 %677 to float
 696   %685 = bitcast i32 %679 to float
 697   %686 = bitcast i32 %681 to float
 698   %687 = bitcast i32 %683 to float
 699   %688 = bitcast float %684 to i32
 700   %689 = bitcast float %685 to i32
 701   %690 = or i32 %688, %689
 702   %691 = bitcast i32 %690 to float
 703   %692 = bitcast float %686 to i32
 704   %693 = bitcast float %687 to i32
 705   %694 = or i32 %692, %693
 706   %695 = bitcast i32 %694 to float
 707   %696 = bitcast float %691 to i32
 708   %697 = bitcast float %695 to i32
 709   %698 = or i32 %696, %697
 710   %699 = bitcast i32 %698 to float
 711   %700 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
 712   %701 = extractelement <4 x float> %700, i32 0
 713   %702 = fcmp une float 0xC018E2EB20000000, %701
 714   %703 = select i1 %702, float 1.000000e+00, float 0.000000e+00
 715   %704 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
 716   %705 = extractelement <4 x float> %704, i32 1
 717   %706 = fcmp une float 0xBFEA8DB8C0000000, %705
 718   %707 = select i1 %706, float 1.000000e+00, float 0.000000e+00
 719   %708 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
 720   %709 = extractelement <4 x float> %708, i32 2
 721   %710 = fcmp une float 0x4015236E20000000, %709
 722   %711 = select i1 %710, float 1.000000e+00, float 0.000000e+00
 723   %712 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
 724   %713 = extractelement <4 x float> %712, i32 3
 725   %714 = fcmp une float 0x4016ED5D00000000, %713
 726   %715 = select i1 %714, float 1.000000e+00, float 0.000000e+00
 727   %716 = fsub float -0.000000e+00, %703
 728   %717 = fptosi float %716 to i32
 729   %718 = fsub float -0.000000e+00, %707
 730   %719 = fptosi float %718 to i32
 731   %720 = fsub float -0.000000e+00, %711
 732   %721 = fptosi float %720 to i32
 733   %722 = fsub float -0.000000e+00, %715
 734   %723 = fptosi float %722 to i32
 735   %724 = bitcast i32 %717 to float
 736   %725 = bitcast i32 %719 to float
 737   %726 = bitcast i32 %721 to float
 738   %727 = bitcast i32 %723 to float
 739   %728 = bitcast float %724 to i32
 740   %729 = bitcast float %725 to i32
 741   %730 = or i32 %728, %729
 742   %731 = bitcast i32 %730 to float
 743   %732 = bitcast float %726 to i32
 744   %733 = bitcast float %727 to i32
 745   %734 = or i32 %732, %733
 746   %735 = bitcast i32 %734 to float
 747   %736 = bitcast float %731 to i32
 748   %737 = bitcast float %735 to i32
 749   %738 = or i32 %736, %737
 750   %739 = bitcast i32 %738 to float
 751   %740 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
 752   %741 = extractelement <4 x float> %740, i32 0
 753   %742 = fcmp une float 0x402332FEC0000000, %741
 754   %743 = select i1 %742, float 1.000000e+00, float 0.000000e+00
 755   %744 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
 756   %745 = extractelement <4 x float> %744, i32 1
 757   %746 = fcmp une float 0xC01484B5E0000000, %745
 758   %747 = select i1 %746, float 1.000000e+00, float 0.000000e+00
 759   %748 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
 760   %749 = extractelement <4 x float> %748, i32 2
 761   %750 = fcmp une float 0x400179A6C0000000, %749
 762   %751 = select i1 %750, float 1.000000e+00, float 0.000000e+00
 763   %752 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
 764   %753 = extractelement <4 x float> %752, i32 3
 765   %754 = fcmp une float 0xBFEE752540000000, %753
 766   %755 = select i1 %754, float 1.000000e+00, float 0.000000e+00
 767   %756 = fsub float -0.000000e+00, %743
 768   %757 = fptosi float %756 to i32
 769   %758 = fsub float -0.000000e+00, %747
 770   %759 = fptosi float %758 to i32
 771   %760 = fsub float -0.000000e+00, %751
 772   %761 = fptosi float %760 to i32
 773   %762 = fsub float -0.000000e+00, %755
 774   %763 = fptosi float %762 to i32
 775   %764 = bitcast i32 %757 to float
 776   %765 = bitcast i32 %759 to float
 777   %766 = bitcast i32 %761 to float
 778   %767 = bitcast i32 %763 to float
 779   %768 = bitcast float %764 to i32
 780   %769 = bitcast float %765 to i32
 781   %770 = or i32 %768, %769
 782   %771 = bitcast i32 %770 to float
 783   %772 = bitcast float %766 to i32
 784   %773 = bitcast float %767 to i32
 785   %774 = or i32 %772, %773
 786   %775 = bitcast i32 %774 to float
 787   %776 = bitcast float %771 to i32
 788   %777 = bitcast float %775 to i32
 789   %778 = or i32 %776, %777
 790   %779 = bitcast i32 %778 to float
 791   %780 = insertelement <4 x float> undef, float %659, i32 0
 792   %781 = insertelement <4 x float> %780, float %699, i32 1
 793   %782 = insertelement <4 x float> %781, float %739, i32 2
 794   %783 = insertelement <4 x float> %782, float %779, i32 3
 795   %784 = insertelement <4 x float> undef, float %659, i32 0
 796   %785 = insertelement <4 x float> %784, float %699, i32 1
 797   %786 = insertelement <4 x float> %785, float %739, i32 2
 798   %787 = insertelement <4 x float> %786, float %779, i32 3
 799   %788 = call float @llvm.AMDGPU.dp4(<4 x float> %783, <4 x float> %787)
 800   %789 = bitcast float %788 to i32
 801   %790 = icmp ne i32 %789, 0
 802   %791 = sext i1 %790 to i32
 803   %792 = bitcast i32 %791 to float
 804   %793 = bitcast float %792 to i32
 805   %794 = xor i32 %793, -1
 806   %795 = bitcast i32 %794 to float
 807   %796 = bitcast float %91 to i32
 808   %797 = bitcast float %179 to i32
 809   %798 = and i32 %796, %797
 810   %799 = bitcast i32 %798 to float
 811   %800 = bitcast float %311 to i32
 812   %801 = bitcast float %443 to i32
 813   %802 = and i32 %800, %801
 814   %803 = bitcast i32 %802 to float
 815   %804 = bitcast float %799 to i32
 816   %805 = bitcast float %803 to i32
 817   %806 = and i32 %804, %805
 818   %807 = bitcast i32 %806 to float
 819   %808 = bitcast float %619 to i32
 820   %809 = bitcast float %795 to i32
 821   %810 = and i32 %808, %809
 822   %811 = bitcast i32 %810 to float
 823   %812 = bitcast float %807 to i32
 824   %813 = bitcast float %811 to i32
 825   %814 = and i32 %812, %813
 826   %815 = bitcast i32 %814 to float
 827   %816 = bitcast float %815 to i32
 828   %817 = icmp ne i32 %816, 0
 829   %. = select i1 %817, float 1.000000e+00, float 0.000000e+00
 830   %.32 = select i1 %817, float 0.000000e+00, float 1.000000e+00
 831   %818 = insertelement <4 x float> undef, float %0, i32 0
 832   %819 = insertelement <4 x float> %818, float %1, i32 1
 833   %820 = insertelement <4 x float> %819, float %2, i32 2
 834   %821 = insertelement <4 x float> %820, float %3, i32 3
 835   call void @llvm.R600.store.swizzle(<4 x float> %821, i32 60, i32 1)
 836   %822 = insertelement <4 x float> undef, float %.32, i32 0
 837   %823 = insertelement <4 x float> %822, float %., i32 1
 838   %824 = insertelement <4 x float> %823, float 0.000000e+00, i32 2
 839   %825 = insertelement <4 x float> %824, float 1.000000e+00, i32 3
 840   call void @llvm.R600.store.swizzle(<4 x float> %825, i32 0, i32 2)
 841   ret void
 842 }
 843
 844 declare float @llvm.R600.load.input(i32) #1 ; external intrinsic, i32 -> float, attribute group #1 (readnone); @main calls it with constant indices 4..7
 845
 846 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; external intrinsic, two <4 x float> operands -> float, attribute group #1 (readnone); presumably a 4-component dot product -- TODO confirm against the backend
 847
 848 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) ; external intrinsic returning void; no attribute group attached, so it is not marked readnone; NOTE(review): meaning of the two i32 operands is not visible in this file -- confirm against the backend
 849
 850 attributes #0 = { "ShaderType"="1" } ; group #0 is attached to @main; NOTE(review): which shader stage the value "1" selects is defined by the backend, not by this file
 851 attributes #1 = { readnone } ; group #1 is attached to the load.input and dp4 declarations