test/CodeGen/SystemZ/frame-14.ll

   1 ; Test the handling of base + displacement addresses for large frames,
   2 ; in cases where both 12-bit and 20-bit displacements are allowed.
   3 ;
   4 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
   5 ; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
   6
   7 ; This file tests what happens when a displacement is converted from
   8 ; being relative to the start of a frame object to being relative to
   9 ; the frame itself.  In some cases the test is only possible if two
  10 ; objects are allocated.
  11 ;
  12 ; Rather than rely on a particular order for those objects, the tests
  13 ; instead allocate two objects of the same size and apply the test to
  14 ; both of them.  For consistency, all tests follow this model, even if
  15 ; one object would actually be enough.
  16
  17 ; First check the highest offset that is in range of the 12-bit form.
  18 ;
  19 ; The last in-range doubleword offset is 4088.  The frame has two 8-byte
  20 ; emergency spill slots at 160(%r15) and 168(%r15), so the first object starts
  21 ; at offset 176; to put the other one at 4088 it must be 4088 - 176 = 3912 bytes.
  22 define void @f1() {
  23 ; CHECK-NOFP: f1:
  24 ; CHECK-NOFP: mvi 4095(%r15), 42
  25 ; CHECK-NOFP: br %r14
  26 ;
  27 ; CHECK-FP: f1:
  28 ; CHECK-FP: mvi 4095(%r11), 42
  29 ; CHECK-FP: br %r14
     ;
     ; Two 3912-byte objects are allocated and byte 7 of each is written.  For
     ; the object that ends up at frame offset 4088 the store lands at
     ; 4088 + 7 = 4095, the largest value a 12-bit displacement can hold, so
     ; the short MVI form is expected with either base register.
  30   %region1 = alloca [3912 x i8], align 8
  31   %region2 = alloca [3912 x i8], align 8
  32   %ptr1 = getelementptr inbounds [3912 x i8]* %region1, i64 0, i64 7
  33   %ptr2 = getelementptr inbounds [3912 x i8]* %region2, i64 0, i64 7
     ; The stores are volatile so that both of them remain in the output.
  34   store volatile i8 42, i8 *%ptr1
  35   store volatile i8 42, i8 *%ptr2
  36   ret void
  37 }
  38
  39 ; Test the first offset that is out-of-range of the 12-bit form.
  40 define void @f2() {
  41 ; CHECK-NOFP: f2:
  42 ; CHECK-NOFP: mviy 4096(%r15), 42
  43 ; CHECK-NOFP: br %r14
  44 ;
  45 ; CHECK-FP: f2:
  46 ; CHECK-FP: mviy 4096(%r11), 42
  47 ; CHECK-FP: br %r14
     ;
     ; Same 3912-byte objects as f1, but byte 8 is written instead of byte 7.
     ; For the object at frame offset 4088 that gives 4088 + 8 = 4096, one more
     ; than a 12-bit displacement can hold, so the 20-bit MVIY form is expected.
  48   %region1 = alloca [3912 x i8], align 8
  49   %region2 = alloca [3912 x i8], align 8
  50   %ptr1 = getelementptr inbounds [3912 x i8]* %region1, i64 0, i64 8
  51   %ptr2 = getelementptr inbounds [3912 x i8]* %region2, i64 0, i64 8
     ; The stores are volatile so that both of them remain in the output.
  52   store volatile i8 42, i8 *%ptr1
  53   store volatile i8 42, i8 *%ptr2
  54   ret void
  55 }
  56
  57 ; Test the last offset that is in range of the 20-bit form.
  58 ;
  59 ; The last in-range doubleword offset is 524280, so by the same reasoning
  60 ; as above, we need to allocate objects of 524280 - 176 = 524104 bytes.
  61 define void @f3() {
  62 ; CHECK-NOFP: f3:
  63 ; CHECK-NOFP: mviy 524287(%r15), 42
  64 ; CHECK-NOFP: br %r14
  65 ;
  66 ; CHECK-FP: f3:
  67 ; CHECK-FP: mviy 524287(%r11), 42
  68 ; CHECK-FP: br %r14
     ;
     ; Two 524104-byte objects are allocated and byte 7 of each is written.
     ; For the object at frame offset 524280 the store lands at
     ; 524280 + 7 = 524287 (2^19 - 1), the largest displacement the checks
     ; expect MVIY to encode directly.
  69   %region1 = alloca [524104 x i8], align 8
  70   %region2 = alloca [524104 x i8], align 8
  71   %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 7
  72   %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 7
     ; The stores are volatile so that both of them remain in the output.
  73   store volatile i8 42, i8 *%ptr1
  74   store volatile i8 42, i8 *%ptr2
  75   ret void
  76 }
  77
  78 ; Test the first out-of-range offset.  We can't use an index register here,
  79 ; and the offset is also out of LAY's range, so expect a constant load
  80 ; followed by an addition.
  81 define void @f4() {
  82 ; CHECK-NOFP: f4:
  83 ; CHECK-NOFP: llilh %r1, 8
  84 ; CHECK-NOFP: agr %r1, %r15
  85 ; CHECK-NOFP: mvi 0(%r1), 42
  86 ; CHECK-NOFP: br %r14
  87 ;
  88 ; CHECK-FP: f4:
  89 ; CHECK-FP: llilh %r1, 8
  90 ; CHECK-FP: agr %r1, %r11
  91 ; CHECK-FP: mvi 0(%r1), 42
  92 ; CHECK-FP: br %r14
     ;
     ; Same 524104-byte objects as f3, but byte 8 is written.  For the object
     ; at frame offset 524280 the target is 524280 + 8 = 524288 = 8 * 65536.
     ; The checks expect that constant to be built with LLILH (immediate 8),
     ; added to the base register, and the store done with a zero displacement.
  93   %region1 = alloca [524104 x i8], align 8
  94   %region2 = alloca [524104 x i8], align 8
  95   %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8
  96   %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8
     ; The stores are volatile so that both of them remain in the output.
  97   store volatile i8 42, i8 *%ptr1
  98   store volatile i8 42, i8 *%ptr2
  99   ret void
 100 }
 101
 102 ; Add 4095 to the previous offset, to test the other end of the MVI range.
 103 ; The instruction will actually be STCY before frame lowering.
 104 define void @f5() {
 105 ; CHECK-NOFP: f5:
 106 ; CHECK-NOFP: llilh %r1, 8
 107 ; CHECK-NOFP: agr %r1, %r15
 108 ; CHECK-NOFP: mvi 4095(%r1), 42
 109 ; CHECK-NOFP: br %r14
 110 ;
 111 ; CHECK-FP: f5:
 112 ; CHECK-FP: llilh %r1, 8
 113 ; CHECK-FP: agr %r1, %r11
 114 ; CHECK-FP: mvi 4095(%r1), 42
 115 ; CHECK-FP: br %r14
     ;
     ; Byte 4103 (= 8 + 4095) of each 524104-byte object is written.  For the
     ; object at frame offset 524280 the target is 524288 + 4095, so the checks
     ; expect the same LLILH/AGR anchor as f4 with the remaining 4095 carried
     ; as the MVI displacement.
 116   %region1 = alloca [524104 x i8], align 8
 117   %region2 = alloca [524104 x i8], align 8
 118   %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 4103
 119   %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 4103
     ; The stores are volatile so that both of them remain in the output.
 120   store volatile i8 42, i8 *%ptr1
 121   store volatile i8 42, i8 *%ptr2
 122   ret void
 123 }
 124
 125 ; Test the next offset after that, which uses MVIY instead of MVI.
 126 define void @f6() {
 127 ; CHECK-NOFP: f6:
 128 ; CHECK-NOFP: llilh %r1, 8
 129 ; CHECK-NOFP: agr %r1, %r15
 130 ; CHECK-NOFP: mviy 4096(%r1), 42
 131 ; CHECK-NOFP: br %r14
 132 ;
 133 ; CHECK-FP: f6:
 134 ; CHECK-FP: llilh %r1, 8
 135 ; CHECK-FP: agr %r1, %r11
 136 ; CHECK-FP: mviy 4096(%r1), 42
 137 ; CHECK-FP: br %r14
     ;
     ; Byte 4104 (= 8 + 4096) of each 524104-byte object is written.  The anchor
     ; is still 8 * 65536, but the remaining 4096 no longer fits a 12-bit
     ; displacement, so the checks expect MVIY rather than MVI.
 138   %region1 = alloca [524104 x i8], align 8
 139   %region2 = alloca [524104 x i8], align 8
 140   %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 4104
 141   %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 4104
     ; The stores are volatile so that both of them remain in the output.
 142   store volatile i8 42, i8 *%ptr1
 143   store volatile i8 42, i8 *%ptr2
 144   ret void
 145 }
 146
 147 ; Now try an offset of 524287 from the start of the object, with the
 148 ; object being at offset 1048576 (1 << 20).  The backend prefers to create
 149 ; anchors 0x10000 bytes apart, so that the high part can be loaded using
 150 ; LLILH while still using MVI in more cases than 0x40000 anchors would.
 151 define void @f7() {
 152 ; CHECK-NOFP: f7:
 153 ; CHECK-NOFP: llilh %r1, 23
 154 ; CHECK-NOFP: agr %r1, %r15
 155 ; CHECK-NOFP: mviy 65535(%r1), 42
 156 ; CHECK-NOFP: br %r14
 157 ;
 158 ; CHECK-FP: f7:
 159 ; CHECK-FP: llilh %r1, 23
 160 ; CHECK-FP: agr %r1, %r11
 161 ; CHECK-FP: mviy 65535(%r1), 42
 162 ; CHECK-FP: br %r14
     ;
     ; The objects are 1048400 = 1048576 - 176 bytes, so one of them starts at
     ; frame offset 1048576.  Byte 524287 of that object is at
     ; 1048576 + 524287 = 1572863 = 23 * 65536 + 65535, which is the split the
     ; checks expect: LLILH immediate 23 plus an MVIY displacement of 65535.
 163   %region1 = alloca [1048400 x i8], align 8
 164   %region2 = alloca [1048400 x i8], align 8
 165   %ptr1 = getelementptr inbounds [1048400 x i8]* %region1, i64 0, i64 524287
 166   %ptr2 = getelementptr inbounds [1048400 x i8]* %region2, i64 0, i64 524287
     ; The stores are volatile so that both of them remain in the output.
 167   store volatile i8 42, i8 *%ptr1
 168   store volatile i8 42, i8 *%ptr2
 169   ret void
 170 }
 171
 172 ; Keep the object-relative offset the same but bump the size of the
 173 ; objects by one doubleword.
 174 define void @f8() {
 175 ; CHECK-NOFP: f8:
 176 ; CHECK-NOFP: llilh %r1, 24
 177 ; CHECK-NOFP: agr %r1, %r15
 178 ; CHECK-NOFP: mvi 7(%r1), 42
 179 ; CHECK-NOFP: br %r14
 180 ;
 181 ; CHECK-FP: f8:
 182 ; CHECK-FP: llilh %r1, 24
 183 ; CHECK-FP: agr %r1, %r11
 184 ; CHECK-FP: mvi 7(%r1), 42
 185 ; CHECK-FP: br %r14
     ;
     ; The objects are 8 bytes larger than in f7 (1048408), so one of them
     ; starts at frame offset 176 + 1048408 = 1048584.  Byte 524287 of that
     ; object is at 1572871 = 24 * 65536 + 7, which crosses into the next
     ; 0x10000 anchor: LLILH immediate 24 and a short MVI displacement of 7.
 186   %region1 = alloca [1048408 x i8], align 8
 187   %region2 = alloca [1048408 x i8], align 8
 188   %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524287
 189   %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524287
     ; The stores are volatile so that both of them remain in the output.
 190   store volatile i8 42, i8 *%ptr1
 191   store volatile i8 42, i8 *%ptr2
 192   ret void
 193 }
 194
 195 ; Check a case where the original displacement is out of range.  The backend
 196 ; should force separate address logic from the outset.  We don't yet do any
 197 ; kind of anchor optimization, so there should be no offset on the MVI itself.
 198 ;
 199 ; Before frame lowering this is an LA followed by the AGFI seen below.
 200 ; The LA then gets lowered into the LLILH/LA form.  The exact sequence
 201 ; isn't that important though.
 202 define void @f9() {
 203 ; CHECK-NOFP: f9:
 204 ; CHECK-NOFP: llilh [[R1:%r[1-5]]], 16
 205 ; CHECK-NOFP: la [[R2:%r[1-5]]], 8([[R1]],%r15)
 206 ; CHECK-NOFP: agfi [[R2]], 524288
 207 ; CHECK-NOFP: mvi 0([[R2]]), 42
 208 ; CHECK-NOFP: br %r14
 209 ;
 210 ; CHECK-FP: f9:
 211 ; CHECK-FP: llilh [[R1:%r[1-5]]], 16
 212 ; CHECK-FP: la [[R2:%r[1-5]]], 8([[R1]],%r11)
 213 ; CHECK-FP: agfi [[R2]], 524288
 214 ; CHECK-FP: mvi 0([[R2]]), 42
 215 ; CHECK-FP: br %r14
     ;
     ; Here the object-relative index itself (524288) is one past the largest
     ; 20-bit displacement, so it is expected to be added separately with AGFI.
     ; The object at frame offset 1048584 = 16 * 65536 + 8 is addressed with
     ; LLILH immediate 16 plus an LA displacement of 8.  The register patterns
     ; accept any of %r1-%r5 and require the same registers to be reused.
 216   %region1 = alloca [1048408 x i8], align 8
 217   %region2 = alloca [1048408 x i8], align 8
 218   %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524288
 219   %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524288
     ; The stores are volatile so that both of them remain in the output.
 220   store volatile i8 42, i8 *%ptr1
 221   store volatile i8 42, i8 *%ptr2
 222   ret void
 223 }
 224
 225 ; Repeat f4 in a case that needs the emergency spill slots (because all
 226 ; call-clobbered registers are live and no call-saved ones have been
 227 ; allocated).
 228 define void @f10(i32 *%vptr) {
 229 ; CHECK-NOFP: f10:
 230 ; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
 231 ; CHECK-NOFP: llilh [[REGISTER]], 8
 232 ; CHECK-NOFP: agr [[REGISTER]], %r15
 233 ; CHECK-NOFP: mvi 0([[REGISTER]]), 42
 234 ; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
 235 ; CHECK-NOFP: br %r14
 236 ;
 237 ; CHECK-FP: f10:
 238 ; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
 239 ; CHECK-FP: llilh [[REGISTER]], 8
 240 ; CHECK-FP: agr [[REGISTER]], %r11
 241 ; CHECK-FP: mvi 0([[REGISTER]]), 42
 242 ; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
 243 ; CHECK-FP: br %r14
     ;
     ; The expected sequence saves some register to one of the emergency slots
     ; (160 or 168), uses it as the scratch register for the f4-style
     ; LLILH/AGR/MVI sequence, and then reloads it from the same slot.
     ;
     ; Five values are loaded up front and only stored back after the byte
     ; stores, so they are all live across the large-offset accesses.
     ; NOTE(review): there is no %i2; presumably that is because %r2 already
     ; holds %vptr, making six live values in total -- confirm.
 244   %i0 = load volatile i32 *%vptr
 245   %i1 = load volatile i32 *%vptr
 246   %i3 = load volatile i32 *%vptr
 247   %i4 = load volatile i32 *%vptr
 248   %i5 = load volatile i32 *%vptr
     ; Same objects and index as f4: the target offset is 524288 = 8 * 65536.
 249   %region1 = alloca [524104 x i8], align 8
 250   %region2 = alloca [524104 x i8], align 8
 251   %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8
 252   %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8
 253   store volatile i8 42, i8 *%ptr1
 254   store volatile i8 42, i8 *%ptr2
     ; Storing the loaded values back is what keeps them live until here.
 255   store volatile i32 %i0, i32 *%vptr
 256   store volatile i32 %i1, i32 *%vptr
 257   store volatile i32 %i3, i32 *%vptr
 258   store volatile i32 %i4, i32 *%vptr
 259   store volatile i32 %i5, i32 *%vptr
 260   ret void
 261 }
 262
 263 ; And again with maximum register pressure.  The only spill slots that the
 264 ; NOFP case needs are the emergency ones, so the offsets are the same as for f4.
 265 ; However, the FP case uses %r11 as the frame pointer and must therefore
 266 ; spill a second register.  This leads to an extra displacement of 8.
 267 define void @f11(i32 *%vptr) {
 268 ; CHECK-NOFP: f11:
 269 ; CHECK-NOFP: stmg %r6, %r15,
 270 ; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
 271 ; CHECK-NOFP: llilh [[REGISTER]], 8
 272 ; CHECK-NOFP: agr [[REGISTER]], %r15
 273 ; CHECK-NOFP: mvi 0([[REGISTER]]), 42
 274 ; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
 275 ; CHECK-NOFP: lmg %r6, %r15,
 276 ; CHECK-NOFP: br %r14
 277 ;
 278 ; CHECK-FP: f11:
 279 ; CHECK-FP: stmg %r6, %r15,
 280 ; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
 281 ; CHECK-FP: llilh [[REGISTER]], 8
 282 ; CHECK-FP: agr [[REGISTER]], %r11
 283 ; CHECK-FP: mvi 8([[REGISTER]]), 42
 284 ; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
 285 ; CHECK-FP: lmg %r6, %r15,
 286 ; CHECK-FP: br %r14
     ;
     ; As in f10, a register is saved to an emergency slot, used as scratch for
     ; the LLILH/AGR/MVI sequence, and reloaded.  Here %r6-%r15 are also
     ; expected to be saved and restored with STMG/LMG.  The only difference
     ; between the two prefixes, apart from the base register, is the MVI
     ; displacement: 0 without a frame pointer, 8 with one.
     ;
     ; Fourteen values are loaded up front and only stored back after the byte
     ; stores, so they are all live across the large-offset accesses.
     ; NOTE(review): there is no %i2; presumably that is because %r2 already
     ; holds %vptr, making fifteen live values in total -- confirm.
 287   %i0 = load volatile i32 *%vptr
 288   %i1 = load volatile i32 *%vptr
 289   %i3 = load volatile i32 *%vptr
 290   %i4 = load volatile i32 *%vptr
 291   %i5 = load volatile i32 *%vptr
 292   %i6 = load volatile i32 *%vptr
 293   %i7 = load volatile i32 *%vptr
 294   %i8 = load volatile i32 *%vptr
 295   %i9 = load volatile i32 *%vptr
 296   %i10 = load volatile i32 *%vptr
 297   %i11 = load volatile i32 *%vptr
 298   %i12 = load volatile i32 *%vptr
 299   %i13 = load volatile i32 *%vptr
 300   %i14 = load volatile i32 *%vptr
     ; Same objects and index as f4: the object-relative target is byte 8 of a
     ; 524104-byte object.
 301   %region1 = alloca [524104 x i8], align 8
 302   %region2 = alloca [524104 x i8], align 8
 303   %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8
 304   %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8
 305   store volatile i8 42, i8 *%ptr1
 306   store volatile i8 42, i8 *%ptr2
     ; Storing the loaded values back is what keeps them live until here.
 307   store volatile i32 %i0, i32 *%vptr
 308   store volatile i32 %i1, i32 *%vptr
 309   store volatile i32 %i3, i32 *%vptr
 310   store volatile i32 %i4, i32 *%vptr
 311   store volatile i32 %i5, i32 *%vptr
 312   store volatile i32 %i6, i32 *%vptr
 313   store volatile i32 %i7, i32 *%vptr
 314   store volatile i32 %i8, i32 *%vptr
 315   store volatile i32 %i9, i32 *%vptr
 316   store volatile i32 %i10, i32 *%vptr
 317   store volatile i32 %i11, i32 *%vptr
 318   store volatile i32 %i12, i32 *%vptr
 319   store volatile i32 %i13, i32 *%vptr
 320   store volatile i32 %i14, i32 *%vptr
 321   ret void
 322 }