From: Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Date: Tue, 10 Nov 2015 21:38:26 +0000 (+0000)
Subject: [PowerPC] Add an MI SSA peephole pass.
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=9e24ab72522f2880625cc710c0684d5445f019b7;p=oota-llvm.git

[PowerPC] Add an MI SSA peephole pass.

This patch adds a pass for doing PowerPC peephole optimizations at the
MI level while the code is still in SSA form.  This allows for easy
modifications to the instructions while depending on a subsequent pass
of DCE.  Both passes are very fast due to the characteristics of SSA.

At this time, the only peepholes added are for cleaning up various
redundancies involving the XXPERMDI instruction.  However, I would
expect this will be a useful place to add more peepholes for
inefficiencies generated during instruction selection.  The pass is
placed after VSX swap optimization, as it is best to let that pass
remove unnecessary swaps before performing any remaining clean-ups.

The utility of these clean-ups are demonstrated by changes to four
existing test cases, all of which now have tighter expected code
generation.  I've also added Eric Schweiz's bugpoint-reduced test from
PR25157, for which we now generate tight code.  One other test started
failing for me, and I've fixed it
(test/Transforms/PlaceSafepoints/finite-loops.ll) as well; this is not
related to my changes, and I'm not sure why it works before and not
after.  The problem is that the CHECK-NOT: of "statepoint" from test1
fails because of the "statepoint" in test2, and so forth.  Adding a
CHECK-LABEL in between keeps the different occurrences of that string
properly scoped.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252651 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index ae8d8b4f5df..e157fd37c6e 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -41,6 +41,7 @@ namespace llvm {
   FunctionPass *createPPCVSXCopyPass();
   FunctionPass *createPPCVSXFMAMutatePass();
   FunctionPass *createPPCVSXSwapRemovalPass();
+  FunctionPass *createPPCMIPeepholePass();
   FunctionPass *createPPCBranchSelectionPass();
   FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
   FunctionPass *createPPCTLSDynamicCallPass();
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
new file mode 100644
index 00000000000..fe339d70d7d
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -0,0 +1,230 @@
+//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass performs peephole optimizations to clean up ugly code
+// sequences at the MachineInstruction layer.  It runs at the end of
+// the SSA phases, following VSX swap removal.  A pass of dead code
+// elimination follows this one for quick clean-up of any dead
+// instructions introduced here.  Although we could do this as callbacks
+// from the generic peephole pass, this would have a couple of bad
+// effects:  it might remove optimization opportunities for VSX swap
+// removal, and it would miss cleanups made possible following VSX
+// swap removal.
+//
+//===---------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-mi-peepholes"
+
+namespace llvm {
+  void initializePPCMIPeepholePass(PassRegistry&);
+}
+
+namespace {
+
+struct PPCMIPeephole : public MachineFunctionPass {
+
+  static char ID;
+  const PPCInstrInfo *TII;
+  MachineFunction *MF;
+  MachineRegisterInfo *MRI;
+
+  PPCMIPeephole() : MachineFunctionPass(ID) {
+    initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
+  }
+
+private:
+  // Initialize class variables.
+  void initialize(MachineFunction &MFParm);
+
+  // Perform peepholes.
+  bool simplifyCode(void);
+
+  // Find the "true" register represented by SrcReg (following chains
+  // of copies and subreg_to_reg operations).
+  unsigned lookThruCopyLike(unsigned SrcReg);
+
+public:
+  // Main entry point for this pass.
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    initialize(MF);
+    return simplifyCode();
+  }
+};
+
+// Initialize class variables.
+void PPCMIPeephole::initialize(MachineFunction &MFParm) {
+  MF = &MFParm;
+  MRI = &MF->getRegInfo();
+  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
+  DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
+  DEBUG(MF->dump());
+}
+
+// Perform peephole optimizations.
+bool PPCMIPeephole::simplifyCode(void) {
+  bool Simplified = false;
+  MachineInstr* ToErase = nullptr;
+
+  for (MachineBasicBlock &MBB : *MF) {
+    for (MachineInstr &MI : MBB) {
+
+      // If the previous instruction was marked for elimination,
+      // remove it now.
+      if (ToErase) {
+        ToErase->eraseFromParent();
+        ToErase = nullptr;
+      }
+
+      // Ignore debug instructions.
+      if (MI.isDebugValue())
+        continue;
+
+      // Per-opcode peepholes.
+      switch (MI.getOpcode()) {
+
+      default:
+        break;
+
+      case PPC::XXPERMDI: {
+        // Perform simplifications of 2x64 vector swaps and splats.
+        // A swap is identified by an immediate value of 2, and a splat
+        // is identified by an immediate value of 0 or 3.
+        int Immed = MI.getOperand(3).getImm();
+
+        if (Immed != 1) {
+
+          // For each of these simplifications, we need the two source
+          // regs to match.  Unfortunately, MachineCSE ignores COPY and
+          // SUBREG_TO_REG, so for example we can see
+          //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
+          // We have to look through chains of COPY and SUBREG_TO_REG
+          // to find the real source values for comparison.
+          unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
+          unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());
+
+          if (TrueReg1 == TrueReg2
+              && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
+            MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
+
+            // If this is a splat or a swap fed by another splat, we
+            // can replace it with a copy.
+            if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
+              unsigned FeedImmed = DefMI->getOperand(3).getImm();
+              unsigned FeedReg1
+                = lookThruCopyLike(DefMI->getOperand(1).getReg());
+              unsigned FeedReg2
+                = lookThruCopyLike(DefMI->getOperand(2).getReg());
+
+              if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
+                DEBUG(dbgs()
+                      << "Optimizing splat/swap or splat/splat "
+                      "to splat/copy: ");
+                DEBUG(MI.dump());
+                BuildMI(MBB, &MI, MI.getDebugLoc(),
+                        TII->get(PPC::COPY), MI.getOperand(0).getReg())
+                  .addOperand(MI.getOperand(1));
+                ToErase = &MI;
+                Simplified = true;
+              }
+
+              // If this is a splat fed by a swap, we can simplify modify
+              // the splat to splat the other value from the swap's input
+              // parameter.
+              else if ((Immed == 0 || Immed == 3)
+                       && FeedImmed == 2 && FeedReg1 == FeedReg2) {
+                DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
+                DEBUG(MI.dump());
+                MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
+                MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
+                MI.getOperand(3).setImm(3 - Immed);
+                Simplified = true;
+              }
+
+              // If this is a swap fed by a swap, we can replace it
+              // with a copy from the first swap's input.
+              else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
+                DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
+                DEBUG(MI.dump());
+                BuildMI(MBB, &MI, MI.getDebugLoc(),
+                        TII->get(PPC::COPY), MI.getOperand(0).getReg())
+                  .addOperand(DefMI->getOperand(1));
+                ToErase = &MI;
+                Simplified = true;
+              }
+            }
+          }
+        }
+        break;
+      }
+      }
+    }
+
+    // If the last instruction was marked for elimination,
+    // remove it now.
+    if (ToErase) {
+      ToErase->eraseFromParent();
+      ToErase = nullptr;
+    }
+  }
+
+  return Simplified;
+}
+
+// This is used to find the "true" source register for an
+// XXPERMDI instruction, since MachineCSE does not handle the
+// "copy-like" operations (Copy and SubregToReg).  Returns
+// the original SrcReg unless it is the target of a copy-like
+// operation, in which case we chain backwards through all
+// such operations to the ultimate source register.  If a
+// physical register is encountered, we stop the search.
+unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {
+
+  while (true) {
+
+    MachineInstr *MI = MRI->getVRegDef(SrcReg);
+    if (!MI->isCopyLike())
+      return SrcReg;
+
+    unsigned CopySrcReg;
+    if (MI->isCopy())
+      CopySrcReg = MI->getOperand(1).getReg();
+    else {
+      assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
+      CopySrcReg = MI->getOperand(2).getReg();
+    }
+
+    if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
+      return CopySrcReg;
+
+    SrcReg = CopySrcReg;
+  }
+}
+
+} // end default namespace
+
+INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
+                      "PowerPC MI Peephole Optimization", false, false)
+INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
+                    "PowerPC MI Peephole Optimization", false, false)
+
+char PPCMIPeephole::ID = 0;
+FunctionPass*
+llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
+
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index df687b2cade..24a9ef0ef07 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -42,6 +42,10 @@ static cl::
 opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
                                 cl::desc("Disable VSX Swap Removal for PPC"));
 
+static cl::
+opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
+                            cl::desc("Disable machine peepholes for PPC"));
+
 static cl::opt<bool>
 EnableGEPOpt("ppc-gep-opt", cl::Hidden,
              cl::desc("Enable optimizations on complex GEPs"),
@@ -348,6 +352,12 @@ void PPCPassConfig::addMachineSSAOptimization() {
   if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
       !DisableVSXSwapRemoval)
     addPass(createPPCVSXSwapRemovalPass());
+  // Target-specific peephole cleanups performed after instruction
+  // selection.
+  if (!DisableMIPeephole) {
+    addPass(createPPCMIPeepholePass());
+    addPass(&DeadMachineInstructionElimID);
+  }
 }
 
 void PPCPassConfig::addPreRegAlloc() {
diff --git a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
index 35d501e40cb..7e8991647ae 100644
--- a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -63,7 +63,7 @@ entry:
   ret <2 x i64> %splat.splat
 ; CHECK: mtvsrd {{[0-9]+}}, 3
 ; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
-; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+; CHECK-LE: xxspltd [[REG1]], [[REG1]], 0
 }
 
 ; Function Attrs: nounwind
diff --git a/test/CodeGen/PowerPC/pr25157-peephole.ll b/test/CodeGen/PowerPC/pr25157-peephole.ll
new file mode 100644
index 00000000000..c5bd49b492c
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr25157-peephole.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+; Verify peephole simplification of splats and swaps.  Bugpoint-reduced
+; test from Eric Schweitz.
+
+%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625 = type <{ [28 x i8] }>
+%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626 = type <{ [64 x i8] }>
+
+@.BSS38 = external global %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, align 32
+@_main1_2_ = external global %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, section ".comm", align 16
+
+define void @aercalc_() {
+L.entry:
+  br i1 undef, label %L.LB38_2426, label %L.LB38_2911
+
+L.LB38_2911:
+  br i1 undef, label %L.LB38_2140, label %L.LB38_2640
+
+L.LB38_2640:
+  unreachable
+
+L.LB38_2426:
+  br i1 undef, label %L.LB38_2438, label %L.LB38_2920
+
+L.LB38_2920:
+  br i1 undef, label %L.LB38_2438, label %L.LB38_2921
+
+L.LB38_2921:
+  br label %L.LB38_2140
+
+L.LB38_2140:
+  ret void
+
+L.LB38_2438:
+  br i1 undef, label %L.LB38_2451, label %L.LB38_2935
+
+L.LB38_2935:
+  br i1 undef, label %L.LB38_2451, label %L.LB38_2936
+
+L.LB38_2936:
+  unreachable
+
+L.LB38_2451:
+  br i1 undef, label %L.LB38_2452, label %L.LB38_2937
+
+L.LB38_2937:
+  unreachable
+
+L.LB38_2452:
+  %0 = load float, float* bitcast (i8* getelementptr inbounds (%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625* @.BSS38, i64 0, i32 0, i64 16) to float*), align 16
+  %1 = fpext float %0 to double
+  %2 = insertelement <2 x double> undef, double %1, i32 1
+  store <2 x double> %2, <2 x double>* bitcast (i8* getelementptr inbounds (%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626* @_main1_2_, i64 0, i32 0, i64 32) to <2 x double>*), align 16
+  unreachable
+}
+
+; CHECK-LABEL: @aercalc_
+; CHECK: lxsspx
+; CHECK: xxspltd
+; CHECK: stxvd2x
+; CHECK-NOT: xxswapd
diff --git a/test/CodeGen/PowerPC/swaps-le-5.ll b/test/CodeGen/PowerPC/swaps-le-5.ll
index 5cd739a0efa..3e13bd16c23 100644
--- a/test/CodeGen/PowerPC/swaps-le-5.ll
+++ b/test/CodeGen/PowerPC/swaps-le-5.ll
@@ -15,11 +15,11 @@ entry:
 }
 
 ; CHECK-LABEL: @bar0
-; CHECK-DAG: xxswapd {{[0-9]+}}, 1
 ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
 ; CHECK-DAG: xxspltd [[REG2:[0-9]+]]
 ; CHECK: xxpermdi [[REG3:[0-9]+]], [[REG2]], [[REG1]], 1
 ; CHECK: stxvd2x [[REG3]]
+; CHECK-NOT: xxswapd
 
 define void @bar1(double %y) {
 entry:
@@ -30,11 +30,11 @@ entry:
 }
 
 ; CHECK-LABEL: @bar1
-; CHECK-DAG: xxswapd {{[0-9]+}}, 1
 ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
 ; CHECK-DAG: xxspltd [[REG2:[0-9]+]]
 ; CHECK: xxmrghd [[REG3:[0-9]+]], [[REG1]], [[REG2]]
 ; CHECK: stxvd2x [[REG3]]
+; CHECK-NOT: xxswapd
 
 define void @baz0() {
 entry:
diff --git a/test/CodeGen/PowerPC/swaps-le-6.ll b/test/CodeGen/PowerPC/swaps-le-6.ll
index 365aeee2d8f..df88322e4fd 100644
--- a/test/CodeGen/PowerPC/swaps-le-6.ll
+++ b/test/CodeGen/PowerPC/swaps-le-6.ll
@@ -20,8 +20,7 @@ entry:
 ; CHECK-LABEL: @bar0
 ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
 ; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
-; CHECK: xxswapd [[REG3:[0-9]+]], [[REG2]]
-; CHECK: xxspltd [[REG4:[0-9]+]], [[REG3]], 1
+; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
 ; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
 ; CHECK: stxvd2x [[REG5]]
 
@@ -37,8 +36,7 @@ entry:
 ; CHECK-LABEL: @bar1
 ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
 ; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
-; CHECK: xxswapd [[REG3:[0-9]+]], [[REG2]]
-; CHECK: xxspltd [[REG4:[0-9]+]], [[REG3]], 1
+; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
 ; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
 ; CHECK: stxvd2x [[REG5]]
 
diff --git a/test/CodeGen/PowerPC/vsx.ll b/test/CodeGen/PowerPC/vsx.ll
index b4b1d248d1a..b2eefb66676 100644
--- a/test/CodeGen/PowerPC/vsx.ll
+++ b/test/CodeGen/PowerPC/vsx.ll
@@ -1228,9 +1228,8 @@ define <2 x i32> @test80(i32 %v) {
 ; CHECK-LE-LABEL: @test80
 ; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3
 ; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI
-; CHECK-LE-DAG: xxswapd [[V1:[0-9]+]], [[R1]]
 ; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]]
-; CHECK-LE-DAG: xxspltd 34, [[V1]]
+; CHECK-LE-DAG: xxspltd 34, [[R1]]
 ; CHECK-LE-DAG: xxswapd 35, [[V2]]
 ; CHECK-LE: vaddudm 2, 2, 3
 ; CHECK-LE: blr
diff --git a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
index 6c89b1092bd..97e1548f965 100644
--- a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
+++ b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
@@ -33,12 +33,8 @@ define double @teste0(<2 x double>* %p1) {
   %r = extractelement <2 x double> %v, i32 0
   ret double %r
 
-; FIXME: Swap optimization will collapse this into lxvd2x 1, 0, 3.
-
 ; CHECK-LABEL: teste0
-; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxswapd 1, 0
+; CHECK: lxvd2x 1, 0, 3
 }
 
 define double @teste1(<2 x double>* %p1) {
diff --git a/test/CodeGen/PowerPC/vsx_shuffle_le.ll b/test/CodeGen/PowerPC/vsx_shuffle_le.ll
index dcfa0e78886..4f767c7ca78 100644
--- a/test/CodeGen/PowerPC/vsx_shuffle_le.ll
+++ b/test/CodeGen/PowerPC/vsx_shuffle_le.ll
@@ -8,8 +8,7 @@ define <2 x double> @test00(<2 x double>* %p1, <2 x double>* %p2) {
 
 ; CHECK-LABEL: test00
 ; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 1
+; CHECK: xxspltd 34, 0, 0
 }
 
 define <2 x double> @test01(<2 x double>* %p1, <2 x double>* %p2) {
@@ -58,9 +57,7 @@ define <2 x double> @test10(<2 x double>* %p1, <2 x double>* %p2) {
   ret <2 x double> %v3
 
 ; CHECK-LABEL: @test10
-; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxswapd 34, 0
+; CHECK: lxvd2x 34, 0, 3
 }
 
 define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
@@ -71,8 +68,7 @@ define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
 
 ; CHECK-LABEL: @test11
 ; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 0
+; CHECK: xxspltd 34, 0, 1
 }
 
 define <2 x double> @test12(<2 x double>* %p1, <2 x double>* %p2) {
@@ -139,8 +135,7 @@ define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) {
 
 ; CHECK-LABEL: @test22
 ; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 1
+; CHECK: xxspltd 34, 0, 0
 }
 
 define <2 x double> @test23(<2 x double>* %p1, <2 x double>* %p2) {
@@ -189,9 +184,7 @@ define <2 x double> @test32(<2 x double>* %p1, <2 x double>* %p2) {
   ret <2 x double> %v3
 
 ; CHECK-LABEL: @test32
-; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxswapd 34, 0
+; CHECK: lxvd2x 34, 0, 4
 }
 
 define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
@@ -202,6 +195,5 @@ define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
 
 ; CHECK-LABEL: @test33
 ; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 0
+; CHECK: xxspltd 34, 0, 1
 }
diff --git a/test/Transforms/PlaceSafepoints/finite-loops.ll b/test/Transforms/PlaceSafepoints/finite-loops.ll
index 9121e92896c..b98073d6a6e 100644
--- a/test/Transforms/PlaceSafepoints/finite-loops.ll
+++ b/test/Transforms/PlaceSafepoints/finite-loops.ll
@@ -11,6 +11,7 @@ define void @test1(i32) gc "statepoint-example" {
 ; CHECK: statepoint
 ; CHECK-LABEL: loop
 ; CHECK-NOT: statepoint
+; CHECK-LABEL: exit
 
 entry:
   br label %loop
@@ -32,6 +33,7 @@ define void @test2(i32) gc "statepoint-example" {
 ; CHECK: statepoint
 ; CHECK-LABEL: loop
 ; CHECK-NOT: statepoint
+; CHECK-LABEL: exit
 
 entry:
   br label %loop
@@ -56,6 +58,7 @@ define void @test3(i8 %upper) gc "statepoint-example" {
 ; CHECK: statepoint
 ; CHECK-LABEL: loop
 ; CHECK-NOT: statepoint
+; CHECK-LABEL: exit
 
 entry:
   br label %loop
@@ -77,12 +80,14 @@ define void @test4(i64 %upper) gc "statepoint-example" {
 ; CHECK: statepoint
 ; CHECK-LABEL: loop
 ; CHECK: statepoint
+; CHECK-LABEL: exit
 
 ; COUNTED-64-LABEL: test4
 ; COUNTED-64-LABEL: entry
 ; COUNTED-64: statepoint
 ; COUNTED-64-LABEL: loop
 ; COUNTED-64-NOT: statepoint
+; COUNTED-64-LABEL: exit
 
 entry:
   br label %loop
@@ -105,12 +110,14 @@ define void @test5(i64 %upper) gc "statepoint-example" {
 ; CHECK: statepoint
 ; CHECK-LABEL: loop
 ; CHECK: statepoint
+; CHECK-LABEL: exit
 
 ; COUNTED-64-LABEL: test5
 ; COUNTED-64-LABEL: entry
 ; COUNTED-64: statepoint
 ; COUNTED-64-LABEL: loop
 ; COUNTED-64: statepoint
+; COUNTED-64-LABEL: exit
 
 entry:
   br label %loop