Expand VMOVQQQQ pseudo instructions.
author Bob Wilson <bob.wilson@apple.com>
Sat, 13 Aug 2011 05:14:55 +0000 (05:14 +0000)
committer Bob Wilson <bob.wilson@apple.com>
Sat, 13 Aug 2011 05:14:55 +0000 (05:14 +0000)
Apparently we never added code to expand these pseudo instructions, and in
over a year, no one has noticed.  Our register allocator must be awesome!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137551 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/ARM/ARMExpandPseudoInsts.cpp
test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll [new file with mode: 0644]

index a4fcfd51bfd60f2aaf13a4bd71dec682b216c23a..a1be94ab878f37e2321285a94fe00069560298b3 100644 (file)
@@ -998,6 +998,52 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       return true;
     }
 
+    case ARM::VMOVQQQQ: { // Expand the pseudo into one VORRq per qsub_N.
+      unsigned DstReg = MI.getOperand(0).getReg(); // Operand 0: destination.
+      bool DstIsDead = MI.getOperand(0).isDead();
+      unsigned Dst0 = TRI->getSubReg(DstReg, ARM::qsub_0); // Dst pieces 0..3
+      unsigned Dst1 = TRI->getSubReg(DstReg, ARM::qsub_1);
+      unsigned Dst2 = TRI->getSubReg(DstReg, ARM::qsub_2);
+      unsigned Dst3 = TRI->getSubReg(DstReg, ARM::qsub_3);
+      unsigned SrcReg = MI.getOperand(1).getReg(); // Operand 1: source.
+      bool SrcIsKill = MI.getOperand(1).isKill();
+      unsigned Src0 = TRI->getSubReg(SrcReg, ARM::qsub_0); // Src pieces 0..3
+      unsigned Src1 = TRI->getSubReg(SrcReg, ARM::qsub_1);
+      unsigned Src2 = TRI->getSubReg(SrcReg, ARM::qsub_2);
+      unsigned Src3 = TRI->getSubReg(SrcReg, ARM::qsub_3);
+      MachineInstrBuilder Mov0 = // qsub_0: Dst0 = Src0 | Src0, i.e. a copy.
+        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                               TII->get(ARM::VORRq))
+                       .addReg(Dst0, // Each def inherits the pseudo's dead flag.
+                               RegState::Define | getDeadRegState(DstIsDead))
+                       .addReg(Src0, getKillRegState(SrcIsKill)) // kill inherited
+                       .addReg(Src0, getKillRegState(SrcIsKill)));
+      MachineInstrBuilder Mov1 = // qsub_1: same pattern as Mov0.
+        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                               TII->get(ARM::VORRq))
+                       .addReg(Dst1,
+                               RegState::Define | getDeadRegState(DstIsDead))
+                       .addReg(Src1, getKillRegState(SrcIsKill))
+                       .addReg(Src1, getKillRegState(SrcIsKill)));
+      MachineInstrBuilder Mov2 = // qsub_2: same pattern as Mov0.
+        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                               TII->get(ARM::VORRq))
+                       .addReg(Dst2,
+                               RegState::Define | getDeadRegState(DstIsDead))
+                       .addReg(Src2, getKillRegState(SrcIsKill))
+                       .addReg(Src2, getKillRegState(SrcIsKill)));
+      MachineInstrBuilder Mov3 = // qsub_3: same pattern as Mov0.
+        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                               TII->get(ARM::VORRq))
+                       .addReg(Dst3,
+                               RegState::Define | getDeadRegState(DstIsDead))
+                       .addReg(Src3, getKillRegState(SrcIsKill))
+                       .addReg(Src3, getKillRegState(SrcIsKill)));
+      TransferImpOps(MI, Mov0, Mov3); // NOTE(review): presumably forwards MI's implicit operands; confirm.
+      MI.eraseFromParent(); // Remove the pseudo now that the moves are built.
+      return true;
+    }
+
     case ARM::VLDMQIA: {
       unsigned NewOpc = ARM::VLDMDIA;
       MachineInstrBuilder MIB =
diff --git a/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll b/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll
new file mode 100644 (file)
index 0000000..6afa016
--- /dev/null
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 -O0
+; Input is piped on stdin so llc prints to stdout, not a .s file by the test.
+; The function below is supposed to produce a VMOVQQQQ pseudo instruction;
+; if it is not expanded, the compile fails while printing the pseudo.
+
+define void @test_vmovqqqq_pseudo() nounwind ssp {
+entry:
+  %vld3_lane = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> zeroinitializer, i32 7, i32 2)
+  store { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, { <8 x i16>, <8 x i16>, <8 x i16> }* undef
+  ret void
+}
+
+declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly