include "SystemZPatterns.td"
include "SystemZInstrFormats.td"
include "SystemZInstrInfo.td"
-include "SystemZInstrFP.td"
include "SystemZInstrVector.td"
+include "SystemZInstrFP.td"
def SystemZInstrInfo : InstrInfo {}
Context);
}
+// MI loads the high part of a vector from memory. Return an instruction
+// that uses the replicating vector load Opcode to do the same thing.
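+// For example, VL64 is lowered to VLREPG, which replicates the loaded
+// doubleword into both elements, so element 0 (the high part) holds the
+// loaded value.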
+static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) {
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg());
+}
+
+// MI stores the high part of a vector to memory. Return an instruction
+// that uses the elemental vector store Opcode to do the same thing.
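+// For example, VST64 is lowered to VSTEG with element index 0, which
+// stores just the high doubleword of the register.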
+static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) {
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg())
+ .addImm(0);
+}
+
void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SystemZMCInstLower Lower(MF->getContext(), *this);
MCInst LoweredMI;
.addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg()));
break;
+ case SystemZ::VLR32:
+ case SystemZ::VLR64:
+ LoweredMI = MCInstBuilder(SystemZ::VLR)
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg()));
+ break;
+
+ case SystemZ::VL32:
+ LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF);
+ break;
+
+ case SystemZ::VL64:
+ LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPG);
+ break;
+
+ case SystemZ::VST32:
+ LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEF);
+ break;
+
+ case SystemZ::VST64:
+ LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEG);
+ break;
+
case SystemZ::LFER:
LoweredMI = MCInstBuilder(SystemZ::VLGVF)
.addReg(SystemZMC::getRegAsGR64(MI->getOperand(0).getReg()))
addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
else
addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
- addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
- addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
- addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+ addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
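+ // With the vector facility, scalar f32 and f64 values can live in any
+ // of the 32 vector registers rather than just the 16 FPRs.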
+ if (Subtarget.hasVector()) {
+ addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
+ } else {
+ addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+ }
addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
if (Subtarget.hasVector()) {
defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>;
defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>;
}
-defm : CompareZeroFP<LTEBRCompare, FP32>;
-defm : CompareZeroFP<LTDBRCompare, FP64>;
-defm : CompareZeroFP<LTXBRCompare, FP128>;
+// Note that the comparison against zero operation is not available if we
+// have vector support, since load-and-test instructions will partially
+// clobber the target (vector) register.
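+// A compare against zero then materializes the zero explicitly instead
+// (e.g. LZDR followed by a normal compare).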
+let Predicates = [FeatureNoVector] in {
+ defm : CompareZeroFP<LTEBRCompare, FP32>;
+ defm : CompareZeroFP<LTDBRCompare, FP64>;
+ defm : CompareZeroFP<LTXBRCompare, FP128>;
+}
// Moves between 64-bit integer and floating-point registers.
def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>;
defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>;
defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>;
+ // For z13 we prefer LDE over LE to avoid partial register dependencies.
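+ // The shortening pass also rewrites VL32 into LDE32 where possible.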
+ def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>;
+
// These instructions are split after register allocation, so we don't
// want a custom inserter.
let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
// A floating-point load-and-test operation. Create both a normal unary
// operation and one that acts as a comparison against zero.
+// Note that the comparison against zero operation is not available if we
+// have vector support, since load-and-test instructions will partially
+// clobber the target (vector) register.
multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
RegisterOperand cls> {
def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>;
- let isCodeGenOnly = 1 in
+ let isCodeGenOnly = 1, Predicates = [FeatureNoVector] in
def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>;
}
class UnaryAliasVRS<RegisterOperand cls1, RegisterOperand cls2>
: Alias<6, (outs cls1:$src1), (ins cls2:$src2), []>;
+// An alias of a UnaryVRR*, but with different register sizes.
+class UnaryAliasVRR<SDPatternOperator operator, TypedReg tr1, TypedReg tr2>
+ : Alias<6, (outs tr1.op:$V1), (ins tr2.op:$V2),
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]>;
+
+// An alias of a UnaryVRX, but with different register sizes.
+class UnaryAliasVRX<SDPatternOperator operator, TypedReg tr,
+ AddressingMode mode = bdxaddr12only>
+ : Alias<6, (outs tr.op:$V1), (ins mode:$XBD2),
+ [(set tr.op:$V1, (tr.vt (operator mode:$XBD2)))]>;
+
+// An alias of a StoreVRX, but with different register sizes.
+class StoreAliasVRX<SDPatternOperator operator, TypedReg tr,
+ AddressingMode mode = bdxaddr12only>
+ : Alias<6, (outs), (ins tr.op:$V1, mode:$XBD2),
+ [(operator (tr.vt tr.op:$V1), mode:$XBD2)]>;
+
// An alias of a BinaryRI, but with different register sizes.
class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
Opcode = SystemZ::LDR;
else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LXR;
+ else if (SystemZ::VR32BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::VLR32;
+ else if (SystemZ::VR64BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::VLR64;
else if (SystemZ::VR128BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::VLR;
else
} else if (RC == &SystemZ::FP128BitRegClass) {
LoadOpcode = SystemZ::LX;
StoreOpcode = SystemZ::STX;
+ } else if (RC == &SystemZ::VR32BitRegClass) {
+ LoadOpcode = SystemZ::VL32;
+ StoreOpcode = SystemZ::VST32;
+ } else if (RC == &SystemZ::VR64BitRegClass) {
+ LoadOpcode = SystemZ::VL64;
+ StoreOpcode = SystemZ::VST64;
} else if (RC == &SystemZ::VF128BitRegClass ||
RC == &SystemZ::VR128BitRegClass) {
LoadOpcode = SystemZ::VL;
let Predicates = [FeatureVector] in {
// Register move.
def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>;
+ def VLR32 : UnaryAliasVRR<null_frag, v32eb, v32eb>;
+ def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>;
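+ // (The asm printer lowers VLR32 and VLR64 to plain VLR.)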
// Load GR from VR element.
def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>;
def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)),
(VLREPG bdxaddr12only:$addr)>;
+ // Use VLREP to load subvectors. These patterns use "12pair" because
+ // LEY and LDY offer full 20-bit displacement fields. It's often better
+ // to use those instructions rather than force a 20-bit displacement
+ // into a GPR temporary.
+ def VL32 : UnaryAliasVRX<load, v32eb, bdxaddr12pair>;
+ def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
+
// Load logical element and zero.
def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>;
def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
imm32zx1:$index),
(VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
+ // Use VSTE to store subvectors. These patterns use "12pair" because
+ // STEY and STDY offer full 20-bit displacement fields. It's often better
+ // to use those instructions rather than force a 20-bit displacement
+ // into a GPR temporary.
+ def VST32 : StoreAliasVRX<store, v32eb, bdxaddr12pair>;
+ def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
+
// Scatter element.
def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>;
def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>;
let Predicates = [FeatureVector] in {
// Add.
def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
- def WFADB : BinaryVRRc<"wfadb", 0xE7E3, null_frag, v64db, v64db, 3, 8>;
+ def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
// Convert from fixed 64-bit.
def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
// Divide.
def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
- def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, null_frag, v64db, v64db, 3, 8>;
+ def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
// Load FP integer.
def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, null_frag, v128db, v128db, 3, 0>;
def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
defm : VectorRounding<VFIDB, v128db>;
+ defm : VectorRounding<WFIDB, v64db>;
// Load lengthened.
def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>;
- def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, null_frag, v64db, v32eb, 2, 8>;
+ def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fextend, v64db, v32eb, 2, 8>;
// Load rounded.
def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>;
def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>;
def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
+ def : FPConversion<WLEDB, fround, v32eb, v64db, 0, 0>;
// Multiply.
def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
- def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, null_frag, v64db, v64db, 3, 8>;
+ def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
// Multiply and add.
def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
- def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, null_frag, v64db, v64db, 8, 3>;
+ def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
// Multiply and subtract.
def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
- def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, null_frag, v64db, v64db, 8, 3>;
+ def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
// Load complement.
def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>;
- def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 0>;
+ def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>;
// Load negative.
def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>;
- def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 1>;
+ def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>;
// Load positive.
def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>;
- def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 2>;
+ def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>;
// Square root.
def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
- def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, null_frag, v64db, v64db, 3, 8>;
+ def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
// Subtract.
def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
- def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, null_frag, v64db, v64db, 3, 8>;
+ def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
// Test data class immediate.
let Defs = [CC] in {
let Predicates = [FeatureVector] in {
// Compare scalar.
let Defs = [CC] in
- def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, null_frag, v64db, 3>;
+ def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
// Compare and signal scalar.
let Defs = [CC] in
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
private:
bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther,
unsigned LLIxL, unsigned LLIxH);
+ bool shortenOn0(MachineInstr &MI, unsigned Opcode);
+ bool shortenOn01(MachineInstr &MI, unsigned Opcode);
+ bool shortenOn001(MachineInstr &MI, unsigned Opcode);
+ bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
const SystemZInstrInfo *TII;
return false;
}
+// Change MI's opcode to Opcode if register operand 0 has a 4-bit encoding.
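+// (Only %f0-%f15, which overlap %v0-%v15, have 4-bit encodings; %v16-%v31
+// are only accessible to vector instructions.)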
+bool SystemZShortenInst::shortenOn0(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16) {
+ MI.setDesc(TII->get(Opcode));
+ return true;
+ }
+ return false;
+}
+
+// Change MI's opcode to Opcode if register operands 0 and 1 have a
+// 4-bit encoding.
+bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+ SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
+ MI.setDesc(TII->get(Opcode));
+ return true;
+ }
+ return false;
+}
+
+// Change MI's opcode to Opcode if register operands 0, 1 and 2 have a
+// 4-bit encoding and if operands 0 and 1 are tied.
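+// For example, this turns WFADB (dest, src1, src2) into the two-address
+// ADBR, whose first operand is both source and destination.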
+bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+ MI.getOperand(1).getReg() == MI.getOperand(0).getReg() &&
+ SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) {
+ MI.setDesc(TII->get(Opcode));
+ return true;
+ }
+ return false;
+}
+
+// MI is a vector-style conversion instruction with the operand order:
+// destination, source, exact-suppress, rounding-mode. If both registers
+// have a 4-bit encoding then change it to Opcode, which has operand order:
+// destination, rounding-mode, source, exact-suppress.
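+// For example, "wfidb %f0, %f1, 4, 7" would become "fidbra %f0, 7, %f1, 4".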
+bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+ SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
+ MachineOperand Dest(MI.getOperand(0));
+ MachineOperand Src(MI.getOperand(1));
+ MachineOperand Suppress(MI.getOperand(2));
+ MachineOperand Mode(MI.getOperand(3));
+ MI.RemoveOperand(3);
+ MI.RemoveOperand(2);
+ MI.RemoveOperand(1);
+ MI.RemoveOperand(0);
+ MI.setDesc(TII->get(Opcode));
+ MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
+ .addOperand(Dest)
+ .addOperand(Mode)
+ .addOperand(Src)
+ .addOperand(Suppress);
+ return true;
+ }
+ return false;
+}
+
// Process all instructions in MBB. Return true if something changed.
bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
// Iterate backwards through the block looking for instructions to change.
for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) {
MachineInstr &MI = *MBBI;
- unsigned Opcode = MI.getOpcode();
- if (Opcode == SystemZ::IILF)
+ switch (MI.getOpcode()) {
+ case SystemZ::IILF:
Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL,
SystemZ::LLILH);
- else if (Opcode == SystemZ::IIHF)
+ break;
+
+ case SystemZ::IIHF:
Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL,
SystemZ::LLIHH);
+ break;
+
+ case SystemZ::WFADB:
+ Changed |= shortenOn001(MI, SystemZ::ADBR);
+ break;
+
+ case SystemZ::WFDDB:
+ Changed |= shortenOn001(MI, SystemZ::DDBR);
+ break;
+
+ case SystemZ::WFIDB:
+ Changed |= shortenFPConv(MI, SystemZ::FIDBRA);
+ break;
+
+ case SystemZ::WLDEB:
+ Changed |= shortenOn01(MI, SystemZ::LDEBR);
+ break;
+
+ case SystemZ::WLEDB:
+ Changed |= shortenFPConv(MI, SystemZ::LEDBRA);
+ break;
+
+ case SystemZ::WFMDB:
+ Changed |= shortenOn001(MI, SystemZ::MDBR);
+ break;
+
+ case SystemZ::WFLCDB:
+ Changed |= shortenOn01(MI, SystemZ::LCDBR);
+ break;
+
+ case SystemZ::WFLNDB:
+ Changed |= shortenOn01(MI, SystemZ::LNDBR);
+ break;
+
+ case SystemZ::WFLPDB:
+ Changed |= shortenOn01(MI, SystemZ::LPDBR);
+ break;
+
+ case SystemZ::WFSQDB:
+ Changed |= shortenOn01(MI, SystemZ::SQDBR);
+ break;
+
+ case SystemZ::WFSDB:
+ Changed |= shortenOn001(MI, SystemZ::SDBR);
+ break;
+
+ case SystemZ::WFCDB:
+ Changed |= shortenOn01(MI, SystemZ::CDBR);
+ break;
+
+ case SystemZ::VL32:
+ // For z13 we prefer LDE over LE to avoid partial register dependencies.
+ Changed |= shortenOn0(MI, SystemZ::LDE32);
+ break;
+
+ case SystemZ::VST32:
+ Changed |= shortenOn0(MI, SystemZ::STE);
+ break;
+
+ case SystemZ::VL64:
+ Changed |= shortenOn0(MI, SystemZ::LD);
+ break;
+
+ case SystemZ::VST64:
+ Changed |= shortenOn0(MI, SystemZ::STD);
+ break;
+ }
+
unsigned UsedLow = 0;
unsigned UsedHigh = 0;
for (auto MOI = MI.operands_begin(), MOE = MI.operands_end();
; Test floating-point absolute.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test f32.
declare float @llvm.fabs.f32(float %f)
; Test negated floating-point absolute.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test f32.
declare float @llvm.fabs.f32(float %f)
; Test 64-bit floating-point addition.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @foo()
; Check register addition.
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
-; CHECK: adb %f0, 160(%r15)
+; CHECK-SCALAR: adb %f0, 160(%r15)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%ptr0, i64 2
%ptr2 = getelementptr double, double *%ptr0, i64 4
; Test 64-bit floating-point comparison. The scalar tests assume a z10
; implementation of select, using conditional branches rather than LOCGR.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare double @foo()
define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) {
; CHECK-LABEL: f1:
; CHECK: cdbr %f0, %f2
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%cond = fcmp oeq double %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%f2 = load double , double *%ptr
%cond = fcmp oeq double %f1, %f2
define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) {
; CHECK-LABEL: f3:
; CHECK: cdb %f0, 4088(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 511
%f2 = load double , double *%ptr
; CHECK-LABEL: f4:
; CHECK: aghi %r4, 4096
; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 512
%f2 = load double , double *%ptr
; CHECK-LABEL: f5:
; CHECK: aghi %r4, -8
; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 -1
%f2 = load double , double *%ptr
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r5, 3
; CHECK: cdb %f0, 800(%r1,%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%ptr1 = getelementptr double, double *%base, i64 %index
%ptr2 = getelementptr double, double *%ptr1, i64 100
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
-; CHECK: cdb {{%f[0-9]+}}, 160(%r15)
+; CHECK-SCALAR: cdb {{%f[0-9]+}}, 160(%r15)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%ptr0, i64 2
%ptr2 = getelementptr double, double *%ptr0, i64 4
; Check comparison with zero.
define i64 @f8(i64 %a, i64 %b, double %f) {
; CHECK-LABEL: f8:
-; CHECK: ltdbr %f0, %f0
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR: ltdbr %f0, %f0
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR: lzdr %f1
+; CHECK-VECTOR-NEXT: cdbr %f0, %f1
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%cond = fcmp oeq double %f, 0.0
%res = select i1 %cond, i64 %a, i64 %b
define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) {
; CHECK-LABEL: f9:
; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: jl {{\.L.*}}
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: jl
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnl %r2, %r3
; CHECK: br %r14
%f1 = load double , double *%ptr
%cond = fcmp ogt double %f1, %f2
; Test floating-point truncations.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
; Test f64->f32.
define float @f1(double %d1, double %d2) {
; CHECK-LABEL: f1:
-; CHECK: ledbr %f0, %f2
+; CHECK-SCALAR: ledbr %f0, %f2
+; CHECK-VECTOR: ledbra %f0, 0, %f2, 0
; CHECK: br %r14
%res = fptrunc double %d2 to float
ret float %res
define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) {
; CHECK-LABEL: f5:
; CHECK: ldxbr %f1, %f1
-; CHECK: adbr %f1, %f2
-; CHECK: std %f1, 0(%r2)
+; CHECK-SCALAR: adbr %f1, %f2
+; CHECK-SCALAR: std %f1, 0(%r2)
+; CHECK-VECTOR: wfadb [[REG:%f[0-9]+]], %f1, %f2
+; CHECK-VECTOR: std [[REG]], 0(%r2)
; CHECK: br %r14
%val = load fp128 , fp128 *%ptr
%conv = fptrunc fp128 %val to double
; Test extensions of f32 to f64.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Check register extension.
define double @f1(float %val) {
; to use LDEB if possible.
define void @f7(double *%ptr1, float *%ptr2) {
; CHECK-LABEL: f7:
-; CHECK: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
+; CHECK-SCALAR: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
; CHECK: br %r14
%val0 = load volatile float , float *%ptr2
%val1 = load volatile float , float *%ptr2
; Test 64-bit floating-point division.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @foo()
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
-; CHECK: ddb %f0, 160(%r15)
+; CHECK-SCALAR: ddb %f0, 160(%r15)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%ptr0, i64 2
%ptr2 = getelementptr double, double *%ptr0, i64 4
; Test moves between FPRs.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test f32 moves.
define float @f1(float %a, float %b) {
; CHECK-LABEL: f1:
; CHECK: ler %f0, %f2
+; CHECK: br %r14
ret float %b
}
define double @f2(double %a, double %b) {
; CHECK-LABEL: f2:
; CHECK: ldr %f0, %f2
+; CHECK: br %r14
ret double %b
}
; CHECK-LABEL: f3:
; CHECK: lxr
; CHECK: axbr
+; CHECK: br %r14
%val = load volatile fp128 , fp128 *%x
%sum = fadd fp128 %val, %val
store volatile fp128 %sum, fp128 *%x
; Test 64-bit floating-point loads.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test the low end of the LD range.
define double @f1(double *%src) {
; Test 64-bit floating-point stores.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test the low end of the STD range.
define void @f1(double *%src, double %val) {
--- /dev/null
+; Test 32-bit floating-point loads for z13.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test that we use LDE instead of LE - low end of the LE range.
+define float @f1(float *%src) {
+; CHECK-LABEL: f1:
+; CHECK: lde %f0, 0(%r2)
+; CHECK: br %r14
+ %val = load float, float *%src
+ ret float %val
+}
+
+; Test that we use LDE instead of LE - high end of the LE range.
+define float @f2(float *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lde %f0, 4092(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%src, i64 1023
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check the next word up, which should use LEY instead of LDE.
+define float @f3(float *%src) {
+; CHECK-LABEL: f3:
+; CHECK: ley %f0, 4096(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%src, i64 1024
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check the high end of the aligned LEY range.
+define float @f4(float *%src) {
+; CHECK-LABEL: f4:
+; CHECK: ley %f0, 524284(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%src, i64 131071
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f5(float *%src) {
+; CHECK-LABEL: f5:
+; CHECK: agfi %r2, 524288
+; CHECK: lde %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%src, i64 131072
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check the high end of the negative aligned LEY range.
+define float @f6(float *%src) {
+; CHECK-LABEL: f6:
+; CHECK: ley %f0, -4(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%src, i64 -1
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check the low end of the LEY range.
+define float @f7(float *%src) {
+; CHECK-LABEL: f7:
+; CHECK: ley %f0, -524288(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%src, i64 -131072
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f8(float *%src) {
+; CHECK-LABEL: f8:
+; CHECK: agfi %r2, -524292
+; CHECK: lde %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%src, i64 -131073
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check that LDE allows an index.
+define float @f9(i64 %src, i64 %index) {
+; CHECK-LABEL: f9:
+; CHECK: lde %f0, 4092({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+ %add1 = add i64 %src, %index
+ %add2 = add i64 %add1, 4092
+ %ptr = inttoptr i64 %add2 to float *
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check that LEY allows an index.
+define float @f10(i64 %src, i64 %index) {
+; CHECK-LABEL: f10:
+; CHECK: ley %f0, 4096({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+ %add1 = add i64 %src, %index
+ %add2 = add i64 %add1, 4096
+ %ptr = inttoptr i64 %add2 to float *
+ %val = load float, float *%ptr
+ ret float %val
+}
; Test multiplication of two f64s, producing an f64 result.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @foo()
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
-; CHECK: mdb %f0, 160(%r15)
+; CHECK-SCALAR: mdb %f0, 160(%r15)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%ptr0, i64 2
%ptr2 = getelementptr double, double *%ptr0, i64 4
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
define double @f1(double %f1, double %f2, double %acc) {
; CHECK-LABEL: f1:
-; CHECK: madbr %f4, %f0, %f2
-; CHECK: ldr %f0, %f4
+; CHECK-SCALAR: madbr %f4, %f0, %f2
+; CHECK-SCALAR: ldr %f0, %f4
+; CHECK-VECTOR: wfmadb %f0, %f0, %f2, %f4
; CHECK: br %r14
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
ret double %res
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
define double @f1(double %f1, double %f2, double %acc) {
; CHECK-LABEL: f1:
-; CHECK: msdbr %f4, %f0, %f2
-; CHECK: ldr %f0, %f4
+; CHECK-SCALAR: msdbr %f4, %f0, %f2
+; CHECK-SCALAR: ldr %f0, %f4
+; CHECK-VECTOR: wfmsdb %f0, %f0, %f2, %f4
; CHECK: br %r14
%negacc = fsub double -0.0, %acc
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
; Test floating-point negation.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test f32.
define float @f1(float %f) {
; Test rounding functions for z196 and above.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
; Test rint for f32.
declare float @llvm.rint.f32(float %f)
declare double @llvm.rint.f64(double %f)
define double @f2(double %f) {
; CHECK-LABEL: f2:
-; CHECK: fidbr %f0, 0, %f0
+; CHECK-SCALAR: fidbr %f0, 0, %f0
+; CHECK-VECTOR: fidbra %f0, 0, %f0, 0
; CHECK: br %r14
%res = call double @llvm.rint.f64(double %f)
ret double %res
; Test 64-bit square root.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.sqrt.f64(double %f)
declare double @sqrt(double)
; to use SQDB if possible.
define void @f7(double *%ptr) {
; CHECK-LABEL: f7:
-; CHECK: sqdb {{%f[0-9]+}}, 160(%r15)
+; CHECK-SCALAR: sqdb {{%f[0-9]+}}, 160(%r15)
; CHECK: br %r14
%val0 = load volatile double , double *%ptr
%val1 = load volatile double , double *%ptr
; Test 64-bit floating-point subtraction.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @foo()
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
-; CHECK: sdb %f0, 16{{[04]}}(%r15)
+; CHECK-SCALAR: sdb %f0, 16{{[04]}}(%r15)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%ptr0, i64 2
%ptr2 = getelementptr double, double *%ptr0, i64 4
; uses a different register class, but the set of saved and restored
; registers should be the same.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
; This function should require all FPRs, but no other spill slots.
; We need to save and restore 8 of the 16 FPRs, so the frame size
; Test the saving and restoring of FPRs in large frames.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck -check-prefix=CHECK-NOFP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
; Test a frame size that requires some FPRs to be saved and loaded using
; the 20-bit STDY and LDY while others can use the 12-bit STD and LD.
; Test spilling of FPRs.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
; We need to save and restore 8 of the 16 FPRs and allocate an additional
; 4-byte spill slot, rounded to 8 bytes. The frame size should be exactly
--- /dev/null
+; Like frame-03.ll, but for z13. In this case we have 16 more registers
+; available.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; This function should require all FPRs, but no other spill slots.
+; We need to save and restore 8 of the 16 FPRs, so the frame size
+; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160
+; (the caller-allocated part of the frame) + 224.
+define void @f1(double *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: aghi %r15, -224
+; CHECK: .cfi_def_cfa_offset 384
+; CHECK: std %f8, 216(%r15)
+; CHECK: std %f9, 208(%r15)
+; CHECK: std %f10, 200(%r15)
+; CHECK: std %f11, 192(%r15)
+; CHECK: std %f12, 184(%r15)
+; CHECK: std %f13, 176(%r15)
+; CHECK: std %f14, 168(%r15)
+; CHECK: std %f15, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK: .cfi_offset %f9, -176
+; CHECK: .cfi_offset %f10, -184
+; CHECK: .cfi_offset %f11, -192
+; CHECK: .cfi_offset %f12, -200
+; CHECK: .cfi_offset %f13, -208
+; CHECK: .cfi_offset %f14, -216
+; CHECK: .cfi_offset %f15, -224
+; CHECK-DAG: ld %f0, 0(%r2)
+; CHECK-DAG: ld %f7, 0(%r2)
+; CHECK-DAG: ld %f8, 0(%r2)
+; CHECK-DAG: ld %f15, 0(%r2)
+; CHECK-DAG: vlrepg %v16, 0(%r2)
+; CHECK-DAG: vlrepg %v23, 0(%r2)
+; CHECK-DAG: vlrepg %v24, 0(%r2)
+; CHECK-DAG: vlrepg %v31, 0(%r2)
+; CHECK: ld %f8, 216(%r15)
+; CHECK: ld %f9, 208(%r15)
+; CHECK: ld %f10, 200(%r15)
+; CHECK: ld %f11, 192(%r15)
+; CHECK: ld %f12, 184(%r15)
+; CHECK: ld %f13, 176(%r15)
+; CHECK: ld %f14, 168(%r15)
+; CHECK: ld %f15, 160(%r15)
+; CHECK: aghi %r15, 224
+; CHECK: br %r14
+ %l0 = load volatile double, double *%ptr
+ %l1 = load volatile double, double *%ptr
+ %l2 = load volatile double, double *%ptr
+ %l3 = load volatile double, double *%ptr
+ %l4 = load volatile double, double *%ptr
+ %l5 = load volatile double, double *%ptr
+ %l6 = load volatile double, double *%ptr
+ %l7 = load volatile double, double *%ptr
+ %l8 = load volatile double, double *%ptr
+ %l9 = load volatile double, double *%ptr
+ %l10 = load volatile double, double *%ptr
+ %l11 = load volatile double, double *%ptr
+ %l12 = load volatile double, double *%ptr
+ %l13 = load volatile double, double *%ptr
+ %l14 = load volatile double, double *%ptr
+ %l15 = load volatile double, double *%ptr
+ %l16 = load volatile double, double *%ptr
+ %l17 = load volatile double, double *%ptr
+ %l18 = load volatile double, double *%ptr
+ %l19 = load volatile double, double *%ptr
+ %l20 = load volatile double, double *%ptr
+ %l21 = load volatile double, double *%ptr
+ %l22 = load volatile double, double *%ptr
+ %l23 = load volatile double, double *%ptr
+ %l24 = load volatile double, double *%ptr
+ %l25 = load volatile double, double *%ptr
+ %l26 = load volatile double, double *%ptr
+ %l27 = load volatile double, double *%ptr
+ %l28 = load volatile double, double *%ptr
+ %l29 = load volatile double, double *%ptr
+ %l30 = load volatile double, double *%ptr
+ %l31 = load volatile double, double *%ptr
+ %acc0 = fsub double %l0, %l0
+ %acc1 = fsub double %l1, %acc0
+ %acc2 = fsub double %l2, %acc1
+ %acc3 = fsub double %l3, %acc2
+ %acc4 = fsub double %l4, %acc3
+ %acc5 = fsub double %l5, %acc4
+ %acc6 = fsub double %l6, %acc5
+ %acc7 = fsub double %l7, %acc6
+ %acc8 = fsub double %l8, %acc7
+ %acc9 = fsub double %l9, %acc8
+ %acc10 = fsub double %l10, %acc9
+ %acc11 = fsub double %l11, %acc10
+ %acc12 = fsub double %l12, %acc11
+ %acc13 = fsub double %l13, %acc12
+ %acc14 = fsub double %l14, %acc13
+ %acc15 = fsub double %l15, %acc14
+ %acc16 = fsub double %l16, %acc15
+ %acc17 = fsub double %l17, %acc16
+ %acc18 = fsub double %l18, %acc17
+ %acc19 = fsub double %l19, %acc18
+ %acc20 = fsub double %l20, %acc19
+ %acc21 = fsub double %l21, %acc20
+ %acc22 = fsub double %l22, %acc21
+ %acc23 = fsub double %l23, %acc22
+ %acc24 = fsub double %l24, %acc23
+ %acc25 = fsub double %l25, %acc24
+ %acc26 = fsub double %l26, %acc25
+ %acc27 = fsub double %l27, %acc26
+ %acc28 = fsub double %l28, %acc27
+ %acc29 = fsub double %l29, %acc28
+ %acc30 = fsub double %l30, %acc29
+ %acc31 = fsub double %l31, %acc30
+ store volatile double %acc0, double *%ptr
+ store volatile double %acc1, double *%ptr
+ store volatile double %acc2, double *%ptr
+ store volatile double %acc3, double *%ptr
+ store volatile double %acc4, double *%ptr
+ store volatile double %acc5, double *%ptr
+ store volatile double %acc6, double *%ptr
+ store volatile double %acc7, double *%ptr
+ store volatile double %acc8, double *%ptr
+ store volatile double %acc9, double *%ptr
+ store volatile double %acc10, double *%ptr
+ store volatile double %acc11, double *%ptr
+ store volatile double %acc12, double *%ptr
+ store volatile double %acc13, double *%ptr
+ store volatile double %acc14, double *%ptr
+ store volatile double %acc15, double *%ptr
+ store volatile double %acc16, double *%ptr
+ store volatile double %acc17, double *%ptr
+ store volatile double %acc18, double *%ptr
+ store volatile double %acc19, double *%ptr
+ store volatile double %acc20, double *%ptr
+ store volatile double %acc21, double *%ptr
+ store volatile double %acc22, double *%ptr
+ store volatile double %acc23, double *%ptr
+ store volatile double %acc24, double *%ptr
+ store volatile double %acc25, double *%ptr
+ store volatile double %acc26, double *%ptr
+ store volatile double %acc27, double *%ptr
+ store volatile double %acc28, double *%ptr
+ store volatile double %acc29, double *%ptr
+ store volatile double %acc30, double *%ptr
+ store volatile double %acc31, double *%ptr
+ ret void
+}
+
+; Like f1, but requires one fewer FPR. We allocate in numerical order,
+; so %f15 is the one that gets dropped.
+define void @f2(double *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: aghi %r15, -216
+; CHECK: .cfi_def_cfa_offset 376
+; CHECK: std %f8, 208(%r15)
+; CHECK: std %f9, 200(%r15)
+; CHECK: std %f10, 192(%r15)
+; CHECK: std %f11, 184(%r15)
+; CHECK: std %f12, 176(%r15)
+; CHECK: std %f13, 168(%r15)
+; CHECK: std %f14, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK: .cfi_offset %f9, -176
+; CHECK: .cfi_offset %f10, -184
+; CHECK: .cfi_offset %f11, -192
+; CHECK: .cfi_offset %f12, -200
+; CHECK: .cfi_offset %f13, -208
+; CHECK: .cfi_offset %f14, -216
+; CHECK-NOT: %v15
+; CHECK-NOT: %f15
+; CHECK: ld %f8, 208(%r15)
+; CHECK: ld %f9, 200(%r15)
+; CHECK: ld %f10, 192(%r15)
+; CHECK: ld %f11, 184(%r15)
+; CHECK: ld %f12, 176(%r15)
+; CHECK: ld %f13, 168(%r15)
+; CHECK: ld %f14, 160(%r15)
+; CHECK: aghi %r15, 216
+; CHECK: br %r14
+ %l0 = load volatile double, double *%ptr
+ %l1 = load volatile double, double *%ptr
+ %l2 = load volatile double, double *%ptr
+ %l3 = load volatile double, double *%ptr
+ %l4 = load volatile double, double *%ptr
+ %l5 = load volatile double, double *%ptr
+ %l6 = load volatile double, double *%ptr
+ %l7 = load volatile double, double *%ptr
+ %l8 = load volatile double, double *%ptr
+ %l9 = load volatile double, double *%ptr
+ %l10 = load volatile double, double *%ptr
+ %l11 = load volatile double, double *%ptr
+ %l12 = load volatile double, double *%ptr
+ %l13 = load volatile double, double *%ptr
+ %l14 = load volatile double, double *%ptr
+ %l16 = load volatile double, double *%ptr
+ %l17 = load volatile double, double *%ptr
+ %l18 = load volatile double, double *%ptr
+ %l19 = load volatile double, double *%ptr
+ %l20 = load volatile double, double *%ptr
+ %l21 = load volatile double, double *%ptr
+ %l22 = load volatile double, double *%ptr
+ %l23 = load volatile double, double *%ptr
+ %l24 = load volatile double, double *%ptr
+ %l25 = load volatile double, double *%ptr
+ %l26 = load volatile double, double *%ptr
+ %l27 = load volatile double, double *%ptr
+ %l28 = load volatile double, double *%ptr
+ %l29 = load volatile double, double *%ptr
+ %l30 = load volatile double, double *%ptr
+ %l31 = load volatile double, double *%ptr
+ %acc0 = fsub double %l0, %l0
+ %acc1 = fsub double %l1, %acc0
+ %acc2 = fsub double %l2, %acc1
+ %acc3 = fsub double %l3, %acc2
+ %acc4 = fsub double %l4, %acc3
+ %acc5 = fsub double %l5, %acc4
+ %acc6 = fsub double %l6, %acc5
+ %acc7 = fsub double %l7, %acc6
+ %acc8 = fsub double %l8, %acc7
+ %acc9 = fsub double %l9, %acc8
+ %acc10 = fsub double %l10, %acc9
+ %acc11 = fsub double %l11, %acc10
+ %acc12 = fsub double %l12, %acc11
+ %acc13 = fsub double %l13, %acc12
+ %acc14 = fsub double %l14, %acc13
+ %acc16 = fsub double %l16, %acc14
+ %acc17 = fsub double %l17, %acc16
+ %acc18 = fsub double %l18, %acc17
+ %acc19 = fsub double %l19, %acc18
+ %acc20 = fsub double %l20, %acc19
+ %acc21 = fsub double %l21, %acc20
+ %acc22 = fsub double %l22, %acc21
+ %acc23 = fsub double %l23, %acc22
+ %acc24 = fsub double %l24, %acc23
+ %acc25 = fsub double %l25, %acc24
+ %acc26 = fsub double %l26, %acc25
+ %acc27 = fsub double %l27, %acc26
+ %acc28 = fsub double %l28, %acc27
+ %acc29 = fsub double %l29, %acc28
+ %acc30 = fsub double %l30, %acc29
+ %acc31 = fsub double %l31, %acc30
+ store volatile double %acc0, double *%ptr
+ store volatile double %acc1, double *%ptr
+ store volatile double %acc2, double *%ptr
+ store volatile double %acc3, double *%ptr
+ store volatile double %acc4, double *%ptr
+ store volatile double %acc5, double *%ptr
+ store volatile double %acc6, double *%ptr
+ store volatile double %acc7, double *%ptr
+ store volatile double %acc8, double *%ptr
+ store volatile double %acc9, double *%ptr
+ store volatile double %acc10, double *%ptr
+ store volatile double %acc11, double *%ptr
+ store volatile double %acc12, double *%ptr
+ store volatile double %acc13, double *%ptr
+ store volatile double %acc14, double *%ptr
+ store volatile double %acc16, double *%ptr
+ store volatile double %acc17, double *%ptr
+ store volatile double %acc18, double *%ptr
+ store volatile double %acc19, double *%ptr
+ store volatile double %acc20, double *%ptr
+ store volatile double %acc21, double *%ptr
+ store volatile double %acc22, double *%ptr
+ store volatile double %acc23, double *%ptr
+ store volatile double %acc24, double *%ptr
+ store volatile double %acc25, double *%ptr
+ store volatile double %acc26, double *%ptr
+ store volatile double %acc27, double *%ptr
+ store volatile double %acc28, double *%ptr
+ store volatile double %acc29, double *%ptr
+ store volatile double %acc30, double *%ptr
+ store volatile double %acc31, double *%ptr
+ ret void
+}
+
+; Like f1, but should require only one call-saved FPR.
+define void @f3(double *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: aghi %r15, -168
+; CHECK: .cfi_def_cfa_offset 328
+; CHECK: std %f8, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK-NOT: {{%[fv]9}}
+; CHECK-NOT: {{%[fv]1[0-5]}}
+; CHECK: ld %f8, 160(%r15)
+; CHECK: aghi %r15, 168
+; CHECK: br %r14
+ %l0 = load volatile double, double *%ptr
+ %l1 = load volatile double, double *%ptr
+ %l2 = load volatile double, double *%ptr
+ %l3 = load volatile double, double *%ptr
+ %l4 = load volatile double, double *%ptr
+ %l5 = load volatile double, double *%ptr
+ %l6 = load volatile double, double *%ptr
+ %l7 = load volatile double, double *%ptr
+ %l8 = load volatile double, double *%ptr
+ %l16 = load volatile double, double *%ptr
+ %l17 = load volatile double, double *%ptr
+ %l18 = load volatile double, double *%ptr
+ %l19 = load volatile double, double *%ptr
+ %l20 = load volatile double, double *%ptr
+ %l21 = load volatile double, double *%ptr
+ %l22 = load volatile double, double *%ptr
+ %l23 = load volatile double, double *%ptr
+ %l24 = load volatile double, double *%ptr
+ %l25 = load volatile double, double *%ptr
+ %l26 = load volatile double, double *%ptr
+ %l27 = load volatile double, double *%ptr
+ %l28 = load volatile double, double *%ptr
+ %l29 = load volatile double, double *%ptr
+ %l30 = load volatile double, double *%ptr
+ %l31 = load volatile double, double *%ptr
+ %acc0 = fsub double %l0, %l0
+ %acc1 = fsub double %l1, %acc0
+ %acc2 = fsub double %l2, %acc1
+ %acc3 = fsub double %l3, %acc2
+ %acc4 = fsub double %l4, %acc3
+ %acc5 = fsub double %l5, %acc4
+ %acc6 = fsub double %l6, %acc5
+ %acc7 = fsub double %l7, %acc6
+ %acc8 = fsub double %l8, %acc7
+ %acc16 = fsub double %l16, %acc8
+ %acc17 = fsub double %l17, %acc16
+ %acc18 = fsub double %l18, %acc17
+ %acc19 = fsub double %l19, %acc18
+ %acc20 = fsub double %l20, %acc19
+ %acc21 = fsub double %l21, %acc20
+ %acc22 = fsub double %l22, %acc21
+ %acc23 = fsub double %l23, %acc22
+ %acc24 = fsub double %l24, %acc23
+ %acc25 = fsub double %l25, %acc24
+ %acc26 = fsub double %l26, %acc25
+ %acc27 = fsub double %l27, %acc26
+ %acc28 = fsub double %l28, %acc27
+ %acc29 = fsub double %l29, %acc28
+ %acc30 = fsub double %l30, %acc29
+ %acc31 = fsub double %l31, %acc30
+ store volatile double %acc0, double *%ptr
+ store volatile double %acc1, double *%ptr
+ store volatile double %acc2, double *%ptr
+ store volatile double %acc3, double *%ptr
+ store volatile double %acc4, double *%ptr
+ store volatile double %acc5, double *%ptr
+ store volatile double %acc6, double *%ptr
+ store volatile double %acc7, double *%ptr
+ store volatile double %acc8, double *%ptr
+ store volatile double %acc16, double *%ptr
+ store volatile double %acc17, double *%ptr
+ store volatile double %acc18, double *%ptr
+ store volatile double %acc19, double *%ptr
+ store volatile double %acc20, double *%ptr
+ store volatile double %acc21, double *%ptr
+ store volatile double %acc22, double *%ptr
+ store volatile double %acc23, double *%ptr
+ store volatile double %acc24, double *%ptr
+ store volatile double %acc25, double *%ptr
+ store volatile double %acc26, double *%ptr
+ store volatile double %acc27, double *%ptr
+ store volatile double %acc28, double *%ptr
+ store volatile double %acc29, double *%ptr
+ store volatile double %acc30, double *%ptr
+ store volatile double %acc31, double *%ptr
+ ret void
+}
+
+; This function should use all call-clobbered FPRs and vector registers
+; but no call-saved ones. It shouldn't need to create a frame.
+define void @f4(double *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: %r15
+; CHECK-NOT: {{%[fv][89]}}
+; CHECK-NOT: {{%[fv]1[0-5]}}
+; CHECK: br %r14
+ %l0 = load volatile double, double *%ptr
+ %l1 = load volatile double, double *%ptr
+ %l2 = load volatile double, double *%ptr
+ %l3 = load volatile double, double *%ptr
+ %l4 = load volatile double, double *%ptr
+ %l5 = load volatile double, double *%ptr
+ %l6 = load volatile double, double *%ptr
+ %l7 = load volatile double, double *%ptr
+ %l16 = load volatile double, double *%ptr
+ %l17 = load volatile double, double *%ptr
+ %l18 = load volatile double, double *%ptr
+ %l19 = load volatile double, double *%ptr
+ %l20 = load volatile double, double *%ptr
+ %l21 = load volatile double, double *%ptr
+ %l22 = load volatile double, double *%ptr
+ %l23 = load volatile double, double *%ptr
+ %l24 = load volatile double, double *%ptr
+ %l25 = load volatile double, double *%ptr
+ %l26 = load volatile double, double *%ptr
+ %l27 = load volatile double, double *%ptr
+ %l28 = load volatile double, double *%ptr
+ %l29 = load volatile double, double *%ptr
+ %l30 = load volatile double, double *%ptr
+ %l31 = load volatile double, double *%ptr
+ %acc0 = fsub double %l0, %l0
+ %acc1 = fsub double %l1, %acc0
+ %acc2 = fsub double %l2, %acc1
+ %acc3 = fsub double %l3, %acc2
+ %acc4 = fsub double %l4, %acc3
+ %acc5 = fsub double %l5, %acc4
+ %acc6 = fsub double %l6, %acc5
+ %acc7 = fsub double %l7, %acc6
+ %acc16 = fsub double %l16, %acc7
+ %acc17 = fsub double %l17, %acc16
+ %acc18 = fsub double %l18, %acc17
+ %acc19 = fsub double %l19, %acc18
+ %acc20 = fsub double %l20, %acc19
+ %acc21 = fsub double %l21, %acc20
+ %acc22 = fsub double %l22, %acc21
+ %acc23 = fsub double %l23, %acc22
+ %acc24 = fsub double %l24, %acc23
+ %acc25 = fsub double %l25, %acc24
+ %acc26 = fsub double %l26, %acc25
+ %acc27 = fsub double %l27, %acc26
+ %acc28 = fsub double %l28, %acc27
+ %acc29 = fsub double %l29, %acc28
+ %acc30 = fsub double %l30, %acc29
+ %acc31 = fsub double %l31, %acc30
+ store volatile double %acc0, double *%ptr
+ store volatile double %acc1, double *%ptr
+ store volatile double %acc2, double *%ptr
+ store volatile double %acc3, double *%ptr
+ store volatile double %acc4, double *%ptr
+ store volatile double %acc5, double *%ptr
+ store volatile double %acc6, double *%ptr
+ store volatile double %acc7, double *%ptr
+ store volatile double %acc16, double *%ptr
+ store volatile double %acc17, double *%ptr
+ store volatile double %acc18, double *%ptr
+ store volatile double %acc19, double *%ptr
+ store volatile double %acc20, double *%ptr
+ store volatile double %acc21, double *%ptr
+ store volatile double %acc22, double *%ptr
+ store volatile double %acc23, double *%ptr
+ store volatile double %acc24, double *%ptr
+ store volatile double %acc25, double *%ptr
+ store volatile double %acc26, double *%ptr
+ store volatile double %acc27, double *%ptr
+ store volatile double %acc28, double *%ptr
+ store volatile double %acc29, double *%ptr
+ store volatile double %acc30, double *%ptr
+ store volatile double %acc31, double *%ptr
+ ret void
+}
-; Test v2f64 absolute.
+; Test f64 and v2f64 absolute.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+declare double @llvm.fabs.f64(double)
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
; Test a plain absolute.
%ret = fsub <2 x double> <double -0.0, double -0.0>, %abs
ret <2 x double> %ret
}
+
+; Test an f64 absolute that uses vector registers.
+define double @f3(<2 x double> %val) {
+; CHECK-LABEL: f3:
+; CHECK: wflpdb %f0, %v24
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %ret = call double @llvm.fabs.f64(double %scalar)
+ ret double %ret
+}
+
+; Test an f64 negative absolute that uses vector registers.
+define double @f4(<2 x double> %val) {
+; CHECK-LABEL: f4:
+; CHECK: wflndb %f0, %v24
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %abs = call double @llvm.fabs.f64(double %scalar)
+ %ret = fsub double -0.0, %abs
+ ret double %ret
+}
%ret = fadd <2 x double> %val1, %val2
ret <2 x double> %ret
}
+
+; Test an f64 addition that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfadb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <2 x double> %val1, i32 0
+ %scalar2 = extractelement <2 x double> %val2, i32 0
+ %ret = fadd double %scalar1, %scalar2
+ ret double %ret
+}
-; Test v2f64 comparisons.
+; Test f64 and v2f64 comparisons.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
ret <2 x double> %ret
}
+
+; Test an f64 comparison that uses vector registers.
+define i64 @f29(i64 %a, i64 %b, double %f1, <2 x double> %vec) {
+; CHECK-LABEL: f29:
+; CHECK: wfcdb %f0, %v24
+; CHECK-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f2 = extractelement <2 x double> %vec, i32 0
+ %cond = fcmp oeq double %f1, %f2
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
store <2 x float> %res, <2 x float> *%ptr
ret void
}
+
+; Test conversion of an f64 in a vector register to an f32.
+define float @f2(<2 x double> %vec) {
+; CHECK-LABEL: f2:
+; CHECK: wledb %f0, %v24
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %vec, i32 0
+ %ret = fptrunc double %scalar to float
+ ret float %ret
+}
+
+; Test conversion of an f32 in a vector register to an f64.
+define double @f3(<4 x float> %vec) {
+; CHECK-LABEL: f3:
+; CHECK: wldeb %f0, %v24
+; CHECK: br %r14
+ %scalar = extractelement <4 x float> %vec, i32 0
+ %ret = fpext float %scalar to double
+ ret double %ret
+}
%ret = fdiv <2 x double> %val1, %val2
ret <2 x double> %ret
}
+
+; Test an f64 division that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfddb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <2 x double> %val1, i32 0
+ %scalar2 = extractelement <2 x double> %val2, i32 0
+ %ret = fdiv double %scalar1, %scalar2
+ ret double %ret
+}
%ret = fmul <2 x double> %val1, %val2
ret <2 x double> %ret
}
+
+; Test an f64 multiplication that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfmdb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <2 x double> %val1, i32 0
+ %scalar2 = extractelement <2 x double> %val2, i32 0
+ %ret = fmul double %scalar1, %scalar2
+ ret double %ret
+}
%ret = fsub <2 x double> <double -0.0, double -0.0>, %val
ret <2 x double> %ret
}
+
+; Test an f64 negation that uses vector registers.
+define double @f6(<2 x double> %val) {
+; CHECK-LABEL: f6:
+; CHECK: wflcdb %f0, %v24
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %ret = fsub double -0.0, %scalar
+ ret double %ret
+}
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+declare double @llvm.rint.f64(double)
+declare double @llvm.nearbyint.f64(double)
+declare double @llvm.floor.f64(double)
+declare double @llvm.ceil.f64(double)
+declare double @llvm.trunc.f64(double)
+declare double @llvm.round.f64(double)
declare <2 x double> @llvm.rint.v2f64(<2 x double>)
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
declare <2 x double> @llvm.floor.v2f64(<2 x double>)
%res = call <2 x double> @llvm.round.v2f64(<2 x double> %val)
ret <2 x double> %res
}
+
+define double @f7(<2 x double> %val) {
+; CHECK-LABEL: f7:
+; CHECK: wfidb %f0, %v24, 0, 0
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.rint.f64(double %scalar)
+ ret double %res
+}
+
+define double @f8(<2 x double> %val) {
+; CHECK-LABEL: f8:
+; CHECK: wfidb %f0, %v24, 4, 0
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.nearbyint.f64(double %scalar)
+ ret double %res
+}
+
+define double @f9(<2 x double> %val) {
+; CHECK-LABEL: f9:
+; CHECK: wfidb %f0, %v24, 4, 7
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.floor.f64(double %scalar)
+ ret double %res
+}
+
+define double @f10(<2 x double> %val) {
+; CHECK-LABEL: f10:
+; CHECK: wfidb %f0, %v24, 4, 6
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.ceil.f64(double %scalar)
+ ret double %res
+}
+
+define double @f11(<2 x double> %val) {
+; CHECK-LABEL: f11:
+; CHECK: wfidb %f0, %v24, 4, 5
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.trunc.f64(double %scalar)
+ ret double %res
+}
+
+define double @f12(<2 x double> %val) {
+; CHECK-LABEL: f12:
+; CHECK: wfidb %f0, %v24, 4, 1
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.round.f64(double %scalar)
+ ret double %res
+}
-; Test v2f64 square root.
+; Test f64 and v2f64 square root.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+declare double @llvm.sqrt.f64(double)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
define <2 x double> @f1(<2 x double> %val) {
%ret = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %val)
ret <2 x double> %ret
}
+
+define double @f2(<2 x double> %val) {
+; CHECK-LABEL: f2:
+; CHECK: wfsqdb %f0, %v24
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %ret = call double @llvm.sqrt.f64(double %scalar)
+ ret double %ret
+}
%ret = fsub <2 x double> %val1, %val2
ret <2 x double> %ret
}
+
+; Test an f64 subtraction that uses vector registers.
+define double @f7(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: wfsdb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <2 x double> %val1, i32 0
+ %scalar2 = extractelement <2 x double> %val2, i32 0
+ %ret = fsub double %scalar1, %scalar2
+ ret double %ret
+}