From cd6c57917db22a3913a2cdbadfa79fed3547bdec Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 30 Apr 2013 22:37:26 +0000 Subject: [PATCH] [mips] Instruction selection patterns for DSP-ASE vector select and compare instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180820 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsDSPInstrInfo.td | 73 ++ lib/Target/Mips/MipsISelLowering.cpp | 4 +- lib/Target/Mips/MipsISelLowering.h | 4 + lib/Target/Mips/MipsRegisterInfo.td | 4 + lib/Target/Mips/MipsSEISelLowering.cpp | 63 +- test/CodeGen/Mips/dsp-patterns-cmp-vselect.ll | 641 ++++++++++++++++++ 6 files changed, 786 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/Mips/dsp-patterns-cmp-vselect.ll diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index 730bca7d529..6719cd54a9b 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -79,6 +79,8 @@ def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>; def MipsSHLL_DSP : MipsDSPBase<"SHLL_DSP", SDT_MipsSHIFT_DSP>; def MipsSHRA_DSP : MipsDSPBase<"SHRA_DSP", SDT_MipsSHIFT_DSP>; def MipsSHRL_DSP : MipsDSPBase<"SHRL_DSP", SDT_MipsSHIFT_DSP>; +def MipsSETCC_DSP : MipsDSPBase<"SETCC_DSP", SDTSetCC>; +def MipsSELECT_CC_DSP : MipsDSPBase<"SELECT_CC_DSP", SDTSelectCC>; // Flags. class UseAC { @@ -1237,6 +1239,26 @@ let isPseudo = 1 in { def COPY_AC_DSP : PseudoSE<(outs ACRegsDSP:$dst), (ins ACRegsDSP:$src), []>; +// Pseudo CMP and PICK instructions. +class PseudoCMP : + PseudoDSP<(outs DSPCC:$cmp), (ins DSPRegs:$rs, DSPRegs:$rt), []>, + PseudoInstExpansion<(RealInst DSPRegs:$rs, DSPRegs:$rt)>, NeverHasSideEffects; + +class PseudoPICK : + PseudoDSP<(outs DSPRegs:$rd), (ins DSPCC:$cmp, DSPRegs:$rs, DSPRegs:$rt), []>, + PseudoInstExpansion<(RealInst DSPRegs:$rd, DSPRegs:$rs, DSPRegs:$rt)>, + NeverHasSideEffects; + +def PseudoCMP_EQ_PH : PseudoCMP; +def PseudoCMP_LT_PH : PseudoCMP; +def PseudoCMP_LE_PH : PseudoCMP; +def PseudoCMPU_EQ_QB : PseudoCMP; +def PseudoCMPU_LT_QB : PseudoCMP; +def PseudoCMPU_LE_QB : PseudoCMP; + +def PseudoPICK_PH : PseudoPICK; +def PseudoPICK_QB : PseudoPICK; + // Patterns. class DSPPat : Pat, Requires<[pred]>; @@ -1298,6 +1320,57 @@ def : DSPShiftPat; def : DSPShiftPat; def : DSPShiftPat; +// SETCC/SELECT_CC patterns. +class DSPSetCCPat : + DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)), + (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), + (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs)), + (ValTy ZERO)))>; + +class DSPSetCCPatInv : + DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)), + (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), + (ValTy ZERO), + (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs))))>; + +class DSPSelectCCPat : + DSPPat<(ValTy (MipsSELECT_CC_DSP ValTy:$a, ValTy:$b, ValTy:$c, ValTy:$d, CC)), + (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), $c, $d))>; + +class DSPSelectCCPatInv : + DSPPat<(ValTy (MipsSELECT_CC_DSP ValTy:$a, ValTy:$b, ValTy:$c, ValTy:$d, CC)), + (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), $d, $c))>; + +def : DSPSetCCPat; +def : DSPSetCCPat; +def : DSPSetCCPat; +def : DSPSetCCPatInv; +def : DSPSetCCPatInv; +def : DSPSetCCPatInv; +def : DSPSetCCPat; +def : DSPSetCCPat; +def : DSPSetCCPat; +def : DSPSetCCPatInv; +def : DSPSetCCPatInv; +def : DSPSetCCPatInv; + +def : DSPSelectCCPat; +def : DSPSelectCCPat; +def : DSPSelectCCPat; +def : DSPSelectCCPatInv; +def : DSPSelectCCPatInv; +def : DSPSelectCCPatInv; +def : DSPSelectCCPat; +def : DSPSelectCCPat; +def : DSPSelectCCPat; +def : DSPSelectCCPatInv; +def : DSPSelectCCPatInv; +def : DSPSelectCCPatInv; + // Extr patterns. class EXTR_W_TY1_R2_Pat : DSPPat<(i32 (OpNode CPURegs:$rs, ACRegsDSP:$ac)), diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 8a0673feb64..611d48326e6 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -200,6 +200,8 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::SHLL_DSP: return "MipsISD::SHLL_DSP"; case MipsISD::SHRA_DSP: return "MipsISD::SHRA_DSP"; case MipsISD::SHRL_DSP: return "MipsISD::SHRL_DSP"; + case MipsISD::SETCC_DSP: return "MipsISD::SETCC_DSP"; + case MipsISD::SELECT_CC_DSP: return "MipsISD::SELECT_CC_DSP"; default: return NULL; } } @@ -213,7 +215,7 @@ MipsTargetLowering(MipsTargetMachine &TM) // Mips does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); - setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // Load extented operations for i1 types must be promoted setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index c1574fac578..5587e8f5814 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -148,6 +148,10 @@ namespace llvm { SHRA_DSP, SHRL_DSP, + // DSP setcc and select_cc nodes. + SETCC_DSP, + SELECT_CC_DSP, + // Load/Store Left/Right nodes. LWL = ISD::FIRST_TARGET_MEMORY_OPCODE, LWR, diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 30699ac4210..e314e946a01 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -265,6 +265,7 @@ let Namespace = "Mips" in { def AC0_64 : ACC<0, "ac0", [LO64, HI64]>; def DSPCtrl : Register<"dspctrl">; + def DSPCCond : Register<"">; } //===----------------------------------------------------------------------===// @@ -362,6 +363,9 @@ def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> { let Size = 64; } +def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>; + +// Register Operands. def CPURegsAsmOperand : AsmOperandClass { let Name = "CPURegsAsm"; let ParserMethod = "parseCPURegs"; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 7e103bbef74..8544bb89107 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -56,6 +56,8 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); + setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::VSELECT); } if (Subtarget->hasDSPR2()) @@ -373,9 +375,57 @@ static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); } +static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { + bool IsV216 = (Ty == MVT::v2i16); + + switch (CC) { + case ISD::SETEQ: + case ISD::SETNE: return true; + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: return IsV216; + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUGT: + case ISD::SETUGE: return !IsV216; + default: return false; + } +} + +static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { + EVT Ty = N->getValueType(0); + + if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) + return SDValue(); + + if (!isLegalDSPCondCode(Ty, cast(N->getOperand(2))->get())) + return SDValue(); + + return DAG.getNode(MipsISD::SETCC_DSP, N->getDebugLoc(), Ty, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); +} + +static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { + EVT Ty = N->getValueType(0); + + if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) + return SDValue(); + + SDValue SetCC = N->getOperand(0); + + if (SetCC.getOpcode() != MipsISD::SETCC_DSP) + return SDValue(); + + return DAG.getNode(MipsISD::SELECT_CC_DSP, N->getDebugLoc(), Ty, + SetCC.getOperand(0), SetCC.getOperand(1), N->getOperand(1), + N->getOperand(2), SetCC.getOperand(2)); +} + SDValue MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; + SDValue Val; switch (N->getOpcode()) { case ISD::ADDE: @@ -388,9 +438,18 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { return performSRACombine(N, DAG, DCI, Subtarget); case ISD::SRL: return performSRLCombine(N, DAG, DCI, Subtarget); - default: - return MipsTargetLowering::PerformDAGCombine(N, DCI); + case ISD::VSELECT: + return performVSELECTCombine(N, DAG); + case ISD::SETCC: { + Val = performSETCCCombine(N, DAG); + break; } + } + + if (Val.getNode()) + return Val; + + return MipsTargetLowering::PerformDAGCombine(N, DCI); } MachineBasicBlock * diff --git a/test/CodeGen/Mips/dsp-patterns-cmp-vselect.ll b/test/CodeGen/Mips/dsp-patterns-cmp-vselect.ll new file mode 100644 index 00000000000..9f2f0661f99 --- /dev/null +++ b/test/CodeGen/Mips/dsp-patterns-cmp-vselect.ll @@ -0,0 +1,641 @@ +; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s + +; CHECK: select_v2q15_eq_: +; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}} +; CHECK: pick.ph ${{[0-9]+}}, $6, $7 + +define { i32 } @select_v2q15_eq_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = bitcast i32 %a3.coerce to <2 x i16> + %cmp = icmp eq <2 x i16> %0, %1 + %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3 + %4 = bitcast <2 x i16> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v2q15_lt_: +; CHECK: cmp.lt.ph $4, $5 +; CHECK: pick.ph ${{[0-9]+}}, $6, $7 + +define { i32 } @select_v2q15_lt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = bitcast i32 %a3.coerce to <2 x i16> + %cmp = icmp slt <2 x i16> %0, %1 + %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3 + %4 = bitcast <2 x i16> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v2q15_le_: +; CHECK: cmp.le.ph $4, $5 +; CHECK: pick.ph ${{[0-9]+}}, $6, $7 + +define { i32 } @select_v2q15_le_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = bitcast i32 %a3.coerce to <2 x i16> + %cmp = icmp sle <2 x i16> %0, %1 + %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3 + %4 = bitcast <2 x i16> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v2q15_ne_: +; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}} +; CHECK: pick.ph ${{[0-9]+}}, $7, $6 + +define { i32 } @select_v2q15_ne_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = bitcast i32 %a3.coerce to <2 x i16> + %cmp = icmp ne <2 x i16> %0, %1 + %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3 + %4 = bitcast <2 x i16> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v2q15_gt_: +; CHECK: cmp.le.ph $4, $5 +; CHECK: pick.ph ${{[0-9]+}}, $7, $6 + +define { i32 } @select_v2q15_gt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = bitcast i32 %a3.coerce to <2 x i16> + %cmp = icmp sgt <2 x i16> %0, %1 + %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3 + %4 = bitcast <2 x i16> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v2q15_ge_: +; CHECK: cmp.lt.ph $4, $5 +; CHECK: pick.ph ${{[0-9]+}}, $7, $6 + +define { i32 } @select_v2q15_ge_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = bitcast i32 %a3.coerce to <2 x i16> + %cmp = icmp sge <2 x i16> %0, %1 + %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3 + %4 = bitcast <2 x i16> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v4ui8_eq_: +; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}} +; CHECK: pick.qb ${{[0-9]+}}, $6, $7 + +define { i32 } @select_v4ui8_eq_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = bitcast i32 %a2.coerce to <4 x i8> + %3 = bitcast i32 %a3.coerce to <4 x i8> + %cmp = icmp eq <4 x i8> %0, %1 + %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3 + %4 = bitcast <4 x i8> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v4ui8_lt_: +; CHECK: cmpu.lt.qb $4, $5 +; CHECK: pick.qb ${{[0-9]+}}, $6, $7 + +define { i32 } @select_v4ui8_lt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = bitcast i32 %a2.coerce to <4 x i8> + %3 = bitcast i32 %a3.coerce to <4 x i8> + %cmp = icmp ult <4 x i8> %0, %1 + %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3 + %4 = bitcast <4 x i8> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v4ui8_le_: +; CHECK: cmpu.le.qb $4, $5 +; CHECK: pick.qb ${{[0-9]+}}, $6, $7 + +define { i32 } @select_v4ui8_le_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = bitcast i32 %a2.coerce to <4 x i8> + %3 = bitcast i32 %a3.coerce to <4 x i8> + %cmp = icmp ule <4 x i8> %0, %1 + %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3 + %4 = bitcast <4 x i8> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v4ui8_ne_: +; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}} +; CHECK: pick.qb ${{[0-9]+}}, $7, $6 + +define { i32 } @select_v4ui8_ne_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = bitcast i32 %a2.coerce to <4 x i8> + %3 = bitcast i32 %a3.coerce to <4 x i8> + %cmp = icmp ne <4 x i8> %0, %1 + %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3 + %4 = bitcast <4 x i8> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v4ui8_gt_: +; CHECK: cmpu.le.qb $4, $5 +; CHECK: pick.qb ${{[0-9]+}}, $7, $6 + +define { i32 } @select_v4ui8_gt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = bitcast i32 %a2.coerce to <4 x i8> + %3 = bitcast i32 %a3.coerce to <4 x i8> + %cmp = icmp ugt <4 x i8> %0, %1 + %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3 + %4 = bitcast <4 x i8> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v4ui8_ge_: +; CHECK: cmpu.lt.qb $4, $5 +; CHECK: pick.qb ${{[0-9]+}}, $7, $6 + +define { i32 } @select_v4ui8_ge_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = bitcast i32 %a2.coerce to <4 x i8> + %3 = bitcast i32 %a3.coerce to <4 x i8> + %cmp = icmp uge <4 x i8> %0, %1 + %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3 + %4 = bitcast <4 x i8> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v2ui16_lt_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @select_v2ui16_lt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = bitcast i32 %a3.coerce to <2 x i16> + %cmp = icmp ult <2 x i16> %0, %1 + %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3 + %4 = bitcast <2 x i16> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v2ui16_le_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @select_v2ui16_le_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = bitcast i32 %a3.coerce to <2 x i16> + %cmp = icmp ule <2 x i16> %0, %1 + %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3 + %4 = bitcast <2 x i16> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v2ui16_gt_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @select_v2ui16_gt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = bitcast i32 %a3.coerce to <2 x i16> + %cmp = icmp ugt <2 x i16> %0, %1 + %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3 + %4 = bitcast <2 x i16> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v2ui16_ge_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @select_v2ui16_ge_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %2 = bitcast i32 %a2.coerce to <2 x i16> + %3 = bitcast i32 %a3.coerce to <2 x i16> + %cmp = icmp uge <2 x i16> %0, %1 + %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3 + %4 = bitcast <2 x i16> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v4i8_lt_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @select_v4i8_lt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = bitcast i32 %a2.coerce to <4 x i8> + %3 = bitcast i32 %a3.coerce to <4 x i8> + %cmp = icmp slt <4 x i8> %0, %1 + %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3 + %4 = bitcast <4 x i8> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v4i8_le_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @select_v4i8_le_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = bitcast i32 %a2.coerce to <4 x i8> + %3 = bitcast i32 %a3.coerce to <4 x i8> + %cmp = icmp sle <4 x i8> %0, %1 + %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3 + %4 = bitcast <4 x i8> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v4i8_gt_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @select_v4i8_gt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = bitcast i32 %a2.coerce to <4 x i8> + %3 = bitcast i32 %a3.coerce to <4 x i8> + %cmp = icmp sgt <4 x i8> %0, %1 + %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3 + %4 = bitcast <4 x i8> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: select_v4i8_ge_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @select_v4i8_ge_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %2 = bitcast i32 %a2.coerce to <4 x i8> + %3 = bitcast i32 %a3.coerce to <4 x i8> + %cmp = icmp sge <4 x i8> %0, %1 + %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3 + %4 = bitcast <4 x i8> %or to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v2q15_eq_: +; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}} +; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}} + +define { i32 } @compare_v2q15_eq_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %cmp = icmp eq <2 x i16> %0, %1 + %sext = sext <2 x i1> %cmp to <2 x i16> + %2 = bitcast <2 x i16> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v2q15_lt_: +; CHECK: cmp.lt.ph $4, $5 +; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}} + +define { i32 } @compare_v2q15_lt_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %cmp = icmp slt <2 x i16> %0, %1 + %sext = sext <2 x i1> %cmp to <2 x i16> + %2 = bitcast <2 x i16> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v2q15_le_: +; CHECK: cmp.le.ph $4, $5 +; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}} + +define { i32 } @compare_v2q15_le_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %cmp = icmp sle <2 x i16> %0, %1 + %sext = sext <2 x i1> %cmp to <2 x i16> + %2 = bitcast <2 x i16> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v2q15_ne_: +; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}} +; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}} + +define { i32 } @compare_v2q15_ne_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %cmp = icmp ne <2 x i16> %0, %1 + %sext = sext <2 x i1> %cmp to <2 x i16> + %2 = bitcast <2 x i16> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v2q15_gt_: +; CHECK: cmp.le.ph $4, $5 +; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}} + +define { i32 } @compare_v2q15_gt_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %cmp = icmp sgt <2 x i16> %0, %1 + %sext = sext <2 x i1> %cmp to <2 x i16> + %2 = bitcast <2 x i16> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v2q15_ge_: +; CHECK: cmp.lt.ph $4, $5 +; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}} + +define { i32 } @compare_v2q15_ge_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %cmp = icmp sge <2 x i16> %0, %1 + %sext = sext <2 x i1> %cmp to <2 x i16> + %2 = bitcast <2 x i16> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v4ui8_eq_: +; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}} +; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}} + +define { i32 } @compare_v4ui8_eq_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %cmp = icmp eq <4 x i8> %0, %1 + %sext = sext <4 x i1> %cmp to <4 x i8> + %2 = bitcast <4 x i8> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v4ui8_lt_: +; CHECK: cmpu.lt.qb $4, $5 +; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}} + +define { i32 } @compare_v4ui8_lt_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %cmp = icmp ult <4 x i8> %0, %1 + %sext = sext <4 x i1> %cmp to <4 x i8> + %2 = bitcast <4 x i8> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v4ui8_le_: +; CHECK: cmpu.le.qb $4, $5 +; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}} + +define { i32 } @compare_v4ui8_le_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %cmp = icmp ule <4 x i8> %0, %1 + %sext = sext <4 x i1> %cmp to <4 x i8> + %2 = bitcast <4 x i8> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v4ui8_ne_: +; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}} +; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}} + +define { i32 } @compare_v4ui8_ne_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %cmp = icmp ne <4 x i8> %0, %1 + %sext = sext <4 x i1> %cmp to <4 x i8> + %2 = bitcast <4 x i8> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v4ui8_gt_: +; CHECK: cmpu.le.qb $4, $5 +; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}} + +define { i32 } @compare_v4ui8_gt_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %cmp = icmp ugt <4 x i8> %0, %1 + %sext = sext <4 x i1> %cmp to <4 x i8> + %2 = bitcast <4 x i8> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v4ui8_ge_: +; CHECK: cmpu.lt.qb $4, $5 +; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}} + +define { i32 } @compare_v4ui8_ge_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %cmp = icmp uge <4 x i8> %0, %1 + %sext = sext <4 x i1> %cmp to <4 x i8> + %2 = bitcast <4 x i8> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v2ui16_lt_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @compare_v2ui16_lt_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %cmp = icmp ult <2 x i16> %0, %1 + %sext = sext <2 x i1> %cmp to <2 x i16> + %2 = bitcast <2 x i16> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v2ui16_le_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @compare_v2ui16_le_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %cmp = icmp ule <2 x i16> %0, %1 + %sext = sext <2 x i1> %cmp to <2 x i16> + %2 = bitcast <2 x i16> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v2ui16_gt_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @compare_v2ui16_gt_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %cmp = icmp ugt <2 x i16> %0, %1 + %sext = sext <2 x i1> %cmp to <2 x i16> + %2 = bitcast <2 x i16> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v2ui16_ge_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @compare_v2ui16_ge_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %1 = bitcast i32 %a1.coerce to <2 x i16> + %cmp = icmp uge <2 x i16> %0, %1 + %sext = sext <2 x i1> %cmp to <2 x i16> + %2 = bitcast <2 x i16> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v4i8_lt_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @compare_v4i8_lt_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %cmp = icmp slt <4 x i8> %0, %1 + %sext = sext <4 x i1> %cmp to <4 x i8> + %2 = bitcast <4 x i8> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v4i8_le_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @compare_v4i8_le_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %cmp = icmp sle <4 x i8> %0, %1 + %sext = sext <4 x i1> %cmp to <4 x i8> + %2 = bitcast <4 x i8> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v4i8_gt_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @compare_v4i8_gt_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %cmp = icmp sgt <4 x i8> %0, %1 + %sext = sext <4 x i1> %cmp to <4 x i8> + %2 = bitcast <4 x i8> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} + +; CHECK: compare_v4i8_ge_: +; CHECK-NOT: cmp +; CHECK-NOT: pick + +define { i32 } @compare_v4i8_ge_(i32 %a0.coerce, i32 %a1.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %1 = bitcast i32 %a1.coerce to <4 x i8> + %cmp = icmp sge <4 x i8> %0, %1 + %sext = sext <4 x i1> %cmp to <4 x i8> + %2 = bitcast <4 x i8> %sext to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0 + ret { i32 } %.fca.0.insert +} -- 2.34.1