From 506d3dfa90cfee6ab4736a6a2c892e9059a7864d Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Wed, 29 Mar 2006 23:07:14 +0000
Subject: [PATCH] - Added some SSE2 128-bit packed integer ops.
- Added SSE2 128-bit integer pack with signed saturation ops.
- Added pshufhw and pshuflw ops.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27252 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 106 +++++++++++++-
 lib/Target/X86/X86ISelLowering.h   |  18 +++
 lib/Target/X86/X86InstrSSE.td      | 215 ++++++++++++++++++++++++++---
 3 files changed, 314 insertions(+), 25 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 8383f431eb5..072e1990e79 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1399,10 +1399,67 @@ bool X86::isPSHUFDMask(SDNode *N) {
     return false;
 
   // Check if the value doesn't reference the second vector.
-  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) {
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
+           "Invalid VECTOR_SHUFFLE mask!");
+    if (cast<ConstantSDNode>(N->getOperand(i))->getValue() >= 4)
+      return false;
+  }
+
+  return true;
+}
+
+/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
+bool X86::isPSHUFHWMask(SDNode *N) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+  if (N->getNumOperands() != 8)
+    return false;
+
+  // Lower quadword copied in order.
+  for (unsigned i = 0; i != 4; ++i) {
     assert(isa<ConstantSDNode>(N->getOperand(i)) &&
            "Invalid VECTOR_SHUFFLE mask!");
-    if (cast<ConstantSDNode>(N->getOperand(i))->getValue() >= 4) return false;
+    if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != i)
+      return false;
+  }
+
+  // Upper quadword shuffled.
+  for (unsigned i = 4; i != 8; ++i) {
+    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
+           "Invalid VECTOR_SHUFFLE mask!");
+    unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
+    if (Val < 4 || Val > 7)
+      return false;
+  }
+
+  return true;
+}
+
+/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
+bool X86::isPSHUFLWMask(SDNode *N) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+  if (N->getNumOperands() != 8)
+    return false;
+
+  // Upper quadword copied in order.
+  for (unsigned i = 4; i != 8; ++i) {
+    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
+           "Invalid VECTOR_SHUFFLE mask!");
+    if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != i)
+      return false;
+  }
+
+  // Lower quadword shuffled.
+  for (unsigned i = 0; i != 4; ++i) {
+    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
+           "Invalid VECTOR_SHUFFLE mask!");
+    unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
+    if (Val > 4)
+      return false;
   }
 
   return true;
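The two new predicates encode a rule that is easy to state over a bare eight-entry index array. The following standalone sketch (plain C++, not part of the patch; the function names are illustrative) checks the same two conditions: the untouched quadword must be copied in order, and the shuffled quadword may only select elements from within itself. The sketch uses the strict bound (index <= 3) for the low quadword, whereas the isPSHUFLWMask check added above also lets an index of 4 through.

#include <cstdio>

// Reference check for a PSHUFHW-style mask over v8i16: the low quadword
// (elements 0-3) must be an identity copy, and the high quadword (elements
// 4-7) may only select from elements 4-7.
static bool isPshufhwLikeMask(const unsigned M[8]) {
  for (unsigned i = 0; i != 4; ++i)
    if (M[i] != i)
      return false;
  for (unsigned i = 4; i != 8; ++i)
    if (M[i] < 4 || M[i] > 7)
      return false;
  return true;
}

// Mirror image for PSHUFLW: the high quadword is an identity copy and the
// low quadword selects only from elements 0-3.
static bool isPshuflwLikeMask(const unsigned M[8]) {
  for (unsigned i = 4; i != 8; ++i)
    if (M[i] != i)
      return false;
  for (unsigned i = 0; i != 4; ++i)
    if (M[i] > 3)
      return false;
  return true;
}

int main() {
  unsigned hi[8] = {0, 1, 2, 3, 7, 6, 5, 4};   // reverse the high quadword
  unsigned lo[8] = {3, 2, 1, 0, 4, 5, 6, 7};   // reverse the low quadword
  std::printf("hi mask: pshufhw=%d pshuflw=%d\n",
              isPshufhwLikeMask(hi), isPshuflwLikeMask(hi));  // 1 0
  std::printf("lo mask: pshufhw=%d pshuflw=%d\n",
              isPshufhwLikeMask(lo), isPshuflwLikeMask(lo));  // 0 1
  return 0;
}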
@@ -1431,7 +1488,7 @@ bool X86::isSHUFPMask(SDNode *N) {
   // Each half must refer to only one of the vector.
   SDOperand Elt = N->getOperand(0);
   assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
-  for (unsigned i = 1; i != NumElems / 2; ++i) {
+  for (unsigned i = 1; i < NumElems / 2; ++i) {
     assert(isa<ConstantSDNode>(N->getOperand(i)) &&
            "Invalid VECTOR_SHUFFLE mask!");
     if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
@@ -1440,7 +1497,7 @@ bool X86::isSHUFPMask(SDNode *N) {
   }
   Elt = N->getOperand(NumElems / 2);
   assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
-  for (unsigned i = NumElems / 2; i != NumElems; ++i) {
+  for (unsigned i = NumElems / 2 + 1; i < NumElems; ++i) {
     assert(isa<ConstantSDNode>(N->getOperand(i)) &&
            "Invalid VECTOR_SHUFFLE mask!");
     if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
@@ -1583,6 +1640,40 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
   return Mask;
 }
 
+/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
+/// instructions.
+unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
+  unsigned Mask = 0;
+  // 8 nodes, but we only care about the last 4.
+  for (unsigned i = 7; i >= 4; --i) {
+    unsigned Val
+      = cast<ConstantSDNode>(N->getOperand(i))->getValue();
+    Mask |= (Val - 4);
+    if (i != 4)
+      Mask <<= 2;
+  }
+
+  return Mask;
+}
+
+/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
+/// instructions.
+unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
+  unsigned Mask = 0;
+  // 8 nodes, but we only care about the first 4.
+  for (int i = 3; i >= 0; --i) {
+    unsigned Val
+      = cast<ConstantSDNode>(N->getOperand(i))->getValue();
+    Mask |= Val;
+    if (i != 0)
+      Mask <<= 2;
+  }
+
+  return Mask;
+}
+
 /// NormalizeVectorShuffle - Swap vector_shuffle operands (as well as
 /// values in ther permute mask if needed. Use V1 as second vector if it is
 /// undef. Return an empty SDOperand is it is already well formed.
@@ -2399,7 +2490,10 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
 
     // Splat && PSHUFD's 2nd vector must be undef.
     if (X86::isSplatMask(PermMask.Val) ||
-        ((MVT::isInteger(VT) && X86::isPSHUFDMask(PermMask.Val)))) {
+        ((MVT::isInteger(VT) &&
+          (X86::isPSHUFDMask(PermMask.Val) ||
+           X86::isPSHUFHWMask(PermMask.Val) ||
+           X86::isPSHUFLWMask(PermMask.Val))))) {
       if (V2.getOpcode() != ISD::UNDEF)
         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                            DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
@@ -2607,6 +2701,8 @@ X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
   return (Mask.Val->getNumOperands() == 2 ||
           X86::isSplatMask(Mask.Val) ||
           X86::isPSHUFDMask(Mask.Val) ||
+          X86::isPSHUFHWMask(Mask.Val) ||
+          X86::isPSHUFLWMask(Mask.Val) ||
           X86::isSHUFPMask(Mask.Val) ||
           X86::isUNPCKLMask(Mask.Val) ||
           X86::isUNPCKHMask(Mask.Val));
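getShufflePSHUFHWImmediate and getShufflePSHUFLWImmediate build the 8-bit immediate the hardware expects: one 2-bit source index per destination element of the shuffled quadword, with the first shuffled element in the low bits. A minimal standalone sketch of the same packing (plain C++, not LLVM code; packShuffleImm is an illustrative name), with two worked values:

#include <cstdio>

// Pack the four indices of the shuffled quadword into the 8-bit immediate:
// imm = e0 | (e1 << 2) | (e2 << 4) | (e3 << 6).  For the PSHUFHW form the
// indices are 4..7, so 'base' (4) is subtracted first so each fits in 2 bits.
static unsigned packShuffleImm(const unsigned elt[4], unsigned base) {
  unsigned imm = 0;
  for (int i = 3; i >= 0; --i) {
    imm |= (elt[i] - base);
    if (i != 0)
      imm <<= 2;
  }
  return imm;
}

int main() {
  unsigned low[4]  = {2, 1, 3, 0};   // a pshuflw-style shuffle of elements 0-3
  unsigned high[4] = {7, 6, 5, 4};   // a pshufhw-style reversal of elements 4-7
  std::printf("pshuflw imm = 0x%02x\n", packShuffleImm(low, 0));   // 0x36
  std::printf("pshufhw imm = 0x%02x\n", packShuffleImm(high, 4));  // 0x1b
  return 0;
}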
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index e9999c894eb..3bd2b88c6cc 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -184,6 +184,14 @@ namespace llvm {
    /// specifies a shuffle of elements that is suitable for input to PSHUFD.
    bool isPSHUFDMask(SDNode *N);
 
+   /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a shuffle of elements that is suitable for input to PSHUFHW.
+   bool isPSHUFHWMask(SDNode *N);
+
+   /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a shuffle of elements that is suitable for input to PSHUFLW.
+   bool isPSHUFLWMask(SDNode *N);
+
    /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
    /// specifies a shuffle of elements that is suitable for input to SHUFP*.
    bool isSHUFPMask(SDNode *N);
@@ -212,6 +220,16 @@ namespace llvm {
    /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
    /// instructions.
    unsigned getShuffleSHUFImmediate(SDNode *N);
+
+   /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+   /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
+   /// instructions.
+   unsigned getShufflePSHUFHWImmediate(SDNode *N);
+
+   /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+   /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
+   /// instructions.
+   unsigned getShufflePSHUFLWImmediate(SDNode *N);
  }
 
 //===----------------------------------------------------------------------===//
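For context on what the pshufhw/pshuflw selection patterns added to X86InstrSSE.td below are meant to match, here is a scalar reference model (plain C++, illustrative names, assuming the usual SSE2 semantics) of how the 8-bit immediate drives each instruction: one quadword of the v8i16 value is copied through unchanged, and each 16-bit element of the other quadword is chosen by a 2-bit field of the immediate.

#include <cstdint>
#include <cstdio>

// pshuflw: the low quadword is shuffled by the immediate, the high quadword
// is copied through.
static void pshuflwRef(uint16_t dst[8], const uint16_t src[8], unsigned imm) {
  for (unsigned i = 0; i != 4; ++i)
    dst[i] = src[(imm >> (2 * i)) & 3];
  for (unsigned i = 4; i != 8; ++i)
    dst[i] = src[i];
}

// pshufhw: the low quadword is copied through, the high quadword is shuffled.
static void pshufhwRef(uint16_t dst[8], const uint16_t src[8], unsigned imm) {
  for (unsigned i = 0; i != 4; ++i)
    dst[i] = src[i];
  for (unsigned i = 4; i != 8; ++i)
    dst[i] = src[4 + ((imm >> (2 * (i - 4))) & 3)];
}

int main() {
  uint16_t src[8] = {10, 11, 12, 13, 14, 15, 16, 17};
  uint16_t lo[8], hi[8];
  pshuflwRef(lo, src, 0x1b);   // 0x1b = 00 01 10 11b: reverse the low quadword
  pshufhwRef(hi, src, 0x1b);   // same immediate: reverse the high quadword
  for (unsigned i = 0; i != 8; ++i)
    std::printf("%d ", (int)lo[i]);   // 13 12 11 10 14 15 16 17
  std::printf("\n");
  for (unsigned i = 0; i != 8; ++i)
    std::printf("%d ", (int)hi[i]);   // 10 11 12 13 17 16 15 14
  std::printf("\n");
  return 0;
}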
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index da6effad354..2cd954a7a79 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -45,6 +45,8 @@ def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
 def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
 def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
 
+def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
+def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
 def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
 def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
 
@@ -58,6 +60,18 @@ def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
   return getI8Imm(X86::getShuffleSHUFImmediate(N));
 }]>;
 
+// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
+// PSHUFHW imm.
+def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{
+  return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
+// PSHUFLW imm.
+def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{
+  return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
+}]>;
+
 def SSE_splat_mask : PatLeaf<(build_vector), [{
   return X86::isSplatMask(N);
 }], SHUFFLE_get_shuf_imm>;
@@ -82,6 +96,14 @@ def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
   return X86::isPSHUFDMask(N);
 }], SHUFFLE_get_shuf_imm>;
 
+def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{
+  return X86::isPSHUFHWMask(N);
+}], SHUFFLE_get_pshufhw_imm>;
+
+def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{
+  return X86::isPSHUFLWMask(N);
+}], SHUFFLE_get_pshuflw_imm>;
+
 def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
   return X86::isSHUFPMask(N);
 }], SHUFFLE_get_shuf_imm>;
@@ -935,25 +957,6 @@ def CMPPDrm : PDI<0xC2, MRMSrcMem,
 }
 
 // Shuffle and unpack instructions
-def PSHUFWrr : PSIi8<0x70, MRMDestReg,
-                     (ops VR64:$dst, VR64:$src1, i8imm:$src2),
-                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
-def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
-                     (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
-                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
-def PSHUFDrr : PDIi8<0x70, MRMDestReg,
-                     (ops VR128:$dst, VR128:$src1, i8imm:$src2),
-                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set VR128:$dst, (v4i32 (vector_shuffle
-                                               VR128:$src1, (undef),
-                                               PSHUFD_shuffle_mask:$src2)))]>;
-def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
-                     (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
-                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set VR128:$dst, (v4i32 (vector_shuffle
-                                               (load addr:$src1), (undef),
-                                               PSHUFD_shuffle_mask:$src2)))]>;
-
 let isTwoAddress = 1 in {
 def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
@@ -1081,6 +1084,10 @@ def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
 def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
               "paddd {$src2, $dst|$dst, $src2}",
               [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
+
+def PADDQrr : PDI<0xD4, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+              "paddq {$src2, $dst|$dst, $src2}",
+              [(set VR128:$dst, (v2i64 (add VR128:$src1, VR128:$src2)))]>;
 }
 def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
               "paddb {$src2, $dst|$dst, $src2}",
@@ -1094,6 +1101,10 @@ def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
               "paddd {$src2, $dst|$dst, $src2}",
               [(set VR128:$dst, (v4i32 (add VR128:$src1,
                                         (load addr:$src2))))]>;
+def PADDQrm : PDI<0xD4, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+              "paddq {$src2, $dst|$dst, $src2}",
+              [(set VR128:$dst, (v2i64 (add VR128:$src1,
+                                        (load addr:$src2))))]>;
 
 def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
               "psubb {$src2, $dst|$dst, $src2}",
@@ -1104,6 +1115,9 @@ def PSUBWrr : PDI<0xF9, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
 def PSUBDrr : PDI<0xFA, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
               "psubd {$src2, $dst|$dst, $src2}",
               [(set VR128:$dst, (v4i32 (sub VR128:$src1, VR128:$src2)))]>;
+def PSUBQrr : PDI<0xFB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+              "psubq {$src2, $dst|$dst, $src2}",
+              [(set VR128:$dst, (v2i64 (sub VR128:$src1, VR128:$src2)))]>;
 
 def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
               "psubb {$src2, $dst|$dst, $src2}",
@@ -1117,8 +1131,146 @@ def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
               "psubd {$src2, $dst|$dst, $src2}",
               [(set VR128:$dst, (v4i32 (sub VR128:$src1,
                                         (load addr:$src2))))]>;
+def PSUBQrm : PDI<0xFB, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
+                                    f128mem:$src2),
+              "psubq {$src2, $dst|$dst, $src2}",
+              [(set VR128:$dst, (v2i64 (sub VR128:$src1,
+                                        (load addr:$src2))))]>;
+}
+
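The new paddq/psubq patterns select v2i64 add and sub, i.e. two independent 64-bit lanes with ordinary modulo-2^64 wraparound and no carry between lanes. A small reference model (plain C++, illustrative names, assuming the usual SSE2 semantics):

#include <cstdint>
#include <cstdio>

// Reference model for paddq/psubq: each 64-bit lane is added or subtracted
// independently; overflow wraps within the lane and never carries across.
struct V2i64 { uint64_t lane[2]; };

static V2i64 paddqRef(V2i64 a, V2i64 b) {
  return { { a.lane[0] + b.lane[0], a.lane[1] + b.lane[1] } };
}

static V2i64 psubqRef(V2i64 a, V2i64 b) {
  return { { a.lane[0] - b.lane[0], a.lane[1] - b.lane[1] } };
}

int main() {
  V2i64 a = { { UINT64_MAX, 1 } };
  V2i64 b = { { 1, 2 } };
  V2i64 s = paddqRef(a, b);   // low lane wraps to 0; high lane is 3
  V2i64 d = psubqRef(a, b);   // high lane wraps to UINT64_MAX
  std::printf("%llu %llu\n", (unsigned long long)s.lane[0],
              (unsigned long long)s.lane[1]);
  std::printf("%llu %llu\n", (unsigned long long)d.lane[0],
              (unsigned long long)d.lane[1]);
  return 0;
}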
+// Logical
+let isTwoAddress = 1 in {
+let isCommutable = 1 in {
+def PANDrr : PDI<0xDB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+             "pand {$src2, $dst|$dst, $src2}",
+             [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
+
+def PANDrm : PDI<0xDB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+             "pand {$src2, $dst|$dst, $src2}",
+             [(set VR128:$dst, (v2i64 (and VR128:$src1,
+                                       (load addr:$src2))))]>;
+def PORrr : PDI<0xDB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+            "por {$src2, $dst|$dst, $src2}",
+            [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
+
+def PORrm : PDI<0xDB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+            "por {$src2, $dst|$dst, $src2}",
+            [(set VR128:$dst, (v2i64 (or VR128:$src1,
+                                      (load addr:$src2))))]>;
+def PXORrr : PDI<0xEF, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+             "pxor {$src2, $dst|$dst, $src2}",
+             [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
+
+def PXORrm : PDI<0xEF, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+             "pxor {$src2, $dst|$dst, $src2}",
+             [(set VR128:$dst, (v2i64 (xor VR128:$src1,
+                                       (load addr:$src2))))]>;
+}
+
+def PANDNrr : PDI<0xDF, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+              "pandn {$src2, $dst|$dst, $src2}",
+              [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+                                        VR128:$src2)))]>;
+
+def PANDNrm : PDI<0xDF, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+              "pandn {$src2, $dst|$dst, $src2}",
+              [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+                                        (load addr:$src2))))]>;
+}
 
-// Unpack and interleave
+// Pack instructions
+let isTwoAddress = 1 in {
+def PACKSSWBrr : PDI<0x63, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
+                                       VR128:$src2),
+                 "packsswb {$src2, $dst|$dst, $src2}",
+                 [(set VR128:$dst, (v16i8 (int_x86_sse2_packsswb_128
+                                           (v8i16 VR128:$src1),
+                                           (v8i16 VR128:$src2))))]>;
+def PACKSSWBrm : PDI<0x63, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
+                                       i128mem:$src2),
+                 "packsswb {$src2, $dst|$dst, $src2}",
+                 [(set VR128:$dst, (v16i8 (int_x86_sse2_packsswb_128
+                                           (v8i16 VR128:$src1),
+                                           (loadv8i16 addr:$src2))))]>;
+def PACKSSDWrr : PDI<0x6B, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
+                                       VR128:$src2),
+                 "packssdw {$src2, $dst|$dst, $src2}",
+                 [(set VR128:$dst, (v8i16 (int_x86_sse2_packssdw_128
+                                           (v4i32 VR128:$src1),
+                                           (v4i32 VR128:$src2))))]>;
+def PACKSSDWrm : PDI<0x6B, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
+                                       i128mem:$src2),
+                 "packssdw {$src2, $dst|$dst, $src2}",
+                 [(set VR128:$dst, (v8i16 (int_x86_sse2_packssdw_128
+                                           (v4i32 VR128:$src1),
+                                           (loadv4i32 addr:$src2))))]>;
+def PACKUSWBrr : PDI<0x67, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
+                                       VR128:$src2),
+                 "packuswb {$src2, $dst|$dst, $src2}",
+                 [(set VR128:$dst, (v16i8 (int_x86_sse2_packuswb_128
+                                           (v8i16 VR128:$src1),
+                                           (v8i16 VR128:$src2))))]>;
+def PACKUSWBrm : PDI<0x67, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
+                                       i128mem:$src2),
+                 "packuswb {$src2, $dst|$dst, $src2}",
+                 [(set VR128:$dst, (v16i8 (int_x86_sse2_packuswb_128
+                                           (v8i16 VR128:$src1),
+                                           (loadv8i16 addr:$src2))))]>;
+}
+
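The pack instructions narrow each source element to the smaller element type with saturation and concatenate the two source vectors. A standalone reference model of the packsswb/packuswb behaviour assumed by the int_x86_sse2_pack*_128 intrinsics above (plain C++, illustrative names; packssdw is analogous but narrows i32 to signed-saturated i16):

#include <cstdint>
#include <cstdio>

// Saturate a signed 16-bit value into the signed or unsigned 8-bit range.
static int8_t satS8(int16_t v) {
  if (v < -128) return -128;
  if (v > 127)  return 127;
  return (int8_t)v;
}

static uint8_t satU8(int16_t v) {
  if (v < 0)   return 0;
  if (v > 255) return 255;
  return (uint8_t)v;
}

// packsswb: 8 + 8 signed words -> 16 signed-saturated bytes (a first, then b).
static void packsswbRef(int8_t dst[16], const int16_t a[8], const int16_t b[8]) {
  for (unsigned i = 0; i != 8; ++i) {
    dst[i]     = satS8(a[i]);
    dst[i + 8] = satS8(b[i]);
  }
}

// packuswb: 8 + 8 signed words -> 16 unsigned-saturated bytes.
static void packuswbRef(uint8_t dst[16], const int16_t a[8], const int16_t b[8]) {
  for (unsigned i = 0; i != 8; ++i) {
    dst[i]     = satU8(a[i]);
    dst[i + 8] = satU8(b[i]);
  }
}

int main() {
  int16_t a[8] = {300, -300, 5, -5, 127, -128, 1000, -1000};
  int16_t b[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  int8_t  s[16];
  uint8_t u[16];
  packsswbRef(s, a, b);
  packuswbRef(u, a, b);
  std::printf("packsswb: %d %d ...\n", (int)s[0], (int)s[1]);   // 127 -128
  std::printf("packuswb: %d %d ...\n", (int)u[0], (int)u[1]);   // 255 0
  return 0;
}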
+// Shuffle and unpack instructions
+def PSHUFWrr : PSIi8<0x70, MRMDestReg,
+                     (ops VR64:$dst, VR64:$src1, i8imm:$src2),
+                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
+                     (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
+                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+
+def PSHUFDrr : PDIi8<0x70, MRMDestReg,
+                     (ops VR128:$dst, VR128:$src1, i8imm:$src2),
+                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+                     [(set VR128:$dst, (v4i32 (vector_shuffle
+                                               VR128:$src1, (undef),
+                                               PSHUFD_shuffle_mask:$src2)))]>;
+def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
+                     (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
+                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+                     [(set VR128:$dst, (v4i32 (vector_shuffle
+                                               (load addr:$src1), (undef),
+                                               PSHUFD_shuffle_mask:$src2)))]>;
+
+// SSE2 with ImmT == Imm8 and XS prefix.
+def PSHUFHWrr : Ii8<0x70, MRMDestReg,
+                    (ops VR128:$dst, VR128:$src1, i8imm:$src2),
+                    "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128:$dst, (v8i16 (vector_shuffle
+                                              VR128:$src1, (undef),
+                                              PSHUFHW_shuffle_mask:$src2)))]>,
+                XS, Requires<[HasSSE2]>;
+def PSHUFHWrm : Ii8<0x70, MRMDestMem,
+                    (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
+                    "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128:$dst, (v8i16 (vector_shuffle
+                                              (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
+                                              PSHUFHW_shuffle_mask:$src2)))]>,
+                XS, Requires<[HasSSE2]>;
+
+// SSE2 with ImmT == Imm8 and XD prefix.
+def PSHUFLWrr : Ii8<0x70, MRMDestReg,
+                    (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128:$dst, (v8i16 (vector_shuffle
+                                              VR128:$src1, (undef),
+                                              PSHUFLW_shuffle_mask:$src2)))]>,
+                XD, Requires<[HasSSE2]>;
+def PSHUFLWrm : Ii8<0x70, MRMDestMem,
+                    (ops VR128:$dst, i128mem:$src1, i32i8imm:$src2),
+                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128:$dst, (v8i16 (vector_shuffle
+                                              (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
+                                              PSHUFLW_shuffle_mask:$src2)))]>,
+                XD, Requires<[HasSSE2]>;
+
+let isTwoAddress = 1 in {
 def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
                       "punpcklbw {$src2, $dst|$dst, $src2}",
@@ -1355,6 +1507,29 @@ def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
 // bit_convert
 def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>,
       Requires<[HasSSE2]>;
+def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>,
+      Requires<[HasSSE2]>;
+def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>,
+      Requires<[HasSSE2]>;
+def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>,
+      Requires<[HasSSE2]>;
+def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>,
+      Requires<[HasSSE2]>;
+def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>,
+      Requires<[HasSSE2]>;
+def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>,
+      Requires<[HasSSE2]>;
+def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v4i32 VR128:$src)>,
+      Requires<[HasSSE2]>;
+def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>,
+      Requires<[HasSSE2]>;
+def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>,
+      Requires<[HasSSE2]>;
+def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v4i32 VR128:$src)>,
+      Requires<[HasSSE2]>;
+def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>,
+      Requires<[HasSSE2]>;
+
 def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>,
       Requires<[HasSSE2]>;
 def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>,
-- 
2.34.1
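The bit_convert patterns above all resolve to the same VR128 register because a bitconvert between 128-bit integer vector types only reinterprets the bits; no instruction is emitted. A small model of that reinterpretation (plain C++, illustrative; assumes a little-endian x86 target):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint16_t v8[8] = {0x1111, 0x2222, 0x3333, 0x4444,
                    0x5555, 0x6666, 0x7777, 0x8888};
  uint64_t v2[2];
  std::memcpy(v2, v8, sizeof(v2));   // the v8i16 -> v2i64 "bitconvert"
  // On a little-endian target, 64-bit lane 0 holds 16-bit elements 0-3.
  std::printf("%016llx %016llx\n",
              (unsigned long long)v2[0], (unsigned long long)v2[1]);
  return 0;
}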