From: Kit Barton Date: Thu, 25 Jun 2015 15:17:40 +0000 (+0000) Subject: [PPC] Implement vmrgew and vmrgow instructions X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=1ebbc687195bd339d1246590bbfee29b4d5447a4;p=oota-llvm.git [PPC] Implement vmrgew and vmrgow instructions This patch adds support for the vector merge even word and vector merge odd word instructions introduced in POWER8. Phabricator review: http://reviews.llvm.org/D10704 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240650 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 2600ee5db17..af493c0931c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1279,6 +1279,99 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, } } +/** + * \brief Common function used to match vmrgew and vmrgow shuffles + * + * The indexOffset determines whether to look for even or odd words in + * the shuffle mask. This is based on the endianness of the target + * machine. + * - Little Endian: + * - Use offset of 0 to check for odd elements + * - Use offset of 4 to check for even elements + * - Big Endian: + * - Use offset of 0 to check for even elements + * - Use offset of 4 to check for odd elements + * A detailed description of the vector element ordering for little endian and + * big endian can be found at + * Targeting your applications - what little endian and big endian IBM XL C/C++ + * compiler differences mean to you + * + * The mask to the shuffle vector instruction specifies the indices of the + * elements from the two input vectors to place in the result. The elements are + * numbered in array-access order, starting with the first vector. These vectors + * are always of type v16i8, thus each vector will contain 16 elements of size + * 8. More info on the shuffle vector can be found in the Language + * Reference. 
+ * + * The RHSStartValue indicates whether the same input vectors are used (unary) + * or two different input vectors are used, based on the following: + * - If the instruction uses the same vector for both inputs, the range of the + * indices will be 0 to 15. In this case, the RHSStart value passed should + * be 0. + * - If the instruction has two different vectors then the range of the + * indices will be 0 to 31. In this case, the RHSStart value passed should + * be 16 (indices 0-15 specify elements in the first vector while indices 16 + * to 31 specify elements in the second vector). + * + * \param[in] N The shuffle vector SD Node to analyze + * \param[in] IndexOffset Specifies whether to look for even or odd elements + * \param[in] RHSStartValue Specifies the starting index for the righthand input + * vector to the shuffle_vector instruction + * \return true iff this shuffle vector represents an even or odd word merge + */ +static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, + unsigned RHSStartValue) { + if (N->getValueType(0) != MVT::v16i8) + return false; + + for (unsigned i = 0; i < 2; ++i) + for (unsigned j = 0; j < 4; ++j) + if (!isConstantOrUndef(N->getMaskElt(i*4+j), + i*RHSStartValue+j+IndexOffset) || + !isConstantOrUndef(N->getMaskElt(i*4+j+8), + i*RHSStartValue+j+IndexOffset+8)) + return false; + return true; +} + +/** + * \brief Determine if the specified shuffle mask is suitable for the vmrgew or + * vmrgow instructions. + * + * \param[in] N The shuffle vector SD Node to analyze + * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) + * \param[in] ShuffleKind Identify the type of merge: + * - 0 = big-endian merge with two different inputs; + * - 1 = either-endian merge with two identical inputs; + * - 2 = little-endian merge with two different inputs (inputs are swapped for + * little-endian merges). 
+ * \param[in] DAG The current SelectionDAG + * \return true iff this shuffle mask is suitable for vmrgew or vmrgow + */ +bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, + unsigned ShuffleKind, SelectionDAG &DAG) { + if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + unsigned indexOffset = CheckEven ? 4 : 0; + if (ShuffleKind == 1) // Unary + return isVMerge(N, indexOffset, 0); + else if (ShuffleKind == 2) // swapped + return isVMerge(N, indexOffset, 16); + else + return false; + } + else { + unsigned indexOffset = CheckEven ? 0 : 4; + if (ShuffleKind == 1) // Unary + return isVMerge(N, indexOffset, 0); + else if (ShuffleKind == 0) // Normal + return isVMerge(N, indexOffset, 16); + else + return false; + } + return false; +} /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. @@ -7046,7 +7139,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) { + PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) { return Op; } } @@ -7064,7 +7159,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG)) + PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)) return Op; // Check to see if this is a shuffle of 4-byte values. 
If so, we can use our diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 7fd3f9c3de3..02242b512a4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -382,6 +382,11 @@ namespace llvm { bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG); + /// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for + /// a VMRGEW or VMRGOW instruction + bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, + unsigned ShuffleKind, SelectionDAG &DAG); + /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the /// shift amount, otherwise return -1. int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 9ff604bbee9..cb0271fe8d0 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -155,6 +155,33 @@ def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), }]>; +def vmrgew_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 0, *CurDAG); +}]>; +def vmrgow_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 0, *CurDAG); +}]>; +def vmrgew_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 1, *CurDAG); +}]>; +def vmrgow_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 1, *CurDAG); +}]>; +def vmrgew_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 2, *CurDAG); +}]>; +def vmrgow_swapped_shuffle : 
PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 2, *CurDAG); +}]>; + + + def VSLDOI_get_imm : SDNodeXForm; @@ -1008,6 +1035,29 @@ def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>; def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>; } // isCommutable +// Vector merge +def VMRGEW : VXForm_1<1932, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmrgew $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, (vmrgew_shuffle v16i8:$vA, v16i8:$vB))]>; +def VMRGOW : VXForm_1<1676, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmrgow $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, (vmrgow_shuffle v16i8:$vA, v16i8:$vB))]>; + +// Match vmrgew(x,x) and vmrgow(x,x) +def:Pat<(vmrgew_unary_shuffle v16i8:$vA, undef), + (VMRGEW $vA, $vA)>; +def:Pat<(vmrgow_unary_shuffle v16i8:$vA, undef), + (VMRGOW $vA, $vA)>; + +// Match vmrgew(y,x) and vmrgow(y,x), i.e., swapped operands. These fragments +// are matched for little-endian, where the inputs must be swapped for correct +// semantics. +def:Pat<(vmrgew_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGEW $vB, $vA)>; +def:Pat<(vmrgow_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGOW $vB, $vA)>; + + + // Vector shifts def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>; def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), diff --git a/test/CodeGen/PowerPC/vec_mergeow.ll b/test/CodeGen/PowerPC/vec_mergeow.ll new file mode 100644 index 00000000000..c7c7448e3ae --- /dev/null +++ b/test/CodeGen/PowerPC/vec_mergeow.ll @@ -0,0 +1,101 @@ +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | \ +; RUN: FileCheck %s -check-prefix=CHECK-LE +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | \ +; RUN: FileCheck %s -check-prefix=CHECK-BE + +; Check for a vector merge instruction using two inputs +; The shufflevector specifies the even elements, using big endian element +; ordering. 
If run on a big endian machine, this should produce the vmrgew +; instruction. If run on a little endian machine, this should produce the +; vmrgow instruction. Note also that on little endian the input registers +; are swapped also. +define void @check_merge_even_xy(<16 x i8>* %A, <16 x i8>* %B) { +entry: +; CHECK-LE-LABEL: @check_merge_even_xy +; CHECK-BE-LABEL: @check_merge_even_xy + %tmp = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, + <16 x i32> +; CHECK-LE: vmrgow 2, 3, 2 +; CHECK-BE: vmrgew 2, 2, 3 + store <16 x i8> %tmp3, <16 x i8>* %A + ret void +; CHECK-LE: blr +; CHECK-BE: blr +} + +; Check for a vector merge instruction using a single input. +; The shufflevector specifies the even elements, using big endian element +; ordering. If run on a big endian machine, this should produce the vmrgew +; instruction. If run on a little endian machine, this should produce the +; vmrgow instruction. +define void @check_merge_even_xx(<16 x i8>* %A) { +entry: +; CHECK-LE-LABEL: @check_merge_even_xx +; CHECK-BE-LABEL: @check_merge_even_xx + %tmp = load <16 x i8>, <16 x i8>* %A + %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, + <16 x i32> +; CHECK-LE: vmrgow 2, 2, 2 +; CHECK-BE: vmrgew 2, 2, 2 + store <16 x i8> %tmp2, <16 x i8>* %A + ret void +; CHECK-LE: blr +; CHECK-BE: blr +} + +; Check for a vector merge instruction using two inputs. +; The shufflevector specifies the odd elements, using big endian element +; ordering. If run on a big endian machine, this should produce the vmrgow +; instruction. If run on a little endian machine, this should produce the +; vmrgew instruction. Note also that on little endian the input registers +; are swapped also. 
+define void @check_merge_odd_xy(<16 x i8>* %A, <16 x i8>* %B) { +entry: +; CHECK-LE-LABEL: @check_merge_odd_xy +; CHECK-BE-LABEL: @check_merge_odd_xy + %tmp = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, + <16 x i32> +; CHECK-LE: vmrgew 2, 3, 2 +; CHECK-BE: vmrgow 2, 2, 3 + store <16 x i8> %tmp3, <16 x i8>* %A + ret void +; CHECK-LE: blr +; CHECK-BE: blr +} + +; Check for a vector merge instruction using a single input. +; The shufflevector specifies the odd elements, using big endian element +; ordering. If run on a big endian machine, this should produce the vmrgow +; instruction. If run on a little endian machine, this should produce the +; vmrgew instruction. +define void @check_merge_odd_xx(<16 x i8>* %A) { +entry: +; CHECK-LE-LABEL: @check_merge_odd_xx +; CHECK-BE-LABEL: @check_merge_odd_xx + %tmp = load <16 x i8>, <16 x i8>* %A + %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, + <16 x i32> +; CHECK-LE: vmrgew 2, 2, 2 +; CHECK-BE: vmrgow 2, 2, 2 + store <16 x i8> %tmp2, <16 x i8>* %A + ret void +; CHECK-LE: blr +; CHECK-BE: blr +} + diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt index 0e3a83f6d3a..16ff14c794d 100644 --- a/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt +++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt @@ -99,6 +99,12 @@ # CHECK: vmrglw 2, 3, 4 0x10 0x43 0x21 0x8c +# CHECK: vmrgew 2, 3, 4 +0x10 0x43 0x27 0x8c + +# CHECK: vmrgow 2, 3, 4 +0x10 0x43 0x26 0x8c + # CHECK: vspltb 2, 3, 1 0x10 0x41 0x1a 0x0c diff --git a/test/MC/PowerPC/ppc64-encoding-vmx.s b/test/MC/PowerPC/ppc64-encoding-vmx.s index 5c62d2a6c95..d8825bf3340 100644 --- a/test/MC/PowerPC/ppc64-encoding-vmx.s +++ b/test/MC/PowerPC/ppc64-encoding-vmx.s @@ -1,5 +1,5 @@ -# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s +# RUN: llvm-mc -triple 
powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s # RUN: llvm-mc -triple powerpc64le-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s # Vector facility @@ -110,7 +110,13 @@ # CHECK-BE: vmrglw 2, 3, 4 # encoding: [0x10,0x43,0x21,0x8c] # CHECK-LE: vmrglw 2, 3, 4 # encoding: [0x8c,0x21,0x43,0x10] vmrglw 2, 3, 4 - +# CHECK-BE: vmrgew 2, 3, 4 # encoding: [0x10,0x43,0x27,0x8c] +# CHECK-LE: vmrgew 2, 3, 4 # encoding: [0x8c,0x27,0x43,0x10] + vmrgew 2, 3, 4 +# CHECK-BE: vmrgow 2, 3, 4 # encoding: [0x10,0x43,0x26,0x8c] +# CHECK-LE: vmrgow 2, 3, 4 # encoding: [0x8c,0x26,0x43,0x10] + vmrgow 2, 3, 4 + # CHECK-BE: vspltb 2, 3, 1 # encoding: [0x10,0x41,0x1a,0x0c] # CHECK-LE: vspltb 2, 3, 1 # encoding: [0x0c,0x1a,0x41,0x10] vspltb 2, 3, 1