From 1c23594d2c3887bbb714a55a51770524f7744293 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Thu, 10 Dec 2015 13:35:28 +0000 Subject: [PATCH] Bitcasts between FP and INT values using direct moves This patch corresponds to review: http://reviews.llvm.org/D15286 LLVM IR frequently contains bitcast operations between floating point and integer values of the same width. Doing this through memory operations is quite expensive on PPC. This patch allows the use of direct register moves between FPRs and GPRs for lowering bitcasts. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255246 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrVSX.td | 308 +++++++++++++----- .../PowerPC/p8-scalar_vector_conversions.ll | 4 +- .../PowerPC/variable_elem_vec_extracts.ll | 114 +++++++ 3 files changed, 334 insertions(+), 92 deletions(-) create mode 100644 test/CodeGen/PowerPC/variable_elem_vec_extracts.ll diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 2e54ef2ac0e..0c3788c55c7 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -1266,16 +1266,14 @@ def MovesToVSR { dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2); } -/* Direct moves of various widths from VSR's to GPR's. Each moves the - respective element out of the VSR and ensures that it is lined up - to the right side of the GPR. In addition to the extraction from positions - specified by a constant, a pattern for extracting from a variable position - is provided. This is useful when the element number is not known at - compile time. +/* Patterns for extracting elements out of vectors. Integer elements are + extracted using direct move operations. Patterns for extracting elements + whose indices are not available at compile time are also provided with + various _VARIABLE_ patterns. The numbering for the DAG's is for LE, but when used on BE, the correct LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13). */ -def MovesFromVSR { +def VectorExtractions { // Doubleword extraction dag LE_DWORD_0 = (MFVSRD @@ -1371,24 +1369,6 @@ def MovesFromVSR { dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT), sub_32); - /* BE variable byte - The algorithm here is the same as the LE variable byte except: - - The shift in the VMX register is by 0/8 for opposite element numbers so - we simply AND the element number with 0x8 - - The order of elements after the move to GPR is reversed, so we invert - the bits of the index prior to truncating to the range 0-7 - */ - dag BE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDIo8 $Idx, 8)); - dag BE_VBYTE_PERMUTE = (VPERM $S, $S, BE_VBYTE_PERM_VEC); - dag BE_MV_VBYTE = (MFVSRD - (EXTRACT_SUBREG - (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)), - sub_64)); - dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60), - sub_32); - dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT), - sub_32); - /* LE variable halfword Number 1. above: - For elements 0-3, we shift left by 8 since they're on the right @@ -1421,6 +1401,88 @@ def MovesFromVSR { dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT), sub_32); + /* LE variable word + Number 1. above: + - For elements 0-1, we shift left by 8 since they're on the right + - For elements 2-3, we need not shift + */ + dag LE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61)); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VWORD_PERMUTE = (VPERM $S, $S, LE_VWORD_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VWORD = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)), + sub_64)); + + /* Number 4. above: + - Truncate the element number to the range 0-1 (2-3 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 32 as we need to shift right by the number of bits + - Shift right in the GPR by the calculated value + */ + dag LE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58), + sub_32); + dag LE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT), + sub_32); + + /* LE variable doubleword + Number 1. above: + - For element 0, we shift left by 8 since it's on the right + - For element 1, we need not shift + */ + dag LE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60)); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VDWORD_PERMUTE = (VPERM $S, $S, LE_VDWORD_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + // - Number 4. is not needed for the doubleword as the value is 64-bits + dag LE_VARIABLE_DWORD = + (MFVSRD (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)), + sub_64)); + + /* LE variable float + - Shift the vector to line up the desired element to BE Word 0 + - Convert 32-bit float to a 64-bit single precision float + */ + dag LE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR (XOR8 (LI8 3), $Idx), 2, 61)); + dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC); + dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE); + + /* LE variable double + Same as the LE doubleword except there is no move. + */ + dag LE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC), + (COPY_TO_REGCLASS $S, VRRC), + LE_VDWORD_PERM_VEC); + dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC); + + /* BE variable byte + The algorithm here is the same as the LE variable byte except: + - The shift in the VMX register is by 0/8 for opposite element numbers so + we simply AND the element number with 0x8 + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-7 + */ + dag BE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDIo8 $Idx, 8)); + dag BE_VBYTE_PERMUTE = (VPERM $S, $S, BE_VBYTE_PERM_VEC); + dag BE_MV_VBYTE = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)), + sub_64)); + dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60), + sub_32); + dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT), + sub_32); + /* BE variable halfword The algorithm here is the same as the LE variable halfword except: - The shift in the VMX register is by 0/8 for opposite element numbers so @@ -1434,10 +1496,54 @@ def MovesFromVSR { (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)), sub_64)); - dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 60), + dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59), sub_32); dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT), sub_32); + + /* BE variable word + The algorithm is the same as the LE variable word except: + - The shift in the VMX register happens for opposite element numbers + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-1 + */ + dag BE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 2), 2, 61)); + dag BE_VWORD_PERMUTE = (VPERM $S, $S, BE_VWORD_PERM_VEC); + dag BE_MV_VWORD = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)), + sub_64)); + dag BE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58), + sub_32); + dag BE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT), + sub_32); + + /* BE variable doubleword + Same as the LE doubleword except we shift in the VMX register for opposite + element indices. + */ + dag BE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 1), 3, 60)); + dag BE_VDWORD_PERMUTE = (VPERM $S, $S, BE_VDWORD_PERM_VEC); + dag BE_VARIABLE_DWORD = + (MFVSRD (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)), + sub_64)); + + /* BE variable float + - Shift the vector to line up the desired element to BE Word 0 + - Convert 32-bit float to a 64-bit single precision float + */ + dag BE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR $Idx, 2, 61)); + dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC); + dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE); + + /* BE variable double + Same as the BE doubleword except there is no move. + */ + dag BE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC), + (COPY_TO_REGCLASS $S, VRRC), + BE_VDWORD_PERM_VEC); + dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC); } // v4f32 scalar <-> vector conversions (BE) @@ -1452,8 +1558,15 @@ let Predicates = [IsBigEndian, HasP8Vector] in { (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>; def : Pat<(f32 (vector_extract v4f32:$S, 3)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; + def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), + (f32 VectorExtractions.BE_VARIABLE_FLOAT)>; } // IsBigEndian, HasP8Vector +// Variable index vector_extract for v2f64 does not require P8Vector +let Predicates = [IsBigEndian, HasVSX] in + def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), + (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>; + let Predicates = [IsBigEndian, HasDirectMove] in { // v16i8 scalar <-> vector conversions (BE) def : Pat<(v16i8 (scalar_to_vector i32:$A)), @@ -1465,75 +1578,79 @@ let Predicates = [IsBigEndian, HasDirectMove] in { def : Pat<(v2i64 (scalar_to_vector i64:$A)), (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>; def : Pat<(i32 (vector_extract v16i8:$S, 0)), - (i32 MovesFromVSR.LE_BYTE_15)>; + (i32 VectorExtractions.LE_BYTE_15)>; def : Pat<(i32 (vector_extract v16i8:$S, 1)), - (i32 MovesFromVSR.LE_BYTE_14)>; + (i32 VectorExtractions.LE_BYTE_14)>; def : Pat<(i32 (vector_extract v16i8:$S, 2)), - (i32 MovesFromVSR.LE_BYTE_13)>; + (i32 VectorExtractions.LE_BYTE_13)>; def : Pat<(i32 (vector_extract v16i8:$S, 3)), - (i32 MovesFromVSR.LE_BYTE_12)>; + (i32 VectorExtractions.LE_BYTE_12)>; def : Pat<(i32 (vector_extract v16i8:$S, 4)), - (i32 MovesFromVSR.LE_BYTE_11)>; + (i32 VectorExtractions.LE_BYTE_11)>; def : Pat<(i32 (vector_extract v16i8:$S, 5)), - (i32 MovesFromVSR.LE_BYTE_10)>; + (i32 VectorExtractions.LE_BYTE_10)>; def : Pat<(i32 (vector_extract v16i8:$S, 6)), - (i32 MovesFromVSR.LE_BYTE_9)>; + (i32 VectorExtractions.LE_BYTE_9)>; def : Pat<(i32 (vector_extract v16i8:$S, 7)), - (i32 MovesFromVSR.LE_BYTE_8)>; + (i32 VectorExtractions.LE_BYTE_8)>; def : Pat<(i32 (vector_extract v16i8:$S, 8)), - (i32 MovesFromVSR.LE_BYTE_7)>; + (i32 VectorExtractions.LE_BYTE_7)>; def : Pat<(i32 (vector_extract v16i8:$S, 9)), - (i32 MovesFromVSR.LE_BYTE_6)>; + (i32 VectorExtractions.LE_BYTE_6)>; def : Pat<(i32 (vector_extract v16i8:$S, 10)), - (i32 MovesFromVSR.LE_BYTE_5)>; + (i32 VectorExtractions.LE_BYTE_5)>; def : Pat<(i32 (vector_extract v16i8:$S, 11)), - (i32 MovesFromVSR.LE_BYTE_4)>; + (i32 VectorExtractions.LE_BYTE_4)>; def : Pat<(i32 (vector_extract v16i8:$S, 12)), - (i32 MovesFromVSR.LE_BYTE_3)>; + (i32 VectorExtractions.LE_BYTE_3)>; def : Pat<(i32 (vector_extract v16i8:$S, 13)), - (i32 MovesFromVSR.LE_BYTE_2)>; + (i32 VectorExtractions.LE_BYTE_2)>; def : Pat<(i32 (vector_extract v16i8:$S, 14)), - (i32 MovesFromVSR.LE_BYTE_1)>; + (i32 VectorExtractions.LE_BYTE_1)>; def : Pat<(i32 (vector_extract v16i8:$S, 15)), - (i32 MovesFromVSR.LE_BYTE_0)>; + (i32 VectorExtractions.LE_BYTE_0)>; def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), - (i32 MovesFromVSR.BE_VARIABLE_BYTE)>; + (i32 VectorExtractions.BE_VARIABLE_BYTE)>; // v8i16 scalar <-> vector conversions (BE) def : Pat<(i32 (vector_extract v8i16:$S, 0)), - (i32 MovesFromVSR.LE_HALF_7)>; + (i32 VectorExtractions.LE_HALF_7)>; def : Pat<(i32 (vector_extract v8i16:$S, 1)), - (i32 MovesFromVSR.LE_HALF_6)>; + (i32 VectorExtractions.LE_HALF_6)>; def : Pat<(i32 (vector_extract v8i16:$S, 2)), - (i32 MovesFromVSR.LE_HALF_5)>; + (i32 VectorExtractions.LE_HALF_5)>; def : Pat<(i32 (vector_extract v8i16:$S, 3)), - (i32 MovesFromVSR.LE_HALF_4)>; + (i32 VectorExtractions.LE_HALF_4)>; def : Pat<(i32 (vector_extract v8i16:$S, 4)), - (i32 MovesFromVSR.LE_HALF_3)>; + (i32 VectorExtractions.LE_HALF_3)>; def : Pat<(i32 (vector_extract v8i16:$S, 5)), - (i32 MovesFromVSR.LE_HALF_2)>; + (i32 VectorExtractions.LE_HALF_2)>; def : Pat<(i32 (vector_extract v8i16:$S, 6)), - (i32 MovesFromVSR.LE_HALF_1)>; + (i32 VectorExtractions.LE_HALF_1)>; def : Pat<(i32 (vector_extract v8i16:$S, 7)), - (i32 MovesFromVSR.LE_HALF_0)>; + (i32 VectorExtractions.LE_HALF_0)>; def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), - (i32 MovesFromVSR.BE_VARIABLE_HALF)>; + (i32 VectorExtractions.BE_VARIABLE_HALF)>; // v4i32 scalar <-> vector conversions (BE) def : Pat<(i32 (vector_extract v4i32:$S, 0)), - (i32 MovesFromVSR.LE_WORD_3)>; + (i32 VectorExtractions.LE_WORD_3)>; def : Pat<(i32 (vector_extract v4i32:$S, 1)), - (i32 MovesFromVSR.LE_WORD_2)>; + (i32 VectorExtractions.LE_WORD_2)>; def : Pat<(i32 (vector_extract v4i32:$S, 2)), - (i32 MovesFromVSR.LE_WORD_1)>; + (i32 VectorExtractions.LE_WORD_1)>; def : Pat<(i32 (vector_extract v4i32:$S, 3)), - (i32 MovesFromVSR.LE_WORD_0)>; + (i32 VectorExtractions.LE_WORD_0)>; + def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 VectorExtractions.BE_VARIABLE_WORD)>; // v2i64 scalar <-> vector conversions (BE) def : Pat<(i64 (vector_extract v2i64:$S, 0)), - (i64 MovesFromVSR.LE_DWORD_1)>; + (i64 VectorExtractions.LE_DWORD_1)>; def : Pat<(i64 (vector_extract v2i64:$S, 1)), - (i64 MovesFromVSR.LE_DWORD_0)>; + (i64 VectorExtractions.LE_DWORD_0)>; + def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.BE_VARIABLE_DWORD)>; } // IsBigEndian, HasDirectMove // v4f32 scalar <-> vector conversions (LE) @@ -1548,8 +1665,15 @@ let Predicates = [IsLittleEndian, HasP8Vector] in { (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; def : Pat<(f32 (vector_extract v4f32:$S, 3)), (f32 (XSCVSPDPN $S))>; + def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), + (f32 VectorExtractions.LE_VARIABLE_FLOAT)>; } // IsLittleEndian, HasP8Vector +// Variable index vector_extract for v2f64 does not require P8Vector +let Predicates = [IsLittleEndian, HasVSX] in + def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), + (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>; + let Predicates = [IsLittleEndian, HasDirectMove] in { // v16i8 scalar <-> vector conversions (LE) def : Pat<(v16i8 (scalar_to_vector i32:$A)), @@ -1561,73 +1685,77 @@ let Predicates = [IsLittleEndian, HasDirectMove] in { def : Pat<(v2i64 (scalar_to_vector i64:$A)), (v2i64 MovesToVSR.LE_DWORD_0)>; def : Pat<(i32 (vector_extract v16i8:$S, 0)), - (i32 MovesFromVSR.LE_BYTE_0)>; + (i32 VectorExtractions.LE_BYTE_0)>; def : Pat<(i32 (vector_extract v16i8:$S, 1)), - (i32 MovesFromVSR.LE_BYTE_1)>; + (i32 VectorExtractions.LE_BYTE_1)>; def : Pat<(i32 (vector_extract v16i8:$S, 2)), - (i32 MovesFromVSR.LE_BYTE_2)>; + (i32 VectorExtractions.LE_BYTE_2)>; def : Pat<(i32 (vector_extract v16i8:$S, 3)), - (i32 MovesFromVSR.LE_BYTE_3)>; + (i32 VectorExtractions.LE_BYTE_3)>; def : Pat<(i32 (vector_extract v16i8:$S, 4)), - (i32 MovesFromVSR.LE_BYTE_4)>; + (i32 VectorExtractions.LE_BYTE_4)>; def : Pat<(i32 (vector_extract v16i8:$S, 5)), - (i32 MovesFromVSR.LE_BYTE_5)>; + (i32 VectorExtractions.LE_BYTE_5)>; def : Pat<(i32 (vector_extract v16i8:$S, 6)), - (i32 MovesFromVSR.LE_BYTE_6)>; + (i32 VectorExtractions.LE_BYTE_6)>; def : Pat<(i32 (vector_extract v16i8:$S, 7)), - (i32 MovesFromVSR.LE_BYTE_7)>; + (i32 VectorExtractions.LE_BYTE_7)>; def : Pat<(i32 (vector_extract v16i8:$S, 8)), - (i32 MovesFromVSR.LE_BYTE_8)>; + (i32 VectorExtractions.LE_BYTE_8)>; def : Pat<(i32 (vector_extract v16i8:$S, 9)), - (i32 MovesFromVSR.LE_BYTE_9)>; + (i32 VectorExtractions.LE_BYTE_9)>; def : Pat<(i32 (vector_extract v16i8:$S, 10)), - (i32 MovesFromVSR.LE_BYTE_10)>; + (i32 VectorExtractions.LE_BYTE_10)>; def : Pat<(i32 (vector_extract v16i8:$S, 11)), - (i32 MovesFromVSR.LE_BYTE_11)>; + (i32 VectorExtractions.LE_BYTE_11)>; def : Pat<(i32 (vector_extract v16i8:$S, 12)), - (i32 MovesFromVSR.LE_BYTE_12)>; + (i32 VectorExtractions.LE_BYTE_12)>; def : Pat<(i32 (vector_extract v16i8:$S, 13)), - (i32 MovesFromVSR.LE_BYTE_13)>; + (i32 VectorExtractions.LE_BYTE_13)>; def : Pat<(i32 (vector_extract v16i8:$S, 14)), - (i32 MovesFromVSR.LE_BYTE_14)>; + (i32 VectorExtractions.LE_BYTE_14)>; def : Pat<(i32 (vector_extract v16i8:$S, 15)), - (i32 MovesFromVSR.LE_BYTE_15)>; + (i32 VectorExtractions.LE_BYTE_15)>; def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), - (i32 MovesFromVSR.LE_VARIABLE_BYTE)>; + (i32 VectorExtractions.LE_VARIABLE_BYTE)>; // v8i16 scalar <-> vector conversions (LE) def : Pat<(i32 (vector_extract v8i16:$S, 0)), - (i32 MovesFromVSR.LE_HALF_0)>; + (i32 VectorExtractions.LE_HALF_0)>; def : Pat<(i32 (vector_extract v8i16:$S, 1)), - (i32 MovesFromVSR.LE_HALF_1)>; + (i32 VectorExtractions.LE_HALF_1)>; def : Pat<(i32 (vector_extract v8i16:$S, 2)), - (i32 MovesFromVSR.LE_HALF_2)>; + (i32 VectorExtractions.LE_HALF_2)>; def : Pat<(i32 (vector_extract v8i16:$S, 3)), - (i32 MovesFromVSR.LE_HALF_3)>; + (i32 VectorExtractions.LE_HALF_3)>; def : Pat<(i32 (vector_extract v8i16:$S, 4)), - (i32 MovesFromVSR.LE_HALF_4)>; + (i32 VectorExtractions.LE_HALF_4)>; def : Pat<(i32 (vector_extract v8i16:$S, 5)), - (i32 MovesFromVSR.LE_HALF_5)>; + (i32 VectorExtractions.LE_HALF_5)>; def : Pat<(i32 (vector_extract v8i16:$S, 6)), - (i32 MovesFromVSR.LE_HALF_6)>; + (i32 VectorExtractions.LE_HALF_6)>; def : Pat<(i32 (vector_extract v8i16:$S, 7)), - (i32 MovesFromVSR.LE_HALF_7)>; + (i32 VectorExtractions.LE_HALF_7)>; def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), - (i32 MovesFromVSR.LE_VARIABLE_HALF)>; + (i32 VectorExtractions.LE_VARIABLE_HALF)>; // v4i32 scalar <-> vector conversions (LE) def : Pat<(i32 (vector_extract v4i32:$S, 0)), - (i32 MovesFromVSR.LE_WORD_0)>; + (i32 VectorExtractions.LE_WORD_0)>; def : Pat<(i32 (vector_extract v4i32:$S, 1)), - (i32 MovesFromVSR.LE_WORD_1)>; + (i32 VectorExtractions.LE_WORD_1)>; def : Pat<(i32 (vector_extract v4i32:$S, 2)), - (i32 MovesFromVSR.LE_WORD_2)>; + (i32 VectorExtractions.LE_WORD_2)>; def : Pat<(i32 (vector_extract v4i32:$S, 3)), - (i32 MovesFromVSR.LE_WORD_3)>; + (i32 VectorExtractions.LE_WORD_3)>; + def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 VectorExtractions.LE_VARIABLE_WORD)>; // v2i64 scalar <-> vector conversions (LE) def : Pat<(i64 (vector_extract v2i64:$S, 0)), - (i64 MovesFromVSR.LE_DWORD_0)>; + (i64 VectorExtractions.LE_DWORD_0)>; def : Pat<(i64 (vector_extract v2i64:$S, 1)), - (i64 MovesFromVSR.LE_DWORD_1)>; + (i64 VectorExtractions.LE_DWORD_1)>; + def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.LE_VARIABLE_DWORD)>; } // IsLittleEndian, HasDirectMove diff --git a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll index 7e8991647ae..8da8df58a85 100644 --- a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -1036,7 +1036,7 @@ entry: ; CHECK-DAG: mfvsrd [[MOV:[0-9]+]], ; CHECK-DAG: li [[IMM3:[0-9]+]], 3 ; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM3]] -; CHECK-DAG: rldicr [[SHL:[0-9]+]], [[ANDC]], 4, 60 +; CHECK-DAG: sldi [[SHL:[0-9]+]], [[ANDC]], 4 ; CHECK-DAG: srd 3, [[MOV]], [[SHL]] ; CHECK-DAG: extsh 3, 3 ; CHECK-LE-LABEL: @getvelss @@ -1072,7 +1072,7 @@ entry: ; CHECK-DAG: mfvsrd [[MOV:[0-9]+]], ; CHECK-DAG: li [[IMM3:[0-9]+]], 3 ; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM3]] -; CHECK-DAG: rldicr [[SHL:[0-9]+]], [[ANDC]], 4, 60 +; CHECK-DAG: sldi [[SHL:[0-9]+]], [[ANDC]], 4 ; CHECK-DAG: srd 3, [[MOV]], [[SHL]] ; CHECK-DAG: clrldi 3, 3, 48 ; CHECK-LE-LABEL: @getvelus diff --git a/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll b/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll new file mode 100644 index 00000000000..3d4789360f5 --- /dev/null +++ b/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll @@ -0,0 +1,114 @@ +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-BE +; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-P7 + +; Function Attrs: norecurse nounwind readnone +define signext i32 @geti(<4 x i32> %a, i32 signext %b) { +entry: + %vecext = extractelement <4 x i32> %a, i32 %b + ret i32 %vecext +; CHECK-LABEL: @geti +; CHECK-P7-LABEL: @geti +; CHECK-BE-LABEL: @geti +; CHECK-DAG: li [[TRUNCREG:[0-9]+]], 2 +; CHECK-DAG: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5 +; CHECK-DAG: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 2 +; CHECK-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]] +; CHECK-DAG: vperm [[PERMVEC:[0-9]+]], 2, 2, [[SHMSKREG]] +; CHECK-DAG: li [[ONEREG:[0-9]+]], 1 +; CHECK-DAG: and [[ELEMSREG:[0-9]+]], [[ONEREG]], 5 +; CHECK-DAG: sldi [[SHAMREG:[0-9]+]], [[ELEMSREG]], 5 +; CHECK: mfvsrd [[TOGPR:[0-9]+]], +; CHECK: srd [[RSHREG:[0-9]+]], [[TOGPR]], [[SHAMREG]] +; CHECK: extsw 3, [[RSHREG]] +; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 2 +; CHECK-P7-DAG: stxvw4x 34, +; CHECK-P7: lwax 3, [[ELEMOFFREG]], +; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 2 +; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 2 +; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]] +; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; CHECK-BE-DAG: li [[IMMREG:[0-9]+]], 1 +; CHECK-BE-DAG: andc [[ANDCREG:[0-9]+]], [[IMMREG]], 5 +; CHECK-BE-DAG: sldi [[SHAMREG:[0-9]+]], [[ANDCREG]], 5 +; CHECK-BE: mfvsrd [[TOGPR:[0-9]+]], +; CHECK-BE: srd [[RSHREG:[0-9]+]], [[TOGPR]], [[SHAMREG]] +; CHECk-BE: extsw 3, [[RSHREG]] +} + +; Function Attrs: norecurse nounwind readnone +define i64 @getl(<2 x i64> %a, i32 signext %b) { +entry: + %vecext = extractelement <2 x i64> %a, i32 %b + ret i64 %vecext +; CHECK-LABEL: @getl +; CHECK-P7-LABEL: @getl +; CHECK-BE-LABEL: @getl +; CHECK-DAG: li [[TRUNCREG:[0-9]+]], 1 +; CHECK-DAG: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5 +; CHECK-DAG: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 3 +; CHECK-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]] +; CHECK-DAG: vperm [[PERMVEC:[0-9]+]], 2, 2, [[SHMSKREG]] +; CHECK: mfvsrd 3, +; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 3 +; CHECK-P7-DAG: stxvd2x 34, +; CHECK-P7: ldx 3, [[ELEMOFFREG]], +; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 1 +; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3 +; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]] +; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; CHECK-BE: mfvsrd 3, +} + +; Function Attrs: norecurse nounwind readnone +define float @getf(<4 x float> %a, i32 signext %b) { +entry: + %vecext = extractelement <4 x float> %a, i32 %b + ret float %vecext +; CHECK-LABEL: @getf +; CHECK-P7-LABEL: @getf +; CHECK-BE-LABEL: @getf +; CHECK: li [[IMMREG:[0-9]+]], 3 +; CHECK: xor [[TRUNCREG:[0-9]+]], [[IMMREG]], 5 +; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[TRUNCREG]] +; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; CHECK: xscvspdpn 1, +; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 2 +; CHECK-P7-DAG: stxvw4x 34, +; CHECK-P7: lfsx 1, [[ELEMOFFREG]], +; CHECK-BE: sldi [[ELNOREG:[0-9]+]], 5, 2 +; CHECK-BE: lvsl [[SHMSKREG:[0-9]+]], 0, [[ELNOREG]] +; CHECK-BE: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; CHECK-BE: xscvspdpn 1, +} + +; Function Attrs: norecurse nounwind readnone +define double @getd(<2 x double> %a, i32 signext %b) { +entry: + %vecext = extractelement <2 x double> %a, i32 %b + ret double %vecext +; CHECK-LABEL: @getd +; CHECK-P7-LABEL: @getd +; CHECK-BE-LABEL: @getd +; CHECK: li [[TRUNCREG:[0-9]+]], 1 +; CHECK: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5 +; CHECK: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 3 +; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]] +; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; FIXME: the instruction below is a redundant regclass copy, to be removed +; CHECK: xxlor 1, +; CHECK-P7-DAG: andi. [[ANDREG:[0-9]+]], 5, 1 +; CHECK-P7-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3 +; CHECK-P7-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]] +; CHECK-P7-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; FIXME: the instruction below is a redundant regclass copy, to be removed +; CHECK-P7: xxlor 1, +; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 1 +; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3 +; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]] +; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; FIXME: the instruction below is a redundant regclass copy, to be removed +; CHECK-BE: xxlor 1, +} -- 2.34.1