From b386929d2e65d90acb1a829ad3e354ce36370a46 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Fri, 9 Oct 2015 11:12:18 +0000 Subject: [PATCH] Vector element extraction without stack operations on Power 8 This patch corresponds to review: http://reviews.llvm.org/D12032 This patch builds on the patch that provided scalar to vector conversions without stack operations (D11471). Included in this patch: - Vector element extraction for all vector types with constant element number - Vector element extraction for v16i8 and v8i16 with variable element number - Removal of some COPY_TO_REGCLASS operations that needlessly moved values around between registers Not included in this patch (will be in an upcoming patch): - Vector element extraction for v4i32, v4f32, v2i64 and v2f64 with variable element number - Vector element insertion for variable/constant element number Testing is provided for all extractions; the tests for the extractions that are not implemented yet are just placeholders. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249822 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 9 +- lib/Target/PowerPC/PPCInstrVSX.td | 382 ++++- lib/Target/PowerPC/PPCVSXCopy.cpp | 1 + .../PowerPC/p8-scalar_vector_conversions.ll | 1380 +++++++++++++++++ 4 files changed, 1749 insertions(+), 23 deletions(-) diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index b7dbd74eb68..52f10daed3f 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -543,14 +543,21 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (Subtarget.hasVSX()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); - if (Subtarget.hasP8Vector()) + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); + if (Subtarget.hasP8Vector()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); + } if (Subtarget.hasDirectMove()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); // FIXME: this is causing bootstrap failures, disable temporarily //setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); } setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 7c1e6887094..2e54ef2ac0e 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -1237,59 +1237,397 @@ let Predicates = [HasDirectMove, HasVSX] in { [(set f64:$XT, (PPCmtvsrz 
i32:$rA))]>; } // HasDirectMove, HasVSX -/* Direct moves of various size entities from GPR's into VSR's. Each lines +/* Direct moves of various widths from GPR's into VSR's. Each move lines the value up into element 0 (both BE and LE). Namely, entities smaller than a doubleword are shifted left and moved for BE. For LE, they're moved, then swapped to go into the least significant element of the VSR. */ -def Moves { - dag BE_BYTE_0 = (MTVSRD - (RLDICR - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7)); - dag BE_HALF_0 = (MTVSRD - (RLDICR - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15)); - dag BE_WORD_0 = (MTVSRD - (RLDICR - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31)); +def MovesToVSR { + dag BE_BYTE_0 = + (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7)); + dag BE_HALF_0 = + (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15)); + dag BE_WORD_0 = + (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31)); dag BE_DWORD_0 = (MTVSRD $A); dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32)); - dag LE_WORD_1 = (v2i64 (COPY_TO_REGCLASS LE_MTVSRW, VSRC)); + dag LE_WORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), + LE_MTVSRW, sub_64)); dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2); - dag LE_DWORD_1 = (v2i64 (COPY_TO_REGCLASS BE_DWORD_0, VSRC)); + dag LE_DWORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), + BE_DWORD_0, sub_64)); dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2); } +/* Direct moves of various widths from VSR's to GPR's. Each moves the + respective element out of the VSR and ensures that it is lined up + to the right side of the GPR. In addition to the extraction from positions + specified by a constant, a pattern for extracting from a variable position + is provided. This is useful when the element number is not known at + compile time. 
+ The numbering for the DAG's is for LE, but when used on BE, the correct + LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13). +*/ +def MovesFromVSR { + // Doubleword extraction + dag LE_DWORD_0 = + (MFVSRD + (EXTRACT_SUBREG + (XXPERMDI (COPY_TO_REGCLASS $S, VSRC), + (COPY_TO_REGCLASS $S, VSRC), 2), sub_64)); + dag LE_DWORD_1 = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); + + // Word extraction + dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 2), sub_64)); + dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64)); + dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); + dag LE_WORD_3 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 3), sub_64)); + + // Halfword extraction + dag LE_HALF_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 48), sub_32)); + dag LE_HALF_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 48), sub_32)); + dag LE_HALF_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 48), sub_32)); + dag LE_HALF_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 48), sub_32)); + dag LE_HALF_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 48), sub_32)); + dag LE_HALF_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 48), sub_32)); + dag LE_HALF_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 48), sub_32)); + dag LE_HALF_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 48), sub_32)); + + // Byte extraction + dag LE_BYTE_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 56), sub_32)); + dag LE_BYTE_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 56, 56), sub_32)); + dag LE_BYTE_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 56), sub_32)); + dag LE_BYTE_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 40, 56), sub_32)); + dag LE_BYTE_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 56), sub_32)); + dag LE_BYTE_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 24, 56), sub_32)); + dag LE_BYTE_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 56), sub_32)); + dag LE_BYTE_7 = (i32 
(EXTRACT_SUBREG (RLDICL LE_DWORD_0, 8, 56), sub_32)); + dag LE_BYTE_8 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 56), sub_32)); + dag LE_BYTE_9 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 56, 56), sub_32)); + dag LE_BYTE_10 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 56), sub_32)); + dag LE_BYTE_11 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 40, 56), sub_32)); + dag LE_BYTE_12 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 56), sub_32)); + dag LE_BYTE_13 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 24, 56), sub_32)); + dag LE_BYTE_14 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 56), sub_32)); + dag LE_BYTE_15 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 8, 56), sub_32)); + + /* Variable element number (BE and LE patterns must be specified separately) + This is a rather involved process. + + Conceptually, this is how the move is accomplished: + 1. Identify which doubleword contains the element + 2. Shift in the VMX register so that the correct doubleword is correctly + lined up for the MFVSRD + 3. Perform the move so that the element (along with some extra stuff) + is in the GPR + 4. Right shift within the GPR so that the element is right-justified + + Of course, the index is an element number which has a different meaning + on LE/BE so the patterns have to be specified separately. + + Note: The final result will be the element right-justified with high + order bits being arbitrarily defined (namely, whatever was in the + vector register to the left of the value originally). + */ + + /* LE variable byte + Number 1. above: + - For elements 0-7, we shift left by 8 bytes since they're on the right + - For elements 8-15, we need not shift (shift left by zero bytes) + This is accomplished by inverting the bits of the index and AND-ing + with 0x8 (i.e. clearing all bits of the index and inverting bit 60). + */ + dag LE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDC8 (LI8 8), $Idx)); + + // Number 2. 
above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VBYTE_PERMUTE = (VPERM $S, $S, LE_VBYTE_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VBYTE = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VBYTE_PERMUTE, VSRC)), + sub_64)); + + /* Number 4. above: + - Truncate the element number to the range 0-7 (8-15 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 8 as we need to shift right by the number of bits, not bytes + - Shift right in the GPR by the calculated value + */ + dag LE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 7), $Idx), 3, 60), + sub_32); + dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT), + sub_32); + + /* BE variable byte + The algorithm here is the same as the LE variable byte except: + - The shift in the VMX register is by 0/8 for opposite element numbers so + we simply AND the element number with 0x8 + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-7 + */ + dag BE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDIo8 $Idx, 8)); + dag BE_VBYTE_PERMUTE = (VPERM $S, $S, BE_VBYTE_PERM_VEC); + dag BE_MV_VBYTE = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)), + sub_64)); + dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60), + sub_32); + dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT), + sub_32); + + /* LE variable halfword + Number 1. above: + - For elements 0-3, we shift left by 8 since they're on the right + - For elements 4-7, we need not shift (shift left by zero bytes) + Similarly to the byte pattern, we invert the bits of the index, but we + AND with 0x4 (i.e. clear all bits of the index and invert bit 61). + Of course, the shift is still by 8 bytes, so we must multiply by 2. 
+ */ + dag LE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62)); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VHALF_PERMUTE = (VPERM $S, $S, LE_VHALF_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VHALF = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VHALF_PERMUTE, VSRC)), + sub_64)); + + /* Number 4. above: + - Truncate the element number to the range 0-3 (4-7 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 16 as we need to shift right by the number of bits + - Shift right in the GPR by the calculated value + */ + dag LE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 3), $Idx), 4, 59), + sub_32); + dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT), + sub_32); + + /* BE variable halfword + The algorithm here is the same as the LE variable halfword except: + - The shift in the VMX register is by 0/8 for opposite element numbers so + we simply AND the element number with 0x4 and multiply by 2 + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-3 + */ + dag BE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 4), 1, 62)); + dag BE_VHALF_PERMUTE = (VPERM $S, $S, BE_VHALF_PERM_VEC); + dag BE_MV_VHALF = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)), + sub_64)); + dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 60), + sub_32); + dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT), + sub_32); +} + +// v4f32 scalar <-> vector conversions (BE) let Predicates = [IsBigEndian, HasP8Vector] in { def : Pat<(v4f32 (scalar_to_vector f32:$A)), (v4f32 (XSCVDPSPN $A))>; + def : Pat<(f32 (vector_extract v4f32:$S, 0)), + (f32 (XSCVSPDPN $S))>; + def : Pat<(f32 (vector_extract v4f32:$S, 1)), + (f32 (XSCVSPDPN (XXSLDWI 
$S, $S, 1)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 2)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 3)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; } // IsBigEndian, HasP8Vector let Predicates = [IsBigEndian, HasDirectMove] in { + // v16i8 scalar <-> vector conversions (BE) def : Pat<(v16i8 (scalar_to_vector i32:$A)), - (v16i8 (COPY_TO_REGCLASS Moves.BE_BYTE_0, VSRC))>; + (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>; def : Pat<(v8i16 (scalar_to_vector i32:$A)), - (v8i16 (COPY_TO_REGCLASS Moves.BE_HALF_0, VSRC))>; + (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>; def : Pat<(v4i32 (scalar_to_vector i32:$A)), - (v4i32 (COPY_TO_REGCLASS Moves.BE_WORD_0, VSRC))>; + (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>; def : Pat<(v2i64 (scalar_to_vector i64:$A)), - (v2i64 (COPY_TO_REGCLASS Moves.BE_DWORD_0, VSRC))>; + (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>; + def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 MovesFromVSR.LE_BYTE_15)>; + def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 MovesFromVSR.LE_BYTE_14)>; + def : Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 MovesFromVSR.LE_BYTE_13)>; + def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 MovesFromVSR.LE_BYTE_12)>; + def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 MovesFromVSR.LE_BYTE_11)>; + def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 MovesFromVSR.LE_BYTE_10)>; + def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 MovesFromVSR.LE_BYTE_9)>; + def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 MovesFromVSR.LE_BYTE_8)>; + def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 MovesFromVSR.LE_BYTE_7)>; + def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 MovesFromVSR.LE_BYTE_6)>; + def : Pat<(i32 (vector_extract v16i8:$S, 10)), + (i32 MovesFromVSR.LE_BYTE_5)>; + def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 MovesFromVSR.LE_BYTE_4)>; + def : Pat<(i32 (vector_extract 
v16i8:$S, 12)), + (i32 MovesFromVSR.LE_BYTE_3)>; + def : Pat<(i32 (vector_extract v16i8:$S, 13)), + (i32 MovesFromVSR.LE_BYTE_2)>; + def : Pat<(i32 (vector_extract v16i8:$S, 14)), + (i32 MovesFromVSR.LE_BYTE_1)>; + def : Pat<(i32 (vector_extract v16i8:$S, 15)), + (i32 MovesFromVSR.LE_BYTE_0)>; + def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), + (i32 MovesFromVSR.BE_VARIABLE_BYTE)>; + + // v8i16 scalar <-> vector conversions (BE) + def : Pat<(i32 (vector_extract v8i16:$S, 0)), + (i32 MovesFromVSR.LE_HALF_7)>; + def : Pat<(i32 (vector_extract v8i16:$S, 1)), + (i32 MovesFromVSR.LE_HALF_6)>; + def : Pat<(i32 (vector_extract v8i16:$S, 2)), + (i32 MovesFromVSR.LE_HALF_5)>; + def : Pat<(i32 (vector_extract v8i16:$S, 3)), + (i32 MovesFromVSR.LE_HALF_4)>; + def : Pat<(i32 (vector_extract v8i16:$S, 4)), + (i32 MovesFromVSR.LE_HALF_3)>; + def : Pat<(i32 (vector_extract v8i16:$S, 5)), + (i32 MovesFromVSR.LE_HALF_2)>; + def : Pat<(i32 (vector_extract v8i16:$S, 6)), + (i32 MovesFromVSR.LE_HALF_1)>; + def : Pat<(i32 (vector_extract v8i16:$S, 7)), + (i32 MovesFromVSR.LE_HALF_0)>; + def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), + (i32 MovesFromVSR.BE_VARIABLE_HALF)>; + + // v4i32 scalar <-> vector conversions (BE) + def : Pat<(i32 (vector_extract v4i32:$S, 0)), + (i32 MovesFromVSR.LE_WORD_3)>; + def : Pat<(i32 (vector_extract v4i32:$S, 1)), + (i32 MovesFromVSR.LE_WORD_2)>; + def : Pat<(i32 (vector_extract v4i32:$S, 2)), + (i32 MovesFromVSR.LE_WORD_1)>; + def : Pat<(i32 (vector_extract v4i32:$S, 3)), + (i32 MovesFromVSR.LE_WORD_0)>; + + // v2i64 scalar <-> vector conversions (BE) + def : Pat<(i64 (vector_extract v2i64:$S, 0)), + (i64 MovesFromVSR.LE_DWORD_1)>; + def : Pat<(i64 (vector_extract v2i64:$S, 1)), + (i64 MovesFromVSR.LE_DWORD_0)>; } // IsBigEndian, HasDirectMove +// v4f32 scalar <-> vector conversions (LE) let Predicates = [IsLittleEndian, HasP8Vector] in { def : Pat<(v4f32 (scalar_to_vector f32:$A)), (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>; + def 
: Pat<(f32 (vector_extract v4f32:$S, 0)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 1)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 2)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 3)), + (f32 (XSCVSPDPN $S))>; } // IsLittleEndian, HasP8Vector let Predicates = [IsLittleEndian, HasDirectMove] in { + // v16i8 scalar <-> vector conversions (LE) def : Pat<(v16i8 (scalar_to_vector i32:$A)), - (v16i8 (COPY_TO_REGCLASS Moves.LE_WORD_0, VSRC))>; + (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>; def : Pat<(v8i16 (scalar_to_vector i32:$A)), - (v8i16 (COPY_TO_REGCLASS Moves.LE_WORD_0, VSRC))>; + (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>; def : Pat<(v4i32 (scalar_to_vector i32:$A)), - (v4i32 (COPY_TO_REGCLASS Moves.LE_WORD_0, VSRC))>; + (v4i32 MovesToVSR.LE_WORD_0)>; def : Pat<(v2i64 (scalar_to_vector i64:$A)), - (v2i64 Moves.LE_DWORD_0)>; + (v2i64 MovesToVSR.LE_DWORD_0)>; + def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 MovesFromVSR.LE_BYTE_0)>; + def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 MovesFromVSR.LE_BYTE_1)>; + def : Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 MovesFromVSR.LE_BYTE_2)>; + def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 MovesFromVSR.LE_BYTE_3)>; + def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 MovesFromVSR.LE_BYTE_4)>; + def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 MovesFromVSR.LE_BYTE_5)>; + def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 MovesFromVSR.LE_BYTE_6)>; + def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 MovesFromVSR.LE_BYTE_7)>; + def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 MovesFromVSR.LE_BYTE_8)>; + def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 MovesFromVSR.LE_BYTE_9)>; + def : Pat<(i32 (vector_extract v16i8:$S, 10)), + (i32 MovesFromVSR.LE_BYTE_10)>; + def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 MovesFromVSR.LE_BYTE_11)>; + 
def : Pat<(i32 (vector_extract v16i8:$S, 12)), + (i32 MovesFromVSR.LE_BYTE_12)>; + def : Pat<(i32 (vector_extract v16i8:$S, 13)), + (i32 MovesFromVSR.LE_BYTE_13)>; + def : Pat<(i32 (vector_extract v16i8:$S, 14)), + (i32 MovesFromVSR.LE_BYTE_14)>; + def : Pat<(i32 (vector_extract v16i8:$S, 15)), + (i32 MovesFromVSR.LE_BYTE_15)>; + def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), + (i32 MovesFromVSR.LE_VARIABLE_BYTE)>; + + // v8i16 scalar <-> vector conversions (LE) + def : Pat<(i32 (vector_extract v8i16:$S, 0)), + (i32 MovesFromVSR.LE_HALF_0)>; + def : Pat<(i32 (vector_extract v8i16:$S, 1)), + (i32 MovesFromVSR.LE_HALF_1)>; + def : Pat<(i32 (vector_extract v8i16:$S, 2)), + (i32 MovesFromVSR.LE_HALF_2)>; + def : Pat<(i32 (vector_extract v8i16:$S, 3)), + (i32 MovesFromVSR.LE_HALF_3)>; + def : Pat<(i32 (vector_extract v8i16:$S, 4)), + (i32 MovesFromVSR.LE_HALF_4)>; + def : Pat<(i32 (vector_extract v8i16:$S, 5)), + (i32 MovesFromVSR.LE_HALF_5)>; + def : Pat<(i32 (vector_extract v8i16:$S, 6)), + (i32 MovesFromVSR.LE_HALF_6)>; + def : Pat<(i32 (vector_extract v8i16:$S, 7)), + (i32 MovesFromVSR.LE_HALF_7)>; + def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), + (i32 MovesFromVSR.LE_VARIABLE_HALF)>; + + // v4i32 scalar <-> vector conversions (LE) + def : Pat<(i32 (vector_extract v4i32:$S, 0)), + (i32 MovesFromVSR.LE_WORD_0)>; + def : Pat<(i32 (vector_extract v4i32:$S, 1)), + (i32 MovesFromVSR.LE_WORD_1)>; + def : Pat<(i32 (vector_extract v4i32:$S, 2)), + (i32 MovesFromVSR.LE_WORD_2)>; + def : Pat<(i32 (vector_extract v4i32:$S, 3)), + (i32 MovesFromVSR.LE_WORD_3)>; + + // v2i64 scalar <-> vector conversions (LE) + def : Pat<(i64 (vector_extract v2i64:$S, 0)), + (i64 MovesFromVSR.LE_DWORD_0)>; + def : Pat<(i64 (vector_extract v2i64:$S, 1)), + (i64 MovesFromVSR.LE_DWORD_1)>; } // IsLittleEndian, HasDirectMove - diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp index 76b681de7f4..11ee3051f98 100644 --- a/lib/Target/PowerPC/PPCVSXCopy.cpp 
+++ b/lib/Target/PowerPC/PPCVSXCopy.cpp @@ -128,6 +128,7 @@ protected: IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass : &PPC::VSLRCRegClass; assert((IsF8Reg(DstMO.getReg(), MRI) || + IsVSFReg(DstMO.getReg(), MRI) || IsVRReg(DstMO.getReg(), MRI)) && "Unknown destination for a VSX copy"); diff --git a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll index 01edab0eb76..5f25139140f 100644 --- a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -77,3 +77,1383 @@ entry: ; CHECK-LE: xscvdpspn [[REG1:[0-9]+]], 1 ; CHECK-LE: xxsldwi {{[0-9]+}}, [[REG1]], [[REG1]], 1 } + +; Function Attrs: nounwind +define signext i8 @getsc0(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 0 + ret i8 %vecext +; CHECK-LABEL: @getsc0 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 8, 56 +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc0 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: clrldi 3, 3, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define signext i8 @getsc1(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 1 + ret i8 %vecext +; CHECK-LABEL: @getsc1 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 16, 56 +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc1 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 56, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define signext i8 @getsc2(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 2 + ret i8 %vecext +; 
CHECK-LABEL: @getsc2 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 24, 56 +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc2 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 48, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define signext i8 @getsc3(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 3 + ret i8 %vecext +; CHECK-LABEL: @getsc3 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 32, 56 +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc3 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 40, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define signext i8 @getsc4(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 4 + ret i8 %vecext +; CHECK-LABEL: @getsc4 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 40, 56 +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc4 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 32, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define signext i8 @getsc5(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 5 + ret i8 %vecext +; CHECK-LABEL: @getsc5 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 48, 56 +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc5 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 24, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define signext i8 @getsc6(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 6 
+ ret i8 %vecext +; CHECK-LABEL: @getsc6 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 56, 56 +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc6 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 16, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define signext i8 @getsc7(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 7 + ret i8 %vecext +; CHECK-LABEL: @getsc7 +; CHECK: mfvsrd 3, 34 +; CHECK: clrldi 3, 3, 56 +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc7 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 8, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define signext i8 @getsc8(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 8 + ret i8 %vecext +; CHECK-LABEL: @getsc8 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 8, 56 +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc8 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: clrldi 3, 3, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define signext i8 @getsc9(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 9 + ret i8 %vecext +; CHECK-LABEL: @getsc9 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 16, 56 +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc9 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 56, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define signext i8 @getsc10(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %vecext = extractelement <16 x 
i8> %0, i32 10 + ret i8 %vecext +; CHECK-LABEL: @getsc10 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 24, 56 +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc10 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 48, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define signext i8 @getsc11(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 11 + ret i8 %vecext +; CHECK-LABEL: @getsc11 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 32, 56 +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc11 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 40, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define signext i8 @getsc12(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 12 + ret i8 %vecext +; CHECK-LABEL: @getsc12 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 40, 56 +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc12 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 32, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define signext i8 @getsc13(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 13 + ret i8 %vecext +; CHECK-LABEL: @getsc13 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 48, 56 +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc13 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 24, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define signext i8 @getsc14(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 
16 + %vecext = extractelement <16 x i8> %0, i32 14 + ret i8 %vecext +; CHECK-LABEL: @getsc14 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 56, 56 +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc14 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 16, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define signext i8 @getsc15(<16 x i8> %vsc) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 15 + ret i8 %vecext +; CHECK-LABEL: @getsc15 +; CHECK: mfvsrd 3, +; CHECK: extsb 3, 3 +; CHECK-LE-LABEL: @getsc15 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 8, 56 +; CHECK-LE: extsb 3, 3 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc0(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 0 + ret i8 %vecext +; CHECK-LABEL: @getuc0 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 8, 56 +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc0 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc1(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 1 + ret i8 %vecext +; CHECK-LABEL: @getuc1 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 16, 56 +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc1 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 56, 56 +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc2(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = 
extractelement <16 x i8> %0, i32 2 + ret i8 %vecext +; CHECK-LABEL: @getuc2 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 24, 56 +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc2 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 48, 56 +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc3(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 3 + ret i8 %vecext +; CHECK-LABEL: @getuc3 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 32, 56 +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc3 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 40, 56 +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc4(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 4 + ret i8 %vecext +; CHECK-LABEL: @getuc4 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 40, 56 +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc4 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 32, 56 +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc5(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 5 + ret i8 %vecext +; CHECK-LABEL: @getuc5 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 48, 56 +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc5 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 24, 56 +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc6(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 
= load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 6 + ret i8 %vecext +; CHECK-LABEL: @getuc6 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 56, 56 +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc6 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 16, 56 +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc7(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 7 + ret i8 %vecext +; CHECK-LABEL: @getuc7 +; CHECK: mfvsrd 3, 34 +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc7 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 8, 56 +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc8(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 8 + ret i8 %vecext +; CHECK-LABEL: @getuc8 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 8, 56 +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc8 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc9(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 9 + ret i8 %vecext +; CHECK-LABEL: @getuc9 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 16, 56 +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc9 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 56, 56 +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc10(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 = 
load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 10 + ret i8 %vecext +; CHECK-LABEL: @getuc10 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 24, 56 +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc10 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 48, 56 +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc11(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 11 + ret i8 %vecext +; CHECK-LABEL: @getuc11 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 32, 56 +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc11 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 40, 56 +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc12(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 12 + ret i8 %vecext +; CHECK-LABEL: @getuc12 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 40, 56 +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc12 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 32, 56 +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc13(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 13 + ret i8 %vecext +; CHECK-LABEL: @getuc13 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 48, 56 +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc13 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 24, 56 +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc14(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x 
i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 14 + ret i8 %vecext +; CHECK-LABEL: @getuc14 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 56, 56 +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc14 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 16, 56 +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define zeroext i8 @getuc15(<16 x i8> %vuc) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %vecext = extractelement <16 x i8> %0, i32 15 + ret i8 %vecext +; CHECK-LABEL: @getuc15 +; CHECK: mfvsrd 3, +; CHECK: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getuc15 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 8, 56 +; CHECK-LE: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) { +entry: + %vsc.addr = alloca <16 x i8>, align 16 + %i.addr = alloca i32, align 4 + store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16 + store i32 %i, i32* %i.addr, align 4 + %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16 + %1 = load i32, i32* %i.addr, align 4 + %vecext = extractelement <16 x i8> %0, i32 %1 + ret i8 %vecext +; CHECK-LABEL: @getvelsc +; CHECK-DAG: andi. 
[[ANDI:[0-9]+]], {{[0-9]+}}, 8 +; CHECK-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[ANDI]] +; CHECK-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]] +; CHECK-DAG: mfvsrd [[MOV:[0-9]+]], +; CHECK-DAG: li [[IMM7:[0-9]+]], 7 +; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM7]] +; CHECK-DAG: sldi [[SHL:[0-9]+]], [[ANDC]], 3 +; CHECK-DAG: srd 3, [[MOV]], [[SHL]] +; CHECK-DAG: extsb 3, 3 +; CHECK-LE-LABEL: @getvelsc +; CHECK-LE-DAG: li [[IMM8:[0-9]+]], 8 +; CHECK-LE-DAG: andc [[ANDC:[0-9]+]], [[IMM8]] +; CHECK-LE-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[ANDC]] +; CHECK-LE-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]] +; CHECK-LE-DAG: mfvsrd [[MOV:[0-9]+]], +; CHECK-LE-DAG: li [[IMM7:[0-9]+]], 7 +; CHECK-LE-DAG: and [[AND:[0-9]+]], [[IMM7]] +; CHECK-LE-DAG: sldi [[SHL:[0-9]+]], [[AND]], 3 +; CHECK-LE-DAG: srd 3, [[MOV]], [[SHL]] +; CHECK-LE-DAG: extsb 3, 3 +} + +; Function Attrs: nounwind +define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) { +entry: + %vuc.addr = alloca <16 x i8>, align 16 + %i.addr = alloca i32, align 4 + store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16 + store i32 %i, i32* %i.addr, align 4 + %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16 + %1 = load i32, i32* %i.addr, align 4 + %vecext = extractelement <16 x i8> %0, i32 %1 + ret i8 %vecext +; CHECK-LABEL: @getveluc +; CHECK-DAG: andi. 
[[ANDI:[0-9]+]], {{[0-9]+}}, 8 +; CHECK-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[ANDI]] +; CHECK-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]] +; CHECK-DAG: mfvsrd [[MOV:[0-9]+]], +; CHECK-DAG: li [[IMM7:[0-9]+]], 7 +; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM7]] +; CHECK-DAG: sldi [[SHL:[0-9]+]], [[ANDC]], 3 +; CHECK-DAG: srd 3, [[MOV]], [[SHL]] +; CHECK-DAG: clrldi 3, 3, 56 +; CHECK-LE-LABEL: @getveluc +; CHECK-LE-DAG: li [[IMM8:[0-9]+]], 8 +; CHECK-LE-DAG: andc [[ANDC:[0-9]+]], [[IMM8]] +; CHECK-LE-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[ANDC]] +; CHECK-LE-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]] +; CHECK-LE-DAG: mfvsrd [[MOV:[0-9]+]], +; CHECK-LE-DAG: li [[IMM7:[0-9]+]], 7 +; CHECK-LE-DAG: and [[AND:[0-9]+]], [[IMM7]] +; CHECK-LE-DAG: sldi [[SHL:[0-9]+]], [[AND]], 3 +; CHECK-LE-DAG: srd 3, [[MOV]], [[SHL]] +; CHECK-LE-DAG: clrldi 3, 3, 56 +} + +; Function Attrs: nounwind +define signext i16 @getss0(<8 x i16> %vss) { +entry: + %vss.addr = alloca <8 x i16>, align 16 + store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 0 + ret i16 %vecext +; CHECK-LABEL: @getss0 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 16, 48 +; CHECK: extsh 3, 3 +; CHECK-LE-LABEL: @getss0 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: clrldi 3, 3, 48 +; CHECK-LE: extsh 3, 3 +} + +; Function Attrs: nounwind +define signext i16 @getss1(<8 x i16> %vss) { +entry: + %vss.addr = alloca <8 x i16>, align 16 + store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 1 + ret i16 %vecext +; CHECK-LABEL: @getss1 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 32, 48 +; CHECK: extsh 3, 3 +; CHECK-LE-LABEL: @getss1 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 48, 48 +; CHECK-LE: extsh 3, 3 +} + +; Function Attrs: nounwind +define signext i16 @getss2(<8 x i16> %vss) { +entry: + %vss.addr = alloca <8 x 
i16>, align 16 + store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 2 + ret i16 %vecext +; CHECK-LABEL: @getss2 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 48, 48 +; CHECK: extsh 3, 3 +; CHECK-LE-LABEL: @getss2 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 32, 48 +; CHECK-LE: extsh 3, 3 +} + +; Function Attrs: nounwind +define signext i16 @getss3(<8 x i16> %vss) { +entry: + %vss.addr = alloca <8 x i16>, align 16 + store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 3 + ret i16 %vecext +; CHECK-LABEL: @getss3 +; CHECK: mfvsrd 3, 34 +; CHECK: clrldi 3, 3, 48 +; CHECK: extsh 3, 3 +; CHECK-LE-LABEL: @getss3 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 16, 48 +; CHECK-LE: extsh 3, 3 +} + +; Function Attrs: nounwind +define signext i16 @getss4(<8 x i16> %vss) { +entry: + %vss.addr = alloca <8 x i16>, align 16 + store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 4 + ret i16 %vecext +; CHECK-LABEL: @getss4 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 16, 48 +; CHECK: extsh 3, 3 +; CHECK-LE-LABEL: @getss4 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: clrldi 3, 3, 48 +; CHECK-LE: extsh 3, 3 +} + +; Function Attrs: nounwind +define signext i16 @getss5(<8 x i16> %vss) { +entry: + %vss.addr = alloca <8 x i16>, align 16 + store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 5 + ret i16 %vecext +; CHECK-LABEL: @getss5 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 32, 48 +; CHECK: extsh 3, 3 +; CHECK-LE-LABEL: @getss5 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 48, 48 +; CHECK-LE: extsh 3, 3 +} + +; Function Attrs: nounwind +define signext i16 @getss6(<8 x i16> %vss) { +entry: + 
%vss.addr = alloca <8 x i16>, align 16 + store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 6 + ret i16 %vecext +; CHECK-LABEL: @getss6 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 48, 48 +; CHECK: extsh 3, 3 +; CHECK-LE-LABEL: @getss6 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 32, 48 +; CHECK-LE: extsh 3, 3 +} + +; Function Attrs: nounwind +define signext i16 @getss7(<8 x i16> %vss) { +entry: + %vss.addr = alloca <8 x i16>, align 16 + store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 7 + ret i16 %vecext +; CHECK-LABEL: @getss7 +; CHECK: mfvsrd 3, +; CHECK: extsh 3, 3 +; CHECK-LE-LABEL: @getss7 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 16, 48 +; CHECK-LE: extsh 3, 3 +} + +; Function Attrs: nounwind +define zeroext i16 @getus0(<8 x i16> %vus) { +entry: + %vus.addr = alloca <8 x i16>, align 16 + store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 0 + ret i16 %vecext +; CHECK-LABEL: @getus0 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 16, 48 +; CHECK: clrldi 3, 3, 48 +; CHECK-LE-LABEL: @getus0 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: clrldi 3, 3, 48 +} + +; Function Attrs: nounwind +define zeroext i16 @getus1(<8 x i16> %vus) { +entry: + %vus.addr = alloca <8 x i16>, align 16 + store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 1 + ret i16 %vecext +; CHECK-LABEL: @getus1 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 32, 48 +; CHECK: clrldi 3, 3, 48 +; CHECK-LE-LABEL: @getus1 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 48, 48 +; CHECK-LE: clrldi 3, 3, 48 +} + +; Function Attrs: nounwind +define zeroext i16 @getus2(<8 x i16> %vus) { +entry: + %vus.addr = 
alloca <8 x i16>, align 16 + store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 2 + ret i16 %vecext +; CHECK-LABEL: @getus2 +; CHECK: mfvsrd 3, 34 +; CHECK: rldicl 3, 3, 48, 48 +; CHECK: clrldi 3, 3, 48 +; CHECK-LE-LABEL: @getus2 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 32, 48 +; CHECK-LE: clrldi 3, 3, 48 +} + +; Function Attrs: nounwind +define zeroext i16 @getus3(<8 x i16> %vus) { +entry: + %vus.addr = alloca <8 x i16>, align 16 + store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 3 + ret i16 %vecext +; CHECK-LABEL: @getus3 +; CHECK: mfvsrd 3, 34 +; CHECK: clrldi 3, 3, 48 +; CHECK-LE-LABEL: @getus3 +; CHECK-LE: mfvsrd 3, +; CHECK-LE: rldicl 3, 3, 16, 48 +; CHECK-LE: clrldi 3, 3, 48 +} + +; Function Attrs: nounwind +define zeroext i16 @getus4(<8 x i16> %vus) { +entry: + %vus.addr = alloca <8 x i16>, align 16 + store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 4 + ret i16 %vecext +; CHECK-LABEL: @getus4 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 16, 48 +; CHECK: clrldi 3, 3, 48 +; CHECK-LE-LABEL: @getus4 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: clrldi 3, 3, 48 +} + +; Function Attrs: nounwind +define zeroext i16 @getus5(<8 x i16> %vus) { +entry: + %vus.addr = alloca <8 x i16>, align 16 + store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 5 + ret i16 %vecext +; CHECK-LABEL: @getus5 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 32, 48 +; CHECK: clrldi 3, 3, 48 +; CHECK-LE-LABEL: @getus5 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 48, 48 +; CHECK-LE: clrldi 3, 3, 48 +} + +; Function Attrs: nounwind +define zeroext i16 @getus6(<8 x i16> %vus) { +entry: + 
%vus.addr = alloca <8 x i16>, align 16 + store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 6 + ret i16 %vecext +; CHECK-LABEL: @getus6 +; CHECK: mfvsrd 3, +; CHECK: rldicl 3, 3, 48, 48 +; CHECK: clrldi 3, 3, 48 +; CHECK-LE-LABEL: @getus6 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 32, 48 +; CHECK-LE: clrldi 3, 3, 48 +} + +; Function Attrs: nounwind +define zeroext i16 @getus7(<8 x i16> %vus) { +entry: + %vus.addr = alloca <8 x i16>, align 16 + store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16 + %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16 + %vecext = extractelement <8 x i16> %0, i32 7 + ret i16 %vecext +; CHECK-LABEL: @getus7 +; CHECK: mfvsrd 3, +; CHECK: clrldi 3, 3, 48 +; CHECK-LE-LABEL: @getus7 +; CHECK-LE: mfvsrd 3, 34 +; CHECK-LE: rldicl 3, 3, 16, 48 +; CHECK-LE: clrldi 3, 3, 48 +} + +; Function Attrs: nounwind +define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) { +entry: + %vss.addr = alloca <8 x i16>, align 16 + %i.addr = alloca i32, align 4 + store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16 + store i32 %i, i32* %i.addr, align 4 + %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16 + %1 = load i32, i32* %i.addr, align 4 + %vecext = extractelement <8 x i16> %0, i32 %1 + ret i16 %vecext +; CHECK-LABEL: @getvelss +; CHECK-DAG: andi. 
[[ANDI:[0-9]+]], {{[0-9]+}}, 4 +; CHECK-DAG: sldi [[MUL2:[0-9]+]], [[ANDI]], 1 +; CHECK-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[MUL2]] +; CHECK-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]] +; CHECK-DAG: mfvsrd [[MOV:[0-9]+]], +; CHECK-DAG: li [[IMM3:[0-9]+]], 3 +; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM3]] +; CHECK-DAG: rldicr [[SHL:[0-9]+]], [[ANDC]], 4, 60 +; CHECK-DAG: srd 3, [[MOV]], [[SHL]] +; CHECK-DAG: extsh 3, 3 +; CHECK-LE-LABEL: @getvelss +; CHECK-LE-DAG: li [[IMM4:[0-9]+]], 4 +; CHECK-LE-DAG: andc [[ANDC:[0-9]+]], [[IMM4]] +; CHECK-LE-DAG: sldi [[MUL2:[0-9]+]], [[ANDC]], 1 +; CHECK-LE-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[MUL2]] +; CHECK-LE-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]] +; CHECK-LE-DAG: mfvsrd [[MOV:[0-9]+]], +; CHECK-LE-DAG: li [[IMM3:[0-9]+]], 3 +; CHECK-LE-DAG: and [[AND:[0-9]+]], [[IMM3]] +; CHECK-LE-DAG: sldi [[SHL:[0-9]+]], [[AND]], 4 +; CHECK-LE-DAG: srd 3, [[MOV]], [[SHL]] +; CHECK-LE-DAG: extsh 3, 3 +} + +; Function Attrs: nounwind +define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) { +entry: + %vus.addr = alloca <8 x i16>, align 16 + %i.addr = alloca i32, align 4 + store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16 + store i32 %i, i32* %i.addr, align 4 + %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16 + %1 = load i32, i32* %i.addr, align 4 + %vecext = extractelement <8 x i16> %0, i32 %1 + ret i16 %vecext +; CHECK-LABEL: @getvelus +; CHECK-DAG: andi. 
[[ANDI:[0-9]+]], {{[0-9]+}}, 4 +; CHECK-DAG: sldi [[MUL2:[0-9]+]], [[ANDI]], 1 +; CHECK-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[MUL2]] +; CHECK-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]] +; CHECK-DAG: mfvsrd [[MOV:[0-9]+]], +; CHECK-DAG: li [[IMM3:[0-9]+]], 3 +; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM3]] +; CHECK-DAG: rldicr [[SHL:[0-9]+]], [[ANDC]], 4, 60 +; CHECK-DAG: srd 3, [[MOV]], [[SHL]] +; CHECK-DAG: clrldi 3, 3, 48 +; CHECK-LE-LABEL: @getvelus +; CHECK-LE-DAG: li [[IMM4:[0-9]+]], 4 +; CHECK-LE-DAG: andc [[ANDC:[0-9]+]], [[IMM4]] +; CHECK-LE-DAG: sldi [[MUL2:[0-9]+]], [[ANDC]], 1 +; CHECK-LE-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[MUL2]] +; CHECK-LE-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]] +; CHECK-LE-DAG: mfvsrd [[MOV:[0-9]+]], +; CHECK-LE-DAG: li [[IMM3:[0-9]+]], 3 +; CHECK-LE-DAG: and [[AND:[0-9]+]], [[IMM3]] +; CHECK-LE-DAG: sldi [[SHL:[0-9]+]], [[AND]], 4 +; CHECK-LE-DAG: srd 3, [[MOV]], [[SHL]] +; CHECK-LE-DAG: clrldi 3, 3, 48 +} + +; Function Attrs: nounwind +define signext i32 @getsi0(<4 x i32> %vsi) { +entry: + %vsi.addr = alloca <4 x i32>, align 16 + store <4 x i32> %vsi, <4 x i32>* %vsi.addr, align 16 + %0 = load <4 x i32>, <4 x i32>* %vsi.addr, align 16 + %vecext = extractelement <4 x i32> %0, i32 0 + ret i32 %vecext +; CHECK-LABEL: @getsi0 +; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 3 +; CHECK: mfvsrwz 3, [[SHL]] +; CHECK: extsw 3, 3 +; CHECK-LE-LABEL: @getsi0 +; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK-LE: mfvsrwz 3, [[SHL]] +; CHECK-LE: extsw 3, 3 +} + +; Function Attrs: nounwind +define signext i32 @getsi1(<4 x i32> %vsi) { +entry: + %vsi.addr = alloca <4 x i32>, align 16 + store <4 x i32> %vsi, <4 x i32>* %vsi.addr, align 16 + %0 = load <4 x i32>, <4 x i32>* %vsi.addr, align 16 + %vecext = extractelement <4 x i32> %0, i32 1 + ret i32 %vecext +; CHECK-LABEL: @getsi1 +; CHECK: mfvsrwz 3, 34 +; CHECK: extsw 3, 3 +; CHECK-LE-LABEL: @getsi1 +; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1 +; CHECK-LE: mfvsrwz 3, 
[[SHL]] +; CHECK-LE: extsw 3, 3 +} + +; Function Attrs: nounwind +define signext i32 @getsi2(<4 x i32> %vsi) { +entry: + %vsi.addr = alloca <4 x i32>, align 16 + store <4 x i32> %vsi, <4 x i32>* %vsi.addr, align 16 + %0 = load <4 x i32>, <4 x i32>* %vsi.addr, align 16 + %vecext = extractelement <4 x i32> %0, i32 2 + ret i32 %vecext +; CHECK-LABEL: @getsi2 +; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1 +; CHECK: mfvsrwz 3, [[SHL]] +; CHECK: extsw 3, 3 +; CHECK-LE-LABEL: @getsi2 +; CHECK-LE: mfvsrwz 3, 34 +; CHECK-LE: extsw 3, 3 +} + +; Function Attrs: nounwind +define signext i32 @getsi3(<4 x i32> %vsi) { +entry: + %vsi.addr = alloca <4 x i32>, align 16 + store <4 x i32> %vsi, <4 x i32>* %vsi.addr, align 16 + %0 = load <4 x i32>, <4 x i32>* %vsi.addr, align 16 + %vecext = extractelement <4 x i32> %0, i32 3 + ret i32 %vecext +; CHECK-LABEL: @getsi3 +; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK: mfvsrwz 3, [[SHL]] +; CHECK: extsw 3, 3 +; CHECK-LE-LABEL: @getsi3 +; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 3 +; CHECK-LE: mfvsrwz 3, [[SHL]] +; CHECK-LE: extsw 3, 3 +} + +; Function Attrs: nounwind +define zeroext i32 @getui0(<4 x i32> %vui) { +entry: + %vui.addr = alloca <4 x i32>, align 16 + store <4 x i32> %vui, <4 x i32>* %vui.addr, align 16 + %0 = load <4 x i32>, <4 x i32>* %vui.addr, align 16 + %vecext = extractelement <4 x i32> %0, i32 0 + ret i32 %vecext +; CHECK-LABEL: @getui0 +; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 3 +; CHECK: mfvsrwz 3, [[SHL]] +; CHECK: clrldi 3, 3, 32 +; CHECK-LE-LABEL: @getui0 +; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK-LE: mfvsrwz 3, [[SHL]] +; CHECK-LE: clrldi 3, 3, 32 +} + +; Function Attrs: nounwind +define zeroext i32 @getui1(<4 x i32> %vui) { +entry: + %vui.addr = alloca <4 x i32>, align 16 + store <4 x i32> %vui, <4 x i32>* %vui.addr, align 16 + %0 = load <4 x i32>, <4 x i32>* %vui.addr, align 16 + %vecext = extractelement <4 x i32> %0, i32 1 + ret i32 %vecext +; CHECK-LABEL: @getui1 +; CHECK: mfvsrwz 3, 34 +; CHECK: 
clrldi 3, 3, 32 +; CHECK-LE-LABEL: @getui1 +; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1 +; CHECK-LE: mfvsrwz 3, [[SHL]] +; CHECK-LE: clrldi 3, 3, 32 +} + +; Function Attrs: nounwind +define zeroext i32 @getui2(<4 x i32> %vui) { +entry: + %vui.addr = alloca <4 x i32>, align 16 + store <4 x i32> %vui, <4 x i32>* %vui.addr, align 16 + %0 = load <4 x i32>, <4 x i32>* %vui.addr, align 16 + %vecext = extractelement <4 x i32> %0, i32 2 + ret i32 %vecext +; CHECK-LABEL: @getui2 +; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1 +; CHECK: mfvsrwz 3, [[SHL]] +; CHECK: clrldi 3, 3, 32 +; CHECK-LE-LABEL: @getui2 +; CHECK-LE: mfvsrwz 3, 34 +; CHECK-LE: clrldi 3, 3, 32 +} + +; Function Attrs: nounwind +define zeroext i32 @getui3(<4 x i32> %vui) { +entry: + %vui.addr = alloca <4 x i32>, align 16 + store <4 x i32> %vui, <4 x i32>* %vui.addr, align 16 + %0 = load <4 x i32>, <4 x i32>* %vui.addr, align 16 + %vecext = extractelement <4 x i32> %0, i32 3 + ret i32 %vecext +; CHECK-LABEL: @getui3 +; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK: mfvsrwz 3, [[SHL]] +; CHECK: clrldi 3, 3, 32 +; CHECK-LE-LABEL: @getui3 +; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 3 +; CHECK-LE: mfvsrwz 3, [[SHL]] +; CHECK-LE: clrldi 3, 3, 32 +} + +; Function Attrs: nounwind +define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) { +entry: + %vsi.addr = alloca <4 x i32>, align 16 + %i.addr = alloca i32, align 4 + store <4 x i32> %vsi, <4 x i32>* %vsi.addr, align 16 + store i32 %i, i32* %i.addr, align 4 + %0 = load <4 x i32>, <4 x i32>* %vsi.addr, align 16 + %1 = load i32, i32* %i.addr, align 4 + %vecext = extractelement <4 x i32> %0, i32 %1 + ret i32 %vecext +; CHECK-LABEL: @getvelsi +; CHECK-LE-LABEL: @getvelsi +; FIXME: add check patterns when variable element extraction is implemented +} + +; Function Attrs: nounwind +define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) { +entry: + %vui.addr = alloca <4 x i32>, align 16 + %i.addr = alloca i32, align 4 + store <4 x i32> %vui, <4 x i32>* 
%vui.addr, align 16 + store i32 %i, i32* %i.addr, align 4 + %0 = load <4 x i32>, <4 x i32>* %vui.addr, align 16 + %1 = load i32, i32* %i.addr, align 4 + %vecext = extractelement <4 x i32> %0, i32 %1 + ret i32 %vecext +; CHECK-LABEL: @getvelui +; CHECK-LE-LABEL: @getvelui +; FIXME: add check patterns when variable element extraction is implemented +} + +; Function Attrs: nounwind +define i64 @getsl0(<2 x i64> %vsl) { +entry: + %vsl.addr = alloca <2 x i64>, align 16 + store <2 x i64> %vsl, <2 x i64>* %vsl.addr, align 16 + %0 = load <2 x i64>, <2 x i64>* %vsl.addr, align 16 + %vecext = extractelement <2 x i64> %0, i32 0 + ret i64 %vecext +; CHECK-LABEL: @getsl0 +; CHECK: mfvsrd 3, 34 +; CHECK-LE-LABEL: @getsl0 +; CHECK-LE: xxswapd [[SWP:[0-9]+]], 34 +; CHECK-LE: mfvsrd 3, [[SWP]] +} + +; Function Attrs: nounwind +define i64 @getsl1(<2 x i64> %vsl) { +entry: + %vsl.addr = alloca <2 x i64>, align 16 + store <2 x i64> %vsl, <2 x i64>* %vsl.addr, align 16 + %0 = load <2 x i64>, <2 x i64>* %vsl.addr, align 16 + %vecext = extractelement <2 x i64> %0, i32 1 + ret i64 %vecext +; CHECK-LABEL: @getsl1 +; CHECK: xxswapd [[SWP:[0-9]+]], 34 +; CHECK: mfvsrd 3, [[SWP]] +; CHECK-LE-LABEL: @getsl1 +; CHECK-LE: mfvsrd 3, 34 +} + +; Function Attrs: nounwind +define i64 @getul0(<2 x i64> %vul) { +entry: + %vul.addr = alloca <2 x i64>, align 16 + store <2 x i64> %vul, <2 x i64>* %vul.addr, align 16 + %0 = load <2 x i64>, <2 x i64>* %vul.addr, align 16 + %vecext = extractelement <2 x i64> %0, i32 0 + ret i64 %vecext +; CHECK-LABEL: @getul0 +; CHECK: mfvsrd 3, 34 +; CHECK-LE-LABEL: @getul0 +; CHECK-LE: xxswapd [[SWP:[0-9]+]], 34 +; CHECK-LE: mfvsrd 3, [[SWP]] +} + +; Function Attrs: nounwind +define i64 @getul1(<2 x i64> %vul) { +entry: + %vul.addr = alloca <2 x i64>, align 16 + store <2 x i64> %vul, <2 x i64>* %vul.addr, align 16 + %0 = load <2 x i64>, <2 x i64>* %vul.addr, align 16 + %vecext = extractelement <2 x i64> %0, i32 1 + ret i64 %vecext +; CHECK-LABEL: @getul1 +; CHECK: xxswapd 
[[SWP:[0-9]+]], 34 +; CHECK: mfvsrd 3, [[SWP]] +; CHECK-LE-LABEL: @getul1 +; CHECK-LE: mfvsrd 3, 34 +} + +; Function Attrs: nounwind +define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) { +entry: + %vsl.addr = alloca <2 x i64>, align 16 + %i.addr = alloca i32, align 4 + store <2 x i64> %vsl, <2 x i64>* %vsl.addr, align 16 + store i32 %i, i32* %i.addr, align 4 + %0 = load <2 x i64>, <2 x i64>* %vsl.addr, align 16 + %1 = load i32, i32* %i.addr, align 4 + %vecext = extractelement <2 x i64> %0, i32 %1 + ret i64 %vecext +; CHECK-LABEL: @getvelsl +; CHECK-LE-LABEL: @getvelsl +; FIXME: add check patterns when variable element extraction is implemented +} + +; Function Attrs: nounwind +define i64 @getvelul(<2 x i64> %vul, i32 signext %i) { +entry: + %vul.addr = alloca <2 x i64>, align 16 + %i.addr = alloca i32, align 4 + store <2 x i64> %vul, <2 x i64>* %vul.addr, align 16 + store i32 %i, i32* %i.addr, align 4 + %0 = load <2 x i64>, <2 x i64>* %vul.addr, align 16 + %1 = load i32, i32* %i.addr, align 4 + %vecext = extractelement <2 x i64> %0, i32 %1 + ret i64 %vecext +; CHECK-LABEL: @getvelul +; CHECK-LE-LABEL: @getvelul +; FIXME: add check patterns when variable element extraction is implemented +} + +; Function Attrs: nounwind +define float @getf0(<4 x float> %vf) { +entry: + %vf.addr = alloca <4 x float>, align 16 + store <4 x float> %vf, <4 x float>* %vf.addr, align 16 + %0 = load <4 x float>, <4 x float>* %vf.addr, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + ret float %vecext +; CHECK-LABEL: @getf0 +; CHECK: xscvspdpn 1, 34 +; CHECK-LE-LABEL: @getf0 +; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 3 +; CHECK-LE: xscvspdpn 1, [[SHL]] +} + +; Function Attrs: nounwind +define float @getf1(<4 x float> %vf) { +entry: + %vf.addr = alloca <4 x float>, align 16 + store <4 x float> %vf, <4 x float>* %vf.addr, align 16 + %0 = load <4 x float>, <4 x float>* %vf.addr, align 16 + %vecext = extractelement <4 x float> %0, i32 1 + ret float %vecext +; CHECK-LABEL: @getf1 
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1 +; CHECK: xscvspdpn 1, [[SHL]] +; CHECK-LE-LABEL: @getf1 +; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK-LE: xscvspdpn 1, [[SHL]] +} + +; Function Attrs: nounwind +define float @getf2(<4 x float> %vf) { +entry: + %vf.addr = alloca <4 x float>, align 16 + store <4 x float> %vf, <4 x float>* %vf.addr, align 16 + %0 = load <4 x float>, <4 x float>* %vf.addr, align 16 + %vecext = extractelement <4 x float> %0, i32 2 + ret float %vecext +; CHECK-LABEL: @getf2 +; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK: xscvspdpn 1, [[SHL]] +; CHECK-LE-LABEL: @getf2 +; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1 +; CHECK-LE: xscvspdpn 1, [[SHL]] +} + +; Function Attrs: nounwind +define float @getf3(<4 x float> %vf) { +entry: + %vf.addr = alloca <4 x float>, align 16 + store <4 x float> %vf, <4 x float>* %vf.addr, align 16 + %0 = load <4 x float>, <4 x float>* %vf.addr, align 16 + %vecext = extractelement <4 x float> %0, i32 3 + ret float %vecext +; CHECK-LABEL: @getf3 +; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 3 +; CHECK: xscvspdpn 1, [[SHL]] +; CHECK-LE-LABEL: @getf3 +; CHECK-LE: xscvspdpn 1, 34 +} + +; Function Attrs: nounwind +define float @getvelf(<4 x float> %vf, i32 signext %i) { +entry: + %vf.addr = alloca <4 x float>, align 16 + %i.addr = alloca i32, align 4 + store <4 x float> %vf, <4 x float>* %vf.addr, align 16 + store i32 %i, i32* %i.addr, align 4 + %0 = load <4 x float>, <4 x float>* %vf.addr, align 16 + %1 = load i32, i32* %i.addr, align 4 + %vecext = extractelement <4 x float> %0, i32 %1 + ret float %vecext +; CHECK-LABEL: @getvelf +; CHECK-LE-LABEL: @getvelf +; FIXME: add check patterns when variable element extraction is implemented +} + +; Function Attrs: nounwind +define double @getd0(<2 x double> %vd) { +entry: + %vd.addr = alloca <2 x double>, align 16 + store <2 x double> %vd, <2 x double>* %vd.addr, align 16 + %0 = load <2 x double>, <2 x double>* %vd.addr, align 16 + %vecext = extractelement <2 x double> 
%0, i32 0 + ret double %vecext +; CHECK-LABEL: @getd0 +; CHECK: xxlor 1, 34, 34 +; CHECK-LE-LABEL: @getd0 +; CHECK-LE: xxswapd 1, 34 +} + +; Function Attrs: nounwind +define double @getd1(<2 x double> %vd) { +entry: + %vd.addr = alloca <2 x double>, align 16 + store <2 x double> %vd, <2 x double>* %vd.addr, align 16 + %0 = load <2 x double>, <2 x double>* %vd.addr, align 16 + %vecext = extractelement <2 x double> %0, i32 1 + ret double %vecext +; CHECK-LABEL: @getd1 +; CHECK: xxswapd 1, 34 +; CHECK-LE-LABEL: @getd1 +; CHECK-LE: xxlor 1, 34, 34 +} + +; Function Attrs: nounwind +define double @getveld(<2 x double> %vd, i32 signext %i) { +entry: + %vd.addr = alloca <2 x double>, align 16 + %i.addr = alloca i32, align 4 + store <2 x double> %vd, <2 x double>* %vd.addr, align 16 + store i32 %i, i32* %i.addr, align 4 + %0 = load <2 x double>, <2 x double>* %vd.addr, align 16 + %1 = load i32, i32* %i.addr, align 4 + %vecext = extractelement <2 x double> %0, i32 %1 + ret double %vecext +; CHECK-LABEL: @getveld +; CHECK-LE-LABEL: @getveld +; FIXME: add check patterns when variable element extraction is implemented +} -- 2.34.1