From 672996e18ed8cd148eb1c8174114c567c9bac5a2 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 5 May 2015 10:32:24 +0000 Subject: [PATCH] [mips] Generate code for insert/extract operations when using the N64 ABI and MSA. Summary: When using the N64 ABI, element-indices use the i64 type instead of i32. In many cases, we can use iPTR to account for this but additional patterns and pseudo's are also required. This fixes most (but not quite all) failures in the test-suite when using N64 and MSA together. Reviewers: vkalintiris Reviewed By: vkalintiris Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D9342 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236494 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 157 ++++++++++++++++-- lib/Target/Mips/MipsSEISelLowering.cpp | 6 + test/CodeGen/Mips/msa/basic_operations.ll | 6 +- .../Mips/msa/basic_operations_float.ll | 2 + 4 files changed, 153 insertions(+), 18 deletions(-) diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 68230e6957a..e5b80eac518 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -63,6 +63,9 @@ def MipsVExtractSExt : SDNode<"MipsISD::VEXTRACT_SEXT_ELT", def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT", SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; +def immZExt4Ptr : ImmLeaf(Imm);}]>; +def immZExt6Ptr : ImmLeaf(Imm);}]>; + // Operands // The immediate of an LSA instruction needs special handling @@ -84,6 +87,14 @@ def uimm4 : Operand { let PrintMethod = "printUnsignedImm8"; } +def uimm4_ptr : Operand { + let PrintMethod = "printUnsignedImm8"; +} + +def uimm6_ptr : Operand { + let PrintMethod = "printUnsignedImm8"; +} + def uimm8 : Operand { let PrintMethod = "printUnsignedImm8"; } @@ -1273,9 +1284,9 @@ class MSA_COPY_DESC_BASE { dag OutOperandList = (outs ROD:$rd); - dag InOperandList = (ins ROWS:$ws, uimm4:$n); + dag InOperandList = (ins ROWS:$ws, uimm4_ptr:$n); string AsmString = !strconcat(instr_asm, "\t$rd, $ws[$n]"); - list Pattern = [(set ROD:$rd, (OpNode (VecTy ROWS:$ws), immZExt4:$n))]; + list Pattern = [(set ROD:$rd, (OpNode (VecTy ROWS:$ws), immZExt4Ptr:$n))]; InstrItinClass Itinerary = itin; } @@ -1293,8 +1304,8 @@ class MSA_ELM_SLD_DESC_BASE : - MSAPseudo<(outs RCD:$wd), (ins RCWS:$ws, uimm4:$n), - [(set RCD:$wd, (OpNode (VecTy RCWS:$ws), immZExt4:$n))]> { + MSAPseudo<(outs RCD:$wd), (ins RCWS:$ws, uimm4_ptr:$n), + [(set RCD:$wd, (OpNode (VecTy RCWS:$ws), immZExt4Ptr:$n))]> { bit usesCustomInserter = 1; } @@ -1479,29 +1490,30 @@ class MSA_INSERT_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWD:$wd_in, ROS:$rs, uimm6:$n); + dag InOperandList = (ins ROWD:$wd_in, ROS:$rs, uimm6_ptr:$n); string AsmString = !strconcat(instr_asm, "\t$wd[$n], $rs"); list Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, ROS:$rs, - immZExt6:$n))]; + immZExt6Ptr:$n))]; InstrItinClass Itinerary = itin; string Constraints = "$wd = $wd_in"; } class MSA_INSERT_PSEUDO_BASE : - MSAPseudo<(outs ROWD:$wd), (ins ROWD:$wd_in, uimm6:$n, ROFS:$fs), + MSAPseudo<(outs ROWD:$wd), (ins ROWD:$wd_in, uimm6_ptr:$n, ROFS:$fs), [(set ROWD:$wd, (OpNode (Ty ROWD:$wd_in), ROFS:$fs, - immZExt6:$n))]> { + immZExt6Ptr:$n))]> { bit usesCustomInserter = 1; string Constraints = "$wd = $wd_in"; } class MSA_INSERT_VIDX_PSEUDO_BASE : - MSAPseudo<(outs ROWD:$wd), (ins ROWD:$wd_in, GPR32Opnd:$n, ROFS:$fs), + RegisterOperand ROWD, RegisterOperand ROFS, + RegisterOperand ROIdx> : + MSAPseudo<(outs ROWD:$wd), (ins ROWD:$wd_in, ROIdx:$n, ROFS:$fs), [(set ROWD:$wd, (OpNode (Ty ROWD:$wd_in), ROFS:$fs, - GPR32Opnd:$n))]> { + ROIdx:$n))]> { bit usesCustomInserter = 1; string Constraints = "$wd = $wd_in"; } @@ -2302,13 +2314,13 @@ class INSERT_D_DESC : MSA_INSERT_DESC_BASE<"insert.d", vinsert_v2i64, MSA128DOpnd, GPR64Opnd>; class INSERT_B_VIDX_PSEUDO_DESC : - MSA_INSERT_VIDX_PSEUDO_BASE; + MSA_INSERT_VIDX_PSEUDO_BASE; class INSERT_H_VIDX_PSEUDO_DESC : - MSA_INSERT_VIDX_PSEUDO_BASE; + MSA_INSERT_VIDX_PSEUDO_BASE; class INSERT_W_VIDX_PSEUDO_DESC : - MSA_INSERT_VIDX_PSEUDO_BASE; + MSA_INSERT_VIDX_PSEUDO_BASE; class INSERT_D_VIDX_PSEUDO_DESC : - MSA_INSERT_VIDX_PSEUDO_BASE; + MSA_INSERT_VIDX_PSEUDO_BASE; class INSERT_FW_PSEUDO_DESC : MSA_INSERT_PSEUDO_BASE; @@ -2316,9 +2328,23 @@ class INSERT_FD_PSEUDO_DESC : MSA_INSERT_PSEUDO_BASE; class INSERT_FW_VIDX_PSEUDO_DESC : - MSA_INSERT_VIDX_PSEUDO_BASE; + MSA_INSERT_VIDX_PSEUDO_BASE; class INSERT_FD_VIDX_PSEUDO_DESC : - MSA_INSERT_VIDX_PSEUDO_BASE; + MSA_INSERT_VIDX_PSEUDO_BASE; + +class INSERT_B_VIDX64_PSEUDO_DESC : + MSA_INSERT_VIDX_PSEUDO_BASE; +class INSERT_H_VIDX64_PSEUDO_DESC : + MSA_INSERT_VIDX_PSEUDO_BASE; +class INSERT_W_VIDX64_PSEUDO_DESC : + MSA_INSERT_VIDX_PSEUDO_BASE; +class INSERT_D_VIDX64_PSEUDO_DESC : + MSA_INSERT_VIDX_PSEUDO_BASE; + +class INSERT_FW_VIDX64_PSEUDO_DESC : + MSA_INSERT_VIDX_PSEUDO_BASE; +class INSERT_FD_VIDX64_PSEUDO_DESC : + MSA_INSERT_VIDX_PSEUDO_BASE; class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, MSA128BOpnd>; @@ -3236,6 +3262,13 @@ def INSERT_D_VIDX_PSEUDO : INSERT_D_VIDX_PSEUDO_DESC; def INSERT_FW_VIDX_PSEUDO : INSERT_FW_VIDX_PSEUDO_DESC; def INSERT_FD_VIDX_PSEUDO : INSERT_FD_VIDX_PSEUDO_DESC; +def INSERT_B_VIDX64_PSEUDO : INSERT_B_VIDX64_PSEUDO_DESC; +def INSERT_H_VIDX64_PSEUDO : INSERT_H_VIDX64_PSEUDO_DESC; +def INSERT_W_VIDX64_PSEUDO : INSERT_W_VIDX64_PSEUDO_DESC; +def INSERT_D_VIDX64_PSEUDO : INSERT_D_VIDX64_PSEUDO_DESC; +def INSERT_FW_VIDX64_PSEUDO : INSERT_FW_VIDX64_PSEUDO_DESC; +def INSERT_FD_VIDX64_PSEUDO : INSERT_FD_VIDX64_PSEUDO_DESC; + def LD_B: LD_B_ENC, LD_B_DESC; def LD_H: LD_H_ENC, LD_H_DESC; def LD_W: LD_W_ENC, LD_W_DESC; @@ -3805,3 +3838,93 @@ def : MSAPat<(f64 (vector_extract v2f64:$ws, i32:$idx)), (f64 (EXTRACT_SUBREG (SPLAT_D v2f64:$ws, i32:$idx), sub_64))>; + +// Vector extraction with variable index (N64 ABI) +def : MSAPat< + (i32 (vextract_sext_i8 v16i8:$ws, i64:$idx)), + (SRA (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG + (SPLAT_B v16i8:$ws, + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_lo)), + GPR32), + (i32 24))>; +def : MSAPat< + (i32 (vextract_sext_i16 v8i16:$ws, i64:$idx)), + (SRA (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG + (SPLAT_H v8i16:$ws, + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_lo)), + GPR32), + (i32 16))>; +def : MSAPat< + (i32 (vextract_sext_i32 v4i32:$ws, i64:$idx)), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG + (SPLAT_W v4i32:$ws, + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_lo)), + GPR32)>; +def : MSAPat< + (i64 (vextract_sext_i64 v2i64:$ws, i64:$idx)), + (COPY_TO_REGCLASS + (i64 (EXTRACT_SUBREG + (SPLAT_D v2i64:$ws, + (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_64)), + GPR64), [HasMSA, IsGP64bit]>; + +def : MSAPat< + (i32 (vextract_zext_i8 v16i8:$ws, i64:$idx)), + (SRL (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG + (SPLAT_B v16i8:$ws, + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_lo)), + GPR32), + (i32 24))>; +def : MSAPat< + (i32 (vextract_zext_i16 v8i16:$ws, i64:$idx)), + (SRL (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG + (SPLAT_H v8i16:$ws, + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_lo)), + GPR32), + (i32 16))>; +def : MSAPat< + (i32 (vextract_zext_i32 v4i32:$ws, i64:$idx)), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG + (SPLAT_W v4i32:$ws, + (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_lo)), + GPR32)>; +def : MSAPat< + (i64 (vextract_zext_i64 v2i64:$ws, i64:$idx)), + (COPY_TO_REGCLASS + (i64 (EXTRACT_SUBREG + (SPLAT_D v2i64:$ws, + (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_64)), + GPR64), + [HasMSA, IsGP64bit]>; + +def : MSAPat< + (f32 (vector_extract v4f32:$ws, i64:$idx)), + (f32 (EXTRACT_SUBREG + (SPLAT_W v4f32:$ws, + (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_lo))>; +def : MSAPat< + (f64 (vector_extract v2f64:$ws, i64:$idx)), + (f64 (EXTRACT_SUBREG + (SPLAT_D v2f64:$ws, + (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), + sub_64))>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index ca6ddfd9064..d4adf00be6e 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1146,16 +1146,22 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case Mips::INSERT_FD_PSEUDO: return emitINSERT_FD(MI, BB); case Mips::INSERT_B_VIDX_PSEUDO: + case Mips::INSERT_B_VIDX64_PSEUDO: return emitINSERT_DF_VIDX(MI, BB, 1, false); case Mips::INSERT_H_VIDX_PSEUDO: + case Mips::INSERT_H_VIDX64_PSEUDO: return emitINSERT_DF_VIDX(MI, BB, 2, false); case Mips::INSERT_W_VIDX_PSEUDO: + case Mips::INSERT_W_VIDX64_PSEUDO: return emitINSERT_DF_VIDX(MI, BB, 4, false); case Mips::INSERT_D_VIDX_PSEUDO: + case Mips::INSERT_D_VIDX64_PSEUDO: return emitINSERT_DF_VIDX(MI, BB, 8, false); case Mips::INSERT_FW_VIDX_PSEUDO: + case Mips::INSERT_FW_VIDX64_PSEUDO: return emitINSERT_DF_VIDX(MI, BB, 4, true); case Mips::INSERT_FD_VIDX_PSEUDO: + case Mips::INSERT_FD_VIDX64_PSEUDO: return emitINSERT_DF_VIDX(MI, BB, 8, true); case Mips::FILL_FW_PSEUDO: return emitFILL_FW(MI, BB); diff --git a/test/CodeGen/Mips/msa/basic_operations.ll b/test/CodeGen/Mips/msa/basic_operations.ll index 3e42c3e4b10..2efec291193 100644 --- a/test/CodeGen/Mips/msa/basic_operations.ll +++ b/test/CodeGen/Mips/msa/basic_operations.ll @@ -2,6 +2,8 @@ ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=O32 -check-prefix=MIPS32 -check-prefix=ALL-LE %s ; RUN: llc -march=mips64 -target-abi n32 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N32 -check-prefix=MIPS64 -check-prefix=ALL-BE %s ; RUN: llc -march=mips64el -target-abi n32 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N32 -check-prefix=MIPS64 -check-prefix=ALL-LE %s +; RUN: llc -march=mips64 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N64 -check-prefix=MIPS64 -check-prefix=ALL-BE %s +; RUN: llc -march=mips64el -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N64 -check-prefix=MIPS64 -check-prefix=ALL-LE %s @v4i8 = global <4 x i8> @v16i8 = global <16 x i8> @@ -796,7 +798,9 @@ define void @insert_v4i32_vidx(i32 signext %a) nounwind { ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2 ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]] ; ALL-DAG: insert.w [[R1]][0], $4 - ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] + ; O32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] + ; N32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] + ; N64-DAG: dneg [[NIDX:\$[0-9]+]], [[BIDX]] ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]] store <4 x i32> %3, <4 x i32>* @v4i32 diff --git a/test/CodeGen/Mips/msa/basic_operations_float.ll b/test/CodeGen/Mips/msa/basic_operations_float.ll index 961a0158e35..f19cb9b7c2e 100644 --- a/test/CodeGen/Mips/msa/basic_operations_float.ll +++ b/test/CodeGen/Mips/msa/basic_operations_float.ll @@ -2,6 +2,8 @@ ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=O32 %s ; RUN: llc -march=mips64 -target-abi=n32 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N32 %s ; RUN: llc -march=mips64el -target-abi=n32 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N32 %s +; RUN: llc -march=mips64 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N64 %s +; RUN: llc -march=mips64el -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N64 %s @v4f32 = global <4 x float> @v2f64 = global <2 x double> -- 2.34.1