From 27d489f3b2bcdb328bd0048216044f6285f4f5a0 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 3 Apr 2014 09:26:16 +0000 Subject: [PATCH] ARM64: always use i64 for the RHS of shift operations Switching between i32 and i64 based on the LHS type is a good idea in theory, but pre-legalisation uses i64 regardless of our choice, leading to potential ISel errors. Should fix PR19294. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205519 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM64/ARM64ISelLowering.cpp | 23 ++++------ lib/Target/ARM64/ARM64InstrFormats.td | 63 +++++++++++--------------- lib/Target/ARM64/ARM64InstrInfo.td | 62 ++++++++++++------------- 3 files changed, 67 insertions(+), 81 deletions(-) diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp index 8164e6d2967..641f5916102 100644 --- a/lib/Target/ARM64/ARM64ISelLowering.cpp +++ b/lib/Target/ARM64/ARM64ISelLowering.cpp @@ -573,11 +573,6 @@ void ARM64TargetLowering::computeMaskedBitsForTargetNode( } MVT ARM64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const { - if (!LHSTy.isSimple()) - return MVT::i64; - MVT SimpleVT = LHSTy.getSimpleVT(); - if (SimpleVT == MVT::i32) - return MVT::i32; return MVT::i64; } @@ -1534,10 +1529,10 @@ getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { // check we have to arithmetic shift right the 32nd bit of the result by // 31 bits. Then we compare the result to the upper 32 bits. SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add, - DAG.getConstant(32, MVT::i32)); + DAG.getConstant(32, MVT::i64)); UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits); SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value, - DAG.getConstant(31, MVT::i32)); + DAG.getConstant(31, MVT::i64)); // It is important that LowerBits is last, otherwise the arithmetic // shift will not be folded into the compare (SUBS). SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32); @@ -1550,7 +1545,7 @@ getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { // pattern: // (i64 ARM64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32) SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, - DAG.getConstant(32, MVT::i32)); + DAG.getConstant(32, MVT::i64)); SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); Overflow = DAG.getNode(ARM64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), @@ -1564,7 +1559,7 @@ getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { if (IsSigned) { SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS); SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value, - DAG.getConstant(63, MVT::i32)); + DAG.getConstant(63, MVT::i64)); // It is important that LowerBits is last, otherwise the arithmetic // shift will not be folded into the compare (SUBS). SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); @@ -6330,16 +6325,18 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, if (VP1.isPowerOf2()) { // Multiplying by one less than a power of two, replace with a shift // and a subtract. - SDValue ShiftedVal = DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), - DAG.getConstant(VP1.logBase2(), VT)); + SDValue ShiftedVal = + DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), + DAG.getConstant(VP1.logBase2(), MVT::i64)); return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal, N->getOperand(0)); } APInt VM1 = Value - 1; if (VM1.isPowerOf2()) { // Multiplying by one more than a power of two, replace with a shift // and an add. - SDValue ShiftedVal = DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), - DAG.getConstant(VM1.logBase2(), VT)); + SDValue ShiftedVal = + DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), + DAG.getConstant(VM1.logBase2(), MVT::i64)); return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0)); } } diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/ARM64/ARM64InstrFormats.td index cf8c5037f6b..38406f8d6ca 100644 --- a/lib/Target/ARM64/ARM64InstrFormats.td +++ b/lib/Target/ARM64/ARM64InstrFormats.td @@ -440,57 +440,33 @@ def imm0_127 : Operand, ImmLeaf, ImmLeaf { let ParserMatchClass = Imm0_63Operand; } -// imm0_31x predicate - True if the immediate is in the range [0,31] -// NOTE: This has to be of type i64 because i64 is the shift-amount-size -// for X registers. -def imm0_31x : Operand, ImmLeaf { - let ParserMatchClass = Imm0_31Operand; -} - -// imm0_15x predicate - True if the immediate is in the range [0,15] -def imm0_15x : Operand, ImmLeaf { - let ParserMatchClass = Imm0_15Operand; -} - -// imm0_7x predicate - True if the immediate is in the range [0,7] -def imm0_7x : Operand, ImmLeaf { - let ParserMatchClass = Imm0_7Operand; -} - // imm0_31 predicate - True if the immediate is in the range [0,31] -// NOTE: This has to be of type i32 because i32 is the shift-amount-size -// for W registers. -def imm0_31 : Operand, ImmLeaf, ImmLeaf { let ParserMatchClass = Imm0_31Operand; } // imm0_15 predicate - True if the immediate is in the range [0,15] -def imm0_15 : Operand, ImmLeaf, ImmLeaf { let ParserMatchClass = Imm0_15Operand; } // imm0_7 predicate - True if the immediate is in the range [0,7] -def imm0_7 : Operand, ImmLeaf, ImmLeaf { let ParserMatchClass = Imm0_7Operand; } @@ -1127,21 +1103,34 @@ multiclass Div { } } -class BaseShift shift_type, RegisterClass regtype, - string asm, SDNode OpNode> +class BaseShift shift_type, RegisterClass regtype, string asm, + SDPatternOperator OpNode = null_frag> : BaseTwoOperand<{1,0,?,?}, regtype, asm, OpNode>, Sched<[WriteIS]> { let Inst{11-10} = shift_type; } multiclass Shift shift_type, string asm, SDNode OpNode> { - def Wr : BaseShift { + def Wr : BaseShift { let Inst{31} = 0; } def Xr : BaseShift { let Inst{31} = 1; } + + def : Pat<(i32 (OpNode GPR32:$Rn, i64:$Rm)), + (!cast(NAME # "Wr") GPR32:$Rn, + (EXTRACT_SUBREG i64:$Rm, sub_32))>; + + def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (zext GPR32:$Rm)))), + (!cast(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; + + def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (anyext GPR32:$Rm)))), + (!cast(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; + + def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (sext GPR32:$Rm)))), + (!cast(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; } class ShiftAlias @@ -1572,7 +1561,7 @@ multiclass AddSubS { // Extract //--- def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>]>; + SDTCisPtrTy<3>]>; def ARM64Extr : SDNode<"ARM64ISD::EXTR", SDTA64EXTR>; class BaseExtractImm; defm CLZ : OneOperandData<0b100, "clz", ctlz>; defm RBIT : OneOperandData<0b000, "rbit">; def REV16Wr : OneWRegData<0b001, "rev16", - UnOpFrag<(rotr (bswap node:$LHS), (i32 16))>>; + UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; def REV16Xr : OneXRegData<0b001, "rev16", UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; @@ -670,7 +670,7 @@ def : InstAlias<"ror $dst, $src, $shift", def : InstAlias<"ror $dst, $src, $shift", (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>; -def : Pat<(rotr GPR32:$Rn, (i32 imm0_31:$imm)), +def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)), (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>; def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)), (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>; @@ -684,28 +684,28 @@ defm SBFM : BitfieldImm<0b00, "sbfm">; defm UBFM : BitfieldImm<0b10, "ubfm">; } -def i32shift_a : Operand, SDNodeXForm, SDNodeXFormgetZExtValue()) & 0x1f; - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, MVT::i64); }]>; -def i32shift_b : Operand, SDNodeXForm, SDNodeXFormgetZExtValue(); - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, MVT::i64); }]>; // min(7, 31 - shift_amt) -def i32shift_sext_i8 : Operand, SDNodeXForm, SDNodeXFormgetZExtValue(); enc = enc > 7 ? 7 : enc; - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, MVT::i64); }]>; // min(15, 31 - shift_amt) -def i32shift_sext_i16 : Operand, SDNodeXForm, SDNodeXFormgetZExtValue(); enc = enc > 15 ? 15 : enc; - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, MVT::i64); }]>; def i64shift_a : Operand, SDNodeXForm, SDNodeXFormgetTargetConstant(enc, MVT::i64); }]>; -def : Pat<(shl GPR32:$Rn, (i32 imm0_31:$imm)), - (UBFMWri GPR32:$Rn, (i32 (i32shift_a imm0_31:$imm)), - (i32 (i32shift_b imm0_31:$imm)))>; +def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), + (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), + (i64 (i32shift_b imm0_31:$imm)))>; def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)), (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), (i64 (i64shift_b imm0_63:$imm)))>; let AddedComplexity = 10 in { -def : Pat<(sra GPR32:$Rn, (i32 imm0_31:$imm)), +def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)), (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)), (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; @@ -763,7 +763,7 @@ def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; -def : Pat<(srl GPR32:$Rn, (i32 imm0_31:$imm)), +def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)), (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)), (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; @@ -4247,16 +4247,16 @@ def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>; def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>; def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>; -def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i32 imm0_31:$imm)), - (SBFMWri GPR32:$Rn, (i32 (i32shift_a imm0_31:$imm)), - (i32 (i32shift_sext_i8 imm0_31:$imm)))>; +def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)), + (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), + (i64 (i32shift_sext_i8 imm0_31:$imm)))>; def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)), (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), (i64 (i64shift_sext_i8 imm0_63:$imm)))>; -def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i32 imm0_31:$imm)), - (SBFMWri GPR32:$Rn, (i32 (i32shift_a imm0_31:$imm)), - (i32 (i32shift_sext_i16 imm0_31:$imm)))>; +def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)), + (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), + (i64 (i32shift_sext_i16 imm0_31:$imm)))>; def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)), (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), (i64 (i64shift_sext_i16 imm0_63:$imm)))>; @@ -4273,19 +4273,19 @@ let AddedComplexity = 20 in { // We support all sext + sra combinations which preserve at least one bit of the // original value which is to be sign extended. E.g. we support shifts up to // bitwidth-1 bits. -def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i32 imm0_7:$imm)), - (SBFMWri GPR32:$Rn, (i32 imm0_7:$imm), 7)>; -def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7x:$imm)), - (SBFMXri GPR64:$Rn, (i64 imm0_7x:$imm), 7)>; +def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)), + (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>; +def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)), + (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>; -def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i32 imm0_15:$imm)), - (SBFMWri GPR32:$Rn, (i32 imm0_15:$imm), 15)>; -def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15x:$imm)), - (SBFMXri GPR64:$Rn, (i64 imm0_15x:$imm), 15)>; +def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)), + (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>; +def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)), + (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>; -def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31x:$imm)), +def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), - (i64 imm0_31x:$imm), 31)>; + (i64 imm0_31:$imm), 31)>; } // AddedComplexity = 20 // To truncate, we can simply extract from a subregister. -- 2.34.1